"""YOLOv3 (Darknet-53 backbone) built with the Keras functional API.

Running this module builds the model for a 416x416x3 input, compiles it,
prints a summary and saves it to ``yolov3_model.h5``.
"""
import numpy as np
import tensorflow as tf


def convolutional(input_layer, filters_shape, downsample=False, activate=True, bn=True):
    """Apply a Conv2D layer, optionally followed by batch norm and LeakyReLU.

    :param input_layer: input feature map.
    :param filters_shape: sequence whose first element is the kernel size and
        whose last element is the number of output filters; the elements in
        between are not read by this function.
    :param downsample: if True, zero-pad one row/column on the top/left and
        convolve with stride 2 and 'valid' padding (halves the spatial size);
        otherwise use stride 1 with 'same' padding.
    :param activate: if True, finish with LeakyReLU(alpha=0.1).
    :param bn: if True, apply BatchNormalization; the conv bias is disabled
        in that case (``use_bias=not bn``).
    :return: output feature map.
    """
    if downsample:
        input_layer = tf.keras.layers.ZeroPadding2D(((1, 0), (1, 0)))(input_layer)
        padding = 'valid'
        strides = 2
    else:
        strides = 1
        padding = 'same'

    conv = tf.keras.layers.Conv2D(
        filters=filters_shape[-1],
        kernel_size=filters_shape[0],
        strides=strides,
        padding=padding,
        use_bias=not bn,
        kernel_regularizer=tf.keras.regularizers.l2(0.0005),
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        bias_initializer=tf.constant_initializer(0.),
    )(input_layer)

    if bn:
        conv = tf.keras.layers.BatchNormalization()(conv)
    if activate:
        conv = tf.keras.layers.LeakyReLU(alpha=0.1)(conv)
    return conv


def residual_block(input_layer, filters_num1, filters_num2):
    """Residual block: a 1x1 then a 3x3 convolution added back onto the input.

    :param input_layer: input feature map of the residual block.
    :param filters_num1: number of filters of the first (1x1) convolution.
    :param filters_num2: number of filters of the second (3x3) convolution.
        The result is added element-wise to ``input_layer``, so it has to be
        compatible with the input channel count (every call in this file
        passes the input channel count).
    :return: ``input_layer`` plus the output of the two convolutions.
    """
    short_cut = input_layer
    conv = convolutional(input_layer, filters_shape=(1, 1, input_layer.shape[-1], filters_num1))
    conv = convolutional(conv, filters_shape=(3, 3, filters_num1, filters_num2))
    return short_cut + conv


def darknet53(input_data):
    """Darknet-53 backbone.

    Shape comments below assume a (416, 416, 3) input.

    :param input_data: input image tensor.
    :return: (stage output one, stage output two, final output).
    """
    # input_data: (416, 416, 3)
    input_data = convolutional(input_data, (3, 3, 3, 32))
    # -> (416, 416, 32)
    input_data = convolutional(input_data, (3, 3, 32, 64), downsample=True)
    # -> (208, 208, 64)

    # Residual block x1, shape unchanged (208, 208, 64)
    for _ in range(1):
        input_data = residual_block(input_data, 32, 64)

    input_data = convolutional(input_data, (3, 3, 64, 128), downsample=True)
    # -> (104, 104, 128)

    # Residual block x2, shape unchanged (104, 104, 128)
    for _ in range(2):
        input_data = residual_block(input_data, 64, 128)

    input_data = convolutional(input_data, (3, 3, 128, 256), downsample=True)
    # -> (52, 52, 256)

    # Residual block x8, shape unchanged (52, 52, 256)
    for _ in range(8):
        input_data = residual_block(input_data, 128, 256)

    route_1 = input_data  # first saved feature map (52, 52, 256)

    input_data = convolutional(input_data, (3, 3, 256, 512), downsample=True)
    # -> (26, 26, 512)

    # Residual block x8, shape unchanged (26, 26, 512)
    for _ in range(8):
        input_data = residual_block(input_data, 256, 512)

    route_2 = input_data  # second saved feature map (26, 26, 512)

    input_data = convolutional(input_data, (3, 3, 512, 1024), downsample=True)
    # -> (13, 13, 1024)

    # Residual block x4, shape unchanged (13, 13, 1024)
    for _ in range(4):
        input_data = residual_block(input_data, 512, 1024)

    return route_1, route_2, input_data


def yolo_convolutional(input_layer, filters_shape):
    """YOLO prediction head: a `convolutional` block plus a linear 1x1 Conv2D.

    :param input_layer: input feature map.
    :param filters_shape: shape tuple forwarded to `convolutional`; its last
        element is also the filter count of the final 1x1 convolution, so the
        head width follows the caller's class count instead of a fixed 80.
    :return: raw prediction feature map with ``filters_shape[-1]`` channels.
    """
    conv = convolutional(input_layer, filters_shape)
    conv = tf.keras.layers.Conv2D(
        filters=filters_shape[-1],
        kernel_size=(1, 1),
        strides=(1, 1),
        padding='same',
        use_bias=True,
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        bias_initializer=tf.constant_initializer(0.),
    )(conv)
    return conv


def decode(conv_output, num_classes, anchors, i=0, stride=32.0):
    """Decode a raw head output into boxes, confidences and class scores.

    :param conv_output: raw head output; it is reshaped to
        (batch, size, size, 3, 5 + num_classes), where size is its second
        dimension.
    :param num_classes: number of classes.
    :param anchors: indexable collection of anchors; ``anchors[i]`` is
        multiplied with the width/height term and therefore has to broadcast
        against a (..., 3, 2) tensor.
    :param i: index into ``anchors`` (0, 1 or 2 for the three grid scales).
    :param stride: factor applied to the grid-relative xy and wh values.
        Defaults to 32.0, the value that used to be hard-coded.
        NOTE(review): presumably this should be input_size / grid_size for
        the scale being decoded (8, 16 or 32 for a 416 input) — confirm.
    :return: tensor of shape
        [batch_size, output_size, output_size, 3, 5 + num_classes]
        containing (x, y, w, h, score, probability).
    """
    conv_shape = tf.shape(conv_output)
    batch_size = conv_shape[0]
    output_size = conv_shape[1]

    # Split the channel axis into (anchor, box attributes).
    conv_output = tf.reshape(conv_output, (batch_size, output_size, output_size, 3, 5 + num_classes))

    # Extract [x, y, w, h, c, classes] in order.
    conv_raw_dxdy = conv_output[:, :, :, :, 0:2]  # offset of the box centre
    conv_raw_dwdh = conv_output[:, :, :, :, 2:4]  # offset of the box width/height
    conv_raw_conf = conv_output[:, :, :, :, 4:5]  # box confidence
    conv_raw_prob = conv_output[:, :, :, :, 5:]   # box class scores

    # Build the cell grid: y holds the row index, x the column index.
    y = tf.tile(tf.range(output_size, dtype=tf.int32)[:, tf.newaxis], [1, output_size])
    x = tf.tile(tf.range(output_size, dtype=tf.int32)[tf.newaxis, :], [output_size, 1])

    xy_grid = tf.concat([x[:, :, tf.newaxis], y[:, :, tf.newaxis]], axis=-1)
    xy_grid = tf.tile(xy_grid[tf.newaxis, :, :, tf.newaxis, :], [batch_size, 1, 1, 3, 1])
    xy_grid = tf.cast(xy_grid, tf.float32)  # top-left corner (cx, cy) of every cell

    # Box centre: sigmoid offset inside the cell plus the cell position.
    pred_xy = (tf.sigmoid(conv_raw_dxdy) + xy_grid) * stride
    # Box size: exponential scaling of the anchor.
    pred_wh = (tf.exp(conv_raw_dwdh) * anchors[i]) * stride
    pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1)

    pred_conf = tf.sigmoid(conv_raw_conf)  # objectness confidence
    pred_prob = tf.sigmoid(conv_raw_prob)  # per-class score

    return tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1)


def YoloV3(input_layer, NUM_CLASS):
    """Build the YOLOv3 network and return its three raw prediction maps.

    Shape comments below assume a (416, 416, 3) input; C stands for
    3 * (NUM_CLASS + 5).

    :param input_layer: input layer, [416, 416, 3].
    :param NUM_CLASS: number of classes.
    :return: [conv_sbbox, conv_mbbox, conv_lbbox], i.e. the 52x52, 26x26 and
        13x13 prediction maps, in that order.
    """
    route_1, route_2, conv = darknet53(input_layer)
    # route_1: (52, 52, 256)
    # route_2: (26, 26, 512)
    # conv:    (13, 13, 1024)

    # --- Convolution set, large-object scale ---
    conv = convolutional(conv, (1, 1, 1024, 512))   # (13, 13, 512)
    conv = convolutional(conv, (3, 3, 512, 1024))   # (13, 13, 1024)
    conv = convolutional(conv, (1, 1, 1024, 512))   # (13, 13, 512)
    conv = convolutional(conv, (3, 3, 512, 1024))   # (13, 13, 1024)
    conv = convolutional(conv, (1, 1, 1024, 512))   # (13, 13, 512)

    conv_lobj_branch = convolutional(conv, (3, 3, 512, 1024))  # (13, 13, 1024)
    # Predict One: (13, 13, C)
    conv_lbbox = yolo_convolutional(conv_lobj_branch, (1, 1, 1024, 3 * (NUM_CLASS + 5)))

    conv = convolutional(conv, (1, 1, 512, 256))    # (13, 13, 256)
    conv = tf.keras.layers.UpSampling2D(2)(conv)    # (26, 26, 256)
    # Concatenate along the channel axis -> (26, 26, 768)
    conv = tf.keras.layers.Concatenate(axis=-1)([conv, route_2])

    # --- Convolution set, medium-object scale ---
    conv = convolutional(conv, (1, 1, 768, 256))    # (26, 26, 256)
    conv = convolutional(conv, (3, 3, 256, 512))    # (26, 26, 512)
    conv = convolutional(conv, (1, 1, 512, 256))    # (26, 26, 256)
    conv = convolutional(conv, (3, 3, 256, 512))    # (26, 26, 512)
    conv = convolutional(conv, (1, 1, 512, 256))    # (26, 26, 256)

    conv_mobj_branch = convolutional(conv, (3, 3, 256, 512))   # (26, 26, 512)
    # Predict Two: (26, 26, C)
    conv_mbbox = yolo_convolutional(conv_mobj_branch, (1, 1, 512, 3 * (NUM_CLASS + 5)))

    conv = convolutional(conv, (1, 1, 256, 128))    # (26, 26, 128)
    conv = tf.keras.layers.UpSampling2D(2)(conv)    # (52, 52, 128)
    # Concatenate along the channel axis -> (52, 52, 384)
    conv = tf.keras.layers.Concatenate(axis=-1)([conv, route_1])

    # --- Convolution set, small-object scale ---
    conv = convolutional(conv, (1, 1, 384, 128))    # (52, 52, 128)
    conv = convolutional(conv, (3, 3, 128, 256))    # (52, 52, 256)
    conv = convolutional(conv, (1, 1, 256, 128))    # (52, 52, 128)
    conv = convolutional(conv, (3, 3, 128, 256))    # (52, 52, 256)
    conv = convolutional(conv, (1, 1, 256, 128))    # (52, 52, 128)

    conv_sobj_branch = convolutional(conv, (3, 3, 128, 256))   # (52, 52, 256)
    # Predict Three: (52, 52, C)
    conv_sbbox = yolo_convolutional(conv_sobj_branch, (1, 1, 256, 3 * (NUM_CLASS + 5)))

    # [Predict Three, Predict Two, Predict One]
    # [(52, 52, C), (26, 26, C), (13, 13, C)]
    return [conv_sbbox, conv_mbbox, conv_lbbox]


def YoloLoss(output_size, num_classes, anchors):
    """Create the YOLO loss function for one output scale.

    The three arguments are accepted for interface compatibility; the
    returned closure does not read any of them.

    :param output_size: grid size of the scale (unused).
    :param num_classes: number of classes (unused).
    :param anchors: anchor array (unused).
    :return: ``yolo_loss(y_true, y_pred)`` returning a scalar loss.
    """

    def yolo_loss(y_true, y_pred):
        """Sum of xy, wh, objectness, no-object and class losses.

        Both tensors are sliced along the last axis as
        [x, y, w, h, conf, classes...].

        NOTE(review): the model built in this file emits raw head tensors
        with 3 * (5 + C) channels and never calls `decode`; confirm the
        training pipeline reshapes/decodes predictions before this loss.
        """
        pred_xywh = y_pred[..., :4]
        pred_conf = y_pred[..., 4:5]
        pred_prob = y_pred[..., 5:]

        true_xywh = y_true[..., :4]
        true_conf = y_true[..., 4:5]
        true_prob = y_true[..., 5:]

        true_xy = true_xywh[..., :2]
        true_wh = true_xywh[..., 2:4]
        pred_xy = pred_xywh[..., :2]
        pred_wh = pred_xywh[..., 2:4]

        # IoU between each predicted box and the ground-truth box at the
        # same position (boxes are centre/size encoded).
        intersect_wh = tf.maximum(
            tf.minimum(pred_xy + pred_wh / 2, true_xy + true_wh / 2)
            - tf.maximum(pred_xy - pred_wh / 2, true_xy - true_wh / 2),
            0,
        )
        intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
        true_area = true_wh[..., 0] * true_wh[..., 1]
        pred_area = pred_wh[..., 0] * pred_wh[..., 1]
        union_area = pred_area + true_area - intersect_area
        iou = intersect_area / union_area

        # Same reduction as before (max over the last IoU axis), but the
        # reduced axis is kept and a trailing axis is added so the mask has
        # the same rank as true_conf. Without this the mask loses a
        # dimension and cannot broadcast in the products below.
        best_iou = tf.reduce_max(iou, axis=-1, keepdims=True)[..., tf.newaxis]
        mask = tf.cast(best_iou < 0.5, dtype=tf.float32)

        obj_weight = true_conf * mask
        no_obj_weight = (1.0 - true_conf) * (1.0 - mask)
        obj_norm = tf.reduce_sum(obj_weight + 1e-6)
        no_obj_norm = tf.reduce_sum(no_obj_weight + 1e-6)

        xy_loss = tf.reduce_sum(tf.square(true_xy - pred_xy) * obj_weight) / obj_norm * 2.0
        wh_loss = tf.reduce_sum(tf.square(true_wh - pred_wh) * obj_weight) / obj_norm * 2.0
        obj_loss = tf.reduce_sum(tf.square(true_conf - pred_conf) * obj_weight) / obj_norm
        no_obj_loss = tf.reduce_sum(tf.square(true_conf - pred_conf) * no_obj_weight) / no_obj_norm

        # NOTE(review): sparse_categorical_crossentropy runs with its default
        # from_logits=False and presumably expects y_true[..., 5:] to be a
        # single integer class-id channel — confirm against the data pipeline.
        class_loss = tf.keras.losses.sparse_categorical_crossentropy(true_prob, pred_prob)
        # The cross-entropy result has no trailing channel axis, so drop it
        # from true_conf as well before weighting.
        class_loss = tf.reduce_sum(class_loss * true_conf[..., 0]) / tf.reduce_sum(true_conf + 1e-6)

        return xy_loss + wh_loss + obj_loss + no_obj_loss + class_loss

    return yolo_loss


# Anchors, given in pixels and divided by 416
anchors = np.array([(10, 13), (16, 30), (33, 23),
                    (30, 61), (62, 45), (59, 119),
                    (116, 90), (156, 198), (373, 326)], np.float32) / 416.0

# Model input
input_layer = tf.keras.layers.Input([416, 416, 3])
num_classes = 13

# Build the YoloV3 model
output = YoloV3(input_layer, num_classes)
model = tf.keras.Model(input_layer, output)

# Compile the model; one loss per output, listed in the same order as the
# model outputs (52x52, 26x26, 13x13) and built with the model's class count.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
              loss=[YoloLoss(52, num_classes, anchors),
                    YoloLoss(26, num_classes, anchors),
                    YoloLoss(13, num_classes, anchors)])

# Print the model structure
model.summary()

# # Save the model in SavedModel format
# tf.saved_model.save(model, 'yolov3_saved_model')

# Save the model in .h5 format
model.save('yolov3_model.h5')