# Attribution comment preserved from the original source:
# "Written by Ross Girshick and Sean Bell".
#
# Named layer stack for a VGG16-based Faster R-CNN: the VGG16 convolutional
# trunk, the region proposal network (RPN) head, RoI pooling and the
# fully-connected detection head.
#
# NOTE(review): "rpn_bbox_pred" declares in_channels=512 but directly follows
# "rpn_cls_score" (18 output channels), so a plain sequential forward pass
# would fail at that point. Presumably this container serves as an ordered
# name -> module registry (e.g. for weight loading) -- confirm against callers.
#
# NOTE(review): the keys "relu_2_1" (extra underscore) and
# "rpn_cls_rpob_reshape" ("rpob") look like typos, but they are kept verbatim
# because module names are part of this object's external interface.
model = nn.Sequential(
    collections.OrderedDict(
        [
            # ---- VGG16 block 1: 3 -> 64 channels ----
            # Size-preserving padding for a 3x3 / stride-1 conv, worked for a
            # 32-pixel input: (1 * (32 - 1) - 32 + 3) / 2 = 1
            ("conv1_1", nn.Conv2d(in_channels=3, out_channels=64,
                                  kernel_size=(3, 3), stride=(1, 1), padding=1)),
            ("relu1_1", nn.ReLU()),
            ("conv1_2", nn.Conv2d(in_channels=64, out_channels=64,
                                  kernel_size=(3, 3), stride=(1, 1), padding=1)),
            ("relu1_2", nn.ReLU()),
            ("pool1", nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))),

            # ---- VGG16 block 2: 64 -> 128 channels ----
            ("conv2_1", nn.Conv2d(in_channels=64, out_channels=128,
                                  kernel_size=(3, 3), stride=(1, 1), padding=1)),
            ("relu_2_1", nn.ReLU()),
            ("conv2_2", nn.Conv2d(in_channels=128, out_channels=128,
                                  kernel_size=(3, 3), stride=(1, 1), padding=1)),
            ("relu2_2", nn.ReLU()),
            ("pool2", nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))),

            # ---- VGG16 block 3: 128 -> 256 channels ----
            ("conv3_1", nn.Conv2d(in_channels=128, out_channels=256,
                                  kernel_size=(3, 3), stride=(1, 1), padding=1)),
            ("relu3_1", nn.ReLU()),
            ("conv3_2", nn.Conv2d(in_channels=256, out_channels=256,
                                  kernel_size=(3, 3), stride=(1, 1), padding=1)),
            ("relu3_2", nn.ReLU()),
            ("conv3_3", nn.Conv2d(in_channels=256, out_channels=256,
                                  kernel_size=(3, 3), stride=(1, 1), padding=1)),
            ("relu3_3", nn.ReLU()),
            ("pool3", nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))),

            # ---- VGG16 block 4: 256 -> 512 channels ----
            ("conv4_1", nn.Conv2d(in_channels=256, out_channels=512,
                                  kernel_size=(3, 3), stride=(1, 1), padding=1)),
            ("relu4_1", nn.ReLU()),
            ("conv4_2", nn.Conv2d(in_channels=512, out_channels=512,
                                  kernel_size=(3, 3), stride=(1, 1), padding=1)),
            ("relu4_2", nn.ReLU()),
            ("conv4_3", nn.Conv2d(in_channels=512, out_channels=512,
                                  kernel_size=(3, 3), stride=(1, 1), padding=1)),
            ("relu4_3", nn.ReLU()),
            ("pool4", nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))),

            # ---- VGG16 block 5: 512 -> 512 channels ----
            ("conv5_1", nn.Conv2d(in_channels=512, out_channels=512,
                                  kernel_size=(3, 3), stride=(1, 1), padding=1)),
            ("relu5_1", nn.ReLU()),
            # padding=1 was missing here; without it this is the only 3x3 conv
            # in the trunk that shrinks the feature map (by 2 px per dimension).
            ("conv5_2", nn.Conv2d(in_channels=512, out_channels=512,
                                  kernel_size=(3, 3), stride=(1, 1), padding=1)),
            ("relu5_2", nn.ReLU()),
            ("conv5_3", nn.Conv2d(in_channels=512, out_channels=512,
                                  kernel_size=(3, 3), stride=(1, 1), padding=1)),
            ("relu5_3", nn.ReLU()),
            # NOTE(review): with pool5 the trunk contains five stride-2 pools
            # (overall stride 32), whereas the proposal layer uses
            # feat_stride=16 and RoI pooling uses spatial_scale=0.0625 (1/16).
            # Kept because removing a named module would break name-based
            # access -- confirm whether pool5 is actually applied.
            ("pool5", nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))),

            # ---- Region proposal network head ----
            ("rpn_conv/3x3", nn.Conv2d(in_channels=512, out_channels=512,
                                       kernel_size=3, padding=1, stride=1)),
            ("rpn_relu/3x3", nn.ReLU()),
            # 18 = 2 scores x 9 anchors; 36 = 4 box offsets x 9 anchors.
            ("rpn_cls_score", nn.Conv2d(in_channels=512, out_channels=18,
                                        kernel_size=1, padding=0, stride=1)),
            ("rpn_bbox_pred", nn.Conv2d(in_channels=512, out_channels=36,
                                        kernel_size=1, padding=0, stride=1)),
            # Reshape is a project-defined module; the 0 / -1 entries
            # presumably follow Caffe reshape semantics (0 = keep that
            # dimension, -1 = infer it) -- TODO confirm.
            ("rpn_cls_score_reshape", Reshape([0, 2, -1, 0])),
            # dim=1 is the dimension the deprecated implicit-dim Softmax picks
            # for 2-D and 4-D inputs; stating it explicitly silences the
            # deprecation warning without changing the result for such inputs.
            ("rpn_cls_prob", nn.Softmax(dim=1)),
            ("rpn_cls_rpob_reshape", Reshape([0, 18, -1, 0])),
            # The RPN heads above imply 9 anchors per location, i.e. 3 scales
            # x 3 ratios. The previous value (scalar 1737) could not yield
            # that; [8, 16, 32] are the standard Faster R-CNN anchor scales.
            # NOTE(review): confirm these scales against the trained weights.
            ("proposal", ProposalLayer(feat_stride=16, scales=[8, 16, 32],
                                       ratios=[0.5, 1, 2])),

            # ---- Detection head ----
            # 7x7 RoI bins over 512 channels -> 512 * 7 * 7 = 25088 features.
            ("roi_pool5", RoIPool((7, 7), spatial_scale=0.0625)),
            ("fc6", nn.Linear(25088, 4096)),
            ("relu6", nn.ReLU()),
            ("drop6", nn.Dropout(0.5)),
            ("fc7", nn.Linear(4096, 4096)),
            # relu7 was missing: fc7 fed straight into dropout, unlike fc6.
            ("relu7", nn.ReLU()),
            ("drop7", nn.Dropout(0.5)),
            # 3 class scores and 12 = 4 x 3 per-class box regression outputs.
            ("cls_score", nn.Linear(4096, 3)),
            ("bbox_pred", nn.Linear(4096, 12)),
            ("cls_prob", nn.Softmax(dim=1)),
        ]
    )
)