diff --git a/model.py b/model.py
index f7323dd..10fd812 100644
--- a/model.py
+++ b/model.py
@@ -71,7 +71,22 @@ class FastRCNN(nn.Module):
         #       hidden_dim -> hidden_dim.                                            #
         ##############################################################################
         # Replace "pass" statement with your code
-        pass
+        # Classification head: two-layer MLP emitting num_classes + 1 logits
+        # (the extra logit is presumably the background class -- confirm
+        # against assign_label).
+        self.cls_head = nn.Sequential(
+            nn.Linear(in_dim, hidden_dim),
+            nn.Dropout(drop_ratio),
+            nn.ReLU(),
+            nn.Linear(hidden_dim, num_classes + 1),
+        )
+        # Box-regression head: same layout, emitting 4 offsets per proposal.
+        self.bbox_head = nn.Sequential(
+            nn.Linear(in_dim, hidden_dim),
+            nn.Dropout(drop_ratio),
+            nn.ReLU(),
+            nn.Linear(hidden_dim, 4),
+        )
         ##############################################################################
         #                               END OF YOUR CODE                             #
         ##############################################################################
@@ -118,23 +133,54 @@ class FastRCNN(nn.Module):
         B, _, H, W = images.shape
 
         # extract image feature
-        pass
+        # Call the module (not .forward) so registered hooks still run.
+        feat = self.feat_extractor(images)
 
         # perform RoI Pool & mean pool
-        pass
+        # roi_pool takes rois as (K, 5) rows of [batch_idx, x1, y1, x2, y2]
+        # and output_size as (height, width).
+        rois = torch.cat(
+            (proposal_batch_ids.unsqueeze(1).to(proposals.dtype), proposals),
+            dim=1,
+        )
+        feat = torchvision.ops.roi_pool(
+            feat, rois, output_size=(self.roi_output_h, self.roi_output_w)
+        )
+        feat = feat.mean(dim=[2, 3])
 
         # forward heads, get predicted cls scores & offsets
-        pass
+        cls_scores = self.cls_head(feat)
+        bbox_offsets = self.bbox_head(feat)
 
         # assign targets with proposals
         pos_masks, neg_masks, GT_labels, GT_bboxes = [], [], [], []
         for img_idx in range(B):
             # get the positive/negative proposals and corresponding
             # GT box & class label of this image
-            pass
+            pos_mask, neg_mask, GT_label, GT_bbox = assign_label(
+                proposals[proposal_batch_ids == img_idx],
+                bboxes[bbox_batch_ids == img_idx],
+                self.num_classes,
+            )
+            pos_masks.append(pos_mask)
+            neg_masks.append(neg_mask)
+            GT_labels.append(GT_label)
+            GT_bboxes.append(GT_bbox)
 
         # compute loss
-        pass
+        cls_loss = 0
+        bbox_loss = 0
+        for img_idx in range(B):
+            # Select this image's rows once and reuse them for both losses.
+            in_img = proposal_batch_ids == img_idx
+            pos_mask = pos_masks[img_idx]
+            cls_loss += ClsScoreRegression(cls_scores[in_img], GT_labels[img_idx], B)
+            bbox_loss += BboxRegression(
+                bbox_offsets[in_img][pos_mask],
+                compute_offsets(proposals[in_img][pos_mask], GT_bboxes[img_idx]),
+                B,
+            )
+        total_loss = cls_loss + bbox_loss
 
         ##############################################################################
         #                               END OF YOUR CODE                             #
@@ -183,16 +229,26 @@ class FastRCNN(nn.Module):
         B = images.shape[0]
 
         # extract image feature
-        pass
+        feat = self.feat_extractor(images)
 
         # perform RoI Pool & mean pool
-        pass
+        # Same RoI layout as in training: [batch_idx, x1, y1, x2, y2] rows,
+        # output_size given as (height, width).
+        rois = torch.cat(
+            (proposal_batch_ids.unsqueeze(1).to(proposals.dtype), proposals),
+            dim=1,
+        )
+        feat = torchvision.ops.roi_pool(
+            feat, rois, output_size=(self.roi_output_h, self.roi_output_w)
+        )
+        feat = feat.mean(dim=[2, 3])
 
         # forward heads, get predicted cls scores & offsets
-        pass
+        cls_scores = self.cls_head(feat)
+        bbox_offsets = self.bbox_head(feat)
 
         # get predicted boxes & class label & confidence probability
-        pass
+        proposals = generate_proposal(proposals, bbox_offsets)
 
         final_proposals = []
         final_conf_probs = []
@@ -201,10 +257,23 @@ class FastRCNN(nn.Module):
         for img_idx in range(B):
 
             # filter by threshold
-            pass
+            in_img = proposal_batch_ids == img_idx
+            cls_prob = torch.softmax(cls_scores[in_img], dim=1)
+            # NOTE(review): column 1 is used as the confidence, as in the
+            # original patch. With a (num_classes + 1)-way head this looks
+            # like it should be the best foreground class -- confirm.
+            conf = cls_prob[:, 1]
+            above_thresh = conf > thresh
+            boxes = proposals[in_img][above_thresh]
+            conf = conf[above_thresh]
 
             # nms
-            pass
+            # nms needs boxes and scores of equal length, so both are
+            # filtered by the same mask before the call; the survivors are
+            # stored only after suppression.
+            kept = torchvision.ops.nms(boxes, conf, nms_thresh)
+            final_proposals.append(boxes[kept])
+            final_conf_probs.append(conf[kept].unsqueeze(1))
 
         ##############################################################################
         #                               END OF YOUR CODE                             #
diff --git a/utils.py b/utils.py
index 858cf26..b3d3ca2 100644
--- a/utils.py
+++ b/utils.py
@@ -142,12 +142,20 @@ def compute_iou(anchors, bboxes):
     Outputs:
     - iou: IoU matrix of shape (M, N)
     """
     iou = None
     ##############################################################################
     # TODO: Given anchors and gt bboxes,                                         #
     # compute the iou between each anchor and gt bbox.                           #
     ##############################################################################
-    pass
+    # Broadcast (M, 1, 2) corners against (1, N, 2) corners so every
+    # anchor/box pair is handled in one shot instead of an M x N Python loop.
+    top_left = torch.max(anchors[:, None, :2], bboxes[None, :, :2])
+    bottom_right = torch.min(anchors[:, None, 2:4], bboxes[None, :, 2:4])
+    # Non-overlapping pairs give a negative extent; clamp it to zero.
+    inter = (bottom_right - top_left).clamp(min=0).prod(dim=2)
+    anchor_area = (anchors[:, 2] - anchors[:, 0]) * (anchors[:, 3] - anchors[:, 1])
+    bbox_area = (bboxes[:, 2] - bboxes[:, 0]) * (bboxes[:, 3] - bboxes[:, 1])
+    iou = inter / (anchor_area[:, None] + bbox_area[None, :] - inter)
 
     ##############################################################################
     #                               END OF YOUR CODE                             #
@@ -206,7 +214,17 @@ def generate_proposal(anchors, offsets):
     # compute the proposal coordinates using the transformation formulas above.  #
     ##############################################################################
     # Replace "pass" statement with your code
-    pass
+    # Anchors are corner-format boxes; the offsets act on centre/size, so
+    # convert, transform, and convert back.
+    # NOTE(review): the docstring formulas are outside this hunk; this is the
+    # standard R-CNN parameterisation -- confirm it mirrors compute_offsets.
+    sizes = anchors[:, 2:4] - anchors[:, :2]
+    centers = anchors[:, :2] + 0.5 * sizes
+    new_centers = centers + offsets[:, :2] * sizes
+    new_sizes = sizes * torch.exp(offsets[:, 2:4])
+    proposals = torch.cat(
+        (new_centers - 0.5 * new_sizes, new_centers + 0.5 * new_sizes), dim=1
+    )
     ##############################################################################
     #                               END OF YOUR CODE                             #