From b3e8ac055b3aac9b6ae8b8fce8d7015e467b93c5 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Tue, 12 Feb 2019 21:29:36 +0900 Subject: [PATCH 001/100] [wip] add mask_rcnn --- chainercv/links/__init__.py | 2 + chainercv/links/model/mask_rcnn/__init__.py | 6 + chainercv/links/model/mask_rcnn/mask_head.py | 227 +++++++++++++++++ chainercv/links/model/mask_rcnn/mask_rcnn.py | 147 +++++++++++ .../model/mask_rcnn/mask_rcnn_fpn_resnet.py | 68 ++++++ examples/instance_segmentation/eval_coco.py | 23 +- examples/mask_rcnn/demo.py | 58 +++++ examples/mask_rcnn/train_multi.py | 229 ++++++++++++++++++ 8 files changed, 756 insertions(+), 4 deletions(-) create mode 100644 chainercv/links/model/mask_rcnn/__init__.py create mode 100644 chainercv/links/model/mask_rcnn/mask_head.py create mode 100644 chainercv/links/model/mask_rcnn/mask_rcnn.py create mode 100644 chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py create mode 100644 examples/mask_rcnn/demo.py create mode 100644 examples/mask_rcnn/train_multi.py diff --git a/chainercv/links/__init__.py b/chainercv/links/__init__.py index 5aa5ae5d37..be7f150873 100644 --- a/chainercv/links/__init__.py +++ b/chainercv/links/__init__.py @@ -9,6 +9,8 @@ from chainercv.links.model.faster_rcnn.faster_rcnn_vgg import FasterRCNNVGG16 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet101 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet50 # NOQA +from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet101 # NOQA +from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet50 # NOQA from chainercv.links.model.resnet import ResNet101 # NOQA from chainercv.links.model.resnet import ResNet152 # NOQA from chainercv.links.model.resnet import ResNet50 # NOQA diff --git a/chainercv/links/model/mask_rcnn/__init__.py b/chainercv/links/model/mask_rcnn/__init__.py new file mode 100644 index 0000000000..c9e910a524 --- /dev/null 
+++ b/chainercv/links/model/mask_rcnn/__init__.py @@ -0,0 +1,6 @@ +from chainercv.links.model.mask_rcnn.mask_head import mask_loss_post # NOQA +from chainercv.links.model.mask_rcnn.mask_head import mask_loss_pre # NOQA +from chainercv.links.model.mask_rcnn.mask_head import MaskHead # NOQA +from chainercv.links.model.mask_rcnn.mask_rcnn import MaskRCNN # NOQA +from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet101 # NOQA +from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet50 # NOQA diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py new file mode 100644 index 0000000000..2b2b5c4cbb --- /dev/null +++ b/chainercv/links/model/mask_rcnn/mask_head.py @@ -0,0 +1,227 @@ +from __future__ import division + +import numpy as np +import PIL + +import cv2 + +import chainer +from chainer.backends import cuda +import chainer.functions as F +from chainer.initializers import HeNormal +import chainer.links as L + +from chainercv.transforms.image.resize import resize +from chainercv.utils.bbox.bbox_iou import bbox_iou +from chainercv.utils.mask.mask_to_bbox import mask_to_bbox + + +class MaskHead(chainer.Chain): + + _canonical_scale = 224 + _roi_size = 14 + _roi_sample_ratio = 2 + mask_size = _roi_size * 2 + + # Remember, initialization is MSRAFill + def __init__(self, n_class, scales): + super(MaskHead, self).__init__() + + initialW = HeNormal(1, fan_option='fan_out') + with self.init_scope(): + self.conv1 = L.Convolution2D(256, 3, pad=1, initialW=initialW) + self.conv2 = L.Convolution2D(256, 3, pad=1, initialW=initialW) + self.conv3 = L.Convolution2D(256, 3, pad=1, initialW=initialW) + self.conv4 = L.Convolution2D(256, 3, pad=1, initialW=initialW) + self.conv5 = L.Deconvolution2D( + 256, 2, pad=0, stride=2, initialW=initialW) + self.seg = L.Convolution2D(n_class, 1, pad=0, initialW=initialW) + + self._n_class = n_class + self._scales = scales + + def __call__(self, hs, 
rois, roi_indices): + pooled_hs = [] + for l, h in enumerate(hs): + if len(rois[l]) == 0: + continue + + pooled_hs.append(F.roi_average_align_2d( + h, rois[l], roi_indices[l], + self._roi_size, + self._scales[l], self._roi_sample_ratio)) + + if len(pooled_hs) == 0: + out_size = self.mask_size + segs = chainer.Variable( + self.xp.empty((0, self._n_class, out_size, out_size), + dtype=np.float32)) + return segs + + h = F.concat(pooled_hs, axis=0) + h = F.relu(self.conv1(h)) + h = F.relu(self.conv2(h)) + h = F.relu(self.conv3(h)) + h = F.relu(self.conv4(h)) + h = F.relu(self.conv5(h)) + return self.seg(h) + + def distribute(self, rois, roi_indices): + size = self.xp.sqrt( + self.xp.prod(rois[:, 2:] + 1 - rois[:, :2], axis=1)) + level = self.xp.floor(self.xp.log2( + size / self._canonical_scale + 1e-6)).astype(np.int32) + # skip last level + level = self.xp.clip( + level + len(self._scales) // 2, 0, len(self._scales) - 2) + + masks = [level == l for l in range(len(self._scales))] + rois = [rois[mask] for mask in masks] + roi_indices = [roi_indices[mask] for mask in masks] + order = self.xp.argsort( + self.xp.concatenate([self.xp.where(mask)[0] for mask in masks])) + return rois, roi_indices, order + + def decode(self, segms, bboxes, labels, sizes): + # CPU is used because cv2.resize only accepts numpy arrays. + segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms] + bboxes = [chainer.backends.cuda.to_cpu(bbox) for bbox in bboxes] + labels = [chainer.backends.cuda.to_cpu(label) for label in labels] + + masks = [] + # To work around an issue with cv2.resize (it seems to automatically + # pad with repeated border values), we manually zero-pad the masks by 1 + # pixel prior to resizing back to the original image resolution. + # This prevents "top hat" artifacts. We therefore need to expand + # the reference boxes by an appropriate factor. 
+ cv2_expand_scale = (self.mask_size + 2) / self.mask_size + padded_mask = np.zeros((self.mask_size + 2, self.mask_size + 2), + dtype=np.float32) + for bbox, segm, label, size in zip( + bboxes, segms, labels, sizes): + img_H, img_W = size + mask = np.zeros((len(bbox), img_H, img_W), dtype=np.bool) + + bbox = expand_boxes(bbox, cv2_expand_scale) + for i, (bb, sgm, lbl) in enumerate(zip(bbox, segm, label)): + bb = bb.astype(np.int32) + padded_mask[1:-1, 1:-1] = sgm[lbl + 1] + + # TODO(yuyu2172): Ignore +1 later + bb_height = np.maximum(bb[2] - bb[0] + 1, 1) + bb_width = np.maximum(bb[3] - bb[1] + 1, 1) + + crop_mask = cv2.resize(padded_mask, (bb_width, bb_height)) + crop_mask = crop_mask > 0.5 + + y_min = max(bb[0], 0) + x_min = max(bb[1], 0) + y_max = min(bb[2] + 1, img_H) + x_max = min(bb[3] + 1, img_W) + mask[i, y_min:y_max, x_min:x_max] = crop_mask[ + (y_min - bb[0]):(y_max - bb[0]), + (x_min - bb[1]):(x_max - bb[1])] + masks.append(mask) + return masks + + +def expand_boxes(bbox, scale): + """Expand an array of boxes by a given scale.""" + xp = chainer.backends.cuda.get_array_module(bbox) + + h_half = (bbox[:, 2] - bbox[:, 0]) * .5 + w_half = (bbox[:, 3] - bbox[:, 1]) * .5 + y_c = (bbox[:, 2] + bbox[:, 0]) * .5 + x_c = (bbox[:, 3] + bbox[:, 1]) * .5 + + h_half *= scale + w_half *= scale + + expanded_bbox = xp.zeros(bbox.shape) + expanded_bbox[:, 0] = y_c - h_half + expanded_bbox[:, 1] = x_c - w_half + expanded_bbox[:, 2] = y_c + h_half + expanded_bbox[:, 3] = x_c + w_half + + return expanded_bbox + + +def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels, + mask_size=28): + xp = cuda.get_array_module(*rois) + + n_level = len(rois) + + roi_levels = xp.hstack( + xp.array((l,) * len(rois[l])) for l in range(n_level)).astype(np.int32) + rois = xp.vstack(rois).astype(np.float32) + roi_indices = xp.hstack(roi_indices).astype(np.int32) + gt_head_labels = xp.hstack(gt_head_labels) + + index = (gt_head_labels > 0).nonzero()[0] + mask_roi_levels = 
roi_levels[index] + mask_rois = rois[index] + mask_roi_indices = roi_indices[index] + gt_mask_labels = gt_head_labels[index] + + gt_segms = xp.empty((len(mask_rois), mask_size, mask_size), dtype=np.bool) + for i in np.unique(cuda.to_cpu(mask_roi_indices)): + gt_mask = gt_masks[i] + gt_bbox = mask_to_bbox(gt_mask) + + index = (mask_roi_indices == i).nonzero()[0] + mask_roi = mask_rois[index] + iou = bbox_iou(mask_roi, gt_bbox) + gt_index = iou.argmax(axis=1) + gt_segms[index] = segm_wrt_bbox( + gt_mask[gt_index], mask_roi, (M, M)) + + # indices = [(mask_roi_levels == l).nonzero() for l in range(n_level)] + flag_masks = [mask_roi_levels == l for l in range(n_level)] + mask_rois = [mask_rois[m] for m in flag_masks] + mask_roi_indices = [mask_roi_indices[m] for m in flag_masks] + gt_segms = [gt_segms[m] for m in flag_masks] + gt_mask_labels = [gt_mask_labels[m] for m in flag_masks] + return mask_rois, mask_roi_indices, gt_segms, gt_mask_labels + + +def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels, + batchsize): + # Just compute loss for the foreground class + # divide by the batchsize + xp = cuda.get_array_module(segms.array) + + mask_roi_indices = xp.hstack(mask_roi_indices).astype(np.int32) + gt_segms = xp.vstack(gt_segms).astype(np.float32) + gt_mask_labels = xp.hstack(gt_mask_labels).astype(np.int32) + + mask_loss = 0 + for i in np.unique(cuda.to_cpu(mask_roi_indices)): + index = (mask_roi_indices == i).nonzero()[0] + gt_segm = gt_segms[index] + gt_mask_label = gt_mask_labels[index] + + mask_loss += F.sigmoid_cross_entropy( + segms[index, gt_mask_label], gt_segm.astype(np.int32)) + + mask_loss /= batchsize + return mask_loss + + +def segm_wrt_bbox(mask, bbox, size): + xp = chainer.backends.cuda.get_array_module(mask) + + bbox = bbox.astype(np.int32) + + segm = [] + for m, bb in zip(mask, bbox): + if bb[2] - bb[0] == 0 or bb[3] - bb[1] == 0: + segm.append(xp.zeros(size, dtype=np.bool)) + continue + cropped_m = m[bb[0]:bb[2], bb[1]:bb[3]] + 
cropped_m = chainer.backends.cuda.to_cpu(cropped_m) + + segm.append(resize( + cropped_m[None].astype(np.float32), + size, interpolation=PIL.Image.NEAREST)[0].astype(np.bool)) + return xp.array(segm, dtype=np.bool) diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py new file mode 100644 index 0000000000..94347b1cdd --- /dev/null +++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py @@ -0,0 +1,147 @@ +from __future__ import division + +import numpy as np +import PIL + +import chainer +from chainer.backends import cuda +import chainer.functions as F + +from chainercv import transforms + + +class MaskRCNN(chainer.Chain): + + _min_size = 800 + _max_size = 1333 + _stride = 32 + + def __init__(self, extractor, rpn, head, mask_head): + super(MaskRCNN, self).__init__() + with self.init_scope(): + self.extractor = extractor + self.rpn = rpn + self.head = head + self.mask_head = mask_head + + self.use_preset('visualize') + + def use_preset(self, preset): + if preset == 'visualize': + self.nms_thresh = 0.5 + self.score_thresh = 0.7 + elif preset == 'evaluate': + self.nms_thresh = 0.5 + self.score_thresh = 0.05 + else: + raise ValueError('preset must be visualize or evaluate') + + def __call__(self, x): + assert(not chainer.config.train) + hs = self.extractor(x) + rpn_locs, rpn_confs = self.rpn(hs) + anchors = self.rpn.anchors(h.shape[2:] for h in hs) + rois, roi_indices = self.rpn.decode( + rpn_locs, rpn_confs, anchors, x.shape) + rois, roi_indices = self.head.distribute(rois, roi_indices) + return hs, rois, roi_indices + + def predict(self, imgs): + sizes = [img.shape[1:] for img in imgs] + x, scales = self.prepare(imgs) + + with chainer.using_config('train', False), chainer.no_backprop_mode(): + hs, rois, roi_indices = self(x) + head_locs, head_confs = self.head(hs, rois, roi_indices) + bboxes, labels, scores = self.head.decode( + rois, roi_indices, head_locs, head_confs, + scales, sizes, self.nms_thresh, self.score_thresh) + 
+ # Rescale bbox to the scaled resolution + rescaled_bboxes = [bbox * scale for scale, bbox in zip(scales, bboxes)] + # Change bboxes to RoI and RoI indices format + mask_rois_before_reordering, mask_roi_indices_before_reordering =\ + _list_to_flat(rescaled_bboxes) + mask_rois, mask_roi_indices, order = self.mask_head.distribute( + mask_rois_before_reordering, mask_roi_indices_before_reordering) + with chainer.using_config('train', False), chainer.no_backprop_mode(): + segms = F.sigmoid( + self.mask_head(hs, mask_rois, mask_roi_indices)).data + # Put the order of proposals back to the one used by bbox head + # from the ordering respective FPN levels. + segms = segms[order] + segms = _flat_to_list(segms, mask_roi_indices_before_reordering) + if len(segms) == 0: + segms = [ + self.xp.zeros((0, self.mask_head.mask_size, + self.mask_head.mask_size), dtype=np.float32)] + + masks = self.mask_head.decode( + segms, + [bbox / scale for bbox, scale in zip(rescaled_bboxes, scales)], + labels, sizes) + + masks = [cuda.to_cpu(mask) for mask in masks] + labels = [cuda.to_cpu(label) for label in labels] + scores = [cuda.to_cpu(score) for score in scores] + return masks, labels, scores + + def prepare(self, imgs, masks=None): + scales = [] + resized_imgs = [] + sizes = [] + for img in imgs: + _, H, W = img.shape + scale = self._min_size / min(H, W) + if scale * max(H, W) > self._max_size: + scale = self._max_size / max(H, W) + scales.append(scale) + H, W = int(H * scale), int(W * scale) + img = transforms.resize(img, (H, W)) + img -= self.extractor.mean + resized_imgs.append(img) + sizes.append((H, W)) + pad_size = np.array( + [im.shape[1:] for im in resized_imgs]).max(axis=0) + pad_size = ( + np.ceil(pad_size / self._stride) * self._stride).astype(int) + pad_imgs = np.zeros( + (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32) + for i, im in enumerate(resized_imgs): + _, H, W = img.shape + pad_imgs[i, :, :H, :W] = im + pad_imgs = self.xp.array(pad_imgs) + + if masks is 
None: + return pad_imgs, scales + + resized_masks = [] + for size, mask in zip(sizes, masks): + resized_masks.append(transforms.resize( + mask.astype(np.float32), + size, interpolation=PIL.Image.NEAREST).astype(np.bool)) + pad_masks = [] + for mask in resized_masks: + n_class, H, W = mask.shape + pad_mask = self.xp.zeros( + (n_class, pad_size[0], pad_size[1]), dtype=np.bool) + pad_mask[:, :H, :W] = self.xp.array(mask) + pad_masks.append(pad_mask) + return pad_imgs, pad_masks, scales + + +def _list_to_flat(array_list): + xp = chainer.backends.cuda.get_array_module(array_list[0]) + + indices = xp.concatenate( + [i * xp.ones((len(array),), dtype=np.int32) for + i, array in enumerate(array_list)], axis=0) + flat = xp.concatenate(array_list, axis=0) + return flat, indices + + +def _flat_to_list(flat, indices): + array_list = [] + for i in np.unique(chainer.backends.cuda.to_cpu(indices)): + array_list.append(flat[indices == i]) + return array_list diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py b/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py new file mode 100644 index 0000000000..2e1b132d42 --- /dev/null +++ b/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py @@ -0,0 +1,68 @@ +from __future__ import division + +import chainer +import chainer.functions as F + +from chainercv.links.model.fpn import FPN +from chainercv.links.model.fpn import Head +from chainercv.links.model.fpn import RPN +from chainercv.links.model.mask_rcnn.mask_head import MaskHead +from chainercv.links.model.mask_rcnn.mask_rcnn import MaskRCNN +from chainercv.links.model.resnet import ResNet101 +from chainercv.links.model.resnet import ResNet50 +from chainercv import utils + +from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import _copyparams + + +class MaskRCNNFPNResNet(MaskRCNN): + + def __init__(self, n_fg_class=None, pretrained_model=None): + param, path = utils.prepare_pretrained_model( + {'n_fg_class': n_fg_class}, pretrained_model, self._models) + + base 
= self._base(n_class=1, arch='he') + base.pick = ('res2', 'res3', 'res4', 'res5') + base.pool1 = lambda x: F.max_pooling_2d( + x, 3, stride=2, pad=1, cover_all=False) + base.remove_unused() + extractor = FPN( + base, len(base.pick), (1 / 4, 1 / 8, 1 / 16, 1 / 32, 1 / 64)) + + n_class = param['n_fg_class'] + 1 + super(MaskRCNNFPNResNet, self).__init__( + extractor=extractor, + rpn=RPN(extractor.scales), + head=Head(n_class, extractor.scales), + mask_head=MaskHead(n_class, extractor.scales) + ) + if path == 'imagenet': + _copyparams( + self.extractor.base, + self._base(pretrained_model='imagenet', arch='he')) + elif path: + chainer.serializers.load_npz(path, self) + + +class MaskRCNNFPNResNet50(MaskRCNNFPNResNet): + + _base = ResNet50 + _models = { + 'coco': { + 'param': {'n_fg_class': 80}, + 'url': None, + 'cv2': True + }, + } + + +class MaskRCNNFPNResNet101(MaskRCNNFPNResNet): + + _base = ResNet101 + _models = { + 'coco': { + 'param': {'n_fg_class': 80}, + 'url': None, + 'cv2': True + }, + } diff --git a/examples/instance_segmentation/eval_coco.py b/examples/instance_segmentation/eval_coco.py index a8e531ba07..98258252b8 100755 --- a/examples/instance_segmentation/eval_coco.py +++ b/examples/instance_segmentation/eval_coco.py @@ -7,6 +7,8 @@ from chainercv.datasets import COCOInstanceSegmentationDataset from chainercv.evaluations import eval_instance_segmentation_coco from chainercv.experimental.links import FCISResNet101 +from chainercv.links import MaskRCNNFPNResNet101 +from chainercv.links import MaskRCNNFPNResNet50 from chainercv.utils import apply_to_iterator from chainercv.utils import ProgressHook @@ -14,15 +16,17 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument( - '--model', choices=('fcis_resnet101',), + '--model', choices=( + 'fcis_resnet101', + 'mask_rcnn_fpn_resnet101', 'mask_rcnn_fpn_resnet50'), default='fcis_resnet101') parser.add_argument('--pretrained-model', default=None) parser.add_argument('--gpu', type=int, default=-1) args 
= parser.parse_args() + if args.pretrained_model is None: + args.pretrained_model = 'coco' if args.model == 'fcis_resnet101': - if args.pretrained_model is None: - args.pretrained_model = 'coco' proposal_creator_params = FCISResNet101.proposal_creator_params proposal_creator_params['min_size'] = 2 model = FCISResNet101( @@ -30,8 +34,19 @@ def main(): anchor_scales=(4, 8, 16, 32), pretrained_model=args.pretrained_model, proposal_creator_params=proposal_creator_params) + preset = 'coco_evaluate' + elif args.model == 'mask_rcnn_fpn_resnet50': + model = MaskRCNNFPNResNet50( + len(coco_instance_segmentation_label_names), + args.pretrained_model) + preset = 'evaluate' + elif args.model == 'mask_rcnn_fpn_resnet101': + model = MaskRCNNFPNResNet101( + len(coco_instance_segmentation_label_names), + args.pretrained_model) + preset = 'evaluate' - model.use_preset('coco_evaluate') + model.use_preset(preset) if args.gpu >= 0: chainer.cuda.get_device_from_id(args.gpu).use() diff --git a/examples/mask_rcnn/demo.py b/examples/mask_rcnn/demo.py new file mode 100644 index 0000000000..aa4b7adbe4 --- /dev/null +++ b/examples/mask_rcnn/demo.py @@ -0,0 +1,58 @@ +import argparse +import matplotlib.pyplot as plt + +import chainer + +import chainercv +from chainercv.datasets import coco_instance_segmentation_label_names +from chainercv import utils + +from chainercv.links import MaskRCNNFPNResNet101 +from chainercv.links import MaskRCNNFPNResNet50 + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--gpu', type=int, default=-1) + parser.add_argument('--model', choices=('resnet50', 'resnet101')) + group = parser.add_mutually_exclusive_group() + group.add_argument('--pretrained-model') + group.add_argument('--snapshot') + parser.add_argument('image') + args = parser.parse_args() + + if args.model == 'resnet50': + model = MaskRCNNFPNResNet50( + n_fg_class=len(coco_instance_segmentation_label_names), + pretrained_model=args.pretrained_model) + elif args.model == 
'resnet101': + model = MaskRCNNFPNResNet101( + n_fg_class=len(coco_instance_segmentation_label_names), + pretrained_model=args.pretrained_model) + + if args.gpu >= 0: + chainer.cuda.get_device_from_id(args.gpu).use() + model.to_gpu() + + img = utils.read_image(args.image) + # bboxes, masks, labels, scores = model.predict([img]) + masks, labels, scores = model.predict([img]) + # bbox = bboxes[0] + mask = masks[0] + label = labels[0] + score = scores[0] + + # chainercv.visualizations.vis_bbox( + # img, bbox, label, score, label_names=coco_bbox_label_names) + + import numpy as np + # flag = np.array([bb[3] - bb[1] < 300 for bb in bbox], dtype=np.bool) + flag = np.ones(len(mask), dtype=np.bool) + chainercv.visualizations.vis_instance_segmentation( + img, mask[flag], label[flag], score[flag], + label_names=coco_instance_segmentation_label_names) + plt.show() + + +if __name__ == '__main__': + main() diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py new file mode 100644 index 0000000000..cb68857a93 --- /dev/null +++ b/examples/mask_rcnn/train_multi.py @@ -0,0 +1,229 @@ +import argparse +import numpy as np + +import chainer +import chainer.links as L +from chainer.optimizer_hooks import WeightDecay +from chainer import serializers +from chainer import training +from chainer.training import extensions + +import chainermn + +from chainercv.chainer_experimental.datasets.sliceable import TransformDataset +from chainercv.chainer_experimental.training.extensions import make_shift +from chainercv.datasets import coco_instance_segmentation_label_names +from chainercv.datasets import COCOInstanceSegmentationDataset +from chainercv.links import MaskRCNNFPNResNet101 +from chainercv.links import MaskRCNNFPNResNet50 +from chainercv import transforms + +from chainercv.links.model.fpn import head_loss_post +from chainercv.links.model.fpn import head_loss_pre +from chainercv.links.model.fpn import rpn_loss +from chainercv.links.model.mask_rcnn import 
mask_loss_post +from chainercv.links.model.mask_rcnn import mask_loss_pre + +# https://docs.chainer.org/en/stable/tips.html#my-training-process-gets-stuck-when-using-multiprocessiterator +try: + import cv2 + cv2.setNumThreads(0) +except ImportError: + pass + + +class TrainChain(chainer.Chain): + + def __init__(self, model): + super().__init__() + with self.init_scope(): + self.model = model + + def __call__(self, imgs, masks, labels, bboxes): + x, masks, scales = self.model.prepare(imgs, masks) + B = len(x) + bboxes = [self.xp.array(bbox) * scale + for bbox, scale in zip(bboxes, scales)] + labels = [self.xp.array(label) for label in labels] + + with chainer.using_config('train', False): + hs = self.model.extractor(x) + + rpn_locs, rpn_confs = self.model.rpn(hs) + anchors = self.model.rpn.anchors(h.shape[2:] for h in hs) + rpn_loc_loss, rpn_conf_loss = rpn_loss( + rpn_locs, rpn_confs, anchors, + [(int(img.shape[1] * scale), int(img.shape[2] * scale)) + for img, scale in zip(imgs, scales)], + bboxes) + + rois, roi_indices = self.model.rpn.decode( + rpn_locs, rpn_confs, anchors, x.shape) + rois = self.xp.vstack([rois] + bboxes) + roi_indices = self.xp.hstack( + [roi_indices] + + [self.xp.array((i,) * len(bbox)) + for i, bbox in enumerate(bboxes)]) + rois, roi_indices = self.model.head.distribute(rois, roi_indices) + rois, roi_indices, head_gt_locs, head_gt_labels = head_loss_pre( + rois, roi_indices, self.model.head.std, bboxes, labels) + head_locs, head_confs = self.model.head(hs, rois, roi_indices) + head_loc_loss, head_conf_loss = head_loss_post( + head_locs, head_confs, + roi_indices, head_gt_locs, head_gt_labels, B) + + mask_rois, mask_roi_indices, gt_segms, gt_mask_labels = mask_loss_pre( + rois, roi_indices, masks, head_gt_labels, + self.model.mask_head.mask_size) + segms = self.model.mask_head(hs, mask_rois, mask_roi_indices) + mask_loss = mask_loss_post( + segms, mask_roi_indices, gt_segms, gt_mask_labels, B) + + loss = (rpn_loc_loss + rpn_conf_loss + + 
head_loc_loss + head_conf_loss + mask_loss) + chainer.reporter.report({ + 'loss': loss, + 'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss, + 'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss, + 'loss/mask': mask_loss}, + self) + + return loss + + +def transform(in_data): + img, mask, label, bbox = in_data + + img, params = transforms.random_flip( + img, x_random=True, return_param=True) + mask = transforms.flip(mask, x_flip=params['x_flip']) + bbox = transforms.flip_bbox( + bbox, img.shape[1:], x_flip=params['x_flip']) + + return img, mask, label, bbox + + +def converter(batch, device=None): + # do not send data to gpu (device is ignored) + return tuple(list(v) for v in zip(*batch)) + + +def copyparams(dst, src): + if isinstance(dst, chainer.Chain): + for link in dst.children(): + copyparams(link, src[link.name]) + elif isinstance(dst, chainer.ChainList): + for i, link in enumerate(dst): + copyparams(link, src[i]) + else: + dst.copyparams(src) + if isinstance(dst, L.BatchNormalization): + dst.avg_mean = src.avg_mean + dst.avg_var = src.avg_var + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + '--model', choices=('resnet50', 'resnet101')) + parser.add_argument('--batchsize', type=int, default=16) + parser.add_argument('--out', default='result') + parser.add_argument('--resume') + parser.add_argument('--communicator', default='hierarchical') + args = parser.parse_args() + + comm = chainermn.create_communicator(args.communicator) + device = comm.intra_rank + + if args.model == 'resnet50': + model = MaskRCNNFPNResNet50( + n_fg_class=len(coco_instance_segmentation_label_names), + pretrained_model='imagenet') + elif args.model == 'resnet101': + model = MaskRCNNFPNResNet101( + n_fg_class=len(coco_instance_segmentation_label_names), + pretrained_model='imagenet') + + model.use_preset('evaluate') + train_chain = TrainChain(model) + chainer.cuda.get_device_from_id(device).use() + train_chain.to_gpu() + + train = 
TransformDataset( + COCOInstanceSegmentationDataset( + split='train', return_bbox=True), + ('img', 'mask', 'label', 'bbox'), transform) + + if comm.rank == 0: + indices = np.arange(len(train)) + else: + indices = None + indices = chainermn.scatter_dataset(indices, comm, shuffle=True) + train = train.slice[indices] + + train_iter = chainer.iterators.MultithreadIterator( + train, args.batchsize // comm.size) + + optimizer = chainermn.create_multi_node_optimizer( + chainer.optimizers.MomentumSGD(), comm) + optimizer.setup(train_chain) + optimizer.add_hook(WeightDecay(0.0001)) + + model.extractor.base.conv1.disable_update() + model.extractor.base.res2.disable_update() + for link in model.links(): + if isinstance(link, L.BatchNormalization): + link.disable_update() + + updater = training.updaters.StandardUpdater( + train_iter, optimizer, converter=converter, device=device) + trainer = training.Trainer( + updater, (90000 * 16 / args.batchsize, 'iteration'), args.out) + + @make_shift('lr') + def lr_schedule(trainer): + base_lr = 0.02 * args.batchsize / 16 + warm_up_duration = 500 + warm_up_rate = 1 / 3 + + iteration = trainer.updater.iteration + if iteration < warm_up_duration: + rate = warm_up_rate \ + + (1 - warm_up_rate) * iteration / warm_up_duration + else: + rate = 1 + for step in args.step: + if iteration >= step * 16 / args.batchsize: + rate *= 0.1 + + return base_lr * rate + + trainer.extend(lr_schedule) + + if comm.rank == 0: + log_interval = 10, 'iteration' + trainer.extend(extensions.LogReport(trigger=log_interval)) + trainer.extend(extensions.observe_lr(), trigger=log_interval) + trainer.extend(extensions.PrintReport( + ['epoch', 'iteration', 'lr', 'main/loss', + 'main/loss/rpn/loc', 'main/loss/rpn/conf', + 'main/loss/head/loc', 'main/loss/head/conf', + 'main/loss/mask' + ]), + trigger=log_interval) + trainer.extend(extensions.ProgressBar(update_interval=10)) + + trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration')) + trainer.extend( + 
extensions.snapshot_object( + model, 'model_iter_{.updater.iteration}'), + trigger=(90000 * 16 / args.batchsize, 'iteration')) + + if args.resume: + serializers.load_npz(args.resume, trainer, strict=False) + + trainer.run() + + +if __name__ == '__main__': + main() From bcd68fac657d25a835d8de823c9f26ace124843b Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Tue, 12 Feb 2019 22:04:35 +0900 Subject: [PATCH 002/100] misc --- chainercv/links/model/mask_rcnn/mask_head.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py index 2b2b5c4cbb..fd2d35e763 100644 --- a/chainercv/links/model/mask_rcnn/mask_head.py +++ b/chainercv/links/model/mask_rcnn/mask_head.py @@ -147,7 +147,7 @@ def expand_boxes(bbox, scale): def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels, - mask_size=28): + mask_size): xp = cuda.get_array_module(*rois) n_level = len(rois) @@ -174,9 +174,8 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels, iou = bbox_iou(mask_roi, gt_bbox) gt_index = iou.argmax(axis=1) gt_segms[index] = segm_wrt_bbox( - gt_mask[gt_index], mask_roi, (M, M)) + gt_mask[gt_index], mask_roi, (mask_size, mask_size)) - # indices = [(mask_roi_levels == l).nonzero() for l in range(n_level)] flag_masks = [mask_roi_levels == l for l in range(n_level)] mask_rois = [mask_rois[m] for m in flag_masks] mask_roi_indices = [mask_roi_indices[m] for m in flag_masks] From f1d4e46835de8aea8bc56c73e2b7dcec30477c85 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 13 Feb 2019 15:13:39 +0900 Subject: [PATCH 003/100] doc --- docs/source/reference/links.rst | 11 ++++++ docs/source/reference/links/mask_rcnn.rst | 45 +++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 docs/source/reference/links/mask_rcnn.rst diff --git a/docs/source/reference/links.rst b/docs/source/reference/links.rst index 5c7426a97f..6dbf1a1e86 100644 --- 
a/docs/source/reference/links.rst +++ b/docs/source/reference/links.rst @@ -51,6 +51,17 @@ For more details, please read :func:`SegNetBasic.predict`. links/segnet +Instance Segmentation +~~~~~~~~~~~~~~~~~~~~~ + +Instance segmentation links share a common method :meth:`predict` to detect masks that cover objects in an image. +For more details, please read :func:`MaskRCNN.predict`. + +.. toctree:: + + links/mask_rcnn + + Classifiers ~~~~~~~~~~~ diff --git a/docs/source/reference/links/mask_rcnn.rst b/docs/source/reference/links/mask_rcnn.rst new file mode 100644 index 0000000000..4c0870e2e5 --- /dev/null +++ b/docs/source/reference/links/mask_rcnn.rst @@ -0,0 +1,45 @@ +Mask R-CNN +========== + +.. module:: chainercv.links.model.mask_rcnn + + +Instance Segmentation Links +--------------------------- + +MaskRCNNFPNResNet50 +~~~~~~~~~~~~~~~~~~~ +.. autoclass:: MaskRCNNFPNResNet50 + :members: + +MaskRCNNFPNResNet101 +~~~~~~~~~~~~~~~~~~~~ +.. autoclass:: MaskRCNNFPNResNet101 + :members: + + +Utility +------- + +MaskRCNN +~~~~~~~~ +.. autoclass:: MaskRCNN + :members: + +MaskHead +~~~~~~~~ +.. autoclass:: MaskHead + :members: + :special-members: __call__ + + +Train-only Utility +------------------ + +mask_loss_pre +~~~~~~~~~~~~~ +.. autofunction:: mask_loss_pre + +mask_loss_post +~~~~~~~~~~~~~~ +.. 
autofunction:: mask_loss_post From 1e10d3210f7fc59c3ca65b492b78c1521208f921 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 13 Feb 2019 16:06:40 +0900 Subject: [PATCH 004/100] remove unnecessary print --- .../testing/assertions/assert_is_instance_segmentation_link.py | 1 - 1 file changed, 1 deletion(-) diff --git a/chainercv/utils/testing/assertions/assert_is_instance_segmentation_link.py b/chainercv/utils/testing/assertions/assert_is_instance_segmentation_link.py index 1faf7aaf7e..09f55c900c 100644 --- a/chainercv/utils/testing/assertions/assert_is_instance_segmentation_link.py +++ b/chainercv/utils/testing/assertions/assert_is_instance_segmentation_link.py @@ -21,7 +21,6 @@ def assert_is_instance_segmentation_link(link, n_fg_class): np.random.randint(0, 256, size=(3, 480, 320)).astype(np.float32)] result = link.predict(imgs) - print(result) assert len(result) == 3, \ 'Link must return three elements: masks, labels and scores.' masks, labels, scores = result From e468545cb34df7c38cd7b6b6b10d2f28710f88d1 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 13 Feb 2019 16:06:29 +0900 Subject: [PATCH 005/100] misc --- chainercv/links/model/mask_rcnn/mask_head.py | 7 +- chainercv/links/model/mask_rcnn/mask_rcnn.py | 146 ++++++++--- examples/mask_rcnn/train_multi.py | 41 ++- .../mask_rcnn_tests/test_mask_head.py | 240 ++++++++++++++++++ .../mask_rcnn_tests/test_mask_rcnn.py | 132 ++++++++++ .../test_mask_rcnn_fpn_resnet.py | 68 +++++ 6 files changed, 590 insertions(+), 44 deletions(-) create mode 100644 tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py create mode 100644 tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn.py create mode 100644 tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn_fpn_resnet.py diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py index fd2d35e763..0e98cc50fc 100644 --- a/chainercv/links/model/mask_rcnn/mask_head.py +++ 
b/chainercv/links/model/mask_rcnn/mask_head.py @@ -18,12 +18,12 @@ class MaskHead(chainer.Chain): + _canonical_level = 2 _canonical_scale = 224 _roi_size = 14 _roi_sample_ratio = 2 mask_size = _roi_size * 2 - # Remember, initialization is MSRAFill def __init__(self, n_class, scales): super(MaskHead, self).__init__() @@ -67,13 +67,12 @@ def __call__(self, hs, rois, roi_indices): return self.seg(h) def distribute(self, rois, roi_indices): - size = self.xp.sqrt( - self.xp.prod(rois[:, 2:] + 1 - rois[:, :2], axis=1)) + size = self.xp.sqrt(self.xp.prod(rois[:, 2:] - rois[:, :2], axis=1)) level = self.xp.floor(self.xp.log2( size / self._canonical_scale + 1e-6)).astype(np.int32) # skip last level level = self.xp.clip( - level + len(self._scales) // 2, 0, len(self._scales) - 2) + level + self._canonical_level, 0, len(self._scales) - 2) masks = [level == l for l in range(len(self._scales))] rois = [rois[mask] for mask in masks] diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py index 94347b1cdd..0ddc65ce13 100644 --- a/chainercv/links/model/mask_rcnn/mask_rcnn.py +++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py @@ -12,6 +12,39 @@ class MaskRCNN(chainer.Chain): + """Base class of Mask R-CNN. + + This is a base class of Mask R-CNN [#]_. + + .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017 + + Args: + extractor (Link): A link that extracts feature maps. + This link must have :obj:`scales`, :obj:`mean` and + :meth:`__call__`. + rpn (Link): A link that has the same interface as + :class:`~chainercv.links.model.fpn.RPN`. + Please refer to the documentation found there. + head (Link): A link that has the same interface as + :class:`~chainercv.links.model.fpn.Head`. + Please refer to the documentation found there. + mask_head (Link): A link that has the same interface as + :class:`~chainercv.links.model.mask_rcnn.MaskRCNN`. + Please refer to the documentation found there. 
+ + Parameters: + nms_thresh (float): The threshold value + for :func:`~chainercv.utils.non_maximum_suppression`. + The default value is :obj:`0.5`. + This value can be changed directly or by using :meth:`use_preset`. + score_thresh (float): The threshold value for confidence score. + If a bounding box whose confidence score is lower than this value, + the bounding box will be suppressed. + The default value is :obj:`0.7`. + This value can be changed directly or by using :meth:`use_preset`. + + """ + _min_size = 800 _max_size = 1333 _stride = 32 @@ -27,6 +60,23 @@ def __init__(self, extractor, rpn, head, mask_head): self.use_preset('visualize') def use_preset(self, preset): + """Use the given preset during prediction. + + This method changes values of :obj:`nms_thresh` and + :obj:`score_thresh`. These values are a threshold value + used for non maximum suppression and a threshold value + to discard low confidence proposals in :meth:`predict`, + respectively. + + If the attributes need to be changed to something + other than the values provided in the presets, please modify + them by directly accessing the public attributes. + + Args: + preset ({'visualize', 'evaluate'}): A string to determine the + preset to use. + """ + if preset == 'visualize': self.nms_thresh = 0.5 self.score_thresh = 0.7 @@ -47,8 +97,34 @@ def __call__(self, x): return hs, rois, roi_indices def predict(self, imgs): + """Segment object instances from images. + + This method predicts instance-aware object regions for each image. + + Args: + imgs (iterable of numpy.ndarray): Arrays holding images of shape + :math:`(B, C, H, W)`. All images are in CHW and RGB format + and the range of their value is :math:`[0, 255]`. + + Returns: + tuple of lists: + This method returns a tuple of three lists, + :obj:`(masks, labels, scores)`. + + * **masks**: A list of boolean arrays of shape :math:`(R, H, W)`, \ + where :math:`R` is the number of masks in a image. 
\ + Each pixel holds value if it is inside the object inside or not. + * **labels** : A list of integer arrays of shape :math:`(R,)`. \ + Each value indicates the class of the masks. \ + Values are in range :math:`[0, L - 1]`, where :math:`L` is the \ + number of the foreground classes. + * **scores** : A list of float arrays of shape :math:`(R,)`. \ + Each value indicates how confident the prediction is. + + """ + sizes = [img.shape[1:] for img in imgs] - x, scales = self.prepare(imgs) + x, scales, _ = self.prepare(imgs) with chainer.using_config('train', False), chainer.no_backprop_mode(): hs, rois, roi_indices = self(x) @@ -67,14 +143,15 @@ def predict(self, imgs): with chainer.using_config('train', False), chainer.no_backprop_mode(): segms = F.sigmoid( self.mask_head(hs, mask_rois, mask_roi_indices)).data - # Put the order of proposals back to the one used by bbox head - # from the ordering respective FPN levels. + # Put the order of proposals back to the one used by bbox head. segms = segms[order] - segms = _flat_to_list(segms, mask_roi_indices_before_reordering) - if len(segms) == 0: - segms = [ - self.xp.zeros((0, self.mask_head.mask_size, - self.mask_head.mask_size), dtype=np.float32)] + segms = _flat_to_list( + segms, mask_roi_indices_before_reordering, len(imgs)) + segms = [segm if segm is not None else + self.xp.zeros( + (0, self.mask_head.mask_size, self.mask_head.mask_size), + dtype=np.float32) + for segm in segms] masks = self.mask_head.decode( segms, @@ -87,9 +164,21 @@ def predict(self, imgs): return masks, labels, scores def prepare(self, imgs, masks=None): + """Preprocess images. + + Args: + imgs (iterable of numpy.ndarray): Arrays holding images. + All images are in CHW and RGB format + and the range of their value is :math:`[0, 255]`. + + Returns: + Two arrays: preprocessed images and \ + scales that were caluclated in prepocessing. 
+ + """ scales = [] resized_imgs = [] - sizes = [] + resized_sizes = [] for img in imgs: _, H, W = img.shape scale = self._min_size / min(H, W) @@ -100,34 +189,19 @@ def prepare(self, imgs, masks=None): img = transforms.resize(img, (H, W)) img -= self.extractor.mean resized_imgs.append(img) - sizes.append((H, W)) + resized_sizes.append((H, W)) pad_size = np.array( [im.shape[1:] for im in resized_imgs]).max(axis=0) pad_size = ( np.ceil(pad_size / self._stride) * self._stride).astype(int) - pad_imgs = np.zeros( + x = np.zeros( (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32) for i, im in enumerate(resized_imgs): - _, H, W = img.shape - pad_imgs[i, :, :H, :W] = im - pad_imgs = self.xp.array(pad_imgs) - - if masks is None: - return pad_imgs, scales - - resized_masks = [] - for size, mask in zip(sizes, masks): - resized_masks.append(transforms.resize( - mask.astype(np.float32), - size, interpolation=PIL.Image.NEAREST).astype(np.bool)) - pad_masks = [] - for mask in resized_masks: - n_class, H, W = mask.shape - pad_mask = self.xp.zeros( - (n_class, pad_size[0], pad_size[1]), dtype=np.bool) - pad_mask[:, :H, :W] = self.xp.array(mask) - pad_masks.append(pad_mask) - return pad_imgs, pad_masks, scales + _, H, W = im.shape + x[i, :, :H, :W] = im + x = self.xp.array(x) + + return x, scales, resized_sizes def _list_to_flat(array_list): @@ -140,8 +214,12 @@ def _list_to_flat(array_list): return flat, indices -def _flat_to_list(flat, indices): +def _flat_to_list(flat, indices, B): array_list = [] - for i in np.unique(chainer.backends.cuda.to_cpu(indices)): - array_list.append(flat[indices == i]) + for i in range(B): + array = flat[indices == i] + if len(array) > 0: + array_list.append(array) + else: + array_list.append(None) return array_list diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py index cb68857a93..71957c86e0 100644 --- a/examples/mask_rcnn/train_multi.py +++ b/examples/mask_rcnn/train_multi.py @@ -1,5 +1,7 @@ import argparse 
+import multiprocessing import numpy as np +import PIL import chainer import chainer.links as L @@ -35,13 +37,29 @@ class TrainChain(chainer.Chain): def __init__(self, model): - super().__init__() + super(TrainChain, self).__init__() with self.init_scope(): self.model = model + def prepare_mask(self, masks, resized_sizes, pad_size): + resized_masks = [] + for size, mask in zip(resized_sizes, masks): + resized_masks.append(transforms.resize( + mask.astype(np.float32), + size, interpolation=PIL.Image.NEAREST).astype(np.bool)) + pad_masks = [] + for mask in resized_masks: + n_class, H, W = mask.shape + pad_mask = self.xp.zeros( + (n_class, pad_size[0], pad_size[1]), dtype=np.bool) + pad_mask[:, :H, :W] = self.xp.array(mask) + pad_masks.append(pad_mask) + return pad_masks + def __call__(self, imgs, masks, labels, bboxes): - x, masks, scales = self.model.prepare(imgs, masks) - B = len(x) + x, scales, resized_sizes = self.model.prepare(imgs, masks) + B, _, pad_H, pad_W = x.shape + masks = self.prepare_mask(masks, resized_sizes, (pad_H, pad_W)) bboxes = [self.xp.array(bbox) * scale for bbox, scale in zip(bboxes, scales)] labels = [self.xp.array(label) for label in labels] @@ -125,13 +143,23 @@ def copyparams(dst, src): def main(): parser = argparse.ArgumentParser() parser.add_argument( - '--model', choices=('resnet50', 'resnet101')) + '--model', choices=('resnet50', 'resnet101'), + default='resnet50') parser.add_argument('--batchsize', type=int, default=16) + parser.add_argument('--iteration', type=int, default=90000) + parser.add_argument('--step', type=int, nargs='*', default=[60000, 80000]) parser.add_argument('--out', default='result') parser.add_argument('--resume') parser.add_argument('--communicator', default='hierarchical') args = parser.parse_args() + # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator + if hasattr(multiprocessing, 'set_start_method'): + multiprocessing.set_start_method('forkserver') + p = 
multiprocessing.Process() + p.start() + p.join() + comm = chainermn.create_communicator(args.communicator) device = comm.intra_rank @@ -175,10 +203,11 @@ def main(): if isinstance(link, L.BatchNormalization): link.disable_update() + n_iteration = args.iteration * 16 / args.batchsize updater = training.updaters.StandardUpdater( train_iter, optimizer, converter=converter, device=device) trainer = training.Trainer( - updater, (90000 * 16 / args.batchsize, 'iteration'), args.out) + updater, (n_iteration, 'iteration'), args.out) @make_shift('lr') def lr_schedule(trainer): @@ -217,7 +246,7 @@ def lr_schedule(trainer): trainer.extend( extensions.snapshot_object( model, 'model_iter_{.updater.iteration}'), - trigger=(90000 * 16 / args.batchsize, 'iteration')) + trigger=(n_iteration, 'iteration')) if args.resume: serializers.load_npz(args.resume, trainer, strict=False) diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py new file mode 100644 index 0000000000..9c8760f388 --- /dev/null +++ b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py @@ -0,0 +1,240 @@ +from __future__ import division + +import numpy as np +import unittest + +import chainer +from chainer import testing +from chainer.testing import attr + +from chainercv.links.model.mask_rcnn import MaskHead +from chainercv.links.model.mask_rcnn import mask_loss_post +from chainercv.links.model.mask_rcnn import mask_loss_pre + + +def _random_array(xp, shape): + return xp.array( + np.random.uniform(-1, 1, size=shape), dtype=np.float32) + + +# @testing.parameterize( +# {'n_class': 1 + 1}, +# {'n_class': 5 + 1}, +# {'n_class': 20 + 1}, +# ) +# class TestMaskHead(unittest.TestCase): +# +# def setUp(self): +# self.link = MaskHead( +# n_class=self.n_class, scales=(1 / 2, 1 / 4, 1 / 8)) +# +# def _check_call(self): +# hs = [ +# chainer.Variable(_random_array(self.link.xp, (2, 64, 32, 32))), +# 
chainer.Variable(_random_array(self.link.xp, (2, 64, 16, 16))), +# chainer.Variable(_random_array(self.link.xp, (2, 64, 8, 8))), +# ] +# rois = [ +# self.link.xp.array(((4, 1, 6, 3),), dtype=np.float32), +# self.link.xp.array( +# ((0, 1, 2, 3), (5, 4, 10, 6)), dtype=np.float32), +# self.link.xp.array(((10, 4, 12, 10),), dtype=np.float32), +# ] +# roi_indices = [ +# self.link.xp.array((0,), dtype=np.int32), +# self.link.xp.array((1, 0), dtype=np.int32), +# self.link.xp.array((1,), dtype=np.int32), +# ] +# +# segs = self.link(hs, rois, roi_indices) +# +# self.assertIsInstance(segs, chainer.Variable) +# self.assertIsInstance(segs.array, self.link.xp.ndarray) +# self.assertEqual( +# segs.shape, +# (4, self.n_class, self.link.mask_size, self.link.mask_size)) +# +# def test_call_cpu(self): +# self._check_call() +# +# @attr.gpu +# def test_call_gpu(self): +# self.link.to_gpu() +# self._check_call() +# +# def _check_distribute(self): +# rois = self.link.xp.array(( +# (0, 0, 10, 10), +# (0, 1000, 0, 1000), +# (0, 0, 224, 224), +# (100, 100, 224, 224), +# ), dtype=np.float32) +# roi_indices = self.link.xp.array((0, 1, 0, 0), dtype=np.int32) +# n_roi = len(roi_indices) +# +# rois, roi_indices, order = self.link.distribute(rois, roi_indices) +# +# self.assertEqual(len(rois), 3) +# self.assertEqual(len(roi_indices), 3) +# for l in range(3): +# self.assertIsInstance(rois[l], self.link.xp.ndarray) +# self.assertIsInstance(roi_indices[l], self.link.xp.ndarray) +# +# self.assertEqual(rois[l].shape[0], roi_indices[l].shape[0]) +# self.assertEqual(rois[l].shape[1:], (4,)) +# self.assertEqual(roi_indices[l].shape[1:], ()) +# +# self.assertEqual(sum(rois[l].shape[0] for l in range(3)), 4) +# +# self.assertEqual(len(order), n_roi) +# self.assertIsInstance(order, self.link.xp.ndarray) +# +# def test_distribute_cpu(self): +# self._check_distribute() +# +# @attr.gpu +# def test_distribute_gpu(self): +# self.link.to_gpu() +# self._check_distribute() +# +# def _check_decode(self): +# segms = 
[ +# _random_array( +# self.link.xp, +# (1, self.n_class, self.link.mask_size, self.link.mask_size)), +# _random_array( +# self.link.xp, +# (2, self.n_class, self.link.mask_size, self.link.mask_size)), +# _random_array( +# self.link.xp, +# (1, self.n_class, self.link.mask_size, self.link.mask_size)) +# ] +# bboxes = [ +# self.link.xp.array(((4, 1, 6, 3),), dtype=np.float32), +# self.link.xp.array( +# ((0, 1, 2, 3), (5, 4, 10, 6)), dtype=np.float32), +# self.link.xp.array(((10, 4, 12, 10),), dtype=np.float32), +# ] +# labels = [ +# self.link.xp.random.randint( +# 0, self.n_class - 1, size=(1,), dtype=np.int32), +# self.link.xp.random.randint( +# 0, self.n_class - 1, size=(2,), dtype=np.int32), +# self.link.xp.random.randint( +# 0, self.n_class - 1, size=(1,), dtype=np.int32), +# ] +# +# sizes = [(56, 56), (48, 48), (72, 72)] +# masks = self.link.decode( +# segms, bboxes, labels, sizes) +# +# self.assertEqual(len(masks), 3) +# for n in range(3): +# self.assertIsInstance(masks[n], self.link.xp.ndarray) +# +# self.assertEqual(masks[n].shape[0], labels[n].shape[0]) +# self.assertEqual(masks[n].shape[1:], sizes[n]) +# +# def test_decode_cpu(self): +# self._check_decode() +# +# @attr.gpu +# def test_decode_gpu(self): +# self.link.to_gpu() +# self._check_decode() +# +# +class TestMaskHeadLoss(unittest.TestCase): + + def _check_mask_loss_pre(self, xp): + n_class = 12 + mask_size = 28 + rois = [ + xp.array(((4, 1, 6, 3),), dtype=np.float32), + xp.array( + ((0, 1, 2, 3), (5, 4, 10, 6)), dtype=np.float32), + xp.array(((10, 4, 12, 10),), dtype=np.float32), + ] + roi_indices = [ + xp.array((0,), dtype=np.int32), + xp.array((1, 0), dtype=np.int32), + xp.array((1,), dtype=np.int32), + ] + masks = [ + _random_array(xp, (n_class, mask_size, mask_size)), + _random_array(xp, (n_class, mask_size, mask_size)), + _random_array(xp, (n_class, mask_size, mask_size)), + ] + labels = [ + xp.array((10, 4), dtype=np.float32), + xp.array((1,), dtype=np.float32), + ] + rois, roi_indices, 
gt_segms, gt_mask_labels = mask_loss_pre( + rois, roi_indices, masks, labels, mask_size) + + self.assertEqual(len(rois), 3) + self.assertEqual(len(roi_indices), 3) + self.assertEqual(len(gt_segms), 3) + self.assertEqual(len(gt_mask_labels), 3) + # for l in range(3): + # self.assertIsInstance(rois[l], xp.ndarray) + # self.assertIsInstance(roi_indices[l], xp.ndarray) + # self.assertIsInstance(gt_locs[l], xp.ndarray) + # self.assertIsInstance(gt_labels[l], xp.ndarray) + + # self.assertEqual(rois[l].shape[0], roi_indices[l].shape[0]) + # self.assertEqual(rois[l].shape[0], gt_locs[l].shape[0]) + # self.assertEqual(rois[l].shape[0], gt_labels[l].shape[0]) + # self.assertEqual(rois[l].shape[1:], (4,)) + # self.assertEqual(roi_indices[l].shape[1:], ()) + # self.assertEqual(gt_locs[l].shape[1:], (4,)) + # self.assertEqual(gt_labels[l].shape[1:], ()) + + def test_mask_loss_pre_cpu(self): + self._check_mask_loss_pre(np) + + @attr.gpu + def test_mask_loss_pre_gpu(self): + import cupy + self._check_mask_loss_pre(cupy) + + # def _check_head_loss_post(self, xp): + # locs = chainer.Variable(_random_array(xp, (20, 81, 4))) + # confs = chainer.Variable(_random_array(xp, (20, 81))) + # roi_indices = [ + # xp.random.randint(0, 2, size=5).astype(np.int32), + # xp.random.randint(0, 2, size=7).astype(np.int32), + # xp.random.randint(0, 2, size=8).astype(np.int32), + # ] + # gt_locs = [ + # _random_array(xp, (5, 4)), + # _random_array(xp, (7, 4)), + # _random_array(xp, (8, 4)), + # ] + # gt_labels = [ + # xp.random.randint(0, 80, size=5).astype(np.int32), + # xp.random.randint(0, 80, size=7).astype(np.int32), + # xp.random.randint(0, 80, size=8).astype(np.int32), + # ] + + # loc_loss, conf_loss = head_loss_post( + # locs, confs, roi_indices, gt_locs, gt_labels, 2) + + # self.assertIsInstance(loc_loss, chainer.Variable) + # self.assertIsInstance(loc_loss.array, xp.ndarray) + # self.assertEqual(loc_loss.shape, ()) + + # self.assertIsInstance(conf_loss, chainer.Variable) + # 
self.assertIsInstance(conf_loss.array, xp.ndarray) + # self.assertEqual(conf_loss.shape, ()) + + # def test_head_loss_post_cpu(self): + # self._check_head_loss_post(np) + + # @attr.gpu + # def test_head_loss_post_gpu(self): + # import cupy + # self._check_head_loss_post(cupy) + + +testing.run_module(__name__, __file__) diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn.py b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn.py new file mode 100644 index 0000000000..637bab61c4 --- /dev/null +++ b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn.py @@ -0,0 +1,132 @@ +from __future__ import division + +import numpy as np +import unittest + +import chainer +from chainer import testing +from chainer.testing import attr + +from chainercv.links.model.fpn import Head +from chainercv.links.model.fpn import RPN +from chainercv.links.model.mask_rcnn import MaskRCNN +from chainercv.links.model.mask_rcnn import MaskHead +from chainercv.utils import assert_is_instance_segmentation_link + + +def _random_array(xp, shape): + return xp.array( + np.random.uniform(-1, 1, size=shape), dtype=np.float32) + + +class DummyExtractor(chainer.Link): + scales = (1 / 2, 1 / 4, 1 / 8) + mean = _random_array(np, (3, 1, 1)) + n_channel = 16 + + def __call__(self, x): + n, _, h, w = x.shape + return [chainer.Variable(_random_array( + self.xp, (n, self.n_channel, int(h * scale), int(w * scale)))) + for scale in self.scales] + + +class DummyMaskRCNN(MaskRCNN): + + def __init__(self, n_fg_class): + extractor = DummyExtractor() + n_class = n_fg_class + 1 + super(DummyMaskRCNN, self).__init__( + extractor=extractor, + rpn=RPN(extractor.scales), + head=Head(n_class, extractor.scales), + mask_head=MaskHead(n_class, extractor.scales) + ) + + +@testing.parameterize( + {'n_fg_class': 1}, + {'n_fg_class': 5}, + {'n_fg_class': 20}, +) +class TestMaskRCNN(unittest.TestCase): + + def setUp(self): + self.link = DummyMaskRCNN(n_fg_class=self.n_fg_class) + + def 
test_use_preset(self): + self.link.nms_thresh = 0 + self.link.score_thresh = 0 + + self.link.use_preset('visualize') + self.assertEqual(self.link.nms_thresh, 0.5) + self.assertEqual(self.link.score_thresh, 0.7) + + self.link.nms_thresh = 0 + self.link.score_thresh = 0 + + self.link.use_preset('evaluate') + self.assertEqual(self.link.nms_thresh, 0.5) + self.assertEqual(self.link.score_thresh, 0.05) + + with self.assertRaises(ValueError): + self.link.use_preset('unknown') + + def _check_call(self): + B = 2 + size = 32 + x = _random_array(self.link.xp, (B, 3, size, size)) + with chainer.using_config('train', False): + hs, rois, roi_indices = self.link(x) + + self.assertEqual(len(hs), len(self.link.extractor.scales)) + self.assertEqual(len(rois), len(self.link.extractor.scales)) + self.assertEqual(len(roi_indices), len(self.link.extractor.scales)) + for l, scale in enumerate(self.link.extractor.scales): + self.assertIsInstance(rois[l], self.link.xp.ndarray) + self.assertEqual(rois[l].shape[1:], (4,)) + + self.assertIsInstance(roi_indices[l], self.link.xp.ndarray) + self.assertEqual(roi_indices[l].shape[1:], ()) + + self.assertEqual(rois[l].shape[0], roi_indices[l].shape[0]) + + self.assertIsInstance(hs[l], chainer.Variable) + self.assertIsInstance(hs[l].array, self.link.xp.ndarray) + feat_size = int(size * scale) + self.assertEqual( + hs[l].shape, + (B, self.link.extractor.n_channel, feat_size, feat_size)) + + def test_call_cpu(self): + self._check_call() + + @attr.gpu + def test_call_gpu(self): + self.link.to_gpu() + self._check_call() + + def test_call_train_mode(self): + x = _random_array(self.link.xp, (2, 3, 32, 32)) + with self.assertRaises(AssertionError): + with chainer.using_config('train', True): + self.link(x) + + def test_predict_cpu(self): + assert_is_instance_segmentation_link(self.link, self.n_fg_class) + + @attr.gpu + def test_predict_gpu(self): + self.link.to_gpu() + assert_is_instance_segmentation_link(self.link, self.n_fg_class) + + def 
test_prepare(self): + imgs = [ + np.random.randint(0, 255, size=(3, 480, 640)).astype(np.float32), + np.random.randint(0, 255, size=(3, 320, 320)).astype(np.float32), + ] + x, _, _ = self.link.prepare(imgs) + self.assertEqual(x.shape, (2, 3, 800, 1088)) + + +testing.run_module(__name__, __file__) diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn_fpn_resnet.py b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn_fpn_resnet.py new file mode 100644 index 0000000000..b7cedc364d --- /dev/null +++ b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn_fpn_resnet.py @@ -0,0 +1,68 @@ +import numpy as np +import unittest + +import chainer +from chainer import testing +from chainer.testing import attr + +from chainercv.links import MaskRCNNFPNResNet101 +from chainercv.links import MaskRCNNFPNResNet50 + + +@testing.parameterize(*testing.product({ + 'model': [MaskRCNNFPNResNet50, MaskRCNNFPNResNet101], + 'n_fg_class': [1, 5, 20], +})) +class TestFasterRCNNFPNResNet(unittest.TestCase): + + def setUp(self): + self.link = self.model(n_fg_class=self.n_fg_class) + + def _check_call(self): + imgs = [ + np.random.uniform(-1, 1, size=(3, 48, 48)).astype(np.float32), + np.random.uniform(-1, 1, size=(3, 32, 64)).astype(np.float32), + ] + x, _, _ = self.link.prepare(imgs) + with chainer.using_config('train', False): + self.link(self.link.xp.array(x)) + + @attr.slow + def test_call_cpu(self): + self._check_call() + + @attr.gpu + @attr.slow + def test_call_gpu(self): + self.link.to_gpu() + self._check_call() + + +@testing.parameterize(*testing.product({ + 'model': [MaskRCNNFPNResNet50, MaskRCNNFPNResNet101], + 'n_fg_class': [None, 10, 80], + # 'pretrained_model': ['coco', 'imagenet'], + 'pretrained_model': ['imagenet'], +})) +class TestFasterRCNNFPNResNetPretrained(unittest.TestCase): + + @attr.slow + def test_pretrained(self): + kwargs = { + 'n_fg_class': self.n_fg_class, + 'pretrained_model': self.pretrained_model, + } + + if 
self.pretrained_model == 'coco': + valid = self.n_fg_class in {None, 80} + elif self.pretrained_model == 'imagenet': + valid = self.n_fg_class is not None + + if valid: + self.model(**kwargs) + else: + with self.assertRaises(ValueError): + self.model(**kwargs) + + +testing.run_module(__name__, __file__) From 75fb3ffdc4cb9371214acbd29db38d98071dfe82 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 13 Feb 2019 22:41:00 +0900 Subject: [PATCH 006/100] misc --- chainercv/links/model/mask_rcnn/mask_head.py | 132 ++++++- chainercv/links/model/mask_rcnn/mask_rcnn.py | 20 +- .../model/mask_rcnn/mask_rcnn_fpn_resnet.py | 65 ++++ examples/mask_rcnn/demo.py | 21 +- examples/mask_rcnn/train_multi.py | 23 +- examples_tests/mask_rcnn_tests/test_demo.sh | 8 + .../mask_rcnn_tests/test_train_multi.sh | 4 + .../mask_rcnn_tests/test_mask_head.py | 356 +++++++++--------- 8 files changed, 391 insertions(+), 238 deletions(-) create mode 100644 examples_tests/mask_rcnn_tests/test_demo.sh create mode 100644 examples_tests/mask_rcnn_tests/test_train_multi.sh diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py index 0e98cc50fc..7be88c98cb 100644 --- a/chainercv/links/model/mask_rcnn/mask_head.py +++ b/chainercv/links/model/mask_rcnn/mask_head.py @@ -18,6 +18,14 @@ class MaskHead(chainer.Chain): + """Mask Head network of Mask R-CNN. + + Args: + n_class (int): The number of classes including background. + scales (tuple of floats): The scales of feature maps. + + """ + _canonical_level = 2 _canonical_scale = 224 _roi_size = 14 @@ -67,6 +75,30 @@ def __call__(self, hs, rois, roi_indices): return self.seg(h) def distribute(self, rois, roi_indices): + """Assigns feature levels to Rois based on their size. + + Args: + rois (array): An array of shape :math:`(R, 4)`, \ + where :math:`R` is the total number of RoIs in the given batch. + roi_indices (array): An array of shape :math:`(R,)`. 
+ + Returns: + two lists and one array: + :obj:`out_rois`, :obj:`out_roi_indices` and :obj:`order`. + + * **out_rois**: A list of arrays of shape :math:`(R_l, 4)`, \ + where :math:`R_l` is the number of RoIs in the :math:`l`-th \ + feature map. + * **out_roi_indices** : A list of arrays of shape :math:`(R_l,)`. + * **order**: A correspondence between the output and the input. \ + The relationship below is satisfied. + + .. code:: python + + xp.concatenate(out_rois, axis=0)[order[i]] == rois[i] + + """ + size = self.xp.sqrt(self.xp.prod(rois[:, 2:] - rois[:, :2], axis=1)) level = self.xp.floor(self.xp.log2( size / self._canonical_scale + 1e-6)).astype(np.int32) @@ -75,18 +107,39 @@ def distribute(self, rois, roi_indices): level + self._canonical_level, 0, len(self._scales) - 2) masks = [level == l for l in range(len(self._scales))] - rois = [rois[mask] for mask in masks] - roi_indices = [roi_indices[mask] for mask in masks] + out_rois = [rois[mask] for mask in masks] + out_roi_indices = [roi_indices[mask] for mask in masks] order = self.xp.argsort( self.xp.concatenate([self.xp.where(mask)[0] for mask in masks])) - return rois, roi_indices, order + return out_rois, out_roi_indices, order def decode(self, segms, bboxes, labels, sizes): - # CPU is used because cv2.resize only accepts numpy arrays. - segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms] - bboxes = [chainer.backends.cuda.to_cpu(bbox) for bbox in bboxes] - labels = [chainer.backends.cuda.to_cpu(label) for label in labels] - + """Decodes back to masks. + + Args: + segms (iterable of arrays): An iterable of arrays of + shape :math:`(R_n, n\_class, M, M)`. + bboxes (iterable of arrays): An iterable of arrays of + shape :math:`(R_n, 4)`. + labels (iterable of arrays): An iterable of arrays of + shape :math:`(R_n,)`. + sizes (list of tuples of two ints): A list of + :math:`(H_n, W_n)`, where :math:`H_n` and :math:`W_n` + are height and width of the :math:`n`-th image. 
+ + Returns: + list of arrays: + This list contains instance segmentation for each image + in the batch. + More precisely, this is a list of boolean arrays of shape + :math:`(R'_n, H_n, W_n)`, where :math:`R'_n` is the number of + bounding boxes in the :math:`n`-th image. + """ + + xp = chainer.backends.cuda.get_array_module(*segms) + if xp != np: + raise ValueError( + 'MaskHead.decode only supports numpy inputs for now.') masks = [] # To work around an issue with cv2.resize (it seems to automatically # pad with repeated border values), we manually zero-pad the masks by 1 @@ -101,7 +154,7 @@ def decode(self, segms, bboxes, labels, sizes): img_H, img_W = size mask = np.zeros((len(bbox), img_H, img_W), dtype=np.bool) - bbox = expand_boxes(bbox, cv2_expand_scale) + bbox = _expand_boxes(bbox, cv2_expand_scale) for i, (bb, sgm, lbl) in enumerate(zip(bbox, segm, label)): bb = bb.astype(np.int32) padded_mask[1:-1, 1:-1] = sgm[lbl + 1] @@ -124,7 +177,7 @@ def decode(self, segms, bboxes, labels, sizes): return masks -def expand_boxes(bbox, scale): +def _expand_boxes(bbox, scale): """Expand an array of boxes by a given scale.""" xp = chainer.backends.cuda.get_array_module(bbox) @@ -147,6 +200,42 @@ def expand_boxes(bbox, scale): def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels, mask_size): + """Loss function for Mask Head (pre). + + This function processes RoIs for :func:`mask_loss_post` by + selecting RoIs for mask loss calculation and + preparing ground truth network output. + + Args: + rois (iterable of arrays): An iterable of arrays of + shape :math:`(R_l, 4)`, where :math:`R_l` is the number + of RoIs in the :math:`l`-th feature map. + roi_indices (iterable of arrays): An iterable of arrays of + shape :math:`(R_l,)`. + gt_masks (iterable of arrays): An iterable of arrays whose shape is + :math:`(R_n, H, W)`, where :math:`R_n` is the number of + ground truth objects. + gt_head_labels (iterable of arrays): An iterable of arrays of + shape :math:`(R_l,)`. 
This is a collection of ground-truth + labels assigned to :obj:`rois` during bounding box localization + stage. The range of value is :math:`(0, n\_class - 1)`. + mask_size (int): Size of the ground truth network output. + + Returns: + tuple of four lists: + :obj:`mask_rois`, :obj:`mask_roi_indices`, + :obj:`gt_segms`, and :obj:`gt_mask_labels`. + + * **rois**: A list of arrays of shape :math:`(R'_l, 4)`, \ + where :math:`R'_l` is the number of RoIs in the :math:`l`-th \ + feature map. + * **roi_indices**: A list of arrays of shape :math:`(R'_l,)`. + * **gt_segms**: A list of arrays of shape :math:`(R'_l, M, M). \ + :math:`M` is the argument :obj:`mask_size`. + * **gt_mask_labels**: A list of arrays of shape :math:`(R'_l,)` \ + indicating the classes of ground truth. + """ + xp = cuda.get_array_module(*rois) n_level = len(rois) @@ -172,7 +261,7 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels, mask_roi = mask_rois[index] iou = bbox_iou(mask_roi, gt_bbox) gt_index = iou.argmax(axis=1) - gt_segms[index] = segm_wrt_bbox( + gt_segms[index] = _segm_wrt_bbox( gt_mask[gt_index], mask_roi, (mask_size, mask_size)) flag_masks = [mask_roi_levels == l for l in range(n_level)] @@ -185,8 +274,23 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels, def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels, batchsize): - # Just compute loss for the foreground class - # divide by the batchsize + """Loss function for Head (post). + + Args: + segms (array): An array whose shape is :math:`(R, n\_class, M, M)`, + where :math:`R` is the total number of RoIs in the given batch. + mask_roi_indices (array): A list of arrays returned by + :func:`mask_loss_pre`. + gt_segms (list of arrays): A list of arrays returned by + :func:`mask_loss_pre`. + gt_mask_labels (list of arrays): A list of arrays returned by + :func:`mask_loss_pre`. + batchsize (int): The size of batch. + + Returns: + chainer.Variable: + Mask loss. 
+ """ xp = cuda.get_array_module(segms.array) mask_roi_indices = xp.hstack(mask_roi_indices).astype(np.int32) @@ -206,7 +310,7 @@ def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels, return mask_loss -def segm_wrt_bbox(mask, bbox, size): +def _segm_wrt_bbox(mask, bbox, size): xp = chainer.backends.cuda.get_array_module(mask) bbox = bbox.astype(np.int32) diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py index 0ddc65ce13..876ce06060 100644 --- a/chainercv/links/model/mask_rcnn/mask_rcnn.py +++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py @@ -1,7 +1,6 @@ from __future__ import division import numpy as np -import PIL import chainer from chainer.backends import cuda @@ -133,7 +132,6 @@ def predict(self, imgs): rois, roi_indices, head_locs, head_confs, scales, sizes, self.nms_thresh, self.score_thresh) - # Rescale bbox to the scaled resolution rescaled_bboxes = [bbox * scale for scale, bbox in zip(scales, bboxes)] # Change bboxes to RoI and RoI indices format mask_rois_before_reordering, mask_roi_indices_before_reordering =\ @@ -153,13 +151,12 @@ def predict(self, imgs): dtype=np.float32) for segm in segms] - masks = self.mask_head.decode( - segms, - [bbox / scale for bbox, scale in zip(rescaled_bboxes, scales)], - labels, sizes) - - masks = [cuda.to_cpu(mask) for mask in masks] - labels = [cuda.to_cpu(label) for label in labels] + segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms] + bboxes = [chainer.backends.cuda.to_cpu(bbox / scale) + for bbox, scale in zip(rescaled_bboxes, scales)] + labels = [chainer.backends.cuda.to_cpu(label) for label in labels] + # Currently MaskHead only supports numpy inputs + masks = self.mask_head.decode(segms, bboxes, labels, sizes) scores = [cuda.to_cpu(score) for score in scores] return masks, labels, scores @@ -172,8 +169,9 @@ def prepare(self, imgs, masks=None): and the range of their value is :math:`[0, 255]`. 
Returns: - Two arrays: preprocessed images and \ - scales that were caluclated in prepocessing. + Three arrays: preprocessed images, \ + scales that were caluclated in prepocessing and + the size of the images after resizing. """ scales = [] diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py b/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py index 2e1b132d42..d18f92f628 100644 --- a/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py +++ b/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py @@ -17,6 +17,11 @@ class MaskRCNNFPNResNet(MaskRCNN): + """Base class for Mask R-CNN with ResNet backbone. + + A subclass of this class should have :obj:`_base` and :obj:`_models`. + """ + def __init__(self, n_fg_class=None, pretrained_model=None): param, path = utils.prepare_pretrained_model( {'n_fg_class': n_fg_class}, pretrained_model, self._models) @@ -46,6 +51,36 @@ def __init__(self, n_fg_class=None, pretrained_model=None): class MaskRCNNFPNResNet50(MaskRCNNFPNResNet): + """Mask R-CNN with ResNet-50. + + This is a model of Mask R-CNN [#]_. + This model uses :class:`~chainercv.links.ResNet50` as + its base feature extractor. + + .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017 + + Args: + n_fg_class (int): The number of classes excluding the background. + pretrained_model (string): The weight file to be loaded. + This can take :obj:`'coco'`, `filepath` or :obj:`None`. + The default value is :obj:`None`. + + * :obj:`'coco'`: Load weights trained on train split of \ + MS COCO 2017. \ + The weight file is downloaded and cached automatically. \ + :obj:`n_fg_class` must be :obj:`80` or :obj:`None`. + * :obj:`'imagenet'`: Load weights of ResNet-50 trained on \ + ImageNet. \ + The weight file is downloaded and cached automatically. \ + This option initializes weights partially and the rests are \ + initialized randomly. In this case, :obj:`n_fg_class` \ + can be set to any number. + * `filepath`: A path of npz file. 
In this case, :obj:`n_fg_class` \ + must be specified properly. + * :obj:`None`: Do not load weights. + + """ + _base = ResNet50 _models = { 'coco': { @@ -58,6 +93,36 @@ class MaskRCNNFPNResNet50(MaskRCNNFPNResNet): class MaskRCNNFPNResNet101(MaskRCNNFPNResNet): + """Mask R-CNN with ResNet-101. + + This is a model of Mask R-CNN [#]_. + This model uses :class:`~chainercv.links.ResNet101` as + its base feature extractor. + + .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017 + + Args: + n_fg_class (int): The number of classes excluding the background. + pretrained_model (string): The weight file to be loaded. + This can take :obj:`'coco'`, `filepath` or :obj:`None`. + The default value is :obj:`None`. + + * :obj:`'coco'`: Load weights trained on train split of \ + MS COCO 2017. \ + The weight file is downloaded and cached automatically. \ + :obj:`n_fg_class` must be :obj:`80` or :obj:`None`. + * :obj:`'imagenet'`: Load weights of ResNet-101 trained on \ + ImageNet. \ + The weight file is downloaded and cached automatically. \ + This option initializes weights partially and the rests are \ + initialized randomly. In this case, :obj:`n_fg_class` \ + can be set to any number. + * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \ + must be specified properly. + * :obj:`None`: Do not load weights. 
+ + """ + _base = ResNet101 _models = { 'coco': { diff --git a/examples/mask_rcnn/demo.py b/examples/mask_rcnn/demo.py index aa4b7adbe4..ef16dbdac0 100644 --- a/examples/mask_rcnn/demo.py +++ b/examples/mask_rcnn/demo.py @@ -14,18 +14,22 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument('--gpu', type=int, default=-1) - parser.add_argument('--model', choices=('resnet50', 'resnet101')) + parser.add_argument( + '--model', + choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'), + default='mask_rcnn_fpn_resnet50' + ) group = parser.add_mutually_exclusive_group() group.add_argument('--pretrained-model') group.add_argument('--snapshot') parser.add_argument('image') args = parser.parse_args() - if args.model == 'resnet50': + if args.model == 'mask_rcnn_fpn_resnet50': model = MaskRCNNFPNResNet50( n_fg_class=len(coco_instance_segmentation_label_names), pretrained_model=args.pretrained_model) - elif args.model == 'resnet101': + elif args.model == 'mask_rcnn_fpn_resnet101': model = MaskRCNNFPNResNet101( n_fg_class=len(coco_instance_segmentation_label_names), pretrained_model=args.pretrained_model) @@ -35,21 +39,12 @@ def main(): model.to_gpu() img = utils.read_image(args.image) - # bboxes, masks, labels, scores = model.predict([img]) masks, labels, scores = model.predict([img]) - # bbox = bboxes[0] mask = masks[0] label = labels[0] score = scores[0] - - # chainercv.visualizations.vis_bbox( - # img, bbox, label, score, label_names=coco_bbox_label_names) - - import numpy as np - # flag = np.array([bb[3] - bb[1] < 300 for bb in bbox], dtype=np.bool) - flag = np.ones(len(mask), dtype=np.bool) chainercv.visualizations.vis_instance_segmentation( - img, mask[flag], label[flag], score[flag], + img, mask, label, score, label_names=coco_instance_segmentation_label_names) plt.show() diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py index 71957c86e0..b7ba734910 100644 --- a/examples/mask_rcnn/train_multi.py +++ 
b/examples/mask_rcnn/train_multi.py @@ -126,25 +126,12 @@ def converter(batch, device=None): return tuple(list(v) for v in zip(*batch)) -def copyparams(dst, src): - if isinstance(dst, chainer.Chain): - for link in dst.children(): - copyparams(link, src[link.name]) - elif isinstance(dst, chainer.ChainList): - for i, link in enumerate(dst): - copyparams(link, src[i]) - else: - dst.copyparams(src) - if isinstance(dst, L.BatchNormalization): - dst.avg_mean = src.avg_mean - dst.avg_var = src.avg_var - - def main(): parser = argparse.ArgumentParser() parser.add_argument( - '--model', choices=('resnet50', 'resnet101'), - default='resnet50') + '--model', + choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'), + default='mask_rcnn_fpn_resnet50') parser.add_argument('--batchsize', type=int, default=16) parser.add_argument('--iteration', type=int, default=90000) parser.add_argument('--step', type=int, nargs='*', default=[60000, 80000]) @@ -163,11 +150,11 @@ def main(): comm = chainermn.create_communicator(args.communicator) device = comm.intra_rank - if args.model == 'resnet50': + if args.model == 'mask_rcnn_fpn_resnet50': model = MaskRCNNFPNResNet50( n_fg_class=len(coco_instance_segmentation_label_names), pretrained_model='imagenet') - elif args.model == 'resnet101': + elif args.model == 'mask_rcnn_fpn_resnet101': model = MaskRCNNFPNResNet101( n_fg_class=len(coco_instance_segmentation_label_names), pretrained_model='imagenet') diff --git a/examples_tests/mask_rcnn_tests/test_demo.sh b/examples_tests/mask_rcnn_tests/test_demo.sh new file mode 100644 index 0000000000..344ae45c19 --- /dev/null +++ b/examples_tests/mask_rcnn_tests/test_demo.sh @@ -0,0 +1,8 @@ +cd examples/mask_rcnn +curl -L https://cloud.githubusercontent.com/assets/2062128/26187667/9cb236da-3bd5-11e7-8bcf-7dbd4302e2dc.jpg \ + -o sample.jpg + +$PYTHON demo.py --model mask_rcnn_fpn_resnet50 sample.jpg +$PYTHON demo.py --model mask_rcnn_fpn_resnet50 --gpu 0 sample.jpg +$PYTHON demo.py --model 
mask_rcnn_fpn_resnet101 sample.jpg +$PYTHON demo.py --model mask_rcnn_fpn_resnet101 --gpu 0 sample.jpg diff --git a/examples_tests/mask_rcnn_tests/test_train_multi.sh b/examples_tests/mask_rcnn_tests/test_train_multi.sh new file mode 100644 index 0000000000..5f5227d2f7 --- /dev/null +++ b/examples_tests/mask_rcnn_tests/test_train_multi.sh @@ -0,0 +1,4 @@ +cd examples/mask_rcnn + +$MPIEXEC $PYTHON train_multi.py --model mask_rcnn_fpn_resnet50 --batchsize 4 --iteration 9 --step 6 8 +$MPIEXEC $PYTHON train_multi.py --model mask_rcnn_fpn_resnet101 --batchsize 4 --iteration 9 --step 6 8 diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py index 9c8760f388..d1832d1b8b 100644 --- a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py +++ b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py @@ -17,133 +17,128 @@ def _random_array(xp, shape): np.random.uniform(-1, 1, size=shape), dtype=np.float32) -# @testing.parameterize( -# {'n_class': 1 + 1}, -# {'n_class': 5 + 1}, -# {'n_class': 20 + 1}, -# ) -# class TestMaskHead(unittest.TestCase): -# -# def setUp(self): -# self.link = MaskHead( -# n_class=self.n_class, scales=(1 / 2, 1 / 4, 1 / 8)) -# -# def _check_call(self): -# hs = [ -# chainer.Variable(_random_array(self.link.xp, (2, 64, 32, 32))), -# chainer.Variable(_random_array(self.link.xp, (2, 64, 16, 16))), -# chainer.Variable(_random_array(self.link.xp, (2, 64, 8, 8))), -# ] -# rois = [ -# self.link.xp.array(((4, 1, 6, 3),), dtype=np.float32), -# self.link.xp.array( -# ((0, 1, 2, 3), (5, 4, 10, 6)), dtype=np.float32), -# self.link.xp.array(((10, 4, 12, 10),), dtype=np.float32), -# ] -# roi_indices = [ -# self.link.xp.array((0,), dtype=np.int32), -# self.link.xp.array((1, 0), dtype=np.int32), -# self.link.xp.array((1,), dtype=np.int32), -# ] -# -# segs = self.link(hs, rois, roi_indices) -# -# self.assertIsInstance(segs, chainer.Variable) -# 
self.assertIsInstance(segs.array, self.link.xp.ndarray) -# self.assertEqual( -# segs.shape, -# (4, self.n_class, self.link.mask_size, self.link.mask_size)) -# -# def test_call_cpu(self): -# self._check_call() -# -# @attr.gpu -# def test_call_gpu(self): -# self.link.to_gpu() -# self._check_call() -# -# def _check_distribute(self): -# rois = self.link.xp.array(( -# (0, 0, 10, 10), -# (0, 1000, 0, 1000), -# (0, 0, 224, 224), -# (100, 100, 224, 224), -# ), dtype=np.float32) -# roi_indices = self.link.xp.array((0, 1, 0, 0), dtype=np.int32) -# n_roi = len(roi_indices) -# -# rois, roi_indices, order = self.link.distribute(rois, roi_indices) -# -# self.assertEqual(len(rois), 3) -# self.assertEqual(len(roi_indices), 3) -# for l in range(3): -# self.assertIsInstance(rois[l], self.link.xp.ndarray) -# self.assertIsInstance(roi_indices[l], self.link.xp.ndarray) -# -# self.assertEqual(rois[l].shape[0], roi_indices[l].shape[0]) -# self.assertEqual(rois[l].shape[1:], (4,)) -# self.assertEqual(roi_indices[l].shape[1:], ()) -# -# self.assertEqual(sum(rois[l].shape[0] for l in range(3)), 4) -# -# self.assertEqual(len(order), n_roi) -# self.assertIsInstance(order, self.link.xp.ndarray) -# -# def test_distribute_cpu(self): -# self._check_distribute() -# -# @attr.gpu -# def test_distribute_gpu(self): -# self.link.to_gpu() -# self._check_distribute() -# -# def _check_decode(self): -# segms = [ -# _random_array( -# self.link.xp, -# (1, self.n_class, self.link.mask_size, self.link.mask_size)), -# _random_array( -# self.link.xp, -# (2, self.n_class, self.link.mask_size, self.link.mask_size)), -# _random_array( -# self.link.xp, -# (1, self.n_class, self.link.mask_size, self.link.mask_size)) -# ] -# bboxes = [ -# self.link.xp.array(((4, 1, 6, 3),), dtype=np.float32), -# self.link.xp.array( -# ((0, 1, 2, 3), (5, 4, 10, 6)), dtype=np.float32), -# self.link.xp.array(((10, 4, 12, 10),), dtype=np.float32), -# ] -# labels = [ -# self.link.xp.random.randint( -# 0, self.n_class - 1, size=(1,), 
dtype=np.int32), -# self.link.xp.random.randint( -# 0, self.n_class - 1, size=(2,), dtype=np.int32), -# self.link.xp.random.randint( -# 0, self.n_class - 1, size=(1,), dtype=np.int32), -# ] -# -# sizes = [(56, 56), (48, 48), (72, 72)] -# masks = self.link.decode( -# segms, bboxes, labels, sizes) -# -# self.assertEqual(len(masks), 3) -# for n in range(3): -# self.assertIsInstance(masks[n], self.link.xp.ndarray) -# -# self.assertEqual(masks[n].shape[0], labels[n].shape[0]) -# self.assertEqual(masks[n].shape[1:], sizes[n]) -# -# def test_decode_cpu(self): -# self._check_decode() -# -# @attr.gpu -# def test_decode_gpu(self): -# self.link.to_gpu() -# self._check_decode() -# -# +@testing.parameterize( + {'n_class': 1 + 1}, + {'n_class': 5 + 1}, + {'n_class': 20 + 1}, +) +class TestMaskHead(unittest.TestCase): + + def setUp(self): + self.link = MaskHead( + n_class=self.n_class, scales=(1 / 2, 1 / 4, 1 / 8)) + + def _check_call(self): + hs = [ + chainer.Variable(_random_array(self.link.xp, (2, 64, 32, 32))), + chainer.Variable(_random_array(self.link.xp, (2, 64, 16, 16))), + chainer.Variable(_random_array(self.link.xp, (2, 64, 8, 8))), + ] + rois = [ + self.link.xp.array(((4, 1, 6, 3),), dtype=np.float32), + self.link.xp.array( + ((0, 1, 2, 3), (5, 4, 10, 6)), dtype=np.float32), + self.link.xp.array(((10, 4, 12, 10),), dtype=np.float32), + ] + roi_indices = [ + self.link.xp.array((0,), dtype=np.int32), + self.link.xp.array((1, 0), dtype=np.int32), + self.link.xp.array((1,), dtype=np.int32), + ] + + segs = self.link(hs, rois, roi_indices) + + self.assertIsInstance(segs, chainer.Variable) + self.assertIsInstance(segs.array, self.link.xp.ndarray) + self.assertEqual( + segs.shape, + (4, self.n_class, self.link.mask_size, self.link.mask_size)) + + def test_call_cpu(self): + self._check_call() + + @attr.gpu + def test_call_gpu(self): + self.link.to_gpu() + self._check_call() + + def _check_distribute(self): + rois = self.link.xp.array(( + (0, 0, 10, 10), + (0, 1000, 0, 1000), + 
(0, 0, 224, 224), + (100, 100, 224, 224), + ), dtype=np.float32) + roi_indices = self.link.xp.array((0, 1, 0, 0), dtype=np.int32) + n_roi = len(roi_indices) + + rois, roi_indices, order = self.link.distribute(rois, roi_indices) + + self.assertEqual(len(rois), 3) + self.assertEqual(len(roi_indices), 3) + for l in range(3): + self.assertIsInstance(rois[l], self.link.xp.ndarray) + self.assertIsInstance(roi_indices[l], self.link.xp.ndarray) + + self.assertEqual(rois[l].shape[0], roi_indices[l].shape[0]) + self.assertEqual(rois[l].shape[1:], (4,)) + self.assertEqual(roi_indices[l].shape[1:], ()) + + self.assertEqual(sum(rois[l].shape[0] for l in range(3)), 4) + + self.assertEqual(len(order), n_roi) + self.assertIsInstance(order, self.link.xp.ndarray) + + def test_distribute_cpu(self): + self._check_distribute() + + @attr.gpu + def test_distribute_gpu(self): + self.link.to_gpu() + self._check_distribute() + + def _check_decode(self): + segms = [ + _random_array( + self.link.xp, + (1, self.n_class, self.link.mask_size, self.link.mask_size)), + _random_array( + self.link.xp, + (2, self.n_class, self.link.mask_size, self.link.mask_size)), + _random_array( + self.link.xp, + (1, self.n_class, self.link.mask_size, self.link.mask_size)) + ] + bboxes = [ + self.link.xp.array(((4, 1, 6, 3),), dtype=np.float32), + self.link.xp.array( + ((0, 1, 2, 3), (5, 4, 10, 6)), dtype=np.float32), + self.link.xp.array(((10, 4, 12, 10),), dtype=np.float32), + ] + labels = [ + self.link.xp.random.randint( + 0, self.n_class - 1, size=(1,), dtype=np.int32), + self.link.xp.random.randint( + 0, self.n_class - 1, size=(2,), dtype=np.int32), + self.link.xp.random.randint( + 0, self.n_class - 1, size=(1,), dtype=np.int32), + ] + + sizes = [(56, 56), (48, 48), (72, 72)] + masks = self.link.decode( + segms, bboxes, labels, sizes) + + self.assertEqual(len(masks), 3) + for n in range(3): + self.assertIsInstance(masks[n], self.link.xp.ndarray) + + self.assertEqual(masks[n].shape[0], labels[n].shape[0]) + 
self.assertEqual(masks[n].shape[1:], sizes[n]) + + def test_decode_cpu(self): + self._check_decode() + + class TestMaskHeadLoss(unittest.TestCase): def _check_mask_loss_pre(self, xp): @@ -161,13 +156,14 @@ def _check_mask_loss_pre(self, xp): xp.array((1,), dtype=np.int32), ] masks = [ - _random_array(xp, (n_class, mask_size, mask_size)), - _random_array(xp, (n_class, mask_size, mask_size)), - _random_array(xp, (n_class, mask_size, mask_size)), + _random_array(xp, (n_class, 60, 70)), + _random_array(xp, (n_class, 60, 70)), + _random_array(xp, (n_class, 60, 70)), ] labels = [ - xp.array((10, 4), dtype=np.float32), - xp.array((1,), dtype=np.float32), + xp.array((10, 4), dtype=np.int32), + xp.array((1,), dtype=np.int32), + xp.array((3,), dtype=np.int32), ] rois, roi_indices, gt_segms, gt_mask_labels = mask_loss_pre( rois, roi_indices, masks, labels, mask_size) @@ -176,19 +172,19 @@ def _check_mask_loss_pre(self, xp): self.assertEqual(len(roi_indices), 3) self.assertEqual(len(gt_segms), 3) self.assertEqual(len(gt_mask_labels), 3) - # for l in range(3): - # self.assertIsInstance(rois[l], xp.ndarray) - # self.assertIsInstance(roi_indices[l], xp.ndarray) - # self.assertIsInstance(gt_locs[l], xp.ndarray) - # self.assertIsInstance(gt_labels[l], xp.ndarray) - - # self.assertEqual(rois[l].shape[0], roi_indices[l].shape[0]) - # self.assertEqual(rois[l].shape[0], gt_locs[l].shape[0]) - # self.assertEqual(rois[l].shape[0], gt_labels[l].shape[0]) - # self.assertEqual(rois[l].shape[1:], (4,)) - # self.assertEqual(roi_indices[l].shape[1:], ()) - # self.assertEqual(gt_locs[l].shape[1:], (4,)) - # self.assertEqual(gt_labels[l].shape[1:], ()) + for l in range(3): + self.assertIsInstance(rois[l], xp.ndarray) + self.assertIsInstance(roi_indices[l], xp.ndarray) + self.assertIsInstance(gt_segms[l], xp.ndarray) + self.assertIsInstance(gt_mask_labels[l], xp.ndarray) + + self.assertEqual(rois[l].shape[0], roi_indices[l].shape[0]) + self.assertEqual(rois[l].shape[0], gt_segms[l].shape[0]) + 
self.assertEqual(rois[l].shape[0], gt_mask_labels[l].shape[0]) + self.assertEqual(rois[l].shape[1:], (4,)) + self.assertEqual(roi_indices[l].shape[1:], ()) + self.assertEqual(gt_segms[l].shape[1:], (mask_size, mask_size)) + self.assertEqual(gt_mask_labels[l].shape[1:], ()) def test_mask_loss_pre_cpu(self): self._check_mask_loss_pre(np) @@ -198,43 +194,39 @@ def test_mask_loss_pre_gpu(self): import cupy self._check_mask_loss_pre(cupy) - # def _check_head_loss_post(self, xp): - # locs = chainer.Variable(_random_array(xp, (20, 81, 4))) - # confs = chainer.Variable(_random_array(xp, (20, 81))) - # roi_indices = [ - # xp.random.randint(0, 2, size=5).astype(np.int32), - # xp.random.randint(0, 2, size=7).astype(np.int32), - # xp.random.randint(0, 2, size=8).astype(np.int32), - # ] - # gt_locs = [ - # _random_array(xp, (5, 4)), - # _random_array(xp, (7, 4)), - # _random_array(xp, (8, 4)), - # ] - # gt_labels = [ - # xp.random.randint(0, 80, size=5).astype(np.int32), - # xp.random.randint(0, 80, size=7).astype(np.int32), - # xp.random.randint(0, 80, size=8).astype(np.int32), - # ] - - # loc_loss, conf_loss = head_loss_post( - # locs, confs, roi_indices, gt_locs, gt_labels, 2) - - # self.assertIsInstance(loc_loss, chainer.Variable) - # self.assertIsInstance(loc_loss.array, xp.ndarray) - # self.assertEqual(loc_loss.shape, ()) - - # self.assertIsInstance(conf_loss, chainer.Variable) - # self.assertIsInstance(conf_loss.array, xp.ndarray) - # self.assertEqual(conf_loss.shape, ()) - - # def test_head_loss_post_cpu(self): - # self._check_head_loss_post(np) - - # @attr.gpu - # def test_head_loss_post_gpu(self): - # import cupy - # self._check_head_loss_post(cupy) + def _check_head_loss_post(self, xp): + B = 2 + segms = chainer.Variable(_random_array(xp, (20, 81, 28, 28))) + mask_roi_indices = [ + xp.random.randint(0, B, size=5).astype(np.int32), + xp.random.randint(0, B, size=7).astype(np.int32), + xp.random.randint(0, B, size=8).astype(np.int32), + ] + gt_segms = [ + 
_random_array(xp, (5, 28, 28)), + _random_array(xp, (7, 28, 28)), + _random_array(xp, (8, 28, 28)), + ] + gt_mask_labels = [ + xp.random.randint(0, 80, size=5).astype(np.int32), + xp.random.randint(0, 80, size=7).astype(np.int32), + xp.random.randint(0, 80, size=8).astype(np.int32), + ] + + mask_loss = mask_loss_post( + segms, mask_roi_indices, gt_segms, gt_mask_labels, B) + + self.assertIsInstance(mask_loss, chainer.Variable) + self.assertIsInstance(mask_loss.array, xp.ndarray) + self.assertEqual(mask_loss.shape, ()) + + def test_head_loss_post_cpu(self): + self._check_head_loss_post(np) + + @attr.gpu + def test_head_loss_post_gpu(self): + import cupy + self._check_head_loss_post(cupy) testing.run_module(__name__, __file__) From e19c8d03f0fc289b6e082c43a705f0c6f925b466 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 14 Feb 2019 10:33:46 +0900 Subject: [PATCH 007/100] COCOPointDataset --- chainercv/datasets/__init__.py | 2 + chainercv/datasets/coco/coco_point_dataset.py | 111 ++++++++++++++++++ chainercv/datasets/coco/coco_utils.py | 21 ++++ 3 files changed, 134 insertions(+) create mode 100644 chainercv/datasets/coco/coco_point_dataset.py diff --git a/chainercv/datasets/__init__.py b/chainercv/datasets/__init__.py index 1370b54c75..c2ca52af4d 100644 --- a/chainercv/datasets/__init__.py +++ b/chainercv/datasets/__init__.py @@ -12,9 +12,11 @@ from chainercv.datasets.cityscapes.cityscapes_utils import cityscapes_semantic_segmentation_label_names # NOQA from chainercv.datasets.coco.coco_bbox_dataset import COCOBboxDataset # NOQA from chainercv.datasets.coco.coco_instance_segmentation_dataset import COCOInstanceSegmentationDataset # NOQA +from chainercv.datasets.coco.coco_point_dataset import COCOPointDataset # NOQA from chainercv.datasets.coco.coco_semantic_segmentation_dataset import COCOSemanticSegmentationDataset # NOQA from chainercv.datasets.coco.coco_utils import coco_bbox_label_names # NOQA from chainercv.datasets.coco.coco_utils import 
coco_instance_segmentation_label_names # NOQA +from chainercv.datasets.coco.coco_utils import coco_point_names # NOQA from chainercv.datasets.coco.coco_utils import coco_semantic_segmentation_label_colors # NOQA from chainercv.datasets.coco.coco_utils import coco_semantic_segmentation_label_names # NOQA from chainercv.datasets.cub.cub_label_dataset import CUBLabelDataset # NOQA diff --git a/chainercv/datasets/coco/coco_point_dataset.py b/chainercv/datasets/coco/coco_point_dataset.py new file mode 100644 index 0000000000..6438ef0bf2 --- /dev/null +++ b/chainercv/datasets/coco/coco_point_dataset.py @@ -0,0 +1,111 @@ +from collections import defaultdict +import json +import numpy as np +import os + +from chainercv.chainer_experimental.datasets.sliceable import GetterDataset +from chainercv.datasets.coco.coco_instances_base_dataset import \ + COCOInstancesBaseDataset +from chainercv.datasets.coco.coco_utils import get_coco +from chainercv import utils + + +class COCOPointDataset(GetterDataset): + + def __init__(self, data_dir='auto', split='train', year='2017', + use_crowded=False, return_area=False, return_crowded=False): + super(COCOPointDataset, self).__init__() + self.use_crowded = use_crowded + if data_dir == 'auto': + data_dir = get_coco(split, split, year, 'instances') + + self.img_root = os.path.join( + data_dir, 'images', '{}{}'.format(split, year)) + self.data_dir = data_dir + + point_anno_path = os.path.join( + self.data_dir, 'annotations', 'person_keypoints_{}{}.json'.format( + split, year)) + annos = json.load(open(point_anno_path, 'r')) + + self.id_to_prop = {} + for prop in annos['images']: + self.id_to_prop[prop['id']] = prop + self.ids = sorted(list(self.id_to_prop.keys())) + + self.cat_ids = [cat['id'] for cat in annos['categories']] + + self.id_to_anno = defaultdict(list) + for anno in annos['annotations']: + self.id_to_anno[anno['image_id']].append(anno) + + self.add_getter('img', self._get_image) + self.add_getter( + ['point', 'bbox', 'label', 
'area', 'crowded'], + self._get_annotations) + keys = ('img', 'point', 'bbox', 'label') + if return_area: + keys += ('area',) + if return_crowded: + keys += ('crowded',) + self.keys = keys + + def __len__(self): + return len(self.ids) + + def _get_image(self, i): + img_path = os.path.join( + self.img_root, self.id_to_prop[self.ids[i]]['file_name']) + img = utils.read_image(img_path, dtype=np.float32, color=True) + return img + + def _get_annotations(self, i): + # List[{'segmentation', 'area', 'iscrowd', + # 'image_id', 'bbox', 'category_id', 'id'}] + annotation = self.id_to_anno[self.ids[i]] + bbox = np.array([ann['bbox'] for ann in annotation], + dtype=np.float32) + if len(bbox) == 0: + bbox = np.zeros((0, 4), dtype=np.float32) + # (x, y, width, height) -> (x_min, y_min, x_max, y_max) + bbox[:, 2] = bbox[:, 0] + bbox[:, 2] + bbox[:, 3] = bbox[:, 1] + bbox[:, 3] + # (x_min, y_min, x_max, y_max) -> (y_min, x_min, y_max, x_max) + bbox = bbox[:, [1, 0, 3, 2]] + + label = np.array([self.cat_ids.index(ann['category_id']) + for ann in annotation], dtype=np.int32) + + area = np.array([ann['area'] + for ann in annotation], dtype=np.float32) + + crowded = np.array([ann['iscrowd'] + for ann in annotation], dtype=np.bool) + + point = np.array( + [anno['keypoints'] for anno in annotation], dtype=np.float32) + if len(point) > 0: + x = point[:, 0::3] + y = point[:, 1::3] + # 0: not labeled; 1: labeled, not inside mask; + # 2: labeled and inside mask + v = point[:, 2::3] + point = np.stack((y, x, v), axis=2) + else: + point = np.array((0, 0, 3), dtype=np.float32) + + # Remove invalid boxes + bbox_area = np.prod(bbox[:, 2:] - bbox[:, :2], axis=1) + keep_mask = np.logical_and(bbox[:, 0] <= bbox[:, 2], + bbox[:, 1] <= bbox[:, 3]) + keep_mask = np.logical_and(keep_mask, bbox_area > 0) + + if not self.use_crowded: + keep_mask = np.logical_and(keep_mask, np.logical_not(crowded)) + + point = point[keep_mask] + bbox = bbox[keep_mask] + label = label[keep_mask] + area = area[keep_mask] + 
crowded = crowded[keep_mask] + return point, bbox, label, area, crowded diff --git a/chainercv/datasets/coco/coco_utils.py b/chainercv/datasets/coco/coco_utils.py index cf1a6e195e..10841d567a 100644 --- a/chainercv/datasets/coco/coco_utils.py +++ b/chainercv/datasets/coco/coco_utils.py @@ -439,3 +439,24 @@ def get_coco(split, img_split, year, mode): coco_instance_segmentation_label_names = coco_bbox_label_names + + +coco_point_names = [ + 'nose', + 'left_eye', + 'right_eye', + 'left_ear', + 'right_ear', + 'left_shoulder', + 'right_shoulder', + 'left_elbow', + 'right_elbow', + 'left_wrist', + 'right_wrist', + 'left_hip', + 'right_hip', + 'left_knee', + 'right_knee', + 'left_ankle', + 'right_ankle' +] From 1d97870bb9ed4c4e6c59c0be6d56da890fcd4d0c Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 15 Feb 2019 16:31:25 +0900 Subject: [PATCH 008/100] add vis_coco_point --- chainercv/visualizations/__init__.py | 1 + chainercv/visualizations/vis_coco_point.py | 118 +++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100644 chainercv/visualizations/vis_coco_point.py diff --git a/chainercv/visualizations/__init__.py b/chainercv/visualizations/__init__.py index 2adf9f7ba8..33ef5a9d1f 100644 --- a/chainercv/visualizations/__init__.py +++ b/chainercv/visualizations/__init__.py @@ -1,4 +1,5 @@ from chainercv.visualizations.vis_bbox import vis_bbox # NOQA +from chainercv.visualizations.vis_coco_point import vis_coco_point # NOQA from chainercv.visualizations.vis_image import vis_image # NOQA from chainercv.visualizations.vis_instance_segmentation import vis_instance_segmentation # NOQA from chainercv.visualizations.vis_point import vis_point # NOQA diff --git a/chainercv/visualizations/vis_coco_point.py b/chainercv/visualizations/vis_coco_point.py new file mode 100644 index 0000000000..438ff278e9 --- /dev/null +++ b/chainercv/visualizations/vis_coco_point.py @@ -0,0 +1,118 @@ +from __future__ import division + +import matplotlib.pyplot as plt +import numpy 
as np + +from chainercv.datasets import coco_point_names +from chainercv.visualizations.vis_image import vis_image + + +coco_point_skeleton = [ + [coco_point_names.index('left_eye'), + coco_point_names.index('right_eye')], + [coco_point_names.index('left_eye'), + coco_point_names.index('nose')], + [coco_point_names.index('right_eye'), + coco_point_names.index('nose')], + [coco_point_names.index('right_eye'), + coco_point_names.index('right_ear')], + [coco_point_names.index('left_eye'), + coco_point_names.index('left_ear')], + [coco_point_names.index('right_shoulder'), + coco_point_names.index('right_elbow')], + [coco_point_names.index('right_elbow'), + coco_point_names.index('right_wrist')], + [coco_point_names.index('left_shoulder'), + coco_point_names.index('left_elbow')], + [coco_point_names.index('left_elbow'), + coco_point_names.index('left_wrist')], + [coco_point_names.index('right_hip'), + coco_point_names.index('right_knee')], + [coco_point_names.index('right_knee'), + coco_point_names.index('right_ankle')], + [coco_point_names.index('left_hip'), + coco_point_names.index('left_knee')], + [coco_point_names.index('left_knee'), + coco_point_names.index('left_ankle')], + [coco_point_names.index('right_shoulder'), + coco_point_names.index('left_shoulder')], + [coco_point_names.index('right_hip'), + coco_point_names.index('left_hip')] +] + + +def vis_coco_point(img, point, point_score, thresh=2, ax=None): + from matplotlib import pyplot as plt + + # Returns newly instantiated matplotlib.axes.Axes object if ax is None + ax = vis_image(img, ax=ax) + + cmap = plt.get_cmap('rainbow') + colors = [cmap(i) for i in np.linspace(0, 1, len(coco_point_skeleton) + 2)] + + # plt.autoscale(False) + for i in range(len(point)): + pnt = point[i] + pnt_sc = point_score[i] + for l in range(len(coco_point_skeleton)): + i0 = coco_point_skeleton[l][0] + i1 = coco_point_skeleton[l][1] + s0 = pnt_sc[i0] + y0 = pnt[i0, 0] + x0 = pnt[i0, 1] + s1 = pnt_sc[i1] + y1 = pnt[i1, 0] + x1 = 
pnt[i1, 1] + if s0 > thresh and s1 > thresh: + line = ax.plot([x0, x1], [y0, y1]) + plt.setp(line, color=colors[l], linewidth=1.0, alpha=0.7) + if s0 > thresh: + ax.plot( + x0, y0, '.', color=colors[l], + markersize=3.0, alpha=0.7) + if s1 > thresh: + ax.plot( + x1, y1, '.', color=colors[l], + markersize=3.0, alpha=0.7) + + # for better visualization, add mid shoulder / mid hip + mid_shoulder = ( + pnt[coco_point_names.index('right_shoulder'), :2] + + pnt[coco_point_names.index('left_shoulder'), :2]) / 2 + mid_shoulder_sc = np.minimum( + pnt[coco_point_names.index('right_shoulder'), 2], + pnt[coco_point_names.index('left_shoulder'), 2]) + + mid_hip = ( + pnt[coco_point_names.index('right_hip'), :2] + + pnt[coco_point_names.index('left_hip'), :2]) / 2 + mid_hip_sc = np.minimum( + pnt[coco_point_names.index('right_hip'), 2], + pnt[coco_point_names.index('left_hip'), 2]) + if (mid_shoulder_sc > thresh and + pnt[coco_point_names.index('nose'), 2] > thresh): + y = [mid_shoulder[0], pnt[coco_point_names.index('nose'), 0]] + x = [mid_shoulder[1], pnt[coco_point_names.index('nose'), 1]] + line = ax.plot(x, y) + plt.setp( + line, color=colors[len(coco_point_skeleton)], + linewidth=1.0, alpha=0.7) + if (mid_shoulder_sc > thresh and mid_hip_sc > thresh): + y = [mid_shoulder[0], mid_hip[0]] + x = [mid_shoulder[1], mid_hip[1]] + line = ax.plot(x, y) + plt.setp( + line, color=colors[len(coco_point_skeleton) + 1], + linewidth=1.0, alpha=0.7) + + return ax + + +if __name__ == '__main__': + data = np.load('vis_point.npz') + img = data['img'] + point = data['point'] + point_score = data['point_score'] + # plt.imshow(img) + vis_coco_point(img, point, point_score) + plt.show() From 43e8acca63eec94ffcfe9e0ad12912876da7df96 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 15 Feb 2019 17:51:39 +0900 Subject: [PATCH 009/100] handle the case when #RoI is 0 --- examples/mask_rcnn/train_multi.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff 
--git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py index b7ba734910..c8e030d1a9 100644 --- a/examples/mask_rcnn/train_multi.py +++ b/examples/mask_rcnn/train_multi.py @@ -93,19 +93,23 @@ def __call__(self, imgs, masks, labels, bboxes): mask_rois, mask_roi_indices, gt_segms, gt_mask_labels = mask_loss_pre( rois, roi_indices, masks, head_gt_labels, self.model.mask_head.mask_size) - segms = self.model.mask_head(hs, mask_rois, mask_roi_indices) - mask_loss = mask_loss_post( - segms, mask_roi_indices, gt_segms, gt_mask_labels, B) - - loss = (rpn_loc_loss + rpn_conf_loss + + n_roi = sum([len(roi) for roi in mask_rois]) + if n_roi > 0: + segms = self.model.mask_head(hs, mask_rois, mask_roi_indices) + mask_loss = mask_loss_post( + segms, mask_roi_indices, gt_segms, gt_mask_labels, B) + loss = (rpn_loc_loss + rpn_conf_loss + head_loc_loss + head_conf_loss + mask_loss) - chainer.reporter.report({ - 'loss': loss, - 'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss, - 'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss, - 'loss/mask': mask_loss}, - self) - + chainer.reporter.report({ + 'loss': loss, + 'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss, + 'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss, + 'loss/mask': mask_loss}, + self) + else: + # ChainerMN hangs when a subset of nodes has a different + # computational graph from the rest. 
+ loss = chainer.Variable(self.xp.array(0, dtype=np.float32)) return loss From cb2ad16719bf9bdd00a88cf1a23af701fdc08039 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Sat, 16 Feb 2019 14:43:14 +0900 Subject: [PATCH 010/100] fix some bug --- chainercv/visualizations/vis_coco_point.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/chainercv/visualizations/vis_coco_point.py b/chainercv/visualizations/vis_coco_point.py index 438ff278e9..9666bbbfe6 100644 --- a/chainercv/visualizations/vis_coco_point.py +++ b/chainercv/visualizations/vis_coco_point.py @@ -51,9 +51,7 @@ def vis_coco_point(img, point, point_score, thresh=2, ax=None): colors = [cmap(i) for i in np.linspace(0, 1, len(coco_point_skeleton) + 2)] # plt.autoscale(False) - for i in range(len(point)): - pnt = point[i] - pnt_sc = point_score[i] + for pnt, pnt_sc in zip(point, point_score): for l in range(len(coco_point_skeleton)): i0 = coco_point_skeleton[l][0] i1 = coco_point_skeleton[l][1] @@ -80,17 +78,17 @@ def vis_coco_point(img, point, point_score, thresh=2, ax=None): pnt[coco_point_names.index('right_shoulder'), :2] + pnt[coco_point_names.index('left_shoulder'), :2]) / 2 mid_shoulder_sc = np.minimum( - pnt[coco_point_names.index('right_shoulder'), 2], - pnt[coco_point_names.index('left_shoulder'), 2]) + pnt_sc[coco_point_names.index('right_shoulder')], + pnt_sc[coco_point_names.index('left_shoulder')]) mid_hip = ( pnt[coco_point_names.index('right_hip'), :2] + pnt[coco_point_names.index('left_hip'), :2]) / 2 mid_hip_sc = np.minimum( - pnt[coco_point_names.index('right_hip'), 2], - pnt[coco_point_names.index('left_hip'), 2]) + pnt_sc[coco_point_names.index('right_hip')], + pnt_sc[coco_point_names.index('left_hip')]) if (mid_shoulder_sc > thresh and - pnt[coco_point_names.index('nose'), 2] > thresh): + pnt_sc[coco_point_names.index('nose')] > thresh): y = [mid_shoulder[0], pnt[coco_point_names.index('nose'), 0]] x = [mid_shoulder[1], pnt[coco_point_names.index('nose'), 
1]] line = ax.plot(x, y) From a4319e052a51cdc6541265149f8ede8e2fc1da34 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Mon, 18 Feb 2019 10:18:57 +0900 Subject: [PATCH 011/100] delete mask option for MaskRCNN.prepare --- chainercv/links/model/mask_rcnn/mask_rcnn.py | 2 +- examples/mask_rcnn/train_multi.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py index 876ce06060..6de944fc34 100644 --- a/chainercv/links/model/mask_rcnn/mask_rcnn.py +++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py @@ -160,7 +160,7 @@ def predict(self, imgs): scores = [cuda.to_cpu(score) for score in scores] return masks, labels, scores - def prepare(self, imgs, masks=None): + def prepare(self, imgs): """Preprocess images. Args: diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py index c8e030d1a9..478a263b26 100644 --- a/examples/mask_rcnn/train_multi.py +++ b/examples/mask_rcnn/train_multi.py @@ -57,7 +57,7 @@ def prepare_mask(self, masks, resized_sizes, pad_size): return pad_masks def __call__(self, imgs, masks, labels, bboxes): - x, scales, resized_sizes = self.model.prepare(imgs, masks) + x, scales, resized_sizes = self.model.prepare(imgs) B, _, pad_H, pad_W = x.shape masks = self.prepare_mask(masks, resized_sizes, (pad_H, pad_W)) bboxes = [self.xp.array(bbox) * scale From 496dd9369a86390993dcf7c717ec641463627e03 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Mon, 18 Feb 2019 14:32:09 +0900 Subject: [PATCH 012/100] use MultiprocessIterator --- chainercv/links/model/mask_rcnn/mask_rcnn.py | 14 +-- examples/mask_rcnn/train_multi.py | 119 +++++++++++++------ 2 files changed, 91 insertions(+), 42 deletions(-) diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py index 876ce06060..4158846ed4 100644 --- a/chainercv/links/model/mask_rcnn/mask_rcnn.py +++ 
b/chainercv/links/model/mask_rcnn/mask_rcnn.py @@ -44,9 +44,9 @@ class MaskRCNN(chainer.Chain): """ - _min_size = 800 - _max_size = 1333 - _stride = 32 + min_size = 800 + max_size = 1333 + stride = 32 def __init__(self, extractor, rpn, head, mask_head): super(MaskRCNN, self).__init__() @@ -179,9 +179,9 @@ def prepare(self, imgs, masks=None): resized_sizes = [] for img in imgs: _, H, W = img.shape - scale = self._min_size / min(H, W) - if scale * max(H, W) > self._max_size: - scale = self._max_size / max(H, W) + scale = self.min_size / min(H, W) + if scale * max(H, W) > self.max_size: + scale = self.max_size / max(H, W) scales.append(scale) H, W = int(H * scale), int(W * scale) img = transforms.resize(img, (H, W)) @@ -191,7 +191,7 @@ def prepare(self, imgs, masks=None): pad_size = np.array( [im.shape[1:] for im in resized_imgs]).max(axis=0) pad_size = ( - np.ceil(pad_size / self._stride) * self._stride).astype(int) + np.ceil(pad_size / self.stride) * self.stride).astype(int) x = np.zeros( (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32) for i, im in enumerate(resized_imgs): diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py index c8e030d1a9..5f38949069 100644 --- a/examples/mask_rcnn/train_multi.py +++ b/examples/mask_rcnn/train_multi.py @@ -41,28 +41,31 @@ def __init__(self, model): with self.init_scope(): self.model = model - def prepare_mask(self, masks, resized_sizes, pad_size): - resized_masks = [] - for size, mask in zip(resized_sizes, masks): - resized_masks.append(transforms.resize( - mask.astype(np.float32), - size, interpolation=PIL.Image.NEAREST).astype(np.bool)) - pad_masks = [] - for mask in resized_masks: - n_class, H, W = mask.shape - pad_mask = self.xp.zeros( - (n_class, pad_size[0], pad_size[1]), dtype=np.bool) - pad_mask[:, :H, :W] = self.xp.array(mask) - pad_masks.append(pad_mask) - return pad_masks - def __call__(self, imgs, masks, labels, bboxes): - x, scales, resized_sizes = self.model.prepare(imgs, 
masks) - B, _, pad_H, pad_W = x.shape - masks = self.prepare_mask(masks, resized_sizes, (pad_H, pad_W)) - bboxes = [self.xp.array(bbox) * scale - for bbox, scale in zip(bboxes, scales)] + B = len(imgs) + pad_size = np.array( + [im.shape[1:] for im in imgs]).max(axis=0) + pad_size = ( + np.ceil(pad_size / self.model.stride) * self.model.stride).astype(int) + x = np.zeros( + (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32) + for i, img in enumerate(imgs): + _, H, W = img.shape + x[i, :, :H, :W] = img + x = self.xp.array(x) + + pad_masks = [ + self.xp.zeros( + (mask.shape[0], pad_size[0], pad_size[1]), dtype=np.bool) + for mask in masks] + for i, mask in enumerate(masks): + _, H, W = mask.shape + pad_masks[i][:, :H, :W] = self.xp.array(mask) + masks = pad_masks + + bboxes = [self.xp.array(bbox) for bbox in bboxes] labels = [self.xp.array(label) for label in labels] + sizes = [img.shape[1:] for img in imgs] with chainer.using_config('train', False): hs = self.model.extractor(x) @@ -70,10 +73,7 @@ def __call__(self, imgs, masks, labels, bboxes): rpn_locs, rpn_confs = self.model.rpn(hs) anchors = self.model.rpn.anchors(h.shape[2:] for h in hs) rpn_loc_loss, rpn_conf_loss = rpn_loss( - rpn_locs, rpn_confs, anchors, - [(int(img.shape[1] * scale), int(img.shape[2] * scale)) - for img, scale in zip(imgs, scales)], - bboxes) + rpn_locs, rpn_confs, anchors, sizes, bboxes) rois, roi_indices = self.model.rpn.decode( rpn_locs, rpn_confs, anchors, x.shape) @@ -110,19 +110,43 @@ def __call__(self, imgs, masks, labels, bboxes): # ChainerMN hangs when a subset of nodes has a different # computational graph from the rest. 
loss = chainer.Variable(self.xp.array(0, dtype=np.float32)) + self.zerograds() return loss -def transform(in_data): - img, mask, label, bbox = in_data +class Transform(object): + + def __init__(self, mean, min_size, max_size): + self.mean = mean + self.min_size = min_size + self.max_size = max_size + + def __call__(self, in_data): + img, mask, label, bbox = in_data + + # Flipping + img, params = transforms.random_flip( + img, x_random=True, return_param=True) + mask = transforms.flip(mask, x_flip=params['x_flip']) + bbox = transforms.flip_bbox( + bbox, img.shape[1:], x_flip=params['x_flip']) - img, params = transforms.random_flip( - img, x_random=True, return_param=True) - mask = transforms.flip(mask, x_flip=params['x_flip']) - bbox = transforms.flip_bbox( - bbox, img.shape[1:], x_flip=params['x_flip']) + # TODO: make this part reusable + # Scaling + _, H, W = img.shape + scale = self.min_size / min(H, W) + if scale * max(H, W) > self.max_size: + scale = self.max_size / max(H, W) + H, W = int(H * scale), int(W * scale) + img = transforms.resize(img, (H, W)) + mask = transforms.resize( + mask.astype(np.float32), + (H, W), interpolation=PIL.Image.NEAREST).astype(np.bool) + bbox = bbox * scale - return img, mask, label, bbox + # Subtract mean + img -= self.mean + return img, mask, label, bbox, scale def converter(batch, device=None): @@ -142,7 +166,10 @@ def main(): parser.add_argument('--out', default='result') parser.add_argument('--resume') parser.add_argument('--communicator', default='hierarchical') + parser.add_argument('--cprofile', action='store_true', help='cprofile') args = parser.parse_args() + chainer.global_config.cv_resize_backend = 'PIL' + # chainer.global_config.cv_read_image_backend = 'PIL' # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator if hasattr(multiprocessing, 'set_start_method'): @@ -153,6 +180,8 @@ def main(): comm = chainermn.create_communicator(args.communicator) device = comm.intra_rank + 
global rank + rank = comm.rank if args.model == 'mask_rcnn_fpn_resnet50': model = MaskRCNNFPNResNet50( @@ -170,8 +199,10 @@ def main(): train = TransformDataset( COCOInstanceSegmentationDataset( + data_dir='/home/yuyu2172/coco', split='train', return_bbox=True), - ('img', 'mask', 'label', 'bbox'), transform) + ('img', 'mask', 'label', 'bbox'), + Transform(model.extractor.mean, model.min_size, model.max_size)) if comm.rank == 0: indices = np.arange(len(train)) @@ -180,8 +211,9 @@ def main(): indices = chainermn.scatter_dataset(indices, comm, shuffle=True) train = train.slice[indices] - train_iter = chainer.iterators.MultithreadIterator( - train, args.batchsize // comm.size) + train_iter = chainer.iterators.MultiprocessIterator( + train, args.batchsize // comm.size, + n_processes=args.batchsize // comm.size, shared_mem=100 * 1000 * 1000 * 4) optimizer = chainermn.create_multi_node_optimizer( chainer.optimizers.MomentumSGD(), comm) @@ -242,7 +274,24 @@ def lr_schedule(trainer): if args.resume: serializers.load_npz(args.resume, trainer, strict=False) + if args.cprofile: + import cProfile + import io + import pstats + print('cprofiling') + pr = cProfile.Profile() + pr.enable() trainer.run() + if args.cprofile: + pr.disable() + s = io.StringIO() + sort_by = 'tottime' + ps = pstats.Stats(pr, stream=s).sort_stats(sort_by) + ps.print_stats() + if comm.rank == 0: + print(s.getvalue()) + + pr.dump_stats('{0}/rank_{1}.cprofile'.format(args.out, comm.rank)) if __name__ == '__main__': From 183229c1d53faf95ca91cc7615be21cf1031d0d3 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Mon, 18 Feb 2019 14:40:03 +0900 Subject: [PATCH 013/100] change url link --- chainercv/datasets/coco/coco_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/chainercv/datasets/coco/coco_utils.py b/chainercv/datasets/coco/coco_utils.py index 10841d567a..f96ec5803c 100644 --- a/chainercv/datasets/coco/coco_utils.py +++ b/chainercv/datasets/coco/coco_utils.py @@ -18,10 
+18,10 @@ } instances_anno_urls = { '2014': { - 'train': 'http://msvocds.blob.core.windows.net/annotations-1-0-3/' - 'instances_train-val2014.zip', - 'val': 'http://msvocds.blob.core.windows.net/annotations-1-0-3/' - 'instances_train-val2014.zip', + 'train': 'http://images.cocodataset.org/annotations/' + 'annotations_trainval2014.zip', + 'val': 'http://images.cocodataset.org/annotations/' + 'annotations_trainval2014.zip', 'valminusminival': 'https://dl.dropboxusercontent.com/s/' 's3tw5zcg7395368/instances_valminusminival2014.json.zip', 'minival': 'https://dl.dropboxusercontent.com/s/o43o90bna78omob/' From 8417559815c9b27fe5d0d7d021fb4457f23fa211 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Mon, 18 Feb 2019 16:58:37 +0900 Subject: [PATCH 014/100] add eval_point_coco --- chainercv/evaluations/__init__.py | 1 + chainercv/evaluations/eval_point_coco.py | 189 ++++++++++++++++++ .../evaluations_tests/test_eval_point_coco.py | 136 +++++++++++++ 3 files changed, 326 insertions(+) create mode 100644 chainercv/evaluations/eval_point_coco.py create mode 100644 tests/evaluations_tests/test_eval_point_coco.py diff --git a/chainercv/evaluations/__init__.py b/chainercv/evaluations/__init__.py index 1f12332cdb..b3937cebfd 100644 --- a/chainercv/evaluations/__init__.py +++ b/chainercv/evaluations/__init__.py @@ -5,6 +5,7 @@ from chainercv.evaluations.eval_instance_segmentation_coco import eval_instance_segmentation_coco # NOQA from chainercv.evaluations.eval_instance_segmentation_voc import calc_instance_segmentation_voc_prec_rec # NOQA from chainercv.evaluations.eval_instance_segmentation_voc import eval_instance_segmentation_voc # NOQA +from chainercv.evaluations.eval_point_coco import eval_point_coco # NOQA from chainercv.evaluations.eval_semantic_segmentation import calc_semantic_segmentation_confusion # NOQA from chainercv.evaluations.eval_semantic_segmentation import calc_semantic_segmentation_iou # NOQA from chainercv.evaluations.eval_semantic_segmentation import 
eval_semantic_segmentation # NOQA diff --git a/chainercv/evaluations/eval_point_coco.py b/chainercv/evaluations/eval_point_coco.py new file mode 100644 index 0000000000..68f3e00975 --- /dev/null +++ b/chainercv/evaluations/eval_point_coco.py @@ -0,0 +1,189 @@ +import itertools +import numpy as np +import os +import six + +from chainercv.evaluations.eval_detection_coco import _redirect_stdout +from chainercv.evaluations.eval_detection_coco import _summarize + +try: + import pycocotools.coco + import pycocotools.cocoeval + _available = True +except ImportError: + _available = False + + +def eval_point_coco(pred_points, pred_labels, pred_scores, + gt_points, gt_is_valids, gt_bboxes, gt_labels, + gt_areas, gt_crowdeds=None): + if not _available: + raise ValueError( + 'Please install pycocotools \n' + 'pip install -e \'git+https://github.com/cocodataset/coco.git' + '#egg=pycocotools&subdirectory=PythonAPI\'') + + gt_coco = pycocotools.coco.COCO() + pred_coco = pycocotools.coco.COCO() + + pred_points = iter(pred_points) + pred_labels = iter(pred_labels) + pred_scores = iter(pred_scores) + gt_points = iter(gt_points) + gt_is_valids = iter(gt_is_valids) + gt_bboxes = iter(gt_bboxes) + gt_labels = iter(gt_labels) + + if gt_areas is None: + compute_area_dependent_metrics = False + gt_areas = itertools.repeat(None) + else: + compute_area_dependent_metrics = True + gt_areas = iter(gt_areas) + gt_crowdeds = (iter(gt_crowdeds) if gt_crowdeds is not None + else itertools.repeat(None)) + + ids = [] + pred_annos = [] + gt_annos = [] + existent_labels = {} + for i, (pred_point, pred_label, pred_score, gt_point, gt_is_valid, + gt_bbox, gt_label, + gt_area, gt_crowded) in enumerate(six.moves.zip( + pred_points, pred_labels, pred_scores, + gt_points, gt_is_valids, gt_bboxes, gt_labels, + gt_areas, gt_crowdeds)): + if gt_area is None: + gt_area = itertools.repeat(None) + if gt_crowded is None: + gt_crowded = itertools.repeat(None) + # Starting ids from 1 is important when using COCO. 
+ img_id = i + 1 + + for pred_pnt, pred_lb, pred_sc in zip(pred_point, pred_label, + pred_score): + # http://cocodataset.org/#format-results + # Visibility flag is currently not used for evaluation + is_v = np.ones(len(pred_pnt)) + pred_annos.append( + _create_anno(pred_pnt, is_v, None, + pred_lb, pred_sc, + img_id=img_id, anno_id=len(pred_annos) + 1, + ar=None, crw=0)) + existent_labels[pred_lb] = True + + for gt_pnt, gt_is_v, gt_bb, gt_lb, gt_ar, gt_crw in zip( + gt_point, gt_is_valid, gt_bbox, gt_label, gt_area, gt_crowded): + gt_annos.append( + _create_anno(gt_pnt, gt_is_v, gt_bb, gt_lb, None, + img_id=img_id, anno_id=len(gt_annos) + 1, + ar=gt_ar, crw=gt_crw)) + ids.append({'id': img_id}) + existent_labels = sorted(existent_labels.keys()) + + pred_coco.dataset['categories'] = [{'id': i} for i in existent_labels] + gt_coco.dataset['categories'] = [{'id': i} for i in existent_labels] + pred_coco.dataset['annotations'] = pred_annos + gt_coco.dataset['annotations'] = gt_annos + pred_coco.dataset['images'] = ids + gt_coco.dataset['images'] = ids + + with _redirect_stdout(open(os.devnull, 'w')): + pred_coco.createIndex() + gt_coco.createIndex() + coco_eval = pycocotools.cocoeval.COCOeval( + gt_coco, pred_coco, 'keypoints') + coco_eval.evaluate() + coco_eval.accumulate() + + results = {'coco_eval': coco_eval} + p = coco_eval.params + common_kwargs = { + 'prec': coco_eval.eval['precision'], + 'rec': coco_eval.eval['recall'], + 'iou_threshs': p.iouThrs, + 'area_ranges': p.areaRngLbl, + 'max_detection_list': p.maxDets, + } + all_kwargs = { + 'ap/iou=0.50:0.95/area=all/max_dets=20': { + 'ap': True, 'iou_thresh': None, 'area_range': 'all', + 'max_detection': 20}, + 'ap/iou=0.50/area=all/max_dets=20': { + 'ap': True, 'iou_thresh': 0.5, 'area_range': 'all', + 'max_detection': 20}, + 'ap/iou=0.75/area=all/max_dets=20': { + 'ap': True, 'iou_thresh': 0.75, 'area_range': 'all', + 'max_detection': 20}, + 'ar/iou=0.50:0.95/area=all/max_dets=20': { + 'ap': False, 'iou_thresh': 
None, 'area_range': 'all', + 'max_detection': 20}, + 'ar/iou=0.50/area=all/max_dets=20': { + 'ap': False, 'iou_thresh': 0.5, 'area_range': 'all', + 'max_detection': 20}, + 'ar/iou=0.75/area=all/max_dets=20': { + 'ap': False, 'iou_thresh': 0.75, 'area_range': 'all', + 'max_detection': 20}, + } + if compute_area_dependent_metrics: + all_kwargs.update({ + 'ap/iou=0.50:0.95/area=medium/max_dets=20': { + 'ap': True, 'iou_thresh': None, 'area_range': 'medium', + 'max_detection': 20}, + 'ap/iou=0.50:0.95/area=large/max_dets=20': { + 'ap': True, 'iou_thresh': None, 'area_range': 'large', + 'max_detection': 20}, + 'ar/iou=0.50:0.95/area=medium/max_dets=20': { + 'ap': False, 'iou_thresh': None, 'area_range': 'medium', + 'max_detection': 20}, + 'ar/iou=0.50:0.95/area=large/max_dets=20': { + 'ap': False, 'iou_thresh': None, 'area_range': 'large', + 'max_detection': 20}, + }) + + for key, kwargs in all_kwargs.items(): + kwargs.update(common_kwargs) + metrics, mean_metric = _summarize(**kwargs) + + # pycocotools ignores classes that are not included in + # either gt or prediction, but lies between 0 and + # the maximum label id. + # We set values for these classes to np.nan. + results[key] = np.nan * np.ones(np.max(existent_labels) + 1) + results[key][existent_labels] = metrics + results['m' + key] = mean_metric + + results['existent_labels'] = existent_labels + return results + + +def _create_anno(pnt, is_v, bb, lb, sc, img_id, anno_id, ar=None, crw=None): + # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/coco.py#L342 + y_min = np.min(pnt[:, 0]) + x_min = np.min(pnt[:, 1]) + y_max = np.max(pnt[:, 0]) + x_max = np.max(pnt[:, 1]) + if ar is None: + ar = (y_max - y_min) * (x_max - x_min) + + if crw is None: + crw = False + # Rounding is done to make the result consistent with COCO. 
+ + if bb is None: + bb_xywh = [x_min, y_min, x_max - x_min, y_max - y_min] + else: + bb_xywh = [bb[1], bb[0], bb[3] - bb[1], bb[2] - bb[0]] + pnt = np.concatenate((pnt[:, [1, 0]], is_v[:, None]), axis=1) + anno = { + 'image_id': img_id, 'category_id': lb, + 'keypoints': pnt.reshape((-1)).tolist(), + 'area': ar, + 'bbox': bb_xywh, + 'id': anno_id, + 'iscrowd': crw, + 'num_keypoints': (pnt[:, 0] > 0).sum() + } + if sc is not None: + anno.update({'score': sc}) + return anno diff --git a/tests/evaluations_tests/test_eval_point_coco.py b/tests/evaluations_tests/test_eval_point_coco.py new file mode 100644 index 0000000000..bc2095eefd --- /dev/null +++ b/tests/evaluations_tests/test_eval_point_coco.py @@ -0,0 +1,136 @@ +import numpy as np +import os +from six.moves.urllib import request +import unittest + +from chainer import testing + +from chainercv.evaluations import eval_point_coco + +try: + import pycocotools # NOQA + _available = True +except ImportError: + _available = False + + +# @unittest.skipUnless(_available, 'pycocotools is not installed') +# class TestEvalPointCOCOSingleClass(unittest.TestCase): +# +# def setUp(self): +# self.pred_bboxes = np.array([[[0, 0, 10, 10], [0, 0, 20, 20]]]) +# self.pred_labels = np.array([[0, 0]]) +# self.pred_scores = np.array([[0.8, 0.9]]) +# self.gt_bboxes = np.array([[[0, 0, 10, 9]]]) +# self.gt_labels = np.array([[0, 0]]) +# +# def test_crowded(self): +# result = eval_detection_coco(self.pred_bboxes, self.pred_labels, +# self.pred_scores, +# self.gt_bboxes, self.gt_labels, +# gt_crowdeds=[[True]]) +# # When the only ground truth is crowded, nothing is evaluated. +# # In that case, all the results are nan. 
+# self.assertTrue( +# np.isnan(result['map/iou=0.50:0.95/area=all/max_dets=100'])) +# self.assertTrue( +# np.isnan(result['map/iou=0.50/area=all/max_dets=100'])) +# self.assertTrue( +# np.isnan(result['map/iou=0.75/area=all/max_dets=100'])) +# +# def test_area_not_supplied(self): +# result = eval_detection_coco(self.pred_bboxes, self.pred_labels, +# self.pred_scores, +# self.gt_bboxes, self.gt_labels) +# self.assertFalse( +# 'map/iou=0.50:0.95/area=small/max_dets=100' in result) +# self.assertFalse( +# 'map/iou=0.50:0.95/area=medium/max_dets=100' in result) +# self.assertFalse( +# 'map/iou=0.50:0.95/area=large/max_dets=100' in result) +# +# def test_area_specified(self): +# result = eval_detection_coco(self.pred_bboxes, self.pred_labels, +# self.pred_scores, +# self.gt_bboxes, self.gt_labels, +# gt_areas=[[2048]]) +# self.assertFalse( +# np.isnan(result['map/iou=0.50:0.95/area=medium/max_dets=100'])) +# self.assertTrue( +# np.isnan(result['map/iou=0.50:0.95/area=small/max_dets=100'])) +# self.assertTrue( +# np.isnan(result['map/iou=0.50:0.95/area=large/max_dets=100'])) + + +# @unittest.skipUnless(_available, 'pycocotools is not installed') +# class TestEvalPointCOCOSomeClassNonExistent(unittest.TestCase): +# +# def setUp(self): +# self.pred_bboxes = np.array([[[0, 0, 10, 10], [0, 0, 20, 20]]]) +# self.pred_labels = np.array([[1, 2]]) +# self.pred_scores = np.array([[0.8, 0.9]]) +# self.gt_bboxes = np.array([[[0, 0, 10, 9]]]) +# self.gt_labels = np.array([[1, 2]]) +# +# def test(self): +# result = eval_detection_coco(self.pred_bboxes, self.pred_labels, +# self.pred_scores, +# self.gt_bboxes, self.gt_labels) +# self.assertEqual( +# result['ap/iou=0.50:0.95/area=all/max_dets=100'].shape, (3,)) +# self.assertTrue( +# np.isnan(result['ap/iou=0.50:0.95/area=all/max_dets=100'][0])) +# self.assertEqual( +# np.nanmean(result['ap/iou=0.50:0.95/area=all/max_dets=100'][1:]), +# result['map/iou=0.50:0.95/area=all/max_dets=100']) +# + +@unittest.skipUnless(_available, 
'pycocotools is not installed') +class TestEvalPointCOCO(unittest.TestCase): + + @classmethod + def setUpClass(cls): + base_url = 'https://chainercv-models.preferred.jp/tests' + + cls.dataset = np.load(request.urlretrieve(os.path.join( + base_url, 'eval_point_coco_dataset_2019_02_18.npz'))[0]) + cls.result = np.load(request.urlretrieve(os.path.join( + base_url, 'eval_point_coco_result_2019_02_18.npz'))[0]) + + def test_eval_detection_coco(self): + pred_points = self.result['points'] + pred_labels = self.result['labels'] + pred_scores = self.result['scores'] + + gt_points = self.dataset['points'] + gt_is_valids = self.dataset['is_valids'] + gt_bboxes = self.dataset['bboxes'] + gt_labels = self.dataset['labels'] + gt_areas = self.dataset['areas'] + gt_crowdeds = self.dataset['crowdeds'] + + result = eval_point_coco( + pred_points, pred_labels, pred_scores, + gt_points, gt_is_valids, gt_bboxes, + gt_labels, gt_areas, gt_crowdeds) + + + expected = { + 'map/iou=0.50:0.95/area=all/max_dets=20': 0.37733572721481323, + 'map/iou=0.50/area=all/max_dets=20': 0.6448841691017151, + 'map/iou=0.75/area=all/max_dets=20': 0.35469090938568115, + 'map/iou=0.50:0.95/area=medium/max_dets=20': 0.3894105851650238, + 'map/iou=0.50:0.95/area=large/max_dets=20': 0.39169296622276306, + 'mar/iou=0.50:0.95/area=all/max_dets=20': 0.5218977928161621, + 'mar/iou=0.50/area=all/max_dets=20': 0.7445255517959595, + 'mar/iou=0.75/area=all/max_dets=20': 0.510948896408081, + 'mar/iou=0.50:0.95/area=medium/max_dets=20': 0.5150684714317322, + 'mar/iou=0.50:0.95/area=large/max_dets=20': 0.5296875238418579, + } + + for key, item in expected.items(): + np.testing.assert_almost_equal( + result[key], expected[key], decimal=5) + + +testing.run_module(__name__, __file__) From a8fcf2a6a4604145e2c8a467c0f2dc19e6bec21c Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Mon, 18 Feb 2019 21:53:17 +0900 Subject: [PATCH 015/100] train now works --- examples/mask_rcnn/train_multi.py | 16 +++++++++------- 1 file 
changed, 9 insertions(+), 7 deletions(-) diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py index 5f38949069..74323658f5 100644 --- a/examples/mask_rcnn/train_multi.py +++ b/examples/mask_rcnn/train_multi.py @@ -27,11 +27,11 @@ from chainercv.links.model.mask_rcnn import mask_loss_pre # https://docs.chainer.org/en/stable/tips.html#my-training-process-gets-stuck-when-using-multiprocessiterator -try: - import cv2 - cv2.setNumThreads(0) -except ImportError: - pass +# try: +# import cv2 +# cv2.setNumThreads(0) +# except ImportError: +# pass class TrainChain(chainer.Chain): @@ -122,8 +122,10 @@ def __init__(self, mean, min_size, max_size): self.max_size = max_size def __call__(self, in_data): + import time + start = time.time() img, mask, label, bbox = in_data - + original = mask.shape # Flipping img, params = transforms.random_flip( img, x_random=True, return_param=True) @@ -168,7 +170,7 @@ def main(): parser.add_argument('--communicator', default='hierarchical') parser.add_argument('--cprofile', action='store_true', help='cprofile') args = parser.parse_args() - chainer.global_config.cv_resize_backend = 'PIL' + # chainer.global_config.cv_resize_backend = 'PIL' # chainer.global_config.cv_read_image_backend = 'PIL' # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator From 0d9ba6528c514197912fc72bf2781146e03db2fd Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Tue, 19 Feb 2019 11:16:28 +0900 Subject: [PATCH 016/100] some speed up --- chainercv/links/model/mask_rcnn/mask_head.py | 23 +++++++++----------- examples/mask_rcnn/train_multi.py | 11 +++++----- 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py index 7be88c98cb..8815774fcf 100644 --- a/chainercv/links/model/mask_rcnn/mask_head.py +++ b/chainercv/links/model/mask_rcnn/mask_head.py @@ -13,7 +13,6 @@ from 
chainercv.transforms.image.resize import resize from chainercv.utils.bbox.bbox_iou import bbox_iou -from chainercv.utils.mask.mask_to_bbox import mask_to_bbox class MaskHead(chainer.Chain): @@ -198,8 +197,8 @@ def _expand_boxes(bbox, scale): return expanded_bbox -def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels, - mask_size): +def mask_loss_pre(rois, roi_indices, gt_masks, gt_bboxes, + gt_head_labels, mask_size): """Loss function for Mask Head (pre). This function processes RoIs for :func:`mask_loss_post` by @@ -255,14 +254,14 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels, gt_segms = xp.empty((len(mask_rois), mask_size, mask_size), dtype=np.bool) for i in np.unique(cuda.to_cpu(mask_roi_indices)): gt_mask = gt_masks[i] - gt_bbox = mask_to_bbox(gt_mask) + gt_bbox = gt_bboxes[i] index = (mask_roi_indices == i).nonzero()[0] mask_roi = mask_rois[index] iou = bbox_iou(mask_roi, gt_bbox) - gt_index = iou.argmax(axis=1) + gt_index = chainer.backends.cuda.to_cpu(iou.argmax(axis=1)) gt_segms[index] = _segm_wrt_bbox( - gt_mask[gt_index], mask_roi, (mask_size, mask_size)) + gt_mask[gt_index], mask_roi, (mask_size, mask_size), xp) flag_masks = [mask_roi_levels == l for l in range(n_level)] mask_rois = [mask_rois[m] for m in flag_masks] @@ -310,18 +309,16 @@ def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels, return mask_loss -def _segm_wrt_bbox(mask, bbox, size): - xp = chainer.backends.cuda.get_array_module(mask) - - bbox = bbox.astype(np.int32) +def _segm_wrt_bbox(mask, bbox, size, xp): + bbox = chainer.backends.cuda.to_cpu(bbox.astype(np.int32)) segm = [] for m, bb in zip(mask, bbox): - if bb[2] - bb[0] == 0 or bb[3] - bb[1] == 0: - segm.append(xp.zeros(size, dtype=np.bool)) - continue cropped_m = m[bb[0]:bb[2], bb[1]:bb[3]] cropped_m = chainer.backends.cuda.to_cpu(cropped_m) + if cropped_m.shape[0] == 0 or cropped_m.shape[1] == 0: + segm.append(np.zeros(size, dtype=np.bool)) + continue segm.append(resize( 
cropped_m[None].astype(np.float32), diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py index 74323658f5..a944aac58b 100644 --- a/examples/mask_rcnn/train_multi.py +++ b/examples/mask_rcnn/train_multi.py @@ -54,13 +54,14 @@ def __call__(self, imgs, masks, labels, bboxes): x[i, :, :H, :W] = img x = self.xp.array(x) + # For reducing unnecessary CPU/GPU copy, `masks` is kept in CPU. pad_masks = [ - self.xp.zeros( + np.zeros( (mask.shape[0], pad_size[0], pad_size[1]), dtype=np.bool) for mask in masks] for i, mask in enumerate(masks): _, H, W = mask.shape - pad_masks[i][:, :H, :W] = self.xp.array(mask) + pad_masks[i][:, :H, :W] = mask masks = pad_masks bboxes = [self.xp.array(bbox) for bbox in bboxes] @@ -91,8 +92,8 @@ def __call__(self, imgs, masks, labels, bboxes): roi_indices, head_gt_locs, head_gt_labels, B) mask_rois, mask_roi_indices, gt_segms, gt_mask_labels = mask_loss_pre( - rois, roi_indices, masks, head_gt_labels, - self.model.mask_head.mask_size) + rois, roi_indices, masks, bboxes, + head_gt_labels, self.model.mask_head.mask_size) n_roi = sum([len(roi) for roi in mask_rois]) if n_roi > 0: segms = self.model.mask_head(hs, mask_rois, mask_roi_indices) @@ -122,8 +123,6 @@ def __init__(self, mean, min_size, max_size): self.max_size = max_size def __call__(self, in_data): - import time - start = time.time() img, mask, label, bbox = in_data original = mask.shape # Flipping From 97497ea68d999be043cb769a767426dd9aeec35f Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Tue, 19 Feb 2019 03:03:19 +0000 Subject: [PATCH 017/100] reduce copy --- chainercv/links/model/mask_rcnn/mask_head.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py index 8815774fcf..90594be847 100644 --- a/chainercv/links/model/mask_rcnn/mask_head.py +++ b/chainercv/links/model/mask_rcnn/mask_head.py @@ -259,9 +259,9 @@ def mask_loss_pre(rois, 
roi_indices, gt_masks, gt_bboxes, index = (mask_roi_indices == i).nonzero()[0] mask_roi = mask_rois[index] iou = bbox_iou(mask_roi, gt_bbox) - gt_index = chainer.backends.cuda.to_cpu(iou.argmax(axis=1)) + gt_index = iou.argmax(axis=1) gt_segms[index] = _segm_wrt_bbox( - gt_mask[gt_index], mask_roi, (mask_size, mask_size), xp) + gt_mask, gt_index, mask_roi, (mask_size, mask_size), xp) flag_masks = [mask_roi_levels == l for l in range(n_level)] mask_rois = [mask_rois[m] for m in flag_masks] @@ -293,7 +293,7 @@ def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels, xp = cuda.get_array_module(segms.array) mask_roi_indices = xp.hstack(mask_roi_indices).astype(np.int32) - gt_segms = xp.vstack(gt_segms).astype(np.float32) + gt_segms = xp.vstack(gt_segms).astype(np.float32, copy=False) gt_mask_labels = xp.hstack(gt_mask_labels).astype(np.int32) mask_loss = 0 @@ -309,8 +309,9 @@ def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels, return mask_loss -def _segm_wrt_bbox(mask, bbox, size, xp): +def _segm_wrt_bbox(mask, gt_index, bbox, size, xp): bbox = chainer.backends.cuda.to_cpu(bbox.astype(np.int32)) + mask = mask[chainer.backends.cuda.to_cpu(gt_index)] segm = [] for m, bb in zip(mask, bbox): @@ -322,5 +323,5 @@ def _segm_wrt_bbox(mask, bbox, size, xp): segm.append(resize( cropped_m[None].astype(np.float32), - size, interpolation=PIL.Image.NEAREST)[0].astype(np.bool)) - return xp.array(segm, dtype=np.bool) + size, interpolation=PIL.Image.NEAREST)[0]) + return xp.array(segm, dtype=np.float32) From 1e38522af2f99f0e70b1d7c11be8bd63a73427ee Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Tue, 19 Feb 2019 12:06:48 +0900 Subject: [PATCH 018/100] delete unnecessary --- examples/mask_rcnn/train_multi.py | 33 +++++++------------------------ 1 file changed, 7 insertions(+), 26 deletions(-) diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py index a944aac58b..c9c5856d13 100644 --- a/examples/mask_rcnn/train_multi.py +++ 
b/examples/mask_rcnn/train_multi.py @@ -27,11 +27,11 @@ from chainercv.links.model.mask_rcnn import mask_loss_pre # https://docs.chainer.org/en/stable/tips.html#my-training-process-gets-stuck-when-using-multiprocessiterator -# try: -# import cv2 -# cv2.setNumThreads(0) -# except ImportError: -# pass +try: + import cv2 + cv2.setNumThreads(0) +except ImportError: + pass class TrainChain(chainer.Chain): @@ -167,10 +167,7 @@ def main(): parser.add_argument('--out', default='result') parser.add_argument('--resume') parser.add_argument('--communicator', default='hierarchical') - parser.add_argument('--cprofile', action='store_true', help='cprofile') args = parser.parse_args() - # chainer.global_config.cv_resize_backend = 'PIL' - # chainer.global_config.cv_read_image_backend = 'PIL' # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator if hasattr(multiprocessing, 'set_start_method'): @@ -214,7 +211,8 @@ def main(): train_iter = chainer.iterators.MultiprocessIterator( train, args.batchsize // comm.size, - n_processes=args.batchsize // comm.size, shared_mem=100 * 1000 * 1000 * 4) + n_processes=args.batchsize // comm.size, + shared_mem=100 * 1000 * 1000 * 4) optimizer = chainermn.create_multi_node_optimizer( chainer.optimizers.MomentumSGD(), comm) @@ -275,24 +273,7 @@ def lr_schedule(trainer): if args.resume: serializers.load_npz(args.resume, trainer, strict=False) - if args.cprofile: - import cProfile - import io - import pstats - print('cprofiling') - pr = cProfile.Profile() - pr.enable() trainer.run() - if args.cprofile: - pr.disable() - s = io.StringIO() - sort_by = 'tottime' - ps = pstats.Stats(pr, stream=s).sort_stats(sort_by) - ps.print_stats() - if comm.rank == 0: - print(s.getvalue()) - - pr.dump_stats('{0}/rank_{1}.cprofile'.format(args.out, comm.rank)) if __name__ == '__main__': From c63c3068b17b5bcbd6d1546b3b266f4558155498 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Tue, 19 Feb 2019 12:13:04 +0900 Subject: 
[PATCH 019/100] reuse prepare function --- chainercv/links/model/mask_rcnn/mask_rcnn.py | 36 +++++++++++--------- examples/mask_rcnn/train_multi.py | 21 +++--------- 2 files changed, 24 insertions(+), 33 deletions(-) diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py index 4158846ed4..1386f071b2 100644 --- a/chainercv/links/model/mask_rcnn/mask_rcnn.py +++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py @@ -44,8 +44,8 @@ class MaskRCNN(chainer.Chain): """ - min_size = 800 - max_size = 1333 + _min_size = 800 + _max_size = 1333 stride = 32 def __init__(self, extractor, rpn, head, mask_head): @@ -123,7 +123,7 @@ def predict(self, imgs): """ sizes = [img.shape[1:] for img in imgs] - x, scales, _ = self.prepare(imgs) + x, scales = self.prepare(imgs) with chainer.using_config('train', False), chainer.no_backprop_mode(): hs, rois, roi_indices = self(x) @@ -160,7 +160,7 @@ def predict(self, imgs): scores = [cuda.to_cpu(score) for score in scores] return masks, labels, scores - def prepare(self, imgs, masks=None): + def prepare(self, imgs): """Preprocess images. Args: @@ -169,25 +169,16 @@ def prepare(self, imgs, masks=None): and the range of their value is :math:`[0, 255]`. Returns: - Three arrays: preprocessed images, \ - scales that were caluclated in prepocessing and - the size of the images after resizing. + Two arrays: preprocessed images and \ + scales that were calculated in preprocessing.
""" scales = [] resized_imgs = [] - resized_sizes = [] for img in imgs: - _, H, W = img.shape - scale = self.min_size / min(H, W) - if scale * max(H, W) > self.max_size: - scale = self.max_size / max(H, W) + img, scale = self.prepare_img(img) scales.append(scale) - H, W = int(H * scale), int(W * scale) - img = transforms.resize(img, (H, W)) - img -= self.extractor.mean resized_imgs.append(img) - resized_sizes.append((H, W)) pad_size = np.array( [im.shape[1:] for im in resized_imgs]).max(axis=0) pad_size = ( @@ -199,7 +190,18 @@ def prepare(self, imgs, masks=None): x[i, :, :H, :W] = im x = self.xp.array(x) - return x, scales, resized_sizes + return x, scales + + def prepare_img(self, img): + """Process image.""" + _, H, W = img.shape + scale = self._min_size / min(H, W) + if scale * max(H, W) > self._max_size: + scale = self._max_size / max(H, W) + H, W = int(H * scale), int(W * scale) + img = transforms.resize(img, (H, W)) + img -= self.extractor.mean + return img, scale def _list_to_flat(array_list): diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py index c9c5856d13..593b0ceb55 100644 --- a/examples/mask_rcnn/train_multi.py +++ b/examples/mask_rcnn/train_multi.py @@ -117,10 +117,8 @@ def __call__(self, imgs, masks, labels, bboxes): class Transform(object): - def __init__(self, mean, min_size, max_size): - self.mean = mean - self.min_size = min_size - self.max_size = max_size + def __init__(self, prepare_img): + self.prepare_img = prepare_img def __call__(self, in_data): img, mask, label, bbox = in_data @@ -132,21 +130,12 @@ def __call__(self, in_data): bbox = transforms.flip_bbox( bbox, img.shape[1:], x_flip=params['x_flip']) - # TODO: make this part reusable - # Scaling - _, H, W = img.shape - scale = self.min_size / min(H, W) - if scale * max(H, W) > self.max_size: - scale = self.max_size / max(H, W) - H, W = int(H * scale), int(W * scale) - img = transforms.resize(img, (H, W)) + # Scaling and mean subtraction + img, scale = 
self.prepare_img(img) mask = transforms.resize( mask.astype(np.float32), (H, W), interpolation=PIL.Image.NEAREST).astype(np.bool) bbox = bbox * scale - - # Subtract mean - img -= self.mean return img, mask, label, bbox, scale @@ -200,7 +189,7 @@ def main(): data_dir='/home/yuyu2172/coco', split='train', return_bbox=True), ('img', 'mask', 'label', 'bbox'), - Transform(model.extractor.mean, model.min_size, model.max_size)) + Transform(model.prepare_img)) if comm.rank == 0: indices = np.arange(len(train)) From bb8fd686f602c0bfcf26e683675b22f4f242f3d5 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Tue, 19 Feb 2019 03:31:34 +0000 Subject: [PATCH 020/100] don't use instance method --- chainercv/links/model/mask_rcnn/mask_rcnn.py | 21 ++++++-------------- chainercv/links/model/mask_rcnn/misc.py | 12 +++++++++++ examples/mask_rcnn/train_multi.py | 16 ++++++++++----- 3 files changed, 29 insertions(+), 20 deletions(-) create mode 100644 chainercv/links/model/mask_rcnn/misc.py diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py index 1386f071b2..9f59f49d92 100644 --- a/chainercv/links/model/mask_rcnn/mask_rcnn.py +++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py @@ -6,7 +6,7 @@ from chainer.backends import cuda import chainer.functions as F -from chainercv import transforms +from chainercv.links.model.mask_rcnn.misc import scale_img class MaskRCNN(chainer.Chain): @@ -44,8 +44,8 @@ class MaskRCNN(chainer.Chain): """ - _min_size = 800 - _max_size = 1333 + min_size = 800 + max_size = 1333 stride = 32 def __init__(self, extractor, rpn, head, mask_head): @@ -176,7 +176,9 @@ def prepare(self, imgs): scales = [] resized_imgs = [] for img in imgs: - img, scale = self.prepare_img(img) + img, scale = scale_img( + img, self.min_size, self.max_size) + img -= self.extractor.mean scales.append(scale) resized_imgs.append(img) pad_size = np.array( @@ -192,17 +194,6 @@ def prepare(self, imgs): return x, scales - def prepare_img(self, 
img): - """Process image.""" - _, H, W = img.shape - scale = self._min_size / min(H, W) - if scale * max(H, W) > self._max_size: - scale = self._max_size / max(H, W) - H, W = int(H * scale), int(W * scale) - img = transforms.resize(img, (H, W)) - img -= self.extractor.mean - return img, scale - def _list_to_flat(array_list): xp = chainer.backends.cuda.get_array_module(array_list[0]) diff --git a/chainercv/links/model/mask_rcnn/misc.py b/chainercv/links/model/mask_rcnn/misc.py new file mode 100644 index 0000000000..abb233443b --- /dev/null +++ b/chainercv/links/model/mask_rcnn/misc.py @@ -0,0 +1,12 @@ +from chainercv import transforms + + +def scale_img(img, min_size, max_size): + """Process image.""" + _, H, W = img.shape + scale = min_size / min(H, W) + if scale * max(H, W) > max_size: + scale = max_size / max(H, W) + H, W = int(H * scale), int(W * scale) + img = transforms.resize(img, (H, W)) + return img, scale diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py index 593b0ceb55..36b76ce293 100644 --- a/examples/mask_rcnn/train_multi.py +++ b/examples/mask_rcnn/train_multi.py @@ -18,6 +18,7 @@ from chainercv.datasets import COCOInstanceSegmentationDataset from chainercv.links import MaskRCNNFPNResNet101 from chainercv.links import MaskRCNNFPNResNet50 +from chainercv.links.model.mask_rcnn.misc import scale_img from chainercv import transforms from chainercv.links.model.fpn import head_loss_post @@ -117,8 +118,10 @@ def __call__(self, imgs, masks, labels, bboxes): class Transform(object): - def __init__(self, prepare_img): - self.prepare_img = prepare_img + def __init__(self, min_size, max_size, mean): + self.min_size = min_size + self.max_size = max_size + self.mean = mean def __call__(self, in_data): img, mask, label, bbox = in_data @@ -131,10 +134,13 @@ def __call__(self, in_data): bbox, img.shape[1:], x_flip=params['x_flip']) # Scaling and mean subtraction - img, scale = self.prepare_img(img) + img, scale = scale_img( + img, 
self.min_size, self.max_size) + img -= self.mean mask = transforms.resize( mask.astype(np.float32), - (H, W), interpolation=PIL.Image.NEAREST).astype(np.bool) + img.shape[1:], + interpolation=PIL.Image.NEAREST).astype(np.bool) bbox = bbox * scale return img, mask, label, bbox, scale @@ -189,7 +195,7 @@ def main(): data_dir='/home/yuyu2172/coco', split='train', return_bbox=True), ('img', 'mask', 'label', 'bbox'), - Transform(model.prepare_img)) + Transform(model.min_size, model.max_size, model.extractor.mean)) if comm.rank == 0: indices = np.arange(len(train)) From 45e77be1d128a71f19cd03447b4953cf7339aab5 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Tue, 19 Feb 2019 13:43:12 +0900 Subject: [PATCH 021/100] delete eval_coco --- examples/instance_segmentation/eval_coco.py | 94 --------------------- 1 file changed, 94 deletions(-) delete mode 100755 examples/instance_segmentation/eval_coco.py diff --git a/examples/instance_segmentation/eval_coco.py b/examples/instance_segmentation/eval_coco.py deleted file mode 100755 index 98258252b8..0000000000 --- a/examples/instance_segmentation/eval_coco.py +++ /dev/null @@ -1,94 +0,0 @@ -import argparse - -import chainer -from chainer import iterators - -from chainercv.datasets import coco_instance_segmentation_label_names -from chainercv.datasets import COCOInstanceSegmentationDataset -from chainercv.evaluations import eval_instance_segmentation_coco -from chainercv.experimental.links import FCISResNet101 -from chainercv.links import MaskRCNNFPNResNet101 -from chainercv.links import MaskRCNNFPNResNet50 -from chainercv.utils import apply_to_iterator -from chainercv.utils import ProgressHook - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument( - '--model', choices=( - 'fcis_resnet101', - 'mask_rcnn_fpn_resnet101', 'mask_rcnn_fpn_resnet50'), - default='fcis_resnet101') - parser.add_argument('--pretrained-model', default=None) - parser.add_argument('--gpu', type=int, default=-1) - args = 
parser.parse_args() - - if args.pretrained_model is None: - args.pretrained_model = 'coco' - if args.model == 'fcis_resnet101': - proposal_creator_params = FCISResNet101.proposal_creator_params - proposal_creator_params['min_size'] = 2 - model = FCISResNet101( - n_fg_class=len(coco_instance_segmentation_label_names), - anchor_scales=(4, 8, 16, 32), - pretrained_model=args.pretrained_model, - proposal_creator_params=proposal_creator_params) - preset = 'coco_evaluate' - elif args.model == 'mask_rcnn_fpn_resnet50': - model = MaskRCNNFPNResNet50( - len(coco_instance_segmentation_label_names), - args.pretrained_model) - preset = 'evaluate' - elif args.model == 'mask_rcnn_fpn_resnet101': - model = MaskRCNNFPNResNet101( - len(coco_instance_segmentation_label_names), - args.pretrained_model) - preset = 'evaluate' - - model.use_preset(preset) - - if args.gpu >= 0: - chainer.cuda.get_device_from_id(args.gpu).use() - model.to_gpu() - - dataset = COCOInstanceSegmentationDataset( - split='minival', year='2014', - use_crowded=True, return_crowded=True, return_area=True) - iterator = iterators.SerialIterator( - dataset, 1, repeat=False, shuffle=False) - - in_values, out_values, rest_values = apply_to_iterator( - model.predict, iterator, hook=ProgressHook(len(dataset))) - # delete unused iterators explicitly - del in_values - - pred_masks, pred_labels, pred_scores = out_values - gt_masks, gt_labels, gt_areas, gt_crowdeds = rest_values - - result = eval_instance_segmentation_coco( - pred_masks, pred_labels, pred_scores, - gt_masks, gt_labels, gt_areas, gt_crowdeds) - - keys = [ - 'map/iou=0.50:0.95/area=all/max_dets=100', - 'map/iou=0.50/area=all/max_dets=100', - 'map/iou=0.75/area=all/max_dets=100', - 'map/iou=0.50:0.95/area=small/max_dets=100', - 'map/iou=0.50:0.95/area=medium/max_dets=100', - 'map/iou=0.50:0.95/area=large/max_dets=100', - 'mar/iou=0.50:0.95/area=all/max_dets=1', - 'mar/iou=0.50:0.95/area=all/max_dets=10', - 'mar/iou=0.50:0.95/area=all/max_dets=100', - 
'mar/iou=0.50:0.95/area=small/max_dets=100', - 'mar/iou=0.50:0.95/area=medium/max_dets=100', - 'mar/iou=0.50:0.95/area=large/max_dets=100', - ] - - print('') - for key in keys: - print('{:s}: {:f}'.format(key, result[key])) - - -if __name__ == '__main__': - main() From c8b08a4a59c3e700b2ad7f76564004d68049e920 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Tue, 19 Feb 2019 13:46:48 +0900 Subject: [PATCH 022/100] speed up _segm_wrt_bbox --- chainercv/links/model/mask_rcnn/mask_head.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py index 90594be847..5f6e5c03d2 100644 --- a/chainercv/links/model/mask_rcnn/mask_head.py +++ b/chainercv/links/model/mask_rcnn/mask_head.py @@ -311,11 +311,10 @@ def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels, def _segm_wrt_bbox(mask, gt_index, bbox, size, xp): bbox = chainer.backends.cuda.to_cpu(bbox.astype(np.int32)) - mask = mask[chainer.backends.cuda.to_cpu(gt_index)] segm = [] - for m, bb in zip(mask, bbox): - cropped_m = m[bb[0]:bb[2], bb[1]:bb[3]] + for i, bb in zip(chainer.backends.cuda.to_cpu(gt_index), bbox): + cropped_m = mask[i, bb[0]:bb[2], bb[1]:bb[3]] cropped_m = chainer.backends.cuda.to_cpu(cropped_m) if cropped_m.shape[0] == 0 or cropped_m.shape[1] == 0: segm.append(np.zeros(size, dtype=np.bool)) From 9ef9564eda60f103d1b06606efec5cbf05d1584b Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Tue, 19 Feb 2019 13:53:10 +0900 Subject: [PATCH 023/100] delete unnecessary --- examples/mask_rcnn/train_multi.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py index 36b76ce293..44f1e23249 100644 --- a/examples/mask_rcnn/train_multi.py +++ b/examples/mask_rcnn/train_multi.py @@ -173,8 +173,6 @@ def main(): comm = chainermn.create_communicator(args.communicator) device = comm.intra_rank - global rank - rank = comm.rank if 
args.model == 'mask_rcnn_fpn_resnet50': model = MaskRCNNFPNResNet50( From 2e18bf682288b435436c77b8b6131c794eb42945 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Tue, 19 Feb 2019 21:00:45 +0900 Subject: [PATCH 024/100] COCOPointDataset -> COCOKeypointDataset --- chainercv/datasets/__init__.py | 2 +- ...nt_dataset.py => coco_keypoint_dataset.py} | 20 +++++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) rename chainercv/datasets/coco/{coco_point_dataset.py => coco_keypoint_dataset.py} (86%) diff --git a/chainercv/datasets/__init__.py b/chainercv/datasets/__init__.py index c2ca52af4d..d6598093db 100644 --- a/chainercv/datasets/__init__.py +++ b/chainercv/datasets/__init__.py @@ -12,7 +12,7 @@ from chainercv.datasets.cityscapes.cityscapes_utils import cityscapes_semantic_segmentation_label_names # NOQA from chainercv.datasets.coco.coco_bbox_dataset import COCOBboxDataset # NOQA from chainercv.datasets.coco.coco_instance_segmentation_dataset import COCOInstanceSegmentationDataset # NOQA -from chainercv.datasets.coco.coco_point_dataset import COCOPointDataset # NOQA +from chainercv.datasets.coco.coco_keypoint_dataset import COCOKeypointDataset # NOQA from chainercv.datasets.coco.coco_semantic_segmentation_dataset import COCOSemanticSegmentationDataset # NOQA from chainercv.datasets.coco.coco_utils import coco_bbox_label_names # NOQA from chainercv.datasets.coco.coco_utils import coco_instance_segmentation_label_names # NOQA diff --git a/chainercv/datasets/coco/coco_point_dataset.py b/chainercv/datasets/coco/coco_keypoint_dataset.py similarity index 86% rename from chainercv/datasets/coco/coco_point_dataset.py rename to chainercv/datasets/coco/coco_keypoint_dataset.py index 6438ef0bf2..de40491100 100644 --- a/chainercv/datasets/coco/coco_point_dataset.py +++ b/chainercv/datasets/coco/coco_keypoint_dataset.py @@ -10,11 +10,12 @@ from chainercv import utils -class COCOPointDataset(GetterDataset): +class COCOKeypointDataset(GetterDataset): def 
__init__(self, data_dir='auto', split='train', year='2017', - use_crowded=False, return_area=False, return_crowded=False): - super(COCOPointDataset, self).__init__() + use_crowded=False, + return_area=False, return_crowded=False): + super(COCOKeypointDataset, self).__init__() self.use_crowded = use_crowded if data_dir == 'auto': data_dir = get_coco(split, split, year, 'instances') @@ -41,9 +42,9 @@ def __init__(self, data_dir='auto', split='train', year='2017', self.add_getter('img', self._get_image) self.add_getter( - ['point', 'bbox', 'label', 'area', 'crowded'], + ['point', 'valid', 'bbox', 'label', 'area', 'crowded'], self._get_annotations) - keys = ('img', 'point', 'bbox', 'label') + keys = ('img', 'point', 'valid', 'bbox', 'label') if return_area: keys += ('area',) if return_crowded: @@ -90,9 +91,11 @@ def _get_annotations(self, i): # 0: not labeled; 1: labeled, not inside mask; # 2: labeled and inside mask v = point[:, 2::3] - point = np.stack((y, x, v), axis=2) + valid = v > 0 + point = np.stack((y, x), axis=2) else: - point = np.array((0, 0, 3), dtype=np.float32) + point = np.empty((0, 0, 2), dtype=np.float32) + valid = np.empty((0, 0), dtype=np.bool) # Remove invalid boxes bbox_area = np.prod(bbox[:, 2:] - bbox[:, :2], axis=1) @@ -104,8 +107,9 @@ def _get_annotations(self, i): keep_mask = np.logical_and(keep_mask, np.logical_not(crowded)) point = point[keep_mask] + valid = valid[keep_mask] bbox = bbox[keep_mask] label = label[keep_mask] area = area[keep_mask] crowded = crowded[keep_mask] - return point, bbox, label, area, crowded + return point, valid, bbox, label, area, crowded From 8ad8a74a5980f67a599eaa430394d80ef7933999 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Tue, 19 Feb 2019 21:16:31 +0900 Subject: [PATCH 025/100] vis_coco_point -> vis_keypoint_coco --- chainercv/visualizations/__init__.py | 2 +- ...vis_coco_point.py => vis_keypoint_coco.py} | 37 ++++++++++--------- 2 files changed, 21 insertions(+), 18 deletions(-) rename 
chainercv/visualizations/{vis_coco_point.py => vis_keypoint_coco.py} (83%) diff --git a/chainercv/visualizations/__init__.py b/chainercv/visualizations/__init__.py index 33ef5a9d1f..edc5c41286 100644 --- a/chainercv/visualizations/__init__.py +++ b/chainercv/visualizations/__init__.py @@ -1,5 +1,5 @@ from chainercv.visualizations.vis_bbox import vis_bbox # NOQA -from chainercv.visualizations.vis_coco_point import vis_coco_point # NOQA +from chainercv.visualizations.vis_keypoint_coco import vis_keypoint_coco # NOQA from chainercv.visualizations.vis_image import vis_image # NOQA from chainercv.visualizations.vis_instance_segmentation import vis_instance_segmentation # NOQA from chainercv.visualizations.vis_point import vis_point # NOQA diff --git a/chainercv/visualizations/vis_coco_point.py b/chainercv/visualizations/vis_keypoint_coco.py similarity index 83% rename from chainercv/visualizations/vis_coco_point.py rename to chainercv/visualizations/vis_keypoint_coco.py index 9666bbbfe6..b5559930ad 100644 --- a/chainercv/visualizations/vis_coco_point.py +++ b/chainercv/visualizations/vis_keypoint_coco.py @@ -41,7 +41,13 @@ ] -def vis_coco_point(img, point, point_score, thresh=2, ax=None): +def vis_keypoint_coco( + img, point, valid=None, + point_score=None, thresh=2, + markersize=3, linewidth=1, ax=None): + if valid.dtype != np.bool: + raise ValueError('The dtype of `valid` should be np.bool') + from matplotlib import pyplot as plt # Returns newly instantiated matplotlib.axes.Axes object if ax is None @@ -50,7 +56,13 @@ def vis_coco_point(img, point, point_score, thresh=2, ax=None): cmap = plt.get_cmap('rainbow') colors = [cmap(i) for i in np.linspace(0, 1, len(coco_point_skeleton) + 2)] - # plt.autoscale(False) + if point_score is None: + point_score = np.inf * np.ones(point.shape[:2], dtype=np.float32) + + if valid is not None: + for i, vld in enumerate(valid): + point_score[i, np.logical_not(vld)] = -np.inf + for pnt, pnt_sc in zip(point, point_score): for l in 
range(len(coco_point_skeleton)): i0 = coco_point_skeleton[l][0] @@ -63,15 +75,16 @@ def vis_coco_point(img, point, point_score, thresh=2, ax=None): x1 = pnt[i1, 1] if s0 > thresh and s1 > thresh: line = ax.plot([x0, x1], [y0, y1]) - plt.setp(line, color=colors[l], linewidth=1.0, alpha=0.7) + plt.setp(line, color=colors[l], + linewidth=linewidth, alpha=0.7) if s0 > thresh: ax.plot( x0, y0, '.', color=colors[l], - markersize=3.0, alpha=0.7) + markersize=markersize, alpha=0.7) if s1 > thresh: ax.plot( x1, y1, '.', color=colors[l], - markersize=3.0, alpha=0.7) + markersize=markersize, alpha=0.7) # for better visualization, add mid shoulder / mid hip mid_shoulder = ( @@ -94,23 +107,13 @@ def vis_coco_point(img, point, point_score, thresh=2, ax=None): line = ax.plot(x, y) plt.setp( line, color=colors[len(coco_point_skeleton)], - linewidth=1.0, alpha=0.7) + linewidth=linewidth, alpha=0.7) if (mid_shoulder_sc > thresh and mid_hip_sc > thresh): y = [mid_shoulder[0], mid_hip[0]] x = [mid_shoulder[1], mid_hip[1]] line = ax.plot(x, y) plt.setp( line, color=colors[len(coco_point_skeleton) + 1], - linewidth=1.0, alpha=0.7) + linewidth=linewidth, alpha=0.7) return ax - - -if __name__ == '__main__': - data = np.load('vis_point.npz') - img = data['img'] - point = data['point'] - point_score = data['point_score'] - # plt.imshow(img) - vis_coco_point(img, point, point_score) - plt.show() From a48b2ba5df12f30ea99875ab035ff58627453eca Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Tue, 19 Feb 2019 21:20:53 +0900 Subject: [PATCH 026/100] change representation of coco_keypoint_names --- chainercv/datasets/__init__.py | 2 +- chainercv/datasets/coco/coco_utils.py | 40 ++++---- chainercv/visualizations/__init__.py | 2 +- chainercv/visualizations/vis_keypoint_coco.py | 92 ++++++++++--------- 4 files changed, 72 insertions(+), 64 deletions(-) diff --git a/chainercv/datasets/__init__.py b/chainercv/datasets/__init__.py index d6598093db..ce4030f419 100644 --- 
a/chainercv/datasets/__init__.py +++ b/chainercv/datasets/__init__.py @@ -16,7 +16,7 @@ from chainercv.datasets.coco.coco_semantic_segmentation_dataset import COCOSemanticSegmentationDataset # NOQA from chainercv.datasets.coco.coco_utils import coco_bbox_label_names # NOQA from chainercv.datasets.coco.coco_utils import coco_instance_segmentation_label_names # NOQA -from chainercv.datasets.coco.coco_utils import coco_point_names # NOQA +from chainercv.datasets.coco.coco_utils import coco_keypoint_names # NOQA from chainercv.datasets.coco.coco_utils import coco_semantic_segmentation_label_colors # NOQA from chainercv.datasets.coco.coco_utils import coco_semantic_segmentation_label_names # NOQA from chainercv.datasets.cub.cub_label_dataset import CUBLabelDataset # NOQA diff --git a/chainercv/datasets/coco/coco_utils.py b/chainercv/datasets/coco/coco_utils.py index f96ec5803c..aac12861fb 100644 --- a/chainercv/datasets/coco/coco_utils.py +++ b/chainercv/datasets/coco/coco_utils.py @@ -441,22 +441,24 @@ def get_coco(split, img_split, year, mode): coco_instance_segmentation_label_names = coco_bbox_label_names -coco_point_names = [ - 'nose', - 'left_eye', - 'right_eye', - 'left_ear', - 'right_ear', - 'left_shoulder', - 'right_shoulder', - 'left_elbow', - 'right_elbow', - 'left_wrist', - 'right_wrist', - 'left_hip', - 'right_hip', - 'left_knee', - 'right_knee', - 'left_ankle', - 'right_ankle' -] +coco_keypoint_names = { + 0: [ + 'nose', + 'left_eye', + 'right_eye', + 'left_ear', + 'right_ear', + 'left_shoulder', + 'right_shoulder', + 'left_elbow', + 'right_elbow', + 'left_wrist', + 'right_wrist', + 'left_hip', + 'right_hip', + 'left_knee', + 'right_knee', + 'left_ankle', + 'right_ankle' + ] +} diff --git a/chainercv/visualizations/__init__.py b/chainercv/visualizations/__init__.py index edc5c41286..bf77cf892c 100644 --- a/chainercv/visualizations/__init__.py +++ b/chainercv/visualizations/__init__.py @@ -1,6 +1,6 @@ from chainercv.visualizations.vis_bbox import vis_bbox # 
NOQA -from chainercv.visualizations.vis_keypoint_coco import vis_keypoint_coco # NOQA from chainercv.visualizations.vis_image import vis_image # NOQA from chainercv.visualizations.vis_instance_segmentation import vis_instance_segmentation # NOQA +from chainercv.visualizations.vis_keypoint_coco import vis_keypoint_coco # NOQA from chainercv.visualizations.vis_point import vis_point # NOQA from chainercv.visualizations.vis_semantic_segmentation import vis_semantic_segmentation # NOQA diff --git a/chainercv/visualizations/vis_keypoint_coco.py b/chainercv/visualizations/vis_keypoint_coco.py index b5559930ad..f750d23378 100644 --- a/chainercv/visualizations/vis_keypoint_coco.py +++ b/chainercv/visualizations/vis_keypoint_coco.py @@ -1,43 +1,44 @@ from __future__ import division -import matplotlib.pyplot as plt import numpy as np -from chainercv.datasets import coco_point_names +from chainercv.datasets import coco_keypoint_names from chainercv.visualizations.vis_image import vis_image +human_id = 0 + coco_point_skeleton = [ - [coco_point_names.index('left_eye'), - coco_point_names.index('right_eye')], - [coco_point_names.index('left_eye'), - coco_point_names.index('nose')], - [coco_point_names.index('right_eye'), - coco_point_names.index('nose')], - [coco_point_names.index('right_eye'), - coco_point_names.index('right_ear')], - [coco_point_names.index('left_eye'), - coco_point_names.index('left_ear')], - [coco_point_names.index('right_shoulder'), - coco_point_names.index('right_elbow')], - [coco_point_names.index('right_elbow'), - coco_point_names.index('right_wrist')], - [coco_point_names.index('left_shoulder'), - coco_point_names.index('left_elbow')], - [coco_point_names.index('left_elbow'), - coco_point_names.index('left_wrist')], - [coco_point_names.index('right_hip'), - coco_point_names.index('right_knee')], - [coco_point_names.index('right_knee'), - coco_point_names.index('right_ankle')], - [coco_point_names.index('left_hip'), - 
coco_point_names.index('left_knee')], - [coco_point_names.index('left_knee'), - coco_point_names.index('left_ankle')], - [coco_point_names.index('right_shoulder'), - coco_point_names.index('left_shoulder')], - [coco_point_names.index('right_hip'), - coco_point_names.index('left_hip')] + [coco_keypoint_names[human_id].index('left_eye'), + coco_keypoint_names[human_id].index('right_eye')], + [coco_keypoint_names[human_id].index('left_eye'), + coco_keypoint_names[human_id].index('nose')], + [coco_keypoint_names[human_id].index('right_eye'), + coco_keypoint_names[human_id].index('nose')], + [coco_keypoint_names[human_id].index('right_eye'), + coco_keypoint_names[human_id].index('right_ear')], + [coco_keypoint_names[human_id].index('left_eye'), + coco_keypoint_names[human_id].index('left_ear')], + [coco_keypoint_names[human_id].index('right_shoulder'), + coco_keypoint_names[human_id].index('right_elbow')], + [coco_keypoint_names[human_id].index('right_elbow'), + coco_keypoint_names[human_id].index('right_wrist')], + [coco_keypoint_names[human_id].index('left_shoulder'), + coco_keypoint_names[human_id].index('left_elbow')], + [coco_keypoint_names[human_id].index('left_elbow'), + coco_keypoint_names[human_id].index('left_wrist')], + [coco_keypoint_names[human_id].index('right_hip'), + coco_keypoint_names[human_id].index('right_knee')], + [coco_keypoint_names[human_id].index('right_knee'), + coco_keypoint_names[human_id].index('right_ankle')], + [coco_keypoint_names[human_id].index('left_hip'), + coco_keypoint_names[human_id].index('left_knee')], + [coco_keypoint_names[human_id].index('left_knee'), + coco_keypoint_names[human_id].index('left_ankle')], + [coco_keypoint_names[human_id].index('right_shoulder'), + coco_keypoint_names[human_id].index('left_shoulder')], + [coco_keypoint_names[human_id].index('right_hip'), + coco_keypoint_names[human_id].index('left_hip')] ] @@ -45,6 +46,9 @@ def vis_keypoint_coco( img, point, valid=None, point_score=None, thresh=2, markersize=3, 
linewidth=1, ax=None): + """Visualize bounding boxes inside image. + + """ if valid.dtype != np.bool: raise ValueError('The dtype of `valid` should be np.bool') @@ -88,22 +92,24 @@ def vis_keypoint_coco( # for better visualization, add mid shoulder / mid hip mid_shoulder = ( - pnt[coco_point_names.index('right_shoulder'), :2] + - pnt[coco_point_names.index('left_shoulder'), :2]) / 2 + pnt[coco_keypoint_names[human_id].index('right_shoulder'), :2] + + pnt[coco_keypoint_names[human_id].index('left_shoulder'), :2]) / 2 mid_shoulder_sc = np.minimum( - pnt_sc[coco_point_names.index('right_shoulder')], - pnt_sc[coco_point_names.index('left_shoulder')]) + pnt_sc[coco_keypoint_names[human_id].index('right_shoulder')], + pnt_sc[coco_keypoint_names[human_id].index('left_shoulder')]) mid_hip = ( - pnt[coco_point_names.index('right_hip'), :2] + - pnt[coco_point_names.index('left_hip'), :2]) / 2 + pnt[coco_keypoint_names[human_id].index('right_hip'), :2] + + pnt[coco_keypoint_names[human_id].index('left_hip'), :2]) / 2 mid_hip_sc = np.minimum( - pnt_sc[coco_point_names.index('right_hip')], - pnt_sc[coco_point_names.index('left_hip')]) + pnt_sc[coco_keypoint_names[human_id].index('right_hip')], + pnt_sc[coco_keypoint_names[human_id].index('left_hip')]) if (mid_shoulder_sc > thresh and - pnt_sc[coco_point_names.index('nose')] > thresh): - y = [mid_shoulder[0], pnt[coco_point_names.index('nose'), 0]] - x = [mid_shoulder[1], pnt[coco_point_names.index('nose'), 1]] + pnt_sc[coco_keypoint_names[human_id].index('nose')] > thresh): + y = [mid_shoulder[0], + pnt[coco_keypoint_names[human_id].index('nose'), 0]] + x = [mid_shoulder[1], + pnt[coco_keypoint_names[human_id].index('nose'), 1]] line = ax.plot(x, y) plt.setp( line, color=colors[len(coco_point_skeleton)], From deb9f9c245f1e7c946e0afb7c68f5a5e2fb52476 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 20 Feb 2019 10:41:33 +0900 Subject: [PATCH 027/100] add doc --- .../datasets/coco/coco_keypoint_dataset.py | 53 
+++++++++++++++++++ chainercv/visualizations/vis_keypoint_coco.py | 43 ++++++++++++++- docs/source/reference/datasets.rst | 4 ++ docs/source/reference/visualizations.rst | 4 ++ 4 files changed, 103 insertions(+), 1 deletion(-) diff --git a/chainercv/datasets/coco/coco_keypoint_dataset.py b/chainercv/datasets/coco/coco_keypoint_dataset.py index de40491100..f3d8c4434e 100644 --- a/chainercv/datasets/coco/coco_keypoint_dataset.py +++ b/chainercv/datasets/coco/coco_keypoint_dataset.py @@ -12,9 +12,62 @@ class COCOKeypointDataset(GetterDataset): + """Keypoint dataset for `MS COCO`_. + + This only returns annotation for objects categorized to the "person" + category. + + .. _`MS COCO`: http://cocodataset.org/#home + + Args: + data_dir (string): Path to the root of the training data. If this is + :obj:`auto`, this class will automatically download data for you + under :obj:`$CHAINER_DATASET_ROOT/pfnet/chainercv/coco`. + split ({'train', 'val'}): Select a split of the dataset. + year ({'2014', '2017'}): Use a dataset released in :obj:`year`. + use_crowded (bool): If true, use bounding boxes that are labeled as + crowded in the original annotation. The default value is + :obj:`False`. + return_area (bool): If true, this dataset returns areas of masks + around objects. The default value is :obj:`False`. + return_crowded (bool): If true, this dataset returns a boolean array + that indicates whether bounding boxes are labeled as crowded + or not. The default value is :obj:`False`. + + This dataset returns the following data. + + .. csv-table:: + :header: name, shape, dtype, format + + :obj:`img`, ":math:`(3, H, W)`", :obj:`float32`, \ + "RGB, :math:`[0, 255]`" + :obj:`point` [#coco_point_1]_, ":math:`(R, K, 2)`", :obj:`float32`, \ + ":math:`(y, x)`" + :obj:`valid` [#coco_point_1]_, ":math:`(R, K)`", :obj:`bool`, \ + "true when a keypoint is visible." 
+ :obj:`bbox` [#coco_point_1]_, ":math:`(R, 4)`", :obj:`float32`, \ + ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`" + :obj:`label` [#coco_point_1]_, ":math:`(R,)`", :obj:`int32`, \ + ":math:`[0, \#fg\_class - 1]`" + :obj:`area` [#coco_point_1]_ [#coco_point_2]_, ":math:`(R,)`", \ + :obj:`float32`, -- + :obj:`crowded` [#coco_point_3]_, ":math:`(R,)`", :obj:`bool`, -- + + .. [#coco_point_1] If :obj:`use_crowded = True`, :obj:`point`, \ + :obj:`valid`, :obj:`bbox`, \ + :obj:`label` and :obj:`area` contain crowded instances. + .. [#coco_point_2] :obj:`area` is available \ + if :obj:`return_area = True`. + .. [#coco_point_3] :obj:`crowded` is available \ + if :obj:`return_crowded = True`. + + """ + def __init__(self, data_dir='auto', split='train', year='2017', use_crowded=False, return_area=False, return_crowded=False): + if split not in ['train', 'val']: + raise ValueError('Unsupported split is given.') super(COCOKeypointDataset, self).__init__() self.use_crowded = use_crowded if data_dir == 'auto': diff --git a/chainercv/visualizations/vis_keypoint_coco.py b/chainercv/visualizations/vis_keypoint_coco.py index f750d23378..d977a59bf9 100644 --- a/chainercv/visualizations/vis_keypoint_coco.py +++ b/chainercv/visualizations/vis_keypoint_coco.py @@ -46,7 +46,48 @@ def vis_keypoint_coco( img, point, valid=None, point_score=None, thresh=2, markersize=3, linewidth=1, ax=None): - """Visualize bounding boxes inside image. + """Visualize keypoints organized as in COCO. + + Example: + + >>> from chainercv.datasets import COCOKeypointDataset + >>> from chainercv.visualizations import vis_keypoint_coco + >>> import matplotlib.pyplot as plt + >>> data = COCOKeypointDataset(split='val') + >>> img, point, valid = data[10][:3] + >>> vis_keypoint_coco(img, point, valid) + >>> plt.show() + + Args: + img (~numpy.ndarray): See the table below. + If this is :obj:`None`, no image is displayed. + point (~numpy.ndarray): See the table below. + valid (~numpy.ndarray): See the table below. 
If this is + :obj:`None`, all points are assumed to be visible. + point_score (~numpy.ndarray): See the table below. If this + is :obj:`None`, the confidence of all points is infinitely + large. + thresh (float): Points with confidence below :obj:`thresh` are + not visualized. + markersize (float): The size of vertices. + linewidth (float): The thickness of edges. + ax (matplotlib.axes.Axis): The visualization is displayed on this + axis. If this is :obj:`None` (default), a new axis is created. + + .. csv-table:: + :header: name, shape, dtype, format + + :obj:`img`, ":math:`(3, H, W)`", :obj:`float32`, \ + "RGB, :math:`[0, 255]`" + :obj:`point`, ":math:`(R, K, 2)`", :obj:`float32`, \ + ":math:`(y, x)`" + :obj:`valid`, ":math:`(R, K)`", :obj:`bool`, \ + "true when a keypoint is visible." + :obj:`point_score`, ":math:`(R, K)`", :obj:`float32`, -- + + Returns: + ~matplotlib.axes.Axes: + Returns the Axes object with the plot for further tweaking. """ if valid.dtype != np.bool: diff --git a/docs/source/reference/datasets.rst b/docs/source/reference/datasets.rst index ebf878354e..276c3249d9 100644 --- a/docs/source/reference/datasets.rst +++ b/docs/source/reference/datasets.rst @@ -73,6 +73,10 @@ COCOInstanceSegmentationDataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: COCOInstanceSegmentationDataset +COCOKeypointDataset +~~~~~~~~~~~~~~~~~~~ +.. autoclass:: COCOKeypointDataset + COCOSemanticSegmentationDataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: COCOSemanticSegmentationDataset diff --git a/docs/source/reference/visualizations.rst b/docs/source/reference/visualizations.rst index 685b498e43..c316209839 100644 --- a/docs/source/reference/visualizations.rst +++ b/docs/source/reference/visualizations.rst @@ -12,6 +12,10 @@ vis_image ~~~~~~~~~ .. autofunction:: vis_image +vis_keypoint_coco +~~~~~~~~~~~~~~~~~ +.. autofunction:: vis_keypoint_coco + vis_instance_segmentation ~~~~~~~~~~~~~~~~~~~~~~~~~ ..
autofunction:: vis_instance_segmentation From 98301e3390fa74e39b26dd0717aed3268a263002 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 20 Feb 2019 11:16:01 +0900 Subject: [PATCH 028/100] add test for vis_keypoint_coco --- chainercv/visualizations/vis_keypoint_coco.py | 11 ++- .../test_vis_keypoint_coco.py | 97 +++++++++++++++++++ 2 files changed, 105 insertions(+), 3 deletions(-) create mode 100644 tests/visualizations_tests/test_vis_keypoint_coco.py diff --git a/chainercv/visualizations/vis_keypoint_coco.py b/chainercv/visualizations/vis_keypoint_coco.py index d977a59bf9..58624fe35c 100644 --- a/chainercv/visualizations/vis_keypoint_coco.py +++ b/chainercv/visualizations/vis_keypoint_coco.py @@ -90,9 +90,6 @@ def vis_keypoint_coco( Returns the Axes object with the plot for further tweaking. """ - if valid.dtype != np.bool: - raise ValueError('The dtype of `valid` should be np.bool') - from matplotlib import pyplot as plt # Returns newly instantiated matplotlib.axes.Axes object if ax is None @@ -103,8 +100,16 @@ def vis_keypoint_coco( if point_score is None: point_score = np.inf * np.ones(point.shape[:2], dtype=np.float32) + if point_score.shape != point.shape[:2]: + raise ValueError('Mismatch in the number of instances or joints.') + if point.shape[1:] != (len(coco_keypoint_names[human_id]), 2): + raise ValueError('point has invalid shape') if valid is not None: + if valid.dtype != np.bool: + raise ValueError('The dtype of `valid` should be np.bool') + if valid.shape != point.shape[:2]: + raise ValueError('Mismatch in the number of instances or joints.') for i, vld in enumerate(valid): point_score[i, np.logical_not(vld)] = -np.inf diff --git a/tests/visualizations_tests/test_vis_keypoint_coco.py b/tests/visualizations_tests/test_vis_keypoint_coco.py new file mode 100644 index 0000000000..0a80260ec2 --- /dev/null +++ b/tests/visualizations_tests/test_vis_keypoint_coco.py @@ -0,0 +1,97 @@ +import unittest + +import numpy as np + +from chainer import testing 
+ +from chainercv.datasets import coco_keypoint_names +from chainercv.visualizations import vis_keypoint_coco + +try: + import matplotlib # NOQA + _available = True +except ImportError: + _available = False + + +human_id = 0 + + +def _generate_point(n_inst, size): + H, W = size + n_joint = len(coco_keypoint_names[human_id]) + ys = np.random.uniform(0, H, size=(n_inst, n_joint)) + xs = np.random.uniform(0, W, size=(n_inst, n_joint)) + point = np.stack((ys, xs), axis=2).astype(np.float32) + + valid = np.random.randint(0, 2, size=(n_inst, n_joint)).astype(np.bool) + + point_score = np.random.uniform( + 0, 6, size=(n_inst, n_joint)).astype(np.float32) + return point, valid, point_score + + +@testing.parameterize(*testing.product({ + 'n_inst': [3, 0], + 'use_img': [False, True], + 'use_valid': [False, True], + 'use_point_score': [False, True] +})) +@unittest.skipUnless(_available, 'matplotlib is not installed') +class TestVisKeypointCOCO(unittest.TestCase): + + def setUp(self): + size = (32, 48) + self.point, valid, point_score = _generate_point(self.n_inst, size) + self.img = (np.random.randint( + 0, 255, size=(3,) + size).astype(np.float32) + if self.use_img else None) + self.valid = valid if self.use_valid else None + self.point_score = point_score if self.use_point_score else None + + def test_vis_keypoint_coco(self): + ax = vis_keypoint_coco( + self.img, self.point, self.valid, + self.point_score) + + self.assertIsInstance(ax, matplotlib.axes.Axes) + + +@unittest.skipUnless(_available, 'matplotlib is not installed') +class TestVisKeypointCOCOInvalidInputs(unittest.TestCase): + + def setUp(self): + size = (32, 48) + n_inst = 10 + self.point, self.valid, self.point_score = _generate_point( + n_inst, size) + self.img = np.random.randint( + 0, 255, size=(3,) + size).astype(np.float32) + + def _check(self, img, point, valid, point_score): + with self.assertRaises(ValueError): + vis_keypoint_coco(img, point, valid, point_score) + + def test_invalid_n_inst_point(self): + 
self._check(self.img, self.point[:5], self.valid, self.point_score) + + def test_invalid_n_inst_valid(self): + self._check(self.img, self.point, self.valid[:5], self.point_score) + + def test_invalid_n_inst_point_score(self): + self._check(self.img, self.point, self.valid, self.point_score[:5]) + + def test_invalid_n_joint_point(self): + self._check(self.img, self.point[:, :15], self.valid, self.point_score) + + def test_invalid_n_joint_valid(self): + self._check(self.img, self.point, self.valid[:, :15], self.point_score) + + def test_invalid_n_joint_point_score(self): + self._check(self.img, self.point, self.valid, self.point_score[:, :15]) + + def test_invalid_valid_dtype(self): + self._check(self.img, self.point, self.valid.astype(np.int32), + self.point_score) + +testing.run_module(__name__, __file__) From b63254e4977608e32e7cb2d80868b322a526ed2f Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 20 Feb 2019 14:57:26 +0900 Subject: [PATCH 029/100] change name and add test --- chainercv/evaluations/__init__.py | 2 +- ...oco.py => eval_keypoint_detection_coco.py} | 17 +- docs/source/reference/evaluations.rst | 4 + .../test_eval_keypoint_detection_coco.py | 171 ++++++++++++++++++ .../evaluations_tests/test_eval_point_coco.py | 136 -------------- 5 files changed, 186 insertions(+), 144 deletions(-) rename chainercv/evaluations/{eval_point_coco.py => eval_keypoint_detection_coco.py} (93%) create mode 100644 tests/evaluations_tests/test_eval_keypoint_detection_coco.py delete mode 100644 tests/evaluations_tests/test_eval_point_coco.py diff --git a/chainercv/evaluations/__init__.py b/chainercv/evaluations/__init__.py index b3937cebfd..53017c6bb1 100644 --- a/chainercv/evaluations/__init__.py +++ b/chainercv/evaluations/__init__.py @@ -5,7 +5,7 @@ from chainercv.evaluations.eval_instance_segmentation_coco import eval_instance_segmentation_coco # NOQA from chainercv.evaluations.eval_instance_segmentation_voc import calc_instance_segmentation_voc_prec_rec # NOQA 
from chainercv.evaluations.eval_instance_segmentation_voc import eval_instance_segmentation_voc # NOQA -from chainercv.evaluations.eval_point_coco import eval_point_coco # NOQA +from chainercv.evaluations.eval_keypoint_detection_coco import eval_keypoint_detection_coco # NOQA from chainercv.evaluations.eval_semantic_segmentation import calc_semantic_segmentation_confusion # NOQA from chainercv.evaluations.eval_semantic_segmentation import calc_semantic_segmentation_iou # NOQA from chainercv.evaluations.eval_semantic_segmentation import eval_semantic_segmentation # NOQA diff --git a/chainercv/evaluations/eval_point_coco.py b/chainercv/evaluations/eval_keypoint_detection_coco.py similarity index 93% rename from chainercv/evaluations/eval_point_coco.py rename to chainercv/evaluations/eval_keypoint_detection_coco.py index 68f3e00975..50e573ed57 100644 --- a/chainercv/evaluations/eval_point_coco.py +++ b/chainercv/evaluations/eval_keypoint_detection_coco.py @@ -14,9 +14,10 @@ _available = False -def eval_point_coco(pred_points, pred_labels, pred_scores, - gt_points, gt_is_valids, gt_bboxes, gt_labels, - gt_areas, gt_crowdeds=None): +def eval_keypoint_detection_coco( + pred_points, pred_labels, pred_scores, + gt_points, gt_valids, gt_bboxes=None, gt_labels=None, + gt_areas=None, gt_crowdeds=None): if not _available: raise ValueError( 'Please install pycocotools \n' @@ -30,10 +31,10 @@ def eval_point_coco(pred_points, pred_labels, pred_scores, pred_labels = iter(pred_labels) pred_scores = iter(pred_scores) gt_points = iter(gt_points) - gt_is_valids = iter(gt_is_valids) - gt_bboxes = iter(gt_bboxes) + gt_valids = iter(gt_valids) + gt_bboxes = (iter(gt_bboxes) if gt_bboxes is not None + else itertools.repeat(None)) gt_labels = iter(gt_labels) - if gt_areas is None: compute_area_dependent_metrics = False gt_areas = itertools.repeat(None) @@ -51,8 +52,10 @@ def eval_point_coco(pred_points, pred_labels, pred_scores, gt_bbox, gt_label, gt_area, gt_crowded) in 
enumerate(six.moves.zip( pred_points, pred_labels, pred_scores, - gt_points, gt_is_valids, gt_bboxes, gt_labels, + gt_points, gt_valids, gt_bboxes, gt_labels, gt_areas, gt_crowdeds)): + if gt_bbox is None: + gt_bbox = itertools.repeat(None) if gt_area is None: gt_area = itertools.repeat(None) if gt_crowded is None: diff --git a/docs/source/reference/evaluations.rst b/docs/source/reference/evaluations.rst index 2befc38e47..553f1b52f6 100644 --- a/docs/source/reference/evaluations.rst +++ b/docs/source/reference/evaluations.rst @@ -45,6 +45,10 @@ calc_instance_segmentation_voc_prec_rec ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autofunction:: calc_instance_segmentation_voc_prec_rec +Keypoint Detection COCO +----------------------- +.. autofunction:: eval_keypoint_detection_coco + Semantic Segmentation IoU ------------------------- diff --git a/tests/evaluations_tests/test_eval_keypoint_detection_coco.py b/tests/evaluations_tests/test_eval_keypoint_detection_coco.py new file mode 100644 index 0000000000..4e3ece2955 --- /dev/null +++ b/tests/evaluations_tests/test_eval_keypoint_detection_coco.py @@ -0,0 +1,171 @@ +import numpy as np +import os +from six.moves.urllib import request +import unittest + +from chainer import testing + +from chainercv.datasets import coco_keypoint_names +from chainercv.evaluations import eval_keypoint_detection_coco + +try: + import pycocotools # NOQA + _available = True +except ImportError: + _available = False + + +human_id = 0 + + +def _generate_point(n_inst, size): + H, W = size + n_joint = len(coco_keypoint_names[human_id]) + ys = np.random.uniform(0, H, size=(n_inst, n_joint)) + xs = np.random.uniform(0, W, size=(n_inst, n_joint)) + point = np.stack((ys, xs), axis=2).astype(np.float32) + + valid = np.random.randint(0, 2, size=(n_inst, n_joint)).astype(np.bool) + return point, valid + + +@unittest.skipUnless(_available, 'pycocotools is not installed') +class TestEvalPointCOCOSingleClass(unittest.TestCase): + + n_inst = 3 + + def 
setUp(self): + self.pred_points = [] + self.pred_labels = [] + self.pred_scores = [] + self.gt_points = [] + self.gt_valids = [] + self.gt_bboxes = [] + self.gt_labels = [] + for i in range(2): + point, valid = _generate_point(self.n_inst, (32, 48)) + self.pred_points.append(point) + self.pred_labels.append(np.zeros((self.n_inst,), dtype=np.int32)) + self.pred_scores.append(np.random.uniform( + 0.5, 1, size=(self.n_inst,)).astype(np.float32)) + self.gt_points.append(point) + self.gt_valids.append(valid) + bbox = np.zeros((self.n_inst, 4), dtype=np.float32) + for i, pnt in enumerate(point): + y_min = np.min(pnt[:, 0]) + x_min = np.min(pnt[:, 1]) + y_max = np.max(pnt[:, 0]) + x_max = np.max(pnt[:, 1]) + bbox[i] = [y_min, x_min, y_max, x_max] + self.gt_bboxes.append(bbox) + self.gt_labels.append(np.zeros((self.n_inst,), dtype=np.int32)) + + def _check(self, result): + self.assertEqual(result['map/iou=0.50:0.95/area=all/max_dets=20'], 1) + self.assertEqual(result['map/iou=0.50/area=all/max_dets=20'], 1) + self.assertEqual(result['map/iou=0.75/area=all/max_dets=20'], 1) + self.assertEqual(result['mar/iou=0.50:0.95/area=all/max_dets=20'], 1) + self.assertEqual(result['mar/iou=0.50/area=all/max_dets=20'], 1) + self.assertEqual(result['mar/iou=0.75/area=all/max_dets=20'], 1) + + def test_gt_bboxes_not_supplied(self): + result = eval_keypoint_detection_coco( + self.pred_points, self.pred_labels, self.pred_scores, + self.gt_points, self.gt_valids, None, self.gt_labels) + self._check(result) + + def test_area_not_supplied(self): + result = eval_keypoint_detection_coco( + self.pred_points, self.pred_labels, self.pred_scores, + self.gt_points, self.gt_valids, self.gt_bboxes, self.gt_labels) + self._check(result) + + self.assertFalse( + 'map/iou=0.50:0.95/area=medium/max_dets=20' in result) + self.assertFalse( + 'map/iou=0.50:0.95/area=large/max_dets=20' in result) + self.assertFalse( + 'mar/iou=0.50:0.95/area=medium/max_dets=20' in result) + self.assertFalse( + 
'mar/iou=0.50:0.95/area=large/max_dets=20' in result) + + def test_area_supplied(self): + gt_areas = [[100] * self.n_inst for _ in range(2)] + result = eval_keypoint_detection_coco( + self.pred_points, self.pred_labels, self.pred_scores, + self.gt_points, self.gt_valids, self.gt_bboxes, self.gt_labels, + gt_areas=gt_areas, + ) + self._check(result) + self.assertTrue( + 'map/iou=0.50:0.95/area=medium/max_dets=20' in result) + self.assertTrue( + 'map/iou=0.50:0.95/area=large/max_dets=20' in result) + self.assertTrue( + 'mar/iou=0.50:0.95/area=medium/max_dets=20' in result) + self.assertTrue( + 'mar/iou=0.50:0.95/area=large/max_dets=20' in result) + + def test_crowded_supplied(self): + gt_crowdeds = [[True] * self.n_inst for _ in range(2)] + result = eval_keypoint_detection_coco( + self.pred_points, self.pred_labels, self.pred_scores, + self.gt_points, self.gt_valids, self.gt_bboxes, self.gt_labels, + gt_crowdeds=gt_crowdeds, + ) + # When the only ground truth is crowded, nothing is evaluated. + # In that case, all the results are nan. 
+ self.assertTrue( + np.isnan(result['map/iou=0.50:0.95/area=all/max_dets=20'])) + + +@unittest.skipUnless(_available, 'pycocotools is not installed') +class TestEvalKeypointDetectionCOCO(unittest.TestCase): + + @classmethod + def setUpClass(cls): + base_url = 'https://chainercv-models.preferred.jp/tests' + + cls.dataset = np.load(request.urlretrieve(os.path.join( + base_url, + 'eval_keypoint_detection_coco_dataset_2019_02_20.npz'))[0]) + cls.result = np.load(request.urlretrieve(os.path.join( + base_url, + 'eval_keypoint_detection_coco_result_2019_02_20.npz'))[0]) + + def test_eval_keypoint_detection_coco(self): + pred_points = self.result['points'] + pred_labels = self.result['labels'] + pred_scores = self.result['scores'] + + gt_points = self.dataset['points'] + gt_valids = self.dataset['valids'] + gt_bboxes = self.dataset['bboxes'] + gt_labels = self.dataset['labels'] + gt_areas = self.dataset['areas'] + gt_crowdeds = self.dataset['crowdeds'] + + result = eval_keypoint_detection_coco( + pred_points, pred_labels, pred_scores, + gt_points, gt_valids, gt_bboxes, + gt_labels, gt_areas, gt_crowdeds) + + expected = { + 'map/iou=0.50:0.95/area=all/max_dets=20': 0.37733572721481323, + 'map/iou=0.50/area=all/max_dets=20': 0.6448841691017151, + 'map/iou=0.75/area=all/max_dets=20': 0.35469090938568115, + 'map/iou=0.50:0.95/area=medium/max_dets=20': 0.3894105851650238, + 'map/iou=0.50:0.95/area=large/max_dets=20': 0.39169296622276306, + 'mar/iou=0.50:0.95/area=all/max_dets=20': 0.5218977928161621, + 'mar/iou=0.50/area=all/max_dets=20': 0.7445255517959595, + 'mar/iou=0.75/area=all/max_dets=20': 0.510948896408081, + 'mar/iou=0.50:0.95/area=medium/max_dets=20': 0.5150684714317322, + 'mar/iou=0.50:0.95/area=large/max_dets=20': 0.5296875238418579, + } + + for key, item in expected.items(): + np.testing.assert_almost_equal( + result[key], expected[key], decimal=5) + + +testing.run_module(__name__, __file__) diff --git a/tests/evaluations_tests/test_eval_point_coco.py 
b/tests/evaluations_tests/test_eval_point_coco.py deleted file mode 100644 index bc2095eefd..0000000000 --- a/tests/evaluations_tests/test_eval_point_coco.py +++ /dev/null @@ -1,136 +0,0 @@ -import numpy as np -import os -from six.moves.urllib import request -import unittest - -from chainer import testing - -from chainercv.evaluations import eval_point_coco - -try: - import pycocotools # NOQA - _available = True -except ImportError: - _available = False - - -# @unittest.skipUnless(_available, 'pycocotools is not installed') -# class TestEvalPointCOCOSingleClass(unittest.TestCase): -# -# def setUp(self): -# self.pred_bboxes = np.array([[[0, 0, 10, 10], [0, 0, 20, 20]]]) -# self.pred_labels = np.array([[0, 0]]) -# self.pred_scores = np.array([[0.8, 0.9]]) -# self.gt_bboxes = np.array([[[0, 0, 10, 9]]]) -# self.gt_labels = np.array([[0, 0]]) -# -# def test_crowded(self): -# result = eval_detection_coco(self.pred_bboxes, self.pred_labels, -# self.pred_scores, -# self.gt_bboxes, self.gt_labels, -# gt_crowdeds=[[True]]) -# # When the only ground truth is crowded, nothing is evaluated. -# # In that case, all the results are nan. 
-# self.assertTrue( -# np.isnan(result['map/iou=0.50:0.95/area=all/max_dets=100'])) -# self.assertTrue( -# np.isnan(result['map/iou=0.50/area=all/max_dets=100'])) -# self.assertTrue( -# np.isnan(result['map/iou=0.75/area=all/max_dets=100'])) -# -# def test_area_not_supplied(self): -# result = eval_detection_coco(self.pred_bboxes, self.pred_labels, -# self.pred_scores, -# self.gt_bboxes, self.gt_labels) -# self.assertFalse( -# 'map/iou=0.50:0.95/area=small/max_dets=100' in result) -# self.assertFalse( -# 'map/iou=0.50:0.95/area=medium/max_dets=100' in result) -# self.assertFalse( -# 'map/iou=0.50:0.95/area=large/max_dets=100' in result) -# -# def test_area_specified(self): -# result = eval_detection_coco(self.pred_bboxes, self.pred_labels, -# self.pred_scores, -# self.gt_bboxes, self.gt_labels, -# gt_areas=[[2048]]) -# self.assertFalse( -# np.isnan(result['map/iou=0.50:0.95/area=medium/max_dets=100'])) -# self.assertTrue( -# np.isnan(result['map/iou=0.50:0.95/area=small/max_dets=100'])) -# self.assertTrue( -# np.isnan(result['map/iou=0.50:0.95/area=large/max_dets=100'])) - - -# @unittest.skipUnless(_available, 'pycocotools is not installed') -# class TestEvalPointCOCOSomeClassNonExistent(unittest.TestCase): -# -# def setUp(self): -# self.pred_bboxes = np.array([[[0, 0, 10, 10], [0, 0, 20, 20]]]) -# self.pred_labels = np.array([[1, 2]]) -# self.pred_scores = np.array([[0.8, 0.9]]) -# self.gt_bboxes = np.array([[[0, 0, 10, 9]]]) -# self.gt_labels = np.array([[1, 2]]) -# -# def test(self): -# result = eval_detection_coco(self.pred_bboxes, self.pred_labels, -# self.pred_scores, -# self.gt_bboxes, self.gt_labels) -# self.assertEqual( -# result['ap/iou=0.50:0.95/area=all/max_dets=100'].shape, (3,)) -# self.assertTrue( -# np.isnan(result['ap/iou=0.50:0.95/area=all/max_dets=100'][0])) -# self.assertEqual( -# np.nanmean(result['ap/iou=0.50:0.95/area=all/max_dets=100'][1:]), -# result['map/iou=0.50:0.95/area=all/max_dets=100']) -# - -@unittest.skipUnless(_available, 
'pycocotools is not installed') -class TestEvalPointCOCO(unittest.TestCase): - - @classmethod - def setUpClass(cls): - base_url = 'https://chainercv-models.preferred.jp/tests' - - cls.dataset = np.load(request.urlretrieve(os.path.join( - base_url, 'eval_point_coco_dataset_2019_02_18.npz'))[0]) - cls.result = np.load(request.urlretrieve(os.path.join( - base_url, 'eval_point_coco_result_2019_02_18.npz'))[0]) - - def test_eval_detection_coco(self): - pred_points = self.result['points'] - pred_labels = self.result['labels'] - pred_scores = self.result['scores'] - - gt_points = self.dataset['points'] - gt_is_valids = self.dataset['is_valids'] - gt_bboxes = self.dataset['bboxes'] - gt_labels = self.dataset['labels'] - gt_areas = self.dataset['areas'] - gt_crowdeds = self.dataset['crowdeds'] - - result = eval_point_coco( - pred_points, pred_labels, pred_scores, - gt_points, gt_is_valids, gt_bboxes, - gt_labels, gt_areas, gt_crowdeds) - - - expected = { - 'map/iou=0.50:0.95/area=all/max_dets=20': 0.37733572721481323, - 'map/iou=0.50/area=all/max_dets=20': 0.6448841691017151, - 'map/iou=0.75/area=all/max_dets=20': 0.35469090938568115, - 'map/iou=0.50:0.95/area=medium/max_dets=20': 0.3894105851650238, - 'map/iou=0.50:0.95/area=large/max_dets=20': 0.39169296622276306, - 'mar/iou=0.50:0.95/area=all/max_dets=20': 0.5218977928161621, - 'mar/iou=0.50/area=all/max_dets=20': 0.7445255517959595, - 'mar/iou=0.75/area=all/max_dets=20': 0.510948896408081, - 'mar/iou=0.50:0.95/area=medium/max_dets=20': 0.5150684714317322, - 'mar/iou=0.50:0.95/area=large/max_dets=20': 0.5296875238418579, - } - - for key, item in expected.items(): - np.testing.assert_almost_equal( - result[key], expected[key], decimal=5) - - -testing.run_module(__name__, __file__) From b1f6454abcb5fa4242524b1a58a7cbb359ee012d Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 20 Feb 2019 15:37:43 +0900 Subject: [PATCH 030/100] update doc --- .../eval_keypoint_detection_coco.py | 116 ++++++++++++++++++ 1 file 
changed, 116 insertions(+) diff --git a/chainercv/evaluations/eval_keypoint_detection_coco.py b/chainercv/evaluations/eval_keypoint_detection_coco.py index 50e573ed57..6ff5692bc9 100644 --- a/chainercv/evaluations/eval_keypoint_detection_coco.py +++ b/chainercv/evaluations/eval_keypoint_detection_coco.py @@ -18,6 +18,122 @@ def eval_keypoint_detection_coco( pred_points, pred_labels, pred_scores, gt_points, gt_valids, gt_bboxes=None, gt_labels=None, gt_areas=None, gt_crowdeds=None): + """Evaluate keypoint detection based on evaluation code of MS COCO. + + This function evaluates predicted keypoints obtained by using average + precision for each class. + The code is based on the evaluation code used in MS COCO. + + Args: + pred_points (iterable of numpy.ndarray): See the table below. + pred_labels (iterable of numpy.ndarray): See the table below. + pred_scores (iterable of numpy.ndarray): See the table below. + This is used to rank instances. Note that this is not + the confidence for each keypoint. + gt_points (iterable of numpy.ndarray): See the table below. + gt_valids (iterable of numpy.ndarray): See the table below. + gt_bboxes (iterable of numpy.ndarray): See the table below. + This is optional. If this is :obj:`None`, the ground truth + bounding boxes are estimated from the ground truth + keypoints. + gt_labels (iterable of numpy.ndarray): See the table below. + gt_areas (iterable of numpy.ndarray): See the table below. If + :obj:`None`, some scores are not returned. + gt_crowdeds (iterable of numpy.ndarray): See the table below. + + ..
csv-table:: + :header: name, shape, dtype, format + + :obj:`pred_points`, ":math:`[(R, K, 2)]`", :obj:`float32`, \ + ":math:`(y, x)`" + :obj:`pred_labels`, ":math:`[(R,)]`", :obj:`int32`, \ + ":math:`[0, \#fg\_class - 1]`" + :obj:`pred_scores`, ":math:`[(R,)]`", :obj:`float32`, \ + -- + :obj:`gt_points`, ":math:`[(R, K, 2)]`", :obj:`float32`, \ + ":math:`(y, x)`" + :obj:`gt_valids`, ":math:`[(R, K)]`", :obj:`bool`, -- + :obj:`gt_bboxes`, ":math:`[(R, 4)]`", :obj:`float32`, \ + ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`" + :obj:`gt_labels`, ":math:`[(R,)]`", :obj:`int32`, \ + ":math:`[0, \#fg\_class - 1]`" + :obj:`gt_areas`, ":math:`[(R,)]`", \ + :obj:`float32`, -- + :obj:`gt_crowdeds`, ":math:`[(R,)]`", :obj:`bool`, -- + + + Returns: + dict: + + The keys, value-types and the description of the values are listed + below. The APs and ARs calculated with different iou + thresholds, sizes of objects, and numbers of detections + per image. For more details on the 12 patterns of evaluation metrics, + please refer to COCO's official `evaluation page`_. + + .. 
csv-table:: + :header: key, type, description + + ap/iou=0.50:0.95/area=all/max_dets=20, *numpy.ndarray*, \ + [#coco_kp_eval_1]_ + ap/iou=0.50/area=all/max_dets=20, *numpy.ndarray*, \ + [#coco_kp_eval_1]_ + ap/iou=0.75/area=all/max_dets=20, *numpy.ndarray*, \ + [#coco_kp_eval_1]_ + ap/iou=0.50:0.95/area=medium/max_dets=20, *numpy.ndarray*, \ + [#coco_kp_eval_1]_ [#coco_kp_eval_5]_ + ap/iou=0.50:0.95/area=large/max_dets=20, *numpy.ndarray*, \ + [#coco_kp_eval_1]_ [#coco_kp_eval_5]_ + ar/iou=0.50:0.95/area=all/max_dets=20, *numpy.ndarray*, \ + [#coco_kp_eval_2]_ + ar/iou=0.50/area=all/max_dets=20, *numpy.ndarray*, \ + [#coco_kp_eval_2]_ + ar/iou=0.75/area=all/max_dets=20, *numpy.ndarray*, \ + [#coco_kp_eval_2]_ + ar/iou=0.50:0.95/area=medium/max_dets=20, *numpy.ndarray*, \ + [#coco_kp_eval_2]_ [#coco_kp_eval_5]_ + ar/iou=0.50:0.95/area=large/max_dets=20, *numpy.ndarray*, \ + [#coco_kp_eval_2]_ [#coco_kp_eval_5]_ + map/iou=0.50:0.95/area=all/max_dets=20, *float*, \ + [#coco_kp_eval_3]_ + map/iou=0.50/area=all/max_dets=20, *float*, \ + [#coco_kp_eval_3]_ + map/iou=0.75/area=all/max_dets=20, *float*, \ + [#coco_kp_eval_3]_ + map/iou=0.50:0.95/area=medium/max_dets=20, *float*, \ + [#coco_kp_eval_3]_ [#coco_kp_eval_5]_ + map/iou=0.50:0.95/area=large/max_dets=20, *float*, \ + [#coco_kp_eval_3]_ [#coco_kp_eval_5]_ + mar/iou=0.50:0.95/area=all/max_dets=20, *float*, \ + [#coco_kp_eval_4]_ + mar/iou=0.50/area=all/max_dets=20, *float*, \ + [#coco_kp_eval_4]_ + mar/iou=0.75/area=all/max_dets=20, *float*, \ + [#coco_kp_eval_4]_ + mar/iou=0.50:0.95/area=medium/max_dets=20, *float*, \ + [#coco_kp_eval_4]_ [#coco_kp_eval_5]_ + mar/iou=0.50:0.95/area=large/max_dets=20, *float*, \ + [#coco_kp_eval_4]_ [#coco_kp_eval_5]_ + coco_eval, *pycocotools.cocoeval.COCOeval*, \ + result from :obj:`pycocotools` + existent_labels, *numpy.ndarray*, \ + used labels \ + + .. [#coco_kp_eval_1] An array of average precisions. 
\ + The :math:`l`-th value corresponds to the average precision \ + for class :math:`l`. If class :math:`l` does not exist in \ + either :obj:`pred_labels` or :obj:`gt_labels`, the corresponding \ + value is set to :obj:`numpy.nan`. + .. [#coco_kp_eval_2] An array of average recalls. \ + The :math:`l`-th value corresponds to the average recall \ + for class :math:`l`. If class :math:`l` does not exist in \ + either :obj:`pred_labels` or :obj:`gt_labels`, the corresponding \ + value is set to :obj:`numpy.nan`. + .. [#coco_kp_eval_3] The average of average precisions over classes. + .. [#coco_kp_eval_4] The average of average recalls over classes. + .. [#coco_kp_eval_5] Skip if :obj:`gt_areas` is :obj:`None`. + + """ if not _available: raise ValueError( 'Please install pycocotools \n' From 9cc885ab04b19221b53ce47b6c8f053b25942fcf Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 20 Feb 2019 21:19:11 +0900 Subject: [PATCH 031/100] use Conv2DActiv --- chainercv/links/model/mask_rcnn/mask_head.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py index 5f6e5c03d2..d1436785c1 100644 --- a/chainercv/links/model/mask_rcnn/mask_head.py +++ b/chainercv/links/model/mask_rcnn/mask_head.py @@ -11,6 +11,7 @@ from chainer.initializers import HeNormal import chainer.links as L +from chainercv.links import Conv2DActiv from chainercv.transforms.image.resize import resize from chainercv.utils.bbox.bbox_iou import bbox_iou @@ -36,10 +37,10 @@ def __init__(self, n_class, scales): initialW = HeNormal(1, fan_option='fan_out') with self.init_scope(): - self.conv1 = L.Convolution2D(256, 3, pad=1, initialW=initialW) - self.conv2 = L.Convolution2D(256, 3, pad=1, initialW=initialW) - self.conv3 = L.Convolution2D(256, 3, pad=1, initialW=initialW) - self.conv4 = L.Convolution2D(256, 3, pad=1, initialW=initialW) + self.conv1 = Conv2DActiv(256, 3, pad=1,
initialW=initialW) + self.conv2 = Conv2DActiv(256, 3, pad=1, initialW=initialW) + self.conv3 = Conv2DActiv(256, 3, pad=1, initialW=initialW) + self.conv4 = Conv2DActiv(256, 3, pad=1, initialW=initialW) self.conv5 = L.Deconvolution2D( 256, 2, pad=0, stride=2, initialW=initialW) self.seg = L.Convolution2D(n_class, 1, pad=0, initialW=initialW) @@ -66,10 +67,10 @@ def __call__(self, hs, rois, roi_indices): return segs h = F.concat(pooled_hs, axis=0) - h = F.relu(self.conv1(h)) - h = F.relu(self.conv2(h)) - h = F.relu(self.conv3(h)) - h = F.relu(self.conv4(h)) + h = self.conv1(h) + h = self.conv2(h) + h = self.conv3(h) + h = self.conv4(h) h = F.relu(self.conv5(h)) return self.seg(h) From 5ffd8b3d27ad4bf4d264a7982d3e6e03b849a27c Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 21 Feb 2019 16:40:54 +0900 Subject: [PATCH 032/100] change interface of point/transforms --- chainercv/transforms/point/flip_point.py | 39 +++++++++++++------ chainercv/transforms/point/resize_point.py | 33 +++++++++++----- chainercv/transforms/point/translate_point.py | 30 +++++++++----- .../point_tests/test_flip_point.py | 24 ++++++++++-- .../point_tests/test_resize_point.py | 18 +++++++-- .../point_tests/test_translate_point.py | 19 +++++++-- 6 files changed, 120 insertions(+), 43 deletions(-) diff --git a/chainercv/transforms/point/flip_point.py b/chainercv/transforms/point/flip_point.py index 104929e5bf..36e279ab7d 100644 --- a/chainercv/transforms/point/flip_point.py +++ b/chainercv/transforms/point/flip_point.py @@ -1,12 +1,11 @@ +import numpy as np + + def flip_point(point, size, y_flip=False, x_flip=False): """Modify points according to image flips. Args: - point (~numpy.ndarray): Points in the image. - The shape of this array is :math:`(P, 2)`. :math:`P` is the number - of points in the image. - The last dimension is composed of :math:`y` and :math:`x` - coordinates of the points. + point (~numpy.ndarray or list of arrays): See the table below. size (tuple): A tuple of length 2. 
The height and the width of the image, which is associated with the points. y_flip (bool): Modify points according to a vertical flip of @@ -14,15 +13,31 @@ def flip_point(point, size, y_flip=False, x_flip=False): x_flip (bool): Modify keypoipoints according to a horizontal flip of an image. + .. csv-table:: + :header: name, shape, dtype, format + + :obj:`point`, ":math:`[(K, 2)]` or :math:`(R, K, 2)`", \ + :obj:`float32`, ":math:`(y, x)`" + Returns: - ~numpy.ndarray: + ~numpy.ndarray or list of arrays: Points modified according to image flips. """ H, W = size - point = point.copy() - if y_flip: - point[:, 0] = H - point[:, 0] - if x_flip: - point[:, 1] = W - point[:, 1] - return point + if isinstance(point, np.ndarray): + out_point = point.copy() + if y_flip: + out_point[:, :, 0] = H - out_point[:, :, 0] + if x_flip: + out_point[:, :, 1] = W - out_point[:, :, 1] + else: + out_point = [] + for pnt in point: + pnt = pnt.copy() + if y_flip: + pnt[:, 0] = H - pnt[:, 0] + if x_flip: + pnt[:, 1] = W - pnt[:, 1] + out_point.append(pnt) + return out_point diff --git a/chainercv/transforms/point/resize_point.py b/chainercv/transforms/point/resize_point.py index 0991fd4170..061efc0410 100644 --- a/chainercv/transforms/point/resize_point.py +++ b/chainercv/transforms/point/resize_point.py @@ -1,25 +1,38 @@ +import numpy as np + + def resize_point(point, in_size, out_size): """Adapt point coordinates to the rescaled image space. Args: - point (~numpy.ndarray): Points in the image. - The shape of this array is :math:`(P, 2)`. :math:`P` is the number - of points in the image. - The last dimension is composed of :math:`y` and :math:`x` - coordinates of the points. + point (~numpy.ndarray or list of arrays): See the table below. in_size (tuple): A tuple of length 2. The height and the width of the image before resized. out_size (tuple): A tuple of length 2. The height and the width of the image after resized. + .. 
csv-table:: + :header: name, shape, dtype, format + + :obj:`point`, ":math:`[(K, 2)]` or :math:`(R, K, 2)`", \ + :obj:`float32`, ":math:`(y, x)`" + Returns: - ~numpy.ndarray: + ~numpy.ndarray or list of arrays: Points rescaled according to the given image shapes. """ - point = point.copy() y_scale = float(out_size[0]) / in_size[0] x_scale = float(out_size[1]) / in_size[1] - point[:, 0] = y_scale * point[:, 0] - point[:, 1] = x_scale * point[:, 1] - return point + if isinstance(point, np.ndarray): + out_point = point.copy() + out_point[:, :, 0] = y_scale * point[:, :, 0] + out_point[:, :, 1] = x_scale * point[:, :, 1] + else: + out_point = [] + for pnt in point: + out_pnt = pnt.copy() + out_pnt[:, 0] = y_scale * pnt[:, 0] + out_pnt[:, 1] = x_scale * pnt[:, 1] + out_point.append(out_pnt) + return out_point diff --git a/chainercv/transforms/point/translate_point.py b/chainercv/transforms/point/translate_point.py index bd05f91244..c4a9e911bf 100644 --- a/chainercv/transforms/point/translate_point.py +++ b/chainercv/transforms/point/translate_point.py @@ -1,3 +1,6 @@ +import numpy as np + + def translate_point(point, y_offset=0, x_offset=0): """Translate points. @@ -6,23 +9,32 @@ def translate_point(point, y_offset=0, x_offset=0): to the coordinate :math:`(y, x) = (y_{offset}, x_{offset})`. Args: - point (~numpy.ndarray): Points in the image. - The shape of this array is :math:`(P, 2)`. :math:`P` is the number - of points in the image. - The last dimension is composed of :math:`y` and :math:`x` - coordinates of the points. + point (~numpy.ndarray or list of arrays): See the table below. y_offset (int or float): The offset along y axis. x_offset (int or float): The offset along x axis. + .. csv-table:: + :header: name, shape, dtype, format + + :obj:`point`, ":math:`[(K, 2)]` or :math:`(R, K, 2)`", \ + :obj:`float32`, ":math:`(y, x)`" + Returns: ~numpy.ndarray: Points modified translation of an image. 
""" - out_point = point.copy() - - out_point[:, 0] += y_offset - out_point[:, 1] += x_offset + if isinstance(point, np.ndarray): + out_point = point.copy() + out_point[:, :, 0] += y_offset + out_point[:, :, 1] += x_offset + else: + out_point = [] + for pnt in point: + out_pnt = pnt.copy() + out_pnt[:, 0] += y_offset + out_pnt[:, 1] += x_offset + out_point.append(out_pnt) return out_point diff --git a/tests/transforms_tests/point_tests/test_flip_point.py b/tests/transforms_tests/point_tests/test_flip_point.py index ac6dc4d690..f02ae8b33d 100644 --- a/tests/transforms_tests/point_tests/test_flip_point.py +++ b/tests/transforms_tests/point_tests/test_flip_point.py @@ -8,19 +8,35 @@ class TestFlipPoint(unittest.TestCase): - def test_flip_point(self): + def test_flip_point_ndarray(self): point = np.random.uniform( - low=0., high=32., size=(12, 2)) + low=0., high=32., size=(3, 12, 2)) out = flip_point(point, size=(34, 32), y_flip=True) point_expected = point.copy() - point_expected[:, 0] = 34 - point[:, 0] + point_expected[:, :, 0] = 34 - point[:, :, 0] np.testing.assert_equal(out, point_expected) out = flip_point(point, size=(34, 32), x_flip=True) point_expected = point.copy() - point_expected[:, 1] = 32 - point[:, 1] + point_expected[:, :, 1] = 32 - point[:, :, 1] np.testing.assert_equal(out, point_expected) + def test_flip_point_list(self): + point = [np.random.uniform( + low=0., high=32., size=(12, 2))] + + out = flip_point(point, size=(34, 32), y_flip=True) + for i, pnt in enumerate(point): + pnt_expected = pnt.copy() + pnt_expected[:, 0] = 34 - pnt[:, 0] + np.testing.assert_equal(out[i], pnt_expected) + + out = flip_point(point, size=(34, 32), x_flip=True) + for i, pnt in enumerate(point): + pnt_expected = pnt.copy() + pnt_expected[:, 1] = 32 - pnt[:, 1] + np.testing.assert_equal(out[i], pnt_expected) + testing.run_module(__name__, __file__) diff --git a/tests/transforms_tests/point_tests/test_resize_point.py 
b/tests/transforms_tests/point_tests/test_resize_point.py index a3fb7b172b..79ce01daff 100644 --- a/tests/transforms_tests/point_tests/test_resize_point.py +++ b/tests/transforms_tests/point_tests/test_resize_point.py @@ -8,14 +8,24 @@ class TestResizePoint(unittest.TestCase): - def test_resize_point(self): + def test_resize_point_ndarray(self): point = np.random.uniform( - low=0., high=32., size=(12, 2)) + low=0., high=32., size=(3, 12, 2)) out = resize_point(point, in_size=(16, 32), out_size=(8, 64)) - point[:, 0] *= 0.5 - point[:, 1] *= 2 + point[:, :, 0] *= 0.5 + point[:, :, 1] *= 2 np.testing.assert_equal(out, point) + def test_resize_point_list(self): + point = [np.random.uniform( + low=0., high=32., size=(12, 2))] + + out = resize_point(point, in_size=(16, 32), out_size=(8, 64)) + for i, pnt in enumerate(point): + pnt[:, 0] *= 0.5 + pnt[:, 1] *= 2 + np.testing.assert_equal(out[i], pnt) + testing.run_module(__name__, __file__) diff --git a/tests/transforms_tests/point_tests/test_translate_point.py b/tests/transforms_tests/point_tests/test_translate_point.py index 1030bf22cb..8851d13e3d 100644 --- a/tests/transforms_tests/point_tests/test_translate_point.py +++ b/tests/transforms_tests/point_tests/test_translate_point.py @@ -8,15 +8,26 @@ class TestTranslatePoint(unittest.TestCase): - def test_translate_point(self): + def test_translate_point_ndarray(self): point = np.random.uniform( - low=0., high=32., size=(10, 2)) + low=0., high=32., size=(3, 10, 2)) out = translate_point(point, y_offset=3, x_offset=5) expected = np.empty_like(point) - expected[:, 0] = point[:, 0] + 3 - expected[:, 1] = point[:, 1] + 5 + expected[:, :, 0] = point[:, :, 0] + 3 + expected[:, :, 1] = point[:, :, 1] + 5 np.testing.assert_equal(out, expected) + def test_translate_point_list(self): + point = [np.random.uniform( + low=0., high=32., size=(10, 2))] + + out = translate_point(point, y_offset=3, x_offset=5) + for i, pnt in enumerate(point): + expected = np.empty_like(pnt) + expected[:, 
0] = pnt[:, 0] + 3 + expected[:, 1] = pnt[:, 1] + 5 + np.testing.assert_equal(out[i], expected) + testing.run_module(__name__, __file__) From 45bd712a3a1ffbb16f8a3f2540539ada582b8ef9 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 21 Feb 2019 16:58:50 +0900 Subject: [PATCH 033/100] fix variable name --- .../datasets/coco/coco_keypoint_dataset.py | 18 +++---- chainercv/visualizations/vis_keypoint_coco.py | 22 ++++----- .../test_vis_keypoint_coco.py | 48 +++++++++---------- 3 files changed, 44 insertions(+), 44 deletions(-) diff --git a/chainercv/datasets/coco/coco_keypoint_dataset.py b/chainercv/datasets/coco/coco_keypoint_dataset.py index f3d8c4434e..886eb2f252 100644 --- a/chainercv/datasets/coco/coco_keypoint_dataset.py +++ b/chainercv/datasets/coco/coco_keypoint_dataset.py @@ -43,7 +43,7 @@ class COCOKeypointDataset(GetterDataset): "RGB, :math:`[0, 255]`" :obj:`point` [#coco_point_1]_, ":math:`(R, K, 2)`", :obj:`float32`, \ ":math:`(y, x)`" - :obj:`valid` [#coco_point_1]_, ":math:`(R, K)`", :obj:`bool`, \ + :obj:`visible` [#coco_point_1]_, ":math:`(R, K)`", :obj:`bool`, \ "true when a keypoint is visible." :obj:`bbox` [#coco_point_1]_, ":math:`(R, 4)`", :obj:`float32`, \ ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`" @@ -54,7 +54,7 @@ class COCOKeypointDataset(GetterDataset): :obj:`crowded` [#coco_point_3]_, ":math:`(R,)`", :obj:`bool`, -- .. [#coco_point_1] If :obj:`use_crowded = True`, :obj:`point`, \ - :obj:`valid`, :obj:`bbox`, \ + :obj:`visible`, :obj:`bbox`, \ :obj:`label` and :obj:`area` contain crowded instances. .. [#coco_point_2] :obj:`area` is available \ if :obj:`return_area = True`. 
@@ -95,9 +95,9 @@ def __init__(self, data_dir='auto', split='train', year='2017', self.add_getter('img', self._get_image) self.add_getter( - ['point', 'valid', 'bbox', 'label', 'area', 'crowded'], + ['point', 'visible', 'bbox', 'label', 'area', 'crowded'], self._get_annotations) - keys = ('img', 'point', 'valid', 'bbox', 'label') + keys = ('img', 'point', 'visible', 'bbox', 'label') if return_area: keys += ('area',) if return_crowded: @@ -144,13 +144,13 @@ def _get_annotations(self, i): # 0: not labeled; 1: labeled, not inside mask; # 2: labeled and inside mask v = point[:, 2::3] - valid = v > 0 + visible = v > 0 point = np.stack((y, x), axis=2) else: point = np.empty((0, 0, 2), dtype=np.float32) - valid = np.empty((0, 0), dtype=np.bool) + visible = np.empty((0, 0), dtype=np.bool) - # Remove invalid boxes + # Remove invisible boxes bbox_area = np.prod(bbox[:, 2:] - bbox[:, :2], axis=1) keep_mask = np.logical_and(bbox[:, 0] <= bbox[:, 2], bbox[:, 1] <= bbox[:, 3]) @@ -160,9 +160,9 @@ def _get_annotations(self, i): keep_mask = np.logical_and(keep_mask, np.logical_not(crowded)) point = point[keep_mask] - valid = valid[keep_mask] + visible = visible[keep_mask] bbox = bbox[keep_mask] label = label[keep_mask] area = area[keep_mask] crowded = crowded[keep_mask] - return point, valid, bbox, label, area, crowded + return point, visible, bbox, label, area, crowded diff --git a/chainercv/visualizations/vis_keypoint_coco.py b/chainercv/visualizations/vis_keypoint_coco.py index 58624fe35c..61f47e8a27 100644 --- a/chainercv/visualizations/vis_keypoint_coco.py +++ b/chainercv/visualizations/vis_keypoint_coco.py @@ -43,7 +43,7 @@ def vis_keypoint_coco( - img, point, valid=None, + img, point, visible=None, point_score=None, thresh=2, markersize=3, linewidth=1, ax=None): """Visualize keypoints organized as in COCO. 
@@ -54,15 +54,15 @@ def vis_keypoint_coco( >>> from chainercv.visualizations import vis_keypoint_coco >>> import matplotlib.pyplot as plt >>> data = COCOKeypointDataset(split='val') - >>> img, point, valid = data[10][:3] - >>> vis_keypoint_coco(img, point, valid) + >>> img, point, visible = data[10][:3] + >>> vis_keypoint_coco(img, point, visible) >>> plt.show() Args: img (~numpy.ndarray): See the table below. If this is :obj:`None`, no image is displayed. point (~numpy.ndarray): See the table below. - valid (~numpy.ndarray): See the table below. If this is + visible (~numpy.ndarray): See the table below. If this is :obj:`None`, all points are assumed to be visible. point_score (~numpy.ndarray): See the table below. If this is :obj:`None`, the confidence of all points is infinitely @@ -81,7 +81,7 @@ def vis_keypoint_coco( "RGB, :math:`[0, 255]`" :obj:`point`, ":math:`(R, K, 2)`", :obj:`float32`, \ ":math:`(y, x)`" - :obj:`valid`, ":math:`(R, K)`", :obj:`bool`, \ + :obj:`visible`, ":math:`(R, K)`", :obj:`bool`, \ "true when a keypoint is visible." 
:obj:`point_score`, ":math:`(R, K)`", :obj:`float32`, -- @@ -103,14 +103,14 @@ def vis_keypoint_coco( if point_score.shape != point.shape[:2]: raise ValueError('Mismatch in the number of instances or joints.') if point.shape[1:] != (len(coco_keypoint_names[human_id]), 2): - raise ValueError('point has invalid shape') + raise ValueError('point has invisible shape') - if valid is not None: - if valid.dtype != np.bool: - raise ValueError('The dtype of `valid` should be np.bool') - if valid.shape != point.shape[:2]: + if visible is not None: + if visible.dtype != np.bool: + raise ValueError('The dtype of `visible` should be np.bool') + if visible.shape != point.shape[:2]: raise ValueError('Mismatch in the number of instances or joints.') - for i, vld in enumerate(valid): + for i, vld in enumerate(visible): point_score[i, np.logical_not(vld)] = -np.inf for pnt, pnt_sc in zip(point, point_score): diff --git a/tests/visualizations_tests/test_vis_keypoint_coco.py b/tests/visualizations_tests/test_vis_keypoint_coco.py index 0a80260ec2..75ad231810 100644 --- a/tests/visualizations_tests/test_vis_keypoint_coco.py +++ b/tests/visualizations_tests/test_vis_keypoint_coco.py @@ -24,17 +24,17 @@ def _generate_point(n_inst, size): xs = np.random.uniform(0, W, size=(n_inst, n_joint)) point = np.stack((ys, xs), axis=2).astype(np.float32) - valid = np.random.randint(0, 2, size=(n_inst, n_joint)).astype(np.bool) + visible = np.random.randint(0, 2, size=(n_inst, n_joint)).astype(np.bool) point_score = np.random.uniform( 0, 6, size=(n_inst, n_joint)).astype(np.float32) - return point, valid, point_score + return point, visible, point_score @testing.parameterize(*testing.product({ 'n_inst': [3, 0], 'use_img': [False, True], - 'use_valid': [False, True], + 'use_visible': [False, True], 'use_point_score': [False, True] })) @unittest.skipUnless(_available, 'matplotlib is not installed') @@ -42,56 +42,56 @@ class TestVisKeypointCOCO(unittest.TestCase): def setUp(self): size = (32, 48) - 
self.point, valid, point_score = _generate_point(self.n_inst, size) + self.point, visible, point_score = _generate_point(self.n_inst, size) self.img = (np.random.randint( 0, 255, size=(3,) + size).astype(np.float32) if self.use_img else None) - self.valid = valid if self.use_valid else None + self.visible = visible if self.use_visible else None self.point_score = point_score if self.use_point_score else None def test_vis_keypoint_coco(self): ax = vis_keypoint_coco( - self.img, self.point, self.valid, + self.img, self.point, self.visible, self.point_score) self.assertIsInstance(ax, matplotlib.axes.Axes) @unittest.skipUnless(_available, 'matplotlib is not installed') -class TestVisKeypointCOCOInvalidInputs(unittest.TestCase): +class TestVisKeypointCOCOInvisibleInputs(unittest.TestCase): def setUp(self): size = (32, 48) n_inst = 10 - self.point, self.valid, self.point_score = _generate_point( + self.point, self.visible, self.point_score = _generate_point( n_inst, size) self.img = np.random.randint( 0, 255, size=(3,) + size).astype(np.float32) - def _check(self, img, point, valid, point_score): + def _check(self, img, point, visible, point_score): with self.assertRaises(ValueError): - vis_keypoint_coco(img, point, valid, point_score) + vis_keypoint_coco(img, point, visible, point_score) - def test_invalid_n_inst_point(self): - self._check(self.img, self.point[:5], self.valid, self.point_score) + def test_invisible_n_inst_point(self): + self._check(self.img, self.point[:5], self.visible, self.point_score) - def test_invalid_n_inst_valid(self): - self._check(self.img, self.point, self.valid[:5], self.point_score) + def test_invisible_n_inst_visible(self): + self._check(self.img, self.point, self.visible[:5], self.point_score) - def test_invalid_n_inst_point_score(self): - self._check(self.img, self.point, self.valid, self.point_score[:5]) + def test_invisible_n_inst_point_score(self): + self._check(self.img, self.point, self.visible, self.point_score[:5]) - def 
test_invalid_n_joint_point(self): - self._check(self.img, self.point[:, :15], self.valid, self.point_score) + def test_invisible_n_joint_point(self): + self._check(self.img, self.point[:, :15], self.visible, self.point_score) - def test_invalid_n_joint_valid(self): - self._check(self.img, self.point, self.valid[:, :15], self.point_score) + def test_invisible_n_joint_visible(self): + self._check(self.img, self.point, self.visible[:, :15], self.point_score) - def test_invalid_n_joint_point_score(self): - self._check(self.img, self.point, self.valid, self.point_score[:, :15]) + def test_invisible_n_joint_point_score(self): + self._check(self.img, self.point, self.visible, self.point_score[:, :15]) - def test_invalid_valid_dtype(self): - self._check(self.img, self.point, self.valid.astype(np.int32), + def test_invisible_visible_dtype(self): + self._check(self.img, self.point, self.visible.astype(np.int32), self.point_score) testing.run_module(__name__, __file__) From 26080942e21b3d5b80cf9130e476230d8a64a4bf Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 21 Feb 2019 17:05:50 +0900 Subject: [PATCH 034/100] change variable names --- .../eval_keypoint_detection_coco.py | 26 +++++++++---------- .../test_eval_keypoint_detection_coco.py | 18 ++++++------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/chainercv/evaluations/eval_keypoint_detection_coco.py b/chainercv/evaluations/eval_keypoint_detection_coco.py index 6ff5692bc9..4258552bfd 100644 --- a/chainercv/evaluations/eval_keypoint_detection_coco.py +++ b/chainercv/evaluations/eval_keypoint_detection_coco.py @@ -16,7 +16,7 @@ def eval_keypoint_detection_coco( pred_points, pred_labels, pred_scores, - gt_points, gt_valids, gt_bboxes=None, gt_labels=None, + gt_points, gt_visibles, gt_bboxes=None, gt_labels=None, gt_areas=None, gt_crowdeds=None): """Evaluate keypoint detection based on evaluation code of MS COCO. @@ -31,7 +31,7 @@ def eval_keypoint_detection_coco( This is used to rank instances. 
Note that this is not the confidene for each keypoint. gt_points (iterable of numpy.ndarray): See the table below. - gt_valids (iterable of numpy.ndarray): See the table below. + gt_visibles (iterable of numpy.ndarray): See the table below. gt_bboxes (iterable of numpy.ndarray): See the table below. This is optional. If this is :obj:`None`, the ground truth bounding boxes are esitmated from the ground truth @@ -52,7 +52,7 @@ def eval_keypoint_detection_coco( -- :obj:`gt_points`, ":math:`[(R, K, 2)]`", :obj:`float32`, \ ":math:`(y, x)`" - :obj:`gt_valids`, ":math:`[(R, K)]`", :obj:`bool`, -- + :obj:`gt_visibles`, ":math:`[(R, K)]`", :obj:`bool`, -- :obj:`gt_bboxes`, ":math:`[(R, 4)]`", :obj:`float32`, \ ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`" :obj:`gt_labels`, ":math:`[(R,)]`", :obj:`int32`, \ @@ -147,7 +147,7 @@ def eval_keypoint_detection_coco( pred_labels = iter(pred_labels) pred_scores = iter(pred_scores) gt_points = iter(gt_points) - gt_valids = iter(gt_valids) + gt_visibles = iter(gt_visibles) gt_bboxes = (iter(gt_bboxes) if gt_bboxes is not None else itertools.repeat(None)) gt_labels = iter(gt_labels) @@ -164,11 +164,11 @@ def eval_keypoint_detection_coco( pred_annos = [] gt_annos = [] existent_labels = {} - for i, (pred_point, pred_label, pred_score, gt_point, gt_is_valid, + for i, (pred_point, pred_label, pred_score, gt_point, gt_visible, gt_bbox, gt_label, gt_area, gt_crowded) in enumerate(six.moves.zip( pred_points, pred_labels, pred_scores, - gt_points, gt_valids, gt_bboxes, gt_labels, + gt_points, gt_visibles, gt_bboxes, gt_labels, gt_areas, gt_crowdeds)): if gt_bbox is None: gt_bbox = itertools.repeat(None) @@ -183,18 +183,18 @@ def eval_keypoint_detection_coco( pred_score): # http://cocodataset.org/#format-results # Visibility flag is currently not used for evaluation - is_v = np.ones(len(pred_pnt)) + v = np.ones(len(pred_pnt)) pred_annos.append( - _create_anno(pred_pnt, is_v, None, + _create_anno(pred_pnt, v, None, pred_lb, pred_sc, 
img_id=img_id, anno_id=len(pred_annos) + 1, ar=None, crw=0)) existent_labels[pred_lb] = True - for gt_pnt, gt_is_v, gt_bb, gt_lb, gt_ar, gt_crw in zip( - gt_point, gt_is_valid, gt_bbox, gt_label, gt_area, gt_crowded): + for gt_pnt, gt_v, gt_bb, gt_lb, gt_ar, gt_crw in zip( + gt_point, gt_visible, gt_bbox, gt_label, gt_area, gt_crowded): gt_annos.append( - _create_anno(gt_pnt, gt_is_v, gt_bb, gt_lb, None, + _create_anno(gt_pnt, gt_v, gt_bb, gt_lb, None, img_id=img_id, anno_id=len(gt_annos) + 1, ar=gt_ar, crw=gt_crw)) ids.append({'id': img_id}) @@ -276,7 +276,7 @@ def eval_keypoint_detection_coco( return results -def _create_anno(pnt, is_v, bb, lb, sc, img_id, anno_id, ar=None, crw=None): +def _create_anno(pnt, v, bb, lb, sc, img_id, anno_id, ar=None, crw=None): # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/coco.py#L342 y_min = np.min(pnt[:, 0]) x_min = np.min(pnt[:, 1]) @@ -293,7 +293,7 @@ def _create_anno(pnt, is_v, bb, lb, sc, img_id, anno_id, ar=None, crw=None): bb_xywh = [x_min, y_min, x_max - x_min, y_max - y_min] else: bb_xywh = [bb[1], bb[0], bb[3] - bb[1], bb[2] - bb[0]] - pnt = np.concatenate((pnt[:, [1, 0]], is_v[:, None]), axis=1) + pnt = np.concatenate((pnt[:, [1, 0]], v[:, None]), axis=1) anno = { 'image_id': img_id, 'category_id': lb, 'keypoints': pnt.reshape((-1)).tolist(), diff --git a/tests/evaluations_tests/test_eval_keypoint_detection_coco.py b/tests/evaluations_tests/test_eval_keypoint_detection_coco.py index 4e3ece2955..11d636d7d7 100644 --- a/tests/evaluations_tests/test_eval_keypoint_detection_coco.py +++ b/tests/evaluations_tests/test_eval_keypoint_detection_coco.py @@ -39,7 +39,7 @@ def setUp(self): self.pred_labels = [] self.pred_scores = [] self.gt_points = [] - self.gt_valids = [] + self.gt_visibles = [] self.gt_bboxes = [] self.gt_labels = [] for i in range(2): @@ -49,7 +49,7 @@ def setUp(self): self.pred_scores.append(np.random.uniform( 0.5, 1, size=(self.n_inst,)).astype(np.float32)) 
self.gt_points.append(point) - self.gt_valids.append(valid) + self.gt_visibles.append(valid) bbox = np.zeros((self.n_inst, 4), dtype=np.float32) for i, pnt in enumerate(point): y_min = np.min(pnt[:, 0]) @@ -71,13 +71,13 @@ def _check(self, result): def test_gt_bboxes_not_supplied(self): result = eval_keypoint_detection_coco( self.pred_points, self.pred_labels, self.pred_scores, - self.gt_points, self.gt_valids, None, self.gt_labels) + self.gt_points, self.gt_visibles, None, self.gt_labels) self._check(result) def test_area_not_supplied(self): result = eval_keypoint_detection_coco( self.pred_points, self.pred_labels, self.pred_scores, - self.gt_points, self.gt_valids, self.gt_bboxes, self.gt_labels) + self.gt_points, self.gt_visibles, self.gt_bboxes, self.gt_labels) self._check(result) self.assertFalse( @@ -93,7 +93,7 @@ def test_area_supplied(self): gt_areas = [[100] * self.n_inst for _ in range(2)] result = eval_keypoint_detection_coco( self.pred_points, self.pred_labels, self.pred_scores, - self.gt_points, self.gt_valids, self.gt_bboxes, self.gt_labels, + self.gt_points, self.gt_visibles, self.gt_bboxes, self.gt_labels, gt_areas=gt_areas, ) self._check(result) @@ -110,7 +110,7 @@ def test_crowded_supplied(self): gt_crowdeds = [[True] * self.n_inst for _ in range(2)] result = eval_keypoint_detection_coco( self.pred_points, self.pred_labels, self.pred_scores, - self.gt_points, self.gt_valids, self.gt_bboxes, self.gt_labels, + self.gt_points, self.gt_visibles, self.gt_bboxes, self.gt_labels, gt_crowdeds=gt_crowdeds, ) # When the only ground truth is crowded, nothing is evaluated. 
@@ -128,7 +128,7 @@ def setUpClass(cls): cls.dataset = np.load(request.urlretrieve(os.path.join( base_url, - 'eval_keypoint_detection_coco_dataset_2019_02_20.npz'))[0]) + 'eval_keypoint_detection_coco_dataset_2019_02_21.npz'))[0]) cls.result = np.load(request.urlretrieve(os.path.join( base_url, 'eval_keypoint_detection_coco_result_2019_02_20.npz'))[0]) @@ -139,7 +139,7 @@ def test_eval_keypoint_detection_coco(self): pred_scores = self.result['scores'] gt_points = self.dataset['points'] - gt_valids = self.dataset['valids'] + gt_visibles = self.dataset['visibles'] gt_bboxes = self.dataset['bboxes'] gt_labels = self.dataset['labels'] gt_areas = self.dataset['areas'] @@ -147,7 +147,7 @@ def test_eval_keypoint_detection_coco(self): result = eval_keypoint_detection_coco( pred_points, pred_labels, pred_scores, - gt_points, gt_valids, gt_bboxes, + gt_points, gt_visibles, gt_bboxes, gt_labels, gt_areas, gt_crowdeds) expected = { From 716043e92056fb35501f354750e4987d574d4768 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 21 Feb 2019 21:30:30 +0900 Subject: [PATCH 035/100] flake8 and test --- .../datasets/coco/coco_keypoint_dataset.py | 2 - .../coco_tests/test_coco_keypoint_dataset.py | 86 +++++++++++++++++++ .../test_vis_keypoint_coco.py | 9 +- 3 files changed, 92 insertions(+), 5 deletions(-) create mode 100644 tests/datasets_tests/coco_tests/test_coco_keypoint_dataset.py diff --git a/chainercv/datasets/coco/coco_keypoint_dataset.py b/chainercv/datasets/coco/coco_keypoint_dataset.py index 886eb2f252..2f0dfebf0a 100644 --- a/chainercv/datasets/coco/coco_keypoint_dataset.py +++ b/chainercv/datasets/coco/coco_keypoint_dataset.py @@ -4,8 +4,6 @@ import os from chainercv.chainer_experimental.datasets.sliceable import GetterDataset -from chainercv.datasets.coco.coco_instances_base_dataset import \ - COCOInstancesBaseDataset from chainercv.datasets.coco.coco_utils import get_coco from chainercv import utils diff --git 
a/tests/datasets_tests/coco_tests/test_coco_keypoint_dataset.py b/tests/datasets_tests/coco_tests/test_coco_keypoint_dataset.py new file mode 100644 index 0000000000..191e9c96ee --- /dev/null +++ b/tests/datasets_tests/coco_tests/test_coco_keypoint_dataset.py @@ -0,0 +1,86 @@ +import unittest + +import numpy as np + +from chainer import testing +from chainer.testing import attr + +from chainercv.datasets import coco_keypoint_names +from chainercv.datasets import COCOKeypointDataset +from chainercv.utils import assert_is_bbox +from chainercv.utils import assert_is_point_dataset + + +def _create_paramters(): + split_years = testing.product({ + 'split': ['train', 'val'], + 'year': ['2014', '2017']}) + split_years += [{'split': 'minival', 'year': '2014'}, + {'split': 'valminusminival', 'year': '2014'}] + use_and_return_args = testing.product({ + 'use_crowded': [False, True], + 'return_crowded': [False, True], + 'return_area': [False, True]}) + params = testing.product_dict( + split_years, + use_and_return_args) + return params + + +@testing.parameterize(*testing.product( + { + 'split': ['train', 'val'], + 'year': ['2014', '2017'], + 'use_crowded': [False, True], + 'return_crowded': [False, True], + 'return_area': [False, True], + } +)) +class TestCOCOKeypointDataset(unittest.TestCase): + + def setUp(self): + self.dataset = COCOKeypointDataset( + split=self.split, year=self.year, + use_crowded=self.use_crowded, return_area=self.return_area, + return_crowded=self.return_crowded) + + @attr.slow + def test_coco_bbox_dataset(self): + human_id = 0 + assert_is_point_dataset( + self.dataset, len(coco_keypoint_names[human_id]), + n_example=30) + + for _ in range(10): + i = np.random.randint(0, len(self.dataset)) + img, point, _, bbox, label = self.dataset[i][:5] + assert_is_bbox(bbox, img.shape[1:]) + self.assertEqual(len(bbox), len(point)) + + self.assertIsInstance(label, np.ndarray) + self.assertEqual(label.dtype, np.int32) + self.assertEqual(label.shape, (point.shape[0],)) + 
+ if self.return_area: + for _ in range(10): + i = np.random.randint(0, len(self.dataset)) + _, point, _, _, _, area = self.dataset[i][:6] + self.assertIsInstance(area, np.ndarray) + self.assertEqual(area.dtype, np.float32) + self.assertEqual(area.shape, (point.shape[0],)) + + if self.return_crowded: + for _ in range(10): + i = np.random.randint(0, len(self.dataset)) + example = self.dataset[i] + crowded = example[-1] + point = example[1] + self.assertIsInstance(crowded, np.ndarray) + self.assertEqual(crowded.dtype, np.bool) + self.assertEqual(crowded.shape, (point.shape[0],)) + + if not self.use_crowded: + np.testing.assert_equal(crowded, 0) + + +testing.run_module(__name__, __file__) diff --git a/tests/visualizations_tests/test_vis_keypoint_coco.py b/tests/visualizations_tests/test_vis_keypoint_coco.py index 75ad231810..0e776ce8b1 100644 --- a/tests/visualizations_tests/test_vis_keypoint_coco.py +++ b/tests/visualizations_tests/test_vis_keypoint_coco.py @@ -82,13 +82,16 @@ def test_invisible_n_inst_point_score(self): self._check(self.img, self.point, self.visible, self.point_score[:5]) def test_invisible_n_joint_point(self): - self._check(self.img, self.point[:, :15], self.visible, self.point_score) + self._check( + self.img, self.point[:, :15], self.visible, self.point_score) def test_invisible_n_joint_visible(self): - self._check(self.img, self.point, self.visible[:, :15], self.point_score) + self._check( + self.img, self.point, self.visible[:, :15], self.point_score) def test_invisible_n_joint_point_score(self): - self._check(self.img, self.point, self.visible, self.point_score[:, :15]) + self._check( + self.img, self.point, self.visible, self.point_score[:, :15]) def test_invisible_visible_dtype(self): self._check(self.img, self.point, self.visible.astype(np.int32), From 3320b399a6a5d93ad95e5c1f62a2d2f62c204307 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 21 Feb 2019 21:33:31 +0900 Subject: [PATCH 036/100] fix class name --- 
tests/evaluations_tests/test_eval_keypoint_detection_coco.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/evaluations_tests/test_eval_keypoint_detection_coco.py b/tests/evaluations_tests/test_eval_keypoint_detection_coco.py index 11d636d7d7..aa030cccb4 100644 --- a/tests/evaluations_tests/test_eval_keypoint_detection_coco.py +++ b/tests/evaluations_tests/test_eval_keypoint_detection_coco.py @@ -30,7 +30,7 @@ def _generate_point(n_inst, size): @unittest.skipUnless(_available, 'pycocotools is not installed') -class TestEvalPointCOCOSingleClass(unittest.TestCase): +class TestEvalKeypointCOCOSingleClass(unittest.TestCase): n_inst = 3 From a467173facf3e8da8bbbd8f946a8d2d2995a247d Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 21 Feb 2019 21:33:31 +0900 Subject: [PATCH 037/100] fix class name --- tests/evaluations_tests/test_eval_keypoint_detection_coco.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/evaluations_tests/test_eval_keypoint_detection_coco.py b/tests/evaluations_tests/test_eval_keypoint_detection_coco.py index 11d636d7d7..2235847640 100644 --- a/tests/evaluations_tests/test_eval_keypoint_detection_coco.py +++ b/tests/evaluations_tests/test_eval_keypoint_detection_coco.py @@ -30,7 +30,7 @@ def _generate_point(n_inst, size): @unittest.skipUnless(_available, 'pycocotools is not installed') -class TestEvalPointCOCOSingleClass(unittest.TestCase): +class TestEvalKeypointDetectionCOCOSimple(unittest.TestCase): n_inst = 3 From 91b2b5225b30113c60fe25a6964353093377fc84 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 21 Feb 2019 21:49:34 +0900 Subject: [PATCH 038/100] fix demo --- examples/mask_rcnn/demo.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/mask_rcnn/demo.py b/examples/mask_rcnn/demo.py index ef16dbdac0..9483876076 100644 --- a/examples/mask_rcnn/demo.py +++ b/examples/mask_rcnn/demo.py @@ -19,9 +19,8 @@ def main(): choices=('mask_rcnn_fpn_resnet50', 
'mask_rcnn_fpn_resnet101'), default='mask_rcnn_fpn_resnet50' ) - group = parser.add_mutually_exclusive_group() - group.add_argument('--pretrained-model') - group.add_argument('--snapshot') + parser.add_argument('--gpu', type=int, default=-1) + parser.add_argument('--pretrained-model', default='coco') parser.add_argument('image') args = parser.parse_args() From 8dd3706e61e9a37af78387792ef5e42c7df4248a Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 21 Feb 2019 21:57:13 +0900 Subject: [PATCH 039/100] fix demo --- examples/mask_rcnn/demo.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/mask_rcnn/demo.py b/examples/mask_rcnn/demo.py index 9483876076..d95eacc567 100644 --- a/examples/mask_rcnn/demo.py +++ b/examples/mask_rcnn/demo.py @@ -13,7 +13,6 @@ def main(): parser = argparse.ArgumentParser() - parser.add_argument('--gpu', type=int, default=-1) parser.add_argument( '--model', choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'), From 7191dc036c0e53b9dda85c63cc258a99ccd79d72 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 21 Feb 2019 21:56:53 +0900 Subject: [PATCH 040/100] demo works --- chainercv/links/model/mask_rcnn/__init__.py | 1 + .../links/model/mask_rcnn/keypoint_head.py | 124 ++++++++++++++++++ chainercv/links/model/mask_rcnn/mask_rcnn.py | 93 +++++++++---- .../model/mask_rcnn/mask_rcnn_fpn_resnet.py | 8 +- examples/mask_rcnn/demo.py | 49 +++++-- 5 files changed, 232 insertions(+), 43 deletions(-) create mode 100644 chainercv/links/model/mask_rcnn/keypoint_head.py diff --git a/chainercv/links/model/mask_rcnn/__init__.py b/chainercv/links/model/mask_rcnn/__init__.py index c9e910a524..a8ceb6978a 100644 --- a/chainercv/links/model/mask_rcnn/__init__.py +++ b/chainercv/links/model/mask_rcnn/__init__.py @@ -1,3 +1,4 @@ +from chainercv.links.model.mask_rcnn.keypoint_head import KeypointHead # NOQA from chainercv.links.model.mask_rcnn.mask_head import mask_loss_post # NOQA from chainercv.links.model.mask_rcnn.mask_head import 
mask_loss_pre # NOQA from chainercv.links.model.mask_rcnn.mask_head import MaskHead # NOQA diff --git a/chainercv/links/model/mask_rcnn/keypoint_head.py b/chainercv/links/model/mask_rcnn/keypoint_head.py new file mode 100644 index 0000000000..9994c19554 --- /dev/null +++ b/chainercv/links/model/mask_rcnn/keypoint_head.py @@ -0,0 +1,124 @@ +from __future__ import division + +import numpy as np +import PIL + +import cv2 + +import chainer +import chainer.links as L +import chainer.functions as F +from chainer.backends import cuda +from chainer.initializers import HeNormal +from chainer.initializers import Normal + +from chainercv.links import Conv2DActiv +from chainercv.transforms.image.resize import resize +from chainercv.utils.bbox.bbox_iou import bbox_iou +from chainercv.utils.mask.mask_to_bbox import mask_to_bbox + + +class KeypointHead(chainer.Chain): + + _canonical_scale = 224 + _roi_size = 14 + _roi_sample_ratio = 2 + map_size = 56 + + def __init__(self, n_point, scales): + super(KeypointHead, self).__init__() + + initialW = HeNormal(1, fan_option='fan_out') + with self.init_scope(): + self.conv1 = Conv2DActiv(512, 3, pad=1, initialW=initialW) + self.conv2 = Conv2DActiv(512, 3, pad=1, initialW=initialW) + self.conv3 = Conv2DActiv(512, 3, pad=1, initialW=initialW) + self.conv4 = Conv2DActiv(512, 3, pad=1, initialW=initialW) + self.conv5 = Conv2DActiv(512, 3, pad=1, initialW=initialW) + self.conv6 = Conv2DActiv(512, 3, pad=1, initialW=initialW) + self.conv7 = Conv2DActiv(512, 3, pad=1, initialW=initialW) + self.conv8 = Conv2DActiv(512, 3, pad=1, initialW=initialW) + self.point = L.Deconvolution2D( + n_point, 4, pad=1, stride=2, initialW=initialW) + + self._scales = scales + self._n_point = n_point + + def __call__(self, hs, rois, roi_indices): + pooled_hs = [] + for l, h in enumerate(hs): + if len(rois[l]) == 0: + continue + + pooled_hs.append(F.roi_average_align_2d( + h, rois[l], roi_indices[l], + self._roi_size, + self._scales[l], self._roi_sample_ratio)) + + 
if len(pooled_hs) == 0: + out_size = self.map_size + point = chainer.Variable( + self.xp.empty((0, self._n_class, out_size, out_size), dtype=np.float32)) + return segs + + h = F.concat(pooled_hs, axis=0) + h = self.conv1(h) + h = self.conv2(h) + h = self.conv3(h) + h = self.conv4(h) + h = self.conv5(h) + h = self.conv6(h) + h = self.conv7(h) + h = self.conv8(h) + h = self.point(h) + return F.resize_images(h, (self.map_size, self.map_size)) + + def distribute(self, rois, roi_indices): + # Compleetely same as MaskHead.distribute + size = self.xp.sqrt(self.xp.prod(rois[:, 2:] + 1 - rois[:, :2], axis=1)) + level = self.xp.floor(self.xp.log2( + size / self._canonical_scale + 1e-6)).astype(np.int32) + # skip last level + level = self.xp.clip( + level + len(self._scales) // 2, 0, len(self._scales) - 2) + + masks = [level == l for l in range(len(self._scales))] + rois = [rois[mask] for mask in masks] + roi_indices = [roi_indices[mask] for mask in masks] + order = self.xp.argsort( + self.xp.concatenate([self.xp.where(mask)[0] for mask in masks])) + return rois, roi_indices, order + + def decode(self, point_maps, bboxes): + points = [] + point_scores = [] + for bbox, point_map in zip(bboxes, point_maps): + point = np.zeros((len(bbox), self._n_point, 2), dtype=np.float32) + point_score = np.zeros((len(bbox), self._n_point), dtype=np.float32) + + hs = bbox[:, 2] - bbox[:, 0] + ws = bbox[:, 3] - bbox[:, 1] + h_ceils = np.ceil(np.maximum(hs, 1)) + w_ceils = np.ceil(np.maximum(ws, 1)) + h_corrections = hs / h_ceils + w_corrections = ws / w_ceils + for i, (bb, point_m) in enumerate(zip(bbox, point_map)): + point_m = cv2.resize( + point_m.transpose((1, 2, 0)), + (w_ceils[i], h_ceils[i]), + interpolation=cv2.INTER_CUBIC).transpose( + (2, 0, 1)) + _, H, W = point_m.shape + for k in range(self._n_point): + pos = point_m[k].argmax() + x_int = pos % W + y_int = (pos - x_int) // W + + y = (y_int + 0.5) * h_corrections[i] + x = (x_int + 0.5) * w_corrections[i] + point[i, k, 0] = y + bb[0] 
+ point[i, k, 1] = x + bb[1] + point_score[i, k] = point_m[k, y_int, x_int] + points.append(point) + point_scores.append(point_score) + return points, point_scores diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py index 9f59f49d92..81e282219d 100644 --- a/chainercv/links/model/mask_rcnn/mask_rcnn.py +++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py @@ -48,13 +48,18 @@ class MaskRCNN(chainer.Chain): max_size = 1333 stride = 32 - def __init__(self, extractor, rpn, head, mask_head): + def __init__(self, extractor, rpn, head, mask_head, + keypoint_head, mode='mask'): super(MaskRCNN, self).__init__() with self.init_scope(): self.extractor = extractor self.rpn = rpn self.head = head - self.mask_head = mask_head + if mode == 'mask': + self.mask_head = mask_head + elif mode =='keypoint': + self.keypoint_head = keypoint_head + self.mode = mode self.use_preset('visualize') @@ -133,32 +138,64 @@ def predict(self, imgs): scales, sizes, self.nms_thresh, self.score_thresh) rescaled_bboxes = [bbox * scale for scale, bbox in zip(scales, bboxes)] - # Change bboxes to RoI and RoI indices format - mask_rois_before_reordering, mask_roi_indices_before_reordering =\ - _list_to_flat(rescaled_bboxes) - mask_rois, mask_roi_indices, order = self.mask_head.distribute( - mask_rois_before_reordering, mask_roi_indices_before_reordering) - with chainer.using_config('train', False), chainer.no_backprop_mode(): - segms = F.sigmoid( - self.mask_head(hs, mask_rois, mask_roi_indices)).data - # Put the order of proposals back to the one used by bbox head. 
- segms = segms[order] - segms = _flat_to_list( - segms, mask_roi_indices_before_reordering, len(imgs)) - segms = [segm if segm is not None else - self.xp.zeros( - (0, self.mask_head.mask_size, self.mask_head.mask_size), - dtype=np.float32) - for segm in segms] - - segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms] - bboxes = [chainer.backends.cuda.to_cpu(bbox / scale) - for bbox, scale in zip(rescaled_bboxes, scales)] - labels = [chainer.backends.cuda.to_cpu(label) for label in labels] - # Currently MaskHead only supports numpy inputs - masks = self.mask_head.decode(segms, bboxes, labels, sizes) - scores = [cuda.to_cpu(score) for score in scores] - return masks, labels, scores + if self.mode == 'mask': + # Change bboxes to RoI and RoI indices format + mask_rois_before_reordering, mask_roi_indices_before_reordering =\ + _list_to_flat(rescaled_bboxes) + mask_rois, mask_roi_indices, order = self.mask_head.distribute( + mask_rois_before_reordering, mask_roi_indices_before_reordering) + with chainer.using_config('train', False), chainer.no_backprop_mode(): + segms = F.sigmoid( + self.mask_head(hs, mask_rois, mask_roi_indices)).data + # Put the order of proposals back to the one used by bbox head. 
+ segms = segms[order] + segms = _flat_to_list( + segms, mask_roi_indices_before_reordering, len(imgs)) + segms = [segm if segm is not None else + self.xp.zeros( + (0, self.mask_head.mask_size, self.mask_head.mask_size), + dtype=np.float32) + for segm in segms] + + segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms] + bboxes = [chainer.backends.cuda.to_cpu(bbox / scale) + for bbox, scale in zip(rescaled_bboxes, scales)] + labels = [chainer.backends.cuda.to_cpu(label) for label in labels] + # Currently MaskHead only supports numpy inputs + masks = self.mask_head.decode(segms, bboxes, labels, sizes) + scores = [cuda.to_cpu(score) for score in scores] + return masks, labels, scores + elif self.mode == 'keypoint': + (point_rois_before_reordering, + point_roi_indices_before_reordering) = _list_to_flat( + rescaled_bboxes) + point_rois, point_roi_indices, order =\ + self.keypoint_head.distribute( + point_rois_before_reordering, + point_roi_indices_before_reordering) + with chainer.using_config('train', False), chainer.no_backprop_mode(): + point_maps = self.keypoint_head( + hs, point_rois, point_roi_indices).data + point_maps = point_maps[order] + point_maps = _flat_to_list( + point_maps, point_roi_indices_before_reordering, len(imgs)) + point_maps = [point_map if point_map is not None else + self.xp.zeros( + (0, self.keypoint_head.n_point, + self.keypoint_head.map_size, + self.keypoint_head.map_size), + dtype=np.float32) + for point_map in point_maps] + point_maps = [ + chainer.backends.cuda.to_cpu(point_map) + for point_map in point_maps] + bboxes = [chainer.cuda.to_cpu(bbox / scale) + for bbox, scale in zip(rescaled_bboxes, scales)] + points, point_scores = self.keypoint_head.decode( + point_maps, bboxes) + labels = [cuda.to_cpu(label) for label in labels] + scores = [cuda.to_cpu(score) for score in scores] + return points, point_scores, bboxes, labels, scores def prepare(self, imgs): """Preprocess images. 
diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py b/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py index d18f92f628..3048ce80cf 100644 --- a/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py +++ b/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py @@ -6,6 +6,7 @@ from chainercv.links.model.fpn import FPN from chainercv.links.model.fpn import Head from chainercv.links.model.fpn import RPN +from chainercv.links.model.mask_rcnn.keypoint_head import KeypointHead from chainercv.links.model.mask_rcnn.mask_head import MaskHead from chainercv.links.model.mask_rcnn.mask_rcnn import MaskRCNN from chainercv.links.model.resnet import ResNet101 @@ -22,7 +23,8 @@ class MaskRCNNFPNResNet(MaskRCNN): A subclass of this class should have :obj:`_base` and :obj:`_models`. """ - def __init__(self, n_fg_class=None, pretrained_model=None): + def __init__(self, n_fg_class=None, pretrained_model=None, + n_point=17, mode='mask'): param, path = utils.prepare_pretrained_model( {'n_fg_class': n_fg_class}, pretrained_model, self._models) @@ -39,7 +41,9 @@ def __init__(self, n_fg_class=None, pretrained_model=None): extractor=extractor, rpn=RPN(extractor.scales), head=Head(n_class, extractor.scales), - mask_head=MaskHead(n_class, extractor.scales) + mask_head=MaskHead(n_class, extractor.scales), + keypoint_head=KeypointHead(n_point, extractor.scales), + mode=mode, ) if path == 'imagenet': _copyparams( diff --git a/examples/mask_rcnn/demo.py b/examples/mask_rcnn/demo.py index 9483876076..682b742fa2 100644 --- a/examples/mask_rcnn/demo.py +++ b/examples/mask_rcnn/demo.py @@ -13,7 +13,6 @@ def main(): parser = argparse.ArgumentParser() - parser.add_argument('--gpu', type=int, default=-1) parser.add_argument( '--model', choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'), @@ -21,31 +20,55 @@ def main(): ) parser.add_argument('--gpu', type=int, default=-1) parser.add_argument('--pretrained-model', default='coco') + parser.add_argument( + '--mode', + 
choices=('mask', 'keypoint'), + default='mask') parser.add_argument('image') args = parser.parse_args() + if args.mode == 'mask': + n_fg_class = len(coco_instance_segmentation_label_names) + elif args.mode == 'keypoint': + n_fg_class = 1 if args.model == 'mask_rcnn_fpn_resnet50': model = MaskRCNNFPNResNet50( - n_fg_class=len(coco_instance_segmentation_label_names), - pretrained_model=args.pretrained_model) + n_fg_class=n_fg_class, + pretrained_model=args.pretrained_model, + mode=args.mode + ) elif args.model == 'mask_rcnn_fpn_resnet101': model = MaskRCNNFPNResNet101( - n_fg_class=len(coco_instance_segmentation_label_names), - pretrained_model=args.pretrained_model) + n_fg_class=n_fg_class, + pretrained_model=args.pretrained_model, + mode=args.mode + ) if args.gpu >= 0: chainer.cuda.get_device_from_id(args.gpu).use() model.to_gpu() img = utils.read_image(args.image) - masks, labels, scores = model.predict([img]) - mask = masks[0] - label = labels[0] - score = scores[0] - chainercv.visualizations.vis_instance_segmentation( - img, mask, label, score, - label_names=coco_instance_segmentation_label_names) - plt.show() + if args.mode == 'mask': + masks, labels, scores = model.predict([img]) + mask = masks[0] + label = labels[0] + score = scores[0] + chainercv.visualizations.vis_instance_segmentation( + img, mask, label, score, + label_names=coco_instance_segmentation_label_names) + plt.show() + elif args.mode == 'keypoint': + points, point_scores, bboxes, labels, scores = model.predict([img]) + point = points[0] + point_score = point_scores[0] + bbox = bboxes[0] + label = labels[0] + score = scores[0] + ax = chainercv.visualizations.vis_keypoint_coco( + img, point, None, point_score) + chainercv.visualizations.vis_bbox(None, bbox, score=score, ax=ax) + plt.show() if __name__ == '__main__': From 62cb5e825287d71a26429540e677fc38266bc607 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 21 Feb 2019 22:43:16 +0900 Subject: [PATCH 041/100] eval --- 
.../links/model/mask_rcnn/keypoint_head.py | 16 ++-- .../eval_keypoint_detection.py | 93 +++++++++++++++++++ 2 files changed, 101 insertions(+), 8 deletions(-) create mode 100644 examples/keypoint_detection/eval_keypoint_detection.py diff --git a/chainercv/links/model/mask_rcnn/keypoint_head.py b/chainercv/links/model/mask_rcnn/keypoint_head.py index 9994c19554..7f4d967803 100644 --- a/chainercv/links/model/mask_rcnn/keypoint_head.py +++ b/chainercv/links/model/mask_rcnn/keypoint_head.py @@ -42,7 +42,7 @@ def __init__(self, n_point, scales): n_point, 4, pad=1, stride=2, initialW=initialW) self._scales = scales - self._n_point = n_point + self.n_point = n_point def __call__(self, hs, rois, roi_indices): pooled_hs = [] @@ -56,10 +56,10 @@ def __call__(self, hs, rois, roi_indices): self._scales[l], self._roi_sample_ratio)) if len(pooled_hs) == 0: - out_size = self.map_size - point = chainer.Variable( - self.xp.empty((0, self._n_class, out_size, out_size), dtype=np.float32)) - return segs + return chainer.Variable( + self.xp.empty( + (0, self.n_point, self.map_size, self.map_size), + dtype=np.float32)) h = F.concat(pooled_hs, axis=0) h = self.conv1(h) @@ -93,8 +93,8 @@ def decode(self, point_maps, bboxes): points = [] point_scores = [] for bbox, point_map in zip(bboxes, point_maps): - point = np.zeros((len(bbox), self._n_point, 2), dtype=np.float32) - point_score = np.zeros((len(bbox), self._n_point), dtype=np.float32) + point = np.zeros((len(bbox), self.n_point, 2), dtype=np.float32) + point_score = np.zeros((len(bbox), self.n_point), dtype=np.float32) hs = bbox[:, 2] - bbox[:, 0] ws = bbox[:, 3] - bbox[:, 1] @@ -109,7 +109,7 @@ def decode(self, point_maps, bboxes): interpolation=cv2.INTER_CUBIC).transpose( (2, 0, 1)) _, H, W = point_m.shape - for k in range(self._n_point): + for k in range(self.n_point): pos = point_m[k].argmax() x_int = pos % W y_int = (pos - x_int) // W diff --git a/examples/keypoint_detection/eval_keypoint_detection.py 
b/examples/keypoint_detection/eval_keypoint_detection.py new file mode 100644 index 0000000000..14da196e8c --- /dev/null +++ b/examples/keypoint_detection/eval_keypoint_detection.py @@ -0,0 +1,93 @@ +import argparse + +import chainer +from chainer import iterators + +from chainercv.datasets import COCOKeypointDataset +from chainercv.evaluations import eval_keypoint_detection_coco +from chainercv.links import MaskRCNNFPNResNet101 +from chainercv.links import MaskRCNNFPNResNet50 +from chainercv.utils import apply_to_iterator +from chainercv.utils import ProgressHook + +models = { + # model: (class, dataset -> pretrained_model, default batchsize) + 'mask_rcnn_fpn_resnet50': (MaskRCNNFPNResNet50, + {}, 1), + 'mask_rcnn_fpn_resnet101': (MaskRCNNFPNResNet101, + {}, 1), +} + + +def setup(dataset, model_name, pretrained_model, batchsize): + cls, pretrained_models, default_batchsize = models[model_name] + dataset_name = dataset + if pretrained_model is None: + pretrained_model = pretrained_models.get(dataset_name, dataset_name) + if batchsize is None: + batchsize = default_batchsize + + if dataset_name == 'coco': + dataset = COCOKeypointDataset( + split='val', + use_crowded=True, return_crowded=True, + return_area=True) + n_fg_class = 1 + n_point = 17 + model = cls( + n_fg_class=n_fg_class, + pretrained_model=pretrained_model, + n_point=n_point, + mode='keypoint' + ) + model.use_preset('evaluate') + + def eval_(out_values, rest_values): + (pred_points, pred_point_scores, pred_bboxes, pred_labels, + pred_scores) = out_values + (gt_points, gt_visibles, gt_bboxes, gt_labels, + gt_areas, gt_crowdeds) = rest_values + + result = eval_keypoint_detection_coco( + pred_points, pred_labels, pred_scores, + gt_points, gt_visibles, gt_bboxes, gt_labels, + gt_areas, gt_crowdeds) + + print() + for area in ('all', 'large', 'medium'): + print('mmAP ({}):'.format(area), + result['map/iou=0.50:0.95/area={}/max_dets=20'.format( + area)]) + + return dataset, eval_, model, batchsize + + +def 
main(): + parser = argparse.ArgumentParser() + parser.add_argument('--dataset', choices=('coco',), default='coco') + parser.add_argument('--model', choices=sorted(models.keys())) + parser.add_argument('--pretrained-model') + parser.add_argument('--batchsize', type=int) + parser.add_argument('--gpu', type=int, default=-1) + args = parser.parse_args() + + dataset, eval_, model, batchsize = setup( + args.dataset, args.model, args.pretrained_model, args.batchsize) + + if args.gpu >= 0: + chainer.cuda.get_device_from_id(args.gpu).use() + model.to_gpu() + + iterator = iterators.MultithreadIterator( + dataset, batchsize, repeat=False, shuffle=False) + + in_values, out_values, rest_values = apply_to_iterator( + model.predict, iterator, hook=ProgressHook(len(dataset))) + # delete unused iterators explicitly + del in_values + + eval_(out_values, rest_values) + + +if __name__ == '__main__': + main() From 18af5fb58356af351761d12d598d4ef302491fde Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 22 Feb 2019 08:24:16 +0900 Subject: [PATCH 042/100] flake8 --- chainercv/datasets/__init__.py | 2 +- tests/visualizations_tests/test_vis_keypoint_coco.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/chainercv/datasets/__init__.py b/chainercv/datasets/__init__.py index fcb3a6c772..bb6ed650dc 100644 --- a/chainercv/datasets/__init__.py +++ b/chainercv/datasets/__init__.py @@ -19,8 +19,8 @@ from chainercv.datasets.coco.coco_utils import coco_keypoint_names # NOQA from chainercv.datasets.coco.coco_utils import coco_semantic_segmentation_label_colors # NOQA from chainercv.datasets.coco.coco_utils import coco_semantic_segmentation_label_names # NOQA -from chainercv.datasets.cub.cub_label_dataset import CUBLabelDataset # NOQA from chainercv.datasets.cub.cub_keypoint_dataset import CUBKeypointDataset # NOQA +from chainercv.datasets.cub.cub_label_dataset import CUBLabelDataset # NOQA from chainercv.datasets.cub.cub_utils import cub_label_names # NOQA from 
chainercv.datasets.directory_parsing_label_dataset import directory_parsing_label_names # NOQA from chainercv.datasets.directory_parsing_label_dataset import DirectoryParsingLabelDataset # NOQA diff --git a/tests/visualizations_tests/test_vis_keypoint_coco.py b/tests/visualizations_tests/test_vis_keypoint_coco.py index 0e776ce8b1..97c2f09a9b 100644 --- a/tests/visualizations_tests/test_vis_keypoint_coco.py +++ b/tests/visualizations_tests/test_vis_keypoint_coco.py @@ -97,4 +97,5 @@ def test_invisible_visible_dtype(self): self._check(self.img, self.point, self.visible.astype(np.int32), self.point_score) + testing.run_module(__name__, __file__) From 9f4c9274392932e02f3f5cca75fa80d42cb1b60c Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 22 Feb 2019 08:26:07 +0900 Subject: [PATCH 043/100] delete zerograd --- examples/mask_rcnn/train_multi.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py index 44f1e23249..5061e68dce 100644 --- a/examples/mask_rcnn/train_multi.py +++ b/examples/mask_rcnn/train_multi.py @@ -112,7 +112,6 @@ def __call__(self, imgs, masks, labels, bboxes): # ChainerMN hangs when a subset of nodes has a different # computational graph from the rest. 
loss = chainer.Variable(self.xp.array(0, dtype=np.float32)) - self.zerograds() return loss From 7e610d48ae564427105da31621c0c56f6f312972 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Mon, 25 Feb 2019 16:08:17 +0900 Subject: [PATCH 044/100] complete graph when n_roi == 0 --- examples/mask_rcnn/train_multi.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py index 5061e68dce..d0a4255af4 100644 --- a/examples/mask_rcnn/train_multi.py +++ b/examples/mask_rcnn/train_multi.py @@ -4,6 +4,7 @@ import PIL import chainer +import chainer.functions as F import chainer.links as L from chainer.optimizer_hooks import WeightDecay from chainer import serializers @@ -96,22 +97,28 @@ def __call__(self, imgs, masks, labels, bboxes): rois, roi_indices, masks, bboxes, head_gt_labels, self.model.mask_head.mask_size) n_roi = sum([len(roi) for roi in mask_rois]) + if n_roi == 0: + H, W = sizes[0] + mask_rois = [np.array([[ + H // 4, + W // 4, + 3 * H // 4, + 3 * W // 4]], dtype=np.float32)] + mask_roi_indices = [np.array([0], dtype=np.int32)] + segms = self.model.mask_head(hs, mask_rois, mask_roi_indices) if n_roi > 0: - segms = self.model.mask_head(hs, mask_rois, mask_roi_indices) mask_loss = mask_loss_post( segms, mask_roi_indices, gt_segms, gt_mask_labels, B) - loss = (rpn_loc_loss + rpn_conf_loss + - head_loc_loss + head_conf_loss + mask_loss) - chainer.reporter.report({ - 'loss': loss, - 'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss, - 'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss, - 'loss/mask': mask_loss}, - self) else: - # ChainerMN hangs when a subset of nodes has a different - # computational graph from the rest. 
- loss = chainer.Variable(self.xp.array(0, dtype=np.float32)) + mask_loss = 0 * F.sum(segms) + loss = (rpn_loc_loss + rpn_conf_loss + + head_loc_loss + head_conf_loss + mask_loss) + chainer.reporter.report({ + 'loss': loss, + 'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss, + 'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss, + 'loss/mask': mask_loss}, + self) return loss From b5cb93a5b572d72aabea0f67d4be4312416a5196 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Mon, 25 Feb 2019 16:08:23 +0900 Subject: [PATCH 045/100] flake8 --- examples/mask_rcnn/train_multi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py index d0a4255af4..217b31dc86 100644 --- a/examples/mask_rcnn/train_multi.py +++ b/examples/mask_rcnn/train_multi.py @@ -48,7 +48,8 @@ def __call__(self, imgs, masks, labels, bboxes): pad_size = np.array( [im.shape[1:] for im in imgs]).max(axis=0) pad_size = ( - np.ceil(pad_size / self.model.stride) * self.model.stride).astype(int) + np.ceil( + pad_size / self.model.stride) * self.model.stride).astype(int) x = np.zeros( (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32) for i, img in enumerate(imgs): @@ -131,7 +132,6 @@ def __init__(self, min_size, max_size, mean): def __call__(self, in_data): img, mask, label, bbox = in_data - original = mask.shape # Flipping img, params = transforms.random_flip( img, x_random=True, return_param=True) From 7e707d8ab247a03de433135c59ef2bf3fd9de35b Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Mon, 25 Feb 2019 16:29:18 +0900 Subject: [PATCH 046/100] fix --- examples/mask_rcnn/train_multi.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py index 217b31dc86..44273026f7 100644 --- a/examples/mask_rcnn/train_multi.py +++ b/examples/mask_rcnn/train_multi.py @@ -98,19 +98,15 @@ def __call__(self, 
imgs, masks, labels, bboxes): rois, roi_indices, masks, bboxes, head_gt_labels, self.model.mask_head.mask_size) n_roi = sum([len(roi) for roi in mask_rois]) - if n_roi == 0: - H, W = sizes[0] - mask_rois = [np.array([[ - H // 4, - W // 4, - 3 * H // 4, - 3 * W // 4]], dtype=np.float32)] - mask_roi_indices = [np.array([0], dtype=np.int32)] - segms = self.model.mask_head(hs, mask_rois, mask_roi_indices) if n_roi > 0: + segms = self.model.mask_head(hs, mask_rois, mask_roi_indices) mask_loss = mask_loss_post( segms, mask_roi_indices, gt_segms, gt_mask_labels, B) else: + # Compute dummy variables to complete the computational graph + mask_rois[0] = self.xp.array([[0, 0, 1, 1]], dtype=np.float32) + mask_roi_indices[0] = self.xp.array([0], dtype=np.int32) + segms = self.model.mask_head(hs, mask_rois, mask_roi_indices) mask_loss = 0 * F.sum(segms) loss = (rpn_loc_loss + rpn_conf_loss + head_loc_loss + head_conf_loss + mask_loss) From 9a2606566e3f23bef0375bef2119d3b7a090279b Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Mon, 4 Mar 2019 18:29:22 +0900 Subject: [PATCH 047/100] use bilinear interpolation with kernel size 4 --- .../links/model/mask_rcnn/keypoint_head.py | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/chainercv/links/model/mask_rcnn/keypoint_head.py b/chainercv/links/model/mask_rcnn/keypoint_head.py index 7f4d967803..0d67ab2c21 100644 --- a/chainercv/links/model/mask_rcnn/keypoint_head.py +++ b/chainercv/links/model/mask_rcnn/keypoint_head.py @@ -18,6 +18,19 @@ from chainercv.utils.mask.mask_to_bbox import mask_to_bbox +# make a bilinear interpolation kernel +# credit @longjon +def _upsample_filt(size): + factor = (size + 1) // 2 + if size % 2 == 1: + center = factor - 1 + else: + center = factor - 0.5 + og = np.ogrid[:size, :size] + return (1 - abs(og[0] - center) / factor) * \ + (1 - abs(og[1] - center) / factor) + + class KeypointHead(chainer.Chain): _canonical_scale = 224 @@ -40,6 +53,11 @@ def __init__(self, 
n_point, scales): self.conv8 = Conv2DActiv(512, 3, pad=1, initialW=initialW) self.point = L.Deconvolution2D( n_point, 4, pad=1, stride=2, initialW=initialW) + # Do not update the weight of this link + self.upsample = L.Deconvolution2D( + n_point, n_point, 4, pad=1, stride=2, nobias=True) + self.upsample.W.data[:] = 0 + self.upsample.W.data[np.arange(n_point), np.arange(n_point)] = _upsample_filt(4) self._scales = scales self.n_point = n_point @@ -71,7 +89,7 @@ def __call__(self, hs, rois, roi_indices): h = self.conv7(h) h = self.conv8(h) h = self.point(h) - return F.resize_images(h, (self.map_size, self.map_size)) + return self.upsample(h) def distribute(self, rois, roi_indices): # Compleetely same as MaskHead.distribute From 2d44d66f74c0be1f2952af80cc11a9076fd71e3c Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Tue, 5 Mar 2019 15:28:26 +0900 Subject: [PATCH 048/100] change mask_to_segm and divide mask loss by #RoI --- chainercv/links/model/mask_rcnn/mask_head.py | 75 ++------------ chainercv/links/model/mask_rcnn/misc.py | 100 +++++++++++++++++++ 2 files changed, 111 insertions(+), 64 deletions(-) diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py index d1436785c1..44fd6df6df 100644 --- a/chainercv/links/model/mask_rcnn/mask_head.py +++ b/chainercv/links/model/mask_rcnn/mask_head.py @@ -15,6 +15,9 @@ from chainercv.transforms.image.resize import resize from chainercv.utils.bbox.bbox_iou import bbox_iou +from chainercv.links.model.mask_rcnn.misc import segm_to_mask +from chainercv.links.model.mask_rcnn.misc import mask_to_segm + class MaskHead(chainer.Chain): @@ -141,63 +144,14 @@ def decode(self, segms, bboxes, labels, sizes): raise ValueError( 'MaskHead.decode only supports numpy inputs for now.') masks = [] - # To work around an issue with cv2.resize (it seems to automatically - # pad with repeated border values), we manually zero-pad the masks by 1 - # pixel prior to resizing back to the original image 
resolution. - # This prevents "top hat" artifacts. We therefore need to expand - # the reference boxes by an appropriate factor. - cv2_expand_scale = (self.mask_size + 2) / self.mask_size - padded_mask = np.zeros((self.mask_size + 2, self.mask_size + 2), - dtype=np.float32) for bbox, segm, label, size in zip( bboxes, segms, labels, sizes): - img_H, img_W = size - mask = np.zeros((len(bbox), img_H, img_W), dtype=np.bool) - - bbox = _expand_boxes(bbox, cv2_expand_scale) - for i, (bb, sgm, lbl) in enumerate(zip(bbox, segm, label)): - bb = bb.astype(np.int32) - padded_mask[1:-1, 1:-1] = sgm[lbl + 1] - - # TODO(yuyu2172): Ignore +1 later - bb_height = np.maximum(bb[2] - bb[0] + 1, 1) - bb_width = np.maximum(bb[3] - bb[1] + 1, 1) - - crop_mask = cv2.resize(padded_mask, (bb_width, bb_height)) - crop_mask = crop_mask > 0.5 - - y_min = max(bb[0], 0) - x_min = max(bb[1], 0) - y_max = min(bb[2] + 1, img_H) - x_max = min(bb[3] + 1, img_W) - mask[i, y_min:y_max, x_min:x_max] = crop_mask[ - (y_min - bb[0]):(y_max - bb[0]), - (x_min - bb[1]):(x_max - bb[1])] - masks.append(mask) + masks.append( + segm_to_mask(segm[np.arange(len(label)), label + 1], + bbox, size)) return masks -def _expand_boxes(bbox, scale): - """Expand an array of boxes by a given scale.""" - xp = chainer.backends.cuda.get_array_module(bbox) - - h_half = (bbox[:, 2] - bbox[:, 0]) * .5 - w_half = (bbox[:, 3] - bbox[:, 1]) * .5 - y_c = (bbox[:, 2] + bbox[:, 0]) * .5 - x_c = (bbox[:, 3] + bbox[:, 1]) * .5 - - h_half *= scale - w_half *= scale - - expanded_bbox = xp.zeros(bbox.shape) - expanded_bbox[:, 0] = y_c - h_half - expanded_bbox[:, 1] = x_c - w_half - expanded_bbox[:, 2] = y_c + h_half - expanded_bbox[:, 3] = x_c + w_half - - return expanded_bbox - - def mask_loss_pre(rois, roi_indices, gt_masks, gt_bboxes, gt_head_labels, mask_size): """Loss function for Mask Head (pre). 
@@ -261,8 +215,8 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_bboxes, mask_roi = mask_rois[index] iou = bbox_iou(mask_roi, gt_bbox) gt_index = iou.argmax(axis=1) - gt_segms[index] = _segm_wrt_bbox( - gt_mask, gt_index, mask_roi, (mask_size, mask_size), xp) + gt_segms[index] = xp.array( + mask_to_segm(gt_mask, mask_roi, mask_size, gt_index)) flag_masks = [mask_roi_levels == l for l in range(n_level)] mask_rois = [mask_rois[m] for m in flag_masks] @@ -297,16 +251,9 @@ def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels, gt_segms = xp.vstack(gt_segms).astype(np.float32, copy=False) gt_mask_labels = xp.hstack(gt_mask_labels).astype(np.int32) - mask_loss = 0 - for i in np.unique(cuda.to_cpu(mask_roi_indices)): - index = (mask_roi_indices == i).nonzero()[0] - gt_segm = gt_segms[index] - gt_mask_label = gt_mask_labels[index] - - mask_loss += F.sigmoid_cross_entropy( - segms[index, gt_mask_label], gt_segm.astype(np.int32)) - - mask_loss /= batchsize + mask_loss = F.sigmoid_cross_entropy( + segms[np.arange(len(gt_mask_labels)), gt_mask_labels], + gt_segms.astype(np.int32)) return mask_loss diff --git a/chainercv/links/model/mask_rcnn/misc.py b/chainercv/links/model/mask_rcnn/misc.py index abb233443b..8d0fca37f4 100644 --- a/chainercv/links/model/mask_rcnn/misc.py +++ b/chainercv/links/model/mask_rcnn/misc.py @@ -1,3 +1,10 @@ +from __future__ import division + +import cv2 +import numpy as np + +import chainer + from chainercv import transforms @@ -10,3 +17,96 @@ def scale_img(img, min_size, max_size): H, W = int(H * scale), int(W * scale) img = transforms.resize(img, (H, W)) return img, scale + + +def mask_to_segm(mask, bbox, segm_size, index=None, pad=1): + _, H, W = mask.shape + bbox = chainer.backends.cuda.to_cpu(bbox) + padded_segm_size = segm_size + pad * 2 + cv2_expand_scale = padded_segm_size / segm_size + bbox = _expand_boxes(bbox, cv2_expand_scale).astype(np.int32) + + segm = [] + if index is None: + index = np.arange(len(index)) + else: + 
index = chainer.backends.cuda.to_cpu(index) + + for i, bb in zip(index, bbox): + y_min = max(bb[0], 0) + x_min = max(bb[1], 0) + y_max = min(bb[2] + 1, H) + x_max = min(bb[3] + 1, W) + cropped_m = mask[i, y_min:y_max, x_min:x_max] + cropped_m = chainer.backends.cuda.to_cpu(cropped_m) + if cropped_m.shape[0] <= 1 or cropped_m.shape[1] <= 1: + segm.append(np.zeros((segm_size, segm_size), dtype=np.float32)) + continue + + sgm = transforms.resize( + cropped_m[None].astype(np.float32), + (padded_segm_size, padded_segm_size))[0] + segm.append(sgm[pad:-pad, pad:-pad]) + + return np.array(segm, dtype=np.int32) + + +def segm_to_mask(segm, bbox, size, pad=1): + """ + segm: (R, H, W) float32 + + """ + H, W = size + _, segm_size, _ = segm.shape + + mask = np.zeros((len(bbox), H, W), dtype=np.bool) + + # To work around an issue with cv2.resize (it seems to automatically + # pad with repeated border values), we manually zero-pad the masks by 1 + # pixel prior to resizing back to the original image resolution. + # This prevents "top hat" artifacts. We therefore need to expand + # the reference boxes by an appropriate factor. 
+ cv2_expand_scale = (segm_size + pad * 2) / segm_size + padded_mask = np.zeros( + (segm_size + pad * 2, segm_size + pad * 2), dtype=np.float32) + + bbox = _expand_boxes(bbox, cv2_expand_scale) + for i, (bb, sgm) in enumerate(zip(bbox, segm)): + bb = bb.astype(np.int32) + padded_mask[1:-1, 1:-1] = sgm + + bb_height = np.maximum(bb[2] - bb[0] + 1, 1) + bb_width = np.maximum(bb[3] - bb[1] + 1, 1) + + crop_mask = cv2.resize(padded_mask, (bb_width, bb_height)) + crop_mask = crop_mask > 0.5 + + y_min = max(bb[0], 0) + x_min = max(bb[1], 0) + y_max = min(bb[2] + 1, H) + x_max = min(bb[3] + 1, W) + mask[i, y_min:y_max, x_min:x_max] = crop_mask[ + (y_min - bb[0]):(y_max - bb[0]), + (x_min - bb[1]):(x_max - bb[1])] + return mask + + +def _expand_boxes(bbox, scale): + """Expand an array of boxes by a given scale.""" + xp = chainer.backends.cuda.get_array_module(bbox) + + h_half = (bbox[:, 2] - bbox[:, 0]) * .5 + w_half = (bbox[:, 3] - bbox[:, 1]) * .5 + y_c = (bbox[:, 2] + bbox[:, 0]) * .5 + x_c = (bbox[:, 3] + bbox[:, 1]) * .5 + + h_half *= scale + w_half *= scale + + expanded_bbox = xp.zeros(bbox.shape) + expanded_bbox[:, 0] = y_c - h_half + expanded_bbox[:, 1] = x_c - w_half + expanded_bbox[:, 2] = y_c + h_half + expanded_bbox[:, 3] = x_c + w_half + + return expanded_bbox From be4e8ad5bd50ff0e0329c06bea6e2ba4c71b8bed Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Tue, 5 Mar 2019 15:45:09 +0900 Subject: [PATCH 049/100] use segm_size instead of mask_size --- chainercv/links/model/mask_rcnn/mask_head.py | 14 +++++++------- chainercv/links/model/mask_rcnn/mask_rcnn.py | 2 +- examples/mask_rcnn/train_multi.py | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py index 44fd6df6df..8a8ca8a01f 100644 --- a/chainercv/links/model/mask_rcnn/mask_head.py +++ b/chainercv/links/model/mask_rcnn/mask_head.py @@ -33,7 +33,7 @@ class MaskHead(chainer.Chain): _canonical_scale = 
224 _roi_size = 14 _roi_sample_ratio = 2 - mask_size = _roi_size * 2 + segm_size = _roi_size * 2 def __init__(self, n_class, scales): super(MaskHead, self).__init__() @@ -63,7 +63,7 @@ def __call__(self, hs, rois, roi_indices): self._scales[l], self._roi_sample_ratio)) if len(pooled_hs) == 0: - out_size = self.mask_size + out_size = self.segm_size segs = chainer.Variable( self.xp.empty((0, self._n_class, out_size, out_size), dtype=np.float32)) @@ -153,7 +153,7 @@ def decode(self, segms, bboxes, labels, sizes): def mask_loss_pre(rois, roi_indices, gt_masks, gt_bboxes, - gt_head_labels, mask_size): + gt_head_labels, segm_size): """Loss function for Mask Head (pre). This function processes RoIs for :func:`mask_loss_post` by @@ -173,7 +173,7 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_bboxes, shape :math:`(R_l,)`. This is a collection of ground-truth labels assigned to :obj:`rois` during bounding box localization stage. The range of value is :math:`(0, n\_class - 1)`. - mask_size (int): Size of the ground truth network output. + segm_size (int): Size of the ground truth network output. Returns: tuple of four lists: @@ -185,7 +185,7 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_bboxes, feature map. * **roi_indices**: A list of arrays of shape :math:`(R'_l,)`. * **gt_segms**: A list of arrays of shape :math:`(R'_l, M, M). \ - :math:`M` is the argument :obj:`mask_size`. + :math:`M` is the argument :obj:`segm_size`. * **gt_mask_labels**: A list of arrays of shape :math:`(R'_l,)` \ indicating the classes of ground truth. 
""" @@ -206,7 +206,7 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_bboxes, mask_roi_indices = roi_indices[index] gt_mask_labels = gt_head_labels[index] - gt_segms = xp.empty((len(mask_rois), mask_size, mask_size), dtype=np.bool) + gt_segms = xp.empty((len(mask_rois), segm_size, segm_size), dtype=np.bool) for i in np.unique(cuda.to_cpu(mask_roi_indices)): gt_mask = gt_masks[i] gt_bbox = gt_bboxes[i] @@ -216,7 +216,7 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_bboxes, iou = bbox_iou(mask_roi, gt_bbox) gt_index = iou.argmax(axis=1) gt_segms[index] = xp.array( - mask_to_segm(gt_mask, mask_roi, mask_size, gt_index)) + mask_to_segm(gt_mask, mask_roi, segm_size, gt_index)) flag_masks = [mask_roi_levels == l for l in range(n_level)] mask_rois = [mask_rois[m] for m in flag_masks] diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py index 9f59f49d92..65b76c5b0d 100644 --- a/chainercv/links/model/mask_rcnn/mask_rcnn.py +++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py @@ -147,7 +147,7 @@ def predict(self, imgs): segms, mask_roi_indices_before_reordering, len(imgs)) segms = [segm if segm is not None else self.xp.zeros( - (0, self.mask_head.mask_size, self.mask_head.mask_size), + (0, self.mask_head.segm_size, self.mask_head.segm_size), dtype=np.float32) for segm in segms] diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py index 44273026f7..921b1e53dc 100644 --- a/examples/mask_rcnn/train_multi.py +++ b/examples/mask_rcnn/train_multi.py @@ -96,7 +96,7 @@ def __call__(self, imgs, masks, labels, bboxes): mask_rois, mask_roi_indices, gt_segms, gt_mask_labels = mask_loss_pre( rois, roi_indices, masks, bboxes, - head_gt_labels, self.model.mask_head.mask_size) + head_gt_labels, self.model.mask_head.segm_size) n_roi = sum([len(roi) for roi in mask_rois]) if n_roi > 0: segms = self.model.mask_head(hs, mask_rois, mask_roi_indices) From 28227892cca86fb52b07f5df44def25665a89704 Mon Sep 17 
00:00:00 2001 From: Yusuke Niitani Date: Wed, 6 Mar 2019 10:55:01 +0900 Subject: [PATCH 050/100] fix mask_head --- chainercv/links/model/mask_rcnn/mask_head.py | 32 +++++--------------- 1 file changed, 7 insertions(+), 25 deletions(-) diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py index 8a8ca8a01f..462085d536 100644 --- a/chainercv/links/model/mask_rcnn/mask_head.py +++ b/chainercv/links/model/mask_rcnn/mask_head.py @@ -1,9 +1,6 @@ from __future__ import division import numpy as np -import PIL - -import cv2 import chainer from chainer.backends import cuda @@ -12,11 +9,10 @@ import chainer.links as L from chainercv.links import Conv2DActiv -from chainercv.transforms.image.resize import resize from chainercv.utils.bbox.bbox_iou import bbox_iou -from chainercv.links.model.mask_rcnn.misc import segm_to_mask from chainercv.links.model.mask_rcnn.misc import mask_to_segm +from chainercv.links.model.mask_rcnn.misc import segm_to_mask class MaskHead(chainer.Chain): @@ -146,9 +142,12 @@ def decode(self, segms, bboxes, labels, sizes): masks = [] for bbox, segm, label, size in zip( bboxes, segms, labels, sizes): - masks.append( - segm_to_mask(segm[np.arange(len(label)), label + 1], - bbox, size)) + if len(segm) > 0: + masks.append( + segm_to_mask(segm[np.arange(len(label)), label + 1], + bbox, size)) + else: + masks.append(np.zeros((0,) + size, dtype=np.bool)) return masks @@ -255,20 +254,3 @@ def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels, segms[np.arange(len(gt_mask_labels)), gt_mask_labels], gt_segms.astype(np.int32)) return mask_loss - - -def _segm_wrt_bbox(mask, gt_index, bbox, size, xp): - bbox = chainer.backends.cuda.to_cpu(bbox.astype(np.int32)) - - segm = [] - for i, bb in zip(chainer.backends.cuda.to_cpu(gt_index), bbox): - cropped_m = mask[i, bb[0]:bb[2], bb[1]:bb[3]] - cropped_m = chainer.backends.cuda.to_cpu(cropped_m) - if cropped_m.shape[0] == 0 or cropped_m.shape[1] == 0: - 
segm.append(np.zeros(size, dtype=np.bool)) - continue - - segm.append(resize( - cropped_m[None].astype(np.float32), - size, interpolation=PIL.Image.NEAREST)[0]) - return xp.array(segm, dtype=np.float32) From 6513e2480e98ce3773ec5566c9923be51f75bb23 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 6 Mar 2019 13:20:10 +0900 Subject: [PATCH 051/100] delete +1 --- chainercv/links/model/mask_rcnn/misc.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/chainercv/links/model/mask_rcnn/misc.py b/chainercv/links/model/mask_rcnn/misc.py index 8d0fca37f4..796da612ff 100644 --- a/chainercv/links/model/mask_rcnn/misc.py +++ b/chainercv/links/model/mask_rcnn/misc.py @@ -24,7 +24,7 @@ def mask_to_segm(mask, bbox, segm_size, index=None, pad=1): bbox = chainer.backends.cuda.to_cpu(bbox) padded_segm_size = segm_size + pad * 2 cv2_expand_scale = padded_segm_size / segm_size - bbox = _expand_boxes(bbox, cv2_expand_scale).astype(np.int32) + bbox = _integerize_bbox(_expand_boxes(bbox, cv2_expand_scale)) segm = [] if index is None: @@ -35,11 +35,11 @@ def mask_to_segm(mask, bbox, segm_size, index=None, pad=1): for i, bb in zip(index, bbox): y_min = max(bb[0], 0) x_min = max(bb[1], 0) - y_max = min(bb[2] + 1, H) - x_max = min(bb[3] + 1, W) + y_max = min(bb[2], H) + x_max = min(bb[3], W) cropped_m = mask[i, y_min:y_max, x_min:x_max] cropped_m = chainer.backends.cuda.to_cpu(cropped_m) - if cropped_m.shape[0] <= 1 or cropped_m.shape[1] <= 1: + if cropped_m.shape[0] == 0 or cropped_m.shape[1] == 0: segm.append(np.zeros((segm_size, segm_size), dtype=np.float32)) continue @@ -70,27 +70,30 @@ def segm_to_mask(segm, bbox, size, pad=1): padded_mask = np.zeros( (segm_size + pad * 2, segm_size + pad * 2), dtype=np.float32) - bbox = _expand_boxes(bbox, cv2_expand_scale) + bbox = _integerize_bbox(_expand_boxes(bbox, cv2_expand_scale)) for i, (bb, sgm) in enumerate(zip(bbox, segm)): - bb = bb.astype(np.int32) padded_mask[1:-1, 1:-1] = sgm - bb_height 
= np.maximum(bb[2] - bb[0] + 1, 1) - bb_width = np.maximum(bb[3] - bb[1] + 1, 1) + bb_height = np.maximum(bb[2] - bb[0], 1) + bb_width = np.maximum(bb[3] - bb[1], 1) crop_mask = cv2.resize(padded_mask, (bb_width, bb_height)) crop_mask = crop_mask > 0.5 y_min = max(bb[0], 0) x_min = max(bb[1], 0) - y_max = min(bb[2] + 1, H) - x_max = min(bb[3] + 1, W) + y_max = min(bb[2], H) + x_max = min(bb[3], W) mask[i, y_min:y_max, x_min:x_max] = crop_mask[ (y_min - bb[0]):(y_max - bb[0]), (x_min - bb[1]):(x_max - bb[1])] return mask +def _integerize_bbox(bbox): + return np.round(bbox).astype(np.int32) + + def _expand_boxes(bbox, scale): """Expand an array of boxes by a given scale.""" xp = chainer.backends.cuda.get_array_module(bbox) From 7e1e3ecee5930c527c7f1c89cdd4719d826095e5 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 6 Mar 2019 14:45:46 +0900 Subject: [PATCH 052/100] fix mask_to_segm and segm_to_mask --- chainercv/links/model/mask_rcnn/misc.py | 40 ++++++++++++++++--------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/chainercv/links/model/mask_rcnn/misc.py b/chainercv/links/model/mask_rcnn/misc.py index 796da612ff..c589d7f2d2 100644 --- a/chainercv/links/model/mask_rcnn/misc.py +++ b/chainercv/links/model/mask_rcnn/misc.py @@ -35,20 +35,28 @@ def mask_to_segm(mask, bbox, segm_size, index=None, pad=1): for i, bb in zip(index, bbox): y_min = max(bb[0], 0) x_min = max(bb[1], 0) - y_max = min(bb[2], H) - x_max = min(bb[3], W) - cropped_m = mask[i, y_min:y_max, x_min:x_max] - cropped_m = chainer.backends.cuda.to_cpu(cropped_m) - if cropped_m.shape[0] == 0 or cropped_m.shape[1] == 0: + y_max = max(min(bb[2], H), 0) + x_max = max(min(bb[3], W), 0) + if y_max - y_min == 0 or x_max - x_min == 0: segm.append(np.zeros((segm_size, segm_size), dtype=np.float32)) continue + bb_height = bb[2] - bb[0] + bb_width = bb[3] - bb[1] + cropped_m = np.zeros((bb_height, bb_width), dtype=np.bool) + + y_offset = y_min - bb[0] + x_offset = x_min - bb[1] + 
cropped_m[y_offset:y_offset + y_max - y_min, + x_offset:x_offset + x_max - x_min] =\ + chainer.backends.cuda.to_cpu(mask[i, y_min:y_max, x_min:x_max]) + sgm = transforms.resize( cropped_m[None].astype(np.float32), - (padded_segm_size, padded_segm_size))[0] + (padded_segm_size, padded_segm_size))[0].astype(np.int32) segm.append(sgm[pad:-pad, pad:-pad]) - return np.array(segm, dtype=np.int32) + return np.array(segm, dtype=np.float32) def segm_to_mask(segm, bbox, size, pad=1): @@ -74,19 +82,23 @@ def segm_to_mask(segm, bbox, size, pad=1): for i, (bb, sgm) in enumerate(zip(bbox, segm)): padded_mask[1:-1, 1:-1] = sgm - bb_height = np.maximum(bb[2] - bb[0], 1) - bb_width = np.maximum(bb[3] - bb[1], 1) + bb_height = bb[2] - bb[0] + bb_width = bb[3] - bb[1] + if bb_height == 0 or bb_width == 0: + continue - crop_mask = cv2.resize(padded_mask, (bb_width, bb_height)) + crop_mask = transforms.resize(padded_mask[None], (bb_width, bb_height))[0] crop_mask = crop_mask > 0.5 y_min = max(bb[0], 0) x_min = max(bb[1], 0) - y_max = min(bb[2], H) - x_max = min(bb[3], W) + y_max = max(min(bb[2], H), 0) + x_max = max(min(bb[3], W), 0) + y_offset = y_min - bb[0] + x_offset = x_min - bb[1] mask[i, y_min:y_max, x_min:x_max] = crop_mask[ - (y_min - bb[0]):(y_max - bb[0]), - (x_min - bb[1]):(x_max - bb[1])] + y_offset:y_offset + y_max - y_min, + x_offset:x_offset + x_max - x_min] return mask From c78925c640b15cb9dbbbe8f5763556bbe5fdaa07 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 6 Mar 2019 14:59:30 +0900 Subject: [PATCH 053/100] add test --- chainercv/links/model/mask_rcnn/misc.py | 2 +- .../model_tests/mask_rcnn_tests/test_misc.py | 52 +++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 tests/links_tests/model_tests/mask_rcnn_tests/test_misc.py diff --git a/chainercv/links/model/mask_rcnn/misc.py b/chainercv/links/model/mask_rcnn/misc.py index c589d7f2d2..4f10c699c8 100644 --- a/chainercv/links/model/mask_rcnn/misc.py +++ 
b/chainercv/links/model/mask_rcnn/misc.py @@ -28,7 +28,7 @@ def mask_to_segm(mask, bbox, segm_size, index=None, pad=1): segm = [] if index is None: - index = np.arange(len(index)) + index = np.arange(len(bbox)) else: index = chainer.backends.cuda.to_cpu(index) diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_misc.py b/tests/links_tests/model_tests/mask_rcnn_tests/test_misc.py new file mode 100644 index 0000000000..6bd6722c7a --- /dev/null +++ b/tests/links_tests/model_tests/mask_rcnn_tests/test_misc.py @@ -0,0 +1,52 @@ +from __future__ import division + +import numpy as np +import unittest + +from chainer import testing + +from chainercv.links.model.mask_rcnn.misc import segm_to_mask +from chainercv.links.model.mask_rcnn.misc import mask_to_segm + + +class TestSegmToMask(unittest.TestCase): + + def setUp(self): + # When n_inst >= 3, the test fails. + # This is due to the fact that the transformed image of `transforms.resize` + # is misaligned to the corners. + n_inst = 2 + self.segm_size = 3 + self.size = (36, 48) + + self.segm = np.ones((n_inst, self.segm_size, self.segm_size), dtype=np.float32) + self.bbox = np.zeros((n_inst, 4), dtype=np.float32) + for i in range(n_inst): + self.bbox[i, 0] = 10 + i + self.bbox[i, 1] = 10 + i + self.bbox[i, 2] = self.bbox[i, 0] + self.segm_size * (1 + i) + self.bbox[i, 3] = self.bbox[i, 1] + self.segm_size * (1 + i) + + self.mask = np.zeros((n_inst,) + self.size, dtype=np.bool) + for i, bb in enumerate(self.bbox): + bb = bb.astype(np.int32) + self.mask[i, bb[0]:bb[2], bb[1]:bb[3]] = 1 + + def test_segm_to_mask(self): + mask = segm_to_mask(self.segm, self.bbox, self.size) + np.testing.assert_equal(mask, self.mask) + + def test_mask_to_segm(self): + segm = mask_to_segm(self.mask, self.bbox, self.segm_size) + np.testing.assert_equal(segm, self.segm) + + def test_mask_to_segm_index(self): + index = np.arange(len(self.bbox))[::-1] + segm = mask_to_segm( + self.mask, self.bbox[::-1], + self.segm_size, index=index) + segm 
= segm[::-1] + np.testing.assert_equal(segm, self.segm) + + +testing.run_module(__name__, __file__) From b181dfdd16e35a743380c611a782df710ee72013 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 6 Mar 2019 15:15:28 +0900 Subject: [PATCH 054/100] add mask_to_segm and segm_to_mask to doc --- chainercv/links/model/mask_rcnn/__init__.py | 2 + chainercv/links/model/mask_rcnn/misc.py | 47 +++++++++++++++++++-- docs/source/reference/links/mask_rcnn.rst | 7 +++ 3 files changed, 53 insertions(+), 3 deletions(-) diff --git a/chainercv/links/model/mask_rcnn/__init__.py b/chainercv/links/model/mask_rcnn/__init__.py index c9e910a524..9f1b210dbc 100644 --- a/chainercv/links/model/mask_rcnn/__init__.py +++ b/chainercv/links/model/mask_rcnn/__init__.py @@ -4,3 +4,5 @@ from chainercv.links.model.mask_rcnn.mask_rcnn import MaskRCNN # NOQA from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet101 # NOQA from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet50 # NOQA +from chainercv.links.model.mask_rcnn.misc import mask_to_segm # NOQA +from chainercv.links.model.mask_rcnn.misc import segm_to_mask # NOQA diff --git a/chainercv/links/model/mask_rcnn/misc.py b/chainercv/links/model/mask_rcnn/misc.py index 8d0fca37f4..a62b3bf43b 100644 --- a/chainercv/links/model/mask_rcnn/misc.py +++ b/chainercv/links/model/mask_rcnn/misc.py @@ -20,6 +20,30 @@ def scale_img(img, min_size, max_size): def mask_to_segm(mask, bbox, segm_size, index=None, pad=1): + """Crop and resize mask. + + Args: + mask (~numpy.ndarray): See below. + bbox (~numpy.ndarray): See below. + segm_size (int): The size of segm :math:`S`. + index (~numpy.ndarray): See below. :math:`R = N` when + :obj:`index` is :obj:`None`. + pad (int): The amount of padding used for bbox. + + Returns: + ~numpy.ndarray: See below. + + .. 
csv-table:: + :header: name, shape, dtype, format + + :obj:`mask`, ":math:`(N, H, W)`", :obj:`bool`, -- + :obj:`bbox`, ":math:`(R, 4)`", :obj:`float32`, \ + ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`" + :obj:`index` (optional), ":math:`(R,)`", :obj:`int32`, -- + :obj:`segms` (output), ":math:`(R, S, S)`", :obj:`float32`, \ + ":math:`[0, 1]`" + + """ _, H, W = mask.shape bbox = chainer.backends.cuda.to_cpu(bbox) padded_segm_size = segm_size + pad * 2 @@ -48,12 +72,29 @@ def mask_to_segm(mask, bbox, segm_size, index=None, pad=1): (padded_segm_size, padded_segm_size))[0] segm.append(sgm[pad:-pad, pad:-pad]) - return np.array(segm, dtype=np.int32) + return np.array(segm, dtype=np.float32) def segm_to_mask(segm, bbox, size, pad=1): - """ - segm: (R, H, W) float32 + """Recover mask from cropped and resized mask. + + Args: + segm (~numpy.ndarray): See below. + bbox (~numpy.ndarray): See below. + size (tuple): This is a tuple of length 2. Its elements are + ordered as (height, width). + pad (int): The amount of padding used for bbox. + + Returns: + ~numpy.ndarray: See below. + + .. csv-table:: + :header: name, shape, dtype, format + + :obj:`segm`, ":math:`(R, S, S)`", :obj:`float32`, -- + :obj:`bbox`, ":math:`(R, 4)`", :obj:`float32`, \ + ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`" + :obj:`mask` (output), ":math:`(R, H, W)`", :obj:`bool`, -- """ H, W = size diff --git a/docs/source/reference/links/mask_rcnn.rst b/docs/source/reference/links/mask_rcnn.rst index 4c0870e2e5..9fce65c343 100644 --- a/docs/source/reference/links/mask_rcnn.rst +++ b/docs/source/reference/links/mask_rcnn.rst @@ -32,6 +32,9 @@ MaskHead :members: :special-members: __call__ +segm_to_mask +~~~~~~~~~~~~ +.. autofunction:: segm_to_mask Train-only Utility ------------------ @@ -43,3 +46,7 @@ mask_loss_pre mask_loss_post ~~~~~~~~~~~~~~ .. autofunction:: mask_loss_post + +mask_to_segm +~~~~~~~~~~~~ +.. 
autofunction:: mask_to_segm From acf73a67317785df16af4427553d12ff7452145c Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 6 Mar 2019 18:52:44 +0900 Subject: [PATCH 055/100] add keypoint_loss --- chainercv/links/model/mask_rcnn/__init__.py | 2 + .../links/model/mask_rcnn/keypoint_head.py | 70 ++++++++- chainercv/links/model/mask_rcnn/misc.py | 39 +++++ .../mask_rcnn_tests/test_keypoint_head.py | 146 ++++++++++++++++++ .../mask_rcnn_tests/test_mask_head.py | 10 +- 5 files changed, 260 insertions(+), 7 deletions(-) create mode 100644 tests/links_tests/model_tests/mask_rcnn_tests/test_keypoint_head.py diff --git a/chainercv/links/model/mask_rcnn/__init__.py b/chainercv/links/model/mask_rcnn/__init__.py index 1dc597cb9d..3391efe1f9 100644 --- a/chainercv/links/model/mask_rcnn/__init__.py +++ b/chainercv/links/model/mask_rcnn/__init__.py @@ -1,4 +1,6 @@ from chainercv.links.model.mask_rcnn.keypoint_head import KeypointHead # NOQA +from chainercv.links.model.mask_rcnn.keypoint_head import keypoint_loss_post # NOQA +from chainercv.links.model.mask_rcnn.keypoint_head import keypoint_loss_pre # NOQA from chainercv.links.model.mask_rcnn.mask_head import mask_loss_post # NOQA from chainercv.links.model.mask_rcnn.mask_head import mask_loss_pre # NOQA from chainercv.links.model.mask_rcnn.mask_head import MaskHead # NOQA diff --git a/chainercv/links/model/mask_rcnn/keypoint_head.py b/chainercv/links/model/mask_rcnn/keypoint_head.py index 0d67ab2c21..8b8e3fc428 100644 --- a/chainercv/links/model/mask_rcnn/keypoint_head.py +++ b/chainercv/links/model/mask_rcnn/keypoint_head.py @@ -10,12 +10,12 @@ import chainer.functions as F from chainer.backends import cuda from chainer.initializers import HeNormal -from chainer.initializers import Normal from chainercv.links import Conv2DActiv from chainercv.transforms.image.resize import resize from chainercv.utils.bbox.bbox_iou import bbox_iou -from chainercv.utils.mask.mask_to_bbox import mask_to_bbox + +from 
chainercv.links.model.mask_rcnn.misc import point_to_roi_points # make a bilinear interpolation kernel @@ -140,3 +140,69 @@ def decode(self, point_maps, bboxes): points.append(point) point_scores.append(point_score) return points, point_scores + + +def keypoint_loss_pre(rois, roi_indices, gt_points, gt_visibles, + gt_bboxes, gt_head_labels, point_map_size): + _, n_point, _ = gt_points[0].shape + + xp = cuda.get_array_module(*rois) + + n_level = len(rois) + + roi_levels = xp.hstack( + xp.array((l,) * len(rois[l])) for l in range(n_level)).astype(np.int32) + rois = xp.vstack(rois).astype(np.float32) + roi_indices = xp.hstack(roi_indices).astype(np.int32) + gt_head_labels = xp.hstack(gt_head_labels) + + index = (gt_head_labels > 0).nonzero()[0] + point_roi_levels = roi_levels[index] + point_rois = rois[index] + point_roi_indices = roi_indices[index] + + gt_roi_points = xp.empty( + (len(point_rois), n_point, 2), dtype=np.float32) + gt_roi_visibles = xp.empty( + (len(point_rois), n_point), dtype=np.bool) + for i in np.unique(cuda.to_cpu(point_roi_indices)): + gt_point = gt_points[i] + gt_visible = gt_visibles[i] + gt_bbox = gt_bboxes[i] + + index = (point_roi_indices == i).nonzero()[0] + point_roi = point_rois[index] + iou = bbox_iou(point_roi, gt_bbox) + gt_index = iou.argmax(axis=1) + gt_roi_point, gt_roi_visible = point_to_roi_points( + gt_point[gt_index], gt_visible[gt_index], + point_roi, point_map_size) + gt_roi_points[index] = xp.array(gt_roi_point) + gt_roi_visibles[index] = xp.array(gt_roi_visible) + + flag_masks = [point_roi_levels == l for l in range(n_level)] + point_rois = [point_rois[m] for m in flag_masks] + point_roi_indices = [point_roi_indices[m] for m in flag_masks] + gt_roi_points = [gt_roi_points[m] for m in flag_masks] + gt_roi_visibles = [gt_roi_visibles[m] for m in flag_masks] + return point_rois, point_roi_indices, gt_roi_points, gt_roi_visibles + + +def keypoint_loss_post( + point_maps, point_roi_indices, gt_roi_points, + gt_roi_visibles, 
batchsize): + xp = cuda.get_array_module(point_maps.array) + + point_roi_indices = xp.hstack(point_roi_indices).astype(np.int32) + gt_roi_points = xp.vstack(gt_roi_points).astype(np.int32) + gt_roi_visibles = xp.vstack(gt_roi_visibles).astype(np.bool) + + B, K, H, W = point_maps.shape + point_maps = point_maps.reshape((B * K, H * W)) + spatial_labels = gt_roi_points[:, :, 0] * W + gt_roi_points[:, :, 1] + spatial_labels = spatial_labels.reshape((B * K,)) + spatial_labels[xp.logical_not(gt_roi_visibles.reshape((B * K,)))] = -1 + # Remember that the loss is normalized by the total number of + # visible keypoints. + keypoint_loss = F.softmax_cross_entropy(point_maps, spatial_labels) + return keypoint_loss diff --git a/chainercv/links/model/mask_rcnn/misc.py b/chainercv/links/model/mask_rcnn/misc.py index a62b3bf43b..6b0c7a3f91 100644 --- a/chainercv/links/model/mask_rcnn/misc.py +++ b/chainercv/links/model/mask_rcnn/misc.py @@ -151,3 +151,42 @@ def _expand_boxes(bbox, scale): expanded_bbox[:, 3] = x_c + w_half return expanded_bbox + + +def point_to_roi_points( + point, visible, bbox, point_map_size): + xp = chainer.backends.cuda.get_array_module(point) + + R, K, _ = point.shape + + roi_point = xp.zeros((len(bbox), K, 2)) + roi_visible = xp.zeros((len(bbox), K), dtype=np.bool) + + offset_y = bbox[:, 0] + offset_x = bbox[:, 1] + scale_y = point_map_size / (bbox[:, 2] - bbox[:, 0]) + scale_x = point_map_size / (bbox[:, 3] - bbox[:, 1]) + + for k in range(K): + y_boundary_index = xp.where(point[:, k, 0] == bbox[:, 2])[0] + x_boundary_index = xp.where(point[:, k, 1] == bbox[:, 3])[0] + + ys = (point[:, k, 0] - offset_y) * scale_y + ys = xp.floor(ys) + if len(y_boundary_index) > 0: + ys[y_boundary_index] = point_map_size - 1 + xs = (point[:, k, 1] - offset_x) * scale_x + xs = xp.floor(xs) + if len(x_boundary_index) > 0: + xs[x_boundary_index] = point_map_size - 1 + + valid = xp.logical_and( + xp.logical_and( + xp.logical_and(ys >= 0, xs >= 0), + xp.logical_and(ys < 
point_map_size, xs < point_map_size)), + visible[:, k]) + + roi_point[:, k, 0] = ys + roi_point[:, k, 1] = xs + roi_visible[:, k] = valid + return roi_point, roi_visible diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_keypoint_head.py b/tests/links_tests/model_tests/mask_rcnn_tests/test_keypoint_head.py new file mode 100644 index 0000000000..836e9425f3 --- /dev/null +++ b/tests/links_tests/model_tests/mask_rcnn_tests/test_keypoint_head.py @@ -0,0 +1,146 @@ +from __future__ import division + +import numpy as np +import unittest + +import chainer +from chainer import testing +from chainer.testing import attr + +from chainercv.links.model.mask_rcnn import KeypointHead +from chainercv.links.model.mask_rcnn import keypoint_loss_post +from chainercv.links.model.mask_rcnn import keypoint_loss_pre + + +def _random_array(xp, shape): + return xp.array( + np.random.uniform(-1, 1, size=shape), dtype=np.float32) + + +def _point_to_bbox(point, visible=None): + xp = chainer.backends.cuda.get_array_module(point) + + bbox = xp.zeros((len(point), 4), dtype=np.float32) + + for i, pnt in enumerate(point): + if visible is None: + vsbl = xp.ones((len(pnt),), dtype=np.bool) + else: + vsbl = visible[i] + pnt = pnt[vsbl] + bbox[i, 0] = xp.min(pnt[:, 0]) + bbox[i, 1] = xp.min(pnt[:, 1]) + bbox[i, 2] = xp.max(pnt[:, 0]) + bbox[i, 3] = xp.max(pnt[:, 1]) + return bbox + + +class TestKeypointHeadLoss(unittest.TestCase): + + def _check_keypoint_loss_pre(self, xp): + point_map_size = 28 + n_point = 17 + rois = [ + xp.array(((4, 1, 6, 3),), dtype=np.float32), + xp.array( + ((0, 1, 2, 3), (5, 4, 10, 6)), dtype=np.float32), + xp.array(((10, 4, 12, 10),), dtype=np.float32), + ] + roi_indices = [ + xp.array((0,), dtype=np.int32), + xp.array((1, 0), dtype=np.int32), + xp.array((1,), dtype=np.int32), + ] + points = [ + xp.zeros((1, n_point, 2), dtype=np.float32), + xp.zeros((2, n_point, 2), dtype=np.float32), + xp.zeros((1, n_point, 2), dtype=np.float32), + ] + visibles = [ + xp.ones((1, 
n_point), dtype=np.bool), + xp.ones((2, n_point), dtype=np.bool), + xp.ones((1, n_point), dtype=np.bool) + ] + bboxes = [_point_to_bbox(point, visible) + for point, visible in zip(points, visibles)] + labels = [ + xp.array((1, 1), dtype=np.int32), + xp.array((1,), dtype=np.int32), + xp.array((1,), dtype=np.int32), + ] + rois, roi_indices, gt_roi_points, gt_roi_visibles = keypoint_loss_pre( + rois, roi_indices, points, visibles, bboxes, + labels, point_map_size) + + self.assertEqual(len(rois), 3) + self.assertEqual(len(roi_indices), 3) + self.assertEqual(len(gt_roi_points), 3) + self.assertEqual(len(gt_roi_visibles), 3) + for l in range(3): + self.assertIsInstance(rois[l], xp.ndarray) + self.assertIsInstance(roi_indices[l], xp.ndarray) + self.assertIsInstance(gt_roi_points[l], xp.ndarray) + self.assertIsInstance(gt_roi_visibles[l], xp.ndarray) + + self.assertEqual(rois[l].shape[0], roi_indices[l].shape[0]) + self.assertEqual(rois[l].shape[0], gt_roi_points[l].shape[0]) + self.assertEqual(rois[l].shape[0], gt_roi_visibles[l].shape[0]) + self.assertEqual(rois[l].shape[1:], (4,)) + self.assertEqual(roi_indices[l].shape[1:], ()) + self.assertEqual( + gt_roi_points[l].shape[1:], (n_point, 2)) + self.assertEqual( + gt_roi_visibles[l].shape[1:], (n_point,)) + + self.assertEqual( + gt_roi_points[l].dtype, np.float32) + self.assertEqual( + gt_roi_visibles[l].dtype, np.bool) + + def test_keypoint_loss_pre_cpu(self): + self._check_keypoint_loss_pre(np) + + @attr.gpu + def test_keypoint_loss_pre_gpu(self): + import cupy + self._check_keypoint_loss_pre(cupy) + + def _check_keypoint_loss_post(self, xp): + B = 2 + n_point = 17 + + point_maps = chainer.Variable(_random_array(xp, (20, n_point, 28, 28))) + point_roi_indices = [ + xp.random.randint(0, B, size=5).astype(np.int32), + xp.random.randint(0, B, size=7).astype(np.int32), + xp.random.randint(0, B, size=8).astype(np.int32), + ] + gt_roi_points = [ + xp.random.randint(0, 28, size=(5, n_point, 2)).astype(np.int32), + 
xp.random.randint(0, 28, size=(7, n_point, 2)).astype(np.int32), + xp.random.randint(0, 28, size=(8, n_point, 2)).astype(np.int32), + ] + gt_roi_visibles = [ + xp.random.randint(0, 2, size=(5, n_point)).astype(np.bool), + xp.random.randint(0, 2, size=(7, n_point)).astype(np.bool), + xp.random.randint(0, 2, size=(8, n_point)).astype(np.bool), + ] + + keypoint_loss = keypoint_loss_post( + point_maps, point_roi_indices, gt_roi_points, + gt_roi_visibles, B) + + self.assertIsInstance(keypoint_loss, chainer.Variable) + self.assertIsInstance(keypoint_loss.array, xp.ndarray) + self.assertEqual(keypoint_loss.shape, ()) + + def test_keypoint_loss_post_cpu(self): + self._check_keypoint_loss_post(np) + + @attr.gpu + def test_keypoint_loss_post_gpu(self): + import cupy + self._check_keypoint_loss_post(cupy) + + +testing.run_module(__name__, __file__) diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py index d1832d1b8b..ba2f132b15 100644 --- a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py +++ b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py @@ -194,7 +194,7 @@ def test_mask_loss_pre_gpu(self): import cupy self._check_mask_loss_pre(cupy) - def _check_head_loss_post(self, xp): + def _check_mask_loss_post(self, xp): B = 2 segms = chainer.Variable(_random_array(xp, (20, 81, 28, 28))) mask_roi_indices = [ @@ -220,13 +220,13 @@ def _check_head_loss_post(self, xp): self.assertIsInstance(mask_loss.array, xp.ndarray) self.assertEqual(mask_loss.shape, ()) - def test_head_loss_post_cpu(self): - self._check_head_loss_post(np) + def test_mask_loss_post_cpu(self): + self._check_mask_loss_post(np) @attr.gpu - def test_head_loss_post_gpu(self): + def test_mask_loss_post_gpu(self): import cupy - self._check_head_loss_post(cupy) + self._check_mask_loss_post(cupy) testing.run_module(__name__, __file__) From 6cf563717a1cad2fce9eca3a2dbdfc892a85ffab Mon Sep 17 00:00:00 2001 
From: Yusuke Niitani Date: Wed, 6 Mar 2019 18:53:08 +0900 Subject: [PATCH 056/100] wip --- examples/mask_rcnn/train_multi_keypoint.py | 278 +++++++++++++++++++++ 1 file changed, 278 insertions(+) create mode 100644 examples/mask_rcnn/train_multi_keypoint.py diff --git a/examples/mask_rcnn/train_multi_keypoint.py b/examples/mask_rcnn/train_multi_keypoint.py new file mode 100644 index 0000000000..42ae655ee7 --- /dev/null +++ b/examples/mask_rcnn/train_multi_keypoint.py @@ -0,0 +1,278 @@ +import argparse +import multiprocessing +import numpy as np +import PIL + +import chainer +import chainer.functions as F +import chainer.links as L +from chainer.optimizer_hooks import WeightDecay +from chainer import serializers +from chainer import training +from chainer.training import extensions + +import chainermn + +from chainercv.chainer_experimental.datasets.sliceable import TransformDataset +from chainercv.chainer_experimental.training.extensions import make_shift +from chainercv.datasets import coco_keypoint_names +from chainercv.datasets import COCOKeypointDataset +from chainercv.links import MaskRCNNFPNResNet101 +from chainercv.links import MaskRCNNFPNResNet50 +from chainercv.links.model.mask_rcnn.misc import scale_img +from chainercv import transforms + +from chainercv.links.model.fpn import head_loss_post +from chainercv.links.model.fpn import head_loss_pre +from chainercv.links.model.fpn import rpn_loss +from chainercv.links.model.mask_rcnn import keypoint_loss_pre +from chainercv.links.model.mask_rcnn import keypoint_loss_post + +# https://docs.chainer.org/en/stable/tips.html#my-training-process-gets-stuck-when-using-multiprocessiterator +try: + import cv2 + cv2.setNumThreads(0) +except ImportError: + pass + + +class TrainChain(chainer.Chain): + + def __init__(self, model): + super(TrainChain, self).__init__() + with self.init_scope(): + self.model = model + + def __call__(self, imgs, points, visibles, bboxes): + B = len(imgs) + pad_size = np.array( + [im.shape[1:] 
for im in imgs]).max(axis=0) + pad_size = ( + np.ceil( + pad_size / self.model.stride) * self.model.stride).astype(int) + x = np.zeros( + (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32) + for i, img in enumerate(imgs): + _, H, W = img.shape + x[i, :, :H, :W] = img + x = self.xp.array(x) + + # For reducing unnecessary CPU/GPU copy, `masks` is kept in CPU. + pad_masks = [ + np.zeros( + (mask.shape[0], pad_size[0], pad_size[1]), dtype=np.bool) + for mask in masks] + for i, mask in enumerate(masks): + _, H, W = mask.shape + pad_masks[i][:, :H, :W] = mask + masks = pad_masks + + bboxes = [self.xp.array(bbox) for bbox in bboxes] + labels = [self.xp.array(label) for label in labels] + sizes = [img.shape[1:] for img in imgs] + + with chainer.using_config('train', False): + hs = self.model.extractor(x) + + rpn_locs, rpn_confs = self.model.rpn(hs) + anchors = self.model.rpn.anchors(h.shape[2:] for h in hs) + rpn_loc_loss, rpn_conf_loss = rpn_loss( + rpn_locs, rpn_confs, anchors, sizes, bboxes) + + rois, roi_indices = self.model.rpn.decode( + rpn_locs, rpn_confs, anchors, x.shape) + rois = self.xp.vstack([rois] + bboxes) + roi_indices = self.xp.hstack( + [roi_indices] + + [self.xp.array((i,) * len(bbox)) + for i, bbox in enumerate(bboxes)]) + rois, roi_indices = self.model.head.distribute(rois, roi_indices) + rois, roi_indices, head_gt_locs, head_gt_labels = head_loss_pre( + rois, roi_indices, self.model.head.std, bboxes, labels) + head_locs, head_confs = self.model.head(hs, rois, roi_indices) + head_loc_loss, head_conf_loss = head_loss_post( + head_locs, head_confs, + roi_indices, head_gt_locs, head_gt_labels, B) + losses = [ + rpn_loc_loss + rpn_conf_loss + head_loc_loss + head_conf_loss] + + # mask_rois, mask_roi_indices, gt_segms, gt_mask_labels = mask_loss_pre( + # rois, roi_indices, masks, bboxes, + # head_gt_labels, self.model.mask_head.segm_size) + # n_roi = sum([len(roi) for roi in mask_rois]) + # if n_roi > 0: + # segms = self.model.mask_head(hs, 
mask_rois, mask_roi_indices) + # mask_loss = mask_loss_post( + # segms, mask_roi_indices, gt_segms, gt_mask_labels, B) + # else: + # # Compute dummy variables to complete the computational graph + # mask_rois[0] = self.xp.array([[0, 0, 1, 1]], dtype=np.float32) + # mask_roi_indices[0] = self.xp.array([0], dtype=np.int32) + # segms = self.model.mask_head(hs, mask_rois, mask_roi_indices) + # mask_loss = 0 * F.sum(segms) + loss = sum(losses) + chainer.reporter.report({ + 'loss': loss, + 'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss, + 'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss, + 'loss/keypoint': keypoint_loss}, + self) + return loss + + +class Transform(object): + + def __init__(self, min_size, max_size, mean): + self.min_size = min_size + self.max_size = max_size + self.mean = mean + + def __call__(self, in_data): + img, point, visible, _, bbox = in_data + # Flipping + size = img.shape[1:] + img, params = transforms.random_flip( + img, x_random=True, return_param=True) + point = transforms.flip_point( + point, size, x_flip=params['x_flip']) + bbox = transforms.flip_bbox( + bbox, size, x_flip=params['x_flip']) + + # Scaling and mean subtraction + img, scale = scale_img(img, self.min_size, self.max_size) + img -= self.mean + point = transforms.resize_point(point, size, img.shape[1:]) + bbox = bbox * scale + return img, point, visible, bbox + + +def converter(batch, device=None): + # do not send data to gpu (device is ignored) + return tuple(list(v) for v in zip(*batch)) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + '--model', + choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'), + default='mask_rcnn_fpn_resnet50') + parser.add_argument('--batchsize', type=int, default=16) + parser.add_argument('--iteration', type=int, default=90000) + parser.add_argument('--step', type=int, nargs='*', default=[60000, 80000]) + parser.add_argument('--out', default='result') + 
parser.add_argument('--resume') + parser.add_argument('--communicator', default='hierarchical') + args = parser.parse_args() + + # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator + if hasattr(multiprocessing, 'set_start_method'): + multiprocessing.set_start_method('forkserver') + p = multiprocessing.Process() + p.start() + p.join() + + comm = chainermn.create_communicator(args.communicator) + device = comm.intra_rank + + if args.model == 'mask_rcnn_fpn_resnet50': + model = MaskRCNNFPNResNet50( + n_fg_class=1, + pretrained_model='imagenet', + mode='keypoint' + ) + elif args.model == 'mask_rcnn_fpn_resnet101': + model = MaskRCNNFPNResNet101( + n_fg_class=1, + pretrained_model='imagenet', + mode='keypoint' + ) + + model.use_preset('evaluate') + train_chain = TrainChain(model) + chainer.cuda.get_device_from_id(device).use() + train_chain.to_gpu() + + train = TransformDataset( + COCOKeypointDataset( + data_dir='/home/yuyu2172/coco', + split='train'), + ('img', 'point', 'visible', 'bbox'), + Transform(model.min_size, model.max_size, model.extractor.mean)) + + if comm.rank == 0: + indices = np.arange(len(train)) + else: + indices = None + indices = chainermn.scatter_dataset(indices, comm, shuffle=True) + train = train.slice[indices] + + train_iter = chainer.iterators.MultiprocessIterator( + train, args.batchsize // comm.size, + n_processes=args.batchsize // comm.size, + shared_mem=3 * 1000 * 1000 * 4) + + optimizer = chainermn.create_multi_node_optimizer( + chainer.optimizers.MomentumSGD(), comm) + optimizer.setup(train_chain) + optimizer.add_hook(WeightDecay(0.0001)) + + model.extractor.base.conv1.disable_update() + model.extractor.base.res2.disable_update() + for link in model.links(): + if isinstance(link, L.BatchNormalization): + link.disable_update() + + n_iteration = args.iteration * 16 / args.batchsize + updater = training.updaters.StandardUpdater( + train_iter, optimizer, converter=converter, device=device) + trainer 
= training.Trainer( + updater, (n_iteration, 'iteration'), args.out) + + @make_shift('lr') + def lr_schedule(trainer): + base_lr = 0.02 * args.batchsize / 16 + warm_up_duration = 500 + warm_up_rate = 1 / 3 + + iteration = trainer.updater.iteration + if iteration < warm_up_duration: + rate = warm_up_rate \ + + (1 - warm_up_rate) * iteration / warm_up_duration + else: + rate = 1 + for step in args.step: + if iteration >= step * 16 / args.batchsize: + rate *= 0.1 + + return base_lr * rate + + trainer.extend(lr_schedule) + + if comm.rank == 0: + log_interval = 10, 'iteration' + trainer.extend(extensions.LogReport(trigger=log_interval)) + trainer.extend(extensions.observe_lr(), trigger=log_interval) + trainer.extend(extensions.PrintReport( + ['epoch', 'iteration', 'lr', 'main/loss', + 'main/loss/rpn/loc', 'main/loss/rpn/conf', + 'main/loss/head/loc', 'main/loss/head/conf', + 'main/loss/keypoint' + ]), + trigger=log_interval) + trainer.extend(extensions.ProgressBar(update_interval=10)) + + trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration')) + trainer.extend( + extensions.snapshot_object( + model, 'model_iter_{.updater.iteration}'), + trigger=(n_iteration, 'iteration')) + + if args.resume: + serializers.load_npz(args.resume, trainer, strict=False) + + trainer.run() + + +if __name__ == '__main__': + main() From c045eaefc48fcdf877a0a9c5007a4bdcd9d099d5 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 7 Mar 2019 11:34:06 +0900 Subject: [PATCH 057/100] fix broken flake8 --- examples/mask_rcnn/train_multi_keypoint.py | 52 ++++++++++------------ 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/examples/mask_rcnn/train_multi_keypoint.py b/examples/mask_rcnn/train_multi_keypoint.py index 42ae655ee7..001effbae4 100644 --- a/examples/mask_rcnn/train_multi_keypoint.py +++ b/examples/mask_rcnn/train_multi_keypoint.py @@ -1,7 +1,6 @@ import argparse import multiprocessing import numpy as np -import PIL import chainer import chainer.functions as F 
@@ -15,7 +14,6 @@ from chainercv.chainer_experimental.datasets.sliceable import TransformDataset from chainercv.chainer_experimental.training.extensions import make_shift -from chainercv.datasets import coco_keypoint_names from chainercv.datasets import COCOKeypointDataset from chainercv.links import MaskRCNNFPNResNet101 from chainercv.links import MaskRCNNFPNResNet50 @@ -43,7 +41,7 @@ def __init__(self, model): with self.init_scope(): self.model = model - def __call__(self, imgs, points, visibles, bboxes): + def __call__(self, imgs, points, visibles, labels, bboxes): B = len(imgs) pad_size = np.array( [im.shape[1:] for im in imgs]).max(axis=0) @@ -57,17 +55,11 @@ def __call__(self, imgs, points, visibles, bboxes): x[i, :, :H, :W] = img x = self.xp.array(x) - # For reducing unnecessary CPU/GPU copy, `masks` is kept in CPU. - pad_masks = [ - np.zeros( - (mask.shape[0], pad_size[0], pad_size[1]), dtype=np.bool) - for mask in masks] - for i, mask in enumerate(masks): - _, H, W = mask.shape - pad_masks[i][:, :H, :W] = mask - masks = pad_masks + points = [self.xp.array(point) for point in points] + visibles = [self.xp.array(visible) for visible in visibles] bboxes = [self.xp.array(bbox) for bbox in bboxes] + assert all([np.all(label == 1) for label in labels]) labels = [self.xp.array(label) for label in labels] sizes = [img.shape[1:] for img in imgs] @@ -96,26 +88,28 @@ def __call__(self, imgs, points, visibles, bboxes): losses = [ rpn_loc_loss + rpn_conf_loss + head_loc_loss + head_conf_loss] - # mask_rois, mask_roi_indices, gt_segms, gt_mask_labels = mask_loss_pre( - # rois, roi_indices, masks, bboxes, - # head_gt_labels, self.model.mask_head.segm_size) - # n_roi = sum([len(roi) for roi in mask_rois]) - # if n_roi > 0: - # segms = self.model.mask_head(hs, mask_rois, mask_roi_indices) - # mask_loss = mask_loss_post( - # segms, mask_roi_indices, gt_segms, gt_mask_labels, B) - # else: - # # Compute dummy variables to complete the computational graph - # mask_rois[0] = 
self.xp.array([[0, 0, 1, 1]], dtype=np.float32) - # mask_roi_indices[0] = self.xp.array([0], dtype=np.int32) - # segms = self.model.mask_head(hs, mask_rois, mask_roi_indices) - # mask_loss = 0 * F.sum(segms) + point_rois, point_roi_indices, gt_points, gt_visibles = keypoint_loss_pre( + rois, roi_indices, points, visibles, bboxes, head_gt_labels, + self.model.keypoint_head.point_map_size) + n_roi = sum([len(roi) for roi in point_rois]) + if n_roi > 0: + point_maps = self.model.keypoint_head(hs, point_rois, point_roi_indices) + point_loss = keypoint_loss_post( + point_maps, point_roi_indices, + gt_points, gt_visibles, B) + else: + # Compute dummy variables to complete the computational graph + point_rois[0] = self.xp.array([[0, 0, 1, 1]], dtype=np.float32) + point_roi_indices[0] = self.xp.array([0], dtype=np.int32) + point_maps = self.model.keypoint_head(hs, point_rois, point_roi_indices) + point_loss = 0 * F.sum(point_maps) + losses.append(point_loss) loss = sum(losses) chainer.reporter.report({ 'loss': loss, 'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss, 'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss, - 'loss/keypoint': keypoint_loss}, + 'loss/point': point_loss}, self) return loss @@ -128,7 +122,7 @@ def __init__(self, min_size, max_size, mean): self.mean = mean def __call__(self, in_data): - img, point, visible, _, bbox = in_data + img, point, visible, label, bbox = in_data # Flipping size = img.shape[1:] img, params = transforms.random_flip( @@ -143,7 +137,7 @@ def __call__(self, in_data): img -= self.mean point = transforms.resize_point(point, size, img.shape[1:]) bbox = bbox * scale - return img, point, visible, bbox + return img, point, visible, label, bbox def converter(batch, device=None): From ac18d36085b98b7283b354009ee3bd193b5de49e Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 7 Mar 2019 20:06:44 +0900 Subject: [PATCH 058/100] fix mistake --- examples/mask_rcnn/train_multi_keypoint.py | 3 +-- 1 file changed, 1 
insertion(+), 2 deletions(-) diff --git a/examples/mask_rcnn/train_multi_keypoint.py b/examples/mask_rcnn/train_multi_keypoint.py index 001effbae4..fedaaa321b 100644 --- a/examples/mask_rcnn/train_multi_keypoint.py +++ b/examples/mask_rcnn/train_multi_keypoint.py @@ -189,9 +189,8 @@ def main(): train = TransformDataset( COCOKeypointDataset( - data_dir='/home/yuyu2172/coco', split='train'), - ('img', 'point', 'visible', 'bbox'), + ('img', 'point', 'visible', 'label', 'bbox'), Transform(model.min_size, model.max_size, model.extractor.mean)) if comm.rank == 0: From 781792b2f7426fabe117f289aec019946192f49c Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 7 Mar 2019 20:30:21 +0900 Subject: [PATCH 059/100] fix the order of data --- chainercv/datasets/coco/coco_keypoint_dataset.py | 6 +++--- .../datasets_tests/coco_tests/test_coco_keypoint_dataset.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/chainercv/datasets/coco/coco_keypoint_dataset.py b/chainercv/datasets/coco/coco_keypoint_dataset.py index 2f0dfebf0a..234d7e0942 100644 --- a/chainercv/datasets/coco/coco_keypoint_dataset.py +++ b/chainercv/datasets/coco/coco_keypoint_dataset.py @@ -43,10 +43,10 @@ class COCOKeypointDataset(GetterDataset): ":math:`(y, x)`" :obj:`visible` [#coco_point_1]_, ":math:`(R, K)`", :obj:`bool`, \ "true when a keypoint is visible." 
- :obj:`bbox` [#coco_point_1]_, ":math:`(R, 4)`", :obj:`float32`, \ - ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`" :obj:`label` [#coco_point_1]_, ":math:`(R,)`", :obj:`int32`, \ ":math:`[0, \#fg\_class - 1]`" + :obj:`bbox` [#coco_point_1]_, ":math:`(R, 4)`", :obj:`float32`, \ + ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`" :obj:`area` [#coco_point_1]_ [#coco_point_2]_, ":math:`(R,)`", \ :obj:`float32`, -- :obj:`crowded` [#coco_point_3]_, ":math:`(R,)`", :obj:`bool`, -- @@ -95,7 +95,7 @@ def __init__(self, data_dir='auto', split='train', year='2017', self.add_getter( ['point', 'visible', 'bbox', 'label', 'area', 'crowded'], self._get_annotations) - keys = ('img', 'point', 'visible', 'bbox', 'label') + keys = ('img', 'point', 'visible', 'label', 'bbox') if return_area: keys += ('area',) if return_crowded: diff --git a/tests/datasets_tests/coco_tests/test_coco_keypoint_dataset.py b/tests/datasets_tests/coco_tests/test_coco_keypoint_dataset.py index 191e9c96ee..984245f9ba 100644 --- a/tests/datasets_tests/coco_tests/test_coco_keypoint_dataset.py +++ b/tests/datasets_tests/coco_tests/test_coco_keypoint_dataset.py @@ -53,7 +53,7 @@ def test_coco_bbox_dataset(self): for _ in range(10): i = np.random.randint(0, len(self.dataset)) - img, point, _, bbox, label = self.dataset[i][:5] + img, point, _, label, bbox = self.dataset[i][:5] assert_is_bbox(bbox, img.shape[1:]) self.assertEqual(len(bbox), len(point)) From c6639f77dec617573534b5f32260c5c1e12d1ece Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 7 Mar 2019 20:34:41 +0900 Subject: [PATCH 060/100] fix assertion --- examples/mask_rcnn/train_multi_keypoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/mask_rcnn/train_multi_keypoint.py b/examples/mask_rcnn/train_multi_keypoint.py index fedaaa321b..ccda7b11ef 100644 --- a/examples/mask_rcnn/train_multi_keypoint.py +++ b/examples/mask_rcnn/train_multi_keypoint.py @@ -59,7 +59,7 @@ def __call__(self, imgs, points, visibles, labels, 
bboxes): visibles = [self.xp.array(visible) for visible in visibles] bboxes = [self.xp.array(bbox) for bbox in bboxes] - assert all([np.all(label == 1) for label in labels]) + assert all([np.all(label == 0) for label in labels]) labels = [self.xp.array(label) for label in labels] sizes = [img.shape[1:] for img in imgs] From 665f34b2bb4ddd69306b5b2948929cd053a95069 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 7 Mar 2019 20:37:15 +0900 Subject: [PATCH 061/100] fix --- chainercv/links/model/mask_rcnn/keypoint_head.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chainercv/links/model/mask_rcnn/keypoint_head.py b/chainercv/links/model/mask_rcnn/keypoint_head.py index 8b8e3fc428..a048bbd218 100644 --- a/chainercv/links/model/mask_rcnn/keypoint_head.py +++ b/chainercv/links/model/mask_rcnn/keypoint_head.py @@ -36,7 +36,7 @@ class KeypointHead(chainer.Chain): _canonical_scale = 224 _roi_size = 14 _roi_sample_ratio = 2 - map_size = 56 + point_map_size = 56 def __init__(self, n_point, scales): super(KeypointHead, self).__init__() @@ -76,7 +76,7 @@ def __call__(self, hs, rois, roi_indices): if len(pooled_hs) == 0: return chainer.Variable( self.xp.empty( - (0, self.n_point, self.map_size, self.map_size), + (0, self.n_point, self.point_map_size, self.point_map_size), dtype=np.float32)) h = F.concat(pooled_hs, axis=0) From df8c274a2e7c98cebdfd925fd5a50bdcd945b816 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 7 Mar 2019 22:42:48 +0900 Subject: [PATCH 062/100] fix test --- .../model_tests/mask_rcnn_tests/test_keypoint_head.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_keypoint_head.py b/tests/links_tests/model_tests/mask_rcnn_tests/test_keypoint_head.py index 836e9425f3..17616f156c 100644 --- a/tests/links_tests/model_tests/mask_rcnn_tests/test_keypoint_head.py +++ b/tests/links_tests/model_tests/mask_rcnn_tests/test_keypoint_head.py @@ -53,19 +53,17 @@ 
def _check_keypoint_loss_pre(self, xp): ] points = [ xp.zeros((1, n_point, 2), dtype=np.float32), - xp.zeros((2, n_point, 2), dtype=np.float32), - xp.zeros((1, n_point, 2), dtype=np.float32), + xp.zeros((3, n_point, 2), dtype=np.float32), ] visibles = [ xp.ones((1, n_point), dtype=np.bool), - xp.ones((2, n_point), dtype=np.bool), - xp.ones((1, n_point), dtype=np.bool) + xp.ones((3, n_point), dtype=np.bool), ] bboxes = [_point_to_bbox(point, visible) for point, visible in zip(points, visibles)] labels = [ - xp.array((1, 1), dtype=np.int32), xp.array((1,), dtype=np.int32), + xp.array((1, 1), dtype=np.int32), xp.array((1,), dtype=np.int32), ] rois, roi_indices, gt_roi_points, gt_roi_visibles = keypoint_loss_pre( From f5adabc4e75c88bb7c5bb2fe4f56068395c138c8 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 7 Mar 2019 22:52:01 +0900 Subject: [PATCH 063/100] fix test --- chainercv/links/model/mask_rcnn/mask_head.py | 5 ++-- .../mask_rcnn_tests/test_mask_head.py | 28 +++++++++++-------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py index 462085d536..dc65fd6718 100644 --- a/chainercv/links/model/mask_rcnn/mask_head.py +++ b/chainercv/links/model/mask_rcnn/mask_head.py @@ -205,7 +205,8 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_bboxes, mask_roi_indices = roi_indices[index] gt_mask_labels = gt_head_labels[index] - gt_segms = xp.empty((len(mask_rois), segm_size, segm_size), dtype=np.bool) + gt_segms = xp.empty( + (len(mask_rois), segm_size, segm_size), dtype=np.float32) for i in np.unique(cuda.to_cpu(mask_roi_indices)): gt_mask = gt_masks[i] gt_bbox = gt_bboxes[i] @@ -247,7 +248,7 @@ def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels, xp = cuda.get_array_module(segms.array) mask_roi_indices = xp.hstack(mask_roi_indices).astype(np.int32) - gt_segms = xp.vstack(gt_segms).astype(np.float32, copy=False) + gt_segms = xp.vstack(gt_segms) 
gt_mask_labels = xp.hstack(gt_mask_labels).astype(np.int32) mask_loss = F.sigmoid_cross_entropy( diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py index d1832d1b8b..e89cf3c38d 100644 --- a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py +++ b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py @@ -11,6 +11,8 @@ from chainercv.links.model.mask_rcnn import mask_loss_post from chainercv.links.model.mask_rcnn import mask_loss_pre +from chainercv.utils import mask_to_bbox + def _random_array(xp, shape): return xp.array( @@ -52,7 +54,7 @@ def _check_call(self): self.assertIsInstance(segs.array, self.link.xp.ndarray) self.assertEqual( segs.shape, - (4, self.n_class, self.link.mask_size, self.link.mask_size)) + (4, self.n_class, self.link.segm_size, self.link.segm_size)) def test_call_cpu(self): self._check_call() @@ -101,13 +103,13 @@ def _check_decode(self): segms = [ _random_array( self.link.xp, - (1, self.n_class, self.link.mask_size, self.link.mask_size)), + (1, self.n_class, self.link.segm_size, self.link.segm_size)), _random_array( self.link.xp, - (2, self.n_class, self.link.mask_size, self.link.mask_size)), + (2, self.n_class, self.link.segm_size, self.link.segm_size)), _random_array( self.link.xp, - (1, self.n_class, self.link.mask_size, self.link.mask_size)) + (1, self.n_class, self.link.segm_size, self.link.segm_size)) ] bboxes = [ self.link.xp.array(((4, 1, 6, 3),), dtype=np.float32), @@ -142,8 +144,8 @@ def test_decode_cpu(self): class TestMaskHeadLoss(unittest.TestCase): def _check_mask_loss_pre(self, xp): - n_class = 12 - mask_size = 28 + n_inst = 12 + segm_size = 28 rois = [ xp.array(((4, 1, 6, 3),), dtype=np.float32), xp.array( @@ -156,17 +158,17 @@ def _check_mask_loss_pre(self, xp): xp.array((1,), dtype=np.int32), ] masks = [ - _random_array(xp, (n_class, 60, 70)), - _random_array(xp, (n_class, 60, 70)), - _random_array(xp, (n_class, 
60, 70)), + _random_array(xp, (n_inst, 60, 70)), + _random_array(xp, (n_inst, 60, 70)), ] + bboxes = [mask_to_bbox(mask) for mask in masks] labels = [ - xp.array((10, 4), dtype=np.int32), xp.array((1,), dtype=np.int32), + xp.array((10, 4), dtype=np.int32), xp.array((3,), dtype=np.int32), ] rois, roi_indices, gt_segms, gt_mask_labels = mask_loss_pre( - rois, roi_indices, masks, labels, mask_size) + rois, roi_indices, masks, bboxes, labels, segm_size) self.assertEqual(len(rois), 3) self.assertEqual(len(roi_indices), 3) @@ -183,8 +185,10 @@ def _check_mask_loss_pre(self, xp): self.assertEqual(rois[l].shape[0], gt_mask_labels[l].shape[0]) self.assertEqual(rois[l].shape[1:], (4,)) self.assertEqual(roi_indices[l].shape[1:], ()) - self.assertEqual(gt_segms[l].shape[1:], (mask_size, mask_size)) + self.assertEqual(gt_segms[l].shape[1:], (segm_size, segm_size)) self.assertEqual(gt_mask_labels[l].shape[1:], ()) + self.assertEqual(gt_segms[l].dtype, np.float32) + self.assertEqual(gt_mask_labels[l].dtype, np.int32) def test_mask_loss_pre_cpu(self): self._check_mask_loss_pre(np) From 24492f7d8e83d8fde224a0db52e0655be2b43474 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 7 Mar 2019 23:33:26 +0900 Subject: [PATCH 064/100] filter invalid --- examples/mask_rcnn/train_multi_keypoint.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/examples/mask_rcnn/train_multi_keypoint.py b/examples/mask_rcnn/train_multi_keypoint.py index ccda7b11ef..9553bbb4e1 100644 --- a/examples/mask_rcnn/train_multi_keypoint.py +++ b/examples/mask_rcnn/train_multi_keypoint.py @@ -145,6 +145,14 @@ def converter(batch, device=None): return tuple(list(v) for v in zip(*batch)) +def valid_annotation(visible): + if len(visible) == 0: + return False + min_keypoint_per_image = 10 + n_visible = visible.sum() + return n_visible >= min_keypoint_per_image + + def main(): parser = argparse.ArgumentParser() parser.add_argument( @@ -187,10 +195,12 @@ def main(): 
chainer.cuda.get_device_from_id(device).use() train_chain.to_gpu() + train = COCOKeypointDataset(split='train') + indices = [i for i, visible in enumerate(train.slice[:, 'visible']) + if valid_annotation(visible)] + train = train.slice[indices] train = TransformDataset( - COCOKeypointDataset( - split='train'), - ('img', 'point', 'visible', 'label', 'bbox'), + train, ('img', 'point', 'visible', 'label', 'bbox'), Transform(model.min_size, model.max_size, model.extractor.mean)) if comm.rank == 0: From f9228366c4e8c786d7bdec32a7379d712a2caa97 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 8 Mar 2019 13:21:57 +0900 Subject: [PATCH 065/100] add balanced sampling for KeypointHead --- chainercv/links/model/fpn/head.py | 28 +++---- chainercv/links/model/fpn/misc.py | 17 +++++ .../links/model/mask_rcnn/keypoint_head.py | 73 +++++++++++-------- examples/mask_rcnn/train_multi_keypoint.py | 4 +- 4 files changed, 70 insertions(+), 52 deletions(-) diff --git a/chainercv/links/model/fpn/head.py b/chainercv/links/model/fpn/head.py index f0c0fc7b63..3422c07dde 100644 --- a/chainercv/links/model/fpn/head.py +++ b/chainercv/links/model/fpn/head.py @@ -7,7 +7,7 @@ import chainer.links as L from chainercv.links.model.fpn.misc import argsort -from chainercv.links.model.fpn.misc import choice +from chainercv.links.model.fpn.misc import balanced_sampling from chainercv.links.model.fpn.misc import exp_clip from chainercv.links.model.fpn.misc import smooth_l1 from chainercv import utils @@ -285,25 +285,15 @@ def head_loss_pre(rois, roi_indices, std, bboxes, labels): else: gt_label = xp.zeros(int(mask.sum()), dtype=np.int32) - fg_index = xp.where(gt_label > 0)[0] - n_fg = int(batchsize_per_image * fg_ratio) - if len(fg_index) > n_fg: - gt_label[choice(fg_index, size=len(fg_index) - n_fg)] = -1 - - bg_index = xp.where(gt_label == 0)[0] - n_bg = batchsize_per_image - int((gt_label > 0).sum()) - if len(bg_index) > n_bg: - gt_label[choice(bg_index, size=len(bg_index) - n_bg)] = -1 - 
gt_locs[mask] = gt_loc - gt_labels[mask] = gt_label - - mask = gt_labels >= 0 - rois = rois[mask] - roi_indices = roi_indices[mask] - roi_levels = roi_levels[mask] - gt_locs = gt_locs[mask] - gt_labels = gt_labels[mask] + gt_labels[mask] = balanced_sampling(gt_label) + + is_sampled = gt_labels >= 0 + rois = rois[is_sampled] + roi_indices = roi_indices[is_sampled] + roi_levels = roi_levels[is_sampled] + gt_locs = gt_locs[is_sampled] + gt_labels = gt_labels[is_sampled] masks = [roi_levels == l for l in range(n_level)] rois = [rois[m] for m in masks] diff --git a/chainercv/links/model/fpn/misc.py b/chainercv/links/model/fpn/misc.py index c699e3d2f6..19204cf9c7 100644 --- a/chainercv/links/model/fpn/misc.py +++ b/chainercv/links/model/fpn/misc.py @@ -13,6 +13,23 @@ def smooth_l1(x, t, beta): return F.huber_loss(x, t, beta, reduce='no') / beta +def balanced_sampling(label, n_sample, fg_ratio): + label = label.copy() + + xp = cuda.get_array_module(label) + + fg_index = xp.where(label > 0)[0] + n_fg = int(n_sample * fg_ratio) + if len(fg_index) > n_fg: + label[choice(fg_index, size=len(fg_index) - n_fg)] = -1 + + bg_index = xp.where(label == 0)[0] + n_bg = n_sample - int((label > 0).sum()) + if len(bg_index) > n_bg: + label[choice(bg_index, size=len(bg_index) - n_bg)] = -1 + return label + + # to avoid out of memory def argsort(x): xp = cuda.get_array_module(x) diff --git a/chainercv/links/model/mask_rcnn/keypoint_head.py b/chainercv/links/model/mask_rcnn/keypoint_head.py index a048bbd218..58a060ddd1 100644 --- a/chainercv/links/model/mask_rcnn/keypoint_head.py +++ b/chainercv/links/model/mask_rcnn/keypoint_head.py @@ -17,6 +17,8 @@ from chainercv.links.model.mask_rcnn.misc import point_to_roi_points +from chainercv.links.model.fpn.misc import balanced_sampling + # make a bilinear interpolation kernel # credit @longjon @@ -144,6 +146,9 @@ def decode(self, point_maps, bboxes): def keypoint_loss_pre(rois, roi_indices, gt_points, gt_visibles, gt_bboxes, gt_head_labels, 
point_map_size): + batchsize_per_image = 512 + fg_ratio = 0.25 + _, n_point, _ = gt_points[0].shape xp = cuda.get_array_module(*rois) @@ -156,52 +161,58 @@ def keypoint_loss_pre(rois, roi_indices, gt_points, gt_visibles, roi_indices = xp.hstack(roi_indices).astype(np.int32) gt_head_labels = xp.hstack(gt_head_labels) - index = (gt_head_labels > 0).nonzero()[0] - point_roi_levels = roi_levels[index] - point_rois = rois[index] - point_roi_indices = roi_indices[index] - - gt_roi_points = xp.empty( - (len(point_rois), n_point, 2), dtype=np.float32) - gt_roi_visibles = xp.empty( - (len(point_rois), n_point), dtype=np.bool) - for i in np.unique(cuda.to_cpu(point_roi_indices)): + gt_head_points = xp.empty( + (len(rois), n_point, 2), dtype=np.float32) + gt_head_visibles = xp.empty( + (len(rois), n_point), dtype=np.bool) + for i in np.unique(cuda.to_cpu(roi_indices)): gt_point = gt_points[i] gt_visible = gt_visibles[i] gt_bbox = gt_bboxes[i] - index = (point_roi_indices == i).nonzero()[0] - point_roi = point_rois[index] - iou = bbox_iou(point_roi, gt_bbox) - gt_index = iou.argmax(axis=1) - gt_roi_point, gt_roi_visible = point_to_roi_points( - gt_point[gt_index], gt_visible[gt_index], - point_roi, point_map_size) - gt_roi_points[index] = xp.array(gt_roi_point) - gt_roi_visibles[index] = xp.array(gt_roi_visible) + index = (roi_indices == i).nonzero()[0] + roi = rois[index] - flag_masks = [point_roi_levels == l for l in range(n_level)] - point_rois = [point_rois[m] for m in flag_masks] - point_roi_indices = [point_roi_indices[m] for m in flag_masks] - gt_roi_points = [gt_roi_points[m] for m in flag_masks] - gt_roi_visibles = [gt_roi_visibles[m] for m in flag_masks] - return point_rois, point_roi_indices, gt_roi_points, gt_roi_visibles + iou = bbox_iou(roi, gt_bbox) + gt_index = iou.argmax(axis=1) + gt_head_point, gt_head_visible = point_to_roi_points( + gt_point[gt_index], gt_visible[gt_index], + roi, point_map_size) + gt_head_points[index] = xp.array(gt_head_point) + 
gt_head_visibles[index] = xp.array(gt_head_visible) + + gt_head_labels[index] = balanced_sampling( + gt_head_labels[index], batchsize_per_image, fg_ratio) + + is_sampled = gt_head_labels >= 0 + rois = rois[is_sampled] + roi_indices = roi_indices[is_sampled] + roi_levels = roi_levels[is_sampled] + gt_head_points = gt_head_points[is_sampled] + gt_head_visibles = gt_head_visibles[is_sampled] + + flag_masks = [roi_levels == l for l in range(n_level)] + rois = [rois[m] for m in flag_masks] + roi_indices = [roi_indices[m] for m in flag_masks] + gt_head_points = [gt_head_points[m] for m in flag_masks] + gt_head_visibles = [gt_head_visibles[m] for m in flag_masks] + return rois, roi_indices, gt_head_points, gt_head_visibles def keypoint_loss_post( - point_maps, point_roi_indices, gt_roi_points, - gt_roi_visibles, batchsize): + point_maps, point_roi_indices, gt_head_points, + gt_head_visibles, batchsize): xp = cuda.get_array_module(point_maps.array) point_roi_indices = xp.hstack(point_roi_indices).astype(np.int32) - gt_roi_points = xp.vstack(gt_roi_points).astype(np.int32) - gt_roi_visibles = xp.vstack(gt_roi_visibles).astype(np.bool) + gt_head_points = xp.vstack(gt_head_points).astype(np.int32) + gt_head_visibles = xp.vstack(gt_head_visibles).astype(np.bool) B, K, H, W = point_maps.shape point_maps = point_maps.reshape((B * K, H * W)) - spatial_labels = gt_roi_points[:, :, 0] * W + gt_roi_points[:, :, 1] + spatial_labels = gt_head_points[:, :, 0] * W + gt_head_points[:, :, 1] spatial_labels = spatial_labels.reshape((B * K,)) - spatial_labels[xp.logical_not(gt_roi_visibles.reshape((B * K,)))] = -1 + spatial_labels[xp.logical_not(gt_head_visibles.reshape((B * K,)))] = -1 # Remember that the loss is normalized by the total number of # visible keypoints. 
keypoint_loss = F.softmax_cross_entropy(point_maps, spatial_labels) diff --git a/examples/mask_rcnn/train_multi_keypoint.py b/examples/mask_rcnn/train_multi_keypoint.py index 9553bbb4e1..1824e2afe9 100644 --- a/examples/mask_rcnn/train_multi_keypoint.py +++ b/examples/mask_rcnn/train_multi_keypoint.py @@ -88,7 +88,7 @@ def __call__(self, imgs, points, visibles, labels, bboxes): losses = [ rpn_loc_loss + rpn_conf_loss + head_loc_loss + head_conf_loss] - point_rois, point_roi_indices, gt_points, gt_visibles = keypoint_loss_pre( + point_rois, point_roi_indices, gt_head_points, gt_head_visibles = keypoint_loss_pre( rois, roi_indices, points, visibles, bboxes, head_gt_labels, self.model.keypoint_head.point_map_size) n_roi = sum([len(roi) for roi in point_rois]) @@ -96,7 +96,7 @@ def __call__(self, imgs, points, visibles, labels, bboxes): point_maps = self.model.keypoint_head(hs, point_rois, point_roi_indices) point_loss = keypoint_loss_post( point_maps, point_roi_indices, - gt_points, gt_visibles, B) + gt_head_points, gt_head_visibles, B) else: # Compute dummy variables to complete the computational graph point_rois[0] = self.xp.array([[0, 0, 1, 1]], dtype=np.float32) From 7ec9a18c937a85d8150442acdca13f8164f0b301 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 8 Mar 2019 13:34:43 +0900 Subject: [PATCH 066/100] remove invalid box --- chainercv/links/model/mask_rcnn/keypoint_head.py | 10 ++++++++++ chainercv/links/model/mask_rcnn/misc.py | 8 ++++++++ 2 files changed, 18 insertions(+) diff --git a/chainercv/links/model/mask_rcnn/keypoint_head.py b/chainercv/links/model/mask_rcnn/keypoint_head.py index 58a060ddd1..5e450654fd 100644 --- a/chainercv/links/model/mask_rcnn/keypoint_head.py +++ b/chainercv/links/model/mask_rcnn/keypoint_head.py @@ -16,6 +16,7 @@ from chainercv.utils.bbox.bbox_iou import bbox_iou from chainercv.links.model.mask_rcnn.misc import point_to_roi_points +from chainercv.links.model.mask_rcnn.misc import within_bbox from 
chainercv.links.model.fpn.misc import balanced_sampling @@ -171,6 +172,7 @@ def keypoint_loss_pre(rois, roi_indices, gt_points, gt_visibles, gt_bbox = gt_bboxes[i] index = (roi_indices == i).nonzero()[0] + gt_head_label = gt_head_labels[index] roi = rois[index] iou = bbox_iou(roi, gt_bbox) @@ -181,6 +183,14 @@ def keypoint_loss_pre(rois, roi_indices, gt_points, gt_visibles, gt_head_points[index] = xp.array(gt_head_point) gt_head_visibles[index] = xp.array(gt_head_visible) + # Ignore RoIs that are closest to a bounding box that does + # not contain any valid keypoints. + valid_point = within_bbox(gt_point, gt_bbox) + valid_point = xp.logical_and(valid_point, gt_visible) + visible_roi = valid_point.sum(axis=1) > 0 + visible_roi = visible_roi[gt_index] + gt_head_label[xp.logical_not(gt_index)] = -1 + gt_head_labels[index] = balanced_sampling( gt_head_labels[index], batchsize_per_image, fg_ratio) diff --git a/chainercv/links/model/mask_rcnn/misc.py b/chainercv/links/model/mask_rcnn/misc.py index 6b0c7a3f91..8d2dfce1b5 100644 --- a/chainercv/links/model/mask_rcnn/misc.py +++ b/chainercv/links/model/mask_rcnn/misc.py @@ -190,3 +190,11 @@ def point_to_roi_points( roi_point[:, k, 1] = xs roi_visible[:, k] = valid return roi_point, roi_visible + + +def within_bbox(point, bbox): + y_within = (point[:, :, 0] >= bbox[:, 0][:, None]) & ( + point[:, :, 0] <= bbox[:, 2][:, None]) + x_within = (point[:, :, 1] >= bbox[:, 1][:, None]) & ( + point[:, :, 1] <= bbox[:, 3][:, None]) + return y_within & x_within From 1d6f599322b8693100813f6f7ab91f3730d90e24 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 8 Mar 2019 13:40:58 +0900 Subject: [PATCH 067/100] multiscale training --- examples/mask_rcnn/train_multi_keypoint.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/examples/mask_rcnn/train_multi_keypoint.py b/examples/mask_rcnn/train_multi_keypoint.py index 1824e2afe9..6d7d4f6dd1 100644 --- a/examples/mask_rcnn/train_multi_keypoint.py +++ 
b/examples/mask_rcnn/train_multi_keypoint.py @@ -1,6 +1,7 @@ import argparse import multiprocessing import numpy as np +import random import chainer import chainer.functions as F @@ -117,6 +118,8 @@ def __call__(self, imgs, points, visibles, labels, bboxes): class Transform(object): def __init__(self, min_size, max_size, mean): + if isinstance(min_size, (tuple, list)): + min_size = (min_size,) self.min_size = min_size self.max_size = max_size self.mean = mean @@ -133,7 +136,8 @@ def __call__(self, in_data): bbox, size, x_flip=params['x_flip']) # Scaling and mean subtraction - img, scale = scale_img(img, self.min_size, self.max_size) + min_size = random.choice(self.min_size) + img, scale = scale_img(img, min_size, self.max_size) img -= self.mean point = transforms.resize_point(point, size, img.shape[1:]) bbox = bbox * scale @@ -201,7 +205,9 @@ def main(): train = train.slice[indices] train = TransformDataset( train, ('img', 'point', 'visible', 'label', 'bbox'), - Transform(model.min_size, model.max_size, model.extractor.mean)) + Transform( + (640, 672, 704, 736, 768, 800), model.max_size, + model.extractor.mean)) if comm.rank == 0: indices = np.arange(len(train)) From b8fa344234e00698672f8442951de64872e7ba2c Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 8 Mar 2019 13:46:35 +0900 Subject: [PATCH 068/100] add eval_keypoint_detection_multi --- .../eval_keypoint_detection_multi.py | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 examples/keypoint_detection/eval_keypoint_detection_multi.py diff --git a/examples/keypoint_detection/eval_keypoint_detection_multi.py b/examples/keypoint_detection/eval_keypoint_detection_multi.py new file mode 100644 index 0000000000..9040a301b8 --- /dev/null +++ b/examples/keypoint_detection/eval_keypoint_detection_multi.py @@ -0,0 +1,48 @@ +import argparse + +import chainer +from chainer import iterators +import chainermn + +from chainercv.utils import apply_to_iterator +from chainercv.utils import 
ProgressHook + +from eval_keypoint_detection import models +from eval_keypoint_detection import setup + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + '--dataset', choices=('coco',), default='coco') + parser.add_argument('--model', choices=sorted(models.keys())) + parser.add_argument('--pretrained-model') + parser.add_argument('--batchsize', type=int) + args = parser.parse_args() + + comm = chainermn.create_communicator() + device = comm.intra_rank + + dataset, label_names, eval_, model, batchsize = setup( + args.dataset, args.model, args.pretrained_model, args.batchsize) + + chainer.cuda.get_device_from_id(device).use() + model.to_gpu() + + if not comm.rank == 0: + apply_to_iterator(model.predict, None, comm=comm) + return + + iterator = iterators.MultithreadIterator( + dataset, batchsize * comm.size, repeat=False, shuffle=False) + + in_values, out_values, rest_values = apply_to_iterator( + model.predict, iterator, hook=ProgressHook(len(dataset)), comm=comm) + # delete unused iterators explicitly + del in_values + + eval_(out_values, rest_values) + + +if __name__ == '__main__': + main() From a94cca4b1e3b2b00ed9008d6adee21acd4b7bca9 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 8 Mar 2019 13:52:15 +0900 Subject: [PATCH 069/100] fix --- examples/mask_rcnn/train_multi_keypoint.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/examples/mask_rcnn/train_multi_keypoint.py b/examples/mask_rcnn/train_multi_keypoint.py index 6d7d4f6dd1..bab67649a8 100644 --- a/examples/mask_rcnn/train_multi_keypoint.py +++ b/examples/mask_rcnn/train_multi_keypoint.py @@ -118,7 +118,7 @@ def __call__(self, imgs, points, visibles, labels, bboxes): class Transform(object): def __init__(self, min_size, max_size, mean): - if isinstance(min_size, (tuple, list)): + if not isinstance(min_size, (tuple, list)): min_size = (min_size,) self.min_size = min_size self.max_size = max_size @@ -171,6 +171,11 @@ def main(): 
parser.add_argument('--communicator', default='hierarchical') args = parser.parse_args() + + # from chainer.configuration import global_config + # global_config.cv_resize_backend = 'PIL' + # global_config.cv_read_image_backend = 'PIL' + # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator if hasattr(multiprocessing, 'set_start_method'): multiprocessing.set_start_method('forkserver') @@ -218,8 +223,7 @@ def main(): train_iter = chainer.iterators.MultiprocessIterator( train, args.batchsize // comm.size, - n_processes=args.batchsize // comm.size, - shared_mem=3 * 1000 * 1000 * 4) + n_processes=args.batchsize // comm.size) optimizer = chainermn.create_multi_node_optimizer( chainer.optimizers.MomentumSGD(), comm) From 2b0b8a4f244b75e1678a700b8b99d0ad1ddc2af1 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 8 Mar 2019 13:57:12 +0900 Subject: [PATCH 070/100] fix --- chainercv/links/model/fpn/head.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/chainercv/links/model/fpn/head.py b/chainercv/links/model/fpn/head.py index 3422c07dde..12c85b7b31 100644 --- a/chainercv/links/model/fpn/head.py +++ b/chainercv/links/model/fpn/head.py @@ -286,7 +286,8 @@ def head_loss_pre(rois, roi_indices, std, bboxes, labels): gt_label = xp.zeros(int(mask.sum()), dtype=np.int32) gt_locs[mask] = gt_loc - gt_labels[mask] = balanced_sampling(gt_label) + gt_labels[mask] = balanced_sampling( + gt_label, batchsize_per_image, fg_ratio) is_sampled = gt_labels >= 0 rois = rois[is_sampled] From e964faea9a4805703604abd2120e8ac851b34d62 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Sat, 9 Mar 2019 00:13:25 +0900 Subject: [PATCH 071/100] do not use bg sample --- .../links/model/mask_rcnn/keypoint_head.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/chainercv/links/model/mask_rcnn/keypoint_head.py b/chainercv/links/model/mask_rcnn/keypoint_head.py index 5e450654fd..057ccdb9a7 100644 --- 
a/chainercv/links/model/mask_rcnn/keypoint_head.py +++ b/chainercv/links/model/mask_rcnn/keypoint_head.py @@ -18,8 +18,6 @@ from chainercv.links.model.mask_rcnn.misc import point_to_roi_points from chainercv.links.model.mask_rcnn.misc import within_bbox -from chainercv.links.model.fpn.misc import balanced_sampling - # make a bilinear interpolation kernel # credit @longjon @@ -162,6 +160,13 @@ def keypoint_loss_pre(rois, roi_indices, gt_points, gt_visibles, roi_indices = xp.hstack(roi_indices).astype(np.int32) gt_head_labels = xp.hstack(gt_head_labels) + # Ignore all negative samples + index = (gt_head_labels > 0).nonzero()[0] + roi_levels = roi_levels[index] + rois = rois[index] + roi_indices = roi_indices[index] + gt_head_labels = gt_head_labels[index] + gt_head_points = xp.empty( (len(rois), n_point, 2), dtype=np.float32) gt_head_visibles = xp.empty( @@ -183,18 +188,16 @@ def keypoint_loss_pre(rois, roi_indices, gt_points, gt_visibles, gt_head_points[index] = xp.array(gt_head_point) gt_head_visibles[index] = xp.array(gt_head_visible) - # Ignore RoIs that are closest to a bounding box that does - # not contain any valid keypoints. + # Ignore RoIs whose closest bounding box does not contain + # any valid keypoints. 
valid_point = within_bbox(gt_point, gt_bbox) valid_point = xp.logical_and(valid_point, gt_visible) visible_roi = valid_point.sum(axis=1) > 0 visible_roi = visible_roi[gt_index] gt_head_label[xp.logical_not(gt_index)] = -1 + gt_head_labels[index] = gt_head_label - gt_head_labels[index] = balanced_sampling( - gt_head_labels[index], batchsize_per_image, fg_ratio) - - is_sampled = gt_head_labels >= 0 + is_sampled = (gt_head_labels > 0).nonzero()[0] rois = rois[is_sampled] roi_indices = roi_indices[is_sampled] roi_levels = roi_levels[is_sampled] From afb6e2a864f37cb7ff87d5a438e6b34331c74f6c Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Sat, 9 Mar 2019 00:14:44 +0900 Subject: [PATCH 072/100] add shared_mem option --- examples/mask_rcnn/train_multi_keypoint.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/mask_rcnn/train_multi_keypoint.py b/examples/mask_rcnn/train_multi_keypoint.py index bab67649a8..37cb683beb 100644 --- a/examples/mask_rcnn/train_multi_keypoint.py +++ b/examples/mask_rcnn/train_multi_keypoint.py @@ -223,7 +223,8 @@ def main(): train_iter = chainer.iterators.MultiprocessIterator( train, args.batchsize // comm.size, - n_processes=args.batchsize // comm.size) + n_processes=args.batchsize // comm.size, + shared_mem=10 * 1000 * 1000 * 3) optimizer = chainermn.create_multi_node_optimizer( chainer.optimizers.MomentumSGD(), comm) From 3d317ba8c252219908e782d47e838853e95c7c3b Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Sun, 10 Mar 2019 10:38:02 +0900 Subject: [PATCH 073/100] fix --- chainercv/links/model/mask_rcnn/misc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/chainercv/links/model/mask_rcnn/misc.py b/chainercv/links/model/mask_rcnn/misc.py index 600cbd0562..2cdcd52bdb 100644 --- a/chainercv/links/model/mask_rcnn/misc.py +++ b/chainercv/links/model/mask_rcnn/misc.py @@ -128,7 +128,8 @@ def segm_to_mask(segm, bbox, size, pad=1): if bb_height == 0 or bb_width == 0: continue - crop_mask = 
transforms.resize(padded_mask[None], (bb_width, bb_height))[0] + crop_mask = transforms.resize( + padded_mask[None], (bb_height, bb_width))[0] crop_mask = crop_mask > 0.5 y_min = max(bb[0], 0) From 902d090b006c3038f006bc0b4eb8bc551b146877 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Sun, 10 Mar 2019 11:03:06 +0900 Subject: [PATCH 074/100] fix --- chainercv/links/model/mask_rcnn/mask_rcnn.py | 4 ++-- examples/keypoint_detection/eval_keypoint_detection_multi.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py index 98eb319956..a8a5731c96 100644 --- a/chainercv/links/model/mask_rcnn/mask_rcnn.py +++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py @@ -182,8 +182,8 @@ def predict(self, imgs): point_maps = [point_map if point_map is not None else self.xp.zeros( (0, self.keypoint_head.n_point, - self.keypoint_head.map_size, - self.keypoint_head.map_size), + self.keypoint_head.point_map_size, + self.keypoint_head.point_map_size), dtype=np.float32) for point_map in point_maps] point_maps = [ diff --git a/examples/keypoint_detection/eval_keypoint_detection_multi.py b/examples/keypoint_detection/eval_keypoint_detection_multi.py index 9040a301b8..8a49017c21 100644 --- a/examples/keypoint_detection/eval_keypoint_detection_multi.py +++ b/examples/keypoint_detection/eval_keypoint_detection_multi.py @@ -23,7 +23,7 @@ def main(): comm = chainermn.create_communicator() device = comm.intra_rank - dataset, label_names, eval_, model, batchsize = setup( + dataset, eval_, model, batchsize = setup( args.dataset, args.model, args.pretrained_model, args.batchsize) chainer.cuda.get_device_from_id(device).use() From 8145849b592c42265e290264f6eb68b67746fed2 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Sun, 10 Mar 2019 11:10:38 +0900 Subject: [PATCH 075/100] fix order of bbox and label --- .../eval_keypoint_detection_coco.py | 26 +++++++++---------- 
.../test_eval_keypoint_detection_coco.py | 14 +++++----- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/chainercv/evaluations/eval_keypoint_detection_coco.py b/chainercv/evaluations/eval_keypoint_detection_coco.py index 4258552bfd..97dfc75b6f 100644 --- a/chainercv/evaluations/eval_keypoint_detection_coco.py +++ b/chainercv/evaluations/eval_keypoint_detection_coco.py @@ -16,7 +16,7 @@ def eval_keypoint_detection_coco( pred_points, pred_labels, pred_scores, - gt_points, gt_visibles, gt_bboxes=None, gt_labels=None, + gt_points, gt_visibles, gt_labels=None, gt_bboxes=None, gt_areas=None, gt_crowdeds=None): """Evaluate keypoint detection based on evaluation code of MS COCO. @@ -32,11 +32,11 @@ def eval_keypoint_detection_coco( the confidene for each keypoint. gt_points (iterable of numpy.ndarray): See the table below. gt_visibles (iterable of numpy.ndarray): See the table below. + gt_labels (iterable of numpy.ndarray): See the table below. gt_bboxes (iterable of numpy.ndarray): See the table below. This is optional. If this is :obj:`None`, the ground truth bounding boxes are esitmated from the ground truth keypoints. - gt_labels (iterable of numpy.ndarray): See the table below. gt_areas (iterable of numpy.ndarray): See the table below. If :obj:`None`, some scores are not returned. gt_crowdeds (iterable of numpy.ndarray): See the table below. 
@@ -53,10 +53,10 @@ def eval_keypoint_detection_coco( :obj:`gt_points`, ":math:`[(R, K, 2)]`", :obj:`float32`, \ ":math:`(y, x)`" :obj:`gt_visibles`, ":math:`[(R, K)]`", :obj:`bool`, -- - :obj:`gt_bboxes`, ":math:`[(R, 4)]`", :obj:`float32`, \ - ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`" :obj:`gt_labels`, ":math:`[(R,)]`", :obj:`int32`, \ ":math:`[0, \#fg\_class - 1]`" + :obj:`gt_bboxes`, ":math:`[(R, 4)]`", :obj:`float32`, \ + ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`" :obj:`gt_areas`, ":math:`[(R,)]`", \ :obj:`float32`, -- :obj:`gt_crowdeds`, ":math:`[(R,)]`", :obj:`bool`, -- @@ -148,9 +148,9 @@ def eval_keypoint_detection_coco( pred_scores = iter(pred_scores) gt_points = iter(gt_points) gt_visibles = iter(gt_visibles) + gt_labels = iter(gt_labels) gt_bboxes = (iter(gt_bboxes) if gt_bboxes is not None else itertools.repeat(None)) - gt_labels = iter(gt_labels) if gt_areas is None: compute_area_dependent_metrics = False gt_areas = itertools.repeat(None) @@ -165,10 +165,10 @@ def eval_keypoint_detection_coco( gt_annos = [] existent_labels = {} for i, (pred_point, pred_label, pred_score, gt_point, gt_visible, - gt_bbox, gt_label, + gt_label, gt_bbox, gt_area, gt_crowded) in enumerate(six.moves.zip( pred_points, pred_labels, pred_scores, - gt_points, gt_visibles, gt_bboxes, gt_labels, + gt_points, gt_visibles, gt_labels, gt_bboxes, gt_areas, gt_crowdeds)): if gt_bbox is None: gt_bbox = itertools.repeat(None) @@ -185,16 +185,16 @@ def eval_keypoint_detection_coco( # Visibility flag is currently not used for evaluation v = np.ones(len(pred_pnt)) pred_annos.append( - _create_anno(pred_pnt, v, None, - pred_lb, pred_sc, + _create_anno(pred_pnt, v, + pred_lb, pred_sc, None, img_id=img_id, anno_id=len(pred_annos) + 1, ar=None, crw=0)) existent_labels[pred_lb] = True - for gt_pnt, gt_v, gt_bb, gt_lb, gt_ar, gt_crw in zip( - gt_point, gt_visible, gt_bbox, gt_label, gt_area, gt_crowded): + for gt_pnt, gt_v, gt_lb, gt_bb, gt_ar, gt_crw in zip( + gt_point, gt_visible, 
gt_label, gt_bbox, gt_area, gt_crowded): gt_annos.append( - _create_anno(gt_pnt, gt_v, gt_bb, gt_lb, None, + _create_anno(gt_pnt, gt_v, gt_lb, None, gt_bb, img_id=img_id, anno_id=len(gt_annos) + 1, ar=gt_ar, crw=gt_crw)) ids.append({'id': img_id}) @@ -276,7 +276,7 @@ def eval_keypoint_detection_coco( return results -def _create_anno(pnt, v, bb, lb, sc, img_id, anno_id, ar=None, crw=None): +def _create_anno(pnt, v, lb, sc, bb, img_id, anno_id, ar=None, crw=None): # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/coco.py#L342 y_min = np.min(pnt[:, 0]) x_min = np.min(pnt[:, 1]) diff --git a/tests/evaluations_tests/test_eval_keypoint_detection_coco.py b/tests/evaluations_tests/test_eval_keypoint_detection_coco.py index 2235847640..8112f007f8 100644 --- a/tests/evaluations_tests/test_eval_keypoint_detection_coco.py +++ b/tests/evaluations_tests/test_eval_keypoint_detection_coco.py @@ -71,13 +71,13 @@ def _check(self, result): def test_gt_bboxes_not_supplied(self): result = eval_keypoint_detection_coco( self.pred_points, self.pred_labels, self.pred_scores, - self.gt_points, self.gt_visibles, None, self.gt_labels) + self.gt_points, self.gt_visibles, self.gt_labels, None) self._check(result) def test_area_not_supplied(self): result = eval_keypoint_detection_coco( self.pred_points, self.pred_labels, self.pred_scores, - self.gt_points, self.gt_visibles, self.gt_bboxes, self.gt_labels) + self.gt_points, self.gt_visibles, self.gt_labels, self.gt_bboxes) self._check(result) self.assertFalse( @@ -93,7 +93,7 @@ def test_area_supplied(self): gt_areas = [[100] * self.n_inst for _ in range(2)] result = eval_keypoint_detection_coco( self.pred_points, self.pred_labels, self.pred_scores, - self.gt_points, self.gt_visibles, self.gt_bboxes, self.gt_labels, + self.gt_points, self.gt_visibles, self.gt_labels, self.gt_bboxes, gt_areas=gt_areas, ) self._check(result) @@ -110,7 +110,7 @@ def test_crowded_supplied(self): gt_crowdeds = [[True] * self.n_inst for _ in 
range(2)] result = eval_keypoint_detection_coco( self.pred_points, self.pred_labels, self.pred_scores, - self.gt_points, self.gt_visibles, self.gt_bboxes, self.gt_labels, + self.gt_points, self.gt_visibles, self.gt_labels, self.gt_bboxes, gt_crowdeds=gt_crowdeds, ) # When the only ground truth is crowded, nothing is evaluated. @@ -140,15 +140,15 @@ def test_eval_keypoint_detection_coco(self): gt_points = self.dataset['points'] gt_visibles = self.dataset['visibles'] - gt_bboxes = self.dataset['bboxes'] gt_labels = self.dataset['labels'] + gt_bboxes = self.dataset['bboxes'] gt_areas = self.dataset['areas'] gt_crowdeds = self.dataset['crowdeds'] result = eval_keypoint_detection_coco( pred_points, pred_labels, pred_scores, - gt_points, gt_visibles, gt_bboxes, - gt_labels, gt_areas, gt_crowdeds) + gt_points, gt_visibles, gt_labels, gt_bboxes, + gt_areas, gt_crowdeds) expected = { 'map/iou=0.50:0.95/area=all/max_dets=20': 0.37733572721481323, From e5c80fef74502a9436ff6334ffa2f63190de79fd Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Sun, 10 Mar 2019 11:13:21 +0900 Subject: [PATCH 076/100] fix order --- chainercv/links/model/mask_rcnn/mask_rcnn.py | 2 +- examples/keypoint_detection/eval_keypoint_detection.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py index a8a5731c96..baca550577 100644 --- a/chainercv/links/model/mask_rcnn/mask_rcnn.py +++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py @@ -195,7 +195,7 @@ def predict(self, imgs): point_maps, bboxes) labels = [cuda.to_cpu(label) for label in labels] scores = [cuda.to_cpu(score) for score in scores] - return points, point_scores, bboxes, labels, scores + return points, point_scores, labels, scores, bboxes def prepare(self, imgs): """Preprocess images. 
diff --git a/examples/keypoint_detection/eval_keypoint_detection.py b/examples/keypoint_detection/eval_keypoint_detection.py index 14da196e8c..74fbd5ca6c 100644 --- a/examples/keypoint_detection/eval_keypoint_detection.py +++ b/examples/keypoint_detection/eval_keypoint_detection.py @@ -43,14 +43,14 @@ def setup(dataset, model_name, pretrained_model, batchsize): model.use_preset('evaluate') def eval_(out_values, rest_values): - (pred_points, pred_point_scores, pred_bboxes, pred_labels, - pred_scores) = out_values - (gt_points, gt_visibles, gt_bboxes, gt_labels, + (pred_points, pred_point_scores, pred_labels, pred_scores, + pred_bboxes) = out_values + (gt_points, gt_visibles, gt_labels, gt_bboxes, gt_areas, gt_crowdeds) = rest_values result = eval_keypoint_detection_coco( pred_points, pred_labels, pred_scores, - gt_points, gt_visibles, gt_bboxes, gt_labels, + gt_points, gt_visibles, gt_labels, gt_bboxes, gt_areas, gt_crowdeds) print() From d50227c447f2e0b2446f415e399ab3381383e2db Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Sun, 10 Mar 2019 11:20:08 +0900 Subject: [PATCH 077/100] change order --- chainercv/links/model/mask_rcnn/mask_rcnn.py | 2 +- examples/keypoint_detection/eval_keypoint_detection.py | 2 +- examples/mask_rcnn/demo.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py index baca550577..8bb88f9789 100644 --- a/chainercv/links/model/mask_rcnn/mask_rcnn.py +++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py @@ -195,7 +195,7 @@ def predict(self, imgs): point_maps, bboxes) labels = [cuda.to_cpu(label) for label in labels] scores = [cuda.to_cpu(score) for score in scores] - return points, point_scores, labels, scores, bboxes + return points, labels, scores, point_scores, bboxes def prepare(self, imgs): """Preprocess images. 
diff --git a/examples/keypoint_detection/eval_keypoint_detection.py b/examples/keypoint_detection/eval_keypoint_detection.py index 74fbd5ca6c..94954c5cd7 100644 --- a/examples/keypoint_detection/eval_keypoint_detection.py +++ b/examples/keypoint_detection/eval_keypoint_detection.py @@ -43,7 +43,7 @@ def setup(dataset, model_name, pretrained_model, batchsize): model.use_preset('evaluate') def eval_(out_values, rest_values): - (pred_points, pred_point_scores, pred_labels, pred_scores, + (pred_points, pred_labels, pred_scores, pred_point_scores, pred_bboxes) = out_values (gt_points, gt_visibles, gt_labels, gt_bboxes, gt_areas, gt_crowdeds) = rest_values diff --git a/examples/mask_rcnn/demo.py b/examples/mask_rcnn/demo.py index 682b742fa2..81659c862b 100644 --- a/examples/mask_rcnn/demo.py +++ b/examples/mask_rcnn/demo.py @@ -59,12 +59,12 @@ def main(): label_names=coco_instance_segmentation_label_names) plt.show() elif args.mode == 'keypoint': - points, point_scores, bboxes, labels, scores = model.predict([img]) + points, labels, scores, point_scores, bboxes = model.predict([img]) point = points[0] - point_score = point_scores[0] - bbox = bboxes[0] label = labels[0] score = scores[0] + point_score = point_scores[0] + bbox = bboxes[0] ax = chainercv.visualizations.vis_keypoint_coco( img, point, None, point_score) chainercv.visualizations.vis_bbox(None, bbox, score=score, ax=ax) From 6c9d3bcab5db8be634aea27c05818f0e7625eda6 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Sun, 10 Mar 2019 13:54:45 +0900 Subject: [PATCH 078/100] do not update bilinear interpolation layer --- examples/mask_rcnn/train_multi_keypoint.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/mask_rcnn/train_multi_keypoint.py b/examples/mask_rcnn/train_multi_keypoint.py index 37cb683beb..45b282a01e 100644 --- a/examples/mask_rcnn/train_multi_keypoint.py +++ b/examples/mask_rcnn/train_multi_keypoint.py @@ -236,6 +236,7 @@ def main(): for link in model.links(): if isinstance(link, 
L.BatchNormalization): link.disable_update() + model.keypoint_head.upsample.disable_update() n_iteration = args.iteration * 16 / args.batchsize updater = training.updaters.StandardUpdater( From 553f901150743f6ebefe3a6073cececa01f2f1b2 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Sun, 10 Mar 2019 14:03:13 +0900 Subject: [PATCH 079/100] fix reporter --- examples/mask_rcnn/train_multi_keypoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/mask_rcnn/train_multi_keypoint.py b/examples/mask_rcnn/train_multi_keypoint.py index 45b282a01e..e751aae619 100644 --- a/examples/mask_rcnn/train_multi_keypoint.py +++ b/examples/mask_rcnn/train_multi_keypoint.py @@ -110,7 +110,7 @@ def __call__(self, imgs, points, visibles, labels, bboxes): 'loss': loss, 'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss, 'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss, - 'loss/point': point_loss}, + 'loss/keypoint': point_loss}, self) return loss From ae067896e6a3760b69f485d5a88ab1db6a9b7e51 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Sun, 10 Mar 2019 14:07:36 +0900 Subject: [PATCH 080/100] fix ignore logic --- chainercv/links/model/mask_rcnn/keypoint_head.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/chainercv/links/model/mask_rcnn/keypoint_head.py b/chainercv/links/model/mask_rcnn/keypoint_head.py index 057ccdb9a7..5cde86464a 100644 --- a/chainercv/links/model/mask_rcnn/keypoint_head.py +++ b/chainercv/links/model/mask_rcnn/keypoint_head.py @@ -190,10 +190,9 @@ def keypoint_loss_pre(rois, roi_indices, gt_points, gt_visibles, # Ignore RoIs whose closest bounding box does not contain # any valid keypoints. 
- valid_point = within_bbox(gt_point, gt_bbox) - valid_point = xp.logical_and(valid_point, gt_visible) + valid_point = within_bbox(gt_point[gt_index], roi) + valid_point = xp.logical_and(valid_point, gt_visible[gt_index]) visible_roi = valid_point.sum(axis=1) > 0 - visible_roi = visible_roi[gt_index] gt_head_label[xp.logical_not(gt_index)] = -1 gt_head_labels[index] = gt_head_label From b0c733e8fd536438c8121092da0b7f42c4ea00e4 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Sun, 10 Mar 2019 14:18:37 +0900 Subject: [PATCH 081/100] fix --- chainercv/links/model/mask_rcnn/keypoint_head.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chainercv/links/model/mask_rcnn/keypoint_head.py b/chainercv/links/model/mask_rcnn/keypoint_head.py index 5cde86464a..f53a44a102 100644 --- a/chainercv/links/model/mask_rcnn/keypoint_head.py +++ b/chainercv/links/model/mask_rcnn/keypoint_head.py @@ -193,7 +193,7 @@ def keypoint_loss_pre(rois, roi_indices, gt_points, gt_visibles, valid_point = within_bbox(gt_point[gt_index], roi) valid_point = xp.logical_and(valid_point, gt_visible[gt_index]) visible_roi = valid_point.sum(axis=1) > 0 - gt_head_label[xp.logical_not(gt_index)] = -1 + gt_head_label[xp.logical_not(visible_roi)] = -1 gt_head_labels[index] = gt_head_label is_sampled = (gt_head_labels > 0).nonzero()[0] From 80b8b6aef678f421d8e3f542f4057e1dd91e7000 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 15 Mar 2019 13:19:29 +0900 Subject: [PATCH 082/100] Head -> BboxHead --- chainercv/links/model/fpn/__init__.py | 6 +++--- .../links/model/fpn/{head.py => bbox_head.py} | 18 +++++++++--------- chainercv/links/model/fpn/faster_rcnn.py | 14 +++++++------- .../links/model/fpn/faster_rcnn_fpn_resnet.py | 4 ++-- docs/source/reference/links/fpn.rst | 18 +++++++++--------- 5 files changed, 30 insertions(+), 30 deletions(-) rename chainercv/links/model/fpn/{head.py => bbox_head.py} (96%) diff --git a/chainercv/links/model/fpn/__init__.py 
b/chainercv/links/model/fpn/__init__.py index 0ceacd4fe5..1d53b7f70c 100644 --- a/chainercv/links/model/fpn/__init__.py +++ b/chainercv/links/model/fpn/__init__.py @@ -2,8 +2,8 @@ from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet101 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet50 # NOQA from chainercv.links.model.fpn.fpn import FPN # NOQA -from chainercv.links.model.fpn.head import Head # NOQA -from chainercv.links.model.fpn.head import head_loss_post # NOQA -from chainercv.links.model.fpn.head import head_loss_pre # NOQA +from chainercv.links.model.fpn.bbox_head import BboxHead # NOQA +from chainercv.links.model.fpn.bbox_head import bbox_head_loss_post # NOQA +from chainercv.links.model.fpn.bbox_head import bbox_head_loss_pre # NOQA from chainercv.links.model.fpn.rpn import RPN # NOQA from chainercv.links.model.fpn.rpn import rpn_loss # NOQA diff --git a/chainercv/links/model/fpn/head.py b/chainercv/links/model/fpn/bbox_head.py similarity index 96% rename from chainercv/links/model/fpn/head.py rename to chainercv/links/model/fpn/bbox_head.py index f0c0fc7b63..502baf4775 100644 --- a/chainercv/links/model/fpn/head.py +++ b/chainercv/links/model/fpn/bbox_head.py @@ -13,8 +13,8 @@ from chainercv import utils -class Head(chainer.Chain): - """Head network of Feature Pyramid Networks. +class BboxHead(chainer.Chain): + """Bounding box head network of Feature Pyramid Networks. Args: n_class (int): The number of classes including background. @@ -28,7 +28,7 @@ class Head(chainer.Chain): std = (0.1, 0.2) def __init__(self, n_class, scales): - super(Head, self).__init__() + super(BboxHead, self).__init__() fc_init = { 'initialW': Caffe2FCUniform(), @@ -210,10 +210,10 @@ def decode(self, rois, roi_indices, locs, confs, return bboxes, labels, scores -def head_loss_pre(rois, roi_indices, std, bboxes, labels): +def bbox_head_loss_pre(rois, roi_indices, std, bboxes, labels): """Loss function for Head (pre). 
- This function processes RoIs for :func:`head_loss_post`. + This function processes RoIs for :func:`bbox_head_loss_post`. Args: rois (iterable of arrays): An iterable of arrays of @@ -314,7 +314,7 @@ def head_loss_pre(rois, roi_indices, std, bboxes, labels): return rois, roi_indices, gt_locs, gt_labels -def head_loss_post(locs, confs, roi_indices, gt_locs, gt_labels, batchsize): +def bbox_head_loss_post(locs, confs, roi_indices, gt_locs, gt_labels, batchsize): """Loss function for Head (post). Args: @@ -323,11 +323,11 @@ def head_loss_post(locs, confs, roi_indices, gt_locs, gt_labels, batchsize): confs (array): An iterable of arrays whose shape is :math:`(R, n\_class)`. roi_indices (list of arrays): A list of arrays returned by - :func:`head_locs_pre`. + :func:`bbox_head_locs_pre`. gt_locs (list of arrays): A list of arrays returned by - :func:`head_locs_pre`. + :func:`bbox_head_locs_pre`. gt_labels (list of arrays): A list of arrays returned by - :func:`head_locs_pre`. + :func:`bbox_head_locs_pre`. batchsize (int): The size of batch. Returns: diff --git a/chainercv/links/model/fpn/faster_rcnn.py b/chainercv/links/model/fpn/faster_rcnn.py index c64a563db2..3b727c6029 100644 --- a/chainercv/links/model/fpn/faster_rcnn.py +++ b/chainercv/links/model/fpn/faster_rcnn.py @@ -23,8 +23,8 @@ class FasterRCNN(chainer.Chain): rpn (Link): A link that has the same interface as :class:`~chainercv.links.model.fpn.RPN`. Please refer to the documentation found there. - head (Link): A link that has the same interface as - :class:`~chainercv.links.model.fpn.Head`. + bbox_head (Link): A link that has the same interface as + :class:`~chainercv.links.model.fpn.BboxHead`. Please refer to the documentation found there. min_size (int): A preprocessing paramter for :meth:`prepare`. Please refer to a docstring found for :meth:`prepare`. 
@@ -47,13 +47,13 @@ class FasterRCNN(chainer.Chain): _stride = 32 - def __init__(self, extractor, rpn, head, + def __init__(self, extractor, rpn, bbox_head, min_size=800, max_size=1333): super(FasterRCNN, self).__init__() with self.init_scope(): self.extractor = extractor self.rpn = rpn - self.head = head + self.bbox_head = bbox_head self._min_size = min_size self._max_size = max_size @@ -94,8 +94,8 @@ def __call__(self, x): anchors = self.rpn.anchors(h.shape[2:] for h in hs) rois, roi_indices = self.rpn.decode( rpn_locs, rpn_confs, anchors, x.shape) - rois, roi_indices = self.head.distribute(rois, roi_indices) - head_locs, head_confs = self.head(hs, rois, roi_indices) + rois, roi_indices = self.bbox_head.distribute(rois, roi_indices) + head_locs, head_confs = self.bbox_head(hs, rois, roi_indices) return rois, roi_indices, head_locs, head_confs def predict(self, imgs): @@ -132,7 +132,7 @@ def predict(self, imgs): with chainer.using_config('train', False), chainer.no_backprop_mode(): rois, roi_indices, head_locs, head_confs = self(x) - bboxes, labels, scores = self.head.decode( + bboxes, labels, scores = self.bbox_head.decode( rois, roi_indices, head_locs, head_confs, scales, sizes, self.nms_thresh, self.score_thresh) diff --git a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py index 4b86e0cf7e..970fc0d449 100644 --- a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py +++ b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py @@ -6,7 +6,7 @@ from chainercv.links.model.fpn.faster_rcnn import FasterRCNN from chainercv.links.model.fpn.fpn import FPN -from chainercv.links.model.fpn.head import Head +from chainercv.links.model.fpn.bbox_head import BboxHead from chainercv.links.model.fpn.rpn import RPN from chainercv.links.model.resnet import ResNet101 from chainercv.links.model.resnet import ResNet50 @@ -35,7 +35,7 @@ def __init__(self, n_fg_class=None, pretrained_model=None, super(FasterRCNNFPNResNet, 
self).__init__( extractor=extractor, rpn=RPN(extractor.scales), - head=Head(param['n_fg_class'] + 1, extractor.scales), + bbox_head=BboxHead(param['n_fg_class'] + 1, extractor.scales), min_size=min_size, max_size=max_size ) diff --git a/docs/source/reference/links/fpn.rst b/docs/source/reference/links/fpn.rst index d97aa3599f..5d267ff026 100644 --- a/docs/source/reference/links/fpn.rst +++ b/docs/source/reference/links/fpn.rst @@ -31,9 +31,9 @@ FPN .. autoclass:: FPN :members: -Head -~~~~ -.. autoclass:: Head +BboxHead +~~~~~~~~ +.. autoclass:: BboxHead :members: :special-members: __call__ @@ -46,13 +46,13 @@ RPN Train-only Utility ------------------ -head_loss_pre -~~~~~~~~~~~~~ -.. autofunction:: head_loss_pre +bbox_head_loss_pre +~~~~~~~~~~~~~~~~~~ +.. autofunction:: bbox_head_loss_pre -head_loss_post -~~~~~~~~~~~~~~ -.. autofunction:: head_loss_post +bbox_head_loss_post +~~~~~~~~~~~~~~~~~~~ +.. autofunction:: bbox_head_loss_post rpn_loss ~~~~~~~~ From fa96b48f21099a6e3f77ab1778b41df8f021e993 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 15 Mar 2019 14:59:14 +0900 Subject: [PATCH 083/100] merge Mask R-CNN to chainercv.links.model.fpn --- chainercv/links/__init__.py | 4 +- chainercv/links/model/fpn/__init__.py | 2 + chainercv/links/model/fpn/faster_rcnn.py | 165 ++++++++--- .../links/model/fpn/faster_rcnn_fpn_resnet.py | 140 ++++++--- .../model/{mask_rcnn => fpn}/mask_head.py | 4 +- .../{mask_rcnn/misc.py => fpn/mask_utils.py} | 11 - chainercv/links/model/fpn/misc.py | 13 + chainercv/links/model/mask_rcnn/__init__.py | 8 - chainercv/links/model/mask_rcnn/mask_rcnn.py | 216 -------------- .../model/mask_rcnn/mask_rcnn_fpn_resnet.py | 133 --------- examples/fpn/demo.py | 41 ++- examples/fpn/train_multi.py | 149 +++++++--- examples/mask_rcnn/demo.py | 51 ---- examples/mask_rcnn/train_multi.py | 275 ------------------ 14 files changed, 376 insertions(+), 836 deletions(-) rename chainercv/links/model/{mask_rcnn => fpn}/mask_head.py (98%) rename 
chainercv/links/model/{mask_rcnn/misc.py => fpn/mask_utils.py} (94%) delete mode 100644 chainercv/links/model/mask_rcnn/__init__.py delete mode 100644 chainercv/links/model/mask_rcnn/mask_rcnn.py delete mode 100644 chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py delete mode 100644 examples/mask_rcnn/demo.py delete mode 100644 examples/mask_rcnn/train_multi.py diff --git a/chainercv/links/__init__.py b/chainercv/links/__init__.py index d65e14375f..72b4d32106 100644 --- a/chainercv/links/__init__.py +++ b/chainercv/links/__init__.py @@ -11,8 +11,8 @@ from chainercv.links.model.faster_rcnn.faster_rcnn_vgg import FasterRCNNVGG16 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet101 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet50 # NOQA -from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet101 # NOQA -from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet50 # NOQA +from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet101 # NOQA +from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet50 # NOQA from chainercv.links.model.resnet import ResNet101 # NOQA from chainercv.links.model.resnet import ResNet152 # NOQA from chainercv.links.model.resnet import ResNet50 # NOQA diff --git a/chainercv/links/model/fpn/__init__.py b/chainercv/links/model/fpn/__init__.py index 1d53b7f70c..78f6a7684b 100644 --- a/chainercv/links/model/fpn/__init__.py +++ b/chainercv/links/model/fpn/__init__.py @@ -1,6 +1,8 @@ from chainercv.links.model.fpn.faster_rcnn import FasterRCNN # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet101 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet50 # NOQA +from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet101 # NOQA +from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import 
MaskRCNNFPNResNet50 # NOQA from chainercv.links.model.fpn.fpn import FPN # NOQA from chainercv.links.model.fpn.bbox_head import BboxHead # NOQA from chainercv.links.model.fpn.bbox_head import bbox_head_loss_post # NOQA diff --git a/chainercv/links/model/fpn/faster_rcnn.py b/chainercv/links/model/fpn/faster_rcnn.py index 3b727c6029..40df122f81 100644 --- a/chainercv/links/model/fpn/faster_rcnn.py +++ b/chainercv/links/model/fpn/faster_rcnn.py @@ -3,9 +3,10 @@ import numpy as np import chainer +import chainer.functions as F from chainer.backends import cuda -from chainercv import transforms +from chainercv.links.model.fpn.misc import scale_img class FasterRCNN(chainer.Chain): @@ -26,6 +27,9 @@ class FasterRCNN(chainer.Chain): bbox_head (Link): A link that has the same interface as :class:`~chainercv.links.model.fpn.BboxHead`. Please refer to the documentation found there. + mask_head (Link): A link that has the same interface as + :class:`~chainercv.links.model.mask_rcnn.MaskRCNN`. + Please refer to the documentation found there. min_size (int): A preprocessing paramter for :meth:`prepare`. Please refer to a docstring found for :meth:`prepare`. max_size (int): A preprocessing paramter for :meth:`prepare`. 
Note @@ -45,18 +49,35 @@ class FasterRCNN(chainer.Chain): """ - _stride = 32 + stride = 32 + _accepted_return_values = ('rois', 'bboxes', 'labels', 'scores', 'masks') def __init__(self, extractor, rpn, bbox_head, + mask_head, return_values, min_size=800, max_size=1333): + for value_name in return_values: + if value_name not in self._accepted_return_values: + raise ValueError( + '{} is not included in accepted value names {}'.format( + value_name, self._accepted_return_values)) + self._return_values = return_values + + self._store_rpn_outputs = 'rois' in self._return_values + self._run_bbox = any([key in self._return_values + for key in ['bboxes', 'labels', 'scores', 'masks']]) + self._run_mask = 'masks' in self._return_values super(FasterRCNN, self).__init__() + with self.init_scope(): self.extractor = extractor self.rpn = rpn - self.bbox_head = bbox_head + if self._run_bbox: + self.bbox_head = bbox_head + if self._run_mask: + self.mask_head = mask_head - self._min_size = min_size - self._max_size = max_size + self.min_size = min_size + self.max_size = max_size self.use_preset('visualize') @@ -94,52 +115,90 @@ def __call__(self, x): anchors = self.rpn.anchors(h.shape[2:] for h in hs) rois, roi_indices = self.rpn.decode( rpn_locs, rpn_confs, anchors, x.shape) - rois, roi_indices = self.bbox_head.distribute(rois, roi_indices) - head_locs, head_confs = self.bbox_head(hs, rois, roi_indices) - return rois, roi_indices, head_locs, head_confs + return hs, rois, roi_indices def predict(self, imgs): - """Detect objects from images. + """Segment object instances from images. - This method predicts objects for each image. + This method predicts instance-aware object regions for each image. Args: - imgs (iterable of numpy.ndarray): Arrays holding images. - All images are in CHW and RGB format + imgs (iterable of numpy.ndarray): Arrays holding images of shape + :math:`(B, C, H, W)`. All images are in CHW and RGB format and the range of their value is :math:`[0, 255]`. 
Returns: tuple of lists: This method returns a tuple of three lists, - :obj:`(bboxes, labels, scores)`. + :obj:`(masks, labels, scores)`. - * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \ - where :math:`R` is the number of bounding boxes in a image. \ - Each bounding box is organized by \ - :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \ - in the second axis. + * **masks**: A list of boolean arrays of shape :math:`(R, H, W)`, \ + where :math:`R` is the number of masks in a image. \ + Each pixel holds value if it is inside the object inside or not. * **labels** : A list of integer arrays of shape :math:`(R,)`. \ - Each value indicates the class of the bounding box. \ + Each value indicates the class of the masks. \ Values are in range :math:`[0, L - 1]`, where :math:`L` is the \ number of the foreground classes. * **scores** : A list of float arrays of shape :math:`(R,)`. \ Each value indicates how confident the prediction is. """ + output = {} sizes = [img.shape[1:] for img in imgs] x, scales = self.prepare(imgs) with chainer.using_config('train', False), chainer.no_backprop_mode(): - rois, roi_indices, head_locs, head_confs = self(x) - bboxes, labels, scores = self.bbox_head.decode( - rois, roi_indices, head_locs, head_confs, - scales, sizes, self.nms_thresh, self.score_thresh) - - bboxes = [cuda.to_cpu(bbox) for bbox in bboxes] - labels = [cuda.to_cpu(label) for label in labels] - scores = [cuda.to_cpu(score) for score in scores] - return bboxes, labels, scores + hs, rpn_rois, rpn_roi_indices = self(x) + if self._store_rpn_outputs: + rpn_rois_cpu = [ + chainer.backends.cuda.to_cpu(rpn_roi) for rpn_roi in + _flat_to_list(rpn_rois, rpn_roi_indices, len(imgs))] + output.update({'rois': rpn_rois_cpu}) + + if self._run_bbox: + bbox_rois, bbox_roi_indices = self.bbox_head.distribute( + rpn_rois, rpn_roi_indices) + with chainer.using_config( + 'train', False), chainer.no_backprop_mode(): + head_locs, head_confs = self.bbox_head( + hs, bbox_rois, 
bbox_roi_indices) + bboxes, labels, scores = self.bbox_head.decode( + bbox_rois, bbox_roi_indices, head_locs, head_confs, + scales, sizes, self.nms_thresh, self.score_thresh) + bboxes_cpu = [chainer.backends.cuda.to_cpu(bbox) + for bbox in bboxes] + labels_cpu = [chainer.backends.cuda.to_cpu(label) for label in labels] + scores_cpu = [cuda.to_cpu(score) for score in scores] + output.update({'bboxes': bboxes_cpu, 'labels': labels_cpu, + 'scores': scores_cpu}) + + if self._run_mask: + rescaled_bboxes = [bbox * scale + for scale, bbox in zip(scales, bboxes)] + # Change bboxes to RoI and RoI indices format + mask_rois_before_reordering, mask_roi_indices_before_reordering =\ + _list_to_flat(rescaled_bboxes) + mask_rois, mask_roi_indices, order = self.mask_head.distribute( + mask_rois_before_reordering, mask_roi_indices_before_reordering) + with chainer.using_config( + 'train', False), chainer.no_backprop_mode(): + segms = F.sigmoid( + self.mask_head(hs, mask_rois, mask_roi_indices)).data + # Put the order of proposals back to the one used by bbox head. + segms = segms[order] + segms = _flat_to_list( + segms, mask_roi_indices_before_reordering, len(imgs)) + segms = [segm if segm is not None else + self.xp.zeros( + (0, self.mask_head.segm_size, self.mask_head.segm_size), + dtype=np.float32) + for segm in segms] + segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms] + # Currently MaskHead only supports numpy inputs + masks_cpu = self.mask_head.decode(segms, bboxes_cpu, labels_cpu, sizes) + output.update({'masks': masks_cpu}) + return (output[key] for key in self._return_values) def prepare(self, imgs): """Preprocess images. @@ -154,26 +213,44 @@ def prepare(self, imgs): scales that were caluclated in prepocessing. 
""" - scales = [] resized_imgs = [] for img in imgs: - _, H, W = img.shape - scale = self._min_size / min(H, W) - if scale * max(H, W) > self._max_size: - scale = self._max_size / max(H, W) - scales.append(scale) - H, W = int(H * scale), int(W * scale) - img = transforms.resize(img, (H, W)) + img, scale = scale_img( + img, self.min_size, self.max_size) img -= self.extractor.mean + scales.append(scale) resized_imgs.append(img) - - size = np.array([im.shape[1:] for im in resized_imgs]).max(axis=0) - size = (np.ceil(size / self._stride) * self._stride).astype(int) - x = np.zeros((len(imgs), 3, size[0], size[1]), dtype=np.float32) - for i, img in enumerate(resized_imgs): - _, H, W = img.shape - x[i, :, :H, :W] = img - + pad_size = np.array( + [im.shape[1:] for im in resized_imgs]).max(axis=0) + pad_size = ( + np.ceil(pad_size / self.stride) * self.stride).astype(int) + x = np.zeros( + (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32) + for i, im in enumerate(resized_imgs): + _, H, W = im.shape + x[i, :, :H, :W] = im x = self.xp.array(x) + return x, scales + + +def _list_to_flat(array_list): + xp = chainer.backends.cuda.get_array_module(array_list[0]) + + indices = xp.concatenate( + [i * xp.ones((len(array),), dtype=np.int32) for + i, array in enumerate(array_list)], axis=0) + flat = xp.concatenate(array_list, axis=0) + return flat, indices + + +def _flat_to_list(flat, indices, B): + array_list = [] + for i in range(B): + array = flat[indices == i] + if len(array) > 0: + array_list.append(array) + else: + array_list.append(None) + return array_list diff --git a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py index 970fc0d449..29e6119c0b 100644 --- a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py +++ b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py @@ -7,6 +7,7 @@ from chainercv.links.model.fpn.faster_rcnn import FasterRCNN from chainercv.links.model.fpn.fpn import FPN from 
chainercv.links.model.fpn.bbox_head import BboxHead +from chainercv.links.model.fpn.mask_head import MaskHead from chainercv.links.model.fpn.rpn import RPN from chainercv.links.model.resnet import ResNet101 from chainercv.links.model.resnet import ResNet50 @@ -17,9 +18,34 @@ class FasterRCNNFPNResNet(FasterRCNN): """Base class for FasterRCNNFPNResNet50 and FasterRCNNFPNResNet101. A subclass of this class should have :obj:`_base` and :obj:`_models`. + + Args: + n_fg_class (int): The number of classes excluding the background. + pretrained_model (string): The weight file to be loaded. + This can take :obj:`'coco'`, `filepath` or :obj:`None`. + The default value is :obj:`None`. + + * :obj:`'coco'`: Load weights trained on train split of \ + MS COCO 2017. \ + The weight file is downloaded and cached automatically. \ + :obj:`n_fg_class` must be :obj:`80` or :obj:`None`. + * :obj:`'imagenet'`: Load weights of ResNet-50 trained on \ + ImageNet. \ + The weight file is downloaded and cached automatically. \ + This option initializes weights partially and the rests are \ + initialized randomly. In this case, :obj:`n_fg_class` \ + can be set to any number. + * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \ + must be specified properly. + * :obj:`None`: Do not load weights. + min_size (int): A preprocessing paramter for :meth:`prepare`. Please \ + refer to :meth:`prepare`. + max_size (int): A preprocessing paramter for :meth:`prepare`. 
+ """ def __init__(self, n_fg_class=None, pretrained_model=None, + return_values=['bboxes', 'labels', 'scores'], min_size=800, max_size=1333): param, path = utils.prepare_pretrained_model( {'n_fg_class': n_fg_class}, pretrained_model, self._models) @@ -36,6 +62,8 @@ def __init__(self, n_fg_class=None, pretrained_model=None, extractor=extractor, rpn=RPN(extractor.scales), bbox_head=BboxHead(param['n_fg_class'] + 1, extractor.scales), + mask_head=MaskHead(param['n_fg_class'] + 1, extractor.scales), + return_values=return_values, min_size=min_size, max_size=max_size ) @@ -57,28 +85,6 @@ class FasterRCNNFPNResNet50(FasterRCNNFPNResNet): .. [#] Tsung-Yi Lin et al. Feature Pyramid Networks for Object Detection. CVPR 2017 - Args: - n_fg_class (int): The number of classes excluding the background. - pretrained_model (string): The weight file to be loaded. - This can take :obj:`'coco'`, `filepath` or :obj:`None`. - The default value is :obj:`None`. - - * :obj:`'coco'`: Load weights trained on train split of \ - MS COCO 2017. \ - The weight file is downloaded and cached automatically. \ - :obj:`n_fg_class` must be :obj:`80` or :obj:`None`. - * :obj:`'imagenet'`: Load weights of ResNet-50 trained on \ - ImageNet. \ - The weight file is downloaded and cached automatically. \ - This option initializes weights partially and the rests are \ - initialized randomly. In this case, :obj:`n_fg_class` \ - can be set to any number. - * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \ - must be specified properly. - * :obj:`None`: Do not load weights. - min_size (int): A preprocessing paramter for :meth:`prepare`. Please \ - refer to :meth:`prepare`. - max_size (int): A preprocessing paramter for :meth:`prepare`. """ @@ -103,29 +109,6 @@ class FasterRCNNFPNResNet101(FasterRCNNFPNResNet): .. [#] Tsung-Yi Lin et al. Feature Pyramid Networks for Object Detection. CVPR 2017 - Args: - n_fg_class (int): The number of classes excluding the background. 
- pretrained_model (string): The weight file to be loaded. - This can take :obj:`'coco'`, `filepath` or :obj:`None`. - The default value is :obj:`None`. - - * :obj:`'coco'`: Load weights trained on train split of \ - MS COCO 2017. \ - The weight file is downloaded and cached automatically. \ - :obj:`n_fg_class` must be :obj:`80` or :obj:`None`. - * :obj:`'imagenet'`: Load weights of ResNet-101 trained on \ - ImageNet. \ - The weight file is downloaded and cached automatically. \ - This option initializes weights partially and the rests are \ - initialized randomly. In this case, :obj:`n_fg_class` \ - can be set to any number. - * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \ - must be specified properly. - * :obj:`None`: Do not load weights. - min_size (int): A preprocessing paramter for :meth:`prepare`. Please \ - refer to :meth:`prepare`. - max_size (int): A preprocessing paramter for :meth:`prepare`. - """ _base = ResNet101 @@ -139,6 +122,73 @@ class FasterRCNNFPNResNet101(FasterRCNNFPNResNet): } +class MaskRCNNFPNResNet(FasterRCNNFPNResNet): + """Feature Pyramid Networks with ResNet-50. + + This is a model of Feature Pyramid Networks [#]_. + This model uses :class:`~chainercv.links.ResNet50` as + its base feature extractor. + + .. [#] Tsung-Yi Lin et al. + Feature Pyramid Networks for Object Detection. CVPR 2017 + + + """ + + def __init__(self, n_fg_class=None, pretrained_model=None, + min_size=800, max_size=1333): + super(MaskRCNNFPNResNet, self).__init__( + n_fg_class, pretrained_model, ['masks', 'labels', 'scores'], + min_size, max_size) + + +class MaskRCNNFPNResNet50(MaskRCNNFPNResNet): + """Feature Pyramid Networks with ResNet-50. + + This is a model of Feature Pyramid Networks [#]_. + This model uses :class:`~chainercv.links.ResNet50` as + its base feature extractor. + + .. [#] Tsung-Yi Lin et al. + Feature Pyramid Networks for Object Detection. 
CVPR 2017 + + + """ + + _base = ResNet50 + _models = { + 'coco': { + 'param': {'n_fg_class': 80}, + 'url': '', + 'cv2': True + }, + } + + +class MaskRCNNFPNResNet101(MaskRCNNFPNResNet): + """Feature Pyramid Networks with ResNet-50. + + This is a model of Feature Pyramid Networks [#]_. + This model uses :class:`~chainercv.links.ResNet50` as + its base feature extractor. + + .. [#] Tsung-Yi Lin et al. + Feature Pyramid Networks for Object Detection. CVPR 2017 + + + """ + + _base = ResNet101 + _models = { + 'coco': { + 'param': {'n_fg_class': 80}, + 'url': '', + 'cv2': True + }, + } + + + def _copyparams(dst, src): if isinstance(dst, chainer.Chain): for link in dst.children(): diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/fpn/mask_head.py similarity index 98% rename from chainercv/links/model/mask_rcnn/mask_head.py rename to chainercv/links/model/fpn/mask_head.py index dc65fd6718..b89857fa5d 100644 --- a/chainercv/links/model/mask_rcnn/mask_head.py +++ b/chainercv/links/model/fpn/mask_head.py @@ -11,8 +11,8 @@ from chainercv.links import Conv2DActiv from chainercv.utils.bbox.bbox_iou import bbox_iou -from chainercv.links.model.mask_rcnn.misc import mask_to_segm -from chainercv.links.model.mask_rcnn.misc import segm_to_mask +from chainercv.links.model.fpn.mask_utils import mask_to_segm +from chainercv.links.model.fpn.mask_utils import segm_to_mask class MaskHead(chainer.Chain): diff --git a/chainercv/links/model/mask_rcnn/misc.py b/chainercv/links/model/fpn/mask_utils.py similarity index 94% rename from chainercv/links/model/mask_rcnn/misc.py rename to chainercv/links/model/fpn/mask_utils.py index 2cdcd52bdb..d9167ec046 100644 --- a/chainercv/links/model/mask_rcnn/misc.py +++ b/chainercv/links/model/fpn/mask_utils.py @@ -8,17 +8,6 @@ from chainercv import transforms -def scale_img(img, min_size, max_size): - """Process image.""" - _, H, W = img.shape - scale = min_size / min(H, W) - if scale * max(H, W) > max_size: - scale = max_size 
/ max(H, W) - H, W = int(H * scale), int(W * scale) - img = transforms.resize(img, (H, W)) - return img, scale - - def mask_to_segm(mask, bbox, segm_size, index=None, pad=1): """Crop and resize mask. diff --git a/chainercv/links/model/fpn/misc.py b/chainercv/links/model/fpn/misc.py index c699e3d2f6..0912de3eab 100644 --- a/chainercv/links/model/fpn/misc.py +++ b/chainercv/links/model/fpn/misc.py @@ -5,6 +5,8 @@ from chainer.backends import cuda import chainer.functions as F +from chainercv import transforms + exp_clip = np.log(1000 / 16) @@ -31,3 +33,14 @@ def choice(x, size): return y else: return cuda.to_gpu(y) + + +def scale_img(img, min_size, max_size): + """Process image.""" + _, H, W = img.shape + scale = min_size / min(H, W) + if scale * max(H, W) > max_size: + scale = max_size / max(H, W) + H, W = int(H * scale), int(W * scale) + img = transforms.resize(img, (H, W)) + return img, scale diff --git a/chainercv/links/model/mask_rcnn/__init__.py b/chainercv/links/model/mask_rcnn/__init__.py deleted file mode 100644 index 9f1b210dbc..0000000000 --- a/chainercv/links/model/mask_rcnn/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from chainercv.links.model.mask_rcnn.mask_head import mask_loss_post # NOQA -from chainercv.links.model.mask_rcnn.mask_head import mask_loss_pre # NOQA -from chainercv.links.model.mask_rcnn.mask_head import MaskHead # NOQA -from chainercv.links.model.mask_rcnn.mask_rcnn import MaskRCNN # NOQA -from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet101 # NOQA -from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet50 # NOQA -from chainercv.links.model.mask_rcnn.misc import mask_to_segm # NOQA -from chainercv.links.model.mask_rcnn.misc import segm_to_mask # NOQA diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py deleted file mode 100644 index 65b76c5b0d..0000000000 --- a/chainercv/links/model/mask_rcnn/mask_rcnn.py +++ /dev/null @@ -1,216 
+0,0 @@ -from __future__ import division - -import numpy as np - -import chainer -from chainer.backends import cuda -import chainer.functions as F - -from chainercv.links.model.mask_rcnn.misc import scale_img - - -class MaskRCNN(chainer.Chain): - - """Base class of Mask R-CNN. - - This is a base class of Mask R-CNN [#]_. - - .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017 - - Args: - extractor (Link): A link that extracts feature maps. - This link must have :obj:`scales`, :obj:`mean` and - :meth:`__call__`. - rpn (Link): A link that has the same interface as - :class:`~chainercv.links.model.fpn.RPN`. - Please refer to the documentation found there. - head (Link): A link that has the same interface as - :class:`~chainercv.links.model.fpn.Head`. - Please refer to the documentation found there. - mask_head (Link): A link that has the same interface as - :class:`~chainercv.links.model.mask_rcnn.MaskRCNN`. - Please refer to the documentation found there. - - Parameters: - nms_thresh (float): The threshold value - for :func:`~chainercv.utils.non_maximum_suppression`. - The default value is :obj:`0.5`. - This value can be changed directly or by using :meth:`use_preset`. - score_thresh (float): The threshold value for confidence score. - If a bounding box whose confidence score is lower than this value, - the bounding box will be suppressed. - The default value is :obj:`0.7`. - This value can be changed directly or by using :meth:`use_preset`. - - """ - - min_size = 800 - max_size = 1333 - stride = 32 - - def __init__(self, extractor, rpn, head, mask_head): - super(MaskRCNN, self).__init__() - with self.init_scope(): - self.extractor = extractor - self.rpn = rpn - self.head = head - self.mask_head = mask_head - - self.use_preset('visualize') - - def use_preset(self, preset): - """Use the given preset during prediction. - - This method changes values of :obj:`nms_thresh` and - :obj:`score_thresh`. 
These values are a threshold value - used for non maximum suppression and a threshold value - to discard low confidence proposals in :meth:`predict`, - respectively. - - If the attributes need to be changed to something - other than the values provided in the presets, please modify - them by directly accessing the public attributes. - - Args: - preset ({'visualize', 'evaluate'}): A string to determine the - preset to use. - """ - - if preset == 'visualize': - self.nms_thresh = 0.5 - self.score_thresh = 0.7 - elif preset == 'evaluate': - self.nms_thresh = 0.5 - self.score_thresh = 0.05 - else: - raise ValueError('preset must be visualize or evaluate') - - def __call__(self, x): - assert(not chainer.config.train) - hs = self.extractor(x) - rpn_locs, rpn_confs = self.rpn(hs) - anchors = self.rpn.anchors(h.shape[2:] for h in hs) - rois, roi_indices = self.rpn.decode( - rpn_locs, rpn_confs, anchors, x.shape) - rois, roi_indices = self.head.distribute(rois, roi_indices) - return hs, rois, roi_indices - - def predict(self, imgs): - """Segment object instances from images. - - This method predicts instance-aware object regions for each image. - - Args: - imgs (iterable of numpy.ndarray): Arrays holding images of shape - :math:`(B, C, H, W)`. All images are in CHW and RGB format - and the range of their value is :math:`[0, 255]`. - - Returns: - tuple of lists: - This method returns a tuple of three lists, - :obj:`(masks, labels, scores)`. - - * **masks**: A list of boolean arrays of shape :math:`(R, H, W)`, \ - where :math:`R` is the number of masks in a image. \ - Each pixel holds value if it is inside the object inside or not. - * **labels** : A list of integer arrays of shape :math:`(R,)`. \ - Each value indicates the class of the masks. \ - Values are in range :math:`[0, L - 1]`, where :math:`L` is the \ - number of the foreground classes. - * **scores** : A list of float arrays of shape :math:`(R,)`. \ - Each value indicates how confident the prediction is. 
- - """ - - sizes = [img.shape[1:] for img in imgs] - x, scales = self.prepare(imgs) - - with chainer.using_config('train', False), chainer.no_backprop_mode(): - hs, rois, roi_indices = self(x) - head_locs, head_confs = self.head(hs, rois, roi_indices) - bboxes, labels, scores = self.head.decode( - rois, roi_indices, head_locs, head_confs, - scales, sizes, self.nms_thresh, self.score_thresh) - - rescaled_bboxes = [bbox * scale for scale, bbox in zip(scales, bboxes)] - # Change bboxes to RoI and RoI indices format - mask_rois_before_reordering, mask_roi_indices_before_reordering =\ - _list_to_flat(rescaled_bboxes) - mask_rois, mask_roi_indices, order = self.mask_head.distribute( - mask_rois_before_reordering, mask_roi_indices_before_reordering) - with chainer.using_config('train', False), chainer.no_backprop_mode(): - segms = F.sigmoid( - self.mask_head(hs, mask_rois, mask_roi_indices)).data - # Put the order of proposals back to the one used by bbox head. - segms = segms[order] - segms = _flat_to_list( - segms, mask_roi_indices_before_reordering, len(imgs)) - segms = [segm if segm is not None else - self.xp.zeros( - (0, self.mask_head.segm_size, self.mask_head.segm_size), - dtype=np.float32) - for segm in segms] - - segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms] - bboxes = [chainer.backends.cuda.to_cpu(bbox / scale) - for bbox, scale in zip(rescaled_bboxes, scales)] - labels = [chainer.backends.cuda.to_cpu(label) for label in labels] - # Currently MaskHead only supports numpy inputs - masks = self.mask_head.decode(segms, bboxes, labels, sizes) - scores = [cuda.to_cpu(score) for score in scores] - return masks, labels, scores - - def prepare(self, imgs): - """Preprocess images. - - Args: - imgs (iterable of numpy.ndarray): Arrays holding images. - All images are in CHW and RGB format - and the range of their value is :math:`[0, 255]`. - - Returns: - Two arrays: preprocessed images and \ - scales that were caluclated in prepocessing. 
- - """ - scales = [] - resized_imgs = [] - for img in imgs: - img, scale = scale_img( - img, self.min_size, self.max_size) - img -= self.extractor.mean - scales.append(scale) - resized_imgs.append(img) - pad_size = np.array( - [im.shape[1:] for im in resized_imgs]).max(axis=0) - pad_size = ( - np.ceil(pad_size / self.stride) * self.stride).astype(int) - x = np.zeros( - (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32) - for i, im in enumerate(resized_imgs): - _, H, W = im.shape - x[i, :, :H, :W] = im - x = self.xp.array(x) - - return x, scales - - -def _list_to_flat(array_list): - xp = chainer.backends.cuda.get_array_module(array_list[0]) - - indices = xp.concatenate( - [i * xp.ones((len(array),), dtype=np.int32) for - i, array in enumerate(array_list)], axis=0) - flat = xp.concatenate(array_list, axis=0) - return flat, indices - - -def _flat_to_list(flat, indices, B): - array_list = [] - for i in range(B): - array = flat[indices == i] - if len(array) > 0: - array_list.append(array) - else: - array_list.append(None) - return array_list diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py b/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py deleted file mode 100644 index d18f92f628..0000000000 --- a/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py +++ /dev/null @@ -1,133 +0,0 @@ -from __future__ import division - -import chainer -import chainer.functions as F - -from chainercv.links.model.fpn import FPN -from chainercv.links.model.fpn import Head -from chainercv.links.model.fpn import RPN -from chainercv.links.model.mask_rcnn.mask_head import MaskHead -from chainercv.links.model.mask_rcnn.mask_rcnn import MaskRCNN -from chainercv.links.model.resnet import ResNet101 -from chainercv.links.model.resnet import ResNet50 -from chainercv import utils - -from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import _copyparams - - -class MaskRCNNFPNResNet(MaskRCNN): - - """Base class for Mask R-CNN with ResNet backbone. 
- - A subclass of this class should have :obj:`_base` and :obj:`_models`. - """ - - def __init__(self, n_fg_class=None, pretrained_model=None): - param, path = utils.prepare_pretrained_model( - {'n_fg_class': n_fg_class}, pretrained_model, self._models) - - base = self._base(n_class=1, arch='he') - base.pick = ('res2', 'res3', 'res4', 'res5') - base.pool1 = lambda x: F.max_pooling_2d( - x, 3, stride=2, pad=1, cover_all=False) - base.remove_unused() - extractor = FPN( - base, len(base.pick), (1 / 4, 1 / 8, 1 / 16, 1 / 32, 1 / 64)) - - n_class = param['n_fg_class'] + 1 - super(MaskRCNNFPNResNet, self).__init__( - extractor=extractor, - rpn=RPN(extractor.scales), - head=Head(n_class, extractor.scales), - mask_head=MaskHead(n_class, extractor.scales) - ) - if path == 'imagenet': - _copyparams( - self.extractor.base, - self._base(pretrained_model='imagenet', arch='he')) - elif path: - chainer.serializers.load_npz(path, self) - - -class MaskRCNNFPNResNet50(MaskRCNNFPNResNet): - - """Mask R-CNN with ResNet-50. - - This is a model of Mask R-CNN [#]_. - This model uses :class:`~chainercv.links.ResNet50` as - its base feature extractor. - - .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017 - - Args: - n_fg_class (int): The number of classes excluding the background. - pretrained_model (string): The weight file to be loaded. - This can take :obj:`'coco'`, `filepath` or :obj:`None`. - The default value is :obj:`None`. - - * :obj:`'coco'`: Load weights trained on train split of \ - MS COCO 2017. \ - The weight file is downloaded and cached automatically. \ - :obj:`n_fg_class` must be :obj:`80` or :obj:`None`. - * :obj:`'imagenet'`: Load weights of ResNet-50 trained on \ - ImageNet. \ - The weight file is downloaded and cached automatically. \ - This option initializes weights partially and the rests are \ - initialized randomly. In this case, :obj:`n_fg_class` \ - can be set to any number. - * `filepath`: A path of npz file. 
In this case, :obj:`n_fg_class` \ - must be specified properly. - * :obj:`None`: Do not load weights. - - """ - - _base = ResNet50 - _models = { - 'coco': { - 'param': {'n_fg_class': 80}, - 'url': None, - 'cv2': True - }, - } - - -class MaskRCNNFPNResNet101(MaskRCNNFPNResNet): - - """Mask R-CNN with ResNet-101. - - This is a model of Mask R-CNN [#]_. - This model uses :class:`~chainercv.links.ResNet101` as - its base feature extractor. - - .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017 - - Args: - n_fg_class (int): The number of classes excluding the background. - pretrained_model (string): The weight file to be loaded. - This can take :obj:`'coco'`, `filepath` or :obj:`None`. - The default value is :obj:`None`. - - * :obj:`'coco'`: Load weights trained on train split of \ - MS COCO 2017. \ - The weight file is downloaded and cached automatically. \ - :obj:`n_fg_class` must be :obj:`80` or :obj:`None`. - * :obj:`'imagenet'`: Load weights of ResNet-101 trained on \ - ImageNet. \ - The weight file is downloaded and cached automatically. \ - This option initializes weights partially and the rests are \ - initialized randomly. In this case, :obj:`n_fg_class` \ - can be set to any number. - * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \ - must be specified properly. - * :obj:`None`: Do not load weights. 
- - """ - - _base = ResNet101 - _models = { - 'coco': { - 'param': {'n_fg_class': 80}, - 'url': None, - 'cv2': True - }, - } diff --git a/examples/fpn/demo.py b/examples/fpn/demo.py index 053d0351e2..0d615cacfb 100644 --- a/examples/fpn/demo.py +++ b/examples/fpn/demo.py @@ -4,17 +4,22 @@ import chainer from chainercv.datasets import coco_bbox_label_names +from chainercv.datasets import coco_instance_segmentation_label_names from chainercv.links import FasterRCNNFPNResNet101 from chainercv.links import FasterRCNNFPNResNet50 +from chainercv.links import MaskRCNNFPNResNet101 +from chainercv.links import MaskRCNNFPNResNet50 from chainercv import utils from chainercv.visualizations import vis_bbox +from chainercv.visualizations import vis_instance_segmentation def main(): parser = argparse.ArgumentParser() parser.add_argument( '--model', - choices=('faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101'), + choices=('faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101', + 'mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'), default='faster_rcnn_fpn_resnet50') parser.add_argument('--gpu', type=int, default=-1) parser.add_argument('--pretrained-model', default='coco') @@ -22,26 +27,48 @@ def main(): args = parser.parse_args() if args.model == 'faster_rcnn_fpn_resnet50': + mode = 'bbox' model = FasterRCNNFPNResNet50( n_fg_class=len(coco_bbox_label_names), pretrained_model=args.pretrained_model) elif args.model == 'faster_rcnn_fpn_resnet101': + mode = 'bbox' model = FasterRCNNFPNResNet101( n_fg_class=len(coco_bbox_label_names), pretrained_model=args.pretrained_model) + elif args.model == 'mask_rcnn_fpn_resnet50': + mode = 'instance_segmentation' + model = MaskRCNNFPNResNet50( + n_fg_class=len(coco_instance_segmentation_label_names), + pretrained_model=args.pretrained_model) + elif args.model == 'mask_rcnn_fpn_resnet101': + mode = 'instance_segmentation' + model = MaskRCNNFPNResNet101( + n_fg_class=len(coco_instance_segmentation_label_names), + 
pretrained_model=args.pretrained_model) if args.gpu >= 0: chainer.cuda.get_device_from_id(args.gpu).use() model.to_gpu() img = utils.read_image(args.image) - bboxes, labels, scores = model.predict([img]) - bbox = bboxes[0] - label = labels[0] - score = scores[0] - vis_bbox( - img, bbox, label, score, label_names=coco_bbox_label_names) + if mode == 'bbox': + bboxes, labels, scores = model.predict([img]) + bbox = bboxes[0] + label = labels[0] + score = scores[0] + + vis_bbox( + img, bbox, label, score, label_names=coco_bbox_label_names) + elif mode == 'instance_segmentation': + masks, labels, scores = model.predict([img]) + mask = masks[0] + label = labels[0] + score = scores[0] + vis_instance_segmentation( + img, mask, label, score, + label_names=coco_instance_segmentation_label_names) plt.show() diff --git a/examples/fpn/train_multi.py b/examples/fpn/train_multi.py index 1adff045d8..2bd2d26916 100644 --- a/examples/fpn/train_multi.py +++ b/examples/fpn/train_multi.py @@ -1,10 +1,10 @@ -from __future__ import division - import argparse import multiprocessing import numpy as np +import PIL import chainer +import chainer.functions as F import chainer.links as L from chainer.optimizer_hooks import WeightDecay from chainer import serializers @@ -15,15 +15,18 @@ from chainercv.chainer_experimental.datasets.sliceable import TransformDataset from chainercv.chainer_experimental.training.extensions import make_shift -from chainercv.datasets import coco_bbox_label_names -from chainercv.datasets import COCOBboxDataset -from chainercv.links import FasterRCNNFPNResNet101 -from chainercv.links import FasterRCNNFPNResNet50 +from chainercv.datasets import coco_instance_segmentation_label_names +from chainercv.datasets import COCOInstanceSegmentationDataset +# from chainercv.links import MaskRCNNFPNResNet101 +# from chainercv.links import MaskRCNNFPNResNet50 +from chainercv.links.model.mask_rcnn.misc import scale_img from chainercv import transforms from chainercv.links.model.fpn 
import head_loss_post from chainercv.links.model.fpn import head_loss_pre from chainercv.links.model.fpn import rpn_loss +from chainercv.links.model.mask_rcnn import mask_loss_post +from chainercv.links.model.mask_rcnn import mask_loss_pre # https://docs.chainer.org/en/stable/tips.html#my-training-process-gets-stuck-when-using-multiprocessiterator try: @@ -40,11 +43,33 @@ def __init__(self, model): with self.init_scope(): self.model = model - def __call__(self, imgs, bboxes, labels): - x, scales = self.model.prepare(imgs) - bboxes = [self.xp.array(bbox) * scale - for bbox, scale in zip(bboxes, scales)] + def __call__(self, imgs, masks, labels, bboxes): + B = len(imgs) + pad_size = np.array( + [im.shape[1:] for im in imgs]).max(axis=0) + pad_size = ( + np.ceil( + pad_size / self.model.stride) * self.model.stride).astype(int) + x = np.zeros( + (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32) + for i, img in enumerate(imgs): + _, H, W = img.shape + x[i, :, :H, :W] = img + x = self.xp.array(x) + + # For reducing unnecessary CPU/GPU copy, `masks` is kept in CPU. 
+ pad_masks = [ + np.zeros( + (mask.shape[0], pad_size[0], pad_size[1]), dtype=np.bool) + for mask in masks] + for i, mask in enumerate(masks): + _, H, W = mask.shape + pad_masks[i][:, :H, :W] = mask + masks = pad_masks + + bboxes = [self.xp.array(bbox) for bbox in bboxes] labels = [self.xp.array(label) for label in labels] + sizes = [img.shape[1:] for img in imgs] with chainer.using_config('train', False): hs = self.model.extractor(x) @@ -52,10 +77,7 @@ def __call__(self, imgs, bboxes, labels): rpn_locs, rpn_confs = self.model.rpn(hs) anchors = self.model.rpn.anchors(h.shape[2:] for h in hs) rpn_loc_loss, rpn_conf_loss = rpn_loss( - rpn_locs, rpn_confs, anchors, - [(int(img.shape[1] * scale), int(img.shape[2] * scale)) - for img, scale in zip(imgs, scales)], - bboxes) + rpn_locs, rpn_confs, anchors, sizes, bboxes) rois, roi_indices = self.model.rpn.decode( rpn_locs, rpn_confs, anchors, x.shape) @@ -70,27 +92,59 @@ def __call__(self, imgs, bboxes, labels): head_locs, head_confs = self.model.head(hs, rois, roi_indices) head_loc_loss, head_conf_loss = head_loss_post( head_locs, head_confs, - roi_indices, head_gt_locs, head_gt_labels, len(x)) - - loss = rpn_loc_loss + rpn_conf_loss + head_loc_loss + head_conf_loss + roi_indices, head_gt_locs, head_gt_labels, B) + + mask_rois, mask_roi_indices, gt_segms, gt_mask_labels = mask_loss_pre( + rois, roi_indices, masks, bboxes, + head_gt_labels, self.model.mask_head.segm_size) + n_roi = sum([len(roi) for roi in mask_rois]) + if n_roi > 0: + segms = self.model.mask_head(hs, mask_rois, mask_roi_indices) + mask_loss = mask_loss_post( + segms, mask_roi_indices, gt_segms, gt_mask_labels, B) + else: + # Compute dummy variables to complete the computational graph + mask_rois[0] = self.xp.array([[0, 0, 1, 1]], dtype=np.float32) + mask_roi_indices[0] = self.xp.array([0], dtype=np.int32) + segms = self.model.mask_head(hs, mask_rois, mask_roi_indices) + mask_loss = 0 * F.sum(segms) + loss = (rpn_loc_loss + rpn_conf_loss + + 
head_loc_loss + head_conf_loss + mask_loss) chainer.reporter.report({ 'loss': loss, 'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss, - 'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss}, + 'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss, + 'loss/mask': mask_loss}, self) - return loss -def transform(in_data): - img, bbox, label = in_data +class Transform(object): + + def __init__(self, min_size, max_size, mean): + self.min_size = min_size + self.max_size = max_size + self.mean = mean - img, params = transforms.random_flip( - img, x_random=True, return_param=True) - bbox = transforms.flip_bbox( - bbox, img.shape[1:], x_flip=params['x_flip']) + def __call__(self, in_data): + img, mask, label, bbox = in_data + # Flipping + img, params = transforms.random_flip( + img, x_random=True, return_param=True) + mask = transforms.flip(mask, x_flip=params['x_flip']) + bbox = transforms.flip_bbox( + bbox, img.shape[1:], x_flip=params['x_flip']) - return img, bbox, label + # Scaling and mean subtraction + img, scale = scale_img( + img, self.min_size, self.max_size) + img -= self.mean + mask = transforms.resize( + mask.astype(np.float32), + img.shape[1:], + interpolation=PIL.Image.NEAREST).astype(np.bool) + bbox = bbox * scale + return img, mask, label, bbox, scale def converter(batch, device=None): @@ -102,13 +156,14 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument( '--model', - choices=('faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101'), - default='faster_rcnn_fpn_resnet50') + choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'), + default='mask_rcnn_fpn_resnet50') parser.add_argument('--batchsize', type=int, default=16) parser.add_argument('--iteration', type=int, default=90000) parser.add_argument('--step', type=int, nargs='*', default=[60000, 80000]) parser.add_argument('--out', default='result') parser.add_argument('--resume') + parser.add_argument('--communicator', default='hierarchical') args = 
parser.parse_args() # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator @@ -118,15 +173,17 @@ def main(): p.start() p.join() - comm = chainermn.create_communicator() + comm = chainermn.create_communicator(args.communicator) device = comm.intra_rank - if args.model == 'faster_rcnn_fpn_resnet50': - model = FasterRCNNFPNResNet50( - n_fg_class=len(coco_bbox_label_names), pretrained_model='imagenet') - elif args.model == 'faster_rcnn_fpn_resnet101': - model = FasterRCNNFPNResNet101( - n_fg_class=len(coco_bbox_label_names), pretrained_model='imagenet') + if args.model == 'mask_rcnn_fpn_resnet50': + model = MaskRCNNFPNResNet50( + n_fg_class=len(coco_instance_segmentation_label_names), + pretrained_model='imagenet') + elif args.model == 'mask_rcnn_fpn_resnet101': + model = MaskRCNNFPNResNet101( + n_fg_class=len(coco_instance_segmentation_label_names), + pretrained_model='imagenet') model.use_preset('evaluate') train_chain = TrainChain(model) @@ -134,8 +191,11 @@ def main(): train_chain.to_gpu() train = TransformDataset( - COCOBboxDataset(year='2017', split='train'), - ('img', 'bbox', 'label'), transform) + COCOInstanceSegmentationDataset( + data_dir='/home/yuyu2172/coco', + split='train', return_bbox=True), + ('img', 'mask', 'label', 'bbox'), + Transform(model.min_size, model.max_size, model.extractor.mean)) if comm.rank == 0: indices = np.arange(len(train)) @@ -144,8 +204,10 @@ def main(): indices = chainermn.scatter_dataset(indices, comm, shuffle=True) train = train.slice[indices] - train_iter = chainer.iterators.MultithreadIterator( - train, args.batchsize // comm.size) + train_iter = chainer.iterators.MultiprocessIterator( + train, args.batchsize // comm.size, + n_processes=args.batchsize // comm.size, + shared_mem=100 * 1000 * 1000 * 4) optimizer = chainermn.create_multi_node_optimizer( chainer.optimizers.MomentumSGD(), comm) @@ -158,10 +220,11 @@ def main(): if isinstance(link, L.BatchNormalization): link.disable_update() 
+ n_iteration = args.iteration * 16 / args.batchsize updater = training.updaters.StandardUpdater( train_iter, optimizer, converter=converter, device=device) trainer = training.Trainer( - updater, (args.iteration * 16 / args.batchsize, 'iteration'), args.out) + updater, (n_iteration, 'iteration'), args.out) @make_shift('lr') def lr_schedule(trainer): @@ -190,7 +253,9 @@ def lr_schedule(trainer): trainer.extend(extensions.PrintReport( ['epoch', 'iteration', 'lr', 'main/loss', 'main/loss/rpn/loc', 'main/loss/rpn/conf', - 'main/loss/head/loc', 'main/loss/head/conf']), + 'main/loss/head/loc', 'main/loss/head/conf', + 'main/loss/mask' + ]), trigger=log_interval) trainer.extend(extensions.ProgressBar(update_interval=10)) @@ -198,7 +263,7 @@ def lr_schedule(trainer): trainer.extend( extensions.snapshot_object( model, 'model_iter_{.updater.iteration}'), - trigger=(90000 * 16 / args.batchsize, 'iteration')) + trigger=(n_iteration, 'iteration')) if args.resume: serializers.load_npz(args.resume, trainer, strict=False) diff --git a/examples/mask_rcnn/demo.py b/examples/mask_rcnn/demo.py deleted file mode 100644 index d95eacc567..0000000000 --- a/examples/mask_rcnn/demo.py +++ /dev/null @@ -1,51 +0,0 @@ -import argparse -import matplotlib.pyplot as plt - -import chainer - -import chainercv -from chainercv.datasets import coco_instance_segmentation_label_names -from chainercv import utils - -from chainercv.links import MaskRCNNFPNResNet101 -from chainercv.links import MaskRCNNFPNResNet50 - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument( - '--model', - choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'), - default='mask_rcnn_fpn_resnet50' - ) - parser.add_argument('--gpu', type=int, default=-1) - parser.add_argument('--pretrained-model', default='coco') - parser.add_argument('image') - args = parser.parse_args() - - if args.model == 'mask_rcnn_fpn_resnet50': - model = MaskRCNNFPNResNet50( - 
n_fg_class=len(coco_instance_segmentation_label_names), - pretrained_model=args.pretrained_model) - elif args.model == 'mask_rcnn_fpn_resnet101': - model = MaskRCNNFPNResNet101( - n_fg_class=len(coco_instance_segmentation_label_names), - pretrained_model=args.pretrained_model) - - if args.gpu >= 0: - chainer.cuda.get_device_from_id(args.gpu).use() - model.to_gpu() - - img = utils.read_image(args.image) - masks, labels, scores = model.predict([img]) - mask = masks[0] - label = labels[0] - score = scores[0] - chainercv.visualizations.vis_instance_segmentation( - img, mask, label, score, - label_names=coco_instance_segmentation_label_names) - plt.show() - - -if __name__ == '__main__': - main() diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py deleted file mode 100644 index 921b1e53dc..0000000000 --- a/examples/mask_rcnn/train_multi.py +++ /dev/null @@ -1,275 +0,0 @@ -import argparse -import multiprocessing -import numpy as np -import PIL - -import chainer -import chainer.functions as F -import chainer.links as L -from chainer.optimizer_hooks import WeightDecay -from chainer import serializers -from chainer import training -from chainer.training import extensions - -import chainermn - -from chainercv.chainer_experimental.datasets.sliceable import TransformDataset -from chainercv.chainer_experimental.training.extensions import make_shift -from chainercv.datasets import coco_instance_segmentation_label_names -from chainercv.datasets import COCOInstanceSegmentationDataset -from chainercv.links import MaskRCNNFPNResNet101 -from chainercv.links import MaskRCNNFPNResNet50 -from chainercv.links.model.mask_rcnn.misc import scale_img -from chainercv import transforms - -from chainercv.links.model.fpn import head_loss_post -from chainercv.links.model.fpn import head_loss_pre -from chainercv.links.model.fpn import rpn_loss -from chainercv.links.model.mask_rcnn import mask_loss_post -from chainercv.links.model.mask_rcnn import mask_loss_pre - -# 
https://docs.chainer.org/en/stable/tips.html#my-training-process-gets-stuck-when-using-multiprocessiterator -try: - import cv2 - cv2.setNumThreads(0) -except ImportError: - pass - - -class TrainChain(chainer.Chain): - - def __init__(self, model): - super(TrainChain, self).__init__() - with self.init_scope(): - self.model = model - - def __call__(self, imgs, masks, labels, bboxes): - B = len(imgs) - pad_size = np.array( - [im.shape[1:] for im in imgs]).max(axis=0) - pad_size = ( - np.ceil( - pad_size / self.model.stride) * self.model.stride).astype(int) - x = np.zeros( - (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32) - for i, img in enumerate(imgs): - _, H, W = img.shape - x[i, :, :H, :W] = img - x = self.xp.array(x) - - # For reducing unnecessary CPU/GPU copy, `masks` is kept in CPU. - pad_masks = [ - np.zeros( - (mask.shape[0], pad_size[0], pad_size[1]), dtype=np.bool) - for mask in masks] - for i, mask in enumerate(masks): - _, H, W = mask.shape - pad_masks[i][:, :H, :W] = mask - masks = pad_masks - - bboxes = [self.xp.array(bbox) for bbox in bboxes] - labels = [self.xp.array(label) for label in labels] - sizes = [img.shape[1:] for img in imgs] - - with chainer.using_config('train', False): - hs = self.model.extractor(x) - - rpn_locs, rpn_confs = self.model.rpn(hs) - anchors = self.model.rpn.anchors(h.shape[2:] for h in hs) - rpn_loc_loss, rpn_conf_loss = rpn_loss( - rpn_locs, rpn_confs, anchors, sizes, bboxes) - - rois, roi_indices = self.model.rpn.decode( - rpn_locs, rpn_confs, anchors, x.shape) - rois = self.xp.vstack([rois] + bboxes) - roi_indices = self.xp.hstack( - [roi_indices] - + [self.xp.array((i,) * len(bbox)) - for i, bbox in enumerate(bboxes)]) - rois, roi_indices = self.model.head.distribute(rois, roi_indices) - rois, roi_indices, head_gt_locs, head_gt_labels = head_loss_pre( - rois, roi_indices, self.model.head.std, bboxes, labels) - head_locs, head_confs = self.model.head(hs, rois, roi_indices) - head_loc_loss, head_conf_loss = 
head_loss_post( - head_locs, head_confs, - roi_indices, head_gt_locs, head_gt_labels, B) - - mask_rois, mask_roi_indices, gt_segms, gt_mask_labels = mask_loss_pre( - rois, roi_indices, masks, bboxes, - head_gt_labels, self.model.mask_head.segm_size) - n_roi = sum([len(roi) for roi in mask_rois]) - if n_roi > 0: - segms = self.model.mask_head(hs, mask_rois, mask_roi_indices) - mask_loss = mask_loss_post( - segms, mask_roi_indices, gt_segms, gt_mask_labels, B) - else: - # Compute dummy variables to complete the computational graph - mask_rois[0] = self.xp.array([[0, 0, 1, 1]], dtype=np.float32) - mask_roi_indices[0] = self.xp.array([0], dtype=np.int32) - segms = self.model.mask_head(hs, mask_rois, mask_roi_indices) - mask_loss = 0 * F.sum(segms) - loss = (rpn_loc_loss + rpn_conf_loss + - head_loc_loss + head_conf_loss + mask_loss) - chainer.reporter.report({ - 'loss': loss, - 'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss, - 'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss, - 'loss/mask': mask_loss}, - self) - return loss - - -class Transform(object): - - def __init__(self, min_size, max_size, mean): - self.min_size = min_size - self.max_size = max_size - self.mean = mean - - def __call__(self, in_data): - img, mask, label, bbox = in_data - # Flipping - img, params = transforms.random_flip( - img, x_random=True, return_param=True) - mask = transforms.flip(mask, x_flip=params['x_flip']) - bbox = transforms.flip_bbox( - bbox, img.shape[1:], x_flip=params['x_flip']) - - # Scaling and mean subtraction - img, scale = scale_img( - img, self.min_size, self.max_size) - img -= self.mean - mask = transforms.resize( - mask.astype(np.float32), - img.shape[1:], - interpolation=PIL.Image.NEAREST).astype(np.bool) - bbox = bbox * scale - return img, mask, label, bbox, scale - - -def converter(batch, device=None): - # do not send data to gpu (device is ignored) - return tuple(list(v) for v in zip(*batch)) - - -def main(): - parser = 
argparse.ArgumentParser() - parser.add_argument( - '--model', - choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'), - default='mask_rcnn_fpn_resnet50') - parser.add_argument('--batchsize', type=int, default=16) - parser.add_argument('--iteration', type=int, default=90000) - parser.add_argument('--step', type=int, nargs='*', default=[60000, 80000]) - parser.add_argument('--out', default='result') - parser.add_argument('--resume') - parser.add_argument('--communicator', default='hierarchical') - args = parser.parse_args() - - # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator - if hasattr(multiprocessing, 'set_start_method'): - multiprocessing.set_start_method('forkserver') - p = multiprocessing.Process() - p.start() - p.join() - - comm = chainermn.create_communicator(args.communicator) - device = comm.intra_rank - - if args.model == 'mask_rcnn_fpn_resnet50': - model = MaskRCNNFPNResNet50( - n_fg_class=len(coco_instance_segmentation_label_names), - pretrained_model='imagenet') - elif args.model == 'mask_rcnn_fpn_resnet101': - model = MaskRCNNFPNResNet101( - n_fg_class=len(coco_instance_segmentation_label_names), - pretrained_model='imagenet') - - model.use_preset('evaluate') - train_chain = TrainChain(model) - chainer.cuda.get_device_from_id(device).use() - train_chain.to_gpu() - - train = TransformDataset( - COCOInstanceSegmentationDataset( - data_dir='/home/yuyu2172/coco', - split='train', return_bbox=True), - ('img', 'mask', 'label', 'bbox'), - Transform(model.min_size, model.max_size, model.extractor.mean)) - - if comm.rank == 0: - indices = np.arange(len(train)) - else: - indices = None - indices = chainermn.scatter_dataset(indices, comm, shuffle=True) - train = train.slice[indices] - - train_iter = chainer.iterators.MultiprocessIterator( - train, args.batchsize // comm.size, - n_processes=args.batchsize // comm.size, - shared_mem=100 * 1000 * 1000 * 4) - - optimizer = 
chainermn.create_multi_node_optimizer( - chainer.optimizers.MomentumSGD(), comm) - optimizer.setup(train_chain) - optimizer.add_hook(WeightDecay(0.0001)) - - model.extractor.base.conv1.disable_update() - model.extractor.base.res2.disable_update() - for link in model.links(): - if isinstance(link, L.BatchNormalization): - link.disable_update() - - n_iteration = args.iteration * 16 / args.batchsize - updater = training.updaters.StandardUpdater( - train_iter, optimizer, converter=converter, device=device) - trainer = training.Trainer( - updater, (n_iteration, 'iteration'), args.out) - - @make_shift('lr') - def lr_schedule(trainer): - base_lr = 0.02 * args.batchsize / 16 - warm_up_duration = 500 - warm_up_rate = 1 / 3 - - iteration = trainer.updater.iteration - if iteration < warm_up_duration: - rate = warm_up_rate \ - + (1 - warm_up_rate) * iteration / warm_up_duration - else: - rate = 1 - for step in args.step: - if iteration >= step * 16 / args.batchsize: - rate *= 0.1 - - return base_lr * rate - - trainer.extend(lr_schedule) - - if comm.rank == 0: - log_interval = 10, 'iteration' - trainer.extend(extensions.LogReport(trigger=log_interval)) - trainer.extend(extensions.observe_lr(), trigger=log_interval) - trainer.extend(extensions.PrintReport( - ['epoch', 'iteration', 'lr', 'main/loss', - 'main/loss/rpn/loc', 'main/loss/rpn/conf', - 'main/loss/head/loc', 'main/loss/head/conf', - 'main/loss/mask' - ]), - trigger=log_interval) - trainer.extend(extensions.ProgressBar(update_interval=10)) - - trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration')) - trainer.extend( - extensions.snapshot_object( - model, 'model_iter_{.updater.iteration}'), - trigger=(n_iteration, 'iteration')) - - if args.resume: - serializers.load_npz(args.resume, trainer, strict=False) - - trainer.run() - - -if __name__ == '__main__': - main() From ffc87ddb31281c10fd51a5c091443c9640da8605 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 15 Mar 2019 15:07:01 +0900 Subject: [PATCH 
084/100] fix variable names --- chainercv/links/model/fpn/mask_utils.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/chainercv/links/model/fpn/mask_utils.py b/chainercv/links/model/fpn/mask_utils.py index d9167ec046..5c28e20232 100644 --- a/chainercv/links/model/fpn/mask_utils.py +++ b/chainercv/links/model/fpn/mask_utils.py @@ -1,6 +1,5 @@ from __future__ import division -import cv2 import numpy as np import chainer @@ -36,8 +35,8 @@ def mask_to_segm(mask, bbox, segm_size, index=None, pad=1): _, H, W = mask.shape bbox = chainer.backends.cuda.to_cpu(bbox) padded_segm_size = segm_size + pad * 2 - cv2_expand_scale = padded_segm_size / segm_size - bbox = _integerize_bbox(_expand_boxes(bbox, cv2_expand_scale)) + expand_scale = padded_segm_size / segm_size + bbox = _integerize_bbox(_expand_boxes(bbox, expand_scale)) segm = [] if index is None: @@ -104,11 +103,11 @@ def segm_to_mask(segm, bbox, size, pad=1): # pixel prior to resizing back to the original image resolution. # This prevents "top hat" artifacts. We therefore need to expand # the reference boxes by an appropriate factor. 
- cv2_expand_scale = (segm_size + pad * 2) / segm_size + expand_scale = (segm_size + pad * 2) / segm_size padded_mask = np.zeros( (segm_size + pad * 2, segm_size + pad * 2), dtype=np.float32) - bbox = _integerize_bbox(_expand_boxes(bbox, cv2_expand_scale)) + bbox = _integerize_bbox(_expand_boxes(bbox, expand_scale)) for i, (bb, sgm) in enumerate(zip(bbox, segm)): padded_mask[1:-1, 1:-1] = sgm From 42ca5d2950d4269c935891e785d06dfd4f1aa2ea Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 15 Mar 2019 15:35:17 +0900 Subject: [PATCH 085/100] update train_multi --- examples/fpn/train_multi.py | 145 ++++++++++++++++++++++-------------- 1 file changed, 89 insertions(+), 56 deletions(-) diff --git a/examples/fpn/train_multi.py b/examples/fpn/train_multi.py index 2bd2d26916..726147d5bf 100644 --- a/examples/fpn/train_multi.py +++ b/examples/fpn/train_multi.py @@ -15,18 +15,24 @@ from chainercv.chainer_experimental.datasets.sliceable import TransformDataset from chainercv.chainer_experimental.training.extensions import make_shift -from chainercv.datasets import coco_instance_segmentation_label_names -from chainercv.datasets import COCOInstanceSegmentationDataset -# from chainercv.links import MaskRCNNFPNResNet101 -# from chainercv.links import MaskRCNNFPNResNet50 -from chainercv.links.model.mask_rcnn.misc import scale_img +from chainercv.links.model.fpn.misc import scale_img from chainercv import transforms -from chainercv.links.model.fpn import head_loss_post -from chainercv.links.model.fpn import head_loss_pre +from chainercv.datasets import coco_instance_segmentation_label_names +from chainercv.datasets import COCOInstanceSegmentationDataset +from chainercv.links import MaskRCNNFPNResNet101 +from chainercv.links import MaskRCNNFPNResNet50 + +from chainercv.datasets import coco_bbox_label_names +from chainercv.datasets import COCOBboxDataset +from chainercv.links import FasterRCNNFPNResNet101 +from chainercv.links import FasterRCNNFPNResNet50 + +from 
chainercv.links.model.fpn import bbox_head_loss_post +from chainercv.links.model.fpn import bbox_head_loss_pre +from chainercv.links.model.fpn import mask_loss_post +from chainercv.links.model.fpn import mask_loss_pre from chainercv.links.model.fpn import rpn_loss -from chainercv.links.model.mask_rcnn import mask_loss_post -from chainercv.links.model.mask_rcnn import mask_loss_pre # https://docs.chainer.org/en/stable/tips.html#my-training-process-gets-stuck-when-using-multiprocessiterator try: @@ -43,7 +49,7 @@ def __init__(self, model): with self.init_scope(): self.model = model - def __call__(self, imgs, masks, labels, bboxes): + def __call__(self, imgs, bboxes, labels, masks=None): B = len(imgs) pad_size = np.array( [im.shape[1:] for im in imgs]).max(axis=0) @@ -57,16 +63,6 @@ def __call__(self, imgs, masks, labels, bboxes): x[i, :, :H, :W] = img x = self.xp.array(x) - # For reducing unnecessary CPU/GPU copy, `masks` is kept in CPU. - pad_masks = [ - np.zeros( - (mask.shape[0], pad_size[0], pad_size[1]), dtype=np.bool) - for mask in masks] - for i, mask in enumerate(masks): - _, H, W = mask.shape - pad_masks[i][:, :H, :W] = mask - masks = pad_masks - bboxes = [self.xp.array(bbox) for bbox in bboxes] labels = [self.xp.array(label) for label in labels] sizes = [img.shape[1:] for img in imgs] @@ -87,34 +83,48 @@ def __call__(self, imgs, masks, labels, bboxes): + [self.xp.array((i,) * len(bbox)) for i, bbox in enumerate(bboxes)]) rois, roi_indices = self.model.head.distribute(rois, roi_indices) - rois, roi_indices, head_gt_locs, head_gt_labels = head_loss_pre( + rois, roi_indices, head_gt_locs, head_gt_labels = bbox_head_loss_pre( rois, roi_indices, self.model.head.std, bboxes, labels) head_locs, head_confs = self.model.head(hs, rois, roi_indices) - head_loc_loss, head_conf_loss = head_loss_post( + head_loc_loss, head_conf_loss = bbox_head_loss_post( head_locs, head_confs, roi_indices, head_gt_locs, head_gt_labels, B) - mask_rois, mask_roi_indices, gt_segms, 
gt_mask_labels = mask_loss_pre( - rois, roi_indices, masks, bboxes, - head_gt_labels, self.model.mask_head.segm_size) - n_roi = sum([len(roi) for roi in mask_rois]) - if n_roi > 0: - segms = self.model.mask_head(hs, mask_rois, mask_roi_indices) - mask_loss = mask_loss_post( - segms, mask_roi_indices, gt_segms, gt_mask_labels, B) - else: - # Compute dummy variables to complete the computational graph - mask_rois[0] = self.xp.array([[0, 0, 1, 1]], dtype=np.float32) - mask_roi_indices[0] = self.xp.array([0], dtype=np.int32) - segms = self.model.mask_head(hs, mask_rois, mask_roi_indices) - mask_loss = 0 * F.sum(segms) + mask_loss = 0 + if masks is not None: + # For reducing unnecessary CPU/GPU copy, `masks` is kept in CPU. + pad_masks = [ + np.zeros( + (mask.shape[0], pad_size[0], pad_size[1]), dtype=np.bool) + for mask in masks] + for i, mask in enumerate(masks): + _, H, W = mask.shape + pad_masks[i][:, :H, :W] = mask + masks = pad_masks + + mask_rois, mask_roi_indices, gt_segms, gt_mask_labels =\ + mask_loss_pre( + rois, roi_indices, masks, bboxes, + head_gt_labels, self.model.mask_head.segm_size) + n_roi = sum([len(roi) for roi in mask_rois]) + if n_roi > 0: + segms = self.model.mask_head(hs, mask_rois, mask_roi_indices) + mask_loss = mask_loss_post( + segms, mask_roi_indices, gt_segms, gt_mask_labels, B) + else: + # Compute dummy variables to complete the computational graph + mask_rois[0] = self.xp.array([[0, 0, 1, 1]], dtype=np.float32) + mask_roi_indices[0] = self.xp.array([0], dtype=np.int32) + segms = self.model.mask_head(hs, mask_rois, mask_roi_indices) + mask_loss = 0 * F.sum(segms) loss = (rpn_loc_loss + rpn_conf_loss + head_loc_loss + head_conf_loss + mask_loss) chainer.reporter.report({ 'loss': loss, 'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss, - 'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss, - 'loss/mask': mask_loss}, + 'loss/bbox_head/loc': head_loc_loss, + 'loss/bbox_head/conf': head_conf_loss, + 'loss/mask_head': 
mask_loss}, self) return loss @@ -127,24 +137,30 @@ def __init__(self, min_size, max_size, mean): self.mean = mean def __call__(self, in_data): - img, mask, label, bbox = in_data + img, bbox, label = in_data[:3] # Flipping img, params = transforms.random_flip( img, x_random=True, return_param=True) - mask = transforms.flip(mask, x_flip=params['x_flip']) + x_flip = params['x_flip'] bbox = transforms.flip_bbox( - bbox, img.shape[1:], x_flip=params['x_flip']) + bbox, img.shape[1:], x_flip=x_flip) # Scaling and mean subtraction img, scale = scale_img( img, self.min_size, self.max_size) img -= self.mean - mask = transforms.resize( - mask.astype(np.float32), - img.shape[1:], - interpolation=PIL.Image.NEAREST).astype(np.bool) bbox = bbox * scale - return img, mask, label, bbox, scale + + if len(in_data) == 4: + mask = in_data[3] + mask = transforms.flip(mask, x_flip=x_flip) + mask = transforms.resize( + mask.astype(np.float32), + img.shape[1:], + interpolation=PIL.Image.NEAREST).astype(np.bool) + return img, bbox, label, mask + else: + return img, bbox, label def converter(batch, device=None): @@ -156,7 +172,8 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument( '--model', - choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'), + choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101', + 'faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101'), default='mask_rcnn_fpn_resnet50') parser.add_argument('--batchsize', type=int, default=16) parser.add_argument('--iteration', type=int, default=90000) @@ -176,11 +193,23 @@ def main(): comm = chainermn.create_communicator(args.communicator) device = comm.intra_rank - if args.model == 'mask_rcnn_fpn_resnet50': + if args.model == 'faster_rcnn_fpn_resnet50': + mode = 'bbox' + model = FasterRCNNFPNResNet50( + n_fg_class=len(coco_bbox_label_names), + pretrained_model='imagenet') + elif args.model == 'faster_rcnn_fpn_resnet101': + mode = 'bbox' + model = FasterRCNNFPNResNet101( + 
n_fg_class=len(coco_bbox_label_names), + pretrained_model='imagenet') + elif args.model == 'mask_rcnn_fpn_resnet50': + mode = 'instance_segmentation' model = MaskRCNNFPNResNet50( n_fg_class=len(coco_instance_segmentation_label_names), pretrained_model='imagenet') elif args.model == 'mask_rcnn_fpn_resnet101': + mode = 'instance_segmentation' model = MaskRCNNFPNResNet101( n_fg_class=len(coco_instance_segmentation_label_names), pretrained_model='imagenet') @@ -190,12 +219,16 @@ def main(): chainer.cuda.get_device_from_id(device).use() train_chain.to_gpu() - train = TransformDataset( - COCOInstanceSegmentationDataset( - data_dir='/home/yuyu2172/coco', - split='train', return_bbox=True), - ('img', 'mask', 'label', 'bbox'), - Transform(model.min_size, model.max_size, model.extractor.mean)) + if mode == 'bbox': + train = TransformDataset( + COCOBboxDataset(year='2017', split='train'), + ('img', 'bbox', 'label'), + Transform(model.min_size, model.max_size, model.extractor.mean)) + elif mode == 'instance_segmentation': + train = TransformDataset( + COCOInstanceSegmentationDataset(split='train', return_bbox=True), + ('img', 'bbox', 'label', 'mask'), + Transform(model.min_size, model.max_size, model.extractor.mean)) if comm.rank == 0: indices = np.arange(len(train)) @@ -253,8 +286,8 @@ def lr_schedule(trainer): trainer.extend(extensions.PrintReport( ['epoch', 'iteration', 'lr', 'main/loss', 'main/loss/rpn/loc', 'main/loss/rpn/conf', - 'main/loss/head/loc', 'main/loss/head/conf', - 'main/loss/mask' + 'main/loss/bbox_head/loc', 'main/loss/bbox_head/conf', + 'main/loss/mask_head' ]), trigger=log_interval) trainer.extend(extensions.ProgressBar(update_interval=10)) From eeac6db5ebcdaf2650ff707b9f54936a303679df Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 15 Mar 2019 15:35:37 +0900 Subject: [PATCH 086/100] Mask R-CNN class --- .../links/model/fpn/faster_rcnn_fpn_resnet.py | 47 ++++++++++--------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git 
a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py index 29e6119c0b..debadb10ea 100644 --- a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py +++ b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py @@ -75,6 +75,26 @@ def __init__(self, n_fg_class=None, pretrained_model=None, chainer.serializers.load_npz(path, self) +class MaskRCNNFPNResNet(FasterRCNNFPNResNet): + """Feature Pyramid Networks with ResNet-50. + + This is a model of Feature Pyramid Networks [#]_. + This model uses :class:`~chainercv.links.ResNet50` as + its base feature extractor. + + .. [#] Tsung-Yi Lin et al. + Feature Pyramid Networks for Object Detection. CVPR 2017 + + + """ + + def __init__(self, n_fg_class=None, pretrained_model=None, + min_size=800, max_size=1333): + super(MaskRCNNFPNResNet, self).__init__( + n_fg_class, pretrained_model, ['masks', 'labels', 'scores'], + min_size, max_size) + + class FasterRCNNFPNResNet50(FasterRCNNFPNResNet): """Feature Pyramid Networks with ResNet-50. @@ -93,7 +113,7 @@ class FasterRCNNFPNResNet50(FasterRCNNFPNResNet): 'coco': { 'param': {'n_fg_class': 80}, 'url': 'https://chainercv-models.preferred.jp/' - 'faster_rcnn_fpn_resnet50_coco_trained_2018_12_13.npz', + 'faster_rcnn_fpn_resnet50_coco_trained_2019_03_15.npz', 'cv2': True }, } @@ -116,32 +136,12 @@ class FasterRCNNFPNResNet101(FasterRCNNFPNResNet): 'coco': { 'param': {'n_fg_class': 80}, 'url': 'https://chainercv-models.preferred.jp/' - 'faster_rcnn_fpn_resnet101_coco_trained_2018_12_13.npz', + 'faster_rcnn_fpn_resnet101_coco_trained_2019_03_15.npz', 'cv2': True }, } -class MaskRCNNFPNResNet(FasterRCNNFPNResNet): - """Feature Pyramid Networks with ResNet-50. - - This is a model of Feature Pyramid Networks [#]_. - This model uses :class:`~chainercv.links.ResNet50` as - its base feature extractor. - - .. [#] Tsung-Yi Lin et al. - Feature Pyramid Networks for Object Detection. 
CVPR 2017 - - - """ - - def __init__(self, n_fg_class=None, pretrained_model=None, - min_size=800, max_size=1333): - super(MaskRCNNFPNResNet, self).__init__( - n_fg_class, pretrained_model, ['masks', 'labels', 'scores'], - min_size, max_size) - - class MaskRCNNFPNResNet50(MaskRCNNFPNResNet): """Feature Pyramid Networks with ResNet-50. @@ -159,7 +159,8 @@ class MaskRCNNFPNResNet50(MaskRCNNFPNResNet): _models = { 'coco': { 'param': {'n_fg_class': 80}, - 'url': '', + 'url': 'https://chainercv-models.preferred.jp/' + 'faster_rcnn_fpn_resnet50_mask_coco_trained_2019_03_15.npz', 'cv2': True }, } From 20414c46bf9d7d92293011ede21baac64add717e Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 15 Mar 2019 15:45:58 +0900 Subject: [PATCH 087/100] fix --- chainercv/links/model/fpn/__init__.py | 6 ++++-- chainercv/links/model/fpn/bbox_head.py | 4 ++-- examples/fpn/train_multi.py | 17 ++++++++++------- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/chainercv/links/model/fpn/__init__.py b/chainercv/links/model/fpn/__init__.py index 78f6a7684b..aab5fb64cb 100644 --- a/chainercv/links/model/fpn/__init__.py +++ b/chainercv/links/model/fpn/__init__.py @@ -5,7 +5,9 @@ from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet50 # NOQA from chainercv.links.model.fpn.fpn import FPN # NOQA from chainercv.links.model.fpn.bbox_head import BboxHead # NOQA -from chainercv.links.model.fpn.bbox_head import bbox_head_loss_post # NOQA -from chainercv.links.model.fpn.bbox_head import bbox_head_loss_pre # NOQA +from chainercv.links.model.fpn.bbox_head import bbox_loss_post # NOQA +from chainercv.links.model.fpn.bbox_head import bbox_loss_pre # NOQA +from chainercv.links.model.fpn.mask_head import mask_loss_post # NOQA +from chainercv.links.model.fpn.mask_head import mask_loss_pre # NOQA from chainercv.links.model.fpn.rpn import RPN # NOQA from chainercv.links.model.fpn.rpn import rpn_loss # NOQA diff --git a/chainercv/links/model/fpn/bbox_head.py 
b/chainercv/links/model/fpn/bbox_head.py index 502baf4775..199d0b3508 100644 --- a/chainercv/links/model/fpn/bbox_head.py +++ b/chainercv/links/model/fpn/bbox_head.py @@ -210,7 +210,7 @@ def decode(self, rois, roi_indices, locs, confs, return bboxes, labels, scores -def bbox_head_loss_pre(rois, roi_indices, std, bboxes, labels): +def bbox_loss_pre(rois, roi_indices, std, bboxes, labels): """Loss function for Head (pre). This function processes RoIs for :func:`bbox_head_loss_post`. @@ -314,7 +314,7 @@ def bbox_head_loss_pre(rois, roi_indices, std, bboxes, labels): return rois, roi_indices, gt_locs, gt_labels -def bbox_head_loss_post(locs, confs, roi_indices, gt_locs, gt_labels, batchsize): +def bbox_loss_post(locs, confs, roi_indices, gt_locs, gt_labels, batchsize): """Loss function for Head (post). Args: diff --git a/examples/fpn/train_multi.py b/examples/fpn/train_multi.py index 726147d5bf..ffa308166d 100644 --- a/examples/fpn/train_multi.py +++ b/examples/fpn/train_multi.py @@ -28,8 +28,8 @@ from chainercv.links import FasterRCNNFPNResNet101 from chainercv.links import FasterRCNNFPNResNet50 -from chainercv.links.model.fpn import bbox_head_loss_post -from chainercv.links.model.fpn import bbox_head_loss_pre +from chainercv.links.model.fpn import bbox_loss_post +from chainercv.links.model.fpn import bbox_loss_pre from chainercv.links.model.fpn import mask_loss_post from chainercv.links.model.fpn import mask_loss_pre from chainercv.links.model.fpn import rpn_loss @@ -83,10 +83,10 @@ def __call__(self, imgs, bboxes, labels, masks=None): + [self.xp.array((i,) * len(bbox)) for i, bbox in enumerate(bboxes)]) rois, roi_indices = self.model.head.distribute(rois, roi_indices) - rois, roi_indices, head_gt_locs, head_gt_labels = bbox_head_loss_pre( + rois, roi_indices, head_gt_locs, head_gt_labels = bbox_loss_pre( rois, roi_indices, self.model.head.std, bboxes, labels) head_locs, head_confs = self.model.head(hs, rois, roi_indices) - head_loc_loss, head_conf_loss = 
bbox_head_loss_post( + head_loc_loss, head_conf_loss = bbox_loss_post( head_locs, head_confs, roi_indices, head_gt_locs, head_gt_labels, B) @@ -170,11 +170,12 @@ def converter(batch, device=None): def main(): parser = argparse.ArgumentParser() + parser.add_argument('--data-dir', default='auto') parser.add_argument( '--model', choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101', 'faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101'), - default='mask_rcnn_fpn_resnet50') + default='faster__rcnn_fpn_resnet50') parser.add_argument('--batchsize', type=int, default=16) parser.add_argument('--iteration', type=int, default=90000) parser.add_argument('--step', type=int, nargs='*', default=[60000, 80000]) @@ -221,12 +222,14 @@ def main(): if mode == 'bbox': train = TransformDataset( - COCOBboxDataset(year='2017', split='train'), + COCOBboxDataset( + data_dir=args.data_dir, year='2017', split='train'), ('img', 'bbox', 'label'), Transform(model.min_size, model.max_size, model.extractor.mean)) elif mode == 'instance_segmentation': train = TransformDataset( - COCOInstanceSegmentationDataset(split='train', return_bbox=True), + COCOInstanceSegmentationDataset( + data_dir=args.data_dir, split='train', return_bbox=True), ('img', 'bbox', 'label', 'mask'), Transform(model.min_size, model.max_size, model.extractor.mean)) From 1c31a000438d5d331fb847858712675365648dd4 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 15 Mar 2019 15:55:43 +0900 Subject: [PATCH 088/100] fix --- examples/fpn/train_multi.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/examples/fpn/train_multi.py b/examples/fpn/train_multi.py index ffa308166d..9d3d08b633 100644 --- a/examples/fpn/train_multi.py +++ b/examples/fpn/train_multi.py @@ -82,10 +82,10 @@ def __call__(self, imgs, bboxes, labels, masks=None): [roi_indices] + [self.xp.array((i,) * len(bbox)) for i, bbox in enumerate(bboxes)]) - rois, roi_indices = self.model.head.distribute(rois, roi_indices) + rois, 
roi_indices = self.model.bbox_head.distribute(rois, roi_indices) rois, roi_indices, head_gt_locs, head_gt_labels = bbox_loss_pre( - rois, roi_indices, self.model.head.std, bboxes, labels) - head_locs, head_confs = self.model.head(hs, rois, roi_indices) + rois, roi_indices, self.model.bbox_head.std, bboxes, labels) + head_locs, head_confs = self.model.bbox_head(hs, rois, roi_indices) head_loc_loss, head_conf_loss = bbox_loss_post( head_locs, head_confs, roi_indices, head_gt_locs, head_gt_labels, B) @@ -137,7 +137,10 @@ def __init__(self, min_size, max_size, mean): self.mean = mean def __call__(self, in_data): - img, bbox, label = in_data[:3] + if len(in_data) == 4: + img, mask, label, bbox = in_data + else: + img, bbox, label = in_data # Flipping img, params = transforms.random_flip( img, x_random=True, return_param=True) @@ -152,7 +155,6 @@ def __call__(self, in_data): bbox = bbox * scale if len(in_data) == 4: - mask = in_data[3] mask = transforms.flip(mask, x_flip=x_flip) mask = transforms.resize( mask.astype(np.float32), From 80401b136053264ec702e8485fb355b573b2342c Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 15 Mar 2019 16:42:37 +0900 Subject: [PATCH 089/100] fix tests --- chainercv/links/model/fpn/__init__.py | 1 + chainercv/links/model/fpn/faster_rcnn.py | 8 +- .../model_tests/fpn_tests/test_faster_rcnn.py | 83 +++++++---- .../test_mask_head.py | 6 +- .../test_mask_utils.py} | 4 +- .../mask_rcnn_tests/test_mask_rcnn.py | 132 ------------------ 6 files changed, 66 insertions(+), 168 deletions(-) rename tests/links_tests/model_tests/{mask_rcnn_tests => fpn_tests}/test_mask_head.py (97%) rename tests/links_tests/model_tests/{mask_rcnn_tests/test_misc.py => fpn_tests/test_mask_utils.py} (92%) delete mode 100644 tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn.py diff --git a/chainercv/links/model/fpn/__init__.py b/chainercv/links/model/fpn/__init__.py index aab5fb64cb..7f2f16d62e 100644 --- a/chainercv/links/model/fpn/__init__.py +++ 
b/chainercv/links/model/fpn/__init__.py @@ -7,6 +7,7 @@ from chainercv.links.model.fpn.bbox_head import BboxHead # NOQA from chainercv.links.model.fpn.bbox_head import bbox_loss_post # NOQA from chainercv.links.model.fpn.bbox_head import bbox_loss_pre # NOQA +from chainercv.links.model.fpn.mask_head import MaskHead # NOQA from chainercv.links.model.fpn.mask_head import mask_loss_post # NOQA from chainercv.links.model.fpn.mask_head import mask_loss_pre # NOQA from chainercv.links.model.fpn.rpn import RPN # NOQA diff --git a/chainercv/links/model/fpn/faster_rcnn.py b/chainercv/links/model/fpn/faster_rcnn.py index 40df122f81..68b4506233 100644 --- a/chainercv/links/model/fpn/faster_rcnn.py +++ b/chainercv/links/model/fpn/faster_rcnn.py @@ -152,8 +152,10 @@ def predict(self, imgs): hs, rpn_rois, rpn_roi_indices = self(x) if self._store_rpn_outputs: rpn_rois_cpu = [ - chainer.backends.cuda.to_cpu(rpn_roi) for rpn_roi in - _flat_to_list(rpn_rois, rpn_roi_indices, len(imgs))] + chainer.backends.cuda.to_cpu(rpn_roi) / scale + for rpn_roi, scale in + zip(_flat_to_list(rpn_rois, rpn_roi_indices, len(imgs)), + scales)] output.update({'rois': rpn_rois_cpu}) if self._run_bbox: @@ -198,7 +200,7 @@ def predict(self, imgs): # Currently MaskHead only supports numpy inputs masks_cpu = self.mask_head.decode(segms, bboxes_cpu, labels_cpu, sizes) output.update({'masks': masks_cpu}) - return (output[key] for key in self._return_values) + return tuple([output[key] for key in self._return_values]) def prepare(self, imgs): """Preprocess images. 
diff --git a/tests/links_tests/model_tests/fpn_tests/test_faster_rcnn.py b/tests/links_tests/model_tests/fpn_tests/test_faster_rcnn.py index 1d245ac0bd..bebfa4a79b 100644 --- a/tests/links_tests/model_tests/fpn_tests/test_faster_rcnn.py +++ b/tests/links_tests/model_tests/fpn_tests/test_faster_rcnn.py @@ -7,10 +7,13 @@ from chainer import testing from chainer.testing import attr +from chainercv.links.model.fpn import BboxHead from chainercv.links.model.fpn import FasterRCNN -from chainercv.links.model.fpn import Head +from chainercv.links.model.fpn import MaskHead from chainercv.links.model.fpn import RPN +from chainercv.utils import assert_is_bbox from chainercv.utils import assert_is_detection_link +from chainercv.utils import assert_is_instance_segmentation_link def _random_array(xp, shape): @@ -31,28 +34,35 @@ def __call__(self, x): class DummyFasterRCNN(FasterRCNN): - def __init__(self, n_fg_class, min_size, max_size): + def __init__(self, n_fg_class, return_values, min_size, max_size): extractor = DummyExtractor() super(DummyFasterRCNN, self).__init__( extractor=extractor, rpn=RPN(extractor.scales), - head=Head(n_fg_class + 1, extractor.scales), + bbox_head=BboxHead(n_fg_class + 1, extractor.scales), + mask_head=MaskHead(n_fg_class + 1, extractor.scales), + return_values=return_values, min_size=min_size, max_size=max_size, ) @testing.parameterize(*testing.product_dict( + [ + {'return_values': 'detection'}, + {'return_values': 'instance_segmentation'}, + {'return_values': 'rpn'} + ], [ {'n_fg_class': 1}, {'n_fg_class': 5}, {'n_fg_class': 20}, ], [ - { - 'in_sizes': [(480, 640), (320, 320)], - 'min_size': 800, 'max_size': 1333, - 'expected_shape': (800, 1088), - }, + # { + # 'in_sizes': [(480, 640), (320, 320)], + # 'min_size': 800, 'max_size': 1333, + # 'expected_shape': (800, 1088), + # }, { 'in_sizes': [(200, 50), (400, 100)], 'min_size': 200, 'max_size': 320, @@ -63,7 +73,14 @@ def __init__(self, n_fg_class, min_size, max_size): class 
TestFasterRCNN(unittest.TestCase): def setUp(self): + if self.return_values == 'detection': + return_values = ['bboxes', 'labels', 'scores'] + elif self.return_values == 'instance_segmentation': + return_values = ['masks', 'labels', 'scores'] + elif self.return_values == 'rpn': + return_values = ['rois'] self.link = DummyFasterRCNN(n_fg_class=self.n_fg_class, + return_values=return_values, min_size=self.min_size, max_size=self.max_size) @@ -88,29 +105,20 @@ def test_use_preset(self): def _check_call(self): x = _random_array(self.link.xp, (2, 3, 32, 32)) with chainer.using_config('train', False): - rois, roi_indices, head_locs, head_confs = self.link(x) + hs, rois, roi_indices = self.link(x) - self.assertEqual(len(rois), len(self.link.extractor.scales)) - self.assertEqual(len(roi_indices), len(self.link.extractor.scales)) + self.assertEqual(len(hs), len(self.link.extractor.scales)) for l in range(len(self.link.extractor.scales)): - self.assertIsInstance(rois[l], self.link.xp.ndarray) - self.assertEqual(rois[l].shape[1:], (4,)) - - self.assertIsInstance(roi_indices[l], self.link.xp.ndarray) - self.assertEqual(roi_indices[l].shape[1:], ()) - - self.assertEqual(rois[l].shape[0], roi_indices[l].shape[0]) + self.assertIsInstance(hs[l], chainer.Variable) + self.assertIsInstance(hs[l].data, self.link.xp.ndarray) - n_roi = sum( - len(rois[l]) for l in range(len(self.link.extractor.scales))) + self.assertIsInstance(rois, self.link.xp.ndarray) + self.assertEqual(rois.shape[1:], (4,)) - self.assertIsInstance(head_locs, chainer.Variable) - self.assertIsInstance(head_locs.array, self.link.xp.ndarray) - self.assertEqual(head_locs.shape, (n_roi, self.n_fg_class + 1, 4)) + self.assertIsInstance(roi_indices, self.link.xp.ndarray) + self.assertEqual(roi_indices.shape[1:], ()) - self.assertIsInstance(head_confs, chainer.Variable) - self.assertIsInstance(head_confs.array, self.link.xp.ndarray) - self.assertEqual(head_confs.shape, (n_roi, self.n_fg_class + 1)) + 
self.assertEqual(rois.shape[0], roi_indices.shape[0]) def test_call_cpu(self): self._check_call() @@ -126,13 +134,32 @@ def test_call_train_mode(self): with chainer.using_config('train', True): self.link(x) + def _check_predict(self): + if self.return_values == 'detection': + assert_is_detection_link(self.link, self.n_fg_class) + elif self.return_values == 'instance_segmentation': + assert_is_instance_segmentation_link(self.link, self.n_fg_class) + elif self.return_values == 'rpn': + imgs = [ + np.random.randint( + 0, 256, size=(3, 480, 320)).astype(np.float32), + np.random.randint( + 0, 256, size=(3, 480, 320)).astype(np.float32)] + result = self.link.predict(imgs) + assert len(result) == 1 + assert len(result[0]) == 1 + for i in range(len(result[0])): + roi = result[0][i] + assert_is_bbox(roi) + + @attr.slow def test_predict_cpu(self): - assert_is_detection_link(self.link, self.n_fg_class) + self._check_predict() @attr.gpu def test_predict_gpu(self): self.link.to_gpu() - assert_is_detection_link(self.link, self.n_fg_class) + self._check_predict() def test_prepare(self): imgs = [_random_array(np, (3, s[0], s[1])) for s in self.in_sizes] diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py b/tests/links_tests/model_tests/fpn_tests/test_mask_head.py similarity index 97% rename from tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py rename to tests/links_tests/model_tests/fpn_tests/test_mask_head.py index e89cf3c38d..c8e0bc927c 100644 --- a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py +++ b/tests/links_tests/model_tests/fpn_tests/test_mask_head.py @@ -7,9 +7,9 @@ from chainer import testing from chainer.testing import attr -from chainercv.links.model.mask_rcnn import MaskHead -from chainercv.links.model.mask_rcnn import mask_loss_post -from chainercv.links.model.mask_rcnn import mask_loss_pre +from chainercv.links.model.fpn import MaskHead +from chainercv.links.model.fpn import mask_loss_post +from 
chainercv.links.model.fpn import mask_loss_pre from chainercv.utils import mask_to_bbox diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_misc.py b/tests/links_tests/model_tests/fpn_tests/test_mask_utils.py similarity index 92% rename from tests/links_tests/model_tests/mask_rcnn_tests/test_misc.py rename to tests/links_tests/model_tests/fpn_tests/test_mask_utils.py index 6bd6722c7a..5ae85bf237 100644 --- a/tests/links_tests/model_tests/mask_rcnn_tests/test_misc.py +++ b/tests/links_tests/model_tests/fpn_tests/test_mask_utils.py @@ -5,8 +5,8 @@ from chainer import testing -from chainercv.links.model.mask_rcnn.misc import segm_to_mask -from chainercv.links.model.mask_rcnn.misc import mask_to_segm +from chainercv.links.model.fpn.mask_utils import segm_to_mask +from chainercv.links.model.fpn.mask_utils import mask_to_segm class TestSegmToMask(unittest.TestCase): diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn.py b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn.py deleted file mode 100644 index 637bab61c4..0000000000 --- a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn.py +++ /dev/null @@ -1,132 +0,0 @@ -from __future__ import division - -import numpy as np -import unittest - -import chainer -from chainer import testing -from chainer.testing import attr - -from chainercv.links.model.fpn import Head -from chainercv.links.model.fpn import RPN -from chainercv.links.model.mask_rcnn import MaskRCNN -from chainercv.links.model.mask_rcnn import MaskHead -from chainercv.utils import assert_is_instance_segmentation_link - - -def _random_array(xp, shape): - return xp.array( - np.random.uniform(-1, 1, size=shape), dtype=np.float32) - - -class DummyExtractor(chainer.Link): - scales = (1 / 2, 1 / 4, 1 / 8) - mean = _random_array(np, (3, 1, 1)) - n_channel = 16 - - def __call__(self, x): - n, _, h, w = x.shape - return [chainer.Variable(_random_array( - self.xp, (n, self.n_channel, int(h * scale), int(w * scale)))) - 
for scale in self.scales] - - -class DummyMaskRCNN(MaskRCNN): - - def __init__(self, n_fg_class): - extractor = DummyExtractor() - n_class = n_fg_class + 1 - super(DummyMaskRCNN, self).__init__( - extractor=extractor, - rpn=RPN(extractor.scales), - head=Head(n_class, extractor.scales), - mask_head=MaskHead(n_class, extractor.scales) - ) - - -@testing.parameterize( - {'n_fg_class': 1}, - {'n_fg_class': 5}, - {'n_fg_class': 20}, -) -class TestMaskRCNN(unittest.TestCase): - - def setUp(self): - self.link = DummyMaskRCNN(n_fg_class=self.n_fg_class) - - def test_use_preset(self): - self.link.nms_thresh = 0 - self.link.score_thresh = 0 - - self.link.use_preset('visualize') - self.assertEqual(self.link.nms_thresh, 0.5) - self.assertEqual(self.link.score_thresh, 0.7) - - self.link.nms_thresh = 0 - self.link.score_thresh = 0 - - self.link.use_preset('evaluate') - self.assertEqual(self.link.nms_thresh, 0.5) - self.assertEqual(self.link.score_thresh, 0.05) - - with self.assertRaises(ValueError): - self.link.use_preset('unknown') - - def _check_call(self): - B = 2 - size = 32 - x = _random_array(self.link.xp, (B, 3, size, size)) - with chainer.using_config('train', False): - hs, rois, roi_indices = self.link(x) - - self.assertEqual(len(hs), len(self.link.extractor.scales)) - self.assertEqual(len(rois), len(self.link.extractor.scales)) - self.assertEqual(len(roi_indices), len(self.link.extractor.scales)) - for l, scale in enumerate(self.link.extractor.scales): - self.assertIsInstance(rois[l], self.link.xp.ndarray) - self.assertEqual(rois[l].shape[1:], (4,)) - - self.assertIsInstance(roi_indices[l], self.link.xp.ndarray) - self.assertEqual(roi_indices[l].shape[1:], ()) - - self.assertEqual(rois[l].shape[0], roi_indices[l].shape[0]) - - self.assertIsInstance(hs[l], chainer.Variable) - self.assertIsInstance(hs[l].array, self.link.xp.ndarray) - feat_size = int(size * scale) - self.assertEqual( - hs[l].shape, - (B, self.link.extractor.n_channel, feat_size, feat_size)) - - def 
test_call_cpu(self): - self._check_call() - - @attr.gpu - def test_call_gpu(self): - self.link.to_gpu() - self._check_call() - - def test_call_train_mode(self): - x = _random_array(self.link.xp, (2, 3, 32, 32)) - with self.assertRaises(AssertionError): - with chainer.using_config('train', True): - self.link(x) - - def test_predict_cpu(self): - assert_is_instance_segmentation_link(self.link, self.n_fg_class) - - @attr.gpu - def test_predict_gpu(self): - self.link.to_gpu() - assert_is_instance_segmentation_link(self.link, self.n_fg_class) - - def test_prepare(self): - imgs = [ - np.random.randint(0, 255, size=(3, 480, 640)).astype(np.float32), - np.random.randint(0, 255, size=(3, 320, 320)).astype(np.float32), - ] - x, _, _ = self.link.prepare(imgs) - self.assertEqual(x.shape, (2, 3, 800, 1088)) - - -testing.run_module(__name__, __file__) From 462726f683a2f4f4854201c4ae9fbcf7d0ccecf0 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 15 Mar 2019 19:57:53 +0900 Subject: [PATCH 090/100] merge to fpn --- chainercv/links/__init__.py | 2 + chainercv/links/model/fpn/__init__.py | 2 + chainercv/links/model/fpn/faster_rcnn.py | 46 +++- .../links/model/fpn/faster_rcnn_fpn_resnet.py | 83 +++++- .../model/{mask_rcnn => fpn}/keypoint_head.py | 4 +- chainercv/links/model/fpn/keypoint_utils.py | 52 ++++ chainercv/links/model/fpn/mask_utils.py | 47 ---- chainercv/links/model/mask_rcnn/__init__.py | 11 - chainercv/links/model/mask_rcnn/mask_rcnn.py | 253 ------------------ .../model/mask_rcnn/mask_rcnn_fpn_resnet.py | 137 ---------- examples/fpn/demo.py | 30 ++- .../eval_keypoint_detection.py | 10 +- examples/mask_rcnn/demo.py | 75 ------ 13 files changed, 211 insertions(+), 541 deletions(-) rename chainercv/links/model/{mask_rcnn => fpn}/keypoint_head.py (98%) create mode 100644 chainercv/links/model/fpn/keypoint_utils.py delete mode 100644 chainercv/links/model/mask_rcnn/__init__.py delete mode 100644 chainercv/links/model/mask_rcnn/mask_rcnn.py delete mode 100644 
chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py delete mode 100644 examples/mask_rcnn/demo.py diff --git a/chainercv/links/__init__.py b/chainercv/links/__init__.py index 72b4d32106..aa91f30b77 100644 --- a/chainercv/links/__init__.py +++ b/chainercv/links/__init__.py @@ -11,6 +11,8 @@ from chainercv.links.model.faster_rcnn.faster_rcnn_vgg import FasterRCNNVGG16 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet101 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet50 # NOQA +from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import KeypointRCNNFPNResNet101 # NOQA +from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import KeypointRCNNFPNResNet50 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet101 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet50 # NOQA from chainercv.links.model.resnet import ResNet101 # NOQA diff --git a/chainercv/links/model/fpn/__init__.py b/chainercv/links/model/fpn/__init__.py index 7f2f16d62e..d55ac5471c 100644 --- a/chainercv/links/model/fpn/__init__.py +++ b/chainercv/links/model/fpn/__init__.py @@ -1,6 +1,8 @@ from chainercv.links.model.fpn.faster_rcnn import FasterRCNN # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet101 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet50 # NOQA +from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import KeypointRCNNFPNResNet101 # NOQA +from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import KeypointRCNNFPNResNet50 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet101 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet50 # NOQA from chainercv.links.model.fpn.fpn import FPN # NOQA diff --git a/chainercv/links/model/fpn/faster_rcnn.py b/chainercv/links/model/fpn/faster_rcnn.py index 
68b4506233..56c11ba7fb 100644 --- a/chainercv/links/model/fpn/faster_rcnn.py +++ b/chainercv/links/model/fpn/faster_rcnn.py @@ -50,10 +50,11 @@ class FasterRCNN(chainer.Chain): """ stride = 32 - _accepted_return_values = ('rois', 'bboxes', 'labels', 'scores', 'masks') + _accepted_return_values = ('rois', 'bboxes', 'labels', 'scores', + 'masks', 'points', 'point_scores') def __init__(self, extractor, rpn, bbox_head, - mask_head, return_values, + mask_head, keypoint_head, return_values, min_size=800, max_size=1333): for value_name in return_values: if value_name not in self._accepted_return_values: @@ -64,8 +65,10 @@ def __init__(self, extractor, rpn, bbox_head, self._store_rpn_outputs = 'rois' in self._return_values self._run_bbox = any([key in self._return_values - for key in ['bboxes', 'labels', 'scores', 'masks']]) + for key in ['bboxes', 'labels', 'scores', + 'masks', 'points', 'point_scores']]) self._run_mask = 'masks' in self._return_values + self._run_keypoint = 'points' in self._return_values super(FasterRCNN, self).__init__() with self.init_scope(): @@ -75,6 +78,8 @@ def __init__(self, extractor, rpn, bbox_head, self.bbox_head = bbox_head if self._run_mask: self.mask_head = mask_head + if self._run_keypoint: + self.keypoint_head = keypoint_head self.min_size = min_size self.max_size = max_size @@ -174,10 +179,9 @@ def predict(self, imgs): scores_cpu = [cuda.to_cpu(score) for score in scores] output.update({'bboxes': bboxes_cpu, 'labels': labels_cpu, 'scores': scores_cpu}) - - if self._run_mask: rescaled_bboxes = [bbox * scale - for scale, bbox in zip(scales, bboxes)] + for scale, bbox in zip(scales, bboxes)] + if self._run_mask: # Change bboxes to RoI and RoI indices format mask_rois_before_reordering, mask_roi_indices_before_reordering =\ _list_to_flat(rescaled_bboxes) @@ -200,6 +204,36 @@ def predict(self, imgs): # Currently MaskHead only supports numpy inputs masks_cpu = self.mask_head.decode(segms, bboxes_cpu, labels_cpu, sizes) output.update({'masks': 
masks_cpu}) + + if self._run_keypoint: + (point_rois_before_reordering, + point_roi_indices_before_reordering) = _list_to_flat( + rescaled_bboxes) + point_rois, point_roi_indices, order =\ + self.keypoint_head.distribute( + point_rois_before_reordering, + point_roi_indices_before_reordering) + with chainer.using_config( + 'train', False), chainer.no_backprop_mode(): + point_maps = self.keypoint_head( + hs, point_rois, point_roi_indices).data + point_maps = point_maps[order] + point_maps = _flat_to_list( + point_maps, point_roi_indices_before_reordering, len(imgs)) + point_maps = [point_map if point_map is not None else + self.xp.zeros( + (0, self.keypoint_head.n_point, + self.keypoint_head.point_map_size, + self.keypoint_head.point_map_size), + dtype=np.float32) + for point_map in point_maps] + point_maps = [ + chainer.backends.cuda.to_cpu(point_map) + for point_map in point_maps] + points_cpu, point_scores_cpu = self.keypoint_head.decode( + point_maps, bboxes_cpu) + output.update( + {'points': points_cpu, 'point_scores': point_scores_cpu}) return tuple([output[key] for key in self._return_values]) def prepare(self, imgs): diff --git a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py index debadb10ea..f74a890495 100644 --- a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py +++ b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py @@ -7,6 +7,7 @@ from chainercv.links.model.fpn.faster_rcnn import FasterRCNN from chainercv.links.model.fpn.fpn import FPN from chainercv.links.model.fpn.bbox_head import BboxHead +from chainercv.links.model.fpn.keypoint_head import KeypointHead from chainercv.links.model.fpn.mask_head import MaskHead from chainercv.links.model.fpn.rpn import RPN from chainercv.links.model.resnet import ResNet101 @@ -45,10 +46,12 @@ class FasterRCNNFPNResNet(FasterRCNN): """ def __init__(self, n_fg_class=None, pretrained_model=None, + n_point=None, return_values=['bboxes', 'labels', 'scores'], 
min_size=800, max_size=1333): param, path = utils.prepare_pretrained_model( - {'n_fg_class': n_fg_class}, pretrained_model, self._models) + {'n_fg_class': n_fg_class, 'n_point': n_point}, + pretrained_model, self._models) base = self._base(n_class=1, arch='he') base.pick = ('res2', 'res3', 'res4', 'res5') @@ -58,11 +61,16 @@ def __init__(self, n_fg_class=None, pretrained_model=None, extractor = FPN( base, len(base.pick), (1 / 4, 1 / 8, 1 / 16, 1 / 32, 1 / 64)) + if param['n_point'] is not None: + keypoint_head = KeypointHead(param['n_point'], extractor.scales) + else: + keypoint_head = None super(FasterRCNNFPNResNet, self).__init__( extractor=extractor, rpn=RPN(extractor.scales), bbox_head=BboxHead(param['n_fg_class'] + 1, extractor.scales), mask_head=MaskHead(param['n_fg_class'] + 1, extractor.scales), + keypoint_head=keypoint_head, return_values=return_values, min_size=min_size, max_size=max_size ) @@ -72,7 +80,7 @@ def __init__(self, n_fg_class=None, pretrained_model=None, self.extractor.base, self._base(pretrained_model='imagenet', arch='he')) elif path: - chainer.serializers.load_npz(path, self) + chainer.serializers.load_npz(path, self, strict=False) class MaskRCNNFPNResNet(FasterRCNNFPNResNet): @@ -91,7 +99,30 @@ class MaskRCNNFPNResNet(FasterRCNNFPNResNet): def __init__(self, n_fg_class=None, pretrained_model=None, min_size=800, max_size=1333): super(MaskRCNNFPNResNet, self).__init__( - n_fg_class, pretrained_model, ['masks', 'labels', 'scores'], + n_fg_class, pretrained_model, None, + ['masks', 'labels', 'scores'], + min_size, max_size) + + +class KeypointRCNNFPNResNet(FasterRCNNFPNResNet): + """Feature Pyramid Networks with ResNet-50. + + This is a model of Feature Pyramid Networks [#]_. + This model uses :class:`~chainercv.links.ResNet50` as + its base feature extractor. + + .. [#] Tsung-Yi Lin et al. + Feature Pyramid Networks for Object Detection. 
class KeypointRCNNFPNResNet101(KeypointRCNNFPNResNet):
    """Keypoint R-CNN based on Feature Pyramid Networks with ResNet-101.

    This is a keypoint detection model built on
    Feature Pyramid Networks [#]_.
    This model uses :class:`~chainercv.links.ResNet101` as
    its base feature extractor.

    .. [#] Tsung-Yi Lin et al.
        Feature Pyramid Networks for Object Detection. CVPR 2017

    """

    # The backbone of this variant is ResNet-101. It was previously set to
    # ResNet50 by copy-paste from the ResNet-50 class, which made the two
    # variants build the same network.
    _base = ResNet101
    _models = {
        # NOTE(review): this entry looks like a placeholder. The URL is
        # empty and 'param' has no 'n_point', whereas the ResNet-50
        # sibling uses {'n_fg_class': 1, 'n_point': 17}. Confirm the
        # intended values before publishing converted weights.
        'coco': {
            'param': {'n_fg_class': 80},
            'url': '',
            'cv2': True
        },
    }
def within_bbox(point, bbox):
    """Tell which points fall inside their corresponding bounding box.

    The box boundaries are inclusive: a point lying exactly on an edge
    counts as inside.

    Args:
        point (array): Indexed as :obj:`point[r, k, 0]` for the y
            coordinate and :obj:`point[r, k, 1]` for the x coordinate.
        bbox (array): Indexed as :obj:`bbox[r, 0]` ... :obj:`bbox[r, 3]`,
            used as :obj:`(y_min, x_min, y_max, x_max)`. Row :obj:`r` is
            compared against all points :obj:`point[r]`.

    Returns:
        array:
        A boolean array whose element :obj:`[r, k]` is :obj:`True` when
        point :obj:`k` of row :obj:`r` is inside box :obj:`r`.

    """
    point_y = point[:, :, 0]
    point_x = point[:, :, 1]

    # Add a trailing axis so every box edge broadcasts over the points
    # that belong to the same row.
    y_min = bbox[:, 0][:, None]
    x_min = bbox[:, 1][:, None]
    y_max = bbox[:, 2][:, None]
    x_max = bbox[:, 3][:, None]

    inside_y = (point_y >= y_min) & (point_y <= y_max)
    inside_x = (point_x >= x_min) & (point_x <= x_max)
    return inside_y & inside_x
point_map_size, xs < point_map_size)), - visible[:, k]) - - roi_point[:, k, 0] = ys - roi_point[:, k, 1] = xs - roi_visible[:, k] = valid - return roi_point, roi_visible - - -def within_bbox(point, bbox): - y_within = (point[:, :, 0] >= bbox[:, 0][:, None]) & ( - point[:, :, 0] <= bbox[:, 2][:, None]) - x_within = (point[:, :, 1] >= bbox[:, 1][:, None]) & ( - point[:, :, 1] <= bbox[:, 3][:, None]) - return y_within & x_within diff --git a/chainercv/links/model/mask_rcnn/__init__.py b/chainercv/links/model/mask_rcnn/__init__.py deleted file mode 100644 index 3391efe1f9..0000000000 --- a/chainercv/links/model/mask_rcnn/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -from chainercv.links.model.mask_rcnn.keypoint_head import KeypointHead # NOQA -from chainercv.links.model.mask_rcnn.keypoint_head import keypoint_loss_post # NOQA -from chainercv.links.model.mask_rcnn.keypoint_head import keypoint_loss_pre # NOQA -from chainercv.links.model.mask_rcnn.mask_head import mask_loss_post # NOQA -from chainercv.links.model.mask_rcnn.mask_head import mask_loss_pre # NOQA -from chainercv.links.model.mask_rcnn.mask_head import MaskHead # NOQA -from chainercv.links.model.mask_rcnn.mask_rcnn import MaskRCNN # NOQA -from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet101 # NOQA -from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet50 # NOQA -from chainercv.links.model.mask_rcnn.misc import mask_to_segm # NOQA -from chainercv.links.model.mask_rcnn.misc import segm_to_mask # NOQA diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py deleted file mode 100644 index 8bb88f9789..0000000000 --- a/chainercv/links/model/mask_rcnn/mask_rcnn.py +++ /dev/null @@ -1,253 +0,0 @@ -from __future__ import division - -import numpy as np - -import chainer -from chainer.backends import cuda -import chainer.functions as F - -from chainercv.links.model.mask_rcnn.misc import scale_img - - -class 
MaskRCNN(chainer.Chain): - - """Base class of Mask R-CNN. - - This is a base class of Mask R-CNN [#]_. - - .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017 - - Args: - extractor (Link): A link that extracts feature maps. - This link must have :obj:`scales`, :obj:`mean` and - :meth:`__call__`. - rpn (Link): A link that has the same interface as - :class:`~chainercv.links.model.fpn.RPN`. - Please refer to the documentation found there. - head (Link): A link that has the same interface as - :class:`~chainercv.links.model.fpn.Head`. - Please refer to the documentation found there. - mask_head (Link): A link that has the same interface as - :class:`~chainercv.links.model.mask_rcnn.MaskRCNN`. - Please refer to the documentation found there. - - Parameters: - nms_thresh (float): The threshold value - for :func:`~chainercv.utils.non_maximum_suppression`. - The default value is :obj:`0.5`. - This value can be changed directly or by using :meth:`use_preset`. - score_thresh (float): The threshold value for confidence score. - If a bounding box whose confidence score is lower than this value, - the bounding box will be suppressed. - The default value is :obj:`0.7`. - This value can be changed directly or by using :meth:`use_preset`. - - """ - - min_size = 800 - max_size = 1333 - stride = 32 - - def __init__(self, extractor, rpn, head, mask_head, - keypoint_head, mode='mask'): - super(MaskRCNN, self).__init__() - with self.init_scope(): - self.extractor = extractor - self.rpn = rpn - self.head = head - if mode == 'mask': - self.mask_head = mask_head - elif mode =='keypoint': - self.keypoint_head = keypoint_head - self.mode = mode - - self.use_preset('visualize') - - def use_preset(self, preset): - """Use the given preset during prediction. - - This method changes values of :obj:`nms_thresh` and - :obj:`score_thresh`. These values are a threshold value - used for non maximum suppression and a threshold value - to discard low confidence proposals in :meth:`predict`, - respectively. 
- - If the attributes need to be changed to something - other than the values provided in the presets, please modify - them by directly accessing the public attributes. - - Args: - preset ({'visualize', 'evaluate'}): A string to determine the - preset to use. - """ - - if preset == 'visualize': - self.nms_thresh = 0.5 - self.score_thresh = 0.7 - elif preset == 'evaluate': - self.nms_thresh = 0.5 - self.score_thresh = 0.05 - else: - raise ValueError('preset must be visualize or evaluate') - - def __call__(self, x): - assert(not chainer.config.train) - hs = self.extractor(x) - rpn_locs, rpn_confs = self.rpn(hs) - anchors = self.rpn.anchors(h.shape[2:] for h in hs) - rois, roi_indices = self.rpn.decode( - rpn_locs, rpn_confs, anchors, x.shape) - rois, roi_indices = self.head.distribute(rois, roi_indices) - return hs, rois, roi_indices - - def predict(self, imgs): - """Segment object instances from images. - - This method predicts instance-aware object regions for each image. - - Args: - imgs (iterable of numpy.ndarray): Arrays holding images of shape - :math:`(B, C, H, W)`. All images are in CHW and RGB format - and the range of their value is :math:`[0, 255]`. - - Returns: - tuple of lists: - This method returns a tuple of three lists, - :obj:`(masks, labels, scores)`. - - * **masks**: A list of boolean arrays of shape :math:`(R, H, W)`, \ - where :math:`R` is the number of masks in a image. \ - Each pixel holds value if it is inside the object inside or not. - * **labels** : A list of integer arrays of shape :math:`(R,)`. \ - Each value indicates the class of the masks. \ - Values are in range :math:`[0, L - 1]`, where :math:`L` is the \ - number of the foreground classes. - * **scores** : A list of float arrays of shape :math:`(R,)`. \ - Each value indicates how confident the prediction is. 
- - """ - - sizes = [img.shape[1:] for img in imgs] - x, scales = self.prepare(imgs) - - with chainer.using_config('train', False), chainer.no_backprop_mode(): - hs, rois, roi_indices = self(x) - head_locs, head_confs = self.head(hs, rois, roi_indices) - bboxes, labels, scores = self.head.decode( - rois, roi_indices, head_locs, head_confs, - scales, sizes, self.nms_thresh, self.score_thresh) - - rescaled_bboxes = [bbox * scale for scale, bbox in zip(scales, bboxes)] - if self.mode == 'mask': - # Change bboxes to RoI and RoI indices format - mask_rois_before_reordering, mask_roi_indices_before_reordering =\ - _list_to_flat(rescaled_bboxes) - mask_rois, mask_roi_indices, order = self.mask_head.distribute( - mask_rois_before_reordering, mask_roi_indices_before_reordering) - with chainer.using_config('train', False), chainer.no_backprop_mode(): - segms = F.sigmoid( - self.mask_head(hs, mask_rois, mask_roi_indices)).data - # Put the order of proposals back to the one used by bbox head. - segms = segms[order] - segms = _flat_to_list( - segms, mask_roi_indices_before_reordering, len(imgs)) - segms = [segm if segm is not None else - self.xp.zeros( - (0, self.mask_head.segm_size, self.mask_head.segm_size), - dtype=np.float32) - for segm in segms] - - segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms] - bboxes = [chainer.backends.cuda.to_cpu(bbox / scale) - for bbox, scale in zip(rescaled_bboxes, scales)] - labels = [chainer.backends.cuda.to_cpu(label) for label in labels] - # Currently MaskHead only supports numpy inputs - masks = self.mask_head.decode(segms, bboxes, labels, sizes) - scores = [cuda.to_cpu(score) for score in scores] - return masks, labels, scores - elif self.mode == 'keypoint': - (point_rois_before_reordering, - point_roi_indices_before_reordering) = _list_to_flat( - rescaled_bboxes) - point_rois, point_roi_indices, order =\ - self.keypoint_head.distribute( - point_rois_before_reordering, - point_roi_indices_before_reordering) - with 
chainer.using_config('train', False), chainer.no_backprop_mode(): - point_maps = self.keypoint_head( - hs, point_rois, point_roi_indices).data - point_maps = point_maps[order] - point_maps = _flat_to_list( - point_maps, point_roi_indices_before_reordering, len(imgs)) - point_maps = [point_map if point_map is not None else - self.xp.zeros( - (0, self.keypoint_head.n_point, - self.keypoint_head.point_map_size, - self.keypoint_head.point_map_size), - dtype=np.float32) - for point_map in point_maps] - point_maps = [ - chainer.backends.cuda.to_cpu(point_map) - for point_map in point_maps] - bboxes = [chainer.cuda.to_cpu(bbox / scale) - for bbox, scale in zip(rescaled_bboxes, scales)] - points, point_scores = self.keypoint_head.decode( - point_maps, bboxes) - labels = [cuda.to_cpu(label) for label in labels] - scores = [cuda.to_cpu(score) for score in scores] - return points, labels, scores, point_scores, bboxes - - def prepare(self, imgs): - """Preprocess images. - - Args: - imgs (iterable of numpy.ndarray): Arrays holding images. - All images are in CHW and RGB format - and the range of their value is :math:`[0, 255]`. - - Returns: - Two arrays: preprocessed images and \ - scales that were caluclated in prepocessing. 
- - """ - scales = [] - resized_imgs = [] - for img in imgs: - img, scale = scale_img( - img, self.min_size, self.max_size) - img -= self.extractor.mean - scales.append(scale) - resized_imgs.append(img) - pad_size = np.array( - [im.shape[1:] for im in resized_imgs]).max(axis=0) - pad_size = ( - np.ceil(pad_size / self.stride) * self.stride).astype(int) - x = np.zeros( - (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32) - for i, im in enumerate(resized_imgs): - _, H, W = im.shape - x[i, :, :H, :W] = im - x = self.xp.array(x) - - return x, scales - - -def _list_to_flat(array_list): - xp = chainer.backends.cuda.get_array_module(array_list[0]) - - indices = xp.concatenate( - [i * xp.ones((len(array),), dtype=np.int32) for - i, array in enumerate(array_list)], axis=0) - flat = xp.concatenate(array_list, axis=0) - return flat, indices - - -def _flat_to_list(flat, indices, B): - array_list = [] - for i in range(B): - array = flat[indices == i] - if len(array) > 0: - array_list.append(array) - else: - array_list.append(None) - return array_list diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py b/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py deleted file mode 100644 index 3048ce80cf..0000000000 --- a/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py +++ /dev/null @@ -1,137 +0,0 @@ -from __future__ import division - -import chainer -import chainer.functions as F - -from chainercv.links.model.fpn import FPN -from chainercv.links.model.fpn import Head -from chainercv.links.model.fpn import RPN -from chainercv.links.model.mask_rcnn.keypoint_head import KeypointHead -from chainercv.links.model.mask_rcnn.mask_head import MaskHead -from chainercv.links.model.mask_rcnn.mask_rcnn import MaskRCNN -from chainercv.links.model.resnet import ResNet101 -from chainercv.links.model.resnet import ResNet50 -from chainercv import utils - -from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import _copyparams - - -class MaskRCNNFPNResNet(MaskRCNN): 
- - """Base class for Mask R-CNN with ResNet backbone. - - A subclass of this class should have :obj:`_base` and :obj:`_models`. - """ - - def __init__(self, n_fg_class=None, pretrained_model=None, - n_point=17, mode='mask'): - param, path = utils.prepare_pretrained_model( - {'n_fg_class': n_fg_class}, pretrained_model, self._models) - - base = self._base(n_class=1, arch='he') - base.pick = ('res2', 'res3', 'res4', 'res5') - base.pool1 = lambda x: F.max_pooling_2d( - x, 3, stride=2, pad=1, cover_all=False) - base.remove_unused() - extractor = FPN( - base, len(base.pick), (1 / 4, 1 / 8, 1 / 16, 1 / 32, 1 / 64)) - - n_class = param['n_fg_class'] + 1 - super(MaskRCNNFPNResNet, self).__init__( - extractor=extractor, - rpn=RPN(extractor.scales), - head=Head(n_class, extractor.scales), - mask_head=MaskHead(n_class, extractor.scales), - keypoint_head=KeypointHead(n_point, extractor.scales), - mode=mode, - ) - if path == 'imagenet': - _copyparams( - self.extractor.base, - self._base(pretrained_model='imagenet', arch='he')) - elif path: - chainer.serializers.load_npz(path, self) - - -class MaskRCNNFPNResNet50(MaskRCNNFPNResNet): - - """Mask R-CNN with ResNet-50. - - This is a model of Mask R-CNN [#]_. - This model uses :class:`~chainercv.links.ResNet50` as - its base feature extractor. - - .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017 - - Args: - n_fg_class (int): The number of classes excluding the background. - pretrained_model (string): The weight file to be loaded. - This can take :obj:`'coco'`, `filepath` or :obj:`None`. - The default value is :obj:`None`. - - * :obj:`'coco'`: Load weights trained on train split of \ - MS COCO 2017. \ - The weight file is downloaded and cached automatically. \ - :obj:`n_fg_class` must be :obj:`80` or :obj:`None`. - * :obj:`'imagenet'`: Load weights of ResNet-50 trained on \ - ImageNet. \ - The weight file is downloaded and cached automatically. \ - This option initializes weights partially and the rests are \ - initialized randomly. 
In this case, :obj:`n_fg_class` \ - can be set to any number. - * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \ - must be specified properly. - * :obj:`None`: Do not load weights. - - """ - - _base = ResNet50 - _models = { - 'coco': { - 'param': {'n_fg_class': 80}, - 'url': None, - 'cv2': True - }, - } - - -class MaskRCNNFPNResNet101(MaskRCNNFPNResNet): - - """Mask R-CNN with ResNet-101. - - This is a model of Mask R-CNN [#]_. - This model uses :class:`~chainercv.links.ResNet101` as - its base feature extractor. - - .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017 - - Args: - n_fg_class (int): The number of classes excluding the background. - pretrained_model (string): The weight file to be loaded. - This can take :obj:`'coco'`, `filepath` or :obj:`None`. - The default value is :obj:`None`. - - * :obj:`'coco'`: Load weights trained on train split of \ - MS COCO 2017. \ - The weight file is downloaded and cached automatically. \ - :obj:`n_fg_class` must be :obj:`80` or :obj:`None`. - * :obj:`'imagenet'`: Load weights of ResNet-101 trained on \ - ImageNet. \ - The weight file is downloaded and cached automatically. \ - This option initializes weights partially and the rests are \ - initialized randomly. In this case, :obj:`n_fg_class` \ - can be set to any number. - * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \ - must be specified properly. - * :obj:`None`: Do not load weights. 
- - """ - - _base = ResNet101 - _models = { - 'coco': { - 'param': {'n_fg_class': 80}, - 'url': None, - 'cv2': True - }, - } diff --git a/examples/fpn/demo.py b/examples/fpn/demo.py index 0d615cacfb..b11a844eb6 100644 --- a/examples/fpn/demo.py +++ b/examples/fpn/demo.py @@ -5,13 +5,17 @@ from chainercv.datasets import coco_bbox_label_names from chainercv.datasets import coco_instance_segmentation_label_names +from chainercv.datasets import coco_keypoint_names from chainercv.links import FasterRCNNFPNResNet101 from chainercv.links import FasterRCNNFPNResNet50 +from chainercv.links import KeypointRCNNFPNResNet101 +from chainercv.links import KeypointRCNNFPNResNet50 from chainercv.links import MaskRCNNFPNResNet101 from chainercv.links import MaskRCNNFPNResNet50 from chainercv import utils from chainercv.visualizations import vis_bbox from chainercv.visualizations import vis_instance_segmentation +from chainercv.visualizations import vis_keypoint_coco def main(): @@ -19,7 +23,8 @@ def main(): parser.add_argument( '--model', choices=('faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101', - 'mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'), + 'mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101', + 'keypoint_rcnn_fpn_resnet50', 'keypoint_rcnn_fpn_resnet101'), default='faster_rcnn_fpn_resnet50') parser.add_argument('--gpu', type=int, default=-1) parser.add_argument('--pretrained-model', default='coco') @@ -46,6 +51,18 @@ def main(): model = MaskRCNNFPNResNet101( n_fg_class=len(coco_instance_segmentation_label_names), pretrained_model=args.pretrained_model) + elif args.model == 'keypoint_rcnn_fpn_resnet50': + mode = 'keypoint' + model = KeypointRCNNFPNResNet50( + n_fg_class=1, + pretrained_model=args.pretrained_model, + n_point=len(coco_keypoint_names[0])) + elif args.model == 'keypoint_rcnn_fpn_resnet101': + mode = 'keypoint' + model = KeypointRCNNFPNResNet101( + n_fg_class=1, + pretrained_model=args.pretrained_model, + n_point=len(coco_keypoint_names[0])) if args.gpu 
>= 0: chainer.cuda.get_device_from_id(args.gpu).use() @@ -69,6 +86,17 @@ def main(): vis_instance_segmentation( img, mask, label, score, label_names=coco_instance_segmentation_label_names) + elif mode == 'keypoint': + points, labels, scores, point_scores, bboxes = model.predict([img]) + point = points[0] + label = labels[0] + score = scores[0] + point_score = point_scores[0] + bbox = bboxes[0] + ax = vis_keypoint_coco( + img, point, None, point_score) + vis_bbox(None, bbox, label, score=score, + label_names=coco_bbox_label_names, ax=ax) plt.show() diff --git a/examples/keypoint_detection/eval_keypoint_detection.py b/examples/keypoint_detection/eval_keypoint_detection.py index 94954c5cd7..377e14f385 100644 --- a/examples/keypoint_detection/eval_keypoint_detection.py +++ b/examples/keypoint_detection/eval_keypoint_detection.py @@ -5,17 +5,15 @@ from chainercv.datasets import COCOKeypointDataset from chainercv.evaluations import eval_keypoint_detection_coco -from chainercv.links import MaskRCNNFPNResNet101 -from chainercv.links import MaskRCNNFPNResNet50 +from chainercv.links import KeypointRCNNFPNResNet101 +from chainercv.links import KeypointRCNNFPNResNet50 from chainercv.utils import apply_to_iterator from chainercv.utils import ProgressHook models = { # model: (class, dataset -> pretrained_model, default batchsize) - 'mask_rcnn_fpn_resnet50': (MaskRCNNFPNResNet50, - {}, 1), - 'mask_rcnn_fpn_resnet101': (MaskRCNNFPNResNet101, - {}, 1), + 'keypoint_rcnn_fpn_resnet50': (KeypointRCNNFPNResNet50, {}, 1), + 'keypoint_rcnn_fpn_resnet101': (KeypointRCNNFPNResNet101, {}, 1), } diff --git a/examples/mask_rcnn/demo.py b/examples/mask_rcnn/demo.py deleted file mode 100644 index 81659c862b..0000000000 --- a/examples/mask_rcnn/demo.py +++ /dev/null @@ -1,75 +0,0 @@ -import argparse -import matplotlib.pyplot as plt - -import chainer - -import chainercv -from chainercv.datasets import coco_instance_segmentation_label_names -from chainercv import utils - -from chainercv.links 
import MaskRCNNFPNResNet101 -from chainercv.links import MaskRCNNFPNResNet50 - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument( - '--model', - choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'), - default='mask_rcnn_fpn_resnet50' - ) - parser.add_argument('--gpu', type=int, default=-1) - parser.add_argument('--pretrained-model', default='coco') - parser.add_argument( - '--mode', - choices=('mask', 'keypoint'), - default='mask') - parser.add_argument('image') - args = parser.parse_args() - - if args.mode == 'mask': - n_fg_class = len(coco_instance_segmentation_label_names) - elif args.mode == 'keypoint': - n_fg_class = 1 - if args.model == 'mask_rcnn_fpn_resnet50': - model = MaskRCNNFPNResNet50( - n_fg_class=n_fg_class, - pretrained_model=args.pretrained_model, - mode=args.mode - ) - elif args.model == 'mask_rcnn_fpn_resnet101': - model = MaskRCNNFPNResNet101( - n_fg_class=n_fg_class, - pretrained_model=args.pretrained_model, - mode=args.mode - ) - - if args.gpu >= 0: - chainer.cuda.get_device_from_id(args.gpu).use() - model.to_gpu() - - img = utils.read_image(args.image) - if args.mode == 'mask': - masks, labels, scores = model.predict([img]) - mask = masks[0] - label = labels[0] - score = scores[0] - chainercv.visualizations.vis_instance_segmentation( - img, mask, label, score, - label_names=coco_instance_segmentation_label_names) - plt.show() - elif args.mode == 'keypoint': - points, labels, scores, point_scores, bboxes = model.predict([img]) - point = points[0] - label = labels[0] - score = scores[0] - point_score = point_scores[0] - bbox = bboxes[0] - ax = chainercv.visualizations.vis_keypoint_coco( - img, point, None, point_score) - chainercv.visualizations.vis_bbox(None, bbox, score=score, ax=ax) - plt.show() - - -if __name__ == '__main__': - main() From 8694df5f32a17bb4f0ddbcf4fa7ff40bf360e0c2 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 15 Mar 2019 20:12:41 +0900 Subject: [PATCH 091/100] fix --- 
examples/keypoint_detection/eval_keypoint_detection.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/keypoint_detection/eval_keypoint_detection.py b/examples/keypoint_detection/eval_keypoint_detection.py index 377e14f385..a5a7ca68d1 100644 --- a/examples/keypoint_detection/eval_keypoint_detection.py +++ b/examples/keypoint_detection/eval_keypoint_detection.py @@ -36,7 +36,6 @@ def setup(dataset, model_name, pretrained_model, batchsize): n_fg_class=n_fg_class, pretrained_model=pretrained_model, n_point=n_point, - mode='keypoint' ) model.use_preset('evaluate') From b3d3b4e3dc360e3090eeb8c1eff47595a895bff1 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 15 Mar 2019 20:24:55 +0900 Subject: [PATCH 092/100] fix --- chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py index f74a890495..778c2e49d1 100644 --- a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py +++ b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py @@ -51,7 +51,7 @@ def __init__(self, n_fg_class=None, pretrained_model=None, min_size=800, max_size=1333): param, path = utils.prepare_pretrained_model( {'n_fg_class': n_fg_class, 'n_point': n_point}, - pretrained_model, self._models) + pretrained_model, self._models, {'n_point': None}) base = self._base(n_class=1, arch='he') base.pick = ('res2', 'res3', 'res4', 'res5') From 3abe75dde51104f2ac730719fe8e7d47a03fc5af Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 15 Mar 2019 20:25:30 +0900 Subject: [PATCH 093/100] fix train script --- examples/fpn/train_multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/fpn/train_multi.py b/examples/fpn/train_multi.py index 9d3d08b633..4386acf084 100644 --- a/examples/fpn/train_multi.py +++ b/examples/fpn/train_multi.py @@ -177,7 +177,7 @@ def main(): '--model', choices=('mask_rcnn_fpn_resnet50', 
'mask_rcnn_fpn_resnet101', 'faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101'), - default='faster__rcnn_fpn_resnet50') + default='faster_rcnn_fpn_resnet50') parser.add_argument('--batchsize', type=int, default=16) parser.add_argument('--iteration', type=int, default=90000) parser.add_argument('--step', type=int, nargs='*', default=[60000, 80000]) From 3ce01cb9279912c0fed441118c2f45f8af6fd00a Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 15 Mar 2019 20:36:54 +0900 Subject: [PATCH 094/100] add test --- .../fpn_tests/test_faster_rcnn_fpn_resnet.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/links_tests/model_tests/fpn_tests/test_faster_rcnn_fpn_resnet.py b/tests/links_tests/model_tests/fpn_tests/test_faster_rcnn_fpn_resnet.py index cf5537ed3e..3ac43292fc 100644 --- a/tests/links_tests/model_tests/fpn_tests/test_faster_rcnn_fpn_resnet.py +++ b/tests/links_tests/model_tests/fpn_tests/test_faster_rcnn_fpn_resnet.py @@ -6,17 +6,21 @@ from chainercv.links import FasterRCNNFPNResNet101 from chainercv.links import FasterRCNNFPNResNet50 +from chainercv.links import MaskRCNNFPNResNet101 +from chainercv.links import MaskRCNNFPNResNet50 from chainercv.utils.testing import attr @testing.parameterize(*testing.product({ - 'model': [FasterRCNNFPNResNet50, FasterRCNNFPNResNet101], + 'model': [FasterRCNNFPNResNet50, FasterRCNNFPNResNet101, + MaskRCNNFPNResNet50, MaskRCNNFPNResNet101], 'n_fg_class': [1, 5, 20], })) class TestFasterRCNNFPNResNet(unittest.TestCase): def setUp(self): - self.link = self.model(n_fg_class=self.n_fg_class) + self.link = self.model( + n_fg_class=self.n_fg_class, min_size=66) def _check_call(self): imgs = [ @@ -40,7 +44,8 @@ def test_call_gpu(self): @testing.parameterize(*testing.product({ - 'model': [FasterRCNNFPNResNet50, FasterRCNNFPNResNet101], + 'model': [FasterRCNNFPNResNet50, FasterRCNNFPNResNet101, + MaskRCNNFPNResNet50, MaskRCNNFPNResNet101], 'n_fg_class': [None, 10, 80], 'pretrained_model': 
['coco', 'imagenet'], })) From c8a15262bca689a24bf13dff4168b7165201b18e Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 15 Mar 2019 20:45:09 +0900 Subject: [PATCH 095/100] doc --- README.md | 2 +- chainercv/links/model/fpn/__init__.py | 2 + docs/source/reference/links.rst | 10 ++--- docs/source/reference/links/fpn.rst | 49 ++++++++++++++++++--- docs/source/reference/links/mask_rcnn.rst | 52 ----------------------- 5 files changed, 49 insertions(+), 66 deletions(-) delete mode 100644 docs/source/reference/links/mask_rcnn.rst diff --git a/README.md b/README.md index 23115abb27..fd5b9acc9b 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Supported tasks: + Image Classification ([ResNet](examples/resnet), [SENet](examples/senet), [VGG](examples/vgg)) + Object Detection ([tutorial](http://chainercv.readthedocs.io/en/latest/tutorial/detection.html), [Faster R-CNN](examples/faster_rcnn), [FPN](examples/fpn), [SSD](examples/ssd), [YOLO](examples/yolo)) + Semantic Segmentation ([SegNet](examples/segnet), [PSPNet](examples/pspnet)) -+ Instance Segmentation ([FCIS](examples/fcis),) ++ Instance Segmentation ([FCIS](examples/fcis), [Mask R-CNN](examples/fpn)) # Guiding Principles ChainerCV is developed under the following three guiding principles. 
diff --git a/chainercv/links/model/fpn/__init__.py b/chainercv/links/model/fpn/__init__.py index 7f2f16d62e..e4ebd5aba0 100644 --- a/chainercv/links/model/fpn/__init__.py +++ b/chainercv/links/model/fpn/__init__.py @@ -10,5 +10,7 @@ from chainercv.links.model.fpn.mask_head import MaskHead # NOQA from chainercv.links.model.fpn.mask_head import mask_loss_post # NOQA from chainercv.links.model.fpn.mask_head import mask_loss_pre # NOQA +from chainercv.links.model.fpn.mask_utils import mask_to_segm # NOQA +from chainercv.links.model.fpn.mask_utils import segm_to_mask # NOQA from chainercv.links.model.fpn.rpn import RPN # NOQA from chainercv.links.model.fpn.rpn import rpn_loss # NOQA diff --git a/docs/source/reference/links.rst b/docs/source/reference/links.rst index 15001a98b3..7b4c9709b1 100644 --- a/docs/source/reference/links.rst +++ b/docs/source/reference/links.rst @@ -33,7 +33,6 @@ For more details, please read :func:`FasterRCNN.predict`. .. toctree:: links/faster_rcnn - links/fpn links/ssd links/yolo @@ -52,15 +51,12 @@ For more details, please read :func:`SegNetBasic.predict`. links/deeplab -Instance Segmentation -~~~~~~~~~~~~~~~~~~~~~ - -Instance segmentation links share a common method :meth:`predict` to detect masks that cover objects in an image. -For more details, please read :func:`MaskRCNN.predict`. +Links for Multiple Tasks +~~~~~~~~~~~~~~~~~~~~~~~~ .. toctree:: - links/mask_rcnn + links/fpn Classifiers diff --git a/docs/source/reference/links/fpn.rst b/docs/source/reference/links/fpn.rst index 5d267ff026..bd26896c27 100644 --- a/docs/source/reference/links/fpn.rst +++ b/docs/source/reference/links/fpn.rst @@ -18,6 +18,20 @@ FasterRCNNFPNResnet101 :members: +Instance Segmentation Links +--------------------------- + +MaskRCNNFPNResNet50 +~~~~~~~~~~~~~~~~~~~ +.. autoclass:: MaskRCNNFPNResNet50 + :members: + +MaskRCNNFPNResNet101 +~~~~~~~~~~~~~~~~~~~~ +.. 
autoclass:: MaskRCNNFPNResNet101 + :members: + + Utility ------- @@ -43,17 +57,40 @@ RPN :members: :special-members: __call__ +MaskHead +~~~~~~~~ +.. autoclass:: MaskHead + :members: + :special-members: __call__ + +segm_to_mask +~~~~~~~~~~~~ +.. autofunction:: segm_to_mask + + Train-only Utility ------------------ -bbox_head_loss_pre -~~~~~~~~~~~~~~~~~~ -.. autofunction:: bbox_head_loss_pre +bbox_loss_pre +~~~~~~~~~~~~~ +.. autofunction:: bbox_loss_pre -bbox_head_loss_post -~~~~~~~~~~~~~~~~~~~ -.. autofunction:: bbox_head_loss_post +bbox_loss_post +~~~~~~~~~~~~~~ +.. autofunction:: bbox_loss_post rpn_loss ~~~~~~~~ .. autofunction:: rpn_loss + +mask_loss_pre +~~~~~~~~~~~~~ +.. autofunction:: mask_loss_pre + +mask_loss_post +~~~~~~~~~~~~~~ +.. autofunction:: mask_loss_post + +mask_to_segm +~~~~~~~~~~~~ +.. autofunction:: mask_to_segm diff --git a/docs/source/reference/links/mask_rcnn.rst b/docs/source/reference/links/mask_rcnn.rst deleted file mode 100644 index 9fce65c343..0000000000 --- a/docs/source/reference/links/mask_rcnn.rst +++ /dev/null @@ -1,52 +0,0 @@ -Mask R-CNN -========== - -.. module:: chainercv.links.model.mask_rcnn - - -Instance Segmentation Links ---------------------------- - -MaskRCNNFPNResNet50 -~~~~~~~~~~~~~~~~~~~ -.. autoclass:: MaskRCNNFPNResNet50 - :members: - -MaskRCNNFPNResNet101 -~~~~~~~~~~~~~~~~~~~~ -.. autoclass:: MaskRCNNFPNResNet101 - :members: - - -Utility -------- - -MaskRCNN -~~~~~~~~ -.. autoclass:: MaskRCNN - :members: - -MaskHead -~~~~~~~~ -.. autoclass:: MaskHead - :members: - :special-members: __call__ - -segm_to_mask -~~~~~~~~~~~~ -.. autofunction:: segm_to_mask - -Train-only Utility ------------------- - -mask_loss_pre -~~~~~~~~~~~~~ -.. autofunction:: mask_loss_pre - -mask_loss_post -~~~~~~~~~~~~~~ -.. autofunction:: mask_loss_post - -mask_to_segm -~~~~~~~~~~~~ -.. 
autofunction:: mask_to_segm From 3bd6f320643948c7c1aba06930d66beede30d14e Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 15 Mar 2019 20:50:51 +0900 Subject: [PATCH 096/100] flake8 --- chainercv/links/model/fpn/__init__.py | 8 +-- chainercv/links/model/fpn/faster_rcnn.py | 26 ++++--- .../links/model/fpn/faster_rcnn_fpn_resnet.py | 3 +- .../model_tests/fpn_tests/test_mask_head.py | 2 +- .../model_tests/fpn_tests/test_mask_utils.py | 9 +-- .../test_mask_rcnn_fpn_resnet.py | 68 ------------------- 6 files changed, 26 insertions(+), 90 deletions(-) delete mode 100644 tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn_fpn_resnet.py diff --git a/chainercv/links/model/fpn/__init__.py b/chainercv/links/model/fpn/__init__.py index e4ebd5aba0..24edf211aa 100644 --- a/chainercv/links/model/fpn/__init__.py +++ b/chainercv/links/model/fpn/__init__.py @@ -1,15 +1,15 @@ +from chainercv.links.model.fpn.bbox_head import bbox_loss_post # NOQA +from chainercv.links.model.fpn.bbox_head import bbox_loss_pre # NOQA +from chainercv.links.model.fpn.bbox_head import BboxHead # NOQA from chainercv.links.model.fpn.faster_rcnn import FasterRCNN # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet101 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet50 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet101 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet50 # NOQA from chainercv.links.model.fpn.fpn import FPN # NOQA -from chainercv.links.model.fpn.bbox_head import BboxHead # NOQA -from chainercv.links.model.fpn.bbox_head import bbox_loss_post # NOQA -from chainercv.links.model.fpn.bbox_head import bbox_loss_pre # NOQA -from chainercv.links.model.fpn.mask_head import MaskHead # NOQA from chainercv.links.model.fpn.mask_head import mask_loss_post # NOQA from chainercv.links.model.fpn.mask_head import mask_loss_pre # NOQA +from 
chainercv.links.model.fpn.mask_head import MaskHead # NOQA from chainercv.links.model.fpn.mask_utils import mask_to_segm # NOQA from chainercv.links.model.fpn.mask_utils import segm_to_mask # NOQA from chainercv.links.model.fpn.rpn import RPN # NOQA diff --git a/chainercv/links/model/fpn/faster_rcnn.py b/chainercv/links/model/fpn/faster_rcnn.py index 68b4506233..d6fe9d2de2 100644 --- a/chainercv/links/model/fpn/faster_rcnn.py +++ b/chainercv/links/model/fpn/faster_rcnn.py @@ -3,8 +3,8 @@ import numpy as np import chainer -import chainer.functions as F from chainer.backends import cuda +import chainer.functions as F from chainercv.links.model.fpn.misc import scale_img @@ -64,7 +64,8 @@ def __init__(self, extractor, rpn, bbox_head, self._store_rpn_outputs = 'rois' in self._return_values self._run_bbox = any([key in self._return_values - for key in ['bboxes', 'labels', 'scores', 'masks']]) + for key in + ['bboxes', 'labels', 'scores', 'masks']]) self._run_mask = 'masks' in self._return_values super(FasterRCNN, self).__init__() @@ -168,9 +169,10 @@ def predict(self, imgs): bboxes, labels, scores = self.bbox_head.decode( bbox_rois, bbox_roi_indices, head_locs, head_confs, scales, sizes, self.nms_thresh, self.score_thresh) - bboxes_cpu = [chainer.backends.cuda.to_cpu(bbox) - for bbox in bboxes] - labels_cpu = [chainer.backends.cuda.to_cpu(label) for label in labels] + bboxes_cpu = [ + chainer.backends.cuda.to_cpu(bbox) for bbox in bboxes] + labels_cpu = [ + chainer.backends.cuda.to_cpu(label) for label in labels] scores_cpu = [cuda.to_cpu(score) for score in scores] output.update({'bboxes': bboxes_cpu, 'labels': labels_cpu, 'scores': scores_cpu}) @@ -182,7 +184,8 @@ def predict(self, imgs): mask_rois_before_reordering, mask_roi_indices_before_reordering =\ _list_to_flat(rescaled_bboxes) mask_rois, mask_roi_indices, order = self.mask_head.distribute( - mask_rois_before_reordering, mask_roi_indices_before_reordering) + mask_rois_before_reordering, + 
mask_roi_indices_before_reordering) with chainer.using_config( 'train', False), chainer.no_backprop_mode(): segms = F.sigmoid( @@ -192,13 +195,14 @@ def predict(self, imgs): segms = _flat_to_list( segms, mask_roi_indices_before_reordering, len(imgs)) segms = [segm if segm is not None else - self.xp.zeros( - (0, self.mask_head.segm_size, self.mask_head.segm_size), - dtype=np.float32) - for segm in segms] + self.xp.zeros( + (0, self.mask_head.segm_size, + self.mask_head.segm_size), dtype=np.float32) + for segm in segms] segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms] # Currently MaskHead only supports numpy inputs - masks_cpu = self.mask_head.decode(segms, bboxes_cpu, labels_cpu, sizes) + masks_cpu = self.mask_head.decode( + segms, bboxes_cpu, labels_cpu, sizes) output.update({'masks': masks_cpu}) return tuple([output[key] for key in self._return_values]) diff --git a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py index debadb10ea..4a8e0a55ee 100644 --- a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py +++ b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py @@ -4,9 +4,9 @@ import chainer.functions as F import chainer.links as L +from chainercv.links.model.fpn.bbox_head import BboxHead from chainercv.links.model.fpn.faster_rcnn import FasterRCNN from chainercv.links.model.fpn.fpn import FPN -from chainercv.links.model.fpn.bbox_head import BboxHead from chainercv.links.model.fpn.mask_head import MaskHead from chainercv.links.model.fpn.rpn import RPN from chainercv.links.model.resnet import ResNet101 @@ -189,7 +189,6 @@ class MaskRCNNFPNResNet101(MaskRCNNFPNResNet): } - def _copyparams(dst, src): if isinstance(dst, chainer.Chain): for link in dst.children(): diff --git a/tests/links_tests/model_tests/fpn_tests/test_mask_head.py b/tests/links_tests/model_tests/fpn_tests/test_mask_head.py index c8e0bc927c..116404273d 100644 --- a/tests/links_tests/model_tests/fpn_tests/test_mask_head.py +++ 
b/tests/links_tests/model_tests/fpn_tests/test_mask_head.py @@ -7,9 +7,9 @@ from chainer import testing from chainer.testing import attr -from chainercv.links.model.fpn import MaskHead from chainercv.links.model.fpn import mask_loss_post from chainercv.links.model.fpn import mask_loss_pre +from chainercv.links.model.fpn import MaskHead from chainercv.utils import mask_to_bbox diff --git a/tests/links_tests/model_tests/fpn_tests/test_mask_utils.py b/tests/links_tests/model_tests/fpn_tests/test_mask_utils.py index 5ae85bf237..c6bcd360d0 100644 --- a/tests/links_tests/model_tests/fpn_tests/test_mask_utils.py +++ b/tests/links_tests/model_tests/fpn_tests/test_mask_utils.py @@ -5,21 +5,22 @@ from chainer import testing -from chainercv.links.model.fpn.mask_utils import segm_to_mask from chainercv.links.model.fpn.mask_utils import mask_to_segm +from chainercv.links.model.fpn.mask_utils import segm_to_mask class TestSegmToMask(unittest.TestCase): def setUp(self): # When n_inst >= 3, the test fails. - # This is due to the fact that the transformed image of `transforms.resize` - # is misaligned to the corners. + # This is due to the fact that the transformed + # image of `transforms.resize` is misaligned to the corners. 
n_inst = 2 self.segm_size = 3 self.size = (36, 48) - self.segm = np.ones((n_inst, self.segm_size, self.segm_size), dtype=np.float32) + self.segm = np.ones( + (n_inst, self.segm_size, self.segm_size), dtype=np.float32) self.bbox = np.zeros((n_inst, 4), dtype=np.float32) for i in range(n_inst): self.bbox[i, 0] = 10 + i diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn_fpn_resnet.py b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn_fpn_resnet.py deleted file mode 100644 index b7cedc364d..0000000000 --- a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn_fpn_resnet.py +++ /dev/null @@ -1,68 +0,0 @@ -import numpy as np -import unittest - -import chainer -from chainer import testing -from chainer.testing import attr - -from chainercv.links import MaskRCNNFPNResNet101 -from chainercv.links import MaskRCNNFPNResNet50 - - -@testing.parameterize(*testing.product({ - 'model': [MaskRCNNFPNResNet50, MaskRCNNFPNResNet101], - 'n_fg_class': [1, 5, 20], -})) -class TestFasterRCNNFPNResNet(unittest.TestCase): - - def setUp(self): - self.link = self.model(n_fg_class=self.n_fg_class) - - def _check_call(self): - imgs = [ - np.random.uniform(-1, 1, size=(3, 48, 48)).astype(np.float32), - np.random.uniform(-1, 1, size=(3, 32, 64)).astype(np.float32), - ] - x, _, _ = self.link.prepare(imgs) - with chainer.using_config('train', False): - self.link(self.link.xp.array(x)) - - @attr.slow - def test_call_cpu(self): - self._check_call() - - @attr.gpu - @attr.slow - def test_call_gpu(self): - self.link.to_gpu() - self._check_call() - - -@testing.parameterize(*testing.product({ - 'model': [MaskRCNNFPNResNet50, MaskRCNNFPNResNet101], - 'n_fg_class': [None, 10, 80], - # 'pretrained_model': ['coco', 'imagenet'], - 'pretrained_model': ['imagenet'], -})) -class TestFasterRCNNFPNResNetPretrained(unittest.TestCase): - - @attr.slow - def test_pretrained(self): - kwargs = { - 'n_fg_class': self.n_fg_class, - 'pretrained_model': self.pretrained_model, - } - 
- if self.pretrained_model == 'coco': - valid = self.n_fg_class in {None, 80} - elif self.pretrained_model == 'imagenet': - valid = self.n_fg_class is not None - - if valid: - self.model(**kwargs) - else: - with self.assertRaises(ValueError): - self.model(**kwargs) - - -testing.run_module(__name__, __file__) From fd54af2dba29c649766350e3a8b3907a76ce7a9d Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Sat, 16 Mar 2019 00:01:07 +0900 Subject: [PATCH 097/100] fix doc --- chainercv/links/model/fpn/__init__.py | 1 + chainercv/links/model/fpn/faster_rcnn.py | 58 +++++++++++-------- .../links/model/fpn/faster_rcnn_fpn_resnet.py | 56 +++++------------- chainercv/links/model/fpn/mask_head.py | 2 +- docs/source/reference/links/fpn.rst | 6 ++ 5 files changed, 57 insertions(+), 66 deletions(-) diff --git a/chainercv/links/model/fpn/__init__.py b/chainercv/links/model/fpn/__init__.py index 24edf211aa..f462c5230b 100644 --- a/chainercv/links/model/fpn/__init__.py +++ b/chainercv/links/model/fpn/__init__.py @@ -2,6 +2,7 @@ from chainercv.links.model.fpn.bbox_head import bbox_loss_pre # NOQA from chainercv.links.model.fpn.bbox_head import BboxHead # NOQA from chainercv.links.model.fpn.faster_rcnn import FasterRCNN # NOQA +from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet101 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet50 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet101 # NOQA diff --git a/chainercv/links/model/fpn/faster_rcnn.py b/chainercv/links/model/fpn/faster_rcnn.py index d6fe9d2de2..0be0d8ba4b 100644 --- a/chainercv/links/model/fpn/faster_rcnn.py +++ b/chainercv/links/model/fpn/faster_rcnn.py @@ -10,12 +10,9 @@ class FasterRCNN(chainer.Chain): - """Base class of Feature Pyramid Networks. + """Base class of Faster R-CNN with FPN. 
- This is a base class of Feature Pyramid Networks [#]_. - - .. [#] Tsung-Yi Lin et al. - Feature Pyramid Networks for Object Detection. CVPR 2017 + This is a base class of Faster R-CNN with FPN. Args: extractor (Link): A link that extracts feature maps. @@ -28,8 +25,10 @@ class FasterRCNN(chainer.Chain): :class:`~chainercv.links.model.fpn.BboxHead`. Please refer to the documentation found there. mask_head (Link): A link that has the same interface as - :class:`~chainercv.links.model.mask_rcnn.MaskRCNN`. + :class:`~chainercv.links.model.fpn.MaskHead`. Please refer to the documentation found there. + return_values (list of strings): Determines the values + returned by :meth:`predict`. min_size (int): A preprocessing paramter for :meth:`prepare`. Please refer to a docstring found for :meth:`prepare`. max_size (int): A preprocessing paramter for :meth:`prepare`. Note @@ -119,29 +118,40 @@ def __call__(self, x): return hs, rois, roi_indices def predict(self, imgs): - """Segment object instances from images. + """Conduct inference on the given images. + + The value returned by this method is decided based on + the argument :obj:`return_values` of :meth:`__init__`. - This method predicts instance-aware object regions for each image. + Examples: + + >>> from chainercv.links import FasterRCNNFPNResNet50 + >>> model = FasterRCNNFPNResNet50( + ... pretrained_model='coco', + ... return_values=['rois', 'bboxes', 'labels', 'scores']) + >>> rois, bboxes, labels, scores = model.predict(imgs) Args: - imgs (iterable of numpy.ndarray): Arrays holding images of shape - :math:`(B, C, H, W)`. All images are in CHW and RGB format - and the range of their value is :math:`[0, 255]`. + imgs (iterable of numpy.ndarray): Inputs. Returns: - tuple of lists: - This method returns a tuple of three lists, - :obj:`(masks, labels, scores)`. - - * **masks**: A list of boolean arrays of shape :math:`(R, H, W)`, \ - where :math:`R` is the number of masks in a image. 
\ - Each pixel holds value if it is inside the object inside or not. - * **labels** : A list of integer arrays of shape :math:`(R,)`. \ - Each value indicates the class of the masks. \ - Values are in range :math:`[0, L - 1]`, where :math:`L` is the \ - number of the foreground classes. - * **scores** : A list of float arrays of shape :math:`(R,)`. \ - Each value indicates how confident the prediction is. + tuple of lists: + The table below shows the input and possible outputs. + + .. csv-table:: + :header: name, shape, dtype, format + + :obj:`imgs`, ":math:`[(3, H, W)]`", :obj:`float32`, \ + "RGB, :math:`[0, 255]`" + :obj:`rois`, ":math:`[(R', 4)]`", :obj:`float32`, \ + ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`" + :obj:`bboxes`, ":math:`[(R, 4)]`", :obj:`float32`, \ + ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`" + :obj:`scores`, ":math:`[(R,)]`", :obj:`float32`, \ + -- + :obj:`labels`, ":math:`[(R,)]`", :obj:`int32`, \ + ":math:`[0, \#fg\_class - 1]`" + :obj:`masks`, ":math:`[(R, H, W)]`", :obj:`bool`, -- """ output = {} diff --git a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py index 4a8e0a55ee..1532d8c2ce 100644 --- a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py +++ b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py @@ -15,7 +15,7 @@ class FasterRCNNFPNResNet(FasterRCNN): - """Base class for FasterRCNNFPNResNet50 and FasterRCNNFPNResNet101. + """Base class for Faster R-CNN with a ResNet backbone and FPN. A subclass of this class should have :obj:`_base` and :obj:`_models`. @@ -38,6 +38,8 @@ class FasterRCNNFPNResNet(FasterRCNN): * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \ must be specified properly. * :obj:`None`: Do not load weights. + return_values (list of strings): Determines the values + returned by :meth:`predict`. min_size (int): A preprocessing paramter for :meth:`prepare`. Please \ refer to :meth:`prepare`. 
max_size (int): A preprocessing paramter for :meth:`prepare`. @@ -76,35 +78,24 @@ def __init__(self, n_fg_class=None, pretrained_model=None, class MaskRCNNFPNResNet(FasterRCNNFPNResNet): - """Feature Pyramid Networks with ResNet-50. - - This is a model of Feature Pyramid Networks [#]_. - This model uses :class:`~chainercv.links.ResNet50` as - its base feature extractor. - - .. [#] Tsung-Yi Lin et al. - Feature Pyramid Networks for Object Detection. CVPR 2017 + """Mask R-CNN with a ResNet backbone and FPN. + Please refer to :class:`~chainercv.links.model.fpn.FasterRCNNFPNResNet`. """ def __init__(self, n_fg_class=None, pretrained_model=None, + return_values=['masks', 'labels', 'scores'], min_size=800, max_size=1333): super(MaskRCNNFPNResNet, self).__init__( - n_fg_class, pretrained_model, ['masks', 'labels', 'scores'], + n_fg_class, pretrained_model, return_values, min_size, max_size) class FasterRCNNFPNResNet50(FasterRCNNFPNResNet): - """Feature Pyramid Networks with ResNet-50. - - This is a model of Feature Pyramid Networks [#]_. - This model uses :class:`~chainercv.links.ResNet50` as - its base feature extractor. - - .. [#] Tsung-Yi Lin et al. - Feature Pyramid Networks for Object Detection. CVPR 2017 + """Faster R-CNN with ResNet-50 and FPN. + Please refer to :class:`~chainercv.links.model.fpn.FasterRCNNFPNResNet`. """ @@ -120,14 +111,9 @@ class FasterRCNNFPNResNet50(FasterRCNNFPNResNet): class FasterRCNNFPNResNet101(FasterRCNNFPNResNet): - """Feature Pyramid Networks with ResNet-101. - - This is a model of Feature Pyramid Networks [#]_. - This model uses :class:`~chainercv.links.ResNet101` as - its base feature extractor. + """Faster R-CNN with ResNet-101 and FPN. - .. [#] Tsung-Yi Lin et al. - Feature Pyramid Networks for Object Detection. CVPR 2017 + Please refer to :class:`~chainercv.links.model.fpn.FasterRCNNFPNResNet`. 
""" @@ -143,15 +129,9 @@ class FasterRCNNFPNResNet101(FasterRCNNFPNResNet): class MaskRCNNFPNResNet50(MaskRCNNFPNResNet): - """Feature Pyramid Networks with ResNet-50. - - This is a model of Feature Pyramid Networks [#]_. - This model uses :class:`~chainercv.links.ResNet50` as - its base feature extractor. - - .. [#] Tsung-Yi Lin et al. - Feature Pyramid Networks for Object Detection. CVPR 2017 + """Mask R-CNN with ResNet-50 and FPN. + Please refer to :class:`~chainercv.links.model.fpn.FasterRCNNFPNResNet`. """ @@ -167,15 +147,9 @@ class MaskRCNNFPNResNet50(MaskRCNNFPNResNet): class MaskRCNNFPNResNet101(MaskRCNNFPNResNet): - """Feature Pyramid Networks with ResNet-50. - - This is a model of Feature Pyramid Networks [#]_. - This model uses :class:`~chainercv.links.ResNet50` as - its base feature extractor. - - .. [#] Tsung-Yi Lin et al. - Feature Pyramid Networks for Object Detection. CVPR 2017 + """Mask R-CNN with ResNet-101 and FPN. + Please refer to :class:`~chainercv.links.model.fpn.FasterRCNNFPNResNet`. """ diff --git a/chainercv/links/model/fpn/mask_head.py b/chainercv/links/model/fpn/mask_head.py index b89857fa5d..602713838b 100644 --- a/chainercv/links/model/fpn/mask_head.py +++ b/chainercv/links/model/fpn/mask_head.py @@ -228,7 +228,7 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_bboxes, def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels, batchsize): - """Loss function for Head (post). + """Loss function for Mask Head (post). Args: segms (array): An array whose shape is :math:`(R, n\_class, M, M)`, diff --git a/docs/source/reference/links/fpn.rst b/docs/source/reference/links/fpn.rst index bd26896c27..4c01e2a44a 100644 --- a/docs/source/reference/links/fpn.rst +++ b/docs/source/reference/links/fpn.rst @@ -40,6 +40,12 @@ FasterRCNN .. autoclass:: FasterRCNN :members: +FasterRCNNFPNResNet +~~~~~~~~~~~~~~~~~~~ +.. autoclass:: FasterRCNNFPNResNet + :members: + + FPN ~~~ .. 
autoclass:: FPN From e71c322a0557309f53601d571798ec7bbb06ca56 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Sat, 16 Mar 2019 00:11:27 +0900 Subject: [PATCH 098/100] fix --- chainercv/links/model/fpn/__init__.py | 3 +++ chainercv/links/model/fpn/faster_rcnn.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/chainercv/links/model/fpn/__init__.py b/chainercv/links/model/fpn/__init__.py index 6a2b989025..e4ba9c853c 100644 --- a/chainercv/links/model/fpn/__init__.py +++ b/chainercv/links/model/fpn/__init__.py @@ -10,6 +10,9 @@ from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet101 # NOQA from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet50 # NOQA from chainercv.links.model.fpn.fpn import FPN # NOQA +from chainercv.links.model.fpn.keypoint_head import keypoint_loss_post # NOQA +from chainercv.links.model.fpn.keypoint_head import keypoint_loss_pre # NOQA +from chainercv.links.model.fpn.keypoint_head import KeypointHead # NOQA from chainercv.links.model.fpn.mask_head import mask_loss_post # NOQA from chainercv.links.model.fpn.mask_head import mask_loss_pre # NOQA from chainercv.links.model.fpn.mask_head import MaskHead # NOQA diff --git a/chainercv/links/model/fpn/faster_rcnn.py b/chainercv/links/model/fpn/faster_rcnn.py index 5eceba3ce7..c37fe30c08 100644 --- a/chainercv/links/model/fpn/faster_rcnn.py +++ b/chainercv/links/model/fpn/faster_rcnn.py @@ -191,7 +191,7 @@ def predict(self, imgs): output.update({'bboxes': bboxes_cpu, 'labels': labels_cpu, 'scores': scores_cpu}) rescaled_bboxes = [bbox * scale - for scale, bbox in zip(scales, bboxes)] + for scale, bbox in zip(scales, bboxes)] if self._run_mask: # Change bboxes to RoI and RoI indices format mask_rois_before_reordering, mask_roi_indices_before_reordering =\ From 8c9816ce5aac76e8d2f2bbbdb268d0c9b1a75664 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Sat, 16 Mar 2019 00:31:56 +0900 Subject: [PATCH 099/100] merge train --- 
examples/fpn/train_multi.py | 113 +++++++++++++++++++++++++++++++----- 1 file changed, 97 insertions(+), 16 deletions(-) diff --git a/examples/fpn/train_multi.py b/examples/fpn/train_multi.py index 4386acf084..01c4c5963d 100644 --- a/examples/fpn/train_multi.py +++ b/examples/fpn/train_multi.py @@ -28,8 +28,15 @@ from chainercv.links import FasterRCNNFPNResNet101 from chainercv.links import FasterRCNNFPNResNet50 +from chainercv.datasets import coco_keypoint_names +from chainercv.datasets import COCOKeypointDataset +from chainercv.links import KeypointRCNNFPNResNet101 +from chainercv.links import KeypointRCNNFPNResNet50 + from chainercv.links.model.fpn import bbox_loss_post from chainercv.links.model.fpn import bbox_loss_pre +from chainercv.links.model.fpn import keypoint_loss_post +from chainercv.links.model.fpn import keypoint_loss_pre from chainercv.links.model.fpn import mask_loss_post from chainercv.links.model.fpn import mask_loss_pre from chainercv.links.model.fpn import rpn_loss @@ -49,7 +56,8 @@ def __init__(self, model): with self.init_scope(): self.model = model - def __call__(self, imgs, bboxes, labels, masks=None): + def __call__(self, imgs, bboxes, labels, masks=None, + points=None, visibles=None): B = len(imgs) pad_size = np.array( [im.shape[1:] for im in imgs]).max(axis=0) @@ -117,30 +125,63 @@ def __call__(self, imgs, bboxes, labels, masks=None): mask_roi_indices[0] = self.xp.array([0], dtype=np.int32) segms = self.model.mask_head(hs, mask_rois, mask_roi_indices) mask_loss = 0 * F.sum(segms) + + point_loss = 0 + if points is not None: + points = [self.xp.array(point) for point in points] + visibles = [self.xp.array(visible) for visible in visibles] + + point_rois, point_roi_indices, gt_head_points, gt_head_visibles =\ + keypoint_loss_pre( + rois, roi_indices, points, visibles, bboxes, + head_gt_labels, self.model.keypoint_head.point_map_size) + n_roi = sum([len(roi) for roi in point_rois]) + if n_roi > 0: + point_maps = self.model.keypoint_head( + 
hs, point_rois, point_roi_indices) + point_loss = keypoint_loss_post( + point_maps, point_roi_indices, + gt_head_points, gt_head_visibles, B) + else: + # Compute dummy variables to complete the computational graph + point_rois[0] = self.xp.array([[0, 0, 1, 1]], dtype=np.float32) + point_roi_indices[0] = self.xp.array([0], dtype=np.int32) + point_maps = self.model.keypoint_head( + hs, point_rois, point_roi_indices) + point_loss = 0 * F.sum(point_maps) + loss = (rpn_loc_loss + rpn_conf_loss + - head_loc_loss + head_conf_loss + mask_loss) + head_loc_loss + head_conf_loss + mask_loss + point_loss) chainer.reporter.report({ 'loss': loss, 'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss, 'loss/bbox_head/loc': head_loc_loss, 'loss/bbox_head/conf': head_conf_loss, - 'loss/mask_head': mask_loss}, + 'loss/mask_head': mask_loss, + 'loss/keypoint_head': point_loss}, self) return loss class Transform(object): - def __init__(self, min_size, max_size, mean): + def __init__(self, min_size, max_size, mean, mode): + if not isinstance(min_size, (tuple, list)): + min_size = (min_size,) self.min_size = min_size self.max_size = max_size self.mean = mean + self.mode = mode def __call__(self, in_data): - if len(in_data) == 4: - img, mask, label, bbox = in_data - else: + if self.mode == 'bbox': img, bbox, label = in_data + elif self.mode == 'instance_segmentation': + img, mask, label, bbox = in_data + elif self.mode == 'keypoint': + img, point, visible, label, bbox = in_data + + original_size = img.shape[1:] # Flipping img, params = transforms.random_flip( img, x_random=True, return_param=True) @@ -154,15 +195,21 @@ def __call__(self, in_data): img -= self.mean bbox = bbox * scale - if len(in_data) == 4: + if self.mode == 'bbox': + return img, bbox, label + elif self.mode == 'instance_segmentation': mask = transforms.flip(mask, x_flip=x_flip) mask = transforms.resize( mask.astype(np.float32), img.shape[1:], interpolation=PIL.Image.NEAREST).astype(np.bool) return img, bbox, 
label, mask - else: - return img, bbox, label + elif self.mode == 'keypoint': + point = transforms.flip_point( + point, original_size, x_flip=x_flip) + point = transforms.resize_point( + point, original_size, img.shape[1:]) + return img, bbox, label, None, point, visible def converter(batch, device=None): @@ -170,13 +217,22 @@ def converter(batch, device=None): return tuple(list(v) for v in zip(*batch)) +def valid_point_annotation(visible): + if len(visible) == 0: + return False + min_keypoint_per_image = 10 + n_visible = visible.sum() + return n_visible >= min_keypoint_per_image + + def main(): parser = argparse.ArgumentParser() parser.add_argument('--data-dir', default='auto') parser.add_argument( '--model', choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101', - 'faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101'), + 'faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101', + 'keypoint_rcnn_fpn_resnet50', 'keypoint_rcnn_fpn_resnet101'), default='faster_rcnn_fpn_resnet50') parser.add_argument('--batchsize', type=int, default=16) parser.add_argument('--iteration', type=int, default=90000) @@ -216,6 +272,16 @@ def main(): model = MaskRCNNFPNResNet101( n_fg_class=len(coco_instance_segmentation_label_names), pretrained_model='imagenet') + elif args.model == 'keypoint_rcnn_fpn_resnet50': + mode = 'keypoint' + model = KeypointRCNNFPNResNet50( + n_fg_class=1, pretrained_model='imagenet', + n_point=len(coco_keypoint_names[0])) + elif args.model == 'keypoint_rcnn_fpn_resnet101': + mode = 'keypoint' + model = KeypointRCNNFPNResNet101( + n_fg_class=1, pretrained_model='imagenet', + n_point=len(coco_keypoint_names[0])) model.use_preset('evaluate') train_chain = TrainChain(model) @@ -223,17 +289,30 @@ def main(): train_chain.to_gpu() if mode == 'bbox': + transform = Transform( + model.min_size, model.max_size, model.extractor.mean, mode) train = TransformDataset( COCOBboxDataset( data_dir=args.data_dir, year='2017', split='train'), - ('img', 'bbox', 'label'), - 
Transform(model.min_size, model.max_size, model.extractor.mean)) + ('img', 'bbox', 'label'), transform) elif mode == 'instance_segmentation': + transform = Transform( + model.min_size, model.max_size, model.extractor.mean, mode) train = TransformDataset( COCOInstanceSegmentationDataset( data_dir=args.data_dir, split='train', return_bbox=True), - ('img', 'bbox', 'label', 'mask'), - Transform(model.min_size, model.max_size, model.extractor.mean)) + ('img', 'bbox', 'label', 'mask'), transform) + elif mode == 'keypoint': + train = COCOKeypointDataset(data_dir=args.data_dir, split='train') + indices = [i for i, visible in enumerate(train.slice[:, 'visible']) + if valid_point_annotation(visible)] + train = train.slice[indices] + transform = Transform( + (640, 672, 704, 736, 768, 800), + model.max_size, model.extractor.mean, mode) + train = TransformDataset( + train, + ('img', 'bbox', 'label', 'mask', 'point', 'visible'), transform) if comm.rank == 0: indices = np.arange(len(train)) @@ -257,6 +336,8 @@ def main(): for link in model.links(): if isinstance(link, L.BatchNormalization): link.disable_update() + if mode == 'keypoint': + model.keypoint_head.upsample.disable_update() n_iteration = args.iteration * 16 / args.batchsize updater = training.updaters.StandardUpdater( @@ -292,7 +373,7 @@ def lr_schedule(trainer): ['epoch', 'iteration', 'lr', 'main/loss', 'main/loss/rpn/loc', 'main/loss/rpn/conf', 'main/loss/bbox_head/loc', 'main/loss/bbox_head/conf', - 'main/loss/mask_head' + 'main/loss/mask_head', 'main/loss/keypoint_head' ]), trigger=log_interval) trainer.extend(extensions.ProgressBar(update_interval=10)) From ee08fbb6fed67c92157163775c16f0a8f3ad4d32 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Sat, 16 Mar 2019 00:44:39 +0900 Subject: [PATCH 100/100] fix --- examples/fpn/train_multi.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/examples/fpn/train_multi.py b/examples/fpn/train_multi.py index 01c4c5963d..f0dd4d0e30 100644 --- 
a/examples/fpn/train_multi.py +++ b/examples/fpn/train_multi.py @@ -1,6 +1,7 @@ import argparse import multiprocessing import numpy as np +import random import PIL import chainer @@ -51,10 +52,11 @@ class TrainChain(chainer.Chain): - def __init__(self, model): + def __init__(self, model, mode): super(TrainChain, self).__init__() with self.init_scope(): self.model = model + self.mode = mode def __call__(self, imgs, bboxes, labels, masks=None, points=None, visibles=None): @@ -99,7 +101,7 @@ def __call__(self, imgs, bboxes, labels, masks=None, roi_indices, head_gt_locs, head_gt_labels, B) mask_loss = 0 - if masks is not None: + if self.mode == 'instance_segmentation': # For reducing unnecessary CPU/GPU copy, `masks` is kept in CPU. pad_masks = [ np.zeros( @@ -127,7 +129,7 @@ def __call__(self, imgs, bboxes, labels, masks=None, mask_loss = 0 * F.sum(segms) point_loss = 0 - if points is not None: + if self.mode == 'keypoint': points = [self.xp.array(point) for point in points] visibles = [self.xp.array(visible) for visible in visibles] @@ -190,8 +192,9 @@ def __call__(self, in_data): bbox, img.shape[1:], x_flip=x_flip) # Scaling and mean subtraction + min_size = random.choice(self.min_size) img, scale = scale_img( - img, self.min_size, self.max_size) + img, min_size, self.max_size) img -= self.mean bbox = bbox * scale @@ -284,7 +287,7 @@ def main(): n_point=len(coco_keypoint_names[0])) model.use_preset('evaluate') - train_chain = TrainChain(model) + train_chain = TrainChain(model, mode) chainer.cuda.get_device_from_id(device).use() train_chain.to_gpu()