From b3e8ac055b3aac9b6ae8b8fce8d7015e467b93c5 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Tue, 12 Feb 2019 21:29:36 +0900
Subject: [PATCH 001/100] [wip] add mask_rcnn

---
 chainercv/links/__init__.py                   |   2 +
 chainercv/links/model/mask_rcnn/__init__.py   |   6 +
 chainercv/links/model/mask_rcnn/mask_head.py  | 227 +++++++++++++++++
 chainercv/links/model/mask_rcnn/mask_rcnn.py  | 147 +++++++++++
 .../model/mask_rcnn/mask_rcnn_fpn_resnet.py   |  68 ++++++
 examples/instance_segmentation/eval_coco.py   |  23 +-
 examples/mask_rcnn/demo.py                    |  58 +++++
 examples/mask_rcnn/train_multi.py             | 229 ++++++++++++++++++
 8 files changed, 756 insertions(+), 4 deletions(-)
 create mode 100644 chainercv/links/model/mask_rcnn/__init__.py
 create mode 100644 chainercv/links/model/mask_rcnn/mask_head.py
 create mode 100644 chainercv/links/model/mask_rcnn/mask_rcnn.py
 create mode 100644 chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py
 create mode 100644 examples/mask_rcnn/demo.py
 create mode 100644 examples/mask_rcnn/train_multi.py

diff --git a/chainercv/links/__init__.py b/chainercv/links/__init__.py
index 5aa5ae5d37..be7f150873 100644
--- a/chainercv/links/__init__.py
+++ b/chainercv/links/__init__.py
@@ -9,6 +9,8 @@
 from chainercv.links.model.faster_rcnn.faster_rcnn_vgg import FasterRCNNVGG16  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet101  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet50  # NOQA
+from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet101  # NOQA
+from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet50  # NOQA
 from chainercv.links.model.resnet import ResNet101  # NOQA
 from chainercv.links.model.resnet import ResNet152  # NOQA
 from chainercv.links.model.resnet import ResNet50  # NOQA
diff --git a/chainercv/links/model/mask_rcnn/__init__.py b/chainercv/links/model/mask_rcnn/__init__.py
new file mode 100644
index 0000000000..c9e910a524
--- /dev/null
+++ b/chainercv/links/model/mask_rcnn/__init__.py
@@ -0,0 +1,6 @@
+from chainercv.links.model.mask_rcnn.mask_head import mask_loss_post  # NOQA
+from chainercv.links.model.mask_rcnn.mask_head import mask_loss_pre  # NOQA
+from chainercv.links.model.mask_rcnn.mask_head import MaskHead  # NOQA
+from chainercv.links.model.mask_rcnn.mask_rcnn import MaskRCNN  # NOQA
+from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet101  # NOQA
+from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet50  # NOQA
diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py
new file mode 100644
index 0000000000..2b2b5c4cbb
--- /dev/null
+++ b/chainercv/links/model/mask_rcnn/mask_head.py
@@ -0,0 +1,227 @@
+from __future__ import division
+
+import numpy as np
+import PIL
+
+import cv2
+
+import chainer
+from chainer.backends import cuda
+import chainer.functions as F
+from chainer.initializers import HeNormal
+import chainer.links as L
+
+from chainercv.transforms.image.resize import resize
+from chainercv.utils.bbox.bbox_iou import bbox_iou
+from chainercv.utils.mask.mask_to_bbox import mask_to_bbox
+
+
+class MaskHead(chainer.Chain):
+
+    _canonical_scale = 224
+    _roi_size = 14
+    _roi_sample_ratio = 2
+    mask_size = _roi_size * 2
+
+    # Remember, initialization is MSRAFill
+    def __init__(self, n_class, scales):
+        super(MaskHead, self).__init__()
+
+        initialW = HeNormal(1, fan_option='fan_out')
+        with self.init_scope():
+            self.conv1 = L.Convolution2D(256, 3, pad=1, initialW=initialW)
+            self.conv2 = L.Convolution2D(256, 3, pad=1, initialW=initialW)
+            self.conv3 = L.Convolution2D(256, 3, pad=1, initialW=initialW)
+            self.conv4 = L.Convolution2D(256, 3, pad=1, initialW=initialW)
+            self.conv5 = L.Deconvolution2D(
+                256, 2, pad=0, stride=2, initialW=initialW)
+            self.seg = L.Convolution2D(n_class, 1, pad=0, initialW=initialW)
+
+        self._n_class = n_class
+        self._scales = scales
+
+    def __call__(self, hs, rois, roi_indices):
+        pooled_hs = []
+        for l, h in enumerate(hs):
+            if len(rois[l]) == 0:
+                continue
+
+            pooled_hs.append(F.roi_average_align_2d(
+                h, rois[l], roi_indices[l],
+                self._roi_size,
+                self._scales[l], self._roi_sample_ratio))
+
+        if len(pooled_hs) == 0:
+            out_size = self.mask_size
+            segs = chainer.Variable(
+                self.xp.empty((0, self._n_class, out_size, out_size),
+                              dtype=np.float32))
+            return segs
+
+        h = F.concat(pooled_hs, axis=0)
+        h = F.relu(self.conv1(h))
+        h = F.relu(self.conv2(h))
+        h = F.relu(self.conv3(h))
+        h = F.relu(self.conv4(h))
+        h = F.relu(self.conv5(h))
+        return self.seg(h)
+
+    def distribute(self, rois, roi_indices):
+        size = self.xp.sqrt(
+            self.xp.prod(rois[:, 2:] + 1 - rois[:, :2], axis=1))
+        level = self.xp.floor(self.xp.log2(
+            size / self._canonical_scale + 1e-6)).astype(np.int32)
+        # skip last level
+        level = self.xp.clip(
+            level + len(self._scales) // 2, 0, len(self._scales) - 2)
+
+        masks = [level == l for l in range(len(self._scales))]
+        rois = [rois[mask] for mask in masks]
+        roi_indices = [roi_indices[mask] for mask in masks]
+        order = self.xp.argsort(
+            self.xp.concatenate([self.xp.where(mask)[0] for mask in masks]))
+        return rois, roi_indices, order
+
+    def decode(self, segms, bboxes, labels, sizes):
+        # CPU is used because cv2.resize only accepts numpy arrays.
+        segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms]
+        bboxes = [chainer.backends.cuda.to_cpu(bbox) for bbox in bboxes]
+        labels = [chainer.backends.cuda.to_cpu(label) for label in labels]
+
+        masks = []
+        # To work around an issue with cv2.resize (it seems to automatically
+        # pad with repeated border values), we manually zero-pad the masks by 1
+        # pixel prior to resizing back to the original image resolution.
+        # This prevents "top hat" artifacts. We therefore need to expand
+        # the reference boxes by an appropriate factor.
+        cv2_expand_scale = (self.mask_size + 2) / self.mask_size
+        padded_mask = np.zeros((self.mask_size + 2, self.mask_size + 2),
+                               dtype=np.float32)
+        for bbox, segm, label, size in zip(
+                bboxes, segms, labels, sizes):
+            img_H, img_W = size
+            mask = np.zeros((len(bbox), img_H, img_W), dtype=np.bool)
+
+            bbox = expand_boxes(bbox, cv2_expand_scale)
+            for i, (bb, sgm, lbl) in enumerate(zip(bbox, segm, label)):
+                bb = bb.astype(np.int32)
+                padded_mask[1:-1, 1:-1] = sgm[lbl + 1]
+
+                # TODO(yuyu2172): Ignore +1 later
+                bb_height = np.maximum(bb[2] - bb[0] + 1, 1)
+                bb_width = np.maximum(bb[3] - bb[1] + 1, 1)
+
+                crop_mask = cv2.resize(padded_mask, (bb_width, bb_height))
+                crop_mask = crop_mask > 0.5
+
+                y_min = max(bb[0], 0)
+                x_min = max(bb[1], 0)
+                y_max = min(bb[2] + 1, img_H)
+                x_max = min(bb[3] + 1, img_W)
+                mask[i, y_min:y_max, x_min:x_max] = crop_mask[
+                    (y_min - bb[0]):(y_max - bb[0]),
+                    (x_min - bb[1]):(x_max - bb[1])]
+            masks.append(mask)
+        return masks
+
+
+def expand_boxes(bbox, scale):
+    """Expand an array of boxes by a given scale."""
+    xp = chainer.backends.cuda.get_array_module(bbox)
+
+    h_half = (bbox[:, 2] - bbox[:, 0]) * .5
+    w_half = (bbox[:, 3] - bbox[:, 1]) * .5
+    y_c = (bbox[:, 2] + bbox[:, 0]) * .5
+    x_c = (bbox[:, 3] + bbox[:, 1]) * .5
+
+    h_half *= scale
+    w_half *= scale
+
+    expanded_bbox = xp.zeros(bbox.shape)
+    expanded_bbox[:, 0] = y_c - h_half
+    expanded_bbox[:, 1] = x_c - w_half
+    expanded_bbox[:, 2] = y_c + h_half
+    expanded_bbox[:, 3] = x_c + w_half
+
+    return expanded_bbox
+
+
+def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels,
+                  mask_size=28):
+    xp = cuda.get_array_module(*rois)
+
+    n_level = len(rois)
+
+    roi_levels = xp.hstack(
+        xp.array((l,) * len(rois[l])) for l in range(n_level)).astype(np.int32)
+    rois = xp.vstack(rois).astype(np.float32)
+    roi_indices = xp.hstack(roi_indices).astype(np.int32)
+    gt_head_labels = xp.hstack(gt_head_labels)
+
+    index = (gt_head_labels > 0).nonzero()[0]
+    mask_roi_levels = roi_levels[index]
+    mask_rois = rois[index]
+    mask_roi_indices = roi_indices[index]
+    gt_mask_labels = gt_head_labels[index]
+
+    gt_segms = xp.empty((len(mask_rois), mask_size, mask_size), dtype=np.bool)
+    for i in np.unique(cuda.to_cpu(mask_roi_indices)):
+        gt_mask = gt_masks[i]
+        gt_bbox = mask_to_bbox(gt_mask)
+
+        index = (mask_roi_indices == i).nonzero()[0]
+        mask_roi = mask_rois[index]
+        iou = bbox_iou(mask_roi, gt_bbox)
+        gt_index = iou.argmax(axis=1)
+        gt_segms[index] = segm_wrt_bbox(
+            gt_mask[gt_index], mask_roi, (M, M))
+
+    # indices = [(mask_roi_levels == l).nonzero() for l in range(n_level)]
+    flag_masks = [mask_roi_levels == l for l in range(n_level)]
+    mask_rois = [mask_rois[m] for m in flag_masks]
+    mask_roi_indices = [mask_roi_indices[m] for m in flag_masks]
+    gt_segms = [gt_segms[m] for m in flag_masks]
+    gt_mask_labels = [gt_mask_labels[m] for m in flag_masks]
+    return mask_rois, mask_roi_indices, gt_segms, gt_mask_labels
+
+
+def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels,
+                   batchsize):
+    # Just compute loss for the foreground class
+    # divide by the batchsize
+    xp = cuda.get_array_module(segms.array)
+
+    mask_roi_indices = xp.hstack(mask_roi_indices).astype(np.int32)
+    gt_segms = xp.vstack(gt_segms).astype(np.float32)
+    gt_mask_labels = xp.hstack(gt_mask_labels).astype(np.int32)
+
+    mask_loss = 0
+    for i in np.unique(cuda.to_cpu(mask_roi_indices)):
+        index = (mask_roi_indices == i).nonzero()[0]
+        gt_segm = gt_segms[index]
+        gt_mask_label = gt_mask_labels[index]
+
+        mask_loss += F.sigmoid_cross_entropy(
+            segms[index, gt_mask_label], gt_segm.astype(np.int32))
+
+    mask_loss /= batchsize
+    return mask_loss
+
+
+def segm_wrt_bbox(mask, bbox, size):
+    xp = chainer.backends.cuda.get_array_module(mask)
+
+    bbox = bbox.astype(np.int32)
+
+    segm = []
+    for m, bb in zip(mask, bbox):
+        if bb[2] - bb[0] == 0 or bb[3] - bb[1] == 0:
+            segm.append(xp.zeros(size, dtype=np.bool))
+            continue
+        cropped_m = m[bb[0]:bb[2], bb[1]:bb[3]]
+        cropped_m = chainer.backends.cuda.to_cpu(cropped_m)
+
+        segm.append(resize(
+            cropped_m[None].astype(np.float32),
+            size, interpolation=PIL.Image.NEAREST)[0].astype(np.bool))
+    return xp.array(segm, dtype=np.bool)
diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py
new file mode 100644
index 0000000000..94347b1cdd
--- /dev/null
+++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py
@@ -0,0 +1,147 @@
+from __future__ import division
+
+import numpy as np
+import PIL
+
+import chainer
+from chainer.backends import cuda
+import chainer.functions as F
+
+from chainercv import transforms
+
+
+class MaskRCNN(chainer.Chain):
+
+    _min_size = 800
+    _max_size = 1333
+    _stride = 32
+
+    def __init__(self, extractor, rpn, head, mask_head):
+        super(MaskRCNN, self).__init__()
+        with self.init_scope():
+            self.extractor = extractor
+            self.rpn = rpn
+            self.head = head
+            self.mask_head = mask_head
+
+        self.use_preset('visualize')
+
+    def use_preset(self, preset):
+        if preset == 'visualize':
+            self.nms_thresh = 0.5
+            self.score_thresh = 0.7
+        elif preset == 'evaluate':
+            self.nms_thresh = 0.5
+            self.score_thresh = 0.05
+        else:
+            raise ValueError('preset must be visualize or evaluate')
+
+    def __call__(self, x):
+        assert(not chainer.config.train)
+        hs = self.extractor(x)
+        rpn_locs, rpn_confs = self.rpn(hs)
+        anchors = self.rpn.anchors(h.shape[2:] for h in hs)
+        rois, roi_indices = self.rpn.decode(
+            rpn_locs, rpn_confs, anchors, x.shape)
+        rois, roi_indices = self.head.distribute(rois, roi_indices)
+        return hs, rois, roi_indices
+
+    def predict(self, imgs):
+        """Predict per-image masks, labels and scores for ``imgs``."""
+        sizes = [img.shape[1:] for img in imgs]
+        x, scales = self.prepare(imgs)
+        with chainer.using_config('train', False), chainer.no_backprop_mode():
+            hs, rois, roi_indices = self(x)
+            head_locs, head_confs = self.head(hs, rois, roi_indices)
+        bboxes, labels, scores = self.head.decode(
+            rois, roi_indices, head_locs, head_confs,
+            scales, sizes, self.nms_thresh, self.score_thresh)
+
+        # Rescale bbox to the scaled resolution
+        rescaled_bboxes = [bbox * scale for scale, bbox in zip(scales, bboxes)]
+        # Change bboxes to RoI and RoI indices format
+        mask_rois_before_reordering, mask_roi_indices_before_reordering =\
+            _list_to_flat(rescaled_bboxes)
+        mask_rois, mask_roi_indices, order = self.mask_head.distribute(
+            mask_rois_before_reordering, mask_roi_indices_before_reordering)
+        with chainer.using_config('train', False), chainer.no_backprop_mode():
+            segms = F.sigmoid(
+                self.mask_head(hs, mask_rois, mask_roi_indices)).data
+        # Restore the proposal order used by the bbox head, undoing the
+        # per-FPN-level grouping done by mask_head.distribute.
+        segms = segms[order]
+        # Split per image by index rather than by np.unique so that an image
+        # with no detections keeps an empty entry aligned with its bbox/label.
+        segms = [
+            segms[mask_roi_indices_before_reordering == i]
+            for i in range(len(imgs))]
+
+        masks = self.mask_head.decode(
+            segms,
+            [bbox / scale for bbox, scale in zip(rescaled_bboxes, scales)],
+            labels, sizes)
+
+        masks = [cuda.to_cpu(mask) for mask in masks]
+        labels = [cuda.to_cpu(label) for label in labels]
+        scores = [cuda.to_cpu(score) for score in scores]
+        return masks, labels, scores
+
+    def prepare(self, imgs, masks=None):
+        """Resize, mean-subtract and zero-pad images (and masks)."""
+        scales = []
+        resized_imgs = []
+        sizes = []
+        for img in imgs:
+            _, H, W = img.shape
+            scale = self._min_size / min(H, W)
+            if scale * max(H, W) > self._max_size:
+                scale = self._max_size / max(H, W)
+            scales.append(scale)
+            H, W = int(H * scale), int(W * scale)
+            img = transforms.resize(img, (H, W))
+            img -= self.extractor.mean
+            resized_imgs.append(img)
+            sizes.append((H, W))
+        pad_size = np.array([im.shape[1:] for im in resized_imgs]).max(axis=0)
+        pad_size = (
+            np.ceil(pad_size / self._stride) * self._stride).astype(int)
+        pad_imgs = np.zeros(
+            (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32)
+        for i, im in enumerate(resized_imgs):
+            _, H, W = im.shape  # each image's own size, not the last one's
+            pad_imgs[i, :, :H, :W] = im
+        pad_imgs = self.xp.array(pad_imgs)
+
+        if masks is None:
+            return pad_imgs, scales
+
+        resized_masks = []
+        for size, mask in zip(sizes, masks):
+            resized_masks.append(transforms.resize(
+                mask.astype(np.float32),
+                size, interpolation=PIL.Image.NEAREST).astype(np.bool))
+        pad_masks = []
+        for mask in resized_masks:
+            n_class, H, W = mask.shape
+            pad_mask = self.xp.zeros(
+                (n_class, pad_size[0], pad_size[1]), dtype=np.bool)
+            pad_mask[:, :H, :W] = self.xp.array(mask)
+            pad_masks.append(pad_mask)
+        return pad_imgs, pad_masks, scales
+
+
+def _list_to_flat(array_list):
+    xp = chainer.backends.cuda.get_array_module(array_list[0])
+
+    indices = xp.concatenate(
+        [i * xp.ones((len(array),), dtype=np.int32) for
+         i, array in enumerate(array_list)], axis=0)
+    flat = xp.concatenate(array_list, axis=0)
+    return flat, indices
+
+
+def _flat_to_list(flat, indices):
+    array_list = []
+    for i in np.unique(chainer.backends.cuda.to_cpu(indices)):
+        array_list.append(flat[indices == i])
+    return array_list
diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py b/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py
new file mode 100644
index 0000000000..2e1b132d42
--- /dev/null
+++ b/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py
@@ -0,0 +1,68 @@
+from __future__ import division
+
+import chainer
+import chainer.functions as F
+
+from chainercv.links.model.fpn import FPN
+from chainercv.links.model.fpn import Head
+from chainercv.links.model.fpn import RPN
+from chainercv.links.model.mask_rcnn.mask_head import MaskHead
+from chainercv.links.model.mask_rcnn.mask_rcnn import MaskRCNN
+from chainercv.links.model.resnet import ResNet101
+from chainercv.links.model.resnet import ResNet50
+from chainercv import utils
+
+from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import _copyparams
+
+
+class MaskRCNNFPNResNet(MaskRCNN):
+
+    def __init__(self, n_fg_class=None, pretrained_model=None):
+        param, path = utils.prepare_pretrained_model(
+            {'n_fg_class': n_fg_class}, pretrained_model, self._models)
+
+        base = self._base(n_class=1, arch='he')
+        base.pick = ('res2', 'res3', 'res4', 'res5')
+        base.pool1 = lambda x: F.max_pooling_2d(
+            x, 3, stride=2, pad=1, cover_all=False)
+        base.remove_unused()
+        extractor = FPN(
+            base, len(base.pick), (1 / 4, 1 / 8, 1 / 16, 1 / 32, 1 / 64))
+
+        n_class = param['n_fg_class'] + 1
+        super(MaskRCNNFPNResNet, self).__init__(
+            extractor=extractor,
+            rpn=RPN(extractor.scales),
+            head=Head(n_class, extractor.scales),
+            mask_head=MaskHead(n_class, extractor.scales)
+        )
+        if path == 'imagenet':
+            _copyparams(
+                self.extractor.base,
+                self._base(pretrained_model='imagenet', arch='he'))
+        elif path:
+            chainer.serializers.load_npz(path, self)
+
+
+class MaskRCNNFPNResNet50(MaskRCNNFPNResNet):
+
+    _base = ResNet50
+    _models = {
+        'coco': {
+            'param': {'n_fg_class': 80},
+            'url': None,
+            'cv2': True
+        },
+    }
+
+
+class MaskRCNNFPNResNet101(MaskRCNNFPNResNet):
+
+    _base = ResNet101
+    _models = {
+        'coco': {
+            'param': {'n_fg_class': 80},
+            'url': None,
+            'cv2': True
+        },
+    }
diff --git a/examples/instance_segmentation/eval_coco.py b/examples/instance_segmentation/eval_coco.py
index a8e531ba07..98258252b8 100755
--- a/examples/instance_segmentation/eval_coco.py
+++ b/examples/instance_segmentation/eval_coco.py
@@ -7,6 +7,8 @@
 from chainercv.datasets import COCOInstanceSegmentationDataset
 from chainercv.evaluations import eval_instance_segmentation_coco
 from chainercv.experimental.links import FCISResNet101
+from chainercv.links import MaskRCNNFPNResNet101
+from chainercv.links import MaskRCNNFPNResNet50
 from chainercv.utils import apply_to_iterator
 from chainercv.utils import ProgressHook
 
@@ -14,15 +16,17 @@
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        '--model', choices=('fcis_resnet101',),
+        '--model', choices=(
+            'fcis_resnet101',
+            'mask_rcnn_fpn_resnet101', 'mask_rcnn_fpn_resnet50'),
         default='fcis_resnet101')
     parser.add_argument('--pretrained-model', default=None)
     parser.add_argument('--gpu', type=int, default=-1)
     args = parser.parse_args()
 
+    if args.pretrained_model is None:
+        args.pretrained_model = 'coco'
     if args.model == 'fcis_resnet101':
-        if args.pretrained_model is None:
-            args.pretrained_model = 'coco'
         proposal_creator_params = FCISResNet101.proposal_creator_params
         proposal_creator_params['min_size'] = 2
         model = FCISResNet101(
@@ -30,8 +34,19 @@ def main():
             anchor_scales=(4, 8, 16, 32),
             pretrained_model=args.pretrained_model,
             proposal_creator_params=proposal_creator_params)
+        preset = 'coco_evaluate'
+    elif args.model == 'mask_rcnn_fpn_resnet50':
+        model = MaskRCNNFPNResNet50(
+            len(coco_instance_segmentation_label_names),
+            args.pretrained_model)
+        preset = 'evaluate'
+    elif args.model == 'mask_rcnn_fpn_resnet101':
+        model = MaskRCNNFPNResNet101(
+            len(coco_instance_segmentation_label_names),
+            args.pretrained_model)
+        preset = 'evaluate'
 
-    model.use_preset('coco_evaluate')
+    model.use_preset(preset)
 
     if args.gpu >= 0:
         chainer.cuda.get_device_from_id(args.gpu).use()
diff --git a/examples/mask_rcnn/demo.py b/examples/mask_rcnn/demo.py
new file mode 100644
index 0000000000..aa4b7adbe4
--- /dev/null
+++ b/examples/mask_rcnn/demo.py
@@ -0,0 +1,58 @@
+import argparse
+import matplotlib.pyplot as plt
+
+import chainer
+
+import chainercv
+from chainercv.datasets import coco_instance_segmentation_label_names
+from chainercv import utils
+
+from chainercv.links import MaskRCNNFPNResNet101
+from chainercv.links import MaskRCNNFPNResNet50
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--gpu', type=int, default=-1)
+    parser.add_argument('--model', choices=('resnet50', 'resnet101'))
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument('--pretrained-model')
+    group.add_argument('--snapshot')
+    parser.add_argument('image')
+    args = parser.parse_args()
+
+    if args.model == 'resnet50':
+        model = MaskRCNNFPNResNet50(
+            n_fg_class=len(coco_instance_segmentation_label_names),
+            pretrained_model=args.pretrained_model)
+    elif args.model == 'resnet101':
+        model = MaskRCNNFPNResNet101(
+            n_fg_class=len(coco_instance_segmentation_label_names),
+            pretrained_model=args.pretrained_model)
+
+    if args.gpu >= 0:
+        chainer.cuda.get_device_from_id(args.gpu).use()
+        model.to_gpu()
+
+    img = utils.read_image(args.image)
+    # bboxes, masks, labels, scores = model.predict([img])
+    masks, labels, scores = model.predict([img])
+    # bbox = bboxes[0]
+    mask = masks[0]
+    label = labels[0]
+    score = scores[0]
+
+    # chainercv.visualizations.vis_bbox(
+    #     img, bbox, label, score, label_names=coco_bbox_label_names)
+
+    import numpy as np
+    # flag = np.array([bb[3] - bb[1] < 300 for bb in bbox], dtype=np.bool)
+    flag = np.ones(len(mask), dtype=np.bool)
+    chainercv.visualizations.vis_instance_segmentation(
+        img, mask[flag], label[flag], score[flag],
+        label_names=coco_instance_segmentation_label_names)
+    plt.show()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py
new file mode 100644
index 0000000000..cb68857a93
--- /dev/null
+++ b/examples/mask_rcnn/train_multi.py
@@ -0,0 +1,229 @@
+import argparse
+import numpy as np
+
+import chainer
+import chainer.links as L
+from chainer.optimizer_hooks import WeightDecay
+from chainer import serializers
+from chainer import training
+from chainer.training import extensions
+
+import chainermn
+
+from chainercv.chainer_experimental.datasets.sliceable import TransformDataset
+from chainercv.chainer_experimental.training.extensions import make_shift
+from chainercv.datasets import coco_instance_segmentation_label_names
+from chainercv.datasets import COCOInstanceSegmentationDataset
+from chainercv.links import MaskRCNNFPNResNet101
+from chainercv.links import MaskRCNNFPNResNet50
+from chainercv import transforms
+
+from chainercv.links.model.fpn import head_loss_post
+from chainercv.links.model.fpn import head_loss_pre
+from chainercv.links.model.fpn import rpn_loss
+from chainercv.links.model.mask_rcnn import mask_loss_post
+from chainercv.links.model.mask_rcnn import mask_loss_pre
+
+# https://docs.chainer.org/en/stable/tips.html#my-training-process-gets-stuck-when-using-multiprocessiterator
+try:
+    import cv2
+    cv2.setNumThreads(0)
+except ImportError:
+    pass
+
+
+class TrainChain(chainer.Chain):
+
+    def __init__(self, model):
+        super().__init__()
+        with self.init_scope():
+            self.model = model
+
+    def __call__(self, imgs, masks, labels, bboxes):
+        x, masks, scales = self.model.prepare(imgs, masks)
+        B = len(x)
+        bboxes = [self.xp.array(bbox) * scale
+                  for bbox, scale in zip(bboxes, scales)]
+        labels = [self.xp.array(label) for label in labels]
+
+        with chainer.using_config('train', False):
+            hs = self.model.extractor(x)
+
+        rpn_locs, rpn_confs = self.model.rpn(hs)
+        anchors = self.model.rpn.anchors(h.shape[2:] for h in hs)
+        rpn_loc_loss, rpn_conf_loss = rpn_loss(
+            rpn_locs, rpn_confs, anchors,
+            [(int(img.shape[1] * scale), int(img.shape[2] * scale))
+             for img, scale in zip(imgs, scales)],
+            bboxes)
+
+        rois, roi_indices = self.model.rpn.decode(
+            rpn_locs, rpn_confs, anchors, x.shape)
+        rois = self.xp.vstack([rois] + bboxes)
+        roi_indices = self.xp.hstack(
+            [roi_indices]
+            + [self.xp.array((i,) * len(bbox))
+               for i, bbox in enumerate(bboxes)])
+        rois, roi_indices = self.model.head.distribute(rois, roi_indices)
+        rois, roi_indices, head_gt_locs, head_gt_labels = head_loss_pre(
+            rois, roi_indices, self.model.head.std, bboxes, labels)
+        head_locs, head_confs = self.model.head(hs, rois, roi_indices)
+        head_loc_loss, head_conf_loss = head_loss_post(
+            head_locs, head_confs,
+            roi_indices, head_gt_locs, head_gt_labels, B)
+
+        mask_rois, mask_roi_indices, gt_segms, gt_mask_labels = mask_loss_pre(
+            rois, roi_indices, masks, head_gt_labels,
+            self.model.mask_head.mask_size)
+        segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)
+        mask_loss = mask_loss_post(
+            segms, mask_roi_indices, gt_segms, gt_mask_labels, B)
+
+        loss = (rpn_loc_loss + rpn_conf_loss +
+                head_loc_loss + head_conf_loss + mask_loss)
+        chainer.reporter.report({
+            'loss': loss,
+            'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss,
+            'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss,
+            'loss/mask': mask_loss},
+            self)
+
+        return loss
+
+
+def transform(in_data):
+    img, mask, label, bbox = in_data
+
+    img, params = transforms.random_flip(
+        img, x_random=True, return_param=True)
+    mask = transforms.flip(mask, x_flip=params['x_flip'])
+    bbox = transforms.flip_bbox(
+        bbox, img.shape[1:], x_flip=params['x_flip'])
+
+    return img, mask, label, bbox
+
+
+def converter(batch, device=None):
+    # do not send data to gpu (device is ignored)
+    return tuple(list(v) for v in zip(*batch))
+
+
+def copyparams(dst, src):
+    if isinstance(dst, chainer.Chain):
+        for link in dst.children():
+            copyparams(link, src[link.name])
+    elif isinstance(dst, chainer.ChainList):
+        for i, link in enumerate(dst):
+            copyparams(link, src[i])
+    else:
+        dst.copyparams(src)
+        if isinstance(dst, L.BatchNormalization):
+            dst.avg_mean = src.avg_mean
+            dst.avg_var = src.avg_var
+
+
+def main():
+    """Train Mask R-CNN FPN on COCO instance segmentation with ChainerMN."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--model', choices=('resnet50', 'resnet101'), default='resnet50')
+    parser.add_argument('--batchsize', type=int, default=16)
+    parser.add_argument('--step', type=int, nargs='*', default=[60000, 80000])
+    parser.add_argument('--out', default='result')
+    parser.add_argument('--resume')
+    parser.add_argument('--communicator', default='hierarchical')
+    args = parser.parse_args()
+
+    comm = chainermn.create_communicator(args.communicator)
+    device = comm.intra_rank
+
+    if args.model == 'resnet50':
+        model = MaskRCNNFPNResNet50(
+            n_fg_class=len(coco_instance_segmentation_label_names),
+            pretrained_model='imagenet')
+    elif args.model == 'resnet101':
+        model = MaskRCNNFPNResNet101(
+            n_fg_class=len(coco_instance_segmentation_label_names),
+            pretrained_model='imagenet')
+    model.use_preset('evaluate')
+    train_chain = TrainChain(model)
+    chainer.cuda.get_device_from_id(device).use()
+    train_chain.to_gpu()
+
+    train = TransformDataset(
+        COCOInstanceSegmentationDataset(split='train', return_bbox=True),
+        ('img', 'mask', 'label', 'bbox'), transform)
+
+    if comm.rank == 0:
+        indices = np.arange(len(train))
+    else:
+        indices = None
+    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
+    train = train.slice[indices]
+
+    train_iter = chainer.iterators.MultithreadIterator(
+        train, args.batchsize // comm.size)
+
+    optimizer = chainermn.create_multi_node_optimizer(
+        chainer.optimizers.MomentumSGD(), comm)
+    optimizer.setup(train_chain)
+    optimizer.add_hook(WeightDecay(0.0001))
+
+    model.extractor.base.conv1.disable_update()
+    model.extractor.base.res2.disable_update()
+    for link in model.links():
+        if isinstance(link, L.BatchNormalization):
+            link.disable_update()
+
+    updater = training.updaters.StandardUpdater(
+        train_iter, optimizer, converter=converter, device=device)
+    trainer = training.Trainer(
+        updater, (90000 * 16 / args.batchsize, 'iteration'), args.out)
+
+    @make_shift('lr')
+    def lr_schedule(trainer):
+        base_lr = 0.02 * args.batchsize / 16
+        warm_up_duration = 500
+        warm_up_rate = 1 / 3
+
+        iteration = trainer.updater.iteration
+        if iteration < warm_up_duration:
+            rate = warm_up_rate \
+                + (1 - warm_up_rate) * iteration / warm_up_duration
+        else:
+            rate = 1
+            for step in args.step:
+                if iteration >= step * 16 / args.batchsize:
+                    rate *= 0.1
+
+        return base_lr * rate
+
+    trainer.extend(lr_schedule)
+
+    if comm.rank == 0:
+        log_interval = 10, 'iteration'
+        trainer.extend(extensions.LogReport(trigger=log_interval))
+        trainer.extend(extensions.observe_lr(), trigger=log_interval)
+        trainer.extend(extensions.PrintReport(
+            ['epoch', 'iteration', 'lr', 'main/loss',
+             'main/loss/rpn/loc', 'main/loss/rpn/conf',
+             'main/loss/head/loc', 'main/loss/head/conf',
+             'main/loss/mask'
+             ]),
+            trigger=log_interval)
+        trainer.extend(extensions.ProgressBar(update_interval=10))
+
+        trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
+        trainer.extend(
+            extensions.snapshot_object(
+                model, 'model_iter_{.updater.iteration}'),
+            trigger=(90000 * 16 / args.batchsize, 'iteration'))
+
+    if args.resume:
+        serializers.load_npz(args.resume, trainer, strict=False)
+
+    trainer.run()
+
+
+if __name__ == '__main__':
+    main()

From bcd68fac657d25a835d8de823c9f26ace124843b Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Tue, 12 Feb 2019 22:04:35 +0900
Subject: [PATCH 002/100] misc

---
 chainercv/links/model/mask_rcnn/mask_head.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py
index 2b2b5c4cbb..fd2d35e763 100644
--- a/chainercv/links/model/mask_rcnn/mask_head.py
+++ b/chainercv/links/model/mask_rcnn/mask_head.py
@@ -147,7 +147,7 @@ def expand_boxes(bbox, scale):
 
 
 def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels,
-                  mask_size=28):
+                  mask_size):
     xp = cuda.get_array_module(*rois)
 
     n_level = len(rois)
@@ -174,9 +174,8 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels,
         iou = bbox_iou(mask_roi, gt_bbox)
         gt_index = iou.argmax(axis=1)
         gt_segms[index] = segm_wrt_bbox(
-            gt_mask[gt_index], mask_roi, (M, M))
+            gt_mask[gt_index], mask_roi, (mask_size, mask_size))
 
-    # indices = [(mask_roi_levels == l).nonzero() for l in range(n_level)]
     flag_masks = [mask_roi_levels == l for l in range(n_level)]
     mask_rois = [mask_rois[m] for m in flag_masks]
     mask_roi_indices = [mask_roi_indices[m] for m in flag_masks]

From f1d4e46835de8aea8bc56c73e2b7dcec30477c85 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Wed, 13 Feb 2019 15:13:39 +0900
Subject: [PATCH 003/100] doc

---
 docs/source/reference/links.rst           | 11 ++++++
 docs/source/reference/links/mask_rcnn.rst | 45 +++++++++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 docs/source/reference/links/mask_rcnn.rst

diff --git a/docs/source/reference/links.rst b/docs/source/reference/links.rst
index 5c7426a97f..6dbf1a1e86 100644
--- a/docs/source/reference/links.rst
+++ b/docs/source/reference/links.rst
@@ -51,6 +51,17 @@ For more details, please read :func:`SegNetBasic.predict`.
    links/segnet
 
 
+Instance Segmentation
+~~~~~~~~~~~~~~~~~~~~~
+
+Instance segmentation links share a common method :meth:`predict` to detect masks that cover objects in an image.
+For more details, please read :func:`MaskRCNN.predict`.
+
+.. toctree::
+
+   links/mask_rcnn
+
+
 Classifiers
 ~~~~~~~~~~~
 
diff --git a/docs/source/reference/links/mask_rcnn.rst b/docs/source/reference/links/mask_rcnn.rst
new file mode 100644
index 0000000000..4c0870e2e5
--- /dev/null
+++ b/docs/source/reference/links/mask_rcnn.rst
@@ -0,0 +1,45 @@
+Mask R-CNN
+==========
+
+.. module:: chainercv.links.model.mask_rcnn
+
+
+Instance Segmentation Links
+---------------------------
+
+MaskRCNNFPNResNet50
+~~~~~~~~~~~~~~~~~~~
+.. autoclass:: MaskRCNNFPNResNet50
+   :members:
+
+MaskRCNNFPNResNet101
+~~~~~~~~~~~~~~~~~~~~
+.. autoclass:: MaskRCNNFPNResNet101
+   :members:
+
+
+Utility
+-------
+
+MaskRCNN
+~~~~~~~~
+.. autoclass:: MaskRCNN
+   :members:
+
+MaskHead
+~~~~~~~~
+.. autoclass:: MaskHead
+   :members:
+   :special-members: __call__
+
+
+Train-only Utility
+------------------
+
+mask_loss_pre
+~~~~~~~~~~~~~
+.. autofunction:: mask_loss_pre
+
+mask_loss_post
+~~~~~~~~~~~~~~
+.. autofunction:: mask_loss_post

From 1e10d3210f7fc59c3ca65b492b78c1521208f921 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Wed, 13 Feb 2019 16:06:40 +0900
Subject: [PATCH 004/100] remove unnecessary print

---
 .../testing/assertions/assert_is_instance_segmentation_link.py   | 1 -
 1 file changed, 1 deletion(-)

diff --git a/chainercv/utils/testing/assertions/assert_is_instance_segmentation_link.py b/chainercv/utils/testing/assertions/assert_is_instance_segmentation_link.py
index 1faf7aaf7e..09f55c900c 100644
--- a/chainercv/utils/testing/assertions/assert_is_instance_segmentation_link.py
+++ b/chainercv/utils/testing/assertions/assert_is_instance_segmentation_link.py
@@ -21,7 +21,6 @@ def assert_is_instance_segmentation_link(link, n_fg_class):
         np.random.randint(0, 256, size=(3, 480, 320)).astype(np.float32)]
 
     result = link.predict(imgs)
-    print(result)
     assert len(result) == 3, \
         'Link must return three elements: masks, labels and scores.'
     masks, labels, scores = result

From e468545cb34df7c38cd7b6b6b10d2f28710f88d1 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Wed, 13 Feb 2019 16:06:29 +0900
Subject: [PATCH 005/100] misc

---
 chainercv/links/model/mask_rcnn/mask_head.py  |   7 +-
 chainercv/links/model/mask_rcnn/mask_rcnn.py  | 146 ++++++++---
 examples/mask_rcnn/train_multi.py             |  41 ++-
 .../mask_rcnn_tests/test_mask_head.py         | 240 ++++++++++++++++++
 .../mask_rcnn_tests/test_mask_rcnn.py         | 132 ++++++++++
 .../test_mask_rcnn_fpn_resnet.py              |  68 +++++
 6 files changed, 590 insertions(+), 44 deletions(-)
 create mode 100644 tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py
 create mode 100644 tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn.py
 create mode 100644 tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn_fpn_resnet.py

diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py
index fd2d35e763..0e98cc50fc 100644
--- a/chainercv/links/model/mask_rcnn/mask_head.py
+++ b/chainercv/links/model/mask_rcnn/mask_head.py
@@ -18,12 +18,12 @@
 
 class MaskHead(chainer.Chain):
 
+    _canonical_level = 2
     _canonical_scale = 224
     _roi_size = 14
     _roi_sample_ratio = 2
     mask_size = _roi_size * 2
 
-    # Remember, initialization is MSRAFill
     def __init__(self, n_class, scales):
         super(MaskHead, self).__init__()
 
@@ -67,13 +67,12 @@ def __call__(self, hs, rois, roi_indices):
         return self.seg(h)
 
     def distribute(self, rois, roi_indices):
-        size = self.xp.sqrt(
-            self.xp.prod(rois[:, 2:] + 1 - rois[:, :2], axis=1))
+        size = self.xp.sqrt(self.xp.prod(rois[:, 2:] - rois[:, :2], axis=1))
         level = self.xp.floor(self.xp.log2(
             size / self._canonical_scale + 1e-6)).astype(np.int32)
         # skip last level
         level = self.xp.clip(
-            level + len(self._scales) // 2, 0, len(self._scales) - 2)
+            level + self._canonical_level, 0, len(self._scales) - 2)
 
         masks = [level == l for l in range(len(self._scales))]
         rois = [rois[mask] for mask in masks]
diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py
index 94347b1cdd..0ddc65ce13 100644
--- a/chainercv/links/model/mask_rcnn/mask_rcnn.py
+++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py
@@ -12,6 +12,39 @@
 
 class MaskRCNN(chainer.Chain):
 
+    """Base class of Mask R-CNN.
+
+    This is a base class of Mask R-CNN [#]_.
+
+    .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017
+
+    Args:
+        extractor (Link): A link that extracts feature maps.
+            This link must have :obj:`scales`, :obj:`mean` and
+            :meth:`__call__`.
+        rpn (Link): A link that has the same interface as
+            :class:`~chainercv.links.model.fpn.RPN`.
+            Please refer to the documentation found there.
+        head (Link): A link that has the same interface as
+            :class:`~chainercv.links.model.fpn.Head`.
+            Please refer to the documentation found there.
+        mask_head (Link): A link that has the same interface as
+            :class:`~chainercv.links.model.mask_rcnn.MaskHead`.
+            Please refer to the documentation found there.
+
+    Parameters:
+        nms_thresh (float): The threshold value
+            for :func:`~chainercv.utils.non_maximum_suppression`.
+            The default value is :obj:`0.5`.
+            This value can be changed directly or by using :meth:`use_preset`.
+        score_thresh (float): The threshold value for confidence score.
+            If a bounding box has a confidence score lower than this value,
+            the bounding box will be suppressed.
+            The default value is :obj:`0.7`.
+            This value can be changed directly or by using :meth:`use_preset`.
+
+    """
+
     _min_size = 800
     _max_size = 1333
     _stride = 32
@@ -27,6 +60,23 @@ def __init__(self, extractor, rpn, head, mask_head):
         self.use_preset('visualize')
 
     def use_preset(self, preset):
+        """Use the given preset during prediction.
+
+        This method changes values of :obj:`nms_thresh` and
+        :obj:`score_thresh`. These values are a threshold value
+        used for non maximum suppression and a threshold value
+        to discard low confidence proposals in :meth:`predict`,
+        respectively.
+
+        If the attributes need to be changed to something
+        other than the values provided in the presets, please modify
+        them by directly accessing the public attributes.
+
+        Args:
+            preset ({'visualize', 'evaluate'}): A string to determine the
+                preset to use.
+        """
+
         if preset == 'visualize':
             self.nms_thresh = 0.5
             self.score_thresh = 0.7
@@ -47,8 +97,34 @@ def __call__(self, x):
         return hs, rois, roi_indices
 
     def predict(self, imgs):
+        """Segment object instances from images.
+
+        This method predicts instance-aware object regions for each image.
+
+        Args:
+            imgs (iterable of numpy.ndarray): Arrays holding images of shape
+                :math:`(B, C, H, W)`.  All images are in CHW and RGB format
+                and the range of their value is :math:`[0, 255]`.
+
+        Returns:
+           tuple of lists:
+           This method returns a tuple of three lists,
+           :obj:`(masks, labels, scores)`.
+
+           * **masks**: A list of boolean arrays of shape :math:`(R, H, W)`, \
+               where :math:`R` is the number of masks in an image. \
+               Each pixel indicates whether it is inside the object or not.
+           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
+               Each value indicates the class of the masks. \
+               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
+               number of the foreground classes.
+           * **scores** : A list of float arrays of shape :math:`(R,)`. \
+               Each value indicates how confident the prediction is.
+
+        """
+
         sizes = [img.shape[1:] for img in imgs]
-        x, scales = self.prepare(imgs)
+        x, scales, _ = self.prepare(imgs)
 
         with chainer.using_config('train', False), chainer.no_backprop_mode():
             hs, rois, roi_indices = self(x)
@@ -67,14 +143,15 @@ def predict(self, imgs):
         with chainer.using_config('train', False), chainer.no_backprop_mode():
             segms = F.sigmoid(
                 self.mask_head(hs, mask_rois, mask_roi_indices)).data
-        # Put the order of proposals back to the one used by bbox head
-        # from the ordering respective FPN levels.
+        # Put the order of proposals back to the one used by bbox head.
         segms = segms[order]
-        segms = _flat_to_list(segms, mask_roi_indices_before_reordering)
-        if len(segms) == 0:
-            segms = [
-                self.xp.zeros((0, self.mask_head.mask_size,
-                               self.mask_head.mask_size), dtype=np.float32)]
+        segms = _flat_to_list(
+            segms, mask_roi_indices_before_reordering, len(imgs))
+        segms = [segm if segm is not None else
+                 self.xp.zeros(
+                     (0, self.mask_head.mask_size, self.mask_head.mask_size),
+                     dtype=np.float32)
+                 for segm in segms]
 
         masks = self.mask_head.decode(
             segms,
@@ -87,9 +164,21 @@ def predict(self, imgs):
         return masks, labels, scores
 
     def prepare(self, imgs, masks=None):
+        """Preprocess images.
+
+        Args:
+            imgs (iterable of numpy.ndarray): Arrays holding images.
+                All images are in CHW and RGB format
+                and the range of their value is :math:`[0, 255]`.
+
+        Returns:
+            Three values: preprocessed images, scales that were calculated \
+            in preprocessing and sizes of the images after resizing.
+
+        """
         scales = []
         resized_imgs = []
-        sizes = []
+        resized_sizes = []
         for img in imgs:
             _, H, W = img.shape
             scale = self._min_size / min(H, W)
@@ -100,34 +189,19 @@ def prepare(self, imgs, masks=None):
             img = transforms.resize(img, (H, W))
             img -= self.extractor.mean
             resized_imgs.append(img)
-            sizes.append((H, W))
+            resized_sizes.append((H, W))
         pad_size = np.array(
             [im.shape[1:] for im in resized_imgs]).max(axis=0)
         pad_size = (
             np.ceil(pad_size / self._stride) * self._stride).astype(int)
-        pad_imgs = np.zeros(
+        x = np.zeros(
             (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32)
         for i, im in enumerate(resized_imgs):
-            _, H, W = img.shape
-            pad_imgs[i, :, :H, :W] = im
-        pad_imgs = self.xp.array(pad_imgs)
-
-        if masks is None:
-            return pad_imgs, scales
-
-        resized_masks = []
-        for size, mask in zip(sizes, masks):
-            resized_masks.append(transforms.resize(
-                mask.astype(np.float32),
-                size, interpolation=PIL.Image.NEAREST).astype(np.bool))
-        pad_masks = []
-        for mask in resized_masks:
-            n_class, H, W = mask.shape
-            pad_mask = self.xp.zeros(
-                (n_class, pad_size[0], pad_size[1]), dtype=np.bool)
-            pad_mask[:, :H, :W] = self.xp.array(mask)
-            pad_masks.append(pad_mask)
-        return pad_imgs, pad_masks, scales
+            _, H, W = im.shape
+            x[i, :, :H, :W] = im
+        x = self.xp.array(x)
+
+        return x, scales, resized_sizes
 
 
 def _list_to_flat(array_list):
@@ -140,8 +214,12 @@ def _list_to_flat(array_list):
     return flat, indices
 
 
-def _flat_to_list(flat, indices):
+def _flat_to_list(flat, indices, B):
     array_list = []
-    for i in np.unique(chainer.backends.cuda.to_cpu(indices)):
-        array_list.append(flat[indices == i])
+    for i in range(B):
+        array = flat[indices == i]
+        if len(array) > 0:
+            array_list.append(array)
+        else:
+            array_list.append(None)
     return array_list
diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py
index cb68857a93..71957c86e0 100644
--- a/examples/mask_rcnn/train_multi.py
+++ b/examples/mask_rcnn/train_multi.py
@@ -1,5 +1,7 @@
 import argparse
+import multiprocessing
 import numpy as np
+import PIL
 
 import chainer
 import chainer.links as L
@@ -35,13 +37,29 @@
 class TrainChain(chainer.Chain):
 
     def __init__(self, model):
-        super().__init__()
+        super(TrainChain, self).__init__()
         with self.init_scope():
             self.model = model
 
+    def prepare_mask(self, masks, resized_sizes, pad_size):
+        resized_masks = []
+        for size, mask in zip(resized_sizes, masks):
+            resized_masks.append(transforms.resize(
+                mask.astype(np.float32),
+                size, interpolation=PIL.Image.NEAREST).astype(np.bool))
+        pad_masks = []
+        for mask in resized_masks:
+            n_class, H, W = mask.shape
+            pad_mask = self.xp.zeros(
+                (n_class, pad_size[0], pad_size[1]), dtype=np.bool)
+            pad_mask[:, :H, :W] = self.xp.array(mask)
+            pad_masks.append(pad_mask)
+        return pad_masks
+
     def __call__(self, imgs, masks, labels, bboxes):
-        x, masks, scales = self.model.prepare(imgs, masks)
-        B = len(x)
+        x, scales, resized_sizes = self.model.prepare(imgs, masks)
+        B, _, pad_H, pad_W = x.shape
+        masks = self.prepare_mask(masks, resized_sizes, (pad_H, pad_W))
         bboxes = [self.xp.array(bbox) * scale
                   for bbox, scale in zip(bboxes, scales)]
         labels = [self.xp.array(label) for label in labels]
@@ -125,13 +143,23 @@ def copyparams(dst, src):
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        '--model', choices=('resnet50', 'resnet101'))
+        '--model', choices=('resnet50', 'resnet101'),
+        default='resnet50')
     parser.add_argument('--batchsize', type=int, default=16)
+    parser.add_argument('--iteration', type=int, default=90000)
+    parser.add_argument('--step', type=int, nargs='*', default=[60000, 80000])
     parser.add_argument('--out', default='result')
     parser.add_argument('--resume')
     parser.add_argument('--communicator', default='hierarchical')
     args = parser.parse_args()
 
+    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
+    if hasattr(multiprocessing, 'set_start_method'):
+        multiprocessing.set_start_method('forkserver')
+        p = multiprocessing.Process()
+        p.start()
+        p.join()
+
     comm = chainermn.create_communicator(args.communicator)
     device = comm.intra_rank
 
@@ -175,10 +203,11 @@ def main():
         if isinstance(link, L.BatchNormalization):
             link.disable_update()
 
+    n_iteration = args.iteration * 16 / args.batchsize
     updater = training.updaters.StandardUpdater(
         train_iter, optimizer, converter=converter, device=device)
     trainer = training.Trainer(
-        updater, (90000 * 16 / args.batchsize, 'iteration'), args.out)
+        updater, (n_iteration, 'iteration'), args.out)
 
     @make_shift('lr')
     def lr_schedule(trainer):
@@ -217,7 +246,7 @@ def lr_schedule(trainer):
         trainer.extend(
             extensions.snapshot_object(
                 model, 'model_iter_{.updater.iteration}'),
-            trigger=(90000 * 16 / args.batchsize, 'iteration'))
+            trigger=(n_iteration, 'iteration'))
 
     if args.resume:
         serializers.load_npz(args.resume, trainer, strict=False)
diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py
new file mode 100644
index 0000000000..9c8760f388
--- /dev/null
+++ b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py
@@ -0,0 +1,240 @@
+from __future__ import division
+
+import numpy as np
+import unittest
+
+import chainer
+from chainer import testing
+from chainer.testing import attr
+
+from chainercv.links.model.mask_rcnn import MaskHead
+from chainercv.links.model.mask_rcnn import mask_loss_post
+from chainercv.links.model.mask_rcnn import mask_loss_pre
+
+
+def _random_array(xp, shape):
+    return xp.array(
+        np.random.uniform(-1, 1, size=shape), dtype=np.float32)
+
+
+# @testing.parameterize(
+#     {'n_class': 1 + 1},
+#     {'n_class': 5 + 1},
+#     {'n_class': 20 + 1},
+# )
+# class TestMaskHead(unittest.TestCase):
+# 
+#     def setUp(self):
+#         self.link = MaskHead(
+#             n_class=self.n_class, scales=(1 / 2, 1 / 4, 1 / 8))
+# 
+#     def _check_call(self):
+#         hs = [
+#             chainer.Variable(_random_array(self.link.xp, (2, 64, 32, 32))),
+#             chainer.Variable(_random_array(self.link.xp, (2, 64, 16, 16))),
+#             chainer.Variable(_random_array(self.link.xp, (2, 64, 8, 8))),
+#         ]
+#         rois = [
+#             self.link.xp.array(((4, 1, 6, 3),), dtype=np.float32),
+#             self.link.xp.array(
+#                 ((0, 1, 2, 3), (5, 4, 10, 6)), dtype=np.float32),
+#             self.link.xp.array(((10, 4, 12, 10),), dtype=np.float32),
+#         ]
+#         roi_indices = [
+#             self.link.xp.array((0,), dtype=np.int32),
+#             self.link.xp.array((1, 0), dtype=np.int32),
+#             self.link.xp.array((1,), dtype=np.int32),
+#         ]
+# 
+#         segs = self.link(hs, rois, roi_indices)
+# 
+#         self.assertIsInstance(segs, chainer.Variable)
+#         self.assertIsInstance(segs.array, self.link.xp.ndarray)
+#         self.assertEqual(
+#             segs.shape,
+#             (4, self.n_class, self.link.mask_size, self.link.mask_size))
+# 
+#     def test_call_cpu(self):
+#         self._check_call()
+# 
+#     @attr.gpu
+#     def test_call_gpu(self):
+#         self.link.to_gpu()
+#         self._check_call()
+# 
+#     def _check_distribute(self):
+#         rois = self.link.xp.array((
+#             (0, 0, 10, 10),
+#             (0, 1000, 0, 1000),
+#             (0, 0, 224, 224),
+#             (100, 100, 224, 224),
+#         ), dtype=np.float32)
+#         roi_indices = self.link.xp.array((0, 1, 0, 0), dtype=np.int32)
+#         n_roi = len(roi_indices)
+# 
+#         rois, roi_indices, order = self.link.distribute(rois, roi_indices)
+# 
+#         self.assertEqual(len(rois), 3)
+#         self.assertEqual(len(roi_indices), 3)
+#         for l in range(3):
+#             self.assertIsInstance(rois[l], self.link.xp.ndarray)
+#             self.assertIsInstance(roi_indices[l], self.link.xp.ndarray)
+# 
+#             self.assertEqual(rois[l].shape[0], roi_indices[l].shape[0])
+#             self.assertEqual(rois[l].shape[1:], (4,))
+#             self.assertEqual(roi_indices[l].shape[1:], ())
+# 
+#         self.assertEqual(sum(rois[l].shape[0] for l in range(3)), 4)
+# 
+#         self.assertEqual(len(order), n_roi)
+#         self.assertIsInstance(order, self.link.xp.ndarray)
+# 
+#     def test_distribute_cpu(self):
+#         self._check_distribute()
+# 
+#     @attr.gpu
+#     def test_distribute_gpu(self):
+#         self.link.to_gpu()
+#         self._check_distribute()
+# 
+#     def _check_decode(self):
+#         segms = [
+#             _random_array(
+#                 self.link.xp,
+#                 (1, self.n_class, self.link.mask_size, self.link.mask_size)),
+#             _random_array(
+#                 self.link.xp,
+#                 (2, self.n_class, self.link.mask_size, self.link.mask_size)),
+#             _random_array(
+#                 self.link.xp,
+#                 (1, self.n_class, self.link.mask_size, self.link.mask_size))
+#         ]
+#         bboxes = [
+#             self.link.xp.array(((4, 1, 6, 3),), dtype=np.float32),
+#             self.link.xp.array(
+#                 ((0, 1, 2, 3), (5, 4, 10, 6)), dtype=np.float32),
+#             self.link.xp.array(((10, 4, 12, 10),), dtype=np.float32),
+#         ]
+#         labels = [
+#             self.link.xp.random.randint(
+#                 0, self.n_class - 1, size=(1,), dtype=np.int32),
+#             self.link.xp.random.randint(
+#                 0, self.n_class - 1, size=(2,), dtype=np.int32),
+#             self.link.xp.random.randint(
+#                 0, self.n_class - 1, size=(1,), dtype=np.int32),
+#         ]
+# 
+#         sizes = [(56, 56), (48, 48), (72, 72)]
+#         masks = self.link.decode(
+#             segms, bboxes, labels, sizes)
+# 
+#         self.assertEqual(len(masks), 3)
+#         for n in range(3):
+#             self.assertIsInstance(masks[n], self.link.xp.ndarray)
+# 
+#             self.assertEqual(masks[n].shape[0], labels[n].shape[0])
+#             self.assertEqual(masks[n].shape[1:], sizes[n])
+# 
+#     def test_decode_cpu(self):
+#         self._check_decode()
+# 
+#     @attr.gpu
+#     def test_decode_gpu(self):
+#         self.link.to_gpu()
+#         self._check_decode()
+# 
+# 
+class TestMaskHeadLoss(unittest.TestCase):
+
+    def _check_mask_loss_pre(self, xp):
+        n_class = 12
+        mask_size = 28
+        rois = [
+            xp.array(((4, 1, 6, 3),), dtype=np.float32),
+            xp.array(
+                ((0, 1, 2, 3), (5, 4, 10, 6)), dtype=np.float32),
+            xp.array(((10, 4, 12, 10),), dtype=np.float32),
+        ]
+        roi_indices = [
+            xp.array((0,), dtype=np.int32),
+            xp.array((1, 0), dtype=np.int32),
+            xp.array((1,), dtype=np.int32),
+        ]
+        masks = [
+            _random_array(xp, (n_class, mask_size, mask_size)),
+            _random_array(xp, (n_class, mask_size, mask_size)),
+            _random_array(xp, (n_class, mask_size, mask_size)),
+        ]
+        labels = [
+            xp.array((10, 4), dtype=np.float32),
+            xp.array((1,), dtype=np.float32),
+        ]
+        rois, roi_indices, gt_segms, gt_mask_labels = mask_loss_pre(
+            rois, roi_indices, masks, labels, mask_size)
+
+        self.assertEqual(len(rois), 3)
+        self.assertEqual(len(roi_indices), 3)
+        self.assertEqual(len(gt_segms), 3)
+        self.assertEqual(len(gt_mask_labels), 3)
+        # for l in range(3):
+        #     self.assertIsInstance(rois[l], xp.ndarray)
+        #     self.assertIsInstance(roi_indices[l], xp.ndarray)
+        #     self.assertIsInstance(gt_locs[l], xp.ndarray)
+        #     self.assertIsInstance(gt_labels[l], xp.ndarray)
+
+        #     self.assertEqual(rois[l].shape[0], roi_indices[l].shape[0])
+        #     self.assertEqual(rois[l].shape[0], gt_locs[l].shape[0])
+        #     self.assertEqual(rois[l].shape[0], gt_labels[l].shape[0])
+        #     self.assertEqual(rois[l].shape[1:], (4,))
+        #     self.assertEqual(roi_indices[l].shape[1:], ())
+        #     self.assertEqual(gt_locs[l].shape[1:], (4,))
+        #     self.assertEqual(gt_labels[l].shape[1:], ())
+
+    def test_mask_loss_pre_cpu(self):
+        self._check_mask_loss_pre(np)
+
+    @attr.gpu
+    def test_mask_loss_pre_gpu(self):
+        import cupy
+        self._check_mask_loss_pre(cupy)
+
+    # def _check_head_loss_post(self, xp):
+    #     locs = chainer.Variable(_random_array(xp, (20, 81, 4)))
+    #     confs = chainer.Variable(_random_array(xp, (20, 81)))
+    #     roi_indices = [
+    #         xp.random.randint(0, 2, size=5).astype(np.int32),
+    #         xp.random.randint(0, 2, size=7).astype(np.int32),
+    #         xp.random.randint(0, 2, size=8).astype(np.int32),
+    #     ]
+    #     gt_locs = [
+    #         _random_array(xp, (5, 4)),
+    #         _random_array(xp, (7, 4)),
+    #         _random_array(xp, (8, 4)),
+    #     ]
+    #     gt_labels = [
+    #         xp.random.randint(0, 80, size=5).astype(np.int32),
+    #         xp.random.randint(0, 80, size=7).astype(np.int32),
+    #         xp.random.randint(0, 80, size=8).astype(np.int32),
+    #     ]
+
+    #     loc_loss, conf_loss = head_loss_post(
+    #         locs, confs, roi_indices, gt_locs, gt_labels, 2)
+
+    #     self.assertIsInstance(loc_loss, chainer.Variable)
+    #     self.assertIsInstance(loc_loss.array, xp.ndarray)
+    #     self.assertEqual(loc_loss.shape, ())
+
+    #     self.assertIsInstance(conf_loss, chainer.Variable)
+    #     self.assertIsInstance(conf_loss.array, xp.ndarray)
+    #     self.assertEqual(conf_loss.shape, ())
+
+    # def test_head_loss_post_cpu(self):
+    #     self._check_head_loss_post(np)
+
+    # @attr.gpu
+    # def test_head_loss_post_gpu(self):
+    #     import cupy
+    #     self._check_head_loss_post(cupy)
+
+
+testing.run_module(__name__, __file__)
diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn.py b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn.py
new file mode 100644
index 0000000000..637bab61c4
--- /dev/null
+++ b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn.py
@@ -0,0 +1,132 @@
+from __future__ import division
+
+import numpy as np
+import unittest
+
+import chainer
+from chainer import testing
+from chainer.testing import attr
+
+from chainercv.links.model.fpn import Head
+from chainercv.links.model.fpn import RPN
+from chainercv.links.model.mask_rcnn import MaskRCNN
+from chainercv.links.model.mask_rcnn import MaskHead
+from chainercv.utils import assert_is_instance_segmentation_link
+
+
+def _random_array(xp, shape):
+    return xp.array(
+        np.random.uniform(-1, 1, size=shape), dtype=np.float32)
+
+
+class DummyExtractor(chainer.Link):
+    scales = (1 / 2, 1 / 4, 1 / 8)
+    mean = _random_array(np, (3, 1, 1))
+    n_channel = 16
+
+    def __call__(self, x):
+        n, _, h, w = x.shape
+        return [chainer.Variable(_random_array(
+                self.xp, (n, self.n_channel, int(h * scale), int(w * scale))))
+                for scale in self.scales]
+
+
+class DummyMaskRCNN(MaskRCNN):
+
+    def __init__(self, n_fg_class):
+        extractor = DummyExtractor()
+        n_class = n_fg_class + 1
+        super(DummyMaskRCNN, self).__init__(
+            extractor=extractor,
+            rpn=RPN(extractor.scales),
+            head=Head(n_class, extractor.scales),
+            mask_head=MaskHead(n_class, extractor.scales)
+        )
+
+
+@testing.parameterize(
+    {'n_fg_class': 1},
+    {'n_fg_class': 5},
+    {'n_fg_class': 20},
+)
+class TestMaskRCNN(unittest.TestCase):
+
+    def setUp(self):
+        self.link = DummyMaskRCNN(n_fg_class=self.n_fg_class)
+
+    def test_use_preset(self):
+        self.link.nms_thresh = 0
+        self.link.score_thresh = 0
+
+        self.link.use_preset('visualize')
+        self.assertEqual(self.link.nms_thresh, 0.5)
+        self.assertEqual(self.link.score_thresh, 0.7)
+
+        self.link.nms_thresh = 0
+        self.link.score_thresh = 0
+
+        self.link.use_preset('evaluate')
+        self.assertEqual(self.link.nms_thresh, 0.5)
+        self.assertEqual(self.link.score_thresh, 0.05)
+
+        with self.assertRaises(ValueError):
+            self.link.use_preset('unknown')
+
+    def _check_call(self):
+        B = 2
+        size = 32
+        x = _random_array(self.link.xp, (B, 3, size, size))
+        with chainer.using_config('train', False):
+            hs, rois, roi_indices = self.link(x)
+
+        self.assertEqual(len(hs), len(self.link.extractor.scales))
+        self.assertEqual(len(rois), len(self.link.extractor.scales))
+        self.assertEqual(len(roi_indices), len(self.link.extractor.scales))
+        for l, scale in enumerate(self.link.extractor.scales):
+            self.assertIsInstance(rois[l], self.link.xp.ndarray)
+            self.assertEqual(rois[l].shape[1:], (4,))
+
+            self.assertIsInstance(roi_indices[l], self.link.xp.ndarray)
+            self.assertEqual(roi_indices[l].shape[1:], ())
+
+            self.assertEqual(rois[l].shape[0], roi_indices[l].shape[0])
+
+            self.assertIsInstance(hs[l], chainer.Variable)
+            self.assertIsInstance(hs[l].array, self.link.xp.ndarray)
+            feat_size = int(size * scale)
+            self.assertEqual(
+                hs[l].shape,
+                (B, self.link.extractor.n_channel, feat_size, feat_size))
+
+    def test_call_cpu(self):
+        self._check_call()
+
+    @attr.gpu
+    def test_call_gpu(self):
+        self.link.to_gpu()
+        self._check_call()
+
+    def test_call_train_mode(self):
+        x = _random_array(self.link.xp, (2, 3, 32, 32))
+        with self.assertRaises(AssertionError):
+            with chainer.using_config('train', True):
+                self.link(x)
+
+    def test_predict_cpu(self):
+        assert_is_instance_segmentation_link(self.link, self.n_fg_class)
+
+    @attr.gpu
+    def test_predict_gpu(self):
+        self.link.to_gpu()
+        assert_is_instance_segmentation_link(self.link, self.n_fg_class)
+
+    def test_prepare(self):
+        imgs = [
+            np.random.randint(0, 255, size=(3, 480, 640)).astype(np.float32),
+            np.random.randint(0, 255, size=(3, 320, 320)).astype(np.float32),
+        ]
+        x, _, _ = self.link.prepare(imgs)
+        self.assertEqual(x.shape, (2, 3, 800, 1088))
+
+
+testing.run_module(__name__, __file__)
diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn_fpn_resnet.py b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn_fpn_resnet.py
new file mode 100644
index 0000000000..b7cedc364d
--- /dev/null
+++ b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn_fpn_resnet.py
@@ -0,0 +1,68 @@
+import numpy as np
+import unittest
+
+import chainer
+from chainer import testing
+from chainer.testing import attr
+
+from chainercv.links import MaskRCNNFPNResNet101
+from chainercv.links import MaskRCNNFPNResNet50
+
+
+@testing.parameterize(*testing.product({
+    'model': [MaskRCNNFPNResNet50, MaskRCNNFPNResNet101],
+    'n_fg_class': [1, 5, 20],
+}))
+class TestFasterRCNNFPNResNet(unittest.TestCase):
+
+    def setUp(self):
+        self.link = self.model(n_fg_class=self.n_fg_class)
+
+    def _check_call(self):
+        imgs = [
+            np.random.uniform(-1, 1, size=(3, 48, 48)).astype(np.float32),
+            np.random.uniform(-1, 1, size=(3, 32, 64)).astype(np.float32),
+        ]
+        x, _, _ = self.link.prepare(imgs)
+        with chainer.using_config('train', False):
+            self.link(self.link.xp.array(x))
+
+    @attr.slow
+    def test_call_cpu(self):
+        self._check_call()
+
+    @attr.gpu
+    @attr.slow
+    def test_call_gpu(self):
+        self.link.to_gpu()
+        self._check_call()
+
+
+@testing.parameterize(*testing.product({
+    'model': [MaskRCNNFPNResNet50, MaskRCNNFPNResNet101],
+    'n_fg_class': [None, 10, 80],
+    # 'pretrained_model': ['coco', 'imagenet'],
+    'pretrained_model': ['imagenet'],
+}))
+class TestFasterRCNNFPNResNetPretrained(unittest.TestCase):
+
+    @attr.slow
+    def test_pretrained(self):
+        kwargs = {
+            'n_fg_class': self.n_fg_class,
+            'pretrained_model': self.pretrained_model,
+        }
+
+        if self.pretrained_model == 'coco':
+            valid = self.n_fg_class in {None, 80}
+        elif self.pretrained_model == 'imagenet':
+            valid = self.n_fg_class is not None
+
+        if valid:
+            self.model(**kwargs)
+        else:
+            with self.assertRaises(ValueError):
+                self.model(**kwargs)
+
+
+testing.run_module(__name__, __file__)

From 75fb3ffdc4cb9371214acbd29db38d98071dfe82 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Wed, 13 Feb 2019 22:41:00 +0900
Subject: [PATCH 006/100] misc

---
 chainercv/links/model/mask_rcnn/mask_head.py  | 132 ++++++-
 chainercv/links/model/mask_rcnn/mask_rcnn.py  |  20 +-
 .../model/mask_rcnn/mask_rcnn_fpn_resnet.py   |  65 ++++
 examples/mask_rcnn/demo.py                    |  21 +-
 examples/mask_rcnn/train_multi.py             |  23 +-
 examples_tests/mask_rcnn_tests/test_demo.sh   |   8 +
 .../mask_rcnn_tests/test_train_multi.sh       |   4 +
 .../mask_rcnn_tests/test_mask_head.py         | 356 +++++++++---------
 8 files changed, 391 insertions(+), 238 deletions(-)
 create mode 100644 examples_tests/mask_rcnn_tests/test_demo.sh
 create mode 100644 examples_tests/mask_rcnn_tests/test_train_multi.sh

diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py
index 0e98cc50fc..7be88c98cb 100644
--- a/chainercv/links/model/mask_rcnn/mask_head.py
+++ b/chainercv/links/model/mask_rcnn/mask_head.py
@@ -18,6 +18,14 @@
 
 class MaskHead(chainer.Chain):
 
+    """Mask Head network of Mask R-CNN.
+
+    Args:
+        n_class (int): The number of classes including background.
+        scales (tuple of floats): The scales of feature maps.
+
+    """
+
     _canonical_level = 2
     _canonical_scale = 224
     _roi_size = 14
@@ -67,6 +75,30 @@ def __call__(self, hs, rois, roi_indices):
         return self.seg(h)
 
     def distribute(self, rois, roi_indices):
+        """Assigns feature levels to RoIs based on their size.
+
+        Args:
+            rois (array): An array of shape :math:`(R, 4)`, \
+                where :math:`R` is the total number of RoIs in the given batch.
+            roi_indices (array): An array of shape :math:`(R,)`.
+
+        Returns:
+            two lists and one array:
+            :obj:`out_rois`, :obj:`out_roi_indices` and :obj:`order`.
+
+            * **out_rois**: A list of arrays of shape :math:`(R_l, 4)`, \
+                where :math:`R_l` is the number of RoIs in the :math:`l`-th \
+                feature map.
+            * **out_roi_indices** : A list of arrays of shape :math:`(R_l,)`.
+            * **order**: A correspondence between the output and the input. \
+                The relationship below is satisfied.
+
+            .. code:: python
+
+                xp.concatenate(out_rois, axis=0)[order[i]] == rois[i]
+
+        """
+
         size = self.xp.sqrt(self.xp.prod(rois[:, 2:] - rois[:, :2], axis=1))
         level = self.xp.floor(self.xp.log2(
             size / self._canonical_scale + 1e-6)).astype(np.int32)
@@ -75,18 +107,39 @@ def distribute(self, rois, roi_indices):
             level + self._canonical_level, 0, len(self._scales) - 2)
 
         masks = [level == l for l in range(len(self._scales))]
-        rois = [rois[mask] for mask in masks]
-        roi_indices = [roi_indices[mask] for mask in masks]
+        out_rois = [rois[mask] for mask in masks]
+        out_roi_indices = [roi_indices[mask] for mask in masks]
         order = self.xp.argsort(
             self.xp.concatenate([self.xp.where(mask)[0] for mask in masks]))
-        return rois, roi_indices, order
+        return out_rois, out_roi_indices, order
 
     def decode(self, segms, bboxes, labels, sizes):
-        # CPU is used because cv2.resize only accepts numpy arrays.
-        segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms]
-        bboxes = [chainer.backends.cuda.to_cpu(bbox) for bbox in bboxes]
-        labels = [chainer.backends.cuda.to_cpu(label) for label in labels]
-
+        """Decodes back to masks.
+
+        Args:
+            segms (iterable of arrays): An iterable of arrays of
+                shape :math:`(R_n, n\_class, M, M)`.
+            bboxes (iterable of arrays): An iterable of arrays of
+                shape :math:`(R_n, 4)`.
+            labels (iterable of arrays): An iterable of arrays of
+                shape :math:`(R_n,)`.
+            sizes (list of tuples of two ints): A list of
+                :math:`(H_n, W_n)`, where :math:`H_n` and :math:`W_n`
+                are height and width of the :math:`n`-th image.
+
+        Returns:
+            list of arrays:
+            This list contains instance segmentation for each image
+            in the batch.
+            More precisely, this is a list of boolean arrays of shape
+            :math:`(R'_n, H_n, W_n)`, where :math:`R'_n` is the number of
+            bounding boxes in the :math:`n`-th image.
+        """
+
+        xp = chainer.backends.cuda.get_array_module(*segms)
+        if xp != np:
+            raise ValueError(
+                'MaskHead.decode only supports numpy inputs for now.')
         masks = []
         # To work around an issue with cv2.resize (it seems to automatically
         # pad with repeated border values), we manually zero-pad the masks by 1
@@ -101,7 +154,7 @@ def decode(self, segms, bboxes, labels, sizes):
             img_H, img_W = size
             mask = np.zeros((len(bbox), img_H, img_W), dtype=np.bool)
 
-            bbox = expand_boxes(bbox, cv2_expand_scale)
+            bbox = _expand_boxes(bbox, cv2_expand_scale)
             for i, (bb, sgm, lbl) in enumerate(zip(bbox, segm, label)):
                 bb = bb.astype(np.int32)
                 padded_mask[1:-1, 1:-1] = sgm[lbl + 1]
@@ -124,7 +177,7 @@ def decode(self, segms, bboxes, labels, sizes):
         return masks
 
 
-def expand_boxes(bbox, scale):
+def _expand_boxes(bbox, scale):
     """Expand an array of boxes by a given scale."""
     xp = chainer.backends.cuda.get_array_module(bbox)
 
@@ -147,6 +200,42 @@ def expand_boxes(bbox, scale):
 
 def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels,
                   mask_size):
+    """Loss function for Mask Head (pre).
+
+    This function processes RoIs for :func:`mask_loss_post` by
+    selecting RoIs for mask loss calculation and
+    preparing ground truth network output.
+
+    Args:
+        rois (iterable of arrays): An iterable of arrays of
+            shape :math:`(R_l, 4)`, where :math:`R_l` is the number
+            of RoIs in the :math:`l`-th feature map.
+        roi_indices (iterable of arrays): An iterable of arrays of
+            shape :math:`(R_l,)`.
+        gt_masks (iterable of arrays): An iterable of arrays whose shape is
+            :math:`(R_n, H, W)`, where :math:`R_n` is the number of
+            ground truth objects.
+        gt_head_labels (iterable of arrays): An iterable of arrays of
+            shape :math:`(R_l,)`. This is a collection of ground-truth
+            labels assigned to :obj:`rois` during bounding box localization
+            stage. The range of value is :math:`(0, n\_class - 1)`.
+        mask_size (int): Size of the ground truth network output.
+
+    Returns:
+        tuple of four lists:
+        :obj:`mask_rois`, :obj:`mask_roi_indices`,
+        :obj:`gt_segms`, and :obj:`gt_mask_labels`.
+
+        * **rois**: A list of arrays of shape :math:`(R'_l, 4)`, \
+            where :math:`R'_l` is the number of RoIs in the :math:`l`-th \
+            feature map.
+        * **roi_indices**: A list of arrays of shape :math:`(R'_l,)`.
+        * **gt_segms**: A list of arrays of shape :math:`(R'_l, M, M)`. \
+            :math:`M` is the argument :obj:`mask_size`.
+        * **gt_mask_labels**: A list of arrays of shape :math:`(R'_l,)` \
+            indicating the classes of ground truth.
+    """
+
     xp = cuda.get_array_module(*rois)
 
     n_level = len(rois)
@@ -172,7 +261,7 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels,
         mask_roi = mask_rois[index]
         iou = bbox_iou(mask_roi, gt_bbox)
         gt_index = iou.argmax(axis=1)
-        gt_segms[index] = segm_wrt_bbox(
+        gt_segms[index] = _segm_wrt_bbox(
             gt_mask[gt_index], mask_roi, (mask_size, mask_size))
 
     flag_masks = [mask_roi_levels == l for l in range(n_level)]
@@ -185,8 +274,23 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels,
 
 def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels,
                    batchsize):
-    # Just compute loss for the foreground class
-    # divide by the batchsize
+    """Loss function for Mask Head (post).
+
+     Args:
+         segms (array): An array whose shape is :math:`(R, n\_class, M, M)`,
+             where :math:`R` is the total number of RoIs in the given batch.
+         mask_roi_indices (list of arrays): A list of arrays returned by
+             :func:`mask_loss_pre`.
+         gt_segms (list of arrays): A list of arrays returned by
+             :func:`mask_loss_pre`.
+         gt_mask_labels (list of arrays): A list of arrays returned by
+             :func:`mask_loss_pre`.
+         batchsize (int): The size of batch.
+
+     Returns:
+        chainer.Variable:
+        Mask loss.
+    """
     xp = cuda.get_array_module(segms.array)
 
     mask_roi_indices = xp.hstack(mask_roi_indices).astype(np.int32)
@@ -206,7 +310,7 @@ def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels,
     return mask_loss
 
 
-def segm_wrt_bbox(mask, bbox, size):
+def _segm_wrt_bbox(mask, bbox, size):
     xp = chainer.backends.cuda.get_array_module(mask)
 
     bbox = bbox.astype(np.int32)
diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py
index 0ddc65ce13..876ce06060 100644
--- a/chainercv/links/model/mask_rcnn/mask_rcnn.py
+++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py
@@ -1,7 +1,6 @@
 from __future__ import division
 
 import numpy as np
-import PIL
 
 import chainer
 from chainer.backends import cuda
@@ -133,7 +132,6 @@ def predict(self, imgs):
             rois, roi_indices, head_locs, head_confs,
             scales, sizes, self.nms_thresh, self.score_thresh)
 
-        # Rescale bbox to the scaled resolution
         rescaled_bboxes = [bbox * scale for scale, bbox in zip(scales, bboxes)]
         # Change bboxes to RoI and RoI indices format
         mask_rois_before_reordering, mask_roi_indices_before_reordering =\
@@ -153,13 +151,12 @@ def predict(self, imgs):
                      dtype=np.float32)
                  for segm in segms]
 
-        masks = self.mask_head.decode(
-            segms,
-            [bbox / scale for bbox, scale in zip(rescaled_bboxes, scales)],
-            labels, sizes)
-
-        masks = [cuda.to_cpu(mask) for mask in masks]
-        labels = [cuda.to_cpu(label) for label in labels]
+        segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms]
+        bboxes = [chainer.backends.cuda.to_cpu(bbox / scale)
+                  for bbox, scale in zip(rescaled_bboxes, scales)]
+        labels = [chainer.backends.cuda.to_cpu(label) for label in labels]
+        # Currently MaskHead only supports numpy inputs
+        masks = self.mask_head.decode(segms, bboxes, labels, sizes)
         scores = [cuda.to_cpu(score) for score in scores]
         return masks, labels, scores
 
@@ -172,8 +169,9 @@ def prepare(self, imgs, masks=None):
                 and the range of their value is :math:`[0, 255]`.
 
         Returns:
-            Two arrays: preprocessed images and \
-            scales that were caluclated in prepocessing.
+            Three arrays: preprocessed images, \
+            scales that were calculated in preprocessing and
+            the size of the images after resizing.
 
         """
         scales = []
diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py b/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py
index 2e1b132d42..d18f92f628 100644
--- a/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py
+++ b/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py
@@ -17,6 +17,11 @@
 
 class MaskRCNNFPNResNet(MaskRCNN):
 
+    """Base class for Mask R-CNN with ResNet backbone.
+
+    A subclass of this class should have :obj:`_base` and :obj:`_models`.
+    """
+
     def __init__(self, n_fg_class=None, pretrained_model=None):
         param, path = utils.prepare_pretrained_model(
             {'n_fg_class': n_fg_class}, pretrained_model, self._models)
@@ -46,6 +51,36 @@ def __init__(self, n_fg_class=None, pretrained_model=None):
 
 class MaskRCNNFPNResNet50(MaskRCNNFPNResNet):
 
+    """Mask R-CNN with ResNet-50.
+
+    This is a model of Mask R-CNN [#]_.
+    This model uses :class:`~chainercv.links.ResNet50` as
+    its base feature extractor.
+
+    .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017
+
+    Args:
+       n_fg_class (int): The number of classes excluding the background.
+       pretrained_model (string): The weight file to be loaded.
+           This can take :obj:`'coco'`, :obj:`'imagenet'`, `filepath`
+           or :obj:`None`. The default value is :obj:`None`.
+
+            * :obj:`'coco'`: Load weights trained on train split of \
+                MS COCO 2017. \
+                The weight file is downloaded and cached automatically. \
+                :obj:`n_fg_class` must be :obj:`80` or :obj:`None`.
+            * :obj:`'imagenet'`: Load weights of ResNet-50 trained on \
+                ImageNet. \
+                The weight file is downloaded and cached automatically. \
+                This option initializes weights partially and the rest are \
+                initialized randomly. In this case, :obj:`n_fg_class` \
+                can be set to any number.
+            * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \
+                must be specified properly.
+            * :obj:`None`: Do not load weights.
+
+    """
+
     _base = ResNet50
     _models = {
         'coco': {
@@ -58,6 +93,36 @@ class MaskRCNNFPNResNet50(MaskRCNNFPNResNet):
 
 class MaskRCNNFPNResNet101(MaskRCNNFPNResNet):
 
+    """Mask R-CNN with ResNet-101.
+
+    This is a model of Mask R-CNN [#]_.
+    This model uses :class:`~chainercv.links.ResNet101` as
+    its base feature extractor.
+
+    .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017
+
+    Args:
+       n_fg_class (int): The number of classes excluding the background.
+       pretrained_model (string): The weight file to be loaded.
+           This can take :obj:`'coco'`, :obj:`'imagenet'`, `filepath`
+           or :obj:`None`. The default value is :obj:`None`.
+
+            * :obj:`'coco'`: Load weights trained on train split of \
+                MS COCO 2017. \
+                The weight file is downloaded and cached automatically. \
+                :obj:`n_fg_class` must be :obj:`80` or :obj:`None`.
+            * :obj:`'imagenet'`: Load weights of ResNet-101 trained on \
+                ImageNet. \
+                The weight file is downloaded and cached automatically. \
+                This option initializes weights partially and the rest are \
+                initialized randomly. In this case, :obj:`n_fg_class` \
+                can be set to any number.
+            * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \
+                must be specified properly.
+            * :obj:`None`: Do not load weights.
+
+    """
+
     _base = ResNet101
     _models = {
         'coco': {
diff --git a/examples/mask_rcnn/demo.py b/examples/mask_rcnn/demo.py
index aa4b7adbe4..ef16dbdac0 100644
--- a/examples/mask_rcnn/demo.py
+++ b/examples/mask_rcnn/demo.py
@@ -14,18 +14,22 @@
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument('--gpu', type=int, default=-1)
-    parser.add_argument('--model', choices=('resnet50', 'resnet101'))
+    parser.add_argument(
+        '--model',
+        choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'),
+        default='mask_rcnn_fpn_resnet50'
+    )
     group = parser.add_mutually_exclusive_group()
     group.add_argument('--pretrained-model')
     group.add_argument('--snapshot')
     parser.add_argument('image')
     args = parser.parse_args()
 
-    if args.model == 'resnet50':
+    if args.model == 'mask_rcnn_fpn_resnet50':
         model = MaskRCNNFPNResNet50(
             n_fg_class=len(coco_instance_segmentation_label_names),
             pretrained_model=args.pretrained_model)
-    elif args.model == 'resnet101':
+    elif args.model == 'mask_rcnn_fpn_resnet101':
         model = MaskRCNNFPNResNet101(
             n_fg_class=len(coco_instance_segmentation_label_names),
             pretrained_model=args.pretrained_model)
@@ -35,21 +39,12 @@ def main():
         model.to_gpu()
 
     img = utils.read_image(args.image)
-    # bboxes, masks, labels, scores = model.predict([img])
     masks, labels, scores = model.predict([img])
-    # bbox = bboxes[0]
     mask = masks[0]
     label = labels[0]
     score = scores[0]
-
-    # chainercv.visualizations.vis_bbox(
-    #     img, bbox, label, score, label_names=coco_bbox_label_names)
-
-    import numpy as np
-    # flag = np.array([bb[3] - bb[1] < 300 for bb in bbox], dtype=np.bool)
-    flag = np.ones(len(mask), dtype=np.bool)
     chainercv.visualizations.vis_instance_segmentation(
-        img, mask[flag], label[flag], score[flag],
+        img, mask, label, score,
         label_names=coco_instance_segmentation_label_names)
     plt.show()
 
diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py
index 71957c86e0..b7ba734910 100644
--- a/examples/mask_rcnn/train_multi.py
+++ b/examples/mask_rcnn/train_multi.py
@@ -126,25 +126,12 @@ def converter(batch, device=None):
     return tuple(list(v) for v in zip(*batch))
 
 
-def copyparams(dst, src):
-    if isinstance(dst, chainer.Chain):
-        for link in dst.children():
-            copyparams(link, src[link.name])
-    elif isinstance(dst, chainer.ChainList):
-        for i, link in enumerate(dst):
-            copyparams(link, src[i])
-    else:
-        dst.copyparams(src)
-        if isinstance(dst, L.BatchNormalization):
-            dst.avg_mean = src.avg_mean
-            dst.avg_var = src.avg_var
-
-
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        '--model', choices=('resnet50', 'resnet101'),
-        default='resnet50')
+        '--model',
+        choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'),
+        default='mask_rcnn_fpn_resnet50')
     parser.add_argument('--batchsize', type=int, default=16)
     parser.add_argument('--iteration', type=int, default=90000)
     parser.add_argument('--step', type=int, nargs='*', default=[60000, 80000])
@@ -163,11 +150,11 @@ def main():
     comm = chainermn.create_communicator(args.communicator)
     device = comm.intra_rank
 
-    if args.model == 'resnet50':
+    if args.model == 'mask_rcnn_fpn_resnet50':
         model = MaskRCNNFPNResNet50(
             n_fg_class=len(coco_instance_segmentation_label_names),
             pretrained_model='imagenet')
-    elif args.model == 'resnet101':
+    elif args.model == 'mask_rcnn_fpn_resnet101':
         model = MaskRCNNFPNResNet101(
             n_fg_class=len(coco_instance_segmentation_label_names),
             pretrained_model='imagenet')
diff --git a/examples_tests/mask_rcnn_tests/test_demo.sh b/examples_tests/mask_rcnn_tests/test_demo.sh
new file mode 100644
index 0000000000..344ae45c19
--- /dev/null
+++ b/examples_tests/mask_rcnn_tests/test_demo.sh
@@ -0,0 +1,8 @@
+cd examples/mask_rcnn
+curl -L https://cloud.githubusercontent.com/assets/2062128/26187667/9cb236da-3bd5-11e7-8bcf-7dbd4302e2dc.jpg \
+     -o sample.jpg
+
+$PYTHON demo.py --model mask_rcnn_fpn_resnet50 sample.jpg
+$PYTHON demo.py --model mask_rcnn_fpn_resnet50 --gpu 0 sample.jpg
+$PYTHON demo.py --model mask_rcnn_fpn_resnet101 sample.jpg
+$PYTHON demo.py --model mask_rcnn_fpn_resnet101 --gpu 0 sample.jpg
diff --git a/examples_tests/mask_rcnn_tests/test_train_multi.sh b/examples_tests/mask_rcnn_tests/test_train_multi.sh
new file mode 100644
index 0000000000..5f5227d2f7
--- /dev/null
+++ b/examples_tests/mask_rcnn_tests/test_train_multi.sh
@@ -0,0 +1,4 @@
+cd examples/mask_rcnn
+
+$MPIEXEC $PYTHON train_multi.py --model mask_rcnn_fpn_resnet50 --batchsize 4 --iteration 9 --step 6 8
+$MPIEXEC $PYTHON train_multi.py --model mask_rcnn_fpn_resnet101 --batchsize 4 --iteration 9 --step 6 8
diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py
index 9c8760f388..d1832d1b8b 100644
--- a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py
+++ b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py
@@ -17,133 +17,128 @@ def _random_array(xp, shape):
         np.random.uniform(-1, 1, size=shape), dtype=np.float32)
 
 
-# @testing.parameterize(
-#     {'n_class': 1 + 1},
-#     {'n_class': 5 + 1},
-#     {'n_class': 20 + 1},
-# )
-# class TestMaskHead(unittest.TestCase):
-# 
-#     def setUp(self):
-#         self.link = MaskHead(
-#             n_class=self.n_class, scales=(1 / 2, 1 / 4, 1 / 8))
-# 
-#     def _check_call(self):
-#         hs = [
-#             chainer.Variable(_random_array(self.link.xp, (2, 64, 32, 32))),
-#             chainer.Variable(_random_array(self.link.xp, (2, 64, 16, 16))),
-#             chainer.Variable(_random_array(self.link.xp, (2, 64, 8, 8))),
-#         ]
-#         rois = [
-#             self.link.xp.array(((4, 1, 6, 3),), dtype=np.float32),
-#             self.link.xp.array(
-#                 ((0, 1, 2, 3), (5, 4, 10, 6)), dtype=np.float32),
-#             self.link.xp.array(((10, 4, 12, 10),), dtype=np.float32),
-#         ]
-#         roi_indices = [
-#             self.link.xp.array((0,), dtype=np.int32),
-#             self.link.xp.array((1, 0), dtype=np.int32),
-#             self.link.xp.array((1,), dtype=np.int32),
-#         ]
-# 
-#         segs = self.link(hs, rois, roi_indices)
-# 
-#         self.assertIsInstance(segs, chainer.Variable)
-#         self.assertIsInstance(segs.array, self.link.xp.ndarray)
-#         self.assertEqual(
-#             segs.shape,
-#             (4, self.n_class, self.link.mask_size, self.link.mask_size))
-# 
-#     def test_call_cpu(self):
-#         self._check_call()
-# 
-#     @attr.gpu
-#     def test_call_gpu(self):
-#         self.link.to_gpu()
-#         self._check_call()
-# 
-#     def _check_distribute(self):
-#         rois = self.link.xp.array((
-#             (0, 0, 10, 10),
-#             (0, 1000, 0, 1000),
-#             (0, 0, 224, 224),
-#             (100, 100, 224, 224),
-#         ), dtype=np.float32)
-#         roi_indices = self.link.xp.array((0, 1, 0, 0), dtype=np.int32)
-#         n_roi = len(roi_indices)
-# 
-#         rois, roi_indices, order = self.link.distribute(rois, roi_indices)
-# 
-#         self.assertEqual(len(rois), 3)
-#         self.assertEqual(len(roi_indices), 3)
-#         for l in range(3):
-#             self.assertIsInstance(rois[l], self.link.xp.ndarray)
-#             self.assertIsInstance(roi_indices[l], self.link.xp.ndarray)
-# 
-#             self.assertEqual(rois[l].shape[0], roi_indices[l].shape[0])
-#             self.assertEqual(rois[l].shape[1:], (4,))
-#             self.assertEqual(roi_indices[l].shape[1:], ())
-# 
-#         self.assertEqual(sum(rois[l].shape[0] for l in range(3)), 4)
-# 
-#         self.assertEqual(len(order), n_roi)
-#         self.assertIsInstance(order, self.link.xp.ndarray)
-# 
-#     def test_distribute_cpu(self):
-#         self._check_distribute()
-# 
-#     @attr.gpu
-#     def test_distribute_gpu(self):
-#         self.link.to_gpu()
-#         self._check_distribute()
-# 
-#     def _check_decode(self):
-#         segms = [
-#             _random_array(
-#                 self.link.xp,
-#                 (1, self.n_class, self.link.mask_size, self.link.mask_size)),
-#             _random_array(
-#                 self.link.xp,
-#                 (2, self.n_class, self.link.mask_size, self.link.mask_size)),
-#             _random_array(
-#                 self.link.xp,
-#                 (1, self.n_class, self.link.mask_size, self.link.mask_size))
-#         ]
-#         bboxes = [
-#             self.link.xp.array(((4, 1, 6, 3),), dtype=np.float32),
-#             self.link.xp.array(
-#                 ((0, 1, 2, 3), (5, 4, 10, 6)), dtype=np.float32),
-#             self.link.xp.array(((10, 4, 12, 10),), dtype=np.float32),
-#         ]
-#         labels = [
-#             self.link.xp.random.randint(
-#                 0, self.n_class - 1, size=(1,), dtype=np.int32),
-#             self.link.xp.random.randint(
-#                 0, self.n_class - 1, size=(2,), dtype=np.int32),
-#             self.link.xp.random.randint(
-#                 0, self.n_class - 1, size=(1,), dtype=np.int32),
-#         ]
-# 
-#         sizes = [(56, 56), (48, 48), (72, 72)]
-#         masks = self.link.decode(
-#             segms, bboxes, labels, sizes)
-# 
-#         self.assertEqual(len(masks), 3)
-#         for n in range(3):
-#             self.assertIsInstance(masks[n], self.link.xp.ndarray)
-# 
-#             self.assertEqual(masks[n].shape[0], labels[n].shape[0])
-#             self.assertEqual(masks[n].shape[1:], sizes[n])
-# 
-#     def test_decode_cpu(self):
-#         self._check_decode()
-# 
-#     @attr.gpu
-#     def test_decode_gpu(self):
-#         self.link.to_gpu()
-#         self._check_decode()
-# 
-# 
+@testing.parameterize(
+    {'n_class': 1 + 1},
+    {'n_class': 5 + 1},
+    {'n_class': 20 + 1},
+)
+class TestMaskHead(unittest.TestCase):
+
+    def setUp(self):
+        self.link = MaskHead(
+            n_class=self.n_class, scales=(1 / 2, 1 / 4, 1 / 8))
+
+    def _check_call(self):
+        hs = [
+            chainer.Variable(_random_array(self.link.xp, (2, 64, 32, 32))),
+            chainer.Variable(_random_array(self.link.xp, (2, 64, 16, 16))),
+            chainer.Variable(_random_array(self.link.xp, (2, 64, 8, 8))),
+        ]
+        rois = [
+            self.link.xp.array(((4, 1, 6, 3),), dtype=np.float32),
+            self.link.xp.array(
+                ((0, 1, 2, 3), (5, 4, 10, 6)), dtype=np.float32),
+            self.link.xp.array(((10, 4, 12, 10),), dtype=np.float32),
+        ]
+        roi_indices = [
+            self.link.xp.array((0,), dtype=np.int32),
+            self.link.xp.array((1, 0), dtype=np.int32),
+            self.link.xp.array((1,), dtype=np.int32),
+        ]
+
+        segs = self.link(hs, rois, roi_indices)
+
+        self.assertIsInstance(segs, chainer.Variable)
+        self.assertIsInstance(segs.array, self.link.xp.ndarray)
+        self.assertEqual(
+            segs.shape,
+            (4, self.n_class, self.link.mask_size, self.link.mask_size))
+
+    def test_call_cpu(self):
+        self._check_call()
+
+    @attr.gpu
+    def test_call_gpu(self):
+        self.link.to_gpu()
+        self._check_call()
+
+    def _check_distribute(self):
+        rois = self.link.xp.array((
+            (0, 0, 10, 10),
+            (0, 1000, 0, 1000),
+            (0, 0, 224, 224),
+            (100, 100, 224, 224),
+        ), dtype=np.float32)
+        roi_indices = self.link.xp.array((0, 1, 0, 0), dtype=np.int32)
+        n_roi = len(roi_indices)
+
+        rois, roi_indices, order = self.link.distribute(rois, roi_indices)
+
+        self.assertEqual(len(rois), 3)
+        self.assertEqual(len(roi_indices), 3)
+        for l in range(3):
+            self.assertIsInstance(rois[l], self.link.xp.ndarray)
+            self.assertIsInstance(roi_indices[l], self.link.xp.ndarray)
+
+            self.assertEqual(rois[l].shape[0], roi_indices[l].shape[0])
+            self.assertEqual(rois[l].shape[1:], (4,))
+            self.assertEqual(roi_indices[l].shape[1:], ())
+
+        self.assertEqual(sum(rois[l].shape[0] for l in range(3)), 4)
+
+        self.assertEqual(len(order), n_roi)
+        self.assertIsInstance(order, self.link.xp.ndarray)
+
+    def test_distribute_cpu(self):
+        self._check_distribute()
+
+    @attr.gpu
+    def test_distribute_gpu(self):
+        self.link.to_gpu()
+        self._check_distribute()
+
+    def _check_decode(self):
+        segms = [
+            _random_array(
+                self.link.xp,
+                (1, self.n_class, self.link.mask_size, self.link.mask_size)),
+            _random_array(
+                self.link.xp,
+                (2, self.n_class, self.link.mask_size, self.link.mask_size)),
+            _random_array(
+                self.link.xp,
+                (1, self.n_class, self.link.mask_size, self.link.mask_size))
+        ]
+        bboxes = [
+            self.link.xp.array(((4, 1, 6, 3),), dtype=np.float32),
+            self.link.xp.array(
+                ((0, 1, 2, 3), (5, 4, 10, 6)), dtype=np.float32),
+            self.link.xp.array(((10, 4, 12, 10),), dtype=np.float32),
+        ]
+        labels = [
+            self.link.xp.random.randint(
+                0, self.n_class - 1, size=(1,), dtype=np.int32),
+            self.link.xp.random.randint(
+                0, self.n_class - 1, size=(2,), dtype=np.int32),
+            self.link.xp.random.randint(
+                0, self.n_class - 1, size=(1,), dtype=np.int32),
+        ]
+
+        sizes = [(56, 56), (48, 48), (72, 72)]
+        masks = self.link.decode(
+            segms, bboxes, labels, sizes)
+
+        self.assertEqual(len(masks), 3)
+        for n in range(3):
+            self.assertIsInstance(masks[n], self.link.xp.ndarray)
+
+            self.assertEqual(masks[n].shape[0], labels[n].shape[0])
+            self.assertEqual(masks[n].shape[1:], sizes[n])
+
+    def test_decode_cpu(self):
+        self._check_decode()
+
+
 class TestMaskHeadLoss(unittest.TestCase):
 
     def _check_mask_loss_pre(self, xp):
@@ -161,13 +156,14 @@ def _check_mask_loss_pre(self, xp):
             xp.array((1,), dtype=np.int32),
         ]
         masks = [
-            _random_array(xp, (n_class, mask_size, mask_size)),
-            _random_array(xp, (n_class, mask_size, mask_size)),
-            _random_array(xp, (n_class, mask_size, mask_size)),
+            _random_array(xp, (n_class, 60, 70)),
+            _random_array(xp, (n_class, 60, 70)),
+            _random_array(xp, (n_class, 60, 70)),
         ]
         labels = [
-            xp.array((10, 4), dtype=np.float32),
-            xp.array((1,), dtype=np.float32),
+            xp.array((10, 4), dtype=np.int32),
+            xp.array((1,), dtype=np.int32),
+            xp.array((3,), dtype=np.int32),
         ]
         rois, roi_indices, gt_segms, gt_mask_labels = mask_loss_pre(
             rois, roi_indices, masks, labels, mask_size)
@@ -176,19 +172,19 @@ def _check_mask_loss_pre(self, xp):
         self.assertEqual(len(roi_indices), 3)
         self.assertEqual(len(gt_segms), 3)
         self.assertEqual(len(gt_mask_labels), 3)
-        # for l in range(3):
-        #     self.assertIsInstance(rois[l], xp.ndarray)
-        #     self.assertIsInstance(roi_indices[l], xp.ndarray)
-        #     self.assertIsInstance(gt_locs[l], xp.ndarray)
-        #     self.assertIsInstance(gt_labels[l], xp.ndarray)
-
-        #     self.assertEqual(rois[l].shape[0], roi_indices[l].shape[0])
-        #     self.assertEqual(rois[l].shape[0], gt_locs[l].shape[0])
-        #     self.assertEqual(rois[l].shape[0], gt_labels[l].shape[0])
-        #     self.assertEqual(rois[l].shape[1:], (4,))
-        #     self.assertEqual(roi_indices[l].shape[1:], ())
-        #     self.assertEqual(gt_locs[l].shape[1:], (4,))
-        #     self.assertEqual(gt_labels[l].shape[1:], ())
+        for l in range(3):
+            self.assertIsInstance(rois[l], xp.ndarray)
+            self.assertIsInstance(roi_indices[l], xp.ndarray)
+            self.assertIsInstance(gt_segms[l], xp.ndarray)
+            self.assertIsInstance(gt_mask_labels[l], xp.ndarray)
+
+            self.assertEqual(rois[l].shape[0], roi_indices[l].shape[0])
+            self.assertEqual(rois[l].shape[0], gt_segms[l].shape[0])
+            self.assertEqual(rois[l].shape[0], gt_mask_labels[l].shape[0])
+            self.assertEqual(rois[l].shape[1:], (4,))
+            self.assertEqual(roi_indices[l].shape[1:], ())
+            self.assertEqual(gt_segms[l].shape[1:], (mask_size, mask_size))
+            self.assertEqual(gt_mask_labels[l].shape[1:], ())
 
     def test_mask_loss_pre_cpu(self):
         self._check_mask_loss_pre(np)
@@ -198,43 +194,39 @@ def test_mask_loss_pre_gpu(self):
         import cupy
         self._check_mask_loss_pre(cupy)
 
-    # def _check_head_loss_post(self, xp):
-    #     locs = chainer.Variable(_random_array(xp, (20, 81, 4)))
-    #     confs = chainer.Variable(_random_array(xp, (20, 81)))
-    #     roi_indices = [
-    #         xp.random.randint(0, 2, size=5).astype(np.int32),
-    #         xp.random.randint(0, 2, size=7).astype(np.int32),
-    #         xp.random.randint(0, 2, size=8).astype(np.int32),
-    #     ]
-    #     gt_locs = [
-    #         _random_array(xp, (5, 4)),
-    #         _random_array(xp, (7, 4)),
-    #         _random_array(xp, (8, 4)),
-    #     ]
-    #     gt_labels = [
-    #         xp.random.randint(0, 80, size=5).astype(np.int32),
-    #         xp.random.randint(0, 80, size=7).astype(np.int32),
-    #         xp.random.randint(0, 80, size=8).astype(np.int32),
-    #     ]
-
-    #     loc_loss, conf_loss = head_loss_post(
-    #         locs, confs, roi_indices, gt_locs, gt_labels, 2)
-
-    #     self.assertIsInstance(loc_loss, chainer.Variable)
-    #     self.assertIsInstance(loc_loss.array, xp.ndarray)
-    #     self.assertEqual(loc_loss.shape, ())
-
-    #     self.assertIsInstance(conf_loss, chainer.Variable)
-    #     self.assertIsInstance(conf_loss.array, xp.ndarray)
-    #     self.assertEqual(conf_loss.shape, ())
-
-    # def test_head_loss_post_cpu(self):
-    #     self._check_head_loss_post(np)
-
-    # @attr.gpu
-    # def test_head_loss_post_gpu(self):
-    #     import cupy
-    #     self._check_head_loss_post(cupy)
+    def _check_mask_loss_post(self, xp):
+        """Check that mask_loss_post returns a scalar chainer.Variable."""
+        B = 2
+        segms = chainer.Variable(_random_array(xp, (20, 81, 28, 28)))
+        mask_roi_indices = [
+            xp.random.randint(0, B, size=5).astype(np.int32),
+            xp.random.randint(0, B, size=7).astype(np.int32),
+            xp.random.randint(0, B, size=8).astype(np.int32),
+        ]
+        gt_segms = [
+            _random_array(xp, (5, 28, 28)),
+            _random_array(xp, (7, 28, 28)),
+            _random_array(xp, (8, 28, 28)),
+        ]
+        gt_mask_labels = [
+            xp.random.randint(0, 80, size=5).astype(np.int32),
+            xp.random.randint(0, 80, size=7).astype(np.int32),
+            xp.random.randint(0, 80, size=8).astype(np.int32),
+        ]
+        mask_loss = mask_loss_post(
+            segms, mask_roi_indices, gt_segms, gt_mask_labels, B)
+
+        self.assertIsInstance(mask_loss, chainer.Variable)
+        self.assertIsInstance(mask_loss.array, xp.ndarray)
+        self.assertEqual(mask_loss.shape, ())
+
+    def test_mask_loss_post_cpu(self):
+        self._check_mask_loss_post(np)
+
+    @attr.gpu
+    def test_mask_loss_post_gpu(self):
+        import cupy
+        self._check_mask_loss_post(cupy)
 
 
 testing.run_module(__name__, __file__)

From e19c8d03f0fc289b6e082c43a705f0c6f925b466 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Thu, 14 Feb 2019 10:33:46 +0900
Subject: [PATCH 007/100] COCOPointDataset

---
 chainercv/datasets/__init__.py                |   2 +
 chainercv/datasets/coco/coco_point_dataset.py | 111 ++++++++++++++++++
 chainercv/datasets/coco/coco_utils.py         |  21 ++++
 3 files changed, 134 insertions(+)
 create mode 100644 chainercv/datasets/coco/coco_point_dataset.py

diff --git a/chainercv/datasets/__init__.py b/chainercv/datasets/__init__.py
index 1370b54c75..c2ca52af4d 100644
--- a/chainercv/datasets/__init__.py
+++ b/chainercv/datasets/__init__.py
@@ -12,9 +12,11 @@
 from chainercv.datasets.cityscapes.cityscapes_utils import cityscapes_semantic_segmentation_label_names  # NOQA
 from chainercv.datasets.coco.coco_bbox_dataset import COCOBboxDataset  # NOQA
 from chainercv.datasets.coco.coco_instance_segmentation_dataset import COCOInstanceSegmentationDataset  # NOQA
+from chainercv.datasets.coco.coco_point_dataset import COCOPointDataset  # NOQA
 from chainercv.datasets.coco.coco_semantic_segmentation_dataset import COCOSemanticSegmentationDataset  # NOQA
 from chainercv.datasets.coco.coco_utils import coco_bbox_label_names  # NOQA
 from chainercv.datasets.coco.coco_utils import coco_instance_segmentation_label_names  # NOQA
+from chainercv.datasets.coco.coco_utils import coco_point_names  # NOQA
 from chainercv.datasets.coco.coco_utils import coco_semantic_segmentation_label_colors  # NOQA
 from chainercv.datasets.coco.coco_utils import coco_semantic_segmentation_label_names  # NOQA
 from chainercv.datasets.cub.cub_label_dataset import CUBLabelDataset  # NOQA
diff --git a/chainercv/datasets/coco/coco_point_dataset.py b/chainercv/datasets/coco/coco_point_dataset.py
new file mode 100644
index 0000000000..6438ef0bf2
--- /dev/null
+++ b/chainercv/datasets/coco/coco_point_dataset.py
@@ -0,0 +1,111 @@
+from collections import defaultdict
+import json
+import numpy as np
+import os
+
+from chainercv.chainer_experimental.datasets.sliceable import GetterDataset
+from chainercv.datasets.coco.coco_instances_base_dataset import \
+    COCOInstancesBaseDataset
+from chainercv.datasets.coco.coco_utils import get_coco
+from chainercv import utils
+
+
+class COCOPointDataset(GetterDataset):
+    """Keypoint dataset read from COCO ``person_keypoints`` annotations.
+
+    ``point`` has shape ``(R, K, 3)``; its last axis holds ``(y, x, v)``.
+    """
+
+    def __init__(self, data_dir='auto', split='train', year='2017',
+                 use_crowded=False, return_area=False, return_crowded=False):
+        super(COCOPointDataset, self).__init__()
+        self.use_crowded = use_crowded
+        if data_dir == 'auto':
+            data_dir = get_coco(split, split, year, 'instances')
+
+        self.img_root = os.path.join(
+            data_dir, 'images', '{}{}'.format(split, year))
+        self.data_dir = data_dir
+
+        point_anno_path = os.path.join(
+            self.data_dir, 'annotations',
+            'person_keypoints_{}{}.json'.format(split, year))
+        with open(point_anno_path, 'r') as f:
+            annos = json.load(f)
+
+        self.id_to_prop = {prop['id']: prop for prop in annos['images']}
+        self.ids = sorted(self.id_to_prop)
+        self.cat_ids = [cat['id'] for cat in annos['categories']]
+
+        # Images without annotations resolve to an empty list on lookup.
+        self.id_to_anno = defaultdict(list)
+        for anno in annos['annotations']:
+            self.id_to_anno[anno['image_id']].append(anno)
+
+        self.add_getter('img', self._get_image)
+        self.add_getter(
+            ['point', 'bbox', 'label', 'area', 'crowded'],
+            self._get_annotations)
+        keys = ('img', 'point', 'bbox', 'label')
+        if return_area:
+            keys += ('area',)
+        if return_crowded:
+            keys += ('crowded',)
+        self.keys = keys
+
+    def __len__(self):
+        return len(self.ids)
+
+    def _get_image(self, i):
+        img_path = os.path.join(
+            self.img_root, self.id_to_prop[self.ids[i]]['file_name'])
+        return utils.read_image(img_path, dtype=np.float32, color=True)
+
+    def _get_annotations(self, i):
+        # List[{'keypoints', 'area', 'iscrowd',
+        #       'image_id', 'bbox', 'category_id', ...}]
+        annotation = self.id_to_anno[self.ids[i]]
+        bbox = np.array([ann['bbox'] for ann in annotation],
+                        dtype=np.float32)
+        if len(bbox) == 0:
+            bbox = np.zeros((0, 4), dtype=np.float32)
+        # (x, y, width, height) -> (x_min, y_min, x_max, y_max)
+        bbox[:, 2] = bbox[:, 0] + bbox[:, 2]
+        bbox[:, 3] = bbox[:, 1] + bbox[:, 3]
+        # (x_min, y_min, x_max, y_max) -> (y_min, x_min, y_max, x_max)
+        bbox = bbox[:, [1, 0, 3, 2]]
+
+        label = np.array([self.cat_ids.index(ann['category_id'])
+                          for ann in annotation], dtype=np.int32)
+
+        area = np.array([ann['area'] for ann in annotation], dtype=np.float32)
+        crowded = np.array([ann['iscrowd'] for ann in annotation], dtype=bool)
+
+        point = np.array(
+            [anno['keypoints'] for anno in annotation], dtype=np.float32)
+        if len(point) > 0:
+            x = point[:, 0::3]
+            y = point[:, 1::3]
+            # 0: not labeled; 1: labeled but not visible;
+            # 2: labeled and visible
+            v = point[:, 2::3]
+            point = np.stack((y, x, v), axis=2)
+        else:
+            # Keep rank 3 so that the boolean masking below still works.
+            point = np.zeros((0, 0, 3), dtype=np.float32)
+
+        # Remove invalid boxes
+        bbox_area = np.prod(bbox[:, 2:] - bbox[:, :2], axis=1)
+        keep_mask = np.logical_and(bbox[:, 0] <= bbox[:, 2],
+                                   bbox[:, 1] <= bbox[:, 3])
+        keep_mask = np.logical_and(keep_mask, bbox_area > 0)
+
+        if not self.use_crowded:
+            keep_mask = np.logical_and(keep_mask, np.logical_not(crowded))
+
+        point = point[keep_mask]
+        bbox = bbox[keep_mask]
+        label = label[keep_mask]
+        area = area[keep_mask]
+        crowded = crowded[keep_mask]
+        return point, bbox, label, area, crowded
diff --git a/chainercv/datasets/coco/coco_utils.py b/chainercv/datasets/coco/coco_utils.py
index cf1a6e195e..10841d567a 100644
--- a/chainercv/datasets/coco/coco_utils.py
+++ b/chainercv/datasets/coco/coco_utils.py
@@ -439,3 +439,24 @@ def get_coco(split, img_split, year, mode):
 
 
 coco_instance_segmentation_label_names = coco_bbox_label_names
+
+
+coco_point_names = [
+    'nose',
+    'left_eye',
+    'right_eye',
+    'left_ear',
+    'right_ear',
+    'left_shoulder',
+    'right_shoulder',
+    'left_elbow',
+    'right_elbow',
+    'left_wrist',
+    'right_wrist',
+    'left_hip',
+    'right_hip',
+    'left_knee',
+    'right_knee',
+    'left_ankle',
+    'right_ankle'
+]

From 1d97870bb9ed4c4e6c59c0be6d56da890fcd4d0c Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 15 Feb 2019 16:31:25 +0900
Subject: [PATCH 008/100] add vis_coco_point

---
 chainercv/visualizations/__init__.py       |   1 +
 chainercv/visualizations/vis_coco_point.py | 118 +++++++++++++++++++++
 2 files changed, 119 insertions(+)
 create mode 100644 chainercv/visualizations/vis_coco_point.py

diff --git a/chainercv/visualizations/__init__.py b/chainercv/visualizations/__init__.py
index 2adf9f7ba8..33ef5a9d1f 100644
--- a/chainercv/visualizations/__init__.py
+++ b/chainercv/visualizations/__init__.py
@@ -1,4 +1,5 @@
 from chainercv.visualizations.vis_bbox import vis_bbox  # NOQA
+from chainercv.visualizations.vis_coco_point import vis_coco_point  # NOQA
 from chainercv.visualizations.vis_image import vis_image  # NOQA
 from chainercv.visualizations.vis_instance_segmentation import vis_instance_segmentation  # NOQA
 from chainercv.visualizations.vis_point import vis_point  # NOQA
diff --git a/chainercv/visualizations/vis_coco_point.py b/chainercv/visualizations/vis_coco_point.py
new file mode 100644
index 0000000000..438ff278e9
--- /dev/null
+++ b/chainercv/visualizations/vis_coco_point.py
@@ -0,0 +1,118 @@
+from __future__ import division
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+from chainercv.datasets import coco_point_names
+from chainercv.visualizations.vis_image import vis_image
+
+
+coco_point_skeleton = [
+    [coco_point_names.index('left_eye'),
+     coco_point_names.index('right_eye')],
+    [coco_point_names.index('left_eye'),
+     coco_point_names.index('nose')],
+    [coco_point_names.index('right_eye'),
+     coco_point_names.index('nose')],
+    [coco_point_names.index('right_eye'),
+     coco_point_names.index('right_ear')],
+    [coco_point_names.index('left_eye'),
+     coco_point_names.index('left_ear')],
+    [coco_point_names.index('right_shoulder'),
+     coco_point_names.index('right_elbow')],
+    [coco_point_names.index('right_elbow'),
+     coco_point_names.index('right_wrist')],
+    [coco_point_names.index('left_shoulder'),
+     coco_point_names.index('left_elbow')],
+    [coco_point_names.index('left_elbow'),
+     coco_point_names.index('left_wrist')],
+    [coco_point_names.index('right_hip'),
+     coco_point_names.index('right_knee')],
+    [coco_point_names.index('right_knee'),
+     coco_point_names.index('right_ankle')],
+    [coco_point_names.index('left_hip'),
+     coco_point_names.index('left_knee')],
+    [coco_point_names.index('left_knee'),
+     coco_point_names.index('left_ankle')],
+    [coco_point_names.index('right_shoulder'),
+     coco_point_names.index('left_shoulder')],
+    [coco_point_names.index('right_hip'),
+     coco_point_names.index('left_hip')]
+]
+
+
+def vis_coco_point(img, point, point_score, thresh=2, ax=None):
+    """Plot COCO keypoints and skeleton edges that pass ``thresh``."""
+    from matplotlib import pyplot as plt
+    # Returns newly instantiated matplotlib.axes.Axes object if ax is None
+    ax = vis_image(img, ax=ax)
+
+    cmap = plt.get_cmap('rainbow')
+    colors = [cmap(i) for i in np.linspace(0, 1, len(coco_point_skeleton) + 2)]
+
+    # plt.autoscale(False)
+    for i in range(len(point)):
+        pnt = point[i]
+        pnt_sc = point_score[i]
+        for l in range(len(coco_point_skeleton)):
+            i0 = coco_point_skeleton[l][0]
+            i1 = coco_point_skeleton[l][1]
+            s0 = pnt_sc[i0]
+            y0 = pnt[i0, 0]
+            x0 = pnt[i0, 1]
+            s1 = pnt_sc[i1]
+            y1 = pnt[i1, 0]
+            x1 = pnt[i1, 1]
+            if s0 > thresh and s1 > thresh:
+                line = ax.plot([x0, x1], [y0, y1])
+                plt.setp(line, color=colors[l], linewidth=1.0, alpha=0.7)
+            if s0 > thresh:
+                ax.plot(
+                    x0, y0, '.', color=colors[l],
+                    markersize=3.0, alpha=0.7)
+            if s1 > thresh:
+                ax.plot(
+                    x1, y1, '.', color=colors[l],
+                    markersize=3.0, alpha=0.7)
+
+        # for better visualization, add mid shoulder / mid hip
+        mid_shoulder = (
+            pnt[coco_point_names.index('right_shoulder'), :2] +
+            pnt[coco_point_names.index('left_shoulder'), :2]) / 2
+        mid_shoulder_sc = np.minimum(
+            pnt[coco_point_names.index('right_shoulder'), 2],
+            pnt[coco_point_names.index('left_shoulder'), 2])
+
+        mid_hip = (
+            pnt[coco_point_names.index('right_hip'), :2] +
+            pnt[coco_point_names.index('left_hip'), :2]) / 2
+        mid_hip_sc = np.minimum(
+            pnt[coco_point_names.index('right_hip'), 2],
+            pnt[coco_point_names.index('left_hip'), 2])
+        if (mid_shoulder_sc > thresh and
+                pnt[coco_point_names.index('nose'), 2] > thresh):
+            y = [mid_shoulder[0], pnt[coco_point_names.index('nose'), 0]]
+            x = [mid_shoulder[1], pnt[coco_point_names.index('nose'), 1]]
+            line = ax.plot(x, y)
+            plt.setp(
+                line, color=colors[len(coco_point_skeleton)],
+                linewidth=1.0, alpha=0.7)
+        if (mid_shoulder_sc > thresh and mid_hip_sc > thresh):
+            y = [mid_shoulder[0], mid_hip[0]]
+            x = [mid_shoulder[1], mid_hip[1]]
+            line = ax.plot(x, y)
+            plt.setp(
+                line, color=colors[len(coco_point_skeleton) + 1],
+                linewidth=1.0, alpha=0.7)
+
+    return ax
+
+
+if __name__ == '__main__':
+    data = np.load('vis_point.npz')
+    img = data['img']
+    point = data['point']
+    point_score = data['point_score']
+    # plt.imshow(img)
+    vis_coco_point(img, point, point_score)
+    plt.show()

From 43e8acca63eec94ffcfe9e0ad12912876da7df96 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 15 Feb 2019 17:51:39 +0900
Subject: [PATCH 009/100] handle the case when #RoI is 0

---
 examples/mask_rcnn/train_multi.py | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py
index b7ba734910..c8e030d1a9 100644
--- a/examples/mask_rcnn/train_multi.py
+++ b/examples/mask_rcnn/train_multi.py
@@ -93,19 +93,23 @@ def __call__(self, imgs, masks, labels, bboxes):
         mask_rois, mask_roi_indices, gt_segms, gt_mask_labels = mask_loss_pre(
             rois, roi_indices, masks, head_gt_labels,
             self.model.mask_head.mask_size)
-        segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)
-        mask_loss = mask_loss_post(
-            segms, mask_roi_indices, gt_segms, gt_mask_labels, B)
-
-        loss = (rpn_loc_loss + rpn_conf_loss +
+        n_roi = sum([len(roi) for roi in mask_rois])
+        if n_roi > 0:
+            segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)
+            mask_loss = mask_loss_post(
+                segms, mask_roi_indices, gt_segms, gt_mask_labels, B)
+            loss = (rpn_loc_loss + rpn_conf_loss + 
                 head_loc_loss + head_conf_loss + mask_loss)
-        chainer.reporter.report({
-            'loss': loss,
-            'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss,
-            'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss,
-            'loss/mask': mask_loss},
-            self)
-
+            chainer.reporter.report({
+                'loss': loss,
+                'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss,
+                'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss,
+                'loss/mask': mask_loss},
+                self)
+        else:
+            # ChainerMN hangs when a subset of nodes has a different
+            # computational graph from the rest.
+            loss = chainer.Variable(self.xp.array(0, dtype=np.float32))
         return loss
 
 

From cb2ad16719bf9bdd00a88cf1a23af701fdc08039 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Sat, 16 Feb 2019 14:43:14 +0900
Subject: [PATCH 010/100] fix point score lookup in vis_coco_point

---
 chainercv/visualizations/vis_coco_point.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/chainercv/visualizations/vis_coco_point.py b/chainercv/visualizations/vis_coco_point.py
index 438ff278e9..9666bbbfe6 100644
--- a/chainercv/visualizations/vis_coco_point.py
+++ b/chainercv/visualizations/vis_coco_point.py
@@ -51,9 +51,7 @@ def vis_coco_point(img, point, point_score, thresh=2, ax=None):
     colors = [cmap(i) for i in np.linspace(0, 1, len(coco_point_skeleton) + 2)]
 
     # plt.autoscale(False)
-    for i in range(len(point)):
-        pnt = point[i]
-        pnt_sc = point_score[i]
+    for pnt, pnt_sc in zip(point, point_score):
         for l in range(len(coco_point_skeleton)):
             i0 = coco_point_skeleton[l][0]
             i1 = coco_point_skeleton[l][1]
@@ -80,17 +78,17 @@ def vis_coco_point(img, point, point_score, thresh=2, ax=None):
             pnt[coco_point_names.index('right_shoulder'), :2] +
             pnt[coco_point_names.index('left_shoulder'), :2]) / 2
         mid_shoulder_sc = np.minimum(
-            pnt[coco_point_names.index('right_shoulder'), 2],
-            pnt[coco_point_names.index('left_shoulder'), 2])
+            pnt_sc[coco_point_names.index('right_shoulder')],
+            pnt_sc[coco_point_names.index('left_shoulder')])
 
         mid_hip = (
             pnt[coco_point_names.index('right_hip'), :2] +
             pnt[coco_point_names.index('left_hip'), :2]) / 2
         mid_hip_sc = np.minimum(
-            pnt[coco_point_names.index('right_hip'), 2],
-            pnt[coco_point_names.index('left_hip'), 2])
+            pnt_sc[coco_point_names.index('right_hip')],
+            pnt_sc[coco_point_names.index('left_hip')])
         if (mid_shoulder_sc > thresh and
-                pnt[coco_point_names.index('nose'), 2] > thresh):
+                pnt_sc[coco_point_names.index('nose')] > thresh):
             y = [mid_shoulder[0], pnt[coco_point_names.index('nose'), 0]]
             x = [mid_shoulder[1], pnt[coco_point_names.index('nose'), 1]]
             line = ax.plot(x, y)

From a4319e052a51cdc6541265149f8ede8e2fc1da34 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Mon, 18 Feb 2019 10:18:57 +0900
Subject: [PATCH 011/100] delete mask option for MaskRCNN.prepare

---
 chainercv/links/model/mask_rcnn/mask_rcnn.py | 2 +-
 examples/mask_rcnn/train_multi.py            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py
index 876ce06060..6de944fc34 100644
--- a/chainercv/links/model/mask_rcnn/mask_rcnn.py
+++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py
@@ -160,7 +160,7 @@ def predict(self, imgs):
         scores = [cuda.to_cpu(score) for score in scores]
         return masks, labels, scores
 
-    def prepare(self, imgs, masks=None):
+    def prepare(self, imgs):
         """Preprocess images.
 
         Args:
diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py
index c8e030d1a9..478a263b26 100644
--- a/examples/mask_rcnn/train_multi.py
+++ b/examples/mask_rcnn/train_multi.py
@@ -57,7 +57,7 @@ def prepare_mask(self, masks, resized_sizes, pad_size):
         return pad_masks
 
     def __call__(self, imgs, masks, labels, bboxes):
-        x, scales, resized_sizes = self.model.prepare(imgs, masks)
+        x, scales, resized_sizes = self.model.prepare(imgs)
         B, _, pad_H, pad_W = x.shape
         masks = self.prepare_mask(masks, resized_sizes, (pad_H, pad_W))
         bboxes = [self.xp.array(bbox) * scale

From 496dd9369a86390993dcf7c717ec641463627e03 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Mon, 18 Feb 2019 14:32:09 +0900
Subject: [PATCH 012/100] use MultiprocessIterator

---
 chainercv/links/model/mask_rcnn/mask_rcnn.py |  14 +--
 examples/mask_rcnn/train_multi.py            | 119 +++++++++++++------
 2 files changed, 91 insertions(+), 42 deletions(-)

diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py
index 876ce06060..4158846ed4 100644
--- a/chainercv/links/model/mask_rcnn/mask_rcnn.py
+++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py
@@ -44,9 +44,9 @@ class MaskRCNN(chainer.Chain):
 
     """
 
-    _min_size = 800
-    _max_size = 1333
-    _stride = 32
+    min_size = 800
+    max_size = 1333
+    stride = 32
 
     def __init__(self, extractor, rpn, head, mask_head):
         super(MaskRCNN, self).__init__()
@@ -179,9 +179,9 @@ def prepare(self, imgs, masks=None):
         resized_sizes = []
         for img in imgs:
             _, H, W = img.shape
-            scale = self._min_size / min(H, W)
-            if scale * max(H, W) > self._max_size:
-                scale = self._max_size / max(H, W)
+            scale = self.min_size / min(H, W)
+            if scale * max(H, W) > self.max_size:
+                scale = self.max_size / max(H, W)
             scales.append(scale)
             H, W = int(H * scale), int(W * scale)
             img = transforms.resize(img, (H, W))
@@ -191,7 +191,7 @@ def prepare(self, imgs, masks=None):
         pad_size = np.array(
             [im.shape[1:] for im in resized_imgs]).max(axis=0)
         pad_size = (
-            np.ceil(pad_size / self._stride) * self._stride).astype(int)
+            np.ceil(pad_size / self.stride) * self.stride).astype(int)
         x = np.zeros(
             (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32)
         for i, im in enumerate(resized_imgs):
diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py
index c8e030d1a9..5f38949069 100644
--- a/examples/mask_rcnn/train_multi.py
+++ b/examples/mask_rcnn/train_multi.py
@@ -41,28 +41,31 @@ def __init__(self, model):
         with self.init_scope():
             self.model = model
 
-    def prepare_mask(self, masks, resized_sizes, pad_size):
-        resized_masks = []
-        for size, mask in zip(resized_sizes, masks):
-            resized_masks.append(transforms.resize(
-                mask.astype(np.float32),
-                size, interpolation=PIL.Image.NEAREST).astype(np.bool))
-        pad_masks = []
-        for mask in resized_masks:
-            n_class, H, W = mask.shape
-            pad_mask = self.xp.zeros(
-                (n_class, pad_size[0], pad_size[1]), dtype=np.bool)
-            pad_mask[:, :H, :W] = self.xp.array(mask)
-            pad_masks.append(pad_mask)
-        return pad_masks
-
     def __call__(self, imgs, masks, labels, bboxes):
-        x, scales, resized_sizes = self.model.prepare(imgs, masks)
-        B, _, pad_H, pad_W = x.shape
-        masks = self.prepare_mask(masks, resized_sizes, (pad_H, pad_W))
-        bboxes = [self.xp.array(bbox) * scale
-                  for bbox, scale in zip(bboxes, scales)]
+        B = len(imgs)
+        pad_size = np.array(
+            [im.shape[1:] for im in imgs]).max(axis=0)
+        pad_size = (
+            np.ceil(pad_size / self.model.stride) * self.model.stride).astype(int)
+        x = np.zeros(
+            (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32)
+        for i, img in enumerate(imgs):
+            _, H, W = img.shape
+            x[i, :, :H, :W] = img
+        x = self.xp.array(x)
+
+        pad_masks = [
+            self.xp.zeros(
+                (mask.shape[0], pad_size[0], pad_size[1]), dtype=np.bool)
+            for mask in masks]
+        for i, mask in enumerate(masks):
+            _, H, W = mask.shape
+            pad_masks[i][:, :H, :W] = self.xp.array(mask)
+        masks = pad_masks
+
+        bboxes = [self.xp.array(bbox) for bbox in bboxes]
         labels = [self.xp.array(label) for label in labels]
+        sizes = [img.shape[1:] for img in imgs]
 
         with chainer.using_config('train', False):
             hs = self.model.extractor(x)
@@ -70,10 +73,7 @@ def __call__(self, imgs, masks, labels, bboxes):
         rpn_locs, rpn_confs = self.model.rpn(hs)
         anchors = self.model.rpn.anchors(h.shape[2:] for h in hs)
         rpn_loc_loss, rpn_conf_loss = rpn_loss(
-            rpn_locs, rpn_confs, anchors,
-            [(int(img.shape[1] * scale), int(img.shape[2] * scale))
-             for img, scale in zip(imgs, scales)],
-            bboxes)
+            rpn_locs, rpn_confs, anchors, sizes, bboxes)
 
         rois, roi_indices = self.model.rpn.decode(
             rpn_locs, rpn_confs, anchors, x.shape)
@@ -110,19 +110,43 @@ def __call__(self, imgs, masks, labels, bboxes):
             # ChainerMN hangs when a subset of nodes has a different
             # computational graph from the rest.
             loss = chainer.Variable(self.xp.array(0, dtype=np.float32))
+            self.zerograds()
         return loss
 
 
-def transform(in_data):
-    img, mask, label, bbox = in_data
+class Transform(object):
+
+    def __init__(self, mean, min_size, max_size):
+        self.mean = mean
+        self.min_size = min_size
+        self.max_size = max_size
+
+    def __call__(self, in_data):
+        img, mask, label, bbox = in_data
+
+        # Flipping
+        img, params = transforms.random_flip(
+            img, x_random=True, return_param=True)
+        mask = transforms.flip(mask, x_flip=params['x_flip'])
+        bbox = transforms.flip_bbox(
+            bbox, img.shape[1:], x_flip=params['x_flip'])
 
-    img, params = transforms.random_flip(
-        img, x_random=True, return_param=True)
-    mask = transforms.flip(mask, x_flip=params['x_flip'])
-    bbox = transforms.flip_bbox(
-        bbox, img.shape[1:], x_flip=params['x_flip'])
+        # TODO: make this part reusable
+        # Scaling
+        _, H, W = img.shape
+        scale = self.min_size / min(H, W)
+        if scale * max(H, W) > self.max_size:
+            scale = self.max_size / max(H, W)
+        H, W = int(H * scale), int(W * scale)
+        img = transforms.resize(img, (H, W))
+        mask = transforms.resize(
+            mask.astype(np.float32),
+            (H, W), interpolation=PIL.Image.NEAREST).astype(np.bool)
+        bbox = bbox * scale
 
-    return img, mask, label, bbox
+        # Subtract mean
+        img -= self.mean
+        return img, mask, label, bbox, scale
 
 
 def converter(batch, device=None):
@@ -142,7 +166,10 @@ def main():
     parser.add_argument('--out', default='result')
     parser.add_argument('--resume')
     parser.add_argument('--communicator', default='hierarchical')
+    parser.add_argument('--cprofile', action='store_true', help='cprofile')
     args = parser.parse_args()
+    chainer.global_config.cv_resize_backend = 'PIL'
+    # chainer.global_config.cv_read_image_backend = 'PIL'
 
     # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
     if hasattr(multiprocessing, 'set_start_method'):
@@ -153,6 +180,8 @@ def main():
 
     comm = chainermn.create_communicator(args.communicator)
     device = comm.intra_rank
+    global rank
+    rank = comm.rank
 
     if args.model == 'mask_rcnn_fpn_resnet50':
         model = MaskRCNNFPNResNet50(
@@ -170,8 +199,10 @@ def main():
 
     train = TransformDataset(
         COCOInstanceSegmentationDataset(
+            data_dir='/home/yuyu2172/coco',
             split='train', return_bbox=True),
-        ('img', 'mask', 'label', 'bbox'), transform)
+        ('img', 'mask', 'label', 'bbox'),
+        Transform(model.extractor.mean, model.min_size, model.max_size))
 
     if comm.rank == 0:
         indices = np.arange(len(train))
@@ -180,8 +211,9 @@ def main():
     indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
     train = train.slice[indices]
 
-    train_iter = chainer.iterators.MultithreadIterator(
-        train, args.batchsize // comm.size)
+    train_iter = chainer.iterators.MultiprocessIterator(
+        train, args.batchsize // comm.size,
+        n_processes=args.batchsize // comm.size, shared_mem=100 * 1000 * 1000 * 4)
 
     optimizer = chainermn.create_multi_node_optimizer(
         chainer.optimizers.MomentumSGD(), comm)
@@ -242,7 +274,24 @@ def lr_schedule(trainer):
     if args.resume:
         serializers.load_npz(args.resume, trainer, strict=False)
 
+    if args.cprofile:
+        import cProfile
+        import io
+        import pstats
+        print('cprofiling')
+        pr = cProfile.Profile()
+        pr.enable()
     trainer.run()
+    if args.cprofile:
+        pr.disable()
+        s = io.StringIO()
+        sort_by = 'tottime'
+        ps = pstats.Stats(pr, stream=s).sort_stats(sort_by)
+        ps.print_stats()
+        if comm.rank == 0:
+            print(s.getvalue())
+
+        pr.dump_stats('{0}/rank_{1}.cprofile'.format(args.out, comm.rank))
 
 
 if __name__ == '__main__':

From 183229c1d53faf95ca91cc7615be21cf1031d0d3 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Mon, 18 Feb 2019 14:40:03 +0900
Subject: [PATCH 013/100] change url link

---
 chainercv/datasets/coco/coco_utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/chainercv/datasets/coco/coco_utils.py b/chainercv/datasets/coco/coco_utils.py
index 10841d567a..f96ec5803c 100644
--- a/chainercv/datasets/coco/coco_utils.py
+++ b/chainercv/datasets/coco/coco_utils.py
@@ -18,10 +18,10 @@
 }
 instances_anno_urls = {
     '2014': {
-        'train': 'http://msvocds.blob.core.windows.net/annotations-1-0-3/'
-        'instances_train-val2014.zip',
-        'val': 'http://msvocds.blob.core.windows.net/annotations-1-0-3/'
-        'instances_train-val2014.zip',
+        'train': 'http://images.cocodataset.org/annotations/'
+        'annotations_trainval2014.zip',
+        'val': 'http://images.cocodataset.org/annotations/'
+        'annotations_trainval2014.zip',
         'valminusminival': 'https://dl.dropboxusercontent.com/s/'
         's3tw5zcg7395368/instances_valminusminival2014.json.zip',
         'minival': 'https://dl.dropboxusercontent.com/s/o43o90bna78omob/'

From 8417559815c9b27fe5d0d7d021fb4457f23fa211 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Mon, 18 Feb 2019 16:58:37 +0900
Subject: [PATCH 014/100] add eval_point_coco

---
 chainercv/evaluations/__init__.py             |   1 +
 chainercv/evaluations/eval_point_coco.py      | 189 ++++++++++++++++++
 .../evaluations_tests/test_eval_point_coco.py | 136 +++++++++++++
 3 files changed, 326 insertions(+)
 create mode 100644 chainercv/evaluations/eval_point_coco.py
 create mode 100644 tests/evaluations_tests/test_eval_point_coco.py

diff --git a/chainercv/evaluations/__init__.py b/chainercv/evaluations/__init__.py
index 1f12332cdb..b3937cebfd 100644
--- a/chainercv/evaluations/__init__.py
+++ b/chainercv/evaluations/__init__.py
@@ -5,6 +5,7 @@
 from chainercv.evaluations.eval_instance_segmentation_coco import eval_instance_segmentation_coco  # NOQA
 from chainercv.evaluations.eval_instance_segmentation_voc import calc_instance_segmentation_voc_prec_rec  # NOQA
 from chainercv.evaluations.eval_instance_segmentation_voc import eval_instance_segmentation_voc  # NOQA
+from chainercv.evaluations.eval_point_coco import eval_point_coco  # NOQA
 from chainercv.evaluations.eval_semantic_segmentation import calc_semantic_segmentation_confusion  # NOQA
 from chainercv.evaluations.eval_semantic_segmentation import calc_semantic_segmentation_iou  # NOQA
 from chainercv.evaluations.eval_semantic_segmentation import eval_semantic_segmentation  # NOQA
diff --git a/chainercv/evaluations/eval_point_coco.py b/chainercv/evaluations/eval_point_coco.py
new file mode 100644
index 0000000000..68f3e00975
--- /dev/null
+++ b/chainercv/evaluations/eval_point_coco.py
@@ -0,0 +1,189 @@
+import itertools
+import numpy as np
+import os
+import six
+
+from chainercv.evaluations.eval_detection_coco import _redirect_stdout
+from chainercv.evaluations.eval_detection_coco import _summarize
+
+try:
+    import pycocotools.coco
+    import pycocotools.cocoeval
+    _available = True
+except ImportError:
+    _available = False
+
+
+def eval_point_coco(pred_points, pred_labels, pred_scores,
+                    gt_points, gt_is_valids, gt_bboxes, gt_labels,
+                    gt_areas=None, gt_crowdeds=None):
+    """Evaluate keypoint predictions with pycocotools' 'keypoints' mode.
+
+    Every argument holds one entry per image; the data is converted to
+    COCO annotations and scored by :class:`pycocotools.cocoeval.COCOeval`.
+
+    Args:
+        pred_points, gt_points: Per-instance keypoints in (y, x) order.
+        pred_labels, gt_labels: Per-instance class ids.
+        pred_scores: Per-instance confidence of the predictions.
+        gt_is_valids: Per-keypoint flags stored as the COCO visibility.
+        gt_bboxes: Per-instance (y_min, x_min, y_max, x_max) boxes.
+        gt_areas: Per-instance areas. If None, areas are derived from
+            the keypoint extents and area-dependent metrics are skipped.
+        gt_crowdeds: Per-instance crowd flags; None means not crowded.
+
+    Returns:
+        dict: Per-class 'ap/...' and 'ar/...' arrays, 'map/...' and 'mar/...'
+        summaries, 'existent_labels' and the raw 'coco_eval' object.
+    """
+    if not _available:
+        raise ValueError(
+            'Please install pycocotools \n'
+            'pip install -e \'git+https://github.com/cocodataset/coco.git'
+            '#egg=pycocotools&subdirectory=PythonAPI\'')
+
+    gt_coco = pycocotools.coco.COCO()
+    pred_coco = pycocotools.coco.COCO()
+
+    pred_points = iter(pred_points)
+    pred_labels = iter(pred_labels)
+    pred_scores = iter(pred_scores)
+    gt_points = iter(gt_points)
+    gt_is_valids = iter(gt_is_valids)
+    gt_bboxes = iter(gt_bboxes)
+    gt_labels = iter(gt_labels)
+
+    if gt_areas is None:
+        compute_area_dependent_metrics = False
+        gt_areas = itertools.repeat(None)
+    else:
+        compute_area_dependent_metrics = True
+        gt_areas = iter(gt_areas)
+    gt_crowdeds = (iter(gt_crowdeds) if gt_crowdeds is not None
+                   else itertools.repeat(None))
+
+    ids = []
+    pred_annos = []
+    gt_annos = []
+    existent_labels = {}
+    for i, (pred_point, pred_label, pred_score, gt_point, gt_is_valid,
+            gt_bbox, gt_label,
+            gt_area, gt_crowded) in enumerate(six.moves.zip(
+                pred_points, pred_labels, pred_scores,
+                gt_points, gt_is_valids, gt_bboxes, gt_labels,
+                gt_areas, gt_crowdeds)):
+        if gt_area is None:
+            gt_area = itertools.repeat(None)
+        if gt_crowded is None:
+            gt_crowded = itertools.repeat(None)
+        # Starting ids from 1 is important when using COCO.
+        img_id = i + 1
+
+        for pred_pnt, pred_lb, pred_sc in zip(pred_point, pred_label,
+                                              pred_score):
+            # http://cocodataset.org/#format-results
+            # Visibility flag is currently not used for evaluation
+            is_v = np.ones(len(pred_pnt))
+            pred_annos.append(
+                _create_anno(pred_pnt, is_v, None,
+                             pred_lb, pred_sc,
+                             img_id=img_id, anno_id=len(pred_annos) + 1,
+                             ar=None, crw=0))
+            existent_labels[pred_lb] = True
+
+        for gt_pnt, gt_is_v, gt_bb, gt_lb, gt_ar, gt_crw in zip(
+                gt_point, gt_is_valid, gt_bbox, gt_label, gt_area, gt_crowded):
+            gt_annos.append(
+                _create_anno(gt_pnt, gt_is_v, gt_bb, gt_lb, None,
+                             img_id=img_id, anno_id=len(gt_annos) + 1,
+                             ar=gt_ar, crw=gt_crw))
+            existent_labels[gt_lb] = True
+        ids.append({'id': img_id})
+    existent_labels = sorted(existent_labels.keys())
+
+    pred_coco.dataset['categories'] = [{'id': i} for i in existent_labels]
+    gt_coco.dataset['categories'] = [{'id': i} for i in existent_labels]
+    pred_coco.dataset['annotations'] = pred_annos
+    gt_coco.dataset['annotations'] = gt_annos
+    pred_coco.dataset['images'] = ids
+    gt_coco.dataset['images'] = ids
+
+    with open(os.devnull, 'w') as devnull, _redirect_stdout(devnull):
+        pred_coco.createIndex()
+        gt_coco.createIndex()
+        coco_eval = pycocotools.cocoeval.COCOeval(
+            gt_coco, pred_coco, 'keypoints')
+        coco_eval.evaluate()
+        coco_eval.accumulate()
+
+    results = {'coco_eval': coco_eval}
+    p = coco_eval.params
+    common_kwargs = {
+        'prec': coco_eval.eval['precision'],
+        'rec': coco_eval.eval['recall'],
+        'iou_threshs': p.iouThrs,
+        'area_ranges': p.areaRngLbl,
+        'max_detection_list': p.maxDets,
+    }
+    # (iou label, iou_thresh, area_range) of every reported metric; the
+    # area-dependent ones are reported only when gt_areas was supplied.
+    settings = [('0.50:0.95', None, 'all'), ('0.50', 0.5, 'all'),
+                ('0.75', 0.75, 'all')]
+    if compute_area_dependent_metrics:
+        settings += [('0.50:0.95', None, 'medium'),
+                     ('0.50:0.95', None, 'large')]
+    all_kwargs = {}
+    for iou_name, iou_thresh, area_range in settings:
+        for name, ap in (('ap', True), ('ar', False)):
+            key = '{}/iou={}/area={}/max_dets=20'.format(
+                name, iou_name, area_range)
+            all_kwargs[key] = {
+                'ap': ap, 'iou_thresh': iou_thresh,
+                'area_range': area_range, 'max_detection': 20}
+
+    for key, kwargs in all_kwargs.items():
+        kwargs.update(common_kwargs)
+        metrics, mean_metric = _summarize(**kwargs)
+
+        # pycocotools ignores classes that are not included in
+        # either gt or prediction, but lie between 0 and
+        # the maximum label id.
+        # We set values for these classes to np.nan.
+        results[key] = np.nan * np.ones(np.max(existent_labels) + 1)
+        results[key][existent_labels] = metrics
+        results['m' + key] = mean_metric
+
+    results['existent_labels'] = existent_labels
+    return results
+
+
+def _create_anno(pnt, is_v, bb, lb, sc, img_id, anno_id, ar=None, crw=None):
+    """Build one COCO keypoint annotation dict from (y, x) points."""
+    # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/coco.py#L342
+    y_min = np.min(pnt[:, 0])
+    x_min = np.min(pnt[:, 1])
+    y_max = np.max(pnt[:, 0])
+    x_max = np.max(pnt[:, 1])
+    if ar is None:
+        ar = (y_max - y_min) * (x_max - x_min)
+    if crw is None:
+        crw = False
+    # COCO boxes are [x, y, width, height]; default to the keypoint extent.
+    if bb is None:
+        bb_xywh = [x_min, y_min, x_max - x_min, y_max - y_min]
+    else:
+        bb_xywh = [bb[1], bb[0], bb[3] - bb[1], bb[2] - bb[0]]
+    # Rows become (x, y, visibility); column 2 is the visibility flag.
+    pnt = np.concatenate((pnt[:, [1, 0]], is_v[:, None]), axis=1)
+    anno = {
+        'image_id': img_id, 'category_id': lb,
+        'keypoints': pnt.reshape((-1)).tolist(),
+        'area': ar,
+        'bbox': bb_xywh,
+        'id': anno_id,
+        'iscrowd': crw,
+        'num_keypoints': (pnt[:, 2] > 0).sum()
+    }
+    if sc is not None:
+        anno.update({'score': sc})
+    return anno
diff --git a/tests/evaluations_tests/test_eval_point_coco.py b/tests/evaluations_tests/test_eval_point_coco.py
new file mode 100644
index 0000000000..bc2095eefd
--- /dev/null
+++ b/tests/evaluations_tests/test_eval_point_coco.py
@@ -0,0 +1,136 @@
+import numpy as np
+import os
+from six.moves.urllib import request
+import unittest
+
+from chainer import testing
+
+from chainercv.evaluations import eval_point_coco
+
+try:
+    import pycocotools  # NOQA
+    _available = True
+except ImportError:
+    _available = False
+
+
+# @unittest.skipUnless(_available, 'pycocotools is not installed')
+# class TestEvalPointCOCOSingleClass(unittest.TestCase):
+# 
+#     def setUp(self):
+#         self.pred_bboxes = np.array([[[0, 0, 10, 10], [0, 0, 20, 20]]])
+#         self.pred_labels = np.array([[0, 0]])
+#         self.pred_scores = np.array([[0.8, 0.9]])
+#         self.gt_bboxes = np.array([[[0, 0, 10, 9]]])
+#         self.gt_labels = np.array([[0, 0]])
+# 
+#     def test_crowded(self):
+#         result = eval_detection_coco(self.pred_bboxes, self.pred_labels,
+#                                      self.pred_scores,
+#                                      self.gt_bboxes, self.gt_labels,
+#                                      gt_crowdeds=[[True]])
+#         # When the only ground truth is crowded, nothing is evaluated.
+#         # In that case, all the results are nan.
+#         self.assertTrue(
+#             np.isnan(result['map/iou=0.50:0.95/area=all/max_dets=100']))
+#         self.assertTrue(
+#             np.isnan(result['map/iou=0.50/area=all/max_dets=100']))
+#         self.assertTrue(
+#             np.isnan(result['map/iou=0.75/area=all/max_dets=100']))
+# 
+#     def test_area_not_supplied(self):
+#         result = eval_detection_coco(self.pred_bboxes, self.pred_labels,
+#                                      self.pred_scores,
+#                                      self.gt_bboxes, self.gt_labels)
+#         self.assertFalse(
+#             'map/iou=0.50:0.95/area=small/max_dets=100' in result)
+#         self.assertFalse(
+#             'map/iou=0.50:0.95/area=medium/max_dets=100' in result)
+#         self.assertFalse(
+#             'map/iou=0.50:0.95/area=large/max_dets=100' in result)
+# 
+#     def test_area_specified(self):
+#         result = eval_detection_coco(self.pred_bboxes, self.pred_labels,
+#                                      self.pred_scores,
+#                                      self.gt_bboxes, self.gt_labels,
+#                                      gt_areas=[[2048]])
+#         self.assertFalse(
+#             np.isnan(result['map/iou=0.50:0.95/area=medium/max_dets=100']))
+#         self.assertTrue(
+#             np.isnan(result['map/iou=0.50:0.95/area=small/max_dets=100']))
+#         self.assertTrue(
+#             np.isnan(result['map/iou=0.50:0.95/area=large/max_dets=100']))
+
+
+# @unittest.skipUnless(_available, 'pycocotools is not installed')
+# class TestEvalPointCOCOSomeClassNonExistent(unittest.TestCase):
+# 
+#     def setUp(self):
+#         self.pred_bboxes = np.array([[[0, 0, 10, 10], [0, 0, 20, 20]]])
+#         self.pred_labels = np.array([[1, 2]])
+#         self.pred_scores = np.array([[0.8, 0.9]])
+#         self.gt_bboxes = np.array([[[0, 0, 10, 9]]])
+#         self.gt_labels = np.array([[1, 2]])
+# 
+#     def test(self):
+#         result = eval_detection_coco(self.pred_bboxes, self.pred_labels,
+#                                      self.pred_scores,
+#                                      self.gt_bboxes, self.gt_labels)
+#         self.assertEqual(
+#             result['ap/iou=0.50:0.95/area=all/max_dets=100'].shape, (3,))
+#         self.assertTrue(
+#             np.isnan(result['ap/iou=0.50:0.95/area=all/max_dets=100'][0]))
+#         self.assertEqual(
+#             np.nanmean(result['ap/iou=0.50:0.95/area=all/max_dets=100'][1:]),
+#             result['map/iou=0.50:0.95/area=all/max_dets=100'])
+# 
+
+@unittest.skipUnless(_available, 'pycocotools is not installed')
+class TestEvalPointCOCO(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        base_url = 'https://chainercv-models.preferred.jp/tests'
+
+        cls.dataset = np.load(request.urlretrieve(os.path.join(
+            base_url, 'eval_point_coco_dataset_2019_02_18.npz'))[0])
+        cls.result = np.load(request.urlretrieve(os.path.join(
+            base_url, 'eval_point_coco_result_2019_02_18.npz'))[0])
+
+    def test_eval_point_coco(self):
+        pred_points = self.result['points']
+        pred_labels = self.result['labels']
+        pred_scores = self.result['scores']
+
+        gt_points = self.dataset['points']
+        gt_is_valids = self.dataset['is_valids']
+        gt_bboxes = self.dataset['bboxes']
+        gt_labels = self.dataset['labels']
+        gt_areas = self.dataset['areas']
+        gt_crowdeds = self.dataset['crowdeds']
+
+        result = eval_point_coco(
+            pred_points, pred_labels, pred_scores,
+            gt_points, gt_is_valids, gt_bboxes,
+            gt_labels, gt_areas, gt_crowdeds)
+
+        # Summary metrics the downloaded fixtures are expected to yield.
+        expected = {
+            'map/iou=0.50:0.95/area=all/max_dets=20': 0.37733572721481323,
+            'map/iou=0.50/area=all/max_dets=20': 0.6448841691017151,
+            'map/iou=0.75/area=all/max_dets=20': 0.35469090938568115,
+            'map/iou=0.50:0.95/area=medium/max_dets=20': 0.3894105851650238,
+            'map/iou=0.50:0.95/area=large/max_dets=20': 0.39169296622276306,
+            'mar/iou=0.50:0.95/area=all/max_dets=20': 0.5218977928161621,
+            'mar/iou=0.50/area=all/max_dets=20': 0.7445255517959595,
+            'mar/iou=0.75/area=all/max_dets=20': 0.510948896408081,
+            'mar/iou=0.50:0.95/area=medium/max_dets=20': 0.5150684714317322,
+            'mar/iou=0.50:0.95/area=large/max_dets=20': 0.5296875238418579,
+        }
+
+        for key, value in expected.items():
+            np.testing.assert_almost_equal(
+                result[key], value, decimal=5)
+
+
+testing.run_module(__name__, __file__)

From a8fcf2a6a4604145e2c8a467c0f2dc19e6bec21c Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Mon, 18 Feb 2019 21:53:17 +0900
Subject: [PATCH 015/100] train now works

---
 examples/mask_rcnn/train_multi.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py
index 5f38949069..74323658f5 100644
--- a/examples/mask_rcnn/train_multi.py
+++ b/examples/mask_rcnn/train_multi.py
@@ -27,11 +27,11 @@
 from chainercv.links.model.mask_rcnn import mask_loss_pre
 
 # https://docs.chainer.org/en/stable/tips.html#my-training-process-gets-stuck-when-using-multiprocessiterator
-try:
-    import cv2
-    cv2.setNumThreads(0)
-except ImportError:
-    pass
+# try:
+#     import cv2
+#     cv2.setNumThreads(0)
+# except ImportError:
+#     pass
 
 
 class TrainChain(chainer.Chain):
@@ -122,8 +122,10 @@ def __init__(self, mean, min_size, max_size):
         self.max_size = max_size
 
     def __call__(self, in_data):
+        import time
+        start = time.time()
         img, mask, label, bbox = in_data
-
+        original = mask.shape
         # Flipping
         img, params = transforms.random_flip(
             img, x_random=True, return_param=True)
@@ -168,7 +170,7 @@ def main():
     parser.add_argument('--communicator', default='hierarchical')
     parser.add_argument('--cprofile', action='store_true', help='cprofile')
     args = parser.parse_args()
-    chainer.global_config.cv_resize_backend = 'PIL'
+    # chainer.global_config.cv_resize_backend = 'PIL'
     # chainer.global_config.cv_read_image_backend = 'PIL'
 
     # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator

From 0d9ba6528c514197912fc72bf2781146e03db2fd Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Tue, 19 Feb 2019 11:16:28 +0900
Subject: [PATCH 016/100] some speed up

---
 chainercv/links/model/mask_rcnn/mask_head.py | 23 +++++++++-----------
 examples/mask_rcnn/train_multi.py            | 11 +++++-----
 2 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py
index 7be88c98cb..8815774fcf 100644
--- a/chainercv/links/model/mask_rcnn/mask_head.py
+++ b/chainercv/links/model/mask_rcnn/mask_head.py
@@ -13,7 +13,6 @@
 
 from chainercv.transforms.image.resize import resize
 from chainercv.utils.bbox.bbox_iou import bbox_iou
-from chainercv.utils.mask.mask_to_bbox import mask_to_bbox
 
 
 class MaskHead(chainer.Chain):
@@ -198,8 +197,8 @@ def _expand_boxes(bbox, scale):
     return expanded_bbox
 
 
-def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels,
-                  mask_size):
+def mask_loss_pre(rois, roi_indices, gt_masks, gt_bboxes,
+                  gt_head_labels, mask_size):
     """Loss function for Mask Head (pre).
 
     This function processes RoIs for :func:`mask_loss_post` by
@@ -255,14 +254,14 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels,
     gt_segms = xp.empty((len(mask_rois), mask_size, mask_size), dtype=np.bool)
     for i in np.unique(cuda.to_cpu(mask_roi_indices)):
         gt_mask = gt_masks[i]
-        gt_bbox = mask_to_bbox(gt_mask)
+        gt_bbox = gt_bboxes[i]
 
         index = (mask_roi_indices == i).nonzero()[0]
         mask_roi = mask_rois[index]
         iou = bbox_iou(mask_roi, gt_bbox)
-        gt_index = iou.argmax(axis=1)
+        gt_index = chainer.backends.cuda.to_cpu(iou.argmax(axis=1))
         gt_segms[index] = _segm_wrt_bbox(
-            gt_mask[gt_index], mask_roi, (mask_size, mask_size))
+            gt_mask[gt_index], mask_roi, (mask_size, mask_size), xp)
 
     flag_masks = [mask_roi_levels == l for l in range(n_level)]
     mask_rois = [mask_rois[m] for m in flag_masks]
@@ -310,18 +309,16 @@ def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels,
     return mask_loss
 
 
-def _segm_wrt_bbox(mask, bbox, size):
-    xp = chainer.backends.cuda.get_array_module(mask)
-
-    bbox = bbox.astype(np.int32)
+def _segm_wrt_bbox(mask, bbox, size, xp):
+    bbox = chainer.backends.cuda.to_cpu(bbox.astype(np.int32))
 
     segm = []
     for m, bb in zip(mask, bbox):
-        if bb[2] - bb[0] == 0 or bb[3] - bb[1] == 0:
-            segm.append(xp.zeros(size, dtype=np.bool))
-            continue
         cropped_m = m[bb[0]:bb[2], bb[1]:bb[3]]
         cropped_m = chainer.backends.cuda.to_cpu(cropped_m)
+        if cropped_m.shape[0] == 0 or cropped_m.shape[1] == 0:
+            segm.append(np.zeros(size, dtype=np.bool))
+            continue
 
         segm.append(resize(
             cropped_m[None].astype(np.float32),
diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py
index 74323658f5..a944aac58b 100644
--- a/examples/mask_rcnn/train_multi.py
+++ b/examples/mask_rcnn/train_multi.py
@@ -54,13 +54,14 @@ def __call__(self, imgs, masks, labels, bboxes):
             x[i, :, :H, :W] = img
         x = self.xp.array(x)
 
+        # To avoid unnecessary CPU/GPU copies, `masks` is kept on the CPU.
         pad_masks = [
-            self.xp.zeros(
+            np.zeros(
                 (mask.shape[0], pad_size[0], pad_size[1]), dtype=np.bool)
             for mask in masks]
         for i, mask in enumerate(masks):
             _, H, W = mask.shape
-            pad_masks[i][:, :H, :W] = self.xp.array(mask)
+            pad_masks[i][:, :H, :W] = mask
         masks = pad_masks
 
         bboxes = [self.xp.array(bbox) for bbox in bboxes]
@@ -91,8 +92,8 @@ def __call__(self, imgs, masks, labels, bboxes):
             roi_indices, head_gt_locs, head_gt_labels, B)
 
         mask_rois, mask_roi_indices, gt_segms, gt_mask_labels = mask_loss_pre(
-            rois, roi_indices, masks, head_gt_labels,
-            self.model.mask_head.mask_size)
+            rois, roi_indices, masks, bboxes,
+            head_gt_labels, self.model.mask_head.mask_size)
         n_roi = sum([len(roi) for roi in mask_rois])
         if n_roi > 0:
             segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)
@@ -122,8 +123,6 @@ def __init__(self, mean, min_size, max_size):
         self.max_size = max_size
 
     def __call__(self, in_data):
-        import time
-        start = time.time()
         img, mask, label, bbox = in_data
         original = mask.shape
         # Flipping

From 97497ea68d999be043cb769a767426dd9aeec35f Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Tue, 19 Feb 2019 03:03:19 +0000
Subject: [PATCH 017/100] reduce copy

---
 chainercv/links/model/mask_rcnn/mask_head.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py
index 8815774fcf..90594be847 100644
--- a/chainercv/links/model/mask_rcnn/mask_head.py
+++ b/chainercv/links/model/mask_rcnn/mask_head.py
@@ -259,9 +259,9 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_bboxes,
         index = (mask_roi_indices == i).nonzero()[0]
         mask_roi = mask_rois[index]
         iou = bbox_iou(mask_roi, gt_bbox)
-        gt_index = chainer.backends.cuda.to_cpu(iou.argmax(axis=1))
+        gt_index = iou.argmax(axis=1)
         gt_segms[index] = _segm_wrt_bbox(
-            gt_mask[gt_index], mask_roi, (mask_size, mask_size), xp)
+            gt_mask, gt_index, mask_roi, (mask_size, mask_size), xp)
 
     flag_masks = [mask_roi_levels == l for l in range(n_level)]
     mask_rois = [mask_rois[m] for m in flag_masks]
@@ -293,7 +293,7 @@ def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels,
     xp = cuda.get_array_module(segms.array)
 
     mask_roi_indices = xp.hstack(mask_roi_indices).astype(np.int32)
-    gt_segms = xp.vstack(gt_segms).astype(np.float32)
+    gt_segms = xp.vstack(gt_segms).astype(np.float32, copy=False)
     gt_mask_labels = xp.hstack(gt_mask_labels).astype(np.int32)
 
     mask_loss = 0
@@ -309,8 +309,9 @@ def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels,
     return mask_loss
 
 
-def _segm_wrt_bbox(mask, bbox, size, xp):
+def _segm_wrt_bbox(mask, gt_index, bbox, size, xp):
     bbox = chainer.backends.cuda.to_cpu(bbox.astype(np.int32))
+    mask = mask[chainer.backends.cuda.to_cpu(gt_index)]
 
     segm = []
     for m, bb in zip(mask, bbox):
@@ -322,5 +323,5 @@ def _segm_wrt_bbox(mask, bbox, size, xp):
 
         segm.append(resize(
             cropped_m[None].astype(np.float32),
-            size, interpolation=PIL.Image.NEAREST)[0].astype(np.bool))
-    return xp.array(segm, dtype=np.bool)
+            size, interpolation=PIL.Image.NEAREST)[0])
+    return xp.array(segm, dtype=np.float32)

From 1e38522af2f99f0e70b1d7c11be8bd63a73427ee Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Tue, 19 Feb 2019 12:06:48 +0900
Subject: [PATCH 018/100] delete unnecessary

---
 examples/mask_rcnn/train_multi.py | 33 +++++++------------------------
 1 file changed, 7 insertions(+), 26 deletions(-)

diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py
index a944aac58b..c9c5856d13 100644
--- a/examples/mask_rcnn/train_multi.py
+++ b/examples/mask_rcnn/train_multi.py
@@ -27,11 +27,11 @@
 from chainercv.links.model.mask_rcnn import mask_loss_pre
 
 # https://docs.chainer.org/en/stable/tips.html#my-training-process-gets-stuck-when-using-multiprocessiterator
-# try:
-#     import cv2
-#     cv2.setNumThreads(0)
-# except ImportError:
-#     pass
+try:
+    import cv2
+    cv2.setNumThreads(0)
+except ImportError:
+    pass
 
 
 class TrainChain(chainer.Chain):
@@ -167,10 +167,7 @@ def main():
     parser.add_argument('--out', default='result')
     parser.add_argument('--resume')
     parser.add_argument('--communicator', default='hierarchical')
-    parser.add_argument('--cprofile', action='store_true', help='cprofile')
     args = parser.parse_args()
-    # chainer.global_config.cv_resize_backend = 'PIL'
-    # chainer.global_config.cv_read_image_backend = 'PIL'
 
     # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
     if hasattr(multiprocessing, 'set_start_method'):
@@ -214,7 +211,8 @@ def main():
 
     train_iter = chainer.iterators.MultiprocessIterator(
         train, args.batchsize // comm.size,
-        n_processes=args.batchsize // comm.size, shared_mem=100 * 1000 * 1000 * 4)
+        n_processes=args.batchsize // comm.size,
+        shared_mem=100 * 1000 * 1000 * 4)
 
     optimizer = chainermn.create_multi_node_optimizer(
         chainer.optimizers.MomentumSGD(), comm)
@@ -275,24 +273,7 @@ def lr_schedule(trainer):
     if args.resume:
         serializers.load_npz(args.resume, trainer, strict=False)
 
-    if args.cprofile:
-        import cProfile
-        import io
-        import pstats
-        print('cprofiling')
-        pr = cProfile.Profile()
-        pr.enable()
     trainer.run()
-    if args.cprofile:
-        pr.disable()
-        s = io.StringIO()
-        sort_by = 'tottime'
-        ps = pstats.Stats(pr, stream=s).sort_stats(sort_by)
-        ps.print_stats()
-        if comm.rank == 0:
-            print(s.getvalue())
-
-        pr.dump_stats('{0}/rank_{1}.cprofile'.format(args.out, comm.rank))
 
 
 if __name__ == '__main__':

From c63c3068b17b5bcbd6d1546b3b266f4558155498 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Tue, 19 Feb 2019 12:13:04 +0900
Subject: [PATCH 019/100] reuse prepare function

---
 chainercv/links/model/mask_rcnn/mask_rcnn.py | 36 +++++++++++---------
 examples/mask_rcnn/train_multi.py            | 21 +++---------
 2 files changed, 24 insertions(+), 33 deletions(-)

diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py
index 4158846ed4..1386f071b2 100644
--- a/chainercv/links/model/mask_rcnn/mask_rcnn.py
+++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py
@@ -44,8 +44,8 @@ class MaskRCNN(chainer.Chain):
 
     """
 
-    min_size = 800
-    max_size = 1333
+    _min_size = 800
+    _max_size = 1333
     stride = 32
 
     def __init__(self, extractor, rpn, head, mask_head):
@@ -123,7 +123,7 @@ def predict(self, imgs):
         """
 
         sizes = [img.shape[1:] for img in imgs]
-        x, scales, _ = self.prepare(imgs)
+        x, scales = self.prepare(imgs)
 
         with chainer.using_config('train', False), chainer.no_backprop_mode():
             hs, rois, roi_indices = self(x)
@@ -160,7 +160,7 @@ def predict(self, imgs):
         scores = [cuda.to_cpu(score) for score in scores]
         return masks, labels, scores
 
-    def prepare(self, imgs, masks=None):
+    def prepare(self, imgs):
         """Preprocess images.
 
         Args:
@@ -169,25 +169,16 @@ def prepare(self, imgs, masks=None):
                 and the range of their value is :math:`[0, 255]`.
 
         Returns:
-            Three arrays: preprocessed images, \
-            scales that were caluclated in prepocessing and
-            the size of the images after resizing.
+            Two arrays: preprocessed images and \
+            scales that were calculated in preprocessing.
 
         """
         scales = []
         resized_imgs = []
-        resized_sizes = []
         for img in imgs:
-            _, H, W = img.shape
-            scale = self.min_size / min(H, W)
-            if scale * max(H, W) > self.max_size:
-                scale = self.max_size / max(H, W)
+            img, scale = self.prepare_img(img)
             scales.append(scale)
-            H, W = int(H * scale), int(W * scale)
-            img = transforms.resize(img, (H, W))
-            img -= self.extractor.mean
             resized_imgs.append(img)
-            resized_sizes.append((H, W))
         pad_size = np.array(
             [im.shape[1:] for im in resized_imgs]).max(axis=0)
         pad_size = (
@@ -199,7 +190,18 @@ def prepare(self, imgs, masks=None):
             x[i, :, :H, :W] = im
         x = self.xp.array(x)
 
-        return x, scales, resized_sizes
+        return x, scales
+
+    def prepare_img(self, img):
+        """Rescale and mean-subtract one image; return (img, scale)."""
+        _, H, W = img.shape
+        scale = self._min_size / min(H, W)  # shorter side -> _min_size
+        if scale * max(H, W) > self._max_size:  # cap the longer side
+            scale = self._max_size / max(H, W)
+        H, W = int(H * scale), int(W * scale)  # truncated to whole pixels
+        img = transforms.resize(img, (H, W))
+        img -= self.extractor.mean
+        return img, scale
 
 
 def _list_to_flat(array_list):
diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py
index c9c5856d13..593b0ceb55 100644
--- a/examples/mask_rcnn/train_multi.py
+++ b/examples/mask_rcnn/train_multi.py
@@ -117,10 +117,8 @@ def __call__(self, imgs, masks, labels, bboxes):
 
 class Transform(object):
 
-    def __init__(self, mean, min_size, max_size):
-        self.mean = mean
-        self.min_size = min_size
-        self.max_size = max_size
+    def __init__(self, prepare_img):
+        self.prepare_img = prepare_img
 
     def __call__(self, in_data):
         img, mask, label, bbox = in_data
@@ -132,21 +130,12 @@ def __call__(self, in_data):
         bbox = transforms.flip_bbox(
             bbox, img.shape[1:], x_flip=params['x_flip'])
 
-        # TODO: make this part reusable
-        # Scaling
-        _, H, W = img.shape
-        scale = self.min_size / min(H, W)
-        if scale * max(H, W) > self.max_size:
-            scale = self.max_size / max(H, W)
-        H, W = int(H * scale), int(W * scale)
-        img = transforms.resize(img, (H, W))
+        # Scaling and mean subtraction
+        img, scale = self.prepare_img(img)
         mask = transforms.resize(
             mask.astype(np.float32),
             (H, W), interpolation=PIL.Image.NEAREST).astype(np.bool)
         bbox = bbox * scale
-
-        # Subtract mean
-        img -= self.mean
         return img, mask, label, bbox, scale
 
 
@@ -200,7 +189,7 @@ def main():
             data_dir='/home/yuyu2172/coco',
             split='train', return_bbox=True),
         ('img', 'mask', 'label', 'bbox'),
-        Transform(model.extractor.mean, model.min_size, model.max_size))
+        Transform(model.prepare_img))
 
     if comm.rank == 0:
         indices = np.arange(len(train))

From bb8fd686f602c0bfcf26e683675b22f4f242f3d5 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Tue, 19 Feb 2019 03:31:34 +0000
Subject: [PATCH 020/100] don't use instance method

---
 chainercv/links/model/mask_rcnn/mask_rcnn.py | 21 ++++++--------------
 chainercv/links/model/mask_rcnn/misc.py      | 12 +++++++++++
 examples/mask_rcnn/train_multi.py            | 16 ++++++++++-----
 3 files changed, 29 insertions(+), 20 deletions(-)
 create mode 100644 chainercv/links/model/mask_rcnn/misc.py

diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py
index 1386f071b2..9f59f49d92 100644
--- a/chainercv/links/model/mask_rcnn/mask_rcnn.py
+++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py
@@ -6,7 +6,7 @@
 from chainer.backends import cuda
 import chainer.functions as F
 
-from chainercv import transforms
+from chainercv.links.model.mask_rcnn.misc import scale_img
 
 
 class MaskRCNN(chainer.Chain):
@@ -44,8 +44,8 @@ class MaskRCNN(chainer.Chain):
 
     """
 
-    _min_size = 800
-    _max_size = 1333
+    min_size = 800
+    max_size = 1333
     stride = 32
 
     def __init__(self, extractor, rpn, head, mask_head):
@@ -176,7 +176,9 @@ def prepare(self, imgs):
         scales = []
         resized_imgs = []
         for img in imgs:
-            img, scale = self.prepare_img(img)
+            img, scale = scale_img(
+                img, self.min_size, self.max_size)
+            img -= self.extractor.mean
             scales.append(scale)
             resized_imgs.append(img)
         pad_size = np.array(
@@ -192,17 +194,6 @@ def prepare(self, imgs):
 
         return x, scales
 
-    def prepare_img(self, img):
-        """Process image."""
-        _, H, W = img.shape
-        scale = self._min_size / min(H, W)
-        if scale * max(H, W) > self._max_size:
-            scale = self._max_size / max(H, W)
-        H, W = int(H * scale), int(W * scale)
-        img = transforms.resize(img, (H, W))
-        img -= self.extractor.mean
-        return img, scale
-
 
 def _list_to_flat(array_list):
     xp = chainer.backends.cuda.get_array_module(array_list[0])
diff --git a/chainercv/links/model/mask_rcnn/misc.py b/chainercv/links/model/mask_rcnn/misc.py
new file mode 100644
index 0000000000..abb233443b
--- /dev/null
+++ b/chainercv/links/model/mask_rcnn/misc.py
@@ -0,0 +1,12 @@
+from chainercv import transforms
+
+
+def scale_img(img, min_size, max_size):
+    """Process image."""
+    _, H, W = img.shape
+    scale = min_size / min(H, W)
+    if scale * max(H, W) > max_size:
+        scale = max_size / max(H, W)
+    H, W = int(H * scale), int(W * scale)
+    img = transforms.resize(img, (H, W))
+    return img, scale
diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py
index 593b0ceb55..36b76ce293 100644
--- a/examples/mask_rcnn/train_multi.py
+++ b/examples/mask_rcnn/train_multi.py
@@ -18,6 +18,7 @@
 from chainercv.datasets import COCOInstanceSegmentationDataset
 from chainercv.links import MaskRCNNFPNResNet101
 from chainercv.links import MaskRCNNFPNResNet50
+from chainercv.links.model.mask_rcnn.misc import scale_img
 from chainercv import transforms
 
 from chainercv.links.model.fpn import head_loss_post
@@ -117,8 +118,10 @@ def __call__(self, imgs, masks, labels, bboxes):
 
 class Transform(object):
 
-    def __init__(self, prepare_img):
-        self.prepare_img = prepare_img
+    def __init__(self, min_size, max_size, mean):
+        self.min_size = min_size
+        self.max_size = max_size
+        self.mean = mean
 
     def __call__(self, in_data):
         img, mask, label, bbox = in_data
@@ -131,10 +134,13 @@ def __call__(self, in_data):
             bbox, img.shape[1:], x_flip=params['x_flip'])
 
         # Scaling and mean subtraction
-        img, scale = self.prepare_img(img)
+        img, scale = scale_img(
+            img, self.min_size, self.max_size)
+        img -= self.mean
         mask = transforms.resize(
             mask.astype(np.float32),
-            (H, W), interpolation=PIL.Image.NEAREST).astype(np.bool)
+            img.shape[1:],
+            interpolation=PIL.Image.NEAREST).astype(np.bool)
         bbox = bbox * scale
         return img, mask, label, bbox, scale
 
@@ -189,7 +195,7 @@ def main():
             data_dir='/home/yuyu2172/coco',
             split='train', return_bbox=True),
         ('img', 'mask', 'label', 'bbox'),
-        Transform(model.prepare_img))
+        Transform(model.min_size, model.max_size, model.extractor.mean))
 
     if comm.rank == 0:
         indices = np.arange(len(train))

From 45e77be1d128a71f19cd03447b4953cf7339aab5 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Tue, 19 Feb 2019 13:43:12 +0900
Subject: [PATCH 021/100] delete eval_coco

---
 examples/instance_segmentation/eval_coco.py | 94 ---------------------
 1 file changed, 94 deletions(-)
 delete mode 100755 examples/instance_segmentation/eval_coco.py

diff --git a/examples/instance_segmentation/eval_coco.py b/examples/instance_segmentation/eval_coco.py
deleted file mode 100755
index 98258252b8..0000000000
--- a/examples/instance_segmentation/eval_coco.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import argparse
-
-import chainer
-from chainer import iterators
-
-from chainercv.datasets import coco_instance_segmentation_label_names
-from chainercv.datasets import COCOInstanceSegmentationDataset
-from chainercv.evaluations import eval_instance_segmentation_coco
-from chainercv.experimental.links import FCISResNet101
-from chainercv.links import MaskRCNNFPNResNet101
-from chainercv.links import MaskRCNNFPNResNet50
-from chainercv.utils import apply_to_iterator
-from chainercv.utils import ProgressHook
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        '--model', choices=(
-            'fcis_resnet101',
-            'mask_rcnn_fpn_resnet101', 'mask_rcnn_fpn_resnet50'),
-        default='fcis_resnet101')
-    parser.add_argument('--pretrained-model', default=None)
-    parser.add_argument('--gpu', type=int, default=-1)
-    args = parser.parse_args()
-
-    if args.pretrained_model is None:
-        args.pretrained_model = 'coco'
-    if args.model == 'fcis_resnet101':
-        proposal_creator_params = FCISResNet101.proposal_creator_params
-        proposal_creator_params['min_size'] = 2
-        model = FCISResNet101(
-            n_fg_class=len(coco_instance_segmentation_label_names),
-            anchor_scales=(4, 8, 16, 32),
-            pretrained_model=args.pretrained_model,
-            proposal_creator_params=proposal_creator_params)
-        preset = 'coco_evaluate'
-    elif args.model == 'mask_rcnn_fpn_resnet50':
-        model = MaskRCNNFPNResNet50(
-            len(coco_instance_segmentation_label_names),
-            args.pretrained_model)
-        preset = 'evaluate'
-    elif args.model == 'mask_rcnn_fpn_resnet101':
-        model = MaskRCNNFPNResNet101(
-            len(coco_instance_segmentation_label_names),
-            args.pretrained_model)
-        preset = 'evaluate'
-
-    model.use_preset(preset)
-
-    if args.gpu >= 0:
-        chainer.cuda.get_device_from_id(args.gpu).use()
-        model.to_gpu()
-
-    dataset = COCOInstanceSegmentationDataset(
-        split='minival', year='2014',
-        use_crowded=True, return_crowded=True, return_area=True)
-    iterator = iterators.SerialIterator(
-        dataset, 1, repeat=False, shuffle=False)
-
-    in_values, out_values, rest_values = apply_to_iterator(
-        model.predict, iterator, hook=ProgressHook(len(dataset)))
-    # delete unused iterators explicitly
-    del in_values
-
-    pred_masks, pred_labels, pred_scores = out_values
-    gt_masks, gt_labels, gt_areas, gt_crowdeds = rest_values
-
-    result = eval_instance_segmentation_coco(
-        pred_masks, pred_labels, pred_scores,
-        gt_masks, gt_labels, gt_areas, gt_crowdeds)
-
-    keys = [
-        'map/iou=0.50:0.95/area=all/max_dets=100',
-        'map/iou=0.50/area=all/max_dets=100',
-        'map/iou=0.75/area=all/max_dets=100',
-        'map/iou=0.50:0.95/area=small/max_dets=100',
-        'map/iou=0.50:0.95/area=medium/max_dets=100',
-        'map/iou=0.50:0.95/area=large/max_dets=100',
-        'mar/iou=0.50:0.95/area=all/max_dets=1',
-        'mar/iou=0.50:0.95/area=all/max_dets=10',
-        'mar/iou=0.50:0.95/area=all/max_dets=100',
-        'mar/iou=0.50:0.95/area=small/max_dets=100',
-        'mar/iou=0.50:0.95/area=medium/max_dets=100',
-        'mar/iou=0.50:0.95/area=large/max_dets=100',
-    ]
-
-    print('')
-    for key in keys:
-        print('{:s}: {:f}'.format(key, result[key]))
-
-
-if __name__ == '__main__':
-    main()

From c8b08a4a59c3e700b2ad7f76564004d68049e920 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Tue, 19 Feb 2019 13:46:48 +0900
Subject: [PATCH 022/100] speed up _segm_wrt_bbox

---
 chainercv/links/model/mask_rcnn/mask_head.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py
index 90594be847..5f6e5c03d2 100644
--- a/chainercv/links/model/mask_rcnn/mask_head.py
+++ b/chainercv/links/model/mask_rcnn/mask_head.py
@@ -311,11 +311,10 @@ def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels,
 
 def _segm_wrt_bbox(mask, gt_index, bbox, size, xp):
     bbox = chainer.backends.cuda.to_cpu(bbox.astype(np.int32))
-    mask = mask[chainer.backends.cuda.to_cpu(gt_index)]
 
     segm = []
-    for m, bb in zip(mask, bbox):
-        cropped_m = m[bb[0]:bb[2], bb[1]:bb[3]]
+    for i, bb in zip(chainer.backends.cuda.to_cpu(gt_index), bbox):
+        cropped_m = mask[i, bb[0]:bb[2], bb[1]:bb[3]]
         cropped_m = chainer.backends.cuda.to_cpu(cropped_m)
         if cropped_m.shape[0] == 0 or cropped_m.shape[1] == 0:
             segm.append(np.zeros(size, dtype=np.bool))

From 9ef9564eda60f103d1b06606efec5cbf05d1584b Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Tue, 19 Feb 2019 13:53:10 +0900
Subject: [PATCH 023/100] delete unnecessary

---
 examples/mask_rcnn/train_multi.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py
index 36b76ce293..44f1e23249 100644
--- a/examples/mask_rcnn/train_multi.py
+++ b/examples/mask_rcnn/train_multi.py
@@ -173,8 +173,6 @@ def main():
 
     comm = chainermn.create_communicator(args.communicator)
     device = comm.intra_rank
-    global rank
-    rank = comm.rank
 
     if args.model == 'mask_rcnn_fpn_resnet50':
         model = MaskRCNNFPNResNet50(

From 2e18bf682288b435436c77b8b6131c794eb42945 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Tue, 19 Feb 2019 21:00:45 +0900
Subject: [PATCH 024/100] COCOPointDataset -> COCOKeypointDataset

---
 chainercv/datasets/__init__.py                |  2 +-
 ...nt_dataset.py => coco_keypoint_dataset.py} | 20 +++++++++++--------
 2 files changed, 13 insertions(+), 9 deletions(-)
 rename chainercv/datasets/coco/{coco_point_dataset.py => coco_keypoint_dataset.py} (86%)

diff --git a/chainercv/datasets/__init__.py b/chainercv/datasets/__init__.py
index c2ca52af4d..d6598093db 100644
--- a/chainercv/datasets/__init__.py
+++ b/chainercv/datasets/__init__.py
@@ -12,7 +12,7 @@
 from chainercv.datasets.cityscapes.cityscapes_utils import cityscapes_semantic_segmentation_label_names  # NOQA
 from chainercv.datasets.coco.coco_bbox_dataset import COCOBboxDataset  # NOQA
 from chainercv.datasets.coco.coco_instance_segmentation_dataset import COCOInstanceSegmentationDataset  # NOQA
-from chainercv.datasets.coco.coco_point_dataset import COCOPointDataset  # NOQA
+from chainercv.datasets.coco.coco_keypoint_dataset import COCOKeypointDataset  # NOQA
 from chainercv.datasets.coco.coco_semantic_segmentation_dataset import COCOSemanticSegmentationDataset  # NOQA
 from chainercv.datasets.coco.coco_utils import coco_bbox_label_names  # NOQA
 from chainercv.datasets.coco.coco_utils import coco_instance_segmentation_label_names  # NOQA
diff --git a/chainercv/datasets/coco/coco_point_dataset.py b/chainercv/datasets/coco/coco_keypoint_dataset.py
similarity index 86%
rename from chainercv/datasets/coco/coco_point_dataset.py
rename to chainercv/datasets/coco/coco_keypoint_dataset.py
index 6438ef0bf2..de40491100 100644
--- a/chainercv/datasets/coco/coco_point_dataset.py
+++ b/chainercv/datasets/coco/coco_keypoint_dataset.py
@@ -10,11 +10,12 @@
 from chainercv import utils
 
 
-class COCOPointDataset(GetterDataset):
+class COCOKeypointDataset(GetterDataset):
 
     def __init__(self, data_dir='auto', split='train', year='2017',
-                 use_crowded=False, return_area=False, return_crowded=False):
-        super(COCOPointDataset, self).__init__()
+                 use_crowded=False,
+                 return_area=False, return_crowded=False):
+        super(COCOKeypointDataset, self).__init__()
         self.use_crowded = use_crowded
         if data_dir == 'auto':
             data_dir = get_coco(split, split, year, 'instances')
@@ -41,9 +42,9 @@ def __init__(self, data_dir='auto', split='train', year='2017',
 
         self.add_getter('img', self._get_image)
         self.add_getter(
-            ['point', 'bbox', 'label', 'area', 'crowded'],
+            ['point', 'valid', 'bbox', 'label', 'area', 'crowded'],
             self._get_annotations)
-        keys = ('img', 'point', 'bbox', 'label')
+        keys = ('img', 'point', 'valid', 'bbox', 'label')
         if return_area:
             keys += ('area',)
         if return_crowded:
@@ -90,9 +91,11 @@ def _get_annotations(self, i):
             # 0: not labeled; 1: labeled, not inside mask;
             # 2: labeled and inside mask
             v = point[:, 2::3]
-            point = np.stack((y, x, v), axis=2)
+            valid = v > 0
+            point = np.stack((y, x), axis=2)
         else:
-            point = np.array((0, 0, 3), dtype=np.float32)
+            point = np.empty((0, 0, 2), dtype=np.float32)
+            valid = np.empty((0, 0), dtype=np.bool)
 
         # Remove invalid boxes
         bbox_area = np.prod(bbox[:, 2:] - bbox[:, :2], axis=1)
@@ -104,8 +107,9 @@ def _get_annotations(self, i):
             keep_mask = np.logical_and(keep_mask, np.logical_not(crowded))
 
         point = point[keep_mask]
+        valid = valid[keep_mask]
         bbox = bbox[keep_mask]
         label = label[keep_mask]
         area = area[keep_mask]
         crowded = crowded[keep_mask]
-        return point, bbox, label, area, crowded
+        return point, valid, bbox, label, area, crowded

From 8ad8a74a5980f67a599eaa430394d80ef7933999 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Tue, 19 Feb 2019 21:16:31 +0900
Subject: [PATCH 025/100] vis_coco_point -> vis_keypoint_coco

---
 chainercv/visualizations/__init__.py          |  2 +-
 ...vis_coco_point.py => vis_keypoint_coco.py} | 37 ++++++++++---------
 2 files changed, 21 insertions(+), 18 deletions(-)
 rename chainercv/visualizations/{vis_coco_point.py => vis_keypoint_coco.py} (83%)

diff --git a/chainercv/visualizations/__init__.py b/chainercv/visualizations/__init__.py
index 33ef5a9d1f..edc5c41286 100644
--- a/chainercv/visualizations/__init__.py
+++ b/chainercv/visualizations/__init__.py
@@ -1,5 +1,5 @@
 from chainercv.visualizations.vis_bbox import vis_bbox  # NOQA
-from chainercv.visualizations.vis_coco_point import vis_coco_point  # NOQA
+from chainercv.visualizations.vis_keypoint_coco import vis_keypoint_coco  # NOQA
 from chainercv.visualizations.vis_image import vis_image  # NOQA
 from chainercv.visualizations.vis_instance_segmentation import vis_instance_segmentation  # NOQA
 from chainercv.visualizations.vis_point import vis_point  # NOQA
diff --git a/chainercv/visualizations/vis_coco_point.py b/chainercv/visualizations/vis_keypoint_coco.py
similarity index 83%
rename from chainercv/visualizations/vis_coco_point.py
rename to chainercv/visualizations/vis_keypoint_coco.py
index 9666bbbfe6..b5559930ad 100644
--- a/chainercv/visualizations/vis_coco_point.py
+++ b/chainercv/visualizations/vis_keypoint_coco.py
@@ -41,7 +41,13 @@
 ]
 
 
-def vis_coco_point(img, point, point_score, thresh=2, ax=None):
+def vis_keypoint_coco(
+        img, point, valid=None,
+        point_score=None, thresh=2,
+        markersize=3, linewidth=1, ax=None):
+    if valid.dtype != np.bool:
+        raise ValueError('The dtype of `valid` should be np.bool')
+
     from matplotlib import pyplot as plt
 
     # Returns newly instantiated matplotlib.axes.Axes object if ax is None
@@ -50,7 +56,13 @@ def vis_coco_point(img, point, point_score, thresh=2, ax=None):
     cmap = plt.get_cmap('rainbow')
     colors = [cmap(i) for i in np.linspace(0, 1, len(coco_point_skeleton) + 2)]
 
-    # plt.autoscale(False)
+    if point_score is None:
+        point_score = np.inf * np.ones(point.shape[:2], dtype=np.float32)
+
+    if valid is not None:
+        for i, vld in enumerate(valid):
+            point_score[i, np.logical_not(vld)] = -np.inf
+
     for pnt, pnt_sc in zip(point, point_score):
         for l in range(len(coco_point_skeleton)):
             i0 = coco_point_skeleton[l][0]
@@ -63,15 +75,16 @@ def vis_coco_point(img, point, point_score, thresh=2, ax=None):
             x1 = pnt[i1, 1]
             if s0 > thresh and s1 > thresh:
                 line = ax.plot([x0, x1], [y0, y1])
-                plt.setp(line, color=colors[l], linewidth=1.0, alpha=0.7)
+                plt.setp(line, color=colors[l],
+                         linewidth=linewidth, alpha=0.7)
             if s0 > thresh:
                 ax.plot(
                     x0, y0, '.', color=colors[l],
-                    markersize=3.0, alpha=0.7)
+                    markersize=markersize, alpha=0.7)
             if s1 > thresh:
                 ax.plot(
                     x1, y1, '.', color=colors[l],
-                    markersize=3.0, alpha=0.7)
+                    markersize=markersize, alpha=0.7)
 
         # for better visualization, add mid shoulder / mid hip
         mid_shoulder = (
@@ -94,23 +107,13 @@ def vis_coco_point(img, point, point_score, thresh=2, ax=None):
             line = ax.plot(x, y)
             plt.setp(
                 line, color=colors[len(coco_point_skeleton)],
-                linewidth=1.0, alpha=0.7)
+                linewidth=linewidth, alpha=0.7)
         if (mid_shoulder_sc > thresh and mid_hip_sc > thresh):
             y = [mid_shoulder[0], mid_hip[0]]
             x = [mid_shoulder[1], mid_hip[1]]
             line = ax.plot(x, y)
             plt.setp(
                 line, color=colors[len(coco_point_skeleton) + 1],
-                linewidth=1.0, alpha=0.7)
+                linewidth=linewidth, alpha=0.7)
 
     return ax
-
-
-if __name__ == '__main__':
-    data = np.load('vis_point.npz')
-    img = data['img']
-    point = data['point']
-    point_score = data['point_score']
-    # plt.imshow(img)
-    vis_coco_point(img, point, point_score)
-    plt.show()

From a48b2ba5df12f30ea99875ab035ff58627453eca Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Tue, 19 Feb 2019 21:20:53 +0900
Subject: [PATCH 026/100] change representation of coco_keypoint_names

---
 chainercv/datasets/__init__.py                |  2 +-
 chainercv/datasets/coco/coco_utils.py         | 40 ++++----
 chainercv/visualizations/__init__.py          |  2 +-
 chainercv/visualizations/vis_keypoint_coco.py | 92 ++++++++++---------
 4 files changed, 72 insertions(+), 64 deletions(-)

diff --git a/chainercv/datasets/__init__.py b/chainercv/datasets/__init__.py
index d6598093db..ce4030f419 100644
--- a/chainercv/datasets/__init__.py
+++ b/chainercv/datasets/__init__.py
@@ -16,7 +16,7 @@
 from chainercv.datasets.coco.coco_semantic_segmentation_dataset import COCOSemanticSegmentationDataset  # NOQA
 from chainercv.datasets.coco.coco_utils import coco_bbox_label_names  # NOQA
 from chainercv.datasets.coco.coco_utils import coco_instance_segmentation_label_names  # NOQA
-from chainercv.datasets.coco.coco_utils import coco_point_names  # NOQA
+from chainercv.datasets.coco.coco_utils import coco_keypoint_names  # NOQA
 from chainercv.datasets.coco.coco_utils import coco_semantic_segmentation_label_colors  # NOQA
 from chainercv.datasets.coco.coco_utils import coco_semantic_segmentation_label_names  # NOQA
 from chainercv.datasets.cub.cub_label_dataset import CUBLabelDataset  # NOQA
diff --git a/chainercv/datasets/coco/coco_utils.py b/chainercv/datasets/coco/coco_utils.py
index f96ec5803c..aac12861fb 100644
--- a/chainercv/datasets/coco/coco_utils.py
+++ b/chainercv/datasets/coco/coco_utils.py
@@ -441,22 +441,24 @@ def get_coco(split, img_split, year, mode):
 coco_instance_segmentation_label_names = coco_bbox_label_names
 
 
-coco_point_names = [
-    'nose',
-    'left_eye',
-    'right_eye',
-    'left_ear',
-    'right_ear',
-    'left_shoulder',
-    'right_shoulder',
-    'left_elbow',
-    'right_elbow',
-    'left_wrist',
-    'right_wrist',
-    'left_hip',
-    'right_hip',
-    'left_knee',
-    'right_knee',
-    'left_ankle',
-    'right_ankle'
-]
+coco_keypoint_names = {
+    0: [
+        'nose',
+        'left_eye',
+        'right_eye',
+        'left_ear',
+        'right_ear',
+        'left_shoulder',
+        'right_shoulder',
+        'left_elbow',
+        'right_elbow',
+        'left_wrist',
+        'right_wrist',
+        'left_hip',
+        'right_hip',
+        'left_knee',
+        'right_knee',
+        'left_ankle',
+        'right_ankle'
+    ]
+}
diff --git a/chainercv/visualizations/__init__.py b/chainercv/visualizations/__init__.py
index edc5c41286..bf77cf892c 100644
--- a/chainercv/visualizations/__init__.py
+++ b/chainercv/visualizations/__init__.py
@@ -1,6 +1,6 @@
 from chainercv.visualizations.vis_bbox import vis_bbox  # NOQA
-from chainercv.visualizations.vis_keypoint_coco import vis_keypoint_coco  # NOQA
 from chainercv.visualizations.vis_image import vis_image  # NOQA
 from chainercv.visualizations.vis_instance_segmentation import vis_instance_segmentation  # NOQA
+from chainercv.visualizations.vis_keypoint_coco import vis_keypoint_coco  # NOQA
 from chainercv.visualizations.vis_point import vis_point  # NOQA
 from chainercv.visualizations.vis_semantic_segmentation import vis_semantic_segmentation  # NOQA
diff --git a/chainercv/visualizations/vis_keypoint_coco.py b/chainercv/visualizations/vis_keypoint_coco.py
index b5559930ad..f750d23378 100644
--- a/chainercv/visualizations/vis_keypoint_coco.py
+++ b/chainercv/visualizations/vis_keypoint_coco.py
@@ -1,43 +1,44 @@
 from __future__ import division
 
-import matplotlib.pyplot as plt
 import numpy as np
 
-from chainercv.datasets import coco_point_names
+from chainercv.datasets import coco_keypoint_names
 from chainercv.visualizations.vis_image import vis_image
 
 
+human_id = 0
+
 coco_point_skeleton = [
-    [coco_point_names.index('left_eye'),
-     coco_point_names.index('right_eye')],
-    [coco_point_names.index('left_eye'),
-     coco_point_names.index('nose')],
-    [coco_point_names.index('right_eye'),
-     coco_point_names.index('nose')],
-    [coco_point_names.index('right_eye'),
-     coco_point_names.index('right_ear')],
-    [coco_point_names.index('left_eye'),
-     coco_point_names.index('left_ear')],
-    [coco_point_names.index('right_shoulder'),
-     coco_point_names.index('right_elbow')],
-    [coco_point_names.index('right_elbow'),
-     coco_point_names.index('right_wrist')],
-    [coco_point_names.index('left_shoulder'),
-     coco_point_names.index('left_elbow')],
-    [coco_point_names.index('left_elbow'),
-     coco_point_names.index('left_wrist')],
-    [coco_point_names.index('right_hip'),
-     coco_point_names.index('right_knee')],
-    [coco_point_names.index('right_knee'),
-     coco_point_names.index('right_ankle')],
-    [coco_point_names.index('left_hip'),
-     coco_point_names.index('left_knee')],
-    [coco_point_names.index('left_knee'),
-     coco_point_names.index('left_ankle')],
-    [coco_point_names.index('right_shoulder'),
-     coco_point_names.index('left_shoulder')],
-    [coco_point_names.index('right_hip'),
-     coco_point_names.index('left_hip')]
+    [coco_keypoint_names[human_id].index('left_eye'),
+     coco_keypoint_names[human_id].index('right_eye')],
+    [coco_keypoint_names[human_id].index('left_eye'),
+     coco_keypoint_names[human_id].index('nose')],
+    [coco_keypoint_names[human_id].index('right_eye'),
+     coco_keypoint_names[human_id].index('nose')],
+    [coco_keypoint_names[human_id].index('right_eye'),
+     coco_keypoint_names[human_id].index('right_ear')],
+    [coco_keypoint_names[human_id].index('left_eye'),
+     coco_keypoint_names[human_id].index('left_ear')],
+    [coco_keypoint_names[human_id].index('right_shoulder'),
+     coco_keypoint_names[human_id].index('right_elbow')],
+    [coco_keypoint_names[human_id].index('right_elbow'),
+     coco_keypoint_names[human_id].index('right_wrist')],
+    [coco_keypoint_names[human_id].index('left_shoulder'),
+     coco_keypoint_names[human_id].index('left_elbow')],
+    [coco_keypoint_names[human_id].index('left_elbow'),
+     coco_keypoint_names[human_id].index('left_wrist')],
+    [coco_keypoint_names[human_id].index('right_hip'),
+     coco_keypoint_names[human_id].index('right_knee')],
+    [coco_keypoint_names[human_id].index('right_knee'),
+     coco_keypoint_names[human_id].index('right_ankle')],
+    [coco_keypoint_names[human_id].index('left_hip'),
+     coco_keypoint_names[human_id].index('left_knee')],
+    [coco_keypoint_names[human_id].index('left_knee'),
+     coco_keypoint_names[human_id].index('left_ankle')],
+    [coco_keypoint_names[human_id].index('right_shoulder'),
+     coco_keypoint_names[human_id].index('left_shoulder')],
+    [coco_keypoint_names[human_id].index('right_hip'),
+     coco_keypoint_names[human_id].index('left_hip')]
 ]
 
 
@@ -45,6 +46,9 @@ def vis_keypoint_coco(
         img, point, valid=None,
         point_score=None, thresh=2,
         markersize=3, linewidth=1, ax=None):
+    """Visualize bounding boxes inside image.
+
+    """
     if valid.dtype != np.bool:
         raise ValueError('The dtype of `valid` should be np.bool')
 
@@ -88,22 +92,24 @@ def vis_keypoint_coco(
 
         # for better visualization, add mid shoulder / mid hip
         mid_shoulder = (
-            pnt[coco_point_names.index('right_shoulder'), :2] +
-            pnt[coco_point_names.index('left_shoulder'), :2]) / 2
+            pnt[coco_keypoint_names[human_id].index('right_shoulder'), :2] +
+            pnt[coco_keypoint_names[human_id].index('left_shoulder'), :2]) / 2
         mid_shoulder_sc = np.minimum(
-            pnt_sc[coco_point_names.index('right_shoulder')],
-            pnt_sc[coco_point_names.index('left_shoulder')])
+            pnt_sc[coco_keypoint_names[human_id].index('right_shoulder')],
+            pnt_sc[coco_keypoint_names[human_id].index('left_shoulder')])
 
         mid_hip = (
-            pnt[coco_point_names.index('right_hip'), :2] +
-            pnt[coco_point_names.index('left_hip'), :2]) / 2
+            pnt[coco_keypoint_names[human_id].index('right_hip'), :2] +
+            pnt[coco_keypoint_names[human_id].index('left_hip'), :2]) / 2
         mid_hip_sc = np.minimum(
-            pnt_sc[coco_point_names.index('right_hip')],
-            pnt_sc[coco_point_names.index('left_hip')])
+            pnt_sc[coco_keypoint_names[human_id].index('right_hip')],
+            pnt_sc[coco_keypoint_names[human_id].index('left_hip')])
         if (mid_shoulder_sc > thresh and
-                pnt_sc[coco_point_names.index('nose')] > thresh):
-            y = [mid_shoulder[0], pnt[coco_point_names.index('nose'), 0]]
-            x = [mid_shoulder[1], pnt[coco_point_names.index('nose'), 1]]
+                pnt_sc[coco_keypoint_names[human_id].index('nose')] > thresh):
+            y = [mid_shoulder[0],
+                 pnt[coco_keypoint_names[human_id].index('nose'), 0]]
+            x = [mid_shoulder[1],
+                 pnt[coco_keypoint_names[human_id].index('nose'), 1]]
             line = ax.plot(x, y)
             plt.setp(
                 line, color=colors[len(coco_point_skeleton)],

From deb9f9c245f1e7c946e0afb7c68f5a5e2fb52476 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Wed, 20 Feb 2019 10:41:33 +0900
Subject: [PATCH 027/100] add doc

---
 .../datasets/coco/coco_keypoint_dataset.py    | 53 +++++++++++++++++++
 chainercv/visualizations/vis_keypoint_coco.py | 43 ++++++++++++++-
 docs/source/reference/datasets.rst            |  4 ++
 docs/source/reference/visualizations.rst      |  4 ++
 4 files changed, 103 insertions(+), 1 deletion(-)

diff --git a/chainercv/datasets/coco/coco_keypoint_dataset.py b/chainercv/datasets/coco/coco_keypoint_dataset.py
index de40491100..f3d8c4434e 100644
--- a/chainercv/datasets/coco/coco_keypoint_dataset.py
+++ b/chainercv/datasets/coco/coco_keypoint_dataset.py
@@ -12,9 +12,62 @@
 
 class COCOKeypointDataset(GetterDataset):
 
+    """Keypoint dataset for `MS COCO`_.
+
+    This only returns annotation for objects categorized to the "person"
+    category.
+
+    .. _`MS COCO`: http://cocodataset.org/#home
+
+    Args:
+        data_dir (string): Path to the root of the training data. If this is
+            :obj:`auto`, this class will automatically download data for you
+            under :obj:`$CHAINER_DATASET_ROOT/pfnet/chainercv/coco`.
+        split ({'train', 'val'}): Select a split of the dataset.
+        year ({'2014', '2017'}): Use a dataset released in :obj:`year`.
+        use_crowded (bool): If true, use bounding boxes that are labeled as
+            crowded in the original annotation. The default value is
+            :obj:`False`.
+        return_area (bool): If true, this dataset returns areas of masks
+            around objects. The default value is :obj:`False`.
+        return_crowded (bool): If true, this dataset returns a boolean array
+            that indicates whether bounding boxes are labeled as crowded
+            or not. The default value is :obj:`False`.
+
+    This dataset returns the following data.
+
+    .. csv-table::
+        :header: name, shape, dtype, format
+
+        :obj:`img`, ":math:`(3, H, W)`", :obj:`float32`, \
+        "RGB, :math:`[0, 255]`"
+        :obj:`point` [#coco_point_1]_, ":math:`(R, K, 2)`", :obj:`float32`, \
+        ":math:`(y, x)`"
+        :obj:`valid` [#coco_point_1]_, ":math:`(R, K)`", :obj:`bool`, \
+        "true when a keypoint is visible."
+        :obj:`bbox` [#coco_point_1]_, ":math:`(R, 4)`", :obj:`float32`, \
+        ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`"
+        :obj:`label` [#coco_point_1]_, ":math:`(R,)`", :obj:`int32`, \
+        ":math:`[0, \#fg\_class - 1]`"
+        :obj:`area` [#coco_point_1]_ [#coco_point_2]_, ":math:`(R,)`", \
+        :obj:`float32`, --
+        :obj:`crowded` [#coco_point_3]_, ":math:`(R,)`", :obj:`bool`, --
+
+    .. [#coco_point_1] If :obj:`use_crowded = True`, :obj:`point`, \
+        :obj:`valid`, :obj:`bbox`, \
+        :obj:`label` and :obj:`area` contain crowded instances.
+    .. [#coco_point_2] :obj:`area` is available \
+        if :obj:`return_area = True`.
+    .. [#coco_point_3] :obj:`crowded` is available \
+        if :obj:`return_crowded = True`.
+
+    """
+
     def __init__(self, data_dir='auto', split='train', year='2017',
                  use_crowded=False,
                  return_area=False, return_crowded=False):
+        if split not in ['train', 'val']:
+            raise ValueError('Unsupported split is given.')
         super(COCOKeypointDataset, self).__init__()
         self.use_crowded = use_crowded
         if data_dir == 'auto':
diff --git a/chainercv/visualizations/vis_keypoint_coco.py b/chainercv/visualizations/vis_keypoint_coco.py
index f750d23378..d977a59bf9 100644
--- a/chainercv/visualizations/vis_keypoint_coco.py
+++ b/chainercv/visualizations/vis_keypoint_coco.py
@@ -46,7 +46,48 @@ def vis_keypoint_coco(
         img, point, valid=None,
         point_score=None, thresh=2,
         markersize=3, linewidth=1, ax=None):
-    """Visualize bounding boxes inside image.
+    """Visualize keypoints organized as in COCO.
+
+    Example:
+
+        >>> from chainercv.datasets import COCOKeypointDataset
+        >>> from chainercv.visualizations import vis_keypoint_coco
+        >>> import matplotlib.pyplot as plt
+        >>> data = COCOKeypointDataset(split='val')
+        >>> img, point, valid = data[10][:3]
+        >>> vis_keypoint_coco(img, point, valid)
+        >>> plt.show()
+
+    Args:
+        img (~numpy.ndarray): See the table below.
+            If this is :obj:`None`, no image is displayed.
+        point (~numpy.ndarray): See the table below.
+        valid (~numpy.ndarray): See the table below. If this is
+            :obj:`None`, all points are assumed to be visible.
+        point_score (~numpy.ndarray): See the table below. If this
+            is :obj:`None`, the confidence of all points is infinitely
+            large.
+        thresh (float): Points with confidence below :obj:`thresh` are
+            not visualized.
+        markersize (float): The size of vertices.
+        linewidth (float): The thickness of edges.
+        ax (matplotlib.axes.Axes): The visualization is displayed on this
+            axis. If this is :obj:`None` (default), a new axis is created.
+
+    .. csv-table::
+        :header: name, shape, dtype, format
+
+        :obj:`img`, ":math:`(3, H, W)`", :obj:`float32`, \
+        "RGB, :math:`[0, 255]`"
+        :obj:`point`, ":math:`(R, K, 2)`", :obj:`float32`, \
+        ":math:`(y, x)`"
+        :obj:`valid`, ":math:`(R, K)`", :obj:`bool`, \
+        "true when a keypoint is visible."
+        :obj:`point_score`, ":math:`(R, K)`", :obj:`float32`, --
+
+    Returns:
+        ~matplotlib.axes.Axes:
+        Returns the Axes object with the plot for further tweaking.
 
     """
     if valid.dtype != np.bool:
diff --git a/docs/source/reference/datasets.rst b/docs/source/reference/datasets.rst
index ebf878354e..276c3249d9 100644
--- a/docs/source/reference/datasets.rst
+++ b/docs/source/reference/datasets.rst
@@ -73,6 +73,10 @@ COCOInstanceSegmentationDataset
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. autoclass:: COCOInstanceSegmentationDataset
 
+COCOKeypointDataset
+~~~~~~~~~~~~~~~~~~~
+.. autoclass:: COCOKeypointDataset
+
 COCOSemanticSegmentationDataset
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. autoclass:: COCOSemanticSegmentationDataset
diff --git a/docs/source/reference/visualizations.rst b/docs/source/reference/visualizations.rst
index 685b498e43..c316209839 100644
--- a/docs/source/reference/visualizations.rst
+++ b/docs/source/reference/visualizations.rst
@@ -12,6 +12,10 @@ vis_image
 ~~~~~~~~~
 .. autofunction:: vis_image
 
+vis_keypoint_coco
+~~~~~~~~~~~~~~~~~
+.. autofunction:: vis_keypoint_coco
+
 vis_instance_segmentation
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 .. autofunction:: vis_instance_segmentation

From 98301e3390fa74e39b26dd0717aed3268a263002 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Wed, 20 Feb 2019 11:16:01 +0900
Subject: [PATCH 028/100] add test for vis_keypoint_coco

---
 chainercv/visualizations/vis_keypoint_coco.py | 11 ++-
 .../test_vis_keypoint_coco.py                 | 97 +++++++++++++++++++
 2 files changed, 105 insertions(+), 3 deletions(-)
 create mode 100644 tests/visualizations_tests/test_vis_keypoint_coco.py

diff --git a/chainercv/visualizations/vis_keypoint_coco.py b/chainercv/visualizations/vis_keypoint_coco.py
index d977a59bf9..58624fe35c 100644
--- a/chainercv/visualizations/vis_keypoint_coco.py
+++ b/chainercv/visualizations/vis_keypoint_coco.py
@@ -90,9 +90,6 @@ def vis_keypoint_coco(
         Returns the Axes object with the plot for further tweaking.
 
     """
-    if valid.dtype != np.bool:
-        raise ValueError('The dtype of `valid` should be np.bool')
-
     from matplotlib import pyplot as plt
 
     # Returns newly instantiated matplotlib.axes.Axes object if ax is None
@@ -103,8 +100,16 @@ def vis_keypoint_coco(
 
     if point_score is None:
         point_score = np.inf * np.ones(point.shape[:2], dtype=np.float32)
+    if point_score.shape != point.shape[:2]:
+        raise ValueError('Mismatch in the number of instances or joints.')
+    if point.shape[1:] != (len(coco_keypoint_names[human_id]), 2):
+        raise ValueError('point has invalid shape')
 
     if valid is not None:
+        if valid.dtype != np.bool:
+            raise ValueError('The dtype of `valid` should be np.bool')
+        if valid.shape != point.shape[:2]:
+            raise ValueError('Mismatch in the number of instances or joints.')
         for i, vld in enumerate(valid):
             point_score[i, np.logical_not(vld)] = -np.inf
 
diff --git a/tests/visualizations_tests/test_vis_keypoint_coco.py b/tests/visualizations_tests/test_vis_keypoint_coco.py
new file mode 100644
index 0000000000..0a80260ec2
--- /dev/null
+++ b/tests/visualizations_tests/test_vis_keypoint_coco.py
@@ -0,0 +1,97 @@
+import unittest
+
+import numpy as np
+
+from chainer import testing
+
+from chainercv.datasets import coco_keypoint_names
+from chainercv.visualizations import vis_keypoint_coco
+
+try:
+    import matplotlib  # NOQA
+    _available = True
+except ImportError:
+    _available = False
+
+
+human_id = 0
+
+
+def _generate_point(n_inst, size):
+    H, W = size
+    n_joint = len(coco_keypoint_names[human_id])
+    ys = np.random.uniform(0, H, size=(n_inst, n_joint))
+    xs = np.random.uniform(0, W, size=(n_inst, n_joint))
+    point = np.stack((ys, xs), axis=2).astype(np.float32)
+
+    valid = np.random.randint(0, 2, size=(n_inst, n_joint)).astype(np.bool)
+
+    point_score = np.random.uniform(
+        0, 6, size=(n_inst, n_joint)).astype(np.float32)
+    return point, valid, point_score
+
+
+@testing.parameterize(*testing.product({
+    'n_inst': [3, 0],
+    'use_img': [False, True],
+    'use_valid': [False, True],
+    'use_point_score': [False, True]
+}))
+@unittest.skipUnless(_available, 'matplotlib is not installed')
+class TestVisKeypointCOCO(unittest.TestCase):
+
+    def setUp(self):
+        size = (32, 48)
+        self.point, valid, point_score = _generate_point(self.n_inst, size)
+        self.img = (np.random.randint(
+            0, 255, size=(3,) + size).astype(np.float32)
+            if self.use_img else None)
+        self.valid = valid if self.use_valid else None
+        self.point_score = point_score if self.use_point_score else None
+
+    def test_vis_keypoint_coco(self):
+        ax = vis_keypoint_coco(
+            self.img, self.point, self.valid,
+            self.point_score)
+
+        self.assertIsInstance(ax, matplotlib.axes.Axes)
+
+
+@unittest.skipUnless(_available, 'matplotlib is not installed')
+class TestVisKeypointCOCOInvalidInputs(unittest.TestCase):
+
+    def setUp(self):
+        size = (32, 48)
+        n_inst = 10
+        self.point, self.valid, self.point_score = _generate_point(
+            n_inst, size)
+        self.img = np.random.randint(
+            0, 255, size=(3,) + size).astype(np.float32)
+
+    def _check(self, img, point, valid, point_score):
+        with self.assertRaises(ValueError):
+            vis_keypoint_coco(img, point, valid, point_score)
+
+    def test_invalid_n_inst_point(self):
+        self._check(self.img, self.point[:5], self.valid, self.point_score)
+
+    def test_invalid_n_inst_valid(self):
+        self._check(self.img, self.point, self.valid[:5], self.point_score)
+
+    def test_invalid_n_inst_point_score(self):
+        self._check(self.img, self.point, self.valid, self.point_score[:5])
+
+    def test_invalid_n_joint_point(self):
+        self._check(self.img, self.point[:, :15], self.valid, self.point_score)
+
+    def test_invalid_n_joint_valid(self):
+        self._check(self.img, self.point, self.valid[:, :15], self.point_score)
+
+    def test_invalid_n_joint_point_score(self):
+        self._check(self.img, self.point, self.valid, self.point_score[:, :15])
+
+    def test_invalid_valid_dtype(self):
+        self._check(self.img, self.point, self.valid.astype(np.int32),
+                    self.point_score)
+
+testing.run_module(__name__, __file__)

From b63254e4977608e32e7cb2d80868b322a526ed2f Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Wed, 20 Feb 2019 14:57:26 +0900
Subject: [PATCH 029/100] change name and add test

---
 chainercv/evaluations/__init__.py             |   2 +-
 ...oco.py => eval_keypoint_detection_coco.py} |  17 +-
 docs/source/reference/evaluations.rst         |   4 +
 .../test_eval_keypoint_detection_coco.py      | 171 ++++++++++++++++++
 .../evaluations_tests/test_eval_point_coco.py | 136 --------------
 5 files changed, 186 insertions(+), 144 deletions(-)
 rename chainercv/evaluations/{eval_point_coco.py => eval_keypoint_detection_coco.py} (93%)
 create mode 100644 tests/evaluations_tests/test_eval_keypoint_detection_coco.py
 delete mode 100644 tests/evaluations_tests/test_eval_point_coco.py

diff --git a/chainercv/evaluations/__init__.py b/chainercv/evaluations/__init__.py
index b3937cebfd..53017c6bb1 100644
--- a/chainercv/evaluations/__init__.py
+++ b/chainercv/evaluations/__init__.py
@@ -5,7 +5,7 @@
 from chainercv.evaluations.eval_instance_segmentation_coco import eval_instance_segmentation_coco  # NOQA
 from chainercv.evaluations.eval_instance_segmentation_voc import calc_instance_segmentation_voc_prec_rec  # NOQA
 from chainercv.evaluations.eval_instance_segmentation_voc import eval_instance_segmentation_voc  # NOQA
-from chainercv.evaluations.eval_point_coco import eval_point_coco  # NOQA
+from chainercv.evaluations.eval_keypoint_detection_coco import eval_keypoint_detection_coco  # NOQA
 from chainercv.evaluations.eval_semantic_segmentation import calc_semantic_segmentation_confusion  # NOQA
 from chainercv.evaluations.eval_semantic_segmentation import calc_semantic_segmentation_iou  # NOQA
 from chainercv.evaluations.eval_semantic_segmentation import eval_semantic_segmentation  # NOQA
diff --git a/chainercv/evaluations/eval_point_coco.py b/chainercv/evaluations/eval_keypoint_detection_coco.py
similarity index 93%
rename from chainercv/evaluations/eval_point_coco.py
rename to chainercv/evaluations/eval_keypoint_detection_coco.py
index 68f3e00975..50e573ed57 100644
--- a/chainercv/evaluations/eval_point_coco.py
+++ b/chainercv/evaluations/eval_keypoint_detection_coco.py
@@ -14,9 +14,10 @@
     _available = False
 
 
-def eval_point_coco(pred_points, pred_labels, pred_scores,
-                    gt_points, gt_is_valids, gt_bboxes, gt_labels,
-                    gt_areas, gt_crowdeds=None):
+def eval_keypoint_detection_coco(
+        pred_points, pred_labels, pred_scores,
+        gt_points, gt_valids, gt_bboxes=None, gt_labels=None,
+        gt_areas=None, gt_crowdeds=None):
     if not _available:
         raise ValueError(
             'Please install pycocotools \n'
@@ -30,10 +31,10 @@ def eval_point_coco(pred_points, pred_labels, pred_scores,
     pred_labels = iter(pred_labels)
     pred_scores = iter(pred_scores)
     gt_points = iter(gt_points)
-    gt_is_valids = iter(gt_is_valids)
-    gt_bboxes = iter(gt_bboxes)
+    gt_valids = iter(gt_valids)
+    gt_bboxes = (iter(gt_bboxes) if gt_bboxes is not None
+                 else itertools.repeat(None))
     gt_labels = iter(gt_labels)
-
     if gt_areas is None:
         compute_area_dependent_metrics = False
         gt_areas = itertools.repeat(None)
@@ -51,8 +52,10 @@ def eval_point_coco(pred_points, pred_labels, pred_scores,
             gt_bbox, gt_label,
             gt_area, gt_crowded) in enumerate(six.moves.zip(
                 pred_points, pred_labels, pred_scores,
-                gt_points, gt_is_valids, gt_bboxes, gt_labels,
+                gt_points, gt_valids, gt_bboxes, gt_labels,
                 gt_areas, gt_crowdeds)):
+        if gt_bbox is None:
+            gt_bbox = itertools.repeat(None)
         if gt_area is None:
             gt_area = itertools.repeat(None)
         if gt_crowded is None:
diff --git a/docs/source/reference/evaluations.rst b/docs/source/reference/evaluations.rst
index 2befc38e47..553f1b52f6 100644
--- a/docs/source/reference/evaluations.rst
+++ b/docs/source/reference/evaluations.rst
@@ -45,6 +45,10 @@ calc_instance_segmentation_voc_prec_rec
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. autofunction:: calc_instance_segmentation_voc_prec_rec
 
+Keypoint Detection COCO
+-----------------------
+.. autofunction:: eval_keypoint_detection_coco
+
 Semantic Segmentation IoU
 -------------------------
 
diff --git a/tests/evaluations_tests/test_eval_keypoint_detection_coco.py b/tests/evaluations_tests/test_eval_keypoint_detection_coco.py
new file mode 100644
index 0000000000..4e3ece2955
--- /dev/null
+++ b/tests/evaluations_tests/test_eval_keypoint_detection_coco.py
@@ -0,0 +1,171 @@
+import numpy as np
+import os
+from six.moves.urllib import request
+import unittest
+
+from chainer import testing
+
+from chainercv.datasets import coco_keypoint_names
+from chainercv.evaluations import eval_keypoint_detection_coco
+
+try:
+    import pycocotools  # NOQA
+    _available = True
+except ImportError:
+    _available = False
+
+
+human_id = 0
+
+
+def _generate_point(n_inst, size):
+    H, W = size
+    n_joint = len(coco_keypoint_names[human_id])
+    ys = np.random.uniform(0, H, size=(n_inst, n_joint))
+    xs = np.random.uniform(0, W, size=(n_inst, n_joint))
+    point = np.stack((ys, xs), axis=2).astype(np.float32)
+
+    valid = np.random.randint(0, 2, size=(n_inst, n_joint)).astype(np.bool)
+    return point, valid
+
+
+@unittest.skipUnless(_available, 'pycocotools is not installed')
+class TestEvalPointCOCOSingleClass(unittest.TestCase):
+
+    n_inst = 3
+
+    def setUp(self):
+        self.pred_points = []
+        self.pred_labels = []
+        self.pred_scores = []
+        self.gt_points = []
+        self.gt_valids = []
+        self.gt_bboxes = []
+        self.gt_labels = []
+        for i in range(2):
+            point, valid = _generate_point(self.n_inst, (32, 48))
+            self.pred_points.append(point)
+            self.pred_labels.append(np.zeros((self.n_inst,), dtype=np.int32))
+            self.pred_scores.append(np.random.uniform(
+                0.5, 1, size=(self.n_inst,)).astype(np.float32))
+            self.gt_points.append(point)
+            self.gt_valids.append(valid)
+            bbox = np.zeros((self.n_inst, 4), dtype=np.float32)
+            for i, pnt in enumerate(point):
+                y_min = np.min(pnt[:, 0])
+                x_min = np.min(pnt[:, 1])
+                y_max = np.max(pnt[:, 0])
+                x_max = np.max(pnt[:, 1])
+                bbox[i] = [y_min, x_min, y_max, x_max]
+            self.gt_bboxes.append(bbox)
+            self.gt_labels.append(np.zeros((self.n_inst,), dtype=np.int32))
+
+    def _check(self, result):
+        self.assertEqual(result['map/iou=0.50:0.95/area=all/max_dets=20'], 1)
+        self.assertEqual(result['map/iou=0.50/area=all/max_dets=20'], 1)
+        self.assertEqual(result['map/iou=0.75/area=all/max_dets=20'], 1)
+        self.assertEqual(result['mar/iou=0.50:0.95/area=all/max_dets=20'], 1)
+        self.assertEqual(result['mar/iou=0.50/area=all/max_dets=20'], 1)
+        self.assertEqual(result['mar/iou=0.75/area=all/max_dets=20'], 1)
+
+    def test_gt_bboxes_not_supplied(self):
+        result = eval_keypoint_detection_coco(
+            self.pred_points, self.pred_labels, self.pred_scores,
+            self.gt_points, self.gt_valids, None, self.gt_labels)
+        self._check(result)
+
+    def test_area_not_supplied(self):
+        result = eval_keypoint_detection_coco(
+            self.pred_points, self.pred_labels, self.pred_scores,
+            self.gt_points, self.gt_valids, self.gt_bboxes, self.gt_labels)
+        self._check(result)
+
+        self.assertFalse(
+            'map/iou=0.50:0.95/area=medium/max_dets=20' in result)
+        self.assertFalse(
+            'map/iou=0.50:0.95/area=large/max_dets=20' in result)
+        self.assertFalse(
+            'mar/iou=0.50:0.95/area=medium/max_dets=20' in result)
+        self.assertFalse(
+            'mar/iou=0.50:0.95/area=large/max_dets=20' in result)
+
+    def test_area_supplied(self):
+        gt_areas = [[100] * self.n_inst for _ in range(2)]
+        result = eval_keypoint_detection_coco(
+            self.pred_points, self.pred_labels, self.pred_scores,
+            self.gt_points, self.gt_valids, self.gt_bboxes, self.gt_labels,
+            gt_areas=gt_areas,
+        )
+        self._check(result)
+        self.assertTrue(
+            'map/iou=0.50:0.95/area=medium/max_dets=20' in result)
+        self.assertTrue(
+            'map/iou=0.50:0.95/area=large/max_dets=20' in result)
+        self.assertTrue(
+            'mar/iou=0.50:0.95/area=medium/max_dets=20' in result)
+        self.assertTrue(
+            'mar/iou=0.50:0.95/area=large/max_dets=20' in result)
+
+    def test_crowded_supplied(self):
+        gt_crowdeds = [[True] * self.n_inst for _ in range(2)]
+        result = eval_keypoint_detection_coco(
+            self.pred_points, self.pred_labels, self.pred_scores,
+            self.gt_points, self.gt_valids, self.gt_bboxes, self.gt_labels,
+            gt_crowdeds=gt_crowdeds,
+        )
+        # When the only ground truth is crowded, nothing is evaluated.
+        # In that case, all the results are nan.
+        self.assertTrue(
+            np.isnan(result['map/iou=0.50:0.95/area=all/max_dets=20']))
+
+
+@unittest.skipUnless(_available, 'pycocotools is not installed')
+class TestEvalKeypointDetectionCOCO(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        base_url = 'https://chainercv-models.preferred.jp/tests'
+
+        cls.dataset = np.load(request.urlretrieve(os.path.join(
+            base_url,
+            'eval_keypoint_detection_coco_dataset_2019_02_20.npz'))[0])
+        cls.result = np.load(request.urlretrieve(os.path.join(
+            base_url,
+            'eval_keypoint_detection_coco_result_2019_02_20.npz'))[0])
+
+    def test_eval_keypoint_detection_coco(self):
+        pred_points = self.result['points']
+        pred_labels = self.result['labels']
+        pred_scores = self.result['scores']
+
+        gt_points = self.dataset['points']
+        gt_valids = self.dataset['valids']
+        gt_bboxes = self.dataset['bboxes']
+        gt_labels = self.dataset['labels']
+        gt_areas = self.dataset['areas']
+        gt_crowdeds = self.dataset['crowdeds']
+
+        result = eval_keypoint_detection_coco(
+            pred_points, pred_labels, pred_scores,
+            gt_points, gt_valids, gt_bboxes,
+            gt_labels, gt_areas, gt_crowdeds)
+
+        expected = {
+            'map/iou=0.50:0.95/area=all/max_dets=20': 0.37733572721481323,
+            'map/iou=0.50/area=all/max_dets=20': 0.6448841691017151,
+            'map/iou=0.75/area=all/max_dets=20': 0.35469090938568115,
+            'map/iou=0.50:0.95/area=medium/max_dets=20': 0.3894105851650238,
+            'map/iou=0.50:0.95/area=large/max_dets=20': 0.39169296622276306,
+            'mar/iou=0.50:0.95/area=all/max_dets=20': 0.5218977928161621,
+            'mar/iou=0.50/area=all/max_dets=20': 0.7445255517959595,
+            'mar/iou=0.75/area=all/max_dets=20': 0.510948896408081,
+            'mar/iou=0.50:0.95/area=medium/max_dets=20': 0.5150684714317322,
+            'mar/iou=0.50:0.95/area=large/max_dets=20': 0.5296875238418579,
+        }
+
+        for key, item in expected.items():
+            np.testing.assert_almost_equal(
+                result[key], expected[key], decimal=5)
+
+
+testing.run_module(__name__, __file__)
diff --git a/tests/evaluations_tests/test_eval_point_coco.py b/tests/evaluations_tests/test_eval_point_coco.py
deleted file mode 100644
index bc2095eefd..0000000000
--- a/tests/evaluations_tests/test_eval_point_coco.py
+++ /dev/null
@@ -1,136 +0,0 @@
-import numpy as np
-import os
-from six.moves.urllib import request
-import unittest
-
-from chainer import testing
-
-from chainercv.evaluations import eval_point_coco
-
-try:
-    import pycocotools  # NOQA
-    _available = True
-except ImportError:
-    _available = False
-
-
-# @unittest.skipUnless(_available, 'pycocotools is not installed')
-# class TestEvalPointCOCOSingleClass(unittest.TestCase):
-# 
-#     def setUp(self):
-#         self.pred_bboxes = np.array([[[0, 0, 10, 10], [0, 0, 20, 20]]])
-#         self.pred_labels = np.array([[0, 0]])
-#         self.pred_scores = np.array([[0.8, 0.9]])
-#         self.gt_bboxes = np.array([[[0, 0, 10, 9]]])
-#         self.gt_labels = np.array([[0, 0]])
-# 
-#     def test_crowded(self):
-#         result = eval_detection_coco(self.pred_bboxes, self.pred_labels,
-#                                      self.pred_scores,
-#                                      self.gt_bboxes, self.gt_labels,
-#                                      gt_crowdeds=[[True]])
-#         # When the only ground truth is crowded, nothing is evaluated.
-#         # In that case, all the results are nan.
-#         self.assertTrue(
-#             np.isnan(result['map/iou=0.50:0.95/area=all/max_dets=100']))
-#         self.assertTrue(
-#             np.isnan(result['map/iou=0.50/area=all/max_dets=100']))
-#         self.assertTrue(
-#             np.isnan(result['map/iou=0.75/area=all/max_dets=100']))
-# 
-#     def test_area_not_supplied(self):
-#         result = eval_detection_coco(self.pred_bboxes, self.pred_labels,
-#                                      self.pred_scores,
-#                                      self.gt_bboxes, self.gt_labels)
-#         self.assertFalse(
-#             'map/iou=0.50:0.95/area=small/max_dets=100' in result)
-#         self.assertFalse(
-#             'map/iou=0.50:0.95/area=medium/max_dets=100' in result)
-#         self.assertFalse(
-#             'map/iou=0.50:0.95/area=large/max_dets=100' in result)
-# 
-#     def test_area_specified(self):
-#         result = eval_detection_coco(self.pred_bboxes, self.pred_labels,
-#                                      self.pred_scores,
-#                                      self.gt_bboxes, self.gt_labels,
-#                                      gt_areas=[[2048]])
-#         self.assertFalse(
-#             np.isnan(result['map/iou=0.50:0.95/area=medium/max_dets=100']))
-#         self.assertTrue(
-#             np.isnan(result['map/iou=0.50:0.95/area=small/max_dets=100']))
-#         self.assertTrue(
-#             np.isnan(result['map/iou=0.50:0.95/area=large/max_dets=100']))
-
-
-# @unittest.skipUnless(_available, 'pycocotools is not installed')
-# class TestEvalPointCOCOSomeClassNonExistent(unittest.TestCase):
-# 
-#     def setUp(self):
-#         self.pred_bboxes = np.array([[[0, 0, 10, 10], [0, 0, 20, 20]]])
-#         self.pred_labels = np.array([[1, 2]])
-#         self.pred_scores = np.array([[0.8, 0.9]])
-#         self.gt_bboxes = np.array([[[0, 0, 10, 9]]])
-#         self.gt_labels = np.array([[1, 2]])
-# 
-#     def test(self):
-#         result = eval_detection_coco(self.pred_bboxes, self.pred_labels,
-#                                      self.pred_scores,
-#                                      self.gt_bboxes, self.gt_labels)
-#         self.assertEqual(
-#             result['ap/iou=0.50:0.95/area=all/max_dets=100'].shape, (3,))
-#         self.assertTrue(
-#             np.isnan(result['ap/iou=0.50:0.95/area=all/max_dets=100'][0]))
-#         self.assertEqual(
-#             np.nanmean(result['ap/iou=0.50:0.95/area=all/max_dets=100'][1:]),
-#             result['map/iou=0.50:0.95/area=all/max_dets=100'])
-# 
-
-@unittest.skipUnless(_available, 'pycocotools is not installed')
-class TestEvalPointCOCO(unittest.TestCase):
-
-    @classmethod
-    def setUpClass(cls):
-        base_url = 'https://chainercv-models.preferred.jp/tests'
-
-        cls.dataset = np.load(request.urlretrieve(os.path.join(
-            base_url, 'eval_point_coco_dataset_2019_02_18.npz'))[0])
-        cls.result = np.load(request.urlretrieve(os.path.join(
-            base_url, 'eval_point_coco_result_2019_02_18.npz'))[0])
-
-    def test_eval_detection_coco(self):
-        pred_points = self.result['points']
-        pred_labels = self.result['labels']
-        pred_scores = self.result['scores']
-
-        gt_points = self.dataset['points']
-        gt_is_valids = self.dataset['is_valids']
-        gt_bboxes = self.dataset['bboxes']
-        gt_labels = self.dataset['labels']
-        gt_areas = self.dataset['areas']
-        gt_crowdeds = self.dataset['crowdeds']
-
-        result = eval_point_coco(
-            pred_points, pred_labels, pred_scores,
-            gt_points, gt_is_valids, gt_bboxes,
-            gt_labels, gt_areas, gt_crowdeds)
-
-
-        expected = {
-            'map/iou=0.50:0.95/area=all/max_dets=20': 0.37733572721481323,
-            'map/iou=0.50/area=all/max_dets=20': 0.6448841691017151,
-            'map/iou=0.75/area=all/max_dets=20': 0.35469090938568115,
-            'map/iou=0.50:0.95/area=medium/max_dets=20': 0.3894105851650238,
-            'map/iou=0.50:0.95/area=large/max_dets=20': 0.39169296622276306,
-            'mar/iou=0.50:0.95/area=all/max_dets=20': 0.5218977928161621,
-            'mar/iou=0.50/area=all/max_dets=20': 0.7445255517959595,
-            'mar/iou=0.75/area=all/max_dets=20': 0.510948896408081,
-            'mar/iou=0.50:0.95/area=medium/max_dets=20': 0.5150684714317322,
-            'mar/iou=0.50:0.95/area=large/max_dets=20': 0.5296875238418579,
-        }
-
-        for key, item in expected.items():
-            np.testing.assert_almost_equal(
-                result[key], expected[key], decimal=5)
-
-
-testing.run_module(__name__, __file__)

From b1f6454abcb5fa4242524b1a58a7cbb359ee012d Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Wed, 20 Feb 2019 15:37:43 +0900
Subject: [PATCH 030/100] update doc

---
 .../eval_keypoint_detection_coco.py           | 116 ++++++++++++++++++
 1 file changed, 116 insertions(+)

diff --git a/chainercv/evaluations/eval_keypoint_detection_coco.py b/chainercv/evaluations/eval_keypoint_detection_coco.py
index 50e573ed57..6ff5692bc9 100644
--- a/chainercv/evaluations/eval_keypoint_detection_coco.py
+++ b/chainercv/evaluations/eval_keypoint_detection_coco.py
@@ -18,6 +18,122 @@ def eval_keypoint_detection_coco(
         pred_points, pred_labels, pred_scores,
         gt_points, gt_valids, gt_bboxes=None, gt_labels=None,
         gt_areas=None, gt_crowdeds=None):
+    """Evaluate keypoint detection based on evaluation code of MS COCO.
+
+    This function evaluates predicted keypoints obtained by using average
+    precision for each class.
+    The code is based on the evaluation code used in MS COCO.
+
+    Args:
+        pred_points (iterable of numpy.ndarray): See the table below.
+        pred_labels (iterable of numpy.ndarray): See the table below.
+        pred_scores (iterable of numpy.ndarray): See the table below.
+            This is used to rank instances. Note that this is not
+            the confidene for each keypoint.
+        gt_points (iterable of numpy.ndarray): See the table below.
+        gt_valids (iterable of numpy.ndarray): See the table below.
+        gt_bboxes (iterable of numpy.ndarray): See the table below.
+            This is optional. If this is :obj:`None`, the ground truth
+            bounding boxes are esitmated from the ground truth
+            keypoints.
+        gt_labels (iterable of numpy.ndarray): See the table below.
+        gt_areas (iterable of numpy.ndarray): See the table below. If
+            :obj:`None`, some scores are not returned.
+        gt_crowdeds (iterable of numpy.ndarray): See the table below.
+
+    .. csv-table::
+        :header: name, shape, dtype, format
+
+        :obj:`pred_points`, ":math:`[(R, K, 2)]`", :obj:`float32`, \
+        ":math:`(y, x)`"
+        :obj:`pred_labels`, ":math:`[(R,)]`", :obj:`int32`, \
+        ":math:`[0, \#fg\_class - 1]`"
+        :obj:`pred_scores`, ":math:`[(R,)]`", :obj:`float32`, \
+        --
+        :obj:`gt_points`, ":math:`[(R, K, 2)]`", :obj:`float32`, \
+        ":math:`(y, x)`"
+        :obj:`gt_valids`, ":math:`[(R, K)]`", :obj:`bool`, --
+        :obj:`gt_bboxes`, ":math:`[(R, 4)]`", :obj:`float32`, \
+        ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`"
+        :obj:`gt_labels`, ":math:`[(R,)]`", :obj:`int32`, \
+        ":math:`[0, \#fg\_class - 1]`"
+        :obj:`gt_areas`, ":math:`[(R,)]`", \
+        :obj:`float32`, --
+        :obj:`gt_crowdeds`, ":math:`[(R,)]`", :obj:`bool`, --
+
+
+    Returns:
+        dict:
+
+        The keys, value-types and the description of the values are listed
+        below. The APs and ARs are calculated with different iou
+        thresholds, sizes of objects, and numbers of detections
+        per image. For more details on the 12 patterns of evaluation metrics,
+        please refer to COCO's official `evaluation page`_.
+
+        .. csv-table::
+            :header: key, type, description
+
+            ap/iou=0.50:0.95/area=all/max_dets=20, *numpy.ndarray*, \
+                [#coco_kp_eval_1]_
+            ap/iou=0.50/area=all/max_dets=20, *numpy.ndarray*, \
+                [#coco_kp_eval_1]_
+            ap/iou=0.75/area=all/max_dets=20, *numpy.ndarray*, \
+                [#coco_kp_eval_1]_
+            ap/iou=0.50:0.95/area=medium/max_dets=20, *numpy.ndarray*, \
+                [#coco_kp_eval_1]_ [#coco_kp_eval_5]_
+            ap/iou=0.50:0.95/area=large/max_dets=20, *numpy.ndarray*, \
+                [#coco_kp_eval_1]_ [#coco_kp_eval_5]_
+            ar/iou=0.50:0.95/area=all/max_dets=20, *numpy.ndarray*, \
+                [#coco_kp_eval_2]_
+            ar/iou=0.50/area=all/max_dets=20, *numpy.ndarray*, \
+                [#coco_kp_eval_2]_
+            ar/iou=0.75/area=all/max_dets=20, *numpy.ndarray*, \
+                [#coco_kp_eval_2]_
+            ar/iou=0.50:0.95/area=medium/max_dets=20, *numpy.ndarray*, \
+                [#coco_kp_eval_2]_ [#coco_kp_eval_5]_
+            ar/iou=0.50:0.95/area=large/max_dets=20, *numpy.ndarray*, \
+                [#coco_kp_eval_2]_ [#coco_kp_eval_5]_
+            map/iou=0.50:0.95/area=all/max_dets=20, *float*, \
+                [#coco_kp_eval_3]_
+            map/iou=0.50/area=all/max_dets=20, *float*, \
+                [#coco_kp_eval_3]_
+            map/iou=0.75/area=all/max_dets=20, *float*, \
+                [#coco_kp_eval_3]_
+            map/iou=0.50:0.95/area=medium/max_dets=20, *float*, \
+                [#coco_kp_eval_3]_ [#coco_kp_eval_5]_
+            map/iou=0.50:0.95/area=large/max_dets=20, *float*, \
+                [#coco_kp_eval_3]_ [#coco_kp_eval_5]_
+            mar/iou=0.50:0.95/area=all/max_dets=20, *float*, \
+                [#coco_kp_eval_4]_
+            mar/iou=0.50/area=all/max_dets=20, *float*, \
+                [#coco_kp_eval_4]_
+            mar/iou=0.75/area=all/max_dets=20, *float*, \
+                [#coco_kp_eval_4]_
+            mar/iou=0.50:0.95/area=medium/max_dets=20, *float*, \
+                [#coco_kp_eval_4]_ [#coco_kp_eval_5]_
+            mar/iou=0.50:0.95/area=large/max_dets=20, *float*, \
+                [#coco_kp_eval_4]_ [#coco_kp_eval_5]_
+            coco_eval, *pycocotools.cocoeval.COCOeval*, \
+                result from :obj:`pycocotools`
+            existent_labels, *numpy.ndarray*, \
+                used labels \
+
+    .. [#coco_kp_eval_1] An array of average precisions. \
+        The :math:`l`-th value corresponds to the average precision \
+        for class :math:`l`. If class :math:`l` does not exist in \
+        either :obj:`pred_labels` or :obj:`gt_labels`, the corresponding \
+        value is set to :obj:`numpy.nan`.
+    .. [#coco_kp_eval_2] An array of average recalls. \
+        The :math:`l`-th value corresponds to the average recall \
+        for class :math:`l`. If class :math:`l` does not exist in \
+        either :obj:`pred_labels` or :obj:`gt_labels`, the corresponding \
+        value is set to :obj:`numpy.nan`.
+    .. [#coco_kp_eval_3] The average of average precisions over classes.
+    .. [#coco_kp_eval_4] The average of average recalls over classes.
+    .. [#coco_kp_eval_5] Skip if :obj:`gt_areas` is :obj:`None`.
+
+    """
     if not _available:
         raise ValueError(
             'Please install pycocotools \n'

From 9cc885ab04b19221b53ce47b6c8f053b25942fcf Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Wed, 20 Feb 2019 21:19:11 +0900
Subject: [PATCH 031/100] use Conv2DActiv

---
 chainercv/links/model/mask_rcnn/mask_head.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py
index 5f6e5c03d2..d1436785c1 100644
--- a/chainercv/links/model/mask_rcnn/mask_head.py
+++ b/chainercv/links/model/mask_rcnn/mask_head.py
@@ -11,6 +11,7 @@
 from chainer.initializers import HeNormal
 import chainer.links as L
 
+from chainercv.links import Conv2DActiv
 from chainercv.transforms.image.resize import resize
 from chainercv.utils.bbox.bbox_iou import bbox_iou
 
@@ -36,10 +37,10 @@ def __init__(self, n_class, scales):
 
         initialW = HeNormal(1, fan_option='fan_out')
         with self.init_scope():
-            self.conv1 = L.Convolution2D(256, 3, pad=1, initialW=initialW)
-            self.conv2 = L.Convolution2D(256, 3, pad=1, initialW=initialW)
-            self.conv3 = L.Convolution2D(256, 3, pad=1, initialW=initialW)
-            self.conv4 = L.Convolution2D(256, 3, pad=1, initialW=initialW)
+            self.conv1 = Conv2DActiv(256, 3, pad=1, initialW=initialW)
+            self.conv2 = Conv2DActiv(256, 3, pad=1, initialW=initialW)
+            self.conv3 = Conv2DActiv(256, 3, pad=1, initialW=initialW)
+            self.conv4 = Conv2DActiv(256, 3, pad=1, initialW=initialW)
             self.conv5 = L.Deconvolution2D(
                 256, 2, pad=0, stride=2, initialW=initialW)
             self.seg = L.Convolution2D(n_class, 1, pad=0, initialW=initialW)
@@ -66,10 +67,10 @@ def __call__(self, hs, rois, roi_indices):
             return segs
 
         h = F.concat(pooled_hs, axis=0)
-        h = F.relu(self.conv1(h))
-        h = F.relu(self.conv2(h))
-        h = F.relu(self.conv3(h))
-        h = F.relu(self.conv4(h))
+        h = self.conv1(h)
+        h = self.conv2(h)
+        h = self.conv3(h)
+        h = self.conv4(h)
         h = F.relu(self.conv5(h))
         return self.seg(h)
 

From 5ffd8b3d27ad4bf4d264a7982d3e6e03b849a27c Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Thu, 21 Feb 2019 16:40:54 +0900
Subject: [PATCH 032/100] change interface of point/transforms

---
 chainercv/transforms/point/flip_point.py      | 39 +++++++++++++------
 chainercv/transforms/point/resize_point.py    | 33 +++++++++++-----
 chainercv/transforms/point/translate_point.py | 30 +++++++++-----
 .../point_tests/test_flip_point.py            | 24 ++++++++++--
 .../point_tests/test_resize_point.py          | 18 +++++++--
 .../point_tests/test_translate_point.py       | 19 +++++++--
 6 files changed, 120 insertions(+), 43 deletions(-)

diff --git a/chainercv/transforms/point/flip_point.py b/chainercv/transforms/point/flip_point.py
index 104929e5bf..36e279ab7d 100644
--- a/chainercv/transforms/point/flip_point.py
+++ b/chainercv/transforms/point/flip_point.py
@@ -1,12 +1,11 @@
+import numpy as np
+
+
 def flip_point(point, size, y_flip=False, x_flip=False):
     """Modify points according to image flips.
 
     Args:
-        point (~numpy.ndarray): Points in the image.
-            The shape of this array is :math:`(P, 2)`. :math:`P` is the number
-            of points in the image.
-            The last dimension is composed of :math:`y` and :math:`x`
-            coordinates of the points.
+        point (~numpy.ndarray or list of arrays): See the table below.
         size (tuple): A tuple of length 2. The height and the width
             of the image, which is associated with the points.
         y_flip (bool): Modify points according to a vertical flip of
@@ -14,15 +13,31 @@ def flip_point(point, size, y_flip=False, x_flip=False):
         x_flip (bool): Modify keypoipoints according to a horizontal flip of
             an image.
 
+    .. csv-table::
+        :header: name, shape, dtype, format
+
+        :obj:`point`, ":math:`[(K, 2)]` or :math:`(R, K, 2)`", \
+        :obj:`float32`, ":math:`(y, x)`"
+
     Returns:
-        ~numpy.ndarray:
+        ~numpy.ndarray or list of arrays:
         Points modified according to image flips.
 
     """
     H, W = size
-    point = point.copy()
-    if y_flip:
-        point[:, 0] = H - point[:, 0]
-    if x_flip:
-        point[:, 1] = W - point[:, 1]
-    return point
+    if isinstance(point, np.ndarray):
+        out_point = point.copy()
+        if y_flip:
+            out_point[:, :, 0] = H - out_point[:, :, 0]
+        if x_flip:
+            out_point[:, :, 1] = W - out_point[:, :, 1]
+    else:
+        out_point = []
+        for pnt in point:
+            pnt = pnt.copy()
+            if y_flip:
+                pnt[:, 0] = H - pnt[:, 0]
+            if x_flip:
+                pnt[:, 1] = W - pnt[:, 1]
+            out_point.append(pnt)
+    return out_point
diff --git a/chainercv/transforms/point/resize_point.py b/chainercv/transforms/point/resize_point.py
index 0991fd4170..061efc0410 100644
--- a/chainercv/transforms/point/resize_point.py
+++ b/chainercv/transforms/point/resize_point.py
@@ -1,25 +1,38 @@
+import numpy as np
+
+
 def resize_point(point, in_size, out_size):
     """Adapt point coordinates to the rescaled image space.
 
     Args:
-        point (~numpy.ndarray): Points in the image.
-            The shape of this array is :math:`(P, 2)`. :math:`P` is the number
-            of points in the image.
-            The last dimension is composed of :math:`y` and :math:`x`
-            coordinates of the points.
+        point (~numpy.ndarray or list of arrays): See the table below.
         in_size (tuple): A tuple of length 2. The height and the width
             of the image before resized.
         out_size (tuple): A tuple of length 2. The height and the width
             of the image after resized.
 
+    .. csv-table::
+        :header: name, shape, dtype, format
+
+        :obj:`point`, ":math:`[(K, 2)]` or :math:`(R, K, 2)`", \
+        :obj:`float32`, ":math:`(y, x)`"
+
     Returns:
-        ~numpy.ndarray:
+        ~numpy.ndarray or list of arrays:
         Points rescaled according to the given image shapes.
 
     """
-    point = point.copy()
     y_scale = float(out_size[0]) / in_size[0]
     x_scale = float(out_size[1]) / in_size[1]
-    point[:, 0] = y_scale * point[:, 0]
-    point[:, 1] = x_scale * point[:, 1]
-    return point
+    if isinstance(point, np.ndarray):
+        out_point = point.copy()
+        out_point[:, :, 0] = y_scale * point[:, :, 0]
+        out_point[:, :, 1] = x_scale * point[:, :, 1]
+    else:
+        out_point = []
+        for pnt in point:
+            out_pnt = pnt.copy()
+            out_pnt[:, 0] = y_scale * pnt[:, 0]
+            out_pnt[:, 1] = x_scale * pnt[:, 1]
+            out_point.append(out_pnt)
+    return out_point
diff --git a/chainercv/transforms/point/translate_point.py b/chainercv/transforms/point/translate_point.py
index bd05f91244..c4a9e911bf 100644
--- a/chainercv/transforms/point/translate_point.py
+++ b/chainercv/transforms/point/translate_point.py
@@ -1,3 +1,6 @@
+import numpy as np
+
+
 def translate_point(point, y_offset=0, x_offset=0):
     """Translate points.
 
@@ -6,23 +9,32 @@ def translate_point(point, y_offset=0, x_offset=0):
     to the coordinate :math:`(y, x) = (y_{offset}, x_{offset})`.
 
     Args:
-        point (~numpy.ndarray): Points in the image.
-            The shape of this array is :math:`(P, 2)`. :math:`P` is the number
-            of points in the image.
-            The last dimension is composed of :math:`y` and :math:`x`
-            coordinates of the points.
+        point (~numpy.ndarray or list of arrays): See the table below.
         y_offset (int or float): The offset along y axis.
         x_offset (int or float): The offset along x axis.
 
+    .. csv-table::
+        :header: name, shape, dtype, format
+
+        :obj:`point`, ":math:`[(K, 2)]` or :math:`(R, K, 2)`", \
+        :obj:`float32`, ":math:`(y, x)`"
+
     Returns:
         ~numpy.ndarray:
         Points modified translation of an image.
 
     """
 
-    out_point = point.copy()
-
-    out_point[:, 0] += y_offset
-    out_point[:, 1] += x_offset
+    if isinstance(point, np.ndarray):
+        out_point = point.copy()
 
+        out_point[:, :, 0] += y_offset
+        out_point[:, :, 1] += x_offset
+    else:
+        out_point = []
+        for pnt in point:
+            out_pnt = pnt.copy()
+            out_pnt[:, 0] += y_offset
+            out_pnt[:, 1] += x_offset
+            out_point.append(out_pnt)
     return out_point
diff --git a/tests/transforms_tests/point_tests/test_flip_point.py b/tests/transforms_tests/point_tests/test_flip_point.py
index ac6dc4d690..f02ae8b33d 100644
--- a/tests/transforms_tests/point_tests/test_flip_point.py
+++ b/tests/transforms_tests/point_tests/test_flip_point.py
@@ -8,19 +8,35 @@
 
 class TestFlipPoint(unittest.TestCase):
 
-    def test_flip_point(self):
+    def test_flip_point_ndarray(self):
         point = np.random.uniform(
-            low=0., high=32., size=(12, 2))
+            low=0., high=32., size=(3, 12, 2))
 
         out = flip_point(point, size=(34, 32), y_flip=True)
         point_expected = point.copy()
-        point_expected[:, 0] = 34 - point[:, 0]
+        point_expected[:, :, 0] = 34 - point[:, :, 0]
         np.testing.assert_equal(out, point_expected)
 
         out = flip_point(point, size=(34, 32), x_flip=True)
         point_expected = point.copy()
-        point_expected[:, 1] = 32 - point[:, 1]
+        point_expected[:, :, 1] = 32 - point[:, :, 1]
         np.testing.assert_equal(out, point_expected)
 
+    def test_flip_point_list(self):
+        point = [np.random.uniform(
+            low=0., high=32., size=(12, 2))]
+
+        out = flip_point(point, size=(34, 32), y_flip=True)
+        for i, pnt in enumerate(point):
+            pnt_expected = pnt.copy()
+            pnt_expected[:, 0] = 34 - pnt[:, 0]
+            np.testing.assert_equal(out[i], pnt_expected)
+
+        out = flip_point(point, size=(34, 32), x_flip=True)
+        for i, pnt in enumerate(point):
+            pnt_expected = pnt.copy()
+            pnt_expected[:, 1] = 32 - pnt[:, 1]
+            np.testing.assert_equal(out[i], pnt_expected)
+
 
 testing.run_module(__name__, __file__)
diff --git a/tests/transforms_tests/point_tests/test_resize_point.py b/tests/transforms_tests/point_tests/test_resize_point.py
index a3fb7b172b..79ce01daff 100644
--- a/tests/transforms_tests/point_tests/test_resize_point.py
+++ b/tests/transforms_tests/point_tests/test_resize_point.py
@@ -8,14 +8,24 @@
 
 class TestResizePoint(unittest.TestCase):
 
-    def test_resize_point(self):
+    def test_resize_point_ndarray(self):
         point = np.random.uniform(
-            low=0., high=32., size=(12, 2))
+            low=0., high=32., size=(3, 12, 2))
 
         out = resize_point(point, in_size=(16, 32), out_size=(8, 64))
-        point[:, 0] *= 0.5
-        point[:, 1] *= 2
+        point[:, :, 0] *= 0.5
+        point[:, :, 1] *= 2
         np.testing.assert_equal(out, point)
 
+    def test_resize_point_list(self):
+        point = [np.random.uniform(
+            low=0., high=32., size=(12, 2))]
+
+        out = resize_point(point, in_size=(16, 32), out_size=(8, 64))
+        for i, pnt in enumerate(point):
+            pnt[:, 0] *= 0.5
+            pnt[:, 1] *= 2
+            np.testing.assert_equal(out[i], pnt)
+
 
 testing.run_module(__name__, __file__)
diff --git a/tests/transforms_tests/point_tests/test_translate_point.py b/tests/transforms_tests/point_tests/test_translate_point.py
index 1030bf22cb..8851d13e3d 100644
--- a/tests/transforms_tests/point_tests/test_translate_point.py
+++ b/tests/transforms_tests/point_tests/test_translate_point.py
@@ -8,15 +8,26 @@
 
 class TestTranslatePoint(unittest.TestCase):
 
-    def test_translate_point(self):
+    def test_translate_point_ndarray(self):
         point = np.random.uniform(
-            low=0., high=32., size=(10, 2))
+            low=0., high=32., size=(3, 10, 2))
 
         out = translate_point(point, y_offset=3, x_offset=5)
         expected = np.empty_like(point)
-        expected[:, 0] = point[:, 0] + 3
-        expected[:, 1] = point[:, 1] + 5
+        expected[:, :, 0] = point[:, :, 0] + 3
+        expected[:, :, 1] = point[:, :, 1] + 5
         np.testing.assert_equal(out, expected)
 
+    def test_translate_point_list(self):
+        point = [np.random.uniform(
+            low=0., high=32., size=(10, 2))]
+
+        out = translate_point(point, y_offset=3, x_offset=5)
+        for i, pnt in enumerate(point):
+            expected = np.empty_like(pnt)
+            expected[:, 0] = pnt[:,  0] + 3
+            expected[:, 1] = pnt[:,  1] + 5
+            np.testing.assert_equal(out[i], expected)
+
 
 testing.run_module(__name__, __file__)

From 45bd712a3a1ffbb16f8a3f2540539ada582b8ef9 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Thu, 21 Feb 2019 16:58:50 +0900
Subject: [PATCH 033/100] fix variable name

---
 .../datasets/coco/coco_keypoint_dataset.py    | 18 +++----
 chainercv/visualizations/vis_keypoint_coco.py | 22 ++++-----
 .../test_vis_keypoint_coco.py                 | 48 +++++++++----------
 3 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/chainercv/datasets/coco/coco_keypoint_dataset.py b/chainercv/datasets/coco/coco_keypoint_dataset.py
index f3d8c4434e..886eb2f252 100644
--- a/chainercv/datasets/coco/coco_keypoint_dataset.py
+++ b/chainercv/datasets/coco/coco_keypoint_dataset.py
@@ -43,7 +43,7 @@ class COCOKeypointDataset(GetterDataset):
         "RGB, :math:`[0, 255]`"
         :obj:`point` [#coco_point_1]_, ":math:`(R, K, 2)`", :obj:`float32`, \
         ":math:`(y, x)`"
-        :obj:`valid` [#coco_point_1]_, ":math:`(R, K)`", :obj:`bool`, \
+        :obj:`visible` [#coco_point_1]_, ":math:`(R, K)`", :obj:`bool`, \
         "true when a keypoint is visible."
         :obj:`bbox` [#coco_point_1]_, ":math:`(R, 4)`", :obj:`float32`, \
         ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`"
@@ -54,7 +54,7 @@ class COCOKeypointDataset(GetterDataset):
         :obj:`crowded` [#coco_point_3]_, ":math:`(R,)`", :obj:`bool`, --
 
     .. [#coco_point_1] If :obj:`use_crowded = True`, :obj:`point`, \
-        :obj:`valid`, :obj:`bbox`, \
+        :obj:`visible`, :obj:`bbox`, \
         :obj:`label` and :obj:`area` contain crowded instances.
     .. [#coco_point_2] :obj:`area` is available \
         if :obj:`return_area = True`.
@@ -95,9 +95,9 @@ def __init__(self, data_dir='auto', split='train', year='2017',
 
         self.add_getter('img', self._get_image)
         self.add_getter(
-            ['point', 'valid', 'bbox', 'label', 'area', 'crowded'],
+            ['point', 'visible', 'bbox', 'label', 'area', 'crowded'],
             self._get_annotations)
-        keys = ('img', 'point', 'valid', 'bbox', 'label')
+        keys = ('img', 'point', 'visible', 'bbox', 'label')
         if return_area:
             keys += ('area',)
         if return_crowded:
@@ -144,13 +144,13 @@ def _get_annotations(self, i):
             # 0: not labeled; 1: labeled, not inside mask;
             # 2: labeled and inside mask
             v = point[:, 2::3]
-            valid = v > 0
+            visible = v > 0
             point = np.stack((y, x), axis=2)
         else:
             point = np.empty((0, 0, 2), dtype=np.float32)
-            valid = np.empty((0, 0), dtype=np.bool)
+            visible = np.empty((0, 0), dtype=np.bool)
 
-        # Remove invalid boxes
+        # Remove invalid boxes
         bbox_area = np.prod(bbox[:, 2:] - bbox[:, :2], axis=1)
         keep_mask = np.logical_and(bbox[:, 0] <= bbox[:, 2],
                                    bbox[:, 1] <= bbox[:, 3])
@@ -160,9 +160,9 @@ def _get_annotations(self, i):
             keep_mask = np.logical_and(keep_mask, np.logical_not(crowded))
 
         point = point[keep_mask]
-        valid = valid[keep_mask]
+        visible = visible[keep_mask]
         bbox = bbox[keep_mask]
         label = label[keep_mask]
         area = area[keep_mask]
         crowded = crowded[keep_mask]
-        return point, valid, bbox, label, area, crowded
+        return point, visible, bbox, label, area, crowded
diff --git a/chainercv/visualizations/vis_keypoint_coco.py b/chainercv/visualizations/vis_keypoint_coco.py
index 58624fe35c..61f47e8a27 100644
--- a/chainercv/visualizations/vis_keypoint_coco.py
+++ b/chainercv/visualizations/vis_keypoint_coco.py
@@ -43,7 +43,7 @@
 
 
 def vis_keypoint_coco(
-        img, point, valid=None,
+        img, point, visible=None,
         point_score=None, thresh=2,
         markersize=3, linewidth=1, ax=None):
     """Visualize keypoints organized as in COCO.
@@ -54,15 +54,15 @@ def vis_keypoint_coco(
         >>> from chainercv.visualizations import vis_keypoint_coco
         >>> import matplotlib.pyplot as plt
         >>> data = COCOKeypointDataset(split='val')
-        >>> img, point, valid = data[10][:3]
-        >>> vis_keypoint_coco(img, point, valid)
+        >>> img, point, visible = data[10][:3]
+        >>> vis_keypoint_coco(img, point, visible)
         >>> plt.show()
 
     Args:
         img (~numpy.ndarray): See the table below.
             If this is :obj:`None`, no image is displayed.
         point (~numpy.ndarray): See the table below.
-        valid (~numpy.ndarray): See the table below. If this is
+        visible (~numpy.ndarray): See the table below. If this is
             :obj:`None`, all points are assumed to be visible.
         point_score (~numpy.ndarray): See the table below. If this
             is :obj:`None`, the confidence of all points is infinitely
@@ -81,7 +81,7 @@ def vis_keypoint_coco(
         "RGB, :math:`[0, 255]`"
         :obj:`point`, ":math:`(R, K, 2)`", :obj:`float32`, \
         ":math:`(y, x)`"
-        :obj:`valid`, ":math:`(R, K)`", :obj:`bool`, \
+        :obj:`visible`, ":math:`(R, K)`", :obj:`bool`, \
         "true when a keypoint is visible."
         :obj:`point_score`, ":math:`(R, K)`", :obj:`float32`, --
 
@@ -103,14 +103,14 @@ def vis_keypoint_coco(
     if point_score.shape != point.shape[:2]:
         raise ValueError('Mismatch in the number of instances or joints.')
     if point.shape[1:] != (len(coco_keypoint_names[human_id]), 2):
-        raise ValueError('point has invalid shape')
+        raise ValueError('point has invisible shape')
 
-    if valid is not None:
-        if valid.dtype != np.bool:
-            raise ValueError('The dtype of `valid` should be np.bool')
-        if valid.shape != point.shape[:2]:
+    if visible is not None:
+        if visible.dtype != np.bool:
+            raise ValueError('The dtype of `visible` should be np.bool')
+        if visible.shape != point.shape[:2]:
             raise ValueError('Mismatch in the number of instances or joints.')
-        for i, vld in enumerate(valid):
+        for i, vld in enumerate(visible):
             point_score[i, np.logical_not(vld)] = -np.inf
 
     for pnt, pnt_sc in zip(point, point_score):
diff --git a/tests/visualizations_tests/test_vis_keypoint_coco.py b/tests/visualizations_tests/test_vis_keypoint_coco.py
index 0a80260ec2..75ad231810 100644
--- a/tests/visualizations_tests/test_vis_keypoint_coco.py
+++ b/tests/visualizations_tests/test_vis_keypoint_coco.py
@@ -24,17 +24,17 @@ def _generate_point(n_inst, size):
     xs = np.random.uniform(0, W, size=(n_inst, n_joint))
     point = np.stack((ys, xs), axis=2).astype(np.float32)
 
-    valid = np.random.randint(0, 2, size=(n_inst, n_joint)).astype(np.bool)
+    visible = np.random.randint(0, 2, size=(n_inst, n_joint)).astype(np.bool)
 
     point_score = np.random.uniform(
         0, 6, size=(n_inst, n_joint)).astype(np.float32)
-    return point, valid, point_score
+    return point, visible, point_score
 
 
 @testing.parameterize(*testing.product({
     'n_inst': [3, 0],
     'use_img': [False, True],
-    'use_valid': [False, True],
+    'use_visible': [False, True],
     'use_point_score': [False, True]
 }))
 @unittest.skipUnless(_available, 'matplotlib is not installed')
@@ -42,56 +42,56 @@ class TestVisKeypointCOCO(unittest.TestCase):
 
     def setUp(self):
         size = (32, 48)
-        self.point, valid, point_score = _generate_point(self.n_inst, size)
+        self.point, visible, point_score = _generate_point(self.n_inst, size)
         self.img = (np.random.randint(
             0, 255, size=(3,) + size).astype(np.float32)
             if self.use_img else None)
-        self.valid = valid if self.use_valid else None
+        self.visible = visible if self.use_visible else None
         self.point_score = point_score if self.use_point_score else None
 
     def test_vis_keypoint_coco(self):
         ax = vis_keypoint_coco(
-            self.img, self.point, self.valid,
+            self.img, self.point, self.visible,
             self.point_score)
 
         self.assertIsInstance(ax, matplotlib.axes.Axes)
 
 
 @unittest.skipUnless(_available, 'matplotlib is not installed')
-class TestVisKeypointCOCOInvalidInputs(unittest.TestCase):
+class TestVisKeypointCOCOInvisibleInputs(unittest.TestCase):
 
     def setUp(self):
         size = (32, 48)
         n_inst = 10
-        self.point, self.valid, self.point_score = _generate_point(
+        self.point, self.visible, self.point_score = _generate_point(
             n_inst, size)
         self.img = np.random.randint(
             0, 255, size=(3,) + size).astype(np.float32)
 
-    def _check(self, img, point, valid, point_score):
+    def _check(self, img, point, visible, point_score):
         with self.assertRaises(ValueError):
-            vis_keypoint_coco(img, point, valid, point_score)
+            vis_keypoint_coco(img, point, visible, point_score)
 
-    def test_invalid_n_inst_point(self):
-        self._check(self.img, self.point[:5], self.valid, self.point_score)
+    def test_invisible_n_inst_point(self):
+        self._check(self.img, self.point[:5], self.visible, self.point_score)
 
-    def test_invalid_n_inst_valid(self):
-        self._check(self.img, self.point, self.valid[:5], self.point_score)
+    def test_invisible_n_inst_visible(self):
+        self._check(self.img, self.point, self.visible[:5], self.point_score)
 
-    def test_invalid_n_inst_point_score(self):
-        self._check(self.img, self.point, self.valid, self.point_score[:5])
+    def test_invisible_n_inst_point_score(self):
+        self._check(self.img, self.point, self.visible, self.point_score[:5])
 
-    def test_invalid_n_joint_point(self):
-        self._check(self.img, self.point[:, :15], self.valid, self.point_score)
+    def test_invisible_n_joint_point(self):
+        self._check(self.img, self.point[:, :15], self.visible, self.point_score)
 
-    def test_invalid_n_joint_valid(self):
-        self._check(self.img, self.point, self.valid[:, :15], self.point_score)
+    def test_invisible_n_joint_visible(self):
+        self._check(self.img, self.point, self.visible[:, :15], self.point_score)
 
-    def test_invalid_n_joint_point_score(self):
-        self._check(self.img, self.point, self.valid, self.point_score[:, :15])
+    def test_invisible_n_joint_point_score(self):
+        self._check(self.img, self.point, self.visible, self.point_score[:, :15])
 
-    def test_invalid_valid_dtype(self):
-        self._check(self.img, self.point, self.valid.astype(np.int32),
+    def test_invisible_visible_dtype(self):
+        self._check(self.img, self.point, self.visible.astype(np.int32),
                     self.point_score)
 
 testing.run_module(__name__, __file__)

From 26080942e21b3d5b80cf9130e476230d8a64a4bf Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Thu, 21 Feb 2019 17:05:50 +0900
Subject: [PATCH 034/100] change variable names

---
 .../eval_keypoint_detection_coco.py           | 26 +++++++++----------
 .../test_eval_keypoint_detection_coco.py      | 18 ++++++-------
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/chainercv/evaluations/eval_keypoint_detection_coco.py b/chainercv/evaluations/eval_keypoint_detection_coco.py
index 6ff5692bc9..4258552bfd 100644
--- a/chainercv/evaluations/eval_keypoint_detection_coco.py
+++ b/chainercv/evaluations/eval_keypoint_detection_coco.py
@@ -16,7 +16,7 @@
 
 def eval_keypoint_detection_coco(
         pred_points, pred_labels, pred_scores,
-        gt_points, gt_valids, gt_bboxes=None, gt_labels=None,
+        gt_points, gt_visibles, gt_bboxes=None, gt_labels=None,
         gt_areas=None, gt_crowdeds=None):
     """Evaluate keypoint detection based on evaluation code of MS COCO.
 
@@ -31,7 +31,7 @@ def eval_keypoint_detection_coco(
             This is used to rank instances. Note that this is not
             the confidene for each keypoint.
         gt_points (iterable of numpy.ndarray): See the table below.
-        gt_valids (iterable of numpy.ndarray): See the table below.
+        gt_visibles (iterable of numpy.ndarray): See the table below.
         gt_bboxes (iterable of numpy.ndarray): See the table below.
             This is optional. If this is :obj:`None`, the ground truth
             bounding boxes are esitmated from the ground truth
@@ -52,7 +52,7 @@ def eval_keypoint_detection_coco(
         --
         :obj:`gt_points`, ":math:`[(R, K, 2)]`", :obj:`float32`, \
         ":math:`(y, x)`"
-        :obj:`gt_valids`, ":math:`[(R, K)]`", :obj:`bool`, --
+        :obj:`gt_visibles`, ":math:`[(R, K)]`", :obj:`bool`, --
         :obj:`gt_bboxes`, ":math:`[(R, 4)]`", :obj:`float32`, \
         ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`"
         :obj:`gt_labels`, ":math:`[(R,)]`", :obj:`int32`, \
@@ -147,7 +147,7 @@ def eval_keypoint_detection_coco(
     pred_labels = iter(pred_labels)
     pred_scores = iter(pred_scores)
     gt_points = iter(gt_points)
-    gt_valids = iter(gt_valids)
+    gt_visibles = iter(gt_visibles)
     gt_bboxes = (iter(gt_bboxes) if gt_bboxes is not None
                  else itertools.repeat(None))
     gt_labels = iter(gt_labels)
@@ -164,11 +164,11 @@ def eval_keypoint_detection_coco(
     pred_annos = []
     gt_annos = []
     existent_labels = {}
-    for i, (pred_point, pred_label, pred_score, gt_point, gt_is_valid,
+    for i, (pred_point, pred_label, pred_score, gt_point, gt_visible,
             gt_bbox, gt_label,
             gt_area, gt_crowded) in enumerate(six.moves.zip(
                 pred_points, pred_labels, pred_scores,
-                gt_points, gt_valids, gt_bboxes, gt_labels,
+                gt_points, gt_visibles, gt_bboxes, gt_labels,
                 gt_areas, gt_crowdeds)):
         if gt_bbox is None:
             gt_bbox = itertools.repeat(None)
@@ -183,18 +183,18 @@ def eval_keypoint_detection_coco(
                                               pred_score):
             # http://cocodataset.org/#format-results
             # Visibility flag is currently not used for evaluation
-            is_v = np.ones(len(pred_pnt))
+            v = np.ones(len(pred_pnt))
             pred_annos.append(
-                _create_anno(pred_pnt, is_v, None,
+                _create_anno(pred_pnt, v, None,
                              pred_lb, pred_sc,
                              img_id=img_id, anno_id=len(pred_annos) + 1,
                              ar=None, crw=0))
             existent_labels[pred_lb] = True
 
-        for gt_pnt, gt_is_v, gt_bb, gt_lb, gt_ar, gt_crw in zip(
-                gt_point, gt_is_valid, gt_bbox, gt_label, gt_area, gt_crowded):
+        for gt_pnt, gt_v, gt_bb, gt_lb, gt_ar, gt_crw in zip(
+                gt_point, gt_visible, gt_bbox, gt_label, gt_area, gt_crowded):
             gt_annos.append(
-                _create_anno(gt_pnt, gt_is_v, gt_bb, gt_lb, None,
+                _create_anno(gt_pnt, gt_v, gt_bb, gt_lb, None,
                              img_id=img_id, anno_id=len(gt_annos) + 1,
                              ar=gt_ar, crw=gt_crw))
         ids.append({'id': img_id})
@@ -276,7 +276,7 @@ def eval_keypoint_detection_coco(
     return results
 
 
-def _create_anno(pnt, is_v, bb, lb, sc, img_id, anno_id, ar=None, crw=None):
+def _create_anno(pnt, v, bb, lb, sc, img_id, anno_id, ar=None, crw=None):
     # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/coco.py#L342
     y_min = np.min(pnt[:, 0])
     x_min = np.min(pnt[:, 1])
@@ -293,7 +293,7 @@ def _create_anno(pnt, is_v, bb, lb, sc, img_id, anno_id, ar=None, crw=None):
         bb_xywh = [x_min, y_min, x_max - x_min, y_max - y_min]
     else:
         bb_xywh = [bb[1], bb[0], bb[3] - bb[1], bb[2] - bb[0]]
-    pnt = np.concatenate((pnt[:, [1, 0]], is_v[:, None]), axis=1)
+    pnt = np.concatenate((pnt[:, [1, 0]], v[:, None]), axis=1)
     anno = {
         'image_id': img_id, 'category_id': lb,
         'keypoints': pnt.reshape((-1)).tolist(),
diff --git a/tests/evaluations_tests/test_eval_keypoint_detection_coco.py b/tests/evaluations_tests/test_eval_keypoint_detection_coco.py
index 4e3ece2955..11d636d7d7 100644
--- a/tests/evaluations_tests/test_eval_keypoint_detection_coco.py
+++ b/tests/evaluations_tests/test_eval_keypoint_detection_coco.py
@@ -39,7 +39,7 @@ def setUp(self):
         self.pred_labels = []
         self.pred_scores = []
         self.gt_points = []
-        self.gt_valids = []
+        self.gt_visibles = []
         self.gt_bboxes = []
         self.gt_labels = []
         for i in range(2):
@@ -49,7 +49,7 @@ def setUp(self):
             self.pred_scores.append(np.random.uniform(
                 0.5, 1, size=(self.n_inst,)).astype(np.float32))
             self.gt_points.append(point)
-            self.gt_valids.append(valid)
+            self.gt_visibles.append(valid)
             bbox = np.zeros((self.n_inst, 4), dtype=np.float32)
             for i, pnt in enumerate(point):
                 y_min = np.min(pnt[:, 0])
@@ -71,13 +71,13 @@ def _check(self, result):
     def test_gt_bboxes_not_supplied(self):
         result = eval_keypoint_detection_coco(
             self.pred_points, self.pred_labels, self.pred_scores,
-            self.gt_points, self.gt_valids, None, self.gt_labels)
+            self.gt_points, self.gt_visibles, None, self.gt_labels)
         self._check(result)
 
     def test_area_not_supplied(self):
         result = eval_keypoint_detection_coco(
             self.pred_points, self.pred_labels, self.pred_scores,
-            self.gt_points, self.gt_valids, self.gt_bboxes, self.gt_labels)
+            self.gt_points, self.gt_visibles, self.gt_bboxes, self.gt_labels)
         self._check(result)
 
         self.assertFalse(
@@ -93,7 +93,7 @@ def test_area_supplied(self):
         gt_areas = [[100] * self.n_inst for _ in range(2)]
         result = eval_keypoint_detection_coco(
             self.pred_points, self.pred_labels, self.pred_scores,
-            self.gt_points, self.gt_valids, self.gt_bboxes, self.gt_labels,
+            self.gt_points, self.gt_visibles, self.gt_bboxes, self.gt_labels,
             gt_areas=gt_areas,
         )
         self._check(result)
@@ -110,7 +110,7 @@ def test_crowded_supplied(self):
         gt_crowdeds = [[True] * self.n_inst for _ in range(2)]
         result = eval_keypoint_detection_coco(
             self.pred_points, self.pred_labels, self.pred_scores,
-            self.gt_points, self.gt_valids, self.gt_bboxes, self.gt_labels,
+            self.gt_points, self.gt_visibles, self.gt_bboxes, self.gt_labels,
             gt_crowdeds=gt_crowdeds,
         )
         # When the only ground truth is crowded, nothing is evaluated.
@@ -128,7 +128,7 @@ def setUpClass(cls):
 
         cls.dataset = np.load(request.urlretrieve(os.path.join(
             base_url,
-            'eval_keypoint_detection_coco_dataset_2019_02_20.npz'))[0])
+            'eval_keypoint_detection_coco_dataset_2019_02_21.npz'))[0])
         cls.result = np.load(request.urlretrieve(os.path.join(
             base_url,
             'eval_keypoint_detection_coco_result_2019_02_20.npz'))[0])
@@ -139,7 +139,7 @@ def test_eval_keypoint_detection_coco(self):
         pred_scores = self.result['scores']
 
         gt_points = self.dataset['points']
-        gt_valids = self.dataset['valids']
+        gt_visibles = self.dataset['visibles']
         gt_bboxes = self.dataset['bboxes']
         gt_labels = self.dataset['labels']
         gt_areas = self.dataset['areas']
@@ -147,7 +147,7 @@ def test_eval_keypoint_detection_coco(self):
 
         result = eval_keypoint_detection_coco(
             pred_points, pred_labels, pred_scores,
-            gt_points, gt_valids, gt_bboxes,
+            gt_points, gt_visibles, gt_bboxes,
             gt_labels, gt_areas, gt_crowdeds)
 
         expected = {

From 716043e92056fb35501f354750e4987d574d4768 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Thu, 21 Feb 2019 21:30:30 +0900
Subject: [PATCH 035/100] flake8 and test

---
 .../datasets/coco/coco_keypoint_dataset.py    |  2 -
 .../coco_tests/test_coco_keypoint_dataset.py  | 86 +++++++++++++++++++
 .../test_vis_keypoint_coco.py                 |  9 +-
 3 files changed, 92 insertions(+), 5 deletions(-)
 create mode 100644 tests/datasets_tests/coco_tests/test_coco_keypoint_dataset.py

diff --git a/chainercv/datasets/coco/coco_keypoint_dataset.py b/chainercv/datasets/coco/coco_keypoint_dataset.py
index 886eb2f252..2f0dfebf0a 100644
--- a/chainercv/datasets/coco/coco_keypoint_dataset.py
+++ b/chainercv/datasets/coco/coco_keypoint_dataset.py
@@ -4,8 +4,6 @@
 import os
 
 from chainercv.chainer_experimental.datasets.sliceable import GetterDataset
-from chainercv.datasets.coco.coco_instances_base_dataset import \
-    COCOInstancesBaseDataset
 from chainercv.datasets.coco.coco_utils import get_coco
 from chainercv import utils
 
diff --git a/tests/datasets_tests/coco_tests/test_coco_keypoint_dataset.py b/tests/datasets_tests/coco_tests/test_coco_keypoint_dataset.py
new file mode 100644
index 0000000000..191e9c96ee
--- /dev/null
+++ b/tests/datasets_tests/coco_tests/test_coco_keypoint_dataset.py
@@ -0,0 +1,86 @@
+import unittest
+
+import numpy as np
+
+from chainer import testing
+from chainer.testing import attr
+
+from chainercv.datasets import coco_keypoint_names
+from chainercv.datasets import COCOKeypointDataset
+from chainercv.utils import assert_is_bbox
+from chainercv.utils import assert_is_point_dataset
+
+
+def _create_paramters():
+    split_years = testing.product({
+        'split': ['train', 'val'],
+        'year': ['2014', '2017']})
+    split_years += [{'split': 'minival', 'year': '2014'},
+                    {'split': 'valminusminival', 'year': '2014'}]
+    use_and_return_args = testing.product({
+        'use_crowded': [False, True],
+        'return_crowded': [False, True],
+        'return_area': [False, True]})
+    params = testing.product_dict(
+        split_years,
+        use_and_return_args)
+    return params
+
+
+@testing.parameterize(*testing.product(
+    {
+        'split': ['train', 'val'],
+        'year': ['2014', '2017'],
+        'use_crowded': [False, True],
+        'return_crowded': [False, True],
+        'return_area': [False, True],
+    }
+))
+class TestCOCOKeypointDataset(unittest.TestCase):
+
+    def setUp(self):
+        self.dataset = COCOKeypointDataset(
+            split=self.split, year=self.year,
+            use_crowded=self.use_crowded, return_area=self.return_area,
+            return_crowded=self.return_crowded)
+
+    @attr.slow
+    def test_coco_bbox_dataset(self):
+        human_id = 0
+        assert_is_point_dataset(
+            self.dataset, len(coco_keypoint_names[human_id]),
+            n_example=30)
+
+        for _ in range(10):
+            i = np.random.randint(0, len(self.dataset))
+            img, point, _, bbox, label = self.dataset[i][:5]
+            assert_is_bbox(bbox, img.shape[1:])
+            self.assertEqual(len(bbox), len(point))
+
+            self.assertIsInstance(label, np.ndarray)
+            self.assertEqual(label.dtype, np.int32)
+            self.assertEqual(label.shape, (point.shape[0],))
+
+        if self.return_area:
+            for _ in range(10):
+                i = np.random.randint(0, len(self.dataset))
+                _, point, _, _, _, area = self.dataset[i][:6]
+                self.assertIsInstance(area, np.ndarray)
+                self.assertEqual(area.dtype, np.float32)
+                self.assertEqual(area.shape, (point.shape[0],))
+
+        if self.return_crowded:
+            for _ in range(10):
+                i = np.random.randint(0, len(self.dataset))
+                example = self.dataset[i]
+                crowded = example[-1]
+                point = example[1]
+                self.assertIsInstance(crowded, np.ndarray)
+                self.assertEqual(crowded.dtype, np.bool)
+                self.assertEqual(crowded.shape, (point.shape[0],))
+
+                if not self.use_crowded:
+                    np.testing.assert_equal(crowded, 0)
+
+
+testing.run_module(__name__, __file__)
diff --git a/tests/visualizations_tests/test_vis_keypoint_coco.py b/tests/visualizations_tests/test_vis_keypoint_coco.py
index 75ad231810..0e776ce8b1 100644
--- a/tests/visualizations_tests/test_vis_keypoint_coco.py
+++ b/tests/visualizations_tests/test_vis_keypoint_coco.py
@@ -82,13 +82,16 @@ def test_invisible_n_inst_point_score(self):
         self._check(self.img, self.point, self.visible, self.point_score[:5])
 
     def test_invisible_n_joint_point(self):
-        self._check(self.img, self.point[:, :15], self.visible, self.point_score)
+        self._check(
+            self.img, self.point[:, :15], self.visible, self.point_score)
 
     def test_invisible_n_joint_visible(self):
-        self._check(self.img, self.point, self.visible[:, :15], self.point_score)
+        self._check(
+            self.img, self.point, self.visible[:, :15], self.point_score)
 
     def test_invisible_n_joint_point_score(self):
-        self._check(self.img, self.point, self.visible, self.point_score[:, :15])
+        self._check(
+            self.img, self.point, self.visible, self.point_score[:, :15])
 
     def test_invisible_visible_dtype(self):
         self._check(self.img, self.point, self.visible.astype(np.int32),

From 3320b399a6a5d93ad95e5c1f62a2d2f62c204307 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Thu, 21 Feb 2019 21:33:31 +0900
Subject: [PATCH 036/100] fix class name

---
 tests/evaluations_tests/test_eval_keypoint_detection_coco.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/evaluations_tests/test_eval_keypoint_detection_coco.py b/tests/evaluations_tests/test_eval_keypoint_detection_coco.py
index 11d636d7d7..aa030cccb4 100644
--- a/tests/evaluations_tests/test_eval_keypoint_detection_coco.py
+++ b/tests/evaluations_tests/test_eval_keypoint_detection_coco.py
@@ -30,7 +30,7 @@ def _generate_point(n_inst, size):
 
 
 @unittest.skipUnless(_available, 'pycocotools is not installed')
-class TestEvalPointCOCOSingleClass(unittest.TestCase):
+class TestEvalKeypointCOCOSingleClass(unittest.TestCase):
 
     n_inst = 3
 

From a467173facf3e8da8bbbd8f946a8d2d2995a247d Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Thu, 21 Feb 2019 21:33:31 +0900
Subject: [PATCH 037/100] fix class name

---
 tests/evaluations_tests/test_eval_keypoint_detection_coco.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/evaluations_tests/test_eval_keypoint_detection_coco.py b/tests/evaluations_tests/test_eval_keypoint_detection_coco.py
index 11d636d7d7..2235847640 100644
--- a/tests/evaluations_tests/test_eval_keypoint_detection_coco.py
+++ b/tests/evaluations_tests/test_eval_keypoint_detection_coco.py
@@ -30,7 +30,7 @@ def _generate_point(n_inst, size):
 
 
 @unittest.skipUnless(_available, 'pycocotools is not installed')
-class TestEvalPointCOCOSingleClass(unittest.TestCase):
+class TestEvalKeypointDetectionCOCOSimple(unittest.TestCase):
 
     n_inst = 3
 

From 91b2b5225b30113c60fe25a6964353093377fc84 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Thu, 21 Feb 2019 21:49:34 +0900
Subject: [PATCH 038/100] fix demo

---
 examples/mask_rcnn/demo.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/examples/mask_rcnn/demo.py b/examples/mask_rcnn/demo.py
index ef16dbdac0..9483876076 100644
--- a/examples/mask_rcnn/demo.py
+++ b/examples/mask_rcnn/demo.py
@@ -19,9 +19,8 @@ def main():
         choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'),
         default='mask_rcnn_fpn_resnet50'
     )
-    group = parser.add_mutually_exclusive_group()
-    group.add_argument('--pretrained-model')
-    group.add_argument('--snapshot')
+    parser.add_argument('--gpu', type=int, default=-1)
+    parser.add_argument('--pretrained-model', default='coco')
     parser.add_argument('image')
     args = parser.parse_args()
 

From 8dd3706e61e9a37af78387792ef5e42c7df4248a Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Thu, 21 Feb 2019 21:57:13 +0900
Subject: [PATCH 039/100] fix demo

---
 examples/mask_rcnn/demo.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/mask_rcnn/demo.py b/examples/mask_rcnn/demo.py
index 9483876076..d95eacc567 100644
--- a/examples/mask_rcnn/demo.py
+++ b/examples/mask_rcnn/demo.py
@@ -13,7 +13,6 @@
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--gpu', type=int, default=-1)
     parser.add_argument(
         '--model',
         choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'),

From 7191dc036c0e53b9dda85c63cc258a99ccd79d72 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Thu, 21 Feb 2019 21:56:53 +0900
Subject: [PATCH 040/100] demo works

---
 chainercv/links/model/mask_rcnn/__init__.py   |   1 +
 .../links/model/mask_rcnn/keypoint_head.py    | 124 ++++++++++++++++++
 chainercv/links/model/mask_rcnn/mask_rcnn.py  |  93 +++++++++----
 .../model/mask_rcnn/mask_rcnn_fpn_resnet.py   |   8 +-
 examples/mask_rcnn/demo.py                    |  49 +++++--
 5 files changed, 232 insertions(+), 43 deletions(-)
 create mode 100644 chainercv/links/model/mask_rcnn/keypoint_head.py

diff --git a/chainercv/links/model/mask_rcnn/__init__.py b/chainercv/links/model/mask_rcnn/__init__.py
index c9e910a524..a8ceb6978a 100644
--- a/chainercv/links/model/mask_rcnn/__init__.py
+++ b/chainercv/links/model/mask_rcnn/__init__.py
@@ -1,3 +1,4 @@
+from chainercv.links.model.mask_rcnn.keypoint_head import KeypointHead  # NOQA
 from chainercv.links.model.mask_rcnn.mask_head import mask_loss_post  # NOQA
 from chainercv.links.model.mask_rcnn.mask_head import mask_loss_pre  # NOQA
 from chainercv.links.model.mask_rcnn.mask_head import MaskHead  # NOQA
diff --git a/chainercv/links/model/mask_rcnn/keypoint_head.py b/chainercv/links/model/mask_rcnn/keypoint_head.py
new file mode 100644
index 0000000000..9994c19554
--- /dev/null
+++ b/chainercv/links/model/mask_rcnn/keypoint_head.py
@@ -0,0 +1,124 @@
+from __future__ import division
+
+import numpy as np
+import PIL
+
+import cv2
+
+import chainer
+import chainer.links as L
+import chainer.functions as F
+from chainer.backends import cuda
+from chainer.initializers import HeNormal
+from chainer.initializers import Normal
+
+from chainercv.links import Conv2DActiv
+from chainercv.transforms.image.resize import resize
+from chainercv.utils.bbox.bbox_iou import bbox_iou
+from chainercv.utils.mask.mask_to_bbox import mask_to_bbox
+
+
+class KeypointHead(chainer.Chain):
+
+    _canonical_scale = 224
+    _roi_size = 14
+    _roi_sample_ratio = 2
+    map_size = 56
+
+    def __init__(self, n_point, scales):
+        super(KeypointHead, self).__init__()
+
+        initialW = HeNormal(1, fan_option='fan_out')
+        with self.init_scope():
+            self.conv1 = Conv2DActiv(512, 3, pad=1, initialW=initialW)
+            self.conv2 = Conv2DActiv(512, 3, pad=1, initialW=initialW)
+            self.conv3 = Conv2DActiv(512, 3, pad=1, initialW=initialW)
+            self.conv4 = Conv2DActiv(512, 3, pad=1, initialW=initialW)
+            self.conv5 = Conv2DActiv(512, 3, pad=1, initialW=initialW)
+            self.conv6 = Conv2DActiv(512, 3, pad=1, initialW=initialW)
+            self.conv7 = Conv2DActiv(512, 3, pad=1, initialW=initialW)
+            self.conv8 = Conv2DActiv(512, 3, pad=1, initialW=initialW)
+            self.point = L.Deconvolution2D(
+                n_point, 4, pad=1, stride=2, initialW=initialW)
+
+        self._scales = scales
+        self._n_point = n_point
+
+    def __call__(self, hs, rois, roi_indices):
+        pooled_hs = []
+        for l, h in enumerate(hs):
+            if len(rois[l]) == 0:
+                continue
+
+            pooled_hs.append(F.roi_average_align_2d(
+                h, rois[l], roi_indices[l],
+                self._roi_size,
+                self._scales[l], self._roi_sample_ratio))
+
+        if len(pooled_hs) == 0:
+            out_size = self.map_size
+            point = chainer.Variable(
+               self.xp.empty((0, self._n_class, out_size, out_size), dtype=np.float32))
+            return segs
+
+        h = F.concat(pooled_hs, axis=0)
+        h = self.conv1(h)
+        h = self.conv2(h)
+        h = self.conv3(h)
+        h = self.conv4(h)
+        h = self.conv5(h)
+        h = self.conv6(h)
+        h = self.conv7(h)
+        h = self.conv8(h)
+        h = self.point(h)
+        return F.resize_images(h, (self.map_size, self.map_size))
+
+    def distribute(self, rois, roi_indices):
+        # Completely the same as MaskHead.distribute
+        size = self.xp.sqrt(self.xp.prod(rois[:, 2:] + 1 - rois[:, :2], axis=1))
+        level = self.xp.floor(self.xp.log2(
+            size / self._canonical_scale + 1e-6)).astype(np.int32)
+        # skip last level
+        level = self.xp.clip(
+            level + len(self._scales) // 2, 0, len(self._scales) - 2)
+
+        masks = [level == l for l in range(len(self._scales))]
+        rois = [rois[mask] for mask in masks]
+        roi_indices = [roi_indices[mask] for mask in masks]
+        order = self.xp.argsort(
+            self.xp.concatenate([self.xp.where(mask)[0] for mask in masks]))
+        return rois, roi_indices, order
+
+    def decode(self, point_maps, bboxes):
+        points = []
+        point_scores = []
+        for bbox, point_map in zip(bboxes, point_maps):
+            point = np.zeros((len(bbox), self._n_point, 2), dtype=np.float32)
+            point_score = np.zeros((len(bbox), self._n_point), dtype=np.float32)
+
+            hs = bbox[:, 2] - bbox[:, 0]
+            ws = bbox[:, 3] - bbox[:, 1]
+            h_ceils = np.ceil(np.maximum(hs, 1))
+            w_ceils = np.ceil(np.maximum(ws, 1))
+            h_corrections = hs / h_ceils
+            w_corrections = ws / w_ceils
+            for i, (bb, point_m) in enumerate(zip(bbox, point_map)):
+                point_m = cv2.resize(
+                    point_m.transpose((1, 2, 0)),
+                    (w_ceils[i], h_ceils[i]),
+                    interpolation=cv2.INTER_CUBIC).transpose(
+                        (2, 0, 1))
+                _, H, W = point_m.shape
+                for k in range(self._n_point):
+                    pos = point_m[k].argmax()
+                    x_int = pos % W
+                    y_int = (pos - x_int) // W
+
+                    y = (y_int + 0.5) * h_corrections[i]
+                    x = (x_int + 0.5) * w_corrections[i]
+                    point[i, k, 0] = y + bb[0]
+                    point[i, k, 1] = x + bb[1]
+                    point_score[i, k] = point_m[k, y_int, x_int]
+            points.append(point)
+            point_scores.append(point_score)
+        return points, point_scores
diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py
index 9f59f49d92..81e282219d 100644
--- a/chainercv/links/model/mask_rcnn/mask_rcnn.py
+++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py
@@ -48,13 +48,18 @@ class MaskRCNN(chainer.Chain):
     max_size = 1333
     stride = 32
 
-    def __init__(self, extractor, rpn, head, mask_head):
+    def __init__(self, extractor, rpn, head, mask_head,
+                 keypoint_head, mode='mask'):
         super(MaskRCNN, self).__init__()
         with self.init_scope():
             self.extractor = extractor
             self.rpn = rpn
             self.head = head
-            self.mask_head = mask_head
+            if mode == 'mask':
+                self.mask_head = mask_head
+            elif mode =='keypoint':
+                self.keypoint_head = keypoint_head
+        self.mode = mode
 
         self.use_preset('visualize')
 
@@ -133,32 +138,64 @@ def predict(self, imgs):
             scales, sizes, self.nms_thresh, self.score_thresh)
 
         rescaled_bboxes = [bbox * scale for scale, bbox in zip(scales, bboxes)]
-        # Change bboxes to RoI and RoI indices format
-        mask_rois_before_reordering, mask_roi_indices_before_reordering =\
-            _list_to_flat(rescaled_bboxes)
-        mask_rois, mask_roi_indices, order = self.mask_head.distribute(
-            mask_rois_before_reordering, mask_roi_indices_before_reordering)
-        with chainer.using_config('train', False), chainer.no_backprop_mode():
-            segms = F.sigmoid(
-                self.mask_head(hs, mask_rois, mask_roi_indices)).data
-        # Put the order of proposals back to the one used by bbox head.
-        segms = segms[order]
-        segms = _flat_to_list(
-            segms, mask_roi_indices_before_reordering, len(imgs))
-        segms = [segm if segm is not None else
-                 self.xp.zeros(
-                     (0, self.mask_head.mask_size, self.mask_head.mask_size),
-                     dtype=np.float32)
-                 for segm in segms]
-
-        segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms]
-        bboxes = [chainer.backends.cuda.to_cpu(bbox / scale)
-                  for bbox, scale in zip(rescaled_bboxes, scales)]
-        labels = [chainer.backends.cuda.to_cpu(label) for label in labels]
-        # Currently MaskHead only supports numpy inputs
-        masks = self.mask_head.decode(segms, bboxes, labels, sizes)
-        scores = [cuda.to_cpu(score) for score in scores]
-        return masks, labels, scores
+        if self.mode == 'mask':
+            # Change bboxes to RoI and RoI indices format
+            mask_rois_before_reordering, mask_roi_indices_before_reordering =\
+                _list_to_flat(rescaled_bboxes)
+            mask_rois, mask_roi_indices, order = self.mask_head.distribute(
+                mask_rois_before_reordering, mask_roi_indices_before_reordering)
+            with chainer.using_config('train', False), chainer.no_backprop_mode():
+                segms = F.sigmoid(
+                    self.mask_head(hs, mask_rois, mask_roi_indices)).data
+            # Put the order of proposals back to the one used by bbox head.
+            segms = segms[order]
+            segms = _flat_to_list(
+                segms, mask_roi_indices_before_reordering, len(imgs))
+            segms = [segm if segm is not None else
+                    self.xp.zeros(
+                        (0, self.mask_head.mask_size, self.mask_head.mask_size),
+                        dtype=np.float32)
+                    for segm in segms]
+
+            segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms]
+            bboxes = [chainer.backends.cuda.to_cpu(bbox / scale)
+                    for bbox, scale in zip(rescaled_bboxes, scales)]
+            labels = [chainer.backends.cuda.to_cpu(label) for label in labels]
+            # Currently MaskHead only supports numpy inputs
+            masks = self.mask_head.decode(segms, bboxes, labels, sizes)
+            scores = [cuda.to_cpu(score) for score in scores]
+            return masks, labels, scores
+        elif self.mode == 'keypoint':
+            (point_rois_before_reordering,
+             point_roi_indices_before_reordering) = _list_to_flat(
+                 rescaled_bboxes)
+            point_rois, point_roi_indices, order =\
+                self.keypoint_head.distribute(
+                    point_rois_before_reordering,
+                    point_roi_indices_before_reordering)
+            with chainer.using_config('train', False), chainer.no_backprop_mode():
+                point_maps = self.keypoint_head(
+                    hs, point_rois, point_roi_indices).data
+            point_maps = point_maps[order]
+            point_maps = _flat_to_list(
+                point_maps, point_roi_indices_before_reordering, len(imgs))
+            point_maps = [point_map if point_map is not None else
+                          self.xp.zeros(
+                              (0, self.keypoint_head.n_point,
+                               self.keypoint_head.map_size,
+                               self.keypoint_head.map_size),
+                              dtype=np.float32)
+                          for point_map in point_maps]
+            point_maps = [
+                chainer.backends.cuda.to_cpu(point_map)
+                for point_map in point_maps]
+            bboxes = [chainer.cuda.to_cpu(bbox / scale)
+                      for bbox, scale in zip(rescaled_bboxes, scales)]
+            points, point_scores = self.keypoint_head.decode(
+                point_maps, bboxes)
+            labels = [cuda.to_cpu(label) for label in labels]
+            scores = [cuda.to_cpu(score) for score in scores]
+            return points, point_scores, bboxes, labels, scores
 
     def prepare(self, imgs):
         """Preprocess images.
diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py b/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py
index d18f92f628..3048ce80cf 100644
--- a/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py
+++ b/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py
@@ -6,6 +6,7 @@
 from chainercv.links.model.fpn import FPN
 from chainercv.links.model.fpn import Head
 from chainercv.links.model.fpn import RPN
+from chainercv.links.model.mask_rcnn.keypoint_head import KeypointHead
 from chainercv.links.model.mask_rcnn.mask_head import MaskHead
 from chainercv.links.model.mask_rcnn.mask_rcnn import MaskRCNN
 from chainercv.links.model.resnet import ResNet101
@@ -22,7 +23,8 @@ class MaskRCNNFPNResNet(MaskRCNN):
     A subclass of this class should have :obj:`_base` and :obj:`_models`.
     """
 
-    def __init__(self, n_fg_class=None, pretrained_model=None):
+    def __init__(self, n_fg_class=None, pretrained_model=None,
+                 n_point=17, mode='mask'):
         param, path = utils.prepare_pretrained_model(
             {'n_fg_class': n_fg_class}, pretrained_model, self._models)
 
@@ -39,7 +41,9 @@ def __init__(self, n_fg_class=None, pretrained_model=None):
             extractor=extractor,
             rpn=RPN(extractor.scales),
             head=Head(n_class, extractor.scales),
-            mask_head=MaskHead(n_class, extractor.scales)
+            mask_head=MaskHead(n_class, extractor.scales),
+            keypoint_head=KeypointHead(n_point, extractor.scales),
+            mode=mode,
         )
         if path == 'imagenet':
             _copyparams(
diff --git a/examples/mask_rcnn/demo.py b/examples/mask_rcnn/demo.py
index 9483876076..682b742fa2 100644
--- a/examples/mask_rcnn/demo.py
+++ b/examples/mask_rcnn/demo.py
@@ -13,7 +13,6 @@
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--gpu', type=int, default=-1)
     parser.add_argument(
         '--model',
         choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'),
@@ -21,31 +20,55 @@ def main():
     )
     parser.add_argument('--gpu', type=int, default=-1)
     parser.add_argument('--pretrained-model', default='coco')
+    parser.add_argument(
+        '--mode',
+        choices=('mask', 'keypoint'),
+        default='mask')
     parser.add_argument('image')
     args = parser.parse_args()
 
+    if args.mode == 'mask':
+        n_fg_class = len(coco_instance_segmentation_label_names)
+    elif args.mode == 'keypoint':
+        n_fg_class = 1
     if args.model == 'mask_rcnn_fpn_resnet50':
         model = MaskRCNNFPNResNet50(
-            n_fg_class=len(coco_instance_segmentation_label_names),
-            pretrained_model=args.pretrained_model)
+            n_fg_class=n_fg_class,
+            pretrained_model=args.pretrained_model,
+            mode=args.mode
+        )
     elif args.model == 'mask_rcnn_fpn_resnet101':
         model = MaskRCNNFPNResNet101(
-            n_fg_class=len(coco_instance_segmentation_label_names),
-            pretrained_model=args.pretrained_model)
+            n_fg_class=n_fg_class,
+            pretrained_model=args.pretrained_model,
+            mode=args.mode
+        )
 
     if args.gpu >= 0:
         chainer.cuda.get_device_from_id(args.gpu).use()
         model.to_gpu()
 
     img = utils.read_image(args.image)
-    masks, labels, scores = model.predict([img])
-    mask = masks[0]
-    label = labels[0]
-    score = scores[0]
-    chainercv.visualizations.vis_instance_segmentation(
-        img, mask, label, score,
-        label_names=coco_instance_segmentation_label_names)
-    plt.show()
+    if args.mode == 'mask':
+        masks, labels, scores = model.predict([img])
+        mask = masks[0]
+        label = labels[0]
+        score = scores[0]
+        chainercv.visualizations.vis_instance_segmentation(
+            img, mask, label, score,
+            label_names=coco_instance_segmentation_label_names)
+        plt.show()
+    elif args.mode == 'keypoint':
+        points, point_scores, bboxes, labels, scores = model.predict([img])
+        point = points[0]
+        point_score = point_scores[0]
+        bbox = bboxes[0]
+        label = labels[0]
+        score = scores[0]
+        ax = chainercv.visualizations.vis_keypoint_coco(
+            img, point, None, point_score)
+        chainercv.visualizations.vis_bbox(None, bbox, score=score, ax=ax)
+        plt.show()
 
 
 if __name__ == '__main__':

From 62cb5e825287d71a26429540e677fc38266bc607 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Thu, 21 Feb 2019 22:43:16 +0900
Subject: [PATCH 041/100] eval

---
 .../links/model/mask_rcnn/keypoint_head.py    | 16 ++--
 .../eval_keypoint_detection.py                | 93 +++++++++++++++++++
 2 files changed, 101 insertions(+), 8 deletions(-)
 create mode 100644 examples/keypoint_detection/eval_keypoint_detection.py

diff --git a/chainercv/links/model/mask_rcnn/keypoint_head.py b/chainercv/links/model/mask_rcnn/keypoint_head.py
index 9994c19554..7f4d967803 100644
--- a/chainercv/links/model/mask_rcnn/keypoint_head.py
+++ b/chainercv/links/model/mask_rcnn/keypoint_head.py
@@ -42,7 +42,7 @@ def __init__(self, n_point, scales):
                 n_point, 4, pad=1, stride=2, initialW=initialW)
 
         self._scales = scales
-        self._n_point = n_point
+        self.n_point = n_point
 
     def __call__(self, hs, rois, roi_indices):
         pooled_hs = []
@@ -56,10 +56,10 @@ def __call__(self, hs, rois, roi_indices):
                 self._scales[l], self._roi_sample_ratio))
 
         if len(pooled_hs) == 0:
-            out_size = self.map_size
-            point = chainer.Variable(
-               self.xp.empty((0, self._n_class, out_size, out_size), dtype=np.float32))
-            return segs
+            return chainer.Variable(
+               self.xp.empty(
+                   (0, self.n_point, self.map_size, self.map_size),
+                   dtype=np.float32))
 
         h = F.concat(pooled_hs, axis=0)
         h = self.conv1(h)
@@ -93,8 +93,8 @@ def decode(self, point_maps, bboxes):
         points = []
         point_scores = []
         for bbox, point_map in zip(bboxes, point_maps):
-            point = np.zeros((len(bbox), self._n_point, 2), dtype=np.float32)
-            point_score = np.zeros((len(bbox), self._n_point), dtype=np.float32)
+            point = np.zeros((len(bbox), self.n_point, 2), dtype=np.float32)
+            point_score = np.zeros((len(bbox), self.n_point), dtype=np.float32)
 
             hs = bbox[:, 2] - bbox[:, 0]
             ws = bbox[:, 3] - bbox[:, 1]
@@ -109,7 +109,7 @@ def decode(self, point_maps, bboxes):
                     interpolation=cv2.INTER_CUBIC).transpose(
                         (2, 0, 1))
                 _, H, W = point_m.shape
-                for k in range(self._n_point):
+                for k in range(self.n_point):
                     pos = point_m[k].argmax()
                     x_int = pos % W
                     y_int = (pos - x_int) // W
diff --git a/examples/keypoint_detection/eval_keypoint_detection.py b/examples/keypoint_detection/eval_keypoint_detection.py
new file mode 100644
index 0000000000..14da196e8c
--- /dev/null
+++ b/examples/keypoint_detection/eval_keypoint_detection.py
@@ -0,0 +1,93 @@
+import argparse
+
+import chainer
+from chainer import iterators
+
+from chainercv.datasets import COCOKeypointDataset
+from chainercv.evaluations import eval_keypoint_detection_coco
+from chainercv.links import MaskRCNNFPNResNet101
+from chainercv.links import MaskRCNNFPNResNet50
+from chainercv.utils import apply_to_iterator
+from chainercv.utils import ProgressHook
+
+models = {
+    # model: (class, dataset -> pretrained_model, default batchsize)
+    'mask_rcnn_fpn_resnet50': (MaskRCNNFPNResNet50,
+                               {}, 1),
+    'mask_rcnn_fpn_resnet101': (MaskRCNNFPNResNet101,
+                                {}, 1),
+}
+
+
+def setup(dataset, model_name, pretrained_model, batchsize):
+    cls, pretrained_models, default_batchsize = models[model_name]
+    dataset_name = dataset
+    if pretrained_model is None:
+        pretrained_model = pretrained_models.get(dataset_name, dataset_name)
+    if batchsize is None:
+        batchsize = default_batchsize
+
+    if dataset_name == 'coco':
+        dataset = COCOKeypointDataset(
+            split='val',
+            use_crowded=True, return_crowded=True,
+            return_area=True)
+        n_fg_class = 1
+        n_point = 17
+        model = cls(
+            n_fg_class=n_fg_class,
+            pretrained_model=pretrained_model,
+            n_point=n_point,
+            mode='keypoint'
+        )
+        model.use_preset('evaluate')
+
+        def eval_(out_values, rest_values):
+            (pred_points, pred_point_scores, pred_bboxes, pred_labels,
+             pred_scores) = out_values
+            (gt_points, gt_visibles, gt_bboxes, gt_labels,
+             gt_areas, gt_crowdeds) = rest_values
+
+            result = eval_keypoint_detection_coco(
+                pred_points, pred_labels, pred_scores,
+                gt_points, gt_visibles, gt_bboxes, gt_labels,
+                gt_areas, gt_crowdeds)
+
+            print()
+            for area in ('all', 'large', 'medium'):
+                print('mmAP ({}):'.format(area),
+                      result['map/iou=0.50:0.95/area={}/max_dets=20'.format(
+                          area)])
+
+    return dataset, eval_, model, batchsize
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--dataset', choices=('coco',), default='coco')
+    parser.add_argument('--model', choices=sorted(models.keys()))
+    parser.add_argument('--pretrained-model')
+    parser.add_argument('--batchsize', type=int)
+    parser.add_argument('--gpu', type=int, default=-1)
+    args = parser.parse_args()
+
+    dataset, eval_, model, batchsize = setup(
+        args.dataset, args.model, args.pretrained_model, args.batchsize)
+
+    if args.gpu >= 0:
+        chainer.cuda.get_device_from_id(args.gpu).use()
+        model.to_gpu()
+
+    iterator = iterators.MultithreadIterator(
+        dataset, batchsize, repeat=False, shuffle=False)
+
+    in_values, out_values, rest_values = apply_to_iterator(
+        model.predict, iterator, hook=ProgressHook(len(dataset)))
+    # delete unused iterators explicitly
+    del in_values
+
+    eval_(out_values, rest_values)
+
+
+if __name__ == '__main__':
+    main()

From 18af5fb58356af351761d12d598d4ef302491fde Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 22 Feb 2019 08:24:16 +0900
Subject: [PATCH 042/100] flake8

---
 chainercv/datasets/__init__.py                       | 2 +-
 tests/visualizations_tests/test_vis_keypoint_coco.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/chainercv/datasets/__init__.py b/chainercv/datasets/__init__.py
index fcb3a6c772..bb6ed650dc 100644
--- a/chainercv/datasets/__init__.py
+++ b/chainercv/datasets/__init__.py
@@ -19,8 +19,8 @@
 from chainercv.datasets.coco.coco_utils import coco_keypoint_names  # NOQA
 from chainercv.datasets.coco.coco_utils import coco_semantic_segmentation_label_colors  # NOQA
 from chainercv.datasets.coco.coco_utils import coco_semantic_segmentation_label_names  # NOQA
-from chainercv.datasets.cub.cub_label_dataset import CUBLabelDataset  # NOQA
 from chainercv.datasets.cub.cub_keypoint_dataset import CUBKeypointDataset  # NOQA
+from chainercv.datasets.cub.cub_label_dataset import CUBLabelDataset  # NOQA
 from chainercv.datasets.cub.cub_utils import cub_label_names  # NOQA
 from chainercv.datasets.directory_parsing_label_dataset import directory_parsing_label_names  # NOQA
 from chainercv.datasets.directory_parsing_label_dataset import DirectoryParsingLabelDataset  # NOQA
diff --git a/tests/visualizations_tests/test_vis_keypoint_coco.py b/tests/visualizations_tests/test_vis_keypoint_coco.py
index 0e776ce8b1..97c2f09a9b 100644
--- a/tests/visualizations_tests/test_vis_keypoint_coco.py
+++ b/tests/visualizations_tests/test_vis_keypoint_coco.py
@@ -97,4 +97,5 @@ def test_invisible_visible_dtype(self):
         self._check(self.img, self.point, self.visible.astype(np.int32),
                     self.point_score)
 
+
 testing.run_module(__name__, __file__)

From 9f4c9274392932e02f3f5cca75fa80d42cb1b60c Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 22 Feb 2019 08:26:07 +0900
Subject: [PATCH 043/100] delete zerograd

---
 examples/mask_rcnn/train_multi.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py
index 44f1e23249..5061e68dce 100644
--- a/examples/mask_rcnn/train_multi.py
+++ b/examples/mask_rcnn/train_multi.py
@@ -112,7 +112,6 @@ def __call__(self, imgs, masks, labels, bboxes):
             # ChainerMN hangs when a subset of nodes has a different
             # computational graph from the rest.
             loss = chainer.Variable(self.xp.array(0, dtype=np.float32))
-            self.zerograds()
         return loss
 
 

From 7e610d48ae564427105da31621c0c56f6f312972 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Mon, 25 Feb 2019 16:08:17 +0900
Subject: [PATCH 044/100] complete graph when n_roi == 0

---
 examples/mask_rcnn/train_multi.py | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py
index 5061e68dce..d0a4255af4 100644
--- a/examples/mask_rcnn/train_multi.py
+++ b/examples/mask_rcnn/train_multi.py
@@ -4,6 +4,7 @@
 import PIL
 
 import chainer
+import chainer.functions as F
 import chainer.links as L
 from chainer.optimizer_hooks import WeightDecay
 from chainer import serializers
@@ -96,22 +97,28 @@ def __call__(self, imgs, masks, labels, bboxes):
             rois, roi_indices, masks, bboxes,
             head_gt_labels, self.model.mask_head.mask_size)
         n_roi = sum([len(roi) for roi in mask_rois])
+        if n_roi == 0:
+            H, W = sizes[0]
+            mask_rois = [np.array([[
+                H // 4,
+                W // 4,
+                3 * H // 4,
+                3 * W // 4]], dtype=np.float32)]
+            mask_roi_indices = [np.array([0], dtype=np.int32)]
+        segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)
         if n_roi > 0:
-            segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)
             mask_loss = mask_loss_post(
                 segms, mask_roi_indices, gt_segms, gt_mask_labels, B)
-            loss = (rpn_loc_loss + rpn_conf_loss + 
-                head_loc_loss + head_conf_loss + mask_loss)
-            chainer.reporter.report({
-                'loss': loss,
-                'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss,
-                'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss,
-                'loss/mask': mask_loss},
-                self)
         else:
-            # ChainerMN hangs when a subset of nodes has a different
-            # computational graph from the rest.
-            loss = chainer.Variable(self.xp.array(0, dtype=np.float32))
+            mask_loss = 0 * F.sum(segms)
+        loss = (rpn_loc_loss + rpn_conf_loss +
+                head_loc_loss + head_conf_loss + mask_loss)
+        chainer.reporter.report({
+            'loss': loss,
+            'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss,
+            'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss,
+            'loss/mask': mask_loss},
+            self)
         return loss
 
 

From b5cb93a5b572d72aabea0f67d4be4312416a5196 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Mon, 25 Feb 2019 16:08:23 +0900
Subject: [PATCH 045/100] flake8

---
 examples/mask_rcnn/train_multi.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py
index d0a4255af4..217b31dc86 100644
--- a/examples/mask_rcnn/train_multi.py
+++ b/examples/mask_rcnn/train_multi.py
@@ -48,7 +48,8 @@ def __call__(self, imgs, masks, labels, bboxes):
         pad_size = np.array(
             [im.shape[1:] for im in imgs]).max(axis=0)
         pad_size = (
-            np.ceil(pad_size / self.model.stride) * self.model.stride).astype(int)
+            np.ceil(
+                pad_size / self.model.stride) * self.model.stride).astype(int)
         x = np.zeros(
             (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32)
         for i, img in enumerate(imgs):
@@ -131,7 +132,6 @@ def __init__(self, min_size, max_size, mean):
 
     def __call__(self, in_data):
         img, mask, label, bbox = in_data
-        original = mask.shape
         # Flipping
         img, params = transforms.random_flip(
             img, x_random=True, return_param=True)

From 7e707d8ab247a03de433135c59ef2bf3fd9de35b Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Mon, 25 Feb 2019 16:29:18 +0900
Subject: [PATCH 046/100] fix: build dummy mask RoI with self.xp only when n_roi == 0

---
 examples/mask_rcnn/train_multi.py | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py
index 217b31dc86..44273026f7 100644
--- a/examples/mask_rcnn/train_multi.py
+++ b/examples/mask_rcnn/train_multi.py
@@ -98,19 +98,15 @@ def __call__(self, imgs, masks, labels, bboxes):
             rois, roi_indices, masks, bboxes,
             head_gt_labels, self.model.mask_head.mask_size)
         n_roi = sum([len(roi) for roi in mask_rois])
-        if n_roi == 0:
-            H, W = sizes[0]
-            mask_rois = [np.array([[
-                H // 4,
-                W // 4,
-                3 * H // 4,
-                3 * W // 4]], dtype=np.float32)]
-            mask_roi_indices = [np.array([0], dtype=np.int32)]
-        segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)
         if n_roi > 0:
+            segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)
             mask_loss = mask_loss_post(
                 segms, mask_roi_indices, gt_segms, gt_mask_labels, B)
         else:
+            # Compute dummy variables to complete the computational graph
+            mask_rois[0] = self.xp.array([[0, 0, 1, 1]], dtype=np.float32)
+            mask_roi_indices[0] = self.xp.array([0], dtype=np.int32)
+            segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)
             mask_loss = 0 * F.sum(segms)
         loss = (rpn_loc_loss + rpn_conf_loss +
                 head_loc_loss + head_conf_loss + mask_loss)

From 9a2606566e3f23bef0375bef2119d3b7a090279b Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Mon, 4 Mar 2019 18:29:22 +0900
Subject: [PATCH 047/100] use bilinear interpolation with kernel size 4

---
 .../links/model/mask_rcnn/keypoint_head.py    | 20 ++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/chainercv/links/model/mask_rcnn/keypoint_head.py b/chainercv/links/model/mask_rcnn/keypoint_head.py
index 7f4d967803..0d67ab2c21 100644
--- a/chainercv/links/model/mask_rcnn/keypoint_head.py
+++ b/chainercv/links/model/mask_rcnn/keypoint_head.py
@@ -18,6 +18,19 @@
 from chainercv.utils.mask.mask_to_bbox import mask_to_bbox
 
 
+# make a bilinear interpolation kernel
+# credit @longjon
+def _upsample_filt(size):
+    factor = (size + 1) // 2
+    if size % 2 == 1:
+        center = factor - 1
+    else:
+        center = factor - 0.5
+    og = np.ogrid[:size, :size]
+    return (1 - abs(og[0] - center) / factor) * \
+        (1 - abs(og[1] - center) / factor)
+
+
 class KeypointHead(chainer.Chain):
 
     _canonical_scale = 224
@@ -40,6 +53,11 @@ def __init__(self, n_point, scales):
             self.conv8 = Conv2DActiv(512, 3, pad=1, initialW=initialW)
             self.point = L.Deconvolution2D(
                 n_point, 4, pad=1, stride=2, initialW=initialW)
+            # Do not update the weight of this link
+            self.upsample = L.Deconvolution2D(
+                n_point, n_point, 4, pad=1, stride=2, nobias=True)
+        self.upsample.W.data[:] = 0
+        self.upsample.W.data[np.arange(n_point), np.arange(n_point)] = _upsample_filt(4)
 
         self._scales = scales
         self.n_point = n_point
@@ -71,7 +89,7 @@ def __call__(self, hs, rois, roi_indices):
         h = self.conv7(h)
         h = self.conv8(h)
         h = self.point(h)
-        return F.resize_images(h, (self.map_size, self.map_size))
+        return self.upsample(h)
 
     def distribute(self, rois, roi_indices):
         # Compleetely same as MaskHead.distribute

From 2d44d66f74c0be1f2952af80cc11a9076fd71e3c Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Tue, 5 Mar 2019 15:28:26 +0900
Subject: [PATCH 048/100] change mask_to_segm and divide mask loss by #RoI

---
 chainercv/links/model/mask_rcnn/mask_head.py |  75 ++------------
 chainercv/links/model/mask_rcnn/misc.py      | 100 +++++++++++++++++++
 2 files changed, 111 insertions(+), 64 deletions(-)

diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py
index d1436785c1..44fd6df6df 100644
--- a/chainercv/links/model/mask_rcnn/mask_head.py
+++ b/chainercv/links/model/mask_rcnn/mask_head.py
@@ -15,6 +15,9 @@
 from chainercv.transforms.image.resize import resize
 from chainercv.utils.bbox.bbox_iou import bbox_iou
 
+from chainercv.links.model.mask_rcnn.misc import segm_to_mask
+from chainercv.links.model.mask_rcnn.misc import mask_to_segm
+
 
 class MaskHead(chainer.Chain):
 
@@ -141,63 +144,14 @@ def decode(self, segms, bboxes, labels, sizes):
             raise ValueError(
                 'MaskHead.decode only supports numpy inputs for now.')
         masks = []
-        # To work around an issue with cv2.resize (it seems to automatically
-        # pad with repeated border values), we manually zero-pad the masks by 1
-        # pixel prior to resizing back to the original image resolution.
-        # This prevents "top hat" artifacts. We therefore need to expand
-        # the reference boxes by an appropriate factor.
-        cv2_expand_scale = (self.mask_size + 2) / self.mask_size
-        padded_mask = np.zeros((self.mask_size + 2, self.mask_size + 2),
-                               dtype=np.float32)
         for bbox, segm, label, size in zip(
                 bboxes, segms, labels, sizes):
-            img_H, img_W = size
-            mask = np.zeros((len(bbox), img_H, img_W), dtype=np.bool)
-
-            bbox = _expand_boxes(bbox, cv2_expand_scale)
-            for i, (bb, sgm, lbl) in enumerate(zip(bbox, segm, label)):
-                bb = bb.astype(np.int32)
-                padded_mask[1:-1, 1:-1] = sgm[lbl + 1]
-
-                # TODO(yuyu2172): Ignore +1 later
-                bb_height = np.maximum(bb[2] - bb[0] + 1, 1)
-                bb_width = np.maximum(bb[3] - bb[1] + 1, 1)
-
-                crop_mask = cv2.resize(padded_mask, (bb_width, bb_height))
-                crop_mask = crop_mask > 0.5
-
-                y_min = max(bb[0], 0)
-                x_min = max(bb[1], 0)
-                y_max = min(bb[2] + 1, img_H)
-                x_max = min(bb[3] + 1, img_W)
-                mask[i, y_min:y_max, x_min:x_max] = crop_mask[
-                    (y_min - bb[0]):(y_max - bb[0]),
-                    (x_min - bb[1]):(x_max - bb[1])]
-            masks.append(mask)
+            masks.append(
+                segm_to_mask(segm[np.arange(len(label)), label + 1],
+                             bbox, size))
         return masks
 
 
-def _expand_boxes(bbox, scale):
-    """Expand an array of boxes by a given scale."""
-    xp = chainer.backends.cuda.get_array_module(bbox)
-
-    h_half = (bbox[:, 2] - bbox[:, 0]) * .5
-    w_half = (bbox[:, 3] - bbox[:, 1]) * .5
-    y_c = (bbox[:, 2] + bbox[:, 0]) * .5
-    x_c = (bbox[:, 3] + bbox[:, 1]) * .5
-
-    h_half *= scale
-    w_half *= scale
-
-    expanded_bbox = xp.zeros(bbox.shape)
-    expanded_bbox[:, 0] = y_c - h_half
-    expanded_bbox[:, 1] = x_c - w_half
-    expanded_bbox[:, 2] = y_c + h_half
-    expanded_bbox[:, 3] = x_c + w_half
-
-    return expanded_bbox
-
-
 def mask_loss_pre(rois, roi_indices, gt_masks, gt_bboxes,
                   gt_head_labels, mask_size):
     """Loss function for Mask Head (pre).
@@ -261,8 +215,8 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_bboxes,
         mask_roi = mask_rois[index]
         iou = bbox_iou(mask_roi, gt_bbox)
         gt_index = iou.argmax(axis=1)
-        gt_segms[index] = _segm_wrt_bbox(
-            gt_mask, gt_index, mask_roi, (mask_size, mask_size), xp)
+        gt_segms[index] = xp.array(
+            mask_to_segm(gt_mask, mask_roi, mask_size, gt_index))
 
     flag_masks = [mask_roi_levels == l for l in range(n_level)]
     mask_rois = [mask_rois[m] for m in flag_masks]
@@ -297,16 +251,9 @@ def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels,
     gt_segms = xp.vstack(gt_segms).astype(np.float32, copy=False)
     gt_mask_labels = xp.hstack(gt_mask_labels).astype(np.int32)
 
-    mask_loss = 0
-    for i in np.unique(cuda.to_cpu(mask_roi_indices)):
-        index = (mask_roi_indices == i).nonzero()[0]
-        gt_segm = gt_segms[index]
-        gt_mask_label = gt_mask_labels[index]
-
-        mask_loss += F.sigmoid_cross_entropy(
-            segms[index, gt_mask_label], gt_segm.astype(np.int32))
-
-    mask_loss /= batchsize
+    mask_loss = F.sigmoid_cross_entropy(
+        segms[np.arange(len(gt_mask_labels)), gt_mask_labels],
+        gt_segms.astype(np.int32))
     return mask_loss
 
 
diff --git a/chainercv/links/model/mask_rcnn/misc.py b/chainercv/links/model/mask_rcnn/misc.py
index abb233443b..8d0fca37f4 100644
--- a/chainercv/links/model/mask_rcnn/misc.py
+++ b/chainercv/links/model/mask_rcnn/misc.py
@@ -1,3 +1,10 @@
+from __future__ import division
+
+import cv2
+import numpy as np
+
+import chainer
+
 from chainercv import transforms
 
 
@@ -10,3 +17,96 @@ def scale_img(img, min_size, max_size):
     H, W = int(H * scale), int(W * scale)
     img = transforms.resize(img, (H, W))
     return img, scale
+
+
+def mask_to_segm(mask, bbox, segm_size, index=None, pad=1):
+    _, H, W = mask.shape
+    bbox = chainer.backends.cuda.to_cpu(bbox)
+    padded_segm_size = segm_size + pad * 2
+    cv2_expand_scale = padded_segm_size / segm_size
+    bbox = _expand_boxes(bbox, cv2_expand_scale).astype(np.int32)
+
+    segm = []
+    if index is None:
+        index = np.arange(len(index))
+    else:
+        index = chainer.backends.cuda.to_cpu(index)
+
+    for i, bb in zip(index, bbox):
+        y_min = max(bb[0], 0)
+        x_min = max(bb[1], 0)
+        y_max = min(bb[2] + 1, H)
+        x_max = min(bb[3] + 1, W)
+        cropped_m = mask[i, y_min:y_max, x_min:x_max]
+        cropped_m = chainer.backends.cuda.to_cpu(cropped_m)
+        if cropped_m.shape[0] <= 1 or cropped_m.shape[1] <= 1:
+            segm.append(np.zeros((segm_size, segm_size), dtype=np.float32))
+            continue
+
+        sgm = transforms.resize(
+            cropped_m[None].astype(np.float32),
+            (padded_segm_size, padded_segm_size))[0]
+        segm.append(sgm[pad:-pad, pad:-pad])
+
+    return np.array(segm, dtype=np.int32)
+
+
+def segm_to_mask(segm, bbox, size, pad=1):
+    """
+    segm: (R, H, W) float32
+
+    """
+    H, W = size
+    _, segm_size, _ = segm.shape
+
+    mask = np.zeros((len(bbox), H, W), dtype=np.bool)
+
+    # To work around an issue with cv2.resize (it seems to automatically
+    # pad with repeated border values), we manually zero-pad the masks by 1
+    # pixel prior to resizing back to the original image resolution.
+    # This prevents "top hat" artifacts. We therefore need to expand
+    # the reference boxes by an appropriate factor.
+    cv2_expand_scale = (segm_size + pad * 2) / segm_size
+    padded_mask = np.zeros(
+        (segm_size + pad * 2, segm_size + pad * 2), dtype=np.float32)
+
+    bbox = _expand_boxes(bbox, cv2_expand_scale)
+    for i, (bb, sgm) in enumerate(zip(bbox, segm)):
+        bb = bb.astype(np.int32)
+        padded_mask[1:-1, 1:-1] = sgm
+
+        bb_height = np.maximum(bb[2] - bb[0] + 1, 1)
+        bb_width = np.maximum(bb[3] - bb[1] + 1, 1)
+
+        crop_mask = cv2.resize(padded_mask, (bb_width, bb_height))
+        crop_mask = crop_mask > 0.5
+
+        y_min = max(bb[0], 0)
+        x_min = max(bb[1], 0)
+        y_max = min(bb[2] + 1, H)
+        x_max = min(bb[3] + 1, W)
+        mask[i, y_min:y_max, x_min:x_max] = crop_mask[
+            (y_min - bb[0]):(y_max - bb[0]),
+            (x_min - bb[1]):(x_max - bb[1])]
+    return mask
+
+
+def _expand_boxes(bbox, scale):
+    """Expand an array of boxes by a given scale."""
+    xp = chainer.backends.cuda.get_array_module(bbox)
+
+    h_half = (bbox[:, 2] - bbox[:, 0]) * .5
+    w_half = (bbox[:, 3] - bbox[:, 1]) * .5
+    y_c = (bbox[:, 2] + bbox[:, 0]) * .5
+    x_c = (bbox[:, 3] + bbox[:, 1]) * .5
+
+    h_half *= scale
+    w_half *= scale
+
+    expanded_bbox = xp.zeros(bbox.shape)
+    expanded_bbox[:, 0] = y_c - h_half
+    expanded_bbox[:, 1] = x_c - w_half
+    expanded_bbox[:, 2] = y_c + h_half
+    expanded_bbox[:, 3] = x_c + w_half
+
+    return expanded_bbox

From be4e8ad5bd50ff0e0329c06bea6e2ba4c71b8bed Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Tue, 5 Mar 2019 15:45:09 +0900
Subject: [PATCH 049/100] use segm_size instead of mask_size

---
 chainercv/links/model/mask_rcnn/mask_head.py | 14 +++++++-------
 chainercv/links/model/mask_rcnn/mask_rcnn.py |  2 +-
 examples/mask_rcnn/train_multi.py            |  2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py
index 44fd6df6df..8a8ca8a01f 100644
--- a/chainercv/links/model/mask_rcnn/mask_head.py
+++ b/chainercv/links/model/mask_rcnn/mask_head.py
@@ -33,7 +33,7 @@ class MaskHead(chainer.Chain):
     _canonical_scale = 224
     _roi_size = 14
     _roi_sample_ratio = 2
-    mask_size = _roi_size * 2
+    segm_size = _roi_size * 2
 
     def __init__(self, n_class, scales):
         super(MaskHead, self).__init__()
@@ -63,7 +63,7 @@ def __call__(self, hs, rois, roi_indices):
                 self._scales[l], self._roi_sample_ratio))
 
         if len(pooled_hs) == 0:
-            out_size = self.mask_size
+            out_size = self.segm_size
             segs = chainer.Variable(
                 self.xp.empty((0, self._n_class, out_size, out_size),
                               dtype=np.float32))
@@ -153,7 +153,7 @@ def decode(self, segms, bboxes, labels, sizes):
 
 
 def mask_loss_pre(rois, roi_indices, gt_masks, gt_bboxes,
-                  gt_head_labels, mask_size):
+                  gt_head_labels, segm_size):
     """Loss function for Mask Head (pre).
 
     This function processes RoIs for :func:`mask_loss_post` by
@@ -173,7 +173,7 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_bboxes,
             shape :math:`(R_l,)`. This is a collection of ground-truth
             labels assigned to :obj:`rois` during bounding box localization
             stage. The range of value is :math:`(0, n\_class - 1)`.
-        mask_size (int): Size of the ground truth network output.
+        segm_size (int): Size of the ground truth network output.
 
     Returns:
         tuple of four lists:
@@ -185,7 +185,7 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_bboxes,
             feature map.
         * **roi_indices**: A list of arrays of shape :math:`(R'_l,)`.
         * **gt_segms**: A list of arrays of shape :math:`(R'_l, M, M). \
-            :math:`M` is the argument :obj:`mask_size`.
+            :math:`M` is the argument :obj:`segm_size`.
         * **gt_mask_labels**: A list of arrays of shape :math:`(R'_l,)` \
             indicating the classes of ground truth.
     """
@@ -206,7 +206,7 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_bboxes,
     mask_roi_indices = roi_indices[index]
     gt_mask_labels = gt_head_labels[index]
 
-    gt_segms = xp.empty((len(mask_rois), mask_size, mask_size), dtype=np.bool)
+    gt_segms = xp.empty((len(mask_rois), segm_size, segm_size), dtype=np.bool)
     for i in np.unique(cuda.to_cpu(mask_roi_indices)):
         gt_mask = gt_masks[i]
         gt_bbox = gt_bboxes[i]
@@ -216,7 +216,7 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_bboxes,
         iou = bbox_iou(mask_roi, gt_bbox)
         gt_index = iou.argmax(axis=1)
         gt_segms[index] = xp.array(
-            mask_to_segm(gt_mask, mask_roi, mask_size, gt_index))
+            mask_to_segm(gt_mask, mask_roi, segm_size, gt_index))
 
     flag_masks = [mask_roi_levels == l for l in range(n_level)]
     mask_rois = [mask_rois[m] for m in flag_masks]
diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py
index 9f59f49d92..65b76c5b0d 100644
--- a/chainercv/links/model/mask_rcnn/mask_rcnn.py
+++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py
@@ -147,7 +147,7 @@ def predict(self, imgs):
             segms, mask_roi_indices_before_reordering, len(imgs))
         segms = [segm if segm is not None else
                  self.xp.zeros(
-                     (0, self.mask_head.mask_size, self.mask_head.mask_size),
+                     (0, self.mask_head.segm_size, self.mask_head.segm_size),
                      dtype=np.float32)
                  for segm in segms]
 
diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py
index 44273026f7..921b1e53dc 100644
--- a/examples/mask_rcnn/train_multi.py
+++ b/examples/mask_rcnn/train_multi.py
@@ -96,7 +96,7 @@ def __call__(self, imgs, masks, labels, bboxes):
 
         mask_rois, mask_roi_indices, gt_segms, gt_mask_labels = mask_loss_pre(
             rois, roi_indices, masks, bboxes,
-            head_gt_labels, self.model.mask_head.mask_size)
+            head_gt_labels, self.model.mask_head.segm_size)
         n_roi = sum([len(roi) for roi in mask_rois])
         if n_roi > 0:
             segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)

From 28227892cca86fb52b07f5df44def25665a89704 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Wed, 6 Mar 2019 10:55:01 +0900
Subject: [PATCH 050/100] fix mask_head: handle empty segm in decode, drop dead code

---
 chainercv/links/model/mask_rcnn/mask_head.py | 32 +++++---------------
 1 file changed, 7 insertions(+), 25 deletions(-)

diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py
index 8a8ca8a01f..462085d536 100644
--- a/chainercv/links/model/mask_rcnn/mask_head.py
+++ b/chainercv/links/model/mask_rcnn/mask_head.py
@@ -1,9 +1,6 @@
 from __future__ import division
 
 import numpy as np
-import PIL
-
-import cv2
 
 import chainer
 from chainer.backends import cuda
@@ -12,11 +9,10 @@
 import chainer.links as L
 
 from chainercv.links import Conv2DActiv
-from chainercv.transforms.image.resize import resize
 from chainercv.utils.bbox.bbox_iou import bbox_iou
 
-from chainercv.links.model.mask_rcnn.misc import segm_to_mask
 from chainercv.links.model.mask_rcnn.misc import mask_to_segm
+from chainercv.links.model.mask_rcnn.misc import segm_to_mask
 
 
 class MaskHead(chainer.Chain):
@@ -146,9 +142,12 @@ def decode(self, segms, bboxes, labels, sizes):
         masks = []
         for bbox, segm, label, size in zip(
                 bboxes, segms, labels, sizes):
-            masks.append(
-                segm_to_mask(segm[np.arange(len(label)), label + 1],
-                             bbox, size))
+            if len(segm) > 0:
+                masks.append(
+                    segm_to_mask(segm[np.arange(len(label)), label + 1],
+                                 bbox, size))
+            else:
+                masks.append(np.zeros((0,) + size, dtype=np.bool))
         return masks
 
 
@@ -255,20 +254,3 @@ def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels,
         segms[np.arange(len(gt_mask_labels)), gt_mask_labels],
         gt_segms.astype(np.int32))
     return mask_loss
-
-
-def _segm_wrt_bbox(mask, gt_index, bbox, size, xp):
-    bbox = chainer.backends.cuda.to_cpu(bbox.astype(np.int32))
-
-    segm = []
-    for i, bb in zip(chainer.backends.cuda.to_cpu(gt_index), bbox):
-        cropped_m = mask[i, bb[0]:bb[2], bb[1]:bb[3]]
-        cropped_m = chainer.backends.cuda.to_cpu(cropped_m)
-        if cropped_m.shape[0] == 0 or cropped_m.shape[1] == 0:
-            segm.append(np.zeros(size, dtype=np.bool))
-            continue
-
-        segm.append(resize(
-            cropped_m[None].astype(np.float32),
-            size, interpolation=PIL.Image.NEAREST)[0])
-    return xp.array(segm, dtype=np.float32)

From 6513e2480e98ce3773ec5566c9923be51f75bb23 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <niitani@preferred.jp>
Date: Wed, 6 Mar 2019 13:20:10 +0900
Subject: [PATCH 051/100] delete +1

---
 chainercv/links/model/mask_rcnn/misc.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/chainercv/links/model/mask_rcnn/misc.py b/chainercv/links/model/mask_rcnn/misc.py
index 8d0fca37f4..796da612ff 100644
--- a/chainercv/links/model/mask_rcnn/misc.py
+++ b/chainercv/links/model/mask_rcnn/misc.py
@@ -24,7 +24,7 @@ def mask_to_segm(mask, bbox, segm_size, index=None, pad=1):
     bbox = chainer.backends.cuda.to_cpu(bbox)
     padded_segm_size = segm_size + pad * 2
     cv2_expand_scale = padded_segm_size / segm_size
-    bbox = _expand_boxes(bbox, cv2_expand_scale).astype(np.int32)
+    bbox = _integerize_bbox(_expand_boxes(bbox, cv2_expand_scale))
 
     segm = []
     if index is None:
@@ -35,11 +35,11 @@ def mask_to_segm(mask, bbox, segm_size, index=None, pad=1):
     for i, bb in zip(index, bbox):
         y_min = max(bb[0], 0)
         x_min = max(bb[1], 0)
-        y_max = min(bb[2] + 1, H)
-        x_max = min(bb[3] + 1, W)
+        y_max = min(bb[2], H)
+        x_max = min(bb[3], W)
         cropped_m = mask[i, y_min:y_max, x_min:x_max]
         cropped_m = chainer.backends.cuda.to_cpu(cropped_m)
-        if cropped_m.shape[0] <= 1 or cropped_m.shape[1] <= 1:
+        if cropped_m.shape[0] == 0 or cropped_m.shape[1] == 0:
             segm.append(np.zeros((segm_size, segm_size), dtype=np.float32))
             continue
 
@@ -70,27 +70,30 @@ def segm_to_mask(segm, bbox, size, pad=1):
     padded_mask = np.zeros(
         (segm_size + pad * 2, segm_size + pad * 2), dtype=np.float32)
 
-    bbox = _expand_boxes(bbox, cv2_expand_scale)
+    bbox = _integerize_bbox(_expand_boxes(bbox, cv2_expand_scale))
     for i, (bb, sgm) in enumerate(zip(bbox, segm)):
-        bb = bb.astype(np.int32)
         padded_mask[1:-1, 1:-1] = sgm
 
-        bb_height = np.maximum(bb[2] - bb[0] + 1, 1)
-        bb_width = np.maximum(bb[3] - bb[1] + 1, 1)
+        bb_height = np.maximum(bb[2] - bb[0], 1)
+        bb_width = np.maximum(bb[3] - bb[1], 1)
 
         crop_mask = cv2.resize(padded_mask, (bb_width, bb_height))
         crop_mask = crop_mask > 0.5
 
         y_min = max(bb[0], 0)
         x_min = max(bb[1], 0)
-        y_max = min(bb[2] + 1, H)
-        x_max = min(bb[3] + 1, W)
+        y_max = min(bb[2], H)
+        x_max = min(bb[3], W)
         mask[i, y_min:y_max, x_min:x_max] = crop_mask[
             (y_min - bb[0]):(y_max - bb[0]),
             (x_min - bb[1]):(x_max - bb[1])]
     return mask
 
 
+def _integerize_bbox(bbox):
+    return np.round(bbox).astype(np.int32)
+
+
 def _expand_boxes(bbox, scale):
     """Expand an array of boxes by a given scale."""
     xp = chainer.backends.cuda.get_array_module(bbox)

From 7e1e3ecee5930c527c7f1c89cdd4719d826095e5 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Wed, 6 Mar 2019 14:45:46 +0900
Subject: [PATCH 052/100] fix mask_to_segm and segm_to_mask

---
 chainercv/links/model/mask_rcnn/misc.py | 40 ++++++++++++++++---------
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/chainercv/links/model/mask_rcnn/misc.py b/chainercv/links/model/mask_rcnn/misc.py
index 796da612ff..c589d7f2d2 100644
--- a/chainercv/links/model/mask_rcnn/misc.py
+++ b/chainercv/links/model/mask_rcnn/misc.py
@@ -35,20 +35,28 @@ def mask_to_segm(mask, bbox, segm_size, index=None, pad=1):
     for i, bb in zip(index, bbox):
         y_min = max(bb[0], 0)
         x_min = max(bb[1], 0)
-        y_max = min(bb[2], H)
-        x_max = min(bb[3], W)
-        cropped_m = mask[i, y_min:y_max, x_min:x_max]
-        cropped_m = chainer.backends.cuda.to_cpu(cropped_m)
-        if cropped_m.shape[0] == 0 or cropped_m.shape[1] == 0:
+        y_max = max(min(bb[2], H), 0)
+        x_max = max(min(bb[3], W), 0)
+        if y_max - y_min == 0 or x_max - x_min == 0:
             segm.append(np.zeros((segm_size, segm_size), dtype=np.float32))
             continue
 
+        bb_height = bb[2] - bb[0]
+        bb_width = bb[3] - bb[1]
+        cropped_m = np.zeros((bb_height, bb_width), dtype=np.bool)
+
+        y_offset = y_min - bb[0]
+        x_offset = x_min - bb[1]
+        cropped_m[y_offset:y_offset + y_max - y_min,
+                  x_offset:x_offset + x_max - x_min] =\
+            chainer.backends.cuda.to_cpu(mask[i, y_min:y_max, x_min:x_max])
+
         sgm = transforms.resize(
             cropped_m[None].astype(np.float32),
-            (padded_segm_size, padded_segm_size))[0]
+            (padded_segm_size, padded_segm_size))[0].astype(np.int32)
         segm.append(sgm[pad:-pad, pad:-pad])
 
-    return np.array(segm, dtype=np.int32)
+    return np.array(segm, dtype=np.float32)
 
 
 def segm_to_mask(segm, bbox, size, pad=1):
@@ -74,19 +82,23 @@ def segm_to_mask(segm, bbox, size, pad=1):
     for i, (bb, sgm) in enumerate(zip(bbox, segm)):
         padded_mask[1:-1, 1:-1] = sgm
 
-        bb_height = np.maximum(bb[2] - bb[0], 1)
-        bb_width = np.maximum(bb[3] - bb[1], 1)
+        bb_height = bb[2] - bb[0]
+        bb_width = bb[3] - bb[1]
+        if bb_height == 0 or bb_width == 0:
+            continue
 
-        crop_mask = cv2.resize(padded_mask, (bb_width, bb_height))
+        crop_mask = transforms.resize(padded_mask[None], (bb_width, bb_height))[0]
         crop_mask = crop_mask > 0.5
 
         y_min = max(bb[0], 0)
         x_min = max(bb[1], 0)
-        y_max = min(bb[2], H)
-        x_max = min(bb[3], W)
+        y_max = max(min(bb[2], H), 0)
+        x_max = max(min(bb[3], W), 0)
+        y_offset = y_min - bb[0]
+        x_offset = x_min - bb[1]
         mask[i, y_min:y_max, x_min:x_max] = crop_mask[
-            (y_min - bb[0]):(y_max - bb[0]),
-            (x_min - bb[1]):(x_max - bb[1])]
+            y_offset:y_offset + y_max - y_min,
+            x_offset:x_offset + x_max - x_min]
     return mask
 
 

From c78925c640b15cb9dbbbe8f5763556bbe5fdaa07 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Wed, 6 Mar 2019 14:59:30 +0900
Subject: [PATCH 053/100] add test

---
 chainercv/links/model/mask_rcnn/misc.py       |  2 +-
 .../model_tests/mask_rcnn_tests/test_misc.py  | 52 +++++++++++++++++++
 2 files changed, 53 insertions(+), 1 deletion(-)
 create mode 100644 tests/links_tests/model_tests/mask_rcnn_tests/test_misc.py

diff --git a/chainercv/links/model/mask_rcnn/misc.py b/chainercv/links/model/mask_rcnn/misc.py
index c589d7f2d2..4f10c699c8 100644
--- a/chainercv/links/model/mask_rcnn/misc.py
+++ b/chainercv/links/model/mask_rcnn/misc.py
@@ -28,7 +28,7 @@ def mask_to_segm(mask, bbox, segm_size, index=None, pad=1):
 
     segm = []
     if index is None:
-        index = np.arange(len(index))
+        index = np.arange(len(bbox))
     else:
         index = chainer.backends.cuda.to_cpu(index)
 
diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_misc.py b/tests/links_tests/model_tests/mask_rcnn_tests/test_misc.py
new file mode 100644
index 0000000000..6bd6722c7a
--- /dev/null
+++ b/tests/links_tests/model_tests/mask_rcnn_tests/test_misc.py
@@ -0,0 +1,52 @@
+from __future__ import division
+
+import numpy as np
+import unittest
+
+from chainer import testing
+
+from chainercv.links.model.mask_rcnn.misc import segm_to_mask
+from chainercv.links.model.mask_rcnn.misc import mask_to_segm
+
+
+class TestSegmToMask(unittest.TestCase):
+
+    def setUp(self):
+        # When n_inst >= 3, the test fails.
+        # This is due to the fact that the transformed image of `transforms.resize`
+        # is misaligned to the corners.
+        n_inst = 2
+        self.segm_size = 3
+        self.size = (36, 48)
+
+        self.segm = np.ones((n_inst, self.segm_size, self.segm_size), dtype=np.float32)
+        self.bbox = np.zeros((n_inst, 4), dtype=np.float32)
+        for i in range(n_inst):
+            self.bbox[i, 0] = 10 + i
+            self.bbox[i, 1] = 10 + i
+            self.bbox[i, 2] = self.bbox[i, 0] + self.segm_size * (1 + i)
+            self.bbox[i, 3] = self.bbox[i, 1] + self.segm_size * (1 + i)
+
+        self.mask = np.zeros((n_inst,) + self.size, dtype=np.bool)
+        for i, bb in enumerate(self.bbox):
+            bb = bb.astype(np.int32)
+            self.mask[i, bb[0]:bb[2], bb[1]:bb[3]] = 1
+
+    def test_segm_to_mask(self):
+        mask = segm_to_mask(self.segm, self.bbox, self.size)
+        np.testing.assert_equal(mask, self.mask)
+
+    def test_mask_to_segm(self):
+        segm = mask_to_segm(self.mask, self.bbox, self.segm_size)
+        np.testing.assert_equal(segm, self.segm)
+
+    def test_mask_to_segm_index(self):
+        index = np.arange(len(self.bbox))[::-1]
+        segm = mask_to_segm(
+            self.mask, self.bbox[::-1],
+            self.segm_size, index=index)
+        segm = segm[::-1]
+        np.testing.assert_equal(segm, self.segm)
+
+
+testing.run_module(__name__, __file__)

From b181dfdd16e35a743380c611a782df710ee72013 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Wed, 6 Mar 2019 15:15:28 +0900
Subject: [PATCH 054/100] add mask_to_segm and segm_to_mask to doc

---
 chainercv/links/model/mask_rcnn/__init__.py |  2 +
 chainercv/links/model/mask_rcnn/misc.py     | 47 +++++++++++++++++++--
 docs/source/reference/links/mask_rcnn.rst   |  7 +++
 3 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/chainercv/links/model/mask_rcnn/__init__.py b/chainercv/links/model/mask_rcnn/__init__.py
index c9e910a524..9f1b210dbc 100644
--- a/chainercv/links/model/mask_rcnn/__init__.py
+++ b/chainercv/links/model/mask_rcnn/__init__.py
@@ -4,3 +4,5 @@
 from chainercv.links.model.mask_rcnn.mask_rcnn import MaskRCNN  # NOQA
 from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet101  # NOQA
 from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet50  # NOQA
+from chainercv.links.model.mask_rcnn.misc import mask_to_segm  # NOQA
+from chainercv.links.model.mask_rcnn.misc import segm_to_mask  # NOQA
diff --git a/chainercv/links/model/mask_rcnn/misc.py b/chainercv/links/model/mask_rcnn/misc.py
index 8d0fca37f4..a62b3bf43b 100644
--- a/chainercv/links/model/mask_rcnn/misc.py
+++ b/chainercv/links/model/mask_rcnn/misc.py
@@ -20,6 +20,30 @@ def scale_img(img, min_size, max_size):
 
 
 def mask_to_segm(mask, bbox, segm_size, index=None, pad=1):
+    """Crop and resize mask.
+
+    Args:
+        mask (~numpy.ndarray): See below.
+        bbox (~numpy.ndarray): See below.
+        segm_size (int): The size of segm :math:`S`.
+        index (~numpy.ndarray): See below. :math:`R = N` when
+            :obj:`index` is :obj:`None`.
+        pad (int): The amount of padding used for bbox.
+
+    Returns:
+        ~numpy.ndarray: See below.
+
+    .. csv-table::
+        :header: name, shape, dtype, format
+
+        :obj:`mask`, ":math:`(N, H, W)`", :obj:`bool`, --
+        :obj:`bbox`, ":math:`(R, 4)`", :obj:`float32`, \
+        ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`"
+        :obj:`index` (optional), ":math:`(R,)`", :obj:`int32`, --
+        :obj:`segms` (output), ":math:`(R, S, S)`", :obj:`float32`, \
+        ":math:`[0, 1]`"
+
+    """
     _, H, W = mask.shape
     bbox = chainer.backends.cuda.to_cpu(bbox)
     padded_segm_size = segm_size + pad * 2
@@ -48,12 +72,29 @@ def mask_to_segm(mask, bbox, segm_size, index=None, pad=1):
             (padded_segm_size, padded_segm_size))[0]
         segm.append(sgm[pad:-pad, pad:-pad])
 
-    return np.array(segm, dtype=np.int32)
+    return np.array(segm, dtype=np.float32)
 
 
 def segm_to_mask(segm, bbox, size, pad=1):
-    """
-    segm: (R, H, W) float32
+    """Recover mask from cropped and resized mask.
+
+    Args:
+        segm (~numpy.ndarray): See below.
+        bbox (~numpy.ndarray): See below.
+        size (tuple): This is a tuple of length 2. Its elements are
+            ordered as (height, width).
+        pad (int): The amount of padding used for bbox.
+
+    Returns:
+        ~numpy.ndarray: See below.
+
+    .. csv-table::
+        :header: name, shape, dtype, format
+
+        :obj:`segm`, ":math:`(R, S, S)`", :obj:`float32`, --
+        :obj:`bbox`, ":math:`(R, 4)`", :obj:`float32`, \
+        ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`"
+        :obj:`mask` (output), ":math:`(R, H, W)`", :obj:`bool`, --
 
     """
     H, W = size
diff --git a/docs/source/reference/links/mask_rcnn.rst b/docs/source/reference/links/mask_rcnn.rst
index 4c0870e2e5..9fce65c343 100644
--- a/docs/source/reference/links/mask_rcnn.rst
+++ b/docs/source/reference/links/mask_rcnn.rst
@@ -32,6 +32,9 @@ MaskHead
    :members:
    :special-members: __call__
 
+segm_to_mask
+~~~~~~~~~~~~
+.. autofunction:: segm_to_mask
 
 Train-only Utility
 ------------------
@@ -43,3 +46,7 @@ mask_loss_pre
 mask_loss_post
 ~~~~~~~~~~~~~~
 .. autofunction:: mask_loss_post
+
+mask_to_segm
+~~~~~~~~~~~~
+.. autofunction:: mask_to_segm

From acf73a67317785df16af4427553d12ff7452145c Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Wed, 6 Mar 2019 18:52:44 +0900
Subject: [PATCH 055/100] add keypoint_loss

---
 chainercv/links/model/mask_rcnn/__init__.py   |   2 +
 .../links/model/mask_rcnn/keypoint_head.py    |  70 ++++++++-
 chainercv/links/model/mask_rcnn/misc.py       |  39 +++++
 .../mask_rcnn_tests/test_keypoint_head.py     | 146 ++++++++++++++++++
 .../mask_rcnn_tests/test_mask_head.py         |  10 +-
 5 files changed, 260 insertions(+), 7 deletions(-)
 create mode 100644 tests/links_tests/model_tests/mask_rcnn_tests/test_keypoint_head.py

diff --git a/chainercv/links/model/mask_rcnn/__init__.py b/chainercv/links/model/mask_rcnn/__init__.py
index 1dc597cb9d..3391efe1f9 100644
--- a/chainercv/links/model/mask_rcnn/__init__.py
+++ b/chainercv/links/model/mask_rcnn/__init__.py
@@ -1,4 +1,6 @@
 from chainercv.links.model.mask_rcnn.keypoint_head import KeypointHead  # NOQA
+from chainercv.links.model.mask_rcnn.keypoint_head import keypoint_loss_post  # NOQA
+from chainercv.links.model.mask_rcnn.keypoint_head import keypoint_loss_pre  # NOQA
 from chainercv.links.model.mask_rcnn.mask_head import mask_loss_post  # NOQA
 from chainercv.links.model.mask_rcnn.mask_head import mask_loss_pre  # NOQA
 from chainercv.links.model.mask_rcnn.mask_head import MaskHead  # NOQA
diff --git a/chainercv/links/model/mask_rcnn/keypoint_head.py b/chainercv/links/model/mask_rcnn/keypoint_head.py
index 0d67ab2c21..8b8e3fc428 100644
--- a/chainercv/links/model/mask_rcnn/keypoint_head.py
+++ b/chainercv/links/model/mask_rcnn/keypoint_head.py
@@ -10,12 +10,12 @@
 import chainer.functions as F
 from chainer.backends import cuda
 from chainer.initializers import HeNormal
-from chainer.initializers import Normal
 
 from chainercv.links import Conv2DActiv
 from chainercv.transforms.image.resize import resize
 from chainercv.utils.bbox.bbox_iou import bbox_iou
-from chainercv.utils.mask.mask_to_bbox import mask_to_bbox
+
+from chainercv.links.model.mask_rcnn.misc import point_to_roi_points
 
 
 # make a bilinear interpolation kernel
@@ -140,3 +140,69 @@ def decode(self, point_maps, bboxes):
             points.append(point)
             point_scores.append(point_score)
         return points, point_scores
+
+
+def keypoint_loss_pre(rois, roi_indices, gt_points, gt_visibles,
+                      gt_bboxes, gt_head_labels, point_map_size):
+    _, n_point, _ = gt_points[0].shape
+
+    xp = cuda.get_array_module(*rois)
+
+    n_level = len(rois)
+
+    roi_levels = xp.hstack(
+        xp.array((l,) * len(rois[l])) for l in range(n_level)).astype(np.int32)
+    rois = xp.vstack(rois).astype(np.float32)
+    roi_indices = xp.hstack(roi_indices).astype(np.int32)
+    gt_head_labels = xp.hstack(gt_head_labels)
+
+    index = (gt_head_labels > 0).nonzero()[0]
+    point_roi_levels = roi_levels[index]
+    point_rois = rois[index]
+    point_roi_indices = roi_indices[index]
+
+    gt_roi_points = xp.empty(
+        (len(point_rois), n_point, 2), dtype=np.float32)
+    gt_roi_visibles = xp.empty(
+        (len(point_rois), n_point), dtype=np.bool)
+    for i in np.unique(cuda.to_cpu(point_roi_indices)):
+        gt_point = gt_points[i]
+        gt_visible = gt_visibles[i]
+        gt_bbox = gt_bboxes[i]
+
+        index = (point_roi_indices == i).nonzero()[0]
+        point_roi = point_rois[index]
+        iou = bbox_iou(point_roi, gt_bbox)
+        gt_index = iou.argmax(axis=1)
+        gt_roi_point, gt_roi_visible = point_to_roi_points(
+                gt_point[gt_index], gt_visible[gt_index],
+                point_roi, point_map_size)
+        gt_roi_points[index] = xp.array(gt_roi_point)
+        gt_roi_visibles[index] = xp.array(gt_roi_visible)
+
+    flag_masks = [point_roi_levels == l for l in range(n_level)]
+    point_rois = [point_rois[m] for m in flag_masks]
+    point_roi_indices = [point_roi_indices[m] for m in flag_masks]
+    gt_roi_points = [gt_roi_points[m] for m in flag_masks]
+    gt_roi_visibles = [gt_roi_visibles[m] for m in flag_masks]
+    return point_rois, point_roi_indices, gt_roi_points, gt_roi_visibles
+
+
+def keypoint_loss_post(
+        point_maps, point_roi_indices, gt_roi_points,
+        gt_roi_visibles, batchsize):
+    xp = cuda.get_array_module(point_maps.array)
+
+    point_roi_indices = xp.hstack(point_roi_indices).astype(np.int32)
+    gt_roi_points = xp.vstack(gt_roi_points).astype(np.int32)
+    gt_roi_visibles = xp.vstack(gt_roi_visibles).astype(np.bool)
+
+    B, K, H, W = point_maps.shape
+    point_maps = point_maps.reshape((B * K, H * W))
+    spatial_labels = gt_roi_points[:, :, 0] * W + gt_roi_points[:, :, 1]
+    spatial_labels = spatial_labels.reshape((B * K,))
+    spatial_labels[xp.logical_not(gt_roi_visibles.reshape((B * K,)))] = -1
+    # Remember that the loss is normalized by the total number of
+    # visible keypoints.
+    keypoint_loss = F.softmax_cross_entropy(point_maps, spatial_labels)
+    return keypoint_loss
diff --git a/chainercv/links/model/mask_rcnn/misc.py b/chainercv/links/model/mask_rcnn/misc.py
index a62b3bf43b..6b0c7a3f91 100644
--- a/chainercv/links/model/mask_rcnn/misc.py
+++ b/chainercv/links/model/mask_rcnn/misc.py
@@ -151,3 +151,42 @@ def _expand_boxes(bbox, scale):
     expanded_bbox[:, 3] = x_c + w_half
 
     return expanded_bbox
+
+
+def point_to_roi_points(
+        point, visible, bbox, point_map_size):
+    xp = chainer.backends.cuda.get_array_module(point)
+
+    R, K, _ = point.shape
+
+    roi_point = xp.zeros((len(bbox), K, 2))
+    roi_visible = xp.zeros((len(bbox), K), dtype=np.bool)
+
+    offset_y = bbox[:, 0]
+    offset_x = bbox[:, 1]
+    scale_y = point_map_size / (bbox[:, 2] - bbox[:, 0])
+    scale_x = point_map_size / (bbox[:, 3] - bbox[:, 1])
+
+    for k in range(K):
+        y_boundary_index = xp.where(point[:, k, 0] == bbox[:, 2])[0]
+        x_boundary_index = xp.where(point[:, k, 1] == bbox[:, 3])[0]
+
+        ys = (point[:, k, 0] - offset_y) * scale_y
+        ys = xp.floor(ys)
+        if len(y_boundary_index) > 0:
+            ys[y_boundary_index] = point_map_size - 1
+        xs = (point[:, k, 1] - offset_x) * scale_x
+        xs = xp.floor(xs)
+        if len(x_boundary_index) > 0:
+            xs[x_boundary_index] = point_map_size - 1
+
+        valid = xp.logical_and(
+            xp.logical_and(
+                xp.logical_and(ys >= 0, xs >= 0),
+                xp.logical_and(ys < point_map_size, xs < point_map_size)),
+            visible[:, k])
+
+        roi_point[:, k, 0] = ys
+        roi_point[:, k, 1] = xs
+        roi_visible[:, k] = valid
+    return roi_point, roi_visible
diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_keypoint_head.py b/tests/links_tests/model_tests/mask_rcnn_tests/test_keypoint_head.py
new file mode 100644
index 0000000000..836e9425f3
--- /dev/null
+++ b/tests/links_tests/model_tests/mask_rcnn_tests/test_keypoint_head.py
@@ -0,0 +1,146 @@
+from __future__ import division
+
+import numpy as np
+import unittest
+
+import chainer
+from chainer import testing
+from chainer.testing import attr
+
+from chainercv.links.model.mask_rcnn import KeypointHead
+from chainercv.links.model.mask_rcnn import keypoint_loss_post
+from chainercv.links.model.mask_rcnn import keypoint_loss_pre
+
+
+def _random_array(xp, shape):
+    return xp.array(
+        np.random.uniform(-1, 1, size=shape), dtype=np.float32)
+
+
+def _point_to_bbox(point, visible=None):
+    xp = chainer.backends.cuda.get_array_module(point)
+
+    bbox = xp.zeros((len(point), 4), dtype=np.float32)
+
+    for i, pnt in enumerate(point):
+        if visible is None:
+            vsbl = xp.ones((len(pnt),), dtype=np.bool)
+        else:
+            vsbl = visible[i]
+        pnt = pnt[vsbl]
+        bbox[i, 0] = xp.min(pnt[:, 0])
+        bbox[i, 1] = xp.min(pnt[:, 1])
+        bbox[i, 2] = xp.max(pnt[:, 0])
+        bbox[i, 3] = xp.max(pnt[:, 1])
+    return bbox
+
+
+class TestKeypointHeadLoss(unittest.TestCase):
+
+    def _check_keypoint_loss_pre(self, xp):
+        point_map_size = 28
+        n_point = 17
+        rois = [
+            xp.array(((4, 1, 6, 3),), dtype=np.float32),
+            xp.array(
+                ((0, 1, 2, 3), (5, 4, 10, 6)), dtype=np.float32),
+            xp.array(((10, 4, 12, 10),), dtype=np.float32),
+        ]
+        roi_indices = [
+            xp.array((0,), dtype=np.int32),
+            xp.array((1, 0), dtype=np.int32),
+            xp.array((1,), dtype=np.int32),
+        ]
+        points = [
+            xp.zeros((1, n_point, 2), dtype=np.float32),
+            xp.zeros((2, n_point, 2), dtype=np.float32),
+            xp.zeros((1, n_point, 2), dtype=np.float32),
+        ]
+        visibles = [
+            xp.ones((1, n_point), dtype=np.bool),
+            xp.ones((2, n_point), dtype=np.bool),
+            xp.ones((1, n_point), dtype=np.bool)
+        ]
+        bboxes = [_point_to_bbox(point, visible)
+                  for point, visible in zip(points, visibles)]
+        labels = [
+            xp.array((1, 1), dtype=np.int32),
+            xp.array((1,), dtype=np.int32),
+            xp.array((1,), dtype=np.int32),
+        ]
+        rois, roi_indices, gt_roi_points, gt_roi_visibles = keypoint_loss_pre(
+            rois, roi_indices, points, visibles, bboxes,
+            labels, point_map_size)
+
+        self.assertEqual(len(rois), 3)
+        self.assertEqual(len(roi_indices), 3)
+        self.assertEqual(len(gt_roi_points), 3)
+        self.assertEqual(len(gt_roi_visibles), 3)
+        for l in range(3):
+            self.assertIsInstance(rois[l], xp.ndarray)
+            self.assertIsInstance(roi_indices[l], xp.ndarray)
+            self.assertIsInstance(gt_roi_points[l], xp.ndarray)
+            self.assertIsInstance(gt_roi_visibles[l], xp.ndarray)
+
+            self.assertEqual(rois[l].shape[0], roi_indices[l].shape[0])
+            self.assertEqual(rois[l].shape[0], gt_roi_points[l].shape[0])
+            self.assertEqual(rois[l].shape[0], gt_roi_visibles[l].shape[0])
+            self.assertEqual(rois[l].shape[1:], (4,))
+            self.assertEqual(roi_indices[l].shape[1:], ())
+            self.assertEqual(
+                gt_roi_points[l].shape[1:], (n_point, 2))
+            self.assertEqual(
+                gt_roi_visibles[l].shape[1:], (n_point,))
+
+            self.assertEqual(
+                gt_roi_points[l].dtype, np.float32)
+            self.assertEqual(
+                gt_roi_visibles[l].dtype, np.bool)
+
+    def test_keypoint_loss_pre_cpu(self):
+        self._check_keypoint_loss_pre(np)
+
+    @attr.gpu
+    def test_keypoint_loss_pre_gpu(self):
+        import cupy
+        self._check_keypoint_loss_pre(cupy)
+
+    def _check_keypoint_loss_post(self, xp):
+        B = 2
+        n_point = 17
+
+        point_maps = chainer.Variable(_random_array(xp, (20, n_point, 28, 28)))
+        point_roi_indices = [
+            xp.random.randint(0, B, size=5).astype(np.int32),
+            xp.random.randint(0, B, size=7).astype(np.int32),
+            xp.random.randint(0, B, size=8).astype(np.int32),
+        ]
+        gt_roi_points = [
+            xp.random.randint(0, 28, size=(5, n_point, 2)).astype(np.int32),
+            xp.random.randint(0, 28, size=(7, n_point, 2)).astype(np.int32),
+            xp.random.randint(0, 28, size=(8, n_point, 2)).astype(np.int32),
+        ]
+        gt_roi_visibles = [
+            xp.random.randint(0, 2, size=(5, n_point)).astype(np.bool),
+            xp.random.randint(0, 2, size=(7, n_point)).astype(np.bool),
+            xp.random.randint(0, 2, size=(8, n_point)).astype(np.bool),
+        ]
+
+        keypoint_loss = keypoint_loss_post(
+            point_maps, point_roi_indices, gt_roi_points,
+            gt_roi_visibles, B)
+
+        self.assertIsInstance(keypoint_loss, chainer.Variable)
+        self.assertIsInstance(keypoint_loss.array, xp.ndarray)
+        self.assertEqual(keypoint_loss.shape, ())
+
+    def test_keypoint_loss_post_cpu(self):
+        self._check_keypoint_loss_post(np)
+
+    @attr.gpu
+    def test_keypoint_loss_post_gpu(self):
+        import cupy
+        self._check_keypoint_loss_post(cupy)
+
+
+testing.run_module(__name__, __file__)
diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py
index d1832d1b8b..ba2f132b15 100644
--- a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py
+++ b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py
@@ -194,7 +194,7 @@ def test_mask_loss_pre_gpu(self):
         import cupy
         self._check_mask_loss_pre(cupy)
 
-    def _check_head_loss_post(self, xp):
+    def _check_mask_loss_post(self, xp):
         B = 2
         segms = chainer.Variable(_random_array(xp, (20, 81, 28, 28)))
         mask_roi_indices = [
@@ -220,13 +220,13 @@ def _check_head_loss_post(self, xp):
         self.assertIsInstance(mask_loss.array, xp.ndarray)
         self.assertEqual(mask_loss.shape, ())
 
-    def test_head_loss_post_cpu(self):
-        self._check_head_loss_post(np)
+    def test_mask_loss_post_cpu(self):
+        self._check_mask_loss_post(np)
 
     @attr.gpu
-    def test_head_loss_post_gpu(self):
+    def test_mask_loss_post_gpu(self):
         import cupy
-        self._check_head_loss_post(cupy)
+        self._check_mask_loss_post(cupy)
 
 
 testing.run_module(__name__, __file__)

From 6cf563717a1cad2fce9eca3a2dbdfc892a85ffab Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Wed, 6 Mar 2019 18:53:08 +0900
Subject: [PATCH 056/100] wip

---
 examples/mask_rcnn/train_multi_keypoint.py | 278 +++++++++++++++++++++
 1 file changed, 278 insertions(+)
 create mode 100644 examples/mask_rcnn/train_multi_keypoint.py

diff --git a/examples/mask_rcnn/train_multi_keypoint.py b/examples/mask_rcnn/train_multi_keypoint.py
new file mode 100644
index 0000000000..42ae655ee7
--- /dev/null
+++ b/examples/mask_rcnn/train_multi_keypoint.py
@@ -0,0 +1,278 @@
+import argparse
+import multiprocessing
+import numpy as np
+import PIL
+
+import chainer
+import chainer.functions as F
+import chainer.links as L
+from chainer.optimizer_hooks import WeightDecay
+from chainer import serializers
+from chainer import training
+from chainer.training import extensions
+
+import chainermn
+
+from chainercv.chainer_experimental.datasets.sliceable import TransformDataset
+from chainercv.chainer_experimental.training.extensions import make_shift
+from chainercv.datasets import coco_keypoint_names
+from chainercv.datasets import COCOKeypointDataset
+from chainercv.links import MaskRCNNFPNResNet101
+from chainercv.links import MaskRCNNFPNResNet50
+from chainercv.links.model.mask_rcnn.misc import scale_img
+from chainercv import transforms
+
+from chainercv.links.model.fpn import head_loss_post
+from chainercv.links.model.fpn import head_loss_pre
+from chainercv.links.model.fpn import rpn_loss
+from chainercv.links.model.mask_rcnn import keypoint_loss_pre
+from chainercv.links.model.mask_rcnn import keypoint_loss_post
+
+# https://docs.chainer.org/en/stable/tips.html#my-training-process-gets-stuck-when-using-multiprocessiterator
+try:
+    import cv2
+    cv2.setNumThreads(0)
+except ImportError:
+    pass
+
+
+class TrainChain(chainer.Chain):
+
+    def __init__(self, model):
+        super(TrainChain, self).__init__()
+        with self.init_scope():
+            self.model = model
+
+    def __call__(self, imgs, points, visibles, bboxes):
+        B = len(imgs)
+        pad_size = np.array(
+            [im.shape[1:] for im in imgs]).max(axis=0)
+        pad_size = (
+            np.ceil(
+                pad_size / self.model.stride) * self.model.stride).astype(int)
+        x = np.zeros(
+            (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32)
+        for i, img in enumerate(imgs):
+            _, H, W = img.shape
+            x[i, :, :H, :W] = img
+        x = self.xp.array(x)
+
+        # For reducing unnecessary CPU/GPU copy, `masks` is kept in CPU.
+        pad_masks = [
+            np.zeros(
+                (mask.shape[0], pad_size[0], pad_size[1]), dtype=np.bool)
+            for mask in masks]
+        for i, mask in enumerate(masks):
+            _, H, W = mask.shape
+            pad_masks[i][:, :H, :W] = mask
+        masks = pad_masks
+
+        bboxes = [self.xp.array(bbox) for bbox in bboxes]
+        labels = [self.xp.array(label) for label in labels]
+        sizes = [img.shape[1:] for img in imgs]
+
+        with chainer.using_config('train', False):
+            hs = self.model.extractor(x)
+
+        rpn_locs, rpn_confs = self.model.rpn(hs)
+        anchors = self.model.rpn.anchors(h.shape[2:] for h in hs)
+        rpn_loc_loss, rpn_conf_loss = rpn_loss(
+            rpn_locs, rpn_confs, anchors, sizes, bboxes)
+
+        rois, roi_indices = self.model.rpn.decode(
+            rpn_locs, rpn_confs, anchors, x.shape)
+        rois = self.xp.vstack([rois] + bboxes)
+        roi_indices = self.xp.hstack(
+            [roi_indices]
+            + [self.xp.array((i,) * len(bbox))
+               for i, bbox in enumerate(bboxes)])
+        rois, roi_indices = self.model.head.distribute(rois, roi_indices)
+        rois, roi_indices, head_gt_locs, head_gt_labels = head_loss_pre(
+            rois, roi_indices, self.model.head.std, bboxes, labels)
+        head_locs, head_confs = self.model.head(hs, rois, roi_indices)
+        head_loc_loss, head_conf_loss = head_loss_post(
+            head_locs, head_confs,
+            roi_indices, head_gt_locs, head_gt_labels, B)
+        losses = [
+            rpn_loc_loss + rpn_conf_loss + head_loc_loss + head_conf_loss]
+
+        # mask_rois, mask_roi_indices, gt_segms, gt_mask_labels = mask_loss_pre(
+        #     rois, roi_indices, masks, bboxes,
+        #     head_gt_labels, self.model.mask_head.segm_size)
+        # n_roi = sum([len(roi) for roi in mask_rois])
+        # if n_roi > 0:
+        #     segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)
+        #     mask_loss = mask_loss_post(
+        #         segms, mask_roi_indices, gt_segms, gt_mask_labels, B)
+        # else:
+        #     # Compute dummy variables to complete the computational graph
+        #     mask_rois[0] = self.xp.array([[0, 0, 1, 1]], dtype=np.float32)
+        #     mask_roi_indices[0] = self.xp.array([0], dtype=np.int32)
+        #     segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)
+        #     mask_loss = 0 * F.sum(segms)
+        loss = sum(losses)
+        chainer.reporter.report({
+            'loss': loss,
+            'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss,
+            'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss,
+            'loss/keypoint': keypoint_loss},
+            self)
+        return loss
+
+
+class Transform(object):
+
+    def __init__(self, min_size, max_size, mean):
+        self.min_size = min_size
+        self.max_size = max_size
+        self.mean = mean
+
+    def __call__(self, in_data):
+        img, point, visible, _, bbox = in_data
+        # Flipping
+        size = img.shape[1:]
+        img, params = transforms.random_flip(
+            img, x_random=True, return_param=True)
+        point = transforms.flip_point(
+            point, size, x_flip=params['x_flip'])
+        bbox = transforms.flip_bbox(
+            bbox, size, x_flip=params['x_flip'])
+
+        # Scaling and mean subtraction
+        img, scale = scale_img(img, self.min_size, self.max_size)
+        img -= self.mean
+        point = transforms.resize_point(point, size, img.shape[1:])
+        bbox = bbox * scale
+        return img, point, visible, bbox
+
+
+def converter(batch, device=None):
+    # do not send data to gpu (device is ignored)
+    return tuple(list(v) for v in zip(*batch))
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--model',
+        choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'),
+        default='mask_rcnn_fpn_resnet50')
+    parser.add_argument('--batchsize', type=int, default=16)
+    parser.add_argument('--iteration', type=int, default=90000)
+    parser.add_argument('--step', type=int, nargs='*', default=[60000, 80000])
+    parser.add_argument('--out', default='result')
+    parser.add_argument('--resume')
+    parser.add_argument('--communicator', default='hierarchical')
+    args = parser.parse_args()
+
+    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
+    if hasattr(multiprocessing, 'set_start_method'):
+        multiprocessing.set_start_method('forkserver')
+        p = multiprocessing.Process()
+        p.start()
+        p.join()
+
+    comm = chainermn.create_communicator(args.communicator)
+    device = comm.intra_rank
+
+    if args.model == 'mask_rcnn_fpn_resnet50':
+        model = MaskRCNNFPNResNet50(
+            n_fg_class=1,
+            pretrained_model='imagenet',
+            mode='keypoint'
+        )
+    elif args.model == 'mask_rcnn_fpn_resnet101':
+        model = MaskRCNNFPNResNet101(
+            n_fg_class=1,
+            pretrained_model='imagenet',
+            mode='keypoint'
+        )
+
+    model.use_preset('evaluate')
+    train_chain = TrainChain(model)
+    chainer.cuda.get_device_from_id(device).use()
+    train_chain.to_gpu()
+
+    train = TransformDataset(
+        COCOKeypointDataset(
+            data_dir='/home/yuyu2172/coco',
+            split='train'),
+        ('img', 'point', 'visible', 'bbox'),
+        Transform(model.min_size, model.max_size, model.extractor.mean))
+
+    if comm.rank == 0:
+        indices = np.arange(len(train))
+    else:
+        indices = None
+    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
+    train = train.slice[indices]
+
+    train_iter = chainer.iterators.MultiprocessIterator(
+        train, args.batchsize // comm.size,
+        n_processes=args.batchsize // comm.size,
+        shared_mem=3 * 1000 * 1000 * 4)
+
+    optimizer = chainermn.create_multi_node_optimizer(
+        chainer.optimizers.MomentumSGD(), comm)
+    optimizer.setup(train_chain)
+    optimizer.add_hook(WeightDecay(0.0001))
+
+    model.extractor.base.conv1.disable_update()
+    model.extractor.base.res2.disable_update()
+    for link in model.links():
+        if isinstance(link, L.BatchNormalization):
+            link.disable_update()
+
+    n_iteration = args.iteration * 16 / args.batchsize
+    updater = training.updaters.StandardUpdater(
+        train_iter, optimizer, converter=converter, device=device)
+    trainer = training.Trainer(
+        updater, (n_iteration, 'iteration'), args.out)
+
+    @make_shift('lr')
+    def lr_schedule(trainer):
+        base_lr = 0.02 * args.batchsize / 16
+        warm_up_duration = 500
+        warm_up_rate = 1 / 3
+
+        iteration = trainer.updater.iteration
+        if iteration < warm_up_duration:
+            rate = warm_up_rate \
+                + (1 - warm_up_rate) * iteration / warm_up_duration
+        else:
+            rate = 1
+            for step in args.step:
+                if iteration >= step * 16 / args.batchsize:
+                    rate *= 0.1
+
+        return base_lr * rate
+
+    trainer.extend(lr_schedule)
+
+    if comm.rank == 0:
+        log_interval = 10, 'iteration'
+        trainer.extend(extensions.LogReport(trigger=log_interval))
+        trainer.extend(extensions.observe_lr(), trigger=log_interval)
+        trainer.extend(extensions.PrintReport(
+            ['epoch', 'iteration', 'lr', 'main/loss',
+             'main/loss/rpn/loc', 'main/loss/rpn/conf',
+             'main/loss/head/loc', 'main/loss/head/conf',
+             'main/loss/keypoint'
+             ]),
+            trigger=log_interval)
+        trainer.extend(extensions.ProgressBar(update_interval=10))
+
+        trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
+        trainer.extend(
+            extensions.snapshot_object(
+                model, 'model_iter_{.updater.iteration}'),
+            trigger=(n_iteration, 'iteration'))
+
+    if args.resume:
+        serializers.load_npz(args.resume, trainer, strict=False)
+
+    trainer.run()
+
+
+if __name__ == '__main__':
+    main()

From c045eaefc48fcdf877a0a9c5007a4bdcd9d099d5 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Thu, 7 Mar 2019 11:34:06 +0900
Subject: [PATCH 057/100] fix broken flake8

---
 examples/mask_rcnn/train_multi_keypoint.py | 52 ++++++++++------------
 1 file changed, 23 insertions(+), 29 deletions(-)

diff --git a/examples/mask_rcnn/train_multi_keypoint.py b/examples/mask_rcnn/train_multi_keypoint.py
index 42ae655ee7..001effbae4 100644
--- a/examples/mask_rcnn/train_multi_keypoint.py
+++ b/examples/mask_rcnn/train_multi_keypoint.py
@@ -1,7 +1,6 @@
 import argparse
 import multiprocessing
 import numpy as np
-import PIL
 
 import chainer
 import chainer.functions as F
@@ -15,7 +14,6 @@
 
 from chainercv.chainer_experimental.datasets.sliceable import TransformDataset
 from chainercv.chainer_experimental.training.extensions import make_shift
-from chainercv.datasets import coco_keypoint_names
 from chainercv.datasets import COCOKeypointDataset
 from chainercv.links import MaskRCNNFPNResNet101
 from chainercv.links import MaskRCNNFPNResNet50
@@ -43,7 +41,7 @@ def __init__(self, model):
         with self.init_scope():
             self.model = model
 
-    def __call__(self, imgs, points, visibles, bboxes):
+    def __call__(self, imgs, points, visibles, labels, bboxes):
         B = len(imgs)
         pad_size = np.array(
             [im.shape[1:] for im in imgs]).max(axis=0)
@@ -57,17 +55,11 @@ def __call__(self, imgs, points, visibles, bboxes):
             x[i, :, :H, :W] = img
         x = self.xp.array(x)
 
-        # For reducing unnecessary CPU/GPU copy, `masks` is kept in CPU.
-        pad_masks = [
-            np.zeros(
-                (mask.shape[0], pad_size[0], pad_size[1]), dtype=np.bool)
-            for mask in masks]
-        for i, mask in enumerate(masks):
-            _, H, W = mask.shape
-            pad_masks[i][:, :H, :W] = mask
-        masks = pad_masks
+        points = [self.xp.array(point) for point in points]
+        visibles = [self.xp.array(visible) for visible in visibles]
 
         bboxes = [self.xp.array(bbox) for bbox in bboxes]
+        assert all([np.all(label == 1) for label in labels])
         labels = [self.xp.array(label) for label in labels]
         sizes = [img.shape[1:] for img in imgs]
 
@@ -96,26 +88,28 @@ def __call__(self, imgs, points, visibles, bboxes):
         losses = [
             rpn_loc_loss + rpn_conf_loss + head_loc_loss + head_conf_loss]
 
-        # mask_rois, mask_roi_indices, gt_segms, gt_mask_labels = mask_loss_pre(
-        #     rois, roi_indices, masks, bboxes,
-        #     head_gt_labels, self.model.mask_head.segm_size)
-        # n_roi = sum([len(roi) for roi in mask_rois])
-        # if n_roi > 0:
-        #     segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)
-        #     mask_loss = mask_loss_post(
-        #         segms, mask_roi_indices, gt_segms, gt_mask_labels, B)
-        # else:
-        #     # Compute dummy variables to complete the computational graph
-        #     mask_rois[0] = self.xp.array([[0, 0, 1, 1]], dtype=np.float32)
-        #     mask_roi_indices[0] = self.xp.array([0], dtype=np.int32)
-        #     segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)
-        #     mask_loss = 0 * F.sum(segms)
+        point_rois, point_roi_indices, gt_points, gt_visibles = keypoint_loss_pre(
+            rois, roi_indices, points, visibles, bboxes, head_gt_labels,
+            self.model.keypoint_head.point_map_size)
+        n_roi = sum([len(roi) for roi in point_rois])
+        if n_roi > 0:
+            point_maps = self.model.keypoint_head(hs, point_rois, point_roi_indices)
+            point_loss = keypoint_loss_post(
+                point_maps, point_roi_indices,
+                gt_points, gt_visibles, B)
+        else:
+            # Compute dummy variables to complete the computational graph
+            point_rois[0] = self.xp.array([[0, 0, 1, 1]], dtype=np.float32)
+            point_roi_indices[0] = self.xp.array([0], dtype=np.int32)
+            point_maps = self.model.keypoint_head(hs, point_rois, point_roi_indices)
+            point_loss = 0 * F.sum(point_maps)
+        losses.append(point_loss)
         loss = sum(losses)
         chainer.reporter.report({
             'loss': loss,
             'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss,
             'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss,
-            'loss/keypoint': keypoint_loss},
+            'loss/point': point_loss},
             self)
         return loss
 
@@ -128,7 +122,7 @@ def __init__(self, min_size, max_size, mean):
         self.mean = mean
 
     def __call__(self, in_data):
-        img, point, visible, _, bbox = in_data
+        img, point, visible, label, bbox = in_data
         # Flipping
         size = img.shape[1:]
         img, params = transforms.random_flip(
@@ -143,7 +137,7 @@ def __call__(self, in_data):
         img -= self.mean
         point = transforms.resize_point(point, size, img.shape[1:])
         bbox = bbox * scale
-        return img, point, visible, bbox
+        return img, point, visible, label, bbox
 
 
 def converter(batch, device=None):

From ac18d36085b98b7283b354009ee3bd193b5de49e Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <niitani@preferred.jp>
Date: Thu, 7 Mar 2019 20:06:44 +0900
Subject: [PATCH 058/100] fix mistake: drop hard-coded data_dir and add 'label' key

---
 examples/mask_rcnn/train_multi_keypoint.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/examples/mask_rcnn/train_multi_keypoint.py b/examples/mask_rcnn/train_multi_keypoint.py
index 001effbae4..fedaaa321b 100644
--- a/examples/mask_rcnn/train_multi_keypoint.py
+++ b/examples/mask_rcnn/train_multi_keypoint.py
@@ -189,9 +189,8 @@ def main():
 
     train = TransformDataset(
         COCOKeypointDataset(
-            data_dir='/home/yuyu2172/coco',
             split='train'),
-        ('img', 'point', 'visible', 'bbox'),
+        ('img', 'point', 'visible', 'label', 'bbox'),
         Transform(model.min_size, model.max_size, model.extractor.mean))
 
     if comm.rank == 0:

From 781792b2f7426fabe117f289aec019946192f49c Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <niitani@preferred.jp>
Date: Thu, 7 Mar 2019 20:30:21 +0900
Subject: [PATCH 059/100] fix the order of data

---
 chainercv/datasets/coco/coco_keypoint_dataset.py            | 6 +++---
 .../datasets_tests/coco_tests/test_coco_keypoint_dataset.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/chainercv/datasets/coco/coco_keypoint_dataset.py b/chainercv/datasets/coco/coco_keypoint_dataset.py
index 2f0dfebf0a..234d7e0942 100644
--- a/chainercv/datasets/coco/coco_keypoint_dataset.py
+++ b/chainercv/datasets/coco/coco_keypoint_dataset.py
@@ -43,10 +43,10 @@ class COCOKeypointDataset(GetterDataset):
         ":math:`(y, x)`"
         :obj:`visible` [#coco_point_1]_, ":math:`(R, K)`", :obj:`bool`, \
         "true when a keypoint is visible."
-        :obj:`bbox` [#coco_point_1]_, ":math:`(R, 4)`", :obj:`float32`, \
-        ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`"
         :obj:`label` [#coco_point_1]_, ":math:`(R,)`", :obj:`int32`, \
         ":math:`[0, \#fg\_class - 1]`"
+        :obj:`bbox` [#coco_point_1]_, ":math:`(R, 4)`", :obj:`float32`, \
+        ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`"
         :obj:`area` [#coco_point_1]_ [#coco_point_2]_, ":math:`(R,)`", \
         :obj:`float32`, --
         :obj:`crowded` [#coco_point_3]_, ":math:`(R,)`", :obj:`bool`, --
@@ -95,7 +95,7 @@ def __init__(self, data_dir='auto', split='train', year='2017',
         self.add_getter(
             ['point', 'visible', 'bbox', 'label', 'area', 'crowded'],
             self._get_annotations)
-        keys = ('img', 'point', 'visible', 'bbox', 'label')
+        keys = ('img', 'point', 'visible', 'label', 'bbox')
         if return_area:
             keys += ('area',)
         if return_crowded:
diff --git a/tests/datasets_tests/coco_tests/test_coco_keypoint_dataset.py b/tests/datasets_tests/coco_tests/test_coco_keypoint_dataset.py
index 191e9c96ee..984245f9ba 100644
--- a/tests/datasets_tests/coco_tests/test_coco_keypoint_dataset.py
+++ b/tests/datasets_tests/coco_tests/test_coco_keypoint_dataset.py
@@ -53,7 +53,7 @@ def test_coco_bbox_dataset(self):
 
         for _ in range(10):
             i = np.random.randint(0, len(self.dataset))
-            img, point, _, bbox, label = self.dataset[i][:5]
+            img, point, _, label, bbox = self.dataset[i][:5]
             assert_is_bbox(bbox, img.shape[1:])
             self.assertEqual(len(bbox), len(point))
 

From c6639f77dec617573534b5f32260c5c1e12d1ece Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <niitani@preferred.jp>
Date: Thu, 7 Mar 2019 20:34:41 +0900
Subject: [PATCH 060/100] fix assertion: expect keypoint labels to be 0, not 1

---
 examples/mask_rcnn/train_multi_keypoint.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/mask_rcnn/train_multi_keypoint.py b/examples/mask_rcnn/train_multi_keypoint.py
index fedaaa321b..ccda7b11ef 100644
--- a/examples/mask_rcnn/train_multi_keypoint.py
+++ b/examples/mask_rcnn/train_multi_keypoint.py
@@ -59,7 +59,7 @@ def __call__(self, imgs, points, visibles, labels, bboxes):
         visibles = [self.xp.array(visible) for visible in visibles]
 
         bboxes = [self.xp.array(bbox) for bbox in bboxes]
-        assert all([np.all(label == 1) for label in labels])
+        assert all([np.all(label == 0) for label in labels])
         labels = [self.xp.array(label) for label in labels]
         sizes = [img.shape[1:] for img in imgs]
 

From 665f34b2bb4ddd69306b5b2948929cd053a95069 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Thu, 7 Mar 2019 20:37:15 +0900
Subject: [PATCH 061/100] fix: rename KeypointHead.map_size to point_map_size

---
 chainercv/links/model/mask_rcnn/keypoint_head.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/chainercv/links/model/mask_rcnn/keypoint_head.py b/chainercv/links/model/mask_rcnn/keypoint_head.py
index 8b8e3fc428..a048bbd218 100644
--- a/chainercv/links/model/mask_rcnn/keypoint_head.py
+++ b/chainercv/links/model/mask_rcnn/keypoint_head.py
@@ -36,7 +36,7 @@ class KeypointHead(chainer.Chain):
     _canonical_scale = 224
     _roi_size = 14
     _roi_sample_ratio = 2
-    map_size = 56
+    point_map_size = 56
 
     def __init__(self, n_point, scales):
         super(KeypointHead, self).__init__()
@@ -76,7 +76,7 @@ def __call__(self, hs, rois, roi_indices):
         if len(pooled_hs) == 0:
             return chainer.Variable(
                self.xp.empty(
-                   (0, self.n_point, self.map_size, self.map_size),
+                   (0, self.n_point, self.point_map_size, self.point_map_size),
                    dtype=np.float32))
 
         h = F.concat(pooled_hs, axis=0)

From df8c274a2e7c98cebdfd925fd5a50bdcd945b816 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Thu, 7 Mar 2019 22:42:48 +0900
Subject: [PATCH 062/100] fix test: update keypoint_loss_pre test inputs

---
 .../model_tests/mask_rcnn_tests/test_keypoint_head.py     | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_keypoint_head.py b/tests/links_tests/model_tests/mask_rcnn_tests/test_keypoint_head.py
index 836e9425f3..17616f156c 100644
--- a/tests/links_tests/model_tests/mask_rcnn_tests/test_keypoint_head.py
+++ b/tests/links_tests/model_tests/mask_rcnn_tests/test_keypoint_head.py
@@ -53,19 +53,17 @@ def _check_keypoint_loss_pre(self, xp):
         ]
         points = [
             xp.zeros((1, n_point, 2), dtype=np.float32),
-            xp.zeros((2, n_point, 2), dtype=np.float32),
-            xp.zeros((1, n_point, 2), dtype=np.float32),
+            xp.zeros((3, n_point, 2), dtype=np.float32),
         ]
         visibles = [
             xp.ones((1, n_point), dtype=np.bool),
-            xp.ones((2, n_point), dtype=np.bool),
-            xp.ones((1, n_point), dtype=np.bool)
+            xp.ones((3, n_point), dtype=np.bool),
         ]
         bboxes = [_point_to_bbox(point, visible)
                   for point, visible in zip(points, visibles)]
         labels = [
-            xp.array((1, 1), dtype=np.int32),
             xp.array((1,), dtype=np.int32),
+            xp.array((1, 1), dtype=np.int32),
             xp.array((1,), dtype=np.int32),
         ]
         rois, roi_indices, gt_roi_points, gt_roi_visibles = keypoint_loss_pre(

From f5adabc4e75c88bb7c5bb2fe4f56068395c138c8 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Thu, 7 Mar 2019 22:52:01 +0900
Subject: [PATCH 063/100] fix test: make gt_segms float32, use segm_size in mask head tests

---
 chainercv/links/model/mask_rcnn/mask_head.py  |  5 ++--
 .../mask_rcnn_tests/test_mask_head.py         | 28 +++++++++++--------
 2 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/mask_rcnn/mask_head.py
index 462085d536..dc65fd6718 100644
--- a/chainercv/links/model/mask_rcnn/mask_head.py
+++ b/chainercv/links/model/mask_rcnn/mask_head.py
@@ -205,7 +205,8 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_bboxes,
     mask_roi_indices = roi_indices[index]
     gt_mask_labels = gt_head_labels[index]
 
-    gt_segms = xp.empty((len(mask_rois), segm_size, segm_size), dtype=np.bool)
+    gt_segms = xp.empty(
+        (len(mask_rois), segm_size, segm_size), dtype=np.float32)
     for i in np.unique(cuda.to_cpu(mask_roi_indices)):
         gt_mask = gt_masks[i]
         gt_bbox = gt_bboxes[i]
@@ -247,7 +248,7 @@ def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels,
     xp = cuda.get_array_module(segms.array)
 
     mask_roi_indices = xp.hstack(mask_roi_indices).astype(np.int32)
-    gt_segms = xp.vstack(gt_segms).astype(np.float32, copy=False)
+    gt_segms = xp.vstack(gt_segms)
     gt_mask_labels = xp.hstack(gt_mask_labels).astype(np.int32)
 
     mask_loss = F.sigmoid_cross_entropy(
diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py
index d1832d1b8b..e89cf3c38d 100644
--- a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py
+++ b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py
@@ -11,6 +11,8 @@
 from chainercv.links.model.mask_rcnn import mask_loss_post
 from chainercv.links.model.mask_rcnn import mask_loss_pre
 
+from chainercv.utils import mask_to_bbox
+
 
 def _random_array(xp, shape):
     return xp.array(
@@ -52,7 +54,7 @@ def _check_call(self):
         self.assertIsInstance(segs.array, self.link.xp.ndarray)
         self.assertEqual(
             segs.shape,
-            (4, self.n_class, self.link.mask_size, self.link.mask_size))
+            (4, self.n_class, self.link.segm_size, self.link.segm_size))
 
     def test_call_cpu(self):
         self._check_call()
@@ -101,13 +103,13 @@ def _check_decode(self):
         segms = [
             _random_array(
                 self.link.xp,
-                (1, self.n_class, self.link.mask_size, self.link.mask_size)),
+                (1, self.n_class, self.link.segm_size, self.link.segm_size)),
             _random_array(
                 self.link.xp,
-                (2, self.n_class, self.link.mask_size, self.link.mask_size)),
+                (2, self.n_class, self.link.segm_size, self.link.segm_size)),
             _random_array(
                 self.link.xp,
-                (1, self.n_class, self.link.mask_size, self.link.mask_size))
+                (1, self.n_class, self.link.segm_size, self.link.segm_size))
         ]
         bboxes = [
             self.link.xp.array(((4, 1, 6, 3),), dtype=np.float32),
@@ -142,8 +144,8 @@ def test_decode_cpu(self):
 class TestMaskHeadLoss(unittest.TestCase):
 
     def _check_mask_loss_pre(self, xp):
-        n_class = 12
-        mask_size = 28
+        n_inst = 12
+        segm_size = 28
         rois = [
             xp.array(((4, 1, 6, 3),), dtype=np.float32),
             xp.array(
@@ -156,17 +158,17 @@ def _check_mask_loss_pre(self, xp):
             xp.array((1,), dtype=np.int32),
         ]
         masks = [
-            _random_array(xp, (n_class, 60, 70)),
-            _random_array(xp, (n_class, 60, 70)),
-            _random_array(xp, (n_class, 60, 70)),
+            _random_array(xp, (n_inst, 60, 70)),
+            _random_array(xp, (n_inst, 60, 70)),
         ]
+        bboxes = [mask_to_bbox(mask) for mask in masks]
         labels = [
-            xp.array((10, 4), dtype=np.int32),
             xp.array((1,), dtype=np.int32),
+            xp.array((10, 4), dtype=np.int32),
             xp.array((3,), dtype=np.int32),
         ]
         rois, roi_indices, gt_segms, gt_mask_labels = mask_loss_pre(
-            rois, roi_indices, masks, labels, mask_size)
+            rois, roi_indices, masks, bboxes, labels, segm_size)
 
         self.assertEqual(len(rois), 3)
         self.assertEqual(len(roi_indices), 3)
@@ -183,8 +185,10 @@ def _check_mask_loss_pre(self, xp):
             self.assertEqual(rois[l].shape[0], gt_mask_labels[l].shape[0])
             self.assertEqual(rois[l].shape[1:], (4,))
             self.assertEqual(roi_indices[l].shape[1:], ())
-            self.assertEqual(gt_segms[l].shape[1:], (mask_size, mask_size))
+            self.assertEqual(gt_segms[l].shape[1:], (segm_size, segm_size))
             self.assertEqual(gt_mask_labels[l].shape[1:], ())
+            self.assertEqual(gt_segms[l].dtype, np.float32)
+            self.assertEqual(gt_mask_labels[l].dtype, np.int32)
 
     def test_mask_loss_pre_cpu(self):
         self._check_mask_loss_pre(np)

From 24492f7d8e83d8fde224a0db52e0655be2b43474 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Thu, 7 Mar 2019 23:33:26 +0900
Subject: [PATCH 064/100] filter out images with fewer than 10 visible keypoints

---
 examples/mask_rcnn/train_multi_keypoint.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/examples/mask_rcnn/train_multi_keypoint.py b/examples/mask_rcnn/train_multi_keypoint.py
index ccda7b11ef..9553bbb4e1 100644
--- a/examples/mask_rcnn/train_multi_keypoint.py
+++ b/examples/mask_rcnn/train_multi_keypoint.py
@@ -145,6 +145,14 @@ def converter(batch, device=None):
     return tuple(list(v) for v in zip(*batch))
 
 
+def valid_annotation(visible):
+    if len(visible) == 0:
+        return False
+    min_keypoint_per_image = 10
+    n_visible = visible.sum()
+    return n_visible >= min_keypoint_per_image
+
+
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument(
@@ -187,10 +195,12 @@ def main():
     chainer.cuda.get_device_from_id(device).use()
     train_chain.to_gpu()
 
+    train = COCOKeypointDataset(split='train')
+    indices = [i for i, visible in enumerate(train.slice[:, 'visible'])
+               if valid_annotation(visible)]
+    train = train.slice[indices]
     train = TransformDataset(
-        COCOKeypointDataset(
-            split='train'),
-        ('img', 'point', 'visible', 'label', 'bbox'),
+        train, ('img', 'point', 'visible', 'label', 'bbox'),
         Transform(model.min_size, model.max_size, model.extractor.mean))
 
     if comm.rank == 0:

From f9228366c4e8c786d7bdec32a7379d712a2caa97 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 8 Mar 2019 13:21:57 +0900
Subject: [PATCH 065/100] add balanced sampling for KeypointHead

---
 chainercv/links/model/fpn/head.py             | 28 +++----
 chainercv/links/model/fpn/misc.py             | 17 +++++
 .../links/model/mask_rcnn/keypoint_head.py    | 73 +++++++++++--------
 examples/mask_rcnn/train_multi_keypoint.py    |  4 +-
 4 files changed, 70 insertions(+), 52 deletions(-)

diff --git a/chainercv/links/model/fpn/head.py b/chainercv/links/model/fpn/head.py
index f0c0fc7b63..3422c07dde 100644
--- a/chainercv/links/model/fpn/head.py
+++ b/chainercv/links/model/fpn/head.py
@@ -7,7 +7,7 @@
 import chainer.links as L
 
 from chainercv.links.model.fpn.misc import argsort
-from chainercv.links.model.fpn.misc import choice
+from chainercv.links.model.fpn.misc import balanced_sampling
 from chainercv.links.model.fpn.misc import exp_clip
 from chainercv.links.model.fpn.misc import smooth_l1
 from chainercv import utils
@@ -285,25 +285,15 @@ def head_loss_pre(rois, roi_indices, std, bboxes, labels):
         else:
             gt_label = xp.zeros(int(mask.sum()), dtype=np.int32)
 
-        fg_index = xp.where(gt_label > 0)[0]
-        n_fg = int(batchsize_per_image * fg_ratio)
-        if len(fg_index) > n_fg:
-            gt_label[choice(fg_index, size=len(fg_index) - n_fg)] = -1
-
-        bg_index = xp.where(gt_label == 0)[0]
-        n_bg = batchsize_per_image - int((gt_label > 0).sum())
-        if len(bg_index) > n_bg:
-            gt_label[choice(bg_index, size=len(bg_index) - n_bg)] = -1
-
         gt_locs[mask] = gt_loc
-        gt_labels[mask] = gt_label
-
-    mask = gt_labels >= 0
-    rois = rois[mask]
-    roi_indices = roi_indices[mask]
-    roi_levels = roi_levels[mask]
-    gt_locs = gt_locs[mask]
-    gt_labels = gt_labels[mask]
+        gt_labels[mask] = balanced_sampling(gt_label)
+
+    is_sampled = gt_labels >= 0
+    rois = rois[is_sampled]
+    roi_indices = roi_indices[is_sampled]
+    roi_levels = roi_levels[is_sampled]
+    gt_locs = gt_locs[is_sampled]
+    gt_labels = gt_labels[is_sampled]
 
     masks = [roi_levels == l for l in range(n_level)]
     rois = [rois[m] for m in masks]
diff --git a/chainercv/links/model/fpn/misc.py b/chainercv/links/model/fpn/misc.py
index c699e3d2f6..19204cf9c7 100644
--- a/chainercv/links/model/fpn/misc.py
+++ b/chainercv/links/model/fpn/misc.py
@@ -13,6 +13,45 @@ def smooth_l1(x, t, beta):
     return F.huber_loss(x, t, beta, reduce='no') / beta
 
 
+def balanced_sampling(label, n_sample, fg_ratio):
+    """Subsample labels to a balanced set of foreground and background.
+
+    Entries whose label is greater than zero are treated as foreground
+    and entries whose label is zero as background. Entries that are
+    dropped are marked with :obj:`-1`. The input array is left
+    untouched because the function works on a copy.
+
+    Args:
+        label (array): Labels to subsample.
+        n_sample (int): The upper bound on the number of entries that
+            keep a non-negative label.
+        fg_ratio (float): The fraction of :obj:`n_sample` that
+            foreground entries may occupy at most.
+
+    Returns:
+        array:
+        A copy of :obj:`label` in which dropped entries are :obj:`-1`.
+
+    """
+    label = label.copy()
+
+    xp = cuda.get_array_module(label)
+
+    # Drop surplus foreground entries.
+    # NOTE(review): assumes choice() draws distinct indices -- confirm.
+    fg_index = xp.where(label > 0)[0]
+    n_fg = int(n_sample * fg_ratio)
+    if len(fg_index) > n_fg:
+        label[choice(fg_index, size=len(fg_index) - n_fg)] = -1
+
+    # Background fills the room left by the foreground that survived.
+    bg_index = xp.where(label == 0)[0]
+    n_bg = n_sample - int((label > 0).sum())
+    if len(bg_index) > n_bg:
+        label[choice(bg_index, size=len(bg_index) - n_bg)] = -1
+    return label
+
+
 # to avoid out of memory
 def argsort(x):
     xp = cuda.get_array_module(x)
diff --git a/chainercv/links/model/mask_rcnn/keypoint_head.py b/chainercv/links/model/mask_rcnn/keypoint_head.py
index a048bbd218..58a060ddd1 100644
--- a/chainercv/links/model/mask_rcnn/keypoint_head.py
+++ b/chainercv/links/model/mask_rcnn/keypoint_head.py
@@ -17,6 +17,8 @@
 
 from chainercv.links.model.mask_rcnn.misc import point_to_roi_points
 
+from chainercv.links.model.fpn.misc import balanced_sampling
+
 
 # make a bilinear interpolation kernel
 # credit @longjon
@@ -144,6 +146,9 @@ def decode(self, point_maps, bboxes):
 
 def keypoint_loss_pre(rois, roi_indices, gt_points, gt_visibles,
                       gt_bboxes, gt_head_labels, point_map_size):
+    batchsize_per_image = 512
+    fg_ratio = 0.25
+
     _, n_point, _ = gt_points[0].shape
 
     xp = cuda.get_array_module(*rois)
@@ -156,52 +161,58 @@ def keypoint_loss_pre(rois, roi_indices, gt_points, gt_visibles,
     roi_indices = xp.hstack(roi_indices).astype(np.int32)
     gt_head_labels = xp.hstack(gt_head_labels)
 
-    index = (gt_head_labels > 0).nonzero()[0]
-    point_roi_levels = roi_levels[index]
-    point_rois = rois[index]
-    point_roi_indices = roi_indices[index]
-
-    gt_roi_points = xp.empty(
-        (len(point_rois), n_point, 2), dtype=np.float32)
-    gt_roi_visibles = xp.empty(
-        (len(point_rois), n_point), dtype=np.bool)
-    for i in np.unique(cuda.to_cpu(point_roi_indices)):
+    gt_head_points = xp.empty(
+        (len(rois), n_point, 2), dtype=np.float32)
+    gt_head_visibles = xp.empty(
+        (len(rois), n_point), dtype=np.bool)
+    for i in np.unique(cuda.to_cpu(roi_indices)):
         gt_point = gt_points[i]
         gt_visible = gt_visibles[i]
         gt_bbox = gt_bboxes[i]
 
-        index = (point_roi_indices == i).nonzero()[0]
-        point_roi = point_rois[index]
-        iou = bbox_iou(point_roi, gt_bbox)
-        gt_index = iou.argmax(axis=1)
-        gt_roi_point, gt_roi_visible = point_to_roi_points(
-                gt_point[gt_index], gt_visible[gt_index],
-                point_roi, point_map_size)
-        gt_roi_points[index] = xp.array(gt_roi_point)
-        gt_roi_visibles[index] = xp.array(gt_roi_visible)
+        index = (roi_indices == i).nonzero()[0]
+        roi = rois[index]
 
-    flag_masks = [point_roi_levels == l for l in range(n_level)]
-    point_rois = [point_rois[m] for m in flag_masks]
-    point_roi_indices = [point_roi_indices[m] for m in flag_masks]
-    gt_roi_points = [gt_roi_points[m] for m in flag_masks]
-    gt_roi_visibles = [gt_roi_visibles[m] for m in flag_masks]
-    return point_rois, point_roi_indices, gt_roi_points, gt_roi_visibles
+        iou = bbox_iou(roi, gt_bbox)
+        gt_index = iou.argmax(axis=1)
+        gt_head_point, gt_head_visible = point_to_roi_points(
+            gt_point[gt_index], gt_visible[gt_index],
+            roi, point_map_size)
+        gt_head_points[index] = xp.array(gt_head_point)
+        gt_head_visibles[index] = xp.array(gt_head_visible)
+
+        gt_head_labels[index] = balanced_sampling(
+            gt_head_labels[index], batchsize_per_image, fg_ratio)
+
+    is_sampled = gt_head_labels >= 0
+    rois = rois[is_sampled]
+    roi_indices = roi_indices[is_sampled]
+    roi_levels = roi_levels[is_sampled]
+    gt_head_points = gt_head_points[is_sampled]
+    gt_head_visibles = gt_head_visibles[is_sampled]
+
+    flag_masks = [roi_levels == l for l in range(n_level)]
+    rois = [rois[m] for m in flag_masks]
+    roi_indices = [roi_indices[m] for m in flag_masks]
+    gt_head_points = [gt_head_points[m] for m in flag_masks]
+    gt_head_visibles = [gt_head_visibles[m] for m in flag_masks]
+    return rois, roi_indices, gt_head_points, gt_head_visibles
 
 
 def keypoint_loss_post(
-        point_maps, point_roi_indices, gt_roi_points,
-        gt_roi_visibles, batchsize):
+        point_maps, point_roi_indices, gt_head_points,
+        gt_head_visibles, batchsize):
     xp = cuda.get_array_module(point_maps.array)
 
     point_roi_indices = xp.hstack(point_roi_indices).astype(np.int32)
-    gt_roi_points = xp.vstack(gt_roi_points).astype(np.int32)
-    gt_roi_visibles = xp.vstack(gt_roi_visibles).astype(np.bool)
+    gt_head_points = xp.vstack(gt_head_points).astype(np.int32)
+    gt_head_visibles = xp.vstack(gt_head_visibles).astype(np.bool)
 
     B, K, H, W = point_maps.shape
     point_maps = point_maps.reshape((B * K, H * W))
-    spatial_labels = gt_roi_points[:, :, 0] * W + gt_roi_points[:, :, 1]
+    spatial_labels = gt_head_points[:, :, 0] * W + gt_head_points[:, :, 1]
     spatial_labels = spatial_labels.reshape((B * K,))
-    spatial_labels[xp.logical_not(gt_roi_visibles.reshape((B * K,)))] = -1
+    spatial_labels[xp.logical_not(gt_head_visibles.reshape((B * K,)))] = -1
     # Remember that the loss is normalized by the total number of
     # visible keypoints.
     keypoint_loss = F.softmax_cross_entropy(point_maps, spatial_labels)
diff --git a/examples/mask_rcnn/train_multi_keypoint.py b/examples/mask_rcnn/train_multi_keypoint.py
index 9553bbb4e1..1824e2afe9 100644
--- a/examples/mask_rcnn/train_multi_keypoint.py
+++ b/examples/mask_rcnn/train_multi_keypoint.py
@@ -88,7 +88,7 @@ def __call__(self, imgs, points, visibles, labels, bboxes):
         losses = [
             rpn_loc_loss + rpn_conf_loss + head_loc_loss + head_conf_loss]
 
-        point_rois, point_roi_indices, gt_points, gt_visibles = keypoint_loss_pre(
+        point_rois, point_roi_indices, gt_head_points, gt_head_visibles = keypoint_loss_pre(
             rois, roi_indices, points, visibles, bboxes, head_gt_labels,
             self.model.keypoint_head.point_map_size)
         n_roi = sum([len(roi) for roi in point_rois])
@@ -96,7 +96,7 @@ def __call__(self, imgs, points, visibles, labels, bboxes):
             point_maps = self.model.keypoint_head(hs, point_rois, point_roi_indices)
             point_loss = keypoint_loss_post(
                 point_maps, point_roi_indices,
-                gt_points, gt_visibles, B)
+                gt_head_points, gt_head_visibles, B)
         else:
             # Compute dummy variables to complete the computational graph
             point_rois[0] = self.xp.array([[0, 0, 1, 1]], dtype=np.float32)

From 7ec9a18c937a85d8150442acdca13f8164f0b301 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 8 Mar 2019 13:34:43 +0900
Subject: [PATCH 066/100] remove invalid box

---
 chainercv/links/model/mask_rcnn/keypoint_head.py | 10 ++++++++++
 chainercv/links/model/mask_rcnn/misc.py          |  8 ++++++++
 2 files changed, 18 insertions(+)

diff --git a/chainercv/links/model/mask_rcnn/keypoint_head.py b/chainercv/links/model/mask_rcnn/keypoint_head.py
index 58a060ddd1..5e450654fd 100644
--- a/chainercv/links/model/mask_rcnn/keypoint_head.py
+++ b/chainercv/links/model/mask_rcnn/keypoint_head.py
@@ -16,6 +16,7 @@
 from chainercv.utils.bbox.bbox_iou import bbox_iou
 
 from chainercv.links.model.mask_rcnn.misc import point_to_roi_points
+from chainercv.links.model.mask_rcnn.misc import within_bbox
 
 from chainercv.links.model.fpn.misc import balanced_sampling
 
@@ -171,6 +172,7 @@ def keypoint_loss_pre(rois, roi_indices, gt_points, gt_visibles,
         gt_bbox = gt_bboxes[i]
 
         index = (roi_indices == i).nonzero()[0]
+        gt_head_label = gt_head_labels[index]
         roi = rois[index]
 
         iou = bbox_iou(roi, gt_bbox)
@@ -181,6 +183,14 @@ def keypoint_loss_pre(rois, roi_indices, gt_points, gt_visibles,
         gt_head_points[index] = xp.array(gt_head_point)
         gt_head_visibles[index] = xp.array(gt_head_visible)
 
+        # Ignore RoIs that are closest to a bounding box that does
+        # not contain any valid keypoints.
+        valid_point = within_bbox(gt_point, gt_bbox)
+        valid_point = xp.logical_and(valid_point, gt_visible)
+        visible_roi = valid_point.sum(axis=1) > 0
+        visible_roi = visible_roi[gt_index]
+        gt_head_label[xp.logical_not(gt_index)] = -1
+
         gt_head_labels[index] = balanced_sampling(
             gt_head_labels[index], batchsize_per_image, fg_ratio)
 
diff --git a/chainercv/links/model/mask_rcnn/misc.py b/chainercv/links/model/mask_rcnn/misc.py
index 6b0c7a3f91..8d2dfce1b5 100644
--- a/chainercv/links/model/mask_rcnn/misc.py
+++ b/chainercv/links/model/mask_rcnn/misc.py
@@ -190,3 +190,30 @@ def point_to_roi_points(
         roi_point[:, k, 1] = xs
         roi_visible[:, k] = valid
     return roi_point, roi_visible
+
+
+def within_bbox(point, bbox):
+    """Test whether each point lies inside its bounding box.
+
+    The comparison is inclusive on all four sides. The :math:`r`-th row
+    of :obj:`bbox` is compared against every point in the :math:`r`-th
+    row of :obj:`point`.
+
+    Args:
+        point (array): A three dimensional array whose last axis holds
+            the :math:`y` coordinate at index 0 and the :math:`x`
+            coordinate at index 1.
+        bbox (array): A two dimensional array whose columns are read as
+            :math:`(y_{min}, x_{min}, y_{max}, x_{max})`.
+
+    Returns:
+        array:
+        A boolean array with the shape of the first two axes of
+        :obj:`point`.
+
+    """
+    y_within = (point[:, :, 0] >= bbox[:, 0][:, None]) & (
+        point[:, :, 0] <= bbox[:, 2][:, None])
+    x_within = (point[:, :, 1] >= bbox[:, 1][:, None]) & (
+        point[:, :, 1] <= bbox[:, 3][:, None])
+    return y_within & x_within

From 1d6f599322b8693100813f6f7ab91f3730d90e24 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 8 Mar 2019 13:40:58 +0900
Subject: [PATCH 067/100] multiscale training

---
 examples/mask_rcnn/train_multi_keypoint.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/examples/mask_rcnn/train_multi_keypoint.py b/examples/mask_rcnn/train_multi_keypoint.py
index 1824e2afe9..6d7d4f6dd1 100644
--- a/examples/mask_rcnn/train_multi_keypoint.py
+++ b/examples/mask_rcnn/train_multi_keypoint.py
@@ -1,6 +1,7 @@
 import argparse
 import multiprocessing
 import numpy as np
+import random
 
 import chainer
 import chainer.functions as F
@@ -117,6 +118,8 @@ def __call__(self, imgs, points, visibles, labels, bboxes):
 class Transform(object):
 
     def __init__(self, min_size, max_size, mean):
+        if isinstance(min_size, (tuple, list)):
+            min_size = (min_size,)
         self.min_size = min_size
         self.max_size = max_size
         self.mean = mean
@@ -133,7 +136,8 @@ def __call__(self, in_data):
             bbox, size, x_flip=params['x_flip'])
 
         # Scaling and mean subtraction
-        img, scale = scale_img(img, self.min_size, self.max_size)
+        min_size = random.choice(self.min_size)
+        img, scale = scale_img(img, min_size, self.max_size)
         img -= self.mean
         point = transforms.resize_point(point, size, img.shape[1:])
         bbox = bbox * scale
@@ -201,7 +205,9 @@ def main():
     train = train.slice[indices]
     train = TransformDataset(
         train, ('img', 'point', 'visible', 'label', 'bbox'),
-        Transform(model.min_size, model.max_size, model.extractor.mean))
+        Transform(
+            (640, 672, 704, 736, 768, 800), model.max_size,
+            model.extractor.mean))
 
     if comm.rank == 0:
         indices = np.arange(len(train))

From b8fa344234e00698672f8442951de64872e7ba2c Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 8 Mar 2019 13:46:35 +0900
Subject: [PATCH 068/100] add eval_keypoint_detection_multi

---
 .../eval_keypoint_detection_multi.py          | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 examples/keypoint_detection/eval_keypoint_detection_multi.py

diff --git a/examples/keypoint_detection/eval_keypoint_detection_multi.py b/examples/keypoint_detection/eval_keypoint_detection_multi.py
new file mode 100644
index 0000000000..9040a301b8
--- /dev/null
+++ b/examples/keypoint_detection/eval_keypoint_detection_multi.py
@@ -0,0 +1,56 @@
+import argparse
+
+import chainer
+from chainer import iterators
+import chainermn
+
+from chainercv.utils import apply_to_iterator
+from chainercv.utils import ProgressHook
+
+from eval_keypoint_detection import models
+from eval_keypoint_detection import setup
+
+
+def main():
+    """Evaluate a keypoint detection model with a ChainerMN communicator.
+
+    Rank 0 builds the dataset iterator and passes the outputs of
+    :func:`apply_to_iterator` to the evaluation function. All the other
+    ranks only call :func:`apply_to_iterator` and return afterwards.
+
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--dataset', choices=('coco',), default='coco')
+    parser.add_argument('--model', choices=sorted(models.keys()))
+    parser.add_argument('--pretrained-model')
+    parser.add_argument('--batchsize', type=int)
+    args = parser.parse_args()
+
+    comm = chainermn.create_communicator()
+    device = comm.intra_rank
+
+    dataset, label_names, eval_, model, batchsize = setup(
+        args.dataset, args.model, args.pretrained_model, args.batchsize)
+
+    chainer.cuda.get_device_from_id(device).use()
+    model.to_gpu()
+
+    # Ranks other than 0 pass no iterator of their own.
+    if not comm.rank == 0:
+        apply_to_iterator(model.predict, None, comm=comm)
+        return
+
+    iterator = iterators.MultithreadIterator(
+        dataset, batchsize * comm.size, repeat=False, shuffle=False)
+
+    in_values, out_values, rest_values = apply_to_iterator(
+        model.predict, iterator, hook=ProgressHook(len(dataset)), comm=comm)
+    # delete unused iterators explicitly
+    del in_values
+
+    eval_(out_values, rest_values)
+
+
+if __name__ == '__main__':
+    main()

From a94cca4b1e3b2b00ed9008d6adee21acd4b7bca9 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 8 Mar 2019 13:52:15 +0900
Subject: [PATCH 069/100] fix

---
 examples/mask_rcnn/train_multi_keypoint.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/examples/mask_rcnn/train_multi_keypoint.py b/examples/mask_rcnn/train_multi_keypoint.py
index 6d7d4f6dd1..bab67649a8 100644
--- a/examples/mask_rcnn/train_multi_keypoint.py
+++ b/examples/mask_rcnn/train_multi_keypoint.py
@@ -118,7 +118,7 @@ def __call__(self, imgs, points, visibles, labels, bboxes):
 class Transform(object):
 
     def __init__(self, min_size, max_size, mean):
-        if isinstance(min_size, (tuple, list)):
+        if not isinstance(min_size, (tuple, list)):
             min_size = (min_size,)
         self.min_size = min_size
         self.max_size = max_size
@@ -171,6 +171,11 @@ def main():
     parser.add_argument('--communicator', default='hierarchical')
     args = parser.parse_args()
 
+    
+    # from chainer.configuration import global_config
+    # global_config.cv_resize_backend = 'PIL'
+    # global_config.cv_read_image_backend = 'PIL'
+
     # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
     if hasattr(multiprocessing, 'set_start_method'):
         multiprocessing.set_start_method('forkserver')
@@ -218,8 +223,7 @@ def main():
 
     train_iter = chainer.iterators.MultiprocessIterator(
         train, args.batchsize // comm.size,
-        n_processes=args.batchsize // comm.size,
-        shared_mem=3 * 1000 * 1000 * 4)
+        n_processes=args.batchsize // comm.size)
 
     optimizer = chainermn.create_multi_node_optimizer(
         chainer.optimizers.MomentumSGD(), comm)

From 2b0b8a4f244b75e1678a700b8b99d0ad1ddc2af1 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 8 Mar 2019 13:57:12 +0900
Subject: [PATCH 070/100] fix

---
 chainercv/links/model/fpn/head.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/chainercv/links/model/fpn/head.py b/chainercv/links/model/fpn/head.py
index 3422c07dde..12c85b7b31 100644
--- a/chainercv/links/model/fpn/head.py
+++ b/chainercv/links/model/fpn/head.py
@@ -286,7 +286,8 @@ def head_loss_pre(rois, roi_indices, std, bboxes, labels):
             gt_label = xp.zeros(int(mask.sum()), dtype=np.int32)
 
         gt_locs[mask] = gt_loc
-        gt_labels[mask] = balanced_sampling(gt_label)
+        gt_labels[mask] = balanced_sampling(
+            gt_label, batchsize_per_image, fg_ratio)
 
     is_sampled = gt_labels >= 0
     rois = rois[is_sampled]

From e964faea9a4805703604abd2120e8ac851b34d62 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Sat, 9 Mar 2019 00:13:25 +0900
Subject: [PATCH 071/100] do not use bg sample

---
 .../links/model/mask_rcnn/keypoint_head.py    | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/chainercv/links/model/mask_rcnn/keypoint_head.py b/chainercv/links/model/mask_rcnn/keypoint_head.py
index 5e450654fd..057ccdb9a7 100644
--- a/chainercv/links/model/mask_rcnn/keypoint_head.py
+++ b/chainercv/links/model/mask_rcnn/keypoint_head.py
@@ -18,8 +18,6 @@
 from chainercv.links.model.mask_rcnn.misc import point_to_roi_points
 from chainercv.links.model.mask_rcnn.misc import within_bbox
 
-from chainercv.links.model.fpn.misc import balanced_sampling
-
 
 # make a bilinear interpolation kernel
 # credit @longjon
@@ -162,6 +160,13 @@ def keypoint_loss_pre(rois, roi_indices, gt_points, gt_visibles,
     roi_indices = xp.hstack(roi_indices).astype(np.int32)
     gt_head_labels = xp.hstack(gt_head_labels)
 
+    # Ignore all negative samples
+    index = (gt_head_labels > 0).nonzero()[0]
+    roi_levels = roi_levels[index]
+    rois = rois[index]
+    roi_indices = roi_indices[index]
+    gt_head_labels = gt_head_labels[index]
+
     gt_head_points = xp.empty(
         (len(rois), n_point, 2), dtype=np.float32)
     gt_head_visibles = xp.empty(
@@ -183,18 +188,16 @@ def keypoint_loss_pre(rois, roi_indices, gt_points, gt_visibles,
         gt_head_points[index] = xp.array(gt_head_point)
         gt_head_visibles[index] = xp.array(gt_head_visible)
 
-        # Ignore RoIs that are closest to a bounding box that does
-        # not contain any valid keypoints.
+        # Ignore RoIs whose closest bounding box does not contain
+        # any valid keypoints.
         valid_point = within_bbox(gt_point, gt_bbox)
         valid_point = xp.logical_and(valid_point, gt_visible)
         visible_roi = valid_point.sum(axis=1) > 0
         visible_roi = visible_roi[gt_index]
         gt_head_label[xp.logical_not(gt_index)] = -1
+        gt_head_labels[index] = gt_head_label
 
-        gt_head_labels[index] = balanced_sampling(
-            gt_head_labels[index], batchsize_per_image, fg_ratio)
-
-    is_sampled = gt_head_labels >= 0
+    is_sampled = (gt_head_labels > 0).nonzero()[0]
     rois = rois[is_sampled]
     roi_indices = roi_indices[is_sampled]
     roi_levels = roi_levels[is_sampled]

From afb6e2a864f37cb7ff87d5a438e6b34331c74f6c Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Sat, 9 Mar 2019 00:14:44 +0900
Subject: [PATCH 072/100] add shared_mem option

---
 examples/mask_rcnn/train_multi_keypoint.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/mask_rcnn/train_multi_keypoint.py b/examples/mask_rcnn/train_multi_keypoint.py
index bab67649a8..37cb683beb 100644
--- a/examples/mask_rcnn/train_multi_keypoint.py
+++ b/examples/mask_rcnn/train_multi_keypoint.py
@@ -223,7 +223,8 @@ def main():
 
     train_iter = chainer.iterators.MultiprocessIterator(
         train, args.batchsize // comm.size,
-        n_processes=args.batchsize // comm.size)
+        n_processes=args.batchsize // comm.size,
+        shared_mem=10 * 1000 * 1000 * 3)
 
     optimizer = chainermn.create_multi_node_optimizer(
         chainer.optimizers.MomentumSGD(), comm)

From 3d317ba8c252219908e782d47e838853e95c7c3b Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Sun, 10 Mar 2019 10:38:02 +0900
Subject: [PATCH 073/100] fix

---
 chainercv/links/model/mask_rcnn/misc.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/chainercv/links/model/mask_rcnn/misc.py b/chainercv/links/model/mask_rcnn/misc.py
index 600cbd0562..2cdcd52bdb 100644
--- a/chainercv/links/model/mask_rcnn/misc.py
+++ b/chainercv/links/model/mask_rcnn/misc.py
@@ -128,7 +128,8 @@ def segm_to_mask(segm, bbox, size, pad=1):
         if bb_height == 0 or bb_width == 0:
             continue
 
-        crop_mask = transforms.resize(padded_mask[None], (bb_width, bb_height))[0]
+        crop_mask = transforms.resize(
+            padded_mask[None], (bb_height, bb_width))[0]
         crop_mask = crop_mask > 0.5
 
         y_min = max(bb[0], 0)

From 902d090b006c3038f006bc0b4eb8bc551b146877 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Sun, 10 Mar 2019 11:03:06 +0900
Subject: [PATCH 074/100] fix

---
 chainercv/links/model/mask_rcnn/mask_rcnn.py                 | 4 ++--
 examples/keypoint_detection/eval_keypoint_detection_multi.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py
index 98eb319956..a8a5731c96 100644
--- a/chainercv/links/model/mask_rcnn/mask_rcnn.py
+++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py
@@ -182,8 +182,8 @@ def predict(self, imgs):
             point_maps = [point_map if point_map is not None else
                           self.xp.zeros(
                               (0, self.keypoint_head.n_point,
-                               self.keypoint_head.map_size,
-                               self.keypoint_head.map_size),
+                               self.keypoint_head.point_map_size,
+                               self.keypoint_head.point_map_size),
                               dtype=np.float32)
                           for point_map in point_maps]
             point_maps = [
diff --git a/examples/keypoint_detection/eval_keypoint_detection_multi.py b/examples/keypoint_detection/eval_keypoint_detection_multi.py
index 9040a301b8..8a49017c21 100644
--- a/examples/keypoint_detection/eval_keypoint_detection_multi.py
+++ b/examples/keypoint_detection/eval_keypoint_detection_multi.py
@@ -23,7 +23,7 @@ def main():
     comm = chainermn.create_communicator()
     device = comm.intra_rank
 
-    dataset, label_names, eval_, model, batchsize = setup(
+    dataset, eval_, model, batchsize = setup(
         args.dataset, args.model, args.pretrained_model, args.batchsize)
 
     chainer.cuda.get_device_from_id(device).use()

From 8145849b592c42265e290264f6eb68b67746fed2 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Sun, 10 Mar 2019 11:10:38 +0900
Subject: [PATCH 075/100] fix order of bbox and label

---
 .../eval_keypoint_detection_coco.py           | 26 +++++++++----------
 .../test_eval_keypoint_detection_coco.py      | 14 +++++-----
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/chainercv/evaluations/eval_keypoint_detection_coco.py b/chainercv/evaluations/eval_keypoint_detection_coco.py
index 4258552bfd..97dfc75b6f 100644
--- a/chainercv/evaluations/eval_keypoint_detection_coco.py
+++ b/chainercv/evaluations/eval_keypoint_detection_coco.py
@@ -16,7 +16,7 @@
 
 def eval_keypoint_detection_coco(
         pred_points, pred_labels, pred_scores,
-        gt_points, gt_visibles, gt_bboxes=None, gt_labels=None,
+        gt_points, gt_visibles, gt_labels=None, gt_bboxes=None,
         gt_areas=None, gt_crowdeds=None):
     """Evaluate keypoint detection based on evaluation code of MS COCO.
 
@@ -32,11 +32,11 @@ def eval_keypoint_detection_coco(
             the confidene for each keypoint.
         gt_points (iterable of numpy.ndarray): See the table below.
         gt_visibles (iterable of numpy.ndarray): See the table below.
+        gt_labels (iterable of numpy.ndarray): See the table below.
         gt_bboxes (iterable of numpy.ndarray): See the table below.
             This is optional. If this is :obj:`None`, the ground truth
             bounding boxes are esitmated from the ground truth
             keypoints.
-        gt_labels (iterable of numpy.ndarray): See the table below.
         gt_areas (iterable of numpy.ndarray): See the table below. If
             :obj:`None`, some scores are not returned.
         gt_crowdeds (iterable of numpy.ndarray): See the table below.
@@ -53,10 +53,10 @@ def eval_keypoint_detection_coco(
         :obj:`gt_points`, ":math:`[(R, K, 2)]`", :obj:`float32`, \
         ":math:`(y, x)`"
         :obj:`gt_visibles`, ":math:`[(R, K)]`", :obj:`bool`, --
-        :obj:`gt_bboxes`, ":math:`[(R, 4)]`", :obj:`float32`, \
-        ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`"
         :obj:`gt_labels`, ":math:`[(R,)]`", :obj:`int32`, \
         ":math:`[0, \#fg\_class - 1]`"
+        :obj:`gt_bboxes`, ":math:`[(R, 4)]`", :obj:`float32`, \
+        ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`"
         :obj:`gt_areas`, ":math:`[(R,)]`", \
         :obj:`float32`, --
         :obj:`gt_crowdeds`, ":math:`[(R,)]`", :obj:`bool`, --
@@ -148,9 +148,9 @@ def eval_keypoint_detection_coco(
     pred_scores = iter(pred_scores)
     gt_points = iter(gt_points)
     gt_visibles = iter(gt_visibles)
+    gt_labels = iter(gt_labels)
     gt_bboxes = (iter(gt_bboxes) if gt_bboxes is not None
                  else itertools.repeat(None))
-    gt_labels = iter(gt_labels)
     if gt_areas is None:
         compute_area_dependent_metrics = False
         gt_areas = itertools.repeat(None)
@@ -165,10 +165,10 @@ def eval_keypoint_detection_coco(
     gt_annos = []
     existent_labels = {}
     for i, (pred_point, pred_label, pred_score, gt_point, gt_visible,
-            gt_bbox, gt_label,
+            gt_label, gt_bbox,
             gt_area, gt_crowded) in enumerate(six.moves.zip(
                 pred_points, pred_labels, pred_scores,
-                gt_points, gt_visibles, gt_bboxes, gt_labels,
+                gt_points, gt_visibles, gt_labels, gt_bboxes,
                 gt_areas, gt_crowdeds)):
         if gt_bbox is None:
             gt_bbox = itertools.repeat(None)
@@ -185,16 +185,16 @@ def eval_keypoint_detection_coco(
             # Visibility flag is currently not used for evaluation
             v = np.ones(len(pred_pnt))
             pred_annos.append(
-                _create_anno(pred_pnt, v, None,
-                             pred_lb, pred_sc,
+                _create_anno(pred_pnt, v,
+                             pred_lb, pred_sc, None,
                              img_id=img_id, anno_id=len(pred_annos) + 1,
                              ar=None, crw=0))
             existent_labels[pred_lb] = True
 
-        for gt_pnt, gt_v, gt_bb, gt_lb, gt_ar, gt_crw in zip(
-                gt_point, gt_visible, gt_bbox, gt_label, gt_area, gt_crowded):
+        for gt_pnt, gt_v, gt_lb, gt_bb, gt_ar, gt_crw in zip(
+                gt_point, gt_visible, gt_label, gt_bbox, gt_area, gt_crowded):
             gt_annos.append(
-                _create_anno(gt_pnt, gt_v, gt_bb, gt_lb, None,
+                _create_anno(gt_pnt, gt_v, gt_lb, None, gt_bb,
                              img_id=img_id, anno_id=len(gt_annos) + 1,
                              ar=gt_ar, crw=gt_crw))
         ids.append({'id': img_id})
@@ -276,7 +276,7 @@ def eval_keypoint_detection_coco(
     return results
 
 
-def _create_anno(pnt, v, bb, lb, sc, img_id, anno_id, ar=None, crw=None):
+def _create_anno(pnt, v, lb, sc, bb, img_id, anno_id, ar=None, crw=None):
     # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/coco.py#L342
     y_min = np.min(pnt[:, 0])
     x_min = np.min(pnt[:, 1])
diff --git a/tests/evaluations_tests/test_eval_keypoint_detection_coco.py b/tests/evaluations_tests/test_eval_keypoint_detection_coco.py
index 2235847640..8112f007f8 100644
--- a/tests/evaluations_tests/test_eval_keypoint_detection_coco.py
+++ b/tests/evaluations_tests/test_eval_keypoint_detection_coco.py
@@ -71,13 +71,13 @@ def _check(self, result):
     def test_gt_bboxes_not_supplied(self):
         result = eval_keypoint_detection_coco(
             self.pred_points, self.pred_labels, self.pred_scores,
-            self.gt_points, self.gt_visibles, None, self.gt_labels)
+            self.gt_points, self.gt_visibles, self.gt_labels, None)
         self._check(result)
 
     def test_area_not_supplied(self):
         result = eval_keypoint_detection_coco(
             self.pred_points, self.pred_labels, self.pred_scores,
-            self.gt_points, self.gt_visibles, self.gt_bboxes, self.gt_labels)
+            self.gt_points, self.gt_visibles, self.gt_labels, self.gt_bboxes)
         self._check(result)
 
         self.assertFalse(
@@ -93,7 +93,7 @@ def test_area_supplied(self):
         gt_areas = [[100] * self.n_inst for _ in range(2)]
         result = eval_keypoint_detection_coco(
             self.pred_points, self.pred_labels, self.pred_scores,
-            self.gt_points, self.gt_visibles, self.gt_bboxes, self.gt_labels,
+            self.gt_points, self.gt_visibles, self.gt_labels, self.gt_bboxes,
             gt_areas=gt_areas,
         )
         self._check(result)
@@ -110,7 +110,7 @@ def test_crowded_supplied(self):
         gt_crowdeds = [[True] * self.n_inst for _ in range(2)]
         result = eval_keypoint_detection_coco(
             self.pred_points, self.pred_labels, self.pred_scores,
-            self.gt_points, self.gt_visibles, self.gt_bboxes, self.gt_labels,
+            self.gt_points, self.gt_visibles, self.gt_labels, self.gt_bboxes,
             gt_crowdeds=gt_crowdeds,
         )
         # When the only ground truth is crowded, nothing is evaluated.
@@ -140,15 +140,15 @@ def test_eval_keypoint_detection_coco(self):
 
         gt_points = self.dataset['points']
         gt_visibles = self.dataset['visibles']
-        gt_bboxes = self.dataset['bboxes']
         gt_labels = self.dataset['labels']
+        gt_bboxes = self.dataset['bboxes']
         gt_areas = self.dataset['areas']
         gt_crowdeds = self.dataset['crowdeds']
 
         result = eval_keypoint_detection_coco(
             pred_points, pred_labels, pred_scores,
-            gt_points, gt_visibles, gt_bboxes,
-            gt_labels, gt_areas, gt_crowdeds)
+            gt_points, gt_visibles, gt_labels, gt_bboxes,
+            gt_areas, gt_crowdeds)
 
         expected = {
             'map/iou=0.50:0.95/area=all/max_dets=20': 0.37733572721481323,

From e5c80fef74502a9436ff6334ffa2f63190de79fd Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Sun, 10 Mar 2019 11:13:21 +0900
Subject: [PATCH 076/100] fix order

---
 chainercv/links/model/mask_rcnn/mask_rcnn.py           | 2 +-
 examples/keypoint_detection/eval_keypoint_detection.py | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py
index a8a5731c96..baca550577 100644
--- a/chainercv/links/model/mask_rcnn/mask_rcnn.py
+++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py
@@ -195,7 +195,7 @@ def predict(self, imgs):
                 point_maps, bboxes)
             labels = [cuda.to_cpu(label) for label in labels]
             scores = [cuda.to_cpu(score) for score in scores]
-            return points, point_scores, bboxes, labels, scores
+            return points, point_scores, labels, scores, bboxes
 
     def prepare(self, imgs):
         """Preprocess images.
diff --git a/examples/keypoint_detection/eval_keypoint_detection.py b/examples/keypoint_detection/eval_keypoint_detection.py
index 14da196e8c..74fbd5ca6c 100644
--- a/examples/keypoint_detection/eval_keypoint_detection.py
+++ b/examples/keypoint_detection/eval_keypoint_detection.py
@@ -43,14 +43,14 @@ def setup(dataset, model_name, pretrained_model, batchsize):
         model.use_preset('evaluate')
 
         def eval_(out_values, rest_values):
-            (pred_points, pred_point_scores, pred_bboxes, pred_labels,
-             pred_scores) = out_values
-            (gt_points, gt_visibles, gt_bboxes, gt_labels,
+            (pred_points, pred_point_scores, pred_labels, pred_scores,
+             pred_bboxes) = out_values
+            (gt_points, gt_visibles, gt_labels, gt_bboxes,
              gt_areas, gt_crowdeds) = rest_values
 
             result = eval_keypoint_detection_coco(
                 pred_points, pred_labels, pred_scores,
-                gt_points, gt_visibles, gt_bboxes, gt_labels,
+                gt_points, gt_visibles, gt_labels, gt_bboxes,
                 gt_areas, gt_crowdeds)
 
             print()

From d50227c447f2e0b2446f415e399ab3381383e2db Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Sun, 10 Mar 2019 11:20:08 +0900
Subject: [PATCH 077/100] change order

---
 chainercv/links/model/mask_rcnn/mask_rcnn.py           | 2 +-
 examples/keypoint_detection/eval_keypoint_detection.py | 2 +-
 examples/mask_rcnn/demo.py                             | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py
index baca550577..8bb88f9789 100644
--- a/chainercv/links/model/mask_rcnn/mask_rcnn.py
+++ b/chainercv/links/model/mask_rcnn/mask_rcnn.py
@@ -195,7 +195,7 @@ def predict(self, imgs):
                 point_maps, bboxes)
             labels = [cuda.to_cpu(label) for label in labels]
             scores = [cuda.to_cpu(score) for score in scores]
-            return points, point_scores, labels, scores, bboxes
+            return points, labels, scores, point_scores, bboxes
 
     def prepare(self, imgs):
         """Preprocess images.
diff --git a/examples/keypoint_detection/eval_keypoint_detection.py b/examples/keypoint_detection/eval_keypoint_detection.py
index 74fbd5ca6c..94954c5cd7 100644
--- a/examples/keypoint_detection/eval_keypoint_detection.py
+++ b/examples/keypoint_detection/eval_keypoint_detection.py
@@ -43,7 +43,7 @@ def setup(dataset, model_name, pretrained_model, batchsize):
         model.use_preset('evaluate')
 
         def eval_(out_values, rest_values):
-            (pred_points, pred_point_scores, pred_labels, pred_scores,
+            (pred_points, pred_labels, pred_scores, pred_point_scores,
              pred_bboxes) = out_values
             (gt_points, gt_visibles, gt_labels, gt_bboxes,
              gt_areas, gt_crowdeds) = rest_values
diff --git a/examples/mask_rcnn/demo.py b/examples/mask_rcnn/demo.py
index 682b742fa2..81659c862b 100644
--- a/examples/mask_rcnn/demo.py
+++ b/examples/mask_rcnn/demo.py
@@ -59,12 +59,12 @@ def main():
             label_names=coco_instance_segmentation_label_names)
         plt.show()
     elif args.mode == 'keypoint':
-        points, point_scores, bboxes, labels, scores = model.predict([img])
+        points, labels, scores, point_scores, bboxes = model.predict([img])
         point = points[0]
-        point_score = point_scores[0]
-        bbox = bboxes[0]
         label = labels[0]
         score = scores[0]
+        point_score = point_scores[0]
+        bbox = bboxes[0]
         ax = chainercv.visualizations.vis_keypoint_coco(
             img, point, None, point_score)
         chainercv.visualizations.vis_bbox(None, bbox, score=score, ax=ax)

From 6c9d3bcab5db8be634aea27c05818f0e7625eda6 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Sun, 10 Mar 2019 13:54:45 +0900
Subject: [PATCH 078/100] do not update bilinear interpolation layer

---
 examples/mask_rcnn/train_multi_keypoint.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/mask_rcnn/train_multi_keypoint.py b/examples/mask_rcnn/train_multi_keypoint.py
index 37cb683beb..45b282a01e 100644
--- a/examples/mask_rcnn/train_multi_keypoint.py
+++ b/examples/mask_rcnn/train_multi_keypoint.py
@@ -236,6 +236,7 @@ def main():
     for link in model.links():
         if isinstance(link, L.BatchNormalization):
             link.disable_update()
+    model.keypoint_head.upsample.disable_update()
 
     n_iteration = args.iteration * 16 / args.batchsize
     updater = training.updaters.StandardUpdater(

From 553f901150743f6ebefe3a6073cececa01f2f1b2 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Sun, 10 Mar 2019 14:03:13 +0900
Subject: [PATCH 079/100] fix reporter

---
 examples/mask_rcnn/train_multi_keypoint.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/mask_rcnn/train_multi_keypoint.py b/examples/mask_rcnn/train_multi_keypoint.py
index 45b282a01e..e751aae619 100644
--- a/examples/mask_rcnn/train_multi_keypoint.py
+++ b/examples/mask_rcnn/train_multi_keypoint.py
@@ -110,7 +110,7 @@ def __call__(self, imgs, points, visibles, labels, bboxes):
             'loss': loss,
             'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss,
             'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss,
-            'loss/point': point_loss},
+            'loss/keypoint': point_loss},
             self)
         return loss
 

From ae067896e6a3760b69f485d5a88ab1db6a9b7e51 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Sun, 10 Mar 2019 14:07:36 +0900
Subject: [PATCH 080/100] fix ignore logic

---
 chainercv/links/model/mask_rcnn/keypoint_head.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/chainercv/links/model/mask_rcnn/keypoint_head.py b/chainercv/links/model/mask_rcnn/keypoint_head.py
index 057ccdb9a7..5cde86464a 100644
--- a/chainercv/links/model/mask_rcnn/keypoint_head.py
+++ b/chainercv/links/model/mask_rcnn/keypoint_head.py
@@ -190,10 +190,9 @@ def keypoint_loss_pre(rois, roi_indices, gt_points, gt_visibles,
 
         # Ignore RoIs whose closest bounding box does not contain
         # any valid keypoints.
-        valid_point = within_bbox(gt_point, gt_bbox)
-        valid_point = xp.logical_and(valid_point, gt_visible)
+        valid_point = within_bbox(gt_point[gt_index], roi)
+        valid_point = xp.logical_and(valid_point, gt_visible[gt_index])
         visible_roi = valid_point.sum(axis=1) > 0
-        visible_roi = visible_roi[gt_index]
         gt_head_label[xp.logical_not(gt_index)] = -1
         gt_head_labels[index] = gt_head_label
 

From b0c733e8fd536438c8121092da0b7f42c4ea00e4 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Sun, 10 Mar 2019 14:18:37 +0900
Subject: [PATCH 081/100] fix

---
 chainercv/links/model/mask_rcnn/keypoint_head.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chainercv/links/model/mask_rcnn/keypoint_head.py b/chainercv/links/model/mask_rcnn/keypoint_head.py
index 5cde86464a..f53a44a102 100644
--- a/chainercv/links/model/mask_rcnn/keypoint_head.py
+++ b/chainercv/links/model/mask_rcnn/keypoint_head.py
@@ -193,7 +193,7 @@ def keypoint_loss_pre(rois, roi_indices, gt_points, gt_visibles,
         valid_point = within_bbox(gt_point[gt_index], roi)
         valid_point = xp.logical_and(valid_point, gt_visible[gt_index])
         visible_roi = valid_point.sum(axis=1) > 0
-        gt_head_label[xp.logical_not(gt_index)] = -1
+        gt_head_label[xp.logical_not(visible_roi)] = -1
         gt_head_labels[index] = gt_head_label
 
     is_sampled = (gt_head_labels > 0).nonzero()[0]

From 80b8b6aef678f421d8e3f542f4057e1dd91e7000 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 15 Mar 2019 13:19:29 +0900
Subject: [PATCH 082/100] Head -> BboxHead

---
 chainercv/links/model/fpn/__init__.py          |  6 +++---
 .../links/model/fpn/{head.py => bbox_head.py}  | 18 +++++++++---------
 chainercv/links/model/fpn/faster_rcnn.py       | 14 +++++++-------
 .../links/model/fpn/faster_rcnn_fpn_resnet.py  |  4 ++--
 docs/source/reference/links/fpn.rst            | 18 +++++++++---------
 5 files changed, 30 insertions(+), 30 deletions(-)
 rename chainercv/links/model/fpn/{head.py => bbox_head.py} (96%)

diff --git a/chainercv/links/model/fpn/__init__.py b/chainercv/links/model/fpn/__init__.py
index 0ceacd4fe5..1d53b7f70c 100644
--- a/chainercv/links/model/fpn/__init__.py
+++ b/chainercv/links/model/fpn/__init__.py
@@ -2,8 +2,8 @@
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet101  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet50  # NOQA
 from chainercv.links.model.fpn.fpn import FPN  # NOQA
-from chainercv.links.model.fpn.head import Head  # NOQA
-from chainercv.links.model.fpn.head import head_loss_post  # NOQA
-from chainercv.links.model.fpn.head import head_loss_pre  # NOQA
+from chainercv.links.model.fpn.bbox_head import BboxHead  # NOQA
+from chainercv.links.model.fpn.bbox_head import bbox_head_loss_post  # NOQA
+from chainercv.links.model.fpn.bbox_head import bbox_head_loss_pre  # NOQA
 from chainercv.links.model.fpn.rpn import RPN  # NOQA
 from chainercv.links.model.fpn.rpn import rpn_loss  # NOQA
diff --git a/chainercv/links/model/fpn/head.py b/chainercv/links/model/fpn/bbox_head.py
similarity index 96%
rename from chainercv/links/model/fpn/head.py
rename to chainercv/links/model/fpn/bbox_head.py
index f0c0fc7b63..502baf4775 100644
--- a/chainercv/links/model/fpn/head.py
+++ b/chainercv/links/model/fpn/bbox_head.py
@@ -13,8 +13,8 @@
 from chainercv import utils
 
 
-class Head(chainer.Chain):
-    """Head network of Feature Pyramid Networks.
+class BboxHead(chainer.Chain):
+    """Bounding box head network of Feature Pyramid Networks.
 
     Args:
         n_class (int): The number of classes including background.
@@ -28,7 +28,7 @@ class Head(chainer.Chain):
     std = (0.1, 0.2)
 
     def __init__(self, n_class, scales):
-        super(Head, self).__init__()
+        super(BboxHead, self).__init__()
 
         fc_init = {
             'initialW': Caffe2FCUniform(),
@@ -210,10 +210,10 @@ def decode(self, rois, roi_indices, locs, confs,
         return bboxes, labels, scores
 
 
-def head_loss_pre(rois, roi_indices, std, bboxes, labels):
+def bbox_head_loss_pre(rois, roi_indices, std, bboxes, labels):
     """Loss function for Head (pre).
 
-    This function processes RoIs for :func:`head_loss_post`.
+    This function processes RoIs for :func:`bbox_head_loss_post`.
 
     Args:
         rois (iterable of arrays): An iterable of arrays of
@@ -314,7 +314,7 @@ def head_loss_pre(rois, roi_indices, std, bboxes, labels):
     return rois, roi_indices, gt_locs, gt_labels
 
 
-def head_loss_post(locs, confs, roi_indices, gt_locs, gt_labels, batchsize):
+def bbox_head_loss_post(locs, confs, roi_indices, gt_locs, gt_labels, batchsize):
     """Loss function for Head (post).
 
      Args:
@@ -323,11 +323,11 @@ def head_loss_post(locs, confs, roi_indices, gt_locs, gt_labels, batchsize):
          confs (array): An iterable of arrays whose shape is
              :math:`(R, n\_class)`.
          roi_indices (list of arrays): A list of arrays returned by
-             :func:`head_locs_pre`.
+             :func:`bbox_head_loss_pre`.
          gt_locs (list of arrays): A list of arrays returned by
-             :func:`head_locs_pre`.
+             :func:`bbox_head_loss_pre`.
          gt_labels (list of arrays): A list of arrays returned by
+             :func:`bbox_head_loss_pre`.
+             :func:`bbox_head_locs_pre`.
          batchsize (int): The size of batch.
 
      Returns:
diff --git a/chainercv/links/model/fpn/faster_rcnn.py b/chainercv/links/model/fpn/faster_rcnn.py
index c64a563db2..3b727c6029 100644
--- a/chainercv/links/model/fpn/faster_rcnn.py
+++ b/chainercv/links/model/fpn/faster_rcnn.py
@@ -23,8 +23,8 @@ class FasterRCNN(chainer.Chain):
         rpn (Link): A link that has the same interface as
             :class:`~chainercv.links.model.fpn.RPN`.
             Please refer to the documentation found there.
-        head (Link): A link that has the same interface as
-            :class:`~chainercv.links.model.fpn.Head`.
+        bbox_head (Link): A link that has the same interface as
+            :class:`~chainercv.links.model.fpn.BboxHead`.
             Please refer to the documentation found there.
         min_size (int): A preprocessing paramter for :meth:`prepare`. Please
             refer to a docstring found for :meth:`prepare`.
@@ -47,13 +47,13 @@ class FasterRCNN(chainer.Chain):
 
     _stride = 32
 
-    def __init__(self, extractor, rpn, head,
+    def __init__(self, extractor, rpn, bbox_head,
                  min_size=800, max_size=1333):
         super(FasterRCNN, self).__init__()
         with self.init_scope():
             self.extractor = extractor
             self.rpn = rpn
-            self.head = head
+            self.bbox_head = bbox_head
 
         self._min_size = min_size
         self._max_size = max_size
@@ -94,8 +94,8 @@ def __call__(self, x):
         anchors = self.rpn.anchors(h.shape[2:] for h in hs)
         rois, roi_indices = self.rpn.decode(
             rpn_locs, rpn_confs, anchors, x.shape)
-        rois, roi_indices = self.head.distribute(rois, roi_indices)
-        head_locs, head_confs = self.head(hs, rois, roi_indices)
+        rois, roi_indices = self.bbox_head.distribute(rois, roi_indices)
+        head_locs, head_confs = self.bbox_head(hs, rois, roi_indices)
         return rois, roi_indices, head_locs, head_confs
 
     def predict(self, imgs):
@@ -132,7 +132,7 @@ def predict(self, imgs):
 
         with chainer.using_config('train', False), chainer.no_backprop_mode():
             rois, roi_indices, head_locs, head_confs = self(x)
-        bboxes, labels, scores = self.head.decode(
+        bboxes, labels, scores = self.bbox_head.decode(
             rois, roi_indices, head_locs, head_confs,
             scales, sizes, self.nms_thresh, self.score_thresh)
 
diff --git a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py
index 4b86e0cf7e..970fc0d449 100644
--- a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py
+++ b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py
@@ -6,7 +6,7 @@
 
 from chainercv.links.model.fpn.faster_rcnn import FasterRCNN
 from chainercv.links.model.fpn.fpn import FPN
-from chainercv.links.model.fpn.head import Head
+from chainercv.links.model.fpn.bbox_head import BboxHead
 from chainercv.links.model.fpn.rpn import RPN
 from chainercv.links.model.resnet import ResNet101
 from chainercv.links.model.resnet import ResNet50
@@ -35,7 +35,7 @@ def __init__(self, n_fg_class=None, pretrained_model=None,
         super(FasterRCNNFPNResNet, self).__init__(
             extractor=extractor,
             rpn=RPN(extractor.scales),
-            head=Head(param['n_fg_class'] + 1, extractor.scales),
+            bbox_head=BboxHead(param['n_fg_class'] + 1, extractor.scales),
             min_size=min_size, max_size=max_size
         )
 
diff --git a/docs/source/reference/links/fpn.rst b/docs/source/reference/links/fpn.rst
index d97aa3599f..5d267ff026 100644
--- a/docs/source/reference/links/fpn.rst
+++ b/docs/source/reference/links/fpn.rst
@@ -31,9 +31,9 @@ FPN
 .. autoclass:: FPN
    :members:
 
-Head
-~~~~
-.. autoclass:: Head
+BboxHead
+~~~~~~~~
+.. autoclass:: BboxHead
    :members:
    :special-members:  __call__
 
@@ -46,13 +46,13 @@ RPN
 Train-only Utility
 ------------------
 
-head_loss_pre
-~~~~~~~~~~~~~
-.. autofunction:: head_loss_pre
+bbox_head_loss_pre
+~~~~~~~~~~~~~~~~~~
+.. autofunction:: bbox_head_loss_pre
 
-head_loss_post
-~~~~~~~~~~~~~~
-.. autofunction:: head_loss_post
+bbox_head_loss_post
+~~~~~~~~~~~~~~~~~~~
+.. autofunction:: bbox_head_loss_post
 
 rpn_loss
 ~~~~~~~~

From fa96b48f21099a6e3f77ab1778b41df8f021e993 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 15 Mar 2019 14:59:14 +0900
Subject: [PATCH 083/100] merge Mask R-CNN to chainercv.links.model.fpn

---
 chainercv/links/__init__.py                   |   4 +-
 chainercv/links/model/fpn/__init__.py         |   2 +
 chainercv/links/model/fpn/faster_rcnn.py      | 165 ++++++++---
 .../links/model/fpn/faster_rcnn_fpn_resnet.py | 140 ++++++---
 .../model/{mask_rcnn => fpn}/mask_head.py     |   4 +-
 .../{mask_rcnn/misc.py => fpn/mask_utils.py}  |  11 -
 chainercv/links/model/fpn/misc.py             |  13 +
 chainercv/links/model/mask_rcnn/__init__.py   |   8 -
 chainercv/links/model/mask_rcnn/mask_rcnn.py  | 216 --------------
 .../model/mask_rcnn/mask_rcnn_fpn_resnet.py   | 133 ---------
 examples/fpn/demo.py                          |  41 ++-
 examples/fpn/train_multi.py                   | 149 +++++++---
 examples/mask_rcnn/demo.py                    |  51 ----
 examples/mask_rcnn/train_multi.py             | 275 ------------------
 14 files changed, 376 insertions(+), 836 deletions(-)
 rename chainercv/links/model/{mask_rcnn => fpn}/mask_head.py (98%)
 rename chainercv/links/model/{mask_rcnn/misc.py => fpn/mask_utils.py} (94%)
 delete mode 100644 chainercv/links/model/mask_rcnn/__init__.py
 delete mode 100644 chainercv/links/model/mask_rcnn/mask_rcnn.py
 delete mode 100644 chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py
 delete mode 100644 examples/mask_rcnn/demo.py
 delete mode 100644 examples/mask_rcnn/train_multi.py

diff --git a/chainercv/links/__init__.py b/chainercv/links/__init__.py
index d65e14375f..72b4d32106 100644
--- a/chainercv/links/__init__.py
+++ b/chainercv/links/__init__.py
@@ -11,8 +11,8 @@
 from chainercv.links.model.faster_rcnn.faster_rcnn_vgg import FasterRCNNVGG16  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet101  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet50  # NOQA
-from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet101  # NOQA
-from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet50  # NOQA
+from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet101  # NOQA
+from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet50  # NOQA
 from chainercv.links.model.resnet import ResNet101  # NOQA
 from chainercv.links.model.resnet import ResNet152  # NOQA
 from chainercv.links.model.resnet import ResNet50  # NOQA
diff --git a/chainercv/links/model/fpn/__init__.py b/chainercv/links/model/fpn/__init__.py
index 1d53b7f70c..78f6a7684b 100644
--- a/chainercv/links/model/fpn/__init__.py
+++ b/chainercv/links/model/fpn/__init__.py
@@ -1,6 +1,8 @@
 from chainercv.links.model.fpn.faster_rcnn import FasterRCNN  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet101  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet50  # NOQA
+from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet101  # NOQA
+from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet50  # NOQA
 from chainercv.links.model.fpn.fpn import FPN  # NOQA
 from chainercv.links.model.fpn.bbox_head import BboxHead  # NOQA
 from chainercv.links.model.fpn.bbox_head import bbox_head_loss_post  # NOQA
diff --git a/chainercv/links/model/fpn/faster_rcnn.py b/chainercv/links/model/fpn/faster_rcnn.py
index 3b727c6029..40df122f81 100644
--- a/chainercv/links/model/fpn/faster_rcnn.py
+++ b/chainercv/links/model/fpn/faster_rcnn.py
@@ -3,9 +3,10 @@
 import numpy as np
 
 import chainer
+import chainer.functions as F
 from chainer.backends import cuda
 
-from chainercv import transforms
+from chainercv.links.model.fpn.misc import scale_img
 
 
 class FasterRCNN(chainer.Chain):
@@ -26,6 +27,9 @@ class FasterRCNN(chainer.Chain):
         bbox_head (Link): A link that has the same interface as
             :class:`~chainercv.links.model.fpn.BboxHead`.
             Please refer to the documentation found there.
+        mask_head (Link): A link that has the same interface as
+            :class:`~chainercv.links.model.fpn.mask_head.MaskHead`.
+            Please refer to the documentation found there.
         min_size (int): A preprocessing paramter for :meth:`prepare`. Please
             refer to a docstring found for :meth:`prepare`.
         max_size (int): A preprocessing paramter for :meth:`prepare`. Note
@@ -45,18 +49,35 @@ class FasterRCNN(chainer.Chain):
 
     """
 
-    _stride = 32
+    stride = 32
+    _accepted_return_values = ('rois', 'bboxes', 'labels', 'scores', 'masks')
 
     def __init__(self, extractor, rpn, bbox_head,
+                 mask_head, return_values,
                  min_size=800, max_size=1333):
+        for value_name in return_values:
+            if value_name not in self._accepted_return_values:
+                raise ValueError(
+                    '{} is not included in accepted value names {}'.format(
+                        value_name, self._accepted_return_values))
+        self._return_values = return_values
+
+        self._store_rpn_outputs = 'rois' in self._return_values
+        self._run_bbox = any([key in self._return_values
+                        for key in ['bboxes', 'labels', 'scores', 'masks']])
+        self._run_mask = 'masks' in self._return_values
         super(FasterRCNN, self).__init__()
+
         with self.init_scope():
             self.extractor = extractor
             self.rpn = rpn
-            self.bbox_head = bbox_head
+            if self._run_bbox:
+                self.bbox_head = bbox_head
+            if self._run_mask:
+                self.mask_head = mask_head
 
-        self._min_size = min_size
-        self._max_size = max_size
+        self.min_size = min_size
+        self.max_size = max_size
 
         self.use_preset('visualize')
 
@@ -94,52 +115,90 @@ def __call__(self, x):
         anchors = self.rpn.anchors(h.shape[2:] for h in hs)
         rois, roi_indices = self.rpn.decode(
             rpn_locs, rpn_confs, anchors, x.shape)
-        rois, roi_indices = self.bbox_head.distribute(rois, roi_indices)
-        head_locs, head_confs = self.bbox_head(hs, rois, roi_indices)
-        return rois, roi_indices, head_locs, head_confs
+        return hs, rois, roi_indices
 
     def predict(self, imgs):
-        """Detect objects from images.
+        """Segment object instances from images.
 
-        This method predicts objects for each image.
+        This method predicts instance-aware object regions for each image.
 
         Args:
-            imgs (iterable of numpy.ndarray): Arrays holding images.
-                All images are in CHW and RGB format
+            imgs (iterable of numpy.ndarray): Arrays holding images of shape
+                :math:`(B, C, H, W)`.  All images are in CHW and RGB format
                 and the range of their value is :math:`[0, 255]`.
 
         Returns:
            tuple of lists:
            This method returns a tuple of three lists,
-           :obj:`(bboxes, labels, scores)`.
+           :obj:`(masks, labels, scores)`.
 
-           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
-               where :math:`R` is the number of bounding boxes in a image. \
-               Each bounding box is organized by \
-               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
-               in the second axis.
+           * **masks**: A list of boolean arrays of shape :math:`(R, H, W)`, \
+               where :math:`R` is the number of masks in an image. \
+               Each pixel indicates whether it is inside the object or not.
            * **labels** : A list of integer arrays of shape :math:`(R,)`. \
-               Each value indicates the class of the bounding box. \
+               Each value indicates the class of the masks. \
                Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
                number of the foreground classes.
            * **scores** : A list of float arrays of shape :math:`(R,)`. \
                Each value indicates how confident the prediction is.
 
         """
+        output = {}
 
         sizes = [img.shape[1:] for img in imgs]
         x, scales = self.prepare(imgs)
 
         with chainer.using_config('train', False), chainer.no_backprop_mode():
-            rois, roi_indices, head_locs, head_confs = self(x)
-        bboxes, labels, scores = self.bbox_head.decode(
-            rois, roi_indices, head_locs, head_confs,
-            scales, sizes, self.nms_thresh, self.score_thresh)
-
-        bboxes = [cuda.to_cpu(bbox) for bbox in bboxes]
-        labels = [cuda.to_cpu(label) for label in labels]
-        scores = [cuda.to_cpu(score) for score in scores]
-        return bboxes, labels, scores
+            hs, rpn_rois, rpn_roi_indices = self(x)
+            if self._store_rpn_outputs:
+                rpn_rois_cpu = [
+                    chainer.backends.cuda.to_cpu(rpn_roi) for rpn_roi in
+                    _flat_to_list(rpn_rois, rpn_roi_indices, len(imgs))]
+                output.update({'rois': rpn_rois_cpu})
+
+        if self._run_bbox:
+            bbox_rois, bbox_roi_indices = self.bbox_head.distribute(
+                rpn_rois, rpn_roi_indices)
+            with chainer.using_config(
+                    'train', False), chainer.no_backprop_mode():
+                head_locs, head_confs = self.bbox_head(
+                    hs, bbox_rois, bbox_roi_indices)
+            bboxes, labels, scores = self.bbox_head.decode(
+                bbox_rois, bbox_roi_indices, head_locs, head_confs,
+                scales, sizes, self.nms_thresh, self.score_thresh)
+            bboxes_cpu = [chainer.backends.cuda.to_cpu(bbox)
+                    for bbox in bboxes]
+            labels_cpu = [chainer.backends.cuda.to_cpu(label) for label in labels]
+            scores_cpu = [cuda.to_cpu(score) for score in scores]
+            output.update({'bboxes': bboxes_cpu, 'labels': labels_cpu,
+                           'scores': scores_cpu})
+
+        if self._run_mask:
+            rescaled_bboxes = [bbox * scale
+                               for scale, bbox in zip(scales, bboxes)]
+            # Change bboxes to RoI and RoI indices format
+            mask_rois_before_reordering, mask_roi_indices_before_reordering =\
+                _list_to_flat(rescaled_bboxes)
+            mask_rois, mask_roi_indices, order = self.mask_head.distribute(
+                mask_rois_before_reordering, mask_roi_indices_before_reordering)
+            with chainer.using_config(
+                    'train', False), chainer.no_backprop_mode():
+                segms = F.sigmoid(
+                    self.mask_head(hs, mask_rois, mask_roi_indices)).data
+            # Put the order of proposals back to the one used by bbox head.
+            segms = segms[order]
+            segms = _flat_to_list(
+                segms, mask_roi_indices_before_reordering, len(imgs))
+            segms = [segm if segm is not None else
+                    self.xp.zeros(
+                        (0, self.mask_head.segm_size, self.mask_head.segm_size),
+                        dtype=np.float32)
+                    for segm in segms]
+            segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms]
+            # Currently MaskHead only supports numpy inputs
+            masks_cpu = self.mask_head.decode(segms, bboxes_cpu, labels_cpu, sizes)
+            output.update({'masks': masks_cpu})
+        return (output[key] for key in self._return_values)
 
     def prepare(self, imgs):
         """Preprocess images.
@@ -154,26 +213,44 @@ def prepare(self, imgs):
             scales that were caluclated in prepocessing.
 
         """
-
         scales = []
         resized_imgs = []
         for img in imgs:
-            _, H, W = img.shape
-            scale = self._min_size / min(H, W)
-            if scale * max(H, W) > self._max_size:
-                scale = self._max_size / max(H, W)
-            scales.append(scale)
-            H, W = int(H * scale), int(W * scale)
-            img = transforms.resize(img, (H, W))
+            img, scale = scale_img(
+                img, self.min_size, self.max_size)
             img -= self.extractor.mean
+            scales.append(scale)
             resized_imgs.append(img)
-
-        size = np.array([im.shape[1:] for im in resized_imgs]).max(axis=0)
-        size = (np.ceil(size / self._stride) * self._stride).astype(int)
-        x = np.zeros((len(imgs), 3, size[0], size[1]), dtype=np.float32)
-        for i, img in enumerate(resized_imgs):
-            _, H, W = img.shape
-            x[i, :, :H, :W] = img
-
+        pad_size = np.array(
+            [im.shape[1:] for im in resized_imgs]).max(axis=0)
+        pad_size = (
+            np.ceil(pad_size / self.stride) * self.stride).astype(int)
+        x = np.zeros(
+            (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32)
+        for i, im in enumerate(resized_imgs):
+            _, H, W = im.shape
+            x[i, :, :H, :W] = im
         x = self.xp.array(x)
+
         return x, scales
+
+
+def _list_to_flat(array_list):
+    xp = chainer.backends.cuda.get_array_module(array_list[0])
+
+    indices = xp.concatenate(
+        [i * xp.ones((len(array),), dtype=np.int32) for
+         i, array in enumerate(array_list)], axis=0)
+    flat = xp.concatenate(array_list, axis=0)
+    return flat, indices
+
+
+def _flat_to_list(flat, indices, B):
+    array_list = []
+    for i in range(B):
+        array = flat[indices == i]
+        if len(array) > 0:
+            array_list.append(array)
+        else:
+            array_list.append(None)
+    return array_list
diff --git a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py
index 970fc0d449..29e6119c0b 100644
--- a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py
+++ b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py
@@ -7,6 +7,7 @@
 from chainercv.links.model.fpn.faster_rcnn import FasterRCNN
 from chainercv.links.model.fpn.fpn import FPN
 from chainercv.links.model.fpn.bbox_head import BboxHead
+from chainercv.links.model.fpn.mask_head import MaskHead
 from chainercv.links.model.fpn.rpn import RPN
 from chainercv.links.model.resnet import ResNet101
 from chainercv.links.model.resnet import ResNet50
@@ -17,9 +18,34 @@ class FasterRCNNFPNResNet(FasterRCNN):
     """Base class for FasterRCNNFPNResNet50 and FasterRCNNFPNResNet101.
 
     A subclass of this class should have :obj:`_base` and :obj:`_models`.
+
+    Args:
+        n_fg_class (int): The number of classes excluding the background.
+        pretrained_model (string): The weight file to be loaded.
+            This can take :obj:`'coco'`, :obj:`'imagenet'`, `filepath`
+            or :obj:`None`. The default value is :obj:`None`.
+
+            * :obj:`'coco'`: Load weights trained on train split of \
+                MS COCO 2017. \
+                The weight file is downloaded and cached automatically. \
+                :obj:`n_fg_class` must be :obj:`80` or :obj:`None`.
+            * :obj:`'imagenet'`: Load weights of ResNet-50 trained on \
+                ImageNet. \
+                The weight file is downloaded and cached automatically. \
+                This option initializes weights partially and the rest is \
+                initialized randomly. In this case, :obj:`n_fg_class` \
+                can be set to any number.
+            * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \
+                must be specified properly.
+            * :obj:`None`: Do not load weights.
+        min_size (int): A preprocessing parameter for :meth:`prepare`. Please \
+            refer to :meth:`prepare`.
+        max_size (int): A preprocessing parameter for :meth:`prepare`.
+
     """
 
     def __init__(self, n_fg_class=None, pretrained_model=None,
+                 return_values=['bboxes', 'labels', 'scores'],
                  min_size=800, max_size=1333):
         param, path = utils.prepare_pretrained_model(
             {'n_fg_class': n_fg_class}, pretrained_model, self._models)
@@ -36,6 +62,8 @@ def __init__(self, n_fg_class=None, pretrained_model=None,
             extractor=extractor,
             rpn=RPN(extractor.scales),
             bbox_head=BboxHead(param['n_fg_class'] + 1, extractor.scales),
+            mask_head=MaskHead(param['n_fg_class'] + 1, extractor.scales),
+            return_values=return_values,
             min_size=min_size, max_size=max_size
         )
 
@@ -57,28 +85,6 @@ class FasterRCNNFPNResNet50(FasterRCNNFPNResNet):
     .. [#] Tsung-Yi Lin et al.
        Feature Pyramid Networks for Object Detection. CVPR 2017
 
-    Args:
-        n_fg_class (int): The number of classes excluding the background.
-        pretrained_model (string): The weight file to be loaded.
-            This can take :obj:`'coco'`, `filepath` or :obj:`None`.
-            The default value is :obj:`None`.
-
-            * :obj:`'coco'`: Load weights trained on train split of \
-                MS COCO 2017. \
-                The weight file is downloaded and cached automatically. \
-                :obj:`n_fg_class` must be :obj:`80` or :obj:`None`.
-            * :obj:`'imagenet'`: Load weights of ResNet-50 trained on \
-                ImageNet. \
-                The weight file is downloaded and cached automatically. \
-                This option initializes weights partially and the rests are \
-                initialized randomly. In this case, :obj:`n_fg_class` \
-                can be set to any number.
-            * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \
-                must be specified properly.
-            * :obj:`None`: Do not load weights.
-        min_size (int): A preprocessing paramter for :meth:`prepare`. Please \
-            refer to :meth:`prepare`.
-        max_size (int): A preprocessing paramter for :meth:`prepare`.
 
     """
 
@@ -103,29 +109,6 @@ class FasterRCNNFPNResNet101(FasterRCNNFPNResNet):
     .. [#] Tsung-Yi Lin et al.
        Feature Pyramid Networks for Object Detection. CVPR 2017
 
-    Args:
-        n_fg_class (int): The number of classes excluding the background.
-        pretrained_model (string): The weight file to be loaded.
-            This can take :obj:`'coco'`, `filepath` or :obj:`None`.
-            The default value is :obj:`None`.
-
-            * :obj:`'coco'`: Load weights trained on train split of \
-                MS COCO 2017. \
-                The weight file is downloaded and cached automatically. \
-                :obj:`n_fg_class` must be :obj:`80` or :obj:`None`.
-            * :obj:`'imagenet'`: Load weights of ResNet-101 trained on \
-                ImageNet. \
-                The weight file is downloaded and cached automatically. \
-                This option initializes weights partially and the rests are \
-                initialized randomly. In this case, :obj:`n_fg_class` \
-                can be set to any number.
-            * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \
-                must be specified properly.
-            * :obj:`None`: Do not load weights.
-        min_size (int): A preprocessing paramter for :meth:`prepare`. Please \
-            refer to :meth:`prepare`.
-        max_size (int): A preprocessing paramter for :meth:`prepare`.
-
     """
 
     _base = ResNet101
@@ -139,6 +122,73 @@ class FasterRCNNFPNResNet101(FasterRCNNFPNResNet):
     }
 
 
+class MaskRCNNFPNResNet(FasterRCNNFPNResNet):
+    """Base class for MaskRCNNFPNResNet50 and MaskRCNNFPNResNet101.
+
+    This is a base class of Mask R-CNN [#]_.
+    A subclass of this class should have :obj:`_base` and
+    :obj:`_models`.
+
+    .. [#] Kaiming He et al.
+       Mask R-CNN. ICCV 2017
+
+
+    """
+
+    def __init__(self, n_fg_class=None, pretrained_model=None,
+                 min_size=800, max_size=1333):
+        super(MaskRCNNFPNResNet, self).__init__(
+            n_fg_class, pretrained_model, ['masks', 'labels', 'scores'],
+            min_size, max_size)
+
+
+class MaskRCNNFPNResNet50(MaskRCNNFPNResNet):
+    """Mask R-CNN with ResNet-50.
+
+    This is a model of Mask R-CNN [#]_.
+    This model uses :class:`~chainercv.links.ResNet50` as
+    its base feature extractor.
+
+    .. [#] Kaiming He et al.
+       Mask R-CNN. ICCV 2017
+
+
+    """
+
+    _base = ResNet50
+    _models = {
+        'coco': {
+            'param': {'n_fg_class': 80},
+            'url': '',
+            'cv2': True
+        },
+    }
+
+
+class MaskRCNNFPNResNet101(MaskRCNNFPNResNet):
+    """Mask R-CNN with ResNet-101.
+
+    This is a model of Mask R-CNN [#]_.
+    This model uses :class:`~chainercv.links.ResNet101` as
+    its base feature extractor.
+
+    .. [#] Kaiming He et al.
+       Mask R-CNN. ICCV 2017
+
+
+    """
+
+    _base = ResNet101
+    _models = {
+        'coco': {
+            'param': {'n_fg_class': 80},
+            'url': '',
+            'cv2': True
+        },
+    }
+
+
+
 def _copyparams(dst, src):
     if isinstance(dst, chainer.Chain):
         for link in dst.children():
diff --git a/chainercv/links/model/mask_rcnn/mask_head.py b/chainercv/links/model/fpn/mask_head.py
similarity index 98%
rename from chainercv/links/model/mask_rcnn/mask_head.py
rename to chainercv/links/model/fpn/mask_head.py
index dc65fd6718..b89857fa5d 100644
--- a/chainercv/links/model/mask_rcnn/mask_head.py
+++ b/chainercv/links/model/fpn/mask_head.py
@@ -11,8 +11,8 @@
 from chainercv.links import Conv2DActiv
 from chainercv.utils.bbox.bbox_iou import bbox_iou
 
-from chainercv.links.model.mask_rcnn.misc import mask_to_segm
-from chainercv.links.model.mask_rcnn.misc import segm_to_mask
+from chainercv.links.model.fpn.mask_utils import mask_to_segm
+from chainercv.links.model.fpn.mask_utils import segm_to_mask
 
 
 class MaskHead(chainer.Chain):
diff --git a/chainercv/links/model/mask_rcnn/misc.py b/chainercv/links/model/fpn/mask_utils.py
similarity index 94%
rename from chainercv/links/model/mask_rcnn/misc.py
rename to chainercv/links/model/fpn/mask_utils.py
index 2cdcd52bdb..d9167ec046 100644
--- a/chainercv/links/model/mask_rcnn/misc.py
+++ b/chainercv/links/model/fpn/mask_utils.py
@@ -8,17 +8,6 @@
 from chainercv import transforms
 
 
-def scale_img(img, min_size, max_size):
-    """Process image."""
-    _, H, W = img.shape
-    scale = min_size / min(H, W)
-    if scale * max(H, W) > max_size:
-        scale = max_size / max(H, W)
-    H, W = int(H * scale), int(W * scale)
-    img = transforms.resize(img, (H, W))
-    return img, scale
-
-
 def mask_to_segm(mask, bbox, segm_size, index=None, pad=1):
     """Crop and resize mask.
 
diff --git a/chainercv/links/model/fpn/misc.py b/chainercv/links/model/fpn/misc.py
index c699e3d2f6..0912de3eab 100644
--- a/chainercv/links/model/fpn/misc.py
+++ b/chainercv/links/model/fpn/misc.py
@@ -5,6 +5,8 @@
 from chainer.backends import cuda
 import chainer.functions as F
 
+from chainercv import transforms
+
 
 exp_clip = np.log(1000 / 16)
 
@@ -31,3 +33,14 @@ def choice(x, size):
         return y
     else:
         return cuda.to_gpu(y)
+
+
+def scale_img(img, min_size, max_size):
+    """Resize img within min_size/max_size and return it with the scale."""
+    _, H, W = img.shape
+    scale = min_size / min(H, W)
+    if scale * max(H, W) > max_size:
+        scale = max_size / max(H, W)
+    H, W = int(H * scale), int(W * scale)
+    img = transforms.resize(img, (H, W))
+    return img, scale
diff --git a/chainercv/links/model/mask_rcnn/__init__.py b/chainercv/links/model/mask_rcnn/__init__.py
deleted file mode 100644
index 9f1b210dbc..0000000000
--- a/chainercv/links/model/mask_rcnn/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from chainercv.links.model.mask_rcnn.mask_head import mask_loss_post  # NOQA
-from chainercv.links.model.mask_rcnn.mask_head import mask_loss_pre  # NOQA
-from chainercv.links.model.mask_rcnn.mask_head import MaskHead  # NOQA
-from chainercv.links.model.mask_rcnn.mask_rcnn import MaskRCNN  # NOQA
-from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet101  # NOQA
-from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet50  # NOQA
-from chainercv.links.model.mask_rcnn.misc import mask_to_segm  # NOQA
-from chainercv.links.model.mask_rcnn.misc import segm_to_mask  # NOQA
diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py
deleted file mode 100644
index 65b76c5b0d..0000000000
--- a/chainercv/links/model/mask_rcnn/mask_rcnn.py
+++ /dev/null
@@ -1,216 +0,0 @@
-from __future__ import division
-
-import numpy as np
-
-import chainer
-from chainer.backends import cuda
-import chainer.functions as F
-
-from chainercv.links.model.mask_rcnn.misc import scale_img
-
-
-class MaskRCNN(chainer.Chain):
-
-    """Base class of Mask R-CNN.
-
-    This is a base class of Mask R-CNN [#]_.
-
-    .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017
-
-    Args:
-        extractor (Link): A link that extracts feature maps.
-            This link must have :obj:`scales`, :obj:`mean` and
-            :meth:`__call__`.
-        rpn (Link): A link that has the same interface as
-            :class:`~chainercv.links.model.fpn.RPN`.
-            Please refer to the documentation found there.
-        head (Link): A link that has the same interface as
-            :class:`~chainercv.links.model.fpn.Head`.
-            Please refer to the documentation found there.
-        mask_head (Link): A link that has the same interface as
-            :class:`~chainercv.links.model.mask_rcnn.MaskRCNN`.
-            Please refer to the documentation found there.
-
-    Parameters:
-        nms_thresh (float): The threshold value
-            for :func:`~chainercv.utils.non_maximum_suppression`.
-            The default value is :obj:`0.5`.
-            This value can be changed directly or by using :meth:`use_preset`.
-        score_thresh (float): The threshold value for confidence score.
-            If a bounding box whose confidence score is lower than this value,
-            the bounding box will be suppressed.
-            The default value is :obj:`0.7`.
-            This value can be changed directly or by using :meth:`use_preset`.
-
-    """
-
-    min_size = 800
-    max_size = 1333
-    stride = 32
-
-    def __init__(self, extractor, rpn, head, mask_head):
-        super(MaskRCNN, self).__init__()
-        with self.init_scope():
-            self.extractor = extractor
-            self.rpn = rpn
-            self.head = head
-            self.mask_head = mask_head
-
-        self.use_preset('visualize')
-
-    def use_preset(self, preset):
-        """Use the given preset during prediction.
-
-        This method changes values of :obj:`nms_thresh` and
-        :obj:`score_thresh`. These values are a threshold value
-        used for non maximum suppression and a threshold value
-        to discard low confidence proposals in :meth:`predict`,
-        respectively.
-
-        If the attributes need to be changed to something
-        other than the values provided in the presets, please modify
-        them by directly accessing the public attributes.
-
-        Args:
-            preset ({'visualize', 'evaluate'}): A string to determine the
-                preset to use.
-        """
-
-        if preset == 'visualize':
-            self.nms_thresh = 0.5
-            self.score_thresh = 0.7
-        elif preset == 'evaluate':
-            self.nms_thresh = 0.5
-            self.score_thresh = 0.05
-        else:
-            raise ValueError('preset must be visualize or evaluate')
-
-    def __call__(self, x):
-        assert(not chainer.config.train)
-        hs = self.extractor(x)
-        rpn_locs, rpn_confs = self.rpn(hs)
-        anchors = self.rpn.anchors(h.shape[2:] for h in hs)
-        rois, roi_indices = self.rpn.decode(
-            rpn_locs, rpn_confs, anchors, x.shape)
-        rois, roi_indices = self.head.distribute(rois, roi_indices)
-        return hs, rois, roi_indices
-
-    def predict(self, imgs):
-        """Segment object instances from images.
-
-        This method predicts instance-aware object regions for each image.
-
-        Args:
-            imgs (iterable of numpy.ndarray): Arrays holding images of shape
-                :math:`(B, C, H, W)`.  All images are in CHW and RGB format
-                and the range of their value is :math:`[0, 255]`.
-
-        Returns:
-           tuple of lists:
-           This method returns a tuple of three lists,
-           :obj:`(masks, labels, scores)`.
-
-           * **masks**: A list of boolean arrays of shape :math:`(R, H, W)`, \
-               where :math:`R` is the number of masks in a image. \
-               Each pixel holds value if it is inside the object inside or not.
-           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
-               Each value indicates the class of the masks. \
-               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
-               number of the foreground classes.
-           * **scores** : A list of float arrays of shape :math:`(R,)`. \
-               Each value indicates how confident the prediction is.
-
-        """
-
-        sizes = [img.shape[1:] for img in imgs]
-        x, scales = self.prepare(imgs)
-
-        with chainer.using_config('train', False), chainer.no_backprop_mode():
-            hs, rois, roi_indices = self(x)
-            head_locs, head_confs = self.head(hs, rois, roi_indices)
-        bboxes, labels, scores = self.head.decode(
-            rois, roi_indices, head_locs, head_confs,
-            scales, sizes, self.nms_thresh, self.score_thresh)
-
-        rescaled_bboxes = [bbox * scale for scale, bbox in zip(scales, bboxes)]
-        # Change bboxes to RoI and RoI indices format
-        mask_rois_before_reordering, mask_roi_indices_before_reordering =\
-            _list_to_flat(rescaled_bboxes)
-        mask_rois, mask_roi_indices, order = self.mask_head.distribute(
-            mask_rois_before_reordering, mask_roi_indices_before_reordering)
-        with chainer.using_config('train', False), chainer.no_backprop_mode():
-            segms = F.sigmoid(
-                self.mask_head(hs, mask_rois, mask_roi_indices)).data
-        # Put the order of proposals back to the one used by bbox head.
-        segms = segms[order]
-        segms = _flat_to_list(
-            segms, mask_roi_indices_before_reordering, len(imgs))
-        segms = [segm if segm is not None else
-                 self.xp.zeros(
-                     (0, self.mask_head.segm_size, self.mask_head.segm_size),
-                     dtype=np.float32)
-                 for segm in segms]
-
-        segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms]
-        bboxes = [chainer.backends.cuda.to_cpu(bbox / scale)
-                  for bbox, scale in zip(rescaled_bboxes, scales)]
-        labels = [chainer.backends.cuda.to_cpu(label) for label in labels]
-        # Currently MaskHead only supports numpy inputs
-        masks = self.mask_head.decode(segms, bboxes, labels, sizes)
-        scores = [cuda.to_cpu(score) for score in scores]
-        return masks, labels, scores
-
-    def prepare(self, imgs):
-        """Preprocess images.
-
-        Args:
-            imgs (iterable of numpy.ndarray): Arrays holding images.
-                All images are in CHW and RGB format
-                and the range of their value is :math:`[0, 255]`.
-
-        Returns:
-            Two arrays: preprocessed images and \
-            scales that were caluclated in prepocessing.
-
-        """
-        scales = []
-        resized_imgs = []
-        for img in imgs:
-            img, scale = scale_img(
-                img, self.min_size, self.max_size)
-            img -= self.extractor.mean
-            scales.append(scale)
-            resized_imgs.append(img)
-        pad_size = np.array(
-            [im.shape[1:] for im in resized_imgs]).max(axis=0)
-        pad_size = (
-            np.ceil(pad_size / self.stride) * self.stride).astype(int)
-        x = np.zeros(
-            (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32)
-        for i, im in enumerate(resized_imgs):
-            _, H, W = im.shape
-            x[i, :, :H, :W] = im
-        x = self.xp.array(x)
-
-        return x, scales
-
-
-def _list_to_flat(array_list):
-    xp = chainer.backends.cuda.get_array_module(array_list[0])
-
-    indices = xp.concatenate(
-        [i * xp.ones((len(array),), dtype=np.int32) for
-         i, array in enumerate(array_list)], axis=0)
-    flat = xp.concatenate(array_list, axis=0)
-    return flat, indices
-
-
-def _flat_to_list(flat, indices, B):
-    array_list = []
-    for i in range(B):
-        array = flat[indices == i]
-        if len(array) > 0:
-            array_list.append(array)
-        else:
-            array_list.append(None)
-    return array_list
diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py b/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py
deleted file mode 100644
index d18f92f628..0000000000
--- a/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py
+++ /dev/null
@@ -1,133 +0,0 @@
-from __future__ import division
-
-import chainer
-import chainer.functions as F
-
-from chainercv.links.model.fpn import FPN
-from chainercv.links.model.fpn import Head
-from chainercv.links.model.fpn import RPN
-from chainercv.links.model.mask_rcnn.mask_head import MaskHead
-from chainercv.links.model.mask_rcnn.mask_rcnn import MaskRCNN
-from chainercv.links.model.resnet import ResNet101
-from chainercv.links.model.resnet import ResNet50
-from chainercv import utils
-
-from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import _copyparams
-
-
-class MaskRCNNFPNResNet(MaskRCNN):
-
-    """Base class for Mask R-CNN with ResNet backbone.
-
-    A subclass of this class should have :obj:`_base` and :obj:`_models`.
-    """
-
-    def __init__(self, n_fg_class=None, pretrained_model=None):
-        param, path = utils.prepare_pretrained_model(
-            {'n_fg_class': n_fg_class}, pretrained_model, self._models)
-
-        base = self._base(n_class=1, arch='he')
-        base.pick = ('res2', 'res3', 'res4', 'res5')
-        base.pool1 = lambda x: F.max_pooling_2d(
-            x, 3, stride=2, pad=1, cover_all=False)
-        base.remove_unused()
-        extractor = FPN(
-            base, len(base.pick), (1 / 4, 1 / 8, 1 / 16, 1 / 32, 1 / 64))
-
-        n_class = param['n_fg_class'] + 1
-        super(MaskRCNNFPNResNet, self).__init__(
-            extractor=extractor,
-            rpn=RPN(extractor.scales),
-            head=Head(n_class, extractor.scales),
-            mask_head=MaskHead(n_class, extractor.scales)
-        )
-        if path == 'imagenet':
-            _copyparams(
-                self.extractor.base,
-                self._base(pretrained_model='imagenet', arch='he'))
-        elif path:
-            chainer.serializers.load_npz(path, self)
-
-
-class MaskRCNNFPNResNet50(MaskRCNNFPNResNet):
-
-    """Mask R-CNN with ResNet-50.
-
-    This is a model of Mask R-CNN [#]_.
-    This model uses :class:`~chainercv.links.ResNet50` as
-    its base feature extractor.
-
-    .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017
-
-    Args:
-       n_fg_class (int): The number of classes excluding the background.
-       pretrained_model (string): The weight file to be loaded.
-           This can take :obj:`'coco'`, `filepath` or :obj:`None`.
-           The default value is :obj:`None`.
-
-            * :obj:`'coco'`: Load weights trained on train split of \
-                MS COCO 2017. \
-                The weight file is downloaded and cached automatically. \
-                :obj:`n_fg_class` must be :obj:`80` or :obj:`None`.
-            * :obj:`'imagenet'`: Load weights of ResNet-50 trained on \
-                ImageNet. \
-                The weight file is downloaded and cached automatically. \
-                This option initializes weights partially and the rests are \
-                initialized randomly. In this case, :obj:`n_fg_class` \
-                can be set to any number.
-            * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \
-                must be specified properly.
-            * :obj:`None`: Do not load weights.
-
-    """
-
-    _base = ResNet50
-    _models = {
-        'coco': {
-            'param': {'n_fg_class': 80},
-            'url': None,
-            'cv2': True
-        },
-    }
-
-
-class MaskRCNNFPNResNet101(MaskRCNNFPNResNet):
-
-    """Mask R-CNN with ResNet-101.
-
-    This is a model of Mask R-CNN [#]_.
-    This model uses :class:`~chainercv.links.ResNet101` as
-    its base feature extractor.
-
-    .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017
-
-    Args:
-       n_fg_class (int): The number of classes excluding the background.
-       pretrained_model (string): The weight file to be loaded.
-           This can take :obj:`'coco'`, `filepath` or :obj:`None`.
-           The default value is :obj:`None`.
-
-            * :obj:`'coco'`: Load weights trained on train split of \
-                MS COCO 2017. \
-                The weight file is downloaded and cached automatically. \
-                :obj:`n_fg_class` must be :obj:`80` or :obj:`None`.
-            * :obj:`'imagenet'`: Load weights of ResNet-101 trained on \
-                ImageNet. \
-                The weight file is downloaded and cached automatically. \
-                This option initializes weights partially and the rests are \
-                initialized randomly. In this case, :obj:`n_fg_class` \
-                can be set to any number.
-            * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \
-                must be specified properly.
-            * :obj:`None`: Do not load weights.
-
-    """
-
-    _base = ResNet101
-    _models = {
-        'coco': {
-            'param': {'n_fg_class': 80},
-            'url': None,
-            'cv2': True
-        },
-    }
diff --git a/examples/fpn/demo.py b/examples/fpn/demo.py
index 053d0351e2..0d615cacfb 100644
--- a/examples/fpn/demo.py
+++ b/examples/fpn/demo.py
@@ -4,17 +4,22 @@
 import chainer
 
 from chainercv.datasets import coco_bbox_label_names
+from chainercv.datasets import coco_instance_segmentation_label_names
 from chainercv.links import FasterRCNNFPNResNet101
 from chainercv.links import FasterRCNNFPNResNet50
+from chainercv.links import MaskRCNNFPNResNet101
+from chainercv.links import MaskRCNNFPNResNet50
 from chainercv import utils
 from chainercv.visualizations import vis_bbox
+from chainercv.visualizations import vis_instance_segmentation
 
 
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument(
         '--model',
-        choices=('faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101'),
+        choices=('faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101',
+                 'mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'),
         default='faster_rcnn_fpn_resnet50')
     parser.add_argument('--gpu', type=int, default=-1)
     parser.add_argument('--pretrained-model', default='coco')
@@ -22,26 +27,48 @@ def main():
     args = parser.parse_args()
 
     if args.model == 'faster_rcnn_fpn_resnet50':
+        mode = 'bbox'
         model = FasterRCNNFPNResNet50(
             n_fg_class=len(coco_bbox_label_names),
             pretrained_model=args.pretrained_model)
     elif args.model == 'faster_rcnn_fpn_resnet101':
+        mode = 'bbox'
         model = FasterRCNNFPNResNet101(
             n_fg_class=len(coco_bbox_label_names),
             pretrained_model=args.pretrained_model)
+    elif args.model == 'mask_rcnn_fpn_resnet50':
+        mode = 'instance_segmentation'
+        model = MaskRCNNFPNResNet50(
+            n_fg_class=len(coco_instance_segmentation_label_names),
+            pretrained_model=args.pretrained_model)
+    elif args.model == 'mask_rcnn_fpn_resnet101':
+        mode = 'instance_segmentation'
+        model = MaskRCNNFPNResNet101(
+            n_fg_class=len(coco_instance_segmentation_label_names),
+            pretrained_model=args.pretrained_model)
 
     if args.gpu >= 0:
         chainer.cuda.get_device_from_id(args.gpu).use()
         model.to_gpu()
 
     img = utils.read_image(args.image)
-    bboxes, labels, scores = model.predict([img])
-    bbox = bboxes[0]
-    label = labels[0]
-    score = scores[0]
 
-    vis_bbox(
-        img, bbox, label, score, label_names=coco_bbox_label_names)
+    if mode == 'bbox':
+        bboxes, labels, scores = model.predict([img])
+        bbox = bboxes[0]
+        label = labels[0]
+        score = scores[0]
+
+        vis_bbox(
+            img, bbox, label, score, label_names=coco_bbox_label_names)
+    elif mode == 'instance_segmentation':
+        masks, labels, scores = model.predict([img])
+        mask = masks[0]
+        label = labels[0]
+        score = scores[0]
+        vis_instance_segmentation(
+            img, mask, label, score,
+            label_names=coco_instance_segmentation_label_names)
     plt.show()
 
 
diff --git a/examples/fpn/train_multi.py b/examples/fpn/train_multi.py
index 1adff045d8..2bd2d26916 100644
--- a/examples/fpn/train_multi.py
+++ b/examples/fpn/train_multi.py
@@ -1,10 +1,10 @@
-from __future__ import division
-
 import argparse
 import multiprocessing
 import numpy as np
+import PIL
 
 import chainer
+import chainer.functions as F
 import chainer.links as L
 from chainer.optimizer_hooks import WeightDecay
 from chainer import serializers
@@ -15,15 +15,18 @@
 
 from chainercv.chainer_experimental.datasets.sliceable import TransformDataset
 from chainercv.chainer_experimental.training.extensions import make_shift
-from chainercv.datasets import coco_bbox_label_names
-from chainercv.datasets import COCOBboxDataset
-from chainercv.links import FasterRCNNFPNResNet101
-from chainercv.links import FasterRCNNFPNResNet50
+from chainercv.datasets import coco_instance_segmentation_label_names
+from chainercv.datasets import COCOInstanceSegmentationDataset
+# from chainercv.links import MaskRCNNFPNResNet101
+# from chainercv.links import MaskRCNNFPNResNet50
+from chainercv.links.model.mask_rcnn.misc import scale_img
 from chainercv import transforms
 
 from chainercv.links.model.fpn import head_loss_post
 from chainercv.links.model.fpn import head_loss_pre
 from chainercv.links.model.fpn import rpn_loss
+from chainercv.links.model.mask_rcnn import mask_loss_post
+from chainercv.links.model.mask_rcnn import mask_loss_pre
 
 # https://docs.chainer.org/en/stable/tips.html#my-training-process-gets-stuck-when-using-multiprocessiterator
 try:
@@ -40,11 +43,33 @@ def __init__(self, model):
         with self.init_scope():
             self.model = model
 
-    def __call__(self, imgs, bboxes, labels):
-        x, scales = self.model.prepare(imgs)
-        bboxes = [self.xp.array(bbox) * scale
-                  for bbox, scale in zip(bboxes, scales)]
+    def __call__(self, imgs, masks, labels, bboxes):
+        B = len(imgs)
+        pad_size = np.array(
+            [im.shape[1:] for im in imgs]).max(axis=0)
+        pad_size = (
+            np.ceil(
+                pad_size / self.model.stride) * self.model.stride).astype(int)
+        x = np.zeros(
+            (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32)
+        for i, img in enumerate(imgs):
+            _, H, W = img.shape
+            x[i, :, :H, :W] = img
+        x = self.xp.array(x)
+
+        # To avoid unnecessary CPU/GPU copies, `masks` is kept on the CPU.
+        pad_masks = [
+            np.zeros(
+                (mask.shape[0], pad_size[0], pad_size[1]), dtype=np.bool)
+            for mask in masks]
+        for i, mask in enumerate(masks):
+            _, H, W = mask.shape
+            pad_masks[i][:, :H, :W] = mask
+        masks = pad_masks
+
+        bboxes = [self.xp.array(bbox) for bbox in bboxes]
         labels = [self.xp.array(label) for label in labels]
+        sizes = [img.shape[1:] for img in imgs]
 
         with chainer.using_config('train', False):
             hs = self.model.extractor(x)
@@ -52,10 +77,7 @@ def __call__(self, imgs, bboxes, labels):
         rpn_locs, rpn_confs = self.model.rpn(hs)
         anchors = self.model.rpn.anchors(h.shape[2:] for h in hs)
         rpn_loc_loss, rpn_conf_loss = rpn_loss(
-            rpn_locs, rpn_confs, anchors,
-            [(int(img.shape[1] * scale), int(img.shape[2] * scale))
-             for img, scale in zip(imgs, scales)],
-            bboxes)
+            rpn_locs, rpn_confs, anchors, sizes, bboxes)
 
         rois, roi_indices = self.model.rpn.decode(
             rpn_locs, rpn_confs, anchors, x.shape)
@@ -70,27 +92,59 @@ def __call__(self, imgs, bboxes, labels):
         head_locs, head_confs = self.model.head(hs, rois, roi_indices)
         head_loc_loss, head_conf_loss = head_loss_post(
             head_locs, head_confs,
-            roi_indices, head_gt_locs, head_gt_labels, len(x))
-
-        loss = rpn_loc_loss + rpn_conf_loss + head_loc_loss + head_conf_loss
+            roi_indices, head_gt_locs, head_gt_labels, B)
+
+        mask_rois, mask_roi_indices, gt_segms, gt_mask_labels = mask_loss_pre(
+            rois, roi_indices, masks, bboxes,
+            head_gt_labels, self.model.mask_head.segm_size)
+        n_roi = sum([len(roi) for roi in mask_rois])
+        if n_roi > 0:
+            segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)
+            mask_loss = mask_loss_post(
+                segms, mask_roi_indices, gt_segms, gt_mask_labels, B)
+        else:
+            # Compute dummy variables to complete the computational graph
+            mask_rois[0] = self.xp.array([[0, 0, 1, 1]], dtype=np.float32)
+            mask_roi_indices[0] = self.xp.array([0], dtype=np.int32)
+            segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)
+            mask_loss = 0 * F.sum(segms)
+        loss = (rpn_loc_loss + rpn_conf_loss +
+                head_loc_loss + head_conf_loss + mask_loss)
         chainer.reporter.report({
             'loss': loss,
             'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss,
-            'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss},
+            'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss,
+            'loss/mask': mask_loss},
             self)
-
         return loss
 
 
-def transform(in_data):
-    img, bbox, label = in_data
+class Transform(object):
+
+    def __init__(self, min_size, max_size, mean):
+        self.min_size = min_size
+        self.max_size = max_size
+        self.mean = mean
 
-    img, params = transforms.random_flip(
-        img, x_random=True, return_param=True)
-    bbox = transforms.flip_bbox(
-        bbox, img.shape[1:], x_flip=params['x_flip'])
+    def __call__(self, in_data):
+        img, mask, label, bbox = in_data
+        # Flipping
+        img, params = transforms.random_flip(
+            img, x_random=True, return_param=True)
+        mask = transforms.flip(mask, x_flip=params['x_flip'])
+        bbox = transforms.flip_bbox(
+            bbox, img.shape[1:], x_flip=params['x_flip'])
 
-    return img, bbox, label
+        # Scaling and mean subtraction
+        img, scale = scale_img(
+            img, self.min_size, self.max_size)
+        img -= self.mean
+        mask = transforms.resize(
+            mask.astype(np.float32),
+            img.shape[1:],
+            interpolation=PIL.Image.NEAREST).astype(np.bool)
+        bbox = bbox * scale
+        return img, mask, label, bbox, scale
 
 
 def converter(batch, device=None):
@@ -102,13 +156,14 @@ def main():
     parser = argparse.ArgumentParser()
     parser.add_argument(
         '--model',
-        choices=('faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101'),
-        default='faster_rcnn_fpn_resnet50')
+        choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'),
+        default='mask_rcnn_fpn_resnet50')
     parser.add_argument('--batchsize', type=int, default=16)
     parser.add_argument('--iteration', type=int, default=90000)
     parser.add_argument('--step', type=int, nargs='*', default=[60000, 80000])
     parser.add_argument('--out', default='result')
     parser.add_argument('--resume')
+    parser.add_argument('--communicator', default='hierarchical')
     args = parser.parse_args()
 
     # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
@@ -118,15 +173,17 @@ def main():
         p.start()
         p.join()
 
-    comm = chainermn.create_communicator()
+    comm = chainermn.create_communicator(args.communicator)
     device = comm.intra_rank
 
-    if args.model == 'faster_rcnn_fpn_resnet50':
-        model = FasterRCNNFPNResNet50(
-            n_fg_class=len(coco_bbox_label_names), pretrained_model='imagenet')
-    elif args.model == 'faster_rcnn_fpn_resnet101':
-        model = FasterRCNNFPNResNet101(
-            n_fg_class=len(coco_bbox_label_names), pretrained_model='imagenet')
+    if args.model == 'mask_rcnn_fpn_resnet50':
+        model = MaskRCNNFPNResNet50(
+            n_fg_class=len(coco_instance_segmentation_label_names),
+            pretrained_model='imagenet')
+    elif args.model == 'mask_rcnn_fpn_resnet101':
+        model = MaskRCNNFPNResNet101(
+            n_fg_class=len(coco_instance_segmentation_label_names),
+            pretrained_model='imagenet')
 
     model.use_preset('evaluate')
     train_chain = TrainChain(model)
@@ -134,8 +191,11 @@ def main():
     train_chain.to_gpu()
 
     train = TransformDataset(
-        COCOBboxDataset(year='2017', split='train'),
-        ('img', 'bbox', 'label'), transform)
+        COCOInstanceSegmentationDataset(
+            data_dir='/home/yuyu2172/coco',
+            split='train', return_bbox=True),
+        ('img', 'mask', 'label', 'bbox'),
+        Transform(model.min_size, model.max_size, model.extractor.mean))
 
     if comm.rank == 0:
         indices = np.arange(len(train))
@@ -144,8 +204,10 @@ def main():
     indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
     train = train.slice[indices]
 
-    train_iter = chainer.iterators.MultithreadIterator(
-        train, args.batchsize // comm.size)
+    train_iter = chainer.iterators.MultiprocessIterator(
+        train, args.batchsize // comm.size,
+        n_processes=args.batchsize // comm.size,
+        shared_mem=100 * 1000 * 1000 * 4)
 
     optimizer = chainermn.create_multi_node_optimizer(
         chainer.optimizers.MomentumSGD(), comm)
@@ -158,10 +220,11 @@ def main():
         if isinstance(link, L.BatchNormalization):
             link.disable_update()
 
+    n_iteration = args.iteration * 16 / args.batchsize
     updater = training.updaters.StandardUpdater(
         train_iter, optimizer, converter=converter, device=device)
     trainer = training.Trainer(
-        updater, (args.iteration * 16 / args.batchsize, 'iteration'), args.out)
+        updater, (n_iteration, 'iteration'), args.out)
 
     @make_shift('lr')
     def lr_schedule(trainer):
@@ -190,7 +253,9 @@ def lr_schedule(trainer):
         trainer.extend(extensions.PrintReport(
             ['epoch', 'iteration', 'lr', 'main/loss',
              'main/loss/rpn/loc', 'main/loss/rpn/conf',
-             'main/loss/head/loc', 'main/loss/head/conf']),
+             'main/loss/head/loc', 'main/loss/head/conf',
+             'main/loss/mask'
+             ]),
             trigger=log_interval)
         trainer.extend(extensions.ProgressBar(update_interval=10))
 
@@ -198,7 +263,7 @@ def lr_schedule(trainer):
         trainer.extend(
             extensions.snapshot_object(
                 model, 'model_iter_{.updater.iteration}'),
-            trigger=(90000 * 16 / args.batchsize, 'iteration'))
+            trigger=(n_iteration, 'iteration'))
 
     if args.resume:
         serializers.load_npz(args.resume, trainer, strict=False)
diff --git a/examples/mask_rcnn/demo.py b/examples/mask_rcnn/demo.py
deleted file mode 100644
index d95eacc567..0000000000
--- a/examples/mask_rcnn/demo.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import argparse
-import matplotlib.pyplot as plt
-
-import chainer
-
-import chainercv
-from chainercv.datasets import coco_instance_segmentation_label_names
-from chainercv import utils
-
-from chainercv.links import MaskRCNNFPNResNet101
-from chainercv.links import MaskRCNNFPNResNet50
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        '--model',
-        choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'),
-        default='mask_rcnn_fpn_resnet50'
-    )
-    parser.add_argument('--gpu', type=int, default=-1)
-    parser.add_argument('--pretrained-model', default='coco')
-    parser.add_argument('image')
-    args = parser.parse_args()
-
-    if args.model == 'mask_rcnn_fpn_resnet50':
-        model = MaskRCNNFPNResNet50(
-            n_fg_class=len(coco_instance_segmentation_label_names),
-            pretrained_model=args.pretrained_model)
-    elif args.model == 'mask_rcnn_fpn_resnet101':
-        model = MaskRCNNFPNResNet101(
-            n_fg_class=len(coco_instance_segmentation_label_names),
-            pretrained_model=args.pretrained_model)
-
-    if args.gpu >= 0:
-        chainer.cuda.get_device_from_id(args.gpu).use()
-        model.to_gpu()
-
-    img = utils.read_image(args.image)
-    masks, labels, scores = model.predict([img])
-    mask = masks[0]
-    label = labels[0]
-    score = scores[0]
-    chainercv.visualizations.vis_instance_segmentation(
-        img, mask, label, score,
-        label_names=coco_instance_segmentation_label_names)
-    plt.show()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/examples/mask_rcnn/train_multi.py b/examples/mask_rcnn/train_multi.py
deleted file mode 100644
index 921b1e53dc..0000000000
--- a/examples/mask_rcnn/train_multi.py
+++ /dev/null
@@ -1,275 +0,0 @@
-import argparse
-import multiprocessing
-import numpy as np
-import PIL
-
-import chainer
-import chainer.functions as F
-import chainer.links as L
-from chainer.optimizer_hooks import WeightDecay
-from chainer import serializers
-from chainer import training
-from chainer.training import extensions
-
-import chainermn
-
-from chainercv.chainer_experimental.datasets.sliceable import TransformDataset
-from chainercv.chainer_experimental.training.extensions import make_shift
-from chainercv.datasets import coco_instance_segmentation_label_names
-from chainercv.datasets import COCOInstanceSegmentationDataset
-from chainercv.links import MaskRCNNFPNResNet101
-from chainercv.links import MaskRCNNFPNResNet50
-from chainercv.links.model.mask_rcnn.misc import scale_img
-from chainercv import transforms
-
-from chainercv.links.model.fpn import head_loss_post
-from chainercv.links.model.fpn import head_loss_pre
-from chainercv.links.model.fpn import rpn_loss
-from chainercv.links.model.mask_rcnn import mask_loss_post
-from chainercv.links.model.mask_rcnn import mask_loss_pre
-
-# https://docs.chainer.org/en/stable/tips.html#my-training-process-gets-stuck-when-using-multiprocessiterator
-try:
-    import cv2
-    cv2.setNumThreads(0)
-except ImportError:
-    pass
-
-
-class TrainChain(chainer.Chain):
-
-    def __init__(self, model):
-        super(TrainChain, self).__init__()
-        with self.init_scope():
-            self.model = model
-
-    def __call__(self, imgs, masks, labels, bboxes):
-        B = len(imgs)
-        pad_size = np.array(
-            [im.shape[1:] for im in imgs]).max(axis=0)
-        pad_size = (
-            np.ceil(
-                pad_size / self.model.stride) * self.model.stride).astype(int)
-        x = np.zeros(
-            (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32)
-        for i, img in enumerate(imgs):
-            _, H, W = img.shape
-            x[i, :, :H, :W] = img
-        x = self.xp.array(x)
-
-        # For reducing unnecessary CPU/GPU copy, `masks` is kept in CPU.
-        pad_masks = [
-            np.zeros(
-                (mask.shape[0], pad_size[0], pad_size[1]), dtype=np.bool)
-            for mask in masks]
-        for i, mask in enumerate(masks):
-            _, H, W = mask.shape
-            pad_masks[i][:, :H, :W] = mask
-        masks = pad_masks
-
-        bboxes = [self.xp.array(bbox) for bbox in bboxes]
-        labels = [self.xp.array(label) for label in labels]
-        sizes = [img.shape[1:] for img in imgs]
-
-        with chainer.using_config('train', False):
-            hs = self.model.extractor(x)
-
-        rpn_locs, rpn_confs = self.model.rpn(hs)
-        anchors = self.model.rpn.anchors(h.shape[2:] for h in hs)
-        rpn_loc_loss, rpn_conf_loss = rpn_loss(
-            rpn_locs, rpn_confs, anchors, sizes, bboxes)
-
-        rois, roi_indices = self.model.rpn.decode(
-            rpn_locs, rpn_confs, anchors, x.shape)
-        rois = self.xp.vstack([rois] + bboxes)
-        roi_indices = self.xp.hstack(
-            [roi_indices]
-            + [self.xp.array((i,) * len(bbox))
-               for i, bbox in enumerate(bboxes)])
-        rois, roi_indices = self.model.head.distribute(rois, roi_indices)
-        rois, roi_indices, head_gt_locs, head_gt_labels = head_loss_pre(
-            rois, roi_indices, self.model.head.std, bboxes, labels)
-        head_locs, head_confs = self.model.head(hs, rois, roi_indices)
-        head_loc_loss, head_conf_loss = head_loss_post(
-            head_locs, head_confs,
-            roi_indices, head_gt_locs, head_gt_labels, B)
-
-        mask_rois, mask_roi_indices, gt_segms, gt_mask_labels = mask_loss_pre(
-            rois, roi_indices, masks, bboxes,
-            head_gt_labels, self.model.mask_head.segm_size)
-        n_roi = sum([len(roi) for roi in mask_rois])
-        if n_roi > 0:
-            segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)
-            mask_loss = mask_loss_post(
-                segms, mask_roi_indices, gt_segms, gt_mask_labels, B)
-        else:
-            # Compute dummy variables to complete the computational graph
-            mask_rois[0] = self.xp.array([[0, 0, 1, 1]], dtype=np.float32)
-            mask_roi_indices[0] = self.xp.array([0], dtype=np.int32)
-            segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)
-            mask_loss = 0 * F.sum(segms)
-        loss = (rpn_loc_loss + rpn_conf_loss +
-                head_loc_loss + head_conf_loss + mask_loss)
-        chainer.reporter.report({
-            'loss': loss,
-            'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss,
-            'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss,
-            'loss/mask': mask_loss},
-            self)
-        return loss
-
-
-class Transform(object):
-
-    def __init__(self, min_size, max_size, mean):
-        self.min_size = min_size
-        self.max_size = max_size
-        self.mean = mean
-
-    def __call__(self, in_data):
-        img, mask, label, bbox = in_data
-        # Flipping
-        img, params = transforms.random_flip(
-            img, x_random=True, return_param=True)
-        mask = transforms.flip(mask, x_flip=params['x_flip'])
-        bbox = transforms.flip_bbox(
-            bbox, img.shape[1:], x_flip=params['x_flip'])
-
-        # Scaling and mean subtraction
-        img, scale = scale_img(
-            img, self.min_size, self.max_size)
-        img -= self.mean
-        mask = transforms.resize(
-            mask.astype(np.float32),
-            img.shape[1:],
-            interpolation=PIL.Image.NEAREST).astype(np.bool)
-        bbox = bbox * scale
-        return img, mask, label, bbox, scale
-
-
-def converter(batch, device=None):
-    # do not send data to gpu (device is ignored)
-    return tuple(list(v) for v in zip(*batch))
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        '--model',
-        choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'),
-        default='mask_rcnn_fpn_resnet50')
-    parser.add_argument('--batchsize', type=int, default=16)
-    parser.add_argument('--iteration', type=int, default=90000)
-    parser.add_argument('--step', type=int, nargs='*', default=[60000, 80000])
-    parser.add_argument('--out', default='result')
-    parser.add_argument('--resume')
-    parser.add_argument('--communicator', default='hierarchical')
-    args = parser.parse_args()
-
-    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
-    if hasattr(multiprocessing, 'set_start_method'):
-        multiprocessing.set_start_method('forkserver')
-        p = multiprocessing.Process()
-        p.start()
-        p.join()
-
-    comm = chainermn.create_communicator(args.communicator)
-    device = comm.intra_rank
-
-    if args.model == 'mask_rcnn_fpn_resnet50':
-        model = MaskRCNNFPNResNet50(
-            n_fg_class=len(coco_instance_segmentation_label_names),
-            pretrained_model='imagenet')
-    elif args.model == 'mask_rcnn_fpn_resnet101':
-        model = MaskRCNNFPNResNet101(
-            n_fg_class=len(coco_instance_segmentation_label_names),
-            pretrained_model='imagenet')
-
-    model.use_preset('evaluate')
-    train_chain = TrainChain(model)
-    chainer.cuda.get_device_from_id(device).use()
-    train_chain.to_gpu()
-
-    train = TransformDataset(
-        COCOInstanceSegmentationDataset(
-            data_dir='/home/yuyu2172/coco',
-            split='train', return_bbox=True),
-        ('img', 'mask', 'label', 'bbox'),
-        Transform(model.min_size, model.max_size, model.extractor.mean))
-
-    if comm.rank == 0:
-        indices = np.arange(len(train))
-    else:
-        indices = None
-    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
-    train = train.slice[indices]
-
-    train_iter = chainer.iterators.MultiprocessIterator(
-        train, args.batchsize // comm.size,
-        n_processes=args.batchsize // comm.size,
-        shared_mem=100 * 1000 * 1000 * 4)
-
-    optimizer = chainermn.create_multi_node_optimizer(
-        chainer.optimizers.MomentumSGD(), comm)
-    optimizer.setup(train_chain)
-    optimizer.add_hook(WeightDecay(0.0001))
-
-    model.extractor.base.conv1.disable_update()
-    model.extractor.base.res2.disable_update()
-    for link in model.links():
-        if isinstance(link, L.BatchNormalization):
-            link.disable_update()
-
-    n_iteration = args.iteration * 16 / args.batchsize
-    updater = training.updaters.StandardUpdater(
-        train_iter, optimizer, converter=converter, device=device)
-    trainer = training.Trainer(
-        updater, (n_iteration, 'iteration'), args.out)
-
-    @make_shift('lr')
-    def lr_schedule(trainer):
-        base_lr = 0.02 * args.batchsize / 16
-        warm_up_duration = 500
-        warm_up_rate = 1 / 3
-
-        iteration = trainer.updater.iteration
-        if iteration < warm_up_duration:
-            rate = warm_up_rate \
-                + (1 - warm_up_rate) * iteration / warm_up_duration
-        else:
-            rate = 1
-            for step in args.step:
-                if iteration >= step * 16 / args.batchsize:
-                    rate *= 0.1
-
-        return base_lr * rate
-
-    trainer.extend(lr_schedule)
-
-    if comm.rank == 0:
-        log_interval = 10, 'iteration'
-        trainer.extend(extensions.LogReport(trigger=log_interval))
-        trainer.extend(extensions.observe_lr(), trigger=log_interval)
-        trainer.extend(extensions.PrintReport(
-            ['epoch', 'iteration', 'lr', 'main/loss',
-             'main/loss/rpn/loc', 'main/loss/rpn/conf',
-             'main/loss/head/loc', 'main/loss/head/conf',
-             'main/loss/mask'
-             ]),
-            trigger=log_interval)
-        trainer.extend(extensions.ProgressBar(update_interval=10))
-
-        trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
-        trainer.extend(
-            extensions.snapshot_object(
-                model, 'model_iter_{.updater.iteration}'),
-            trigger=(n_iteration, 'iteration'))
-
-    if args.resume:
-        serializers.load_npz(args.resume, trainer, strict=False)
-
-    trainer.run()
-
-
-if __name__ == '__main__':
-    main()

From ffc87ddb31281c10fd51a5c091443c9640da8605 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 15 Mar 2019 15:07:01 +0900
Subject: [PATCH 084/100] mask_utils: drop cv2 import and cv2_ variable prefix

---
 chainercv/links/model/fpn/mask_utils.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/chainercv/links/model/fpn/mask_utils.py b/chainercv/links/model/fpn/mask_utils.py
index d9167ec046..5c28e20232 100644
--- a/chainercv/links/model/fpn/mask_utils.py
+++ b/chainercv/links/model/fpn/mask_utils.py
@@ -1,6 +1,5 @@
 from __future__ import division
 
-import cv2
 import numpy as np
 
 import chainer
@@ -36,8 +35,8 @@ def mask_to_segm(mask, bbox, segm_size, index=None, pad=1):
     _, H, W = mask.shape
     bbox = chainer.backends.cuda.to_cpu(bbox)
     padded_segm_size = segm_size + pad * 2
-    cv2_expand_scale = padded_segm_size / segm_size
-    bbox = _integerize_bbox(_expand_boxes(bbox, cv2_expand_scale))
+    expand_scale = padded_segm_size / segm_size
+    bbox = _integerize_bbox(_expand_boxes(bbox, expand_scale))
 
     segm = []
     if index is None:
@@ -104,11 +103,11 @@ def segm_to_mask(segm, bbox, size, pad=1):
     # pixel prior to resizing back to the original image resolution.
     # This prevents "top hat" artifacts. We therefore need to expand
     # the reference boxes by an appropriate factor.
-    cv2_expand_scale = (segm_size + pad * 2) / segm_size
+    expand_scale = (segm_size + pad * 2) / segm_size
     padded_mask = np.zeros(
         (segm_size + pad * 2, segm_size + pad * 2), dtype=np.float32)
 
-    bbox = _integerize_bbox(_expand_boxes(bbox, cv2_expand_scale))
+    bbox = _integerize_bbox(_expand_boxes(bbox, expand_scale))
     for i, (bb, sgm) in enumerate(zip(bbox, segm)):
         padded_mask[1:-1, 1:-1] = sgm
 

From 42ca5d2950d4269c935891e785d06dfd4f1aa2ea Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 15 Mar 2019 15:35:17 +0900
Subject: [PATCH 085/100] train_multi: support Faster R-CNN and Mask R-CNN

---
 examples/fpn/train_multi.py | 145 ++++++++++++++++++++++--------------
 1 file changed, 89 insertions(+), 56 deletions(-)

diff --git a/examples/fpn/train_multi.py b/examples/fpn/train_multi.py
index 2bd2d26916..726147d5bf 100644
--- a/examples/fpn/train_multi.py
+++ b/examples/fpn/train_multi.py
@@ -15,18 +15,24 @@
 
 from chainercv.chainer_experimental.datasets.sliceable import TransformDataset
 from chainercv.chainer_experimental.training.extensions import make_shift
-from chainercv.datasets import coco_instance_segmentation_label_names
-from chainercv.datasets import COCOInstanceSegmentationDataset
-# from chainercv.links import MaskRCNNFPNResNet101
-# from chainercv.links import MaskRCNNFPNResNet50
-from chainercv.links.model.mask_rcnn.misc import scale_img
+from chainercv.links.model.fpn.misc import scale_img
 from chainercv import transforms
 
-from chainercv.links.model.fpn import head_loss_post
-from chainercv.links.model.fpn import head_loss_pre
+from chainercv.datasets import coco_instance_segmentation_label_names
+from chainercv.datasets import COCOInstanceSegmentationDataset
+from chainercv.links import MaskRCNNFPNResNet101
+from chainercv.links import MaskRCNNFPNResNet50
+
+from chainercv.datasets import coco_bbox_label_names
+from chainercv.datasets import COCOBboxDataset
+from chainercv.links import FasterRCNNFPNResNet101
+from chainercv.links import FasterRCNNFPNResNet50
+
+from chainercv.links.model.fpn import bbox_head_loss_post
+from chainercv.links.model.fpn import bbox_head_loss_pre
+from chainercv.links.model.fpn import mask_loss_post
+from chainercv.links.model.fpn import mask_loss_pre
 from chainercv.links.model.fpn import rpn_loss
-from chainercv.links.model.mask_rcnn import mask_loss_post
-from chainercv.links.model.mask_rcnn import mask_loss_pre
 
 # https://docs.chainer.org/en/stable/tips.html#my-training-process-gets-stuck-when-using-multiprocessiterator
 try:
@@ -43,7 +49,7 @@ def __init__(self, model):
         with self.init_scope():
             self.model = model
 
-    def __call__(self, imgs, masks, labels, bboxes):
+    def __call__(self, imgs, bboxes, labels, masks=None):
         B = len(imgs)
         pad_size = np.array(
             [im.shape[1:] for im in imgs]).max(axis=0)
@@ -57,16 +63,6 @@ def __call__(self, imgs, masks, labels, bboxes):
             x[i, :, :H, :W] = img
         x = self.xp.array(x)
 
-        # For reducing unnecessary CPU/GPU copy, `masks` is kept in CPU.
-        pad_masks = [
-            np.zeros(
-                (mask.shape[0], pad_size[0], pad_size[1]), dtype=np.bool)
-            for mask in masks]
-        for i, mask in enumerate(masks):
-            _, H, W = mask.shape
-            pad_masks[i][:, :H, :W] = mask
-        masks = pad_masks
-
         bboxes = [self.xp.array(bbox) for bbox in bboxes]
         labels = [self.xp.array(label) for label in labels]
         sizes = [img.shape[1:] for img in imgs]
@@ -87,34 +83,48 @@ def __call__(self, imgs, masks, labels, bboxes):
             + [self.xp.array((i,) * len(bbox))
                for i, bbox in enumerate(bboxes)])
         rois, roi_indices = self.model.head.distribute(rois, roi_indices)
-        rois, roi_indices, head_gt_locs, head_gt_labels = head_loss_pre(
+        rois, roi_indices, head_gt_locs, head_gt_labels = bbox_head_loss_pre(
             rois, roi_indices, self.model.head.std, bboxes, labels)
         head_locs, head_confs = self.model.head(hs, rois, roi_indices)
-        head_loc_loss, head_conf_loss = head_loss_post(
+        head_loc_loss, head_conf_loss = bbox_head_loss_post(
             head_locs, head_confs,
             roi_indices, head_gt_locs, head_gt_labels, B)
 
-        mask_rois, mask_roi_indices, gt_segms, gt_mask_labels = mask_loss_pre(
-            rois, roi_indices, masks, bboxes,
-            head_gt_labels, self.model.mask_head.segm_size)
-        n_roi = sum([len(roi) for roi in mask_rois])
-        if n_roi > 0:
-            segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)
-            mask_loss = mask_loss_post(
-                segms, mask_roi_indices, gt_segms, gt_mask_labels, B)
-        else:
-            # Compute dummy variables to complete the computational graph
-            mask_rois[0] = self.xp.array([[0, 0, 1, 1]], dtype=np.float32)
-            mask_roi_indices[0] = self.xp.array([0], dtype=np.int32)
-            segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)
-            mask_loss = 0 * F.sum(segms)
+        mask_loss = 0
+        if masks is not None:
+            # For reducing unnecessary CPU/GPU copy, `masks` is kept in CPU.
+            pad_masks = [
+                np.zeros(
+                    (mask.shape[0], pad_size[0], pad_size[1]), dtype=np.bool)
+                for mask in masks]
+            for i, mask in enumerate(masks):
+                _, H, W = mask.shape
+                pad_masks[i][:, :H, :W] = mask
+            masks = pad_masks
+
+            mask_rois, mask_roi_indices, gt_segms, gt_mask_labels =\
+                mask_loss_pre(
+                    rois, roi_indices, masks, bboxes,
+                    head_gt_labels, self.model.mask_head.segm_size)
+            n_roi = sum([len(roi) for roi in mask_rois])
+            if n_roi > 0:
+                segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)
+                mask_loss = mask_loss_post(
+                    segms, mask_roi_indices, gt_segms, gt_mask_labels, B)
+            else:
+                # Compute dummy variables to complete the computational graph
+                mask_rois[0] = self.xp.array([[0, 0, 1, 1]], dtype=np.float32)
+                mask_roi_indices[0] = self.xp.array([0], dtype=np.int32)
+                segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)
+                mask_loss = 0 * F.sum(segms)
         loss = (rpn_loc_loss + rpn_conf_loss +
                 head_loc_loss + head_conf_loss + mask_loss)
         chainer.reporter.report({
             'loss': loss,
             'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss,
-            'loss/head/loc': head_loc_loss, 'loss/head/conf': head_conf_loss,
-            'loss/mask': mask_loss},
+            'loss/bbox_head/loc': head_loc_loss,
+            'loss/bbox_head/conf': head_conf_loss,
+            'loss/mask_head': mask_loss},
             self)
         return loss
 
@@ -127,24 +137,30 @@ def __init__(self, min_size, max_size, mean):
         self.mean = mean
 
     def __call__(self, in_data):
-        img, mask, label, bbox = in_data
+        img, bbox, label = in_data[:3]
         # Flipping
         img, params = transforms.random_flip(
             img, x_random=True, return_param=True)
-        mask = transforms.flip(mask, x_flip=params['x_flip'])
+        x_flip = params['x_flip']
         bbox = transforms.flip_bbox(
-            bbox, img.shape[1:], x_flip=params['x_flip'])
+            bbox, img.shape[1:], x_flip=x_flip)
 
         # Scaling and mean subtraction
         img, scale = scale_img(
             img, self.min_size, self.max_size)
         img -= self.mean
-        mask = transforms.resize(
-            mask.astype(np.float32),
-            img.shape[1:],
-            interpolation=PIL.Image.NEAREST).astype(np.bool)
         bbox = bbox * scale
-        return img, mask, label, bbox, scale
+
+        if len(in_data) == 4:
+            mask = in_data[3]
+            mask = transforms.flip(mask, x_flip=x_flip)
+            mask = transforms.resize(
+                mask.astype(np.float32),
+                img.shape[1:],
+                interpolation=PIL.Image.NEAREST).astype(np.bool)
+            return img, bbox, label, mask
+        else:
+            return img, bbox, label
 
 
 def converter(batch, device=None):
@@ -156,7 +172,8 @@ def main():
     parser = argparse.ArgumentParser()
     parser.add_argument(
         '--model',
-        choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'),
+        choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101',
+                 'faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101'),
         default='mask_rcnn_fpn_resnet50')
     parser.add_argument('--batchsize', type=int, default=16)
     parser.add_argument('--iteration', type=int, default=90000)
@@ -176,11 +193,23 @@ def main():
     comm = chainermn.create_communicator(args.communicator)
     device = comm.intra_rank
 
-    if args.model == 'mask_rcnn_fpn_resnet50':
+    if args.model == 'faster_rcnn_fpn_resnet50':
+        mode = 'bbox'
+        model = FasterRCNNFPNResNet50(
+            n_fg_class=len(coco_bbox_label_names),
+            pretrained_model='imagenet')
+    elif args.model == 'faster_rcnn_fpn_resnet101':
+        mode = 'bbox'
+        model = FasterRCNNFPNResNet101(
+            n_fg_class=len(coco_bbox_label_names),
+            pretrained_model='imagenet')
+    elif args.model == 'mask_rcnn_fpn_resnet50':
+        mode = 'instance_segmentation'
         model = MaskRCNNFPNResNet50(
             n_fg_class=len(coco_instance_segmentation_label_names),
             pretrained_model='imagenet')
     elif args.model == 'mask_rcnn_fpn_resnet101':
+        mode = 'instance_segmentation'
         model = MaskRCNNFPNResNet101(
             n_fg_class=len(coco_instance_segmentation_label_names),
             pretrained_model='imagenet')
@@ -190,12 +219,16 @@ def main():
     chainer.cuda.get_device_from_id(device).use()
     train_chain.to_gpu()
 
-    train = TransformDataset(
-        COCOInstanceSegmentationDataset(
-            data_dir='/home/yuyu2172/coco',
-            split='train', return_bbox=True),
-        ('img', 'mask', 'label', 'bbox'),
-        Transform(model.min_size, model.max_size, model.extractor.mean))
+    if mode == 'bbox':
+        train = TransformDataset(
+            COCOBboxDataset(year='2017', split='train'),
+            ('img', 'bbox', 'label'),
+            Transform(model.min_size, model.max_size, model.extractor.mean))
+    elif mode == 'instance_segmentation':
+        train = TransformDataset(
+            COCOInstanceSegmentationDataset(split='train', return_bbox=True),
+            ('img', 'bbox', 'label', 'mask'),
+            Transform(model.min_size, model.max_size, model.extractor.mean))
 
     if comm.rank == 0:
         indices = np.arange(len(train))
@@ -253,8 +286,8 @@ def lr_schedule(trainer):
         trainer.extend(extensions.PrintReport(
             ['epoch', 'iteration', 'lr', 'main/loss',
              'main/loss/rpn/loc', 'main/loss/rpn/conf',
-             'main/loss/head/loc', 'main/loss/head/conf',
-             'main/loss/mask'
+             'main/loss/bbox_head/loc', 'main/loss/bbox_head/conf',
+             'main/loss/mask_head'
              ]),
             trigger=log_interval)
         trainer.extend(extensions.ProgressBar(update_interval=10))

From eeac6db5ebcdaf2650ff707b9f54936a303679df Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 15 Mar 2019 15:35:37 +0900
Subject: [PATCH 086/100] move MaskRCNNFPNResNet, update pretrained model URLs

---
 .../links/model/fpn/faster_rcnn_fpn_resnet.py | 47 ++++++++++---------
 1 file changed, 24 insertions(+), 23 deletions(-)

diff --git a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py
index 29e6119c0b..debadb10ea 100644
--- a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py
+++ b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py
@@ -75,6 +75,26 @@ def __init__(self, n_fg_class=None, pretrained_model=None,
             chainer.serializers.load_npz(path, self)
 
 
+class MaskRCNNFPNResNet(FasterRCNNFPNResNet):
+    """Feature Pyramid Networks with ResNet-50.
+
+    This is a model of Feature Pyramid Networks [#]_.
+    This model uses :class:`~chainercv.links.ResNet50` as
+    its base feature extractor.
+
+    .. [#] Tsung-Yi Lin et al.
+       Feature Pyramid Networks for Object Detection. CVPR 2017
+
+
+    """
+
+    def __init__(self, n_fg_class=None, pretrained_model=None,
+                 min_size=800, max_size=1333):
+        super(MaskRCNNFPNResNet, self).__init__(
+            n_fg_class, pretrained_model, ['masks', 'labels', 'scores'],
+            min_size, max_size)
+
+
 class FasterRCNNFPNResNet50(FasterRCNNFPNResNet):
     """Feature Pyramid Networks with ResNet-50.
 
@@ -93,7 +113,7 @@ class FasterRCNNFPNResNet50(FasterRCNNFPNResNet):
         'coco': {
             'param': {'n_fg_class': 80},
             'url': 'https://chainercv-models.preferred.jp/'
-            'faster_rcnn_fpn_resnet50_coco_trained_2018_12_13.npz',
+            'faster_rcnn_fpn_resnet50_coco_trained_2019_03_15.npz',
             'cv2': True
         },
     }
@@ -116,32 +136,12 @@ class FasterRCNNFPNResNet101(FasterRCNNFPNResNet):
         'coco': {
             'param': {'n_fg_class': 80},
             'url': 'https://chainercv-models.preferred.jp/'
-            'faster_rcnn_fpn_resnet101_coco_trained_2018_12_13.npz',
+            'faster_rcnn_fpn_resnet101_coco_trained_2019_03_15.npz',
             'cv2': True
         },
     }
 
 
-class MaskRCNNFPNResNet(FasterRCNNFPNResNet):
-    """Feature Pyramid Networks with ResNet-50.
-
-    This is a model of Feature Pyramid Networks [#]_.
-    This model uses :class:`~chainercv.links.ResNet50` as
-    its base feature extractor.
-
-    .. [#] Tsung-Yi Lin et al.
-       Feature Pyramid Networks for Object Detection. CVPR 2017
-
-
-    """
-
-    def __init__(self, n_fg_class=None, pretrained_model=None,
-                 min_size=800, max_size=1333):
-        super(MaskRCNNFPNResNet, self).__init__(
-            n_fg_class, pretrained_model, ['masks', 'labels', 'scores'],
-            min_size, max_size)
-
-
 class MaskRCNNFPNResNet50(MaskRCNNFPNResNet):
     """Feature Pyramid Networks with ResNet-50.
 
@@ -159,7 +159,8 @@ class MaskRCNNFPNResNet50(MaskRCNNFPNResNet):
     _models = {
         'coco': {
             'param': {'n_fg_class': 80},
-            'url': '',
+            'url': 'https://chainercv-models.preferred.jp/'
+            'faster_rcnn_fpn_resnet50_mask_coco_trained_2019_03_15.npz',
             'cv2': True
         },
     }

From 20414c46bf9d7d92293011ede21baac64add717e Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 15 Mar 2019 15:45:58 +0900
Subject: [PATCH 087/100] fix

---
 chainercv/links/model/fpn/__init__.py  |  6 ++++--
 chainercv/links/model/fpn/bbox_head.py |  4 ++--
 examples/fpn/train_multi.py            | 17 ++++++++++-------
 3 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/chainercv/links/model/fpn/__init__.py b/chainercv/links/model/fpn/__init__.py
index 78f6a7684b..aab5fb64cb 100644
--- a/chainercv/links/model/fpn/__init__.py
+++ b/chainercv/links/model/fpn/__init__.py
@@ -5,7 +5,9 @@
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet50  # NOQA
 from chainercv.links.model.fpn.fpn import FPN  # NOQA
 from chainercv.links.model.fpn.bbox_head import BboxHead  # NOQA
-from chainercv.links.model.fpn.bbox_head import bbox_head_loss_post  # NOQA
-from chainercv.links.model.fpn.bbox_head import bbox_head_loss_pre  # NOQA
+from chainercv.links.model.fpn.bbox_head import bbox_loss_post  # NOQA
+from chainercv.links.model.fpn.bbox_head import bbox_loss_pre  # NOQA
+from chainercv.links.model.fpn.mask_head import mask_loss_post  # NOQA
+from chainercv.links.model.fpn.mask_head import mask_loss_pre  # NOQA
 from chainercv.links.model.fpn.rpn import RPN  # NOQA
 from chainercv.links.model.fpn.rpn import rpn_loss  # NOQA
diff --git a/chainercv/links/model/fpn/bbox_head.py b/chainercv/links/model/fpn/bbox_head.py
index 502baf4775..199d0b3508 100644
--- a/chainercv/links/model/fpn/bbox_head.py
+++ b/chainercv/links/model/fpn/bbox_head.py
@@ -210,7 +210,7 @@ def decode(self, rois, roi_indices, locs, confs,
         return bboxes, labels, scores
 
 
-def bbox_head_loss_pre(rois, roi_indices, std, bboxes, labels):
+def bbox_loss_pre(rois, roi_indices, std, bboxes, labels):
     """Loss function for Head (pre).
 
     This function processes RoIs for :func:`bbox_head_loss_post`.
@@ -314,7 +314,7 @@ def bbox_head_loss_pre(rois, roi_indices, std, bboxes, labels):
     return rois, roi_indices, gt_locs, gt_labels
 
 
-def bbox_head_loss_post(locs, confs, roi_indices, gt_locs, gt_labels, batchsize):
+def bbox_loss_post(locs, confs, roi_indices, gt_locs, gt_labels, batchsize):
     """Loss function for Head (post).
 
      Args:
diff --git a/examples/fpn/train_multi.py b/examples/fpn/train_multi.py
index 726147d5bf..ffa308166d 100644
--- a/examples/fpn/train_multi.py
+++ b/examples/fpn/train_multi.py
@@ -28,8 +28,8 @@
 from chainercv.links import FasterRCNNFPNResNet101
 from chainercv.links import FasterRCNNFPNResNet50
 
-from chainercv.links.model.fpn import bbox_head_loss_post
-from chainercv.links.model.fpn import bbox_head_loss_pre
+from chainercv.links.model.fpn import bbox_loss_post
+from chainercv.links.model.fpn import bbox_loss_pre
 from chainercv.links.model.fpn import mask_loss_post
 from chainercv.links.model.fpn import mask_loss_pre
 from chainercv.links.model.fpn import rpn_loss
@@ -83,10 +83,10 @@ def __call__(self, imgs, bboxes, labels, masks=None):
             + [self.xp.array((i,) * len(bbox))
                for i, bbox in enumerate(bboxes)])
         rois, roi_indices = self.model.head.distribute(rois, roi_indices)
-        rois, roi_indices, head_gt_locs, head_gt_labels = bbox_head_loss_pre(
+        rois, roi_indices, head_gt_locs, head_gt_labels = bbox_loss_pre(
             rois, roi_indices, self.model.head.std, bboxes, labels)
         head_locs, head_confs = self.model.head(hs, rois, roi_indices)
-        head_loc_loss, head_conf_loss = bbox_head_loss_post(
+        head_loc_loss, head_conf_loss = bbox_loss_post(
             head_locs, head_confs,
             roi_indices, head_gt_locs, head_gt_labels, B)
 
@@ -170,11 +170,12 @@ def converter(batch, device=None):
 
 def main():
     parser = argparse.ArgumentParser()
+    parser.add_argument('--data-dir', default='auto')
     parser.add_argument(
         '--model',
         choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101',
                  'faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101'),
-        default='mask_rcnn_fpn_resnet50')
+        default='faster_rcnn_fpn_resnet50')
     parser.add_argument('--batchsize', type=int, default=16)
     parser.add_argument('--iteration', type=int, default=90000)
     parser.add_argument('--step', type=int, nargs='*', default=[60000, 80000])
@@ -221,12 +222,14 @@ def main():
 
     if mode == 'bbox':
         train = TransformDataset(
-            COCOBboxDataset(year='2017', split='train'),
+            COCOBboxDataset(
+                data_dir=args.data_dir, year='2017', split='train'),
             ('img', 'bbox', 'label'),
             Transform(model.min_size, model.max_size, model.extractor.mean))
     elif mode == 'instance_segmentation':
         train = TransformDataset(
-            COCOInstanceSegmentationDataset(split='train', return_bbox=True),
+            COCOInstanceSegmentationDataset(
+                data_dir=args.data_dir, split='train', return_bbox=True),
             ('img', 'bbox', 'label', 'mask'),
             Transform(model.min_size, model.max_size, model.extractor.mean))
 

From 1c31a000438d5d331fb847858712675365648dd4 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 15 Mar 2019 15:55:43 +0900
Subject: [PATCH 088/100] fix

---
 examples/fpn/train_multi.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/examples/fpn/train_multi.py b/examples/fpn/train_multi.py
index ffa308166d..9d3d08b633 100644
--- a/examples/fpn/train_multi.py
+++ b/examples/fpn/train_multi.py
@@ -82,10 +82,10 @@ def __call__(self, imgs, bboxes, labels, masks=None):
             [roi_indices]
             + [self.xp.array((i,) * len(bbox))
                for i, bbox in enumerate(bboxes)])
-        rois, roi_indices = self.model.head.distribute(rois, roi_indices)
+        rois, roi_indices = self.model.bbox_head.distribute(rois, roi_indices)
         rois, roi_indices, head_gt_locs, head_gt_labels = bbox_loss_pre(
-            rois, roi_indices, self.model.head.std, bboxes, labels)
-        head_locs, head_confs = self.model.head(hs, rois, roi_indices)
+            rois, roi_indices, self.model.bbox_head.std, bboxes, labels)
+        head_locs, head_confs = self.model.bbox_head(hs, rois, roi_indices)
         head_loc_loss, head_conf_loss = bbox_loss_post(
             head_locs, head_confs,
             roi_indices, head_gt_locs, head_gt_labels, B)
@@ -137,7 +137,10 @@ def __init__(self, min_size, max_size, mean):
         self.mean = mean
 
     def __call__(self, in_data):
-        img, bbox, label = in_data[:3]
+        if len(in_data) == 4:
+            img, mask, label, bbox = in_data
+        else:
+            img, bbox, label = in_data
         # Flipping
         img, params = transforms.random_flip(
             img, x_random=True, return_param=True)
@@ -152,7 +155,6 @@ def __call__(self, in_data):
         bbox = bbox * scale
 
         if len(in_data) == 4:
-            mask = in_data[3]
             mask = transforms.flip(mask, x_flip=x_flip)
             mask = transforms.resize(
                 mask.astype(np.float32),

From 80401b136053264ec702e8485fb355b573b2342c Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 15 Mar 2019 16:42:37 +0900
Subject: [PATCH 089/100] fix tests

---
 chainercv/links/model/fpn/__init__.py         |   1 +
 chainercv/links/model/fpn/faster_rcnn.py      |   8 +-
 .../model_tests/fpn_tests/test_faster_rcnn.py |  83 +++++++----
 .../test_mask_head.py                         |   6 +-
 .../test_mask_utils.py}                       |   4 +-
 .../mask_rcnn_tests/test_mask_rcnn.py         | 132 ------------------
 6 files changed, 66 insertions(+), 168 deletions(-)
 rename tests/links_tests/model_tests/{mask_rcnn_tests => fpn_tests}/test_mask_head.py (97%)
 rename tests/links_tests/model_tests/{mask_rcnn_tests/test_misc.py => fpn_tests/test_mask_utils.py} (92%)
 delete mode 100644 tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn.py

diff --git a/chainercv/links/model/fpn/__init__.py b/chainercv/links/model/fpn/__init__.py
index aab5fb64cb..7f2f16d62e 100644
--- a/chainercv/links/model/fpn/__init__.py
+++ b/chainercv/links/model/fpn/__init__.py
@@ -7,6 +7,7 @@
 from chainercv.links.model.fpn.bbox_head import BboxHead  # NOQA
 from chainercv.links.model.fpn.bbox_head import bbox_loss_post  # NOQA
 from chainercv.links.model.fpn.bbox_head import bbox_loss_pre  # NOQA
+from chainercv.links.model.fpn.mask_head import MaskHead  # NOQA
 from chainercv.links.model.fpn.mask_head import mask_loss_post  # NOQA
 from chainercv.links.model.fpn.mask_head import mask_loss_pre  # NOQA
 from chainercv.links.model.fpn.rpn import RPN  # NOQA
diff --git a/chainercv/links/model/fpn/faster_rcnn.py b/chainercv/links/model/fpn/faster_rcnn.py
index 40df122f81..68b4506233 100644
--- a/chainercv/links/model/fpn/faster_rcnn.py
+++ b/chainercv/links/model/fpn/faster_rcnn.py
@@ -152,8 +152,10 @@ def predict(self, imgs):
             hs, rpn_rois, rpn_roi_indices = self(x)
             if self._store_rpn_outputs:
                 rpn_rois_cpu = [
-                    chainer.backends.cuda.to_cpu(rpn_roi) for rpn_roi in
-                    _flat_to_list(rpn_rois, rpn_roi_indices, len(imgs))]
+                    chainer.backends.cuda.to_cpu(rpn_roi) / scale
+                    for rpn_roi, scale in
+                    zip(_flat_to_list(rpn_rois, rpn_roi_indices, len(imgs)),
+                        scales)]
                 output.update({'rois': rpn_rois_cpu})
 
         if self._run_bbox:
@@ -198,7 +200,7 @@ def predict(self, imgs):
             # Currently MaskHead only supports numpy inputs
             masks_cpu = self.mask_head.decode(segms, bboxes_cpu, labels_cpu, sizes)
             output.update({'masks': masks_cpu})
-        return (output[key] for key in self._return_values)
+        return tuple([output[key] for key in self._return_values])
 
     def prepare(self, imgs):
         """Preprocess images.
diff --git a/tests/links_tests/model_tests/fpn_tests/test_faster_rcnn.py b/tests/links_tests/model_tests/fpn_tests/test_faster_rcnn.py
index 1d245ac0bd..bebfa4a79b 100644
--- a/tests/links_tests/model_tests/fpn_tests/test_faster_rcnn.py
+++ b/tests/links_tests/model_tests/fpn_tests/test_faster_rcnn.py
@@ -7,10 +7,13 @@
 from chainer import testing
 from chainer.testing import attr
 
+from chainercv.links.model.fpn import BboxHead
 from chainercv.links.model.fpn import FasterRCNN
-from chainercv.links.model.fpn import Head
+from chainercv.links.model.fpn import MaskHead
 from chainercv.links.model.fpn import RPN
+from chainercv.utils import assert_is_bbox
 from chainercv.utils import assert_is_detection_link
+from chainercv.utils import assert_is_instance_segmentation_link
 
 
 def _random_array(xp, shape):
@@ -31,28 +34,35 @@ def __call__(self, x):
 
 class DummyFasterRCNN(FasterRCNN):
 
-    def __init__(self, n_fg_class, min_size, max_size):
+    def __init__(self, n_fg_class, return_values, min_size, max_size):
         extractor = DummyExtractor()
         super(DummyFasterRCNN, self).__init__(
             extractor=extractor,
             rpn=RPN(extractor.scales),
-            head=Head(n_fg_class + 1, extractor.scales),
+            bbox_head=BboxHead(n_fg_class + 1, extractor.scales),
+            mask_head=MaskHead(n_fg_class + 1, extractor.scales),
+            return_values=return_values,
             min_size=min_size, max_size=max_size,
         )
 
 
 @testing.parameterize(*testing.product_dict(
+    [
+        {'return_values': 'detection'},
+        {'return_values': 'instance_segmentation'},
+        {'return_values': 'rpn'}
+    ],
     [
         {'n_fg_class': 1},
         {'n_fg_class': 5},
         {'n_fg_class': 20},
     ],
     [
-        {
-            'in_sizes': [(480, 640), (320, 320)],
-            'min_size': 800, 'max_size': 1333,
-            'expected_shape': (800, 1088),
-        },
+        # {
+        #     'in_sizes': [(480, 640), (320, 320)],
+        #     'min_size': 800, 'max_size': 1333,
+        #     'expected_shape': (800, 1088),
+        # },
         {
             'in_sizes': [(200, 50), (400, 100)],
             'min_size': 200, 'max_size': 320,
@@ -63,7 +73,14 @@ def __init__(self, n_fg_class, min_size, max_size):
 class TestFasterRCNN(unittest.TestCase):
 
     def setUp(self):
+        if self.return_values == 'detection':
+            return_values = ['bboxes', 'labels', 'scores']
+        elif self.return_values == 'instance_segmentation':
+            return_values = ['masks', 'labels', 'scores']
+        elif self.return_values == 'rpn':
+            return_values = ['rois']
         self.link = DummyFasterRCNN(n_fg_class=self.n_fg_class,
+                                    return_values=return_values,
                                     min_size=self.min_size,
                                     max_size=self.max_size)
 
@@ -88,29 +105,20 @@ def test_use_preset(self):
     def _check_call(self):
         x = _random_array(self.link.xp, (2, 3, 32, 32))
         with chainer.using_config('train', False):
-            rois, roi_indices, head_locs, head_confs = self.link(x)
+            hs, rois, roi_indices = self.link(x)
 
-        self.assertEqual(len(rois), len(self.link.extractor.scales))
-        self.assertEqual(len(roi_indices), len(self.link.extractor.scales))
+        self.assertEqual(len(hs), len(self.link.extractor.scales))
         for l in range(len(self.link.extractor.scales)):
-            self.assertIsInstance(rois[l], self.link.xp.ndarray)
-            self.assertEqual(rois[l].shape[1:], (4,))
-
-            self.assertIsInstance(roi_indices[l], self.link.xp.ndarray)
-            self.assertEqual(roi_indices[l].shape[1:], ())
-
-            self.assertEqual(rois[l].shape[0], roi_indices[l].shape[0])
+            self.assertIsInstance(hs[l], chainer.Variable)
+            self.assertIsInstance(hs[l].data, self.link.xp.ndarray)
 
-        n_roi = sum(
-            len(rois[l]) for l in range(len(self.link.extractor.scales)))
+        self.assertIsInstance(rois, self.link.xp.ndarray)
+        self.assertEqual(rois.shape[1:], (4,))
 
-        self.assertIsInstance(head_locs, chainer.Variable)
-        self.assertIsInstance(head_locs.array, self.link.xp.ndarray)
-        self.assertEqual(head_locs.shape, (n_roi, self.n_fg_class + 1, 4))
+        self.assertIsInstance(roi_indices, self.link.xp.ndarray)
+        self.assertEqual(roi_indices.shape[1:], ())
 
-        self.assertIsInstance(head_confs, chainer.Variable)
-        self.assertIsInstance(head_confs.array, self.link.xp.ndarray)
-        self.assertEqual(head_confs.shape, (n_roi, self.n_fg_class + 1))
+        self.assertEqual(rois.shape[0], roi_indices.shape[0])
 
     def test_call_cpu(self):
         self._check_call()
@@ -126,13 +134,32 @@ def test_call_train_mode(self):
             with chainer.using_config('train', True):
                 self.link(x)
 
+    def _check_predict(self):  # shared by test_predict_cpu/_gpu
+        if self.return_values == 'detection':
+            assert_is_detection_link(self.link, self.n_fg_class)
+        elif self.return_values == 'instance_segmentation':
+            assert_is_instance_segmentation_link(self.link, self.n_fg_class)
+        elif self.return_values == 'rpn':
+            imgs = [
+                np.random.randint(
+                    0, 256, size=(3, 480, 320)).astype(np.float32),
+                np.random.randint(
+                    0, 256, size=(3, 480, 320)).astype(np.float32)]
+            result = self.link.predict(imgs)
+            assert len(result) == 1  # setUp requests only 'rois' for 'rpn'
+            assert len(result[0]) == 1  # NOTE(review): 2 imgs given; confirm
+            for i in range(len(result[0])):
+                roi = result[0][i]
+                assert_is_bbox(roi)
+
+    @attr.slow
     def test_predict_cpu(self):
-        assert_is_detection_link(self.link, self.n_fg_class)
+        self._check_predict()
 
     @attr.gpu
     def test_predict_gpu(self):
         self.link.to_gpu()
-        assert_is_detection_link(self.link, self.n_fg_class)
+        self._check_predict()
 
     def test_prepare(self):
         imgs = [_random_array(np, (3, s[0], s[1])) for s in self.in_sizes]
diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py b/tests/links_tests/model_tests/fpn_tests/test_mask_head.py
similarity index 97%
rename from tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py
rename to tests/links_tests/model_tests/fpn_tests/test_mask_head.py
index e89cf3c38d..c8e0bc927c 100644
--- a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_head.py
+++ b/tests/links_tests/model_tests/fpn_tests/test_mask_head.py
@@ -7,9 +7,9 @@
 from chainer import testing
 from chainer.testing import attr
 
-from chainercv.links.model.mask_rcnn import MaskHead
-from chainercv.links.model.mask_rcnn import mask_loss_post
-from chainercv.links.model.mask_rcnn import mask_loss_pre
+from chainercv.links.model.fpn import MaskHead
+from chainercv.links.model.fpn import mask_loss_post
+from chainercv.links.model.fpn import mask_loss_pre
 
 from chainercv.utils import mask_to_bbox
 
diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_misc.py b/tests/links_tests/model_tests/fpn_tests/test_mask_utils.py
similarity index 92%
rename from tests/links_tests/model_tests/mask_rcnn_tests/test_misc.py
rename to tests/links_tests/model_tests/fpn_tests/test_mask_utils.py
index 6bd6722c7a..5ae85bf237 100644
--- a/tests/links_tests/model_tests/mask_rcnn_tests/test_misc.py
+++ b/tests/links_tests/model_tests/fpn_tests/test_mask_utils.py
@@ -5,8 +5,8 @@
 
 from chainer import testing
 
-from chainercv.links.model.mask_rcnn.misc import segm_to_mask
-from chainercv.links.model.mask_rcnn.misc import mask_to_segm
+from chainercv.links.model.fpn.mask_utils import segm_to_mask
+from chainercv.links.model.fpn.mask_utils import mask_to_segm
 
 
 class TestSegmToMask(unittest.TestCase):
diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn.py b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn.py
deleted file mode 100644
index 637bab61c4..0000000000
--- a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn.py
+++ /dev/null
@@ -1,132 +0,0 @@
-from __future__ import division
-
-import numpy as np
-import unittest
-
-import chainer
-from chainer import testing
-from chainer.testing import attr
-
-from chainercv.links.model.fpn import Head
-from chainercv.links.model.fpn import RPN
-from chainercv.links.model.mask_rcnn import MaskRCNN
-from chainercv.links.model.mask_rcnn import MaskHead
-from chainercv.utils import assert_is_instance_segmentation_link
-
-
-def _random_array(xp, shape):
-    return xp.array(
-        np.random.uniform(-1, 1, size=shape), dtype=np.float32)
-
-
-class DummyExtractor(chainer.Link):
-    scales = (1 / 2, 1 / 4, 1 / 8)
-    mean = _random_array(np, (3, 1, 1))
-    n_channel = 16
-
-    def __call__(self, x):
-        n, _, h, w = x.shape
-        return [chainer.Variable(_random_array(
-                self.xp, (n, self.n_channel, int(h * scale), int(w * scale))))
-                for scale in self.scales]
-
-
-class DummyMaskRCNN(MaskRCNN):
-
-    def __init__(self, n_fg_class):
-        extractor = DummyExtractor()
-        n_class = n_fg_class + 1
-        super(DummyMaskRCNN, self).__init__(
-            extractor=extractor,
-            rpn=RPN(extractor.scales),
-            head=Head(n_class, extractor.scales),
-            mask_head=MaskHead(n_class, extractor.scales)
-        )
-
-
-@testing.parameterize(
-    {'n_fg_class': 1},
-    {'n_fg_class': 5},
-    {'n_fg_class': 20},
-)
-class TestMaskRCNN(unittest.TestCase):
-
-    def setUp(self):
-        self.link = DummyMaskRCNN(n_fg_class=self.n_fg_class)
-
-    def test_use_preset(self):
-        self.link.nms_thresh = 0
-        self.link.score_thresh = 0
-
-        self.link.use_preset('visualize')
-        self.assertEqual(self.link.nms_thresh, 0.5)
-        self.assertEqual(self.link.score_thresh, 0.7)
-
-        self.link.nms_thresh = 0
-        self.link.score_thresh = 0
-
-        self.link.use_preset('evaluate')
-        self.assertEqual(self.link.nms_thresh, 0.5)
-        self.assertEqual(self.link.score_thresh, 0.05)
-
-        with self.assertRaises(ValueError):
-            self.link.use_preset('unknown')
-
-    def _check_call(self):
-        B = 2
-        size = 32
-        x = _random_array(self.link.xp, (B, 3, size, size))
-        with chainer.using_config('train', False):
-            hs, rois, roi_indices = self.link(x)
-
-        self.assertEqual(len(hs), len(self.link.extractor.scales))
-        self.assertEqual(len(rois), len(self.link.extractor.scales))
-        self.assertEqual(len(roi_indices), len(self.link.extractor.scales))
-        for l, scale in enumerate(self.link.extractor.scales):
-            self.assertIsInstance(rois[l], self.link.xp.ndarray)
-            self.assertEqual(rois[l].shape[1:], (4,))
-
-            self.assertIsInstance(roi_indices[l], self.link.xp.ndarray)
-            self.assertEqual(roi_indices[l].shape[1:], ())
-
-            self.assertEqual(rois[l].shape[0], roi_indices[l].shape[0])
-
-            self.assertIsInstance(hs[l], chainer.Variable)
-            self.assertIsInstance(hs[l].array, self.link.xp.ndarray)
-            feat_size = int(size * scale)
-            self.assertEqual(
-                hs[l].shape,
-                (B, self.link.extractor.n_channel, feat_size, feat_size))
-
-    def test_call_cpu(self):
-        self._check_call()
-
-    @attr.gpu
-    def test_call_gpu(self):
-        self.link.to_gpu()
-        self._check_call()
-
-    def test_call_train_mode(self):
-        x = _random_array(self.link.xp, (2, 3, 32, 32))
-        with self.assertRaises(AssertionError):
-            with chainer.using_config('train', True):
-                self.link(x)
-
-    def test_predict_cpu(self):
-        assert_is_instance_segmentation_link(self.link, self.n_fg_class)
-
-    @attr.gpu
-    def test_predict_gpu(self):
-        self.link.to_gpu()
-        assert_is_instance_segmentation_link(self.link, self.n_fg_class)
-
-    def test_prepare(self):
-        imgs = [
-            np.random.randint(0, 255, size=(3, 480, 640)).astype(np.float32),
-            np.random.randint(0, 255, size=(3, 320, 320)).astype(np.float32),
-        ]
-        x, _, _ = self.link.prepare(imgs)
-        self.assertEqual(x.shape, (2, 3, 800, 1088))
-
-
-testing.run_module(__name__, __file__)

From 462726f683a2f4f4854201c4ae9fbcf7d0ccecf0 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 15 Mar 2019 19:57:53 +0900
Subject: [PATCH 090/100] merge to fpn

---
 chainercv/links/__init__.py                   |   2 +
 chainercv/links/model/fpn/__init__.py         |   2 +
 chainercv/links/model/fpn/faster_rcnn.py      |  46 +++-
 .../links/model/fpn/faster_rcnn_fpn_resnet.py |  83 +++++-
 .../model/{mask_rcnn => fpn}/keypoint_head.py |   4 +-
 chainercv/links/model/fpn/keypoint_utils.py   |  52 ++++
 chainercv/links/model/fpn/mask_utils.py       |  47 ----
 chainercv/links/model/mask_rcnn/__init__.py   |  11 -
 chainercv/links/model/mask_rcnn/mask_rcnn.py  | 253 ------------------
 .../model/mask_rcnn/mask_rcnn_fpn_resnet.py   | 137 ----------
 examples/fpn/demo.py                          |  30 ++-
 .../eval_keypoint_detection.py                |  10 +-
 examples/mask_rcnn/demo.py                    |  75 ------
 13 files changed, 211 insertions(+), 541 deletions(-)
 rename chainercv/links/model/{mask_rcnn => fpn}/keypoint_head.py (98%)
 create mode 100644 chainercv/links/model/fpn/keypoint_utils.py
 delete mode 100644 chainercv/links/model/mask_rcnn/__init__.py
 delete mode 100644 chainercv/links/model/mask_rcnn/mask_rcnn.py
 delete mode 100644 chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py
 delete mode 100644 examples/mask_rcnn/demo.py

diff --git a/chainercv/links/__init__.py b/chainercv/links/__init__.py
index 72b4d32106..aa91f30b77 100644
--- a/chainercv/links/__init__.py
+++ b/chainercv/links/__init__.py
@@ -11,6 +11,8 @@
 from chainercv.links.model.faster_rcnn.faster_rcnn_vgg import FasterRCNNVGG16  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet101  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet50  # NOQA
+from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import KeypointRCNNFPNResNet101  # NOQA
+from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import KeypointRCNNFPNResNet50  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet101  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet50  # NOQA
 from chainercv.links.model.resnet import ResNet101  # NOQA
diff --git a/chainercv/links/model/fpn/__init__.py b/chainercv/links/model/fpn/__init__.py
index 7f2f16d62e..d55ac5471c 100644
--- a/chainercv/links/model/fpn/__init__.py
+++ b/chainercv/links/model/fpn/__init__.py
@@ -1,6 +1,8 @@
 from chainercv.links.model.fpn.faster_rcnn import FasterRCNN  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet101  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet50  # NOQA
+from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import KeypointRCNNFPNResNet101  # NOQA
+from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import KeypointRCNNFPNResNet50  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet101  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet50  # NOQA
 from chainercv.links.model.fpn.fpn import FPN  # NOQA
diff --git a/chainercv/links/model/fpn/faster_rcnn.py b/chainercv/links/model/fpn/faster_rcnn.py
index 68b4506233..56c11ba7fb 100644
--- a/chainercv/links/model/fpn/faster_rcnn.py
+++ b/chainercv/links/model/fpn/faster_rcnn.py
@@ -50,10 +50,11 @@ class FasterRCNN(chainer.Chain):
     """
 
     stride = 32
-    _accepted_return_values = ('rois', 'bboxes', 'labels', 'scores', 'masks')
+    _accepted_return_values = ('rois', 'bboxes', 'labels', 'scores',
+                               'masks', 'points', 'point_scores')
 
     def __init__(self, extractor, rpn, bbox_head,
-                 mask_head, return_values,
+                 mask_head, keypoint_head, return_values,
                  min_size=800, max_size=1333):
         for value_name in return_values:
             if value_name not in self._accepted_return_values:
@@ -64,8 +65,10 @@ def __init__(self, extractor, rpn, bbox_head,
 
         self._store_rpn_outputs = 'rois' in self._return_values
         self._run_bbox = any([key in self._return_values
-                        for key in ['bboxes', 'labels', 'scores', 'masks']])
+                        for key in ['bboxes', 'labels', 'scores',
+                                    'masks', 'points', 'point_scores']])
         self._run_mask = 'masks' in self._return_values
+        self._run_keypoint = 'points' in self._return_values
         super(FasterRCNN, self).__init__()
 
         with self.init_scope():
@@ -75,6 +78,8 @@ def __init__(self, extractor, rpn, bbox_head,
                 self.bbox_head = bbox_head
             if self._run_mask:
                 self.mask_head = mask_head
+            if self._run_keypoint:
+                self.keypoint_head = keypoint_head
 
         self.min_size = min_size
         self.max_size = max_size
@@ -174,10 +179,9 @@ def predict(self, imgs):
             scores_cpu = [cuda.to_cpu(score) for score in scores]
             output.update({'bboxes': bboxes_cpu, 'labels': labels_cpu,
                            'scores': scores_cpu})
-
-        if self._run_mask:
             rescaled_bboxes = [bbox * scale
-                               for scale, bbox in zip(scales, bboxes)]
+                                for scale, bbox in zip(scales, bboxes)]
+        if self._run_mask:
             # Change bboxes to RoI and RoI indices format
             mask_rois_before_reordering, mask_roi_indices_before_reordering =\
                 _list_to_flat(rescaled_bboxes)
@@ -200,6 +204,36 @@ def predict(self, imgs):
             # Currently MaskHead only supports numpy inputs
             masks_cpu = self.mask_head.decode(segms, bboxes_cpu, labels_cpu, sizes)
             output.update({'masks': masks_cpu})
+
+        if self._run_keypoint:
+            (point_rois_before_reordering,
+             point_roi_indices_before_reordering) = _list_to_flat(
+                 rescaled_bboxes)
+            point_rois, point_roi_indices, order =\
+                self.keypoint_head.distribute(
+                    point_rois_before_reordering,
+                    point_roi_indices_before_reordering)
+            with chainer.using_config(
+                    'train', False), chainer.no_backprop_mode():
+                point_maps = self.keypoint_head(
+                    hs, point_rois, point_roi_indices).data
+            point_maps = point_maps[order]
+            point_maps = _flat_to_list(
+                point_maps, point_roi_indices_before_reordering, len(imgs))
+            point_maps = [point_map if point_map is not None else
+                          self.xp.zeros(
+                              (0, self.keypoint_head.n_point,
+                               self.keypoint_head.point_map_size,
+                               self.keypoint_head.point_map_size),
+                              dtype=np.float32)
+                          for point_map in point_maps]
+            point_maps = [
+                chainer.backends.cuda.to_cpu(point_map)
+                for point_map in point_maps]
+            points_cpu, point_scores_cpu = self.keypoint_head.decode(
+                point_maps, bboxes_cpu)
+            output.update(
+                {'points': points_cpu, 'point_scores': point_scores_cpu})
         return tuple([output[key] for key in self._return_values])
 
     def prepare(self, imgs):
diff --git a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py
index debadb10ea..f74a890495 100644
--- a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py
+++ b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py
@@ -7,6 +7,7 @@
 from chainercv.links.model.fpn.faster_rcnn import FasterRCNN
 from chainercv.links.model.fpn.fpn import FPN
 from chainercv.links.model.fpn.bbox_head import BboxHead
+from chainercv.links.model.fpn.keypoint_head import KeypointHead
 from chainercv.links.model.fpn.mask_head import MaskHead
 from chainercv.links.model.fpn.rpn import RPN
 from chainercv.links.model.resnet import ResNet101
@@ -45,10 +46,12 @@ class FasterRCNNFPNResNet(FasterRCNN):
     """
 
     def __init__(self, n_fg_class=None, pretrained_model=None,
+                 n_point=None,
                  return_values=['bboxes', 'labels', 'scores'],
                  min_size=800, max_size=1333):
         param, path = utils.prepare_pretrained_model(
-            {'n_fg_class': n_fg_class}, pretrained_model, self._models)
+            {'n_fg_class': n_fg_class, 'n_point': n_point},
+            pretrained_model, self._models)
 
         base = self._base(n_class=1, arch='he')
         base.pick = ('res2', 'res3', 'res4', 'res5')
@@ -58,11 +61,16 @@ def __init__(self, n_fg_class=None, pretrained_model=None,
         extractor = FPN(
             base, len(base.pick), (1 / 4, 1 / 8, 1 / 16, 1 / 32, 1 / 64))
 
+        if param['n_point'] is not None:
+            keypoint_head = KeypointHead(param['n_point'], extractor.scales)
+        else:
+            keypoint_head = None
         super(FasterRCNNFPNResNet, self).__init__(
             extractor=extractor,
             rpn=RPN(extractor.scales),
             bbox_head=BboxHead(param['n_fg_class'] + 1, extractor.scales),
             mask_head=MaskHead(param['n_fg_class'] + 1, extractor.scales),
+            keypoint_head=keypoint_head,
             return_values=return_values,
             min_size=min_size, max_size=max_size
         )
@@ -72,7 +80,7 @@ def __init__(self, n_fg_class=None, pretrained_model=None,
                 self.extractor.base,
                 self._base(pretrained_model='imagenet', arch='he'))
         elif path:
-            chainer.serializers.load_npz(path, self)
+            chainer.serializers.load_npz(path, self, strict=False)
 
 
 class MaskRCNNFPNResNet(FasterRCNNFPNResNet):
@@ -91,7 +99,30 @@ class MaskRCNNFPNResNet(FasterRCNNFPNResNet):
     def __init__(self, n_fg_class=None, pretrained_model=None,
                  min_size=800, max_size=1333):
         super(MaskRCNNFPNResNet, self).__init__(
-            n_fg_class, pretrained_model, ['masks', 'labels', 'scores'],
+            n_fg_class, pretrained_model, None,
+            ['masks', 'labels', 'scores'],
+            min_size, max_size)
+
+
+class KeypointRCNNFPNResNet(FasterRCNNFPNResNet):
+    """Base class for Keypoint R-CNN with an FPN ResNet backbone.
+
+    This forwards :obj:`n_point` to :class:`FasterRCNNFPNResNet` and fixes
+    the values returned by :meth:`predict` to
+    :obj:`(points, labels, scores, point_scores, bboxes)`.
+    A subclass of this class should define :obj:`_base` and :obj:`_models`.
+
+    Args:
+        n_point (int): The number of keypoints. The keypoint head is built
+            by the parent class only when the resolved value is not None.
+    """
+
+    def __init__(self, n_fg_class=None, pretrained_model=None,
+                 n_point=None,
+                 min_size=800, max_size=1333):
+        super(KeypointRCNNFPNResNet, self).__init__(
+            n_fg_class, pretrained_model, n_point,
+            ['points', 'labels', 'scores', 'point_scores', 'bboxes'],
             min_size, max_size)
 
 
@@ -189,6 +220,52 @@ class MaskRCNNFPNResNet101(MaskRCNNFPNResNet):
     }
 
 
+class KeypointRCNNFPNResNet50(KeypointRCNNFPNResNet):
+    """Keypoint R-CNN with ResNet-50 and Feature Pyramid Networks.
+
+    This model uses :class:`~chainercv.links.ResNet50` as
+    its base feature extractor.
+
+    The :obj:`'coco'` pretrained weights are registered with
+    :obj:`n_fg_class=1` and :obj:`n_point=17`.
+
+    See :class:`KeypointRCNNFPNResNet` for the constructor arguments.
+
+    """
+
+    _base = ResNet50
+    _models = {
+        'coco': {
+            'param': {'n_fg_class': 1, 'n_point': 17},
+            'url': 'https://chainercv-models.preferred.jp/'
+            'faster_rcnn_fpn_resnet50_keypoint_coco_converted_2019_03_15.npz',
+            'cv2': True
+        },
+    }
+
+
+class KeypointRCNNFPNResNet101(KeypointRCNNFPNResNet):
+    """Keypoint R-CNN with ResNet-101 and Feature Pyramid Networks.
+
+    This model uses :class:`~chainercv.links.ResNet101` as
+    its base feature extractor.
+
+    NOTE: the :obj:`'coco'` entry has an empty :obj:`url`; no converted
+    weight file is registered for this model yet.
+
+    See :class:`KeypointRCNNFPNResNet` for the constructor arguments.
+
+    """
+
+    _base = ResNet101
+    _models = {
+        'coco': {
+            'param': {'n_fg_class': 1, 'n_point': 17},
+            'url': '',
+            'cv2': True
+        },
+    }
+
 
 def _copyparams(dst, src):
     if isinstance(dst, chainer.Chain):
diff --git a/chainercv/links/model/mask_rcnn/keypoint_head.py b/chainercv/links/model/fpn/keypoint_head.py
similarity index 98%
rename from chainercv/links/model/mask_rcnn/keypoint_head.py
rename to chainercv/links/model/fpn/keypoint_head.py
index f53a44a102..c0dd00679d 100644
--- a/chainercv/links/model/mask_rcnn/keypoint_head.py
+++ b/chainercv/links/model/fpn/keypoint_head.py
@@ -15,8 +15,8 @@
 from chainercv.transforms.image.resize import resize
 from chainercv.utils.bbox.bbox_iou import bbox_iou
 
-from chainercv.links.model.mask_rcnn.misc import point_to_roi_points
-from chainercv.links.model.mask_rcnn.misc import within_bbox
+from chainercv.links.model.fpn.keypoint_utils import point_to_roi_points
+from chainercv.links.model.fpn.keypoint_utils import within_bbox
 
 
 # make a bilinear interpolation kernel
diff --git a/chainercv/links/model/fpn/keypoint_utils.py b/chainercv/links/model/fpn/keypoint_utils.py
new file mode 100644
index 0000000000..adc5070528
--- /dev/null
+++ b/chainercv/links/model/fpn/keypoint_utils.py
@@ -0,0 +1,52 @@
+from __future__ import division
+
+import numpy as np
+
+import chainer
+
+
+def point_to_roi_points(
+        point, visible, bbox, point_map_size):
+    """Map keypoints into the point_map_size grid of their boxes.
+
+    Returns the floored (y, x) grid coordinates and a boolean mask that is
+    true where the keypoint is visible and falls inside the grid.
+    """
+    xp = chainer.backends.cuda.get_array_module(point)
+    _, K, _ = point.shape
+
+    roi_point = xp.zeros((len(bbox), K, 2))
+    # The builtin bool is used because the np.bool alias was removed.
+    roi_visible = xp.zeros((len(bbox), K), dtype=bool)
+
+    offset_y = bbox[:, 0]
+    offset_x = bbox[:, 1]
+    scale_y = point_map_size / (bbox[:, 2] - bbox[:, 0])
+    scale_x = point_map_size / (bbox[:, 3] - bbox[:, 1])
+
+    for k in range(K):
+        y_boundary_index = xp.where(point[:, k, 0] == bbox[:, 2])[0]
+        x_boundary_index = xp.where(point[:, k, 1] == bbox[:, 3])[0]
+
+        # Points exactly on the max edge go to the last cell, not outside.
+        ys = xp.floor((point[:, k, 0] - offset_y) * scale_y)
+        if len(y_boundary_index) > 0:
+            ys[y_boundary_index] = point_map_size - 1
+        xs = xp.floor((point[:, k, 1] - offset_x) * scale_x)
+        if len(x_boundary_index) > 0:
+            xs[x_boundary_index] = point_map_size - 1
+
+        inside = (ys >= 0) & (xs >= 0) & (
+            ys < point_map_size) & (xs < point_map_size)
+        roi_point[:, k, 0] = ys
+        roi_point[:, k, 1] = xs
+        roi_visible[:, k] = xp.logical_and(inside, visible[:, k])
+    return roi_point, roi_visible
+
+
+def within_bbox(point, bbox):
+    """Return a mask of points inside their box, boundaries included."""
+    ys, xs = point[:, :, 0], point[:, :, 1]
+    y_min, x_min, y_max, x_max = (bbox[:, i][:, None] for i in range(4))
+    inside_y = (ys >= y_min) & (ys <= y_max)
+    return inside_y & (xs >= x_min) & (xs <= x_max)
diff --git a/chainercv/links/model/fpn/mask_utils.py b/chainercv/links/model/fpn/mask_utils.py
index c8cba87076..5c28e20232 100644
--- a/chainercv/links/model/fpn/mask_utils.py
+++ b/chainercv/links/model/fpn/mask_utils.py
@@ -155,50 +155,3 @@ def _expand_boxes(bbox, scale):
     expanded_bbox[:, 3] = x_c + w_half
 
     return expanded_bbox
-
-
-def point_to_roi_points(
-        point, visible, bbox, point_map_size):
-    xp = chainer.backends.cuda.get_array_module(point)
-
-    R, K, _ = point.shape
-
-    roi_point = xp.zeros((len(bbox), K, 2))
-    roi_visible = xp.zeros((len(bbox), K), dtype=np.bool)
-
-    offset_y = bbox[:, 0]
-    offset_x = bbox[:, 1]
-    scale_y = point_map_size / (bbox[:, 2] - bbox[:, 0])
-    scale_x = point_map_size / (bbox[:, 3] - bbox[:, 1])
-
-    for k in range(K):
-        y_boundary_index = xp.where(point[:, k, 0] == bbox[:, 2])[0]
-        x_boundary_index = xp.where(point[:, k, 1] == bbox[:, 3])[0]
-
-        ys = (point[:, k, 0] - offset_y) * scale_y
-        ys = xp.floor(ys)
-        if len(y_boundary_index) > 0:
-            ys[y_boundary_index] = point_map_size - 1
-        xs = (point[:, k, 1] - offset_x) * scale_x
-        xs = xp.floor(xs)
-        if len(x_boundary_index) > 0:
-            xs[x_boundary_index] = point_map_size - 1
-
-        valid = xp.logical_and(
-            xp.logical_and(
-                xp.logical_and(ys >= 0, xs >= 0),
-                xp.logical_and(ys < point_map_size, xs < point_map_size)),
-            visible[:, k])
-
-        roi_point[:, k, 0] = ys
-        roi_point[:, k, 1] = xs
-        roi_visible[:, k] = valid
-    return roi_point, roi_visible
-
-
-def within_bbox(point, bbox):
-    y_within = (point[:, :, 0] >= bbox[:, 0][:, None]) & (
-        point[:, :, 0] <= bbox[:, 2][:, None])
-    x_within = (point[:, :, 1] >= bbox[:, 1][:, None]) & (
-        point[:, :, 1] <= bbox[:, 3][:, None])
-    return y_within & x_within
diff --git a/chainercv/links/model/mask_rcnn/__init__.py b/chainercv/links/model/mask_rcnn/__init__.py
deleted file mode 100644
index 3391efe1f9..0000000000
--- a/chainercv/links/model/mask_rcnn/__init__.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from chainercv.links.model.mask_rcnn.keypoint_head import KeypointHead  # NOQA
-from chainercv.links.model.mask_rcnn.keypoint_head import keypoint_loss_post  # NOQA
-from chainercv.links.model.mask_rcnn.keypoint_head import keypoint_loss_pre  # NOQA
-from chainercv.links.model.mask_rcnn.mask_head import mask_loss_post  # NOQA
-from chainercv.links.model.mask_rcnn.mask_head import mask_loss_pre  # NOQA
-from chainercv.links.model.mask_rcnn.mask_head import MaskHead  # NOQA
-from chainercv.links.model.mask_rcnn.mask_rcnn import MaskRCNN  # NOQA
-from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet101  # NOQA
-from chainercv.links.model.mask_rcnn.mask_rcnn_fpn_resnet import MaskRCNNFPNResNet50  # NOQA
-from chainercv.links.model.mask_rcnn.misc import mask_to_segm  # NOQA
-from chainercv.links.model.mask_rcnn.misc import segm_to_mask  # NOQA
diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn.py b/chainercv/links/model/mask_rcnn/mask_rcnn.py
deleted file mode 100644
index 8bb88f9789..0000000000
--- a/chainercv/links/model/mask_rcnn/mask_rcnn.py
+++ /dev/null
@@ -1,253 +0,0 @@
-from __future__ import division
-
-import numpy as np
-
-import chainer
-from chainer.backends import cuda
-import chainer.functions as F
-
-from chainercv.links.model.mask_rcnn.misc import scale_img
-
-
-class MaskRCNN(chainer.Chain):
-
-    """Base class of Mask R-CNN.
-
-    This is a base class of Mask R-CNN [#]_.
-
-    .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017
-
-    Args:
-        extractor (Link): A link that extracts feature maps.
-            This link must have :obj:`scales`, :obj:`mean` and
-            :meth:`__call__`.
-        rpn (Link): A link that has the same interface as
-            :class:`~chainercv.links.model.fpn.RPN`.
-            Please refer to the documentation found there.
-        head (Link): A link that has the same interface as
-            :class:`~chainercv.links.model.fpn.Head`.
-            Please refer to the documentation found there.
-        mask_head (Link): A link that has the same interface as
-            :class:`~chainercv.links.model.mask_rcnn.MaskRCNN`.
-            Please refer to the documentation found there.
-
-    Parameters:
-        nms_thresh (float): The threshold value
-            for :func:`~chainercv.utils.non_maximum_suppression`.
-            The default value is :obj:`0.5`.
-            This value can be changed directly or by using :meth:`use_preset`.
-        score_thresh (float): The threshold value for confidence score.
-            If a bounding box whose confidence score is lower than this value,
-            the bounding box will be suppressed.
-            The default value is :obj:`0.7`.
-            This value can be changed directly or by using :meth:`use_preset`.
-
-    """
-
-    min_size = 800
-    max_size = 1333
-    stride = 32
-
-    def __init__(self, extractor, rpn, head, mask_head,
-                 keypoint_head, mode='mask'):
-        super(MaskRCNN, self).__init__()
-        with self.init_scope():
-            self.extractor = extractor
-            self.rpn = rpn
-            self.head = head
-            if mode == 'mask':
-                self.mask_head = mask_head
-            elif mode =='keypoint':
-                self.keypoint_head = keypoint_head
-        self.mode = mode
-
-        self.use_preset('visualize')
-
-    def use_preset(self, preset):
-        """Use the given preset during prediction.
-
-        This method changes values of :obj:`nms_thresh` and
-        :obj:`score_thresh`. These values are a threshold value
-        used for non maximum suppression and a threshold value
-        to discard low confidence proposals in :meth:`predict`,
-        respectively.
-
-        If the attributes need to be changed to something
-        other than the values provided in the presets, please modify
-        them by directly accessing the public attributes.
-
-        Args:
-            preset ({'visualize', 'evaluate'}): A string to determine the
-                preset to use.
-        """
-
-        if preset == 'visualize':
-            self.nms_thresh = 0.5
-            self.score_thresh = 0.7
-        elif preset == 'evaluate':
-            self.nms_thresh = 0.5
-            self.score_thresh = 0.05
-        else:
-            raise ValueError('preset must be visualize or evaluate')
-
-    def __call__(self, x):
-        assert(not chainer.config.train)
-        hs = self.extractor(x)
-        rpn_locs, rpn_confs = self.rpn(hs)
-        anchors = self.rpn.anchors(h.shape[2:] for h in hs)
-        rois, roi_indices = self.rpn.decode(
-            rpn_locs, rpn_confs, anchors, x.shape)
-        rois, roi_indices = self.head.distribute(rois, roi_indices)
-        return hs, rois, roi_indices
-
-    def predict(self, imgs):
-        """Segment object instances from images.
-
-        This method predicts instance-aware object regions for each image.
-
-        Args:
-            imgs (iterable of numpy.ndarray): Arrays holding images of shape
-                :math:`(B, C, H, W)`.  All images are in CHW and RGB format
-                and the range of their value is :math:`[0, 255]`.
-
-        Returns:
-           tuple of lists:
-           This method returns a tuple of three lists,
-           :obj:`(masks, labels, scores)`.
-
-           * **masks**: A list of boolean arrays of shape :math:`(R, H, W)`, \
-               where :math:`R` is the number of masks in a image. \
-               Each pixel holds value if it is inside the object inside or not.
-           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
-               Each value indicates the class of the masks. \
-               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
-               number of the foreground classes.
-           * **scores** : A list of float arrays of shape :math:`(R,)`. \
-               Each value indicates how confident the prediction is.
-
-        """
-
-        sizes = [img.shape[1:] for img in imgs]
-        x, scales = self.prepare(imgs)
-
-        with chainer.using_config('train', False), chainer.no_backprop_mode():
-            hs, rois, roi_indices = self(x)
-            head_locs, head_confs = self.head(hs, rois, roi_indices)
-        bboxes, labels, scores = self.head.decode(
-            rois, roi_indices, head_locs, head_confs,
-            scales, sizes, self.nms_thresh, self.score_thresh)
-
-        rescaled_bboxes = [bbox * scale for scale, bbox in zip(scales, bboxes)]
-        if self.mode == 'mask':
-            # Change bboxes to RoI and RoI indices format
-            mask_rois_before_reordering, mask_roi_indices_before_reordering =\
-                _list_to_flat(rescaled_bboxes)
-            mask_rois, mask_roi_indices, order = self.mask_head.distribute(
-                mask_rois_before_reordering, mask_roi_indices_before_reordering)
-            with chainer.using_config('train', False), chainer.no_backprop_mode():
-                segms = F.sigmoid(
-                    self.mask_head(hs, mask_rois, mask_roi_indices)).data
-            # Put the order of proposals back to the one used by bbox head.
-            segms = segms[order]
-            segms = _flat_to_list(
-                segms, mask_roi_indices_before_reordering, len(imgs))
-            segms = [segm if segm is not None else
-                    self.xp.zeros(
-                        (0, self.mask_head.segm_size, self.mask_head.segm_size),
-                        dtype=np.float32)
-                    for segm in segms]
-
-            segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms]
-            bboxes = [chainer.backends.cuda.to_cpu(bbox / scale)
-                    for bbox, scale in zip(rescaled_bboxes, scales)]
-            labels = [chainer.backends.cuda.to_cpu(label) for label in labels]
-            # Currently MaskHead only supports numpy inputs
-            masks = self.mask_head.decode(segms, bboxes, labels, sizes)
-            scores = [cuda.to_cpu(score) for score in scores]
-            return masks, labels, scores
-        elif self.mode == 'keypoint':
-            (point_rois_before_reordering,
-             point_roi_indices_before_reordering) = _list_to_flat(
-                 rescaled_bboxes)
-            point_rois, point_roi_indices, order =\
-                self.keypoint_head.distribute(
-                    point_rois_before_reordering,
-                    point_roi_indices_before_reordering)
-            with chainer.using_config('train', False), chainer.no_backprop_mode():
-                point_maps = self.keypoint_head(
-                    hs, point_rois, point_roi_indices).data
-            point_maps = point_maps[order]
-            point_maps = _flat_to_list(
-                point_maps, point_roi_indices_before_reordering, len(imgs))
-            point_maps = [point_map if point_map is not None else
-                          self.xp.zeros(
-                              (0, self.keypoint_head.n_point,
-                               self.keypoint_head.point_map_size,
-                               self.keypoint_head.point_map_size),
-                              dtype=np.float32)
-                          for point_map in point_maps]
-            point_maps = [
-                chainer.backends.cuda.to_cpu(point_map)
-                for point_map in point_maps]
-            bboxes = [chainer.cuda.to_cpu(bbox / scale)
-                      for bbox, scale in zip(rescaled_bboxes, scales)]
-            points, point_scores = self.keypoint_head.decode(
-                point_maps, bboxes)
-            labels = [cuda.to_cpu(label) for label in labels]
-            scores = [cuda.to_cpu(score) for score in scores]
-            return points, labels, scores, point_scores, bboxes
-
-    def prepare(self, imgs):
-        """Preprocess images.
-
-        Args:
-            imgs (iterable of numpy.ndarray): Arrays holding images.
-                All images are in CHW and RGB format
-                and the range of their value is :math:`[0, 255]`.
-
-        Returns:
-            Two arrays: preprocessed images and \
-            scales that were caluclated in prepocessing.
-
-        """
-        scales = []
-        resized_imgs = []
-        for img in imgs:
-            img, scale = scale_img(
-                img, self.min_size, self.max_size)
-            img -= self.extractor.mean
-            scales.append(scale)
-            resized_imgs.append(img)
-        pad_size = np.array(
-            [im.shape[1:] for im in resized_imgs]).max(axis=0)
-        pad_size = (
-            np.ceil(pad_size / self.stride) * self.stride).astype(int)
-        x = np.zeros(
-            (len(imgs), 3, pad_size[0], pad_size[1]), dtype=np.float32)
-        for i, im in enumerate(resized_imgs):
-            _, H, W = im.shape
-            x[i, :, :H, :W] = im
-        x = self.xp.array(x)
-
-        return x, scales
-
-
-def _list_to_flat(array_list):
-    xp = chainer.backends.cuda.get_array_module(array_list[0])
-
-    indices = xp.concatenate(
-        [i * xp.ones((len(array),), dtype=np.int32) for
-         i, array in enumerate(array_list)], axis=0)
-    flat = xp.concatenate(array_list, axis=0)
-    return flat, indices
-
-
-def _flat_to_list(flat, indices, B):
-    array_list = []
-    for i in range(B):
-        array = flat[indices == i]
-        if len(array) > 0:
-            array_list.append(array)
-        else:
-            array_list.append(None)
-    return array_list
diff --git a/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py b/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py
deleted file mode 100644
index 3048ce80cf..0000000000
--- a/chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py
+++ /dev/null
@@ -1,137 +0,0 @@
-from __future__ import division
-
-import chainer
-import chainer.functions as F
-
-from chainercv.links.model.fpn import FPN
-from chainercv.links.model.fpn import Head
-from chainercv.links.model.fpn import RPN
-from chainercv.links.model.mask_rcnn.keypoint_head import KeypointHead
-from chainercv.links.model.mask_rcnn.mask_head import MaskHead
-from chainercv.links.model.mask_rcnn.mask_rcnn import MaskRCNN
-from chainercv.links.model.resnet import ResNet101
-from chainercv.links.model.resnet import ResNet50
-from chainercv import utils
-
-from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import _copyparams
-
-
-class MaskRCNNFPNResNet(MaskRCNN):
-
-    """Base class for Mask R-CNN with ResNet backbone.
-
-    A subclass of this class should have :obj:`_base` and :obj:`_models`.
-    """
-
-    def __init__(self, n_fg_class=None, pretrained_model=None,
-                 n_point=17, mode='mask'):
-        param, path = utils.prepare_pretrained_model(
-            {'n_fg_class': n_fg_class}, pretrained_model, self._models)
-
-        base = self._base(n_class=1, arch='he')
-        base.pick = ('res2', 'res3', 'res4', 'res5')
-        base.pool1 = lambda x: F.max_pooling_2d(
-            x, 3, stride=2, pad=1, cover_all=False)
-        base.remove_unused()
-        extractor = FPN(
-            base, len(base.pick), (1 / 4, 1 / 8, 1 / 16, 1 / 32, 1 / 64))
-
-        n_class = param['n_fg_class'] + 1
-        super(MaskRCNNFPNResNet, self).__init__(
-            extractor=extractor,
-            rpn=RPN(extractor.scales),
-            head=Head(n_class, extractor.scales),
-            mask_head=MaskHead(n_class, extractor.scales),
-            keypoint_head=KeypointHead(n_point, extractor.scales),
-            mode=mode,
-        )
-        if path == 'imagenet':
-            _copyparams(
-                self.extractor.base,
-                self._base(pretrained_model='imagenet', arch='he'))
-        elif path:
-            chainer.serializers.load_npz(path, self)
-
-
-class MaskRCNNFPNResNet50(MaskRCNNFPNResNet):
-
-    """Mask R-CNN with ResNet-50.
-
-    This is a model of Mask R-CNN [#]_.
-    This model uses :class:`~chainercv.links.ResNet50` as
-    its base feature extractor.
-
-    .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017
-
-    Args:
-       n_fg_class (int): The number of classes excluding the background.
-       pretrained_model (string): The weight file to be loaded.
-           This can take :obj:`'coco'`, `filepath` or :obj:`None`.
-           The default value is :obj:`None`.
-
-            * :obj:`'coco'`: Load weights trained on train split of \
-                MS COCO 2017. \
-                The weight file is downloaded and cached automatically. \
-                :obj:`n_fg_class` must be :obj:`80` or :obj:`None`.
-            * :obj:`'imagenet'`: Load weights of ResNet-50 trained on \
-                ImageNet. \
-                The weight file is downloaded and cached automatically. \
-                This option initializes weights partially and the rests are \
-                initialized randomly. In this case, :obj:`n_fg_class` \
-                can be set to any number.
-            * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \
-                must be specified properly.
-            * :obj:`None`: Do not load weights.
-
-    """
-
-    _base = ResNet50
-    _models = {
-        'coco': {
-            'param': {'n_fg_class': 80},
-            'url': None,
-            'cv2': True
-        },
-    }
-
-
-class MaskRCNNFPNResNet101(MaskRCNNFPNResNet):
-
-    """Mask R-CNN with ResNet-101.
-
-    This is a model of Mask R-CNN [#]_.
-    This model uses :class:`~chainercv.links.ResNet101` as
-    its base feature extractor.
-
-    .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017
-
-    Args:
-       n_fg_class (int): The number of classes excluding the background.
-       pretrained_model (string): The weight file to be loaded.
-           This can take :obj:`'coco'`, `filepath` or :obj:`None`.
-           The default value is :obj:`None`.
-
-            * :obj:`'coco'`: Load weights trained on train split of \
-                MS COCO 2017. \
-                The weight file is downloaded and cached automatically. \
-                :obj:`n_fg_class` must be :obj:`80` or :obj:`None`.
-            * :obj:`'imagenet'`: Load weights of ResNet-101 trained on \
-                ImageNet. \
-                The weight file is downloaded and cached automatically. \
-                This option initializes weights partially and the rests are \
-                initialized randomly. In this case, :obj:`n_fg_class` \
-                can be set to any number.
-            * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \
-                must be specified properly.
-            * :obj:`None`: Do not load weights.
-
-    """
-
-    _base = ResNet101
-    _models = {
-        'coco': {
-            'param': {'n_fg_class': 80},
-            'url': None,
-            'cv2': True
-        },
-    }
diff --git a/examples/fpn/demo.py b/examples/fpn/demo.py
index 0d615cacfb..b11a844eb6 100644
--- a/examples/fpn/demo.py
+++ b/examples/fpn/demo.py
@@ -5,13 +5,17 @@
 
 from chainercv.datasets import coco_bbox_label_names
 from chainercv.datasets import coco_instance_segmentation_label_names
+from chainercv.datasets import coco_keypoint_names
 from chainercv.links import FasterRCNNFPNResNet101
 from chainercv.links import FasterRCNNFPNResNet50
+from chainercv.links import KeypointRCNNFPNResNet101
+from chainercv.links import KeypointRCNNFPNResNet50
 from chainercv.links import MaskRCNNFPNResNet101
 from chainercv.links import MaskRCNNFPNResNet50
 from chainercv import utils
 from chainercv.visualizations import vis_bbox
 from chainercv.visualizations import vis_instance_segmentation
+from chainercv.visualizations import vis_keypoint_coco
 
 
 def main():
@@ -19,7 +23,8 @@ def main():
     parser.add_argument(
         '--model',
         choices=('faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101',
-                 'mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'),
+                 'mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101',
+                 'keypoint_rcnn_fpn_resnet50', 'keypoint_rcnn_fpn_resnet101'),
         default='faster_rcnn_fpn_resnet50')
     parser.add_argument('--gpu', type=int, default=-1)
     parser.add_argument('--pretrained-model', default='coco')
@@ -46,6 +51,18 @@ def main():
         model = MaskRCNNFPNResNet101(
             n_fg_class=len(coco_instance_segmentation_label_names),
             pretrained_model=args.pretrained_model)
+    elif args.model == 'keypoint_rcnn_fpn_resnet50':
+        mode = 'keypoint'
+        model = KeypointRCNNFPNResNet50(
+            n_fg_class=1,
+            pretrained_model=args.pretrained_model,
+            n_point=len(coco_keypoint_names[0]))
+    elif args.model == 'keypoint_rcnn_fpn_resnet101':
+        mode = 'keypoint'
+        model = KeypointRCNNFPNResNet101(
+            n_fg_class=1,
+            pretrained_model=args.pretrained_model,
+            n_point=len(coco_keypoint_names[0]))
 
     if args.gpu >= 0:
         chainer.cuda.get_device_from_id(args.gpu).use()
@@ -69,6 +86,17 @@ def main():
         vis_instance_segmentation(
             img, mask, label, score,
             label_names=coco_instance_segmentation_label_names)
+    elif mode == 'keypoint':
+        points, labels, scores, point_scores, bboxes = model.predict([img])
+        point = points[0]
+        label = labels[0]
+        score = scores[0]
+        point_score = point_scores[0]
+        bbox = bboxes[0]
+        ax = vis_keypoint_coco(
+            img, point, None, point_score)
+        vis_bbox(None, bbox, label, score=score,
+                 label_names=coco_bbox_label_names, ax=ax)
     plt.show()
 
 
diff --git a/examples/keypoint_detection/eval_keypoint_detection.py b/examples/keypoint_detection/eval_keypoint_detection.py
index 94954c5cd7..377e14f385 100644
--- a/examples/keypoint_detection/eval_keypoint_detection.py
+++ b/examples/keypoint_detection/eval_keypoint_detection.py
@@ -5,17 +5,15 @@
 
 from chainercv.datasets import COCOKeypointDataset
 from chainercv.evaluations import eval_keypoint_detection_coco
-from chainercv.links import MaskRCNNFPNResNet101
-from chainercv.links import MaskRCNNFPNResNet50
+from chainercv.links import KeypointRCNNFPNResNet101
+from chainercv.links import KeypointRCNNFPNResNet50
 from chainercv.utils import apply_to_iterator
 from chainercv.utils import ProgressHook
 
 models = {
     # model: (class, dataset -> pretrained_model, default batchsize)
-    'mask_rcnn_fpn_resnet50': (MaskRCNNFPNResNet50,
-                               {}, 1),
-    'mask_rcnn_fpn_resnet101': (MaskRCNNFPNResNet101,
-                                {}, 1),
+    'keypoint_rcnn_fpn_resnet50': (KeypointRCNNFPNResNet50, {}, 1),
+    'keypoint_rcnn_fpn_resnet101': (KeypointRCNNFPNResNet101, {}, 1),
 }
 
 
diff --git a/examples/mask_rcnn/demo.py b/examples/mask_rcnn/demo.py
deleted file mode 100644
index 81659c862b..0000000000
--- a/examples/mask_rcnn/demo.py
+++ /dev/null
@@ -1,75 +0,0 @@
-import argparse
-import matplotlib.pyplot as plt
-
-import chainer
-
-import chainercv
-from chainercv.datasets import coco_instance_segmentation_label_names
-from chainercv import utils
-
-from chainercv.links import MaskRCNNFPNResNet101
-from chainercv.links import MaskRCNNFPNResNet50
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        '--model',
-        choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'),
-        default='mask_rcnn_fpn_resnet50'
-    )
-    parser.add_argument('--gpu', type=int, default=-1)
-    parser.add_argument('--pretrained-model', default='coco')
-    parser.add_argument(
-        '--mode',
-        choices=('mask', 'keypoint'),
-        default='mask')
-    parser.add_argument('image')
-    args = parser.parse_args()
-
-    if args.mode == 'mask':
-        n_fg_class = len(coco_instance_segmentation_label_names)
-    elif args.mode == 'keypoint':
-        n_fg_class = 1
-    if args.model == 'mask_rcnn_fpn_resnet50':
-        model = MaskRCNNFPNResNet50(
-            n_fg_class=n_fg_class,
-            pretrained_model=args.pretrained_model,
-            mode=args.mode
-        )
-    elif args.model == 'mask_rcnn_fpn_resnet101':
-        model = MaskRCNNFPNResNet101(
-            n_fg_class=n_fg_class,
-            pretrained_model=args.pretrained_model,
-            mode=args.mode
-        )
-
-    if args.gpu >= 0:
-        chainer.cuda.get_device_from_id(args.gpu).use()
-        model.to_gpu()
-
-    img = utils.read_image(args.image)
-    if args.mode == 'mask':
-        masks, labels, scores = model.predict([img])
-        mask = masks[0]
-        label = labels[0]
-        score = scores[0]
-        chainercv.visualizations.vis_instance_segmentation(
-            img, mask, label, score,
-            label_names=coco_instance_segmentation_label_names)
-        plt.show()
-    elif args.mode == 'keypoint':
-        points, labels, scores, point_scores, bboxes = model.predict([img])
-        point = points[0]
-        label = labels[0]
-        score = scores[0]
-        point_score = point_scores[0]
-        bbox = bboxes[0]
-        ax = chainercv.visualizations.vis_keypoint_coco(
-            img, point, None, point_score)
-        chainercv.visualizations.vis_bbox(None, bbox, score=score, ax=ax)
-        plt.show()
-
-
-if __name__ == '__main__':
-    main()

From 8694df5f32a17bb4f0ddbcf4fa7ff40bf360e0c2 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 15 Mar 2019 20:12:41 +0900
Subject: [PATCH 091/100] fix: drop mode kwarg from keypoint eval setup

---
 examples/keypoint_detection/eval_keypoint_detection.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/keypoint_detection/eval_keypoint_detection.py b/examples/keypoint_detection/eval_keypoint_detection.py
index 377e14f385..a5a7ca68d1 100644
--- a/examples/keypoint_detection/eval_keypoint_detection.py
+++ b/examples/keypoint_detection/eval_keypoint_detection.py
@@ -36,7 +36,6 @@ def setup(dataset, model_name, pretrained_model, batchsize):
             n_fg_class=n_fg_class,
             pretrained_model=pretrained_model,
             n_point=n_point,
-            mode='keypoint'
         )
         model.use_preset('evaluate')
 

From b3d3b4e3dc360e3090eeb8c1eff47595a895bff1 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 15 Mar 2019 20:24:55 +0900
Subject: [PATCH 092/100] fix: pass {'n_point': None} to prepare_pretrained_model

---
 chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py
index f74a890495..778c2e49d1 100644
--- a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py
+++ b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py
@@ -51,7 +51,7 @@ def __init__(self, n_fg_class=None, pretrained_model=None,
                  min_size=800, max_size=1333):
         param, path = utils.prepare_pretrained_model(
             {'n_fg_class': n_fg_class, 'n_point': n_point},
-            pretrained_model, self._models)
+            pretrained_model, self._models, {'n_point': None})
 
         base = self._base(n_class=1, arch='he')
         base.pick = ('res2', 'res3', 'res4', 'res5')

From 3abe75dde51104f2ac730719fe8e7d47a03fc5af Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 15 Mar 2019 20:25:30 +0900
Subject: [PATCH 093/100] fix train script

---
 examples/fpn/train_multi.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/fpn/train_multi.py b/examples/fpn/train_multi.py
index 9d3d08b633..4386acf084 100644
--- a/examples/fpn/train_multi.py
+++ b/examples/fpn/train_multi.py
@@ -177,7 +177,7 @@ def main():
         '--model',
         choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101',
                  'faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101'),
-        default='faster__rcnn_fpn_resnet50')
+        default='faster_rcnn_fpn_resnet50')
     parser.add_argument('--batchsize', type=int, default=16)
     parser.add_argument('--iteration', type=int, default=90000)
     parser.add_argument('--step', type=int, nargs='*', default=[60000, 80000])

From 3ce01cb9279912c0fed441118c2f45f8af6fd00a Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 15 Mar 2019 20:36:54 +0900
Subject: [PATCH 094/100] add test

---
 .../fpn_tests/test_faster_rcnn_fpn_resnet.py          | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tests/links_tests/model_tests/fpn_tests/test_faster_rcnn_fpn_resnet.py b/tests/links_tests/model_tests/fpn_tests/test_faster_rcnn_fpn_resnet.py
index cf5537ed3e..3ac43292fc 100644
--- a/tests/links_tests/model_tests/fpn_tests/test_faster_rcnn_fpn_resnet.py
+++ b/tests/links_tests/model_tests/fpn_tests/test_faster_rcnn_fpn_resnet.py
@@ -6,17 +6,21 @@
 
 from chainercv.links import FasterRCNNFPNResNet101
 from chainercv.links import FasterRCNNFPNResNet50
+from chainercv.links import MaskRCNNFPNResNet101
+from chainercv.links import MaskRCNNFPNResNet50
 from chainercv.utils.testing import attr
 
 
 @testing.parameterize(*testing.product({
-    'model': [FasterRCNNFPNResNet50, FasterRCNNFPNResNet101],
+    'model': [FasterRCNNFPNResNet50, FasterRCNNFPNResNet101,
+              MaskRCNNFPNResNet50, MaskRCNNFPNResNet101],
     'n_fg_class': [1, 5, 20],
 }))
 class TestFasterRCNNFPNResNet(unittest.TestCase):
 
     def setUp(self):
-        self.link = self.model(n_fg_class=self.n_fg_class)
+        self.link = self.model(
+            n_fg_class=self.n_fg_class, min_size=66)
 
     def _check_call(self):
         imgs = [
@@ -40,7 +44,8 @@ def test_call_gpu(self):
 
 
 @testing.parameterize(*testing.product({
-    'model': [FasterRCNNFPNResNet50, FasterRCNNFPNResNet101],
+    'model': [FasterRCNNFPNResNet50, FasterRCNNFPNResNet101,
+              MaskRCNNFPNResNet50, MaskRCNNFPNResNet101],
     'n_fg_class': [None, 10, 80],
     'pretrained_model': ['coco', 'imagenet'],
 }))

From c8a15262bca689a24bf13dff4168b7165201b18e Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 15 Mar 2019 20:45:09 +0900
Subject: [PATCH 095/100] doc

---
 README.md                                 |  2 +-
 chainercv/links/model/fpn/__init__.py     |  2 +
 docs/source/reference/links.rst           | 10 ++---
 docs/source/reference/links/fpn.rst       | 49 ++++++++++++++++++---
 docs/source/reference/links/mask_rcnn.rst | 52 -----------------------
 5 files changed, 49 insertions(+), 66 deletions(-)
 delete mode 100644 docs/source/reference/links/mask_rcnn.rst

diff --git a/README.md b/README.md
index 23115abb27..fd5b9acc9b 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ Supported tasks:
 + Image Classification ([ResNet](examples/resnet), [SENet](examples/senet), [VGG](examples/vgg))
 + Object Detection ([tutorial](http://chainercv.readthedocs.io/en/latest/tutorial/detection.html), [Faster R-CNN](examples/faster_rcnn), [FPN](examples/fpn), [SSD](examples/ssd), [YOLO](examples/yolo))
 + Semantic Segmentation ([SegNet](examples/segnet), [PSPNet](examples/pspnet))
-+ Instance Segmentation ([FCIS](examples/fcis),)
++ Instance Segmentation ([FCIS](examples/fcis), [Mask R-CNN](examples/fpn))
 
 # Guiding Principles
 ChainerCV is developed under the following three guiding principles.
diff --git a/chainercv/links/model/fpn/__init__.py b/chainercv/links/model/fpn/__init__.py
index 7f2f16d62e..e4ebd5aba0 100644
--- a/chainercv/links/model/fpn/__init__.py
+++ b/chainercv/links/model/fpn/__init__.py
@@ -10,5 +10,7 @@
 from chainercv.links.model.fpn.mask_head import MaskHead  # NOQA
 from chainercv.links.model.fpn.mask_head import mask_loss_post  # NOQA
 from chainercv.links.model.fpn.mask_head import mask_loss_pre  # NOQA
+from chainercv.links.model.fpn.mask_utils import mask_to_segm  # NOQA
+from chainercv.links.model.fpn.mask_utils import segm_to_mask  # NOQA
 from chainercv.links.model.fpn.rpn import RPN  # NOQA
 from chainercv.links.model.fpn.rpn import rpn_loss  # NOQA
diff --git a/docs/source/reference/links.rst b/docs/source/reference/links.rst
index 15001a98b3..7b4c9709b1 100644
--- a/docs/source/reference/links.rst
+++ b/docs/source/reference/links.rst
@@ -33,7 +33,6 @@ For more details, please read :func:`FasterRCNN.predict`.
 .. toctree::
 
    links/faster_rcnn
-   links/fpn
    links/ssd
    links/yolo
 
@@ -52,15 +51,12 @@ For more details, please read :func:`SegNetBasic.predict`.
    links/deeplab
 
 
-Instance Segmentation
-~~~~~~~~~~~~~~~~~~~~~
-
-Instance segmentation links share a common method :meth:`predict` to detect masks that cover objects in an image.
-For more details, please read :func:`MaskRCNN.predict`.
+Links for Multiple Tasks
+~~~~~~~~~~~~~~~~~~~~~~~~
 
 .. toctree::
 
-   links/mask_rcnn
+   links/fpn
 
 
 Classifiers
diff --git a/docs/source/reference/links/fpn.rst b/docs/source/reference/links/fpn.rst
index 5d267ff026..bd26896c27 100644
--- a/docs/source/reference/links/fpn.rst
+++ b/docs/source/reference/links/fpn.rst
@@ -18,6 +18,20 @@ FasterRCNNFPNResnet101
    :members:
 
 
+Instance Segmentation Links
+---------------------------
+
+MaskRCNNFPNResNet50
+~~~~~~~~~~~~~~~~~~~
+.. autoclass:: MaskRCNNFPNResNet50
+   :members:
+
+MaskRCNNFPNResNet101
+~~~~~~~~~~~~~~~~~~~~
+.. autoclass:: MaskRCNNFPNResNet101
+   :members:
+
+
 Utility
 -------
 
@@ -43,17 +57,40 @@ RPN
    :members:
    :special-members:  __call__
 
+MaskHead
+~~~~~~~~
+.. autoclass:: MaskHead
+   :members:
+   :special-members: __call__
+
+segm_to_mask
+~~~~~~~~~~~~
+.. autofunction:: segm_to_mask
+
+
 Train-only Utility
 ------------------
 
-bbox_head_loss_pre
-~~~~~~~~~~~~~~~~~~
-.. autofunction:: bbox_head_loss_pre
+bbox_loss_pre
+~~~~~~~~~~~~~
+.. autofunction:: bbox_loss_pre
 
-bbox_head_loss_post
-~~~~~~~~~~~~~~~~~~~
-.. autofunction:: bbox_head_loss_post
+bbox_loss_post
+~~~~~~~~~~~~~~
+.. autofunction:: bbox_loss_post
 
 rpn_loss
 ~~~~~~~~
 .. autofunction:: rpn_loss
+
+mask_loss_pre
+~~~~~~~~~~~~~
+.. autofunction:: mask_loss_pre
+
+mask_loss_post
+~~~~~~~~~~~~~~
+.. autofunction:: mask_loss_post
+
+mask_to_segm
+~~~~~~~~~~~~
+.. autofunction:: mask_to_segm
diff --git a/docs/source/reference/links/mask_rcnn.rst b/docs/source/reference/links/mask_rcnn.rst
deleted file mode 100644
index 9fce65c343..0000000000
--- a/docs/source/reference/links/mask_rcnn.rst
+++ /dev/null
@@ -1,52 +0,0 @@
-Mask R-CNN
-==========
-
-.. module:: chainercv.links.model.mask_rcnn
-
-
-Instance Segmentation Links
----------------------------
-
-MaskRCNNFPNResNet50
-~~~~~~~~~~~~~~~~~~~
-.. autoclass:: MaskRCNNFPNResNet50
-   :members:
-
-MaskRCNNFPNResNet101
-~~~~~~~~~~~~~~~~~~~~
-.. autoclass:: MaskRCNNFPNResNet101
-   :members:
-
-
-Utility
--------
-
-MaskRCNN
-~~~~~~~~
-.. autoclass:: MaskRCNN
-   :members:
-
-MaskHead
-~~~~~~~~
-.. autoclass:: MaskHead
-   :members:
-   :special-members: __call__
-
-segm_to_mask
-~~~~~~~~~~~~
-.. autofunction:: segm_to_mask
-
-Train-only Utility
-------------------
-
-mask_loss_pre
-~~~~~~~~~~~~~
-.. autofunction:: mask_loss_pre
-
-mask_loss_post
-~~~~~~~~~~~~~~
-.. autofunction:: mask_loss_post
-
-mask_to_segm
-~~~~~~~~~~~~
-.. autofunction:: mask_to_segm

From 3bd6f320643948c7c1aba06930d66beede30d14e Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Fri, 15 Mar 2019 20:50:51 +0900
Subject: [PATCH 096/100] flake8

---
 chainercv/links/model/fpn/__init__.py         |  8 +--
 chainercv/links/model/fpn/faster_rcnn.py      | 26 ++++---
 .../links/model/fpn/faster_rcnn_fpn_resnet.py |  3 +-
 .../model_tests/fpn_tests/test_mask_head.py   |  2 +-
 .../model_tests/fpn_tests/test_mask_utils.py  |  9 +--
 .../test_mask_rcnn_fpn_resnet.py              | 68 -------------------
 6 files changed, 26 insertions(+), 90 deletions(-)
 delete mode 100644 tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn_fpn_resnet.py

diff --git a/chainercv/links/model/fpn/__init__.py b/chainercv/links/model/fpn/__init__.py
index e4ebd5aba0..24edf211aa 100644
--- a/chainercv/links/model/fpn/__init__.py
+++ b/chainercv/links/model/fpn/__init__.py
@@ -1,15 +1,15 @@
+from chainercv.links.model.fpn.bbox_head import bbox_loss_post  # NOQA
+from chainercv.links.model.fpn.bbox_head import bbox_loss_pre  # NOQA
+from chainercv.links.model.fpn.bbox_head import BboxHead  # NOQA
 from chainercv.links.model.fpn.faster_rcnn import FasterRCNN  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet101  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet50  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet101  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet50  # NOQA
 from chainercv.links.model.fpn.fpn import FPN  # NOQA
-from chainercv.links.model.fpn.bbox_head import BboxHead  # NOQA
-from chainercv.links.model.fpn.bbox_head import bbox_loss_post  # NOQA
-from chainercv.links.model.fpn.bbox_head import bbox_loss_pre  # NOQA
-from chainercv.links.model.fpn.mask_head import MaskHead  # NOQA
 from chainercv.links.model.fpn.mask_head import mask_loss_post  # NOQA
 from chainercv.links.model.fpn.mask_head import mask_loss_pre  # NOQA
+from chainercv.links.model.fpn.mask_head import MaskHead  # NOQA
 from chainercv.links.model.fpn.mask_utils import mask_to_segm  # NOQA
 from chainercv.links.model.fpn.mask_utils import segm_to_mask  # NOQA
 from chainercv.links.model.fpn.rpn import RPN  # NOQA
diff --git a/chainercv/links/model/fpn/faster_rcnn.py b/chainercv/links/model/fpn/faster_rcnn.py
index 68b4506233..d6fe9d2de2 100644
--- a/chainercv/links/model/fpn/faster_rcnn.py
+++ b/chainercv/links/model/fpn/faster_rcnn.py
@@ -3,8 +3,8 @@
 import numpy as np
 
 import chainer
-import chainer.functions as F
 from chainer.backends import cuda
+import chainer.functions as F
 
 from chainercv.links.model.fpn.misc import scale_img
 
@@ -64,7 +64,8 @@ def __init__(self, extractor, rpn, bbox_head,
 
         self._store_rpn_outputs = 'rois' in self._return_values
         self._run_bbox = any([key in self._return_values
-                        for key in ['bboxes', 'labels', 'scores', 'masks']])
+                              for key in
+                              ['bboxes', 'labels', 'scores', 'masks']])
         self._run_mask = 'masks' in self._return_values
         super(FasterRCNN, self).__init__()
 
@@ -168,9 +169,10 @@ def predict(self, imgs):
             bboxes, labels, scores = self.bbox_head.decode(
                 bbox_rois, bbox_roi_indices, head_locs, head_confs,
                 scales, sizes, self.nms_thresh, self.score_thresh)
-            bboxes_cpu = [chainer.backends.cuda.to_cpu(bbox)
-                    for bbox in bboxes]
-            labels_cpu = [chainer.backends.cuda.to_cpu(label) for label in labels]
+            bboxes_cpu = [
+                chainer.backends.cuda.to_cpu(bbox) for bbox in bboxes]
+            labels_cpu = [
+                chainer.backends.cuda.to_cpu(label) for label in labels]
             scores_cpu = [cuda.to_cpu(score) for score in scores]
             output.update({'bboxes': bboxes_cpu, 'labels': labels_cpu,
                            'scores': scores_cpu})
@@ -182,7 +184,8 @@ def predict(self, imgs):
             mask_rois_before_reordering, mask_roi_indices_before_reordering =\
                 _list_to_flat(rescaled_bboxes)
             mask_rois, mask_roi_indices, order = self.mask_head.distribute(
-                mask_rois_before_reordering, mask_roi_indices_before_reordering)
+                mask_rois_before_reordering,
+                mask_roi_indices_before_reordering)
             with chainer.using_config(
                     'train', False), chainer.no_backprop_mode():
                 segms = F.sigmoid(
@@ -192,13 +195,14 @@ def predict(self, imgs):
             segms = _flat_to_list(
                 segms, mask_roi_indices_before_reordering, len(imgs))
             segms = [segm if segm is not None else
-                    self.xp.zeros(
-                        (0, self.mask_head.segm_size, self.mask_head.segm_size),
-                        dtype=np.float32)
-                    for segm in segms]
+                     self.xp.zeros(
+                         (0, self.mask_head.segm_size,
+                          self.mask_head.segm_size), dtype=np.float32)
+                     for segm in segms]
             segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms]
             # Currently MaskHead only supports numpy inputs
-            masks_cpu = self.mask_head.decode(segms, bboxes_cpu, labels_cpu, sizes)
+            masks_cpu = self.mask_head.decode(
+                segms, bboxes_cpu, labels_cpu, sizes)
             output.update({'masks': masks_cpu})
         return tuple([output[key] for key in self._return_values])
 
diff --git a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py
index debadb10ea..4a8e0a55ee 100644
--- a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py
+++ b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py
@@ -4,9 +4,9 @@
 import chainer.functions as F
 import chainer.links as L
 
+from chainercv.links.model.fpn.bbox_head import BboxHead
 from chainercv.links.model.fpn.faster_rcnn import FasterRCNN
 from chainercv.links.model.fpn.fpn import FPN
-from chainercv.links.model.fpn.bbox_head import BboxHead
 from chainercv.links.model.fpn.mask_head import MaskHead
 from chainercv.links.model.fpn.rpn import RPN
 from chainercv.links.model.resnet import ResNet101
@@ -189,7 +189,6 @@ class MaskRCNNFPNResNet101(MaskRCNNFPNResNet):
     }
 
 
-
 def _copyparams(dst, src):
     if isinstance(dst, chainer.Chain):
         for link in dst.children():
diff --git a/tests/links_tests/model_tests/fpn_tests/test_mask_head.py b/tests/links_tests/model_tests/fpn_tests/test_mask_head.py
index c8e0bc927c..116404273d 100644
--- a/tests/links_tests/model_tests/fpn_tests/test_mask_head.py
+++ b/tests/links_tests/model_tests/fpn_tests/test_mask_head.py
@@ -7,9 +7,9 @@
 from chainer import testing
 from chainer.testing import attr
 
-from chainercv.links.model.fpn import MaskHead
 from chainercv.links.model.fpn import mask_loss_post
 from chainercv.links.model.fpn import mask_loss_pre
+from chainercv.links.model.fpn import MaskHead
 
 from chainercv.utils import mask_to_bbox
 
diff --git a/tests/links_tests/model_tests/fpn_tests/test_mask_utils.py b/tests/links_tests/model_tests/fpn_tests/test_mask_utils.py
index 5ae85bf237..c6bcd360d0 100644
--- a/tests/links_tests/model_tests/fpn_tests/test_mask_utils.py
+++ b/tests/links_tests/model_tests/fpn_tests/test_mask_utils.py
@@ -5,21 +5,22 @@
 
 from chainer import testing
 
-from chainercv.links.model.fpn.mask_utils import segm_to_mask
 from chainercv.links.model.fpn.mask_utils import mask_to_segm
+from chainercv.links.model.fpn.mask_utils import segm_to_mask
 
 
 class TestSegmToMask(unittest.TestCase):
 
     def setUp(self):
         # When n_inst >= 3, the test fails.
-        # This is due to the fact that the transformed image of `transforms.resize`
-        # is misaligned to the corners.
+        # This is due to the fact that the transformed
+        # image of `transforms.resize` is misaligned to the corners.
         n_inst = 2
         self.segm_size = 3
         self.size = (36, 48)
 
-        self.segm = np.ones((n_inst, self.segm_size, self.segm_size), dtype=np.float32)
+        self.segm = np.ones(
+            (n_inst, self.segm_size, self.segm_size), dtype=np.float32)
         self.bbox = np.zeros((n_inst, 4), dtype=np.float32)
         for i in range(n_inst):
             self.bbox[i, 0] = 10 + i
diff --git a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn_fpn_resnet.py b/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn_fpn_resnet.py
deleted file mode 100644
index b7cedc364d..0000000000
--- a/tests/links_tests/model_tests/mask_rcnn_tests/test_mask_rcnn_fpn_resnet.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import numpy as np
-import unittest
-
-import chainer
-from chainer import testing
-from chainer.testing import attr
-
-from chainercv.links import MaskRCNNFPNResNet101
-from chainercv.links import MaskRCNNFPNResNet50
-
-
-@testing.parameterize(*testing.product({
-    'model': [MaskRCNNFPNResNet50, MaskRCNNFPNResNet101],
-    'n_fg_class': [1, 5, 20],
-}))
-class TestFasterRCNNFPNResNet(unittest.TestCase):
-
-    def setUp(self):
-        self.link = self.model(n_fg_class=self.n_fg_class)
-
-    def _check_call(self):
-        imgs = [
-            np.random.uniform(-1, 1, size=(3, 48, 48)).astype(np.float32),
-            np.random.uniform(-1, 1, size=(3, 32, 64)).astype(np.float32),
-        ]
-        x, _, _ = self.link.prepare(imgs)
-        with chainer.using_config('train', False):
-            self.link(self.link.xp.array(x))
-
-    @attr.slow
-    def test_call_cpu(self):
-        self._check_call()
-
-    @attr.gpu
-    @attr.slow
-    def test_call_gpu(self):
-        self.link.to_gpu()
-        self._check_call()
-
-
-@testing.parameterize(*testing.product({
-    'model': [MaskRCNNFPNResNet50, MaskRCNNFPNResNet101],
-    'n_fg_class': [None, 10, 80],
-    # 'pretrained_model': ['coco', 'imagenet'],
-    'pretrained_model': ['imagenet'],
-}))
-class TestFasterRCNNFPNResNetPretrained(unittest.TestCase):
-
-    @attr.slow
-    def test_pretrained(self):
-        kwargs = {
-            'n_fg_class': self.n_fg_class,
-            'pretrained_model': self.pretrained_model,
-        }
-
-        if self.pretrained_model == 'coco':
-            valid = self.n_fg_class in {None, 80}
-        elif self.pretrained_model == 'imagenet':
-            valid = self.n_fg_class is not None
-
-        if valid:
-            self.model(**kwargs)
-        else:
-            with self.assertRaises(ValueError):
-                self.model(**kwargs)
-
-
-testing.run_module(__name__, __file__)

From fd54af2dba29c649766350e3a8b3907a76ce7a9d Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Sat, 16 Mar 2019 00:01:07 +0900
Subject: [PATCH 097/100] fix doc

---
 chainercv/links/model/fpn/__init__.py         |  1 +
 chainercv/links/model/fpn/faster_rcnn.py      | 58 +++++++++++--------
 .../links/model/fpn/faster_rcnn_fpn_resnet.py | 56 +++++-------------
 chainercv/links/model/fpn/mask_head.py        |  2 +-
 docs/source/reference/links/fpn.rst           |  6 ++
 5 files changed, 57 insertions(+), 66 deletions(-)

diff --git a/chainercv/links/model/fpn/__init__.py b/chainercv/links/model/fpn/__init__.py
index 24edf211aa..f462c5230b 100644
--- a/chainercv/links/model/fpn/__init__.py
+++ b/chainercv/links/model/fpn/__init__.py
@@ -2,6 +2,7 @@
 from chainercv.links.model.fpn.bbox_head import bbox_loss_pre  # NOQA
 from chainercv.links.model.fpn.bbox_head import BboxHead  # NOQA
 from chainercv.links.model.fpn.faster_rcnn import FasterRCNN  # NOQA
+from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet101  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import FasterRCNNFPNResNet50  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet101  # NOQA
diff --git a/chainercv/links/model/fpn/faster_rcnn.py b/chainercv/links/model/fpn/faster_rcnn.py
index d6fe9d2de2..0be0d8ba4b 100644
--- a/chainercv/links/model/fpn/faster_rcnn.py
+++ b/chainercv/links/model/fpn/faster_rcnn.py
@@ -10,12 +10,9 @@
 
 
 class FasterRCNN(chainer.Chain):
-    """Base class of Feature Pyramid Networks.
+    """Base class of Faster R-CNN with FPN.
 
-    This is a base class of Feature Pyramid Networks [#]_.
-
-    .. [#] Tsung-Yi Lin et al.
-       Feature Pyramid Networks for Object Detection. CVPR 2017
+    This is a base class of Faster R-CNN with FPN.
 
     Args:
         extractor (Link): A link that extracts feature maps.
@@ -28,8 +25,10 @@ class FasterRCNN(chainer.Chain):
             :class:`~chainercv.links.model.fpn.BboxHead`.
             Please refer to the documentation found there.
         mask_head (Link): A link that has the same interface as
-            :class:`~chainercv.links.model.mask_rcnn.MaskRCNN`.
+            :class:`~chainercv.links.model.fpn.MaskHead`.
             Please refer to the documentation found there.
+        return_values (list of strings): Determines the values
+            returned by :meth:`predict`.
         min_size (int): A preprocessing paramter for :meth:`prepare`. Please
             refer to a docstring found for :meth:`prepare`.
         max_size (int): A preprocessing paramter for :meth:`prepare`. Note
@@ -119,29 +118,40 @@ def __call__(self, x):
         return hs, rois, roi_indices
 
     def predict(self, imgs):
-        """Segment object instances from images.
+        """Conduct inference on the given images.
+
+        The value returned by this method is decided based on
+        the argument :obj:`return_values` of :meth:`__init__`.
 
-        This method predicts instance-aware object regions for each image.
+        Examples:
+
+            >>> from chainercv.links import FasterRCNNFPNResNet50
+            >>> model = FasterRCNNFPNResNet50(
+            ...     pretrained_model='coco',
+            ...     return_values=['rois', 'bboxes', 'labels', 'scores'])
+            >>> rois, bboxes, labels, scores = model.predict(imgs)
 
         Args:
-            imgs (iterable of numpy.ndarray): Arrays holding images of shape
-                :math:`(B, C, H, W)`.  All images are in CHW and RGB format
-                and the range of their value is :math:`[0, 255]`.
+            imgs (iterable of numpy.ndarray): Inputs.
 
         Returns:
-           tuple of lists:
-           This method returns a tuple of three lists,
-           :obj:`(masks, labels, scores)`.
-
-           * **masks**: A list of boolean arrays of shape :math:`(R, H, W)`, \
-               where :math:`R` is the number of masks in a image. \
-               Each pixel holds value if it is inside the object inside or not.
-           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
-               Each value indicates the class of the masks. \
-               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
-               number of the foreground classes.
-           * **scores** : A list of float arrays of shape :math:`(R,)`. \
-               Each value indicates how confident the prediction is.
+            tuple of lists:
+            The table below shows the input and possible outputs.
+
+        .. csv-table::
+            :header: name, shape, dtype, format
+
+            :obj:`imgs`, ":math:`[(3, H, W)]`", :obj:`float32`, \
+            "RGB, :math:`[0, 255]`"
+            :obj:`rois`, ":math:`[(R', 4)]`", :obj:`float32`, \
+            ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`"
+            :obj:`bboxes`, ":math:`[(R, 4)]`", :obj:`float32`, \
+            ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`"
+            :obj:`scores`, ":math:`[(R,)]`", :obj:`float32`, \
+            --
+            :obj:`labels`, ":math:`[(R,)]`", :obj:`int32`, \
+            ":math:`[0, \#fg\_class - 1]`"
+            :obj:`masks`, ":math:`[(R, H, W)]`", :obj:`bool`, --
 
         """
         output = {}
diff --git a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py
index 4a8e0a55ee..1532d8c2ce 100644
--- a/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py
+++ b/chainercv/links/model/fpn/faster_rcnn_fpn_resnet.py
@@ -15,7 +15,7 @@
 
 
 class FasterRCNNFPNResNet(FasterRCNN):
-    """Base class for FasterRCNNFPNResNet50 and FasterRCNNFPNResNet101.
+    """Base class for Faster R-CNN with a ResNet backbone and FPN.
 
     A subclass of this class should have :obj:`_base` and :obj:`_models`.
 
@@ -38,6 +38,8 @@ class FasterRCNNFPNResNet(FasterRCNN):
             * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \
                 must be specified properly.
             * :obj:`None`: Do not load weights.
+        return_values (list of strings): Determines the values
+            returned by :meth:`predict`.
         min_size (int): A preprocessing paramter for :meth:`prepare`. Please \
             refer to :meth:`prepare`.
         max_size (int): A preprocessing paramter for :meth:`prepare`.
@@ -76,35 +78,24 @@ def __init__(self, n_fg_class=None, pretrained_model=None,
 
 
 class MaskRCNNFPNResNet(FasterRCNNFPNResNet):
-    """Feature Pyramid Networks with ResNet-50.
-
-    This is a model of Feature Pyramid Networks [#]_.
-    This model uses :class:`~chainercv.links.ResNet50` as
-    its base feature extractor.
-
-    .. [#] Tsung-Yi Lin et al.
-       Feature Pyramid Networks for Object Detection. CVPR 2017
+    """Mask R-CNN with a ResNet backbone and FPN.
 
+    Please refer to :class:`~chainercv.links.model.fpn.FasterRCNNFPNResNet`.
 
     """
 
     def __init__(self, n_fg_class=None, pretrained_model=None,
+                 return_values=['masks', 'labels', 'scores'],
                  min_size=800, max_size=1333):
         super(MaskRCNNFPNResNet, self).__init__(
-            n_fg_class, pretrained_model, ['masks', 'labels', 'scores'],
+            n_fg_class, pretrained_model, return_values,
             min_size, max_size)
 
 
 class FasterRCNNFPNResNet50(FasterRCNNFPNResNet):
-    """Feature Pyramid Networks with ResNet-50.
-
-    This is a model of Feature Pyramid Networks [#]_.
-    This model uses :class:`~chainercv.links.ResNet50` as
-    its base feature extractor.
-
-    .. [#] Tsung-Yi Lin et al.
-       Feature Pyramid Networks for Object Detection. CVPR 2017
+    """Faster R-CNN with ResNet-50 and FPN.
 
+    Please refer to :class:`~chainercv.links.model.fpn.FasterRCNNFPNResNet`.
 
     """
 
@@ -120,14 +111,9 @@ class FasterRCNNFPNResNet50(FasterRCNNFPNResNet):
 
 
 class FasterRCNNFPNResNet101(FasterRCNNFPNResNet):
-    """Feature Pyramid Networks with ResNet-101.
-
-    This is a model of Feature Pyramid Networks [#]_.
-    This model uses :class:`~chainercv.links.ResNet101` as
-    its base feature extractor.
+    """Faster R-CNN with ResNet-101 and FPN.
 
-    .. [#] Tsung-Yi Lin et al.
-       Feature Pyramid Networks for Object Detection. CVPR 2017
+    Please refer to :class:`~chainercv.links.model.fpn.FasterRCNNFPNResNet`.
 
     """
 
@@ -143,15 +129,9 @@ class FasterRCNNFPNResNet101(FasterRCNNFPNResNet):
 
 
 class MaskRCNNFPNResNet50(MaskRCNNFPNResNet):
-    """Feature Pyramid Networks with ResNet-50.
-
-    This is a model of Feature Pyramid Networks [#]_.
-    This model uses :class:`~chainercv.links.ResNet50` as
-    its base feature extractor.
-
-    .. [#] Tsung-Yi Lin et al.
-       Feature Pyramid Networks for Object Detection. CVPR 2017
+    """Mask R-CNN with ResNet-50 and FPN.
 
+    Please refer to :class:`~chainercv.links.model.fpn.FasterRCNNFPNResNet`.
 
     """
 
@@ -167,15 +147,9 @@ class MaskRCNNFPNResNet50(MaskRCNNFPNResNet):
 
 
 class MaskRCNNFPNResNet101(MaskRCNNFPNResNet):
-    """Feature Pyramid Networks with ResNet-50.
-
-    This is a model of Feature Pyramid Networks [#]_.
-    This model uses :class:`~chainercv.links.ResNet50` as
-    its base feature extractor.
-
-    .. [#] Tsung-Yi Lin et al.
-       Feature Pyramid Networks for Object Detection. CVPR 2017
+    """Mask R-CNN with ResNet-101 and FPN.
 
+    Please refer to :class:`~chainercv.links.model.fpn.FasterRCNNFPNResNet`.
 
     """
 
diff --git a/chainercv/links/model/fpn/mask_head.py b/chainercv/links/model/fpn/mask_head.py
index b89857fa5d..602713838b 100644
--- a/chainercv/links/model/fpn/mask_head.py
+++ b/chainercv/links/model/fpn/mask_head.py
@@ -228,7 +228,7 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_bboxes,
 
 def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels,
                    batchsize):
-    """Loss function for Head (post).
+    """Loss function for Mask Head (post).
 
      Args:
          segms (array): An array whose shape is :math:`(R, n\_class, M, M)`,
diff --git a/docs/source/reference/links/fpn.rst b/docs/source/reference/links/fpn.rst
index bd26896c27..4c01e2a44a 100644
--- a/docs/source/reference/links/fpn.rst
+++ b/docs/source/reference/links/fpn.rst
@@ -40,6 +40,12 @@ FasterRCNN
 .. autoclass:: FasterRCNN
    :members:
 
+FasterRCNNFPNResNet
+~~~~~~~~~~~~~~~~~~~
+.. autoclass:: FasterRCNNFPNResNet
+   :members:
+
+
 FPN
 ~~~
 .. autoclass:: FPN

From e71c322a0557309f53601d571798ec7bbb06ca56 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Sat, 16 Mar 2019 00:11:27 +0900
Subject: [PATCH 098/100] fix: export keypoint head from fpn package and fix indentation in predict

---
 chainercv/links/model/fpn/__init__.py    | 3 +++
 chainercv/links/model/fpn/faster_rcnn.py | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/chainercv/links/model/fpn/__init__.py b/chainercv/links/model/fpn/__init__.py
index 6a2b989025..e4ba9c853c 100644
--- a/chainercv/links/model/fpn/__init__.py
+++ b/chainercv/links/model/fpn/__init__.py
@@ -10,6 +10,9 @@
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet101  # NOQA
 from chainercv.links.model.fpn.faster_rcnn_fpn_resnet import MaskRCNNFPNResNet50  # NOQA
 from chainercv.links.model.fpn.fpn import FPN  # NOQA
+from chainercv.links.model.fpn.keypoint_head import keypoint_loss_post  # NOQA
+from chainercv.links.model.fpn.keypoint_head import keypoint_loss_pre  # NOQA
+from chainercv.links.model.fpn.keypoint_head import KeypointHead  # NOQA
 from chainercv.links.model.fpn.mask_head import mask_loss_post  # NOQA
 from chainercv.links.model.fpn.mask_head import mask_loss_pre  # NOQA
 from chainercv.links.model.fpn.mask_head import MaskHead  # NOQA
diff --git a/chainercv/links/model/fpn/faster_rcnn.py b/chainercv/links/model/fpn/faster_rcnn.py
index 5eceba3ce7..c37fe30c08 100644
--- a/chainercv/links/model/fpn/faster_rcnn.py
+++ b/chainercv/links/model/fpn/faster_rcnn.py
@@ -191,7 +191,7 @@ def predict(self, imgs):
             output.update({'bboxes': bboxes_cpu, 'labels': labels_cpu,
                            'scores': scores_cpu})
             rescaled_bboxes = [bbox * scale
-                                for scale, bbox in zip(scales, bboxes)]
+                               for scale, bbox in zip(scales, bboxes)]
         if self._run_mask:
             # Change bboxes to RoI and RoI indices format
             mask_rois_before_reordering, mask_roi_indices_before_reordering =\

From 8c9816ce5aac76e8d2f2bbbdb268d0c9b1a75664 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Sat, 16 Mar 2019 00:31:56 +0900
Subject: [PATCH 099/100] merge keypoint training into examples/fpn/train_multi.py

---
 examples/fpn/train_multi.py | 113 +++++++++++++++++++++++++++++++-----
 1 file changed, 97 insertions(+), 16 deletions(-)

diff --git a/examples/fpn/train_multi.py b/examples/fpn/train_multi.py
index 4386acf084..01c4c5963d 100644
--- a/examples/fpn/train_multi.py
+++ b/examples/fpn/train_multi.py
@@ -28,8 +28,15 @@
 from chainercv.links import FasterRCNNFPNResNet101
 from chainercv.links import FasterRCNNFPNResNet50
 
+from chainercv.datasets import coco_keypoint_names
+from chainercv.datasets import COCOKeypointDataset
+from chainercv.links import KeypointRCNNFPNResNet101
+from chainercv.links import KeypointRCNNFPNResNet50
+
 from chainercv.links.model.fpn import bbox_loss_post
 from chainercv.links.model.fpn import bbox_loss_pre
+from chainercv.links.model.fpn import keypoint_loss_post
+from chainercv.links.model.fpn import keypoint_loss_pre
 from chainercv.links.model.fpn import mask_loss_post
 from chainercv.links.model.fpn import mask_loss_pre
 from chainercv.links.model.fpn import rpn_loss
@@ -49,7 +56,8 @@ def __init__(self, model):
         with self.init_scope():
             self.model = model
 
-    def __call__(self, imgs, bboxes, labels, masks=None):
+    def __call__(self, imgs, bboxes, labels, masks=None,
+                 points=None, visibles=None):
         B = len(imgs)
         pad_size = np.array(
             [im.shape[1:] for im in imgs]).max(axis=0)
@@ -117,30 +125,63 @@ def __call__(self, imgs, bboxes, labels, masks=None):
                 mask_roi_indices[0] = self.xp.array([0], dtype=np.int32)
                 segms = self.model.mask_head(hs, mask_rois, mask_roi_indices)
                 mask_loss = 0 * F.sum(segms)
+
+        point_loss = 0
+        if points is not None:
+            points = [self.xp.array(point) for point in points]
+            visibles = [self.xp.array(visible) for visible in visibles]
+
+            point_rois, point_roi_indices, gt_head_points, gt_head_visibles =\
+                keypoint_loss_pre(
+                    rois, roi_indices, points, visibles, bboxes,
+                    head_gt_labels, self.model.keypoint_head.point_map_size)
+            n_roi = sum([len(roi) for roi in point_rois])
+            if n_roi > 0:
+                point_maps = self.model.keypoint_head(
+                    hs, point_rois, point_roi_indices)
+                point_loss = keypoint_loss_post(
+                    point_maps, point_roi_indices,
+                    gt_head_points, gt_head_visibles, B)
+            else:
+                # Compute dummy variables to complete the computational graph
+                point_rois[0] = self.xp.array([[0, 0, 1, 1]], dtype=np.float32)
+                point_roi_indices[0] = self.xp.array([0], dtype=np.int32)
+                point_maps = self.model.keypoint_head(
+                    hs, point_rois, point_roi_indices)
+                point_loss = 0 * F.sum(point_maps)
+
         loss = (rpn_loc_loss + rpn_conf_loss +
-                head_loc_loss + head_conf_loss + mask_loss)
+                head_loc_loss + head_conf_loss + mask_loss + point_loss)
         chainer.reporter.report({
             'loss': loss,
             'loss/rpn/loc': rpn_loc_loss, 'loss/rpn/conf': rpn_conf_loss,
             'loss/bbox_head/loc': head_loc_loss,
             'loss/bbox_head/conf': head_conf_loss,
-            'loss/mask_head': mask_loss},
+            'loss/mask_head': mask_loss,
+            'loss/keypoint_head': point_loss},
             self)
         return loss
 
 
 class Transform(object):
 
-    def __init__(self, min_size, max_size, mean):
+    def __init__(self, min_size, max_size, mean, mode):
+        if not isinstance(min_size, (tuple, list)):
+            min_size = (min_size,)
         self.min_size = min_size
         self.max_size = max_size
         self.mean = mean
+        self.mode = mode
 
     def __call__(self, in_data):
-        if len(in_data) == 4:
-            img, mask, label, bbox = in_data
-        else:
+        if self.mode == 'bbox':
             img, bbox, label = in_data
+        elif self.mode == 'instance_segmentation':
+            img, mask, label, bbox = in_data
+        elif self.mode == 'keypoint':
+            img, point, visible, label, bbox = in_data
+
+        original_size = img.shape[1:]
         # Flipping
         img, params = transforms.random_flip(
             img, x_random=True, return_param=True)
@@ -154,15 +195,21 @@ def __call__(self, in_data):
         img -= self.mean
         bbox = bbox * scale
 
-        if len(in_data) == 4:
+        if self.mode == 'bbox':
+            return img, bbox, label
+        elif self.mode == 'instance_segmentation':
             mask = transforms.flip(mask, x_flip=x_flip)
             mask = transforms.resize(
                 mask.astype(np.float32),
                 img.shape[1:],
                 interpolation=PIL.Image.NEAREST).astype(np.bool)
             return img, bbox, label, mask
-        else:
-            return img, bbox, label
+        elif self.mode == 'keypoint':
+            point = transforms.flip_point(
+                point, original_size, x_flip=x_flip)
+            point = transforms.resize_point(
+                point, original_size, img.shape[1:])
+            return img, bbox, label, None, point, visible
 
 
 def converter(batch, device=None):
@@ -170,13 +217,22 @@ def converter(batch, device=None):
     return tuple(list(v) for v in zip(*batch))
 
 
+def valid_point_annotation(visible):  # keep-or-drop test used by main()
+    if len(visible) == 0:  # no entries at all: drop
+        return False
+    min_keypoint_per_image = 10  # threshold on the summed visibility
+    n_visible = visible.sum()  # presumably a numpy array -- TODO confirm
+    return n_visible >= min_keypoint_per_image
+
+
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument('--data-dir', default='auto')
     parser.add_argument(
         '--model',
         choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101',
-                 'faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101'),
+                 'faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101',
+                 'keypoint_rcnn_fpn_resnet50', 'keypoint_rcnn_fpn_resnet101'),
         default='faster_rcnn_fpn_resnet50')
     parser.add_argument('--batchsize', type=int, default=16)
     parser.add_argument('--iteration', type=int, default=90000)
@@ -216,6 +272,16 @@ def main():
         model = MaskRCNNFPNResNet101(
             n_fg_class=len(coco_instance_segmentation_label_names),
             pretrained_model='imagenet')
+    elif args.model == 'keypoint_rcnn_fpn_resnet50':
+        mode = 'keypoint'
+        model = KeypointRCNNFPNResNet50(
+            n_fg_class=1, pretrained_model='imagenet',
+            n_point=len(coco_keypoint_names[0]))
+    elif args.model == 'keypoint_rcnn_fpn_resnet101':
+        mode = 'keypoint'
+        model = KeypointRCNNFPNResNet101(
+            n_fg_class=1, pretrained_model='imagenet',
+            n_point=len(coco_keypoint_names[0]))
 
     model.use_preset('evaluate')
     train_chain = TrainChain(model)
@@ -223,17 +289,30 @@ def main():
     train_chain.to_gpu()
 
     if mode == 'bbox':
+        transform = Transform(
+            model.min_size, model.max_size, model.extractor.mean, mode)
         train = TransformDataset(
             COCOBboxDataset(
                 data_dir=args.data_dir, year='2017', split='train'),
-            ('img', 'bbox', 'label'),
-            Transform(model.min_size, model.max_size, model.extractor.mean))
+            ('img', 'bbox', 'label'), transform)
     elif mode == 'instance_segmentation':
+        transform = Transform(
+            model.min_size, model.max_size, model.extractor.mean, mode)
         train = TransformDataset(
             COCOInstanceSegmentationDataset(
                 data_dir=args.data_dir, split='train', return_bbox=True),
-            ('img', 'bbox', 'label', 'mask'),
-            Transform(model.min_size, model.max_size, model.extractor.mean))
+            ('img', 'bbox', 'label', 'mask'), transform)
+    elif mode == 'keypoint':
+        train = COCOKeypointDataset(data_dir=args.data_dir, split='train')
+        indices = [i for i, visible in enumerate(train.slice[:, 'visible'])
+                   if valid_point_annotation(visible)]
+        train = train.slice[indices]
+        transform = Transform(
+            (640, 672, 704, 736, 768, 800),
+            model.max_size, model.extractor.mean, mode)
+        train = TransformDataset(
+            train,
+            ('img', 'bbox', 'label', 'mask', 'point', 'visible'), transform)
 
     if comm.rank == 0:
         indices = np.arange(len(train))
@@ -257,6 +336,8 @@ def main():
     for link in model.links():
         if isinstance(link, L.BatchNormalization):
             link.disable_update()
+    if mode == 'keypoint':
+        model.keypoint_head.upsample.disable_update()
 
     n_iteration = args.iteration * 16 / args.batchsize
     updater = training.updaters.StandardUpdater(
@@ -292,7 +373,7 @@ def lr_schedule(trainer):
             ['epoch', 'iteration', 'lr', 'main/loss',
              'main/loss/rpn/loc', 'main/loss/rpn/conf',
              'main/loss/bbox_head/loc', 'main/loss/bbox_head/conf',
-             'main/loss/mask_head'
+             'main/loss/mask_head', 'main/loss/keypoint_head'
              ]),
             trigger=log_interval)
         trainer.extend(extensions.ProgressBar(update_interval=10))

From ee08fbb6fed67c92157163775c16f0a8f3ad4d32 Mon Sep 17 00:00:00 2001
From: Yusuke Niitani <yuyuniitani@gmail.com>
Date: Sat, 16 Mar 2019 00:44:39 +0900
Subject: [PATCH 100/100] fix: select losses by mode in TrainChain and sample min_size at random

---
 examples/fpn/train_multi.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/examples/fpn/train_multi.py b/examples/fpn/train_multi.py
index 01c4c5963d..f0dd4d0e30 100644
--- a/examples/fpn/train_multi.py
+++ b/examples/fpn/train_multi.py
@@ -1,6 +1,7 @@
 import argparse
 import multiprocessing
 import numpy as np
+import random
 import PIL
 
 import chainer
@@ -51,10 +52,11 @@
 
 class TrainChain(chainer.Chain):
 
-    def __init__(self, model):
+    def __init__(self, model, mode):
         super(TrainChain, self).__init__()
         with self.init_scope():
             self.model = model
+        self.mode = mode
 
     def __call__(self, imgs, bboxes, labels, masks=None,
                  points=None, visibles=None):
@@ -99,7 +101,7 @@ def __call__(self, imgs, bboxes, labels, masks=None,
             roi_indices, head_gt_locs, head_gt_labels, B)
 
         mask_loss = 0
-        if masks is not None:
+        if self.mode == 'instance_segmentation':
             # For reducing unnecessary CPU/GPU copy, `masks` is kept in CPU.
             pad_masks = [
                 np.zeros(
@@ -127,7 +129,7 @@ def __call__(self, imgs, bboxes, labels, masks=None,
                 mask_loss = 0 * F.sum(segms)
 
         point_loss = 0
-        if points is not None:
+        if self.mode == 'keypoint':
             points = [self.xp.array(point) for point in points]
             visibles = [self.xp.array(visible) for visible in visibles]
 
@@ -190,8 +192,9 @@ def __call__(self, in_data):
             bbox, img.shape[1:], x_flip=x_flip)
 
         # Scaling and mean subtraction
+        min_size = random.choice(self.min_size)
         img, scale = scale_img(
-            img, self.min_size, self.max_size)
+            img, min_size, self.max_size)
         img -= self.mean
         bbox = bbox * scale
 
@@ -284,7 +287,7 @@ def main():
             n_point=len(coco_keypoint_names[0]))
 
     model.use_preset('evaluate')
-    train_chain = TrainChain(model)
+    train_chain = TrainChain(model, mode)
     chainer.cuda.get_device_from_id(device).use()
     train_chain.to_gpu()