
Commit

Merge branch 'dev-1.x' of github.com:open-mmlab/mmediting into plyfager/mscoco
plyfager committed Dec 13, 2022
2 parents b826a3f + 69675c3 commit 8862d3d
Showing 8 changed files with 359 additions and 40 deletions.
138 changes: 138 additions & 0 deletions docs/en/howto/dataset.md
@@ -198,6 +198,144 @@ dataset = BasicFramesDataset(
img=['img1.png', 'img3.png'], gt=['img2.png']))
```

### BasicConditionalDataset

**BasicConditionalDataset** `mmedit.datasets.BasicConditionalDataset` is designed for conditional GANs (e.g., SAGAN, BigGAN). This dataset loads labels from an annotation file. `BasicConditionalDataset` supports three kinds of annotation, as follows:

#### 1. Annotation file read by line (e.g., txt)

Sample files structure:

```
data_prefix/
├── folder_1
│ ├── xxx.png
│ ├── xxy.png
│ └── ...
└── folder_2
├── 123.png
├── nsdf3.png
└── ...
```

Sample annotation file (the first column is the image path and the second column is the category index):

```
folder_1/xxx.png 0
folder_1/xxy.png 1
folder_2/123.png 5
folder_2/nsdf3.png 3
...
```
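
Each line is split on its last space into an image path and a label, and purely numeric labels are converted to integers when the data list is built. A minimal parsing sketch (the sample lines are illustrative and mirror the `rsplit`-based loader added in this commit):

```python
# Sketch: how line-based annotations become (path, label) pairs.
# Mirrors the rsplit-based parsing in BasicConditionalDataset;
# the sample lines below are illustrative.
lines = ['folder_1/xxx.png 0', 'folder_1/xxy.png 1', 'folder_2/123.png 5']
samples = [line.strip().rsplit(' ', 1) for line in lines]
# Digit labels are converted to int, other labels are kept as-is.
samples = [(path, int(label) if label.isdigit() else label)
           for path, label in samples]
print(samples)
# [('folder_1/xxx.png', 0), ('folder_1/xxy.png', 1), ('folder_2/123.png', 5)]
```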

Config example for ImageNet dataset:

```python
dataset = dict(
    type='BasicConditionalDataset',
    data_root='./data/imagenet/',
    ann_file='meta/train.txt',
    data_prefix='train',
    pipeline=train_pipeline)
```

#### 2. Dict-based annotation file (e.g., json)

Sample files structure:

```
data_prefix/
├── folder_1
│ ├── xxx.png
│ ├── xxy.png
│ └── ...
└── folder_2
├── 123.png
├── nsdf3.png
└── ...
```

Sample annotation file (the key is the image path and the value is the label):

```
{
"folder_1/xxx.png": [1, 2, 3, 4],
"folder_1/xxy.png": [2, 4, 1, 0],
"folder_2/123.png": [0, 9, 8, 1],
"folder_2/nsdf3.png", [1, 0, 0, 2],
...
}
```

Config example for EG3D (shapenet-car) dataset:

```python
dataset = dict(
type='BasicConditionalDataset',
data_root='./data/eg3d/shapenet-car',
ann_file='annotation.json',
pipeline=train_pipeline)
```

In this kind of annotation, labels can be of any type and are not restricted to an index.
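
For example, such a dict-based annotation file can be written with the standard library alone; the paths and label vectors below are made up for illustration:

```python
# Sketch: writing a dict-based (json) annotation file in which each value
# is an arbitrary label, here a 4-dimensional vector. Paths and label
# values are purely illustrative.
import json

annotations = {
    'folder_1/xxx.png': [1, 2, 3, 4],
    'folder_1/xxy.png': [2, 4, 1, 0],
    'folder_2/123.png': [0, 9, 8, 1],
}
with open('annotation.json', 'w') as f:
    json.dump(annotations, f, indent=4)
```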

#### 3. Folder-based annotation (no annotation file needed)

Sample files structure:

```
data_prefix/
├── class_x
│ ├── xxx.png
│ ├── xxy.png
│ ├── ...
│ └── xxz.png
└── class_y
├── 123.png
├── nsdf3.png
├── ...
└── asd932_.png
```

If `ann_file` is specified, the dataset is built in one of the first two ways; otherwise, the folder-based (third) way is used, as in the config sketch below.
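
A minimal config sketch for the folder-based case, where no `ann_file` is given and the category of each image is derived from its sub-folder name (the `data_root` path below is hypothetical):

```python
dataset = dict(
    type='BasicConditionalDataset',
    data_root='./data/my_dataset',  # hypothetical path
    data_prefix='train',
    pipeline=train_pipeline)
```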

### ImageNet Dataset and CIFAR10 Dataset

**ImageNet Dataset** `mmedit.datasets.ImageNet` and **CIFAR10 Dataset** `mmedit.datasets.CIFAR10` are specifically designed for the ImageNet and CIFAR10 datasets. Both are wrappers around `BasicConditionalDataset`. You can use them to load data from ImageNet and CIFAR10 easily.

Config example for ImageNet:

```python
pipeline = [
dict(type='LoadImageFromFile', key='img'),
dict(type='RandomCropLongEdge', keys=['img']),
dict(type='Resize', scale=(128, 128), keys=['img'], backend='pillow'),
dict(type='Flip', keys=['img'], flip_ratio=0.5, direction='horizontal'),
dict(type='PackEditInputs')
]

dataset = dict(
    type='ImageNet',
    data_root='./data/imagenet/',
    ann_file='meta/train.txt',
    data_prefix='train',
    pipeline=pipeline)
```

Config example for CIFAR10:

```python
pipeline = [dict(type='PackEditInputs')]

dataset = dict(
type='CIFAR10',
data_root='./data',
data_prefix='cifar10',
test_mode=False,
pipeline=pipeline)
```
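
Since both wrappers are standard MMEngine datasets, they can be built from the config dict through the registry. The sketch below assumes the registry is exposed as `mmedit.registry.DATASETS`:

```python
# Hedged usage sketch: build the CIFAR10 dataset defined above and inspect
# one packed sample. Assumes `mmedit.registry.DATASETS` is available.
from mmedit.registry import DATASETS

cifar10 = DATASETS.build(dataset)
print(len(cifar10))   # number of training samples
sample = cifar10[0]   # a dict produced by the PackEditInputs transform
print(sample.keys())
```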

### AdobeComp1kDataset

**AdobeComp1kDataset** `mmedit.datasets.AdobeComp1kDataset`
50 changes: 43 additions & 7 deletions mmedit/datasets/basic_conditional_dataset.py
@@ -13,15 +13,15 @@

@DATASETS.register_module()
class BasicConditionalDataset(BaseDataset):
"""Custom dataset for conditional GAN. This class is the combination of
`BaseDataset` (https://github.com/open-
"""Custom dataset for conditional GAN. This class is based on the
combination of `BaseDataset` (https://github.com/open-
mmlab/mmclassification/blob/1.x/mmcls/datasets/base_dataset.py) # noqa and
`CustomDataset` (https://github.com/open-
mmlab/mmclassification/blob/1.x/mmcls/datasets/custom.py). # noqa.
The dataset supports two kinds of annotation format.
1. An annotation file is provided, and each line indicates a sample:
1. An annotation file read by line (e.g., txt) is provided, and each line indicates a sample:
The sample files: ::
@@ -47,7 +47,35 @@ class BasicConditionalDataset(BaseDataset):
Please specify the name of categories by the argument ``classes``
or ``metainfo``.
2. The samples are arranged in the specific way: ::
2. A dict-based annotation file (e.g., json) is provided, key and value
indicate the path and label of the sample:
The sample files: ::
data_prefix/
├── folder_1
│ ├── xxx.png
│ ├── xxy.png
│ └── ...
└── folder_2
├── 123.png
├── nsdf3.png
└── ...
The annotation file (the key is the image path and the value is the label): ::
{
"folder_1/xxx.png": [1, 2, 3, 4],
"folder_1/xxy.png": [2, 4, 1, 0],
"folder_2/123.png": [0, 9, 8, 1],
"folder_2/nsdf3.png", [1, 0, 0, 2],
...
}
In this kind of annotation, labels can be of any type and are not restricted to an index.
3. The samples are arranged in the specific way: ::
data_prefix/
├── class_x
@@ -62,7 +90,7 @@ class BasicConditionalDataset(BaseDataset):
└── asd932_.png
If the ``ann_file`` is specified, the dataset will be generated by the
first way, otherwise, try the second way.
first two ways, otherwise, try the third way.
Args:
ann_file (str): Annotation file path. Defaults to ''.
@@ -156,9 +184,14 @@ def load_data_list(self):

if not self.ann_file:
samples = self._find_samples(file_client)
else:
elif self.ann_file.endswith('json'):
samples = mmengine.fileio.io.load(self.ann_file)
samples = [[name, label] for name, label in samples.items()]
elif self.ann_file.endswith('txt'):
lines = mmengine.list_from_file(self.ann_file)
samples = [x.strip().rsplit(' ', 1) for x in lines]
else:
raise TypeError('Only support \'json\' and \'txt\' as annotation.')

def add_prefix(filename, prefix=''):
if not prefix:
@@ -169,7 +202,10 @@ def add_prefix(filename, prefix=''):
data_list = []
for filename, gt_label in samples:
img_path = add_prefix(filename, self.img_prefix)
info = {'img_path': img_path, 'gt_label': int(gt_label)}
# convert digit label to int
if isinstance(gt_label, str):
gt_label = int(gt_label) if gt_label.isdigit() else gt_label
info = {'img_path': img_path, 'gt_label': gt_label}
data_list.append(info)
return data_list

14 changes: 10 additions & 4 deletions mmedit/models/editors/disco_diffusion/guider.py
@@ -10,7 +10,9 @@
import torch.nn.functional as F
import torchvision.transforms as T
import torchvision.transforms.functional as TF
from mmengine.utils import digit_version
from resize_right import resize
from torchvision import __version__ as TORCHVISION_VERSION

from mmedit.models.losses import tv_loss
from .secondary_model import alpha_sigma_to_t
@@ -206,13 +208,17 @@ def __init__(self,
self.IC_Size_Pow = IC_Size_Pow
self.IC_Grey_P = IC_Grey_P

random_affine_args = dict(degrees=10, translate=(0.05, 0.05))
if digit_version(TORCHVISION_VERSION) >= digit_version('0.9.0'):
random_affine_args['interpolation'] = T.InterpolationMode.BILINEAR
else:
from PIL import Image
random_affine_args['resample'] = Image.NEAREST

self.augs = T.Compose([
T.RandomHorizontalFlip(p=0.5),
T.Lambda(lambda x: x + torch.randn_like(x) * 0.01),
T.RandomAffine(
degrees=10,
translate=(0.05, 0.05),
interpolation=T.InterpolationMode.BILINEAR),
T.RandomAffine(**random_affine_args),
T.Lambda(lambda x: x + torch.randn_like(x) * 0.01),
T.RandomGrayscale(p=0.1),
T.Lambda(lambda x: x + torch.randn_like(x) * 0.01),
14 changes: 14 additions & 0 deletions tests/data/dataset/anno.json
@@ -0,0 +1,14 @@
{
"a/1.JPG": [
1,
2,
3,
4
],
"b/2.jpeg": [
1,
4,
5,
3
]
}
Empty file added: tests/data/dataset/wrong.yml