# Source code for mmagic.datasets.transforms.fgbg
# Copyright (c) OpenMMLab. All rights reserved.
"""Augmentation on foreground and background."""
import numbers
import os.path as osp
import mmcv
import numpy as np
from mmcv.transforms import BaseTransform
from mmengine.fileio import get_file_backend
from mmagic.registry import TRANSFORMS
from mmagic.utils import add_gaussian_noise, adjust_gamma
@TRANSFORMS.register_module()
class CompositeFg(BaseTransform):
    """Composite foreground with a random foreground.

    This class composites the current training sample with additional data
    randomly (could be from the same dataset). With probability 0.5, the
    sample will be composited with a random sample from the specified
    directory. The composition is performed as:

    .. math::
        fg_{new} = \\alpha_1 * fg_1 + (1 - \\alpha_1) * fg_2

        \\alpha_{new} = 1 - (1 - \\alpha_1) * (1 - \\alpha_2)

    where :math:`(fg_1, \\alpha_1)` is from the current sample and
    :math:`(fg_2, \\alpha_2)` is the randomly loaded sample. With the above
    composition, :math:`\\alpha_{new}` is still in `[0, 1]`.

    Required keys are "alpha" and "fg". Modified keys are "alpha" and "fg".

    Args:
        fg_dirs (str | list[str]): Path of directories to load foreground
            images from.
        alpha_dirs (str | list[str]): Path of directories to load alpha
            mattes from.
        interpolation (str): Interpolation method of `mmcv.imresize` to
            resize the randomly loaded images. Default: 'nearest'.
    """

    def __init__(self, fg_dirs, alpha_dirs, interpolation='nearest'):
        # TODO try fetch the path from dataset
        self.fg_dirs = fg_dirs if isinstance(fg_dirs, list) else [fg_dirs]
        self.alpha_dirs = alpha_dirs if isinstance(alpha_dirs,
                                                   list) else [alpha_dirs]
        self.interpolation = interpolation
        # Use the normalized list: indexing the raw ``fg_dirs`` argument
        # would return the first *character* when a plain string is passed.
        self.file_backend = get_file_backend(uri=self.fg_dirs[0])
        self.fg_list, self.alpha_list = self._get_file_list(
            self.fg_dirs, self.alpha_dirs)

    def transform(self, results: dict) -> dict:
        """Transform function.

        Args:
            results (dict): A dict containing the necessary information and
                data for augmentation.

        Returns:
            dict: A dict containing the processed data and information.
        """
        fg = results['fg']
        alpha = results['alpha'] / 255.0  # float64, H, W, 1
        h, w = results['fg'].shape[:2]

        # randomly select fg
        if np.random.rand() < 0.5:
            idx = np.random.randint(len(self.fg_list))
            fg2_bytes = self.file_backend.get(self.fg_list[idx])
            fg2 = mmcv.imfrombytes(fg2_bytes)
            alpha2_bytes = self.file_backend.get(self.alpha_list[idx])
            alpha2 = mmcv.imfrombytes(alpha2_bytes, flag='grayscale')
            alpha2 = alpha2 / 255.0  # float64

            # resize the extra sample to match the current sample
            fg2 = mmcv.imresize(fg2, (w, h), interpolation=self.interpolation)
            alpha2 = mmcv.imresize(
                alpha2, (w, h), interpolation=self.interpolation)
            # grayscale resize drops the channel axis; restore (H, W, 1)
            alpha2 = alpha2[..., None]

            # the overlap of two 50% transparency will be 75%
            alpha_tmp = 1 - (1 - alpha) * (1 - alpha2)
            # if the result alpha is all-one, then we avoid composition
            if np.any(alpha_tmp < 1):
                # composite fg with fg2
                fg = fg * alpha + fg2 * (1 - alpha)
                alpha = alpha_tmp

        results['fg'] = fg
        # store alpha back in the [0, 255] range expected downstream
        results['alpha'] = alpha * 255
        return results

    def _get_file_list(self, fg_dirs, alpha_dirs):
        """Collect matched (fg, alpha) file paths from the given directories.

        Args:
            fg_dirs (list[str]): Foreground image directories.
            alpha_dirs (list[str]): Alpha matte directories, paired with
                ``fg_dirs`` positionally.

        Returns:
            tuple[list[str], list[str]]: Full paths of all foreground images
            and their alpha mattes, aligned by index.
        """
        all_fg_list = list()
        all_alpha_list = list()
        for fg_dir, alpha_dir in zip(fg_dirs, alpha_dirs):
            # sort both listings so pairing by index matches pairing by name
            fg_list = sorted(
                self.file_backend.list_dir_or_file(fg_dir, list_dir=False))
            alpha_list = sorted(
                self.file_backend.list_dir_or_file(alpha_dir, list_dir=False))
            # we assume the file names for fg and alpha are the same
            assert len(fg_list) == len(alpha_list), (
                f'{fg_dir} and {alpha_dir} should have the same number of '
                f'images ({len(fg_list)} differs from ({len(alpha_list)})')
            fg_list = [osp.join(fg_dir, fg) for fg in fg_list]
            alpha_list = [osp.join(alpha_dir, alpha) for alpha in alpha_list]

            all_fg_list.extend(fg_list)
            all_alpha_list.extend(alpha_list)
        return all_fg_list, all_alpha_list

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += (f'(fg_dirs={repr(self.fg_dirs)}, '
                     f'alpha_dirs={repr(self.alpha_dirs)}, '
                     f'interpolation={repr(self.interpolation)})')
        return repr_str
@TRANSFORMS.register_module()
class MergeFgAndBg(BaseTransform):
    """Composite foreground image and background image with alpha.

    Required keys are "alpha", "fg" and "bg", added key is "merged".
    """

    def transform(self, results: dict) -> dict:
        """Transform function.

        Args:
            results (dict): A dict containing the necessary information and
                data for augmentation.

        Returns:
            dict: A dict containing the processed data and information.
        """
        # alpha is stored in [0, 255]; normalize to [0, 1] for blending
        alpha = results['alpha'].astype(np.float32) / 255.
        fg = results['fg']
        bg = results['bg']
        # standard alpha compositing: merged = alpha * fg + (1 - alpha) * bg
        merged = fg * alpha + (1. - alpha) * bg
        results['merged'] = merged
        return results
@TRANSFORMS.register_module()
class PerturbBg(BaseTransform):
    """Randomly add gaussian noise or gamma change to background image.

    Required key is "bg", added key is "noisy_bg".

    Args:
        gamma_ratio (float, optional): The probability to use gamma
            correction instead of gaussian noise. Defaults to 0.6.
    """

    def __init__(self, gamma_ratio=0.6):
        if gamma_ratio < 0 or gamma_ratio > 1:
            raise ValueError('gamma_ratio must be a float between [0, 1], '
                             f'but got {gamma_ratio}')
        self.gamma_ratio = gamma_ratio

    def transform(self, results: dict) -> dict:
        """Transform function.

        Args:
            results (dict): A dict containing the necessary information and
                data for augmentation.

        Returns:
            dict: A dict containing the processed data and information.
        """
        # with probability (1 - gamma_ratio), perturb with gaussian noise
        if np.random.rand() >= self.gamma_ratio:
            # generate gaussian noise with random gaussian N([-7, 7), [2, 6))
            mu = np.random.randint(-7, 7)
            sigma = np.random.randint(2, 6)
            results['noisy_bg'] = add_gaussian_noise(results['bg'], mu, sigma)
        else:
            # adjust gamma in a range of N(1, 0.12)
            gamma = np.random.normal(1, 0.12)
            results['noisy_bg'] = adjust_gamma(results['bg'], gamma)
        return results
@TRANSFORMS.register_module()
class RandomJitter(BaseTransform):
    """Randomly jitter the foreground in hsv space.

    The jitter range of hue is adjustable while the jitter ranges of
    saturation and value are adaptive to the images. Side effect: the "fg"
    image will be converted to `np.float32`.

    Required keys are "fg" and "alpha", modified key is "fg".

    Args:
        hue_range (float | tuple[float]): Range of hue jittering. If it is a
            float instead of a tuple like (min, max), the range of hue
            jittering will be (-hue_range, +hue_range). Default: 40.
    """

    def __init__(self, hue_range=40):
        if isinstance(hue_range, numbers.Number):
            assert hue_range >= 0, ('If hue_range is a single number, '
                                    'it must be positive.')
            self.hue_range = (-hue_range, hue_range)
        else:
            assert isinstance(hue_range, tuple) and len(hue_range) == 2, \
                'hue_range should be a tuple and it must be of length 2.'
            self.hue_range = hue_range

    def transform(self, results):
        """transform function.

        Args:
            results (dict): A dict containing the necessary information and
                data for augmentation.

        Returns:
            dict: A dict containing the processed data and information.
        """
        fg, alpha = results['fg'], results['alpha']
        # assumes alpha is (H, W, 1); squeeze to (H, W) for masking
        alpha = alpha[:, :, 0]

        # convert to HSV space;
        # convert to float32 image to keep precision during space conversion.
        fg = mmcv.bgr2hsv(fg.astype(np.float32) / 255)

        # Hue noise: shift hue and wrap around the 360-degree circle
        hue_jitter = np.random.randint(self.hue_range[0], self.hue_range[1])
        fg[:, :, 0] = np.remainder(fg[:, :, 0] + hue_jitter, 360)

        # Saturation noise: statistics taken over foreground pixels only
        sat_mean = fg[:, :, 1][alpha > 0].mean()
        # jitter saturation within range (1.1 - sat_mean) * [-0.1, 0.1]
        sat_jitter = (1.1 - sat_mean) * (np.random.rand() * 0.2 - 0.1)
        sat = fg[:, :, 1]
        sat = np.abs(sat + sat_jitter)
        # reflect values above 1 back into [0, 1]
        sat[sat > 1] = 2 - sat[sat > 1]
        fg[:, :, 1] = sat

        # Value noise: same reflection scheme as saturation
        val_mean = fg[:, :, 2][alpha > 0].mean()
        # jitter value within range (1.1 - val_mean) * [-0.1, 0.1]
        val_jitter = (1.1 - val_mean) * (np.random.rand() * 0.2 - 0.1)
        val = fg[:, :, 2]
        val = np.abs(val + val_jitter)
        val[val > 1] = 2 - val[val > 1]
        fg[:, :, 2] = val

        # convert back to BGR space and rescale to [0, 255]
        fg = mmcv.hsv2bgr(fg)
        results['fg'] = fg * 255
        return results
@TRANSFORMS.register_module()
class RandomLoadResizeBg(BaseTransform):
    """Randomly load a background image and resize it.

    Required key is "fg", added key is "bg".

    Args:
        bg_dir (str): Path of directory to load background images from.
        flag (str): Loading flag for images. Default: 'color'.
        channel_order (str): Order of channel, candidates are 'bgr' and
            'rgb'. Default: 'bgr'.
    """

    def __init__(self, bg_dir, flag='color', channel_order='bgr'):
        self.bg_dir = bg_dir
        self.file_backend = get_file_backend(uri=bg_dir)
        # list the directory once at construction time
        self.bg_list = list(
            self.file_backend.list_dir_or_file(bg_dir, list_dir=False))
        self.flag = flag
        self.channel_order = channel_order

    def transform(self, results: dict) -> dict:
        """Transform function.

        Args:
            results (dict): A dict containing the necessary information and
                data for augmentation.

        Returns:
            dict: A dict containing the processed data and information.
        """
        # resize the background to the foreground's spatial size
        h, w = results['fg'].shape[:2]
        idx = np.random.randint(len(self.bg_list))
        filepath = f'{self.bg_dir}/{self.bg_list[idx]}'
        img_bytes = self.file_backend.get(filepath)
        img = mmcv.imfrombytes(
            img_bytes, flag=self.flag, channel_order=self.channel_order)  # HWC
        bg = mmcv.imresize(img, (w, h), interpolation='bicubic')
        results['bg'] = bg
        return results