Shortcuts

Source code for mmagic.datasets.transforms.fgbg

# Copyright (c) OpenMMLab. All rights reserved.
"""Augmentation on foreground and background."""

import numbers
import os.path as osp

import mmcv
import numpy as np
from mmcv.transforms import BaseTransform
from mmengine.fileio import get_file_backend

from mmagic.registry import TRANSFORMS
from mmagic.utils import add_gaussian_noise, adjust_gamma


@TRANSFORMS.register_module()
class CompositeFg(BaseTransform):
    """Composite foreground with a random foreground.

    This class composites the current training sample with additional data
    randomly (could be from the same dataset). With probability 0.5, the
    sample will be composited with a random sample from the specified
    directory. The composition is performed as:

    .. math::
        fg_{new} = \\alpha_1 * fg_1 + (1 - \\alpha_1) * fg_2

        \\alpha_{new} = 1 - (1 - \\alpha_1) * (1 - \\alpha_2)

    where :math:`(fg_1, \\alpha_1)` is from the current sample and
    :math:`(fg_2, \\alpha_2)` is the randomly loaded sample. With the above
    composition, :math:`\\alpha_{new}` is still in `[0, 1]`.

    Required keys are "alpha" and "fg". Modified keys are "alpha" and "fg".

    Args:
        fg_dirs (str | list[str]): Path of directories to load foreground
            images from.
        alpha_dirs (str | list[str]): Path of directories to load alpha
            mattes from.
        interpolation (str): Interpolation method of `mmcv.imresize` to
            resize the randomly loaded images. Default: 'nearest'.
    """

    def __init__(self, fg_dirs, alpha_dirs, interpolation='nearest'):
        # TODO try fetch the path from dataset
        self.fg_dirs = fg_dirs if isinstance(fg_dirs, list) else [fg_dirs]
        self.alpha_dirs = alpha_dirs if isinstance(alpha_dirs,
                                                   list) else [alpha_dirs]
        self.interpolation = interpolation

        # Use the normalised list here: indexing the raw argument would
        # yield only the first character when a plain string is passed,
        # which hides any URI prefix from the backend lookup.
        self.file_backend = get_file_backend(uri=self.fg_dirs[0])

        self.fg_list, self.alpha_list = self._get_file_list(
            self.fg_dirs, self.alpha_dirs)

    def transform(self, results: dict) -> dict:
        """Transform function.

        With probability 0.5 a random (fg, alpha) pair is loaded, resized to
        the current foreground size and blended into the sample. The stored
        alpha is rescaled back to the 0-255 range in every case.

        Args:
            results (dict): A dict containing the necessary information and
                data for augmentation.

        Returns:
            dict: A dict containing the processed data and information.
        """
        fg = results['fg']
        alpha = results['alpha'] / 255.0  # float64, H, W, 1
        h, w = results['fg'].shape[:2]

        # randomly select fg
        if np.random.rand() < 0.5:
            idx = np.random.randint(len(self.fg_list))
            fg2_bytes = self.file_backend.get(self.fg_list[idx])
            fg2 = mmcv.imfrombytes(fg2_bytes)
            alpha2_bytes = self.file_backend.get(self.alpha_list[idx])
            alpha2 = mmcv.imfrombytes(alpha2_bytes, flag='grayscale')
            alpha2 = alpha2 / 255.0  # float64

            fg2 = mmcv.imresize(fg2, (w, h), interpolation=self.interpolation)
            alpha2 = mmcv.imresize(
                alpha2, (w, h), interpolation=self.interpolation)
            # add a trailing channel axis so alpha2 broadcasts with alpha
            alpha2 = alpha2[..., None]

            # the overlap of two 50% transparency will be 75%
            alpha_tmp = 1 - (1 - alpha) * (1 - alpha2)
            # if the result alpha is all-one, then we avoid composition
            if np.any(alpha_tmp < 1):
                # composite fg with fg2
                fg = fg * alpha + fg2 * (1 - alpha)
                alpha = alpha_tmp

        results['fg'] = fg
        results['alpha'] = alpha * 255
        return results

    def _get_file_list(self, fg_dirs, alpha_dirs):
        """Collect the paired foreground and alpha file paths.

        Args:
            fg_dirs (list[str]): Directories containing foreground images.
            alpha_dirs (list[str]): Directories containing alpha mattes,
                paired positionally with ``fg_dirs``.

        Returns:
            tuple[list[str], list[str]]: Foreground paths and alpha paths;
            entries at the same index are treated as one sample.

        Raises:
            AssertionError: If a paired fg/alpha directory holds a different
                number of files.
        """
        all_fg_list = list()
        all_alpha_list = list()
        # NOTE: zip stops at the shorter of the two directory lists.
        for fg_dir, alpha_dir in zip(fg_dirs, alpha_dirs):
            fg_list = sorted(
                self.file_backend.list_dir_or_file(fg_dir, list_dir=False))
            alpha_list = sorted(
                self.file_backend.list_dir_or_file(alpha_dir, list_dir=False))
            # we assume the file names for fg and alpha are the same
            assert len(fg_list) == len(alpha_list), (
                f'{fg_dir} and {alpha_dir} should have the same number of '
                f'images ({len(fg_list)} differs from {len(alpha_list)})')
            fg_list = [osp.join(fg_dir, fg) for fg in fg_list]
            alpha_list = [osp.join(alpha_dir, alpha) for alpha in alpha_list]

            all_fg_list.extend(fg_list)
            all_alpha_list.extend(alpha_list)
        return all_fg_list, all_alpha_list

    def __repr__(self):
        """Return a string listing the configured directories and mode."""
        repr_str = self.__class__.__name__
        repr_str += (f'(fg_dirs={repr(self.fg_dirs)}, '
                     f'alpha_dirs={repr(self.alpha_dirs)}, '
                     f'interpolation={repr(self.interpolation)})')
        return repr_str
@TRANSFORMS.register_module()
class MergeFgAndBg(BaseTransform):
    """Blend a foreground over a background using the alpha matte.

    Reads the keys "alpha", "fg" and "bg" and writes the blended image under
    the new key "merged".
    """

    def transform(self, results: dict) -> dict:
        """Compute ``fg * a + (1 - a) * bg`` and store it as "merged".

        Args:
            results (dict): Pipeline dict holding "alpha" (scaled by 1/255
                here after a cast to float32), "fg" and "bg".

        Returns:
            dict: The same dict with the extra key "merged".
        """
        weight = results['alpha'].astype(np.float32) / 255.
        foreground = results['fg']
        background = results['bg']
        results['merged'] = foreground * weight + (1. - weight) * background
        return results

    def __repr__(self) -> str:
        """Return the class name followed by empty parentheses."""
        return f'{type(self).__name__}()'
@TRANSFORMS.register_module()
class PerturbBg(BaseTransform):
    """Perturb the background with either gaussian noise or a gamma change.

    Reads the key "bg" and writes the perturbed image to "noisy_bg".

    Args:
        gamma_ratio (float, optional): Probability of applying gamma
            correction rather than gaussian noise. Defaults to 0.6.

    Raises:
        ValueError: If ``gamma_ratio`` is below 0 or above 1.
    """

    def __init__(self, gamma_ratio=0.6):
        out_of_range = gamma_ratio < 0 or gamma_ratio > 1
        if out_of_range:
            raise ValueError('gamma_ratio must be a float between [0, 1], '
                             f'but got {gamma_ratio}')
        self.gamma_ratio = gamma_ratio

    def transform(self, results: dict) -> dict:
        """Pick one perturbation at random and store the result.

        Args:
            results (dict): Pipeline dict that contains "bg".

        Returns:
            dict: The same dict with the extra key "noisy_bg".
        """
        draw = np.random.rand()
        if draw >= self.gamma_ratio:
            # gaussian noise: integer mean drawn from [-7, 7) and integer
            # sigma drawn from [2, 6)
            noise_mean = np.random.randint(-7, 7)
            noise_std = np.random.randint(2, 6)
            perturbed = add_gaussian_noise(results['bg'], noise_mean,
                                           noise_std)
        else:
            # gamma correction: gamma sampled from a normal distribution
            # centred at 1 with scale 0.12
            gamma_value = np.random.normal(1, 0.12)
            perturbed = adjust_gamma(results['bg'], gamma_value)

        results['noisy_bg'] = perturbed
        return results

    def __repr__(self):
        """Return the class name together with ``gamma_ratio``."""
        return f'{type(self).__name__}(gamma_ratio={self.gamma_ratio})'
@TRANSFORMS.register_module()
class RandomJitter(BaseTransform):
    """Randomly jitter the foreground in hsv space.

    The jitter range of hue is adjustable while the jitter ranges of
    saturation and value are adaptive to the images. Side effect: the "fg"
    image will be converted to `np.float32`.
    Required keys are "fg" and "alpha", modified key is "fg".

    Args:
        hue_range (float | tuple[float]): Range of hue jittering. If it is a
            float instead of a tuple like (min, max), the range of hue
            jittering will be (-hue_range, +hue_range). A range whose two
            ends are equal applies that fixed hue shift. Default: 40.
    """

    def __init__(self, hue_range=40):
        if isinstance(hue_range, numbers.Number):
            assert hue_range >= 0, ('If hue_range is a single number, '
                                    'it must be non-negative.')
            self.hue_range = (-hue_range, hue_range)
        else:
            assert isinstance(hue_range, tuple) and len(hue_range) == 2, \
                'hue_range should be a tuple and it must be of length 2.'
            self.hue_range = hue_range

    @staticmethod
    def _jitter_channel(channel, mask):
        """Shift one HSV channel by a random, mean-adaptive offset.

        The offset is ``(1.1 - mean) * u`` with ``u`` uniform in
        [-0.1, 0.1), where ``mean`` is taken over the pixels selected by
        ``mask``. Results are reflected back at 0 (absolute value) and at 1
        (``2 - x``).

        Args:
            channel (np.ndarray): One channel of the HSV image.
            mask (np.ndarray): Boolean mask selecting the pixels used for
                the mean.

        Returns:
            np.ndarray: The jittered channel, or ``channel`` unchanged when
            ``mask`` selects nothing.
        """
        selected = channel[mask]
        if selected.size == 0:
            # The mean of an empty selection is NaN and would turn the whole
            # channel into NaN, so leave the channel untouched instead.
            return channel
        jitter = (1.1 - selected.mean()) * (np.random.rand() * 0.2 - 0.1)
        jittered = np.abs(channel + jitter)
        overflow = jittered > 1
        jittered[overflow] = 2 - jittered[overflow]
        return jittered

    def transform(self, results):
        """transform function.

        Args:
            results (dict): A dict containing the necessary information and
                data for augmentation.

        Returns:
            dict: A dict containing the processed data and information.
        """
        fg, alpha = results['fg'], results['alpha']
        # statistics are computed where the first alpha channel is non-zero
        fg_mask = alpha[:, :, 0] > 0

        # convert to HSV space;
        # convert to float32 image to keep precision during space conversion.
        fg = mmcv.bgr2hsv(fg.astype(np.float32) / 255)

        # Hue noise. np.random.randint rejects an empty interval, so a
        # degenerate range (e.g. hue_range=0) uses its single value directly.
        low, high = self.hue_range
        hue_jitter = low if low == high else np.random.randint(low, high)
        fg[:, :, 0] = np.remainder(fg[:, :, 0] + hue_jitter, 360)

        # Saturation noise:
        # jitter within range (1.1 - sat_mean) * [-0.1, 0.1]
        fg[:, :, 1] = self._jitter_channel(fg[:, :, 1], fg_mask)

        # Value noise:
        # jitter within range (1.1 - val_mean) * [-0.1, 0.1]
        fg[:, :, 2] = self._jitter_channel(fg[:, :, 2], fg_mask)

        # convert back to BGR space
        fg = mmcv.hsv2bgr(fg)
        results['fg'] = fg * 255

        return results

    def __repr__(self):
        """Return the class name together with ``hue_range``."""
        return self.__class__.__name__ + f'(hue_range={self.hue_range})'
@TRANSFORMS.register_module()
class RandomLoadResizeBg(BaseTransform):
    """Pick a random background image and resize it to the foreground size.

    Reads the key "fg" (only its height and width are used) and writes the
    loaded image to the new key "bg".

    Args:
        bg_dir (str): Directory from which background images are listed and
            loaded.
        flag (str): Loading flag forwarded to `mmcv.imfrombytes`.
            Default: 'color'.
        channel_order (str): Channel order forwarded to
            `mmcv.imfrombytes`, candidates are 'bgr' and 'rgb'.
            Default: 'bgr'.
    """

    def __init__(self, bg_dir, flag='color', channel_order='bgr'):
        self.bg_dir = bg_dir
        backend = get_file_backend(uri=bg_dir)
        self.file_backend = backend
        # list the files (not sub-directories) once at construction time
        self.bg_list = list(backend.list_dir_or_file(bg_dir, list_dir=False))
        self.flag = flag
        self.channel_order = channel_order

    def transform(self, results: dict) -> dict:
        """Load one randomly chosen background and store it under "bg".

        Args:
            results (dict): Pipeline dict that contains "fg".

        Returns:
            dict: The same dict with the extra key "bg".
        """
        height, width = results['fg'].shape[:2]
        chosen = self.bg_list[np.random.randint(len(self.bg_list))]
        raw_bytes = self.file_backend.get(f'{self.bg_dir}/{chosen}')
        image = mmcv.imfrombytes(
            raw_bytes, flag=self.flag, channel_order=self.channel_order)
        # mmcv.imresize takes the target size as (width, height)
        results['bg'] = mmcv.imresize(
            image, (width, height), interpolation='bicubic')
        return results

    def __repr__(self):
        """Return the class name together with the quoted ``bg_dir``."""
        return f"{type(self).__name__}(bg_dir='{self.bg_dir}')"
Read the Docs v: latest
Versions
latest
stable
0.x
Downloads
pdf
epub
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.