Source code for mmagic.utils.trans_utils
# Copyright (c) OpenMMLab. All rights reserved.
import logging
import math
import cv2
import numpy as np
import torch
from mmengine.logging import print_log
from mmengine.utils import is_tuple_of
from PIL import Image, ImageDraw
[docs]def make_coord(shape, ranges=None, flatten=True):
"""Make coordinates at grid centers.
Args:
shape (tuple): shape of image.
ranges (tuple): range of coordinate value. Default: None.
flatten (bool): flatten to (n, 2) or Not. Default: True.
Returns:
coord (Tensor): coordinates.
"""
coord_seqs = []
for i, n in enumerate(shape):
if ranges is None:
v0, v1 = -1, 1
else:
v0, v1 = ranges[i]
r = (v1 - v0) / (2 * n)
seq = v0 + r + (2 * r) * torch.arange(n).float()
coord_seqs.append(seq)
if 'indexing' in torch.meshgrid.__code__.co_varnames:
coord = torch.meshgrid(*coord_seqs, indexing='ij')
else:
coord = torch.meshgrid(*coord_seqs)
coord = torch.stack(coord, dim=-1)
if flatten:
coord = coord.view(-1, coord.shape[-1])
return coord
[docs]def bbox2mask(img_shape, bbox, dtype='uint8'):
"""Generate mask in np.ndarray from bbox.
The returned mask has the shape of (h, w, 1). '1' indicates the
hole and '0' indicates the valid regions.
We prefer to use `uint8` as the data type of masks, which may be different
from other codes in the community.
Args:
img_shape (tuple[int]): The size of the image.
bbox (tuple[int]): Configuration tuple, (top, left, height, width)
np.dtype (str): Indicate the data type of returned masks.
Default: 'uint8'
Returns:
mask (np.ndarray): Mask in the shape of (h, w, 1).
"""
height, width = img_shape[:2]
mask = np.zeros((height, width, 1), dtype=dtype)
mask[bbox[0]:bbox[0] + bbox[2], bbox[1]:bbox[1] + bbox[3], :] = 1
return mask
[docs]def brush_stroke_mask(img_shape,
num_vertices=(4, 12),
mean_angle=2 * math.pi / 5,
angle_range=2 * math.pi / 15,
brush_width=(12, 40),
max_loops=4,
dtype='uint8'):
"""Generate free-form mask.
The method of generating free-form mask is in the following paper:
Free-Form Image Inpainting with Gated Convolution.
When you set the config of this type of mask. You may note the usage of
`np.random.randint` and the range of `np.random.randint` is [left, right).
We prefer to use `uint8` as the data type of masks, which may be different
from other codes in the community.
TODO: Rewrite the implementation of this function.
Args:
img_shape (tuple[int]): Size of the image.
num_vertices (int | tuple[int]): Min and max number of vertices. If
only give an integer, we will fix the number of vertices.
Default: (4, 12).
mean_angle (float): Mean value of the angle in each vertex. The angle
is measured in radians. Default: 2 * math.pi / 5.
angle_range (float): Range of the random angle.
Default: 2 * math.pi / 15.
brush_width (int | tuple[int]): (min_width, max_width). If only give
an integer, we will fix the width of brush. Default: (12, 40).
max_loops (int): The max number of for loops of drawing strokes.
Default: 4.
np.dtype (str): Indicate the data type of returned masks.
Default: 'uint8'.
Returns:
mask (np.ndarray): Mask in the shape of (h, w, 1).
"""
img_h, img_w = img_shape[:2]
if isinstance(num_vertices, int):
min_num_vertices, max_num_vertices = num_vertices, num_vertices + 1
elif isinstance(num_vertices, tuple):
min_num_vertices, max_num_vertices = num_vertices
else:
raise TypeError('The type of num_vertices should be int'
f'or tuple[int], but got type: {num_vertices}')
if isinstance(brush_width, tuple):
min_width, max_width = brush_width
elif isinstance(brush_width, int):
min_width, max_width = brush_width, brush_width + 1
else:
raise TypeError('The type of brush_width should be int'
f'or tuple[int], but got type: {brush_width}')
average_radius = math.sqrt(img_h * img_h + img_w * img_w) / 8
mask = Image.new('L', (img_w, img_h), 0)
loop_num = np.random.randint(1, max_loops)
num_vertex_list = np.random.randint(
min_num_vertices, max_num_vertices, size=loop_num)
angle_min_list = np.random.uniform(0, angle_range, size=loop_num)
angle_max_list = np.random.uniform(0, angle_range, size=loop_num)
for loop_n in range(loop_num):
num_vertex = num_vertex_list[loop_n]
angle_min = mean_angle - angle_min_list[loop_n]
angle_max = mean_angle + angle_max_list[loop_n]
angles = []
vertex = []
# set random angle on each vertex
angles = np.random.uniform(angle_min, angle_max, size=num_vertex)
reverse_mask = (np.arange(num_vertex, dtype=np.float32) % 2) == 0
angles[reverse_mask] = 2 * math.pi - angles[reverse_mask]
h, w = mask.size
# set random vertices
vertex.append((np.random.randint(0, w), np.random.randint(0, h)))
r_list = np.random.normal(
loc=average_radius, scale=average_radius // 2, size=num_vertex)
for i in range(num_vertex):
r = np.clip(r_list[i], 0, 2 * average_radius)
new_x = np.clip(vertex[-1][0] + r * math.cos(angles[i]), 0, w)
new_y = np.clip(vertex[-1][1] + r * math.sin(angles[i]), 0, h)
vertex.append((int(new_x), int(new_y)))
# draw brush strokes according to the vertex and angle list
draw = ImageDraw.Draw(mask)
width = np.random.randint(min_width, max_width)
draw.line(vertex, fill=1, width=width)
for v in vertex:
draw.ellipse((v[0] - width // 2, v[1] - width // 2,
v[0] + width // 2, v[1] + width // 2),
fill=1)
# randomly flip the mask
if np.random.normal() > 0:
mask.transpose(Image.FLIP_LEFT_RIGHT)
if np.random.normal() > 0:
mask.transpose(Image.FLIP_TOP_BOTTOM)
mask = np.array(mask).astype(dtype=getattr(np, dtype))
mask = mask[:, :, None]
return mask
[docs]def random_bbox(img_shape, max_bbox_shape, max_bbox_delta=40, min_margin=20):
"""Generate a random bbox for the mask on a given image.
In our implementation, the max value cannot be obtained since we use
`np.random.randint`. And this may be different with other standard scripts
in the community.
Args:
img_shape (tuple[int]): The size of a image, in the form of (h, w).
max_bbox_shape (int | tuple[int]): Maximum shape of the mask box,
in the form of (h, w). If it is an integer, the mask box will be
square.
max_bbox_delta (int | tuple[int]): Maximum delta of the mask box,
in the form of (delta_h, delta_w). If it is an integer, delta_h
and delta_w will be the same. Mask shape will be randomly sampled
from the range of `max_bbox_shape - max_bbox_delta` and
`max_bbox_shape`. Default: (40, 40).
min_margin (int | tuple[int]): The minimum margin size from the
edges of mask box to the image boarder, in the form of
(margin_h, margin_w). If it is an integer, margin_h and margin_w
will be the same. Default: (20, 20).
Returns:
tuple[int]: The generated box, (top, left, h, w).
"""
if not isinstance(max_bbox_shape, tuple):
max_bbox_shape = (max_bbox_shape, max_bbox_shape)
if not isinstance(max_bbox_delta, tuple):
max_bbox_delta = (max_bbox_delta, max_bbox_delta)
if not isinstance(min_margin, tuple):
min_margin = (min_margin, min_margin)
assert is_tuple_of(max_bbox_shape, int)
assert is_tuple_of(max_bbox_delta, int)
assert is_tuple_of(min_margin, int)
img_h, img_w = img_shape[:2]
max_mask_h, max_mask_w = max_bbox_shape
max_delta_h, max_delta_w = max_bbox_delta
margin_h, margin_w = min_margin
if max_mask_h > img_h or max_mask_w > img_w:
raise ValueError(f'mask shape {max_bbox_shape} should be smaller than '
f'image shape {img_shape}')
if (max_delta_h // 2 * 2 >= max_mask_h
or max_delta_w // 2 * 2 >= max_mask_w):
raise ValueError(f'mask delta {max_bbox_delta} should be smaller than'
f'mask shape {max_bbox_shape}')
if img_h - max_mask_h < 2 * margin_h or img_w - max_mask_w < 2 * margin_w:
raise ValueError(f'Margin {min_margin} cannot be satisfied for img'
f'shape {img_shape} and mask shape {max_bbox_shape}')
# get the max value of (top, left)
max_top = img_h - margin_h - max_mask_h
max_left = img_w - margin_w - max_mask_w
# randomly select a (top, left)
top = np.random.randint(margin_h, max_top)
left = np.random.randint(margin_w, max_left)
# randomly shrink the shape of mask box according to `max_bbox_delta`
# the center of box is fixed
delta_top = np.random.randint(0, max_delta_h // 2 + 1)
delta_left = np.random.randint(0, max_delta_w // 2 + 1)
top = top + delta_top
left = left + delta_left
h = max_mask_h - delta_top
w = max_mask_w - delta_left
return (top, left, h, w)
[docs]def random_irregular_mask(img_shape,
num_vertices=(4, 8),
max_angle=4,
length_range=(10, 100),
brush_width=(10, 40),
dtype='uint8'):
"""Generate random irregular masks.
This is a modified version of free-form mask implemented in
'brush_stroke_mask'.
We prefer to use `uint8` as the data type of masks, which may be different
from other codes in the community.
TODO: Rewrite the implementation of this function.
Args:
img_shape (tuple[int]): Size of the image.
num_vertices (int | tuple[int]): Min and max number of vertices. If
only give an integer, we will fix the number of vertices.
Default: (4, 8).
max_angle (float): Max value of angle at each vertex. Default 4.0.
length_range (int | tuple[int]): (min_length, max_length). If only give
an integer, we will fix the length of brush. Default: (10, 100).
brush_width (int | tuple[int]): (min_width, max_width). If only give
an integer, we will fix the width of brush. Default: (10, 40).
np.dtype (str): Indicate the data type of returned masks.
Default: 'uint8'
Returns:
mask (np.ndarray): Mask in the shape of (h, w, 1).
"""
h, w = img_shape[:2]
mask = np.zeros((h, w), dtype=dtype)
if isinstance(length_range, int):
min_length, max_length = length_range, length_range + 1
elif isinstance(length_range, tuple):
min_length, max_length = length_range
else:
raise TypeError('The type of length_range should be int'
f'or tuple[int], but got type: {length_range}')
if isinstance(num_vertices, int):
min_num_vertices, max_num_vertices = num_vertices, num_vertices + 1
elif isinstance(num_vertices, tuple):
min_num_vertices, max_num_vertices = num_vertices
else:
raise TypeError('The type of num_vertices should be int'
f'or tuple[int], but got type: {num_vertices}')
if isinstance(brush_width, int):
min_brush_width, max_brush_width = brush_width, brush_width + 1
elif isinstance(brush_width, tuple):
min_brush_width, max_brush_width = brush_width
else:
raise TypeError('The type of brush_width should be int'
f'or tuple[int], but got type: {brush_width}')
num_v = np.random.randint(min_num_vertices, max_num_vertices)
for i in range(num_v):
start_x = np.random.randint(w)
start_y = np.random.randint(h)
# from the start point, randomly setlect n \in [1, 6] directions.
direction_num = np.random.randint(1, 6)
angle_list = np.random.randint(0, max_angle, size=direction_num)
length_list = np.random.randint(
min_length, max_length, size=direction_num)
brush_width_list = np.random.randint(
min_brush_width, max_brush_width, size=direction_num)
for direct_n in range(direction_num):
angle = 0.01 + angle_list[direct_n]
if i % 2 == 0:
angle = 2 * math.pi - angle
length = length_list[direct_n]
brush_w = brush_width_list[direct_n]
# compute end point according to the random angle
end_x = (start_x + length * np.sin(angle)).astype(np.int32)
end_y = (start_y + length * np.cos(angle)).astype(np.int32)
cv2.line(mask, (start_y, start_x), (end_y, end_x), 1, brush_w)
start_x, start_y = end_x, end_y
mask = np.expand_dims(mask, axis=2)
return mask
[docs]def get_irregular_mask(img_shape, area_ratio_range=(0.15, 0.5), **kwargs):
"""Get irregular mask with the constraints in mask ratio.
Args:
img_shape (tuple[int]): Size of the image.
area_ratio_range (tuple(float)): Contain the minimum and maximum area
ratio. Default: (0.15, 0.5).
Returns:
mask (np.ndarray): Mask in the shape of (h, w, 1).
"""
mask = random_irregular_mask(img_shape, **kwargs)
min_ratio, max_ratio = area_ratio_range
while not min_ratio < (np.sum(mask) /
(img_shape[0] * img_shape[1])) < max_ratio:
mask = random_irregular_mask(img_shape, **kwargs)
return mask
[docs]_integer_types = (
np.byte,
np.ubyte, # 8 bits
np.short,
np.ushort, # 16 bits
np.intc,
np.uintc, # 16 or 32 or 64 bits
np.int_,
np.uint, # 32 or 64 bits
np.longlong,
np.ulonglong) # 64 bits
}
[docs]dtype_range = {
np.bool_: (False, True),
np.bool8: (False, True),
np.float16: (-1, 1),
np.float32: (-1, 1),
np.float64: (-1, 1)
}
dtype_range.update(_integer_ranges)
[docs]def dtype_limits(image, clip_negative=False):
"""Return intensity limits, i.e. (min, max) tuple, of the image's dtype.
This function is adopted from skimage:
https://github.com/scikit-image/scikit-image/blob/
7e4840bd9439d1dfb6beaf549998452c99f97fdd/skimage/util/dtype.py#L35
Args:
image (np.ndarray): Input image.
clip_negative (bool, optional): If True, clip the negative range
(i.e. return 0 for min intensity) even if the image dtype allows
negative values. Default: False.
Returns
tuple: Lower and upper intensity limits.
"""
imin, imax = dtype_range[image.dtype.type]
if clip_negative:
imin = 0
return imin, imax
[docs]def adjust_gamma(image, gamma=1, gain=1):
"""Performs Gamma Correction on the input image.
This function is adopted from skimage:
https://github.com/scikit-image/scikit-image/blob/
7e4840bd9439d1dfb6beaf549998452c99f97fdd/skimage/exposure/
exposure.py#L439-L494
Also known as Power Law Transform.
This function transforms the input image pixelwise according to the
equation ``O = I**gamma`` after scaling each pixel to the range 0 to 1.
Args:
image (np.ndarray): Input image.
gamma (float, optional): Non negative real number. Defaults to 1.
gain (float, optional): The constant multiplier. Defaults to 1.
Returns:
np.ndarray: Gamma corrected output image.
"""
if np.any(image < 0):
raise ValueError('Image Correction methods work correctly only on '
'images with non-negative values. Use '
'skimage.exposure.rescale_intensity.')
dtype = image.dtype.type
if gamma < 0:
raise ValueError('Gamma should be a non-negative real number.')
scale = float(dtype_limits(image, True)[1] - dtype_limits(image, True)[0])
out = ((image / scale)**gamma) * scale * gain
return out.astype(dtype)
[docs]def add_gaussian_noise(img: np.ndarray, mu, sigma):
"""Add Gaussian Noise on the input image.
Args:
img (np.ndarray): Input image.
mu (float): The mu value of the Gaussian function.
sigma (float): The sigma value of the Gaussian function.
Returns:
noisy_img (np.ndarray): Gaussian noisy output image.
"""
img = img.astype(np.float32)
gauss_noise = np.random.normal(mu, sigma, img.shape)
noisy_img = img + gauss_noise
noisy_img = np.clip(noisy_img, 0, 255)
return noisy_img
[docs]def random_choose_unknown(unknown, crop_size):
"""Randomly choose an unknown start (top-left) point for a given crop_size.
Args:
unknown (np.ndarray): The binary unknown mask.
crop_size (tuple[int]): The given crop size.
Returns:
tuple[int]: The top-left point of the chosen bbox.
"""
h, w = unknown.shape
crop_h, crop_w = crop_size
delta_h = center_h = crop_h // 2
delta_w = center_w = crop_w // 2
# mask out the validate area for selecting the cropping center
mask = np.zeros_like(unknown)
mask[delta_h:h - delta_h, delta_w:w - delta_w] = 1
if np.any(unknown & mask):
center_h_list, center_w_list = np.where(unknown & mask)
elif np.any(unknown):
center_h_list, center_w_list = np.where(unknown)
else:
print_log('No unknown pixels found!', level=logging.WARNING)
center_h_list = [center_h]
center_w_list = [center_w]
num_unknowns = len(center_h_list)
rand_ind = np.random.randint(num_unknowns)
center_h = center_h_list[rand_ind]
center_w = center_w_list[rand_ind]
# make sure the top-left point is valid
top = np.clip(center_h - delta_h, 0, h - crop_h)
left = np.clip(center_w - delta_w, 0, w - crop_w)
return top, left