Source code for mmagic.datasets.transforms.matlab_like_resize
# This code is referenced from matlab_imresize with modifications
# Reference:
# https://github.com/fatheral/matlab_imresize/blob/master/imresize.py
# Original license: Copyright (c) 2020 fatheral, under the MIT License.
import numpy as np
from mmcv.transforms import BaseTransform
from mmagic.registry import TRANSFORMS
[docs]def get_size_from_scale(input_size, scale_factor):
"""Get the output size given input size and scale factor.
Args:
input_size (tuple): The size of the input image.
scale_factor (float): The resize factor.
Returns:
output_shape (list[int]): The size of the output image.
"""
output_shape = [
int(np.ceil(scale * shape))
for (scale, shape) in zip(scale_factor, input_size)
]
return output_shape
[docs]def get_scale_from_size(input_size, output_size):
"""Get the scale factor given input size and output size.
Args:
input_size (tuple(int)): The size of the input image.
output_size (tuple(int)): The size of the output image.
Returns:
scale (list[float]): The scale factor of each dimension.
"""
scale = [
1.0 * output_shape / input_shape
for (input_shape, output_shape) in zip(input_size, output_size)
]
return scale
[docs]def _cubic(x):
"""Cubic function.
Args:
x (np.ndarray): The distance from the center position.
Returns:
np.ndarray: The weight corresponding to a particular distance.
"""
x = np.array(x, dtype=np.float32)
x_abs = np.abs(x)
x_abs_sq = x_abs**2
x_abs_cu = x_abs_sq * x_abs
# if |x| <= 1: y = 1.5|x|^3 - 2.5|x|^2 + 1
# if 1 < |x| <= 2: -0.5|x|^3 + 2.5|x|^2 - 4|x| + 2
f = (1.5 * x_abs_cu - 2.5 * x_abs_sq + 1) * (x_abs <= 1) + (
-0.5 * x_abs_cu + 2.5 * x_abs_sq - 4 * x_abs + 2) * ((1 < x_abs) &
(x_abs <= 2))
return f
[docs]def get_weights_indices(input_length, output_length, scale, kernel,
kernel_width):
"""Get weights and indices for interpolation.
Args:
input_length (int): Length of the input sequence.
output_length (int): Length of the output sequence.
scale (float): Scale factor.
kernel (func): The kernel used for resizing.
kernel_width (int): The width of the kernel.
Returns:
tuple(list[np.ndarray], list[np.ndarray]): The weights and the indices
for interpolation.
"""
if scale < 1: # modified kernel for antialiasing
def h(x):
return scale * kernel(scale * x)
kernel_width = 1.0 * kernel_width / scale
else:
h = kernel
kernel_width = kernel_width
# coordinates of output
x = np.arange(1, output_length + 1).astype(np.float32)
# coordinates of input
u = x / scale + 0.5 * (1 - 1 / scale)
left = np.floor(u - kernel_width / 2) # leftmost pixel
p = int(np.ceil(kernel_width)) + 2 # maximum number of pixels
# indices of input pixels
ind = left[:, np.newaxis, ...] + np.arange(p)
indices = ind.astype(np.int32)
# weights of input pixels
weights = h(u[:, np.newaxis, ...] - indices - 1)
weights = weights / np.sum(weights, axis=1)[:, np.newaxis, ...]
# remove all-zero columns
aux = np.concatenate(
(np.arange(input_length), np.arange(input_length - 1, -1,
step=-1))).astype(np.int32)
indices = aux[np.mod(indices, aux.size)]
ind2store = np.nonzero(np.any(weights, axis=0))
weights = weights[:, ind2store]
indices = indices[:, ind2store]
return weights, indices
[docs]def resize_along_dim(img_in, weights, indices, dim):
"""Resize along a specific dimension.
Args:
img_in (np.ndarray): The input image.
weights (ndarray): The weights used for interpolation, computed from
[get_weights_indices].
indices (ndarray): The indices used for interpolation, computed from
[get_weights_indices].
dim (int): Which dimension to undergo interpolation.
Returns:
np.ndarray: Interpolated (along one dimension) image.
"""
img_in = img_in.astype(np.float32)
w_shape = weights.shape
output_shape = list(img_in.shape)
output_shape[dim] = w_shape[0]
img_out = np.zeros(output_shape)
if dim == 0:
for i in range(w_shape[0]):
w = weights[i, :][np.newaxis, ...]
ind = indices[i, :]
img_slice = img_in[ind, :]
img_out[i] = np.sum(np.squeeze(img_slice, axis=0) * w.T, axis=0)
elif dim == 1:
for i in range(w_shape[0]):
w = weights[i, :][:, :, np.newaxis]
ind = indices[i, :]
img_slice = img_in[:, ind]
img_out[:, i] = np.sum(np.squeeze(img_slice, axis=1) * w.T, axis=1)
if img_in.dtype == np.uint8:
img_out = np.clip(img_out, 0, 255)
return np.around(img_out).astype(np.uint8)
else:
return img_out
@TRANSFORMS.register_module()
[docs]class MATLABLikeResize(BaseTransform):
"""Resize the input image using MATLAB-like downsampling.
Currently support bicubic interpolation only. Note that the output of
this function is slightly different from the official MATLAB function.
Required keys are the keys in attribute "keys". Added or modified keys
are "scale" and "output_shape", and the keys in attribute "keys".
Args:
keys (list[str]): A list of keys whose values are modified.
scale (float | None, optional): The scale factor of the resize
operation. If None, it will be determined by output_shape.
Default: None.
output_shape (tuple(int) | None, optional): The size of the output
image. If None, it will be determined by scale. Note that if
scale is provided, output_shape will not be used.
Default: None.
kernel (str, optional): The kernel for the resize operation.
Currently support 'bicubic' only. Default: 'bicubic'.
kernel_width (float): The kernel width. Currently support 4.0 only.
Default: 4.0.
"""
def __init__(self,
keys,
scale=None,
output_shape=None,
kernel='bicubic',
kernel_width=4.0):
if kernel.lower() != 'bicubic':
raise ValueError('Currently support bicubic kernel only.')
if float(kernel_width) != 4.0:
raise ValueError('Current support only width=4 only.')
if scale is None and output_shape is None:
raise ValueError('"scale" and "output_shape" cannot be both None')
self.kernel_func = _cubic
self.keys = keys
self.scale = scale
self.output_shape = output_shape
self.kernel = kernel
self.kernel_width = kernel_width
[docs] def _resize(self, img):
"""resize an image to the require size.
Args:
img (np.ndarray): The original image.
Returns:
output (np.ndarray): The resized image.
"""
weights = {}
indices = {}
# compute scale and output_size
if self.scale is not None:
scale = float(self.scale)
scale = [scale, scale]
output_size = get_size_from_scale(img.shape, scale)
else:
scale = get_scale_from_size(img.shape, self.output_shape)
output_size = list(self.output_shape)
# apply cubic interpolation along two dimensions
order = np.argsort(np.array(scale))
for k in range(2):
key = (img.shape[k], output_size[k], scale[k], self.kernel_func,
self.kernel_width)
weight, index = get_weights_indices(img.shape[k], output_size[k],
scale[k], self.kernel_func,
self.kernel_width)
weights[key] = weight
indices[key] = index
output = np.copy(img)
if output.ndim == 2: # grayscale image
output = output[:, :, np.newaxis]
for k in range(2):
dim = order[k]
key = (img.shape[dim], output_size[dim], scale[dim],
self.kernel_func, self.kernel_width)
output = resize_along_dim(output, weights[key], indices[key], dim)
return output
[docs] def transform(self, results):
"""transform function.
Args:
results (dict): A dict containing the necessary information and
data for augmentation.
Returns:
dict: A dict containing the processed data and information.
"""
for key in self.keys:
is_single_image = False
if isinstance(results[key], np.ndarray):
is_single_image = True
results[key] = [results[key]]
results[key] = [self._resize(img) for img in results[key]]
if is_single_image:
results[key] = results[key][0]
results['scale'] = self.scale
results['output_shape'] = self.output_shape
return results
[docs] def __repr__(self):
repr_str = self.__class__.__name__
repr_str += (
f'(keys={self.keys}, scale={self.scale}, '
f'output_shape={self.output_shape}, '
f'kernel={self.kernel}, kernel_width={self.kernel_width})')
return repr_str