Shortcuts

Source code for mmagic.models.data_preprocessors.mattor_preprocessor

# Copyright (c) OpenMMLab. All rights reserved.

from logging import WARNING
from typing import Dict, List, Optional, Sequence, Tuple, Union

import torch
from mmengine import print_log

from mmagic.registry import MODELS
from mmagic.structures import DataSample
from mmagic.utils.typing import SampleList
from .data_preprocessor import DataPreprocessor

# Optional batch of data samples: a plain list, a tensor, or ``None``.
DataSamples = Union[list, torch.Tensor, None]

# Every result form named by this alias: a dict of tensors, a list of
# ``DataSample``, a tuple of tensors, or a single tensor.
ForwardResults = Union[
    Dict[str, torch.Tensor],
    List[DataSample],
    Tuple[torch.Tensor],
    torch.Tensor,
]

# Accepted spellings for a mean / std value: one scalar, or a sequence of
# scalars.
MEAN_STD_TYPE = Union[
    Sequence[Union[float, int]],
    float,
    int,
]
@MODELS.register_module()
class MattorPreprocessor(DataPreprocessor):
    """DataPreprocessor for matting models.

    See base class ``DataPreprocessor`` for detailed information.

    Workflow as follows:

    - Collate and move data to the target device.
    - Convert inputs from bgr to rgb if the shape of input is (3, H, W).
    - Normalize image with defined std and mean.
    - Stack inputs to batch_inputs.
    - Process the trimap and concatenate it to the image batch along the
      channel dimension.

    Args:
        mean (Sequence[float or int], float or int, optional): The pixel
            mean of image channels. Noted that normalization operation is
            performed *after channel order conversion*. Forwarded to
            ``DataPreprocessor``.
            Defaults to [123.675, 116.28, 103.53].
        std (Sequence[float or int], float or int, optional): The pixel
            standard deviation of image channels. Noted that normalization
            operation is performed *after channel order conversion*.
            Forwarded to ``DataPreprocessor``.
            Defaults to [58.395, 57.12, 57.375].
        output_channel_order (str): Channel order forwarded to
            ``DataPreprocessor``; also the order the ground-truth fields
            are converted to in training mode. Defaults to 'RGB'.
        proc_trimap (str): Methods to process the trimap tensor.
            Available options are ``rescale_to_zero_one`` (divide by 255)
            and ``as_is`` (only cast to float32).
            Defaults to 'rescale_to_zero_one'.
        stack_data_sample (bool): Whether stack a list of data samples to
            one data sample. Only support with input data samples are
            `DataSamples`. Defaults to True.
    """

    # NOTE: the list defaults below are only forwarded to the base class
    # and never mutated in this class.
    def __init__(self,
                 mean: MEAN_STD_TYPE = [123.675, 116.28, 103.53],
                 std: MEAN_STD_TYPE = [58.395, 57.12, 57.375],
                 output_channel_order: str = 'RGB',
                 proc_trimap: str = 'rescale_to_zero_one',
                 stack_data_sample: bool = True) -> None:
        # specific data_keys for matting task
        data_keys = ['gt_fg', 'gt_bg', 'gt_merged', 'gt_alpha']
        super().__init__(
            mean,
            std,
            output_channel_order=output_channel_order,
            data_keys=data_keys,
            stack_data_sample=stack_data_sample)

        self.proc_trimap = proc_trimap

    def _proc_batch_trimap(self,
                           batch_trimaps: torch.Tensor) -> torch.Tensor:
        """Process the batched trimap according to ``self.proc_trimap``.

        Args:
            batch_trimaps (torch.Tensor): The batched trimap tensor.

        Returns:
            torch.Tensor: The trimap divided by 255 when ``proc_trimap`` is
            ``'rescale_to_zero_one'``, or cast to float32 when it is
            ``'as_is'``.

        Raises:
            ValueError: If ``self.proc_trimap`` is not a supported option.
        """
        if self.proc_trimap == 'rescale_to_zero_one':
            batch_trimaps = batch_trimaps / 255.0  # uint8->float32
        elif self.proc_trimap in ('as_is', 'as-is'):
            # 'as-is' is accepted as an alias of 'as_is' because that
            # hyphenated spelling has been advertised for this option.
            batch_trimaps = batch_trimaps.to(torch.float32)
        else:
            raise ValueError(
                f'proc_trimap = {self.proc_trimap} is not supported.')

        return batch_trimaps

    def _preprocess_data_sample(self, data_samples: SampleList,
                                training: bool) -> list:
        """Preprocess data samples.

        When `training` is True, fields belong to :attr:`self.data_keys`
        will be converted to :attr:`self.output_channel_order` and
        *divided by 255*. When `training` is False, fields belongs to
        :attr:`self.data_keys` will be attempted to convert to 'BGR'
        without normalization. The corresponding metainfo related to
        normalization, channel order conversion will be updated to data
        sample as well.

        Args:
            data_samples (List[DataSample]): A list of data samples to
                preprocess.
            training (bool): Whether in training mode.

        Returns:
            list: The list of processed data samples, or the result of
            ``DataSample.stack`` when ``self.stack_data_sample`` is set.
        """
        if not training:
            # set default order to BGR in test stage
            target_order = 'BGR'
        else:
            # conversion as default (None)
            target_order = self.output_channel_order

        for data_sample in data_samples:
            for key in self.data_keys:
                if not hasattr(data_sample, key):
                    # do not raise error here
                    if key != 'gt_fg' and not training:
                        # gt_fg is not required in test stage, therefore do
                        # not print log
                        print_log(f'Cannot find key \'{key}\' in data sample.',
                                  'current', WARNING)
                    # NOTE(review): ``break`` also skips every remaining
                    # key of this sample, not just the missing one --
                    # confirm this is intended rather than ``continue``.
                    break

                data = data_sample.get(key)
                data_channel_order = self._parse_channel_order(
                    key, data, data_sample)
                data, channel_order = self._do_conversion(
                    data, data_channel_order, target_order)
                if training:
                    data = data / 255.  # NOTE: divided by 255

                data_sample.set_data({f'{key}': data})
                # Record how this field was processed on the sample itself.
                data_process_meta = {
                    f'{key}_enable_norm': self._enable_normalize,
                    f'{key}_output_channel_order': channel_order,
                    f'{key}_mean': self.mean,
                    f'{key}_std': self.std
                }
                data_sample.set_metainfo(data_process_meta)

        if self.stack_data_sample:
            return DataSample.stack(data_samples)

        return data_samples

    def forward(self, data: dict, training: bool = False) -> dict:
        """Pre-process input images, trimaps, ground-truth as configured.

        Args:
            data (dict): Data sampled from dataloader. It is indexed with
                the keys ``'inputs'`` and ``'data_samples'``.
            training (bool): Whether to enable training time augmentation.
                Default: False.

        Returns:
            dict: The data returned by the base class ``forward``, with
            ``data['inputs']`` replaced by the concatenation of the image
            batch and the processed trimap along the channel dimension.
        """
        if not training:
            # Image may of different size when testing
            assert len(data['data_samples']) == 1, (
                'only batch_size=1 is supported for testing.')

        data = super().forward(data, training=training)

        batch_images = data['inputs']
        # NOTE(review): attribute access presumes ``data_samples`` is a
        # single stacked data sample at this point -- confirm the behavior
        # when ``stack_data_sample`` is False.
        batch_trimaps = data['data_samples'].trimap
        batch_trimaps = self._proc_batch_trimap(batch_trimaps)

        # Stack image and trimap along channel dimension
        # All existing models do concat at the start of forwarding
        # and data_sample is a very complex data structure
        # so this is a simple work-around to make codes simpler
        assert batch_images.ndim == batch_trimaps.ndim == 4
        assert batch_images.shape[-2:] == batch_trimaps.shape[-2:], (
            'Expect merged.shape[-2:] == trimap.shape[-2:], '
            f'but got {batch_images.shape[-2:]} vs {batch_trimaps.shape[-2:]}')

        # N, (4/6), H, W
        batch_inputs = torch.cat((batch_images, batch_trimaps), dim=1)

        data['inputs'] = batch_inputs
        return data
Read the Docs v: latest
Versions
latest
stable
0.x
Downloads
pdf
epub
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.