
Source code for mmagic.evaluation.evaluator

# Copyright (c) OpenMMLab. All rights reserved.
import hashlib
from collections import defaultdict
from typing import Any, Iterator, List, Optional, Sequence, Tuple, Union

from mmengine.evaluator import BaseMetric, Evaluator
from mmengine.model import BaseModel
from torch.utils.data.dataloader import DataLoader

from mmagic.registry import EVALUATORS
from mmagic.structures import DataSample
from .metrics.base_gen_metric import GenMetric


@EVALUATORS.register_module()
class Evaluator(Evaluator):
    """Evaluator for generative models.

    Unlike high-level vision tasks, metrics for generative models have
    various input types. For example, Inception Score (IS,
    :class:`~mmagic.evaluation.InceptionScore`) only needs to take fake
    images as input. However, Frechet Inception Distance (FID,
    :class:`~mmagic.evaluation.FrechetInceptionDistance`) needs to take both
    real images and fake images as input, and the numbers of real and fake
    images can be set arbitrarily. For Perceptual Path Length (PPL,
    :class:`~mmagic.evaluation.PerceptualPathLength`), the generator needs to
    sample images along a latent path.

    In order to be compatible with different metrics, we designed two
    critical functions, :meth:`prepare_metrics` and :meth:`prepare_samplers`,
    to support those requirements.

    - :meth:`prepare_metrics` sets the image color order and passes the
      dataloader to all metrics, so that metrics can prepare any
      pre-calculated features they need.
    - :meth:`prepare_samplers` passes the dataloader and model to the
      metrics, and gets the corresponding sampler for each kind of metric.
      Metrics with the same sample mode can share the sampler.

    The whole evaluation process can be found in
    :meth:`mmagic.engine.runner.MultiValLoop.run` and
    :meth:`mmagic.engine.runner.MultiTestLoop.run`.

    Args:
        metrics (dict or BaseMetric or Sequence): The config of metrics.
    """

    def __init__(self, metrics: Union[dict, BaseMetric, Sequence]):
        if metrics is not None:
            super().__init__(metrics)
        else:
            self.metrics = None

        self.is_ready = False
    def prepare_metrics(self, module: BaseModel, dataloader: DataLoader):
        """Prepare the metrics before evaluation starts.

        Some metrics use a pretrained model to extract features, and the
        input channel order may vary among those models. Therefore, we first
        parse the output color order from the data preprocessor and set the
        color order for each metric. Then we pass the dataloader to each
        metric to prepare pre-calculated items (e.g. Inception features of
        the real images). If a metric has no pre-calculated items,
        :meth:`metric.prepare` will be ignored. Once this function has been
        called, :attr:`self.is_ready` will be set to `True`. If
        :attr:`self.is_ready` is `True`, this function will return directly
        to avoid duplicate computation.

        Args:
            module (BaseModel): Model to evaluate.
            dataloader (DataLoader): The dataloader for real images.
        """
        if self.metrics is None:
            self.is_ready = True
            return

        if self.is_ready:
            return

        # prepare metrics
        for metric in self.metrics:
            metric.prepare(module, dataloader)
        self.is_ready = True
    @staticmethod
    def _cal_metric_hash(metric: GenMetric):
        """Calculate a unique hash value based on the `SAMPLER_MODE` and
        `sample_model`."""
        sampler_mode = metric.SAMPLER_MODE
        sample_model = metric.sample_model
        metric_dict = {
            'SAMPLER_MODE': sampler_mode,
            'sample_model': sample_model
        }
        if hasattr(metric, 'need_cond_input'):
            metric_dict['need_cond_input'] = metric.need_cond_input
        md5 = hashlib.md5(repr(metric_dict).encode('utf-8')).hexdigest()
        return md5
    def prepare_samplers(self, module: BaseModel, dataloader: DataLoader
                         ) -> List[Tuple[List[BaseMetric], Iterator]]:
        """Prepare the samplers for metrics whose sampling modes differ.

        For generative models, different metrics need images generated with
        different inputs. For example, FID, KID and IS need images generated
        with random noise, while PPL needs paired images on a specific noise
        interpolation path. Therefore, we first group metrics with respect to
        their sampler's mode (refers to :attr:`~GenMetric.SAMPLER_MODE`), and
        build a shared sampler for each metric group. Note that the length of
        a shared sampler depends on the metric in its group that requires the
        most images.

        Args:
            module (BaseModel): Model to evaluate. Some metrics (e.g. PPL)
                require `module` in their sampler.
            dataloader (DataLoader): The dataloader for real images.

        Returns:
            List[Tuple[List[BaseMetric], Iterator]]: A list of
                "metrics-shared sampler" pairs.
        """
        if self.metrics is None:
            return [[[None], []]]

        # group metrics based on `SAMPLER_MODE` and `sample_model`
        metric_mode_dict = defaultdict(list)
        for metric in self.metrics:
            metric_md5 = self._cal_metric_hash(metric)
            metric_mode_dict[metric_md5].append(metric)

        metrics_sampler_list = []
        for metrics in metric_mode_dict.values():
            first_metric = metrics[0]
            metrics_sampler_list.append([
                metrics,
                first_metric.get_metric_sampler(module, dataloader, metrics)
            ])

        return metrics_sampler_list
    def process(self, data_samples: Sequence[DataSample],
                data_batch: Optional[Any],
                metrics: Sequence[BaseMetric]) -> None:
        """Pass `data_batch` from the dataloader and `data_samples`
        (generated results) to the corresponding `metrics`.

        Args:
            data_samples (Sequence[DataSample]): A batch of generated results
                from the model.
            data_batch (Optional[Any]): A batch of data from the
                metric-specific sampler or the dataloader.
            metrics (Sequence[BaseMetric]): Metrics to evaluate.
        """
        if self.metrics is None:
            return

        _data_samples = []
        for data_sample in data_samples:
            if isinstance(data_sample, DataSample):
                _data_samples.append(data_sample.to_dict())
            else:
                _data_samples.append(data_sample)

        # feed to the specific metrics
        for metric in metrics:
            metric.process(data_batch, _data_samples)
    def evaluate(self) -> dict:
        """Invoke the ``evaluate`` method of each metric and collect the
        results into a single dictionary.

        Different from the base ``Evaluator.evaluate`` in MMEngine, this
        function does not take `size` as input; each element in
        `self.metrics` calls its own `evaluate` method to calculate the
        metric.

        Returns:
            dict: Evaluation results of all metrics. The keys are the names
                of the metrics, and the values are the corresponding results.
        """
        if self.metrics is None:
            return {'No Metric': 'Nan'}

        metrics = {}
        for metric in self.metrics:
            _results = metric.evaluate()

            # check metric name conflicts
            for name in _results.keys():
                if name in metrics:
                    raise ValueError(
                        'There are multiple evaluation results with the same '
                        f'metric name {name}. Please make sure all metrics '
                        'have different prefixes.')

            metrics.update(_results)

        return metrics
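
The following minimal sketch (not part of the module) illustrates how ``_cal_metric_hash`` drives the grouping in :meth:`prepare_samplers`: metrics whose ``SAMPLER_MODE`` and ``sample_model`` match hash to the same value and therefore share one sampler. The ``SimpleNamespace`` objects are hypothetical stand-ins for real ``GenMetric`` subclasses and only carry the attributes the hash reads.

from types import SimpleNamespace

from mmagic.evaluation.evaluator import Evaluator

# Hypothetical stand-ins for GenMetric subclasses; only the attributes
# read by `_cal_metric_hash` are provided.
metric_a = SimpleNamespace(SAMPLER_MODE='normal', sample_model='ema')
metric_b = SimpleNamespace(SAMPLER_MODE='normal', sample_model='ema')
metric_c = SimpleNamespace(SAMPLER_MODE='path', sample_model='ema')

# Identical sampler settings -> identical hash -> same group (shared sampler).
assert Evaluator._cal_metric_hash(metric_a) == Evaluator._cal_metric_hash(metric_b)
assert Evaluator._cal_metric_hash(metric_a) != Evaluator._cal_metric_hash(metric_c)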
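
For context, the evaluator is driven by :meth:`mmagic.engine.runner.MultiValLoop.run` and :meth:`mmagic.engine.runner.MultiTestLoop.run`. The sketch below shows only the intended call order; ``metric_configs``, ``model`` and ``dataloader`` are assumed to exist, and the use of ``model.val_step`` to generate predictions is an assumption for illustration rather than the loops' exact implementation.

# Simplified sketch of the evaluation flow (the real logic lives in
# MultiValLoop / MultiTestLoop); `model` and `dataloader` are assumed to be
# a BaseModel and a DataLoader built elsewhere.
evaluator = Evaluator(metrics=metric_configs)  # dict / BaseMetric / Sequence

# 1. Set the color order and pre-calculated items (e.g. real Inception features).
evaluator.prepare_metrics(model, dataloader)

# 2. Group metrics by sampler hash and build one shared sampler per group.
metrics_sampler_list = evaluator.prepare_samplers(model, dataloader)

# 3. Generate samples with each shared sampler and feed them to its metrics.
for metrics, sampler in metrics_sampler_list:
    for data_batch in sampler:
        # `val_step` is assumed here; the generation call depends on the model.
        data_samples = model.val_step(data_batch)
        evaluator.process(data_samples, data_batch, metrics)

# 4. Collect the final results as a dict of metric name -> value.
results = evaluator.evaluate()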