Shortcuts

Source code for mmagic.models.editors.stylegan1.stylegan1_modules

# Copyright (c) OpenMMLab. All rights reserved.
from copy import deepcopy
from functools import partial

import mmengine
import torch
import torch.nn as nn
from mmcv.ops.fused_bias_leakyrelu import fused_bias_leakyrelu
from mmcv.ops.upfirdn2d import upfirdn2d
from mmengine.model import BaseModule

from mmagic.registry import MODELS
from ..pggan import (EqualizedLRConvModule, EqualizedLRConvUpModule,
                     EqualizedLRLinearModule)


class EqualLinearActModule(BaseModule):
    """Equalized-LR linear layer with an optional activation.

    This module wraps ``EqualizedLRLinearModule`` (defined in PGGAN). The
    wrapped linear layer is always created without a bias; when a bias is
    requested it is stored as a separate parameter on this module so that it
    can either be added before a regular activation or be handed directly to
    ``fused_bias_leakyrelu``.

    Args:
        *args: Positional arguments forwarded to ``EqualizedLRLinearModule``.
        equalized_lr_cfg (dict | None, optional): Config for equalized lr.
            Its ``lr_mul`` entry (1. when absent) also scales the bias in
            ``forward``. Defaults to dict(gain=1., lr_mul=1.).
        bias (bool, optional): Whether to use bias item. Defaults to True.
        bias_init (float, optional): The value for bias initialization.
            Defaults to ``0.``.
        act_cfg (dict | None, optional): Config for activation layer. A
            config whose ``type`` is ``'fused_bias'`` selects
            ``fused_bias_leakyrelu`` (and requires a bias); any other config
            is built through ``MODELS``. Defaults to None.
        **kwargs: Keyword arguments forwarded to
            ``EqualizedLRLinearModule``.
    """

    def __init__(self,
                 *args,
                 equalized_lr_cfg=dict(gain=1., lr_mul=1.),
                 bias=True,
                 bias_init=0.,
                 act_cfg=None,
                 **kwargs):
        super().__init__()
        self.with_activation = act_cfg is not None

        # The inner linear layer never owns a bias; see ``self.bias`` below.
        self.linear = EqualizedLRLinearModule(
            *args, bias=False, equalized_lr_cfg=equalized_lr_cfg, **kwargs)

        self.lr_mul = (1. if equalized_lr_cfg is None else
                       equalized_lr_cfg.get('lr_mul', 1.))

        # Bias lives on this module so it can be fused into the activation.
        self.bias = (
            nn.Parameter(
                torch.zeros(self.linear.out_features).fill_(bias_init))
            if bias else None)

        if not self.with_activation:
            self.act_type = None
            return

        # Work on a copy: ``type`` is popped from the config below.
        act_cfg = deepcopy(act_cfg)
        if act_cfg['type'] == 'fused_bias':
            self.act_type = act_cfg.pop('type')
            assert self.bias is not None
            # Remaining config entries become keyword arguments of the op.
            self.activate = partial(fused_bias_leakyrelu, **act_cfg)
        else:
            self.act_type = 'normal'
            self.activate = MODELS.build(act_cfg)

    def forward(self, x):
        """Forward function.

        Args:
            x (Tensor): Input feature map with shape of (N, C, ...). Inputs
                with three or more dimensions are flattened to (N, -1).

        Returns:
            Tensor: Output feature map.
        """
        if x.ndim >= 3:
            x = x.reshape(x.size(0), -1)
        x = self.linear(x)

        # Fused path: the op receives the (scaled) bias itself.
        if self.with_activation and self.act_type == 'fused_bias':
            return self.activate(x, self.bias * self.lr_mul)

        if self.bias is not None:
            x = x + self.bias * self.lr_mul
        if self.with_activation:
            x = self.activate(x)
        return x
class NoiseInjection(BaseModule):
    """Noise Injection Module.

    In StyleGAN2, they adopt this module to inject spatial random noise map in
    the generators. The noise is scaled by a single learnable weight and added
    to the input features.

    Args:
        noise_weight_init (float, optional): Initialization weight for noise
            injection. Defaults to ``0.``.
        fixed_noise (bool, optional): Whether to inject a fixed noise, i.e.
            noise drawn right after seeding the RNG with a constant.
            Defaults to ``False``.
    """

    def __init__(self, noise_weight_init=0., fixed_noise=False):
        super().__init__()
        self.weight = nn.Parameter(torch.zeros(1).fill_(noise_weight_init))
        self.fixed_noise = fixed_noise

    def forward(self, image, noise=None, return_noise=False):
        """Forward Function.

        Args:
            image (Tensor): Spatial features with a shape of (N, C, H, W).
            noise (Tensor, optional): Noises from the outside. When given, it
                is used as is (only cast to the dtype of ``image``).
                Defaults to None.
            return_noise (bool, optional): Whether to return noise tensor.
                Defaults to False.

        Returns:
            Tensor | tuple[Tensor, Tensor]: Output features, followed by the
                injected noise when ``return_noise`` is True.
        """
        if noise is None:
            batch, _, height, width = image.shape
            if self.fixed_noise:
                # NOTE: this reseeds the *global* RNG on every call, which is
                # what makes the noise identical between calls.
                torch.manual_seed(1024)
                # Sample on CPU so the values do not depend on the device,
                # then follow ``image`` instead of assuming CUDA is present.
                noise = torch.randn(batch, 1, height, width).to(image.device)
            else:
                noise = image.new_empty(batch, 1, height, width).normal_()

        noise = noise.to(image.dtype)
        out = image + self.weight.to(image.dtype) * noise

        if return_noise:
            return out, noise
        return out
class ConstantInput(BaseModule):
    """Learnable constant tensor used as the generator's first feature map.

    In StyleGAN2, they substitute the original head noise input with such a
    constant input module.

    Args:
        channel (int): Channels for the constant input tensor.
        size (int | Sequence[int], optional): Spatial size for the constant
            input. An int is used for both spatial dimensions; a sequence
            must contain exactly two ints. Defaults to 4.

    Raises:
        ValueError: If ``size`` is neither an int nor a sequence of ints.
    """

    def __init__(self, channel, size=4):
        super().__init__()

        if isinstance(size, int):
            spatial_size = [size, size]
        else:
            if not mmengine.is_seq_of(size, int):
                raise ValueError(f'Got invalid value in size, {size}')
            assert len(
                size
            ) == 2, f'The length of size should be 2 but got {len(size)}'
            spatial_size = size

        # Randomly initialised constant of shape (1, channel, *size).
        self.input = nn.Parameter(torch.randn(1, channel, *spatial_size))

    def forward(self, x):
        """Forward function.

        Args:
            x (Tensor): Input feature map with shape of (N, C, ...). Only its
                batch size is used.

        Returns:
            Tensor: The constant input repeated N times along the batch
                dimension.
        """
        num_samples = x.shape[0]
        return self.input.repeat(num_samples, 1, 1, 1)
def make_kernel(k):
    """Build a normalized float32 filter kernel.

    Args:
        k (array-like): Filter coefficients. A 1-D input is expanded to a
            2-D kernel by taking the outer product with itself; inputs of
            any other dimensionality are used as given.

    Returns:
        Tensor: A float32 kernel whose entries sum to one.
    """
    kernel = torch.tensor(k, dtype=torch.float32)

    if kernel.ndim == 1:
        # Outer product of the 1-D filter with itself.
        kernel = kernel.unsqueeze(0) * kernel.unsqueeze(1)

    # Normalize so that the kernel entries sum to one.
    kernel = kernel / kernel.sum()
    return kernel
class Blur(BaseModule):
    """Blur module.

    This module is adopted right after the upsampling operation in StyleGAN2.
    It filters the input with a fixed kernel through ``upfirdn2d``.

    Args:
        kernel (Array): Blur kernel/filter used in UpFIRDn. It is normalized
            by ``make_kernel`` before use.
        pad (list[int]): Padding for features, passed to ``upfirdn2d``.
        upsample_factor (int, optional): Upsampling factor. When larger than
            1 the kernel is multiplied by its square. Defaults to 1.
    """

    def __init__(self, kernel, pad, upsample_factor=1):
        super().__init__()
        self.pad = pad

        blur_kernel = make_kernel(kernel)
        if upsample_factor > 1:
            blur_kernel = blur_kernel * (upsample_factor**2)

        # A buffer (not a parameter): saved with the module, never trained.
        self.register_buffer('kernel', blur_kernel)

    def forward(self, x):
        """Forward function.

        Args:
            x (Tensor): Input feature map with shape of (N, C, H, W).

        Returns:
            Tensor: Output feature map.
        """
        # In Tero's implementation, he uses fp32; here the kernel follows the
        # dtype of the input instead.
        kernel = self.kernel.to(x.dtype)
        return upfirdn2d(x, kernel, padding=self.pad)
class AdaptiveInstanceNorm(BaseModule):
    r"""Adaptive Instance Normalization Module.

    The input is instance-normalized and then modulated with a per-channel
    scale and shift predicted from the style code by a linear layer.

    Ref: https://github.com/rosinality/style-based-gan-pytorch/blob/master/model.py  # noqa

    Args:
        in_channel (int): The number of input's channel.
        style_dim (int): Style latent dimension.
    """

    def __init__(self, in_channel, style_dim):
        super().__init__()

        self.norm = nn.InstanceNorm2d(in_channel)
        # One linear layer predicts both halves: scale first, then shift.
        self.affine = EqualizedLRLinearModule(style_dim, in_channel * 2)

        # Start from the identity modulation: scale = 1, shift = 0.
        affine_bias = self.affine.bias.data
        affine_bias[:in_channel] = 1
        affine_bias[in_channel:] = 0

    def forward(self, input, style):
        """Forward function.

        Args:
            input (Tensor): Input tensor with shape (n, c, h, w).
            style (Tensor): Input style tensor with shape (n, c).

        Returns:
            Tensor: Forward results.
        """
        # Append two singleton dims so the parameters broadcast over (h, w).
        modulation = self.affine(style)[:, :, None, None]
        scale, shift = torch.chunk(modulation, 2, dim=1)

        normalized = self.norm(input)
        return scale * normalized + shift
class StyleConv(BaseModule):
    """Convolutional style block made of two conv stages.

    Each stage applies, in order: a convolution (or the constant input for
    the first block), noise injection, LeakyReLU(0.2) and AdaIN.

    Args:
        in_channels (int): The channel number of the input tensor.
        out_channels (int): The channel number of the output tensor.
        kernel_size (int): The kernel size of convolution layers.
        style_channels (int): The number of channels for style code.
        padding (int, optional): Padding of convolution layers.
            Defaults to 1.
        initial (bool, optional): Whether this is the first StyleConv of
            StyleGAN's generator. If so, the first stage starts from a
            ``ConstantInput`` instead of a convolution. Defaults to False.
        blur_kernel (list, optional): The blurry kernel.
            Defaults to [1, 2, 1].
        upsample (bool, optional): Whether perform upsampling.
            Defaults to False.
        fused (bool, optional): Whether use fused upconv. Only consulted
            when ``upsample`` is True. Defaults to False.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 style_channels,
                 padding=1,
                 initial=False,
                 blur_kernel=[1, 2, 1],
                 upsample=False,
                 fused=False):
        super().__init__()

        # ---- first stage -------------------------------------------------
        if initial:
            self.conv1 = ConstantInput(in_channels)
        elif not upsample:
            self.conv1 = EqualizedLRConvModule(
                in_channels,
                out_channels,
                kernel_size,
                padding=padding,
                act_cfg=None)
        elif fused:
            # Fused up-convolution (built with its own LeakyReLU), then blur.
            self.conv1 = nn.Sequential(
                EqualizedLRConvUpModule(
                    in_channels,
                    out_channels,
                    kernel_size,
                    padding=padding,
                    act_cfg=dict(type='LeakyReLU', negative_slope=0.2)),
                Blur(blur_kernel, pad=(1, 1)),
            )
        else:
            # Nearest-neighbour upsampling, plain convolution, then blur.
            self.conv1 = nn.Sequential(
                nn.Upsample(scale_factor=2, mode='nearest'),
                EqualizedLRConvModule(
                    in_channels,
                    out_channels,
                    kernel_size,
                    padding=padding,
                    act_cfg=None),
                Blur(blur_kernel, pad=(1, 1)),
            )

        self.noise_injector1 = NoiseInjection()
        self.activate1 = nn.LeakyReLU(0.2)
        self.adain1 = AdaptiveInstanceNorm(out_channels, style_channels)

        # ---- second stage ------------------------------------------------
        self.conv2 = EqualizedLRConvModule(
            out_channels,
            out_channels,
            kernel_size,
            padding=padding,
            act_cfg=None)
        self.noise_injector2 = NoiseInjection()
        self.activate2 = nn.LeakyReLU(0.2)
        self.adain2 = AdaptiveInstanceNorm(out_channels, style_channels)

    def forward(self,
                x,
                style1,
                style2,
                noise1=None,
                noise2=None,
                return_noise=False):
        """Forward function.

        Args:
            x (Tensor): Input tensor.
            style1 (Tensor): Input style tensor with shape (n, c).
            style2 (Tensor): Input style tensor with shape (n, c).
            noise1 (Tensor, optional): Noise tensor with shape (n, c, h, w).
                Defaults to None.
            noise2 (Tensor, optional): Noise tensor with shape (n, c, h, w).
                Defaults to None.
            return_noise (bool, optional): If True, ``noise1`` and ``noise2``
                will be returned with ``out``. Defaults to False.

        Returns:
            Tensor | tuple[Tensor]: Forward results.
        """
        # First stage: conv -> noise -> activation -> AdaIN.
        feat = self.conv1(x)
        injected = self.noise_injector1(
            feat, noise=noise1, return_noise=return_noise)
        # The injector returns (features, noise) only when asked for noise.
        if return_noise:
            feat, noise1 = injected
        else:
            feat = injected
        feat = self.adain1(self.activate1(feat), style1)

        # Second stage: same pipeline with the second set of modules.
        feat = self.conv2(feat)
        injected = self.noise_injector2(
            feat, noise=noise2, return_noise=return_noise)
        if return_noise:
            feat, noise2 = injected
        else:
            feat = injected
        feat = self.adain2(self.activate2(feat), style2)

        if return_noise:
            return feat, noise1, noise2
        return feat