Source code for mmagic.models.editors.cain.cain_net
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmengine.model import BaseModule
from mmagic.models.archs import pixel_unshuffle
from mmagic.models.utils import make_layer
from mmagic.registry import MODELS
@MODELS.register_module()
class CAINNet(BaseModule):
    """CAIN network structure.

    Paper: Channel Attention Is All You Need for Video Frame Interpolation.
    Ref repo: https://github.com/myungsub/CAIN

    Args:
        in_channels (int): Channel number of inputs. Default: 3.
        kernel_size (int): Kernel size of CAINNet. Default: 3.
        num_block_groups (int): Number of block groups. Default: 5.
        num_block_layers (int): Number of blocks in a group. Default: 12.
        depth (int): Down scale depth, scale = 2**depth. Default: 3.
        reduction (int): Channel reduction of CA. Default: 16.
        norm (str | None): Normalization layer. If it is None, no
            normalization is performed. Default: None.
        padding (int): Padding exponent forwarded to
            ``get_padding_functions`` when ``padding_flag`` is set in
            :meth:`forward`. Default: 7.
        act (function): activate function. Default: nn.LeakyReLU(0.2, True).
        init_cfg (dict, optional): Initialization config dict. Default: None.
    """

    def __init__(self,
                 in_channels=3,
                 kernel_size=3,
                 num_block_groups=5,
                 num_block_layers=12,
                 depth=3,
                 reduction=16,
                 norm=None,
                 padding=7,
                 act=nn.LeakyReLU(0.2, True),
                 init_cfg=None):
        super().__init__(init_cfg=init_cfg)

        # ``conv_first`` consumes two concatenated pixel-unshuffled frames,
        # so each frame is expected to carry in_channels * 4**depth channels.
        mid_channels = in_channels * (4**depth)
        self.scale = 2**depth
        self.padding = padding

        # NOTE(review): stride and padding are fixed to 1 here, so the
        # spatial size is only preserved when kernel_size == 3.
        self.conv_first = nn.Conv2d(mid_channels * 2, mid_channels,
                                    kernel_size, 1, 1)
        self.body = make_layer(
            ResidualGroup,
            num_block_groups,
            block_layer=ResidualChannelAttention,
            num_block_layers=num_block_layers,
            mid_channels=mid_channels,
            kernel_size=kernel_size,
            reduction=reduction,
            norm=norm,
            act=act)
        self.conv_last = nn.Conv2d(mid_channels, mid_channels, kernel_size, 1,
                                   1)

    def forward(self, imgs, padding_flag=False):
        """Forward function.

        The input tensor is left untouched: the per-frame mean is removed
        out-of-place.

        Args:
            imgs (Tensor): Input tensor with shape (n, 2, c, h, w).
            padding_flag (bool): Padding or not. Default: False.

        Returns:
            Tensor: Forward results.
        """
        assert imgs.shape[1] == 2
        x1, x2 = imgs[:, 0], imgs[:, 1]

        # Per-sample, per-channel spatial mean of each frame.
        mean1 = x1.mean((2, 3), keepdim=True)
        mean2 = x2.mean((2, 3), keepdim=True)

        # x1 / x2 are views of ``imgs``; an in-place ``-=`` would overwrite
        # the caller's tensor (and fails on leaf tensors requiring grad),
        # so subtract out-of-place instead.
        x1 = x1 - mean1
        x2 = x2 - mean2

        if padding_flag:
            padding_function, depadding_function = get_padding_functions(
                x1, self.padding)
            x1 = padding_function(x1)
            x2 = padding_function(x2)

        x1 = pixel_unshuffle(x1, self.scale)
        x2 = pixel_unshuffle(x2, self.scale)

        x = torch.cat([x1, x2], dim=1)
        x = self.conv_first(x)
        res = self.body(x)
        res += x  # global residual connection around the body
        x = self.conv_last(res)
        x = F.pixel_shuffle(x, self.scale)

        if padding_flag:
            x = depadding_function(x)

        # Restore the brightness offset as the average of both frame means.
        x += (mean1 + mean2) / 2
        return x
def get_padding_functions(x, padding=7):
    """Build a matching pair of pad / de-pad modules for CAIN.

    The padding module enlarges the last two dimensions of a tensor shaped
    like ``x`` so that each becomes a multiple of ``2 ** padding``; the
    de-padding module applies the negated amounts to crop the result back.
    Applying both in sequence yields the original tensor.

        tensor --padding_function--> padded tensor
        padded tensor --depadding_function--> original tensor

    Args:
        x (Tensor): Input tensor; only its last two dimensions (h, w) are
            read.
        padding (int): Exponent of the alignment, i.e. sizes are aligned to
            ``2 ** padding``. Default: 7.

    Returns:
        padding_function (Function): Padding function.
        depadding_function (Function): Depadding function.
    """
    height, width = x.shape[-2:]
    multiple = 1 << padding

    # Amount missing to reach the next multiple (0 when already aligned).
    extra_w = -width % multiple
    extra_h = -height % multiple

    # Split as evenly as possible; the odd pixel goes to right / bottom.
    left = extra_w // 2
    right = extra_w - left
    up = extra_h // 2
    down = extra_h - up

    # Reflection is only used while the larger pad on each axis stays below
    # the input size; otherwise replication padding is used.
    if down < height and right < width:
        pad_cls = nn.ReflectionPad2d
    else:
        pad_cls = nn.ReplicationPad2d

    amounts = [left, right, up, down]
    padding_function = pad_cls(padding=amounts)
    # Negative padding crops by the same amounts.
    depadding_function = pad_cls(padding=[-amount for amount in amounts])
    return padding_function, depadding_function
class ConvNormWithReflectionPad(BaseModule):
    """Apply reflection padding, followed by a convolution, which can be
    followed by an optional normalization.

    Args:
        in_channels (int): Channel number of input features.
        out_channels (int): Channel number of output features.
        kernel_size (int): Kernel size of convolution layer.
        norm (str | None): Normalization layer, ``'in'`` for instance norm
            or ``'bn'`` for batch norm (case-insensitive). If it is None, no
            normalization is performed. Default: None.

    Raises:
        ValueError: If ``norm`` is a string other than ``'in'`` / ``'bn'``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, norm=None):
        super().__init__()

        # Pad by half the kernel size so the unpadded convolution below
        # keeps the spatial size for odd kernel sizes.
        self.reflection_pad = nn.ReflectionPad2d(kernel_size // 2)
        self.conv = nn.Conv2d(
            in_channels, out_channels, kernel_size=kernel_size, bias=True)

        if norm is None:
            self.norm = None
        elif norm.lower() == 'in':
            self.norm = nn.InstanceNorm2d(
                out_channels, track_running_stats=True)
        elif norm.lower() == 'bn':
            self.norm = nn.BatchNorm2d(out_channels)
        else:
            raise ValueError(f"Invalid value for 'norm': {norm}")

    def forward(self, x):
        """Forward function for ConvNormWithReflectionPad.

        Args:
            x (Tensor): Input tensor with shape (n, in_channels, h, w).

        Returns:
            Tensor: Output tensor with ``out_channels`` channels.
        """
        out = self.reflection_pad(x)
        out = self.conv(out)
        # Explicit None check instead of relying on module truthiness.
        if self.norm is not None:
            out = self.norm(out)
        return out
class ChannelAttentionLayer(BaseModule):
    """Channel Attention (CA) Layer.

    Rescales every channel of the input by a gate in (0, 1) computed from
    the globally average-pooled features.

    Args:
        mid_channels (int): Channel number of the intermediate features.
        reduction (int): Channel reduction of CA. Default: 16.
    """

    def __init__(self, mid_channels, reduction=16):
        super().__init__()
        reduced_channels = mid_channels // reduction

        # Global average pooling: (n, c, h, w) --> (n, c, 1, 1).
        self.avg_pool = nn.AdaptiveAvgPool2d(1)

        # 1x1 bottleneck: reduce the channels, then restore them.
        reduce_conv = nn.Conv2d(
            mid_channels, reduced_channels, 1, padding=0, bias=True)
        restore_conv = nn.Conv2d(
            reduced_channels, mid_channels, 1, padding=0, bias=True)
        self.channel_attention = nn.Sequential(
            reduce_conv,
            nn.ReLU(inplace=True),
            restore_conv,
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Forward function for ChannelAttentionLayer.

        Args:
            x (Tensor): Input tensor with shape (n, c, h, w).

        Returns:
            Tensor: Output tensor with shape (n, c, h, w).
        """
        gate = self.channel_attention(self.avg_pool(x))
        # The (n, c, 1, 1) gate broadcasts over the spatial dimensions.
        return x * gate
class ResidualChannelAttention(BaseModule):
    """Residual Channel Attention Module.

    Stacks conv -> activation -> conv -> channel attention and adds the
    input back through a skip connection.

    Args:
        mid_channels (int): Channel number of the intermediate features.
        kernel_size (int): Kernel size of convolution layers. Default: 3.
        reduction (int): Channel reduction. Default: 16.
        norm (str | None): Normalization layer passed to both convolution
            blocks. If None, no norm layer. Default: None.
        act (function): activation function. Default: nn.LeakyReLU(0.2, True).
    """

    def __init__(self,
                 mid_channels,
                 kernel_size=3,
                 reduction=16,
                 norm=None,
                 act=nn.LeakyReLU(0.2, True)):
        super().__init__()
        first_conv = ConvNormWithReflectionPad(
            mid_channels, mid_channels, kernel_size, norm=norm)
        second_conv = ConvNormWithReflectionPad(
            mid_channels, mid_channels, kernel_size, norm=norm)
        attention = ChannelAttentionLayer(mid_channels, reduction)
        # Order matters: it fixes the sub-module indices inside ``body``.
        layers = [first_conv, act, second_conv, attention]
        self.body = nn.Sequential(*layers)

    def forward(self, x):
        """Forward function for ResidualChannelAttention.

        Args:
            x (Tensor): Input tensor with shape (n, c, h, w).

        Returns:
            Tensor: Output tensor with shape (n, c, h, w).
        """
        residual = self.body(x)
        return residual + x
class ResidualGroup(BaseModule):
    """Residual Group, consisting of a stack of residual channel attention,
    followed by a convolution.

    Args:
        block_layer (nn.Module): nn.Module class for basic block.
        num_block_layers (int): number of blocks.
        mid_channels (int): Channel number of the intermediate features.
        kernel_size (int): Kernel size of ResidualGroup.
        reduction (int): Channel reduction of CA.
        act (function): activation function. Default: nn.LeakyReLU(0.2, True).
        norm (str | None): Normalization layer. If it is None, no
            normalization is performed. Default: None.
    """

    def __init__(self,
                 block_layer,
                 num_block_layers,
                 mid_channels,
                 kernel_size,
                 reduction,
                 act=nn.LeakyReLU(0.2, True),
                 norm=None):
        super().__init__()

        # Keyword arguments forwarded by ``make_layer`` to ``block_layer``.
        block_kwargs = dict(
            mid_channels=mid_channels,
            kernel_size=kernel_size,
            reduction=reduction,
            norm=norm,
            act=act)
        self.body = make_layer(block_layer, num_block_layers, **block_kwargs)

        # Convolution applied after the block stack, before the skip add.
        self.conv_after_body = ConvNormWithReflectionPad(
            mid_channels, mid_channels, kernel_size, norm=norm)

    def forward(self, x):
        """Forward function for ResidualGroup.

        Args:
            x (Tensor): Input tensor with shape (n, c, h, w).

        Returns:
            Tensor: Output tensor with shape (n, c, h, w).
        """
        features = self.conv_after_body(self.body(x))
        return x + features