Source code for mmagic.models.editors.dic.feedback_hour_glass

# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
from mmengine.model import BaseModule

from mmagic.registry import MODELS


@MODELS.register_module()
[docs]class FeedbackHourglass(BaseModule):
    """Feedback Hourglass model for face landmark.

    It has a style of:

    ::

        -- preprocessing ----- Hourglass ----->
                           ^               |
                           |_______________|

    Args:
        mid_channels (int): Number of channels in the intermediate features.
        num_keypoints (int): Number of keypoints.
    """

    def __init__(self, mid_channels, num_keypoints):
        super().__init__()
        self.mid_channels = mid_channels
        self.num_keypoints = num_keypoints

        self.pre_conv_block = nn.Sequential(
            nn.Conv2d(3, self.mid_channels // 4, 7, 2, 3),
            nn.ReLU(inplace=True),
            ResBlock(self.mid_channels // 4, self.mid_channels // 2),
            nn.MaxPool2d(2, 2),
            ResBlock(self.mid_channels // 2, self.mid_channels // 2),
            ResBlock(self.mid_channels // 2, self.mid_channels),
        )
        self.first_conv = nn.Conv2d(2 * self.mid_channels,
                                    2 * self.mid_channels, 1)

        self.hg = Hourglass(4, 2 * self.mid_channels)
        self.last = nn.Sequential(
            ResBlock(self.mid_channels, self.mid_channels),
            nn.Conv2d(self.mid_channels, self.mid_channels, 1),
            nn.ReLU(inplace=True),
            nn.Conv2d(self.mid_channels, self.num_keypoints, 1))

[docs]    def forward(self, x, last_hidden=None):
        """Forward function.

        Args:
            x (Tensor): Input tensor with shape (n, c, h, w).
            last_hidden (Tensor | None): The feedback of FeedbackHourglass.
                In first step, last_hidden=None. Otherwise, last_hidden is
                the past output of FeedbackHourglass.
                Default: None.

        Returns:
            heatmap (Tensor): Heatmap of facial landmark.
            feedback (Tensor): Feedback Tensor.
        """

        feature = self.pre_conv_block(x)
        if last_hidden is None:
            feature = self.first_conv(torch.cat((feature, feature), dim=1))
        else:
            feature = self.first_conv(torch.cat((feature, last_hidden), dim=1))
        feature = self.hg(feature)
        heatmap = self.last(feature[:, :self.mid_channels])  # first half
        feedback = feature[:, self.mid_channels:]  # second half
        return heatmap, feedback


[docs]class ResBlock(nn.Module):
    """ResBlock for Hourglass.

    It has a style of:

    ::

        ---Conv-ReLU-Conv-Conv-+-
         |_________Conv________|

        or

        ---Conv-ReLU-Conv-Conv-+-
         |_____________________|

    Args:
        in_channels (int): Number of channels in the input features.
        out_channels (int): Number of channels in the output features.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv_block = nn.Sequential(
            nn.Conv2d(in_channels, out_channels // 2, 1),
            nn.ReLU(inplace=True),
            nn.Conv2d(
                out_channels // 2, out_channels // 2, 3, stride=1, padding=1),
            nn.Conv2d(out_channels // 2, out_channels, 1))
        if in_channels == out_channels:
            self.skip_layer = None
        else:
            self.skip_layer = nn.Conv2d(in_channels, out_channels, 1)

[docs]    def forward(self, x):
        """Forward function.

        Args:
            x (Tensor): Input tensor with shape (n, c, h, w).

        Returns:
            Tensor: Forward results.
        """

        residual = self.conv_block(x)
        if self.skip_layer:
            x = self.skip_layer(x)
        return x + residual


[docs]class Hourglass(nn.Module):
    """Hourglass model for face landmark.

    It is a recursive model.

    Args:
        depth (int): Depth of Hourglass, the number of recursions.
        mid_channels (int): Number of channels in the intermediate features.
    """

    def __init__(self, depth, mid_channels):
        super().__init__()
        self.up1 = ResBlock(mid_channels, mid_channels)
        self.pool = nn.MaxPool2d(2, 2)
        self.low1 = ResBlock(mid_channels, mid_channels)
        if depth == 1:
            self.low2 = ResBlock(mid_channels, mid_channels)
        else:
            self.low2 = Hourglass(depth - 1, mid_channels)
        self.low3 = ResBlock(mid_channels, mid_channels)

[docs]    def forward(self, x):
        """Forward function.

        Args:
            x (Tensor): Input tensor with shape (n, c, h, w).

        Returns:
            Tensor: Forward results.
        """

        up1 = self.up1(x)
        low1 = self.low1(self.pool(x))
        low2 = self.low2(low1)
        low3 = self.low3(low2)
        up2 = nn.functional.interpolate(
            low3, scale_factor=2, mode='bilinear', align_corners=True)
        return up1 + up2


[docs]def reduce_to_five_heatmaps(ori_heatmap, detach):
    """Reduce facial landmark heatmaps to 5 heatmaps.

    DIC realizes facial SR with the help of key points of the face.
    The number of key points in datasets are different from each other.
    This function reduces the input heatmaps into 5 heatmaps:
        left eye
        right eye
        nose
        mouse
        face silhouette

    Args:
        ori_heatmap (Tensor): Input heatmap tensor. (B, N, 32, 32).
        detach (bool): Detached from the current tensor or not.

    returns:
        Tensor: New heatmap tensor. (B, 5, 32, 32).
    """

    heatmap = ori_heatmap.clone()
    max_heat = heatmap.max(dim=2, keepdim=True)[0].max(dim=3, keepdim=True)[0]
    max_heat = max_heat.clamp_min_(0.05)
    heatmap /= max_heat
    if heatmap.size(1) == 5:
        return heatmap.detach() if detach else heatmap
    elif heatmap.size(1) == 68:
        new_heatmap = torch.zeros_like(heatmap[:, :5])
        new_heatmap[:, 0] = heatmap[:, 36:42].sum(1)  # left eye
        new_heatmap[:, 1] = heatmap[:, 42:48].sum(1)  # right eye
        new_heatmap[:, 2] = heatmap[:, 27:36].sum(1)  # nose
        new_heatmap[:, 3] = heatmap[:, 48:68].sum(1)  # mouse
        new_heatmap[:, 4] = heatmap[:, :27].sum(1)  # face silhouette
        return new_heatmap.detach() if detach else new_heatmap
    elif heatmap.size(1) == 194:  # Helen
        new_heatmap = torch.zeros_like(heatmap[:, :5])
        tmp_id = torch.cat((torch.arange(134, 153), torch.arange(174, 193)))
        new_heatmap[:, 0] = heatmap[:, tmp_id].sum(1)  # left eye
        tmp_id = torch.cat((torch.arange(114, 133), torch.arange(154, 173)))
        new_heatmap[:, 1] = heatmap[:, tmp_id].sum(1)  # right eye
        tmp_id = torch.arange(41, 57)
        new_heatmap[:, 2] = heatmap[:, tmp_id].sum(1)  # nose
        tmp_id = torch.arange(58, 113)
        new_heatmap[:, 3] = heatmap[:, tmp_id].sum(1)  # mouse
        tmp_id = torch.arange(0, 40)
        new_heatmap[:, 4] = heatmap[:, tmp_id].sum(1)  # face silhouette
        return new_heatmap.detach() if detach else new_heatmap
    else:
        raise NotImplementedError(
            f'Face landmark number {heatmap.size(1)} not implemented!')