ComfyUI/comfy_extras/chainner_models/architecture/LaMa.py

# pylint: skip-file
"""
Model adapted from advimman's lama project: https://github.com/advimman/lama
"""

# Fast Fourier Convolution NeurIPS 2020
# original implementation https://github.com/pkumivision/FFC/blob/main/model_zoo/ffc.py
# paper https://proceedings.neurips.cc/paper/2020/file/2fd5d41ec6cfab47e32164d5624269b1-Paper.pdf

from typing import List

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.transforms.functional import InterpolationMode, rotate


class LearnableSpatialTransformWrapper(nn.Module):
    """Run a wrapped module on a reflect-padded, rotated view of its input.

    Each input tensor is padded on every side by ``pad_coef`` times its
    height/width, rotated by ``self.angle``, passed through ``impl``, rotated
    back by the negated angle and finally cropped by the same padding
    amounts.

    Args:
        impl: Module applied to the transformed input.
        pad_coef: Fraction of the height/width added as padding on each side.
        angle_init_range: The initial angle is drawn as
            ``torch.rand(1) * angle_init_range``.
        train_angle: When true the angle is stored as an ``nn.Parameter``;
            otherwise it stays a plain tensor attribute.
    """

    def __init__(self, impl, pad_coef=0.5, angle_init_range=80, train_angle=True):
        super().__init__()
        self.impl = impl
        initial_angle = torch.rand(1) * angle_init_range
        self.angle = (
            nn.Parameter(initial_angle, requires_grad=True)
            if train_angle
            else initial_angle
        )
        self.pad_coef = pad_coef

    def _pad_amounts(self, tensor):
        """Return ``(pad_h, pad_w)`` derived from the last two dims of a 4-D tensor."""
        height, width = tensor.shape[2:]
        return int(height * self.pad_coef), int(width * self.pad_coef)

    def forward(self, x):
        """Apply ``impl`` in the transformed frame to a tensor or a tuple of tensors.

        Raises:
            ValueError: If ``x`` is neither a tensor nor a tuple.
        """
        if torch.is_tensor(x):
            transformed = self.transform(x)
            return self.inverse_transform(self.impl(transformed), x)
        if not isinstance(x, tuple):
            raise ValueError(f"Unexpected input type {type(x)}")

        # Tuple input: transform every element, run impl once on the whole
        # tuple, then undo the transform element by element.
        outputs = self.impl(tuple(self.transform(part) for part in x))
        return tuple(
            self.inverse_transform(out_part, in_part)
            for out_part, in_part in zip(outputs, x)
        )

    def transform(self, x):
        """Reflect-pad ``x`` and rotate it by ``self.angle``."""
        pad_h, pad_w = self._pad_amounts(x)
        padded = F.pad(x, [pad_w, pad_w, pad_h, pad_h], mode="reflect")
        return rotate(padded, self.angle.to(padded), InterpolationMode.BILINEAR, fill=0)

    def inverse_transform(self, y_padded_rotated, orig_x):
        """Rotate back by ``-self.angle`` and crop the padding computed from ``orig_x``."""
        pad_h, pad_w = self._pad_amounts(orig_x)
        unrotated = rotate(
            y_padded_rotated,
            -self.angle.to(y_padded_rotated),
            InterpolationMode.BILINEAR,
            fill=0,
        )
        full_h, full_w = unrotated.shape[2:]
        return unrotated[:, :, pad_h : full_h - pad_h, pad_w : full_w - pad_w]


class SELayer(nn.Module):
    """Squeeze-and-excitation gate that rescales every channel of a 4-D input.

    Each channel is averaged to a single value, passed through a
    bias-free two-layer bottleneck ending in a sigmoid, and the resulting
    per-channel weight multiplies the input.

    Args:
        channel: Number of input (and output) channels.
        reduction: Divisor for the bottleneck width (``channel // reduction``).
    """

    def __init__(self, channel, reduction=16):
        super().__init__()
        bottleneck = channel // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, bottleneck, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(bottleneck, channel, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``x`` scaled channel-wise by the learned gate."""
        batch, channels, _height, _width = x.size()
        pooled = self.avg_pool(x).view(batch, channels)
        gate = self.fc(pooled).view(batch, channels, 1, 1)
        return x * gate.expand_as(x)


class FourierUnit(nn.Module):
    """Apply a 1x1 convolution, batch norm and ReLU in the frequency domain.

    The input is transformed with a real FFT, its real and imaginary parts
    are stacked as channels, processed by ``conv_layer`` / ``bn`` / ``relu``,
    recombined into a complex tensor and transformed back with the inverse
    real FFT to the spatial size of the (optionally rescaled) input.

    Args:
        in_channels: Channels of the spatial input.
        out_channels: Channels of the spatial output.
        groups: ``groups`` argument of the 1x1 convolution.
        spatial_scale_factor: If not ``None``, the input is rescaled by this
            factor before the FFT and the output is resized back.
        spatial_scale_mode: Interpolation mode used for that rescaling.
        spectral_pos_encoding: If true, two coordinate channels (vertical and
            horizontal ``linspace(0, 1, ...)``) are prepended to the spectrum.
        use_se: If true, an ``SELayer`` is applied to the spectrum before the
            convolution.
        se_kwargs: Extra keyword arguments for ``SELayer``.
        ffc3d: If true, the FFT runs over the last three dims instead of two.
        fft_norm: ``norm`` argument passed to ``rfftn`` / ``irfftn``.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        groups=1,
        spatial_scale_factor=None,
        spatial_scale_mode="bilinear",
        spectral_pos_encoding=False,
        use_se=False,
        se_kwargs=None,
        ffc3d=False,
        fft_norm="ortho",
    ):
        # bn_layer not used
        super(FourierUnit, self).__init__()
        self.groups = groups

        # Real and imaginary parts are stacked as channels, hence the "* 2";
        # positional encoding adds two more input channels.
        self.conv_layer = torch.nn.Conv2d(
            in_channels=in_channels * 2 + (2 if spectral_pos_encoding else 0),
            out_channels=out_channels * 2,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=self.groups,
            bias=False,
        )
        self.bn = torch.nn.BatchNorm2d(out_channels * 2)
        self.relu = torch.nn.ReLU(inplace=True)

        # squeeze and excitation block
        self.use_se = use_se
        if use_se:
            if se_kwargs is None:
                se_kwargs = {}
            self.se = SELayer(self.conv_layer.in_channels, **se_kwargs)

        self.spatial_scale_factor = spatial_scale_factor
        self.spatial_scale_mode = spatial_scale_mode
        self.spectral_pos_encoding = spectral_pos_encoding
        self.ffc3d = ffc3d
        self.fft_norm = fft_norm

    def forward(self, x):
        """Return the spectrally filtered version of ``x``.

        float16 and bfloat16 inputs (on any device) are upcast to float32 for
        the forward and inverse FFT, while the learned layers run in the
        input dtype and the result is cast back to it. For every other dtype
        the spectrum is still forced to float32 before the inverse FFT, so
        the output of such inputs is float32.
        """
        input_dtype = x.dtype
        # Detect low precision by dtype rather than by the tensor type string
        # so that bfloat16 and non-CUDA half tensors take the same path.
        low_precision = input_dtype in (torch.float16, torch.bfloat16)

        batch = x.shape[0]

        if self.spatial_scale_factor is not None:
            orig_size = x.shape[-2:]
            x = F.interpolate(
                x,
                scale_factor=self.spatial_scale_factor,
                mode=self.spatial_scale_mode,
                align_corners=False,
            )

        # (batch, c, h, w/2+1, 2)
        fft_dim = (-3, -2, -1) if self.ffc3d else (-2, -1)
        fft_input = x.float() if low_precision else x
        ffted = torch.fft.rfftn(fft_input, dim=fft_dim, norm=self.fft_norm)

        ffted = torch.stack((ffted.real, ffted.imag), dim=-1)
        ffted = ffted.permute(0, 1, 4, 2, 3).contiguous()  # (batch, c, 2, h, w/2+1)
        ffted = ffted.view((batch, -1) + ffted.size()[3:])

        if self.spectral_pos_encoding:
            height, width = ffted.shape[-2:]
            coords_vert = (
                torch.linspace(0, 1, height)[None, None, :, None]
                .expand(batch, 1, height, width)
                .to(ffted)
            )
            coords_hor = (
                torch.linspace(0, 1, width)[None, None, None, :]
                .expand(batch, 1, height, width)
                .to(ffted)
            )
            ffted = torch.cat((coords_vert, coords_hor, ffted), dim=1)

        if low_precision:
            # Hand the spectrum to the learned layers (SE and conv) in the
            # dtype of the input.
            ffted = ffted.to(input_dtype)

        if self.use_se:
            ffted = self.se(ffted)

        ffted = self.conv_layer(ffted)  # (batch, c*2, h, w/2+1)

        ffted = self.relu(self.bn(ffted))
        # forcing to be always float
        ffted = ffted.float()

        ffted = (
            ffted.view((batch, -1, 2) + ffted.size()[2:])
            .permute(0, 1, 3, 4, 2)
            .contiguous()
        )  # (batch,c, t, h, w/2+1, 2)

        ffted = torch.complex(ffted[..., 0], ffted[..., 1])

        ifft_shape_slice = x.shape[-3:] if self.ffc3d else x.shape[-2:]
        output = torch.fft.irfftn(
            ffted, s=ifft_shape_slice, dim=fft_dim, norm=self.fft_norm
        )

        if low_precision:
            output = output.to(input_dtype)

        if self.spatial_scale_factor is not None:
            output = F.interpolate(
                output,
                size=orig_size,
                mode=self.spatial_scale_mode,
                align_corners=False,
            )

        return output


class SpectralTransform(nn.Module):
    """Spectral branch built from a 1x1 conv, Fourier unit(s) and a closing 1x1 conv.

    Args:
        in_channels: Channels of the input.
        out_channels: Channels of the output; the inner layers work on
            ``out_channels // 2`` channels.
        stride: With ``stride == 2`` the input is first average-pooled by 2.
        groups: ``groups`` argument for the convolutions and Fourier units.
        enable_lfu: If true, a second Fourier unit is applied to a tiled
            quarter of the channels and its output is added in.
        separable_fu: Accepted but not used by this implementation.
        **fu_kwargs: Extra keyword arguments for the main ``FourierUnit``.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        stride=1,
        groups=1,
        enable_lfu=True,
        separable_fu=False,
        **fu_kwargs,
    ):
        # bn_layer not used
        super().__init__()
        self.enable_lfu = enable_lfu
        self.downsample = (
            nn.AvgPool2d(kernel_size=(2, 2), stride=2) if stride == 2 else nn.Identity()
        )
        self.stride = stride

        hidden = out_channels // 2
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, hidden, kernel_size=1, groups=groups, bias=False),
            nn.BatchNorm2d(hidden),
            nn.ReLU(inplace=True),
        )
        self.fu = FourierUnit(hidden, hidden, groups, **fu_kwargs)
        if self.enable_lfu:
            self.lfu = FourierUnit(hidden, hidden, groups)
        self.conv2 = torch.nn.Conv2d(
            hidden, out_channels, kernel_size=1, groups=groups, bias=False
        )

    def _local_fourier(self, x):
        """Run ``lfu`` on the first quarter of the channels split into a 2x2 grid of tiles."""
        _, channels, height, _ = x.shape
        split_no = 2
        tile = height // split_no  # the same tile size is used for both spatial dims
        tiles = torch.cat(
            torch.split(x[:, : channels // 4], tile, dim=-2), dim=1
        ).contiguous()
        tiles = torch.cat(torch.split(tiles, tile, dim=-1), dim=1).contiguous()
        # Tile the result back up to the spatial size of ``x``.
        return self.lfu(tiles).repeat(1, 1, split_no, split_no).contiguous()

    def forward(self, x):
        """Return ``conv2(x' + fu(x') + lfu_term)`` where ``x' = conv1(downsample(x))``."""
        x = self.conv1(self.downsample(x))
        spectral = self.fu(x)
        local = self._local_fourier(x) if self.enable_lfu else 0
        return self.conv2(x + spectral + local)


class FFC(nn.Module):
    """Fast Fourier Convolution with a local and a global channel branch.

    The input channels are split into ``in_cg = int(in_channels * ratio_gin)``
    global channels and the remaining local ones; the outputs are split the
    same way with ``ratio_gout``. Four paths connect the branches
    (local->local, local->global, global->local as plain convolutions,
    global->global as a ``SpectralTransform``). Any path whose input or
    output has zero channels is replaced by ``nn.Identity``.

    Args:
        in_channels, out_channels: Total channel counts.
        kernel_size, stride, padding, dilation, groups, bias: Forwarded to the
            plain convolutions; ``stride`` must be 1 or 2.
        ratio_gin, ratio_gout: Fraction of global channels in input / output.
        enable_lfu: Forwarded to ``SpectralTransform``.
        padding_type: ``padding_mode`` of the plain convolutions.
        gated: If true, sigmoid gates computed from the concatenated input
            scale the two cross-branch paths.
        **spectral_kwargs: Extra keyword arguments for ``SpectralTransform``.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        ratio_gin,
        ratio_gout,
        stride=1,
        padding=0,
        dilation=1,
        groups=1,
        bias=False,
        enable_lfu=True,
        padding_type="reflect",
        gated=False,
        **spectral_kwargs,
    ):
        super().__init__()

        assert stride in (1, 2), "Stride should be 1 or 2."
        self.stride = stride

        in_cg = int(in_channels * ratio_gin)
        in_cl = in_channels - in_cg
        out_cg = int(out_channels * ratio_gout)
        out_cl = out_channels - out_cg

        self.ratio_gin = ratio_gin
        self.ratio_gout = ratio_gout
        self.global_in_num = in_cg

        def conv_or_identity(n_in, n_out):
            # nn.Identity accepts and ignores the convolution arguments.
            layer_cls = nn.Identity if n_in == 0 or n_out == 0 else nn.Conv2d
            return layer_cls(
                n_in,
                n_out,
                kernel_size,
                stride,
                padding,
                dilation,
                groups,
                bias,
                padding_mode=padding_type,
            )

        self.convl2l = conv_or_identity(in_cl, out_cl)
        self.convl2g = conv_or_identity(in_cl, out_cg)
        self.convg2l = conv_or_identity(in_cg, out_cl)

        spectral_cls = nn.Identity if in_cg == 0 or out_cg == 0 else SpectralTransform
        self.convg2g = spectral_cls(
            in_cg,
            out_cg,
            stride,
            1 if groups == 1 else groups // 2,
            enable_lfu,
            **spectral_kwargs,
        )

        self.gated = gated
        needs_gate_conv = self.gated and in_cg != 0 and out_cl != 0
        gate_cls = nn.Conv2d if needs_gate_conv else nn.Identity
        self.gate = gate_cls(in_channels, 2, 1)

    def forward(self, x):
        """Return ``(out_local, out_global)``.

        ``x`` is either a ``(local, global)`` tuple or a single value that is
        treated as the local part with ``0`` as the global part. A branch
        that is disabled by ``ratio_gout`` is returned as the integer ``0``.
        """
        if type(x) is tuple:
            x_l, x_g = x
        else:
            x_l, x_g = x, 0

        if self.gated:
            gate_inputs = [x_l, x_g] if torch.is_tensor(x_g) else [x_l]
            gates = torch.sigmoid(self.gate(torch.cat(gate_inputs, dim=1)))
            g2l_gate, l2g_gate = gates.chunk(2, dim=1)
        else:
            g2l_gate = l2g_gate = 1

        out_xl = out_xg = 0
        if self.ratio_gout != 1:
            out_xl = self.convl2l(x_l) + self.convg2l(x_g) * g2l_gate
        if self.ratio_gout != 0:
            out_xg = self.convl2g(x_l) * l2g_gate + self.convg2g(x_g)

        return out_xl, out_xg


class FFC_BN_ACT(nn.Module):
    """``FFC`` followed by a normalisation and an activation on each branch.

    A branch that ``ratio_gout`` disables (local when it is 1, global when it
    is 0) gets ``nn.Identity`` for both its norm and its activation.

    Args:
        in_channels, out_channels, kernel_size, ratio_gin, ratio_gout, stride,
        padding, dilation, groups, bias, enable_lfu, padding_type: Forwarded
            to ``FFC``.
        norm_layer: Callable building a norm layer from a channel count.
        activation_layer: Callable building an activation; it is invoked with
            ``inplace=True``.
        **kwargs: Extra keyword arguments forwarded to ``FFC``.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        ratio_gin,
        ratio_gout,
        stride=1,
        padding=0,
        dilation=1,
        groups=1,
        bias=False,
        norm_layer=nn.BatchNorm2d,
        activation_layer=nn.Identity,
        padding_type="reflect",
        enable_lfu=True,
        **kwargs,
    ):
        super().__init__()
        self.ffc = FFC(
            in_channels,
            out_channels,
            kernel_size,
            ratio_gin,
            ratio_gout,
            stride,
            padding,
            dilation,
            groups,
            bias,
            enable_lfu,
            padding_type=padding_type,
            **kwargs,
        )

        has_local = ratio_gout != 1
        has_global = ratio_gout != 0
        global_channels = int(out_channels * ratio_gout)
        local_channels = out_channels - global_channels

        local_norm = norm_layer if has_local else nn.Identity
        global_norm = norm_layer if has_global else nn.Identity
        self.bn_l = local_norm(local_channels)
        self.bn_g = global_norm(global_channels)

        local_act = activation_layer if has_local else nn.Identity
        global_act = activation_layer if has_global else nn.Identity
        self.act_l = local_act(inplace=True)
        self.act_g = global_act(inplace=True)

    def forward(self, x):
        """Return the normalised and activated ``(local, global)`` outputs of ``ffc``."""
        local_out, global_out = self.ffc(x)
        local_out = self.act_l(self.bn_l(local_out))
        global_out = self.act_g(self.bn_g(global_out))
        return local_out, global_out


class FFCResnetBlock(nn.Module):
    """Residual block made of two ``FFC_BN_ACT`` layers.

    Args:
        dim: Channel count of both layers (input and output).
        padding_type: ``padding_type`` for the FFC layers.
        norm_layer: Norm layer factory for the FFC layers.
        activation_layer: Activation factory for the FFC layers.
        dilation: Used as both ``dilation`` and ``padding`` of the 3x3 convs.
        spatial_transform_kwargs: If not ``None``, both layers are wrapped in
            ``LearnableSpatialTransformWrapper`` built with these kwargs.
        inline: If true, ``forward`` takes and returns a single tensor whose
            trailing channels are the global branch; otherwise it works on a
            ``(local, global)`` tuple.
        **conv_kwargs: Extra keyword arguments for ``FFC_BN_ACT``.
    """

    def __init__(
        self,
        dim,
        padding_type,
        norm_layer,
        activation_layer=nn.ReLU,
        dilation=1,
        spatial_transform_kwargs=None,
        inline=False,
        **conv_kwargs,
    ):
        super().__init__()
        self.conv1 = FFC_BN_ACT(
            dim,
            dim,
            kernel_size=3,
            padding=dilation,
            dilation=dilation,
            norm_layer=norm_layer,
            activation_layer=activation_layer,
            padding_type=padding_type,
            **conv_kwargs,
        )
        self.conv2 = FFC_BN_ACT(
            dim,
            dim,
            kernel_size=3,
            padding=dilation,
            dilation=dilation,
            norm_layer=norm_layer,
            activation_layer=activation_layer,
            padding_type=padding_type,
            **conv_kwargs,
        )
        # Remember the number of global input channels before conv1 may be
        # wrapped: the wrapper only exposes the layer as ``impl``, so
        # ``self.conv1.ffc`` is no longer reachable afterwards.
        self._global_in_num = self.conv1.ffc.global_in_num
        if spatial_transform_kwargs is not None:
            self.conv1 = LearnableSpatialTransformWrapper(
                self.conv1, **spatial_transform_kwargs
            )
            self.conv2 = LearnableSpatialTransformWrapper(
                self.conv2, **spatial_transform_kwargs
            )
        self.inline = inline

    def forward(self, x):
        """Return ``x + conv2(conv1(x))`` computed per branch.

        In inline mode the result is a single tensor with the branches
        concatenated along the channel dim (or just the local branch when
        there is no global tensor); otherwise it is a ``(local, global)``
        tuple.
        """
        if self.inline:
            n_global = self._global_in_num
            if n_global > 0:
                x_l, x_g = x[:, :-n_global], x[:, -n_global:]
            else:
                # ``x[:, :-0]`` is empty and ``x[:, -0:]`` is everything, so
                # slicing would invert the split; with no global channels the
                # whole input is the local branch.
                x_l, x_g = x, 0
        else:
            x_l, x_g = x if type(x) is tuple else (x, 0)

        id_l, id_g = x_l, x_g

        x_l, x_g = self.conv1((x_l, x_g))
        x_l, x_g = self.conv2((x_l, x_g))

        x_l, x_g = id_l + x_l, id_g + x_g
        out = x_l, x_g
        if self.inline:
            # A missing global branch is represented by a non-tensor (0) and
            # cannot be concatenated.
            out = torch.cat(out, dim=1) if torch.is_tensor(x_g) else x_l
        return out


class ConcatTupleLayer(nn.Module):
    """Merge a ``(local, global)`` tuple into a single tensor."""

    def forward(self, x):
        """Concatenate the two branches along dim 1.

        If only one of the two elements is a tensor (the other being a
        placeholder such as ``0``), that tensor is returned unchanged.

        Raises:
            AssertionError: If ``x`` is not a tuple or neither element is a
                tensor.
        """
        assert isinstance(x, tuple)
        x_l, x_g = x
        has_local = torch.is_tensor(x_l)
        has_global = torch.is_tensor(x_g)
        assert has_local or has_global
        if not has_global:
            return x_l
        if not has_local:
            # Only the global branch exists; torch.cat cannot take the
            # non-tensor placeholder.
            return x_g
        return torch.cat(x, dim=1)


class FFCResNetGenerator(nn.Module):
    """Encoder / FFC-ResNet bottleneck / decoder generator ending in a sigmoid.

    The network is a single ``nn.Sequential`` stored as ``self.model``:
    a 7x7 ``FFC_BN_ACT`` stem, ``n_downsampling`` stride-2 ``FFC_BN_ACT``
    layers, ``n_blocks`` ``FFCResnetBlock`` layers, a ``ConcatTupleLayer``,
    ``n_downsampling`` transposed-convolution upsampling stages, an optional
    inline ``FFCResnetBlock``, and a 7x7 output convolution with sigmoid.

    NOTE: ``init_conv_kwargs``, ``downsample_conv_kwargs``,
    ``resnet_conv_kwargs``, ``spatial_transform_kwargs`` and
    ``out_ffc_kwargs`` are accepted but immediately replaced by the fixed
    values below, so whatever the caller passes for them has no effect.
    """

    def __init__(
        self,
        input_nc,
        output_nc,
        ngf=64,
        n_downsampling=3,
        n_blocks=18,
        norm_layer=nn.BatchNorm2d,
        padding_type="reflect",
        activation_layer=nn.ReLU,
        up_norm_layer=nn.BatchNorm2d,
        up_activation=nn.ReLU(True),
        init_conv_kwargs={},
        downsample_conv_kwargs={},
        resnet_conv_kwargs={},
        spatial_transform_layers=None,
        spatial_transform_kwargs={},
        max_features=1024,
        out_ffc=False,
        out_ffc_kwargs={},
    ):
        assert n_blocks >= 0
        super().__init__()

        # Fixed configuration (overrides the corresponding arguments).
        # The original config expressed the ratios as references:
        #   downsample ratio_gin  = init ratio_gout
        #   downsample ratio_gout = downsample ratio_gin
        #   resnet     ratio_gout = resnet ratio_gin
        init_conv_kwargs = dict(ratio_gin=0, ratio_gout=0, enable_lfu=False)
        downsample_conv_kwargs = dict(ratio_gin=0, ratio_gout=0, enable_lfu=False)
        resnet_conv_kwargs = dict(ratio_gin=0.75, ratio_gout=0.75, enable_lfu=False)
        spatial_transform_kwargs = {}
        out_ffc_kwargs = {}

        def capped(features):
            """Clamp a channel count to ``max_features``."""
            return min(max_features, features)

        layers = [
            nn.ReflectionPad2d(3),
            FFC_BN_ACT(
                input_nc,
                ngf,
                kernel_size=7,
                padding=0,
                norm_layer=norm_layer,
                activation_layer=activation_layer,
                **init_conv_kwargs,
            ),
        ]

        # --- downsampling ---
        final_level = n_downsampling - 1
        for level in range(n_downsampling):
            width = ngf * 2**level
            if level == final_level:
                # The last downsampling layer must emit the global ratio the
                # resnet blocks expect as input.
                level_kwargs = dict(downsample_conv_kwargs)
                level_kwargs["ratio_gout"] = resnet_conv_kwargs.get("ratio_gin", 0)
            else:
                level_kwargs = downsample_conv_kwargs
            layers.append(
                FFC_BN_ACT(
                    capped(width),
                    capped(width * 2),
                    kernel_size=3,
                    stride=2,
                    padding=1,
                    norm_layer=norm_layer,
                    activation_layer=activation_layer,
                    **level_kwargs,
                )
            )

        bottleneck_features = capped(ngf * 2**n_downsampling)

        # --- resnet blocks ---
        for block_index in range(n_blocks):
            block = FFCResnetBlock(
                bottleneck_features,
                padding_type=padding_type,
                activation_layer=activation_layer,
                norm_layer=norm_layer,
                **resnet_conv_kwargs,
            )
            wrap_block = (
                spatial_transform_layers is not None
                and block_index in spatial_transform_layers
            )
            if wrap_block:
                block = LearnableSpatialTransformWrapper(
                    block, **spatial_transform_kwargs
                )
            layers.append(block)

        layers.append(ConcatTupleLayer())

        # --- upsampling ---
        for step in range(n_downsampling):
            width = ngf * 2 ** (n_downsampling - step)
            halved = capped(int(width / 2))
            layers.extend(
                [
                    nn.ConvTranspose2d(
                        capped(width),
                        halved,
                        kernel_size=3,
                        stride=2,
                        padding=1,
                        output_padding=1,
                    ),
                    up_norm_layer(halved),
                    up_activation,  # the same activation instance is reused per stage
                ]
            )

        if out_ffc:
            layers.append(
                FFCResnetBlock(
                    ngf,
                    padding_type=padding_type,
                    activation_layer=activation_layer,
                    norm_layer=norm_layer,
                    inline=True,
                    **out_ffc_kwargs,
                )
            )

        layers.extend(
            [
                nn.ReflectionPad2d(3),
                nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0),
                nn.Sigmoid(),
            ]
        )
        self.model = nn.Sequential(*layers)

    def forward(self, image, mask):
        """Run the network on ``image`` and ``mask`` concatenated along dim 1."""
        stacked = torch.cat([image, mask], dim=1)
        return self.model(stacked)


class LaMa(nn.Module):
    """LaMa inpainting model wrapping an ``FFCResNetGenerator``.

    Args:
        state_dict: Checkpoint mapping; every key has ``"generator.model"``
            replaced by ``"model.model"`` before it is loaded non-strictly.
    """

    def __init__(self, state_dict) -> None:
        super().__init__()
        # Descriptive metadata about the architecture.
        self.model_arch = "LaMa"
        self.sub_type = "Inpaint"
        self.in_nc = 4
        self.out_nc = 3
        self.scale = 1

        self.min_size = None
        self.pad_mod = 8
        self.pad_to_square = False

        self.model = FFCResNetGenerator(self.in_nc, self.out_nc)

        # Rename checkpoint keys so they line up with ``self.model.model``.
        renamed = {}
        for key, value in state_dict.items():
            renamed[key.replace("generator.model", "model.model")] = value
        self.state = renamed

        self.supports_fp16 = False
        self.support_bf16 = True

        # strict=False: keys that do not match the module are ignored.
        self.load_state_dict(self.state, strict=False)

    def forward(self, img, mask):
        """Inpaint ``img`` where ``mask`` is set and keep it elsewhere.

        The generator sees the image with the masked area zeroed out; its
        output is used inside the mask and the original image outside it.
        """
        keep = 1 - mask
        generated = self.model.forward(img * keep, mask)
        return mask * generated + keep * img
Take some code from chainner to implement ESRGAN and other upscale models. 2023-03-11 18:09:28 +00:00			`# pylint: skip-file`
			`"""`
			`Model adapted from advimman's lama project: https://github.com/advimman/lama`
			`"""`

			`# Fast Fourier Convolution NeurIPS 2020`
			`# original implementation https://github.com/pkumivision/FFC/blob/main/model_zoo/ffc.py`
			`# paper https://proceedings.neurips.cc/paper/2020/file/2fd5d41ec6cfab47e32164d5624269b1-Paper.pdf`

			`from typing import List`

			`import torch`
			`import torch.nn as nn`
			`import torch.nn.functional as F`
			`from torchvision.transforms.functional import InterpolationMode, rotate`


			`class LearnableSpatialTransformWrapper(nn.Module):`
			`def __init__(self, impl, pad_coef=0.5, angle_init_range=80, train_angle=True):`
			`super().__init__()`
			`self.impl = impl`
			`self.angle = torch.rand(1) * angle_init_range`
			`if train_angle:`
			`self.angle = nn.Parameter(self.angle, requires_grad=True)`
			`self.pad_coef = pad_coef`

			`def forward(self, x):`
			`if torch.is_tensor(x):`
			`return self.inverse_transform(self.impl(self.transform(x)), x)`
			`elif isinstance(x, tuple):`
			`x_trans = tuple(self.transform(elem) for elem in x)`
			`y_trans = self.impl(x_trans)`
			`return tuple(`
			`self.inverse_transform(elem, orig_x) for elem, orig_x in zip(y_trans, x)`
			`)`
			`else:`
			`raise ValueError(f"Unexpected input type {type(x)}")`

			`def transform(self, x):`
			`height, width = x.shape[2:]`
			`pad_h, pad_w = int(height * self.pad_coef), int(width * self.pad_coef)`
			`x_padded = F.pad(x, [pad_w, pad_w, pad_h, pad_h], mode="reflect")`
			`x_padded_rotated = rotate(`
			`x_padded, self.angle.to(x_padded), InterpolationMode.BILINEAR, fill=0`
			`)`

			`return x_padded_rotated`

			`def inverse_transform(self, y_padded_rotated, orig_x):`
			`height, width = orig_x.shape[2:]`
			`pad_h, pad_w = int(height * self.pad_coef), int(width * self.pad_coef)`

			`y_padded = rotate(`
			`y_padded_rotated,`
			`-self.angle.to(y_padded_rotated),`
			`InterpolationMode.BILINEAR,`
			`fill=0,`
			`)`
			`y_height, y_width = y_padded.shape[2:]`
			`y = y_padded[:, :, pad_h : y_height - pad_h, pad_w : y_width - pad_w]`
			`return y`


			`class SELayer(nn.Module):`
			`def __init__(self, channel, reduction=16):`
			`super(SELayer, self).__init__()`
			`self.avg_pool = nn.AdaptiveAvgPool2d(1)`
			`self.fc = nn.Sequential(`
			`nn.Linear(channel, channel // reduction, bias=False),`
			`nn.ReLU(inplace=True),`
			`nn.Linear(channel // reduction, channel, bias=False),`
			`nn.Sigmoid(),`
			`)`

			`def forward(self, x):`
			`b, c, _, _ = x.size()`
			`y = self.avg_pool(x).view(b, c)`
			`y = self.fc(y).view(b, c, 1, 1)`
			`res = x * y.expand_as(x)`
			`return res`


			`class FourierUnit(nn.Module):`
			`def __init__(`
			`self,`
			`in_channels,`
			`out_channels,`
			`groups=1,`
			`spatial_scale_factor=None,`
			`spatial_scale_mode="bilinear",`
			`spectral_pos_encoding=False,`
			`use_se=False,`
			`se_kwargs=None,`
			`ffc3d=False,`
			`fft_norm="ortho",`
			`):`
			`# bn_layer not used`
			`super(FourierUnit, self).__init__()`
			`self.groups = groups`

			`self.conv_layer = torch.nn.Conv2d(`
			`in_channels=in_channels * 2 + (2 if spectral_pos_encoding else 0),`
			`out_channels=out_channels * 2,`
			`kernel_size=1,`
			`stride=1,`
			`padding=0,`
			`groups=self.groups,`
			`bias=False,`
			`)`
			`self.bn = torch.nn.BatchNorm2d(out_channels * 2)`
			`self.relu = torch.nn.ReLU(inplace=True)`

			`# squeeze and excitation block`
			`self.use_se = use_se`
			`if use_se:`
			`if se_kwargs is None:`
			`se_kwargs = {}`
			`self.se = SELayer(self.conv_layer.in_channels, **se_kwargs)`

			`self.spatial_scale_factor = spatial_scale_factor`
			`self.spatial_scale_mode = spatial_scale_mode`
			`self.spectral_pos_encoding = spectral_pos_encoding`
			`self.ffc3d = ffc3d`
			`self.fft_norm = fft_norm`

			`def forward(self, x):`
			`half_check = False`
			`if x.type() == "torch.cuda.HalfTensor":`
			`# half only works on gpu anyway`
			`half_check = True`

			`batch = x.shape[0]`

			`if self.spatial_scale_factor is not None:`
			`orig_size = x.shape[-2:]`
			`x = F.interpolate(`
			`x,`
			`scale_factor=self.spatial_scale_factor,`
			`mode=self.spatial_scale_mode,`
			`align_corners=False,`
			`)`

			`# (batch, c, h, w/2+1, 2)`
			`fft_dim = (-3, -2, -1) if self.ffc3d else (-2, -1)`
			`if half_check == True:`
			`ffted = torch.fft.rfftn(`
			`x.float(), dim=fft_dim, norm=self.fft_norm`
			`) # .type(torch.cuda.HalfTensor)`
			`else:`
			`ffted = torch.fft.rfftn(x, dim=fft_dim, norm=self.fft_norm)`

			`ffted = torch.stack((ffted.real, ffted.imag), dim=-1)`
			`ffted = ffted.permute(0, 1, 4, 2, 3).contiguous() # (batch, c, 2, h, w/2+1)`
			`ffted = ffted.view(`
			`(`
			`batch,`
			`-1,`
			`)`
			`+ ffted.size()[3:]`
			`)`

			`if self.spectral_pos_encoding:`
			`height, width = ffted.shape[-2:]`
			`coords_vert = (`
			`torch.linspace(0, 1, height)[None, None, :, None]`
			`.expand(batch, 1, height, width)`
			`.to(ffted)`
			`)`
			`coords_hor = (`
			`torch.linspace(0, 1, width)[None, None, None, :]`
			`.expand(batch, 1, height, width)`
			`.to(ffted)`
			`)`
			`ffted = torch.cat((coords_vert, coords_hor, ffted), dim=1)`

			`if self.use_se:`
			`ffted = self.se(ffted)`

			`if half_check == True:`
			`ffted = self.conv_layer(ffted.half()) # (batch, c*2, h, w/2+1)`
			`else:`
			`ffted = self.conv_layer(`
			`ffted`
			`) # .type(torch.cuda.FloatTensor) # (batch, c*2, h, w/2+1)`

			`ffted = self.relu(self.bn(ffted))`
			`# forcing to be always float`
			`ffted = ffted.float()`

			`ffted = (`
			`ffted.view(`
			`(`
			`batch,`
			`-1,`
			`2,`
			`)`
			`+ ffted.size()[2:]`
			`)`
			`.permute(0, 1, 3, 4, 2)`
			`.contiguous()`
			`) # (batch,c, t, h, w/2+1, 2)`

			`ffted = torch.complex(ffted[..., 0], ffted[..., 1])`

			`ifft_shape_slice = x.shape[-3:] if self.ffc3d else x.shape[-2:]`
			`output = torch.fft.irfftn(`
			`ffted, s=ifft_shape_slice, dim=fft_dim, norm=self.fft_norm`
			`)`

			`if half_check == True:`
			`output = output.half()`

			`if self.spatial_scale_factor is not None:`
			`output = F.interpolate(`
			`output,`
			`size=orig_size,`
			`mode=self.spatial_scale_mode,`
			`align_corners=False,`
			`)`

			`return output`


			`class SpectralTransform(nn.Module):`
			`def __init__(`
			`self,`
			`in_channels,`
			`out_channels,`
			`stride=1,`
			`groups=1,`
			`enable_lfu=True,`
			`separable_fu=False,`
			`**fu_kwargs,`
			`):`
			`# bn_layer not used`
			`super(SpectralTransform, self).__init__()`
			`self.enable_lfu = enable_lfu`
			`if stride == 2:`
			`self.downsample = nn.AvgPool2d(kernel_size=(2, 2), stride=2)`
			`else:`
			`self.downsample = nn.Identity()`

			`self.stride = stride`
			`self.conv1 = nn.Sequential(`
			`nn.Conv2d(`
			`in_channels, out_channels // 2, kernel_size=1, groups=groups, bias=False`
			`),`
			`nn.BatchNorm2d(out_channels // 2),`
			`nn.ReLU(inplace=True),`
			`)`
			`fu_class = FourierUnit`
			`self.fu = fu_class(out_channels // 2, out_channels // 2, groups, **fu_kwargs)`
			`if self.enable_lfu:`
			`self.lfu = fu_class(out_channels // 2, out_channels // 2, groups)`
			`self.conv2 = torch.nn.Conv2d(`
			`out_channels // 2, out_channels, kernel_size=1, groups=groups, bias=False`
			`)`

			`def forward(self, x):`
			`x = self.downsample(x)`
			`x = self.conv1(x)`
			`output = self.fu(x)`

			`if self.enable_lfu:`
			`_, c, h, _ = x.shape`
			`split_no = 2`
			`split_s = h // split_no`
			`xs = torch.cat(`
			`torch.split(x[:, : c // 4], split_s, dim=-2), dim=1`
			`).contiguous()`
			`xs = torch.cat(torch.split(xs, split_s, dim=-1), dim=1).contiguous()`
			`xs = self.lfu(xs)`
			`xs = xs.repeat(1, 1, split_no, split_no).contiguous()`
			`else:`
			`xs = 0`

			`output = self.conv2(x + output + xs)`

			`return output`


			`class FFC(nn.Module):`
			`def __init__(`
			`self,`
			`in_channels,`
			`out_channels,`
			`kernel_size,`
			`ratio_gin,`
			`ratio_gout,`
			`stride=1,`
			`padding=0,`
			`dilation=1,`
			`groups=1,`
			`bias=False,`
			`enable_lfu=True,`
			`padding_type="reflect",`
			`gated=False,`
			`**spectral_kwargs,`
			`):`
			`super(FFC, self).__init__()`

			`assert stride == 1 or stride == 2, "Stride should be 1 or 2."`
			`self.stride = stride`

			`in_cg = int(in_channels * ratio_gin)`
			`in_cl = in_channels - in_cg`
			`out_cg = int(out_channels * ratio_gout)`
			`out_cl = out_channels - out_cg`
			`# groups_g = 1 if groups == 1 else int(groups * ratio_gout)`
			`# groups_l = 1 if groups == 1 else groups - groups_g`

			`self.ratio_gin = ratio_gin`
			`self.ratio_gout = ratio_gout`
			`self.global_in_num = in_cg`

			`module = nn.Identity if in_cl == 0 or out_cl == 0 else nn.Conv2d`
			`self.convl2l = module(`
			`in_cl,`
			`out_cl,`
			`kernel_size,`
			`stride,`
			`padding,`
			`dilation,`
			`groups,`
			`bias,`
			`padding_mode=padding_type,`
			`)`
			`module = nn.Identity if in_cl == 0 or out_cg == 0 else nn.Conv2d`
			`self.convl2g = module(`
			`in_cl,`
			`out_cg,`
			`kernel_size,`
			`stride,`
			`padding,`
			`dilation,`
			`groups,`
			`bias,`
			`padding_mode=padding_type,`
			`)`
			`module = nn.Identity if in_cg == 0 or out_cl == 0 else nn.Conv2d`
			`self.convg2l = module(`
			`in_cg,`
			`out_cl,`
			`kernel_size,`
			`stride,`
			`padding,`
			`dilation,`
			`groups,`
			`bias,`
			`padding_mode=padding_type,`
			`)`
			`module = nn.Identity if in_cg == 0 or out_cg == 0 else SpectralTransform`
			`self.convg2g = module(`
			`in_cg,`
			`out_cg,`
			`stride,`
			`1 if groups == 1 else groups // 2,`
			`enable_lfu,`
			`**spectral_kwargs,`
			`)`

			`self.gated = gated`
			`module = (`
			`nn.Identity if in_cg == 0 or out_cl == 0 or not self.gated else nn.Conv2d`
			`)`
			`self.gate = module(in_channels, 2, 1)`

			`def forward(self, x):`
			`x_l, x_g = x if type(x) is tuple else (x, 0)`
			`out_xl, out_xg = 0, 0`

			`if self.gated:`
			`total_input_parts = [x_l]`
			`if torch.is_tensor(x_g):`
			`total_input_parts.append(x_g)`
			`total_input = torch.cat(total_input_parts, dim=1)`

			`gates = torch.sigmoid(self.gate(total_input))`
			`g2l_gate, l2g_gate = gates.chunk(2, dim=1)`
			`else:`
			`g2l_gate, l2g_gate = 1, 1`

			`if self.ratio_gout != 1:`
			`out_xl = self.convl2l(x_l) + self.convg2l(x_g) * g2l_gate`
			`if self.ratio_gout != 0:`
			`out_xg = self.convl2g(x_l) * l2g_gate + self.convg2g(x_g)`

			`return out_xl, out_xg`


			class FFC_BN_ACT(nn.Module):
			    """An ``FFC`` layer followed by a norm and an activation on each branch.

			    The local and global outputs of the FFC get their own norm/activation
			    pair. A branch that cannot exist (local when ``ratio_gout == 1``, global
			    when ``ratio_gout == 0``) uses ``nn.Identity`` for both instead.
			    """

			    def __init__(
			        self,
			        in_channels,
			        out_channels,
			        kernel_size,
			        ratio_gin,
			        ratio_gout,
			        stride=1,
			        padding=0,
			        dilation=1,
			        groups=1,
			        bias=False,
			        norm_layer=nn.BatchNorm2d,
			        activation_layer=nn.Identity,
			        padding_type="reflect",
			        enable_lfu=True,
			        **kwargs,
			    ):
			        """Build the FFC and the per-branch norm/activation modules.

			        Args:
			            in_channels, out_channels, kernel_size, ratio_gin, stride,
			            padding, dilation, groups, bias, enable_lfu, padding_type,
			            **kwargs: forwarded to ``FFC`` unchanged.
			            ratio_gout: also forwarded to ``FFC``; here it additionally sets
			                the global channel count, ``int(out_channels * ratio_gout)``.
			            norm_layer: factory called with a channel count.
			            activation_layer: factory called with ``inplace=True``.
			        """
			        super().__init__()
			        self.ffc = FFC(
			            in_channels,
			            out_channels,
			            kernel_size,
			            ratio_gin,
			            ratio_gout,
			            stride,
			            padding,
			            dilation,
			            groups,
			            bias,
			            enable_lfu,
			            padding_type=padding_type,
			            **kwargs,
			        )

			        n_global = int(out_channels * ratio_gout)
			        n_local = out_channels - n_global
			        global_only = ratio_gout == 1
			        local_only = ratio_gout == 0

			        # Norm layers first, then activations, local before global.
			        self.bn_l = (nn.Identity if global_only else norm_layer)(n_local)
			        self.bn_g = (nn.Identity if local_only else norm_layer)(n_global)

			        self.act_l = (nn.Identity if global_only else activation_layer)(
			            inplace=True
			        )
			        self.act_g = (nn.Identity if local_only else activation_layer)(
			            inplace=True
			        )

			    def forward(self, x):
			        """Apply the FFC, then norm and activation to each returned part."""
			        local_out, global_out = self.ffc(x)
			        local_out = self.act_l(self.bn_l(local_out))
			        global_out = self.act_g(self.bn_g(global_out))
			        return local_out, global_out


			class FFCResnetBlock(nn.Module):
			    """Residual block made of two ``FFC_BN_ACT`` layers.

			    The block works on a ``(local, global)`` pair. In ``inline`` mode the
			    pair is instead packed into one tensor along dim 1, with the global
			    channels last, and the output is packed the same way.
			    """

			    def __init__(
			        self,
			        dim,
			        padding_type,
			        norm_layer,
			        activation_layer=nn.ReLU,
			        dilation=1,
			        spatial_transform_kwargs=None,
			        inline=False,
			        **conv_kwargs,
			    ):
			        """Create the two convolutions, optionally wrapped.

			        Args:
			            dim: input and output channel count of both convolutions.
			            padding_type, norm_layer, activation_layer: forwarded to
			                ``FFC_BN_ACT``.
			            dilation: used as both the dilation and the padding of the
			                3x3 convolutions.
			            spatial_transform_kwargs: when not ``None``, each convolution is
			                wrapped in ``LearnableSpatialTransformWrapper`` built with
			                these keyword arguments.
			            inline: whether input/output are single packed tensors.
			            **conv_kwargs: extra keyword arguments for ``FFC_BN_ACT``.
			        """
			        super().__init__()
			        self.conv1 = FFC_BN_ACT(
			            dim,
			            dim,
			            kernel_size=3,
			            padding=dilation,
			            dilation=dilation,
			            norm_layer=norm_layer,
			            activation_layer=activation_layer,
			            padding_type=padding_type,
			            **conv_kwargs,
			        )
			        self.conv2 = FFC_BN_ACT(
			            dim,
			            dim,
			            kernel_size=3,
			            padding=dilation,
			            dilation=dilation,
			            norm_layer=norm_layer,
			            activation_layer=activation_layer,
			            padding_type=padding_type,
			            **conv_kwargs,
			        )
			        if spatial_transform_kwargs is not None:
			            self.conv1 = LearnableSpatialTransformWrapper(
			                self.conv1, **spatial_transform_kwargs
			            )
			            self.conv2 = LearnableSpatialTransformWrapper(
			                self.conv2, **spatial_transform_kwargs
			            )
			        self.inline = inline

			    def _global_in_num(self):
			        """Return the number of global input channels of the first conv."""
			        # LearnableSpatialTransformWrapper keeps the wrapped module in
			        # ``impl``; look through it so the ``ffc`` lookup also works then.
			        conv = getattr(self.conv1, "impl", self.conv1)
			        return conv.ffc.global_in_num

			    def forward(self, x):
			        """Apply both convolutions and add the input back to each part.

			        Args:
			            x: in inline mode a single tensor split along dim 1; otherwise a
			                ``(x_l, x_g)`` tuple, or a single value used as the local
			                part with ``0`` as the global part.

			        Returns:
			            In inline mode one tensor (local and global parts concatenated
			            along dim 1, or only the local part when the global part is not
			            a tensor); otherwise the ``(x_l, x_g)`` tuple.
			        """
			        if self.inline:
			            n_global = self._global_in_num()
			            if n_global:
			                x_l, x_g = x[:, :-n_global], x[:, -n_global:]
			            else:
			                # ``x[:, :-0]`` would be an empty slice, so with no global
			                # channels the whole input is local and the global part
			                # uses the same 0 placeholder as the non-inline path.
			                x_l, x_g = x, 0
			        else:
			            x_l, x_g = x if isinstance(x, tuple) else (x, 0)

			        id_l, id_g = x_l, x_g

			        x_l, x_g = self.conv1((x_l, x_g))
			        x_l, x_g = self.conv2((x_l, x_g))

			        x_l, x_g = id_l + x_l, id_g + x_g
			        if not self.inline:
			            return x_l, x_g
			        # A non-tensor global part (the 0 placeholder) cannot be concatenated.
			        if torch.is_tensor(x_g):
			            return torch.cat((x_l, x_g), dim=1)
			        return x_l


			class ConcatTupleLayer(nn.Module):
			    """Collapse a ``(local, global)`` pair into a single value."""

			    def forward(self, x):
			        """Concatenate the pair along dim 1, or return the local part alone.

			        ``x`` must be a 2-tuple with at least one tensor element. When the
			        global element is not a tensor the local element is returned as is.
			        """
			        assert isinstance(x, tuple)
			        local_part, global_part = x
			        has_global = torch.is_tensor(global_part)
			        assert torch.is_tensor(local_part) or has_global
			        return torch.cat(x, dim=1) if has_global else local_part


			class FFCResNetGenerator(nn.Module):
			    """Generator assembled as one ``nn.Sequential``.

			    Layer order: reflection pad + initial 7x7 ``FFC_BN_ACT``, the strided
			    ``FFC_BN_ACT`` downsampling stages, ``n_blocks`` ``FFCResnetBlock``
			    layers, ``ConcatTupleLayer``, the transposed-convolution upsampling
			    stages, an optional inline ``FFCResnetBlock``, and finally reflection
			    pad + 7x7 ``nn.Conv2d`` + ``nn.Sigmoid``.
			    """

			    def __init__(
			        self,
			        input_nc,
			        output_nc,
			        ngf=64,
			        n_downsampling=3,
			        n_blocks=18,
			        norm_layer=nn.BatchNorm2d,
			        padding_type="reflect",
			        activation_layer=nn.ReLU,
			        up_norm_layer=nn.BatchNorm2d,
			        up_activation=nn.ReLU(True),
			        init_conv_kwargs={},
			        downsample_conv_kwargs={},
			        resnet_conv_kwargs={},
			        spatial_transform_layers=None,
			        spatial_transform_kwargs={},
			        max_features=1024,
			        out_ffc=False,
			        out_ffc_kwargs={},
			    ):
			        """Build the layer stack.

			        The ``init_conv_kwargs``, ``downsample_conv_kwargs``,
			        ``resnet_conv_kwargs``, ``spatial_transform_kwargs`` and
			        ``out_ffc_kwargs`` arguments are accepted but replaced by the fixed
			        values assigned below.
			        """
			        assert n_blocks >= 0
			        super().__init__()

			        # Fixed configuration; the same-named arguments are ignored.
			        # The initial and downsampling convolutions are purely local,
			        # the residual blocks use a 0.75 global ratio in and out, and
			        # the LFU is disabled everywhere.
			        init_conv_kwargs = {
			            "ratio_gin": 0,
			            "ratio_gout": 0,
			            "enable_lfu": False,
			        }
			        downsample_conv_kwargs = {
			            "ratio_gin": 0,
			            "ratio_gout": 0,
			            "enable_lfu": False,
			        }
			        resnet_conv_kwargs = {
			            "ratio_gin": 0.75,
			            "ratio_gout": 0.75,
			            "enable_lfu": False,
			        }
			        spatial_transform_kwargs = {}
			        out_ffc_kwargs = {}

			        def capped(channels):
			            # Channel counts never exceed max_features.
			            return min(max_features, channels)

			        layers = [
			            nn.ReflectionPad2d(3),
			            FFC_BN_ACT(
			                input_nc,
			                ngf,
			                kernel_size=7,
			                padding=0,
			                norm_layer=norm_layer,
			                activation_layer=activation_layer,
			                **init_conv_kwargs,
			            ),
			        ]

			        # Downsampling: each stage doubles the channels with stride 2.
			        final_stage = n_downsampling - 1
			        for stage in range(n_downsampling):
			            width = ngf * 2**stage
			            if stage == final_stage:
			                # The last stage must emit the global ratio that the
			                # residual blocks expect as input.
			                stage_kwargs = dict(downsample_conv_kwargs)
			                stage_kwargs["ratio_gout"] = resnet_conv_kwargs.get(
			                    "ratio_gin", 0
			                )
			            else:
			                stage_kwargs = downsample_conv_kwargs
			            layers.append(
			                FFC_BN_ACT(
			                    capped(width),
			                    capped(width * 2),
			                    kernel_size=3,
			                    stride=2,
			                    padding=1,
			                    norm_layer=norm_layer,
			                    activation_layer=activation_layer,
			                    **stage_kwargs,
			                )
			            )

			        # Residual blocks at the bottleneck width.
			        bottleneck_width = capped(ngf * 2**n_downsampling)
			        for block_index in range(n_blocks):
			            block = FFCResnetBlock(
			                bottleneck_width,
			                padding_type=padding_type,
			                activation_layer=activation_layer,
			                norm_layer=norm_layer,
			                **resnet_conv_kwargs,
			            )
			            if (
			                spatial_transform_layers is not None
			                and block_index in spatial_transform_layers
			            ):
			                block = LearnableSpatialTransformWrapper(
			                    block, **spatial_transform_kwargs
			                )
			            layers.append(block)

			        layers.append(ConcatTupleLayer())

			        # Upsampling: each stage halves the channels with stride 2.
			        # The single up_activation instance is reused for every stage.
			        for stage in range(n_downsampling):
			            width = ngf * 2 ** (n_downsampling - stage)
			            halved = capped(int(width / 2))
			            layers.extend(
			                [
			                    nn.ConvTranspose2d(
			                        capped(width),
			                        halved,
			                        kernel_size=3,
			                        stride=2,
			                        padding=1,
			                        output_padding=1,
			                    ),
			                    up_norm_layer(halved),
			                    up_activation,
			                ]
			            )

			        if out_ffc:
			            layers.append(
			                FFCResnetBlock(
			                    ngf,
			                    padding_type=padding_type,
			                    activation_layer=activation_layer,
			                    norm_layer=norm_layer,
			                    inline=True,
			                    **out_ffc_kwargs,
			                )
			            )

			        layers.extend(
			            [
			                nn.ReflectionPad2d(3),
			                nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0),
			                nn.Sigmoid(),
			            ]
			        )
			        self.model = nn.Sequential(*layers)

			    def forward(self, image, mask):
			        """Concatenate ``image`` and ``mask`` along dim 1 and run the stack."""
			        stacked = torch.cat([image, mask], dim=1)
			        return self.model(stacked)


			class LaMa(nn.Module):
			    """Inpainting wrapper around ``FFCResNetGenerator``.

			    Builds a 4-channel-in / 3-channel-out generator and loads weights from
			    the given state dict after renaming its keys.
			    """

			    def __init__(self, state_dict) -> None:
			        """Create the generator and load ``state_dict`` into it.

			        Args:
			            state_dict: mapping of parameter names to values. Every
			                ``"generator.model"`` substring in a key is replaced by
			                ``"model.model"`` before loading.
			        """
			        super().__init__()
			        self.model_arch = "LaMa"
			        self.sub_type = "Inpaint"
			        self.in_nc = 4
			        self.out_nc = 3
			        self.scale = 1

			        self.min_size = None
			        self.pad_mod = 8
			        self.pad_to_square = False

			        self.model = FFCResNetGenerator(self.in_nc, self.out_nc)
			        # Rename keys so they match this module's attribute path.
			        self.state = {
			            key.replace("generator.model", "model.model"): value
			            for key, value in state_dict.items()
			        }

			        self.supports_fp16 = False
			        self.support_bf16 = True

			        # strict=False: missing or unexpected keys are not treated as errors.
			        self.load_state_dict(self.state, strict=False)

			    def forward(self, img, mask):
			        """Blend generator output into ``img`` weighted by ``mask``.

			        The generator receives ``img * (1 - mask)`` together with ``mask``.
			        The result is ``mask * generated + (1 - mask) * img``, so where
			        ``mask`` is 1 the generated values are used and where it is 0 the
			        input image is kept.
			        """
			        masked_img = img * (1 - mask)
			        # Call the module itself rather than ``.forward`` so that hooks
			        # registered on the generator are run.
			        generated = self.model(masked_img, mask)
			        return mask * generated + (1 - mask) * img