Spaces:
Runtime error
Runtime error
| # Copyright (c) OpenMMLab. All rights reserved. | |
| import torch | |
| import torch.nn as nn | |
| from mmcv.cnn import build_activation_layer, build_conv_layer, build_norm_layer | |
| from mmengine.model import BaseModule | |
| from mmseg.registry import MODELS | |
| from ..utils import resize | |
class DownsamplerBlock(BaseModule):
    """Downsampler block of ERFNet.

    This module differs from a basic ConvModule: the outputs of a strided
    conv branch and a max-pool branch are concatenated before the
    BatchNorm layer.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        conv_cfg (dict | None): Config of conv layers.
            Default: None.
        norm_cfg (dict | None): Config of norm layers.
            Default: dict(type='BN').
        act_cfg (dict): Config of activation layers.
            Default: dict(type='ReLU').
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', eps=1e-3),
                 act_cfg=dict(type='ReLU'),
                 init_cfg=None):
        super().__init__(init_cfg=init_cfg)
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg

        # The conv branch supplies only the channels the pool branch does
        # not, so concatenation yields exactly ``out_channels``.
        self.conv = build_conv_layer(
            self.conv_cfg,
            in_channels,
            out_channels - in_channels,
            kernel_size=3,
            stride=2,
            padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.bn = build_norm_layer(self.norm_cfg, out_channels)[1]
        self.act = build_activation_layer(self.act_cfg)

    def forward(self, input):
        conv_branch = self.conv(input)
        pool_branch = self.pool(input)
        # Align the pooled map to the conv branch's spatial size: the two
        # stride-2 ops can disagree by one pixel on odd-sized inputs.
        pool_branch = resize(
            input=pool_branch,
            size=conv_branch.size()[2:],
            mode='bilinear',
            align_corners=False)
        fused = torch.cat([conv_branch, pool_branch], 1)
        return self.act(self.bn(fused))
class NonBottleneck1d(BaseModule):
    """Non-bottleneck block of ERFNet.

    A residual block built from pairs of factorized (3x1 then 1x3)
    convolutions; the input is added back before the final activation.

    Args:
        channels (int): Number of channels in Non-bottleneck block.
        drop_rate (float): Probability of an element to be zeroed.
            Default 0.
        dilation (int): Dilation rate for last two conv layers.
            Default 1.
        num_conv_layer (int): Number of 3x1 and 1x3 convolution layers.
            Default 2.
        conv_cfg (dict | None): Config of conv layers.
            Default: None.
        norm_cfg (dict | None): Config of norm layers.
            Default: dict(type='BN').
        act_cfg (dict): Config of activation layers.
            Default: dict(type='ReLU').
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None.
    """

    def __init__(self,
                 channels,
                 drop_rate=0,
                 dilation=1,
                 num_conv_layer=2,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', eps=1e-3),
                 act_cfg=dict(type='ReLU'),
                 init_cfg=None):
        super().__init__(init_cfg=init_cfg)
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.act = build_activation_layer(self.act_cfg)

        self.convs_layers = nn.ModuleList()
        for idx in range(num_conv_layer):
            # First pair of factorized convs is undilated; subsequent
            # pairs dilate each conv along its own spatial axis.
            if idx == 0:
                pad_3x1, dil_3x1 = (1, 0), 1
                pad_1x3, dil_1x3 = (0, 1), 1
            else:
                pad_3x1, dil_3x1 = (dilation, 0), (dilation, 1)
                pad_1x3, dil_1x3 = (0, dilation), (1, dilation)
            self.convs_layers.append(
                build_conv_layer(
                    self.conv_cfg,
                    channels,
                    channels,
                    kernel_size=(3, 1),
                    stride=1,
                    padding=pad_3x1,
                    bias=True,
                    dilation=dil_3x1))
            self.convs_layers.append(self.act)
            self.convs_layers.append(
                build_conv_layer(
                    self.conv_cfg,
                    channels,
                    channels,
                    kernel_size=(1, 3),
                    stride=1,
                    padding=pad_1x3,
                    bias=True,
                    dilation=dil_1x3))
            self.convs_layers.append(
                build_norm_layer(self.norm_cfg, channels)[1])
            # Activation after the first conv pair, dropout after the rest.
            if idx == 0:
                self.convs_layers.append(self.act)
            else:
                self.convs_layers.append(nn.Dropout(p=drop_rate))

    def forward(self, input):
        out = input
        for layer in self.convs_layers:
            out = layer(out)
        # Residual connection, then the shared activation.
        return self.act(out + input)
class UpsamplerBlock(BaseModule):
    """Upsampler block of ERFNet.

    A 2x transposed-convolution upsampling followed by BatchNorm and an
    activation.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        conv_cfg (dict | None): Config of conv layers.
            Default: None.
        norm_cfg (dict | None): Config of norm layers.
            Default: dict(type='BN').
        act_cfg (dict): Config of activation layers.
            Default: dict(type='ReLU').
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', eps=1e-3),
                 act_cfg=dict(type='ReLU'),
                 init_cfg=None):
        super().__init__(init_cfg=init_cfg)
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg

        # stride=2 with output_padding=1 exactly doubles spatial size.
        self.conv = nn.ConvTranspose2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            output_padding=1,
            bias=True)
        self.bn = build_norm_layer(self.norm_cfg, out_channels)[1]
        self.act = build_activation_layer(self.act_cfg)

    def forward(self, input):
        return self.act(self.bn(self.conv(input)))
class ERFNet(BaseModule):
    """ERFNet backbone.

    This backbone is the implementation of `ERFNet: Efficient Residual
    Factorized ConvNet for Real-time SemanticSegmentation
    <https://ieeexplore.ieee.org/document/8063438>`_.

    Args:
        in_channels (int): The number of channels of input
            image. Default: 3.
        enc_downsample_channels (Tuple[int]): Size of channel
            numbers of various Downsampler block in encoder.
            Default: (16, 64, 128).
        enc_stage_non_bottlenecks (Tuple[int]): Number of stages of
            Non-bottleneck block in encoder.
            Default: (5, 8).
        enc_non_bottleneck_dilations (Tuple[int]): Dilation rate of each
            stage of Non-bottleneck block of encoder.
            Default: (2, 4, 8, 16).
        enc_non_bottleneck_channels (Tuple[int]): Size of channel
            numbers of various Non-bottleneck block in encoder.
            Default: (64, 128).
        dec_upsample_channels (Tuple[int]): Size of channel numbers of
            various Deconvolution block in decoder.
            Default: (64, 16).
        dec_stages_non_bottleneck (Tuple[int]): Number of stages of
            Non-bottleneck block in decoder.
            Default: (2, 2).
        dec_non_bottleneck_channels (Tuple[int]): Size of channel
            numbers of various Non-bottleneck block in decoder.
            Default: (64, 16).
        dropout_ratio (float): Probability of an element to be zeroed.
            Default 0.1.
        conv_cfg (dict | None): Config of conv layers.
            Default: None.
        norm_cfg (dict | None): Config of norm layers.
            Default: dict(type='BN', requires_grad=True).
        act_cfg (dict): Config of activation layers.
            Default: dict(type='ReLU').
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None.
    """

    # NOTE(review): ``MODELS`` is imported at the top of this file but no
    # ``@MODELS.register_module()`` decorator is present here — confirm the
    # backbone is registered elsewhere.

    def __init__(self,
                 in_channels=3,
                 enc_downsample_channels=(16, 64, 128),
                 enc_stage_non_bottlenecks=(5, 8),
                 enc_non_bottleneck_dilations=(2, 4, 8, 16),
                 enc_non_bottleneck_channels=(64, 128),
                 dec_upsample_channels=(64, 16),
                 dec_stages_non_bottleneck=(2, 2),
                 dec_non_bottleneck_channels=(64, 16),
                 dropout_ratio=0.1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', requires_grad=True),
                 act_cfg=dict(type='ReLU'),
                 init_cfg=None):
        super().__init__(init_cfg=init_cfg)
        # Validate that the encoder/decoder configuration tuples agree in
        # length; messages rewritten to avoid line-continuation whitespace
        # inside the string literals.
        assert len(enc_downsample_channels) \
            == len(dec_upsample_channels) + 1, (
                'Number of downsample blocks of encoder does not '
                'match number of upsample blocks of decoder!')
        assert len(enc_downsample_channels) \
            == len(enc_stage_non_bottlenecks) + 1, (
                'Number of downsample blocks of encoder does not match '
                'number of Non-bottleneck stages of encoder!')
        assert len(enc_downsample_channels) \
            == len(enc_non_bottleneck_channels) + 1, (
                'Number of downsample blocks of encoder does not match '
                'number of channels of Non-bottleneck blocks of encoder!')
        # Fixed copy-paste error: this checks divisibility by the number
        # of dilation rates, not a block-count match.
        assert enc_stage_non_bottlenecks[-1] \
            % len(enc_non_bottleneck_dilations) == 0, (
                'Number of Non-bottleneck blocks in the last encoder '
                'stage must be divisible by the number of '
                'Non-bottleneck dilation rates!')
        assert len(dec_upsample_channels) \
            == len(dec_stages_non_bottleneck), (
                'Number of upsample blocks of decoder does not match '
                'number of Non-bottleneck stages of decoder!')
        assert len(dec_stages_non_bottleneck) \
            == len(dec_non_bottleneck_channels), (
                'Number of Non-bottleneck stages of decoder does not '
                'match number of channels of Non-bottleneck blocks of '
                'decoder!')

        self.in_channels = in_channels
        self.enc_downsample_channels = enc_downsample_channels
        self.enc_stage_non_bottlenecks = enc_stage_non_bottlenecks
        self.enc_non_bottleneck_dilations = enc_non_bottleneck_dilations
        self.enc_non_bottleneck_channels = enc_non_bottleneck_channels
        self.dec_upsample_channels = dec_upsample_channels
        self.dec_stages_non_bottleneck = dec_stages_non_bottleneck
        self.dec_non_bottleneck_channels = dec_non_bottleneck_channels
        self.dropout_ratio = dropout_ratio

        self.encoder = nn.ModuleList()
        self.decoder = nn.ModuleList()

        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg

        # Encoder: an initial downsampler, then alternating downsampler /
        # Non-bottleneck stages.
        self.encoder.append(
            DownsamplerBlock(self.in_channels, enc_downsample_channels[0]))

        for i in range(len(enc_downsample_channels) - 1):
            self.encoder.append(
                DownsamplerBlock(enc_downsample_channels[i],
                                 enc_downsample_channels[i + 1]))
            # Last part of encoder is some dilated NonBottleneck1d blocks.
            if i == len(enc_downsample_channels) - 2:
                iteration_times = int(enc_stage_non_bottlenecks[-1] /
                                      len(enc_non_bottleneck_dilations))
                for j in range(iteration_times):
                    for k in range(len(enc_non_bottleneck_dilations)):
                        self.encoder.append(
                            NonBottleneck1d(enc_downsample_channels[-1],
                                            self.dropout_ratio,
                                            enc_non_bottleneck_dilations[k]))
            else:
                for j in range(enc_stage_non_bottlenecks[i]):
                    self.encoder.append(
                        NonBottleneck1d(enc_downsample_channels[i + 1],
                                        self.dropout_ratio))

        # Decoder: upsampler blocks each followed by undilated
        # Non-bottleneck blocks (no dropout in the decoder).
        for i in range(len(dec_upsample_channels)):
            if i == 0:
                self.decoder.append(
                    UpsamplerBlock(enc_downsample_channels[-1],
                                   dec_non_bottleneck_channels[i]))
            else:
                self.decoder.append(
                    UpsamplerBlock(dec_non_bottleneck_channels[i - 1],
                                   dec_non_bottleneck_channels[i]))
            for j in range(dec_stages_non_bottleneck[i]):
                self.decoder.append(
                    NonBottleneck1d(dec_non_bottleneck_channels[i]))

    def forward(self, x):
        for enc in self.encoder:
            x = enc(x)
        for dec in self.decoder:
            x = dec(x)
        # Returned as a single-element list to match the multi-scale
        # backbone output convention used by the decode heads.
        return [x]