2023-06-14 15:17:59 +00:00
|
|
|
import torch
|
2023-06-15 00:13:08 +00:00
|
|
|
from contextlib import contextmanager
|
2023-06-14 15:17:59 +00:00
|
|
|
|
2023-12-12 04:27:13 +00:00
|
|
|
class disable_weight_init:
    """Namespace of torch layers whose ``reset_parameters`` does nothing.

    Each nested class subclasses the torch layer of the same name and
    overrides ``reset_parameters`` to return None without touching the
    parameters.
    """

    class Linear(torch.nn.Linear):
        def reset_parameters(self):
            """Do nothing; the parameters are left as allocated."""
            return None

    class Conv2d(torch.nn.Conv2d):
        def reset_parameters(self):
            """Do nothing; the parameters are left as allocated."""
            return None

    class Conv3d(torch.nn.Conv3d):
        def reset_parameters(self):
            """Do nothing; the parameters are left as allocated."""
            return None

    class GroupNorm(torch.nn.GroupNorm):
        def reset_parameters(self):
            """Do nothing; the parameters are left as allocated."""
            return None

    class LayerNorm(torch.nn.LayerNorm):
        def reset_parameters(self):
            """Do nothing; the parameters are left as allocated."""
            return None

    @classmethod
    def conv_nd(cls, dims, *args, **kwargs):
        """Build the convolution layer of this namespace for ``dims`` dimensions.

        Args:
            dims: 2 selects ``Conv2d``; 3 selects ``Conv3d``.
            *args: Positional arguments forwarded to the layer constructor.
            **kwargs: Keyword arguments forwarded to the layer constructor.

        Returns:
            A new ``Conv2d`` or ``Conv3d`` instance, looked up on the class the
            method was called on so subclasses supply their own layer types.

        Raises:
            ValueError: If ``dims`` is neither 2 nor 3.
        """
        if dims == 2:
            return cls.Conv2d(*args, **kwargs)
        if dims == 3:
            return cls.Conv3d(*args, **kwargs)
        raise ValueError(f"unsupported dimensions: {dims}")
|
2023-06-15 00:13:08 +00:00
|
|
|
|
2023-12-11 04:00:54 +00:00
|
|
|
def cast_bias_weight(s, input):
    """Return the weight and bias of ``s`` converted to match ``input``.

    Args:
        s: Object exposing ``weight`` and ``bias`` attributes, each of which
            is either a tensor or None.
        input: Tensor whose ``device`` and ``dtype`` the parameters are
            converted to.

    Returns:
        A ``(weight, bias)`` tuple. An entry is None when the corresponding
        attribute on ``s`` is None.
    """
    def _match_input(param):
        # Normalization layers created without affine parameters store None
        # for weight and bias, so both attributes need the same guard.
        if param is None:
            return None
        return param.to(device=input.device, dtype=input.dtype)

    return _match_input(s.weight), _match_input(s.bias)
|
|
|
|
|
2023-12-12 04:27:13 +00:00
|
|
|
class manual_cast(disable_weight_init):
    """Layer variants that convert their parameters to match the input.

    Each nested layer extends the ``disable_weight_init`` layer of the same
    name and, on every forward call, obtains weight and bias through
    ``cast_bias_weight`` so they share the input's device and dtype.
    """

    class Linear(disable_weight_init.Linear):
        def forward(self, input):
            """Apply the linear transform with parameters matched to ``input``."""
            weight, bias = cast_bias_weight(self, input)
            return torch.nn.functional.linear(input, weight, bias)

    class Conv2d(disable_weight_init.Conv2d):
        def forward(self, input):
            """Run the 2D convolution with parameters matched to ``input``."""
            weight, bias = cast_bias_weight(self, input)
            return self._conv_forward(input, weight, bias)

    class Conv3d(disable_weight_init.Conv3d):
        def forward(self, input):
            """Run the 3D convolution with parameters matched to ``input``."""
            weight, bias = cast_bias_weight(self, input)
            return self._conv_forward(input, weight, bias)

    class GroupNorm(disable_weight_init.GroupNorm):
        def forward(self, input):
            """Apply group normalization with parameters matched to ``input``."""
            if self.weight is None:
                # Built with affine=False: there is nothing to cast, and the
                # functional call accepts None for both parameters.
                weight = bias = None
            else:
                weight, bias = cast_bias_weight(self, input)
            return torch.nn.functional.group_norm(input, self.num_groups, weight, bias, self.eps)

    class LayerNorm(disable_weight_init.LayerNorm):
        def forward(self, input):
            """Apply layer normalization with parameters matched to ``input``."""
            if self.weight is None:
                # Built with elementwise_affine=False: there is nothing to
                # cast, and the functional call accepts None for both.
                weight = bias = None
            else:
                weight, bias = cast_bias_weight(self, input)
            return torch.nn.functional.layer_norm(input, self.normalized_shape, weight, bias, self.eps)
|