| import torch, math |
| import torch.nn as nn |
|
|
| def adjust_learning_rate(optimizer, epoch, args): |
| """Decay the learning rate with half-cycle cosine after warmup""" |
| if epoch < args.warmup_epochs: |
| lr = args.lr * epoch / args.warmup_epochs |
| else: |
| lr = args.min_lr + (args.lr - args.min_lr) * 0.5 * \ |
| (1. + math.cos(math.pi * (epoch - args.warmup_epochs) / (args.epochs - args.warmup_epochs))) |
| for param_group in optimizer.param_groups: |
| if "lr_scale" in param_group: |
| param_group["lr"] = lr * param_group["lr_scale"] |
| else: |
| param_group["lr"] = lr |
| return lr |
|
|
|
|
| def param_groups_weight_decay(model: nn.Module, weight_decay=1e-5, no_weight_decay_list=()): |
| no_weight_decay_list = set(no_weight_decay_list) |
| decay = [] |
| no_decay = [] |
| for name, param in model.named_parameters(): |
| if not param.requires_grad: |
| continue |
|
|
| if param.ndim <= 1 or name.endswith(".bias") or name in no_weight_decay_list: |
| no_decay.append(param) |
| else: |
| decay.append(param) |
|
|
| return [ |
| {'params': no_decay, 'weight_decay': 0.}, |
| {'params': decay, 'weight_decay': weight_decay}] |
|
|
|
|
| def param_groups_lrd(model, weight_decay=0.05, no_weight_decay_list=[], layer_decay=.75): |
| """ |
| Parameter groups for layer-wise lr decay |
| Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L58 |
| """ |
| param_group_names = {} |
| param_groups = {} |
|
|
| num_layers = len(model.blocks) + 1 |
|
|
| layer_scales = list(layer_decay ** (num_layers - i) for i in range(num_layers + 1)) |
|
|
| for n, p in model.named_parameters(): |
| if not p.requires_grad: |
| continue |
|
|
| |
| if p.ndim == 1 or n in no_weight_decay_list: |
| g_decay = "no_decay" |
| this_decay = 0. |
| else: |
| g_decay = "decay" |
| this_decay = weight_decay |
| |
| layer_id = get_layer_id_for_vit(n, num_layers) |
| group_name = "layer_%d_%s" % (layer_id, g_decay) |
|
|
| if group_name not in param_group_names: |
| this_scale = layer_scales[layer_id] |
|
|
| param_group_names[group_name] = { |
| "lr_scale": this_scale, |
| "weight_decay": this_decay, |
| "params": [], |
| } |
| param_groups[group_name] = { |
| "lr_scale": this_scale, |
| "weight_decay": this_decay, |
| "params": [], |
| } |
|
|
| param_group_names[group_name]["params"].append(n) |
| param_groups[group_name]["params"].append(p) |
|
|
| |
|
|
| return list(param_groups.values()) |
|
|
|
|
| def get_layer_id_for_vit(name, num_layers): |
| """ |
| Assign a parameter with its layer id |
| Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L33 |
| """ |
| if name in ['cls_token', 'pos_embed']: |
| return 0 |
| elif name.startswith('patch_embed'): |
| return 0 |
| elif name.startswith('blocks'): |
| return int(name.split('.')[1]) + 1 |
| else: |
| return num_layers |