import torch
import torch.nn as nn
import torch.nn.functional as F


class R1Distiller(nn.Module):
    """Knowledge-distillation trainer combining three terms:

    1. supervised task loss (cross-entropy on the student's logits),
    2. KD loss (temperature-scaled KL divergence, student vs. teacher logits),
    3. R1 regularizer (1 - mean cosine similarity between L2-normalized
       teacher and student feature vectors).
    """

    def __init__(self, teacher, student, temperature=4.0, alpha=0.5, beta=0.1):
        """
        Args:
            teacher: pretrained teacher network, kept frozen. Its forward is
                assumed to return ``(logits, features)`` — TODO confirm
                against the actual teacher implementation.
            student: trainable student network; forward returns
                ``(logits, features)``.
            temperature: softmax temperature for the KD term.
            alpha: weight of the KD loss in the total.
            beta: weight of the R1 feature-alignment loss in the total.
        """
        super().__init__()
        self.teacher = teacher
        self.student = student
        # These were read in forward() but never initialized in the original
        # code (AttributeError at runtime); exposed as keyword args.
        self.temperature = temperature
        self.alpha = alpha
        self.beta = beta
        # Freeze the teacher so an optimizer over self.parameters() never
        # updates it (no_grad alone does not stop a naive optimizer step).
        for p in self.teacher.parameters():
            p.requires_grad_(False)

    def forward(self, x, labels):
        """Return the scalar total distillation loss for a batch.

        Args:
            x: input batch accepted by both teacher and student.
            labels: class indices for the cross-entropy task loss.
        """
        # Teacher forward pass (no gradient tracking; parameters frozen).
        # NOTE(review): the original used self.teacher.extract_features(x)
        # yet also consumed an undefined t_logits; the teacher must supply
        # both — assumed interface (logits, features), verify with caller.
        with torch.no_grad():
            t_logits, t_features = self.teacher(x)

        # Student forward pass.
        s_logits, s_features = self.student(x)

        # Supervised task loss.
        loss_task = F.cross_entropy(s_logits, labels)

        # Distillation loss: KL divergence between temperature-softened
        # distributions; the T^2 factor keeps gradient magnitudes comparable
        # across temperatures (standard Hinton-style scaling).
        loss_kd = F.kl_div(
            F.log_softmax(s_logits / self.temperature, dim=1),
            F.softmax(t_logits / self.temperature, dim=1),
            reduction='batchmean',
        ) * (self.temperature ** 2)

        # R1 regularizer: penalize feature misalignment via cosine similarity
        # of L2-normalized feature vectors.
        t_features_norm = F.normalize(t_features, p=2, dim=1)
        s_features_norm = F.normalize(s_features, p=2, dim=1)
        loss_r1 = 1 - torch.mean(
            torch.sum(t_features_norm * s_features_norm, dim=1)
        )

        # Weighted total. The original line was truncated/garbled
        # ("... + self.beta lana[...]"); reconstructed as beta * loss_r1.
        total_loss = loss_task + self.alpha * loss_kd + self.beta * loss_r1
        return total_loss
