def __init__(self, args):
    """Store the run config and build training losses and metrics."""
    self.args = args
    # Main task objective (classification).
    self.criterion = nn.CrossEntropyLoss()
    # Auxiliary objectives: subspace orthogonality, reconstruction,
    # and central-moment-discrepancy alignment.
    self.loss_diff, self.loss_recon, self.loss_cmd = DiffLoss(), MSE(), CMD()
    # NOTE(review): 'getMetics' reads like a typo for 'getMetrics' but
    # matches other call sites in this file — confirm against MetricsTop.
    self.metrics = MetricsTop().getMetics(args.datasetName)
def __init__(self, args):
    """Keep the run config plus per-modality feature dims and lengths."""
    self.args = args
    # Raw (pre-network) feature dimensionality for each modality.
    self.origin_dimensions = args.input_feature_dimensions
    # Input sequence lengths; ordering convention is <text, audio, video>.
    self.modal_lengths = args.input_time_length
    # Regression objective (mean absolute error).
    self.criterion = nn.L1Loss()
    self.metrics = MetricsTop().getMetrics(args.datasetName)
    # Populated later with CTC-related configuration.
    self.ctc_settings = {}
def __init__(self, args):
    """Set up multi-task (M/T/A/V) training state for the SIMS dataset.

    Raises:
        ValueError: if ``args.datasetName`` is not ``'sims'``.
    """
    # Explicit raise instead of `assert`: asserts are stripped under
    # `python -O`, silently disabling this input validation.
    if args.datasetName != 'sims':
        raise ValueError("This trainer only supports datasetName == 'sims'")
    self.args = args
    # Train all four targets: Multimodal, Text, Audio, Vision.
    self.args.tasks = "MTAV"
    # L1 for regression targets, cross-entropy otherwise.
    self.criterion = nn.L1Loss() if args.train_mode == 'regression' else nn.CrossEntropyLoss()
    # NOTE(review): 'getMetics' spelling kept as-is to match the
    # project API — confirm against MetricsTop.
    self.metrics = MetricsTop(args.train_mode).getMetics(args.datasetName)
def __init__(self, args):
    """Set up single-task ('M') training state with both loss flavors.

    Raises:
        ValueError: if ``args.tasks`` is not ``'M'``.
    """
    # Explicit raise instead of `assert`: asserts are stripped under
    # `python -O`, silently disabling this input validation.
    # (Also dropped a commented-out nn.L1Loss() alternative — dead code.)
    if args.tasks not in ['M']:
        raise ValueError("This trainer only supports tasks == 'M'")
    self.args = args
    # MSE for regression targets; cross-entropy for classification.
    self.regression_criterion = nn.MSELoss()
    self.classification_criterion = nn.CrossEntropyLoss()
    # NOTE(review): 'getMetics' spelling kept as-is to match the
    # project API — confirm against MetricsTop.
    self.metrics = MetricsTop().getMetics(args.datasetName)
def __init__(self, args):
    """Store config, pick the task loss, and build auxiliary losses."""
    self.args = args
    # MSE for regression targets; cross-entropy for classification.
    if args.train_mode == 'regression':
        self.criterion = nn.MSELoss()
    else:
        self.criterion = nn.CrossEntropyLoss()
    # Auxiliary objectives: subspace orthogonality, reconstruction,
    # and central-moment-discrepancy alignment.
    self.loss_diff = DiffLoss()
    self.loss_recon = MSE()
    self.loss_cmd = CMD()
    # NOTE(review): 'getMetics' reads like a typo for 'getMetrics' but
    # matches other call sites in this file — confirm against MetricsTop.
    self.metrics = MetricsTop(args.train_mode).getMetics(args.datasetName)
def __init__(self, args):
    """Set up classification training state for the SIMS dataset.

    Raises:
        ValueError: if ``args.datasetName`` is not ``'SIMS'``.
    """
    # Explicit raise instead of `assert`: asserts are stripped under
    # `python -O`, silently disabling this input validation.
    # NOTE(review): sibling trainer checks lowercase 'sims' — confirm
    # which casing the dataset loader actually produces.
    if args.datasetName != 'SIMS':
        raise ValueError("This trainer only supports datasetName == 'SIMS'")
    self.args = args
    self.criterion = nn.CrossEntropyLoss()
    # NOTE(review): 'getMetics' spelling kept as-is to match the
    # project API — confirm against MetricsTop.
    self.metrics = MetricsTop().getMetics(args.datasetName)
def __init__(self, args):
    """Cache the config, the L1 training loss, and dataset metrics."""
    self.args = args
    # Regression objective (mean absolute error).
    self.criterion = nn.L1Loss()
    # NOTE(review): 'getMetics' reads like a typo for 'getMetrics' but
    # matches other call sites in this file — confirm against MetricsTop.
    metrics_provider = MetricsTop()
    self.metrics = metrics_provider.getMetics(args.datasetName)
def __init__(self, args):
    """Set up Self-MM-style multi-task state: per-modality feature,
    class-center, dimension, and pseudo-label buffers.

    All buffers are plain (non-grad) tensors on ``args.device``, one row
    per training sample where applicable.

    Raises:
        ValueError: if ``args.train_mode`` is not ``'regression'``.
    """
    # Explicit raise instead of `assert`: asserts are stripped under
    # `python -O`, silently disabling this input validation.
    if args.train_mode != 'regression':
        raise ValueError("This trainer only supports train_mode == 'regression'")
    self.args = args
    # Train all four targets: Multimodal, Text, Audio, Vision.
    self.args.tasks = "MTAV"
    # NOTE(review): 'getMetics' spelling kept as-is to match the
    # project API — confirm against MetricsTop.
    self.metrics = MetricsTop(args.train_mode).getMetics(args.datasetName)

    # Post-network feature dimensionality per modality; the insertion
    # order (fusion, text, audio, vision) is preserved in every map
    # built below, matching the original hand-written dicts.
    modality_dims = {
        'fusion': args.post_fusion_dim,
        'text': args.post_text_dim,
        'audio': args.post_audio_dim,
        'vision': args.post_video_dim,
    }
    device = args.device
    n_samples = args.train_samples

    # One stored feature row per training sample, per modality.
    self.feature_map = {
        mod: torch.zeros(n_samples, dim, requires_grad=False).to(device)
        for mod, dim in modality_dims.items()
    }
    # Running positive/negative class centers per modality.
    self.center_map = {
        mod: {
            'pos': torch.zeros(dim, requires_grad=False).to(device),
            'neg': torch.zeros(dim, requires_grad=False).to(device),
        }
        for mod, dim in modality_dims.items()
    }
    # Feature dimension as a float scalar tensor (used in normalization).
    self.dim_map = {
        mod: torch.tensor(dim).float()
        for mod, dim in modality_dims.items()
    }
    # Per-sample pseudo labels ("new labels"), refined during training.
    self.label_map = {
        mod: torch.zeros(n_samples, requires_grad=False).to(device)
        for mod in modality_dims
    }
    # Task-letter to modality-name mapping.
    self.name_map = {
        'M': 'fusion',
        'T': 'text',
        'A': 'audio',
        'V': 'vision',
    }