def main():
    """Run one training per target domain and log the per-target results.

    Each of the four hard-coded domains is used as the target in turn: it is
    removed from ``opt.domains``, its data is loaded with
    ``get_msda_amazon_datasets`` and handed to ``train``. The value returned
    by ``train`` is stored per target, and finally every stored value and
    their mean are logged.

    Side effects: mutates ``opt`` (``domains``, ``num_labels``,
    ``unlabeled_domains``, ``dev_domains``, ``exp2_model_save_file``) and
    creates the save directories on disk.
    """
    os.makedirs(opt.exp2_model_save_file, exist_ok=True)

    unlabeled_domains = ['books', 'dvd', 'electronics', 'kitchen']
    test_acc_dict = {}

    for index, domain in enumerate(unlabeled_domains):
        # Rebuilt every round because the target is removed from it below.
        opt.domains = ['books', 'dvd', 'electronics', 'kitchen']
        # unlabeled samples as train_sets, labeled samples as test_sets
        test_sets, train_sets = get_msda_amazon_datasets(
            opt.prep_amazon_file, domain, 1, opt.feature_num)
        opt.num_labels = 2
        opt.unlabeled_domains = domain
        opt.dev_domains = domain
        opt.domains.remove(domain)

        # Experiments are numbered from 1 on disk.
        opt.exp2_model_save_file = './save/man_exp2/exp' + str(index + 1)
        # The path changes every round, so it has to be created every round.
        # NOTE(review): presumably `train` writes its checkpoints here - confirm.
        os.makedirs(opt.exp2_model_save_file, exist_ok=True)

        # The first argument is the zero-based index of the target domain.
        test_acc_dict[domain] = train(index, train_sets, test_sets)

    log.info(f'Training done...')
    log.info(f'test_acc\'s result is: ')
    for key, value in test_acc_dict.items():
        log.info(str(key) + ": " + str(value))
    log.info(f'ave_test_acc\'s result is: ')
    # Average over the targets that were actually run, not a fixed count.
    log.info(sum(test_acc_dict.values()) / len(test_acc_dict))
def main():
    """Load every domain, assemble the data splits and run a single training.

    The datasets of all ``opt.all_domains`` are loaded first. Domains in
    ``opt.domains`` contribute their first loader output as training data and
    their second as unlabeled data; domains in ``opt.dev_domains`` contribute
    their first output as dev and unlabeled data and their second as test
    data.

    Returns:
        The object returned by ``train``. Its ``'valid'`` and ``'test'``
        entries are mappings whose values are averaged and logged here.
    """
    save_dir = opt.model_save_file
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    log.info(f'Loading {opt.dataset} Datasets...')
    datasets, raw_unlabeled_sets = {}, {}
    for name in opt.all_domains:
        first, second = get_msda_amazon_datasets(
            opt.prep_amazon_file, name, 1, opt.feature_num)
        datasets[name] = first
        raw_unlabeled_sets[name] = second
    opt.num_labels = 2
    log.info(f'Done Loading {opt.dataset} Datasets.')
    log.info(f'Domains: {opt.domains}')

    train_sets = {name: datasets[name] for name in opt.domains}
    unlabeled_sets = {name: raw_unlabeled_sets[name] for name in opt.domains}

    # in this setting, dev_domains should only contain unlabeled domains
    dev_sets, test_sets = {}, {}
    for name in opt.dev_domains:
        dev_sets[name] = datasets[name]
        test_sets[name] = raw_unlabeled_sets[name]
        unlabeled_sets[name] = datasets[name]

    cv = train(train_sets, dev_sets, test_sets, unlabeled_sets)
    log.info(f'Training done...')

    valid_scores = cv['valid']
    acc = sum(valid_scores.values()) / len(valid_scores)
    log.info(f'Validation Set Domain Average\t{acc}')

    test_scores = cv['test']
    test_acc = sum(test_scores.values()) / len(test_scores)
    log.info(f'Test Set Domain Average\t{test_acc}')
    return cv
def main():
    """Run one training per target domain and log the per-target results.

    Each of the four hard-coded domains is used as the target in turn. The
    remaining three stay in ``opt.domains`` and provide ``train_sets``; the
    target provides ``unlabeled_sets`` (first loader output) and
    ``test_sets`` (second loader output). The value returned by ``train`` is
    stored per target, and finally every stored value and their mean are
    logged.

    Side effects: mutates ``opt`` (``domains``, ``num_labels``,
    ``unlabeled_domains``, ``dev_domains``, ``exp2_model_save_file``) and
    creates one save directory per target.
    """
    unlabeled_domains = ['books', 'dvd', 'electronics', 'kitchen']
    test_acc_dict = {}

    for index, target in enumerate(unlabeled_domains):
        # ---------------------- parameter setup ---------------------- #
        # Rebuilt every round because the target is removed from it below.
        opt.domains = ['books', 'dvd', 'electronics', 'kitchen']
        opt.num_labels = 2
        opt.unlabeled_domains = target.split()
        opt.dev_domains = target.split()
        opt.domains.remove(target)
        # Experiments are numbered from 1 on disk.
        opt.exp2_model_save_file = './save/man_exp2/exp' + str(index + 1)
        os.makedirs(opt.exp2_model_save_file, exist_ok=True)

        # ---------------------- load the datasets ---------------------- #
        datasets = {}
        raw_unlabeled_sets = {}
        # Both the source domains (read into train_sets below) and the target
        # domain must be present in `datasets`.
        for name in opt.domains + opt.dev_domains:
            datasets[name], raw_unlabeled_sets[name] = get_msda_amazon_datasets(
                opt.prep_amazon_file, name, 1, opt.feature_num)
        opt.num_labels = 2
        log.info(f'Done Loading {opt.dataset} Datasets.')
        log.info(f'Domains: {opt.domains}')

        # ---------------------- dataset assignment ---------------------- #
        # Data of these domains all have a private feature extractor.
        train_sets = {name: datasets[name] for name in opt.domains}

        # These domains have no private feature extractor; their features can
        # only be extracted by the shared feature extractor.
        # The part fed to D for training is the dev-set part, while test_sets
        # is the raw_unlabeled_sets part.
        unlabeled_sets, test_sets = {}, {}
        for name in opt.dev_domains:
            unlabeled_sets[name] = datasets[name]
            test_sets[name] = raw_unlabeled_sets[name]

        # ------- training that produces pseudo labels and F_d_target ------- #
        # The first argument is the zero-based index of the target domain.
        test_acc_dict[target] = train(index, train_sets, unlabeled_sets, test_sets)

    log.info(f'Training done...')
    log.info(f'test_acc\'s result is: ')
    for key, value in test_acc_dict.items():
        log.info(str(key) + ": " + str(value))
    log.info(f'ave_test_acc\'s result is: ')
    # Average over the targets that were actually run, not a fixed count.
    log.info(sum(test_acc_dict.values()) / len(test_acc_dict))
def cross_validation(kfold):
    """Train once per fold and log per-fold and fold-averaged accuracies.

    Args:
        kfold: Number of folds. It is passed to the dataset loader and is
            the number of ``train`` calls made.

    Returns:
        The mean of the values in the ``'valid'`` entry returned by
        ``utils.average_cv_accuracy`` for the collected fold results.
    """
    def _mean_of_values(scores):
        # Plain arithmetic mean over the values of a mapping.
        return sum(scores.values()) / len(scores)

    log.info('Loading {} Datasets...'.format(opt.dataset))
    datasets, unlabeled_sets = {}, {}
    for name in opt.domains + opt.unlabeled_domains:
        first, second = get_msda_amazon_datasets(
            opt.prep_amazon_file, name, kfold, opt.feature_num)
        datasets[name] = first
        unlabeled_sets[name] = second
    opt.num_labels = 2
    log.info('Done Loading {} Datasets.'.format(opt.dataset))
    log.info('Domains: {}'.format(opt.domains))

    cv = {}
    log.info('Starting {}-fold Cross Validation...'.format(kfold))
    for fold in range(kfold):
        log.info('Starting fold {}...'.format(fold))
        train_sets, dev_sets, test_sets = {}, {}, {}
        for name in opt.all_domains:
            folds_of_domain = datasets[name]
            # only labeled domains have training data
            if name in opt.domains:
                train_sets[name] = folds_of_domain.get_trainset(fold)
            dev_sets[name] = folds_of_domain.get_devset(fold)
            test_sets[name] = folds_of_domain.get_testset(fold)

        fold_result = train(train_sets, dev_sets, test_sets, unlabeled_sets, fold)
        cv[fold] = fold_result
        log.info('Ending fold {}...'.format(fold))
        log.info('Validation Set Domain Average\t{}'.format(
            _mean_of_values(fold_result['valid'])))
        log.info('Test Set Domain Average\t{}'.format(
            _mean_of_values(fold_result['test'])))

    averaged = utils.average_cv_accuracy(cv)
    valid_by_domain = averaged['valid']
    test_by_domain = averaged['test']

    log.info('{}-Fold Cross Validation Accuracies:'.format(kfold))
    for name in opt.all_domains:
        log.info('{}\t{}'.format(name, valid_by_domain[name]))
    overall = _mean_of_values(valid_by_domain)
    log.info('Overall Validation Set Average\t{}'.format(overall))

    log.info('{}-Fold Cross Validation Accuracies on Test Set:'.format(kfold))
    for name in opt.all_domains:
        log.info('{}\t{}'.format(name, test_by_domain[name]))
    overall_test = _mean_of_values(test_by_domain)
    log.info('Overall Test Set Average\t{}'.format(overall_test))
    return overall
def main():
    """Train once per target domain, visualize its features and log results.

    Each of the four hard-coded domains is used as the target in turn. For
    every target the datasets of ``opt.all_domains`` are loaded, the splits
    are assembled, ``train`` is called, and the returned features and labels
    are passed to ``t_sne``. The per-target test averages and their mean are
    logged at the end.

    Side effects: mutates ``opt`` (``shared_lambd``, ``private_lambd``,
    ``domains``, ``num_labels``, ``unlabeled_domains``, ``dev_domains``,
    ``exp2_model_save_file``), creates one save directory per target and
    prints to stdout.
    """
    unlabeled_domains = ['books', 'dvd', 'electronics', 'kitchen']
    test_acc_dict = {}
    opt.shared_lambd = 0.025
    opt.private_lambd = 0.025

    # Experiments are numbered from 1 on disk.
    for exp_no, target in enumerate(unlabeled_domains, start=1):
        # Rebuilt every round because the target is removed from it below.
        opt.domains = ['books', 'dvd', 'electronics', 'kitchen']
        opt.num_labels = 2
        opt.unlabeled_domains = target.split()
        opt.dev_domains = target.split()
        opt.domains.remove(target)
        opt.exp2_model_save_file = './save/man_exp2/exp' + str(exp_no)
        os.makedirs(opt.exp2_model_save_file, exist_ok=True)

        datasets = {}
        raw_unlabeled_sets = {}
        log.info(f'Loading {opt.dataset} Datasets...')
        # Inner loops use their own variable so `target` is never rebound.
        for name in opt.all_domains:
            datasets[name], raw_unlabeled_sets[name] = get_msda_amazon_datasets(
                opt.prep_amazon_file, name, 1, opt.feature_num)
        opt.num_labels = 2
        log.info(f'Done Loading {opt.dataset} Datasets.')
        log.info(f'Domains: {opt.domains}')

        train_sets, dev_sets, test_sets, unlabeled_sets = {}, {}, {}, {}
        for name in opt.domains:
            train_sets[name] = datasets[name]
            unlabeled_sets[name] = raw_unlabeled_sets[name]
        # in this setting, dev_domains should only contain unlabeled domains
        for name in opt.dev_domains:
            dev_sets[name] = datasets[name]
            test_sets[name] = raw_unlabeled_sets[name]
            unlabeled_sets[name] = datasets[name]

        cv, visual_features, senti_labels = train(
            train_sets, dev_sets, test_sets, unlabeled_sets)
        print(visual_features.shape)
        print(senti_labels.shape)
        log.info(f'Training done...')
        acc = sum(cv['valid'].values()) / len(cv['valid'])
        log.info(f'Validation Set Domain Average\t{acc}')
        test_acc = sum(cv['test'].values()) / len(cv['test'])
        log.info(f'Test Set Domain Average\t{test_acc}')
        test_acc_dict[target] = test_acc

        # ---------------------- visualization ---------------------- #
        # NOTE(review): assumed to run once per target, since it consumes the
        # features returned for that target - confirm.
        log.info(f'feature visualization')
        print("Computing t-SNE 2D embedding")
        t0 = time()
        t_sne(target,
              visual_features.detach().cpu().numpy(),
              senti_labels.detach().cpu().numpy())
        print("t-SNE 2D embedding of the digits (time %.2fs)" % (time() - t0))

    log.info(f'Training done...')
    log.info(f'test_acc\'s result is: ')
    for key, value in test_acc_dict.items():
        log.info(str(key) + ": " + str(value))
    log.info(f'ave_test_acc\'s result is: ')
    # Average over the targets that were actually run, not a fixed count.
    log.info(sum(test_acc_dict.values()) / len(test_acc_dict))