def main():
    args = arg_parse()
    config = get_config(args)
    config.show_config()
    dataset = pd.read_csv(
        os.path.join(os.path.dirname(os.path.abspath(__file__)),
                     'dataset', 'BBC', 'preproc_dataset.csv'))
    if config.mode == 'train':
        train(config, dataset)
    elif config.mode == 'test':
        test(config, dataset)
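# `arg_parse` and `get_config` above are not shown. A minimal hypothetical
# sketch of the argument-parsing side, assuming only the train/test mode
# switch that `main` dispatches on (the real config object's other fields
# are unknown):
import argparse

def arg_parse():
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', choices=['train', 'test'], default='train')
    return parser.parse_args()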
def evaluate(output, gold_standard, tfilter, id):
    """Evaluate results in output given a gold standard. It is the
    responsibility of the user to make sure that it makes sense to compare
    this gold standard to the system results."""
    print("Evaluating system results in %s" % output)
    system_file = os.path.join(output, 'classify.MaxEnt.out.s4.scores.sum.nr')
    command = "python %s" % ' '.join(sys.argv)
    for term_type in ('all', 'single-token-terms', 'multi-token-terms'):
        ttstring = term_type_as_short_string(term_type)
        tfstring = term_filter_as_short_string(tfilter)
        summary_file = os.path.join(
            output, "eval-%s-%s-%s.txt" % (id, ttstring, tfstring))
        summary_fh = codecs.open(summary_file, 'w', encoding='utf-8')
        for threshold in (0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9):
            # only the 0.5 threshold gets a detailed log file
            if threshold == 0.5:
                log_file = os.path.join(output, "eval-%s-%s-%s-%.1f.txt"
                                        % (id, ttstring, tfstring, threshold))
            else:
                log_file = None
            result = evaluation.test(gold_standard, system_file, threshold,
                                     log_file, term_type=term_type,
                                     term_filter=tfilter, debug_c=False,
                                     command=command)
            summary_fh.write(result)
        summary_fh.close()
def run_evaluation(rconfig, batch, gold_standard, threshold, log_file, command):
    """Runs an evaluation, comparing the system results in the batch to the
    gold standard."""
    corpus_dir = rconfig.target_path
    system_file = os.path.join(corpus_dir, 'data', 't2_classify', batch,
                               'classify.MaxEnt.out.s4.scores.sum.nr')
    if threshold is not None:
        evaluation.test(gold_standard, system_file, threshold, log_file,
                        debug_c=True, command=command)
    else:
        # this requires that the version can be extracted as below
        version = os.path.basename(os.path.dirname(system_file))
        log_file = os.path.join('..', 'evaluation', 'logs',
                                "%s-%s.log" % (version, "0.90"))
        evaluation.test(gold_standard, system_file, 0.9, log_file,
                        debug_c=True, command=command)
        for threshold in (0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0.0):
            log_file = os.path.join(
                '..', 'evaluation', 'logs',
                "%s-%s.log" % (version, "%.2f" % threshold))
            evaluation.test(gold_standard, system_file, threshold, log_file,
                            debug_c=False, command=command)
def _run_eval(self, gold_standard=None):
    """Evaluate results if a gold standard is handed in. It is the
    responsibility of the user to make sure that it makes sense to compare
    this gold standard to the system result."""
    # NOTE: the original took no gold_standard argument, leaving the name
    # undefined; the docstring says it is "handed in", so it is a parameter here.
    # TODO: now the log files have a lot of redundancy, fix this
    if gold_standard is not None:
        summary_file = os.path.join(self.batch, "eval-results-summary.txt")
        summary_fh = open(summary_file, 'w')
        system_file = os.path.join(self.batch,
                                   'classify.MaxEnt.out.s4.scores.sum.nr')
        command = "python %s" % ' '.join(sys.argv)
        for threshold in (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9):
            log_file = os.path.join(self.batch,
                                    "eval-results-%.1f.txt" % threshold)
            result = evaluation.test(gold_standard, system_file, threshold,
                                     log_file, debug_c=False, command=command)
            summary_fh.write("%s\n" % result)
        summary_fh.close()
def single_epoch(hyperparameters, data_parameters, model, criterion, optimizer,
                 scheduler, decoder, train_loader, validation_loader, iter_meter):
    iter_meter.step_epoch()
    train(model, criterion, optimizer, scheduler, train_loader, iter_meter)
    avg_validation_loss, _, _ = test(model, criterion, decoder, validation_loader)
    state = {
        'hyperparameters': hyperparameters,
        'data_parameters': data_parameters,
        'epoch': iter_meter.get_epoch(),
        'iteration': iter_meter.get(),
        'model_dict': model.state_dict(),
        'optim_dict': optimizer.state_dict(),
        'scheduler_dict': scheduler.state_dict(),
        'avg_validation_loss': avg_validation_loss  # metric to compare
    }
    utils.save_checkpoint(state, 'avg_validation_loss', iter_meter)
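# `utils.save_checkpoint` above is project-specific and not shown. A minimal
# sketch of what such a helper might look like, assuming it keeps the
# checkpoint with the lowest value of the named metric (everything in this
# sketch except the `state` layout above is a hypothetical illustration):
import torch

def save_checkpoint_sketch(state, metric_key, best_value=None, path='best_checkpoint.pt'):
    """Save `state` to `path` only when `state[metric_key]` improves (is lower)."""
    value = state[metric_key]
    if best_value is None or value < best_value:
        torch.save(state, path)  # overwrite the previous best checkpoint
        return value             # new best value of the metric
    return best_value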
# initialize the options with the defaults, overwrite the ones specified.
args = configparser.ConfigParser()
args.read(os.path.join(os.getcwd(), 'default.ini'))
if os.path.isfile(conf_file):
    args.read(conf_file)
else:
    warnings.warn(f'No config file found under "{conf_file}", using defaults')
# to ensure reproducibility in case the defaults changed,
# save the entire set of current options too
conf_full = os.path.join(sys.argv[2], 'config_full.ini')
with open(conf_full, 'w') as f:
    args.write(f)
args['checkpoints']['output_dir'] = output_dir
try:
    args['data']['data_root_folder'] = os.environ['DATASET_DIR']
except KeyError:
    raise ValueError("Please set the DATASET_DIR environment variable")

if mode in ('test', 'both'):
    import evaluation
    evaluation.test(args)
if mode in ('train', 'both'):
    import train
    train.train(args)
    nb_classes=CLASSES, Chans=chans, Samples=samples, dropoutRate=0.5,
    kernLength=125, F1=16, D=4, F2=64, dropoutType='Dropout')

# compile model
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])

# train model and save weights
# train(model, {"x": trainX, "y": trainY}, {"x": valX, "y": valY}, weight_file)

# load weights from file
model.load_weights(weight_file)

# test model
comp_eval = test(model, compX, compY)     # test competition test set
pilot_eval = test(model, pilotX, pilotY)  # test pilot test set
comp_avg = {k: comp_avg.get(k, 0) + comp_eval.get(k, 0)
            for k in set(comp_avg) & set(comp_eval)}
pilot_avg = {k: pilot_avg.get(k, 0) + pilot_eval.get(k, 0)
             for k in set(pilot_avg) & set(pilot_eval)}

# print per-fold evaluation
print(f'fold {i+1}')
print('competition:', sorted(comp_eval.items()))
print('pilot:      ', sorted(pilot_eval.items()))
print()

# after the cross-validation loop: print averages over all folds
print('avg')
print('competition:', sorted({k: comp_avg.get(k, 0) / FOLDS for k in set(comp_avg)}.items()))
print('pilot:      ', sorted({k: pilot_avg.get(k, 0) / FOLDS for k in set(pilot_avg)}.items()))
print()
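# The running-average idiom above merges per-fold metric dicts by key; the
# intersection means the accumulator must be seeded with the first fold's
# dict or it stays empty. A minimal self-contained sketch of the same
# pattern (the names and values here are hypothetical, not from the script):
fold_metrics = [{'acc': 0.8, 'f1': 0.7}, {'acc': 0.9, 'f1': 0.8}]
totals = dict(fold_metrics[0])  # seed with fold 0
for m in fold_metrics[1:]:
    # keep only keys present in both dicts and accumulate their values
    totals = {k: totals.get(k, 0) + m.get(k, 0) for k in set(totals) & set(m)}
averages = {k: v / len(fold_metrics) for k, v in totals.items()}
print(averages)  # {'acc': 0.85, 'f1': 0.75} (up to float rounding)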
]

# script start
_compX, _compY = epoch_comp(prep_comp(comp_channel_map3, GOODS, h_freq=30.), CLASSES)
_pilotX, _pilotY = epoch_pilot(prepall_pilot(GOODS, h_freq=30.), CLASSES)

csp = CSP(n_components=4)
csp.fit(_compX, _compY)
csp_compX = csp.transform(_compX)
csp_pilotX = csp.transform(_pilotX)
print(csp_compX)
print(csp_compX.shape)

# clf = SVC(kernel='linear', C=0.05, probability=True)
# clf = MLPClassifier([10, 3], batch_size=16)
clf = RandomForestClassifier()
skf = StratifiedKFold(FOLDS, shuffle=True, random_state=1)
for i, (train_index, test_index) in enumerate(skf.split(csp_compX, _compY)):
    trainX, testX = csp_compX[train_index], csp_compX[test_index]
    trainY, testY = _compY[train_index], _compY[test_index]
    clf.fit(trainX, trainY)
    preds = clf.predict(testX)
    comp_eval = test(clf, preds, testY)
    print(comp_eval)
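# Note: CSP above is fitted on all of _compX before the folds are split,
# which leaks test information into the spatial filters. Assuming CSP here is
# mne.decoding.CSP (an sklearn-compatible transformer), a leakage-free sketch
# would re-fit CSP inside each fold via a Pipeline:
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score

pipe = Pipeline([
    ('csp', CSP(n_components=4)),       # spatial filtering, re-fit per fold
    ('clf', RandomForestClassifier()),  # same classifier as above
])
scores = cross_val_score(pipe, _compX, _compY, cv=skf)  # one score per fold
print(scores.mean())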
def train():
    # ------------------------------------ print the configured options -------------------------------------
    opt.log_dir = 'checkpoints/' + '%s_%d_%d_%s_%s/' % (
        opt.train_net_name, opt.max_epoch, opt.train_bs, opt.face_data,
        time.strftime('%m%d%H%M%S'))
    os.makedirs(opt.log_dir)
    opt._print_opt()

    # ------------------------------------ step 1/5: load data ------------------------------------
    data_pool = DataLoaderPool(opt)
    train_loader = data_pool.select_dataloader(data_type='train')
    valid_loader = data_pool.select_dataloader(data_type='valid')
    test_loader = data_pool.select_dataloader(data_type='test')
    '''
    print('load data done !')
    transform = transforms.Compose([
        transforms.Resize(opt.input_net_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])
    data = LFWDataset(dir=opt.lfw_data_dir + '/lfw/',
                      pairs_path='dataset/lfw_pairs.txt', transform=transform)
    # build the DataLoader
    test_loader = DataLoader(dataset=data, batch_size=opt.test_bs, shuffle=False)
    print('load test data done !')
    '''

    # ------------------------------------ step 2/5: define the network ------------------------------------
    net_name = opt.train_net_name
    net_pool = FaceRecognitionNetPool(opt)  # model selection helper class
    net = net_pool.select_model(net_name)   # instantiate the network
    print('load net done !')

    # ------------------------------------ step 3/5: define loss function and optimizer ------------------------------------
    criterion_triplet = nn.TripletMarginLoss(margin=opt.margin)
    optimizer = optim.Adam(net.parameters(), lr=opt.init_lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.618, patience=2, verbose=True,
        threshold=0.005, threshold_mode='rel', cooldown=0, min_lr=0,
        eps=1e-08)  # learning-rate decay policy
    # criterion_cross = nn.CrossEntropyLoss()

    # ------------------------------------ step 4/5: train --------------------------------------------------
    print('train start ------------------------------------------------')
    time_str = time.strftime('%H:%M:%S')
    print(time_str)
    iteration_number = 0
    train_iter_index = []
    val_iter_index = []
    test_iter_index = []
    train_loss_list = []
    val_acc_list = []
    test_acc_list = []
    best_val_acc = 0
    best_test_acc = 0
    now_best_net_save_path = None
    for epoch in range(opt.max_epoch):
        loss_sigma = 0
        net.train()  # training mode
        pbar = tqdm(enumerate(train_loader))
        for i, data in pbar:
            pbar.set_description('Train Epoch: {} [{}/{} ({:.0f}%)]'.format(
                epoch, i * opt.train_bs, len(train_loader.dataset),
                100. * i / len(train_loader)))
            # fetch the images and labels
            inputs0, inputs1, inputs2, label1, label2 = data
            inputs0, inputs1, inputs2, label1, label2 = (
                Variable(inputs0), Variable(inputs1), Variable(inputs2),
                Variable(label1), Variable(label2))
            outputs0, outputs1, outputs2 = net(inputs0), net(inputs1), net(inputs2)

            # online triplet selection: choose the hard negatives
            d_p = F.pairwise_distance(outputs0, outputs1, 2)
            d_n = F.pairwise_distance(outputs0, outputs2, 2)
            hard_negatives = (d_n - d_p < opt.margin).data.numpy().flatten()
            hard_triplets = np.where(hard_negatives == 1)
            if len(hard_triplets[0]) == 0:
                continue
            outputs0 = outputs0[hard_triplets]
            outputs1 = outputs1[hard_triplets]
            outputs2 = outputs2[hard_triplets]
            '''
            inputs0 = inputs0[hard_triplets]
            inputs1 = inputs1[hard_triplets]
            inputs2 = inputs2[hard_triplets]
            label1 = label1[hard_triplets]
            label2 = label2[hard_triplets]
            cls_a = net.forward_classifier(inputs0)
            cls_p = net.forward_classifier(inputs1)
            cls_n = net.forward_classifier(inputs2)
            loss_cross_a = criterion_cross(cls_a, label1)
            loss_cross_p = criterion_cross(cls_p, label1)
            loss_cross_n = criterion_cross(cls_n, label2)
            loss_cross = loss_cross_a + loss_cross_p + loss_cross_n
            loss_triplet = 10.0 * criterion_triplet(outputs0, outputs1, outputs2)
            loss = loss_cross + loss_triplet
            '''
            loss = criterion_triplet(outputs0, outputs1, outputs2)

            # forward, backward, update weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            iteration_number += 1
            loss_sigma += loss.item()
            if i % 10 == 0:
                # print("Epoch:{}, Current loss {}".format(epoch, loss.item()))
                train_iter_index.append(iteration_number)
                train_loss_list.append(loss.item())

        # per-epoch loss, accuracy and learning rate
        lr_now = [group['lr'] for group in optimizer.param_groups][0]
        loss_avg_epoch = loss_sigma / len(train_loader)
        print("\33[34mTrain_Loss_Avg: {:.4f}\33[0m\t\t\t\t\33[35mLr: {:.8f}\33[0m".format(loss_avg_epoch, lr_now))
        scheduler.step(loss_avg_epoch)  # update the learning rate

        # ------------------------------------ monitor performance on the validation set ------------------------------------
        if epoch % 1 == 0:
            # print('eval start')
            valid_acc = valid(valid_loader, net, epoch)
            val_iter_index.append(iteration_number)
            val_acc_list.append(valid_acc)
            # print("\33[34m\t\t\t\tValid Accuracy:{:.4f}\33[0m".format(valid_acc))
            # print(euclidean_distance_list)
            # show_distance(distance_AP_list, distance_AN_list, opt.show_plot_epoch, epoch)
            # after each validation pass, decide from the validation accuracy
            # whether to save the current weights
            if valid_acc > best_val_acc:
                best_val_acc = valid_acc
                # save the weights
                time_str = time.strftime('%m%d%H%M%S')
                save_name = '%s_validacc_%s_%s_net_params.pkl' % (
                    time_str, '{:.4f}'.format(best_val_acc), net_name)
                now_best_net_save_path = os.path.join(opt.log_dir, save_name)
                torch.save(net.state_dict(), now_best_net_save_path)

        # ------------------------------------ monitor performance on the test set ------------------------------------
        if (epoch + 1) % 3 == 0:
            # print('eval start')
            test_acc = test(test_loader, net, epoch)
            test_iter_index.append(iteration_number)
            test_acc_list.append(test_acc)
            # save the current weights if the test accuracy improved
            if np.mean(test_acc) > best_test_acc:
                best_test_acc = np.mean(test_acc)
                # save the weights
                time_str = time.strftime('%m%d%H%M%S')
                save_name = '%s_testacc_%s_%s_net_params.pkl' % (
                    time_str, '{:.4f}'.format(best_test_acc), net_name)
                now_best_net_save_path = os.path.join(opt.log_dir, save_name)
                torch.save(net.state_dict(), now_best_net_save_path)

    print('Finished Training')
    time_str = time.strftime('%H:%M:%S')
    print(time_str)

    # ------------------------------------ step 5/5: load the best model and evaluate it on the test set ------------------------------------
    show_plot(train_iter_index, train_loss_list,
              val_iter_index, val_acc_list,
              test_iter_index, test_acc_list)
def main(hyperparameters, data_parameters, decoder, project_name):
    with wandb.init(project=project_name, config=hyperparameters):
        model_name = dt.datetime.strftime(dt.datetime.now(), "%H_%M__%d_%m_%Y")
        print("Model Name: {}\n".format(model_name))
        wandb.run.name = model_name
        config = wandb.config
        iter_meter = utils.IterMeter(project_name, model_name, config['epochs'])
        if hyperparameters['SortaGrad']:
            # Sorted, not shuffled loaders
            train_loader, validation_loader, _ = make_loaders(data_parameters,
                                                              sortagrad=True)
            model, criterion, optimizer, scheduler = make_model(
                config, 0, int(len(train_loader)), device)
            print(model)
            print('Num Model Parameters\n',
                  sum(param.nelement() for param in model.parameters()))
            wandb.watch(model, criterion, log="all", log_freq=1)
            # First epoch is ordered and not shuffled
            single_epoch(hyperparameters, data_parameters, model, criterion,
                         optimizer, scheduler, decoder, train_loader,
                         validation_loader, iter_meter)
            # for the rest of the epochs, use the shuffled dataset
            train_loader, validation_loader, test_loader = make_loaders(
                data_parameters, sortagrad=False)
            for _ in range(config['epochs'] - 1):
                single_epoch(hyperparameters, data_parameters, model, criterion,
                             optimizer, scheduler, decoder, train_loader,
                             validation_loader, iter_meter)
        else:
            train_loader, validation_loader, test_loader = make_loaders(
                data_parameters, sortagrad=False)
            model, criterion, optimizer, scheduler = make_model(
                config, 0, int(len(train_loader)), device)
            print(model)
            print('Num Model Parameters',
                  sum(param.nelement() for param in model.parameters()))
            wandb.watch(model, criterion, log="all", log_freq=10)
            for _ in range(config['epochs']):
                single_epoch(hyperparameters, data_parameters, model, criterion,
                             optimizer, scheduler, decoder, train_loader,
                             validation_loader, iter_meter)

        # Evaluate on the held-out test set
        avg_test_loss, avg_cer, avg_wer = test(model, criterion, decoder,
                                               test_loader)
        wandb.log({
            'test_avg_loss': avg_test_loss,
            'test_avg_cer': avg_cer,
            'test_avg_wer': avg_wer,
            'epoch': iter_meter.get_epoch()
        })
# (inside a loop over rows i, not shown) map each categorical value to its index
for j in range(0, len(values)):
    data[i][j] = values[j].index(data[i][j])

# cast the feature columns to int and strip the trailing newline from the class label
data = [list(map(int, row[:-1])) + [row[-1][:-1]] for row in data]
# print(data)
featurenames = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class']
method = 'entropy'  # 'gini', 'entropy', 'classificationerror'

# tree = dt.train(data, featurenames, method)
#
# errorcount = 0
# for row in data:
#     row.append(dt.classifyobj(tree, row, featurenames))
#     if row[-2] != row[-1]:
#         errorcount += 1
#
# accuracy = 1 - errorcount / len(data)
# print('accuracy: ', accuracy)

test(data=data, featurenames=featurenames, adaboostOn=True, k=50,
     preprune=False, postprune=False, threshold=0.1)
print('done')
    F2=64, dropoutType='Dropout')

# compile model
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])

# train model and save weights
# train(model, {"x": trainX, "y": trainY}, {"x": valX, "y": valY}, weight_file)

# load weights from file
model.load_weights(weight_file)

# test model
comp_eval = test(model, testX, testY)
comp_avg = {k: comp_avg.get(k, 0) + comp_eval.get(k, 0)
            for k in set(comp_avg) & set(comp_eval)}

# print per-fold evaluation
print(f'fold {i+1}')
print(sorted(comp_eval.items()))
print()

# after the cross-validation loop: print averages over all folds
print('avg')
print(sorted({k: comp_avg.get(k, 0) / FOLDS for k in set(comp_avg)}.items()))
print()
def main():
    parser = argparse.ArgumentParser(description='Argument Parser for SERIL.')
    parser.add_argument('--logdir', default='log', help='Name of current experiment.')
    parser.add_argument('--n_jobs', default=2, type=int)
    parser.add_argument('--do', choices=['train', 'test'], default='train', type=str)
    parser.add_argument('--mode', choices=['seril', 'finetune'], default='seril', type=str)
    parser.add_argument('--model', choices=['LSTM', 'Residual', 'IRM'], default='LSTM', type=str)

    # Options
    parser.add_argument('--config', default='config/config.yaml', required=False)
    parser.add_argument('--seed', default=1126, type=int,
                        help='Random seed for reproducible results.', required=False)
    parser.add_argument('--gpu', default=2, type=int, help='GPU id to use.')
    args = parser.parse_args()

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # build log directory
    os.makedirs(args.logdir, exist_ok=True)

    # load configuration
    config = yaml.load(open(args.config, 'r'), Loader=yaml.FullLoader)
    if config['train']['loss'] == 'sisdr':
        loss_func = SingleSrcNegSDR("sisdr", zero_mean=False, reduction='mean')

    if args.do == 'train':
        torch.cuda.set_device(args.gpu)
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        assert len(config['dataset']['train']['clean']) == len(
            config['dataset']['train']['noisy']) and \
            len(config['dataset']['train']['clean']) >= 1
        model_path = f'{args.logdir}/pretrain/{args.model}_model_T0.pth'
        lifelong_agent_path = f'{args.logdir}/pretrain/{args.model}_synapses_T0.pth'
        if os.path.exists(model_path) and os.path.exists(lifelong_agent_path):
            print('[Runner] - pretrained model already exists!')
            model = torch.load(model_path).to(device)
            lifelong_agent = torch.load(lifelong_agent_path).to(device)
            lifelong_agent.load_config(**config['train']['strategies'])
        else:
            print('[Runner] - running the pretraining process!')
            preprocessor = OnlinePreprocessor(feat_list=feat_list).to(device)
            model = eval(f'{args.model}')(loss_func, preprocessor,
                                          **config['model']).to(device)
            lifelong_agent = LifeLongAgent(model, **config['train']['strategies'])
            pretrain(args, config, model, lifelong_agent)

        print('[Runner] - running the adaptation process!')
        args.logdir = f'{args.logdir}/{args.mode}'
        if args.mode == 'seril':
            adapt(args, config, model, lifelong_agent)
        elif args.mode == 'finetune':
            adapt(args, config, model)

    if args.do == 'test':
        test(args, config)
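# Note: `eval(f'{args.model}')` above resolves the model class by evaluating
# its name as code. A sketch of a safer, equivalent lookup, assuming LSTM,
# Residual and IRM are the classes already imported in this module:
MODEL_CLASSES = {'LSTM': LSTM, 'Residual': Residual, 'IRM': IRM}
model_cls = MODEL_CLASSES[args.model]  # raises KeyError instead of evaluating arbitrary text
model = model_cls(loss_func, preprocessor, **config['model']).to(device)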
def train(self, train_data_path, test_data, options):
    validFreq = options['validFreq']
    saveFreq = options['saveFreq']
    dispFreq = options['dispFreq']
    max_iter = options['max_iter']
    saveto = options['saveto']
    train_loss_his = []
    test_loss_his = []
    start_time = time.time()
    # test_loss_ = self.test_loss(self._test, test_data, options)
    # test_loss_his.append(test_loss_)
    # print('Valid cost:', test_loss_)
    train_loss = 0.
    records_file = open(options['record_path'], 'w+')
    file_name = options['train_data_file_path'] + 'fcv_train_feats.h5'
    train_data = DataHelper.DataHelper(options['v_length'], options['batch_size'],
                                       options['dim_frame'], data_file=file_name,
                                       train=True)
    H = np.zeros([train_data.data_size_, options['dim_proj']], dtype=np.float32)
    try:
        for uidx in range(1, max_iter + 1):
            # iterate over the splits of an epoch
            for eidx in range(1, options['train_splits_num'] + 1):
                # for YFCC:
                # file_name = options['train_data_file_path'] + 'yfcc_train_feats_' + str(eidx) + '.h5'
                # for FCV:
                file_name = options['train_data_file_path'] + 'fcv_train_feats.h5'
                train_data = DataHelper.DataHelper(options['v_length'],
                                                   options['batch_size'],
                                                   options['dim_frame'],
                                                   data_file=file_name, train=True)
                print('loading data: ' + file_name)
                # number of training batches in this split (round up)
                m = train_data.data_size_ // train_data.batch_size_
                if train_data.data_size_ % train_data.batch_size_ != 0:
                    m += 1
                print('m: ', m)
                for i in range(0, m):
                    if i == (m - 1):
                        x = indexContent(train_data, train_data.idx_[i * options['batch_size']:])
                        idxs = train_data.idx_[i * options['batch_size']:]
                    else:
                        x = indexContent(train_data, train_data.idx_[i * options['batch_size']:(i + 1) * options['batch_size']])
                        idxs = train_data.idx_[i * options['batch_size']:(i + 1) * options['batch_size']]
                    # ten zero matrices of shape (batch, dim_proj), as in the original call
                    zeros = [np.zeros((x.shape[0], options['dim_proj']), dtype=np.float32)
                             for _ in range(10)]
                    [H, train_loss, loss_pairwise, reconstruction_loss] = \
                        self._train(x, idxs, H, *zeros)
                    if i % 10 == 0:
                        print('Epoch: ', uidx, '\tPart: ', eidx, '\tBatch: ', i,
                              '\tCost: ', train_loss,
                              '\tpairwise_loss: ', loss_pairwise,
                              '\treconstruction_loss: ', reconstruction_loss)
                        records_file.write('Epoch: ' + str(uidx) + '\tPart: ' + str(eidx)
                                           + '\tBatch: ' + str(i)
                                           + '\tCost: ' + str(train_loss)
                                           + '\tpairwise_loss: ' + str(loss_pairwise)
                                           + '\treconstruction_loss: ' + str(reconstruction_loss)
                                           + '\n')
            if uidx % options['validFreq'] == 0:
                print('start testing...')
                maps = evaluation.test(self._encoder, options, uidx)
            if np.isnan(train_loss) or np.isinf(train_loss):
                print('bad cost detected: ', train_loss)
            if np.mod(uidx, dispFreq) == 0 or uidx == 1:
                train_loss = train_loss / (x.shape[0] * x.shape[1])
                train_loss_his.append(train_loss)
                print('Step ', uidx, 'Train cost:', train_loss)
            if saveto and np.mod(uidx, saveFreq) == 0:
                print('Saving...', end='')
                params_to_save = self.get_params_value()
                updates_value = self.get_updates_value()
                np.savez(saveto, params=params_to_save, updates_v=updates_value,
                         train_loss_his=train_loss_his)
                pkl.dump(options, open('%s.pkl' % saveto, 'wb'), -1)
                print('Save Done')
    except KeyboardInterrupt:
        print("Training interrupted")

    print('Saving records!')
    records_file.close()
    if saveto:
        print('Saving...', end='')
        params_to_save = self.get_params_value()
        updates_value = self.get_updates_value()
        np.savez(saveto, params=params_to_save, updates_v=updates_value,
                 train_loss_his=train_loss_his, test_loss_his=test_loss_his)
        pkl.dump(options, open('%s.pkl' % saveto, 'wb'), -1)
        print('Save Done')
    end_time = time.time()
    print('Training took %.1fs' % (end_time - start_time))