def slave_run_train(model, args, package, pid="None"):
    model.train()
    t = time.time()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    # optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    list_loss, list_acc = [], []
    for iter in range(args.slave_ep):
        if args.dataset == "Amazon":
            feature = torch.tensor(package["features"], dtype=torch.float32)
        else:
            feature = torch.eye(len(package["features"]), dtype=torch.float32)
        support = to_torch_sparse_tensor(
            coo_matrix((package["support"][1],
                        (package["support"][0][:, 0], package["support"][0][:, 1])),
                       shape=package["support"][2]))
        label = torch.tensor(package["y_train"], dtype=torch.float32)
        mask = torch.tensor(package["train_mask"].astype(int), dtype=torch.float32)
        criterion = torch.nn.CrossEntropyLoss()
        if args.device >= 0:
            model = model.cuda()
            criterion = criterion.cuda()
            feature = feature.cuda()
            support = support.cuda()
            label = label.cuda().to(dtype=torch.int64)
            mask = mask.cuda()
        model.zero_grad()
        out = model(support, feature)
        loss, pred, acc = _metrics(out, label, mask, criterion, args.multilabel)
        # update model
        loss.backward()
        optimizer.step()
        # calculate F1 if needed.
        list_loss.append(loss.item())
        list_acc.append(acc.item())
    time_cost = time.time() - t
    # print(loss, acc)
    log_str = "Slave-" + str(pid) + " Done. Total time cost:" + str(time_cost) + \
        " average acc: " + str(sum(list_acc) / len(list_acc)) + ". average loss: " + \
        str(sum(list_loss) / len(list_loss))
    print2file(log_str, args.logDir, True)
    return {"params": model.cpu().state_dict(),
            "acc": sum(list_acc) / len(list_acc),
            "pred": pred,
            "out": out,
            "loss": sum(list_loss) / len(list_loss),
            "time": time_cost}
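# `_metrics` is defined elsewhere in the repo. Purely as a hedged illustration of
# the masked loss/accuracy triple it is expected to return (an assumption, not the
# actual implementation), a single-label sketch could look like this; the
# `criterion` and `multilabel` arguments are ignored in the sketch.
def _metrics_sketch(out, label, mask, criterion=None, multilabel=False):
    # out: [N, C] logits, label: [N, C] one-hot targets, mask: [N] 0/1 floats
    target = label.argmax(dim=1)                               # class indices
    per_node = torch.nn.functional.cross_entropy(out, target, reduction='none')
    loss = (per_node * mask).sum() / mask.sum()                # masked mean loss
    pred = out.argmax(dim=1)                                   # hard predictions
    acc = ((pred == target).float() * mask).sum() / mask.sum() # masked accuracy
    return loss, pred, acc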
def slave_run_evaluate(model, args, package, pid="None"):
    model.eval()
    t = time.time()
    if args.dataset == "Amazon":
        feature = torch.tensor(package["features"], dtype=torch.float32)
    else:
        feature = torch.eye(len(package["features"]), dtype=torch.float32)
    support = to_torch_sparse_tensor(
        coo_matrix((package["support"][1],
                    (package["support"][0][:, 0], package["support"][0][:, 1])),
                   shape=package["support"][2]))
    label = torch.tensor(package["y_train"], dtype=torch.float32)
    mask = torch.tensor(package["train_mask"].astype(int), dtype=torch.float32)
    criterion = torch.nn.CrossEntropyLoss()
    if args.device >= 0:
        model = model.cuda()
        criterion = criterion.cuda()
        feature = feature.cuda()
        support = support.cuda()
        label = label.cuda().to(dtype=torch.int64)
        mask = mask.cuda()
    out = model(support, feature)
    loss, pred, acc = _metrics(out, label, mask, criterion, args.multilabel)
    # list_loss.append(loss.item())
    # list_acc.append(acc.item())
    log_str = "Slave-" + str(pid) + " Done. Total time cost:" + str(time.time() - t) + \
        " average acc: " + str(acc.item()) + ". average loss: " + \
        str(loss.item())
    print2file(log_str, args.logDir, True)
    # print(log_str)
    return {"params": model.cpu().state_dict(),
            "acc": acc.item(),
            "pred": pred,
            "out": out,
            "loss": loss.item()}
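# `to_torch_sparse_tensor` is imported from the repo's utilities. As a rough,
# hypothetical sketch only (relying on the module's existing numpy/scipy/torch
# imports), it would convert the scipy coo_matrix built above into a torch
# sparse COO tensor roughly like this:
def to_torch_sparse_tensor_sketch(mat):
    # mat: scipy.sparse.coo_matrix; returns a torch sparse COO tensor
    indices = torch.from_numpy(np.vstack((mat.row, mat.col)).astype(np.int64))
    values = torch.from_numpy(mat.data.astype(np.float32))
    return torch.sparse_coo_tensor(indices, values, torch.Size(mat.shape))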
def train_net(net, optimizer, device, args, LOG_FILE, MODEL_FILE):
    # output regression information
    history = {
        'Train_loss': [],
        'Train_dice': [],
        'Train_other': [],
        'Valid_loss': [],
        'Valid_dice': [],
        'Valid_other': []
    }
    # scheduler
    if args.sch == 1:
        scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer,
            milestones=[args.epoch // 2, args.epoch * 3 // 4],
            gamma=0.35)
    elif args.sch == 2:
        scheduler = optim.lr_scheduler.CosineAnnealingLR(
            optimizer, args.epoch, 1e-4)
    val_dice_best = -float('inf')

    # main iteration
    for epoch in range(args.epoch):  # loop over the dataset multiple times
        net.train()
        running_loss, running_dice, running_other = 0.0, 0.0, 0.0
        tk0 = tqdm(enumerate(trainloader), total=len(trainloader), leave=False)
        # zero the gradient
        optimizer.zero_grad()
        # iterate over all samples
        for i, data in tk0:
            # get the inputs; data is a list of [inputs, labels]
            images = data[0].to(device).permute(0, 3, 1, 2)
            # forward + backward + optimize
            outputs = net(images)
            # do not accumulate the gradient
            if not args.accumulate:
                # different ways of handling the outputs
                loss = compute_loss(args, outputs, data, acc_step=1)
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
                batch_loss = loss.item()
            # do accumulation
            else:
                acc_step = 64 // args.batch
                loss = compute_loss(args, outputs, data, acc_step=acc_step)
                loss.backward()
                if (i + 1) % acc_step == 0:
                    optimizer.step()
                    optimizer.zero_grad()
                batch_loss = loss.item() * acc_step
            # print statistics
            batch_dice, batch_other = evaluate_batch(data, outputs, args)
            running_loss += batch_loss
            running_dice += batch_dice
            running_other += batch_other
            tk0.set_postfix(
                info='Loss {:.3f}, Dice {:.3f}, Other {:.3f}'.format(
                    batch_loss, batch_dice, batch_other))

        # stochastic weight averaging
        if args.swa > 0 and epoch >= args.epoch - args.swa:
            epoch_tmp = args.epoch - args.swa
            if epoch == epoch_tmp:
                net_swa = copy.deepcopy(net.state_dict())
            else:
                for key, val in net_swa.items():
                    net_swa[key] = ((epoch - epoch_tmp) * val +
                                    net.state_dict()[key]) / (epoch - epoch_tmp + 1)

        # after every epoch, print the statistics
        net.eval()
        val_loss, val_dice, val_other = evaluate_loader(
            net, device, validloader, args)
        # save the best up to now
        if val_dice > val_dice_best:
            print('Improving val_dice from {:.3f} to {:.3f}, saving the model'.
                  format(val_dice_best / len(VALID_FILES) / args.category,
                         val_dice / len(VALID_FILES) / args.category))
            val_dice_best = val_dice
            torch.save(net.state_dict(), MODEL_FILE)
        # update the learning rate
        if args.sch > 0:
            scheduler.step()
        # update the history and output message
        history['Train_loss'].append(running_loss / len(trainloader))
        history['Valid_loss'].append(val_loss / len(validloader))
        history['Train_dice'].append(running_dice / len(TRAIN_FILES) /
                                     args.category)  # four categories
        history['Valid_dice'].append(val_dice / len(VALID_FILES) / args.category)
        history['Train_other'].append(running_other / len(TRAIN_FILES) /
                                      args.category)
        history['Valid_other'].append(val_other / len(VALID_FILES) /
                                      args.category)
        sout = '\nEpoch {:d} :'.format(epoch) + ' '.join(
            key + ':{:.3f}'.format(val[-1]) for key, val in history.items())
        print2file(sout, LOG_FILE)
        print(sout)

    if args.swa > 0:
        return net_swa, history
    else:
        return net.state_dict(), history
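# `compute_loss` is defined elsewhere; the `acc_step` argument together with the
# `loss.item() * acc_step` bookkeeping above suggests the loss is divided by the
# number of accumulation steps so that accumulated gradients approximate one
# larger batch. A minimal sketch under that assumption (hypothetical; the real
# function also dispatches on `args` and the segmentation target format):
def compute_loss_sketch(outputs, targets, acc_step=1):
    # outputs: raw logits [B, C, H, W]; targets: float masks of the same shape
    loss = torch.nn.functional.binary_cross_entropy_with_logits(outputs, targets)
    return loss / acc_step  # scaled so accumulated gradients average correctly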
# load validation id
X_valid = list(pd.read_csv('validID.csv')['Valid'])[:rows]
X_train = list(set(np.arange(len(TRAIN_FILES_ALL))) - set(X_valid))[:rows]

# get the train and valid files
TRAIN_FILES = [TRAIN_FILES_ALL[i] for i in X_train]
VALID_FILES = [TRAIN_FILES_ALL[i] for i in X_valid]

steel_ds_valid = SteelDataset(VALID_FILES, args, mask_df=mask_df)
stat_df_valid = steel_ds_valid.stat_images(rows)

# print statistics
sout = '======== Validation Stat ==========\n' + analyze_labels(
    stat_df_valid) + '\n'
print2file(sout, LOG_FILE)

# not using sophisticated normalize
if not args.normalize:
    train_mean, train_std = 0, 1
    test_mean, test_std = 0, 1
else:
    train_mean, train_std = 0.3438812517320016, 0.056746666005067205
    test_mean, test_std = 0.25951299299868136, 0.051800296725619116

sout = 'Train/Test {:d}/{:d}\n'.format(len(TRAIN_FILES_ALL), len(TEST_FILES)) + \
    'Train mean/std {:.3f}/{:.3f}\n'.format(train_mean, train_std) + \
    'Test mean/std {:.3f}/{:.3f}\n'.format(test_mean, test_std) + \
    'Train num/sample {:d}'.format(len(TRAIN_FILES)) + ' '.join(TRAIN_FILES[:2]) + \
    '\nValid num/sample {:d}'.format(len(VALID_FILES)) + ' '.join(VALID_FILES[:2]) + '\n'
print2file(sout, LOG_FILE)
                          int(x['Class 2'] != 0) + \
                          int(x['Class 3'] != 0) + \
                          int(x['Class 4'] != 0) != 0, axis=1))

# save the statistics
X_train, X_valid, _, _ = train_test_split(np.arange(stat_df.shape[0]),
                                          labels,
                                          test_size=0.16,
                                          random_state=1234)
valid_df = pd.DataFrame({'Valid': X_valid})
valid_df.to_csv(VALID_ID_FILE)
stat_df_valid = stat_df.iloc[X_valid, :]

# print statistics
sout = '\n======== Train Stat ==========\n' + analyze_labels(stat_df.iloc[X_train, :]) + \
    '======== Validation Stat ==========\n' + analyze_labels(stat_df_valid) + '\n'
print2file(sout, LOG_FILE)

# plot the distributions
fig, axs = plt.subplots(1, 2, figsize=(16, 5))
sns.distplot(stat_df['mean'], ax=axs[0], kde_kws={"label": "Train"})
axs[0].set_title('Distribution of mean')
sns.distplot(stat_df['std'], ax=axs[1], kde_kws={"label": "Train"})
axs[1].set_title('Distribution of std')
sns.distplot(stat_df_test['mean'], ax=axs[0], kde_kws={"label": "Test"})
sns.distplot(stat_df_test['std'], ax=axs[1], kde_kws={"label": "Test"})
plt.savefig('../output/Distribution.png')

# get the train and valid files
TRAIN_FILES = [TRAIN_FILES_ALL[i] for i in X_train]
parser = argparse.ArgumentParser(description='tag comments')
parser.add_argument('-i', '--carID', type=str, help='car ID')
parser.add_argument('-t', '--tag', default='data/tags', type=str,
                    help='tag file path')
parser.add_argument('-u', '--dict', default='data/udf_dict', type=str,
                    help='UDF dict path')
parser.add_argument('-w', '--wordvec', default='data/wordvec_model', type=str,
                    help='word vector model')
parser.add_argument('-o', '--output', default='data/test', type=str,
                    help='output file name')
args = parser.parse_args()

# dict = args.dict
# wordvec_source = args.wordvec
# tag = args.tag
# id = args.carID
# sql = '''select `comment` from order_reviews where carID = %s''' % id
# pydb = mydb.get_db()
# comments = pydb.exec_sql(sql)
# comments = [c['comment'] for c in comments]
# make_tag = tagging(dict,wordvec_source,tag)
# make_tag = tagging('test_dict','data/wordvec_model','artificial_tag')
# comments = [
#     '驾驶轻松动力足,gps等配备齐全,空间巨大,坐五个人一点都不挤。后备箱也超大,只有塞不满没有装不下。',
#     '车况好 商务车 车主特别好',
#     '车主人好,车也很好用。车辆省油,整洁干净,车主和气,很好',
#     '车很好开,车主人也很好说话~',
#     '感觉这车开的挺舒服的,车主和PP网都挺好的,下次还选PP,加油!'
# ]
# result = map(make_tag.tag_comments,comments)
# NOTE: `result` is produced by the commented-out tagging pipeline above;
# re-enable that block before calling print2file, otherwise this raises NameError.
utils.print2file('test', result)
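# Example invocation (hypothetical script name and argument values, shown only
# to illustrate the options defined above):
#
#   python tag_comments.py -i 12345 -t data/tags -u data/udf_dict \
#       -w data/wordvec_model -o data/test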
def main():
    print("Program start, environment initializing ...")
    torch.autograd.set_detect_anomaly(True)
    args = parameter_parser()
    utils.print2file(str(args), args.logDir, True)

    if args.device >= 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(args.device)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    pic = {}
    # check if pickles, otherwise load data
    # pickle_name = args.data_prefix+args.dataset+"-"+str(args.bsize)+"-"+str(args.num_clusters)+"_main"+".pickle"
    # if os.path.isfile(pickle_name):
    #     print("Loading Pickle.")
    #     load_time = time.time()
    #     pic = pickle.load(open(pickle_name, "rb"))
    #     print("Loading Done. " + str(time.time()-load_time) + " seconds.")
    # else:
    if True:
        print("Data Pre-processing")
        # Load data
        (pic["train_adj"], full_adj, pic["train_feats"], pic["test_feats"],
         pic["y_train"], y_val, y_test, pic["train_mask"], pic["val_mask"],
         test_mask, _, pic["val_data"], pic["test_data"], num_data,
         visible_data) = utils.load_data(args.data_prefix, args.dataset,
                                         args.precalc, amazon=True)

        print("Partition graph and do preprocessing")
        if args.bsize > 1:
            _, pic["parts"] = partition_utils.partition_graph(
                pic["train_adj"], visible_data, args.num_clusters)
            pic["parts"] = [np.array(pt) for pt in pic["parts"]]
            (pic["features_batches"], pic["support_batches"],
             pic["y_train_batches"],
             pic["train_mask_batches"]) = utils.preprocess_multicluster_v2(
                 pic["train_adj"], pic["parts"], pic["train_feats"],
                 pic["y_train"], pic["train_mask"], args.num_clusters,
                 args.bsize, args.diag_lambda)
        else:
            (pic["parts"], pic["features_batches"], pic["support_batches"],
             pic["y_train_batches"],
             pic["train_mask_batches"]) = utils.preprocess(
                 pic["train_adj"], pic["train_feats"], pic["y_train"],
                 pic["train_mask"], visible_data, args.num_clusters,
                 args.diag_lambda)

        (_, pic["val_features_batches"], pic["val_support_batches"],
         pic["y_val_batches"], pic["val_mask_batches"]) = utils.preprocess(
             full_adj, pic["test_feats"], y_val, pic["val_mask"],
             np.arange(num_data), args.num_clusters_val, args.diag_lambda)

        (_, pic["test_features_batches"], pic["test_support_batches"],
         pic["y_test_batches"], pic["test_mask_batches"]) = utils.preprocess(
             full_adj, pic["test_feats"], y_test, test_mask,
             np.arange(num_data), args.num_clusters_test, args.diag_lambda)

        # pickle.dump(pic, open(pickle_name, "wb"))

    idx_parts = list(range(len(pic["parts"])))

    print("Preparing model ...")
    model = StackedGCN(args,
                       pic["test_feats"].shape[1],
                       pic["y_train"].shape[1],
                       precalc=args.precalc,
                       num_layers=args.num_layers,
                       norm=args.layernorm)
    w_server = model.cpu().state_dict()

    print("Start training ...")
    model_saved = "./model/" + args.dataset + "-" + args.logDir[6:-4] + ".pt"
    best_val_acc = 0  # track the best validation accuracy across epochs
    try:
        for epoch in range(args.epochs):
            # Training process
            w_locals, loss_locals, epoch_acc = [], [], []
            all_time = []
            for pid in range(len(pic["features_batches"])):
                # for pid in range(10):
                # Use preprocessed batch data
                package = {
                    "features": pic["features_batches"][pid],
                    "support": pic["support_batches"][pid],
                    "y_train": pic["y_train_batches"][pid],
                    "train_mask": pic["train_mask_batches"][pid]
                }
                model.load_state_dict(w_server)
                out_dict = slave_run_train(model, args, package, pid)
                w_locals.append(copy.deepcopy(out_dict['params']))
                loss_locals.append(copy.deepcopy(out_dict['loss']))
                all_time.append(out_dict["time"])
                epoch_acc.append(out_dict["acc"])

            # update global weights
            a_start_time = time.time()
            if args.agg == 'avg':
                w_server = average_agg(w_locals, args.dp)
            elif args.agg == 'att':
                w_server = weighted_agg(w_locals, w_server, args.epsilon,
                                        args.ord, dp=args.dp)
            else:
                exit('Unrecognized aggregation')
            model.load_state_dict(w_server)
            # agg_time = time.time() - a_start_time
            # print(str(sum(all_time)/len(all_time) + agg_time))

            print2file(
                'Epoch: ' + str(epoch) + ' Average Train acc: ' +
                str(sum(epoch_acc) / len(epoch_acc)), args.logDir, True)

            if epoch % args.val_freq == 0:
                val_cost, val_acc, val_micro, val_macro = evaluate(
                    model, args, pic["val_features_batches"],
                    pic["val_support_batches"], pic["y_val_batches"],
                    pic["val_mask_batches"], pic["val_data"], pid="validation")
                log_str = 'Validation set results: ' + 'cost= {:.5f} '.format(
                    val_cost) + 'accuracy= {:.5f} '.format(
                        val_acc) + 'mi F1= {:.5f} ma F1= {:.5f}'.format(
                            val_micro, val_macro)
                print2file(log_str, args.logDir, True)
                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    torch.save(model.state_dict(), model_saved)
                    print2file(
                        "Best val_acc: " + str(best_val_acc) + " with epoch: " +
                        str(epoch), args.logDir, True)

        torch.save(
            model.state_dict(),
            "./model/" + args.dataset + "-" + args.logDir[6:-4] + "Done.pt")
        print2file("Training Done. Model Saved.", args.logDir, True)

        # Test Model
        # Perform two tests: one with the last model, another with the best val_acc model.
        # 1)
        test_cost, test_acc, micro, macro = evaluate(
            model, args, pic["test_features_batches"],
            pic["test_support_batches"], pic["y_test_batches"],
            pic["test_mask_batches"], pic["test_data"], pid="Final test")
        log_str = 'Test set results: ' + 'cost= {:.5f} '.format(
            test_cost) + 'accuracy= {:.5f} '.format(
                test_acc) + 'mi F1= {:.5f} ma F1= {:.5f}'.format(micro, macro)
        print2file(log_str, args.logDir, True)

        # 2)
        test_model = StackedGCN(args,
                                pic["test_feats"].shape[1],
                                pic["y_train"].shape[1],
                                precalc=args.precalc,
                                num_layers=args.num_layers,
                                norm=args.layernorm)
        test_model.load_state_dict(torch.load(model_saved))
        test_model.eval()
        test_cost, test_acc, micro, macro = evaluate(
            test_model, args, pic["test_features_batches"],
            pic["test_support_batches"], pic["y_test_batches"],
            pic["test_mask_batches"], pic["test_data"], pid="Best test")
        log_str = 'Test set results: ' + 'cost= {:.5f} '.format(
            test_cost) + 'accuracy= {:.5f} '.format(
                test_acc) + 'mi F1= {:.5f} ma F1= {:.5f}'.format(micro, macro)
        print2file(log_str, args.logDir, True)

    except KeyboardInterrupt:
        print("==" * 20)
        print("Exiting from training earlier than planned.")
        print("End... so far so good.")
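# `average_agg` and `weighted_agg` are imported from the repo's aggregation
# utilities. As a hedged, FedAvg-style sketch of what `average_agg` might do
# (hypothetical; the real implementation, including its `dp` handling, may
# differ), relying on the module's existing `copy` import:
def average_agg_sketch(w_locals):
    # element-wise average of the collected local state_dicts (FedAvg);
    # any differential-privacy noise controlled by `args.dp` is omitted here
    w_avg = copy.deepcopy(w_locals[0])
    for key in w_avg:
        for w in w_locals[1:]:
            w_avg[key] = w_avg[key] + w[key]
        w_avg[key] = w_avg[key] / len(w_locals)
    return w_avg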