from datetime import datetime


def run_model(train_file, train_labfile, test_file=None, valid_ratio=0.1,
              batchsize=240, epoch=10, neurons=36, n_hiddenlayer=2, lr=1e-2,
              base_dir='../Data/', save_prob=False, dropout_rate=0.2):
    """Run the deep neural network with dropout."""
    print("Start")
    st = datetime.now()

    data = load_data(base_dir + train_file)
    label_data, label_map = load_label(base_dir + train_labfile)

    # window size = 9, output = 48 phonemes
    n_input = data.shape[1] * 9
    n_output = 48
    N = int(data.shape[0] * (1 - valid_ratio))

    print("Done loading data. Start constructing the model...")
    functions = construct_DNN(n_input, n_output, archi=neurons,
                              n_hid_layers=n_hiddenlayer, lr=lr,
                              dropout_rate=dropout_rate)
    gradient_update, feed_forward = functions

    print("Finish constructing the model. Start training...")
    result = train_model(N, epoch, batchsize, gradient_update, feed_forward,
                         data, label_data, n_output, dropout_rate)
    obj_history, valid_accu, cache = result

    # training accuracy
    train_accu = accuracy(0, N, data, feed_forward, n_output, label_data,
                          cache, dropout_rate)
    print("Training Accuracy: %.4f %%" % (100 * train_accu))

    # validation accuracy
    valid_accu = accuracy(N, data.shape[0], data, feed_forward, n_output,
                          label_data, cache, dropout_rate)
    print("Validation Accuracy: %.4f %%" % (100 * valid_accu))

    if save_prob:
        accuracy(0, data.shape[0], data, feed_forward, n_output, label_data,
                 cache, dropout_rate, save_pred=True, save_name='ytrain_prob')

    if test_file:
        test_predict(base_dir + test_file, label_map, feed_forward, base_dir,
                     dropout_rate, save_prob=save_prob)

    print("Done, Using %s." % str(datetime.now() - st))
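# A minimal usage sketch for run_model, assuming the helpers it calls
# (load_data, load_label, construct_DNN, train_model, accuracy, test_predict)
# live in the same module and that the files named below exist under ../Data/.
# The file names and hyperparameter values here are illustrative placeholders,
# not taken from the original script.
if __name__ == '__main__':
    run_model('train.data', 'train.label',
              test_file='test.data',
              epoch=20, neurons=128, n_hiddenlayer=3,
              lr=1e-3, dropout_rate=0.3)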
import pandas as pd
import tensorflow as tf
from tensorflow.contrib import slim
from tensorflow.contrib.slim.nets import resnet_v1

import utils

datadirect = '../../data/DatasetA_train_20180813/'
train_txt = 'train.txt'
label_txt = 'label_list.txt'
attr_txt = 'attribute_list.txt'
lblattr_txt = 'attributes_per_class.txt'
lblemb_txt = 'class_wordembeddings.txt'

##################preparation pipelines#######################
df_lbl = utils.load_label(datadirect, label_txt)
num_classes = df_lbl.shape[0]

df_pair = utils.load_pair(datadirect, train_txt)
df_pair = pd.merge(df_pair, df_lbl, on='label_code', how='left')
imgnum = df_pair.shape[0]

df_attrname, df_lblattr = utils.load_attr(datadirect, attr_txt, lblattr_txt)
df_attr = pd.merge(df_pair[['label_code']], df_lblattr, on='label_code', how='left')
attrnum = df_attrname.shape[0]

df_lblattr = pd.merge(df_lblattr, df_lbl, on='label_code', how='left')
adj_attrsim = utils.create_adjattr(df_lblattr, num_classes)
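# A hedged illustration of one way an attribute-similarity adjacency such as
# adj_attrsim could be built: cosine similarity between the per-class attribute
# vectors in df_lblattr. This is an assumption for illustration only; the actual
# utils.create_adjattr may use a different definition, and attr_cols is a
# hypothetical argument naming the attribute columns.
import numpy as np

def cosine_attr_adjacency(df_lblattr, num_classes, attr_cols):
    # one attribute vector per class, in class order
    attrs = df_lblattr[attr_cols].to_numpy(dtype=np.float32)[:num_classes]
    norms = np.linalg.norm(attrs, axis=1, keepdims=True) + 1e-12
    normed = attrs / norms
    # (num_classes, num_classes) pairwise cosine-similarity matrix
    return normed @ normed.T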
def main(args):
    # load graph data
    print(time.strftime("%a, %d %b %Y %H:%M:%S +0000: ", time.localtime())
          + 'start loading...', flush=True)
    if args.supervised == 'True':
        train_pool, train_labels, nlabels, multi = utils.load_label(args.label)
        train_data, num_nodes, num_rels, train_indices, ntrain, node_attri = \
            utils.load_supervised(args, args.link, args.node, train_pool)
    elif args.supervised == 'False':
        train_data, num_nodes, num_rels, node_attri = \
            utils.load_unsupervised(args, args.link, args.node)
        nlabels = 0
    print(time.strftime("%a, %d %b %Y %H:%M:%S +0000: ", time.localtime())
          + 'finish loading...', flush=True)

    # check cuda
    use_cuda = args.gpu >= 0 and torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(args.gpu)
    print('check 1', flush=True)

    # create model
    model = TrainModel(node_attri, num_nodes, args.n_hidden, num_rels, nlabels,
                       num_bases=args.n_bases,
                       num_hidden_layers=args.n_layers,
                       dropout=args.dropout,
                       use_cuda=use_cuda,
                       reg_param=args.regularization)
    print('check 2', flush=True)
    if use_cuda:
        model.cuda()
    print('check 3', flush=True)

    # build adj list and calculate degrees for sampling
    degrees = utils.get_adj_and_degrees(num_nodes, train_data)
    print('check 4', flush=True)

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # training loop
    print(time.strftime("%a, %d %b %Y %H:%M:%S +0000: ", time.localtime())
          + "start training...", flush=True)
    for epoch in range(args.n_epochs):
        model.train()

        # perform edge neighborhood sampling to generate training graph and data
        if args.supervised == 'True':
            g, node_id, edge_type, node_norm, matched_labels, matched_index = \
                utils.generate_sampled_graph_and_labels_supervised(
                    train_data, args.graph_batch_size, args.graph_split_size,
                    num_rels, degrees, args.negative_sample, args.edge_sampler,
                    train_indices, train_labels, multi, nlabels, ntrain,
                    if_train=True, label_batch_size=args.label_batch_size)
            if multi:
                matched_labels = torch.from_numpy(matched_labels).float()
            else:
                matched_labels = torch.from_numpy(matched_labels).long()
        elif args.supervised == 'False':
            g, node_id, edge_type, node_norm, data, labels = \
                utils.generate_sampled_graph_and_labels_unsupervised(
                    train_data, args.graph_batch_size, args.graph_split_size,
                    num_rels, degrees, args.negative_sample, args.edge_sampler)
            data, labels = torch.from_numpy(data), torch.from_numpy(labels)

        # set node/edge feature
        node_id = torch.from_numpy(node_id).view(-1, 1).long()
        edge_type = torch.from_numpy(edge_type)
        edge_norm = node_norm_to_edge_norm(
            g, torch.from_numpy(node_norm).view(-1, 1))
        deg = g.in_degrees(range(g.number_of_nodes())).float().view(-1, 1)

        if use_cuda:
            node_id, deg, g = node_id.cuda(), deg.cuda(), g.to('cuda')
            edge_type, edge_norm = edge_type.cuda(), edge_norm.cuda()
            if args.supervised == 'True':
                matched_labels = matched_labels.cuda()
            elif args.supervised == 'False':
                data, labels = data.cuda(), labels.cuda()

        embed, pred = model(g, node_id, edge_type, edge_norm)
        if args.supervised == 'True':
            loss = model.get_supervised_loss(pred, matched_labels,
                                             matched_index, multi)
        elif args.supervised == 'False':
            loss = model.get_unsupervised_loss(g, embed, data, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_norm)  # clip gradients
        optimizer.step()
        optimizer.zero_grad()

        print(time.strftime("%a, %d %b %Y %H:%M:%S +0000: ", time.localtime())
              + "Epoch {:05d} | Loss {:.4f}".format(epoch, loss.item()),
              flush=True)

    print(time.strftime("%a, %d %b %Y %H:%M:%S +0000: ", time.localtime())
          + "training done", flush=True)

    print(time.strftime("%a, %d %b %Y %H:%M:%S +0000: ", time.localtime())
          + "start output...", flush=True)
    model.eval()
    if args.attributed == 'True':
        np.random.shuffle(train_data)
        node_emb, node_over = np.zeros((num_nodes, args.n_hidden)), set()
        batch_total = math.ceil(len(train_data) / args.graph_batch_size)
        for batch_num in range(batch_total):
            # perform edge neighborhood sampling to generate the output graph and data
            g, old_node_id, edge_type, node_norm, data, labels = \
                utils.generate_sampled_graph_and_labels_unsupervised(
                    train_data, args.graph_batch_size, args.graph_split_size,
                    num_rels, degrees, args.negative_sample, args.edge_sampler)

            # set node/edge feature
            node_id = torch.from_numpy(old_node_id).view(-1, 1).long()
            edge_type = torch.from_numpy(edge_type)
            edge_norm = node_norm_to_edge_norm(
                g, torch.from_numpy(node_norm).view(-1, 1))
            if use_cuda:
                node_id, g = node_id.cuda(), g.to('cuda')
                edge_type, edge_norm = edge_type.cuda(), edge_norm.cuda()

            embed, _ = model(g, node_id, edge_type, edge_norm)
            node_emb[old_node_id] = embed.detach().cpu().numpy().astype(np.float32)
            for each in old_node_id:
                node_over.add(each)
            print(time.strftime("%a, %d %b %Y %H:%M:%S +0000: ", time.localtime())
                  + f'finish output batch number {batch_num} -> {batch_total}',
                  flush=True)
        utils.save(args, node_emb)
    elif args.attributed == 'False':
        utils.save(args,
                   model.rgcn.layers[0].embedding.weight.detach().cpu().numpy())

    emb, labs = creoSpazio("./data/PubMed/emb.dat")
    new_emb = TSNEImpl(emb)
    drawImpl(new_emb, labs, "./ciao1.png")

    return
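# The loops above rely on a node_norm_to_edge_norm helper that is not shown in
# this snippet. Below is a minimal sketch following the convention used in
# DGL's RGCN link-prediction example (each edge takes the normalization factor
# of its destination node); the project's actual helper may differ.
def node_norm_to_edge_norm(g, node_norm):
    g = g.local_var()
    # attach the per-node norm, then copy it onto each node's incoming edges
    g.ndata['norm'] = node_norm
    g.apply_edges(lambda edges: {'norm': edges.dst['norm']})
    return g.edata['norm']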
# -*- coding: utf-8 -*-
# Created by Jinkey on 2018/1/4.
__author__ = 'Jinkey'

import tensorflow as tf
import jieba as jb
import numpy as np

import utils

titles = utils.load_data(catalogue=utils.MULTI_FLAG)
target = utils.load_label(catalogue=utils.MULTI_FLAG)

max_sequence_length = 30
embedding_size = 50

# tokenize the titles with jieba
titles = [".".join(jb.cut(t, cut_all=True)) for t in titles]

# word2vec-style bag of words: map tokens to integer ids
vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
    max_sequence_length, min_frequency=1)
text_processed = np.array(list(vocab_processor.fit_transform(titles)))

# read the vocabulary mapping
dict = vocab_processor.vocabulary_._mapping
sorted_vocab = sorted(dict.items(), key=lambda x: x[1])

# configure the network structure
model = utils.build_netword(catalogue=utils.MULTI_FLAG, dict=dict,
# Hyperparameters
batch_size = args.batch_size
lr = args.lr
momentum = args.momentum
num_epoch = args.num_epoch
data_path = args.data_path
label_path = args.label_path
k = args.k
input_dim = 40 * (2 * k + 1)
output_dim = 138

'''Step 1: Load Dataset'''
print('Loading Training Data...')
train_data, train_idx = utils.load_data(os.path.join(args.data_path, 'train.npy'), k)
train_label = utils.load_label(os.path.join(args.label_path, 'train_labels.npy'))
train_dataset = utils.SpeechDataset(train_data, train_label, train_idx, k)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

print('Loading Validation Data...')
val_data, val_idx = utils.load_data(os.path.join(args.data_path, 'dev.npy'), k)
val_label = utils.load_label(os.path.join(args.label_path, 'dev_labels.npy'))
val_dataset = utils.SpeechDataset(val_data, val_label, val_idx, k)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)
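# A hedged training-loop sketch for the loaders built above. The frame-level
# MLP, the CrossEntropyLoss/SGD choices, and the (frames, phonemes) batch
# layout are illustrative assumptions; the original model definition is not
# shown in this snippet.
import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(input_dim, 1024),
                      nn.ReLU(),
                      nn.Linear(1024, output_dim))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)

for epoch in range(num_epoch):
    model.train()
    for frames, phonemes in train_dataloader:
        optimizer.zero_grad()
        logits = model(frames.float())
        loss = criterion(logits, phonemes.long())
        loss.backward()
        optimizer.step()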
for ori_velo_path in tqdm.tqdm(sorted(ori_velo_paths)):
    velo_idx = get_filename(ori_velo_path, False)
    calib_path = get_file_path_by_idx(velo_idx, src_calib_dir)
    label_path = get_file_path_by_idx(velo_idx, src_label_dir)
    image_path = get_file_path_by_idx(velo_idx, src_image_dir)
    output_velo_path = os.path.join(output_disturb_dir, velo_idx + ".bin")
    output_viz_velo_ori_path = os.path.join(output_viz_original_dir, velo_idx + ".jpg")
    output_viz_velo_disturb_path = os.path.join(output_viz_disturb_dir, velo_idx + ".jpg")

    # Load calibration
    calib = read_calib_file(calib_path)
    # Load labels
    labels = load_label(label_path)
    # Load lidar point cloud
    pc_velo = load_velo_scan(ori_velo_path)[:, :3]
    proj_cam_to_velo = project_cam2_to_velo(calib)

    temp = np.asarray([[1, 1, 1]])
    delete_inds = np.asarray([0])
    for obj in labels:
        # get the object's range info
        range_info = get_obj_range_info(obj)
        inds = get_obj_inds(pc_velo, range_info)
        selected = pc_velo[inds]
        selected = selected[selected[:, 2].argsort()]
        selected = get_randon_pc(selected)
        temp = np.concatenate((temp, selected), axis=0)
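# A hedged illustration of the kind of point selection get_obj_inds could
# perform: keep the indices of lidar points inside an axis-aligned (x, y, z)
# range. The min/max-per-axis layout below is an assumption, not the project's
# actual range_info format.
import numpy as np

def points_in_range(pc, xmin, xmax, ymin, ymax, zmin, zmax):
    mask = ((pc[:, 0] >= xmin) & (pc[:, 0] <= xmax) &
            (pc[:, 1] >= ymin) & (pc[:, 1] <= ymax) &
            (pc[:, 2] >= zmin) & (pc[:, 2] <= zmax))
    return np.where(mask)[0]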
def model_q2(data_path, save_path, train_data, train_label, test_data,
             test_label, C, iter_time, separate_type, print_predict=False,
             PCA_visualize=False, tSNE_visualize=False):
    if not os.path.exists(save_path):
        os.mkdir(save_path)

    training_data = utils.load_data(data_path, train_data)
    training_label = utils.load_label(data_path, train_label)
    testing_data = utils.load_data(data_path, test_data)
    testing_label = utils.load_label(data_path, test_label)

    if separate_type == 'random':
        trn_1_r1_data, trn_1_r1_label, trn_1_r2_data, trn_1_r2_label = \
            utils.separate_data_random(data_path, train_data, train_label, mode='1r')
        trn_2_r1_data, trn_2_r1_label, trn_2_r2_data, trn_2_r2_label = \
            utils.separate_data_random(data_path, train_data, train_label, mode='2r')
        trn_3_r1_data, trn_3_r1_label, trn_3_r2_data, trn_3_r2_label = \
            utils.separate_data_random(data_path, train_data, train_label, mode='3r')
    elif separate_type == 'prior':
        trn_1_r1_data, trn_1_r1_label, trn_1_r2_data, trn_1_r2_label = \
            utils.separate_data_prior(data_path, train_data, train_label, mode='1r')
        trn_2_r1_data, trn_2_r1_label, trn_2_r2_data, trn_2_r2_label = \
            utils.separate_data_prior(data_path, train_data, train_label, mode='2r')
        trn_3_r1_data, trn_3_r1_label, trn_3_r2_data, trn_3_r2_label = \
            utils.separate_data_prior(data_path, train_data, train_label, mode='3r')

    if PCA_visualize:
        utils.visual_2D_PCA(training_data, training_label, save_path)
    if tSNE_visualize:
        utils.visual_2D_tSNE(training_data, training_label, save_path)

    scaler = preprocessing.StandardScaler().fit(training_data)
    trn_1_r1_data = scaler.transform(trn_1_r1_data)
    trn_1_r2_data = scaler.transform(trn_1_r2_data)
    trn_2_r1_data = scaler.transform(trn_2_r1_data)
    trn_2_r2_data = scaler.transform(trn_2_r2_data)
    trn_3_r1_data = scaler.transform(trn_3_r1_data)
    trn_3_r2_data = scaler.transform(trn_3_r2_data)
    testing_data = scaler.transform(testing_data)

    model_1_r1 = SVC(C=C, max_iter=iter_time, class_weight='balanced')
    model_1_r2 = SVC(C=C, max_iter=iter_time, class_weight='balanced')
    model_2_r1 = SVC(C=C, max_iter=iter_time, class_weight='balanced')
    model_2_r2 = SVC(C=C, max_iter=iter_time, class_weight='balanced')
    model_3_r1 = SVC(C=C, max_iter=iter_time, class_weight='balanced')
    model_3_r2 = SVC(C=C, max_iter=iter_time, class_weight='balanced')
    model_1_r1.fit(trn_1_r1_data, trn_1_r1_label)
    model_1_r2.fit(trn_1_r2_data, trn_1_r2_label)
    model_2_r1.fit(trn_2_r1_data, trn_2_r1_label)
    model_2_r2.fit(trn_2_r2_data, trn_2_r2_label)
    model_3_r1.fit(trn_3_r1_data, trn_3_r1_label)
    model_3_r2.fit(trn_3_r2_data, trn_3_r2_label)

    y_predict = []
    right_predict = 0.0
    for i in range(len(testing_data)):
        y1_r1 = int(model_1_r1.predict([testing_data[i]])[0])
        y1_r2 = int(model_1_r2.predict([testing_data[i]])[0])
        y2_r1 = int(model_2_r1.predict([testing_data[i]])[0])
        y2_r2 = int(model_2_r2.predict([testing_data[i]])[0])
        y3_r1 = int(model_3_r1.predict([testing_data[i]])[0])
        y3_r2 = int(model_3_r2.predict([testing_data[i]])[0])
        predict_1 = [[y1_r1, y1_r2], [y2_r1, y2_r2], [y3_r1, y3_r2]]
        predict_2 = [y1_r1 and y1_r2, y2_r1 and y2_r2, y3_r1 and y3_r2]
        y_possible = np.where(np.array(predict_2) == 1)[0]
        if len(y_possible) > 0:
            y_idx = np.random.randint(low=0, high=len(y_possible))
            y = y_possible[y_idx] + 1
        else:
            y = np.random.randint(low=1, high=4)
        y_predict.append(y)
        if y == testing_label[i][0]:
            right_predict = right_predict + 1.0
    acc = right_predict * 1.0 / len(testing_data)

    joblib.dump(model_1_r1, os.path.join(save_path, 'svm_model_1_r1.m'))
    joblib.dump(model_1_r2, os.path.join(save_path, 'svm_model_1_r2.m'))
    joblib.dump(model_2_r1, os.path.join(save_path, 'svm_model_2_r1.m'))
    joblib.dump(model_2_r2, os.path.join(save_path, 'svm_model_2_r2.m'))
    joblib.dump(model_3_r1, os.path.join(save_path, 'svm_model_3_r1.m'))
    joblib.dump(model_3_r2, os.path.join(save_path, 'svm_model_3_r2.m'))

    if print_predict:
        print(y_predict)

    f1 = open(os.path.join(save_path, 'problem_2.txt'), 'a')
    f1.write('If we use our Part vs Part model by sklearn, and the separate type is %s, '
             'the classification accuracy is: %g.\n' % (separate_type, acc))
    f1.close()
    return acc
def model_q1(data_path, save_path, train_data, train_label, test_data,
             test_label, C, iter_time, print_predict=False,
             PCA_visualize=False, tSNE_visualize=False):
    if not os.path.exists(save_path):
        os.mkdir(save_path)

    training_data = utils.load_data(data_path, train_data)
    testing_data = utils.load_data(data_path, test_data)
    training_label = utils.load_label(data_path, train_label)
    testing_label = utils.load_label(data_path, test_label)
    training_label_1 = utils.load_label(data_path, train_label, mode='1r')
    training_label_2 = utils.load_label(data_path, train_label, mode='2r')
    training_label_3 = utils.load_label(data_path, train_label, mode='3r')

    if PCA_visualize:
        utils.visual_2D_PCA(training_data, training_label, save_path)
    if tSNE_visualize:
        utils.visual_2D_tSNE(training_data, training_label, save_path)

    scaler = preprocessing.StandardScaler().fit(training_data)
    training_data = scaler.transform(training_data)
    testing_data = scaler.transform(testing_data)

    model = SVC(C=C, max_iter=iter_time, class_weight='balanced')
    model_1r = SVC(C=C, max_iter=iter_time, class_weight='balanced')
    model_2r = SVC(C=C, max_iter=iter_time, class_weight='balanced')
    model_3r = SVC(C=C, max_iter=iter_time, class_weight='balanced')
    model.fit(training_data, training_label)
    model_1r.fit(training_data, training_label_1)
    model_2r.fit(training_data, training_label_2)
    model_3r.fit(training_data, training_label_3)

    acc_multi = model.score(testing_data, testing_label)

    y_predict = []
    right_predict = 0.0
    for i in range(len(testing_data)):
        y1 = model_1r.predict([testing_data[i]])
        y2 = model_2r.predict([testing_data[i]])
        y3 = model_3r.predict([testing_data[i]])
        y_possible = np.where(np.array([y1, y2, y3]) == 1)[0]
        if len(y_possible) > 0:
            y_idx = np.random.randint(low=0, high=len(y_possible))
            y = y_possible[y_idx] + 1
        else:
            y = np.random.randint(low=1, high=4)
        y_predict.append(y)
        if y == testing_label[i][0]:
            right_predict = right_predict + 1.0
    acc = right_predict * 1.0 / len(testing_data)

    joblib.dump(model_1r, os.path.join(save_path, 'svm_model_1r.m'))
    joblib.dump(model_2r, os.path.join(save_path, 'svm_model_2r.m'))
    joblib.dump(model_3r, os.path.join(save_path, 'svm_model_3r.m'))

    if print_predict:
        print(y_predict)

    f1 = open(os.path.join(save_path, 'problem_1.txt'), 'w')
    f1.write('If we use multi-classifying SVM in sklearn, '
             'the classification accuracy is: %g;\n' % (acc_multi))
    f1.write('If we use our 1 vs Rest model by sklearn, '
             'the classification accuracy is: %g.' % (acc))
    f1.close()
    return acc_multi, acc
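# A hedged usage sketch for model_q1 and model_q2 above. The paths, file names,
# and hyperparameter values are illustrative placeholders, not taken from the
# original scripts.
if __name__ == '__main__':
    acc_multi, acc_ovr = model_q1('./data', './results_q1',
                                  'train_data.csv', 'train_label.csv',
                                  'test_data.csv', 'test_label.csv',
                                  C=1.0, iter_time=10000)
    acc_pvp = model_q2('./data', './results_q2',
                       'train_data.csv', 'train_label.csv',
                       'test_data.csv', 'test_label.csv',
                       C=1.0, iter_time=10000, separate_type='random')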
def main(args):
    # first check that the dataset files exist
    if not os.path.exists(args.dataset_train_dir) or not os.path.exists(
            args.dataset_validate_dir):
        raise NameError(
            'Dataset path "./dataset/MIR-1K/Wavfile" or '
            '"./dataset/MIR-1K/UndividedWavfile" does not exist!')

    # 1. collect the paths of the training and validation wav files into lists
    train_file_list = load_file(args.dataset_train_dir)
    valid_file_list = load_file(args.dataset_validate_dir)

    # sample rate of the dataset
    mir1k_sr = args.dataset_sr
    # window size for the short-time Fourier transform
    n_fft = 1024
    # hop length; the frame shift, analogous to the stride in a convolution
    hop_length = n_fft // 4

    # Model parameters
    # learning rate
    learning_rate = args.learning_rate
    # hidden-unit sizes of the RNN layers
    num_hidden_units = [1024, 1024, 1024, 1024, 1024]
    # batch size
    batch_size = args.batch_size
    # number of frames per sample
    sample_frames = args.sample_frames
    # number of training iterations
    iterations = args.iterations
    # dropout
    dropout_rate = args.dropout_rate
    # model save path
    model_dir = args.model_dir
    model_filename = args.model_filename

    # load the wav data of the training set:
    # wavs_mono_train holds the mono audio, wavs_label_train holds the labels
    label_train = load_label(args.dataset_label_dir, sr=mir1k_sr)
    label_test = load_label(args.dataset_label_dir, sr=mir1k_sr)
    wavs_mono_train, wavs_label_train = load_wavs(filenames=train_file_list,
                                                  wavs_label=label_train,
                                                  sr=mir1k_sr)

    # convert the audio to the frequency domain with a short-time Fourier transform
    stfts_mono_train, stfts_label_train = wavs_to_specs(
        wavs_mono=wavs_mono_train, wavs_label=wavs_label_train,
        n_fft=n_fft, hop_length=hop_length)

    # same as above, but for the validation set
    wavs_mono_valid, wavs_label_valid = load_wavs(filenames=valid_file_list,
                                                  wavs_label=label_test,
                                                  sr=mir1k_sr)
    stfts_mono_valid, stfts_label_valid = wavs_to_specs(
        wavs_mono=wavs_mono_valid, wavs_label=wavs_label_valid,
        n_fft=n_fft, hop_length=hop_length)

    # initialize the model
    model = SVMRNN(num_features=n_fft // 2 + 1,
                   num_hidden_units=num_hidden_units)

    # load a saved model; if none exists, initialize all variables
    startepo = model.load(file_dir=model_dir)
    print('startepo:' + str(startepo))

    # start training
    for i in range(iterations):
        # resume training from where the saved model left off
        if i < startepo:
            continue

        # get the next batch of data
        data_mono_batch, data_label_batch = get_next_batch(
            stfts_mono=stfts_mono_train, stfts_label=stfts_label_train,
            batch_size=batch_size, sample_frames=sample_frames)

        # take the magnitude spectra
        x_mixed_src, _ = separate_magnitude_phase(data=data_mono_batch)
        y_label_src, _ = separate_magnitude_phase(data=data_label_batch)

        # feed into the network and train
        train_loss = model.train(x_mixed_src=x_mixed_src,
                                 y_label_src=y_label_src,
                                 learning_rate=learning_rate,
                                 dropout_rate=dropout_rate)

        if i % 10 == 0:
            print('Step: %d Train Loss: %f' % (i, train_loss))

        if i % 200 == 0:
            # evaluate the model on the validation set
            print('==============================================')
            data_mono_batch, data_label_batch = get_next_batch(
                stfts_mono=stfts_mono_valid, stfts_label=stfts_label_valid,
                batch_size=batch_size, sample_frames=sample_frames)
            x_mixed_src, _ = separate_magnitude_phase(data=data_mono_batch)
            y_label_src, _ = separate_magnitude_phase(data=data_label_batch)
            y_sing_src_pred, validate_loss = model.validate(
                x_mixed_src=x_mixed_src, y_label_src=y_label_src,
                dropout_rate=dropout_rate)
            print('Step: %d Validation Loss: %f' % (i, validate_loss))
            print('==============================================')

        if i % 200 == 0:
            model.save(directory=model_dir, filename=model_filename,
                       global_step=i)
arg = parse_args()
print("========Call with Arguments========")
print(arg)

if not os.path.exists(RESULTS_PATH):
    os.mkdir(RESULTS_PATH)
    print(">>> Directory {} created.".format(RESULTS_PATH))
if not os.path.exists(BCM_PATH):
    os.mkdir(BCM_PATH)
    print(">>> Directory {} created.".format(BCM_PATH))

print("\n========Reading Data========")
data, _ = load_mat(arg.data_path, False, 1, 1, ',', True, False, None, None)
label = load_label(arg.label_path, ',', '0')
data = data["data"]

k_means_logger = Logger(LOG_PATH, "Benchmark_K_MEANS.log", benchmark_logger=True)
dbscan_logger = Logger(LOG_PATH, "Benchmark_DBSCAN.log", benchmark_logger=True)
k_means_results = {}
dbscan_results = {}

print("\n========Benchmarking========")
for dim in DR_DIM:
    for method in ["PCA", "TSNE"]:
if args.algo == 'pane':
    Xf = utils.load_emd(path_emb + ".f", n, d / 2, n - 1)
    Xb = utils.load_emd(path_emb + ".b", n, d / 2, n - 1)
    Xf = preprocessing.normalize(Xf, norm='l2', axis=1)
    Xb = preprocessing.normalize(Xb, norm='l2', axis=1)
    X = np.hstack([Xf, Xb])
    print(X.shape)
else:
    X = utils.load_emd(path_emb, n, d, n - 1)

path_label = settings.DATA_INFO[args.data]['path'] + 'labels.txt'
maf1 = []
mif1 = []

if args.multi:
    y = utils.load_label(path_label, n)
    X, y = filter(X, y)
    y = MultiLabelBinarizer(sparse_output=True).fit_transform(y)
else:
    y = utils.read_cluster(n, path_label)

for ratio in [0.9, 0.7, 0.5, 0.3, 0.1]:
    print("labelled data ratio:" + str(1 - ratio))
    macro_f1_avg, micro_f1_avg = eval(X, y, ratio, args.multi, 3)
    maf1.append(macro_f1_avg)
    mif1.append(micro_f1_avg)
    print("macro-f1=%f, micro-f1=%f" % (macro_f1_avg, micro_f1_avg))

print(maf1)
print(mif1)
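# A hedged sketch of what the eval(X, y, ratio, multi, repeats) call above
# could compute for the single-label case: average macro/micro F1 of a linear
# classifier over several random train/test splits. This is an illustrative
# assumption, not the project's actual evaluation code.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

def eval_f1(X, y, test_ratio, repeats=3):
    maf1, mif1 = [], []
    for seed in range(repeats):
        X_tr, X_te, y_tr, y_te = train_test_split(
            X, y, test_size=test_ratio, random_state=seed)
        clf = LogisticRegression(max_iter=1000).fit(X_tr, y_tr)
        pred = clf.predict(X_te)
        maf1.append(f1_score(y_te, pred, average='macro'))
        mif1.append(f1_score(y_te, pred, average='micro'))
    return np.mean(maf1), np.mean(mif1)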
def get_label(self, i):
    assert i < self.size
    fname = os.path.join(self.label, '{:06d}.txt'.format(i))
    return utils.load_label(fname)
var = 'card2'

A = sp.load_npz('/home/sh/anaconda3/fraud/ieee-fraud-detection/edge.npz')  # (N, N)
A = A.astype(np.int16)
A = A.toarray()
gc.collect()
# %%
X = np.load('/home/sh/anaconda3/fraud/ieee-fraud-detection/X.npy')  # (27, N, F)
# %%
X = transpose(X, [1, 0, 2])  # (N, 27, F)
X = np.array(X, dtype=np.float16)
gc.collect()
# %%
Y_train, Y_val, Y_test = load_label()  # (27, n, 2)

idx_train = np.load('/home/sh/anaconda3/fraud/ieee-fraud-detection/mask_train.npy')  # (N,)
idx_val = np.load('/home/sh/anaconda3/fraud/ieee-fraud-detection/mask_val.npy')  # (N,)
idx_test = np.load('/home/sh/anaconda3/fraud/ieee-fraud-detection/mask_test.npy')  # (N,)
idx_train = idx_train.astype(np.int16)
idx_val = idx_val.astype(np.int16)
idx_test = idx_test.astype(np.int16)
gc.collect()

# Parameters
t = 27
N = X.shape[0]   # Number of nodes in the graph
F = X.shape[2]   # Original feature dimension
# n_classes = 2