def main(args):
    path = C.PATH
    # model = PureCapsNet(input_shape=C.INPUT_SHAPE, n_class=C.OUTPUT_CLASS, routings=C.ROUTINGS)
    model = TestMixCapsNet(input_shape=C.INPUT_SHAPE, n_class=C.OUTPUT_CLASS, routings=C.ROUTINGS)
    # model = MultiScaleCapsNet(input_shape=C.INPUT_SHAPE, n_class=C.OUTPUT_CLASS, routings=C.ROUTINGS)
    model.summary()
    # exit()

    if args.target == 'train' or args.target == 'retrain':
        checkpoint = callbacks.ModelCheckpoint(f'check_point/{model.name}_best.h5',
                                               monitor='val_loss', save_best_only=True, verbose=1)
        reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10, mode='min')
        earlystopping = callbacks.EarlyStopping(monitor='val_loss', patience=20)
        log = callbacks.CSVLogger('logs/log.csv')
        tb = callbacks.TensorBoard('logs/tensorboard-logs', batch_size=C.BATCH_SIZE, histogram_freq=0)
        lr_decay = callbacks.LearningRateScheduler(schedule=lambda epoch: C.LR * (C.LR_DECAY ** epoch))

        if args.target == 'retrain':
            # SGD with lr=0.01 for fine-tuning
            optimizer = optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True, decay=1e-6)
            model.load_weights(f'check_point/{model.name}_best.h5', by_name=True)
            print(f"{model.name} loaded.")
        else:
            optimizer = optimizers.Adam(lr=C.LR)
            print("No model loaded.")

        model.compile(optimizer=optimizer,
                      # loss=[margin_loss],
                      loss='binary_crossentropy',
                      # loss_weights=[1.],
                      metrics=[categorical_accuracy])
                      # metrics={'capsnet': 'accuracy'})

        model.fit_generator(data_generator('/'.join((path, 'train'))),
                            epochs=120,
                            steps_per_epoch=C.TRAIN_SIZE // C.BATCH_SIZE,
                            validation_data=data_generator('/'.join((path, 'val'))),
                            validation_steps=C.VAL_SIZE // C.BATCH_SIZE,
                            verbose=1,
                            callbacks=[checkpoint, log, tb, earlystopping])
                            # callbacks=[checkpoint])
        model.save(f'check_point/{model.name}_final.h5')
    else:
        # model.load_weights(f'check_point/{model.name}_best.h5')
        model.load_weights(f'check_point/{model.name}_0.904204.h5')
        print("Loading test data ...")
        x_test, y_test = load_all_data('/'.join((path, 'test')))
        y_pred = batch_prediction(model, x_test, batch_size=200)
        print(len(y_test), len(y_pred))
        model_evaluate(y_pred, y_test)
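# A minimal sketch of the batch_prediction helper used above, assuming it
# simply feeds the model fixed-size chunks of the input and stacks the
# per-batch outputs; the real helper's signature and behavior may differ.
import numpy as np

def batch_prediction(model, x, batch_size=200):
    """Predict `x` in chunks of `batch_size` and stack the results."""
    preds = []
    for start in range(0, len(x), batch_size):
        preds.append(model.predict(x[start:start + batch_size]))
    return np.vstack(preds)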
def attribution(coordinate, model, cell_type='mESC', verbose=1):
    """Calculate the attributions and output a series of bigBed files.

    Args:
        coordinate (str): e.g., chr1:153500000-153501000,chr1:153540000-153542000
        model (keras.models.Model): a loaded model
        cell_type (str): cell type
        verbose (int): whether to print progress

    Return:
        No return. Output to output_bigBed/.
    """
    if verbose:
        print(' Identify the chosen region...')
    # Step 1: parse the coordinate and check that it is valid
    position = parse_coordinate(coordinate)
    print(position)

    # Step 2: find the corresponding 200-kb region and return the start coordinate
    [ch, start_pos, p11, p12, p21, p22] = find_1kb_region(position)
    print(ch, start_pos, p11, p12, p21, p22)

    if verbose:
        print(' Loading data for calculation...')
    # Step 3: load data for the chosen region
    hic, epi = load_all_data(ch, start_pos, signals)

    if verbose:
        print(' Calculating attributions...')
    # Step 4: calculate attributions (returns a 1000 * 11 numpy array)
    attributions = int_grad(model, hic, epi, [p11, p12, p21, p22], steps=100)
    # np.save(f'att_chr7_22975000.npy', attributions)

    if verbose:
        print(' Saving outputs...')
    # Step 5: save the attributions to a bed file and convert it into a bigBed file
    save_bigBed(attributions, signals, ch, start_pos)
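# A hedged usage sketch of attribution(). The checkpoint path is hypothetical
# (and a model with custom layers may need custom_objects); the coordinate
# string is the example from the docstring above.
from keras.models import load_model

model = load_model('check_point/attribution_model.h5')  # hypothetical path
attribution('chr1:153500000-153501000,chr1:153540000-153542000',
            model, cell_type='mESC', verbose=1)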
def run():
    ###### load data ###############
    print('1. load all data.........')
    train_fold_x, train_fold_y, val_fold_x, val_fold_y, test_x = load_all_data()

    print('2. build rf model..........')
    rf = RandomForestRegressor(n_estimators=2500, min_samples_split=3, max_depth=16, n_jobs=-1)

    print('3. start 10fold cv ..........')
    total_fold_error = 0
    for fold_idx in range(N_FOLDS):
        print(str(fold_idx) + ' cv running..........')
        sub_tr_fold_x = train_fold_x[fold_idx]
        sub_tr_fold_y = train_fold_y[fold_idx]
        sub_val_fold_x = val_fold_x[fold_idx]
        sub_val_fold_y = val_fold_y[fold_idx]

        rf.fit(sub_tr_fold_x, sub_tr_fold_y)

        Model_Name = 'rf_' + str(fold_idx)
        ###### save model ########
        joblib.dump(rf, 'weights/' + Model_Name + '.m')

        sub_pred_val = rf.predict(sub_val_fold_x)
        total_fold_error += gen_report(sub_val_fold_y, sub_pred_val, 'log/report.log', fold_idx)

        pred_te = rf.predict(test_x)
        result_csv_path = 'result/rf_' + str(fold_idx) + '.csv'
        save_results(result_csv_path, pred_te)

    mean_fold_error = total_fold_error / (N_FOLDS * 1.0)
    f_report = open('log/report.log', 'a')
    f_report.write('Mean Fold Error:\t' + str(mean_fold_error))
    f_report.close()
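# A minimal sketch of the gen_report helper called above, assuming the
# per-fold error it returns is the mean absolute error; the log format is
# an assumption.
import numpy as np

def gen_report(y_true, y_pred, log_path, fold_idx):
    error = float(np.mean(np.abs(np.asarray(y_true) - np.asarray(y_pred))))
    with open(log_path, 'a') as f:
        f.write('Fold ' + str(fold_idx) + ' Error:\t' + str(error) + '\n')
    return error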
def _init(self):
    province, city, sub_city = load_all_data()
    self.province_search = PrefixQuery(province)
    self.city_search = PrefixQuery(city)
    self.sub_city_search = PrefixQuery(sub_city)
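# A minimal sketch of what PrefixQuery might look like, assuming it answers
# "which stored names start with this prefix?" over a sorted list; the real
# class may use a trie or another index and a different method name.
import bisect

class PrefixQuery:
    def __init__(self, names):
        self._names = sorted(names)

    def search(self, prefix):
        matches = []
        i = bisect.bisect_left(self._names, prefix)
        while i < len(self._names) and self._names[i].startswith(prefix):
            matches.append(self._names[i])
            i += 1
        return matches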
import numpy as np
import copy
from collections import defaultdict
import json
import pickle

import utils

name = 'EAL5'
gos = pickle.load(open("GO" + name + ".dat", "rb"))

filename = 'test.txt'
min_blast = 100
k = 5
sps = ['ce', 'dm', 'hs', 'mm', 'sc']

lr, rl, pairs_dict = utils.load_all_data(min_blast, sps)
letters = sps

annotated = 0
correct = 0
c_clustres = np.zeros(int(k) + 1)
nb_c = 0

seeds = utils.seed_gen(int(k), filename, pairs_dict, letters)
# exit()  # debug early exit; disabled so the loop below is reachable

for seed in seeds:
    nodes = []
    for i in range(k):
        if seed[i] != '-1':
            nodes.append(letters[i] + seed[i])
    cnt = 0
    for node in nodes:
from utils import load_all_data, resample_dataframe

data = load_all_data()
print(data)

by_day = resample_dataframe(data, 'D')
print(by_day)

# sorted = by_day.sort_values('Grid.P')
# print(sorted)

top = by_day.nsmallest(10, 'Grid.P')
print(top)
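# A minimal sketch of resample_dataframe, assuming load_all_data returns a
# pandas DataFrame with a DatetimeIndex and that per-period means are wanted;
# the project's own helper may aggregate differently.
import pandas as pd

def resample_dataframe(df, rule):
    """Downsample `df` to the given pandas offset alias, e.g. 'D' for daily."""
    return df.resample(rule).mean()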
def main(args):
    path = C.PATH
    model, eval_model, manipulate_model = CapsNet(input_shape=C.INPUT_SHAPE,
                                                  n_class=C.OUTPUT_CLASS,
                                                  routings=C.ROUTINGS)
    model.summary()

    if args.target == 'train':
        model.compile(optimizer=optimizers.Adam(lr=C.LR),
                      loss=[margin_loss, 'mse'],
                      loss_weights=[1., C.LAM_RECON],
                      metrics={'capsnet': 'accuracy'})
        model.fit_generator(
            data_generator('/'.join((path, 'train')), target='train'),
            epochs=50,
            steps_per_epoch=C.TRAIN_SIZE // C.BATCH_SIZE,
            validation_data=data_generator('/'.join((path, 'val')), target='val'),
            validation_steps=C.VAL_SIZE // C.BATCH_SIZE,
            verbose=1,
            callbacks=[
                ModelCheckpoint(f'check_point/{model.name}_best.h5',
                                monitor='val_capsnet_acc',
                                save_best_only=True,
                                save_weights_only=True,
                                verbose=1),
                ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10, mode='min'),
                EarlyStopping(monitor='val_loss', patience=20),
                LearningRateScheduler(schedule=lambda epoch: C.LR * (C.LR_DECAY ** epoch))
            ])
        model.save_weights(f'check_point/{model.name}_final.h5')
    else:
        # manipulate_latent(manipulate_model, (x_test, y_test), args)
        # test(model=eval_model, data=(x_test, y_test), args=args)
        # eval_model.load_weights(f'check_point/{train_model.name}_best.h5')
        eval_model.load_weights(f'check_point/model_1_best.h5')
        x_test, y_test = load_all_data('/'.join((path, 'test')), target='test')

        # Predict in fixed-size batches and stack the per-batch class outputs.
        index = 0
        batch_size = 200
        print("length of y_test:", len(y_test))
        test_input = x_test[index:index + batch_size]
        y_pred = eval_model.predict(test_input)[0]
        index += batch_size
        while index < len(y_test) - batch_size:
            test_input = x_test[index:index + batch_size]
            temp = eval_model.predict(test_input)[0]
            y_pred = np.vstack((y_pred, temp))
            index += batch_size
            percent = index / len(y_test)
            progress(percent, width=30)
        test_input = x_test[index:]
        temp = eval_model.predict(test_input)[0]
        y_pred = np.vstack((y_pred, temp))

        # print(y_test.shape, y_pred.shape)
        # print("START COMPUTING...")
        rocauc = metrics.roc_auc_score(y_test, y_pred)
        prauc = metrics.average_precision_score(y_test, y_pred, average='macro')
        y_pred = (y_pred > 0.5).astype(np.float32)
        acc = metrics.accuracy_score(y_test, y_pred)
        f1 = metrics.f1_score(y_test, y_pred, average='samples')
        print(
            f'Test scores: rocauc={rocauc:.6f}\tprauc={prauc:.6f}\tacc={acc:.6f}\tf1={f1:.6f}'
        )
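# The margin_loss passed to model.compile above is not shown in this excerpt;
# the sketch below follows the standard CapsNet formulation
# (m+ = 0.9, m- = 0.1, lambda = 0.5) and may differ from the project's own code.
from keras import backend as K

def margin_loss(y_true, y_pred):
    L = y_true * K.square(K.maximum(0., 0.9 - y_pred)) + \
        0.5 * (1. - y_true) * K.square(K.maximum(0., y_pred - 0.1))
    return K.mean(K.sum(L, axis=1))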
print(err)
print(command_text)
sys.exit(2)

datafile = None
date = None
for opt, arg in opts:
    if opt == '-h':
        print(command_text)
        sys.exit()
    elif opt in ("-f", "--file"):
        datafile = arg
    elif opt in ("-d", "--date"):
        date = arg

df = load_file(datafile) if datafile else load_all_data()
if hasattr(df.index, 'floor') and date:
    df = df[df.index.floor('D') == date]

dataset_stats(df)

if 'simulation.Grid.P' in df:
    df["ACLoad.P"].plot(label="Load")
    df["simulation.PV.P"].plot(label="PV")
    # data["PV.P"].plot()
    # data['Grid.P'].plot()
    # data['simulation.Battery.P'].plot(label="Battery")
    df['simulation.Battery.SoC'].plot(label="SoC", secondary_y=True)
    df['simulation.Grid.P'].plot(label="Grid")
    ax = df['simulation.Spill.P'].plot(label="Spill")
    plt.legend()
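# A minimal sketch of the dataset_stats helper called above, assuming it only
# prints basic summary information about the loaded frame; the real helper may
# report more.
def dataset_stats(df):
    print('rows:', len(df))
    print('range:', df.index.min(), '-', df.index.max())
    print(df.describe())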
""" author: lixh mission: text similarity date: train model: ESIM """ from utils import load_all_data, evaluationMetrics from model import ESIM, arrayToTensor import tensorflow as tf from args import args import numpy as np # 获取已经处理好的文本数据,共有10万条文本, # 最大文本长度为20,<list>p_train.shape = [data_size, max_len],返回的是包含字索引的文本数据 p_train, h_trian, y_trian = load_all_data(args.train_path, data_size=3200) p_train, h_train = np.array(p_train), np.array(h_trian) p_eval, h_eval, y_eval = load_all_data(args.dev_path, data_size=100) p_eval, h_eval = np.array(p_eval), np.array(h_eval) # primise,hypothesis,label三类数据转化为tensor # train_prem = arrayToTensor(p_train) # train_hypo = arrayToTensor(h_trian) # eval_prem = arrayToTensor(p_eval) # eval_hypo = arrayToTensor(p_eval) # 生成数据集 train_dataset = tf.data.Dataset.from_tensor_slices((p_train, h_trian, y_trian)) eval_dataset = tf.data.Dataset.from_tensor_slices((p_eval, h_eval, y_eval))