def main():
    """Train the ensemble on the part-1 dataset and print its accuracy.

    Loads ``X``, ``y`` and ``X_hidden`` through ``dataset_manip``, trains an
    11-model ``Ensemble`` with a 0.9 split rate, and prints the accuracy
    measured on the same ``X``/``y`` that were passed to ``train``.

    The single-``Model`` supervised/unsupervised experiment that used to
    follow an unconditional ``return`` was unreachable and has been removed.
    """
    # Raise TensorFlow's native log threshold to cut console noise.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    DATASET_DIRECTORY = '../data_part1'
    X, y, X_hidden = dataset_manip.load_dataset(DATASET_DIRECTORY)

    print('X.shape = ' + str(X.shape))
    print('X_hidden.shape = ' + str(X_hidden.shape))

    ens = Ensemble(input_shape=(77, 71, 1),
                   num_classes=10,
                   num_models=11,
                   batch_size=512,
                   path='./ensemble_files',
                   load=False)
    ens.train(X=X, y=y, epochs_per_model=300, split_rate=0.9)

    # NOTE(review): accuracy is measured on the full X/y given to train(),
    # not on a separate hold-out set — confirm this is intended.
    print(ens.measure_accuracy(X, y))
def __init__(self, sourceFile, targetFile):
    """Set up windows, buffers, change detector, ensemble, streams and gateway.

    Args:
        sourceFile: passed to ``Stream`` to build the source stream.
        targetFile: passed to ``Stream`` to build the target stream.
    """
    # Sliding windows.
    self.SWindow, self.TWindow, self.TPredictWindow = [], [], []

    # Data buffers (used as queues) and the initial-data buffers.
    self.SDataBuffer, self.TDataBuffer = [], []
    self.SInitialDataBuffer, self.TInitialDataBuffer = [], []

    self.changeDetector = ChangeDetection(Properties.GAMMA,
                                          Properties.SENSITIVITY,
                                          Properties.MAX_WINDOW_SIZE)
    self.ensemble = Ensemble(Properties.ENSEMBLE_SIZE)

    # The very same list object is handed to both streams.
    shared_class_names = []
    self.source = Stream(sourceFile, shared_class_names,
                         Properties.INITIAL_DATA_SIZE)
    self.target = Stream(targetFile, shared_class_names,
                         Properties.INITIAL_DATA_SIZE)
    Properties.MAXVAR = self.source.MAXVAR

    # The callback server listens on the port right after the gateway port.
    gateway_params = GatewayParameters(port=Properties.PY4JPORT)
    callback_params = CallbackServerParameters(port=Properties.PY4JPORT + 1)
    self.gateway = JavaGateway(start_callback_server=True,
                               gateway_parameters=gateway_params,
                               callback_server_parameters=callback_params)
    self.app = self.gateway.entry_point
def process_all_images(config):
    """Score every configured image and write per-metric comparison charts.

    For each file in ``config.files`` (sorted, stopping once
    ``config.counter`` files have been processed) the annotation and every
    configured segmentation are read, the 'walker_binary' and 'opt' ensembles
    are built from the segmentations, and ``comp.get_per_image_metrics`` is
    evaluated for each. One table per metric ('jac', 'af1', 'merge_rate',
    'split_rate') collects, per image, the mean over all segmentations
    (column 'unet') and each ensemble's value. The tables are finally passed
    to ``output_charts``.

    The output directory ``<config.output>/<config.filename>`` is deleted and
    recreated on every call.

    Rows are accumulated in plain lists and turned into DataFrames once at the
    end: ``DataFrame.append`` no longer exists in pandas >= 2.0 and copied the
    whole table on every call.

    Args:
        config: object providing ``files``, ``segmentations``, ``output``,
            ``filename``, ``counter``, ``annot``, ``root``, ``erosions`` and
            ``beta``.
    """
    filenames = sorted(config.files)
    segmentations = {name: {} for name in config.segmentations}
    ensembles = {'walker_binary': {}, 'opt': {}}
    methods = ['unet', 'walker_binary', 'opt']
    metric_keys = ['jac', 'af1', 'merge_rate', 'split_rate']
    rows = {key: [] for key in metric_keys}

    os.makedirs(config.output, exist_ok=True)
    root_dir = os.path.join(config.output, config.filename)
    if os.path.exists(root_dir):
        shutil.rmtree(root_dir)
    os.makedirs(root_dir, exist_ok=True)

    for counter, file in enumerate(filenames):
        if counter % 50 == 0:
            print(counter)
        if counter == config.counter:
            break

        annot_path = os.path.join(config.annot, file.strip())
        annot = skimage.io.imread(annot_path, as_gray=True)

        for name, seg in segmentations.items():
            path = os.path.join(config.root, name, file.strip())
            seg['orig'] = skimage.io.imread(path, as_gray=True)
            seg['results'] = comp.get_per_image_metrics(
                annot, seg['orig'], False)
            seg['mask'] = np.where(seg['orig'] > 0, 255, 0)

        # Ensembles are built only after every segmentation of this image
        # has been loaded above.
        for name, ens in ensembles.items():
            ens['orig'] = Ensemble(segmentations, config.erosions,
                                   config.beta).ensemble(name)
            ens['results'] = comp.get_per_image_metrics(
                annot, ens['orig'], False)
            ens['mask'] = np.where(ens['orig'] > 0, 255, 0)

        for key in metric_keys:
            results = {
                'unet': statistics.mean(
                    [seg['results'][key] for seg in segmentations.values()])
            }
            for name, ens in ensembles.items():
                if name != 'union':
                    results[name] = ens['results'][key]
            rows[key].append(results)

    tables = {
        key: pd.DataFrame(rows[key], columns=methods) for key in metric_keys
    }

    stats_dir = os.path.join(root_dir, 'stats')
    os.makedirs(stats_dir, exist_ok=True)
    output_charts(tables, list(methods), stats_dir, config)
def main():
    """Run the checkpointed ensemble over ``args.data``.

    When ``args.data`` is a directory, predictions from ``test`` are written
    to ``result3_.csv`` with one row per directory entry (file name minus its
    last four characters, followed by three integer category ids).
    Otherwise ``test_labeled`` is invoked instead.
    """
    print(args)
    print("=> creating model '{}'".format(args.arch))

    model = torch.nn.DataParallel(Ensemble()).cuda()
    print(model)

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    preprocessing = transforms.Compose([
        transforms.Resize((args.img_size, args.img_size)),
        transforms.ToTensor(),
        normalize,
    ])
    test_data = datautil.SceneDataset(args.data, img_transform=preprocessing)
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=4,
                                              pin_memory=True)

    checkpoint = torch.load(args.test_model)
    model.load_state_dict(checkpoint['state_dict'])

    if not os.path.isdir(args.data):
        test_labeled(test_loader, model)
        return

    ret = test(test_loader, model)
    # NOTE(review): pairs os.listdir() order with the loader's output order —
    # confirm SceneDataset enumerates files in the same order.
    imgs = [entry[:-4] for entry in os.listdir(args.data)]
    with open('result3_.csv', 'w') as f:
        # Columns: FILE_ID,CATEGORY_ID0,CATEGORY_ID1,CATEGORY_ID2
        header = ['FILE_ID', 'CATEGORY_ID0', 'CATEGORY_ID1', 'CATEGORY_ID2']
        f.write(','.join(header) + '\n')
        lines = []
        for a, b in zip(imgs, ret):
            lines.append(','.join([str(a)] + [str(int(i)) for i in b]))
        f.write('\n'.join(lines))
async def main():
    """Read the first line of ``input.txt`` next to this file and run it.

    The line (including any trailing newline) is passed unchanged to
    ``Ensemble``, whose ``run`` coroutine is then awaited.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(here, "input.txt")) as handle:
        first_line = handle.readline()
    await Ensemble(first_line).run()
def create_song(graph_attributes=None,
                number_players=20,
                number_time_steps=300,
                tempo=108,
                player_attributes=None):
    """Build a player graph, evolve an ensemble on it and write the song files.

    Args:
        graph_attributes: dict describing the player graph. Keys:
            ``graph_type`` ('Small World', 'Random', 'Configuration' or
            'Structured'; only 'Small World' is implemented), and for
            'Small World' also ``average_degree`` and ``rewiring_prob``.
            Defaults to a small-world graph with average degree 4 and
            rewiring probability 0.3.
        number_players: number of nodes in the player graph.
        number_time_steps: number of steps the ensemble is evolved for.
        tempo: tempo passed to ``create_midi_file``.
        player_attributes: passed unchanged to ``Ensemble``. Per the original
            notes it may describe: duration (min_duration, max_duration),
            note_change_choices ('All', 'Neighbors of Neighbors'),
            harmonicity threshold ('Fixed' or 'Moving Average'), fixed
            threshold, moving average threshold, susceptibility to influence.

    Returns:
        Tuple ``(filename, pitch_history_data, harmonicity_data)`` where
        ``filename`` is the value returned by ``create_midi_file``.

    Raises:
        NotImplementedError: if ``graph_type`` is a documented but not yet
            implemented type.
        ValueError: if ``graph_type`` is not recognised at all.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if graph_attributes is None:
        graph_attributes = {
            'graph_type': 'Small World',
            'average_degree': 4,
            'rewiring_prob': 0.3,
        }

    graph_type = graph_attributes['graph_type']

    # Create the player graph. Previously the unimplemented branches fell
    # through and crashed later with an unbound ``G``.
    if graph_type == 'Small World':
        G = nx.watts_strogatz_graph(number_players,
                                    graph_attributes['average_degree'],
                                    graph_attributes['rewiring_prob'])
    elif graph_type in ('Random', 'Configuration', 'Structured'):
        raise NotImplementedError(
            "graph_type %r is not implemented yet" % (graph_type,))
    else:
        raise ValueError("unknown graph_type %r" % (graph_type,))

    # Add a starting pitch to every node.
    starting_pitches = {i: 'random' for i in range(len(G))}
    nx.set_node_attributes(G, starting_pitches, 'starting_pitch')

    # Create and evolve the ensemble.
    ensemble = Ensemble(G, player_attributes)
    ensemble.evolve(number_time_steps)

    # Collect the histories produced by the evolution.
    pitch_history_data = ensemble.get_pitch_history_data()
    harmonicity_data = ensemble.get_harmonicity_data()

    # Write the MIDI file and a companion .txt data file.
    filename = create_midi_file(ensemble, tempo)
    create_data_file(filename.replace('.mid', '.txt'), pitch_history_data,
                     harmonicity_data)
    return filename, pitch_history_data, harmonicity_data
def __init__(self, sourceFile, targetFile):
    """Initialise settings from ``Properties`` and load both data streams.

    Args:
        sourceFile: passed to ``Stream`` to build the source stream.
        targetFile: passed to ``Stream`` to build the target stream.
    """
    # 2D array representations of self.SDataBuffer / self.TDataBuffer and
    # their labels; all start out unset.
    self.SDataBufferArr = self.SDataLabels = None
    self.TDataBufferArr = self.TDataLabels = None

    self.useKliepCVSigma = Properties.useKliepCVSigma
    self.kliep = None
    self.useSvmCVParams = Properties.useSvmCVParams

    self.ensemble = Ensemble(Properties.ENSEMBLE_SIZE)

    # Window and forced-update settings, all coerced to int.
    self.initialWindowSize = int(Properties.INITIAL_DATA_SIZE)
    self.maxWindowSize = int(Properties.MAX_WINDOW_SIZE)
    self.enableForceUpdate = int(Properties.enableForceUpdate)
    self.forceUpdatePeriod = int(Properties.forceUpdatePeriod)

    # Simulate source and target streams from the corresponding files.
    print("Reading the Source Dataset")
    self.source = Stream(sourceFile, Properties.INITIAL_DATA_SIZE)
    print("Reading the Target Dataset")
    self.target = Stream(targetFile, Properties.INITIAL_DATA_SIZE)
    print("Finished Reading the Target Dataset")

    Properties.MAXVAR = self.source.initialData.shape[0]
def worker(fold, n_users, n_items, dataset_dir):
    """Train and evaluate one ensemble on a single fold.

    Reads ``ratings__<fold+1>_tra.txt`` and ``ratings__<fold+1>_tst.txt``
    from ``dataset_dir`` (plain string concatenation, so ``dataset_dir`` must
    end with a separator), binarises both, trains an ``Ensemble`` and prints
    the resulting scores.

    Args:
        fold: zero-based fold index; files and log lines use ``fold + 1``.
        n_users: forwarded to ``loadSparseR`` and ``Ensemble``.
        n_items: forwarded to ``loadSparseR`` and ``Ensemble``.
        dataset_dir: directory prefix of the rating files.

    Returns:
        The scores returned by ``Ensemble.train``.
    """
    fold_no = fold + 1

    def load_binarized(suffix):
        # Load one ratings file of this fold as a binarised LIL matrix.
        file_path = dataset_dir + 'ratings__' + str(fold_no) + suffix
        return lil_matrix(
            matBinarize(loadSparseR(n_users, n_items, file_path),
                        binarize_threshold))

    trasR = load_binarized('_tra.txt')
    # Log prefix: second-to-last path component plus the fold number.
    tag = dataset_dir.split('/')[-2] + '@%d:' % fold_no
    print(tag, trasR.shape, trasR.nnz,
          '%.2f' % (trasR.nnz / float(trasR.shape[0])))

    tstsR = load_binarized('_tst.txt')

    sampler = Sampler(trasR=trasR, batch_size=batch_size)
    en = Ensemble(n_users, n_items, kensemble, topN, split_method,
                  eval_metrics, reg, n_factors, batch_size)
    scores = en.train(fold_no, trasR, tstsR, sampler)

    metric_names = ','.join(['%s' % metric for metric in eval_metrics])
    score_values = ','.join(['%.6f' % (score) for score in scores])
    print(tag, metric_names + '@%d=' % (topN) + score_values)

    en.close()
    return scores
def reward_func(sigma_index_lst=None,
                default_n=20,
                epoch_num=4,
                epoch_min=100,
                epoch_step=50):
    """Train and then evaluate an 'rnn_cnn' ensemble for one SSA gene.

    Args:
        sigma_index_lst: component indices taken from the SSA gene, i.e. the
            indices where the gene is 1 (gene ``[0, 1, 0]`` gives ``[1]``).
            Defaults to ``[1, 2, 3]``.
        default_n: SSA window length; should be <= N / 2 where N is the
            length of the time series.
        epoch_num: number of sub-models used.
        epoch_min: minimum epoch count of a sub-model.
        epoch_step: epoch difference between two consecutive sub-models.

    Returns:
        Whatever ``Ensemble.evaluate_model(mode='roll')`` returns; the
        original notes describe it as a tuple ``(nse_q, nse_h)``.
    """
    # A fresh list per call: the previous mutable default was one shared
    # object handed to external code on every call.
    if sigma_index_lst is None:
        sigma_index_lst = [1, 2, 3]

    K.clear_session()

    # NOTE(review): FullLoader can construct Python objects; keep this
    # config file trusted, or switch to yaml.safe_load if it is plain data.
    with open('./settings/model/config.yaml', 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    def build(mode):
        # Train and test ensembles differ only in ``mode``.
        return Ensemble(mode=mode,
                        model_kind='rnn_cnn',
                        sigma_lst=sigma_index_lst,
                        default_n=default_n,
                        epoch_num=epoch_num,
                        epoch_min=epoch_min,
                        epoch_step=epoch_step,
                        **config)

    # Train.
    model = build('train')
    model.train_model_outer()

    # Test.
    # NOTE(review): the test-mode ensemble also calls train_model_outer(),
    # exactly as before — confirm that is the intended entry point.
    model = build('test')
    model.train_model_outer()
    model.retransform_prediction(mode='roll')
    return model.evaluate_model(mode='roll')
def get_unique_model():
    """Return an ``Ensemble`` of four freshly configured regressors.

    The members, in order, are an XGBoost regressor, an ElasticNet, an
    AdaBoost regressor and an ``Ilbeom_Linear`` instance.
    """
    members = [
        xgb.XGBRegressor(n_estimators=200,
                         learning_rate=0.02,
                         gamma=0,
                         subsample=0.75,
                         colsample_bytree=1,
                         max_depth=6),
        ElasticNet(l1_ratio=0.95, alpha=0.15, max_iter=50000),
        AdaBoostRegressor(learning_rate=0.01,
                          loss='square',
                          n_estimators=100),
        Ilbeom_Linear(),
    ]
    return Ensemble(members)
def __init__(self, saved_model: str = None):
    """Create the stacked fraud model.

    Args:
        saved_model (str, optional): name to use as ``model_name``. When it
            is empty or ``None`` a name of the form
            ``<timestamp>_fraud_ensemble.bin`` is generated instead.
    """
    super().__init__()

    # Base model 1: XGBoost.
    xgb_model = XGBClassifier(learning_rate=0.01,
                              n_estimators=750,
                              max_depth=6,
                              colsample_bytree=0.6,
                              min_child_weight=0.6)

    # Base model 2: random forest.
    rf_model = RandomForestClassifier(n_estimators=200,
                                      max_depth=6,
                                      min_samples_split=70,
                                      min_samples_leaf=30)

    # Logistic regression stacks the two base models.
    log_model = LogisticRegression()
    stack = Ensemble(n_splits=3,
                     stacker=log_model,
                     base_models=(rf_model, xgb_model))

    # Timestamp prefix for model and processed-dataset names: ISO format
    # without microseconds, with ':' replaced by '-'.
    now = datetime.datetime.now().replace(microsecond=0)
    self.datetime_prefix = now.isoformat().replace(':', '-')

    # Use the given name, or generate a new one.
    self.model_name = (saved_model if saved_model else
                       self.datetime_prefix + '_fraud_ensemble.bin')

    # The final model.
    self.model = stack
    print('Model: {}'.format(self.model_name))
def main():
    """Predict labels for a test set with the stored ensemble.

    Command line: ``<test_set_path> <output_file_path>``. Images are loaded
    from the test set path and divided by 255, the ensemble is loaded from
    ``./ensemble_files``, and the predictions are stored together with the
    test file names in the output file.
    """
    test_set_path = sys.argv[1]
    output_file_path = sys.argv[2]

    images = dataset_manip.load_images(load_directory(test_set_path))
    X_test = images / 255

    ens = Ensemble(input_shape=(77, 71, 1),
                   num_classes=10,
                   num_models=11,
                   batch_size=512,
                   path='./ensemble_files',
                   load=True)

    filenames = dataset_manip.get_filenames(test_set_path)
    predictions = ens.predict(X_test)
    dataset_manip.store_predictions(filenames, predictions, output_file_path)
def main():
    """Run 20 train/evaluate rounds of the ensemble on each dataset.

    For each of the two credit-card datasets under ``<cwd>/dataset`` the
    features X1..X23 are min-max scaled, then 20 random 80/20 splits are
    trained and evaluated. The accuracy (1 - error) and rejection rate of
    every round are handed to ``graphics`` together with the dataset name.

    The dataset path is built with ``os.path.join`` rather than hard-coded
    backslashes, so the script also finds its data on non-Windows systems.
    """
    dataset_name = ['credit_card_clients_balanced', 'credit_card_clients']
    feature_columns = ['X%d' % i for i in range(1, 24)]
    n_runs = 20

    for data_name in dataset_name:
        dataset_path = os.path.join(os.getcwd(), "dataset",
                                    data_name + ".csv")
        dataset = pd.read_csv(dataset_path, encoding='utf-8')

        data_x = dataset[feature_columns]
        data_y = dataset['Y']

        # Scale the features of the whole dataset.
        # NOTE(review): the scaler is fitted before the train/test split, so
        # test rows influence the scaling — confirm that is acceptable.
        min_max_scaler = preprocessing.MinMaxScaler()
        X_normalized = min_max_scaler.fit_transform(data_x)

        acc_rate = []
        reject_rate = []

        for i in range(n_runs):
            print('---------------- Ensemble -----------------')
            print('--- MLP - SVM - KNN - GMM - Naive Bayes ---')
            print(i + 1, 'of 20 iterations')

            X_train, X_test, y_train, y_test = train_test_split(
                X_normalized, data_y, test_size=0.2)
            y_train = np.array(y_train)
            y_test = np.array(y_test)

            model = Ensemble()
            model.train(X_train, y_train, gridSearch=False)
            y_hat = model.predict(X_test)

            error, reject = model.evaluate(y_hat, y_test)
            acc_rate.append(1 - error)
            reject_rate.append(reject)

        graphics(acc_rate, reject_rate, data_name)
def __init__(self,
             selected_algorithms='all',
             selected_hyperparameters='default',
             ensemble_size=3,
             ensemble_method='Logit',
             error_matrix_values='default',
             verbose=True):
    """Instantiate an AutoLearner object.

    All arguments are forwarded unchanged: ``selected_algorithms``,
    ``selected_hyperparameters``, ``ensemble_size``, ``error_matrix_values``
    and ``verbose`` to ``ErrorMatrix``; ``ensemble_size``,
    ``ensemble_method`` and ``verbose`` to ``Ensemble``.
    """
    # Error matrix defined for the specific dataset.
    self.error_matrix = ErrorMatrix(selected_algorithms,
                                    selected_hyperparameters,
                                    ensemble_size,
                                    error_matrix_values,
                                    verbose)
    # Empty ensemble object.
    self.ensemble = Ensemble(ensemble_size=ensemble_size,
                             ensemble_method=ensemble_method,
                             verbose=verbose)
def run(ncyc,
        N=1,
        lim=(20, 20),
        T=300,
        ensemble=None,
        animation=False,
        dframe=0.001):
    """Run ``ncyc`` simulation cycles and plot the total energy per cycle.

    Args:
        ncyc: number of cycles to simulate.
        N, lim, T: forwarded to ``Ensemble(N, lim, T)`` when no ``ensemble``
            is given.
        ensemble: an existing ensemble to continue from; a new one is
            created when this is ``None``.
        animation: currently unused.
        dframe: currently unused.

    Returns:
        The ensemble after the final cycle.
    """
    time_total = 0  # total "time" of the system
    time_pulse = 0  # time of the pulse

    # Identity test: ``== None`` would go through any custom ``__eq__``.
    if ensemble is None:
        ensemble = Ensemble(N, lim, T)
    ensemble.Plot("Initial Configuration")

    y = [ensemble.Energy_Total()]
    start_time = time.time()

    # Report progress about 20 times. The step is clamped to 1 so that runs
    # shorter than 20 cycles no longer fail on a zero range step.
    report_every = max(1, int(ncyc / 20))
    for i in range(ncyc):
        if i % report_every == 0:
            print("{0} cycles: {1} s".format(i, time.time() - start_time))
        ensemble.Cycle(time_pulse=time_pulse, time_total=time_total)
        y.append(ensemble.Energy_Total())

        # ``dt`` comes from the enclosing scope. The pulse clock wraps
        # around once it would exceed 120.
        time_total += dt
        if time_pulse + dt > 120:
            time_pulse += dt - 120
        else:
            time_pulse += dt

    print("Elapsed time:", time.time() - start_time, "(s)")
    print("Initial energy:", y[0], "(J)")
    print("Final energy:", y[-1], "(J)")
    print("Average energy:", ensemble.Average(), "(J)")
    ensemble.Plot("Final Configuration")

    x = range(0, len(y))
    plt.subplots(figsize=(20, 10))
    plt.plot(x, y)
    plt.xlim([0, ncyc])
    plt.ylim([min(y), max(y)])
    plt.title("Total Energy vs. Cycle")
    return ensemble
def main():
    """Advance a one-particle ensemble for 50 field updates, printing each.

    All positions are shifted by 2 before the run; after every update the
    elapsed time, positions and velocities are printed.
    """
    ATOM_NUM = 1
    CYCLE_NUM = 50

    ensemble = Ensemble([Particle() for _ in range(ATOM_NUM)])
    ensemble.positions += 2
    print("初期位置\n", ensemble.positions)
    print("初速度\n", ensemble.velocities)

    myfield = Field(ensemble, dt=0.01)
    for step in range(1, CYCLE_NUM + 1):
        myfield.update()
        print("t:", myfield.dt * step)
        print("x:", myfield.ensemble.positions)
        print("v:", myfield.ensemble.velocities)
        print()
def ensemble_test():
    """Demonstrate whole-array operations on a two-particle ensemble.

    Prints the positions after each in-place operation and after a full
    reassignment, then prints the velocities.
    """
    ATOM_NUM = 2
    ensemble = Ensemble([Particle() for _ in range(ATOM_NUM)])
    print("位置の配列\n", ensemble.positions)

    # Add 1 to every coordinate.
    ensemble.positions += 1
    print("+1\n", ensemble.positions)

    # Multiply every coordinate by 2.
    ensemble.positions *= 2
    print("*2\n", ensemble.positions)

    # Scale per axis: x*1, y*2, z*3.
    ensemble.positions *= np.array([1, 2, 3])
    print("x*1, y*2, z*3\n", ensemble.positions)

    # Set every coordinate to 100.
    ensemble.positions = np.ones((ensemble.N, 3)) * 100
    print("=100\n", ensemble.positions)

    print("速度の配列\n", ensemble.velocities)
def test(texts, classes, models, nn_params, folds=4):
    '''
    Estimate ensemble performance with stratified k-fold cross-validation.

    For every fold an Ensemble is trained on the training part, negative
    predictions are clipped to 0 and the AUC on the held-out part is printed.
    Samples whose prediction is off by more than 0.5 are collected locally.
    Returns the sum of the fold AUCs divided by `folds`.
    '''
    classes = np.array(classes)
    texts = np.array(texts)
    wrongs = []
    auc_sum = 0
    splits = cross_validation.StratifiedKFold(classes, folds)
    for train_idx, test_idx in splits:
        texts_test = texts[test_idx]
        classes_test = classes[test_idx]

        n = Ensemble(texts[train_idx], classes[train_idx], nn_params, models)
        predictions = n.classify(texts_test)
        predictions[predictions < 0] = 0

        auc = calculate_auc(classes_test, predictions)
        print(auc)
        auc_sum += auc

        # Remember the clearly wrong predictions of this fold.
        for truth, guess, text in zip(classes_test, predictions, texts_test):
            if abs(truth - guess) > 0.5:
                wrongs.append((truth, guess, text))
    return auc_sum / folds
print('\n___PARTITIONS 2___') partitions2 = np.transpose(partitions) ensemble2 = Ensemble(partitions=partitions2, n_cluster=3, partitions_format='PE') e2, ts2, pr2 = ensemble2.mcla(times=True, partial_results=True) for t in ts2: print(f'{t[0]}: {t[1]}s') for r in pr2: print(r[0]) print(r[1]) """ print('\n___PARTITIONS 3___') partitions3 = np.random.randint(8, size=(8, 100000)) ensemble3 = Ensemble(partitions=partitions3, n_cluster=8, partitions_format='PE') e3, ts3, _pr3 = ensemble3.mcla(times=True) for t in ts3: print(f'{t[0]}: {t[1]}s') """ hypergraph4 = np.array([ [1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 1, 1, 0, 0], [0, 0, 0, 0, 0, 1, 1], [0, 0, 0, 0, 0, 1, 1], [1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 1, 1, 0, 0], [1, 1, 0, 0, 0, 0, 0], [0, 0, 1, 1, 0, 0, 0], [0, 0, 0, 0, 1, 1, 1],
def main():
    """Train the segmentation model, build an ensemble and write submissions.

    Stages:
      1. Training with a cosine-annealed learning rate that is restarted in
         cycles, keeping the best checkpoint of every cycle
         (``model-<i>.pth``), the overall best checkpoint (``model.pth``) and
         a running weight average (``swa_model.pth``).
      2. Evaluation of the best single model, then selection of the
         ``ensemble_model_count`` best checkpoints by validation precision.
      3. Prediction on the test data and writing of the submission files.

    An optional first command-line argument names a directory whose
    ``model.pth`` is used as the starting point.

    Fixes relative to the previous version:
      * Ensemble selection no longer evicts from an empty dict. It used to
        call ``min()`` on an empty key set for the first candidate and fail.
      * ``swa_model.pth`` is only considered when it was actually written.
      * The SWA validation summary writer is now closed like the others.
    """
    input_dir = "/amit/kaggle/tgs"
    output_dir = "/artifacts"
    image_size_target = 128
    batch_size = 32
    epochs_to_train = 300
    bce_loss_weight_gamma = 0.98  # currently unused
    sgdr_min_lr = 0.0001  # 0.0001, 0.001
    sgdr_max_lr = 0.001  # 0.001, 0.03
    sgdr_cycle_epochs = 20
    sgdr_cycle_epoch_prolongation = 3
    sgdr_cycle_end_patience = 3
    train_abort_epochs_without_improval = 30
    ensemble_model_count = 3
    swa_epoch_to_start = 30

    model_dir = sys.argv[1] if len(sys.argv) > 1 else None

    train_data = TrainData(input_dir)

    train_set = TrainDataset(train_data.train_set_df, image_size_target, augment=True)
    train_set_data_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=8)

    val_set = TrainDataset(train_data.val_set_df, image_size_target, augment=False)
    val_set_data_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=2)

    if model_dir:
        model = create_model(pretrained=False).to(device)
        model.load_state_dict(torch.load("{}/model.pth".format(model_dir), map_location=device))
    else:
        model = create_model(pretrained=True).to(device)

    torch.save(model.state_dict(), "{}/model.pth".format(output_dir))

    swa_model = create_model(pretrained=False).to(device)

    print("train_set_samples: %d, val_set_samples: %d" % (len(train_set), len(val_set)))

    global_val_precision_best_avg = float("-inf")
    global_swa_val_precision_best_avg = float("-inf")
    sgdr_cycle_val_precision_best_avg = float("-inf")

    epoch_iterations = len(train_set) // batch_size

    optimizer = optim.Adam(model.parameters(), lr=sgdr_max_lr)
    lr_scheduler = CosineAnnealingLR(optimizer, T_max=sgdr_cycle_epochs, eta_min=sgdr_min_lr)

    optim_summary_writer = SummaryWriter(log_dir="{}/logs/optim".format(output_dir))
    train_summary_writer = SummaryWriter(log_dir="{}/logs/train".format(output_dir))
    val_summary_writer = SummaryWriter(log_dir="{}/logs/val".format(output_dir))
    swa_val_summary_writer = SummaryWriter(log_dir="{}/logs/swa_val".format(output_dir))

    sgdr_iterations = 0
    sgdr_reset_count = 0
    batch_count = 0
    epoch_of_last_improval = 0
    sgdr_next_cycle_end_epoch = sgdr_cycle_epochs + sgdr_cycle_epoch_prolongation
    swa_update_count = 0

    # Continue numbering after any per-cycle checkpoints already present.
    ensemble_model_index = 0
    for model_file_path in glob.glob("{}/model-*.pth".format(output_dir)):
        model_file_name = os.path.basename(model_file_path)
        model_index = int(model_file_name.replace("model-", "").replace(".pth", ""))
        ensemble_model_index = max(ensemble_model_index, model_index + 1)

    print('{"chart": "best_val_precision", "axis": "epoch"}')
    print('{"chart": "val_precision", "axis": "epoch"}')
    print('{"chart": "val_loss", "axis": "epoch"}')
    print('{"chart": "sgdr_reset", "axis": "epoch"}')
    print('{"chart": "precision", "axis": "epoch"}')
    print('{"chart": "loss", "axis": "epoch"}')
    print('{"chart": "swa_val_precision", "axis": "epoch"}')
    print('{"chart": "swa_val_loss", "axis": "epoch"}')

    train_start_time = time.time()

    criterion = nn.BCEWithLogitsLoss()

    for epoch in range(epochs_to_train):
        epoch_start_time = time.time()

        model.train()

        train_loss_sum = 0.0
        train_precision_sum = 0.0
        train_step_count = 0
        for batch in train_set_data_loader:
            images, masks, mask_weights = \
                batch[0].to(device, non_blocking=True), \
                batch[1].to(device, non_blocking=True), \
                batch[2].to(device, non_blocking=True)

            # Fractional position within the current cycle, capped at its end.
            lr_scheduler.step(epoch=min(sgdr_cycle_epochs, sgdr_iterations / epoch_iterations))

            optimizer.zero_grad()
            prediction_logits = model(images)
            predictions = torch.sigmoid(prediction_logits)
            # Per-batch pixel weights are injected into the shared criterion.
            criterion.weight = mask_weights
            loss = criterion(prediction_logits, masks)
            loss.backward()
            optimizer.step()

            train_loss_sum += loss.item()
            train_precision_sum += np.mean(precision_batch(predictions, masks))
            sgdr_iterations += 1
            train_step_count += 1
            batch_count += 1

            optim_summary_writer.add_scalar("lr", get_learning_rate(optimizer), batch_count + 1)

        train_loss_avg = train_loss_sum / train_step_count
        train_precision_avg = train_precision_sum / train_step_count

        val_loss_avg, val_precision_avg = evaluate(model, val_set_data_loader, criterion)

        # Best checkpoint of the current cycle.
        model_improved_within_sgdr_cycle = val_precision_avg > sgdr_cycle_val_precision_best_avg
        if model_improved_within_sgdr_cycle:
            torch.save(model.state_dict(), "{}/model-{}.pth".format(output_dir, ensemble_model_index))
            sgdr_cycle_val_precision_best_avg = val_precision_avg

        # Best checkpoint overall.
        model_improved = val_precision_avg > global_val_precision_best_avg
        ckpt_saved = False
        if model_improved:
            torch.save(model.state_dict(), "{}/model.pth".format(output_dir))
            global_val_precision_best_avg = val_precision_avg
            ckpt_saved = True

        swa_model_improved = False
        if epoch + 1 >= swa_epoch_to_start:
            if model_improved_within_sgdr_cycle:
                swa_update_count += 1
                moving_average(swa_model, model, 1.0 / swa_update_count)
                bn_update(train_set_data_loader, swa_model)

                # NOTE(review): this compares the *training* model's
                # validation precision, not the SWA model's — confirm.
                swa_model_improved = val_precision_avg > global_swa_val_precision_best_avg
                if swa_model_improved:
                    torch.save(swa_model.state_dict(), "{}/swa_model.pth".format(output_dir))
                    global_swa_val_precision_best_avg = val_precision_avg

        if model_improved or swa_model_improved:
            epoch_of_last_improval = epoch

        # Restart the cycle once it is over and no recent improvement exists.
        sgdr_reset = False
        if (epoch + 1 >= sgdr_next_cycle_end_epoch) and (epoch - epoch_of_last_improval >= sgdr_cycle_end_patience):
            sgdr_iterations = 0
            sgdr_next_cycle_end_epoch = epoch + 1 + sgdr_cycle_epochs + sgdr_cycle_epoch_prolongation
            ensemble_model_index += 1
            sgdr_cycle_val_precision_best_avg = float("-inf")
            sgdr_reset_count += 1
            sgdr_reset = True

        swa_val_loss_avg, swa_val_precision_avg = evaluate(swa_model, val_set_data_loader, criterion)

        optim_summary_writer.add_scalar("sgdr_reset", sgdr_reset_count, epoch + 1)

        train_summary_writer.add_scalar("loss", train_loss_avg, epoch + 1)
        train_summary_writer.add_scalar("precision", train_precision_avg, epoch + 1)

        val_summary_writer.add_scalar("loss", val_loss_avg, epoch + 1)
        val_summary_writer.add_scalar("precision", val_precision_avg, epoch + 1)

        swa_val_summary_writer.add_scalar("loss", swa_val_loss_avg, epoch + 1)
        swa_val_summary_writer.add_scalar("precision", swa_val_precision_avg, epoch + 1)

        epoch_end_time = time.time()
        epoch_duration_time = epoch_end_time - epoch_start_time

        print(
            "[%03d/%03d] %ds, lr: %.6f, loss: %.3f, val_loss: %.3f|%.3f, prec: %.3f, val_prec: %.3f|%.3f, ckpt: %d, rst: %d" % (
                epoch + 1,
                epochs_to_train,
                epoch_duration_time,
                get_learning_rate(optimizer),
                train_loss_avg,
                val_loss_avg,
                swa_val_loss_avg,
                train_precision_avg,
                val_precision_avg,
                swa_val_precision_avg,
                int(ckpt_saved),
                int(sgdr_reset)),
            flush=True)

        print('{"chart": "best_val_precision", "x": %d, "y": %.3f}' % (epoch + 1, global_val_precision_best_avg))
        print('{"chart": "val_precision", "x": %d, "y": %.3f}' % (epoch + 1, val_precision_avg))
        print('{"chart": "val_loss", "x": %d, "y": %.3f}' % (epoch + 1, val_loss_avg))
        print('{"chart": "sgdr_reset", "x": %d, "y": %.3f}' % (epoch + 1, sgdr_reset_count))
        print('{"chart": "precision", "x": %d, "y": %.3f}' % (epoch + 1, train_precision_avg))
        print('{"chart": "loss", "x": %d, "y": %.3f}' % (epoch + 1, train_loss_avg))
        print('{"chart": "swa_val_precision", "x": %d, "y": %.3f}' % (epoch + 1, swa_val_precision_avg))
        print('{"chart": "swa_val_loss", "x": %d, "y": %.3f}' % (epoch + 1, swa_val_loss_avg))

        if sgdr_reset and sgdr_reset_count >= ensemble_model_count and epoch - epoch_of_last_improval >= train_abort_epochs_without_improval:
            print("early abort")
            break

    optim_summary_writer.close()
    train_summary_writer.close()
    val_summary_writer.close()
    swa_val_summary_writer.close()

    train_end_time = time.time()
    print()
    print("Train time: %s" % str(datetime.timedelta(seconds=train_end_time - train_start_time)))

    eval_start_time = time.time()

    print()
    print("evaluation of the training model")
    model.load_state_dict(torch.load("{}/model.pth".format(output_dir), map_location=device))
    analyze(Ensemble([model]), train_data.val_set_df, use_tta=False)
    analyze(Ensemble([model]), train_data.val_set_df, use_tta=True)

    # Keep the ``ensemble_model_count`` candidates with the highest
    # validation precision. Candidates with exactly equal precision share a
    # key, so the later one replaces the earlier one.
    score_to_model = {}
    ensemble_model_candidates = glob.glob("{}/model-*.pth".format(output_dir))
    swa_model_path = "{}/swa_model.pth".format(output_dir)
    if os.path.exists(swa_model_path):
        # Only present when SWA saved at least once during training.
        ensemble_model_candidates.append(swa_model_path)
    for model_file_path in ensemble_model_candidates:
        model_file_name = os.path.basename(model_file_path)
        m = create_model(pretrained=False).to(device)
        m.load_state_dict(torch.load(model_file_path, map_location=device))
        val_loss_avg, val_precision_avg = evaluate(m, val_set_data_loader, criterion)
        print("ensemble '%s': val_loss=%.3f, val_precision=%.3f" % (model_file_name, val_loss_avg, val_precision_avg))
        if len(score_to_model) < ensemble_model_count:
            # Still room: just add the candidate.
            score_to_model[val_precision_avg] = m
        elif min(score_to_model) < val_precision_avg:
            # Full: replace the currently worst member.
            del score_to_model[min(score_to_model)]
            score_to_model[val_precision_avg] = m

    ensemble_models = list(score_to_model.values())
    for ensemble_model in ensemble_models:
        val_loss_avg, val_precision_avg = evaluate(ensemble_model, val_set_data_loader, criterion)
        print("ensemble: val_loss=%.3f, val_precision=%.3f" % (val_loss_avg, val_precision_avg))

    model = Ensemble(ensemble_models)
    mask_threshold_global, mask_threshold_per_cc = analyze(model, train_data.val_set_df, use_tta=True)

    eval_end_time = time.time()
    print()
    print("Eval time: %s" % str(datetime.timedelta(seconds=eval_end_time - eval_start_time)))

    print()
    print("submission preparation")

    submission_start_time = time.time()

    test_data = TestData(input_dir)
    calculate_predictions(test_data.df, model, use_tta=True)
    calculate_prediction_masks(test_data.df, mask_threshold_global)

    print()
    print(test_data.df.groupby("predictions_cc").agg({"predictions_cc": "count"}))

    write_submission(test_data.df, "prediction_masks", "{}/{}".format(output_dir, "submission.csv"))
    write_submission(test_data.df, "prediction_masks_best", "{}/{}".format(output_dir, "submission_best.csv"))

    submission_end_time = time.time()
    print()
    print("Submission time: %s" % str(datetime.timedelta(seconds=submission_end_time - submission_start_time)))
@child('e3') def i2(x): return x**2 @child('e3') def i3(x, y): return x**3 + y if __name__ == '__main__': # create our first ensemble and give it a name e1 = Ensemble('e1') # create a second ensemble e2 = Ensemble('e2') # you may use the ensembles as long as you specify which model you use print(e1(child='f', x=2)) print(e1(child='g', y=3)) print(e2(child='f', x=2)) # try to use model `g` but it's not in ensemble `e2` try: print(e2(child='g', y=3)) except ValueError: pass # try to use model `h` but it's not decorated with @model
model = resnet101() num_ftrs = model.fc.in_features model.fc = nn.Linear(num_ftrs, 1) if latest_model_path != "": model.load_state_dict(torch.load(latest_model_path)) model.cuda() # Set parameters for model criterion = Loss(Wt1, Wt0) optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=1, verbose=True) # Train model model = train_model(model, criterion, optimizer, dataloaders, scheduler, dataset_sizes, epochs - current_epoch, costs, accs, num_ID, model_type) # For testing ensemble model else: model = Ensemble("models/best_model_dense_4.pth", "models/best_model_res_12.pth", "models/best_model_vgg_19.pth") model.cuda() criterion = Loss(Wt1, Wt0) test_acc, test_loss = test_ensemble_mean(model, criterion, dataloaders, dataset_sizes)
# classes.append(int(row[0])) # #results = n.classify(texts) # #results[results<0] = 0 # #print calculate_auc(classes, results) # r1 = np.array(m1.classify(texts)) # print calculate_auc(classes, r1) # r2 = np.array(m2.classify(texts)) # print calculate_auc(classes, r2) # r = (1.2*r1 + 0.8*r2) / 2 # r[r>1] = 1 # r[r<0] = 0 # print calculate_auc(classes, r) #print TestSVM.test_model(texts, classes, models[-1]) #print TestSVM.test(texts, classes, models, nn_params) n = Ensemble(texts, classes, nn_params, models) end = time.time() # print "training time=" # print end-start start = time.time() # evaluate the classfier on verification dataset texts = [] inp = raw_input() while inp: texts.append(inp.decode('utf8')) inp = raw_input() results = n.classify(texts)
positives = sum(1 for label in labels if label) predicted_positives = sum(1 for pred in preds if pred) true_positives = sum(1 for label, pred in zip(labels, preds) if label and pred) return 100.0 * true_positives / predicted_positives, 100.0 * true_positives / positives def evaluate(model): def wrapper(dataset): preds = [model(x=x) for x, _ in dataset] precision, recall = get_results(dataset, preds) return { 'precision': f'{precision:.1f}%', 'recall': f'{recall:.1f}%', } return wrapper if __name__ == '__main__': e = Ensemble('ensemble', children=[model1, model2], mode='all') results = Ensemble('results', children=[model1, model2, e]) results.decorate_children(evaluate) print(results) pprint(results(dataset=get_dataset())) """ {'ensemble': {'precision': '100.0%', 'recall': '100.0%'}, 'model1': {'precision': '18.2%', 'recall': '100.0%'}, 'model2': {'precision': '30.0%', 'recall': '100.0%'}} """
print( "[epoch: {:d}] avg train_loss: {:.3f} eval ll: {:.3f} ({:.1f}s)" .format(epoch, sum(losses) / len(losses), eval_ll, time.time() - tic)) print("running test code") name = "sample_" + str(epoch) + ".txt" test_code(model, name=name) print("ran test code") if __name__ == '__main__': import args from model import Model from ensemble import Ensemble if method == 'ensemble': model = Ensemble(vectors).to(device) else: model = Model(vectors).to(device) train(model) # import dill # with open('model.p', 'rb') as h: # model = dill.load(h) # visualize_attn(model)
experiment_path = "./experiments/ensemble/"
data_path = "./datasets/paper_ultimate_conv/"
cf_data = imp.load_source("cf_data",
                          data_path + "cfg_testing_data_creation.py")

# Load one network per sub-directory of the experiment folder.
nets = []
net_paths = glob.glob(experiment_path + "*/")
for net_path in net_paths:
    net = NetworkConvDropout()
    net.init(29, 29, 13, 134, 135)
    net.load_parameters(open_h5file(net_path + "net.net"))
    n_out = net.n_out
    nets.append(net)
ensemble_net = Ensemble(nets)

# Load the scaler stored next to the first network. The file is opened in a
# ``with`` block so the handle is closed once unpickling has finished.
# NOTE(review): pickle executes code on load — only use trusted scaler files.
with open(net_paths[0] + "s.scaler", "rb") as scaler_file:
    scaler = pickle.load(scaler_file)

# Files on which to evaluate the network.
file_list = list_miccai_files(**{
    "mode": "folder",
    "path": "./datasets/miccai/2/"
})
n_files = len(file_list)

# Options for the generation of the dataset.
# The generation/evaluation of the dataset has to be split into batches as a
# whole brain does not fit into memory.
batch_size = 100000
select_region = SelectWholeBrain()
def __init__(self):
    """Create ``self.ensemble`` from the ``instruments`` name in scope."""
    ensemble = Ensemble(instruments)
    self.ensemble = ensemble
from saveobject import save_obj

# Simulation settings handed to Ensemble below.
N = 100
steps = 1000
repeat = 30

# Grid from b1 (inclusive) to b2 (exclusive) in steps of res.
res = 0.01
b1 = 0
b2 = 3
B = np.arange(b1, b2, res)
# Drop the zero entry before taking reciprocals.
B = 1 / B[B != 0]
M = np.array([0])

ensemble = Ensemble(N, B, M, steps, repeat, False)
ensemble.getStats()

beta = ensemble.beta
mu = ensemble.mu
stats = ensemble.stats
save_obj(stats, "stats8")

# keys = ["energy","magnetization","population","entropy"]
# def calcStats(size,beta,mu,steps,times):
#     global keys
#     stats = {}
#     arr = {}
#     for key in keys:
'max_depth': 6, 'n_estimators': 1000, 'learning_rate': 0.025, 'subsample': 0.9 } models = { "LGB-1": LGBMClassifier(**lgb_params), "XGB-1": XGBClassifier(**xgb_params), "LGB-2": LGBMClassifier(**lgb_params2), #"LGB-3": LGBMClassifier(**lgb_params3), "XGB-2": XGBClassifier(**xgb_params2), #"CAT": CatBoostClassifier(**cat_params), #"GBM": GradientBoostingClassifier(**gb_params), #"RF": RandomForestClassifier(**rf_params), #"ET": ExtraTreesClassifier(**et_params), #"ABC": AdaBoostClassifier(n_estimators=100), } start = time.time() stack = Ensemble(4, models.values(), stacker=SGDClassifier(loss="log", max_iter=1000)) y_pred = stack.fit_predict(X, y, X_test) print("Finished ensembling in %.2f seconds" % (time.time() - start)) sub = pd.DataFrame() sub['id'] = id_test sub['target'] = y_pred sub.to_csv("%s.csv" % ("-".join(models.keys())), index=False)
def main():
    """Run 5000 cycles on an ensemble built with ``lim=(30, 30)``, ``T=300``."""
    run(5000, ensemble=Ensemble(3, 3, lim=(30, 30), T=300))