def worker(fold, n_users, n_items, dataset_dir):
    traFilePath = dataset_dir + 'ratings__' + str(fold + 1) + '_tra.txt'
    trasR = lil_matrix(matBinarize(loadSparseR(n_users, n_items, traFilePath),
                                   binarize_threshold))
    print(dataset_dir.split('/')[-2] + '@%d:' % (fold + 1),
          trasR.shape, trasR.nnz,
          '%.2f' % (trasR.nnz / float(trasR.shape[0])))

    tstFilePath = dataset_dir + 'ratings__' + str(fold + 1) + '_tst.txt'
    tstsR = lil_matrix(matBinarize(loadSparseR(n_users, n_items, tstFilePath),
                                   binarize_threshold))

    sampler = Sampler(trasR=trasR, batch_size=batch_size)

    en = Ensemble(n_users, n_items, kensemble, topN, split_method,
                  eval_metrics, reg, n_factors, batch_size)
    scores = en.train(fold + 1, trasR, tstsR, sampler)
    print(dataset_dir.split('/')[-2] + '@%d:' % (fold + 1),
          ','.join(['%s' % eval_metric for eval_metric in eval_metrics])
          + '@%d=' % (topN)
          + ','.join(['%.6f' % (score) for score in scores]))

    en.close()
    return scores
def main():
    print(args)
    print("=> creating model '{}'".format(args.arch))
    model = Ensemble()
    model = torch.nn.DataParallel(model).cuda()
    print(model)

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    test_data = datautil.SceneDataset(
        args.data,
        img_transform=transforms.Compose([
            transforms.Resize((args.img_size, args.img_size)),
            transforms.ToTensor(),
            normalize]))
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size,
                                              shuffle=False, num_workers=4,
                                              pin_memory=True)

    checkpoint = torch.load(args.test_model)
    model.load_state_dict(checkpoint['state_dict'])
    #model.load_state_dict(checkpoint)

    if os.path.isdir(args.data):
        ret = test(test_loader, model)
        imgs = [i[:-4] for i in os.listdir(args.data)]
        with open('result3_.csv', 'w') as f:
            '''
            f.write(','.join(['FILE_ID','CATEGORY_ID'])+'\n')
            f.write('\n'.join([','.join([str(a),str(b)]) for a,b in zip(imgs,ret)]))
            '''
            # FILE_ID,CATEGORY_ID0,CATEGORY_ID1,CATEGORY_ID2
            f.write(','.join(['FILE_ID', 'CATEGORY_ID0', 'CATEGORY_ID1', 'CATEGORY_ID2']) + '\n')
            f.write('\n'.join([','.join([str(a)] + [str(int(i)) for i in b])
                               for a, b in zip(imgs, ret)]))
    else:
        test_labeled(test_loader, model)
def main():
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    DATASET_DIRECTORY = '../data_part1'

    X, y, X_hidden = dataset_manip.load_dataset(DATASET_DIRECTORY)
    num_classes = len(set(y))

    print('X.shape = ' + str(X.shape))
    print('X_hidden.shape = ' + str(X_hidden.shape))

    ens = Ensemble(input_shape=(77, 71, 1), num_classes=10, num_models=11,
                   batch_size=512, path='./ensemble_files', load=False)
    ens.train(X=X, y=y, epochs_per_model=300, split_rate=0.9)
    print(ens.measure_accuracy(X, y))
    return

    # Unreachable below: kept from an earlier single-model pipeline.
    X_train, X_validation, y_train, y_validation = dataset_manip.split_dataset(X, y, rate=0.5)
    model = Model(image_shape=X.shape[1:], num_classes=num_classes,
                  model_path='./model_files/model', batch_size=512,
                  first_run=True)  # 1250
    model.train(X_train, y_train, X_validation, y_validation, 500)
    model.train_unsupervised(X_hidden, X_validation, y_validation, 200)
    print('Final Accuracy: {}'.format(model.measure_accuracy(X_validation, y_validation)))
def __init__(self, sourceFile, targetFile):
    self.SWindow = []
    self.TWindow = []
    self.TPredictWindow = []

    self.SDataBuffer = []  # Queue
    self.TDataBuffer = []  # Queue

    self.SInitialDataBuffer = []
    self.TInitialDataBuffer = []

    self.changeDetector = ChangeDetection(Properties.GAMMA, Properties.SENSITIVITY,
                                          Properties.MAX_WINDOW_SIZE)
    self.ensemble = Ensemble(Properties.ENSEMBLE_SIZE)

    classNameList = []
    self.source = Stream(sourceFile, classNameList, Properties.INITIAL_DATA_SIZE)
    self.target = Stream(targetFile, classNameList, Properties.INITIAL_DATA_SIZE)
    Properties.MAXVAR = self.source.MAXVAR

    self.gateway = JavaGateway(
        start_callback_server=True,
        gateway_parameters=GatewayParameters(port=Properties.PY4JPORT),
        callback_server_parameters=CallbackServerParameters(port=Properties.PY4JPORT + 1))
    self.app = self.gateway.entry_point
def iterpose(self, rmsd=0.0001):
    confs = self._confs.copy()
    Ensemble.iterpose(self, rmsd)
    self._confs = confs
    LOGGER.info("Final superposition to calculate transformations.")
    self.superpose()
def delCoordset(self, index):
    """Delete a coordinate set from the ensemble."""
    Ensemble.delCoordset(self, index)
    if isinstance(index, int):
        index = [index]
    else:
        index = list(index)
    index.sort(reverse=True)
    for i in index:
        self._labels.pop(i)
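# Why the reverse sort in delCoordset matters: popping in ascending order
# would shift the later indices as earlier items are removed. A minimal,
# self-contained sketch with a plain list standing in for the labels:
labels = ['a', 'b', 'c', 'd', 'e']
for i in sorted([1, 3], reverse=True):  # pop index 3 first, then index 1
    labels.pop(i)
assert labels == ['a', 'c', 'e']  # ascending order would leave ['a', 'c', 'd']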
def process_all_images(config):
    filenames = sorted(config.files)
    tables = {}
    segmentations = {}
    ensembles = {}
    for segmentation in config.segmentations:
        segmentations[segmentation] = {}
    ensembles['walker_binary'] = {}
    ensembles['opt'] = {}
    erosions = {}
    methods = {'unet': {}, 'walker_binary': {}, 'opt': {}}
    for key in ['jac', 'af1', 'merge_rate', 'split_rate']:
        tables[key] = pd.DataFrame(columns=list(methods.keys()), copy=True)

    os.makedirs(config.output, exist_ok=True)
    root_dir = os.path.join(config.output, config.filename)
    if os.path.exists(root_dir):
        shutil.rmtree(root_dir)
    os.makedirs(root_dir, exist_ok=True)

    counter = 0
    for file in filenames:
        if counter % 50 == 0:
            print(counter)
        if counter == config.counter:
            break
        annot_path = os.path.join(config.annot, file.strip())
        annot = skimage.io.imread(annot_path, as_gray=True)
        for segmentation in segmentations.keys():
            path = os.path.join(config.root, segmentation, file.strip())
            segmentations[segmentation]['orig'] = skimage.io.imread(path, as_gray=True)
            segmentations[segmentation]['results'] = comp.get_per_image_metrics(
                annot, segmentations[segmentation]['orig'], False)
            segmentations[segmentation]['mask'] = np.where(
                segmentations[segmentation]['orig'] > 0, 255, 0)
        for ensemble in ensembles.keys():
            ensembles[ensemble]['orig'] = Ensemble(
                segmentations, config.erosions, config.beta).ensemble(ensemble)
            ensembles[ensemble]['results'] = comp.get_per_image_metrics(
                annot, ensembles[ensemble]['orig'], False)
            ensembles[ensemble]['mask'] = np.where(ensembles[ensemble]['orig'] > 0, 255, 0)
        for key in tables.keys():
            results = {}
            # for segmentation in segmentations.keys():
            #     results[segmentation] = segmentations[segmentation]['results'][key]
            avg = statistics.mean([segmentations[segmentation]['results'][key]
                                   for segmentation in segmentations])
            results['unet'] = avg
            for ensemble in ensembles.keys():
                if ensemble != 'union':
                    results[ensemble] = ensembles[ensemble]['results'][key]
            tables[key] = tables[key].append(results, ignore_index=True)
        counter += 1

    os.makedirs(os.path.join(root_dir, 'stats'), exist_ok=True)
    output_charts(tables, list(methods.keys()), os.path.join(root_dir, 'stats'), config)
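# The Ensemble(...).ensemble(method) call above combines several binary
# segmentations. A minimal pixelwise majority-vote sketch over {0, 255} masks;
# an illustrative assumption, not the project's 'walker_binary' or 'opt'
# strategy:
import numpy as np

def majority_vote(masks):
    """masks: list of equally shaped arrays with values in {0, 255}."""
    votes = np.mean([m > 0 for m in masks], axis=0)  # fraction of positive votes per pixel
    return np.where(votes >= 0.5, 255, 0)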
async def main():
    this_dir = os.path.dirname(os.path.abspath(__file__))
    input_path = os.path.join(this_dir, "input.txt")
    with open(input_path) as f:
        raw_code = f.readline()
    e = Ensemble(raw_code)
    await e.run()
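# A coroutine needs an event loop; a minimal entry point for the async main()
# above (standard asyncio, Python 3.7+):
import asyncio

if __name__ == "__main__":
    asyncio.run(main())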
def main():
    #print(sys.argv)
    test_set_path = sys.argv[1]
    output_file_path = sys.argv[2]
    X_test = dataset_manip.load_images(load_directory(test_set_path)) / 255

    #model = Model(image_shape=(77, 71, 1), num_classes=10, model_path='./model_files/model', batch_size=512, first_run=False)
    #dataset_manip.store_predictions(dataset_manip.get_filenames(test_set_path), model.predict(X_test), output_file_path)

    ens = Ensemble(input_shape=(77, 71, 1), num_classes=10, num_models=11,
                   batch_size=512, path='./ensemble_files', load=True)
    dataset_manip.store_predictions(dataset_manip.get_filenames(test_set_path),
                                    ens.predict(X_test), output_file_path)
def __init__(self, selected_algorithms='all', selected_hyperparameters='default',
             ensemble_size=3, ensemble_method='Logit', error_matrix_values='default',
             verbose=True):
    """Instantiates an AutoLearner object."""
    self.error_matrix = ErrorMatrix(selected_algorithms, selected_hyperparameters,
                                    ensemble_size, error_matrix_values, verbose)
    """Error matrix defined for a specific dataset."""
    self.ensemble = Ensemble(ensemble_size=ensemble_size,
                             ensemble_method=ensemble_method,
                             verbose=verbose)
    """Instantiate an empty ensemble object."""
def create_song(graph_attributes={'graph_type': 'Small World',
                                  'average_degree': 4,
                                  'rewiring_prob': 0.3},
                number_players=20, number_time_steps=300, tempo=108,
                player_attributes=None):
    """
    arguments:
        graph_type: 'Small World', 'Random', 'Configuration', 'Structured'
        average_degree
        number_of_players
        rewiring_prob
        number_time_steps
        tempo
        player_attributes: {
            duration: (min_duration, max_duration)
            note_change_choices: 'All', 'Neighbors of Neighbors'
            harmonicity threshold: 'Fixed' or 'Moving Average'
            fixed threshold
            moving average threshold
            susceptibility to influence
        }
    """
    graph_type = graph_attributes['graph_type']

    # create player graph
    if graph_type == 'Small World':
        #assert rewiring_prob
        G = nx.watts_strogatz_graph(number_players,
                                    graph_attributes['average_degree'],
                                    graph_attributes['rewiring_prob'])
    elif graph_type == 'Random':
        pass
    elif graph_type == 'Structured':
        pass

    # add starting pitch to each node
    starting_pitches = {i: 'random' for i in range(len(G))}
    nx.set_node_attributes(G, starting_pitches, 'starting_pitch')

    # create ensemble object
    ensemble = Ensemble(G, player_attributes)

    # evolve ensemble
    ensemble.evolve(number_time_steps)

    # collect pitch history
    pitch_history_data = ensemble.get_pitch_history_data()
    harmonicity_data = ensemble.get_harmonicity_data()

    # create files
    filename = create_midi_file(ensemble, tempo)
    create_data_file(filename.replace('.mid', '.txt'),
                     pitch_history_data, harmonicity_data)
    return filename, pitch_history_data, harmonicity_data
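# Quick check of the networkx calls used in create_song (standard networkx
# API; the node count and probabilities are just sample values):
import networkx as nx

G = nx.watts_strogatz_graph(20, 4, 0.3)  # n nodes, each wired to k neighbors, rewiring prob p
nx.set_node_attributes(G, {i: 'random' for i in G}, 'starting_pitch')
assert G.nodes[0]['starting_pitch'] == 'random'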
def test(texts, classes, models, nn_params, folds=4):
    '''
    Check the performance of an SVM implementation, given a list of texts and
    their classes (negative/neutral/positive).
    Uses stratified k-fold cross-validation, so each fold keeps the class
    distribution of the full dataset.
    '''
    classes = np.array(classes)
    texts = np.array(texts)
    wrongs = []
    auc_sum = 0
    # ported from the deprecated sklearn.cross_validation API;
    # needs: from sklearn.model_selection import StratifiedKFold
    for train, test in StratifiedKFold(n_splits=folds).split(texts, classes):
        texts_train = texts[train]
        classes_train = classes[train]
        texts_test = texts[test]
        classes_test = classes[test]

        n = Ensemble(texts_train, classes_train, nn_params, models)
        predictions = n.classify(texts_test)
        predictions[predictions < 0] = 0

        auc = calculate_auc(classes_test, predictions)
        print(auc)
        auc_sum += auc

        for i in range(len(texts_test)):
            if abs(classes_test[i] - predictions[i]) > 0.5:
                wrongs.append((classes_test[i], predictions[i], texts_test[i]))
    '''
    import csv
    writer = open('wrongs.csv', 'w')
    for w in wrongs:
        writer.write('%s,%s,%s\n' % w)
    writer.close()
    '''
    return auc_sum / folds
def ensemble_test():
    ATOM_NUM = 2
    particles = [Particle() for i in range(ATOM_NUM)]
    ensemble = Ensemble(particles)
    print("array of positions\n", ensemble.positions)
    ensemble.positions += 1  # add 1 to every coordinate
    print("+1\n", ensemble.positions)
    ensemble.positions *= 2  # multiply every coordinate by 2
    print("*2\n", ensemble.positions)
    ensemble.positions *= np.array([1, 2, 3])  # x*1, y*2, z*3
    print("x*1, y*2, z*3\n", ensemble.positions)
    ensemble.positions = np.ones((ensemble.N, 3)) * 100  # set everything to 100
    print("=100\n", ensemble.positions)
    print("array of velocities\n", ensemble.velocities)
def run(ncyc, N=1, lim=(20, 20), T=300, ensemble=None, animation=False, dframe=0.001):
    # initialize system
    time_total = 0  # total "time" of the system
    time_pulse = 0  # time within the current pulse
    if ensemble is None:
        ensemble = Ensemble(N, lim, T)
    ensemble.Plot("Initial Configuration")

    # start simulation
    y = [ensemble.Energy_Total()]
    start_time = time.time()
    #for i in trange(ncyc):
    for i in range(ncyc):
        if i in list(range(0, ncyc, int(ncyc / 20))):
            print("{0} cycles: {1} s".format(i, time.time() - start_time))
        ensemble.Cycle(time_pulse=time_pulse, time_total=time_total)
        y.append(ensemble.Energy_Total())
        time_total += dt
        if time_pulse + dt > 120:
            time_pulse += dt - 120
        else:
            time_pulse += dt

    print("Elapsed time:", time.time() - start_time, "(s)")
    print("Initial energy:", y[0], "(J)")
    print("Final energy:", y[-1], "(J)")
    print("Average energy:", ensemble.Average(), "(J)")
    ensemble.Plot("Final Configuration")

    x = range(0, len(y))
    fig, ax = plt.subplots(figsize=(20, 10))
    plt.plot(x, y)
    plt.xlim([0, ncyc])
    plt.ylim([min(y), max(y)])
    plt.title("Total Energy vs. Cycle")
    return ensemble
def get_unique_model():
    xg = xgb.XGBRegressor(n_estimators=200, learning_rate=0.02, gamma=0,
                          subsample=0.75, colsample_bytree=1, max_depth=6)
    en = ElasticNet(l1_ratio=0.95, alpha=0.15, max_iter=50000)
    ada = AdaBoostRegressor(learning_rate=0.01, loss='square', n_estimators=100)
    lr = Ilbeom_Linear()
    lst = [xg, en, ada, lr]
    return Ensemble(lst)
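# Ensemble(lst) above is project code; a minimal sketch of what a simple
# prediction-averaging ensemble of regressors can look like (an assumption
# for illustration, not this project's implementation):
import numpy as np

class AveragingEnsemble:
    def __init__(self, models):
        self.models = models

    def fit(self, X, y):
        for m in self.models:
            m.fit(X, y)
        return self

    def predict(self, X):
        # element-wise mean of the base regressors' predictions
        return np.mean([m.predict(X) for m in self.models], axis=0)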
def __init__(self, saved_model: str = None):
    """Create a new object.

    Args:
        saved_model (str, optional): load a pre-trained model if `saved_model`
            is not None.
    """
    super().__init__()

    # Creating an XGBoost model for stacking
    xgb_params = {}
    xgb_params['learning_rate'] = 0.01
    xgb_params['n_estimators'] = 750
    xgb_params['max_depth'] = 6
    xgb_params['colsample_bytree'] = 0.6
    xgb_params['min_child_weight'] = 0.6
    xgb_model = XGBClassifier(**xgb_params)

    # Creating a random forest model for stacking
    rf_params = {}
    rf_params['n_estimators'] = 200
    rf_params['max_depth'] = 6
    rf_params['min_samples_split'] = 70
    rf_params['min_samples_leaf'] = 30
    rf_model = RandomForestClassifier(**rf_params)

    # Creating a logistic regression model to act as a stacker of the base models
    log_model = LogisticRegression()

    # Creating the stack
    stack = Ensemble(n_splits=3, stacker=log_model,
                     base_models=(rf_model, xgb_model))

    # Used as a prefix for the model and the processed dataset
    self.datetime_prefix = datetime.datetime.now().replace(
        microsecond=0).isoformat().replace(':', '-')

    # Load a saved model or create a new one
    if saved_model:
        self.model_name = saved_model
    else:
        self.model_name = self.datetime_prefix + '_fraud_ensemble.bin'

    # The final model
    self.model = stack
    print('Model: {}'.format(self.model_name))
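# The project's Ensemble stacker pairs base models with a meta-learner trained
# on out-of-fold predictions. If scikit-learn is acceptable, a comparable
# off-the-shelf construction is StackingClassifier (a sketch, not the code
# used above; the XGBoost base model would need the xgboost package):
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression

stack = StackingClassifier(
    estimators=[('rf', RandomForestClassifier(n_estimators=200, max_depth=6))],
    final_estimator=LogisticRegression(),
    cv=3)  # 3-fold out-of-fold predictions feed the meta-learner, mirroring n_splits=3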
def main():
    ATOM_NUM = 1
    CYCLE_NUM = 50
    particles = [Particle() for i in range(ATOM_NUM)]
    ensemble = Ensemble(particles)
    ensemble.positions += 2
    print("initial positions\n", ensemble.positions)
    print("initial velocities\n", ensemble.velocities)
    myfield = Field(ensemble, dt=0.01)
    for i in range(CYCLE_NUM):
        myfield.update()
        print("t:", myfield.dt * (i + 1))
        print("x:", myfield.ensemble.positions)
        print("v:", myfield.ensemble.velocities)
        print()
def main():
    # Dataset paths
    dataset_names = ['credit_card_clients_balanced', 'credit_card_clients']
    for data_name in dataset_names:
        dataset_path = os.path.join(os.getcwd(), 'dataset', data_name + '.csv')
        dataset = pd.read_csv(dataset_path, encoding='utf-8')

        # Dataset columns
        data_x = dataset[['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9',
                          'X10', 'X11', 'X12', 'X13', 'X14', 'X15', 'X16', 'X17',
                          'X18', 'X19', 'X20', 'X21', 'X22', 'X23']]
        data_y = dataset['Y']

        # Preprocessing
        min_max_scaler = preprocessing.MinMaxScaler()
        X_normalized = min_max_scaler.fit_transform(data_x)

        acc_rate = []
        reject_rate = []

        # Runs to test the model
        for i in range(20):
            print('---------------- Ensemble -----------------')
            print('--- MLP - SVM - KNN - GMM - Naive Bayes ---')
            print(i + 1, 'of 20 iterations')
            X_train, X_test, y_train, y_test = train_test_split(
                X_normalized, data_y, test_size=0.2)
            y_train = np.array(y_train)
            y_test = np.array(y_test)

            model = Ensemble()
            model.train(X_train, y_train, gridSearch=False)
            y_hat = model.predict(X_test)
            error, reject = model.evaluate(y_hat, y_test)
            acc_rate.append(1 - error)
            reject_rate.append(reject)

        graphics(acc_rate, reject_rate, data_name)
def main():
    input_dir = "/amit/kaggle/tgs"
    output_dir = "/artifacts"
    image_size_target = 128
    batch_size = 32
    epochs_to_train = 300
    bce_loss_weight_gamma = 0.98
    sgdr_min_lr = 0.0001  # 0.0001, 0.001
    sgdr_max_lr = 0.001  # 0.001, 0.03
    sgdr_cycle_epochs = 20
    sgdr_cycle_epoch_prolongation = 3
    sgdr_cycle_end_patience = 3
    train_abort_epochs_without_improval = 30
    ensemble_model_count = 3
    swa_epoch_to_start = 30

    model_dir = sys.argv[1] if len(sys.argv) > 1 else None

    train_data = TrainData(input_dir)

    train_set = TrainDataset(train_data.train_set_df, image_size_target, augment=True)
    train_set_data_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=8)

    val_set = TrainDataset(train_data.val_set_df, image_size_target, augment=False)
    val_set_data_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=2)

    if model_dir:
        model = create_model(pretrained=False).to(device)
        model.load_state_dict(torch.load("{}/model.pth".format(model_dir), map_location=device))
    else:
        model = create_model(pretrained=True).to(device)
    torch.save(model.state_dict(), "{}/model.pth".format(output_dir))

    swa_model = create_model(pretrained=False).to(device)

    print("train_set_samples: %d, val_set_samples: %d" % (len(train_set), len(val_set)))

    global_val_precision_best_avg = float("-inf")
    global_swa_val_precision_best_avg = float("-inf")
    sgdr_cycle_val_precision_best_avg = float("-inf")

    epoch_iterations = len(train_set) // batch_size

    # optimizer = optim.SGD(model.parameters(), lr=sgdr_max_lr, weight_decay=0, momentum=0.9, nesterov=True)
    optimizer = optim.Adam(model.parameters(), lr=sgdr_max_lr)
    lr_scheduler = CosineAnnealingLR(optimizer, T_max=sgdr_cycle_epochs, eta_min=sgdr_min_lr)

    optim_summary_writer = SummaryWriter(log_dir="{}/logs/optim".format(output_dir))
    train_summary_writer = SummaryWriter(log_dir="{}/logs/train".format(output_dir))
    val_summary_writer = SummaryWriter(log_dir="{}/logs/val".format(output_dir))
    swa_val_summary_writer = SummaryWriter(log_dir="{}/logs/swa_val".format(output_dir))

    sgdr_iterations = 0
    sgdr_reset_count = 0
    batch_count = 0
    epoch_of_last_improval = 0

    sgdr_next_cycle_end_epoch = sgdr_cycle_epochs + sgdr_cycle_epoch_prolongation
    swa_update_count = 0

    ensemble_model_index = 0
    for model_file_path in glob.glob("{}/model-*.pth".format(output_dir)):
        model_file_name = os.path.basename(model_file_path)
        model_index = int(model_file_name.replace("model-", "").replace(".pth", ""))
        ensemble_model_index = max(ensemble_model_index, model_index + 1)

    print('{"chart": "best_val_precision", "axis": "epoch"}')
    print('{"chart": "val_precision", "axis": "epoch"}')
    print('{"chart": "val_loss", "axis": "epoch"}')
    print('{"chart": "sgdr_reset", "axis": "epoch"}')
    print('{"chart": "precision", "axis": "epoch"}')
    print('{"chart": "loss", "axis": "epoch"}')
    print('{"chart": "swa_val_precision", "axis": "epoch"}')
    print('{"chart": "swa_val_loss", "axis": "epoch"}')

    train_start_time = time.time()

    criterion = nn.BCEWithLogitsLoss()

    for epoch in range(epochs_to_train):
        epoch_start_time = time.time()

        model.train()

        train_loss_sum = 0.0
        train_precision_sum = 0.0
        train_step_count = 0
        for batch in train_set_data_loader:
            images, masks, mask_weights = \
                batch[0].to(device, non_blocking=True), \
                batch[1].to(device, non_blocking=True), \
                batch[2].to(device, non_blocking=True)

            lr_scheduler.step(epoch=min(sgdr_cycle_epochs, sgdr_iterations / epoch_iterations))

            optimizer.zero_grad()
            prediction_logits = model(images)
            predictions = torch.sigmoid(prediction_logits)
            criterion.weight = mask_weights
            loss = criterion(prediction_logits, masks)
            loss.backward()
            optimizer.step()

            train_loss_sum += loss.item()
            train_precision_sum += np.mean(precision_batch(predictions, masks))
            sgdr_iterations += 1
            train_step_count += 1
            batch_count += 1

            optim_summary_writer.add_scalar("lr", get_learning_rate(optimizer), batch_count + 1)

        train_loss_avg = train_loss_sum / train_step_count
        train_precision_avg = train_precision_sum / train_step_count

        val_loss_avg, val_precision_avg = evaluate(model, val_set_data_loader, criterion)

        model_improved_within_sgdr_cycle = val_precision_avg > sgdr_cycle_val_precision_best_avg
        if model_improved_within_sgdr_cycle:
            torch.save(model.state_dict(), "{}/model-{}.pth".format(output_dir, ensemble_model_index))
            sgdr_cycle_val_precision_best_avg = val_precision_avg

        model_improved = val_precision_avg > global_val_precision_best_avg
        ckpt_saved = False
        if model_improved:
            torch.save(model.state_dict(), "{}/model.pth".format(output_dir))
            global_val_precision_best_avg = val_precision_avg
            ckpt_saved = True

        swa_model_improved = False
        if epoch + 1 >= swa_epoch_to_start:
            if model_improved_within_sgdr_cycle:
                swa_update_count += 1
                moving_average(swa_model, model, 1.0 / swa_update_count)
                bn_update(train_set_data_loader, swa_model)
            swa_model_improved = val_precision_avg > global_swa_val_precision_best_avg
            if swa_model_improved:
                torch.save(swa_model.state_dict(), "{}/swa_model.pth".format(output_dir))
                global_swa_val_precision_best_avg = val_precision_avg

        if model_improved or swa_model_improved:
            epoch_of_last_improval = epoch

        sgdr_reset = False
        if (epoch + 1 >= sgdr_next_cycle_end_epoch) and (epoch - epoch_of_last_improval >= sgdr_cycle_end_patience):
            sgdr_iterations = 0
            sgdr_next_cycle_end_epoch = epoch + 1 + sgdr_cycle_epochs + sgdr_cycle_epoch_prolongation
            ensemble_model_index += 1
            sgdr_cycle_val_precision_best_avg = float("-inf")
            sgdr_reset_count += 1
            sgdr_reset = True

        swa_val_loss_avg, swa_val_precision_avg = evaluate(swa_model, val_set_data_loader, criterion)

        optim_summary_writer.add_scalar("sgdr_reset", sgdr_reset_count, epoch + 1)

        train_summary_writer.add_scalar("loss", train_loss_avg, epoch + 1)
        train_summary_writer.add_scalar("precision", train_precision_avg, epoch + 1)
        val_summary_writer.add_scalar("loss", val_loss_avg, epoch + 1)
        val_summary_writer.add_scalar("precision", val_precision_avg, epoch + 1)
        swa_val_summary_writer.add_scalar("loss", swa_val_loss_avg, epoch + 1)
        swa_val_summary_writer.add_scalar("precision", swa_val_precision_avg, epoch + 1)

        epoch_end_time = time.time()
        epoch_duration_time = epoch_end_time - epoch_start_time

        print("[%03d/%03d] %ds, lr: %.6f, loss: %.3f, val_loss: %.3f|%.3f, prec: %.3f, val_prec: %.3f|%.3f, ckpt: %d, rst: %d" % (
            epoch + 1,
            epochs_to_train,
            epoch_duration_time,
            get_learning_rate(optimizer),
            train_loss_avg,
            val_loss_avg,
            swa_val_loss_avg,
            train_precision_avg,
            val_precision_avg,
            swa_val_precision_avg,
            int(ckpt_saved),
            int(sgdr_reset)),
            flush=True)

        print('{"chart": "best_val_precision", "x": %d, "y": %.3f}' % (epoch + 1, global_val_precision_best_avg))
        print('{"chart": "val_precision", "x": %d, "y": %.3f}' % (epoch + 1, val_precision_avg))
        print('{"chart": "val_loss", "x": %d, "y": %.3f}' % (epoch + 1, val_loss_avg))
        print('{"chart": "sgdr_reset", "x": %d, "y": %.3f}' % (epoch + 1, sgdr_reset_count))
        print('{"chart": "precision", "x": %d, "y": %.3f}' % (epoch + 1, train_precision_avg))
        print('{"chart": "loss", "x": %d, "y": %.3f}' % (epoch + 1, train_loss_avg))
        print('{"chart": "swa_val_precision", "x": %d, "y": %.3f}' % (epoch + 1, swa_val_precision_avg))
        print('{"chart": "swa_val_loss", "x": %d, "y": %.3f}' % (epoch + 1, swa_val_loss_avg))

        if sgdr_reset and sgdr_reset_count >= ensemble_model_count and epoch - epoch_of_last_improval >= train_abort_epochs_without_improval:
            print("early abort")
            break

    optim_summary_writer.close()
    train_summary_writer.close()
    val_summary_writer.close()

    train_end_time = time.time()
    print()
    print("Train time: %s" % str(datetime.timedelta(seconds=train_end_time - train_start_time)))

    eval_start_time = time.time()

    print()
    print("evaluation of the training model")

    model.load_state_dict(torch.load("{}/model.pth".format(output_dir), map_location=device))
    analyze(Ensemble([model]), train_data.val_set_df, use_tta=False)
    analyze(Ensemble([model]), train_data.val_set_df, use_tta=True)

    score_to_model = {}
    ensemble_model_candidates = glob.glob("{}/model-*.pth".format(output_dir))
    ensemble_model_candidates.append("{}/swa_model.pth".format(output_dir))
    for model_file_path in ensemble_model_candidates:
        model_file_name = os.path.basename(model_file_path)
        m = create_model(pretrained=False).to(device)
        m.load_state_dict(torch.load(model_file_path, map_location=device))
        val_loss_avg, val_precision_avg = evaluate(m, val_set_data_loader, criterion)
        print("ensemble '%s': val_loss=%.3f, val_precision=%.3f" % (model_file_name, val_loss_avg, val_precision_avg))
        if len(score_to_model) < ensemble_model_count:
            score_to_model[val_precision_avg] = m
        elif min(score_to_model.keys()) < val_precision_avg:
            # evict the weakest member before adding the better one
            del score_to_model[min(score_to_model.keys())]
            score_to_model[val_precision_avg] = m

    ensemble_models = list(score_to_model.values())
    for ensemble_model in ensemble_models:
        val_loss_avg, val_precision_avg = evaluate(ensemble_model, val_set_data_loader, criterion)
        print("ensemble: val_loss=%.3f, val_precision=%.3f" % (val_loss_avg, val_precision_avg))

    model = Ensemble(ensemble_models)

    mask_threshold_global, mask_threshold_per_cc = analyze(model, train_data.val_set_df, use_tta=True)

    eval_end_time = time.time()
    print()
    print("Eval time: %s" % str(datetime.timedelta(seconds=eval_end_time - eval_start_time)))

    print()
    print("submission preparation")

    submission_start_time = time.time()

    test_data = TestData(input_dir)
    calculate_predictions(test_data.df, model, use_tta=True)
    calculate_prediction_masks(test_data.df, mask_threshold_global)

    print()
    print(test_data.df.groupby("predictions_cc").agg({"predictions_cc": "count"}))

    write_submission(test_data.df, "prediction_masks", "{}/{}".format(output_dir, "submission.csv"))
    write_submission(test_data.df, "prediction_masks_best", "{}/{}".format(output_dir, "submission_best.csv"))

    submission_end_time = time.time()
    print()
    print("Submission time: %s" % str(datetime.timedelta(seconds=submission_end_time - submission_start_time)))
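# moving_average and bn_update above follow the usual stochastic weight
# averaging (SWA) utilities. A minimal sketch of the parameter-averaging step,
# assuming both models share one architecture (this mirrors the common SWA
# reference code, not necessarily this repo's exact helper):
def moving_average(swa_model, model, alpha):
    for p_swa, p in zip(swa_model.parameters(), model.parameters()):
        p_swa.data *= (1.0 - alpha)
        p_swa.data += p.data * alpha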
@child('e3')
def i2(x):
    return x**2

@child('e3')
def i3(x, y):
    return x**3 + y

if __name__ == '__main__':
    # create our first ensemble and give it a name
    e1 = Ensemble('e1')

    # create a second ensemble
    e2 = Ensemble('e2')

    # you may use the ensembles as long as you specify which model you use
    print(e1(child='f', x=2))
    print(e1(child='g', y=3))
    print(e2(child='f', x=2))

    # try to use model `g` but it's not in ensemble `e2`
    try:
        print(e2(child='g', y=3))
    except ValueError:
        pass

    # try to use model `h` but it's not decorated with @model
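# A minimal sketch of how an Ensemble registry with @child decorators can be
# wired up (an illustration of the pattern exercised above, not this library's
# actual implementation; it assumes an ensemble is created before its children
# are registered):
_ensembles = {}

class Ensemble:
    def __init__(self, name):
        self.name = name
        self.children = {}
        _ensembles[name] = self

    def __call__(self, child, **kwargs):
        if child not in self.children:
            raise ValueError('no child %r in ensemble %r' % (child, self.name))
        return self.children[child](**kwargs)

def child(ensemble_name):
    def decorator(fn):
        _ensembles[ensemble_name].children[fn.__name__] = fn
        return fn
    return decorator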
def __str__(self):
    return "PDB" + Ensemble.__str__(self)
def __repr__(self):
    return "<PDB" + Ensemble.__repr__(self)[1:]
def __init__(self, title="Unknown"):
    self._labels = []
    Ensemble.__init__(self, title)
    self._trans = None
class Manager(object):
    def __init__(self, sourceFile, targetFile):
        self.SDataBufferArr = None  # 2D array representation of self.SDataBuffer
        self.SDataLabels = None
        self.TDataBufferArr = None  # 2D array representation of self.TDataBuffer
        self.TDataLabels = None

        self.useKliepCVSigma = Properties.useKliepCVSigma
        self.kliep = None
        self.useSvmCVParams = Properties.useSvmCVParams

        self.ensemble = Ensemble(Properties.ENSEMBLE_SIZE)

        self.initialWindowSize = int(Properties.INITIAL_DATA_SIZE)
        self.maxWindowSize = int(Properties.MAX_WINDOW_SIZE)

        self.enableForceUpdate = int(Properties.enableForceUpdate)
        self.forceUpdatePeriod = int(Properties.forceUpdatePeriod)

        """
        - simulate source and target streams from corresponding files.
        """
        print("Reading the Source Dataset")
        self.source = Stream(sourceFile, Properties.INITIAL_DATA_SIZE)
        print("Reading the Target Dataset")
        self.target = Stream(targetFile, Properties.INITIAL_DATA_SIZE)
        print("Finished Reading the Target Dataset")

        Properties.MAXVAR = self.source.initialData.shape[0]

    """
    Detect drift on a given data stream.
    Returns the change point index on the stream array.
    """
    def __detectDrift(self, slidingWindow, flagStream):
        changePoint = -1
        if flagStream == 0:
            changePoint = self.changeDetector.detectSourceChange(slidingWindow)
        elif flagStream == 1:
            changePoint = self.changeDetector.detectTargetChange(slidingWindow)
        else:
            raise Exception('flagStream var has value ' + str(flagStream) + ' that is not supported.')
        return changePoint

    """
    Write value (accuracy or confidence) to a file with DatasetName as an identifier.
    """
    def __saveResult(self, acc, datasetName):
        with open(datasetName + '_' + Properties.OUTFILENAME, 'a') as f:
            f.write(str(acc) + "\n")

    def convListOfDictToNDArray(self, listOfDict):
        arrayRep = []
        if not listOfDict:
            return arrayRep
        arrayRep = np.array([[float(v)] for k, v in listOfDict[0].items() if k != -1])
        for i in range(1, len(listOfDict)):
            arrayRep = np.append(arrayRep,
                                 np.array([[float(v)] for k, v in listOfDict[i].items() if k != -1]),
                                 axis=1)
        return arrayRep

    def collectLabels(self, listOfDict):
        labels = []
        for d in listOfDict:
            labels.append(str(d[-1]))
        return labels

    """
    The main method handling multistream classification using KLIEP.
    """
    def startFusion(self, datasetName, probFromSource):
        # save the timestamp
        globalStartTime = time.time()
        Properties.logger.info('Global Start Time: '
                               + datetime.datetime.fromtimestamp(globalStartTime).strftime('%Y-%m-%d %H:%M:%S'))
        # open files for saving accuracy and confidence
        fAcc = open(datasetName + '_' + Properties.OUTFILENAME, 'w')
        fConf = open(datasetName + '_confidence' + '_' + Properties.OUTFILENAME, 'w')

        # initialize gaussian models
        gmOld = gm.GaussianModel()
        gmUpdated = gm.GaussianModel()

        # variable to track the force-update period
        idxLastUpdate = 0

        # get data buffers
        self.SDataBufferArr = self.source.initialData
        self.SDataLabels = self.source.initialDataLabels
        self.TDataBufferArr = self.target.initialData

        # first choose a suitable value for sigma
        self.kliep = Kliep(Properties.kliepParEta, Properties.kliepParLambda,
                           Properties.kliepParB, Properties.kliepParThreshold,
                           Properties.kliepDefSigma)
        #self.kliep = Kliep(Properties.kliepParEta, Properties.kliepParLambda, Properties.kliepParB, Properties.MAXVAR*Properties.kliepParThreshold, Properties.kliepDefSigma)
        if self.useKliepCVSigma == 1:
            self.kliep.kliepDefSigma = self.kliep.chooseSigma(self.SDataBufferArr, self.TDataBufferArr)

        # calculate alpha values
        #self.kliep.kliepDefSigma = 0.1
        Properties.logger.info('Estimating initial DRM')
        gmOld.alphah, kernelMatSrcData, kernelMatTrgData, gmOld.refPoints = \
            self.kliep.KLIEP(self.SDataBufferArr, self.TDataBufferArr)

        # initialize the updated gaussian model
        gmUpdated.setAlpha(gmOld.alphah)
        gmUpdated.setRefPoints(gmOld.refPoints)

        # now resize the windows appropriately
        self.SDataBufferArr = self.SDataBufferArr[:, -Properties.MAX_WINDOW_SIZE:]
        self.SDataLabels = self.SDataLabels[-Properties.MAX_WINDOW_SIZE:]
        self.TDataBufferArr = self.TDataBufferArr[:, -Properties.MAX_WINDOW_SIZE:]
        kernelMatSrcData = kernelMatSrcData[-Properties.MAX_WINDOW_SIZE:, :]
        kernelMatTrgData = kernelMatTrgData[-Properties.MAX_WINDOW_SIZE:, :]
        #meanDistSrcData = self.kliep.colWiseMeanTransposed(kernelMatSrcData)

        Properties.logger.info('Initializing Ensemble with the first model')
        # target model:
        # first calculate weights for the source instances
        weightSrcData = self.kliep.calcInstanceWeights(kernelMatSrcData, gmUpdated.alphah)
        # since weightSrcData is a column matrix, convert it to a list before generating a new model
        SDataBufferArrTransposed = self.SDataBufferArr.T
        TDataBufferArrTransposed = self.TDataBufferArr.T
        if self.useSvmCVParams == 1:
            params = {'gamma': [2**2, 2**-16], 'C': [2**-6, 2**15]}
            svr = svm.SVC()
            opt = grid_search.GridSearchCV(svr, params)
            opt.fit(SDataBufferArrTransposed.tolist(), self.SDataLabels)
            optParams = opt.best_params_
            self.ensemble.generateNewModelKLIEP(SDataBufferArrTransposed, self.SDataLabels,
                                                TDataBufferArrTransposed, weightSrcData[0].tolist(),
                                                optParams['C'], optParams['gamma'])
        else:
            self.ensemble.generateNewModelKLIEP(SDataBufferArrTransposed.tolist(), self.SDataLabels,
                                                TDataBufferArrTransposed.tolist(), weightSrcData[0].tolist(),
                                                Properties.svmDefC, Properties.svmDefGamma,
                                                Properties.svmKernel)

        Properties.logger.info(self.ensemble.getEnsembleSummary())

        sDataIndex = 0
        tDataIndex = 0
        trueTargetNum = 0
        targetConfSum = 0

        # enoughInstToUpdate is used to check if there are enough instances
        # in the windows to estimate the weights
        Properties.logger.info('Starting MultiStream Classification with FUSION')
        while self.target.data.shape[1] > tDataIndex:
            """
            If the source stream is not empty, do proper sampling.
            Otherwise, just take the new instance from the target stream.
            """
            if self.source.data.shape[1] > sDataIndex:
                fromSource = random.uniform(0, 1) < probFromSource
            else:
                print("\nsource stream sampling not possible")
                fromSource = False

            if fromSource:
                # Source Stream: '.' means sampling from source
                print('.', end="")
                #print("Source data index: ", sDataIndex)
                # remove the first instance, and add the new instance to the buffers
                newSrcDataArr = self.source.data[:, sDataIndex][np.newaxis].T
                self.SDataBufferArr = self.SDataBufferArr[:, 1:]
                self.SDataLabels = self.SDataLabels[1:]
                kernelMatSrcData = kernelMatSrcData[1:, :]

                # add the new instance to the buffers
                self.SDataBufferArr = np.append(self.SDataBufferArr, newSrcDataArr, axis=1)
                self.SDataLabels.append(self.source.dataLabels[sDataIndex])

                # update kernelMatSrcData
                dist_tmp = np.power(np.tile(newSrcDataArr, (1, gmUpdated.refPoints.shape[1]))
                                    - gmUpdated.refPoints, 2)
                dist_2 = np.sum(dist_tmp, axis=0, dtype='float64')
                kernelSDataNewFromRefs = np.exp(-dist_2 / (2 * math.pow(self.kliep.kliepDefSigma, 2)),
                                                dtype='float64')
                kernelMatSrcData = np.append(kernelMatSrcData, kernelSDataNewFromRefs[np.newaxis], axis=0)

                # satisfy the constraints
                gmUpdated.alphah, kernelMatSrcData = self.kliep.satConstraints(
                    self.SDataBufferArr, self.TDataBufferArr, gmUpdated.refPoints,
                    gmUpdated.alphah, kernelMatSrcData)
                sDataIndex += 1
            else:
                # Target Stream: '#' indicates a new point from target
                print('#', end="")
                newTargetDataArr = self.target.data[:, tDataIndex][np.newaxis].T
                # get target accuracy on the new instance
                resTarget = self.ensemble.evaluateEnsembleKLIEP(np.reshape(newTargetDataArr, (1, -1)))
                if isinstance(resTarget[0], float) and \
                        abs(resTarget[0] - self.target.dataLabels[tDataIndex]) < 0.0001:
                    trueTargetNum += 1
                elif resTarget[0] == self.target.dataLabels[tDataIndex]:
                    trueTargetNum += 1
                acc = float(trueTargetNum) / (tDataIndex + 1)
                if (tDataIndex % 100) == 0:
                    Properties.logger.info('\nTotal test instance: ' + str(tDataIndex + 1)
                                           + ', correct: ' + str(trueTargetNum)
                                           + ', accuracy: ' + str(acc))
                fAcc.write(str(acc) + "\n")

                conf = resTarget[1]  # confidence
                # save confidence
                targetConfSum += conf
                fConf.write(str(float(targetConfSum) / (tDataIndex + 1)) + "\n")

                # update alpha, and satisfy the constraints
                gmUpdated.alphah, kernelMatSrcData = self.kliep.updateAlpha(
                    self.SDataBufferArr, self.TDataBufferArr, newTargetDataArr,
                    gmUpdated.refPoints, gmUpdated.alphah, kernelMatSrcData)

                # remove the first instance from the buffers
                self.TDataBufferArr = self.TDataBufferArr[:, 1:]
                # update ref points
                gmUpdated.refPoints = gmUpdated.refPoints[:, 1:]
                # update kernelMatSrcData, as the ref points have been updated
                kernelMatSrcData = kernelMatSrcData[:, 1:]
                # update kernelMatTrgData, as the ref points have been updated
                kernelMatTrgData = kernelMatTrgData[1:, 1:]

                # update ref points
                gmUpdated.refPoints = np.append(gmUpdated.refPoints, newTargetDataArr, axis=1)

                # add to kernelMatSrcData for the last ref point
                dist_tmp = np.power(np.tile(newTargetDataArr, (1, self.SDataBufferArr.shape[1]))
                                    - self.SDataBufferArr, 2)
                dist_2 = np.sum(dist_tmp, axis=0, dtype='float64')
                kernel_dist_2 = np.exp(-dist_2 / (2 * math.pow(self.kliep.kliepDefSigma, 2)),
                                       dtype='float64')
                kernelMatSrcData = np.append(kernelMatSrcData, kernel_dist_2[np.newaxis].T, axis=1)

                # now update kernelMatTrgData, as the ref points have been updated:
                # first add distances from the new ref point to all the target points
                dist_tmp = np.power(np.tile(newTargetDataArr, (1, self.TDataBufferArr.shape[1]))
                                    - self.TDataBufferArr, 2)
                dist_2 = np.sum(dist_tmp, axis=0, dtype='float64')
                kernel_dist_2 = np.exp(-dist_2 / (2 * math.pow(self.kliep.kliepDefSigma, 2)),
                                       dtype='float64')
                kernelMatTrgData = np.append(kernelMatTrgData, kernel_dist_2[np.newaxis].T, axis=1)

                # now add distances from the newly added instance to all the ref points;
                # add the new instance to the buffers
                self.TDataBufferArr = np.append(self.TDataBufferArr, newTargetDataArr, axis=1)
                dist_tmp = np.power(np.tile(newTargetDataArr, (1, gmUpdated.refPoints.shape[1]))
                                    - gmUpdated.refPoints, 2)
                dist_2 = np.sum(dist_tmp, axis=0, dtype='float64')
                kernelTDataNewFromRefs = np.exp(-dist_2 / (2 * math.pow(self.kliep.kliepDefSigma, 2)),
                                                dtype='float64')
                kernelMatTrgData = np.append(kernelMatTrgData, kernelTDataNewFromRefs[np.newaxis], axis=0)
                tDataIndex += 1

            #print "sDataIndex: ", str(sDataIndex), ", tDataIndex: ", str(tDataIndex)
            enoughInstToUpdate = self.SDataBufferArr.shape[1] >= Properties.kliepParB \
                and self.TDataBufferArr.shape[1] >= Properties.kliepParB
            if enoughInstToUpdate:
                #print("Enough points in source and target sliding windows. Attempting to detect any change of distribution.")
                changeDetected, changeScore, kernelMatTrgData = self.kliep.changeDetection(
                    self.TDataBufferArr, gmOld.refPoints, gmOld.alphah,
                    gmUpdated.refPoints, gmUpdated.alphah, kernelMatTrgData)
                #print("Change Score: ", changeScore)

                # instances from more than one class are needed for SVM training
                if len(set(self.SDataLabels)) > 1 and (changeDetected or (
                        self.enableForceUpdate
                        and (tDataIndex + sDataIndex - idxLastUpdate) > self.forceUpdatePeriod)):  #or (tDataIndex>0 and (targetConfSum/tDataIndex)<0.1):
                    fConf.write(str(7777777.0) + "\n")
                    Properties.logger.info('\n-------------------------- Change of Distribution ------------------------------------')
                    Properties.logger.info('Change of distribution found')
                    Properties.logger.info('sDataIndex=' + str(sDataIndex) + '\ttDataIndex=' + str(tDataIndex))
                    Properties.logger.info('Change Detection Score: ' + str(changeScore)
                                           + ', Threshold: ' + str(self.kliep.kliepParThreshold))

                    # build a new model:
                    # first calculate the weights for the source instances
                    gmOld.alphah, kernelMatSrcData, kernelMatTrgData, gmOld.refPoints = \
                        self.kliep.KLIEP(self.SDataBufferArr, self.TDataBufferArr)
                    # update the updated gaussian model as well
                    gmUpdated.setAlpha(gmOld.alphah)
                    gmUpdated.setRefPoints(gmOld.refPoints)
                    weightSrcData = self.kliep.calcInstanceWeights(kernelMatSrcData, gmUpdated.alphah)

                    # build a new model
                    Properties.logger.info('Training a model due to change detection')
                    SDataBufferArrTransposed = self.SDataBufferArr.T
                    TDataBufferArrTransposed = self.TDataBufferArr.T
                    if self.useSvmCVParams == 1:
                        params = {'gamma': [2**2, 2**-16], 'C': [2**-6, 2**15]}
                        svr = svm.SVC()
                        opt = grid_search.GridSearchCV(svr, params)
                        opt.fit(SDataBufferArrTransposed.tolist(), self.SDataLabels)
                        optParams = opt.best_params_
                        self.ensemble.generateNewModelKLIEP(
                            SDataBufferArrTransposed.tolist(), self.SDataLabels,
                            TDataBufferArrTransposed.tolist(), weightSrcData[0].tolist(),
                            optParams['C'], optParams['gamma'])
                    else:
                        self.ensemble.generateNewModelKLIEP(
                            SDataBufferArrTransposed.tolist(), self.SDataLabels,
                            TDataBufferArrTransposed.tolist(), weightSrcData[0].tolist(),
                            Properties.svmDefC, Properties.svmDefGamma, Properties.svmKernel)

                    Properties.logger.info(self.ensemble.getEnsembleSummary())

                    # update the index of the last update
                    idxLastUpdate = tDataIndex + sDataIndex
                    changeDetected = False

                    # keep the latest 1/4th of data and update the arrays and lists
                    #Properties.logger.info('Updating source and target sliding windows')
                    """
                    In the target window, we want to keep (3x/4) instances, where x is
                    the number of gaussian kernel centers, so that we will try to detect
                    a change point again after (x/4) instances. Since there might be a
                    difference between the arrival rates of the source and the target,
                    we calculate the number of points to retain in the source with that
                    in mind.
                    """
                    #numberOfPointsInTargetToRetain = Properties.kliepParB - int(((1-probFromSource)*3*Properties.kliepParB)/4)
                    #numberOfPointsInSourceToRetain = Properties.kliepParB - int((probFromSource*3*Properties.kliepParB)/4)

        # save the timestamp
        fConf.close()
        fAcc.close()
        globalEndTime = time.time()
        Properties.logger.info('\nGlobal End Time: '
                               + datetime.datetime.fromtimestamp(globalEndTime).strftime('%Y-%m-%d %H:%M:%S'))
        Properties.logger.info('Total Time Spent: ' + str(globalEndTime - globalStartTime) + ' seconds')
        Properties.logger.info('Done !!')
print('\n___PARTITIONS 2___')
partitions2 = np.transpose(partitions)
ensemble2 = Ensemble(partitions=partitions2, n_cluster=3, partitions_format='PE')
e2, ts2, pr2 = ensemble2.mcla(times=True, partial_results=True)
for t in ts2:
    print(f'{t[0]}: {t[1]}s')
for r in pr2:
    print(r[0])
    print(r[1])

"""
print('\n___PARTITIONS 3___')
partitions3 = np.random.randint(8, size=(8, 100000))
ensemble3 = Ensemble(partitions=partitions3, n_cluster=8, partitions_format='PE')
e3, ts3, _pr3 = ensemble3.mcla(times=True)
for t in ts3:
    print(f'{t[0]}: {t[1]}s')
"""

hypergraph4 = np.array([
    [1, 1, 1, 0, 0, 0, 0],
    [0, 0, 0, 1, 1, 0, 0],
    [0, 0, 0, 0, 0, 1, 1],
    [0, 0, 0, 0, 0, 1, 1],
    [1, 1, 1, 0, 0, 0, 0],
    [0, 0, 0, 1, 1, 0, 0],
    [1, 1, 0, 0, 0, 0, 0],
    [0, 0, 1, 1, 0, 0, 0],
    [0, 0, 0, 0, 1, 1, 1],
    # training branch (the matching `if` header precedes this excerpt)
    model = resnet101()
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, 1)
    if latest_model_path != "":
        model.load_state_dict(torch.load(latest_model_path))
    model.cuda()

    # Set parameters for model
    criterion = Loss(Wt1, Wt0)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                           patience=1, verbose=True)

    # Train model
    model = train_model(model, criterion, optimizer, dataloaders, scheduler,
                        dataset_sizes, epochs - current_epoch, costs, accs,
                        num_ID, model_type)

# For testing the ensemble model
else:
    model = Ensemble("models/best_model_dense_4.pth",
                     "models/best_model_res_12.pth",
                     "models/best_model_vgg_19.pth")
    model.cuda()
    criterion = Loss(Wt1, Wt0)
    test_acc, test_loss = test_ensemble_mean(model, criterion, dataloaders,
                                             dataset_sizes)
#     classes.append(int(row[0]))

# #results = n.classify(texts)
# #results[results<0] = 0
# #print calculate_auc(classes, results)

# r1 = np.array(m1.classify(texts))
# print calculate_auc(classes, r1)
# r2 = np.array(m2.classify(texts))
# print calculate_auc(classes, r2)
# r = (1.2*r1 + 0.8*r2) / 2
# r[r>1] = 1
# r[r<0] = 0
# print calculate_auc(classes, r)

#print TestSVM.test_model(texts, classes, models[-1])
#print TestSVM.test(texts, classes, models, nn_params)

n = Ensemble(texts, classes, nn_params, models)

end = time.time()
# print "training time="
# print end-start

start = time.time()

# evaluate the classifier on the verification dataset
# (ported from Python 2: input() replaces raw_input() and already returns str)
texts = []
inp = input()
while inp:
    texts.append(inp)
    inp = input()

results = n.classify(texts)
    positives = sum(1 for label in labels if label)
    predicted_positives = sum(1 for pred in preds if pred)
    true_positives = sum(1 for label, pred in zip(labels, preds) if label and pred)
    return 100.0 * true_positives / predicted_positives, 100.0 * true_positives / positives


def evaluate(model):
    def wrapper(dataset):
        preds = [model(x=x) for x, _ in dataset]
        precision, recall = get_results(dataset, preds)
        return {
            'precision': f'{precision:.1f}%',
            'recall': f'{recall:.1f}%',
        }
    return wrapper


if __name__ == '__main__':
    e = Ensemble('ensemble', children=[model1, model2], mode='all')
    results = Ensemble('results', children=[model1, model2, e])
    results.decorate_children(evaluate)
    print(results)
    pprint(results(dataset=get_dataset()))
    """
    {'ensemble': {'precision': '100.0%', 'recall': '100.0%'},
     'model1': {'precision': '18.2%', 'recall': '100.0%'},
     'model2': {'precision': '30.0%', 'recall': '100.0%'}}
    """
import csv

from data_present import Data
from ensemble import Ensemble
from sklearn.metrics import accuracy_score

counter = 892
data = Data()
submission = [['PassengerId', 'Survived']]

# Set up predictions
ensemble = Ensemble(data)
ensemble.pred = list(map(int, ensemble.pred))

for entry in ensemble.pred:
    submission.append([counter, entry])
    counter += 1

# text mode with newline='' is the Python 3 equivalent of the old 'wb' csv idiom
with open('submission.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    for val in submission:
        writer.writerow(val)
        print("[epoch: {:d}] avg train_loss: {:.3f} eval ll: {:.3f} ({:.1f}s)"
              .format(epoch, sum(losses) / len(losses), eval_ll, time.time() - tic))
        print("running test code")
        name = "sample_" + str(epoch) + ".txt"
        test_code(model, name=name)
        print("ran test code")


if __name__ == '__main__':
    import args
    from model import Model
    from ensemble import Ensemble

    if method == 'ensemble':
        model = Ensemble(vectors).to(device)
    else:
        model = Model(vectors).to(device)
    train(model)

    # import dill
    # with open('model.p', 'rb') as h:
    #     model = dill.load(h)
    # visualize_attn(model)
def reward_func(sigma_index_lst=[1, 2, 3], default_n=20, epoch_num=4,
                epoch_min=100, epoch_step=50):
    '''
    Input:
        sigma_index_lst - the component indices from the SSA gene; for example,
                          the gene [0, 1, 0] -> sigma_lst=[1] (the indices where gene=1)
        default_n       - the window length for SSA; <= N/2, where N is the length
                          of the time series (default 20)
        epoch_num       - the number of submodels used
        epoch_min       - minimum number of epochs of a submodel
        epoch_step      - number of epochs between two consecutive submodels
    Output:
        a tuple containing two values (nse_q, nse_h)
    '''
    K.clear_session()
    with open('./settings/model/config.yaml', 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    # train
    model = Ensemble(mode='train', model_kind='rnn_cnn', sigma_lst=sigma_index_lst,
                     default_n=default_n, epoch_num=epoch_num, epoch_min=epoch_min,
                     epoch_step=epoch_step, **config)
    model.train_model_outer()

    # test
    model = Ensemble(mode='test', model_kind='rnn_cnn', sigma_lst=sigma_index_lst,
                     default_n=default_n, epoch_num=epoch_num, epoch_min=epoch_min,
                     epoch_step=epoch_step, **config)
    model.train_model_outer()
    model.retransform_prediction(mode='roll')
    return model.evaluate_model(mode='roll')
    classes.append(int(row[0]))

#results = n.classify(texts)
#results[results<0] = 0
#print calculate_auc(classes, results)

r1 = np.array(m1.classify(texts))  # np.array needed so 1.2*r1 scales instead of failing
print(calculate_auc(classes, r1))
r2 = np.array(m2.classify(texts))
print(calculate_auc(classes, r2))
r = (1.2 * r1 + 0.8 * r2) / 2
r[r > 1] = 1
r[r < 0] = 0
print(calculate_auc(classes, r))

#print TestSVM.test_model(texts, classes, models[-1])
#print TestSVM.test(texts, classes, models, nn_params)

n = Ensemble(texts, classes, nn_params, models)

# ported from Python 2: text-mode open, next(reader), no manual decoding
texts = []
with open('test.csv', newline='') as f:
    csvr = csv.reader(f, delimiter=',', quotechar='"')
    next(csvr)  # skip the header row
    for row in csvr:
        texts.append(row[1])

results = n.classify(texts)
results[results < 0] = 0
results[results > 1] = 1

writer = open('rez.csv', 'w')
for r in results:
def __repr__(self):
    return '<PDB' + Ensemble.__repr__(self)[1:]
def __str__(self):
    return 'PDB' + Ensemble.__str__(self)
plt.rcParams["figure.figsize"] = (14, 12)
plt.ticklabel_format(style='plain', useOffset=False)

#%%
#data = pd.read_csv('../tommi_test_data.csv', sep=";", header=0)
data = pd.read_csv('../tommi_test_data_more_diff_steps.csv', sep=";", header=0)
data = data.loc[data["Warning_code"] == 0]
data = data.reset_index(drop=True)

tforce_DF = DataHandler.calculateTotalForce(data)
step_t_DF = DataHandler.calculateStepTime(data)

#%% Bagging test
avg_acc, real_label, pred_label = Ensemble.testBagging(step_t_DF)

pred_label_df = pred_label
real_label_df = real_label
pred_label_df = pred_label_df.replace("Normal", 0)
pred_label_df = pred_label_df.replace("Fall", 1)
real_label_df = real_label_df.replace("Normal", 0)
real_label_df = real_label_df.replace("Fall", 1)

avg_auc = roc_auc_score(real_label_df, pred_label_df)
print("AUC score: ", round(avg_auc, 2))

#%% 2d scatter
from sklearn.decomposition import PCA
from saveobject import save_obj

N = 100
steps = 1000
repeat = 30
res = 0.01
b1 = 0
b2 = 3

B = np.arange(b1, b2, res)
B = B[B != 0]
B = 1 / B
M = np.array([0])

ensemble = Ensemble(N, B, M, steps, repeat, False)
ensemble.getStats()
beta = ensemble.beta
mu = ensemble.mu
stats = ensemble.stats

save_obj(stats, "stats8")

# keys = ["energy","magnetization","population","entropy"]
# def calcStats(size,beta,mu,steps,times):
#     global keys
#     stats = {}
#     arr = {}
#     for key in keys:
    'max_depth': 6,
    'n_estimators': 1000,
    'learning_rate': 0.025,
    'subsample': 0.9
}

models = {
    "LGB-1": LGBMClassifier(**lgb_params),
    "XGB-1": XGBClassifier(**xgb_params),
    "LGB-2": LGBMClassifier(**lgb_params2),
    #"LGB-3": LGBMClassifier(**lgb_params3),
    "XGB-2": XGBClassifier(**xgb_params2),
    #"CAT": CatBoostClassifier(**cat_params),
    #"GBM": GradientBoostingClassifier(**gb_params),
    #"RF": RandomForestClassifier(**rf_params),
    #"ET": ExtraTreesClassifier(**et_params),
    #"ABC": AdaBoostClassifier(n_estimators=100),
}

start = time.time()
stack = Ensemble(4, list(models.values()),  # list() so the view is safely reusable
                 stacker=SGDClassifier(loss="log", max_iter=1000))
y_pred = stack.fit_predict(X, y, X_test)
print("Finished ensembling in %.2f seconds" % (time.time() - start))

sub = pd.DataFrame()
sub['id'] = id_test
sub['target'] = y_pred
sub.to_csv("%s.csv" % ("-".join(models.keys())), index=False)
def __init__(self):
    self.ensemble = Ensemble(instruments)