def test_get_my_doc_meta_with_category(client, admin, monkeypatch):
    login(client, admin)
    monkeypatch.setattr('app.task.views.DOCUMENT_PER_PAGE', 3)
    with captured_templates(client.application) as templates:
        assert client.get(
            url_for('task.my_doc_meta',
                    category=Category.FLIP.value)).status_code == 200
        template, context = templates.pop()
        assert template.name == 'task/document_dashboard.html'
        assert context['current_category'] == Category.FLIP.value
        assert set(get_ids(context['documents'].items)) == set(
            get_ids(
                DocumentMeta.objects(
                    create_by=admin, category=Category.FLIP.value).order_by(
                        '-priority', '-update_at').all()[:3]))

        assert client.get(
            url_for('task.my_doc_meta',
                    category=Category.SHORT_TERM.value,
                    page=2)).status_code == 200
        template, context = templates.pop()
        assert template.name == 'task/document_dashboard.html'
        assert context['current_category'] == Category.SHORT_TERM.value
        assert set(get_ids(context['documents'].items)) == set(
            get_ids(
                DocumentMeta.objects(
                    create_by=admin,
                    category=Category.SHORT_TERM.value).order_by(
                        '-priority', '-update_at').all()[3:6]))
def test_get_my_doc_meta_with_search(client, admin, monkeypatch):
    login(client, admin)
    monkeypatch.setattr('app.task.views.DOCUMENT_PER_PAGE', 3)
    with captured_templates(client.application) as templates:
        # search empty string to return all documents
        assert client.get(
            url_for('task.my_doc_meta', search='')).status_code == 200
        template, context = templates.pop()
        assert template.name == 'task/document_dashboard.html'
        assert context['current_search'] == ''
        assert set(get_ids(context['documents'].items)) == set(
            get_ids(
                DocumentMeta.objects(create_by=admin).order_by(
                    '-priority', '-update_at').all()[:3]))
def evaluate_sents(data_source, uids, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN':
        model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    sent_loss = defaultdict(list)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        batch_uids = get_ids(uids, i, args, evaluation=True)
        output, hidden = model(data, hidden, decode=True)
        output_flat = output.view(-1, ntokens)
        per_word_loss = criterion(output_flat, targets)
        # Accumulate the per-token losses under each sentence id.
        batch_uids_list = batch_uids.reshape(-1).tolist()
        loss_list = per_word_loss.tolist()
        for loss, uid in zip(loss_list, batch_uids_list):
            sent_loss[uid].append(loss)
        total_loss += torch.mean(per_word_loss).item() * len(data)
        hidden = repackage_hidden(hidden)
    # Average the accumulated token losses per sentence.
    avg_sent_loss = {uid: float(np.mean(losses))
                     for uid, losses in sent_loss.items()}
    return total_loss / len(data_source), avg_sent_loss
def get_result(net, gpu=False):
    ids = get_ids(dir_img)
    val = get_imgs_and_masks(ids, dir_img, dir_mask, 1.0)
    val_dice = eval_net(net, val, gpu)
    print('Validation Dice Coeff: {}'.format(val_dice))
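The UNet-style snippets in this section call get_ids, split_ids and split_train_val from a project-local utils module whose source is not shown here. The sketch below is an assumed, minimal reconstruction for illustration only, not the original implementation.

# Hedged sketch (assumed, not the original utils module): plausible minimal
# versions of the helpers used by the UNet-style training snippets.
import os
import random


def get_ids(dir_img):
    """Yield the file-name stems (no extension) of every image in dir_img."""
    return (os.path.splitext(f)[0] for f in os.listdir(dir_img))


def split_ids(ids, n=2):
    """Pair each id with a crop position, e.g. (id, 0), (id, 1), doubling the set."""
    return ((idx, i) for idx in ids for i in range(n))


def split_train_val(ids, val_percent=0.05):
    """Shuffle the ids and split them into 'train' and 'val' lists."""
    ids = list(ids)
    random.shuffle(ids)
    n_val = int(len(ids) * val_percent)
    return {'train': ids[:-n_val] if n_val else ids,
            'val': ids[-n_val:] if n_val else []}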
def send_reqs(self):
    lines = utils.get_ids()
    for profile_id in lines:
        print('[] Send request to {}'.format(profile_id))
        req = utils.set_request(profile_id=profile_id)
        r = requests.post(config.urls['base'] + config.urls['connect'],
                          headers=req['headers'],
                          data=req['body'])
        print('\t-> Done')
        utils.update_ids(profile_id=profile_id)
        print("\t-> Next round : {}\n".format(
            utils.get_next_round(round_duration=180)))
        time.sleep(180)
def _init_data(self):
    if 'market' in str(self.data_dir).lower():
        self.dataset = 'market'
    elif 'duke' in str(self.data_dir).lower():
        self.dataset = 'duke'
    self.imgs = list(self.data_dir.glob('*.jpg'))
    # Filter out labels with -1
    self.imgs = [img for img in self.imgs if '-1' not in img.stem]
    self.cam_ids, self.labels, self.frames = get_ids(self.imgs, self.dataset)
    self.num_cams = len(set(self.cam_ids))
    self.classes = tuple(set(self.labels))
    # Convert labels to continuous idxs
    self.class_to_idx = {label: i for i, label in enumerate(self.classes)}
    self.targets = [self.class_to_idx[label] for label in self.labels]
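The _init_data method above relies on a get_ids(imgs, dataset) helper that extracts camera ids, person labels and frame numbers from Market-1501 / DukeMTMC-style file names. The parser below is a hedged sketch under that assumed naming convention, not the project's actual helper.

# Hedged sketch (assumed): parse camera id, person label and frame number from
# re-ID file names such as 0002_c1s1_000451_03.jpg (Market-1501 convention);
# the real helper used above may differ, especially for DukeMTMC file names.
def get_ids(imgs, dataset):
    cam_ids, labels, frames = [], [], []
    for img in imgs:
        parts = img.stem.split('_')          # e.g. ['0002', 'c1s1', '000451', '03']
        labels.append(parts[0])              # person id kept as a string label
        cam_ids.append(int(parts[1][1]))     # digit right after 'c'
        frames.append(int(''.join(ch for ch in parts[2] if ch.isdigit())))
    return cam_ids, labels, frames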
def relevant_full_corpus(kwLimit):
    #corpus = utils.get_data('SELECT id FROM refdesc WHERE abstract_keywords IS NOT NULL;','../../Data/dumps/20160224_cybergeo.sqlite3')
    corpus = utils.get_ids('cybergeo', 'keywords')
    occurence_dicos = utils.import_kw_dico('cybergeo', 'keywords')
    mongo = pymongo.MongoClient('localhost', 27017)
    database = mongo['relevant']
    relevant = 'relevant_full_' + str(kwLimit)
    network = 'network_full_' + str(kwLimit) + '_eth10'
    database[relevant].delete_many({"cumtermhood": {"$gt": 0}})
    database[relevant].create_index('keyword')
    [keywords, dico, frequencies, edge_list] = kwFunctions.extract_relevant_keywords(
        corpus, kwLimit, occurence_dicos)
    print('insert relevant...')
    for kw in keywords.keys():
        butils.update_kw_tm(
            kw, keywords[kw], frequencies[kw],
            math.log(keywords[kw]) * math.log(len(corpus) / frequencies[kw]),
            database, relevant)
    print('insert edges...')
    database[network].delete_many({"weight": {"$gt": 0}})
    database[network].insert_many(edge_list)
                   for player in sorted(players)]
    print '%s %s' % (score, salary)
    print '\t%s' % (', '.join(player_strs[:4]))
    print '\t%s' % (', '.join(player_strs[-4:]))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--salaries', type=str)
    parser.add_argument('-p', '--prefill', type=str,
                        help=('Quoted list of comma-separated player names'
                              ' (e.g. "Stephen Curry, James Harden")'))
    parser.add_argument('-i', '--ignore', type=str,
                        help=('Quoted list of comma-separated player names'
                              ' (e.g. "Stephen Curry, James Harden")'))
    parser.add_argument('-m', '--min_score', type=float)
    args = parser.parse_args()

    if args.salaries:
        print 'Loading score data...'
        scores = get_scores_with_freq(get_ids(limit=7), min_games=3)
        salaries = load_salaries(args.salaries)
        prefill_players = ([x.strip() for x in args.prefill.split(',')]
                           if args.prefill else [])
        ignore_players = ([x.strip() for x in args.ignore.split(',')]
                          if args.ignore else [])
        min_score = args.min_score if args.min_score is not None else -1
        generate(scores, salaries,
                 prefill_players=prefill_players,
                 ignore_players=ignore_players,
                 min_score=min_score)
    else:
        print 'Unable to run script: requires a --salaries argument'
"taylor_batches": args.taylor_batches, "prune_channels": args.prune_channels, "gpu": args.gpu, "load": args.load, "channel_txt": args.channel_txt, "scale": args.scale, "lr": args.lr, "iters": args.iters, "epochs": args.epochs }, indent=4, sort_keys=True))) # Dataset if not os.path.exists(splitfile): # Our constant datasplit ids = get_ids(dir_img) # [file1, file2] ids = split_ids(ids) # [(file1, 0), (file1, 1), (file2, 0), ...] iddataset = split_train_val(ids, 0.2, splitfile) log.info("New split dataset") else: with open(splitfile) as f: iddataset = json.load(f) log.info("Load split dataset") train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask, args.scale) val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask, args.scale) # Model Initialization net = UNet(n_channels=3, n_classes=1, f_channels=args.channel_txt)
def train_net(net, epochs=5, batch_size=1, lr=0.1, val_percent=0.2, save_cp=True, gpu=False, img_scale=0.5): path = [['data/ori1/', 'data/gt1/'], ['data/original1/', 'data/ground_truth1/'], ['data/Original/', 'data/Ground_Truth/']] dir_img = path[0][0] dir_mask = path[0][1] dir_checkpoint = 'sdgcheck/' ids = get_ids(dir_img) ids = split_ids(ids) iddataset = split_train_val(ids, val_percent) print(''' Starting training: Epochs: {} Batch size: {} Learning rate: {} Training size: {} Validation size: {} Checkpoints: {} CUDA: {} '''.format(epochs, batch_size, lr, len(iddataset['train']), len(iddataset['val']), str(save_cp), str(gpu))) N_train = len(iddataset['train']) optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.7, weight_decay=0.005) ''' optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=0.0005) ''' criterion = nn.BCELoss() for epoch in range(epochs): print('Starting epoch {}/{}.'.format(epoch + 1, epochs)) net.train() # reset the generators train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask, img_scale) val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask, img_scale) epoch_loss = 0 x = 0 for i, b in enumerate(batch(train, batch_size)): imgs = np.array([i[0] for i in b]).astype(np.float32) true_masks = np.array([i[1] for i in b]) ''' ori=np.transpose(imgs[0], axes=[1, 2, 0]) scipy.misc.imsave("ori/ori_"+str(x)+'.jpg', ori) gt = np.stack((true_masks[0],)*3, axis=-1) #gt=np.transpose(true_masks[0], axes=[1, 2, 0]) scipy.misc.imsave("gt/gt_"+str(x)+'.jpg', gt) ''' imgs = torch.from_numpy(imgs) true_masks = torch.from_numpy(true_masks) x += 1 if gpu: imgs = imgs.cuda() true_masks = true_masks.cuda() masks_pred = net(imgs) masks_probs_flat = masks_pred.view(-1) true_masks_flat = true_masks.view(-1) loss = criterion(masks_probs_flat, true_masks_flat) epoch_loss += loss.item() print('{0:.4f} --- loss: {1:.6f}'.format(i * batch_size / N_train, loss.item())) optimizer.zero_grad() loss.backward() optimizer.step() print('Epoch finished ! Loss: {}'.format(epoch_loss / i)) if 1: val_dice = eval_net(net, val, gpu) print('Validation Dice Coeff: {}'.format(val_dice)) if save_cp: torch.save(net.state_dict(), dir_checkpoint + 'CP{}.pth'.format(epoch + 1)) print('Checkpoint {} saved !'.format(epoch + 1))
def train_net(net, epochs=5, batch_size=1, lr=0.003, val_percent=0.20, loss_lambda=5, save_cp=True, gpu=False, img_scale=0.5, expositions_num=15, logg_freq=15, tb=False, w_decay=0.0005, use_notifications=False, polyaxon=False, outputs_path='checkpoints'): # === Localize training data =================================================== if polyaxon: data_paths = get_data_paths() dir_checkpoints = get_outputs_path() dataSets_dir = os.path.join(data_paths['data1'], 'eprado', 'USLDR-DataSet') #dataSets_dir = os.path.join(data_paths['data1'] , 'eprado', 'LDR_DataSet') else: dataSets_dir = os.path.join(wk_dir, "LDR_DataSet") dir_checkpoints = os.path.join(wk_dir, outputs_path) print('Dataset_dir', dataSets_dir) print('Outputs_path', dir_checkpoints) experiment_id = datetime.datetime.now().strftime('%d%m_%H%M_') experiment_name = 'ExpandnetL_psn_{}_bs{}_lr{}_exps{}'.format( experiment_id, batch_size, lr, expositions_num) dir_img = os.path.join(dataSets_dir, 'Org_images/') dir_compressions = os.path.join(dataSets_dir, 'c_images/') dir_mask = os.path.join(dataSets_dir, 'c_images/') #if tb: #dummy_input = torch.rand(1, 3, 128, 128) #writer.add_graph(net, (dummy_input,)) #writer.close() # === Load Training/Validation data ===================================================== ids = get_ids(dir_compressions) # Split into train test idsset = list(ids) kf = KFold(n_splits=5, shuffle=False) #print('Train splits: ',kf.get_n_splits(dataset)) best_psnr_m = 0 best_psnr_hvs = 0 #for train_index, test_index in kf.split(idsset): iddataset = split_train_val(idsset, expositions_num, val_percent) #test_set = [] #for im_id in test_index: # for e in range(expositions_num): # test_set.append(idsset[im_id]) N_train = len(iddataset['train']) N_val = len(iddataset['val']) N_test = 0 #len(test_set) #=====CHOOSE Loss Criterion============================================================= #criterion = nn.MSELoss(reduction='mean') criterion = ExpandNetLoss(loss_lambda=loss_lambda) optimizer = optim.Adagrad(net.parameters(), lr=lr, lr_decay=0.000001, weight_decay=w_decay) #optimizer = optim.SGD(net.parameters(), # lr=lr, # momentum=0.9, # weight_decay=0.0005) since = time.time() print(''' Training SETUP: Epochs: {0:} Batch size: {1:} Optimizer: Adagrad Learning rate: {2:} Weight decay: {3:} Training size: {4:} Validation size: {5:} Test size: {6:} Checkpoints: {7:} CUDA: {8:} '''.format(epochs, batch_size, lr, w_decay, N_train, N_val, N_test, str(save_cp), str(gpu))) train_dataset = HdrDataset(iddataset['train'], dir_compressions, dir_mask, expositions_num) val_dataset = HdrDataset(iddataset['val'], dir_compressions, dir_mask, expositions_num) #test_dataset = HdrDataset(test_set, dir_compressions, dir_mask,expositions_num) train_data_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=False) val_data_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=False) #test_data_loader = DataLoader(test_dataset,batch_size=batch_size,shuffle=True) best_hvsm = 0.0 global_psnr_m = [] global_psnr_hvs = [] for epoch in range(epochs): print('\n') print('{}{}{}'.format('+', '=' * 78, '+')) print('| Starting epoch {}/{}. 
{}'.format(epoch + 1, epochs, (' ' * 57) + '|')) print('{}{}{}'.format('|', '-' * 78, '|')) begin_of_epoch = time.time() tot_steps = math.trunc(N_train / batch_size) net.train() train_loss = 0 losses = [] val_loss = 0 step = 0 train_sample = [] train_acc = 0 val_hvsm = 0 val_hvs = 0 model_pnsr_m = 0 for i, b in enumerate(train_data_loader): step += 1 imgs, true_masks, imgs_ids = b['input'], b['target'], b['id'] #print(i, b['input'].size(), b['target'].size()) #input: [15, 3, 224, 224]), target: [15, 3, 224, 224] #print('>>>>>>> Input max: ' , torch.max(imgs[0])) #print('>>>>>>> mask max : ', torch.max(true_masks[0])) if gpu: imgs = imgs.cuda() true_masks = true_masks.cuda() else: print(' GPU not available') # Predicted mask images optimizer.zero_grad() prediction = net(imgs) #prediction shape: [B, 3, 224, 224] #cost, cost_input_output = Hdr_loss(imgs, true_masks, prediction, sep_loss=False, gpu=gpu, tb=tb) cost = criterion(prediction, true_masks) #loss is torch tensor losses.append(cost.item()) train_loss = np.mean(losses) cost.backward() optimizer.step() if step == 1 or step % logg_freq == 0: #print('| Step: {0:}, cost:{1:}, Train Loss:{2:.9f}, Train Acc:{3:.9f}'.format(step,cost, train_loss,train_acc/step)) print('| Step: {0:}, cost:{1:}, Train Loss:{2:.9f}'.format( step, cost, train_loss)) #Last Step of this Epoch if step == math.trunc(tot_steps): num_in_batch = random.randrange(imgs.size(0)) train_sample_name = imgs_ids[num_in_batch] train_sample = [ imgs[num_in_batch], true_masks[num_in_batch], prediction[num_in_batch] ] t_exp_name = 'Train_' + experiment_name saveTocheckpoint(dir_checkpoints, t_exp_name, train_sample_name, epoch, train_sample[0], train_sample[1], train_sample[2]) if tb: print( '| saving train step {0:} sample : input,target & pred' .format(step)) grid = torchvision.utils.make_grid(train_sample, nrow=3) writer.add_image('train_sample', grid, 0) #if epoch == 1 or epoch % 15 == 0 or epoch == epochs: val_loss, val_hvsm, val_hvs = eval_hdr_net(net, dir_checkpoints, experiment_name, val_data_loader, criterion, epoch, gpu, batch_size, expositions_num=15, tb=tb) if tb: writer.add_scalar('training_loss: ', train_loss, epoch) writer.add_scalar('validation_loss', val_loss, epoch) writer.add_scalar('val_hvsm', val_hvsm, epoch) writer.add_scalar('val_hvs', val_hvs, epoch) writer.add_scalars('losses', { 'training_loss': train_loss, 'val_loss': val_loss }, epoch) if polyaxon: experiment.log_metrics(step=epoch, training_loss=train_loss, validation_loss=val_loss, val_hvsm=val_hvsm, val_hvs=val_hvs) print('{}{}{}'.format('+', '=' * 78, '+')) print('| {0:} Epoch {1:} finished ! {2:}|'.format( ' ' * 28, (epoch + 1), ' ' * 29)) print('{}{}{}'.format('+', '-' * 78, '+')) print('| Summary: Train Loss: {0:0.07}, Val Loss:{1:}'.format( train_loss, val_loss)) print('| Avrg psnr-hvs_m :{0:0.04},Avrg psnr-hvs :{1:0.04}'. 
format(val_hvsm, val_hvs)) time_epoch = time.time() - begin_of_epoch print('| Epoch ETC: {:.0f}m {:.0f}s'.format(time_epoch // 60, time_epoch % 60)) print('{}{}{}'.format('+', '=' * 78, '+')) if save_cp and (val_hvsm > best_hvsm): best_hvsm = val_hvsm model_path = os.path.join(dir_checkpoints, 'BestCP.pth') torch.save(net.state_dict(), model_path) print('Checkpoint saved !') global_psnr_hvs.append(val_hvs) global_psnr_m.append(val_hvsm) ''' test_psnr_m, test_psnr_hvs = test_hdr_net(model_path,dir_checkpoints, experiment_name, test_data_loader, criterion,gpu,tb) if save_cp and (test_psnr_m > best_psnr_m): best_psnr_m = test_psnr_m best_model_path = os.path.join(dir_checkpoints, 'Best_CP.pth') torch.save(net.state_dict(),best_model_path) print('Best model saved !') ''' print('>' * 80) time_elapsed = time.time() - since print('Training complete in {:.0f}m {:.0f}s'.format( time_elapsed // 60, time_elapsed % 60)) print('Final Average psnr-hvs_m: {:.0f}, psnr-hvs: {:.0f}'.format( np.mean(global_psnr_m), np.mean(global_psnr_hvs))) if tb: writer.close() if use_notifications: end_msg = "train.py finished at: {}(".format( str(datetime.datetime.now())) push = pb.push_note("usHDR: Finish", end_msg)
def main(): parser = argparse.ArgumentParser( description='Graph CNNs for population graphs: ' 'classification of the ABIDE dataset') parser.add_argument( '--dropout', default=0.3, type=float, help='Dropout rate (1 - keep probability) (default: 0.3)') parser.add_argument( '--decay', default=5e-4, type=float, help='Weight for L2 loss on embedding matrix (default: 5e-4)') parser.add_argument( '--hidden1', default=32, type=int, help='Number of filters in hidden layers (default: 16)') # parser.add_argument('--lrate', default=0.005, type=float, help='Initial learning rate (default: 0.005)') parser.add_argument('--lrate', default=1e-2, type=float, help='Initial learning rate (default: 0.005)') # parser.add_argument('--atlas', default='ho', help='atlas for network construction (node definition) (default: ho, ' # 'see preprocessed-connectomes-project.org/abide/Pipelines.html ' # 'for more options )') parser.add_argument('--epochs', default=100, type=int, help='Number of epochs to train') parser.add_argument('--num_features', default=2000, type=int, help='Number of features to keep for ' 'the feature selection step (default: 2000)') parser.add_argument('--num_training', default=1.0, type=float, help='Percentage of training set used for ' 'training (default: 1.0)') parser.add_argument('--depth', default=0, type=int, help='Number of additional hidden layers in the GCN. ' 'Total number of hidden layers: 1+depth (default: 0)') parser.add_argument('--model', default='gcn_cheby', help='gcn model used (default: gcn_cheby, ' 'uses chebyshev polynomials, ' 'options: gcn, gcn_cheby, dense )') # parser.add_argument('--seed', default=89, type=int, help='Seed for random initialisation (default: 123)') parser.add_argument( '--folds', default=11, type=int, help='For cross validation, specifies which fold will be ' 'used. All folds are used if set to 11 (default: 11)') parser.add_argument( '--save', default=200, type=int, help='Parameter that specifies if results have to be saved. ' 'Results will be saved if set to 1 (default: 1)') parser.add_argument('--connectivity', default='correlation', help='Type of connectivity used for network ' 'construction (default: correlation, ' 'options: correlation, partial correlation, ' 'tangent)') parser.add_argument('--train', default=1, type=int) args = parser.parse_args() start_time = time.time() # GCN Parameters params = dict() params['model'] = args.model # gcn model using chebyshev polynomials params['lrate'] = args.lrate # Initial learning rate params['epochs'] = args.epochs # Number of epochs to train params['dropout'] = args.dropout # Dropout rate (1 - keep probability) params['hidden1'] = args.hidden1 # Number of units in hidden layers params['decay'] = args.decay # Weight for L2 loss on embedding matrix params['early_stopping'] = params[ 'epochs'] # Tolerance for early stopping (# of epochs). No early stopping if set to param.epochs params['max_degree'] = 3 # Maximum Chebyshev polynomial degree. params[ 'depth'] = args.depth # number of additional hidden layers in the GCN. 
Total number of hidden layers: 1+depth # params['seed'] = args.seed # seed for random initialisation # GCN Parameters params[ 'num_features'] = args.num_features # number of features for feature selection step params[ 'num_training'] = args.num_training # percentage of training set used for training params[ 'train'] = args.train # percentage of training set used for training # atlas = args.atlas # atlas for network construction (node definition) # connectivity = args.connectivity # type of connectivity used for network construction # Get class labels # subject_IDs = Reader.get_ids() ################################################################## subject_IDs, shuffled_indices = Reader.get_ids() ################################################################## labels = Reader.get_labels(subject_IDs, score='DX_Group') # labels # Get acquisition site # ####### sites = Reader.get_subject_score(subject_IDs, score='SITE_ID') ########## unique = np.unique(list(sites.values())).tolist() num_classes = 2 # MDD or HC num_nodes = len(subject_IDs) # Initialise variables for class labels and acquisition sites y_data = np.zeros([num_nodes, num_classes]) y = np.zeros([num_nodes, 1]) ########## site = np.zeros([num_nodes, 1], dtype=np.int) # Get class labels and acquisition site for all subjects for i in range(num_nodes): y_data[i, int(labels[subject_IDs[i]]) - 1] = 1 y[i] = int(labels[subject_IDs[i]]) ########## site[i] = unique.index(sites[subject_IDs[i]]) import pickle # with open('./label.pkl', 'wb') as filehandle: # pickle.dump(np.argmax(y_data, axis=1), filehandle) # Compute feature vectors (vectorised connectivity networks) ####### Granger Causality Analysis # data_fld = './granger_casuality' # features = Reader.load_ec_GCA(subject_IDs, data_fld) ####### features = Reader.get_networks(subject_IDs, variable='correlation', isDynamic=False, isEffective=True) ############################################################ shuffled_features = features[shuffled_indices] features = shuffled_features.copy() ############################################################ # features = Reader.get_networks(subject_IDs, variable='graph_measure', isDynamic=True) # np.save('./MDD_dataset/features_GCA.npy', features) # np.save('./MDD_dataset/labels.npy', np.argmax(y_data, axis=1)) # Compute population graph using gender and acquisition site graph = Reader.create_affinity_graph_from_scores(['Age', 'Sex'], subject_IDs) # graph = Reader.create_affinity_graph_from_scores(['Sex'], subject_IDs) # Folds for cross validation experiments #num_samples = np.shape(features)[0] skf = StratifiedKFold(n_splits=10) #loo = LeaveOneOut() train_ind_set = [] test_ind_set = [] for train_ind, test_ind in reversed( list(skf.split(np.zeros(num_nodes), np.squeeze(y)))): train_ind_set.append(train_ind) test_ind_set.append(test_ind) cur_time = time.time() # import pickle # with open('./MDD_dataset/train_ind.pkl', 'wb') as filehandle: # pickle.dump(train_ind_set, filehandle) # with open('./MDD_dataset/test_ind.pkl', 'wb') as filehandle: # pickle.dump(test_ind_set, filehandle) if args.folds == 11: # run cross validation on all folds scores = Parallel(n_jobs=10)(delayed(train_fold)( cv, train_ind, test_ind, test_ind, graph, features, y, y_data, params, subject_IDs, cur_time) for train_ind, test_ind, cv in zip( train_ind_set, test_ind_set, range(10))) test_auc = [x[0] for x in scores] test_accuracy = [x[1] for x in scores] test_sensitivity = [x[2] for x in scores] test_specificity = [x[3] for x in scores] test_pred = [x[4] for x in scores] test_lab 
= [x[5] for x in scores] print('Accuracy : ' + str(np.mean(test_accuracy)) + ' + ' + str(np.std(test_accuracy))) print('Sensitivity : ' + str(np.mean(test_sensitivity)) + ' + ' + str(np.std(test_sensitivity))) print('Specificity : ' + str(np.mean(test_specificity)) + ' + ' + str(np.std(test_specificity))) print('AUC : ' + str(np.mean(test_auc)) + ' + ' + str(np.std(test_auc))) # np.savez('./statistical_test/FC_Lasso_MLP_pred.npz', pred=test_pred, allow_pickle=True) # np.savez('./statistical_test/FC_Lasso_MLP_lab.npz', lab=test_lab, allow_pickle=True) else: # compute results for only one fold cv_splits = list(skf.split(features, np.squeeze(y))) train = cv_splits[args.folds][0] test = cv_splits[args.folds][1] val = test scores_acc, scores_auc, scores_lin, scores_auc_lin, fold_size = train_fold( train, test, val, graph, features, y, y_data, params, subject_IDs, cur_time) print('overall linear accuracy %f' + str(np.sum(scores_lin) * 1. / fold_size)) print('overall linear AUC %f' + str(np.mean(scores_auc_lin))) print('overall accuracy %f' + str(np.sum(scores_acc) * 1. / fold_size)) print('overall AUC %f' + str(np.mean(scores_auc)))
def train(data_path, *, base_output_path="models", run_name=None, data_name=None, net_name="wave_net", clean=False, input_length=9, output_length=1, n_markers=60, stride=1, train_fraction=.85, val_fraction=0.15, only_moving_frames=False, n_filters=512, filter_width=2, layers_per_level=3, n_dilations=None, latent_dim=750, epochs=50, batch_size=1000, lossfunc='mean_squared_error', lr=1e-4, batches_per_epoch=0, val_batches_per_epoch=0, reduce_lr_factor=0.5, reduce_lr_patience=3, reduce_lr_min_delta=1e-5, reduce_lr_cooldown=0, reduce_lr_min_lr=1e-10, save_every_epoch=False): """Trains the network and saves the results to an output directory. :param data_path: Path to an HDF5 file with marker data. :param base_output_path: Path to folder in which the run data folder will be saved :param run_name: Name of the training run. If not specified, will be formatted according to other parameters. :param data_name: Name of the dataset for use in formatting run_name :param net_name: Name of the network for use in formatting run_name :param clean: If True, deletes the contents of the run output path :param input_length: Number of frames to input into model :param output_length: Number of frames model will attempt to predict :param n_markers: Number of markers to use :param stride: Downsampling rate of training set. :param train_fraction: Fraction of dataset to use as training :param val_fraction: Fraction of dataset to use as validation :param only_moving_frames: If True only use moving_frames. :param filter_width: Width of base convolution filter :param layers_per_level: Number of layers to use at each convolutional block :param n_dilations: Number of dilations for wavenet filters. (See models.wave_net) :param latent_dim: Number of latent dimensions (Currently just for LSTM) :param n_filters: Number of filters to use as baseline (see create_model) :param epochs: Number of epochs to train for :param batch_size: Number of samples per batch :param batches_per_epoch: Number of batches per epoch (validation is evaluated at the end of the epoch) :param val_batches_per_epoch: Number of batches for validation :param reduce_lr_factor: Factor to reduce the learning rate by (see ReduceLROnPlateau) :param reduce_lr_patience: How many epochs to wait before reduction (see ReduceLROnPlateau) :param reduce_lr_min_delta: Minimum change in error required before reducing LR (see ReduceLROnPlateau) :param reduce_lr_cooldown: How many epochs to wait after reduction before LR can be reduced again (see ReduceLROnPlateau) :param reduce_lr_min_lr: Minimum that the LR can be reduced down to (see ReduceLROnPlateau) :param save_every_epoch: Save weights at every epoch. If False, saves only initial, final and best weights. 
""" # Set the n_dilations param if n_dilations is None: n_dilations = np.int32(np.floor(np.log2(input_length))) else: n_dilations = int(n_dilations) # Load Data print('Loading Data') markers, marker_means, marker_stds, bad_frames, moving_frames = \ load_dataset(data_path) moving_frames = np.squeeze(moving_frames > 0) if only_moving_frames: markers = markers[moving_frames, :] bad_frames = bad_frames[moving_frames, :] markers = markers[::stride, :] bad_frames = bad_frames[::stride, :] # Get Ids print('Getting indices') [input_ids, target_ids] = get_ids(bad_frames, input_length, output_length, True, True) # Get the training, testing, and validation trajectories by indexing into # the marker arrays n_train = np.int32(np.round(input_ids.shape[0]*train_fraction)) n_val = np.int32(np.round(input_ids.shape[0]*val_fraction)) X = markers[input_ids[:n_train, :], :] Y = markers[target_ids[:n_train, :], :] val_X = markers[input_ids[n_train:(n_train+n_val), :], :] val_Y = markers[target_ids[n_train:(n_train+n_val), :], :] test_X = markers[input_ids[(n_train+n_val):, :], :] test_Y = markers[target_ids[(n_train+n_val):, :], :] # Create network print('Compiling network') if isinstance(net_name, keras.models.Model): model = net_name net_name = model.name elif net_name == 'wave_net': model = create_model(net_name, lossfunc=lossfunc, lr=lr, input_length=input_length, output_length=output_length, n_markers=n_markers, n_filters=n_filters, filter_width=filter_width, layers_per_level=layers_per_level, n_dilations=n_dilations, print_summary=False) elif net_name == 'lstm_model': model = create_model(net_name, lossfunc=lossfunc, lr=lr, input_length=input_length, n_markers=n_markers, latent_dim=latent_dim, print_summary=False) elif net_name == 'wave_net_res_skip': model = create_model(net_name, lossfunc=lossfunc, lr=lr, input_length=input_length, n_markers=n_markers, n_filters=n_filters, filter_width=filter_width, layers_per_level=layers_per_level, n_dilations=n_dilations, print_summary=True) if model is None: print("Could not find model:", net_name) return # Build run name if needed if data_name is None: data_name = os.path.splitext(os.path.basename(data_path))[0] if run_name is None: run_name = "%s-%s_epochs=%d_input_%d_output_%d" \ % (data_name, net_name, epochs, input_length, output_length) print("data_name:", data_name) print("run_name:", run_name) # Initialize run directories print('Building run folders') run_path = create_run_folders(run_name, base_path=base_output_path, clean=clean) # Save the training information in a mat file. 
print('Saving training info') savemat(os.path.join(run_path, "training_info.mat"), {"data_path": data_path, "base_output_path": base_output_path, "run_name": run_name, "data_name": data_name, "net_name": net_name, "clean": clean, "stride": stride, "input_length": input_length, "output_length": output_length, "n_filters": n_filters, "n_markers": n_markers, "epochs": epochs, "batch_size": batch_size, "train_fraction": train_fraction, "val_fraction": val_fraction, "only_moving_frames": only_moving_frames, "filter_width": filter_width, "layers_per_level": layers_per_level, "n_dilations": n_dilations, "batches_per_epoch": batches_per_epoch, "val_batches_per_epoch": val_batches_per_epoch, "reduce_lr_factor": reduce_lr_factor, "reduce_lr_patience": reduce_lr_patience, "reduce_lr_min_delta": reduce_lr_min_delta, "reduce_lr_cooldown": reduce_lr_cooldown, "reduce_lr_min_lr": reduce_lr_min_lr, "save_every_epoch": save_every_epoch}) # Save initial network print('Saving initial network') model.save(os.path.join(run_path, "initial_model.h5")) # Initialize training callbacks history_callback = LossHistory(run_path=run_path) reduce_lr_callback = ReduceLROnPlateau(monitor="val_loss", factor=reduce_lr_factor, patience=reduce_lr_patience, verbose=1, mode="auto", epsilon=reduce_lr_min_delta, cooldown=reduce_lr_cooldown, min_lr=reduce_lr_min_lr) if save_every_epoch: save_string = "weights/weights.{epoch:03d}-{val_loss:.9f}.h5" checkpointer = ModelCheckpoint(filepath=os.path.join(run_path, save_string), verbose=1, save_best_only=False) else: checkpointer = ModelCheckpoint(filepath=os.path.join(run_path, "best_model.h5"), verbose=1, save_best_only=True) # Train! print('Training') t0_train = time() training = model.fit(X, Y, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(val_X, val_Y), callbacks=[history_callback, checkpointer, reduce_lr_callback]) # Compute total elapsed time for training elapsed_train = time() - t0_train print("Total runtime: %.1f mins" % (elapsed_train / 60)) # Save final model print('Saving final model') model.history = history_callback.history model.save(os.path.join(run_path, "final_model.h5"))
def train_net( net, epochs=5, batch_size=1, lr=0.1, val_percent=0.05, # 训练集:验证集= 0.95: 0.05 save_cp=True, gpu=False, img_scale=0.5): dir_img = opt_train.dir_img dir_mask = opt_train.dir_mask dir_checkpoint = opt_train.dir_checkpoint # 得到 图片路径列表 ids为 图片名称(无后缀名) ids = get_ids(dir_img) # 得到truple元组 (无后缀名的 图片名称,序号) # eg:当n为2 图片名称为bobo.jpg 时, 得到(bobo,0) (bobo,1) # 当序号为0 时,裁剪宽度,得到左边部分图片 当序号为1 时,裁剪宽度,得到右边部分图片 ids = split_ids(ids) # 打乱数据集后,按照val_percent的比例来 切分 训练集 和 验证集 iddataset = split_train_val(ids, val_percent) print(''' 开始训练: Epochs: {} Batch size: {} Learning rate: {} 训练集大小: {} 验证集大小: {} GPU: {} '''.format(epochs, batch_size, lr, len(iddataset['train']), len(iddataset['val']), str(gpu))) #训练集大小 N_train = len(iddataset['train']) optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=0.0005) #二进制交叉熵 criterion = nn.BCELoss() for epoch in range(epochs): print('Starting epoch {}/{}.'.format(epoch + 1, epochs)) # reset the generators # 每轮epoch得到 训练集 和 验证集 train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask, img_scale) val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask, img_scale) # 重置epoch损失计数器 epoch_loss = 0 for i, b in enumerate(batch(train, batch_size)): # 得到 一个batch的 imgs tensor 及 对应真实mask值 # 当序号为0 时,裁剪宽度,得到左边部分图片[384,384,3] 当序号为1 时,裁剪宽度,得到右边部分图片[384,190,3] imgs = np.array([i[0] for i in b]).astype(np.float32) true_masks = np.array([i[1] for i in b]) # 将值转为 torch tensor imgs = torch.from_numpy(imgs) true_masks = torch.from_numpy(true_masks) # 训练数据转到GPU上 if gpu: imgs = imgs.cuda() true_masks = true_masks.cuda() # 得到 网络输出的预测mask [10,1,384,384] masks_pred = net(imgs) # 经过sigmoid masks_probs = F.sigmoid(masks_pred) masks_probs_flat = masks_probs.view(-1) true_masks_flat = true_masks.view(-1) # 计算二进制交叉熵损失 loss = criterion(masks_probs_flat, true_masks_flat) # 统计一个epoch的所有batch的loss之和,用以计算 一个epoch的 loss均值 epoch_loss += loss.item() # 输出 当前epoch的第几个batch 及 当前batch的loss print('{0:.4f} --- loss: {1:.6f}'.format(i * batch_size / N_train, loss.item())) # 优化器梯度清零 optimizer.zero_grad() # 反向传播 loss.backward() # 更新参数 optimizer.step() # 一轮epoch结束,该轮epoch的 loss均值 print('Epoch finished ! Loss: {}'.format(epoch_loss / i)) # 每轮epoch之后使用验证集进行评价 if True: # 评价函数:Dice系数 Dice距离用于度量两个集合的相似性 val_dice = eval_net(net, val, gpu) print('Validation Dice Coeff: {}'.format(val_dice)) # 保存模型 if save_cp: torch.save(net.state_dict(), dir_checkpoint + 'CP{}.pth'.format(epoch + 1)) print('Checkpoint {} saved !'.format(epoch + 1))
def train_net(net, epochs=5, batch_size=1, lr=0.1, val_percent=0.05, save_cp=True, gpu=False, img_scale=0.5): dir_img = 'data/train/' # 训练图像文件夹 dir_mask = 'data/train_masks/' # 图像的结果文件夹 dir_checkpoint = 'checkpoints/' # 训练好的网络保存文件夹 ids = get_ids(dir_img) # 图片名字的后4位为数字,能作为图片id # 得到元祖列表为[(id1,0),(id1,1),(id2,0),(id2,1),...,(idn,0),(idn,1)] # 这样的作用是后面重新设置生成器时会通过后面的0,1作为utils.py中get_square函数的pos参数,pos=0的取左边的部分,pos=1的取右边的部分 # 这样图片的数量就会变成2倍 ids = split_ids(ids) iddataset = split_train_val(ids, val_percent) # 将数据分为训练集和验证集两份 print(''' Starting training: Epochs: {} Batch size: {} Learning rate: {} Training size: {} Validation size: {} Checkpoints: {} CUDA: {} '''.format(epochs, batch_size, lr, len(iddataset['train']), len(iddataset['val']), str(save_cp), str(gpu))) N_train = len(iddataset['train']) # 训练集长度 optimizer = optim.SGD( net.parameters(), # 定义优化器 lr=lr, momentum=0.9, weight_decay=0.0005) criterion = nn.BCELoss() # 损失函数 for epoch in range(epochs): # 开始训练 print('Starting epoch {}/{}.'.format(epoch + 1, epochs)) net.train() # 设置为训练模式 # reset the generators重新设置生成器 # 对输入图片dir_img和结果图片dir_mask进行相同的图片处理,即缩小、裁剪、转置、归一化后,将两个结合在一起,返回(imgs_normalized, masks) train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask, img_scale) val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask, img_scale) epoch_loss = 0 for i, b in enumerate(batch(train, batch_size)): imgs = np.array([i[0] for i in b]).astype(np.float32) # 得到输入图像数据 true_masks = np.array([i[1] for i in b]) # 得到图像结果数据 imgs = torch.from_numpy(imgs) true_masks = torch.from_numpy(true_masks) if gpu: imgs = imgs.cuda() true_masks = true_masks.cuda() masks_pred = net(imgs) # 图像输入的网络后得到结果masks_pred,结果为灰度图像 masks_probs_flat = masks_pred.view(-1) # 将结果压扁 true_masks_flat = true_masks.view(-1) loss = criterion(masks_probs_flat, true_masks_flat) # 对两个结果计算损失 epoch_loss += loss.item() print('{0:.4f} --- loss: {1:.6f}'.format(i * batch_size / N_train, loss.item())) optimizer.zero_grad() loss.backward() optimizer.step() print('Epoch finished ! Loss: {}'.format(epoch_loss / i)) # 一次迭代后得到的平均损失 if 1: val_dice = eval_net(net, val, gpu) print('Validation Dice Coeff: {}'.format(val_dice)) if save_cp: torch.save(net.state_dict(), dir_checkpoint + 'CP{}.pth'.format(epoch + 1)) print('Checkpoint {} saved !'.format(epoch + 1))
        if score_tup[1] >= min_games
    }


def get_scores_with_freq(ids, min_games=0):
    return {
        player: score_tup
        for player, score_tup in get_weighted_scores(ids).iteritems()
        if score_tup[1] >= min_games
    }


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-w', '--weighted', action='store_const', const=True)
    parser.add_argument('-d', '--deltas', action='store_const', const=True)
    parser.add_argument('-l', '--limit', type=int)
    parser.add_argument('-s', '--salaries', type=str)
    parser.add_argument('-v', '--verbose', action='store_const', const=True)
    args = parser.parse_args()

    ids = get_ids(args.limit) if args.limit else get_ids()
    print 'Computing results with contests: %s' % ', '.join(ids)
    if args.weighted:
        if args.limit:
            run_weighted(ids[-args.limit:], args.salaries, args.verbose)
        else:
            run_weighted(ids, args.salaries, args.verbose)
    if args.deltas:
        limit = args.limit if args.limit else 7
        run_deltas(ids[-limit+2:], ids[-limit:-2], args.salaries, args.verbose)
def train_net(net, epochs=20, batch_size=1, lr=0.1, lrd=0.99, val_percent=0.05, save_cp=True, gpu=True, img_scale=0.5, imagepath='', maskpath='', cpsavepath=''): dir_img = imagepath dir_mask = maskpath dir_checkpoint = cpsavepath classweight = [1, 2, 3, 2] ids = get_ids(dir_img) ids = split_ids(ids) iddataset = split_train_val(ids, val_percent) logname = cpsavepath + '/' + 'losslog.txt' print(''' Starting training: Epochs: {} Batch size: {} Learning rate: {} Training size: {} Validation size: {} Checkpoints: {} CUDA: {} '''.format(epochs, batch_size, lr, len(iddataset['train']), len(iddataset['val']), str(save_cp), str(gpu))) N_train = len(iddataset['train']) optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=0.0005) # classweight = [1,4,8,4] criterion = BCELoss_weight(classweight) for epoch in range(epochs): print('Starting epoch {}/{}.'.format(epoch + 1, epochs)) with open(logname, "a") as f: f.write('Starting epoch {}/{}.'.format(epoch + 1, epochs) + "\n") net.train() # reset the generators train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask, img_scale) val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask, img_scale) epoch_loss = 0 lr = lr * lrd for param_group in optimizer.param_groups: param_group['lr'] = lr print('lr', lr) for i, b in enumerate(batch(train, batch_size)): imgs = np.array([i[0] for i in b]).astype(np.float32) true_masks = np.array([i[1] for i in b]) true_masks = np.transpose(true_masks, axes=[0, 3, 1, 2]) imgs = torch.from_numpy(imgs) true_masks = torch.from_numpy(true_masks) if gpu: imgs = imgs.cuda() true_masks = true_masks.cuda() masks_pred = net(imgs) # print('masks_pred.shape',masks_pred.shape) # print('true_masks.shape', true_masks.shape) masks_probs_flat = masks_pred true_masks_flat = true_masks loss = criterion(masks_probs_flat, true_masks_flat) epoch_loss += loss.item() printinfo = '{0:.4f} --- loss: {1:.6f}'.format( i * batch_size / N_train, loss.item()) print(printinfo) with open(logname, "a") as f: f.write(printinfo + "\n") optimizer.zero_grad() loss.backward() optimizer.step() print('Epoch finished ! Loss: {}'.format(epoch_loss / i)) with open(logname, "a") as f: f.write('Epoch finished ! Loss: {}'.format(epoch_loss / i) + "\n") if 1: val_dice = eval_net(net, val) print('Validation Dice Coeff: {}'.format(val_dice)) with open(logname, "a") as f: f.write('Validation Dice Coeff: {}'.format(val_dice) + "\n") if save_cp: torch.save(net.state_dict(), dir_checkpoint + 'CP{}.pth'.format(epoch + 1)) print('Checkpoint {} saved !'.format(epoch + 1)) with open(logname, "a") as f: f.write('Checkpoint {} saved !'.format(epoch + 1) + "\n")
def train_net(net, epochs=5, batch_size=1, lr=0.1, val_percent=0.05, save_cp=True, gpu=False, img_scale=0.5): dir_img = '../dataset/train/images/' dir_mask = '../dataset/train/masks/' dir_checkpoint = 'checkpoints/' ids = get_ids(dir_img) ids = split_ids(ids) iddataset = split_train_val(ids, val_percent) print((''' Starting training: Epochs: {} Batch size: {} Learning rate: {} Training size: {} Validation size: {} Checkpoints: {} CUDA: {} '''.format(epochs, batch_size, lr, len(iddataset['train']), len(iddataset['val']), str(save_cp), str(gpu)))) N_train = len(iddataset['train']) optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=0.0005) criterion = nn.BCELoss() for epoch in range(epochs): print(('Starting epoch {}/{}.'.format(epoch + 1, epochs))) net.train() # reset the generators train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask, img_scale) val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask, img_scale) epoch_loss = 0 for i, b in enumerate(batch(train, batch_size)): # b[batch_id][0/1]: a batch of image(0)+mask(1) #print(('b[0]',b[0][0].shape,b[0][1].shape)) #imgs = [] #for img_msk in b: # imgs.append(img_msk[0]) #print(len(imgs)) #imgs = np.array(imgs) # Wrong: not all images are of the same shape imgs = np.array([i[0] for i in b]).astype(np.float32) true_masks = np.array([i[1] for i in b]) imgs = torch.from_numpy(imgs) true_masks = torch.from_numpy(true_masks) if gpu: imgs = imgs.cuda() true_masks = true_masks.cuda() masks_pred = net(imgs) masks_probs_flat = masks_pred.view(-1) true_masks_flat = true_masks.view(-1) loss = criterion(masks_probs_flat, true_masks_flat) epoch_loss += loss.item() print(('{0:.4f} --- loss: {1:.6f}'.format(i * batch_size / N_train, loss.item()))) optimizer.zero_grad() loss.backward() optimizer.step() print(('Epoch finished ! Loss: {}'.format(epoch_loss / i))) if 1: val_dice = eval_net(net, val, gpu) print(('Validation Dice Coeff: {}'.format(val_dice))) if save_cp: torch.save(net.state_dict(), dir_checkpoint + 'cropped_CP{}.pth'.format(epoch + 1)) print(('Checkpoint {} saved !'.format(epoch + 1)))
if __name__ == "__main__": args = parser.parse_args() seed = args.seed bs = args.bs lr = args.lr width = args.width depth = args.depth epochs = args.epochs verbose = args.verbose # set random seed set_random_seed(seed) # define the device for training device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # get training ids train_ids, valid_ids, test_ids = get_ids(65) # define dataloaders if args.dynamic: train_data = TrainDataset(train_ids) train_iter = DataLoader(train_data, batch_size=bs, num_workers=6, sampler=LoopSampler) else: train_data = StaticTrainDataset(train_ids) train_iter = DataLoader(train_data, batch_size=bs, num_workers=6, shuffle=True) train_tdata = TestDataset(train_ids)
def train_net(net, device, epochs=5, batch_size=1, lr=0.1, val_percent=0.15, save_cp=True, img_scale=0.5): ids = get_ids(dir_img) iddataset = split_train_val(ids, val_percent) n_train = len(iddataset['train']) n_val = len(iddataset['val']) optimizer = optim.Adam(net.parameters(), lr=lr) if net.n_classes > 1: criterion = nn.CrossEntropyLoss() else: criterion = nn.BCEWithLogitsLoss() for epoch in range(epochs): net.train() # reset the generators train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask, img_scale) val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask, img_scale) epoch_loss = 0 with tqdm(total=n_train, desc='Epoch {0}/{1}'.format(epoch + 1, epochs), unit='img') as pbar: for i, b in enumerate(batch(train, batch_size)): current_lr = adjust_learning_rate(optimizer, epoch, epochs, pbar.n, n_train) random_rate = 0 if epoch > epochs / 2: random_rate = (epoch * 0.1) / epochs b = custom_transforms.random_data_augmentation( b, random_rate=random_rate) imgs = np.array([i[0] for i in b]).astype(np.float32) true_masks = np.array([i[1][0] for i in b]) imgs = torch.from_numpy(imgs) true_masks = torch.from_numpy(true_masks) imgs = imgs.to(device=device) true_masks = true_masks.to(device=device) masks_pred = net(imgs) loss = criterion(masks_pred, true_masks.long()) epoch_loss += loss.item() pbar.set_postfix( **{ 'lr:{0}, random_rate:{1}, loss:'.format( current_lr, random_rate): loss.item() }) optimizer.zero_grad() loss.backward() optimizer.step() pbar.update(batch_size) if save_cp: try: os.mkdir(dir_checkpoint) logging.info('Created checkpoint directory') except OSError: pass torch.save(net.state_dict(), dir_checkpoint + 'CP_epoch{0}.pth'.format(epoch + 1)) logging.info('Checkpoint {0} saved !'.format(epoch + 1)) val_score = eval_net(net, val, device, n_val) if net.n_classes > 1: logging.info('Validation cross entropy: {0}'.format(val_score)) else: logging.info('Validation Dice Coeff: {0}'.format(val_score))
                      action='store_true',
                      default=True,
                      help='whether to save checkpoint')
    (options, args) = parser.parse_args()
    return options


if __name__ == '__main__':
    args = get_args()
    # dir_img = '/home/zzh/数据/mid project/raw_data'
    # dir_mask = '/home/zzh/数据/mid project/groundtruth'
    dir_img = '/home/zhuzhu/Desktop/mid project/raw_data'
    dir_mask = '/home/zhuzhu/Desktop/mid project/groundtruth'

    ids = get_ids(dir_img)  # generator of image ids 1, 2, 3, ...
    iddataset = split_train_val(
        ids, args.val_percent)  # {'train': [23, 98, 59, ...], 'val': [12, 37, 48, ...]}

    net = UNet(n_channels=1, n_classes=args.num_classes)
    optimizer = torch.optim.SGD(net.parameters(),
                                lr=args.lr,
                                momentum=0.99,
                                weight_decay=5e-3)
    scheduler = StepLR(optimizer, step_size=30, gamma=0.1)
    criterion = nn.BCEWithLogitsLoss()

    if args.load:
        print('load model from checkpoint')
        net.load_state_dict(torch.load('checkpoint/unet.pth'))
def train_net(net, device, epochs=5, batch_size=1, lr=0.1, val_percent=0.15, save_cp=True, img_scale=0.5): ids = get_ids(dir_img) iddataset = split_train_val(ids, val_percent) logging.info('''Starting training: Epochs: {epochs} Batch size: {batch_size} Learning rate: {lr} Training size: {len(iddataset["train"])} Validation size: {len(iddataset["val"])} Checkpoints: {save_cp} Device: {device.type} Images scaling: {img_scale} ''') n_train = len(iddataset['train']) n_val = len(iddataset['val']) optimizer = optim.Adam(net.parameters(), lr=lr) if net.n_classes > 1: criterion = nn.CrossEntropyLoss() else: criterion = nn.BCEWithLogitsLoss() for epoch in range(epochs): net.train() # reset the generators train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask, img_scale) val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask, img_scale) epoch_loss = 0 f1_score = 0 num = 0 with tqdm(total=n_train, desc='Epoch {epoch + 1}/{epochs}', unit='img') as pbar: for i, b in enumerate(batch(train, batch_size)): imgs = np.array([i[0] for i in b]).astype(np.float32) true_masks = np.array([i[1] for i in b]) imgs = torch.from_numpy(imgs) true_masks = torch.from_numpy(true_masks) imgs = imgs.to(device=device) true_masks = true_masks.to(device=device) masks_pred = net(imgs) # print('mask:',masks_pred.size()) # print('lab:',true_masks.size()) loss = criterion(masks_pred, true_masks) masks_pred_np = masks_pred.detach().cpu().numpy() true_masks_np = true_masks.detach().cpu().numpy() epoch_loss += loss.item() # print("----------------------------------------") # print('masks_pred',type(masks_pred),masks_pred,'\n') # print('true_masks',type(true_masks),true_masks,'\n') # print('mask:',masks_pred.size(),'\n') # print('lab:',true_masks.size(),'\n') pre_2D = np.array(masks_pred_np[0][0]) true_2D = np.array(true_masks_np[0][0]) pre_2D_threhold = pre_2D pre_2D_threhold[pre_2D_threhold > 0.5] = 1 pre_2D_threhold[pre_2D_threhold <= 0.5] = 0 # print("pre_2D.shape",pre_2D.shape,'\n') # print("true_2D.shape" ,true_2D.shape,'\n') # print("true_2D.flatten()",true_2D.flatten(),'\n') # print("pre_2D.flatten()",pre_2D.flatten(),'\n') pixel_accuracy = (pre_2D, true_2D) f1_score += metrics.f1_score(true_2D.flatten(), pre_2D_threhold.flatten()) num = num + 1 # print("----------------------------------------") # val_score1 = eval_net1(net,val,device,n_val) pbar.set_postfix(**{'loss (batch)': loss.item()}) optimizer.zero_grad() loss.backward() optimizer.step() pbar.update(batch_size) if save_cp: try: os.mkdir(dir_checkpoint) logging.info('Created checkpoint directory') except OSError: pass torch.save(net.state_dict(), dir_checkpoint + 'CP_epoch{epoch + 1}.pth') logging.info('Checkpoint {epoch + 1} saved !') val_score = eval_net(net, val, device, n_val) f1_score /= num print("f1-score:", f1_score, '\n') if net.n_classes > 1: logging.info('Validation cross entropy: {}'.format(val_score)) else: logging.info('Validation Dice Coeff: {}'.format(val_score))
pre_list_all = []
re_list_all = []
f1_list_all = []

filepath = './bugreports_sds/'
step = 6
bc = BertClient()
sentences = []
vectors = []
for i in range(36):
    report_sent = []
    with open(filepath + str(i + 1) + '.txt', "r", encoding='utf-8') as f:
        for line in f.readlines():
            report_sent.append(line.strip('\n'))
    sentences.append(report_sent)

labels_ids = read_label('./data/goldset_sds.txt')
ids = get_ids()
labels = []
for index, id_list in enumerate(ids):
    label = []
    for id in id_list:
        if id in labels_ids[index]:
            label.append(1)
        else:
            label.append(0)
    labels.append(label)

sentences, labels = clear_data(sentences, labels)

for i in range(0, 36, step):
    # model = create_classify_dense(EMBEDDING_DIM)
    model = create_classify_lstm_att(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    # model = create_classify_textcnn(EMBEDDING_DIM)
def train_net(net, epochs=30, batch_size=6, lr=0.1, val_percent=0.05, save_cp=True, gpu=False, img_scale=0.5): # dir_img = 'E:/A_paper_thesis/paper5/tensorflow_deeplabv3plus_scrapingData/dataset/Scraping_Data2/train_db' dir_img = 'data/train_db/' dir_mask = 'data/GT_bw/' dir_checkpoint = 'checkpoint0919/' ids = get_ids(dir_img) ids = split_ids(ids) iddataset = split_train_val(ids, val_percent) print(''' Starting training: Epochs: {} Batch size: {} Learning rate: {} Training size: {} Validation size: {} Checkpoints: {} CUDA: {} '''.format(epochs, batch_size, lr, len(iddataset['train']), len(iddataset['val']), str(save_cp), str(gpu))) N_train = len(iddataset['train']) optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=0.0005) criterion = nn.BCELoss() for epoch in range(epochs): print('Starting epoch {}/{}.'.format(epoch + 1, epochs)) net.train() # reset the generators train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask, img_scale) val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask, img_scale) epoch_loss = 0 epoch_iou = 0 epoch_xor=0 for i, b in enumerate(batch(train, batch_size)): imgs = np.array([i[0] for i in b]).astype(np.float32) true_masks = np.array([i[1] for i in b]) imgs = torch.from_numpy(imgs) true_masks = torch.from_numpy(true_masks) if gpu: imgs = imgs.cuda() true_masks = true_masks.cuda() masks_pred = net(imgs) masks_probs_flat = masks_pred.view(-1) true_masks_flat = true_masks.view(-1) loss = criterion(masks_probs_flat, true_masks_flat) epoch_loss += loss.item() print('step:', i) # print('Validation Dice Coeff: {0:.4f} --- loss: {1:.6f}'.format(i * batch_size / N_train, loss.item())) print('Validation Dice Coeff: {0:.4f} --- loss: {1:.6f}'.format(i * batch_size / N_train, loss.item())) # # mean iou # intersect = sum(masks_probs_flat*true_masks_flat) # union = sum(masks_probs_flat+true_masks_flat) # iou = (intersect+0.001)/(union-intersect+0.001) # epoch_iou +=iou # mean iou smooth = 1e-6 # we smooth to avoid our devision 0/0 intersect = sum(masks_probs_flat*true_masks_flat) union = sum(masks_probs_flat+true_masks_flat)-intersect iou = (intersect+smooth)/(union+smooth) epoch_iou +=iou # calculate xor # xor quation is: xor = (union(output hợp ground truth) - intersect(output giao ground truth))/ ground truth # xor = (union-intersect)/ground truth xor = (union - intersect)/sum(true_masks_flat) epoch_xor += xor print('mean IoU: {:.4f}'.format(iou)) # print('mean IoU1: {:.4f}'.format(iou1)) print('mean xor: {:.4f}'.format(xor)) # end of mean iou optimizer.zero_grad() loss.backward() optimizer.step() print('Epoch finished ! epoch_Loss: {:.6f}'.format(epoch_loss / i)) print('epoch_iou: {:.4f}'.format(epoch_iou / i)) print('epoch_xor: {:.4f}'.format(epoch_xor / i)) if 1: val_dice = eval_net(net, val, gpu) print('epoch_Validation Dice Coeff: {:.4f}'.format(val_dice)) # need to write mean iou of evaluate here(reference val_dice) if save_cp: torch.save(net.state_dict(), dir_checkpoint + 'CP{}.pth'.format(epoch + 1)) print('Checkpoint {} saved !'.format(epoch + 1))
def preprocess(file, BATCH_SIZE, max_length, tokenizer): train_dataset = [] input_vocab_size = len(tokenizer.vocab) f = open(file, 'r') words = f.read() words = words.replace('\n\n', '.') words = words.replace('\n', ' ') words = re.split('[;:.!?]', words) i = 0 for _ in range(len(words) // BATCH_SIZE + 1): if i + 1 >= len(words): break input_ids_list = [] segment_list = [] is_masked_list = [] is_next_list = [] for j in range(BATCH_SIZE): if i + 1 >= len(words): break now = int( random.random() > 0.5 ) # decide if the 2nd sentence has to be next sentence or not if now == 1: res = ["[CLS]"] + tokenizer.tokenize(words[i]) + [ "[SEP]" ] + tokenizer.tokenize(words[i + 1]) + ["[SEP]"] else: res = ["[CLS]"] + tokenizer.tokenize( words[i]) + ["[SEP]"] + tokenizer.tokenize( words[random.randint(0, len(words) - 1)]) + ["[SEP]"] input_ids = get_ids(res, tokenizer, max_length) segment_list.append(get_segments(res, max_length)) is_next_list.append(now) is_masked = [0] * max_length for ind in range(max_length): if input_ids[ind] == 0: # is padding token appears, then break break if input_ids[ind] == 101 or input_ids[ ind] == 102: # don't mask [CLS] and [SEP] tokens continue if random.random() < 0.15: # mask 15% of tokens is_masked[ind] = input_ids[ind] if random.random() < 0.8: # out of 15%, mask 80% input_ids[ind] = 103 elif random.random( ) < 0.5: # replace 10% with random token input_ids[ind] = random.randint(1000, input_vocab_size) #in the remaining tokens, keep the same token input_ids_list.append(input_ids) is_masked_list.append(is_masked) if now == 1: i += 2 else: i += 1 input_ids_list = np.array(input_ids_list) is_masked_list = np.array(is_masked_list) masks = create_padding_mask(input_ids_list) segment_list = np.array(segment_list) is_next_list = np.array(is_next_list) is_next_list = np.reshape(is_next_list, (len(is_next_list), 1)) train_dataset.append([ input_ids_list, segment_list, masks, is_next_list, is_masked_list ]) return train_dataset
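preprocess() above depends on get_ids() and get_segments() helpers that turn a token list into fixed-length BERT inputs. The sketch below shows the usual form of these helpers, assumed to match the padding id 0 and the [CLS]=101 / [SEP]=102 ids used above; the original helpers may differ.

# Hedged sketch (assumed): typical BERT input helpers matching the padding and
# special-token ids used in preprocess() above.
def get_ids(tokens, tokenizer, max_length):
    """Convert tokens to vocabulary ids and pad with 0 up to max_length."""
    ids = tokenizer.convert_tokens_to_ids(tokens)[:max_length]
    return ids + [0] * (max_length - len(ids))


def get_segments(tokens, max_length):
    """Segment ids: 0 up to and including the first [SEP], 1 afterwards."""
    segments, current = [], 0
    for token in tokens[:max_length]:
        segments.append(current)
        if token == "[SEP]" and current == 0:
            current = 1
    return segments + [0] * (max_length - len(segments))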
def train_net( net, epochs=5, batch_size=1, lr=0.1, #val_percent=0.1, save_cp=True, gpu=False, img_scale=0.5): img_train = '/home/lixiaoxing/github/Pytorch-UNet/data/DRIVE/AV_groundTruth/training/images_jpg/' mask_train = '/home/lixiaoxing/github/Pytorch-UNet/data/DRIVE/AV_groundTruth/training/vessel/' img_val = '/home/lixiaoxing/github/Pytorch-UNet/data/DRIVE/AV_groundTruth/training/val_jpg/' mask_val = '/home/lixiaoxing/github/Pytorch-UNet/data/DRIVE/AV_groundTruth/training/val_vessel/' dir_checkpoint = 'checkpoints_drive3_adam/' if os.path.exists(dir_checkpoint) is False: os.makedirs(dir_checkpoint) ids_train = get_ids(img_train) data_train = split_ids(ids_train) data_train = list(data_train) ids_val = get_ids(img_val) data_val = split_ids(ids_val) data_val = list(data_val) #iddataset = split_train_val(ids, val_percent) print(''' Starting training: Epochs: {} Batch size: {} Learning rate: {} Training size: {} Validation size: {} Checkpoints: {} CUDA: {} '''.format(epochs, batch_size, lr, len(data_train), len(data_val), str(save_cp), str(gpu))) N_train = len(data_train) #optimizer = optim.SGD(net.parameters(), # lr=lr, # momentum=0.9, # weight_decay=0.0005) optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=1e-5) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, mode='min', patience=3, verbose=True) #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1) criterion = nn.BCELoss() #criterion = DiceCoeff() for epoch in range(epochs): print('Starting epoch {}/{}.'.format(epoch + 1, epochs)) net.train() # reset the generators train = get_imgs_and_masks_y(data_train, img_train, mask_train, img_scale) val = get_imgs_and_masks_y(data_val, img_val, mask_val, img_scale) epoch_loss = 0 for i, b in enumerate(batch(train, batch_size)): imgs = np.array([i[0] for i in b]).astype(np.float32) true_masks = np.array([i[1] for i in b]) imgs = torch.from_numpy(imgs) true_masks = torch.from_numpy(true_masks) if gpu: imgs = imgs.cuda() true_masks = true_masks.cuda() masks_pred = net(imgs) masks_probs_flat = masks_pred.view(-1) true_masks_flat = true_masks.view(-1) #print(masks_pred.shape, true_masks.shape) loss = criterion(masks_probs_flat, true_masks_flat) epoch_loss += loss.item() #print('{0:.4f} --- loss: {1:.6f}'.format(i * batch_size / N_train, loss.item())) optimizer.zero_grad() loss.backward() optimizer.step() print('Epoch finished ! Loss: {}'.format(epoch_loss / i)) if 1: val_dice = eval_net(net, val, gpu) print('Validation Dice Coeff: {}'.format(val_dice)) if save_cp: torch.save(net.state_dict(), dir_checkpoint + 'CP{}.pth'.format(epoch + 1)) print('Checkpoint {} saved !'.format(epoch + 1)) scheduler.step(val_dice)
# plt.plot(hist_img, 'k-')
# plt.plot(hist_coal, 'r-')
# plt.plot(hist_gangue, 'g-')
# plt.xlim([0, 256])
# plt.show()

from sklearn.metrics import precision_recall_curve, roc_curve, classification_report
from utils import get_imgs_and_masks, get_ids, split_train_val
from unet import UNet
import torch
import numpy as np  # used below but missing from the original snippet

ori_w, ori_h = 852, 480
dir_img = '/home/zhuzhu/Desktop/mid project/raw_data'
dir_mask = '/home/zhuzhu/Desktop/mid project/groundtruth'

ids = get_ids(dir_img)
iddataset = split_train_val(ids, 0.05)

net = UNet(1, 2)
net.eval()
net.load_state_dict(
    torch.load(
        '/media/zhuzhu/0C5809B80C5809B8/draft/unet/checkpoint/unet_0.854608765.pth',
        map_location='cpu'))

val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask)

c = 0
for i, b in enumerate(val):
    img = np.array(b[0]).astype(np.float32)
    mask = np.array(b[1]).astype(np.float32)
def train_net(args, net, val_percent=0.05, save_cp=True):
    dir_img = os.path.join(args.dataset_folder, 'data/train/')
    dir_mask = os.path.join(args.dataset_folder, 'data/train_masks/')
    dir_checkpoint = os.path.join(args.dataset_folder, 'checkpoints/')
    if not os.path.exists(dir_checkpoint):
        os.makedirs(dir_checkpoint)

    ids = get_ids(dir_img)
    ids = split_ids(ids)
    iddataset = split_train_val(ids, val_percent)

    print('''
    Starting training:
        Epochs: {}
        Batch size: {}
        Learning rate: {}
        Training size: {}
        Validation size: {}
        Checkpoints: {}
    '''.format(args.epochs, args.batch_size, args.lr, len(iddataset['train']),
               len(iddataset['val']), str(save_cp)))

    N_train = len(iddataset['train'])

    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=0.9,
                          weight_decay=0.0005)
    criterion = nn.BCELoss()

    for epoch in range(args.epochs):
        print('Starting epoch {}/{}.'.format(epoch + 1, args.epochs))
        net.train()

        # reset the generators
        train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask, args.img_scale)
        val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask, args.img_scale)

        epoch_loss = 0

        for i, b in enumerate(batch(train, args.batch_size)):
            imgs = np.array([i[0] for i in b]).astype(np.float32)
            true_masks = np.array([i[1] for i in b])

            imgs = torch.from_numpy(imgs)
            true_masks = torch.from_numpy(true_masks)

            # if gpu:
            imgs = imgs.cuda()
            true_masks = true_masks.cuda()

            masks_pred = net(imgs)
            masks_probs_flat = masks_pred.view(-1)
            true_masks_flat = true_masks.view(-1)

            loss = criterion(masks_probs_flat, true_masks_flat)
            epoch_loss += loss.item()

            print('{0:.4f} --- loss: {1:.6f}'.format(
                i * args.batch_size / N_train, loss.item()))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print('Epoch finished ! Loss: {}'.format(epoch_loss / (i + 1)))  # average over batches

        val_dice = eval_net(net, val)
        print('Validation Dice Coeff: {}'.format(val_dice))

        if save_cp:
            torch.save(net.state_dict(),
                       dir_checkpoint + 'CP{}.pth'.format(epoch + 1))
            print('Checkpoint {} saved !'.format(epoch + 1))
def train_net(net,
              device,
              epochs=5,
              batch_size=1,
              lr=0.1,
              val_percent=0.15,
              save_cp=True,
              img_scale=0.5):
    ids = get_ids(dir_img)
    iddataset = split_train_val(ids, val_percent)

    logging.info(f'''Starting training:
        Epochs:          {epochs}
        Batch size:      {batch_size}
        Learning rate:   {lr}
        Training size:   {len(iddataset["train"])}
        Validation size: {len(iddataset["val"])}
        Checkpoints:     {save_cp}
        Device:          {device.type}
        Images scaling:  {img_scale}
    ''')

    n_train = len(iddataset['train'])
    n_val = len(iddataset['val'])

    optimizer = optim.Adam(net.parameters(), lr=lr)
    # optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.75)
    criterion = nn.BCELoss()

    for epoch in range(epochs):
        net.train()

        # reset the generators
        train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask, img_scale)
        val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask, img_scale)

        epoch_loss = 0
        with tqdm(total=n_train, desc=f'Epoch {epoch + 1}/{epochs}', unit='img') as pbar:
            for i, b in enumerate(batch(train, batch_size)):
                imgs = np.array([i[0] for i in b]).astype(np.float32)
                true_masks = np.array([i[1] for i in b])

                imgs = torch.from_numpy(imgs)
                true_masks = torch.from_numpy(true_masks)

                imgs = imgs.to(device=device)
                true_masks = true_masks.to(device=device)

                masks_pred = net(imgs)
                loss = criterion(masks_pred, true_masks)
                epoch_loss += loss.item()

                pbar.set_postfix(**{'loss (batch)': loss.item()})

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                pbar.update(batch_size)

        if save_cp:
            try:
                os.mkdir(dir_checkpoint)
                logging.info('Created checkpoint directory')
            except OSError:
                pass
            torch.save(net.state_dict(), dir_checkpoint + 'gpu_3.pth')
            logging.info('gpu_3 saved !')

        val_dice = eval_net(net, val, device, n_val)
        logging.info('Validation Dice Coeff: {}'.format(val_dice))
def submission(model, sampling_method, data_dir, results_dir, device='cpu', verbose=True):
    if verbose:
        print("Using device: {}".format(device))
        print("Reading train data in...")

    if model == 'lgbm':
        X_train, Y_train, feature_labels = get_train(data_dir, one_hot=False)
    else:
        X_train, Y_train, feature_labels = get_train(data_dir)
    X_test = get_test(data_dir)
    train_ids, test_ids = get_ids(data_dir)
    country_names = get_country_names(data_dir)

    if verbose:
        print("Successfully loaded data")

    lgbm_params = {
        'task': 'train',
        'objective': 'multiclass',
        'num_class': 12,
        'num_leaves': 31,
        'learning_rate': 0.3,
        'lambda_l2': 1.0,
        'feature_fraction': 0.9,
        'min_child_weight': 1.0,
        'device': device,
        'gpu_device_id': 0,
        'gpu_platform_id': 0,
        'max_bin': 63,
        'verbose': 0
    }

    if device == 'cpu':
        xgb_params = {
            "objective": "multi:softprob",
            "num_class": 12,
            "tree_method": "hist",
            "colsample_bytree": 0.9,
            "n_jobs": 2,
            "silent": 1
        }
    else:
        xgb_params = {
            "objective": "multi:softprob",
            "num_class": 12,
            "tree_method": "gpu_hist",
            "colsample_bytree": 0.9,
            "gpu_id": 0,
            "max_bin": 16,
            "silent": 1
        }

    if verbose:
        print("{} sampling process started...".format(sampling_method))
    curr_time = time.time()

    if sampling_method == "adasyn":
        X_train_resampled, Y_train_resampled = ADASYN().fit_sample(X_train, Y_train)
    elif sampling_method == "smote":
        X_train_resampled, Y_train_resampled = SMOTE().fit_sample(X_train, Y_train)
    elif sampling_method == "random":
        X_train_resampled, Y_train_resampled = RandomOverSampler().fit_sample(X_train, Y_train)
    elif sampling_method == "smoteenn":
        X_train_resampled, Y_train_resampled = SMOTEENN().fit_sample(X_train, Y_train)
    else:
        X_train_resampled, Y_train_resampled = X_train, Y_train

    if verbose:
        print("Oversampling completed")
        print("Time Taken: {:.2f}".format(time.time() - curr_time))
        print("Size of Oversampled data: {}".format(X_train_resampled.shape))
        print("{} selected for classification".format(model))

    curr_time = time.time()
    if model == 'lgbm':
        categorical_feature = [
            'age_bucket', 'gender', 'signup_method', 'signup_flow', 'language',
            'affiliate_channel', 'affiliate_provider', 'first_affiliate_tracked',
            'signup_app', 'first_device_type', 'first_browser'
        ]
        lgb_train = lgb.Dataset(data=X_train_resampled,
                                label=Y_train_resampled,
                                feature_name=feature_labels,
                                categorical_feature=categorical_feature)
        clf = lgb.train(lgbm_params, lgb_train, num_boost_round=30)
        print("Time taken: {:.2f}".format(time.time() - curr_time))
        Y_probs = clf.predict(X_test)
        # indices of the five most probable classes for each test row
        order = np.argsort(-Y_probs, axis=1)[:, :5]
    else:
        X_train_xgb = xgb.DMatrix(X_train_resampled, Y_train_resampled,
                                  feature_names=feature_labels)
        X_test_xgb = xgb.DMatrix(X_test, feature_names=feature_labels)
        clf = xgb.train(xgb_params, X_train_xgb, 30)
        print("Time taken: {:.2f}".format(time.time() - curr_time))
        Y_probs = clf.predict(X_test_xgb)
        # indices of the five most probable classes for each test row
        order = np.argsort(-Y_probs, axis=1)[:, :5]

    print("Generating submission csv...")
    with open(os.path.join(results_dir, 'submission_{}.csv'.format(model)), 'w') as f:
        writer = csv.writer(f, delimiter=',', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['id', 'country'])
        for i in range(len(test_ids)):
            for k in range(5):
                writer.writerow([test_ids[i], country_names[order[i, k]]])
    print("Finished.")
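# Hypothetical invocation sketch, not in the original source; the data and
# results directories are placeholders, and 'xgb' / 'smote' is just one of the
# model / oversampling combinations handled above.
if __name__ == '__main__':
    submission(model='xgb',
               sampling_method='smote',
               data_dir='data/',
               results_dir='results/',
               device='cpu',
               verbose=True)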
def train_net(net,
              epochs=5,
              batch_size=1,
              lr=0.1,
              val_percent=0.05,
              save_cp=True,
              gpu=False,
              img_scale=0.5):
    # dir_img = 'data/train/'
    # dir_mask = 'data/train_masks/'
    dir_img = 'E:/git/dataset/tgs-salt-identification-challenge/train/images/'
    dir_mask = 'E:/git/dataset/tgs-salt-identification-challenge/train/masks/'
    # dir_img = 'E:/git/dataset/tgs-salt-identification-challenge/train/my_images/'
    # dir_mask = 'E:/git/dataset/tgs-salt-identification-challenge/train/my_masks/'
    dir_checkpoint = 'checkpoints/'

    ids = get_ids(dir_img)
    ids = split_ids(ids)
    iddataset = split_train_val(ids, val_percent)

    print('''
    Starting training:
        Epochs: {}
        Batch size: {}
        Learning rate: {}
        Training size: {}
        Validation size: {}
        Checkpoints: {}
        CUDA: {}
    '''.format(epochs, batch_size, lr, len(iddataset['train']),
               len(iddataset['val']), str(save_cp), str(gpu)))

    N_train = len(iddataset['train'])

    optimizer = optim.SGD(net.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=0.0005)
    criterion = nn.BCELoss()

    for epoch in range(epochs):
        print('Starting epoch {}/{}.'.format(epoch + 1, epochs))
        net.train()

        # reset the generators
        train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask, img_scale)
        val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask, img_scale)

        epoch_loss = 0

        for i, b in enumerate(batch(train, batch_size)):
            imgs = np.array([i[0] for i in b]).astype(np.float32)
            # true_masks = np.array([i[1] for i in b])  # np.rot90(m)
            true_masks = np.array([i[1].T / 65535 for i in b])  # transpose and scale 16-bit masks to [0, 1]
            # show_batch_image(true_masks)

            imgs = torch.from_numpy(imgs)
            true_masks = torch.from_numpy(true_masks)

            if gpu:
                imgs = imgs.cuda()
                true_masks = true_masks.cuda()

            # show_batch_image(imgs)
            masks_pred = net(imgs)
            masks_probs_flat = masks_pred.view(-1)
            true_masks_flat = true_masks.view(-1)

            loss = criterion(masks_probs_flat, true_masks_flat)
            epoch_loss += loss.item()

            print('{0:.4f} --- loss: {1:.6f}'.format(i * batch_size / N_train,
                                                     loss.item()))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print('Epoch finished ! Loss: {}'.format(epoch_loss / (i + 1)))  # average over batches

        val_dice = eval_net(net, val, gpu)
        print('Validation Dice Coeff: {}'.format(val_dice))

        if save_cp:
            torch.save(net.state_dict(),
                       dir_checkpoint + 'CP{}.pth'.format(epoch + 1))
            print('Checkpoint {} saved !'.format(epoch + 1))
def get_all_group_ids(amount):
    global group_ids
    group_ids = utils.get_ids(scim.search_with_get_on_groups, amount)
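# Hypothetical usage sketch, not in the original source: populate the
# module-level group_ids via the SCIM group search above and iterate over the
# result; the amount of 100 is a placeholder.
if __name__ == '__main__':
    get_all_group_ids(100)
    for gid in group_ids:
        print(gid)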