def main():
    # Run model.
    model, results = experiments.tspec()

    # Plot training curves.
    visualize.training(results)

    # Save model.
    torch.save(
        model,
        os.path.join(PKG_PATH, 'models/best_tspec_model_{}.pt'.format(TSTAMP)))

    # Save results.
    utils.write_results(
        results,
        os.path.join(PKG_PATH,
                     'models/best_tspec_results_{}.pkl'.format(TSTAMP)))

    # Visualizations using non-shuffled data.
    train_data = utils.Data(train=True, augmentation=True)
    valid_data = utils.Data(train=False, augmentation=False)
    visualize.spectra(train_data, log=False, name='spectra_train')
    visualize.spectra(valid_data, log=False, name='spectra_valid')
    visualize.timeseries(train_data, name='timeseries_train')
    visualize.timeseries(valid_data, name='timeseries_valid')
    visualize.pca(train_data)
    visualize.tsne(train_data)
def main(argv):
    logging.getLogger("transformers.tokenization_utils").setLevel(logging.ERROR)
    FLAGS = flags.FLAGS
    utils.print_flags(FLAGS)

    # Random seed initialization.
    random.seed(FLAGS.random_seed)
    np.random.seed(FLAGS.random_seed)
    torch.manual_seed(FLAGS.random_seed)

    # Configuration and paths.
    with open(FLAGS.config, 'r') as f:
        cfg = yaml.load(f, Loader=yaml.BaseLoader)
    PATH_DATA = cfg['path_data']
    PATH_CORPUS = '{}/{}'.format(PATH_DATA, cfg['corpus'])
    PATH_DATA_PREFIX = '{}/{}'.format(PATH_DATA, cfg['data_prefix'])
    PATH_MODEL_PREFIX = '{}/{}'.format(cfg['path_model'], FLAGS.model_prefix)
    os.makedirs(PATH_MODEL_PREFIX, exist_ok=True)

    # Set up the experimental environment.
    exp = experiment.Experiment(FLAGS, cfg)

    # Change attention window size.
    for layer in exp.model.base.encoder.layer:
        layer.attention.self.attention_window = FLAGS.window_size

    # Load the corpus.
    corpus = utils.Corpus(PATH_CORPUS, FLAGS)

    # Load train/dev/test data.
    train_data = utils.Data(PATH_DATA_PREFIX + 'train', corpus, FLAGS)
    dev_data = utils.Data(PATH_DATA_PREFIX + 'dev', corpus, FLAGS)
    test_data = utils.Data(PATH_DATA_PREFIX + 'test', corpus, FLAGS)

    for epoch in range(FLAGS.last_epoch, FLAGS.num_epochs):
        print('Epoch {}'.format(epoch + 1), file=sys.stderr)

        # Train the model.
        train_loss = exp.train(train_data,
                               eval_data=dev_data,
                               test_data=test_data,
                               num_sample_eval=FLAGS.num_sample_eval)
        print('Epoch {}, train_loss = {}'.format(epoch + 1, train_loss),
              file=sys.stderr)

        # Dump the model.
        print('Dump model for epoch {}.'.format(epoch + 1))
        exp.dump_model(PATH_MODEL_PREFIX, str(epoch + 1))

        # Evaluate test data.
        test_eval = exp.eval_dump(test_data, FLAGS.num_sample_eval,
                                  'Evaluating test queries')
        print('Test Evaluation', test_eval, file=sys.stderr)

        # Dump tensorboard results.
        if exp.tb:
            exp.tb_writer.add_scalar('Epoch_Eval_cut10/NDCG',
                                     test_eval['ndcg10'], epoch + 1)
            exp.tb_writer.add_scalar('Epoch_Eval_cut10/MRR',
                                     test_eval['mrr10'], epoch + 1)
            exp.tb_writer.add_scalar('Epoch_Eval_cut10/MAP',
                                     test_eval['map10'], epoch + 1)
            exp.tb_writer.add_scalar('Epoch_Eval_overall/NDCG',
                                     test_eval['ndcg'], epoch + 1)
            exp.tb_writer.add_scalar('Epoch_Eval_overall/MRR',
                                     test_eval['mrr'], epoch + 1)
            exp.tb_writer.add_scalar('Epoch_Eval_overall/MAP',
                                     test_eval['map'], epoch + 1)
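# A minimal sketch (not from the original file) of how an absl-style main()
# like the one above is typically wired up. It assumes the module declares the
# flags referenced in main() (config, random_seed, window_size, model_prefix,
# last_epoch, num_epochs, num_sample_eval, ...) via flags.DEFINE_* at import
# time; the single DEFINE below is just an illustrative example.
from absl import app, flags

flags.DEFINE_string('config', 'config.yaml', 'Path to the YAML config file.')

if __name__ == '__main__':
    app.run(main)  # absl parses the command line, then calls main(argv).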
def run():
    ventilatorData = utils.Data(**{
        'url': URL,
        'name': 'ventilatorData',
    })
    ventilatorData.getAndParseCSV()

    def ventilatorManipulator(this):
        data = this.data.pivot(index='fecha', columns='ventiladores',
                               values='numero')
        data = data.reset_index()
        return data

    ventilatorData.manipulateData(ventilatorManipulator)

    def structureVentilatorOutput(this):
        return {
            'latest': this.data['fecha'].max(),
            'ventilators': {
                'occupied': utils.filterAndFormat('fecha', 'ocupados',
                                                  this.data),
                'available': utils.filterAndFormat('fecha', 'disponibles',
                                                   this.data),
                'total': utils.filterAndFormat('fecha', 'total', this.data),
            }
        }

    ventilatorData.structureOutput(structureVentilatorOutput)
    ventilatorData.writeLocal(utils.STORE + 'ventilator.json')
    return ventilatorData
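# Sketch of the ventilator.json payload implied by structureVentilatorOutput
# above; the keys follow from the code, but the values are illustrative only.
# {
#   "latest": "2020-05-01",
#   "ventilators": {
#     "occupied":  [...],
#     "available": [...],
#     "total":     [...]
#   }
# }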
def run():
    testingData = utils.Data(**{
        'url': URL,
        'name': 'testingData',
    })
    testingData.getAndParseCSV()

    def testingManipulator(this):
        # Sum PCR tests over three consecutive 16-row windows of the tail.
        data = this.data.tail(48)
        this.dataTail1 = data.iloc[0:16]['pcr realizados'].astype(int).sum()
        this.dataTail2 = data.iloc[16:32]['pcr realizados'].astype(int).sum()
        this.dataTail3 = data.iloc[32:48]['pcr realizados'].astype(int).sum()
        return this.data

    testingData.manipulateData(testingManipulator)
    testingData.structureOutput(utils.structureStandardOutput)

    def iterTestingFormat(this, index):
        region = this.data.loc[lambda x: x['region id'] == index]
        region = region.sort_values(by='fecha')
        this.output['regions'][f"{index}"] = {
            'tests': utils.filterAndFormat('fecha', 'pcr realizados', region)
        }

    testingData.iterFormat(1, 17, iterTestingFormat)
    testingData.writeLocal(utils.STORE + 'testing.json')
    return testingData
def train(task, size, data, shards, checkpoint):
    name = '{}/{}-{}/'.format(task, size, shards)
    model = getattr(models, task)[size]
    task = getattr(tasks, task)

    df = utils.get_df(data, shards)
    df = utils.normalize_df(df)
    df = df.sample(frac=1)

    dataset = utils.Data(df, task)
    callbacks = get_callbacks('logs/{}'.format(name))

    model.compile(optimizer='adam',
                  loss=task['outputs'],
                  metrics=task.get('metrics'))
    model.summary()
    model.fit(
        dataset,
        callbacks=callbacks,
        workers=2,
        max_queue_size=10,
        use_multiprocessing=True,
    )

    if checkpoint:
        model.save('checkpoints/{}'.format(name))
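# Hedged usage sketch, not from the original repo: the task name, size key,
# data path, and shard count below are placeholders for whatever the models/
# tasks modules actually define.
if __name__ == '__main__':
    train(task='classify', size='small', data='data/shards', shards=4,
          checkpoint=True)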
def main(argv):
    logging.getLogger("transformers.tokenization_utils").setLevel(
        logging.ERROR)
    FLAGS = flags.FLAGS
    utils.print_flags(FLAGS)

    # Random seed initialization.
    random.seed(FLAGS.random_seed)
    np.random.seed(FLAGS.random_seed)
    torch.manual_seed(FLAGS.random_seed)

    # Configuration and paths.
    with open(FLAGS.config, 'r') as f:
        cfg = yaml.load(f, Loader=yaml.BaseLoader)
    PATH_DATA = cfg['path_data']
    PATH_CORPUS = '{}/{}'.format(PATH_DATA, cfg['corpus'])
    PATH_DATA_PREFIX = '{}/{}'.format(PATH_DATA, cfg['data_prefix'])

    # Set up the experimental environment.
    exp = experiment.Experiment(FLAGS, cfg, dumpflag=False)

    # Change attention window size.
    for layer in exp.model.base.encoder.layer:
        layer.attention.self.attention_window = FLAGS.window_size

    # Load the corpus.
    corpus = utils.Corpus(PATH_CORPUS, FLAGS)

    # Load test data.
    test_data = utils.Data(PATH_DATA_PREFIX + 'test', corpus, FLAGS)

    # Evaluate test data.
    test_eval = exp.eval_dump(test_data, FLAGS.num_sample_eval,
                              'Evaluating test queries')
    print('Test Evaluation', test_eval, file=sys.stderr)
def eval_model(dataset_file, model_filename, results_filename):
    """Evaluate a saved model on a dataset, returning its predictions;
    falls back to random dummy predictions when no model file is given."""
    model = None

    # Load your best model.
    if model_filename:
        model_filename = Path(model_filename)
        model = torch.load(model_filename)
        if CUDA:
            model = model.cuda()
        print("\nLoading model from", model_filename.absolute())

    if model:
        N_SUBJ = 160
        data = utils.read_memfile(dataset_file, shape=(N_SUBJ, 3750),
                                  dtype='float32')
        results = utils.read_results(results_filename)

        # y is generated as we do not have predictions here.
        fake_y = np.random.randint(1, 32, size=(N_SUBJ, 4))
        fake_y[0, -1] = 32
        data = {'X': data, 'y': fake_y}
        data = utils.Data(precomputed=data, augmentation=False)

        # Overwrite ymap in Data with one computed using real data.
        data.ymap = results['ymap']
        dataloader = torch.utils.data.DataLoader(data, batch_size=64,
                                                 num_workers=2)

        # Generate predictions with model.
        results = experiments.evalu_loop(model, dataloader, return_preds=True)
        y_pred = results['preds']

        # Convert ID column back to original format.
        ids = y_pred[:, -1]
        ids = data.ymap.inverse_transform(ids.astype(int))
        y_pred[:, -1] = ids
    else:
        print("\nYou did not specify a model, generating dummy data instead!")
        c = 32
        n = 10
        y_pred = np.concatenate(
            [np.random.rand(n, 3), np.random.randint(0, c, (n, 1))],
            axis=1).astype(np.float32)

    return y_pred
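# Hedged usage sketch; the paths below are placeholders, not files from the
# source tree.
y_pred = eval_model('data/test_subjects.mem',
                    'models/best_tspec_model.pt',
                    'models/best_tspec_results.pkl')
print(y_pred.shape)  # Expected (n_subjects, 4): three targets plus an ID.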
def run():
    regionalData = utils.Data(**{
        'url': URL,
        'name': 'regionalData',
    })
    regionalData.getAndParseCSV()

    def regionalManipulator(this):
        # The source changed its schema on 2020-04-29; normalize the newer
        # rows to the old column names before joining the two halves.
        data_1 = this.data.loc[
            lambda x: pd.to_datetime(x['fecha']) < '2020-04-29']
        data_2 = this.data.loc[
            lambda x: pd.to_datetime(x['fecha']) > '2020-04-28']
        data_2 = data_2.drop(columns=[
            'nuevos casos', 'casos confirmados', 'tasa',
            'casosnuevossinsintomas*', 'casosnuevosconsintomas'
        ])
        data_2 = data_2.rename(
            columns={
                'casosnuevostotales': 'nuevos casos',
                'casostotalesacumulados': 'casos confirmados',
                'tasa*100000': 'tasa'
            })  # Tasa*100000 no longer works in origin.
        return pd.concat([data_1, data_2])

    regionalData.manipulateData(regionalManipulator)
    regionalData.structureOutput(utils.structureStandardOutput)

    def iterRegionalFormat(this, index):
        region = this.data.loc[lambda x: x['region id'] == index]
        region = region.sort_values(by='fecha')
        this.output['regions'][f'{index}'] = {
            'newCases': utils.filterAndFormat('fecha', 'nuevos casos', region),
            'confCases': utils.filterAndFormat('fecha', 'casos confirmados',
                                               region),
            'dead': utils.filterAndFormat('fecha', 'fallecidos', region),
        }

    regionalData.iterFormat(1, 17, iterRegionalFormat)
    regionalData.writeLocal(utils.STORE + 'regional.json')
    return regionalData
def run():
    totalData = utils.Data(**{
        'url': URL,
        'name': 'totalData',
    })
    totalData.getAndParseCSV()

    def totalManipulator(this):
        data = this.data.pivot_table(index='fecha', columns='dato',
                                     values='total')
        data = data.reset_index()
        data = utils.normalizeColumnNames(data)
        return data

    totalData.manipulateData(totalManipulator)

    def structureTotalOutput(this):
        return {
            'latest': this.data['fecha'].max(),
            'totals': {
                'active': utils.filterAndFormat('fecha', 'casos activos',
                                                this.data),
                'new': utils.filterAndFormat('fecha', 'casos nuevos totales',
                                             this.data),
                'recovered': utils.filterAndFormat('fecha', 'casos recuperados',
                                                   this.data),
                'totales': utils.filterAndFormat('fecha', 'casos totales',
                                                 this.data),
                'dead': utils.filterAndFormat('fecha', 'fallecidos', this.data)
            }
        }

    totalData.printData()
    totalData.structureOutput(structureTotalOutput)
    totalData.writeLocal(utils.STORE + 'totals.json')
    return totalData
def run():
    criticalData = utils.Data(**{
        'url': URL,
        'name': 'criticalData',
    })
    criticalData.getAndParseCSV()

    def structureCriticalOutput(this):
        return {
            'latest': this.data['fecha'].max(),
            'critical': {
                'total': int(
                    this.data.tail(1)['casos confirmados'].astype(int).sum())
            }
        }

    criticalData.structureOutput(structureCriticalOutput)
    criticalData.writeLocal(utils.STORE + 'critical.json')
    return criticalData
def run():
    uciData = utils.Data(**{
        'url': URL,
        'name': 'uciData',
    })
    uciData.getAndParseCSV()
    uciData.structureOutput(utils.structureStandardOutput)

    def iterUCIFormat(this, index):
        region = this.data.loc[lambda x: x['codigo region'] == index]
        region = region.sort_values(by='fecha')
        this.output['regions'][f"{index}"] = {
            'camas': utils.filterAndFormat('fecha', 'numero', region)
        }

    uciData.iterFormat(1, 17, iterUCIFormat)
    uciData.writeLocal(utils.STORE + 'uci.json')
    return uciData
import sys

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import utils
from evaluation import precision_recall_f1

data = utils.Data()

DEBUG = True
batch_size = 32
n_epochs = 4
learning_rate = 0.005
learning_rate_decay = np.sqrt(2)
dropout_keep_probability = 0.5


def print_debug(msg):
    if DEBUG:
        print(msg)


class BiLSTMModel(nn.Module):
    def __init__(self, vocab_size, n_tags, embedding_dim, n_hidden_rnn,
                 pad_index):
        super(BiLSTMModel, self).__init__()
        self.hidden_dim = n_hidden_rnn
from tensorflow.contrib.slim import fully_connected as fc
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import utils
# import scedar as sce
# import vae

# -------------------------------------------#
# NOTE: scdata/sclabels are expected to exist already; the loading block
# below is kept commented out, as in the original.
'''
try:
    data, scdata
except:
    data = pd.read_csv('/Users/dawnstear/desktop/chop_cellpred/data.csv')
    sclabels = data['Labels']
    scdata = data.drop(['Labels', 'TYPE'], axis=1)
'''

DataObj = utils.Data(scdata, sclabels, drop_remainder=True)
n_cells, n_dims = np.shape(scdata)

# from tensorflow.examples.tutorials.mnist import input_data
# mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
num_sample = n_cells  # mnist.train.num_examples
input_dim = n_dims  # mnist.train.images[0].shape[0]

# ----------------------------------------------------#


class VariantionalAutoencoder(object):
    def __init__(self, learning_rate=1e-3, batch_size=100, n_z=10):
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.n_z = n_z
        self.build()
    effective_label = torch.sum(value, dim=1).tolist()
    total = sum(effective_label)
    hit = 0.
    logit_tag = torch.argmax(predict, dim=2).tolist()
    real_tag = torch.argmax(real_label, dim=2).tolist()
    for effnum, ltag, rtag in zip(effective_label, logit_tag, real_tag):
        # Count matching tags within each sequence's effective length.
        hit += sum(1. for t1, t2 in zip(ltag[:int(effnum)], rtag[:int(effnum)])
                   if t1 == t2)
        tag_idx.extend(ltag[:int(effnum)])
    print(logit_tag[0])
    print(real_tag[0])
    print(hit)
    return hit / total, tag_idx


data = utils.Data('eng.train.bioes', 'eng.testa.bioes', 'vocab.txt')
data.data_to_tensor(cuda=True)

device = torch.device('cuda')
net = RNN(data.wn)
net.to(device)
optimizer = optim.SGD(net.parameters(), net.config.lr)

f = open('predict.txt', 'w')
acc_list = []
loss_list = []
delay = float(config['TIME']['Delay'])
print("Delay is..", delay)

s = sched.scheduler(time.time, time.sleep)

uci = uciScraper.run()
testing = testingScraper.run()
comunal = comunalScraper.run()
regional = regionalScraper.run()
ventilator = ventilatorScraper.run()
totals = totalsScraper.run()
critical = criticalScraper.run()

mainPage = utils.Data(**{
    'url': '',
    'name': 'mainPage'
})


def job():
    uci = uciScraper.run()
    testing = testingScraper.run()
    comunal = comunalScraper.run()
    regional = regionalScraper.run()
    ventilator = ventilatorScraper.run()
    totals = totalsScraper.run()
    critical = criticalScraper.run()


def structureMainPage(this):
    return {
        'latest': totals.output['latest'],
def train_mdl(mdl, optimizer):
    """Trains a submitted model using the submitted optimizer."""
    pp = pprint.PrettyPrinter(indent=4)
    LOGGER.info('+ Begin training with configuration:\n{}'.format(
        pp.pformat(CONFIG)))

    epochs = CONFIG['training']['epochs']
    load_args = {
        'batch_size': CONFIG['dataloader']['batch_size'],
        'num_workers': CONFIG['dataloader']['num_workers'],
        'shuffle': CONFIG['dataloader']['shuffle']
    }

    # Shuffles data between day1=test and day2=valid.
    data = utils.get_shuffled_data(
        test_p=CONFIG['dataloader']['test_proportion'])

    # Set up Dataloaders.
    train_data = utils.Data(precomputed=data['train'], augmentation=True)
    valid_data = utils.Data(precomputed=data['valid'], augmentation=False)
    train_load = torch.utils.data.DataLoader(train_data, **load_args)
    valid_load = torch.utils.data.DataLoader(valid_data, **load_args)

    # Move model to GPU if required.
    if CUDA:
        mdl = mdl.cuda()

    # Initial values.
    valid_loss = 10000
    best_valid_loss = 10000
    all_train_losses, all_valid_losses = [], []
    all_train_scores, all_valid_scores = [], []

    # Reduce learning rate if we plateau (valid_loss does not decrease).
    scheduler = ReduceLROnPlateau(
        optimizer, patience=CONFIG['training']['schedule_patience'])

    for ep in range(epochs):
        t1 = time.time()

        scheduler.step(valid_loss)

        train_results = train_loop(mdl, optimizer, train_load)
        valid_results = evalu_loop(mdl, valid_load)
        # Update valid_loss so the scheduler sees this epoch's value
        # (previously it was never reassigned after initialization).
        valid_loss = valid_results['loss']['mean']

        # Keep track of per-epoch stats for plots.
        all_train_losses.append(train_results['loss']['mean'])
        all_valid_losses.append(valid_results['loss']['mean'])
        all_train_scores.append([
            train_results['scores']['pr_mu'],
            train_results['scores']['rt_mu'],
            train_results['scores']['rr_std'],
            train_results['scores']['id_recall'],
            train_results['scores']['total_score']
        ])
        all_valid_scores.append([
            valid_results['scores']['pr_mu'],
            valid_results['scores']['rt_mu'],
            valid_results['scores']['rr_std'],
            valid_results['scores']['id_recall'],
            valid_results['scores']['total_score']
        ])

        # Get the best model (early stopping).
        if valid_results['loss']['mean'] < best_valid_loss:
            best_valid_loss = all_valid_losses[-1]
            best_model = mdl.state_dict()
            best_epoch = ep + 1
            LOGGER.info('+ New best model found: loss={}, score={}'.format(
                best_valid_loss, valid_results['scores']['total_score']))

        # Log training performance.
        time_elapsed = time.time() - t1
        msg_info = '[{}/{}] {:.2f} sec: '.format(ep + 1, epochs, time_elapsed)
        msg_loss = 'loss(t/v)={:.2f}/{:.2f}, '.format(
            train_results['loss']['mean'], valid_results['loss']['mean'])
        msg_scr1 = '{:.2f}/{:.2f}'.format(train_results['scores']['pr_mu'],
                                          valid_results['scores']['pr_mu'])
        msg_scr2 = '{:.2f}/{:.2f}'.format(train_results['scores']['rt_mu'],
                                          valid_results['scores']['rt_mu'])
        msg_scr3 = '{:.2f}/{:.2f}'.format(train_results['scores']['rr_std'],
                                          valid_results['scores']['rr_std'])
        msg_scr4 = '{:.2f}/{:.2f}'.format(train_results['scores']['id_recall'],
                                          valid_results['scores']['id_recall'])
        msg_scrt = '{:.2f}/{:.2f}'.format(
            train_results['scores']['total_score'],
            valid_results['scores']['total_score'])
        msg_task = 'scores(t/v)=[{} + {} + {} + {} = {}]'.format(
            msg_scr1, msg_scr2, msg_scr3, msg_scr4, msg_scrt)
        LOGGER.info(msg_info + msg_loss + msg_task)

        # Early stopping patience breaks training if we are just overfitting.
        if ep + 1 >= best_epoch + CONFIG['training']['early_stopping_patience']:
            LOGGER.info('Impatient! No gen. improvement in {} epochs'.format(
                CONFIG['training']['early_stopping_patience']))
            break

    # Rewind to best epoch.
    LOGGER.info('Early Stopping: Rewinding to epoch {}'.format(best_epoch))
    mdl.load_state_dict(best_model)

    # Stack scores into one numpy array each.
    all_train_losses = np.vstack(all_train_losses)
    all_train_scores = np.vstack(all_train_scores)
    all_valid_losses = np.vstack(all_valid_losses)
    all_valid_scores = np.vstack(all_valid_scores)

    results = {
        'train': {
            'losses': all_train_losses,
            'scores': all_train_scores
        },
        'valid': {
            'losses': all_valid_losses,
            'scores': all_valid_scores
        },
        'best_epoch': best_epoch,
        'ymap': train_data.ymap,
        'config': CONFIG
    }

    return mdl, results
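# Hedged sketch of driving train_mdl(); SomeModel and the optimizer
# hyperparameters below are illustrative assumptions, not the repo's actual
# experiment entry point.
mdl = SomeModel()  # hypothetical nn.Module defined elsewhere
optimizer = torch.optim.Adam(mdl.parameters(), lr=1e-3)
mdl, results = train_mdl(mdl, optimizer)
LOGGER.info('Best epoch: {}'.format(results['best_epoch']))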
# data_converter = utils.Data(PATH_TO_LR, SAVE_DIR, NAME, mode='test',
#                             output_data_dir=PATH_TO_LR)
# input_list, output_list = data_converter.get_files()
# data_converter.convert_to_tfrecord(input_list, output_list, num_files=1)

#### MRI data

## Training data
PATH_TO_HR = 'D:/PROJECT_DATA/axial_superresolution/raw_data/train_hr/'
PATH_TO_LR = 'D:/PROJECT_DATA/axial_superresolution/raw_data/train_lr_bicubic/'
SAVE_DIR = 'D:/PROJECT_DATA/axial_superresolution/tfrecords/'
NAME = 'training_data_bicubic'

data_converter = utils.Data(PATH_TO_LR, SAVE_DIR, NAME, mode='train',
                            output_data_dir=PATH_TO_HR)
input_list, output_list = data_converter.get_files()
data_converter.convert_to_tfrecord(input_list, output_list, num_files=4)

## Validation data
PATH_TO_HR = 'D:/PROJECT_DATA/axial_superresolution/raw_data/valid_hr/'
PATH_TO_LR = 'D:/PROJECT_DATA/axial_superresolution/raw_data/valid_lr_bicubic/'
SAVE_DIR = 'D:/PROJECT_DATA/axial_superresolution/tfrecords/'
NAME = 'validation_data_bicubic'

data_converter = utils.Data(PATH_TO_LR, SAVE_DIR, NAME, mode='valid',