示例#1
0
def train(modelname: str):
    ds = Dataset()
    emails = ds.get_data()
    md = Model()
    md.train(emails)
    md.serialize(modelname)
    return {"Hello": "World"}
示例#2
0
def train():

    # 1. Crear modelo
    print('(TRAINER) Creating model...')    
    model = Model()

    # 2. Entrenar clasificador
    print('(TRAINER) Training model...')
    model.train()

    # 3. Guardar clasificador
    print('(TRAINER) Saving model...')
    model.save()

    return model
示例#3
0
文件: train.py 项目: ttslr/BVAE-TTS
def main(args):
    train_loader, val_loader, collate_fn = prepare_dataloaders(hp)
    model = Model(hp).cuda()
    optimizer = torch.optim.Adamax(model.parameters(), lr=hp.lr)
    writer = get_writer(hp.output_directory, args.logdir)
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

    iteration = 0
    model.train()
    print(f"Training Start!!! ({args.logdir})")
    while iteration < (hp.train_steps):
        for i, batch in enumerate(train_loader):
            text_padded, text_lengths, mel_padded, mel_lengths = [ x.cuda() for x in batch ]
            recon_loss, kl_loss, duration_loss, align_loss = model(text_padded, mel_padded, text_lengths, mel_lengths)

            alpha=min(1, iteration/hp.kl_warmup_steps)
            with amp.scale_loss((recon_loss + alpha*kl_loss + duration_loss + align_loss), optimizer) as scaled_loss:
                scaled_loss.backward()

            iteration += 1
            lr_scheduling(optimizer, iteration)
            nn.utils.clip_grad_norm_(model.parameters(), hp.grad_clip_thresh)
            optimizer.step()
            model.zero_grad()
            writer.add_scalar('train_recon_loss', recon_loss, global_step=iteration)
            writer.add_scalar('train_kl_loss', kl_loss, global_step=iteration)
            writer.add_scalar('train_duration_loss', duration_loss, global_step=iteration)
            writer.add_scalar('train_align_loss', align_loss, global_step=iteration)

            if iteration % (hp.iters_per_validation) == 0:
                validate(model, val_loader, iteration, writer)

            if iteration % (hp.iters_per_checkpoint) == 0:
                save_checkpoint(model, optimizer, hp.lr, iteration, filepath=f'{hp.output_directory}/{args.logdir}')

            if iteration == (hp.train_steps):
                break
示例#4
0
def main(args):
    train_loader, val_loader, collate_fn = prepare_dataloaders(hparams, stage=args.stage)

    if args.stage!=0:
        checkpoint_path = f"training_log/aligntts/stage{args.stage-1}/checkpoint_{hparams.train_steps[args.stage-1]}"
        state_dict = {}
        for k, v in torch.load(checkpoint_path)['state_dict'].items():
            state_dict[k[7:]]=v

        model = Model(hparams).cuda()
        model.load_state_dict(state_dict)
        model = nn.DataParallel(model).cuda()
    else:
        model = nn.DataParallel(Model(hparams)).cuda()

    criterion = MDNLoss()
    writer = get_writer(hparams.output_directory, f'{hparams.log_directory}/stage{args.stage}')
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=hparams.lr,
                                 betas=(0.9, 0.98),
                                 eps=1e-09)
    iteration, loss = 0, 0
    model.train()

    print(f'Stage{args.stage} Start!!! ({str(datetime.now())})')
    while True:
        for i, batch in enumerate(train_loader):
            if args.stage==0:
                text_padded, mel_padded, text_lengths, mel_lengths = [
                    reorder_batch(x, hparams.n_gpus).cuda() for x in batch
                ]
                align_padded=None
            else:
                text_padded, mel_padded, align_padded, text_lengths, mel_lengths = [
                    reorder_batch(x, hparams.n_gpus).cuda() for x in batch
                ]

            sub_loss = model(text_padded,
                             mel_padded,
                             align_padded,
                             text_lengths,
                             mel_lengths,
                             criterion,
                             stage=args.stage)
            sub_loss = sub_loss.mean()/hparams.accumulation
            sub_loss.backward()
            loss = loss+sub_loss.item()
            iteration += 1

            if iteration%hparams.accumulation == 0:
                lr_scheduling(optimizer, iteration//hparams.accumulation)
                nn.utils.clip_grad_norm_(model.parameters(), hparams.grad_clip_thresh)
                optimizer.step()
                model.zero_grad()
                writer.add_scalar('Train loss', loss, iteration//hparams.accumulation)
                loss=0

            if iteration%(hparams.iters_per_validation*hparams.accumulation)==0:
                validate(model, criterion, val_loader, iteration, writer, args.stage)

            if iteration%(hparams.iters_per_checkpoint*hparams.accumulation)==0:
                save_checkpoint(model,
                                optimizer,
                                hparams.lr,
                                iteration//hparams.accumulation,
                                filepath=f'{hparams.output_directory}/{hparams.log_directory}/stage{args.stage}')

            if iteration==(hparams.train_steps[args.stage]*hparams.accumulation):
                break

        if iteration==(hparams.train_steps[args.stage]*hparams.accumulation):
            break
            
    print(f'Stage{args.stage} End!!! ({str(datetime.now())})')
示例#5
0
class BERTable():
    def __init__(self,
                 df,
                 column_type,
                 embedding_dim=5,
                 n_layers=5,
                 dim_feedforward=100,
                 n_head=5,
                 dropout=0.15,
                 ns_exponent=0.75,
                 share_category=False,
                 use_pos=False,
                 device='cpu'):

        self.logger = create_logger(name="BERTable")

        self.col_type = {'numerical': [], 'categorical': [], 'vector': []}
        for i, data_type in enumerate(column_type):
            self.col_type[data_type].append(i)

        self.embedding_dim = embedding_dim
        self.use_pos = use_pos
        self.device = device

        self.vocab = Vocab(df, self.col_type, share_category, ns_exponent)

        vocab_size = {
            'numerical': len(self.vocab.item2idx['numerical']),
            'categorical': len(self.vocab.item2idx['categorical'])
        }

        vector_dims = [np.shape(df[col])[1] for col in self.col_type['vector']]
        tab_len = len(column_type)
        self.model = Model(vocab_size, self.col_type, use_pos, vector_dims,
                           embedding_dim, dim_feedforward, tab_len, n_layers,
                           n_head, dropout)

    def pretrain(self,
                 df,
                 max_epochs=3,
                 lr=1e-4,
                 lr_weight={
                     'numerical': 0.33,
                     'categorical': 0.33,
                     'vector': 0.33
                 },
                 loss_clip=[0, 100],
                 n_sample=4,
                 mask_rate=0.15,
                 replace_rate=0.8,
                 batch_size=32,
                 shuffle=True,
                 num_workers=1):

        self.model.loss_clip = loss_clip
        self.logger.info("[-] Converting to indices")
        data = self.vocab.convert(df, num_workers)

        self.model.to(self.device)
        self.model.train()
        optimizer = torch.optim.Adam(self.model.parameters(), lr=float(lr))

        self.logger.info("[-] Start Pretraining")

        process_bar = tqdm(range(max_epochs),
                           desc=f"[Progress]",
                           total=max_epochs,
                           leave=True,
                           position=0)

        for epoch in process_bar:

            generator = create_dataloader(data,
                                          self.col_type,
                                          self.vocab,
                                          self.embedding_dim,
                                          self.use_pos,
                                          batch_size,
                                          num_workers,
                                          mask_rate=mask_rate,
                                          replace_rate=replace_rate,
                                          n_sample=n_sample,
                                          shuffle=shuffle)

            metric_bar = tqdm([0],
                              desc=f"[Metric]",
                              bar_format="{desc} {postfix}",
                              leave=False,
                              position=2)

            epoch_bar = tqdm(generator,
                             desc=f"[Epoch]",
                             leave=False,
                             position=1)

            loss_history = {'numerical': [], 'categorical': [], 'vector': []}

            for batch_data in epoch_bar:

                batch_data = transfer(batch_data, self.device)
                _, losses = self.model.forward(batch_data, mode='train')

                loss = sum([
                    losses[data_type] / len(self.col_type[data_type]) *
                    lr_weight[data_type] for data_type in self.col_type
                    if len(self.col_type[data_type]) > 0
                ])

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                display = ''
                for types in losses:
                    loss_history[types].append(losses[types].item())
                    display += f'{types}: {np.mean(loss_history[types]):5.2f} '
                metric_bar.set_postfix_str(display)

            process_bar.write(f'[Log] Epoch {epoch:0>2d}| ' + display)
            epoch_bar.close()
            metric_bar.close()

        process_bar.close()

        self.model.cpu()

    # def transform(self, df, batch_size=32, num_workers=1):
    #     self.logger.info("[-] Converting to indices")
    #     data = self.vocab.convert(df, num_workers)

    #     generator = create_dataloader(
    #         data, self.col_type, self.vocab,
    #         self.embedding_dim, self.use_pos,
    #         batch_size, num_workers, mode='test')

    #     self.logger.info("[-] Start Transforming")

    #     process_bar = tqdm(
    #         generator,
    #         desc=f"[Process]",
    #         leave=False,
    #         position=0)

    #     self.model.to(self.device)
    #     self.model.eval()

    #     df_t = []
    #     for batch_data in process_bar:
    #         batch_data = transfer(batch_data, self.device)
    #         feature = self.model.forward(batch_data, mode='test')
    #         df_t += list(feature.cpu().detach().numpy())

    #     process_bar.close()
    #     self.model.cpu()

    #     return df_t

    def save(self, model_path='model.ckpt', vocab_path='vocab.pkl'):
        torch.save(self.model.state_dict(), model_path)
        with open(vocab_path, 'wb') as file:
            pkl.dump(self.vocab, file)
示例#6
0
import argparse
from modules.model import Model

parser = argparse.ArgumentParser(description='train covid-diagnosis')
parser.add_argument('--model_name', required=True, help='choose model name')
parser.add_argument('--backbone',
                    required=True,
                    help='choose backbone for network')
parser.add_argument('--dataset',
                    required=True,
                    help='choose dataset from x-ray & CT scan data')
parser.add_argument('--grad_cam',
                    default=False,
                    help='visualization of heat map')

args = parser.parse_args()

test_model = Model(args.model_name, args.backbone)
test_model.set_dataset(args.dataset)
test_model.train()
示例#7
0
def evaluate(grid_search=False):

    # Lista de 6-uplas (model, params, accuracy, precision, recall, f1_score)
    results_list = []

    # Iterar segun tipos de modelo
    for model_type in const.MODELS:
            
        print()
        print('(EVALUATOR) Evaluating model ' + model_type)

        if grid_search:

            # Lista de 6-uplas (model, params, accuracy, precision, recall, f1_score)
            grid_search_list = []
            param_space = get_parameter_space(model_type)

            for params in param_space:

                # 1. Crear modelo
                model = Model(model=model_type, params={'model': model_type, 'params': params})

                # 2. Entrenar clasificador
                model.train()

                # 3. Evaluar clasificador
                accuracy, results, _, _ = model.evaluate()
                grid_search_list.append((model_type, params, accuracy, results['precision'], results['recall'], results['f1_score']))

            # Ordenar resultados segun f1_score
            grid_search_list = sorted(grid_search_list, key=lambda x: x[5], reverse=True)

            print()
            print('(EVALUATOR) Grid search results -> Model - ', model_type)
            for _, params, accuracy, precision, recall, f1_score in grid_search_list:
                print()
                print("Params - ", params)
                print("-> F1 Score - ", "{0:.2f}".format(f1_score))
                print("-> Precision - ", "{0:.2f}".format(precision))
                print("-> Recall - ", "{0:.2f}".format(recall))
                print("-> Accuracy - ", "{0:.2f}".format(accuracy))
            print()

            best_params = grid_search_list[0][1]
            best_accuracy = grid_search_list[0][2]
            best_precision = grid_search_list[0][3]
            best_recall = grid_search_list[0][4]
            best_f1_score = grid_search_list[0][5]
            results_list.append((model_type, best_params, best_accuracy, best_precision, best_recall, best_f1_score))

        else:

            # 1. Crear modelo
            model = Model(model=model_type)

            # 2. Entrenar clasificador
            model.train()

            # 3. Evaluar clasificador
            accuracy, results, _, _ = model.evaluate()
            results_list.append((model_type, None, accuracy, results['precision'], results['recall'], results['f1_score']))

    # Ordenar resultados segun f1_score
    results_list = sorted(results_list, key=lambda x: x[5], reverse=True)

    # Mostrar resultados
    print()
    print('(EVALUATOR) Sorted results: ')
    for model, params, accuracy, precision, recall, f1_score in results_list:
        print()
        print("Model - ", model)
        if params is not None:
            print("Params - ", params)
        print("-> F1 Score - ", "{0:.2f}".format(f1_score))
        print("-> Precision - ", "{0:.2f}".format(precision))
        print("-> Recall - ", "{0:.2f}".format(recall))
        print("-> Accuracy - ", "{0:.2f}".format(accuracy))
    print()

    best_solution = {
        'model': results_list[0][0],
        'params': results_list[0][1]
    }

    # Elegir mejor modelo, entrenarlo por completo y guardarlo
    model = Model(model=results_list[0][0], params=best_solution)
    model.train()
    model.save()

    print('(EVALUATOR) Trained and saved best model')
示例#8
0
文件: train.py 项目: ChenX17/aligntts
def main(args):
    train_loader, val_loader, collate_fn = prepare_dataloaders(
        hparams, stage=args.stage)
    initial_iteration = None
    if args.stage != 0:
        checkpoint_path = f"training_log/aligntts/stage{args.stage-1}/checkpoint_{hparams.train_steps[args.stage-1]}"

        if not os.path.isfile(checkpoint_path):
            print(f'{checkpoint_path} does not exist')
            checkpoint_path = sorted(
                glob(f"training_log/aligntts/stage{args.stage-1}/checkpoint_*")
            )[-1]
            print(f'Loading {checkpoint_path} instead')

        state_dict = {}
        for k, v in torch.load(checkpoint_path)['state_dict'].items():
            state_dict[k[7:]] = v

        model = Model(hparams).cuda()
        model.load_state_dict(state_dict)
        model = nn.DataParallel(model).cuda()
    else:
        if args.pre_trained_model != '':
            if not os.path.isfile(args.pre_trained_model):
                print(f'{args.pre_trained_model} does not exist')

            state_dict = {}
            for k, v in torch.load(
                    args.pre_trained_model)['state_dict'].items():
                state_dict[k[7:]] = v
            initial_iteration = torch.load(args.pre_trained_model)['iteration']
            model = Model(hparams).cuda()
            model.load_state_dict(state_dict)
            model = nn.DataParallel(model).cuda()
        else:

            model = nn.DataParallel(Model(hparams)).cuda()

    criterion = MDNLoss()
    writer = get_writer(hparams.output_directory,
                        f'{hparams.log_directory}/stage{args.stage}')
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=hparams.lr,
                                 betas=(0.9, 0.98),
                                 eps=1e-09)
    iteration, loss = 0, 0
    if initial_iteration is not None:
        iteration = initial_iteration
    model.train()

    print(f'Stage{args.stage} Start!!! ({str(datetime.now())})')
    while True:
        for i, batch in enumerate(train_loader):
            if args.stage == 0:
                text_padded, mel_padded, text_lengths, mel_lengths = [
                    reorder_batch(x, hparams.n_gpus).cuda() for x in batch
                ]
                align_padded = None
            else:
                text_padded, mel_padded, align_padded, text_lengths, mel_lengths = [
                    reorder_batch(x, hparams.n_gpus).cuda() for x in batch
                ]

            sub_loss = model(text_padded,
                             mel_padded,
                             align_padded,
                             text_lengths,
                             mel_lengths,
                             criterion,
                             stage=args.stage,
                             log_viterbi=args.log_viterbi,
                             cpu_viterbi=args.cpu_viterbi)
            sub_loss = sub_loss.mean() / hparams.accumulation
            sub_loss.backward()
            loss = loss + sub_loss.item()
            iteration += 1
            if iteration % 100 == 0:
                print(
                    f'[{str(datetime.now())}] Stage {args.stage} Iter {iteration:<6d} Loss {loss:<8.6f}'
                )

            if iteration % hparams.accumulation == 0:
                lr_scheduling(optimizer, iteration // hparams.accumulation)
                nn.utils.clip_grad_norm_(model.parameters(),
                                         hparams.grad_clip_thresh)
                optimizer.step()
                model.zero_grad()
                writer.add_scalar('Train loss', loss,
                                  iteration // hparams.accumulation)
                writer.add_scalar('Learning rate', get_lr(optimizer),
                                  iteration // hparams.accumulation)
                loss = 0

            if iteration % (hparams.iters_per_validation *
                            hparams.accumulation) == 0:
                validate(model, criterion, val_loader, iteration, writer,
                         args.stage)

            if iteration % (hparams.iters_per_checkpoint *
                            hparams.accumulation) == 0:
                save_checkpoint(
                    model,
                    optimizer,
                    hparams.lr,
                    iteration // hparams.accumulation,
                    filepath=
                    f'{hparams.output_directory}/{hparams.log_directory}/stage{args.stage}'
                )

            if iteration == (hparams.train_steps[args.stage] *
                             hparams.accumulation):
                break

        if iteration == (hparams.train_steps[args.stage] *
                         hparams.accumulation):
            break

    print(f'Stage{args.stage} End!!! ({str(datetime.now())})')
示例#9
0
def analyze(company):
    """ This route responds when the user submits how many models they would like to train.
        It trains and predicts with a model as many times as the user specified and then
        redirects to the final results page.
        
        Parameters:
            company(str): stock symbol of the stock that the model will analyze
    """

    # Reads the user's submission of how many models they would like to train and sets
    # it to 1 if the user entered something besides a positive integer
    try:
        count = int(request.form['count'])
        if (count <= 0 or count > 3):
            count = 1
    except ValueError:
        count = 1

    # Reading the data stored locally and then cleaning out the filesystem
    X_pred = pd.read_csv('prediction_data.csv')
    x = pd.read_csv('x.csv')
    y = pd.read_csv('y.csv')
    os.remove('prediction_data.csv')
    os.remove('x.csv')
    os.remove('y.csv')

    # Stores the final prediction and error of each model after it has
    # completed all of the epochs of traing
    predictions = []
    errors = []

    # Stores the predictions each model makes after each epoch
    prediction_json = []

    for i in range(0, count):

        reg = Model(len(x.columns))

        prediction_history, rmse = reg.train(x, y, X_pred)

        prediction_history.insert(0, 'Model ' + str(i + 1))
        prediction_json.append(prediction_history)

        Y_pred = reg.predict(X_pred)
        predictions.append(Y_pred)
        errors.append(rmse)

    # Average the predictions to get the final or "true" prediction/error
    true_prediction = sum(predictions) / len(predictions)
    true_error = sum(errors) / len(errors)

    # Saving result data so that it can be used in the next route
    session['predictions'] = prediction_json
    session['true_prediction'] = true_prediction
    session['true_error'] = true_error

    print('')
    print('********************')
    print('TRUE PREDICTION: ' + str(true_prediction))
    print('********************')
    print('')
    print('True Error: ' + str(true_error))
    print('')

    return redirect('/' + company + '/' + str(count) + '/' 'results')
示例#10
0
def testing(non_adapted_model_dir, adapted_model_dir, classifier_dir,
            nb_clss_labels, feat_path, labels_path, device, src_batch_size,
            trgt_batch_size):
    """Implements the complete test process of the AUDASC method

    :param non_adapted_model_dir: directory of non adapted model
    :param adapted_model_dir: directory of adapted model
    :param classifier_dir: directory of classifier
    :param nb_clss_labels: number of acoustic scene classes
    :param feat_path: directory of test features
    :param labels_path: directory of test labels
    :param device: The device that will be used.
    :param src_batch_size: source batch size
    :param trgt_batch_size: target batch size
    """
    non_adapted_cnn = Model().to(device)
    non_adapted_cnn.load_state_dict(
        torch.load(path.join(non_adapted_model_dir,
                             'non_adapted_cnn.pytorch')))

    adapted_cnn = Model().to(device)
    adapted_cnn.load_state_dict(
        torch.load(path.join(adapted_model_dir, 'target_cnn.pytorch')))

    label_classifier = LabelClassifier(nb_clss_labels).to(device)
    label_classifier.load_state_dict(
        torch.load(path.join(classifier_dir, 'label_classifier.pytorch')))

    non_adapted_cnn.train(False)
    adapted_cnn.train(False)
    label_classifier.train(False)

    feat = file_io.load_pickled_features(feat_path)
    labels = file_io.load_pickled_features(labels_path)

    non_adapted_acc = {}
    adapted_acc = {}

    '********************************************'
    '** testing for all data, device A, B, & C **'
    '********************************************'

    # testing on source data
    src_batch_feat, src_batch_labels = \
        test_step.test_data_mini_batch(feat['A'].to(device), labels['A'].to(device), batch_size=src_batch_size)
    non_adapted_src_correct, adapted_src_correct, src_temp = \
        test_step.test_function(non_adapted_cnn, adapted_cnn, label_classifier, src_batch_feat, src_batch_labels)

    non_adapted_src_len = src_temp * src_batch_size
    adapted_src_len = src_temp * src_batch_size

    # testing on target data
    target_feat = torch.cat([feat['B'], feat['C']], dim=0).to(device)
    target_labels = torch.cat([labels['B'], labels['C']], dim=0).to(device)

    trgt_batch_feat, trgt_batch_labels =\
        test_step.test_data_mini_batch(target_feat, target_labels, batch_size=trgt_batch_size)
    non_adapted_tgt_correct, adapted_tgt_correct, trgt_temp = \
        test_step.test_function(non_adapted_cnn, adapted_cnn, label_classifier, trgt_batch_feat, trgt_batch_labels)

    non_adapted_tgt_len = trgt_temp * trgt_batch_size
    adapted_tgt_len = trgt_temp * trgt_batch_size

    # calculating the accuracy of both models on data from device A
    non_adapted_acc['A'] = math_funcs.to_percentage(non_adapted_src_correct,
                                                    non_adapted_src_len)
    adapted_acc['A'] = math_funcs.to_percentage(adapted_src_correct,
                                                adapted_src_len)

    # calculating the accuracy of both models on data from devices B & C
    non_adapted_acc['BC'] = math_funcs.to_percentage(non_adapted_tgt_correct,
                                                     non_adapted_tgt_len)
    adapted_acc['BC'] = math_funcs.to_percentage(adapted_tgt_correct,
                                                 adapted_tgt_len)

    # calculating the accuracy of both models on data from all devices
    non_adapted_beta, non_adapted_alpha = math_funcs.weighting_factors(
        non_adapted_src_len, non_adapted_tgt_len)
    adapted_beta, adapted_alpha = math_funcs.weighting_factors(
        adapted_src_len, adapted_tgt_len)

    non_adapted_weighted_acc = (non_adapted_beta * non_adapted_acc['A']) + (
        non_adapted_alpha * non_adapted_acc['BC'])
    adapted_weighted_acc = (adapted_beta * adapted_acc['A']) + (
        adapted_alpha * adapted_acc['BC'])

    non_adapted_acc['all'] = non_adapted_weighted_acc
    adapted_acc['all'] = adapted_weighted_acc

    printing.testing_result_msg(non_adapted_acc,
                                adapted_acc,
                                ending='\n',
                                flushing=True)