Example #1
import pickle

from sklearn.metrics import mean_squared_error

def model_evaluation(model_file, data_file, file_plot_path, file_data_path):
    # Load the pickled model
    with open(model_file, 'rb') as model_pkl:
        model = pickle.load(model_pkl)

    # Load data
    n_cols, weeks, y, X = load_data(filename=data_file)

    # Get stats from the prediction
    scores, mean, std_dev = stats(X, y, model)

    # Predict and save results and metrics
    y_true, y_pred = y, model.predict(X)

    mse = mean_squared_error(y_true, y_pred)
    print(f"The MSE of the prediction is {mse}")

    my_plotter = PlotData()
    my_plotter.generate_curve_plot(weeks, y_true, y_pred)
    my_plotter.save_plot_result(file_plot_path)

    save_data(file_data_path, weeks, y_true, y_pred)
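A minimal invocation sketch; the file names below are hypothetical placeholders:

model_evaluation(
    model_file='model.pkl',
    data_file='data.csv',
    file_plot_path='prediction_plot.png',
    file_data_path='prediction_results.csv',
)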
Example #2
import os

def compress(name, k=8, force_update=False):
    # Front-code the sorted term dictionary in blocks of k terms.
    result = []
    elements_file_name = name + '.pickle'
    compressed_elements_file_name = name + '_compressed.pickle'
    if not force_update and os.path.exists(compressed_elements_file_name):
        return load_data(compressed_elements_file_name)

    elements = load_data(elements_file_name)
    for i in range(0, len(elements), k):
        # The first entry of a block keeps the full term (shared prefix 0)
        block_list = [(0,
                       elements[i].term,
                       elements[i].count,
                       elements[i].posting_lists)]
        remaining = len(elements) - i
        for bi in range(1, min(k, remaining)):
            # Later entries store only the suffix that differs from the
            # previous term, prefixed by the length of the shared prefix
            c = compare(elements[i + bi - 1].term, elements[i + bi].term)
            block_list.append((c,
                               elements[i + bi].term[c:],
                               elements[i + bi].count,
                               elements[i + bi].posting_lists))
        result.append(block_list)
    save_data(result, compressed_elements_file_name)
    return result
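The block layout implies the inverse operation: each entry after the first stores the length of the prefix shared with the previous term plus the remaining suffix. A minimal decompression sketch (decompress_block is a hypothetical helper, not part of the source):

def decompress_block(block):
    # Rebuild the full terms of one front-coded block.
    terms = []
    prev_term = ''
    for prefix_len, suffix, count, posting_lists in block:
        term = prev_term[:prefix_len] + suffix
        terms.append((term, count, posting_lists))
        prev_term = term
    return terms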
Example #3
import os

def get_index(folder_name, force_update=False):
    index_file_name = folder_name + '.pickle'
    if not force_update and os.path.exists(index_file_name):
        return load_data(index_file_name)

    # Tokenize every document and tag each token with its document id
    documents = get_file_list(folder_name)
    elements = []
    for doc_id, document in enumerate(documents):
        elements.extend(Element(token, doc_id) for token in get_tokens(document))
    elements.sort()

    # Merge consecutive equal elements (same term) into a single entry
    result = []
    for el in elements:
        if result and result[-1] == el:
            result[-1].update(el)
        else:
            result.append(el)

    save_data(result, name=index_file_name)
    return result
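get_index assumes an Element type that sorts and compares by term and can merge duplicate occurrences; a minimal sketch of that interface, reconstructed from the calls above (the attribute layout follows Example #2; this is an assumption, not the source's class):

class Element:
    def __init__(self, term, doc_id):
        self.term = term
        self.count = 1
        self.posting_lists = [doc_id]

    def __eq__(self, other):
        return self.term == other.term

    def __lt__(self, other):
        return self.term < other.term

    def update(self, other):
        # Merge another occurrence of the same term.
        self.count += other.count
        self.posting_lists += other.posting_lists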
Example #4
def train_model(text):
    # Training hyperparameters (formerly supplied on the command line via argparse)
    args = {
        'max_epochs': 5,
        'batch_size': 256,
        'sequence_length': 5,
        'max_len': 100,
    }
    tokenized = text.apply(tokenize_text)
    dataset = Dataset(tokenized, **args)
    model = Model(dataset)

    train(dataset, model, args)

    from config import DATA_DIR
    save_data(model, DATA_DIR / 'models' / 'massage_model.pickle')
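train_model calls .apply on its argument, so text is expected to be a pandas Series; a hypothetical invocation:

import pandas as pd

train_model(pd.Series(['first message text', 'second message text']))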
Example #5
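# NOTE: this snippet is truncated and opens mid-way through a list of column
# names; the __main__ block below shows it is the tail of first_iteration(data),
# which drops these columns, fills missing values with -1, converts the frame
# to numeric, and keeps six principal components.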
        "office_sqm_1000", "trc_sqm_1000", "cafe_count_1000_price_high",
        "mosque_count_1000", "cafe_count_1500_price_high", "mosque_count_1500",
        "cafe_count_2000_price_high", 'hospital_beds_raion'
    ])
    data = data.fillna(-1)

    data = utils.convert_data_to_numeric(data)
    data = dimensionality_reduction.principal_components_analysis(6, data)

    return data


if __name__ == '__main__':
    train_data = utils.load_data('../files/train.csv')
    test_data = utils.load_data('../files/test.csv')

    print('====================[TRAIN DATA]====================')
    train_data = first_iteration(train_data)
    count_na_values(train_data)
    print(train_data.describe())

    print('====================[TEST DATA]====================')
    test_data = first_iteration(test_data)
    count_na_values(test_data)
    print(test_data.describe())

    utils.save_data(train_data, 'clean_train.csv')
    utils.save_data(test_data, 'clean_test.csv')
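count_na_values is a repo-local helper; a minimal sketch of the behaviour its name and usage suggest (hypothetical implementation):

def count_na_values(data):
    # Print the number of missing values per column.
    print(data.isna().sum())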
Example #6
import torch

def train(args, encoder, decoder, loader, epoch, optimizer_encoder,
          optimizer_decoder, outpath, is_train, device):
    epoch_total_loss = 0
    labels = []
    gen_imgs = []
    if args.compareFigs:
        original = []

    if is_train:
        encoder.train()
        decoder.train()
    else:
        encoder.eval()
        decoder.eval()

    # Instantiate the loss once rather than on every batch
    loss = ChamferLoss(device)

    for i, batch in enumerate(loader, 0):
        X, Y = batch[0].to(device), batch[1]
        batch_gen_imgs = decoder(encoder(X), args)

        batch_loss = loss(batch_gen_imgs, X)
        epoch_total_loss += batch_loss.item()

        # Abort if the loss contains any NaN values
        if torch.isnan(batch_loss).any():
            raise RuntimeError('Batch loss is NaN!')

        # back prop
        if is_train:
            optimizer_encoder.zero_grad()
            optimizer_decoder.zero_grad()
            batch_loss.backward()
            optimizer_encoder.step()
            optimizer_decoder.step()
        #     print(f"epoch {epoch+1}, batch {i+1}/{len(loader)}, train_loss={batch_loss.item()}", end='\r', flush=True)
        # else:
        #     print(f"epoch {epoch+1}, batch {i+1}/{len(loader)}, valid_loss={batch_loss.item()}", end='\r', flush=True)

        # Save generated images: every batch, or only the last one
        if args.save_figs and (args.save_allFigs or i == len(loader) - 1):
            labels.append(Y.cpu())
            gen_imgs.append(torch.tanh(batch_gen_imgs).cpu())
            if args.compareFigs:
                original.append(X.cpu())

    # Save model
    if is_train:
        make_dir(f'{outpath}/weights_encoder')
        make_dir(f'{outpath}/weights_decoder')
        torch.save(
            encoder.state_dict(),
            f"{outpath}/weights_encoder/epoch_{epoch+1}_encoder_weights.pth")
        torch.save(
            decoder.state_dict(),
            f"{outpath}/weights_decoder/epoch_{epoch+1}_decoder_weights.pth")

    # Compute average loss
    epoch_avg_loss = epoch_total_loss / len(loader)
    save_data(data=epoch_avg_loss, data_name="loss", epoch=epoch,
              outpath=outpath, is_train=is_train)

    for i in range(len(gen_imgs)):
        if args.compareFigs:
            # `original` tensors were already moved to the CPU above
            save_gen_imgs(gen_imgs[i], labels[i], epoch, is_train, outpath,
                          originals=original[i])
        else:
            save_gen_imgs(gen_imgs[i], labels[i], epoch, is_train, outpath)

    return epoch_avg_loss, gen_imgs
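make_dir is a repo-local helper; a minimal sketch consistent with how it is called above (hypothetical implementation):

import os

def make_dir(path):
    # Create the directory if it does not already exist.
    os.makedirs(path, exist_ok=True)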
Example #7
import time

import torch

def train_loop(args,
               encoder,
               decoder,
               train_loader,
               valid_loader,
               optimizer_encoder,
               optimizer_decoder,
               outpath,
               device=None):
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    assert args.save_dir is not None, \
        "Please specify the directory for saving the models!"
    make_dir(args.save_dir)

    train_avg_losses = []
    train_dts = []
    valid_avg_losses = []
    valid_dts = []

    for ep in range(args.num_epochs):
        if args.load_toTrain:
            epoch = args.load_epoch + ep + 1
        else:
            epoch = ep

        # Training
        start = time.time()
        train_avg_loss, train_gen_imgs = train(args,
                                               encoder,
                                               decoder,
                                               train_loader,
                                               epoch,
                                               optimizer_encoder,
                                               optimizer_decoder,
                                               outpath,
                                               is_train=True,
                                               device=device)
        train_dt = time.time() - start

        train_avg_losses.append(train_avg_loss)
        train_dts.append(train_dt)

        save_data(data=train_avg_loss,
                  data_name="loss",
                  epoch=epoch,
                  outpath=outpath,
                  is_train=True)
        save_data(data=train_dt,
                  data_name="dt",
                  epoch=epoch,
                  outpath=outpath,
                  is_train=True)

        # Validation
        start = time.time()
        valid_avg_loss, valid_gen_imgs = test(args,
                                              encoder,
                                              decoder,
                                              valid_loader,
                                              epoch,
                                              optimizer_encoder,
                                              optimizer_decoder,
                                              outpath,
                                              device=device)
        valid_dt = time.time() - start

        valid_avg_losses.append(valid_avg_loss)
        valid_dts.append(valid_dt)

        save_data(data=valid_avg_loss,
                  data_name="loss",
                  epoch=epoch,
                  outpath=outpath,
                  is_train=False)
        save_data(data=valid_dt,
                  data_name="dt",
                  epoch=epoch,
                  outpath=outpath,
                  is_train=False)

        total_epochs = args.num_epochs + (args.load_epoch if args.load_toTrain else 0)
        print(f'epoch={epoch+1}/{total_epochs}, '
              f'train_loss={train_avg_loss}, valid_loss={valid_avg_loss}, '
              f'dt={train_dt+valid_dt}')

        if (epoch > 0) and ((epoch + 1) % 10 == 0):
            plot_eval_results(args, (train_avg_losses, valid_avg_losses),
                              f"losses to {epoch+1}",
                              outpath,
                              global_data=False)

    # Save global data
    save_data(data=train_avg_losses,
              data_name="losses",
              epoch="global",
              outpath=outpath,
              is_train=True,
              global_data=True)
    save_data(data=train_dts,
              data_name="dts",
              epoch="global",
              outpath=outpath,
              is_train=True,
              global_data=True)
    save_data(data=valid_avg_losses,
              data_name="losses",
              epoch="global",
              outpath=outpath,
              is_train=False,
              global_data=True)
    save_data(data=valid_dts,
              data_name="dts",
              epoch="global",
              outpath=outpath,
              is_train=False,
              global_data=True)

    return train_avg_losses, valid_avg_losses, train_dts, valid_dts
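The keyword calls throughout train_loop pin down the shape of the save_data helper it shares with train above; a hypothetical sketch consistent with those calls (the file layout and naming are assumptions):

import pickle

def save_data(data, data_name, epoch, outpath, is_train, global_data=False):
    # Persist one metric value, either per epoch or for the whole run.
    stage = 'train' if is_train else 'valid'
    suffix = 'global' if global_data else f'epoch_{epoch}'
    with open(f'{outpath}/{stage}_{data_name}_{suffix}.pkl', 'wb') as f:
        pickle.dump(data, f)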
Example #8
# Fragment: a method on a task class; `self.output().path` matches the
# Luigi Task/Target API.
def save_output(self, output):
    save_data(output, self.output().path)