Example #1
def main():
    train_X, train_Y, test_X, test_Y = load_2D_dataset(False)

    np.random.seed(1)
    parameters = initialize_parameters_random([train_X.shape[0], 15, 10, 1])
    print('Training without Regularization...')
    trained_weights = train(train_X,
                            train_Y,
                            parameters,
                            iterations=30000,
                            learning_rate=0.5)
    print('Prediction on Train set:')
    predict(train_X, train_Y, trained_weights)
    print('Prediction on Dev set:')
    predict(test_X, test_Y, trained_weights)
    plot_decision_boundary(lambda x: predict_dec(trained_weights, x.T),
                           train_X, train_Y)

    for l in [0.01, 0.03, 0.1, 0.3, 1, 3, 10]:
        print('\nTraining with L2 Regularization (lambda = {})'.format(l))
        parameters = initialize_parameters_random(
            [train_X.shape[0], 15, 10, 1])
        trained_weights = train(train_X, train_Y, parameters,
                                iterations=30000, learning_rate=0.3,
                                lambd=l, print_cost=True)
        predict(train_X, train_Y, trained_weights)
        predict(test_X, test_Y, trained_weights)
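The train() call above is assumed to fold lambd into the cost. A minimal sketch of the L2 penalty term, assuming the usual W1..WL parameter layout (the helper name is illustrative, not from the original):

import numpy as np

def l2_cost_term(parameters, lambd, m):
    # L2 regularization adds (lambd / (2 * m)) * sum of squared weight
    # entries to the cross-entropy cost; biases are not penalized.
    L = len(parameters) // 2  # parameters hold W1..WL and b1..bL
    squared_norms = sum(np.sum(np.square(parameters['W' + str(l)]))
                        for l in range(1, L + 1))
    return (lambd / (2 * m)) * squared_norms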
Example #2
def predict_data(model_path: str, data_path: str, output_path: str):
    with open(model_path, 'rb') as f:
        model = pickle.load(f)
    df = pd.read_csv(data_path)
    X, y = preprocess.preprocess(df)
    predictions = predict.predict(model, X)
    pd.DataFrame(predictions).to_csv(output_path, index=False)
    print("Predictions Done")
    print("Output file saved in: %s" % output_path)
Example #3
def train_model(model_path: str, data_path: str, prov: str = 'Córdoba'):
    df = pd.read_csv(data_path)
    X, y = preprocess.preprocess(df, prov)
    model, X_test, y_test = train.train_randomforest(X, y)
    predictions = predict.predict(model, X_test)
    mse = mean_squared_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    with open(model_path, 'wb') as f:
        pickle.dump(model, f)
    print("All Done")
    print("Metrics - MSE: {} R2: {}".format(mse, r2))
    print("Model Saved in: %s" % model_path)
Example #4
def main(path: str):
    raw_files = glob.glob(path)
    df = pd.read_csv(raw_files[0])
    X, y = preprocess.preprocess(df)
    model, X_test, y_test = train.train_randomforest(X, y)
    predictions = predict.predict(model, X_test)
    mse = mean_squared_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    print("All Done")
    print("Metrics - MSE: {} R2: {}".format(mse, r2))
    preds = pd.DataFrame(predictions).reset_index(drop=True)
    real = pd.DataFrame(y_test).reset_index(drop=True)
    output = pd.concat([preds, real], axis=1, ignore_index=True)
    output.columns = ['preds', 'real']
    output.to_csv('output.csv', index=False)
Example #5
def main():
    loss_norm = []
    loss_difference = []
    countries = [
        'Afghanistan', 'Indien', 'Irak', 'Kolumbien', 'Pakistan',
        'Philippinen', 'sandbox_attacks', 'test_exp_chirp'
    ]

    for country in countries:
        data_name = country
        data_dir = '../../' + data_name + '.csv'
        train_data_scaled, train_date, test_data, test_date, std = prepare_data(
            data_dir, normalize=True, scaling='minmax')
        model_dir = './model/'
        num_epoch = 1000
        n_steps = 100
        n_inputs = 1
        n_neurons = 30
        n_layers = 1
        print(country)
        sess, train_loss, epoch_count = training(train_data_scaled,
                                                 model_dir,
                                                 num_epoch=num_epoch,
                                                 n_steps=n_steps,
                                                 n_inputs=n_inputs,
                                                 n_neurons=n_neurons,
                                                 n_layers=n_layers)
        print()

        prediction, true_labels, label_dates = predict(test_data, test_date,
                                                       sess, std, model_dir,
                                                       n_steps, n_inputs)
        # rescaled_prediction = std.inverse_transform(prediction.reshape(-1, 1))
        # rescaled_labels = std.inverse_transform(true_labels.reshape(-1, 1))

        loss_norm.append(normed_loss(prediction, true_labels))
        loss_difference.append(differential_loss(prediction, true_labels))

        plt.figure()
        plt.subplot(211)
        plt.plot(epoch_count, train_loss)
        plt.title('Training loss')
        plt.xlabel('Epoch')
        plt.ylabel('Training MSE')
        plt.subplot(212)
        plt.plot_date(label_dates,
                      true_labels,
                      xdate=True,
                      label='Labels',
                      ls="-")
        plt.plot_date(label_dates,
                      prediction,
                      xdate=True,
                      label='Predictions',
                      ls="-")
        plt.xticks(rotation="vertical")
        plt.title('Prediction')
        plt.legend()
        plt.xlabel('Days')
        plt.ylabel('Attack')
        save_fig('predicted value feedback ' + data_name, './Images/')
    loss_dict = {
        'Countries': countries,
        'Normed_loss': loss_norm,
        'Differential_loss': loss_difference
    }
    pd.DataFrame(loss_dict).to_csv('./RNN_loss.csv')
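normed_loss and differential_loss are not defined in this example. Plausible definitions, assuming a variance-normalized MSE and an MSE over first differences (both are assumptions, not the original implementations):

import numpy as np

def normed_loss(prediction, labels):
    # MSE scaled by label variance, so losses are comparable across countries.
    return np.mean((prediction - labels) ** 2) / np.var(labels)

def differential_loss(prediction, labels):
    # MSE of day-to-day changes: penalizes missing the direction of movement.
    return np.mean((np.diff(prediction) - np.diff(labels)) ** 2)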
Example #6
from modules import preprocess, train, predict
import glob
import pandas as pd

if __name__ == '__main__':
    path = '../data/raw/*.csv.gz'
    raw_files = glob.glob(path)
    path = raw_files[0]
    df = pd.read_csv(path)
    df = preprocess.preprocess(df)
    model, X_test, y_test = train.train_randomforest(df)
    predictions, mse, r2 = predict.predict(model, X_test, y_test)
    print("All Done")
    print("Metrics - MSE: {} R2: {}".format(mse, r2))
    preds = pd.DataFrame(predictions).reset_index(drop=True)
    real = pd.DataFrame(y_test).reset_index(drop=True)
    output = pd.concat([preds, real], axis=1, ignore_index=True)
    output.columns = ['preds', 'real']
    output.to_csv('output.csv', index=False)
Example #7
def polish_genome(assembly, model_path, sketch_path, genus, threads,
                  output_dir, minimap_args, mash_threshold,
                  download_contig_nums, debug):

    out = []
    output_dir = FileManager.handle_output_directory(output_dir)
    contig_output_dir_debug = output_dir + '/debug'
    contig_output_dir_debug = FileManager.handle_output_directory(
        contig_output_dir_debug)
    assembly_name = assembly.rsplit('/', 1)[-1]
    assembly_name = assembly_name.split('.')[0]

    total_start_time = time.time()
    for contig in SeqIO.parse(assembly, 'fasta'):
        timestr = time.strftime("[%Y/%m/%d %H:%M]")
        sys.stderr.write(TextColor.GREEN + str(timestr) + " INFO: RUN-ID: " +
                         contig.id + "\n" + TextColor.END)
        contig_output_dir = contig_output_dir_debug + '/' + contig.id
        contig_output_dir = FileManager.handle_output_directory(
            contig_output_dir)
        contig_name = contig_output_dir + '/' + contig.id + '.fasta'
        SeqIO.write(contig, contig_name, "fasta")

        if sketch_path:
            screen_start_time = time.time()
            print_system_log('MASH SCREEN')
            mash_file = mash.screen(contig_name, sketch_path, threads,
                                    contig_output_dir, mash_threshold,
                                    download_contig_nums, contig.id)
            screen_end_time = time.time()

            ncbi_id = mash.get_ncbi_id(mash_file)
            if len(ncbi_id) < 5:  # skip polishing if fewer than 5 closely related genomes are found
                out.append(contig_name)
                continue

            url_list = download.parser_url(ncbi_id)

        if genus:
            ncbi_id, url_list = download.parser_genus(genus)

        download_start_time = time.time()
        print_system_log('DOWNLOAD CONTIGS')
        db = download.download(contig_output_dir, ncbi_id, url_list)
        download_end_time = time.time()

        pileup_start_time = time.time()
        print("\n")
        print_system_log('PILE UP')
        db_npz = alignment.align(contig_name, minimap_args, threads, db,
                                 contig_output_dir)
        if db_npz is False:
            continue
        pileup_end_time = time.time()

        align2df_start_time = time.time()
        print_system_log('TO DATAFRAME')
        df = align2df.todf(contig_name, db_npz, contig_output_dir)
        align2df_end_time = time.time()

        predict_start_time = time.time()
        print_system_log('PREDICT')
        feather_path = contig_output_dir + '/' + contig.id + '.feather'
        result = predict.predict(feather_path, model_path, threads,
                                 contig_output_dir)
        predict_end_time = time.time()

        polish_start_time = time.time()
        print_system_log('POLISH')
        finish = polish.stitch(contig_name, result, contig_output_dir)
        polish_end_time = time.time()

        if sketch_path:
            screen_time = get_elapsed_time_string(screen_start_time,
                                                  screen_end_time)
            print_stage_time('SCREEN', screen_time)

        # calculate elapsed times for each stage
        download_time = get_elapsed_time_string(download_start_time,
                                                download_end_time)
        pileup_time = get_elapsed_time_string(pileup_start_time,
                                              pileup_end_time)
        align2df_time = get_elapsed_time_string(align2df_start_time,
                                                align2df_end_time)
        predict_time = get_elapsed_time_string(predict_start_time,
                                               predict_end_time)
        polish_time = get_elapsed_time_string(polish_start_time,
                                              polish_end_time)

        # print per-stage timings
        print_stage_time('DOWNLOAD', download_time)
        print_stage_time('PILEUP', pileup_time)
        print_stage_time('TO DATAFRAME', align2df_time)
        print_stage_time('PREDICT', predict_time)
        print_stage_time('POLISH', polish_time)
        out.append(finish)

    os.system('cat {} > {}/{}_homopolished.fasta'.format(
        ' '.join(out), output_dir, assembly_name))

    if debug:
        try:
            shutil.rmtree(contig_output_dir_debug)
        except OSError as e:
            print(e)
        else:
            return True

    total_end_time = time.time()
    total_time = get_elapsed_time_string(total_start_time, total_end_time)
    print_stage_time('Total', total_time)
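The os.system('cat ...') call above shells out and can exceed the command-line length limit on assemblies with many contigs. A pure-Python alternative with the same effect (a sketch, not the original code):

import shutil

def concat_fasta(parts, output_path):
    # Stream every polished contig into a single FASTA without a shell.
    with open(output_path, 'wb') as out_f:
        for part in parts:
            with open(part, 'rb') as in_f:
                shutil.copyfileobj(in_f, out_f)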
Example #8
def main():
    print(os.listdir("./platesv2/"))
    # Any results you write to the current directory are saved as output.
    data_root = './platesv2/plates/'
    print(os.listdir(data_root))
    # Create directory "val_dir" for validation
    train_dir = 'train'
    val_dir = 'val'
    class_names = ['cleaned', 'dirty']
    for dir_name in [train_dir, val_dir]:
        for class_name in class_names:
            os.makedirs(os.path.join(dir_name, class_name), exist_ok=True)
    # Move every 6th photo from "train_dir" to "val_dir" for validation
    create_val(data_root, train_dir, val_dir, class_names, 6)
    # Apply augmentations and form datasets for training and validation
    train_dataset, val_dataset = form_dataset(test=False)
    # We will feed the net with data in the form of batches
    batch_size = 8
    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   num_workers=batch_size)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 num_workers=batch_size)
    # Let's have a look at the first batch
    X_batch, y_batch = next(iter(train_dataloader))
    for x_item, y_item in zip(X_batch, y_batch):
        show_input(x_item, title=class_names[y_item])
    # Model: ResNet18 pretrained on ImageNet
    model = models.resnet18(pretrained=True)
    # Freeze all pretrained layers (no gradient updates)
    for param in model.parameters():
        param.requires_grad = False
    print("Output of ResNet18 before FC layer, that we add later: ",
          model.fc.in_features)
    # Add FC layer with 2 outputs: cleaned or dirty
    model.fc = torch.nn.Linear(model.fc.in_features, 2)
    # Put model on GPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    # Loss function: cross-entropy over the two classes
    loss = torch.nn.CrossEntropyLoss()
    # Optimization method - Adam
    optimizer = torch.optim.Adam(model.parameters(), amsgrad=True, lr=1.0e-3)
    # Decay LR by a factor of 0.1 every 7 epochs
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=7,
                                                gamma=0.1)
    # Training
    print("Begin training: ")
    train(model,
          train_dataloader,
          val_dataloader,
          loss,
          optimizer,
          scheduler,
          device,
          num_epochs=10)

    # Now let's make predictions
    test_dir = 'test'
    # Nest test images in a subdirectory so ImageFolder can enumerate them
    shutil.copytree(os.path.join(data_root, 'test'),
                    os.path.join(test_dir, 'unknown'))
    test_dataset = form_dataset(test=True)
    # Form batches on the test set
    test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=batch_size)
    model.eval()
    # Make predictions
    test_predictions, test_img_paths = predict(test_dataloader, model, device)
    # Show predictions for the test
    inputs, labels, paths = next(iter(test_dataloader))

    for img, pred in zip(inputs, test_predictions):
        show_input(img, title=pred)


    # Submit
    print("Making submission")
    make_submit(test_img_paths, test_predictions)
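make_submit is not shown in this example. A plausible implementation that writes a Kaggle-style CSV (the column names are assumptions):

import os
import pandas as pd

def make_submit(img_paths, predictions, out_path='submission.csv'):
    # One row per test image: the file stem as id, the predicted class as label.
    ids = [os.path.splitext(os.path.basename(p))[0] for p in img_paths]
    pd.DataFrame({'id': ids, 'label': predictions}).to_csv(out_path, index=False)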
Example #9
arguments = docopt(__doc__, version='opensoundscape.py version 0.0.1')

# Get the default config variables
config = generate_config("config/opensoundscape.ini", arguments["--ini"])

# Default arguments['<image>'] to an empty string (docopt does not supply one)
if not arguments['<image>']:
    arguments['<image>'] = ''

if arguments['spect_gen']:
    # Pass the configuration to spect_gen
    spect_gen(config)

elif arguments['view']:
    # Preprocess the file with the defaults
    # -> optionally write image to a file
    view(arguments['<label>'], arguments['<image>'], arguments['--segments'],
         config)

elif arguments['model_fit']:
    # Using defined algorithm, create model
    model_fit(config, arguments['--rerun_statistics'])

elif arguments['predict']:
    # Make a prediction based on a model
    predict(config)

elif arguments['init']:
    # Initialize INI section
    init(config)
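docopt builds the arguments dictionary from the module docstring. The actual usage text is not shown here; a plausible shape that would produce the branches above:

"""
Usage:
    opensoundscape.py spect_gen [--ini=<ini>]
    opensoundscape.py view <label> [<image>] [--segments] [--ini=<ini>]
    opensoundscape.py model_fit [--rerun_statistics] [--ini=<ini>]
    opensoundscape.py predict [--ini=<ini>]
    opensoundscape.py init [--ini=<ini>]
"""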
Example #10
    def process_item(self, item, spider):
        sentiment_dictionary = predict.predict(item)
        return sentiment_dictionary
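process_item is a Scrapy item-pipeline hook. For Scrapy to call it, the pipeline class must be registered in the project settings; the module and class names below are assumptions:

# settings.py (illustrative)
ITEM_PIPELINES = {
    'myproject.pipelines.SentimentPipeline': 300,
}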
Example #11
            print("  * (8) for Bar plot visualization of Clusters")
            print("  * (9) for Pie plot visualization of Clusters")

        choice = int(input("Enter action: "))

        if choice == 1:  # Previewing the records - Plots a basic view
            preview.preview(csv_file)

        elif choice == 2:  # Training data
            train()

        elif choice == 3:  # Exiting Program
            break

        elif choice == 4:  # Predict Cluster of Student
            predict.predict(CLUSTERS)

        elif choice == 5:  # Plots more detailed preview
            preview.rich_preview(CLUSTERS)

        elif choice == 6:  # Naming Clusters
            identify.name_clusters(CLUSTERS, NAMED.get())
            NAMED.set(True)

        elif choice == 7:  # Plots a basic View
            pt.view(CLUSTERS)

        elif choice == 8:  # Plots a bar graph representation
            preview.bar_plot(CLUSTERS)

        elif choice == 9:  # Plots a pie chart representation