Example No. 1
def run():
    model_state_path = "/home/ryan/code/nanopore_assembly/output/training_2018-8-30-14-31-58-3-242/model_checkpoint_4"
    # directory = "/home/ryan/code/nanopore_assembly/output/spoa_pileup_generation_chr1_full/test"   # spoa 2 pass variants excluded
    directory = "/home/ryan/code/nanopore_assembly/output/chr1_800k_2500_windows/test"  # spoa 2 pass arbitray region 2500 windows

    file_paths = FileManager.get_all_file_paths_by_type(
        parent_directory_path=directory, file_extension=".npz", sort=False)

    # Architecture parameters
    hidden_size = 16
    input_channels = 1  # 1-dimensional signal
    output_size = 5  # one-hot vector over '-','A','C','T','G'
    n_layers = 3

    # Hyperparameters
    dropout_rate = 0

    # Training parameters
    batch_size_train = 1

    checkpoint_interval = 300  # unused in this test script

    data_loader = DataLoader(file_paths=file_paths,
                             batch_size=batch_size_train)

    model = Decoder(hidden_size=hidden_size,
                    input_size=input_channels,
                    output_size=output_size,
                    n_layers=n_layers,
                    dropout_rate=dropout_rate)
    model.load_state_dict(torch.load(model_state_path))

    # consensus_caller = ConsensusCaller(sequence_to_index, sequence_to_float)

    test(model=model, data_loader=data_loader)
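
None of these excerpts include their imports. Below is a minimal sketch of what Example No. 1 appears to need; the torch import is standard, but every project-local module path is a guess at the repository layout, not taken from the source.

import torch  # torch.load is used above to restore the checkpoint

# Hypothetical module paths; adjust to the real repository layout.
from handlers.file_manager import FileManager
from modules.data_loader import DataLoader
from models.decoder import Decoder

if __name__ == "__main__":
    run()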
Example No. 2
def run():
    model_state_path = "/home/ryan/code/nanopore_assembly/output/training_2018-9-14-14-17-3-4-257/model_checkpoint_5"
    directory = "/home/ryan/code/nanopore_assembly/output/chr1_800k-1200k_standard_20width/chr1/test"     # no variants, chr1:1200k 200 window test

    file_paths = FileManager.get_all_file_paths_by_type(parent_directory_path=directory, file_extension=".npz", sort=False)

    # Architecture parameters
    hidden_size = 128
    input_channels = 1      # 1-dimensional signal
    output_size = 5         # one-hot vector over '-','A','C','T','G'
    n_layers = 1

    # Hyperparameters
    dropout_rate = 0.1

    # Training parameters
    batch_size_train = 1
    n_batches = 8000  # unused in this test script

    checkpoint_interval = 1000  # unused in this test script

    data_loader = DataLoader(file_paths=file_paths, batch_size=batch_size_train)

    model = EncoderDecoder(hidden_size=hidden_size, input_size=input_channels, output_size=output_size, n_layers=n_layers, dropout_rate=dropout_rate)
    model.load_state_dict(torch.load(model_state_path))

    test(model=model, data_loader=data_loader)

    consensus_caller = ConsensusCaller(sequence_to_float=sequence_to_float, sequence_to_index=sequence_to_index)

    test_consensus(consensus_caller=consensus_caller, data_loader=data_loader)
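
Two caveats worth noting with this example (general PyTorch usage, not something the excerpt itself does): a checkpoint saved on a GPU machine will not load on a CPU-only host unless torch.load is given a map_location, and a model built with dropout_rate=0.1 should be put in eval mode before testing. A sketch using only standard PyTorch calls:

# Sketch, not part of the original script; names mirror the excerpt above.
import torch

state_dict = torch.load(model_state_path, map_location=torch.device("cpu"))
model.load_state_dict(state_dict)
model.eval()  # disables dropout (dropout_rate=0.1 above) during testing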
Example No. 3
def run():
    # model_state_path = "/home/ryan/code/nanopore_assembly/output/training_2018-8-28-17-13-26-1-240/model_checkpoint_15"
    # model_state_path = "/home/ryan/code/nanopore_assembly/output/training_2018-8-29-12-11-15-2-241/model_checkpoint_21"
    # model_state_path = "/home/ryan/code/nanopore_assembly/output/training_2018-8-30-11-49-32-3-242/model_checkpoint_43"
    # directory = "/home/ryan/code/nanopore_assembly/output/spoa_pileup_generation_chr1_full/test"   # spoa 2 pass variants excluded
    # directory = "/home/ryan/code/nanopore_assembly/output/spoa_pileup_generation_2018-9-4-17-30-38-1-247"   # arbitrary 2500 window test region
    directory = "/home/ryan/code/nanopore_assembly/output/spoa_pileup_generation_celegans_chr1_1mbp_NONRUNLENGTH_2018-9-19"  # c elegans
    # directory = "/home/ryan/code/nanopore_assembly/output/spoa_pileup_generation_human_chr1_1mbp_NONRUNLENGTH_2018-9-18"      # human

    file_paths = FileManager.get_all_file_paths_by_type(
        parent_directory_path=directory, file_extension=".npz", sort=False)

    # Architecture parameters
    hidden_size = 16
    input_channels = 1  # 1-dimensional signal
    output_size = 5  # one-hot vector over '-','A','C','T','G'
    n_layers = 3

    # Hyperparameters
    dropout_rate = 0

    # Training parameters
    batch_size_train = 1
    n_batches = 5000

    checkpoint_interval = 300  # unused in this test script

    data_loader = DataLoader(file_paths=file_paths,
                             batch_size=batch_size_train)

    # model = EncoderDecoder(hidden_size=hidden_size, input_size=input_channels, output_size=output_size, n_layers=n_layers, dropout_rate=dropout_rate)
    # model.load_state_dict(torch.load(model_state_path))

    consensus_caller = ConsensusCaller(sequence_to_index=sequence_to_index,
                                       sequence_to_float=sequence_to_float)

    # test(model=model, data_loader=data_loader)

    print(n_batches, len(data_loader))  # batches requested vs. available in the loader
    test_consensus(consensus_caller=consensus_caller,
                   data_loader=data_loader,
                   n_batches=n_batches)
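
The ConsensusCaller implementation is not shown in these excerpts. As a rough illustration of the idea (an assumption, not the project's actual code), a frequency-based caller reduces each pileup column to its most common symbol:

# Illustrative sketch only; the real ConsensusCaller is not shown here.
import numpy as np

def majority_consensus(pileup):
    # pileup: (n_reads, window_width) matrix of base indices 0..4,
    # corresponding to '-','A','C','T','G'
    n_classes = 5
    counts = np.stack([(pileup == base).sum(axis=0) for base in range(n_classes)])
    return counts.argmax(axis=0)  # one consensus index per column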
Example No. 4
def run():
    model_state_path = "/home/ryan/code/nanopore_assembly/output/simple_rnn_2_layer_32_hidden_celegans/model_checkpoint_10"
    # directory = "/home/ryan/code/nanopore_assembly/output/chr1_800k-1200k_standard_20width/chr1/test"     # no variants, chr1:1200k 200 window test
    directory = "/home/ryan/code/nanopore_assembly/output/celegans_250_window_test_1m"  # celegans 50 windows

    file_paths = FileManager.get_all_file_paths_by_type(
        parent_directory_path=directory, file_extension=".npz", sort=False)

    # Architecture parameters
    hidden_size = 32
    input_channels = 5  # 5-channel input: per-base frequencies (see convert_to_frequency below)
    output_size = 5  # one-hot vector over '-','A','C','T','G'
    n_layers = 2

    # Hyperparameters
    dropout_rate = 0.1

    # Training parameters
    batch_size_train = 1
    n_batches = 8000  # unused in this test script

    checkpoint_interval = 1000  # unused in this test script

    data_loader = DataLoader(file_paths=file_paths,
                             batch_size=batch_size_train,
                             convert_to_frequency=True)

    model = Decoder(hidden_size=hidden_size,
                    input_size=input_channels,
                    output_size=output_size,
                    n_layers=n_layers,
                    dropout_rate=dropout_rate)
    model.load_state_dict(torch.load(model_state_path))

    test(model=model, data_loader=data_loader)

    consensus_caller = ConsensusCaller(sequence_to_float=sequence_to_float,
                                       sequence_to_index=sequence_to_index)

    test_consensus(consensus_caller=consensus_caller, data_loader=data_loader)
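
This is the only example that passes convert_to_frequency=True, which matches its input_channels=5. Presumably (the DataLoader source is not shown, so this is an assumption) the flag collapses each pileup column into per-symbol frequencies, along these lines:

# Sketch of what convert_to_frequency plausibly computes; not the
# project's actual implementation.
import numpy as np

def column_frequencies(pileup, n_classes=5):
    # pileup: (n_reads, width) base-index matrix -> (n_classes, width)
    one_hot = np.eye(n_classes)[pileup]  # (n_reads, width, n_classes)
    return one_hot.mean(axis=0).T        # fraction of each symbol per column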
Example No. 5
def run(load_model=False, model_state_path=None):
    directory = "/home/ryan/code/nanopore_assembly/output/chr1_800k-1200k_standard_20width/chr1/train"  # spoa 2 pass arbitray region 2500 windows

    file_paths = FileManager.get_all_file_paths_by_type(
        parent_directory_path=directory, file_extension=".npz", sort=False)

    results_handler = ResultsHandler()

    # Architecture parameters
    hidden_size = 128
    input_channels = 1  # 1-dimensional signal
    output_size = 5  # one-hot vector over '-','A','C','T','G'
    n_layers = 1

    # Hyperparameters
    learning_rate = 1e-3
    weight_decay = 1e-5
    dropout_rate = 0.1

    # Training parameters
    batch_size_train = 1
    n_batches = None

    checkpoint_interval = 1000

    data_loader = DataLoader(file_paths=file_paths,
                             batch_size=batch_size_train)
    model = EncoderDecoder(hidden_size=hidden_size,
                           input_size=input_channels,
                           output_size=output_size,
                           n_layers=n_layers,
                           dropout_rate=dropout_rate)

    # Initialize the optimizer with above parameters
    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           weight_decay=weight_decay)

    # Define the loss function
    # loss_fn = nn.MSELoss()
    loss_fn = nn.CrossEntropyLoss()

    if load_model:
        # get weight parameters from saved model state
        model.load_state_dict(torch.load(model_state_path))

    # Train and get the resulting loss per iteration
    losses = train(model=model,
                   data_loader=data_loader,
                   optimizer=optimizer,
                   loss_fn=loss_fn,
                   n_batches=n_batches,
                   results_handler=results_handler,
                   checkpoint_interval=checkpoint_interval)

    # test(model=model,
    #      data_loader=data_loader,
    #      n_batches=4)

    results_handler.save_model(model)
    results_handler.save_plot(losses)

    print(model)
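
A detail that is easy to trip over in this training example: nn.CrossEntropyLoss applies log-softmax internally, so it takes raw logits, and its targets are integer class indices rather than one-hot vectors, meaning the '-','A','C','T','G' one-hot labels presumably get converted (e.g. with argmax) somewhere inside train(). A standalone shape sketch using only standard PyTorch:

# Shape sketch for nn.CrossEntropyLoss; independent of this project's code.
import torch
from torch import nn

loss_fn = nn.CrossEntropyLoss()
logits = torch.randn(4, 5)            # (batch, n_classes) raw model outputs
one_hot = torch.eye(5)[[0, 2, 1, 4]]  # example one-hot targets
targets = one_hot.argmax(dim=1)       # class indices: tensor([0, 2, 1, 4])
loss = loss_fn(logits, targets)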
Example No. 6
def run(load_model=False, model_state_path=None):
    # directory = "/home/ryan/code/nanopore_assembly/output/pileup_generation_2018-8-24-12-54-20-4-236"       # poapy
    # directory = "/home/ryan/code/nanopore_assembly/output/spoa_pileup_generation_2018-8-27-13-51-41-0-239"  # spoa
    # directory = "/home/ryan/code/nanopore_assembly/output/spoa_pileup_generation_2018-8-27-16-13-23-0-239"  # spoa with 2 pass alignment
    directory = "/home/ryan/code/nanopore_assembly/output/spoa_pileup_generation_chr1_full/train"  # spoa 2 pass variants excluded

    file_paths = FileManager.get_all_file_paths_by_type(
        parent_directory_path=directory, file_extension=".npz", sort=False)

    results_handler = ResultsHandler()

    # Architecture parameters
    hidden_size = 16
    input_channels = 1  # 1-dimensional signal
    output_size = 5  # one-hot vector over '-','A','C','T','G'
    n_layers = 3

    # Hyperparameters
    learning_rate = 1e-3
    weight_decay = 0
    dropout_rate = 0

    # Training parameters
    batch_size_train = 1
    n_batches = 8000

    checkpoint_interval = 1000

    data_loader = DataLoader(file_paths=file_paths,
                             batch_size=batch_size_train)
    model = EncoderDecoder(hidden_size=hidden_size,
                           input_size=input_channels,
                           output_size=output_size,
                           n_layers=n_layers,
                           dropout_rate=dropout_rate)

    # Initialize the optimizer with above parameters
    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           weight_decay=weight_decay)

    # Define the loss function
    # loss_fn = nn.MSELoss()
    loss_fn = nn.CrossEntropyLoss()

    if load_model:
        # get weight parameters from saved model state
        model.load_state_dict(torch.load(model_state_path))

    # Train and get the resulting loss per iteration
    losses = train(model=model,
                   data_loader=data_loader,
                   optimizer=optimizer,
                   loss_fn=loss_fn,
                   n_batches=n_batches,
                   results_handler=results_handler,
                   checkpoint_interval=checkpoint_interval)

    # test(model=model,
    #      data_loader=data_loader,
    #      n_batches=4)

    results_handler.save_model(model)
    results_handler.save_plot(losses)

    print(model)
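
Because run() accepts load_model and model_state_path, the same script can resume from an earlier checkpoint. A usage sketch (the path below is made up for illustration):

# Resume training from a saved checkpoint; the path is illustrative only.
run(load_model=True,
    model_state_path="/path/to/output/model_checkpoint_5")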