Пример #1
0
def runGetSeqENA(args):
    """Download the reads of every listed ENA ID and write a tab-separated report.

    Parameters
    ----------
    args
        Parsed command-line options. The attributes read here are
        ``listENAids`` (object with a ``name`` path), ``outdir``,
        ``asperaKey`` (``None`` or an object with a ``name`` path),
        ``maximumSamples`` (``None`` means no limit), ``downloadLibrariesType``,
        ``downloadCramBam``, ``threads``, ``downloadInstrumentPlatform``,
        ``SRA`` and ``SRAopt``.

    Side effects
    ------------
    Creates ``outdir`` and one sub-folder per ENA ID (removed again when the
    download fails) and writes ``getSeqENA.report.txt`` inside ``outdir`` with
    one row per processed ID, failed ones included.

    Raises
    ------
    SystemExit
        When not a single ENA ID was downloaded successfully.
    """
    start_time = time.time()

    listENA_IDs = utils.getListIDs(os.path.abspath(args.listENAids.name))
    outdir = os.path.abspath(args.outdir)
    utils.check_create_directory(outdir)
    asperaKey = args.asperaKey
    if asperaKey is not None:
        asperaKey = os.path.abspath(asperaKey.name)

    # Start logger
    logfile = utils.start_logger(outdir)

    # Get general information
    utils.general_information(logfile, version)

    # Check programs
    requiredPrograms(args)

    # None means "keep going until the list is exhausted".
    maximumSamples = args.maximumSamples

    header_sequencing = [
        'run_accession', 'instrument_platform', 'instrument_model',
        'library_layout', 'library_source', 'extra_run_accession',
        'nominal_length', 'read_count', 'base_count', 'date_download',
    ]

    runs_successfully = 0
    with open(os.path.join(outdir, 'getSeqENA.report.txt'), 'wt') as writer:
        writer.write('#sample' + '\t' + '\t'.join(header_sequencing) + '\n')
        for ena_id in listENA_IDs:
            # Stop as soon as the requested number of successful downloads is reached.
            if maximumSamples is not None and runs_successfully >= maximumSamples:
                break

            # Single-argument parenthesised print behaves the same on Python 2 and 3.
            print('\n' + 'Download ENA_ID ' + ena_id)

            ena_id_folder = os.path.join(outdir, ena_id)
            utils.check_create_directory(ena_id_folder)

            # Only the success flag and the sequencing metadata are used here;
            # the elapsed time and the list of fastq files are discarded.
            _, run_successfully, _, sequencingInformation = download.run_download(
                ena_id, args.downloadLibrariesType, asperaKey, ena_id_folder,
                args.downloadCramBam, args.threads,
                args.downloadInstrumentPlatform, args.SRA, args.SRAopt)

            if run_successfully:
                runs_successfully += 1
            else:
                utils.removeDirectory(ena_id_folder)
                print(ena_id + ' was not downloaded')

            # Failed samples are reported too, with whatever metadata came back.
            report_fields = [str(sequencingInformation[field]) for field in header_sequencing]
            writer.write(ena_id + '\t' + '\t'.join(report_fields) + '\n')

    # Called for its side effect only (presumably it reports the elapsed
    # time - confirm in utils); the returned value is not needed.
    utils.runTime(start_time)

    if runs_successfully == 0:
        sys.exit('No ENA_IDs were successfully downloaded!')
Пример #2
0
def downloadAndINNUca(outdir, run_ID, asperaKey, threads):
    """Download one run with ``getSeqENA.py`` and analyse it with ``INNUca.py``.

    Parameters
    ----------
    outdir
        Folder in which the sample folder ``<outdir>/<run_ID>/`` is produced.
    run_ID
        Run accession to download; also used as the sample folder name.
    asperaKey
        Value passed to ``getSeqENA.py`` through its ``-a`` option.
    threads
        Number of threads passed to ``INNUca.py`` through its ``-j`` option.

    Side effects
    ------------
    Runs both external programs, flattens the INNUca output sub-folder into
    the sample folder, removes ``.gz``, ``.log`` and ``.cpu.txt`` files from
    it and pickles the outcome (``<run_ID>_run_successfully``) and, on
    success, the elapsed time (``<run_ID>_downloadAndINNUca_time``) there.
    Nothing is returned; the pickles are the result.
    """
    start_time = time.time()

    # getSeqENA.py takes a file with IDs, so write a one-line list for this run.
    temp_file = os.path.join(outdir, run_ID + '.temp.runID_fileList.txt')
    with open(temp_file, 'wt') as writer:
        writer.write(run_ID + '\n')

    command = [
        'getSeqENA.py', '-l', temp_file, '-o', outdir, '-a', asperaKey,
        '--downloadLibrariesType', 'PE'
    ]
    try:
        getSeqENA_run_successfully, _, _ = utils.runCommandPopenCommunicate(
            command, False, None)
    finally:
        # Do not leave the temporary ID list behind if the helper raises.
        os.remove(temp_file)

    sample_directory = os.path.join(outdir, run_ID, '')

    innuca_run_successfully = False
    if getSeqENA_run_successfully:
        # NOTE(review): the species is deliberately kept with literal quotes,
        # exactly as before - presumably the helper re-splits the joined
        # command; confirm in utils.runCommandPopenCommunicate.
        command = [
            'INNUca.py', '-i', sample_directory, '-s',
            '"Campylobacter jejuni"', '-g', '1.6', '-o', sample_directory,
            '-j',
            str(threads), '--jarMaxMemory', 'auto'
        ]
        innuca_run_successfully, _, _ = utils.runCommandPopenCommunicate(
            command, False, None)

        # INNUca output is expected in <sample_directory>/<run_ID>/; move its
        # regular, non-hidden files one level up and drop the sub-folder.
        # The folder may be missing when INNUca failed early: listing it
        # unguarded would raise before the outcome pickle below is written.
        innuca_dir = os.path.join(sample_directory, run_ID, '')
        if os.path.isdir(innuca_dir):
            for file_innuca in os.listdir(innuca_dir):
                source_path = os.path.join(innuca_dir, file_innuca)
                if file_innuca.startswith('.') or not os.path.isfile(source_path):
                    continue
                shutil.move(source_path,
                            os.path.join(sample_directory, file_innuca))
            utils.removeDirectory(innuca_dir)

    # NOTE(review): this clean-up also runs when the download failed; confirm
    # that removeFiles tolerates a missing sample folder.
    for suffix in ('.gz', '.log', '.cpu.txt'):
        removeFiles(sample_directory, suffix)

    # The elapsed time is only recorded for runs that completed INNUca.
    if innuca_run_successfully:
        time_taken = utils.runTime(start_time)
        utils.saveVariableToPickle(time_taken, sample_directory,
                                   run_ID + '_downloadAndINNUca_time')

    utils.saveVariableToPickle(innuca_run_successfully, sample_directory,
                               run_ID + '_run_successfully')
Пример #3
0
def train(env, nets, replayBuffer, batch_size, episodes_train, episodes_test,
          startTime):
    """Train the per-time-step networks and print progress every 100 episodes.

    Each episode walks through time steps ``1 .. T + 1`` (``T`` and ``N`` are
    module-level names). For ``t <= T`` an action is chosen with ``act``, the
    transition is pushed to ``replayBuffer`` and, once the buffer for ``t``
    holds more than ``batch_size`` transitions, a sampled batch is used to
    regress ``actionValue`` at ``t`` towards ``reward + stateValue`` at
    ``t + 1``. At ``t == T + 1`` ``stateValue`` is regressed towards the
    reward returned by ``env.step``.

    Parameters
    ----------
    env
        Environment; ``env.step(t, state, action)`` returns
        ``(next_state, reward, done)`` for ``t <= T`` and only the reward at
        ``t == T + 1``.
    nets
        Sequence of networks; an Adam optimizer is built for each and
        ``optimizers[t - 1]`` is the one stepped at time ``t``.
    replayBuffer
        Object providing ``push``, ``len(t)`` and ``sample(t, batch_size)``.
    batch_size
        Number of transitions per update.
    episodes_train
        Episodes ``0 .. episodes_train`` (inclusive) are run.
    episodes_test
        Passed to ``test`` when a progress line is printed.
    startTime
        Passed to ``utils.runTime`` for the progress line.

    Any ``Exception`` raised inside is handed to
    ``utils.printErrorAndExit('train')``. ``KeyboardInterrupt`` and
    ``SystemExit`` are deliberately left to propagate.
    """
    try:
        profits = []
        epsilon = 1.0
        decayEpsilon = 0.99

        optimizers = [optim.Adam(net.parameters()) for net in nets]
        # The loss module carries no per-episode state, so build it once.
        criterion = torch.nn.MSELoss()

        for episode in range(episodes_train + 1):
            epsilon *= decayEpsilon
            profit = 0
            # Kept as a plain float: summing the loss tensors themselves
            # would keep every autograd graph of the episode alive.
            episode_loss = 0.0
            current_state = utils.State(0, np.zeros(N))

            for t in range(1, T + 2):
                if t == T + 1:
                    # Final step: a zero action is applied and the value of
                    # the current state is fitted to the reward it yields.
                    qOld = stateValue(env, nets, t, current_state)
                    action = utils.Action(T + 1, np.zeros(N))
                    reward = env.step(t, current_state, action)
                    profit += reward
                    qNew = torch.Tensor([reward])
                    loss = criterion(qOld, qNew)
                    episode_loss += loss.item()
                    optimizers[t - 1].zero_grad()
                    loss.backward()
                    optimizers[t - 1].step()
                    continue

                action = act(env, nets, t, current_state, epsilon)
                next_state, reward, done = env.step(t, current_state, action)
                profit += reward
                replayBuffer.push(t, current_state, action, next_state,
                                  reward, done)
                current_state = next_state

                # Learn only once enough transitions for this step are stored.
                if replayBuffer.len(t) <= batch_size:
                    continue

                current_states, actions, next_states, rewards, _ = replayBuffer.sample(
                    t, batch_size)
                qOld = []
                qNew = []
                # NOTE(review): the target is not detached here; if stateValue
                # does not disable gradients itself, backward() also fills the
                # gradients of the networks it evaluates - confirm intended.
                for i in range(batch_size):
                    qOld.append(
                        actionValue(nets, t, current_states[i], actions[i]))
                    qNew.append(
                        torch.Tensor([rewards[i]]) +
                        stateValue(env, nets, t + 1, next_states[i]))
                loss = criterion(torch.stack(qOld, 0), torch.stack(qNew, 0))
                episode_loss += loss.item()
                optimizers[t - 1].zero_grad()
                loss.backward()
                optimizers[t - 1].step()

            profits.append(profit)
            if episode % 100 == 0:
                print('episode = {} \t time = {:.2f} \t loss = {:.2f} \t average training profit = {} \t average testing profit = {}'
                      .format(episode, utils.runTime(startTime), episode_loss,
                              np.mean(profits),
                              test(env, nets, episodes_test, 0)),
                      flush=True)
                # Start a fresh averaging window; the exploration rate is
                # reset to its initial value as well.
                profits = []
                epsilon = 1.0
    except Exception:
        utils.printErrorAndExit('train')
Пример #4
0
def runCampyGenomes(args):
    """Run download + INNUca for every listed run ID under several thread settings.

    Parameters
    ----------
    args
        Parsed command-line options. The attributes read here are
        ``listRunIDs`` (object with a ``name`` path), ``outdir``,
        ``asperaKey`` (object with a ``name`` path) and ``threads`` (upper
        bound applied to the module-level ``general_threads_to_use``).

    Side effects
    ------------
    Shuffles the run IDs, splits them into one batch per thread setting and
    processes each batch with a ``multiprocessing.Pool`` inside
    ``<outdir>/<threads>_threads/``. Writes
    ``samples_with_problems.<time_str>.tab`` and
    ``running_times.<time_str>.tab`` in ``outdir``.

    Raises
    ------
    SystemExit
        When no run ID finished successfully.
    """
    start_time = time.time()

    listRunIDs = utils.getListIDs(os.path.abspath(args.listRunIDs.name))
    outdir = os.path.abspath(args.outdir)
    utils.check_create_directory(outdir)
    asperaKey = args.asperaKey.name
    threads_to_use = [j for j in general_threads_to_use if j <= args.threads]

    # Start logger
    logfile, time_str = utils.start_logger(outdir)

    # Get general information
    utils.general_information(logfile, version, outdir, time_str)

    # Check programs
    requiredPrograms()

    # Randomize the list with Run IDs
    random.shuffle(listRunIDs)

    number_process = determineNumberProcess(threads_to_use)

    samples_each_threads = determineBatchSamples(listRunIDs, threads_to_use)

    problems_path = os.path.join(outdir, 'samples_with_problems.' + time_str + '.tab')
    times_path = os.path.join(outdir, 'running_times.' + time_str + '.tab')

    run_successfully = 0
    with open(problems_path, 'wt') as writer_success:
        with open(times_path, 'wt') as writer_times:
            for threads in samples_each_threads:
                # Single-argument parenthesised print behaves the same on Python 2 and 3.
                print('\n' + 'Running for ' + str(threads) + ' threads' + '\n')
                threads_dir = os.path.join(outdir, str(threads) + '_threads', '')
                utils.check_create_directory(threads_dir)

                # NOTE(review): the async results are never fetched, so an
                # exception raised inside a worker goes unnoticed; such a
                # sample only shows up here if it still wrote its pickle.
                pool = multiprocessing.Pool(processes=number_process[threads])
                for sample in samples_each_threads[threads]:
                    pool.apply_async(downloadAndINNUca,
                                     args=(threads_dir, sample, asperaKey, threads))
                pool.close()
                pool.join()

                for leftover in ('.log', 'getSeqENA.samples_with_problems.txt', '.cpu.txt'):
                    removeFiles(threads_dir, leftover)

                run_successfully += _collectSampleResults(
                    threads_dir, writer_success, writer_times)

    # Called for its side effect only (presumably it reports the elapsed
    # time - confirm in utils); the returned value is not needed.
    utils.runTime(start_time)

    if run_successfully == 0:
        sys.exit('No RunIDs were successfully run!')
    else:
        print(str(run_successfully) + ' samples out of ' + str(len(listRunIDs)) + ' run successfully')


def _collectSampleResults(threads_dir, writer_success, writer_times):
    """Consume the outcome pickles of every sample folder below ``threads_dir``.

    For each non-hidden sample folder, a falsy ``<sample>_run_successfully.pkl``
    adds a line to ``writer_success`` (the problems file) and a
    ``<sample>_downloadAndINNUca_time.pkl`` adds a line to ``writer_times``.
    Both pickles are deleted once read. Returns the number of samples whose
    outcome pickle was truthy.
    """
    successes = 0
    for sample_dir in os.listdir(threads_dir):
        sample_dir_path = os.path.join(threads_dir, sample_dir, '')
        if sample_dir.startswith('.') or not os.path.isdir(sample_dir_path):
            continue

        # The two pickle names are known, so look them up directly instead
        # of scanning every file of the sample folder.
        status_pickle = os.path.join(sample_dir_path,
                                     sample_dir + '_run_successfully.pkl')
        if os.path.isfile(status_pickle):
            if utils.extractVariableFromPickle(status_pickle):
                successes += 1
            else:
                writer_success.write(sample_dir + '\t' + threads_dir + '\n')
            os.remove(status_pickle)

        time_pickle = os.path.join(sample_dir_path,
                                   sample_dir + '_downloadAndINNUca_time.pkl')
        if os.path.isfile(time_pickle):
            time_taken = utils.extractVariableFromPickle(time_pickle)
            writer_times.write(sample_dir + '\t' + threads_dir + '\t' +
                               str(time_taken) + '\n')
            os.remove(time_pickle)
    return successes