def runGetSeqENA(args):
    start_time = time.time()

    listENA_IDs = utils.getListIDs(os.path.abspath(args.listENAids.name))

    outdir = os.path.abspath(args.outdir)
    utils.check_create_directory(outdir)

    asperaKey = args.asperaKey
    if asperaKey is not None:
        asperaKey = os.path.abspath(asperaKey.name)

    # Start logger
    logfile = utils.start_logger(outdir)

    # Get general information
    utils.general_information(logfile, version)

    # Check programs
    requiredPrograms(args)

    runs_successfully = 0
    with open(os.path.join(outdir, 'getSeqENA.report.txt'), 'wt') as writer:
        header_sequencing = ['run_accession', 'instrument_platform', 'instrument_model', 'library_layout',
                             'library_source', 'extra_run_accession', 'nominal_length', 'read_count', 'base_count',
                             'date_download']
        writer.write('#sample' + '\t' + '\t'.join(header_sequencing) + '\n')

        for ena_id in listENA_IDs:
            if args.maximumSamples is None:
                maximumSamples = runs_successfully + 1
            else:
                maximumSamples = args.maximumSamples

            if runs_successfully < maximumSamples:
                print '\n' + 'Download ENA_ID ' + ena_id

                ena_id_folder = os.path.join(outdir, ena_id)
                utils.check_create_directory(ena_id_folder)

                sequencingInformation = {'run_accession': None, 'instrument_platform': None, 'instrument_model': None,
                                         'library_layout': None, 'library_source': None, 'extra_run_accession': None,
                                         'nominal_length': None, 'read_count': None, 'base_count': None,
                                         'date_download': None}
                time_taken, run_successfully, fastq_files, sequencingInformation = \
                    download.run_download(ena_id, args.downloadLibrariesType, asperaKey, ena_id_folder,
                                          args.downloadCramBam, args.threads, args.downloadInstrumentPlatform,
                                          args.SRA, args.SRAopt)

                if run_successfully:
                    runs_successfully += 1
                else:
                    utils.removeDirectory(ena_id_folder)
                    print ena_id + ' was not downloaded'

                writer.write(ena_id + '\t' + '\t'.join([str(sequencingInformation[i]) for i in header_sequencing]) + '\n')
            else:
                break

    time_taken = utils.runTime(start_time)
    del time_taken

    if runs_successfully == 0:
        sys.exit('No ENA_IDs were successfully downloaded!')
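
# ---------------------------------------------------------------------------
# Illustration only, not part of getSeqENA: the actual download and metadata
# collection above happens in download.run_download(), which is not shown in
# this excerpt. As a rough, standalone Python 3 sketch, the ENA-derived fields
# written to getSeqENA.report.txt can also be queried from the ENA Portal API
# "filereport" endpoint; the parameter and field names used here are assumed to
# match the current API and should be checked against the ENA documentation
# ('extra_run_accession' and 'date_download' are not ENA fields and are omitted).
# ---------------------------------------------------------------------------
# import urllib.request
#
# def fetch_ena_run_metadata(run_accession):
#     """Return a dict with selected read_run report fields for one ENA run accession."""
#     fields = ['run_accession', 'instrument_platform', 'instrument_model',
#               'library_layout', 'library_source', 'nominal_length',
#               'read_count', 'base_count']
#     url = ('https://www.ebi.ac.uk/ena/portal/api/filereport'
#            '?accession={a}&result=read_run&fields={f}&format=tsv'.format(
#                a=run_accession, f=','.join(fields)))
#     with urllib.request.urlopen(url) as response:
#         lines = response.read().decode('utf-8').splitlines()
#     # First line is the header, second line the values for this run
#     header, values = lines[0].split('\t'), lines[1].split('\t')
#     return dict(zip(header, values))
#
# Usage (with a run accession of your choice):
#     metadata = fetch_ena_run_metadata('<run_accession>')
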
def downloadAndINNUca(outdir, run_ID, asperaKey, threads):
    start_time = time.time()

    # Write a temporary one-line run ID list for getSeqENA.py
    temp_file = os.path.join(outdir, run_ID + '.temp.runID_fileList.txt')
    with open(temp_file, 'wt') as writer:
        writer.write(run_ID + '\n')

    # Download the paired-end reads with getSeqENA.py
    command = ['getSeqENA.py', '-l', temp_file, '-o', outdir, '-a', asperaKey, '--downloadLibrariesType', 'PE']
    getSeqENA_run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None)

    os.remove(temp_file)

    sample_directory = os.path.join(outdir, run_ID, '')

    innuca_run_successfully = False
    if getSeqENA_run_successfully:
        # Run INNUca on the downloaded sample
        command = ['INNUca.py', '-i', sample_directory, '-s', '"Campylobacter jejuni"', '-g', '1.6',
                   '-o', sample_directory, '-j', str(threads), '--jarMaxMemory', 'auto']
        innuca_run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None)

        # Move INNUca outputs up into the sample directory and remove the nested folder
        innuca_dir = os.path.join(sample_directory, run_ID, '')
        files = [f for f in os.listdir(innuca_dir)
                 if not f.startswith('.') and os.path.isfile(os.path.join(innuca_dir, f))]
        for file_innuca in files:
            shutil.move(os.path.join(innuca_dir, file_innuca), os.path.join(sample_directory, file_innuca))
        utils.removeDirectory(innuca_dir)

        # Clean up reads, log files and CPU usage reports
        removeFiles(sample_directory, '.gz')
        removeFiles(sample_directory, '.log')
        removeFiles(sample_directory, '.cpu.txt')

    if innuca_run_successfully:
        time_taken = utils.runTime(start_time)
        utils.saveVariableToPickle(time_taken, sample_directory, run_ID + '_downloadAndINNUca_time')

    utils.saveVariableToPickle(innuca_run_successfully, sample_directory, run_ID + '_run_successfully')
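
# ---------------------------------------------------------------------------
# Note: removeFiles() is called above (and in runCampyGenomes below) but is not
# defined in this excerpt. The sketch below is an assumption about its contract
# ("delete every file under a directory whose name ends with a given string"),
# not the project's actual implementation; it is given a _sketch suffix to make
# that explicit.
# ---------------------------------------------------------------------------
import os


def removeFiles_sketch(directory, ending):
    """Recursively delete files under 'directory' whose names end with 'ending'."""
    for dirpath, _dirnames, filenames in os.walk(directory):
        for filename in filenames:
            if filename.endswith(ending):
                os.remove(os.path.join(dirpath, filename))
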
def train(env, nets, replayBuffer, batch_size, episodes_train, episodes_test, startTime):
    try:
        profits = []
        epsilon = 1.0
        decayEpsilon = 0.99

        # One Q-network (and one optimizer) per time step
        optimizers = []
        for net in nets:
            optimizers.append(optim.Adam(net.parameters()))

        for episode in range(episodes_train + 1):
            epsilon *= decayEpsilon
            profit = 0
            episode_loss = 0
            current_state = utils.State(0, np.zeros(N))
            criterion = torch.nn.MSELoss()

            for t in range(1, T + 2):
                if t == T + 1:
                    # Terminal step: the target is just the final reward
                    qOld = stateValue(env, nets, t, current_state)
                    action = utils.Action(T + 1, np.zeros(N))
                    reward = env.step(t, current_state, action)
                    profit += reward

                    qNew = torch.Tensor([reward])
                    loss = criterion(qOld, qNew)
                    episode_loss += loss

                    optimizers[t - 1].zero_grad()
                    loss.backward()
                    optimizers[t - 1].step()
                else:
                    # Earlier single-transition update, kept here commented out:
                    '''
                    action = act(env, nets, t, current_state, epsilon)
                    next_state, reward, done = env.step(t, current_state, action)
                    profit += reward

                    qOld = actionValue(nets, t, current_state, action)
                    qNew = torch.Tensor([reward]) + stateValue(env, nets, t + 1, next_state)
                    loss = criterion(qOld, qNew)
                    episode_loss += loss

                    optimizers[t - 1].zero_grad()
                    loss.backward()
                    optimizers[t - 1].step()

                    current_state = next_state
                    '''
                    # Epsilon-greedy action; store the transition in the replay buffer
                    action = act(env, nets, t, current_state, epsilon)
                    next_state, reward, done = env.step(t, current_state, action)
                    profit += reward

                    replayBuffer.push(t, current_state, action, next_state, reward, done)
                    current_state = next_state

                    # Learn from a random batch once enough transitions exist for this time step
                    if replayBuffer.len(t) > batch_size:
                        current_states, actions, next_states, rewards, dones = replayBuffer.sample(t, batch_size)

                        qOld = []
                        qNew = []
                        for i in range(batch_size):
                            qOld.append(actionValue(nets, t, current_states[i], actions[i]))
                            qNew.append(torch.Tensor([rewards[i]]) + stateValue(env, nets, t + 1, next_states[i]))
                        qOld = torch.stack(qOld, 0)
                        qNew = torch.stack(qNew, 0)

                        loss = criterion(qOld, qNew)
                        episode_loss += loss

                        optimizers[t - 1].zero_grad()
                        loss.backward()
                        optimizers[t - 1].step()

            profits.append(profit)

            if episode % 100 == 0:
                print('episode = {} \t time = {:.2f} \t loss = {:.2f} \t average training profit = {} \t average testing profit = {}'
                      .format(episode, utils.runTime(startTime), episode_loss,
                              np.mean(profits), test(env, nets, episodes_test, 0)), flush=True)
                profits = []
                epsilon = 1.0
    except:
        utils.printErrorAndExit('train')
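
# ---------------------------------------------------------------------------
# Note: train() only assumes a replay buffer object exposing push(t, ...),
# len(t) and sample(t, batch_size); the replayBuffer actually passed in is not
# shown in this excerpt. Below is a minimal sketch of a buffer keyed by time
# step that matches those calls. The class name and capacity are illustrative
# assumptions, not the project's code.
# ---------------------------------------------------------------------------
import random
from collections import defaultdict, deque


class PerStepReplayBufferSketch(object):
    """Stores transitions separately for each time step t."""

    def __init__(self, capacity=10000):
        # One bounded FIFO buffer per time step
        self.buffers = defaultdict(lambda: deque(maxlen=capacity))

    def push(self, t, state, action, next_state, reward, done):
        self.buffers[t].append((state, action, next_state, reward, done))

    def len(self, t):
        return len(self.buffers[t])

    def sample(self, t, batch_size):
        # Return five parallel lists, matching the unpacking done in train()
        batch = random.sample(list(self.buffers[t]), batch_size)
        states, actions, next_states, rewards, dones = zip(*batch)
        return list(states), list(actions), list(next_states), list(rewards), list(dones)
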
def runCampyGenomes(args):
    start_time = time.time()

    listRunIDs = utils.getListIDs(os.path.abspath(args.listRunIDs.name))

    outdir = os.path.abspath(args.outdir)
    utils.check_create_directory(outdir)

    asperaKey = args.asperaKey.name

    threads_to_use = [j for j in general_threads_to_use if j <= args.threads]

    # Start logger
    logfile, time_str = utils.start_logger(outdir)

    # Get general information
    utils.general_information(logfile, version, outdir, time_str)

    # Check programs
    requiredPrograms()

    # Randomize the list with Run IDs
    random.shuffle(listRunIDs)

    number_process = determineNumberProcess(threads_to_use)

    samples_each_threads = determineBatchSamples(listRunIDs, threads_to_use)

    run_successfully = 0
    with open(os.path.join(outdir, 'samples_with_problems.' + time_str + '.tab'), 'wt') as writer_success:
        with open(os.path.join(outdir, 'running_times.' + time_str + '.tab'), 'wt') as writer_times:
            for threads in samples_each_threads:
                print '\n' + 'Running for ' + str(threads) + ' threads' + '\n'

                threads_dir = os.path.join(outdir, str(threads) + '_threads', '')
                utils.check_create_directory(threads_dir)

                # Run downloadAndINNUca for each sample of this batch in parallel
                pool = multiprocessing.Pool(processes=number_process[threads])
                for sample in samples_each_threads[threads]:
                    pool.apply_async(downloadAndINNUca, args=(threads_dir, sample, asperaKey, threads,))
                pool.close()
                pool.join()

                removeFiles(threads_dir, '.log')
                removeFiles(threads_dir, 'getSeqENA.samples_with_problems.txt')
                removeFiles(threads_dir, '.cpu.txt')

                # Collect the per-sample pickled results
                samples_directories = [d for d in os.listdir(threads_dir)
                                       if not d.startswith('.') and os.path.isdir(os.path.join(threads_dir, d, ''))]
                for sample_dir in samples_directories:
                    sample_dir_path = os.path.join(threads_dir, sample_dir, '')

                    files = [f for f in os.listdir(sample_dir_path)
                             if not f.startswith('.') and os.path.isfile(os.path.join(sample_dir_path, f))]
                    for file_found in files:
                        file_path = os.path.join(sample_dir_path, file_found)

                        if file_found == sample_dir + '_run_successfully.pkl':
                            sample_run_successfully = utils.extractVariableFromPickle(file_path)
                            if not sample_run_successfully:
                                writer_success.write(sample_dir + '\t' + threads_dir + '\n')
                            else:
                                run_successfully += 1
                            os.remove(file_path)
                        elif file_found == sample_dir + '_downloadAndINNUca_time.pkl':
                            time_taken = utils.extractVariableFromPickle(file_path)
                            writer_times.write(sample_dir + '\t' + threads_dir + '\t' + str(time_taken) + '\n')
                            os.remove(file_path)

    time_taken = utils.runTime(start_time)
    del time_taken

    if run_successfully == 0:
        sys.exit('No RunIDs were successfully run!')
    else:
        print str(run_successfully) + ' samples out of ' + str(len(listRunIDs)) + ' run successfully'
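
# ---------------------------------------------------------------------------
# Note: utils.saveVariableToPickle() and utils.extractVariableFromPickle() are
# used by downloadAndINNUca and runCampyGenomes but are not shown in this
# excerpt. The sketches below assume the contract implied by the calls:
# saveVariableToPickle(variable, directory, prefix) writes <directory>/<prefix>.pkl
# and extractVariableFromPickle(path) loads it back. The real helpers in utils
# may differ; the _sketch names mark these as illustrations.
# ---------------------------------------------------------------------------
import os
import pickle


def saveVariableToPickle_sketch(variable_to_store, directory, prefix):
    """Serialise one Python object to <directory>/<prefix>.pkl."""
    pickle_path = os.path.join(directory, str(prefix) + '.pkl')
    with open(pickle_path, 'wb') as writer:
        pickle.dump(variable_to_store, writer)


def extractVariableFromPickle_sketch(pickle_path):
    """Load and return the object stored in a .pkl file."""
    with open(pickle_path, 'rb') as reader:
        return pickle.load(reader)
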