def ProcessExperimentSeparate(experiment_id, json, batch_dir, sample_dir_id,
                              preserve, failed_accession, skip_files=False):
    m = ExtractExperimentMetadata(experiment_id, json)
    if m.valid_metadata():
        # Check if a run ID was submitted, and if so only process that
        if experiment_id in m.runIDs:
            m.runIDs = [experiment_id]
        # Process the runIDs as samples
        _logger.info("Found Following Runs: %s", ', '.join(m.runIDs))
        for runid in m.runIDs:
            with TemporaryDirectory() as tmpdir:
                os.chdir(batch_dir)
                sample_dir = "%s/%s/" % (batch_dir, sample_dir_id)
                if os.path.exists(sample_dir):
                    sfiles = [x for x in os.listdir(sample_dir)
                              if any(y in x for y in ['fq', 'fastq'])]
                else:
                    sfiles = []
                if not preserve or not skip_files or len(sfiles) == 0:
                    sfiles = DownloadRunFiles(runid, tmpdir)
                if sfiles is not None:
                    success = CreateSampleDir(sfiles, m, sample_dir, preserve,
                                              skip_files)
                    if success:
                        sample_dir_id += 1
                    else:
                        failed_accession.append(runid)
                else:
                    _logger.error("Files could not be retrieved! (%s)", runid)
                    failed_accession.append(runid)
    else:
        _logger.error("Metadata Invalid! (%s) - %s", experiment_id,
                      m.metadata.items())
        failed_accession.append(experiment_id)
    return sample_dir_id
def ProcessExperimentCombined(experiment_id, json, batch_dir, sample_dir_id,
                              preserve, failed_accession, skip_files=False):
    m = ExtractExperimentMetadata(experiment_id, json)
    if m.valid_metadata():
        # Check if a run ID was submitted, and if so only process that
        if experiment_id in m.runIDs:
            m.runIDs = [experiment_id]
        # Process the runs as one sample
        _logger.info("Found Following Runs: %s", ', '.join(m.runIDs))
        with TemporaryDirectory() as tmpdir:
            os.chdir(batch_dir)
            sample_dir = "%s/%s/" % (batch_dir, sample_dir_id)
            csfiles = []
            # Reuse FASTQ files already present in the sample directory when
            # preserving previous output
            if preserve and os.path.exists(sample_dir):
                csfiles = [x for x in os.listdir(sample_dir)
                           if any(y in x for y in ['fq', 'fastq'])]
            if csfiles == [] and not skip_files:
                sfiles = []
                for runid in m.runIDs:
                    sf = DownloadRunFiles(runid, tmpdir)
                    if sf is not None:
                        sfiles.append(sf)
                    else:
                        _logger.error("Run files could not be retrieved! (%s)",
                                      runid)
                _logger.info("Found Following file sets:\n%s\n",
                             '\n'.join([', '.join(sf) for sf in sfiles]))
                # Combine the runs' files into one file per read direction
                # (all first mates together, all second mates together, ...)
                if len(sfiles) > 1:
                    for file_no, file_set in enumerate(zip(*sfiles)):
                        # Keep the full extension (e.g. fastq.gz) of the source
                        ext = '.'.join(
                            file_set[0].split('/')[-1].split('.')[1:])
                        if len(sfiles[0]) > 1:
                            new_file = "%s_%s.combined.%s" % (
                                experiment_id, file_no + 1, ext)
                        else:
                            new_file = "%s.combined.%s" % (experiment_id, ext)
                        # Concatenate in binary mode, since the run files are
                        # read as bytes (and are typically gzipped)
                        with open(new_file, 'wb') as nf:
                            for fn in file_set:
                                with open(fn, 'rb') as f:
                                    nf.write(f.read())
                        if os.path.exists(new_file):
                            csfiles.append(new_file)
                        else:
                            _logger.error(
                                "Combined file creation failed! (%s: %s)",
                                experiment_id, file_no)
                            break
                elif sfiles and isinstance(sfiles[0], list):
                    # Only one run was retrieved; use its files directly
                    csfiles = sfiles[0]
                if csfiles == []:
                    _logger.error("Files could not be combined! (%s)",
                                  experiment_id)
                    failed_accession.append(experiment_id)
            if csfiles != [] or skip_files:
                success = CreateSampleDir(csfiles, m, sample_dir, preserve,
                                          skip_files)
                if success:
                    sample_dir_id += 1
                else:
                    failed_accession.append(experiment_id)
            else:
                _logger.error("Files could not be retrieved! (%s)",
                              experiment_id)
                failed_accession.append(experiment_id)
    else:
        _logger.error("Metadata Invalid! (%s) - %s", experiment_id,
                      m.metadata.items())
        failed_accession.append(experiment_id)
    return sample_dir_id
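# ---------------------------------------------------------------------------
# Illustrative driver sketch (not part of the original module): shows how the
# two processing modes above might be called for a batch of accessions. The
# helper name _process_batch_sketch and the assumption that each accession's
# metadata JSON is stored as <accession>.json in the current directory are
# inventions for this example only; adapt to the module's real CLI/inputs.
def _process_batch_sketch(accessions, batch_dir, combine_runs=False,
                          preserve=False, skip_files=False):
    import json as json_lib  # local alias; the functions above shadow 'json'
    sample_dir_id = 0
    failed_accession = []
    process = (ProcessExperimentCombined if combine_runs
               else ProcessExperimentSeparate)
    for accession in accessions:
        # Assumed input location: <accession>.json holding the ENA/SRA metadata
        with open("%s.json" % accession) as fh:
            experiment_json = json_lib.load(fh)
        sample_dir_id = process(accession, experiment_json, batch_dir,
                                sample_dir_id, preserve, failed_accession,
                                skip_files=skip_files)
    if failed_accession:
        _logger.warning("Failed accessions: %s", ', '.join(failed_accession))
    return failed_accession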