def getSRARuns(orgname, platform):
    """Return the SRA runs of an organism that match a sequencing platform.

    Searches the 'sra' database through ESEARCH_URL with the term
    "<orgname>[Organism] AND transcriptom*", pages through the matching ids
    20 at a time, fetches every page through EFETCH_URL and parses the
    response with SRAXMLParser.

    Args:
        orgname: organism name interpolated into the search term.
        platform: sequencing platform, either '454' or 'Illumina'.

    Returns:
        A list with the parsed run objects (not bare accession strings) whose
        ``platform`` attribute equals ``platform``. The list is empty when
        the search reports a count of zero.

    Raises:
        AssertionError: if ``platform`` is not one of the supported values.
    """
    assert platform in ['454', 'Illumina'], \
        "Unknown sequencing platform %s" % (platform,)

    term = "%s[Organism] AND transcriptom*" % orgname
    search_params = {'db': 'sra', 'term': term, 'usehistory': 'y'}
    root = ElementTree.XML(getURL(ESEARCH_URL, search_params))
    result_count = int(root.findtext("Count"))
    if result_count == 0:
        return []

    # Reuse the history session returned by the first search for paging.
    search_params['WebEnv'] = root.findtext('WebEnv')
    search_params['retmax'] = 20

    runs = []
    sraxmlparser = SRAXMLParser()
    for retstart in range(0, result_count, search_params['retmax']):
        search_params['retstart'] = str(retstart)
        root = ElementTree.XML(getURL(ESEARCH_URL, search_params))
        # Ignore <Id> elements without text instead of failing on None.
        idlist = [idelement.text.strip()
                  for idelement in root.findall('IdList/Id')
                  if idelement.text]
        if not idlist:
            # Nothing to fetch for this page; do not send an empty id list.
            continue
        fetch_params = {'db': 'sra', 'id': ','.join(idlist)}
        sraruns = sraxmlparser.parseFromString(getURL(EFETCH_URL, fetch_params))
        runs.extend(srarun for srarun in sraruns
                    if srarun.platform == platform)
    return runs
def isPaired(srafetchxml, accession):
    """Tell whether the run with the given accession is paired.

    The runs are parsed from the file ``srafetchxml`` with SRAXMLParser and
    compared to ``accession`` after stripping surrounding whitespace on both
    sides. The ``is_paired`` attribute of the first matching run is returned.

    Returns False when ``accession`` or ``srafetchxml`` is empty, when the
    file does not exist, or when no run matches.
    """
    # Guard clauses, checked in the same order as the original condition.
    if not accession:
        return False
    if not srafetchxml:
        return False
    if not os.path.exists(srafetchxml):
        return False

    parser = SRAXMLParser()
    parsed_runs = parser.parse(srafetchxml)
    for candidate in parsed_runs:
        same_accession = candidate.accession.strip() == accession.strip()
        if same_accession:
            return candidate.is_paired
    return False
def getRunsByPlatform(cf):
    """Write the run accessions from a particular platform to a file.

    Reads the 'platform' parameter, the 'srafetchxml' input and the
    'srarunlist' output from ``cf``. The runs parsed from srafetchxml whose
    platform equals the parameter are written to srarunlist as CSV, one
    accession per row below the header 'NCBISRARunID'.

    Returns:
        constants.GENERIC_ERROR (after cf.write_error) when the platform is
        neither '454' nor 'Illumina'; constants.OK otherwise.
    """
    platform = cf.get_parameter('platform', 'string')
    if platform not in ['454', 'Illumina']:
        cf.write_error("Unknown sequencing platform %s" % platform)
        return constants.GENERIC_ERROR

    srafetchxml = cf.get_input('srafetchxml')
    srarunlist = cf.get_output('srarunlist')
    sraxmlparser = SRAXMLParser()
    runs = sraxmlparser.parse(srafetchxml)

    num_accessions = 0
    # The context manager closes (and flushes) the output file even if
    # writing fails. 'wb' is the mode the Python 2 csv module expects.
    with open(srarunlist, 'wb') as outfile:
        writer = csv.writer(outfile, quoting=csv.QUOTE_NONE)
        writer.writerow(['NCBISRARunID'])
        for run in runs:
            if run.platform == platform:
                writer.writerow([run.accession])
                num_accessions += 1

    cf.write_log("GetRunsByPlatform: wrote %s run accessions" % num_accessions)
    return constants.OK
def getExpAccessions(cf):
    """Write the experiment accessions to a file.

    Reads the 'platform' parameter, the 'srafetchxml' input and the
    'sraexplist' output from ``cf``. Every distinct experiment accession of
    the runs parsed from srafetchxml is written once to sraexplist as CSV,
    in order of first appearance, below the header 'NCBISRAExpID'. When the
    platform parameter is non-empty, runs from other platforms are skipped.

    Returns:
        constants.OK
    """
    platform = cf.get_parameter('platform')
    srafetchxml = cf.get_input('srafetchxml')
    sraexplist = cf.get_output('sraexplist')
    sraxmlparser = SRAXMLParser()
    runs = sraxmlparser.parse(srafetchxml)

    # A set gives constant-time duplicate detection; rows are still written
    # in the order the accessions are first seen.
    accessions = set()
    # The context manager closes (and flushes) the output file even if
    # writing fails. 'wb' is the mode the Python 2 csv module expects.
    with open(sraexplist, 'wb') as outfile:
        writer = csv.writer(outfile, quoting=csv.QUOTE_NONE)
        writer.writerow(['NCBISRAExpID'])
        for run in runs:
            if platform and not run.platform == platform:
                continue
            if run.exp_accession in accessions:
                continue
            writer.writerow([run.exp_accession])
            accessions.add(run.exp_accession)

    cf.write_log("GetExpAccessions: wrote %s experiment accessions" % len(accessions))
    return constants.OK
def getRunsByExp(cf):
    """Write the run accessions for a given experiment to a file.

    Reads the 'accession' and 'platform' parameters, the 'srafetchxml' input
    and the 'srarunlist' output from ``cf``. The runs parsed from srafetchxml
    whose experiment accession equals the 'accession' parameter are written
    to srarunlist as CSV, one run accession per row below the header
    'NCBISRARunID'. When the platform parameter is non-empty, runs from other
    platforms are skipped.

    Returns:
        constants.OK
    """
    exp_accession = cf.get_parameter('accession', 'string')
    platform = cf.get_parameter('platform', 'string')
    srafetchxml = cf.get_input('srafetchxml')
    srarunlist = cf.get_output('srarunlist')
    sraxmlparser = SRAXMLParser()
    runs = sraxmlparser.parse(srafetchxml)

    index = 0
    # The context manager closes (and flushes) the output file even if
    # writing fails. 'wb' is the mode the Python 2 csv module expects.
    with open(srarunlist, 'wb') as outfile:
        writer = csv.writer(outfile, quoting=csv.QUOTE_NONE)
        writer.writerow(['NCBISRARunID'])
        for run in runs:
            if platform and not run.platform == platform:
                continue
            if run.exp_accession == exp_accession:
                writer.writerow([run.accession])
                index += 1

    cf.write_log("GetRunsByExp: wrote %s run accessions" % index)
    return constants.OK