def search_read_group_sets(request):
    """
    Mock implementation that returns ten placeholder read group sets.

    The ``request`` argument is accepted but never read. Each returned
    ``protocol.ReadGroupSet`` has its ``id`` set to the string form of its
    index (``"0"`` through ``"9"``) and its ``name`` set to ``"Hi there"``.

    NOTE(review): this file contains a second definition named
    ``search_read_group_sets`` further down; if both live at the same
    scope, the later one replaces this mock -- confirm which is intended.

    :param request: ignored.
    :returns: a ``(read_group_sets, page_token)`` tuple; the token is
        always the literal ``"somepagetoken"``.
    """
    read_group_set_list = []
    # range() instead of xrange(): same result for ten items, and it also
    # exists on Python 3.
    for i in range(10):
        read_group_set = protocol.ReadGroupSet()
        read_group_set.id = str(i)
        read_group_set.name = "Hi there"
        read_group_set_list.append(read_group_set)
    return (read_group_set_list, "somepagetoken")
def search_read_group_sets(request):
    """
    Looks up the SRA records linked to an NCBI BioProject and converts them
    into GA4GH read group sets.

    ``request.dataset_id`` is sent as the BioProject ID to the E-utilities
    ``elink`` endpoint; the SRA IDs listed under the ``bioproject_sra_all``
    link name are then fetched in batches from ``efetch``.
    ``request.page_size`` is the batch size (100 when it is zero or
    negative). Each batch produces one ``protocol.ReadGroupSet`` containing
    one read group per top-level element of the ``efetch`` response.

    :param request: object exposing ``dataset_id`` and ``page_size``.
    :returns: list of ``protocol.ReadGroupSet`` messages, one per batch
        (empty when no SRA IDs are linked).
    """
    default_page_size = 100
    # A non-positive size would make the batching below stop consuming IDs,
    # so anything that is not a positive number falls back to the default.
    if request.page_size > 0:
        page_size = request.page_size
    else:
        page_size = default_page_size

    elink_params = {
        'db': 'sra',
        'dbfrom': 'bioproject',
        'id': request.dataset_id,
        'term': 'all[filter]',
    }
    elink_response = requests.get(
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi",
        params=elink_params)

    # Parse the XML response: collect the SRA IDs linked to the project.
    sra_ids = []
    link_root = ET.fromstring(elink_response.text)
    for link_set_db in link_root.findall("./LinkSet/LinkSetDb"):
        # findtext() yields None (instead of raising) when LinkName is absent.
        if link_set_db.findtext("LinkName") != "bioproject_sra_all":
            continue
        sra_ids.extend(
            link.text for link in link_set_db.findall("./Link/Id"))

    # Read group attribute -> path of the element carrying its value. When
    # a path matches several elements, the last match wins.
    field_paths = (
        ('dataset_id', "./SUBMISSION/IDENTIFIERS/PRIMARY_ID"),
        ('id', "./RUN_SET/RUN/IDENTIFIERS/PRIMARY_ID"),
        ('biosample_id',
         "./RUN_SET/RUN/Pool/Member/IDENTIFIERS/EXTERNAL_ID"),
    )

    # === get all data for these SRAs ===
    readgroupsets = []
    for start in range(0, len(sra_ids), page_size):
        batch = sra_ids[start:start + page_size]
        # e.g., https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=sra&id=3543186,3543185,3543183
        efetch_response = requests.get(
            "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
            params={'db': 'sra', 'id': ','.join(batch)})

        readgroupset = protocol.ReadGroupSet()
        # Parse the XML response: one read group per top-level record.
        for package in ET.fromstring(efetch_response.text):
            readgroup = readgroupset.read_groups.add()
            for field, path in field_paths:
                for node in package.findall(path):
                    setattr(readgroup, field, node.text)
            for run in package.findall("./RUN_SET/RUN"):
                # `in` replaces dict.has_key(), which Python 3 removed.
                if 'assembly' in run.attrib:
                    readgroup.reference_set_id = run.attrib['assembly']
        # Collect the populated set itself rather than the last read group
        # added to it, so the result matches what the function name promises.
        readgroupsets.append(readgroupset)
    return readgroupsets
def toProtocolElement(self):
    """
    Builds the GA4GH protocol message that describes this ReadGroupSet.
    """
    message = protocol.ReadGroupSet()
    message.id = self.getId()

    # Convert every contained read group before attaching them in one go.
    groupMessages = []
    for group in self.getReadGroups():
        groupMessages.append(group.toProtocolElement())
    message.read_groups.extend(groupMessages)

    message.name = self.getLocalId()
    container = self.getParentContainer()
    message.dataset_id = container.getId()
    message.stats.CopyFrom(self.getStats())
    self.serializeAttributes(message)
    return message