# Download the GO annotation archive for organism code `org`, record which
# taxonomy ids its annotations mention, then publish the archive on the file
# server under the "GO" domain.
archive_name = "gene_association." + org + ".tar.gz"
filename = os.path.join(tmp_path, archive_name)
obiGO.Annotations.DownloadAnnotations(org, filename)

## Load the annotations to test them and collect all taxon ids from them
print(filename)
a = obiGO.Annotations(filename, genematcher=obiGene.GMDirect())
taxons = set(ann.Taxon for ann in a.annotations)
## exclude taxons with cardinality 2
taxons = [tax for tax in taxons if "|" not in tax]
for tax in taxons:
    # Keep only the part after the first ":" of the taxon string.
    taxid = tax.split(":", 1)[-1]
    updatedTaxonomy[taxid].add(org)
# Drop the reference to the loaded annotations before uploading.
del a

orgName = obiTaxonomy.name(commonOrgs[org])
# NOTE: `taxid` is deliberately rebound here; from this point on it is the id
# resolved from the organism name, not the last id seen in the loop above.
taxid = obiTaxonomy.taxname_to_taxid(orgName)

# Single-argument print: same output text as the former print statement.
print("Uploading %s" % archive_name)
sf_server.upload(
    "GO", archive_name, filename,
    title="GO Annotations for " + orgName,
    tags=["gene", "annotation", "ontology", "GO", orgName,
          "#uncompressed:%i" % uncompressedSize(filename),
          "#organism:" + orgName,
          "#version:%i" % obiGO.Annotations.version]
         + (["essential"] if org in essentialOrgs else [])
         + obiTaxonomy.shortname(taxid)
)
sf_server.unprotect("GO", archive_name)
for hi in hist: if any(hi.startswith(id + "\t") for id in taxids): history[hi.split("\t", 1)[0]].append(hi.strip()) for taxid, genes in genes.items(): filename = os.path.join(tmpdir, "gene_info.%s.db" % taxid) f = open(filename, "wb") f.write("\n".join(genes)) f.flush() f.close() print "Uploading", filename sf_server.upload( "NCBI_geneinfo", "gene_info.%s.db" % taxid, filename, title="NCBI gene info for %s" % obiTaxonomy.name(taxid), tags=["NCBI", "gene info", "gene_names", obiTaxonomy.name(taxid)] + obiTaxonomy.shortname(taxid) + (["essential"] if taxid in essential else [])) sf_server.unprotect("NCBI_geneinfo", "gene_info.%s.db" % taxid) filename = os.path.join(tmpdir, "gene_history.%s.db" % taxid) f = open(filename, "wb") f.write("\n".join(history.get(taxid, ""))) f.flush() f.close() print "Uploading", filename sf_server.upload("NCBI_geneinfo", "gene_history.%s.db" % taxid, filename, title="NCBI gene history for %s" %
############################################################################################################################################################## ############################################################################################################################################################## path = os.path.join(environ.buffer_dir, "tmp_miRNA") print 'path: ', path serverFiles = sf_server try: os.mkdir(path) except OSError: pass org_taxo = [tax.name(id) for id in tax.common_taxids()] ### targets library from TargetScan try: tarscan_url = 'http://www.targetscan.org//vert_50//vert_50_data_download/Conserved_Site_Context_Scores.txt.zip' zf = zipfile.ZipFile(StringIO.StringIO(urllib.urlopen(tarscan_url).read())) arch = zf.read(zf.namelist()[0]).splitlines()[1:] arch.pop() mirnas = [a.split('\t')[3] for a in arch] gene_ids = [a.split('\t')[1] for a in arch] TargetScanLib = {} for m, t in zip(mirnas, gene_ids): if not (m in TargetScanLib):
############################################################################################################################################################## ############################################################################################################################################################## path = os.path.join(environ.buffer_dir, "tmp_miRNA") print 'path: ', path serverFiles = sf_server try: os.mkdir(path) except OSError: pass org_taxo = [tax.name(id) for id in tax.common_taxids()] ### targets library from TargetScan try: tarscan_url = 'http://www.targetscan.org//vert_50//vert_50_data_download/Conserved_Site_Context_Scores.txt.zip' zf = zipfile.ZipFile(StringIO(urllib.urlopen(tarscan_url).read())) arch = zf.read(zf.namelist()[0]).splitlines()[1:] arch.pop() mirnas = [a.split('\t')[3] for a in arch] gene_ids = [a.split('\t')[1] for a in arch] TargetScanLib = {} for m,t in zip(mirnas,gene_ids): if not(m in TargetScanLib):
# Keep `gi` only when its first tab-separated field is one of `taxids`, and
# file it under that field in `genes`.
# NOTE(review): `gi` is presumably bound by an enclosing loop whose header
# lies outside this chunk -- confirm against the full file.
if any(gi.startswith(tid + "\t") for tid in taxids):
    genes[gi.split("\t", 1)[0]].append(gi.strip())

# Group the entries of `hist` the same way; every id in `taxids` starts with
# an empty list so ids without any matching entry still get a (blank) file.
history = dict((taxid, []) for taxid in taxids)
for hi in hist:
    if any(hi.startswith(tid + "\t") for tid in taxids):
        history[hi.split("\t", 1)[0]].append(hi.strip())

# Write one gene_info and one gene_history file per taxonomy id and upload
# both to the "NCBI_geneinfo" domain. The per-taxon list gets its own name so
# the `genes` mapping being iterated is not rebound by the loop.
for taxid, gene_lines in genes.items():
    org_name = obiTaxonomy.name(taxid)
    # Tag suffix shared by both uploads: short names, then the optional
    # "essential" marker (same order as the tags were built before).
    common_tags = obiTaxonomy.shortname(taxid) + \
        (["essential"] if taxid in essential else [])

    info_name = "gene_info.%s.db" % taxid
    filename = os.path.join(tmpdir, info_name)
    # `with` closes the handle even if the write raises.
    with open(filename, "wb") as f:
        f.write("\n".join(gene_lines))

    print("Uploading %s" % filename)
    sf_server.upload(
        "NCBI_geneinfo", info_name, filename,
        title="NCBI gene info for %s" % org_name,
        tags=["NCBI", "gene info", "gene_names", org_name] + common_tags)
    sf_server.unprotect("NCBI_geneinfo", info_name)

    history_name = "gene_history.%s.db" % taxid
    filename = os.path.join(tmpdir, history_name)
    with open(filename, "wb") as f:
        f.write("\n".join(history.get(taxid, [])))

    print("Uploading %s" % filename)
    sf_server.upload(
        "NCBI_geneinfo", history_name, filename,
        title="NCBI gene history for %s" % org_name,
        tags=["NCBI", "gene info", "history", "gene_names", org_name]
             + common_tags)
    sf_server.unprotect("NCBI_geneinfo", history_name)