def get_data(gds_id, report_genes, transpose, sample_type, title):
    """Fetch the data table of a GEO data set and label it with *title*.

    The data set is first passed to ``gds_ensure_downloaded``; then a
    ``geo.GDS`` object is built for *gds_id* and its ``getdata`` method is
    called with the given *report_genes*, *transpose* and *sample_type*
    options. The resulting table's ``name`` attribute is set to *title*
    before it is returned.

    NOTE(review): ``progress`` is not a parameter of this function; it is
    looked up in the enclosing/module scope at call time -- confirm that it
    is always defined there.
    """
    gds_ensure_downloaded(gds_id, progress)
    table = geo.GDS(gds_id).getdata(
        report_genes=report_genes,
        transpose=transpose,
        sample_type=sample_type,
    )
    table.name = title
    return table
# Collect the GDS identifiers present in the directory listing. The pattern
# `m` is applied once per entry (the match object is reused) instead of twice.
gds_names = [hit.group(0) for hit in map(m.search, dirlist) if hit]
# Keep only the data sets that are neither already recorded in `gds_info`
# nor listed in `excluded`.
gds_names = [
    name for name in gds_names
    if name not in gds_info and name not in excluded
]
print('{} new files will be added!'.format(len(gds_names)))

skipped = []  # names of data sets whose processing raised an error
helper = SyncHelper(DOMAIN, GEOTest)

if gds_names:
    for count, gds_name in enumerate(gds_names):
        print("%3d of %3d -- Adding %s ..." % (count + 1, len(gds_names), gds_name))
        try:
            # Pause for a second before each data set is fetched.
            time.sleep(1)
            gds = geo.GDS(gds_name)
            if gds.info["taxid"] not in taxonomy.common_taxids():
                # Remember the taxonomy id so this data set is not retried.
                excluded[gds_name] = gds.info["taxid"]
                print("... excluded (%s)." % gds.info["sample_organism"])
            else:
                gds_info[gds_name] = gds.info
                # Re-write the whole pickle after every addition, so the
                # progress made so far is on disk if a later item fails.
                # The `with` block closes the file; no explicit close needed.
                # `True` is passed as the pickle protocol argument (i.e. 1).
                with open(localfile, 'wb') as f:
                    pickle.dump((gds_info, excluded), f, True)
                print("... added.")
        except Exception as ex:
            # Best effort: report the failure, record the name and move on
            # to the next data set.
            print("... skipped (error):", str(ex))
            skipped.append(gds_name)

    # update .info file
    # NOTE(review): the source's indentation was lost; this call is assumed
    # to belong inside the `if gds_names:` guard (refresh the .info file only
    # when new data sets were processed) -- confirm against the original.
    create_info_file(localfile, title=TITLE, tags=TAGS)
import Orange
from orangecontrib.bio import dicty, geneset, gsea, gene, geo

# Example script: run gene set enrichment analysis (GSEA) on GEO data set
# GDS10 with KEGG gene sets, using the "tissue" descriptor as the phenotype,
# and print the ten gene sets with the lowest FDR.

gds = geo.GDS("GDS10")
data = gds.getdata(transpose=True)

# The organism name must be spelled out in full; the previous literal
# ("H**o sapiens") was a corrupted form of "Homo sapiens".
matcher = gene.matcher([gene.GMKEGG("Homo sapiens")])
genesets = geneset.collections((("KEGG",), "Homo sapiens"))

# the number of permutations (n) should be much higher
res = gsea.run(
    data,
    gene_sets=genesets,
    matcher=matcher,
    min_part=0.05,
    permutation="phenotype",
    n=10,
    phen_desc=data.domain["tissue"],
    gene_desc=True,
)

# Single-argument, parenthesised prints give the same output whether `print`
# is a statement (Python 2) or a function (Python 3).
print("")
print("GSEA results (descriptor: tissue)")
print("%-40s %6s %6s %6s %7s" % ("LABEL", "NES", "FDR", "SIZE", "MATCHED"))

# Sort by false discovery rate, ascending, and report the first ten entries.
for gs, resu in sorted(res.items(), key=lambda x: x[1]["fdr"])[:10]:
    print("%-40s %6.3f %6.3f %6d %7d" % (
        gs.name[:30],
        resu["nes"],
        resu["fdr"],
        resu["size"],
        resu["matched_size"],
    ))