示例#1
0
 def get_data(gds_id, report_genes, transpose, sample_type, title):
     """Load a GDS data set and return its data table named ``title``.

     ``gds_ensure_downloaded`` is called first with ``gds_id`` and
     ``progress``; ``progress`` is not a parameter of this function and is
     resolved from the enclosing scope.

     ``report_genes``, ``transpose`` and ``sample_type`` are passed through
     unchanged as keyword arguments to ``geo.GDS(gds_id).getdata``.
     """
     gds_ensure_downloaded(gds_id, progress)

     getdata_options = dict(
         report_genes=report_genes,
         transpose=transpose,
         sample_type=sample_type,
     )
     table = geo.GDS(gds_id).getdata(**getdata_options)

     # Label the resulting table with the caller-supplied title.
     table.name = title
     return table
示例#2
0
# Collect the identifiers matched by ``m`` in the directory listing, running
# the regex once per entry, then drop those that are already cached in
# ``gds_info`` or recorded in ``excluded``.
found = (m.search(d) for d in dirlist)
gds_names = [hit.group(0) for hit in found if hit]
gds_names = [
    name for name in gds_names
    if name not in gds_info and name not in excluded
]
print('{} new files will be added!'.format(len(gds_names)))
skipped = []

helper = SyncHelper(DOMAIN, GEOTest)

if gds_names:
    total = len(gds_names)
    for position, gds_name in enumerate(gds_names, 1):
        print("%3d of %3d -- Adding %s ..." %
              (position, total, gds_name))
        try:
            # NOTE(review): one-second pause before every fetch, presumably
            # to throttle requests to the remote service -- confirm.
            time.sleep(1)
            info = geo.GDS(gds_name).info
            taxid = info["taxid"]
            if taxid not in taxonomy.common_taxids():
                # Remember the taxid so this data set is filtered out above
                # on later runs.
                excluded[gds_name] = taxid
                print("... excluded (%s)." % info["sample_organism"])
            else:
                gds_info[gds_name] = info
                # The cache file is rewritten after every added data set.
                # The ``with`` block closes the file; no explicit close()
                # is needed. ``True`` is kept as the protocol argument so
                # the on-disk format does not change.
                with open(localfile, 'wb') as f:
                    pickle.dump((gds_info, excluded), f, True)
                print("... added.")
        except Exception as ex:
            # Best effort: report the failure, remember the name and carry
            # on with the remaining data sets.
            print("... skipped (error):", str(ex))
            skipped.append(gds_name)

    # update .info file
    create_info_file(localfile, title=TITLE, tags=TAGS)
示例#3
0
import Orange
from orangecontrib.bio import dicty, geneset, gsea, gene, geo

# Example: run GSEA on the GDS10 data set against KEGG gene sets and print
# the ten gene sets with the lowest FDR.
gds = geo.GDS("GDS10")
data = gds.getdata(transpose=True)

# The organism name must be spelled out in full; the previous literal was
# corrupted ("H**o sapiens").
matcher = gene.matcher([gene.GMKEGG("Homo sapiens")])
genesets = geneset.collections((("KEGG", ), "Homo sapiens"))

# The number of permutations (n) should be much higher.
res = gsea.run(data,
               gene_sets=genesets,
               matcher=matcher,
               min_part=0.05,
               permutation="phenotype",
               n=10,
               phen_desc=data.domain["tissue"],
               gene_desc=True)

# Single-argument print(...) calls give the same output on Python 2 and 3;
# print("") emits the blank line the bare ``print`` statement produced.
print("")
print("GSEA results (descriptor: tissue)")
print("%-40s %6s %6s %6s %7s" % ("LABEL", "NES", "FDR", "SIZE", "MATCHED"))

# Order results by ascending FDR and report the first ten.
ranked = sorted(res.items(), key=lambda item: item[1]["fdr"])
for gs, resu in ranked[:10]:
    print("%-40s %6.3f %6.3f %6d %7d" % (gs.name[:30], resu["nes"],
                                         resu["fdr"], resu["size"],
                                         resu["matched_size"]))