def __init__(self):
     """Copy the parsed command-line options from the global ``args``.

     The gene-set and annotation arguments are wrapped in ``GMT`` objects
     and the background list is handed to ``BACKGROUND`` together with an
     empty list; the remaining options are stored as given, except for
     the three switches, which go through ``str2bool``.
     """
     self.test_name = args.test_name

     # Inputs that are wrapped in project containers.
     self.sample_sets = GMT(args.gene_sets)
     self.anno_list = GMT(args.annotation_list)
     self.background = BACKGROUND([], args.background_list)

     # Options stored unchanged (note the renames: rate -> alpha,
     # cpu -> cpu_count).
     self.alpha = args.rate
     self.output = args.output
     self.cpu_count = args.cpu
     self.precision = args.precision

     # Switches keep their option name and are converted with str2bool,
     # in the same order as before.
     for switch in ("console", "significant", "table"):
         setattr(self, switch, str2bool(getattr(args, switch)))
    def test_enrichment_score(self):
        """Check ``enrichment_score`` for gene set GO:0070507 on cluster 0.

        Uses the ``GO_shortened.gmt`` and ``CLUSTERS.mat`` fixtures under
        ``files/unittest_files``.
        """
        fixture_dir = os.path.join("files", "unittest_files")
        gene_sets = GMT(os.path.join(fixture_dir, "GO_shortened.gmt"))
        anno = gene_sets.genesets['GO:0070507']
        expr_list = MAT(os.path.join(fixture_dir, "CLUSTERS.mat"))
        cluster = 0

        score = float(enrichment_score(anno, cluster, expr_list, 1))
        self.assertAlmostEqual(score, 0.629079429538, delta=0.0001)
    def test_page(self):
        """Check the first two p-values of ``page`` on cluster 0.

        Runs ``page`` on the ``GO_shortened.gmt`` / ``CLUSTERS.mat``
        fixtures and compares ``page_result[0][0].p_value`` and
        ``page_result[0][1].p_value`` with the recorded values.
        """
        fixture_dir = os.path.join("files", "unittest_files")
        anno = GMT(os.path.join(fixture_dir, "GO_shortened.gmt"))
        expr_list = MAT(os.path.join(fixture_dir, "CLUSTERS.mat"))
        cluster = 0

        page_result = page(expr_list, cluster, anno, 1, cpu_count())

        # The expected p-values are ~1e-17 and ~1e-10, so an absolute
        # delta of 1e-4 would accept any p-value below roughly 1e-4.
        # Scale the tolerance by the expected value so the comparison
        # actually constrains the result.
        rel_tol = 1e-4
        expected_p_values = (6.35801240607e-17, 1.1116067475e-10)
        for index, expected in enumerate(expected_p_values):
            actual = float(page_result[0][index].p_value)
            self.assertAlmostEqual(actual, expected,
                                   delta=expected * rel_tol)
    def test_wilcoxon(self):
        """Check the first two p-values of ``wilcoxon`` on cluster 0.

        Runs ``wilcoxon`` on the ``GO_shortened.gmt`` / ``CLUSTERS.mat``
        fixtures and compares ``wilcoxon_result[0][0].p_value`` and
        ``wilcoxon_result[0][1].p_value`` with the recorded values.
        """
        fixture_dir = os.path.join("files", "unittest_files")
        anno = GMT(os.path.join(fixture_dir, "GO_shortened.gmt"))
        expr_list = MAT(os.path.join(fixture_dir, "CLUSTERS.mat"))
        cluster = 0

        wilcoxon_result = wilcoxon(expr_list, cluster, anno, 1, cpu_count())

        # An absolute delta of 1e-4 is far larger than the first expected
        # p-value (~1e-10) and would accept almost any small p-value.
        # Use a tolerance relative to each expected value instead.
        rel_tol = 1e-4
        expected_p_values = (1.1206994619e-10, 0.0026701906744509285)
        for index, expected in enumerate(expected_p_values):
            actual = float(wilcoxon_result[0][index].p_value)
            self.assertAlmostEqual(actual, expected,
                                   delta=expected * rel_tol)
    def test_gsea(self):
        """Check the first two ``es`` values of ``gsea`` on cluster 0.

        Runs ``gsea`` with 5 permutations on the ``GO_shortened.gmt`` /
        ``CLUSTERS.mat`` fixtures and compares ``gsea_result[0][0].es``
        and ``gsea_result[0][1].es`` with the recorded values.
        """
        fixture_dir = os.path.join("files", "unittest_files")
        anno = GMT(os.path.join(fixture_dir, "GO_shortened.gmt"))
        expr_list = MAT(os.path.join(fixture_dir, "CLUSTERS.mat"))
        cluster, permutations = 0, 5

        gsea_result = gsea(expr_list, cluster, anno, permutations, 1.0, 1,
                           cpu_count())

        expected_scores = (0.629079429538, 0.458915389493)
        for position, expected in enumerate(expected_scores):
            observed = float(gsea_result[0][position].es)
            self.assertAlmostEqual(observed, expected, delta=0.0001)
    def test_generate_inputs(self):
        """Check the fields of the first item built by ``generate_inputs``.

        Uses the ``test_go.gmt`` and ``CLUSTERS.mat`` fixtures; the first
        item is expected to carry annotation id ``'0'`` and the string
        forms of 0..39 as its annotation list.
        """
        fixture_dir = os.path.join("files", "unittest_files")
        anno = GMT(os.path.join(fixture_dir, "test_go.gmt"))
        expr_list = MAT(os.path.join(fixture_dir, "CLUSTERS.mat"))
        cluster, permutations = 0, 5

        # Expected gene identifiers: "0" through "39".
        expected_genes = {str(number) for number in range(40)}

        inputs = generate_inputs(anno, cluster, expr_list, permutations)
        first = inputs[0]
        self.assertEqual(first.anno_id, '0')
        self.assertEqual(first.anno_list, expected_genes)
        self.assertEqual(first.expr_cluster, 0)
        self.assertEqual(first.expr_list, expr_list)
        self.assertEqual(first.permutations, 5)
Exemplo n.º 7
0
 def __init__(self):
     """Copy the parsed command-line options from the global ``args``.

     The annotation list is wrapped in a ``GMT`` object and the
     expression list in a ``MAT`` object; the remaining options are
     stored as given, except for the three switches, which go through
     ``str2bool``.
     """
     self.test_name = args.test_name

     # Inputs that are wrapped in project containers.
     self.anno_list = GMT(args.annotation_list)
     self.expr_list = MAT(args.expr_list)
     # NOTE: two further steps on expr_list are currently disabled:
     # DAVID_to_gene_symbol(...) with the "GPL_converted_list" test file,
     # and normalize(args.cluster_number).

     # Options stored unchanged (note the renames: cluster_array ->
     # expr_clusters, rate -> alpha, cpu -> cpu_count).
     self.expr_clusters = args.cluster_array
     self.permutations = args.permutations
     self.alpha = args.rate
     self.output = args.output
     self.weight = args.weight
     self.cpu_count = args.cpu
     self.precision = args.precision

     # Switches keep their option name and are converted with str2bool,
     # in the same order as before.
     for switch in ("console", "significant", "table"):
         setattr(self, switch, str2bool(getattr(args, switch)))
Exemplo n.º 8
0
                    nargs=1,
                    help='Ontology to use for propagation')
args = parser.parse_args()

# Inclusive bounds on the number of gene annotations a term may have to be
# kept by the filter further down.
MIN_POS, MAX_POS = 5, 500

# The add_argument call ending just above passes nargs=1; assuming it is the
# one declaring this option, argparse stores a one-element list (e.g. ['DO'])
# rather than a bare string, so comparing args.ontology directly against
# 'DO' / 'GO' would never match. Unwrap the list before comparing; a plain
# string or None is left as it is.
ontology_name = args.ontology
if isinstance(ontology_name, (list, tuple)):
    ontology_name = ontology_name[0] if ontology_name else None

# Pick the ontology class to generate; anything other than 'DO' or 'GO'
# falls back to the generic Ontology.
if ontology_name == 'DO':
    onto = DiseaseOntology.generate()
elif ontology_name == 'GO':
    onto = GeneOntology.generate()
else:
    onto = Ontology.generate()

if args.gmt:
    # Load GMT genes onto Disease Ontology and propagate
    gmt = GMT(filename=args.gmt)
    onto.populate_annotations_from_gmt(gmt)
    onto.propagate()

    # Filter terms by number of gene annotations
    terms = [
        term.go_id for term in onto.get_termobject_list() if
        len(term.annotations) >= MIN_POS and len(term.annotations) <= MAX_POS
    ]

    if args.slim:
        # Build ontology aware labels
        lines = open(args.slim).readlines()
        slim_terms = set([l.strip() for l in lines])
        labels = OntoLabels(obo=onto, slim_terms=slim_terms)
    else:
Exemplo n.º 9
0
        "--When filtering by publication, must provide GO namespace.\n")
    sys.exit()

# Optional identifier map; stays None when no id file was given.
id_name = None
if args.idfile is not None:
    id_name = IDMap(args.idfile)

gene_ontology = OBO(args.obo)

# Annotations come from exactly one source: an association file takes
# precedence over a GMT file; with neither, the script cannot continue.
logger.info('Populating gene associations')
if args.ass:
    gene_ontology.populate_annotations(args.ass,
                                       gene_col=args.gcol,
                                       term_col=args.term_col)
elif args.gmt:
    gmt = GMT(args.gmt)
    gene_ontology.populate_annotations_from_gmt(gmt)
else:
    # Terminate the message with a newline so the shell prompt does not
    # end up on the same line.
    sys.stderr.write(
        "--Provide gene annotations from an association file or a GMT file\n")
    # Use sys.exit() rather than the site-provided exit() helper, which is
    # meant for interactive sessions and is not guaranteed to exist.
    # NOTE(review): like the earlier sys.exit() in this script, this still
    # exits with status 0 although it is an error path -- consider a
    # non-zero status for both.
    sys.exit()

if args.pub_filter:
    pub_counts = defaultdict(set)
    for (term_id, term) in gene_ontology.go_terms.iteritems():
        if term.namespace != args.nspace:
            continue
        for a in term.annotations:
            pub_counts[a.ref].add((term, a))
    for (ref, annots) in pub_counts.iteritems():
        if len(annots) > 50:
Exemplo n.º 10
0
args = parser.parse_args()

# Optional set of ubiquitous genes: one stripped entry per line of the file.
# Stays None when no file was given.
ubiq = None
if args.ubiq_genes:
    ubiq = set()
    with open(args.ubiq_genes) as f:
        ubiq.update(entry.strip() for entry in f)

    logger.info('Total ubiquitous genes: %i', len(ubiq))

# Optional tissue ontology; None when no ontology file was given.
onto = OBO(args.tissue_onto) if args.tissue_onto else None

tissue_genes = GMT(args.tissue_genes)

if onto:
    # Load the gene sets into the ontology, call propagate(), and continue
    # with the ontology's own GMT export in place of the file contents.
    onto.populate_annotations_from_gmt(tissue_genes)
    onto.propagate()
    tissue_genes = onto.as_gmt()

# Read all edge lines up front; they are processed after the file is closed.
with open(args.pos) as f:
    edge_lines = list(f)

tissue_std_edges = defaultdict(dict)

for line in edge_lines:
    g1, g2, std = line.strip().split()[:3]
    edge = frozenset([g1, g2])
Exemplo n.º 11
0
                    type=str,
                    help='geneset id')
# Optional probability-fit selection; left as None when not given.
parser.add_argument(
    '--prob', '-p',
    dest='prob_fit',
    choices=['SIGMOID', 'ISO'],
    default=None,
    help='probability fit',
)

args = parser.parse_args()

# Std pairs the positive and negative gene sets of one standard; the
# `standards` dict collects one Std per standard, keyed by its id.
Std = namedtuple('Std', 'pos neg')
standards = dict()

if args.gmt:
    gmt = GMT(filename=args.gmt)
    if args.geneset_id:
        pos_genes = gmt.get_genes(args.geneset_id)
        neg_genes = gmt.genes - pos_genes
        standards[args.geneset_id] = Std(pos=pos_genes, neg=neg_genes)
    else:
        for (gsid, genes) in gmt.genesets.iteritems():
            pos_genes = gmt.get_genes(gsid)
            neg_genes = gmt.genes - pos_genes
            if len(pos_genes) >= 10 and len(pos_genes) <= 1000:
                standards[gsid] = Std(pos=pos_genes, neg=neg_genes)
elif args.dir:
    for f in os.listdir(args.dir):
        pos_genes, neg_genes = set(), set()
        with open(args.dir + '/' + f) as labelf:
            lines = labelf.readlines()