#!/usr/bin/env python
"""Remove duplicates in a FASTA file."""
import sys

from dp.associations import GeneAssociations
from dp.ontology import Ontology
from dp.utils import parseFasta

MIN_SEQ_LEN = 32    # drop sequences shorter than this
MAX_SEQ_UNK = 0.1   # drop sequences with more than 10% unknown residues
TAXONS_HOMO_SAPIENS = {9606}

seqs = set()   # sequences already seen
names = set()  # names already seen
fastafile = open(sys.argv[1])

asoc = GeneAssociations.fromFile(sys.argv[2], taxons=TAXONS_HOMO_SAPIENS)
ontology = Ontology(sys.argv[3])
ontology.setAssociations(asoc)
asoc.transitiveClosure()

# Collect every gene name that carries at least one association.
associated = set()
for k, v in asoc.associations.items():
    associated.update({g.upper() for g in v})

# Secondary-structure annotations, keyed like "XXXX:Y:secstr".
ss = dict(parseFasta("data/ss.txt"))

for l in fastafile:
    # Header lines look like ">NAME TYPE ..."; the sequence follows.
    name, typ, *_ = l[1:].split(" ")
    name = name.upper()
    seq = next(fastafile)
    sskey = "%s:secstr" % name.replace("_", ":")
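    # The original loop body ends here. What follows is an assumed sketch of
    # the filtering that the so-far-unused constants and sets above suggest,
    # not the author's confirmed logic.
    seq = seq.strip()
    if len(seq) < MIN_SEQ_LEN:
        continue  # assumed: too short to keep
    if seq.count("X") / len(seq) > MAX_SEQ_UNK:
        continue  # assumed: too many unknown ('X') residues
    if name in names or seq in seqs:
        continue  # assumed: duplicate name or sequence
    if name not in associated or sskey not in ss:
        continue  # assumed: no association or no secondary structure
    names.add(name)
    seqs.add(seq)
    print(">%s %s" % (name, typ))
    print(seq)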
# Fragment from a larger script: `options`, `associationsFileName`, `TAXONS`
# and `ontology` are defined earlier in that script.
dataset = None
if options.dataset:
    # FIXME: When the dataset changes, the serialized associations must be
    # regenerated. This is a serious bug unless random is seeded with a
    # constant.
    dataset = [l.strip() for l in open(options.dataset)]
    random.shuffle(dataset)
    #assert options.reserve > 0.0
    #if options.reserve < 1.0:  # Use ratio
    #    splitIndex = int(options.reserve * len(dataset))
    #else:
    #    splitIndex = int(options.reserve)
    #reserved = set(dataset[:splitIndex])
    #dataset = set(dataset[splitIndex:])
    dataset = set(dataset)

associations = GeneAssociations.fromFile(associationsFileName, taxons=TAXONS, dataset=dataset)
#reservedAssociations = GeneAssociations.fromFile(associationsFileName + "_reserved", dataset=reserved)
ontology.setAssociations(associations)
#ontology.setAssociations(reservedAssociations, 'reserved')

if options.associationsDump:
    # Serialize the associations and stop; nothing below runs in dump mode.
    associations.serialize(options.associationsDump)
    #reservedAssociations.serialize(options.associationsDump + "_reserved")
    sys.exit()

# Prune rare terms, cap association sizes, then report and export the graph.
ontology.deleteSmallTerms(options.lb)
associations.shrink(options.max, options.lb)
ontology.overView()
ontology.dotExport()
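# The fragment above assumes an `options` namespace plus `random` and `sys`
# imports from the enclosing script. A hypothetical sketch of the setup it
# might come from (flag names and defaults are guesses, not the real CLI):
import random
import sys
from optparse import OptionParser

parser = OptionParser()
parser.add_option("--dataset", dest="dataset", help="file with one gene name per line")
parser.add_option("--associations-dump", dest="associationsDump", help="serialize associations here and exit")
parser.add_option("--lb", dest="lb", type="int", default=50, help="minimum term size to keep")
parser.add_option("--max", dest="max", type="int", default=1000, help="maximum associations per term")
options, args = parser.parse_args()

random.seed(0)  # a constant seed keeps the shuffle reproducible (see FIXME above)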