Exemplos de UtilMethods.foldClusterData em Python

Linguagem de programação: Python

Espaço para nome / nome do pacote: utils

Classe / Tipo: UtilMethods

Método / Função: foldClusterData

Exemplos em hotexamples.com: 2

UtilMethods.foldClusterData em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de utils.UtilMethods.foldClusterData em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

listFilesExt(13)

normalizePath(11)

loadConfig(8)

getSpecies(3)

getLabel(3)

foldClusterData(2)

getFileName(2)

charGramShuffle(1)

getFileExt(1)

getSparkConf(1)

makeDefaultName(1)

Métodos Frequentes

listFilesExt (13)

normalizePath (11)

loadConfig (8)

getSpecies (3)

getLabel (3)

foldClusterData (2)

getFileName (2)

charGramShuffle (1)

getFileExt (1)

getSparkConf (1)

Métodos Frequentes

makeDefaultName (1)

Exemplo n.º 1

0

Exibir arquivo

def __init__(self):
    """Read the 'eval' configuration section and prepare evaluation state.

    Loads the task name, gold/result paths and threshold from the config,
    creates the Spark context and the Similarity/Filter/Merger helpers,
    then reads the gold IDs and folds them into clusters.
    """
    cfg = Utils.loadConfig()
    self.config = cfg

    # Settings from the 'eval' section of the configuration.
    self.task = cfg.get('eval', 'task')
    self.gold = cfg.get('eval', 'goldID.path')
    self.result = cfg.get('eval', 'result.path')
    self.threshold = float(cfg.get('eval', 'threshold'))

    # Collaborators; the Spark context is created first because Filter
    # receives it as an argument.
    self.sparkContext = SparkContext(conf=Utils.getSparkConf('filter'))
    self.Similarity = Similarity.Similarity(self.config)
    self.Filter = Filter.Filter(self.config, sparkContext=self.sparkContext)
    self.Merger = Merger.Merger(self.config)

    # The first line of the gold file is dropped (presumably a header row
    # -- confirm against the gold file format).
    self.goldIDs = Utils.readFileLines(self.gold)[1:]
    self.resultFiles = Utils.listFilesExt(self.result, 'IDs.test')

    # Total number of gold genes.
    self.nbGoldGenes = len(self.goldIDs)

    # Gold clusters, plus a flat list of every gene they contain.
    self.foldedGold = Utils.foldClusterData(self.goldIDs, 'gold', 0)
    flattened = []
    for clusterGenes in self.foldedGold.values():
        flattened.extend(clusterGenes)
    self.goldGenes = flattened

    # Total number of gold clusters.
    self.nbGoldClusters = len(self.foldedGold)

    # Header rows for the tab-separated output and score reports.
    self.outputheader = 'goldClusterID\tgoldGeneID\tpredictedClusterLabel\tpredictedClusterID\n'
    self.scoreheader = 'goldClusterID\tpredictedClusterID\tclusterScore\n'

Exemplo n.º 2

0

Exibir arquivo

# Annotation keywords and the display color each one maps to; the first
# keyword found in the annotation text wins.
_ANNOTATION_COLORS = (
    ('backbone', '#EE0000'),    # red
    ('tailor', '#EE9300'),      # orange
    ('transcript', '#048014'),  # forest green
    ('transport', '#1888f0'),   # light blue
)
_GOLD_ONLY_COLOR = '#9931f2'    # bright purple


def _geneColor(gene, goldContent, annotationContent, annotationList):
    """Return the color for *gene*, or None when no color applies.

    No color is assigned unless gold content is available. A gene found in
    the annotation text is colored by the first matching keyword in the
    second tab-separated field of its annotation line; a gene found only
    in the gold content gets the gold-only color.

    NOTE(review): the original listing had lost its indentation; pairing
    the gold-content check with the annotation check (rather than with the
    keyword chain) is a reconstruction -- confirm against the upstream file.
    """
    if not goldContent:
        return None
    if gene in annotationContent:
        matches = [item for item in annotationList if gene in item]
        fields = matches[0].split('\t') if matches else []
        # An annotation line without a tab has no second field; treat it
        # as unannotated instead of raising IndexError.
        annot = fields[1] if len(fields) > 1 else ''
        for keyword, color in _ANNOTATION_COLORS:
            if keyword in annot:
                return color
        return None
    if gene in goldContent:
        return _GOLD_ONLY_COLOR
    return None


def _gffLine(startFeature, endFeature, source_type, info):
    """Format one output line spanning startFeature's start to endFeature's end.

    seqid, strand and phase are taken from *startFeature*; the score field
    is always '?'. *info* must already end with a newline.

    NOTE(review): unless source_type itself contains a tab, the line has
    eight tab-separated fields, while GFF3 defines nine columns -- confirm
    what callers pass as source_type.
    """
    return '\t'.join([
        startFeature['seqid'],
        source_type,
        str(startFeature['start']),
        str(endFeature['end']),
        '?',
        startFeature['strand'],
        startFeature['phase'],
        info,
    ])


def clustersToGFF(clusterspath, gffpath, goldpath, annotpath, source_type):
    """Write per-cluster and per-gene GFF3 files for a clusters file.

    The clusters read from *clusterspath* are folded with
    Utils.foldClusterData (arguments depend on whether 'score' occurs in
    the path), sorted by cluster key, and matched against the 'mRNA'
    features of the GFF file at *gffpath*. Two files are written next to
    *clusterspath*: '<base>.percluster.gff3' (one line per cluster) and
    '<base>.pergene.gff3' (one line per gene found in the GFF).

    Args:
        clusterspath: path of the clusters file.
        gffpath: path of the GFF3 file parsed with Gff3.
        goldpath: path of the gold file, or a falsy value to disable
            coloring.
        annotpath: path of the annotation file, or a falsy value.
        source_type: text placed in the second field of every output line.

    Returns:
        The parsed Gff3 object for *gffpath*.

    Genes missing from the GFF are reported with print() and skipped. A
    cluster that is empty, or whose first or last gene is missing from the
    GFF, is reported and omitted from the per-cluster file instead of
    raising.
    """
    gffcontent = Gff3(gffpath)

    clustercontent = Utils.readFileLines(clusterspath)
    if 'score' in clusterspath:
        clusters = Utils.foldClusterData(clustercontent, "", 0.5)
    else:
        clusters = Utils.foldClusterData(clustercontent, "gold", "")

    goldContent = '\t'.join(Utils.readFileLines(goldpath)) if goldpath else ""
    annotationList = Utils.readFileLines(annotpath) if annotpath else ""
    annotationContent = '\n'.join(annotationList) if annotpath else ""

    # Sort clusters by key so the output order is deterministic.
    clusters = OrderedDict(sorted(clusters.items(), key=lambda x: x[0]))

    basepath = clusterspath.rsplit('.', 1)[0]
    gffclusterfile = basepath + '.percluster.gff3'
    gffgenefile = basepath + '.pergene.gff3'

    # Collect output lines and join once at the end instead of growing a
    # string inside the loops.
    clusterLines = ["##gff-version 3\n"]
    geneLines = ["##gff-version 3\n"]

    # Keep only "mRNA" features: {gene name without '.1': gff line}.
    # NOTE(review): replace('.1', '') removes every '.1' occurrence, not
    # only a trailing suffix; kept as-is because lookups below use the
    # same transformation.
    mRNAdict = {
        line['attributes']['Name'].replace('.1', ''): line
        for line in gffcontent.lines
        if line['type'] == 'mRNA'
    }

    for key, value in clusters.items():
        if not value:
            print('empty cluster skipped:', key)
            continue

        for gene in value:
            gene = gene.replace('.1', '')
            thisgene = mRNAdict.get(gene)
            if thisgene is None:
                print('gene not found:', gene)
                continue

            info = 'Name=' + gene + ';Note=' + key + '\n'
            color = _geneColor(gene, goldContent, annotationContent,
                               annotationList)
            if color is not None:
                info = info.replace("\n", ";color=" + color + "\n")
            geneLines.append(_gffLine(thisgene, thisgene, source_type, info))

        # The cluster line spans from the first gene's start to the last
        # gene's end; both must exist in the GFF.
        startGene = mRNAdict.get(value[0].replace('.1', ''))
        endGene = mRNAdict.get(value[-1].replace('.1', ''))
        if startGene is None or endGene is None:
            print('cluster boundary gene not found:', key)
            continue

        info = 'Name=' + key + ';Note=' + '|'.join(value) + '\n'
        clusterLines.append(_gffLine(startGene, endGene, source_type, info))

    Utils.writeFile(gffclusterfile, ''.join(clusterLines))
    Utils.writeFile(gffgenefile, ''.join(geneLines))
    return gffcontent