Python HHOutputParser: примеры использования

Язык программирования: Python

Пространство имен/Пакет: csb.bio.io.hhpred

Класс/Тип: HHOutputParser

Примеров на hotexamples.com: 8

Python HHOutputParser — найдено 8 примеров. Это лучшие примеры Python-кода для csb.bio.io.hhpred.HHOutputParser, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

HHOutputParser(7)

parse_file(3)

parse_string(1)

Пример #1

Показать файл

Файл: rtb.py Проект: mmagnus/rossmann-toolbox

	def _run_hhsearch(self, sequence, min_prob=0.5):
		"""Run hhsearch on ``sequence`` and reduce the hits to non-overlapping regions.

		The sequence is written to a temporary FASTA file, hhsearch is run
		against ``{self._path}/utils/hhdb/core`` and the resulting ``.hhr``
		file is parsed and then deleted.

		:param sequence: query sequence written as the single FASTA record ``seq``
		:param min_prob: hits with ``hit.probability`` below this value are dropped
		:return: ``(hits_nr, probs)`` where ``hits_nr`` maps consecutive integer
			keys to ``(qstart, qend, probability)`` tuples and ``probs`` is a
			list of ``len(sequence)`` values filled with the hit probability
			at the covered indices; ``({}, ())`` if hhsearch exits non-zero
		"""
		# The context manager removes the temporary FASTA file even if the call raises.
		with tempfile.NamedTemporaryFile(mode='w+t') as temp:
			temp.write(">seq\n{}\n".format(sequence))
			# Make sure the record is on disk before the external process reads it.
			temp.flush()
			fn = temp.name
			cmd = f'{self.hhsearch_loc} -i {fn} -d {self._path}/utils/hhdb/core -n 1'
			# Output is discarded. DEVNULL is used instead of PIPE because
			# subprocess.call never reads the pipes and a chatty child could
			# block forever once the OS pipe buffer fills up.
			result = subprocess.call(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
		if result != 0:
			return {}, ()

		out_fn = f'{fn}.hhr'
		try:
			hits = [(hit.qstart, hit.qend, hit.probability)
					for hit in HHOutputParser().parse_file(out_fn)
					if hit.probability >= min_prob]
		finally:
			# Clean up the result file even when parsing fails.
			if os.path.exists(out_fn):
				os.remove(out_fn)

		# Choose highest prob hit from overlapping hits
		hits_nr = {}
		for beg, end, prob in hits:
			for key, (nr_beg, nr_end, nr_prob) in hits_nr.items():
				# Two closed intervals share at least one position iff the later
				# start does not exceed the earlier end. (The previous test,
				# `len(intersection) >= 0`, was always true.)
				if max(beg, nr_beg) <= min(end, nr_end):
					if prob > nr_prob:
						hits_nr[key] = (beg, end, prob)
					break
			else:
				# No stored hit overlaps this one: keep it as a new region.
				hits_nr[len(hits_nr)] = (beg, end, prob)

		probs = [0]*len(sequence)
		for beg, end, prob in hits_nr.values():
			# NOTE(review): if qstart/qend are 1-based, this is shifted by one and
			# raises IndexError when a hit ends at the last residue - confirm.
			for i in range(beg, end+1):
				probs[i] = prob
		return hits_nr, probs

Пример #2

Показать файл

Файл: __init__.py Проект: khasinski/csb

 def setUp(self):
     """Parse the ``d1ea0a1.hhr`` test file from disk and from its text content.

     ``self.hitlist`` holds the result of ``parse_file`` and
     ``self.hitlist2`` the result of ``parse_string`` on the same parser.
     """
     super(TestHHOutputParser, self).setUp()

     filename = self.config.getTestFile('d1ea0a1.hhr')
     # Read through a context manager so the file handle is closed
     # deterministically instead of being left to the garbage collector.
     with open(filename) as stream:
         content = stream.read()
     tmp = HHOutputParser(True)

     self.hitlist = tmp.parse_file(filename)
     self.hitlist2 = tmp.parse_string(content)

Пример #3

Показать файл

    def setUp(self):
        """Parse the ``d1ea0a1.hhr`` test file from disk and from its text content.

        ``self.hitlist`` holds the result of ``parse_file`` and
        ``self.hitlist2`` the result of ``parse_string`` on the same parser.
        """
        super(TestHHOutputParser, self).setUp()

        filename = self.config.getTestFile('d1ea0a1.hhr')
        # Read through a context manager so the file handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(filename) as stream:
            content = stream.read()
        tmp = HHOutputParser(True)

        self.hitlist = tmp.parse_file(filename)
        self.hitlist2 = tmp.parse_string(content)

Пример #4

Показать файл

def parse_hhr(dir="./"):
    """Collect query-to-hit links from all files matching ``dir + "*.hhr"``.

    ``dir`` is used as a plain string prefix of the glob pattern, so it has
    to end with a path separator to select the files inside a directory.

    A hit is skipped when its id equals the query name of its file or when
    its e-value is greater than ``EVAL_CUTOFF``.

    :return: ``(links, names)`` where ``links`` is a list of
        ``[query_name, hit_id, evalue]`` entries and ``names`` is a list of
        every distinct name that appears in a link (in set order)
    """
    hhr_paths = glob.glob(dir + "*.hhr")
    parser = HHOutputParser(False)

    links = []
    names = set()

    for path in hhr_paths:
        hitlist = parser.parse_file(path)
        query = hitlist._query_name
        for hit in hitlist:
            # Keep only non-self hits that do not exceed the e-value cutoff.
            if hit._id != query and not hit._evalue > EVAL_CUTOFF:
                links.append([query, hit._id, hit._evalue])
                names.update((query, hit._id))
    return links, list(names)

Пример #5

Показать файл

Файл: hhpredofas.py Проект: labstructbioinf/lbs-tools

def generate_msa(hhr_file, queryseq, hitslist, maxevalue=1e-3, ident_cut=0.5, qcov_cut=0.5, eval_cut=1e-3):
	"""Build a query-anchored list of aligned sequences from selected hits of an HHR file.

	Hits are read with ``HHOutputParser(alignments=True)``. A hit is used only if
	its id string ``{hit.id}_{hit.qstart}_{hit.qend}`` is in ``hitslist`` and it
	passes the identity, coverage and e-value cut-offs below.

	:param hhr_file: path passed to ``parse_file``
	:param queryseq: query sequence; becomes the first row of the result
	:param hitslist: collection of accepted hit id strings (must not be empty)
	:param maxevalue: not referenced anywhere in the function body
	:param ident_cut: hits with ``hit.identity`` below this value are skipped
	:param qcov_cut: hits are skipped when (number of non-gap characters in the
		aligned subject) / ``len(queryseq)`` is below this value
	:param eval_cut: hits with ``hit.evalue`` above this value are skipped
	:return: list of strings; row 0 is the query (with ``-`` inserted where a hit
		has an insertion), followed by one row per accepted hit, all padded
		with trailing ``-`` to the length of row 0
	:raises AssertionError: if ``hitslist`` is empty (not checked under ``python -O``)
	"""
	
	assert len(hitslist) > 0, 'provide at least one hit id in `hitlist`'

	# Row 0 is the query; gap characters are spliced into every existing row
	# whenever a later hit needs an extra column.
	fasta = [queryseq]

	for hit in HHOutputParser(alignments=True).parse_file(hhr_file):
	
		hit_id = f'{hit.id}_{hit.qstart}_{hit.qend}'
		if not hit_id in hitslist: continue
	
		# Despite the name, this counts the non-gap characters of the aligned subject string
		query_cov = 1.*len(hit.alignment.subject.replace('-', ''))
		if hit.identity < ident_cut: continue
		if query_cov / len(queryseq) < qcov_cut: continue
		if hit.evalue > eval_cut: continue
		
		temp = ''
		# Current column in row 0 (the gapped query)
		mpos = 0
		# Prefix the aligned subject with qstart-1 gap characters
		# (presumably qstart is 1-based, so this lines it up with the query - confirm)
		sbjct = "-"*(hit.qstart-1) + hit.alignment.subject
		
		# for each aa in sbjct
		for i in range(len(sbjct)):
		
			# no insertion at this position
			# (either still inside the leading pad, or the aligned query has a residue here)
			if i - hit.qstart + 1 < 0 or hit.alignment.query[i - hit.qstart+ 1 ] != '-': 
				# Skip over columns that earlier hits opened as gaps in row 0
				while fasta[0][mpos] == '-':
					mpos = mpos + 1
					temp = temp + '-'      
				temp = temp + str(sbjct[i])
				mpos = mpos + 1    
				
			# insertion present
			else: 
				# Open a new gap column in all existing rows unless row 0 already has one here
				if fasta[0][mpos] != '-':
					for f in range(len(fasta)): # we need to add a gap
						fasta[f] = fasta[f][:mpos] + "-" + fasta[f][mpos:]		
				temp = temp + str(sbjct[i])	
				mpos = mpos + 1
			
		fasta.append(temp)

	# pad shorter rows with trailing gaps so every row is as long as row 0
	for f in range(len(fasta)):
		if len(fasta[f])<len(fasta[0]):
			fasta[f]=fasta[f]+ "-" * (len(fasta[0])-len(fasta[f]))

	return fasta

Пример #6

Показать файл

Файл: builder.py Проект: Hoecker-Lab/protlego

    def get_alignment(self, query: str, no: str) -> HHpredHitAlignment:
        """Return the alignment at position ``no`` of the HHS file for ``query``.

        Only the alignment of the fragment region is returned. Positions in
        the returned alignment are therefore relative to the fragment and
        not to the full query whenever the fragment does not start at the
        N-terminus: with q_start = 20, that amino acid is at position 0 of
        aln.query.

        :param query: str. Domain query
        :param no: Position of the alignment in the file; converted with
            ``int()`` and used as a 1-based index
        :return: HHpredHitAlignment between query and subject for the
            fragment region, or None (after logging the error) if parsing or
            indexing raised an exception
        """
        hhF = get_FUZZLE_hhs(query)
        try:
            hits = HHOutputParser().parse_file(hhF)
            index = int(no) - 1
            return hits[index].alignment
        except Exception as e:
            logger.error(f"Parsing of {hhF} failed. Error follows: {e}")
        return None

Пример #7

Показать файл

def HHSearch_parseTo_DMandNX(hhrs, labels=None):
    """Turn a set of HHR files into pairwise matrices and a networkx graph.

    First pass: every file is parsed to collect the cluster names (the
    query name of each file, or ``labels[i]`` when ``labels`` is given).
    Files that fail in this pass are printed and skipped.

    Second pass: every file is parsed again and, for each hit where neither
    the hit id nor the query name contains ``'anchor'``, the matrices are
    updated at the positions of the hit id and the query name in
    ``clusternames``. Every hit whose id differs from the query name adds an
    edge to the graph.

    :param hhrs: sequence of HHR file paths
    :param labels: optional names used instead of the query names, indexed
        like ``hhrs``
    :return: ``(probaDM, evalDM, pvalDM, lenDM, scoreDM, SSDM, covDM, NX,
        clusternames)``
    """
    clusternames = []
    for i, hhr in enumerate(hhrs):
        try:
            profile = HHOutputParser(alignments=False).parse_file(hhr)
            if labels is not None:
                clusternames.append(labels[i])
            elif profile.query_name not in clusternames:
                clusternames.append(profile.query_name)
        except Exception:
            # Best effort: report the file that could not be processed and go on.
            # (Exception rather than a bare except, so Ctrl-C still interrupts.)
            print(hhr)

    print(clusternames)
    shape = (len(clusternames), len(clusternames))
    evalDM = np.ones(shape)
    pvalDM = np.ones(shape)
    scoreDM = np.ones(shape)
    SSDM = np.ones(shape)
    probaDM = np.zeros(shape)
    lenDM = np.ones(shape)
    covDM = np.ones(shape)
    NX = nx.Graph()
    # NOTE(review): unlike the first pass, a file that cannot be parsed raises here.
    for i, hhr in enumerate(hhrs):
        profile = HHOutputParser(alignments=False).parse_file(hhr)
        for hit in profile:
            if 'anchor' not in hit.id and 'anchor' not in profile.query_name:
                # From here on `i`/`j` are matrix indices, not the file index.
                i = clusternames.index(hit.id.strip())
                j = clusternames.index(profile.query_name.strip())

                # NOTE(review): this is query length over aligned span (not a
                # fraction covered) and divides by zero when qend == qstart - confirm.
                covq = hit.qlength / (hit.qend - hit.qstart)
                covDM[i, j] = min([covq, covDM[i, j]])

                if hit.evalue < evalDM[i, j]:
                    evalDM[i, j] = hit.evalue
                    evalDM[j, i] = evalDM[i, j]

                if hit.pvalue < pvalDM[i, j]:
                    pvalDM[i, j] = hit.pvalue
                    pvalDM[j, i] = pvalDM[i, j]

                if scoreDM[i, j] < hit.score:
                    scoreDM[i, j] = hit.score
                    scoreDM[j, i] = scoreDM[i, j]

                if SSDM[i, j] < hit.ss_score:
                    SSDM[i, j] = hit.ss_score
                    SSDM[j, i] = SSDM[i, j]

                if probaDM[i, j] < hit.probability:
                    probaDM[i, j] = hit.probability
                    probaDM[j, i] = probaDM[i, j]

                # use smallest of the two prots
                if lenDM[i, j] == 1 or lenDM[i, j] > hit.qlength:
                    lenDM[i, j] = hit.qlength
                    lenDM[j, i] = lenDM[i, j]
            if hit.id != profile.query_name:
                # NOTE(review): for 'anchor' hits the indices below were not
                # refreshed, so `i`/`j` are left over from an earlier hit (or `j`
                # is unbound on the very first hit) - confirm the intended
                # edge attributes for that case.
                dico = {
                    'score': scoreDM[i, j],
                    'prob': probaDM[i, j],
                    'eval': evalDM[i, j],
                    'ss': SSDM[i, j],
                    'length': lenDM[i, j],
                    # NOTE(review): all four positional keys carry the covDM value.
                    'qend': covDM[i, j],
                    'qstart': covDM[i, j],
                    'end': covDM[i, j],
                    'start': covDM[i, j],
                }
                NX.add_edge(hit.id, profile.query_name, dict=dico)
                NX.nodes[hit.id]['len'] = hit.slength
                NX.nodes[profile.query_name]['len'] = hit.qlength
                NX.nodes[profile.query_name]['file'] = hhr
    return probaDM, evalDM, pvalDM, lenDM, scoreDM, SSDM, covDM, NX, clusternames

Пример #8

Показать файл

    def setUp(self):
        """Parse the ``d1ea0a1.hhr`` test file into ``self.hitlist``."""
        super(TestHHOutputRegressions, self).setUp()

        hhr_path = self.config.getTestFile('d1ea0a1.hhr')
        parser = HHOutputParser()
        self.hitlist = parser.parse_file(hhr_path)