def localHHBLITS(seq = "NWLGVKRQPLWTLVLILWPVIIFIILAITRTKFPP", database = "../data/PP2db", minEVal = 1):
    """Run a local hhblits search and wrap its report in an HHBLITS object.

    seq      -- either a sequence consisting only of the letters A-Z (it is
                then written to a temporary FASTA file in the current
                directory), or anything else, which is handed to hhblits
                unchanged as the path of an existing input file.
    database -- value passed to hhblits as ``-d``.
    minEVal  -- value passed to hhblits as ``-e``.

    Returns ``HHBLITS(report)``. ``report`` is the empty string when hhblits
    exits with a non-zero code or cannot be started; both cases are logged
    instead of raised. Errors while writing the input file or reading the
    report still propagate to the caller.
    """
    import time

    out.writeDebug("Do a local hhblits search for {} in {}".format( seq, database ) )

    # Build a practically unique suffix so parallel runs do not share files.
    char_set = string.ascii_uppercase + string.digits
    time_stamp = (
        str(int(time.time()))
        + ''.join(random.sample(char_set*6, 40))
        + str(random.randint(0, 100000000))
    )

    seq = str(seq)
    is_raw_sequence = re.match('^[A-Z]*$', seq) is not None
    seq_file = 'hhblits_input_' + time_stamp if is_raw_sequence else seq
    outfile = 'hhblits_' + time_stamp + '.out'

    # An argument list (no shell) keeps file names and database paths from
    # being interpreted as shell syntax.
    command = [
        "hhblits",
        "-i", seq_file,
        "-o", outfile,
        "-d", str(database),
        "-e", str(minEVal),
        "-n", "1",
    ]

    # Only files created by this call are removed; a caller-supplied input
    # file is left alone.
    temp_files = [outfile]
    if is_raw_sequence:
        temp_files.append(seq_file)

    hhblitsResults = ''
    try:
        if is_raw_sequence:
            with open(seq_file, 'w') as fh:
                fh.write('>no_header\n' + seq)
        try:
            subprocess.check_output(command, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as err:
            out.writeLog("Return code for hhblits search {} in {} returned with exit code {}!".format( seq, database, err.returncode ) )
        except OSError as err:
            # Without a shell a missing/non-executable binary raises here
            # instead of yielding exit code 127; keep it a soft failure.
            out.writeLog("Could not start hhblits search {} in {}: {}!".format( seq, database, err ) )
        else:
            with open(outfile) as fh:
                hhblitsResults = fh.read()
    finally:
        # Clean up on every path, including failed runs.
        for path in temp_files:
            if os.path.exists(path):
                os.remove(path)

    return HHBLITS(hhblitsResults)
def getHpoTermById(self, id, log = True):
    """
    Look up an hpo term in ``self.hpoTermsDict``.

    Only the part of ``id`` before the first space is used as the key.
    When the key is unknown, None is returned; a log entry is written
    first unless ``log`` is false.
    """
    term_key = id.partition(" ")[0]
    try:
        term = self.hpoTermsDict[term_key]
    except KeyError:
        term = None
        if log:
            out.writeLog( "KeyError getting term for id: \"" + str( id ) + "\"! => returning None!" )
    return term
def localBlast(seq="NWLGVKRQPLWTLVLILWPVIIFIILAITRTKFPP", database="../data/genes_UniProt.fasta", minEVal=1):
    """Run a local ``blast2 -p blastp`` search and wrap its output in a Blast object.

    seq      -- query sequence; its string form is sent to blast2 on stdin.
    database -- value passed to blast2 as ``-d``.
    minEVal  -- value passed to blast2 as ``-e``.

    Returns ``Blast(output)`` where ``output`` is the combined stdout/stderr
    of blast2. A non-zero exit code (or a blast2 binary that cannot be
    started) is logged, not raised; the captured text is still handed to
    Blast.
    """
    # Local import: the legacy `commands` module this replaces was removed in
    # Python 3, and this block cannot rely on the file importing subprocess.
    import subprocess

    out.writeDebug("Do a local blast search for {} in {}".format(seq, database))

    # Argument list + stdin instead of `echo "<seq>" | blast2 ...` through a
    # shell, so neither the sequence nor the database path is shell-parsed.
    command = [
        "blast2",
        "-p", "blastp",
        "-d", str(database),
        "-N",
        "-e", str(minEVal),
        "-m", "7",
    ]
    try:
        process = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )
        # `echo` used to append a newline to the sequence; keep doing so.
        output = process.communicate("{}\n".format(seq))[0]
        returnCode = process.returncode
    except OSError as err:
        # A shell would have reported a missing binary as exit code 127 plus
        # an error text; mimic that rather than raising.
        returnCode, output = 127, str(err)

    # getstatusoutput() dropped exactly one trailing newline.
    if output.endswith("\n"):
        output = output[:-1]

    if returnCode != 0:
        out.writeLog(
            "Return code for blast search {} in {} returned with exit code {}!".format(
                seq, database, returnCode
            )
        )
    return Blast(output)
def _loadPrecalculatedHits(lookupdb, name):
    """Read the precalculated hits stored for *name* in the lookup file.

    The file is scanned for a line equal to ``name`` (ignoring surrounding
    whitespace); the tab-indented lines that follow it are parsed as hits
    until the first line that is not tab-indented.

    Returns ``(found, hits)``: whether the entry exists and its hit dicts.
    """
    hit_pattern = re.compile("\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)")
    wanted = name.strip()
    found, hits = False, []
    with open(lookupdb, "r") as f:
        for line in f:
            if line.strip() == wanted:
                found = True
            elif found and line.startswith("\t"):
                m = hit_pattern.search(line)
                # bool() of any non-empty text is True, so "False" used to be
                # read as True. Only explicit false spellings are treated as
                # False here. NOTE(review): the writer of the lookup file is
                # not visible from here - confirm how hit_order is serialised.
                order_text = m.group(6).strip().lower()
                hits.append(
                    {
                        "method": m.group(1),
                        "hit_id": m.group(2),
                        "hit_value": float(m.group(3)),
                        "hit_from": int(m.group(4)),
                        "hit_to": int(m.group(5)),
                        "hit_order": order_text not in ("", "0", "false", "none"),
                    }
                )
            elif found:
                break
    return found, hits


def predictSequence(args, hpoGraph, uni2hpoDict, name="Sequence", seq=""):
    """Predict HPO terms for one sequence and write them via ``out.writeOutput``.

    args        -- parsed options; reads lookupdb, blastDbFile, blastMinEVal,
                   hhblitsDbFile, fast, neuronalNet, minimalConfidence and
                   createSvgImage.
    hpoGraph    -- full HPO graph; sub graphs are taken from it.
    uni2hpoDict -- maps a hit id to its list of HPO terms.
    name        -- identifier used for the lookup, the output lines and the
                   svg file name.
    seq         -- the sequence to predict.

    Any exception is reported through ``out.writeError`` and ends the process
    with exit status 1.
    """
    try:
        out.writeLog('Predict function for protein: id: "' + str(name) + '" sequence: "' + str(seq) + '"')

        # Use precalculated results when a lookup database is configured.
        foundInLookUp, hits = False, []
        if args.lookupdb:
            out.writeLog("Checking for precalculated results!")
            foundInLookUp, hits = _loadPrecalculatedHits(args.lookupdb, name)

        # Otherwise search for similar sequences with blast and hhblits.
        if not foundInLookUp:
            out.writeLog("Check blast and hhblits for sequence orthologs!")
            blastResults = blast.Blast.localBlast(seq=seq, database=args.blastDbFile, minEVal=args.blastMinEVal)
            for hit in blastResults.hits:
                out.writeDebug("Blast: found hit: " + str(hit))
            hhblitsResults = hhblits.HHBLITS.localHHBLITS(seq=str(seq), database=args.hhblitsDbFile)
            for hit in hhblitsResults.hits:
                out.writeDebug("hhblits: found hit: " + str(hit))
            hits.extend(blastResults.hits)
            hits.extend(hhblitsResults.hits)

        # Fast mode keeps only the six hits with the smallest hit_value.
        if args.fast:
            out.writeLog("Reduce hits for faster prediction!")
            hits = sorted(hits, key=lambda t: t["hit_value"])[:6]

        # Attach the HPO terms of every hit (the terms themselves are not
        # logged: the output could get very large).
        out.writeLog("uniprot ids ({}) 2 HPO Terms".format(len(hits)))
        for hit in hits:
            try:
                hit.update({"hpoTerms": uni2hpoDict[hit["hit_id"]]})
            except KeyError:
                out.writeWarning("MISSING HPO TERMS FOR HIT: " + str(hit))

        # Build one sub graph per hit and merge them onto the root graph.
        out.writeLog("Build and merge tree for similar sequences!")
        graph = hpoGraph.getHpoSubGraph(hpoGraph.getRoot())
        for hit_id, hit in enumerate(hits):
            if "hpoTerms" not in hit:
                # Already warned about above; a hit without terms has nothing
                # to merge and must not abort the whole prediction.
                continue
            graph += hpoGraph.getHpoSubGraph(hit["hpoTerms"], {hit_id: hit})

        out.writeLog("Run main prediction!")
        p = predictor.Predictor(args.neuronalNet)
        p.runprediction(seq, graph)

        # The root terms are always accepted.
        for root in hpoGraph.getRoot():
            graph.getHpoTermById(root).accepted = 1

        out.writeLog("writing output")
        for node in graph.getAcceptedNodes(args.minimalConfidence):
            # 4.0 forces float division: with an int `accepted` (e.g. the root
            # value 1 set above) Python 2 would truncate the result.
            score = (node.accepted + 2) / 4.0
            out.writeOutput("{}\t{}\t{}".format(name, node.id, "%.2f" % score))

        if args.createSvgImage:
            out.writeLog("Create a svg image showing all results!")
            if graph is not None:
                graph.writeSvgImage(fileName=str(name) + ".svg")
            else:
                out.writeWarning("Can't create a svg image from an empty tree!")

    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        out.writeError("Predicting Error: " + str(err) + " on line: " + str(exc_tb.tb_lineno))
        # sys.exit instead of the interactive-only `exit` helper from `site`.
        sys.exit(1)
help="The minimal confidance value an accepted node should have; [from -2 to 2] (default: 0.0)!", ) parser.add_argument("--fast", action="store_true", dest="fast", help="Weather to perform a fast prediction!") args = parser.parse_args() # init output format out.supressMessage = bool(args.verbosity >> 0 & 1) out.supressDebug = bool(args.verbosity >> 1 & 1) out.supressLog = bool(args.verbosity >> 2 & 1) out.supressWarning = bool(args.verbosity >> 3 & 1) out.supressError = bool(args.verbosity >> 4 & 1) out.supressOutput = bool(args.verbosity >> 5 & 1) out.outputFormat = args.outputFormat # init the hpoParser out.writeLog("Build hpoGraph from file") hpoGraph = None if os.path.isfile(args.hpoFile): hpoGraph = hpoParser.HpoGraph(hpoFile=args.hpoFile) else: out.writeLog("missing hpoFile! Try standard hpoFile in the data directory") hpoGraph = hpoParser.HpoGraph() # init the hpo-identifier dict out.writeLog("Build uniprot 2 hpo dictionary") uni2hpoDict = {} f = open(args.uni2hpo, "r") for line in f: line = line.strip() uni2hpoDict.update({line.split("\t")[0]: line.split("\t")[1].split(",")}) f.close()