def example():
    if request.method == 'GET':
        _file_a = request.args.get("file_a")
        _file_b = request.args.get("file_b")
        _file_c = request.args.get("file_c")
        _v = ViewRenderer("ml")
        _v.render()
        if _file_a is not None:
            #if you have the other parameters...
            #m = Ml("data/n2/n2_sample/1425405680330/","data/n2/n2_1/1425406094608/","data/n2/n2_2/1425407232389/")
            m = Ml(_file_a, _file_b, _file_c)
            _data = m.classify()
            h = Helper()
            _data = h.listToGrid(_data)
            _v = _v.inject("%%%checkboxes%%%", str(_data))
            return _v
        else:
            _v = _v.inject("%%%checkboxes%%%", "")
            return _v
    else:
        return "Method not supported"
def deleteOverlapsFromVcf(self, variants):
    '''
    Delete the variants from 'variantsA' which are also in 'variantsB'.
    '''
    variantSetA = set(self.variantDict.keys())

    #determine the type of variants
    if type(variants) == str:
        variantsB = open(variants)
    elif type(variants) != file:
        raise TypeError("variants has wrong type, need str or file, %s found" % type(variants))
    else:
        variantsB = variants
    #TODO: variants could also be another object of VariantSet

    #get start time
    startTime = Helper.getTime()
    Helper.info(" [%s] Delete overlaps from %s" % (startTime.strftime("%c"), variantsB.name), self.logFile, self.textField)

    for line in variantsB:
        if line.startswith("#"):
            continue
        for varTuple in self.getVariantTuble(line):
            if varTuple in variantSetA:
                #variantSetA.discard(varTuple)
                variantSetA.remove(varTuple)
                del self.variantDict[varTuple]

    #calculate duration
    Helper.printTimeDiff(startTime, self.logFile, self.textField)
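# Usage sketch for deleteOverlapsFromVcf (hedged: the constructor arguments and
# file names below are illustrative, not taken from a real run; VariantSet is the
# class these methods belong to):
#
#   variants = VariantSet("sample.vcf", logFile=None, textField=0)
#   variants.deleteOverlapsFromVcf("dbsnp.vcf")   # drops every variant also listed in dbsnp.vcf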
def overallAnalytics(self):
    print('Total number of entries: ', end=' ')
    print(len(self.wordCountOfEntriesDict))
    print('First entry: ', end=' ')
    print(Helper.prettyPrintDate(self.firstDate))
    print('Last entry: ', end=' ')
    print(Helper.prettyPrintDate(self.mostRecentDate))
    print('Total days from first to last entry: ', end=' ')
    totalDays = self.mostRecentDate - self.firstDate  #this is correct
    days = totalDays.days
    print(days)
    print('Percentage of days from first to last with an entry: ', end=' ')
    print(str(round(float(len(self.wordCountOfEntriesDict)) / days * 100, 2)) + '%')
    print('Average length per entry: ', end=' ')
    numberOfEntries = len(self.wordCountOfEntriesDict)
    sumOfLengths = 0
    longestEntryLength = 0
    for date in list(self.wordCountOfEntriesDict.keys()):
        length = self.wordCountOfEntriesDict[date]
        if length > longestEntryLength:
            longestEntryLength = length
            longestEntryDate = date
        sumOfLengths += length
    print(round(float(sumOfLengths) / numberOfEntries, 2))
    print('Longest entry: ' + str(longestEntryLength) + ' words on ', end=' ')
    print(Helper.prettyPrintDate(longestEntryDate))
    print('Total number of words written: ', end=' ')
    print(locale.format("%d", self.totalNumberOfWords, grouping=True))
def _getSubsetForGP(self, S, random=True, normalize=True):
    Nsubset = min(self.numSamplesSubsetGP, S.shape[0])
    if random:
        return Helper.getRandomSubset(S, Nsubset)
    else:
        return Helper.getRepresentativeRows(S, Nsubset, normalize)
def parseSummaryFile(sumFile, logFile=None, textField=0):
    '''
    Parses a .summary file from an rnaEditor output directory and returns it as an ordered dict.
    Note: unedited genes will be skipped.
    :param sumFile: .summary file of rnaEditor
    :param logFile:
    :param textField:
    :return: OrderedDict {GeneName1:[GeneId1,3'UTR,5'UTR,EXON,Intron,Total]}
    '''
    if type(sumFile) == str:
        try:
            sumFile = open(sumFile, "r")
        except IOError:
            Helper.warning("Could not open %s to read summary" % sumFile, logFile, textField)
    elif type(sumFile) == file:
        pass
    else:
        raise TypeError("Summary file has to be a path or a file object", logFile, textField)

    sumDict = OrderedDict()
    totalGenes = 0
    for line in sumFile:
        if line.startswith("#"):
            continue  #skip comments
        line = line.rstrip().split()
        totalGenes += 1
        if int(line[6]) < 1:
            continue  #skip unedited genes
        try:
            v = map(int, line[2:7])
        except ValueError:
            v = line[2:7]
        sumDict[line[0]] = [line[1]] + v
    return sumDict, totalGenes
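# Usage sketch for parseSummaryFile (hedged: "sample.summary" is a hypothetical
# path to an rnaEditor output file; the value layout follows the docstring above):
if __name__ == '__main__':
    sumDict, totalGenes = parseSummaryFile("sample.summary")
    for geneName, values in sumDict.items():
        print geneName, values  # [GeneId, 3'UTR, 5'UTR, Exon, Intron, Total]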
def __init__(self, rnaEdit):
    '''
    Constructor
    '''
    self.rnaEdit = rnaEdit

    #set fastq files and check if the read qualities have to be converted to phred33
    if self.rnaEdit.params.paired == True:
        if Helper.isPhred33Encoding(self.rnaEdit.fastqFiles[0], 1000000, self.rnaEdit.logFile, self.rnaEdit.textField) == False \
                or Helper.isPhred33Encoding(self.rnaEdit.fastqFiles[1], 1000000, self.rnaEdit.logFile, self.rnaEdit.textField) == False:
            self.fastqFile1 = Helper.convertPhred64toPhred33(self.rnaEdit.fastqFiles[0], self.rnaEdit.params.output + "_1_phred33.fastq", self.rnaEdit.logFile, self.rnaEdit.textField)
            self.fastqFile2 = Helper.convertPhred64toPhred33(self.rnaEdit.fastqFiles[1], self.rnaEdit.params.output + "_2_phred33.fastq", self.rnaEdit.logFile, self.rnaEdit.textField)
        else:
            self.fastqFile1 = self.rnaEdit.fastqFiles[0]
            self.fastqFile2 = self.rnaEdit.fastqFiles[1]
    elif self.rnaEdit.params.paired == False:
        if Helper.isPhred33Encoding(self.rnaEdit.fastqFiles[0], 1000000, self.rnaEdit.logFile, self.rnaEdit.textField) == False:
            self.fastqFile = Helper.convertPhred64toPhred33(self.rnaEdit.fastqFiles[0], self.rnaEdit.params.output + "_1_phred33.fastq", self.rnaEdit.logFile, self.rnaEdit.textField)
        else:
            self.fastqFile = self.rnaEdit.fastqFiles[0]
def parse_pls(url):
    urls = []
    pls_content = Helper.downloadString(url)
    stream = Helper.parsePls(pls_content)
    if stream:
        urls.append(stream)
    return urls
def newAssay(self):
    '''
    Function which starts a new analysis
    '''
    inputTab = self.view.tabMainWindow.widget(0)

    #get Parameters
    parameters = Parameters(inputTab)

    if parameters.paired == True:
        #fastqs=inputTab.dropList.dropFirstTwoItems()
        fastqs = inputTab.dropList.dropFirstItem()
        if fastqs[0] != None:
            if not str(fastqs[0].text()).endswith(".bam"):
                fastqs += inputTab.dropList.dropFirstItem()
    else:
        fastqs = inputTab.dropList.dropFirstItem()

    """
    check if droplist returned a value
    """
    if parameters.paired == True:
        if fastqs[-1] == None:
            QtGui.QMessageBox.information(self.view, "Warning", "Warning:\nNot enough Sequencing Files for paired-end sequencing!!!\n\nDrop FASTQ-Files to the drop area!")
            return
    if fastqs[0] == None:
        QtGui.QMessageBox.information(self.view, "Warning", "Warning:\nNo Sequencing Files found!!!\n\nDrop FASTQ-Files to the drop area!")
        return

    sampleName = Helper.getSampleName(str(fastqs[0].text()))
    if sampleName == None:
        QtGui.QMessageBox.information(self.view, "Warning", "Warning:\nNo valid Sequencing File!!!\n\nDrop FASTQ-Files to the drop area!")
        return

    fastqFiles = []
    for fastq in fastqs:
        fastqFiles.append(str(fastq.text()))

    runTab = RunTab(self)

    #initialize new Thread with new assay
    try:
        assay = RnaEdit(fastqFiles, parameters, runTab.commandBox)
    except Exception as err:
        QtGui.QMessageBox.information(self.view, "Error", str(err) + "Cannot start Analysis!")
        Helper.error(str(err) + "\n creating rnaEditor Object Failed!", textField=runTab.commandBox)

    currentIndex = self.view.tabMainWindow.count()
    # self.view.tabMainWindow.addTab(self.runTab, "Analysis"+ str(Helper.assayCount))
    self.view.tabMainWindow.addTab(runTab, sampleName + " " + str(currentIndex))

    Helper.runningThreads.append(assay)
    assay.start()
    self.view.connect(assay, QtCore.SIGNAL("taskDone"), self.openAnalysis)
def parse_m3u(url):
    urls = []
    m3u_content = Helper.downloadString(url)
    stream = Helper.parsem3u(m3u_content)
    if stream:
        urls.append(stream)
    return urls
def deleteNonEditingBases(self):
    startTime = Helper.getTime()
    Helper.info("Delete non-editing bases (keep only T->C and A->G)", self.logFile, self.textField)
    #iterate over a copy of the keys, because entries are deleted while iterating
    for varTuple in list(self.variantDict.keys()):
        chr, pos, ref, alt = varTuple
        if (ref == "A" and alt == "G") or (ref == "T" and alt == "C"):
            pass
        else:
            del self.variantDict[varTuple]
def _updateBandwidthsGP(self, Ssub):
    bwNonKb = Helper.getBandwidth(Ssub[:, 0:self.NUM_NON_KB_DIM], Ssub.shape[0], self.bwFactorNonKbGP)
    kbPos = Ssub[:, self.NUM_NON_KB_DIM:]
    bwKb = Helper.getBandwidth(self._reshapeKbPositions(kbPos), Ssub.shape[0], self.bwFactorKbGP)
    self.policy.kernel.setBandwidth(bwNonKb, bwKb)
    self.policy.kernel.setWeighting(self.weightNonKbGP)
def readFile(self, url):
    try:
        f = open(url, 'r')
    except IOError:
        print('File not found')
        newPath = input('Enter new path > ')
        return self.readFile(newPath)

    #TODO: this doesn't work for entirely unknown reasons
    newdate = re.compile(r'\s*([0-9]{1,2}-[0-9]{1,2}-[0-9]{2})\s*')
    currentDateStr = None
    currentDateObj = None
    numWords = 0
    namesFound = set()
    totalWordNum = 0
    currentDayEntry = ''  #holds all the lines for the current day, so we can compute a hash of the day later on
    line = f.readline()
    while line != '':
        if self.prefs.GUESS_NAMES:
            self.guessNames(line)
        #check a line to see if it's a date, and therefore a new day
        dateFound = newdate.match(line)
        if dateFound is not None:
            #it's a new date, so wrap up the previous date and set up to move on to the next one
            if namesFound is not None:
                self.addRelatedNames(namesFound)
            namesFound = set()
            self.dayEntryHashTable[currentDateObj] = hashlib.md5(currentDayEntry.encode())  #TODO: deal with first date
            if numWords > 0:
                self.wordCountOfEntriesDict[currentDateObj] = numWords  #should be here, since we want it triggered at the end
                totalWordNum += numWords
                numWords = 0
            currentDateStr = dateFound.group(0)
            currentDateStr = Helper.formatDateStringIntoCleanedString(currentDateStr)
            currentDateObj = Helper.makeDateObject(currentDateStr)
            if currentDateObj > self.mostRecentDate:
                #found a later date than what we've seen so far
                self.mostRecentDate = currentDateObj
            if currentDateObj < self.firstDate:
                #found an earlier date than what we have now
                self.firstDate = currentDateObj
            line = line[len(currentDateStr):]  #remove the date from the line, so it's not counted as a word
        if currentDateStr is not None:
            (wordsFound, namesFoundThisLine) = self.addLine(line, currentDateObj)
            for name in namesFoundThisLine:
                namesFound.add(name)
            numWords += wordsFound
        line = f.readline()
        currentDayEntry += line  #add the line to the day's entry

    #need to capture the last date for the entry length
    self.wordCountOfEntriesDict[currentDateObj] = numWords
    self.totalNumberOfWords = totalWordNum + numWords  #need to include the words from the last entry
    f.close()
def stopImmediately(self):
    if hasattr(self, 'callEditSites'):
        self.callEditSites.cleanUp()
    self.isTerminated = True
    if self.runningCommand != False:
        self.runningCommand.kill()
    else:
        self.terminate()
        self.wait()
    Helper.error("Analysis was terminated by User", self.logFile, self.textField)
def run(self):
    try:
        self.startAnalysis()
    except Exception:
        Helper.error("RnaEditor Failed", self.logFile, self.textField)

    """
    At this point the RnaEditor has successfully finished
    """
    fileDir = os.path.dirname(os.path.realpath(__file__))
    cmd = ["python", fileDir + "/createDiagrams.py", "-o", self.params.output]
    subprocess.call(cmd)

    self.emit(QtCore.SIGNAL("taskDone"), self.params.output + ".html")
def topGenes(self, sumDict, fileName, number=20, value=4):
    if number > len(sumDict):
        if len(sumDict) < 1:
            Helper.warning("no edited genes found", self.logFile, self.textField)
            return
        Helper.warning("The number of top genes you wanted is bigger than the number of edited genes", self.logFile, self.textField)
        number = len(sumDict)
    if value > 4:
        Helper.error("sumDict only holds five values (indices 0-4)", self.logFile, self.textField)

    counts = collections.OrderedDict(sorted(sumDict.items(), key=lambda t: t[1][value], reverse=True)[:number])

    barNameTuple = ()
    valueMatrix = [[]]
    for array in counts.values():
        valueMatrix[0].append(array[value])
    for gene in counts.keys():
        barNameTuple += (gene.names[0],)

    if value == 0:
        barName = "3'-UTR"
    elif value == 1:
        barName = "5'-UTR"
    elif value == 2:
        barName = "Exonic"
    elif value == 3:
        barName = "Intronic"
    elif value == 4:
        barName = "Total"

    yLim = max(max(i) for i in valueMatrix) + 1
    Helper.createBarplot(valueMatrix, fileName, barNameTuple, [barName], width=0.35, title="Highly Edited Genes", yLim=yLim, barText=False, yText="Editing Counts")
def startAnalysis(self):
    """
    START MAPPING
    """
    if self.fastqFiles[0].endswith("bam"):
        if self.fastqFiles[0].endswith("noDup.realigned.recalibrated.bam"):
            Helper.info("Bam File given. Skip mapping", self.logFile, self.textField)
            self.mapFastQ = None
            mapResultFile = self.fastqFiles[0]
        else:
            Helper.error("Bam File was not mapped with RnaEditor, this is not supported. Please provide the fastq Files to RnaEditor", self.logFile, self.textField, "red")
    else:
        self.mapFastQ = MapFastq(self)
        mapResultFile = self.mapFastQ.startAnalysis()

    """
    START CALLING EDITING SITES
    """
    self.callEditSites = CallEditingSites(mapResultFile, self)
    result = self.callEditSites.startAnalysis()

    #finished
    self.isTerminated = True

    Helper.status("rnaEditor Finished with %s" % self.params.output, self.logFile, self.textField, "green", True)
    Helper.status("Open %s to see the results" % self.params.output + ".html", self.logFile, self.textField, "green", True)

    self.cleanUp()
def _updateKernelParameters(self, S, A, random=True, normalize=True):
    SA = self._getStateActionMatrix(S, A)

    if random:
        self.MuS = Helper.getRandomSubset(S, self.numFeatures)
        self.MuSA = Helper.getRandomSubset(SA, self.numFeatures)
    else:
        self.MuS = Helper.getRepresentativeRows(S, self.numFeatures, normalize)
        self.MuSA = Helper.getRepresentativeRows(SA, self.numFeatures, normalize)

    NUM_SAMPLES_FOR_BW_ESTIMATE = 500

    # bandwidth for PHI_S
    bwNonKbS = Helper.getBandwidth(self.MuS[:, 0:self.NUM_NON_KB_DIM],
                                   NUM_SAMPLES_FOR_BW_ESTIMATE, self.bwFactorNonKbS)
    kbPosS = self._reshapeKbPositions(self.MuS[:, self.NUM_NON_KB_DIM:])
    bwKbS = Helper.getBandwidth(kbPosS, NUM_SAMPLES_FOR_BW_ESTIMATE, self.bwFactorKbS)
    self.kernelS.setBandwidth(bwNonKbS, bwKbS)
    self.kernelS.setWeighting(self.weightNonKbS)

    # bandwidth for PHI_SA
    bwNonKbSA = Helper.getBandwidth(self.MuSA[:, 0:(self.NUM_NON_KB_DIM + 2)],
                                    NUM_SAMPLES_FOR_BW_ESTIMATE, self.bwFactorNonKbSA)
    kbPosSA = self._reshapeKbPositions(self.MuSA[:, (self.NUM_NON_KB_DIM + 2):])
    bwKbSA = Helper.getBandwidth(kbPosSA, NUM_SAMPLES_FOR_BW_ESTIMATE, self.bwFactorKbSA)
    self.kernelSA.setBandwidth(bwNonKbSA, bwKbSA)
    self.kernelSA.setWeighting(self.weightNonKbSA)
def __init__(self, fastqFiles, params, textField):
    QtCore.QThread.__init__(self)

    if isinstance(params, Parameters):
        self.params = params
    else:
        Helper.error("Params has to be Instance of Parameters")
    if isinstance(textField, QtGui.QTextEdit) or textField == 0:
        self.textField = textField
    else:
        Helper.error("textField has to be Instance of QtGui.QTextEdit or 0")

    self.fastqFiles = fastqFiles

    #hold the running Popen object
    self.runningCommand = False
    self.isTerminated = False

    #hold basic statistic values of the run
    basicStatDict = {}

    #set directory where the outputFiles should be written to
    if self.params.output == "default":
        if self.fastqFiles[0].endswith("noDup.realigned.recalibrated.bam"):
            self.sampleName = fastqFiles[0][fastqFiles[0].rfind("/") + 1:fastqFiles[0].rfind(".noDup.realigned.recalibrated.bam")]
            self.outdir = fastqFiles[0][0:fastqFiles[0].rfind("/") + 1]
        else:
            self.sampleName = fastqFiles[0][fastqFiles[0].rfind("/") + 1:fastqFiles[0].rfind(".")]
            # outdir = /path/to/output/rnaEditor/samplename/
            self.outdir = fastqFiles[0][0:fastqFiles[0].rfind("/") + 1] + "rnaEditor/" + self.sampleName + "/"
        #output=/path/to/output/rnaEditor/samplename/samplename
        self.params.output = self.outdir + self.sampleName

        if not os.path.exists(self.outdir):
            os.makedirs(self.outdir, mode=0755)
            os.chmod(self.outdir, 0755)
        #create folder for html output
        if not os.path.exists(self.outdir + "/html"):
            os.makedirs(self.outdir + "/html", mode=0755)
            os.chmod(self.outdir + "/html", 0755)

    self.checkDependencies()  #check if the input files and tools are there
    self.printParameters()
def annotateVariantDict(self, genome):
    '''
    Adds the corresponding gene and the exact segment where the SNP appears.
    :param genome: Genome
    '''
    startTime = Helper.getTime()
    Helper.info(" [%s] Annotating Variants" % (startTime.strftime("%c")), self.logFile, self.textField)
    for v in self.variantDict.values():
        anno = genome.annotatePosition(v.chromosome, v.position)  #[(gene1,segment1;segment2;..)..]
        GI = []
        for a in anno:
            GI.append(a)
        v.attributes["GI"] = GI
    Helper.printTimeDiff(startTime, self.logFile, self.textField)
def post(self):
    itemName = self.request.get("itemName")
    categoryName = self.request.get("categoryName")
    userEmail = self.request.get("userEmail")
    comment = self.request.get("comment")
    owner = self.request.get("owner")
    comment = str(comment).strip()
    message = ""
    if comment:
        #self.response.out.write("Key is "+ str(Helper.getItemKey(userEmail, categoryName, itemName)))
        ifAlreadyExists = ItemComment.gql("WHERE ANCESTOR IS :1", Helper.getItemKey(userEmail, categoryName, itemName))
        #self.response.out.write("Count is "+ str(ifAlreadyExists.count()))
        if ifAlreadyExists.count() == 0:
            #self.response.out.write("Adding comment for " + itemName + " " + categoryName + " " + userEmail)
            itemComment = ItemComment(parent=Helper.getItemKey(userEmail, categoryName, itemName))
            itemComment.comment = comment
            itemComment.commenter = userEmail
            itemComment.item = itemName
            itemComment.category = categoryName
            itemComment.put()
            message = "Comment Saved"
        else:
            message = "You can comment only once on an item"
    else:
        #empty comment
        message = "Cannot enter empty comment"

    items = Item.gql("WHERE ANCESTOR IS :1", Helper.getCategoryKey(owner, categoryName))
    template_values = {
        'items': items,
        'owner': owner,
        'user': users.get_current_user(),
        'category': categoryName,
        'logoutURL': users.create_logout_url('./'),
        'message': message
    }
    path = os.path.join(os.path.dirname(__file__), './html/items.html')
    self.response.out.write(template.render(path, template_values))
def cleanUp(self):
    #print "deleteAssay " + str(self)
    if self.runningCommand != False:
        self.runningCommand.kill()
    try:
        if self.mapFastQ != None:
            self.mapFastQ.cleanUp()
        del self.mapFastQ
    except AttributeError:
        Helper.error("could not delete MapFastq instance", self.logFile, self.textField)
    try:
        self.callEditSites.cleanUp()
        del self.callEditSites
    except AttributeError:
        Helper.error("could not delete CallEditingSites instance", self.logFile, self.textField)
def getMarkUnderWord(self, displayName, last20Words, wasPluralWithApostrophe):
    assert type(displayName) is str
    originalWord = displayName  #needed when the name isn't actually a name
    displayName = Helper.cleanWord(displayName, True)
    print('\n\n\n')
    for x in last20Words:
        print(x + ' ', end='')
    print('\n' + displayName + ':')
    numPossibleLastNames = 0
    if displayName in self.uniqueDisplayNamesToNameDict.keys():
        #we've specified to give the same markup to all these display names
        firstName = self.uniqueDisplayNamesToNameDict[displayName][0]
        lastName = self.uniqueDisplayNamesToNameDict[displayName][1]
    else:
        #proceed normally
        firstName = ''
        print('Is this the proper first name for ' + displayName + '? [enter] for yes, [n] for no')
        isProperFirstName = input('>')
        if isProperFirstName == 'n':
            print('Enter proper first name (or enter "None" if this is not a name)')
            possibleFirstName = input('>')
            if possibleFirstName == 'None' or possibleFirstName == 'none':
                #not actually a name
                return WordClass.addWordOrMarkup(originalWord)
            firstName = possibleFirstName
        else:
            firstName = displayName
        try:
            self.lastNamesForFirstNameDict[firstName]  #trigger a KeyError if there's one to be thrown
            for nameFromDict in self.lastNamesForFirstNameDict[firstName]:
                print(str(numPossibleLastNames) + ': ' + nameFromDict)
                numPossibleLastNames = numPossibleLastNames + 1
            print('Or type new last name (append "!" at end to auto-assign all instances of this name to this last name):')
        except KeyError:
            print('Type last name (append "!" at end to auto-assign all instances of this name to this last name):')
        #get the last name either from the number of the choice (if it's a number) or the last name that was directly entered
        lastName = ''
        choice = input('>')
        lastName = choice
        for x in range(0, numPossibleLastNames):
            if choice == str(x):
                lastName = self.lastNamesForFirstNameDict[firstName][x]
                break
        if lastName[-1] == '!':
            #specify that all instances of this display name are assigned to this last name, without asking again
            lastName = lastName[:-1]
            self.uniqueDisplayNamesToNameDict[displayName] = (firstName, lastName)
        try:
            if lastName not in self.lastNamesForFirstNameDict[firstName]:
                self.lastNamesForFirstNameDict[firstName].append(lastName)
        except KeyError:
            self.lastNamesForFirstNameDict[firstName] = [lastName]
    return WordClass.addNameWithMarkupPieces(displayName, firstName, lastName, wasPluralWithApostrophe)
def _call(path, param=None):
    #print('call radio with path=%s, param=%s', path, param)
    url = '{0}/{1}'.format(RadioUrl, path)
    if param:
        url += '?' + urlencode(param)
    print("call radio with url: " + url)
    response = Helper.downloadString(url)
    json_data = json.loads(response)
    return json_data
def _call(url, param=None):
    #print('call radio with path=%s, param=%s', path, param)
    if param:
        param['key'] = yt_key
        url += '?' + urlencode(param)
    print("call tunein with url: " + url)
    response = Helper.downloadString(url)
    json_data = json.loads(response)
    return json_data
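# Usage sketch for _call (hedged: the endpoint URL and query parameters are
# hypothetical, and yt_key must be defined at module level, as the code assumes):
#
#   data = _call("http://example.com/api/search", {"query": "jazz"})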
def fillDicts(files, columns, keys):
    '''
    Creates the table and fills the set of keys.
    '''
    fileNumber = len(files)
    idDict = {}  #maps keyTuple -> list of per-file value lists
    keySet = ()
    fileCounter = 0
    for file in files:  #loop through all files
        i = 0
        Helper.info("Get information from %s" % file)
        file = open(file)
        for line in file:  #loop through the current file
            line = line.split()
            keyTuple = ()
            for k in keys:
                keyTuple = keyTuple + (line[k - 1],)
            value = []
            for column in columns:  #get the needed values
                try:
                    value.append(line[column - 1])
                except IndexError:
                    raise ValueError("Not enough rows in line: %s in file %s" % (" ".join(line), file.name))
            if keyTuple in keySet:
                idDict[keyTuple][fileCounter] = value  #replace the fileCounter list with the values from the current file
            else:
                #create default list, with all values empty (one independent list per file)
                currentDefaultList = [["--"] * len(columns) for _ in range(len(files))]
                currentDefaultList[fileCounter] = value
                idDict[keyTuple] = currentDefaultList
                keySet = keySet + (keyTuple,)
            i += 1
            if i % 1000 == 0:
                Helper.status("%s lines parsed" % i)
        fileCounter += 1
    return idDict, keySet
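# Usage sketch for fillDicts (hedged: the file names and 1-based column/key
# indices below are illustrative; fillDicts subtracts 1 before indexing):
#
#   idDict, keySet = fillDicts(["sampleA.vcf", "sampleB.vcf"], columns=[4, 5], keys=[1, 2])
#   for keyTuple in keySet:
#       print keyTuple, idDict[keyTuple]  # one value list per input file, "--" where missing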
def parse_summary(ixp, inputfile, ipversion, ixpParam):
    """
    Function to parse a BGP summary output file.
    It prints the ASN->Neighbor IP mapping in a file.
    """
    ipToAsn = {}
    addrPos, asnPos, ipcountPos, rtrType = [int(ixpParam["summary"]["ip"]),
                                            int(ixpParam["summary"]["asn"]),
                                            int(ixpParam["summary"]["ipCount"]),
                                            ixpParam["type"]]
    with open(inputfile, 'rb') as f:
        for line in f:
            # split the line on whitespace
            lineTokens = line.strip().split()
            if len(lineTokens) <= ipcountPos:
                continue
            interfaces = re.findall(
                r'(?:\s|^|\(|\[)(?:[\d]{1,3})\.(?:[\d]{1,3})\.(?:[\d]{1,3})\.(?:[\d]{1,3})(?:\s|\)|$|\])',
                line)
            if len(lineTokens) > addrPos and len(interfaces) > 0:
                # check if the string that is supposed to be in the position of the address is indeed a valid IP address
                ip = lineTokens[addrPos]
                ipType = Helper.getIPNetwork(ip)
                if str(ipType) == str(ipversion) or (ipType > 0 and int(ipversion) == 10):
                    # check if the string in the position of the ASN is a valid number
                    asn = lineTokens[asnPos]
                    asn = asn.replace("AS", "")
                    if '.' in asn:
                        asn = Helper.convertToAsn32(asn)
                    if Helper.isPositiveInt(asn):
                        # check if the ASN is active and advertises prefixes;
                        # often the number of advertised prefixes is split into total received/best,
                        # in which case we want the total, which is the first of the two numbers
                        ipcount = lineTokens[ipcountPos]
                        try:
                            if rtrType == "bird":
                                received = re.findall(r"[\w']+", ipcount)[0]
                            elif rtrType == "quagga":
                                received = ipcount
                        except IndexError:
                            print ipcount
                            continue
                        # if the string represents a valid positive number, add the asn->ip mapping to the dictionary
                        if Helper.isPositiveInt(received):
                            if ip not in ipToAsn:
                                ipToAsn[ip] = asn
    return ipToAsn
def post(self):
    user = users.get_current_user()
    message = ""
    if user:
        category = self.request.get("category")
        isExport = self.request.get("isExport")
        owner = self.request.get("owner")
        if isExport:
            self.exportToXml(owner, category)
        else:
            newItemName = self.request.get("item_name").strip()
            ifAlreadyExists = Item.gql("WHERE name = :1 AND ANCESTOR IS :2", newItemName, Helper.getCategoryKey(user.email(), category))
            if (ifAlreadyExists.count() == 0) and newItemName:
                item = Item(parent=Helper.getCategoryKey(user.email(), category))
                item.name = self.request.get("item_name")
                item.wins = 0
                item.loses = 0
                item.put()
            else:
                if newItemName:
                    message = "Item already exists"
                else:
                    message = "Item name cannot be empty or spaces"
            items = db.GqlQuery("SELECT * FROM Item WHERE ANCESTOR IS :1", Helper.getCategoryKey(user.email(), category))
            template_values = {
                'items': items,
                'category': category,
                'message': message,
                'logoutURL': users.create_logout_url('./'),
                'user': user,
                'owner': owner
            }
            path = os.path.join(os.path.dirname(__file__), './html/items.html')
            self.response.out.write(template.render(path, template_values))
    else:
        self.redirect(users.create_login_url(self.request.uri))
def getOverlapsFromBed(self, bedFile, getNonOverlaps=False):
    '''
    Returns overlaps from bed file features.
    :param bedFile: as string or file
    :param getNonOverlaps: boolean
    :return: new variantSet of overlaps
    '''
    if type(bedFile) == str:
        bedFile = open(bedFile)
    elif type(bedFile) != file:
        raise TypeError("bedFile has wrong type, need str or file, %s found" % type(bedFile))

    startTime = Helper.getTime()
    Helper.info("[%s] Get overlaps from %s" % (startTime.strftime("%c"), bedFile.name), self.logFile, self.textField)

    variantsByChromosome = self.getVariantListByChromosome()
    overlaps = set()
    for line in bedFile:
        try:
            sl = line.split("\t")  #if "\t" in line else line.split(" ")
            chromosome, start, stop = sl[:3]
            start, stop = (int(start), int(stop))
        except ValueError:
            raise ValueError("Error in line '%s'" % line)

        for v in variantsByChromosome[chromosome]:
            if start < v.position < stop:
                overlaps.add((v.chromosome, v.position, v.ref, v.alt))

    if getNonOverlaps:
        overlaps = set(self.variantDict.keys()) - overlaps  #keep everything except the overlapping variants

    newSet = {}
    for variantTuple in overlaps:
        newSet[variantTuple] = self.variantDict[variantTuple]

    Helper.printTimeDiff(startTime, self.logFile, self.textField)
    return newSet
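# Usage sketch for getOverlapsFromBed (hedged: "alu_regions.bed" is a hypothetical
# path; with getNonOverlaps=True the complement, i.e. the non-overlapping
# variants, is returned instead):
#
#   aluVariants = variantSet.getOverlapsFromBed("alu_regions.bed")
#   nonAluVariants = variantSet.getOverlapsFromBed("alu_regions.bed", getNonOverlaps=True)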
def printClusters(self, outFile):
    if type(outFile) == str:
        try:
            outFile = open(outFile, "w")
        except IOError:
            Helper.warning("Could not open %s to write clusters" % outFile, self.logFile, self.textField)
    if type(outFile) != file:
        raise AttributeError("Invalid outfile type in 'printClusters' (need string or file, %s found)" % type(outFile))

    startTime = Helper.getTime()
    Helper.info("[%s] Print Clusters to %s" % (startTime.strftime("%c"), outFile.name), self.logFile, self.textField)

    outFile.write("\t".join(["#Chr", "Start", "Stop", "IslandID", "GeneID", "Gene Symbol", "Cluster Length", "Number of Editing_sites", "Editing_rate", "\n"]))

    for cluster in self.clusterDict.keys():
        end = max(v.position for v in self.clusterDict[cluster])
        start = min(v.position for v in self.clusterDict[cluster])
        length = end - start
        editingRate = float(len(self.clusterDict[cluster])) / float(length)
        geneIdSet = set()
        geneNameSet = set()
        for v in self.clusterDict[cluster]:
            try:
                gene = v.attributes['GI'][0][0]
                if type(gene) == Gene:
                    geneIdSet.add(gene.geneId)
                    geneNameSet |= set(gene.names)
                    #geneList.append(v.attributes['GI'][0][0])
                else:
                    geneIdSet.add("Intergenic")
                    geneNameSet.add("Intergenic")
            except KeyError:
                geneIdSet.add("N/A")  #when the variant has no attribute GI
        outFile.write("\t".join([v.chromosome, str(start), str(end), "Island" + str(cluster),  #Chr, Start, Stop, Cluster Name
                                 ",".join(map(str, geneIdSet)), ",".join(map(str, geneNameSet)),  #GeneID, Gene Symbol
                                 str(length), str(len(self.clusterDict[cluster])), '%1.2f' % float(editingRate), "\n"]))
def is_allowed(self, url):
    """
    Returns ``True`` if allowed (not in robots.txt) - else returns ``False``.
    """
    disallowed = self.get_disallowed_sites(url, 'GingerWhiskeyCrawler')
    urlpath = Helper.get_path(url)
    result = True
    for path in disallowed:
        if path[-1] == '/':
            path += '*'
        if fnmatch(urlpath, path):
            result = False
            break
    return result
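# Usage sketch for is_allowed (hedged: assumes get_disallowed_sites fetches and
# parses robots.txt for the given crawler name; the crawler instance, fetch
# method, and URL below are hypothetical):
#
#   if crawler.is_allowed("http://example.com/private/page"):
#       page = crawler.fetch("http://example.com/private/page")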
lgpar = lgParameters.LgParameters()
now = datetime.datetime.now()
#currentDate = now.strftime("%d-%m-%Y")
parameters = lgpar.getLgProfile(asn)
basename = '.'.join(outputfile.split(".")[:-1])
extension = outputfile.split(".")[-1]

if command == "summary":
    filepath = sendQuery(outputfile, asn, parameters, command)
    ip_to_asn = BgpParser.parse_summary(asn, filepath, 4, parameters["output"])
    ipfile = basename + "_addresses." + extension
    for ip in ip_to_asn:
        #print ip_to_asn[ip]
        Helper.saveToFile(ipfile, ip + " " + str(ip_to_asn[ip]) + "\n", "a+", asn)
elif command == "neighbor":
    # read the IP addresses/prefixes
    addresses = getIptoASN(inputfile)
    counter = 1  # just for printing progress
    if len(addresses) < 1:
        print "Not enough addresses to query"
    else:
        for address in addresses:
            print str(counter) + ". " + asn + " " + ": " + address
            counter += 1
            filepath = sendQuery(outputfile, asn, parameters, command, address)
elif command == "bgp":
    addresses = getIptoASN(inputfile2)
    neigh_file = basename + "_addresses." + extension
def word2vec():
    # hyperparameters - TODO - place into FLAGS (tensorflow website has examples)
    batch_size = 128      # how many target/context words to get in each batch
    embedding_size = 128  # dimension of the embedding vector
    skip_window = 1       # how many words to consider left and right - context size
    num_skips = 2         # how many times to reuse an input to generate a label

    # TAKEN FROM TF WEBSITE EXAMPLE:
    # We pick a random validation set to sample nearest neighbors. Here we limit the
    # validation samples to the words that have a low numeric ID, which by
    # construction are also the most frequent.
    valid_size = 16     # random set of words to evaluate similarity on
    valid_window = 100  # only pick dev samples in the head of the distribution
    valid_examples = np.array(random.sample(range(valid_window), valid_size))
    num_sampled = 64    # number of negative examples to sample
    num_steps = 50001   # steps to run for
    steps_per_checkpoint = 50  # save the params every 50 steps

    # prep work
    basedir = os.getcwd()
    # pull the data and get it into a usable format
    get_imdb_data(basedir)
    data, count, dictionary, reverse_dictionary = build_dataset(basedir)
    # save the dictionary to file - very important for the Data Processor
    Helper.store_stuff(dictionary, "dictionary.pickle", reverse_dictionary, "reverse_dictionary.pickle")

    print('Most common words (+UNK)', count[:5])
    print('Sample data', data[:10])
    batch_tester(data, reverse_dictionary)
    print('three index', dictionary['three'])

    ckpt_path = os.path.join(basedir, 'checkpoints')
    if not os.path.exists(ckpt_path):
        os.makedirs(ckpt_path)
    ckpt_embed = os.path.join(ckpt_path, "embeddings")
    if not os.path.exists(ckpt_embed):
        os.makedirs(ckpt_embed)

    # actual computation
    # TODO refactor this!
    # could follow guidelines here https://danijar.com/structuring-your-tensorflow-models/
    graph = tf.Graph()
    with graph.as_default():
        # variable to track progress
        global_step = tf.Variable(0, trainable=False)

        # Input data.
        train_dataset = tf.placeholder(tf.int32, shape=[batch_size])
        train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
        valid_dataset = tf.constant(valid_examples, dtype=tf.int32)

        with tf.device('/cpu:0'):
            # Variables.
            embeddings = tf.Variable(
                tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0),
                name="embeddings")
            nce_weights = tf.Variable(
                tf.truncated_normal([vocabulary_size, embedding_size],
                                    stddev=1.0 / math.sqrt(embedding_size)))
            nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

            # Model.
            # Look up embeddings for inputs.
            # note that the embeddings are Variable params that will be optimised!
            embed = tf.nn.embedding_lookup(embeddings, train_dataset)

            # Compute the NCE loss, using a sample of the negative labels each time.
            # Tried using sampled_softmax_loss, but performance was worse, so decided
            # to use NCE loss instead. Might be worth some more testing, especially with
            # the hyperparameters (i.e. num_sampled), to see what gives the best performance.
            # Tuning these params is a TODO.
            loss = tf.reduce_mean(
                tf.nn.nce_loss(nce_weights, nce_biases, embed, train_labels,
                               num_sampled, vocabulary_size))

        # PART BELOW LIFTED FROM TF EXAMPLES
        # Optimizer.
        # Note: The optimizer will optimize the nce weights AND the embeddings.
        # This is because the embeddings are defined as a variable quantity and the
        # optimizer's `minimize` method will by default modify all variable quantities
        # that contribute to the tensor it is passed.
        # See docs on `tf.train.Optimizer.minimize()` for more details.
        optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss, global_step=global_step)

        # Compute the similarity between minibatch examples and all embeddings.
        # We use the cosine distance:
        norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
        normalized_embeddings = embeddings / norm
        valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset)
        similarity = tf.matmul(valid_embeddings, tf.transpose(normalized_embeddings))

    # This helps us terminate early if training started before.
    started_before = False
    with tf.Session(graph=graph) as session:
        # want to save the overall state and the embeddings for later.
        # I think we can do this in one, but I haven't had time to test this yet.
        # TODO make this a bit more efficient, avoid having to save stuff twice.
        # NOTE - this part is very closely coupled with the lstm.py script, as it
        # reads the embeddings from the location specified here. Might be worth
        # relaxing this dependency and passing the save location as a variable param.
        ckpt = tf.train.get_checkpoint_state(ckpt_path)
        saver = tf.train.Saver(tf.all_variables())
        saver_embed = tf.train.Saver({'embeddings': embeddings})
        if ckpt and gfile.Exists(ckpt.model_checkpoint_path):
            print("Reading model parameters from {0}".format(ckpt.model_checkpoint_path))
            saver.restore(session, ckpt.model_checkpoint_path)
            print("done")
            started_before = True
        else:
            print("Creating model with fresh parameters.")
            tf.initialize_all_variables().run()
            print('Initialized')

        average_loss = 0
        for step in range(num_steps):
            batch_data, batch_labels = generate_batch(data, batch_size, num_skips, skip_window)
            feed_dict = {train_dataset: batch_data, train_labels: batch_labels}
            _, l = session.run([optimizer, loss], feed_dict=feed_dict)
            average_loss += l
            if step >= 10000 and (average_loss / 2000) < 5 and started_before:
                print('early finish as probably loaded from earlier')
                break
            if step % steps_per_checkpoint == 0:
                # save stuff
                checkpoint_path = os.path.join(ckpt_path, "model_ckpt")
                embed_path = os.path.join(ckpt_embed, "embeddings_ckpt")
                saver.save(session, checkpoint_path, global_step=global_step)
                saver_embed.save(session, embed_path)
            if step % 2000 == 0:
                if step > 0:
                    average_loss = average_loss / 2000
                # The average loss is an estimate of the loss over the last 2000 batches.
                print('Average loss at step %d: %f' % (step, average_loss))
                average_loss = 0
            # note that this is expensive (~20% slowdown if computed every 500 steps)
            if step % 10000 == 0:
                sim = similarity.eval()
                for i in range(valid_size):
                    valid_word = reverse_dictionary[valid_examples[i]]
                    top_k = 8  # number of nearest neighbors
                    nearest = (-sim[i, :]).argsort()[1:top_k + 1]
                    log = 'Nearest to %s:' % valid_word
                    for k in range(top_k):
                        close_word = reverse_dictionary[nearest[k]]
                        log = '%s %s,' % (log, close_word)
                    print(log)
        final_embeddings = normalized_embeddings.eval()

    tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
    plot_only = 500
    low_dim_embs = tsne.fit_transform(final_embeddings[1:plot_only + 1, :])
    labels = [reverse_dictionary[i] for i in range(plot_only)]
    plot_with_labels(low_dim_embs, labels)
def __init__(self, arg):
    self.withDependencies = arg
    self.helper = Helper()
def getQueryTermFrequency(self, query):
    query_tf = defaultdict(int)
    for term in query.split():
        query_tf[term] += 1  #count every occurrence of the term
    return query_tf

def getDocumentsContainingTerm(self, queryTF):
    documents_containing_term = {}
    for term in queryTF:
        documents_containing_term[term] = r.unigram_inverted_index[term]
    return documents_containing_term

def performQueryTfIdf(self):
    self.queries = r.get_stemmed_queries()
    for query in self.queries:
        self.queries[query] = r.parse_query(self.queries[query])
        self.queryTF = self.getQueryTermFrequency(self.queries[query])
        self.inverted_list = self.getDocumentsContainingTerm(self.queryTF)
        self.getTfIdf(self.queryTF, self.inverted_list, query, self.queries[query])
        # self.getTfIdf(self.queries[query], query)

# for query in self.queries:
#     self.getTfIdf(self.queries[query], query)

r = Helper()
t = TfIdf()
import AddressTweet
import operator
import os
from collections import defaultdict
from Helper import Helper

if __name__ == '__main__':
    helper = Helper()
    addressTweet = AddressTweet.AddressTweet()
    # print "get hashtags..."
    # addressTweet.getHashtags('NYCattack')
    # print "get userIdName..."
    # addressTweet.getUserName('NYCattack')
    top10HashTags = helper.loadPickle(os.path.join('NYCattack', "top10HashTags.pkl"))
    addressTweet.getPlot(top10HashTags, 'NYCattack', "top10HashTags.png", True)
    print "top10HashTags.png has been saved..."
    top10UserName = helper.loadPickle(os.path.join('NYCattack', "top10UserName.pkl"))
    addressTweet.getPlot(top10UserName, 'NYCattack', "top10UserName.png", False)
    print "top10UserName.png has been saved..."
def splitByBed(self, bedFile):
    '''
    Returns overlaps and nonOverlaps from bed file features.
    :param bedFile: as string or file
    '''
    if type(bedFile) == str:
        bedFile = open(bedFile)
    elif type(bedFile) != file:
        raise TypeError("bedFile has wrong type, need str or file, %s found" % type(bedFile))

    startTime = Helper.getTime()
    Helper.info("[%s] Split Variants by Bed File %s" % (startTime.strftime("%c"), bedFile.name), self.logFile, self.textField)

    variantsByChromosome = self.getVariantListByChromosome()
    overlapSet = set()
    i = 0
    for line in bedFile:
        try:
            sl = line.split("\t")  #if "\t" in line else line.split(" ")
            chromosome, start, stop = sl[:3]
            start, stop = (int(start), int(stop))
        except ValueError:
            raise ValueError("Error in line '%s'" % line)

        for v in variantsByChromosome[chromosome]:
            if start < v.position < stop:
                overlapSet.add((v.chromosome, v.position, v.ref, v.alt))
        i += 1
        if i % 100000 == 0:
            Helper.status("%s Bed Features parsed" % i, self.logFile, self.textField, "grey")

    Helper.info("finished parsing Bed file", self.logFile, self.textField)
    Helper.printTimeDiff(startTime, self.logFile, self.textField)

    #keep the overlapping variants, and separately everything except them
    overlaps = {key: self.variantDict[key] for key in self.variantDict if key in overlapSet}
    Helper.info("finished creating overlaps", self.logFile, self.textField)
    Helper.printTimeDiff(startTime, self.logFile, self.textField)

    nonOverlaps = {key: self.variantDict[key] for key in self.variantDict if key not in overlapSet}
    Helper.printTimeDiff(startTime, self.logFile, self.textField)
    return overlaps, nonOverlaps
import Helper.Helper as h
import csv
import datetime
import os

URLCRIPTO = 'https://m.investing.com/crypto/'
URLDOLAR = 'https://m.investing.com/currencies/usd-brl'

h.get(URLCRIPTO)
hora = datetime.datetime.now().strftime("%d-%m-%Y %H:%M")
lista = h.listxpath('/html/body/div[1]/div[1]/section/div/div/div/table/tbody/tr')
tamanholista = len(lista)

for i in range(0, tamanholista):
    rank = h.xpath('/html/body/div[1]/div[1]/section/div/div/div/table/tbody/tr[' + str(i + 1) + ']/td[1]').text
    nomeMoeda = h.xpath('/html/body/div[1]/div[1]/section/div/div/div/table/tbody/tr[' + str(i + 1) + ']/td[2]').text
    valor = str(h.xpath('/html/body/div[1]/div[1]/section/div/div/div/table/tbody/tr[' + str(i + 1) + ']/td[3]').text)
    valor = valor.replace(',', '')

    file_exists = os.path.isfile("Cripto.csv")
    with open("Cripto.csv", 'a', newline='') as saida:
        headers = ['Rank', 'NomeMoeda', 'Valor-USD', 'Hora']
        writer = csv.DictWriter(saida, delimiter=';', lineterminator='\n', fieldnames=headers)
        if not file_exists:
            writer.writeheader()
        writer.writerow({'Rank': rank, 'NomeMoeda': nomeMoeda, 'Valor-USD': valor, 'Hora': hora})

h.get(URLDOLAR)
cotacaoDolar = str(h.xpath('//*[@id="siteWrapper"]/div[1]/section[2]/div[4]/div[2]/span[1]').text)
def printParameters(self):
    Helper.info("*** Start RnaEditor with: ***", self.logFile, self.textField)
    if self.fastqFiles[0].endswith(".bam"):
        Helper.info("\t Bam File: " + self.fastqFiles[0], self.logFile, self.textField)
    else:
        if self.params.paired:
            Helper.info("\t FastQ-File_1: " + self.fastqFiles[0], self.logFile, self.textField)
            Helper.info("\t FastQ-File_2: " + self.fastqFiles[1], self.logFile, self.textField)
        else:
            Helper.info("\t FastQ-File: " + self.fastqFiles[0], self.logFile, self.textField)
    Helper.info("\t outfilePrefix:" + self.params.output, self.logFile, self.textField)
    Helper.info("\t refGenome:" + self.params.refGenome, self.logFile, self.textField)
    Helper.info("\t dbsnp:" + self.params.dbsnp, self.logFile, self.textField)
    Helper.info("\t sourceDir:" + self.params.sourceDir, self.logFile, self.textField)
    Helper.info("\t threads:" + self.params.threads, self.logFile, self.textField)
    Helper.info("\t maxDiff:" + self.params.maxDiff, self.logFile, self.textField)
    Helper.info("\t seedDiff:" + self.params.seedDiff, self.logFile, self.textField)
    Helper.info("\t paired:" + str(self.params.paired), self.logFile, self.textField)
    Helper.info("\t keepTemp:" + str(self.params.keepTemp), self.logFile, self.textField)
    Helper.info("\t overwrite:" + str(self.params.overwrite), self.logFile, self.textField)
    Helper.info("", self.logFile, self.textField)
import pymysql
from Helper import Helper

correct_count = 0
not_correct_count = 0
wrong_correct_count = 0
wrong_not_correct_count = 0
wrong_data = []

conn = pymysql.connect(host='localhost', port=3306, user='******', passwd='', db='small_rekomendacyjny')
cur = conn.cursor()
cur.execute("SELECT * FROM help WHERE checked = 5")

svc = Helper.train_svm()
for row in cur:
    print('########################################################')
    print(row)
    correct = Helper.one_check(mov_id=row[1], us_id=row[0], rat=float(row[2]))
    print(correct)
    if not correct:
        if row[4] == '0':
            print('NOT CORRECT!!! OK')
            not_correct_count += 1
            wrong_data.append([row[0], row[1], float(row[2])])
            #Helper.repare_one(svc=svc, movie_id=row[1], user_id=row[0], rating=float(row[2]))
        else:
            print("WRONG!!! SHOULD BE CORRECT!")
            wrong_not_correct_count += 1
            wrong_data.append([row[0], row[1], float(row[2])])
backbones = []
snmpProtocol = SnmpProtocol()

for record in records:
    backbone = Backbone(str(record[0]), record[1], record[2], record[3])
    backbones.append(backbone)

for backbone in backbones:
    print(backbone.id)
    print(backbone.binaad)
    print(backbone.ip)
    switches = backbone.switches
    for switchsingle in switches:
        stmt2 = snmpProtocol.execute(switchsingle.ip, '1.3.6.1.2.1.17.7.1.2.2.1.2', '-v "INTEGER: 418"')
        eachline = stmt2.splitlines()
        print("SwitchIp:" + switchsingle.ip)
        clients = []
        for i in eachline:
            #convert the MAC address and extract the VLAN
            rtr = Helper.decimaltohex(i)
            mac = rtr[0]
            vlan = str(rtr[1])
            #find the port as a string
            port = Helper.findport(i, switchsingle.ip, snmpProtocol)
            client = Client(mac, port, vlan)
            clients.append(client)
        switchsingle.setClients(clients)
        for client in clients:
            print("Mac: " + client.mac + " Port: " + client.port + " Vlan: " + client.vlan)
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
__author__ = 'Hongrui'

import paramiko, re, os, sys, time
import threading
from Helper import Helper

helper = Helper()
credential = helper.get_credential()
username = credential['username']
passwd = credential['password']
host_info = helper.get_hostinfo()


#Multi-threads to execute the commands
def thread_run(cmd):
    threads = []
    print 'Now Beginning......'
    for v in host_info.values():
        print 'ip address:', v[0]
        ip = v[0]
        t = threading.Thread(target=ssh2, args=(ip, username, passwd, cmd))
        threads.append(t)
    for t in threads:
        t.setDaemon(True)
        t.start()
    for t in threads:
        t.join()
    print 'All Command Executed......\n'
def configure(self):
    helper = Helper()
    if not helper.checkFile('/etc/bash_completion.d/git-completion.bash'):
        print "-- add bash completion"
        helper.wget(
            'https://raw.githubusercontent.com/git/git/master/contrib/completion/git-completion.bash',
            '/etc/bash_completion.d/')
    if 'name' in self.attrs:
        print "-- set your name in git config"
        helper.execute('git config --global user.name "' + self.attrs['name'] + '"')
    if 'email' in self.attrs:
        fileName = helper.homeFolder() + '.ssh/id_rsa'
        print "-- set your email in git config"
        helper.execute('git config --global user.email "' + self.attrs['email'] + '"')
        if 'passphrase' in self.attrs and len(self.attrs['passphrase']) > 4:
            print "-- create ssh key for auto-authorization (add string below to https://github.com/settings/ssh)"
            if not helper.checkFile(fileName):
                helper.execute('mkdir ' + helper.homeFolder() + '.ssh')
                helper.execute('ssh-keygen -f "' + fileName + '" -N "' + self.attrs['passphrase'] + '" -t rsa -C "' + self.attrs['email'] + '"')
                print helper.execute('cat ' + fileName + '.pub')
def __init__(self, scope, num_dirs, opt="adagrad", lr=0.025):
    print("num_dirs:", num_dirs)
    sub_dir = "ecb_" + scope + "/"

    # init stuff
    print("TORCH VERSION:", torch.__version__)
    global args
    self.args = config.parse_known_args()
    self.args.cuda = self.args.cuda and torch.cuda.is_available()
    device = torch.device("cuda:0" if self.args.cuda else "cpu")
    torch.manual_seed(self.args.seed)
    random.seed(self.args.seed)
    print("TREELSTM:", opt, "lr:", lr)

    # paths
    train_dir = os.path.join(self.args.data, str(num_dirs), 'train/', sub_dir)
    dev_dir = os.path.join(self.args.data, str(num_dirs), 'dev/', sub_dir)
    test_dir = os.path.join(self.args.data, str(num_dirs), 'test/', sub_dir)
    print("train_dir:", train_dir)
    print("dev_dir:", dev_dir)

    # builds vocabulary
    sick_vocab_file = Helper.build_entire_vocab(
        os.path.join(self.args.data, str(num_dirs), 'sick.vocab'),
        train_dir, dev_dir, test_dir)
    vocab = Vocab(filename=sick_vocab_file,
                  data=[Constants.PAD_WORD, Constants.UNK_WORD,
                        Constants.BOS_WORD, Constants.EOS_WORD])
    print('==> SICK vocabulary size : %d ' % vocab.size())

    # loads SICKDataset: trees, sentences, and labels
    self.train_dataset = Helper.load_data(
        train_dir, os.path.join(self.args.data, str(num_dirs), 'sick_train.pth'),
        vocab, self.args.num_classes)
    self.dev_dataset = Helper.load_data(
        dev_dir, os.path.join(self.args.data, str(num_dirs), 'sick_dev.pth'),
        vocab, self.args.num_classes)
    self.test_dataset = Helper.load_data(
        test_dir, os.path.join(self.args.data, str(num_dirs), 'sick_test.pth'),
        vocab, self.args.num_classes)

    # creates the TreeLSTM
    model = SimilarityTreeLSTM(vocab.size(), self.args.input_dim, self.args.mem_dim,
                               self.args.hidden_dim, self.args.num_classes,
                               self.args.sparse, self.args.freeze_embed, vocab)
    criterion = nn.KLDivLoss()  #nn.CrossEntropyLoss()

    # loads glove embeddings
    emb = Helper.load_embeddings(
        self.args, os.path.join(self.args.data, str(num_dirs), 'sick_embed.pth'),
        vocab, device)

    # sets up the model
    model.emb.weight.data.copy_(emb)  # plug these into the embedding matrix inside the model
    model.to(device)
    criterion.to(device)

    # select the optimizer based on the opt string before constructing it,
    # otherwise the comparison against "adam" can never match
    if opt == "adam":
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
    else:
        optimizer = optim.Adagrad(filter(lambda p: p.requires_grad, model.parameters()),
                                  lr=lr, weight_decay=self.args.wd)

    self.metrics = Metrics(self.args.num_classes)

    # create trainer object for training and testing
    self.trainer = Trainer(self.args, model, criterion, optimizer, device, vocab)
def printAttributes(self):
    Helper.info("*** MAP READS WITH FOLLOWING ATTRIBUTES ***", self.rnaEdit.logFile, self.rnaEdit.textField)
    if self.rnaEdit.params.paired:
        Helper.info("\t FastQ-File_1: " + self.fastqFile1, self.rnaEdit.logFile, self.rnaEdit.textField)
        Helper.info("\t FastQ-File_2: " + self.fastqFile2, self.rnaEdit.logFile, self.rnaEdit.textField)
    else:
        Helper.info("\t FastQ-File: " + self.fastqFile, self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t outfilePrefix:" + self.rnaEdit.params.output, self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t refGenome:" + self.rnaEdit.params.refGenome, self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t dbsnp:" + self.rnaEdit.params.dbsnp, self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t sourceDir:" + self.rnaEdit.params.sourceDir, self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t threads:" + self.rnaEdit.params.threads, self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t maxDiff:" + self.rnaEdit.params.maxDiff, self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t seedDiff:" + self.rnaEdit.params.seedDiff, self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t paired:" + str(self.rnaEdit.params.paired), self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t keepTemp:" + str(self.rnaEdit.params.keepTemp), self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t overwrite:" + str(self.rnaEdit.params.overwrite), self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("", self.rnaEdit.logFile, self.rnaEdit.textField)
def checkDependencies(self):
    """checks if all files are there,
    if all programs are installed properly
    and if the output directory is writable"""
    try:
        self.logFile = open(self.params.output + ".log", "w+")
    except IOError:
        Helper.error("Cannot open Log File", textField=self.textField)

    if type(self.fastqFiles) == list:
        self.fastqFiles = self.fastqFiles
    elif type(self.fastqFiles) == str:
        self.fastqFiles = [self.fastqFiles]
    else:
        Helper.error("FastQ File has wrong variable type", self.logFile, self.textField)

    for file in self.fastqFiles:
        if not os.path.isfile(file):
            Helper.error("Could not find: %s" % file, self.logFile, self.textField)

    '''
    Checks the existence of the necessary packages and tools
    :param sourceDir: folder which contains all the software
    '''
    Helper.newline(1)
    Helper.info("CHECK DEPENDENCIES", self.logFile, self.textField)

    #check if all tools are there
    if not os.path.isfile(self.params.sourceDir + "bwa"):
        Helper.error("BWA not found in %s" % self.params.sourceDir, self.logFile, self.textField)
    if not os.path.isfile(self.params.sourceDir + "picard-tools/SortSam.jar"):
        Helper.error("SortSam.jar not found in %s" % self.params.sourceDir + "picard-tools", self.logFile, self.textField)
    if not os.path.isfile(self.params.sourceDir + "picard-tools/MarkDuplicates.jar"):
        Helper.error("MarkDuplicates.jar not found in %s" % self.params.sourceDir + "picard-tools", self.logFile, self.textField)
    if not os.path.isfile(self.params.sourceDir + "GATK/GenomeAnalysisTK.jar"):
        Helper.error("GenomeAnalysisTK.jar not found in %s" % self.params.sourceDir + "GATK/", self.logFile, self.textField)
    if not os.path.isfile(self.params.sourceDir + "blat"):
        Helper.error("blat not found in %s" % self.params.sourceDir, self.logFile, self.textField)
    if not os.path.isfile(self.params.sourceDir + "samtools"):
        Helper.error("samtools not found in %s" % self.params.sourceDir, self.logFile, self.textField)
    if not os.system("java -version") == 0:
        Helper.error("Java could not be found, Please install java", self.logFile, self.textField)

    #check if all files are there
    if not os.path.isfile(self.params.refGenome):
        Helper.error("Could not find Reference Genome in %s: " % self.params.refGenome, self.logFile, self.textField)

    # Files for BWA
    for suffix in (".amb", ".ann", ".bwt", ".pac", ".sa"):
        if not os.path.isfile(self.params.refGenome + suffix):
            Helper.warning("Could not find %s%s" % (self.params.refGenome, suffix), self.logFile, self.textField)
            Helper.error("run: 'bwa index %s' to create it" % self.params.refGenome, self.logFile, self.textField)

    #Files for GATK
    if self.params.refGenome.endswith("fasta"):
        if not os.path.isfile(self.params.refGenome.replace(".fasta", ".dict")):
            Helper.warning("Could not find %s" % self.params.refGenome.replace(".fasta", ".dict"), self.logFile, self.textField)
            Helper.error("run: 'java -jar %spicard-tools/CreateSequenceDictionary.jar R=%s O= %s' to create it" % (self.params.sourceDir, self.params.refGenome, self.params.refGenome.replace(".fasta", ".dict")), self.logFile, self.textField)
    elif self.params.refGenome.endswith("fa"):
        if not os.path.isfile(self.params.refGenome.replace(".fa", ".dict")):
            Helper.warning("Could not find %s" % self.params.refGenome.replace(".fa", ".dict"), self.logFile, self.textField)
            Helper.error("run: 'java -jar %spicard-tools/CreateSequenceDictionary.jar R=%s O= %s' to create it" % (self.params.sourceDir, self.params.refGenome, self.params.refGenome.replace(".fa", ".dict")), self.logFile, self.textField)
    else:
        Helper.error("RefGenome has wrong suffix. Either '.fa' or '.fasta'")

    if not os.path.isfile(self.params.refGenome + ".fai"):
        Helper.warning("Could not find %s.fai" % self.params.refGenome, self.logFile, self.textField)
        Helper.error("run: 'samtools faidx %s' to create it" % self.params.refGenome, self.logFile, self.textField)

    #SNP databases
    if not os.path.isfile(self.params.dbsnp):
        Helper.error("Could not find dbSNP database %s: " % self.params.dbsnp, self.logFile, self.textField)
    if not os.path.isfile(self.params.hapmap) and self.params.hapmap != "None":
        Helper.error("Could not find Hapmap database %s: " % self.params.hapmap, self.logFile, self.textField)
    if not os.path.isfile(self.params.omni) and self.params.omni != "None":
        Helper.error("Could not find Omni database %s: " % self.params.omni, self.logFile, self.textField)
    if not os.path.isfile(self.params.esp) and self.params.esp != "None":
        Helper.error("Could not find ESP database %s: " % self.params.esp, self.logFile, self.textField)

    #region files
    if not os.path.isfile(self.params.aluRegions):
        Helper.error("Could not find %s: " % self.params.aluRegions, self.logFile, self.textField)
    if not os.path.isfile(self.params.gtfFile):
        Helper.error("Could not find %s: " % self.params.gtfFile, self.logFile, self.textField)

    Helper.info("Dependencies satisfied", self.logFile, self.textField)
def checkDependencies(args):
    '''
    Checks the existence of the necessary packages and tools
    :param sourceDir: folder which contains all the software
    '''
    Helper.newline(1)
    Helper.info("CHECK DEPENDENCIES")

    # check if all tools are there
    if not os.path.isfile(args.sourceDir + "bwa"):
        Helper.error("BWA not found in %s" % args.sourceDir)
    if not os.path.isfile(args.sourceDir + "picard-tools/SortSam.jar"):
        Helper.error("SortSam.jar not found in %s" % (args.sourceDir + "picard-tools"))
    if not os.path.isfile(args.sourceDir + "picard-tools/MarkDuplicates.jar"):
        Helper.error("MarkDuplicates.jar not found in %s" % (args.sourceDir + "picard-tools"))
    if not os.path.isfile(args.sourceDir + "GATK/GenomeAnalysisTK.jar"):
        Helper.error("GenomeAnalysisTK.jar not found in %s" % (args.sourceDir + "GATK/"))
    if not os.path.isfile(args.sourceDir + "samtools"):
        Helper.error("samtools not found in %s" % args.sourceDir)
    if not os.system("java -version") == 0:
        Helper.error("Java could not be found. Please install Java.")

    # check if all files are there
    if not os.path.isfile(args.RefGenome):
        Helper.error("Could not find Reference Genome in %s: " % args.RefGenome)

    # files for BWA
    for suffix in (".amb", ".ann", ".bwt", ".pac", ".sa"):
        if not os.path.isfile(args.RefGenome + suffix):
            Helper.warning("Could not find %s%s" % (args.RefGenome, suffix))
            Helper.error("run: 'bwa index %s' to create it" % args.RefGenome)

    # files for GATK
    dictFile = os.path.splitext(args.RefGenome)[0] + ".dict"
    if not os.path.isfile(dictFile):
        Helper.warning("Could not find %s" % dictFile)
        Helper.error("run: 'java -jar %spicard-tools/CreateSequenceDictionary.jar R=%s O=%s' to create it"
                     % (args.sourceDir, args.RefGenome, dictFile))
    if not os.path.isfile(args.RefGenome + ".fai"):
        Helper.warning("Could not find %s.fai" % args.RefGenome)
        Helper.error("run: 'samtools faidx %s' to create it" % args.RefGenome)

    # SNP databases
    if not os.path.isfile(args.dbsnp):
        Helper.error("Could not find %s: " % args.dbsnp)
def stopSafely(self):
    self.quit()
    Helper.info("Analysis was stopped by User", self.logFile, self.textField)
def startAnalysis(self):
    '''Proceeds through all the steps to detect editing sites from a bam file.
    @return: 1 on success (also when the final result file already exists)
    '''
    # check if the final result file already exists
    if os.path.isfile(self.rnaEdit.params.output + ".editingSites.clusters") and self.rnaEdit.params.overwrite == False:
        Helper.status("\t [SKIP] Final result file already exists",
                      self.rnaEdit.logFile, self.rnaEdit.textField, "green")
        return 1

    # rough variant calling with GATK
    self.printAttributes()

    # create transcriptome from GTF file (disabled)
    #startTime = Helper.getTime()
    #Helper.info(" [%s] Parsing Gene Data from %s" % (startTime.strftime("%c"), self.rnaEdit.params.gtfFile), self.rnaEdit.logFile, self.rnaEdit.textField)
    #duration = Helper.getTime() - startTime
    #Helper.info(" Finished parsing in %s" % (str(duration)), self.rnaEdit.logFile, self.rnaEdit.textField)

    vcfFile = self.rnaEdit.params.output + ".vcf"
    cmd = ["java", "-Xmx12G", "-jar",
           self.rnaEdit.params.sourceDir + "GATK/GenomeAnalysisTK.jar",
           "-T", "UnifiedGenotyper",
           "-R", self.rnaEdit.params.refGenome,
           "-glm", "SNP",
           "-I", self.bamFile,
           "-D", self.rnaEdit.params.dbsnp,
           "-o", vcfFile,
           "-metrics", self.rnaEdit.params.output + ".snp.metrics",
           "-nt", self.rnaEdit.params.threads,
           "-l", "ERROR",
           "-stand_call_conf", self.rnaEdit.params.standCall,
           "-stand_emit_conf", self.rnaEdit.params.standEmit,
           "-A", "Coverage", "-A", "AlleleBalance", "-A", "BaseCounts"]
    #print cmd
    Helper.proceedCommand("Call variants", cmd, self.bamFile, vcfFile, self.rnaEdit)

    #################################
    ###   Delete known SNPs!!!    ###
    #################################
    if not os.path.isfile(self.rnaEdit.params.output + ".noSNPs.vcf") or self.rnaEdit.params.overwrite == True:
        # read in initial SNPs
        variants = VariantSet(vcfFile, self.rnaEdit.logFile, self.rnaEdit.textField)
        # delete SNPs from dbSNP
        variants.deleteOverlapsFromVcf(self.rnaEdit.params.dbsnp)
        # delete variants from the 1000 Genomes Project
        if self.rnaEdit.params.omni != "None":
            variants.deleteOverlapsFromVcf(self.rnaEdit.params.omni)
        # delete variants from UW exome calls
        if self.rnaEdit.params.esp != "None":
            variants.deleteOverlapsFromVcf(self.rnaEdit.params.esp)
        # annotate all variants (disabled)
        #variants.annotateVariantDict(self.genome)
        # save variants in case something goes wrong later
        variants.printVariantDict(self.rnaEdit.params.output + ".noSNPs.vcf")
    else:
        if not os.path.isfile(self.rnaEdit.params.output + ".noReadEdges.vcf"):
            variants = VariantSet(self.rnaEdit.params.output + ".noSNPs.vcf",
                                  self.rnaEdit.logFile, self.rnaEdit.textField)

    ###############################################
    ###   Delete variants from read edges!!!   ###
    ###############################################
    if not os.path.isfile(self.rnaEdit.params.output + ".noReadEdges.vcf") or self.rnaEdit.params.overwrite == True:
        # erase artificial mismatches at read edges from variants
        variants.removeEdgeMismatches(self.bamFile, self.rnaEdit.params.edgeDistance, 25)
        #self.removeEdgeMissmatches(variants, self.bamFile, self.rnaEdit.params.edgeDistance, 25)
        # save variants in case something goes wrong later
        variants.printVariantDict(self.rnaEdit.params.output + ".noReadEdges.vcf")
    else:
        if not os.path.isfile(self.rnaEdit.params.output + ".alu.vcf") \
                or not os.path.isfile(self.rnaEdit.params.output + ".nonAlu.vcf"):
            variants = VariantSet(self.rnaEdit.params.output + ".noReadEdges.vcf",
                                  self.rnaEdit.logFile, self.rnaEdit.textField)

    ###############################################
    ###   Split Alu and non-Alu variants!!!    ###
    ###############################################
    if (not os.path.isfile(self.rnaEdit.params.output + ".alu.vcf")
            or not os.path.isfile(self.rnaEdit.params.output + ".nonAlu.vcf")) \
            or self.rnaEdit.params.overwrite == True:
        # get non-Alu variants
        nonAluVariants = copy(variants)
        #nonAluVariants.variantDict = variants.getOverlapsFromBed(self.rnaEdit.params.aluRegions, getNonOverlaps=True)
        # get Alu variants
        aluVariants = copy(variants)
        #aluVariants.variantDict = variants.getOverlapsFromBed(self.rnaEdit.params.aluRegions, getNonOverlaps=False)
        aluVariants.variantDict, nonAluVariants.variantDict = variants.splitByBed(self.rnaEdit.params.aluRegions)
        # print out variants from Alu regions
        aluVariants.printVariantDict(self.rnaEdit.params.output + ".alu.vcf")
        nonAluVariants.printVariantDict(self.rnaEdit.params.output + ".nonAlu.vcf")
    else:
        aluVariants = VariantSet(self.rnaEdit.params.output + ".alu.vcf",
                                 self.rnaEdit.logFile, self.rnaEdit.textField)
        if not os.path.isfile(self.rnaEdit.params.output + ".noSpliceJunction.vcf"):
            nonAluVariants = VariantSet(self.rnaEdit.params.output + ".nonAlu.vcf",
                                        self.rnaEdit.logFile, self.rnaEdit.textField)

    ##############################################
    ###   proceed with non-Alu reads only!!!   ##
    ###   Remove intronic splice junctions!!!  ##
    ##############################################
    self.genome = Genome(self.rnaEdit.params.gtfFile, self.rnaEdit.logFile, self.rnaEdit.textField)
    # erase variants from intronic splice junctions
    if not os.path.isfile(self.rnaEdit.params.output + ".noSpliceJunction.vcf") or self.rnaEdit.params.overwrite == True:
        self.removeIntronicSpliceJunctions(nonAluVariants, self.genome,
                                           distance=self.rnaEdit.params.intronDistance)
        nonAluVariants.printVariantDict(self.rnaEdit.params.output + ".noSpliceJunction.vcf")
    else:
        if not os.path.isfile(self.rnaEdit.params.output + ".noHomo.vcf"):
            nonAluVariants = VariantSet(self.rnaEdit.params.output + ".noSpliceJunction.vcf",
                                        self.rnaEdit.logFile, self.rnaEdit.textField)

    ##############################################
    ###   erase variants from homopolymers!!!  ##
    ##############################################
    if not os.path.isfile(self.rnaEdit.params.output + ".noHomo.vcf") or self.rnaEdit.params.overwrite == True:
        self.removeHomopolymers(nonAluVariants, self.rnaEdit.params.output, 4)
        nonAluVariants.printVariantDict(self.rnaEdit.params.output + ".noHomo.vcf")
    else:
        if not os.path.isfile(self.rnaEdit.params.output + ".noBlat.vcf"):
            nonAluVariants = VariantSet(self.rnaEdit.params.output + ".noHomo.vcf",
                                        self.rnaEdit.logFile, self.rnaEdit.textField)

    ##############################################
    ###   erase duplicate mapped reads!!!      ##
    ##############################################
    if not os.path.isfile(self.rnaEdit.params.output + ".noBlat.vcf") or self.rnaEdit.params.overwrite == True:
        blatOutfile = self.rnaEdit.params.output + ".noBlat.vcf"
        self.blatSearch(nonAluVariants, blatOutfile, 25, 2)
        # print nonAlu variants
        nonAluVariants.printVariantDict(self.rnaEdit.params.output + ".noBlat.vcf")
    else:
        if not os.path.isfile(self.rnaEdit.params.output + ".editingSites.nonAlu.vcf"):
            nonAluVariants = VariantSet(self.rnaEdit.params.output + ".noBlat.vcf",
                                        self.rnaEdit.logFile, self.rnaEdit.textField)
            #nonAluVariants.deleteNonEditingBases()
            #nonAluVariants.printVariantDict(self.rnaEdit.params.output + ".editingSites.nonAlu.vcf")
        else:
            nonAluVariants = VariantSet(self.rnaEdit.params.output + ".editingSites.nonAlu.vcf",
                                        self.rnaEdit.logFile, self.rnaEdit.textField)
            #nonAluVariants.printGeneList(self.genome, self.rnaEdit.params.output + ".nonAlu.gvf", printSummary=True)

    # print nonAlu editing sites
    nonAluVariants.deleteNonEditingBases()
    nonAluVariants.annotateVariantDict(self.genome)
    nonAluVariants.printVariantDict(self.rnaEdit.params.output + ".editingSites.nonAlu.vcf")
    nonAluVariants.printGeneList(self.genome, self.rnaEdit.params.output + ".editingSites.nonAlu.gvf", printSummary=True)
    nonAluVariants.createClusters(eps=50, minSamples=5)
    nonAluVariants.printClusters(self.rnaEdit.params.output + ".editingIslands.bed")

    # print Alu editing sites
    aluVariants.deleteNonEditingBases()
    aluVariants.annotateVariantDict(self.genome)
    aluVariants.printVariantDict(self.rnaEdit.params.output + ".editingSites.alu.vcf")
    aluVariants.printGeneList(self.genome, self.rnaEdit.params.output + ".editingSites.alu.gvf", printSummary=True)
    aluVariants.createClusters(eps=50, minSamples=5)
    aluVariants.printClusters(self.rnaEdit.params.output + ".editingIslands.bed")

    # combine Alu and non-Alu sites and print the final tables
    variants = aluVariants + nonAluVariants
    variants.deleteNonEditingBases()
    variants.annotateVariantDict(self.genome)
    variants.printVariantDict(self.rnaEdit.params.output + ".editingSites.vcf")
    variants.printGeneList(self.genome, self.rnaEdit.params.output + ".editingSites.gvf", printSummary=True)
    variants.createClusters(eps=self.rnaEdit.params.eps, minSamples=self.rnaEdit.params.minPts)
    variants.printClusters(self.rnaEdit.params.output + ".editingIslands.bed")

    return 1
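# --- Hedged sketch (not part of rnaEditor) -----------------------------------
# Every filter step in startAnalysis() repeats the same pattern: skip the step
# if its output file exists (unless --overwrite), otherwise run the filter and
# checkpoint the result. A hypothetical helper that factors this out could look
# like the sketch below; runStep and filterFunc are assumptions, while
# VariantSet and printVariantDict are used exactly as above.
import os

def runStep(variants, outFile, overwrite, filterFunc, logFile=None, textField=0):
    """Apply filterFunc to variants and checkpoint to outFile, or reload a previous run."""
    if not os.path.isfile(outFile) or overwrite:
        filterFunc(variants)
        variants.printVariantDict(outFile)  # checkpoint, so a later crash loses nothing
        return variants
    return VariantSet(outFile, logFile, textField)  # reuse the existing checkpoint

# usage sketch:
# variants = runStep(variants, output + ".noSNPs.vcf", overwrite,
#                    lambda v: v.deleteOverlapsFromVcf(dbsnp))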
def printGeneList(self, genome, outfile, printSummary=True):
    '''
    Print the list of genes with all their variants.

    Gene-Variation-File:
        "Gene_ID", "Gene_Name", "SEGMENT", "#CHROM", "GENE_START", "GENE_STOP",
        "VAR_POS", "REF", "ALT", "QUAL", "BaseCount(A,C,T,G)"
    Gene-Summary-File:
        "Gene_ID", "Gene_Name", "#3'UTR", "#5'UTR", "#EXON", "#INTRON", "#TOTAL"

    :param genome: object of class Genome
    :param outfile: output path or open file object
    :param printSummary: boolean, whether to print the summary file
    '''
    sumDict = {}

    if type(genome) != Genome:
        raise AttributeError("Type of genome is %s, but has to be an object of Genome" % type(genome))

    if type(outfile) == str:
        try:
            outfile = open(outfile, "w")
        except IOError:
            Helper.warning("Could not open %s to write Variant" % outfile, self.logFile, self.textField)
    if type(outfile) != file:
        raise AttributeError("Invalid outfile type in 'printGeneList' (need string or file, %s found)" % type(outfile))

    startTime = Helper.getTime()
    Helper.info("[%s] Print Genes and Variants to %s" % (startTime.strftime("%c"), outfile.name),
                self.logFile, self.textField)

    sumFile = open(outfile.name[:outfile.name.rfind(".")] + ".summary", "w")

    outfile.write("\t".join(["#Gene_ID", "Name", "SEGMENT", "#CHROM", "GENE_START", "GENE_STOP",
                             "VAR_ID", "VAR_POS", "REF", "ALT", "QUAL", "#A", "#C", "#G", "#T",
                             "Reads_Total", "Edited_Reads", "Editing_Ratio", "\n"]))

    for v in self.variantDict.values():
        anno = v.attributes["GI"]
        for a in anno:
            gene, segments = a
            totalReads = str(int(sum(map(int, v.attributes["BaseCounts"]))))
            # BaseCounts are ordered (A, C, G, T)
            if v.ref == "A" and v.alt == "G":
                editedReads = str(v.attributes["BaseCounts"][2])
                ratio = str(round(float(editedReads) / float(totalReads), 2))
            elif v.ref == "T" and v.alt == "C":
                editedReads = str(v.attributes["BaseCounts"][1])
                ratio = str(round(float(editedReads) / float(totalReads), 2))
            else:
                editedReads = "0"
                ratio = "0"

            if gene == "-":
                out = ["-", "-", ",".join(segments), v.chromosome, "-", "-",
                       v.id, str(v.position), v.ref, v.alt, str(v.qual),
                       "\t".join(v.attributes["BaseCounts"]),
                       totalReads, editedReads, ratio, "\n"]
            else:
                out = [gene.geneId, gene.names[0], ",".join(segments), v.chromosome,
                       str(gene.start), str(gene.end),
                       v.id, str(v.position), v.ref, v.alt, str(v.qual),
                       "\t".join(v.attributes["BaseCounts"]),
                       totalReads, editedReads, ratio, "\n"]
            outfile.write("\t".join(out))

            # count variations per gene
            if gene not in sumDict:
                sumDict[gene] = [0, 0, 0, 0, 0]
            for seg in segments:
                if seg == "3'UTR":
                    sumDict[gene][0] += 1
                elif seg == "5'UTR":
                    sumDict[gene][1] += 1
                elif seg in ("coding-exon", "noncoding-exon"):
                    sumDict[gene][2] += 1
                elif seg == "intron":
                    sumDict[gene][3] += 1
            sumDict[gene][4] += 1

    # print number of variants per gene
    if printSummary:
        sumDictGeneIds = set()
        sumFile.write("\t".join(["#Gene_ID", "Name", "#3'UTR", "#5'UTR", "#EXON", "#INTRON", "#TOTAL", "\n"]))
        for gene in sumDict.keys():
            numbers = list(map(str, sumDict[gene]))
            if gene == "-":
                sumFile.write("\t".join(["intergenic", "-"] + ["-", "-", "-", "-", numbers[4]] + ["\n"]))
            else:
                sumFile.write("\t".join([gene.geneId, gene.names[0]] + numbers + ["\n"]))
                sumDictGeneIds.add(gene.geneId)

        # print unaffected genes
        # this was added to have the whole set of genes in the summary file,
        # so that it is easier to compare results in Excel
        genesByGeneId = genome.getGenesByGeneID()
        nonEffectedGenes = set(genesByGeneId.keys()) - sumDictGeneIds
        for geneId in nonEffectedGenes:
            gene = genesByGeneId[geneId]
            sumFile.write("\t".join([gene.geneId, gene.names[0]] + ["0", "0", "0", "0", "0"] + ["\n"]))
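# --- Hedged example -----------------------------------------------------------
# Minimal sketch of reading the tab-separated .gvf file written by
# printGeneList() above. The column layout follows the header the method
# writes; the file name is a hypothetical example.
import csv

with open("sample.editingSites.gvf") as gvf:           # hypothetical path
    reader = csv.reader(gvf, delimiter="\t")
    header = next(reader)                              # first row starts with "#Gene_ID"
    for row in reader:
        row = [field for field in row if field != ""]  # each row ends with a trailing tab
        geneId, name, segment = row[0], row[1], row[2]
        editingRatio = row[-1]                         # Editing_Ratio is the last column
        print("%s\t%s\t%s" % (geneId, name, editingRatio))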
################################################################
########  TODO: Draw barplots of highly edited genes  #########
################################################################
import sys

from PyQt5.QtWidgets import QApplication
from MyWindow import MyWindow
from Task3Logic import Task3Logic
from Task4Logic import Task4Logic
from Helper import Helper


def task3(h):
    logic = Task3Logic()
    lines = h.reader3task('task3.input1.txt')
    h.writer3task('task3.output1.txt', logic.logic(lines))
    arr = h.reader3task('task3.output1.txt')
    print(arr)


def task4(h):
    logic = Task4Logic()
    lines = h.reader4task('task4.input1.txt')
    h.writer4task('task4.output1.txt', logic.logic(lines))
    arr = h.reader4task('task4.output1.txt')
    print(arr)


if __name__ == '__main__':
    h = Helper()
    task3(h)
    task4(h)
def startAnalysis(self):
    recaledBamFile = self.rnaEdit.params.output + ".noDup.realigned.recalibrated.bam"
    if os.path.isfile(recaledBamFile):
        Helper.info("* * * [Skipping] Mapping result file already exists * * *",
                    self.rnaEdit.logFile, self.rnaEdit.textField)
        self.rnaEdit.logFile.flush()
        return recaledBamFile

    if self.rnaEdit.params.paired == True:
        # paired-end sequencing
        # align first FASTQ reads to the genome
        saiFile1 = self.rnaEdit.params.output + "_1.sai"
        cmd = [self.rnaEdit.params.sourceDir + "bwa", "aln",
               "-t", self.rnaEdit.params.threads,
               "-n", self.rnaEdit.params.maxDiff,
               "-k", self.rnaEdit.params.seedDiff,
               self.rnaEdit.params.refGenome, self.fastqFile1]
        Helper.proceedCommand("Align first Reads with BWA", cmd, self.fastqFile1, saiFile1, self.rnaEdit)

        # align second FASTQ reads to the genome
        saiFile2 = self.rnaEdit.params.output + "_2.sai"
        cmd = [self.rnaEdit.params.sourceDir + "bwa", "aln",
               "-t", self.rnaEdit.params.threads,
               "-n", self.rnaEdit.params.maxDiff,
               "-k", self.rnaEdit.params.seedDiff,
               self.rnaEdit.params.refGenome, self.fastqFile2]
        Helper.proceedCommand("Align second Reads with BWA", cmd, self.fastqFile2, saiFile2, self.rnaEdit)

        # convert sai to sam
        samFile = self.rnaEdit.params.output + ".sam"
        cmd = [self.rnaEdit.params.sourceDir + "bwa", "sampe",
               "-r", "@RG\tID:bwa\tSM:A\tPL:ILLUMINA\tPU:HiSEQ2000",
               self.rnaEdit.params.refGenome, saiFile1, saiFile2, self.fastqFile1, self.fastqFile2]
        Helper.proceedCommand("convert sai to sam", cmd, saiFile1, samFile, self.rnaEdit)
    elif self.rnaEdit.params.paired == False:
        # single-end sequencing
        # align FASTQ reads to the genome
        saiFile = self.rnaEdit.params.output + ".sai"
        cmd = [self.rnaEdit.params.sourceDir + "bwa", "aln",
               "-t", self.rnaEdit.params.threads,
               "-n", self.rnaEdit.params.maxDiff,
               "-k", self.rnaEdit.params.seedDiff,
               self.rnaEdit.params.refGenome, self.fastqFile]
        Helper.proceedCommand("Align Reads with BWA", cmd, self.fastqFile, saiFile, self.rnaEdit)

        # convert sai to sam
        samFile = self.rnaEdit.params.output + ".sam"
        cmd = [self.rnaEdit.params.sourceDir + "bwa", "samse",
               "-r", "@RG\tID:bwa\tSM:A\tPL:ILLUMINA\tPU:HiSEQ2000",
               self.rnaEdit.params.refGenome, saiFile, self.fastqFile]
        #cmd = [self.rnaEdit.params.sourceDir + "bwa", "samse", self.rnaEdit.params.refGenome, saiFile, self.fastqFile]
        Helper.proceedCommand("convert sai to sam", cmd, saiFile, samFile, self.rnaEdit)

    # convert sam to bam
    bamFile = self.rnaEdit.params.output + ".bam"
    # Alternative: Picard SortSam (disabled)
    #cmd = ["java", "-Xmx8G", "-jar", self.rnaEdit.params.sourceDir + "picard-tools/SortSam.jar",
    #       "INPUT=" + samFile, "OUTPUT=" + bamFile, "SO=coordinate",
    #       "VALIDATION_STRINGENCY=LENIENT", "CREATE_INDEX=true"]
    #Helper.proceedCommand("convert sam to bam", cmd, samFile, bamFile, self.rnaEdit)
    # Alternative: pysam (disabled)
    #unsortedBamFile = self.rnaEdit.params.output + ".unsorted.bam"
    #pysamSamFile = pysam.Samfile(samFile, 'r')
    #pysamBamFile = pysam.Samfile(unsortedBamFile, 'wb', template=pysamSamFile)
    #for read in pysamSamFile.fetch():
    #    pysamBamFile.write(read)
    #pysam.sort(samFile, "-o", bamFile)

    # sort and index bam file
    cmd = [self.rnaEdit.params.sourceDir + "samtools", "sort", samFile, "-o", bamFile]
    Helper.proceedCommand("Sort Bam File", cmd, samFile, bamFile, self.rnaEdit)
    #pysam.index(bamFile)
    cmd = [self.rnaEdit.params.sourceDir + "samtools", "index", bamFile]
    Helper.proceedCommand("Index Bam File", cmd, samFile, bamFile + ".bai", self.rnaEdit)

    # mark PCR duplicates
    markedFile = self.rnaEdit.params.output + ".noDup.bam"
    cmd = ["java", "-Xmx16G", "-jar",
           self.rnaEdit.params.sourceDir + "picard-tools/MarkDuplicates.jar",
           "INPUT=" + bamFile, "OUTPUT=" + markedFile,
           "METRICS_FILE=" + self.rnaEdit.params.output + ".pcr.metrics",
           "VALIDATION_STRINGENCY=LENIENT", "CREATE_INDEX=true"]
    Helper.proceedCommand("Remove PCR duplicates", cmd, bamFile, markedFile, self.rnaEdit)
    # Alternative: samtools rmdup (disabled)
    #if self.rnaEdit.params.paired == False:
    #    cmd = [self.rnaEdit.params.sourceDir + "samtools", "rmdup", "-s", bamFile, markedFile]
    #else:
    #    cmd = [self.rnaEdit.params.sourceDir + "samtools", "rmdup", bamFile, markedFile]
    #Helper.proceedCommand("Remove PCR duplicates", cmd, bamFile, markedFile, self.rnaEdit)
    #pysam.index(markedFile)

    # Alternative: alignment with tophat (disabled)
    #bamFile = self.rnaEdit.params.output + "/accepted_hits.bam"
    #cmd = [self.rnaEdit.params.sourceDir + "tophat/tophat2", "--no-coverage-search", "--keep-fasta-order",
    #       "-p", "12", "--rg-id", "A", "--rg-sample", "A", "--rg-library", "illumina",
    #       "--rg-platform-unit", "HiSeq", "-o", self.rnaEdit.params.output,
    #       self.rnaEdit.params.refGenome, self.fastqFile]
    #Helper.proceedCommand("Map reads with tophat", cmd, self.rnaEdit.params.fastqFile, bamFile, self.rnaEdit)
    # add read group (ONLY NEEDED WHEN MAPPED WITH TOPHAT; disabled)
    #rgFile = self.rnaEdit.params.output + ".bam"
    #cmd = ["java", "-Xmx4G", "-jar", self.rnaEdit.params.sourceDir + "picard-tools/AddOrReplaceReadGroups.jar",
    #       "INPUT=" + bamFile, "OUTPUT=" + rgFile, "SO=coordinate", "VALIDATION_STRINGENCY=LENIENT",
    #       "CREATE_INDEX=true", "ID=A", "LB=A", "SM=A", "PL=illumina", "PU=HiSeq2000"]
    #Helper.proceedCommand("Add read Groups", cmd, bamFile, rgFile, self.rnaEdit)

    # identify target regions for realignment
    intervalFile = self.rnaEdit.params.output + ".indels.intervals"
    cmd = ["java", "-Xmx16G", "-jar",
           self.rnaEdit.params.sourceDir + "GATK/GenomeAnalysisTK.jar",
           "-nt", self.rnaEdit.params.threads,
           "-T", "RealignerTargetCreator",
           "-R", self.rnaEdit.params.refGenome,
           "-I", markedFile, "-o", intervalFile, "-l", "ERROR"]
    Helper.proceedCommand("Identify Target Regions for realignment", cmd, bamFile, intervalFile, self.rnaEdit)

    # proceed with realignment
    realignedFile = self.rnaEdit.params.output + ".noDup.realigned.bam"
    cmd = ["java", "-Xmx16G", "-jar",
           self.rnaEdit.params.sourceDir + "GATK/GenomeAnalysisTK.jar",
           "-T", "IndelRealigner",
           "-R", self.rnaEdit.params.refGenome,
           "-I", markedFile, "-l", "ERROR",
           "-targetIntervals", intervalFile, "-o", realignedFile]
    Helper.proceedCommand("Proceed Realignment", cmd, intervalFile, realignedFile, self.rnaEdit)
    # Alternative: mark PCR duplicates after realignment (disabled)
    #cmd = ["java", "-Xmx16G", "-jar", self.rnaEdit.params.sourceDir + "picard-tools/MarkDuplicates.jar",
    #       "INPUT=" + realignedFile, "OUTPUT=" + markedFile,
    #       "METRICS_FILE=" + self.rnaEdit.params.output + ".pcr.metrics",
    #       "VALIDATION_STRINGENCY=LENIENT", "CREATE_INDEX=true"]
    #Helper.proceedCommand("mark PCR duplicates", cmd, realignedFile, markedFile, self.rnaEdit)

    # find quality score recalibration spots
    recalFile = self.rnaEdit.params.output + ".recalSpots.grp"
    cmd = ["java", "-Xmx16G", "-jar",
           self.rnaEdit.params.sourceDir + "GATK/GenomeAnalysisTK.jar",
           "-T", "BaseRecalibrator", "-l", "ERROR",
           "-R", self.rnaEdit.params.refGenome,
           "-knownSites", self.rnaEdit.params.dbsnp,
           "-I", realignedFile,
           "-cov", "CycleCovariate", "-cov", "ContextCovariate",
           "-o", recalFile]
    Helper.proceedCommand("Find Quality Score recalibration spots", cmd, realignedFile, recalFile, self.rnaEdit)

    # proceed with quality score recalibration
    cmd = ["java", "-Xmx16G", "-jar",
           self.rnaEdit.params.sourceDir + "GATK/GenomeAnalysisTK.jar",
           "-T", "PrintReads", "-l", "ERROR",
           "-R", self.rnaEdit.params.refGenome,
           "-I", realignedFile, "-BQSR", recalFile, "-o", recaledBamFile]
    Helper.proceedCommand("Proceed Quality Score recalibration", cmd, recalFile, recaledBamFile, self.rnaEdit)

    return recaledBamFile
def printAttributes(self):
    Helper.info("*** CALL VARIANTS WITH FOLLOWING ATTRIBUTES ***", self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t Bam-File: " + self.bamFile, self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t outfilePrefix: " + self.rnaEdit.params.output, self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t refGenome: " + self.rnaEdit.params.refGenome, self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t dbsnp: " + self.rnaEdit.params.dbsnp, self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t HapMap: " + self.rnaEdit.params.hapmap, self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t 1000G Omni: " + self.rnaEdit.params.omni, self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t Alu-Regions: " + self.rnaEdit.params.aluRegions, self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t sourceDir: " + self.rnaEdit.params.sourceDir, self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t threads: " + self.rnaEdit.params.threads, self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t standCall: " + self.rnaEdit.params.standCall, self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t standEmit: " + self.rnaEdit.params.standEmit, self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t keepTemp: " + str(self.rnaEdit.params.keepTemp), self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t intronDistance: " + str(self.rnaEdit.params.intronDistance), self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t minPts: " + str(self.rnaEdit.params.minPts), self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t eps: " + str(self.rnaEdit.params.eps), self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info("\t overwrite: " + str(self.rnaEdit.params.overwrite), self.rnaEdit.logFile, self.rnaEdit.textField)
def check(self):
    return Helper().checkVersion(self.serviceName, '-v')
def blatSearch(self, variants, outFile, minBaseQual, minMissmatch):
    startTime = Helper.getTime()
    Helper.info(" [%s] Search non-uniquely mapped reads" % (startTime.strftime("%c")),
                self.rnaEdit.logFile, self.rnaEdit.textField)

    bamFile = Samfile(self.bamFile, "rb")

    # create a FASTA file of the variant-overlapping reads for blat to remap
    tempFasta = outFile + "_tmp.fa"
    if not os.path.isfile(tempFasta) or not os.path.getsize(tempFasta) > 0:
        # only create the FASTA file if it does not exist yet or is empty
        tempFastaFile = open(tempFasta, "w+")
        mmNumberTotal = len(variants.variantDict)

        #############################################
        #########     CREATE FASTA FILE      #######
        #############################################
        Helper.info(" [%s] Create fasta file for blat " % (startTime.strftime("%c")),
                    self.rnaEdit.logFile, self.rnaEdit.textField)
        counter = 1
        if len(variants.variantDict.keys()) == 0:
            Helper.error("No Variants left", self.rnaEdit.logFile, self.rnaEdit.textField)

        for varKey in variants.variantDict.keys():
            variant = variants.variantDict[varKey]
            varPos = variant.position - 1
            pileups = bamFile.pileup(variant.chromosome, variant.position - 1, variant.position)
            alignments = []
            for x in pileups:
                if x.pos != varPos:
                    continue
                # loop over the reads of that position
                for pileupread in x.pileups:
                    if not pileupread.is_del and not pileupread.is_refskip:
                        if (pileupread.alignment.query_sequence[pileupread.query_position] == variant.alt
                                and pileupread.alignment.query_qualities[pileupread.query_position] >= minBaseQual):
                            alignments.append(pileupread.alignment.seq)
            if len(alignments) >= minMissmatch:
                missmatchReadCount = 0
                for sequence in alignments:
                    tempFastaFile.write("> " + variant.chromosome + "-" + str(variant.position) + "-"
                                        + variant.ref + "-" + variant.alt + "-"
                                        + str(missmatchReadCount) + "\n" + sequence + "\n")
                    missmatchReadCount += 1
            counter += 1
            if counter % 1000 == 0:
                sys.stdout.write("\r" + str(counter) + " of " + str(mmNumberTotal) + " variants done")
                Helper.info(str(counter) + " of " + str(mmNumberTotal) + " variants done",
                            self.rnaEdit.logFile, self.rnaEdit.textField)
                sys.stdout.flush()

        Helper.info("\n created fasta file " + tempFasta,
                    self.rnaEdit.logFile, self.rnaEdit.textField)
        Helper.printTimeDiff(startTime, self.rnaEdit.logFile, self.rnaEdit.textField)
        tempFastaFile.close()

    #############################
    #####   do blat search  ####
    #############################
    pslFile = outFile + ".psl"
    if not os.path.isfile(pslFile) or not os.path.getsize(pslFile) > 0:
        cmd = [self.rnaEdit.params.sourceDir + "blat",
               "-stepSize=5", "-repMatch=2253", "-minScore=20", "-minIdentity=0", "-noHead",
               self.rnaEdit.params.refGenome, tempFasta, pslFile]
        #print cmd
        Helper.proceedCommand("do blat search for unique reads", cmd, tempFasta, "None", self.rnaEdit)

    Helper.info(" [%s] Blat finished" % (startTime.strftime("%c")),
                self.rnaEdit.logFile, self.rnaEdit.textField)
    Helper.info(" [%s] Parse Blat output to look for non-uniquely mapped reads" % (startTime.strftime("%c")),
                self.rnaEdit.logFile, self.rnaEdit.textField)

    if not os.path.isfile(outFile):
        # open psl file and summarize the blat hits per variant
        pslFile = open(pslFile, "r")
        blatDict = {}
        for line in pslFile:
            pslFields = line.split()
            chr, pos, ref, alt, mmReadCount = pslFields[9].split("-")
            varTuple = (chr, int(pos), ref, alt)
            try:
                # number of matches, target name, block count, block sizes, target starts
                blatScore = [pslFields[0], pslFields[13], pslFields[17], pslFields[18], pslFields[20]]
            except IndexError:
                Helper.warning("Not enough values in '%s' (skip)" % line,
                               self.rnaEdit.logFile, self.rnaEdit.textField)
                continue
            if varTuple in blatDict:
                blatDict[varTuple] = blatDict[varTuple] + [blatScore]
            else:
                blatDict[varTuple] = [blatScore]

        siteDict = {}
        discardDict = {}
        Helper.info(" [%s] Analyse Blat hits (Slow)" % (startTime.strftime("%c")),
                    self.rnaEdit.logFile, self.rnaEdit.textField)

        # loop over all blat hits of the mismatch reads to observe the number of alignments
        for varTuple in blatDict.keys():
            keepSNP = False
            chr, pos, ref, alt = varTuple
            pslLine = blatDict[varTuple]
            largestScore = 0
            largestScoreLine = pslLine[0]
            scoreArray = []
            # look for the largest blat score and remember that line
            for blatHit in pslLine:
                lineScore = int(blatHit[0])
                scoreArray.append(lineScore)
                if lineScore > largestScore:
                    largestScore = lineScore
                    largestScoreLine = blatHit
            scoreArray.sort(reverse=True)
            if len(scoreArray) < 2:
                # only one blat hit exists; fake a second one with score 0
                scoreArray.append(0)
            # keep the site if the best hit is on the expected chromosome and
            # the second-best hit scores below 95 percent of the best hit
            if chr == largestScoreLine[1] and scoreArray[1] < scoreArray[0] * 0.95:
                blockCount = int(largestScoreLine[2])
                blockSizes = largestScoreLine[3].split(",")[:-1]
                blockStarts = largestScoreLine[4].split(",")[:-1]
                for i in range(blockCount):
                    startPos = int(blockStarts[i]) + 1
                    endPos = startPos + int(blockSizes[i])
                    if pos >= startPos and pos < endPos:
                        # the alignment overlaps the mismatch position
                        keepSNP = True
            if keepSNP == True:
                siteDict[varTuple] = siteDict.get(varTuple, 0) + 1
            else:
                # the read does not pass the blat criteria
                discardDict[varTuple] = discardDict.get(varTuple, 0) + 1
        pslFile.close()

        ##############################################################################
        #####     loop through variants and delete invalid variants            ######
        ##############################################################################
        Helper.info(" [%s] Deleting invalid variants" % (startTime.strftime("%c")),
                    self.rnaEdit.logFile, self.rnaEdit.textField)
        mmNumberTotal = 0
        mmNumberTooSmall = 0
        mmReadsSmallerDiscardReads = 0
        for key in list(variants.variantDict.keys()):
            numberBlatReads = siteDict.get(key, 0)
            numberDiscardReads = discardDict.get(key, 0)
            if numberBlatReads <= minMissmatch and numberBlatReads <= numberDiscardReads:
                del variants.variantDict[key]
                # count statistics
                if numberBlatReads < minMissmatch:
                    mmNumberTooSmall += 1
                elif numberBlatReads < numberDiscardReads:
                    # more reads failed the blat criteria than passed
                    mmReadsSmallerDiscardReads += 1
            mmNumberTotal += 1

        if self.rnaEdit.params.keepTemp == False:
            os.remove(tempFasta)
            os.remove(pslFile.name)

        # output statistics
        mmPassedNumber = mmNumberTotal - (mmNumberTooSmall + mmReadsSmallerDiscardReads)
        Helper.info("\t\t %d out of %d passed blat criteria" % (mmPassedNumber, mmNumberTotal),
                    self.rnaEdit.logFile, self.rnaEdit.textField)
        Helper.info("\t\t %d mismatches had fewer than %d mismatching reads." % (mmNumberTooSmall, minMissmatch),
                    self.rnaEdit.logFile, self.rnaEdit.textField)
        Helper.info("\t\t %d mismatches had more misaligned reads than correct ones." % (mmReadsSmallerDiscardReads),
                    self.rnaEdit.logFile, self.rnaEdit.textField)

    Helper.printTimeDiff(startTime, self.rnaEdit.logFile, self.rnaEdit.textField)
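# --- Note on the PSL columns used above ----------------------------------------
# blat's headerless PSL output is tab-separated; the 0-based fields consumed by
# blatSearch() are standard PSL columns:
#   0   matches     number of matching bases (used as the hit score)
#   13  tName       target (chromosome) name
#   17  blockCount  number of aligned blocks
#   18  blockSizes  comma-separated block lengths
#   20  tStarts     comma-separated target start of each block
# Illustrative parse of one (made-up) PSL line:
line = "50\t0\t0\t0\t0\t0\t0\t0\t+\tchr1-1000-A-G-0\t50\t0\t50\tchr1\t248956422\t950\t1000\t1\t50,\t0,\t950,"
fields = line.split()
matches, tName = int(fields[0]), fields[13]
blockSizes = fields[18].rstrip(",").split(",")   # ['50']
tStarts = fields[20].rstrip(",").split(",")      # ['950']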
def restart(self):
    Helper().execute('sudo /etc/init.d/' + self.serviceName + ' restart')
def enableSite(self, siteName):
    Helper().execute('sudo ln -s ' + self.pathAvailable + '/' + siteName + ' ' + self.pathEnabled)
def disableSite(self, siteName):
    Helper().rm(self.pathEnabled + '/' + siteName)
class Config:
    data = {}
    helper = False
    withDependencies = False

    def __init__(self, arg):
        self.withDependencies = arg
        self.helper = Helper()

    def getConf(self):
        return self.data

    # =================== Checking ====================== #
    def createQueue(self, conf):
        queue = []
        try:
            for key, vals in conf.iteritems():
                if type(vals) is list:
                    for val in vals:
                        self.checkDependencies(key, val, conf, queue)
                elif type(vals) is dict:
                    for name, params in vals.iteritems():
                        self.checkDependencies(key, name, conf, queue, params)
                else:
                    self.checkDependencies(key, vals, conf, queue)
            return self.sortQueue(queue)
        except:
            print sys.exc_info()

    def checkDependencies(self, folder, className, conf, queue, params=False):
        curClass = self.helper.getClass(folder + '.' + self.helper.ucfirst(className))()
        if hasattr(curClass, 'dependencies') and self.withDependencies == '1':
            for val in curClass.dependencies:
                curVal = val.split('.')
                if curVal[0] in conf:
                    if curVal[1] == conf[curVal[0]] or (hasattr(conf[curVal[0]], 'keys')
                                                        and curVal[1] in conf[curVal[0]].keys()):
                        continue
                self.checkDependencies(curVal[0], curVal[1], conf, queue)
        if params:
            curClass.attrs = {}
            for key, val in params.iteritems():
                curClass.attrs[key] = val
        self.helper.listAdd(curClass, queue)

    def sortQueue(self, queue):
        newQueue = []
        for val in queue:
            if hasattr(val, 'sortOrder'):
                for sortEl in val.sortOrder:
                    self.helper.listFindAndAdd(sortEl, queue, newQueue)
            if hasattr(val, 'dependencies'):
                for sortEl in val.dependencies:
                    self.helper.listFindAndAdd(sortEl, queue, newQueue)
        self.helper.listMerge(queue, newQueue)
        return newQueue

    # =================== Creation ====================== #
    def createConf(self):
        self.data['dist'] = self.setDist()
        self.data['language'] = self.setLang()
        self.data['server'] = self.setServer()
        # self.data['db'] = self.setDb()
        return self.getConf()

    def choice(self, arr, question, isRequired=False):
        string = '===== ' + question + ' =====\n'
        num = 0
        for key, val in arr.iteritems():
            string += str(num) + '. ' + key + '\n'
            num += 1
        if isRequired is False:
            string += str(num) + '. Nothing\n'
        curChoice = raw_input(string + 'Your choice? ')
        if curChoice == str(num) and isRequired is False:
            print ''
            return
        try:
            return arr.keys()[int(curChoice)]
        except:
            print 'not a correct number'
            # ask again by re-invoking the calling setter
            func = inspect.getouterframes(inspect.currentframe())[1][3]
            getattr(self, func)()

    def setDist(self):
        grep = self.helper.execute("cat /etc/lsb-release")
        dist = grep.split('\n')[0].split('=')[1].lower()
        if dist in self.dist:
            return self.dist[dist]
        else:
            dist = raw_input('Type your Linux distribution name? ').lower()
            if dist in self.dist:
                return dist
            else:
                sys.exit('Sorry, this distribution is not supported :(')

    def setLang(self):
        choice = self.choice(ConfigPaths.languages, 'Select language? ')
        return ConfigPaths.languages[choice]

    def setServer(self):
        choice = self.choice(cl.servers, 'Select http server? ')
        return cl.servers[choice]

    def setVcs(self):
        return ''

    def setDb(self):
        choice = self.choice(self.languages, 'Select database? ')
        return self.languages[choice]
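# --- Hedged usage example (Python 2, matching the class above) -----------------
# How Config might be driven from a setup script; passing '1' enables the
# dependency resolution branch in checkDependencies(). The surrounding imports
# (Helper, ConfigPaths, cl, inspect, sys) are assumed to be available.
config = Config('1')              # withDependencies = '1'
conf = config.createConf()        # interactively asks for dist/language/server
queue = config.createQueue(conf)
for item in queue:
    print item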
def __init__(self):
    self.helper = Helper()
    self.sid_list = []
    self.core_list = []