def readFromFile(self, filename):
    """Parse a saved filter file and return its components.

    Blank lines are ignored. The first four non-blank lines must each start
    with '0' or '1'; the text following the first two characters is used only
    when the line starts with '1' and that text is not blank. The fifth line
    holds the number of column filters, which must equal the number of
    remaining lines. Each remaining line has the form
    ``col ::: rel ::: val ::: keep``.

    Returns a tuple ``(restrict_genes_txt, exclude_genes_txt, exclude_var_txt,
    regions_txt, columnfilters)`` where the first four entries are strings or
    None, and ``columnfilters`` is a list of ``(col, rel, val, keep)`` tuples
    with ``keep`` converted to int.

    Raises IOError if there are fewer than six non-blank lines, and ValueError
    for a bad flag line or a wrong filter count.
    """
    with open(filename, 'rU') as infile:
        stripped = (raw.strip() for raw in infile)
        rows = [row for row in stripped if row]

    nrows = len(rows)
    if nrows < 6:
        raise IOError("Format error (too few lines).")

    for flagline in rows[:4]:
        if not flagline.startswith(('0', '1')):
            raise ValueError("Expected string with initial character 0 or 1, but recieved\n'%s'" % flagline)

    Ncolfilt = int(rows[4])
    if nrows != 5 + Ncolfilt:
        raise ValueError("Wrong number of column filters:\n"
                         "Number indicated is %d, but the file contains %d" % (Ncolfilt, nrows - 5))

    def flagged_text(row):
        # Text after the flag and separator; used only for an active, non-blank entry.
        text = row[2:]
        if row.startswith('1') and text.strip():
            return text
        return None

    restrict_genes_txt, exclude_genes_txt, exclude_var_txt, regions_txt = [
        flagged_text(row) for row in rows[:4]]

    columnfilters = []
    for entry in rows[5:5 + Ncolfilt]:
        col, rel, val, keep = entry.split(' ::: ')
        if 'less' in rel or 'greater' in rel:
            # Ordering relations need a numerical value; otherwise skip the filter.
            try:
                val = float(val)
            except ValueError:
                FiltusUtils.warningMessage("Column filter ignored:\n\n'%s %s %s'\n\nNumerical value needed."%(col, rel, val))
                continue
        columnfilters.append((col, rel, val, int(keep)))

    return restrict_genes_txt, exclude_genes_txt, exclude_var_txt, regions_txt, columnfilters
def _f():
    """Open the database tool on notebook page ``pgnr``.

    ``pgnr`` and ``self`` come from the enclosing scope. For pages 0 and 1 the
    call is abandoned when ``checkLoadedSamples(select="all")`` returns a
    false value. The DatabaseWidget is created on first use and kept on
    ``self.databaseTool``.
    """
    if pgnr in (0, 1):
        if not self.checkLoadedSamples(select="all"):
            return
    if not hasattr(self, 'databaseTool'):
        self.databaseTool = FiltusDatabase.DatabaseWidget(self)
    tool = self.databaseTool
    tool.notebook.selectpage(pgnr)
    FiltusUtils.activateInCenter(self.parent, tool)
def extractdb(self):
    """Extract part of an existing variant database and write it to a file.

    Input database info, sample subset, sample names, output file name/format
    and the optional column filter are all read from the widgets. On success
    an info message with the database summary and elapsed time is shown; any
    exception (including the validation errors raised here) is shown as a
    warning instead.
    """
    try:
        started = time.time()
        inFilename, inNS, inNV, inFormat = self.browser.getInfo()
        subset = self.lists.selection
        sampleNames = self.lists.getright()
        outFilename = self.save_browser.getvalue()
        outFormat = self.formatSelect.getvalue()

        cfilter = self.columnFilter.getfilter()
        dbfilter = Filter.Filter(columnfilters=[cfilter]) if cfilter else None

        # Validate the user input before touching any files.
        if not inFilename:
            raise RuntimeError("Please specify existing database")
        if inFormat == "Extended" and not subset:
            raise RuntimeError("No samples selected")
        if not outFilename:
            raise RuntimeError("Please specify output file name and format")

        db = VariantDatabase.readFileAndExtract(inFilename, inFormat, inNS, subset,
                                                sampleNames, outFormat, filter=dbfilter)
        db.save(outFilename)

        head = "Variant database written to %s.\n\n" % outFilename
        summary = "\n".join(db.infoSummary())
        timing = '\n\nTime used: %.2f seconds' % (time.time() - started,)
        FiltusUtils.infoMessage(head + summary + timing)
    except Exception as e:
        FiltusUtils.warningMessage(e)
def _splitFORMAT_update(self, reset=False): '''callback for the splitFormat checkbox''' if reset: self._FORMATheaders = [] self._sampleNames = [] split = self.splitFormatVar.get() column = self.formatColMenu.getvalue() if split and (not column or self.formatColMenu.inconsistent): self.splitFormatVar.set(0) return h = self.currentHeaders[:] def unsplit(): if self._FORMATheaders: h[h.index('GT'):] = [self.formatCol] + self._sampleNames self._FORMATheaders = [] self._sampleNames = [] if split: first = self.firstvariants[0][self.originalHeaders.index(column)] if not first.startswith('GT'): self.formatColMenu.setColor(False) FiltusUtils.warningMessage("FORMAT column entries must begin with 'GT'") return unsplit() # undo possible previous split self.formatCol = column self._FORMATheaders = first.split(':') ind = h.index(column) self._sampleNames = h[ind+1:] h[ind:] = self._FORMATheaders else: unsplit() self._updateColnameMenus(h)
def _executeDialogButton(self, button): try: if button is None or button == 'Cancel': self.stopLoading = True self.dialog.deactivate() return elif button == "Skip this file": self.skipFile = True self.dialog.deactivate() return self.prompt = button != "Use for all files" #button is either this or "Use for this file" self.guess = False try: self._setParameters() except Exception as e: FiltusUtils.warningMessage(e) return self.dialog.deactivate() except Exception as e: FiltusUtils.warningMessage( "Something went wrong. Trying to close the input dialog.") self.dialog.destroy() del self.filtus.fileReader return
def okForDB(VFlist, ndef=None):
    """Check that the samples in VFlist can go into one variant database.

    A warning is shown and False is returned if
      * ``ndef`` is given and some sample has a different number of
        variant-defining columns, or
      * the samples disagree on the number of variant-defining columns, or
      * some sample has ``varDefGetter`` equal to None.
    Otherwise True is returned.
    """
    message = ""
    if ndef is not None and any(len(VF.varDefColNames) != ndef for VF in VFlist):
        message = ('The existing database has %d variant-defining columns, but at least one of the selected samples does not match this. '
                   'To extend this database, make sure to indicate matching columns in the "Columns uniquely defining a variant" entry when loading new files.') % ndef
    elif len(set(len(VF.varDefColNames) for VF in VFlist)) > 1:
        message = 'The selected files do not have the same number of variant-defining columns. To create the database, please load the files again, making sure the "Columns uniquely defining a variant" entries match.'
    elif any(VF.varDefGetter is None for VF in VFlist):
        message = 'There is a problem with the variant-defining columns of (at least one of) the selected samples.'

    if message:
        FiltusUtils.warningMessage(message)
        return False
    return True


def readDB(self, filename):
    """Read a tab-separated database file and split it into preamble and table.

    Leading rows with a single field are treated as the preamble; the first row
    with more than one field is taken as the header row.

    Raises StopIteration if no row has more than one field.

    NOTE(review): in the code visible here the parsed values are neither
    returned nor stored on ``self`` -- confirm whether the remainder of this
    method is missing or the method is obsolete.
    """
    with open(filename, 'rU') as dbfil:
        # The rest of the body refers to this list as ``old``; it was previously
        # bound to a different name, which made every call fail with NameError.
        old = [line.strip().split('\t') for line in dbfil]

    # Index of the first row with more than one tab-separated field.
    m = next(i for i, row in enumerate(old) if len(row) > 1)

    # Preamble rows are single-element lists; join their text, not the lists.
    meta = '\n'.join(row[0] for row in old[:m]) + '\n'
    old[:] = old[m:]
    len_old = len(old) - 1
    old_heads = old[0]
    dbtype = 'simple' if old_heads[-3:] == ['Total', 'Heterozygous', 'Homozygous'] else 'extended'
def _executeDialogButton(self, button): try: if button is None or button == 'Cancel': self.stopLoading = True self.dialog.deactivate() return elif button == "Skip this file": self.skipFile = True self.dialog.deactivate() return self.prompt = button != "Use for all files" #button is either this or "Use for this file" self.guess = False try: self._setParameters() except Exception as e: FiltusUtils.warningMessage(e) return self.dialog.deactivate() except Exception as e: FiltusUtils.warningMessage("Something went wrong. Trying to close the input dialog.") self.dialog.destroy() del self.filtus.fileReader return
def doAddSamples(self):
    """Add the selected samples to an existing variant database file.

    All input is read from the widgets. Raises RuntimeError if the input
    database, the sample selection or the output file name is missing, and
    IndexError if the resulting database has no more samples than the input.
    On success the database is saved and an info message is shown.
    """
    started = time.time()
    filtus = self.filtus
    inFilename, inNS, inNV, inFormat = self.browser.getInfo()
    outFilename = self.save_browser.getvalue()
    outFormat = self.formatSelect.getvalue()
    selection = self.lists.selection
    VFlist = [filtus.filteredFiles[idx] for idx in selection]
    sampleNames = self.lists.getright()

    if not inFilename:
        raise RuntimeError("Please specify existing database")
    if not selection:
        raise RuntimeError("No samples selected")
    if not outFilename:
        raise RuntimeError("Please specify output file name and format")

    db = VariantDatabase.readFileAndAdd(inFilename, inFormat=inFormat, inNS=inNS,
                                        outFormat=outFormat, VFlist=VFlist,
                                        sampleNames=sampleNames)
    # Same sample count as the input means nothing was added.
    if db.nSamples == inNS:
        raise IndexError("No samples to add")
    db.save(outFilename)

    head = "Variant database written to %s.\n\n" % outFilename
    summary = "\n".join(db.infoSummary())
    timing = '\n\nTime used: %.2f seconds' % (time.time() - started,)
    FiltusUtils.infoMessage(head + summary + timing)
def save(self):
    """Save the current search results to a file chosen by the user.

    Does nothing if there are no results or the file dialog is cancelled. The
    meta text (database summary plus query) is written before and/or after the
    results depending on whether 'Top' / 'Bottom' is in
    ``filtus.includePreamble``. Write errors are reported as a warning.
    """
    if self.results is None:
        return
    filtus = self.filtus

    summary_values = (self.filename, self.formatLong, self.nSamples, self.nVariants)
    db_summary = '## Database file: %s\n## Format: %s\n## Number of samples: %d\n## Number of variants: %d\n##\n' % summary_values
    query = '## Query: Chromosome %s, position: %s' % tuple(self.query)
    analysis = "VARIANT DATABASE - SEARCH\n##\n" + db_summary + query
    meta = FiltusUtils.composeMeta(VFlist=None, analysis=analysis)

    fname = tkFileDialog.asksaveasfilename(initialdir=filtus.currentDir,
                                           title="Save search results as")
    if not fname:
        return

    # Remember the chosen directory for the next file dialog.
    filtus.currentDir = os.path.dirname(fname)
    includePre = self.filtus.includePreamble
    try:
        with open(fname, 'w') as outfile:
            if 'Top' in includePre:
                outfile.write(meta)
            outfile.write(self.results)
            if 'Bottom' in includePre:
                outfile.write('\n' + meta)
    except Exception as err:
        FiltusUtils.warningMessage('%s\n\nFile not saved.' % err)
        return
def _m():
    """Merge the selected samples (at least two) and print the result.

    ``self`` and ``collapse`` come from the enclosing scope. Errors during
    merging or printing are shown as a warning.
    """
    VFlist = self.checkLoadedSamples(select="selection", minimum=2)
    if VFlist:
        try:
            merged = FiltusAnalysis.merge(VFlist, collapse=collapse)
            self.text.prettyPrint(merged, label='')
        except Exception as err:
            FiltusUtils.warningMessage(err)
def relatedness_trio_prompt(self):
    """Open the relatedness (trio) dialog, creating it on first use.

    Returns without doing anything if ``checkLoadedSamples(select="all")``
    gives a false value. An exception raised while activating the dialog is
    shown as a warning of the form "<ExceptionName>: <message>".
    """
    samples_ok = self.checkLoadedSamples(select="all")
    if not samples_ok:
        return
    if not hasattr(self, 'relatedness_trio_gui'):
        # Build the dialog lazily and keep it on the instance.
        self.relatedness_trio_gui = FiltusWidgets.RelatednessTrio_GUI(self)
    try:
        FiltusUtils.activateInCenter(self.parent, self.relatedness_trio_gui)
    except Exception as err:
        FiltusUtils.warningMessage("%s: %s" % (type(err).__name__, err))
def pedwriter_prompt(self):
    """Open the PedWriter dialog, creating it on first use.

    Returns without doing anything if ``checkLoadedSamples(select="all")``
    gives a false value. An exception raised while activating the dialog is
    shown as a warning of the form "<ExceptionName>: <message>".
    """
    samples_ok = self.checkLoadedSamples(select="all")
    if not samples_ok:
        return
    if not hasattr(self, 'pedwriter'):
        # Build the dialog lazily and keep it on the instance.
        self.pedwriter = FiltusWidgets.PedWriter(self)
    try:
        FiltusUtils.activateInCenter(self.parent, self.pedwriter)
    except Exception as err:
        FiltusUtils.warningMessage("%s: %s" % (type(err).__name__, err))
def denovo_prompt(self):
    """Open the de novo dialog, creating it on first use.

    Requires at least three loaded samples; returns without doing anything if
    ``checkLoadedSamples`` gives a false value. An exception raised while
    activating the dialog is shown as a warning of the form
    "<ExceptionName>: <message>".
    """
    samples_ok = self.checkLoadedSamples(select="all", VF=False, minimum=3)
    if not samples_ok:
        return
    if not hasattr(self, 'denovogui'):
        # Build the dialog lazily and keep it on the instance.
        self.denovogui = FiltusWidgets.DeNovo_GUI(self)
    try:
        FiltusUtils.activateInCenter(self.parent, self.denovogui)
    except Exception as err:
        FiltusUtils.warningMessage("%s: %s" % (type(err).__name__, err))
def autozyg_prompt(self):
    """Open the AutEx dialog, creating it on first use.

    Requires exactly one selected sample; returns without doing anything if
    ``checkLoadedSamples`` gives a false value. An exception raised while
    activating the dialog is shown as a warning of the form
    "<ExceptionName>: <message>".
    """
    selection_ok = self.checkLoadedSamples(select="selection", VF=False,
                                           minimum=1, maximum=1)
    if not selection_ok:
        return
    if not hasattr(self, 'autexgui'):
        # Build the dialog lazily and keep it on the instance.
        self.autexgui = FiltusWidgets.AutEx_GUI(self)
    try:
        FiltusUtils.activateInCenter(self.parent, self.autexgui)
    except Exception as err:
        FiltusUtils.warningMessage("%s: %s" % (type(err).__name__, err))
def _histogramButtonExecute(self):
    """Draw a histogram of the selected column for the validated samples.

    The column name and the number of bins are read from the widgets. Any
    error, including a missing column selection, is shown as a warning.
    """
    try:
        column = self.histo_var.getvalue()
        if column == '':
            raise RuntimeError("Column variable not selected")
        samples = self._validateInput(checkPresence=[column])
        nbins = int(self.histo_bins.getvalue())
        histogramPlot(samples, column, nbins)
    except Exception as err:
        FiltusUtils.warningMessage(err)
def _comparativeButtonExecute(self):
    """Run the comparative QC plots for the validated samples.

    Which of the 'private', 'heterozygosity' and 'gender' plots to draw is read
    from the check buttons. The text output file is used only when the save
    browser is switched on. Any error is shown as a warning.
    """
    try:
        samples = self._validateInput()
        chosen = self.comparative_checks.getvalue()
        show_private = 'private' in chosen
        show_het = 'heterozygosity' in chosen
        show_gender = 'gender' in chosen

        browser = self.save_browser
        if browser.on():
            outfile = browser.getvalue()
        else:
            outfile = None

        QC_3plots(samples, private=show_private, heterozygosity=show_het,
                  gender=show_gender, writetofile=outfile, save=None)
    except Exception as err:
        FiltusUtils.warningMessage(err)
def geneLookup_prompt(self):
    """Open the gene lookup prompt, creating the GeneLookup object on first use.

    Returns without doing anything if ``checkLoadedSamples(select="all")``
    gives a false value. If no filtered sample has a gene getter, a warning is
    shown instead.
    """
    if not self.checkLoadedSamples(select="all"):
        return

    has_gene_column = any(VF.geneGetter is not None for VF in self.filteredFiles)
    if not has_gene_column:
        FiltusUtils.warningMessage("None of the loaded samples have known gene column.")
        return

    if not hasattr(self, 'geneLookup'):
        self.geneLookup = FiltusWidgets.GeneLookup(self.parent, self)
    FiltusUtils.activateInCenter(self.parent, self.geneLookup.prompt)
def _splitINFO_update(self, column=None, reset=False):
    '''Callback for the INFO option menu. Also called from _readAndSetHeaders (with column=None).

    column: name of the column to split as INFO. If None, the current value of
        the INFO menu is used (nothing is done if that menu is inconsistent).
        The empty string means "no INFO column".
    reset: if true, forget the INFO headers from a previous split first.

    The headers are first restored to the unsplit state (CSQ, then INFO), and
    the selected column is then replaced by one '<key>_INFO' header for each
    'key=value' entry found in the first variants. The column-name menus are
    updated with the result.
    '''
    if reset:
        self._INFOheaders = []
    if column is None:
        if self.infoColMenu.inconsistent:
            return
        column = self.infoColMenu.getvalue()
    self.infoColMenu.setColor(True)
    h = self.currentHeaders[:]  # work on a copy of the current headers

    ### Always start by unsplitting everything:
    # If CSQ is split: unsplit this first
    splitCsq = self.hasCSQ and self.splitCsqVar.get()
    if splitCsq:
        self.splitCsqVar.set(0)
        self._splitCsq_update()
        # NOTE(review): presumably _splitCsq_update changes self.currentHeaders,
        # which is why the copy is refreshed here -- confirm.
        h[:] = self.currentHeaders[:]

    # Unsplit INFO fields: put the original INFO column back in place of the
    # run of split headers.
    if self._INFOheaders:
        ind = h.index(self._INFOheaders[0])
        h[ind:(ind + len(self._INFOheaders))] = [self.infoCol]
        self._INFOheaders = []
        self.infoCol = ''

    ### If empty selection: Reset and return
    if column == "":
        self._updateColnameMenus(h)
        self.splitCsqButt.configure(state="disabled")
        return

    ### Otherwise: split selected column as INFO (if possible)
    first_infos = [v[self.originalHeaders.index(column)] for v in self.firstvariants]
    # Sorted, unique keys of all ';'-separated 'key=value' entries, each suffixed with '_INFO'.
    _INFOheaders = sorted(set(s.split('=')[0] + '_INFO' for info in first_infos for s in info.split(';') if '=' in s))
    if not _INFOheaders:
        # No 'key=value' entries found: mark the menu and leave the headers unsplit.
        self.infoColMenu.setColor(False)
        self.splitCsqButt.configure(state="disabled")
        self._updateColnameMenus(h)
        FiltusUtils.warningMessage("I don't recognise %s as an INFO column"%column)
        return
    ind = h.index(column)
    h[ind:(ind + 1)] = _INFOheaders
    self._updateColnameMenus(h)
    if self.hasCSQ and "CSQ_INFO" in _INFOheaders:
        self.splitCsqButt.configure(state="normal")
        # Re-apply the CSQ split if it was active when this method was entered.
        if splitCsq:
            self.splitCsqVar.set(1)
            self._splitCsq_update()
    self._INFOheaders = _INFOheaders
    self.infoCol = column
def QC_prompt(self):
    """Open the QC dialog, creating the QC object on first use.

    Returns without doing anything if ``checkLoadedSamples`` (at least one
    loaded sample) gives a false value. If activating the dialog raises, the
    error is printed to standard output and the parent window is destroyed.
    """
    if not self.checkLoadedSamples(select="all", minimum=1):
        return
    if not hasattr(self, 'QC'):
        self.QC = FiltusQC.QC(self)
    try:
        FiltusUtils.activateInCenter(self.parent, self.QC.dialog)
    except Exception as e:
        # Parenthesised single-argument form: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("Killing myself because of: %s" % e)
        self.parent.destroy()
def _scatterButtonExecute(self):
    """Draw a scatter plot of two selected columns for the validated samples.

    Column names, alpha and thinning factor are read from the widgets. Any
    error, including a missing axis selection, is shown as a warning.
    """
    try:
        xcol = self.scatter_x.getvalue()
        ycol = self.scatter_y.getvalue()
        if xcol == '':
            raise RuntimeError("X axis column not selected")
        if ycol == '':
            raise RuntimeError("Y axis column not selected")
        samples = self._validateInput(checkPresence=[xcol, ycol])
        alpha = float(self.scatter_alpha.getvalue())
        thin = int(self.scatter_thin.getvalue())
        scatterPlot(samples, xcol, ycol, alpha, thin)
    except Exception as err:
        FiltusUtils.warningMessage(err)
def loadMeta_and_update(self):
    """Read the meta information of the database chosen in the browser.

    On success the sample/variant counts, format and column names are stored
    on the instance, the summary is refreshed and the optional ``updates``
    callback is called with the new format and column names. On failure a
    warning is shown, the summary label shows an error, ``filename`` is set to
    None and the callback (if any) is called with ``reset=True``.
    """
    self.filename = self.browser.getvalue()
    updates = self.updates
    try:
        info = VariantDatabase.readMeta(self.filename)
        _meta, nSamples, nVariants, dbformat, colNames = info
        self.nSamples = nSamples
        self.nVariants = nVariants
        self.format = dbformat
        self.colNames = colNames
        self.updateSummary(self.nSamples, self.nVariants, self.format)
        if updates:
            updates(inFormat=self.format, colNames=self.colNames)
    except Exception as err:
        FiltusUtils.warningMessage("Could not load database.\n\n %s" % err)
        self.summaryLabel.configure(text="Summary: Error")
        self.filename = None
        if updates:
            updates(reset=True)
def geneMaster(cls, geneMaster, nSamples, minSampleCount=1, genelengths=None, model="Dominant", meta=''):
    """Alternate constructor: build the result table from a gene master object.

    geneMaster: mapping from gene name to gene data. Entries shared by fewer
        than ``minSampleCount`` samples are deleted from it (in place).
    nSamples: total number of samples.
    genelengths: optional mapping from gene name to gene length. When
        non-empty, each row also gets the gene length, a p-value and a
        Bonferroni-corrected p-value; '-' is used where these cannot be computed.
    model, meta: passed on to the p-value computation and the constructor.

    Returns ``cls(data, nSamples, geneMaster, shareCounts, ...)`` where
    ``shareCounts[k-1]`` is the number of genes shared by exactly k samples.
    """
    intlist2string = FiltusUtils.intlist2string
    shareCounts, data = [0]*nSamples, []
    # None (rather than a shared mutable {}) is the "no gene lengths" default.
    if genelengths is None:
        genelengths = {}
    M = float(len(genelengths))
    totL = float(sum(genelengths.itervalues()))

    # Average number of variants (after filt) per sample:
    m_aver = sum(g.length for g in geneMaster.itervalues())/float(nSamples)

    # Iterate over a snapshot of the keys, since entries are deleted in the loop.
    for gene in list(geneMaster.keys()):
        geneData = geneMaster[gene]
        samplecount = geneData.nFiles()
        if samplecount < minSampleCount:
            del geneMaster[gene]
            continue
        shareCounts[samplecount-1] += 1
        samples = intlist2string(geneData.getFiles())
        nvars = geneData.length
        nuniqvars = geneData.nUniqVars()
        _info = [gene, samplecount, samples, nvars, nuniqvars]
        if genelengths:
            length = genelengths.get(gene, '-')
            try:
                pval = FiltusUtils.pValue(m=m_aver, Lrel=length/totL, n=nSamples, k=samplecount, model=model)
                pval_bonf = min(pval * M, 1)
                _info.extend([length, '{:.3g}'.format(pval), '{:.3g}'.format(pval_bonf)])
            except Exception:
                # E.g. unknown gene length ('-'): report placeholders, but do
                # not swallow KeyboardInterrupt/SystemExit as a bare except would.
                _info.extend([length, '-', '-'])
        data.append(_info)
    return cls(data, nSamples, geneMaster, shareCounts, minSampleCount=minSampleCount, meta=meta)
def addPvalues(self, samplecount, m_aver, length, totL, M, model, nSamples=None):
    """Return the formatted p-value and Bonferroni-corrected p-value for a gene.

    samplecount: number of samples sharing the gene (``k`` in the p-value call).
    m_aver, model: passed on to FiltusUtils.pValue.
    length, totL: gene length and total length; their ratio is passed as ``Lrel``.
    M: multiplier for the Bonferroni correction (capped at 1).
    nSamples: total number of samples (``n`` in the p-value call). This name was
        previously undefined in the body, so every call fell through to the
        placeholder result. When omitted, ``self.nSamples`` is used if present.

    Returns a list of two strings; ``['-', '-']`` if the computation fails.
    """
    if nSamples is None:
        # NOTE(review): assumes the instance stores the sample count as
        # ``self.nSamples`` -- confirm against the constructor.
        nSamples = getattr(self, 'nSamples', None)
    try:
        pval = FiltusUtils.pValue(m=m_aver, Lrel=length/totL, n=nSamples, k=samplecount, model=model)
        pval_bonf = min(pval * M, 1)
        return ['{:.3g}'.format(pval), '{:.3g}'.format(pval_bonf)]
    except Exception:
        # E.g. a non-numeric gene length; KeyboardInterrupt/SystemExit pass through.
        return ['-', '-']
def setAllColnames(self):
    """Refresh everything that depends on the set of loaded column names.

    The unique column names over all loaded files are passed to the filter
    machine, and the column summary menu is rebuilt with one entry per column.
    With no columns, the first entry of the View menu is disabled. The summary
    menu is broken into columns of at most 28 entries each.
    """
    uniqueCols = FiltusUtils.listUnique([head for VF in self.files for head in VF.columnNames])
    self.FM.setColnames(uniqueCols)
    menubar = self.menuBar
    viewmenu = menubar.component('View-menu')
    if not uniqueCols:
        viewmenu.entryconfigure(0, state='disabled')
        return
    viewmenu.entryconfigure(0, state='normal')

    # Remove the existing summary entries before adding the new ones.
    summarymenu = menubar.component('columnsum-menu')
    L = summarymenu.index('end')
    if L is not None:
        menubar.deletemenuitems('columnsum', 0, L)

    summarizer = FiltusAnalysis.ColumnSummary()
    for col in uniqueCols:
        menubar.addmenuitem('columnsum', 'command', None, label=col, font=self.defaultfont,
                            command=self._showSummary(summarizer, col))

    # Ceiling division written with explicit floor division (-(-a // b)), so the
    # result is an integer regardless of how "/" is defined for ints.
    nUnique = len(uniqueCols)
    nCols = -(-nUnique // 28)   # number of menu columns, at most 28 entries each
    N = -(-nUnique // nCols)    # entries per menu column
    for i in range(1, nCols):
        summarymenu.entryconfigure(i * N, columnbreak=1)
def select(self):
    """Move the samples highlighted in the left list box to the right one.

    Samples whose index is already in ``self.selection`` are reported in a
    warning and skipped. The remaining ones are appended to the right list box
    and to ``self.selection``, and the right box's top text is updated with its
    new size.
    """
    source, target = self._leftlist, self._rightlist
    filelist = source.get()
    chosen = source.getcurselection()
    chosen_ind = [filelist.index(name) for name in chosen]

    taken = [idx for idx in chosen_ind if idx in self.selection]
    if taken:
        FiltusUtils.warningMessage("Samples already selected:\n\n%s" % '\n'.join(filelist[idx] for idx in taken))
        fresh = [(name, idx) for name, idx in zip(chosen, chosen_ind) if idx not in taken]
        chosen = [name for name, _ in fresh]
        chosen_ind = [idx for _, idx in fresh]

    target.insert('end', *chosen)
    self.selection.extend(chosen_ind)
    target.settoptext("Selected: %d" % target.size())
def _prepare(self):
    """Fill the sample list and the column menus from the loaded files.

    Each sample is listed as "<number>: <basename of shortName>". The three
    column menus get an empty first item followed by the unique column names
    over all files.
    """
    files = self.filtus.files

    labels = []
    for number, VF in enumerate(files, 1):
        labels.append('%2d: %s' % (number, os.path.basename(VF.shortName)))
    self.names.setlist(labels)
    self._selectall_and_update()

    all_heads = [head for VF in files for head in VF.columnNames]
    cols = FiltusUtils.listUnique(all_heads)
    for colmenu in (self.scatter_x, self.scatter_y, self.histo_var):
        colmenu.setItems([''] + cols)
def read(self, filename, **kwargs):
    """Read one variant file, prompting for input settings when needed.

    Recognised keyword arguments are ``prompt`` and ``guess``; both are popped
    from ``kwargs`` and the rest is passed on to ``_guessAndPrepare``.

    Returns the object produced by the reader. Returns None if the user
    cancelled or skipped the file (``stopLoading`` / ``skipFile`` set), or if
    an unexpected error occurred and the file is skipped. After a failure
    (when appropriate) the method calls itself again with the prompt forced on.
    """
    self.skipFile = False
    self.stopLoading = False
    # True if there is no previous file, or the extension differs from it.
    new_ext = self.currentfile is None or (os.path.splitext(filename)[1] != os.path.splitext(self.currentfile)[1])
    self.prompt = kwargs.pop('prompt', self.prompt or new_ext)
    self.guess = kwargs.pop('guess', self.guess or (self.prompt and new_ext))
    promptShowsOk = None # modified when trying to show prompt
    try:
        self._guessAndPrepare(filename, kwargs)
        if self.prompt or any(OM.inconsistent for OM in self._activeMenus()):
            # False while the dialog is being shown, True once it has returned.
            promptShowsOk = False
            FiltusUtils.activateInCenter(self.parent, self.dialog)
            promptShowsOk = True
        else:
            self._setParameters()
        if self.stopLoading or self.skipFile:
            return
        self.filtus.busy()
        common_params = dict(filename=filename, sep=self.sep, chromCol=self.chromCol, posCol=self.posCol, geneCol=self.geneCol, splitAsInfo=self.infoCol, split_general=self.split_general, prefilter=self.prefilter)
        # Note: splitAsInfo works also for nonVCF, but not splitCSQ (which requires correct preamble data)
        if self.vcf:
            VF = self.reader.readVCFlike(formatCol=self.formatCol, splitFormat=self.splitFormat, splitCsq=self.splitCsq, keep00=self.keep00, **common_params)
        else:
            VF = self.reader.readNonVCF(skiplines=self.skiplines, gtCol=self.gtCol, homSymbol=self.homSymbol, **common_params)
        self.filtus.notbusy()
    except (ValueError, RuntimeError) as e:
        # Show the message and try again with the settings dialog.
        self.filtus.notbusy()
        FiltusUtils.warningMessage(e)
        return self.read(filename, guess=False, prompt=True)
    except Exception as e:
        self.filtus.notbusy()
        if promptShowsOk:
            # The dialog was shown successfully, so let the user try again.
            FiltusUtils.warningMessage("An error occured while reading this file:\n%s\n\n%s: %s\n\nPlease try again or skip file." %(filename, type(e).__name__, e))
            return self.read(filename, guess=False, prompt=True)
        else:
            # The dialog was not shown, or failed while being shown: skip the file.
            FiltusUtils.warningMessage("%s: %s\n\nSkipping this file: %s" %(type(e).__name__, e, filename))
            self.skipFile = True
            return
    if self.checkHomozygosity and VF.noHomozygotes():
        tryagain = FiltusUtils.yesnoMessage('The file %s has no homozygous variants. Go back to settings dialog?'%filename)
        if tryagain:
            VF = self.read(filename, guess = False, prompt=True)
    return VF
def _doCreate(self):
    """Build a new variant database from the selected samples and save it.

    All input is read from the widgets. Raises RuntimeError if no samples are
    selected or no output file name is given. On success an info message with
    the database summary and the elapsed time is shown.
    """
    started = time.time()
    filtus = self.filtus
    selection = self.lists.selection
    VFlist = [filtus.filteredFiles[idx] for idx in selection]
    sampleNames = self.lists.getright()
    outFormat = self.formatSelect.getvalue()
    outFilename = self.save_browser.getvalue()

    if not selection:
        raise RuntimeError("No samples selected")
    if not outFilename:
        raise RuntimeError("Please specify output file name and format")

    db = VariantDatabase.buildFromSamples(VFlist, outFormat, sampleNames)
    db.save(outFilename)

    head = "Variant database written to %s.\n\n" % outFilename
    summary = "\n".join(db.infoSummary())
    timing = '\n\nTime used: %.2f seconds' % (time.time() - started,)
    FiltusUtils.infoMessage(head + summary + timing)
def _getFormatHeads(self, data, formatIndex): formats = set(x[formatIndex] for x in data) allEqual = len(formats) == 1 if allEqual: formatHeads = list(formats)[0].split(':') else: formatHeads = FiltusUtils.listUnique( [field for F in formats for field in F.split(':')]) return formatHeads, allEqual
def __init__(self, filename, columnNames, columnDescriptions, variants, chromCol, posCol, geneCol, formatHeads,
             splitFormat, splitInfo, prefilter=None, appliedFilters=None, keep00=None, nGenes=None, meta=''):
    """Initialise a VCF-type variant data object.

    The genotype column is 'GT' when the FORMAT column has been split, otherwise
    the last column. REF and ALT columns are looked up among a few known names;
    if both are found, ``chromPosRefAlt`` extracts (chrom, pos, ref, alt),
    otherwise a warning is shown and ``varDefGetter`` is used instead.
    """
    gtCol = 'GT' if splitFormat else columnNames[-1]
    homSymbol = '<vcf format>'
    VariantData.__init__(self, filename, columnNames, variants, chromCol, posCol, geneCol, gtCol, homSymbol,
                         columnDescriptions=columnDescriptions, prefilter=prefilter,
                         appliedFilters=appliedFilters, nGenes=nGenes, meta=meta)
    self.splitFormat = splitFormat
    self.splitInfo = splitInfo
    self.isVCFtype = True
    self.keep00 = keep00
    self.formatHeads = formatHeads

    # First known REF / ALT column name present among the columns (None if absent).
    self.refCol = next((h for h in ['REF', 'VCF_REF', 'vcf_ref', 'Ref'] if h in columnNames), None)
    self.altCol = next((h for h in ['ALT', 'VCF_ALT', 'vcf_alt', 'Alt', 'Obs'] if h in columnNames), None)
    if self.refCol and self.altCol:
        self.chromPosRefAlt = self.columnGetter(chromCol, posCol, self.refCol, self.altCol)
    else:
        FiltusUtils.warningMessage("Unknown REF/ALT columns in %s" %filename)
        self.chromPosRefAlt = self.varDefGetter

    # gtCol and homSymbol are fixed for this type; replace them with the
    # VCF-specific attributes.
    self._mainAttributes = [a for a in self._mainAttributes if a not in ['gtCol', 'homSymbol']] \
                           + ['formatHeads', 'splitFormat', 'splitInfo', 'keep00']
def checkLoadedSamples(self, select, minimum=None, maximum=None, VF=True, filtered=True):
    '''Check that enough samples are loaded/selected for an analysis.

    Typically called immediately after a button press starting an analysis.

    select: either "selection" (samples selected in the file list box) or
        "all". For any other value nothing is checked and None is returned.
    minimum, maximum: optional bounds on the number of samples.
    VF: if true, return the sample objects; otherwise return their indices.
    filtered: if true, use the filtered samples, otherwise the unfiltered ones.

    If any problem occurs, a warning is displayed and False is returned.
    '''
    def plural_s(k):
        if k == 1:
            return ''
        return 's'

    useMin = minimum is not None
    useMax = maximum is not None
    try:
        if len(self.files) == 0:
            raise IndexError("No samples are loaded")

        if filtered:
            files = self.filteredFiles
        else:
            files = self.files
        if len(files) == 0:
            # No filtered samples yet: start from a fresh copy.
            files = self.filteredFiles = self.filteredFiles_initialcopy()

        if select == "all":
            if useMin and len(files) < minimum:
                raise IndexError("This option requires at least %d loaded sample%s." % (minimum, plural_s(minimum)))
            if VF:
                return files
            return range(len(files))

        if select == "selection":
            seleci = [int(i) for i in self.fileListbox.curselection()]
            nsel = len(seleci)
            too_few = useMin and nsel < minimum
            too_many = useMax and nsel > maximum
            if useMin and useMax and (too_few or too_many):
                if minimum == maximum:
                    raise IndexError("Please select exactly %d sample%s in the 'Loaded samples' window" %(minimum, plural_s(minimum)))
                raise IndexError("Please select between %d and %d samples in the 'Loaded samples' window" %(minimum, maximum))
            elif too_few:
                raise IndexError("Please select at least %d sample%s in the 'Loaded samples' window" %(minimum, plural_s(minimum)))
            elif too_many:
                raise IndexError("Please select at most %d sample%s in the 'Loaded samples' window" %(maximum, plural_s(maximum)))
            if VF:
                return [files[i] for i in seleci]
            return seleci
    except Exception as err:
        FiltusUtils.warningMessage(err)
        return False
def _splitFORMAT_update(self, reset=False): '''callback for the splitFormat checkbox''' if reset: self._FORMATheaders = [] self._sampleNames = [] split = self.splitFormatVar.get() column = self.formatColMenu.getvalue() if split and (not column or self.formatColMenu.inconsistent): self.splitFormatVar.set(0) return h = self.currentHeaders[:] def unsplit(): if self._FORMATheaders: h[h.index('GT'):] = [self.formatCol] + self._sampleNames self._FORMATheaders = [] self._sampleNames = [] if split: first = self.firstvariants[0][self.originalHeaders.index(column)] if not first.startswith('GT'): self.formatColMenu.setColor(False) FiltusUtils.warningMessage( "FORMAT column entries must begin with 'GT'") return unsplit() # undo possible previous split self.formatCol = column self._FORMATheaders = first.split(':') ind = h.index(column) self._sampleNames = h[ind + 1:] h[ind:] = self._FORMATheaders else: unsplit() self._updateColnameMenus(h)
def _run(self, event):
    """Run a function from the (freshly reloaded) ``autorun`` module.

    If the triggering key is 'Return', the function named 'x' is run. Otherwise
    the user is asked for the function name in a prompt dialog. A name that
    does not exist in ``autorun`` falls back to ``FiltusUtils.ignore_break``.
    The chosen function is called with ``self`` as its only argument.
    """
    import autorun
    reload(autorun)

    if event.keysym == 'Return':
        funcname = 'x'
    else:
        def execute(button):
            # 'Cancel' closes the dialog without a result; any other button
            # returns the stripped entry text.
            if button == 'Cancel':
                prompt.deactivate()
            else:
                prompt.deactivate(prompt.get().strip())

        prompt = Pmw.PromptDialog(self.parent, buttons=('OK', 'Cancel'), title='Run command',
                                  label_text='Function to run:', entryfield_labelpos='n',
                                  command=execute, defaultbutton=0)
        funcname = FiltusUtils.activateInCenter(self.parent, prompt)

    if funcname:
        func = getattr(autorun, funcname, FiltusUtils.ignore_break)
        func(self)
def buildFromSamples(cls, VFlist, outFormat, sampleNames=None):
    r"""Build a variant database from a list of samples.

    The database always has the six columns CHROM, POS, OBS, HET, HOM, AFREQ.
    If the output format (after ``formatInit``) is "E" (extended), one column
    per sample follows; sample names default to the samples' short names.

    Internally one row per variant is built, with one entry per sample set to
    the value returned by that sample's ``GTnum()`` function (0 if the sample
    does not have the variant).
    """
    N = len(VFlist)

    def empty_row():
        return [0] * N

    # Build the extended part first, i.e. one list of per-sample genotype
    # numbers for each variant; this is used for the simple format as well.
    extended = collections.defaultdict(empty_row)
    for sample_index, VF in enumerate(VFlist):
        genotype = VF.GTnum()
        variant_key = VF.varDefGetter
        for variant in VF.variants:
            extended[variant_key(variant)][sample_index] = genotype(variant)

    colNames = ['CHROM', 'POS', 'OBS', 'HET', 'HOM', 'AFREQ']
    outFormat = formatInit(outFormat)
    if outFormat == "E":
        if sampleNames is None:
            sampleNames = [VF.shortName for VF in VFlist]
        colNames.extend(sampleNames)

    meta = FiltusUtils.composeMeta(VFlist=VFlist, analysis="NEW VARIANT DATABASE", sort=False)
    return cls(outFormat, nSamples=N, columnNames=colNames, extendedDict=extended, meta=meta)
def settingsPrompt(self):
    """Show the settings dialog via FiltusUtils.activateInCenter."""
    dialog = self.settingsDialog
    FiltusUtils.activateInCenter(self.parent, dialog)
def plink_prompt(self):
    """Open the PLINK dialog, creating it on first use.

    Requires exactly one selected sample; returns without doing anything if
    ``checkLoadedSamples`` gives a false value.
    """
    selection_ok = self.checkLoadedSamples(select="selection", VF=False,
                                           minimum=1, maximum=1)
    if not selection_ok:
        return
    if not hasattr(self, 'plinkgui'):
        # Build the dialog lazily and keep it on the instance.
        self.plinkgui = FiltusWidgets.PLINK_GUI(self)
    FiltusUtils.activateInCenter(self.parent, self.plinkgui)
def apply(self, VF, checks = True, inplace=False):
    """Apply this filter to the variant data object VF.

    checks: if true, ``self.checks(VF)`` is run first. If it raises ValueError,
        a warning is shown and the result is an empty variant set.
    inplace: if true, VF itself is modified (variants and ``appliedFilters``)
        and None is returned. Otherwise a copy of VF with the filtered variants
        is returned, made with ``VF.copyAttributes``.

    The filtering steps are applied in the numbered order below.
    """
    if checks:
        try:
            self.checks(VF)
        except ValueError as message:
            FiltusUtils.warningMessage(message)
            if inplace:
                VF.setVariants([])
                VF.appliedFilters = self
                return
            else:
                return VF.copyAttributes(variants=[], appliedFilters=self)

    headers = VF.columnNames
    columnfilters = self.columnfilters
    exclude_variants = self.exclude_variants
    restrict_to_variants = self.restrict_to_variants
    exclude_genes = self.exclude_genes
    restrict_to_genes = self.restrict_to_genes
    regionsChromdic = self.regionsChromdic
    res = VF.variants[:]  # work on a copy of the variant list

    ### 0. if removeClosePairs - do this first
    if self.closePairLimit > 0:
        res[:] = removeClosePairs(VF, minDist = self.closePairLimit, variants_only=inplace) #TODO

    ### 1. restriction filters. Usually not used ###
    if restrict_to_genes: #Doing this first because: Usually a very small set, thus reducing the variant set substantially
        annGenes = VF.annotatedGenes
        res[:] = [v for v in res if any(g in restrict_to_genes for g in annGenes(v))]
    if restrict_to_variants: #Not yet implemented in the GUI, but used in Family Gene Sharing ((is that correct??)
        varDef = VF.varDefGetter
        res[:] = [v for v in res if varDef(v) in restrict_to_variants]

    ### 2. exclude variants. Usually present, and usually gives large reduction.
    if exclude_variants:
        varDef = VF.varDefGetter
        res[:] = [v for v in res if varDef(v) not in exclude_variants]

    ### 3. column filters: These are already sorted w.r.t. speed ###
    if columnfilters:
        for col, op, entry, keep in columnfilters:
            if col in headers:
                getcol = itemgetter(headers.index(col))
                res[:] = [v for v in res if op(getcol(v), entry)]
            elif keep:
                # Column missing but the filter is marked "keep": leave the variants untouched.
                continue
            # else: This is dealt with in checks()

    ### 4. regions: ###
    if regionsChromdic is not None:
        firstGreater = FiltusUtils.firstGreater
        chrom, pos = VF.chromGetter, VF.posGetter
        # Group the remaining variants by chromosome.
        chrom_vars = collections.defaultdict(list)
        for v in res:
            chrom_vars[chrom(v)].append(v)
        res = []
        for chr in sorted(chrom_vars.keys(), key=FiltusUtils.chromInt):
            # NOTE(review): a plain dict would raise KeyError here for a chromosome
            # without regions -- presumably regionsChromdic is a defaultdict; confirm.
            reg_startstops = regionsChromdic[chr]
            vars = chrom_vars[chr]
            vars.sort(key=lambda v: float(pos(v))) # possible to avoid doing float(pos()) twice??
            interv = firstGreater((float(pos(v)) for v in vars), reg_startstops) # --> odd/even if inside/outside some region!
            res.extend(v for v, intv in zip(vars, interv) if intv % 2 == 1)

    ### 5. exclude genes. Might be relatively slow because of annGenes. Haven't checked this though. ###
    if exclude_genes:
        annGenes = VF.annotatedGenes
        res[:] = [v for v in res if not any(g in exclude_genes for g in annGenes(v))]

    ### 6. model filter and controls (benign pairs in compound rec models) ###
    comb = itertools.combinations
    model = self.model
    if model == 'Recessive homozygous':
        isHom = VF.isHomALT()
        res[:] = [v for v in res if isHom(v)]
    elif model == 'Recessive':
        varDef = VF.varDefGetter
        annGenes = VF.annotatedGenes
        isHom = VF.isHomALT()
        benignPairs = self.benignPairs
        if benignPairs:
            remov = set()
            heteroDict = collections.defaultdict(set) #dict of heterozygous variants
            for v in res:
                if not isHom(v):
                    vdef = varDef(v)
                    for g in annGenes(v):
                        heteroDict[g].add(vdef)
            # the following ignores that variants can be annotated with multiple gene. TODO
            for g, vars in heteroDict.iteritems():
                if g in benignPairs:
                    # Heterozygous variants that occur only in benign pairs are removed.
                    nonBenign = set(frozenset(pair) for pair in comb(vars, 2)) - benignPairs[g]
                    remov.update(vars - set(vdef for pair in nonBenign for vdef in pair))
            res[:] = [v for v in res if varDef(v) not in remov]
        # Keep homozygous variants, and variants in genes with more than one remaining variant.
        geneCount = collections.Counter(g for w in res for g in annGenes(w))
        res[:] = [v for v in res if isHom(v) or any(geneCount[g] > 1 for g in annGenes(v))]

    if inplace:
        VF.setVariants(res)
        VF.appliedFilters = self
        return
    else:
        return VF.copyAttributes(variants=res, appliedFilters=self)
def QC_3plots(VFlist, gender=True, private=True, heterozygosity=True, writetofile=None, save=None, show=True):
    """Draw the comparative QC plots (gender, private variants, heterozygosity).

    VFlist: the samples to compare.
    gender, private, heterozygosity: which plots to draw. If none is requested,
        None is returned without plotting.
    writetofile: optional file name; if given, the plotted numbers are also
        written to this file as tab-separated text.
    save, show: passed on to ``showAndSave``.

    A legend panel with distinct markers is added when there are fewer than 13
    samples. Returns the figure.

    Raises RuntimeError if the heterozygosity plot is requested but no
    autosomal variants are found.
    """
    if private + heterozygosity + gender == 0:
        return None
    N = len(VFlist)
    add_legend = N < 13  # only 12 distinct marker/colour combinations are defined below
    Nplots = private + heterozygosity + gender + add_legend
    nrow = int(math.sqrt(Nplots))
    ncol = math.ceil(float(Nplots)/nrow)
    fig = plt.figure(figsize=(3.5*ncol, 3.5*nrow))
    if add_legend:
        markers = ['D','^','*','d','<','s','p','v','D','^','*','d']
        sizes = [6,8,8,8,8,8,8,8,6,8,8,8]
        cols = ['red', 'lime', 'cyan', 'brown', 'magenta', 'gold', 'pink', 'black', 'purple', 'gray', 'silver', 'green']
    else:
        markers, sizes, cols = ['o']*N, [6]*N, ['red']*N

    DB = FiltusDatabase.VariantDatabase.buildFromSamples(VFlist, "Extended")
    # NOTE(review): the code below treats each row of DB.variants as six leading
    # columns followed by one genotype number per sample (x[6:]) -- confirm
    # against VariantDatabase.
    db_str = DB.variants
    if writetofile:
        sep = '\t'
        text_out = FiltusUtils.composeMeta(VFlist, analysis="QC PLOTS")
    plotnr = 0
    if gender:
        plotnr += 1
        ax_sex = fig.add_subplot(nrow, ncol, plotnr, aspect=1)
        XminusPAR = FiltusUtils.XminusPAR
        # Per-sample columns of the rows selected by XminusPAR (applied to the first two fields).
        db_X_raw = [x[6:] for x in db_str if XminusPAR(x[:2])]
        if db_X_raw:
            db_X = zip(*db_X_raw)  # transpose: one tuple per sample
            totals_X = [sum(map(bool, x)) for x in db_X]
            # Percentage of entries equal to 1 among the non-zero entries of each sample.
            hets = [sum(g == 1 for g in sample)*100.0/tot if tot>0 else 0 for sample, tot in zip(db_X, totals_X)]
            for i in range(N):
                ax_sex.plot(totals_X[i], hets[i], marker=markers[i], color=cols[i], markersize=sizes[i])
        else:
            totals_X, hets = [0]*N, [0]*N
            #print "Empty gender estimation plot.\n\nNo variants found on X \ PAR."
        setPlotParams(ax_sex, "Gender estimation", 'Variants on X (-PAR)', 'Heterozygosity (%)', ylim=(0,100))
        # Shaded bands: 0-15 % (labelled MALE), 15-35 % (labelled ?), 35-100 % (labelled FEMALE).
        ax_sex.axhspan(0, 15, facecolor='blue', alpha=0.2)
        ax_sex.axhspan(15, 35, facecolor='red', alpha=0.2)
        ax_sex.axhspan(35, 100, facecolor='green', alpha=0.2)
        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        ax_sex.text(0.05, 0.95, "FEMALE", transform=ax_sex.transAxes, fontsize="x-small", va="top", ha='left', bbox=props)
        ax_sex.text(0.05, 0.27, "? ", transform=ax_sex.transAxes, fontsize="x-small", va="center", ha='left', bbox=props)
        ax_sex.text(0.95, 0.05, "MALE", transform=ax_sex.transAxes, fontsize="x-small", va="bottom", ha='right', bbox=props)
        if writetofile:
            headers = sep.join(['Sample', 'Variants on X (-PAR)', 'Heterozygosity (%)', 'Gender'])
            genders = ['?' if tot==0 or 15<h<35 else 'Male' if h<=15 else 'Female' for tot, h in zip(totals_X, hets)]
            points = [sep.join([s, str(x), '%.2f'%y, g]) for s,x,y,g in zip(DB.sampleNames, totals_X, hets, genders)]
            text_out += "***Plot: Gender estimation***\n" + headers + '\n' + '\n'.join(points) + '\n\n'
    if private:
        plotnr += 1
        ax_priv = fig.add_subplot(nrow, ncol, plotnr, aspect=1)
        # One list of booleans per sample: True where the sample has the variant.
        db_nonz = [map(bool, x) for x in zip(*db_str)[6:]]
        totals_all = map(sum, db_nonz)
        if max(totals_all)>2000:
            totals_all = [tot/1000.0 for tot in totals_all]
            xlab = '# variants/1000'
        else:
            xlab = '# variants'
        # A variant is private if exactly one sample has it.
        rowSums_nonz = map(sum, zip(*db_nonz))
        priv_ind = [i for i in range(len(rowSums_nonz)) if rowSums_nonz[i]==1]
        privates = [sum(sampl[i] for i in priv_ind) for sampl in db_nonz]
        for i in range(N):
            ax_priv.plot(totals_all[i], privates[i], marker=markers[i], color=cols[i], markersize=sizes[i])
        setPlotParams(ax_priv, "Private variants", xlab, 'Private')
        if writetofile:
            headers = sep.join(['Sample', xlab, 'Private'])
            points = [sep.join([s, str(x), str(y)]) for s,x,y in zip(DB.sampleNames, totals_all, privates)]
            text_out += "***Plot: Private variants***\n" + headers + '\n' + '\n'.join(points) + '\n\n'
    if heterozygosity:
        plotnr += 1
        ax_het = fig.add_subplot(nrow, ncol, plotnr, aspect=1)
        chromInt = FiltusUtils.chromInt
        # Per-sample columns for rows whose chromosome number (chromInt) is below 23.
        db_AUT = zip(*[x[6:] for x in db_str if chromInt(x[0]) < 23])
        if not db_AUT:
            raise RuntimeError("Empty heterozygosity plot.\n\nNo autosomal variants found.")
        totals_AUT = [sum(map(bool, x)) for x in db_AUT]
        hets = [sum(g == 1 for g in sample)*100.0/tot if tot>0 else 0 for sample, tot in zip(db_AUT, totals_AUT)]
        if max(totals_AUT) > 2000:
            totals_AUT = [tot/1000.0 for tot in totals_AUT]
            xlab = '# autosomal variants/1000'
        else:
            xlab = '# autosomal variants'
        for i in range(N):
            ax_het.plot(totals_AUT[i], hets[i], marker=markers[i], color=cols[i], markersize=sizes[i])
        setPlotParams(ax_het, "Heterozygosity", xlab, 'Heterozygosity (%)', ylim=(-5,105))
        if writetofile:
            # 'A'+xlab[3:] turns '# autosomal ...' into 'Autosomal ...' for the text header.
            headers = sep.join(['Sample', 'A'+xlab[3:], 'Heterozygosity (%)'])
            points = [sep.join([s, str(x), '%.2f'%y]) for s,x,y in zip(DB.sampleNames, totals_AUT, hets)]
            text_out += "***Plot: Heterozygosity***\n" + headers + '\n' + '\n'.join(points) + '\n'
    if writetofile:
        with open(writetofile, 'w') as out:
            out.write(text_out)
    if add_legend:
        plotnr +=1
        ax_legend = fig.add_subplot(nrow, ncol, plotnr, aspect=1)
        simplenames = [VF.shortName for VF in VFlist]
        ax_legend.set_frame_on(False)
        ax_legend.axis('off')
        # Empty plots, used only to create the legend entries.
        for i in range(N):
            ax_legend.plot([], marker=markers[i], color=cols[i], markersize=sizes[i], label=simplenames[i], ls='None')
        ax_legend.legend(loc=2, numpoints=1, fontsize='small', frameon=False, title="Legend")
    showAndSave(fig, tight=True, show=show, save=save)
    return fig
def __init__(self, parent):
    '''Build the main FILTUS window inside the Tk root `parent`.

    Sets the window title, creates the scrolled main frame, configures the
    shared fonts, initialises the file bookkeeping attributes, builds the
    widgets (file group, filter machine, text field, sharing notebook,
    settings dialog, menu bar) and places them on the grid. Finally a
    warning is shown if the module-level PLOT_error is truthy.
    '''
    self.parent = parent
    self.version = VERSION
    parent.title("FILTUS " + self.version)
    self.manualdir = os.path.join(SCRIPT_DIR, "man")
    self.datadir = os.path.join(SCRIPT_DIR, "data")
    self.busyManager = BusyManager(parent)
    self.windowingsystem = parent.tk.call('tk', 'windowingsystem')
    # On 'aqua' two event sequences are registered as right-click; otherwise only '<3>'.
    self.rightclickevents = ['<2>', '<Control-1>'] if self.windowingsystem == 'aqua' else ['<3>']
    parent.rowconfigure(1, weight=1)
    parent.columnconfigure(0, weight=1)
    self.scrollframe = Pmw.ScrolledFrame(parent, borderframe=0, clipper_borderwidth=0, vertflex='expand', horizflex='expand')
    # All main widgets below are created inside the scrolled frame's interior.
    frame = self.scrollframe.interior()
    frame.rowconfigure(2, weight=1)
    frame.columnconfigure(1, weight=1)
    self.frame = frame

    ### fonts
    self.defaultfont = tkFont.nametofont("TkDefaultFont")
    # Four independent copies of the default font, adjusted separately afterwards.
    self.smallfont, self.smallbold, self.tinyfont, self.titlefont = self.defaultfont.copy(), self.defaultfont.copy(), self.defaultfont.copy(), self.defaultfont.copy()
    self.smallbold['weight'] = 'bold'
    self.monofont = tkFont.nametofont("TkFixedFont")
    self.monobold = self.monofont.copy()
    self.monobold['weight'] = 'bold'
    self.textfont = tkFont.nametofont("TkTextFont")
    #self.menufont = tkFont.nametofont("TkMenuFont") # this didn't respond to change... Used workaround by setting menu label fonts manually to defaultfont.
    # Copy every listed option of the fixed-width font onto the text font.
    for opt in ['family','weight','slant','underline','overstrike']:
        self.textfont[opt] = self.monofont[opt]
    self.setFontSizes(self.defaultfont['size'], self.textfont['size'], init=True)

    # Bookkeeping for loaded files and file-name display.
    self.files = []
    self.filteredFiles = []
    self.shortFilenames = False
    self.longFileNameList = []
    self.shortFileNameList = []
    self.currentFileNameList = []
    self.currentDir = ""
    self.currentFileDir = os.getcwd()
    self.storage = {} # storage for variant databases (to avoid reloading when filtering)

    ############## The file group
    self.fileGroup = Tkinter.Frame(frame)
    self.fileGroup.columnconfigure(0, weight=1)
    self.fileListbox = FiltusWidgets.LabeledListBox(self.fileGroup, filtus=self, toptext="Loaded files: 0", width=50)
    # The list box's 'bottomlabel' component is not used in the main window.
    self.fileListbox.component('bottomlabel').destroy()
    self.fileSummary1 = FiltusWidgets.SummaryBox(self.fileGroup, filtus=self, toptext="Unfiltered summaries", width=36)
    self.fileSummary2 = FiltusWidgets.SummaryBox(self.fileGroup, filtus=self, toptext="Filtered summaries", width=36)
    self.fileListbox.grid(sticky='new')
    self.fileSummary1.grid(row=0, column=1, sticky='nw', padx=(10, 0))
    self.fileSummary2.grid(row=0, column=2, sticky='nw', padx=(10, 0))

    ############## The filter group
    self.FM = FiltusWidgets.FilterMachine(frame, filtus=self, manpage="filters")

    ############## The big text field
    self.text = FiltusWidgets.FiltusText(frame, filtus=self, labelpos='nw', label_font=self.smallfont)

    ############ Sharing notebook
    self.sharingNotebook = Pmw.NoteBook(frame, arrownavigation=False, pagemargin=0)
    self.gs = FiltusWidgets.GeneSharingPage(self.sharingNotebook, self, 'Gene sharing', manpage="genesharing")
    self.fs = FiltusWidgets.GeneSharingPage(self.sharingNotebook, self, 'Gene sharing fam', manpage="familybased", family=True)
    self.vs = FiltusWidgets.VariantSharingPage(self.sharingNotebook, self, 'Variant sharing', manpage="filtus")
    self.sharingNotebook.setnaturalsize()

    ###### Settings
    self.fileListbox.fixselectall()
    self.sepOutput = '\t'
    self.truncate = 50
    self.makeSettingsDialog()
    # NOTE(review): invoking the dialog right after creation presumably applies its default settings - confirm.
    self.settingsDialog.invoke()
    self.menuBar = self.makeMainMenu()
    # Both a triple click on the menu bar and Shift-Return in the window are bound to self._run.
    self.menuBar.bind('<Triple-1>', self._run)
    self.parent.bind('<Shift-Return>', self._run)

    ########### Place on grid
    self.fileGroup.grid(sticky='news', columnspan=2, pady=(0, 0))
    self.FM.grid(row=1, column=0, pady=(10,0), sticky='new')
    self.text.grid(row=1, column=1, rowspan=2, sticky='news', padx=(20, 0), pady=0)
    self.sharingNotebook.grid(row=2, sticky='new', pady=(10, 0))

    # on parent grid
    self.menuBar.grid(row=0, column=0, sticky='ew')
    self.scrollframe.grid(row=1, sticky='news', padx=20, pady=(10, 20))
    # Idle tasks are processed before the frame's size is queried below.
    parent.update_idletasks()
    self.scrollframe.component('clipper').configure(height=min(frame.winfo_height(), frame.winfo_screenheight()-100), width=min(frame.winfo_width()+200, frame.winfo_screenwidth()-100)) # the 200 is ad hoc to increase startup width a little

    if PLOT_error:
        FiltusUtils.warningMessage("Plotting functionality is disabled. Error message:\n\n%s\n\nNote: On MAC and Linux the modules 'numpy' and 'matplotlib' must be installed separately to make the plots work.\nSee also Filtus homepage: http://folk.uio.no/magnusv/filtus.html"%PLOT_error)
def createdb(self):
    '''Run self._doCreate(), reporting any exception as a warning message.

    Nothing is re-raised: every Exception is passed to
    FiltusUtils.warningMessage. Always returns None.
    '''
    try:
        self._doCreate()
    except Exception as error:
        # Hand the failure to the warning dialog instead of propagating it.
        FiltusUtils.warningMessage(error)
    return None
def _guessAndPrepare(self, filename, kwargs):
    '''Prepare the input settings widgets for `filename`.

    Values given explicitly in `kwargs` are applied to the corresponding
    menus and variables. For settings not given, a guess based on the file's
    header line is made when self.guess is set and the current value is
    missing or inconsistent.

    Parameters:
        filename: path of the file about to be read.
        kwargs: dict of explicit settings. Keys handled here are 'chromCol',
            'posCol', 'splitAsInfo', 'geneCol', 'formatCol', 'splitFormat',
            'keep00', 'gtCol', 'split_general' and 'prefilter'. All entries
            are also copied onto the instance as attributes.

    Fix relative to the earlier version: the fallback gene-column guess was
    assigned to a misspelled local name and therefore never used.
    '''
    self.currentfile = filename
    self.fileLabel.configure(
        text=FiltusUtils.wrapFilename(filename, joinsep='\n '))
    preamble, headerline, firstline = self._getFirstLines(filename)

    # Explicit settings become instance attributes before anything is guessed.
    self.__dict__.update(kwargs)

    sep = self.sep
    if sep is None or sep not in headerline:
        # Pick the first candidate present in both the header and the first
        # data line; default to tab.
        sep = next((char for char in ['\t', ',', ';', ' ']
                    if char in headerline and char in firstline), '\t')
    self.sepInputOM.invoke(self._sepDicInv[sep])
    headers = self.currentHeaders

    def _doGuess(col):
        '''Don't guess if specified in arguments, or if the current value is consistent.'''
        return self.guess and col not in kwargs and (
            getattr(self, col) is None
            or getattr(self, col + 'Menu').inconsistent)

    lowheaders = [h.lower() for h in headers]

    def _matchHeader(alts):
        '''Return the first header matching (case-insensitively) an entry of `alts`, else the empty string.'''
        for h in alts:
            if h in lowheaders:
                return headers[lowheaders.index(h)]
        return ''

    if 'chromCol' in kwargs:
        self.chromColMenu.setAndCheck(kwargs['chromCol'])
    elif _doGuess('chromCol'):
        chromCol = _matchHeader([
            '#chrom', 'vcf_chrom', 'vcf_chr', 'chrom', 'chr', 'chromosome'
        ])
        if chromCol:
            self.chromColMenu.setAndCheck(chromCol)

    if 'posCol' in kwargs:
        self.posColMenu.setAndCheck(kwargs['posCol'])
    elif _doGuess('posCol'):
        posCol = _matchHeader([
            'pos', 'vcf_pos', 'vcf_start', 'start', 'position', 'pos_start',
            'chromosome_position'
        ])
        if posCol:
            self.posColMenu.setAndCheck(posCol)

    if 'splitAsInfo' in kwargs:
        self.infoColMenu.setAndCheck(kwargs['splitAsInfo'])
        self._splitINFO_update()

    # If VEP CSQ info present: Store headers
    self.hasCSQ = False
    for line in preamble:
        if 'ID=CSQ,' in line:
            self.hasCSQ = True
            self._CSQheaders = line.split("Format: ")[1].strip().strip(
                '">').split("|")
            break
    if not self.hasCSQ:
        self.splitCsqVar.set(0)
        self.splitCsqButt.configure(state="disabled")
    else:
        self._splitINFO_update()

    if 'geneCol' in kwargs:
        self.geneColMenu.setAndCheck(kwargs['geneCol'])
    elif _doGuess('geneCol'):
        geneCol = _matchHeader(['gene', 'gene.refgene', 'gene symbol'])
        if geneCol == '':
            # Fallback: first header containing both 'gene' and 'name'.
            geneCol = next((h for h, lowh in zip(headers, lowheaders)
                            if 'gene' in lowh and 'name' in lowh), '')
        if geneCol:
            self.geneColMenu.setAndCheck(geneCol)

    vcfGuess, formatColGuess = None, None
    if self.guess and self.firstvariants:
        vcfGuess, infoCol, formatColGuess = self._guessVCF(
            self.originalHeaders, self.firstvariants[0])  # infoCol not used
        self.vcfChooser.invoke(int(not vcfGuess))
        if vcfGuess:
            self.splitFormatVar.set(1)  # Default option: Split FORMAT

    if 'formatCol' in kwargs:
        self.formatColMenu.setAndCheck(kwargs['formatCol'])
    elif formatColGuess:
        self.formatColMenu.setAndCheck(formatColGuess)

    if 'splitFormat' in kwargs:
        self.splitFormatVar.set(kwargs['splitFormat'])
    self._splitFORMAT_update()

    if 'keep00' in kwargs:
        self.keep00Var.set(kwargs['keep00'])

    if 'gtCol' in kwargs:
        self.gtColMenu.setAndCheck(kwargs['gtCol'])
    elif _doGuess('gtCol'):
        # When the file was guessed to be VCF-like, the genotype menu is
        # set to the empty string instead of a header guess.
        gtCol = '' if vcfGuess else _matchHeader(
            ['genotype', 'gt', 'zygosity', 'homozygous', 'attribute_het'])
        self.gtColMenu.setAndCheck(gtCol)

    if 'split_general' in kwargs:
        # One or two (column, separator) pairs.
        s = kwargs['split_general']
        split, split_sep = s[0]
        self.splitcol1Menu.setAndCheck(split)
        self.splitcol1_sep.setvalue(split_sep)
        if len(s) > 1:
            split, split_sep = s[1]
            self.splitcol2Menu.setAndCheck(split)
            self.splitcol2_sep.setvalue(split_sep)

    if 'prefilter' in kwargs:
        operatorText, value = kwargs['prefilter']
        self.prefilter_operatorOM.setAndCheck(operatorText)
        self.prefilter_valueEntry.setvalue(value)
def addSamples(self):
    '''Run self.doAddSamples(), reporting any exception as a warning message.

    Nothing is re-raised: every Exception is passed to
    FiltusUtils.warningMessage. Always returns None.
    '''
    try:
        self.doAddSamples()
    except Exception as error:
        # Hand the failure to the warning dialog instead of propagating it.
        FiltusUtils.warningMessage(error)
    return None
def readFileAndAdd(cls, filename, inFormat, inNS, outFormat, VFlist, sampleNames=None):
    '''Read an existing variant database file and add the samples in `VFlist`.

    Parameters:
        filename: path of the existing database. If it is not an existing
            file, a new database is built from `VFlist` alone.
        inFormat, outFormat: format of the existing and resulting database
            (both are normalised with formatInit).
        inNS: number of samples in the existing database.
        VFlist: the sample objects to add.
        sampleNames: names for the new samples; defaults to each sample's
            shortName.

    Returns:
        The resulting database object.

    If a new sample has exactly the same genotypes as a sample already in
    the database, the user is asked whether to skip it.

    Fix relative to the earlier version: when the user chose NOT to skip
    such a duplicate, the column index was not advanced, so the following
    sample overwrote the kept one and the sample names got out of step.
    '''
    if not os.path.isfile(filename):
        return VariantDatabase.buildFromSamples(VFlist=VFlist, outFormat=outFormat, sampleNames=sampleNames)

    inFormat = formatInit(inFormat)
    outFormat = formatInit(outFormat)
    if sampleNames is None:
        sampleNames = [VF.shortName for VF in VFlist]

    if inFormat == "S":
        old = VariantDatabase.loadSimple(filename, inNS)
        new = VariantDatabase.buildFromSamples(VFlist, outFormat, sampleNames)
        meta = FiltusUtils.composeMeta(VFlist=VFlist, analysis="ADDED TO DATABASE", sort=False, appendTo=old.meta)
        return old.addSimple(new, meta=meta)

    N_add = len(VFlist)
    with open(filename, 'rU') as dbfil:
        meta, colNames, dbfil = _readTop(dbfil)
        OBS_ind = colNames.index('OBS')
        # The columns before 'OBS' define the variant and are used as key.
        v_extractor = itemgetter(*range(OBS_ind))
        # The per-sample columns start 4 columns after 'OBS'.
        sample_ind = OBS_ind + 4
        N = inNS + N_add
        # One genotype list per variant, with room for old and new samples.
        extended = collections.defaultdict(lambda: [0] * N)
        for line in dbfil:
            dat = line.strip().split('\t')
            vdef = v_extractor(dat)
            extended[vdef][:inNS] = map(int, dat[sample_ind:])

    simpleColnames = colNames[:sample_ind]
    # A new list, so the deletions below never touch the caller's list.
    sampleNames = colNames[sample_ind:] + sampleNames

    k = inNS  # column index of the sample currently being added
    skip = []
    for VF in VFlist:  # not enumerate() since the sample might be skipped
        gt = VF.GTnum()
        vDef = VF.varDefGetter
        for v in VF.variants:
            extended[vDef(v)][k] = gt(v)

        # check if already included
        duplicate = next(
            (j for j in range(inNS)
             if all(x[j] == x[k] for x in extended.itervalues())), None)
        if duplicate is not None:
            question = "The new sample '%s' is exactly equal to '%s' in the database." % (sampleNames[k], sampleNames[duplicate]) \
                + "\n\nSkip this sample?"
            if FiltusUtils.yesnoMessage(question):
                skip.append(VF)
                # Remove the column again; the next sample reuses index k.
                for v in extended:
                    del extended[v][k]
                del sampleNames[k]
                extended.default_factory = lambda: [0] * len(sampleNames)
                continue
        # The sample is kept (unique, or the user declined to skip it).
        k += 1

    new_meta = FiltusUtils.composeMeta(
        VFlist=[VF for VF in VFlist if VF not in skip],
        sort=False,
        analysis="ADDED TO DATABASE",
        appendTo='\n'.join(meta))
    colNames = simpleColnames + sampleNames
    return cls(outFormat, nSamples=len(sampleNames), columnNames=colNames,
               extendedDict=extended, meta=new_meta)
def advLoad_prompt(self):
    '''Open the advanced-load dialog through FiltusUtils.activateInCenter.

    The FiltusWidgets.AdvancedLoad object is created on the first call and
    kept on the instance as self.advLoad for later calls.
    '''
    already_built = hasattr(self, 'advLoad')
    if not already_built:
        self.advLoad = FiltusWidgets.AdvancedLoad(self)
    FiltusUtils.activateInCenter(self.parent, self.advLoad.dialog)
def _splitINFO_update(self, column=None, reset=False):
    '''Callback for the INFO option menu.

    Also called from _readAndSetHeaders (with column=None).

    Replaces the selected column in the header list by one '<key>_INFO'
    header per key found in that column of self.firstvariants, after first
    undoing any previous CSQ/INFO split.

    Parameters:
        column: header of the column to split as INFO. If None, the current
            value of the INFO menu is used (nothing is done if that menu is
            inconsistent). The empty string means "no INFO split".
        reset: if true, the stored list of INFO headers is cleared first.
    '''
    if reset:
        self._INFOheaders = []
    if column is None:
        if self.infoColMenu.inconsistent:
            return
        column = self.infoColMenu.getvalue()
    self.infoColMenu.setColor(True)
    # Work on a copy of the current headers.
    h = self.currentHeaders[:]

    ### Always start by unsplitting everything:
    # If CSQ is split: unsplit this first
    splitCsq = self.hasCSQ and self.splitCsqVar.get()
    if splitCsq:
        self.splitCsqVar.set(0)
        self._splitCsq_update()
        # Re-read the headers after the CSQ update.
        h[:] = self.currentHeaders[:]

    # Unsplit INFO fields
    if self._INFOheaders:
        # Put the original INFO column back in place of the split headers.
        ind = h.index(self._INFOheaders[0])
        h[ind:(ind + len(self._INFOheaders))] = [self.infoCol]
        self._INFOheaders = []
        self.infoCol = ''

    ### If empty selection: Reset and return
    if column == "":
        self._updateColnameMenus(h)
        self.splitCsqButt.configure(state="disabled")
        return

    ### Otherwise: split selected column as INFO (if possible)
    first_infos = [
        v[self.originalHeaders.index(column)] for v in self.firstvariants
    ]
    # Sorted, unique 'key_INFO' names from all ';'-separated 'key=value' entries.
    _INFOheaders = sorted(
        set(
            s.split('=')[0] + '_INFO' for info in first_infos
            for s in info.split(';') if '=' in s))
    if not _INFOheaders:
        # No 'key=value' entries were found: flag the menu and warn the user.
        self.infoColMenu.setColor(False)
        self.splitCsqButt.configure(state="disabled")
        self._updateColnameMenus(h)
        FiltusUtils.warningMessage(
            "I don't recognise %s as an INFO column" % column)
        return

    ind = h.index(column)
    h[ind:(ind + 1)] = _INFOheaders
    self._updateColnameMenus(h)
    if self.hasCSQ and "CSQ_INFO" in _INFOheaders:
        self.splitCsqButt.configure(state="normal")
        # Restore the CSQ split if it was active when this method was entered.
        if splitCsq:
            self.splitCsqVar.set(1)
            self._splitCsq_update()
    self._INFOheaders = _INFOheaders
    self.infoCol = column
def _guessAndPrepare(self, filename, kwargs):
    '''Prepare the input settings widgets for `filename`.

    Values given explicitly in `kwargs` are applied to the corresponding
    menus and variables. For settings not given, a guess based on the file's
    header line is made when self.guess is set and the current value is
    missing or inconsistent.

    Parameters:
        filename: path of the file about to be read.
        kwargs: dict of explicit settings. Keys handled here are 'chromCol',
            'posCol', 'splitAsInfo', 'geneCol', 'formatCol', 'splitFormat',
            'keep00', 'gtCol', 'split_general' and 'prefilter'. All entries
            are also copied onto the instance as attributes.

    Fix relative to the earlier version: the fallback gene-column guess was
    assigned to a misspelled local name and therefore never used.
    '''
    self.currentfile = filename
    self.fileLabel.configure(text=FiltusUtils.wrapFilename(filename, joinsep='\n '))
    preamble, headerline, firstline = self._getFirstLines(filename)

    # Explicit settings become instance attributes before anything is guessed.
    self.__dict__.update(kwargs)

    sep = self.sep
    if sep is None or sep not in headerline:
        # Pick the first candidate present in both the header and the first
        # data line; default to tab.
        sep = next((char for char in ['\t', ',', ';', ' '] if char in headerline and char in firstline), '\t')
    self.sepInputOM.invoke(self._sepDicInv[sep])
    headers = self.currentHeaders

    def _doGuess(col):
        '''Don't guess if specified in arguments, or if the current value is consistent.'''
        return self.guess and col not in kwargs and (getattr(self, col) is None or getattr(self, col+'Menu').inconsistent)

    lowheaders = [h.lower() for h in headers]

    def _matchHeader(alts):
        '''Return the first header matching (case-insensitively) an entry of `alts`, else the empty string.'''
        for h in alts:
            if h in lowheaders:
                return headers[lowheaders.index(h)]
        return ''

    if 'chromCol' in kwargs:
        self.chromColMenu.setAndCheck(kwargs['chromCol'])
    elif _doGuess('chromCol'):
        chromCol = _matchHeader(['#chrom', 'vcf_chrom', 'vcf_chr', 'chrom', 'chr', 'chromosome'])
        if chromCol:
            self.chromColMenu.setAndCheck(chromCol)

    if 'posCol' in kwargs:
        self.posColMenu.setAndCheck(kwargs['posCol'])
    elif _doGuess('posCol'):
        posCol = _matchHeader(['pos', 'vcf_pos', 'vcf_start', 'start', 'position', 'pos_start', 'chromosome_position'])
        if posCol:
            self.posColMenu.setAndCheck(posCol)

    if 'splitAsInfo' in kwargs:
        self.infoColMenu.setAndCheck(kwargs['splitAsInfo'])
        self._splitINFO_update()

    # If VEP CSQ info present: Store headers
    self.hasCSQ = False
    for line in preamble:
        if 'ID=CSQ,' in line:
            self.hasCSQ = True
            self._CSQheaders = line.split("Format: ")[1].strip().strip('">').split("|")
            break
    if not self.hasCSQ:
        self.splitCsqVar.set(0)
        self.splitCsqButt.configure(state="disabled")
    else:
        self._splitINFO_update()

    if 'geneCol' in kwargs:
        self.geneColMenu.setAndCheck(kwargs['geneCol'])
    elif _doGuess('geneCol'):
        geneCol = _matchHeader(['gene', 'gene.refgene', 'gene symbol'])
        if geneCol == '':
            # Fallback: first header containing both 'gene' and 'name'.
            geneCol = next((h for h, lowh in zip(headers, lowheaders) if 'gene' in lowh and 'name' in lowh), '')
        if geneCol:
            self.geneColMenu.setAndCheck(geneCol)

    vcfGuess, formatColGuess = None, None
    if self.guess and self.firstvariants:
        vcfGuess, infoCol, formatColGuess = self._guessVCF(self.originalHeaders, self.firstvariants[0]) # infoCol not used
        self.vcfChooser.invoke(int(not vcfGuess))
        if vcfGuess:
            self.splitFormatVar.set(1) # Default option: Split FORMAT

    if 'formatCol' in kwargs:
        self.formatColMenu.setAndCheck(kwargs['formatCol'])
    elif formatColGuess:
        self.formatColMenu.setAndCheck(formatColGuess)

    if 'splitFormat' in kwargs:
        self.splitFormatVar.set(kwargs['splitFormat'])
    self._splitFORMAT_update()

    if 'keep00' in kwargs:
        self.keep00Var.set(kwargs['keep00'])

    if 'gtCol' in kwargs:
        self.gtColMenu.setAndCheck(kwargs['gtCol'])
    elif _doGuess('gtCol'):
        # When the file was guessed to be VCF-like, the genotype menu is
        # set to the empty string instead of a header guess.
        gtCol = '' if vcfGuess else _matchHeader(['genotype', 'gt', 'zygosity', 'homozygous', 'attribute_het'])
        self.gtColMenu.setAndCheck(gtCol)

    if 'split_general' in kwargs:
        # One or two (column, separator) pairs.
        s = kwargs['split_general']
        split, split_sep = s[0]
        self.splitcol1Menu.setAndCheck(split)
        self.splitcol1_sep.setvalue(split_sep)
        if len(s) > 1:
            split, split_sep = s[1]
            self.splitcol2Menu.setAndCheck(split)
            self.splitcol2_sep.setvalue(split_sep)

    if 'prefilter' in kwargs:
        operatorText, value = kwargs['prefilter']
        self.prefilter_operatorOM.setAndCheck(operatorText)
        self.prefilter_valueEntry.setvalue(value)
def read(self, filename, **kwargs):
    '''Read one variant file, showing the settings dialog when needed.

    Parameters:
        filename: path of the file to read.
        **kwargs: optional 'prompt' and 'guess' flags (removed from kwargs
            here); everything else is passed on to _guessAndPrepare.

    Returns:
        The object returned by the reader, or None when loading was stopped
        or the file was skipped.

    On errors a warning is shown. Except when an unexpected error occurs
    without the prompt having been shown successfully (the file is then
    skipped), the method calls itself again with guess=False, prompt=True.
    '''
    self.skipFile = False
    self.stopLoading = False
    # True if there is no previous file or the file extension differs from it.
    new_ext = self.currentfile is None or (os.path.splitext(filename)[1] !=
                                           os.path.splitext(
                                               self.currentfile)[1])
    self.prompt = kwargs.pop('prompt', self.prompt or new_ext)
    self.guess = kwargs.pop('guess', self.guess
                            or (self.prompt and new_ext))
    promptShowsOk = None  # modified when trying to show prompt
    try:
        self._guessAndPrepare(filename, kwargs)
        if self.prompt or any(OM.inconsistent
                              for OM in self._activeMenus()):
            # Stays False if showing the dialog itself raises.
            promptShowsOk = False
            FiltusUtils.activateInCenter(self.parent, self.dialog)
            promptShowsOk = True
        else:
            self._setParameters()

        if self.stopLoading or self.skipFile:
            return

        self.filtus.busy()
        common_params = dict(filename=filename,
                             sep=self.sep,
                             chromCol=self.chromCol,
                             posCol=self.posCol,
                             geneCol=self.geneCol,
                             splitAsInfo=self.infoCol,
                             split_general=self.split_general,
                             prefilter=self.prefilter)
        # Note: splitAsInfo works also for nonVCF, but not splitCSQ (which requires correct preamble data)
        if self.vcf:
            VF = self.reader.readVCFlike(formatCol=self.formatCol,
                                         splitFormat=self.splitFormat,
                                         splitCsq=self.splitCsq,
                                         keep00=self.keep00,
                                         **common_params)
        else:
            VF = self.reader.readNonVCF(skiplines=self.skiplines,
                                        gtCol=self.gtCol,
                                        homSymbol=self.homSymbol,
                                        **common_params)
        self.filtus.notbusy()
    except (ValueError, RuntimeError) as e:
        # These errors are shown as-is, then the user gets the dialog again.
        self.filtus.notbusy()
        FiltusUtils.warningMessage(e)
        return self.read(filename, guess=False, prompt=True)
    except Exception as e:
        self.filtus.notbusy()
        if promptShowsOk:
            FiltusUtils.warningMessage(
                "An error occured while reading this file:\n%s\n\n%s: %s\n\nPlease try again or skip file."
                % (filename, type(e).__name__, e))
            return self.read(filename, guess=False, prompt=True)
        else:
            # The prompt was not shown successfully: give up on this file.
            FiltusUtils.warningMessage("%s: %s\n\nSkipping this file: %s" %
                                       (type(e).__name__, e, filename))
            self.skipFile = True
            return

    if self.checkHomozygosity and VF.noHomozygotes():
        tryagain = FiltusUtils.yesnoMessage(
            'The file %s has no homozygous variants. Go back to settings dialog?'
            % filename)
        if tryagain:
            VF = self.read(filename, guess=False, prompt=True)
    return VF
def doSearch(self):
    '''Look up the variant given by the chromosome/position entries in the loaded database.

    The result text is stored in self.results and shown in
    self.resultWindow. If no database is loaded, or chromosome or position
    is empty, a warning is shown and nothing is searched. Any exception
    raised during the search is shown as a warning message.

    Changes relative to the earlier version:
      * a chromosome that is absent from the database index now gives
        "Not found in the database" instead of a warning showing a bare
        KeyError;
      * an "Extended" format row with no non-zero observations no longer
        fails; the sample list is simply omitted.
    '''
    self.results = None
    self.resultWindow.clear()
    chrom = self.chrom.getvalue().strip()
    pos = self.pos.getvalue().strip()
    inFilename, inNS, inNV, inFormat = self.browser.getInfo()

    # Guard clauses: all three pieces of input are required.
    if not inFilename:
        FiltusUtils.warningMessage("No database loaded")
        return
    if not chrom:
        FiltusUtils.warningMessage("Please indicate chromosome")
        return
    if not pos:
        FiltusUtils.warningMessage("Please indicate position")
        return

    query = self.query = [chrom, pos]
    self.filename = inFilename
    self.nSamples = inNS
    self.nVariants = inNV
    self.formatLong = inFormat
    self.colNames = self.browser.getColnames()
    try:
        if self.browser.modified():
            # Rebuild the per-chromosome line index for the new database.
            self.firstIndex, self.lastIndex = self._chromStartIndex(inFilename)
            self.browser.setUnmodified()

        data = None
        if chrom in self.firstIndex and chrom in self.lastIndex:
            with open(self.filename, 'rU') as database:
                # Only scan the lines indexed for this chromosome.
                slice_database = itertools.islice(database,
                                                  self.firstIndex[chrom],
                                                  self.lastIndex[chrom])
                data = next((v for v in slice_database
                             if v.split('\t')[:2] == query), None)

        if data is None:
            self.results = "Not found in the database"
            self.resultWindow.settext(self.results)
            return

        data = data.strip().split('\t')
        fields = [
            'Total observations', 'Heterozygous', 'Homozygous',
            'Allele frequency in database'
        ]
        results = ['%s: %s' % x for x in zip(fields, data[2:6])]
        results[0] += ' (out of %d)' % inNS

        if self.formatLong == "Extended":
            # The last inNS columns hold one genotype code per sample.
            allSamples = self.colNames[-inNS:]
            allObs = map(int, data[-inNS:])
            observations = [(s, obs) for s, obs in zip(allSamples, allObs)
                            if obs != 0]
            if observations:
                samples, gtCode = zip(*observations)
                # Code 1 is reported as heterozygous, code 2 as homozygous.
                gt = [('heterozygous', 'homozygous')[x - 1] for x in gtCode]
                width = max(map(len, samples))
                results.extend(
                    ['', 'Samples:'] +
                    [s.ljust(width) + ' - ' + g for s, g in zip(samples, gt)])

        self.results = '\n'.join(results)
        self.resultWindow.settext(self.results)
    except Exception as e:
        FiltusUtils.warningMessage(e)
        return