Пример #1
0
    def readFromFile(self, filename):
        """Parse a saved filter configuration file.

        Blank lines are ignored. The remaining lines must be:

        * four lines starting with '0' or '1'; for a '1' line, the text from
          the third character onwards is used (gene restriction, gene
          exclusion, variant exclusion and regions, in that order);
        * one line holding the number of column filters;
        * exactly that many lines of the form
          ``column ::: relation ::: value ::: keep``.

        Returns:
            The tuple (restrict_genes_txt, exclude_genes_txt, exclude_var_txt,
            regions_txt, columnfilters). The first four are strings or None;
            columnfilters is a list of (col, rel, val, keep) tuples, where val
            is a float for 'less'/'greater' relations and keep is an int.

        Raises:
            IOError: if the file has fewer than 6 non-blank lines.
            ValueError: if a line does not have the expected format.
        """
        # read().splitlines() copes with \n, \r\n and \r line endings alike, so
        # the 'U' mode flag (removed in Python 3.11) is not needed.
        with open(filename, 'r') as f:
            lines = [s for s in (raw.strip() for raw in f.read().splitlines()) if s]

        nlines = len(lines)
        if nlines < 6:
            raise IOError("Format error (too few lines).")
        for s in lines[:4]:
            if not s.startswith(('0', '1')):
                raise ValueError("Expected string with initial character 0 or 1, but received\n'%s'" % s)
        try:
            Ncolfilt = int(lines[4])
        except ValueError:
            raise ValueError("Expected the number of column filters, but received\n'%s'" % lines[4])
        if 5 + Ncolfilt != nlines:
            raise ValueError("Wrong number of column filters:\n"
                             "Number indicated is %d, but the file contains %d" % (Ncolfilt, nlines - 5))

        def flagged_text(line):
            # Text following the flag of a '1' line; None if switched off or empty
            return line[2:] if line.startswith('1') and line[2:].strip() else None

        restrict_genes_txt, exclude_genes_txt, exclude_var_txt, regions_txt = \
            [flagged_text(s) for s in lines[:4]]

        columnfilters = []
        # The count was validated above, so lines[5:] are exactly the filter lines
        for entry in lines[5:]:
            fields = entry.split(' ::: ')
            if len(fields) != 4:
                raise ValueError("Malformed column filter line:\n'%s'" % entry)
            col, rel, val, keep = fields
            if 'less' in rel or 'greater' in rel:
                # Numerical relations need a numerical value; skip the filter otherwise
                try:
                    val = float(val)
                except ValueError:
                    FiltusUtils.warningMessage("Column filter ignored:\n\n'%s  %s  %s'\n\nNumerical value needed."%(col, rel, val))
                    continue
            columnfilters.append((col, rel, val, int(keep)))
        return restrict_genes_txt, exclude_genes_txt, exclude_var_txt, regions_txt, columnfilters
Пример #2
0
 def _f():
     """Show the database tool on page `pgnr`, creating the widget on first use."""
     # Pages 0 and 1 are only opened if the loaded-samples check passes
     if pgnr in [0, 1]:
         if not self.checkLoadedSamples(select="all"):
             return
     if not hasattr(self, 'databaseTool'):
         self.databaseTool = FiltusDatabase.DatabaseWidget(self)
     tool = self.databaseTool
     tool.notebook.selectpage(pgnr)
     FiltusUtils.activateInCenter(self.parent, tool)
Пример #3
0
    def extractdb(self):
        """Extract part of an existing variant database and write it to file.

        All settings are read from the widgets. Any exception (including the
        validation errors raised here) is shown as a warning message instead of
        being propagated.
        """
        try:
            started = time.time()
            inFilename, inNS, inNV, inFormat = self.browser.getInfo()
            subset = self.lists.selection
            sampleNames = self.lists.getright()
            outFilename = self.save_browser.getvalue()
            outFormat = self.formatSelect.getvalue()
            cfilter = self.columnFilter.getfilter()
            # Only build a Filter object when a column filter is actually given
            dbfilter = Filter.Filter(columnfilters=[cfilter]) if cfilter else None

            if not inFilename:
                raise RuntimeError("Please specify existing database")
            if inFormat == "Extended" and not subset:
                raise RuntimeError("No samples selected")
            if not outFilename:
                raise RuntimeError("Please specify output file name and format")

            db = VariantDatabase.readFileAndExtract(
                inFilename, inFormat, inNS, subset, sampleNames, outFormat,
                filter=dbfilter)
            db.save(outFilename)

            parts = [
                "Variant database written to %s.\n\n" % outFilename,
                "\n".join(db.infoSummary()),
                '\n\nTime used: %.2f seconds' % (time.time() - started,),
            ]
            FiltusUtils.infoMessage("".join(parts))
        except Exception as err:
            FiltusUtils.warningMessage(err)
Пример #4
0
 def _splitFORMAT_update(self, reset=False): 
     '''Callback for the splitFormat checkbox.

     Recomputes the header list shown in the column name menus, depending on
     whether the FORMAT column should be split into its fields.
     If reset is true, the stored FORMAT headers and sample names are cleared first.
     '''
     if reset:
         self._FORMATheaders = []
         self._sampleNames = []
     split = self.splitFormatVar.get()
     column = self.formatColMenu.getvalue()
     # Splitting needs a selected, consistent FORMAT column: otherwise untick and stop
     if split and (not column or self.formatColMenu.inconsistent):
         self.splitFormatVar.set(0)
         return
     
     # Work on a copy; the result is handed to _updateColnameMenus at the end
     h = self.currentHeaders[:]
     
     def unsplit():
         # Undo an earlier split: everything from 'GT' onwards is replaced by
         # the previous FORMAT column name followed by the stored sample names
         if self._FORMATheaders:
             h[h.index('GT'):] = [self.formatCol] + self._sampleNames
         self._FORMATheaders = []
         self._sampleNames = []
     
     if split: 
         # FORMAT entry of the first variant, looked up by the original header position
         first = self.firstvariants[0][self.originalHeaders.index(column)]
         if not first.startswith('GT'):
             self.formatColMenu.setColor(False)
             FiltusUtils.warningMessage("FORMAT column entries must begin with 'GT'")
             return
         unsplit() # undo possible previous split
         self.formatCol = column
         self._FORMATheaders = first.split(':')
         ind = h.index(column)
         # All headers after the FORMAT column are stored as sample names
         self._sampleNames = h[ind+1:]
         # The FORMAT column and everything after it are replaced by the FORMAT fields
         h[ind:] = self._FORMATheaders
     else:
         unsplit()
         
     self._updateColnameMenus(h)
Пример #5
0
    def _executeDialogButton(self, button):
        """Handle a button press in the input settings dialog.

        'Cancel' (or None) stops loading altogether, "Skip this file" skips the
        current file. Any other button stores the settings through
        _setParameters(); if that fails, a warning is shown and the dialog
        stays open. Unexpected errors destroy the dialog and delete
        self.filtus.fileReader.
        """
        try:
            if button is None or button == 'Cancel':
                self.stopLoading = True
            elif button == "Skip this file":
                self.skipFile = True
            else:
                # button is either "Use for all files" or "Use for this file"
                self.prompt = button != "Use for all files"
                self.guess = False
                try:
                    self._setParameters()
                except Exception as err:
                    FiltusUtils.warningMessage(err)
                    return
            self.dialog.deactivate()
        except Exception:
            FiltusUtils.warningMessage(
                "Something went wrong. Trying to close the input dialog.")
            self.dialog.destroy()
            del self.filtus.fileReader
Пример #6
0
def okForDB(VFlist, ndef=None):
    """Check that the samples in VFlist can go into the same variant database.

    VFlist: sample objects with attributes varDefColNames and varDefGetter.
    ndef: number of variant-defining columns of an existing database, or None.

    Returns True if everything is fine. Otherwise a warning message is
    displayed and False is returned.
    """
    if ndef is not None and any(len(VF.varDefColNames) != ndef for VF in VFlist):
        problem = ('The existing database has %d variant-defining columns, but at least one of the selected samples does not match this. '
                   'To extend this database, make sure to indicate matching columns in the "Columns uniquely defining a variant" entry when loading new files.') % ndef
    elif len(set(len(VF.varDefColNames) for VF in VFlist)) > 1:
        problem = 'The selected files do not have the same number of variant-defining columns. To create the database, please load the files again, making sure the "Columns uniquely defining a variant" entries match.'
    elif any(VF.varDefGetter is None for VF in VFlist):
        problem = 'There is a problem with the variant-defining columns of (at least one of) the selected samples.'
    else:
        return True

    FiltusUtils.warningMessage(problem)
    return False

    def readDB(self, filename):
        """Read a tab-separated database file and split off its leading meta lines.

        NOTE(review): this definition looks incomplete in its present form -
        it uses a name `old` that is never assigned here and it has no return
        statement. Confirm against the full implementation before relying on it.
        """
        # Each line becomes a list of its tab-separated fields
        with open(filename, 'rU') as dbfil:
            db = [line.strip().split('\t') for line in dbfil]

        # NOTE(review): `old` is undefined in the visible code (`db` above is never
        # used afterwards) - presumably one of the two names is a leftover; verify.
        # m = index of the first entry with more than one element
        m = next(i for i in xrange(len(old)) if len(old[i]) > 1)
        # Entries before index m are joined into the meta text ...
        meta = '\n'.join(old[:m]) + '\n'
        # ... and dropped (in place) from the list
        old[:] = old[m:]

        # Number of entries not counting the first one, which is taken as the header
        len_old = len(old) - 1
        old_heads = old[0]
        # 'simple' if the last three headers are the count columns, otherwise 'extended'
        # (note that the local name `type` shadows the builtin)
        type = 'simple' if old_heads[-3:] == [
            'Total', 'Heterozygous', 'Homozygous'
        ] else 'extended'
Пример #7
0
 def _executeDialogButton(self, button):
     """Callback for the buttons of the input settings dialog.

     Cancelling (button None or 'Cancel') sets stopLoading, "Skip this file"
     sets skipFile; in both cases the dialog is deactivated. For the remaining
     buttons the parameters are stored with _setParameters() before the dialog
     is deactivated; a failure there is shown as a warning and leaves the
     dialog active.
     """
     try:
         cancelled = button is None or button == 'Cancel'
         if cancelled or button == "Skip this file":
             if cancelled:
                 self.stopLoading = True
             else:
                 self.skipFile = True
             self.dialog.deactivate()
             return

         # button is either "Use for all files" or "Use for this file"
         self.prompt = button != "Use for all files"
         self.guess = False
         try:
             self._setParameters()
         except Exception as err:
             FiltusUtils.warningMessage(err)
         else:
             self.dialog.deactivate()
     except Exception:
         # Last resort: get rid of the dialog and the file reader altogether
         FiltusUtils.warningMessage("Something went wrong. Trying to close the input dialog.")
         self.dialog.destroy()
         del self.filtus.fileReader
Пример #8
0
 def doAddSamples(self):
     """Add the selected samples to an existing variant database and save the result.

     All settings are read from the widgets. When finished, an info message
     with a summary of the database and the time used is displayed.

     Raises:
         RuntimeError: if the input database, the sample selection or the
             output file name is missing.
         IndexError: if the resulting database has the same number of samples
             as the input database.
     """
     st = time.time()
     filtus = self.filtus
     inFilename, inNS, inNV, inFormat = self.browser.getInfo()
     outFilename = self.save_browser.getvalue()
     outFormat = self.formatSelect.getvalue()
     selection = self.lists.selection
     VFlist = [filtus.filteredFiles[i] for i in selection]
     sampleNames = self.lists.getright()
     if not inFilename:
         raise RuntimeError("Please specify existing database")
     if not selection:
         raise RuntimeError("No samples selected")
     if not outFilename:
         raise RuntimeError("Please specify output file name and format")
     db = VariantDatabase.readFileAndAdd(inFilename,
                                         inFormat=inFormat,
                                         inNS=inNS,
                                         outFormat=outFormat,
                                         VFlist=VFlist,
                                         sampleNames=sampleNames)
     # An unchanged sample count means that nothing was added
     if db.nSamples == inNS:
         raise IndexError("No samples to add")
     db.save(outFilename)
     message = "Variant database written to %s.\n\n" % outFilename \
             + "\n".join(db.infoSummary()) \
             + '\n\nTime used: %.2f seconds' %(time.time()-st,)
     FiltusUtils.infoMessage(message)
Пример #9
0
    def save(self):
        """Ask for a file name and write the search results to that file.

        Does nothing if there are no results, or if the file dialog is
        cancelled. The meta text is written before and/or after the results
        according to the includePreamble setting ('Top', 'Bottom'). Errors
        while writing are shown as a warning.
        """
        if self.results is None:
            return
        filtus = self.filtus
        db_summary = ('## Database file: %s\n## Format: %s\n## Number of samples: %d\n## Number of variants: %d\n##\n'
                      % (self.filename, self.formatLong, self.nSamples, self.nVariants))
        query = '## Query: Chromosome %s, position: %s' % tuple(self.query)
        analysis = "VARIANT DATABASE - SEARCH\n##\n" + db_summary + query
        meta = FiltusUtils.composeMeta(VFlist=None, analysis=analysis)

        fname = tkFileDialog.asksaveasfilename(initialdir=filtus.currentDir,
                                               title="Save search results as")
        if not fname:
            return
        # Remember the directory for the next file dialog
        filtus.currentDir = os.path.dirname(fname)

        includePre = filtus.includePreamble
        try:
            with open(fname, 'w') as outfile:
                if 'Top' in includePre:
                    outfile.write(meta)
                outfile.write(self.results)
                if 'Bottom' in includePre:
                    outfile.write('\n' + meta)
        except Exception as err:
            FiltusUtils.warningMessage('%s\n\nFile not saved.' % err)
Пример #10
0
 def _m():
     """Merge the selected samples (at least two) and print the merged result."""
     selected = self.checkLoadedSamples(select="selection", minimum=2)
     if selected:
         try:
             merged = FiltusAnalysis.merge(selected, collapse=collapse)
             self.text.prettyPrint(merged, label='')
         except Exception as err:
             FiltusUtils.warningMessage(err)
Пример #11
0
 def relatedness_trio_prompt(self):
     """Open the trio relatedness dialog (created on first use).

     Nothing happens unless the loaded-samples check passes. Errors raised
     when showing the dialog are displayed as a warning.
     """
     if self.checkLoadedSamples(select="all"):
         if not hasattr(self, 'relatedness_trio_gui'):
             self.relatedness_trio_gui = FiltusWidgets.RelatednessTrio_GUI(self)
         try:
             FiltusUtils.activateInCenter(self.parent, self.relatedness_trio_gui)
         except Exception as err:
             FiltusUtils.warningMessage("%s: %s" % (type(err).__name__, err))
Пример #12
0
 def pedwriter_prompt(self):
     """Open the PedWriter dialog, creating it the first time it is needed.

     Returns without doing anything if the loaded-samples check fails.
     Errors raised when showing the dialog are displayed as a warning.
     """
     samples_ok = self.checkLoadedSamples(select="all")
     if not samples_ok:
         return
     if not hasattr(self, 'pedwriter'):
         self.pedwriter = FiltusWidgets.PedWriter(self)
     try:
         FiltusUtils.activateInCenter(self.parent, self.pedwriter)
     except Exception as err:
         name = type(err).__name__
         FiltusUtils.warningMessage("%s: %s" % (name, err))
Пример #13
0
 def denovo_prompt(self):
     """Open the de novo dialog (created on first use).

     The loaded-samples check is run with a minimum of 3 samples; if it fails
     nothing more is done. Errors raised when showing the dialog are
     displayed as a warning.
     """
     if self.checkLoadedSamples(select="all", VF=False, minimum=3):
         if not hasattr(self, 'denovogui'):
             self.denovogui = FiltusWidgets.DeNovo_GUI(self)
         try:
             FiltusUtils.activateInCenter(self.parent, self.denovogui)
         except Exception as err:
             FiltusUtils.warningMessage("%s: %s" % (type(err).__name__, err))
Пример #14
0
 def autozyg_prompt(self):
     """Open the AutEx dialog for the one selected sample.

     The selection check is run with minimum=1 and maximum=1; if it fails
     nothing more is done. The dialog is created on first use, and errors
     raised when showing it are displayed as a warning.
     """
     one_selected = self.checkLoadedSamples(select="selection", VF=False, minimum=1, maximum=1)
     if not one_selected:
         return
     if not hasattr(self, 'autexgui'):
         self.autexgui = FiltusWidgets.AutEx_GUI(self)
     try:
         FiltusUtils.activateInCenter(self.parent, self.autexgui)
     except Exception as err:
         name = type(err).__name__
         FiltusUtils.warningMessage("%s: %s" % (name, err))
Пример #15
0
 def _histogramButtonExecute(self):
     """Draw a histogram of the chosen column; any error is shown as a warning."""
     try:
         column = self.histo_var.getvalue()
         if column == '':
             raise RuntimeError("Column variable not selected")
         samples = self._validateInput(checkPresence=[column])
         nbins = int(self.histo_bins.getvalue())
         histogramPlot(samples, column, nbins)
     except Exception as err:
         FiltusUtils.warningMessage(err)
Пример #16
0
 def _comparativeButtonExecute(self):
     """Make the comparative QC plots that are ticked off; errors are shown as a warning."""
     try:
         samples = self._validateInput()
         chosen = self.comparative_checks.getvalue()
         want_private = 'private' in chosen
         want_heterozygosity = 'heterozygosity' in chosen
         want_gender = 'gender' in chosen
         # The output file name is only read when the save browser is switched on
         if self.save_browser.on():
             writetofile = self.save_browser.getvalue()
         else:
             writetofile = None
         QC_3plots(samples, private=want_private, heterozygosity=want_heterozygosity,
                   gender=want_gender, writetofile=writetofile, save=None)
     except Exception as err:
         FiltusUtils.warningMessage(err)
Пример #17
0
 def geneLookup_prompt(self):
     """Open the gene lookup prompt (the GeneLookup object is created on first use).

     Requires that the loaded-samples check passes and that at least one of
     the filtered samples has a gene getter; otherwise nothing is opened.
     """
     if not self.checkLoadedSamples(select="all"):
         return
     has_gene_column = any(VF.geneGetter is not None for VF in self.filteredFiles)
     if not has_gene_column:
         FiltusUtils.warningMessage("None of the loaded samples have known gene column.")
         return
     if not hasattr(self, 'geneLookup'):
         self.geneLookup = FiltusWidgets.GeneLookup(self.parent, self)
     lookup = self.geneLookup
     FiltusUtils.activateInCenter(self.parent, lookup.prompt)
Пример #18
0
    def _splitINFO_update(self, column=None, reset=False): 
        '''Callback for the INFO option menu. Also called from _readAndSetHeaders (with column=None).

        Rebuilds the header list so that the given column is replaced by one
        '<key>_INFO' header per key found in the first variants.
        column: name of the column to split; if None, the current value of the
            INFO menu is used (and nothing is done if that menu is inconsistent).
            An empty string means "no INFO column".
        reset: if true, forget any previously stored INFO headers first.
        '''
        if reset:
            self._INFOheaders = []
        if column is None:
            if self.infoColMenu.inconsistent:
               return
            column = self.infoColMenu.getvalue()
        
        self.infoColMenu.setColor(True)
        # Work on a copy of the current headers
        h = self.currentHeaders[:]
        
        ### Always start by unsplitting everything:
        # If CSQ is split: unsplit this first
        splitCsq = self.hasCSQ and self.splitCsqVar.get()
        if splitCsq:
            self.splitCsqVar.set(0)
            self._splitCsq_update()
            # Refresh the working copy after the CSQ update
            h[:] = self.currentHeaders[:]

        # Unsplit INFO fields: the run of INFO headers is replaced by the INFO column name
        if self._INFOheaders:
            ind = h.index(self._INFOheaders[0])
            h[ind:(ind + len(self._INFOheaders))] = [self.infoCol]
        self._INFOheaders = []
        self.infoCol = ''
           
        ### If empty selection: Reset and return
        if column == "": 
            self._updateColnameMenus(h)  
            self.splitCsqButt.configure(state="disabled")
            return
        
        ### Otherwise: split selected column as INFO (if possible)
        # Entries of the selected column in the first variants
        first_infos = [v[self.originalHeaders.index(column)] for v in self.firstvariants]
        # Sorted, unique 'KEY_INFO' names from all 'KEY=value' parts (parts are separated by ';')
        _INFOheaders = sorted(set(s.split('=')[0] + '_INFO' for info in first_infos for s in info.split(';') if '=' in s))
        
        # No 'KEY=value' parts found: mark the menu and give up
        if not _INFOheaders:
            self.infoColMenu.setColor(False)
            self.splitCsqButt.configure(state="disabled")
            self._updateColnameMenus(h)
            FiltusUtils.warningMessage("I don't recognise %s as an INFO column"%column)
            return
        
        # Replace the selected column by the new INFO headers
        ind = h.index(column)
        h[ind:(ind + 1)] = _INFOheaders
        self._updateColnameMenus(h)   
        
        # CSQ splitting is enabled if there is a CSQ field; it is redone if it was active on entry
        if self.hasCSQ and "CSQ_INFO" in _INFOheaders:
            self.splitCsqButt.configure(state="normal")
            if splitCsq:
                self.splitCsqVar.set(1)
                self._splitCsq_update()
        
        # Store the new state
        self._INFOheaders = _INFOheaders
        self.infoCol = column
Пример #19
0
 def QC_prompt(self):
     """Open the QC dialog (the QC object is created on first use).

     Returns without doing anything if the loaded-samples check (at least one
     sample) fails. If showing the dialog raises an exception, the reason is
     printed and the parent window is destroyed.
     """
     if not self.checkLoadedSamples(select="all", minimum=1):
         return
     if not hasattr(self, 'QC'):
         self.QC = FiltusQC.QC(self)
     try:
         FiltusUtils.activateInCenter(self.parent, self.QC.dialog)
     except Exception as e:
         # Call form of print with a single argument: same output on Python 2,
         # and valid syntax on Python 3 (the print statement is not)
         print("Killing myself because of: %s" % e)
         self.parent.destroy()
Пример #20
0
 def _scatterButtonExecute(self):
     """Draw a scatter plot of the two chosen columns; any error is shown as a warning."""
     try:
         xcol = self.scatter_x.getvalue()
         ycol = self.scatter_y.getvalue()
         if xcol == '':
             raise RuntimeError("X axis column not selected")
         if ycol == '':
             raise RuntimeError("Y axis column not selected")
         samples = self._validateInput(checkPresence=[xcol, ycol])
         alpha = float(self.scatter_alpha.getvalue())
         thin = int(self.scatter_thin.getvalue())
         scatterPlot(samples, xcol, ycol, alpha, thin)
     except Exception as err:
         FiltusUtils.warningMessage(err)
Пример #21
0
 def loadMeta_and_update(self):
     """Read the meta information of the database chosen in the browser.

     On success the summary is updated and the `updates` callback (if any) is
     called with the format and column names. On failure a warning is shown,
     the summary label is set to an error text, the file name is cleared and
     the callback (if any) is called with reset=True.
     """
     self.filename = self.browser.getvalue()
     notify = self.updates
     try:
         metadata = VariantDatabase.readMeta(self.filename)
         meta, self.nSamples, self.nVariants, self.format, self.colNames = metadata
         self.updateSummary(self.nSamples, self.nVariants, self.format)
         if notify:
             notify(inFormat=self.format, colNames=self.colNames)
     except Exception as err:
         FiltusUtils.warningMessage("Could not load database.\n\n %s" % err)
         self.summaryLabel.configure(text="Summary: Error")
         self.filename = None
         if notify:
             notify(reset=True)
Пример #22
0
    def geneMaster(cls, geneMaster, nSamples, minSampleCount=1, genelengths={}, model="Dominant", meta=''):
        """Build an instance with one data row per gene in geneMaster.

        geneMaster: dict mapping gene name to an object providing nFiles(),
            getFiles(), nUniqVars() and the attribute `length`. Genes seen in
            fewer than minSampleCount samples are DELETED from this dict.
        nSamples: the number of samples.
        minSampleCount: minimum number of samples a gene must occur in.
        genelengths: dict of gene lengths, or None/empty for no p-values. The
            default dict is only read, never modified.
        model: passed on to FiltusUtils.pValue.
        meta: passed on to the constructor.

        Each row is [gene, samplecount, samples, nvars, nuniqvars]; if gene
        lengths are given, the length, p-value and Bonferroni-corrected
        p-value are appended ('-' for the p-values if they cannot be computed).
        """
        intlist2string = FiltusUtils.intlist2string
        shareCounts, data = [0]*nSamples, []
        if genelengths is None: genelengths = {}
        M = float(len(genelengths))
        # values() instead of itervalues(): works on both Python 2 and 3
        totL = float(sum(genelengths.values()))

        # Average number of variants (after filt) per sample:
        m_aver = sum(g.length for g in geneMaster.values())/float(nSamples)

        # Loop over a snapshot of the keys, since entries are deleted along the
        # way (keys() is a live view on Python 3)
        for gene in list(geneMaster.keys()):
            geneData = geneMaster[gene]
            samplecount = geneData.nFiles()
            if samplecount < minSampleCount:
                del geneMaster[gene]
                continue
            shareCounts[samplecount-1] += 1
            samples = intlist2string(geneData.getFiles())
            nvars = geneData.length
            nuniqvars = geneData.nUniqVars()
            _info = [gene, samplecount, samples, nvars, nuniqvars]
            if genelengths:
                length = genelengths.get(gene, '-')
                try:
                    pval = FiltusUtils.pValue(m=m_aver, Lrel=length/totL, n=nSamples, k=samplecount, model=model)
                    pval_bonf = min(pval * M, 1)
                    _info.extend([length, '{:.3g}'.format(pval), '{:.3g}'.format(pval_bonf)])
                except Exception:
                    # E.g. unknown gene length ('-'): report the p-values as missing.
                    # (Not a bare except, so KeyboardInterrupt/SystemExit pass through.)
                    _info.extend([length, '-', '-'])
            data.append(_info)

        return cls(data, nSamples, geneMaster, shareCounts, minSampleCount=minSampleCount, meta=meta)
Пример #23
0
 def addPvalues(self, samplecount, m_aver, length, totL, M, model):
     """Return [p-value, Bonferroni-corrected p-value] as formatted strings.

     The p-value is computed by FiltusUtils.pValue from the relative gene
     length (length/totL) and the sample count; the corrected value is
     min(pval * M, 1). If the computation fails, ['-', '-'] is returned.
     """
     try:
         # The original referred to a bare `nSamples`, which is not defined in
         # this scope; the resulting NameError was swallowed, so '-' was always
         # returned.
         # NOTE(review): assumes the instance stores the sample count as
         # self.nSamples - confirm. If it does not, the AttributeError is caught
         # below and the result is ['-', '-'] as before.
         pval = FiltusUtils.pValue(m=m_aver, Lrel=length/totL, n=self.nSamples, k=samplecount, model=model)
         pval_bonf = min(pval * M, 1)
         return ['{:.3g}'.format(pval), '{:.3g}'.format(pval_bonf)]
     except Exception:
         return ['-', '-']
Пример #24
0
    def setAllColnames(self):
        """Update everything that depends on the column names of the loaded files.

        The unique column names are passed to self.FM, and the 'columnsum'
        menu is rebuilt with one entry per column. If there are no columns,
        entry 0 of the View menu is disabled and nothing more is done.
        """
        uniqueCols = FiltusUtils.listUnique([head for VF in self.files for head in VF.columnNames])
        self.FM.setColnames(uniqueCols)

        menubar = self.menuBar
        viewmenu = menubar.component('View-menu')
        if not uniqueCols:
            viewmenu.entryconfigure(0, state='disabled')
            return

        viewmenu.entryconfigure(0, state='normal')
        summarymenu = menubar.component('columnsum-menu')
        # Remove the old entries, if any
        L = summarymenu.index('end')
        if L is not None:
            menubar.deletemenuitems('columnsum', 0, L)

        summarizer = FiltusAnalysis.ColumnSummary()
        for col in uniqueCols:
            menubar.addmenuitem('columnsum', 'command', None, label=col, font=self.defaultfont,
                command=self._showSummary(summarizer, col))

        # Spread the entries over menu columns of at most 28 entries each.
        # -(-a // b) is ceiling division; the explicit // keeps the results
        # integers also on Python 3, where / would give floats and break range().
        nUnique = len(uniqueCols)
        nCols = -(-nUnique // 28)
        N = -(-nUnique // nCols)
        for i in range(1, nCols):
            summarymenu.entryconfigure(i * N, columnbreak=1)
Пример #25
0
    def select(self):
        """Copy the entries selected in the left list to the right list.

        Entries whose index is already in self.selection are not added again;
        a warning listing them is shown instead. The indices of the added
        entries are appended to self.selection and the top text of the right
        list is updated with its new size.
        """
        left, right = self._leftlist, self._rightlist
        filelist = left.get()
        picked = left.getcurselection()
        picked_ind = [filelist.index(name) for name in picked]
        duplicates = [i for i in picked_ind if i in self.selection]

        if duplicates:
            FiltusUtils.warningMessage("Samples already selected:\n\n%s" %
                                       '\n'.join(filelist[i] for i in duplicates))
            # Keep only the entries that are not selected already
            fresh = [(name, i) for name, i in zip(picked, picked_ind) if i not in duplicates]
            picked = [name for name, _ in fresh]
            picked_ind = [i for _, i in fresh]

        right.insert('end', *picked)
        self.selection.extend(picked_ind)
        right.settoptext("Selected: %d" % right.size())
Пример #26
0
 def _prepare(self):
     """Fill the sample list and the column menus from the currently loaded files."""
     files = self.filtus.files
     labels = []
     for i, VF in enumerate(files):
         # Numbered from 1, followed by the base name of the sample's short name
         labels.append('%2d: %s' % (i + 1, os.path.basename(VF.shortName)))
     self.names.setlist(labels)
     self._selectall_and_update()

     cols = FiltusUtils.listUnique([head for VF in files for head in VF.columnNames])
     # Every column menu gets an empty first item followed by all column names
     for colmenu in (self.scatter_x, self.scatter_y, self.histo_var):
         colmenu.setItems([''] + cols)
Пример #27
0
    def read(self, filename, **kwargs):
        """Read a variant file, showing the settings dialog when needed.

        Keyword arguments 'prompt' and 'guess' override the corresponding
        attributes; the remaining kwargs are passed on to _guessAndPrepare.

        Returns the object produced by the reader, or None if loading was
        stopped or the file was skipped (self.stopLoading / self.skipFile).
        After an error the method calls itself again with the dialog forced
        (guess=False, prompt=True), except when an unexpected error occurs
        and the dialog was not shown successfully - then the file is skipped.
        """
        self.skipFile = False
        self.stopLoading = False
        # True for the first file, or if the extension differs from that of the previous file
        new_ext = self.currentfile is None or (os.path.splitext(filename)[1] != os.path.splitext(self.currentfile)[1])
        self.prompt = kwargs.pop('prompt', self.prompt or new_ext)
        self.guess = kwargs.pop('guess', self.guess or (self.prompt and new_ext))
        # None: no attempt to show the dialog; False: the attempt was started; True: it completed
        promptShowsOk = None # modified when trying to show prompt
        
        try:
            self._guessAndPrepare(filename, kwargs)
            # Show the dialog if prompting is on, or if any active menu is inconsistent
            if self.prompt or any(OM.inconsistent for OM in self._activeMenus()):
                promptShowsOk = False
                FiltusUtils.activateInCenter(self.parent, self.dialog)
                promptShowsOk = True
            else:
                self._setParameters()
            # These flags are set by the dialog buttons (Cancel / Skip this file)
            if self.stopLoading or self.skipFile:
                return
            
            self.filtus.busy()
            # Parameters shared by the two reader methods
            common_params = dict(filename=filename, sep=self.sep, chromCol=self.chromCol, posCol=self.posCol, geneCol=self.geneCol, 
                                splitAsInfo=self.infoCol, split_general=self.split_general, prefilter=self.prefilter)
            
            # Note: splitAsInfo works also for nonVCF, but not splitCSQ (which requires correct preamble data)
            if self.vcf:
                VF = self.reader.readVCFlike(formatCol=self.formatCol, splitFormat=self.splitFormat, splitCsq=self.splitCsq, keep00=self.keep00, **common_params)
            else:
                VF = self.reader.readNonVCF(skiplines=self.skiplines, gtCol=self.gtCol, homSymbol=self.homSymbol, **common_params)
            self.filtus.notbusy()
            
        except (ValueError, RuntimeError) as e:
            # Show the message and try again with the dialog
            self.filtus.notbusy()
            FiltusUtils.warningMessage(e)
            return self.read(filename, guess=False, prompt=True)
        except Exception as e:
            self.filtus.notbusy()
            if promptShowsOk:
                # The dialog itself worked, so let the user try again (or skip the file)
                FiltusUtils.warningMessage("An error occured while reading this file:\n%s\n\n%s: %s\n\nPlease try again or skip file." %(filename, type(e).__name__, e))
                return self.read(filename, guess=False, prompt=True)
            else:
                # The dialog was not shown successfully: give up on this file
                FiltusUtils.warningMessage("%s: %s\n\nSkipping this file: %s" %(type(e).__name__, e, filename))
                self.skipFile = True
                return
                
        # Optionally offer to go back to the settings if the file has no homozygous variants
        if self.checkHomozygosity and VF.noHomozygotes():
            tryagain = FiltusUtils.yesnoMessage('The file %s has no homozygous variants. Go back to settings dialog?'%filename)
            if tryagain:
                VF = self.read(filename, guess = False, prompt=True)

        return VF
Пример #28
0
 def _doCreate(self):
     """Build a new variant database from the selected samples and save it.

     Raises RuntimeError if no samples are selected or if no output file name
     is given. When finished, an info message with a summary of the database
     and the time used is displayed.
     """
     started = time.time()
     filtus = self.filtus
     lists = self.lists
     selection = lists.selection
     VFlist = [filtus.filteredFiles[i] for i in selection]
     sampleNames = lists.getright()
     outFormat = self.formatSelect.getvalue()
     outFilename = self.save_browser.getvalue()

     if not selection:
         raise RuntimeError("No samples selected")
     if not outFilename:
         raise RuntimeError("Please specify output file name and format")

     db = VariantDatabase.buildFromSamples(VFlist, outFormat, sampleNames)
     db.save(outFilename)

     parts = [
         "Variant database written to %s.\n\n" % outFilename,
         "\n".join(db.infoSummary()),
         '\n\nTime used: %.2f seconds' % (time.time() - started,),
     ]
     FiltusUtils.infoMessage("".join(parts))
Пример #29
0
 def _getFormatHeads(self, data, formatIndex):
     formats = set(x[formatIndex] for x in data)
     allEqual = len(formats) == 1
     if allEqual:
         formatHeads = list(formats)[0].split(':')
     else:
         formatHeads = FiltusUtils.listUnique(
             [field for F in formats for field in F.split(':')])
     return formatHeads, allEqual
Пример #30
0
 def __init__(self, filename, columnNames, columnDescriptions, variants, chromCol, posCol, geneCol,
              formatHeads, splitFormat, splitInfo,  prefilter=None, appliedFilters=None, keep00=None, nGenes=None, meta=''):
     """Set up a VCF-type data object on top of VariantData.

     The genotype column passed to VariantData is 'GT' if splitFormat is true,
     otherwise the last column; the hom symbol is the placeholder
     '<vcf format>'. In addition the VCF specific attributes are stored, the
     REF and ALT columns are looked up among some known names, and
     self.chromPosRefAlt is set to a getter of (chrom, pos, ref, alt). If
     REF/ALT cannot be found, a warning is shown and self.varDefGetter is
     used instead.
     """
     gtCol = 'GT' if splitFormat else columnNames[-1]
     homSymbol = '<vcf format>'
     VariantData.__init__(self, filename, columnNames, variants, chromCol, posCol, geneCol, gtCol, homSymbol,
                          columnDescriptions=columnDescriptions, prefilter=prefilter, appliedFilters=appliedFilters, nGenes=nGenes, meta=meta)
     self.splitFormat = splitFormat
     self.splitInfo = splitInfo
     self.isVCFtype = True
     self.keep00 = keep00
     self.formatHeads = formatHeads
     # First known REF/ALT name present among the columns, or None
     self.refCol = next((h for h in ['REF', 'VCF_REF', 'vcf_ref', 'Ref'] if h in columnNames), None)
     self.altCol = next((h for h in ['ALT', 'VCF_ALT', 'vcf_alt', 'Alt', 'Obs'] if h in columnNames), None)
     if self.refCol and self.altCol:
         self.chromPosRefAlt = self.columnGetter(chromCol, posCol, self.refCol, self.altCol)
     else:
         FiltusUtils.warningMessage("Unknown REF/ALT columns in %s" %filename)
         self.chromPosRefAlt = self.varDefGetter
     # gtCol and homSymbol are replaced by the VCF specific attributes
     self._mainAttributes = [a for a in self._mainAttributes if a not in ['gtCol', 'homSymbol']] + ['formatHeads', 'splitFormat', 'splitInfo',  'keep00']
Пример #31
0
 def checkLoadedSamples(self, select, minimum=None, maximum=None, VF=True, filtered=True):
     '''Typically called immediately after a button press starting analysis.
     Checks that there are sufficient loaded samples for analysis, and returns either indices or VF objects.
     If any problem occurs, a warning is displayed and False is returned.

     select: either "selection" (the samples selected in the file listbox) or "all".
     minimum, maximum: limits on the number of samples (None = no limit).
         With select="all" only the minimum is checked.
     VF: if true, the sample objects are returned, otherwise a list of their indices.
     filtered: if true, self.filteredFiles is used, otherwise self.files.
     '''
     def plural_s(k):
         return '' if k==1 else 's'

     useMin, useMax = minimum is not None, maximum is not None
     try:
         if len(self.files)==0:
             raise IndexError("No samples are loaded")
         files = self.filteredFiles if filtered else self.files
         # No (filtered) files: start over from an initial copy
         if len(files) == 0:
             files = self.filteredFiles = self.filteredFiles_initialcopy()

         if select =="all":
             if useMin and len(files) < minimum:
                 raise IndexError("This option requires at least %d loaded sample%s." % (minimum, plural_s(minimum)))
             # list(...) so that a list is returned also on Python 3
             return files if VF else list(range(len(files)))

         # Anything else than "selection" used to fail further down with a
         # NameError (undefined selection list); report the real problem instead
         if select != "selection":
             raise ValueError("Unknown value of 'select': %r" % (select,))

         seleci = [int(i) for i in self.fileListbox.curselection()]
         nsel = len(seleci)
         if useMin and useMax and not minimum <= nsel <= maximum:
             if minimum==maximum:
                 raise IndexError("Please select exactly %d sample%s in the 'Loaded samples' window" %(minimum, plural_s(minimum)))
             else:
                 raise IndexError("Please select between %d and %d samples in the 'Loaded samples' window" %(minimum, maximum))
         elif useMin and nsel < minimum:
             raise IndexError("Please select at least %d sample%s in the 'Loaded samples' window" %(minimum, plural_s(minimum)))
         elif useMax and nsel > maximum:
             raise IndexError("Please select at most %d sample%s in the 'Loaded samples' window" %(maximum, plural_s(maximum)))

         return [files[i] for i in seleci] if VF else seleci

     except Exception as e:
         FiltusUtils.warningMessage(e)
         return False
Пример #32
0
    def _splitFORMAT_update(self, reset=False):
        '''Callback for the splitFormat checkbox.

        Updates the headers shown in the column name menus, with the FORMAT
        column either split into its fields or restored. If reset is true,
        the stored FORMAT headers and sample names are cleared first.
        '''
        if reset:
            self._FORMATheaders = []
            self._sampleNames = []
        wantSplit = self.splitFormatVar.get()
        column = self.formatColMenu.getvalue()
        # Cannot split without a selected, consistent FORMAT column: untick and stop
        if wantSplit and (not column or self.formatColMenu.inconsistent):
            self.splitFormatVar.set(0)
            return

        headers = self.currentHeaders[:]

        def undo_previous_split():
            # Put back the FORMAT column name and the sample names in place of
            # everything from 'GT' onwards, and forget the stored split
            if self._FORMATheaders:
                gt_pos = headers.index('GT')
                headers[gt_pos:] = [self.formatCol] + self._sampleNames
            self._FORMATheaders = []
            self._sampleNames = []

        if not wantSplit:
            undo_previous_split()
        else:
            first = self.firstvariants[0][self.originalHeaders.index(column)]
            if not first.startswith('GT'):
                self.formatColMenu.setColor(False)
                FiltusUtils.warningMessage("FORMAT column entries must begin with 'GT'")
                return
            undo_previous_split()
            self.formatCol = column
            self._FORMATheaders = first.split(':')
            ind = headers.index(column)
            # The headers after the FORMAT column are kept as sample names
            self._sampleNames = headers[ind + 1:]
            headers[ind:] = self._FORMATheaders

        self._updateColnameMenus(headers)
Пример #33
0
    def _run(self, event):
        """Run a function from the (freshly reloaded) autorun module.

        If the event key is 'Return', the function named 'x' is run. Otherwise
        a dialog asks for the function name. A name that is not found in
        autorun falls back to FiltusUtils.ignore_break. The function is called
        with self as its only argument.
        """
        import autorun
        reload(autorun)

        if event.keysym == 'Return':
            f = 'x'
        else:
            def execute(button):
                # 'Cancel' closes the dialog without a value; otherwise the
                # stripped entry text becomes the result of the dialog
                if button == 'Cancel':
                    prompt.deactivate()
                else:
                    prompt.deactivate(prompt.get().strip())

            prompt = Pmw.PromptDialog(self.parent, buttons=('OK', 'Cancel'), title='Run command',
                label_text='Function to run:', entryfield_labelpos='n', command=execute, defaultbutton=0)
            f = FiltusUtils.activateInCenter(self.parent, prompt)

        if f:
            function = getattr(autorun, f, FiltusUtils.ignore_break)
            function(self)
Пример #34
0
    def buildFromSamples(cls, VFlist, outFormat, sampleNames=None):
        r"""Build a variant database from a list of samples.

        The column names always start with the 6 columns
        CHROM, POS, OBS (# samples with a variant at this position),
        HET (# samples with het variant), HOM (# samples with hom variant)
        and AFREQ (allele frequency).

        If outFormat (after formatInit) is "E", i.e. extended format, these
        are followed by one column per sample, named by sampleNames (default:
        the short names of the samples). Entries in these columns are
        0, 1 or 2 (= not present, het, hom).
        """
        nSamples = len(VFlist)

        # The extended part is always created first: a dict mapping each variant
        # to a list with one genotype number per sample, 0 by default.
        # (This turned out to be quicker than getUniqueVariants(), also for simple format.)
        extended = collections.defaultdict(lambda: [0] * nSamples)
        for idx, VF in enumerate(VFlist):
            genotype = VF.GTnum()
            variantKey = VF.varDefGetter
            for variant in VF.variants:
                extended[variantKey(variant)][idx] = genotype(variant)

        colNames = ['CHROM', 'POS', 'OBS', 'HET', 'HOM', 'AFREQ']
        outFormat = formatInit(outFormat)
        if outFormat == "E":
            if sampleNames is None:
                sampleNames = [VF.shortName for VF in VFlist]
            colNames.extend(sampleNames)

        meta = FiltusUtils.composeMeta(VFlist=VFlist, analysis="NEW VARIANT DATABASE", sort=False)
        return cls(outFormat, nSamples=nSamples, columnNames=colNames,
                   extendedDict=extended, meta=meta)
Пример #35
0
 def settingsPrompt(self):
     """Activate ``self.settingsDialog`` via FiltusUtils.activateInCenter, relative to ``self.parent``."""
     owner, dialog = self.parent, self.settingsDialog
     FiltusUtils.activateInCenter(owner, dialog)
Пример #36
0
 def plink_prompt(self):
     """Show the PLINK dialog, provided the sample check passes.

     ``checkLoadedSamples`` is asked for the current selection with
     minimum=1 and maximum=1; if it returns a false value nothing happens.
     The ``FiltusWidgets.PLINK_GUI`` widget is created on first use and kept
     as ``self.plinkgui``.
     """
     selectionOk = self.checkLoadedSamples(select="selection", VF=False, minimum=1, maximum=1)
     if selectionOk:
         if not hasattr(self, 'plinkgui'):
             self.plinkgui = FiltusWidgets.PLINK_GUI(self)
         FiltusUtils.activateInCenter(self.parent, self.plinkgui)
Пример #37
0
    def apply(self, VF, checks = True, inplace=False):
        """Run every filter of this object on the variants of VF.

        The steps run in this order: close-pair removal, restriction to genes,
        restriction to variants, exclusion of variants, column filters,
        region filter, exclusion of genes, and finally the model filter
        ('Recessive homozygous' or 'Recessive', the latter with optional
        benign-pair handling).

        Args:
            VF: the variant file object to filter; its ``variants`` list is
                copied before filtering.
            checks: if true, ``self.checks(VF)`` is called first. A ValueError
                raised there is shown with FiltusUtils.warningMessage and an
                empty variant list is delivered.
            inplace: if true, the result is stored with ``VF.setVariants`` and
                ``VF.appliedFilters`` is set to this object; None is returned.
                Otherwise the result of ``VF.copyAttributes(variants=...,
                appliedFilters=self)`` is returned.
        """
        def deliver(variants):
            # Single exit point: either mutate VF or hand back a filtered copy.
            if inplace:
                VF.setVariants(variants)
                VF.appliedFilters = self
                return None
            return VF.copyAttributes(variants=variants, appliedFilters=self)

        if checks:
            try:
                self.checks(VF)
            except ValueError as message:
                FiltusUtils.warningMessage(message)
                return deliver([])

        colNames = VF.columnNames
        colFilters = self.columnfilters
        dropVariants = self.exclude_variants
        keepVariants = self.restrict_to_variants
        dropGenes = self.exclude_genes
        keepGenes = self.restrict_to_genes
        regions = self.regionsChromdic
        kept = VF.variants[:]

        ### 0. close pairs are removed before anything else
        if self.closePairLimit > 0:
            kept = list(removeClosePairs(VF, minDist = self.closePairLimit, variants_only=inplace)) #TODO

        ### 1. restriction filters (rarely used). Genes go first: usually a very
        ###    small set, so the variant list shrinks substantially.
        if keepGenes:
            genesOf = VF.annotatedGenes
            kept = [v for v in kept if any(g in keepGenes for g in genesOf(v))]
        if keepVariants: # not available in the GUI, but used in Family Gene Sharing (is that right??)
            keyOf = VF.varDefGetter
            kept = [v for v in kept if keyOf(v) in keepVariants]

        ### 2. excluded variants. Usually present, usually a large reduction.
        if dropVariants:
            keyOf = VF.varDefGetter
            kept = [v for v in kept if keyOf(v) not in dropVariants]

        ### 3. column filters (already sorted w.r.t. speed)
        for colName, relation, value, _keep in (colFilters or ()):
            if colName not in colNames:
                continue # missing columns are dealt with in checks()
            position = colNames.index(colName)
            kept = [v for v in kept if relation(v[position], value)]

        ### 4. regions
        if regions is not None:
            firstGreater = FiltusUtils.firstGreater
            chromOf, posOf = VF.chromGetter, VF.posGetter
            byChrom = collections.defaultdict(list)
            for v in kept:
                byChrom[chromOf(v)].append(v)
            kept = []
            for chromName in sorted(byChrom, key=FiltusUtils.chromInt):
                boundaries = regions[chromName]
                onChrom = sorted(byChrom[chromName], key=lambda v: float(posOf(v)))
                # odd/even result <=> position inside/outside some region
                hits = firstGreater((float(posOf(v)) for v in onChrom), boundaries)
                kept.extend(v for v, hit in zip(onChrom, hits) if hit % 2 == 1)

        ### 5. excluded genes. Possibly slow-ish because of annotatedGenes (not measured).
        if dropGenes:
            genesOf = VF.annotatedGenes
            kept = [v for v in kept if not any(g in dropGenes for g in genesOf(v))]

        ### 6. model filter and controls (benign pairs in compound recessive models)
        model = self.model
        if model == 'Recessive homozygous':
            isHom = VF.isHomALT()
            kept = [v for v in kept if isHom(v)]
        elif model == 'Recessive':
            keyOf = VF.varDefGetter
            genesOf = VF.annotatedGenes
            isHom = VF.isHomALT()
            benignPairs = self.benignPairs
            if benignPairs:
                # heterozygous variant definitions, grouped per gene
                hetByGene = collections.defaultdict(set)
                for v in kept:
                    if isHom(v):
                        continue
                    key = keyOf(v)
                    for g in genesOf(v):
                        hetByGene[g].add(key)
                # NOTE: this ignores that a variant can be annotated with several genes. TODO
                discard = set()
                for g, hetKeys in hetByGene.iteritems():
                    if g not in benignPairs:
                        continue
                    allPairs = set(frozenset(pair) for pair in itertools.combinations(hetKeys, 2))
                    nonBenign = allPairs - benignPairs[g]
                    inNonBenign = set(key for pair in nonBenign for key in pair)
                    # variants occurring only in benign pairs are dropped
                    discard.update(hetKeys - inNonBenign)
                kept = [v for v in kept if keyOf(v) not in discard]
            geneCount = collections.Counter(g for v in kept for g in genesOf(v))
            kept = [v for v in kept if isHom(v) or any(geneCount[g] > 1 for g in genesOf(v))]

        return deliver(kept)
Пример #38
0
def QC_3plots(VFlist, gender=True, private=True, heterozygosity=True, writetofile=None, save=None, show=True):
    """Draw up to three quality-control scatter plots, one point per sample.

    The requested plots are placed as subplots of one figure:
      * gender: number of variants vs. heterozygosity (%), restricted to the
        database rows accepted by FiltusUtils.XminusPAR.
      * private: total number of variants vs. number of variants that occur
        in that sample only.
      * heterozygosity: number of variants vs. heterozygosity (%), restricted
        to rows whose first column maps to a value below 23 under
        FiltusUtils.chromInt.
    With fewer than 13 samples every sample gets its own marker/colour and an
    extra legend panel is added; otherwise all points are red circles.

    NOTE: the code indexes the results of zip() and calls len() on map()
    results, i.e. it relies on Python 2 semantics where these return lists.

    Args:
        VFlist: the samples to plot; a database is built from them with
            buildFromSamples(VFlist, "Extended").
        gender, private, heterozygosity: switches for the three plots.
        writetofile: if true-ish, used as the path of a text file that
            receives the plotted values as tab-separated tables.
        save, show: passed on unchanged to showAndSave.

    Returns:
        The figure, or None if none of the three plots was requested.

    Raises:
        RuntimeError: if the heterozygosity plot is requested but no row
            passes the chromInt(...) < 23 test.
    """
    if private + heterozygosity + gender == 0: return None
    N = len(VFlist)
    add_legend = N < 13
    # Near-square subplot grid: floor(sqrt) rows, as many columns as needed.
    Nplots = private + heterozygosity + gender + add_legend
    nrow = int(math.sqrt(Nplots))
    ncol = math.ceil(float(Nplots)/nrow)
    fig = plt.figure(figsize=(3.5*ncol, 3.5*nrow))

    if add_legend:
        # 12 distinct marker/size/colour combinations (add_legend guarantees N <= 12).
        markers = ['D','^','*','d','<','s','p','v','D','^','*','d']
        sizes = [6,8,8,8,8,8,8,8,6,8,8,8]
        cols = ['red', 'lime', 'cyan', 'brown', 'magenta', 'gold', 'pink', 'black', 'purple', 'gray', 'silver', 'green']
    else:
        markers, sizes, cols = ['o']*N, [6]*N, ['red']*N

    DB = FiltusDatabase.VariantDatabase.buildFromSamples(VFlist, "Extended")
    # Rows of the database. Elements from index 6 on are used below as the
    # per-sample genotype entries (presumably 0/1/2 = absent/het/hom, as in
    # the extended database format -- TODO confirm against VariantDatabase).
    db_str = DB.variants

    if writetofile:
        sep = '\t'
        text_out = FiltusUtils.composeMeta(VFlist, analysis="QC PLOTS")

    plotnr = 0
    if gender:
        plotnr += 1
        ax_sex = fig.add_subplot(nrow, ncol, plotnr, aspect=1)
        XminusPAR = FiltusUtils.XminusPAR
        # Keep the sample entries of rows whose first two fields pass XminusPAR.
        db_X_raw = [x[6:] for x in db_str if XminusPAR(x[:2])]
        if db_X_raw:
            # Transpose: one tuple per sample.
            db_X = zip(*db_X_raw)
            totals_X = [sum(map(bool, x)) for x in db_X]
            # Percentage of entries equal to 1 among the non-zero entries (0 if there are none).
            hets = [sum(g == 1 for g in sample)*100.0/tot if tot>0 else 0 for sample, tot in zip(db_X, totals_X)]
            for i in range(N):
                ax_sex.plot(totals_X[i], hets[i], marker=markers[i], color=cols[i], markersize=sizes[i])
        else:
            # Nothing to plot; the axes are still decorated below.
            totals_X, hets = [0]*N, [0]*N
            #print "Empty gender estimation plot.\n\nNo variants found on X \ PAR."
        setPlotParams(ax_sex, "Gender estimation", 'Variants on X (-PAR)', 'Heterozygosity (%)', ylim=(0,100))
        # Coloured bands at 0-15, 15-35 and 35-100 % heterozygosity; the text
        # labels added below read MALE, ? and FEMALE.
        ax_sex.axhspan(0, 15, facecolor='blue', alpha=0.2)
        ax_sex.axhspan(15, 35, facecolor='red', alpha=0.2)
        ax_sex.axhspan(35, 100, facecolor='green', alpha=0.2)

        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        ax_sex.text(0.05, 0.95, "FEMALE", transform=ax_sex.transAxes, fontsize="x-small", va="top", ha='left', bbox=props)
        ax_sex.text(0.05, 0.27, "? ", transform=ax_sex.transAxes, fontsize="x-small", va="center", ha='left', bbox=props)
        ax_sex.text(0.95, 0.05, "MALE", transform=ax_sex.transAxes, fontsize="x-small", va="bottom", ha='right', bbox=props)

        if writetofile:
            headers = sep.join(['Sample', 'Variants on X (-PAR)', 'Heterozygosity (%)', 'Gender'])
            # '?' when there are no variants or 15 < het% < 35; 'Male' when het% <= 15; otherwise 'Female'.
            genders = ['?' if tot==0 or 15<h<35 else 'Male' if h<=15 else 'Female' for tot, h in zip(totals_X, hets)]
            points = [sep.join([s, str(x), '%.2f'%y, g]) for s,x,y,g in zip(DB.sampleNames, totals_X, hets, genders)]
            text_out += "***Plot: Gender estimation***\n" + headers + '\n' + '\n'.join(points) + '\n\n'

    if private:
        plotnr += 1
        ax_priv = fig.add_subplot(nrow, ncol, plotnr, aspect=1)
        # Presence/absence per sample (columns 6+ of the transposed database).
        db_nonz = [map(bool, x) for x in zip(*db_str)[6:]]
        totals_all = map(sum, db_nonz)

        # Large counts are shown in thousands.
        if max(totals_all)>2000:
            totals_all = [tot/1000.0 for tot in totals_all]
            xlab = '# variants/1000'
        else: xlab = '# variants'
        # Number of samples carrying each variant.
        rowSums_nonz = map(sum, zip(*db_nonz))

        # A variant is private when exactly one sample carries it.
        priv_ind = [i for i in range(len(rowSums_nonz)) if rowSums_nonz[i]==1]
        privates = [sum(sampl[i] for i in priv_ind) for sampl in db_nonz]
        for i in range(N):
            ax_priv.plot(totals_all[i], privates[i], marker=markers[i], color=cols[i], markersize=sizes[i])
        setPlotParams(ax_priv, "Private variants", xlab, 'Private')

        if writetofile:
            headers = sep.join(['Sample', xlab, 'Private'])
            points = [sep.join([s, str(x), str(y)]) for s,x,y in zip(DB.sampleNames, totals_all, privates)]
            text_out += "***Plot: Private variants***\n" + headers + '\n' + '\n'.join(points) + '\n\n'

    if heterozygosity:
        plotnr += 1
        ax_het = fig.add_subplot(nrow, ncol, plotnr, aspect=1)
        chromInt = FiltusUtils.chromInt
        # Sample entries of rows with chromInt(first column) < 23, transposed to one tuple per sample.
        db_AUT = zip(*[x[6:] for x in db_str if chromInt(x[0]) < 23])
        if not db_AUT:
            raise RuntimeError("Empty heterozygosity plot.\n\nNo autosomal variants found.")
        totals_AUT = [sum(map(bool, x)) for x in db_AUT]
        hets = [sum(g == 1 for g in sample)*100.0/tot if tot>0 else 0 for sample, tot in zip(db_AUT, totals_AUT)]
        # Large counts are shown in thousands.
        if max(totals_AUT) > 2000:
            totals_AUT = [tot/1000.0 for tot in totals_AUT]
            xlab = '# autosomal variants/1000'
        else: xlab = '# autosomal variants'

        for i in range(N):
            ax_het.plot(totals_AUT[i], hets[i], marker=markers[i], color=cols[i], markersize=sizes[i])
        setPlotParams(ax_het, "Heterozygosity", xlab, 'Heterozygosity (%)', ylim=(-5,105))

        if writetofile:
            # 'A'+xlab[3:] turns '# autosomal ...' into 'Autosomal ...' for the column header.
            headers = sep.join(['Sample', 'A'+xlab[3:], 'Heterozygosity (%)'])
            points = [sep.join([s, str(x), '%.2f'%y]) for s,x,y in zip(DB.sampleNames, totals_AUT, hets)]
            text_out += "***Plot: Heterozygosity***\n" + headers + '\n' + '\n'.join(points) + '\n'

    if writetofile:
        with open(writetofile, 'w') as out:
            out.write(text_out)

    if add_legend:
        # The legend lives in its own frameless subplot; the empty plots only create legend entries.
        plotnr +=1
        ax_legend = fig.add_subplot(nrow, ncol, plotnr, aspect=1)
        simplenames = [VF.shortName for VF in VFlist]
        ax_legend.set_frame_on(False)
        ax_legend.axis('off')
        for i in range(N):
            ax_legend.plot([], marker=markers[i], color=cols[i], markersize=sizes[i], label=simplenames[i], ls='None')
        ax_legend.legend(loc=2, numpoints=1, fontsize='small', frameon=False, title="Legend")

    showAndSave(fig, tight=True, show=show, save=save)
    return fig
Пример #39
0
    def __init__(self, parent):
        """Build the main FILTUS window inside ``parent``.

        Sets the window title, creates the fonts, the file list with its two
        summary boxes, the filter machine, the main text field and the sharing
        notebook, builds the settings dialog and the main menu, places
        everything on the grid and sizes the scrolled frame. If the
        module-level ``PLOT_error`` is true-ish, a warning about disabled
        plotting is shown at the end.

        Args:
            parent: the Tk widget that hosts the application; its title, grid
                configuration and key bindings are modified here.
        """
        self.parent = parent
        self.version = VERSION
        parent.title("FILTUS " + self.version)

        self.manualdir = os.path.join(SCRIPT_DIR, "man")
        self.datadir = os.path.join(SCRIPT_DIR, "data")

        self.busyManager = BusyManager(parent)
        self.windowingsystem = parent.tk.call('tk', 'windowingsystem')
        # Right-click event names differ on the 'aqua' windowing system.
        self.rightclickevents = ['<2>', '<Control-1>'] if self.windowingsystem == 'aqua' else ['<3>']
        parent.rowconfigure(1, weight=1)
        parent.columnconfigure(0, weight=1)

        # All main widgets live in the interior of a scrolled frame.
        self.scrollframe = Pmw.ScrolledFrame(parent, borderframe=0, clipper_borderwidth=0, vertflex='expand', horizflex='expand')
        frame = self.scrollframe.interior()
        frame.rowconfigure(2, weight=1)
        frame.columnconfigure(1, weight=1)
        self.frame = frame

        ### fonts
        self.defaultfont = tkFont.nametofont("TkDefaultFont")
        self.smallfont, self.smallbold, self.tinyfont, self.titlefont = self.defaultfont.copy(), self.defaultfont.copy(), self.defaultfont.copy(), self.defaultfont.copy()
        self.smallbold['weight'] = 'bold'
        self.monofont = tkFont.nametofont("TkFixedFont")
        self.monobold = self.monofont.copy()
        self.monobold['weight'] = 'bold'

        self.textfont = tkFont.nametofont("TkTextFont")
        #self.menufont = tkFont.nametofont("TkMenuFont") # this didn't respond to change... Used workaround by setting menu label fonts manually to defaultfont.
        # Give the text font the same face options as the fixed-width font.
        for opt in ['family','weight','slant','underline','overstrike']:
            self.textfont[opt] = self.monofont[opt]
        self.setFontSizes(self.defaultfont['size'], self.textfont['size'], init=True)

        # Bookkeeping for loaded files and their display names.
        self.files = []
        self.filteredFiles = []
        self.shortFilenames = False
        self.longFileNameList = []
        self.shortFileNameList = []
        self.currentFileNameList = []

        self.currentDir = ""
        self.currentFileDir = os.getcwd()
        self.storage = {} # storage for  variant databases (to avoid reloading when filtering)

        ############## The file group
        self.fileGroup = Tkinter.Frame(frame)
        self.fileGroup.columnconfigure(0, weight=1)

        self.fileListbox = FiltusWidgets.LabeledListBox(self.fileGroup, filtus=self, toptext="Loaded files: 0", width=50)
        # The list box's bottom label component is removed right after creation.
        self.fileListbox.component('bottomlabel').destroy()
        self.fileSummary1 = FiltusWidgets.SummaryBox(self.fileGroup, filtus=self, toptext="Unfiltered summaries", width=36)
        self.fileSummary2 = FiltusWidgets.SummaryBox(self.fileGroup, filtus=self, toptext="Filtered summaries", width=36)

        self.fileListbox.grid(sticky='new')
        self.fileSummary1.grid(row=0, column=1, sticky='nw', padx=(10, 0))
        self.fileSummary2.grid(row=0, column=2, sticky='nw', padx=(10, 0))

        ############## The filter group
        self.FM = FiltusWidgets.FilterMachine(frame, filtus=self, manpage="filters")

        ############## The big text field
        self.text = FiltusWidgets.FiltusText(frame, filtus=self, labelpos='nw', label_font=self.smallfont)

        ############ Sharing notebook
        self.sharingNotebook = Pmw.NoteBook(frame, arrownavigation=False, pagemargin=0)
        self.gs = FiltusWidgets.GeneSharingPage(self.sharingNotebook, self, 'Gene sharing', manpage="genesharing")
        self.fs = FiltusWidgets.GeneSharingPage(self.sharingNotebook, self, 'Gene sharing fam', manpage="familybased", family=True)
        self.vs = FiltusWidgets.VariantSharingPage(self.sharingNotebook, self, 'Variant sharing', manpage="filtus")
        self.sharingNotebook.setnaturalsize()

        ###### Settings
        self.fileListbox.fixselectall()
        self.sepOutput = '\t'
        self.truncate = 50
        self.makeSettingsDialog()
        # NOTE(review): invoke() presumably applies the dialog's default settings -- confirm in makeSettingsDialog.
        self.settingsDialog.invoke()

        self.menuBar = self.makeMainMenu()
        # Both bindings lead to the _run handler.
        self.menuBar.bind('<Triple-1>', self._run)
        self.parent.bind('<Shift-Return>', self._run)

        ########### Place on grid
        self.fileGroup.grid(sticky='news', columnspan=2, pady=(0, 0))
        self.FM.grid(row=1, column=0, pady=(10,0), sticky='new')
        self.text.grid(row=1, column=1, rowspan=2, sticky='news', padx=(20, 0), pady=0)
        self.sharingNotebook.grid(row=2, sticky='new', pady=(10, 0))

        # on parent grid
        self.menuBar.grid(row=0, column=0, sticky='ew')
        self.scrollframe.grid(row=1, sticky='news', padx=20, pady=(10, 20))

        # Let Tk process pending geometry work so the winfo_* sizes used below are up to date.
        parent.update_idletasks()
        # Visible area: the frame's size, capped at the screen size minus 100 pixels.
        self.scrollframe.component('clipper').configure(height=min(frame.winfo_height(), frame.winfo_screenheight()-100),
                                                    width=min(frame.winfo_width()+200, frame.winfo_screenwidth()-100)) # the 200 is ad hoc to increase startup width a little
        if PLOT_error:
            FiltusUtils.warningMessage("Plotting functionality is disabled. Error message:\n\n%s\n\nNote: On MAC and Linux the modules 'numpy' and 'matplotlib' must be installed separately to make the plots work.\nSee also Filtus homepage: http://folk.uio.no/magnusv/filtus.html"%PLOT_error)
Пример #40
0
 def createdb(self):
     """Call ``self._doCreate()`` and report any Exception via FiltusUtils.warningMessage.

     The exception is not re-raised; the method always returns None.
     """
     try:
         self._doCreate()
     except Exception as error:
         # Callback boundary: the error is shown as a warning instead of propagating.
         FiltusUtils.warningMessage(error)
     return None
Пример #41
0
    def _guessAndPrepare(self, filename, kwargs):
        """Prepare the input dialog for ``filename``, guessing settings where allowed.

        Reads the first lines of the file, settles the column separator and
        then sets the chromosome, position, gene, FORMAT and genotype column
        menus. A value given in ``kwargs`` always wins; otherwise a value is
        guessed from the headers when ``_doGuess`` allows it.

        Args:
            filename: path of the file about to be loaded.
            kwargs: dict of explicitly requested settings. All entries are
                copied onto ``self`` as attributes. Keys examined here:
                chromCol, posCol, splitAsInfo, geneCol, formatCol,
                splitFormat, keep00, gtCol, split_general, prefilter.
        """
        self.currentfile = filename
        self.fileLabel.configure(
            text=FiltusUtils.wrapFilename(filename, joinsep='\n     '))
        preamble, headerline, firstline = self._getFirstLines(filename)
        self.__dict__.update(kwargs)

        # Separator: keep the current one if it occurs in the header line;
        # otherwise take the first candidate found in both the header line
        # and the first data line (tab if none is found).
        sep = self.sep
        if sep is None or sep not in headerline:
            sep = next((char for char in ['\t', ',', ';', ' ']
                        if char in headerline and char in firstline), '\t')

        self.sepInputOM.invoke(self._sepDicInv[sep])
        headers = self.currentHeaders

        def _doGuess(col):
            '''Dont guess if specified in arguments, or if the current value is consistent.'''
            return self.guess and col not in kwargs and (getattr(
                self, col) is None or getattr(self, col + 'Menu').inconsistent)

        lowheaders = [h.lower() for h in headers]

        def _matchHeader(alts):
            '''Return the header (original case) matching the first alternative found, else the empty string.'''
            for h in alts:
                if h in lowheaders: return headers[lowheaders.index(h)]
            return ''

        if 'chromCol' in kwargs:
            self.chromColMenu.setAndCheck(kwargs['chromCol'])
        elif _doGuess('chromCol'):
            chromCol = _matchHeader([
                '#chrom', 'vcf_chrom', 'vcf_chr', 'chrom', 'chr', 'chromosome'
            ])
            if chromCol: self.chromColMenu.setAndCheck(chromCol)

        if 'posCol' in kwargs: self.posColMenu.setAndCheck(kwargs['posCol'])
        elif _doGuess('posCol'):
            posCol = _matchHeader([
                'pos', 'vcf_pos', 'vcf_start', 'start', 'position',
                'pos_start', 'chromosome_position'
            ])
            if posCol: self.posColMenu.setAndCheck(posCol)

        if 'splitAsInfo' in kwargs:
            self.infoColMenu.setAndCheck(kwargs['splitAsInfo'])
            self._splitINFO_update()

        # If VEP CSQ info present: Store headers
        self.hasCSQ = False
        for line in preamble:
            if 'ID=CSQ,' in line:
                self.hasCSQ = True
                # The CSQ field names follow "Format: " and are separated by '|'.
                self._CSQheaders = line.split("Format: ")[1].strip().strip(
                    '">').split("|")
                break
        if not self.hasCSQ:
            self.splitCsqVar.set(0)
            self.splitCsqButt.configure(state="disabled")
        else:
            self._splitINFO_update()

        if 'geneCol' in kwargs: self.geneColMenu.setAndCheck(kwargs['geneCol'])
        elif _doGuess('geneCol'):
            geneCol = _matchHeader(['gene', 'gene.refgene', 'gene symbol'])
            if geneCol == '':
                # Fallback: the first header containing both 'gene' and 'name'.
                # (Previously assigned to a misspelled variable, so this
                # fallback never took effect.)
                geneCol = next((h for h, lowh in zip(headers, lowheaders)
                                if 'gene' in lowh and 'name' in lowh), '')
            if geneCol: self.geneColMenu.setAndCheck(geneCol)

        vcfGuess, formatColGuess = None, None
        if self.guess and self.firstvariants:
            # The middle element (INFO column guess) is not used here.
            vcfGuess, _infoCol, formatColGuess = self._guessVCF(
                self.originalHeaders, self.firstvariants[0])
            self.vcfChooser.invoke(int(not vcfGuess))
            if vcfGuess:
                self.splitFormatVar.set(1)  # Default option: Split FORMAT

        if 'formatCol' in kwargs:
            self.formatColMenu.setAndCheck(kwargs['formatCol'])
        elif formatColGuess:
            self.formatColMenu.setAndCheck(formatColGuess)

        if 'splitFormat' in kwargs:
            self.splitFormatVar.set(kwargs['splitFormat'])

        self._splitFORMAT_update()

        if 'keep00' in kwargs: self.keep00Var.set(kwargs['keep00'])

        if 'gtCol' in kwargs: self.gtColMenu.setAndCheck(kwargs['gtCol'])
        elif _doGuess('gtCol'):
            # No genotype column is guessed when the file was guessed to be VCF.
            gtCol = '' if vcfGuess else _matchHeader(
                ['genotype', 'gt', 'zygosity', 'homozygous', 'attribute_het'])
            self.gtColMenu.setAndCheck(gtCol)

        if 'split_general' in kwargs:
            # One or two (column, separator) pairs.
            s = kwargs['split_general']
            split, sep = s[0]
            self.splitcol1Menu.setAndCheck(split)
            self.splitcol1_sep.setvalue(sep)
            if len(s) > 1:
                split, sep = s[1]
                self.splitcol2Menu.setAndCheck(split)
                self.splitcol2_sep.setvalue(sep)

        if 'prefilter' in kwargs:
            operatorText, value = kwargs['prefilter']
            self.prefilter_operatorOM.setAndCheck(operatorText)
            self.prefilter_valueEntry.setvalue(value)
Пример #42
0
 def addSamples(self):
     """Call ``self.doAddSamples()`` and report any Exception via FiltusUtils.warningMessage.

     The exception is not re-raised; the method always returns None.
     """
     try:
         self.doAddSamples()
     except Exception as error:
         # Callback boundary: the error is shown as a warning instead of propagating.
         FiltusUtils.warningMessage(error)
     return None
Пример #43
0
    def readFileAndAdd(cls,
                       filename,
                       inFormat,
                       inNS,
                       outFormat,
                       VFlist,
                       sampleNames=None):
        """Read a database file and add the samples in VFlist to it.

        If ``filename`` is not an existing file, a fresh database is built
        from VFlist alone. If the input format (after ``formatInit``) is "S",
        the file is loaded with ``loadSimple`` and merged with a database
        built from VFlist via ``addSimple``. Otherwise the file is read row
        by row into a genotype matrix to which one column per new sample is
        appended.

        A new sample whose column is identical to one of the ``inNS`` existing
        sample columns triggers a yes/no question; on "yes" the sample is
        dropped, on "no" it is kept like any other sample.

        Args:
            filename: path of the existing database file.
            inFormat: format of the file; passed through ``formatInit``.
            inNS: number of samples already in the file.
            outFormat: format of the resulting database; passed through
                ``formatInit``.
            VFlist: the samples to add.
            sampleNames: column names for the new samples; defaults to their
                ``shortName`` attributes.

        Returns:
            The resulting database object.
        """
        if not os.path.isfile(filename):
            return VariantDatabase.buildFromSamples(VFlist=VFlist,
                                                    outFormat=outFormat,
                                                    sampleNames=sampleNames)

        inFormat = formatInit(inFormat)
        outFormat = formatInit(outFormat)

        if sampleNames is None: sampleNames = [VF.shortName for VF in VFlist]
        if inFormat == "S":
            old = VariantDatabase.loadSimple(filename, inNS)
            new = VariantDatabase.buildFromSamples(VFlist, outFormat,
                                                   sampleNames)
            meta = FiltusUtils.composeMeta(VFlist=VFlist,
                                           analysis="ADDED TO DATABASE",
                                           sort=False,
                                           appendTo=old.meta)
            return old.addSimple(new, meta=meta)

        N_add = len(VFlist)
        with open(filename, 'rU') as dbfil:
            meta, colNames, dbfil = _readTop(dbfil)

            # The columns before 'OBS' define the variant; the sample columns
            # start 4 positions after 'OBS' (OBS, HET, HOM, AFREQ come first).
            OBS_ind = colNames.index('OBS')
            v_extractor = itemgetter(*range(OBS_ind))

            sample_ind = OBS_ind + 4
            N = inNS + N_add
            extended = collections.defaultdict(lambda: [0] * N)
            for line in dbfil:
                dat = line.strip().split('\t')
                vdef = v_extractor(dat)
                extended[vdef][:inNS] = map(int, dat[sample_ind:])

        simpleColnames = colNames[:sample_ind]
        # New list: the caller's sampleNames is never mutated below.
        sampleNames = colNames[sample_ind:] + sampleNames
        k = inNS  # column that receives the next new sample
        skip = []
        for VF in VFlist:  # not enumerate() since the sample might be skipped
            gt = VF.GTnum()
            vDef = VF.varDefGetter
            for v in VF.variants:
                extended[vDef(v)][k] = gt(v)

            # check if already included: first old column identical to column k
            duplicateOf = next(
                (j for j in range(inNS)
                 if all(x[j] == x[k] for x in extended.itervalues())),
                None)
            if duplicateOf is not None:
                question = "The new sample '%s' is exactly equal to '%s' in the database." % (sampleNames[k], sampleNames[duplicateOf]) \
                        + "\n\nSkip this sample?"
                if FiltusUtils.yesnoMessage(question):
                    # Drop column k everywhere; k then refers to the next sample.
                    skip.append(VF)
                    for v in extended:
                        del extended[v][k]
                    del sampleNames[k]
                    extended.default_factory = lambda: [0] * len(sampleNames)
                    continue
            # The sample is kept (unique, or the user declined to skip it), so
            # move on to the next column. Previously a declined duplicate left
            # k unchanged and the following sample overwrote its column.
            k += 1

        new_meta = FiltusUtils.composeMeta(
            VFlist=[VF for VF in VFlist if VF not in skip],
            sort=False,
            analysis="ADDED TO DATABASE",
            appendTo='\n'.join(meta))
        colNames = simpleColnames + sampleNames
        return cls(outFormat,
                   nSamples=len(sampleNames),
                   columnNames=colNames,
                   extendedDict=extended,
                   meta=new_meta)
Пример #44
0
 def advLoad_prompt(self):
     """Show the advanced-load dialog.

     The ``FiltusWidgets.AdvancedLoad`` object is created on first use and
     kept as ``self.advLoad``; its ``dialog`` attribute is what gets activated.
     """
     alreadyBuilt = hasattr(self, 'advLoad')
     if not alreadyBuilt:
         self.advLoad = FiltusWidgets.AdvancedLoad(self)
     owner = self.parent
     dialog = self.advLoad.dialog
     FiltusUtils.activateInCenter(owner, dialog)
Пример #45
0
    def _splitINFO_update(self, column=None, reset=False):
        '''Callback for the INFO option menu; also called with column=None.

        Undoes any current CSQ/INFO split of the header list, then splits the
        chosen column into one header per INFO key (suffix "_INFO"), where the
        keys are collected from the "key=value" items found in
        self.firstvariants. With column=None the menu's current value is used,
        unless the menu is marked inconsistent (then nothing is done). An
        empty column name only resets. If no keys are found, the menu is
        coloured as invalid and a warning is shown.
        '''
        if reset:
            self._INFOheaders = []
        if column is None:
            if self.infoColMenu.inconsistent:
                return
            column = self.infoColMenu.getvalue()

        self.infoColMenu.setColor(True)
        headers = self.currentHeaders[:]

        ### Step 1: bring the header list back to its unsplit state.
        # A CSQ split is undone first; the headers are re-read afterwards
        # because _splitCsq_update is expected to have changed them.
        csqWasSplit = self.hasCSQ and self.splitCsqVar.get()
        if csqWasSplit:
            self.splitCsqVar.set(0)
            self._splitCsq_update()
            headers = self.currentHeaders[:]

        # Collapse previously split INFO headers back into the single INFO column.
        previous = self._INFOheaders
        if previous:
            start = headers.index(previous[0])
            headers[start:start + len(previous)] = [self.infoCol]
        self._INFOheaders = []
        self.infoCol = ''

        ### Step 2: an empty selection means reset only.
        if column == "":
            self._updateColnameMenus(headers)
            self.splitCsqButt.configure(state="disabled")
            return

        ### Step 3: collect the INFO keys seen in the first variants.
        infoKeys = set()
        for variant in self.firstvariants:
            infoField = variant[self.originalHeaders.index(column)]
            for item in infoField.split(';'):
                if '=' in item:
                    infoKeys.add(item.split('=')[0] + '_INFO')
        newHeaders = sorted(infoKeys)

        if not newHeaders:
            self.infoColMenu.setColor(False)
            self.splitCsqButt.configure(state="disabled")
            self._updateColnameMenus(headers)
            FiltusUtils.warningMessage(
                "I don't recognise %s as an INFO column" % column)
            return

        ### Step 4: replace the column by its INFO headers.
        position = headers.index(column)
        headers[position:position + 1] = newHeaders
        self._updateColnameMenus(headers)

        # Re-enable (and, if it was active before, re-apply) the CSQ split.
        if self.hasCSQ and "CSQ_INFO" in newHeaders:
            self.splitCsqButt.configure(state="normal")
            if csqWasSplit:
                self.splitCsqVar.set(1)
                self._splitCsq_update()

        # Stored last, after the callbacks above have run.
        self._INFOheaders = newHeaders
        self.infoCol = column
Пример #46
0
 def _guessAndPrepare(self, filename, kwargs):
     """Prepare the input dialog for ``filename``, guessing settings where allowed.

     Reads the first lines of the file, settles the column separator and
     then sets the chromosome, position, gene, FORMAT and genotype column
     menus. A value given in ``kwargs`` always wins; otherwise a value is
     guessed from the headers when ``_doGuess`` allows it.

     Args:
         filename: path of the file about to be loaded.
         kwargs: dict of explicitly requested settings. All entries are
             copied onto ``self`` as attributes. Keys examined here:
             chromCol, posCol, splitAsInfo, geneCol, formatCol,
             splitFormat, keep00, gtCol, split_general, prefilter.
     """
     self.currentfile = filename
     self.fileLabel.configure(text=FiltusUtils.wrapFilename(filename, joinsep='\n     '))
     preamble, headerline, firstline = self._getFirstLines(filename)
     self.__dict__.update(kwargs)

     # Separator: keep the current one if it occurs in the header line;
     # otherwise take the first candidate found in both the header line
     # and the first data line (tab if none is found).
     sep = self.sep
     if sep is None or sep not in headerline:
         sep = next((char for char in ['\t', ',', ';', ' '] if char in headerline and char in firstline), '\t')

     self.sepInputOM.invoke(self._sepDicInv[sep])
     headers = self.currentHeaders

     def _doGuess(col):
         '''Dont guess if specified in arguments, or if the current value is consistent.'''
         return self.guess and col not in kwargs and (getattr(self, col) is None or getattr(self, col+'Menu').inconsistent)

     lowheaders = [h.lower() for h  in headers]
     def _matchHeader(alts):
         '''Return the header (original case) matching the first alternative found, else the empty string.'''
         for h in alts:
             if h in lowheaders: return headers[lowheaders.index(h)]
         return ''

     if 'chromCol' in kwargs: self.chromColMenu.setAndCheck(kwargs['chromCol'])
     elif _doGuess('chromCol'):
         chromCol = _matchHeader(['#chrom', 'vcf_chrom', 'vcf_chr', 'chrom', 'chr', 'chromosome'])
         if chromCol: self.chromColMenu.setAndCheck(chromCol)

     if 'posCol' in kwargs: self.posColMenu.setAndCheck(kwargs['posCol'])
     elif _doGuess('posCol'):
         posCol = _matchHeader(['pos', 'vcf_pos', 'vcf_start', 'start', 'position', 'pos_start', 'chromosome_position'])
         if posCol: self.posColMenu.setAndCheck(posCol)

     if 'splitAsInfo' in kwargs:
         self.infoColMenu.setAndCheck(kwargs['splitAsInfo'])
         self._splitINFO_update()

     # If VEP CSQ info present: Store headers
     self.hasCSQ = False
     for line in preamble:
         if 'ID=CSQ,' in line:
             self.hasCSQ = True
             # The CSQ field names follow "Format: " and are separated by '|'.
             self._CSQheaders = line.split("Format: ")[1].strip().strip('">').split("|")
             break
     if not self.hasCSQ:
         self.splitCsqVar.set(0)
         self.splitCsqButt.configure(state="disabled")
     else:
         self._splitINFO_update()

     if 'geneCol' in kwargs: self.geneColMenu.setAndCheck(kwargs['geneCol'])
     elif _doGuess('geneCol'):
         geneCol = _matchHeader(['gene', 'gene.refgene', 'gene symbol'])
         if geneCol =='':
             # Fallback: the first header containing both 'gene' and 'name'.
             # (Previously assigned to a misspelled variable, so this
             # fallback never took effect.)
             geneCol = next((h for h, lowh in zip(headers, lowheaders) if 'gene' in lowh and 'name' in lowh), '')
         if geneCol: self.geneColMenu.setAndCheck(geneCol)

     vcfGuess, formatColGuess = None, None
     if self.guess and self.firstvariants:
         # The middle element (INFO column guess) is not used here.
         vcfGuess, _infoCol, formatColGuess = self._guessVCF(self.originalHeaders, self.firstvariants[0])
         self.vcfChooser.invoke(int(not vcfGuess))
         if vcfGuess:
             self.splitFormatVar.set(1) # Default option: Split FORMAT

     if 'formatCol' in kwargs:
         self.formatColMenu.setAndCheck(kwargs['formatCol'])
     elif formatColGuess:
         self.formatColMenu.setAndCheck(formatColGuess)

     if 'splitFormat' in kwargs:
         self.splitFormatVar.set(kwargs['splitFormat'])

     self._splitFORMAT_update()

     if 'keep00' in kwargs: self.keep00Var.set(kwargs['keep00'])

     if 'gtCol' in kwargs: self.gtColMenu.setAndCheck(kwargs['gtCol'])
     elif _doGuess('gtCol'):
         # No genotype column is guessed when the file was guessed to be VCF.
         gtCol = '' if vcfGuess else _matchHeader(['genotype', 'gt', 'zygosity', 'homozygous', 'attribute_het'])
         self.gtColMenu.setAndCheck(gtCol)

     if 'split_general' in kwargs:
         # One or two (column, separator) pairs.
         s = kwargs['split_general']
         split, sep = s[0]
         self.splitcol1Menu.setAndCheck(split)
         self.splitcol1_sep.setvalue(sep)
         if len(s) > 1:
             split, sep = s[1]
             self.splitcol2Menu.setAndCheck(split)
             self.splitcol2_sep.setvalue(sep)

     if 'prefilter' in kwargs:
         operatorText, value = kwargs['prefilter']
         self.prefilter_operatorOM.setAndCheck(operatorText)
         self.prefilter_valueEntry.setvalue(value)
Пример #47
0
    def read(self, filename, **kwargs):
        """Load ``filename`` with the current settings and return the reader's result.

        The keyword arguments ``prompt`` and ``guess`` override the stored
        ``self.prompt`` / ``self.guess`` flags; all remaining keyword arguments
        are handed to ``self._guessAndPrepare`` as a dict.

        The settings dialog is shown when prompting is on or when any active
        menu reports itself as inconsistent; otherwise the parameters are set
        directly. On a read error the user is warned and the file is either
        retried through the dialog or marked as skipped.

        Returns:
            Whatever ``self.reader.readVCFlike`` / ``self.reader.readNonVCF``
            returns, or None if loading was stopped or the file was skipped.
        """
        self.skipFile = False
        self.stopLoading = False

        # A file counts as having a "new" extension when nothing has been
        # loaded before or when its extension differs from the previous file.
        if self.currentfile is None:
            extension_changed = True
        else:
            this_ext = os.path.splitext(filename)[1]
            previous_ext = os.path.splitext(self.currentfile)[1]
            extension_changed = this_ext != previous_ext

        default_prompt = self.prompt or extension_changed
        self.prompt = kwargs.pop('prompt', default_prompt)
        # The default for guessing depends on the prompt flag just assigned.
        default_guess = self.guess or (self.prompt and extension_changed)
        self.guess = kwargs.pop('guess', default_guess)

        # None: dialog never attempted; False: attempted but failed to show;
        # True: dialog was shown and returned.
        dialog_state = None

        try:
            self._guessAndPrepare(filename, kwargs)
            show_dialog = self.prompt or any(
                menu.inconsistent for menu in self._activeMenus())
            if show_dialog:
                dialog_state = False
                FiltusUtils.activateInCenter(self.parent, self.dialog)
                dialog_state = True
            else:
                self._setParameters()

            if self.stopLoading or self.skipFile:
                return

            self.filtus.busy()
            shared_args = {
                'filename': filename,
                'sep': self.sep,
                'chromCol': self.chromCol,
                'posCol': self.posCol,
                'geneCol': self.geneCol,
                'splitAsInfo': self.infoCol,
                'split_general': self.split_general,
                'prefilter': self.prefilter,
            }

            # Note: splitAsInfo works also for nonVCF, but not splitCSQ (which requires correct preamble data)
            if self.vcf:
                reader_args = {
                    'formatCol': self.formatCol,
                    'splitFormat': self.splitFormat,
                    'splitCsq': self.splitCsq,
                    'keep00': self.keep00,
                }
                reader_args.update(shared_args)
                VF = self.reader.readVCFlike(**reader_args)
            else:
                reader_args = {
                    'skiplines': self.skiplines,
                    'gtCol': self.gtCol,
                    'homSymbol': self.homSymbol,
                }
                reader_args.update(shared_args)
                VF = self.reader.readNonVCF(**reader_args)
            self.filtus.notbusy()

        except (ValueError, RuntimeError) as err:
            # These are reported as-is and the user is sent back to the dialog.
            self.filtus.notbusy()
            FiltusUtils.warningMessage(err)
            return self.read(filename, guess=False, prompt=True)
        except Exception as err:
            self.filtus.notbusy()
            if not dialog_state:
                # The dialog was never shown successfully, so retrying through
                # it is not an option: give up on this file.
                FiltusUtils.warningMessage("%s: %s\n\nSkipping this file: %s" %
                                           (type(err).__name__, err, filename))
                self.skipFile = True
                return
            FiltusUtils.warningMessage(
                "An error occured while reading this file:\n%s\n\n%s: %s\n\nPlease try again or skip file."
                % (filename, type(err).__name__, err))
            return self.read(filename, guess=False, prompt=True)

        if not (self.checkHomozygosity and VF.noHomozygotes()):
            return VF

        question = (
            'The file %s has no homozygous variants. Go back to settings dialog?'
            % filename)
        if FiltusUtils.yesnoMessage(question):
            return self.read(filename, guess=False, prompt=True)
        return VF
Пример #48
0
    def doSearch(self):
        """Look up the entered chromosome/position in the loaded database file.

        The chromosome and position are read from the entry fields and the
        database description from ``self.browser``. The lines of the database
        file belonging to the chromosome (delimited by ``self.firstIndex`` and
        ``self.lastIndex``, which are recomputed when the browser reports a
        modification) are scanned for a line whose first two tab-separated
        fields equal the query.

        The outcome is written to ``self.resultWindow`` and stored in
        ``self.results``: either "Not found in the database" or a summary of
        columns 3-6 of the matching line. If the database format is
        "Extended", the samples with non-zero observation codes are listed as
        well (code 1 = heterozygous, code 2 = homozygous).

        Missing input (no database, chromosome or position) and unexpected
        errors are reported through ``FiltusUtils.warningMessage``.
        """
        self.results = None
        self.resultWindow.clear()
        chrom = self.chrom.getvalue().strip()
        pos = self.pos.getvalue().strip()
        inFilename, inNS, inNV, inFormat = self.browser.getInfo()

        # Validate the input in order; only the first problem is reported.
        if not inFilename:
            FiltusUtils.warningMessage("No database loaded")
            return
        if not chrom:
            FiltusUtils.warningMessage("Please indicate chromosome")
            return
        if not pos:
            FiltusUtils.warningMessage("Please indicate position")
            return

        query = self.query = [chrom, pos]
        self.filename = inFilename
        self.nSamples = inNS
        self.nVariants = inNV
        self.formatLong = inFormat
        self.colNames = self.browser.getColnames()

        try:
            if self.browser.modified():
                self.firstIndex, self.lastIndex = self._chromStartIndex(
                    inFilename)
                self.browser.setUnmodified()

            # A chromosome that is absent from the index cannot hold the
            # variant; report it as "not found" rather than as a bare KeyError.
            try:
                bounds = (self.firstIndex[chrom], self.lastIndex[chrom])
            except KeyError:
                bounds = None

            data = None
            if bounds is not None:
                # Plain 'r': the 'U' flag was removed in Python 3.11, and text
                # mode already translates newlines on Python 3.
                with open(self.filename, 'r') as database:
                    chrom_lines = itertools.islice(database, *bounds)
                    data = next(
                        (v for v in chrom_lines if v.split('\t')[:2] == query),
                        None)

            if data is None:
                self.results = "Not found in the database"
                self.resultWindow.settext(self.results)
                return

            data = data.strip().split('\t')
            fields = [
                'Total observations', 'Heterozygous', 'Homozygous',
                'Allele frequency in database'
            ]
            results = ['%s: %s' % x for x in zip(fields, data[2:6])]
            results[0] += ' (out of %d)' % inNS

            if self.formatLong == "Extended":
                # The last inNS columns hold one observation code per sample.
                allSamples = self.colNames[-inNS:]
                allObs = [int(code) for code in data[-inNS:]]
                observations = [(s, obs) for s, obs in zip(allSamples, allObs)
                                if obs != 0]

                # Without any carriers there is nothing to unzip or list.
                if observations:
                    samples, gtCode = zip(*observations)
                    gt = [('heterozygous', 'homozygous')[x - 1] for x in gtCode]
                    width = max(len(s) for s in samples)

                    results.extend(
                        ['', 'Samples:'] +
                        [s.ljust(width) + ' - ' + g for s, g in zip(samples, gt)])

            self.results = '\n'.join(results)
            self.resultWindow.settext(self.results)

        except Exception as e:
            FiltusUtils.warningMessage(e)
            return