Пример #1
0
 def run(self):
     """Run the stored SQL statement and dump its result to ``<root>/temp/query.txt``.

     The statement in ``self.sql_query`` is executed against ``self.database``
     through the project's ``SQL`` helper. The returned object (presumably a
     pandas DataFrame -- it must expose ``to_csv``) is written comma separated,
     with a header row and without the index column.
     """
     result = SQL(database=self.database).custom_sql(statement=self.sql_query)
     query_file = os.path.join(self.root, "temp", "query.txt")
     result.to_csv(query_file, sep=",", index=False, header=True)
Пример #2
0
    def grna_display_runner(self):
        """Ask the user for search options, run the CRISPRi worker and show the results.

        Steps performed when the options dialog is accepted:

        1. Blank the four result views and reset ``self.database_querried``.
        2. Make sure ``<root>/temp`` exists.
        3. Validate every requested gene and mask against the ``header`` column
           of the ``genes`` table of the selected database; the first unknown
           name is reported in a message box and the method returns.
        4. Start a ``CrisprInterference_worker`` on ``self.threadingPool`` and
           keep the GUI responsive while it runs.
        5. Load ``candidates.txt``, ``backup.txt``, ``dropped.txt`` and
           ``offtargets.txt`` from the temp directory (presumably written by the
           worker), display them, and pop up the list of requested genes that
           are absent from the candidates table.
        6. Delete the temp directory.

        Returns:
            None in every case.
        """
        display_grna = DisplayGuideRNA(database_list=self.availible_databases, cas9_list=self.availible_cas9)

        # Dialog cancelled/rejected: nothing to do.
        if not display_grna.exec_():
            return None

        def abort(message):
            """Show *message* in an error box and reset the progress bar to 0."""
            QtWidgets.QMessageBox.about(self, "Error", message)
            self.main_progressbar_value = 0
            self.main_progressbar.setValue(self.main_progressbar_value)

        # Replace whatever is currently displayed with an empty placeholder model.
        holder = PandasModel(pd.DataFrame({'': []}))
        for view in (self.display_candidates, self.display_backup,
                     self.display_dropped, self.display_offtargets):
            view.setModel(holder)
        self.database_querried = False

        self.statusBar().showMessage("Preparing ...")
        self.main_progressbar_value += 1
        self.main_progressbar.setValue(self.main_progressbar_value)

        user_options = display_grna.out()

        # EAFP instead of listdir-then-mkdir: no window between check and creation.
        tempdir = os.path.join(self.root, "temp")
        try:
            os.mkdir(tempdir)
        except FileExistsError:
            pass

        database = str(user_options['organism']).replace(" ", "_")
        mismatch = user_options['max_mismatch']
        max_grna = user_options['max_grna_count']
        max_primer_len = user_options['max_primer_len']
        user_cas9 = user_options['cas9']
        user_pam_tolerance = user_options['pam_tolerance']
        user_fiveprime = user_options['nucleotides_5']
        user_threeprime = user_options['nucleotides_3']

        # Normalise names: strip underscores and lower-case.
        # (str.replace is already a no-op when there is no underscore.)
        gene_mask_dictionary = {
            'genes': [name.replace("_", "").lower() for name in user_options['genes']],
            'masks': [name.replace("_", "").lower() for name in user_options['masks']],
        }

        sqlrunner = SQL(database=database)
        headers = sqlrunner.custom_sql("SELECT header FROM genes").to_dict('list')
        # Build the lookup set once; membership tests below are then O(1).
        known_headers = set(headers['header'])

        # Genes are checked before masks; the first unknown name aborts the run.
        db = database.replace("_", " ")
        for name in gene_mask_dictionary['genes'] + gene_mask_dictionary['masks']:
            if name not in known_headers:
                abort(f"{name} was not found in {db}")
                return None

        if mismatch == "":
            abort("First search guide RNA's")
            return None

        # Strand is r for reverse
        worker = CrisprInterference_worker(database=database,
                                           mismatch=mismatch,
                                           strand='r',
                                           max_grna=max_grna,
                                           genes_masks=gene_mask_dictionary,
                                           max_primer_size=max_primer_len,
                                           cas9_organism=user_cas9,
                                           pam_tolerance=user_pam_tolerance,
                                           fiveprime_nucleotides=user_fiveprime,
                                           threeprime_nucleotides=user_threeprime)

        self.threadingPool.start(worker)

        # Poll the pool while pumping the event loop so the window stays alive.
        while self.threadingPool.activeThreadCount() == 1:
            self.statusBar().showMessage("Gathering guide RNA's...")
            QtWidgets.QApplication.processEvents()

            if self.main_progressbar_value < 90:
                self.main_progressbar_value += 1
                self.main_progressbar.setValue(self.main_progressbar_value)
                time.sleep(0.8)
            else:
                # The bar is parked at 90: still yield briefly so this loop does
                # not spin at full speed and starve the worker thread.
                time.sleep(0.05)

        if self.threadingPool.waitForDone():
            self.statusBar().showMessage("Gathering data ...")

            def load(filename):
                """Read one comma separated result file from the temp directory."""
                return pd.read_csv(filepath_or_buffer=os.path.join(tempdir, filename), sep=",")

            self.candidate_gRNA_df = load("candidates.txt")
            self.backup_gRNA_df = load("backup.txt")
            self.dropped_gRNA_df = load("dropped.txt")
            self.offtarget_df = load("offtargets.txt")

            cand_model, backup_model, dropped_model, offtargets_model = map(
                PandasModel,
                [self.candidate_gRNA_df, self.backup_gRNA_df, self.dropped_gRNA_df, self.offtarget_df])

            # Animate the remaining part of the progress bar up to 100.
            while self.main_progressbar_value < 100:
                self.main_progressbar_value += 1
                self.statusBar().showMessage("Formatting for display...")
                self.main_progressbar.setValue(self.main_progressbar_value)
                time.sleep(0.01)

            self.display_candidates.setModel(cand_model)
            self.display_backup.setModel(backup_model)
            self.display_dropped.setModel(dropped_model)
            self.display_offtargets.setModel(offtargets_model)
            self.database_querried = True
            self.main_progressbar_value = 0
            self.main_progressbar.setValue(self.main_progressbar_value)
            self.statusBar().showMessage("Ready")

            # Requested genes that do not appear in the candidates table.
            missed = list(set(gene_mask_dictionary['genes']) - set(self.candidate_gRNA_df['genes']))

            EOSpopup(missed_genes=missed).exec_()

        shutil.rmtree(tempdir)
Пример #3
0
    def run(self):
        """Compute guide RNA tables for CRISPR interference and write them as CSV.

        Reads the global guide RNA table for ``self.mismatch`` from the database
        under ``<root>/databases``, optionally narrows it to the requested genes,
        classifies guides into candidates / backup / dropped, screens candidates
        and backup against off-targets, post-processes the tables (primer length,
        GC content, primer design) and finally writes four comma separated files
        into ``<root>/temp``: ``candidates.txt``, ``backup.txt``, ``dropped.txt``
        and ``offtargets.txt``.

        The ``temp`` directory is expected to exist already; it is not created here.
        """
        sqlrunner = SQL(database=os.path.join(self.root, "databases", self.database))
        gRNA_db = sqlrunner.get_global_gRNA(mismatch=str(self.mismatch))

        # This is a rate limiting step
        if self.gene_mask_dict['genes']:
            query_data = self.get_targeted_data(dataframe=gRNA_db, gene_mask_dict=self.gene_mask_dict)
        else:
            query_data = gRNA_db

        multifasta = sqlrunner.get_gene_multifasta()

        gRNA_runner = RefineCripri(grna_dataframe=query_data,
                                   strand=self.strand,
                                   fasta_dataframe=multifasta,
                                   cas9=self.cas9_organism,
                                   offtarget_ids=sqlrunner.custom_sql("SELECT name, strand FROM global_offtarget"))

        candidates, backup, dropped = gRNA_runner.cripr_interference()

        candidates, backup, dropped = map(self.utils.annotate_dataframe,
                                          [candidates, backup, dropped])

        offtargets = sqlrunner.get_offtargets_by_mismatch(mismatch=self.mismatch)
        offtargets = offtargets.dropna(subset=['annotation'])
        # Copy the filtered rows so the column assignment below writes to an
        # independent frame rather than to a slice of the query result.
        offtargets = offtargets[offtargets['strand'] != '+'].copy()
        offtargets['annotation'] = offtargets['annotation'].apply(
            lambda x: x.replace("_", "") if isinstance(x, str) else x)

        # Keep only hits whose annotation differs from the targeted gene.
        offtargets = offtargets.query("gene != annotation").reset_index(drop=True)

        offtarget_ids = list(set(offtargets['name']))
        candidates_has_offtargets = self.list_comparison(list1=candidates['names'], list2=offtarget_ids)
        backup_has_offtargets = self.list_comparison(list1=backup['names'], list2=offtarget_ids)

        def screen_offtargets(grna_frame, dropped_frame, has_offtargets):
            """Apply the off-target steps to one guide table.

            Returns the updated guide table, the updated dropped table and a
            DataFrame of the off-targets found for that guide table (empty, with
            the off-target columns, when ``has_offtargets`` is false).
            """
            if not has_offtargets:
                # The shared [] is only read by the DataFrame constructor.
                return grna_frame, dropped_frame, pd.DataFrame(dict.fromkeys(offtargets, []))

            shared_ids = list(set(grna_frame['names']) & set(offtarget_ids))

            found = self.grab_offtargets(query=grna_frame,
                                         offtargets=offtargets,
                                         offtarget_ids=offtarget_ids)

            grna_frame = self.negate_pam_mismatch(grna_dataframe=grna_frame,
                                                  offtarget_dataframe=found,
                                                  target_ids=shared_ids)

            grna_frame, dropped_frame = self.move_grna_by_offtargets(grna_dataframe=grna_frame,
                                                                     dropped_dataframe=dropped_frame,
                                                                     offtarget_dataframe=found,
                                                                     masks=self.gene_mask_dict['masks'])

            return grna_frame, dropped_frame, pd.DataFrame(found)

        # Candidates first, then backup: `dropped` is threaded through both passes.
        candidates, dropped, candidates_offtargets = screen_offtargets(
            candidates, dropped, candidates_has_offtargets)
        backup, dropped, backup_offtargets = screen_offtargets(
            backup, dropped, backup_has_offtargets)

        # Original note: add ranking to PAM and move guides between the
        # dataframes when the ranking is bad.
        candidates, backup = self.scan_maxmismatches(candidates=candidates, backup=backup)
        candidates, backup = self.force_max_grna_in_candidates(candidates=candidates, backup=backup,
                                                               max_grna=self.max_grna)

        candidates = self.force_ag_base(dataframe=candidates, max_primer_size=self.max_primer_size)

        backup = self.force_ag_base(dataframe=backup, max_primer_size=self.max_primer_size)

        candidates, backup, dropped = map(self.calculate_primer_len, [candidates, backup, dropped])

        candidates, backup, dropped = map(self.calculate_gc_content, [candidates, backup, dropped])

        candidates = self.design_primers(dataframe=candidates, cas9=self.cas9_organism,
                                         fiveprime=self.fiveprime, threeprime=self.threeprime)

        backup = self.design_primers(dataframe=backup, cas9=self.cas9_organism,
                                     fiveprime=self.fiveprime, threeprime=self.threeprime)

        candidates, backup, dropped = map(pd.DataFrame,
                                          [candidates, backup, dropped])

        if candidates_offtargets.empty and backup_offtargets.empty:
            # Nothing found for either table: emit a header-only file.
            final_offtargets = pd.DataFrame(columns=offtargets.columns)
        else:
            # Tag each row with the table it came from, then stack both frames.
            # pd.concat replaces DataFrame.append, which no longer exists in pandas >= 2.0.
            candidates_offtargets['from'] = "candidates"
            backup_offtargets['from'] = "backup"
            final_offtargets = pd.concat([candidates_offtargets, backup_offtargets],
                                         ignore_index=True)

        temp_dir = os.path.join(self.root, "temp")
        candidates.to_csv(os.path.join(temp_dir, "candidates.txt"), header=True, index=False, sep=",")
        backup.to_csv(os.path.join(temp_dir, "backup.txt"), header=True, index=False, sep=",")
        dropped.to_csv(os.path.join(temp_dir, "dropped.txt"), header=True, index=False, sep=",")
        final_offtargets.to_csv(os.path.join(temp_dir, "offtargets.txt"), header=True, index=False, sep=",")