Exemplo n.º 1
0
def prepare_go_serverfiles():
    """Stage every file from ``data/go/`` under ``serverfiles/go/``.

    Each file is copied (with metadata) into ``serverfiles/go/`` and then
    wrapped in a ``ServerFile`` whose ``to_server_format()`` is invoked.
    The ontology file ``gene_ontology.obo`` gets fixed metadata; every other
    file is treated as an annotation file whose taxonomy id is the file name
    with ``.tab`` removed.
    """
    Path('serverfiles/go/').mkdir(parents=True, exist_ok=True)

    for entry in list_files('data/go/'):
        if entry.name != 'gene_ontology.obo':
            # Annotation file: metadata is derived from the taxonomy id.
            tax_id = entry.name.replace('.tab', '')
            species_name = common_taxid_to_name(tax_id)
            title = f'GO Annotations for {species_name}'
            tags = ['gene', 'annotation', 'ontology', 'GO', tax_id, species_name]
            description = 'The gene association file ingested from GO Consortium members.'
        else:
            # The ontology itself: metadata is static.
            title = 'Gene Ontology (GO)'
            tags = ['gene', 'ontology', 'GO']
            description = ('Basic version of the GO, filtered such that the graph is guaranteed to be '
                           'acyclic and annotations can be propagated up the graph.')

        dest_path = f'serverfiles/go/{entry.name}'
        shutil.copy2(str(entry.absolute()), dest_path)

        # Convert the copied file to the server format.
        ServerFile(
            domain='go',
            file_path=dest_path,
            title=title,
            description=description,
            tags=tags,
        ).to_server_format()
Exemplo n.º 2
0
def gene_info_to_dict(gene_data: tuple):
    """Convert one gene record tuple into a dictionary of named fields.

    The field positions (``gene_id``, ``tax_id``, ``symbol``, ...) and the
    lookup tables (``homologs``, ``homologs_by_group``, ``to_species``) are
    taken from the enclosing scope. Fields holding the placeholder ``'-'``
    are reported as ``None``.
    """

    def dash_to_none(position):
        # '-' is the placeholder for "no value" in the record.
        value = gene_data[position]
        return value if value != '-' else None

    homology_group = homologs.get(str(gene_data[gene_id]), [None])[0]

    # Homologs of this gene in other taxa, keyed by taxonomy id.
    homolog_genes = {}
    for _, tax, gid in homologs_by_group.get(homology_group, []):
        if homology_group and tax != gene_data[tax_id]:
            homolog_genes[tax] = gid

    return {
        'species': common_taxid_to_name(to_species[gene_data[tax_id]]),
        'tax_id': gene_data[tax_id],
        'gene_id': gene_data[gene_id],
        'symbol': gene_data[symbol],
        'synonyms': pipe_delimited_to_list(gene_data[synonyms]),
        'db_refs': parse_db_refs(gene_data[db_refs]),
        'description': dash_to_none(description),
        'locus_tag': dash_to_none(locus_tag),
        'chromosome': dash_to_none(chromosome),
        'map_location': dash_to_none(map_location),
        'type_of_gene': dash_to_none(type_of_gene),
        'symbol_from_nomenclature_authority': dash_to_none(symbol_from_nomenclature_authority),
        'full_name_from_nomenclature_authority': dash_to_none(full_name_from_nomenclature_authority),
        'nomenclature_status': dash_to_none(nomenclature_status),
        'other_designations': pipe_delimited_to_list(gene_data[other_designations]),
        'modification_date': gene_data[modification_date],
        'homology_group_id': homology_group,
        'homologs': homolog_genes,
    }
Exemplo n.º 3
0
def prepare_gene_sets_serverfiles():
    """Stage every file from ``data/gene_sets/`` under ``serverfiles/gene_sets/``.

    Each file is copied (with metadata), its name is parsed into a hierarchy
    and a taxonomy id, and a ``ServerFile`` built from that information is
    converted with ``to_server_format()``.
    """
    Path('serverfiles/gene_sets/').mkdir(parents=True, exist_ok=True)

    for entry in list_files('data/gene_sets/'):
        dest_path = f'serverfiles/gene_sets/{entry.name}'
        shutil.copy2(str(entry.absolute()), dest_path)

        hierarchy, tax_id = filename_parse(entry.name)

        hierarchy_label = ', '.join(hierarchy)
        title = f"Gene sets: {hierarchy_label} ({common_taxid_to_name(tax_id)})"

        # Tags: hierarchy parts, a fixed marker, the organism name and its short names.
        tags = [*hierarchy, 'gene sets', common_taxid_to_name(tax_id)] + shortname(tax_id)

        ServerFile(
            domain='gene_sets',
            file_path=dest_path,
            title=title,
            description='',
            tags=tags,
        ).to_server_format()
Exemplo n.º 4
0
        uncompressed_size = os.stat(
            os.path.join(domain_path,
                         MATCHER_FILENAME.format(taxonomy_id))).st_size

    with bz2.BZ2File(os.path.join(temp_path,
                                  MATCHER_FILENAME.format(taxonomy_id)),
                     mode='w',
                     compresslevel=9) as f:
        shutil.copyfileobj(
            open(
                os.path.join(domain_path,
                             MATCHER_FILENAME.format(taxonomy_id)), "rb"), f)

    create_info_file(os.path.join(temp_path,
                                  MATCHER_FILENAME.format(taxonomy_id)),
                     title=MATCHER_TITLE + ' for ' +
                     common_taxid_to_name(taxonomy_id),
                     tags=MATCHER_TAGS + [taxonomy_id],
                     uncompressed=uncompressed_size,
                     compression='bz2')

# Close `con` (presumably the database connection used above — confirm
# against the part of the script that creates it).
con.close()

# Helper constructed for this DOMAIN and the GeneInfo class.
helper = SyncHelper(DOMAIN, GeneInfo)

# sync files with remote server
helper.run_tests()
helper.sync_files()

# NOTE(review): presumably deletes the temporary staging folder — confirm in SyncHelper.
helper.remove_update_folder()
class FileUploadHelper(QDialog):
    """Dialog that lets the user add a local file to the serverfiles folder.

    The user picks a domain (and, for gene sets, a hierarchy), an organism,
    a title, comma-separated tags and a file. On acceptance the file is
    copied into the local serverfiles path of the chosen domain and an
    accompanying info file is created from the collected metadata.
    """

    # settings
    kegg_domain = 'KEGG'

    # combobox label -> serverfiles domain
    supported_domains = OrderedDict({
        'Gene Ontology': gene_ontology_domain,
        'Gene Sets': gene_sets_domain
    })

    supported_organisms = [
        common_taxid_to_name(tax_id) for tax_id in common_taxids()
    ]

    # combobox label -> hierarchy tuple used to build gene-set file names
    hierarchies = {
        'GO - Biological Process': ('GO', 'biological_process'),
        'GO - Molecular Function': ('GO', 'molecular_function'),
        'GO - Cellular Component': ('GO', 'cellular_component'),
        'KEGG - Pathways': ('KEGG', 'pathways'),
        'KEGG - Orthologs': ('KEGG', 'orthologs')
    }

    def __init__(self, parent=None):
        """Build the dialog widgets; no file is selected initially."""
        super(FileUploadHelper, self).__init__(
            parent, Qt.Window | Qt.WindowTitleHint | Qt.CustomizeWindowHint
            | Qt.WindowCloseButtonHint | Qt.WindowMaximizeButtonHint)
        self.setAttribute(Qt.WA_DeleteOnClose)
        self.setWindowTitle('Add new file')

        self.info_state = INFO_FILE_SCHEMA
        # NOTE: this attribute shadows QWidget.layout(); kept because the
        # rest of the class addresses the layout through it.
        self.layout = QVBoxLayout(self)

        # domain selection combobox
        self.domain_selection = QComboBox()
        self.domain_selection.addItems(self.supported_domains.keys())
        self.domain_selection.currentIndexChanged.connect(
            self.__on_domain_selection)
        self.__create_selection_row('Domain: ', self.domain_selection)

        # hierarchy selection combobox (only shown for the gene sets domain)
        self.hierarchy_selection = QComboBox()
        self.hierarchy_selection.addItems(self.hierarchies.keys())
        self.layout.addWidget(self.hierarchy_selection,
                              alignment=Qt.AlignVCenter)
        self.__on_domain_selection()

        # select organism
        self.organism_selection = QComboBox()
        self.organism_selection.addItems(self.supported_organisms)
        self.__create_selection_row('Organism: ', self.organism_selection)

        # title
        self.line_edit_title = QLineEdit()
        self.__create_selection_row('Title: ', self.line_edit_title)

        # tags
        self.line_edit_tags = QLineEdit()
        self.__create_selection_row('Tags (comma-separated): ',
                                    self.line_edit_tags)

        # file selector
        self.file_info = QLabel()
        self.file_select_btn = QPushButton('Select File', self)
        self.file_select_btn.clicked.connect(self.__handle_file_selector)
        self.__create_selection_row(' ', self.file_select_btn)

        # add file info section
        self.layout.addWidget(self.file_info, alignment=Qt.AlignCenter)

        self.layout.addStretch(1)

        # Ok and Cancel buttons
        self.buttons = QDialogButtonBox(
            QDialogButtonBox.Ok | QDialogButtonBox.Cancel, Qt.Horizontal, self)
        self.layout.addWidget(self.buttons, alignment=Qt.AlignJustify)

        self.buttons.accepted.connect(self.__accept)
        self.buttons.rejected.connect(self.__close)

        # path to a selected file
        self.file_path = None

    def __on_domain_selection(self):
        """Show the hierarchy combobox only when the gene sets domain is selected."""
        selected = self.__get_selected_domain() == gene_sets_domain
        self.hierarchy_selection.setVisible(selected)

    def __get_selected_domain(self):
        """Return the serverfiles domain for the current domain combobox entry."""
        domain_label = list(self.supported_domains.keys())[
            self.domain_selection.currentIndex()]
        return self.supported_domains[domain_label]

    def __get_selected_hier(self):
        """Return the hierarchy tuple for the current hierarchy combobox entry."""
        hier_label = list(
            self.hierarchies.keys())[self.hierarchy_selection.currentIndex()]
        return self.hierarchies[hier_label]

    def __create_selection_row(self, label, widget):
        """Append a text label followed by ``widget`` to the dialog layout."""
        self.layout.addWidget(QLabel(label), alignment=Qt.AlignLeft)
        self.layout.addWidget(widget, alignment=Qt.AlignVCenter)

    def __accept(self):
        """Store the selected file and close the dialog.

        Does nothing while no file has been selected.
        """
        if self.file_path:
            self.info_state = self.__parse_selection()
            self.__move_to_serverfiles_folder(self.file_path)

            # The dialog may have been created without a parent
            # (``parent=None`` is the constructor default).
            parent = self.parent()
            if parent is not None:
                parent.initialize_files_view()
            self.close()

    def __close(self):
        self.close()

    def closeEvent(self, event):
        # clean-up: drop the parent's reference to this dialog, if there is
        # a parent at all.
        parent = self.parent()
        if parent is not None:
            parent._dialog = None

    def __filename(self, domain, organism):
        """Create a filename based on domain name and organism.

        Returns ``None`` when ``organism`` is empty or ``domain`` is not one
        of the supported domains.
        """
        if not organism or domain not in self.supported_domains.values():
            return None

        if domain == gene_ontology_domain:
            return FILENAME_ANNOTATION.format(organism)

        if domain == gene_sets_domain:
            return filename((self.__get_selected_hier()), organism)

        return None

    def __parse_selection(self):
        """Collect the dialog state into the keyword arguments of the info file.

        Tags are split on commas; surrounding whitespace is stripped and
        empty entries are dropped, so ``"a, b,"`` yields ``['a', 'b']`` and
        an empty field yields ``[]``.
        """
        domain = self.__get_selected_domain()
        organism = taxname_to_taxid(self.supported_organisms[
            self.organism_selection.currentIndex()])

        stripped_tags = (
            tag.strip() for tag in self.line_edit_tags.text().split(','))

        return {
            'domain': domain,
            'organism': organism,
            'filename': self.__filename(domain, organism),
            'title': self.line_edit_title.text(),
            'tags': [tag for tag in stripped_tags if tag],
            'source': SOURCE_USER
        }

    def __move_to_serverfiles_folder(self, selected_file_path):
        """Copy the selected file into its domain folder and write its info file.

        Errors raised while copying propagate to the caller.
        """
        domain_path = serverfiles.localpath(self.info_state['domain'])
        file_path = os.path.join(domain_path, self.info_state['filename'])
        create_folder(domain_path)

        # TODO: handle copy errors properly (currently they propagate)
        copyfile(selected_file_path, file_path)

        # if copy successful create .info file
        create_info_file(file_path, **self.info_state)

    def __handle_file_selector(self):
        """Ask the user for a file and remember its path.

        A cancelled dialog yields an empty path; in that case the previous
        selection (if any) is left untouched.
        """
        selected_path = QFileDialog.getOpenFileName(self, 'Open File')[0]
        if not selected_path:
            return

        self.file_path = selected_path
        self.file_info.setText('Selected File: {}'.format(
            os.path.basename(self.file_path)))
    def __init__(self, parent=None):
        """Initialise widget state and build the GUI.

        Creates the "Input", "Filter" and "Select" control tabs, the tree
        view and the table of significant terms in the main area, and the
        list of available annotation files (remote and local).
        """
        # NOTE(review): `self` is passed explicitly in addition to the
        # implicit one supplied by super() — confirm the base class
        # signature; this looks like it should be `super().__init__(parent)`.
        super().__init__(self, parent)

        self.input_data = None
        self.ref_data = None
        self.ontology = None
        self.annotations = None
        self.loaded_annotation_code = None
        self.treeStructRootKey = None
        # indexed by the "probFunc" radio buttons below (same order)
        self.probFunctions = [statistics.Binomial(), statistics.Hypergeometric()]
        self.selectedTerms = []

        self.selectionChanging = 0
        self.__state = State.Ready
        # single-shot timer whose timeout calls self.__update
        self.__scheduletimer = QTimer(self, singleShot=True)
        self.__scheduletimer.timeout.connect(self.__update)

        #############
        # GUI
        #############
        self.tabs = gui.tabWidget(self.controlArea)
        # Input tab
        self.inputTab = gui.createTabPage(self.tabs, "Input")
        box = gui.widgetBox(self.inputTab, "Info")
        self.infoLabel = gui.widgetLabel(box, "No data on input\n")

        gui.button(box, self, "Ontology/Annotation Info",
                   callback=self.ShowInfo,
                   tooltip="Show information on loaded ontology and annotations")

        self.referenceRadioBox = gui.radioButtonsInBox(
            self.inputTab, self, "useReferenceDataset",
            ["Entire genome", "Reference set (input)"],
            tooltips=["Use entire genome for reference",
                      "Use genes from Referece Examples input signal as reference"],
            box="Reference", callback=self.__invalidate)

        # the "Reference set (input)" option starts out disabled
        self.referenceRadioBox.buttons[1].setDisabled(True)
        gui.radioButtonsInBox(
            self.inputTab, self, "aspectIndex",
            ["Biological process", "Cellular component", "Molecular function"],
            box="Aspect", callback=self.__invalidate)

        # Filter tab
        self.filterTab = gui.createTabPage(self.tabs, "Filter")
        box = gui.widgetBox(self.filterTab, "Filter GO Term Nodes")
        gui.checkBox(box, self, "filterByNumOfInstances", "Genes",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by number of input genes mapped to a term")
        ibox = gui.indentedBox(box)
        gui.spin(ibox, self, 'minNumOfInstances', 1, 100,
                 step=1, label='#:', labelWidth=15,
                 callback=self.FilterAndDisplayGraph,
                 callbackOnReturn=True,
                 tooltip="Min. number of input genes mapped to a term")

        gui.checkBox(box, self, "filterByPValue_nofdr", "p-value",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by term p-value")

        gui.doubleSpin(gui.indentedBox(box), self, 'maxPValue_nofdr', 1e-8, 1,
                       step=1e-8,  label='p:', labelWidth=15,
                       callback=self.FilterAndDisplayGraph,
                       callbackOnReturn=True,
                       tooltip="Max term p-value")

        # use filterByPValue for FDR, as it was the default in prior versions
        gui.checkBox(box, self, "filterByPValue", "FDR",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by term FDR")
        gui.doubleSpin(gui.indentedBox(box), self, 'maxPValue', 1e-8, 1,
                       step=1e-8,  label='p:', labelWidth=15,
                       callback=self.FilterAndDisplayGraph,
                       callbackOnReturn=True,
                       tooltip="Max term p-value")

        box = gui.widgetBox(box, "Significance test")

        gui.radioButtonsInBox(box, self, "probFunc", ["Binomial", "Hypergeometric"],
                              tooltips=["Use binomial distribution test",
                                        "Use hypergeometric distribution test"],
                              callback=self.__invalidate)  # TODO: only update the p values
        box = gui.widgetBox(self.filterTab, "Evidence codes in annotation",
                              addSpace=True)
        # one checkbox per evidence type, keyed by the evidence type
        self.evidenceCheckBoxDict = {}
        for etype in go.evidenceTypesOrdered:
            ecb = QCheckBox(
                etype, toolTip=go.evidenceTypes[etype],
                checked=self.useEvidenceType[etype])
            ecb.toggled.connect(self.__on_evidenceChanged)
            box.layout().addWidget(ecb)
            self.evidenceCheckBoxDict[etype] = ecb

        # Select tab
        self.selectTab = gui.createTabPage(self.tabs, "Select")
        box = gui.radioButtonsInBox(
            self.selectTab, self, "selectionDirectAnnotation",
            ["Directly or Indirectly", "Directly"],
            box="Annotated genes",
            callback=self.ExampleSelection)

        box = gui.widgetBox(self.selectTab, "Output", addSpace=True)
        gui.radioButtonsInBox(
            box, self, "selectionDisjoint",
            btnLabels=["All selected genes",
                       "Term-specific genes",
                       "Common term genes"],
            tooltips=["Outputs genes annotated to all selected GO terms",
                      "Outputs genes that appear in only one of selected GO terms",
                      "Outputs genes common to all selected GO terms"],
            callback=self.ExampleSelection)

        # ListView for DAG, and table for significant GOIDs
        self.DAGcolumns = ['GO term', 'Cluster', 'Reference', 'p-value',
                           'FDR', 'Genes', 'Enrichment']

        self.splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(self.splitter)

        # list view
        self.listView = GOTreeWidget(self.splitter)
        self.listView.setSelectionMode(QTreeView.ExtendedSelection)
        self.listView.setAllColumnsShowFocus(1)
        self.listView.setColumnCount(len(self.DAGcolumns))
        self.listView.setHeaderLabels(self.DAGcolumns)

        self.listView.header().setSectionsClickable(True)
        self.listView.header().setSortIndicatorShown(True)
        self.listView.header().setSortIndicator(self.DAGcolumns.index('p-value'), Qt.AscendingOrder)
        self.listView.setSortingEnabled(True)
        # column 6 is 'Enrichment' in self.DAGcolumns
        self.listView.setItemDelegateForColumn(
            6, EnrichmentColumnItemDelegate(self))
        self.listView.setRootIsDecorated(True)

        self.listView.itemSelectionChanged.connect(self.ViewSelectionChanged)

        # table of significant GO terms
        self.sigTerms = QTreeWidget(self.splitter)
        self.sigTerms.setColumnCount(len(self.DAGcolumns))
        self.sigTerms.setHeaderLabels(self.DAGcolumns)
        self.sigTerms.setSortingEnabled(True)
        self.sigTerms.setSelectionMode(QTreeView.ExtendedSelection)
        self.sigTerms.header().setSortIndicator(self.DAGcolumns.index('p-value'), Qt.AscendingOrder)
        # column 6 is 'Enrichment' in self.DAGcolumns
        self.sigTerms.setItemDelegateForColumn(
            6, EnrichmentColumnItemDelegate(self))

        self.sigTerms.itemSelectionChanged.connect(self.TableSelectionChanged)

        self.sigTableTermsSorted = []
        self.graph = {}
        self.originalGraph = None

        self.inputTab.layout().addStretch(1)
        self.filterTab.layout().addStretch(1)
        self.selectTab.layout().addStretch(1)

        # Record describing one available annotation file.
        class AnnotationSlot(SimpleNamespace):
            taxid = ...  # type: str
            name = ...   # type: str
            filename = ...  # type:str

            @staticmethod
            def parse_tax_id(f_name):
                # the taxonomy id is the second dot-separated part of the file name
                return f_name.split('.')[1]

        try:
            remote_files = serverfiles.ServerFiles().listfiles(DOMAIN)
        except (ConnectTimeout, RequestException, ConnectionError):
            # TODO: Warn user about failed connection to the remote server
            remote_files = []

        # Remote and local listings are merged and de-duplicated through a
        # set; each entry unpacks into two items, the second being the file
        # name. The ontology file itself is not an annotation and is skipped.
        self.available_annotations = [
            AnnotationSlot(
                taxid=AnnotationSlot.parse_tax_id(annotation_file),
                name=taxonomy.common_taxid_to_name(AnnotationSlot.parse_tax_id(annotation_file)),
                filename=FILENAME_ANNOTATION.format(AnnotationSlot.parse_tax_id(annotation_file))
            )
            for _, annotation_file in set(remote_files + serverfiles.listfiles(DOMAIN))
            if annotation_file != FILENAME_ONTOLOGY

        ]
        self._executor = ThreadExecutor()
    # Feed every matcher record for this taxonomy id into gene_mapper.
    for record in g_db.select_gene_matcher_data(taxonomy_id):
        parse_gene_record(taxonomy_id, gene_mapper, record)

    # Persist the mapper as a pickle in the domain folder.
    with open(os.path.join(domain_path, MATCHER_FILENAME.format(taxonomy_id)), 'wb') as file:
        pickle.dump(gene_mapper, file, protocol=pickle.HIGHEST_PROTOCOL)

    uncompressed_size = os.stat(os.path.join(domain_path, MATCHER_FILENAME.format(taxonomy_id))).st_size

    # Write a bz2-compressed copy into temp_path. The source file is opened
    # in its own `with` so its handle is closed deterministically (the
    # previous inline open() was never closed).
    with bz2.BZ2File(os.path.join(temp_path, MATCHER_FILENAME.format(taxonomy_id)), mode='w', compresslevel=9) as f:
        with open(os.path.join(domain_path, MATCHER_FILENAME.format(taxonomy_id)), "rb") as uncompressed_file:
            shutil.copyfileobj(uncompressed_file, f)

    # Describe the compressed file; the size recorded is that of the
    # uncompressed pickle.
    create_info_file(os.path.join(temp_path, MATCHER_FILENAME.format(taxonomy_id)),
                     domain=DOMAIN,
                     filename=MATCHER_FILENAME.format(taxonomy_id),
                     source=SOURCE_SERVER,
                     title=MATCHER_TITLE + ' for ' + common_taxid_to_name(taxonomy_id),
                     tags=MATCHER_TAGS + [taxonomy_id],
                     uncompressed=uncompressed_size,
                     compression='bz2')


# Close `con` (presumably the database connection used above — confirm
# against the part of the script that creates it).
con.close()

# Helper constructed for this DOMAIN and the GeneInfo class.
helper = SyncHelper(DOMAIN, GeneInfo)

# sync files with remote server
helper.run_tests()
helper.sync_files()

# NOTE(review): presumably deletes the temporary staging folder — confirm in SyncHelper.
helper.remove_update_folder()
Exemplo n.º 8
0
    def set_data(self, data: Table) -> None:
        """Accept a new input table, validate its annotations and refresh the UI.

        With no (or empty) data the summaries and labels are reset and
        ``None`` is sent on the genes output. If a required table attribute
        is missing, the matching warning is shown and the data is discarded.
        Otherwise the target-organism combobox is repopulated without the
        source organism, a target organism is chosen, the info labels are
        updated and ``commit()`` is called.
        """
        self.Warning.clear()
        self.data = data

        if not self.data:
            self.info.set_input_summary("0")
            self.info.set_output_summary("0")
            self.info_gene.clear()
            self.info_gene_type.setText("No data on input.")
            self.Outputs.genes.send(None)
            return

        # Find the first failed requirement, if any.
        table_attrs = self.data.attributes
        failed_check = None
        if TableAnnotation.gene_as_attr_name not in table_attrs:
            failed_check = self.Warning.mising_gene_as_attribute_name
        elif table_attrs[TableAnnotation.gene_as_attr_name]:
            # genes are stored as attribute (column) names
            if TableAnnotation.gene_id_attribute not in table_attrs:
                failed_check = self.Warning.mising_gene_id_attribute
        elif TableAnnotation.tax_id not in table_attrs:
            failed_check = self.Warning.missing_tax_id
        elif TableAnnotation.gene_id_column not in table_attrs:
            # NOTE(review): same warning as for a missing gene_as_attr_name — confirm intended.
            failed_check = self.Warning.mising_gene_as_attribute_name
        elif table_attrs[TableAnnotation.gene_id_column] not in self.data.domain:
            failed_check = self.Warning.missing_gene_id

        if failed_check is not None:
            failed_check()
            self.data = None
            return

        self.source_tax = data.attributes[TableAnnotation.tax_id]
        taxonomy = common_taxid_to_name(self.source_tax)

        # Offer every known organism except the source one.
        self.target_organism.clear()
        other_organisms = [
            name for name in self.taxonomy_names if name != taxonomy
        ]
        self.target_organism.addItems(other_organisms)

        if taxonomy == self.selected_organism:
            # The stored selection is the source organism: fall back to the first name.
            self.combo_box_id = -1
            chosen_organism = self.taxonomy_names[0]
        else:
            try:
                self.combo_box_id = self.taxonomy_names.index(
                    self.selected_organism)
            except ValueError:
                self.combo_box_id = -1

            if self.combo_box_id == -1:
                self.target_organism.setCurrentIndex(0)
                chosen_organism = self.taxonomy_names[0]
            else:
                self.target_organism.setCurrentIndex(self.combo_box_id)
                chosen_organism = self.taxonomy_names[self.combo_box_id]

        self.selected_organism = chosen_organism
        self.target_tax = species_name_to_taxid(self.selected_organism)

        self.info_gene_type.setText(f"Organism: {taxonomy}")

        # Genes are counted as columns when stored as attribute names, otherwise as rows.
        if self.data.attributes[TableAnnotation.gene_as_attr_name]:
            data_len = len(data.domain.attributes)
        else:
            data_len = len(data)
        self.info_gene.setText(f"Number of genes: {data_len}")
        self.info.set_input_summary(f"{data_len}")

        self.commit()
Exemplo n.º 9
0
    # Group the raw (bytes) lines of `gene2go` by the value of their first
    # tab-separated column, keeping only lines whose value is in `tax_ids`.
    # The undecoded line is what gets stored.
    for line in gene2go:
        split_line = line.decode().split('\t')

        if split_line[0] in tax_ids:
            store_lines_by_taxid[split_line[0]].append(line)
        # Disabled alternative, kept for reference: also store a line under
        # its parent taxonomy id when that parent is in `tax_ids`.
        #else:
            #parent = taxonomy.parent(split_line[0])
            #if parent in tax_ids:
                #store_lines_by_taxid[parent].append(line)


for org, lines in store_lines_by_taxid.items():
    filename = FILENAME_ANNOTATION.format(org)
    FILE_PATH = os.path.join(domain_path, filename)
    TITLE = "GO Annotations for " + common_taxid_to_name(org)
    TAGS = ["gene", "annotation", "ontology", "GO", org]

    with open(FILE_PATH, 'wb') as f:
        f.write(header)
        f.writelines(lines)

    db_size = os.stat(FILE_PATH).st_size  # store uncompressed database size

    with bz2.BZ2File(os.path.join(temp_path, filename), mode='w', compresslevel=9) as f_compressed:
        shutil.copyfileobj(open(os.path.join(domain_path, filename), 'rb'), f_compressed)

    create_info_file(os.path.join(temp_path, filename),
                     domain=DOMAIN,
                     filename=filename,
                     source=SOURCE_SERVER,