Пример #1
0
 def load_shark_data(self):
     """Load the 'shark_data.txt' entry of the archive into a table reader.

     The reader is created for the zip file self._archive_filename in
     directory self._file_path and stored in self._data_tableobject.
     If creating the reader fails, an empty TableFileReader is stored
     instead.
     """
     try:
         self._data_tableobject = toolbox_utils.TableFileReader(
             file_path=self._file_path,
             zip_file_name=self._archive_filename,
             zip_file_entry='shark_data.txt',
         )
     except Exception:
         # Best-effort load: fall back to an empty reader object.
         # 'Exception' (not a bare except) lets KeyboardInterrupt and
         # SystemExit propagate.
         self._data_tableobject = toolbox_utils.TableFileReader()
Пример #2
0
 def _load_plankton_group_definition(self, excel_file_name):
     """Load plankton group definitions from an Excel file.

     Rows are read positionally: column 0 is the scientific name,
     column 1 the rank and column 2 the plankton group. Rows without a
     scientific name or without a plankton group are ignored. An empty
     rank is replaced by 'scientific_name'.

     Results are stored in:
       - self._planktongroups_ranks_set: every rank that was used.
       - self._planktongroups_rank_dict: rank -> {scientific name: group}.

     A row that cannot be processed is logged as a warning and skipped.
     """
     tablefilereader = toolbox_utils.TableFileReader(
         excel_file_name=excel_file_name)
     for row in tablefilereader.rows():
         scientificname = ''  # Kept for the warning if the row fails early.
         try:
             scientificname = row[0].strip()
             rank = row[1].strip()
             planktongroup = row[2].strip()
             if not (scientificname and planktongroup):
                 continue
             used_rank = rank or 'scientific_name'
             self._planktongroups_ranks_set.add(used_rank)
             groups_for_rank = self._planktongroups_rank_dict.setdefault(
                 used_rank, {})
             groups_for_rank[scientificname] = planktongroup
         except Exception:
             # Per-row best effort; 'Exception' keeps KeyboardInterrupt and
             # SystemExit from being swallowed.
             toolbox_utils.Logging().warning(
                 'Failed when loading plankton group def. File:' +
                 excel_file_name + '  Taxon: ' + scientificname)
Пример #3
0
    def _load_harmful(self, excel_file_name):
        """Mark taxa listed in an Excel file as harmful.

        Each row is mapped to the file header. The columns
        'scientific_name' and 'accepted_name_usage' are read; for each of
        the two names that is non-empty and present in self._taxa_lookup,
        the matching taxon gets 'harmful' = True and 'harmful_name' set to
        the name that matched. The scientific name is handled first, so
        when both names resolve to the same taxon the accepted name wins.

        A row that cannot be processed is logged as a warning and skipped.
        """
        tablefilereader = toolbox_utils.TableFileReader(
            excel_file_name=excel_file_name)
        header = tablefilereader.header()
        for row in tablefilereader.rows():
            scientific_name = ''  # Kept for the warning if the row fails early.
            try:
                row_dict = dict(zip(header, row))
                scientific_name = row_dict.get('scientific_name', '').strip()
                accepted_name_usage = row_dict.get(
                    'accepted_name_usage', '').strip()
                # Avoid handling the same name twice when both columns agree.
                candidate_names = [scientific_name]
                if accepted_name_usage != scientific_name:
                    candidate_names.append(accepted_name_usage)
                for name in candidate_names:
                    if name and (name in self._taxa_lookup):
                        taxon = self._taxa_lookup[name]
                        taxon['harmful_name'] = name
                        taxon['harmful'] = True
            except Exception:
                # Per-row best effort; 'Exception' keeps KeyboardInterrupt
                # and SystemExit from being swallowed.
                toolbox_utils.Logging().warning(
                    'Failed when loading harmful algae. File:' +
                    excel_file_name + '  Taxon: ' + scientific_name)
Пример #4
0
 def get_counting_method_table(self, path, filename):
     """Read a counting method text file and return it as a table.

     A TableFileReader is created for the text file `filename` in the
     directory `path`. Returns the tuple (header, rows) from that reader.
     """
     reader = toolbox_utils.TableFileReader(file_path=path,
                                            text_file_name=filename)
     header = reader.header()
     rows = reader.rows()
     return header, rows
Пример #5
0
 def load_shark_metadata(self):
     """Load the 'shark_metadata.txt' entry of the archive as text.

     self._metadata_dict is reset to an empty dict. The zip entry is read
     with only column index 0 selected, and the header plus all rows are
     joined into self._metadata_text (cells separated by tab, rows by
     CRLF).

     If reading fails, self._metadata_dict stays an empty dict and
     self._metadata_text is set to an empty string, so both attributes
     have a consistent type afterwards.
     """
     self._metadata_dict = {}
     try:
         metadata_tableobject = toolbox_utils.TableFileReader(
             file_path=self._file_path,
             zip_file_name=self._archive_filename,
             zip_file_entry='shark_metadata.txt',
             select_columns_by_index=[0],
         )
         # Metadata is a key/value list with no header, so the row that
         # was parsed as header is data too. Merge header and rows.
         concat_table = ([metadata_tableobject.header()] +
                         metadata_tableobject.rows())
         self._metadata_text = '\r\n'.join(
             '\t'.join(row) for row in concat_table)
     except Exception:
         # Best-effort load. The metadata dict must remain a dict (it used
         # to be replaced by a TableFileReader object here), and the text
         # must not be left unset or stale.
         self._metadata_dict = {}
         self._metadata_text = ''
    def import_text_file(self, filename, textfile_encoding):
        """Import a text file and return it as a dataset tree.

        The header row and first data row are taken from the 'info'
        entries in self._importrows (keys 'header_row' and
        'first_data_row'), defaulting to 1 and 2. The file is read into a
        DatasetTable and then parsed into a DatasetNode by
        FormatSingleFile. Returns the DatasetNode.
        """
        single_file_parser = plankton_core.FormatSingleFile()

        # Phase 1: Read file into a temporary table.
        header_index = 1
        first_data_index = 2
        for import_row in self._importrows:
            if import_row['node'] != 'info':
                continue
            if import_row['key'] == 'header_row':
                command = import_row.get('command', '1')
                # Accept decimal comma and float notation, e.g. '1,0'.
                header_index = int(float(command.replace(',', '.')))
                if header_index:
                    # Non-zero values are reduced by one before use.
                    header_index -= 1
            if import_row['key'] == 'first_data_row':
                command = import_row.get('command', '2')
                first_data_index = int(float(command.replace(',', '.')))
                if first_data_index:
                    first_data_index -= 1

        reader = toolbox_utils.TableFileReader(
            text_file_name=filename,
            encoding=textfile_encoding,
            header_row=header_index,
            data_rows_from=first_data_index)
        source_table = plankton_core.DatasetTable()
        source_table.set_header(reader.header())
        for data_row in reader.rows():
            source_table.append_row(data_row)

        header_text = str(source_table.get_header())
        toolbox_utils.Logging().info('Loading file. Header content: ' +
                                     header_text)

        # Phase 2: Parse the table and create a corresponding tree structure.
        dataset_tree = plankton_core.DatasetNode()
        dataset_tree.set_dataset_parser_rows(self._importrows)
        dataset_tree.set_export_table_columns(self._columnsinfo)
        single_file_parser.parse_table_dataset(dataset_tree, source_table)
        # Phase 3: Reorganize between nodes in tree structure.
        single_file_parser.reorganize_dataset()
        # Phase 4: Reformat fields in tree structure.
        single_file_parser.reformat_dataset()
        # Phase 5: Perform basic screening.
        single_file_parser.basic_screening()

        return dataset_tree
    def _load_parser_info(self):
        """Read the parser definition file and build import/export info.

        The Excel file self._parser_file_path is loaded into a
        DatasetTable. Columns 0, 1 and 2 of each row are used as node
        level, key and view format.

        If self._import_column is set, self._importrows becomes a list of
        dicts ('node', 'key', 'view_format', 'command') for every row with
        a non-empty value in that column.

        If self._export_column is set, self._columnsinfo becomes a list of
        dicts ('header', 'node', 'key', 'view_format') for every row with a
        non-empty value in that column, except rows at node level 'info'.
        """
        # Read dataset parser.
        reader = toolbox_utils.TableFileReader(
            excel_file_name=self._parser_file_path)
        parser_table = plankton_core.DatasetTable()
        parser_table.set_header(reader.header())
        for parser_row in reader.rows():
            parser_table.append_row(parser_row)

        # Create import info.
        if self._import_column:
            self._importrows = []
            for index in range(parser_table.get_row_count()):
                command = parser_table.get_data_item_by_column_name(
                    index, self._import_column)
                if not command:
                    continue
                node = parser_table.get_data_item(index, 0)
                item_key = parser_table.get_data_item(index, 1)
                view_format = parser_table.get_data_item(index, 2)
                self._importrows.append({
                    'node': node,
                    'key': item_key,
                    'view_format': view_format,
                    'command': command,
                })

        # Create export info.
        if self._export_column:
            self._columnsinfo = []
            for index in range(parser_table.get_row_count()):
                export_header = parser_table.get_data_item_by_column_name(
                    index, self._export_column)
                if not export_header:
                    continue
                node = parser_table.get_data_item(index, 0)
                if node == 'info':
                    # 'info' rows are never export columns.
                    continue
                item_key = parser_table.get_data_item(index, 1)
                view_format = parser_table.get_data_item(index, 2)
                self._columnsinfo.append({
                    'header': export_header,
                    'node': node,
                    'key': item_key,
                    'view_format': view_format,
                })
Пример #8
0
    def _load_trophic_types(self, excel_file_name):
        """Add trophic type info to the species objects.

        Rows are read positionally: column 0 is the scientific name,
        column 1 the size class and column 2 the trophic type. Rows whose
        scientific name is not in self._taxa_lookup are ignored.

        Without a size class, the trophic type is stored on the taxon
        itself ('trophic_type'). With a size class, the first entry in the
        taxon's 'size_classes' whose 'bvol_size_class' equals it is
        updated. An already assigned trophic type is kept when the row
        uses the taxon's own 'scientific_name'; a row reaching the taxon
        through another name (presumably a synonym) overwrites it.

        A row that cannot be processed is logged as a warning and skipped.
        """
        tablefilereader = toolbox_utils.TableFileReader(
            excel_file_name=excel_file_name)
        for row in tablefilereader.rows():
            scientificname = ''  # Kept for the warning if the row fails early.
            try:
                scientificname = row[0].strip()
                sizeclass = row[1].strip()
                trophictype = row[2].strip()

                if scientificname not in self._taxa_lookup:
                    continue
                taxon = self._taxa_lookup[scientificname]

                if not sizeclass:
                    # No size class in the input file. Put on species level.
                    taxon['trophic_type'] = trophictype
                    continue

                for sizeclassdict in taxon.get('size_classes', []):
                    if sizeclassdict.get('bvol_size_class', '') != sizeclass:
                        continue
                    already_assigned = sizeclassdict.get('trophic_type', '')
                    keep_existing = bool(already_assigned) and (
                        scientificname == taxon['scientific_name'])
                    if not keep_existing:
                        sizeclassdict['trophic_type'] = trophictype
                    break  # Only the first matching size class is handled.
            except Exception:
                # Per-row best effort; 'Exception' keeps KeyboardInterrupt
                # and SystemExit from being swallowed.
                toolbox_utils.Logging().warning(
                    'Failed when loading trophic types. File:' +
                    excel_file_name + '  Taxon: ' + scientificname)
Пример #9
0
    def _load_taxa(self, excel_file_name):
        """Create one data object (dict) for each taxon in an Excel file.

        Each row is mapped to the file header and the columns
        'scientific_name', 'author', 'rank' and 'parent_name' are read.
        Rows without a scientific name are ignored. A new taxon is
        registered in both self._taxa and self._taxa_lookup (the same dict
        object in both). If the name already exists a warning is logged
        and the existing object is updated with the new values.

        A row that cannot be processed is logged as a warning and skipped.
        """
        tablefilereader = toolbox_utils.TableFileReader(
            excel_file_name=excel_file_name)
        header = tablefilereader.header()
        for row in tablefilereader.rows():
            scientificname = ''  # Kept for the warning if the row fails early.
            try:
                # Built inside the try so a malformed row is logged and
                # skipped instead of aborting the whole load.
                row_dict = dict(zip(header, row))
                scientificname = row_dict.get('scientific_name', '').strip()
                author = row_dict.get('author', '').strip()
                rank = row_dict.get('rank', '').strip()
                parentname = row_dict.get('parent_name', '').strip()

                if not scientificname:
                    continue
                if scientificname in self._taxa:
                    toolbox_utils.Logging().warning(
                        'Scientific name added twice: ' + scientificname +
                        '   (Source: ' + excel_file_name + ')')
                else:
                    self._taxa[scientificname] = {}
                    # Lookup dictionary shares the same taxon object.
                    self._taxa_lookup[scientificname] = self._taxa[
                        scientificname]

                speciesobject = self._taxa[scientificname]
                speciesobject['scientific_name'] = scientificname
                speciesobject['author'] = author
                speciesobject['rank'] = rank
                speciesobject['parent_name'] = parentname
            except Exception:
                # Per-row best effort; 'Exception' keeps KeyboardInterrupt
                # and SystemExit from being swallowed.
                toolbox_utils.Logging().warning(
                    'Failed when loading taxa. File:' + excel_file_name +
                    '  Taxon: ' + scientificname)
Пример #10
0
 def _load_bvol_columns(self, excel_file_name):
     """Load the definition of BVOL columns from an Excel file.

     Rows are read positionally in the order column_name,
     used_on_rank_level, numeric, internal_toolbox_name. Rows with a
     column name, a level and an internal name are stored in
     self._bvolcolumns_dict as
     column name -> (level, numeric, internal name).

     A row that cannot be processed is logged as a warning and skipped.
     """
     tablefilereader = toolbox_utils.TableFileReader(
         excel_file_name=excel_file_name)
     for row in tablefilereader.rows():
         columnname = ''  # Kept for the warning if the row fails early.
         try:
             columnname = row[0].strip()
             level = row[1].strip()
             numeric = row[2].strip()
             internalname = row[3].strip()
             if columnname and level and internalname:
                 self._bvolcolumns_dict[columnname] = (level, numeric,
                                                       internalname)
         except Exception:
             # Per-row best effort; 'Exception' keeps KeyboardInterrupt and
             # SystemExit from being swallowed.
             toolbox_utils.Logging().warning(
                 'Failed when loading BVOL columns. Column name: ' +
                 columnname)
Пример #11
0
    def get_counting_species_table(self, counting_species_file_name):
        """Return a counting species list as (header, rows).

        The special name '<valid taxa>' returns all preloaded taxa: header
        ['scientific_name'] and one single-item row per key of the Species
        taxa dict, sorted. Any other name is read from the text file
        '<name>.txt' in self._methods_species_lists_dir_path. If that file
        does not exist, ([], []) is returned.
        """
        # Use all preloaded species.
        if counting_species_file_name == '<valid taxa>':
            taxa_names = sorted(plankton_core.Species().get_taxa_dict().keys())
            return ['scientific_name'], [[name] for name in taxa_names]

        # Read stored species file.
        list_dir = self._methods_species_lists_dir_path
        text_file_name = counting_species_file_name + '.txt'
        if not os.path.isfile(os.path.join(list_dir, text_file_name)):
            return [], []
        reader = toolbox_utils.TableFileReader(
            file_path=list_dir,
            text_file_name=text_file_name,
        )
        return reader.header(), reader.rows()
Пример #12
0
    def _load_synonyms(self, excel_file_name):
        """Add synonyms from 'translate_' or 'synonyms_' files.

        Rows are read positionally: column 0 is the name to translate
        from, column 1 the name to translate to.

        - If the from-name is already in self._taxa_lookup the row is
          rejected with a warning.
        - If the to-name is not in self._taxa_lookup a warning is logged.
        - Otherwise the from-name is appended to the 'synonyms' list of
          the taxon and registered in self._taxa_lookup, pointing at the
          same taxon object.

        The taxon is resolved through self._taxa_lookup only, so a to-name
        that is reachable through the lookup is accepted even when it is
        not a key in self._taxa.

        A row that cannot be processed is logged as a warning and skipped.
        """
        tablefilereader = toolbox_utils.TableFileReader(
            excel_file_name=excel_file_name)
        for row in tablefilereader.rows():
            fromname = ''  # Kept for the warning if the row fails early.
            toname = ''
            try:
                fromname = row[0].strip()
                toname = row[1].strip()

                # Check if from name is a valid name.
                if fromname in self._taxa_lookup:
                    toolbox_utils.Logging().warning(
                        'Invalid translate (valid taxa in first column): ' +
                        fromname + '   (Source: ' + excel_file_name + ')')
                    continue

                if toname not in self._taxa_lookup:
                    toolbox_utils.Logging().warning(
                        'Scientific name is missing: ' + toname +
                        '   (Source: ' + excel_file_name + ')')
                    continue

                taxon = self._taxa_lookup[toname]
                taxon.setdefault('synonyms', []).append(fromname)
                # Lookup dictionary: the synonym resolves to the same taxon.
                self._taxa_lookup[fromname] = taxon
            except Exception:
                # Per-row best effort; 'Exception' keeps KeyboardInterrupt
                # and SystemExit from being swallowed. The message reports
                # the from-name, matching its 'From taxon' label.
                toolbox_utils.Logging().warning(
                    'Failed when loading translates/synonyms. File:' +
                    excel_file_name + '  From taxon: ' + fromname)
Пример #13
0
    def _load_bvol(self, excel_file_name):
        """ Adds BVOL (size class) data from an Excel file to species objects.

            Only columns listed in self._bvolcolumns_dict are used. Values on
            level 'taxon' are collected per row in a taxon dict, values on
            level 'size_class' in a size class dict. The taxon is identified
            by the internal name 'bvol_species'; rows without it are ignored.

            NOTE(review): an earlier version of this docstring said that
            missing species objects are created. The code below does not do
            that: rows whose taxon is not in self._taxa_lookup are logged and
            skipped.

            A row that cannot be processed is logged as a warning (without
            file or taxon details) and skipped. """
        # Import size class data.
        tablefilereader = toolbox_utils.TableFileReader(
            excel_file_name=excel_file_name)
        #
        # Create header list for mapping and translations.
        # Each item: (columnindex, columnname, level, numeric, internalname).
        headerinfo = []  # Contains used columns only.
        for columnindex, columnname in enumerate(tablefilereader.header()):
            # Use loaded information on used columns.
            if columnname in self._bvolcolumns_dict:
                level, numeric, internalname = self._bvolcolumns_dict[
                    columnname]
                headerinfo.append(
                    (columnindex, columnname, level, numeric, internalname))
        #
        for row in tablefilereader.rows():
            taxondict = {}  # Taxon level values for this row.
            sizeclassdict = {}  # Size class level values for this row.
            try:
                for columnindex, columnname, level, numeric, internalname in headerinfo:
                    value = row[columnindex].strip()

                    # Empty cells are not stored at all.
                    if len(value) > 0:
                        # Separate columns contains taxon and size-class related info.
                        if level == 'taxon':
                            taxondict[internalname] = value
                        elif level == 'size_class':
                            if (internalname == 'bvol_size_class'):
                                try:
                                    # Convert from float to integer and back to str. Excel related problem.
                                    # NOTE(review): this value is always overwritten below, since both
                                    # branches of the 'numeric' check assign sizeclassdict[internalname]
                                    # again. Confirm whether the integer form was meant to be kept.
                                    sizeclassdict[internalname] = str(
                                        int(float(value)))
                                except:
                                    sizeclassdict[internalname] = '<ERROR>'
                            #
                            if numeric == 'numeric':
                                try:
                                    # Accept decimal comma and remove spaces before converting.
                                    value = value.replace(',', '.').replace(
                                        ' ',
                                        '')
                                    value = float(value)
                                    # Round float values: one decimal for values >= 1000,
                                    # otherwise n significant digits.
                                    n = 4  # Number of significant digits.
                                    if value != 0.0:
                                        if value >= 1000.0:
                                            value = round(value, 1)
                                        else:
                                            value = round(
                                                value, -int(
                                                    math.floor(
                                                        math.log10(
                                                            abs(value)))) +
                                                (n - 1))
                                except:
                                    # Conversion failed. The value is stored as it is at this point.
                                    # NOTE(review): bare except, also catches KeyboardInterrupt/SystemExit.
                                    pass
                                sizeclassdict[internalname] = str(value)
                            else:
                                sizeclassdict[internalname] = str(value)
                # Check if exists in self._taxa_lookup.
                if 'bvol_species' in taxondict:
                    scientificname = taxondict['bvol_species']
                    if scientificname in self._taxa_lookup:
                        speciesobject = self._taxa_lookup[scientificname]
                    else:
                        size = sizeclassdict.get('bvol_size_class', '')
                        toolbox_utils.Logging().warning(
                            'Scientific name is missing: ' + scientificname +
                            '   Size: ' + size + '   (Source: ' +
                            excel_file_name + ')')
                        continue  # Only add BVOL info if taxon exists in taxa.
                    #
                    speciesobject['bvol_name'] = scientificname
                    #
                    # Taxon level data is copied only when the first size class
                    # for the taxon is added.
                    if 'size_classes' not in speciesobject:
                        speciesobject['size_classes'] = []
                        # Add other bvol data to taxon.
                        for key in taxondict.keys():
                            speciesobject[key] = taxondict[key]
                    #

                    # Check if size class already exists. A duplicate is only
                    # logged; the new size class is appended anyway.
                    for old_sizeclassdict in speciesobject['size_classes']:
                        if old_sizeclassdict.get('bvol_size_class',
                                                 '') == sizeclassdict.get(
                                                     'bvol_size_class', ''):
                            toolbox_utils.Logging().warning(
                                'Size-class already exists for: ' +
                                scientificname + '   Size: ' +
                                sizeclassdict.get('bvol_size_class', '') +
                                '   (Source: ' + excel_file_name + ')')
                    #
                    speciesobject['size_classes'].append(sizeclassdict)
            except:
                # NOTE(review): bare except, also catches KeyboardInterrupt/SystemExit.
                toolbox_utils.Logging().warning(
                    'Failed when loading BVOL data.')
    def read_excel_file(self, excel_file_path=None):
        """Read sample info, sample data and counting method from Excel.

        The sheets 'sample_info.txt', 'sample_data.txt' and
        'counting_method.txt' of the Excel file are read into:
          - self._sample_info: dict built from column 0 (key) and
            column 1 (value) of every row, header row included.
          - self._sample_header / self._sample_rows: the sample data table.
          - self._sample_method_header / self._sample_method_rows: the
            counting method table.
          - self._sample_method_dict: counting method rows as dicts, keyed
            by their 'counting_method_step' value.
        self._dataset_metadata is reset to an empty dict; dataset metadata
        is not read from the Excel file.

        Raises:
            UserWarning: if excel_file_path is None, empty or not an
                existing file.
        """
        if excel_file_path is None:
            raise UserWarning('Excel file is missing.')
        # Validation needs only the path itself. The unused dataset
        # directory lookup that used to precede it has been removed.
        if (not excel_file_path) or (not os.path.isfile(excel_file_path)):
            raise UserWarning('Excel file does not exists.')

        self._dataset_metadata = {}
        self._sample_info = {}
        self._sample_header = []
        self._sample_rows = []
        self._sample_method_dict = {}

        # Sample info as <key>:<value>.
        tablefilereader = toolbox_utils.TableFileReader(
            excel_file_name=excel_file_path,
            excel_sheet_name='sample_info.txt',
        )
        # The sheet has no real header, so the header row is data too.
        sample_info = [tablefilereader.header()] + tablefilereader.rows()
        for row in sample_info:
            if len(row) >= 2:
                self._sample_info[row[0].strip()] = row[1].strip()

        # Sample data on table format.
        tablefilereader = toolbox_utils.TableFileReader(
            excel_file_name=excel_file_path,
            excel_sheet_name='sample_data.txt',
        )
        self._sample_header = tablefilereader.header()
        self._sample_rows = tablefilereader.rows()

        # Sample method on table format.
        tablefilereader = toolbox_utils.TableFileReader(
            excel_file_name=excel_file_path,
            excel_sheet_name='counting_method.txt',
        )
        self._sample_method_header = tablefilereader.header()
        self._sample_method_rows = tablefilereader.rows()
        # Create dictionary with method step as key.
        self._sample_method_dict = {}
        for row in self._sample_method_rows:
            method_dict = dict(zip(self._sample_method_header, row))
            if 'counting_method_step' in method_dict:
                self._sample_method_dict[
                    method_dict['counting_method_step']] = method_dict
    def read_file(self, dataset_name=None, sample_name=None):
        """Read dataset metadata and one sample from the dataset directory.

        The dataset directory is obtained from PlanktonCounterManager; the
        sample is located at <dataset dir>/<dataset_name>/<sample_name>.
        The following attributes are filled:
          - self._dataset_metadata: dict from 'dataset_metadata.txt' in the
            dataset directory (left empty if it cannot be read).
          - self._sample_info: dict from 'sample_info.txt'.
          - self._sample_header / self._sample_rows: 'sample_data.txt'.
          - self._sample_method_header / self._sample_method_rows:
            'counting_method.txt'.
          - self._sample_method_dict: counting method rows as dicts, keyed
            by their 'counting_method_step' value.
        For the two key/value files, column 0 is the key and column 1 the
        value, and the header row is treated as data.

        Raises:
            UserWarning: if dataset_name or sample_name is None, or if the
                dataset or sample path does not exist.
        """
        if dataset_name is None:
            raise UserWarning('Dataset name is missing.')
        if sample_name is None:
            raise UserWarning('Sample name is missing.')

        dir_path = plankton_core.PlanktonCounterManager().get_dataset_dir_path(
        )
        dataset_path = os.path.join(dir_path, dataset_name)
        sample_path = os.path.join(dataset_path, sample_name)

        if (not dataset_path) or (not os.path.exists(dataset_path)):
            raise UserWarning('Dataset files are missing.')
        if (not sample_path) or (not os.path.exists(sample_path)):
            raise UserWarning('Sample files are missing.')

        self._dataset_metadata = {}
        self._sample_info = {}
        self._sample_header = []
        self._sample_rows = []
        self._sample_method_dict = {}

        # Dataset metadata as <key>:<value>.
        try:
            tablefilereader = toolbox_utils.TableFileReader(
                file_path=dataset_path,
                text_file_name='dataset_metadata.txt',
            )
            # Merge header and rows. Create dict.
            dataset_metadata = ([tablefilereader.header()] +
                                tablefilereader.rows())
            for row in dataset_metadata:
                if len(row) >= 2:
                    self._dataset_metadata[row[0].strip()] = row[1].strip()
        except Exception:
            # Dataset metadata is read on a best-effort basis; whatever was
            # collected before the failure is kept. 'Exception' lets
            # KeyboardInterrupt and SystemExit propagate.
            pass

        # Sample info as <key>:<value>.
        tablefilereader = toolbox_utils.TableFileReader(
            file_path=sample_path,
            text_file_name='sample_info.txt',
        )
        # Merge header and rows. Create dict.
        sample_info = [tablefilereader.header()] + tablefilereader.rows()
        for row in sample_info:
            if len(row) >= 2:
                self._sample_info[row[0].strip()] = row[1].strip()

        # Sample data on table format.
        tablefilereader = toolbox_utils.TableFileReader(
            file_path=sample_path,
            text_file_name='sample_data.txt',
        )
        self._sample_header = tablefilereader.header()
        self._sample_rows = tablefilereader.rows()

        # Sample method on table format.
        tablefilereader = toolbox_utils.TableFileReader(
            file_path=sample_path,
            text_file_name='counting_method.txt',
        )
        self._sample_method_header = tablefilereader.header()
        self._sample_method_rows = tablefilereader.rows()
        # Create dictionary with method step as key.
        self._sample_method_dict = {}
        for row in self._sample_method_rows:
            method_dict = dict(zip(self._sample_method_header, row))
            if 'counting_method_step' in method_dict:
                self._sample_method_dict[
                    method_dict['counting_method_step']] = method_dict