def load_shark_data(self):
    """Load the data table entry 'shark_data.txt' from the SHARK zip archive.

    Stores a TableFileReader in self._data_tableobject. On any failure an
    empty TableFileReader is stored instead, so later accesses to header()
    and rows() still work (best-effort, as in the original code).
    """
    try:
        self._data_tableobject = toolbox_utils.TableFileReader(
            file_path=self._file_path,
            zip_file_name=self._archive_filename,
            zip_file_entry='shark_data.txt',
        )
    except Exception:
        # Fix: narrowed from a bare 'except:' so SystemExit/KeyboardInterrupt
        # are no longer swallowed. Fallback behavior is unchanged.
        self._data_tableobject = toolbox_utils.TableFileReader(
        )  # Empty object.
def _load_plankton_group_definition(self, excel_file_name):
    """Load plankton-group definitions from an Excel file.

    Each row holds (scientific name, rank, plankton group). Populates
    self._planktongroups_ranks_set with the used ranks and
    self._planktongroups_rank_dict[rank][scientific_name] = plankton_group.
    Rows that fail to parse are skipped with a warning.
    """
    tablefilereader = toolbox_utils.TableFileReader(
        excel_file_name=excel_file_name)
    #
    for row in tablefilereader.rows():
        scientificname = ''
        try:
            scientificname = row[0].strip()  # Scientific name.
            rank = row[1].strip()  # Rank.
            planktongroup = row[2].strip()  # Plankton group.
            #
            if scientificname and planktongroup:
                # An empty rank means the mapping is keyed directly on
                # the scientific name.
                used_rank = rank
                if not used_rank:
                    used_rank = 'scientific_name'
                self._planktongroups_ranks_set.add(used_rank)
                #
                if used_rank not in self._planktongroups_rank_dict:
                    self._planktongroups_rank_dict[used_rank] = {}
                self._planktongroups_rank_dict[used_rank][
                    scientificname] = planktongroup
        except Exception:
            # Fix: narrowed from a bare 'except:'; row is skipped with a warning.
            toolbox_utils.Logging().warning(
                'Failed when loading plankton group def. File:' +
                excel_file_name + ' Taxon: ' + scientificname)
def _load_harmful(self, excel_file_name):
    """ Adds info about harmfulness to the species objects.

    Reads rows keyed by the file header; for each row it marks the taxon
    matching 'scientific_name' as harmful, and additionally marks the taxon
    matching 'accepted_name_usage' (the valid name) when that differs.
    Failing rows are skipped with a warning.
    """
    tablefilereader = toolbox_utils.TableFileReader(
        excel_file_name=excel_file_name)
    #
    header = tablefilereader.header()
    for row in tablefilereader.rows():
        scientific_name = ''
        accepted_name_usage = ''
        try:
            # Map cell values by column name so column order is irrelevant.
            row_dict = dict(zip(header, row))
            scientific_name = row_dict.get('scientific_name', '').strip()
            accepted_name_usage = row_dict.get(
                'accepted_name_usage', '').strip()  # Valid scientific name.
            #
            if scientific_name and (scientific_name in self._taxa_lookup):
                # print('Harmful: scientific_name: ' + scientific_name)
                taxon = self._taxa_lookup[scientific_name]
                taxon['harmful_name'] = scientific_name
                taxon['harmful'] = True
                # Also flag the accepted (valid) name when it differs.
                if not (scientific_name == accepted_name_usage):
                    if accepted_name_usage and (accepted_name_usage in
                                                self._taxa_lookup):
                        # print('Harmful: accepted_name_usage: ' + accepted_name_usage + ' ( scientific_name: ' + scientific_name + ')')
                        taxon = self._taxa_lookup[accepted_name_usage]
                        taxon['harmful_name'] = accepted_name_usage
                        taxon['harmful'] = True
            # else:
            #     toolbox_utils.Logging().warning('Scientific name is missing: ' + scientific_name + ' (Source: ' + excel_file_name + ')')
        except:
            toolbox_utils.Logging().warning(
                'Failed when loading harmful algae. File:' + excel_file_name +
                ' Taxon: ' + scientific_name)
def get_counting_method_table(self, path, filename):
    """Read a counting-method text file and return (header, rows)."""
    reader = toolbox_utils.TableFileReader(
        file_path=path,
        text_file_name=filename,
    )
    header = reader.header()
    rows = reader.rows()
    return header, rows
def load_shark_metadata(self):
    """Load 'shark_metadata.txt' (a key/value list, no header) from the zip
    archive and store the joined text in self._metadata_text.

    self._metadata_dict is reset to an empty dict; this method only fills
    the text form. Best-effort: on failure empty values are stored.
    """
    try:
        self._metadata_dict = {}
        metadata_tableobject = toolbox_utils.TableFileReader(
            file_path=self._file_path,
            zip_file_name=self._archive_filename,
            zip_file_entry='shark_metadata.txt',
            select_columns_by_index=[0],
        )
        # Metadata is a key/value list with no header. Merge header and row.
        concat_table = [metadata_tableobject.header()
                        ] + metadata_tableobject.rows()
        concat_table = map('\t'.join, concat_table)
        self._metadata_text = '\r\n'.join(concat_table)
    except Exception:
        # Fix: the original assigned an empty TableFileReader object to
        # self._metadata_dict (a dict everywhere else) and left
        # self._metadata_text unset. Store consistent empty values instead.
        # Also narrowed from a bare 'except:'.
        self._metadata_dict = {}
        self._metadata_text = ''
def import_text_file(self, filename, textfile_encoding): """ """ # Select import format. formatparser = plankton_core.FormatSingleFile() # Phase 1: Read file into a temporary table. sheetname = None headerrow = 1 datarowsfrom = 2 # for rowdict in self._importrows: if rowdict['node'] == 'info': if rowdict['key'] == 'header_row': headerrow = int( float(rowdict.get('command', '1').replace(',', '.'))) if headerrow: headerrow -= 1 if rowdict['key'] == 'first_data_row': datarowsfrom = int( float(rowdict.get('command', '2').replace(',', '.'))) if datarowsfrom: datarowsfrom -= 1 tablefilereader = toolbox_utils.TableFileReader( text_file_name=filename, encoding=textfile_encoding, header_row=headerrow, data_rows_from=datarowsfrom) tabledataset = plankton_core.DatasetTable() tabledataset.set_header(tablefilereader.header()) for row in tablefilereader.rows(): tabledataset.append_row(row) # toolbox_utils.Logging().info('Loading file. Header content: ' + str(tabledataset.get_header())) # Phase 2: Parse the table and create a corresponding tree structure. targetdataset = plankton_core.DatasetNode() # targetdataset.set_dataset_parser_rows(self._importrows) targetdataset.set_export_table_columns(self._columnsinfo) # formatparser.parse_table_dataset(targetdataset, tabledataset) # Phase 3: Reorganize between nodes in tree structure. formatparser.reorganize_dataset() # Phase 4: Reformat fields in tree structure. formatparser.reformat_dataset() # Phase 5: Perform basic screening. formatparser.basic_screening() # return targetdataset
def _load_parser_info(self): """ """ # Read dataset parser. tablefilereader = toolbox_utils.TableFileReader( excel_file_name=self._parser_file_path) tabledata = plankton_core.DatasetTable() tabledata.set_header(tablefilereader.header()) for row in tablefilereader.rows(): tabledata.append_row(row) # Create import info. if self._import_column: # self.addMetadata('Import column', self._import_column) self._importrows = [] for rowindex in range(0, tabledata.get_row_count()): importcolumndata = tabledata.get_data_item_by_column_name( rowindex, self._import_column) if importcolumndata: nodelevel = tabledata.get_data_item(rowindex, 0) key = tabledata.get_data_item(rowindex, 1) viewformat = tabledata.get_data_item(rowindex, 2) self._importrows.append({ 'node': nodelevel, 'key': key, 'view_format': viewformat, 'command': importcolumndata }) # self.set_dataset_parser_rows(self._importrows) # Create export info. if self._export_column: # self.addMetadata('Export column', self._export_column) self._columnsinfo = [] for rowindex in range(0, tabledata.get_row_count()): exportcolumndata = tabledata.get_data_item_by_column_name( rowindex, self._export_column) if exportcolumndata: nodelevel = tabledata.get_data_item(rowindex, 0) if nodelevel != 'info': key = tabledata.get_data_item(rowindex, 1) viewformat = tabledata.get_data_item(rowindex, 2) self._columnsinfo.append({ 'header': exportcolumndata, 'node': nodelevel, 'key': key, 'view_format': viewformat })
def _load_trophic_types(self, excel_file_name):
    """ Adds trophic type info to the species objects.

    Rows hold (scientific name, size class, trophic type). With a size
    class the value is written on the matching size-class dict; without one
    it is written on the taxon itself. A duplicate taxon/size row that would
    overwrite an existing trophic type on the same taxon is skipped.
    """
    tablefilereader = toolbox_utils.TableFileReader(
        excel_file_name=excel_file_name)
    #
    for row in tablefilereader.rows():
        scientificname = ''
        try:
            scientificname = row[0].strip()  # Scientific name.
            sizeclass = row[1].strip()  # Size class.
            trophictype = row[2].strip()  # Trophic type.
            #
            if scientificname in self._taxa_lookup:
                taxon = self._taxa_lookup[scientificname]
                #
                if sizeclass:
                    # sizeclassfound = False
                    if 'size_classes' in taxon:
                        for sizeclassdict in taxon['size_classes']:
                            if sizeclassdict.get('bvol_size_class',
                                                 '') == sizeclass:
                                # Don't overwrite an already-set trophic type
                                # when the row targets the taxon's own name.
                                if sizeclassdict.get('trophic_type', ''):
                                    if scientificname == taxon[
                                            'scientific_name']:
                                        # toolbox_utils.Logging().warning('Same taxon/size on multiple rows: ' + scientificname + ' Size: ' + sizeclass + ' (Source: ' + excel_file_name + ')')
                                        # sizeclassfound = True
                                        break
                                #
                                sizeclassdict['trophic_type'] = trophictype
                                # sizeclassfound = True
                                break
                    #
                    # if sizeclassfound == False:
                    #     toolbox_utils.Logging().warning('Size class is missing: ' + scientificname + ' Size: ' + sizeclass + ' (Source: ' + excel_file_name + ')')
                else:
                    # No sizeclass in indata file. Put on species level.
                    taxon['trophic_type'] = trophictype
            else:
                # toolbox_utils.Logging().warning('Scientific name is missing: ' + scientificname + ' (Source: ' + excel_file_name + ')')
                pass
        except:
            toolbox_utils.Logging().warning(
                'Failed when loading trophic types. File:' + excel_file_name +
                ' Taxon: ' + scientificname)
def _load_taxa(self, excel_file_name):
    """ Creates one data object for each taxon.

    Rows are keyed by the file header ('scientific_name', 'author', 'rank',
    'parent_name'). New taxa are added to self._taxa and mirrored in
    self._taxa_lookup; a duplicate name is warned about but its fields are
    still (re)written onto the existing taxon object.
    """
    tablefilereader = toolbox_utils.TableFileReader(
        excel_file_name=excel_file_name)
    #
    header = tablefilereader.header()
    for row in tablefilereader.rows():
        row_dict = dict(zip(header, row))
        scientificname = ''
        try:
            # Older index-based parsing kept for reference:
            # scientificname = row[0].strip() # ScientificName.
            # author = row[1].strip() if row[1].strip() != 'NULL' else '' # Author.
            # rank = row[2].strip() # Rank.
            # parentname = row[3].strip() # Parent.
            scientificname = row_dict.get('scientific_name',
                                          '').strip()  # ScientificName.
            author = row_dict.get('author', '').strip()  # Author.
            rank = row_dict.get('rank', '').strip()  # Rank.
            parentname = row_dict.get('parent_name', '').strip()  # Parent.
            #
            if scientificname:
                if scientificname not in self._taxa:
                    self._taxa[scientificname] = {}
                    # Lookup dictionary.
                    self._taxa_lookup[scientificname] = self._taxa[
                        scientificname]
                else:
                    toolbox_utils.Logging().warning(
                        'Scientific name added twice: ' + scientificname +
                        ' (Source: ' + excel_file_name + ')')
                # Fields are written even for duplicates (last row wins).
                speciesobject = self._taxa[scientificname]
                speciesobject['scientific_name'] = scientificname
                speciesobject['author'] = author
                speciesobject['rank'] = rank
                speciesobject['parent_name'] = parentname
        except:
            toolbox_utils.Logging().warning(
                'Failed when loading taxa. File:' + excel_file_name +
                ' Taxon: ' + scientificname)
def _load_bvol_columns(self, excel_file_name):
    """Load the BVOL column configuration from an Excel file.

    Each row maps an external column name to a tuple
    (used_on_rank_level, numeric, internal_toolbox_name), stored in
    self._bvolcolumns_dict. Rows that fail to parse are skipped with a
    warning.
    """
    tablefilereader = toolbox_utils.TableFileReader(
        excel_file_name=excel_file_name)
    #
    for row in tablefilereader.rows():
        columnname = ''
        try:
            # Header: column_name, used_on_rank_level, numeric, internal_toolbox_name.
            columnname = row[0].strip()
            level = row[1].strip()
            numeric = row[2].strip()
            internalname = row[3].strip()
            #
            if columnname and level and internalname:
                self._bvolcolumns_dict[columnname] = (level, numeric,
                                                      internalname)
        except Exception:
            # Fix: narrowed from a bare 'except:'; row is skipped with a warning.
            toolbox_utils.Logging().warning(
                'Failed when loading BVOL columns. Column name: ' +
                columnname)
def get_counting_species_table(self, counting_species_file_name):
    """Return (header, rows) for a counting-species list.

    The special name '<valid taxa>' yields all preloaded taxa as one-column
    rows; otherwise the stored '<name>.txt' file is read. Missing files
    yield ([], []).
    """
    # Special case: use all preloaded species.
    if counting_species_file_name == '<valid taxa>':
        taxa_dict = plankton_core.Species().get_taxa_dict()
        species_list_of_list = [[name] for name in sorted(taxa_dict.keys())]
        return ['scientific_name'], species_list_of_list
    # Read stored species file.
    text_file_name = counting_species_file_name + '.txt'
    filepath = os.path.join(self._methods_species_lists_dir_path,
                            text_file_name)
    if not os.path.isfile(filepath):
        return [], []
    reader = toolbox_utils.TableFileReader(
        file_path=self._methods_species_lists_dir_path,
        text_file_name=text_file_name,
    )
    return reader.header(), reader.rows()
def _load_synonyms(self, excel_file_name):
    """ Add synonyms from 'translate_' or 'synonyms_' files.

    Rows hold (from-name, to-name). The from-name must NOT already be a
    valid taxon; it is appended to the to-taxon's 'synonyms' list and added
    to self._taxa_lookup pointing at the to-taxon's object. Failing rows
    are skipped with a warning.
    """
    tablefilereader = toolbox_utils.TableFileReader(
        excel_file_name=excel_file_name)
    #
    for row in tablefilereader.rows():
        toname = ''
        fromname = ''
        try:
            toname = row[1].strip()
            fromname = row[0].strip()
            #
            # Check if from name is a valid name.
            if fromname in self._taxa_lookup:
                toolbox_utils.Logging().warning(
                    'Invalid translate (valid taxa in first column): ' +
                    fromname + ' (Source: ' + excel_file_name + ')')
                continue
            #
            if toname in self._taxa_lookup:
                taxon = self._taxa_lookup[toname]
                # Fix: idiomatic "not in" instead of "not ... in".
                if 'synonyms' not in self._taxa[toname]:
                    taxon['synonyms'] = []
                taxon['synonyms'].append(fromname)
                # Lookup dictionary.
                self._taxa_lookup[fromname] = self._taxa[toname]
            else:
                toolbox_utils.Logging().warning(
                    'Scientific name is missing: ' + toname + ' (Source: ' +
                    excel_file_name + ')')
        except Exception:
            # Fix: narrowed from a bare 'except:'; row is skipped with a warning.
            toolbox_utils.Logging().warning(
                'Failed when loading translates/synonyms. File:' +
                excel_file_name + ' From taxon: ' + toname)
def _load_bvol(self, excel_file_name):
    """ Adds BVOL data to species objects. Creates additional species
    objects if missing (i.e. for Unicell, Flagellates).

    Columns are mapped through self._bvolcolumns_dict (loaded by
    _load_bvol_columns) into taxon-level and size-class-level values.
    Numeric size-class values are rounded to 4 significant digits.
    NOTE(review): despite the docstring, the visible code only warns and
    skips when the taxon is missing — TODO confirm where missing taxa are
    created.
    """
    # Import size class data.
    tablefilereader = toolbox_utils.TableFileReader(
        excel_file_name=excel_file_name)
    #
    # Create header list for mapping and translations.
    headerinfo = []  # Contains used columns only.
    for columnindex, columnname in enumerate(tablefilereader.header()):
        # Use loaded information on used columns.
        if columnname in self._bvolcolumns_dict:
            level, numeric, internalname = self._bvolcolumns_dict[
                columnname]
            headerinfo.append(
                (columnindex, columnname, level, numeric, internalname))
    #
    for row in tablefilereader.rows():
        taxondict = {}
        sizeclassdict = {}
        try:
            ### for column, value in enumerate(row):
            for columnindex, columnname, level, numeric, internalname in headerinfo:
                value = row[columnindex].strip()
                if len(value) > 0:
                    # Separate columns contains taxon and size-class related info.
                    if level == 'taxon':
                        # if level == 'scientific_name':
                        taxondict[internalname] = value
                    elif level == 'size_class':
                        if (internalname == 'bvol_size_class'):
                            try:
                                # Convert from float to integer and back to str. Excel related problem.
                                sizeclassdict[internalname] = str(
                                    int(float(value)))
                            except:
                                sizeclassdict[internalname] = '<ERROR>'
                        #
                        if numeric == 'numeric':
                            try:
                                # Normalize decimal comma and spaces.
                                value = value.replace(',', '.').replace(
                                    ' ', '')
                                # Try/except if already float.
                                value = float(value)
                                # Round float values.
                                n = 4  # Number of significant digits.
                                if value != 0.0:
                                    if value >= 1000.0:
                                        value = round(value, 1)
                                    else:
                                        # Round to n significant digits.
                                        value = round(
                                            value, -int(
                                                math.floor(
                                                    math.log10(
                                                        abs(value)))) +
                                            (n - 1))
                            except:
                                pass
                            sizeclassdict[internalname] = str(value)
                        else:
                            sizeclassdict[internalname] = str(value)
            # Check if exists in self._taxa
            if 'bvol_species' in taxondict:
                scientificname = taxondict['bvol_species']
                if scientificname in self._taxa_lookup:
                    speciesobject = self._taxa_lookup[scientificname]
                else:
                    size = sizeclassdict.get('bvol_size_class', '')
                    toolbox_utils.Logging().warning(
                        'Scientific name is missing: ' + scientificname +
                        ' Size: ' + size + ' (Source: ' + excel_file_name +
                        ')')
                    continue  # Only add BVOL info if taxon exists in taxa.
                #
                speciesobject['bvol_name'] = scientificname
                #
                if 'size_classes' not in speciesobject:
                    speciesobject['size_classes'] = []
                # Add other bvol data to taxon.
                for key in taxondict.keys():
                    speciesobject[key] = taxondict[key]
                #
                # Check if size class already exists.
                for old_sizeclassdict in speciesobject['size_classes']:
                    if old_sizeclassdict.get('bvol_size_class',
                                             '') == sizeclassdict.get(
                                                 'bvol_size_class', ''):
                        toolbox_utils.Logging().warning(
                            'Size-class already exists for: ' +
                            scientificname + ' Size: ' +
                            sizeclassdict.get('bvol_size_class', '') +
                            ' (Source: ' + excel_file_name + ')')
                # Appended even when a duplicate was warned about above.
                speciesobject['size_classes'].append(sizeclassdict)
        except:
            toolbox_utils.Logging().warning(
                'Failed when loading BVOL data.')
def read_excel_file(self, excel_file_path=None):
    """Load one plankton-counter sample from an Excel workbook.

    Reads the sheets 'sample_info.txt' (key/value pairs),
    'sample_data.txt' (table) and 'counting_method.txt' (table) and fills
    self._sample_info, self._sample_header/_sample_rows,
    self._sample_method_header/_sample_method_rows and
    self._sample_method_dict (keyed by 'counting_method_step').

    Raises:
        UserWarning: if the path is missing or is not an existing file.
    """
    # Fix: identity comparison with None instead of '== None'.
    if excel_file_path is None:
        raise UserWarning('Excel file is missing.')
    # NOTE(review): result unused, but the manager constructor may have
    # side effects — kept as in the original. TODO confirm.
    dir_path = plankton_core.PlanktonCounterManager().get_dataset_dir_path(
    )
    #
    if (not excel_file_path) or (not os.path.isfile(excel_file_path)):
        raise UserWarning('Excel file does not exists.')
    #
    self._dataset_metadata = {}
    self._sample_info = {}
    self._sample_header = []
    self._sample_rows = []
    self._sample_method_dict = {}
    # (Removed a long commented-out block that read 'dataset_metadata.txt';
    # self._dataset_metadata stays an empty dict here.)
    # Sample info as <key>:<value>.
    tablefilereader = toolbox_utils.TableFileReader(
        excel_file_name=excel_file_path,
        excel_sheet_name='sample_info.txt',
    )
    # Merge header and rows. Create dict from ':'-separated rows.
    sample_info = [tablefilereader.header()] + tablefilereader.rows()
    for row in sample_info:
        if len(row) >= 2:
            self._sample_info[row[0].strip()] = row[1].strip()
    # Sample data on table format.
    tablefilereader = toolbox_utils.TableFileReader(
        excel_file_name=excel_file_path,
        excel_sheet_name='sample_data.txt',
    )
    self._sample_header = tablefilereader.header()
    self._sample_rows = tablefilereader.rows()
    # Sample method on table format.
    tablefilereader = toolbox_utils.TableFileReader(
        excel_file_name=excel_file_path,
        excel_sheet_name='counting_method.txt',
    )
    self._sample_method_header = tablefilereader.header()
    self._sample_method_rows = tablefilereader.rows()
    # Create dictionary with method step as key.
    self._sample_method_dict = {}
    for row in self._sample_method_rows:
        method_dict = dict(zip(self._sample_method_header, row))
        if 'counting_method_step' in method_dict:
            self._sample_method_dict[
                method_dict['counting_method_step']] = method_dict
def read_file(self, dataset_name=None, sample_name=None):
    """ Load one plankton-counter sample from text files on disk.

    Reads 'dataset_metadata.txt' (best-effort), 'sample_info.txt',
    'sample_data.txt' and 'counting_method.txt' under the dataset/sample
    directories and fills self._dataset_metadata, self._sample_info,
    self._sample_header/_sample_rows, self._sample_method_header/
    _sample_method_rows and self._sample_method_dict (keyed by
    'counting_method_step'). Raises UserWarning when names or the
    corresponding directories are missing.
    """
    if dataset_name == None:
        raise UserWarning('Dataset name is missing.')
    if sample_name == None:
        raise UserWarning('Sample name is missing.')
    #
    dir_path = plankton_core.PlanktonCounterManager().get_dataset_dir_path(
    )
    dataset_path = os.path.join(dir_path, dataset_name)
    sample_path = os.path.join(dataset_path, sample_name)
    #
    if (not dataset_path) or (not os.path.exists(dataset_path)):
        raise UserWarning('Dataset files are missing.')
    if (not sample_path) or (not os.path.exists(sample_path)):
        raise UserWarning('Sample files are missing.')
    #
    self._dataset_metadata = {}
    self._sample_info = {}
    self._sample_header = []
    self._sample_rows = []
    self._sample_method_dict = {}
    # Dataset metadata as <key>:<value>. Best-effort: absence is tolerated.
    try:
        tablefilereader = toolbox_utils.TableFileReader(
            file_path=dataset_path,
            text_file_name='dataset_metadata.txt',
        )
        # Merge header and rows. Create dict.
        dataset_metadata = [tablefilereader.header()
                            ] + tablefilereader.rows()
        for row in dataset_metadata:
            if len(row) >= 2:
                self._dataset_metadata[row[0].strip()] = row[1].strip()
    except:
        pass
    # Sample info as <key>:<value>.
    tablefilereader = toolbox_utils.TableFileReader(
        file_path=sample_path,
        text_file_name='sample_info.txt',
    )
    # Merge header and rows. Create dict from ':'-separated rows.
    sample_info = [tablefilereader.header()] + tablefilereader.rows()
    for row in sample_info:
        if len(row) >= 2:
            self._sample_info[row[0].strip()] = row[1].strip()
    # Sample data on table format.
    tablefilereader = toolbox_utils.TableFileReader(
        file_path=sample_path,
        text_file_name='sample_data.txt',
    )
    self._sample_header = tablefilereader.header()
    self._sample_rows = tablefilereader.rows()
    # Sample method on table format.
    tablefilereader = toolbox_utils.TableFileReader(
        file_path=sample_path,
        text_file_name='counting_method.txt',
    )
    self._sample_method_header = tablefilereader.header()
    self._sample_method_rows = tablefilereader.rows()
    # Create dictionary with method step as key.
    self._sample_method_dict = {}
    for row in self._sample_method_rows:
        method_dict = dict(zip(self._sample_method_header, row))
        if 'counting_method_step' in method_dict:
            self._sample_method_dict[
                method_dict['counting_method_step']] = method_dict