def importSetNew(sbtabfile, filename, separator=None):
    '''
    Import an SBtab file, picking the importer from the separator or file type.

    Parameters
    ----------
    sbtabfile :
        The SBtab data; handed unchanged to haveTSV / haveXLS
        (presumably the file content - confirm against those helpers).
    filename : str
        File name; only used to guess the MIME type from its extension.
    separator : str, optional
        Column separator. If given (and non-empty) the data is always
        imported as delimited text, regardless of the file extension.

    Returns
    -------
    Whatever haveTSV or haveXLS returns for the given data.
    '''
    mimetypes.init()
    file_mimetype = mimetypes.guess_type(filename)[0]

    # An explicit separator always wins over the guessed file type.
    if separator:
        return haveTSV(sbtabfile, separator)

    spreadsheet_mimetypes = (
        'application/vnd.ms-excel',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    )
    if file_mimetype in spreadsheet_mimetypes:
        return haveXLS(sbtabfile, True, True)

    # Anything else is treated as delimited text with a detected separator.
    return haveTSV(sbtabfile, misc.getDelimiter(sbtabfile))
def splitDocumentInTables(document_rows):
    '''
    Split a document that may hold several SBtabs into the single SBtabs.

    Parameters
    ----------
    document_rows : list of str
        The rows (lines) of the document.

    Returns
    -------
    sbtab_list : list of list of str
        One list of rows per table. The first row of the document always
        opens the first table; every later row starting with '!!' opens a
        new one. Rows that are empty or whose first cell is empty are
        dropped. An empty document yields an empty list.
    '''
    # Nothing to split; also avoids indexing into an empty list below.
    if not document_rows:
        return []

    delimiter = misc.getDelimiter('\n'.join(document_rows))
    sbtab_list = []
    single_sbtab = [document_rows[0]]

    for row in document_rows[1:]:
        if row.startswith('!!'):
            # A new declaration row closes the table collected so far.
            sbtab_list.append(single_sbtab)
            single_sbtab = [row]
            continue
        if not row:
            continue
        cells = row.split(delimiter)
        # Keep only rows that actually carry a first cell.
        if cells and cells[0]:
            single_sbtab.append(row)

    sbtab_list.append(single_sbtab)
    return sbtab_list
def getColumns(self):
    """
    Extract the column names of the table from its header row.

    The header row is a row whose first entry starts with exactly one
    '!' (rows starting with '!!' are skipped). If several rows qualify,
    the last one wins.

    Returns
    -------
    column_names : list
        The entries of the header row, in order.
    columns : dict
        Column name as key and its position (int) as value. For
        duplicate names the last position is kept.
    inserted_column : bool
        Always False: the automatic insertion of a mandatory first
        column is currently disabled (see the commented-out code below).
    delimiter :
        Result of misc.getDelimiter for the header row.

    Notes
    -----
    If self.table contains no header row, the names are never bound and
    the function fails with an UnboundLocalError.
    """
    # Save list of main columns
    for row in self.table:
        # Empty rows have no first entry to inspect.
        if not row:
            continue
        first_entry = str(row[0])
        if first_entry.startswith('!') and not first_entry.startswith('!!'):
            delimiter = misc.getDelimiter(row)
            column_names = list(row)

    # Insert mandatory first column if not existent (disabled)
    inserted_column = False
    #if not column_names[0].title() == '!' + self.table_type.title():
    #    column_names.insert(0, '!' + self.table_type.title())
    #    inserted_column = True

    # Get column positions
    columns = {column: i for i, column in enumerate(column_names)}

    return column_names, columns, inserted_column, delimiter
def csv2html(sbtab_file, file_name, definition_file=None, sbtype=None):
    '''
    Generates html view out of csv file and writes it to disk.

    Parameters
    ----------
    sbtab_file : str
        SBtab file as string representation.
    file_name : str
        SBtab file name. The HTML is written to file_name[:-4] + '.html'.
    definition_file : str
        SBtab definition file as string representation. If omitted,
        'definitions.tsv' is read from the current working directory.
    sbtype : str
        SBtab attribute TableType.

    Returns
    -------
    nice_sbtab : str
        The generated HTML document.

    Notes
    -----
    If no definition file is given and 'definitions.tsv' cannot be read,
    a message is printed and the process exits with status 1.
    '''
    # extract information from the definition file
    def_delimiter = '\t'
    if not definition_file:
        try:
            with open('definitions.tsv', 'r') as def_file_open:
                definition_file = def_file_open.read()
        except (IOError, OSError, UnicodeError):
            print('You have not provided the definition file and it cannot be found in this directory. Please provide it.')
            sys.exit(1)
    col2description = findDescriptions(definition_file, def_delimiter, sbtype)

    # now start building the HTML file from the SBtab file
    # NOTE(review): file_name and the cell contents are inserted into the
    # markup without HTML escaping - confirm that the input is trusted.
    delimiter = misc.getDelimiter(sbtab_file)
    ugly_sbtab = sbtab_file.split('\n')
    html_parts = [
        '<html>\n<body>\n',
        '<p>\n<h2><b>' + file_name + '</b></h2>\n</p>\n',
        '<a style="background-color:#00BFFF">' + ugly_sbtab[0] + '</a>\n<br>\n',
        '<table>\n',
    ]

    # Column position -> identifiers.org base URL. Kept per column so that
    # several 'Identifiers:' columns each link to their own collection.
    ident_urls = {}

    for row in ugly_sbtab[1:]:
        cells = row.split(delimiter)
        if row.startswith('!'):
            html_parts.append('<tr bgcolor="#87CEFA">\n')
            for i, element in enumerate(cells):
                if 'Identifiers:' in element:
                    searcher = re.search('Identifiers:(.*)', element)
                    ident_urls[i] = ('http://identifiers.org/'
                                     + searcher.group(1) + '/')
        else:
            html_parts.append('<tr>\n')

        for i, thing in enumerate(cells):
            if i in ident_urls and not thing.startswith('!'):
                # Data cell of an identifier column: link to the resource.
                ref_string = ident_urls[i] + thing
                new_row = ('<td><a href="' + ref_string
                           + '" target="_blank">' + str(thing) + '</a></td>')
            else:
                # The description is looked up without the leading '!'.
                try:
                    title = col2description[thing[1:]]
                except (KeyError, TypeError):
                    title = ''
                new_row = ('<td title="' + str(title) + '">'
                           + str(thing) + '</td>')
            html_parts.append(new_row + '\n')

        html_parts.append('</tr>\n')

    html_parts.append('</table>\n')
    html_parts.append('</body>\n</html>\n')
    nice_sbtab = ''.join(html_parts)

    # NOTE(review): [:-4] assumes a three-letter extension such as '.csv'.
    with open(file_name[:-4] + '.html', 'w') as html_file:
        html_file.write(nice_sbtab)

    return nice_sbtab
def __init__(self, table, sbtab_name, def_table=None, def_name=None):
    '''
    Initialises validator and starts check for file and table format.

    Parameters
    ----------
    table :
        The SBtab to validate. It is handed to misc.getDelimiter and
        tablibIO.importSetNew, so presumably the raw file content rather
        than an already imported tablib object - TODO confirm.
    sbtab_name : str
        File path of the SBtab file.
    def_table : str
        SBtab definition table as string representation. If omitted, it
        is read from the file def_name.
    def_name : str
        SBtab definition table name (default: 'definitions.tsv').

    Notes
    -----
    If the definition table has to be read from disk and that fails, a
    message is printed and the process exits.
    '''
    delimiter = misc.getDelimiter(table)
    sbtab_tablib = tablibIO.importSetNew(table, sbtab_name, delimiter)

    if not def_name:
        def_name = 'definitions.tsv'
    if not def_table:
        try:
            with open(def_name, 'r') as default_def:
                def_table = default_def.read()
        except (IOError, OSError, UnicodeError):
            print('Definition file could not be loaded, so the validation could not be started. Please provide definition file as argument or make it is located in the same directory as this script.')
            sys.exit()

    # import definitions from definition table
    definition_table = tablibIO.importSetNew(def_table, def_name,
                                             separator='\t')
    definition_sbtab = SBtab.SBtabTable(definition_table, def_name)
    self.definitions = definition_sbtab.sbtab_list

    # collect the valid column names (row[1]) per table type (row[3]) in a
    # single pass; the table types are the keys of that mapping
    self.allowed_columns = {}
    for row in self.definitions[2]:
        self.allowed_columns.setdefault(row[3], []).append(row[1])
    self.allowed_table_types = list(self.allowed_columns)

    # initialize warning list
    self.warnings = []
    # define self variables
    self.table = sbtab_tablib
    self.filename = sbtab_name

    # check file format and header row
    self.checkTableFormat()

    # try creating SBtab instance
    self.sbtab = SBtab.SBtabTable(self.table, self.filename)

    # format (row[4]) per column name (row[1]) for this table's type
    self.column2format = {}
    for row in self.definitions[2]:
        if row[3] == self.sbtab.table_type:
            self.column2format[row[1]] = row[4]

    # remove empty column headers
    self.sbtab.columns = [element for element in self.sbtab.columns
                          if element != '']

    # determine headers
    self.determineHeaders()

    # check SBtab object for validity
    self.checkTable()
''' return self.warnings if __name__ == '__main__': try: sys.argv[1] except: print 'You have not provided input arguments. Please start the script by also providing an SBtab file and the required definition file: >python validatorSBtab.py SBtab.csv definition.tsv' sys.exit() file_name = sys.argv[1] sbtab_file_o = open(file_name,'r') sbtab_file = sbtab_file_o.read() sbtab_file_o.close() delimiter = misc.getDelimiter(sbtab_file) sbtab_tablib = tablibIO.importSetNew(sbtab_file,file_name,delimiter) try: default_def = sys.argv[2] def_file = open(default_def,'r') def_tab = def_file.read() def_file.close() except: def_tab = None validator_output = [] Validate_file_class = ValidateFile(sbtab_file,file_name) validator_output.append(Validate_file_class.returnOutput()) Validate_table_class = ValidateTable(sbtab_file,file_name,def_tab) #Validate_table_class = ValidateTable(sbtab_tablib,file_name,def_tab)
if __name__ == '__main__':
    # this main function is deprecated!
    # Command-line entry point: validate the SBtab file named by the first
    # argument, optionally against the definition file named by the second.
    if len(sys.argv) < 2:
        print('You have not provided input arguments. Please start the '
              'script by also providing an SBtab file and the required '
              'definition file: >python validatorSBtab.py SBtab.csv '
              'definition.tsv')
        sys.exit()

    file_name = sys.argv[1]
    with open(file_name, 'r') as sbtab_file_o:
        sbtab_file = sbtab_file_o.read()
    delimiter = misc.getDelimiter(sbtab_file)

    # The definition file is optional: without a readable one the table
    # validator receives None.
    def_tab = None
    if len(sys.argv) > 2:
        default_def = sys.argv[2]
        try:
            with open(default_def, 'r') as def_file:
                def_tab = def_file.read()
        except (IOError, OSError, UnicodeError):
            def_tab = None

    validator_output = []
    Validate_file_class = ValidateFile(sbtab_file, file_name)
    validator_output.append(Validate_file_class.return_output())
    Validate_table_class = ValidateTable(sbtab_file, file_name, def_tab)
    validator_output.append(Validate_table_class.return_output())