Пример #1
0
def importSetNew(sbtabfile, filename, separator=None):
    '''
    Import an SBtab file, choosing the reader from the separator or file type.

    Parameters
    ----------
    sbtabfile : object
        File content; handed unchanged to haveTSV or haveXLS.
    filename : str
        File name, used only to guess the MIME type.
    separator : str, optional
        Column separator. When truthy, the content is always read via haveTSV
        with this separator, regardless of the guessed file type.

    Returns
    -------
    object
        Whatever haveTSV or haveXLS returns.
    '''
    mimetypes.init()
    guessed_type = mimetypes.guess_type(filename)[0]

    # An explicit separator takes precedence over the guessed file type.
    if separator:
        return haveTSV(sbtabfile, separator)

    spreadsheet_types = (
        'application/vnd.ms-excel',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    )
    if guessed_type in spreadsheet_types:
        return haveXLS(sbtabfile, True, True)

    # Anything else is read as delimited text with a detected separator.
    return haveTSV(sbtabfile, misc.getDelimiter(sbtabfile))

    '''
Пример #2
0
def splitDocumentInTables(document_rows):
    '''
    Split a document that may contain several SBtabs into one row list per SBtab.

    Parameters
    ----------
    document_rows : list of str
        Rows of the document. The first row always opens the first table.

    Returns
    -------
    list of list of str
        One list of rows per table. Every later row starting with '!!' opens
        a new table. Empty rows and rows whose first field (after splitting
        on the detected delimiter) is empty are dropped. An empty document
        yields an empty list.
    '''
    # Guard first: indexing document_rows[0] on an empty document used to
    # raise IndexError.
    if not document_rows:
        return []

    delimiter = misc.getDelimiter('\n'.join(document_rows))

    current_table = [document_rows[0]]
    tables = []
    for row in document_rows[1:]:
        if row.startswith('!!'):
            # A declaration row closes the running table and starts the next.
            tables.append(current_table)
            current_table = [row]
            continue
        fields = row.split(delimiter)
        # The emptiness check must come first: with a whitespace split an
        # empty row yields no fields at all.
        if row != '' and fields[0] != '':
            current_table.append(row)
    tables.append(current_table)

    return tables
Пример #3
0
    def getColumns(self):
        """
        Extract the column names of the table and their positions.

        Returns
        -------
        column_names : list
            Entries of the column header row, in table order.
        columns : dict
            Column name (key) mapped to its position (int). If a name occurs
            more than once, the last position is kept.
        inserted_column : bool
            Always False: automatic insertion of the mandatory first column
            is currently disabled.
        delimiter : object
            Result of misc.getDelimiter for the header row.

        Raises
        ------
        ValueError
            If the table contains no column header row.

        Notes
        -----
        A column header row is a non-empty row whose first entry starts with
        '!' but not with '!!'. If several rows qualify, the last one is used.
        """
        column_names = None
        delimiter = None
        for row in self.table:
            # Empty rows have no first entry to inspect.
            if len(row) == 0:
                continue
            first_entry = str(row[0])
            if first_entry.startswith('!') and not first_entry.startswith('!!'):
                delimiter = misc.getDelimiter(row)
                column_names = list(row)

        # Without this check a header-less table failed further down with an
        # UnboundLocalError that said nothing about the actual problem.
        if column_names is None:
            raise ValueError(
                "No column header row (first entry starting with '!') "
                "was found in the table.")

        # Insertion of the mandatory first column is disabled, so the flag
        # is reported as False unconditionally.
        inserted_column = False

        # Map every column name to its position.
        columns = {}
        for position, name in enumerate(column_names):
            columns[name] = position

        return column_names, columns, inserted_column, delimiter
Пример #4
0
def csv2html(sbtab_file, file_name, definition_file=None, sbtype=None):
    '''
    Generate an HTML view of an SBtab file, write it to disk and return it.

    Parameters
    ----------
    sbtab_file : str
       SBtab file as string representation.
    file_name : str
       SBtab file name. The HTML file is written to this name with its last
       four characters replaced by '.html'.
    definition_file : str
       SBtab definition file as string representation. If omitted,
       'definitions.tsv' is read from the current working directory.
    sbtype : str
       SBtab attribute TableType.

    Returns
    -------
    str
        The generated HTML document.

    Notes
    -----
    If no definition file is given and 'definitions.tsv' cannot be read, a
    message is printed and the process exits with status 1.
    NOTE(review): cell contents and file_name are inserted into the markup
    without HTML escaping.
    '''
    # Extract information from the definition file. Only reading the file is
    # guarded: errors raised by findDescriptions itself now propagate
    # instead of being reported as a missing file.
    def_delimiter = '\t'
    if not definition_file:
        try:
            with open('definitions.tsv', 'r') as def_file_open:
                definition_file = def_file_open.read()
        except (IOError, OSError, UnicodeError):
            print('You have not provided the definition file and it cannot be found in this directory. Please provide it.')
            sys.exit(1)
    col2description = findDescriptions(definition_file, def_delimiter, sbtype)

    # Now build the HTML document from the SBtab rows.
    delimiter = misc.getDelimiter(sbtab_file)
    sbtab_rows = sbtab_file.split('\n')
    html_parts = [
        '<html>\n<body>\n',
        '<p>\n<h2><b>' + file_name + '</b></h2>\n</p>\n',
        '<a style="background-color:#00BFFF">' + sbtab_rows[0] + '</a>\n<br>\n',
        '<table>\n',
    ]

    # Column index -> identifiers.org base URL. Each identifier column keeps
    # its own URL; a single shared URL made every identifier column link to
    # the namespace of the last one declared.
    ident_urls = {}

    for row in sbtab_rows[1:]:
        cells = row.split(delimiter)
        if row.startswith('!'):
            html_parts.append('<tr bgcolor="#87CEFA">\n')
            # A new header row defines a new column layout, so identifier
            # columns of a previous table must not carry over.
            ident_urls = {}
            for i, element in enumerate(cells):
                if 'Identifiers:' in element:
                    # Everything after the first 'Identifiers:' names the
                    # identifiers.org collection.
                    collection = element.partition('Identifiers:')[2]
                    ident_urls[i] = 'http://identifiers.org/' + collection + '/'
        else:
            html_parts.append('<tr>\n')

        for i, thing in enumerate(cells):
            if i in ident_urls and not thing.startswith('!'):
                ref_string = ident_urls[i] + thing
                new_row = ('<td><a href="' + ref_string + '" target="_blank">'
                           + str(thing) + '</a></td>')
            else:
                # The lookup key is the cell without its first character
                # (the leading '!' for header cells); cells without a
                # description get an empty tooltip.
                try:
                    title = col2description[thing[1:]]
                except Exception:
                    title = ''
                new_row = ('<td title="' + str(title) + '">' + str(thing)
                           + '</td>')
            html_parts.append(new_row + '\n')

        html_parts.append('</tr>\n')

    html_parts.append('</table>\n')
    html_parts.append('</body>\n</html>\n')
    nice_sbtab = ''.join(html_parts)

    # Write the document in one call rather than one character at a time.
    with open(file_name[:-4] + '.html', 'w') as html_file:
        html_file.write(nice_sbtab)

    return nice_sbtab
Пример #5
0
    def __init__(self, table, sbtab_name, def_table=None,def_name=None):
        '''
        Initialises validator and starts check for file and table format.

        Parameters
        ----------
        table : str
            SBtab file content. NOTE(review): presumably the raw file string,
            since it is handed to misc.getDelimiter and
            tablibIO.importSetNew before any tablib object exists - confirm
            against callers.
        sbtab_name : str
            File path of the SBtab file.
        def_table : str
            SBtab definition table as string representation. If omitted, it
            is read from the file named by def_name.
        def_name : str
            SBtab definition table name; defaults to 'definitions.tsv'.

        Notes
        -----
        If the definition table has to be read from disk and that fails, a
        message is printed and the process exits via sys.exit().
        '''
        delimiter = misc.getDelimiter(table)
        sbtab_tablib = tablibIO.importSetNew(table, sbtab_name, delimiter)

        if not def_name:
            def_name = 'definitions.tsv'
        if not def_table:
            # Only read failures are handled here; the context manager closes
            # the handle even when reading raises.
            try:
                with open(def_name, 'r') as default_def:
                    def_table = default_def.read()
            except (IOError, OSError, UnicodeError):
                print('Definition file could not be loaded, so the validation could not be started. Please provide definition file as argument or make it is located in the same directory as this script.')
                sys.exit()

        # import definitions from definition table
        definition_table = tablibIO.importSetNew(def_table, def_name,
                                                 separator='\t')
        definition_sbtab = SBtab.SBtabTable(definition_table, def_name)
        self.definitions = definition_sbtab.sbtab_list

        # The definition rows are the third entry of the definitions list.
        # Each row is read at index 1 (column name), index 3 (table type)
        # and, further down, index 4 (format).
        definition_rows = self.definitions[2]

        # create list of distinct valid table types
        self.allowed_table_types = list(
            set([row[3] for row in definition_rows]))

        # create dict of valid column names per table type
        self.allowed_columns = {}
        for table_type in self.allowed_table_types:
            self.allowed_columns[table_type] = [
                row[1] for row in definition_rows if row[3] == table_type]

        # initialize warning list
        self.warnings = []
        # define self variables
        self.table = sbtab_tablib
        self.filename = sbtab_name

        # check file format and header row
        self.checkTableFormat()

        # create the SBtab instance for the table under validation
        self.sbtab = SBtab.SBtabTable(self.table, self.filename)

        # map each column name of this table's type to its format
        self.column2format = {}
        for row in self.definitions[2]:
            if row[3] == self.sbtab.table_type:
                self.column2format[row[1]] = row[4]

        # remove empty column headers
        self.sbtab.columns = [
            element for element in self.sbtab.columns if element != '']

        # determine headers
        self.determineHeaders()

        # check SBtab object for validity
        self.checkTable()

        '''
Пример #6
0
        '''
        return self.warnings

if __name__ == '__main__':

    # Command-line entry point: argv[1] is the SBtab file (required),
    # argv[2] the definition file (optional).
    if len(sys.argv) < 2:
        print('You have not provided input arguments. Please start the script by also providing an SBtab file and the required definition file: >python validatorSBtab.py SBtab.csv definition.tsv')
        sys.exit()

    file_name = sys.argv[1]
    with open(file_name, 'r') as sbtab_file_o:
        sbtab_file = sbtab_file_o.read()

    delimiter = misc.getDelimiter(sbtab_file)
    sbtab_tablib = tablibIO.importSetNew(sbtab_file, file_name, delimiter)

    # The definition file is optional: a missing argument or an unreadable
    # file leaves def_tab as None, which is then passed to ValidateTable.
    try:
        default_def = sys.argv[2]
        with open(default_def, 'r') as def_file:
            def_tab = def_file.read()
    except (IndexError, IOError, OSError, UnicodeError):
        def_tab = None

    validator_output = []
    Validate_file_class = ValidateFile(sbtab_file, file_name)
    validator_output.append(Validate_file_class.returnOutput())
    # The validator receives the raw file content; sbtab_tablib is only
    # used by the disabled alternative call below.
    Validate_table_class = ValidateTable(sbtab_file, file_name, def_tab)
    #Validate_table_class = ValidateTable(sbtab_tablib,file_name,def_tab)
Пример #7
0
if __name__ == '__main__':

    # this main function is deprecated!
    # argv[1] is the SBtab file (required), argv[2] the definition file
    # (optional).
    if len(sys.argv) < 2:
        # Built from adjacent literals so that no line breaks or indentation
        # end up inside the printed message.
        print('You have not provided input arguments. Please start the '
              'script by also providing an SBtab file and the required '
              'definition file: >python validatorSBtab.py SBtab.csv '
              'definition.tsv')
        sys.exit()

    file_name = sys.argv[1]
    with open(file_name, 'r') as sbtab_file_o:
        sbtab_file = sbtab_file_o.read()
    # NOTE(review): the detected delimiter is not used below; the call is
    # kept in case misc.getDelimiter is relied on to reject bad input.
    delimiter = misc.getDelimiter(sbtab_file)

    # The definition file is optional: a missing argument or an unreadable
    # file leaves def_tab as None, which is then passed to ValidateTable.
    try:
        default_def = sys.argv[2]
        with open(default_def, 'r') as def_file:
            def_tab = def_file.read()
    except (IndexError, IOError, OSError, UnicodeError):
        def_tab = None

    validator_output = []
    Validate_file_class = ValidateFile(sbtab_file, file_name)
    validator_output.append(Validate_file_class.return_output())
    Validate_table_class = ValidateTable(sbtab_file, file_name, def_tab)
    validator_output.append(Validate_table_class.return_output())