Пример #1
0
    def __init__(self, annotation_source, input_file_paths, files_expected={}, files_structure={}):
        self.annotation_source = annotation_source
        self.input_file_paths = input_file_paths
        self.files_expected = files_expected
        self.files_structure = files_structure
        self.input_file_names = [os.path.basename(p) for p in input_file_paths]
        self.paths = {}
        self.dicts = {}

        if len(input_file_paths) != len(files_expected):
            raise ConfigError, "This parser (%s) requires %d file(s), but %d of them were sent. This class is now\
                                confused :/" % (self.annotation_source, len(files_expected), len(input_file_paths))

        if sorted(files_expected.keys()) != sorted(files_structure.keys()):
            raise ConfigError, "Items in files_expected and files_structure must match."

        missing_files = []
        for f in self.files_expected.values():
            if os.path.basename(f) not in self.input_file_names:
                missing_files.append(f)
        if missing_files:
            if sorted(missing_files) == sorted(self.files_expected.values()):
                raise ConfigError, "%s parser requires these file(s): %s. Please refer to the documentation if you\
                                    don't know how to generate them" % (self.annotation_source,
                                                                        ', '.join(self.files_expected.values()))

            raise ConfigError, "%s parser requires %d files (%s). %s missing from your input: %s"\
                                     % (self.annotation_source,
                                        len(self.files_expected),
                                        ', '.join(self.files_expected.values()),
                                        "These files were" if len(missing_files) > 1 else "This file was",
                                        ", ".join(missing_files))

        for alias in self.files_expected:
            for i in range(0, len(self.input_file_names)):
                file_name = self.input_file_names[i]
                if os.path.basename(self.files_expected[alias]) == file_name:
                    self.paths[alias] = self.input_file_paths[i]

        for alias in self.files_expected:
            f = self.files_structure[alias]
            if 'type' in f:
                if f['type'] == 'fasta':
                    self.dicts[alias] = get_dict_f(self.paths[alias])
                else:
                    raise ConfigError, "Parser class does not know about file type '%s' :/" % f['type']
            else:
                # then it is tab-delimited
                no_header = f['no_header'] if 'no_header' in f else False
                separator = f['separator'] if 'separator' in f else '\t'
                indexing_field = f['indexing_field'] if 'indexing_field' in f else 0
                self.dicts[alias] = get_dict(self.paths[alias], no_header=no_header,
                                             column_names=self.files_structure[alias]['col_names'],
                                             column_mapping=self.files_structure[alias]['col_mapping'],
                                             indexing_field=indexing_field, separator=separator,
                                             ascii_only=True)
Пример #2
0
    def __init__(self, annotation_source, input_file_paths, files_expected={}, files_structure={}):
        self.annotation_source = annotation_source
        self.input_file_paths = input_file_paths
        self.files_expected = files_expected
        self.files_structure = files_structure
        self.input_file_names = [os.path.basename(p) for p in input_file_paths]
        self.paths = {}
        self.dicts = {}

        if len(input_file_paths) != len(files_expected):
            raise ConfigError("This parser (%s) requires %d file(s), but %d of them were sent. This class is now\
                                confused :/" % (self.annotation_source, len(files_expected), len(input_file_paths)))

        if sorted(files_expected.keys()) != sorted(files_structure.keys()):
            raise ConfigError("Items in files_expected and files_structure must match.")

        missing_files = []
        for f in list(self.files_expected.values()):
            if os.path.basename(f) not in self.input_file_names:
                missing_files.append(f)
        if missing_files:
            if sorted(missing_files) == sorted(self.files_expected.values()):
                raise ConfigError("%s parser requires these file(s): %s. Please refer to the documentation if you\
                                    don't know how to generate them" % (self.annotation_source,
                                                                        ', '.join(list(self.files_expected.values()))))

            raise ConfigError("%s parser requires %d files (%s). %s missing from your input: %s"\
                                     % (self.annotation_source,
                                        len(self.files_expected),
                                        ', '.join(list(self.files_expected.values())),
                                        "These files were" if len(missing_files) > 1 else "This file was",
                                        ", ".join(missing_files)))

        for alias in self.files_expected:
            for i in range(0, len(self.input_file_names)):
                file_name = self.input_file_names[i]
                if os.path.basename(self.files_expected[alias]) == file_name:
                    self.paths[alias] = self.input_file_paths[i]

        for alias in self.files_expected:
            f = self.files_structure[alias]
            if 'type' in f:
                if f['type'] == 'fasta':
                    self.dicts[alias] = get_dict_f(self.paths[alias])
                else:
                    raise ConfigError("Parser class does not know about file type '%s' :/" % f['type'])
            else:
                # then it is tab-delimited
                no_header = f['no_header'] if 'no_header' in f else False
                separator = f['separator'] if 'separator' in f else '\t'
                indexing_field = f['indexing_field'] if 'indexing_field' in f else 0
                self.dicts[alias], failed_lines = get_dict(self.paths[alias], no_header=no_header,
                                                           column_names=self.files_structure[alias]['col_names'],
                                                           column_mapping=self.files_structure[alias]['col_mapping'],
                                                           indexing_field=indexing_field, separator=separator,
                                                           ascii_only=True, return_failed_lines=True)

                if failed_lines:
                    if len(failed_lines) > 20:
                        failed_lines_text = '%s (... %d more ...)' % (', '.join([str(l) for l in failed_lines]), len(failed_lines) - 20)
                    else:
                        failed_lines_text = '%s' % (', '.join([str(l) for l in failed_lines]))

                    run.warning("This is the base parser class --a part of the code you should never hear from. PLEASE\
                                 READ THIS CAREFULLY. While anvi'o was trying to parse some files assocaited with the\
                                 annotation source `%s`, it found that %d of the lines in this file were not able to\
                                 made sense of. This part of the code does not know anything more than that. It doesn't\
                                 even know what file it is. But in general this error occurs when the mapping function\
                                 does not find what its looking for in a line. For instance, a value that was supposed to\
                                 be an integer ends up being actually a piece of text or something. Well. Here are the\
                                 line numbers if you care and can make sense of this information: %s" % \
                                                        (self.annotation_source, len(failed_lines), failed_lines_text))
Пример #3
0
    def __init__(self,
                 annotation_source,
                 input_file_paths,
                 files_expected={},
                 files_structure={}):
        self.annotation_source = annotation_source
        self.input_file_paths = input_file_paths
        self.files_expected = files_expected
        self.files_structure = files_structure
        self.input_file_names = [os.path.basename(p) for p in input_file_paths]
        self.paths = {}
        self.dicts = {}

        if len(input_file_paths) != len(files_expected):
            raise ConfigError, "This parser (%s) requires %d file(s), but %d of them were sent. This class is now\
                                confused :/" % (self.annotation_source,
                                                len(files_expected),
                                                len(input_file_paths))

        if sorted(files_expected.keys()) != sorted(files_structure.keys()):
            raise ConfigError, "Items in files_expected and files_structure must match."

        missing_files = []
        for f in self.files_expected.values():
            if os.path.basename(f) not in self.input_file_names:
                missing_files.append(f)
        if missing_files:
            if sorted(missing_files) == sorted(self.files_expected.values()):
                raise ConfigError, "%s parser requires these file(s): %s. Please refer to the documentation if you\
                                    don't know how to generate them" % (
                    self.annotation_source, ', '.join(
                        self.files_expected.values()))

            raise ConfigError, "%s parser requires %d files (%s). %s missing from your input: %s"\
                                     % (self.annotation_source,
                                        len(self.files_expected),
                                        ', '.join(self.files_expected.values()),
                                        "These files were" if len(missing_files) > 1 else "This file was",
                                        ", ".join(missing_files))

        for alias in self.files_expected:
            for i in range(0, len(self.input_file_names)):
                file_name = self.input_file_names[i]
                if os.path.basename(self.files_expected[alias]) == file_name:
                    self.paths[alias] = self.input_file_paths[i]

        for alias in self.files_expected:
            f = self.files_structure[alias]
            if f.has_key('type'):
                if f['type'] == 'fasta':
                    self.dicts[alias] = get_dict_f(self.paths[alias])
                else:
                    raise ConfigError, "Parser class does not know about file type '%s' :/" % f[
                        'type']
            else:
                # then it is tab-delimited
                no_header = f['no_header'] if f.has_key('no_header') else False
                separator = f['separator'] if f.has_key('separator') else '\t'
                indexing_field = f['indexing_field'] if f.has_key(
                    'indexing_field') else 0
                self.dicts[alias] = get_dict(
                    self.paths[alias],
                    no_header=no_header,
                    column_names=self.files_structure[alias]['col_names'],
                    column_mapping=self.files_structure[alias]['col_mapping'],
                    indexing_field=indexing_field,
                    separator=separator,
                    ascii_only=True)