Exemplo n.º 1
0
    def load_from_files(self, args):
        """Set the instance up from user-supplied files when there is no RUNINFO dict.

        Checks that the FASTA, metadata, tree and output directory attributes
        are all set, fills `self.p_meta` with default values, loads the
        TAB-delimited metadata file as the default view, reads the FASTA file,
        builds a mock `self.splits_basic_info` dict and finally generates the
        output directory.

        Args:
            args: Not used by this method; all inputs are read from attributes
                already present on `self`.

        Raises:
            ConfigError: If any of the required parameters is missing, if
                `self.view` or `self.show_views` is set, or if the first column
                header of the metadata file is not 'contig'.
        """
        if (not self.fasta_file) or (not self.metadata) or (not self.tree) or (not self.output_dir):
            raise ConfigError("If you do not have a RUNINFO dict, you must declare each of\
                                           '-f', '-m', '-t' and '-o' parameters. Please see '--help' for\
                                           more detailed information on them.")

        if self.view:
            raise ConfigError("You can't use '-v' parameter when this program is not called with a RUNINFO.cp")

        if self.show_views:
            raise ConfigError("Sorry, there are no views to show when there is no RUNINFO.cp :/")

        metadata_path = os.path.abspath(self.metadata)
        self.p_meta['splits_fasta'] = os.path.abspath(self.fasta_file)
        self.p_meta['output_dir'] = os.path.abspath(self.output_dir)
        self.p_meta['views'] = {}
        self.p_meta['default_view'] = 'single'
        self.p_meta['default_clustering'] = 'default'
        self.p_meta['available_clusterings'] = ['default']

        # read the tree through a context manager so the file handle is closed right away
        with open(os.path.abspath(self.tree)) as tree_file:
            newick = tree_file.read()
        self.p_meta['clusterings'] = {'default': {'newick': newick}}

        self.default_view = self.p_meta['default_view']

        if self.summary_index:
            self.p_meta['profile_summary_index'] = os.path.abspath(self.summary_index)
            self.splits_summary_index = dictio.read_serialized_object(self.p_meta['profile_summary_index'])

        # sanity of the metadata
        filesnpaths.is_file_tab_delimited(metadata_path)
        metadata_columns = utils.get_columns_of_TAB_delim_file(metadata_path, include_first_column=True)
        if not metadata_columns[0] == "contig":
            raise ConfigError("The first row of the first column of the metadata file must\
                                      say 'contig', which is not the case for your metadata file\
                                      ('%s'). Please make sure this is a properly formatted metadata\
                                      file." % (metadata_path))

        # store metadata as view:
        self.views[self.default_view] = {'header': metadata_columns[1:],
                                         'dict': utils.get_TAB_delimited_file_as_dictionary(metadata_path)}

        # materialize the keys so this is a real list on Python 3 as well (on
        # Python 2 `.keys()` already returned a list)
        self.split_names_ordered = list(self.views[self.default_view]['dict'].keys())

        filesnpaths.is_file_fasta_formatted(self.p_meta['splits_fasta'])
        self.split_sequences = utils.get_FASTA_file_as_dictionary(self.p_meta['splits_fasta'])

        # setup a mock splits_basic_info dict
        self.splits_basic_info = {}
        for split_id in self.split_names_ordered:
            sequence = self.split_sequences[split_id]
            self.splits_basic_info[split_id] = {'length': len(sequence),
                                                'gc_content': utils.get_GC_content_for_sequence(sequence)}

        # reminder: this is being stored in the output dir provided as a commandline parameter:
        self.p_meta['self_path'] = os.path.join(self.p_meta['output_dir'], 'RUNINFO.cp')

        # NOTE(review): this self-assignment has no visible effect; it is kept as is
        # in case `title` is a property with a setter -- confirm and remove.
        if self.title:
            self.title = self.title

        filesnpaths.gen_output_directory(self.p_meta['output_dir'])
Exemplo n.º 2
0
    def load_from_user_files(self, args):
        """Set the instance up for the ad hoc (manual) interactive mode from user files.

        Requires a profile database path plus FASTA, view data and tree files,
        and refuses a contigs database. Fills `self.p_meta` with defaults,
        loads the TAB-delimited view data as the default view, reads the FASTA
        file, builds a mock `self.splits_basic_info` dict, creates an empty
        profile database when none exists at `self.profile_db_path`, and sets
        up the states table and the collections sources.

        Args:
            args: Not used by this method; all inputs are read from attributes
                already present on `self`.

        Raises:
            ConfigError: If a contigs database is given, if the profile
                database path or any of the required files is missing, if
                `self.view`, `self.show_views` or `self.show_states` is set, or
                if the first column header of the view data is not 'contig'.
        """
        if self.contigs_db_path:
            raise ConfigError("When you want to use the interactive interface in an ad hoc manner, you must\
                                not use a contigs database.")

        if not self.profile_db_path:
            raise ConfigError("Even when you want to use the interactive interface in an ad hoc manner by\
                                using the '--manual-mode' flag, you still need to declare a profile database.\
                                The profile database in this mode only used to read or store the 'state' of\
                                the display for visualization purposes. You DO NOT need to point to an already\
                                existing database, as anvi'o will generate an empty one for your if there is no\
                                profile database.")

        if (not self.fasta_file) or (not self.view_data_path) or (not self.tree):
            raise ConfigError("When you are running the interactive interface in manual mode, you must declare\
                                each of '-f', '-d', and '-t' parameters. Please see the help menu for more info.")

        if self.view:
            raise ConfigError("You can't use '--view' parameter when you are running the interactive interface\
                                in manual mode")

        if self.show_views:
            raise ConfigError("Sorry, there are no views to show in manual mode :/")

        if self.show_states:
            raise ConfigError("Sorry, there are no states to show in manual mode :/")

        view_data_path = os.path.abspath(self.view_data_path)
        self.p_meta['splits_fasta'] = os.path.abspath(self.fasta_file)
        self.p_meta['output_dir'] = None
        self.p_meta['views'] = {}
        self.p_meta['merged'] = True
        self.p_meta['default_view'] = 'single'
        self.p_meta['default_clustering'] = 'default'
        self.p_meta['available_clusterings'] = ['default']

        # read the tree through a context manager so the file handle is closed right away
        with open(os.path.abspath(self.tree)) as tree_file:
            newick = tree_file.read()
        self.p_meta['clusterings'] = {'default': {'newick': newick}}

        self.default_view = self.p_meta['default_view']

        # sanity of the view data
        filesnpaths.is_file_tab_delimited(view_data_path)
        view_data_columns = utils.get_columns_of_TAB_delim_file(view_data_path, include_first_column=True)
        if not view_data_columns[0] == "contig":
            raise ConfigError("The first row of the first column of the view data file must\
                                      say 'contig', which is not the case for your view data file\
                                      ('%s'). Please make sure this is a properly formatted view data\
                                      file." % (view_data_path))

        # load view data as the default view:
        self.views[self.default_view] = {'header': view_data_columns[1:],
                                         'dict': utils.get_TAB_delimited_file_as_dictionary(view_data_path)}

        # materialize the keys so this is a real list on Python 3 as well (on
        # Python 2 `.keys()` already returned a list)
        self.split_names_ordered = list(self.views[self.default_view]['dict'].keys())

        # we assume that the sample names are the header of the view data, so we might as well set it up:
        self.p_meta['samples'] = self.views[self.default_view]['header']

        filesnpaths.is_file_fasta_formatted(self.p_meta['splits_fasta'])
        self.split_sequences = utils.get_FASTA_file_as_dictionary(self.p_meta['splits_fasta'])

        # setup a mock splits_basic_info dict
        self.splits_basic_info = {}
        for split_id in self.split_names_ordered:
            sequence = self.split_sequences[split_id]
            self.splits_basic_info[split_id] = {'length': len(sequence),
                                                'gc_content': utils.get_GC_content_for_sequence(sequence)}

        # create a new, empty profile database for ad hoc operations
        if not os.path.exists(self.profile_db_path):
            profile_db = ProfileDatabase(self.profile_db_path)
            profile_db.create({'db_type': 'profile',
                               'merged': True,
                               'contigs_db_hash': None,
                               'samples': ','.join(self.p_meta['samples'])})

        # create an instance of states table
        self.states_table = TablesForStates(self.profile_db_path, anvio.__profile__version__)

        # also populate collections, if there are any
        self.collections.populate_sources_dict(self.profile_db_path, anvio.__profile__version__)

        # NOTE(review): this self-assignment has no visible effect; it is kept as is
        # in case `title` is a property with a setter -- confirm and remove.
        if self.title:
            self.title = self.title
Exemplo n.º 3
0
    def load_manual_mode(self, args):
        """Set the instance up for the manual mode of the interactive interface.

        Only the tree file is mandatory. When view data is given it becomes the
        default view; otherwise a mock view is built from the leaf names of the
        tree. A FASTA file is optional and, when present, is used to fill
        `self.split_sequences` and a mock `self.splits_basic_info`. An empty
        profile database is created if nothing exists at
        `self.profile_db_path`, after which the states table and collections
        are set up.

        Args:
            args: Not used by this method; all inputs are read from attributes
                already present on `self`.

        Raises:
            ConfigError: If a contigs database is given, if the profile
                database path or the tree is missing, if an existing profile
                database carries a contigs database hash, if `self.view`,
                `self.show_views` or `self.show_states` is set, if the first
                column header of the view data is not 'contig', or if names in
                the view are missing from the FASTA file.
        """
        if self.contigs_db_path:
            raise ConfigError("When you want to use the interactive interface in manual mode, you must\
                                not use a contigs database.")

        # this check has to come before any filesystem call on the path: testing the
        # existence of a path that is None fails with a TypeError rather than with
        # the explanatory error below.
        if not self.profile_db_path:
            raise ConfigError("Even when you want to use the interactive interface in manual mode, you need\
                                to declare a profile database. The profile database in this mode only used to\
                                read or store the 'state' of the display for visualization purposes. You DO\
                                NOT need to point to an already existing database, as anvi'o will generate\
                                an empty one for your if there is no profile database.")

        # if the user is using an existing profile database, we need to make sure that it is not associated
        # with a contigs database, since it would mean that it is a full anvi'o profile database and should
        # not be included in manual operations.
        if os.path.exists(self.profile_db_path):
            profile_db = ProfileDatabase(self.profile_db_path)
            if profile_db.meta['contigs_db_hash']:
                raise ConfigError("Well. It seems the profile database is associated with a contigs database,\
                                    which means using it in manual mode is not the best way to use it. Probably\
                                    what you wanted to do is to let the manual mode create a new profile database\
                                    for you. Simply type in a new profile database path (it can be a file name\
                                    that doesn't exist).")

        if not self.tree:
            raise ConfigError("When you are running the interactive interface in manual mode, you must declare\
                                at least the tree file. Please see the documentation for help.")

        if self.view:
            raise ConfigError("You can't use '--view' parameter when you are running the interactive interface\
                                in manual mode")

        if self.show_views:
            raise ConfigError("Sorry, there are no views to show in manual mode :/")

        if self.show_states:
            raise ConfigError("Sorry, there are no states to show in manual mode :/")

        filesnpaths.is_file_exists(self.tree)
        tree = filesnpaths.is_proper_newick(self.tree)

        view_data_path = os.path.abspath(self.view_data_path) if self.view_data_path else None
        self.p_meta['splits_fasta'] = os.path.abspath(self.fasta_file) if self.fasta_file else None
        self.p_meta['output_dir'] = None
        self.p_meta['views'] = {}
        self.p_meta['merged'] = True
        self.p_meta['default_view'] = 'single'

        clustering_id = '%s:unknown:unknown' % filesnpaths.get_name_from_file_path(self.tree)
        self.p_meta['default_clustering'] = clustering_id
        self.p_meta['available_clusterings'] = [clustering_id]

        # collapse the tree file into a single line; the context manager makes sure
        # the file handle is closed right away
        with open(os.path.abspath(self.tree)) as tree_file:
            newick = ''.join([line.strip() for line in tree_file])
        self.p_meta['clusterings'] = {clustering_id: {'newick': newick}}

        self.default_view = self.p_meta['default_view']

        if self.view_data_path:
            # sanity of the view data
            filesnpaths.is_file_tab_delimited(view_data_path)
            view_data_columns = utils.get_columns_of_TAB_delim_file(view_data_path, include_first_column=True)
            if not view_data_columns[0] == "contig":
                raise ConfigError("The first row of the first column of the view data file must\
                                    say 'contig', which is not the case for your view data file\
                                    ('%s'). Please make sure this is a properly formatted view data\
                                    file." % (view_data_path))

            # load view data as the default view:
            self.views[self.default_view] = {'header': view_data_columns[1:],
                                             'dict': utils.get_TAB_delimited_file_as_dictionary(view_data_path)}
        else:
            # no view data is provided... it is only the tree we have. we will create a mock 'view data dict'
            # here using what is in the tree.
            ad_hoc_dict = {}
            for leaf in tree.get_leaves():
                ad_hoc_dict[leaf.name] = {'names': leaf.name}

            self.views[self.default_view] = {'header': ['names'],
                                             'dict': ad_hoc_dict}

        # materialize the keys so this is a real list on Python 3 as well (on
        # Python 2 `.keys()` already returned a list)
        self.split_names_ordered = list(self.views[self.default_view]['dict'].keys())

        # we assume that the sample names are the header of the view data, so we might as well set it up:
        self.p_meta['samples'] = self.views[self.default_view]['header']

        # if we have an input FASTA file, we will set up the split_sequences and splits_basic_info dicts,
        # otherwise we will leave them empty
        self.splits_basic_info = {}
        self.split_sequences = None
        if self.p_meta['splits_fasta']:
            filesnpaths.is_file_fasta_formatted(self.p_meta['splits_fasta'])
            self.split_sequences = utils.get_FASTA_file_as_dictionary(self.p_meta['splits_fasta'])

            names_missing_in_FASTA = set(self.split_names_ordered) - set(self.split_sequences.keys())
            num_names_missing_in_FASTA = len(names_missing_in_FASTA)
            if num_names_missing_in_FASTA:
                raise ConfigError('Some of the names in your view data does not have corresponding entries in the\
                                    FASTA file you provided. Here is an example to one of those %d names that occur\
                                    in your data file, but not in the FASTA file: "%s"'
                                  % (num_names_missing_in_FASTA, names_missing_in_FASTA.pop()))

            # setup a mock splits_basic_info dict
            for split_id in self.split_names_ordered:
                sequence = self.split_sequences[split_id]
                self.splits_basic_info[split_id] = {'length': len(sequence),
                                                    'gc_content': utils.get_GC_content_for_sequence(sequence)}

        # create a new, empty profile database for manual operations
        if not os.path.exists(self.profile_db_path):
            profile_db = ProfileDatabase(self.profile_db_path)
            profile_db.create({'db_type': 'profile',
                               'merged': True,
                               'contigs_db_hash': None,
                               'samples': ','.join(self.p_meta['samples'])})

        # create an instance of states table
        self.states_table = TablesForStates(self.profile_db_path, anvio.__profile__version__)

        # also populate collections, if there are any
        self.collections.populate_collections_dict(self.profile_db_path, anvio.__profile__version__)

        # NOTE(review): this self-assignment has no visible effect; it is kept as is
        # in case `title` is a property with a setter -- confirm and remove.
        if self.title:
            self.title = self.title
Exemplo n.º 4
0
    def load_from_user_files(self, args):
        """Set the instance up for the ad hoc (manual) interactive mode from user files.

        Requires a profile database path plus view data and tree files, and
        refuses a contigs database; a FASTA file is optional. Fills
        `self.p_meta` with defaults, loads the TAB-delimited view data as the
        default view, optionally reads the FASTA file to build a mock
        `self.splits_basic_info` dict, creates an empty profile database when
        none exists at `self.profile_db_path`, and sets up the states table
        and the collections sources.

        Args:
            args: Not used by this method; all inputs are read from attributes
                already present on `self`.

        Raises:
            ConfigError: If a contigs database is given, if the profile
                database path, the view data or the tree is missing, if
                `self.view`, `self.show_views` or `self.show_states` is set, if
                the first column header of the view data is not 'contig', or if
                names in the view data are missing from the FASTA file.
        """
        if self.contigs_db_path:
            raise ConfigError("When you want to use the interactive interface in an ad hoc manner, you must\
                                not use a contigs database.")

        if not self.profile_db_path:
            raise ConfigError("Even when you want to use the interactive interface in an ad hoc manner by\
                                using the '--manual-mode' flag, you still need to declare a profile database.\
                                The profile database in this mode only used to read or store the 'state' of\
                                the display for visualization purposes. You DO NOT need to point to an already\
                                existing database, as anvi'o will generate an empty one for your if there is no\
                                profile database.")

        if (not self.view_data_path) or (not self.tree):
            raise ConfigError("When you are running the interactive interface in manual mode, you must declare\
                                each of the '-d', and '-t' parameters. Please see the documentation for help.")

        if self.view:
            raise ConfigError("You can't use '--view' parameter when you are running the interactive interface\
                                in manual mode")

        if self.show_views:
            raise ConfigError("Sorry, there are no views to show in manual mode :/")

        if self.show_states:
            raise ConfigError("Sorry, there are no states to show in manual mode :/")

        filesnpaths.is_file_exists(self.tree)
        filesnpaths.is_proper_newick(self.tree)

        view_data_path = os.path.abspath(self.view_data_path)
        self.p_meta['splits_fasta'] = os.path.abspath(self.fasta_file) if self.fasta_file else None
        self.p_meta['output_dir'] = None
        self.p_meta['views'] = {}
        self.p_meta['merged'] = True
        self.p_meta['default_view'] = 'single'
        self.p_meta['default_clustering'] = 'default'
        self.p_meta['available_clusterings'] = ['default']

        # read the tree through a context manager so the file handle is closed right away
        with open(os.path.abspath(self.tree)) as tree_file:
            newick = tree_file.read()
        self.p_meta['clusterings'] = {'default': {'newick': newick}}

        self.default_view = self.p_meta['default_view']

        # sanity of the view data
        filesnpaths.is_file_tab_delimited(view_data_path)
        view_data_columns = utils.get_columns_of_TAB_delim_file(view_data_path, include_first_column=True)
        if not view_data_columns[0] == "contig":
            raise ConfigError("The first row of the first column of the view data file must\
                                      say 'contig', which is not the case for your view data file\
                                      ('%s'). Please make sure this is a properly formatted view data\
                                      file." % (view_data_path))

        # load view data as the default view:
        self.views[self.default_view] = {'header': view_data_columns[1:],
                                         'dict': utils.get_TAB_delimited_file_as_dictionary(view_data_path)}

        # materialize the keys so this is a real list on Python 3 as well (on
        # Python 2 `.keys()` already returned a list)
        self.split_names_ordered = list(self.views[self.default_view]['dict'].keys())

        # we assume that the sample names are the header of the view data, so we might as well set it up:
        self.p_meta['samples'] = self.views[self.default_view]['header']

        # if we have an input FASTA file, we will set up the split_sequences and splits_basic_info dicts,
        # otherwise we will leave them empty
        self.splits_basic_info = {}
        self.split_sequences = None
        if self.p_meta['splits_fasta']:
            filesnpaths.is_file_fasta_formatted(self.p_meta['splits_fasta'])
            self.split_sequences = utils.get_FASTA_file_as_dictionary(self.p_meta['splits_fasta'])

            names_missing_in_FASTA = set(self.split_names_ordered) - set(self.split_sequences.keys())
            num_names_missing_in_FASTA = len(names_missing_in_FASTA)
            if num_names_missing_in_FASTA:
                raise ConfigError('Some of the names in your view data does not have corresponding entries in the\
                                    FASTA file you provided. Here is an example to one of those %d names that occur\
                                    in your data file, but not in the FASTA file: "%s"'
                                  % (num_names_missing_in_FASTA, names_missing_in_FASTA.pop()))

            # setup a mock splits_basic_info dict
            for split_id in self.split_names_ordered:
                sequence = self.split_sequences[split_id]
                self.splits_basic_info[split_id] = {'length': len(sequence),
                                                    'gc_content': utils.get_GC_content_for_sequence(sequence)}

        # create a new, empty profile database for ad hoc operations
        if not os.path.exists(self.profile_db_path):
            profile_db = ProfileDatabase(self.profile_db_path)
            profile_db.create({'db_type': 'profile',
                               'merged': True,
                               'contigs_db_hash': None,
                               'samples': ','.join(self.p_meta['samples'])})

        # create an instance of states table
        self.states_table = TablesForStates(self.profile_db_path, anvio.__profile__version__)

        # also populate collections, if there are any
        self.collections.populate_sources_dict(self.profile_db_path, anvio.__profile__version__)

        # NOTE(review): this self-assignment has no visible effect; it is kept as is
        # in case `title` is a property with a setter -- confirm and remove.
        if self.title:
            self.title = self.title
Exemplo n.º 5
0
    def load_manual_mode(self, args):
        """Set the instance up for the manual mode of the interactive interface.

        Only the tree file is mandatory. When view data is given it becomes the
        default view; otherwise a mock view is built from the leaf names of the
        tree. A FASTA file is optional and, when present, is used to fill
        `self.split_sequences` and a mock `self.splits_basic_info`. An empty
        profile database is created if nothing exists at
        `self.profile_db_path`, after which the states table and collections
        are set up.

        Args:
            args: Not used by this method; all inputs are read from attributes
                already present on `self`.

        Raises:
            ConfigError: If a contigs database is given, if the profile
                database path or the tree is missing, if `self.view`,
                `self.show_views` or `self.show_states` is set, if the first
                column header of the view data is not 'contig', or if names in
                the view are missing from the FASTA file.
        """
        if self.contigs_db_path:
            raise ConfigError("When you want to use the interactive interface in manual mode, you must\
                                not use a contigs database.")

        if not self.profile_db_path:
            raise ConfigError("Even when you want to use the interactive interface in manual mode, you need\
                                to declare a profile database. The profile database in this mode only used to\
                                read or store the 'state' of the display for visualization purposes. You DO\
                                NOT need to point to an already existing database, as anvi'o will generate\
                                an empty one for your if there is no profile database.")

        if not self.tree:
            raise ConfigError("When you are running the interactive interface in manual mode, you must declare\
                                at least the tree file. Please see the documentation for help.")

        if self.view:
            raise ConfigError("You can't use '--view' parameter when you are running the interactive interface\
                                in manual mode")

        if self.show_views:
            raise ConfigError("Sorry, there are no views to show in manual mode :/")

        if self.show_states:
            raise ConfigError("Sorry, there are no states to show in manual mode :/")

        filesnpaths.is_file_exists(self.tree)
        tree = filesnpaths.is_proper_newick(self.tree)

        view_data_path = os.path.abspath(self.view_data_path) if self.view_data_path else None
        self.p_meta['splits_fasta'] = os.path.abspath(self.fasta_file) if self.fasta_file else None
        self.p_meta['output_dir'] = None
        self.p_meta['views'] = {}
        self.p_meta['merged'] = True
        self.p_meta['default_view'] = 'single'

        clustering_id = '%s:unknown:unknown' % filesnpaths.get_name_from_file_path(self.tree)
        self.p_meta['default_clustering'] = clustering_id
        self.p_meta['available_clusterings'] = [clustering_id]

        # collapse the tree file into a single line; the context manager makes sure
        # the file handle is closed right away
        with open(os.path.abspath(self.tree)) as tree_file:
            newick = ''.join([line.strip() for line in tree_file])
        self.p_meta['clusterings'] = {clustering_id: {'newick': newick}}

        self.default_view = self.p_meta['default_view']

        if self.view_data_path:
            # sanity of the view data
            filesnpaths.is_file_tab_delimited(view_data_path)
            view_data_columns = utils.get_columns_of_TAB_delim_file(view_data_path, include_first_column=True)
            if not view_data_columns[0] == "contig":
                raise ConfigError("The first row of the first column of the view data file must\
                                    say 'contig', which is not the case for your view data file\
                                    ('%s'). Please make sure this is a properly formatted view data\
                                    file." % (view_data_path))

            # load view data as the default view:
            self.views[self.default_view] = {'header': view_data_columns[1:],
                                             'dict': utils.get_TAB_delimited_file_as_dictionary(view_data_path)}
        else:
            # no view data is provided... it is only the tree we have. we will create a mock 'view data dict'
            # here using what is in the tree.
            ad_hoc_dict = {}
            for leaf in tree.get_leaves():
                ad_hoc_dict[leaf.name] = {'names': leaf.name}

            self.views[self.default_view] = {'header': ['names'],
                                             'dict': ad_hoc_dict}

        # materialize the keys so this is a real list on Python 3 as well (on
        # Python 2 `.keys()` already returned a list)
        self.split_names_ordered = list(self.views[self.default_view]['dict'].keys())

        # we assume that the sample names are the header of the view data, so we might as well set it up:
        self.p_meta['samples'] = self.views[self.default_view]['header']

        # if we have an input FASTA file, we will set up the split_sequences and splits_basic_info dicts,
        # otherwise we will leave them empty
        self.splits_basic_info = {}
        self.split_sequences = None
        if self.p_meta['splits_fasta']:
            filesnpaths.is_file_fasta_formatted(self.p_meta['splits_fasta'])
            self.split_sequences = utils.get_FASTA_file_as_dictionary(self.p_meta['splits_fasta'])

            names_missing_in_FASTA = set(self.split_names_ordered) - set(self.split_sequences.keys())
            num_names_missing_in_FASTA = len(names_missing_in_FASTA)
            if num_names_missing_in_FASTA:
                raise ConfigError('Some of the names in your view data does not have corresponding entries in the\
                                    FASTA file you provided. Here is an example to one of those %d names that occur\
                                    in your data file, but not in the FASTA file: "%s"'
                                  % (num_names_missing_in_FASTA, names_missing_in_FASTA.pop()))

            # setup a mock splits_basic_info dict
            for split_id in self.split_names_ordered:
                sequence = self.split_sequences[split_id]
                self.splits_basic_info[split_id] = {'length': len(sequence),
                                                    'gc_content': utils.get_GC_content_for_sequence(sequence)}

        # create a new, empty profile database for manual operations
        if not os.path.exists(self.profile_db_path):
            profile_db = ProfileDatabase(self.profile_db_path)
            profile_db.create({'db_type': 'profile',
                               'merged': True,
                               'contigs_db_hash': None,
                               'samples': ','.join(self.p_meta['samples'])})

        # create an instance of states table
        self.states_table = TablesForStates(self.profile_db_path, anvio.__profile__version__)

        # also populate collections, if there are any
        self.collections.populate_collections_dict(self.profile_db_path, anvio.__profile__version__)

        # NOTE(review): this self-assignment has no visible effect; it is kept as is
        # in case `title` is a property with a setter -- confirm and remove.
        if self.title:
            self.title = self.title
Exemplo n.º 6
0
    def load_manual_mode(self, args):
        """Set the instance up for the manual mode of the interactive interface.

        At least one of the tree file and the view data file must be given.
        Item names come from the tree when there is one, otherwise from the
        first column of the view data. Two sorted 'basic' organizations of the
        item names are always registered; a tree, when present, is added as a
        newick clustering and becomes the default. View data becomes the
        default view, or a mock view is built from the item names. A FASTA
        file is optional and, when present, is used to fill
        `self.split_sequences` and a mock `self.splits_basic_info`. An empty
        profile database is created if nothing exists at
        `self.profile_db_path`, after which the states table, collections and
        description are set up.

        Args:
            args: Not used by this method; all inputs are read from attributes
                already present on `self`.

        Raises:
            ConfigError: If a contigs database is given, if the profile
                database path is missing, if an existing profile database
                carries a contigs database hash, if neither a tree nor view
                data is given, if `self.view`, `self.show_views` or
                `self.show_states` is set, or if displayed item names are
                missing from the FASTA file.
        """
        if self.contigs_db_path:
            raise ConfigError("When you want to use the interactive interface in manual mode, you must\
                                not use a contigs database.")

        if not self.profile_db_path:
            raise ConfigError("Even when you want to use the interactive interface in manual mode, you need\
                                to provide a profile database path. But you DO NOT need an already existing\
                                profile database, since anvi'o will generate an empty one for you. The profile\
                                database in this mode only used to read or store the 'state' of the display\
                                for visualization purposes, or to allow you to create and store collections.")

        # if the user is using an existing profile database, we need to make sure that it is not associated
        # with a contigs database, since it would mean that it is a full anvi'o profile database and should
        # not be included in manual operations.
        if filesnpaths.is_file_exists(self.profile_db_path, dont_raise=True):
            profile_db = ProfileDatabase(self.profile_db_path)
            if profile_db.meta['contigs_db_hash']:
                raise ConfigError("Well. It seems the profile database is associated with a contigs database,\
                                    which means using it in manual mode is not the best way to use it. Probably\
                                    what you wanted to do is to let the manual mode create a new profile database\
                                    for you. Simply type in a new profile database path (it can be a file name\
                                    that doesn't exist).")

        if not self.tree and not self.view_data_path:
            raise ConfigError("You must be joking Mr. Feynman. No tree file, and no data file? What is it that\
                               anvi'o supposed to visualize? :(")

        if not self.tree:
            self.run.warning("You haven't declared a tree file. Anvi'o will do its best to come up with an\
                              organization of your items.")

        if self.view:
            raise ConfigError("You can't use '--view' parameter when you are running the interactive interface\
                                in manual mode")

        if self.show_views:
            raise ConfigError("Sorry, there are no views to show in manual mode :/")

        if self.show_states:
            raise ConfigError("Sorry, there are no states to show in manual mode :/")

        if self.tree:
            filesnpaths.is_file_exists(self.tree)

            # collapse the tree file into a single line; the context manager makes
            # sure the file handle is closed right away
            with open(os.path.abspath(self.tree)) as tree_file:
                newick_tree_text = ''.join([line.strip() for line in tree_file])

            item_names = utils.get_names_order_from_newick_tree(newick_tree_text)
        else:
            item_names = utils.get_column_data_from_TAB_delim_file(self.view_data_path, column_indices=[0])[0][1:]

        # try to convert item names into integer values for proper sorting later. it's OK if it does
        # not work. only conversion failures are tolerated, so interrupts and unrelated errors
        # are no longer silently swallowed.
        try:
            item_names = [int(n) for n in item_names]
        except (ValueError, TypeError):
            pass

        view_data_path = os.path.abspath(self.view_data_path) if self.view_data_path else None
        self.p_meta['splits_fasta'] = os.path.abspath(self.fasta_file) if self.fasta_file else None
        self.p_meta['output_dir'] = None
        self.p_meta['views'] = {}
        self.p_meta['merged'] = True
        self.p_meta['default_view'] = 'single'
        self.default_view = self.p_meta['default_view']

        # set some default organizations of data:
        # NOTE(review): the key containing '(reverse)' holds the ascending sort and the
        # plain key holds the descending one -- presumably deliberate for how the items
        # are displayed; confirm before changing.
        self.p_meta['clusterings'] = {'Alphabetical_(reverse):none:none': {'basic': sorted(item_names)},
                                      'Alphabetical:none:none': {'basic': sorted(item_names, reverse=True)}}
        self.p_meta['available_clusterings'] = ['Alphabetical_(reverse):none:none', 'Alphabetical:none:none']
        self.p_meta['default_clustering'] = self.p_meta['available_clusterings'][0]

        # if we have a tree, let's make arrangements for it:
        if self.tree:
            clustering_id = '%s:unknown:unknown' % filesnpaths.get_name_from_file_path(self.tree)
            self.p_meta['default_clustering'] = clustering_id
            self.p_meta['available_clusterings'].append(clustering_id)
            self.p_meta['clusterings'][clustering_id] = {'newick': newick_tree_text}

        if self.view_data_path:
            # sanity of the view data
            filesnpaths.is_file_tab_delimited(view_data_path)
            view_data_columns = utils.get_columns_of_TAB_delim_file(view_data_path, include_first_column=True)

            # load view data as the default view:
            self.views[self.default_view] = {'header': view_data_columns[1:],
                                             'dict': utils.get_TAB_delimited_file_as_dictionary(view_data_path)}
        else:
            # no view data is provided... it is only the tree we have. we will create a mock 'view data dict'
            # here using what is in the tree.
            ad_hoc_dict = {}
            for item in item_names:
                ad_hoc_dict[item] = {'names': item}

            self.views[self.default_view] = {'header': ['names'],
                                             'dict': ad_hoc_dict}

        self.displayed_item_names_ordered = list(self.views[self.default_view]['dict'].keys())

        # we assume that the sample names are the header of the view data, so we might as well set it up:
        self.p_meta['samples'] = self.views[self.default_view]['header']

        # if we have an input FASTA file, we will set up the split_sequences and splits_basic_info dicts,
        # otherwise we will leave them empty
        self.splits_basic_info = {}
        self.split_sequences = None
        if self.p_meta['splits_fasta']:
            filesnpaths.is_file_fasta_formatted(self.p_meta['splits_fasta'])
            self.split_sequences = utils.get_FASTA_file_as_dictionary(self.p_meta['splits_fasta'])

            names_missing_in_FASTA = set(self.displayed_item_names_ordered) - set(self.split_sequences.keys())
            num_names_missing_in_FASTA = len(names_missing_in_FASTA)
            if num_names_missing_in_FASTA:
                raise ConfigError('Some of the names in your view data does not have corresponding entries in the\
                                    FASTA file you provided. Here is an example to one of those %d names that occur\
                                    in your data file, but not in the FASTA file: "%s"'
                                  % (num_names_missing_in_FASTA, names_missing_in_FASTA.pop()))

            # setup a mock splits_basic_info dict
            for split_id in self.displayed_item_names_ordered:
                sequence = self.split_sequences[split_id]
                self.splits_basic_info[split_id] = {'length': len(sequence),
                                                    'gc_content': utils.get_GC_content_for_sequence(sequence)}

        # create a new, empty profile database for manual operations
        if not os.path.exists(self.profile_db_path):
            profile_db = ProfileDatabase(self.profile_db_path)
            profile_db.create({'db_type': 'profile',
                               'merged': True,
                               'contigs_db_hash': None,
                               'samples': ','.join(self.p_meta['samples'])})

        # create an instance of states table
        self.states_table = TablesForStates(self.profile_db_path)

        # also populate collections, if there are any
        self.collections.populate_collections_dict(self.profile_db_path)

        # read description from self table, if it is not available get_description function will return placeholder text
        self.p_meta['description'] = get_description_in_db(self.profile_db_path)

        # NOTE(review): this self-assignment has no visible effect; it is kept as is
        # in case `title` is a property with a setter -- confirm and remove.
        if self.title:
            self.title = self.title