Example #1
    def store_collections_dict(self):
        if self.read_only:
            return json.dumps("Sorry! This is a read-only instance.")

        source = request.forms.get('source')
        data = json.loads(request.forms.get('data'))
        colors = json.loads(request.forms.get('colors'))

        if not source:  # covers both an empty name and a missing form field
            run.info_single(
                'Lousy attempt from the user to store their collection under an empty source identifier name :/'
            )
            return json.dumps("Error: Collection name cannot be empty.")

        num_splits = sum(len(split_names) for split_names in data.values())
        if not num_splits:
            run.info_single('The user tried to store 0 splits as a collection :/')
            return json.dumps(
                "Error: There are no selections to store (you haven't selected anything)."
            )

        if source in self.interactive.collections.collections_dict:
            e = self.interactive.collections.collections_dict[source]
            if e['read_only']:
                run.info_single(
                    'Lousy attempt from the user to store their collection under "%s" :/'
                    % source)
                return json.dumps(
                    "Well, '%s' is a read-only collection, so you need to come up with a different name... Sorry!"
                    % source)

        run.info_single('A request to store %d bins that describe %d splits under '
                        'the collection id "%s" has been made.' % (len(data), num_splits, source),
                        cut_after=None)

        bins_info_dict = {}
        for bin_name in data:
            bins_info_dict[bin_name] = {
                'html_color': colors[bin_name],
                'source': "anvi-interactive"
            }

        # the db here is either a profile db, or a pan db, but it can't be both:
        db_path = self.interactive.pan_db_path or self.interactive.profile_db_path
        collections = TablesForCollections(db_path)
        try:
            collections.append(source, data, bins_info_dict)
        except ConfigError as e:
            return json.dumps(e.clear_text())

        # a new collection is stored in the database, but the interactive object
        # does not know about that and needs updatin'
        self.interactive.collections.populate_collections_dict(db_path)

        msg = "New collection '%s' with %d bin%s been stored." % (
            source, len(data), 's have' if len(data) > 1 else ' has')
        run.info_single(msg)
        return json.dumps(msg)
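
For context, here is a minimal client-side sketch (not anvi'o code) of the form payload this handler parses: `source` is the collection name, while `data` and `colors` arrive as JSON-encoded strings keyed by bin name. The server URL and route path below are hypothetical placeholders, and `requests` is an assumed dependency.

    import json
    import requests  # assumed client-side dependency

    payload = {
        'source': 'my_collection',  # collection name; must not be empty
        # bin name -> list of split names
        'data': json.dumps({'Bin_1': ['split_001', 'split_002'],
                            'Bin_2': ['split_003']}),
        # bin name -> HTML color, one entry per bin in 'data'
        'colors': json.dumps({'Bin_1': '#FF0000',
                              'Bin_2': '#00FF00'}),
    }

    # the URL and route name are made up for illustration; check the running server
    response = requests.post('http://localhost:8080/store_collection', data=payload)
    print(json.loads(response.text))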
Example #2
    def merge_bins(self, collection_name, new_bin_name, bin_names_list):
        """Merges a given list of bins in a collection"""

        self.sanity_check(collection_name)

        if not self.db_path:
            raise ConfigError("Something is off. The class does not know which database it is supposed to\
                               be working with.")

        if not isinstance(bin_names_list, list):
            raise ConfigError("The `bin_names_list` must be of thpe `set` :/")

        bins_info_dict = self.get_bins_info_dict(collection_name)
        collection_dict = self.get_collection_dict(collection_name)

        invalid_bin_names = [b for b in bin_names_list if b not in collection_dict]
        if invalid_bin_names:
            raise ConfigError("Some of the bin names you want to merge is not in the collection %s :/ Here\
                               is a list of them: %s" % (collection_name, ', '.join(invalid_bin_names)))

        items_in_new_bin = []
        for bin_name in bin_names_list:
            items_in_new_bin.extend(collection_dict[bin_name])

        # the merged bin inherits its info entry from the last bin in the list
        info_for_new_bin = copy.deepcopy(bins_info_dict[bin_names_list[-1]])
        info_for_new_bin['source'] = 'anvi-merge-bins'

        # time to remove the ones that are merged
        for bin_name in bin_names_list:
            bins_info_dict.pop(bin_name)
            collection_dict.pop(bin_name)

        # add the merged stuff
        bins_info_dict[new_bin_name] = info_for_new_bin
        collection_dict[new_bin_name] = items_in_new_bin

        tables_for_collections = TablesForCollections(self.db_path, run=terminal.Run(verbose=False))
        tables_for_collections.append(collection_name, collection_dict, bins_info_dict)

        self.run.info_single("You did it. Your bins are now merged.. Onward!", nl_before=1, nl_after=1)
Example #3
    def store_clusters_in_db(self, collection_name='CONCOCT'):
        # convert id -> bin mapping dict into a bin -> ids dict
        data = {}
        bin_info_dict = {}

        if not self.clusters:
            self.run.info('CONCOCT results in db',
                          'Nope. CONCOCT clusters are empty. Skipping!',
                          mc='red',
                          display_only=True)
            return

        for split_name in self.clusters:
            bin_id = self.clusters[split_name]
            if bin_id in data:
                data[bin_id].add(split_name)
            else:
                data[bin_id] = set([split_name])
                # poor man's random color generator: one random hex byte per RGB channel
                bin_info_dict[bin_id] = {
                    'html_color': '#' + ''.join('%02X' % random.randint(50, 230) for _ in range(3)),
                    'source': 'CONCOCT'
                }

        c = TablesForCollections(self.profile_db_path)
        c.append(collection_name, data, bin_info_dict)

        self.run.info('CONCOCT results in db',
                      self.profile_db_path,
                      display_only=True)
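
The expression tagged "poor man's random color generator" draws each RGB channel from 50..230, presumably to keep bin colors away from near-black and near-white. Pulled out as a standalone helper (a sketch, not anvi'o API), the idea looks like this:

    import random

    def random_html_color(low=50, high=230):
        """Return a random '#RRGGBB' color with each channel drawn from [low, high]."""
        return '#' + ''.join('%02X' % random.randint(low, high) for _ in range(3))

    print(random_html_color())  # e.g. '#7A9C3F'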
Example #4
    def do_profile_db(self):
        # are we working with a merged profile database?
        merged = self.summary.p_meta['merged']
        self.run.info('Merged database', 'True' if merged else 'False')

        self.progress.new('Splitting "%s"' % self.bin_id)
        self.progress.update('Subsetting the %s profile database' % ('merged' if merged else 'single'))

        bin_profile_db = dbops.ProfileDatabase(self.bin_profile_db_path)
        bin_profile_db.touch()

        # copy-paste tables that will largely stay the same from the parent
        bin_profile_db.db.copy_paste(table_name='self', source_db_path=self.profile_db_path)
        bin_profile_db.db.copy_paste(table_name='views', source_db_path=self.profile_db_path)
        bin_profile_db.db.copy_paste(table_name='states', source_db_path=self.profile_db_path)

        # update some values
        bin_profile_db.db.update_meta_value('contigs_db_hash', self.contigs_db_hash)
        bin_profile_db.db.update_meta_value('available_clusterings', None)
        bin_profile_db.db.update_meta_value('sample_id', self.bin_id)

        # setup the filtering rules for migrating data:
        tables = {}

        # this is to deal with the merged atomic data tables that are stored in merged profiles.
        # they are created on the fly during merging, so bin_profile_db.touch() did not create
        # them, and we have to do it here ourselves. while creating them in the target db, we
        # will also populate the tables dictionary for data migration:
        sample_names = self.summary.p_meta['samples']
        if merged:
            for table_name in t.atomic_data_table_structure[1:-1]:
                for target in ['splits', 'contigs']:
                    new_table_name = '_'.join([table_name, target])
                    new_table_structure = ['contig'] + sample_names + ['__parent__']
                    new_table_types = ['text'] + ['numeric'] * len(sample_names) + ['text']
                    bin_profile_db.db.create_table(new_table_name, new_table_structure, new_table_types)

                    tables[new_table_name] = ('contig', self.split_names)
        else:
            profile_db = dbops.ProfileDatabase(self.profile_db_path)
            for table_name in ['atomic_data_splits', 'atomic_data_contigs']:
                # look up the structure and types per table instead of reusing the
                # 'atomic_data_contigs' layout for both
                table_structure = profile_db.db.get_table_structure(table_name)
                table_types = profile_db.db.get_table_column_types(table_name)
                bin_profile_db.db.create_table(table_name, table_structure, table_types)

                tables[table_name] = ('contig', self.split_names)


        # we need to migrate these guys, too. unless we don't need to... if we are migrating,
        # the values in the self table are already accurate. if we are skipping, regardless
        # of what the values were, we will set the absolutely correct ones.
        if self.skip_variability_tables:
            bin_profile_db.db.update_meta_value('SNVs_profiled', False)
            bin_profile_db.db.update_meta_value('SCVs_profiled', False)
        else:
            tables[t.variable_nts_table_name] = ('split_name', self.split_names)
            tables[t.variable_codons_table_name] = ('corresponding_gene_call', self.gene_caller_ids)

        bin_profile_db.disconnect()

        self.migrate_data(tables, self.profile_db_path, self.bin_profile_db_path)

        self.progress.end()

        if not self.skip_hierarchical_clustering:
            dbops.do_hierarchical_clustering_of_items(self.bin_profile_db_path,
                                                      constants.clustering_configs['merged' if merged else 'single'],
                                                      self.split_names,
                                                      self.database_paths,
                                                      input_directory=self.bin_output_directory,
                                                      default_clustering_config=constants.merged_default,
                                                      distance=self.distance,
                                                      linkage=self.linkage,
                                                      run=terminal.Run(verbose=False),
                                                      progress=self.progress)

        # add a collection
        collection_dict = {'ALL_SPLITS': self.split_names}
        bins_info_dict = {'ALL_SPLITS': {'html_color': '#FF0000', 'source': 'anvi-split'}}
        collections = TablesForCollections(self.bin_profile_db_path)
        collections.append('DEFAULT', collection_dict, bins_info_dict=bins_info_dict)
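
The merged branch above builds each atomic data table as a 'contig' column, one numeric column per sample, and a trailing '__parent__' column. A standalone sketch with made-up sample names ('mean_coverage' stands in for one entry of `t.atomic_data_table_structure[1:-1]`) shows the resulting layout:

    sample_names = ['sample_01', 'sample_02']
    table_name = 'mean_coverage'  # illustrative stand-in for an atomic data column

    new_table_name = '_'.join([table_name, 'splits'])
    new_table_structure = ['contig'] + sample_names + ['__parent__']
    new_table_types = ['text'] + ['numeric'] * len(sample_names) + ['text']

    print(new_table_name)       # mean_coverage_splits
    print(new_table_structure)  # ['contig', 'sample_01', 'sample_02', '__parent__']
    print(new_table_types)      # ['text', 'numeric', 'numeric', 'text']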