Example #1
    def run_iterations(self, start_iter=None, num_iter=None):
        if start_iter is None:
            start_iter = self['start_iteration']
        if num_iter is None:
            num_iter = self['num_iterations'] + 1

        if self.config_params['interactive']:  # stop here in interactive mode
            return

        for iteration in range(start_iter, num_iter):
            start_time = util.current_millis()
            force = self['resume'] and iteration == start_iter
            self.run_iteration(iteration, force=force)

            # garbage collection after everything in the iteration has gone out of scope
            gc.collect()
            elapsed = util.current_millis() - start_time
            logging.debug("performed iteration %d in %f s.", iteration, elapsed / 1000.0)

            if 'profile_mem' in self['debug'] and (iteration == 1 or iteration % 100 == 0):
                with open(os.path.join(self['output_dir'], 'memprofile.tsv'), 'a') as outfile:
                    self.write_mem_profile(outfile, iteration)


        """run post processing after the last iteration. We store the results in
        num_iterations + 1 to have a clean separation"""
        if self['postadjust']:
            logging.info("Postprocessing: Adjusting the clusters....")
            # run combiner using the weights of the last iteration

            rscores = self.row_scoring.combine_cached(self['num_iterations'])
            rd_scores = memb.get_row_density_scores(self.membership(), rscores)
            logging.info("Recomputed combined + density scores.")
            memb.postadjust(self.membership(), rd_scores)

            BSCM_obj = self.column_scoring.get_BSCM()
            if BSCM_obj is not None:
                new_membership = BSCM_obj.resplit_clusters(self.membership(), cutoff=0.05)

            logging.info("Adjusted. Now re-run scoring (iteration: %d)",
                         self['num_iterations'])
            iteration_result = {'iteration': self['num_iterations'] + 1,
                                'score_means': {}}

            combined_scores = self.row_scoring.compute_force(iteration_result)

            # write the combined scores for benchmarking/diagnostics
            with open(self.combined_rscores_pickle_path(), 'wb') as outfile:
                pickle.dump(combined_scores, outfile)

            self.write_results(iteration_result)
            self.write_stats(iteration_result)
            self.update_iteration(iteration)

            # default behaviour:
            # always write complete result into a cmresults.tsv for R/cmonkey
            # compatibility
            conn = self.__dbconn()
            path = os.path.join(self['output_dir'], 'cmresults-postproc.tsv.bz2')
            with bz2.BZ2File(path, 'w') as outfile:
                debug.write_iteration(conn, outfile,
                                      self['num_iterations'] + 1,
                                      self['num_clusters'], self['output_dir'])
            # TODO: Why is conn never closed?  Where does it write to the db?

            # additionally: run tomtom on the motifs if requested
            if (self['MEME']['global_background'] == 'True' and
                self['Postprocessing']['run_tomtom'] == 'True'):
                meme.run_tomtom(conn, self['output_dir'], self['MEME']['version'])

        self.write_finish_info()
        logging.info("Done !!!!")
Example #2
    def test_post_adjust(self):
        """tests the row scoring by itself, which combines scoring and fixing
        extreme values"""
        rowscores = read_matrix('testdata/rowscores-43-before-postproc.tsv')
        memb.postadjust(self.membership, rowscores)
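
The test depends on a read_matrix() helper to load the row-score fixture. A rough sketch of such a helper is shown below, assuming a plain tab-separated file with column names in the header row and row names in the first column; the actual cmonkey2 test helper and fixture layout may differ:

import csv


def read_matrix(path):
    """Parse a TSV score file into (row_names, column_names, values).
    Hypothetical stand-in for the helper used by the test above."""
    with open(path) as infile:
        reader = csv.reader(infile, delimiter='\t')
        header = next(reader)
        column_names = header[1:]
        row_names, values = [], []
        for row in reader:
            row_names.append(row[0])
            values.append([float(cell) for cell in row[1:]])
    return row_names, column_names, values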