def test_recluster(self):
    """Rebuild the index in "recluster" mode with two medoids.

    Checks that the index reads from the clusters directory and holds
    exactly two documents.
    """
    # force reindexing and clustering
    config = self.cfg
    config.index_mode = "recluster"
    config.k_medoids = 2
    index = PopconXapianIndex(config)
    self.assertEqual(index.source_dir, config.clusters_dir)
    self.assertEqual(index.get_doccount(), 2)
def test_submissions(self):
    """Check that get_submissions() returns one entry per file on disk.

    The number of submissions reported for the index source directory
    must equal the total number of files that os.walk() finds below it.
    """
    pxi = PopconXapianIndex(self.cfg)
    submissions = pxi.get_submissions(pxi.source_dir)
    # Use loop names distinct from `submissions`: on Python 2 a list
    # comprehension leaks its loop variable into the enclosing scope,
    # which would overwrite the get_submissions() result before it is
    # measured. A generator expression never leaks its variables.
    files_on_disk = sum(len(filenames)
                        for (_root, _dirs, filenames)
                        in os.walk(pxi.source_dir))
    self.assertEqual(len(submissions), files_on_disk)
def test_clustering(self):
    """Rebuild the index in "cluster" mode.

    Checks that the index reads from the clusters directory and that it
    holds one document per file found below that directory.
    """
    # force reindex with clustering
    self.cfg.index_mode = "cluster"
    index = PopconXapianIndex(self.cfg)
    self.assertEqual(index.source_dir, self.cfg.clusters_dir)
    # Count every file below the source directory.
    file_count = 0
    for _root, _dirs, filenames in os.walk(index.source_dir):
        file_count += len(filenames)
    self.assertEqual(index.get_doccount(), file_count)
def setUp(self):
    """Point the configuration at the test data and build a fresh index.

    The freshly built index is tagged with metadata "old" = "true" so
    that tests can tell whether they loaded this index or rebuilt it.
    """
    self.cfg = cfg = Config()
    cfg.popcon_index = "test_data/.sample_pxi"
    cfg.popcon_dir = "test_data/popcon_dir"
    cfg.clusters_dir = "test_data/clusters_dir"

    # build old index for all tests
    shutil.rmtree(cfg.popcon_index, ignore_errors=True)
    index_exists = os.path.exists(cfg.popcon_index)
    self.assertFalse(index_exists)

    # local variable, index will be closed before test
    old_index = PopconXapianIndex(cfg)
    self.assertEqual(old_index.get_metadata("old"), "")
    old_index.set_metadata("old", "true")
def iterate(self, params, rep, n):
    """Run one iteration of the experiment named by params['name'].

    For the "clustering" experiment, the n-th entry of params['medoids']
    is used as the number of medoids: it is stored in the configuration,
    an index is built, and the medoid count is returned together with
    the index's cluster dispersion. Any other experiment name yields an
    empty result. `rep` is not used here.
    """
    if params['name'] != "clustering":
        return {}

    k_medoids = params['medoids'][n]
    logging.info("Running iteration %d" % k_medoids)
    self.cfg.k_medoids = k_medoids
    index = PopconXapianIndex(self.cfg)
    return {'k_medoids': k_medoids,
            'dispersion': index.cluster_dispersion}
def test_reindex(self):
    """Rebuild the index in "reindex" mode.

    A rebuilt index must not carry the "old" metadata marker, so its
    value is expected to be the empty string.
    """
    # force reindex with no clustering
    self.cfg.index_mode = "reindex"
    rebuilt_index = PopconXapianIndex(self.cfg)
    old_marker = rebuilt_index.get_metadata("old")
    self.assertEqual(old_marker, "")
def test_load(self):
    """Open the index with the unchanged configuration.

    The "old" metadata marker is expected to still read "true", which
    shows the existing index was loaded rather than rebuilt.
    """
    # load the previously built index
    loaded_index = PopconXapianIndex(self.cfg)
    old_marker = loaded_index.get_metadata("old")
    self.assertEqual(old_marker, "true")
You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. """ import os import sys sys.path.insert(0,'../') import logging import datetime from config import Config from data import PopconXapianIndex if __name__ == '__main__': cfg = Config() begin_time = datetime.datetime.now() logging.info("Popcon indexing started at %s" % begin_time) # use config file or command line options popindex = PopconXapianIndex(cfg) end_time = datetime.datetime.now() logging.info("Popcon indexing completed at %s" % end_time) logging.info("Number of documents (submissions): %d" % popindex.get_doccount()) delta = end_time - begin_time logging.info("Time elapsed: %d seconds." % delta.seconds) if cfg.index_mode=="cluster" or cfg.index_mode=="recluster": logging.info("Medoids: %d\tDispersion:%f" % (cfg.k_medoids,popindex.cluster_dispersion))
You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. """ import sys sys.path.insert(0, '../') import logging import datetime from config import Config from data import PopconXapianIndex from error import Error if __name__ == '__main__': try: cfg = Config() begin_time = datetime.datetime.now() logging.info("Popcon indexing started at %s" % begin_time) pxi = PopconXapianIndex(cfg) end_time = datetime.datetime.now() logging.info("Popcon indexing completed at %s" % end_time) delta = end_time - begin_time logging.info("Time elapsed: %d seconds." % delta.seconds) if cfg.index_mode == "cluster" or cfg.index_mode == "recluster": logging.info("Medoids: %d\tDispersion:%f" % (cfg.k_medoids, pxi.cluster_dispersion)) except Error: logging.critical("Aborting proccess. Use '--debug' for more details.")