def __init__(self):
    """Set initial parameters.

    Loads the desktop-apps xapian index (and, when enabled in the config,
    the popcon desktop-apps index), reads the filter files listing valid
    desktop apps and debtags, selects the xapian weighting scheme, and
    installs the configured recommendation strategy.
    """
    self.cfg = Config()
    # Load xapian indexes
    # self.axi_programs = xapian.Database(cfg.axi_programs)
    self.axi_desktopapps = xapian.Database(self.cfg.axi_desktopapps)
    if self.cfg.popcon:
        # popcon (popularity contest) index is optional
        # self.popcon_programs = xapian.Database(cfg.popcon_programs)
        self.popcon_desktopapps = xapian.Database(
            self.cfg.popcon_desktopapps)
    # Load valid programs, desktopapps and tags
    # format: one package or tag name per line; '#' lines are comments
    # self.valid_programs = []
    self.valid_desktopapps = []
    self.valid_tags = []
    logging.info("Loading recommender filters")
    # with open(os.path.join(cfg.filters_dir,"programs")) as pkgs:
    #     self.valid_programs = [line.strip() for line in pkgs
    #                            if not line.startswith("#")]
    with open(os.path.join(self.cfg.filters_dir, "desktopapps")) as pkgs:
        self.valid_desktopapps = [line.strip() for line in pkgs
                                  if not line.startswith("#")]
    with open(os.path.join(self.cfg.filters_dir, "debtags")) as tags:
        self.valid_tags = [line.strip() for line in tags
                           if not line.startswith("#")]
    # Set xapian index weighting scheme: BM25 with configured parameters,
    # or xapian's traditional probabilistic weighting otherwise
    if self.cfg.weight == "bm25":
        self.weight = xapian.BM25Weight(self.cfg.bm25_k1, self.cfg.bm25_k2,
                                        self.cfg.bm25_k3, self.cfg.bm25_b,
                                        self.cfg.bm25_nl)
    else:
        self.weight = xapian.TradWeight()
    self.set_strategy(self.cfg.strategy)
def _get_new_xapiandb(self):
    """Open the main xapian database and merge the optional extra indexes.

    Returns a xapian.Database with the apt-xapian-index, the
    software-center-agent index and the UTSC index attached (when each is
    enabled), plus any additional databases registered on this object.
    Every optional index is attached best-effort: failure to open one
    never aborts the whole open.
    """
    xapiandb = xapian.Database(self._db_pathname)
    if self._use_axi:
        try:
            axi = xapian.Database(APT_XAPIAN_INDEX_DB_PATH)
            xapiandb.add_database(axi)
        except Exception as e:
            # logging.warn is a deprecated alias of logging.warning
            logging.warning("failed to add apt-xapian-index db %s" % e)
            if (Globals.DEBUG_SWITCH):
                print(
                    "Failed to add apt-xapian-index,some software may not be searched"
                )
    if (self._use_agent
            and os.path.exists(XAPIAN_BASE_PATH_SOFTWARE_CENTER_AGENT)):
        try:
            sca = xapian.Database(XAPIAN_BASE_PATH_SOFTWARE_CENTER_AGENT)
            xapiandb.add_database(sca)
        except Exception:
            # best-effort: the agent db is optional, ignore open failures
            pass
    if self._use_utsc:
        try:
            utsc_xapiandb = xapian.Database(UTSC_PATH)
            xapiandb.add_database(utsc_xapiandb)
        except Exception:
            # best-effort: the UTSC db is optional, ignore open failures
            pass
    for db in self._additional_databases:
        xapiandb.add_database(db)
    return xapiandb
def test_spell():
    """Verify spelling-suggestion behaviour before and after commit."""
    dbpath = 'db_test_spell'
    writer = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    writer.add_spelling('hello')
    writer.add_spelling('mell', 2)
    # the writable handle sees uncommitted spellings immediately
    expect(writer.get_spelling_suggestion('hell'), 'mell')
    expect([(s.term, s.termfreq) for s in writer.spellings()],
           [('hello', 1), ('mell', 2)])
    # a fresh read-only handle sees nothing until commit
    reader = xapian.Database(dbpath)
    expect(reader.get_spelling_suggestion('hell'), '')
    expect([(s.term, s.termfreq) for s in reader.spellings()], [])
    writer.commit()
    # after commit both handles agree
    reader = xapian.Database(dbpath)
    expect(writer.get_spelling_suggestion('hell'), 'mell')
    expect(reader.get_spelling_suggestion('hell'), 'mell')
    expect([(s.term, s.termfreq) for s in reader.spellings()],
           [('hello', 1), ('mell', 2)])
    writer.close()
    reader.close()
    shutil.rmtree(dbpath)
def is_xapiancachedb_need_update(self):
    """Return True when the cached xapian db must be rebuilt from the source db.

    Compares the version value (slot 1) stored in the special
    "the_#ukxapiandb#_version" document of the source and cached databases.
    Any failure (missing db, missing version document, open error) is
    treated as "needs update" so a broken cache is always rebuilt.
    The original compared possibly-unbound names (NameError when either
    version document was missing) and used a bare ``except:``.
    """
    xapian_srcFile = XAPIAN_DB_SOURCE_PATH
    xapian_destFile = os.path.join(UKSC_CACHE_DIR, "xapiandb")
    new_version = None
    old_version = None
    try:
        src_xapiandb = xapian.Database(xapian_srcFile)
        new_enquire = xapian.Enquire(src_xapiandb)
        new_enquire.set_query(xapian.Query("the_#ukxapiandb#_version"))
        for new_item in new_enquire.get_mset(0, 1):
            new_doc = new_item.document
            if new_doc.get_data() == "XAPIANDB_VERSION":
                new_version = new_doc.get_value(1)  # value slot 1: version
                des_xapiandb = xapian.Database(xapian_destFile)
                old_enquire = xapian.Enquire(des_xapiandb)
                old_enquire.set_query(
                    xapian.Query("the_#ukxapiandb#_version"))
                for old_item in old_enquire.get_mset(0, 1):
                    old_doc = old_item.document
                    old_version = old_doc.get_value(1)  # value slot 1
        if (Globals.DEBUG_SWITCH):
            print(("old xapiandb version:", old_version,
                   " new xapiandb version:", new_version))
    except Exception:
        # any xapian/OS error means the cache is unusable: rebuild it
        return True
    if new_version is None or old_version is None:
        # missing version marker in either db -> rebuild to be safe
        return True
    # NOTE(review): lexicographic string comparison, as in the original —
    # assumes comparable version encodings.  TODO confirm.
    return new_version > old_version
def _xapian_database_open(path, writable, create, data='.', log=logging):
    """Open (and optionally create) a xapian database at *path*.

    :param path: filesystem location of the index
    :param writable: open a WritableDatabase instead of a read-only one
    :param create: create the database/parent directory when missing
    :param data: unused, kept for interface compatibility
    :param log: unused, kept for interface compatibility
    :raises InvalidIndexError: on lock, open, or general database errors
    :returns: an opened xapian (Writable)Database
    """
    try:
        if create:
            try:
                directory = os.path.dirname(path)
                if directory and not os.path.isdir(directory):
                    # 0o700 (owner-only) — the original literal ``0700`` is
                    # Python 2-only octal syntax and a SyntaxError on Python 3
                    os.makedirs(directory, 0o700)
            except OSError:
                pass
        if writable:
            database = xapian.WritableDatabase(
                path, xapian.DB_CREATE_OR_OPEN if create else xapian.DB_OPEN)
        else:
            try:
                database = xapian.Database(path)
            except xapian.DatabaseError:
                if not create:
                    # original left ``database`` unbound here (NameError at
                    # return); re-raise so the outer handler reports it
                    raise
                # create an empty database, then re-open it read-only
                database = xapian.WritableDatabase(
                    path, xapian.DB_CREATE_OR_OPEN)
                database.close()
                database = xapian.Database(path)
    except xapian.DatabaseLockError as exc:
        raise InvalidIndexError("Unable to lock index at %s: %s" %
                                (path, exc))
    except xapian.DatabaseOpeningError as exc:
        raise InvalidIndexError("Unable to open index at %s: %s" %
                                (path, exc))
    except xapian.DatabaseError as exc:
        raise InvalidIndexError("Unable to use index at %s: %s" %
                                (path, exc))
    return database
def __init__(self, environ=None, request=None):
    """Open the package-search and versionmap xapian databases,
    resolving their paths from the application config (with defaults)."""
    super(XapianConnector, self).__init__(environ, request)
    search_path = config.get(
        'fedoracommunity.connector.xapian.package-search.db',
        'xapian/search')
    versionmap_path = config.get(
        'fedoracommunity.connector.xapian.versionmap.db',
        'xapian/versionmap')
    self._search_db = xapian.Database(search_path)
    self._versionmap_db = xapian.Database(versionmap_path)
def test_metadata_keys_iter():
    """Exercise metadata-key iteration on writable and read-only handles."""
    dbpath = 'db_test_metadata_iter'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)
    # order matters: later writes overwrite (or, with '', delete) earlier ones
    for key, value in (('author', 'richard'),
                       ('item1', 'hello'),
                       ('item1', 'hi'),
                       ('item2', 'howdy'),
                       ('item3', ''),
                       ('item4', 'goodbye'),
                       ('item4', ''),
                       ('type', 'greeting')):
        db.set_metadata(key, value)

    def check_keys(handle, all_keys, item_keys, type_keys):
        # all keys, unknown prefix, two 'item*' prefixes, exact 'type'
        expect(list(handle.metadata_keys()), all_keys)
        expect(list(handle.metadata_keys('foo')), [])
        expect(list(handle.metadata_keys('item')), item_keys)
        expect(list(handle.metadata_keys('it')), item_keys)
        expect(list(handle.metadata_keys('type')), type_keys)

    check_keys(db, ['author', 'item1', 'item2', 'type'],
               ['item1', 'item2'], ['type'])
    # a read-only handle sees nothing before commit
    dbr = xapian.Database(dbpath)
    check_keys(dbr, [], [], [])
    db.commit()
    check_keys(db, ['author', 'item1', 'item2', 'type'],
               ['item1', 'item2'], ['type'])
    # a fresh read-only handle sees the committed keys
    dbr = xapian.Database(dbpath)
    check_keys(dbr, ['author', 'item1', 'item2', 'type'],
               ['item1', 'item2'], ['type'])
    db.close()
    dbr.close()
    shutil.rmtree(dbpath)
def xapian_init_databases():
    """ Initializes all database objects. """
    field = 'fulltext'
    index_path = XAPIAN_DIR + "/" + field
    DATABASES[field] = xapian.Database(index_path)
def xapian_init_databases():
    """Open one xapian database per indexed field and register it."""
    for index_name in INDEXES:
        DATABASES[index_name] = xapian.Database(XAPIAN_DIR + "/" + index_name)
def indexer_axi(self, axi_sample, filters_path, terms=[]): axi_path = Initialize.DEFAULT_AXI_PATH axi = xapian.Database(axi_path) base_dir = self.config.base_dir begin_time = datetime.datetime.now() # axi sample based on the pkgs sample provided by command line if axi_sample is 'sample': with open(filters_path) as valid: pkgs_list = [line.strip() for line in valid] filter_str = 'axi_' + filters_path.split('/')[-1] index = data.SampleAptXapianIndex( pkgs_list, axi, os.path.join(base_dir, filter_str)) print "Axi size: %d" % axi.get_doccount() print "Packages list length: %d" % len(pkgs_list) print "Sample index size: %d" % index.get_doccount() # axi filtered by terms provided by command line if axi_sample is "filter": terms_str = "_".join([t.split("::")[-1] for t in terms]) index = data.FilteredXapianIndex( terms, axi, os.path.join(base_dir, "axi_" + terms_str)) print "Axi size: %d" % axi.get_doccount() print "Terms filter: %s" % terms print "Filtered index size: %d" % index.get_doccount() end_time = datetime.datetime.now() print "Indexing completed at %s" % end_time delta = end_time - begin_time print "Time elapsed: %d seconds." % delta.seconds
def handle_query(self, q):
    """Run query string *q* against the index and return a list of
    (path, title, context) triples for the top 100 matches."""
    database = xapian.Database(self.db_path)
    parser = xapian.QueryParser()
    parser.set_stemmer(xapian.Stem("english"))
    parser.set_database(database)
    parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    parsed_query = parser.parse_query(q)
    enquire = xapian.Enquire(database)
    enquire.set_query(parsed_query)
    results = []
    # fetch at most the top 100 results
    for hit in enquire.get_mset(0, 100):
        raw = hit.document.get_data()
        if not isinstance(raw, string_types):
            raw = raw.decode("utf-8")
        context = self.extract_context(raw)
        results.append((hit.document.get_value(self.DOC_PATH),
                        hit.document.get_value(self.DOC_TITLE),
                        ''.join(context)))
    return results
def search(dbpath, querystring, offset=0, pagesize=100, ident=0):
    """Run *querystring* against the database at *dbpath* and print one
    TREC-style result line per match.

    offset: starting point within the result set.
    pagesize: number of records to retrieve.
    NOTE(review): the collected ``matches`` list is never returned,
    mirroring the original code.
    """
    searched_db = xapian.Database(dbpath)
    # query parser with Portuguese stemming; no field prefixes configured
    parser = xapian.QueryParser()
    parser.set_stemmer(xapian.Stem("pt"))
    parser.set_stemming_strategy(parser.STEM_SOME)
    parsed_query = parser.parse_query(querystring)
    enquire = xapian.Enquire(searched_db)
    enquire.set_query(parsed_query)
    matches = []
    for match in enquire.get_mset(offset, pagesize):
        field = match.document.get_data()
        print(u"%(ident)s Q0 %(id)s %(rank)i %(weight)s danielatkinson_filipemoreira" % {
            'ident': ident,
            'rank': match.rank,
            'weight': match.weight,
            'id': field.split(":")[0]
        })
        matches.append(match.docid)
def search(dbpath, querystring, offset=0, pagesize=10): # offset - defines starting point within result set # pagesize - defines number of records to retrieve # Open the database we're going to search. db = xapian.Database(dbpath) # Set up a QueryParser with a stemmer and suitable prefixes queryparser = xapian.QueryParser() queryparser.set_stemmer(xapian.Stem("en")) queryparser.set_stemming_strategy(queryparser.STEM_SOME) queryparser.add_prefix("title", "S") queryparser.add_prefix("description", "XD") # And parse the query query = queryparser.parse_query(querystring) # Use an Enquire object on the database to run the query enquire = xapian.Enquire(db) enquire.set_query(query) # And print out something about each match matches = [] for match in enquire.get_mset(offset, pagesize): fields = json.loads(match.document.get_data()) print u"%(rank)i: #%(docid)3.3i %(title)s" % { 'rank': match.rank + 1, 'docid': match.docid, 'title': fields.get('TITLE', u''), } matches.append(match.docid) # Finally, make sure we log the query and displayed results support.log_matches(querystring, offset, pagesize, matches)
def _build_index(self, filepath, recreate=False):
    """Build (or reuse) a xapian sentence index for a text corpus.

    Input:
        - filepath: txt file path, supports .gz, .bz2 and plain .txt
        - recreate: bool, True forces rebuilding the index (default False)

    Side effects: sets self.parser and self.enquire for later searches.
    Fixes vs original: the indexing loop now only runs when (re)building —
    the original also ran it after opening a read-only xapian.Database,
    which cannot accept add_document; ``sent.strip()`` result was discarded
    (strings are immutable); documents are committed before searching.
    """
    cached_index = filepath + ".index"
    if os.path.exists(cached_index):
        if recreate:
            shutil.rmtree(cached_index)
    else:
        recreate = True  # no cache yet: must build it
    stemmer = xapian.Stem("english")
    if not recreate:
        # reuse the cached index read-only
        database = xapian.Database(cached_index)
    else:
        database = xapian.WritableDatabase(cached_index,
                                           xapian.DB_CREATE_OR_OPEN)
        indexer = xapian.TermGenerator()
        indexer.set_stemmer(stemmer)
        ext = os.path.splitext(filepath)[-1]
        if ext == ".bz2":
            import bz2
            open_func = bz2.open
        elif ext == ".gz":
            import gzip
            open_func = gzip.open
        else:
            open_func = open
        with open_func(filepath, mode="rt", encoding="utf-8") as f:
            # totN: sentences indexed, totP: paragraphs, totS: sentences
            # in the current paragraph
            totN, totP, totS = 0, 0, 0
            for l in tqdm(f, desc="Building index", unit=" lines"):
                l = l.strip()
                if len(l) < 1:
                    # blank line closes the current paragraph
                    if totS > 0:
                        totP += 1
                        totS = 0
                    continue
                for sent in nltk.sent_tokenize(l):
                    sent = sent.strip()
                    doc = xapian.Document()
                    doc.set_data(sent)
                    indexer.set_document(doc)
                    indexer.index_text(sent)
                    database.add_document(doc)
                    totN += 1
                    totS += 1
        database.commit()  # persist before building the searcher
    self.parser = xapian.QueryParser()
    self.parser.set_stemmer(stemmer)
    self.parser.set_database(database)
    self.parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    self.enquire = xapian.Enquire(database)
def get_axipkgs(self, axi_tag=TAGS[0], axi_path=DEFAULT_AXI_PATH):
    """Collect the values of all terms with prefix *axi_tag* from an
    apt-xapian-index database.

    Fixes vs original: ``str.lstrip(prefix)`` strips a *character set*,
    not a prefix (e.g. lstrip('XP') also eats leading 'P's/'X's of the
    name), so the prefix is removed by slicing; the iteration now includes
    the last docid (``get_lastdocid()`` is itself a valid docid); the bare
    ``except:`` is narrowed to Exception.
    """
    axi = xapian.Database(axi_path)
    all_terms = set()
    for docid in range(1, axi.get_lastdocid() + 1):
        try:
            doc = axi.get_document(docid)
        except Exception:
            # docids may be sparse (deleted documents); skip holes
            continue
        for t in doc.termlist():
            if t.term.startswith(axi_tag):
                term = t.term[len(axi_tag):]  # strip the prefix exactly once
                # NOTE(review): a single leading 'M' marker is dropped, as
                # the original intended — TODO confirm marker semantics
                if term.startswith('M'):
                    term = term[1:]
                all_terms.add(term)
                break  # only the first matching term per document
    return all_terms
def search():
    # Interactive (Python 2) search loop over a local xapian index built
    # from forum posts.  Tokenizes the user's query with jieba, ANDs the
    # terms together, and prints the top-10 hits until the user quits.
    # NOTE(review): ``str`` shadows the builtin; ``int(running)`` raises
    # ValueError on non-numeric input — both left unchanged here.
    database = xapian.Database('indexes/')
    enquire = xapian.Enquire(database)
    running = 1
    while int(running):
        str = raw_input("input the key words:")
        terms = []
        # split the query with jieba's search-mode segmentation
        a = jieba.cut_for_search(str)
        for b in a:
            terms.append(b.encode("utf-8"))
        qp = xapian.QueryParser()  # build the query parser
        qp.set_database(database)
        qp.set_default_op(xapian.Query.OP_AND)  # set the query strategy
        #query = qp.parse_query(terms)
        # build the query directly from the term list
        # (original author's note said this call was not well understood)
        query = xapian.Query(xapian.Query.OP_OR, terms)
        enquire.set_query(query)
        matches = enquire.get_mset(0, 10)
        print "%i results found" % matches.get_matches_estimated()
        for match in matches:
            a = match.document.get_data()
            # WARNING(review): eval() on stored document data executes
            # arbitrary code if the index is tampered with — should be
            # ast.literal_eval or json; flagged only, not changed here.
            d = eval(a)
            print "贴吧:", d["title"]
            print "作者:", d["reply"]["name"]
            print "回复:", d["reply"]["content"]
            print "时间:", d["reply"]["time"]
        running = raw_input("again?(1(yse)/0(no) :")
    print "thank you for using!"
def search(dbpath, querystring, offset=0, pagesize=10):
    """Search *dbpath* for *querystring* and return the hits as one string.

    offset: starting point within the result set.
    pagesize: number of records to retrieve.
    """
    db = xapian.Database(dbpath)
    parser = xapian.QueryParser()
    parser.set_stemmer(xapian.Stem("en"))  # English stemming
    parser.set_stemming_strategy(parser.STEM_SOME)
    parser.add_prefix("title", "S")
    parser.add_prefix("description", "XD")
    enquire = xapian.Enquire(db)
    enquire.set_query(parser.parse_query(querystring))
    matches = []
    pieces = []
    for match in enquire.get_mset(offset, pagesize):
        # each document stores its fields as a JSON dict
        fields = json.loads(match.document.get_data())
        pieces.append(u"%(rank)i: #%(docid)3.3i %(title)s" % {
            'rank': match.rank + 1,
            'docid': match.docid,
            'title': fields.get('TITLE', u''),
        })
        pieces.append('\n')
        matches.append(match.docid)
    # log the query and the docids that were shown
    support.log_matches(querystring, offset, pagesize, matches)
    return ''.join(pieces)
### END of function
def __init__(self, pkg_data, partition_proportion, rounds, metrics_list,
             labels):
    """Cross-validation runner backed by the bag-of-words model."""
    super(CrossValidationBOW, self).__init__(pkg_data,
                                             partition_proportion,
                                             rounds, metrics_list, labels)
    self.label = "Bag of words model"
    self.axi = xapian.Database(XAPIAN_DATABASE_PATH)
def search_query(claim):
    """Search the xapian titles index for documents matching *claim*.

    The claim is tokenized, stripped of English stopwords, reduced to its
    noun phrases, then parsed as a stemmed query; the top 5 matching
    documents (stored as JSON dicts) are returned as a list.
    Fix vs original: removed the unused ``doc_title`` local.
    """
    stop_words = set(stopwords.words('english'))
    tokens = word_tokenize(claim)
    claim = " ".join([w for w in tokens if w not in stop_words])
    claim = noun_phrases(claim)
    db = xapian.Database('/home/xusheng/Downloads/ano-titles')
    query_parser = xapian.QueryParser()
    query_parser.set_stemmer(xapian.Stem('en'))
    query_parser.set_stemming_strategy(query_parser.STEM_SOME)
    query = query_parser.parse_query(claim)
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    matches = []
    for match in enquire.get_mset(0, 5):
        # each document's data is a JSON-encoded dict
        match_doc = json.loads(match.document.get_data().decode('utf8'))
        matches.append(match_doc)
    return matches
def __init__(self, dbpath='simplehaha'):
    """Set up the xapian query machinery and two flushed redis handles."""
    db = xapian.Database(dbpath)
    self.enquire = xapian.Enquire(db)
    parser = xapian.QueryParser()
    parser.set_stemmer(xapian.Stem("english"))
    parser.set_database(db)
    parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    self.qp = parser
    # document value-slot layout (slots 6/8/9/10 — username, uid,
    # repname list, wid — are unused here)
    self.emotionvi = 0
    self.keywordsvi = 1
    self.timestampvi = 2
    self.loctvi = 3
    self.reploctvi = 4
    self.emotiononlyvi = 5
    self.hashtagsvi = 7
    self.maxitems = 1000000000
    # redis db 1: low-keyword set, flushed on startup
    self.r = redis.Redis(connection_pool=redis.ConnectionPool(
        host='localhost', port=6379, db=1))
    self.r.flushdb()
    self.lowkeywords_set_rds = 'lowkeywords'
    # redis db 2: keyword hash, flushed on startup
    self.r1 = redis.Redis(connection_pool=redis.ConnectionPool(
        host='localhost', port=6379, db=2))
    self.r1.flushdb()
    self.keywords_hash_rds = 'keywords_hash'
def xapianSearch(args, cur):
    """Do a fuzzy text search on the man pages."""
    db = xapian.Database(util.getDir('man_pages'))
    parser = xapian.QueryParser()
    parser.set_stemmer(xapian.Stem("english"))
    parser.set_database(db)
    parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    enquire = xapian.Enquire(db)
    enquire.set_query(parser.parse_query(args['search_term']))
    # default to 10 results when no limit was supplied
    limit = args['limit']
    if limit is None:
        limit = 10
    for hit in enquire.get_mset(0, limit):
        data = hit.document.get_data()
        # first line of the stored data is the package name
        name = data.split('\n', 1)[0]
        # Fuzzy match the package name (parameterized SQL)
        cur.execute(
            """SELECT pack FROM package WHERE levenshtein(name, %s) < 3;""",
            (name, ))
        packId = cur.fetchone()
        if packId is not None:
            cur.execute(
                """UPDATE descriptor SET relevancy = %s, manpage = %s WHERE pack = %s""",
                (hit.percent, data, packId[0]))
def open_index(self, path, *args, **kwargs):
    """Open the xapian index at *path*, writable unless ``writable=False``.

    NOTE(review): writable opens use DB_OPEN, so the index must already
    exist — mirrors the original behaviour.
    """
    self._path = path
    if kwargs.get('writable', True):
        self.index = xapian.WritableDatabase(path, xapian.DB_OPEN)
    else:
        self.index = xapian.Database(path)
def query(keywords):
    """ Get changelog entries matching the given keywords """
    xdb = xapian.Database(MINECHANGELOGS_INDEXDIR)
    # build one subquery per keyword: phrases for multi-word entries
    subqueries = []
    for word in keywords:
        word = word.strip()
        if not word:
            continue
        if ' ' in word:
            subqueries.append(
                xapian.Query(xapian.Query.OP_PHRASE, word.split()))
        else:
            subqueries.append(xapian.Query(word))
    if not subqueries:
        return
    # OR-fold the subqueries together, left to right
    combined = subqueries[0]
    for sub in subqueries[1:]:
        combined = xapian.Query(xapian.Query.OP_OR, combined, sub)
    enquire = xapian.Enquire(xdb)
    enquire.set_query(combined)
    enquire.set_sort_by_value(0, True)
    # page through the results, 100 at a time, yielding raw entry data
    first = 0
    while True:
        page = enquire.get_mset(first, 100)
        if page.size() == 0:
            break
        for m in page:
            yield m.document.get_data()
        first += 100
def __init__(self):
    # Build a query interface over the apt-xapian-index: open the index,
    # configure a query parser with package/tag/section prefixes, and
    # restore cached state from previous runs (best-effort).
    # Access the Xapian index
    self.db = xapian.Database(axi.XAPIANINDEX)
    self.stem = xapian.Stem("english")
    # Build query parser: AND semantics by default, stemmed free text,
    # plus the apt-xapian-index term prefixes
    self.qp = xapian.QueryParser()
    self.qp.set_default_op(xapian.Query.OP_AND)
    self.qp.set_database(self.db)
    self.qp.set_stemmer(self.stem)
    self.qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    self.qp.add_prefix("pkg", "XP")           # pkg:name -> XP terms
    self.qp.add_boolean_prefix("tag", "XT")   # tag:debtag -> XT terms
    self.qp.add_boolean_prefix("sec", "XS")   # sec:section -> XS terms
    #notmuch->value_range_processor = new Xapian::NumberValueRangeProcessor (NOTMUCH_VALUE_TIMESTAMP);
    #notmuch->query_parser->add_valuerangeprocessor (notmuch->value_range_processor);
    # Read state from previous runs; a corrupted cache file is ignored
    # rather than aborting startup
    self.cache = RawConfigParser()
    if os.path.exists(CACHEFILE):
        try:
            self.cache.read(CACHEFILE)
        except Error, e:
            print >> sys.stderr, e
            print >> sys.stderr, "ignoring %s which seems to be corrupted" % CACHEFILE
def __init__(self, basedir, analyzer=None, create_allowed=True): """initialize or open a xapian database @raise ValueError: the given location exists, but the database type is incompatible (e.g. created by a different indexing engine) @raise OSError: the database failed to initialize @param basedir: the parent directory of the database @type basedir: str @param analyzer: bitwise combination of possible analyzer flags to be used as the default analyzer for this database. Leave it empty to use the system default analyzer (self.ANALYZER_DEFAULT). see self.ANALYZER_TOKENIZE, self.ANALYZER_PARTIAL, ... @type analyzer: int @param create_allowed: create the database, if necessary; default: True @type create_allowed: bool """ # call the __init__ function of our parent super(XapianDatabase, self).__init__(basedir, analyzer=analyzer, create_allowed=create_allowed) self.reader = None self.writer = None if os.path.exists(self.location): # try to open an existing database try: self.reader = xapian.Database(self.location) except xapian.DatabaseOpeningError, err_msg: raise ValueError("Indexer: failed to open xapian database " \ + "(%s) - maybe it is not a xapian database: %s" \ % (self.location, str(err_msg)))
def searcher(self):
    """Open the xappy index and prepare the enquire/query-parser helpers."""
    index_path = os.path.join(self.options.dir,
                              "%s_xappy" % self.options.indexname)
    self.db = xapian.Database(index_path)
    self.enq = xapian.Enquire(self.db)
    self.qp = xapian.QueryParser()
    self.qp.set_database(self.db)
def test_CacheInvalidator_wrong_fedmsg(mocker, test_wrong_fedmsg):
    """ Test that the update_xapian method returns whith a wrong fedmsg message
    Case 1 : Wrong topic
    Case 2 : Wrong msg format
    Case 3 : No package name in the msg
    """
    mocker.patch('fedoracommunity.consumers.find_config_file',
                 return_value='/usr/share/fedoracommunity/tests/config.py')
    # stub out the indexer's side-effecting helpers
    for target in ('fedoracommunity.search.index.Indexer.pull_icons',
                   'fedoracommunity.search.index.Indexer.cache_icons',
                   'fedoracommunity.search.index.Indexer.index_files_of_interest'):
        mocker.patch(target)
    mocker.patch(
        'fedoracommunity.search.index.Indexer.construct_package_dictionary',
        return_value=pkg_guake)
    consumer = CacheInvalidator(MockHub())
    consumer.update_xapian(test_wrong_fedmsg)
    db = xapian.Database('/tmp/xapian/search')
    data = json.loads(db.get_document(db.get_lastdocid()).get_data())
    # POC was not changed
    assert data['devel_owner'] == 'cverna'
    # We still have only one document in the database
    assert db.get_doccount() == 1
def test_CacheInvalidator_update_package(mocker):
    """ Test that the update_xapian method updates the xapian document
    when we receive an update from fedmsg
    """
    pkg_guake_update = {
        'name': 'guake',
        'summary': 'Drop-down terminal for GNOME',
        'description': 'Guake is a drop-down terminal for Gnome Desktop Environment',
        'devel_owner': 'cverna',
        'icon': 'guake',
        'package': None,
        'upstream_url': 'http://guake.org/',
        'sub_pkgs': [],
    }
    mocker.patch('fedoracommunity.consumers.find_config_file',
                 return_value='/usr/share/fedoracommunity/tests/config.py')
    # stub out the indexer's side-effecting helpers
    for target in ('fedoracommunity.search.index.Indexer.pull_icons',
                   'fedoracommunity.search.index.Indexer.cache_icons',
                   'fedoracommunity.search.index.Indexer.index_files_of_interest'):
        mocker.patch(target)
    mocker.patch(
        'fedoracommunity.search.index.Indexer.construct_package_dictionary',
        return_value=pkg_guake_update)
    consumer = CacheInvalidator(MockHub())
    consumer.update_xapian(msg)
    db = xapian.Database('/tmp/xapian/search')
    data = json.loads(db.get_document(db.get_lastdocid()).get_data())
    assert data['name'] == 'guake'
    # POC was successfully updated
    assert data['devel_owner'] == 'cverna'
    # We still have only one document in the database
    assert db.get_doccount() == 1
def setUp(self):
    """Create a StoreDatabase that also contains the fake agent db,
    simulating a run of software-center-agent."""
    self.db = get_test_db()
    extra_db = xapian.Database(TEST_DB)
    self.db.add_database(extra_db)
    self.db.open(use_axi=True)
    self.enquire = AppEnquire(self.db._aptcache, self.db)
def run_train(cls, pkgs_classifications, ):
    """Train a bag-of-words model on the classified packages and persist it."""
    model = BagOfWords()
    axi_index = xapian.Database(XAPIAN_DATABASE_PATH)
    # train on the package names (the classification dict's keys)
    model.train_model(pkgs_classifications.keys(), axi_index)
    BagOfWords.save(model, BagOfWords.BAG_OF_WORDS_MODEL)