Exemplo n.º 1
0
 def __init__(self):
     """
     Open the Xapian indexes, load the filter lists and pick the weighting.

     All settings are read from a new ``Config()`` instance, which is kept on
     ``self.cfg``. Finishes by calling ``self.set_strategy`` with the
     configured strategy.
     """
     self.cfg = Config()
     cfg = self.cfg

     # Xapian indexes. Only the desktop-apps indexes are opened here; the
     # popcon one is opened only when cfg.popcon is set.
     self.axi_desktopapps = xapian.Database(cfg.axi_desktopapps)
     if cfg.popcon:
         self.popcon_desktopapps = xapian.Database(cfg.popcon_desktopapps)

     # Both lists start out empty so the attributes exist even if reading a
     # filter file fails below.
     self.valid_desktopapps = []
     self.valid_tags = []
     logging.info("Loading recommender filters")

     def read_filter(filename):
         # Filter files hold one package or tag name per line; lines that
         # start with "#" are skipped.
         with open(os.path.join(cfg.filters_dir, filename)) as entries:
             return [entry.strip() for entry in entries
                     if not entry.startswith("#")]

     self.valid_desktopapps = read_filter("desktopapps")
     self.valid_tags = read_filter("debtags")

     # Weighting scheme: BM25 with the configured parameters when requested,
     # the traditional scheme for any other setting.
     if cfg.weight != "bm25":
         self.weight = xapian.TradWeight()
     else:
         self.weight = xapian.BM25Weight(cfg.bm25_k1, cfg.bm25_k2,
                                         cfg.bm25_k3, cfg.bm25_b,
                                         cfg.bm25_nl)
     self.set_strategy(cfg.strategy)
    def _get_new_xapiandb(self):
        """Build a fresh, combined Xapian database handle.

        Opens the main database at ``self._db_pathname`` and then adds, in
        this order:

        * the apt-xapian-index database, when ``self._use_axi`` is set;
        * the software-center-agent database, when ``self._use_agent`` is set
          and its path exists;
        * the UTSC database, when ``self._use_utsc`` is set;
        * every entry of ``self._additional_databases``.

        Failing to open one of the three optional databases is not fatal: the
        failure is logged and that database is left out. Errors from opening
        the main database or from adding the additional databases propagate.

        Returns:
            The combined ``xapian.Database``.
        """
        xapiandb = xapian.Database(self._db_pathname)
        if self._use_axi:
            try:
                axi = xapian.Database(APT_XAPIAN_INDEX_DB_PATH)
                xapiandb.add_database(axi)
            except Exception as e:
                # logging.warn() is a deprecated alias of logging.warning().
                logging.warning("failed to add apt-xapian-index db %s", e)
                if (Globals.DEBUG_SWITCH):
                    print(
                        "Failed to add apt-xapian-index,some software may not be searched"
                    )
        if (self._use_agent
                and os.path.exists(XAPIAN_BASE_PATH_SOFTWARE_CENTER_AGENT)):
            try:
                sca = xapian.Database(XAPIAN_BASE_PATH_SOFTWARE_CENTER_AGENT)
                xapiandb.add_database(sca)
            except Exception as e:
                # Still best-effort, but leave a trace instead of dropping
                # the error silently.
                logging.debug("failed to add sca db %s", e)
        if self._use_utsc:
            try:
                utsc_xapiandb = xapian.Database(UTSC_PATH)
                xapiandb.add_database(utsc_xapiandb)
            except Exception as e:
                logging.debug("failed to add utsc_xapiandb db %s", e)

        for db in self._additional_databases:
            xapiandb.add_database(db)
        return xapiandb
Exemplo n.º 3
0
def test_spell():
    """Exercise spelling data on a writer and a reader around a commit.

    """
    path = 'db_test_spell'
    writer = xapian.WritableDatabase(path, xapian.DB_CREATE_OR_OVERWRITE)

    def spelling_pairs(database):
        # (term, frequency) pairs as reported by the database.
        return [(entry.term, entry.termfreq)
                for entry in database.spellings()]

    writer.add_spelling('hello')
    writer.add_spelling('mell', 2)

    # Before the commit the writer is expected to see its own additions,
    # while a reader opened on the same path is expected to see nothing.
    expect(writer.get_spelling_suggestion('hell'), 'mell')
    expect(spelling_pairs(writer), [('hello', 1), ('mell', 2)])
    reader = xapian.Database(path)
    expect(reader.get_spelling_suggestion('hell'), '')
    expect(spelling_pairs(reader), [])

    # After the commit a newly opened reader is expected to see the data.
    writer.commit()
    reader = xapian.Database(path)
    expect(writer.get_spelling_suggestion('hell'), 'mell')
    expect(reader.get_spelling_suggestion('hell'), 'mell')
    expect(spelling_pairs(reader), [('hello', 1), ('mell', 2)])

    writer.close()
    reader.close()
    shutil.rmtree(path)
    def is_xapiancachedb_need_update(self):
        """Tell whether the cached xapian database has to be refreshed.

        Looks up the document matching the term "the_#ukxapiandb#_version" in
        the source database (XAPIAN_DB_SOURCE_PATH) and in the cached copy
        (UKSC_CACHE_DIR/xapiandb) and compares the versions stored in value
        slot 1 of those documents.

        Returns:
            True when the source version compares greater than the cached
            one, or when either version cannot be determined (a database
            cannot be opened or queried, or the version document is
            missing); False otherwise.
        """
        xapian_srcFile = XAPIAN_DB_SOURCE_PATH
        xapian_destFile = os.path.join(UKSC_CACHE_DIR, "xapiandb")

        # Explicit "not found" markers; previously a missing version was only
        # detected through a NameError swallowed by a bare except.
        new_version = None
        old_version = None

        try:
            src_xapiandb = xapian.Database(xapian_srcFile)
            new_enquire = xapian.Enquire(src_xapiandb)
            new_query = xapian.Query("the_#ukxapiandb#_version")
            new_enquire.set_query(new_query)
            new_matches = new_enquire.get_mset(0, 1)

            for new_item in new_matches:
                new_doc = new_item.document
                if new_doc.get_data() == "XAPIANDB_VERSION":
                    # value slot 1 holds the xapiandb version
                    new_version = new_doc.get_value(1)
                    des_xapiandb = xapian.Database(xapian_destFile)
                    old_enquire = xapian.Enquire(des_xapiandb)
                    old_query = xapian.Query("the_#ukxapiandb#_version")
                    old_enquire.set_query(old_query)
                    old_matches = old_enquire.get_mset(0, 1)
                    for old_item in old_matches:
                        old_doc = old_item.document
                        old_version = old_doc.get_value(1)

            if new_version is None or old_version is None:
                # Nothing to compare against: ask for an update.
                return True

            #if (Globals.DEBUG_SWITCH):
            print(("old xapiandb  version:", old_version,
                   " new xapiandb version:", new_version))
        except Exception:
            # Any failure while reading either database means the cache
            # cannot be trusted. KeyboardInterrupt/SystemExit are no longer
            # swallowed.
            return True

        # NOTE(review): this compares the raw slot values, so the ordering is
        # whatever their type defines (lexicographic for strings) - confirm
        # that matches the version format in use.
        return new_version > old_version
Exemplo n.º 5
0
def _xapian_database_open(path, writable, create, data='.', log=logging):
    """Open, and optionally create, the Xapian database at *path*.

    Args:
        path: filesystem location of the index.
        writable: when true a ``xapian.WritableDatabase`` is returned,
            otherwise a read-only ``xapian.Database``.
        create: when true, a missing parent directory is created (mode
            0o700) and a missing index is initialised.
        data: not used by this function.
        log: not used by this function.

    Returns:
        The opened database object.

    Raises:
        InvalidIndexError: if the index cannot be locked, opened or used.
    """
    try:
        if create:
            try:
                directory = os.path.dirname(path)
                if directory and not os.path.isdir(directory):
                    # 0o700 is the octal spelling accepted by both
                    # Python 2.6+ and Python 3; the old ``0700`` form is a
                    # SyntaxError on Python 3.
                    os.makedirs(directory, 0o700)
            except OSError:
                # Best effort: if the directory cannot be created, opening
                # the database below reports the failure.
                pass
        if writable:
            database = xapian.WritableDatabase(
                path, xapian.DB_CREATE_OR_OPEN if create else xapian.DB_OPEN)
        else:
            try:
                database = xapian.Database(path)
            except xapian.DatabaseError:
                if create:
                    # Initialise an empty index, then fall through to reopen
                    # it read-only.
                    database = xapian.WritableDatabase(
                        path, xapian.DB_CREATE_OR_OPEN)
                    database.close()
                # Without ``create`` this is a plain retry; a second failure
                # is translated by the handlers below.
                database = xapian.Database(path)
    except xapian.DatabaseLockError as exc:
        raise InvalidIndexError("Unable to lock index at %s: %s" % (path, exc))
    except xapian.DatabaseOpeningError as exc:
        raise InvalidIndexError("Unable to open index at %s: %s" % (path, exc))
    except xapian.DatabaseError as exc:
        raise InvalidIndexError("Unable to use index at %s: %s" % (path, exc))
    return database
Exemplo n.º 6
0
 def __init__(self, environ=None, request=None):
     """Initialise the connector and open its two Xapian databases.

     The database locations come from ``config``; when a key is absent the
     relative paths 'xapian/search' and 'xapian/versionmap' are used.
     """
     super(XapianConnector, self).__init__(environ, request)

     search_path = config.get(
         'fedoracommunity.connector.xapian.package-search.db',
         'xapian/search')
     self._search_db = xapian.Database(search_path)

     versionmap_path = config.get(
         'fedoracommunity.connector.xapian.versionmap.db',
         'xapian/versionmap')
     self._versionmap_db = xapian.Database(versionmap_path)
Exemplo n.º 7
0
def test_metadata_keys_iter():
    """Check metadata key listing on a writer and on readers around a commit.

    """
    dbpath = 'db_test_metadata_iter'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    # item1 is overwritten, item3 is set to the empty string and item4 is
    # reset to the empty string; the expectations below list neither item3
    # nor item4.
    for key, value in (('author', 'richard'),
                       ('item1', 'hello'),
                       ('item1', 'hi'),
                       ('item2', 'howdy'),
                       ('item3', ''),
                       ('item4', 'goodbye'),
                       ('item4', ''),
                       ('type', 'greeting')):
        db.set_metadata(key, value)

    def expect_populated(handle):
        # Full listing plus the prefix-restricted listings.
        expect(list(handle.metadata_keys()),
               ['author', 'item1', 'item2', 'type'])
        expect(list(handle.metadata_keys('foo')), [])
        expect(list(handle.metadata_keys('item')), ['item1', 'item2'])
        expect(list(handle.metadata_keys('it')), ['item1', 'item2'])
        expect(list(handle.metadata_keys('type')), ['type'])

    def expect_empty(handle):
        expect(list(handle.metadata_keys()), [])
        for prefix in ('foo', 'item', 'it', 'type'):
            expect(list(handle.metadata_keys(prefix)), [])

    # Before the commit: the writer lists its keys, a reader lists none.
    expect_populated(db)
    dbr = xapian.Database(dbpath)
    expect_empty(dbr)

    # After the commit: the writer and a newly opened reader both list them.
    db.commit()
    expect_populated(db)
    dbr = xapian.Database(dbpath)
    expect_populated(dbr)

    db.close()
    dbr.close()
    shutil.rmtree(dbpath)
def xapian_init_databases():
    """
    Open the 'fulltext' Xapian database and register it in DATABASES.

    The database is expected at XAPIAN_DIR + "/fulltext".
    """
    index_name = 'fulltext'
    DATABASES[index_name] = xapian.Database(XAPIAN_DIR + "/" + index_name)
def xapian_init_databases():
    """
    Open one Xapian database per entry of INDEXES and register each in
    DATABASES under the index name.

    Each database is expected at XAPIAN_DIR + "/" + <index name>.
    """
    for index_name in INDEXES:
        index_path = XAPIAN_DIR + "/" + index_name
        DATABASES[index_name] = xapian.Database(index_path)
Exemplo n.º 10
0
    def indexer_axi(self, axi_sample, filters_path, terms=None):
        """Build a reduced index from the default apt-xapian-index.

        Args:
            axi_sample: selects the kind of index to build. ``'sample'``
                builds a ``data.SampleAptXapianIndex`` from the package names
                listed in *filters_path*; ``'filter'`` builds a
                ``data.FilteredXapianIndex`` from *terms*. Any other value
                builds nothing and only prints the timing lines.
            filters_path: file with one package name per line (read only in
                ``'sample'`` mode); its base name is used to name the index.
            terms: terms used in ``'filter'`` mode. Defaults to an empty
                list.

        The new index is created under ``self.config.base_dir``. Progress
        and timing information is printed to stdout.
        """
        # A fresh list per call instead of a shared mutable default.
        if terms is None:
            terms = []

        axi_path = Initialize.DEFAULT_AXI_PATH
        axi = xapian.Database(axi_path)
        base_dir = self.config.base_dir

        begin_time = datetime.datetime.now()

        # Strings are compared with ==; ``is`` tests object identity and only
        # happens to work when both strings are interned.
        if axi_sample == 'sample':
            # axi sample based on the pkgs sample provided by command line
            with open(filters_path) as valid:
                pkgs_list = [line.strip() for line in valid]
            filter_str = 'axi_' + filters_path.split('/')[-1]

            index = data.SampleAptXapianIndex(
                pkgs_list, axi, os.path.join(base_dir, filter_str))
            print("Axi size: %d" % axi.get_doccount())
            print("Packages list length: %d" % len(pkgs_list))
            print("Sample index size: %d" % index.get_doccount())
        elif axi_sample == "filter":
            # axi filtered by terms provided by command line
            terms_str = "_".join([t.split("::")[-1] for t in terms])
            index = data.FilteredXapianIndex(
                terms, axi, os.path.join(base_dir, "axi_" + terms_str))
            print("Axi size: %d" % axi.get_doccount())
            # Wrapped in a 1-tuple so a tuple of terms formats correctly.
            print("Terms filter: %s" % (terms,))
            print("Filtered index size: %d" % index.get_doccount())

        end_time = datetime.datetime.now()
        print("Indexing completed at %s" % end_time)
        delta = end_time - begin_time
        print("Time elapsed: %d seconds." % delta.seconds)
Exemplo n.º 11
0
    def handle_query(self, q):
        """Run query string *q* against the index at ``self.db_path``.

        The query is parsed with an English stemmer (STEM_SOME strategy) and
        at most the first 100 matches are fetched.

        Returns:
            A list of ``(path, title, context)`` tuples, where path and title
            are the document values in slots ``self.DOC_PATH`` and
            ``self.DOC_TITLE`` and context is the joined output of
            ``self.extract_context`` for the document data.
        """
        db = xapian.Database(self.db_path)

        parser = xapian.QueryParser()
        parser.set_stemmer(xapian.Stem("english"))
        parser.set_database(db)
        parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)

        searcher = xapian.Enquire(db)
        searcher.set_query(parser.parse_query(q))

        hits = []
        # Only the top 100 results are considered.
        for hit in searcher.get_mset(0, 100):
            doc = hit.document
            text = doc.get_data()
            # The document data may come back as bytes; decode it to text.
            if not isinstance(text, string_types):
                text = text.decode("utf-8")
            snippet = ''.join(self.extract_context(text))
            hits.append((doc.get_value(self.DOC_PATH),
                         doc.get_value(self.DOC_TITLE),
                         snippet))

        return hits
Exemplo n.º 12
0
def search(dbpath, querystring, offset=0, pagesize=100, ident=0):
    """Search the index at *dbpath* and print one result line per match.

    Args:
        dbpath: path of the Xapian database to search.
        querystring: free-text query, parsed with the "pt" stemmer.
        offset: starting point within the result set.
        pagesize: number of records to retrieve.
        ident: identifier printed in the first column of every line.

    Each printed line has the form
    ``<ident> Q0 <id> <rank> <weight> danielatkinson_filipemoreira`` where
    ``<id>`` is the part of the document data before the first ":".
    """
    database = xapian.Database(dbpath)

    # Query parser with a stemmer; no term prefixes are configured.
    parser = xapian.QueryParser()
    parser.set_stemmer(xapian.Stem("pt"))
    parser.set_stemming_strategy(parser.STEM_SOME)

    enquire = xapian.Enquire(database)
    enquire.set_query(parser.parse_query(querystring))

    line_format = (
        u"%(ident)s Q0 %(id)s %(rank)i %(weight)s danielatkinson_filipemoreira")

    # NOTE(review): the doc ids are collected but never returned or used
    # within this function.
    docids = []
    for hit in enquire.get_mset(offset, pagesize):
        record_id = hit.document.get_data().split(":")[0]
        print(line_format % {
            'ident': ident,
            'rank': hit.rank,
            'weight': hit.weight,
            'id': record_id,
        })
        docids.append(hit.docid)
Exemplo n.º 13
0
def search(dbpath, querystring, offset=0, pagesize=10):
    """Search the index at *dbpath*, print the matches and log them.

    Args:
        dbpath: path of the Xapian database to search.
        querystring: free-text query; the ``title:`` and ``description:``
            prefixes map to the "S" and "XD" term prefixes.
        offset: starting point within the result set.
        pagesize: number of records to retrieve.

    One line per match is printed as ``<rank>: #<docid> <title>``, where the
    rank is 1-based and the title is the 'TITLE' field of the JSON document
    data (empty when absent). The query and the matched doc ids are then
    passed to ``support.log_matches``.
    """
    # Open the database we're going to search.
    db = xapian.Database(dbpath)

    # Set up a QueryParser with a stemmer and suitable prefixes
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(xapian.Stem("en"))
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    queryparser.add_prefix("title", "S")
    queryparser.add_prefix("description", "XD")

    # And parse the query
    query = queryparser.parse_query(querystring)

    # Use an Enquire object on the database to run the query
    enquire = xapian.Enquire(db)
    enquire.set_query(query)

    # And print out something about each match
    matches = []
    for match in enquire.get_mset(offset, pagesize):
        fields = json.loads(match.document.get_data())
        # A parenthesised single-expression print behaves the same on
        # Python 2 and Python 3; the bare print statement is a SyntaxError
        # on Python 3.
        print(u"%(rank)i: #%(docid)3.3i %(title)s" % {
            'rank': match.rank + 1,
            'docid': match.docid,
            'title': fields.get('TITLE', u''),
        })
        matches.append(match.docid)

    # Finally, make sure we log the query and displayed results
    support.log_matches(querystring, offset, pagesize, matches)
Exemplo n.º 14
0
    def _build_index(self, filepath, recreate=False):
        """
            Build (or reuse) a Xapian sentence index for a text file and set
            up ``self.parser`` / ``self.enquire`` on it.

            The index is stored at ``filepath + ".index"``. When that path
            already exists and ``recreate`` is False it is opened read-only;
            otherwise it is (re)built with one Xapian document per sentence
            of every non-empty line, sentences being produced by
            ``nltk.sent_tokenize``.

            Input:
                - filepath: text file path; ``.bz2`` and ``.gz`` files are
                  opened with the matching decompressor, anything else as a
                  plain file (always read as UTF-8 text)
                - recreate: bool, True forces the index to be rebuilt,
                  default is False
        """
        cached_index = filepath + ".index"

        if os.path.exists(cached_index):
            if recreate:
                shutil.rmtree(cached_index)
        else:
            # Nothing cached yet: the index has to be built.
            recreate = True

        stemmer = xapian.Stem("english")

        if not recreate:
            database = xapian.Database(cached_index)
        else:
            database = xapian.WritableDatabase(cached_index,
                                               xapian.DB_CREATE_OR_OPEN)
            indexer = xapian.TermGenerator()
            indexer.set_stemmer(stemmer)

            ext = os.path.splitext(filepath)[-1]
            if ext == ".bz2":
                import bz2
                open_func = bz2.open
            elif ext == ".gz":
                import gzip
                open_func = gzip.open
            else:
                open_func = open

            with open_func(filepath, mode="rt", encoding="utf-8") as f:
                for line in tqdm(f, desc="Building index", unit=" lines"):
                    line = line.strip()
                    if not line:
                        continue
                    for sent in nltk.sent_tokenize(line):
                        # str.strip() returns a new string; the result has to
                        # be kept for the stripping to take effect.
                        sent = sent.strip()
                        doc = xapian.Document()
                        doc.set_data(sent)
                        indexer.set_document(doc)
                        indexer.index_text(sent)
                        database.add_document(doc)

            # Commit explicitly so the freshly built index is on disk before
            # it is used, rather than whenever the handle is released.
            database.commit()

        self.parser = xapian.QueryParser()
        self.parser.set_stemmer(stemmer)
        self.parser.set_database(database)
        self.parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
        self.enquire = xapian.Enquire(database)
Exemplo n.º 15
0
    def get_axipkgs(self, axi_tag=TAGS[0], axi_path=DEFAULT_AXI_PATH):
        """Collect the *axi_tag*-prefixed term of every document in an index.

        For each document of the Xapian database at *axi_path*, the first
        term starting with *axi_tag* is taken, the *axi_tag* prefix is
        removed, and any leading 'M' characters are removed as well.

        Args:
            axi_tag: term prefix to look for.
            axi_path: path of the Xapian database to scan.

        Returns:
            A set with one stripped term per document that has such a term.
        """
        axi = xapian.Database(axi_path)
        all_terms = set()
        prefix_len = len(axi_tag)

        # get_lastdocid() is the highest id in use, so the range has to
        # include it.
        for docid in range(1, axi.get_lastdocid() + 1):
            try:
                doc = axi.get_document(docid)
            except Exception:
                # Document ids may be sparse; skip ids that cannot be read.
                continue

            for t in doc.termlist():
                if not t.term.startswith(axi_tag):
                    continue
                # Remove the prefix by slicing: str.lstrip() strips a *set of
                # characters* and would also eat leading characters of the
                # name that happen to appear in axi_tag.
                name = t.term[prefix_len:]
                # NOTE(review): the meaning of the 'M' marker is not visible
                # here; the original stripping of leading 'M's is kept.
                if name.startswith('M'):
                    name = name.lstrip('M')
                all_terms.add(name)
                break

        return all_terms
Exemplo n.º 16
0
def search():
    database = xapian.Database('indexes/')
    enquire = xapian.Enquire(database)
    running = 1
    while int(running):
        str = raw_input("input the key words:")
        terms = []
        a = jieba.cut_for_search(str)
        for b in a:
            terms.append(b.encode("utf-8"))
        qp = xapian.QueryParser()  #建立查询分析
        qp.set_database(database)
        qp.set_default_op(xapian.Query.OP_AND)  #设置查询策略
        #query = qp.parse_query(terms)
        query = xapian.Query(xapian.Query.OP_OR, terms)  #查询函数,搞不懂
        enquire.set_query(query)
        matches = enquire.get_mset(0, 10)
        print "%i results found" % matches.get_matches_estimated()
        for match in matches:
            a = match.document.get_data()
            d = eval(a)
            print "贴吧:", d["title"]
            print "作者:", d["reply"]["name"]
            print "回复:", d["reply"]["content"]
            print "时间:", d["reply"]["time"]
        running = raw_input("again?(1(yse)/0(no) :")
    print "thank you for using!"
Exemplo n.º 17
0
def search(dbpath, querystring, offset=0, pagesize=10):
    """Search the index at *dbpath* and return the matches as text.

    Args:
        dbpath: path of the Xapian database to search.
        querystring: free-text query; ``title:`` and ``description:`` map to
            the "S" and "XD" term prefixes, stemming is English.
        offset: starting point within the result set.
        pagesize: number of records to retrieve.

    Returns:
        One ``<rank>: #<docid> <title>`` line per match, each terminated by
        a newline (an empty string when nothing matches). The matched doc
        ids are also passed to ``support.log_matches``.
    """
    database = xapian.Database(dbpath)

    parser = xapian.QueryParser()
    parser.set_stemmer(xapian.Stem("en"))
    parser.set_stemming_strategy(parser.STEM_SOME)
    parser.add_prefix("title", "S")
    parser.add_prefix("description", "XD")

    enquire = xapian.Enquire(database)
    enquire.set_query(parser.parse_query(querystring))

    docids = []
    lines = []
    for hit in enquire.get_mset(offset, pagesize):
        record = json.loads(hit.document.get_data())
        lines.append(u"%(rank)i: #%(docid)3.3i %(title)s" % {
            'rank': hit.rank + 1,
            'docid': hit.docid,
            'title': record.get('TITLE', u''),
        })
        docids.append(hit.docid)

    support.log_matches(querystring, offset, pagesize, docids)
    return "".join(line + '\n' for line in lines)


### END of function
Exemplo n.º 18
0
 def __init__(self, pkg_data, partition_proportion, rounds, metrics_list,
              labels):
     """Initialise the cross validation and open the Xapian index.

     All arguments are forwarded unchanged to the parent initialiser; the
     database at XAPIAN_DATABASE_PATH is stored on ``self.axi``.
     """
     parent = super(CrossValidationBOW, self)
     parent.__init__(pkg_data, partition_proportion, rounds, metrics_list,
                     labels)
     self.axi = xapian.Database(XAPIAN_DATABASE_PATH)
     self.label = "Bag of words model"
Exemplo n.º 19
0
def search_query(claim, db_path='/home/xusheng/Downloads/ano-titles', limit=5):
    """Return the documents that best match the noun phrases of *claim*.

    The claim is tokenised, English stop words are dropped, the remainder is
    passed through ``noun_phrases`` and the result is used as a Xapian query
    (English stemmer, STEM_SOME strategy).

    Args:
        claim: the claim text to search for.
        db_path: path of the Xapian database to query. Defaults to the
            location that used to be hard-coded in this function.
        limit: maximum number of matches to return (previously fixed at 5).

    Returns:
        A list with the JSON-decoded data of each match, as Python dicts.
    """
    stop_words = set(stopwords.words('english'))
    tokens = word_tokenize(claim)
    claim = " ".join([w for w in tokens if w not in stop_words])
    claim = noun_phrases(claim)

    db = xapian.Database(db_path)
    query_parser = xapian.QueryParser()
    query_parser.set_stemmer(xapian.Stem('en'))
    query_parser.set_stemming_strategy(query_parser.STEM_SOME)
    query = query_parser.parse_query(claim)

    enquire = xapian.Enquire(db)
    enquire.set_query(query)

    # The document data is UTF-8 encoded JSON; each match is parsed into a
    # Python dict.
    return [json.loads(match.document.get_data().decode('utf8'))
            for match in enquire.get_mset(0, limit)]
Exemplo n.º 20
0
    def __init__(self, dbpath='simplehaha'):
        """Open the Xapian index at *dbpath* and reset the two Redis stores.

        Sets up ``self.qp`` (query parser with an English stemmer) and
        ``self.enquire`` on the database, records the numeric ``*vi``
        indexes, and connects to Redis databases 1 and 2 on localhost:6379.

        WARNING: both Redis databases are flushed here, so any existing
        content in them is deleted on construction.
        """
        db = xapian.Database(dbpath)
        searcher = xapian.Enquire(db)
        parser = xapian.QueryParser()
        parser.set_stemmer(xapian.Stem("english"))
        parser.set_database(db)
        parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
        self.qp = parser
        self.enquire = searcher

        # Numeric indexes (presumably Xapian value slots - TODO confirm).
        # Per the original comments, 6 (username), 8 (uid),
        # 9 (repnameslist) and 10 (wid) exist but are not used here.
        self.emotionvi = 0
        self.keywordsvi = 1
        self.timestampvi = 2
        self.loctvi = 3
        self.reploctvi = 4
        self.emotiononlyvi = 5
        self.hashtagsvi = 7
        self.maxitems = 1000000000

        # Redis database 1, flushed on start-up.
        self.r = redis.Redis(connection_pool=redis.ConnectionPool(
            host='localhost', port=6379, db=1))
        self.r.flushdb()
        self.lowkeywords_set_rds = 'lowkeywords'

        # Redis database 2, flushed on start-up.
        self.r1 = redis.Redis(connection_pool=redis.ConnectionPool(
            host='localhost', port=6379, db=2))
        self.r1.flushdb()
        self.keywords_hash_rds = 'keywords_hash'
Exemplo n.º 21
0
def xapianSearch(args, cur):
    """Do a fuzzy text search on the man pages.

    Args:
        args: mapping with the keys 'search_term' (query text) and 'limit'
            (maximum number of matches; None means 10).
        cur: database cursor used to look up packages and update their
            descriptors.

    For every match, the first line of the document data is taken as a name
    and the first package whose name is within a Levenshtein distance of 3
    is looked up; when one is found, its descriptor is updated with the
    match percentage and the document data.
    """
    database = xapian.Database(util.getDir('man_pages'))
    enquire = xapian.Enquire(database)

    parser = xapian.QueryParser()
    parser.set_stemmer(xapian.Stem("english"))
    parser.set_database(database)
    parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    enquire.set_query(parser.parse_query(args['search_term']))

    requested = args['limit']
    max_hits = 10 if requested is None else requested

    for hit in enquire.get_mset(0, max_hits):
        page = hit.document.get_data()
        name = page.split('\n', 1)[0]
        # Fuzzy match the package name
        cur.execute(
            """SELECT pack FROM package WHERE 
                levenshtein(name, %s) < 3;""", (name, ))
        row = cur.fetchone()
        if row is None:
            continue
        cur.execute(
            """UPDATE descriptor SET relevancy = %s,
                                                 manpage = %s
                                  WHERE pack = %s""",
            (hit.percent, page, row[0]))
Exemplo n.º 22
0
 def open_index(self, path, *args, **kwargs):
     """Open the existing Xapian index at *path* and keep it on ``self.index``.

     The index is opened writable unless the keyword argument ``writable``
     is passed with a false value. The path is remembered on ``self._path``.
     """
     self._path = path
     if not kwargs.get('writable', True):
         self.index = xapian.Database(path)
         return
     self.index = xapian.WritableDatabase(path, xapian.DB_OPEN)
Exemplo n.º 23
0
Arquivo: models.py Projeto: lamby/nm2
def query(keywords):
    """
    Get changelog entries matching the given keywords

    Generator: yields the data of every matching document, sorted by value
    slot 0 (reverse flag set). Keywords are stripped and blank ones are
    ignored; a keyword containing a space becomes a phrase query, and all
    keywords are combined with OR. Nothing is yielded when no usable keyword
    is given.
    """
    xdb = xapian.Database(MINECHANGELOGS_INDEXDIR)

    combined = None
    for keyword in keywords:
        keyword = keyword.strip()
        if not keyword:
            continue
        if ' ' in keyword:
            part = xapian.Query(xapian.Query.OP_PHRASE, keyword.split())
        else:
            part = xapian.Query(keyword)
        combined = (part if combined is None
                    else xapian.Query(xapian.Query.OP_OR, combined, part))
    if combined is None:
        return

    enquire = xapian.Enquire(xdb)
    enquire.set_query(combined)
    enquire.set_sort_by_value(0, True)

    # Fetch the results page by page until an empty page comes back.
    page_size = 100
    first = 0
    while True:
        page = enquire.get_mset(first, page_size)
        if page.size() == 0:
            break
        for hit in page:
            yield hit.document.get_data()
        first += page_size
Exemplo n.º 24
0
    def __init__(self):
        """Open the apt-xapian-index, build the query parser and load the cache.

        Sets ``self.db``, ``self.stem`` and ``self.qp`` (AND as default
        operator, English stemming, "pkg" mapped to the XP prefix, boolean
        prefixes "tag" -> XT and "sec" -> XS), then reads CACHEFILE into
        ``self.cache`` when that file exists. A cache file that cannot be
        parsed is reported on stderr and ignored.
        """
        # Access the Xapian index
        self.db = xapian.Database(axi.XAPIANINDEX)

        self.stem = xapian.Stem("english")

        # Build query parser
        self.qp = xapian.QueryParser()
        self.qp.set_default_op(xapian.Query.OP_AND)
        self.qp.set_database(self.db)
        self.qp.set_stemmer(self.stem)
        self.qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
        self.qp.add_prefix("pkg", "XP")
        self.qp.add_boolean_prefix("tag", "XT")
        self.qp.add_boolean_prefix("sec", "XS")

        #notmuch->value_range_processor = new Xapian::NumberValueRangeProcessor (NOTMUCH_VALUE_TIMESTAMP);
        #notmuch->query_parser->add_valuerangeprocessor (notmuch->value_range_processor);

        # Read state from previous runs
        self.cache = RawConfigParser()
        if os.path.exists(CACHEFILE):
            try:
                self.cache.read(CACHEFILE)
            # ``except X as e`` and sys.stderr.write() work on Python 2.6+
            # and Python 3; ``except X, e`` and ``print >> stream`` are
            # SyntaxErrors on Python 3.
            except Error as e:
                sys.stderr.write("%s\n" % (e,))
                sys.stderr.write(
                    "ignoring %s which seems to be corrupted\n" % CACHEFILE)
Exemplo n.º 25
0
    def __init__(self, basedir, analyzer=None, create_allowed=True):
        """initialize or open a xapian database

        After the parent initialiser has run, ``self.reader`` and
        ``self.writer`` start out as None; when ``self.location`` exists it
        is opened read-only and stored on ``self.reader``.

        @raise ValueError: the given location exists, but the database type
                is incompatible (e.g. created by a different indexing engine)
        @raise OSError: the database failed to initialize

        @param basedir: the parent directory of the database
        @type basedir: str
        @param analyzer: bitwise combination of possible analyzer flags
            to be used as the default analyzer for this database. Leave it empty
            to use the system default analyzer (self.ANALYZER_DEFAULT).
            see self.ANALYZER_TOKENIZE, self.ANALYZER_PARTIAL, ...
        @type analyzer: int
        @param create_allowed: create the database, if necessary; default: True
        @type create_allowed: bool
        """
        # call the __init__ function of our parent
        super(XapianDatabase, self).__init__(basedir,
                                             analyzer=analyzer,
                                             create_allowed=create_allowed)
        self.reader = None
        self.writer = None
        if os.path.exists(self.location):
            # try to open an existing database
            try:
                self.reader = xapian.Database(self.location)
            # ``except X as name`` works on Python 2.6+ and Python 3; the
            # comma form is a SyntaxError on Python 3.
            except xapian.DatabaseOpeningError as err_msg:
                raise ValueError(
                    "Indexer: failed to open xapian database "
                    "(%s) - maybe it is not a xapian database: %s"
                    % (self.location, str(err_msg)))
Exemplo n.º 26
0
 def searcher(self):
     """Open the "<indexname>_xappy" index and prepare search objects.

     The index directory is looked up under ``self.options.dir``. Sets
     ``self.db``, ``self.enq`` (an Enquire on the database) and ``self.qp``
     (a QueryParser bound to the database).
     """
     base_dir = self.options.dir
     index_dir = "%s_xappy" % self.options.indexname
     self.db = xapian.Database(os.path.join(base_dir, index_dir))
     self.enq = xapian.Enquire(self.db)
     self.qp = xapian.QueryParser()
     self.qp.set_database(self.db)
Exemplo n.º 27
0
def test_CacheInvalidator_wrong_fedmsg(mocker, test_wrong_fedmsg):
    """ Test that the update_xapian method
    returns without changing the index when given a wrong fedmsg message
    Case 1 : Wrong topic
    Case 2 : Wrong msg format
    Case 3 : No package name in the msg
    """

    mocker.patch('fedoracommunity.consumers.find_config_file',
                 return_value='/usr/share/fedoracommunity/tests/config.py')
    # Indexer methods replaced by plain mocks for this test.
    for target in (
            'fedoracommunity.search.index.Indexer.pull_icons',
            'fedoracommunity.search.index.Indexer.cache_icons',
            'fedoracommunity.search.index.Indexer.index_files_of_interest'):
        mocker.patch(target)
    mocker.patch(
        'fedoracommunity.search.index.Indexer.construct_package_dictionary',
        return_value=pkg_guake)

    CacheInvalidator(MockHub()).update_xapian(test_wrong_fedmsg)

    search_db = xapian.Database('/tmp/xapian/search')
    newest = search_db.get_document(search_db.get_lastdocid())
    data = json.loads(newest.get_data())
    # POC was not changed
    assert data['devel_owner'] == 'cverna'
    # We still have only one document in the database
    assert search_db.get_doccount() == 1
Exemplo n.º 28
0
def test_CacheInvalidator_update_package(mocker):
    """ Test that the update_xapian method updates
    the xapian document when we receive an update from
    fedmsg """

    # Package dictionary returned by the patched Indexer.
    pkg_guake_update = dict(
        name='guake',
        summary='Drop-down terminal for GNOME',
        description='Guake is a drop-down terminal for Gnome Desktop Environment',
        devel_owner='cverna',
        icon='guake',
        package=None,
        upstream_url='http://guake.org/',
        sub_pkgs=[],
    )

    mocker.patch('fedoracommunity.consumers.find_config_file',
                 return_value='/usr/share/fedoracommunity/tests/config.py')
    # Indexer methods replaced by plain mocks for this test.
    for target in (
            'fedoracommunity.search.index.Indexer.pull_icons',
            'fedoracommunity.search.index.Indexer.cache_icons',
            'fedoracommunity.search.index.Indexer.index_files_of_interest'):
        mocker.patch(target)
    mocker.patch(
        'fedoracommunity.search.index.Indexer.construct_package_dictionary',
        return_value=pkg_guake_update)

    CacheInvalidator(MockHub()).update_xapian(msg)

    search_db = xapian.Database('/tmp/xapian/search')
    newest = search_db.get_document(search_db.get_lastdocid())
    data = json.loads(newest.get_data())
    assert data['name'] == 'guake'
    # POC was successfully updated
    assert data['devel_owner'] == 'cverna'
    # We still have only one document in the database
    assert search_db.get_doccount() == 1
 def setUp(self):
     """Prepare a test database with TEST_DB added and an enquirer on it."""
     # Create a fake database to simulate a run of software-center-agent:
     # start from the test StoreDatabase and add our other db to it.
     self.db = get_test_db()
     extra_db = xapian.Database(TEST_DB)
     self.db.add_database(extra_db)
     self.db.open(use_axi=True)
     self.enquire = AppEnquire(self.db._aptcache, self.db)
Exemplo n.º 30
0
    def run_train(cls, pkgs_classifications):
        """Train a BagOfWords model on the classified packages and save it.

        The keys of *pkgs_classifications* are passed as the package list,
        together with the index at XAPIAN_DATABASE_PATH, to
        ``train_model``; the model is then saved under
        ``BagOfWords.BAG_OF_WORDS_MODEL``.
        """
        model = BagOfWords()
        package_names = pkgs_classifications.keys()
        index = xapian.Database(XAPIAN_DATABASE_PATH)

        model.train_model(package_names, index)
        BagOfWords.save(model, BagOfWords.BAG_OF_WORDS_MODEL)