Пример #1
0
 def __init__(self, pageid):
     """Look up the stored article for ``pageid`` and parse its raw content.

     ``html_data`` and ``html_tree`` are only assigned when the lookup
     returns a record that contains the ``__wiki_raw__`` key; otherwise
     the instance is left without those attributes.
     """
     self.pageid = pageid
     self.newLine_nonBreak_pattern = re.compile(self.newLine_nonBreak_regex)
     self.es_ops = ElasticSearchOperate()
     article = self.es_ops.get_wiki_article(pageid)
     # Nothing to parse when the lookup is empty or lacks the raw entry.
     if article is None or __wiki_raw__ not in article:
         return
     self.html_data = article[__wiki_raw__]
     self.html_tree = etree.fromstring(
         self.html_data,
         etree.XMLParser(ns_clean=True, remove_comments=True),
     )
Пример #2
0
 def __init__(self, html_data, isFile):
     """Parse ``html_data`` into ``self.html_tree`` with an XML parser.

     When ``isFile`` is truthy the data is handed to ``etree.parse``;
     otherwise it is handed to ``etree.fromstring``.
     """
     self.es_ops = ElasticSearchOperate()
     self.html_data = html_data
     self.newLine_nonBreak_pattern = re.compile(self.newLine_nonBreak_regex)
     xml_parser = etree.XMLParser(ns_clean=True, remove_comments=True)
     self.html_tree = (
         etree.parse(self.html_data, xml_parser)
         if isFile
         else etree.fromstring(self.html_data, xml_parser)
     )
Пример #3
0
 def __init__(self, html_data, is_file):
     """Parse ``html_data`` into ``self.html_tree`` with an HTML parser.

     A truthy ``is_file`` selects ``etree.parse``; a falsy one selects
     ``etree.fromstring``. Both receive the same parser, configured to
     drop blank text and comments.
     """
     self.es_ops = ElasticSearchOperate()
     self.html_data = html_data
     regex = self.new_line_non_break_regex
     self.new_line_non_break_pattern = re.compile(regex)
     html_parser = etree.HTMLParser(
         remove_blank_text=True, remove_comments=True)
     # Choose the loader first, then make a single call.
     load = etree.parse if is_file else etree.fromstring
     self.html_tree = load(self.html_data, html_parser)
Пример #4
0
 def __init__(self, *args, **kwargs):
     """Forward all arguments to the parent initializer, then create
     the ``ElasticSearchOperate`` instance stored in ``self.es_ops``."""
     parent = super(TestWikiScraper, self)
     parent.__init__(*args, **kwargs)
     self.es_ops = ElasticSearchOperate()
Пример #5
0
 def __init__(self, page_list):
     """Store ``page_list`` and create the ``ElasticSearchOperate``
     instance kept in ``self.es_ops``."""
     self.page_list = page_list
     es_client = ElasticSearchOperate()
     self.es_ops = es_client
Пример #6
0
def search_rank(query):
    """Run ``query`` through ``ElasticSearchOperate.search_wiki_article``.

    The title of every result is written to the debug log, and the
    search results are returned unchanged.
    """
    matches = ElasticSearchOperate().search_wiki_article(query)
    titles = []
    for match in matches:
        titles.append(match.get_wiki_title())
    message = "Ranked Wiki Pages Title: {0}".format(titles)
    logging.debug(message)
    return matches
Пример #7
0
import logging
from qas.esstore.es_operate import ElasticSearchOperate
"""
Created by felix on 25/3/18 at 5:19 PM
"""

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


def search_rank(query):
    """Search the wiki index for ``query`` and return the results.

    Args:
        query: Query object passed unchanged to
            ``ElasticSearchOperate.search_wiki_article``.

    Returns:
        Whatever ``search_wiki_article`` returns. Each item is expected
        to expose ``get_wiki_title()``, which is used for the debug log.
    """
    es = ElasticSearchOperate()
    result_all = es.search_wiki_article(query)
    # Log through the module-level logger rather than the root logger so
    # the record carries this module's name and can be filtered per
    # module. The %-style argument defers string formatting until a
    # handler actually accepts the record.
    logger.debug(
        "Ranked Wiki Pages Title: %s",
        [result.get_wiki_title() for result in result_all])
    return result_all


if __name__ == "__main__":
    # Script entry point: run one sample query and print each result title.
    logging.basicConfig(level=logging.DEBUG)

    # Alternative sample query, kept for manual experiments:
    # [[['Cushman', 'known', 'Wakefield', 'are'], [['Cushman', 'Wakefield'], 'or'], [], []]]
    mquery = [[['Albert', 'Einstein', 'birth'], [], [], []]]

    les = ElasticSearchOperate()
    res_all = les.search_wiki_article(mquery)
    for res in res_all:
        title = res.get_wiki_title()
        print(title)