def get_tweets(self, index, doc_field):
    """Fetch all documents in *index* that contain *doc_field*, tokenizing that field.

    Pages through the whole index with the Elasticsearch scroll API and
    returns a list of dicts shaped like ES hits:
    ``{'_source': {doc_field: <tokens>, 'timestamp_ms': <ts>}}``.

    :param index: name of the Elasticsearch index to read from.
    :param doc_field: source field to extract and tokenize (via ``self.tknzr``).
    :return: list of tokenized pseudo-hits for every matching document.
    """
    my_connector = Es_connector(index=index)
    all_tweets = []
    # Only pull the two fields we need; restrict to docs that have doc_field.
    query = {
        "_source": [doc_field, "timestamp_ms"],
        "query": {
            "exists": {
                "field": doc_field
            }
        }
    }
    res = my_connector.init_paginatedSearch(query)
    sid = res["sid"]
    scroll_size = res["scroll_size"]
    # Analyse and process page by page until the scroll is exhausted.
    # (The original kept a `processed_tweets` counter that was never read;
    # it has been removed as dead code.)
    while scroll_size > 0:
        tweets = res["results"]
        all_tweets.extend([{
            '_source': {
                doc_field: self.tknzr.tokenize(tweet["_source"][doc_field]),
                "timestamp_ms": tweet["_source"]["timestamp_ms"]
            }
        } for tweet in tweets])
        res = my_connector.loop_paginatedSearch(sid, scroll_size)
        scroll_size = res["scroll_size"]
    return all_tweets
def download_tweets_from_elastic(self, **kwargs):
    """Stream tweets matching ``kwargs['query']`` out of Elasticsearch into folders.

    Required kwargs: ``index``, ``query``, ``field``, ``folder``.
    Optional kwargs: ``config_relative_path`` (forwarded to Es_connector),
    ``debug_limit`` (stop after one extra page, default False),
    ``log_enabled`` (print download progress, default True).

    :return: total number of matching documents reported by Elasticsearch.
    """
    debug_limit = kwargs.get("debug_limit", False)
    log_enabled = kwargs.get("log_enabled", True)

    # Build the connector arguments once; only forward the config path
    # when the caller supplied one.
    connector_kwargs = {"index": kwargs["index"], "doc_type": "tweet"}
    if "config_relative_path" in kwargs:
        connector_kwargs["config_relative_path"] = kwargs["config_relative_path"]
    my_connector = Es_connector(**connector_kwargs)

    res = my_connector.init_paginatedSearch(kwargs["query"])
    sid = res["sid"]
    scroll_size = res["scroll_size"]
    total = int(res["total"])

    # First page is written immediately; subsequent pages inside the loop.
    processed = len(res["results"])
    self.write_data_in_folders(kwargs["field"], kwargs["folder"], res["results"])

    while scroll_size > 0:
        res = my_connector.loop_paginatedSearch(sid, scroll_size)
        scroll_size = res["scroll_size"]
        processed += len(res["results"])
        # Write the freshly retrieved page into the folders.
        self.write_data_in_folders(kwargs["field"], kwargs["folder"], res["results"])
        if log_enabled:
            print("Downloading: ", round(processed * 100 / total, 2), "%")
        if debug_limit:
            # Debug mode: fetch and write one extra page, then bail out.
            print("\nDEBUG LIMIT\n")
            res = my_connector.loop_paginatedSearch(sid, scroll_size)
            self.write_data_in_folders(kwargs["field"], kwargs["folder"], res["results"])
            scroll_size = 0

    return total
def generate_ngrams_for_index(self, **kwargs):
    """Generate n-grams for every document page of an index, tracking progress.

    Required kwargs: ``index``, ``from_property``, ``prop``, ``length``.
    Optional kwargs: ``query`` (defaults to match_all).

    Updates ``self.current_thread_percentage`` as pages are processed so a
    client can poll progress; set to 100 on completion.

    :return: True on success, False if any exception was raised.
    """
    try:
        # Get the data for performing a paginated search.
        self.current_thread_percentage = 0
        print("Starting")
        my_connector = Es_connector(index=kwargs["index"])
        query = kwargs.get('query', {"query": {"match_all": {}}})
        res = my_connector.init_paginatedSearch(query)
        sid = res["sid"]
        scroll_size = res["scroll_size"]
        total = int(res["total"])

        # BUGFIX: the original `int(total / scroll_size)` raised
        # ZeroDivisionError on an empty index (scroll_size == 0) and produced
        # total_scrolls == 0 whenever total < scroll_size, which then crashed
        # the percentage division inside the loop. Use ceiling division
        # clamped to at least 1.
        if scroll_size > 0:
            total_scrolls = max(1, -(-total // scroll_size))
        else:
            total_scrolls = 1
        processed_scrolls = 0
        print("from_property:", kwargs['from_property'])

        # Analyse and process page by page.
        while scroll_size > 0:
            tweets = res["results"]
            self.gerenate_ngrams_for_tweets(
                tweets,
                from_property=kwargs['from_property'],
                prop=kwargs["prop"],
                index=kwargs["index"],
                length=kwargs["length"])
            res = my_connector.loop_paginatedSearch(sid, scroll_size)
            scroll_size = res["scroll_size"]
            processed_scrolls += 1
            self.current_thread_percentage = round(
                processed_scrolls * 100 / total_scrolls, 0)
            print("Completed: ", self.current_thread_percentage, "%")

        # Set it at the end so the client knows when to stop asking for logs.
        self.current_thread_percentage = 100
        return True
    except Exception as e:
        print('Error: ' + str(e))
        return False
def get_tweets_scroll(self, index, sid, scroll_size):
    """Advance the Elasticsearch scroll *sid* by one page and return the raw response."""
    return Es_connector(index=index).loop_paginatedSearch(sid, scroll_size)
print("Languages for stopwords: ", ngramsAnalizer.retrievedLangs)
try:
    my_connector = Es_connector(index=index)
    query = {"query": {"match": {"lang": "en or fr or es"}}}
    res = my_connector.init_paginatedSearch(query=query)
    sid = res["sid"]
    scroll_size = res["scroll_size"]
    init_total = int(res["total"])
    accum_total = 0
    print("\nTotal = ", init_total)
    print("\nScroll = ", scroll_size)
    print("\nLangs = ", langs)
    while scroll_size > 0:
        generate_text_images_prop(res["results"], langs)
        # BUGFIX: count the page that was just processed. The original added
        # the *next* page's scroll_size after the fetch, so the first page was
        # never counted and the reported progress could never reach 100%.
        accum_total += len(res["results"])
        print(accum_total * 100 / init_total, "%")
        res = my_connector.loop_paginatedSearch(sid, scroll_size)
        scroll_size = res["scroll_size"]
except Exception as e:
    print('Error: ' + str(e))