Exemplo n.º 1
0
def read_page(data_part, source):
    """Download ``source`` and queue the decoded page for processing.

    The request is sent with the HTTP headers stored in
    ``data_part["headers"]``.  The response body is decoded with the charset
    announced in the ``Content-Type`` response header (UTF-8 when none is
    announced).  A copy of ``data_part`` extended with the decoded text under
    the ``"page"`` key is serialised to JSON, stored through
    ``gqueue.save_body`` and handed to the ``web_mon.run_data_part`` endpoint
    as a new task.

    Args:
        data_part: Mapping describing the job; must contain ``"headers"``.
        source: URL to download.

    Returns:
        str: The number of keys in the queued payload, as a string.
    """
    import gqueue
    import requests

    log_line("read_page: " + source)
    headers = data_part["headers"]
    with requests.Session() as ss:
        ss.headers.update(headers)
        with ss.get(source) as resp:
            content_type = resp.headers.get('content-type')
            chset = 'utf-8'
            if content_type:
                match_chset = re.search(r'(charset\W+)([^\;]+)',
                                        content_type,
                                        flags=re.IGNORECASE)
                # The parameter value may be quoted (charset="utf-8") or
                # padded with blanks; neither belongs to the codec name.
                declared = (match_chset[2].strip(' \t"\'')
                            if match_chset else '')
                if declared:
                    chset = declared
                else:
                    log_line(
                        "read_page: content_type is {}".format(content_type))

            try:
                page = resp.content.decode(chset)
            except LookupError:
                # The server announced a codec Python does not know;
                # fall back to the default instead of failing the task.
                log_line(
                    "read_page: unknown charset {}, using utf-8".format(
                        chset))
                chset = 'utf-8'
                page = resp.content.decode(chset)

            result = dict(data_part)
            result["page"] = page

            log_line("read_page: page length is {}, chrset is {}".format(
                len(page), chset))

            gqueue.add_task(body=gqueue.save_body(json.dumps(result),
                                                  Q_KEY_TYPE),
                            uri=flask.url_for('web_mon.run_data_part'))

    return str(len(result))
Exemplo n.º 2
0
def task_email_forward():
    """Queue a task for the ``gqueue.forward_next_email`` endpoint.

    The task body is a fixed JSON document naming the mail folder and the
    receiver address.

    Returns:
        The ``name`` attribute of the object returned by ``gqueue.add_task``.
    """
    payload = '{"mailfolder": "2Vera", "receiver": "*****@*****.**"}'
    target = flask.url_for('gqueue.forward_next_email')
    return gqueue.add_task(body=payload, uri=target).name
Exemplo n.º 3
0
def run_mon():
    """Queue one ``web_mon.run_data_part`` task per data entry of the setup.

    Every entry of every setup item returned by ``read_setup()`` is copied,
    given the ``headers`` of its parent setup item, serialised to JSON and
    stored through ``gqueue.save_body`` before being queued.  The final
    entry of the final setup item is additionally tagged with
    ``"last": True``.

    Returns:
        str: ``"Ok - "`` followed by the number of setup items.
    """
    import gqueue

    mon_setup = read_setup()
    final_setup_idx = len(mon_setup) - 1

    for setup_idx, setup in enumerate(mon_setup):
        entries = setup["data"]
        final_entry_idx = len(entries) - 1
        for entry_idx, entry in enumerate(entries):
            payload = dict(entry)
            payload["headers"] = setup["headers"]
            # Only the very last entry overall carries the "last" marker.
            if setup_idx == final_setup_idx and entry_idx == final_entry_idx:
                payload["last"] = True
            stored = gqueue.save_body(json.dumps(payload), Q_KEY_TYPE)
            gqueue.add_task(body=stored,
                            uri=flask.url_for('web_mon.run_data_part'))

    return "Ok - " + str(len(mon_setup))
Exemplo n.º 4
0
def send_results():
    """Queue one e-mail task per recipient for the stored results.

    While ``gqueue.is_tasks_exist`` reports other tasks (this endpoint
    excluded), nothing is sent: the function re-queues itself to run again
    in 60 seconds.  Otherwise the ``KIND_WM_RESULT`` entities are read
    ordered by ``send_to``, consecutive entities with the same lower-cased
    recipient are merged into one message (bodies concatenated, entity keys
    collected) and a ``web_mon.send_email_result`` task is queued for every
    recipient whose merged body is non-empty.  The tasks are staggered: the
    first is delayed by 120 seconds and each following one by 5 seconds
    more.

    Returns:
        str: ``"Rerun later"`` when the run was postponed, else ``"Ok"``.
    """
    import gqueue

    if gqueue.is_tasks_exist(except_of=flask.url_for('web_mon.send_results')):
        gqueue.add_task(body=json.dumps({"go": True}),
                        uri=flask.url_for('web_mon.send_results'),
                        in_seconds=60)
        return "Rerun later"

    from itertools import groupby

    from google.cloud import datastore

    dcli = datastore.Client(project=gqueue.PROJECT_ID)
    query = dcli.query(kind=KIND_WM_RESULT, order=["send_to"])

    log_line("send_results: query {}".format(str(query)))

    sender_count = 0

    # groupby merges *consecutive* entities with an equal key, which is what
    # the ordered query delivers; no sentinel value is needed, so no real
    # recipient can be confused with one.
    for recipient, group in groupby(
            query.fetch(), key=lambda ent: ent.get("send_to").lower()):
        entities = list(group)
        body = "".join(ent.get("body") for ent in entities)
        if not body:
            # Nothing to send for this recipient.
            continue

        msg = {
            "send_to": recipient,
            "body": body,
            "keys": [{"kind": ent.key.kind, "name": ent.key.name}
                     for ent in entities],
        }
        gqueue.add_task(body=gqueue.save_body(json.dumps(msg), Q_SEND_MAIL),
                        uri=flask.url_for('web_mon.send_email_result'),
                        in_seconds=120 + sender_count * 5)
        sender_count += 1

    return "Ok"
Exemplo n.º 5
0
def handling_sources_bunch():
    """Fan a JSON list of source descriptions out into queue tasks.

    The list is read from the request body via ``get_body()``.  When it
    holds at most ``MAX_SOURCES_BUNCH`` records, every record is queued for
    ``web_mon.run_data_part``, with each further group of five tasks delayed
    by two more seconds.  A longer list is cut into slices that are queued
    back to this same endpoint.

    Returns:
        str: Always ``"Ok"``.
    """
    parts = json.loads(get_body())
    log_line("handling_sources_bunch: sources - {}".format(len(parts)))

    import gqueue

    def enqueue_bunch(chunk):
        # Hand a slice of the list back to this handler.
        gqueue.add_task(
            body=gqueue.save_body(json.dumps(chunk), Q_KEY_TYPE),
            uri=flask.url_for('web_mon.handling_sources_bunch'))

    total = len(parts)  # total number of records

    if total <= MAX_SOURCES_BUNCH:
        for position, src in enumerate(parts):
            gqueue.add_task(body=gqueue.save_body(json.dumps(src), Q_KEY_TYPE),
                            uri=flask.url_for('web_mon.run_data_part'),
                            in_seconds=(position // 5) * 2)

        log_line("handling_sources_bunch: time shift {}".format(total))
        return "Ok"

    from math import ceil

    # Number of bunches to send.
    bunch_total = ceil(total / MAX_SOURCES_BUNCH)
    # Number of records distributed over those bunches.
    to_send = total
    if bunch_total > MAX_SOURCES_BUNCH:
        # Cap the number of bunches; what does not fit is sent as one
        # trailing bunch below.
        bunch_total = MAX_SOURCES_BUNCH - 1
        to_send = bunch_total * MAX_SOURCES_BUNCH

    # Number of records missing to reach a whole multiple of the maximum.
    shortfall = (MAX_SOURCES_BUNCH -
                 to_send % MAX_SOURCES_BUNCH) % MAX_SOURCES_BUNCH
    # shrink_all: by how much every bunch is reduced below the maximum;
    # shrink_extra: how many bunches are reduced by one record more.
    shrink_all, shrink_extra = divmod(shortfall, bunch_total)

    bunch_sizes = [
        MAX_SOURCES_BUNCH - shrink_all - (1 if idx < shrink_extra else 0)
        for idx in range(bunch_total)
    ]

    offset = 0
    for size in bunch_sizes:
        enqueue_bunch(parts[offset:offset + size])
        offset += size

    leftover = parts[offset:]
    if leftover:
        enqueue_bunch(leftover)

    return "Ok"
Exemplo n.º 6
0
def page_desc(data_part, page):
    """Parse a downloaded page and queue the follow-up work.

    The first entry of ``data_part["sources"]`` selects the parser by its
    ``"type"``: ``'jsp'`` uses ``plain_json`` and ``'xsearch_xp'`` uses
    ``xsearch`` on the HTML-unescaped page.  A non-empty parsed body is
    stored with ``save_mon_result``.  If the parser reported errors they
    are logged and the function returns early.

    Otherwise, when further source entries remain, new tasks are queued for
    them: one per extracted link (sent directly when there is exactly one,
    else as a list to ``web_mon.handling_sources_bunch``), or a single task
    when no links were extracted.  When no source entries remain and
    ``data_part`` carries a truthy ``"last"``, a ``web_mon.send_results``
    task is queued with a 60 second delay.

    Args:
        data_part: Mapping with ``"sources"`` and, for follow-up tasks,
            ``"headers"`` and ``"send_to"``; ``"last"`` is optional.
        page: Text of the downloaded page.

    Returns:
        str: ``"has N errors"`` when the parser reported errors, otherwise
        the number of keys in the last built payload, as a string.
    """
    sources = list(data_part.get("sources"))
    current = sources[0]
    src_type = current.get("type")
    body = links = errs = None

    log_line("page_desc: Type is {}".format(src_type))

    if src_type == 'jsp':
        body, links, errs = plain_json(page, current)
    elif src_type == 'xsearch_xp':
        import html as py_html
        # Make unescape() turn non-breaking spaces into plain spaces.
        py_html.entities.html5["nbsp"] = ' '
        py_html.entities.html5["nbsp;"] = ' '
        body, links, errs = xsearch(py_html.unescape(page), current)

    if body:
        save_mon_result(body, data_part.get("send_to"))

    if errs:
        for err in errs:
            log_line("page_desc error - {}".format(err))
        return "has " + str(len(errs)) + " errors"

    def size_or_none(value):
        # Report a length for anything that was produced, None otherwise.
        return value if value is None else len(value)

    log_line("page_desc: body - {}, links - {}, err - {}".format(
        size_or_none(body), size_or_none(links), size_or_none(errs)))

    import gqueue

    result = {}
    is_last = data_part.get("last")

    if len(sources) <= 1:
        # No further source entries: only the "last" job triggers sending.
        if is_last:
            result["go"] = True
            gqueue.add_task(body=json.dumps(result),
                            uri=flask.url_for('web_mon.send_results'),
                            in_seconds=60)
        return str(len(result))

    result["headers"] = data_part["headers"].copy()
    result["send_to"] = data_part["send_to"]

    if not links:
        result["sources"] = sources[1:]
        if is_last:
            result["last"] = True
        gqueue.add_task(body=gqueue.save_body(json.dumps(result),
                                              Q_KEY_TYPE),
                        uri=flask.url_for('web_mon.run_data_part'))
        return str(len(result))

    next_entry = sources[1]
    preset_source = next_entry.get("source")
    # A preset source is kept unless it is missing or starts with '#'.
    keep_preset = preset_source and not preset_source.startswith('#')

    import copy

    payloads = []
    final_idx = len(links) - 1
    for idx, lnk in enumerate(links):
        if not keep_preset:
            next_entry["source"] = str(lnk)
        result["sources"] = list(sources[1:])
        # Only the final link of the "last" job inherits the marker.
        if is_last and idx == final_idx:
            result["last"] = True
        payloads.append(copy.deepcopy(result))

    if len(payloads) == 1:
        gqueue.add_task(body=gqueue.save_body(json.dumps(payloads[0]),
                                              Q_KEY_TYPE),
                        uri=flask.url_for('web_mon.run_data_part'))
    else:
        gqueue.add_task(
            body=gqueue.save_body(json.dumps(payloads), Q_KEY_TYPE),
            uri=flask.url_for('web_mon.handling_sources_bunch'))

    return str(len(result))
Exemplo n.º 7
0
def run_web_mon():
    """Queue a body-less task for the ``web_mon.run_mon`` endpoint.

    Returns:
        The ``name`` attribute of the object returned by ``gqueue.add_task``.
    """
    target = flask.url_for('web_mon.run_mon')
    return gqueue.add_task(body=None, uri=target).name
Exemplo n.º 8
0
def bee_serv_check():
    """Queue a body-less task for the ``gqueue.view_bee_service`` endpoint.

    Returns:
        The ``name`` attribute of the object returned by ``gqueue.add_task``.
    """
    target = flask.url_for('gqueue.view_bee_service')
    return gqueue.add_task(body=None, uri=target).name