Example #1
def reindex_uptime():
    try:
        Datastores.es.indices.delete("rcae_uptime_ce_*")
    except elasticsearch.exceptions.NotFoundError as e:
        log.error(e)

    log.info("reindexing uptime")
    i = 0
    task_start = datetime.datetime.now()
    uptime_tables = detect_tables("ae_uptime_ce_metrics_p_")
    for partition_table in uptime_tables:
        conn = DBSession.connection().execution_options(stream_results=True)
        result = conn.execute(partition_table.select())
        while True:
            chunk = result.fetchmany(2000)
            if not chunk:
                break
            es_docs = defaultdict(list)
            for row in chunk:
                i += 1
                item = UptimeMetric(**dict(list(row.items())))
                d_range = item.partition_id
                es_docs[d_range].append(item.es_doc())
            if es_docs:
                name = partition_table.name
                log.info("round  {}, {}".format(i, name))
                for k, v in es_docs.items():
                    to_update = {"_index": k, "_type": "log"}
                    for doc in v:
                        doc.update(to_update)
                    elasticsearch.helpers.bulk(Datastores.es, v)

    log.info("total docs {} {}".format(i,
                                       datetime.datetime.now() - task_start))
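
The example depends on a project helper, detect_tables, to discover the PostgreSQL partition tables matching a name prefix. A minimal sketch of what such a helper could look like using SQLAlchemy reflection (hypothetical; the project's real implementation may differ, and DBSession is assumed to be bound to an engine as in the example above):

import sqlalchemy as sa

def detect_tables(table_prefix):
    # Hypothetical sketch: reflect every table whose name starts with
    # table_prefix from the database DBSession is bound to.
    engine = DBSession.get_bind()
    metadata = sa.MetaData()
    return [
        sa.Table(table_name, metadata, autoload_with=engine)
        for table_name in sa.inspect(engine).get_table_names()
        if table_name.startswith(table_prefix)
    ]
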
Example #2
def reindex_slow_calls():
    try:
        Datastores.es.delete_index('rcae_sc*')
    except Exception as e:
        log.error(e)

    log.info('reindexing slow calls')
    i = 0
    task_start = datetime.datetime.now()
    slow_calls_tables = detect_tables('slow_calls_p_')
    for partition_table in slow_calls_tables:
        conn = DBSession.connection().execution_options(stream_results=True)
        result = conn.execute(partition_table.select())
        while True:
            chunk = result.fetchmany(2000)
            if not chunk:
                break
            es_docs = defaultdict(list)
            for row in chunk:
                i += 1
                item = SlowCall(**dict(list(row.items())))
                d_range = item.partition_id
                es_docs[d_range].append(item.es_doc())
            if es_docs:
                name = partition_table.name
                log.info('round {}, {}'.format(i, name))
                for k, v in es_docs.items():
                    Datastores.es.bulk_index(k, 'log', v)

    log.info('total docs {} {}'.format(i,
                                       datetime.datetime.now() - task_start))
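
This example talks to Elasticsearch through the older pyelasticsearch-style client (delete_index, bulk_index), whereas Example #1 uses the official elasticsearch-py client. For reference, a rough equivalent of the bulk_index call written against the official client, assuming Datastores.es were an elasticsearch-py instance as in Example #1:

import elasticsearch.helpers

for k, v in es_docs.items():
    # tag each document with its target index and type, then hand the
    # batch to the official bulk helper (mirrors Example #1)
    actions = [dict(doc, _index=k, _type='log') for doc in v]
    elasticsearch.helpers.bulk(Datastores.es, actions)
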
Example #3
def partitions_remove(request):
    permanent_partitions, daily_partitions = get_partition_stats()
    pg_partitions = []
    es_partitions = []
    for item in list(permanent_partitions.values()) + list(
            daily_partitions.values()):
        es_partitions.extend(item["elasticsearch"])
        pg_partitions.extend(item["pg"])
    FormCls = get_partition_deletion_form(es_partitions, pg_partitions)
    form = FormCls(
        es_index=request.unsafe_json_body["es_indices"],
        pg_index=request.unsafe_json_body["pg_indices"],
        confirm=request.unsafe_json_body["confirm"],
        csrf_context=request,
    )
    if form.validate():
        for ix in form.data["es_index"]:
            log.warning("deleting ES partition: {}".format(ix))
            Datastores.es.indices.delete(ix)
        for ix in form.data["pg_index"]:
            log.warning("deleting PG partition: {}".format(ix))
            # form validation above restricts ix to known partition names
            stmt = sa.text("DROP TABLE %s CASCADE" % ix)
            session = DBSession()
            session.connection().execute(stmt)
            mark_changed(session)

    for field, error in form.errors.items():
        msg = "%s: %s" % (field, error[0])
        request.session.flash(msg, "error")

    permanent_partitions, daily_partitions = get_partition_stats()
    return {
        "permanent_partitions": sorted(
            permanent_partitions.items(), key=lambda x: x[0], reverse=True
        ),
        "daily_partitions": sorted(
            daily_partitions.items(), key=lambda x: x[0], reverse=True
        ),
    }
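
Because the partition name is interpolated directly into the DDL string, the DROP TABLE statement can additionally be hardened by quoting the identifier through the dialect's preparer. A defensive sketch only, since the form validation above already restricts ix to known partition names:

# quote the identifier before interpolating it into the DDL
preparer = DBSession.get_bind().dialect.identifier_preparer
stmt = sa.text("DROP TABLE %s CASCADE" % preparer.quote(ix))
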
Example #4
def reindex_reports():
    reports_groups_tables = detect_tables('reports_groups_p_')
    try:
        Datastores.es.delete_index('rcae_r*')
    except Exception as e:
        log.error(e)

    log.info('reindexing report groups')
    i = 0
    task_start = datetime.datetime.now()
    for partition_table in reports_groups_tables:
        conn = DBSession.connection().execution_options(stream_results=True)
        result = conn.execute(partition_table.select())
        while True:
            chunk = result.fetchmany(2000)
            if not chunk:
                break
            es_docs = defaultdict(list)
            for row in chunk:
                i += 1
                item = ReportGroup(**dict(list(row.items())))
                d_range = item.partition_id
                es_docs[d_range].append(item.es_doc())
            if es_docs:
                name = partition_table.name
                log.info('round {}, {}'.format(i, name))
                for k, v in es_docs.items():
                    Datastores.es.bulk_index(k, 'report_group', v,
                                             id_field='_id')

    log.info('total docs {} {}'.format(i,
                                       datetime.datetime.now() - task_start))

    i = 0
    log.info('reindexing reports')
    task_start = datetime.datetime.now()
    reports_tables = detect_tables('reports_p_')
    for partition_table in reports_tables:
        conn = DBSession.connection().execution_options(stream_results=True)
        result = conn.execute(partition_table.select())
        while True:
            chunk = result.fetchmany(2000)
            if not chunk:
                break
            es_docs = defaultdict(list)
            for row in chunk:
                i += 1
                item = Report(**dict(list(row.items())))
                d_range = item.partition_id
                es_docs[d_range].append(item.es_doc())
            if es_docs:
                name = partition_table.name
                log.info('round {}, {}'.format(i, name))
                for k, v in es_docs.items():
                    Datastores.es.bulk_index(k, 'report', v,
                                             id_field='_id',
                                             parent_field='_parent')

    log.info('total docs {} {}'.format(i,
                                       datetime.datetime.now() - task_start))

    log.info('reindexing reports stats')
    i = 0
    task_start = datetime.datetime.now()
    reports_stats_tables = detect_tables('reports_stats_p_')
    for partition_table in reports_stats_tables:
        conn = DBSession.connection().execution_options(stream_results=True)
        result = conn.execute(partition_table.select())
        while True:
            chunk = result.fetchmany(2000)
            if not chunk:
                break
            es_docs = defaultdict(list)
            for row in chunk:
                rd = dict(list(row.items()))
                # drop the legacy 'size' column
                # TODO: remove the column from the schema later
                rd.pop('size', None)
                item = ReportStat(**rd)
                i += 1
                d_range = item.partition_id
                es_docs[d_range].append(item.es_doc())
            if es_docs:
                name = partition_table.name
                log.info('round {}, {}'.format(i, name))
                for k, v in es_docs.items():
                    Datastores.es.bulk_index(k, 'log', v)

    log.info('total docs {} {}'.format(i,
                                       datetime.datetime.now() - task_start))
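
All four examples repeat the same streaming pattern: open a connection with stream_results=True so a server-side cursor is used, fetch rows with fetchmany in chunks of 2000, and bulk-index each chunk. A sketch of how that loop could be factored into a reusable generator (hypothetical helper; DBSession as in the examples above):

def stream_chunks(partition_table, chunk_size=2000):
    # stream rows from one partition table in fixed-size chunks so the
    # whole table never has to fit in memory at once
    conn = DBSession.connection().execution_options(stream_results=True)
    result = conn.execute(partition_table.select())
    while True:
        chunk = result.fetchmany(chunk_size)
        if not chunk:
            break
        yield chunk

Each reindex loop above then reduces to iterating for chunk in stream_chunks(partition_table) and building es_docs from the rows.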