Example #1
 def run(self):
     self.execute_query()
     try:
         self.DB_CONNECT.commit()
         return self.DB_CURSOR.rowcount
     except Exception as e:
         ES_LOGGER.debug("CRUD error: %s" % e)
Example #2
def emergency_process(w_queue, w_lock, dbs, db_name, data_lines_number):
    ES_LOGGER.info("Emergency process index Host: %s DB: %s Start" % (dbs['db_host'], db_name))
    db_connect = init_database(dbs['db_host'], dbs['db_user'], dbs['db_pass'], db_name, dbs['db_port'],
                               dbs['db_charset'])

    # MySQL idiom: the huge second LIMIT value means "from this offset to the end of the result set"
    emergency_sql = "%s limit %s,18446744073709551615" % (dbs['sql'], data_lines_number)
    ES_LOGGER.info("Host: %s DB: %s Start!SQL: %s" % (dbs['db_host'], db_name, emergency_sql))
    read_line_number = SQL_NUMBER
    for result_lines in db_connect.query(emergency_sql, []).stream_result(line=read_line_number):
        if result_lines == 'Exception':
            db_connect.release()
            emergency_process(w_queue, w_lock, dbs, db_name, data_lines_number)
            break
        if len(result_lines) == 0:
            break
        if w_queue.qsize() + read_line_number <= QUEUE_LENGTH:
            w_lock.acquire()
            for result in result_lines:
                bulk_list = dict(zip(dbs['doc_field'], list(result)))
                w_queue.put(bulk_list)
            data_lines_number += len(result_lines)
            ES_LOGGER.info("Index Host: %s DB: %s Data: %s" % (dbs['db_host'], db_name, data_lines_number),)
            w_lock.release()
        else:
            while w_queue.qsize() + read_line_number > QUEUE_LENGTH:
                ES_LOGGER.info("Queue is full. Sleep 5")
                sleep(5)
            else:  # runs once the queue has drained enough to accept this batch
                w_lock.acquire()
                for result in result_lines:
                    bulk_list = dict(zip(dbs['doc_field'], list(result)))
                    w_queue.put(bulk_list)
                data_lines_number += len(result_lines)
                ES_LOGGER.info("Index Host: %s DB: %s Data: %s" % (dbs['db_host'], db_name, data_lines_number),)
                w_lock.release()
Example #3
 def execute_query(self):
     if self.DB_QUERY is None:
         ES_LOGGER.warning("SQL statement is missing")
     try:
         self.DB_CURSOR.execute(self.DB_QUERY, self.DB_ARGS)
     except Exception as e:
         self.DB_CONNECT.rollback()
         ES_LOGGER.debug("Database execution error: %s" % e)
Example #4
 def stream_data(self, line):
     try:
         while True:
             rows = self.DB_CURSOR.fetchmany(size=line)
             if not rows:
                 # self.release()
                 yield []
             else:
                 yield rows
     except Exception as e:
         ES_LOGGER.debug("Data read error, reconnecting: %s" % e)
         yield 'Exception'
Example #5
 def __init__(self, db_host, db_user, db_pass, db_name, db_port, charset):
     self.DB_QUERY = None
     self.DB_CONNECT = None
     self.DB_CURSOR = None
     self.DB_ARGS = None
     try:
         self.DB_CONNECT = MySQLdb.connect(db_host, db_user, db_pass, db_name, db_port, charset=charset,
                                           cursorclass=MySQLdb.cursors.SSCursor, connect_timeout=36000000)
         self.DB_CURSOR = self.DB_CONNECT.cursor()
         # MySQLdb.cursors.SSCursor cannot return rows as dicts directly (unlike MySQLdb.cursors.DictCursor); see the sketch after this example
     except Exception as e:
         ES_LOGGER.debug("Database connection error: %s" % e)
Example #6
def main():

    try:
        for dbs in DATABASES:
            if isinstance(dbs['db_name'], list):
                for db_name in dbs['db_name']:
                    process_pool = ProcessPool(2)
                    managers = Manager()
                    w_queue = managers.Queue(QUEUE_LENGTH)
                    w_lock = managers.Lock()
                    process_pool.apply_async(write_database, args=(w_queue, w_lock, dbs, db_name))
                    process_pool.apply_async(bulk_elasticsearch, args=(w_queue, w_lock, dbs, db_name))
                    process_pool.close()
                    process_pool.join()
    except Exception as e:
        ES_LOGGER.error("BIG ERROR! %s", e)
Example #7
def main():

    try:
        for dbs in DATABASES:
            if isinstance(dbs["db_name"], list):
                for db_name in dbs["db_name"]:
                    process_pool = ProcessPool(2)
                    managers = Manager()
                    w_queue = managers.Queue(QUEUE_LENGTH)
                    w_lock = managers.Lock()
                    process_pool.apply_async(write_database, args=(w_queue, w_lock, dbs, db_name))
                    process_pool.apply_async(bulk_elasticsearch, args=(w_queue, w_lock, dbs, db_name))
                    process_pool.close()
                    process_pool.join()
    except Exception as e:
        ES_LOGGER.error("BIG ERROR! %s", e)
Example #8
def bulk_elasticsearch(r_queue, w_lock, dbs, db_name):
    ES_LOGGER.info("Bulk Host: %s DB: %s Start" % (dbs['db_host'], db_name))
    es = Elasticsearch(dbs['es_colony'], retry_on_timeout=True, max_retries=3, timeout=3600)
    flag = True
    bulks = []
    data_lines_number = 0
    bulk_length = 0
    while flag:
        while not r_queue.empty():
            if bulk_length == 0:
                w_lock.acquire()
            data = r_queue.get()
            data_lines_number += 1
            bulk_length += 1
            if bulk_length >= BULK_LENGTH or r_queue.empty():
                w_lock.release()
            if isinstance(data, str) and data == 'False':
                try:
                    ES_LOGGER.info("Bulk Host: %s DB: %s Data: %s" % (dbs['db_host'], db_name, bulk_length))
                    streaming_bulks = helpers.streaming_bulk(es, bulks, chunk_size=len(bulks))
                    for streaming_bulk in streaming_bulks:
                        if streaming_bulk[0]:
                            pass
                    bulks = []
                except Exception as e:
                    ES_LOGGER.warning(e)
                flag = False
                break
            bulks.append({
                "_index": dbs['index'],
                "_type": dbs['doc_type'],
                "_source": data
            })
            if bulk_length >= BULK_LENGTH:
                try:
                    ES_LOGGER.info("Bulk Host: %s DB: %s Data: %s" % (dbs['db_host'], db_name, data_lines_number),)
                    streaming_bulks = helpers.streaming_bulk(es, bulks, chunk_size=len(bulks))
                    for streaming_bulk in streaming_bulks:
                        if streaming_bulk[0]:
                            pass
                    bulks = []
                    bulk_length = 0
                except Exception as e:
                    ES_LOGGER.warning("Bulk Error! %s", e)
Example #9
def bulk_elasticsearch(r_queue, w_lock, dbs, db_name):
    ES_LOGGER.info("Bulk Host: %s DB: %s Start" % (dbs["db_host"], db_name))
    es = Elasticsearch(dbs["es_colony"], retry_on_timeout=True, max_retries=3, timeout=3600)
    flag = True
    bulks = []
    data_lines_number = 0
    bulk_length = 0
    while flag:
        while not r_queue.empty():
            if bulk_length == 0:
                w_lock.acquire()
            data = r_queue.get()
            data_lines_number += 1
            bulk_length += 1
            if bulk_length >= BULK_LENGTH or r_queue.empty():
                w_lock.release()
            if isinstance(data, str) and data == "False":
                try:
                    ES_LOGGER.info("Bulk Host: %s DB: %s Data: %s" % (dbs["db_host"], db_name, bulk_length))
                    streaming_bulks = helpers.streaming_bulk(es, bulks, chunk_size=len(bulks))
                    for streaming_bulk in streaming_bulks:
                        if streaming_bulk[0]:
                            pass
                    bulks = []
                except Exception as e:
                    ES_LOGGER.warning(e)
                flag = False
                break
            bulks.append({"_index": dbs["index"], "_type": dbs["doc_type"], "_source": data})
            if bulk_length >= BULK_LENGTH:
                try:
                    ES_LOGGER.info("Bulk Host: %s DB: %s Data: %s" % (dbs["db_host"], db_name, data_lines_number))
                    streaming_bulks = helpers.streaming_bulk(es, bulks, chunk_size=len(bulks))
                    for streaming_bulk in streaming_bulks:
                        if streaming_bulk[0]:
                            pass
                    bulks = []
                    bulk_length = 0
                except Exception as e:
                    ES_LOGGER.warning("Bulk Error! %s", e)
Example #10
def emergency_process(w_queue, w_lock, dbs, db_name, data_lines_number):
    ES_LOGGER.info("Emergency process index Host: %s DB: %s Start" % (dbs["db_host"], db_name))
    db_connect = init_database(
        dbs["db_host"], dbs["db_user"], dbs["db_pass"], db_name, dbs["db_port"], dbs["db_charset"]
    )

    # MySQL idiom: the huge second LIMIT value means "from this offset to the end of the result set"
    emergency_sql = "%s limit %s,18446744073709551615" % (dbs["sql"], data_lines_number)
    ES_LOGGER.info("Host: %s DB: %s Start!SQL: %s" % (dbs["db_host"], db_name, emergency_sql))
    read_line_number = SQL_NUMBER
    for result_lines in db_connect.query(emergency_sql, []).stream_result(line=read_line_number):
        if result_lines == "Exception":
            db_connect.release()
            emergency_process(w_queue, w_lock, dbs, db_name, data_lines_number)
            break
        if len(result_lines) == 0:
            break
        if w_queue.qsize() + read_line_number <= QUEUE_LENGTH:
            w_lock.acquire()
            for result in result_lines:
                bulk_list = dict(zip(dbs["doc_field"], list(result)))
                w_queue.put(bulk_list)
            data_lines_number += len(result_lines)
            ES_LOGGER.info("Index Host: %s DB: %s Data: %s" % (dbs["db_host"], db_name, data_lines_number))
            w_lock.release()
        else:
            while w_queue.qsize() + read_line_number > QUEUE_LENGTH:
                ES_LOGGER.info("Queue is full. Sleep 5")
                sleep(5)
            else:  # runs once the queue has drained enough to accept this batch
                w_lock.acquire()
                for result in result_lines:
                    bulk_list = dict(zip(dbs["doc_field"], list(result)))
                    w_queue.put(bulk_list)
                data_lines_number += len(result_lines)
                ES_LOGGER.info("Index Host: %s DB: %s Data: %s" % (dbs["db_host"], db_name, data_lines_number))
                w_lock.release()
Example #11
def init_database(db_host, db_user, db_pass, db_name, db_port, db_charset):
    ES_LOGGER.info("Connect to %s db: %s" % (db_host, db_name))
    return MysqlDrive(db_host, db_user, db_pass, db_name, db_port, db_charset)
Example #12
                "_type": dbs['doc_type'],
                "_source": data
            })
            if bulk_length >= BULK_LENGTH:
                try:
                    ES_LOGGER.info("Bulk Host: %s DB: %s Data: %s" % (dbs['db_host'], db_name, data_lines_number),)
                    streaming_bulks = helpers.streaming_bulk(es, bulks, chunk_size=len(bulks))
                    for streaming_bulk in streaming_bulks:
                        if streaming_bulk[0]:
                            pass
                    bulks = []
                    bulk_length = 0
                except Exception as e:
                    ES_LOGGER.warning("Bulk Error! %s", e)

        ES_LOGGER.info("Queue is empty. Sleep 10")
        sleep(10)
    ES_LOGGER.info("Bulk Host: %s DB: %s Finish! Data: %s" % (dbs['db_host'], db_name, data_lines_number))


def write_database(w_queue, w_lock, dbs, db_name):
    ES_LOGGER.info("Index Host: %s DB: %s Start" % (dbs['db_host'], db_name))
    db_connect = init_database(dbs['db_host'], dbs['db_user'], dbs['db_pass'], db_name, dbs['db_port'],
                               dbs['db_charset'])
    ES_LOGGER.info("Host: %s DB: %s Start!SQL: %s" % (dbs['db_host'], db_name, dbs['sql']))
    data_lines_number = 0
    read_line_number = SQL_NUMBER
    for result_lines in db_connect.query(dbs['sql'], []).stream_result(line=read_line_number):
        if result_lines == 'Exception':
            db_connect.release()
            emergency_process(w_queue, w_lock, dbs, db_name, data_lines_number)
Example #13
def init_database(db_host, db_user, db_pass, db_name, db_port, db_charset):
    ES_LOGGER.info("Connect to %s db: %s" % (db_host, db_name))
    return MysqlDrive(db_host, db_user, db_pass, db_name, db_port, db_charset)
Example #14
                flag = False
                break
            bulks.append({"_index": dbs["index"], "_type": dbs["doc_type"], "_source": data})
            if bulk_length >= BULK_LENGTH:
                try:
                    ES_LOGGER.info("Bulk Host: %s DB: %s Data: %s" % (dbs["db_host"], db_name, data_lines_number))
                    streaming_bulks = helpers.streaming_bulk(es, bulks, chunk_size=len(bulks))
                    for streaming_bulk in streaming_bulks:
                        if streaming_bulk[0]:
                            pass
                    bulks = []
                    bulk_length = 0
                except Exception as e:
                    ES_LOGGER.warning("Bulk Error! %s", e)

        ES_LOGGER.info("Queue is empty. Sleep 10")
        sleep(10)
    ES_LOGGER.info("Bulk Host: %s DB: %s Finish! Data: %s" % (dbs["db_host"], db_name, data_lines_number))


def write_database(w_queue, w_lock, dbs, db_name):
    ES_LOGGER.info("Index Host: %s DB: %s Start" % (dbs["db_host"], db_name))
    db_connect = init_database(
        dbs["db_host"], dbs["db_user"], dbs["db_pass"], db_name, dbs["db_port"], dbs["db_charset"]
    )
    ES_LOGGER.info("Host: %s DB: %s Start!SQL: %s" % (dbs["db_host"], db_name, dbs["sql"]))
    data_lines_number = 0
    read_line_number = SQL_NUMBER
    for result_lines in db_connect.query(dbs["sql"], []).stream_result(line=read_line_number):
        if result_lines == "Exception":
            db_connect.release()