class Convert:
    """Pair dated "record" and "result" MySQL tables and queue a join per pair.

    Record tables are named ``[afjtz]`` + eight digits and result tables
    ``[puo]`` + eight digits.  Tables whose eight-digit suffixes are equal
    are joined with ``select_sql``; each query is handed to the worker pool
    together with the callback supplied to the constructor.
    NOTE(review): the entry-point name suggests the callback writes the rows
    to MongoDB -- confirm against the caller.
    """

    # Group 1 of both patterns is the eight-digit (yyyymmdd) table suffix.
    re_match_result_table = re.compile(r'^[puo]([0-9]{8})')
    re_match_record_table = re.compile(r'^[afjtz]([0-9]{8})')
    # First run of eight digits in a string, split into year / month / day.
    # Compiled once here instead of on every ``date_match`` call.
    _re_date = re.compile(r'.*?([0-9]{4})([0-9]{2})([0-9]{2})')

    select_sql = ('select B.record_id, A.user_crc, A.group_crc, A.record_time, B.result '
                  'from {} as A join {} as B on A.record_id = B.record_id')
    insert_sql = ('insert into dataset.r{}(record_id, user_crc, group_crc, '
                  'record_time, result) select B.record_id, A.user_crc, A.group_crc, '
                  'A.record_time, B.result from webinfor.{} as A join webinfor.{} '
                  'as B on A.record_id = B.record_id')
    create_sql = ('use dataset;create table r{}(record_id int(10), user_crc int(10), '
                  'group_crc int(10), record_time time, result text)')

    def __init__(self, function):
        """Store the callback and create the pool used to run the queries.

        :param function: passed as ``func`` to every ``pool.add`` call.
        """
        self.table = {}
        self.pool = Pool(4)
        self.function = function

    def start_mysql_to_mongodb(self):
        """Collect the tables in range, queue one join per pair, start the pool."""
        self.__get_table_list()
        self.__dispatch()
        self.pool.start()

    def __get_table_list(self):
        """Fill ``self.table`` with the record/result tables inside the date range.

        The range bounds are read from ``setting.date['start_time']`` and
        ``setting.date['end_time']``.
        """
        self.table['record'] = []
        self.table['result'] = []
        manager = MySQLManager(setting.configure)
        data_set = manager.query('show tables;')
        for table in data_set:
            name = table[0]
            if not Convert.date_match(setting.date['start_time'],
                                      setting.date['end_time'], name):
                continue
            # The two prefixes are disjoint, so a name matches at most one.
            if Convert.re_match_record_table.match(name):
                self.table['record'].append(name)
            elif Convert.re_match_result_table.match(name):
                self.table['result'].append(name)

    def __dispatch(self):
        """Queue a join query for every record/result pair with the same date."""
        # Index the result tables by date once, rather than regex-comparing
        # every record table against every result table.
        results_by_date = {}
        for result_table in self.table['result']:
            matched = Convert.re_match_result_table.match(result_table)
            if matched is not None:
                results_by_date.setdefault(matched.group(1), []).append(result_table)

        for record_table in self.table['record']:
            matched = Convert.re_match_record_table.match(record_table)
            if matched is None:
                continue
            table_name = matched.group(1)
            for result_table in results_by_date.get(table_name, ()):
                self.pool.add(configure=setting.configure,
                              sql=Convert.select_sql.format(record_table, result_table),
                              func=self.function,
                              kwargs={'table_name': table_name})

    @staticmethod
    def compare_date(date_record, date_result):
        """Return the shared eight-digit date of a record/result table pair.

        :param date_record: candidate record table name.
        :param date_result: candidate result table name.
        :return: the date string when both names match their pattern and
            carry the same date, otherwise ``None``.
        """
        tmp_result_date = Convert.re_match_result_table.match(date_result)
        tmp_record_date = Convert.re_match_record_table.match(date_record)
        if tmp_record_date is None or tmp_result_date is None:
            return None
        if tmp_record_date.group(1) == tmp_result_date.group(1):
            return tmp_record_date.group(1)
        return None

    @staticmethod
    def _to_datetime(matched):
        """Build a ``datetime`` from the three groups of a ``_re_date`` match."""
        year, month, day = (int(part) for part in matched.groups())
        return datetime(year, month, day)

    @staticmethod
    def _parse_bound(text, label):
        """Parse a range bound, raising ``ValueError`` when it holds no date."""
        matched = Convert._re_date.match(text)
        if matched is None:
            raise ValueError('{} contains no yyyymmdd date: {!r}'.format(label, text))
        return Convert._to_datetime(matched)

    @staticmethod
    def date_match(start_date, end_date, current_date):
        """Tell whether the date embedded in ``current_date`` lies in the range.

        Each argument is a string; the first run of eight digits in it is
        read as ``yyyymmdd``.  Both bounds are inclusive.

        :param start_date: string holding the lower bound.
        :param end_date: string holding the upper bound.
        :param current_date: string (e.g. a table name) to test.
        :return: ``True`` when the embedded date is within the bounds;
            ``False`` when it is outside them, when ``current_date`` has no
            eight-digit run, or when its digits are not a real calendar date.
        :raises ValueError: when a bound has no usable date.
        """
        current_match = Convert._re_date.match(current_date)
        if current_match is None:
            return False
        start = Convert._parse_bound(start_date, 'start_date')
        end = Convert._parse_bound(end_date, 'end_date')
        try:
            current = Convert._to_datetime(current_match)
        except ValueError:
            # Eight digits that are not a calendar date (e.g. month 99):
            # treat the name as out of range instead of aborting the scan.
            return False
        return start <= current <= end
def __init__(self, function):
    """Set up an empty table registry, a pool, and the stored callback.

    :param function: kept on ``self.function`` for later use.
    """
    self.table = dict()
    # NOTE(review): 4 is presumably the pool's worker count -- confirm
    # against the Pool implementation.
    self.pool = Pool(4)
    self.function = function