def user_confirm(self):
    """Print the planned archive statement and ask whether to proceed.

    The summary (server host, source/target tables and the row filter) is
    always printed. In dry-run mode, or when interactive confirmation is
    switched off, the method returns True without prompting.

    :return: False only when the operator answers "no"; True otherwise.
    """
    summary_template = """
将生成在服务器{mysql_host}上归档数据
迁移数据条件为:
INSERT INTO `{target_database_name}`.`{target_table_name}`
SELECT * FROM `{source_database_name}`.`{source_table_name}`
WHERE {data_condition}
"""
    template_values = {
        "mysql_host": self.source_mysql_server.mysql_host,
        "source_database_name": self.source_database_name,
        "source_table_name": self.source_table_name,
        "target_database_name": self.target_database_name,
        "target_table_name": self.target_table_name,
        "data_condition": self.data_condition,
    }
    PrintHelper.print_info_message(summary_template.format(**template_values))

    if self.is_dry_run:
        PrintHelper.print_info_message("模拟运行。。。")
        return True
    if not self.is_user_confirm:
        # Confirmation disabled: behave as if the operator said yes.
        return True

    prompt = """
请输入yes继续或输入no退出,yes/no?
"""
    answer = self.get_user_choose_option(input_options=['yes', 'no'],
                                         input_message=prompt)
    return answer != "no"
 def create_target_table(self):
     """Create the target table on the target server unless it already exists.

     Existence is decided by ``self.is_target_table_exist()``; the DDL comes
     from ``self.get_target_table_create_script()``.
     """
     if self.is_target_table_exist():
         PrintHelper.print_info_message("目标表已存在")
         return
     PrintHelper.print_info_message("准备创建目标表")
     create_sql = self.get_target_table_create_script()
     self.target_mysql_server.mysql_exec(sql_script=create_sql)
 def mysql_query(self, sql_script, sql_paras=None, return_dict=False, max_execute_seconds=0):
     """Execute a query and return every fetched row.

     :param sql_script: SQL text to execute.
     :param sql_paras: optional parameters handed to ``cursor.execute``;
         when None the statement is executed without parameters.
     :param return_dict: forwarded to ``self.get_connection``.
     :param max_execute_seconds: forwarded to
         ``self.set_max_execute_seconds`` before the statement runs.
     :return: the value of ``cursor.fetchall()``.
     :raises Exception: carrying the original error message, after the
         transaction is rolled back and a warning is printed.
     """
     conn = None
     cursor = None
     try:
         conn = self.get_connection(return_dict=return_dict)
         cursor = conn.cursor()
         self.set_max_execute_seconds(cursor=cursor, max_execute_seconds=max_execute_seconds)
         if sql_paras is not None:
             cursor.execute(sql_script, sql_paras)
         else:
             cursor.execute(sql_script)
         exec_result = cursor.fetchall()
         conn.commit()
         return exec_result
     except Exception as ex:
         # get_connection() itself may be what failed, in which case conn is
         # still None; rolling back unconditionally would replace the real
         # error with an AttributeError.
         if conn is not None:
             conn.rollback()
         warning_message = "执行出现异常:{}\n堆栈:{}\n语句:{}\n参数:{}".format(
             str(ex), traceback.format_exc(),
             sql_script, sql_paras
         )
         PrintHelper.print_warning_message(warning_message)
         raise Exception(str(ex))
     finally:
         if cursor is not None:
             cursor.close()
         if conn is not None:
             conn.close()
 def mysql_exec(self, sql_script, sql_paras=None):
     """Execute a statement and return the number of affected rows.

     :param sql_script: SQL text to execute.
     :param sql_paras: optional parameters handed to ``cursor.execute``;
         when None the statement is executed without parameters.
     :return: ``cursor.rowcount`` read after the statement has executed.
     :raises Exception: carrying the original error message, after the
         transaction is rolled back and a warning is printed.
     """
     cursor = None
     conn = None
     try:
         conn = self.get_connection()
         cursor = conn.cursor()
         if sql_paras is not None:
             cursor.execute(sql_script, sql_paras)
         else:
             cursor.execute(sql_script)
         affect_rows = cursor.rowcount
         conn.commit()
         return affect_rows
     except Exception as ex:
         # If get_connection() raised, conn is still None; guard the rollback
         # so the original error is not masked by an AttributeError.
         if conn is not None:
             conn.rollback()
         warning_message = "执行出现异常:{}\n堆栈:{}\n语句:{}\n参数:{}".format(
             str(ex), traceback.format_exc(),
             sql_script, sql_paras
         )
         PrintHelper.print_warning_message(warning_message)
         raise Exception(str(ex))
     finally:
         if cursor is not None:
             cursor.close()
         if conn is not None:
             conn.close()
    def delete_data_by_scripts(self, transfer_scripts):
        try:
            sql_script_list = []
            delete_script = transfer_scripts["delete_script"]
            temp_script = delete_script, None
            sql_script_list.append(temp_script)
            sql_script = delete_script
            tmp_script = """
USE {0};
""".format(self.source_database_name) + sql_script + """
COMMIT;
SELECT SLEEP('{0}');
##=====================================================##
""".format(self.batch_sleep_seconds)
            PrintHelper.write_file(file_path=self.transfer_script_file,
                                   message=tmp_script)
            if not self.is_dry_run:
                total_affect_rows = self.source_mysql_server.mysql_exec_many(
                    sql_script_list)
                PrintHelper.print_info_message(
                    "本次归档操作记录{}条".format(total_affect_rows))
                self.sleep_with_affect_rows(total_affect_rows)
            else:
                PrintHelper.print_info_message("生成迁移脚本(未执行)")
                PrintHelper.print_info_message(sql_script)
            return True
        except Exception as ex:
            PrintHelper.print_warning_message("在归档过程中出现异常:{}\n堆栈:{}\n".format(
                str(ex), traceback.format_exc()))
            return False
示例#6
0
 def create_target_table(self):
     """Create the target table as a structural copy of the source table.

     Builds a ``CREATE TABLE IF NOT EXISTS ... LIKE ...`` statement from the
     configured database/table names, prints it, and executes it through
     ``self.source_mysql_server``.
     """
     ddl_template = "CREATE TABLE IF NOT EXISTS `{target_database_name}`.`{target_table_name}` LIKE `{source_database_name}`.`{source_table_name}`;"
     create_sql = ddl_template.format(
         target_database_name=self.target_database_name,
         source_database_name=self.source_database_name,
         target_table_name=self.target_table_name,
         source_table_name=self.source_table_name)
     PrintHelper.print_info_message("建表脚本为:\n {0}".format(create_sql))
     self.source_mysql_server.mysql_exec(sql_script=create_sql)
示例#7
0
 def get_user_choose_option(self, input_options, input_message):
     """Prompt repeatedly until the operator types one of the allowed options.

     :param input_options: iterable of accepted answers.
     :param input_message: prompt printed before every read.
     :return: the element of ``input_options`` equal to the stripped input.
     """
     while True:
         PrintHelper.print_info_message(input_message)
         entered = input("").strip()
         matches = [option for option in input_options if entered == option]
         if matches:
             # First match wins, mirroring an in-order scan of the options.
             return matches[0]
 def mysql_exec_many(self, script_list):
     """Execute several statements inside a single transaction.

     :param script_list: iterable of ``(sql_script, sql_param)`` pairs; a
         pair whose ``sql_param`` is None is executed without parameters.
     :return: sum of ``cursor.rowcount`` over all statements (a None
         rowcount is skipped).
     :raises Exception: carrying the original error message, after the
         transaction is rolled back and a warning is printed.
     """
     cursor = None
     conn = None
     total_affect_rows = 0
     try:
         conn = self.get_connection()
         cursor = conn.cursor()
         for sql_script, sql_param in script_list:
             if sql_param is not None:
                 cursor.execute(sql_script, sql_param)
             else:
                 cursor.execute(sql_script)
             affect_rows = cursor.rowcount
             if affect_rows is not None:
                 total_affect_rows += affect_rows
         conn.commit()
         return total_affect_rows
     except Exception as ex:
         # If get_connection() raised, conn is still None; guard the rollback
         # so the original error is not masked by an AttributeError.
         if conn is not None:
             conn.rollback()
         warning_message = "执行出现异常:{}\n堆栈:{}\n参数:{}".format(
             str(ex),
             traceback.format_exc(),
             script_list
         )
         PrintHelper.print_warning_message(warning_message)
         raise Exception(str(ex))
     finally:
         if cursor is not None:
             cursor.close()
         if conn is not None:
             conn.close()
 def archive_data(self):
     """Entry point: validate the configuration, confirm, then archive.

     Returns silently (None) when ``check_config`` fails or the operator
     declines in ``user_confirm``; otherwise runs ``loop_archive_data``.
     """
     PrintHelper.print_info_message("开始检查归档配置")
     # user_confirm() is only reached when the configuration check passed.
     if not (self.check_config() and self.user_confirm()):
         return
     PrintHelper.print_info_message("开始归档数据")
     self.loop_archive_data()
     PrintHelper.print_info_message("结束归档数据")
示例#10
0
def main():
    """Run ``process_one_model`` three times and pool the resulting topics.

    Reads paths from ``config.cfg``, loads the batches and dictionary, and
    for each of three models collects its phi/theta matrices into an
    ``Experiment``, saves the topic-filter histogram to
    ``data_iter_<i>.csv`` and prints the current basic-topic count.

    The models file is opened in append mode and is closed even when an
    iteration raises. ``print(...)`` with a single argument and ``range``
    are used so the function parses and runs on both Python 2 and 3.
    """
    print(artm.version())
    config = ConfigPaths('config.cfg')
    plot_maker = PlotMaker()
    printer = PrintHelper()
    print(config.models_file_name)

    batch_vectorizer = artm.BatchVectorizer(
        data_path=config.output_batches_path, data_format='batches')
    dictionary = artm.Dictionary()
    dictionary.load(dictionary_path=config.dictionary_path + '.dict')

    # The context manager guarantees the file handle is released if any
    # model run or histogram export fails part-way through.
    with open(config.models_file_name, 'a') as models_file:
        exp = Experiment(
            Pool(topics_filter=OptimizationTopicsFilter(eps=10**(-2.5),
                                                        verbose=False),
                 save_topics=True))
        for i in range(3):
            model_artm = process_one_model(config,
                                           batch_vectorizer,
                                           models_file,
                                           printer,
                                           plot_maker,
                                           dictionary,
                                           _n_topics=50,
                                           _n_doc_passes=5,
                                           _seed_value=100,
                                           _n_top_tokens=10,
                                           _p_mass_threshold=0.25,
                                           _n_iterations=20,
                                           _model_name='model_{}'.format(i))
            exp.collect_topics(model_artm.get_phi(), model_artm.get_theta())
            vals, bins = exp.topics_pool.topics_filter.plot_hist()
            save_hist(vals, bins, "data_iter_{}.csv".format(i))
            print(exp.topics_pool.get_basic_topics_count())
 def check_config(self):
     """Validate the archive configuration and detect the primary key.

     Checks, in order: database name, table name and row filter are not
     blank; the target table exists (it is created on demand); source and
     target have the same number of columns and each pair agrees on name,
     key and type (case-insensitive); the source table has exactly one
     primary-key column whose type contains ``int(`` or ``varchar(``.

     Column rows are read through their "Field", "Key" and "Type" entries
     (presumably ``SHOW COLUMNS`` output - confirm against
     ``get_source_columns``).

     Side effects: sets ``self.table_primary_key`` and
     ``self.table_primary_key_type`` ('INT' or 'CHAR').

     :return: True when every check passes; False otherwise, including when
         an exception is raised (it is printed, not propagated).
     """
     try:
         if str(self.source_database_name).strip() == "":
             PrintHelper.print_warning_message("数据库名不能为空")
             return False
         if str(self.source_table_name).strip() == "":
             PrintHelper.print_warning_message("表名不能为空")
             return False
         if str(self.data_condition).strip() == "":
             PrintHelper.print_warning_message("迁移条件不能为空")
             return False
         self.create_target_table()
         source_columns = self.get_source_columns()
         target_columns = self.get_target_columns()
         if len(source_columns) != len(target_columns):
             PrintHelper.print_warning_message("源表和目标表的列数量不匹配,不满足迁移条件")
             return False
         primary_key_count = 0
         for source_column, target_column in zip(source_columns, target_columns):
             source_column_name = source_column["Field"]
             source_column_key = source_column["Key"]
             source_column_type = source_column["Type"]
             target_column_name = target_column["Field"]
             target_column_key = target_column["Key"]
             # Must come from the target row; reading the source row here
             # would make the type comparison below always succeed.
             target_column_type = target_column["Type"]
             # Compare source against target for each attribute; comparing a
             # value with itself can never detect a mismatch.
             if source_column_name.lower() != target_column_name.lower() \
                     or source_column_key.lower() != target_column_key.lower() \
                     or source_column_type.lower() != target_column_type.lower():
                 PrintHelper.print_warning_message("源表和目标表的列不匹配,不满足迁移条件")
                 return False
             if source_column_key.lower() == 'pri':
                 primary_key_count += 1
                 self.table_primary_key = source_column_name
                 lowered_type = str(source_column_type).lower()
                 # 'int(' also matches 'bigint(' (and other *int( types).
                 if 'int(' in lowered_type:
                     self.table_primary_key_type = 'INT'
                 elif 'varchar(' in lowered_type:
                     self.table_primary_key_type = 'CHAR'
         # Count checks come first so a table without a primary key is
         # reported as such instead of as "unsupported key type".
         if primary_key_count == 0:
             PrintHelper.print_warning_message("未找到主键,不瞒足迁移条件")
             return False
         if primary_key_count > 1:
             PrintHelper.print_warning_message("要迁移的表使用复合主键,不满足迁移条件")
             return False
         if self.table_primary_key_type == "":
             PrintHelper.print_warning_message(
                 "主键不为int/bigint/varchar三种类型中的一种,不满足迁移条件")
             return False
         return True
     except Exception as ex:
         PrintHelper.print_warning_message("执行出现异常,异常为{0},{1}".format(
             str(ex), traceback.format_exc()))
         return False
 def loop_archive_data(self):
     """Walk the key range in steps, archiving one sub-range per iteration.

     The range comes from ``get_loop_key_range`` (unpacked as max, min).
     The loop stops when the current key reaches the maximum, a stop file
     is detected, no next key can be found, or ``archive_range_data``
     reports failure.
     """
     divider = "*" * 70
     upper_key, lower_key = self.get_loop_key_range()
     if lower_key is None or upper_key is None:
         PrintHelper.print_info_message("未找到满足条件的键区间")
         return

     position = lower_key
     # The stop-file check only runs while there is still work left.
     while position < upper_key and not self.has_stop_file():
         PrintHelper.print_info_message(divider)
         upcoming = self.get_next_loop_key(current_key=position)
         if upcoming is None:
             PrintHelper.print_info_message("未找到下一个可归档的区间,退出归档")
             break
         archived = self.archive_range_data(current_key=position,
                                            next_key=upcoming)
         if not archived:
             PrintHelper.print_info_message("执行出现异常,退出归档!")
             break
         position = upcoming
         PrintHelper.print_info_message(divider)
         progress = """最小值为:{0},最大值为:{1},当前处理值为:{2}""".format(
             lower_key, upper_key, position)
         PrintHelper.print_info_message(progress)

     PrintHelper.print_info_message(divider)
     PrintHelper.print_info_message("执行完成")
 def archive_range_data(self, current_key, next_key):
     """Archive the rows whose keys fall between two loop keys.

     Steps: collect the keys in the range, read the matching rows, delete
     any copies already present in the target, insert the rows into the
     target, then delete them from the source. In dry-run mode nothing is
     read or written.

     :param current_key: start key, passed to ``get_archive_range_keys``.
     :param next_key: end key, passed to ``get_archive_range_keys``.
     :return: True on success (or dry run); False if any step raised (the
         exception is printed, not propagated).
     """
     try:
         if self.is_dry_run:
             PrintHelper.print_info_message("未真实执行,未生产SQL文件。")
             return True
         range_keys = self.get_archive_range_keys(current_key, next_key)
         range_data = self.get_data_by_keys(range_keys=range_keys)
         PrintHelper.print_info_message("找到满足条件记录{}条".format(
             len(range_data)))
         PrintHelper.print_info_message("开始删除目标库已有数据")
         self.delete_target_data_by_keys(range_keys=range_keys)
         PrintHelper.print_info_message("开始向目标库上插入数据")
         self.insert_target_range_data(range_data=range_data)
         # This step deletes from the SOURCE; the message previously said
         # "target", which made the log misleading during incident review.
         PrintHelper.print_info_message("开始删除源库归档数据")
         self.delete_source_data_by_keys(range_keys=range_keys)
         return True
     except Exception as ex:
         PrintHelper.print_warning_message("在归档过程中出现异常:{}\n堆栈:{}\n".format(
             str(ex), traceback.format_exc()))
         return False
示例#14
0
 def has_stop_file(self):
     """Report whether the stop file at ``self.stop_script_file`` exists.

     Prints a notice when the file is found.

     :return: True if the path exists, otherwise False.
     """
     stop_requested = os.path.exists(self.stop_script_file)
     if stop_requested:
         PrintHelper.print_info_message("检查到停止文件,准备退出!")
     return stop_requested
 def check_config(self):
     """Validate the archive configuration and detect the primary key.

     Checks that database name, table name and row filter are not blank,
     then scans the source columns (read through their "Field", "Key" and
     "Type" entries) and requires exactly one primary-key column whose type
     contains ``int(`` or ``varchar(``.

     Side effects: sets ``self.table_primary_key`` and
     ``self.table_primary_key_type`` ('INT' or 'CHAR').

     :return: True when every check passes; False otherwise, including when
         an exception is raised (it is printed, not propagated).
     """
     try:
         if str(self.source_database_name).strip() == "":
             PrintHelper.print_warning_message("数据库名不能为空")
             return False
         if str(self.source_table_name).strip() == "":
             PrintHelper.print_warning_message("表名不能为空")
             return False
         if str(self.data_condition).strip() == "":
             PrintHelper.print_warning_message("迁移条件不能为空")
             return False
         source_columns = self.get_source_columns()
         primary_key_count = 0
         for column_item in source_columns:
             source_column_name = column_item["Field"]
             source_column_key = column_item["Key"]
             source_column_type = column_item["Type"]
             if str(source_column_key).lower() == 'pri':
                 primary_key_count += 1
                 self.table_primary_key = source_column_name
                 lowered_type = str(source_column_type).lower()
                 # 'int(' also matches 'bigint(' (and other *int( types).
                 if 'int(' in lowered_type:
                     self.table_primary_key_type = 'INT'
                 elif 'varchar(' in lowered_type:
                     self.table_primary_key_type = 'CHAR'
         # Count checks come first so a table without a primary key is
         # reported as such instead of as "unsupported key type".
         if primary_key_count == 0:
             PrintHelper.print_warning_message("未找到主键,不瞒足迁移条件")
             return False
         if primary_key_count > 1:
             PrintHelper.print_warning_message("要迁移的表使用复合主键,不满足迁移条件")
             return False
         if self.table_primary_key_type == "":
             PrintHelper.print_warning_message(
                 "主键不为int/bigint/varchar三种类型中的一种,不满足迁移条件")
             return False
         return True
     except Exception as ex:
         PrintHelper.print_warning_message("执行出现异常,异常:{0}\n堆栈:{1}\n".format(
             str(ex), traceback.format_exc()))
         return False