Example #1
def prepare_sal_release():
    
    try:
        os.mkdir(sal_rel_dir)
        print('SAL release directory {} created.'.format(sal_rel_dir))
    except FileExistsError:
        print('SAL release directory {} already exists, recreating.'.format(sal_rel_dir))
        delete_files(sal_rel_dir)
        os.mkdir(sal_rel_dir)

    shutil.copytree(
        get_env_var(constants.sal_home_env_var_name) + '/include/',
        sal_rel_dir + '/include')
    shutil.copytree(
        get_env_var(constants.sal_home_env_var_name) + '/build',
        sal_rel_dir + '/build')
    shutil.copytree(
        get_env_var(constants.sal_home_env_var_name) + '/lib',
        sal_rel_dir + '/lib')
    shutil.copytree(
        get_env_var(constants.sal_home_env_var_name) + '/scripts',
        sal_rel_dir + '/scripts')
    shutil.copytree(
        get_env_var(constants.sal_home_env_var_name) + '/config',
        sal_rel_dir + '/config')
    shutil.copytree(
        get_env_var(constants.sal_home_env_var_name) + '/proto',
        sal_rel_dir + '/proto')
    os.mkdir(sal_rel_dir + '/test')
    shutil.copyfile(
        get_env_var(constants.sal_home_env_var_name) +
        '/test/sal_service_test_direct.py',
        sal_rel_dir + '/test/sal_service_test_direct.py')
    shutil.copyfile(
        get_env_var(constants.sal_home_env_var_name) +
        '/sal_services_direct_pb2.py',
        sal_rel_dir + '/sal_services_direct_pb2.py')
    shutil.copyfile(
        get_env_var(constants.sal_home_env_var_name) +
        '/sal_services_direct_pb2_grpc.py',
        sal_rel_dir + '/sal_services_direct_pb2_grpc.py')
    shutil.copyfile(
        get_env_var(constants.sal_home_env_var_name) + '/sal_services_pb2.py',
        sal_rel_dir + '/sal_services_pb2.py')
    shutil.copyfile(
        get_env_var(constants.sal_home_env_var_name) +
        '/sal_services_pb2_grpc.py',
        sal_rel_dir + '/sal_services_pb2_grpc.py')
    print('SAL release is available at {}'.format(sal_rel_dir))
Example #2
def prepare_bsp_pkg():
    bsp_dev_abs = get_bsp_dev_abs_path()
    earliest_commit_hash = execute_cmd_n_get_output_2(
        'git --git-dir {0}/.git rev-list --max-parents=0 HEAD'.format(
            bsp_dev_abs))
    latest_commit_hash = execute_cmd_n_get_output_2(
        'git --git-dir {0}/.git rev-parse HEAD'.format(bsp_dev_abs))
    os.chdir(bsp_dev_abs)
    execute_cmd_n_get_output_2(
        'git --git-dir {0}/.git diff {1} {2} -- \':!./platforms/apsn/\' \':!.idea/\' \':!.gitignore\' > {3}'
        .format(bsp_dev_abs, earliest_commit_hash, latest_commit_hash,
                bsp_dev_abs + '/' + diff_file))

    latest_commit_hash_short = execute_cmd_n_get_output_2(
        'git --git-dir {0}/.git rev-parse --short HEAD'.format(bsp_dev_abs))
    bsp_name = '/' + os.path.basename(bsp_dev_abs) \
               + '_' + latest_commit_hash_short
    bsp_rel_dir = release_dir + bsp_name

    try:
        os.mkdir(bsp_rel_dir)
        print('BSP release directory {} created.'.format(bsp_rel_dir))
    except FileExistsError:
        print('BSP release directory {} already exists, recreating.'.format(
            bsp_rel_dir))
        delete_files(bsp_rel_dir)
        os.mkdir(bsp_rel_dir)

    shutil.move(bsp_dev_abs + '/' + diff_file, bsp_rel_dir + '/' + diff_file)
    shutil.copytree(bsp_dev_abs + '/platforms/apsn/', bsp_rel_dir + '/apsn')
    shutil.make_archive(bsp_rel_dir, 'zip', bsp_rel_dir)
Example #3
def clean_sal():
    print('Cleaning SAL...')
    
    to_delete = [
        get_env_var(constants.sal_home_env_var_name) + f for f in
        ['/bin', '/build', '/logs/', '/CMakeCache.txt', '/Makefile',
         '/CMakeFiles', '/cmake-build-debug']
    ]
    os.system(
        'make -C {} clean'.format(get_env_var(constants.sal_home_env_var_name)))
    for file in to_delete:
        print('Deleting {}'.format(file))
        delete_files(file)
Example #4
def clean_bsp():
    print('Cleaning BSP...')
    to_delete = [
        get_aps_bsp_pkg_abs_path() + f for f in
        ['/CMakeCache.txt', '/Makefile', '/CMakeFiles', '/cmake-build-debug']
    ]
    execute_cmd('make -C {} clean'.format(get_aps_bsp_pkg_abs_path()))

    for file in to_delete:
        print('Deleting {}'.format(file))
        delete_files(file)
    return True
Example #5
def test_delete_files():
    setup_test_files()

    # Set the readonly.txt file to read/write so it can be deleted
    readonly_file_name = f'{test_folder_path}/readonly.txt'
    if is_file(readonly_file_name):
        os.chmod(readonly_file_name, S_IWUSR | S_IREAD)

    # delete both the readonly.txt and the readwrite.txt files
    delete_files(f'{test_folder_path}/*.txt')

    # assert both files have been deleted
    assert is_file(f'{test_folder_path}/readwrite.txt') is False
    assert is_file(f'{test_folder_path}/readonly.txt') is False

    teardown_test_files()
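Every example on this page builds on a delete_files() helper that is called with a single path (Example #1), a glob pattern (the test above and Example #7), or a list of file names (Example #8). Its implementation is not shown here, so the following is only a minimal sketch of what such a helper could look like using the standard library; the real helpers in these projects may behave differently (for example, they may or may not force-delete read-only files, which is why the test above makes readonly.txt writable first).

import glob
import os
import shutil


def delete_files(target):
    """Remove files, directories, or glob patterns.

    `target` may be a single path/pattern string or a list of them,
    mirroring the call sites in the examples on this page. This is an
    illustrative sketch, not the implementation those projects use.
    """
    patterns = target if isinstance(target, (list, tuple)) else [target]
    for pattern in patterns:
        for path in glob.glob(pattern):
            if os.path.isdir(path):
                shutil.rmtree(path)  # remove a whole directory tree
            elif os.path.exists(path):
                os.remove(path)      # remove a single file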
Example #6
def main():
    duration_list = ['hour', 'day']
    # parse args
    task_name_list = config.task_list.keys()
    start_time, stop_time, task_name, duration = common.parse_args(
        task_name_list, duration_list)
    task = config.task_list[task_name]

    # init log
    common.init_log(task['log']['filename'], task['log']['debug'])

    logging.info('======\n%s\n' %
                 datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S %Z'))
    logging.info(" TASK [%s] START ..." % task_name)

    # init t_timedelta
    t_timedelta = 'hour'
    summary_suffix_format = '_%Y%m%d'
    if duration == 'hour':
        t_timedelta = datetime.timedelta(hours=1)
        summary_suffix_format = '_hourly_%Y%m%d'
    elif duration == 'day':
        t_timedelta = datetime.timedelta(days=1)
        summary_suffix_format = '_daily_%Y%m'
    else:
        logging.info('main():bad param `duration`:%s' % duration)
        return

    # make tmp sql file
    _prefix = task['tmp_summary_sql_file_name']
    _suffix = task['tmp_summary_sql_file_name_suffix']
    global_tmp_sql_file = common.make_file_name(_prefix, _suffix)
    logging.info('Global Tmp Sql File:%s' % global_tmp_sql_file)
    # delete old same name file
    common.delete_files(global_tmp_sql_file)

    summary_dimension_1_name_list = task['summary_dimension_1_name_list']
    summary_dimension_2_name_list = task['summary_dimension_2_name_list']
    summary_metric_name_list = task['summary_metric_name_list']

    # current module ref
    this_module = sys.modules[__name__]
    # create dimension table
    create_table.create_table_dimension_enum(global_tmp_sql_file,
                                             task['dimension_table_name'])
    # summary function
    create_summary_table_function = getattr(
        create_table, task['create_summary_table_function'])
    compute_summary_function = getattr(this_module,
                                       task['compute_summary_function'])

    # main loop
    # save summary_table_name
    dump_table_name_list = [task['dimension_table_name']]
    # foreach time range
    p = start_time
    sql_list = []
    while p < stop_time:
        # prepare origin_table_name
        _prefix = task['raw_data_table_name']
        _suffix = task['raw_data_table_name_suffix']
        _format_suffix = p.strftime(_suffix)
        origin_table_name = '%s%s' % (_prefix, _format_suffix)
        print 'origin_table_name', origin_table_name

        # prepare summary_table_name
        _prefix = task['summary_data_table_name']
        _format_suffix = p.strftime(summary_suffix_format)
        summary_table_name = '%s%s' % (_prefix, _format_suffix)
        # save summary_table_name
        dump_table_name_list.append(summary_table_name)

        # create summary table
        tmp_sql = create_summary_table_function(None, summary_table_name)
        sql_list.append(tmp_sql)

        # summary compute sql
        tmp_sql_list = compute_summary_function(p, duration, origin_table_name,
                                                summary_table_name,
                                                summary_dimension_1_name_list,
                                                summary_dimension_2_name_list,
                                                summary_metric_name_list)
        sql_list += tmp_sql_list

        # compute ctr
        tmp_sql = update_summary_ctr(summary_table_name)
        sql_list.append(tmp_sql)

        # extract dimension_enum
        category = task['category']
        dimension_name_list = list(
            set(summary_dimension_1_name_list + summary_dimension_2_name_list))
        tmp_sql_list = compute_dimension_enum(origin_table_name,
                                              task['dimension_table_name'],
                                              dimension_name_list, category)
        sql_list += tmp_sql_list

        # next
        p += t_timedelta
    # End While

    # filter duplication sql or None
    good_sql_list = []
    for sql in sql_list:
        if not sql or not isinstance(sql, (unicode, str)) \
                or sql in good_sql_list:
            continue
        good_sql_list.append(sql)

    # write sql to file
    with open(global_tmp_sql_file, 'a') as f:
        big_sql = '\n'.join(good_sql_list)
        f.write(big_sql)
        f.write('\n')

    # execute SQL
    local_mysql_auth = config.local_mysql_auth
    common.execute_mysql_sql(local_mysql_auth['host'],
                             local_mysql_auth['port'],
                             local_mysql_auth['user'],
                             local_mysql_auth['passwd'],
                             local_mysql_auth['dbname'], global_tmp_sql_file)

    # clean some files
    if os.path.exists(global_tmp_sql_file):
        if task['keep_summary_sql_file']:
            logging.info("keep the tmp sql file @ %s" % global_tmp_sql_file)
        else:
            try:
                os.remove(global_tmp_sql_file)
            except Exception as e:
                logging.error("main():Delete the tmp sql file: %s:[%s]" %
                              (global_tmp_sql_file, e))
    else:
        logging.warning("There is No tmp sql file:%s" % global_tmp_sql_file)

    # dump summary data to the remote server
    _prefix = task['mysql_dump_file_name']
    _suffix_fmt = task['mysql_dump_file_name_suffix']
    dump_file_name = common.make_file_name(_prefix, _suffix_fmt)
    common.delete_files(dump_file_name)

    local_mysql_auth = config.local_mysql_auth
    dump_table_name_list = list(set(dump_table_name_list))

    # dump tables into dump file
    for dump_table_name in dump_table_name_list:
        common.execute_mysql_dump(local_mysql_auth['host'],
                                  local_mysql_auth['port'],
                                  local_mysql_auth['user'],
                                  local_mysql_auth['passwd'],
                                  local_mysql_auth['dbname'], dump_table_name,
                                  dump_file_name)

    # execute dump SQL
    remote_mysql_auth = config.remote_mysql_auth
    common.execute_mysql_sql(remote_mysql_auth['host'],
                             remote_mysql_auth['port'],
                             remote_mysql_auth['user'],
                             remote_mysql_auth['passwd'],
                             remote_mysql_auth['dbname'], dump_file_name)

    if task['keep_mysql_dump_file']:
        logging.info("keep the Dump sql file @ %s" % global_tmp_sql_file)
    else:
        common.delete_files(dump_file_name)

    # End
    return
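Examples #6 and #8 both derive their temporary SQL and dump file names via common.make_file_name(prefix, suffix), where the suffix looks like a strftime-style format (e.g. '_%Y%m%d'). That helper is not shown, so here is only a plausible sketch to make the naming convention concrete; the name and behavior are assumptions, not the projects' actual code.

import datetime


def make_file_name(prefix, suffix_format):
    """Build a file name as `prefix` plus the current time rendered with
    the strftime pattern in `suffix_format`. Illustrative sketch only.
    """
    return '%s%s' % (prefix,
                     datetime.datetime.now().strftime(suffix_format))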
Example #7
    def process_table(self, db, db_engine, schema_name, table_name,
                      table_object, table_history, current_timestamp):
        """Process a specific table."""

        # skip default table and ignored tables
        if table_name == 'default':
            return
        elif table_object.ignore_table:
            logger.info(f'Skipping table: {table_name} (ignore_table=1)')
            return
        elif table_object.drop_table:
            logger.info(f'Skipping table: {table_name} (drop_table=1)')
            return

        # initialize table history's last time stamp to first timestamp if not set yet
        if not table_history.last_timestamp:
            # default first timestamp to 1900-01-01 if project has no first timestamp
            if not table_object.first_timestamp:
                table_object.first_timestamp = '1900-01-01'
            table_history.last_timestamp = iso_to_datetime(
                table_object.first_timestamp)

        # skip table if last timestamp > current timestamp, e.g. tables pre-configured for the future
        if table_history.last_timestamp > current_timestamp:
            explanation = f'first/last timestamp {table_history.last_timestamp} > current timestamp {current_timestamp}'
            logger.info(f'Skipping table: {table_name} ({explanation})')
            return

        # if we're here then we have a legit last timestamp value to use for CDC
        last_timestamp = table_history.last_timestamp

        self.stats.start(table_name, 'table')
        # logger.info(f'Processing {table_name} ...')

        # create a fresh cursor for each table
        cursor = db.conn.cursor()

        # save table object for stage
        output_stream = open(f'{self.work_folder_name}/{table_name}.table',
                             'wb')
        pickle.dump(table_object, output_stream)
        output_stream.close()

        # discover table schema
        table_schema = db_engine.select_table_schema(schema_name, table_name)

        # remove ignored columns from table schema
        if table_object.ignore_columns:
            # find columns to ignore (remove) based on ignore column names/glob-style patterns
            ignore_columns = []
            for column_name in table_schema.columns:
                for pattern in split(table_object.ignore_columns):
                    # use fnmatch() to provide glob style matching
                    if fnmatch.fnmatch(column_name.lower(), pattern.lower()):
                        ignore_columns.append(column_name)

            # delete ignored columns from our table schema
            for column_name in ignore_columns:
                logger.info(f'Ignore_column: {table_name}.{column_name}')
                table_schema.columns.pop(column_name)

        # save table schema for stage to use
        output_stream = open(f'{self.work_folder_name}/{table_name}.schema',
                             'wb')
        pickle.dump(table_schema, output_stream)
        output_stream.close()

        # save table pk for stage to use
        pk_columns = db_engine.select_table_pk(schema_name, table_name)
        if not pk_columns and table_object.primary_key:
            pk_columns = table_object.primary_key
        output_stream = open(f'{self.work_folder_name}/{table_name}.pk', 'w')
        output_stream.write(pk_columns)
        output_stream.close()

        # clear cdc if it doesn't match timestamp/rowversion
        table_object.cdc = table_object.cdc.lower()
        if not table_object.cdc or table_object.cdc not in ('timestamp',
                                                            'rowversion'):
            table_object.cdc = ''

        # if no pk_columns, then clear table cdc
        if not pk_columns:
            if table_object.cdc and table_object.cdc != 'none':
                logger.info(
                    f'Warning: {table_name} cdc={table_object.cdc} but table has no pk column(s)'
                )
                table_object.cdc = 'none'

        # we still keep timestamp because it's required for filtering first_timestamp - current_timestamp
            # if table_object.timestamp:
            # 	logger.info(f'Warning: {table_name} timestamp={table_object.timestamp} but table has no pk column(s)')
            # 	table_object.timestamp = ''

        # update table object properties for cdc select build
        column_names = list(table_schema.columns.keys())
        table_object.schema_name = schema_name
        table_object.table_name = table_name
        table_object.column_names = column_names
        select_cdc = cdc_select.SelectCDC(table_object)
        sql = select_cdc.select(self.job_id, current_timestamp, last_timestamp)

        # logger.info(f'Capture SQL:\n{sql}\n')

        # run sql here vs via db_engine.capture_select
        # cursor = db_engine.capture_select(schema_name, table_name, column_names, last_timestamp, current_timestamp)
        cursor.execute(sql)

        # capture rows in fixed size batches to support unlimited size record counts
        # Note: Batching on capture side allows stage to insert multiple batches in parallel.

        if self.project.batch_size:
            batch_size = int(self.project.batch_size)
            # logger.info(f'Using project specific batch size: {self.project.batch_size}')
        else:
            batch_size = 1_000_000

        batch_number = 0
        row_count = 0
        file_size = 0
        while True:
            batch_number += 1
            rows = cursor.fetchmany(batch_size)
            if not rows:
                break

            logger.info(
                f'Table({table_name}): batch={batch_number} using batch size {batch_size:,}'
            )

            # flatten rows to list of column values
            json_rows = [list(row) for row in rows]
            output_file = f'{self.work_folder_name}/{table_name}#{batch_number:04}.json'
            with open(output_file, 'w') as output_stream:
                # indent=2 for debugging
                json.dump(json_rows,
                          output_stream,
                          indent=2,
                          default=json_serializer)

            # track stats
            row_count += len(json_rows)
            file_size += pathlib.Path(output_file).stat().st_size

        # if no cdc but order is set, hash the output files to see if they match the last file hash
        if (not table_object.cdc
                or table_object.cdc == 'none') and table_object.order:
            print(
                f'Checking {table_name} file hash based on cdc={table_object.cdc} and order={table_object.order}'
            )
            table_data_files = f'{self.work_folder_name}/{table_name}#*.json'
            current_filehash = hash_files(table_data_files)
            if table_history.last_filehash == current_filehash:
                # suppress this update
                print(
                    f'Table({table_name}): identical file hash, update suppressed'
                )
                logger.info(
                    f'Table({table_name}): identical file hash, update suppressed'
                )
                row_count = 0
                file_size = 0

                # delete exported json files
                delete_files(table_data_files)
            else:
                print(
                    f'Table({table_name}): {table_history.last_filehash} != {current_filehash}'
                )
                table_history.last_filehash = current_filehash

        # update table history with new last timestamp value
        table_history.last_timestamp = current_timestamp

        # track total row count and file size across all of a table's batched json files
        self.stats.stop(table_name, row_count, file_size)

        # save interim state of stats for diagnostics
        self.stats.save()

        self.job_row_count += row_count
        self.job_file_size += file_size

        # explicitly close cursor when finished
        # cursor.close()
        return
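Example #7 suppresses a re-export when the freshly written JSON batches hash to the same value as the previous run, using a hash_files() helper that is not shown. A minimal sketch of such a helper, assuming it accepts the same glob pattern and returns a hex digest (the project's actual implementation may differ):

import glob
import hashlib


def hash_files(pattern):
    """Return one hex digest covering every file matching `pattern`.

    Illustrative sketch: files are hashed in sorted order so the digest
    is stable across runs that produce identical content.
    """
    digest = hashlib.sha256()
    for path in sorted(glob.glob(pattern)):
        with open(path, 'rb') as stream:
            # read in 1 MiB chunks to keep memory use flat on large files
            for chunk in iter(lambda: stream.read(1 << 20), b''):
                digest.update(chunk)
    return digest.hexdigest()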
Example #8
def main():
    global global_fail_count
    global global_success_count
    global global_file_list
    # parse args
    task_name_list = config.task_list.keys()
    start_time, stop_time, task_name, _ = common.parse_args(task_name_list)
    task = config.task_list[task_name]

    # init task app local data
    task['app'] = {}

    # init log
    common.init_log(task['log']['filename'], task['log']['debug'])

    logging.info('======\n%s\n' %
                 datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S %Z'))
    logging.info(" TASK [%s] START ..." % task_name)

    # init parser json function
    task['app']['parser_json'] = getattr(parser, task['parser_function_name'])

    # make tmp sql file
    _prefix = task['tmp_sql_file_name']
    _suffix = task['tmp_sql_file_name_suffix']
    global_tmp_sql_file = common.make_file_name(_prefix, _suffix)
    logging.info('Global Tmp Sql File:%s' % global_tmp_sql_file)
    # save global_tmp_sql_file to task .
    task['app']['global_tmp_sql_file'] = global_tmp_sql_file
    # delete old same name file
    common.delete_files(global_tmp_sql_file)

    # init s3 helper, download s3 file to local
    amazon_s3_auth = task['amazon_s3_auth']
    s3 = S3Helper(**amazon_s3_auth)

    aws_appid_list = task['aws_appid_list']
    tmp_local_file_dir = task['tmp_local_file_dir']

    keep_s3_file = task['keep_s3_file']

    # main loop
    # foreach time range
    p = start_time
    while p < stop_time:
        # prepare raw data table name
        _prefix = task['raw_data_table_name']
        _suffix = task['raw_data_table_name_suffix']
        _format_suffix = p.strftime(_suffix)
        table_name = '%s%s' % (_prefix, _format_suffix)
        # save table_name to task .
        task['app']['table_name'] = table_name

        # create table if it does not exist
        create_raw_data_table_function = task['create_raw_data_table_function']
        create_table_function = getattr(create_table,
                                        create_raw_data_table_function)
        create_table_function(global_tmp_sql_file, table_name)

        # foreach time range
        for aws_appid in aws_appid_list:
            key_path = 'awsma/events/%s/' % aws_appid
            local_s3_file_list = common.download_from_s3(
                s3, key_path, p, tmp_local_file_dir)
            #
            main_loop(task, local_s3_file_list)
            # delete local s3 files
            if not keep_s3_file:
                common.delete_files(local_s3_file_list)
        p += datetime.timedelta(hours=1)

    # exit()
    # execute SQL
    local_mysql_auth = config.local_mysql_auth

    common.execute_mysql_sql(local_mysql_auth['host'],
                             local_mysql_auth['port'],
                             local_mysql_auth['user'],
                             local_mysql_auth['passwd'],
                             local_mysql_auth['dbname'], global_tmp_sql_file)
    # END
    if os.path.exists(global_tmp_sql_file):
        if task['keep_sql_file']:
            logging.info("keep the tmp sql file @ %s" % global_tmp_sql_file)
        else:
            try:
                os.remove(global_tmp_sql_file)
            except Exception as e:
                logging.error("main():Delete the tmp sql file: %s:[%s]" %
                              (global_tmp_sql_file, e))
    else:
        logging.warning("There is No tmp sql file:%s" % global_tmp_sql_file)

    return
Example #9
def prepare_sal_release():
    try:
        os.mkdir(sal_rel_dir)
        print('SAL release directory {} created.'.format(sal_rel_dir))
    except FileExistsError:
        print('SAL release directory {} already exists, recreating.'.format(
            sal_rel_dir))
        delete_files(sal_rel_dir)
        os.mkdir(sal_rel_dir)

    shutil.copytree(
        get_env_var(constants.sal_home_env_var_name) + '/include/',
        sal_rel_dir + '/include')
    shutil.copytree(
        get_env_var(constants.sal_home_env_var_name) + '/src/include/',
        sal_rel_dir + '/src/include')
    shutil.copytree(
        get_env_var(constants.sal_home_env_var_name) + '/build',
        sal_rel_dir + '/build')
    shutil.copytree(
        get_env_var(constants.sal_home_env_var_name) + '/lib',
        sal_rel_dir + '/lib')
    shutil.copytree(
        get_env_var(constants.sal_home_env_var_name) + '/scripts',
        sal_rel_dir + '/scripts')
    shutil.copytree(
        get_env_var(constants.sal_home_env_var_name) + '/config',
        sal_rel_dir + '/config')
    shutil.copytree(
        get_env_var(constants.sal_home_env_var_name) + '/proto',
        sal_rel_dir + '/proto')
    if get_from_advance_setting_dict(constants.sal_sw_attr_node,
                                     constants.build_third_party_node):
        shutil.copytree(
            get_env_var(constants.tp_install_env_var_name) + '/lib',
            sal_rel_dir + '/install/lib')
        shutil.copytree(
            get_env_var(constants.tp_install_env_var_name) + '/include',
            sal_rel_dir + '/install/include')
        shutil.copytree(
            get_env_var(constants.tp_install_env_var_name) + '/bin',
            sal_rel_dir + '/install/bin')
        shutil.copytree(
            get_env_var(constants.tp_install_env_var_name) + '/share',
            sal_rel_dir + '/install/share')

    os.mkdir(sal_rel_dir + '/test')
    shutil.copyfile(
        get_env_var(constants.sal_home_env_var_name) + '/README.md',
        sal_rel_dir + '/README.md')
    shutil.copyfile(
        get_env_var(constants.sal_home_env_var_name) +
        '/test/sal_service_test_BF6064.py',
        sal_rel_dir + '/test/sal_service_test_BF6064.py')
    shutil.copyfile(
        get_env_var(constants.sal_home_env_var_name) +
        '/test/sal_service_test_BF2556.py',
        sal_rel_dir + '/test/sal_service_test_BF2556.py')
    shutil.copyfile(
        get_env_var(constants.sal_home_env_var_name) + '/test/TestUtil.py',
        sal_rel_dir + '/test/TestUtil.py')
    shutil.copyfile(
        get_env_var(constants.sal_home_env_var_name) + '/sal_services_pb2.py',
        sal_rel_dir + '/sal_services_pb2.py')
    shutil.copyfile(
        get_env_var(constants.sal_home_env_var_name) +
        '/sal_services_pb2_grpc.py', sal_rel_dir + '/sal_services_pb2_grpc.py')
    # copy the generated C++ gRPC/protobuf sources; it is assumed here that
    # they live alongside the Python stubs in SAL home
    shutil.copyfile(
        get_env_var(constants.sal_home_env_var_name) +
        '/sal_services.grpc.pb.cc',
        sal_rel_dir + '/sal_services.grpc.pb.cc')
    shutil.copyfile(
        get_env_var(constants.sal_home_env_var_name) +
        '/sal_services.grpc.pb.h', sal_rel_dir + '/sal_services.grpc.pb.h')
    shutil.copyfile(
        get_env_var(constants.sal_home_env_var_name) + '/sal_services.pb.cc',
        sal_rel_dir + '/sal_services.pb.cc')
    shutil.copyfile(
        get_env_var(constants.sal_home_env_var_name) + '/sal_services.pb.h',
        sal_rel_dir + '/sal_services.pb.h')

    prepare_sal_pkg()

    return True