Пример #1
0
def generate_data(data_count, result_per_user=15):
    """Generate mock user and result records as JSON-lines files in ./output.

    One user record is written per generated user, followed by
    ``result_per_user`` result records that copy that user's identifiers.
    Both files are written with a ``.gen`` suffix and renamed to ``.load``
    only after they have been completely written and closed.

    Args:
        data_count: Total number of result records wanted; the number of
            users is ``data_count // result_per_user``.
        result_per_user: Number of result records written for each user.

    Raises:
        ZeroDivisionError: If ``result_per_user`` is 0.
    """
    user_count = data_count // result_per_user
    print('data_count:', data_count)
    print('user_count:', user_count)
    print('result_per_user:', result_per_user)

    # NOTE(review): the original setup section of this function was lost
    # (it had been replaced by a '******' redaction that also broke the
    # print call above). Everything from here down to `user_file_gen_ext`
    # is reconstructed from the sibling generator that uses the same local
    # names; the date range, number ranges and the "File id" print are
    # assumptions -- confirm against the original source. This also assumes
    # `Switch`, `between_table` and `datetime` are in scope in this module.
    switch_query = Switch(between_table)

    from_ts = int(datetime(2015, 1, 1).timestamp())
    to_ts = int(datetime(2020, 12, 31).timestamp())

    from_mbb = 5301000000
    to_mbb = from_mbb + 200000

    from_imei = 990000862471854
    to_imei = from_imei + 200000
    start_time = time.time()

    len_packages = len(packagen.packages)

    file_id = f"{int(time.time())}-{random.randint(1000, 9999)}"

    print(f"File id: {file_id}")

    user_file_gen_ext = f'./output/{file_id}-user.gen'
    user_file_load_ext = f'./output/{file_id}-user.load'

    result_file_gen_ext = f'./output/{file_id}-result.gen'
    result_file_load_ext = f'./output/{file_id}-result.load'

    # Context managers close both files even if generation raises; in that
    # case the partial files keep their '.gen' suffix because the renames
    # below are never reached.
    with open(user_file_gen_ext, 'w') as user_file, \
            open(result_file_gen_ext, 'w') as result_file:
        for user in Mimic.generate(count=user_count):
            insert_dt = DateTime.between_ts(from_ts, to_ts)

            user.insert_date = str(insert_dt.date())
            user.insert_time = insert_dt.isoformat()
            user.user_detail_id = 1
            user.user_id = 1
            user.mbb = Mobile.phone(from_number=from_mbb, to_number=to_mbb)
            # Derive the client id from mbb so the same mbb always maps to
            # the same UUID string.
            user.device_client_id = str(uuid.UUID(
                int=(0xFFFFFFFFFFFFFFFFFFFFFF0 * user.mbb) % 0xFFFFFFFFFFFFFFFFFFFFFFF))
            user.device_client_ip = Mobile.ip()
            user.device_os = Mobile.device_os()
            user.is_cracked = Mobile.is_cracked()
            user.connection_type = Mobile.connection_type()
            user.imei = Mobile.imei(from_imei, to_imei)
            user.carrier = Mobile.carrier()
            user.operation = Mobile.operation()
            user.is_success = 1
            user.msisdn = '9' + user.device_client_id

            # The files are opened in text mode, so '\n' is the correct
            # terminator on every platform (os.linesep would become
            # '\r\r\n' on Windows).
            user_file.write(json.dumps(user) + '\n')

            for result in Mimic.generate(count=result_per_user):
                # Results are dated no earlier than the user's insert time.
                record_date = DateTime.between_ts(insert_dt.timestamp(), to_ts)

                result.record_date = str(record_date.date())
                result.mbb = user.mbb
                result.device_uid = user.imei
                result.query_name = switch_query(random.randint(1, 100))
                result.source = random.choice(["AB", "AF"])

                # Always one package; a second one is added when
                # probably(15 / 100) says so.
                package_list = [packagen.packages[random.randint(0, len_packages - 1)]]
                if probably(15 / 100):
                    package_list.append(packagen.packages[random.randint(0, len_packages - 1)])
                result.package_list = ','.join(package_list)

                result.device_client_id = user.device_client_id
                result.device_info = user.device_os

                result_file.write(json.dumps(result) + '\n')

    # Rename only after both files are complete and closed.
    os.rename(result_file_gen_ext, result_file_load_ext)
    os.rename(user_file_gen_ext, user_file_load_ext)

    took = time.time() - start_time
    # Avoid a meaningless (or divide-by-zero) rate for near-instant runs.
    if took <= 0.001:
        speed = 0
    else:
        speed = user_count / took

    print(
        f"Generated user data: {user_count}, result data: {user_count}x{result_per_user}={result_per_user * user_count}\n"
        f"Time: {took}. Speed: {round(speed)} users/sec.")
Пример #2
0
def generate_data(user_count=150,
                  fraud_count=50,
                  start_year=2017,
                  end_year=2021,
                  user_start=0,
                  user_data_per_user=67):
    """Generate mock user-detail rows and fraud rows as JSON-lines files.

    For each of ``user_count`` base users a sequential seed is derived
    (``5301000000 + user_start + position``) and used to build the static
    identifiers (ids, mbb, hashed device client id, imei, msisdn). Every
    base user gets ``user_data_per_user`` rows in the user file that share
    those identifiers. A single fraud row is written for each base user
    whose ``user_start + position`` (position counted from 1) does not
    exceed ``fraud_count``.

    Files are written under ``./output`` with a ``.gen`` suffix and renamed
    to ``.load`` once complete; the fraud file is deleted instead when no
    fraud row was written.

    Args:
        user_count: Number of base users to generate.
        fraud_count: Upper bound on ``user_start + position`` for which a
            fraud row is written.
        start_year: Timestamps are drawn starting at January 1st of this
            year.
        end_year: Timestamps are drawn up to January 1st of this year.
        user_start: Offset added to the base seed before numbering users.
        user_data_per_user: Number of user rows written per base user.
    """
    print(f'Generating {user_count} data. Offset: {user_start}')
    switch_query = Switch(between_table)

    from_ts = int(datetime(start_year, 1, 1).timestamp())
    to_ts = int(datetime(end_year, 1, 1).timestamp())

    len_packages = len(packagen.packages)

    seed = 5301000000
    user_seed = seed + user_start

    # file_id is drawn before random.seed(0) below, so it still varies
    # between runs.
    file_id = f"{int(time.time())}-{random.randint(1000, 9999)}"

    print(f"File id: {file_id}")

    user_file_gen_ext = f'./output/{file_id}-user.gen'
    user_file_load_ext = f'./output/{file_id}-user.load'

    fraud_file_gen_ext = f'./output/{file_id}-fraud.gen'
    fraud_file_load_ext = f'./output/{file_id}-fraud.load'

    fraud_data_written = False

    # Context managers close both files even if generation raises; in that
    # case the partial '.gen' files are left in place because the
    # rename/remove step below is never reached.
    with open(user_file_gen_ext, 'w') as user_file, \
            open(fraud_file_gen_ext, 'w') as fraud_file:
        # Re-seed the global RNG so the random draws made from here on are
        # repeatable across runs.
        random.seed(0)

        for base in Mimic.generate(count=user_count):
            user_seed += 1
            user_seed_str = str(user_seed)

            # Identifiers shared by every row of this base user.
            base.type = 'user_detail'
            base.user_detail_id = user_seed
            base.user_id = 9999999999 - user_seed
            base.mbb = user_seed
            base.device_client_id = hashlib.sha512(
                user_seed_str.encode()).hexdigest()
            base.imei = hashlib.md5(
                user_seed_str.encode()).hexdigest().upper()[:15]
            base.msisdn = str(9055500000 + base.mbb * pow(10, 10))

            for user in Mimic.generate(count=user_data_per_user):
                insert_dt = DateTime.between_ts(from_ts, to_ts)

                user.insert_date = str(insert_dt.date())
                user.insert_time = insert_dt.isoformat()

                user.type = base.type

                user.user_detail_id = base.user_detail_id  # static
                user.user_id = base.user_id  # static
                user.mbb = base.mbb  # static
                user.device_client_id = base.device_client_id  # static
                user.device_client_ip = Mobile.ip()
                user.device_os = Mobile.device_os()
                user.is_cracked = Mobile.is_cracked()
                user.connection_type = Mobile.connection_type()
                user.imei = base.imei  # static
                user.carrier = Mobile.carrier()
                user.operation = Mobile.operation()
                user.is_success = Mobile.is_success()
                user.msisdn = base.msisdn
                # Text-mode file: '\n' is the correct terminator on every
                # platform (os.linesep would become '\r\r\n' on Windows).
                user_file.write(json.dumps(user) + '\n')

            # One fraud row for each of the first users, counted including
            # the user_start offset.
            if user_seed - seed <= fraud_count:
                fraud_data_written = True
                fraud = Prodict()
                record_date = DateTime.between_ts(from_ts, to_ts)

                fraud.type = 'fraud_result'
                fraud.record_date = str(record_date.date())
                fraud.mbb = base.mbb
                fraud.device_uid = base.imei.lower()
                fraud.query_name = switch_query(random.randint(1, 100))
                fraud.source = random.choice(["AB", "AF"])

                fraud.package_list = packagen.packages[random.randint(
                    0, len_packages - 1)]

                fraud.device_client_id = base.device_client_id
                fraud.device_info = Mobile.device_os()
                fraud_file.write(json.dumps(fraud) + '\n')

        print('Done.')

    os.rename(user_file_gen_ext, user_file_load_ext)
    if fraud_data_written:
        os.rename(fraud_file_gen_ext, fraud_file_load_ext)
    else:
        # Nothing was written: drop the empty fraud file instead of
        # publishing it.
        os.remove(fraud_file_gen_ext)
Пример #3
0
def generate_data(user_count, result_count):
    """Generate mock user and result records as JSON-lines files in ./output.

    One user record is written per generated user, followed by
    ``result_count`` result records that copy that user's identifiers.
    Both files are written with a ``.gen`` suffix and renamed to ``.load``
    only after they have been completely written and closed. A timing
    summary is printed at the end.

    Args:
        user_count: Number of user records to generate.
        result_count: Number of result records written for each user.
    """
    print(user_count, result_count)
    switch_query = Switch(between_table)

    from_ts = int(datetime(2015, 1, 1).timestamp())
    to_ts = int(datetime(2020, 12, 31).timestamp())

    from_mbb = 5301000000
    to_mbb = from_mbb + 200000

    from_imei = 990000862471854
    to_imei = from_imei + 200000
    start_time = time.time()

    len_packages = len(packagen.packages)

    file_id = f"{int(time.time())}-{random.randint(1000, 9999)}"

    print(f"File id: {file_id}")

    user_file_gen_ext = f'./output/{file_id}-user.gen'
    user_file_load_ext = f'./output/{file_id}-user.load'

    result_file_gen_ext = f'./output/{file_id}-result.gen'
    result_file_load_ext = f'./output/{file_id}-result.load'

    # Context managers close both files even if generation raises; in that
    # case the partial files keep their '.gen' suffix because the renames
    # below are never reached.
    with open(user_file_gen_ext, 'w') as user_file, \
            open(result_file_gen_ext, 'w') as result_file:
        for user in Mimic.generate(count=user_count):
            insert_dt = DateTime.between_ts(from_ts, to_ts)

            user.insert_date = str(insert_dt.date())
            user.insert_time = insert_dt.isoformat()
            user.user_detail_id = 1
            user.user_id = 1
            user.mbb = Mobile.phone(from_number=from_mbb, to_number=to_mbb)
            # Derive the client id from mbb so the same mbb always maps to
            # the same UUID string.
            user.device_client_id = str(uuid.UUID(
                int=(0xFFFFFFFFFFFFFFFFFFFFFF0 * user.mbb) % 0xFFFFFFFFFFFFFFFFFFFFFFF))
            user.device_client_ip = Mobile.ip()
            user.device_os = Mobile.device_os()
            user.is_cracked = Mobile.is_cracked()
            user.connection_type = Mobile.connection_type()
            user.imei = Mobile.imei(from_imei, to_imei)
            user.carrier = Mobile.carrier()
            user.operation = Mobile.operation()
            user.is_success = 1
            user.msisdn = '9' + user.device_client_id

            # Text-mode file: '\n' is the correct terminator on every
            # platform (os.linesep would become '\r\r\n' on Windows).
            user_file.write(json.dumps(user) + '\n')

            for result in Mimic.generate(count=result_count):
                # Results are dated no earlier than the user's insert time.
                record_date = DateTime.between_ts(insert_dt.timestamp(), to_ts)

                result.record_date = record_date.isoformat()
                result.mbb = user.mbb
                result.device_uid = user.imei
                result.query_name = switch_query(random.randint(1, 100))
                result.source = "AB" if random.randint(1, 2) == 1 else "AF"
                package_list = [packagen.packages[random.randint(0, len_packages - 1)]]

                # 2 of the 20 possible draws (1 and 2) add a second package.
                if random.randint(1, 20) < 3:
                    package_list.append(packagen.packages[random.randint(0, len_packages - 1)])

                result.package_list = ','.join(package_list)

                result.device_client_id = user.device_client_id
                result.device_info = user.device_os

                result_file.write(json.dumps(result) + '\n')

    # Rename only after both files are complete and closed.
    os.rename(result_file_gen_ext, result_file_load_ext)
    os.rename(user_file_gen_ext, user_file_load_ext)

    took = time.time() - start_time
    # Guard both divisions: the elapsed time can be 0 on a coarse clock and
    # the speed is 0 when no users were generated.
    speed = user_count / took if took > 0 else 0
    # NOTE(review): the estimate is for 200_000 users while the message says
    # "1M rows" -- presumably 5 results per user were assumed; confirm.
    one_million_time = 200_000 / speed if speed > 0 else float('inf')

    print(f"Generated user data:{user_count}, result data:{result_count}x{user_count}={result_count * user_count}\n"
          f"Time: {took}. Speed: {speed} users/sec. It takes {one_million_time / 3600} hours to generate 1M rows")