Python choice示例，datahub_core.generators.choice Python示例

示例#1

0

显示文件

def test_correct_number_of_rows_are_generated():
    """Generate company rows and check that the requested number come back.

    Builds a frame with ``region``, ``sic_range``, ``sic``, ``country`` and
    ``client_name`` columns from a fixed random seed, asserts the row count,
    then replaces the object-valued columns with plain attributes and prints
    the result.
    """
    row_count = 50

    df = gen.generate(
        props={
            'region':
            gen.choice(data=['EMEA', 'LATAM', 'NAM', 'APAC'],
                       weights=[0.1, 0.1, 0.3, 0.5]),
            'sic_range':
            gen.sic_range(),
            # The generators below are wired to earlier columns by field name.
            'sic':
            gen.sic_industry(sic_range_field='sic_range'),
            'country':
            gen.country_codes(region_field='region'),
            'client_name':
            gen.company_namer(field='sic',
                              field_type='sic',
                              countrycode_field='country')
        },
        count=row_count,
        randomstate=np.random.RandomState(13031981)).to_dataframe()

    # The test name promises a row-count check; without this assertion the
    # test could only fail by raising during generation.
    assert len(df) == row_count

    # These cells hold objects; keep only the attribute that is readable when
    # the frame is printed.
    df['sic_range'] = df['sic_range'].apply(lambda x: x.name)
    df['sic'] = df['sic'].apply(lambda x: x.name)
    df['country'] = df['country'].apply(lambda x: x.alpha3_code)

    print(df)

示例#2

0

显示文件

文件： test_choice_from_list.py 项目： stjordanis/datahub

def test_choice():
    """Check that every candidate region shows up in 100 unweighted draws."""
    candidates = ('NAM', 'EMEA', 'APAC', 'LATAM')

    props = {
        'region': gen.choice(list(candidates)),
    }
    frame = gen.generate(
        props=props,
        count=100,
        randomstate=np.random.RandomState(13031981)).to_dataframe()

    # Collect the distinct values once instead of tracking one flag per region.
    observed = set(frame['region'])

    for region in candidates:
        assert region in observed

示例#3

0

显示文件

def test_correct_number_of_rows_are_generated():
    """Generate client rows and check that the requested number come back.

    Builds a frame with ``region``, ``country``, ``client_type`` and
    ``client_name`` columns from a fixed random seed and asserts its length.
    """
    row_count = 50

    df = gen.generate(
        props={
            'region':
            gen.choice(data=['EMEA', 'LATAM', 'NAM', 'APAC'],
                       weights=[0.1, 0.1, 0.3, 0.5]),
            "country":
            gen.country_codes(region_field='region'),
            "client_type":
            gen.choice(data=data.client_types()),
            # The namer is pointed at the client_type and country columns.
            "client_name":
            gen.company_namer(field='client_type',
                              field_type='client_type',
                              countrycode_field='country')
        },
        count=row_count,
        randomstate=np.random.RandomState(13031981)).to_dataframe()

    # The test name promises a row-count check; the original body had none.
    assert len(df) == row_count

示例#4

0

显示文件

文件： test_country_generation.py 项目： stjordanis/datahub

def test_generated_regions_are_correct():
    """Check that 100 rows are generated and each region is a known one."""
    expected_rows = 100

    props = {"region": gen.choice(data.regions())}
    frame = gen.generate(
        props=props,
        count=expected_rows,
        randomstate=np.random.RandomState(13031981)).to_dataframe()

    assert len(frame) == expected_rows

    # Every generated value must come from the reference region list.
    known_regions = data.regions()
    for region in frame['region']:
        assert region in known_regions

示例#5

0

显示文件

文件： test_country_generation.py 项目： stjordanis/datahub

def test_countries_are_in_nam_target_region():
    """Check that countries generated for region 'NAM' are NAM countries."""
    props = {
        # A single-element choice pins every row to the same region.
        "region": gen.choice(['NAM']),
        "country": gen.country_codes(region_field="region"),
    }
    frame = gen.generate(
        props=props,
        count=100,
        randomstate=np.random.RandomState(13031981)).to_dataframe()

    nam_countries = data.countries(region="NAM")

    for country in frame['country']:
        assert country in nam_countries

示例#6

0

显示文件

def test_correct_number_of_rows_are_generated():
    """Generate trade-like rows and check that the requested number come back.

    Builds a frame with ``firm_account``, ``region``, ``country``,
    ``intraday_pnl`` and ``trade_year`` columns from a fixed random seed,
    asserts the row count, then derives ``ccy`` and ``trade_date`` columns
    and flattens ``country`` to its alpha-3 code.
    """
    row_count = 50

    df = gen.generate(
        props={
            'firm_account':
            gen.choice(data=['A', 'B', 'C']),
            'region':
            gen.choice(data=['NAM', 'EMEA', 'LATAM', 'APAC']),
            'country':
            gen.country_codes(region_field="region"),
            'intraday_pnl':
            gen.random_range(low=-1000, high=1000, round_dp=2),
            'trade_year':
            gen.choice(
                data=[2010, 2011, 2012, 2013, 2014,
                      2015, 2016, 2017, 2018, 2019])
        },
        count=row_count,
        randomstate=np.random.RandomState(13031981)).to_dataframe()

    # The test name promises a row-count check; the original body had none.
    assert len(df) == row_count

    # Read the currency before 'country' is overwritten with its code string.
    df['ccy'] = df['country'].apply(lambda x: x.currency)
    df['country'] = df['country'].apply(lambda x: x.alpha3_code)
    df['trade_date'] = df['trade_year'].apply(lambda x: f"{x}-01-01")

示例#7

0

显示文件

文件： test_person_name_generation.py 项目： stjordanis/datahub

def test_correct_number_of_rows_are_generated():
    """Generate contact rows and check that the requested number come back.

    Builds a frame with ``region``, ``country`` and ``contact_name`` columns
    from a fixed random seed and asserts its length.
    """
    row_count = 50

    df = gen.generate(
        props={
            'region':
            gen.choice(data=['EMEA', 'LATAM', 'NAM', 'APAC'],
                       weights=[0.1, 0.1, 0.3, 0.5]),
            "country":
            gen.country_codes(region_field='region'),
            # The person generator is pointed at the country column.
            "contact_name":
            gen.person(country_field='country')
        },
        count=row_count,
        randomstate=np.random.RandomState(13031981)).to_dataframe()

    # The test name promises a row-count check; the original body had none.
    assert len(df) == row_count

示例#8

0

显示文件

def run(seed=130319810):
    """Build, print and return a 50-row frame of generated company records.

    Args:
        seed: Value passed to ``np.random.RandomState`` to seed generation.

    Returns:
        The generated DataFrame. The ``country`` and ``secondary-country``
        columns are replaced by their alpha-3 codes, and the
        ``prefered_ccy`` and ``secondary_ccy`` columns are added.
    """
    regions = ['NAM', 'EMEA', 'APAC', 'LATAM']
    region_weights = [0.5, 0.3, 0.1, 0.1]

    # Key order is kept exactly as in the original definition.
    props = {
        'region': gen.choice(data=regions, weights=region_weights),
        'country': gen.country_codes(region_field='region'),
        'secondary-region': gen.choice(data=regions, weights=region_weights),
        'secondary-country': gen.country_codes(
            region_field='secondary-region'),
        'industry': gen.sic_range(),
        'industry_code': gen.sic_industry('industry'),
        'legal-name': gen.company_namer(field='industry_code',
                                        countrycode_field='country'),
        'lei_code': gen.lei_code(),
    }

    generated = gen.generate(
        props=props,
        count=50,
        randomstate=np.random.RandomState(seed),
    )
    df = generated.to_dataframe()

    # For each country column: read the currency first, then overwrite the
    # column with its alpha-3 code.
    country_to_ccy = (
        ('country', 'prefered_ccy'),
        ('secondary-country', 'secondary_ccy'),
    )
    for country_col, ccy_col in country_to_ccy:
        df[ccy_col] = df[country_col].apply(lambda c: c.currency)
        df[country_col] = df[country_col].apply(lambda c: c.alpha3_code)

    print(df)
    return df

示例#9

0

显示文件

文件： test_address_generation.py 项目： zheyu-wang-tony/datahub

def test_address_generation():
    """Generate 100 addresses and flatten each one into plain columns.

    Builds a frame with ``region``, ``country`` and ``address`` columns from
    a fixed random seed, replaces ``country`` with its alpha-2 code, copies
    each address attribute into its own column and drops the object-valued
    ``address`` column.
    """
    df = gen.generate(
        props={
            'region': gen.choice(['NAM', 'EMEA', 'APAC', 'LATAM']),
            'country': gen.country_codes(region_field='region'),
            'address': gen.address('country')
        },
        count=100,
        randomstate=np.random.RandomState(13031981)).to_dataframe()

    # Post-process: country and address cells are objects, so pull out the
    # plain attributes.
    df['country'] = df['country'].apply(lambda x: x.alpha2_code)

    # Column names are derived from the attribute names so the two cannot
    # drift apart (the original wrote the last column as 'postal)code').
    for attr in ('address_1', 'address_2', 'city', 'state', 'postal_code'):
        df[attr] = df['address'].apply(lambda x, attr=attr: getattr(x, attr))

    del df['address']