def test_correct_number_of_rows_are_generated():
    """Generate company rows keyed by SIC industry and check the row count.

    The original test built the frame but never asserted anything, so it
    could only fail if generation raised. It now verifies that the number
    of rows matches the requested ``count``.
    """
    expected_rows = 50

    df = gen.generate(
        props={
            'region': gen.choice(data=['EMEA', 'LATAM', 'NAM', 'APAC'],
                                 weights=[0.1, 0.1, 0.3, 0.5]),
            'sic_range': gen.sic_range(),
            'sic': gen.sic_industry(sic_range_field='sic_range'),
            'country': gen.country_codes(region_field='region'),
            'client_name': gen.company_namer(field='sic',
                                             field_type='sic',
                                             countrycode_field='country')
        },
        count=expected_rows,
        randomstate=np.random.RandomState(13031981)).to_dataframe()

    # The generated cells are objects; replace them with a plain attribute
    # so the printed frame is readable.
    df['sic_range'] = df['sic_range'].apply(lambda x: x.name)
    df['sic'] = df['sic'].apply(lambda x: x.name)
    df['country'] = df['country'].apply(lambda x: x.alpha3_code)

    print(df)

    assert len(df) == expected_rows
def test_countries_are_in_nam_target_region():
    """Every country generated for the 'NAM' region must be a NAM country.

    The region column is pinned to the single value 'NAM', and each
    generated country is checked for membership in
    ``data.countries(region="NAM")``.
    """
    field_spec = {
        "region": gen.choice(['NAM']),
        "country": gen.country_codes(region_field="region")
    }
    seeded_rng = np.random.RandomState(13031981)

    generated = gen.generate(props=field_spec,
                             count=100,
                             randomstate=seeded_rng)
    result = generated.to_dataframe()

    nam_countries = data.countries(region="NAM")

    # One assertion per row, so the first offending row stops the test.
    for generated_country in result['country']:
        assert generated_country in nam_countries
def test_correct_number_of_rows_are_generated():
    """Generate contact names per country and check the row count.

    The original test discarded the generated frame without asserting
    anything. It now verifies that the number of rows matches the
    requested ``count``.
    """
    expected_rows = 50

    df = gen.generate(
        props={
            'region': gen.choice(data=['EMEA', 'LATAM', 'NAM', 'APAC'],
                                 weights=[0.1, 0.1, 0.3, 0.5]),
            "country": gen.country_codes(region_field='region'),
            "contact_name": gen.person(country_field='country')
        },
        count=expected_rows,
        randomstate=np.random.RandomState(13031981)).to_dataframe()

    assert len(df) == expected_rows
def run(seed=130319810):
    """Build, print and return a 50-row frame of generated legal entities.

    Each row gets a primary and a secondary region/country pair, an
    industry range and code, a legal name and an LEI code. After
    generation, both country columns are replaced by their ``alpha3_code``
    and a currency column is added next to each of them.

    Args:
        seed: Seed passed to ``np.random.RandomState`` for generation.

    Returns:
        The post-processed DataFrame.
    """
    regions = ['NAM', 'EMEA', 'APAC', 'LATAM']
    region_weights = [0.5, 0.3, 0.1, 0.1]

    field_spec = {
        'region': gen.choice(data=regions, weights=region_weights),
        'country': gen.country_codes(region_field='region'),
        'secondary-region': gen.choice(data=regions, weights=region_weights),
        'secondary-country': gen.country_codes(
            region_field='secondary-region'),
        'industry': gen.sic_range(),
        'industry_code': gen.sic_industry('industry'),
        'legal-name': gen.company_namer(field='industry_code',
                                        countrycode_field='country'),
        'lei_code': gen.lei_code()
    }
    rng = np.random.RandomState(seed)
    df = gen.generate(props=field_spec,
                      count=50,
                      randomstate=rng).to_dataframe()

    # (currency column to add, country column to flatten). The currency is
    # read first because the country column is overwritten with a plain
    # code immediately afterwards.
    currency_for_country = (
        ('prefered_ccy', 'country'),
        ('secondary_ccy', 'secondary-country'),
    )
    for ccy_column, country_column in currency_for_country:
        df[ccy_column] = df[country_column].apply(
            lambda country: country.currency)
        df[country_column] = df[country_column].apply(
            lambda country: country.alpha3_code)

    print(df)
    return df
def test_correct_number_of_rows_are_generated():
    """Generate company names keyed by client type and check the row count.

    The original test discarded the generated frame without asserting
    anything. It now verifies that the number of rows matches the
    requested ``count``.
    """
    expected_rows = 50

    df = gen.generate(
        props={
            'region': gen.choice(data=['EMEA', 'LATAM', 'NAM', 'APAC'],
                                 weights=[0.1, 0.1, 0.3, 0.5]),
            "country": gen.country_codes(region_field='region'),
            "client_type": gen.choice(data=data.client_types()),
            "client_name": gen.company_namer(field='client_type',
                                             field_type='client_type',
                                             countrycode_field='country')
        },
        count=expected_rows,
        randomstate=np.random.RandomState(13031981)).to_dataframe()

    assert len(df) == expected_rows
def test_address_generation():
    """Generate addresses per country and flatten them into plain columns.

    Fixes the misspelled column key ``'postal)code'`` (now
    ``'postal_code'``) and adds assertions, since the original test
    checked nothing beyond "generation did not raise".
    """
    expected_rows = 100

    df = gen.generate(
        props={
            'region': gen.choice(['NAM', 'EMEA', 'APAC', 'LATAM']),
            'country': gen.country_codes(region_field='region'),
            'address': gen.address('country')
        },
        count=expected_rows,
        randomstate=np.random.RandomState(13031981)).to_dataframe()

    # Post-process: address is an object, so copy each attribute into its
    # own column and then drop the object column.
    df['country'] = df['country'].apply(lambda x: x.alpha2_code)
    df['address_1'] = df['address'].apply(lambda x: x.address_1)
    df['address_2'] = df['address'].apply(lambda x: x.address_2)
    df['city'] = df['address'].apply(lambda x: x.city)
    df['state'] = df['address'].apply(lambda x: x.state)
    df['postal_code'] = df['address'].apply(lambda x: x.postal_code)
    del df['address']

    assert len(df) == expected_rows
    assert 'postal_code' in df.columns
    assert 'address' not in df.columns
def test_normal_sampler_bound():
    """Plot how often each country is drawn with the ``bound_normal`` sampler.

    Generates 100000 countries, reduces each to its ``alpha2_code`` and
    draws the per-country counts twice on the same axes: once as bars and
    once as a marked line. This is a visual check only; nothing is
    asserted and ``plt.show()`` is called at the end.
    """
    country_spec = {
        'country': gen.country_codes(sampler=gen.bound_normal)
    }
    seeded_rng = np.random.RandomState(13031981)

    df = gen.generate_from_model(props=country_spec,
                                 count=100000,
                                 randomstate=seeded_rng).to_dataframe()
    df['country'] = df['country'].map(lambda country: country.alpha2_code)

    # Both plots use the same frequency table, so compute it once.
    frequencies = df['country'].value_counts()

    ax = frequencies.plot(kind='bar')
    ax = frequencies.plot(x='month', linestyle='-', marker='o', ax=ax)

    ax.set_xlabel("Country")
    ax.set_ylabel("Count")
    plt.show()
def test_correct_number_of_rows_are_generated():
    """Generate intraday P&L rows and check the row count.

    The original test post-processed the frame but asserted nothing. It
    now verifies that the number of rows matches the requested ``count``.
    """
    expected_rows = 50

    df = gen.generate(
        props={
            'firm_account': gen.choice(data=['A', 'B', 'C']),
            'region': gen.choice(data=['NAM', 'EMEA', 'LATAM', 'APAC']),
            'country': gen.country_codes(region_field="region"),
            'intraday_pnl': gen.random_range(low=-1000,
                                             high=1000,
                                             round_dp=2),
            'trade_year': gen.choice(
                data=[2010, 2011, 2012, 2013, 2014,
                      2015, 2016, 2017, 2018, 2019])
        },
        count=expected_rows,
        randomstate=np.random.RandomState(13031981)).to_dataframe()

    # Read the currency before the country column is overwritten with a
    # plain code.
    df['ccy'] = df['country'].apply(lambda x: x.currency)
    df['country'] = df['country'].apply(lambda x: x.alpha3_code)
    # Build a date string for the first of January of the trade year.
    df['trade_date'] = df['trade_year'].apply(lambda x: f"{x}-01-01")

    assert len(df) == expected_rows
import numpy as np

import datahub_core.generators as gen
from datahub_core.models import MarkovModel

# One seeded random state feeds the model so runs are repeatable.
RS = np.random.RandomState(13031981)
MODEL = MarkovModel(filename='./tests/client_data.json', randomstate=RS)

# Generated once at import time and shared by the tests in this module.
RESULT = gen.generate_from_model(props={
    "country": gen.country_codes(region_field='region'),
    # The bounds were originally swapped (high=100000, low=10000000).
    "ev": gen.random_range(low=100000, high=10000000),
    "address": gen.address(country_field='country'),
    "contact_name": gen.person(country_field='country'),
    "client_name": gen.company_namer(field='client_type',
                                     field_type='client_type',
                                     countrycode_field='country')
}, count=50, model=MODEL)


def test_correct_number_of_rows_are_generated():
    """Flatten the model-driven result and check the row count.

    The original test remapped two columns but asserted nothing. It now
    verifies that the frame has the 50 rows requested when ``RESULT`` was
    generated.
    """
    df = RESULT.to_dataframe()

    # Remap objects into a flat table.
    df['country'] = df['country'].map(lambda x: x.alpha3_code)
    df['city'] = df['address'].map(lambda x: x.city)

    assert len(df) == 50