def address(cls, *, locale=Locales.EN, calling_code=None, city=None, country=None, country_code=None, latitude=None, longitude=None, postal_code=None, state=None, street_name=None, street_number=None, street_suffix=None):
    '''
        Create an Address Data Entity object.

        All individual fields are automatically randomly generated based on locale. If provided, the corresponding values are overridden.

        Note:
            All individual fields are randomly generated. Don't expect correct correlation e.g. correct postal code for the generated city.

            A field is overridden whenever the supplied value is not None, so falsy
            values such as 0, 0.0 or an empty string are passed through as given.

        Keyword Arguments:
            locale: Appropriate Random.locale.<local_name> object. Default is Random.locale.EN
            calling_code: Calling Code
            city: City
            country: Country Name
            country_code: Country Code
            latitude: Latitude
            longitude: Longitude
            postal_code: Postal Code
            state: State
            street_name: Street Name
            street_number: Street Number
            street_suffix: Street Suffix

        Returns:
            The Address data entity built from the supplied and generated values.
    '''
    generator = Address(locale=locale)
    from arjuna.engine.data.entity.address import Address as ArjAddress

    # Use explicit conditional expressions instead of the `a and b or c` idiom:
    # that idiom dropped falsy overrides and, for `city`, produced the boolean
    # True instead of the supplied value. The generator is only called for
    # fields that were not supplied.
    return ArjAddress(
        calling_code=calling_code if calling_code is not None else generator.calling_code(),
        city=city if city is not None else generator.city(),
        country=country if country is not None else generator.country(),
        country_code=country_code if country_code is not None else generator.country_code(),
        latitude=latitude if latitude is not None else generator.latitude(),
        longitude=longitude if longitude is not None else generator.longitude(),
        postal_code=postal_code if postal_code is not None else generator.postal_code(),
        state=state if state is not None else generator.state(),
        street_name=street_name if street_name is not None else generator.street_name(),
        street_number=street_number if street_number is not None else generator.street_number(),
        street_suffix=street_suffix if street_suffix is not None else generator.street_suffix(),
    )
def gen_data_change_column_name(self, data_path, partition_date, num_rows, file_format):
    """
    Input
        - data_path: path where the partition will be created (string)
        - partition_date: partition date to be created (date)
        - num_rows: number of rows to be generated (integer)
        - file_format: format of file to be generated (parquet or avro)

    This function creates a data sample changing column name.

    The generated rows are written to data_path partitioned by 'date' in
    overwrite mode, and a summary (partition path, row count, schema) is
    printed. Nothing is returned.
    """
    person = Person('en')
    address = Address('en')

    # Create schema
    schema_street = StructType([
        StructField('street_name', StringType(), True),
        StructField('lat', FloatType(), True),  # column renamed
        StructField('long', FloatType(), True)  # column renamed
    ])

    schema_address_details = StructType([
        StructField('street', schema_street, True),
        StructField('number', IntegerType(), True)
    ])

    schema_address = StructType([
        StructField('address_details', schema_address_details, True),
        StructField('city', StringType(), True),
        StructField('country', StringType(), True),
        StructField('country_code', StringType(), True),
        StructField('state', StringType(), True),
        StructField('postal_code', IntegerType(), True)
    ])

    schema_df = StructType([
        StructField('identifier', StringType(), True),
        StructField('first_name', StringType(), True),
        StructField('last_name', StringType(), True),
        StructField('occupation', StringType(), True),
        StructField('age', IntegerType(), True),
        StructField('address', schema_address, True),
        StructField('title_name', StringType(), True),  # column renamed
        StructField('date', DateType(), True)
    ])

    # Generate data
    # Collect every row first and build a single DataFrame. The previous
    # per-row createDataFrame + union relied on a bare `except` to bootstrap
    # the first iteration, which also hid genuine union errors and left `df`
    # undefined when num_rows was 0.
    rows = []
    for _ in range(num_rows):
        rows.append([
            person.identifier(),
            person.first_name(),
            person.last_name(),
            person.occupation(),
            person.age(),
            [
                [
                    [
                        address.street_name(),
                        float(address.latitude()),
                        float(address.longitude())
                    ],
                    int(address.street_number())
                ],
                address.city(),
                address.country(),
                address.country_code(),
                address.state(),
                int(address.postal_code())
            ],
            person.title(),
            partition_date
        ])

    # The explicit schema lets createDataFrame handle an empty row list too.
    df = self.spark.createDataFrame(rows, schema_df)

    df.coalesce(1).write.partitionBy('date').mode('overwrite').format(
        file_format).save(data_path)

    print('Partition created: {data_path}/date={date}'.format(
        data_path=data_path, date=partition_date))
    print('# Rows:', df.count())
    print('Schema:')
    df.printSchema()
    print('\n')

    return