def create_human(n):
    """Print a freshly generated fake client profile to stdout.

    :param n: locale switch; ``1`` creates ``Person('ru')``, any other
        value creates ``Person('en')``.
    :return: None; every generated field is printed, nothing is returned.
    """
    person = Person('ru' if n == 1 else 'en')

    # Name block. The passport id record is requested before any output.
    id_record = generate_id_passport()
    print('\nфамилия:\t', person.last_name())
    print('имя:\t\t', person.name())
    print('отчество:\t', person.surname(), '\n')

    # Document block: the document number is generated before printing it.
    document_number = generate_number_passport()
    print('личный №:\t', id_record.get('idn'))
    print('документ:\t', document_number)
    print('пол:\t\t', id_record.get('sex'))
    print('возраст:\t', person.age(13, 70), '\n')

    # Avatar output is switched off:
    # print('аватар\t', person.avatar())
    print('гражд-во:\t', person.get_current_locale())
    print('Нац-сть:\t', person.nationality())
    print('телефон:\t', person.telephone('+37529#######'))
    # A fixed placeholder is printed in place of a generated address:
    # print('email:\t\t', person.email())
    print('email:\t\t', '*****@*****.**')
    print('проф-ия:\t', person.occupation(), '\n')

    print('обращение:\t', person.title())
    print('взгляды:\t', person.views_on())
    print('вера:\t\t', person.worldview(), '\n')
def create_human(n):
    """Generate fake personal data, log the step, and print each field.

    :param n: data-generation language switch; ``1`` creates
        ``Person('ru')``, any other value creates ``Person('en')``.
    :return: None; the generated values are printed to stdout, not returned.
    """
    log.info("Генерируем тестовые данные")
    person = Person('ru' if n == 1 else 'en')
    log.info("person: " + str(person))

    # Name block. The passport id record is requested before any output.
    id_record = generate_id_passport()
    print('\nфамилия:\t', person.last_name())
    print('имя:\t\t', person.name())
    print('отчество:\t', person.surname(), '\n')

    # Document block: the document number is generated before printing it.
    document_number = generate_number_passport()
    print('личный №:\t', id_record.get('idn'))
    print('документ:\t', document_number)
    print('пол:\t\t', id_record.get('sex'))
    print('возраст:\t', person.age(13, 70), '\n')

    # Avatar output is switched off:
    # print('аватар\t', person.avatar())
    print('гражд-во:\t', person.get_current_locale())
    print('Нац-сть:\t', person.nationality())
    print('телефон:\t', person.telephone('+37529#######'))
    print('email:\t\t', person.email())
    print('проф-ия:\t', person.occupation(), '\n')

    print('обращение:\t', person.title())
    print('взгляды:\t', person.views_on())
    print('вера:\t\t', person.worldview(), '\n')
def setUp(self):
    """Generate some fake data.

    Builds a Flask app configured for ``sqlite:///test.db``, calls
    ``util.create_all()``, adds one ``DBUser`` filled with random
    ``'en'``-locale values, and stores a matching ``domain.User`` on
    ``self.user``.
    """
    app = Flask('test')
    self.app = app
    util.init_app(app)
    app.config['CLASSIC_DATABASE_URI'] = 'sqlite:///test.db'
    app.config['CLASSIC_SESSION_HASH'] = 'foohash'

    with app.app_context():
        util.create_all()
        with util.transaction() as session:
            # Random values are drawn in a fixed order: IP address, email,
            # name parts, then the join date.
            person = Person('en')
            ip_addr = Internet('en').ip_v4()
            email = person.email()
            forename = person.name()
            surname = person.surname()
            suffix = person.title()
            joined = util.epoch(
                Datetime('en').datetime().replace(tzinfo=EASTERN))

            db_user = models.DBUser(
                first_name=forename,
                last_name=surname,
                suffix_name=suffix,
                share_first_name=1,
                share_last_name=1,
                email=email,
                flag_approved=1,
                flag_deleted=0,
                flag_banned=0,
                flag_edit_users=0,
                flag_edit_system=0,
                flag_email_verified=1,
                share_email=8,
                email_bouncing=0,
                policy_class=2,  # Public user. TODO: consider admin.
                joined_date=joined,
                joined_ip_num=ip_addr,
                joined_remote_host=ip_addr)
            session.add(db_user)

            # NOTE(review): nesting was reconstructed from a flattened
            # source; confirm this assignment belongs inside the
            # transaction block.
            full_name = domain.UserFullName(
                forename=db_user.first_name,
                surname=db_user.last_name,
                suffix=db_user.suffix_name)
            self.user = domain.User(user_id=str(db_user.user_id),
                                    username='******',
                                    email=db_user.email,
                                    name=full_name)
def users(count: int = 500) -> List[models.User]:
    """Generate a bunch of random users.

    Builds ``count`` ``models.User`` objects and returns them in a list.
    Every user gets its own locale from ``_get_locale()``, and the same
    generated IPv4 address is used for both ``joined_ip_num`` and
    ``joined_remote_host``.
    """

    def _random_user() -> models.User:
        """Build one user from a freshly chosen locale."""
        locale = _get_locale()
        person = Person(locale)
        ip_addr = Internet(locale).ip_v4()
        return models.User(
            first_name=person.name(),
            last_name=person.surname(),
            suffix_name=person.title(),
            share_first_name=1,
            share_last_name=1,
            email=person.email(),
            share_email=8,
            email_bouncing=0,
            policy_class=2,  # Public user.
            joined_date=_epoch(Datetime(locale).datetime()),
            joined_ip_num=ip_addr,
            joined_remote_host=ip_addr)

    return [_random_user() for _ in range(count)]
def setUp(self):
    """Generate some fake data.

    Builds a Flask app configured for ``sqlite:///test.db``, calls
    ``util.create_all()``, adds one ``DBUser`` and stores a matching
    ``domain.User`` on ``self.user``. A second transaction then adds the
    email whitelist/blacklist patterns, the ``'test_domain'`` endorsement
    domain, and one ``DBCategory`` per key of
    ``taxonomy.CATEGORIES_ACTIVE``.
    """
    app = Flask('test')
    self.app = app
    util.init_app(app)
    app.config['CLASSIC_DATABASE_URI'] = 'sqlite:///test.db'
    app.config['CLASSIC_SESSION_HASH'] = 'foohash'

    with app.app_context():
        util.create_all()
        with util.transaction() as session:
            person = Person('en')
            ip_addr = Internet('en').ip_v4()
            email = "*****@*****.**"
            forename = person.name()
            surname = person.surname()
            suffix = person.title()
            joined = util.epoch(
                Datetime('en').datetime().replace(tzinfo=EASTERN))

            db_user = models.DBUser(
                first_name=forename,
                last_name=surname,
                suffix_name=suffix,
                share_first_name=1,
                share_last_name=1,
                email=email,
                flag_approved=1,
                flag_deleted=0,
                flag_banned=0,
                flag_edit_users=0,
                flag_edit_system=0,
                flag_email_verified=1,
                share_email=8,
                email_bouncing=0,
                policy_class=2,  # Public user. TODO: consider admin.
                joined_date=joined,
                joined_ip_num=ip_addr,
                joined_remote_host=ip_addr)
            session.add(db_user)

            # NOTE(review): nesting was reconstructed from a flattened
            # source; confirm this assignment belongs inside the
            # transaction block.
            full_name = domain.UserFullName(
                forename=db_user.first_name,
                surname=db_user.last_name,
                suffix=db_user.suffix_name)
            self.user = domain.User(user_id=str(db_user.user_id),
                                    username='******',
                                    email=db_user.email,
                                    name=full_name)

        ok_patterns = ['%w3.org', '%aaas.org', '%agu.org', '%ams.org']
        bad_patterns = ['%.com', '%.net', '%.biz.%']

        with util.transaction() as session:
            for ok_pattern in ok_patterns:
                session.add(
                    models.DBEmailWhitelist(pattern=str(ok_pattern)))
            for bad_pattern in bad_patterns:
                session.add(
                    models.DBEmailBlacklist(pattern=str(bad_pattern)))

            session.add(
                models.DBEndorsementDomain(
                    endorsement_domain='test_domain',
                    endorse_all='n',
                    mods_endorse_all='n',
                    endorse_email='y',
                    papers_to_endorse=3))

            for category, _definition in taxonomy.CATEGORIES_ACTIVE.items():
                # Split at the first dot; a category without a dot keeps
                # its full name as the archive and gets an empty subject
                # class.
                archive, _dot, subject_class = category.partition('.')
                session.add(
                    models.DBCategory(archive=archive,
                                      subject_class=subject_class,
                                      definitive=1,
                                      active=1,
                                      endorsement_domain='test_domain'))
def setUpClass(cls):
    """Generate some fake data.

    Builds a Flask app configured for ``sqlite:///test.db``, calls
    ``util.create_all()``, and then creates 50 users, one transaction
    each, with a nickname, profile, password and one to four
    endorsements. For every user a tuple
    ``(email, username, password, name, (archive, subject_class,
    net_points), (approved, deleted, banned))`` is appended to
    ``cls.users``.
    """
    # NOTE(review): block nesting was reconstructed from a flattened
    # source; statement order is unchanged.
    cls.app = Flask('test')
    util.init_app(cls.app)
    cls.app.config['CLASSIC_DATABASE_URI'] = 'sqlite:///test.db'
    cls.app.config['CLASSIC_SESSION_HASH'] = 'foohash'
    cls.app.config['CLASSIC_SESSION_TIMEOUT'] = '36000'

    def _flag(percent):
        """Return 1 when ``_prob(percent)`` is truthy, otherwise 0."""
        return 1 if _prob(percent) else 0

    with cls.app.app_context():
        util.create_all()
        user_count = 50

        cls.users = []
        created_users = []
        domain_users = []

        for _index in range(user_count):
            with util.transaction() as session:
                locale = _get_locale()
                person = Person(locale)
                net = Internet(locale)
                ip_addr = net.ip_v4()
                email = person.email()
                approved = _flag(90)
                deleted = _flag(2)
                banned = 1 if random.randint(0, 100) <= 1 else 0
                first_name = person.name()
                last_name = person.surname()
                suffix_name = person.title()
                name = (first_name, last_name, suffix_name)
                joined_date = util.epoch(
                    Datetime(locale).datetime().replace(tzinfo=EASTERN))

                db_user = models.DBUser(
                    first_name=first_name,
                    last_name=last_name,
                    suffix_name=suffix_name,
                    share_first_name=1,
                    share_last_name=1,
                    email=email,
                    flag_approved=approved,
                    flag_deleted=deleted,
                    flag_banned=banned,
                    flag_edit_users=0,
                    flag_edit_system=0,
                    flag_email_verified=1,
                    share_email=8,
                    email_bouncing=0,
                    policy_class=2,  # Public user. TODO: consider admin.
                    joined_date=joined_date,
                    joined_ip_num=ip_addr,
                    joined_remote_host=ip_addr)
                session.add(db_user)

                # Create a username.
                username_is_valid = _flag(90)
                username = person.username()
                db_nick = models.DBUserNickname(
                    user=db_user,
                    nickname=username,
                    flag_valid=username_is_valid,
                    flag_primary=1)

                # Create the user's profile; it gets its own category.
                profile_archive, profile_class = _random_category()
                db_profile = models.DBProfile(
                    user=db_user,
                    country=locale,
                    affiliation=person.university(),
                    url=net.home_page(),
                    rank=random.randint(1, 5),
                    archive=profile_archive,
                    subject_class=profile_class,
                    original_subject_classes='',
                    flag_group_math=_flag(5),
                    flag_group_cs=_flag(5),
                    flag_group_nlin=_flag(5),
                    flag_group_q_bio=_flag(5),
                    flag_group_q_fin=_flag(5),
                    flag_group_stat=_flag(5))

                # Set the user's password.
                password = person.password()
                db_password = models.DBUserPassword(
                    user=db_user,
                    password_storage=2,
                    password_enc=util.hash_password(password))

                # Create some endorsements, all in one freshly drawn
                # category; their point values are summed for later use.
                archive, subject_class = _random_category()
                net_points = 0
                for _ in range(random.randint(1, 4)):
                    etype = random.choice(['auto', 'user', 'admin'])
                    point_value = random.randint(-10, 10)
                    net_points += point_value
                    # Only 'auto' endorsements get an endorser, and only
                    # once at least one earlier user exists.
                    endorser_id = (
                        random.choice(created_users).user_id
                        if created_users and etype == 'auto' else None)
                    issued_when = util.epoch(
                        Datetime(locale).datetime().replace(
                            tzinfo=EASTERN))
                    session.add(
                        models.DBEndorsement(endorsee=db_user,
                                             endorser_id=endorser_id,
                                             archive=archive,
                                             subject_class=subject_class,
                                             flag_valid=1,
                                             endorsement_type=etype,
                                             point_value=point_value,
                                             issued_when=issued_when))

                session.add(db_password)
                session.add(db_nick)
                session.add(db_profile)
                created_users.append(db_user)

                full_name = domain.UserFullName(
                    forename=db_user.first_name,
                    surname=db_user.last_name,
                    suffix=db_user.suffix_name)
                domain_user = domain.User(
                    user_id=str(db_user.user_id),
                    username=db_nick.nickname,
                    email=db_user.email,
                    name=full_name,
                    verified=bool(db_user.flag_email_verified))
                authorizations = domain.Authorizations(
                    classic=util.compute_capabilities(db_user),
                )
                domain_users.append((domain_user, authorizations))
                session.commit()

                # We'll use these data to run tests.
                cls.users.append((
                    email,
                    username,
                    password,
                    name,
                    (archive, subject_class, net_points),
                    (approved, deleted, banned),
                ))
def person(
    cls,
    *,
    locale=Locales.EN,
    qualification=None,
    age=None,
    blood_type=None,
    email=None,
    first_name=None,
    last_name=None,
    gender=None,
    height=None,
    id=None,
    language=None,
    nationality=None,
    occupation=None,
    phone=None,
    title=None,
    university=None,
    weight=None,
    work_experience=None,
):
    '''
        Create a Person Data Entity object.

        All individual fields are automatically randomly generated based on locale. If provided, the corresponding values are overridden.

        A field is generated only when its argument is None. Any other
        explicitly supplied value is used as given, including falsy ones
        such as 0 or an empty string.

        Note:
            All individual fields are randomly generated. Don't expect correct correlation e.g. correct postal code for the generated city.

        Keyword Arguments:
            locale: Appropriate Random.locale.<local_name> object. Default is Random.locale.EN
            qualification: Educational Qualification
            age: Age
            blood_type: Blood type
            email: Email address
            first_name: First name
            last_name: Last name
            gender: Gender
            height: Height
            id: Identifier
            language: Language
            nationality: Nationality
            occupation: Occupation
            phone: Phone number
            title: Title
            university: University
            weight: Weight
            work_experience: Work Experience

        Returns:
            The Person data entity built from the supplied and generated values.
    '''
    person = Person(locale=locale)
    # Imported inside the function, as in the original code.
    from arjuna.engine.data.entity.person import Person as ArjPerson

    # Conditional expressions replace the old "x is not None and x or y"
    # form, which fell through to the generator for falsy arguments.
    # Generators are still called only for fields that were not supplied.
    first_name = first_name if first_name is not None else person.first_name()
    last_name = last_name if last_name is not None else person.last_name()
    return ArjPerson(
        qualification=(qualification if qualification is not None
                       else person.academic_degree()),
        age=age if age is not None else person.age(),
        blood_type=(blood_type if blood_type is not None
                    else person.blood_type()),
        email=email if email is not None else person.email(),
        first_name=first_name,
        last_name=last_name,
        name=first_name + " " + last_name,
        gender=gender if gender is not None else person.gender(),
        height=height if height is not None else person.height(),
        id=id if id is not None else person.identifier(),
        language=language if language is not None else person.language(),
        nationality=(nationality if nationality is not None
                     else person.nationality()),
        occupation=(occupation if occupation is not None
                    else person.occupation()),
        phone=phone if phone is not None else person.telephone(),
        title=title if title is not None else person.title(),
        university=(university if university is not None
                    else person.university()),
        weight=weight if weight is not None else person.weight(),
        work_experience=(work_experience if work_experience is not None
                         else person.work_experience()),
    )
def gen_data_change_column_name(self, data_path, partition_date, num_rows,
                                file_format):
    """Create a data sample partition that uses renamed columns.

    Generates ``num_rows`` random rows, writes them to ``data_path``
    partitioned by ``date`` in overwrite mode, then prints the partition
    path, the row count and the schema.

    Input
        - data_path: path where the partition will be created (string)
        - partition_date: partition date to be created (date)
        - num_rows: number of rows to be generated (integer, at least 1)
        - file_format: format of file to be generated (parquet or avro)

    Raises
        - ValueError: if ``num_rows`` is smaller than 1. The check runs
          before anything is written, so no data at ``data_path`` is
          overwritten.
    """
    if num_rows < 1:
        raise ValueError(
            'num_rows must be at least 1, got {}'.format(num_rows))

    person = Person('en')
    address = Address('en')

    # Create schema
    schema_street = StructType([
        StructField('street_name', StringType(), True),
        StructField('lat', FloatType(), True),  # column renamed
        StructField('long', FloatType(), True)  # column renamed
    ])

    schema_address_details = StructType([
        StructField('street', schema_street, True),
        StructField('number', IntegerType(), True)
    ])

    schema_address = StructType([
        StructField('address_details', schema_address_details, True),
        StructField('city', StringType(), True),
        StructField('country', StringType(), True),
        StructField('country_code', StringType(), True),
        StructField('state', StringType(), True),
        StructField('postal_code', IntegerType(), True)
    ])

    schema_df = StructType([
        StructField('identifier', StringType(), True),
        StructField('first_name', StringType(), True),
        StructField('last_name', StringType(), True),
        StructField('occupation', StringType(), True),
        StructField('age', IntegerType(), True),
        StructField('address', schema_address, True),
        StructField('title_name', StringType(), True),  # column renamed
        StructField('date', DateType(), True)
    ])

    # Generate data. All rows are collected first and turned into a
    # DataFrame with a single createDataFrame call, instead of unioning
    # one single-row DataFrame per row. The nested lists mirror the
    # nested struct layout of schema_df.
    rows = [
        [
            person.identifier(),
            person.first_name(),
            person.last_name(),
            person.occupation(),
            person.age(),
            [
                [
                    [
                        address.street_name(),
                        float(address.latitude()),
                        float(address.longitude()),
                    ],
                    int(address.street_number()),
                ],
                address.city(),
                address.country(),
                address.country_code(),
                address.state(),
                int(address.postal_code()),
            ],
            person.title(),
            partition_date,
        ]
        for _ in range(num_rows)
    ]
    df = self.spark.createDataFrame(rows, schema_df)

    df.coalesce(1).write.partitionBy('date').mode('overwrite').format(
        file_format).save(data_path)

    print('Partition created: {data_path}/date={date}'.format(
        data_path=data_path, date=partition_date))
    print('# Rows:', df.count())
    print('Schema:')
    df.printSchema()
    print('\n')