Exemplo n.º 1
0
 def getCustomerDetails(self,dataProp_dct,companies_lst,classes,data_prop,birth):
     """Generate one random employment record for a customer.

     Parameters:
         dataProp_dct, classes, data_prop: not needed to compute the result;
             kept so existing callers can pass the same arguments.
         companies_lst: sequence of entries with the company name at index 0
             and the sector at index 1; one entry is picked at random.
         birth: date of birth; must support ``+ relativedelta`` and be
             comparable with ``datetime.today()``.

     Returns:
         ``[company, sector, designation, from_date, to_date]``.

     Raises:
         ValueError: if ``birth`` is less than 18 years in the past, because
             no employment start date in the past can exist.
     """
     employer = random.choice(companies_lst)
     company = employer[0]
     sector = employer[1]
     designation = gen_data.create_job_title()

     # The earliest possible start is the 18th birthday; if even that lies in
     # the future the retry loop below could never terminate.
     if birth + relativedelta(years=18) >= datetime.today():
         raise ValueError("birth must be at least 18 years in the past")

     # Start work at a random age between 18 and 26, retrying until the
     # resulting date is in the past.
     while True:
         from_date = birth + relativedelta(years=random.randint(18, 26))
         if from_date < datetime.today():
             break

     # End date is a random day between the start date and today.
     # randrange(0) raises ValueError, so a start less than a day ago simply
     # ends on the start date.
     days_available = (datetime.today() - from_date).days
     offset = random.randrange(days_available) if days_available > 0 else 0
     to_date = from_date + timedelta(days=offset)

     return [company, sector, designation, from_date, to_date]
Exemplo n.º 2
0
def get_rows():
    """Yield 499 synthetic customer rows (rownum 502..1000) as dicts.

    Each row is built directly as a dictionary. Values keep the native type
    returned by the generator that produced them (tuples for SSN and phone,
    ints for the flags), so no string round-trip through a literal parser is
    involved.
    """
    # One Faker instance is enough for every row.
    fake = Faker()
    for rownum in range(502, 1001):
        yield {
            'rownum': rownum,
            'dunno': 10,
            'CC': gen_data.cc_number(),
            'Employer': gen_data.create_company_name(),
            'Custemail': gen_data.create_email(),
            'name': gen_data.create_name(),
            'occupation': gen_data.create_job_title(),
            'address_street': gen_data.create_city_state_zip(),
            'DOB': gen_data.create_birthday(min_age=2, max_age=85),
            'previous_address_city_state_zip': gen_data.create_city_state_zip(),
            'altcustomer_name': fake.name(),
            'altcustomer_occupation': gen_data.create_job_title(),
            'altcustomer_dob': gen_data.create_birthday(min_age=2, max_age=85),
            # SSN and phone are kept as 3-part tuples of ints.
            'ssn': (randrange(101, 1000, 1), randrange(10, 100, 1),
                    randrange(1000, 10000, 1)),
            'phone': (randrange(101, 1000, 1), randrange(101, 999, 1),
                      randrange(1000, 10000, 1)),
            'AccountID': randrange(100000, 100000000, 1),
            # Flags below are 1 only when the draw from 0..100 lands on 100.
            'PepFlag': max(randrange(0, 101, 1) - 99, 0),
            'altcustomerssn': (randrange(101, 1000, 1), randrange(10, 100, 1),
                               randrange(1000, 10000, 1)),
            'demarketed_customer_flag': max(randrange(0, 101, 1) - 99, 0),
            'SAR_flag': max(randrange(0, 101, 1) - 99, 0),
            'nolonger_a_customer': max(randrange(0, 101, 1) - 99, 0),
            # Offset of 90 (not 99): yields a value in 0..10.
            'closed_account': max(randrange(0, 101, 1) - 90, 0),
            'High_risk_flag': max(randrange(0, 101, 1) - 99, 0),
            'Risk_rating': max(randrange(0, 101, 1) - 99, 0),
        }
Exemplo n.º 3
0
    def getCustomerDetails(self, dataProp_dct, companies_lst, classes,
                           data_prop, birth):
        """Generate a random employment record limited to the known columns.

        A value is produced only when some key of ``dataProp_dct`` matches the
        corresponding keyword according to ``self.checkKeywords`` (a first
        element equal to 1 is treated as a match); otherwise that slot is
        ``None``.

        Parameters:
            dataProp_dct: mapping whose keys are tested against the keywords
                'Company', 'Sector', 'Designation', 'From_Date', 'To_Date'.
            companies_lst: sequence of entries with the company at index 0 and
                the sector at index 1; one entry is picked at random.
            classes, data_prop: not used here; kept for caller compatibility.
            birth: date of birth; must support ``+ relativedelta`` and be
                comparable with ``datetime.today()``.

        Returns:
            ``[company, sector, designation, fromDate, toDate]``.

        Raises:
            ValueError: if ``birth`` is less than 18 years in the past.
        """
        def has_column(keyword):
            # True when any property name matches `keyword`.
            return any(self.checkKeywords(key, keyword)[0] == 1
                       for key in dataProp_dct)

        lst = random.choice(companies_lst)
        company = lst[0] if has_column('Company') else None
        sector = lst[1] if has_column('Sector') else None
        designation = (gen_data.create_job_title()
                       if has_column('Designation') else None)

        # The 18th birthday is the earliest possible start; if it is not yet
        # in the past the retry loop below could never terminate.
        if birth + relativedelta(years=18) >= datetime.today():
            raise ValueError("birth must be at least 18 years in the past")

        # Start work at a random age between 18 and 26, retrying until the
        # resulting date is in the past.
        while True:
            start_date = birth + relativedelta(years=random.randint(18, 26))
            if start_date < datetime.today():
                break
        fromDate = start_date if has_column('From_Date') else None

        # An end date needs a start date to be measured from.
        toDate = None
        if fromDate is not None and has_column('To_Date'):
            elapsed_days = (datetime.today() - fromDate).days
            # randrange(0) raises ValueError; fall back to a zero offset.
            offset = random.randrange(elapsed_days) if elapsed_days > 0 else 0
            toDate = fromDate + timedelta(days=offset)

        return [company, sector, designation, fromDate, toDate]
def fake_user():
    """Write 100 pipe-delimited fake user records to ``test_user.txt``.

    Each line holds: first name, last name, street, city, state, zip code,
    date, job title, running index, e-mail, password, latitude, longitude.
    Latitude and longitude move by 0.001 per record, starting from
    (40.4365, -99.3925).
    """
    lat = 40.4365
    lng = -99.3925
    # The `with` block closes the file even if a generator raises mid-way.
    with open('test_user.txt', 'w') as f:
        # Pool of job titles that each record samples one entry from.
        jobs = [gen_data.create_job_title() for _ in xrange(200)]
        for i in xrange(100):
            lat += 0.001
            lng -= 0.001
            zipcode, city, state = gen_data.create_city_state_zip()
            fields = [
                fake.first_name(),
                fake.last_name(),
                gen_data.create_street(),
                city,
                state,
                zipcode,
                fake.date(),
                str(sample(jobs, 1)[0]),
                str(i),
                fake.email(),
                fake.password(length=6,
                              special_chars=True,
                              digits=True,
                              upper_case=True,
                              lower_case=True),
                str(lat),
                str(lng),
            ]
            f.write('|'.join(fields) + '\n')
Exemplo n.º 5
0
 def getProspectDetails(self, companies_lst, classes, data_prop, birth):
     """Generate one random employment record for a prospect.

     Parameters:
         companies_lst: sequence of entries; index 0 and index 1 of a randomly
             chosen entry become the first two returned values (presumably
             company name and sector -- confirm against the caller).
         classes, data_prop: not needed to compute the result; kept so
             existing callers can pass the same arguments.
         birth: date of birth; must support ``+ relativedelta`` and be
             comparable with ``datetime.today()``.

     Returns:
         A five-element list: the two values taken from the chosen company
         entry, a generated job title, the start date and the end date.

     Raises:
         ValueError: if ``birth`` is less than 18 years in the past.
     """
     chosen = random.choice(companies_lst)
     first_value = chosen[0]
     second_value = chosen[1]
     job_title = gen_data.create_job_title()

     # The 18th birthday is the earliest possible start; if it is not yet in
     # the past the retry loop below could never terminate.
     if birth + relativedelta(years=18) >= datetime.today():
         raise ValueError("birth must be at least 18 years in the past")

     # Start at a random age between 18 and 26, retrying until the resulting
     # date is in the past.
     while True:
         start_date = birth + relativedelta(years=random.randint(18, 26))
         if start_date < datetime.today():
             break

     # End date is a random day between the start date and today.
     # randrange(0) raises ValueError, so fall back to a zero offset.
     elapsed_days = (datetime.today() - start_date).days
     offset = random.randrange(elapsed_days) if elapsed_days > 0 else 0
     end_date = start_date + timedelta(days=offset)

     return [first_value, second_value, job_title, start_date, end_date]
Exemplo n.º 6
0
			mdl.insert(No_CCs,'')
		#Sets CC_NO to a random credit card number
		CC_NO=gen_data.cc_number()
                                
		#Extract CC_Number from the tuple returned by CC_Number...Tuple contains CC Number and Type
		#while CC_list.count(CC_NO[1][0]) > 0: 
		CC_TRANS=CC_NO[1][0]
		
		dt = str(datetime.now())
		clean=re.sub('\W','',dt)
		printCC=str(CC_TRANS[-4:])+str(clean[-12:-3])+str(randrange(1111,9999,randrange(1,10,1)))
		#str(CC_TRANS[-4:])+str(clean[-12:-2])+str(randrange(1111,9999,randrange(1,10,1)))
		#Add CC_Number to control list to prevent duplicates
		#Add data elements to current csv row
		row.extend([names[0],mdl[0],ssn[0],names[1],mdl[1],ssn[1],names[2],mdl[2],ssn[2],printCC,CC_NO[0],gen_data.create_company_name()+' '+tmp[1],\
		gen_data.create_email(),gen_data.create_job_title()])
                                
		#Creates Current Address
		zip=random.choice(zips.zip)
		addr=geo_data.create_city_state_zip[zip]
		#Creates Previous address
		zip2=random.choice(zips.zip)
		addr2=geo_data.create_city_state_zip[zip2]
                                                                
		#Add additional data elements to current csv row
		lrg_cash_ex=random.choice(Yes_No)
		
		#Condition for SARs and Demarketed Clients 
		if(Clsd=='Yes'):
			#1% of closed accounts are demarketed but never had a SAR filed
			if (max((randrange(0,101,1)-99),0)==1 and SAR=='No'):
Exemplo n.º 7
0
from barnum import gen_data
import csv


# Write 100 space-delimited rows of generated demographic data
# (name, job title, phone, city/state/zip) to demographic.csv.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open('demographic.csv','w') as csvfile:
    csvwriter = csv.writer(csvfile, delimiter=' ')
    for _ in range(100):
        csvwriter.writerow([
            gen_data.create_name(),
            gen_data.create_job_title(),
            gen_data.create_phone(),
            gen_data.create_city_state_zip(),
        ])

def createCusts(N):
    """Build N synthetic customer/account rows.

    Every row is a flat list of values: account identity, up to three
    additional authorised users, credit card, employer, addresses, risk
    flags, segmentation model data, net worth and a use-case id.

    Parameters:
        N: number of rows to generate. The module-level ``liSSNMaster`` is
            indexed with every value in ``0..N-1``, so it must hold at least
            N entries.

    Returns:
        A list with one row (list) per generated customer.
    """
    global liSSNMaster

    def hit(top):
        # True only when a draw from 0..top lands exactly on `top`,
        # i.e. with probability 1 / (top + 1).
        return randrange(0, top + 1, 1) == top

    # ---- Weighted pick lists (repetition controls the odds) ----
    # Client whose net worth is over $500K
    HighNetWorth = ['Yes'] + ['No'] * 30
    # Type of related account
    Related_Type = ['Primary', 'Secondary', 'Joint']
    # How the account was opened
    Party_Type = ['Person', 'Non-Person']
    # Relation to the bank
    Party_Relation = ['Customer', 'Non-Customer']
    # Skewed Yes/No flag
    Yes_No = ['Yes'] + ['No'] * 12
    # Consent to share information
    Yes_No_Consent = ['Yes'] + ['No'] * 4
    # Official language
    Official_Lang = ['English'] * 3 + ['French']
    # Preferred method of communication
    Preffered_Channel = ['Direct Mail', 'Telemarketing', 'Email', 'SMS']
    # Segmentation model attributes; the five lists are parallel and are
    # indexed with the same segment id.
    Seg_Model_Type = ['LOB Specific', 'Profitability', 'Geographical',
                      'Behavioral', 'Risk Tolerance']
    Model_ID = ['01', '02', '03', '04', '05']
    Seg_Model_Name = ['IRRI', 'CRS Risk Score', 'Geo Risk',
                      'Financial Behavior Risk', 'CM Risk']
    Seg_Model_Score = ['200', '300', '400', '100', '500']
    Seg_Model_Group = ['Group 1'] * 2 + ['Group 2', 'Group 3', 'Group 4']
    Seg_Model_Description = ['High Risk Tier', 'Mid Risk Tier',
                             'Low Risk Tier', 'Vertical Risk',
                             'Geographical Risk']
    # Shared distribution for all rare high-risk business flags:
    # 1 'Yes', 2 'No' and 392 blanks.
    Rare_Flag = ['Yes'] + ['No'] * 2 + [''] * 392
    # Client onboarding channel
    Channel_Onboarding = ['E-mail', 'In Person',
                          'In person - In Branch/Bank Office',
                          'In person - Offsite/Client Location',
                          'Mail', 'Online', 'Phone',
                          'Request for Proposal (RFP)'] + ['Not Applicable'] * 10
    # Ongoing transaction channel
    Channel_Ongoing_Transactions = ['ATM', 'E-mail', 'Fax', 'Mail',
                                    'Not Applicable',
                                    'OTC Communication System',
                                    'Phone'] + ['Online'] * 4 + ['In Person'] * 31
    # Products
    Products = [
        'Certificate of Deposit',
        'Checking Account',
        'Credit Card',
        'Custodial and Investment Agency - Institutional',
        'Custodial and Investment Agency - Personal',
        'Custodial/Trust Outsourcing Services (BTOS)',
        'Custody Accounts (PTIM)',
        'Custody Accounts (RSTC)',
        'DTF (BHFA)',
        'Investment Agency - Personal',
        'Investment Management Account (PTIM)',
        'Lease',
        'Loan / Letter of Credit',
        'Money Market',
        'Mortgage / Bond / Debentures',
        'None',
        'Savings Account',
        'Trust Administration - Irrevocable and Revocable (PTIM)',
        'Trust Administration - Irrevocable and Revocable Trusts (BDTC)',
    ] + ['Nondeposit Investment Products'] * 14 + ['Investment Agency - Institutional'] * 5
    # Services
    Services = [
        'Benefit Payment Services',
        'Domestic Wires and Direct Deposit / ACH',
        'Family Office Services (FOS)',
        'Fiduciary Services',
        'International Wires and IAT',
        'Investment Advisory Services (IAS)',
        'Investment Services',
        'None',
        'Online / Mobile Banking',
        'Payroll',
        'Short Term Cash Management',
        'Trust Services',
        'Trustee Services',
        'Vault Cash Services',
    ] + ['Financial Planning'] * 6 + ['Retirement Plans'] * 19
    # SIC codes
    SIC_Code = [
        '6021 National Commercial Banks',
        '6211 Security Brokers Dealers and Flotation Companies',
        '6282 Investment Advice',
        '6311 Life Insurance',
        '6733 Trusts Except Educational Religious and Charitable',
        '8999 Services NEC',
    ] + ['6722 Management Investment Offices Open-End'] * 12
    # Stock market listing
    Stock_Market_Listing = [
        'Australian Stock Exchange',
        'Brussels Stock Exchange',
        'Montreal Stock Exchange',
        'Tiers 1 and 2 of the TSX Venture Exchange (also known as Tiers 1 and 2 of the Canadian Venture Exchange)',
        'Toronto Stock Exchange',
    ] + ['Not Found'] * 30
    # Selector for the net-worth band of non-high-net-worth clients
    LowNet = [1, 2] + [0] * 5
    # Business vs consumer account
    Acct_Type = ['B'] + ['C'] * 5
    # Number of card holders per account
    Number_CC = [1] * 7 + [2] * 11 + [3] * 3 + [4]
    # Use-case (scenario) ids, weighted by repetition
    Use_Case = [1,4,7,10,13,16,19,22,25,28,31,34,39] * 4 + [2,5,8,11,14,17,20,23,26,29,32,35,38] * 7 + [3,6,9,12,15,18,21,24,27,30,33,36] * 65 + [37] * 73 + [40,41] * 2
    # High-risk ratings, weighted towards '5'
    refrating = ['1','1','1','2','3','4','2','4','5','5','5','5','5','5','5','5','5','5','5','5']

    fake = Faker()
    start = 10786147
    acct_list = []
    liCSV = []

    for i in xrange(N):
        # ---- Risk flags, reset for every customer ----
        PEP = 'No'          # politically exposed person
        SAR = 'No'          # suspicious activity report filed
        high_risk = 'No'
        hr_rating = ''
        demarket = 'No'     # customer was demarketed by the bank
        dem_date = ''
        # Closed-account flag: 1 chance in 98
        Clsd = 'Yes' if hit(97) else 'No'

        # Number of card holders on this account
        No_CCs = random.choice(Number_CC)
        # Account numbers grow by a random step so they never repeat
        acct = start + 1 + randrange(1, 10, 1)
        start = acct
        name = fake.name()
        # tmp[0] is used as the middle name, tmp[1] as the employer suffix
        tmp = gen_data.create_name()
        acct_list.append(acct)
        row = [i, acct, random.choice(Acct_Type), No_CCs, name, tmp[0],
               liSSNMaster[i]]

        # ---- Additional authorised users (always three slots) ----
        names = []
        mdl = []    # middle names, to reduce duplicate names
        ssn = []
        for _ in range(No_CCs - 1):
            names.append(fake.name())
            mdl.append(gen_data.create_name()[0])
            # Take an SSN from the master list, avoiding the primary's own
            randInt = randrange(1, len(liSSNMaster), 1)
            if randInt != i:
                ssn.append(liSSNMaster[randInt])
            else:
                ssn.append(liSSNMaster[randInt - 1])
        # Unused slots stay blank
        blanks = [''] * (4 - No_CCs)
        names.extend(blanks)
        mdl.extend(blanks)
        ssn.extend(blanks)

        # ---- Credit card ----
        # cc_number() returns (type, [numbers]); the printed number mixes the
        # last four card digits with a timestamp and a random suffix so it is
        # effectively unique.
        CC_NO = gen_data.cc_number()
        CC_TRANS = CC_NO[1][0]
        clean = re.sub(r'\W', '', str(datetime.now()))
        printCC = (str(CC_TRANS[-4:]) + str(clean[-12:-3]) +
                   str(randrange(1111, 9999, randrange(1, 10, 1))))

        for slot in range(3):
            row.extend([names[slot], mdl[slot], ssn[slot]])
        row.extend([printCC, CC_NO[0],
                    gen_data.create_company_name() + ' ' + tmp[1],
                    gen_data.create_email(), gen_data.create_job_title()])

        # ---- Current and previous address ----
        zip_code = random.choice(zips.zip)
        addr = geo_data.create_city_state_zip[zip_code]
        zip_code2 = random.choice(zips.zip)
        addr2 = geo_data.create_city_state_zip[zip_code2]

        lrg_cash_ex = random.choice(Yes_No)

        # ---- SAR / demarketing, only for closed accounts ----
        if Clsd == 'Yes':
            # 1 in 101: demarketed without a SAR ever being filed
            if hit(100) and SAR == 'No':
                demarket = 'Yes'
                dem_date = gen_data.create_date(past=True)
            # 1 in 11 of the remaining closed accounts get a SAR
            if hit(10) and demarket == 'No':
                SAR = 'Yes'
                # 10 in 11 of those are demarketed as well
                if not hit(10):
                    demarket = 'Yes'
                    dem_date = gen_data.create_date(past=True)

        # 1 in 101 customers is a politically exposed person
        if hit(100):
            PEP = 'Yes'
        row.extend([addr[0], addr[1], zip_code, 'US',
                    addr2[0], addr2[1], zip_code2, 'US',
                    gen_data.create_birthday(min_age=2, max_age=85),
                    PEP, SAR, Clsd])

        # ---- Related account ----
        # Only link accounts once more than 10,000 exist, to avoid heavy
        # duplication early on; then 1 chance in 10,001 per customer.
        related_acct = ''
        related_type = ''
        if i > 10000:
            candidate = int(random.choice(acct_list))
            if hit(10000) and candidate != 0:
                related_acct = candidate
                related_type = random.choice(Related_Type)
        row.extend([related_acct, related_type])

        # ---- Party data ----
        party_start = gen_data.create_date(past=True)
        Consent_Share = random.choice(Yes_No_Consent)
        row.extend([random.choice(Party_Type), random.choice(Party_Relation),
                    party_start, gen_data.create_date(past=True),
                    lrg_cash_ex, demarket, dem_date, randrange(0, 100, 1),
                    random.choice(Official_Lang)])
        # A preferred channel is recorded only when sharing is consented to
        if Consent_Share == 'Yes':
            row.extend(['Yes', random.choice(Preffered_Channel)])
        else:
            row.extend(['No', ''])

        row.extend([zip_code, randrange(0, 5, 1)])

        # ---- Segmentation model, selected by a random segment id ----
        Segment_ID = randrange(0, 5, 1) % 5
        row.extend([Model_ID[Segment_ID], Seg_Model_Type[Segment_ID],
                    Seg_Model_Name[Segment_ID], Seg_Model_Group[Segment_ID],
                    Seg_Model_Description[Segment_ID],
                    Seg_Model_Score[Segment_ID]])

        # ---- First group of high-risk business flags ----
        hr0 = random.choice(Rare_Flag)      # arms manufacturer
        hr01 = random.choice(Rare_Flag)     # auction
        hr02 = random.choice(Rare_Flag)     # cash intensive business
        hr03 = random.choice(Rare_Flag)     # casino / gambling
        hr04 = random.choice(Channel_Onboarding)
        hr05 = random.choice(Channel_Ongoing_Transactions)
        row.extend([hr0, hr01, hr02, hr03, hr04, hr05])

        # ---- Net worth ----
        HighNetWorthFlag = random.choice(HighNetWorth)
        if HighNetWorthFlag == 'Yes':
            # All three draws are always made; the large figure only counts
            # when the 1-in-101 draw succeeds.
            very_rich = hit(100)
            large_worth = randrange(1000000, 25000000, 1)
            base_worth = randrange(1000000, 5000000, 1)
            row.append(max(large_worth, base_worth) if very_rich else base_worth)
        else:
            flag = random.choice(LowNet)
            if flag == 0:
                row.append(randrange(-250000, 600000, 1))
            elif flag == 1:
                row.append(randrange(149000, 151000, 1))
            else:
                row.append(randrange(40000, 50000, 1))

        # ---- Second group of high-risk business flags ----
        hr1 = random.choice(Rare_Flag)      # complex HI vehicle
        hr2 = random.choice(Rare_Flag)      # dealer in precious metal
        hr3 = random.choice(Rare_Flag)      # digital PM operator
        hr4 = random.choice(Rare_Flag)      # embassy / consulate
        hr5 = random.choice(Rare_Flag)      # currency exchange
        hr6 = random.choice(Rare_Flag)      # foreign financial institution
        hr7 = random.choice(Rare_Flag)      # foreign government
        hr8 = random.choice(Rare_Flag)      # foreign non-bank financial institution
        hr9 = random.choice(Rare_Flag)      # internet gambling
        hr10 = random.choice(Rare_Flag)     # medical marijuana dispensary
        hr11 = random.choice(Rare_Flag)     # money service business
        hr12 = random.choice(NAICS.NAICS_Code)
        hr13 = random.choice(Rare_Flag)     # non-regulated financial institution
        hr14 = random.choice(Rare_Flag)     # not for profit
        # (occupation was already added above via gen_data.create_job_title)
        hr16 = random.choice(Rare_Flag)     # privately owned ATM operator
        hr17 = random.choice(Products)
        hr18 = random.choice(Rare_Flag)     # sales of used vehicles
        hr19 = random.choice(Services)
        hr20 = random.choice(SIC_Code)
        hr21 = random.choice(Stock_Market_Listing)
        hr22 = random.choice(Rare_Flag)     # third party payment processor
        hr23 = random.choice(Rare_Flag)     # transacting provider

        # ---- High-risk classification ----
        # Any positive marker makes the customer high risk.
        risk_markers = [PEP, SAR, lrg_cash_ex, demarket, hr0, hr01, hr02,
                        hr03, hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9,
                        hr10, hr11, hr13, hr14, hr16, hr17, hr18, hr22, hr23,
                        HighNetWorthFlag]
        if 'Yes' in risk_markers:
            high_risk = 'Yes'
            hr_rating = random.choice(refrating)
        # Customers without any marker still get up to three independent
        # 1-in-101 chances of being rated high risk.
        for _ in range(3):
            if high_risk == 'No' and hit(100):
                high_risk = 'Yes'
                hr_rating = random.choice(refrating)

        row.extend([hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11,
                    hr12, hr13, hr14, hr16, hr17, hr18, hr19, hr20, hr21,
                    hr22, hr23, HighNetWorthFlag, high_risk, hr_rating,
                    random.choice(Use_Case)])
        liCSV.append(row)
    return liCSV
Exemplo n.º 9
0
        f1,
        delimiter=',',
        lineterminator='\n',
    )
    writer.writerow(['rownum'] +['dunno'] + ['CC'] + ['Employer'] + ['Custemail'] + ['name'] \
 + ['occupation'] + ['address_street'] + ['DOB']+['previous address_city_state_zip']+ ['altcustomer_name'] \
 + ['altcustomer_occupation']   + ['altcustomer_dob'] + ['ssn'] + ['phone']  + \
 ['AccountID'] + ['PepFlag'] + ['altcustomerssn'] + ['demarketed_customer_flag'] + \
 ['SAR_flag'] + ['nolonger_a_customer'] + ['closed_account'] +['High_risk_flag'] +['Risk_rating'])
    while i < 50000000:
        #Pick an account number and store it in acct
        acct = randrange(100000, 100000000, 1)
        #if the account hasn't been already generated then generate a record with all fields
        if d.has_key(str(acct)) == False:
            row = [i] + [10] + [gen_data.cc_number()]+[gen_data.create_company_name()] + \
            [gen_data.create_email()]+[gen_data.create_name()] +[gen_data.create_job_title()] + \
            [gen_data.create_city_state_zip()] + [gen_data.create_birthday(min_age=2, max_age=85)] + \
            [gen_data.create_city_state_zip()] + [fake.name()] + [gen_data.create_job_title()] + \
            [gen_data.create_birthday(min_age=2, max_age=85)]  +\
            [(randrange(101,1000,1),randrange(10,100,1),randrange(1000,10000,1))] +  \
            [(randrange(101,1000,1),randrange(101,999,1),randrange(1000,10000,1))] + \
            [acct] + \
            [max((randrange(0,101,1)-99),0)] + \
            [(randrange(101,1000,1),randrange(10,100,1),randrange(1000,10000,1))] + \
            [max((randrange(0,101,1)-99),0)] + [max((randrange(0,101,1)-99),0)] + \
            [max((randrange(0,101,1)-99),0)]  + [max((randrange(0,101,1)-90),0)] + \
            [max((randrange(0,101,1)-99),0)] +  [max((randrange(0,101,1)-99),0)]
            d[str(acct)] = acct
            i = i + 1
            writer.writerow(row)
Exemplo n.º 10
0
from barnum import gen_data
import csv
#gen_data = gen_data()
# Write 50,000,000 generated rows to large.csv.
with open('large.csv','w') as f1:
    writer = csv.writer(f1, delimiter=',', lineterminator='\n')
    # Header: one blank cell followed by the numbers 0..9.
    # NOTE(review): the header has 11 cells but each data row has 9 values.
    writer.writerow([''] + list(range(10)))
    # Count with a plain loop so no 50M-element list is ever materialised
    # (which range() would do on Python 2).
    i = 0
    while i < 50000000:
        writer.writerow([
            i,
            10,
            gen_data.cc_number(),
            gen_data.create_company_name(),
            gen_data.create_email(),
            gen_data.create_name(),
            gen_data.create_job_title(),
            gen_data.create_city_state_zip(),
            gen_data.create_birthday(min_age=2, max_age=85),
        ])
        i += 1
		
		
		
 #row = [i] + [10] + [fake.name()] +[fake.address()]
# Generate `count` fake contacts. Each pass assigns a fresh set of contact
# attributes and then calls the generate* helpers, which take no contact
# arguments -- presumably they read the variables assigned here from module
# scope (verify against their definitions).
for dummy in range (0, count):
    firstName, lastName = gen_data.create_name()
    zip, city, state = gen_data.create_city_state_zip()
    # Random numeric identifiers, stored as strings
    postalAddressID=str(random.randint(0, sys.maxint))

    UID = str(random.randint(0, sys.maxint))
    phoneNumber = gen_data.create_phone()
    # Python 2 two-argument str.translate: the second argument lists the
    # characters to delete (space, dash, parentheses). `allchars` is defined
    # elsewhere -- presumably an identity translation table.
    phoneUri = 'tel:+1' + phoneNumber.translate(allchars,' -()')
    birthDay = gen_data.create_birthday()
    streetAddress = gen_data.create_street()
    emailAddress = gen_data.create_email(name=(firstName, lastName))
    xmppAddress = str(firstName+"." + lastName + "@gmail.com").lower()
    hasIMAccount = False
    hasPhoneNumber = False
    jobTitle = gen_data.create_job_title()

    # Every contact gets a postal address and an e-mail address
    generatePostalAddress()
    generateEmailAddress()

    # IM account and phone number are optional, to add variation:
    # randint(0, 3) > 2 holds for one draw in four. When only a single
    # contact is requested (count == 1) it always gets both.
    if random.randint(0, 3) > 2 or count == 1:
        generateIMAccount(gen_data, str)
        hasIMAccount = True
    if random.randint(0, 3) > 2 or count == 1:
        generatePhoneNumber()
        hasPhoneNumber = True

        # Calls and SMS are only generated for contacts that received a
        # phone number, and only when `withPhone` is set.
        if (withPhone): generatePhoneCalls(3)
        if (withPhone): generateSMS (4)
Exemplo n.º 12
0
def generate_customers():
    """Write 30 synthetic customer records to ``uber_cust.csv``.

    The output is pipe-delimited and newline-terminated: one header row
    followed by one row per generated customer.  Each row combines an
    account number, primary and authorised card-holder names/SSNs, a
    scrambled card number, employer/contact data, current and previous
    address, risk flags, segment-model data and a series of weighted
    category picks.

    Relies on names defined elsewhere in the module: ``get_file``,
    ``weighted_options``, ``risk_range``, ``fake``, ``gen_data``,
    ``geo_data``, ``zips``, ``NAICS``, ``USE_CASE``, the ``MODEL_ID`` /
    ``SEG_MODEL_*`` tables and the upper-case option keys passed to
    ``weighted_options``.

    Returns:
        None.  The only effect is the file written through ``get_file``.
    """
    # Number of customer rows to generate; also the size of the SSN pool.
    num_customers = 30

    # NOTE(review): this header lists 88 column names while every row built
    # below carries 87 values (the first seven row values face eight header
    # names, ROWNUM..SSN).  Left as-is because the intended value for the
    # extra column is not visible here -- confirm against the consumer.
    header = [
        'ROWNUM', 'accountNumber', 'accountCategory', 'accountType',
        'NUM_CCS', 'NAME', 'M_NAME', 'SSN',
        'AUTHORIZED_NAME2', 'M_NAME2', 'SSN2',
        'AUTHORIZED_NAME3', 'M_NAME3', 'SSN3',
        'AUTHORIZED_NAME4', 'M_NAME4', 'SSN4',
        'CREDITCARDNUMBER', 'CREDITCARDTYPE', 'EMPLOYER', 'CUSTEMAIL',
        'OCCUPATION', 'CITY', 'STATE', 'ZIP', 'COUNTRY',
        'PREVIOUS_CITY', 'PREVIOUS_STATE', 'PREVIOUS_ZIP',
        'PREVIOUS_COUNTRY', 'DOB',
        'politically_exposed_person', 'suspicious_activity_report',
        'CLOSEDACCOUNT', 'RELATED_ACCT', 'RELATED_TYPE',
        'PARTY_TYPE', 'PARTY_RELATION', 'PARTY_STARTDATE', 'PARTY_ENDDATE',
        'LARGE_CASH_EXEMPT', 'DEMARKET_FLAG', 'DEMARKET_DATE',
        'PROB_DEFAULT_RISKR', 'OFFICIAL_LANG_PREF', 'CONSENT_SHARING',
        'PREFERRED_CHANNEL', 'PRIMARY_BRANCH_NO', 'DEPENDANTS_COUNT',
        'SEG_MODEL_ID', 'SEG_MODEL_TYPE', 'SEG_MODEL_NAME',
        'SEG_MODEL_GROUP', 'SEG_M_GRP_DESC', 'SEG_MODEL_SCORE',
        'ARMS_MANUFACTURER', 'AUCTION', 'CASHINTENSIVE_BUSINESS',
        'CASINO_GAMBLING', 'CHANNEL_ONBOARDING',
        'CHANNEL_ONGOING_TRANSACTIONS', 'CLIENT_NET_WORTH',
        'COMPLEX_HI_VEHICLE', 'DEALER_PRECIOUS_METAL',
        'DIGITAL_PM_OPERATOR', 'EMBASSY_CONSULATE', 'EXCHANGE_CURRENCY',
        'FOREIGN_FINANCIAL_INSTITUTION', 'FOREIGN_GOVERNMENT',
        'FOREIGN_NONBANK_FINANCIAL_INSTITUTION', 'INTERNET_GAMBLING',
        'MEDICAL_MARIJUANA_DISPENSARY', 'MONEY_SERVICE_BUSINESS',
        'NAICS_CODE', 'NONREGULATED_FINANCIAL_INSTITUTION', 'NOT_PROFIT',
        'PRIVATELY_ATM_OPERATOR', 'PRODUCTS', 'SALES_USED_VEHICLES',
        'SERVICES', 'SIC_CODE', 'STOCK_MARKET_LISTING',
        'THIRD_PARTY_PAYMENT_PROCESSOR', 'TRANSACTING_PROVIDER',
        'HIGH_NET_WORTH', 'HIGH_RISK', 'RISK_RATING', 'USE_CASE_SCENARIO',
    ]

    with get_file('uber_cust.csv', 'w') as f1:
        # Pipe-delimited CSV, '\n' as the row terminator.
        writer = csv.writer(f1, delimiter='|', lineterminator='\n')
        writer.writerow(header)

        # Account numbers grow monotonically from this seed.
        start = 10
        acct_list = []

        # Pool of unique 9-digit SSN strings, one per customer.  Keep drawing
        # until the set is full: de-duplicating a fixed batch (even with one
        # retry) could leave fewer entries than rows and make
        # li_ssn_master[i] raise IndexError.
        ssn_pool = set()
        while len(ssn_pool) < num_customers:
            ssn_pool.add(
                ''.join(str(random.randint(0, 9)) for _ in range(9)))
        li_ssn_master = list(ssn_pool)

        for i in xrange(num_customers):
            # Risk flags start out clear for every customer.
            politically_exposed_person = 'No'
            suspicious_activity_report = 'No'
            high_risk = 'No'
            hr_rating = ''
            demarket = 'No'
            dem_date = ''

            # Closed-account flag: randrange(0, 98) - 96 is positive only for
            # the draw 97, i.e. roughly one account in 98.
            closed_cust_acct = 'No'
            if max((randrange(0, 98, 1) - 96), 0) == 1:
                closed_cust_acct = 'Yes'

            # Number of card holders on the account.
            no_ccs = weighted_options('number_cc')

            # Next account number: previous one plus a random step of 2..10.
            acct = start + 1 + randrange(1, 10, 1)
            start = acct

            name = fake.name()
            # tmp[0] is written as the middle name; tmp[1] is appended to the
            # employer name further down.
            tmp = gen_data.create_name()
            acct_list.append(acct)

            # Primary holder: SSN is the pool entry at this row's index.
            row = [
                i, acct, weighted_options('acct_type'), no_ccs, name,
                tmp[0], li_ssn_master[i]
            ]

            # Authorised users: name, middle name and SSN per extra holder.
            names = []
            mdl = []
            ssn = []
            for _ in range(no_ccs - 1):
                names.append(fake.name())
                mdl.append(gen_data.create_name()[0])
                # Draw another pool entry; step back by one if the draw lands
                # on the primary holder's own index.
                rand_idx = randrange(1, len(li_ssn_master), 1)
                if rand_idx == i:
                    rand_idx -= 1
                ssn.append(li_ssn_master[rand_idx])

            # Pad with blanks so there are always three authorised-user slots.
            padding = [''] * (4 - no_ccs)
            names.extend(padding)
            mdl.extend(padding)
            ssn.extend(padding)

            # CC_NO[0] is written in the card-type column; CC_NO[1][0] is the
            # generated card number.
            CC_NO = gen_data.create_cc_number()
            CC_TRANS = CC_NO[1][0]

            # Scrambled card number: last 4 digits of the generated number,
            # a slice of the current timestamp's digits, and a random suffix.
            dt = str(datetime.now())
            clean = re.sub(r'\W', '', dt)
            printCC = str(CC_TRANS[-4:]) + str(clean[-12:-3]) + str(
                randrange(1111, 9999, randrange(1, 10, 1)))

            row.extend([
                names[0], mdl[0], ssn[0],
                names[1], mdl[1], ssn[1],
                names[2], mdl[2], ssn[2],
                printCC, CC_NO[0],
                gen_data.create_company_name() + ' ' + tmp[1],
                gen_data.create_email(),
                gen_data.create_job_title()
            ])

            # Current and previous address, looked up by a random zip code.
            zip_code = random.choice(zips.zip)
            addr = geo_data.create_city_state_zip[zip_code]
            prev_zip = random.choice(zips.zip)
            addr2 = geo_data.create_city_state_zip[prev_zip]

            lrg_cash_ex = weighted_options('yes_no')

            # SAR / demarketing only apply to closed accounts.
            if closed_cust_acct == 'Yes':
                # Demarketed without a SAR ever being filed.
                if risk_range() and suspicious_activity_report == 'No':
                    demarket = 'Yes'
                    dem_date = gen_data.create_date(past=True)
                # SAR filed (only if not already demarketed above).
                if risk_range() and demarket == 'No':
                    suspicious_activity_report = 'Yes'
                    # randrange(0, 11) - 9 is positive only for the draw 10,
                    # so ten of eleven SAR accounts are also demarketed.
                    if max((randrange(0, 11, 1) - 9), 0) == 0:
                        demarket = 'Yes'
                        dem_date = gen_data.create_date(past=True)

            if risk_range():
                politically_exposed_person = 'Yes'

            row.extend([
                addr[0], addr[1], zip_code, 'US',
                addr2[0], addr2[1], prev_zip, 'US',
                gen_data.create_birthday(min_age=2, max_age=85),
                politically_exposed_person, suspicious_activity_report,
                closed_cust_acct
            ])

            # Related accounts are only drawn once more than 10,000 accounts
            # exist; the multiplier is 1 for one draw in 10,001 and 0
            # otherwise.
            # NOTE(review): with num_customers = 30 this branch never runs.
            rel = 0
            if i > 10000:
                rel = int(random.choice(acct_list)) * max(
                    (randrange(0, 10001, 1) - 9999), 0)
            if rel != 0:
                row.extend([rel, weighted_options('related_type')])
            else:
                row.extend(['', ''])

            party_start = gen_data.create_date(past=True)
            consent_share = weighted_options('yes_no')

            row.extend([
                weighted_options('party_type'),
                weighted_options('party_relation'),
                party_start,
                gen_data.create_date(past=True),
                lrg_cash_ex, demarket, dem_date,
                randrange(0, 100, 1),
                weighted_options('official_lang')
            ])

            # A preferred contact channel is only recorded with consent.
            if consent_share == 'Yes':
                row.extend(['Yes', weighted_options('preferred_channel')])
            else:
                row.extend(['No', ''])

            # Written under PRIMARY_BRANCH_NO (the current zip code is
            # reused) and DEPENDANTS_COUNT (0..4).
            row.extend([zip_code, randrange(0, 5, 1)])

            # One of five segment models; the same index selects the matching
            # entry from every segment table.
            seg = randrange(0, 5, 1)
            row.extend([
                MODEL_ID[seg], SEG_MODEL_TYPE[seg], SEG_MODEL_NAME[seg],
                SEG_MODEL_GROUP[seg], SEG_MODEL_DESCRIPTION[seg],
                SEG_MODEL_SCORE[seg]
            ])

            arms_manufacturer = weighted_options('arms_manufacturers')
            auction = weighted_options('auction')
            cash_intensive_business = weighted_options(
                'cash_intensive_business')
            casino_gambling = weighted_options('casino_gambling')
            chan_ob = weighted_options('channel_onboarding')
            chan_txn = weighted_options('channel_ongoing_txn')

            row.extend([
                arms_manufacturer, auction, cash_intensive_business,
                casino_gambling, chan_ob, chan_txn
            ])

            # Net worth depends on the high-net-worth flag.
            high_net_worth_flag = weighted_options('high_net_worth')
            if high_net_worth_flag == 'Yes':
                # At least 1M..5M; one draw in 101 may instead reach 1M..25M.
                row.append(
                    max(
                        max((randrange(0, 101, 1) - 99), 0) *
                        randrange(1000000, 25000000, 1),
                        randrange(1000000, 5000000, 1)))
            else:
                flag = weighted_options('low_net')
                if flag == 0:
                    row.append(randrange(-250000, 600000, 1))
                elif flag == 1:
                    row.append(randrange(149000, 151000, 1))
                else:
                    row.append(randrange(40000, 50000, 1))

            # Remaining category picks (there is no hr15).
            hr1 = weighted_options('complex_hi_vehicle')
            hr2 = weighted_options('dealer_precious_metal')
            hr3 = weighted_options('digital_pm_operator')
            hr4 = weighted_options(EMBASSY_CONSULATE)
            hr5 = weighted_options(EXCHANGE_CURRENCY)
            hr6 = weighted_options(FOREIGN_FINANCIAL_INSTITUTION)
            hr7 = weighted_options(FOREIGN_GOVT)
            hr8 = weighted_options(FOREIGN_NONBANK_FINANCIAL_INSTITUTION)
            hr9 = weighted_options(INTERNET_GAMBLING)
            hr10 = weighted_options(MEDICAL_MARIJUANA_DISPENSARY)
            hr11 = weighted_options(MONEY_SERVICE_BUSINESS)
            hr12 = random.choice(NAICS.NAICS_Code)
            hr13 = weighted_options(NONREGULATED_FINANCIAL_INSTITUTION)
            hr14 = weighted_options(NOT_PROFIT)
            hr16 = weighted_options(PRIVATE_ATM_OPERATOR)
            hr17 = weighted_options('products')
            hr18 = weighted_options(SALES_USED_VEHICLES)
            hr19 = weighted_options('services')
            hr20 = weighted_options('sic_code')
            hr21 = weighted_options('stock_market_listing')
            hr22 = weighted_options(THIRD_PARTY_PAYMENT_PROCESSOR)
            hr23 = weighted_options(TRANSACTING_PROVIDER)

            # Any 'Yes' among these flags marks the customer as high risk.
            if 'Yes' in (politically_exposed_person,
                         suspicious_activity_report, lrg_cash_ex, demarket,
                         arms_manufacturer, auction, cash_intensive_business,
                         casino_gambling, hr1, hr2, hr3, hr4, hr5, hr6, hr7,
                         hr8, hr9, hr10, hr11, hr13, hr14, hr16, hr17, hr18,
                         hr22, hr23, high_net_worth_flag):
                high_risk = 'Yes'
                hr_rating = weighted_options('refrating')

            # Up to three further random chances to be rated high risk.  If
            # high_risk is still 'No' here, the SAR and PEP flags are 'No'
            # too (either would have set it above), so checking high_risk
            # alone is sufficient.
            for _ in range(3):
                if high_risk == 'No' and risk_range():
                    high_risk = 'Yes'
                    hr_rating = weighted_options('refrating')

            row.extend([
                hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11, hr12,
                hr13, hr14, hr16, hr17, hr18, hr19, hr20, hr21, hr22, hr23,
                high_net_worth_flag, high_risk, hr_rating,
                random.choice(USE_CASE)
            ])
            writer.writerow(row)
def createCusts(N):
    """Build ``N`` synthetic customer rows and return them as a list.

    Each row is a flat list of values: account number, primary and
    authorised card-holder names/SSNs, a scrambled card number,
    employer/contact data, current and previous address, risk flags,
    segment-model data and a series of weighted category picks.

    Relies on names defined elsewhere in the module: ``liSSNMaster``
    (indexed by row number), ``Faker``, ``gen_data``, ``geo_data``, ``zips``
    and ``NAICS``.

    Args:
        N: number of rows to generate.  Row ``i`` reads ``liSSNMaster[i]``,
            so ``N`` presumably must not exceed ``len(liSSNMaster)``.

    Returns:
        A list of ``N`` row lists, in generation order.
    """
    global liSSNMaster

    # --- Weighted pools: repeated entries bias random.choice ---------------
    # Net worth over the high-net-worth threshold: 1 in 31.
    HighNetWorth = ['Yes'] + ['No'] * 30
    # Relationship of a related account.
    Related_Type = ['Primary', 'Secondary', 'Joint']
    Party_Type = ['Person', 'Non-Person']
    Party_Relation = ['Customer', 'Non-Customer']
    # Large-cash-exempt flag: 1 in 13.
    Yes_No = ['Yes'] + ['No'] * 12
    # Consent to share information: 1 in 5.
    Yes_No_Consent = ['Yes'] + ['No'] * 4
    Official_Lang = ['English'] * 3 + ['French']
    Preffered_Channel = ['Direct Mail', 'Telemarketing', 'Email', 'SMS']

    # --- Segment model tables, all indexed by the same segment id ----------
    Seg_Model_Type = [
        'LOB Specific', 'Profitability', 'Geographical', 'Behavioral',
        'Risk Tolerance'
    ]
    Model_ID = ['01', '02', '03', '04', '05']
    Seg_Model_Name = [
        'IRRI', 'CRS Risk Score', 'Geo Risk', 'Financial Behavior Risk',
        'CM Risk'
    ]
    Seg_Model_Score = ['200', '300', '400', '100', '500']
    Seg_Model_Group = ['Group 1'] * 2 + ['Group 2', 'Group 3', 'Group 4']
    Seg_Model_Description = [
        'High Risk Tier', 'Mid Risk Tier', 'Low Risk Tier', 'Vertical Risk',
        'Geographical Risk'
    ]

    # Shared pool for every rare industry/activity flag: 1 'Yes', 2 'No' and
    # 392 blanks.  All such flags used identical lists (or aliases of one),
    # so a single pool is drawn from for each of them.
    Rare_Flag = ['Yes'] + ['No'] * 2 + [''] * 392

    Channel_Onboarding = [
        'E-mail', 'In Person', 'In person - In Branch/Bank Office',
        'In person - Offsite/Client Location', 'Mail', 'Online', 'Phone',
        'Request for Proposal (RFP)'
    ] + ['Not Applicable'] * 10
    Channel_Ongoing_Transactions = [
        'ATM', 'E-mail', 'Fax', 'Mail', 'Not Applicable',
        'OTC Communication System', 'Phone'
    ] + ['Online'] * 4 + ['In Person'] * 31

    Products = [
        'Certificate of Deposit',
        'Checking Account',
        'Credit Card',
        'Custodial and Investment Agency - Institutional',
        'Custodial and Investment Agency - Personal',
        'Custodial/Trust Outsourcing Services (BTOS)',
        'Custody Accounts (PTIM)',
        'Custody Accounts (RSTC)',
        'DTF (BHFA)',
        'Investment Agency - Personal',
        'Investment Management Account (PTIM)',
        'Lease',
        'Loan / Letter of Credit',
        'Money Market',
        'Mortgage / Bond / Debentures',
        'None',
        'Savings Account',
        'Trust Administration - Irrevocable and Revocable (PTIM)',
        'Trust Administration - Irrevocable and Revocable Trusts (BDTC)',
    ] + ['Nondeposit Investment Products'] * 14 + [
        'Investment Agency - Institutional'
    ] * 5
    Services = [
        'Benefit Payment Services',
        'Domestic Wires and Direct Deposit / ACH',
        'Family Office Services (FOS)',
        'Fiduciary Services',
        'International Wires and IAT',
        'Investment Advisory Services (IAS)',
        'Investment Services',
        'None',
        'Online / Mobile Banking',
        'Payroll',
        'Short Term Cash Management',
        'Trust Services',
        'Trustee Services',
        'Vault Cash Services',
    ] + ['Financial Planning'] * 6 + ['Retirement Plans'] * 19
    SIC_Code = [
        '6021 National Commercial Banks',
        '6211 Security Brokers Dealers and Flotation Companies',
        '6282 Investment Advice',
        '6311 Life Insurance',
        '6733 Trusts Except Educational Religious and Charitable',
        '8999 Services NEC',
    ] + ['6722 Management Investment Offices Open-End'] * 12
    Stock_Market_Listing = [
        'Australian Stock Exchange',
        'Brussels Stock Exchange',
        'Montreal Stock Exchange',
        'Tiers 1 and 2 of the TSX Venture Exchange (also known as Tiers 1 and 2 of the Canadian Venture Exchange)',
        'Toronto Stock Exchange',
    ] + ['Not Found'] * 30

    # Selector for the low-net-worth range used below (0, 1 or 2).
    LowNet = [1, 2] + [0] * 5
    # Account type: 'B' 1 in 6, otherwise 'C'.
    Acct_Type = ['B'] + ['C'] * 5
    # Number of card holders per account.
    Number_CC = [1] * 7 + [2] * 11 + [3] * 3 + [4]
    # Use-case scenario numbers.
    Use_Case = [1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 39] * 4 + [
        2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38
    ] * 7 + [3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36
             ] * 65 + [37] * 73 + [40, 41] * 2
    # High-risk rating, weighted towards '5'.
    refrating = [
        '1', '1', '1', '2', '3', '4', '2', '4', '5', '5', '5', '5', '5', '5',
        '5', '5', '5', '5', '5', '5'
    ]

    fake = Faker()
    # Account numbers grow monotonically from this seed.
    start = 10786147
    acct_list = []
    liCSV = []

    for i in xrange(N):
        # Risk flags start out clear for every customer.
        PEP = 'No'  # politically exposed person
        SAR = 'No'  # suspicious activity report filed
        high_risk = 'No'
        hr_rating = ''
        demarket = 'No'
        dem_date = ''

        # Closed-account flag: randrange(0, 98) - 96 is positive only for the
        # draw 97, i.e. roughly one account in 98.
        Clsd = 'No'
        if max((randrange(0, 98, 1) - 96), 0) == 1:
            Clsd = 'Yes'

        No_CCs = random.choice(Number_CC)

        # Next account number: previous one plus a random step of 2..10.
        acct = start + 1 + randrange(1, 10, 1)
        start = acct

        name = fake.name()
        # tmp[0] is written as the middle name; tmp[1] is appended to the
        # employer name further down.
        tmp = gen_data.create_name()
        acct_list.append(acct)

        # Primary holder: SSN is the master-list entry at this row's index.
        row = [
            i, acct, random.choice(Acct_Type), No_CCs, name, tmp[0],
            liSSNMaster[i]
        ]

        # Authorised users: name, middle name and SSN per extra holder.
        names = []
        mdl = []
        ssn = []
        for _ in range(No_CCs - 1):
            names.append(fake.name())
            mdl.append(gen_data.create_name()[0])
            # Draw another master-list entry; step back by one if the draw
            # lands on the primary holder's own index.
            rand_idx = randrange(1, len(liSSNMaster), 1)
            if rand_idx == i:
                rand_idx -= 1
            ssn.append(liSSNMaster[rand_idx])

        # Pad with blanks so there are always three authorised-user slots.
        padding = [''] * (4 - No_CCs)
        names.extend(padding)
        mdl.extend(padding)
        ssn.extend(padding)

        # CC_NO[0] is written as-is; CC_NO[1][0] is the generated number.
        CC_NO = gen_data.cc_number()
        CC_TRANS = CC_NO[1][0]
        # Scrambled card number: last 4 digits of the generated number, a
        # slice of the current timestamp's digits, and a random suffix.
        dt = str(datetime.now())
        clean = re.sub(r'\W', '', dt)
        printCC = str(CC_TRANS[-4:]) + str(clean[-12:-3]) + str(
            randrange(1111, 9999, randrange(1, 10, 1)))

        row.extend([
            names[0], mdl[0], ssn[0],
            names[1], mdl[1], ssn[1],
            names[2], mdl[2], ssn[2],
            printCC, CC_NO[0],
            gen_data.create_company_name() + ' ' + tmp[1],
            gen_data.create_email(),
            gen_data.create_job_title()
        ])

        # Current and previous address, looked up by a random zip code.
        zip_code = random.choice(zips.zip)
        addr = geo_data.create_city_state_zip[zip_code]
        prev_zip = random.choice(zips.zip)
        addr2 = geo_data.create_city_state_zip[prev_zip]

        lrg_cash_ex = random.choice(Yes_No)

        # SAR / demarketing only apply to closed accounts.
        if Clsd == 'Yes':
            # Demarketed without a SAR (one draw in 101).
            if max((randrange(0, 101, 1) - 99), 0) == 1 and SAR == 'No':
                demarket = 'Yes'
                dem_date = gen_data.create_date(past=True)
            # SAR filed (one draw in 11), only if not already demarketed.
            if max((randrange(0, 11, 1) - 9), 0) == 1 and demarket == 'No':
                SAR = 'Yes'
                # Ten of eleven SAR accounts are also demarketed.
                if max((randrange(0, 11, 1) - 9), 0) == 0:
                    demarket = 'Yes'
                    dem_date = gen_data.create_date(past=True)

        if max((randrange(0, 101, 1) - 99), 0) == 1:
            PEP = 'Yes'

        row.extend([
            addr[0], addr[1], zip_code, 'US',
            addr2[0], addr2[1], prev_zip, 'US',
            gen_data.create_birthday(min_age=2, max_age=85), PEP, SAR, Clsd
        ])

        # Related accounts are only drawn once more than 10,000 accounts
        # exist, to avoid duplicating accounts early on; the multiplier is 1
        # for one draw in 10,001 and 0 otherwise.
        rel = 0
        if i > 10000:
            rel = int(random.choice(acct_list)) * max(
                (randrange(0, 10001, 1) - 9999), 0)
        if rel != 0:
            row.extend([rel, random.choice(Related_Type)])
        else:
            row.extend(['', ''])

        party_start = gen_data.create_date(past=True)
        Consent_Share = random.choice(Yes_No_Consent)

        row.extend([
            random.choice(Party_Type),
            random.choice(Party_Relation),
            party_start,
            gen_data.create_date(past=True),
            lrg_cash_ex, demarket, dem_date,
            randrange(0, 100, 1),
            random.choice(Official_Lang)
        ])

        # A preferred contact channel is only recorded with consent.
        if Consent_Share == 'Yes':
            row.extend(['Yes', random.choice(Preffered_Channel)])
        else:
            row.extend(['No', ''])

        # The current zip code is written a second time, followed by a
        # random value in 0..4.
        row.extend([zip_code, randrange(0, 5, 1)])

        # One of five segment models; the same index selects the matching
        # entry from every segment table.
        seg = randrange(0, 5, 1)
        row.extend([
            Model_ID[seg], Seg_Model_Type[seg], Seg_Model_Name[seg],
            Seg_Model_Group[seg], Seg_Model_Description[seg],
            Seg_Model_Score[seg]
        ])

        # Arms manufacturer, auction, cash-intensive business, casino.
        hr0 = random.choice(Rare_Flag)
        hr01 = random.choice(Rare_Flag)
        hr02 = random.choice(Rare_Flag)
        hr03 = random.choice(Rare_Flag)
        hr04 = random.choice(Channel_Onboarding)
        hr05 = random.choice(Channel_Ongoing_Transactions)
        row.extend([hr0, hr01, hr02, hr03, hr04, hr05])

        # Net worth depends on the high-net-worth flag.
        HighNetWorthFlag = random.choice(HighNetWorth)
        if HighNetWorthFlag == 'Yes':
            # At least 1M..5M; one draw in 101 may instead reach 1M..25M.
            row.append(
                max(
                    max((randrange(0, 101, 1) - 99), 0) *
                    randrange(1000000, 25000000, 1),
                    randrange(1000000, 5000000, 1)))
        else:
            flag = random.choice(LowNet)
            if flag == 0:
                row.append(randrange(-250000, 600000, 1))
            elif flag == 1:
                row.append(randrange(149000, 151000, 1))
            else:
                row.append(randrange(40000, 50000, 1))

        # Remaining category picks (there is no hr15; occupation comes from
        # gen_data earlier in the row).
        hr1 = random.choice(Rare_Flag)   # complex high-income vehicle
        hr2 = random.choice(Rare_Flag)   # dealer in precious metals
        hr3 = random.choice(Rare_Flag)   # digital PM operator
        hr4 = random.choice(Rare_Flag)   # embassy / consulate
        hr5 = random.choice(Rare_Flag)   # currency exchange
        hr6 = random.choice(Rare_Flag)   # foreign financial institution
        hr7 = random.choice(Rare_Flag)   # foreign government
        hr8 = random.choice(Rare_Flag)   # foreign non-bank fin. institution
        hr9 = random.choice(Rare_Flag)   # internet gambling
        hr10 = random.choice(Rare_Flag)  # medical marijuana dispensary
        hr11 = random.choice(Rare_Flag)  # money service business
        hr12 = random.choice(NAICS.NAICS_Code)
        hr13 = random.choice(Rare_Flag)  # non-regulated fin. institution
        hr14 = random.choice(Rare_Flag)  # not-for-profit
        hr16 = random.choice(Rare_Flag)  # privately owned ATM operator
        hr17 = random.choice(Products)
        hr18 = random.choice(Rare_Flag)  # sales of used vehicles
        hr19 = random.choice(Services)
        hr20 = random.choice(SIC_Code)
        hr21 = random.choice(Stock_Market_Listing)
        hr22 = random.choice(Rare_Flag)  # third-party payment processor
        hr23 = random.choice(Rare_Flag)  # transacting provider

        # Any 'Yes' among these flags marks the customer as high risk.
        if 'Yes' in (PEP, SAR, lrg_cash_ex, demarket, hr0, hr01, hr02, hr03,
                     hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11,
                     hr13, hr14, hr16, hr17, hr18, hr22, hr23,
                     HighNetWorthFlag):
            high_risk = 'Yes'
            hr_rating = random.choice(refrating)

        # Up to three further one-in-101 chances to be rated high risk.  If
        # high_risk is still 'No' here, SAR and PEP are 'No' too (either
        # would have set it above), so checking high_risk alone suffices.
        for _ in range(3):
            if (high_risk == 'No'
                    and max((randrange(0, 101, 1) - 99), 0) == 1):
                high_risk = 'Yes'
                hr_rating = random.choice(refrating)

        row.extend([
            hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11, hr12,
            hr13, hr14, hr16, hr17, hr18, hr19, hr20, hr21, hr22, hr23,
            HighNetWorthFlag, high_risk, hr_rating,
            random.choice(Use_Case)
        ])
        liCSV.append(row)
    return liCSV
Exemplo n.º 14
0
# Writes 'large.csv': one header row followed by 50,000,000 rows of synthetic
# customer data built from barnum's gen_data, Faker and random draws.
import csv
from random import random
# randrange is used for every numeric column below; it was never imported,
# so the first data row used to fail with NameError.
from random import randrange
from random import shuffle

from barnum import gen_data
from faker import Faker

fake = Faker()

# Column names, in the same order as the values assembled for each row.
COLUMNS = [
    'rownum', 'dunno', 'CC', 'Employer', 'Custemail', 'name', 'occupation',
    'address_street', 'DOB', 'previous address_city_state_zip',
    'altcustomer_name', 'altcustomer_occupation', 'altcustomer_dob', 'ssn',
    'phone', 'AccountID', 'PepFlag', 'altcustomerssn',
    'demarketed_customer_flag', 'SAR_flag', 'nolonger_a_customer',
    'closed_account', 'High_risk_flag', 'Risk_rating',
]


def _excess(threshold):
    """Return max(draw - threshold, 0) for a random integer draw in 0..100.

    With threshold 99 the result is 1 for one draw in 101 and 0 otherwise;
    with threshold 90 the result ranges over 0..10.
    """
    return max(randrange(0, 101, 1) - threshold, 0)


def _ssn_parts():
    """Return a random (3-digit, 2-digit, 4-digit) tuple used as an SSN."""
    return (randrange(101, 1000, 1), randrange(10, 100, 1),
            randrange(1000, 10000, 1))


def _phone_parts():
    """Return a random (3-digit, 3-digit, 4-digit) tuple used as a phone."""
    return (randrange(101, 1000, 1), randrange(101, 999, 1),
            randrange(1000, 10000, 1))


with open('large.csv', 'w') as f1:
    writer = csv.writer(f1, delimiter=',', lineterminator='\n')
    writer.writerow(COLUMNS)
    for i in range(50000000):
        # Values are listed in column order; the generators are invoked left
        # to right, exactly as in the original concatenation.
        row = [
            i,
            10,
            gen_data.cc_number(),
            gen_data.create_company_name(),
            gen_data.create_email(),
            gen_data.create_name(),
            gen_data.create_job_title(),
            gen_data.create_city_state_zip(),
            gen_data.create_birthday(min_age=2, max_age=85),
            gen_data.create_city_state_zip(),
            fake.name(),
            gen_data.create_job_title(),
            gen_data.create_birthday(min_age=2, max_age=85),
            _ssn_parts(),
            _phone_parts(),
            randrange(100000, 100000000, 1),
            _excess(99),
            _ssn_parts(),
            _excess(99),
            _excess(99),
            _excess(99),
            # closed_account uses a lower threshold, so it is non-zero far
            # more often and can take values up to 10.
            _excess(90),
            _excess(99),
            _excess(99),
        ]
        writer.writerow(row)
		
		
Exemplo n.º 15
0
def gen_cust(liSSNMaster, acct_list, i):
    """Build one synthetic customer row.

    Args:
        liSSNMaster: Indexable collection of SSNs. Element ``i`` is used for
            the main account holder; randomly chosen other elements are used
            for the additional card holders.
        acct_list: List of account numbers generated so far. The new account
            number is appended to it (the list is mutated in place), and it
            is sampled for related accounts once ``i`` exceeds 10000.
        i: Row number; also the index of this customer in ``liSSNMaster``.

    Returns:
        list: The values of the row, in output-column order.

    Side effects:
        Advances the module-level ``start`` counter to the newly generated
        account number.
    """
    # ``start`` is assigned below, so without this declaration Python treats
    # it as a local and the first read raises UnboundLocalError. It is
    # assumed to be a module-level counter initialised before the first call.
    global start

    fake = Faker()

    # Risk-related flags start out negative and are switched on further down.
    PEP = 'No'          # politically exposed person
    SAR = 'No'          # suspicious activity report on file
    Clsd = choice(Clsd_flag)   # closed-account flag
    high_risk = 'No'
    hr_rating = ''
    demarket = 'No'     # customer was demarketed by the bank
    dem_date = ''

    # Number of credit cards on the account.
    No_CCs = choice(Number_CC)

    # Account numbers grow monotonically in random steps of 2..10.
    acct = start + 1 + randrange(1, 10, 1)
    start = acct

    name = fake.name()
    tmp = gen_data.create_name()
    acct_list.append(acct)

    # Main account holder: SSN is the current index in the master SSN list.
    row = [i, acct, choice(Acct_Type), No_CCs, name, tmp[0], liSSNMaster[i]]

    # Additional card holders: name, middle name (to reduce duplicate names)
    # and an SSN drawn from the master list that differs in index from ``i``.
    names = []
    ssn = []
    mdl = []
    for _ in range(No_CCs - 1):
        names.append(fake.name())
        mdl.append(gen_data.create_name()[0])
        randInt = randrange(1, len(liSSNMaster), 1)
        if randInt == i:
            randInt -= 1
        ssn.append(liSSNMaster[randInt])

    # Pad with blanks so the three additional-holder slots always exist.
    for _ in range(4 - No_CCs):
        names.append('')
        ssn.append('')
        mdl.append('')

    # Credit card: combine the last four digits of a generated number with
    # part of the current timestamp and a random suffix.
    CC_NO = gen_data.create_cc_number()
    CC_TRANS = CC_NO[1][0]
    clean = re.sub(r'\W', '', str(datetime.now()))
    printCC = (str(CC_TRANS[-4:]) + str(clean[-12:-3])
               + str(randrange(1111, 9999, randrange(1, 10, 1))))

    row.extend([
        names[0], mdl[0], ssn[0],
        names[1], mdl[1], ssn[1],
        names[2], mdl[2], ssn[2],
        printCC, CC_NO[0],
        gen_data.create_company_name() + ' ' + tmp[1],
        gen_data.create_email(),
        gen_data.create_job_title(),
    ])

    # Current and previous address, both looked up by a random zip code.
    zip_code = choice(zips.zip)
    addr = geo_data.create_city_state_zip[zip_code]
    prev_zip = choice(zips.zip)
    addr2 = geo_data.create_city_state_zip[prev_zip]

    lrg_cash_ex = choice(Yes_No)

    # SAR / demarketing only apply to closed accounts.
    if Clsd == 'Yes':
        # About 1% (1 draw in 101) are demarketed without a SAR.
        if max(randrange(0, 101, 1) - 99, 0) == 1 and SAR == 'No':
            demarket = 'Yes'
            dem_date = gen_data.create_date(past=True)
        # About 10% (1 draw in 11) of the remaining ones get a SAR.
        if max(randrange(0, 11, 1) - 9, 0) == 1 and demarket == 'No':
            SAR = 'Yes'
            # About 90% (10 draws in 11) of SAR accounts are demarketed.
            if max(randrange(0, 11, 1) - 9, 0) == 0:
                demarket = 'Yes'
                dem_date = gen_data.create_date(past=True)

    # About 1% (1 draw in 101) of accounts are PEP.
    if max(randrange(0, 101, 1) - 99, 0) == 1:
        PEP = 'Yes'

    row.extend([
        addr[0], addr[1], zip_code, 'US',
        addr2[0], addr2[1], prev_zip, 'US',
        gen_data.create_birthday(min_age=2, max_age=85), PEP, SAR, Clsd,
    ])

    # Related accounts are only generated after the first 10,000 rows, and
    # then only for 1 draw in 10,001; otherwise both columns stay blank.
    rel = 0
    if i > 10000:
        rel = int(choice(acct_list)) * max(randrange(0, 10001, 1) - 9999, 0)
    if rel != 0:
        row.extend([rel, choice(Related_Type)])
    else:
        row.extend(['', ''])

    party_start = gen_data.create_date(past=True)
    Consent_Share = choice(Yes_No_Consent)

    row.extend([
        choice(Party_Type), choice(Party_Relation), party_start,
        gen_data.create_date(past=True), lrg_cash_ex, demarket, dem_date,
        randrange(0, 100, 1), choice(Official_Lang),
    ])

    # Preferred contact channel is only recorded when sharing is consented.
    if Consent_Share == 'Yes':
        row.extend(['Yes', choice(Preffered_Channel)])
    else:
        row.extend(['No', ''])

    # DO NOT USE CUST STATUS BELOW - NOT INTEGRATED WITH CLOSED STATUS!
    row.extend([zip_code, randrange(0, 5, 1)])

    # Segment columns, selected by a random segment id in 0..4.
    segments = (
        ('01', 'LOB Specific', 'IRRI', 'Group 1', 'High Risk Tier', '200'),
        ('02', 'Profitability', 'CRS Risk Score', 'Group 1',
         'Mid Risk Tier', '300'),
        ('03', 'Geographical', 'Geo Risk', 'Group 2', 'Low Risk Tier', '400'),
        ('04', 'Behavioral', 'Financial Behavior Risk', 'Group 3',
         'Vertical Risk', '100'),
        ('05', 'Risk Tolerance', 'CM Risk', 'Group 4', 'Geographical Risk',
         '500'),
    )
    Segment_ID = randrange(0, 5, 1)
    row.extend(segments[Segment_ID])

    hr0 = choice(Yes_No_Cust_Flag)      # Arms Manufacturer
    hr01 = choice(Yes_No_Cust_Flag)     # Auction
    hr02 = choice(Yes_No_Cust_Flag)     # Cash Intensive Business
    hr03 = choice(Yes_No_Cust_Flag)     # Casino Gambling
    hr04 = choice(Channel_Onboarding)
    hr05 = choice(Channel_Ongoing_Transactions)
    row.extend([hr0, hr01, hr02, hr03, hr04, hr05])

    # Net worth: depends on the high-net-worth flag.
    HighNetWorthFlag = choice(HighNetWorth)
    if HighNetWorthFlag == 'Yes':
        # Larger of a rare 5M..25M draw and a regular 1M..5M draw.
        row.append(
            max(
                max(randrange(0, 101, 1) - 99, 0)
                * randrange(5000000, 25000000, 1),
                randrange(1000000, 5000000, 1)))
    else:
        flag = choice(LowNet)
        if flag == 0:
            row.append(randrange(-250000, 600000, 1))
        elif flag == 1:
            row.append(randrange(149000, 151000, 1))
        else:
            row.append(randrange(40000, 50000, 1))

    hr1 = choice(Yes_No_Cust_Flag)      # Complex_HI_Vehicle
    hr2 = choice(Yes_No_Cust_Flag)      # Dealer_Precious_Metal
    hr3 = choice(Yes_No_Cust_Flag)      # Digital_PM_Operator
    hr4 = choice(Yes_No_Cust_Flag)      # Embassy_Consulate
    hr5 = choice(Yes_No_Cust_Flag)      # Exchange_Currency
    hr6 = choice(Yes_No_Cust_Flag)      # Foreign_Financial_Institution
    hr7 = choice(Yes_No_Cust_Flag)      # Foreign_Government
    hr8 = choice(Yes_No_Cust_Flag)      # Foreign_NonBank_Financial_Institution
    hr9 = choice(Yes_No_Cust_Flag)      # Internet_Gambling
    hr10 = choice(Yes_No_Cust_Flag)     # Medical_Marijuana_Dispensary
    hr11 = choice(Yes_No_Cust_Flag)     # Money_Service_Business
    hr12 = choice(NAICS.NAICS_Code)
    hr13 = choice(Yes_No_Cust_Flag)     # NonRegulated_Financial_Institution
    hr14 = choice(Yes_No_Cust_Flag)     # Not_Profit
    # hr15 (Occupation) is intentionally not generated here.
    hr16 = choice(Yes_No_Cust_Flag)     # Privately_ATM_Operator
    hr17 = choice(Products)
    hr18 = choice(Yes_No_Cust_Flag)     # Sales_Used_Vehicles
    hr19 = choice(Services)
    hr20 = choice(SIC_Code)
    hr21 = choice(Stock_Market_Listing)
    hr22 = choice(Yes_No_Cust_Flag)     # Third_Party_Payment_Processor
    hr23 = choice(Yes_No_Cust_Flag)     # Transacting_Provider

    # Weighted pool of risk ratings ('5' is the most likely outcome).
    refrating = ['1'] * 3 + ['2', '4'] * 2 + ['3'] + ['5'] * 12

    # Any positive risk indicator makes the customer high risk.
    risk_indicators = (
        PEP, SAR, lrg_cash_ex, demarket, hr0, hr01, hr02, hr03, hr1, hr2,
        hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11, hr13, hr14, hr16,
        hr17, hr18, hr22, hr23, HighNetWorthFlag,
    )
    if any(indicator == 'Yes' for indicator in risk_indicators):
        high_risk = 'Yes'
        hr_rating = choice(refrating)

    # Otherwise about 1% (1 draw in 101) are still flagged as high risk.
    if high_risk == 'No':
        if max(randrange(0, 101, 1) - 99, 0) == 1:
            high_risk = 'Yes'
            hr_rating = choice(refrating)

    row.extend([
        hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11, hr12, hr13,
        hr14, hr16, hr17, hr18, hr19, hr20, hr21, hr22, hr23, HighNetWorthFlag,
        high_risk, hr_rating,
        choice(Use_Case),
    ])
    return row
Exemplo n.º 16
0
     + ["High_risk_flag"]
     + ["Risk_rating"]
 )
 while i < 50000000:
     # Pick an account number and store it in acct
     acct = randrange(100000, 100000000, 1)
     # if the account hasn't been already generated then generate a record with all fields
     if d.has_key(str(acct)) == False:
         row = (
             [i]
             + [10]
             + [gen_data.cc_number()]
             + [gen_data.create_company_name()]
             + [gen_data.create_email()]
             + [gen_data.create_name()]
             + [gen_data.create_job_title()]
             + [gen_data.create_city_state_zip()]
             + [gen_data.create_birthday(min_age=2, max_age=85)]
             + [gen_data.create_city_state_zip()]
             + [fake.name()]
             + [gen_data.create_job_title()]
             + [gen_data.create_birthday(min_age=2, max_age=85)]
             + [(randrange(101, 1000, 1), randrange(10, 100, 1), randrange(1000, 10000, 1))]
             + [(randrange(101, 1000, 1), randrange(101, 999, 1), randrange(1000, 10000, 1))]
             + [acct]
             + [max((randrange(0, 101, 1) - 99), 0)]
             + [(randrange(101, 1000, 1), randrange(10, 100, 1), randrange(1000, 10000, 1))]
             + [max((randrange(0, 101, 1) - 99), 0)]
             + [max((randrange(0, 101, 1) - 99), 0)]
             + [max((randrange(0, 101, 1) - 99), 0)]
             + [max((randrange(0, 101, 1) - 90), 0)]
        #Sets CC_NO to a random credit card number
        CC_NO = gen_data.create_cc_number()

        #Extract CC_Number from the tuple returned by CC_Number...Tuple contains CC Number and Type
        #while CC_list.count(CC_NO[1][0]) > 0:
        CC_TRANS = CC_NO[1][0]

        dt = str(datetime.now())
        clean = re.sub('\W', '', dt)
        printCC = str(CC_TRANS[-4:]) + str(clean[-12:-3]) + str(
            randrange(1111, 9999, randrange(1, 10, 1)))
        #str(CC_TRANS[-4:])+str(clean[-12:-2])+str(randrange(1111,9999,randrange(1,10,1)))
        #Add CC_Number to control list to prevent duplicates
        #Add data elements to current csv row
        row.extend([names[0],mdl[0],ssn[0],names[1],mdl[1],ssn[1],names[2],mdl[2],ssn[2],printCC,CC_NO[0],gen_data.create_company_name()+' '+tmp[1],\
        gen_data.create_email(),gen_data.create_job_title()])

        #Creates Current Address
        zip = random.choice(zips.zip)
        addr = geo_data.create_city_state_zip[zip]
        #Creates Previous address
        zip2 = random.choice(zips.zip)
        addr2 = geo_data.create_city_state_zip[zip2]

        #Add additional data elements to current csv row
        lrg_cash_ex = random.choice(Yes_No)

        #Condition for SARs and Demarketed Clients
        if (Clsd == 'Yes'):
            #1% of closed accounts are demarketed but never had a SAR filed
            if (max((randrange(0, 101, 1) - 99), 0) == 1 and SAR == 'No'):
Exemplo n.º 18
0
    def __init__(self, i, acct, liSSNMaster, acct_list):
        """Populate the attributes of one synthetic customer record.

        Args:
            i: Row number; also the index of this customer's SSN in
                ``liSSNMaster``.
            acct: Account id stored as ``ACCOUNTID``.
            liSSNMaster: Indexable collection of SSNs, used for the main
                holder and for the authorized users.
            acct_list: Collection of existing account numbers, sampled for
                the related account once ``i`` exceeds 10000.
        """
        self.ROWNUM = i
        self.ACCOUNTID = acct
        self.SSN = liSSNMaster[i]
        self.ACCT_TYPE = choice(Acct_Type)
        self.NUM_CCS = choice(Number_CC)
        self.NAME = fake.name()
        self.CUSTEMAIL = gen_data.create_email()
        self.OCCUPATION = gen_data.create_job_title()
        self.COUNTRY = 'US'
        self.PREVIOUS_COUNTRY = 'US'
        self.DOB = gen_data.create_birthday(min_age=2, max_age=85)
        self.PARTY_ENDDATE = gen_data.create_date(past=True)
        self.CONSENT_SHARING = choice(Yes_No_Consent)
        self.LARGE_CASH_EXEMPT = choice(Yes_No)
        self.PARTY_TYPE = choice(Party_Type)
        self.PARTY_RELATION = choice(Party_Relation)
        self.PROB_DEFAULT_RISKR = randrange(0, 100, 1)
        self.OFFICIAL_LANG_PREF = choice(Official_Lang)
        self.DEPENDANTS_COUNT = randrange(0, 5, 1)
        self.USE_CASE_SCENARIO = choice(Use_Case)
        self.CLOSEDACCOUNT = choice(Clsd_flag)
        self.HIGH_NET_WORTH = choice(HighNetWorth)
        self.PARTY_STARTDATE = gen_data.create_date(past=True)
        self.ARMS_MANUFACTURER = choice(Yes_No_Cust_Flag)
        self.AUCTION = choice(Yes_No_Cust_Flag)
        self.CASHINTENSIVE_BUSINESS = choice(Yes_No_Cust_Flag)
        self.CASINO_GAMBLING = choice(Yes_No_Cust_Flag)
        self.CHANNEL_ONBOARDING = choice(Channel_Onboarding)
        self.CHANNEL_ONGOING_TRANSACTIONS = choice(
            Channel_Ongoing_Transactions)
        self.COMPLEX_HI_VEHICLE = choice(Yes_No_Cust_Flag)
        self.DEALER_PRECIOUS_METAL = choice(Yes_No_Cust_Flag)
        self.DIGITAL_PM_OPERATOR = choice(Yes_No_Cust_Flag)
        self.EMBASSY_CONSULATE = choice(Yes_No_Cust_Flag)
        self.EXCHANGE_CURRENCY = choice(Yes_No_Cust_Flag)
        self.FOREIGN_FINANCIAL_INSTITUTION = choice(Yes_No_Cust_Flag)
        self.FOREIGN_GOVERNMENT = choice(Yes_No_Cust_Flag)
        self.FOREIGN_NONBANK_FINANCIAL_INSTITUTION = choice(Yes_No_Cust_Flag)
        self.INTERNET_GAMBLING = choice(Yes_No_Cust_Flag)
        self.MEDICAL_MARIJUANA_DISPENSARY = choice(Yes_No_Cust_Flag)
        self.MONEY_SERVICE_BUSINESS = choice(Yes_No_Cust_Flag)
        self.NAICS_CODE = choice(NAICS.NAICS_Code)
        self.NONREGULATED_FINANCIAL_INSTITUTION = choice(Yes_No_Cust_Flag)
        self.NOT_PROFIT = choice(Yes_No_Cust_Flag)
        self.PRIVATELY_ATM_OPERATOR = choice(Yes_No_Cust_Flag)
        self.PRODUCTS = choice(Products)
        self.SALES_USED_VEHICLES = choice(Yes_No_Cust_Flag)
        self.SERVICES = choice(Services)
        self.SIC_CODE = choice(SIC_Code)
        self.STOCK_MARKET_LISTING = choice(Stock_Market_Listing)
        self.THIRD_PARTY_PAYMENT_PROCESSOR = choice(Yes_No_Cust_Flag)
        self.TRANSACTING_PROVIDER = choice(Yes_No_Cust_Flag)

        # Current and previous address are looked up by random zip codes;
        # the primary branch number reuses the current zip.
        self.ZIP = choice(zips.zip)
        self.PREVIOUS_ZIP = choice(zips.zip)
        addr = geo_data.create_city_state_zip[self.ZIP]
        addr2 = geo_data.create_city_state_zip[self.PREVIOUS_ZIP]
        self.CITY = addr[0]
        self.STATE = addr[1]
        self.PREVIOUS_CITY = addr2[0]
        self.PREVIOUS_STATE = addr2[1]
        self.PRIMARY_BRANCH_NO = self.ZIP

        tmp = gen_data.create_name()
        self.M_NAME = tmp[0]
        self.EMPLOYER = gen_data.create_company_name() + ' ' + tmp[1]

        # NOTE(review): this is a second, independent draw from Number_CC and
        # may differ from self.NUM_CCS above - confirm that is intended.
        No_CCs = choice(Number_CC)

        # Authorized users: four slots, filled with a name, a middle name
        # (to reduce duplicate names) and an SSN while the slot index is
        # below No_CCs, and with blanks afterwards.
        names = []
        ssn = []
        mdl = []
        for j in range(4):
            if No_CCs > j:
                names.append(fake.name())
                mdl.append(gen_data.create_name()[0])
                # Pick an SSN whose index differs from the main holder's.
                randInt = randrange(1, len(liSSNMaster), 1)
                if randInt == i:
                    randInt -= 1
                ssn.append(liSSNMaster[randInt])
            else:
                names.append('')
                ssn.append('')
                mdl.append('')

        self.AUTHORIZED_NAME2 = names[0]
        self.M_NAME2 = mdl[0]
        self.SSN2 = ssn[0]
        self.AUTHORIZED_NAME3 = names[1]
        self.M_NAME3 = mdl[1]
        self.SSN3 = ssn[1]
        self.AUTHORIZED_NAME4 = names[2]
        self.M_NAME4 = mdl[2]
        self.SSN4 = ssn[2]

        # Credit card: combine the last four digits of a generated number
        # with part of the current timestamp and a random suffix.
        CC_NO = gen_data.create_cc_number()
        CC_TRANS = CC_NO[1][0]
        clean = re.sub(r'\W', '', str(datetime.now()))
        self.CREDITCARDNUMBER = str(CC_TRANS[-4:]) + str(clean[-12:-3]) + str(
            randrange(1111, 9999, randrange(1, 10, 1)))
        self.CREDITCARDTYPE = CC_NO[0]

        # Related account: only after the first 10,000 rows, and then only
        # for 1 draw in 10,001; otherwise both fields stay blank.
        self.RELATED_ACCT = ''
        self.RELATED_TYPE = ''
        if i > 10000:
            rel = int(choice(acct_list)) * max(
                randrange(0, 10001, 1) - 9999, 0)
            if rel != 0:
                self.RELATED_ACCT = rel
                self.RELATED_TYPE = choice(Related_Type)

        # Preferred channel is only recorded when sharing is consented.
        self.PREFERRED_CHANNEL = ''
        if self.CONSENT_SHARING == 'Yes':
            self.PREFERRED_CHANNEL = choice(Prefered_Channel)

        # Segment fields, selected by a random segment id in 0..4.
        segment_models = (
            ('01', 'LOB Specific', 'IRRI', 'Group 1', 'High Risk Tier',
             '200'),
            ('02', 'Profitability', 'CRS Risk Score', 'Group 1',
             'Mid Risk Tier', '300'),
            ('03', 'Geographical', 'Geo Risk', 'Group 2', 'Low Risk Tier',
             '400'),
            ('04', 'Behavioral', 'Financial Behavior Risk', 'Group 3',
             'Vertical Risk', '100'),
            ('05', 'Risk Tolerance', 'CM Risk', 'Group 4',
             'Geographical Risk', '500'),
        )
        Segment_ID = randrange(0, 5, 1)
        (self.SEG_MODEL_ID, self.SEG_MODEL_TYPE, self.SEG_MODEL_NAME,
         self.SEG_MODEL_GROUP, self.SEG_M_GRP_DESC,
         self.SEG_MODEL_SCORE) = segment_models[Segment_ID]

        # Net worth: depends on the high-net-worth flag.
        self.CLIENT_NET_WORTH = ''
        if self.HIGH_NET_WORTH == 'Yes':
            # Larger of a rare 5M..25M draw and a regular 1M..5M draw.
            self.CLIENT_NET_WORTH = max(
                max(randrange(0, 101, 1) - 99, 0) *
                randrange(5000000, 25000000, 1),
                randrange(1000000, 5000000, 1))
        else:
            flag = choice(LowNet)
            if flag == 0:
                self.CLIENT_NET_WORTH = randrange(-250000, 600000, 1)
            elif flag == 1:
                self.CLIENT_NET_WORTH = randrange(149000, 151000, 1)
            else:
                self.CLIENT_NET_WORTH = randrange(40000, 50000, 1)

        # Politically exposed person: about 1% (1 draw in 101) of accounts.
        self.PEP = 'No'
        if max(randrange(0, 101, 1) - 99, 0) == 1:
            self.PEP = 'Yes'

        # Demarketing and suspicious-activity reports only apply to closed
        # accounts.
        self.DEMARKET_FLAG = 'No'
        self.DEMARKET_DATE = ''
        self.SAR = 'No'
        if self.CLOSEDACCOUNT == 'Yes':
            # About 1% (1 draw in 101) are demarketed without a SAR.
            if max(randrange(0, 101, 1) - 99, 0) == 1:
                self.DEMARKET_FLAG = 'Yes'
                self.DEMARKET_DATE = gen_data.create_date(past=True)
            # About 10% (1 draw in 11) of the remaining ones get a SAR.
            if (self.DEMARKET_FLAG == 'No'
                    and max(randrange(0, 11, 1) - 9, 0) == 1):
                self.SAR = 'Yes'
                # About 90% (10 draws in 11) of SAR accounts are demarketed.
                if max(randrange(0, 11, 1) - 9, 0) == 0:
                    self.DEMARKET_FLAG = 'Yes'
                    self.DEMARKET_DATE = gen_data.create_date(past=True)

        # Any positive risk indicator makes the customer high risk;
        # otherwise about 1% (1 draw in 101) are flagged anyway.
        # NOTE(review): ``refrating`` is not defined in this method -
        # presumably a module-level list of ratings; confirm.
        self.HIGH_RISK = 'No'
        self.RISK_RATING = ''
        risk_indicators = (
            self.PEP,
            self.SAR,
            self.LARGE_CASH_EXEMPT,
            self.DEMARKET_FLAG,
            self.ARMS_MANUFACTURER,
            self.AUCTION,
            self.CASHINTENSIVE_BUSINESS,
            self.CASINO_GAMBLING,
            self.COMPLEX_HI_VEHICLE,
            self.DEALER_PRECIOUS_METAL,
            self.DIGITAL_PM_OPERATOR,
            self.EMBASSY_CONSULATE,
            self.EXCHANGE_CURRENCY,
            self.FOREIGN_FINANCIAL_INSTITUTION,
            self.FOREIGN_GOVERNMENT,
            self.FOREIGN_NONBANK_FINANCIAL_INSTITUTION,
            self.INTERNET_GAMBLING,
            self.MEDICAL_MARIJUANA_DISPENSARY,
            self.MONEY_SERVICE_BUSINESS,
            self.NONREGULATED_FINANCIAL_INSTITUTION,
            self.NOT_PROFIT,
            self.PRIVATELY_ATM_OPERATOR,
            self.SALES_USED_VEHICLES,
            self.THIRD_PARTY_PAYMENT_PROCESSOR,
            self.TRANSACTING_PROVIDER,
            self.HIGH_NET_WORTH,
        )
        if any(indicator == 'Yes' for indicator in risk_indicators):
            self.HIGH_RISK = 'Yes'
            self.RISK_RATING = choice(refrating)
        elif max(randrange(0, 101, 1) - 99, 0) == 1:
            self.HIGH_RISK = 'Yes'
            self.RISK_RATING = choice(refrating)