Exemplo n.º 1
0
def generate_cc_credits(merchant, no_trans, count):
    """Write synthetic merchant-credit card transactions for random accounts.

    For each of ``no_trans`` randomly chosen accounts a credit limit is drawn
    and between 100 and 149 transaction rows are written through the
    module-level ``writer`` object (anything exposing ``writerow``).

    Args:
        merchant: Sequence of merchant category codes to sample from; every
            code must be a key of ``python_merchant_cat.All_Merchant_Cat``.
        no_trans: Number of accounts to generate transactions for.
        count: Starting value of the running row counter that prefixes each
            row identifier.  The counter is advanced locally only; the
            updated value is not returned to the caller.

    Returns:
        None.  All output goes through ``writer.writerow``.
    """
    for _ in range(no_trans):
        acct = random.choice(python_account_ID.accountid)
        cc_list = python_CC.CC_Dict[acct]
        # Credit limit: the multiplier below is 1 only when the first draw is
        # exactly 100 (1 draw in 100), in which case the 25K-49K band wins;
        # otherwise the limit comes from the 1K-24K band.
        limit = max(
            max((randrange(1, 101, 1) - 99), 0) *
            randrange(25000, 50000, 1000), randrange(1000, 25000, 1000))
        # Running total of the amounts generated for this account.
        usedAmt = 0
        # Latest post date seen so far; datetime(1, 1, 1) acts as "no date yet".
        # (Written without leading zeros: ``0001`` is a syntax error on
        # Python 3.)
        maxDate = datetime(1, 1, 1)
        NoTrans = randrange(100, 150, 1)
        for _ in range(NoTrans):
            dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            # Every transaction in this generator is a positive amount.
            tmpAmt = randrange(1, limit, 100)
            usedAmt = usedAmt + tmpAmt
            # Randomly selected merchant category and credit transaction type.
            cat = random.choice(merchant)
            tranType = random.choice(Transaction_Type_Credits)
            row = [
                str(count) + '_' + dt,
                acct,
                gen_data.create_company_name(),
                cat,
                python_merchant_cat.All_Merchant_Cat[cat],
            ]
            # Post date.
            date1 = gen_data.create_date(past=True)
            if date1 > maxDate:
                maxDate = date1
            # Transaction date is one day before the post date.
            date2 = date1 - timedelta(days=1)
            row.extend([
                date1, date2, tranType, 'US', limit, tmpAmt, usedAmt,
                cc_list[0], cc_list[1]
            ])
            count = count + 1
            writer.writerow(row)
        # If the account ended up overpaid by more than its limit, emit a
        # refund row dated 90 days after the latest post date.  The amounts
        # generated above are all positive, so this condition cannot hold in
        # this generator; the branch is kept unchanged.
        if usedAmt < limit * (-1):
            row = [str(count) + '_' + dt] + [acct] + [''] + [''] + ['']
            date1temp = maxDate + timedelta(days=90)
            date2 = date1temp - timedelta(days=1)
            row.extend([
                date1temp, date2, 'Refund', '', limit,
                abs(limit - abs(usedAmt)) * (-1), 0, cc_list[0], cc_list[1]
            ])
            count = count + 1
            writer.writerow(row)
def gen_cc_external(merchant, no_trans, count):
    """Write synthetic credit/debit card transactions for random accounts.

    For each of ``no_trans`` randomly chosen accounts a credit limit is drawn
    and between 100 and 149 transactions are written through the module-level
    ``writer`` object (anything exposing ``writerow``).  Each transaction is
    a credit (negative amount) or a debit (positive amount) with equal
    probability.  If the account ends up overpaid by more than its limit, one
    extra 'Refund' row is written.

    Args:
        merchant: Sequence of merchant category codes used for debits; every
            code must be a key of ``python_merchant_cat.All_Merchant_Cat``.
        no_trans: Number of accounts to generate transactions for.
        count: Starting value of the running row counter that prefixes each
            row identifier.  The counter is advanced locally only; the
            updated value is not returned to the caller.

    Returns:
        None.  All output goes through ``writer.writerow``.
    """
    for _ in range(no_trans):
        acct = random.choice(python_account_ID.accountid)
        cc_list = python_CC.CC_Dict[acct]
        # Credit limit: the multiplier below is 1 only when the first draw is
        # exactly 100 (1 draw in 100), in which case the 25K-49K band wins;
        # otherwise the limit comes from the 1K-24K band.
        limit = max(
            max((randrange(1, 101, 1) - 99), 0) *
            randrange(25000, 50000, 1000), randrange(1000, 25000, 1000))
        # Running total of the signed amounts generated for this account.
        usedAmt = 0
        # Latest post date seen so far; datetime(1, 1, 1) acts as "no date yet".
        # (Written without leading zeros: ``0001`` is a syntax error on
        # Python 3.)
        maxDate = datetime(1, 1, 1)
        NoTrans = randrange(100, 150, 1)
        for _ in range(NoTrans):
            dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            # Coin flip (0 or 1) times a negated amount: either a credit
            # (negative value) or 0.
            tmpAmt = (max((randrange(1, 3, 1) - 1), 0) *
                      randrange(1, limit, 100) * (-1))
            # A zero means "not a credit": draw a positive debit instead.
            if tmpAmt == 0:
                tmpAmt = randrange(1, limit, 100)
            usedAmt = usedAmt + tmpAmt
            row = [str(count) + '_' + dt, acct,
                   gen_data.create_company_name()]
            if tmpAmt < 0:
                # Credits carry a fixed placeholder instead of a merchant
                # category code and an empty description.
                tranType = random.choice(Transaction_Type_Credits)
                row.append('Non-BMO Acct')
                row.append('')
            else:
                # Debits get a random merchant category and its description.
                tranType = random.choice(Transaction_Type_Debits)
                cat = random.choice(merchant)
                row.append(cat)
                row.append(python_merchant_cat.All_Merchant_Cat[cat])
            # Post date.
            date1 = gen_data.create_date(past=True)
            if date1 > maxDate:
                maxDate = date1
            # Transaction date is one day before the post date.
            date2 = date1 - timedelta(days=1)
            row.extend([
                date1, date2, tranType, random.choice(Country_Red), limit,
                tmpAmt, usedAmt, cc_list[0], cc_list[1]
            ])
            count = count + 1
            writer.writerow(row)
        # Overpaid by more than the limit: write a refund row dated 90 days
        # after the latest post date.
        if usedAmt < limit * (-1):
            row = [str(count) + '_' + dt] + [acct] + [''] + [''] + ['']
            date1temp = maxDate + timedelta(days=90)
            date2 = date1temp - timedelta(days=1)
            row.extend([
                date1temp, date2, 'Refund', '', limit,
                abs(limit - abs(usedAmt)) * (-1), 0, cc_list[0], cc_list[1]
            ])
            count = count + 1
            writer.writerow(row)
        #Creates Current Address
        zip = random.choice(zips.zip)
        addr = geo_data.create_city_state_zip[zip]
        #Creates Previous address
        zip2 = random.choice(zips.zip)
        addr2 = geo_data.create_city_state_zip[zip2]

        #Add additional data elements to current csv row
        lrg_cash_ex = random.choice(Yes_No)

        #Condition for SARs and Demarketed Clients
        if (Clsd == 'Yes'):
            #1% of closed accounts are demarketed but never had a SAR filed
            if (max((randrange(0, 101, 1) - 99), 0) == 1 and SAR == 'No'):
                demarket = 'Yes'
                dem_date = gen_data.create_date(past=True)
            if (max((randrange(0, 11, 1) - 9), 0) == 1 and demarket == 'No'):
                #10% of closed accounts have SARs
                SAR = 'Yes'
                #90% of closed accounts with SARs are demarketed
                if (max((randrange(0, 11, 1) - 9), 0) == 0):
                    demarket = 'Yes'
                    dem_date = gen_data.create_date(past=True)

        if (max((randrange(0, 101, 1) - 99), 0) == 1):
            PEP = 'Yes'

        row.extend([
            addr[0], addr[1], zip, 'US', addr2[0], addr2[1], zip2, 'US',
            gen_data.create_birthday(min_age=2, max_age=85), PEP, SAR, Clsd
        ])
'9223':'Bail and Bond Payments',
'9311':'Tax Payments   Government Agencies',
'9399':'Government Services (Not Elsewhere Classified)',
'9402':'Postal Services   Government Only',
'9405':'U.S. Federal Government Agencies or Departments',
'9950':'Intra Company Purchases'
};


# Write a header plus ten sample merchant-credit rows.  Indentation is
# normalised to spaces: the original mixed a space-indented ``for`` with a
# tab-indented body, which Python 3 rejects with TabError.
with open('CreditCard_Transaction_MerchantCredits.csv', 'w') as f1:

    writer = csv.writer(f1, delimiter=',', lineterminator='\n',)
    writer.writerow(['rownum'] + ['Account_Number'] + ['Merchant_Name'] +
                    ['Merchant_Category_Code'] + ['Merchant_Category_Desc'] +
                    ['Post_Date'] + ['Transaction_Date'] +
                    ['Transaction_Type'] + ['Merchant_Country'] +
                    ['Credit_Limit'] + ['Amount'])

    for i in range(10):
        dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        row = ([str(i) + '_' + dt] +
               [random.choice(python_account_ID.accountid)] +
               [gen_data.create_company_name()])
        cat = random.choice(Merchant_Category)
        row.append(cat)
        row.append(All_Merchant_Cat[cat])
        # Post date, then a transaction date one day earlier.
        date1 = gen_data.create_date(past=True)
        date2 = date1 - timedelta(days=1)
        row.append(date1)
        row.append(date2)
        # Credit limit: the multiplier is 1 only when the first draw is
        # exactly 100 (1 draw in 100), selecting the 25K-49K band; otherwise
        # the limit comes from the 1K-24K band.
        limit = max(max((randrange(1, 101, 1) - 99), 0) *
                    randrange(25000, 50000, 1000),
                    randrange(1000, 25000, 1000))
        tmpAmt = randrange(1, limit, 100)
        row.extend([random.choice(Transaction_Type_Credits),
                    random.choice(Country), limit, tmpAmt])
        writer.writerow(row)
    'Thriller record': (8, 20),
    'Harry Potter book': (5, 35),
    'iPhone': (400, 900),
    'Rubik’s Cube': (15, 19),
    'banana': (10, 10),  # how much is a banana, 10 dollars?
}

# Build the sales rows in two passes of 130045 rows each.  After each pass the
# 'Corolla' and 'Thriller record' products are retired, so the second pass
# samples from the reduced catalogue.
salesdata = []
for pass_no in range(2):
    # random.sample needs a real sequence; passing a dict view raises
    # TypeError on Python 3.11+.  The catalogue only changes between passes,
    # so the list is built once per pass.
    product_names = list(products)
    for i in range(130045):
        sel = random.sample(humans, 1)[0]
        product = random.sample(product_names, 1)[0]
        # 'Corolla' is sold in smaller quantities than everything else.
        units = random.randint(1, 50) if product != 'Corolla' else random.randint(1, 10)
        unitprice = round(random.uniform(products[product][0], products[product][1]), 2)
        salesdata.append(sel + [
            str(gen_data.create_date(past=True, max_years_future=1, max_years_past=3)),
            product,
            units,
            unitprice,
        ])
    # Remove each product independently and tolerate it already being gone,
    # instead of a bare ``except`` that would also hide unrelated errors and
    # skip the second delete when the first key is missing.
    for retired in ('Corolla', 'Thriller record'):
        products.pop(retired, None)


columns = ['name', 'birthday', 'customer', 'orderdate', 'product', 'units', 'unitprice']
df = pd.DataFrame(salesdata, columns=columns)
Exemplo n.º 6
0
'6733 Trusts, Except Educational, Religious, and Charitable',\
'8999 Services, NEC',\
]
Stock_Market_Listing=['Australian Stock Exchange',\
'Brussels Stock Exchange',\
'Montreal Stock Exchange',\
'Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found','Not Found',\
'Tiers 1 and 2 of the TSX Venture Exchange (also known as Tiers 1 and 2 of the Canadian Venture Exchange)',\
'Toronto Stock Exchange',\
]
# Both names are bound to the same object as Embassy_Consulate (defined
# outside this excerpt); they are aliases, not copies.
Third_Party_Payment_Processor=Embassy_Consulate
Transacting_Provider=Embassy_Consulate


# Faker instance bound at module level.
fake = Faker()
# NOTE(review): the return value of this call is discarded here; confirm
# whether it is needed at all.
gen_data.create_date(past=True)

with open('large.csv','r') as csvinput:
	with open('large_extended.csv','w') as csvoutput:
	
		writer=csv.writer(csvoutput, delimiter=',',lineterminator='\n',)
		reader = csv.reader(csvinput)

		row = next(reader)
		
		
		row.append('RELATED_ACCT')
		row.append('RELATED_TYPE')
		row.append('PARTY_TYPE')
		row.append('PARTY_RELATION')
		row.append('PARTY_STARTDATE')
Exemplo n.º 7
0
		#Creates Current Address
		zip=random.choice(zips.zip)
		addr=geo_data.create_city_state_zip[zip]
		#Creates Previous address
		zip2=random.choice(zips.zip)
		addr2=geo_data.create_city_state_zip[zip2]
                                                                
		#Add additional data elements to current csv row
		lrg_cash_ex=random.choice(Yes_No)
		
		#Condition for SARs and Demarketed Clients 
		if(Clsd=='Yes'):
			#1% of closed accounts are demarketed but never had a SAR filed
			if (max((randrange(0,101,1)-99),0)==1 and SAR=='No'):
				demarket='Yes'
				dem_date=gen_data.create_date(past=True)
			if (max((randrange(0,11,1)-9),0)==1 and demarket=='No'):
				#10% of closed accounts have SARs 
				SAR='Yes'
				#90% of closed accounts with SARs are demarketed
				if(max((randrange(0,11,1)-9),0)==0):
					demarket='Yes'
					dem_date=gen_data.create_date(past=True)

		if (max((randrange(0,101,1)-99),0)==1):
			PEP='Yes'

		row.extend([addr[0],addr[1],zip,'US',addr2[0],addr2[1],zip2,'US',gen_data.create_birthday(min_age=2, max_age=85),PEP,SAR,Clsd])                            
		#Start Generating related accounts from account list once 10,000 accounts are generated
		if i > 10000: 
			rel = int(random.choice(acct_list))*max((randrange(0,10001,1)-9999),0)
Exemplo n.º 8
0
    'Harry Potter book': (5, 35),
    'iPhone': (400, 900),
    'Rubik’s Cube': (15, 19),
    'banana': (10, 10),  # how much is a banana, 10 dollars?
}

# Build 2345 sales rows: a random customer record extended with an order
# date from the past year, a random product, a unit count and a unit price
# drawn from the product's (low, high) price range.
salesdata = []
# random.sample needs a real sequence; passing a dict view raises TypeError
# on Python 3.11+.  The catalogue is not modified in this loop, so the list
# is built once.
product_names = list(products)
for i in range(2345):
    sel = random.sample(humans, 1)[0]
    units = random.randint(1, 50)
    product = random.sample(product_names, 1)[0]
    unitprice = round(
        random.uniform(products[product][0], products[product][1]), 2)
    salesdata.append(sel + [
        str(
            gen_data.create_date(
                past=True, max_years_future=0, max_years_past=1)),
        product,
        units,
        unitprice,
    ])

columns = [
    'name', 'birthday', 'customer', 'orderdate', 'product', 'units',
    'unitprice'
]
df = pd.DataFrame(salesdata, columns=columns)

# df.to_json('blooth_sales_data_".json', orient='records')
df.to_json('blooth_sales_data_2.json',
           orient='records',
Exemplo n.º 9
0
# Write a header plus ten sample transactions whose account numbers are drawn
# at random rather than taken from the reference account list.
with open('CreditCard_Transaction_Red_NonAccountHolders.csv', 'w') as f1:
    writer = csv.writer(f1, delimiter=',', lineterminator='\n')
    writer.writerow([
        'rownum', 'Account_Number', 'Merchant_Name',
        'Merchant_Category_Code', 'Merchant_Category_Desc', 'Post_Date',
        'Transaction_Date', 'Transaction_Type', 'Merchant_Country',
        'Credit_Limit', 'Amount',
    ])
    for i in range(10):
        dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        # Random account number; nothing here checks it against the
        # reference account file.
        acct = randrange(100000, 100000000, 1)
        row = [str(i) + '_' + dt, acct, gen_data.create_company_name()]
        cat = random.choice(Merchant_Category)
        row += [cat, All_Merchant_Cat[cat]]
        # Post date followed by a transaction date one day earlier.
        date1 = gen_data.create_date(past=True)
        date2 = date1 - timedelta(days=1)
        row += [date1, date2]
        # Credit limit: the high band applies only when the first draw is
        # exactly 100 (1 draw in 100); otherwise the low band wins.
        high_band_switch = max(randrange(1, 101, 1) - 99, 0)
        high_band = randrange(25000, 50000, 1000)
        low_band = randrange(1000, 25000, 1000)
        limit = max(high_band_switch * high_band, low_band)
        tmpAmt = randrange(1, limit, 100)
        tran_type = random.choice(Transaction_Type)
        country = random.choice(Country)
        row += [tran_type, country, limit, tmpAmt]
        writer.writerow(row)
def createCusts(N):
    """Generate ``N`` synthetic customer/account rows.

    Each row is a flat list built in the order the values are appended below:
    account data, up to three additional card holders, card number, employer
    and contact data, current and previous address, risk flags, related
    account, party data, segment model data, high-risk indicators, net worth
    and a use-case number.

    Relies on module-level names that are not defined in this function:
    ``liSSNMaster`` (indexed by row number, so it needs at least ``N``
    entries), ``gen_data``, ``geo_data``, ``zips``, ``NAICS``, ``Faker``,
    ``random``, ``randrange``, ``datetime`` and ``re``.

    Args:
        N: Number of customer rows to generate.

    Returns:
        list: One list of column values per customer.
    """
    # Weighted pool for "net worth over $500K".
    HighNetWorth = ['Yes'] + ['No'] * 30
    # Type of related account.
    Related_Type = ['Primary', 'Secondary', 'Joint']
    # Party type.
    Party_Type = ['Person', 'Non-Person']
    # Customer relation.
    Party_Relation = ['Customer', 'Non-Customer']
    # Weighted Yes/No flag.
    Yes_No = ['Yes'] + ['No'] * 12
    # Weighted Yes/No consent.
    Yes_No_Consent = ['Yes'] + ['No'] * 4
    # Official language.
    Official_Lang = ['English'] * 3 + ['French']
    # Preferred contact channel.
    Preffered_Channel = ['Direct Mail', 'Telemarketing', 'Email', 'SMS']
    # Segment model columns; the same index is used across all six lists.
    Seg_Model_Type = ['LOB Specific', 'Profitability', 'Geographical', 'Behavioral', 'Risk Tolerance']
    Model_ID = ['01', '02', '03', '04', '05']
    Seg_Model_Name = ['IRRI', 'CRS Risk Score', 'Geo Risk', 'Financial Behavior Risk', 'CM Risk']
    Seg_Model_Score = ['200', '300', '400', '100', '500']
    Seg_Model_Group = ['Group 1'] * 2 + ['Group 2', 'Group 3', 'Group 4']
    Seg_Model_Description = ['High Risk Tier', 'Mid Risk Tier', 'Low Risk Tier', 'Vertical Risk', 'Geographical Risk']
    # Shared pool for every rare high-risk flag (arms manufacturer, auction,
    # cash-intensive business, casino/gambling, embassy, ...).  The original
    # code built one identical list per flag; a single read-only pool yields
    # the same draws.
    Rare_Flag = ['Yes'] + ['No'] * 2 + [''] * 392
    # Client onboarding channel.
    Channel_Onboarding = ['E-mail', 'In Person', 'In person - In Branch/Bank Office', 'In person - Offsite/Client Location', 'Mail', 'Online', 'Phone', 'Request for Proposal (RFP)'] + ['Not Applicable'] * 10
    # Ongoing transaction channel.
    Channel_Ongoing_Transactions = ['ATM', 'E-mail', 'Fax', 'Mail', 'Not Applicable', 'OTC Communication System', 'Phone'] + ['Online'] * 4 + ['In Person'] * 31
    # Products.
    Products = [
        'Certificate of Deposit',
        'Checking Account',
        'Credit Card',
        'Custodial and Investment Agency - Institutional',
        'Custodial and Investment Agency - Personal',
        'Custodial/Trust Outsourcing Services (BTOS)',
        'Custody Accounts (PTIM)',
        'Custody Accounts (RSTC)',
        'DTF (BHFA)',
        'Investment Agency - Personal',
        'Investment Management Account (PTIM)',
        'Lease',
        'Loan / Letter of Credit',
        'Money Market',
        'Mortgage / Bond / Debentures',
        'None',
        'Savings Account',
        'Trust Administration - Irrevocable and Revocable (PTIM)',
        'Trust Administration - Irrevocable and Revocable Trusts (BDTC)',
    ] + ['Nondeposit Investment Products'] * 14 + ['Investment Agency - Institutional'] * 5
    # Services.
    Services = [
        'Benefit Payment Services',
        'Domestic Wires and Direct Deposit / ACH',
        'Family Office Services (FOS)',
        'Fiduciary Services',
        'International Wires and IAT',
        'Investment Advisory Services (IAS)',
        'Investment Services',
        'None',
        'Online / Mobile Banking',
        'Payroll',
        'Short Term Cash Management',
        'Trust Services',
        'Trustee Services',
        'Vault Cash Services',
    ] + ['Financial Planning'] * 6 + ['Retirement Plans'] * 19
    # SIC codes.
    SIC_Code = [
        '6021 National Commercial Banks',
        '6211 Security Brokers Dealers and Flotation Companies',
        '6282 Investment Advice',
        '6311 Life Insurance',
        '6733 Trusts Except Educational Religious and Charitable',
        '8999 Services NEC',
    ] + ['6722 Management Investment Offices Open-End'] * 12
    # Stock market listings.
    Stock_Market_Listing = [
        'Australian Stock Exchange',
        'Brussels Stock Exchange',
        'Montreal Stock Exchange',
        'Tiers 1 and 2 of the TSX Venture Exchange (also known as Tiers 1 and 2 of the Canadian Venture Exchange)',
        'Toronto Stock Exchange',
    ] + ['Not Found'] * 30
    # Selector for the low-net-worth band (0, 1 or 2).
    LowNet = [1, 2] + [0] * 5
    # Consumer ('C') vs business ('B') account.
    Acct_Type = ['B'] + ['C'] * 5
    # Number of card holders per account.
    Number_CC = [1] * 7 + [2] * 11 + [3] * 3 + [4]
    # Weighted pool of use-case scenario numbers.
    Use_Case = [1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 39] * 4 + [2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38] * 7 + [3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36] * 65 + [37] * 73 + [40, 41] * 2
    # Weighted pool of high-risk ratings.
    refrating = ['1', '1', '1', '2', '3', '4', '2', '4', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5']

    fake = Faker()
    start = 10786147
    # Account numbers generated so far; source for related accounts.
    acct_list = []
    liCSV = []
    # ``range`` instead of the Python 2-only ``xrange``.
    for i in range(N):
        # Initial risk flags for this customer.
        PEP = 'No'        # politically exposed person
        SAR = 'No'        # suspicious activity report filed
        Clsd = 'No'       # closed account
        high_risk = 'No'
        hr_rating = ''
        demarket = 'No'   # demarketed by the bank
        dem_date = ''
        # Closed-account flag: set only when the draw is 97 (1 in 98).
        if max(randrange(0, 98, 1) - 96, 0) == 1:
            Clsd = 'Yes'
        # Number of card holders on the account.
        No_CCs = random.choice(Number_CC)
        # Account numbers strictly increase by a random step of 2-10.
        acct = start + 1 + randrange(1, 10, 1)
        start = acct
        # Customer name; tmp supplies the middle name (tmp[0]) and tmp[1],
        # which is appended to the employer name below.
        name = fake.name()
        tmp = gen_data.create_name()
        acct_list.append(acct)
        row = [i, acct, random.choice(Acct_Type), No_CCs, name, tmp[0], liSSNMaster[i]]

        # Additional card holders: name, middle name and SSN for each.
        names = []
        ssn = []
        mdl = []
        for j in range(No_CCs - 1):
            names.append(fake.name())
            tmp2 = gen_data.create_name()
            mdl.append(tmp2[0])
            # Pull an SSN from the master list, avoiding this row's own index.
            randInt = randrange(1, len(liSSNMaster), 1)
            if randInt != i:
                ssn.append(liSSNMaster[randInt])
            else:
                ssn.append(liSSNMaster[randInt - 1])
        # Pad with blanks so there are always three additional-holder slots.
        blanks = [''] * (4 - No_CCs)
        names.extend(blanks)
        ssn.extend(blanks)
        mdl.extend(blanks)

        # Random card number; cc_number() returns a pair from which the
        # number is taken via [1][0] and the other element via [0].
        CC_NO = gen_data.cc_number()
        CC_TRANS = CC_NO[1][0]
        # Mix the last 4 card digits with timestamp digits and a random
        # number to reduce the chance of duplicates.
        dt = str(datetime.now())
        clean = re.sub(r'\W', '', dt)
        printCC = str(CC_TRANS[-4:]) + str(clean[-12:-3]) + str(randrange(1111, 9999, randrange(1, 10, 1)))

        row.extend([
            names[0], mdl[0], ssn[0], names[1], mdl[1], ssn[1],
            names[2], mdl[2], ssn[2], printCC, CC_NO[0],
            gen_data.create_company_name() + ' ' + tmp[1],
            gen_data.create_email(), gen_data.create_job_title(),
        ])
        # Current address.
        zip_code = random.choice(zips.zip)
        addr = geo_data.create_city_state_zip[zip_code]
        # Previous address.
        zip2 = random.choice(zips.zip)
        addr2 = geo_data.create_city_state_zip[zip2]
        lrg_cash_ex = random.choice(Yes_No)
        # SARs and demarketing apply to closed accounts only.
        if Clsd == 'Yes':
            # Rare case: demarketed without a SAR (draw must be exactly 100).
            if max(randrange(0, 101, 1) - 99, 0) == 1 and SAR == 'No':
                demarket = 'Yes'
                dem_date = gen_data.create_date(past=True)
            # A SAR is filed when the draw is exactly 10 (1 in 11).
            if max(randrange(0, 11, 1) - 9, 0) == 1 and demarket == 'No':
                SAR = 'Yes'
                # Most SAR accounts are demarketed (every draw except 10).
                if max(randrange(0, 11, 1) - 9, 0) == 0:
                    demarket = 'Yes'
                    dem_date = gen_data.create_date(past=True)

        if max(randrange(0, 101, 1) - 99, 0) == 1:
            PEP = 'Yes'
        row.extend([
            addr[0], addr[1], zip_code, 'US', addr2[0], addr2[1], zip2, 'US',
            gen_data.create_birthday(min_age=2, max_age=85), PEP, SAR, Clsd,
        ])

        # Related accounts are only drawn once more than 10,000 accounts
        # exist, to avoid duplicating accounts early on; even then the
        # multiplier is non-zero only when the draw is exactly 10000.
        rel = 0
        if i > 10000:
            rel = int(random.choice(acct_list)) * max(randrange(0, 10001, 1) - 9999, 0)
        # ``!=`` instead of the Python 2-only ``<>`` operator.
        if rel != 0:
            row.extend([rel, random.choice(Related_Type)])
        else:
            row.extend(['', ''])

        # Account start date and consent to share information.
        party_start = gen_data.create_date(past=True)
        Consent_Share = random.choice(Yes_No_Consent)
        row.extend([
            random.choice(Party_Type), random.choice(Party_Relation),
            party_start, gen_data.create_date(past=True),
            lrg_cash_ex, demarket, dem_date, randrange(0, 100, 1),
            random.choice(Official_Lang),
        ])
        # A preferred channel is only recorded when consent was given.
        if Consent_Share == 'Yes':
            row.extend(['Yes', random.choice(Preffered_Channel)])
        else:
            row.extend(['No', ''])

        row.extend([zip_code, randrange(0, 5, 1)])
        # Pick one segment (0-4) and append its six parallel attributes.
        Segment_ID = randrange(0, 5, 1)
        row.extend([
            Model_ID[Segment_ID], Seg_Model_Type[Segment_ID],
            Seg_Model_Name[Segment_ID], Seg_Model_Group[Segment_ID],
            Seg_Model_Description[Segment_ID], Seg_Model_Score[Segment_ID],
        ])

        hr0 = random.choice(Rare_Flag)    # arms manufacturer
        hr01 = random.choice(Rare_Flag)   # auction
        hr02 = random.choice(Rare_Flag)   # cash-intensive business
        hr03 = random.choice(Rare_Flag)   # casino / gambling
        hr04 = random.choice(Channel_Onboarding)
        hr05 = random.choice(Channel_Ongoing_Transactions)
        row.extend([hr0, hr01, hr02, hr03, hr04, hr05])
        # Net worth depends on the high-net-worth flag.
        HighNetWorthFlag = random.choice(HighNetWorth)
        if HighNetWorthFlag == 'Yes':
            row.append(max(max(randrange(0, 101, 1) - 99, 0) * randrange(1000000, 25000000, 1), randrange(1000000, 5000000, 1)))
        else:
            flag = random.choice(LowNet)
            if flag == 0:
                row.append(randrange(-250000, 600000, 1))
            elif flag == 1:
                row.append(randrange(149000, 151000, 1))
            else:
                row.append(randrange(40000, 50000, 1))

        hr1 = random.choice(Rare_Flag)    # complex / high-risk vehicle
        hr2 = random.choice(Rare_Flag)    # dealer in precious metals
        hr3 = random.choice(Rare_Flag)    # digital PM operator
        hr4 = random.choice(Rare_Flag)    # embassy / consulate
        hr5 = random.choice(Rare_Flag)    # currency exchange
        hr6 = random.choice(Rare_Flag)    # foreign financial institution
        hr7 = random.choice(Rare_Flag)    # foreign government
        hr8 = random.choice(Rare_Flag)    # foreign non-bank financial institution
        hr9 = random.choice(Rare_Flag)    # internet gambling
        hr10 = random.choice(Rare_Flag)   # medical marijuana dispensary
        hr11 = random.choice(Rare_Flag)   # money service business
        hr12 = random.choice(NAICS.NAICS_Code)
        hr13 = random.choice(Rare_Flag)   # non-regulated financial institution
        hr14 = random.choice(Rare_Flag)   # not-for-profit
        # hr15 (occupation) is supplied earlier through gen_data.
        hr16 = random.choice(Rare_Flag)   # privately owned ATM operator
        hr17 = random.choice(Products)
        hr18 = random.choice(Rare_Flag)   # sales of used vehicles
        hr19 = random.choice(Services)
        hr20 = random.choice(SIC_Code)
        hr21 = random.choice(Stock_Market_Listing)
        hr22 = random.choice(Rare_Flag)   # third-party payment processor
        hr23 = random.choice(Rare_Flag)   # transacting provider

        # Any 'Yes' among these indicators marks the customer as high risk.
        risk_indicators = [
            PEP, SAR, lrg_cash_ex, demarket, hr0, hr01, hr02, hr03,
            hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11,
            hr13, hr14, hr16, hr17, hr18, hr22, hr23, HighNetWorthFlag,
        ]
        if 'Yes' in risk_indicators:
            high_risk = 'Yes'
            hr_rating = random.choice(refrating)
        # Up to three further chances to be flagged high risk, each taken
        # only while the customer is still not high risk.  The original code
        # also tested SAR == 'No' and PEP == 'No' on the first two attempts;
        # both are implied by high_risk == 'No' because SAR and PEP are among
        # the indicators checked above.
        for _ in range(3):
            if high_risk == 'No':
                if max(randrange(0, 101, 1) - 99, 0) == 1:
                    high_risk = 'Yes'
                    hr_rating = random.choice(refrating)
        row.extend([
            hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11, hr12,
            hr13, hr14, hr16, hr17, hr18, hr19, hr20, hr21, hr22, hr23,
            HighNetWorthFlag, high_risk, hr_rating, random.choice(Use_Case),
        ])
        liCSV.append(row)
    return liCSV
Exemplo n.º 11
0
    def __init__(self, i, acct, liSSNMaster, acct_list):
        self.ROWNUM = i
        self.ACCOUNTID = acct
        self.SSN = liSSNMaster[i]
        self.ACCT_TYPE = choice(Acct_Type)
        self.NUM_CCS = choice(Number_CC)
        self.NAME = fake.name()
        self.CUSTEMAIL = gen_data.create_email()
        self.OCCUPATION = gen_data.create_job_title()
        self.COUNTRY = 'US'
        self.PREVIOUS_COUNTRY = 'US'
        self.DOB = gen_data.create_birthday(min_age=2, max_age=85)
        self.PARTY_ENDDATE = gen_data.create_date(past=True)
        self.CONSENT_SHARING = choice(Yes_No_Consent)
        self.LARGE_CASH_EXEMPT = choice(Yes_No)
        self.PARTY_TYPE = choice(Party_Type)
        self.PARTY_RELATION = choice(Party_Relation)
        self.PROB_DEFAULT_RISKR = randrange(0, 100, 1)
        self.OFFICIAL_LANG_PREF = choice(Official_Lang)
        self.DEPENDANTS_COUNT = randrange(0, 5, 1)
        self.USE_CASE_SCENARIO = choice(Use_Case)
        self.CLOSEDACCOUNT = choice(Clsd_flag)
        self.HIGH_NET_WORTH = choice(HighNetWorth)
        self.PARTY_STARTDATE = gen_data.create_date(past=True)
        self.ARMS_MANUFACTURER = choice(Yes_No_Cust_Flag)
        self.AUCTION = choice(Yes_No_Cust_Flag)
        self.CASHINTENSIVE_BUSINESS = choice(Yes_No_Cust_Flag)
        self.CASINO_GAMBLING = choice(Yes_No_Cust_Flag)
        self.CHANNEL_ONBOARDING = choice(Channel_Onboarding)
        self.CHANNEL_ONGOING_TRANSACTIONS = choice(
            Channel_Ongoing_Transactions)
        self.COMPLEX_HI_VEHICLE = choice(Yes_No_Cust_Flag)
        self.DEALER_PRECIOUS_METAL = choice(Yes_No_Cust_Flag)
        self.DIGITAL_PM_OPERATOR = choice(Yes_No_Cust_Flag)
        self.EMBASSY_CONSULATE = choice(Yes_No_Cust_Flag)
        self.EXCHANGE_CURRENCY = choice(Yes_No_Cust_Flag)
        self.FOREIGN_FINANCIAL_INSTITUTION = choice(Yes_No_Cust_Flag)
        self.FOREIGN_GOVERNMENT = choice(Yes_No_Cust_Flag)
        self.FOREIGN_NONBANK_FINANCIAL_INSTITUTION = choice(Yes_No_Cust_Flag)
        self.INTERNET_GAMBLING = choice(Yes_No_Cust_Flag)
        self.MEDICAL_MARIJUANA_DISPENSARY = choice(Yes_No_Cust_Flag)
        self.MONEY_SERVICE_BUSINESS = choice(Yes_No_Cust_Flag)
        self.NAICS_CODE = choice(NAICS.NAICS_Code)
        self.NONREGULATED_FINANCIAL_INSTITUTION = choice(Yes_No_Cust_Flag)
        self.NOT_PROFIT = choice(Yes_No_Cust_Flag)
        self.PRIVATELY_ATM_OPERATOR = choice(Yes_No_Cust_Flag)
        self.PRODUCTS = choice(Products)
        self.SALES_USED_VEHICLES = choice(Yes_No_Cust_Flag)
        self.SERVICES = choice(Services)
        self.SIC_CODE = choice(SIC_Code)
        self.STOCK_MARKET_LISTING = choice(Stock_Market_Listing)
        self.THIRD_PARTY_PAYMENT_PROCESSOR = choice(Yes_No_Cust_Flag)
        self.TRANSACTING_PROVIDER = choice(Yes_No_Cust_Flag)
        self.ZIP = choice(zips.zip)
        self.PREVIOUS_ZIP = choice(zips.zip)
        addr = geo_data.create_city_state_zip[self.ZIP]
        addr2 = geo_data.create_city_state_zip[self.PREVIOUS_ZIP]
        self.CITY = addr[0]
        self.STATE = addr[1]
        self.PREVIOUS_CITY = addr2[0]
        self.PREVIOUS_STATE = addr2[1]
        self.PRIMARY_BRANCH_NO = self.ZIP
        tmp = gen_data.create_name()
        self.M_NAME = tmp[0]
        self.EMPLOYER = gen_data.create_company_name() + ' ' + tmp[1]
        No_CCs = choice(Number_CC)
        #Dictionary for names list set to blank
        names = []
        #Dictionary for Social Security Number list set to blank
        ssn = []
        #Middle Name to reduce name dups
        mdl = []
        #Generates Name and SSN for Credit Users
        for j in range(4):
            if No_CCs > j:
                names.insert(j, fake.name())
                tmp2 = gen_data.create_name()
                mdl.insert(j, tmp2[0])
                randInt = randrange(1, len(liSSNMaster), 1)
                if randInt != i:
                    ssn.insert(j, liSSNMaster[randInt])
                else:
                    ssn.insert(j, liSSNMaster[randInt - 1])
            #Name and SSN is set to blank if less than 4 customers on an account
            else:
                names.insert(No_CCs + j, '')
                ssn.insert(No_CCs + j, '')
                mdl.insert(No_CCs + j, '')

        self.AUTHORIZED_NAME2 = names[0]
        self.M_NAME2 = mdl[0]
        self.SSN2 = ssn[0]
        self.AUTHORIZED_NAME3 = names[1]
        self.M_NAME3 = mdl[1]
        self.SSN3 = ssn[1]
        self.AUTHORIZED_NAME4 = names[2]
        self.M_NAME4 = mdl[2]
        self.SSN4 = ssn[2]

        #Sets CC_NO to a random credit card number
        CC_NO = gen_data.create_cc_number()
        CC_TRANS = CC_NO[1][0]
        dt = str(datetime.now())
        clean = re.sub('\W', '', dt)
        self.CREDITCARDNUMBER = str(CC_TRANS[-4:]) + str(clean[-12:-3]) + str(
            randrange(1111, 9999, randrange(1, 10, 1)))
        self.CREDITCARDTYPE = CC_NO[0]

        self.RELATED_ACCT = ''
        self.RELATED_TYPE = ''
        if i > 10000:
            rel = int(choice(acct_list)) * max(
                (randrange(0, 10001, 1) - 9999), 0)
            if rel <> 0:
                self.RELATED_ACCT = rel
                self.RELATED_TYPE = choice(Related_Type)

        self.PREFERRED_CHANNEL = ''
        if self.CONSENT_SHARING == 'Yes':
            self.PREFERRED_CHANNEL = choice(Prefered_Channel)


##              #Generates Segment ID then adds additional Segment data based on the selection to the current csv row
        Segment_ID = randrange(0, 5, 1)
        if Segment_ID == 0:
            self.SEG_MODEL_ID = '01'
            self.SEG_MODEL_TYPE = 'LOB Specific'
            self.SEG_MODEL_NAME = 'IRRI'
            self.SEG_MODEL_GROUP = 'Group 1'
            self.SEG_M_GRP_DESC = 'High Risk Tier'
            self.SEG_MODEL_SCORE = '200'
        if Segment_ID == 1:
            self.SEG_MODEL_ID = '02'
            self.SEG_MODEL_TYPE = 'Profitability'
            self.SEG_MODEL_NAME = 'CRS Risk Score'
            self.SEG_MODEL_GROUP = 'Group 1'
            self.SEG_M_GRP_DESC = 'Mid Risk Tier'
            self.SEG_MODEL_SCORE = '300'
        if Segment_ID == 2:
            self.SEG_MODEL_ID = '03'
            self.SEG_MODEL_TYPE = 'Geographical'
            self.SEG_MODEL_NAME = 'Geo Risk'
            self.SEG_MODEL_GROUP = 'Group 2'
            self.SEG_M_GRP_DESC = 'Low Risk Tier'
            self.SEG_MODEL_SCORE = '400'
        if Segment_ID == 3:
            self.SEG_MODEL_ID = '04'
            self.SEG_MODEL_TYPE = 'Behavioral'
            self.SEG_MODEL_NAME = 'Financial Behavior Risk'
            self.SEG_MODEL_GROUP = 'Group 3'
            self.SEG_M_GRP_DESC = 'Vertical Risk'
            self.SEG_MODEL_SCORE = '100'
        if Segment_ID == 4:
            self.SEG_MODEL_ID = '05'
            self.SEG_MODEL_TYPE = 'Risk Tolerance'
            self.SEG_MODEL_NAME = 'CM Risk'
            self.SEG_MODEL_GROUP = 'Group 4'
            self.SEG_M_GRP_DESC = 'Geographical Risk'
            self.SEG_MODEL_SCORE = '500'

        self.CLIENT_NET_WORTH = ''
        if self.HIGH_NET_WORTH == 'Yes':
            self.CLIENT_NET_WORTH = max(
                max((randrange(0, 101, 1) - 99), 0) *
                randrange(5000000, 25000000, 1),
                randrange(1000000, 5000000, 1))
        else:
            flag = choice(LowNet)
            if flag == 0:
                self.CLIENT_NET_WORTH = randrange(-250000, 600000, 1)
            else:
                if flag == 1:
                    self.CLIENT_NET_WORTH = randrange(149000, 151000, 1)
                else:
                    self.CLIENT_NET_WORTH = randrange(40000, 50000, 1)

        #Politically Exposed Person
        self.PEP = 'No'
        #1% of accounts are PEP
        if (max((randrange(0, 101, 1) - 99), 0) == 1):
            self.PEP = 'Yes'

        #Customer that was demarketed by the bank
        self.DEMARKET_FLAG = 'No'
        self.DEMARKET_DATE = ''
        #Customer with a Suspicous Activity Report
        self.SAR = 'No'
        #Customer with a closed account
        #generate closed acct flag
        #Condition for SARs and Demarketed Clients
        if (self.CLOSEDACCOUNT == 'Yes'):
            #1% of closed accounts are demarketed but never had a SAR filed
            if (max((randrange(0, 101, 1) - 99), 0) == 1):
                self.DEMARKET_FLAG = 'Yes'
                self.DEMARKET_DATE = gen_data.create_date(past=True)
            if (self.DEMARKET_FLAG == 'No' and max(
                (randrange(0, 11, 1) - 9), 0) == 1):
                #10% of closed accounts have SARs
                self.SAR = 'Yes'
                #90% of closed accounts with SARs are demarketed
                if (max((randrange(0, 11, 1) - 9), 0) == 0):
                    self.DEMARKET_FLAG = 'Yes'
                    self.DEMARKET_DATE = gen_data.create_date(past=True)

        self.HIGH_RISK = 'No'
        self.RISK_RATING = ''
        if (self.PEP == 'Yes' or self.SAR == 'Yes'
                or self.LARGE_CASH_EXEMPT == 'Yes'
                or self.DEMARKET_FLAG == 'Yes'
                or self.ARMS_MANUFACTURER == 'Yes' or self.AUCTION == 'Yes'
                or self.CASHINTENSIVE_BUSINESS == 'Yes'
                or self.CASINO_GAMBLING == 'Yes'
                or self.COMPLEX_HI_VEHICLE == 'Yes'
                or self.DEALER_PRECIOUS_METAL == 'Yes'
                or self.DIGITAL_PM_OPERATOR == 'Yes'
                or self.EMBASSY_CONSULATE == 'Yes'
                or self.EXCHANGE_CURRENCY == 'Yes'
                or self.FOREIGN_FINANCIAL_INSTITUTION == 'Yes'
                or self.FOREIGN_GOVERNMENT == 'Yes'
                or self.FOREIGN_NONBANK_FINANCIAL_INSTITUTION == 'Yes'
                or self.INTERNET_GAMBLING == 'Yes'
                or self.MEDICAL_MARIJUANA_DISPENSARY == 'Yes'
                or self.MONEY_SERVICE_BUSINESS == 'Yes'
                or self.NONREGULATED_FINANCIAL_INSTITUTION == 'Yes'
                or self.NOT_PROFIT == 'Yes'
                or self.PRIVATELY_ATM_OPERATOR == 'Yes'
                or self.SALES_USED_VEHICLES == 'Yes'
                or self.THIRD_PARTY_PAYMENT_PROCESSOR == 'Yes'
                or self.TRANSACTING_PROVIDER == 'Yes'
                or self.HIGH_NET_WORTH == 'Yes'):
            self.HIGH_RISK = 'Yes'
            self.RISK_RATING = choice(refrating)
        elif (max((randrange(0, 101, 1) - 99), 0) == 1):
            self.HIGH_RISK = 'Yes'
            self.RISK_RATING = choice(refrating)
Exemplo n.º 12
0
def _generate_unique_ssns(count):
    """Return a list of ``count`` distinct random 9-digit strings.

    Keeps drawing until the requested number of unique values exists, so the
    result can always be indexed up to ``count - 1``.

    Raises:
        ValueError: if more values are requested than 9-digit strings exist.
    """
    if count > 10 ** 9:
        raise ValueError('cannot generate %d unique 9-digit SSNs' % count)
    unique = set()
    while len(unique) < count:
        unique.add(''.join(str(random.randint(0, 9)) for _ in range(9)))
    return list(unique)


def generate_customers(num_customers=30):
    """Generate synthetic customer records and write them to 'uber_cust.csv'.

    The output file is obtained from ``get_file`` and written through
    ``csv.writer`` with '|' as the delimiter and a newline as the line
    terminator: one header row followed by ``num_customers`` data rows.

    Each row holds the account holder, up to three authorized users, a credit
    card, current/previous address, party data, a segment model and a series
    of risk-indicator flags from which the HIGH_RISK / RISK_RATING columns
    are derived.

    Args:
        num_customers: number of data rows to generate. Defaults to 30, the
            value that used to be hard-coded.

    Returns:
        None. The only effect is the written file.
    """
    # NOTE(review): the header has 88 columns but every data row carries 87
    # values -- both 'accountCategory' and 'accountType' are listed while only
    # one weighted_options('acct_type') value is written per row. Left as-is
    # because the intended schema cannot be determined from this function.
    header = [
        'ROWNUM', 'accountNumber', 'accountCategory', 'accountType',
        'NUM_CCS', 'NAME', 'M_NAME', 'SSN',
        'AUTHORIZED_NAME2', 'M_NAME2', 'SSN2',
        'AUTHORIZED_NAME3', 'M_NAME3', 'SSN3',
        'AUTHORIZED_NAME4', 'M_NAME4', 'SSN4',
        'CREDITCARDNUMBER', 'CREDITCARDTYPE', 'EMPLOYER', 'CUSTEMAIL',
        'OCCUPATION', 'CITY', 'STATE', 'ZIP', 'COUNTRY',
        'PREVIOUS_CITY', 'PREVIOUS_STATE', 'PREVIOUS_ZIP',
        'PREVIOUS_COUNTRY', 'DOB', 'politically_exposed_person',
        'suspicious_activity_report', 'CLOSEDACCOUNT',
        'RELATED_ACCT', 'RELATED_TYPE', 'PARTY_TYPE', 'PARTY_RELATION',
        'PARTY_STARTDATE', 'PARTY_ENDDATE',
        'LARGE_CASH_EXEMPT', 'DEMARKET_FLAG', 'DEMARKET_DATE',
        'PROB_DEFAULT_RISKR', 'OFFICIAL_LANG_PREF', 'CONSENT_SHARING',
        'PREFERRED_CHANNEL', 'PRIMARY_BRANCH_NO', 'DEPENDANTS_COUNT',
        'SEG_MODEL_ID', 'SEG_MODEL_TYPE', 'SEG_MODEL_NAME',
        'SEG_MODEL_GROUP', 'SEG_M_GRP_DESC', 'SEG_MODEL_SCORE',
        'ARMS_MANUFACTURER', 'AUCTION', 'CASHINTENSIVE_BUSINESS',
        'CASINO_GAMBLING', 'CHANNEL_ONBOARDING',
        'CHANNEL_ONGOING_TRANSACTIONS', 'CLIENT_NET_WORTH',
        'COMPLEX_HI_VEHICLE', 'DEALER_PRECIOUS_METAL',
        'DIGITAL_PM_OPERATOR', 'EMBASSY_CONSULATE', 'EXCHANGE_CURRENCY',
        'FOREIGN_FINANCIAL_INSTITUTION', 'FOREIGN_GOVERNMENT',
        'FOREIGN_NONBANK_FINANCIAL_INSTITUTION', 'INTERNET_GAMBLING',
        'MEDICAL_MARIJUANA_DISPENSARY', 'MONEY_SERVICE_BUSINESS',
        'NAICS_CODE', 'NONREGULATED_FINANCIAL_INSTITUTION',
        'NOT_PROFIT', 'PRIVATELY_ATM_OPERATOR', 'PRODUCTS',
        'SALES_USED_VEHICLES', 'SERVICES',
        'SIC_CODE', 'STOCK_MARKET_LISTING',
        'THIRD_PARTY_PAYMENT_PROCESSOR', 'TRANSACTING_PROVIDER',
        'HIGH_NET_WORTH', 'HIGH_RISK', 'RISK_RATING',
        'USE_CASE_SCENARIO',
    ]
    # Related accounts are only drawn once more than this many rows exist.
    related_after = 10000

    with get_file('uber_cust.csv', 'w') as f1:
        # Pipe-delimited CSV, one record per line.
        writer = csv.writer(
            f1,
            delimiter='|',
            lineterminator='\n',
        )
        writer.writerow(header)

        # Running account number; each account is 2..10 above the previous.
        start = 10
        acct_list = []

        # One unique SSN per primary account holder. At least two entries are
        # kept so the authorized-user draw below (randrange(1, len)) is valid
        # even for a single customer.
        li_ssn_master = _generate_unique_ssns(max(num_customers, 2))

        for i in range(num_customers):
            # Risk flags start cleared for every customer.
            politically_exposed_person = 'No'
            suspicious_activity_report = 'No'
            closed_cust_acct = 'No'
            high_risk = 'No'
            hr_rating = ''
            # Customer that was demarketed by the bank.
            demarket = 'No'
            dem_date = ''

            # Closed-account flag: only the top value (97) of the 0..97 draw
            # survives the "- 96" shift as 1, i.e. a 1-in-98 chance.
            if max((randrange(0, 98, 1) - 96), 0) == 1:
                closed_cust_acct = 'Yes'

            # Number of card holders on the account (opaque helper).
            no_ccs = weighted_options('number_cc')

            acct = start + 1 + randrange(1, 10, 1)
            start = acct

            name = fake.name()
            tmp = gen_data.create_name()
            acct_list.append(acct)

            # The primary holder's SSN is the master-list entry at this row.
            row = [
                i, acct, weighted_options('acct_type'), no_ccs, name, tmp[0],
                li_ssn_master[i]
            ]

            # Authorized users: (name, middle name, SSN) for every holder
            # beyond the primary one.
            names = []
            mdl = []
            ssn = []
            for _ in range(no_ccs - 1):
                names.append(fake.name())
                tmp2 = gen_data.create_name()
                # Middle name reduces duplicate names.
                mdl.append(tmp2[0])
                # Pick an SSN from the master list, stepping one back when
                # the draw lands on the primary holder's own index.
                rand_idx = randrange(1, len(li_ssn_master), 1)
                if rand_idx != i:
                    ssn.append(li_ssn_master[rand_idx])
                else:
                    ssn.append(li_ssn_master[rand_idx - 1])

            # Blank out the remaining authorized-user slots.
            for _ in range(4 - no_ccs):
                names.append('')
                ssn.append('')
                mdl.append('')

            # create_cc_number() result is used as (type, [number, ...]).
            CC_NO = gen_data.create_cc_number()
            CC_TRANS = CC_NO[1][0]

            # Printed card number: last 4 generated digits + part of the
            # current timestamp with non-word characters stripped + a random
            # 4-digit tail.
            dt = str(datetime.now())
            clean = re.sub(r'\W', '', dt)
            printCC = str(CC_TRANS[-4:]) + str(clean[-12:-3]) + str(
                randrange(1111, 9999, randrange(1, 10, 1)))

            row.extend([
                names[0], mdl[0], ssn[0], names[1], mdl[1], ssn[1], names[2],
                mdl[2], ssn[2], printCC, CC_NO[0],
                gen_data.create_company_name() + ' ' + tmp[1],
                gen_data.create_email(),
                gen_data.create_job_title()
            ])

            # Current and previous address, looked up by zip code.
            zip_code = random.choice(zips.zip)
            addr = geo_data.create_city_state_zip[zip_code]
            prev_zip_code = random.choice(zips.zip)
            addr2 = geo_data.create_city_state_zip[prev_zip_code]

            lrg_cash_ex = weighted_options('yes_no')

            # SAR / demarketing only apply to closed accounts.
            # NOTE(review): the original comments quote 1% / 10% rates for
            # the two risk_range() draws; risk_range() is defined elsewhere,
            # so the actual probability should be confirmed there.
            if closed_cust_acct == 'Yes':
                # Demarketed without a SAR ever being filed.
                if risk_range():
                    demarket = 'Yes'
                    dem_date = gen_data.create_date(past=True)
                # risk_range() is drawn before the demarket test on purpose:
                # this keeps the number of draws per customer unchanged.
                if risk_range() and demarket == 'No':
                    suspicious_activity_report = 'Yes'
                    # Draw of 0..10 shifted by 9: only 10 stays non-zero, so
                    # 10 out of 11 SAR accounts are also demarketed.
                    if max((randrange(0, 11, 1) - 9), 0) == 0:
                        demarket = 'Yes'
                        dem_date = gen_data.create_date(past=True)

            if risk_range():
                politically_exposed_person = 'Yes'

            row.extend([
                addr[0], addr[1], zip_code, 'US', addr2[0], addr2[1],
                prev_zip_code, 'US',
                gen_data.create_birthday(min_age=2, max_age=85),
                politically_exposed_person, suspicious_activity_report,
                closed_cust_acct
            ])

            # Related account: blank unless enough accounts exist and the
            # 1-in-10001 draw hits (the multiplier is 0 otherwise).
            related = ['', '']
            if i > related_after:
                rel = int(random.choice(acct_list)) * max(
                    (randrange(0, 10001, 1) - 9999), 0)
                if rel != 0:
                    related = [rel, weighted_options('related_type')]
            row.extend(related)

            party_start = gen_data.create_date(past=True)
            consent_share = weighted_options('yes_no')

            row.extend([
                weighted_options('party_type'),
                weighted_options('party_relation'), party_start,
                gen_data.create_date(past=True), lrg_cash_ex, demarket,
                dem_date,
                randrange(0, 100, 1),
                weighted_options('official_lang')
            ])

            # A preferred contact channel is only recorded with consent.
            if consent_share == 'Yes':
                row.extend(['Yes', weighted_options('preferred_channel')])
            else:
                row.extend(['No', ''])

            # Primary branch number reuses the zip code; then the dependants
            # count (0..4).
            row.extend([zip_code, randrange(0, 5, 1)])

            # Segment model: one index selects the matching entry from each
            # of the parallel segment lists.
            segment_id = randrange(0, 5, 1)
            row.extend([
                MODEL_ID[segment_id], SEG_MODEL_TYPE[segment_id],
                SEG_MODEL_NAME[segment_id], SEG_MODEL_GROUP[segment_id],
                SEG_MODEL_DESCRIPTION[segment_id],
                SEG_MODEL_SCORE[segment_id]
            ])

            arms_manufacturer = weighted_options('arms_manufacturers')
            auction = weighted_options('auction')
            cash_intensive_business = weighted_options(
                'cash_intensive_business')
            casino_gambling = weighted_options('casino_gambling')
            chan_ob = weighted_options('channel_onboarding')
            chan_txn = weighted_options('channel_ongoing_txn')

            row.extend([
                arms_manufacturer, auction, cash_intensive_business,
                casino_gambling, chan_ob, chan_txn
            ])

            high_net_worth_flag = weighted_options('high_net_worth')

            # Net worth depends on the high-net-worth flag.
            if high_net_worth_flag == 'Yes':
                # The 0/1 multiplier is 1 only for a draw of 100, so the
                # larger range rarely wins over the 1M-5M baseline.
                net_worth = max(
                    max((randrange(0, 101, 1) - 99), 0) *
                    randrange(1000000, 25000000, 1),
                    randrange(1000000, 5000000, 1))
            else:
                flag = weighted_options('low_net')
                if flag == 0:
                    net_worth = randrange(-250000, 600000, 1)
                elif flag == 1:
                    net_worth = randrange(149000, 151000, 1)
                else:
                    net_worth = randrange(40000, 50000, 1)
            row.append(net_worth)

            # Remaining indicator columns. Several calls pass module-level
            # names rather than string keys -- presumably constants holding
            # the option key; verify against weighted_options().
            hr1 = weighted_options('complex_hi_vehicle')
            hr2 = weighted_options('dealer_precious_metal')
            hr3 = weighted_options('digital_pm_operator')
            hr4 = weighted_options(EMBASSY_CONSULATE)
            hr5 = weighted_options(EXCHANGE_CURRENCY)
            hr6 = weighted_options(FOREIGN_FINANCIAL_INSTITUTION)
            hr7 = weighted_options(FOREIGN_GOVT)
            hr8 = weighted_options(FOREIGN_NONBANK_FINANCIAL_INSTITUTION)
            hr9 = weighted_options(INTERNET_GAMBLING)
            hr10 = weighted_options(MEDICAL_MARIJUANA_DISPENSARY)
            hr11 = weighted_options(MONEY_SERVICE_BUSINESS)
            hr12 = random.choice(NAICS.NAICS_Code)
            hr13 = weighted_options(NONREGULATED_FINANCIAL_INSTITUTION)
            hr14 = weighted_options(NOT_PROFIT)
            # (no hr15: the occupation column is written earlier in the row)
            hr16 = weighted_options(PRIVATE_ATM_OPERATOR)
            hr17 = weighted_options('products')
            hr18 = weighted_options(SALES_USED_VEHICLES)
            hr19 = weighted_options('services')
            hr20 = weighted_options('sic_code')
            hr21 = weighted_options('stock_market_listing')
            hr22 = weighted_options(THIRD_PARTY_PAYMENT_PROCESSOR)
            hr23 = weighted_options(TRANSACTING_PROVIDER)

            # Any single 'Yes' indicator makes the customer high risk.
            if 'Yes' in (politically_exposed_person,
                         suspicious_activity_report, lrg_cash_ex, demarket,
                         arms_manufacturer, auction, cash_intensive_business,
                         casino_gambling, hr1, hr2, hr3, hr4, hr5, hr6, hr7,
                         hr8, hr9, hr10, hr11, hr13, hr14, hr16, hr17, hr18,
                         hr22, hr23, high_net_worth_flag):
                high_risk = 'Yes'
                hr_rating = weighted_options('refrating')

            # Customers without any indicator still get up to three
            # risk_range() draws to be rated high risk. The original code
            # spelled this out as three separate checks whose extra SAR/PEP
            # conditions were always true when high_risk was still 'No'.
            for _ in range(3):
                if high_risk == 'No' and risk_range():
                    high_risk = 'Yes'
                    hr_rating = weighted_options('refrating')

            row.extend([
                hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11, hr12,
                hr13, hr14, hr16, hr17, hr18, hr19, hr20, hr21, hr22, hr23,
                high_net_worth_flag, high_risk, hr_rating,
                random.choice(USE_CASE)
            ])
            writer.writerow(row)
def createCusts(N):
    #List for client whose net worth is over $500K
    HighNetWorth = ['Yes'] + ['No'] * 30
    #List for type of account
    Related_Type = ['Primary', 'Secondary', 'Joint']
    #List for how the account was opened
    Party_Type = ['Person', 'Non-Person']
    #List for a BMO customer
    Party_Relation = ['Customer', 'Non-Customer']
    #List for random Yes/No Flag
    Yes_No = ['Yes'] + ['No'] * 12
    #List for random Yes/No Consent
    Yes_No_Consent = ['Yes'] + ['No'] * 4
    #List for equal Yes/No Flag
    Yes_No_50 = ['Yes', 'No']
    #List for official language
    Official_Lang = ['English'] * 3 + ['French']
    #List for method of communication
    Preffered_Channel = ['Direct Mail', 'Telemarketing', 'Email', 'SMS']
    #List for status of customer
    #Customer_Status = ['Prospect','Inactive Customer','Past Customer'] + ['Active Customer'] * 56
    #List for LOB Segment Type
    Seg_Model_Type = [
        'LOB Specific', 'Profitability', 'Geographical', 'Behavioral',
        'Risk Tolerance'
    ]
    #List for Model ID
    Model_ID = ['01', '02', '03', '04', '05']
    #List for Model Name
    Seg_Model_Name = [
        'IRRI', 'CRS Risk Score', 'Geo Risk', 'Financial Behavior Risk',
        'CM Risk'
    ]
    #List for Model Score
    Seg_Model_Score = ['200', '300', '400', '100', '500']
    #List for Model Group
    Seg_Model_Group = ['Group 1'] * 2 + ['Group 2', 'Group 3', 'Group 4']
    #List for Model Description
    Seg_Model_Description = [
        'High Risk Tier', 'Mid Risk Tier', 'Low Risk Tier', 'Vertical Risk',
        'Geographical Risk'
    ]
    #List for random Arms Dealer flag
    Arms_Manufacturer = ['Yes'] + ['No'] * 2 + [''] * 392
    #List for random auction flag
    Auction = ['Yes'] + ['No'] * 2 + [''] * 392
    #List for random Cash Intensive flag
    CashIntensive_Business = ['Yes'] + ['No'] * 2 + [''] * 392
    #List for random Casino?Gaming flag
    Casino_Gambling = ['Yes'] + ['No'] * 2 + [''] * 392
    #List for random Client Onboarding flag
    Channel_Onboarding = [
        'E-mail', 'In Person', 'In person - In Branch/Bank Office',
        'In person - Offsite/Client Location', 'Mail', 'Online', 'Phone',
        'Request for Proposal (RFP)'
    ] + ['Not Applicable'] * 10
    #List for random Transaction flag
    Channel_Ongoing_Transactions = [
        'ATM', 'E-mail', 'Fax', 'Mail', 'Not Applicable',
        'OTC Communication System', 'Phone'
    ] + ['Online'] * 4 + ['In Person'] * 31
    #List for random HI_Vehicle flag
    Complex_HI_Vehicle = ['Yes'] + ['No'] * 2 + [''] * 392
    #List for random Metals flag
    Dealer_Precious_Metal = ['Yes'] + ['No'] * 2 + [''] * 392
    #List for random Arms Dealer flag
    Digital_PM_Operator = ['Yes'] + ['No'] * 2 + [''] * 392
    #List for random Embassy flag
    Embassy_Consulate = ['Yes'] + ['No'] * 2 + [''] * 392
    #Sets variable to Embassy flag
    Exchange_Currency = Embassy_Consulate
    #Sets variable to Embassy flag
    Foreign_Financial_Institution = Embassy_Consulate
    #Sets variable to Embassy flag
    Foreign_Government = Embassy_Consulate
    #Sets variable to Embassy flag
    Foreign_NonBank_Financial_Institution = Embassy_Consulate
    #Sets variable to Embassy flag
    Internet_Gambling = Embassy_Consulate
    #Sets variable to Embassy flag
    Medical_Marijuana_Dispensary = Embassy_Consulate
    #Sets variable to Embassy flag
    Money_Service_Business = Embassy_Consulate
    #Sets variable to Embassy flag
    NonRegulated_Financial_Institution = Embassy_Consulate
    #Sets variable to Embassy flag
    Not_Profit = Embassy_Consulate
    #List for random occupation
    Occupation=['11-1011 Chief Executives',\
    '11-3011 Administrative Services Managers',\
    '11-3031 Financial Managers',\
    '11-3061 Purchasing Managers',\
    '13-1011 Agents and Business Managers of Artists Performers and Athletes',\
    '13-1031 Claims Adjusters Examiners, and Investigators',\
    '13-1199 Business Operations Specialists, All Other',\
    '13-2099 Financial Specialists All Other',\
    '17-1011 Architects Except Landscape and Naval',\
    '23-1011 Lawyers',\
    '23-1023 Judges, Magistrate Judges and Magistrates',\
    '25-2012 Kindergarten Teachers Except Special Education',\
    '25-2021 Elementary School Teachers Except Special Education',\
    '29-1041 Optometrists',\
    '29-2054 Respiratory Therapy Technicians',\
    '33-2011 Firefighters',\
    '37-1012 First-Line Supervisors of Landscaping Lawn Service and Groundskeeping Workers',\
    '39-1011 Gaming Supervisors',\
    '39-2011 Animal Trainers',\
    '41-1011 First-Line Supervisors of Retail Sales Workers',\
    '41-1012 First-Line Supervisors of Non-Retail Sales Workers',\
    '41-2011 Cashiers',\
    '41-2031 Retail Salespersons',\
    '43-3021 Billing and Posting Clerks',\
    '45-1011 First-Line Supervisors of Farming, Fishing, and Forestry Workers',\
    '49-2011 Computer Automated Teller and Office Machine Repairers',\
    '53-3021 Bus Drivers Transit and Intercity',\
    '53-4031 Railroad Conductors and Yardmasters',\
    '55-1011 Air Crew Officers',\
    '55-1012 Aircraft Launch and Recovery Officers',\
    '55-1013 Armored Assault Vehicle Officers',\
    ]
    #Sets variable to Embassy flag
    Privately_ATM_Operator = Embassy_Consulate
    #List for random products
    Products=['Certificate of Deposit',\
    'Checking Account',\
    'Credit Card',\
    'Custodial and Investment Agency - Institutional',\
    'Custodial and Investment Agency - Personal',\
    'Custodial/Trust Outsourcing Services (BTOS)',\
    'Custody Accounts (PTIM)',\
    'Custody Accounts (RSTC)',\
    'DTF (BHFA)',\
    'Investment Agency - Personal',\
    'Investment Management Account (PTIM)',\
    'Lease',\
    'Loan / Letter of Credit',\
    'Money Market',\
    'Mortgage / Bond / Debentures',\
    'None',\
    'Savings Account',\
    'Trust Administration - Irrevocable and Revocable (PTIM)',\
    'Trust Administration - Irrevocable and Revocable Trusts (BDTC)',\
    ] + ['Nondeposit Investment Products'] * 14 + ['Investment Agency - Institutional'] * 5
    #Sets variable to Embassy flag
    Sales_Used_Vehicles = Embassy_Consulate
    #Dictionary for random Services
    Services=['Benefit Payment Services',\
    'Domestic Wires and Direct Deposit / ACH',\
    'Family Office Services (FOS)',\
    'Fiduciary Services',\
    'International Wires and IAT',\
    'Investment Advisory Services (IAS)',\
    'Investment Services',\
    'None',\
    'Online / Mobile Banking',\
    'Payroll',\
    'Short Term Cash Management',\
    'Trust Services',\
    'Trustee Services',\
    'Vault Cash Services',\
    ] + ['Financial Planning'] * 6 + ['Retirement Plans'] * 19
    #Dictionary for random SIC_Code
    SIC_Code=['6021 National Commercial Banks',\
    '6211 Security Brokers Dealers and Flotation Companies',\
    '6282 Investment Advice',\
    '6311 Life Insurance',\
    '6733 Trusts Except Educational Religious and Charitable',\
    '8999 Services NEC',\
    ] + ['6722 Management Investment Offices Open-End'] * 12
    #Dictionary for random Market Listing
    Stock_Market_Listing=['Australian Stock Exchange',\
    'Brussels Stock Exchange',\
    'Montreal Stock Exchange',\
    'Tiers 1 and 2 of the TSX Venture Exchange (also known as Tiers 1 and 2 of the Canadian Venture Exchange)',\
    'Toronto Stock Exchange',\
    ] + ['Not Found'] * 30
    #Sets variable to Embassy flag
    Third_Party_Payment_Processor = Embassy_Consulate
    #Sets variable to Embassy flag
    Transacting_Provider = Embassy_Consulate
    #Dictionary for random Low Net Worth
    LowNet = [1, 2] + [0] * 5
    #Dictionary for Consumer vs Business
    Acct_Type = ['B'] + ['C'] * 5
    #Dictionary for random number of credits cards per account
    Number_CC = [1] * 7 + [2] * 11 + [3] * 3 + [4]
    #Dictionary for Account list set to blank
    acct_list = []
    #Dictionary for CreditCard list set to blank
    CC_list = []

    #Dictionary for random Wolfsberg scenario
    Use_Case = [1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 39] * 4 + [
        2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38
    ] * 7 + [3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36
             ] * 65 + [37] * 73 + [40, 41] * 2
    refrating = [
        '1', '1', '1', '2', '3', '4', '2', '4', '5', '5', '5', '5', '5', '5',
        '5', '5', '5', '5', '5', '5'
    ]
    fake = Faker()
    global liSSNMaster
    start = 10786147
    acct_list = []
    liCSV = []
    for i in xrange(N):
        #Initiate High Risk Flags
        #Politically Exposed Person
        PEP = 'No'
        #Customer with a Suspicous Activity Report
        SAR = 'No'
        #Customer with a closed account
        Clsd = 'No'
        #High risk customer flag
        high_risk = 'No'
        #High Risk Rating
        hr_rating = ''
        #Customer that was demarketed by the bank
        demarket = 'No'
        dem_date = ''
        #generate closed acct flag
        if (max((randrange(0, 98, 1) - 96), 0) == 1):
            Clsd = 'Yes'
        #Random choice for number of credit card users per account number
        No_CCs = random.choice(Number_CC)
        #Generate account number
        acct = start + 1 + randrange(1, 10, 1)
        start = acct
        #Randomly generate customer name + middle name in tmp
        name = fake.name()
        tmp = gen_data.create_name()
        #Adds account number to account dictionary
        acct_list.extend([acct])
        #Creates a new row and adds data elements
        row = [i] + [acct] + [random.choice(Acct_Type)] + [No_CCs] + [name] + [
            tmp[0]
        ] + [liSSNMaster[i]]
        #Dictionary for names list set to blank
        names = []
        #Dictionary for Social Security Number list set to blank
        ssn = []
        #Middle Name to reduce name dups
        mdl = []

        for j in range(No_CCs - 1):
            names.insert(j, fake.name())
            tmp2 = gen_data.create_name()
            mdl.insert(j, tmp2[0])
            ##Pull from SSN Master list
            randInt = randrange(1, len(liSSNMaster), 1)
            if randInt != i:
                ssn.insert(j, liSSNMaster[randInt])
            else:
                ssn.insert(j, liSSNMaster[randInt - 1])

        #Name and SSN is set to blank if less than 4 customers on an account
        for k in range(4 - No_CCs):
            names.insert(No_CCs + k, '')
            ssn.insert(No_CCs + k, '')
            mdl.insert(No_CCs, '')

        #Sets CC_NO to a random credit card number
        CC_NO = gen_data.cc_number()
        #Extract CC_Number from the tuple returned by CC_Number then scramble to ensure uniqueness...Tuple contains CC Number and Type
        CC_TRANS = CC_NO[1][0]
        dt = str(datetime.now())
        clean = re.sub('\W', '', dt)
        printCC = str(CC_TRANS[-4:]) + str(clean[-12:-3]) + str(
            randrange(1111, 9999, randrange(1, 10, 1)))

        #Add data elements to current csv row
        row.extend([names[0],mdl[0],ssn[0],names[1],mdl[1],ssn[1],names[2],mdl[2],ssn[2],printCC,CC_NO[0],gen_data.create_company_name()+' '+tmp[1],\
        gen_data.create_email(),gen_data.create_job_title()])
        #Create Current Address
        zip = random.choice(zips.zip)
        addr = geo_data.create_city_state_zip[zip]
        #Create Previous address
        zip2 = random.choice(zips.zip)
        addr2 = geo_data.create_city_state_zip[zip2]
        #Add additional data elements to current csv row
        lrg_cash_ex = random.choice(Yes_No)
        #Condition for SARs and Demarketed Clients
        if (Clsd == 'Yes'):
            #1% of closed accounts are demarketed but never had a SAR filed
            if (max((randrange(0, 101, 1) - 99), 0) == 1 and SAR == 'No'):
                demarket = 'Yes'
                dem_date = gen_data.create_date(past=True)
            if (max((randrange(0, 11, 1) - 9), 0) == 1 and demarket == 'No'):
                #10% of closed accounts have SARs
                SAR = 'Yes'
                #90% of closed accounts with SARs are demarketed
                if (max((randrange(0, 11, 1) - 9), 0) == 0):
                    demarket = 'Yes'
                    dem_date = gen_data.create_date(past=True)

        if (max((randrange(0, 101, 1) - 99), 0) == 1):
            PEP = 'Yes'
        row.extend([
            addr[0], addr[1], zip, 'US', addr2[0], addr2[1], zip2, 'US',
            gen_data.create_birthday(min_age=2, max_age=85), PEP, SAR, Clsd
        ])

        #Start Generating related accounts from account list once 10,000 accounts are generated - to avoid duplicating accounts in the beginning
        if i > 10000:
            rel = int(random.choice(acct_list)) * max(
                (randrange(0, 10001, 1) - 9999), 0)
            if rel <> 0:
                row.append(rel)
                row.append(random.choice(Related_Type))
            else:
                row.append('')
                row.append('')
        else:
            row.append('')
            row.append('')

        #Randomly generates account start date
        party_start = gen_data.create_date(past=True)
        #Randomly selects consent option for sharing info
        Consent_Share = random.choice(Yes_No_Consent)
        #Add additional data elements to current csv row
        row.extend([random.choice(Party_Type),random.choice(Party_Relation),party_start,gen_data.create_date(past=True),\
        lrg_cash_ex,demarket,dem_date,randrange(0,100,1),random.choice(Official_Lang)])
        #Add data element preferred methond of contact for yes to share info...if not then blank to current row

        if Consent_Share == 'Yes':
            row.extend(['Yes', random.choice(Preffered_Channel)])
        else:
            row.extend(['No', ''])

        row.extend([zip, randrange(0, 5, 1)])
        #Generate Segment ID then add additional Segment data based on the selection to the current csv row
        Segment_ID = randrange(0, 5, 1) % 5
        if Segment_ID == 0:
            row.extend([
                Model_ID[0], Seg_Model_Type[0], Seg_Model_Name[0],
                Seg_Model_Group[0], Seg_Model_Description[0],
                Seg_Model_Score[0]
            ])
        if Segment_ID == 1:
            row.extend([
                Model_ID[1], Seg_Model_Type[1], Seg_Model_Name[1],
                Seg_Model_Group[1], Seg_Model_Description[1],
                Seg_Model_Score[1]
            ])
        if Segment_ID == 2:
            row.extend([
                Model_ID[2], Seg_Model_Type[2], Seg_Model_Name[2],
                Seg_Model_Group[2], Seg_Model_Description[2],
                Seg_Model_Score[2]
            ])
        if Segment_ID == 3:
            row.extend([
                Model_ID[3], Seg_Model_Type[3], Seg_Model_Name[3],
                Seg_Model_Group[3], Seg_Model_Description[3],
                Seg_Model_Score[3]
            ])
        if Segment_ID == 4:
            row.extend([
                Model_ID[4], Seg_Model_Type[4], Seg_Model_Name[4],
                Seg_Model_Group[4], Seg_Model_Description[4],
                Seg_Model_Score[4]
            ])

        #Add additional data elements to current csv row
        hr0 = random.choice(Arms_Manufacturer)
        hr01 = random.choice(Auction)
        hr02 = random.choice(CashIntensive_Business)
        hr03 = random.choice(Casino_Gambling)
        hr04 = random.choice(Channel_Onboarding)
        hr05 = random.choice(Channel_Ongoing_Transactions)
        row.extend([hr0, hr01, hr02, hr03, hr04, hr05])
        #Randomly select whether customer has a High Net Worth
        HighNetWorthFlag = random.choice(HighNetWorth)
        #Randomly Generate customer net worth based on the above flag
        if HighNetWorthFlag == 'Yes':
            row.append(
                max(
                    max((randrange(0, 101, 1) - 99), 0) *
                    randrange(1000000, 25000000, 1),
                    randrange(1000000, 5000000, 1)))
        else:
            flag = random.choice(LowNet)
            if flag == 0:
                row.append(randrange(-250000, 600000, 1))
            else:
                if flag == 1:
                    row.append(randrange(149000, 151000, 1))
                else:
                    row.append(randrange(40000, 50000, 1))
        #Add data elements to current csv row
        hr1 = random.choice(Complex_HI_Vehicle)
        hr2 = random.choice(Dealer_Precious_Metal)
        hr3 = random.choice(Digital_PM_Operator)
        hr4 = random.choice(Embassy_Consulate)
        hr5 = random.choice(Exchange_Currency)
        hr6 = random.choice(Foreign_Financial_Institution)
        hr7 = random.choice(Foreign_Government)
        hr8 = random.choice(Foreign_NonBank_Financial_Institution)
        hr9 = random.choice(Internet_Gambling)
        hr10 = random.choice(Medical_Marijuana_Dispensary)
        hr11 = random.choice(Money_Service_Business)
        hr12 = random.choice(NAICS.NAICS_Code)
        hr13 = random.choice(NonRegulated_Financial_Institution)
        hr14 = random.choice(Not_Profit)
        #hr15=random.choice(Occupation) - added before through gen_data
        hr16 = random.choice(Privately_ATM_Operator)
        hr17 = random.choice(Products)
        hr18 = random.choice(Sales_Used_Vehicles)
        hr19 = random.choice(Services)
        hr20 = random.choice(SIC_Code)
        hr21 = random.choice(Stock_Market_Listing)
        hr22 = random.choice(Third_Party_Payment_Processor)
        hr23 = random.choice(Transacting_Provider)

        if (PEP == 'Yes' or SAR == 'Yes' or lrg_cash_ex == 'Yes'
                or demarket == 'Yes' or hr0 == 'Yes' or hr01 == 'Yes'
                or hr02 == 'Yes' or hr03 == 'Yes' or hr1 == 'Yes'
                or hr2 == 'Yes' or hr3 == 'Yes' or hr4 == 'Yes' or hr5 == 'Yes'
                or hr6 == 'Yes' or hr7 == 'Yes' or hr8 == 'Yes' or hr9 == 'Yes'
                or hr10 == 'Yes' or hr11 == 'Yes' or hr13 == 'Yes'
                or hr14 == 'Yes' or hr16 == 'Yes' or hr17 == 'Yes'
                or hr18 == 'Yes' or hr22 == 'Yes' or hr23 == 'Yes'
                or HighNetWorthFlag == 'Yes'):
            high_risk = 'Yes'
            hr_rating = random.choice(refrating)
        if (SAR == 'No' and high_risk == 'No'):
            if (max((randrange(0, 101, 1) - 99), 0) == 1):
                high_risk = 'Yes'
                hr_rating = random.choice(refrating)
        if (PEP == 'No' and high_risk == 'No'):
            if (max((randrange(0, 101, 1) - 99), 0) == 1):
                high_risk = 'Yes'
                hr_rating = random.choice(refrating)
        if (high_risk == 'No'):
            if (max((randrange(0, 101, 1) - 99), 0) == 1):
                high_risk = 'Yes'
                hr_rating = random.choice(refrating)
        row.extend([
            hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11, hr12,
            hr13, hr14, hr16, hr17, hr18, hr19, hr20, hr21, hr22, hr23,
            HighNetWorthFlag, high_risk, hr_rating,
            random.choice(Use_Case)
        ])
        liCSV.append(row)
    return liCSV
Exemplo n.º 14
0
#Segment attributes indexed by the randomly drawn Segment_ID (0-4); each entry
#is appended to the row unchanged (six string columns per segment)
_SEGMENT_ROWS = (
    ['01', 'LOB Specific', 'IRRI', 'Group 1', 'High Risk Tier', '200'],
    ['02', 'Profitability', 'CRS Risk Score', 'Group 1', 'Mid Risk Tier',
     '300'],
    ['03', 'Geographical', 'Geo Risk', 'Group 2', 'Low Risk Tier', '400'],
    ['04', 'Behavioral', 'Financial Behavior Risk', 'Group 3',
     'Vertical Risk', '100'],
    ['05', 'Risk Tolerance', 'CM Risk', 'Group 4', 'Geographical Risk', '500'],
)

#Base for the very first account number; same seed value the batch customer
#generator earlier in this file starts from
_FIRST_ACCT_BASE = 10786147


def _drew_top(upper):
    """Draw an integer from 0..upper inclusive; True only when it is `upper`.

    Equivalent to the original `max(randrange(0, upper + 1, 1) - (upper - 1),
    0) == 1` idiom. Note the odds are 1 in (upper + 1): `_drew_top(100)` fires
    about 0.99% of the time and `_drew_top(10)` about 9.1% of the time.
    """
    return randrange(0, upper + 1, 1) == upper


def gen_cust(liSSNMaster, acct_list, i, start=None):
    """Build one synthetic customer record as a flat list of column values.

    Parameters:
        liSSNMaster -- indexable sequence of SSNs; entry `i` is used for the
            main account holder and random other entries for additional
            card holders.
        acct_list -- list of account numbers generated so far. The new account
            number is appended to it (the caller's list is mutated), and it is
            the pool from which a related account may be picked.
        i -- index of this customer; stored as the first column and used to
            index liSSNMaster.
        start -- optional account number to count up from. When omitted, the
            last entry of acct_list is used, or 10786147 if the list is empty.
            (Previously `start` was read before it was ever assigned, so every
            call failed with UnboundLocalError.)

    Returns:
        list -- the row for this customer.

    The order of the random draws below is kept stable on purpose so that a
    seeded run produces the same sequence of values.
    """
    fake = Faker()

    #Initiate High Risk Flags
    #Politically Exposed Person
    PEP = 'No'
    #Customer with a Suspicious Activity Report
    SAR = 'No'
    #Closed-account flag
    Clsd = choice(Clsd_flag)
    #High risk customer flag and rating
    high_risk = 'No'
    hr_rating = ''
    #Customer that was demarketed by the bank
    demarket = 'No'
    dem_date = ''

    #Random choice for number of credit cards per account number
    No_CCs = choice(Number_CC)

    #Account numbers grow by a random step of 2..10 from the previous one
    if start is None:
        start = acct_list[-1] if len(acct_list) else _FIRST_ACCT_BASE
    acct = start + 1 + randrange(1, 10, 1)

    #Randomly generates customer name
    name = fake.name()
    #tmp[0] is stored in the middle-name column; tmp[1] is appended to the
    #company name below (presumably a name suffix/part -- verify in gen_data)
    tmp = gen_data.create_name()
    #Record the new account number in the caller's account list
    acct_list.append(acct)

    #Main account holder SSN is the current index in the master SSN list
    row = [
        i, acct,
        choice(Acct_Type), No_CCs, name, tmp[0], liSSNMaster[i]
    ]

    #Names, middle names and SSNs of the additional credit card users
    names = []
    mdl = []
    ssn = []
    for _ in range(No_CCs - 1):
        names.append(fake.name())
        #Middle name to reduce name dups
        mdl.append(gen_data.create_name()[0])
        #Pull from SSN Master list, stepping back one slot if the draw lands
        #on the main account holder's own index
        randInt = randrange(1, len(liSSNMaster), 1)
        if randInt == i:
            randInt -= 1
        ssn.append(liSSNMaster[randInt])

    #Name and SSN are set to blank if less than 4 customers on an account
    blanks = [''] * (4 - No_CCs)
    names.extend(blanks)
    mdl.extend(blanks)
    ssn.extend(blanks)

    #Sets CC_NO to a random credit card number
    #NOTE(review): CC_NO[0] goes into the row as-is and CC_NO[1][0] is treated
    #as the card number string -- confirm against gen_data.create_cc_number
    CC_NO = gen_data.create_cc_number()
    CC_TRANS = CC_NO[1][0]
    #Scramble: last 4 of the card + digits of the current timestamp + a random
    #number, to make the printed value unlikely to repeat
    clean = re.sub(r'\W', '', str(datetime.now()))
    printCC = str(CC_TRANS[-4:]) + str(clean[-12:-3]) + str(
        randrange(1111, 9999, randrange(1, 10, 1)))

    #Add data elements to current csv row
    row.extend([
        names[0], mdl[0], ssn[0],
        names[1], mdl[1], ssn[1],
        names[2], mdl[2], ssn[2],
        printCC, CC_NO[0],
        gen_data.create_company_name() + ' ' + tmp[1],
        gen_data.create_email(),
        gen_data.create_job_title()
    ])

    #Creates Current Address
    zip_code = choice(zips.zip)
    addr = geo_data.create_city_state_zip[zip_code]
    #Creates Previous address
    prev_zip = choice(zips.zip)
    prev_addr = geo_data.create_city_state_zip[prev_zip]

    lrg_cash_ex = choice(Yes_No)

    #Condition for SARs and Demarketed Clients
    if Clsd == 'Yes':
        #About 1 in 101 closed accounts are demarketed without a SAR
        #(SAR is always 'No' at this point)
        if _drew_top(100):
            demarket = 'Yes'
            dem_date = gen_data.create_date(past=True)
        #About 1 in 11 of the remaining closed accounts get a SAR; the draw is
        #made even when the account is already demarketed
        sar_drawn = _drew_top(10)
        if sar_drawn and demarket == 'No':
            SAR = 'Yes'
            #About 10 in 11 closed accounts with SARs are demarketed
            if not _drew_top(10):
                demarket = 'Yes'
                dem_date = gen_data.create_date(past=True)

    #About 1 in 101 accounts are PEP
    if _drew_top(100):
        PEP = 'Yes'

    row.extend([
        addr[0], addr[1], zip_code, 'US',
        prev_addr[0], prev_addr[1], prev_zip, 'US',
        gen_data.create_birthday(min_age=2, max_age=85), PEP, SAR, Clsd
    ])

    #Start generating related accounts from account list once 10,000 accounts
    #are generated; roughly 1 in 10001 customers then gets one
    rel = 0
    if i > 10000:
        candidate = int(choice(acct_list))
        if _drew_top(10000):
            rel = candidate
    if rel != 0:
        row.extend([rel, choice(Related_Type)])
    else:
        row.extend(['', ''])

    #Randomly generates account start date
    party_start = gen_data.create_date(past=True)
    #Randomly selects consent option for sharing info
    Consent_Share = choice(Yes_No_Consent)

    #Add additional data elements to current csv row
    row.extend([
        choice(Party_Type),
        choice(Party_Relation), party_start,
        gen_data.create_date(past=True), lrg_cash_ex, demarket, dem_date,
        randrange(0, 100, 1),
        choice(Official_Lang)
    ])

    #Preferred method of contact only when the customer consents to sharing
    if Consent_Share == 'Yes':
        row.extend(['Yes', choice(Preffered_Channel)])
    else:
        row.extend(['No', ''])

    #DO NOT USE CUST STATUS BELOW - NOT INTEGRATED WITH CLOSED STATUS!
    row.extend([zip_code, randrange(0, 5, 1)])

    #Generates Segment ID then adds the matching Segment data to the row
    Segment_ID = randrange(0, 5, 1)
    row.extend(_SEGMENT_ROWS[Segment_ID])

    #Arms Manufacturer random choice
    hr0 = choice(Yes_No_Cust_Flag)
    #Auction random choice
    hr01 = choice(Yes_No_Cust_Flag)
    #Cash Intensive Business random choice
    hr02 = choice(Yes_No_Cust_Flag)
    #Casino Gambling random choice
    hr03 = choice(Yes_No_Cust_Flag)
    #Channel Onboarding random choice
    hr04 = choice(Channel_Onboarding)
    #Channel Ongoing Transactions random choice
    hr05 = choice(Channel_Ongoing_Transactions)
    row.extend([hr0, hr01, hr02, hr03, hr04, hr05])

    #Randomly select whether customer has a High Net Worth
    HighNetWorthFlag = choice(HighNetWorth)
    #Randomly generates customer net worth based on the above flag
    if HighNetWorthFlag == 'Yes':
        #About 1 in 101 get a 5M-25M figure; everyone else lands in 1M-5M
        #(the larger of the two draws is kept)
        multiplier = 1 if _drew_top(100) else 0
        row.append(
            max(multiplier * randrange(5000000, 25000000, 1),
                randrange(1000000, 5000000, 1)))
    else:
        flag = choice(LowNet)
        if flag == 0:
            row.append(randrange(-250000, 600000, 1))
        elif flag == 1:
            row.append(randrange(149000, 151000, 1))
        else:
            row.append(randrange(40000, 50000, 1))

    #Complex_HI_Vehicle random choice
    hr1 = choice(Yes_No_Cust_Flag)
    #Dealer_Precious_Metal random choice
    hr2 = choice(Yes_No_Cust_Flag)
    #Digital_PM_Operator random choice
    hr3 = choice(Yes_No_Cust_Flag)
    #Embassy_Consulate random choice
    hr4 = choice(Yes_No_Cust_Flag)
    #Exchange_Currency random choice
    hr5 = choice(Yes_No_Cust_Flag)
    #Foreign_Financial_Institution random choice
    hr6 = choice(Yes_No_Cust_Flag)
    #Foreign_Government random choice
    hr7 = choice(Yes_No_Cust_Flag)
    #Foreign_NonBank_Financial_Institution random choice
    hr8 = choice(Yes_No_Cust_Flag)
    #Internet_Gambling random choice
    hr9 = choice(Yes_No_Cust_Flag)
    #Medical_Marijuana_Dispensary random choice
    hr10 = choice(Yes_No_Cust_Flag)
    #Money_Service_Business random choice
    hr11 = choice(Yes_No_Cust_Flag)
    #NAICS code random choice
    hr12 = choice(NAICS.NAICS_Code)
    #NonRegulated_Financial_Institution random choice
    hr13 = choice(Yes_No_Cust_Flag)
    #Not_Profit random choice
    hr14 = choice(Yes_No_Cust_Flag)
    #Occupation (hr15) is intentionally not drawn here
    #Privately_ATM_Operator random choice
    hr16 = choice(Yes_No_Cust_Flag)
    #Products random choice
    hr17 = choice(Products)
    #Sales_Used_Vehicles random choice
    hr18 = choice(Yes_No_Cust_Flag)
    #Services random choice
    hr19 = choice(Services)
    #SIC_Code random choice
    hr20 = choice(SIC_Code)
    #Stock_Market_Listing random choice
    hr21 = choice(Stock_Market_Listing)
    #Third_Party_Payment_Processor random choice
    hr22 = choice(Yes_No_Cust_Flag)
    #Transacting_Provider random choice
    hr23 = choice(Yes_No_Cust_Flag)

    #Weighted pool of risk ratings: '5' is by far the most likely outcome
    refrating = ['1'] * 3 + ['2', '4'] * 2 + ['3'] + ['5'] * 12

    #Any single 'Yes' among these flags makes the customer high risk
    risk_flags = (PEP, SAR, lrg_cash_ex, demarket, hr0, hr01, hr02, hr03, hr1,
                  hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11, hr13,
                  hr14, hr16, hr17, hr18, hr22, hr23, HighNetWorthFlag)
    if 'Yes' in risk_flags:
        high_risk = 'Yes'
        hr_rating = choice(refrating)
    elif _drew_top(100):
        #Otherwise about 1 in 101 customers are flagged high risk at random
        high_risk = 'Yes'
        hr_rating = choice(refrating)

    row.extend([
        hr1, hr2, hr3, hr4, hr5, hr6, hr7, hr8, hr9, hr10, hr11, hr12, hr13,
        hr14, hr16, hr17, hr18, hr19, hr20, hr21, hr22, hr23, HighNetWorthFlag,
        high_risk, hr_rating,
        choice(Use_Case)
    ])
    #End the current row
    return row