Example #1
0
def main():
	"""Build positive/negative word matrices from the stars dataset and run
	the four feature-comparison experiments (test1..test4).

	Compares cluster-derived binary features (k-means and s-means) against
	raw binary word features and a random 100-word sample of them.
	"""
	# Load reviews and drop the CSV header row; shuffle for a random order.
	filename = "stars_data.csv"
	data = a.read_data(filename)
	data.pop(0)
	random.shuffle(data)
	frequency = a.frequency_word(data)

	# Column 6 holds the star rating: 1 = negative review, 5 = positive.
	data_neg = [x for x in data if int(x[6]) == 1]
	data_pos = [x for x in data if int(x[6]) == 5]

	# Word matrices (up to 2000 reviews x 2500 vocabulary words).
	matrix_pos = cluster.create_matrix(np.zeros((2000, 2500)), data_pos, frequency)
	matrix_neg = cluster.create_matrix(np.zeros((2000, 2500)), data_neg, frequency)

	# Cluster-derived binary features (50 clusters) and the raw word features.
	kmeans_feature = cluster.kmeans_bin(data, matrix_pos, matrix_neg, frequency, 50)
	smeans_feature = cluster.smeans_bin(data, matrix_pos, matrix_neg, frequency, 50)
	origin_feature = a.create_binary_feature(data, frequency, 6)

	# 100 randomly sampled raw word features, plus their combination with k-means.
	sample_origin_feature = a.create_binary_feature(data, random.sample(frequency, 100), 6)
	combine_feature = combine(kmeans_feature, sample_origin_feature)

	# print(...) with a single argument is valid in both Python 2 and Python 3.
	print("Test1")
	test1(matrix_pos, matrix_neg)
	print("Test2")
	test2(kmeans_feature, smeans_feature)
	print("Test3")
	test3(origin_feature, kmeans_feature)
	print("Test4")
	test4(sample_origin_feature, kmeans_feature, combine_feature)
Example #2
0
def main(args):
	"""Train and evaluate a naive Bayes classifier on binary word features.

	args: argv-style list —
	  args[1] train file path, args[2] test file path,
	  args[3] class-label column index (int),
	  args[4] 1 to print the top-frequency words.
	Prints the zero-one loss on the test set.
	"""
	# Parse command-line arguments.
	trainfile = args[1]
	testfile = args[2]
	classlabel = int(args[3])
	printWord = int(args[4])

	# Load train and test data.
	train = a.read_data(trainfile)
	test = a.read_data(testfile)

	# Top-frequency words used as the feature vocabulary.
	fre = a.frequency(train)

	# Optionally print the top words.
	if printWord == 1:
		a.printTopwords(fre)

	# Build binary word-presence features for both datasets.
	train = a.create_binary_feature(train, fre, classlabel)
	test = a.create_binary_feature(test, fre, classlabel)

	# Train NBC: conditional probability table plus the two class priors.
	prob_table, pYes, pNo = a.train_nbc(train)

	# Classify the test set with the learned probabilities.
	result = a.test_nbc(prob_table, test, pYes, pNo)

	# Gold labels are the last column of each test row. (Named separately so
	# the classlabel argument above is not silently overwritten.)
	gold_labels = [x[-1] for x in test]

	# Zero-one loss between predictions and gold labels.
	# (Helper name 'zero_onr_loss' is the project API as-is.)
	diff = a.zero_onr_loss(result, gold_labels)

	print("ZERO-ONE-LOSS {0}".format(diff))
Example #3
0
def main():
	"""Mine association rules from binary word features of the stars dataset
	with Apriori and print the 30 highest-ranked rules.
	"""
	# Data preprocessing: read reviews, drop the CSV header row, shuffle.
	filename = "stars_data.csv"
	data = a.read_data(filename)
	data.pop(0)
	random.shuffle(data)
	words = a.frequency_word(data)
	features = a.create_binary_feature(data, words, 6)
	# Class-label items appended so rules may involve review polarity.
	words.append("isPositive")
	words.append("isNegative")
	minsupport = 0.03
	minconf = 3.81

	# Frequent itemsets of sizes 1..3 and their support counts.
	L, support_count = apriori.frequentItemsetGeneration(features, words, minsupport)
	print(len(L[0]) + len(L[1]) + len(L[2]))

	# Generate rules meeting the confidence threshold.
	rules = ruleG(L, support_count, minconf)
	print(len(rules))

	# Top 30 rules by score; slicing (instead of indexing range(30)) avoids
	# an IndexError when fewer than 30 rules were generated.
	top_rules = sorted(rules.items(), key=operator.itemgetter(1), reverse=True)[:30]

	for rule in top_rules:
		print(rule)
Example #4
0
def A_Cabins(filepath=r'Data_Input\cabins.csv'):
    """
    --------------------------------------------------------------------------
    Calculate the total passenger cabin area
    --------------------------------------------------------------------------
    Input:
    filepath - str, path to the cabins CSV file (columns used: N_Cabins,
               Berths, Area), default=r'Data_Input\cabins.csv'
    --------------------------------------------------------------------------
    Output:
    A_Pas_Cabins - float, total passenger cabin area, sum of
                   N_Cabins * Area over all cabin types [m2]
    --------------------------------------------------------------------------
    """
    # Read cabin data. (Raw-string default keeps the backslash literal and
    # avoids the invalid-escape deprecation for '\c'; value is unchanged.)
    dt = dpro.read_data(filepath=filepath, print_stats=False)
    # Derived columns: total berths and total area per cabin type.
    dt['N_Berths'] = dt['N_Cabins'] * dt['Berths']
    dt['A_Cabins'] = dt['N_Cabins'] * dt['Area']
    # Sum the per-type areas into one figure.
    A_Pas_Cabins = dt['A_Cabins'].sum()
    return A_Pas_Cabins
Example #5
0
def main():
	"""Run Apriori support queries (q2) for a few hand-picked itemsets over
	binary word features built from the stars dataset.
	"""
	
	#data preprocessing: read reviews, drop the CSV header row, shuffle
	filename = "stars_data.csv"
	data = a.read_data(filename)
	data.pop(0)
	random.shuffle(data)
	words = a.frequency_word(data)	
	features = a.create_binary_feature(data,words,6)
	# Class-label items appended to the vocabulary.
	words.append("isPositive")
	words.append("isNegative")
	# NOTE(review): minsupport/minconf are assigned but never passed to q2
	# below — presumably thresholds used elsewhere; confirm in the full file.
	minsupport = 0.03
	minconf = 0.25
	
	# Transaction database: one set of present items per review.
	D = construct(features,words)
	D = map(set, D)
	# Query itemsets whose support q2 reports.
	t = []
	t.append(frozenset(['friendly']))
	t.append(frozenset(['isPositive']))
	t.append(frozenset(['staff']))
	t.append(frozenset(['favorite']))
	
	q2(D,t)
	# NOTE(review): the triple quote below opens a string literal that is not
	# closed within this view — likely the start of a commented-out section
	# truncated by the snippet scrape; confirm against the full file.
	'''
Example #6
0
#Record the start time so total execution time can be measured.
start_time = time.time()

#%% Set global parameters
setfilters = True  #Set argument for running the filter
UpdateReport = False  #Update the report once the code has executed
MergeDatabase = False  #Update the database file from the database

#%% Read and analyze the data
#Merge the raw database archive files into the working database
if MergeDatabase:
    dp.merge_database(path='Ship_Database_Raw_Archives/',
                      ShipType='-',
                      ShipTypeTags='-')

#Read dataframe - database
ShipData = dp.read_data(print_summary=True)

#Apply missing-data fill filters: each entry is [column name, fill value]
#(e.g. missing MCR defaults to 85; lane/track lengths and vehicle counts
# default to 0; typical car length 4.5 m; typical trailer length 12 m)
fill_filters = [['MCR', 85], ['L_LaneTrailer', 0], ['L_LaneTrailer(or)', 0],
                ['L_LaneCar', 0], ['L_LaneCar(or)', 0], ['L_TrainTrack', 0],
                ['N_Trailers', 0], ['N_Trailers(or)', 0], ['N_Cars', 0],
                ['N_Cars(or)', 0], ['L_Car', 4.5], ['L_Trailer', 12],
                ['L_TrainTrack', 0], ['LNG Tanks', 0]]
ShipData = dp.batch_fill_data(ShipData, fill_filters)

#Process the data
#ADD: Analyse the data for pass and vehicle area vs no decks
ShipData = dp.data_process(ShipData)

#Filter the data
if setfilters: