Example #1
def run_ablation_svm(X, y, class_names, quality):
	"""
	Runs ablation tests on svm model (i.e. runs the SVM on only a subset
	of features to determine which features are more important than others)

	Args:
		X: 
			input examples
		y: 
			labels of examples
		class_names: 
			list of actual signs
		quality: 
			flag that determines the quality of the data being passed in

	Returns: 
		None
	"""
	feature_array = preprocessing.get_feature_list(quality)
	feature_remove_list = []
	for feature in feature_array: 
		feature_remove_list.append(feature)
		ablated_X1 = preprocessing.get_ablated_matrix(X, quality, [feature])

		if len(feature_remove_list) > 1 and len(feature_remove_list) < len(feature_array): 
			ablated_X2 = preprocessing.get_ablated_matrix(X, quality, feature_remove_list)
		#create splits
		X_train, y_train, X_test, y_test = preprocessing.create_train_test_split(ablated_X1, y, test_size=0.3, shuffle=True)	
		flattened_Xtrain = preprocessing.flatten_matrix(X_train)
		flattened_Xtest = preprocessing.flatten_matrix(X_test)	

		print "fitting svm on feature set without feature %s..." % feature 
		# fit svm model
		svm_model = svm.SVC(kernel="linear", decision_function_shape='ovr')
		svm_model.fit(flattened_Xtrain, y_train)
		y_predict_train = svm_model.predict(flattened_Xtrain)
		y_predict = svm_model.predict(flattened_Xtest)
		
		print "running analyses on feature set without feature %s..." % feature
		analysis.run_analyses(y_predict_train, y_train, y_predict, y_test, class_names, True)

		#true ablation tests
		if len(feature_remove_list) > 1 and len(feature_remove_list) < len(feature_array): 
			X_train, y_train, X_test, y_test = preprocessing.create_train_test_split(ablated_X2, y, test_size=0.3, shuffle=False)	
			flattened_Xtrain = preprocessing.flatten_matrix(X_train)
			flattened_Xtest = preprocessing.flatten_matrix(X_test)	

			print "fitting svm on feature set without the features %s..." % ", ".join(feature_remove_list) 
			# fit svm model
			svm_model = svm.SVC(kernel="linear", decision_function_shape='ovr')
			svm_model.fit(flattened_Xtrain, y_train)
			y_predict_train = svm_model.predict(flattened_Xtrain)
			y_predict = svm_model.predict(flattened_Xtest)
		
			print "running analyses on feature set without the features %s..." % ", ".join(feature_remove_list)
			analysis.run_analyses(y_predict_train, y_train, y_predict, y_test, class_names, True)
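
A minimal usage sketch for the function above, assuming the same io and preprocessing helpers imported by the other examples:

if __name__ == '__main__':
	# hypothetical driver: load data, build the tensor, run the ablation study
	data = io.load_data(quality='high')
	X, y, class_names = preprocessing.create_data_tensor(data)
	run_ablation_svm(X, y, class_names, 'high')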
Example #2
def run_logreg(quality='high'):
    """
	Runs a simple logistic regression model; first fits the model
	on the training data (70 percent of the total data) and tests on 
	the rest of the data.

	Args:
		none

	Returns:
		none
	"""

    data = io.load_data(quality=quality)
    X, y, class_names = preprocessing.create_data_tensor(data)
    X_train, y_train, X_test, y_test = preprocessing.create_train_test_split(
        X, y, test_size=0.3, shuffle=True)

    # flatten data
    flattened_Xtrain = preprocessing.flatten_matrix(X_train)
    flattened_Xtest = preprocessing.flatten_matrix(X_test)

    # fit logistic regression model
    logreg_model = linear_model.LogisticRegression(multi_class='ovr')
    logreg_model.fit(flattened_Xtrain, y_train)
    y_predict_train = logreg_model.predict(flattened_Xtrain)
    y_predict = logreg_model.predict(flattened_Xtest)

    # print metrics and confusion plot
    analysis.run_analyses(y_predict_train, y_train, y_predict, y_test,
                          class_names)
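
run_logreg loads and splits the data itself, so a run is a single call; a minimal sketch:

if __name__ == '__main__':
    # fit and evaluate on each data quality
    run_logreg(quality='high')
    run_logreg(quality='low')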
Example #3
def run_svm(quality="high", ablation=False, concat=True):
	"""
	Runs a simple SVM model with a linear kernel; first fits the model
	on the training data (70 percent of the total data) and tests on 
	the rest of the data.

	Args:
		None

	Returns: 
		None
	"""
	data = io.load_data(quality=quality)
	X, y, class_names = preprocessing.create_data_tensor(data)	
	if ablation: 
		run_ablation_svm(X, y, class_names, quality)
		return 
	X_train, y_train, X_test, y_test = preprocessing.create_train_test_split(X, y, test_size=0.3, shuffle=False)

	flattened_Xtrain = preprocessing.flatten_matrix(X_train)
	flattened_Xtest = preprocessing.flatten_matrix(X_test)	

	if concat:
		X_train_400 = np.load('../data/seq_mining_features/k_2-w_2/X_train-400.npy')
		X_test_400 = np.load('../data/seq_mining_features/k_2-w_2/X_test-400.npy')
		y_train_400 = np.load('../data/seq_mining_features/k_2-w_2/y_train-400.npy')
		y_test_400 = np.load('../data/seq_mining_features/k_2-w_2/y_test-400.npy')

		print X_train_400.shape, y_train_400.shape
		print flattened_Xtrain.shape, y_train.shape
		print '-----------'
		print X_test_400.shape, y_test_400.shape
		print flattened_Xtest.shape, y_test.shape

		flattened_Xtrain_concatenated = np.hstack((flattened_Xtrain, X_train_400[:, 0:75]))
		flattened_Xtest_concatenated = np.hstack((flattened_Xtest, X_test_400[:, 0:75]))

		print '-----------'
		print flattened_Xtrain_concatenated.shape, y_train.shape
		print flattened_Xtest_concatenated.shape, y_test.shape

	C = 0.01
	# fit svm model
	svm_model = svm.SVC(kernel="linear", C=C, decision_function_shape='ovr')
	svm_model.fit(flattened_Xtrain, y_train)
	y_predict_train = svm_model.predict(flattened_Xtrain)
	y_predict = svm_model.predict(flattened_Xtest)

	analysis.run_analyses(y_predict_train, y_train, y_predict, y_test, class_names, ablation=False, confusion=False)

	if concat:
		# the concatenated matrices only exist when concat=True; guarding here
		# avoids a NameError on the default path
		print '-----------==================-----------'
		svm_model = svm.SVC(kernel="linear", C=C, decision_function_shape='ovr')
		svm_model.fit(flattened_Xtrain_concatenated, y_train)
		y_predict_train = svm_model.predict(flattened_Xtrain_concatenated)
		y_predict = svm_model.predict(flattened_Xtest_concatenated)

		analysis.run_analyses(y_predict_train, y_train, y_predict, y_test, class_names, ablation=False, confusion=False)
		print '-----------==================-----------'
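
A hedged usage sketch; the concat path assumes the precomputed feature files under ../data/seq_mining_features/k_2-w_2/ are present:

if __name__ == '__main__':
	# plain linear SVM on the raw features only
	run_svm(quality='high', ablation=False, concat=False)
	# per-feature ablation study (delegates to run_ablation_svm)
	run_svm(quality='high', ablation=True)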
Example #4
def main():
    data = io.load_data(quality="low")
    X, y, class_names = preprocessing.create_data_tensor(data)
    X_train, y_train, X_test, y_test = preprocessing.create_train_test_split(
        X, y)
    X = preprocessing.scale_spatially(X)

    # for i in class_names:
    # 	# print i
    # 	# print class_names[i]
    # 	# print np.where(y==i)
    # 	# print np.where(y==i)[0]
    # 	# print np.where(y==i)[0].size
    # 	print class_names[i], i, np.where(y==i)[0].size

    # analysis.plot_signals_two_column(data[class_names[y[0]]][0][:, 0:3],
    # 								 X[0, 0:3, :].T,
    # 									['Raw X', 'Raw Y', 'Raw Z'],
    # 									['Resampled X', 'Resampled Y', 'Resampled Z'])

    shop_idx = np.where(y == 3)[0]
    shop_idx = shop_idx[0:6]

    # print X[shop_idx, 0, :].shape
    NUM = 3
    C1 = 0
    C2 = 3
    d1 = X[np.where(y == C1)[0][0:NUM], 2, :].T
    d2 = X[np.where(y == C2)[0][0:NUM], 2, :].T

    d1p = np.roll(d1, -1, 0) - d1
    # d1p = d1p[0:d1.shape[0]-1]
    d1p[-1, :] = d1p[-2, :]

    d2p = np.roll(d2, -1, 0) - d2
    d2p[-1, :] = d2p[-2, :]
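
    # The two blocks above compute forward differences: np.roll(d, -1, 0)
    # shifts the rows up by one, so (rolled - d)[t] equals d[t+1] - d[t].
    # The last row is a wrap-around artifact (d[0] - d[-1]) and is
    # overwritten with the previous difference instead.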

    labels1 = [str(class_names[C1]) + ' ' + str(i) for i in xrange(NUM)] + [
        str(class_names[C1]) + '\' ' + str(i) for i in xrange(NUM)
    ]
    labels2 = [str(class_names[C2]) + ' ' + str(i) for i in xrange(NUM)] + [
        str(class_names[C2]) + '\' ' + str(i) for i in xrange(NUM)
    ]

    print d1.shape, d1p.shape
    print d2.shape, d2p.shape

    print np.concatenate((d1, d1p), 1).shape

    # analysis.plot_signals_two_column(np.concatenate((d1, d1p), 1),
    # 								 np.concatenate((d2, d2p), 1),
    # 								labels1,
    # 								labels2)

    print class_names[4]
    d2 = X[np.where(y == 4)[0][0:2], 2, :].T
    # d2 now holds two examples of class 4, so build matching labels
    labels4 = [str(class_names[4]) + ' ' + str(i) for i in xrange(2)]

    analysis.plot_signals(d2, labels4)
Example #5
def run_nn(quality='low'):
    """
    Runs a simple neural network model; first fits the model
    on the training data (70 percent of the total data) and tests on 
    the rest of the data.

    Args:
        none

    Returns:
        none
    """

    data = io.load_data(quality=quality)
    X, y, class_names = preprocessing.create_data_tensor(data)
    X_train, y_train, X_test, y_test = preprocessing.create_train_test_split(
        X, y, test_size=0.3, shuffle=False)

    y_train_one_hot = np.zeros((y_train.shape[0], len(class_names)))
    for i in range(y_train.shape[0]):
        y_train_one_hot[i, y_train[i]] = 1

    y_test_one_hot = np.zeros((y_test.shape[0], len(class_names)))
    for i in range(y_test.shape[0]):
        y_test_one_hot[i, y_test[i]] = 1
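
    # Equivalent vectorized one-hot encoding (a sketch, assuming the labels
    # are 0-based integer indices into class_names):
    # y_train_one_hot = np.eye(len(class_names))[y_train]
    # y_test_one_hot = np.eye(len(class_names))[y_test]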

    # flatten data
    flattened_Xtrain = preprocessing.flatten_matrix(X_train)
    flattened_Xtest = preprocessing.flatten_matrix(X_test)

    # fit neural network model
    HIDDEN_LAYER_SIZE = y_train_one_hot.shape[1]
    # HIDDEN_LAYER_SIZE = y_train_one_hot.shape[1]*2

    ############ FAILED ##################
    # HIDDEN_LAYER_SIZE = X_train.shape[1]
    #
    ######################################
    nn_model = Sequential()

    ####################################### MODELS #######################################

    ####################################### MODEL1 #######################################
    # Training Error: 0.517945411562
    # Testing Error: 0.778446115288
    # Epochs: 200
    #
    nn_model.add(
        Dense(HIDDEN_LAYER_SIZE,
              input_dim=flattened_Xtrain.shape[1],
              init='uniform',
              activation="tanh"))
    nn_model.add(Dropout(0.2))
    nn_model.add(
        Dense(y_train_one_hot.shape[1], init='uniform', activation="tanh"))
    N_EPOCHS = 200
    ####################################### MODEL1 #######################################

    ####################################### MODEL2 #######################################
    # Training Error: 0.
    # Testing Error: 0.
    #
    # nn_model.add(Dense(HIDDEN_LAYER_SIZE, input_dim=flattened_Xtrain.shape[1], init='uniform', activation="tanh"))
    # nn_model.add(Dropout(0.2))
    # nn_model.add(Dense(HIDDEN_LAYER_SIZE*10, activation="tanh", init='uniform'))
    # nn_model.add(Dropout(0.3))
    # nn_model.add(Dense(HIDDEN_LAYER_SIZE*2, activation="tanh", init='uniform'))
    # nn_model.add(Dense(y_train_one_hot.shape[1], init='uniform', activation="tanh"))
    ####################################### MODEL2 #######################################

    sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    nn_model.compile(loss='mean_squared_error', optimizer=sgd)

    nn_model.fit(flattened_Xtrain, y_train_one_hot, nb_epoch=N_EPOCHS)
    y_predict_train = nn_model.predict_classes(flattened_Xtrain)
    y_predict = nn_model.predict_classes(flattened_Xtest)
    # y_predict_one_hot = nn_model.predict(flattened_Xtest)

    # print metrics and confusion plot
    analysis.run_analyses(y_predict_train, y_train, y_predict, y_test,
                          class_names)
Example #6
def seg_mining(use_all_signs):
    # Loading data
    data = io.load_data(quality="low")
    X, y, class_names = preprocessing.create_data_tensor(data)
    X_train, y_train, X_test, y_test = preprocessing.create_train_test_split(
        X, y, test_size=0.3, shuffle=False)

    if not use_all_signs:
        # TODO: change not break
        X_train = preprocessing.scale_spatially(
            X_train)[:NUM_SIGNS * EXAMPLES_PER_SIGN, :NUM_SIGNALS, :]
        y_train = y_train[:NUM_SIGNS * EXAMPLES_PER_SIGN]
        X_test = preprocessing.scale_spatially(
            X_test)[:NUM_SIGNS * (70 - EXAMPLES_PER_SIGN), :NUM_SIGNALS, :]
        y_test = y_test[:NUM_SIGNS * (70 - EXAMPLES_PER_SIGN)]
    else:
        X_train = preprocessing.scale_spatially(X_train)
        X_test = preprocessing.scale_spatially(X_test)

    # print X.shape
    # sys.exit()
    # Computing fake slopes
    dX_train = np.roll(X_train, -1, 2) - X_train
    dX_train[:, :, -1] = dX_train[:, :, -2]
    dX_test = np.roll(X_test, -1, 2) - X_test
    dX_test[:, :, -1] = dX_test[:, :, -2]

    combined_trends, combined_trends_interval_start, combined_trends_interval_end = create_combined_trends(
        dX_train)
    combined_trends_test, combined_trends_interval_start_test, combined_trends_interval_end_test = create_combined_trends(
        dX_test)

    # Computing trends
    # binary_I_idx_train = (INCREASING_THRESHOLD >= dX_train) & (dX_train > STEADY_THRESHOLD)
    # I_idx_train = np.where(binary_I_idx_train == 1)
    # binary_VI_idx_train = (VERY_INCREASING_THRESHOLD >= dX_train) & (dX_train > INCREASING_THRESHOLD)
    # VI_idx_train = np.where(binary_VI_idx_train == 1)
    # WI_idx_train = np.where(dX_train > VERY_INCREASING_THRESHOLD)
    # binary_D_idx_train = (-1*INCREASING_THRESHOLD <= dX_train) & (dX_train < -1*STEADY_THRESHOLD)
    # D_idx_train = np.where(binary_D_idx_train == 1)
    # binary_VD_idx_train = (-1*VERY_INCREASING_THRESHOLD <= dX_train) & (dX_train < -1*INCREASING_THRESHOLD)
    # VD_idx_train = np.where(binary_VD_idx_train == 1)
    # WD_idx_train = np.where(dX_train < -1*VERY_INCREASING_THRESHOLD)
    # S_idx_train = np.where(np.abs(dX_train) <= STEADY_THRESHOLD)

    # trends = np.zeros(dX_train.shape, dtype=np.int8)
    # trends[I_idx_train] = get_trend_idx_train('I')
    # trends[VI_idx_train] = get_trend_idx_train('VI')
    # trends[WI_idx_train] = get_trend_idx_train('WI')
    # trends[S_idx_train] = get_trend_idx_train('S')
    # trends[D_idx_train] = get_trend_idx_train('D')
    # trends[VD_idx_train] = get_trend_idx_train('VD')
    # trends[WD_idx_train] = get_trend_idx_train('WD')

    # I_idx_train = np.where(dX_train > STEADY_THRESHOLD)
    # S_idx_train = np.where(np.abs(dX_train) <= STEADY_THRESHOLD)
    # D_idx_train = np.where(dX_train < -1*STEADY_THRESHOLD)

    # trends = np.zeros(dX_train.shape, dtype=np.int8)
    # trends[I_idx_train] = get_trend_idx('I')
    # trends[S_idx_train] = get_trend_idx('S')
    # trends[D_idx_train] = get_trend_idx('D')

    # # print dX_train.shape, np.prod(dX_train.shape)
    # # print I_idx_train[0].shape, S_idx_train[0].shape, D_idx_train[0].shape
    # # print np.where(dX_train > STEADY_THRESHOLD)
    # # print np.where(trends == 0)[0].shape, np.where(trends == 1)[0].shape, np.where(trends == 2)[0].shape

    # # Combine trends
    # # Intervals are inclusive in nature
    # combined_trends = np.ones(trends.shape, dtype=np.int8) * get_trend_idx('useless')
    # combined_trends_interval_start = np.ones(trends.shape, dtype=np.int8) * get_trend_idx('useless')
    # combined_trends_interval_end = np.ones(trends.shape, dtype=np.int8) * get_trend_idx('useless')

    # for e in xrange(combined_trends.shape[0]):
    # 	for f in xrange(combined_trends.shape[1]):
    # 		combined_trend_idx = 0
    # 		combined_trends[e,f,0] = trends[e,f,0]
    # 		combined_trends_interval_start[e,f,0] = 0
    # 		for t in xrange(1, combined_trends.shape[2]):
    # 			if trends[e,f,t] != trends[e,f,t-1]:
    # 				# Set End time for previous trend
    # 				combined_trends_interval_end[e,f,combined_trend_idx] = t

    # 				# Start next trend
    # 				combined_trend_idx += 1
    # 				combined_trends[e,f,combined_trend_idx] = trends[e,f,t]
    # 				combined_trends_interval_start[e,f,combined_trend_idx] = t

    # print trends[5, 3, :]
    # print combined_trends[5, 3, :]
    # print combined_trends_interval_start[5, 3, :]
    # print combined_trends_interval_end[5, 3, :]
    # NUM_SIGN = 3
    # NUM_EXAMPLES = 70
    # idx = NUM_SIGN*NUM_EXAMPLES
    # print trends[np.where(y==3)[0][0],3,:]
    # print combined_trends[np.where(y==3)[0][0],3,:]
    # print combined_trends_interval_start[np.where(y==3)[0][0],3,:]
    # print combined_trends_interval_end[np.where(y==3)[0][0],3,:]

    # support(combined_trends, None, 10)
    patterns = [None] * NUM_SIGNS
    pattern_counts = Counter()
    all_pattern_supports = [None] * NUM_SIGNS
    for i in xrange(NUM_SIGNS):
        print ''
        print 'For sign %d...' % i
        print 'Generating one patterns.....'
        one_patterns = generate_one_pattern(combined_trends)
        # support((combined_trends, combined_trends_interval_start, combined_trends_interval_end),
        # 	'S:1-o;S:2-b;D:1-o;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1-b;D:1',
        # 	w=5, minsup=20)
        # sys.exit()

        k = 2
        total_time = 0
        patterns[i] = [one_patterns]
        all_pattern_supports[i] = [None]
        while k <= K:
            # generate_k_patterns
            print 'Generating patterns...'

            generation_start_time = time.time()
            total_time = 0
            new_patterns, total_time = generate_k_patterns(
                patterns[i][k - 2], k, total_time)

            generation_end_time = time.time()
            print "k:", k, " -> New patterns:", len(new_patterns)

            print "Total time for generation: ", (generation_end_time -
                                                  generation_start_time)
            print "Total time for comparison: ", (total_time)

            if len(new_patterns) == 0: break
            # prune_patterns
            print 'Pruning patterns...'
            combined_trends_sign = combined_trends[i *
                                                   EXAMPLES_PER_SIGN:(i + 1) *
                                                   EXAMPLES_PER_SIGN, :, :]
            combined_trends_interval_start_sign = combined_trends_interval_start[
                i * EXAMPLES_PER_SIGN:(i + 1) * EXAMPLES_PER_SIGN, :, :]
            combined_trends_interval_end_sign = combined_trends_interval_end[
                i * EXAMPLES_PER_SIGN:(i + 1) * EXAMPLES_PER_SIGN, :, :]

            pruned_patterns, pattern_supports = prune_patterns(
                (combined_trends_sign, combined_trends_interval_start_sign,
                 combined_trends_interval_end_sign), new_patterns, MIN_SUPPORT,
                WINDOW_SIZE)

            all_pattern_supports[i].append(pattern_supports)
            for pattern in pattern_supports:
                pattern_counts[pattern] += 1

            print "Pruned Patterns:", len(pruned_patterns)

            # increment k
            k += 1
            # If no k patterns, break
            if len(pruned_patterns) == 0:
                break
            #print pruned_patterns.keys()[0]

            patterns[i].append(pruned_patterns)
            # patterns.append(new_patterns)

        # print "pruned patterns: "
        # print patterns[i]
        # print all_pattern_supports[i]

    ranked_patterns = chi_square(patterns, pattern_counts,
                                 all_pattern_supports)[:NUM_PATTERN_FEATURES]

    # cut some ranked_patterns out

    X_train_new = construct_feature_vectors(ranked_patterns, \
     (combined_trends, combined_trends_interval_start, combined_trends_interval_end), EXAMPLES_PER_SIGN)
    X_test_new = construct_feature_vectors(ranked_patterns, \
     (combined_trends_test, combined_trends_interval_start_test, combined_trends_interval_end_test), 70 - EXAMPLES_PER_SIGN)

    # print ranked_patterns[:NUM_PATTERN_FEATURES]
    # print len(ranked_patterns)

    # print X_train_new
    # print y_train

    svm_model = svm.SVC(C=.1, kernel="linear", decision_function_shape='ovr')
    svm_model.fit(X_train_new, y_train)
    y_predict_train = svm_model.predict(X_train_new)
    y_predict = svm_model.predict(X_test_new)

    analysis.run_analyses(y_predict_train, y_train, y_predict, y_test,
                          class_names)
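
A minimal usage sketch; the truncated-subset path relies on the module-level constants (NUM_SIGNS, NUM_SIGNALS, EXAMPLES_PER_SIGN) being defined:

if __name__ == '__main__':
    # mine on the truncated subset first; pass True to use every sign
    # once the runtime is acceptable
    seg_mining(use_all_signs=False)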
Example #7
def seg_mining(use_all_signs):
	# Loading data
	data = io.load_data(quality="low")
	X, y, class_names = preprocessing.create_data_tensor(data)	
	X_train, y_train, X_test, y_test = preprocessing.create_train_test_split(X, y, test_size=0.3, shuffle=False)

	# for class_idx in np.unique(y):
	# 	print class_names[class_idx], np.where(class_idx == y)[0].shape[0]
	# import sys
	# sys.exit(1)

	if not use_all_signs: 
		# TODO: change not break
		X_train = preprocessing.scale_spatially(X_train)[:NUM_SIGNS * EXAMPLES_PER_SIGN,:NUM_SIGNALS,:]
		y_train = y_train[:NUM_SIGNS * EXAMPLES_PER_SIGN]
		X_test = preprocessing.scale_spatially(X_test)[:NUM_SIGNS * (70 - EXAMPLES_PER_SIGN),:NUM_SIGNALS,:]
		y_test = y_test[:NUM_SIGNS * (70 - EXAMPLES_PER_SIGN)]
	else:  
		X_train = preprocessing.scale_spatially(X_train)
		X_test = preprocessing.scale_spatially(X_test)

	# Use the raw signals directly; the slope ("fake derivative")
	# computation below is disabled
	dX_train = X_train
	dX_test = X_test

	# dX_train = np.roll(X_train, -1, 2) - X_train
	# dX_train[:, :, -1] = dX_train[:, :, -2]
	# dX_test = np.roll(X_test, -1, 2) - X_test
	# dX_test[:, :, -1] = dX_test[:, :, -2]

	combined_trends, combined_trends_interval_start, combined_trends_interval_end = sq.create_combined_trends_discritized(dX_train)
	combined_trends_test, combined_trends_interval_start_test, combined_trends_interval_end_test = sq.create_combined_trends_discritized(dX_test)

	patterns = [None] * NUM_SIGNS
	pattern_counts = Counter()
	all_pattern_supports = [None] * NUM_SIGNS

	threads = []
	for i in xrange(NUM_SIGNS):
		process_sign(i, combined_trends, combined_trends_interval_start, combined_trends_interval_end,  patterns, pattern_counts, all_pattern_supports)
	# 	thread = Thread(target=process_sign, args=(i, combined_trends, combined_trends_interval_start, combined_trends_interval_end,  patterns, pattern_counts, all_pattern_supports))
	# 	thread.start()
	# 	threads.append(thread)
	
	# for thread in threads:
	# 	thread.join()

		# process_sign(i, combined_trends, combined_trends_interval_start, combined_trends_interval_end, patterns, pattern_counts, all_pattern_supports)
		# for pattern_supports in all_pattern_supports[i]:
		# 	if pattern_supports is not None:
		# 		for pattern in pattern_supports:
		# 			pattern_counts[pattern] += 1

	# cut some ranked_patterns out
	print 'Classes',len(patterns), len(all_pattern_supports)
	print 'Iterations',len(patterns[1]), len(all_pattern_supports[1])
	# print 'patterns in iteration 1 ',len(patterns[1][0]), len(all_pattern_supports[1][0])
	
	ranked_patterns_all = sq.chi_square(patterns, pattern_counts, all_pattern_supports)

	# NUM_PATTERN_FEATURES_ARRAY = [75, 100, 200, 300, 400, 500, 750, 1000, 1500]
	# NUM_PATTERN_FEATURES_ARRAY = [400, 500, 750, 1000, 1500, 2000, 2500]
	NUM_PATTERN_FEATURES_ARRAY = [NUM_PATTERN_FEATURES]
	for num_pattern_features in NUM_PATTERN_FEATURES_ARRAY:
		print '---------------------=================---------------------'
		ranked_patterns = ranked_patterns_all[:num_pattern_features]
		print len(ranked_patterns_all),'->',len(ranked_patterns)
		
		X_train_new = sq.construct_feature_vectors(ranked_patterns, \
			(combined_trends, combined_trends_interval_start, combined_trends_interval_end), EXAMPLES_PER_SIGN)
		X_test_new = sq.construct_feature_vectors(ranked_patterns, \
			(combined_trends_test, combined_trends_interval_start_test, combined_trends_interval_end_test), 70 - EXAMPLES_PER_SIGN)

		svm_model = svm.SVC(C=PENALTY, kernel="linear", decision_function_shape='ovr')
		svm_model.fit(X_train_new, y_train)
		y_predict_train = svm_model.predict(X_train_new)
		y_predict = svm_model.predict(X_test_new)

		# np.save('../data/seq_mining_features/X_train-%d.npy'%num_pattern_features, X_train_new)
		# np.save('../data/seq_mining_features/X_test-%d.npy'%num_pattern_features, X_test_new)
		# np.save('../data/seq_mining_features/y_train-%d.npy'%num_pattern_features, y_train)
		# np.save('../data/seq_mining_features/y_test-%d.npy'%num_pattern_features, y_test)

		analysis.run_analyses(y_predict_train, y_train, y_predict, y_test, class_names)
		print 'STEADY_THRESHOLD',STEADY_THRESHOLD
		print 'WINDOW_SIZE',WINDOW_SIZE
		print 'MIN_SUPPORT',MIN_SUPPORT
		print 'K',K
		print 'NUM_PATTERN_FEATURES',num_pattern_features
		print 'NUM_SIGNS',NUM_SIGNS
		print 'NUM_SIGNALS',NUM_SIGNALS
		print 'EXAMPLES_PER_SIGN',EXAMPLES_PER_SIGN
		print 'PENALTY',PENALTY
	print '---------------------=================---------------------'
Example #8
def run_nn(quality='low'):
    """
	Runs a simple neural network model; first fits the model
	on the training data (70 percent of the total data) and tests on 
	the rest of the data.

	Args:
		none

	Returns:
		none
	"""

    data = io.load_data(quality=quality)
    X, y, class_names = preprocessing.create_data_tensor(data)
    X_train, y_train, X_test, y_test = preprocessing.create_train_test_split(
        X, y, test_size=0.3, shuffle=False)

    y_train_one_hot = np.zeros((y_train.shape[0], len(class_names)))
    for i in range(y_train.shape[0]):
        y_train_one_hot[i, y_train[i]] = 1

    y_test_one_hot = np.zeros((y_test.shape[0], len(class_names)))
    for i in range(y_test.shape[0]):
        y_test_one_hot[i, y_test[i]] = 1

    # flip the last two axes:
    # from samples x features x time
    # to   samples x time x features
    X_train = np.swapaxes(X_train, 1, 2)
    X_test = np.swapaxes(X_test, 1, 2)

    print X_train.shape
    print y_train_one_hot.shape

    lstm_model = Sequential()

    ############################## MODEL 1 ##############################
    # Average Precision: 0.109463986213
    # Average Recall: 0.0912280701754
    # Average F1: 0.0664461045921
    # Training Error: 0.852201933405
    # Testing Error: 0.908771929825
    # HIDDEN_LAYER = 300
    # lstm_model.add(LSTM(output_dim=HIDDEN_LAYER,
    # 					input_dim=X_train.shape[2],
    # 					activation='tanh',
    # 					inner_activation='hard_sigmoid',
    # 					return_sequences=False))
    # lstm_model.add(Dense(y_train_one_hot.shape[1], activation='tanh'))
    # lstm_model.compile(loss='mean_squared_error', optimizer='rmsprop')
    # lstm_model.fit(X_train, y_train_one_hot, batch_size=16, nb_epoch=10)
    ############################## MODEL 1 ##############################
    #
    ############################### MODEL 2 ##############################
    # Average Precision: 0.109463986213
    # Average Recall: 0.0912280701754
    # Average F1: 0.0664461045921
    # Training Error: 0.852201933405
    # Testing Error: 0.908771929825
    HIDDEN_LAYER = 300
    lstm_model.add(
        LSTM(output_dim=HIDDEN_LAYER,
             input_dim=X_train.shape[2],
             activation='tanh',
             inner_activation='hard_sigmoid',
             # return full sequences so the stacked LSTM below gets 3D input
             return_sequences=True))

    lstm_model.add(
        LSTM(HIDDEN_LAYER,
             activation='tanh',
             inner_activation='hard_sigmoid',
             return_sequences=False))
    lstm_model.add(Dense(y_train_one_hot.shape[1], activation='tanh'))
    lstm_model.compile(loss='mean_squared_error', optimizer='rmsprop')
    lstm_model.fit(X_train, y_train_one_hot, batch_size=16, nb_epoch=10)
    ############################## MODEL 2 ##############################
    # lstm_model.add(Dropout(0.5))
    # lstm_model.add(Dense(95))
    # lstm_model.add(Activation('softmax'))
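    ############################## MODEL 3 (sketch) #####################
    # A hedged sketch of the softmax/cross-entropy variant hinted at by
    # the commented-out lines above; assumes Activation is importable in
    # this Keras version. Untested.
    # lstm_model = Sequential()
    # lstm_model.add(LSTM(output_dim=HIDDEN_LAYER,
    #                     input_dim=X_train.shape[2],
    #                     activation='tanh',
    #                     inner_activation='hard_sigmoid',
    #                     return_sequences=False))
    # lstm_model.add(Dropout(0.5))
    # lstm_model.add(Dense(y_train_one_hot.shape[1]))
    # lstm_model.add(Activation('softmax'))
    # lstm_model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    # lstm_model.fit(X_train, y_train_one_hot, batch_size=16, nb_epoch=10)
    ############################## MODEL 3 (sketch) #####################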

    # score = model.evaluate(X_test, Y_test, batch_size=16)

    y_predict_train = lstm_model.predict_classes(X_train)
    y_predict = lstm_model.predict_classes(X_test)
    # y_predict_one_hot = nn_model.predict(flattened_Xtest)

    # print y_predict
    # print y_predict_one_hot

    # print metrics and confusion plot
    analysis.run_analyses(y_predict_train, y_train, y_predict, y_test,
                          class_names, False, False)

    print y_predict