"""Generate training maps, fit a classifier on them and pickle the model."""
from utility.data_utility import loadData
from utility.model_utility import buildNaiveBayesModel, buildSVCModel, buildLogRegressionModel
import os
import pickle

# pandas and numpy are used below (pd.DataFrame, np.array) but were never
# imported by this script, so bring them into scope here.
import numpy as np
import pandas as pd

# NOTE(review): readSeedPattern and generateData are called below, but no
# import for them is visible in this file -- confirm which module provides
# them and import them next to loadData.

SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(SCRIPT_DIR, "../../data")
TRAINING_MAP_DIR = os.path.join(DATA_DIR, "training_maps")
PATTERN_DIR = os.path.join(DATA_DIR, "patterns")

# Read the seed patterns and generate 200 training maps per pattern.
seed_pattern_dict = readSeedPattern(PATTERN_DIR)
generateData(TRAINING_MAP_DIR, seed_pattern_dict, number_per_pattern=200)
# exit()

dataOneLinePerMap = loadData(TRAINING_MAP_DIR)
print(dataOneLinePerMap[0])  # First row
print(dataOneLinePerMap[1])  # Second row

# Each entry is indexed as entry[0] -> target value, entry[1] -> feature row.
tmp = [i[1] for i in dataOneLinePerMap]
X = pd.DataFrame(tmp)  # Array of maplines
Y = np.array([i[0] for i in dataOneLinePerMap])

# Alternative estimators kept for quick experiments:
# model = buildNaiveBayesModel(X, Y)
# model = buildSVCModel(X, Y)
model = buildLogRegressionModel(X, Y)

# Use a context manager so the file handle is flushed and closed
# deterministically instead of being left to the garbage collector.
with open("model.p", "wb") as model_file:
    pickle.dump(model, model_file)


def do_train_split():
    """Split the module-level X and Y into training and test subsets.

    20% of the samples are held out for testing; random_state is fixed at 4
    so the split is reproducible.

    NOTE(review): this copy of the script was truncated right after the
    ``def`` line; the body is restored from the duplicate copy of the
    script further down in this file -- confirm it is the intended one.

    Returns:
        The four arrays produced by ``train_test_split``, in the order
        (X_train, X_test, Y_train, Y_test).
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20; prefer the
    # replacement module and fall back only on very old installations.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split
    return train_test_split(X, Y, test_size=0.20, random_state=4)
"""Generate training maps, fit a classifier on them and pickle the model."""
# loadData is called below; the only import of it in this file lives in a
# separate copy of the script, so import it here to keep this one
# self-contained.
from utility.data_utility import loadData
from utility.model_utility import buildNaiveBayesModel, buildSVCModel, buildLogRegressionModel
import os
import pickle

# pandas and numpy are used below (pd.DataFrame, np.array) but were never
# imported by this script, so bring them into scope here.
import numpy as np
import pandas as pd

# NOTE(review): readSeedPattern and generateData are called below, but no
# import for them is visible in this file -- confirm which module provides
# them and import them next to loadData.

SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(SCRIPT_DIR, "../../data")
TRAINING_MAP_DIR = os.path.join(DATA_DIR, "training_maps")
PATTERN_DIR = os.path.join(DATA_DIR, "patterns")

# Read the seed patterns and generate 200 training maps per pattern.
seed_pattern_dict = readSeedPattern(PATTERN_DIR)
generateData(TRAINING_MAP_DIR, seed_pattern_dict, number_per_pattern=200)
# exit()

dataOneLinePerMap = loadData(TRAINING_MAP_DIR)
print(dataOneLinePerMap[0])  # First row
print(dataOneLinePerMap[1])  # Second row

# Each entry is indexed as entry[0] -> target value, entry[1] -> feature row.
tmp = [i[1] for i in dataOneLinePerMap]
X = pd.DataFrame(tmp)  # Array of maplines
Y = np.array([i[0] for i in dataOneLinePerMap])

# Alternative estimators kept for quick experiments:
# model = buildNaiveBayesModel(X, Y)
# model = buildSVCModel(X, Y)
model = buildLogRegressionModel(X, Y)

# Use a context manager so the file handle is flushed and closed
# deterministically instead of being left to the garbage collector.
with open("model.p", "wb") as model_file:
    pickle.dump(model, model_file)


def do_train_split():
    """Split the module-level X and Y into training and test subsets.

    20% of the samples are held out for testing; random_state is fixed at 4
    so the split is reproducible.

    Returns:
        The four arrays produced by ``train_test_split``, in the order
        (X_train, X_test, Y_train, Y_test). The original implementation
        computed the split and then discarded it; returning it is
        backward-compatible for callers that ignored the result.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20; prefer the
    # replacement module and fall back only on very old installations.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split
    return train_test_split(X, Y, test_size=0.20, random_state=4)
"""Cluster the loaded maps with KMeans and print pattern/cluster pairs."""
import pandas as pd
import numpy as np
from utility.data_utility import loadData

dataOneLinePerMap = loadData()

# Walk the loaded entries once, collecting the feature rows (entry[1]) and
# the pattern labels (entry[0]) side by side.
tmp = []
pattern_labels = []
for entry in dataOneLinePerMap:
    tmp.append(entry[1])
    pattern_labels.append(entry[0])
X = pd.DataFrame(tmp)
Y = np.array(pattern_labels)

from sklearn.cluster import KMeans

num_clusters = 2
# fit() returns the estimator itself, so construction and fitting can be
# chained without changing what ``km`` refers to.
km = KMeans(n_clusters=num_clusters).fit(X)
print(km.n_clusters)

clusters = km.labels_.tolist()
cluster_count = len(clusters)
print("Length of cluster variable= {0}".format(cluster_count))

# One output row per map: its pattern label next to the assigned cluster.
data = {
    'pattern': Y,
    'cluster': clusters,
}
frame = pd.DataFrame(
    data,
    index=[Y],
    columns=['pattern', 'cluster'],
)
print(frame)
"""Cluster the loaded maps with KMeans and print pattern/cluster pairs."""
import pandas as pd
import numpy as np
from utility.data_utility import loadData

dataOneLinePerMap = loadData()

# entry[1] holds the feature row of a map, entry[0] its pattern label.
tmp = list(map(lambda entry: entry[1], dataOneLinePerMap))
X = pd.DataFrame(tmp)
Y = np.array(list(map(lambda entry: entry[0], dataOneLinePerMap)))

from sklearn.cluster import KMeans

num_clusters = 2
km = KMeans(n_clusters=num_clusters)
km.fit(X)
print(km.n_clusters)

# Plain Python list of the cluster id assigned to each map.
clusters = km.labels_.tolist()
length_message = "Length of cluster variable= {0}".format(len(clusters))
print(length_message)

# Tabulate the pattern label of every map next to its assigned cluster.
data = dict([('pattern', Y), ('cluster', clusters)])
frame = pd.DataFrame(data, index=[Y], columns=['pattern', 'cluster'])
print(frame)