Пример #1
0
from utility.data_utility import loadData
from utility.model_utility import buildNaiveBayesModel, buildSVCModel, buildLogRegressionModel

import os
import pickle

# Training script: generate synthetic training maps from seed patterns, load
# them back as rows, fit a classifier and pickle the fitted model.

# Data locations are anchored at this script's directory.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(SCRIPT_DIR, "../../data")
TRAINING_MAP_DIR = os.path.join(DATA_DIR, "training_maps")
PATTERN_DIR = os.path.join(DATA_DIR, "patterns")

# NOTE(review): readSeedPattern, generateData, pd and np are used below but are
# not imported in the visible part of this file - confirm they are in scope.
seed_pattern_dict = readSeedPattern(PATTERN_DIR)
generateData(TRAINING_MAP_DIR, seed_pattern_dict, number_per_pattern=200)

dataOneLinePerMap = loadData(TRAINING_MAP_DIR)
print(dataOneLinePerMap[0])  # First row
print(dataOneLinePerMap[1])  # Second row

# Each row is indexed as row[0] -> target value, row[1] -> feature values.
tmp = [i[1] for i in dataOneLinePerMap]

X = pd.DataFrame(tmp)  # Array of maplines
Y = np.array([i[0] for i in dataOneLinePerMap])

# buildNaiveBayesModel(X, Y) and buildSVCModel(X, Y) are imported as
# drop-in alternatives to the logistic-regression builder used here.
model = buildLogRegressionModel(X, Y)

# Use a context manager so the file handle is flushed and closed
# deterministically instead of being left to the garbage collector.
# The relative path means "model.p" lands in the current working directory.
with open("model.p", "wb") as model_file:
    pickle.dump(model, model_file)


def do_train_split():
Пример #2
0
from utility.model_utility import buildNaiveBayesModel, buildSVCModel, buildLogRegressionModel

import os
import pickle

# Training script: build the training maps from the seed patterns, read them
# back, fit a logistic-regression model and serialize it with pickle.

# All data directories are derived from the location of this script.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(SCRIPT_DIR, "../../data")
TRAINING_MAP_DIR = os.path.join(DATA_DIR, "training_maps")
PATTERN_DIR = os.path.join(DATA_DIR, "patterns")

# NOTE(review): readSeedPattern, generateData, loadData, pd and np are used
# below but not imported in the visible part of this file - confirm they are
# brought into scope elsewhere.
seed_pattern_dict = readSeedPattern(PATTERN_DIR)
generateData(TRAINING_MAP_DIR, seed_pattern_dict, number_per_pattern=200)

dataOneLinePerMap = loadData(TRAINING_MAP_DIR)
print(dataOneLinePerMap[0])  # First row
print(dataOneLinePerMap[1])  # Second row

# Position 1 of every row feeds the feature frame, position 0 the targets.
tmp = [i[1] for i in dataOneLinePerMap]

X = pd.DataFrame(tmp)  # Array of maplines
Y = np.array([i[0] for i in dataOneLinePerMap])

# Alternatives available from utility.model_utility:
# buildNaiveBayesModel(X, Y), buildSVCModel(X, Y).
model = buildLogRegressionModel(X, Y)

# Open the output inside a `with` block so the handle is always closed,
# even if pickling raises. "model.p" is relative to the working directory.
with open("model.p", "wb") as model_file:
    pickle.dump(model, model_file)
def do_train_split():
    """Split the module-level ``X`` / ``Y`` into train and test partitions.

    20% of the samples are held out for testing; ``random_state=4`` keeps the
    split reproducible between runs.

    NOTE(review): the four resulting arrays are only bound to local names and
    are not returned or used in the visible code - confirm whether the
    function is meant to continue (fit / score) or return them.
    """
    # sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed
    # in 0.20; prefer its replacement and fall back only on old installs.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.20, random_state=4
    )
Пример #3
0
import pandas as pd
import numpy as np

from utility.data_utility import loadData

# Cluster the loaded maps with k-means and print each map's pattern value
# next to the cluster it was assigned to.
dataOneLinePerMap = loadData()

# Position 0 of every row is used as the pattern value, position 1 as the
# feature values handed to the clusterer.
Y = np.array([row[0] for row in dataOneLinePerMap])
tmp = [row[1] for row in dataOneLinePerMap]
X = pd.DataFrame(tmp)

from sklearn.cluster import KMeans

num_clusters = 2
# KMeans.fit returns the fitted estimator, so construction and fitting chain.
km = KMeans(n_clusters=num_clusters).fit(X)
print(km.n_clusters)

# One cluster id per input row, converted to a plain Python list.
clusters = km.labels_.tolist()
print("Length of cluster variable= {0}".format(len(clusters)))

# Tabulate pattern vs. assigned cluster, indexed by the pattern values.
data = {
    'pattern': Y,
    'cluster': clusters,
}
frame = pd.DataFrame(
    data,
    index=[Y],
    columns=['pattern', 'cluster'],
)

print(frame)
Пример #4
0
import pandas as pd
import numpy as np

from utility.data_utility import loadData

# K-means demo: load the maps, split each row into pattern value and feature
# values, fit two clusters and print the pattern-to-cluster assignment.
dataOneLinePerMap = loadData()

# row[1] -> feature values for clustering; row[0] -> pattern value.
tmp = [entry[1] for entry in dataOneLinePerMap]
pattern_values = [entry[0] for entry in dataOneLinePerMap]
X = pd.DataFrame(tmp)
Y = np.array(pattern_values)

from sklearn.cluster import KMeans

num_clusters = 2
km = KMeans(n_clusters=num_clusters)
km.fit(X)
print(km.n_clusters)

# labels_ holds the cluster id assigned to each fitted row.
clusters = km.labels_.tolist()
cluster_total = len(clusters)
print("Length of cluster variable= {0}".format(cluster_total))

# Side-by-side view of every pattern value and the cluster it fell into.
data = {'pattern': Y, 'cluster': clusters}
column_order = ['pattern', 'cluster']
frame = pd.DataFrame(data, index=[Y], columns=column_order)

print(frame)