Example #1
def main():
    currDir = os.getcwd()
    path = os.path.join(currDir, '2017.csv')

    input, output = readData(path, 'Economy..GDP.per.Capita.', 'Freedom',
                             'Happiness.Score')

    # split in 80/20 percent
    np.random.seed(5)
    indexes = [i for i in range(len(input))]
    trainSample = np.random.choice(indexes,
                                   int(0.8 * len(input)),
                                   replace=False)
    testSample = [i for i in indexes if i not in trainSample]

    trainInputs = [input[i] for i in trainSample]
    trainOutputs = [output[i] for i in trainSample]

    testInputs = [input[i] for i in testSample]
    testOutputs = [output[i] for i in testSample]

    print('=== SKLEARN MODEL ===')
    tool_regressor = tool_regression(trainInputs, trainOutputs)
    # print('Tool predict ' + str(tool_regressor.predict(testInputs)))
    print('\n\n=== MY MODEL ===')
    manual_regressor = manual_regression(trainInputs, trainOutputs)
    # print('Manual predict ' + str(manual_regressor.predict(testInputs)))

    print('\n\n===Performance===')
    print('Tool prediction error:   ',
          mean_squared_error(testOutputs, tool_regressor.predict(testInputs)))
    print('Manual prediction error: ',
          meanSquareError(manual_regressor, testInputs, testOutputs))

    plotDataHistogram([trainInputs[i][0] for i in range(0, len(trainInputs))],
                      'capita GDP')
    plotDataHistogram([trainInputs[i][1] for i in range(0, len(trainInputs))],
                      'freedom')
    plotDataHistogram(trainOutputs, 'Happiness score')

    plotData([trainInputs[i][0] for i in range(0, len(trainInputs))],
             [trainInputs[i][1] for i in range(0, len(trainInputs))],
             trainOutputs, [
                 manual_regressor.intercept_, manual_regressor.coef_[0],
                 manual_regressor.coef_[1]
             ], 'TRAIN BASED ON LEARNT MODEL')

    plotData([testInputs[i][0] for i in range(0, len(testInputs))],
             [testInputs[i][1] for i in range(0, len(testInputs))],
             testOutputs, [
                 manual_regressor.intercept_, manual_regressor.coef_[0],
                 manual_regressor.coef_[1]
             ], 'TEST BASED ON LEARNT MODEL')

    predictedPlot([testInputs[i][0] for i in range(0, len(testInputs))],
                  [testInputs[i][1]
                   for i in range(0, len(testInputs))], testOutputs,
                  manual_regressor.predict(testInputs),
                  'PREDICTED BASED ON LEARNT MODEL')
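The helper functions called above (readData, tool_regression, manual_regression, meanSquareError and the plotting helpers) live elsewhere in the project. As a minimal sketch of what manual_regression and meanSquareError could look like, assuming the manual model fits the two-feature linear regression with the normal equation and exposes the same intercept_/coef_/predict interface the code above relies on (the class name ManualLinearRegression is hypothetical):

import numpy as np

class ManualLinearRegression:
    # Plain least-squares fit; a sketch, not the repository's implementation.
    def fit(self, inputs, outputs):
        X = np.hstack([np.ones((len(inputs), 1)), np.asarray(inputs)])  # prepend bias column
        w = np.linalg.lstsq(X, np.asarray(outputs), rcond=None)[0]
        self.intercept_, self.coef_ = w[0], w[1:]
        return self

    def predict(self, inputs):
        return [self.intercept_ + sum(c * x for c, x in zip(self.coef_, row)) for row in inputs]

def manual_regression(trainInputs, trainOutputs):
    return ManualLinearRegression().fit(trainInputs, trainOutputs)

def meanSquareError(model, inputs, outputs):
    predicted = model.predict(inputs)
    return sum((p - o) ** 2 for p, o in zip(predicted, outputs)) / len(outputs)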
Example #2
def main():
	extractData = False
	helper = helperClass.Helper()
	path_to_training_directory = "data/Train"
	path_to_testing_directory = "data/Test"
	path_to_training_labels = "data/Train/GroundTruth/groundTruth.txt"

	if(extractData):
		truths = open(path_to_training_labels, "r").read().split("\n")

		print "Extracting user data..."

		userData = []
		for i in range(1, len(truths)):
			userData.append(reader.readData(i, helper, path_to_training_directory))
			sys.stdout.write("\r%5.2f%% (%i/%i)" %((float(i)/(len(truths)-1)*100), i, len(truths)-1))
			sys.stdout.flush()
		print "\r"

		pickle.dump(userData, open("userData.pkl", "wb"))
	else:
		userData = pickle.load(open("userData.pkl", "rb"))

	labelVectors = helper.getLabelVectors(path_to_training_labels)
	print str(len(labelVectors))+" label vectors created"

	allWords = set()
	userWords = {}
	
	print "Extracting unique words from user data..."
	for i in range(0, len(userData)):
		userWords[i] = helper.getUserWords(userData[i])
		allWords = allWords.union(userWords[i])
		sys.stdout.write("\r%5.2f%% (%i/%i)" %((float(i+1)/len(userData)*100), i+1, len(userData)))
		sys.stdout.flush()

	print "\n"+str(len(allWords))+" unique words found.\n"
	
	helper.setFeatureList(sorted(allWords))

	featureVectors = []
	print "Generating feature vectors..."

	for i in range(0, len(userData)):
		featureVectors.append(helper.getFeatureVector(userWords[i]))
		sys.stdout.write("\r%5.2f%% (%i/%i)" %((float(i+1)/len(userData)*100), i+1, len(userData)))
		sys.stdout.flush()

	print "\r"
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("data",
                        help="File of measure class pairs to test.",
                        type=str)
    parser.add_argument("dimen",
                        help="Tuple representing measure space.",
                        type=str)
    parser.add_argument(
        "priors",
        help="File designating prior probabilities of classes.",
        type=str)
    parser.add_argument(
        "conditionals",
        help="File designating class conditional probabilities.",
        type=str)
    parser.add_argument(
        "--eGain",
        "-e",
        help=
        "Economic gain matrix for data. If not provided assumes identity matrix.",
        type=str)
    parser.add_argument(
        "--vFolds",
        "-v",
        help=
        "Number of v-fold partitions for testing. If not provided, assumes all data is for testing.",
        type=int)
    args = parser.parse_args()

    # Reading data
    dimen = eval(args.dimen)  # parses e.g. "(8, 8)"; ast.literal_eval would be safer for untrusted input
    measures, tags = reader.readData(args.data, dimen)
    priors = reader.readPriors(args.priors)
    conds = reader.readCCP(args.conditionals)
    e = False
    if args.eGain:
        e = reader.readGain(args.eGain)

    classifier = BayesClassifier(priors, conds, eGain=e)

    expGain = test(classifier, measures, tags, V=args.vFolds)

    print("The expected gain for the data is: {}".format(expGain))
Example #4
import os
import sys
import reader
import matplotlib.pyplot as plt

#Getting file name of the data
fName = sys.argv[1]
print(fName)

#using a reader module to grab the data and put them into lists.
#grabbing the measured data and the correctness data.
data = reader.readData(fName)
measured = data[0]
correct = data[2]

#Creating lists for the false positive rate and the true positive rate.
fpr = list()
tpr = list()

#sorting them in descending order using zip.
#First pairs them, then sorts, then unpacks them into two lists.
measured, correct = (list(t) for t in (zip(
    *sorted(zip(measured, correct), reverse=True))))

#counting the total number of correct and incorrect readings.
totalCorrect = correct.count('Y')
totalIncorrect = len(correct) - totalCorrect

#Guard against division by zero when there are no correct (or incorrect) readings.
if (totalCorrect == 0):
    totalCorrect = 1
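The snippet is cut off here (the matching guard for totalIncorrect presumably follows). A plausible continuation, based on the comments above and the variables already defined, walks the descending scores, records cumulative true/false positive rates, and plots the ROC curve with matplotlib:

#Hypothetical continuation: accumulate TPR/FPR point by point and plot the ROC curve.
tp = 0
fp = 0
for label in correct:
    if label == 'Y':
        tp += 1
    else:
        fp += 1
    tpr.append(float(tp) / totalCorrect)
    fpr.append(float(fp) / totalIncorrect)

plt.plot(fpr, tpr)
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve for ' + fName)
plt.show()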
Example #5
import evaluator as ev


def printSol(file):
    sol = np.load(file)
    print("Score:", sol["score"])
    print("Permutation:")
    print(' '.join(map(str, sol["chromosome"])))


genericParameters = namedtuple(
    "genericParameters", "populationSize crossProbability mutationProbability")

np.random.seed(12345678)

parameters = genericParameters(100, 0.5, 0.02)
problemDim, weightMtx, distanceMtx = rd.readData("tai256c.dat")

ag = AG.AG(problemDim, weightMtx, distanceMtx)
agl = AGL.AGL(problemDim, weightMtx, distanceMtx)
agb = AGB.AGB(problemDim, weightMtx, distanceMtx)

print(agl.AGL(parameters))

# for cp in [0.1, 0.5, 0.7]:
#     for mp in [0, 0.01, 0.05, 0.1]:
#         parameters = genericParameters(100, cp, mp)
#         agl.AGL(parameters)

#printSol("resultsLamarck20Best/PS100CP0.5MP0.02iter228score44804670time1104.5440604686737.npy")
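rd, AG, AGL and AGB are project modules that are not shown here. tai256c is a standard quadratic assignment instance, so the score the genetic algorithms minimize is presumably the usual QAP cost of a permutation; a minimal sketch under that assumption (qapCost is a hypothetical name):

import numpy as np

def qapCost(chromosome, weightMtx, distanceMtx):
    # Cost of placing facility i at location chromosome[i]:
    # sum over i, j of weight[i][j] * distance[chromosome[i]][chromosome[j]].
    perm = np.asarray(chromosome)
    return float(np.sum(np.asarray(weightMtx) * np.asarray(distanceMtx)[np.ix_(perm, perm)]))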
Example #6
def avgChart():
	days        = []
	v1          = []
	v2          = []
	v3          = []
	v5          = []
	l1          = []
	p1          = []
	t1          = []
	t2          = []
	sg1         = []
	sg2         = []
	sg3         = []
	sg4         = []
	sg5         = []
	dateFormat  = '%Y-%m-%d'
	beginStr    = request.form.get('begin', type=str)
	endStr      = request.form.get('end', type=str)
	chartType   = request.form.get('chartType', type=str)
	machines    = request.form.get('mach', type=str)
	mach        = json.loads(machines)
	begin       = datetime.strptime(beginStr, dateFormat)
	end         = datetime.strptime(endStr, dateFormat)
	delta       = end - begin

	#Loop through the date range (skipping the start date) and add each day to the days list
	for i in range(1, delta.days + 1):
		days.append(str(begin + timedelta(days=i))[5:10].replace('-', '/'))

	#Setup style of chart
	if chartType == 'Bar':
		user_chart = pygal.Bar(style=LightColorizedStyle)
	elif chartType == 'Line':
		user_chart = pygal.Line(style=CleanStyle)
	elif chartType == 'Stacked':
		user_chart = pygal.StackedLine(fill=True)
	else:
		#Fall back to a line chart so user_chart is always defined
		user_chart = pygal.Line(style=CleanStyle)

	#Setup labels on x axis and the title of the chart
	user_chart.x_labels = days
	user_chart.title = days[0] + ' - ' + days[-1] + ' Plant 1 Daily Sheet Utilization by Machine'

	#Loop through the machines list and add each selected machine's series to the chart
	for i in mach:
		#Add data to chart by machine
		if i == 'v1':
			user_chart.add('Vipros 1', v1)
		elif i == 'v2':
			user_chart.add('Vipros 2', v2)
		elif i == 'v3':
			user_chart.add('Vipros 3', v3)
		elif i == 'v5':
			user_chart.add('Vipros 5', v5)
		elif i == 'l1':
			user_chart.add('Salvagnini', l1)
		elif i == 'p1':
			user_chart.add('Pulsar', p1)
		elif i == 't1':
			user_chart.add('FMS 1', t1)
		elif i == 't2':
			user_chart.add('FMS 2', t2)
		elif i == 'sg1':
			user_chart.add('SG 1', sg1)
		elif i == 'sg2':
			user_chart.add('SG 2', sg2)
		elif i == 'sg3':
			user_chart.add('SG 3', sg3)
		elif i == 'sg4':
			user_chart.add('SG 4', sg4)
		elif i == 'sg5':
			user_chart.add('SG 5', sg5)

		data = reader.readData(i)

		for key in data:
			if key[0:5] in days:
				if i == 'v1':
					v1.append(data[key])
				elif i == 'v2':
					v2.append(data[key])
				elif i == 'v3':
					v3.append(data[key])
				elif i == 'v5':
					v5.append(data[key])
				elif i == 'l1':
					l1.append(data[key])
				elif i == 'p1':
					p1.append(data[key])
				elif i == 't1':
					t1.append(data[key])
				elif i == 't2':
					t2.append(data[key])
				elif i == 'sg1':
					sg1.append(data[key])
				elif i == 'sg2':
					sg2.append(data[key])
				elif i == 'sg3':
					sg3.append(data[key])
				elif i == 'sg4':
					sg4.append(data[key])
				elif i == 'sg5':
					sg5.append(data[key])

	reader.cleanUp()
	days = None
	v1   = None
	v2   = None
	v3   = None
	v5   = None
	l1   = None
	p1   = None
	t1   = None
	t2   = None
	sg1  = None
	sg2  = None
	sg3  = None
	sg4  = None
	sg5  = None

	chart = user_chart.render(is_unicode=True)
	return chart
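avgChart reads request.form, so it has to run inside a Flask request context, and the string returned by user_chart.render(is_unicode=True) is the SVG markup itself. A minimal sketch (the route name and app object are hypothetical) of how that string could be served:

from flask import Flask, Response

app = Flask(__name__)

@app.route('/avg-chart', methods=['POST'])
def avg_chart_view():
    # avgChart() above returns the rendered SVG as a unicode string
    return Response(avgChart(), mimetype='image/svg+xml')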
Example #7

import tensorflow as tf

# Assumed imports for names used below (KTF, stopwords and SnowballStemmer are
# referenced later, but their import lines are not shown in this snippet):
import keras.backend.tensorflow_backend as KTF
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer

import reader
import summarize


if __name__ == "__main__":
    SPLIT_SIZE = 0.3
    num_labels = 4
    reviewer = "Steve+Rhodes"

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    KTF.set_session(sess)

    data = reader.readData(scale=num_labels)
    reviews = [entry[0] for entry in data[reviewer]]
    # summaries = [summarize.summarizeContent(review, sentences_count=3) for review in reviews]
    raw_docs_train = reviews
    # raw_docs_train = [summarize.firstSentence(review)[:10] for review in reviews]
    sentiment_train = [entry[1] for entry in data[reviewer]]

    # print pd.value_counts(sentiment_train)
    # print num_labels

    # text pre-processing
    stop_words = set(stopwords.words('english'))
    stop_words.update(['.', ',', '"', "'", ':', ';', '(', ')', '[', ']', '{', '}'])
    stemmer = SnowballStemmer("english")

    print ("pre-processing train docs...")
Example #8
def f_id(r):
    return r
    
def Q(d):
    if d >= 0.8:
        return 1
    elif d> 0.3:
        return 2*d - 0.6
    else:
        return 0
     
file_name = str(sys.argv[1])
#E - number of experts
#S - number of alternatives
#R - the experts' matrices, one per expert // len(R)==E
E, S, R = reader.readData(file_name)

#R_hasz  - the R# matrix,
#  rows    - experts
#  columns - alternatives
#  R#|d1|d2|
#  e1|__|__|
#  e2|  |  |
R_hasz = []
for expertMatrix in R: 
    #print expertMatrix
    #print "+++++"
    R_hasz.append([round(sum([f_2(r, 0.5) for r in row])/float(S-1),2) for row in expertMatrix])

#for row in R_hasz:
#    print row
Example #9
def main():
	extractData = False
	extractTestingData = False
	helper = helperClass.Helper()
	path_to_training_directory = "data/Train"
	path_to_testing_directory = "data/Test"
	path_to_training_labels = "data/Train/GroundTruth/groundTruth.txt"
	path_to_testing_labels = "data/Test/GroundTruth/groundTruth.txt"

	if(extractData):
		truths = open(path_to_training_labels, "r").read().split("\n")

		print "Extracting user training data..."

		userData = []
		for i in range(1, len(truths)):
			userData.append(reader.readData(i, helper, path_to_training_directory))
			sys.stdout.write("\r%5.2f%% (%i/%i)" %((float(i)/(len(truths)-1)*100), i, len(truths)-1))
			sys.stdout.flush()
		print "\r"

		pickle.dump(userData, open("userData.pkl", "wb"))
	else:
		userData = pickle.load(open("userData.pkl", "rb"))


	allWords = set()
	userWords = {}
	
	print "Extracting unique words from user data..."
	for i in range(0, len(userData)):
		userWords[i] = {}
		for j in userData[i]:
			userWords[i][j] = helper.getUserWords(userData[i], j)
			allWords = allWords.union(userWords[i][j])
			sys.stdout.write("\r%5.2f%% (%i/%i)" %((float(i+1)/len(userData)*100), i+1, len(userData)))
			sys.stdout.flush()

	print "\n"+str(len(allWords))+" unique words found.\n"
	# print allWords
	helper.setFeatureList(sorted(allWords))
	
	with open('allWords.txt', 'w') as outfile:
		json.dump(sorted(allWords), outfile)

	featureVectors = {}
	print "Generating feature vectors..."

	for j in userData[0]:
		featureVectors[j] = []

	for i in range(0, len(userData)):
		for j in userData[i]:
			featureVectors[j].append(helper.getFeatureVector(userWords[i][j]))
			sys.stdout.write("\r%5.2f%% (%i/%i)" %((float(i+1)/len(userData)*100), i+1, len(userData)))
			sys.stdout.flush()

	# for j in range(0, len(userData[0])):
	# 	featureVectors[j] = []
	# 	for i in range(0, len(userData)):
	# 		featureVectors[j].append(helper.getFeatureVector(userWords[i]))
	# 		sys.stdout.write("\r%5.2f%% (%i/%i)" %((float(i+1)/len(userData)*100), i+1, len(userData)))
	# 		sys.stdout.flush()

	print "\r"
	labelVectors = helper.getLabelVectors(path_to_training_labels)

	print "Training SVM models..."
	params = svm_parameter()
	params.C = 10
	params.kernel_type = LINEAR

	# labels = labelVectors[0]
	models = {}


	# CREATE ONE MODEL FOR EACH category and data source
	# Userdata is an array of objects, each object containing three objects with data from each source

	for i in range(0, len(labelVectors)):
		# Loop 1-20 (Each category)
		models[i] = {}
		for j in userData[0]:
			# Loop through 1-3 (each data source)
			problem = svm_problem(labelVectors[i], featureVectors[j])
			models[i][j] = svm_train(problem, params)


	pprint(models)
	# problem = svm_problem(labels, featureVectors)
	# model = svm_train(problem, params)

	if(extractTestingData):
		truths = open(path_to_testing_labels , "r").read().split("\n")

		print "Extracting user testing data..."
		userIdPattern = re.compile("U(\d*?)gnd.txt")
		userIDs = userIdPattern.findall(" ".join(os.listdir(path_to_testing_directory+"/GroundTruth")))
		userIDs = map(int, userIDs)

		userData = []
		for i in range(0, len(userIDs)):
			userData.append(reader.readData(userIDs[i], helper, path_to_testing_directory))
			sys.stdout.write("\r%5.2f%% (%i/%i)" %((float(i+1)/(len(userIDs))*100), i+1, len(userIDs)))
			sys.stdout.flush()
		print "\r"

		pickle.dump(userData, open("userTestingData.pkl", "wb"))
	else:
		userData = pickle.load(open("userTestingData.pkl", "rb"))


	print "Generating feature vectors..."


	featureVectors = {}
	# Feature vectors should be an object containing three arrays, one for each data source
	for i in userData[0]:
		featureVectors[i] = []

	for i in range(0, len(userData)):
		for j in userData[i]:
			featureVectors[j].append(helper.getFeatureVector(userWords[i][j]))
			sys.stdout.write("\r%5.2f%% (%i/%i)" %((float(i+1)/len(userData)*100), i+1, len(userData)))
			sys.stdout.flush()

	# for j in range(0, len(userData[0])):	
	# 	featureVectors[j] = []
	# 	print "Generating feature vectors for "+str(j)
	# 	for i in range(0, len(userData)):
	# 		featureVectors[j].append(helper.getFeatureVector(helper.getUserWords(userData[i][j])))
	# 		sys.stdout.write("\r%5.2f%% (%i/%i)" %((float(i+1)/len(userData)*100), i+1, len(userData)))
	# 		sys.stdout.flush()

	print "\r"
	labelVectors = helper.getLabelVectors(path_to_testing_labels)
	
	avgAcc = 0.0
	# labelContainer = []
	labelContainer = {}
	for i in models[0]:
		labelContainer[i] = []

	print "Classifying dataset..."
	for i in range(0, len(models)):
		for j in models[i]:
			p_labels, p_accs, p_vals = svm_predict(labelVectors[i], featureVectors[j], models[i][j])
			labelContainer[j].append(p_labels)
			avgAcc = avgAcc+p_accs[0]

	avgAcc = avgAcc/(len(models)*3)
	print "Average accuracy: "+str(avgAcc)+"%"

	for category in labelContainer:
		reader.saveOutput(labelContainer[category], 'data/outputLabels-'+category+".csv")
		
	# reader.saveOutput(labelContainer, 'data/outputLabels.csv')		
	reader.getSaK()
	pickle.dump(labelContainer, open("outputLabels.pkl", "wb"))
Example #10
    # Days to double deaths: the centered ratio data[2:] / data[:-2] spans two
    # days, so its square root is the daily growth factor g and the doubling
    # time in days is log(2) / log(g).
    for country, data in deaths.items():
        axes[i_row,
             i_col].plot(range(-days_back + 2, 0),
                         np.log(2) / np.log(np.sqrt(data[2:] / data[:-2])),
                         label=country)
    axes[i_row, i_col].set_xlabel('days before today')
    axes[i_row, i_col].set_ylabel('d to double deaths')
    axes[i_row, i_col].set_ylim((0., 10.))
    axes[i_row, i_col].grid()


if __name__ == "__main__":
    # read absolute data
    deaths, conf, recovered, people, countries = readData()

    # create relative datasets
    confPerMillion = {}
    for k in conf.keys():
        confPerMillion[k] = conf[k] / people[k]
    deathsPerMillion = {}
    for k in deaths.keys():
        deathsPerMillion[k] = deaths[k] / people[k]

    recoveredPerMillion = {}
    # for k in recovered.keys():
    #     recoveredPerMillion[k] = recovered[k] / people[k]

    deathsPerConfirmed = {}
    for k in deaths.keys():
Example #11
def main():
    extractData = False
    extractTestingData = False
    helper = helperClass.Helper()
    path_to_training_directory = "data/Train"
    # path_to_testing_directory = "data/Test"
    path_to_testing_directory = "multi-view-online-testing"
    path_to_training_labels = "data/Train/GroundTruth/groundTruth.txt"
    path_to_testing_labels = "multi-view-online-testing/GroundTruth/groundTruth.txt"
    # path_to_testing_labels = "data/Test/GroundTruth/groundTruth.txt"

    if extractData:
        truths = open(path_to_training_labels, "r").read().split("\n")

        print "Extracting user training data..."

        userData = []
        for i in range(1, len(truths)):
            userData.append(reader.readData(i, helper, path_to_training_directory))
            sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i) / (len(truths) - 1) * 100), i, len(truths) - 1))
            sys.stdout.flush()
        print "\r"

        pickle.dump(userData, open("userData.pkl", "wb"))
    else:
        userData = pickle.load(open("userData.pkl", "rb"))

    allWords = set()
    userWords = {}

    print "Extracting unique words from user data..."
    for i in range(0, len(userData)):
        userWords[i] = helper.getUserWords(userData[i])
        allWords = allWords.union(userWords[i])
        sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i + 1) / len(userData) * 100), i + 1, len(userData)))
        sys.stdout.flush()

    print "\n" + str(len(allWords)) + " unique words found.\n"
    # print allWords
    helper.setFeatureList(sorted(allWords))

    with open("allWords.txt", "w") as outfile:
        json.dump(sorted(allWords), outfile)

    featureVectors = []
    print "Generating feature vectors..."

    for i in range(0, len(userData)):
        featureVectors.append(helper.getFeatureVector(userWords[i]))
        sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i + 1) / len(userData) * 100), i + 1, len(userData)))
        sys.stdout.flush()

    print "\r"
    labelVectors = helper.getLabelVectors(path_to_training_labels)

    print "Training SVM models..."
    params = svm_parameter()
    params.C = 10
    params.kernel_type = LINEAR

    # labels = labelVectors[0]
    models = {}
    for i in range(0, len(labelVectors)):
        problem = svm_problem(labelVectors[i], featureVectors)
        models[i] = svm_train(problem, params)

        # problem = svm_problem(labels, featureVectors)
        # model = svm_train(problem, params)

    if extractTestingData:
        truths = open(path_to_testing_labels, "r").read().split("\n")

        print "Extracting user testing data..."
        userIdPattern = re.compile("U(\d*?)gnd.txt")
        userIDs = userIdPattern.findall(" ".join(os.listdir(path_to_testing_directory + "/GroundTruth")))
        userIDs = map(int, userIDs)

        userData = []
        for i in range(0, len(userIDs)):
            userData.append(reader.readData(userIDs[i], helper, path_to_testing_directory))
            sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i + 1) / (len(userIDs)) * 100), i + 1, len(userIDs)))
            sys.stdout.flush()
        print "\r"

        pickle.dump(userData, open("userTestingData.pkl", "wb"))
    else:
        userData = pickle.load(open("userTestingData.pkl", "rb"))

    print "Generating feature vectors..."
    featureVectors = []
    for i in range(0, len(userData)):
        featureVectors.append(helper.getFeatureVector(helper.getUserWords(userData[i])))
        sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i + 1) / len(userData) * 100), i + 1, len(userData)))
        sys.stdout.flush()

    print "\r"
    labelVectors = helper.getLabelVectors(path_to_testing_labels)

    avgAcc = 0.0
    labelContainer = []
    print "Classifying dataset..."
    for i in range(0, len(models)):
        p_labels, p_accs, p_vals = svm_predict(labelVectors[i], featureVectors, models[i])
        labelContainer.append(p_labels)
        avgAcc = avgAcc + p_accs[0]

    avgAcc = avgAcc / (len(models))
    print "Average accuracy: " + str(avgAcc) + "%"

    reader.saveOutput(labelContainer, "data/outputLabels.csv")
    pickle.dump(labelContainer, open("outputLabels.pkl", "wb"))
    reader.getSaK()