# NOTE: readData, tool_regression, manual_regression, meanSquareError and the
# plotting helpers (plotDataHistogram, plotData, predictedPlot) are project-level
# functions assumed to be defined/imported in the full script.
import os

import numpy as np
from sklearn.metrics import mean_squared_error


def main():
    currDir = os.getcwd()
    path = os.path.join(currDir, '2017.csv')
    input, output = readData(path, 'Economy..GDP.per.Capita.', 'Freedom', 'Happiness.Score')

    # split in 80/20 percent
    np.random.seed(5)
    indexes = [i for i in range(len(input))]
    trainSample = np.random.choice(indexes, int(0.8 * len(input)), replace=False)
    testSample = [i for i in indexes if i not in trainSample]
    trainInputs = [input[i] for i in trainSample]
    trainOutputs = [output[i] for i in trainSample]
    testInputs = [input[i] for i in testSample]
    testOutputs = [output[i] for i in testSample]

    print('=== SKLEARN MODEL ===')
    tool_regressor = tool_regression(trainInputs, trainOutputs)
    # print('Tool predict ' + str(tool_regressor.predict(testInputs)))

    print('\n\n=== MY MODEL ===')
    manual_regressor = manual_regression(trainInputs, trainOutputs)
    # print('Manual predict ' + str(manual_regressor.predict(testInputs)))

    print('\n\n=== Performance ===')
    print('Tool prediction error: ', mean_squared_error(testOutputs, tool_regressor.predict(testInputs)))
    print('Manual prediction error: ', meanSquareError(manual_regressor, testInputs, testOutputs))

    plotDataHistogram([input[i][0] for i in range(len(trainInputs))], 'capita GDP')
    plotDataHistogram([input[i][1] for i in range(len(trainInputs))], 'freedom')
    plotDataHistogram(trainOutputs, 'Happiness score')

    learntModel = [manual_regressor.intercept_, manual_regressor.coef_[0], manual_regressor.coef_[1]]
    plotData([trainInputs[i][0] for i in range(len(trainInputs))],
             [trainInputs[i][1] for i in range(len(trainInputs))],
             trainOutputs, learntModel, 'TRAIN BASED ON LEARNT MODEL')
    plotData([testInputs[i][0] for i in range(len(testInputs))],
             [testInputs[i][1] for i in range(len(testInputs))],
             testOutputs, learntModel, 'TEST BASED ON LEARNT MODEL')
    predictedPlot([testInputs[i][0] for i in range(len(testInputs))],
                  [testInputs[i][1] for i in range(len(testInputs))],
                  testOutputs, manual_regressor.predict(testInputs),
                  'PREDICTED BASED ON LEARNT MODEL')
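# The script above calls meanSquareError(...) for the manually trained model, but its
# definition is not part of this excerpt. Below is a minimal sketch of such a helper,
# assuming the manual regressor exposes the same predict(inputs) interface used in
# main(); it is an illustration, not the original implementation.
def meanSquareErrorSketch(model, inputs, outputs):
    # Average of squared residuals between true outputs and model predictions.
    predictions = model.predict(inputs)
    return sum((y - yHat) ** 2 for y, yHat in zip(outputs, predictions)) / len(outputs)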
# NOTE: assumes the usual module-level imports from the full script
# (sys, pickle, reader and helperClass).
def main():
    extractData = False
    helper = helperClass.Helper()

    path_to_training_directory = "data/Train"
    path_to_testing_directory = "data/Test"
    path_to_training_labels = "data/Train/GroundTruth/groundTruth.txt"

    if extractData:
        truths = open(path_to_training_labels, "r").read().split("\n")
        print "Extracting user data..."
        userData = []
        for i in range(1, len(truths)):
            userData.append(reader.readData(i, helper, path_to_training_directory))
            sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i) / (len(truths) - 1) * 100), i, len(truths) - 1))
            sys.stdout.flush()
        print "\r"
        pickle.dump(userData, open("userData.pkl", "wb"))
    else:
        userData = pickle.load(open("userData.pkl", "rb"))

    labelVectors = helper.getLabelVectors(path_to_training_labels)
    print str(len(labelVectors)) + " label vectors created"

    allWords = set()
    userWords = {}
    print "Extracting unique words from user data..."
    for i in range(0, len(userData)):
        userWords[i] = helper.getUserWords(userData[i])
        allWords = allWords.union(userWords[i])
        sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i + 1) / len(userData) * 100), i + 1, len(userData)))
        sys.stdout.flush()
    print "\n" + str(len(allWords)) + " unique words found.\n"

    helper.setFeatureList(sorted(allWords))

    featureVectors = []
    print "Generating feature vectors..."
    for i in range(0, len(userData)):
        featureVectors.append(helper.getFeatureVector(userWords[i]))
        sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i + 1) / len(userData) * 100), i + 1, len(userData)))
        sys.stdout.flush()
    print "\r"
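# Helper.setFeatureList and Helper.getFeatureVector are not shown in this excerpt.
# Below is a minimal sketch of how a binary bag-of-words feature vector over a fixed,
# sorted feature list could be built; buildBinaryFeatureVector, featureList and words
# are hypothetical stand-ins, and the real Helper may use counts or another encoding.
def buildBinaryFeatureVector(featureList, words):
    # 1 if the feature word occurs in the user's word collection, 0 otherwise.
    wordSet = set(words)
    return [1 if w in wordSet else 0 for w in featureList]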
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("data", help="File of measure class pairs to test.", type=str)
    parser.add_argument("dimen", help="Tuple representing measure space.", type=str)
    parser.add_argument(
        "priors",
        help="File designating prior probabilities of classes.",
        type=str)
    parser.add_argument(
        "conditionals",
        help="File designating class conditional probabilities.",
        type=str)
    parser.add_argument(
        "--eGain", "-e",
        help="Economic gain matrix for data. If not provided assumes identity matrix.",
        type=str)
    parser.add_argument(
        "--vFolds", "-v",
        help="Number of v-fold partitions for testing. If not provided, assumes all data is for testing.",
        type=int)
    args = parser.parse_args()

    # Reading data
    dimen = eval(args.dimen)
    measures, tags = reader.readData(args.data, dimen)
    priors = reader.readPriors(args.priors)
    conds = reader.readCCP(args.conditionals)

    e = False
    if args.eGain:
        e = reader.readGain(args.eGain)

    classifier = BayesClassifier(priors, conds, eGain=e)
    expGain = test(classifier, measures, tags, V=args.vFolds)
    print("The expected gain for the data is: {}".format(expGain))
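# Hypothetical invocation (the script and file names below are placeholders, not from
# the original project); the quoted tuple is parsed with eval() into the measure-space
# dimensions that reader.readData expects:
#
#   python classify.py measures.txt "(8, 8)" priors.txt conditionals.txt -e gain.txt -v 5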
import os
import sys
import reader
import matplotlib.pyplot as plt

# Getting the file name of the data
fName = sys.argv[1]
print fName

# Using a reader module to grab the data and put it into lists:
# the measured data and the correctness data.
data = reader.readData(fName)
measured = data[0]
correct = data[2]

# Creating lists for the false positive rate and the true positive rate.
fpr = list()
tpr = list()

# Sorting them in descending order using zip:
# first pairs them, then sorts, then unpacks them into two lists.
measured, correct = (list(t) for t in zip(*sorted(zip(measured, correct), reverse=True)))

# Counting the total number of correct and incorrect readings.
totalCorrect = correct.count('Y')
totalIncorrect = len(correct) - totalCorrect

# Accounting for the case where there are no correct readings, or no incorrect readings.
if totalCorrect == 0:
    totalCorrect = 1
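# --- Illustrative sketch: the remainder of the original script is not shown here. ---
# A typical ROC sweep walks the correctness labels in descending score order,
# accumulating true/false positive counts and normalising them into tpr/fpr points.
# The max(..., 1) guards mirror the zero-count handling above; this is an assumption
# about how the script might continue, not the original code.
tp, fp = 0, 0
for label in correct:
    if label == 'Y':
        tp += 1
    else:
        fp += 1
    tpr.append(float(tp) / max(totalCorrect, 1))
    fpr.append(float(fp) / max(totalIncorrect, 1))

plt.plot(fpr, tpr)
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.show()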
# numpy and namedtuple are required by the code below; rd, AG, AGL and AGB are
# project-local modules (the data reader and the genetic-algorithm variants) assumed
# to be imported in the full script.
from collections import namedtuple

import numpy as np

import evaluator as ev


def printSol(file):
    sol = np.load(file)
    print("Score:", sol["score"])
    print("Permutation:")
    print(' '.join(map(str, sol["chromosome"])))


genericParameters = namedtuple(
    "genericParameters",
    "populationSize crossProbability mutationProbability")

np.random.seed(12345678)
parameters = genericParameters(100, 0.5, 0.02)

# QAP instance (Taillard's tai256c): problem size, weight matrix and distance matrix.
problemDim, weightMtx, distanceMtx = rd.readData("tai256c.dat")

ag = AG.AG(problemDim, weightMtx, distanceMtx)
agl = AGL.AGL(problemDim, weightMtx, distanceMtx)
agb = AGB.AGB(problemDim, weightMtx, distanceMtx)

print(agl.AGL(parameters))

# for cp in [0.1, 0.5, 0.7]:
#     for mp in [0, 0.01, 0.05, 0.1]:
#         parameters = genericParameters(100, cp, mp)
#         agl.AGL(parameters)

# printSol("resultsLamarck20Best/PS100CP0.5MP0.02iter228score44804670time1104.5440604686737.npy")
# NOTE: assumes the module-level imports from the full script (Flask's request, json,
# datetime/timedelta, pygal with LightColorizedStyle/CleanStyle, and reader).
def avgChart():
    days = []
    v1, v2, v3, v5 = [], [], [], []
    l1, p1, t1, t2 = [], [], [], []
    sg1, sg2, sg3, sg4, sg5 = [], [], [], [], []

    dateFormat = '%Y-%m-%d'
    beginStr = request.form.get('begin', type=str)
    endStr = request.form.get('end', type=str)
    chartType = request.form.get('chartType', type=str)
    machines = request.form.get('mach', type=str)
    mach = json.loads(machines)
    begin = datetime.strptime(beginStr, dateFormat)
    end = datetime.strptime(endStr, dateFormat)
    delta = end - begin

    # Loop thru days and add them to days list
    for i in range(delta.days + 1):
        if i != 0:
            days.append(str(begin + timedelta(days=i))[5:10].replace('-', '/'))

    # Setup style of chart
    if chartType == 'Bar':
        user_chart = pygal.Bar(style=LightColorizedStyle)
    elif chartType == 'Line':
        user_chart = pygal.Line(style=CleanStyle)
    elif chartType == 'Stacked':
        user_chart = pygal.StackedLine(fill=True)

    # Setup labels on x axis and the title of the chart
    user_chart.x_labels = days
    user_chart.title = days[0] + ' - ' + days[-1] + ' Plant 1 Daily Sheet Utilization by Machine'

    # Map machine codes to their display names and data series.
    machineSeries = {
        'v1': ('Vipros 1', v1), 'v2': ('Vipros 2', v2), 'v3': ('Vipros 3', v3),
        'v5': ('Vipros 5', v5), 'l1': ('Salvagnini', l1), 'p1': ('Pulsar', p1),
        't1': ('FMS 1', t1), 't2': ('FMS 2', t2), 'sg1': ('SG 1', sg1),
        'sg2': ('SG 2', sg2), 'sg3': ('SG 3', sg3), 'sg4': ('SG 4', sg4),
        'sg5': ('SG 5', sg5),
    }

    # Loop thru machines list and assign correct day and machine to be plotted on chart
    for i in mach:
        label, series = machineSeries.get(i, (None, None))
        if series is not None:
            # Add data to chart by machine; the series list is filled in from the
            # day data below, matching the original ordering of operations.
            user_chart.add(label, series)
        data = reader.readData(i)
        for key in data:
            if key[0:5] in days and series is not None:
                series.append(data[key])

    reader.cleanUp()
    # Release the local references; the chart keeps its own references to the series.
    days = v1 = v2 = v3 = v5 = l1 = p1 = t1 = t2 = sg1 = sg2 = sg3 = sg4 = sg5 = None

    chart = user_chart.render(is_unicode=True)
    return chart
import tensorflow as tf
# Assumed imports for the names used below (old Keras TensorFlow backend and NLTK):
import keras.backend.tensorflow_backend as KTF
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer

import reader
import summarize

if __name__ == "__main__":
    SPLIT_SIZE = 0.3
    num_labels = 4
    reviewer = "Steve+Rhodes"

    # Limit the GPU memory TensorFlow grabs and register the session with Keras.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    KTF.set_session(sess)

    data = reader.readData(scale=num_labels)
    reviews = [entry[0] for entry in data[reviewer]]
    # summaries = [summarize.summarizeContent(review, sentences_count=3) for review in reviews]
    raw_docs_train = reviews
    # raw_docs_train = [summarize.firstSentence(review)[:10] for review in reviews]
    sentiment_train = [entry[1] for entry in data[reviewer]]
    # print pd.value_counts(sentiment_train)
    # print num_labels

    # text pre-processing
    stop_words = set(stopwords.words('english'))
    stop_words.update(['.', ',', '"', "'", ':', ';', '(', ')', '[', ']', '{', '}'])
    stemmer = SnowballStemmer("english")

    print("pre-processing train docs...")
import sys

import reader


def f_id(r):
    return r


def Q(d):
    if d >= 0.8:
        return 1
    elif d > 0.3:
        return 2 * d - 0.6
    else:
        return 0


file_name = str(sys.argv[1])

# E - number of experts
# S - number of alternatives
# R - the experts' matrices, one per expert  // len(R) == E
E, S, R = reader.readData(file_name)

# R_hasz - the R# matrix:
#   rows    - experts
#   columns - alternatives
# R#|d1|d2|
# e1|__|__|
# e2|  |  |
R_hasz = []
for expertMatrix in R:
    # print expertMatrix
    # print "+++++"
    # f_2 is defined elsewhere in the full script (not shown in this excerpt).
    R_hasz.append([round(sum([f_2(r, 0.5) for r in row]) / float(S - 1), 2) for row in expertMatrix])

# for row in R_hasz:
#     print row
def main():
    extractData = False
    extractTestingData = False
    helper = helperClass.Helper()

    path_to_training_directory = "data/Train"
    path_to_testing_directory = "data/Test"
    path_to_training_labels = "data/Train/GroundTruth/groundTruth.txt"
    path_to_testing_labels = "data/Test/GroundTruth/groundTruth.txt"

    if extractData:
        truths = open(path_to_training_labels, "r").read().split("\n")
        print "Extracting user training data..."
        userData = []
        for i in range(1, len(truths)):
            userData.append(reader.readData(i, helper, path_to_training_directory))
            sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i) / (len(truths) - 1) * 100), i, len(truths) - 1))
            sys.stdout.flush()
        print "\r"
        pickle.dump(userData, open("userData.pkl", "wb"))
    else:
        userData = pickle.load(open("userData.pkl", "rb"))

    allWords = set()
    userWords = {}
    print "Extracting unique words from user data..."
    for i in range(0, len(userData)):
        userWords[i] = {}
        for j in userData[i]:
            userWords[i][j] = helper.getUserWords(userData[i], j)
            allWords = allWords.union(userWords[i][j])
        sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i + 1) / len(userData) * 100), i + 1, len(userData)))
        sys.stdout.flush()
    print "\n" + str(len(allWords)) + " unique words found.\n"
    # print allWords

    helper.setFeatureList(sorted(allWords))
    with open('allWords.txt', 'w') as outfile:
        json.dump(sorted(allWords), outfile)

    featureVectors = {}
    print "Generating feature vectors..."
    for j in userData[0]:
        featureVectors[j] = []
    for i in range(0, len(userData)):
        for j in userData[i]:
            featureVectors[j].append(helper.getFeatureVector(userWords[i][j]))
        sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i + 1) / len(userData) * 100), i + 1, len(userData)))
        sys.stdout.flush()
    # for j in range(0, len(userData[0])):
    #     featureVectors[j] = []
    #     for i in range(0, len(userData)):
    #         featureVectors[j].append(helper.getFeatureVector(userWords[i]))
    #         sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i+1)/len(userData)*100), i+1, len(userData)))
    #         sys.stdout.flush()
    print "\r"

    labelVectors = helper.getLabelVectors(path_to_training_labels)

    print "Training SVM models..."
    params = svm_parameter()
    params.C = 10
    params.kernel_type = LINEAR
    # labels = labelVectors[0]

    models = {}
    # CREATE ONE MODEL FOR EACH category and data source.
    # Userdata is an array of objects, each object containing three objects with data from each source.
    for i in range(0, len(labelVectors)):       # Loop 1-20 (each category)
        models[i] = {}
        for j in userData[0]:                   # Loop through 1-3 (each data source)
            problem = svm_problem(labelVectors[i], featureVectors[j])
            models[i][j] = svm_train(problem, params)
    pprint(models)
    # problem = svm_problem(labels, featureVectors)
    # model = svm_train(problem, params)

    if extractTestingData:
        truths = open(path_to_testing_labels, "r").read().split("\n")
        print "Extracting user testing data..."
        userIdPattern = re.compile("U(\d*?)gnd.txt")
        userIDs = userIdPattern.findall(" ".join(os.listdir(path_to_testing_directory + "/GroundTruth")))
        userIDs = map(int, userIDs)
        userData = []
        for i in range(0, len(userIDs)):
            userData.append(reader.readData(userIDs[i], helper, path_to_testing_directory))
            sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i + 1) / (len(userIDs)) * 100), i + 1, len(userIDs)))
            sys.stdout.flush()
        print "\r"
        pickle.dump(userData, open("userTestingData.pkl", "wb"))
    else:
        userData = pickle.load(open("userTestingData.pkl", "rb"))

    print "Generating feature vectors..."
    featureVectors = {}
    # Feature vectors should be an object containing three arrays, one for each data source
    for i in userData[0]:
        featureVectors[i] = []
    for i in range(0, len(userData)):
        for j in userData[i]:
            featureVectors[j].append(helper.getFeatureVector(userWords[i][j]))
        sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i + 1) / len(userData) * 100), i + 1, len(userData)))
        sys.stdout.flush()
    # for j in range(0, len(userData[0])):
    #     featureVectors[j] = []
    #     print "Generating feature vectors for " + str(j)
    #     for i in range(0, len(userData)):
    #         featureVectors[j].append(helper.getFeatureVector(helper.getUserWords(userData[i][j])))
    #         sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i+1)/len(userData)*100), i+1, len(userData)))
    #         sys.stdout.flush()
    print "\r"

    labelVectors = helper.getLabelVectors(path_to_testing_labels)
    avgAcc = 0.0
    # labelContainer = []
    labelContainer = {}
    for i in models[0]:
        labelContainer[i] = []

    print "Classifying dataset..."
    for i in range(0, len(models)):
        for j in models[i]:
            p_labels, p_accs, p_vals = svm_predict(labelVectors[i], featureVectors[j], models[i][j])
            labelContainer[j].append(p_labels)
            avgAcc = avgAcc + p_accs[0]
    avgAcc = avgAcc / (len(models) * 3)
    print "Average accuracy: " + str(avgAcc) + "%"

    for category in labelContainer:
        reader.saveOutput(labelContainer[category], 'data/outputLabels-' + category + ".csv")
    # reader.saveOutput(labelContainer, 'data/outputLabels.csv')
    reader.getSaK()
    pickle.dump(labelContainer, open("outputLabels.pkl", "wb"))
# Fragment of a panel-plotting routine: axes, i_row, i_col and days_back are defined
# by the surrounding code, which is not part of this excerpt.
# days to double deaths: data[2:] / data[:-2] is the two-day growth ratio, its square
# root the (geometric-mean) daily growth factor, and log(2) / log(factor) the number
# of days needed to double the death count.
for country, data in deaths.items():
    axes[i_row, i_col].plot(range(-days_back + 2, 0),
                            np.log(2) / np.log(np.sqrt(data[2:] / data[:-2])),
                            label=country)
axes[i_row, i_col].set_xlabel('days before today')
axes[i_row, i_col].set_ylabel('d to double deaths')
axes[i_row, i_col].set_ylim((0., 10.))
axes[i_row, i_col].grid()


if __name__ == "__main__":
    # read absolute data
    deaths, conf, recovered, people, countries = readData()

    # create relative datasets
    confPerMillion = {}
    for k in conf.keys():
        confPerMillion[k] = conf[k] / people[k]

    deathsPerMillion = {}
    for k in deaths.keys():
        deathsPerMillion[k] = deaths[k] / people[k]

    recoveredPerMillion = {}
    # for k in recovered.keys():
    #     recoveredPerMillion[k] = recovered[k] / people[k]

    deathsPerConfirmed = {}
    for k in deaths.keys():
        # Loop body truncated in the source; presumably the deaths-to-confirmed ratio,
        # following the pattern of the dictionaries above.
        deathsPerConfirmed[k] = deaths[k] / conf[k]
def main():
    extractData = False
    extractTestingData = False
    helper = helperClass.Helper()

    path_to_training_directory = "data/Train"
    # path_to_testing_directory = "data/Test"
    path_to_testing_directory = "multi-view-online-testing"
    path_to_training_labels = "data/Train/GroundTruth/groundTruth.txt"
    path_to_testing_labels = "multi-view-online-testing/GroundTruth/groundTruth.txt"
    # path_to_testing_labels = "data/Test/GroundTruth/groundTruth.txt"

    if extractData:
        truths = open(path_to_training_labels, "r").read().split("\n")
        print "Extracting user training data..."
        userData = []
        for i in range(1, len(truths)):
            userData.append(reader.readData(i, helper, path_to_training_directory))
            sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i) / (len(truths) - 1) * 100), i, len(truths) - 1))
            sys.stdout.flush()
        print "\r"
        pickle.dump(userData, open("userData.pkl", "wb"))
    else:
        userData = pickle.load(open("userData.pkl", "rb"))

    allWords = set()
    userWords = {}
    print "Extracting unique words from user data..."
    for i in range(0, len(userData)):
        userWords[i] = helper.getUserWords(userData[i])
        allWords = allWords.union(userWords[i])
        sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i + 1) / len(userData) * 100), i + 1, len(userData)))
        sys.stdout.flush()
    print "\n" + str(len(allWords)) + " unique words found.\n"
    # print allWords

    helper.setFeatureList(sorted(allWords))
    with open("allWords.txt", "w") as outfile:
        json.dump(sorted(allWords), outfile)

    featureVectors = []
    print "Generating feature vectors..."
    for i in range(0, len(userData)):
        featureVectors.append(helper.getFeatureVector(userWords[i]))
        sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i + 1) / len(userData) * 100), i + 1, len(userData)))
        sys.stdout.flush()
    print "\r"

    labelVectors = helper.getLabelVectors(path_to_training_labels)

    print "Training SVM models..."
    params = svm_parameter()
    params.C = 10
    params.kernel_type = LINEAR
    # labels = labelVectors[0]

    models = {}
    for i in range(0, len(labelVectors)):
        problem = svm_problem(labelVectors[i], featureVectors)
        models[i] = svm_train(problem, params)
    # problem = svm_problem(labels, featureVectors)
    # model = svm_train(problem, params)

    if extractTestingData:
        truths = open(path_to_testing_labels, "r").read().split("\n")
        print "Extracting user testing data..."
        userIdPattern = re.compile("U(\d*?)gnd.txt")
        userIDs = userIdPattern.findall(" ".join(os.listdir(path_to_testing_directory + "/GroundTruth")))
        userIDs = map(int, userIDs)
        userData = []
        for i in range(0, len(userIDs)):
            userData.append(reader.readData(userIDs[i], helper, path_to_testing_directory))
            sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i + 1) / (len(userIDs)) * 100), i + 1, len(userIDs)))
            sys.stdout.flush()
        print "\r"
        pickle.dump(userData, open("userTestingData.pkl", "wb"))
    else:
        userData = pickle.load(open("userTestingData.pkl", "rb"))

    print "Generating feature vectors..."
    featureVectors = []
    for i in range(0, len(userData)):
        featureVectors.append(helper.getFeatureVector(helper.getUserWords(userData[i])))
        sys.stdout.write("\r%5.2f%% (%i/%i)" % ((float(i + 1) / len(userData) * 100), i + 1, len(userData)))
        sys.stdout.flush()
    print "\r"

    labelVectors = helper.getLabelVectors(path_to_testing_labels)
    avgAcc = 0.0
    labelContainer = []

    print "Classifying dataset..."
    for i in range(0, len(models)):
        p_labels, p_accs, p_vals = svm_predict(labelVectors[i], featureVectors, models[i])
        labelContainer.append(p_labels)
        avgAcc = avgAcc + p_accs[0]
    avgAcc = avgAcc / len(models)
    print "Average accuracy: " + str(avgAcc) + "%"

    reader.saveOutput(labelContainer, "data/outputLabels.csv")
    pickle.dump(labelContainer, open("outputLabels.pkl", "wb"))
    reader.getSaK()