def aggregate(func, columnName, tableName, dictionary):
    """Apply an aggregate function (max/min/sum/avg/distinct) to one column of a table."""
    if columnName == '*':
        sys.exit("[ERROR]: sql query syntax error, aggregate functions not applicable on *")
    if columnName not in dictionary[tableName]:
        sys.exit("[ERROR]: no column named " + columnName + " found in " + tableName)
    # Load the table and collect the requested column as integers.
    fileData = []
    readFile(tableName + '.csv', fileData)
    colList = [int(data[dictionary[tableName].index(columnName)]) for data in fileData]
    f = func.lower()
    if f == 'max':
        print(max(colList))
    elif f == 'min':
        print(min(colList))
    elif f == 'sum':
        print(sum(colList))
    elif f == 'avg':
        print(sum(colList) / len(colList))
    elif f == 'distinct':
        distinct(colList, columnName, tableName, dictionary)
    else:
        print('[ERROR]: unknown function: "' + func + '"')
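# Example usage -- a minimal sketch, not from the original source. It assumes a
# 'students.csv' file on disk and a schema dictionary mapping each table name to
# its ordered column list (the table and column names here are hypothetical):
schema = {'students': ['id', 'name', 'marks']}
aggregate('avg', 'marks', 'students', schema)  # prints the mean of the marks column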
def processWhereJoin(whereStr, columnNames, tableNames, dictionary):
    """Cross join two tables, then print only the rows that satisfy the where clause."""
    tableNames.reverse()
    l1 = []
    l2 = []
    readFile(tableNames[0] + '.csv', l1)
    readFile(tableNames[1] + '.csv', l2)
    # Build the cartesian product of the two tables.
    fileData = []
    for item1 in l1:
        for item2 in l2:
            fileData.append(item2 + item1)
    # 'sample' holds qualified column names (table.column); 'test' holds the bare names.
    dictionary["sample"] = []
    for i in dictionary[tableNames[1]]:
        dictionary["sample"].append(tableNames[1] + '.' + i)
    for i in dictionary[tableNames[0]]:
        dictionary["sample"].append(tableNames[0] + '.' + i)
    dictionary["test"] = dictionary[tableNames[1]] + dictionary[tableNames[0]]
    tableNames.remove(tableNames[0])
    tableNames.remove(tableNames[0])
    tableNames.insert(0, "sample")
    if len(columnNames) == 1 and columnNames[0] == '*':
        columnNames = dictionary[tableNames[0]]
    for i in columnNames:
        print(i, end='\t')
    print('\n')
    a = whereStr.split(" ")
    for data in fileData:
        # Build a boolean expression for this row and evaluate it once per row.
        string = evaluate(a, tableNames, dictionary, data)
        try:
            evalCheck = eval(string)
        except Exception:
            sys.exit("[ERROR]: could not evaluate where clause")
        if evalCheck:
            for col in columnNames:
                if '.' in col:
                    print(data[dictionary[tableNames[0]].index(col)], end='\t\t')
                else:
                    print(data[dictionary["test"].index(col)], end='\t\t')
            print('\n')
    del dictionary['sample']
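# Example usage -- a hedged sketch with hypothetical tables; it assumes evaluate()
# can resolve the qualified column names appearing in the clause:
schema = {'students': ['id', 'name'], 'grades': ['id', 'marks']}
processWhereJoin('students.id = grades.id', ['*'], ['students', 'grades'], schema)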
from random import sample  # needed for the 400000-row sample below
from time import sleep     # needed for the rate-limit backoff below


def downloadAllTweets(start, end, tweetTargetSource):
    """Hydrate tweet IDs from data/tweets.csv in batches of 100, respecting the API rate limit."""
    numRequests = 0
    tweetIdSource = 'data/tweets.csv'
    dataLines = readFile(tweetIdSource).splitlines()[start:end]
    reducedDataLines = sample(dataLines, 400000)  # choose a random sample of 400000 rows
    fieldnames = ['id_str', 'created_at', 'coordinates', 'hashtags', 'text']
    setHeaders(tweetTargetSource, fieldnames)
    requestStartRow = 0
    requestEndRow = 99
    while requestEndRow <= len(reducedDataLines):
        # 60 requests of 100 IDs each = 6000 rows per pass of the outer loop.
        print("Parsing rows " + str(requestStartRow) + '-' + str(requestStartRow + 6000) +
              ' out of ' + str(len(reducedDataLines)))
        print(str(float(requestStartRow) / float(len(reducedDataLines)) * 100) + "% complete")
        # Inner while loop handles the API rate-limit logic.
        while numRequests < 60:
            if getRateLimit('statuses')['resources']['statuses']['/statuses/lookup']['remaining'] == 0:
                break
            tweetIdList = parseTweetIds(reducedDataLines, requestStartRow, requestEndRow)
            tweetData = getTweetsFromId(tweetIdList)
            print("Request: " + str(numRequests))
            if tweetData is not None:
                writeToFile(tweetData, tweetTargetSource, fieldnames)
            requestStartRow += 100
            requestEndRow += 100
            numRequests += 1
        print("Rate Limit Exceeded. Waiting...\n")
        while getRateLimit('statuses')['resources']['statuses']['/statuses/lookup']['remaining'] == 0:
            sleep(120)  # suspend execution until the rate limit is refreshed
        numRequests = 0
    print("Done!")
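# Example usage -- a sketch only; it assumes Twitter API credentials are already
# wired into getRateLimit()/getTweetsFromId() and that data/tweets.csv holds enough
# IDs for the 400000-row sample. The call is commented out because it hits a live
# API, and the target filename is hypothetical:
# downloadAllTweets(0, 500000, 'data/hydrated_tweets.csv')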
def selectColumns(columnNames, tableNames, dictionary):
    """Print the requested columns of a single table ('*' expands to all columns)."""
    if len(columnNames) == 1 and columnNames[0] == '*':
        columnNames = dictionary[tableNames[0]]
    for i in columnNames:
        if i not in dictionary[tableNames[0]]:
            sys.exit("[ERROR]: no column named '" + i + "' found in " + tableNames[0])
    printHeader(columnNames, tableNames, dictionary)
    fileData = []
    readFile(tableNames[0] + '.csv', fileData)
    printData(fileData, columnNames, tableNames, dictionary)
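# Example usage -- minimal sketch with a hypothetical single-table schema:
selectColumns(['name', 'marks'], ['students'], {'students': ['id', 'name', 'marks']})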
def join(columnNames, tableNames, dictionary):
    """Cross join two tables and print the requested columns of every paired row."""
    tableNames.reverse()
    l1 = []
    l2 = []
    readFile(tableNames[0] + '.csv', l1)
    readFile(tableNames[1] + '.csv', l2)
    # Build the cartesian product of the two tables.
    fileData = []
    for item1 in l1:
        for item2 in l2:
            fileData.append(item2 + item1)
    # 'sample' holds qualified column names (table.column); 'test' holds the bare names.
    dictionary["sample"] = []
    for i in dictionary[tableNames[1]]:
        dictionary["sample"].append(tableNames[1] + '.' + i)
    for i in dictionary[tableNames[0]]:
        dictionary["sample"].append(tableNames[0] + '.' + i)
    dictionary["test"] = dictionary[tableNames[1]] + dictionary[tableNames[0]]
    tableNames.remove(tableNames[0])
    tableNames.remove(tableNames[0])
    tableNames.insert(0, "sample")
    if len(columnNames) == 1 and columnNames[0] == '*':
        columnNames = dictionary[tableNames[0]]
    for i in columnNames:
        print(i, end='\t')
    print('\n')
    for data in fileData:
        for col in columnNames:
            if '.' in col:
                print(data[dictionary[tableNames[0]].index(col)], end='\t\t')
            else:
                print(data[dictionary["test"].index(col)], end='\t\t')
        print('\n')
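# Example usage -- hedged sketch; prints every row pairing of two hypothetical tables:
join(['students.name', 'grades.marks'], ['students', 'grades'],
     {'students': ['id', 'name'], 'grades': ['id', 'marks']})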
def processWhere(whereStr, columnNames, tableNames, dictionary):
    """Print the requested columns of the rows that satisfy a simple where clause."""
    a = whereStr.split(" ")
    # Validate the column name(s) referenced by the clause; a compound clause
    # ('col1 = x AND col2 = y') puts its second column at index 4 after splitting.
    if a[0] not in dictionary[tableNames[0]]:
        sys.exit("[ERROR]: unknown column '" + a[0] + "' in where clause")
    if len(a) > 3 and a[4] not in dictionary[tableNames[0]]:
        sys.exit("[ERROR]: unknown column '" + a[4] + "' in where clause")
    if len(columnNames) == 1 and columnNames[0] == '*':
        columnNames = dictionary[tableNames[0]]
    printHeader(columnNames, tableNames, dictionary)
    fileData = []
    readFile(tableNames[0] + '.csv', fileData)
    for data in fileData:
        # Build a boolean expression for this row and evaluate it once per row.
        string = evaluate(a, tableNames, dictionary, data)
        try:
            evalCheck = eval(string)
        except Exception:
            sys.exit("[ERROR]: could not evaluate where clause")
        if evalCheck:
            for col in columnNames:
                print(data[dictionary[tableNames[0]].index(col)], end="\t\t")
            print('\n')
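# Example usage -- minimal sketch with a hypothetical single-table schema:
processWhere('marks > 80', ['name', 'marks'], ['students'],
             {'students': ['id', 'name', 'marks']})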
def main():
    """Prompts user for data set and classifier; displays percent correct."""
    print("Which data would you like to use?")
    print("1 - Iris")
    print("2 - From file")
    option = int(input())
    # Default to Iris with improper input.
    if option == 2:
        data = rd.readFile()
    else:
        data = rd.readIris()
    data_train, data_test, target_train, target_test = \
        train_test_split(data.data, data.target, test_size=.3)
    print("Which classifier would you like to use?")
    print("1 - GaussianNB")
    print("2 - Hard Coded")
    print("3 - KNN")
    option = int(input())
    # Default to the KNN classifier with improper input.
    if option == 1:
        classifier = GaussianNB()
    elif option == 2:
        classifier = hc.HardCodedClassifier()
    else:
        classifier = knn.Classifier()
    model = classifier.fit(data_train, target_train)
    target_predicted = model.predict(data_test)
    # Count how many predicted targets match the test targets.
    num_correct = 0
    for i in range(len(target_predicted)):
        if target_predicted[i] == target_test[i]:
            num_correct += 1
    # Convert the count to a percentage.
    percent_accurate = round((num_correct / len(target_predicted)) * 100, 2)
    print("Classifier is", percent_accurate, "% accurate.")
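# Standard entry-point guard (an addition; lets the module double as a script):
if __name__ == '__main__':
    main()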
import read_data
import ship_file
import string
import random

n1 = int(input("Enter the size: "))  # grid size; int() replaces the unsafe eval()
lst_data = read_data.readFile('data.txt')
lst_of_grid = read_data.read_field(lst_data)
lst_alfabet = [i for i in string.ascii_uppercase]


def is_valid(lst_of_grid):
    """Check that the grid side equals n1 and count the ships of each size."""
    lst = []
    # Local counters must not shadow the global grid size n1, as the original did.
    count1 = count2 = count3 = count4 = 0
    if len(lst_of_grid) ** (1 / 2) == n1:
        for i in lst_of_grid:
            size = ship_file.ship_size(lst_of_grid, i[0])
            if size == 1:
                lst.append(1)
                count1 += 1
            elif size == 2:
                lst.append(2)
                count2 += 1
            elif size == 3:
                lst.append(3)
                count3 += 1
            elif size == 4:
                lst.append(4)
                count4 += 1
    # Assumed completion (not in the original source): a standard fleet has four
    # 1-deck, three 2-deck, two 3-deck and one 4-deck ship.
    return (count1, count2, count3, count4) == (4, 3, 2, 1)
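# Example usage -- hedged sketch; prints whether the parsed field is a valid fleet:
print(is_valid(lst_of_grid))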
# (fragment) tail of the branch-and-bound loop over candidate translation nodes:
        if lb >= optErrorT:
            continue  # prune: this node cannot improve on the best error so far
        nodeTrans.ub = ub
        nodeTrans.lb = lb
        queueTrans.put(nodeTrans)
    return optErrorT, T


if __name__ == '__main__':
    data_dir = 'point_cloud_registration'  # renamed from 'dir' to avoid shadowing the builtin
    filenames = ['pointcloud1.fuse', 'pointcloud2.fuse']
    names = ['pointcloud1', 'pointcloud2']
    pointcloud1 = readFile('{}/{}'.format(data_dir, filenames[0]), names[0])
    pointcloud2 = readFile('{}/{}'.format(data_dir, filenames[1]), names[1])
    print(pointcloud1.shape)
    print(pointcloud2.shape)
    i1 = None
    i2 = None
    if pointcloud1.shape[1] == 4:
        # Store available intensities separately.
        i1 = pointcloud1[:, 3]
        i2 = pointcloud2[:, 3]
    else:
        # Create homogeneous coordinates to reduce the time of transformations.
        pointcloud1 = np.hstack((pointcloud1, np.ones((pointcloud1.shape[0], 1))))
        pointcloud2 = np.hstack((pointcloud2, np.ones((pointcloud2.shape[0], 1))))