Example #1
def aggregate(func, columnName, tableName, dictionary):

    # print('[FROM AGGREGATE]', dictionary)
    if columnName == '*':
        sys.exit("[ERROR]: sql query syntax error, aggregate functions not applicable on *")
    if columnName not in dictionary[tableName]:
        error = "[ERROR]: no column named " + columnName + " found in " + tableName
        sys.exit(error)

    tName = tableName + '.csv'
    fileData = []
    readFile(tName, fileData)
    colList = []
    for data in fileData:
        colList.append(int(data[dictionary[tableName].index(columnName)]))

    f = func.lower()
    if f == 'max':
        print(max(colList))
    elif f == 'min':
        print(min(colList))
    elif f == 'sum':
        print(sum(colList))
    elif f == 'avg':
        print(sum(colList) / len(colList))
    elif f == 'distinct':
        distinct(colList, columnName, tableName, dictionary)
    else:
        print('[ERROR]: unknown function: "' + func + '"')
Example #2
def processWhereJoin(whereStr, columnNames, tableNames, dictionary):
    tableNames.reverse()

    l1 = []
    l2 = []
    readFile(tableNames[0] + '.csv', l1)
    readFile(tableNames[1] + '.csv', l2)

    fileData = []
    for item1 in l1:
        for item2 in l2:
            fileData.append(item2 + item1)
    
    dictionary["sample"] = []
    for i in dictionary[tableNames[1]]:
        dictionary["sample"].append(tableNames[1] + '.' + i)
    for i in dictionary[tableNames[0]]:
        dictionary["sample"].append(tableNames[0] + '.' + i)

    dictionary["test"] = dictionary[tableNames[1]] + dictionary[tableNames[0]]
    print (dictionary)
    tableNames.remove(tableNames[0])
    tableNames.remove(tableNames[0])
    tableNames.insert(0, "sample")

    if len(columnNames) == 1 and columnNames[0] == '*':
        columnNames = dictionary[tableNames[0]]

    for i in columnNames:
        print(i, end='\t')
    print('\n')

    a = whereStr.split(" ")
    for data in fileData:
        # build the boolean expression for this row and evaluate it once,
        # rather than re-running eval() for every selected column
        expr = evaluate(a, tableNames, dictionary, data)
        try:
            matched = eval(expr)
        except Exception:
            sys.exit("[ERROR]: could not evaluate where clause")
        if not matched:
            continue
        for col in columnNames:
            if '.' in col:
                print(data[dictionary[tableNames[0]].index(col)], end='\t\t')
            else:
                print(data[dictionary["test"].index(col)], end='\t\t')
        print('\n')

    del dictionary['sample']
    del dictionary['test']  # clean up both temporary schemas added above
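processWhereJoin (and processWhere in Example #6) depends on an evaluate() helper that turns the tokenized where clause plus one data row into a Python expression for eval(). That helper is not shown; a hypothetical sketch for simple "col op value [AND/OR ...]" clauses:

def evaluate(tokens, tableNames, dictionary, data):
    # hypothetical sketch: swap column names for this row's values and SQL
    # operators for their Python equivalents, then join into one expression
    out = []
    for tok in tokens:
        if tok in dictionary[tableNames[0]]:
            out.append(data[dictionary[tableNames[0]].index(tok)])
        elif tok in dictionary.get("test", []):
            out.append(data[dictionary["test"].index(tok)])
        elif tok == '=':
            out.append('==')
        elif tok.upper() in ('AND', 'OR'):
            out.append(tok.lower())
        else:
            out.append(tok)
    return ' '.join(out)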
Example #3
from random import sample
from time import sleep

# readFile, setHeaders, getRateLimit, parseTweetIds, getTweetsFromId and
# writeToFile are project helpers

def downloadAllTweets(start, end, tweetTargetSource):
    numRequests = 0
    tweetIdSource = 'data/tweets.csv'
    dataLines = readFile(tweetIdSource).splitlines()[start:end]
    reducedDataLines = sample(dataLines, 400000)  # random sample of 400000 ids (requires end - start >= 400000)
    fieldnames = ['id_str', 'created_at', 'coordinates', 'hashtags', 'text']
    setHeaders(tweetTargetSource, fieldnames)
    requestStartRow = 0
    requestEndRow = 99
    while requestEndRow <= len(reducedDataLines):
        # each outer pass covers 60 requests of 100 ids = 6000 rows
        print("Parsing rows " + str(requestStartRow) + '-' + str(requestStartRow + 6000) + ' out of ' + str(len(reducedDataLines)))
        print(str(float(requestStartRow) / len(reducedDataLines) * 100) + "% complete")
        # inner loop handles the API rate-limit logic
        while numRequests < 60:
            if getRateLimit('statuses')['resources']['statuses']['/statuses/lookup']['remaining'] == 0:
                break
            tweetIdList = parseTweetIds(reducedDataLines, requestStartRow, requestEndRow)
            tweetData = getTweetsFromId(tweetIdList)
            print("Request: " + str(numRequests))
            if tweetData is not None:
                writeToFile(tweetData, tweetTargetSource, fieldnames)
            requestStartRow += 100
            requestEndRow += 100
            numRequests += 1
        print("Rate limit exceeded. Waiting...\n")
        while getRateLimit('statuses')['resources']['statuses']['/statuses/lookup']['remaining'] == 0:
            sleep(120)  # suspend until the rate-limit window refreshes
        numRequests = 0
    print("Done!")
Example #4
def selectColumns(columnNames, tableNames, dictionary):

    if len(columnNames) == 1 and columnNames[0] == '*':
        columnNames = dictionary[tableNames[0]]

    for i in columnNames:
        if i not in dictionary[tableNames[0]]:
            error = "[ERROR]: no column named '" + \
                i + "' found in " + tableNames[0]
            sys.exit(error)

    printHeader(columnNames, tableNames, dictionary)

    tName = tableNames[0] + '.csv'
    fileData = []
    readFile(tName, fileData)

    printData(fileData, columnNames, tableNames, dictionary)
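printHeader and printData are likewise not shown. Plausible sketches, with signatures taken from the calls above and bodies assumed to match how the other examples print rows:

def printHeader(columnNames, tableNames, dictionary):
    # assumed helper: tab-separated column names, then a blank line
    for col in columnNames:
        print(col, end='\t')
    print('\n')

def printData(fileData, columnNames, tableNames, dictionary):
    # assumed helper: print the selected columns of every row
    for data in fileData:
        for col in columnNames:
            print(data[dictionary[tableNames[0]].index(col)], end='\t\t')
        print('\n')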
Example #5
def join(columnNames, tableNames, dictionary):
    tableNames.reverse()

    l1 = []
    l2 = []
    readFile(tableNames[0] + '.csv', l1)
    readFile(tableNames[1] + '.csv', l2)

    fileData = []
    for item1 in l1:
        for item2 in l2:
            fileData.append(item2 + item1)

    # dictionary["sample"] = dictionary[b] + dictionary[a]
    dictionary["sample"] = []
    for i in dictionary[tableNames[1]]:
        dictionary["sample"].append(tableNames[1] + '.' + i)
    for i in dictionary[tableNames[0]]:
        dictionary["sample"].append(tableNames[0] + '.' + i)

    dictionary["test"] = dictionary[tableNames[1]] + dictionary[tableNames[0]]
    # print (dictionary["test"])

    tableNames.remove(tableNames[0])
    tableNames.remove(tableNames[0])
    tableNames.insert(0, "sample")

    if len(columnNames) == 1 and columnNames[0] == '*':
        columnNames = dictionary[tableNames[0]]

    for i in columnNames:
        print(i, end='\t')
    print('\n')

    for data in fileData:
        for col in columnNames:
            if '.' in col:
                print(data[dictionary[tableNames[0]].index(col)], end='\t\t')
            else:
                print(data[dictionary["test"].index(col)], end='\t\t')
        print('\n')
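Because join mutates both tableNames and dictionary in place, a call site has to account for that. A hypothetical invocation (tables, columns and schemas invented for illustration):

tables = ['A', 'B']
dictionary = {'A': ['id', 'x'], 'B': ['id', 'y']}  # hypothetical schemas for A.csv and B.csv
join(['A.id', 'y'], tables, dictionary)
# afterwards tables == ['sample'], and dictionary has gained the temporary
# 'sample' (qualified) and 'test' (unqualified) schema entries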
Example #6
def processWhere(whereStr, columnNames, tableNames, dictionary):
    a = whereStr.split(" ")

    if a[0] not in dictionary[tableNames[0]]:
        error = "[ERROR]: unknown column '" + a[0] + "' in where clause"
        sys.exit(error)
    # a compound clause looks like: col op val AND/OR col op val, so the
    # second column name sits at index 4; guard len(a) > 4 to avoid IndexError
    if len(a) > 4 and a[4] not in dictionary[tableNames[0]]:
        error = "[ERROR]: unknown column '" + a[4] + "' in where clause"
        sys.exit(error)

    if len(columnNames) == 1 and columnNames[0] == '*':
        columnNames = dictionary[tableNames[0]]

    printHeader(columnNames, tableNames, dictionary)

    tName = tableNames[0] + '.csv'
    fileData = []
    readFile(tName, fileData)

    for data in fileData:
        # build the boolean expression for this row and evaluate it once,
        # rather than re-running eval() for every selected column
        expr = evaluate(a, tableNames, dictionary, data)
        try:
            matched = eval(expr)
        except Exception:
            sys.exit("[ERROR]: could not evaluate where clause")
        if not matched:
            continue
        for col in columnNames:
            print(data[dictionary[tableNames[0]].index(col)], end="\t\t")
        print('\n')
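Both where-clause routines push the row test through Python's eval(), so anything that reaches whereStr is executed as code. An eval-free alternative sketch (not from the source) whitelists the comparison operators instead:

import operator

# map the SQL operators these examples accept onto Python functions
OPS = {'=': operator.eq, '<': operator.lt, '>': operator.gt,
       '<=': operator.le, '>=': operator.ge, '<>': operator.ne}

def compare(lhs, op, rhs):
    # compare two cell values with a whitelisted operator instead of eval()
    return OPS[op](int(lhs), int(rhs))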
Example #7
# rd, hc and knn are project-local modules (data readers and classifiers)
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB


def main():
    """Prompts user for data set and classifier
       Displays percent correct"""

    print("Which data would you like to use?")
    print("1 - Iris")
    print("2 - From file")
    option = int(input())

    # default to Iris with improper input
    if option == 2:
        data = rd.readFile()
    else:
        data = rd.readIris()

    data_train, data_test, target_train, target_test = \
        train_test_split(data.data, data.target, test_size=.3)

    print("Which classifier would you like to use?")
    print("1 - GaussianNB")
    print("2 - Hard Coded")
    print("3 - KNN")

    option = int(input())

    # default to hard_coded_classifier with improper input
    if option == 1:
        classifier = GaussianNB()
    elif option == 2:
        classifier = hc.HardCodedClassifier()
    else:
        classifier = knn.Classifier()

    model = classifier.fit(data_train, target_train)
    target_predicted = model.predict(data_test)

    # loop through test target and predicted target
    # if they are equal, increment number correct
    num_correct = 0
    for predicted, actual in zip(target_predicted, target_test):
        if predicted == actual:
            num_correct += 1

    # get percent correct
    percent_accurate = round((num_correct / len(target_predicted)) * 100, 2)

    print("Classifier is", percent_accurate, "% accurate.")
Example #8
import random
import string

import read_data
import ship_file

n1 = int(input("Enter the size: "))  # int() instead of eval(): eval on raw input is unsafe
lst_data = read_data.readFile('data.txt')
lst_of_grid = read_data.read_field(lst_data)
lst_alfabet = list(string.ascii_uppercase)


def is_valid(lst_of_grid):
    lst = []
    # counters for ships of size 1-4, renamed so they no longer shadow
    # (and break, via UnboundLocalError) the global n1 compared below
    c1 = c2 = c3 = c4 = 0
    if len(lst_of_grid) ** 0.5 == n1:
        for i in lst_of_grid:
            size = ship_file.ship_size(lst_of_grid, i[0])
            if size == 1:
                lst.append(1)
                c1 += 1
            elif size == 2:
                lst.append(2)
                c2 += 1
            elif size == 3:
                lst.append(3)
                c3 += 1
            elif size == 4:
                lst.append(4)
                c4 += 1
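        # hypothetical completion, not in the original snippet (it is cut off
        # here): under classic Battleship rules a valid fleet has four size-1,
        # three size-2, two size-3 and one size-4 ship
        return c1 == 4 and c2 == 3 and c3 == 2 and c4 == 1
    return False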
Example #9
File: trial.py  Project: joosm/ICP-1
            if lb >= optErrorT:
                continue

            nodeTrans.ub = ub
            nodeTrans.lb = lb
            queueTrans.put(nodeTrans)

    # print(count)
    return optErrorT, T


if __name__ == '__main__':
    data_dir = 'point_cloud_registration'  # renamed from `dir` to avoid shadowing the builtin
    filenames = ['pointcloud1.fuse', 'pointcloud2.fuse']
    names = ['pointcloud1', 'pointcloud2']
    pointcloud1 = readFile('{}/{}'.format(data_dir, filenames[0]), names[0])
    pointcloud2 = readFile('{}/{}'.format(data_dir, filenames[1]), names[1])

    print(pointcloud1.shape)
    print(pointcloud2.shape)
    i1 = None
    i2 = None
    if pointcloud1.shape[1] == 4:
        # store available intensities separately
        i1 = pointcloud1[:, 3]
        i2 = pointcloud2[:, 3]
    else:
        # create homogeneous coordinates to reduce the cost of transformations
        pointcloud1 = np.hstack((pointcloud1, np.ones(
            (pointcloud1.shape[0], 1))))
        pointcloud2 = np.hstack((pointcloud2, np.ones(
            (pointcloud2.shape[0], 1))))
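Once the clouds are homogeneous (N, 4) arrays, a rigid transform collapses to a single matrix product. A minimal illustrative sketch (the transform T and the points are invented, not from the source):

import numpy as np

T = np.eye(4)
T[:3, 3] = [1.0, 2.0, 0.5]  # hypothetical translation-only rigid transform

points = np.hstack((np.random.rand(5, 3), np.ones((5, 1))))  # (N, 4) homogeneous
transformed = points @ T.T  # applies the whole transform in one matmul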