예제 #1
0
the clustering. FEATURES is an array corresponding to the columns to use for
clustering. For example, FEATURES = [0, 2] will use columns 0 and 2. Each
column being clustered is assumed to have numeric values. DELIM specifies the
text delimiter to use to break the file apart
"""
def cluster_from_file(file, features, delim):
	"""
	Read FILE into a matrix (splitting on DELIM), cluster it on the columns
	listed in FEATURES, and return the clustered matrix after pruning the
	data that could not be clustered.
	"""
	matrix = fo.file_to_matrix(file, delim)
	# we only care about data we can cluster
	return prune_no_cluster_data(cluster(matrix, features))

"""
Prints cluster information for the tweet data passed in. Only tweets which
are assigned a cluster in the DBSCAN algorithm are printed. Tweets are
clustered based on latitude and longitude data
"""
if __name__ == '__main__':
	# Prefer piped input: a zero-timeout select() reports whether stdin
	# already has data waiting, without blocking.
	if select.select([sys.stdin], [], [], 0.0)[0]:
		infile = sys.stdin
	elif len(sys.argv) == 2:
		# open() raises on failure, so no extra None check is needed
		infile = open(sys.argv[1], 'r')
	else:
		print("Usage: " + sys.argv[0] + " <data file> [OR] <stdin>")
		# sys.exit is used because the bare exit() helper is only
		# installed by the site module
		sys.exit(-1)

	try:
		# cluster on columns 0 and 1 (presumably latitude and longitude)
		x_clus = cluster_from_file(infile, [0, 1], constants.delim)
		fo.print_matrix(x_clus)
	finally:
		# release the file we opened; stdin is left alone
		if infile is not sys.stdin:
			infile.close()
예제 #2
0

def cluster_from_file(file, features, delim):
    """
    Read FILE into a matrix (splitting on DELIM), cluster it on the columns
    listed in FEATURES, and return the clustered matrix after pruning the
    data that could not be clustered.
    """
    matrix = fo.file_to_matrix(file, delim)
    # we only care about data we can cluster
    return prune_no_cluster_data(cluster(matrix, features))


"""
Prints cluster information for the tweet data passed in. Only tweets which
are assigned a cluster in the DBSCAN algorithm are printed. Tweets are
clustered based on latitude and longitude data
"""
if __name__ == '__main__':
    # Prefer piped input: a zero-timeout select() reports whether stdin
    # already has data waiting, without blocking.
    if select.select([sys.stdin], [], [], 0.0)[0]:
        infile = sys.stdin
    elif len(sys.argv) == 2:
        # open() raises on failure, so no extra None check is needed
        infile = open(sys.argv[1], 'r')
    else:
        print("Usage: " + sys.argv[0] + " <data file> [OR] <stdin>")
        # sys.exit is used because the bare exit() helper is only
        # installed by the site module
        sys.exit(-1)

    try:
        # cluster on columns 0 and 1 (presumably latitude and longitude)
        x_clus = cluster_from_file(infile, [0, 1], constants.delim)
        fo.print_matrix(x_clus)
    finally:
        # release the file we opened; stdin is left alone
        if infile is not sys.stdin:
            infile.close()
예제 #3
0
	return x

"""
Removes the column specified by TXT_COL
"""
def strip_text(x, txt_col):
	"""Return a copy of X with the column at index TXT_COL removed."""
	without_text = np.delete(x, txt_col, axis=1)
	return without_text

"""
A wrapper to the ANALYZE function, but reads the data in from a file
"""
def analyze_file(file, delim, text_col, include_zero_polarity):
	"""
	Read FILE into a matrix (splitting on DELIM) and return the result of
	ANALYZE on it. TEXT_COL and INCLUDE_ZERO_POLARITY are passed through to
	ANALYZE unchanged.
	"""
	matrix = fo.file_to_matrix(file, delim)
	return analyze(matrix, text_col, include_zero_polarity)

if __name__ == '__main__':
	# Prefer piped input: a zero-timeout select() reports whether stdin
	# already has data waiting, without blocking.
	if select.select([sys.stdin], [], [], 0.0)[0]:
		infile = sys.stdin
	elif len(sys.argv) == 2:
		# open() raises on failure, so no extra None check is needed
		infile = open(sys.argv[1], 'r')
	else:
		print("Usage: " + sys.argv[0] + " <data file> [OR] <stdin>")
		# sys.exit is used because the bare exit() helper is only
		# installed by the site module
		sys.exit(-1)

	try:
		# the text is taken from the last column (-1); the final False is
		# the include_zero_polarity argument
		x = analyze_file(infile, constants.delim, -1, False)
		# drop the third-from-last column before printing (presumably the
		# tweet text after analyze() appends its columns -- TODO confirm)
		x = strip_text(x, -3)
		fo.print_matrix(x)
	finally:
		# release the file we opened; stdin is left alone
		if infile is not sys.stdin:
			infile.close()
예제 #4
0
                    cnt = cnt + 1
                    if cnt % 1000 == 0:
                        sys.stderr.write("logged " + str(cnt) + " tweets\n")
                        sys.stderr.flush()
                    if cnt > n_tweets:
                        break

    return np.matrix(tweets)


"""
Prints tweet data to stdout
"""
if __name__ == '__main__':
    if len(sys.argv) != 6:
        print("Usage: " + sys.argv[0] +
              " <num tweets> <S.W. long> <S.W. lat> <N.E. long> <N.E. lat>")
        # sys.exit is used because the bare exit() helper is only
        # installed by the site module
        sys.exit(-1)

    def _parse_coord(text):
        """Parse one coordinate: whole numbers stay int, decimals become float."""
        try:
            return int(text)
        except ValueError:
            # longitudes/latitudes are usually fractional; int() alone
            # would reject values such as "-122.75"
            return float(text)

    # parse into locals rather than overwriting sys.argv in place
    sw_long, sw_lat, ne_long, ne_lat = [_parse_coord(a) for a in sys.argv[2:6]]

    # build the coordinates in the correct order: the two minima first,
    # then the two maxima, regardless of the order the corners were given in
    coords = [
        min(sw_long, ne_long),
        min(sw_lat, ne_lat),
        max(sw_long, ne_long),
        max(sw_lat, ne_lat),
    ]

    tweets = get_tweets(int(sys.argv[1]), coords)
    fo.print_matrix(tweets)
예제 #5
0
    return np.delete(x, txt_col, axis=1)


"""
A wrapper to the ANALYZE function, but reads the data in from a file
"""


def analyze_file(file, delim, text_col, include_zero_polarity):
    """
    Read FILE into a matrix (splitting on DELIM) and return the result of
    ANALYZE on it. TEXT_COL and INCLUDE_ZERO_POLARITY are passed through to
    ANALYZE unchanged.
    """
    matrix = fo.file_to_matrix(file, delim)
    return analyze(matrix, text_col, include_zero_polarity)


if __name__ == '__main__':
    # Prefer piped input: a zero-timeout select() reports whether stdin
    # already has data waiting, without blocking.
    if select.select([sys.stdin], [], [], 0.0)[0]:
        infile = sys.stdin
    elif len(sys.argv) == 2:
        # open() raises on failure, so no extra None check is needed
        infile = open(sys.argv[1], 'r')
    else:
        print("Usage: " + sys.argv[0] + " <data file> [OR] <stdin>")
        # sys.exit is used because the bare exit() helper is only
        # installed by the site module
        sys.exit(-1)

    try:
        # the text is taken from the last column (-1); the final False is
        # the include_zero_polarity argument
        x = analyze_file(infile, constants.delim, -1, False)
        # drop the third-from-last column before printing (presumably the
        # tweet text after analyze() appends its columns -- TODO confirm)
        x = strip_text(x, -3)
        fo.print_matrix(x)
    finally:
        # release the file we opened; stdin is left alone
        if infile is not sys.stdin:
            infile.close()
예제 #6
0
					coords = tweet['coordinates']['coordinates']
					tweets.append([coords[1], coords[0], text])
					cnt = cnt + 1
					if cnt % 1000 == 0:
					    sys.stderr.write("logged " + str(cnt) + " tweets\n")
					    sys.stderr.flush()
					if cnt > n_tweets:
						break

	return np.matrix(tweets)

"""
Prints tweet data to stdout
"""
if __name__ == '__main__':
	if len(sys.argv) != 6:
		print("Usage: " + sys.argv[0] + " <num tweets> <S.W. long> <S.W. lat> <N.E. long> <N.E. lat>")
		# sys.exit is used because the bare exit() helper is only
		# installed by the site module
		sys.exit(-1)

	def _parse_coord(text):
		"""Parse one coordinate: whole numbers stay int, decimals become float."""
		try:
			return int(text)
		except ValueError:
			# longitudes/latitudes are usually fractional; int() alone
			# would reject values such as "-122.75"
			return float(text)

	# parse into locals rather than overwriting sys.argv in place
	sw_long, sw_lat, ne_long, ne_lat = [_parse_coord(a) for a in sys.argv[2:6]]

	# build the coordinates in the correct order: the two minima first,
	# then the two maxima, regardless of the order the corners were given in
	coords = [
		min(sw_long, ne_long),
		min(sw_lat, ne_lat),
		max(sw_long, ne_long),
		max(sw_lat, ne_lat),
	]

	tweets = get_tweets(int(sys.argv[1]), coords)
	fo.print_matrix(tweets)