the clustering. FEATURES is an array corresponding to the columns to use for clustering. For example, FEATURES = [0, 2] will use columns 0 and 2. Each column being clustered is assumed to have numeric values. DELIM specifies the text delimiter to use to break the file apart """ def cluster_from_file(file, features, delim): x = fo.file_to_matrix(file, delim) x_clus = cluster(x, features) # we only care about data we can cluster x_clus = prune_no_cluster_data(x_clus) return x_clus """ Prints cluster information for the tweet data passed in. Only tweets which are assigned a cluster in the DBSCAN algorithm are printed. Tweets are clustered based on latitude and longitude data """ if __name__ == '__main__': if select.select([sys.stdin,],[],[],0.0)[0]: file = sys.stdin else: if len(sys.argv) != 2: print("Usage: " + sys.argv[0] + " <data file> [OR] <stdin>") exit(-1) else: file = open(sys.argv[1], 'r') assert(file != None) x_clus = cluster_from_file(file, [0, 1], constants.delim) fo.print_matrix(x_clus)
def cluster_from_file(file, features, delim):
    """Read delimited data from FILE and return only the clustered rows.

    FILE is passed to fo.file_to_matrix together with DELIM. FEATURES is
    forwarded to cluster() to select the columns used for clustering. The
    clustered result is filtered through prune_no_cluster_data before it is
    returned.
    """
    x = fo.file_to_matrix(file, delim)
    x_clus = cluster(x, features)
    # we only care about data we can cluster
    return prune_no_cluster_data(x_clus)


"""
Prints cluster information for the tweet data passed in. Only tweets which
are assigned a cluster in the DBSCAN algorithm are printed. Tweets are
clustered based on latitude and longitude data.

Input is taken from stdin when data is already waiting there; otherwise the
single command-line argument names the data file to read.
"""
if __name__ == '__main__':
    # A zero timeout turns select() into a non-blocking poll of stdin.
    if select.select([sys.stdin], [], [], 0.0)[0]:
        infile = sys.stdin
    elif len(sys.argv) == 2:
        infile = open(sys.argv[1], 'r')
    else:
        print("Usage: " + sys.argv[0] + " <data file> [OR] <stdin>")
        # sys.exit, not the site-provided exit(), so this also works when
        # the interpreter is started without the site module.
        sys.exit(-1)

    try:
        x_clus = cluster_from_file(infile, [0, 1], constants.delim)
    finally:
        # Release the file we opened ourselves; stdin is left alone.
        if infile is not sys.stdin:
            infile.close()
    fo.print_matrix(x_clus)
return x """ Removes the column specified by TEXT_COL """ def strip_text(x, txt_col): return np.delete(x, txt_col, axis = 1) """ A wrapper to the ANALYZE function, but reads the data in from a file """ def analyze_file(file, delim, text_col, include_zero_polarity): x = fo.file_to_matrix(file, delim) x_sentiment = analyze(x, text_col, include_zero_polarity) return x_sentiment if __name__ == '__main__': if select.select([sys.stdin,],[],[],0.0)[0]: file = sys.stdin else: if len(sys.argv) != 2: print("Usage: " + sys.argv[0] + " <data file> [OR] <stdin>") exit(-1) else: file = open(sys.argv[1], 'r') assert(file != None) x = analyze_file(file, constants.delim, -1, False) x = strip_text(x, -3) fo.print_matrix(x)
cnt = cnt + 1 if cnt % 1000 == 0: sys.stderr.write("logged " + str(cnt) + " tweets\n") sys.stderr.flush() if cnt > n_tweets: break return np.matrix(tweets) """ Prints tweet data to stdout """ if __name__ == '__main__': if len(sys.argv) != 6: print("Usage: " + sys.argv[0] + " <num tweets> <S.W. long> <S.W. lat> <N.E. long> <N.E. lat>") exit(-1) # build the coordinates in the correct order for i in range(2, 6): sys.argv[i] = int(sys.argv[i]) coords = [] coords.append(min(sys.argv[2], sys.argv[4])) coords.append(min(sys.argv[3], sys.argv[5])) coords.append(max(sys.argv[2], sys.argv[4])) coords.append(max(sys.argv[3], sys.argv[5])) tweets = get_tweets(int(sys.argv[1]), coords) fo.print_matrix(tweets)
return np.delete(x, txt_col, axis=1) """ A wrapper to the ANALYZE function, but reads the data in from a file """ def analyze_file(file, delim, text_col, include_zero_polarity): x = fo.file_to_matrix(file, delim) x_sentiment = analyze(x, text_col, include_zero_polarity) return x_sentiment if __name__ == '__main__': if select.select([ sys.stdin, ], [], [], 0.0)[0]: file = sys.stdin else: if len(sys.argv) != 2: print("Usage: " + sys.argv[0] + " <data file> [OR] <stdin>") exit(-1) else: file = open(sys.argv[1], 'r') assert (file != None) x = analyze_file(file, constants.delim, -1, False) x = strip_text(x, -3) fo.print_matrix(x)
coords = tweet['coordinates']['coordinates'] tweets.append([coords[1], coords[0], text]) cnt = cnt + 1 if cnt % 1000 == 0: sys.stderr.write("logged " + str(cnt) + " tweets\n") sys.stderr.flush() if cnt > n_tweets: break return np.matrix(tweets) """ Prints tweet data to stdout """ if __name__ == '__main__': if len(sys.argv) != 6: print("Usage: " + sys.argv[0] + " <num tweets> <S.W. long> <S.W. lat> <N.E. long> <N.E. lat>") exit(-1) # build the coordinates in the correct order for i in range(2, 6): sys.argv[i] = int(sys.argv[i]) coords = [] coords.append(min(sys.argv[2], sys.argv[4])) coords.append(min(sys.argv[3], sys.argv[5])) coords.append(max(sys.argv[2], sys.argv[4])) coords.append(max(sys.argv[3], sys.argv[5])) tweets = get_tweets(int(sys.argv[1]), coords) fo.print_matrix(tweets)