예제 #1
0
def main():
    """Write the cleaned form of every tweet in ``tweetPath`` to ``cleanedPath``.

    Each input line is passed to ``parseJson``; records accepted by
    ``isATweet`` are written as one line each, using the value returned by
    ``getTextAndTimestamp``.  A final summary line (without a trailing
    newline) reports ``getTextAndTimestamp.tweetsWithUnicode``.

    Both files are handled by a single ``with`` statement so the output is
    flushed and closed even when one of the helpers raises part-way through.
    """
    # The output file is opened first, as before, so it is created/truncated
    # even when the input file turns out to be missing.
    with open(cleanedPath, "w") as outputFile, open(tweetPath, "r") as tweetLines:
        for line in tweetLines:
            possibleTweet = parseJson(line)
            if isATweet(possibleTweet):
                outputFile.write("%s\n" % getTextAndTimestamp(possibleTweet))

        # NOTE(review): tweetsWithUnicode is an attribute on the helper
        # function itself, presumably a counter it updates per call; confirm
        # against its definition.
        outputFile.write(
            "%s tweets contained unicode." % getTextAndTimestamp.tweetsWithUnicode
        )
예제 #2
0
def main():
	"""Write one average-degree value per tweet to ``feature2FilePath``.

	Each line of ``tweetPath`` is passed to ``parseJson``.  Records that look
	like tweets are handed to ``updateGrapWith``, and the average vertex
	degree of the graph it returns is written as one line with two decimals.
	An empty (or falsy) graph yields ``0.00``.

	Both files are handled by a single ``with`` statement so the output is
	flushed and closed even if processing fails part-way through.
	"""
	with open(feature2FilePath, "w") as outputFile, open(tweetPath, "r") as tweetLines:
		for line in tweetLines:
			possibleTweet = parseJson(line)

			# Check whether this is a tweet. Valid tweets are assumed to
			# contain 'created_at' and 'entities.hashtags' according to the
			# https://dev.twitter.com/overview/api/tweets field guide.
			if not (('created_at' in possibleTweet) and ('entities' in possibleTweet) and ('hashtags' in possibleTweet['entities'])):
				continue

			graph = updateGrapWith(possibleTweet)
			avgDegree = float(0)
			if graph:
				# Adjacency lists may repeat a neighbour (used to track
				# multiple tweets), so de-duplicate each one before counting.
				# Going through values() instead of iteritems() keeps this
				# working on both Python 2 and Python 3.
				totalDegree = sum(len(set(edges)) for edges in graph.values())
				avgDegree = float(totalDegree) / len(graph)

			average_degree = "{0:.2f}".format(avgDegree)

			outputFile.write('%s\n' % average_degree)