def main():
    """Write the cleaned form of every tweet in ``tweetPath`` to ``cleanedPath``.

    Each input line is parsed with ``parseJson``; lines that ``isATweet``
    accepts are written as ``getTextAndTimestamp(tweet)``, one per output
    line. After all input has been consumed, a final summary line reporting
    ``getTextAndTimestamp.tweetsWithUnicode`` is appended (without a
    trailing newline).
    """
    # Open the output first (as before), but under a context manager so the
    # handle is flushed and closed even if parsing raises part-way through.
    with open(cleanedPath, "w") as outputFile:
        with open(tweetPath, "r") as tweetLines:
            for line in tweetLines:
                possibleTweet = parseJson(line)
                if isATweet(possibleTweet):
                    outputFile.write("%s\n" % getTextAndTimestamp(possibleTweet))
        # The counter is read only after every tweet has been processed.
        outputFile.write(
            "%s tweets contained unicode." % getTextAndTimestamp.tweetsWithUnicode
        )
def main():
    """Write the average vertex degree of the graph after each tweet.

    Every line of ``tweetPath`` is parsed with ``parseJson``. For each
    parsed object that looks like a tweet, the graph is updated through
    ``updateGrapWith`` and the resulting average degree, formatted with two
    decimals, is written as one line to ``feature2FilePath``. An empty graph
    yields ``0.00``.
    """
    # Context managers guarantee the output is flushed and closed even when
    # a malformed line makes parsing or the graph update raise.
    with open(feature2FilePath, "w") as outputFile:
        with open(tweetPath, "r") as tweetLines:
            for line in tweetLines:
                possibleTweet = parseJson(line)

                # A valid tweet is assumed to contain 'created_at' and
                # 'entities.hashtags', per the field guide at
                # https://dev.twitter.com/overview/api/tweets
                isTweet = (
                    'created_at' in possibleTweet
                    and 'entities' in possibleTweet
                    and 'hashtags' in possibleTweet['entities']
                )
                if not isTweet:
                    continue

                graph = updateGrapWith(possibleTweet)

                avgDegree = 0.0
                if graph:
                    # The graph keeps repeated adjacencies (used to track
                    # multiple tweets); collapse each edge list to a set so
                    # every neighbour counts once. values() is used instead
                    # of iteritems() so this runs on Python 2 and 3 alike.
                    degreeSum = sum(len(set(edges)) for edges in graph.values())
                    # float() keeps true division under Python 2.
                    avgDegree = float(degreeSum) / len(graph)

                outputFile.write('%s\n' % "{0:.2f}".format(avgDegree))