def test_textoutput(self):
    """Run json2csv for the 'text' field and compare with the reference CSV."""
    expected_path = os.path.join(
        self.subdir, 'tweets.20150430-223406.text.csv.ref'
    )
    with TemporaryDirectory() as workdir:
        # The generated file lives in a throwaway directory that is removed
        # when the block exits.
        actual_path = os.path.join(workdir, 'tweets.20150430-223406.text.csv')
        json2csv(self.infile, actual_path, ['text'], gzip_compress=False)
        identical = are_files_identical(actual_path, expected_path)
        self.assertTrue(identical, msg=self.msg)
def test_user_metadata(self):
    """Run json2csv for tweet and user fields and compare with the reference."""
    expected_path = os.path.join(
        self.subdir, 'tweets.20150430-223406.user.csv.ref'
    )
    wanted_fields = [
        'id',
        'text',
        'user.id',
        'user.followers_count',
        'user.friends_count',
    ]
    with TemporaryDirectory() as workdir:
        actual_path = os.path.join(workdir, 'tweets.20150430-223406.user.csv')
        json2csv(self.infile, actual_path, wanted_fields, gzip_compress=False)
        identical = are_files_identical(actual_path, expected_path)
        self.assertTrue(identical, msg=self.msg)
def test_file_is_wrong(self):
    """
    Guard against false positives in the file comparison.

    The 'text' output is deliberately compared with the retweet reference
    file, and the comparison must not report the two as identical.
    """
    mismatched_ref = os.path.join(
        self.subdir, 'tweets.20150430-223406.retweet.csv.ref'
    )
    with TemporaryDirectory() as workdir:
        actual_path = os.path.join(workdir, 'tweets.20150430-223406.text.csv')
        json2csv(self.infile, actual_path, ['text'], gzip_compress=False)
        identical = are_files_identical(actual_path, mismatched_ref)
        self.assertFalse(identical, msg=self.msg)
def test_tweet_metadata(self):
    """Run json2csv for tweet-level fields and compare with the reference."""
    expected_path = os.path.join(
        self.subdir, 'tweets.20150430-223406.tweet.csv.ref'
    )
    wanted_fields = [
        'created_at',
        'favorite_count',
        'id',
        'in_reply_to_status_id',
        'in_reply_to_user_id',
        'retweet_count',
        'retweeted',
        'text',
        'truncated',
        'user.id',
    ]
    with TemporaryDirectory() as workdir:
        actual_path = os.path.join(workdir, 'tweets.20150430-223406.tweet.csv')
        json2csv(self.infile, actual_path, wanted_fields, gzip_compress=False)
        identical = are_files_identical(actual_path, expected_path)
        self.assertTrue(identical, msg=self.msg)
def convert_csv_tweet_file(self, input_file, args=None):
    """
    Run json2csv on ``input_file`` and return the lines of the CSV it wrote.

    :param input_file: name of the file to open and pass to ``json2csv``.
    :param args: list of field names passed to ``json2csv``. When omitted
        (``None``), a fresh list of the default tweet fields is used.
    :return: the lines of ``path + 'tweets_text.csv'`` as returned by
        ``readlines()``.
    """
    if args is None:
        # Build the default list per call rather than sharing one mutable
        # default object across every invocation.
        args = [
            'created_at',
            'favorite_count',
            'id',
            'in_reply_to_status_id',
            'in_reply_to_user_id',
            'retweet_count',
            'text',
            'truncated',
            'user.id',
        ]

    # `path` is defined outside this block; it is concatenated as-is, so it
    # presumably already ends with a path separator -- verify at definition.
    output_file = path + 'tweets_text.csv'

    with open(input_file) as file:
        json2csv(file, output_file, args)

    # Read the result back inside a context manager so the handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(output_file, 'r') as csv_file:
        return csv_file.readlines()
def test_user_metadata(self):
    """Check the CSV produced for tweet and user fields against the reference."""
    reference_csv = os.path.join(
        self.subdir, 'tweets.20150430-223406.user.csv.ref'
    )
    columns = [
        'id', 'text',
        'user.id', 'user.followers_count', 'user.friends_count',
    ]
    with TemporaryDirectory() as scratch:
        generated_csv = os.path.join(scratch, 'tweets.20150430-223406.user.csv')
        json2csv(self.infile, generated_csv, columns, gzip_compress=False)
        same = are_files_identical(generated_csv, reference_csv)
        self.assertTrue(same, msg=self.msg)
def test_tweet_metadata(self):
    """Check the CSV produced for tweet-level fields against the reference."""
    reference_csv = os.path.join(
        self.subdir, 'tweets.20150430-223406.tweet.csv.ref'
    )
    columns = [
        'created_at', 'favorite_count', 'id',
        'in_reply_to_status_id', 'in_reply_to_user_id',
        'retweet_count', 'retweeted', 'text', 'truncated',
        'user.id',
    ]
    with TemporaryDirectory() as scratch:
        generated_csv = os.path.join(scratch, 'tweets.20150430-223406.tweet.csv')
        json2csv(self.infile, generated_csv, columns, gzip_compress=False)
        same = are_files_identical(generated_csv, reference_csv)
        self.assertTrue(same, msg=self.msg)
from nltk.twitter.util import json2csv

# Input file handed to json2csv, the CSV file it writes, and the fields
# requested from it.
input_file = "tweets.20150430-223406.json"
output_file = 'tweets_text.csv'
fields = ['text']

# The input is opened in a context manager so it is closed once json2csv
# has finished with it.
with open(input_file) as tweets_fp:
    json2csv(tweets_fp, output_file, fields)