Exemplo n.º 1
0
def extractJson(fileName):
    """Extract tweets from a JSON file into a list of Tweet objects.

    Args:
        fileName: Path to a UTF-8 encoded JSON file whose top-level object
            holds a "results" list of tweet dictionaries.

    Returns:
        A list with one Tweet per entry of "results", in file order. The ids
        of each tweet's user mentions are appended to its ``userMentions``
        (assumes Tweet() initialises ``userMentions`` to a list -- confirm
        against the Tweet class).

    Raises:
        SystemExit: If the file content cannot be decoded or parsed as JSON.
    """
    try:
        # The context manager closes the file on every path; a trailing
        # close() call would be skipped whenever reading or parsing fails.
        with open(fileName, encoding='utf-8') as jsonFile:
            data = json.load(jsonFile)
    except ValueError:
        # JSON syntax errors and UTF-8 decoding errors are both ValueErrors.
        sys.exit("Error while parsing {0} Not a valid JSON file".format(fileName))

    tweetList = []
    for tweet in data['results']:
        nTweet = Tweet()
        nTweet.id = tweet['id']
        nTweet.userId = tweet['from_user_id']
        nTweet.text = tweet['text']
        nTweet.user = tweet['from_user']
        nTweet.userName = tweet['from_user_name']
        nTweet.profileImgUrlHttp = tweet['profile_image_url']
        nTweet.source = tweet['source']
        nTweet.toUser = tweet['to_user']
        nTweet.date = tweet['created_at']
        # Only the numeric id of each mentioned user is kept.
        for mention in tweet['entities']['user_mentions']:
            nTweet.userMentions.append(mention["id"])
        tweetList.append(nTweet)

    return tweetList
Exemplo n.º 2
0
def extractTweets(fileName):
    """Extract tweets from a JSON file into a list of Tweet objects.

    Besides the plain tweet fields, every URL entity is resolved by opening
    its expanded URL and storing the final URL reported by the response, and
    every user mention is stored as a dict with "id", "name" and
    "screenName" keys.

    Args:
        fileName: Path to a JSON file whose top-level object holds a
            "results" list of tweet dictionaries.

    Returns:
        A list with one Tweet per entry of "results", in file order.
        (Assumes Tweet() initialises ``urls`` and ``userMentions`` to lists
        -- confirm against the Tweet class.)

    Raises:
        SystemExit: If the file content cannot be parsed as JSON.
    """
    try:
        # The context manager closes the file on every path; a trailing
        # close() call would be skipped whenever reading or parsing fails.
        with open(fileName) as jsonFile:
            data = json.load(jsonFile)
    except ValueError:
        sys.exit("Error while parsing {0}".format(fileName) + " Not a valid JSON file")

    print("Parsing " + fileName + " ...")
    tweetList = []
    for tweet in data["results"]:
        nTweet = Tweet()
        nTweet.id = tweet["id"]
        nTweet.userId = tweet["from_user_id"]
        nTweet.text = tweet["text"]
        nTweet.user = tweet["from_user"]
        nTweet.userName = tweet["from_user_name"]
        nTweet.profileImgUrlHttp = tweet["profile_image_url"]
        nTweet.source = tweet["source"]
        nTweet.toUser = tweet["to_user"]
        nTweet.date = tweet["created_at"]

        # The "urls" entity list is optional; treat a missing key as empty.
        for urlEntity in tweet["entities"].get("urls", ()):
            expandedUrl = urlEntity["expanded_url"]
            try:
                # NOTE(review): urllib.urlopen is the Python 2 API; on
                # Python 3 the equivalent is urllib.request.urlopen --
                # confirm the target interpreter before changing it.
                response = urllib.urlopen(expandedUrl)
            except IOError:
                # Best effort: report the unreachable URL and skip it.
                print("Error urllib.urlopen")
                print("---> URL = {}".format(expandedUrl))
                continue
            try:
                # The response URL reflects any redirects that were followed.
                expandedUrl = response.url
            finally:
                # Release the connection explicitly instead of relying on
                # garbage collection of the response object.
                response.close()
            nTweet.urls.append(expandedUrl)

        for mention in tweet["entities"]["user_mentions"]:
            nTweet.userMentions.append(
                {"id": mention["id"], "name": mention["name"], "screenName": mention["screen_name"]}
            )
        tweetList.append(nTweet)

    return tweetList