Exemplo n.º 1
0
    def test_insert_media(self):
        """Look the movie up by ASIN, title and creator and validate each hit."""

        # The ASIN lookup result is handed to the check as-is.
        by_asin = queries.find_media_by_asin('0440419395')
        self.is_batman_movie(by_asin)

        # The title and creator lookups are indexed: only the first hit
        # of each is validated.
        title_hits = queries.find_media_by_title('Batman: The Return of the Force')
        first_title_hit = title_hits[0]
        self.is_batman_movie(first_title_hit)

        creator_hits = queries.find_media_by_creator('Dario')
        first_creator_hit = creator_hits[0]
        self.is_batman_movie(first_creator_hit)
Exemplo n.º 2
0
    def test_clean(self):
        """Verify that clean_media leaves no comments or emotions behind."""

        movie = queries.find_media_by_asin('0440419395')

        # Seed one comment so the clean-up has something to remove.
        queries.insert_comment(movie.media_id, 11, 7, 0.9, 0.4, -0.5, 0.6, 0.3)
        queries.clean_media(movie.media_id)

        # Fetch both collections first, then assert on each in turn.
        remaining_comments = queries.find_comments_for_media(movie.media_id)
        remaining_emotions = queries.find_emotions_for_media(movie.media_id)

        for leftovers in (remaining_comments, remaining_emotions):
            self.assertEqual(len(leftovers), 0)
Exemplo n.º 3
0
def handleReview(asin, list_of_review_dicts, productapi, producttype):
    """Process the reviews of one product and push the result to S3.

    Fetches the product's Amazon info, inserts the media row (or cleans and
    re-stamps an existing one), runs the reviews through
    ``calculateVectorsForAllComments``, attaches a summary and uploads the
    resulting JSON under the name ``<title>$$$<asin>``.

    Args:
        asin: Identifier used for the Amazon lookup, the database row and
            the upload name.
        list_of_review_dicts: Iterable of review dicts; each must provide
            the keys ``overall``, ``helpful``, ``unixReviewTime`` and
            ``reviewText`` (a missing key raises ``KeyError``).
        productapi: Not used by this function.
        producttype: Stored under the ``type`` key and passed to
            ``queries.insert_media``.

    Returns:
        None. Returns early, after printing a message, when the Amazon info
        cannot be found or when no emotions are found; in the latter case
        the media row is removed again via ``queries.remove_media``.
    """
    # ``i`` (and ``g`` below) are module-level names; ``i`` is only read
    # here, for the log messages.
    global i
    product_dict = {"comments": []}

    try:
        product_dict = add_amazon_info_to_dict(asin, product_dict)
    except AmazonInfoNotFoundError:
        print("Couldn't find amazon info for product", i, " skipping")
        return
    product_dict["asin"] = asin
    product_dict["type"] = producttype

    # Make sure a fresh media row exists before comments are attached to
    # it: insert a new one, or wipe the old comments/emotions and re-stamp.
    product = queries.find_media_by_asin(asin)
    if product is None:
        queries.insert_media(
            product_dict["title"],
            product_dict["creator"],
            product_dict["description"],
            producttype,
            asin,
            int(time.time()),
        )
    else:
        queries.clean_media(product.media_id)
        queries.update_media(product.media_id, int(time.time()))

    # Translate the raw review records into the comment shape used by
    # calculateVectorsForAllComments.
    product_dict["comments"].extend(
        {
            "rating": review["overall"],
            "helpful": review["helpful"],
            "unixtime": int(review["unixReviewTime"]),
            "text": review["reviewText"],
        }
        for review in list_of_review_dicts
    )

    # Upload name is built before processing, from the raw title.
    filename = product_dict["title"] + "$$$" + asin

    try:
        processed_dict = calculateVectorsForAllComments(product_dict, g)
    except NoEmotionsFoundError:
        # Remove the media from the table since we don't want it anymore.
        queries.remove_media(asin)
        print("couldnt find any emotions for product: ", i, "Skipping")
        return

    processed_dict["summary"] = html.unescape(return_summary(processed_dict))
    processed_json = json.dumps(processed_dict, indent=4)

    print ("Adding product with asin: ", asin, "to S3 ---", i)
    push_to_S3(filename, processed_json)
Exemplo n.º 4
0
    def test_updating_media(self):
        """Exercise emotion insertion and the last_updated update for a media row."""

        movie = queries.find_media_by_asin('0440419395')

        # Attach two emotions, then read the stored emotions back.
        for label in ('cool', 'dark'):
            queries.insert_media_emotion(movie.media_id, label)

        stored_emotions = queries.find_emotions_for_media(movie.media_id)

        missing_msg = "Didn't find all emotions for media"
        for label in ('dark', 'cool'):
            self.assertTrue(label in stored_emotions, missing_msg)

        # Write a new last_updated value and check it.
        queries.update_media(movie.media_id, 20)

        # NOTE(review): this asserts on the object fetched before the update;
        # presumably the query layer refreshes it -- confirm.
        self.assertEqual(movie.last_updated, 20,
                         'Did not updated date properly')
def calculateVectorsForAllComments(dictFromJSON, g):
    """Score every comment of a product and aggregate its dominant emotions.

    For each comment in ``dictFromJSON["comments"]`` this computes a vector,
    its cosine relevancy against the product description, and an emotion
    analysis. Comments whose relevancy is below 0.15, or for which
    ``emotions`` raises ``ConceptError``, are dropped. Every kept comment is
    stored through ``queries.insert_comment``, and up to three of the most
    frequent sentic emotions are stored through
    ``queries.insert_media_emotion``.

    Args:
        dictFromJSON: Product dict; must contain ``asin``, ``description``
            and ``comments`` (each comment needs ``text``, ``rating`` and
            ``unixtime``). It is modified in place and also returned.
        g: Passed through unchanged to ``emotions``.

    Returns:
        ``dictFromJSON`` with ``comments`` replaced by the kept comments
        (passed through ``sort_list_of_dicts``), plus the keys
        ``popular_compound_emotions`` and ``popular_sentic_emotions`` (at
        most three names each, most frequent first) and, when at least one
        comment was kept, ``overall_rating`` (mean rating rounded to two
        decimals). HTML entities in the description and in the kept
        comments' text are unescaped.

    Raises:
        NoEmotionsFoundError: If no compound emotion, or no sentic emotion,
            was counted across the kept comments.
    """
    compound_emotion_dict = collections.defaultdict(int)
    sentic_emotion_dict = collections.defaultdict(int)

    processed_comments = []
    overall_rating = 0.0

    # The product row from the DB; its media_id keys every insert below.
    # NOTE(review): a None result here would fail on ``product.media_id`` --
    # callers are presumably expected to insert the media first; confirm.
    product = queries.find_media_by_asin(dictFromJSON["asin"])

    tokenized_docs = buildListOfTokenizedDocuments(dictFromJSON)

    # The description vector is the same for every comment, so it is built
    # once, on the first iteration (nothing is computed when there are no
    # comments). Assumes calculateVector/tokenizeDocument are pure -- TODO
    # confirm.
    vectorized_desc = None

    for comment in dictFromJSON["comments"]:
        vectorized_comment = calculateVector(tokenizeDocument(comment["text"]), tokenized_docs)
        if vectorized_desc is None:
            vectorized_desc = calculateVector(tokenizeDocument(dictFromJSON["description"]), tokenized_docs)
        comment["vector_space"] = vectorized_comment
        relevancy = getCosine(vectorized_comment, vectorized_desc)

        # Drop comments that barely relate to the product description.
        if relevancy < 0.15:
            continue

        comment["relevancy"] = relevancy

        # Add the emotional score. Despite the wording of the message, only
        # this comment is skipped; the remaining comments are still processed.
        try:
            comment_emotion = emotions(comment["text"], g)
        except ConceptError:
            print("Not enough concepts to do anything useful - skipping this product")
            continue

        comment["emotion_vector"] = comment_emotion.emotion_vector

        compound_emotions = comment_emotion.get_compound_emotion()
        sentic_values = comment_emotion.get_all_sentic_values()

        # Keep only the names; missing sentic values come back as None.
        sentic_values = [value.name for value in sentic_values if value is not None]

        comment["compound_emotions"] = [
            {"compound_emotion": compound_emotion.name, "strength": strength.name}
            for compound_emotion, strength in compound_emotions
        ]
        comment["sentic_emotions"] = sentic_values

        # Tally this comment's emotions into the product-wide counters.
        for compound in comment["compound_emotions"]:
            compound_emotion_dict[compound["compound_emotion"]] += 1

        for sentic in comment["sentic_emotions"]:
            sentic_emotion_dict[sentic] += 1

        overall_rating += float(comment["rating"])

        # Add the comment to the database.
        # NOTE(review): the timestamp is passed as the second argument;
        # confirm this matches the parameter order of queries.insert_comment.
        queries.insert_comment(product.media_id,
                               comment["unixtime"],
                               comment["relevancy"],
                               comment["emotion_vector"]["pleasantness"],
                               comment["emotion_vector"]["attention"],
                               comment["emotion_vector"]["sensitivity"],
                               comment["emotion_vector"]["aptitude"],
                               comment["emotion_vector"]["polarity"])

        comment["text"] = html.unescape(comment["text"])
        processed_comments.append(comment)

    if not compound_emotion_dict:
        raise NoEmotionsFoundError("No compound emotions found")

    # Up to three most frequent emotions. A stable descending sort keeps the
    # first-seen emotion ahead on ties and, unlike calling max() three times
    # on a shrinking dict, does not fail when fewer than three distinct
    # emotions were counted.
    popular_compound_emotions = sorted(
        compound_emotion_dict, key=compound_emotion_dict.get, reverse=True
    )[:3]

    if not sentic_emotion_dict:
        raise NoEmotionsFoundError("No sentic emotions found")

    popular_sentic_emotions = sorted(
        sentic_emotion_dict, key=sentic_emotion_dict.get, reverse=True
    )[:3]
    for popular_sentic in popular_sentic_emotions:
        # Add the emotion to the database.
        queries.insert_media_emotion(product.media_id, popular_sentic)

    dictFromJSON["popular_compound_emotions"] = popular_compound_emotions
    dictFromJSON["popular_sentic_emotions"] = popular_sentic_emotions

    if len(processed_comments) > 0:
        rating = overall_rating / len(processed_comments)
        rating = float("{0:.2f}".format(rating))
        dictFromJSON["overall_rating"] = rating

    dictFromJSON["comments"] = sort_list_of_dicts(processed_comments)

    # Get rid of escaped html characters in the description.
    dictFromJSON["description"] = html.unescape(dictFromJSON["description"])

    return dictFromJSON