示例#1
0
    def get_recommendations(keywords):
        """Return the five cities whose descriptions best match ``keywords``.

        Every row of ``richCityData.csv`` is scored with
        ``CosineSimilarity.cosine_similarity_of(description, keywords)`` and
        the rows are ranked by descending score. Rows with equal scores keep
        their file order because the sort is stable.

        Args:
            keywords: Text compared against each city's ``description``.

        Returns:
            A JSON array (as a string) of at most five objects with the keys
            ``city``, ``popularity``, ``description`` and ``image``, best
            match first.
        """
        df = pd.read_csv('richCityData.csv')

        # One score per row, in file order, so enumerate() below yields the
        # row position that iloc expects.
        scores = [
            CosineSimilarity.cosine_similarity_of(row['description'], keywords)
            for _, row in df.iterrows()
        ]

        # Highest score first.
        ranked = sorted(enumerate(scores),
                        key=operator.itemgetter(1),
                        reverse=True)

        # Collect the five best rows as plain records. DataFrame.append was
        # removed in pandas 2.0 (and copied the frame on every call), so the
        # result frame is built once after the loop instead.
        records = []
        for position, score in ranked[:5]:
            print(position, score)
            city = df.iloc[position]
            records.append({
                'city': city['city'],
                'popularity': city['popularity'],
                'description': city['description'],
                'image': city['image'],
            })

        resultDF = pd.DataFrame(
            records, columns=['city', 'popularity', 'description', 'image'])

        # Serialize through to_dict('records') exactly as before so the JSON
        # layout stays the same for existing consumers.
        return json.dumps(resultDF.to_dict('records'))
示例#2
0
    def test_cosine_similarity_same(self):
        """Two identical texts should score 1.00 (to two decimal places)."""
        text1 = "happy birthday"
        text2 = "happy birthday"
        cs = CosineSimilarity.cosine_similarity_of(text1, text2)

        # Compare numerically with a two-decimal tolerance rather than by
        # formatting the float to a string; this absorbs floating-point
        # rounding and reports the actual values on failure.
        self.assertAlmostEqual(cs, 1.0, places=2)
示例#3
0
    def test_cosine_similarity_different(self):
        """Two texts with no words in common should score 0.00."""
        text1 = "hello sir"
        text2 = "good afternoon"
        cs = CosineSimilarity.cosine_similarity_of(text1, text2)

        # Compare numerically with a two-decimal tolerance rather than by
        # formatting the float to a string; a tiny negative rounding error
        # would format as "-0.00" and fail a string comparison spuriously.
        self.assertAlmostEqual(cs, 0.0, places=2)
示例#4
0
    def get_recommendations_include_rating_count_threshold_positive_negative_reviews(
            keywords):
        """Return the five best cities, ranked by text match and rating data.

        For every row of ``city_data_cleared.csv`` the cosine similarity
        between the row's ``description`` and ``keywords`` is combined with a
        rating contribution (computed from ``rating``, ``rating_count``,
        ``positive_review`` and ``negative_review``) through
        ``RecommenderEngine.calculate_final_score``. Rows are ranked by that
        final score, highest first; ties keep their file order because the
        sort is stable.

        Args:
            keywords: Text compared against each city's ``description``.

        Returns:
            A JSON array (as a string) of at most five objects with the keys
            ``city``, ``popularity``, ``description`` and ``score``, best
            match first.
        """
        df = pd.read_csv('city_data_cleared.csv')

        # One final score per row, in file order, so enumerate() below yields
        # the row position that iloc expects.
        final_scores = []
        for _, row in df.iterrows():
            cs_score = CosineSimilarity.cosine_similarity_of(
                row['description'], keywords)

            rating_contribution = RatingExtractor.get_rating_weight_with_count_and_reviews(
                row['rating'], row['rating_count'], row['positive_review'],
                row['negative_review'])

            final_scores.append(
                RecommenderEngine.calculate_final_score(
                    cs_score, rating_contribution))

        # Highest final score first.
        ranked = sorted(enumerate(final_scores),
                        key=operator.itemgetter(1),
                        reverse=True)

        # Collect the five best rows as plain records. DataFrame.append was
        # removed in pandas 2.0 (and copied the frame on every call), so the
        # result frame is built once after the loop instead.
        records = []
        for position, score in ranked[:5]:
            city = df.iloc[position]
            records.append({
                'city': city['city'],
                'popularity': city['popularity'],
                'description': city['description'],
                'score': score,
            })

        resultDF = pd.DataFrame(
            records, columns=['city', 'popularity', 'description', 'score'])

        # Serialize through to_dict('records') exactly as before so the JSON
        # layout stays the same for existing consumers.
        return json.dumps(resultDF.to_dict('records'))
示例#5
0
def get_recommendations(resume, jobs_df):
    """Return the ten job postings that best match ``resume``.

    Each row of ``jobs_df`` is scored with
    ``CosineSimilarity.cosine_similarity_of(description_cleaned, resume)``
    and the rows are ranked by descending score; ties keep their original
    order because the sort is stable.

    Args:
        resume: Text compared against each row's ``description_cleaned``.
        jobs_df: DataFrame providing the columns ``description_cleaned``,
            ``job_description``, ``title``, ``company_name``, ``location``
            and ``Unnamed: 0``.

    Returns:
        A DataFrame with at most ten rows, best match first, and the columns
        ``Job Index``, ``Company``, ``Title``, ``Location``, ``Description``
        and ``Job Description``. Missing values are replaced by ``''``.
        ``Job Description`` is never populated from ``jobs_df`` and is
        therefore always ``''``; the column is kept so the result layout does
        not change for existing callers.
    """
    # Score rows by position rather than by index label: the lookups below
    # use iloc, which would pick the wrong rows if jobs_df arrived with a
    # filtered or otherwise non-default index.
    scores = [
        CosineSimilarity.cosine_similarity_of(row['description_cleaned'],
                                              resume)
        for _, row in jobs_df.iterrows()
    ]

    # Highest similarity first.
    ranked = sorted(enumerate(scores),
                    key=operator.itemgetter(1),
                    reverse=True)

    # Take exactly ten rows (a `counter > 10` check would let an eleventh
    # through). DataFrame.append was removed in pandas 2.0, so the rows are
    # collected as records and the frame is built once afterwards.
    records = []
    for position, _ in ranked[:10]:
        job = jobs_df.iloc[position]
        records.append({
            'Job Index': job['Unnamed: 0'],
            'Company': job['company_name'],
            'Title': job['title'],
            'Location': job['location'],
            'Description': job['job_description'],
            'Job Description': '',
        })

    resultDF = pd.DataFrame(records,
                            columns=[
                                'Job Index', 'Company', 'Title', 'Location',
                                'Description', 'Job Description'
                            ])

    # Blank out missing fields so callers never see NaN.
    return resultDF.fillna('')
示例#6
0
    def get_rating_recommendations(keywords):
        """Return the five best cities, ranked by text match and rating data.

        For every row of ``ratingRichCityData.csv`` the cosine similarity
        between the row's ``description`` and ``keywords`` is combined with a
        rating value (computed from ``rating``, ``rating_count``,
        ``positive_review`` and ``negative_review``) through
        ``RecommenderEngine.calculate_score_from``. Rows are ranked by that
        score, highest first; ties keep their file order because the sort is
        stable.

        Args:
            keywords: Text compared against each city's ``description``.

        Returns:
            A JSON array (as a string) of at most five objects with the keys
            ``city``, ``popularity``, ``description`` and ``image``, best
            match first.
        """
        df = pd.read_csv('ratingRichCityData.csv')

        # One score per row, in file order, so enumerate() below yields the
        # row position that iloc expects.
        scores = []
        for _, row in df.iterrows():
            cs = CosineSimilarity.cosine_similarity_of(row['description'], keywords)

            rat_value = RatingExtractor.get_rating_with_count_and_reviews(
                row['rating'], row['rating_count'], row['positive_review'],
                row['negative_review'])

            scores.append(RecommenderEngine.calculate_score_from(cs, rat_value))

        # Highest score first.
        ranked = sorted(enumerate(scores),
                        key=operator.itemgetter(1),
                        reverse=True)

        # Collect the five best rows as plain records. DataFrame.append was
        # removed in pandas 2.0 (and copied the frame on every call), so the
        # result frame is built once after the loop instead.
        records = []
        for position, score in ranked[:5]:
            print(position, score)
            city = df.iloc[position]
            records.append({
                'city': city['city'],
                'popularity': city['popularity'],
                'description': city['description'],
                'image': city['image'],
            })

        resultDF = pd.DataFrame(
            records, columns=['city', 'popularity', 'description', 'image'])

        # Serialize through to_dict('records') exactly as before so the JSON
        # layout stays the same for existing consumers.
        return json.dumps(resultDF.to_dict('records'))
示例#7
0
    def test_cosine_similarity_some(self):
        """Texts sharing one of their three words should score 0.33."""
        text1 = "apple banana orange"
        text2 = "orange berry ananas"
        cs = CosineSimilarity.cosine_similarity_of(text1, text2)

        # Compare numerically with a two-decimal tolerance rather than by
        # formatting the float to a string; this absorbs floating-point
        # rounding and reports the actual values on failure.
        self.assertAlmostEqual(cs, 0.33, places=2)