Пример #1
0
def calculate_top_5(app, user_download_history):
    """Compute and store the apps most related to ``app`` (at most five).

    Every download history adds the value returned by
    ``Helper.cosine_similarity([app], apps)`` to the running score of each
    app it contains.  The best-scoring apps, excluding ``app`` itself, are
    written to the ``top_5_app`` field through ``DataService.update_app_info``.

    Args:
        app: Identifier of the app to find related apps for.
        user_download_history: Iterable of download histories, each an
            iterable of app identifiers.

    Returns:
        None.  Nothing is stored when ``app`` occurs in no history.
    """
    app_similarity = {}  # {app_id: accumulated similarity}

    for apps in user_download_history:
        similarity = Helper.cosine_similarity([app], apps)
        for other_app in apps:
            # dict.get replaces the Python-2-only dict.has_key().
            app_similarity[other_app] = (
                app_similarity.get(other_app, 0) + similarity)

    if app not in app_similarity:
        return

    app_similarity.pop(app)
    sorted_tups = sorted(app_similarity.items(),
                         key=operator.itemgetter(1),
                         reverse=True)
    # Slicing (rather than indexing [0]..[4]) avoids an IndexError when
    # fewer than five other apps were scored.
    top_5_app = [entry[0] for entry in sorted_tups[:5]]
    print("top_5_app for" + str(app) + ":\t" + str(top_5_app))
    # store the top 5
    DataService.update_app_info({'app_id': app},
                                {'$set': {
                                    'top_5_app': top_5_app
                                }})
def main():
    """Connect to MongoDB, then compute and persist the top-5 apps.

    Loads the user download history and the app info, passes both to
    ``persist_top_5_apps_for_app`` and always closes the client afterwards.
    Any exception is printed instead of being propagated.
    """
    client = None
    try:
        # print to a file
        # f = file('log.txt', 'w')
        # sys.stdout = f

        # get MongoDB client and set it in DataService
        # (or client = MongoClient('mongodb://localhost:27017/'))
        client = MongoClient('localhost', 27017)
        DataService.init(client)

        # persist_download_history_and_app_info()
        user_download_history = load_user_download_history()
        app_info = load_app_info()
        persist_top_5_apps_for_app(user_download_history, app_info)

    except Exception as e:
        # Single-argument print(...) behaves the same on Python 2 and 3;
        # the bare print statement is a syntax error on Python 3.
        print("Exception! Go fix it!")
        print(e)

    finally:
        # clean up work; client is still None if the connection failed
        if client is not None:
            client.close()
    def run(self, app, download_history):
        """Compute and store the apps most related to ``app`` (at most five).

        Only histories that contain ``app`` are considered.  Each of them adds
        the value of ``Helper.cosine_similarity_app_applist(app, apps)`` to the
        score of every other app it contains.  The result is written to the
        ``top_5_app`` field through ``DataService.update_app_info``.

        Args:
            app: Identifier of the app to find related apps for.
            download_history: Iterable of download histories, each a
                container of app identifiers.

        Returns:
            None.  Nothing is stored when no other app was scored.
        """
        # This needs serious optimization: similarity A->B and B->A is
        # always computed twice.

        # create a dict to store each other app and its similarity to this app
        app_similarity = {}  # {app_id: similarity}

        for apps in download_history:
            if app not in apps:
                continue
            # calculate the similarity
            similarity = Helper.cosine_similarity_app_applist(app, apps)
            for other_app in apps:
                if other_app == app:
                    continue
                # dict.get replaces the Python-2-only dict.has_key().
                app_similarity[other_app] = (
                    app_similarity.get(other_app, 0) + similarity)

        # ignore apps for which no other app was scored
        if not app_similarity:
            return

        # sort app_similarity dict by value and get the top 5 as recommendation
        # NOTE(review): assumes the helper returns an indexable sequence of
        # (app_id, similarity) pairs, best first -- it is not visible here.
        sorted_tups = self.__sort_dict_by_value(app_similarity)
        # Bounded by the number of entries so fewer than five scored apps
        # no longer raise an IndexError.
        top_5_app = [
            sorted_tups[i][0] for i in range(min(5, len(sorted_tups)))
        ]

        # store the top 5
        DataService.update_app_info({'app_id': app}, {'$set': {'top_5_app': top_5_app}})
Пример #4
0
def calculate_app_top_5(app, all_download_history):
    """Compute and store the apps most related to ``app`` (at most five).

    Every download history adds the value returned by
    ``Helper.cosine_similarity([app], apps)`` to the running score of each
    app it contains.  The best-scoring apps, excluding ``app`` itself, are
    written to the ``top_5_app`` field through ``DataService.update_app_info``.

    Args:
        app: Identifier of the app to find related apps for.
        all_download_history: Iterable of download histories, each an
            iterable of app identifiers.

    Returns:
        None.  Nothing is stored when ``app`` occurs in no history.
    """
    # each other app and its accumulated similarity to this app
    app_similarity = {}  # {app_id: similarity}

    for apps in all_download_history:
        # similarity of the app and this download history
        similarity = Helper.cosine_similarity([app], apps)
        for other_app in apps:
            # dict.get replaces the Python-2-only dict.has_key().
            app_similarity[other_app] = (
                app_similarity.get(other_app, 0) + similarity)

    # there could be an app without related apps (not in any download history)
    if app not in app_similarity:
        return

    # drop the app itself, then sort the rest by similarity, best first
    app_similarity.pop(app)
    sorted_tups = sorted(app_similarity.items(), key=operator.itemgetter(1), reverse=True)
    # Slicing avoids an IndexError when fewer than five other apps exist.
    top_5_app = [entry[0] for entry in sorted_tups[:5]]

    DataService.update_app_info({'app_id': app}, {'$set': {'top_5_app': top_5_app}})
Пример #5
0
def calculate_top_5(app, user_download_history):
    """Compute and store the apps most related to ``app`` (at most five).

    Every download history adds the value returned by
    ``Helper.cosine_similarity([app], apps)`` to the running score of each
    app it contains.  The best-scoring apps, excluding ``app`` itself, are
    written to the ``top_5_app`` field through ``DataService.update_app_info``.

    Args:
        app: Identifier of the app to find related apps for.
        user_download_history: Iterable of download histories, each an
            iterable of app identifiers.

    Returns:
        None.  Nothing is stored when ``app`` occurs in no history.
    """
    # each other app and its accumulated similarity to this app
    app_similarity = collections.defaultdict(float)  # {app_id: similarity}
    for apps in user_download_history:
        similarity = Helper.cosine_similarity([app], apps)
        # accumulate similarity
        for other_app in apps:
            app_similarity[other_app] += similarity

    # There could be an app without related apps (not in any download
    # history).  A membership test does not insert a key into a defaultdict.
    if app not in app_similarity:
        return

    # sort by similarity, best first, after dropping the app itself
    app_similarity.pop(app)
    sorted_tups = sorted(app_similarity.items(),
                         key=operator.itemgetter(1),
                         reverse=True)
    # Slicing avoids an IndexError when fewer than five other apps exist.
    top_5_app = [entry[0] for entry in sorted_tups[:5]]

    # store the top 5
    DataService.update_app_info({'app_id': app},
                                {'$set': {
                                    'top_5_app': top_5_app
                                }})
Пример #6
0
def main():
    """Connect to MongoDB, then compute and persist the top-5 apps.

    Loads the user download history and the app info, passes both to
    ``persist_top_5_apps_for_app`` and always closes the client afterwards.
    Any exception is printed instead of being propagated.
    """
    client = None
    try:
        # print to a file
        # f = file('log.txt', 'w')
        # sys.stdout = f

        # get MongoDB client and set it in DataService
        # (or client = MongoClient('mongodb://localhost:27017/'))
        client = MongoClient('localhost', 27017)
        DataService.init(client)

        # persist_download_history_and_app_info()
        user_download_history = load_user_download_history()
        app_info = load_app_info()
        persist_top_5_apps_for_app(user_download_history, app_info)

    except Exception as e:
        # Single-argument print(...) behaves the same on Python 2 and 3;
        # the bare print statement is a syntax error on Python 3.
        print("Exception! Go fix it!")
        print(e)

    finally:
        # clean up work; client is still None if the connection failed
        if client is not None:
            client.close()
Пример #7
0
def generate_recommendations_for_one_user(user_download_history, recommendations, one_user_id):
    """Store a score-ordered list of recommended apps for one user.

    For every app in the user's download history that has an entry in
    ``recommendations``, the entry's items are collected; element 0 of an
    item is stored as the recommended app and element 1 is used as its
    score.  The apps, ordered by descending score, are written to the
    ``recommended_apps`` field through
    ``DataService.update_user_download_history``.

    Args:
        user_download_history: Mapping indexed by user id that yields the
            user's downloaded apps.
        recommendations: Mapping from app id to a sequence of
            ``(app, score)``-like items.
        one_user_id: Id of the user to generate recommendations for.
    """
    scored_apps = []  # (score, recommended app) in discovery order

    for app in user_download_history[one_user_id]:
        # Test membership on the mapping itself: on Python 2 ``keys()`` is a
        # list, which made this a linear scan for every downloaded app.
        if app in recommendations:
            for one_sim_app in recommendations[app]:
                scored_apps.append((one_sim_app[1], one_sim_app[0]))

    # Sort on the score only; the sort is stable, so equal scores keep
    # their discovery order even with reverse=True.
    ranked = sorted(scored_apps, key=lambda pair: pair[0], reverse=True)
    sorted_list = [recommended for _, recommended in ranked]
    DataService.update_user_download_history({'user_id': one_user_id}, {'$set': {"recommended_apps": sorted_list}})
Пример #8
0
def main():
    """Run ``calculate_top_5`` for every key of the retrieved app info.

    A MongoDB client for localhost:27017 is created and passed to
    ``DataService.init``.  Exceptions are printed, and the client is closed
    whenever it was created.
    """
    mongo_client = None
    try:
        mongo_client = MongoClient('localhost', 27017)
        DataService.init(mongo_client)

        download_history = DataService.retrieve_user_download_history()
        app_records = DataService.retrieve_app_info()
        for app_id in app_records.keys():
            calculate_top_5(app_id, download_history.values())
    except Exception as error:
        print(error)
    finally:
        # None means the client was never created, so there is nothing to close
        if mongo_client is not None:
            mongo_client.close()
Пример #9
0
def run():
    """Assemble the combined contest table and write it in three formats.

    Builds a language table from all lyrics, a track table from the
    Musixmatch-labelled entries and a country table, joins them onto the
    contestants, and writes the result with ``to_pickle``, ``to_feather``
    and ``to_csv``.
    """
    musixmatch_client = Musixmatch(environ["API_KEY"])
    data_service = DataService()
    country_service = CountryService()

    # LANGUAGE
    language_service = LanguageDetectionService()
    language_table = language_service.get_language_table(
        data_service.all_lyrics)
    language_table.name = "language"

    # MUSIXMATCH
    labelled_entries = musixmatch_client.label_entries(
        data_service.all_entries)
    track_table = DataCleanUpService.make_track_information_table(
        labelled_entries)

    # COUNTRY
    country_details = country_service.get_country_info(
        data_service.all_countries)
    country_table = DataCleanUpService.make_country_information_table(
        data_service.all_countries, country_details)

    # COMBINE: left-join each table onto the contestants in turn
    with_language = data_service.contestants.join(language_table, how="left")
    with_tracks = with_language.join(track_table, how="left")
    combined = with_tracks.merge(country_table,
                                 how="left",
                                 left_on="to_country",
                                 right_on="to_country")

    # OUTPUT
    combined.to_pickle("contestdata.pickle")
    combined.to_feather("contestdata.arrow")
    combined.to_csv("contestantdata.csv", quoting=csv.QUOTE_NONNUMERIC)
Пример #10
0
def main():
    """Run ``calculate_top_5`` for every key of the retrieved app info.

    Exceptions are printed rather than propagated, and the MongoDB client is
    closed whenever it was created.
    """
    connection = None
    try:
        # create the MongoDB client and hand it to DataService
        connection = MongoClient('localhost', 27017)
        DataService.init(connection)
        # work flow
        histories = DataService.retrieve_user_download_history()
        known_apps = DataService.retrieve_app_info()
        for app_id in known_apps.keys():
            calculate_top_5(app_id, histories.values())
    except Exception as exc:
        print(exc)
    finally:
        # clean up work
        if connection is not None:
            connection.close()
Пример #11
0
def main():
    """Run ``calculate_top_5`` for every key of the retrieved app info.

    Exceptions are printed rather than propagated, and the MongoDB client is
    closed whenever it was created.
    """
    db_client = None
    try:
        db_client = MongoClient('localhost', 27017)
        DataService.init(db_client)

        history_by_user = DataService.retrieve_user_download_history()
        # single-app check: calculate_top_5('C10063783', history_by_user.values())
        app_table = DataService.retrieve_app_info()
        for current_app in app_table.keys():
            calculate_top_5(current_app, history_by_user.values())

    except Exception as problem:
        print(problem)
    finally:
        if db_client is not None:
            db_client.close()
Пример #12
0
def load_app_info():
    """Return whatever ``DataService.retrieve_app_info()`` yields."""
    # To debug, print str(...) of the returned value before returning it.
    return DataService.retrieve_app_info()
Пример #13
0
def load_user_download_history():
    """Return whatever ``DataService.retrieve_user_download_history()`` yields."""
    # To debug, print str(...) of the returned value before returning it.
    return DataService.retrieve_user_download_history()
Пример #14
0
def main():
    """Run ``calculate_top_5`` for every key of the retrieved app info.

    Exceptions are printed rather than propagated, and the MongoDB client is
    closed whenever it was created.
    """
    client = None
    try:
        # get MongoDB client and set it in DataService
        client = MongoClient('localhost', 27017)
        DataService.init(client)

        # work flow
        user_download_history = DataService.retrieve_user_download_history()
        # calculate_top_5('C10107104', user_download_history.values())

        app_info = DataService.retrieve_app_info()
        for app in app_info.keys():
            calculate_top_5(app, user_download_history.values())

    except Exception as e:  # "except Exception, e" is Python-2-only syntax
        print("Exception detected:")
        print(e)

    finally:
        # always release the connection, also after an error
        if client is not None:
            client.close()
def main():
    """Run ``calculate_top_5`` for every app and print the elapsed time.

    Exceptions are printed rather than propagated, and the MongoDB client is
    closed whenever it was created.
    """
    client = None
    try:
        # time.clock() was removed in Python 3.8; time.time() exists on
        # every version and measures wall-clock seconds.
        start = time.time()
        client = MongoClient('localhost', 27017)
        DataService.init(client)

        user_download_history = DataService.retrieve_user_download_history()
        apps = DataService.retrieve_app_info()
        for app in apps.keys():
            calculate_top_5(app, user_download_history.values())
        end = time.time()
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("time: " + str(end - start))
    except Exception as e:
        print(e)
    finally:
        if client is not None:
            client.close()
Пример #16
0
def main():
    """Run ``calculate_top_5`` for every key of the retrieved app info.

    Exceptions are printed rather than propagated, and the MongoDB client is
    closed whenever it was created.
    """
    client = None
    try:
        # get MongoDB client and set it in DataService
        client = MongoClient('localhost', 27017)
        DataService.init(client)

        # work flow
        user_download_history = DataService.retrieve_user_download_history()
        app_info = DataService.retrieve_app_info()
        for app in app_info.keys():
            calculate_top_5(app, user_download_history.values())
    except Exception as e:
        # print(e) works on Python 2 and 3; "print e" is Python-2-only.
        print(e)
    finally:
        # clean up
        if client is not None:
            client.close()
def load_user_download_history():
    """Return whatever ``DataService.retrieve_user_download_history()`` yields."""
    # To debug, print str(...) of the returned value before returning it.
    return DataService.retrieve_user_download_history()
def load_app_info():
    """Return whatever ``DataService.retrieve_app_info()`` yields."""
    # To debug, print str(...) of the returned value before returning it.
    return DataService.retrieve_app_info()
Пример #19
0
def calculate_top_k(app, user_download_history, num):
    """Compute and store the ``num`` apps most related to ``app``.

    Every download history adds the value returned by
    ``Helper.cosine_similarity([app], apps)`` to the running score of each
    app it contains.  The ``num`` best ``(app_id, similarity)`` pairs,
    excluding ``app`` itself, are written to the field ``top_<num>_app``
    through ``DataService.update_app_info``.

    Args:
        app: Identifier of the app to find related apps for.
        user_download_history: Iterable of download histories, each an
            iterable of app identifiers.
        num: Maximum number of related apps to store.

    Returns:
        False when ``app`` occurs in no history (nothing is stored),
        True otherwise.
    """
    app_similarity = {}  # {app_id: accumulated similarity}
    for apps in user_download_history:
        similarity = Helper.cosine_similarity([app], apps)
        for other_app in apps:
            # dict.get replaces the Python-2-only dict.has_key().
            app_similarity[other_app] = (
                app_similarity.get(other_app, 0) + similarity)

    if app not in app_similarity:
        return False

    app_similarity.pop(app)
    sorted_tups = sorted(app_similarity.items(), key=operator.itemgetter(1), reverse=True)
    name = "top_" + str(num) + "_app"
    # The slice keeps the (app_id, similarity) pairs, not just the ids.
    DataService.update_app_info({'app_id': app}, {'$set': {name: sorted_tups[:num]}})
    return True
Пример #20
0
def main():
    """Compute top-5 data for every app id, then generate recommendations.

    Calls ``calculate_top_k(..., 5)`` for each id from
    ``DataService.get_all_app_id`` and afterwards passes the download history
    and the retrieved recommended items to ``generate_recommendations``.
    Exceptions are printed, and the client is closed whenever it was created.
    """
    mongo_client = None
    try:
        mongo_client = MongoClient('localhost', 27017)
        DataService.init(mongo_client)

        download_history = DataService.retrieve_user_download_history()
        app_ids = DataService.get_all_app_id()

        for app_id in app_ids:
            calculate_top_k(app_id, download_history.values(), 5)

        recommended_items = DataService.retrieve_recommended_items()
        generate_recommendations(download_history, recommended_items)

    except Exception as error:
        print(error)
    finally:
        if mongo_client is not None:
            mongo_client.close()
Пример #21
0
def calculate_user_top_5(user_id, user_download_history, all_download_history):
    """Compute and store the apps recommended to one user (at most five).

    Every history in ``all_download_history`` adds the value returned by
    ``Helper.cosine_similarity(user_download_history, apps)`` to the running
    score of each app it contains.  Apps the user already has are removed,
    and the best-scoring remaining apps are written to the ``top_5_app``
    field through ``DataService.update_user_info``.

    Args:
        user_id: Identifier of the user.
        user_download_history: Iterable of the app ids this user downloaded.
        all_download_history: Iterable of download histories, each an
            iterable of app ids.
    """
    app_similarity = {}  # {app_id: accumulated similarity}

    for apps in all_download_history:
        similarity = Helper.cosine_similarity(user_download_history, apps)
        for other_app in apps:
            # dict.get replaces the Python-2-only dict.has_key().
            app_similarity[other_app] = (
                app_similarity.get(other_app, 0) + similarity)

    for app in user_download_history:
        # A default keeps a repeated app, or one that was never scored,
        # from raising KeyError.
        app_similarity.pop(app, None)

    sorted_tups = sorted(app_similarity.items(), key=operator.itemgetter(1), reverse=True)
    # Slicing avoids an IndexError when fewer than five candidates exist.
    top_5_app = [entry[0] for entry in sorted_tups[:5]]
    print("top_5_app for " + str(user_id) + ":\t" + str(top_5_app))

    DataService.update_user_info({'user_id': user_id}, {'$set': {'top_5_app': top_5_app}})
Пример #22
0
def calculate_top_5(app, user_download_history):
    """Compute and store the apps most related to ``app`` (at most five).

    Every download history adds the value returned by
    ``Helper.cosine_similarity([app], apps)`` to the running score of each
    app it contains.  The best-scoring apps, excluding ``app`` itself, are
    written to the ``top_5_app`` field through ``DataService.update_app_info``.

    Args:
        app: Identifier of the app to find related apps for.
        user_download_history: Iterable of download histories, each an
            iterable of app identifiers.

    Returns:
        None.  Nothing is stored when ``app`` occurs in no history.
    """
    app_similarity = {}  # {app_id: accumulated similarity}

    for apps in user_download_history:
        similarity = Helper.cosine_similarity([app], apps)
        for other_app in apps:
            # dict.get replaces the Python-2-only dict.has_key().
            app_similarity[other_app] = (
                app_similarity.get(other_app, 0) + similarity)

    if app not in app_similarity:
        return

    app_similarity.pop(app)
    sorted_tups = sorted(app_similarity.items(), key=operator.itemgetter(1), reverse=True)
    # Slicing avoids an IndexError when fewer than five other apps exist.
    top_5_app = [entry[0] for entry in sorted_tups[:5]]
    print("top_5_app for" + str(app) + ":\t" + str(top_5_app))
    # store the top 5
    DataService.update_app_info({'app_id': app}, {'$set': {'top_5_app': top_5_app}})
Пример #23
0
def persist_download_history_and_app_info():
    """Reload the sample data through DataService and print what is stored.

    Calls ``DataService.clean_up`` and then
    ``DataService.read_then_persist('data/sample.data')``.  Afterwards the
    database names, the collection names and every item of the
    user-download-history and app-info cursors are printed.
    """
    # persist data into 2 tables
    # user_download_history - (user_id, download_history)
    # app_info              - (app_id, app_name)

    # clean up to remove all old data
    DataService.clean_up()

    # persist work done here
    DataService.read_then_persist('data/sample.data')

    # Everything below is diagnostic output and can be commented out.
    # Single-argument print(...) behaves the same on Python 2 and 3; the
    # bare print statement is a syntax error on Python 3.

    # print db and collections, they will show only if any data was persisted
    print("dbs: " + str(DataService.client.database_names()))
    db = DataService.db
    print("collections: " + str(db.collection_names()) + "\n")

    # print user_download_history
    user_download_history_cursor = (
        DataService.get_cursor_user_download_history())
    print("user_download_history: count=" + str(
        user_download_history_cursor.count()))
    for user_download_history in user_download_history_cursor:
        print("  " + str(user_download_history))
    print("")

    # print app_info
    app_info_cursor = DataService.get_cursor_app_info()
    print("app_info: count=" + str(app_info_cursor.count()))
    for app_info in app_info_cursor:
        print("  " + str(app_info))
    print("")
def persist_download_history_and_app_info():
    """Reload the sample data through DataService and print what is stored.

    Calls ``DataService.clean_up`` and then
    ``DataService.read_then_persist('data/sample.data')``.  Afterwards the
    database names, the collection names and every item of the
    user-download-history and app-info cursors are printed.
    """
    # persist data into 2 tables
    # user_download_history - (user_id, download_history)
    # app_info              - (app_id, app_name)

    # clean up to remove all old data
    DataService.clean_up()

    # persist work done here
    DataService.read_then_persist('data/sample.data')

    # Everything below is diagnostic output and can be commented out.
    # Single-argument print(...) behaves the same on Python 2 and 3; the
    # bare print statement is a syntax error on Python 3.

    # print db and collections, they will show only if any data was persisted
    print("dbs: " + str(DataService.client.database_names()))
    db = DataService.db
    print("collections: " + str(db.collection_names()) + "\n")

    # print user_download_history
    user_download_history_cursor = DataService.get_cursor_user_download_history()
    print("user_download_history: count=" + str(user_download_history_cursor.count()))
    for user_download_history in user_download_history_cursor:
        print("  " + str(user_download_history))
    print("")

    # print app_info
    app_info_cursor = DataService.get_cursor_app_info()
    print("app_info: count=" + str(app_info_cursor.count()))
    for app_info in app_info_cursor:
        print("  " + str(app_info))
    print("")
    def run(self, app, download_history):
        """Compute and store the apps most related to ``app`` (at most five).

        Only histories that contain ``app`` are considered.  Each of them adds
        the value of ``Helper.cosine_similarity_app_applist(app, apps)`` to the
        score of every other app it contains.  The result is written to the
        ``top_5_app`` field through ``DataService.update_app_info``.

        Args:
            app: Identifier of the app to find related apps for.
            download_history: Iterable of download histories, each a
                container of app identifiers.

        Returns:
            None.  Nothing is stored when no other app was scored.
        """
        # This needs serious optimization: similarity A->B and B->A is
        # always computed twice.

        # create a dict to store each other app and its similarity to this app
        app_similarity = {}  # {app_id: similarity}

        for apps in download_history:
            if app not in apps:
                continue
            # calculate the similarity
            similarity = Helper.cosine_similarity_app_applist(app, apps)
            for other_app in apps:
                if other_app == app:
                    continue
                # dict.get replaces the Python-2-only dict.has_key().
                app_similarity[other_app] = (
                    app_similarity.get(other_app, 0) + similarity)

        # ignore apps for which no other app was scored
        if not app_similarity:
            return

        # sort app_similarity dict by value and get the top 5 as recommendation
        # NOTE(review): assumes the helper returns an indexable sequence of
        # (app_id, similarity) pairs, best first -- it is not visible here.
        sorted_tups = self.__sort_dict_by_value(app_similarity)
        # Bounded by the number of entries so fewer than five scored apps
        # no longer raise an IndexError.
        top_5_app = [
            sorted_tups[i][0] for i in range(min(5, len(sorted_tups)))
        ]

        # store the top 5
        DataService.update_app_info({'app_id': app},
                                    {'$set': {
                                        'top_5_app': top_5_app
                                    }})
Пример #26
0
def main():
    """Run ``calculate_top_5`` for every app and print the elapsed time.

    Exceptions are printed rather than propagated, the MongoDB client is
    closed whenever it was created, and the elapsed time is printed last.
    """
    # time.clock() was removed in Python 3.8; time.time() exists on every
    # version and measures wall-clock seconds.
    start = time.time()
    client = None
    try:
        client = MongoClient('localhost', 27017)
        DataService.init(client)

        user_download_history = DataService.retrieve_user_download_history()
        app_info = DataService.retrieve_app_info()
        for app in app_info.keys():
            calculate_top_5(app, user_download_history.values())

    except Exception as e:
        print("Exception detected:")
        print(e)

    finally:
        if client is not None:
            client.close()

    end = time.time()

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("time eplapsed = " + str(end - start))
def calculate_top_5(app, user_download_history):
    """Compute and store the apps most related to ``app`` (at most five).

    Only histories that contain ``app`` are considered.  Each of them adds
    the value returned by ``Helper.cosine_similarity([app], u)`` to the score
    of every app it contains.  The best-scoring apps, excluding ``app``
    itself, are written to the ``top_5_app`` field through
    ``DataService.update_app_info``.

    Args:
        app: Identifier of the app to find related apps for.
        user_download_history: Iterable of download histories, each a
            container of app identifiers.

    Returns:
        None.  Nothing is stored when ``app`` occurs in no history.
    """
    similarity = {}  # {app_id: accumulated similarity}
    for u in user_download_history:
        if app not in u:
            continue
        sim = Helper.cosine_similarity([app], u)
        for other_app in u:
            # dict.get replaces the Python-2-only dict.has_key().
            similarity[other_app] = similarity.get(other_app, 0) + sim

    # The app has not been downloaded, therefore no related history
    if app not in similarity:
        return

    similarity.pop(app)
    sorted_result = sorted(similarity.items(), key=operator.itemgetter(1), reverse=True)
    # Slicing (instead of indexing 0..4) avoids an IndexError when fewer
    # than five other apps were scored.
    top_5 = [entry[0] for entry in sorted_result[:5]]
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(str(app) + " - top 5: " + str(top_5))
    DataService.update_app_info({'app_id': app}, {'$set': {'top_5_app': top_5}})
Пример #28
0
def main():
    """Compute the top-5 recommended apps for every user and time the run.

    Calls ``calculate_user_top_5`` once per user with that user's history and
    the histories of all users.  Exceptions are printed rather than
    propagated, and the MongoDB client is closed whenever it was created.
    """
    client = None
    try:
        client = MongoClient('localhost', 27017)
        DataService.init(client)

        # work flow
        user_download_history = DataService.retrieve_user_download_history()
        # Only needed by the per-app pass (calculate_app_top_5 over
        # app_info.keys()), which is currently disabled.
        app_info = DataService.retrieve_app_info()

        print("Calculating Top 5 recommended apps for users...")

        # time.clock() was removed in Python 3.8; time.time() exists on
        # every version and measures wall-clock seconds.
        start = time.time()
        all_histories = list(user_download_history.values())
        # items() exists on Python 2 and 3; iteritems() is Python-2-only.
        for user_id, download_history in user_download_history.items():
            calculate_user_top_5(user_id, download_history, all_histories)
        end = time.time()

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("time elapsed = " + str(end - start))

    except Exception as e:
        print(e)
    finally:
        # clean up work
        if client is not None:
            client.close()
Пример #29
0
def main():
    """Print up to five recommended apps per user and the elapsed time.

    ``calculate_top_5`` is called for every app id with a shared
    ``top_5_apps`` dict.  For each user, every app listed in ``top_5_apps``
    for one of the user's apps gets one vote, and the apps with the most
    votes are printed.  Exceptions are printed rather than propagated, and
    the MongoDB client is closed whenever it was created.
    """
    client = None
    try:
        # time.clock() was removed in Python 3.8; time.time() exists on
        # every version and measures wall-clock seconds.
        start = time.time()
        client = MongoClient('localhost', 27017)
        DataService.init(client)

        user_download_history = DataService.retrieve_user_download_history()
        app_ids = DataService.retrieve_all_app_id()

        # NOTE(review): presumably filled by calculate_top_5 as
        # {app_id: iterable of related app ids} -- confirm in that function.
        top_5_apps = {}

        for app_id in app_ids:
            calculate_top_5(app_id, user_download_history.values(), top_5_apps)

        for users in user_download_history.keys():
            all_possible_app = {}  # {candidate app: number of votes}
            for used_app_id in user_download_history[users]:
                # .get: an app without an entry contributes no votes
                # instead of raising KeyError.
                for candidate in top_5_apps.get(used_app_id, ()):
                    all_possible_app[candidate] = (
                        all_possible_app.get(candidate, 0) + 1)
            sorted_tups = sorted(all_possible_app.items(), key=operator.itemgetter(1), reverse=True)
            # Slicing avoids an IndexError when fewer than five candidates exist.
            top_5_user_app = [entry[0] for entry in sorted_tups[:5]]
            print("top_5_app " + str(users) + ":\t" + str(top_5_user_app))

        end = time.time()
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("time elapsed = " + str(end - start))
    except Exception as e:
        print(e)
    finally:
        if client is not None:
            client.close()
Пример #30
0
def main():
    """Store the result of ``calculate_Top_5`` for every app.

    For each key of the retrieved app info, the value returned by
    ``calculate_Top_5`` is written to the ``Top 5`` field through
    ``DataService.update_app_info``.  Exceptions are printed rather than
    propagated, and the MongoDB client is closed whenever it was created.
    """
    client = None
    try:
        client = MongoClient('localhost', 27017)
        DataService.init(client)
        apps = DataService.retrieve_appinfo()
        # Retrieved once: the history does not depend on the app being
        # processed, so re-reading it for every app only repeated the query.
        user_download_history = DataService.retrieve_user_download_history()

        # work flow
        for app in apps.keys():
            top_5_app = calculate_Top_5(app, user_download_history.values())
            DataService.update_app_info({"app_id": app}, {"$set": {"Top 5": top_5_app}})
    except Exception as e:
        print(e)
    finally:
        # clean up work
        if client is not None:
            client.close()
Пример #31
0
def _batches(items, size):
    """Split the list ``items`` into consecutive lists of at most ``size`` elements."""
    return [items[i:i + size] for i in range(0, len(items), size)]


def _run_workers(target, key_batches, payload, result):
    """Run ``target(batch, payload, result)`` in one daemon process per batch and wait for all."""
    processes = [
        multiprocessing.Process(target=target, args=(batch, payload, result))
        for batch in key_batches
    ]
    # start every process before joining any, so the batches run concurrently
    for p in processes:
        p.daemon = True
        p.start()
    # the main process waits for the workers
    for p in processes:
        p.join()


def main():
    """Interactively compute recommendations in parallel and store them.

    The user picks 1 (recommended apps for each app) or 2 (recommended apps
    for each user); any other answer repeats the prompt.  The keys are split
    into batches of 500, each batch is handled by ``appWrapper`` or
    ``userWrapper`` in its own process, and the entries the workers put into
    a shared manager dict are written back through ``DataService``.
    Exceptions are printed rather than propagated, the MongoDB client is
    closed whenever it was created, and the elapsed time is printed if the
    computation was started.
    """
    # Python 2's input() evaluates whatever the user types; read the answer
    # as plain text on both Python 2 and 3 instead.
    try:
        read_line = raw_input
    except NameError:  # Python 3: input() already returns the raw string
        read_line = input

    prompt = ("If you want to calculate each app's recommend apps"
              + " please press 1, If you want to calculate"
              + " recommend apps for each user, please press"
              + " 2: ")
    batch_size = 500  # number of apps / users each process deals with
    start = None      # stays None if we fail before the work begins
    client = None
    try:
        while True:
            flag = read_line(prompt).strip()
            if flag in ("1", "2"):
                break
            print("Sorry, you press the wrong number, please try again.")

        # start time (time.clock() was removed in Python 3.8)
        start = time.time()

        # the shared dictionary the worker processes write into
        result = multiprocessing.Manager().dict()

        # get client and init it with DataService class
        client = MongoClient("localhost", 27017)
        DataService.init(client)

        # start analyzing
        userDownloadHistory = DataService.retrieveUserDownloadHistory()

        if flag == "1":
            allApp = DataService.retrieveAppInfo()
            # list(...) makes the keys sliceable and the values picklable
            # on Python 3 as well.
            _run_workers(appWrapper,
                         _batches(list(allApp.keys()), batch_size),
                         list(userDownloadHistory.values()),
                         result)

            # update MongoDB
            for key in result.keys():
                DataService.updateAppInfo({"app_id": key},
                          {"$set": {"top 5 apps": result[key]}})
        else:
            # A trailing worker with no users is no longer spawned when the
            # user count is a multiple of the batch size.
            _run_workers(userWrapper,
                         _batches(list(userDownloadHistory.keys()), batch_size),
                         userDownloadHistory,
                         result)

            # update MongoDB
            for key in result.keys():
                DataService.updateUserInfo({"user_id": key},
                      {"$set": {"top 5 recommended apps": result[key]}})
    except Exception as e:
        print(e)
    finally:
        # close client
        if client is not None:
            client.close()

    # Only report a duration when the timer was started; previously an early
    # failure ended in a NameError on the unbound start value.
    if start is not None:
        end = time.time()
        print("Finished! The elapsed time is " + str(end - start))
def load_top_5_apps(app_id):
    """Return what ``DataService.retrieve_app_info`` yields for ``app_id``."""
    query = {'app_id': app_id}
    return DataService.retrieve_app_info(query)
Пример #33
0
def load_top_5_apps(app_id):
    """Return what ``DataService.retrieve_app_info`` yields for ``app_id``."""
    app_filter = {'app_id': app_id}
    return DataService.retrieve_app_info(app_filter)