def generateEdmEmailingList(connection, site_id):
    """Rebuild the ``edm_emailing_list`` collection for one site.

    Collects the distinct ``user_id`` values found in ``user_orders``
    (limited to orders placed within ``EMAILING_USER_ORDERS_MAX_DAY`` days
    of the latest order, when a latest order datetime is available), drops
    the current ``edm_emailing_list`` collection and then stores one
    document per user for whom ``recommend_for_edm`` yields exactly
    ``EXPECTED_RECOMMENDATION_ITEMS`` recommendations.

    Args:
        connection: Connection object passed through to ``getSiteDB``,
            ``getSiteDBCollection``, ``getLatestUserOrderDatetime`` and
            ``MongoClient``.
        site_id: Identifier of the site whose emailing list is rebuilt.
    """
    logger = logging.getLogger("EDMCalculations")

    latest_order_datetime = getLatestUserOrderDatetime(connection, site_id)
    if latest_order_datetime is None:
        # No latest order datetime available: do not restrict by date.
        query = {}
    else:
        earliest = latest_order_datetime - datetime.timedelta(
            days=EMAILING_USER_ORDERS_MAX_DAY)
        query = {"order_datetime": {"$gte": earliest}}

    db = getSiteDB(connection, site_id)
    # Pass the command verb as a string plus keyword arguments: a plain dict
    # with several keys does not guarantee that "distinct" is sent first on
    # interpreters whose dicts are unordered.
    result = db.command("distinct", "user_orders", key="user_id", query=query)
    user_ids = result["values"]

    # NOTE(review): presumably the project's own client wrapper (it exposes
    # recommend_for_edm), not pymongo's MongoClient -- confirm.
    mongo_client = MongoClient(connection)

    c_edm_emailing_list = getSiteDBCollection(connection, site_id, "edm_emailing_list")
    c_edm_emailing_list.drop()
    # Fetch the collection handle again after the drop before inserting.
    c_edm_emailing_list = getSiteDBCollection(connection, site_id, "edm_emailing_list")

    t0 = time.time()
    for count, user_id in enumerate(user_ids, 1):
        if count % 100 == 0:
            elapsed = time.time() - t0
            # A coarse clock can report a zero interval; avoid dividing by it.
            rate = count / elapsed if elapsed > 0 else float("inf")
            logger.info("Count: %s, %s users/sec", count, rate)

        # The second element of the returned pair is not used here.
        recommendation_result, _ = mongo_client.recommend_for_edm(
            site_id, user_id, max_amount=EXPECTED_RECOMMENDATION_ITEMS)

        # Only users with a full set of recommendations are stored.
        if len(recommendation_result) == EXPECTED_RECOMMENDATION_ITEMS:
            c_edm_emailing_list.insert({
                "user_id": user_id,
                "recommendation_result": recommendation_result,
            })
def getEmailingUsers(connection, site_id, page_num, page_size):
    """Return one page of emailing-candidate users plus pager metadata.

    The candidate users are the distinct ``user_id`` values in
    ``user_orders``, limited to orders placed within
    ``EMAILING_USER_ORDERS_MAX_DAY`` days of the latest order when a latest
    order datetime is available.

    Args:
        connection: Connection object passed through to ``getSiteDB`` and
            ``getLatestUserOrderDatetime``.
        site_id: Identifier of the site to query.
        page_num: 1-based number of the page to return.
        page_size: Number of users per page.

    Returns:
        dict with the keys ``models`` (list of ``{"user_id": ...}`` for the
        requested page), ``page``, ``page_size``, ``total``,
        ``prev_page_num``, ``page_nums`` (list of page numbers to show in
        the pager), ``next_page_num``, ``max_page_num``,
        ``curr_left_reached`` and ``curr_right_reached``.
    """
    latest_order_datetime = getLatestUserOrderDatetime(connection, site_id)
    if latest_order_datetime is None:
        # No latest order datetime available: do not restrict by date.
        query = {}
    else:
        earliest = latest_order_datetime - datetime.timedelta(
            days=EMAILING_USER_ORDERS_MAX_DAY)
        query = {"order_datetime": {"$gte": earliest}}

    db = getSiteDB(connection, site_id)
    # Pass the command verb as a string plus keyword arguments: a plain dict
    # with several keys does not guarantee that "distinct" is sent first on
    # interpreters whose dicts are unordered.
    result = db.command("distinct", "user_orders", key="user_id", query=query)
    user_ids = result["values"]
    total = len(user_ids)

    selected_user_ids = user_ids[(page_num - 1) * page_size:page_num * page_size]

    # Integer ceiling division; plain "/" yields a float on Python 3, which
    # would then break range() below.
    max_page_num, remainder = divmod(total, page_size)
    if remainder > 0:
        max_page_num += 1

    # Pager window: up to four pages before the current one, and at most
    # ten page numbers in total (left edge + 9), capped at the last page.
    page_num_left = max(page_num - 4, 1)
    page_num_right = min(max_page_num, page_num_left + 9)

    models = [{"user_id": user_id} for user_id in selected_user_ids]
    return {
        "models": models,
        "page": page_num,
        "page_size": page_size,
        "total": total,
        "prev_page_num": max(1, page_num - 1),
        # Materialized so the value is a list on both Python 2 and 3.
        "page_nums": list(range(page_num_left, page_num_right + 1)),
        "next_page_num": min(max_page_num, page_num + 1),
        "max_page_num": max_page_num,
        "curr_left_reached": page_num == 1,
        "curr_right_reached": page_num >= max_page_num,
    }
def getEmailingUsers(connection, site_id, page_num, page_size):
    """Return one page of emailing-candidate users plus pager metadata.

    The candidate users are the distinct ``user_id`` values in
    ``user_orders``, limited to orders placed within
    ``EMAILING_USER_ORDERS_MAX_DAY`` days of the latest order when a latest
    order datetime is available.

    Args:
        connection: Connection object passed through to ``getSiteDB`` and
            ``getLatestUserOrderDatetime``.
        site_id: Identifier of the site to query.
        page_num: 1-based number of the page to return.
        page_size: Number of users per page.

    Returns:
        dict with the keys ``models`` (list of ``{"user_id": ...}`` for the
        requested page), ``page``, ``page_size``, ``total``,
        ``prev_page_num``, ``page_nums`` (list of page numbers to show in
        the pager), ``next_page_num``, ``max_page_num``,
        ``curr_left_reached`` and ``curr_right_reached``.
    """
    latest_order_datetime = getLatestUserOrderDatetime(connection, site_id)
    if latest_order_datetime is None:
        # No latest order datetime available: do not restrict by date.
        query = {}
    else:
        earliest = latest_order_datetime - datetime.timedelta(
            days=EMAILING_USER_ORDERS_MAX_DAY)
        query = {"order_datetime": {"$gte": earliest}}

    db = getSiteDB(connection, site_id)
    # Pass the command verb as a string plus keyword arguments: a plain dict
    # with several keys does not guarantee that "distinct" is sent first on
    # interpreters whose dicts are unordered.
    result = db.command("distinct", "user_orders", key="user_id", query=query)
    user_ids = result["values"]
    total = len(user_ids)

    selected_user_ids = user_ids[(page_num - 1) * page_size:page_num * page_size]

    # Integer ceiling division; plain "/" yields a float on Python 3, which
    # would then break range() below.
    max_page_num, remainder = divmod(total, page_size)
    if remainder > 0:
        max_page_num += 1

    # Pager window: up to four pages before the current one, and at most
    # ten page numbers in total (left edge + 9), capped at the last page.
    page_num_left = max(page_num - 4, 1)
    page_num_right = min(max_page_num, page_num_left + 9)

    models = [{"user_id": user_id} for user_id in selected_user_ids]
    return {
        "models": models,
        "page": page_num,
        "page_size": page_size,
        "total": total,
        "prev_page_num": max(1, page_num - 1),
        # Materialized so the value is a list on both Python 2 and 3.
        "page_nums": list(range(page_num_left, page_num_right + 1)),
        "next_page_num": min(max_page_num, page_num + 1),
        "max_page_num": max_page_num,
        "curr_left_reached": page_num == 1,
        "curr_right_reached": page_num >= max_page_num,
    }
def generateEdmEmailingList(connection, site_id):
    """Rebuild the ``edm_emailing_list`` collection for one site.

    Collects the distinct ``user_id`` values found in ``user_orders``
    (limited to orders placed within ``EMAILING_USER_ORDERS_MAX_DAY`` days
    of the latest order, when a latest order datetime is available), drops
    the current ``edm_emailing_list`` collection and then stores one
    document per user for whom ``recommend_for_edm`` yields exactly
    ``EXPECTED_RECOMMENDATION_ITEMS`` recommendations.

    Args:
        connection: Connection object passed through to ``getSiteDB``,
            ``getSiteDBCollection``, ``getLatestUserOrderDatetime`` and
            ``MongoClient``.
        site_id: Identifier of the site whose emailing list is rebuilt.
    """
    logger = logging.getLogger("EDMCalculations")

    latest_order_datetime = getLatestUserOrderDatetime(connection, site_id)
    if latest_order_datetime is None:
        # No latest order datetime available: do not restrict by date.
        query = {}
    else:
        earliest = latest_order_datetime - datetime.timedelta(
            days=EMAILING_USER_ORDERS_MAX_DAY)
        query = {"order_datetime": {"$gte": earliest}}

    db = getSiteDB(connection, site_id)
    # Pass the command verb as a string plus keyword arguments: a plain dict
    # with several keys does not guarantee that "distinct" is sent first on
    # interpreters whose dicts are unordered.
    result = db.command("distinct", "user_orders", key="user_id", query=query)
    user_ids = result["values"]

    # NOTE(review): presumably the project's own client wrapper (it exposes
    # recommend_for_edm), not pymongo's MongoClient -- confirm.
    mongo_client = MongoClient(connection)

    c_edm_emailing_list = getSiteDBCollection(connection, site_id, "edm_emailing_list")
    c_edm_emailing_list.drop()
    # Fetch the collection handle again after the drop before inserting.
    c_edm_emailing_list = getSiteDBCollection(connection, site_id, "edm_emailing_list")

    t0 = time.time()
    for count, user_id in enumerate(user_ids, 1):
        if count % 100 == 0:
            elapsed = time.time() - t0
            # A coarse clock can report a zero interval; avoid dividing by it.
            rate = count / elapsed if elapsed > 0 else float("inf")
            logger.info("Count: %s, %s users/sec", count, rate)

        # The second element of the returned pair is not used here.
        recommendation_result, _ = mongo_client.recommend_for_edm(
            site_id, user_id, max_amount=EXPECTED_RECOMMENDATION_ITEMS)

        # Only users with a full set of recommendations are stored.
        if len(recommendation_result) == EXPECTED_RECOMMENDATION_ITEMS:
            c_edm_emailing_list.insert({
                "user_id": user_id,
                "recommendation_result": recommendation_result,
            })
# Command-line entry: register/update a site and, on request, reset its
# per-site collections first.
mongo_client = MongoClient(pymongo.Connection(settings.mongodb_host))

parser = OptionParser()
parser.add_option(
    "-r", "--reset_db",
    dest="reset_db",
    default="no",
    help="reset all database of this site(use with caution)",
)
parser.add_option(
    "-i", "--site_id",
    dest="site_id",
    default=None,
    help="Site ID(required)",
)
parser.add_option(
    "-n", "--site_name",
    dest="site_name",
    default=None,
    help="Site Name(required)",
)
parser.add_option(
    "-c", "--calc_interval",
    dest="calc_interval",
    default="43200",
    help="How long (in seconds) the server will update the calculation",
)

(options, args) = parser.parse_args()
site_id = options.site_id
site_name = options.site_name

# NOTE: the help text calls site_id and site_name required, but the checks
# below are disabled, so both may still be None at this point.
#assert site_id is not None
#assert site_name is not None

# TODO: use dropDatabase?
connection = pymongo.Connection(settings.mongodb_host)
if options.reset_db == "yes":
    # Drop the per-site collections, in this order.
    for collection_name in ("item_similarities", "raw_logs", "items"):
        getSiteDBCollection(connection, site_id, collection_name).drop()
    # Recreate raw_logs and index it on timestamp, descending.
    getSiteDB(connection, site_id).create_collection("raw_logs", {})
    getSiteDBCollection(connection, site_id, "raw_logs").ensure_index([("timestamp", -1)])

# calc_interval arrives as a string (default "43200") and is passed as int.
mongo_client.updateSite(site_id, site_name, int(options.calc_interval))
def getSiteDB(self, site_id):
    """Return the database for ``site_id`` using this object's connection.

    Thin wrapper: inside the method body the bare name ``getSiteDB`` refers
    to the free function of that name, which receives ``self.connection``
    and ``site_id``.
    """
    site_db = getSiteDB(self.connection, site_id)
    return site_db
# Remaining command-line options, followed by the optional reset and the
# site update. ``parser`` and ``mongo_client`` are bound earlier in the script.
parser.add_option(
    "-n", "--site_name",
    dest="site_name",
    default=None,
    help="Site Name(required)",
)
parser.add_option(
    "-c", "--calc_interval",
    dest="calc_interval",
    default="43200",
    help="How long (in seconds) the server will update the calculation",
)

(options, args) = parser.parse_args()
site_id = options.site_id
site_name = options.site_name

# NOTE: the help text calls site_id and site_name required, but the checks
# below are disabled, so both may still be None at this point.
#assert site_id is not None
#assert site_name is not None

# TODO: use dropDatabase?
connection = pymongo.Connection(settings.mongodb_host)
if options.reset_db == "yes":
    # Drop the per-site collections, in this order.
    for collection_name in ("item_similarities", "raw_logs", "items"):
        getSiteDBCollection(connection, site_id, collection_name).drop()
    # Recreate raw_logs and index it on timestamp, descending.
    getSiteDB(connection, site_id).create_collection("raw_logs", {})
    getSiteDBCollection(connection, site_id, "raw_logs").ensure_index([("timestamp", -1)])

# calc_interval arrives as a string (default "43200") and is passed as int.
mongo_client.updateSite(site_id, site_name, int(options.calc_interval))