def reset_items(site_id):
    """Clean up the items of a site and reset its item index.

    The site lookup is done with ``use_cache=False``.

    Raises:
        SiteNotExistsError: if ``siteExists`` reports that ``site_id`` is
            unknown.
    """
    client = getMongoClient()
    if not client.siteExists(site_id, use_cache=False):
        raise SiteNotExistsError()
    client.cleanupItems(site_id)
    reset_item_index(site_id)
def run(site_id):
    """Interactively rebuild the item index of ``site_id``.

    The operator has to answer ``yes``; any other answer prints a notice and
    exits the process with status 0.  After confirmation the item index is
    reset, every document of the site's ``items`` collection is indexed
    without its ``_id`` field, and each suggest keyword whose type is
    ``keyword_list.WHITE_LIST`` is marked as white-listed again.
    """
    prompt = "Do you really want to reindex items of site: %s (enter 'yes' to continue)" % site_id
    if raw_input(prompt) != "yes":
        print("Exit without action.")
        sys.exit(0)

    reset_item_index(site_id)
    items = getMongoClient().getSiteDBCollection(site_id, "items")
    total = items.count()
    for done, item in enumerate(items.find(), 1):
        del item["_id"]
        es_client.es_index_item(site_id, item)
        # Progress report every 50 indexed items.
        if done % 50 == 0:
            print("%s/%s" % (done, total))

    # also fill whitelisted keywords
    for record in keyword_list.fetchSuggestKeywordList(site_id):
        if record["type"] == keyword_list.WHITE_LIST:
            keyword_list.markKeywordsAsWhiteListed(site_id, [record["keyword"]])
def reset_items(site_id):
    """Clean up the items of a site and reset its item index.

    The site lookup is done with ``use_cache=False``.

    Raises:
        SiteNotExistsError: if ``siteExists`` reports that ``site_id`` is
            unknown.
    """
    client = getMongoClient()
    if not client.siteExists(site_id, use_cache=False):
        raise SiteNotExistsError()
    client.cleanupItems(site_id)
    reset_item_index(site_id)
def setUp(self):
    """Create the clients, initialise the test site and clear the caches.

    The ``api_key`` and ``site_token`` of the record returned by
    ``initSite(self.TEST_SITE_ID)`` are kept on the instance.
    """
    # unittest: None disables truncation of assertion-failure diffs.
    self.maxDiff = None
    self.mongo_client = getMongoClient()
    self.es = es_search_functions.getESClient()

    test_site = self.initSite(self.TEST_SITE_ID)
    self.api_key = test_site["api_key"]
    self.site_token = test_site["site_token"]

    self.clearCaches()
def setUp(self):
    """Create the clients, initialise the test site and clear the caches.

    The ``api_key`` and ``site_token`` of the record returned by
    ``initSite(self.TEST_SITE_ID)`` are kept on the instance.
    """
    # unittest: None disables truncation of assertion-failure diffs.
    self.maxDiff = None
    self.mongo_client = getMongoClient()
    self.es = es_search_functions.getESClient()

    test_site = self.initSite(self.TEST_SITE_ID)
    self.api_key = test_site["api_key"]
    self.site_token = test_site["site_token"]

    self.clearCaches()
def update_keyword_hot_view_list(site_id):
    """Refresh the cached per-category keyword hot-view lists of a site.

    For every category whose list from ``calculateKeywordHotViewList`` is
    non-empty, the list is stored under the ``AutoKeywordHotView`` cache type
    and the ``KeywordHotView`` entry of that category is deleted.
    Categories with an empty list are left untouched.
    """
    hot_lists = getMongoClient().calculateKeywordHotViewList(site_id)
    for category_id, topn in hot_lists.items():
        if len(topn) == 0:
            continue
        cache_key = (category_id, )
        cached_result.set("AutoKeywordHotView", site_id, cache_key, topn)
        # purge the KeywordHotView
        cached_result.delete("KeywordHotView", site_id, cache_key)
def process_item_update_queue(item_update_queue):
    """Apply queued item updates to MongoDB and to the search index.

    ``item_update_queue`` yields ``(site_id, item)`` pairs.  For each pair
    every entry of ``item["categories"]`` and, when truthy, ``item["brand"]``
    is passed to ``updateProperty``; then the item is stored with
    ``updateItem`` and the value it returns is indexed.
    """
    client = getMongoClient()
    for site_id, queued_item in item_update_queue:
        for category in queued_item["categories"]:
            client.updateProperty(site_id, category)

        brand = queued_item.get("brand")
        if brand:
            client.updateProperty(site_id, brand)

        # Index what updateItem returns, not the queued input.
        stored_item = client.updateItem(site_id, queued_item)
        es_client.es_index_item(site_id, stored_item)
# Raw-log behavior code -> event_type expected by the events API.
_RAW_LOG_EVENT_TYPES = {
    "V": "ViewItem",
    "AF": "AddFavorite",
    "RF": "RemoveFavorite",
    "UNLIKE": "Unlike",
    "RI": "RateItem",
    "ASC": "AddOrderItem",
    "RSC": "RemoveOrderItem",
    "PLO": "PlaceOrder"
}

# Behaviors whose raw log carries both an item_id and a user_id.
_ITEM_BEHAVIORS = ("V", "AF", "RF", "UNLIKE", "RI", "ASC", "RSC")


def _build_event_params(raw_log, api_key):
    """Translate one raw log into the parameters of an events API request.

    Raises KeyError for a behavior missing from ``_RAW_LOG_EVENT_TYPES``.
    """
    behavior = raw_log["behavior"]
    params = {"api_key": api_key}
    if behavior in _ITEM_BEHAVIORS:
        params["item_id"] = raw_log["item_id"]
        params["user_id"] = raw_log["user_id"]
    if behavior == "PLO":
        params["user_id"] = raw_log["user_id"]
        params["order_id"] = raw_log.get("order_id", None)
        params["order_content"] = "|".join(
            ["%(item_id)s,%(price)s,%(amount)s" % order_item
             for order_item in raw_log["order_content"]])
    if behavior == "RI":
        params["score"] = raw_log["score"]
    params["event_type"] = _RAW_LOG_EVENT_TYPES[behavior]
    return params


def _wait_for_new_raw_log(c_raw_logs, before_count):
    """Poll ``c_raw_logs`` until it holds more than ``before_count`` documents."""
    # NOTE(review): there is no timeout; if the log is never written this
    # loops forever, exactly like the original inline loop did.
    after_count = c_raw_logs.count()
    while after_count <= before_count:
        print("waiting raw_log being inserted. %s,%s" % (before_count, after_count))
        time.sleep(0.1)
        after_count = c_raw_logs.count()


def run(from_site_id, from_datetime, to_datetime, to_site_id, to_site_from_datetime,
        api_key="5a552549"):
    """Replay raw logs of one site into another through the events API.

    Raw logs of ``from_site_id`` with ``created_on`` between ``from_datetime``
    and ``to_datetime`` (both inclusive) are re-sent as events after the
    operator confirms with ``yes``.  After each accepted event the newest raw
    log of ``to_site_id`` gets its ``created_on`` overwritten with the source
    timestamp shifted by ``to_site_from_datetime - from_datetime``.  Rejected
    requests are printed and skipped.

    Args:
        from_site_id: site whose raw logs are read.
        from_datetime, to_datetime: bounds of the source range; converted
            with ``as_datetime``.
        to_site_id: site whose ``raw_logs`` collection is watched and patched.
        to_site_from_datetime: start of the target range; converted with
            ``as_datetime``.
        api_key: api key sent with every event.  Defaults to the key that was
            previously hard-coded.  Presumably it must belong to
            ``to_site_id``, otherwise the wait for a new raw log never ends.

    Raises:
        KeyError: if a raw log has a behavior without a known event type.
    """
    from_datetime = as_datetime(from_datetime)
    to_datetime = as_datetime(to_datetime)
    to_site_from_datetime = as_datetime(to_site_from_datetime)
    time_delta = to_site_from_datetime - from_datetime
    # Single-argument print(...) behaves the same as the print statement.
    print("TIME DELTA: %s" % (time_delta,))

    mongo_client = getMongoClient()
    from_c_raw_logs = mongo_client.getSiteDBCollection(from_site_id, "raw_logs")
    to_c_raw_logs = mongo_client.getSiteDBCollection(to_site_id, "raw_logs")
    result_set = from_c_raw_logs.find(
        {"created_on": {"$gte": from_datetime, "$lte": to_datetime}})
    print("map date range: %s, %s to %s, %s" % (
        from_datetime, to_datetime, to_site_from_datetime,
        to_site_from_datetime + (to_datetime - from_datetime)))
    print("%s %s" % (from_c_raw_logs, to_c_raw_logs))
    print("Total logs: %s" % (result_set.count(),))

    answer = raw_input(
        "Do you want to load raw_logs from %s to %s ?(enter 'yes' to continue)"
        % (from_site_id, to_site_id))
    if answer != "yes":
        print("Exit without action.")
        return

    client = Client()
    for raw_log in result_set:
        del raw_log["_id"]
        raw_log["created_on"] = raw_log["created_on"] + time_delta
        params = _build_event_params(raw_log, api_key)
        client.cookies["__ptmid"] = raw_log["tjbid"]

        before_count = to_c_raw_logs.count()
        response = client.get("/api/v1.6/public/events/", params)
        if response.status_code != 200 or response.data["code"] != 0:
            print("%s %s" % (response, response.data))
            continue

        # The API stamps the new log with the current time; wait for it to
        # appear, then rewrite its timestamp to the shifted source time.
        _wait_for_new_raw_log(to_c_raw_logs, before_count)
        last_raw_log = list(
            to_c_raw_logs.find().sort([("$natural", -1)]).limit(1))[0]
        last_raw_log["created_on"] = raw_log["created_on"]
        to_c_raw_logs.save(last_raw_log)
def run(site_id, site_name, api_prefix):
    """Interactively create a site and print its credentials.

    Nothing is created unless the operator answers ``yes``.  On success the
    ``api_key`` and ``site_token`` of the record returned by ``create_site``
    are printed.
    """
    question = (
        "Do you want to create the site: '%s' with site_name '%s' and api_prefix '%s' ?(enter 'yes' to continue)"
        % (site_id, site_name, api_prefix))
    if raw_input(question) != "yes":
        print("Exit without action.")
        return

    # NOTE(review): 3600 * 24 is presumably an interval in seconds (one day);
    # confirm against create_site's signature.
    site_record = create_site(getMongoClient(), site_id, site_name, 3600 * 24,
                              api_prefix=api_prefix)
    print("Site %s created. " % site_id)
    print("api_key=%s" % site_record["api_key"])
    print("api_token=%s" % site_record["site_token"])
def run(site_id, site_name, api_prefix):
    """Interactively create a site and print its credentials.

    Nothing is created unless the operator answers ``yes``.  On success the
    ``api_key`` and ``site_token`` of the record returned by ``create_site``
    are printed.
    """
    question = (
        "Do you want to create the site: '%s' with site_name '%s' and api_prefix '%s' ?(enter 'yes' to continue)"
        % (site_id, site_name, api_prefix))
    if raw_input(question) != "yes":
        print("Exit without action.")
        return

    # NOTE(review): 3600 * 24 is presumably an interval in seconds (one day);
    # confirm against create_site's signature.
    site_record = create_site(getMongoClient(), site_id, site_name, 3600 * 24,
                              api_prefix=api_prefix)
    print("Site %s created. " % site_id)
    print("api_key=%s" % site_record["api_key"])
    print("api_token=%s" % site_record["site_token"])
def authenticate(self, request):
    """Authenticate a request that carries an ``Authorization: Token <token>`` header.

    Returns:
        ``None`` when the header is missing, empty, or not exactly the word
        ``Token`` followed by one token; otherwise ``(site, None)`` with the
        site that ``getSiteFromToken`` resolves the token to.

    Raises:
        exceptions.AuthenticationFailed: if no site matches the token.
        Exception: any other error is logged at critical level and re-raised.
    """
    try:
        authorization_line = request.META.get('HTTP_AUTHORIZATION')
        if not authorization_line:
            return None

        splitted_line = authorization_line.split()
        if len(splitted_line) != 2 or splitted_line[0] != "Token":
            return None

        # Acquire the Mongo client only once a well-formed token is present,
        # so requests without credentials never touch the database layer.
        mongo_client = getMongoClient()
        site = mongo_client.getSiteFromToken(site_token=splitted_line[1])
        if site is None:
            raise exceptions.AuthenticationFailed('No such user')
        return (site, None)
    except exceptions.AuthenticationFailed:
        raise
    except Exception:
        # Exception rather than a bare except, so SystemExit and
        # KeyboardInterrupt are not reported as authentication errors.
        import logging
        logging.critical("PocoTokenAuthentication unexpect error", exc_info=True)
        raise
from elasticutils import S, F from common.mongo_client import getMongoClient from common.mongo_client import SimpleRecommendationResultFilter from common.mongo_client import SameGroupRecommendationResultFilter from tasks import process_item_update_queue from tasks import write_log #logging.basicConfig(format="%(asctime)s|%(levelname)s|%(name)s|%(message)s", # level=logging.WARNING, # datefmt="%Y-%m-%d %I:%M:%S") mongo_client = getMongoClient() mongo_client.reloadApiKey2SiteID() class HotViewListCache: EXPIRY_TIME = 3600 def __init__(self, mongo_client): self.mongo_client = mongo_client def getHotViewList(self, site_id, hot_index_type, category_id=None, brand=None): cache_key = "hot-view-list-%s-%s-%s-%s" % (site_id, hot_index_type, category_id, brand) django_cache = get_cache("default") cache_entry = django_cache.get(cache_key) if cache_entry: return cache_entry
# Raw-log behavior code -> event_type expected by the events API.
_RAW_LOG_EVENT_TYPES = {
    "V": "ViewItem",
    "AF": "AddFavorite",
    "RF": "RemoveFavorite",
    "UNLIKE": "Unlike",
    "RI": "RateItem",
    "ASC": "AddOrderItem",
    "RSC": "RemoveOrderItem",
    "PLO": "PlaceOrder"
}

# Behaviors whose raw log carries both an item_id and a user_id.
_ITEM_BEHAVIORS = ("V", "AF", "RF", "UNLIKE", "RI", "ASC", "RSC")


def _build_event_params(raw_log, api_key):
    """Translate one raw log into the parameters of an events API request.

    Raises KeyError for a behavior missing from ``_RAW_LOG_EVENT_TYPES``.
    """
    behavior = raw_log["behavior"]
    params = {"api_key": api_key}
    if behavior in _ITEM_BEHAVIORS:
        params["item_id"] = raw_log["item_id"]
        params["user_id"] = raw_log["user_id"]
    if behavior == "PLO":
        params["user_id"] = raw_log["user_id"]
        params["order_id"] = raw_log.get("order_id", None)
        params["order_content"] = "|".join(
            ["%(item_id)s,%(price)s,%(amount)s" % order_item
             for order_item in raw_log["order_content"]])
    if behavior == "RI":
        params["score"] = raw_log["score"]
    params["event_type"] = _RAW_LOG_EVENT_TYPES[behavior]
    return params


def _wait_for_new_raw_log(c_raw_logs, before_count):
    """Poll ``c_raw_logs`` until it holds more than ``before_count`` documents."""
    # NOTE(review): there is no timeout; if the log is never written this
    # loops forever, exactly like the original inline loop did.
    after_count = c_raw_logs.count()
    while after_count <= before_count:
        print("waiting raw_log being inserted. %s,%s" % (before_count, after_count))
        time.sleep(0.1)
        after_count = c_raw_logs.count()


def run(from_site_id, from_datetime, to_datetime, to_site_id, to_site_from_datetime,
        api_key="5a552549"):
    """Replay raw logs of one site into another through the events API.

    Raw logs of ``from_site_id`` with ``created_on`` between ``from_datetime``
    and ``to_datetime`` (both inclusive) are re-sent as events after the
    operator confirms with ``yes``.  After each accepted event the newest raw
    log of ``to_site_id`` gets its ``created_on`` overwritten with the source
    timestamp shifted by ``to_site_from_datetime - from_datetime``.  Rejected
    requests are printed and skipped.

    Args:
        from_site_id: site whose raw logs are read.
        from_datetime, to_datetime: bounds of the source range; converted
            with ``as_datetime``.
        to_site_id: site whose ``raw_logs`` collection is watched and patched.
        to_site_from_datetime: start of the target range; converted with
            ``as_datetime``.
        api_key: api key sent with every event.  Defaults to the key that was
            previously hard-coded.  Presumably it must belong to
            ``to_site_id``, otherwise the wait for a new raw log never ends.

    Raises:
        KeyError: if a raw log has a behavior without a known event type.
    """
    from_datetime = as_datetime(from_datetime)
    to_datetime = as_datetime(to_datetime)
    to_site_from_datetime = as_datetime(to_site_from_datetime)
    time_delta = to_site_from_datetime - from_datetime
    # Single-argument print(...) behaves the same as the print statement.
    print("TIME DELTA: %s" % (time_delta,))

    mongo_client = getMongoClient()
    from_c_raw_logs = mongo_client.getSiteDBCollection(from_site_id, "raw_logs")
    to_c_raw_logs = mongo_client.getSiteDBCollection(to_site_id, "raw_logs")
    result_set = from_c_raw_logs.find(
        {"created_on": {"$gte": from_datetime, "$lte": to_datetime}})
    print("map date range: %s, %s to %s, %s" % (
        from_datetime, to_datetime, to_site_from_datetime,
        to_site_from_datetime + (to_datetime - from_datetime)))
    print("%s %s" % (from_c_raw_logs, to_c_raw_logs))
    print("Total logs: %s" % (result_set.count(),))

    answer = raw_input(
        "Do you want to load raw_logs from %s to %s ?(enter 'yes' to continue)"
        % (from_site_id, to_site_id))
    if answer != "yes":
        print("Exit without action.")
        return

    client = Client()
    for raw_log in result_set:
        del raw_log["_id"]
        raw_log["created_on"] = raw_log["created_on"] + time_delta
        params = _build_event_params(raw_log, api_key)
        client.cookies["__ptmid"] = raw_log["tjbid"]

        before_count = to_c_raw_logs.count()
        response = client.get("/api/v1.6/public/events/", params)
        if response.status_code != 200 or response.data["code"] != 0:
            print("%s %s" % (response, response.data))
            continue

        # The API stamps the new log with the current time; wait for it to
        # appear, then rewrite its timestamp to the shifted source time.
        _wait_for_new_raw_log(to_c_raw_logs, before_count)
        last_raw_log = list(
            to_c_raw_logs.find().sort([("$natural", -1)]).limit(1))[0]
        last_raw_log["created_on"] = raw_log["created_on"]
        to_c_raw_logs.save(last_raw_log)
def update_hotview_list(site_id):
    """Update the hot-view list of ``site_id`` for every hot index type.

    The hot index types are the keys of the Mongo client's
    ``HOT_INDEX_TYPE2INDEX_PREFIX`` mapping; the prefixes themselves are not
    needed here, so only the keys are iterated.
    """
    mongo_client = getMongoClient()
    for hot_index_type in mongo_client.HOT_INDEX_TYPE2INDEX_PREFIX:
        mongo_client.updateHotViewList(site_id, hot_index_type)
to_be_in_unidentified_keywords, increase_count=True) def _indexKeywordsForCompletion(self, site_id, keywords): res = self.es.indices.analyze(index=es_search_functions.getESItemIndexName(site_id), text=" ".join(keywords), analyzer="mycn_analyzer_whitespace_pinyin_first_n_full") for token_idx in range(len(res["tokens"])): token = res["tokens"][token_idx] raw_keyword = keywords[token_idx] splitted_token = token["token"].split("||") first_letters = splitted_token[0] full_pinyin = "".join(splitted_token[1:]) result = {"keyword_completion": {"input": [raw_keyword, full_pinyin, first_letters], "output": raw_keyword}} self.es.index(index=es_search_functions.getESItemIndexName(site_id), doc_type='keyword', body=result) def markKeywordsAsWhiteListed(self, site_id, keywords): #from recommender import es_client # also need to search and reindex the white listed keywords. use update api. # also update the keyword completion self.updateSuggestKeywordList(site_id, self.WHITE_LIST, keywords) self._indexKeywordsForCompletion(site_id, keywords) def markKeywordsAsBlackListed(self, site_id, keywords): self.updateSuggestKeywordList(site_id, self.BLACK_LIST, keywords) # TODO remove keywords for completion keyword_list = KeywordList(es_search_functions.getESClient(), getMongoClient())
def rebuild_suggestion_cache(site_id):
    """Run ``SuggestionCacheBuilder.rebuild`` for ``site_id``."""
    SuggestionCacheBuilder(site_id, getMongoClient()).rebuild()
# http://www.aspheute.com/english/20040105.asp def createRandomPassword(length): allowedChars = "abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ23456789" password = "" for i in range(length): password += allowedChars[random.randint(0, 256) % len(allowedChars)] return password def createHashedPassword(password): salt = createRandomPassword(16) hashed_password = hashlib.sha256(password + salt).hexdigest() return hashed_password, salt mongo_client = getMongoClient() c_users = mongo_client.getTjbDb()["users"] random.seed(open("/dev/random", "rb").read(10)) def _inputSites(): sites_str = raw_input("sites(comma separated):").strip() if sites_str == "": return [] else: return sites_str.split(",") def cmd_createNewUser():
def _getFullCacheKey(self, cache_type, site_id, cache_key_tuple):
    """Build the cache key from the type, the site and the ``|``-joined tuple."""
    joined_key = "|".join(cache_key_tuple)
    return "results-cache-%s-%s-%s" % (cache_type, site_id, joined_key)

def _setDjangoCache(self, cache_key, result):
    """Store ``result`` in the ``default`` Django cache with ``EXPIRY_TIME``."""
    get_cache("default").set(cache_key, result, self.EXPIRY_TIME)

def set(self, cache_type, site_id, cache_key_tuple, result):
    """Write ``result`` to the Mongo-backed store and to the Django cache."""
    key = self._getFullCacheKey(cache_type, site_id, cache_key_tuple)
    self.mongo_client.updateCachedResults(site_id, key, result)
    self._setDjangoCache(key, result)

def get(self, cache_type, site_id, cache_key_tuple):
    """Return the cached result, or ``None`` when neither store has it.

    The Django cache is consulted first.  On a miss the Mongo-backed store
    is read and, when it has a value, that value is copied back into the
    Django cache.
    """
    cache = get_cache("default")
    key = self._getFullCacheKey(cache_type, site_id, cache_key_tuple)
    hit = cache.get(key)
    if hit is not None:
        return hit

    stored = self.mongo_client.getFromCachedResults(site_id, key)
    if stored is not None:
        self._setDjangoCache(key, stored)
    return stored

def delete(self, cache_type, site_id, cache_key_tuple):
    """Remove the entry from the Mongo-backed store and the Django cache."""
    cache = get_cache("default")
    key = self._getFullCacheKey(cache_type, site_id, cache_key_tuple)
    self.mongo_client.deleteCachedResults(site_id, key)
    cache.delete(key)


cached_result = CachedResult(getMongoClient())