def handle_image(graph_file, action):
    """Apply *action* to the cached image slot that matches *graph_file*.

    The file name is expected to look like
    ``...<delim><pattern><delim>image_<encoding>.jpg``: *pattern* selects the
    cache file, *encoding* selects the slot inside it.

    Returns "" for DELETE/DISCARD (the file is also removed from disk),
    otherwise a user-facing message describing the rank change.  Asserts if
    the file does not correspond to any cached slot.
    """
    assert action in [DELETE, DISCARD, INC_RANK, DEC_RANK]
    key_str = get_delim() + "image_"
    end_pos = graph_file.find(key_str)
    assert -1 != end_pos
    begin_pos = graph_file[:end_pos].rfind(get_delim())
    assert -1 != begin_pos
    # path segment between the last two delimiters is the search pattern
    pattern = graph_file[begin_pos + 1:end_pos]
    has_cache, cached_objs = load(GraphFetcher.get_cache_file(pattern))
    assert has_cache
    # reuse end_pos instead of a second find(key_str) scan
    file_encoding = graph_file[end_pos + len(key_str):graph_file.find(".jpg")]
    for url, image_slot in cached_objs.items():
        if image_slot.encoding != file_encoding:
            continue
        new_encoding = NA if DELETE == action else \
                       None if DISCARD == action else \
                       image_slot.encoding  # no change
        # bug fix: compare ints with ==/!=, not is/is not -- identity on ints
        # relies on CPython small-int interning and breaks for rank > 256
        new_rank = image_slot.rank + 1 if INC_RANK == action else \
                   image_slot.rank - 1 if DEC_RANK == action and image_slot.rank != 1 else \
                   image_slot.rank  # no change
        cached_objs[url] = ImageSlot(timestamp=image_slot.timestamp,
                                     encoding=new_encoding,
                                     rank=new_rank)
        save(GraphFetcher.get_cache_file(pattern), cached_objs)
        if action in [DELETE, DISCARD]:
            os.remove(graph_file)
            return ""
        if new_rank != image_slot.rank:
            return get_msg(Msg.change_rank_to) + str(new_rank) + "!"
        # only DEC_RANK at the floor leaves the rank unchanged
        assert image_slot.rank == 1
        return get_msg(Msg.cannot_lower_down_rank_as_it_is_already_the_lowest)
    assert False
def get_updated_url(self, pattern):
    """Crawl *pattern* and split cache entries into (new, old) slot dicts.

    Returns a pair of dicts keyed by url: slots matched or created by the
    current crawl result, and the remaining previously-cached slots.  Sets
    self.__has_write when the crawl produced fresh results.
    """
    has_cache, cached_objs = load(GraphFetcher.get_cache_file(pattern))
    recent_url, is_new_result = Crawler().crawl(pattern, self.__size, self.__option)
    self.__has_write = is_new_result
    new_objs = {}
    for url in recent_url or []:
        if has_cache and url in cached_objs:
            slot = cached_objs[url]
            if is_new_result:
                # refresh the timestamp, keep encoding and rank
                new_objs[url] = ImageSlot(datetime.today(), slot.encoding, slot.rank)
            else:
                new_objs[url] = slot
        else:
            # unseen url: fresh slot with the default rank of 1
            new_objs[url] = ImageSlot(timestamp=datetime.today(), encoding=None, rank=1)
    if not has_cache:
        return new_objs, {}
    # everything cached but absent from the crawl result is "old"
    old_objs = {url: slot for url, slot in cached_objs.items()
                if not recent_url or url not in recent_url}
    return new_objs, old_objs
def handle_image(graph_file, action):
    """Apply *action* to the cached image slot that matches *graph_file*.

    The file name is expected to look like
    ``...<delim><pattern><delim>image_<encoding>.jpg``: *pattern* selects the
    cache file, *encoding* selects the slot inside it.

    Returns "" for DELETE/DISCARD (the file is also removed from disk),
    otherwise a message describing the rank change.  Asserts if the file does
    not correspond to any cached slot.
    """
    assert action in [DELETE, DISCARD, INC_RANK, DEC_RANK]
    key_str = get_delim() + "image_"
    end_pos = graph_file.find(key_str)
    assert -1 != end_pos
    begin_pos = graph_file[:end_pos].rfind(get_delim())
    assert -1 != begin_pos
    # path segment between the last two delimiters is the search pattern
    pattern = graph_file[begin_pos + 1:end_pos]
    has_cache, cached_objs = load(GraphFetcher.get_cache_file(pattern))
    assert has_cache
    # reuse end_pos instead of a second find(key_str) scan
    file_encoding = graph_file[end_pos + len(key_str):graph_file.find(".jpg")]
    for url, image_slot in cached_objs.items():
        if image_slot.encoding != file_encoding:
            continue
        new_encoding = NA if DELETE == action else \
                       None if DISCARD == action else \
                       image_slot.encoding  # no change
        # bug fix: compare ints with ==/!=, not is/is not -- identity on ints
        # relies on CPython small-int interning and breaks for rank > 256
        new_rank = image_slot.rank + 1 if INC_RANK == action else \
                   image_slot.rank - 1 if DEC_RANK == action and image_slot.rank != 1 else \
                   image_slot.rank  # no change
        cached_objs[url] = ImageSlot(timestamp=image_slot.timestamp,
                                     encoding=new_encoding,
                                     rank=new_rank)
        save(GraphFetcher.get_cache_file(pattern), cached_objs)
        if action in [DELETE, DISCARD]:
            os.remove(graph_file)
            return ""
        if new_rank != image_slot.rank:
            return "change rank to " + str(new_rank) + "!"
        # only DEC_RANK at the floor leaves the rank unchanged
        assert image_slot.rank == 1
        return "cannot lower down rank as it is already the lowest!"
    assert False
def __init__(self, need_save=True):
    """Initialize defaults and pull the persisted url map from disk, if any."""
    self.__need_save = need_save
    self.__network_reachable = network_reachable()
    self.__has_write = False
    self.__cache_file = get_data_home() + "url.pickle"
    self.__updated_key_value = (None, None)
    is_exist, url_map = load(self.__cache_file)
    # fall back to an empty map when no cache file exists yet
    self.__url_map = url_map if is_exist else {}
def __del__(self):
    """Persist the pending url-map update on teardown, when saving is enabled."""
    if not (self.__need_save and self.__has_write):
        return
    url_map = {}
    if os.path.exists(self.__cache_file):
        is_exist, loaded_map = load(self.__cache_file)
        assert is_exist
        url_map = loaded_map
    key, updated_value = self.__updated_key_value
    assert key and updated_value
    url_map[key] = updated_value
    # TODO: better use file lock (fcntl) to avoid file update error in mul-instances usage
    save(self.__cache_file, url_map)
def print_pattern(pattern):
    """Dump every cached image slot for *pattern* to stdout."""
    print("image:", pattern)
    # print(GraphFetcher.get_cache_file(pattern))
    has_cache, cached_objs = load(GraphFetcher.get_cache_file(pattern))
    assert has_cache
    # idiom: enumerate replaces the manual zip(dict, range(len(dict))) pairing
    for i, url in enumerate(cached_objs):
        image_slot = cached_objs[url]
        # print("order:", i)
        print("url:", url)
        print("timestamp:", image_slot.timestamp)
        print("no.:", image_slot.encoding)
        print("rank:", image_slot.rank)
def print_pattern(pattern):
    """Dump every cached image slot for *pattern* to stdout."""
    print("image:", pattern)
    # print(GraphFetcher.get_cache_file(pattern))
    has_cache, cached_objs = load(GraphFetcher.get_cache_file(pattern))
    assert has_cache
    # idiom: enumerate replaces the manual zip(dict, range(len(dict))) pairing
    for i, url in enumerate(cached_objs):
        image_slot = cached_objs[url]
        # print("order:", i)
        print("url:", url)
        print("timestamp:", image_slot.timestamp)
        print("no.:", image_slot.encoding)
        print("rank:", image_slot.rank)
def __load_or_create_status(self):
    """Load the per-directory status cache, rebuilding it when the directory changed.

    Returns a dict mapping image file (path relative to self.__location) to
    Status, or None (after marking self invalid) when the directory contains
    no recognized images.
    """
    status_cache = {}  # key: image_file, value: status
    cache_file = self.__location + get_delim() + GraphDirHandler.CACHE_FILE
    cache_existed = os.path.exists(cache_file)
    if cache_existed:
        success, cache_data = load(cache_file)
        assert success
        timestamp, status_cache = cache_data
        if not self.dir_changed(timestamp):
            return status_cache
        info("directory %s has changed, update cache file" % self.__location)
    else:
        info("create a new cache file for directory: %s" % self.__location)
    image_files = []
    for root, _, files in os.walk(self.__location):
        assert len(root) >= 1
        if root[-1] != get_delim():
            root += get_delim()
        for base_file in files:
            # only the extension matters; basename was an unused local
            _, ext = os.path.splitext(base_file)
            if ext.replace(".", "") in GraphDirHandler.RECOGNIZED_IMAGE_EXT:
                image_files.append((root + base_file).replace(
                    self.__location + get_delim(), ""))
    if not image_files:
        # nothing to track: drop any stale cache and flag the handler invalid
        if cache_existed:
            os.remove(cache_file)
        self.__valid = False
        return None
    existed_image = set(image_files)  # set membership instead of dummy-value dict
    for image in image_files:
        if image not in status_cache:
            status_cache[image] = Status()
    # purge cache entries whose image was deleted from disk
    for image in [img for img in status_cache if img not in existed_image]:
        status_cache.pop(image)
    # TODO: this makes an 'always' has-changed 2nd time image
    timestamp = time.ctime(os.path.getmtime(self.__location))
    save(cache_file, [timestamp, status_cache])
    return status_cache
def __load_or_create_status(self):
    """Load the per-directory status cache, rebuilding it when the directory changed.

    Returns a dict mapping image file (path relative to self.__location) to
    Status, or None (after marking self invalid) when the directory contains
    no recognized images.
    """
    status_cache = {}  # key: image_file, value: status
    cache_file = self.__location + get_delim() + GraphDirHandler.CACHE_FILE
    cache_existed = os.path.exists(cache_file)
    if cache_existed:
        success, cache_data = load(cache_file)
        assert success
        timestamp, status_cache = cache_data
        if not self.dir_changed(timestamp):
            return status_cache
        info(get_msg(Msg.directory), self.__location, get_msg(Msg.has_changed_update_cache_file))
    else:
        info("%s%s" % (get_msg(Msg.create_new_cache_file_for_directory), self.__location))
    image_files = []
    for root, _, files in os.walk(self.__location):
        assert len(root) >= 1
        if root[-1] != get_delim():
            root += get_delim()
        for base_file in files:
            # only the extension matters; basename was an unused local
            _, ext = os.path.splitext(base_file)
            if ext.replace(".", "") in GraphDirHandler.RECOGNIZED_IMAGE_EXT:
                # NOTE(review): stripping self.__location without the trailing
                # delimiter leaves a leading delimiter on the key -- a sibling
                # variant strips location+delim; confirm which form callers expect
                image_files.append((root + base_file).replace(self.__location, ""))
    if not image_files:
        # nothing to track: drop any stale cache and flag the handler invalid
        if cache_existed:
            os.remove(cache_file)
        self.__valid = False
        return None
    existed_image = set(image_files)  # set membership instead of dummy-value dict
    for image in image_files:
        if image not in status_cache:
            status_cache[image] = Status()
    # purge cache entries whose image was deleted from disk
    for image in [img for img in status_cache if img not in existed_image]:
        status_cache.pop(image)
    # TODO: this makes an 'always' has-changed 2nd time image
    timestamp = time.ctime(os.path.getmtime(self.__location))
    save(cache_file, [timestamp, status_cache])
    return status_cache