Пример #1
0
 def handle_image(graph_file, action):
     """Apply ``action`` to the cache entry that corresponds to ``graph_file``.

     The pattern and the image encoding are parsed out of the file path, which
     is expected to look like ``...<delim><pattern><delim>image_<encoding>.jpg``.
     The cache for that pattern is loaded, the slot whose encoding matches is
     rewritten and the cache is saved again.

     :param graph_file: path of the image file the action refers to
     :param action: one of DELETE, DISCARD, INC_RANK, DEC_RANK
     :return: an empty string for DELETE/DISCARD (the file is removed),
              otherwise a message describing the rank change (or why the
              rank could not be lowered)
     :raises AssertionError: if the action is unknown, the path cannot be
              parsed, no cache exists, or no cached slot matches the file
     """
     assert action in [DELETE, DISCARD, INC_RANK, DEC_RANK]
     key_str = get_delim() + "image_"
     end_pos = graph_file.find(key_str)
     assert -1 != end_pos
     begin_pos = graph_file[:end_pos].rfind(get_delim())
     assert -1 != begin_pos
     pattern = graph_file[begin_pos + 1:end_pos]
     cache_file = GraphFetcher.get_cache_file(pattern)
     has_cache, cached_objs = load(cache_file)
     assert has_cache
     # look for the extension only after the "image_" marker so that a ".jpg"
     # occurring earlier in the path cannot truncate the encoding
     encoding_start = end_pos + len(key_str)
     file_encoding = graph_file[encoding_start:graph_file.find(".jpg", encoding_start)]
     removes_file = action in [DELETE, DISCARD]
     for url in cached_objs:
         image_slot = cached_objs[url]
         if image_slot.encoding != file_encoding:
             continue
         if DELETE == action:
             new_encoding = NA
         elif DISCARD == action:
             new_encoding = None
         else:
             new_encoding = image_slot.encoding  # no change
         # ranks are compared by value: identity checks on ints ("is 1") only
         # work by accident of the interpreter's small-int caching
         new_rank = image_slot.rank  # no change unless adjusted below
         if INC_RANK == action:
             new_rank = image_slot.rank + 1
         elif DEC_RANK == action and image_slot.rank != 1:
             new_rank = image_slot.rank - 1
         cached_objs[url] = ImageSlot(timestamp=image_slot.timestamp,
                                      encoding=new_encoding,
                                      rank=new_rank)
         save(cache_file, cached_objs)
         if removes_file:
             os.remove(graph_file)
             return ""
         if new_rank != image_slot.rank:
             return get_msg(Msg.change_rank_to) + str(new_rank) + "!"
         # only reachable for DEC_RANK on a slot that already has rank 1
         return get_msg(Msg.cannot_lower_down_rank_as_it_is_already_the_lowest)
     # an explicit raise (unlike "assert False") is not stripped under -O
     raise AssertionError("no cached image matches " + graph_file)
Пример #2
0
 def get_updated_url(self, pattern):
     """Crawl ``pattern`` and sort the known URLs into current and outdated ones.

     :param pattern: search pattern handed to the crawler and used to locate
                     the cache file
     :return: ``(new_objs, old_objs)``; ``new_objs`` maps every URL of the
              latest crawl to its ImageSlot, ``old_objs`` maps cached URLs
              missing from the latest crawl to their cached slot (empty when
              there is no cache)
     """
     has_cache, cached_objs = load(GraphFetcher.get_cache_file(pattern))
     recent_url, is_new_result = Crawler().crawl(pattern, self.__size, self.__option)
     self.__has_write = is_new_result
     new_objs = {}
     for link in recent_url or ():
         if not (has_cache and link in cached_objs):
             # a new entry: no encoding yet, default rank of 1
             new_objs[link] = ImageSlot(timestamp=datetime.today(), encoding=None, rank=1)
             continue
         known_slot = cached_objs[link]
         if is_new_result:
             # update to current date, keep encoding and rank
             known_slot = ImageSlot(datetime.today(), known_slot.encoding, known_slot.rank)
         new_objs[link] = known_slot
     if not has_cache:
         return new_objs, {}
     old_objs = {link: cached_objs[link]
                 for link in cached_objs
                 if not recent_url or link not in recent_url}
     return new_objs, old_objs
Пример #3
0
 def handle_image(graph_file, action):
     """Apply ``action`` to the cache entry that corresponds to ``graph_file``.

     The pattern and the image encoding are parsed out of the file path, which
     is expected to look like ``...<delim><pattern><delim>image_<encoding>.jpg``.
     The cache for that pattern is loaded, the slot whose encoding matches is
     rewritten and the cache is saved again.

     :param graph_file: path of the image file the action refers to
     :param action: one of DELETE, DISCARD, INC_RANK, DEC_RANK
     :return: an empty string for DELETE/DISCARD (the file is removed),
              otherwise a message describing the rank change (or why the
              rank could not be lowered)
     :raises AssertionError: if the action is unknown, the path cannot be
              parsed, no cache exists, or no cached slot matches the file
     """
     assert action in [DELETE, DISCARD, INC_RANK, DEC_RANK]
     key_str = get_delim() + "image_"
     end_pos = graph_file.find(key_str)
     assert -1 != end_pos
     begin_pos = graph_file[:end_pos].rfind(get_delim())
     assert -1 != begin_pos
     pattern = graph_file[begin_pos + 1:end_pos]
     cache_file = GraphFetcher.get_cache_file(pattern)
     has_cache, cached_objs = load(cache_file)
     assert has_cache
     # look for the extension only after the "image_" marker so that a ".jpg"
     # occurring earlier in the path cannot truncate the encoding
     encoding_start = end_pos + len(key_str)
     file_encoding = graph_file[encoding_start:graph_file.find(".jpg", encoding_start)]
     removes_file = action in [DELETE, DISCARD]
     for url in cached_objs:
         image_slot = cached_objs[url]
         if image_slot.encoding != file_encoding:
             continue
         if DELETE == action:
             new_encoding = NA
         elif DISCARD == action:
             new_encoding = None
         else:
             new_encoding = image_slot.encoding  # no change
         # ranks are compared by value: identity checks on ints ("is 1") only
         # work by accident of the interpreter's small-int caching
         new_rank = image_slot.rank  # no change unless adjusted below
         if INC_RANK == action:
             new_rank = image_slot.rank + 1
         elif DEC_RANK == action and image_slot.rank != 1:
             new_rank = image_slot.rank - 1
         cached_objs[url] = ImageSlot(timestamp=image_slot.timestamp,
                                      encoding=new_encoding,
                                      rank=new_rank)
         save(cache_file, cached_objs)
         if removes_file:
             os.remove(graph_file)
             return ""
         if new_rank != image_slot.rank:
             return "change rank to " + str(new_rank) + "!"
         # only reachable for DEC_RANK on a slot that already has rank 1
         return "cannot lower down rank as it is already the lowest!"
     # an explicit raise (unlike "assert False") is not stripped under -O
     raise AssertionError("no cached image matches " + graph_file)
Пример #4
0
 def get_updated_url(self, pattern):
     """Run a crawl for ``pattern`` and partition URLs into fresh and stale sets.

     :param pattern: search pattern used both for crawling and for finding
                     the cache file
     :return: a pair of dicts ``(new_objs, old_objs)``: the first holds an
              ImageSlot for each URL returned by the crawl, the second holds
              the cached slots whose URL the crawl no longer returned (always
              empty if no cache was loaded)
     """
     has_cache, cached_objs = load(GraphFetcher.get_cache_file(pattern))
     recent_url, is_new_result = Crawler().crawl(pattern, self.__size, self.__option)
     self.__has_write = is_new_result
     fresh = {}
     for address in recent_url or ():
         already_cached = has_cache and address in cached_objs
         if already_cached and is_new_result:
             # update to current date, encoding and rank carry over
             previous = cached_objs[address]
             fresh[address] = ImageSlot(datetime.today(), previous.encoding, previous.rank)
         elif already_cached:
             fresh[address] = cached_objs[address]
         else:
             # a new entry starts without encoding at the default rank 1
             fresh[address] = ImageSlot(timestamp=datetime.today(), encoding=None, rank=1)
     if not has_cache:
         return fresh, {}
     stale = {}
     for address in cached_objs:
         if recent_url and address in recent_url:
             continue
         stale[address] = cached_objs[address]
     return fresh, stale
Пример #5
0
 def __init__(self, need_save=True):
     """Set up the instance state and preload the URL map from its cache file.

     :param need_save: stored on the instance; ``__del__``-style persistence
                       code can consult it to decide whether to write back
     """
     self.__need_save = need_save
     self.__network_reachable = network_reachable()
     self.__has_write = False
     self.__url_map = {}  # stays empty unless the cache file can be loaded
     self.__cache_file = get_data_home() + "url.pickle"
     self.__updated_key_value = (None, None)
     loaded_ok, stored_map = load(self.__cache_file)
     if loaded_ok:
         self.__url_map = stored_map
Пример #6
0
 def __init__(self, need_save=True):
     """Initialise bookkeeping fields and read the cached URL map, if any.

     :param need_save: flag kept on the instance as ``__need_save``
     """
     self.__need_save = need_save
     self.__network_reachable = network_reachable()
     self.__has_write = False
     self.__url_map = {}  # replaced below when a cache file was loaded
     self.__cache_file = get_data_home() + "url.pickle"
     self.__updated_key_value = (None, None)
     cache_found, cached_map = load(self.__cache_file)
     if cache_found:
         self.__url_map = cached_map
Пример #7
0
 def __del__(self):
     """Merge the pending key/value update into the cache file on destruction.

     Nothing happens unless saving is enabled and a write was recorded. The
     cache file is re-read (instead of using the in-memory map) before the
     single updated entry is stored and the map is saved again.
     """
     if not (self.__need_save and self.__has_write):
         return
     if not os.path.exists(self.__cache_file):
         merged_map = {}
     else:
         is_exist, merged_map = load(self.__cache_file)
         assert is_exist
     key, updated_value = self.__updated_key_value
     assert key and updated_value
     merged_map[key] = updated_value
     # TODO: better use file lock (fcntl) to avoid file update error in mul-instances usage
     save(self.__cache_file, merged_map)
Пример #8
0
 def print_pattern(pattern):
     """Print the URL, timestamp, encoding and rank of each cached image of ``pattern``.

     :raises AssertionError: if no cache could be loaded for the pattern
     """
     print("image:", pattern)
     has_cache, cached_objs = load(GraphFetcher.get_cache_file(pattern))
     assert has_cache
     for url, slot in cached_objs.items():
         print("url:", url)
         print("timestamp:", slot.timestamp)
         print("no.:", slot.encoding)
         print("rank:", slot.rank)
Пример #9
0
 def print_pattern(pattern):
     """Dump every cached entry stored for ``pattern`` to standard output.

     :raises AssertionError: if the cache for the pattern cannot be loaded
     """
     print("image:", pattern)
     has_cache, cached_objs = load(GraphFetcher.get_cache_file(pattern))
     assert has_cache
     for link, entry in cached_objs.items():
         print("url:", link)
         print("timestamp:", entry.timestamp)
         print("no.:", entry.encoding)
         print("rank:", entry.rank)
Пример #10
0
 def __del__(self):
     """Persist the recorded key/value update when the object is destroyed.

     Does nothing unless both ``__need_save`` and ``__has_write`` are set.
     Otherwise the map currently on disk is loaded (or an empty one is used
     when the file is absent), the updated entry is written into it and the
     result is saved back to the cache file.
     """
     nothing_to_save = not self.__need_save or not self.__has_write
     if nothing_to_save:
         return
     if os.path.exists(self.__cache_file):
         is_exist, on_disk = load(self.__cache_file)
         assert is_exist
     else:
         on_disk = {}
     key, updated_value = self.__updated_key_value
     assert key and updated_value
     on_disk[key] = updated_value
     # TODO: better use file lock (fcntl) to avoid file update error in mul-instances usage
     save(self.__cache_file, on_disk)
Пример #11
0
 def __load_or_create_status(self):
     """Return the status map of all images below the directory, rebuilding the cache if needed.

     When a cache file exists and ``dir_changed`` reports no change for its
     stored timestamp, the cached map is returned as is. Otherwise the
     directory tree is walked, entries are added for new images, entries of
     vanished images are dropped, and the cache file is rewritten.

     :return: dict mapping image path (relative to the directory) to its
              Status, or ``None`` when the directory holds no recognized
              image (the instance is then marked invalid and a stale cache
              file is removed)
     :raises AssertionError: if an existing cache file cannot be loaded
     """
     status_cache = {}  # key: image_file, value: status
     cache_file = self.__location + get_delim() + GraphDirHandler.CACHE_FILE
     cache_existed = os.path.exists(cache_file)
     if cache_existed:
         success, cache_data = load(cache_file)
         assert success
         [timestamp, status_cache] = cache_data
         if not self.dir_changed(timestamp):
             return status_cache
         info("directory %s has changed, update cache file" %
              self.__location)
     else:
         info("create a new cache file for directory: %s" % self.__location)
     location_prefix = self.__location + get_delim()
     image_files = []
     for root, _, files in os.walk(self.__location):
         assert len(root) >= 1
         if root[-1] != get_delim():
             root += get_delim()
         for base_file in files:
             ext = os.path.splitext(base_file)[1]
             if ext.replace(".", "") not in GraphDirHandler.RECOGNIZED_IMAGE_EXT:
                 continue
             image_path = root + base_file
             # strip the directory prefix once, at the front only; str.replace
             # would also delete later occurrences inside nested paths
             if image_path.startswith(location_prefix):
                 image_path = image_path[len(location_prefix):]
             image_files.append(image_path)
     if not image_files:
         if cache_existed:
             os.remove(cache_file)
         self.__valid = False
         return None
     existed_image = set(image_files)
     for image in image_files:
         if image not in status_cache:
             status_cache[image] = Status()
     # drop entries of images that no longer exist; collect first so the
     # dict is not modified while it is being iterated
     to_be_deleted = [image for image in status_cache if image not in existed_image]
     for image in to_be_deleted:
         status_cache.pop(image)
     # TODO: this makes an 'always' has-changed 2nd time image
     # NOTE(review): presumably because the cache file is saved into the
     # directory after this mtime is read - confirm against dir_changed
     timestamp = time.ctime(os.path.getmtime(self.__location))
     save(cache_file, [timestamp, status_cache])
     return status_cache
Пример #12
0
 def __load_or_create_status(self):
     """Return the status map of all images below the directory, rebuilding the cache if needed.

     When a cache file exists and ``dir_changed`` reports no change for its
     stored timestamp, the cached map is returned as is. Otherwise the
     directory tree is walked, entries are added for new images, entries of
     vanished images are dropped, and the cache file is rewritten.

     :return: dict mapping image path (with the leading directory location
              removed) to its Status, or ``None`` when the directory holds no
              recognized image (the instance is then marked invalid and a
              stale cache file is removed)
     :raises AssertionError: if an existing cache file cannot be loaded
     """
     status_cache = {}  # key: image_file, value: status
     cache_file = self.__location + get_delim() + GraphDirHandler.CACHE_FILE
     cache_existed = os.path.exists(cache_file)
     if cache_existed:
         success, cache_data = load(cache_file)
         assert success
         [timestamp, status_cache] = cache_data
         if not self.dir_changed(timestamp):
             return status_cache
         info(get_msg(Msg.directory), self.__location, get_msg(Msg.has_changed_update_cache_file))
     else:
         info("%s%s" % (get_msg(Msg.create_new_cache_file_for_directory), self.__location))
     image_files = []
     for root, _, files in os.walk(self.__location):
         assert len(root) >= 1
         if root[-1] != get_delim():
             root += get_delim()
         for base_file in files:
             ext = os.path.splitext(base_file)[1]
             if ext.replace(".", "") not in GraphDirHandler.RECOGNIZED_IMAGE_EXT:
                 continue
             image_path = root + base_file
             # strip the directory location once, at the front only;
             # str.replace would also delete later occurrences in nested paths
             if image_path.startswith(self.__location):
                 image_path = image_path[len(self.__location):]
             image_files.append(image_path)
     if not image_files:
         if cache_existed:
             os.remove(cache_file)
         self.__valid = False
         return None
     existed_image = set(image_files)
     for image in image_files:
         if image not in status_cache:
             status_cache[image] = Status()
     # drop entries of images that no longer exist; collect first so the
     # dict is not modified while it is being iterated
     to_be_deleted = [image for image in status_cache if image not in existed_image]
     for image in to_be_deleted:
         status_cache.pop(image)
     # TODO: this makes an 'always' has-changed 2nd time image
     # NOTE(review): presumably because the cache file is saved into the
     # directory after this mtime is read - confirm against dir_changed
     timestamp = time.ctime(os.path.getmtime(self.__location))
     save(cache_file, [timestamp, status_cache])
     return status_cache