Example #1
def recreate_cache():
    shutil.rmtree(diskcache_location, ignore_errors=True)
    os.mkdir(diskcache_location)
    diskcache_cache = Cache(diskcache_location,
                            disk_pickle_protocol=pickle_protocol)
    diskcache_cache.close()
    return diskcache_cache
Example #2
class CacheProxy:
    def __init__(self, script):
        self.config = get_configs()
        collectors_dir = self.config.get('base', 'collectors_dir')
        self.cache = Cache(
            os.path.join(collectors_dir, 'cache/script/', script))

    def get(self, key):
        return self.cache.get(key)

    def set(self, key, value):
        self.cache.set(key, value)

    def delete(self, key):
        self.cache.delete(key)

    def close(self):
        self.cache.close()

    def counter_to_gauge(self, key, value):
        last_value = self.get(key)
        self.set(key, value)
        if last_value is None:
            return None
        gauge = value - last_value
        if gauge < 0 or gauge > last_value:
            return None
        return gauge
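A minimal usage sketch (not part of the original snippet; the script name and byte counts are made up) showing how counter_to_gauge turns a monotonically increasing counter into a per-interval gauge: the first sample only seeds the cache, later samples return the delta.

# Hypothetical usage of CacheProxy; 'my_script' and the values are illustrative.
proxy = CacheProxy('my_script')
print(proxy.counter_to_gauge('rx_bytes', 1000))  # None: first sample only seeds the cache
print(proxy.counter_to_gauge('rx_bytes', 1500))  # 500: delta since the previous sample
proxy.close()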
Example #3
class BaseCacheAnalyzer(BaseAnalyzer):
    def __init__(self, cache_location=None, force=False):
        super().__init__()
        self.cache_location = cache_location
        self.cache = None
        self.force = force

    def initialize(self):
        from diskcache import Cache
        self.cache = Cache(self.cache_location or self.uid + "_cache")

    def filter(self, simulation):
        return self.force or not self.is_in_cache(simulation.id)

    def to_cache(self, key, data):
        self.cache.set(key, data)

    def from_cache(self, key):
        return self.cache.get(key)

    def is_in_cache(self, key):
        return key in self.cache

    def __del__(self):
        if self.cache:
            self.cache.close()

    @property
    def keys(self):
        return list(self.cache.iterkeys()) if self.cache else None
Example #4
def worker(queue, eviction_policy, processes, threads):
    timings = co.defaultdict(list)
    cache = Cache('tmp', eviction_policy=eviction_policy)

    for index, (action, key, value) in enumerate(iter(queue.get, None)):
        start = time.time()

        try:
            if action == 'set':
                cache.set(key, value, expire=EXPIRE)
            elif action == 'get':
                result = cache.get(key)
            else:
                assert action == 'delete'
                cache.delete(key)
        except Timeout:
            miss = True
        else:
            miss = False

        stop = time.time()

        if (action == 'get' and processes == 1 and threads == 1
                and EXPIRE is None):
            assert result == value

        if index > WARMUP:
            delta = stop - start
            timings[action].append(delta)
            if miss:
                timings[action + '-miss'].append(delta)

    queue.put(timings)

    cache.close()
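A sketch of how a benchmark driver could feed this worker; the workload, the 'least-recently-stored' policy, and the surrounding constants (EXPIRE, WARMUP) are assumptions, not taken from the original benchmark. The None sentinel terminates the iter(queue.get, None) loop, after which the worker reports its timings back on the same queue.

# Hypothetical driver for the worker above.
import multiprocessing as mp

queue = mp.Queue()
proc = mp.Process(target=worker, args=(queue, 'least-recently-stored', 1, 1))
proc.start()

for i in range(1000):
    queue.put(('set', i, 'value-%d' % i))
    queue.put(('get', i, 'value-%d' % i))
queue.put(None)        # sentinel: ends the worker's iter(queue.get, None) loop

proc.join()
timings = queue.get()  # per-action latency lists reported back by the worker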
Example #5
def worker(queue, eviction_policy):
    timings = {'get': [], 'set': [], 'delete': []}
    cache = Cache('tmp', eviction_policy=eviction_policy)

    for index, (action, key, value) in enumerate(iter(queue.get, None)):
        start = time.time()

        if action == 'set':
            cache.set(key, value, expire=EXPIRE)
        elif action == 'get':
            result = cache.get(key)
        else:
            assert action == 'delete'
            cache.delete(key)

        stop = time.time()

        if action == 'get' and PROCESSES == 1 and THREADS == 1 and EXPIRE is None:
            assert result == value

        if index > WARMUP:
            timings[action].append(stop - start)

    queue.put(timings)

    cache.close()
Example #6
class FileCache(BaseCache):
    """ BaseCache implementation using files to store the data.
    This implementation uses diskcache.Cache
    see http://www.grantjenks.com/docs/diskcache/api.html#cache for more
    information

    This cache requires you to install diskcache using `pip install diskcache`
    """
    def __init__(self, path, **settings):
        """ Constructor

        Arguments:
            path {String} -- The path on the disk to save the data
            settings {dict} -- The settings values for diskcache
        """
        from diskcache import Cache
        self._cache = Cache(path, **settings)

    def __del__(self):
        """ Close the connection as the cache instance is deleted.
        Safe to use as there are no circular ref.
        """
        self._cache.close()

    def set(self, key, value, timeout=300):
        expire_time = None if timeout == 0 else timeout
        self._cache.set(_hash(key), value, expire=expire_time)

    def get(self, key, default=None):
        return self._cache.get(_hash(key), default)

    def invalidate(self, key):
        self._cache.delete(_hash(key))
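A short usage sketch, assuming BaseCache and the _hash helper are defined elsewhere in the same module (they are not shown here) and that the cache directory path is arbitrary:

# Hypothetical usage of FileCache; the path and keys are illustrative.
cache = FileCache('/tmp/filecache')
cache.set('user:42', {'name': 'Alice'}, timeout=60)    # entry expires after 60 seconds
print(cache.get('user:42'))                            # {'name': 'Alice'}
cache.invalidate('user:42')
print(cache.get('user:42', default='missing'))         # 'missing'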
Example #7
def worker(queue, eviction_policy, processes, threads):
    timings = {'get': [], 'set': [], 'delete': []}
    cache = Cache('tmp', eviction_policy=eviction_policy)

    for index, (action, key, value) in enumerate(iter(queue.get, None)):
        start = time.time()

        if action == 'set':
            cache.set(key, value, expire=EXPIRE)
        elif action == 'get':
            result = cache.get(key)
        else:
            assert action == 'delete'
            cache.delete(key)

        stop = time.time()

        if action == 'get' and processes == 1 and threads == 1 and EXPIRE is None:
            assert result == value

        if index > WARMUP:
            timings[action].append(stop - start)

    queue.put(timings)

    cache.close()
Example #8
class CacheInteraction:
    def __init__(self, dimension=DimensionType.DIM_2D):
        self._cache = Cache(settings.CACHE_ROOT)
        self._dimension = dimension

    def __del__(self):
        self._cache.close()

    def get_buff_mime(self, chunk_number, quality, db_data):
        chunk, tag = self._cache.get('{}_{}_{}'.format(db_data.id, chunk_number, quality), tag=True)

        if not chunk:
            chunk, tag = self.prepare_chunk_buff(db_data, quality, chunk_number)
            self.save_chunk(db_data.id, chunk_number, quality, chunk, tag)
        return chunk, tag

    def prepare_chunk_buff(self, db_data, quality, chunk_number):
        from cvat.apps.engine.frame_provider import FrameProvider # TODO: remove circular dependency
        writer_classes = {
            FrameProvider.Quality.COMPRESSED : Mpeg4CompressedChunkWriter if db_data.compressed_chunk_type == DataChoice.VIDEO else ZipCompressedChunkWriter,
            FrameProvider.Quality.ORIGINAL : Mpeg4ChunkWriter if db_data.original_chunk_type == DataChoice.VIDEO else ZipChunkWriter,
        }

        image_quality = 100 if writer_classes[quality] in [Mpeg4ChunkWriter, ZipChunkWriter] else db_data.image_quality
        mime_type = 'video/mp4' if writer_classes[quality] in [Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter] else 'application/zip'

        kwargs = {}
        if self._dimension == DimensionType.DIM_3D:
            kwargs["dimension"] = DimensionType.DIM_3D
        writer = writer_classes[quality](image_quality, **kwargs)

        images = []
        buff = BytesIO()
        upload_dir = {
                StorageChoice.LOCAL: db_data.get_upload_dirname(),
                StorageChoice.SHARE: settings.SHARE_ROOT
            }[db_data.storage]
        if hasattr(db_data, 'video'):
            source_path = os.path.join(upload_dir, db_data.video.path)
            reader = VideoDatasetManifestReader(manifest_path=db_data.get_manifest_path(),
                source_path=source_path, chunk_number=chunk_number,
                chunk_size=db_data.chunk_size, start=db_data.start_frame,
                stop=db_data.stop_frame, step=db_data.get_frame_step())
            for frame in reader:
                images.append((frame, source_path, None))
        else:
            reader = ImageDatasetManifestReader(manifest_path=db_data.get_manifest_path(),
                chunk_number=chunk_number, chunk_size=db_data.chunk_size,
                start=db_data.start_frame, stop=db_data.stop_frame,
                step=db_data.get_frame_step())
            for item in reader:
                source_path = os.path.join(upload_dir, f"{item['name']}{item['extension']}")
                images.append((source_path, source_path, None))

        writer.save_as_chunk(images, buff)
        buff.seek(0)
        return buff, mime_type

    def save_chunk(self, db_data_id, chunk_number, quality, buff, mime_type):
        self._cache.set('{}_{}_{}'.format(db_data_id, chunk_number, quality), buff, tag=mime_type)
Example #9
def main():
  # Parse command-line parameters
  args = parse_args()

  filters = [{'Name':'tag:{}'.format(list(item.keys())[0]), 
              'Values':list(item.values())} for item in args.tags]
  if args.verbosity: print("filters: {}".format(filters))

  # Create a session and connection to ec2
  session = boto3.Session(profile_name=args.profile)
  conn = session.client('ec2')

  # Create region list
  if args.region[0] == 'all':
    userRegion = [region['RegionName'] for region in conn.describe_regions()['Regions']]
  else:
    userRegion = args.region
  if args.verbosity: print("userRegion: {}".format(userRegion))

  # Cache results to disk
  cache = Cache(os.path.expanduser('~') + '/.awstools')
  
  # Create a list of {region: EC2Resources}
  regions = []
  for region in userRegion:
    k = "{}_{}_{}".format(args.profile, region, '_'.join(["{}_{}".format(x, y) 
                                     for f in filters for x, y in f.items()]))
    if args.verbosity > 2: print(k)
    try:
      if not args.ignore_cache:
        regions.append({k:pickle.loads(cache[k])})
        if args.verbosity > 1: print("{} from cache".format(k))
      else:
        raise KeyError
    except KeyError:
      regions.append({k:EC2Resources(session, filters, region)})
      if args.verbosity > 1: print("{} skipped cache".format(k))

  if args.verbosity>1: print("regions: {}".format(regions))
  
  # Iterate through the list of {region: EC2Resources}, print, and update cache
  for rdict in regions:
    for region, ec2Instance in rdict.items():
      # Convert to JSON -> Python data structure -> JSON for proper formatting
      jsonContent = json.dumps(ec2Instance.instances, cls=DateTimeEncoder)
      from_json = json.loads(jsonContent) # Shrugs
      print(json.dumps(from_json, indent=4))
      if args.verbosity > 2: print(ec2Instance.__dict__)
      if ec2Instance.session:
        cache.set(region, pickle.dumps(ec2Instance), expire=3600)
  
  cache.close()
Example #10
class CacheInteraction:
    def __init__(self, dimension=DimensionType.DIM_2D):
        self._cache = Cache(settings.CACHE_ROOT)
        self._dimension = dimension

    def __del__(self):
        self._cache.close()

    def get_buff_mime(self, chunk_number, quality, db_data):
        chunk, tag = self._cache.get('{}_{}_{}'.format(db_data.id, chunk_number, quality), tag=True)

        if not chunk:
            chunk, tag = self.prepare_chunk_buff(db_data, quality, chunk_number)
            self.save_chunk(db_data.id, chunk_number, quality, chunk, tag)
        return chunk, tag

    def prepare_chunk_buff(self, db_data, quality, chunk_number):
        from cvat.apps.engine.frame_provider import FrameProvider # TODO: remove circular dependency
        writer_classes = {
            FrameProvider.Quality.COMPRESSED : Mpeg4CompressedChunkWriter if db_data.compressed_chunk_type == DataChoice.VIDEO else ZipCompressedChunkWriter,
            FrameProvider.Quality.ORIGINAL : Mpeg4ChunkWriter if db_data.original_chunk_type == DataChoice.VIDEO else ZipChunkWriter,
        }

        image_quality = 100 if writer_classes[quality] in [Mpeg4ChunkWriter, ZipChunkWriter] else db_data.image_quality
        mime_type = 'video/mp4' if writer_classes[quality] in [Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter] else 'application/zip'

        kwargs = {}
        if self._dimension == DimensionType.DIM_3D:
            kwargs["dimension"] = DimensionType.DIM_3D
        writer = writer_classes[quality](image_quality, **kwargs)

        images = []
        buff = BytesIO()
        upload_dir = {
                StorageChoice.LOCAL: db_data.get_upload_dirname(),
                StorageChoice.SHARE: settings.SHARE_ROOT
            }[db_data.storage]
        if os.path.exists(db_data.get_meta_path()):
            source_path = os.path.join(upload_dir, db_data.video.path)
            meta = PrepareInfo(source_path=source_path, meta_path=db_data.get_meta_path())
            for frame in meta.decode_needed_frames(chunk_number, db_data):
                images.append(frame)
            writer.save_as_chunk([(image, source_path, None) for image in images], buff)
        else:
            with open(db_data.get_dummy_chunk_path(chunk_number), 'r') as dummy_file:
                images = [os.path.join(upload_dir, line.strip()) for line in dummy_file]
            writer.save_as_chunk([(image, image, None) for image in images], buff)

        buff.seek(0)
        return buff, mime_type

    def save_chunk(self, db_data_id, chunk_number, quality, buff, mime_type):
        self._cache.set('{}_{}_{}'.format(db_data_id, chunk_number, quality), buff, tag=mime_type)
Example #11
def recreate_diskcache():
    if cache_options["CACHE_BACKEND"] != "redis":
        try:
            diskcache_cache = Cache(diskcache_location,
                                    disk_pickle_protocol=pickle_protocol)
        except DatabaseError:
            shutil.rmtree(diskcache_location, ignore_errors=True)
            os.mkdir(diskcache_location)
            diskcache_cache = Cache(diskcache_location,
                                    disk_pickle_protocol=pickle_protocol)
        diskcache_cache.clear()
        diskcache_cache.close()
Example #12
class localCache(object):
    def __init__(self, config):
        self.cache_file = config.ad_cache_file

    def __enter__(self):
        self.cache = Cache(self.cache_file)
        self.cache.expire()
        return self

    def __exit__(self, exctype, exception, traceback):
        self.cache.close()

    def correct_ldap_group_list(self, group_list):
        # Remove just-deleted groups from the list
        deleted_groups = list()
        if len(self.cache) > 0:

            for group in group_list:
                if group.get("name") in self.cache and self.cache.get(
                        group.get("name")).get("cache_state") == "deleted":
                    log.info(
                        'Group {0} in state "deleted" found in cache'.format(
                            group.get("name")))
                    deleted_groups.append(group)

        corrected_group_list = [
            x for x in group_list if x not in deleted_groups
        ]

        # Add just-created groups to the list
        created_groups = list()
        groups_name_list = [group.get("name") for group in group_list]
        if len(self.cache) > 0:

            cached = self.cache._sql('SELECT key FROM Cache').fetchall()

            for group in cached:
                if self.cache.get(group[0]).get("name") not in groups_name_list and\
                                self.cache.get(group[0]).get("cache_state") == "created":
                    log.info(
                        'Group {0} in state "created" found in cache'.format(
                            group[0]))
                    created_groups.append(self.cache.get(group[0]))

        corrected_group_list.extend(
            [x for x in created_groups if x not in groups_name_list])

        return corrected_group_list
Example #13
class Cache(object):
    def __init__(self):
        try:
            self.cache = DC('./tmp')
        except Exception as ex:
            print('Get an exception with diskcache open: {}'.format(ex))
            self.cache = None

    def __del__(self):
        try:
            self.cache.close()
        except Exception as ex:
            print('Get an exception with diskcache close: {}'.format(ex))

    def set(self, key, value):
        if self.cache is not None:
            self.cache.set(key, BytesIO(value), read=True, tag=u'data')

    def get(self, key):
        if self.cache is not None:
            value = self.cache.get(key, default=b'', read=True, tag=True)
            if value is not None and value != b'':
                return value
        return None

    def pop(self, key):
        if self.cache is not None:
            value = self.cache.pop(key, default=b'', read=True, tag=True)
            if value is not None and value != b'':
                return value
        return None

    def delete(self, key):
        if self.cache is not None:
            self.cache.delete(key)

    def create_index(self):
        if self.cache is not None:
            self.cache.create_tag_index()
            return self.cache.tag_index
        return None

    def clear_all(self):
        if self.cache is not None:
            self.cache.clear()
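Because set stores values with read=True and get/pop request read=True, tag=True, a hit comes back as a (file-like handle, tag) pair rather than raw bytes; a usage sketch under that assumption (key and payload are illustrative):

# Hypothetical usage of the Cache wrapper above.
c = Cache()
c.set('blob', b'hello world')
result = c.get('blob')
if result is not None:
    handle, tag = result
    print(handle.read(), tag)    # b'hello world' 'data'
c.clear_all()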
Example #14
def save_to_cache(cache: Cache, data: dict):
    """Save dogs listing in persistent cache.

    Saves a diskcache Cache instance on disk. The available dogs dictionary is
    saved ('data' key) together with a timestamp ('time' key).

    Parameters
    ----------
    cache : Cache
        Cache object containing the dogs listing info.
    data : dict
        Dogs listing dictionary to cache for future use.
    """
    if data is None or len(data) == 0:
        print('Nothing to save in cache.')
    else:
        cache['data'] = data
        cache['time'] = dt.strftime(dt.now(), '%Y-%m-%d %H:%M:%S')
    cache.close()
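A possible read-back counterpart, assuming the same 'data'/'time' keys used by save_to_cache above; this helper is not part of the original example:

def load_from_cache(cache: Cache, max_age_minutes: int = 60):
    """Return the cached dogs listing, or None if it is missing or stale."""
    data = cache.get('data')
    saved = cache.get('time')
    if data is None or saved is None:
        return None
    age = dt.now() - dt.strptime(saved, '%Y-%m-%d %H:%M:%S')
    if age.total_seconds() > max_age_minutes * 60:
        return None
    return data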
Example #15
class BaseCacheAnalyzer(BaseAnalyzer):
    def __init__(self,
                 cache_location=None,
                 force=False,
                 delete_cache_when_done=False,
                 **kwargs):
        super().__init__(**kwargs)
        self.cache_location = cache_location
        self.cache = None
        self.force = force
        self.delete_cache_when_done = delete_cache_when_done

    def initialize(self):
        from diskcache import Cache
        self.cache = Cache(self.cache_location or self.uid + "_cache")

    def filter(self, simulation):
        return self.force or not self.is_in_cache(simulation.id)

    def to_cache(self, key, data):
        self.cache.set(key, data)

    def from_cache(self, key):
        return self.cache.get(key)

    def is_in_cache(self, key):
        return key in self.cache

    def destroy(self):
        if self.cache:
            self.cache.close()

        if self.cache and self.delete_cache_when_done and os.path.exists(
                self.cache.directory):
            cache_directory = self.cache.directory
            del self.cache
            shutil.rmtree(cache_directory)

    @property
    def keys(self):
        return list(self.cache.iterkeys()) if self.cache else None
Example #16
class Spider(object):
    def __init__(self, directory=cache_dir.abspath, expire=24 * 3600):
        self.cache = Cache(directory)
        self.expire = expire

    def close(self):
        self.cache.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def get_content(self, url):
        if url in self.cache:
            return self.cache[url]
        else:
            content = requests.get(url).content
            self.cache[url] = content
            return content
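Spider implements __enter__/__exit__, so it can be used as a context manager and the diskcache connection is always closed; a sketch (the URL is illustrative). Note that get_content stores the fetched content without applying self.expire, so entries only expire if eviction is handled elsewhere.

# Hypothetical usage of Spider as a context manager.
with Spider() as spider:
    page = spider.get_content('https://example.com')        # network request on first call
    page_again = spider.get_content('https://example.com')  # served from the disk cache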
Example #17
class CacheEnabled:
    def __init__(self):
        self.cache_directory = None
        self.cache = None
        self.queue = False

    def initialize_cache(self, shards=None, timeout=1, queue=False):
        self.reset_cache()

        # Create a temporary directory for the cache
        self.cache_directory = mkdtemp()

        # Create a queue?
        if queue:
            self.cache = Deque(directory=self.cache_directory)
            self.queue = True
        elif shards:
            self.cache = FanoutCache(self.cache_directory,
                                     shards=shards,
                                     timeout=timeout)
            self.queue = False
        else:
            self.cache = Cache(self.cache_directory, timeout=timeout)
            self.queue = False

        return self.cache

    def reset_cache(self):
        # If already initialized, destroy and recreate
        if self.cache and not self.queue:
            self.cache.close()
        else:
            del self.cache

        if self.cache_directory:
            shutil.rmtree(self.cache_directory)

    def __del__(self):
        self.reset_cache()
Example #18
class KeyValueDB(Generic[NativeType, StorageType], abc.ABC):
    """Interface for concrete DB backend."""

    _native_type: NativeType
    _storage_type: StorageType

    def __init__(self, database_dir: Path):
        if not database_dir.exists():
            database_dir.mkdir(mode=0o750, parents=True)
        self._cache = Cache(str(database_dir))

    def __contains__(self, key: Any) -> bool:
        return self._cache.__contains__(key)

    def __delitem__(self, key: Any) -> bool:
        return self._cache.__delitem__(key)

    def __getitem__(self, key: Any) -> NativeType:
        return self._storage_to_native_type(self._cache.__getitem__(key))

    def __setitem__(self, key: Any, value: NativeType) -> None:
        return self._cache.__setitem__(key, self._native_to_storage_type(value))

    def _native_to_storage_type(self, value: NativeType) -> StorageType:
        if self._native_type is self._storage_type or self._storage_type is None:
            return cast(StorageType, value)
        else:
            return self._storage_type(value)

    def _storage_to_native_type(self, value: StorageType) -> NativeType:
        if self._native_type is self._storage_type or self._native_type is None:
            return cast(NativeType, value)
        else:
            return self._native_type(value)

    def close(self, *args, **kwargs) -> None:
        return self._cache.close()

    def get(self, key: Any, default: Any = None, *args, **kwargs) -> Union[Any, NativeType]:
        value = self._cache.get(key, default, *args, **kwargs)
        if value is default:
            return default
        else:
            return self._storage_to_native_type(value)

    def set(self, key: Any, value: NativeType, *args, **kwargs) -> bool:
        return self._cache.set(key, self._native_to_storage_type(value), *args, **kwargs)

    def touch(self, *args, **kwargs) -> bool:
        return self._cache.touch(*args, **kwargs)
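A minimal concrete subclass sketch, assuming the class-level _native_type/_storage_type attributes are meant to be set by subclasses as the conversion methods suggest; the types and path below are illustrative (Path is imported in the surrounding module):

# Hypothetical subclass: keep ints in memory, store them as strings on disk.
class IntStrDB(KeyValueDB[int, str]):
    _native_type = int
    _storage_type = str

db = IntStrDB(Path('/tmp/intdb'))
db['answer'] = 42       # converted to '42' by _native_to_storage_type before storing
print(db['answer'])     # 42, converted back by _storage_to_native_type
db.close()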
Example #19
class cache:
    def __init__(self, latest=False):
        self.feedCache = Cache(".feedcache")
        self.latest = latest

    def __preprocess_title(self, feed):
        for entry in feed.entries:
            entry["feed_src"] = feed["feed"]["title"]

        return feed

    def __manage_cache(self, url):
        data = None  # fall back to None if fetching or parsing the feed fails below
        try:
            if url in self.feedCache:
                data = self.feedCache.get(url)
            else:
                parsed_feed = feedparser.parse(url)
                data = self.__preprocess_title(parsed_feed)
                # cache expires in 30 mins
                self.feedCache.add(url, data, expire=1800)
            self.feedCache.close()
        except ValueError:
            pass
        except Exception:
            pass
        return data

    def get_feed(self, url):
        if self.latest:
            latestFeed = self.__manage_cache(url).entries
            if len(latestFeed) > 0:
                return latestFeed[0]
            else:
                pass

        return self.__manage_cache(url).entries
Example #20
class CacheManager(object):
    def __init__(self, cache_path=CACHE_PATH):
        self.cache = Cache(cache_path)

    def items(self):
        return self.cache.iterkeys()

    def has_key(self, key):
        return key in self.cache

    def set(self, key, value, ttl=TTL):
        return self.cache.set(key=key, value=value, expire=ttl)

    def get(self, key):
        return self.cache.get(key=key)

    def clear_cache(self):
        for key in self.cache:
            if key != 'censys_credentials':
                del self.cache[key]
        return True

    def close(self):
        self.cache.close()
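A short usage sketch; CACHE_PATH and TTL come from the surrounding module, and the key names below are illustrative:

# Hypothetical usage of CacheManager.
manager = CacheManager()
manager.set('censys_credentials', ('api_id', 'api_secret'))
if manager.has_key('censys_credentials'):
    creds = manager.get('censys_credentials')
manager.clear_cache()   # removes everything except the 'censys_credentials' entry
manager.close()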
Example #21
    def check(self, dir, args):
        warnings = []
        log.info("Running URL checks (CheckURLs)")
        assert 'URLs' in __main__.remoteCheckList
        if 'URLs' in args.disableChecksRemote:
            return warnings

        cache_dir = 'data-check-cache/URLs'
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)
        global cache
        cache = Cache(cache_dir)
        if 'URLs' in args.purgeCaches:
            cache.clear()

        log.info("Testing biobank URLs")
        for biobank in dir.getBiobanks():
            if not 'url' in biobank or re.search('^\s*$', biobank['url']):
                warnings.append(
                    DataCheckWarning(self.__class__.__name__, "",
                                     dir.getBiobankNN(biobank['id']),
                                     DataCheckWarningLevel.WARNING,
                                     biobank['id'],
                                     DataCheckEntityType.BIOBANK,
                                     "Missing URL"))
            else:
                URLwarnings = testURL(
                    biobank['url'],
                    DataCheckWarning(self.__class__.__name__, "",
                                     dir.getBiobankNN(biobank['id']),
                                     DataCheckWarningLevel.ERROR,
                                     biobank['id'],
                                     DataCheckEntityType.BIOBANK,
                                     "Biobank URL"))
                warnings += URLwarnings

        log.info("Testing collection URLs")
        for collection in dir.getCollections():
            # non-existence of access URIs is tested in the access policy checks - here we only check validity of the URL if it exists
            if 'data_access_uri' in collection and not re.search(
                    '^\s*$', collection['data_access_uri']):
                URLwarnings = testURL(
                    collection['data_access_uri'],
                    DataCheckWarning(self.__class__.__name__, "",
                                     dir.getCollectionNN(collection['id']),
                                     DataCheckWarningLevel.ERROR,
                                     collection['id'],
                                     DataCheckEntityType.COLLECTION,
                                     "Data access URL for collection"))
                warnings += URLwarnings

            if 'sample_access_uri' in collection and not re.search(
                    '^\s*$', collection['sample_access_uri']):
                URLwarnings = testURL(
                    collection['sample_access_uri'],
                    DataCheckWarning(self.__class__.__name__, "",
                                     dir.getCollectionNN(collection['id']),
                                     DataCheckWarningLevel.ERROR,
                                     collection['id'],
                                     DataCheckEntityType.COLLECTION,
                                     "Sample access URL for collection"))
                warnings += URLwarnings
            if 'image_access_uri' in collection and not re.search(
                    '^\s*$', collection['image_access_uri']):
                URLwarnings = testURL(
                    collection['image_access_uri'],
                    DataCheckWarning(self.__class__.__name__, "",
                                     dir.getCollectionNN(collection['id']),
                                     DataCheckWarningLevel.ERROR,
                                     collection['id'],
                                     DataCheckEntityType.COLLECTION,
                                     "Image access URL for collection"))
                warnings += URLwarnings

        cache.close()
        return warnings
Example #22
class State(StateBase):  # pylint: disable=too-many-instance-attributes
    def __init__(self, root_dir=None, tmp_dir=None):
        from diskcache import Cache

        super().__init__()

        self.tmp_dir = tmp_dir
        self.root_dir = root_dir
        self.fs = LocalFileSystem(None, {"url": self.root_dir})

        if not tmp_dir:
            return

        config = {"eviction_policy": "least-recently-used"}
        self.links = Cache(directory=os.path.join(tmp_dir, "links"), **config)
        self.md5s = Cache(directory=os.path.join(tmp_dir, "md5s"), **config)

    def close(self):
        self.md5s.close()
        self.links.close()

    def save(self, path_info, fs, hash_info):
        """Save hash for the specified path info.

        Args:
            path_info (dict): path_info to save hash for.
            hash_info (HashInfo): hash to save.
        """

        if not isinstance(fs, LocalFileSystem):
            return

        assert isinstance(path_info, str) or path_info.scheme == "local"
        assert hash_info
        assert isinstance(hash_info, HashInfo)
        assert os.path.exists(path_info)

        mtime, size = get_mtime_and_size(path_info, self.fs)
        inode = get_inode(path_info)

        logger.debug("state save (%s, %s, %s) %s", inode, mtime, size,
                     hash_info.value)

        self.md5s[inode] = (mtime, size, hash_info.value)

    def get(self, path_info, fs):
        """Gets the hash for the specified path info. Hash will be
        retrieved from the state database if available.

        Args:
            path_info (dict): path info to get the hash for.

        Returns:
            HashInfo or None: hash for the specified path info or None if it
            doesn't exist in the state database.
        """
        if not isinstance(fs, LocalFileSystem):
            return None

        assert isinstance(path_info, str) or path_info.scheme == "local"
        path = os.fspath(path_info)

        # NOTE: use os.path.exists instead of LocalFileSystem.exists
        # because it uses lexists() and will return True for broken
        # symlinks that we cannot stat() in get_mtime_and_size
        if not os.path.exists(path):
            return None

        mtime, size = get_mtime_and_size(path, self.fs)
        inode = get_inode(path)

        value = self.md5s.get(inode)

        if not value or value[0] != mtime or value[1] != size:
            return None

        return HashInfo("md5", value[2], size=int(size))

    def save_link(self, path_info, fs):
        """Adds the specified path to the list of links created by dvc. This
        list is later used on `dvc checkout` to cleanup old links.

        Args:
            path_info (dict): path info to add to the list of links.
        """
        if not isinstance(fs, LocalFileSystem):
            return

        assert isinstance(path_info, str) or path_info.scheme == "local"

        if not self.fs.exists(path_info):
            return

        mtime, _ = get_mtime_and_size(path_info, self.fs)
        inode = get_inode(path_info)
        relative_path = relpath(path_info, self.root_dir)

        with self.links as ref:
            ref[relative_path] = (inode, mtime)

    def get_unused_links(self, used, fs):
        """Removes all saved links except the ones that are used.

        Args:
            used (list): list of used links that should not be removed.
        """
        if not isinstance(fs, LocalFileSystem):
            return

        unused = []

        with self.links as ref:
            for relative_path in ref:
                path = os.path.join(self.root_dir, relative_path)

                if path in used or not self.fs.exists(path):
                    continue

                inode = get_inode(path)
                mtime, _ = get_mtime_and_size(path, self.fs)

                if ref[relative_path] == (inode, mtime):
                    logger.debug("Removing '%s' as unused link.", path)
                    unused.append(relative_path)

        return unused

    def remove_links(self, unused, fs):
        if not isinstance(fs, LocalFileSystem):
            return

        for path in unused:
            remove(os.path.join(self.root_dir, path))

        with self.links as ref:
            for path in unused:
                del ref[path]
Example #23
class CAgent:
    def __init__(self,
                 name,
                 oDir: CDirectoryConfig,
                 oConfigByYaml: CConfigByYaml,
                 connectKnowlegeServer=False):
        self.name = name
        self.crawlerManager: CCrawlerManager = None
        self.storageManager: CStorage = None
        self.knowledgeManagerClient: CKnowledgeClient = None
        self.oDir: CDirectoryConfig = oDir
        self.oConf = oConfigByYaml
        self.oLog = CLog(oDir['Log'], self.name + '_log')
        self.dbWeb = ''
        self.cacheAgent = Cache(oDir['cacheAgentFolder'])
        self.cacheCrawler = Cache(oDir['cacheCrawlerFolder'])
        self.flagConnectKnowlegeServer = connectKnowlegeServer
        fKeyboardInterruptRegistrar(self._callbackKeyboardInterrupt)
        self.flagUserClose = False
#        fKeyboardInterruptRegistrar._register['test'] = self._callbackKeyboardInterrupt

    def _configStorage(self, mode='mongoDB'):
        oSubConfig = self.oConf['Storage']
        self.dbWeb = oSubConfig['dbWeb']
        if (oSubConfig.get('mode') != None):
            mode = oSubConfig['mode']
        path = self.dbWeb
        if (mode == 'mongoDB'):
            self.storageManager = CStorageMongoDB(self.name, path)

    def _configCrawler(self):
        self.crawlerManager = CCrawlerManager(self.name,
                                              self.oDir['crawlerCWD'],
                                              self.oLog,
                                              self.oDir['cacheCrawlerFolder'],
                                              self.oDir['cacheAgentFolder'])

    def _configKnowledgeManager(self):
        oSubConfig = self.oConf['KnowledgeManager']
        addressTuple = (oSubConfig['address'], oSubConfig['port'])
        key = oSubConfig['password']
        key = bytes(key, 'utf-8')
        print(key)
        self.knowledgeManagerClient = CKnowledgeClient(addressTuple, key,
                                                       self.oLog)
        if self.flagConnectKnowlegeServer:
            err = self.knowledgeManagerClient.connect()
            if err == False:
                raise ValueError("KnowledgeManager connection failed")

    def configAll(self):
        self._configCrawler()
        self.oLog.safeRecordTime('CrawlerManager conf finished')
        self._configKnowledgeManager()
        self.oLog.safeRecordTime('KnowledgeManager conf finished')
        self._configStorage()
        self.oLog.safeRecordTime('StorageManager conf finished')

    def startCrawling(self, jobsList: list):
        return self.crawlerManager.engineStart(jobsList)

    def fetchResult(
        self,
        handler,
        subProcHandle,
        timeWaitStep=1,
        maxWaitTimes=5
    ):  #total continuous waittime will be (timeWaitStep * maxWaitTimes)
        result = ''
        cnt = 0
        global WRITE_TO_STORAGE_FLAG
        WRITE_TO_STORAGE_FLAG = True
        while (True):
            _, result = self.cacheAgent.pull()
            if (result != None):
                result = json.loads(result)
                ans = handler(result['type'], result['content'])
                #                print(ans)
                for temp in ans:
                    self.storageManager.storeData(temp[0], temp[1], temp[2])
#                break
                cnt = 0  #clear counter
            elif (timeWaitStep * maxWaitTimes > 0):
                if (cnt >= maxWaitTimes
                    ):  # if continuous wait time equals to maxWaitTimes
                    WRITE_TO_STORAGE_FLAG = False
                    return False
                elif subProcHandle.poll(
                ) != None:  #if the subprocess is finished
                    WRITE_TO_STORAGE_FLAG = False
                    return subProcHandle.poll()
                else:
                    time.sleep(timeWaitStep)
                    cnt += 1  #counter add one
            else:
                WRITE_TO_STORAGE_FLAG = False
                raise ValueError(
                    "timeWaitStep * maxWaitTimes should be bigger than 0")

    def clearCache(self):
        self.cacheAgent.clear()
        self.cacheCrawler.clear()

    def closeCache(self):
        self.cacheAgent.close()
        self.cacheCrawler.close()
        self.crawlerManager.closeCache()

    def _callbackKeyboardInterrupt(self, *args, **kwargs):
        global WRITE_TO_STORAGE_FLAG
        self.flagUserClose = True
        if (WRITE_TO_STORAGE_FLAG is True):
            numRemainedMsg = len(self.cacheAgent)
            MSG = "Agent is fetching the result to the Storage," + \
            " number of remained items: " + str(numRemainedMsg) + \
            ", will close later."
            return False, MSG
        else:
            return True, ''

    def test(self):
        #code for testing keyboard interruption handle
        global WRITE_TO_STORAGE_FLAG
        WRITE_TO_STORAGE_FLAG = True
        for i in range(1000):
            time.sleep(0.01)
        WRITE_TO_STORAGE_FLAG = False
        #


#        print('Press Ctrl+C')
#        for x in range(1,100):
#            time.sleep(0.2)
#            print(x)

    def close(self):
        self.knowledgeManagerClient.close()
        self.closeCache()
Example #24
class FinancialCache():
    """
        A disk-based database containing an offline version of financial
        data, used as a cache
    """
    
    def __init__(self, path, **kwargs):
        '''
            Initializes the cache

            Parameters
            ----------
            path : str
            The path where the cache will be located

            max_cache_size_bytes : int (kwargs)
            (optional) the maximum size of the cache in bytes

            Raises
            ------
            ValidationError : in case an invalid cache size is supplied
            FileSystemError : in case the cache directory cannot be created

            
            Returns
            -----------
            None
        '''

        try:
            max_cache_size_bytes = kwargs['max_cache_size_bytes']
        except KeyError:
            # default max cache is 4GB
            max_cache_size_bytes = 4e9

        util.create_dir(path)
        
        try:
            self.cache = Cache(path, size_limit=int(max_cache_size_bytes))
        except Exception as e:
            raise ValidationError('invalid max cache size', e)

        log.debug("Cache was initialized: %s" % path)

    def write(self, key : str, value : object):
        """
            Writes an object to the cache

            Parameters
            ----------
            key : str
            The cache key

            value : object
            The cache value

            Returns
            ----------
            None
        """
        if (key == "" or key is None) or (value == "" or value is None):
            return

        self.cache[key] = value

    def read(self, key):
        """
            Reads an object to the cache and returns None if it cannot
            be found

            Parameters
            ----------
            key : str
            The cache key

            Returns
            ----------
            The object in question, or None if they key is not present
        """
        try:
            return self.cache[key]
        except KeyError:
            log.debug("%s not found inside cache" % key)
            return None

    def close(self):
        self.cache.close()
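A short usage sketch; the path, key, and value below are illustrative, and util, ValidationError, and log come from the surrounding module:

# Hypothetical usage of FinancialCache.
cache = FinancialCache('./financial-cache', max_cache_size_bytes=1e9)
cache.write('AAPL:2023-Q4', {'revenue': 119575000000})
print(cache.read('AAPL:2023-Q4'))    # the dict stored above
print(cache.read('missing-key'))     # None, logged as a cache miss
cache.close()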
Example #25
from kolibri.utils.conf import KOLIBRI_HOME
from kolibri.utils.conf import OPTIONS

cache_options = OPTIONS["Cache"]

pickle_protocol = OPTIONS["Python"]["PICKLE_PROTOCOL"]

diskcache_location = os.path.join(KOLIBRI_HOME, "process_cache")
try:
    diskcache_cache = Cache(diskcache_location, disk_pickle_protocol=pickle_protocol)
except DatabaseError:
    shutil.rmtree(diskcache_location, ignore_errors=True)
    os.mkdir(diskcache_location)
    diskcache_cache = Cache(diskcache_location, disk_pickle_protocol=pickle_protocol)
diskcache_cache.close()
# Default to LocMemCache, as it has the simplest configuration
default_cache = {
    "BACKEND": "django.core.cache.backends.locmem.LocMemCache",
    # Default time out of each cache key
    "TIMEOUT": cache_options["CACHE_TIMEOUT"],
    "OPTIONS": {"MAX_ENTRIES": cache_options["CACHE_MAX_ENTRIES"]},
}

built_files_prefix = "built_files"

built_files_cache = {
    "BACKEND": "django.core.cache.backends.locmem.LocMemCache",
    # Default time out of each cache key
    "TIMEOUT": cache_options["CACHE_TIMEOUT"],
    "OPTIONS": {"MAX_ENTRIES": cache_options["CACHE_MAX_ENTRIES"]},
    def check(self, dir, args):
        warnings = []
        log.info("Running geographical location checks (BiobankGeo)")
        # This is to be enabled for real runs.
        assert 'geocoding' in __main__.remoteCheckList
        if 'geocoding' in args.disableChecksRemote:
            geoCodingEnabled = False
        else:
            geoCodingEnabled = True

        cache_dir = 'data-check-cache/geolocator'
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)
        cache = Cache(cache_dir)
        if 'geocoding' in args.purgeCaches:
            cache.clear()

        geocoords_pattern = '^-?\d+\.\d+$'
        geolocator = Nominatim(
            user_agent=
            'Mozilla/5.0 (X11; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0',
            timeout=15)

        for biobank in dir.getBiobanks():
            if 'latitude' in biobank and not re.search(
                    '^\s*$', biobank['latitude']
            ) and 'longitude' in biobank and not re.search(
                    '^\s*$', biobank['longitude']):
                # we check before doing any convenience substitutions
                if not re.search(geocoords_pattern, biobank['latitude']):
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getBiobankNN(biobank['id']),
                            DataCheckWarningLevel.ERROR, biobank['id'],
                            DataCheckEntityType.BIOBANK,
                            "Invalid biobank latitude (should be a decimal number with period without any spaces or stray characters around - the surrounding quotes are added in this report): offending value '"
                            + biobank['latitude'] + "'"))
                if not re.search(geocoords_pattern, biobank['longitude']):
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getBiobankNN(biobank['id']),
                            DataCheckWarningLevel.ERROR, biobank['id'],
                            DataCheckEntityType.BIOBANK,
                            "Invalid biobank longitude (should be a decimal number with period without any spaces or stray characters around - the surrounding quotes are added in this report): offending value '"
                            + biobank['longitude'] + "'"))
                # this is for convenience - if there are commas used instead of periods, we should still do the remaining checks
                biobank['latitude'] = re.sub(r',', r'.', biobank['latitude'])
                biobank['longitude'] = re.sub(r',', r'.', biobank['longitude'])
                if re.search(geocoords_pattern,
                             biobank['latitude']) and re.search(
                                 geocoords_pattern, biobank['longitude']):
                    if geoCodingEnabled:
                        logMessage = "Checking reverse geocoding for " + biobank[
                            'latitude'] + ", " + biobank['longitude']
                        try:
                            loc_string = biobank['latitude'] + ", " + biobank[
                                'longitude']
                            if loc_string in cache and cache[loc_string] != "":
                                country_code = cache[loc_string]
                            else:
                                location = geolocator.reverse(loc_string,
                                                              language='en')
                                country_code = location.raw['address'][
                                    'country_code']
                                cache[loc_string] = country_code
                            logMessage += " -> OK"
                            if ((biobank['country']['id'] != "IARC"
                                 and biobank['country']['id'] != "EU")
                                    and country_code.upper() !=
                                    biobank['country']['id'] and
                                    not (country_code.upper() == "GB" and
                                         biobank['country']['id'] == "UK")):
                                warnings.append(
                                    DataCheckWarning(
                                        self.__class__.__name__, "",
                                        dir.getBiobankNN(biobank['id']),
                                        DataCheckWarningLevel.WARNING,
                                        biobank['id'],
                                        DataCheckEntityType.BIOBANK,
                                        "Geolocation of the biobank is likely outside of its country "
                                        + biobank['country']['id'] +
                                        "; biobank seems to be in " +
                                        country_code.upper() +
                                        f" based on geographical coordinates 'latitude'={biobank['latitude']} 'longitude'={biobank['longitude']}"
                                    ))
                        except Exception as e:
                            logMessage += " -> failed (" + str(e) + ")"
                            warnings.append(
                                DataCheckWarning(
                                    self.__class__.__name__, "",
                                    dir.getBiobankNN(biobank['id']),
                                    DataCheckWarningLevel.WARNING,
                                    biobank['id'], DataCheckEntityType.BIOBANK,
                                    "Reverse geocoding of the biobank  location failed ("
                                    + str(e) + ")"))
                        log.info(logMessage)
            else:
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "",
                        dir.getBiobankNN(biobank['id']),
                        DataCheckWarningLevel.INFO, biobank['id'],
                        DataCheckEntityType.BIOBANK,
                        "Missing geographical coordinates ('latitude and/or 'longitude' attributes are empty)"
                    ))

        for collection in dir.getCollections():
            if 'latitude' in collection and not re.search(
                    '^\s*$', collection['latitude']
            ) and 'longitude' in collection and not re.search(
                    '^\s*$', collection['longitude']):
                # we check before doing any convenience substitutions
                if not re.search(geocoords_pattern, collection['latitude']):
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.ERROR, collection['id'],
                            DataCheckEntityType.COLLECTION,
                            "Invalid collection latitude (should be a decimal number with period without any spaces or stray characters around - the surrounding quotes are added in this report): offending value '"
                            + collection['latitude'] + "'"))
                if not re.search(geocoords_pattern, collection['longitude']):
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.ERROR, collection['id'],
                            DataCheckEntityType.COLLECTION,
                            "Invalid collection longitude (should be a decimal number with period without any spaces or stray characters around - the surrounding quotes are added in this report): offending value '"
                            + collection['longitude'] + "'"))
                # this is for convenience - if there are commas used instead of periods, we should still do the remaining checks
                collection['latitude'] = re.sub(r',', r'.',
                                                collection['latitude'])
                collection['longitude'] = re.sub(r',', r'.',
                                                 collection['longitude'])
                if re.search(geocoords_pattern,
                             collection['latitude']) and re.search(
                                 geocoords_pattern, collection['longitude']):
                    if geoCodingEnabled:
                        logMessage = "Checking reverse geocoding for " + collection[
                            'latitude'] + ", " + collection['longitude']
                        try:
                            loc_string = collection[
                                'latitude'] + ", " + collection['longitude']
                            if loc_string in cache and cache[loc_string] != "":
                                country_code = cache[loc_string]
                            else:
                                location = geolocator.reverse(loc_string,
                                                              language='en')
                                country_code = location.raw['address'][
                                    'country_code']
                                cache[loc_string] = country_code
                            logMessage += " -> OK"
                            biobankId = dir.getCollectionBiobankId(
                                collection['id'])
                            biobank = dir.getBiobankById(biobankId)
                            if ((biobank['country']['id'] != "IARC"
                                 and biobank['country']['id'] != "EU")
                                    and country_code.upper() !=
                                    biobank['country']['id'] and
                                    not (country_code.upper() == "GB" and
                                         biobank['country']['id'] == "UK")):
                                warnings.append(
                                    DataCheckWarning(
                                        self.__class__.__name__, "",
                                        dir.getCollectionNN(collection['id']),
                                        DataCheckWarningLevel.WARNING,
                                        collection['id'],
                                        DataCheckEntityType.COLLECTION,
                                        "Geolocation of the collection is likely outside of its country "
                                        + collection['country']['id'] +
                                        "; collection seems to be in " +
                                        country_code.upper() +
                                        f" based on geographical coordinates 'latitude'={collection['latitude']} 'longitude'={collection['longitude']}"
                                    ))
                        except Exception as e:
                            logMessage += " -> failed (" + str(e) + ")"
                            warnings.append(
                                DataCheckWarning(
                                    self.__class__.__name__, "",
                                    dir.getCollectionNN(collection['id']),
                                    DataCheckWarningLevel.WARNING,
                                    collection['id'],
                                    DataCheckEntityType.COLLECTION,
                                    "Reverse geocoding of the collection  location failed ("
                                    + str(e) + ")"))
                        log.info(logMessage)

        cache.close()
        return warnings
Example #27
            print("Could not resolve source %s or target %s for graph" %
                  (link["source"], link["target"]))
            brokenlinks.append(link)
    graphlinks = [link for link in graphlinks if link not in brokenlinks]

    graphnodes = [node for _, node in graphnodes.items()]
    graphnodes = sorted(graphnodes, key=lambda x: x["seq"])
    graph = {
        "batadv": {
            "directed": False,
            "graph": [],
            "links": graphlinks,
            "multigraph": False,
            "nodes": graphnodes
        },
        "version": 1
    }
    print(graph)
    with open("graph.json", "w") as outfile:
        json.dump(graph, outfile)

    # finalize nodes.json
    nodes = {"nodes": nodes, "timestamp": timestamp, "version": 2}
    print(nodes)
    with open("nodes.json", "w") as outfile:
        json.dump(nodes, outfile)

    print("Wrote %d nodes." % len(nodes["nodes"]))

    cache.close()
Example #28
class DiskPubSubCache(object):
    """A DiskCache-backed cache used for PubSub channels

    Attributes:
        cache (Cache): The cache which backs this pubsub cache
        _subscribers (dict{str: DiskSubscription}): The subscriptions tracked by this cache
        _threads_registered (set(str)): The names of the threads which have registered triggers on the database
        _push_partial (func): The function called when an insert or update happens on the cache

    Args:
        directory (str): The path to the directory used by this cache
        timeout (float, optional): The number of seconds to wait before an operation times out. Defaults to 0.01 seconds
    """
    _insert_func_name = 'push_on_insert'
    _update_func_name = 'push_on_update'

    def __init__(self, directory, timeout=0.01):
        self.cache = Cache(directory, timeout=timeout)
        self._subscribers = {}  # Would be nice to use a weakref to a set so that keys with no subscribers are
        self._threads_registered = set()
        self._insert_triggers()
        self._push_partial = partial(self.__push_to_subscribers)

    def publish(self, channel, data):
        """Publish data to a channel

        Args:
            channel (str): Channel to publish the data to
            data: The data to publish. It will arrive in the same format as it was set

        Returns:
            (int): The number of subscribers which received the published data
        """
        self.cache.set(channel, data)
        return len(self._subscribers.get(channel, []))

    def register_callbacks(self):
        """Registers the trigger functions for the current thread.

        A thread must have trigger functions registered before it can publish data
        """
        if threading.current_thread().name not in self._threads_registered:
            con = self._con
            for func_name in (self._insert_func_name, self._update_func_name):
                con.create_function(func_name, 2, self._push_partial)
            self._threads_registered.add(threading.current_thread().name)

    def _insert_triggers(self):
        """Inserts the original triggers into the cache, but does not create or the functions which receive the triggers
        """
        con = self._con
        for func_name, operation in [(self._insert_func_name, 'INSERT'), (self._update_func_name, 'UPDATE')]:
            con.execute('CREATE TRIGGER IF NOT EXISTS {0} AFTER {1} ON Cache BEGIN '
                        'SELECT {0}(NEW.key, NEW.value); END;'.format(func_name, operation))

    def subscribe(self, channel):
        """Subscribe to a channel

        Args:
            channel (str): The name of the channel to subscribe to

        Returns:
            (DiskSubscription): The subscription to this channel
        """
        subscription = DiskSubscription(channel)
        if channel not in self._subscribers:
            self._subscribers[channel] = WeakSet([subscription])
        else:
            self._subscribers[channel].add(subscription)
        return subscription

    def __push_to_subscribers(self, channel, value):
        try:
            value = self.__get_value(value)
            for subscriber in self._subscribers.get(str(channel), []):
                subscriber.push(value)
        except:
            import traceback
            traceback.print_exc()
            raise

    @staticmethod
    def __get_value(value):
        if value == unsubscribe_message or isinstance(value, string_types) or isinstance(value, int) or isinstance(
                value, float):
            return value
        if isinstance(value, binary_type):
            return value.decode('utf-8')
        try:
            return pickle.load(BytesIO(value))
        except (KeyError, TypeError, IndexError):
            return str(value)

    @property
    def _con(self):
        con = getattr(self.cache._local, 'con', None)

        if con is None:
            con = self.cache._local.con = sqlite3.connect(
                os.path.join(self.cache._directory, DBNAME),
                timeout=self.cache._timeout,
                isolation_level=None,
            )

            # Some SQLite pragmas work on a per-connection basis so query the
            # Settings table and reset the pragmas. The Settings table may not
            # exist so catch and ignore the OperationalError that may occur.

            try:
                select = 'SELECT key, value FROM Settings'
                settings = con.execute(select).fetchall()
            except sqlite3.OperationalError:
                pass
            else:
                for key, value in settings:
                    if key.startswith('sqlite_'):
                        self.cache.reset(key, value, update=False)

        return con

    def shutdown(self):
        """Shuts down the connection to the cache
        """
        self.cache.close()
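
# A minimal usage sketch of DiskPubSubCache above. The cache directory and the
# subscription's consumption API are assumptions (DiskSubscription is not shown
# here); only subscribe/register_callbacks/publish/shutdown come from the class.
pubsub = DiskPubSubCache('/tmp/pubsub_cache')  # hypothetical directory

subscription = pubsub.subscribe('events')

# The publishing thread must register the SQLite trigger functions first.
pubsub.register_callbacks()
receivers = pubsub.publish('events', 'hello subscribers')
print('delivered to %d subscriber(s)' % receivers)

# Consuming pushed values depends on DiskSubscription (assumed interface):
# for message in subscription.listen():
#     handle(message)

pubsub.shutdown()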
Example #29
0
class State(StateBase):  # pylint: disable=too-many-instance-attributes
    def __init__(self, root_dir=None, tmp_dir=None, dvcignore=None):
        from diskcache import Cache

        super().__init__()

        self.tmp_dir = tmp_dir
        self.root_dir = root_dir
        self.dvcignore = dvcignore

        if not tmp_dir:
            return

        config = {
            "eviction_policy": "least-recently-used",
            "disk_pickle_protocol": 4,
        }
        self.links = Cache(directory=os.path.join(tmp_dir, "links"), **config)
        self.md5s = Cache(directory=os.path.join(tmp_dir, "md5s"), **config)

    def close(self):
        self.md5s.close()
        self.links.close()

    def save(self, path_info, fs, hash_info):
        """Save hash for the specified path info.

        Args:
            path_info (dict): path_info to save hash for.
            hash_info (HashInfo): hash to save.
        """

        if not isinstance(fs, LocalFileSystem):
            return

        mtime, size = get_mtime_and_size(path_info, fs, self.dvcignore)
        inode = get_inode(path_info)

        logger.debug(
            "state save (%s, %s, %s) %s",
            inode,
            mtime,
            str(size),
            hash_info.value,
        )

        self.md5s[inode] = (mtime, str(size), hash_info.value)

    def get(self, path_info, fs):
        """Gets the hash for the specified path info. Hash will be
        retrieved from the state database if available.

        Args:
            path_info (dict): path info to get the hash for.

        Returns:
            (Meta, HashInfo) or (None, None): size metadata and hash for the
            specified path info, or (None, None) if there is no up-to-date
            entry in the state database.
        """
        from .objects.meta import Meta

        if not isinstance(fs, LocalFileSystem):
            return None, None

        try:
            mtime, size = get_mtime_and_size(path_info, fs, self.dvcignore)
        except FileNotFoundError:
            return None, None

        inode = get_inode(path_info)

        value = self.md5s.get(inode)

        if not value or value[0] != mtime or value[1] != str(size):
            return None, None

        return Meta(size=size), HashInfo("md5", value[2])

    def save_link(self, path_info, fs):
        """Adds the specified path to the list of links created by dvc. This
        list is later used on `dvc checkout` to cleanup old links.

        Args:
            path_info (dict): path info to add to the list of links.
        """
        if not isinstance(fs, LocalFileSystem):
            return

        try:
            mtime, _ = get_mtime_and_size(path_info, fs, self.dvcignore)
        except FileNotFoundError:
            return

        inode = get_inode(path_info)
        relative_path = relpath(path_info, self.root_dir)

        with self.links as ref:
            ref[relative_path] = (inode, mtime)

    def get_unused_links(self, used, fs):
        """Removes all saved links except the ones that are used.

        Args:
            used (list): list of used links that should not be removed.
        """
        if not isinstance(fs, LocalFileSystem):
            return

        unused = []

        with self.links as ref:
            for relative_path in ref:
                path = os.path.join(self.root_dir, relative_path)

                if path in used or not fs.exists(path):
                    continue

                inode = get_inode(path)
                mtime, _ = get_mtime_and_size(path, fs, self.dvcignore)

                if ref[relative_path] == (inode, mtime):
                    logger.debug("Removing '%s' as unused link.", path)
                    unused.append(relative_path)

        return unused

    def remove_links(self, unused, fs):
        if not isinstance(fs, LocalFileSystem):
            return

        for path in unused:
            remove(os.path.join(self.root_dir, path))

        with self.links as ref:
            for path in unused:
                del ref[path]
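
# The md5s cache above boils down to keying a diskcache Cache by inode and
# storing (mtime, size, hash). A standalone sketch of that idea follows; the
# directory and helper names are illustrative assumptions, not the DVC API.
import os
from diskcache import Cache

md5s = Cache(directory="/tmp/state/md5s",
             eviction_policy="least-recently-used",
             disk_pickle_protocol=4)

def save_hash(path, md5):
    st = os.stat(path)
    md5s[st.st_ino] = (st.st_mtime, str(st.st_size), md5)

def get_hash(path):
    st = os.stat(path)
    value = md5s.get(st.st_ino)
    if not value or value[0] != st.st_mtime or value[1] != str(st.st_size):
        return None  # file changed since the hash was recorded
    return value[2]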
Example #30
0
                    break
            log('Skipped post: "{title}"'.format(title=post["data"]["title"]))

        background = requests.get(url)
        CACHE.set(uuid4(), background, expire=cache_seconds)

    except requests.exceptions.ConnectionError:
        log("No internet, using background from cache")
        background = CACHE.get(choice(list(CACHE)))

    with open(os.path.join(CACHE_DIR, "current"), mode="w+b") as out:
        out.write(background.content)
        set_wallpaper(os.path.join(CACHE_DIR, "current"),
                      get_desktop_environment())


if __name__ == "__main__":
    main()

# get quote as text
#res = requests.get("https://www.brainyquote.com/quotes_of_the_day.html")
#soup = BeautifulSoup(res.text, "lxml")
#quote = soup.find("img", {"class":"p-qotd"})
#print(quote["alt"])

# TODO
# DPI/scaling issues?
# dependency check

CACHE.close()
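
# The fragment above caches each downloaded background with an expiry and falls
# back to a random cached entry when offline. A self-contained sketch of that
# pattern follows; the cache path and URL handling are assumptions.
import requests
from random import choice
from uuid import uuid4
from diskcache import Cache

cache = Cache("/tmp/wallpaper_cache")

def fetch_background(url, cache_seconds=86400):
    try:
        response = requests.get(url)
        cache.set(uuid4(), response, expire=cache_seconds)
        return response
    except requests.exceptions.ConnectionError:
        # No network: reuse any previously cached response, if one exists.
        keys = list(cache)
        return cache.get(choice(keys)) if keys else None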
Example #31
0
class ReadCacheDataBackend(DataBackend):
    def __init__(self, config):
        read_cache_directory = config.get('dataBackend.readCache.directory',
                                          None,
                                          types=str)
        read_cache_maximum_size = config.get(
            'dataBackend.readCache.maximumSize', None, types=int)

        if bool(read_cache_directory) != bool(read_cache_maximum_size):
            raise ConfigurationError(
                'Both dataBackend.readCache.directory and dataBackend.readCache.maximumSize need to be set '
                + 'to enable disk based caching.')

        if read_cache_directory and read_cache_maximum_size:
            os.makedirs(read_cache_directory, exist_ok=True)
            try:
                self._read_cache = Cache(
                    read_cache_directory,
                    size_limit=read_cache_maximum_size,
                    eviction_policy='least-frequently-used',
                    statistics=1,
                )
            except Exception:
                logger.warning(
                    'Unable to enable disk based read caching. Continuing without it.'
                )
                self._read_cache = None
            else:
                logger.debug(
                    'Disk based read caching instantiated (cache size {}).'.
                    format(read_cache_maximum_size))
        else:
            self._read_cache = None
        self._use_read_cache = True

        # Start the reader and writer threads after the disk cache is created, so that they see it.
        super().__init__(config)

    def _read(self, block, metadata_only):
        key = self._block_uid_to_key(block.uid)
        metadata_key = key + self._META_SUFFIX
        if self._read_cache is not None and self._use_read_cache:
            metadata = self._read_cache.get(metadata_key)
            if metadata and metadata_only:
                return block, None, metadata
            elif metadata:
                data = self._read_cache.get(key)
                if data:
                    return block, data, metadata

        block, data, metadata = super()._read(block, metadata_only)

        # We always put blocks into the cache even when self._use_read_cache is False
        if self._read_cache is not None:
            self._read_cache.set(metadata_key, metadata)
            if not metadata_only:
                self._read_cache.set(key, data)

        return block, data, metadata

    def use_read_cache(self, enable):
        old_value = self._use_read_cache
        self._use_read_cache = enable
        return old_value

    def close(self):
        super().close()
        if self._read_cache is not None:
            (cache_hits, cache_misses) = self._read_cache.stats()
            logger.debug(
                'Disk based cache statistics (since cache creation): {} hits, {} misses.'
                .format(cache_hits, cache_misses))
            self._read_cache.close()
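
# _read above follows a read-through pattern: consult the disk cache first,
# fall back to the slow backend, then populate the cache. A minimal standalone
# sketch follows; slow_read is a stand-in for the real backend read and the
# size limit is illustrative.
from diskcache import Cache

read_cache = Cache("/tmp/read_cache",
                   size_limit=2**30,
                   eviction_policy="least-frequently-used",
                   statistics=1)

def cached_read(key, slow_read):
    data = read_cache.get(key)
    if data is not None:
        return data
    data = slow_read(key)
    read_cache.set(key, data)
    return data

hits, misses = read_cache.stats()  # available because statistics=1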