def __init__(self, **kwargs):
    """
    Build the object from keyword arguments.

    'provider', 'name' and 'uri' are mandatory. 'last_seen' and 'created'
    are filled in with the current timestamp when the caller does not
    supply them. Everything is then forwarded to the parent initializer.

    Raises:
      TypeError - if any of the mandatory keywords is missing.
    """
    for required in ('provider', 'name', 'uri'):
        if required in kwargs:
            continue
        raise TypeError('{name} is required'.format(name=required))

    # Both defaults share a single timestamp so they are identical for a
    # freshly created object.
    timestamp = utils.now_timestamp()
    for field in ('last_seen', 'created'):
        kwargs.setdefault(field, timestamp)

    super().__init__(**kwargs)
def archive(src, dst, diff, filter_func=None, dry_run=False):
    """
    Move old files from one directory tree into another.

    Walks `src` and, for every file whose modification time plus `diff` is
    lower than the current timestamp, computes its destination under `dst`
    (through pathname_replace). The file is moved there unless a file with
    the same content already exists at the destination, in which case the
    original is reported as removable.

    Args:
      src - Root directory to scan.
      dst - Root directory that receives the archived files.
      diff - Value added to each file's st_mtime before comparing it with
             utils.now_timestamp(); files for which the sum is greater or
             equal than the current timestamp are skipped.
      filter_func - Optional callable invoked for each visited directory as
                    filter_func(dirpath, dirnames, filenames). Its return
                    value is ignored, so it can only influence the walk by
                    modifying the lists in place.
      dry_run - If true the filesystem is left untouched and the planned
                operations are only reported.

    Returns:
      A list of tuples, either (Operations.MOVE, oldpath, newpath) or
      (Operations.UNLINK, oldpath).

    Raises:
      NotImplementedError - if the destination file exists but its content
                            differs from the source file.
    """
    nowts = utils.now_timestamp()
    ret = []

    for (dirpath, dirnames, filenames) in os.walk(src):
        if filter_func:
            filter_func(dirpath, dirnames, filenames)

        for filename in filenames:
            # Plain concatenation is kept so pathname_replace receives
            # exactly the same path format as before.
            oldpath = dirpath + '/' + filename

            if (os.stat(oldpath).st_mtime + diff) >= nowts:
                # Too recent, leave it where it is
                continue

            newpath = pathname_replace(oldpath, src, dst)
            assert oldpath != newpath

            if os.path.exists(newpath):
                if not same_content(oldpath, newpath):
                    msg = ("Moving '{old}' -> '{new}'. Destination file "
                           "exists but has different content")
                    msg = msg.format(old=oldpath, new=newpath)
                    raise NotImplementedError(msg)

                # An identical copy is already archived.
                # NOTE(review): this only prints the command, the file is
                # not removed here even though an UNLINK operation is
                # reported. Confirm whether that is intentional.
                if not dry_run:
                    print("rm '{}'".format(oldpath))
                ret.append((Operations.UNLINK, oldpath))
                continue

            if not dry_run:
                # Destination directories are created only when a file is
                # really going to be moved, so a dry run has no side effects.
                os.makedirs(os.path.dirname(newpath), exist_ok=True)
                shutil.move(oldpath, newpath)
            ret.append((Operations.MOVE, oldpath, newpath))

    return ret
def __init__(self, name, uri, provider, timestamp=None, size=None,
             seeds=None, leechers=None, type=None, language=None, meta=None,
             tags=None):
    """
    Build the object from its name, uri and provider.

    The remaining arguments are stored as plain attributes on the instance;
    only name, uri and provider are forwarded to the parent initializer.
    A false `timestamp` is replaced by the current timestamp, and false
    `meta` / `tags` values are replaced by a new empty list.
    """
    # Non database attributes
    if not timestamp:
        timestamp = utils.now_timestamp()

    self.timestamp = timestamp
    self.size = size
    self.seeds = seeds
    self.leechers = leechers
    self.type = type
    self.language = language
    self.meta = meta if meta else []
    self.tags = tags if tags else []

    super().__init__(name=name, uri=uri, provider=provider)
def age(self):
    """
    Return the difference between the current timestamp and `self.created`.
    """
    now = utils.now_timestamp()
    return now - self.created
def _normalize_source_data(self, origin, *psrcs):
    """
    Normalize input data for given origin.

    Each psource goes through these steps:

      - Non-dict items are logged as errors and dropped.
      - 'provider' is set from the origin's provider and the provider
        overrides are applied.
      - Items lacking any required key are logged as errors and dropped.
      - Keys outside the allowed set are logged as a warning and removed;
        allowed keys that are missing are set to None.
      - Values are coerced to their expected types. Items holding a value
        that cannot be coerced are logged as errors and dropped.
      - 'meta' must be a dict(str->str); anything else is logged as a
        warning and replaced by an empty dict.
      - A missing (or false) 'timestamp' is replaced by the current
        timestamp.

    The input dicts are not modified; the returned dicts are new objects.

    Args:
      origin - An Origin object. All psrcs should be the result of this
               origin.
      psrcs - List of psources (raw dicts) to be normalized.

    Returns:
      A list of normalized psources (dicts).
    """
    required_keys = {'name', 'provider', 'uri'}
    allowed_keys = required_keys | {
        'language',
        'leechers',
        'meta',
        'seeds',
        'size',
        'timestamp',
        'type',
    }

    # NOTE(review): 'created' and 'permalink' are not in allowed_keys, so
    # by the time these checks run those keys are never present and their
    # checks cannot fire. Confirm whether 'timestamp' was meant instead of
    # 'created'.
    checks = [
        ('created', int),
        ('leechers', int),
        ('name', str),
        ('seeds', int),
        ('size', int),
        ('permalink', str),
        ('uri', str),
    ]

    provider_name = origin.provider.__extension_name__
    now = utils.now_timestamp()
    ret = []

    for raw in psrcs:
        if not isinstance(raw, dict):
            msg = "Origin «{name}» emits invalid data type: {datatype}"
            msg = msg.format(name=provider_name, datatype=str(type(raw)))
            self.logger.error(msg)
            continue

        # Work on a copy so the caller's data is left untouched
        psrc = dict(raw)

        # Insert provider name
        psrc['provider'] = provider_name

        # Apply overrides
        psrc.update(origin.provider.overrides)

        # Check required keys
        missing_keys = required_keys - set(psrc)
        if missing_keys:
            msg = ("Origin «{name}» doesn't provide the required "
                   "following keys: {missing_keys}")
            msg = msg.format(name=provider_name, missing_keys=missing_keys)
            self.logger.error(msg)
            continue

        # Only those keys are allowed
        forbiden_keys = [k for k in psrc if k not in allowed_keys]
        if forbiden_keys:
            msg = ("Origin «{name}» emits the following invalid "
                   "properties for its sources: {forbiden_keys}")
            msg = msg.format(name=psrc['provider'],
                             forbiden_keys=forbiden_keys)
            self.logger.warning(msg)

        psrc = {k: psrc.get(k) for k in allowed_keys}

        # Check value types. Every offending key is reported before the
        # psource is discarded.
        valid = True
        for k, kt in checks:
            value = psrc.get(k)
            if value is None or isinstance(value, kt):
                continue

            try:
                psrc[k] = kt(value)
            except (TypeError, ValueError):
                msg = ("Origin «{name}» emits invalid «{key}» value. "
                       "Expected {expectedtype} (or compatible), got "
                       "{currtype}")
                msg = msg.format(name=provider_name,
                                 key=k,
                                 expectedtype=kt,
                                 currtype=str(type(value)))
                self.logger.error(msg)
                valid = False

        if not valid:
            # The `continue` must apply to the psrcs loop, not to the
            # checks loop, otherwise the invalid psource would be returned.
            continue

        # After the rebuild above 'meta' is always present but may be None
        meta = psrc.get('meta') or {}
        meta_is_valid = isinstance(meta, dict) and all(
            isinstance(k, str) and isinstance(v, str)
            for (k, v) in meta.items())
        if not meta_is_valid:
            msg = ("Origin «{name}» emits invalid «meta» "
                   "value. Expected dict(str->str)")
            msg = msg.format(name=provider_name)
            self.logger.warning(msg)
            meta = {}
        psrc['meta'] = meta

        # Calculate URN from uri. If not found its a lazy source
        # IMPORTANT: URN is **lowercased** and **sha1-encoded**
        # try:
        #     qs = parse.urlparse(psrc['uri']).query
        #     urn = parse.parse_qs(qs)['xt'][-1]
        #     normalized_urn = bittorrentlib.normalize_urn(urn)
        #     # FIXME: This is a hack, fix uritools.alter_query_params
        #     psrc['uri'] = psrc['uri'].replace(
        #         'xt=' + urn, 'xt=' + normalized_urn)
        #     psrc['urn'] = normalized_urn
        # except KeyError:
        #     pass

        # Fix timestamp
        psrc['timestamp'] = psrc.get('timestamp') or now

        # Append to ret value
        ret.append(psrc)

    return ret