示例#1
0
    def __init__(self, **kwargs):
        """ Initialize the object from keyword arguments.

        ``provider``, ``name`` and ``uri`` are mandatory. ``last_seen`` and
        ``created`` default to the current timestamp when they are not
        supplied. Everything is then forwarded to the parent initializer.

        Raises:
          TypeError - If any of the mandatory keywords is missing. Only the
                      first missing one (in the order above) is reported.
        """
        mandatory = ('provider', 'name', 'uri')
        absent = next((key for key in mandatory if key not in kwargs), None)
        if absent is not None:
            raise TypeError('{name} is required'.format(name=absent))

        # Both fields get the very same value when they are defaulted
        stamp = utils.now_timestamp()
        kwargs.setdefault('last_seen', stamp)
        kwargs.setdefault('created', stamp)

        super().__init__(**kwargs)
示例#2
0
def archive(src, dst, diff, filter_func=None, dry_run=False):
    """ Move files older than a given age from one tree into another.

    Walks ``src`` and, for every file whose modification time plus ``diff``
    is lower than the current timestamp, computes its destination with
    ``pathname_replace(oldpath, src, dst)`` and moves it there.

    Args:
      src - Root directory to walk.
      dst - Root directory receiving the archived files.
      diff - Minimum age of a file to be archived. It is added to the file
             mtime and compared against ``utils.now_timestamp()``
             (presumably seconds; confirm against ``utils``).
      filter_func - Optional callable invoked as
                    ``filter_func(dirpath, dirnames, filenames)`` for each
                    walked directory. Its return value is ignored, so it can
                    only influence the walk by modifying the lists in place.
      dry_run - If true nothing is touched on disk (no directory is created,
                no file is moved); the operations are only reported.

    Returns:
      A list of tuples, one per handled file: either
      ``(Operations.MOVE, oldpath, newpath)`` or
      ``(Operations.UNLINK, oldpath)`` when the destination already exists
      with the same content.

    Raises:
      NotImplementedError - If the destination already exists but its content
                            differs from the source file.
    """
    nowts = utils.now_timestamp()
    ret = []

    for (dirpath, dirnames, filenames) in os.walk(src):
        if filter_func:
            filter_func(dirpath, dirnames, filenames)

        for filename in filenames:
            oldpath = dirpath + '/' + filename
            stamp = os.stat(oldpath).st_mtime

            # File is still too recent
            if (stamp + diff) >= nowts:
                continue

            newpath = pathname_replace(oldpath, src, dst)
            assert oldpath != newpath

            if os.path.exists(newpath):
                if not same_content(oldpath, newpath):
                    msg = ("Moving '{old}' -> '{new}'. Destination file "
                           "exists but has different content")
                    msg = msg.format(old=oldpath, new=newpath)
                    raise NotImplementedError(msg)

                # NOTE(review): the duplicated file is only reported on
                # stdout, nothing is removed here even though the operation
                # is recorded as UNLINK. Confirm whether a real unlink is
                # intended.
                if not dry_run:
                    print("rm '{}'".format(oldpath))
                ret.append((Operations.UNLINK, oldpath))
                continue

            if not dry_run:
                # Destination directories are created only when a file is
                # really going to be moved, a dry run must not touch dst
                os.makedirs(os.path.dirname(newpath), exist_ok=True)
                shutil.move(oldpath, newpath)
            ret.append((Operations.MOVE, oldpath, newpath))

    return ret
示例#3
0
    def __init__(self,
                 name,
                 uri,
                 provider,
                 timestamp=None,
                 size=None,
                 seeds=None,
                 leechers=None,
                 type=None,
                 language=None,
                 meta=None,
                 tags=None):
        """ Initialize the object.

        ``name``, ``uri`` and ``provider`` are forwarded to the parent
        initializer, the remaining arguments are stored as plain attributes.

        A falsy ``timestamp`` is replaced by the current timestamp; falsy
        ``meta`` and ``tags`` are replaced by a new empty list.
        """

        # Non database attributes
        self.type = type
        self.size = size
        self.seeds = seeds
        self.leechers = leechers
        self.language = language
        self.tags = tags if tags else []
        self.meta = meta if meta else []
        self.timestamp = timestamp if timestamp else utils.now_timestamp()

        super().__init__(name=name, uri=uri, provider=provider)
示例#4
0
 def age(self):
     """ Return the current timestamp minus ``self.created``. """
     current = utils.now_timestamp()
     return current - self.created
示例#5
0
    def _normalize_source_data(self, origin, *psrcs):
        """ Normalize input data for given origin.

        Each psource is copied (the input dicts are left untouched), tagged
        with the provider name, merged with the provider overrides, reduced
        to the allowed keys and type-checked.

        Invalid psources are logged and skipped: non-dict items, items
        missing a required key and items holding a value that can't be
        converted to its expected type. Unknown keys and invalid «meta»
        values only produce a warning; the former are dropped and the latter
        is replaced by an empty dict.

        Args:
          origin - An Origin object. All psrcs should be the result of this
                   origin.
          psrcs - List of psources (raw dicts) to be normalized.

        Returns:
          A list of normalized psources (dicts). Every dict has exactly the
          allowed keys; absent ones are set to None, except «meta» (empty
          dict) and «timestamp» (current timestamp).
        """

        required_keys = {'name', 'provider', 'uri'}
        allowed_keys = required_keys | {
            'language', 'leechers', 'meta', 'seeds', 'size', 'timestamp',
            'type'
        }

        # (key, expected type). Only allowed keys make sense here since any
        # other key is dropped before the values are checked.
        checks = [
            ('leechers', int),
            ('name', str),
            ('seeds', int),
            ('size', int),
            ('timestamp', int),
            ('uri', str),
        ]

        provider_name = origin.provider.__extension_name__
        ret = []
        now = utils.now_timestamp()

        for psrc in psrcs:
            if not isinstance(psrc, dict):
                msg = "Origin «{name}» emits invalid data type: {datatype}"
                msg = msg.format(name=provider_name,
                                 datatype=str(type(psrc)))
                self.logger.error(msg)
                continue

            # Work on a copy, caller's data must not be modified
            psrc = dict(psrc)

            # Insert provider name
            psrc['provider'] = provider_name

            # Apply overrides
            psrc.update(origin.provider.overrides)

            # Check required keys
            missing_keys = required_keys - set(psrc)
            if missing_keys:
                msg = ("Origin «{name}» doesn't provide the required "
                       "following keys: {missing_keys}")
                msg = msg.format(name=provider_name,
                                 missing_keys=missing_keys)
                self.logger.error(msg)
                continue

            # Only those keys are allowed
            forbiden_keys = [k for k in psrc if k not in allowed_keys]
            if forbiden_keys:
                msg = ("Origin «{name}» emits the following invalid "
                       "properties for its sources: {forbiden_keys}")
                msg = msg.format(name=provider_name,
                                 forbiden_keys=forbiden_keys)
                self.logger.warning(msg)

            psrc = {k: psrc.get(k, None) for k in allowed_keys}

            # Check value types. Every problem is logged, but a single one
            # is enough to discard the whole psource.
            invalid = False
            for k, kt in checks:
                value = psrc.get(k)
                if value is None or isinstance(value, kt):
                    continue

                try:
                    psrc[k] = kt(value)
                except (TypeError, ValueError):
                    msg = ("Origin «{name}» emits invalid «{key}» value. "
                           "Expected {expectedtype} (or compatible), got "
                           "{currtype}")
                    msg = msg.format(
                        name=provider_name,
                        key=k,
                        expectedtype=kt,
                        currtype=str(type(value)))
                    self.logger.error(msg)
                    invalid = True

            if invalid:
                continue

            # At this point the «meta» key always exists (possibly as None),
            # so a default passed to .get() would never be used
            meta = psrc.get('meta') or {}
            if not isinstance(meta, dict) or not all(
                    isinstance(k, str) and isinstance(v, str)
                    for (k, v) in meta.items()):
                msg = ("Origin «{name}» emits invalid «meta» "
                       "value. Expected dict(str->str)")
                msg = msg.format(name=provider_name)
                self.logger.warning(msg)
                meta = {}
            psrc['meta'] = meta

            # URN calculation from uri is currently disabled.
            # Calculate URN from uri. If not found its a lazy source
            # IMPORTANT: URN is **lowercased** and **sha1-encoded**
            # try:
            #     qs = parse.urlparse(psrc['uri']).query
            #     urn = parse.parse_qs(qs)['xt'][-1]
            #     normalized_urn = bittorrentlib.normalize_urn(urn)

            #     # FIXME: This is a hack, fix uritools.alter_query_params
            #     psrc['uri'] = psrc['uri'].replace(
            #         'xt=' + urn, 'xt=' + normalized_urn)
            #     psrc['urn'] = normalized_urn

            # except KeyError:
            #     pass

            # Fix timestamp
            psrc['timestamp'] = psrc.get('timestamp') or now

            # Append to ret value
            ret.append(psrc)

        return ret