# Filter that walks a path of keys / indexes through nested dicts and lists.
# The path ("selector") is either given as a '/'-separated string, as a single
# callable, or as a sequence of path elements; more elements can be appended
# with the ``flt[name]`` syntax.
class Dict(with_metaclass(_DictMeta, Filter)):
    def __init__(self, selector=None, default=_NO_DEFAULT):
        """
        :param selector: None (empty path), a '/'-separated string, a callable
                         (used as the single path element) or an iterable of
                         path elements
        :param default: value handed to the parent filter as its default
        """
        # NOTE(review): ``self`` is also passed as the first positional
        # argument of the parent constructor. Kept as in the original; the
        # ``selector`` attribute is assigned just below anyway -- confirm
        # against Filter.__init__ before changing it.
        super(Dict, self).__init__(self, default=default)
        if selector is None:
            self.selector = []
        elif isinstance(selector, basestring):
            self.selector = selector.split('/')
        elif callable(selector):
            self.selector = [selector]
        else:
            # Take a private copy: __getitem__ appends in place, which would
            # otherwise modify the caller's list (and fail on a tuple).
            self.selector = list(selector)

    def __getitem__(self, name):
        """Append ``name`` to the path and return this same filter."""
        self.selector.append(name)
        return self

    @debug()
    def filter(self, elements):
        """
        Return ``elements`` unless the path lookup failed.

        When ``elements`` is the ``_NOT_FOUND`` marker, the result of
        ``default_or_raise`` (called with a ParseError) is returned instead.
        """
        if elements is _NOT_FOUND:
            return self.default_or_raise(ParseError('Element %r not found' % self.selector))
        return elements

    @classmethod
    def select(cls, selector, item, obj=None, key=None):
        """
        Follow ``selector`` through ``item`` and return what it points to.

        :param selector: iterable of path elements (keys, indexes, filters or
                         callables)
        :param item: a dict or list, or any object exposing the content to
                     walk as its ``el`` attribute
        :param obj: stored as ``_obj`` on filter path elements before they
                    are called
        :param key: stored as ``_key`` on filter path elements before they
                    are called
        :return: the value found, or ``_NOT_FOUND`` if any step of the path
                 cannot be resolved
        """
        content = item if isinstance(item, (dict, list)) else item.el

        for el in selector:
            if isinstance(content, list):
                try:
                    el = int(el)
                except ValueError:
                    # A non-numeric key cannot address a list element: this is
                    # a failed lookup, not a crash, so that the filter default
                    # can still apply.
                    return _NOT_FOUND
            elif isinstance(el, _Filter):
                el._key = key
                el._obj = obj
                el = el(item)
            elif callable(el):
                el = el(item)

            try:
                content = content[el]
            except (KeyError, IndexError, TypeError):
                return _NOT_FOUND

        return content
# Empty class created through the EnumMeta metaclass: it declares nothing of
# its own, so whatever behaviour it has comes from EnumMeta (defined outside
# this section). The body is deliberately left as a bare ``pass``.
class Enum(with_metaclass(EnumMeta, object)):
    pass
def _resolve_field_types(types):
    """
    Turn the accepted types of a field into a tuple usable by isinstance().

    Entries that are already types are kept as they are. Entries given as a
    string are looked up by class name among the subclasses of ``object``
    (breadth-first); every class found with that name is added, and the
    subclasses of a matching class are not explored.

    :param types: iterable of types and/or class names
    :rtype: tuple
    """
    from collections import deque

    resolved = ()
    for wanted in types:
        if not isinstance(wanted, str):
            resolved += (wanted,)
            continue

        # the following is a (almost) copy/paste from
        # https://stackoverflow.com/questions/11775460/lexical-cast-from-string-to-type
        queue = deque([object])
        while queue:
            candidate = queue.popleft()
            if candidate.__name__ == wanted:
                resolved += (candidate,)
                continue
            try:
                # keep looking!
                queue.extend(candidate.__subclasses__())
            except TypeError:
                # type.__subclasses__ needs an argument for
                # whatever reason.
                if candidate is not type:
                    raise
    return resolved


class BaseObject(with_metaclass(_BaseObjectMeta, StrConv, object)):
    """
    This is the base class for a capability object.

    A capability interface may specify to return several kinds of objects, to
    formalise information retrieved from websites.

    As python is a flexible language where variables are not typed, we use a
    system to force backends to set wanted values on all fields. To do that,
    we use the :class:`Field` class and all derived ones.

    For example::

        class Transfer(BaseObject):
            " Transfer from an account to a recipient. "

            amount = DecimalField('Amount to transfer')
            date = Field('Date of transfer', basestring, date, datetime)
            origin = Field('Origin of transfer', int, long, basestring)
            recipient = Field('Recipient', int, long, basestring)

    The docstring is mandatory.
    """

    id = None
    backend = None
    url = StringField('url')
    _fields = None

    def __init__(self, id=u'', url=NotLoaded, backend=None):
        self.id = to_unicode(id) if id is not None else u''
        self.backend = backend
        # Each instance gets its own copy of the fields.
        self._fields = deepcopy(self._fields)
        self.__setattr__('url', url)

    @property
    def fullid(self):
        """
        Full ID of the object, in form '**ID@backend**'.
        """
        return '%s@%s' % (self.id, self.backend)

    def __iscomplete__(self):
        """
        Return True if the object is completed.

        It is useful when the object is a field of another object which is
        going to be filled.

        The default behavior is to iterate on fields (with iter_fields) and
        to return False as soon as one of them is NotLoaded.
        """
        return all(value is not NotLoaded for _, value in self.iter_fields())

    def copy(self):
        """
        Return a copy of the object with its own fields.

        The object, its field mapping and every field are copied with a
        shallow copy(), so field values are shared with the original.
        """
        obj = copy(self)
        obj._fields = copy(self._fields)
        for k in obj._fields or ():
            obj._fields[k] = copy(obj._fields[k])
        return obj

    def __deepcopy__(self, memo):
        return self.copy()

    def set_empty_fields(self, value, excepts=()):
        """
        Set the same value on all empty fields.

        :param value: value to set on all empty fields
        :param excepts: if specified, do not change fields listed
        """
        for key, old_value in self.iter_fields():
            if empty(old_value) and key not in excepts:
                setattr(self, key, value)

    def iter_fields(self):
        """
        Iterate on the fields keys and values.

        Can be overloaded to iterate on other things.

        :rtype: iter[(key, value)]
        """
        if hasattr(self, 'id') and self.id is not None:
            yield 'id', self.id
        for name, field in (self._fields or {}).items():
            yield name, field.value

    def __eq__(self, obj):
        if isinstance(obj, BaseObject):
            return self.backend == obj.backend and self.id == obj.id
        else:
            return False

    def __ne__(self, obj):
        # Python 2 does not derive ``!=`` from __eq__; without this, two equal
        # objects would also compare as different. Mirrors what Python 3 does
        # by default.
        result = self.__eq__(obj)
        if result is NotImplemented:
            return result
        return not result

    def __getattr__(self, name):
        if self._fields is not None and name in self._fields:
            return self._fields[name].value
        else:
            raise AttributeError("'%s' object has no attribute '%s'" % (
                self.__class__.__name__, name))

    def __setattr__(self, name, value):
        """
        Set a field value or a regular attribute.

        For a name that is not a field, the attribute is set normally, with
        an AttributeCreationWarning when the name is new and does not start
        with an underscore.

        For a field, a non-empty value is first passed to the field's
        ``convert`` (a ConversionWarning is emitted when that yields another
        object) and must then be an instance of one of the field's types.

        :raises ValueError: if the value is not empty and has none of the
                            types accepted by the field
        """
        try:
            attr = (self._fields or {})[name]
        except KeyError:
            if name not in dir(self) and not name.startswith('_'):
                warnings.warn(
                    'Creating a non-field attribute %s. Please prefix it with _' % name,
                    AttributeCreationWarning, stacklevel=2)
            object.__setattr__(self, name, value)
            return

        if not empty(value):
            try:
                # Try to convert value to the wanted one.
                nvalue = attr.convert(value)
                # If the value was converted
                if nvalue is not value:
                    warnings.warn('Value %s was converted from %s to %s' %
                                  (name, type(value), type(nvalue)),
                                  ConversionWarning, stacklevel=2)
                value = nvalue
            except Exception:
                # error during conversion, it will probably not
                # match the wanted following types, so we'll
                # raise ValueError.
                pass

        # Checked again on purpose: the conversion may have produced an empty
        # value. Types are only resolved when the check really takes place.
        if not empty(value):
            actual_types = _resolve_field_types(attr.types)
            if not isinstance(value, actual_types):
                raise ValueError(
                    'Value for "%s" needs to be of type %r, not %r' % (
                        name, actual_types, type(value)))
        attr.value = value

    def __delattr__(self, name):
        """Remove a field if ``name`` is one, else delete the attribute."""
        fields = self._fields
        if fields is not None and name in fields:
            del fields[name]
        else:
            object.__delattr__(self, name)

    def to_dict(self):
        """
        Return the pairs of iter_fields() as an OrderedDict.

        When the object has a backend, the 'id' entry holds the full ID.
        """
        def iter_decorate(d):
            for key, value in d:
                if key == 'id' and self.backend is not None:
                    value = self.fullid
                yield key, value

        fields_iterator = self.iter_fields()
        return OrderedDict(iter_decorate(fields_iterator))

    def __getstate__(self):
        d = self.to_dict()
        d.update((k, v) for k, v in self.__dict__.items() if k != '_fields')
        return d

    @classmethod
    def from_dict(cls, values, backend=None):
        """
        Build an object from a mapping of attribute names to values.

        :param values: mapping of the attributes to set on the new object
        :param backend: if not None, backend set on the object before the
                        values are applied (a 'backend' entry in ``values``
                        still takes precedence)
        """
        self = cls()
        if backend is not None:
            self.backend = backend

        for attr in values:
            setattr(self, attr, values[attr])

        return self

    def __setstate__(self, state):
        self._fields = deepcopy(self._fields)  # because yaml does not call __init__
        for k in state:
            setattr(self, k, state[k])

    if sys.version_info.major >= 3:
        def __dir__(self):
            # Fields are not real attributes: list them explicitly.
            return list(super(BaseObject, self).__dir__()) + list(
                (self._fields or {}).keys())
# Element that fills a single object: each name listed in ``_attrs`` is
# computed through the matching ``obj_<name>`` member and set on the object.
# ``_attrs`` and ``_loaders`` are None here; they are presumably populated by
# the metaclass -- confirm against _ItemElementMeta.
class ItemElement(with_metaclass(_ItemElementMeta, AbstractElement)):
    _attrs = None
    _loaders = None
    klass = None
    validate = None
    skip_optional_fields_errors = False

    class Index(object):
        pass

    def __init__(self, *args, **kwargs):
        super(ItemElement, self).__init__(*args, **kwargs)
        self.obj = None
        # Original attributes of highlighted elements, to undo the highlight.
        self.saved_attrib = {}  # safer way would be to clone lxml tree

    def build_object(self):
        """Return a new ``klass`` instance, or None when no class is set."""
        if self.klass is None:
            return
        return self.klass()

    def _restore_attrib(self):
        """Put back the saved attributes of every highlighted element."""
        for el in self.saved_attrib:
            el.attrib.clear()
            el.attrib.update(self.saved_attrib[el])
        self.saved_attrib = {}

    def should_highlight(self):
        """
        Tell whether the parsed element has to be highlighted.

        True only if the browser of the page has both ``responses_dirname``
        and ``highlight_el`` set and the element has a root tree. A missing
        attribute anywhere along the way counts as "no".
        """
        try:
            browser = self.page.browser
            if not (browser.responses_dirname and browser.highlight_el):
                return False
            if not self.el.getroottree():
                return False
        except AttributeError:
            return False
        return True

    def _write_highlighted(self):
        """Dump the whole document of the element into the responses directory."""
        if not self.should_highlight():
            return

        responses_dirname = self.page.browser.responses_dirname
        html = lxml.html.tostring(self.el.getroottree().getroot())

        fn = os.path.join(responses_dirname, 'obj-%s.html' % self._random_id)
        # lxml serializes to bytes by default, so the file must be opened in
        # binary mode (text mode raises TypeError on Python 3).
        with open(fn, 'wb') as fd:
            fd.write(html)
        self.logger.debug('highlighted object to %s', fn)

    def __call__(self, obj=None):
        """
        Fill and return the object.

        :param obj: if not None, object to fill instead of building a new one
        :return: the filled object, or None if the element yields nothing
        """
        if obj is not None:
            self.obj = obj

        for item in self:
            return item

    def __iter__(self):
        """
        Yield the filled object, or nothing at all.

        Nothing is yielded when ``condition`` is set and returns a false
        value, when SkipItem is raised while filling the object, or when
        ``validate`` is set and rejects the object.
        """
        if self.condition is not None and not self.condition():
            return

        highlight = False
        try:
            if self.should_highlight():
                self.saved_attrib[self.el] = dict(self.el.attrib)
                self.el.attrib['style'] = 'color: white !important; background: orange !important;'

            try:
                if self.obj is None:
                    self.obj = self.build_object()
                self.parse(self.el)
                self.handle_loaders()
                for attr in self._attrs:
                    self.handle_attr(attr, getattr(self, 'obj_%s' % attr))
            except SkipItem:
                return

            if self.validate is not None and not self.validate(self.obj):
                return

            # Only objects which are actually yielded get dumped.
            highlight = True
        finally:
            if highlight:
                self._write_highlighted()
            self._restore_attrib()

        yield self.obj

    def handle_attr(self, key, func):
        """
        Compute the value of one attribute and set it on the object.

        :param key: name of the attribute
        :param func: what is passed to ``use_selector`` to get the value
        :raises SkipItem: re-raised as is after being logged
        :raises Exception: any other error, unless
                           ``skip_optional_fields_errors`` is set and ``key``
                           is a non-mandatory field of the object; FetchError
                           is then stored as the value
        """
        try:
            value = self.use_selector(func, key=key)
        except SkipItem as e:
            # Help debugging as tracebacks do not give us the key
            self.logger.debug("Attribute %s raises a %r", key, e)
            raise
        except Exception as e:
            # If we are here, we have probably a real parsing issue
            self.logger.warning('Attribute %s (in %s:%s) raises %s',
                                key, self._class_file, self._class_line, repr(e))
            if (not self.skip_optional_fields_errors
                    or key not in self.obj._fields
                    or self.obj._fields[key].mandatory):
                raise
            else:
                value = FetchError

        logger = getLogger('b2filters')
        # Arguments are passed separately so that the message is only built
        # when this level is actually logged.
        logger.log(DEBUG_FILTERS, "%s.%s = %r", self._random_id, key, value)
        setattr(self.obj, key, value)