def __enter__(self):
    """Enter context: Attempt to acquire lock"""
    lock_file = self.get_lock_file()
    start = time.time()
    last = 0
    while True:
        # 'trapped' presumably suppresses exceptions raised inside this block
        # (e.g. os.open failing because the lock file already exists) so the
        # loop can fall through and retry -- TODO confirm against its definition.
        with trapped:
            if self.reentrant and os.path.exists(lock_file):
                # Reentrant mode: if the lock file already records our own
                # pid, treat the lock as held and stop retrying.
                with open(lock_file, 'rb') as fp:
                    if text.cast(fp.read()) == self.pid:
                        break
            # NOTE(review): tempfile._text_openflags is a private stdlib
            # attribute; it appears to provide O_CREAT|O_EXCL-style exclusive
            # creation flags -- confirm before upgrading Python versions.
            fd = os.open(lock_file, tempfile._text_openflags, self.lock_perms)
            try:
                # Record our pid in the lock file so reentrant checks work.
                os.write(fd, text.encode(self.pid))
            finally:
                os.close(fd)
            break
        # Only reached when the create attempt above was trapped: enforce the
        # timeout, then sleep out the remainder of the polling interval.
        now = time.time()
        if ((self.timeout is not None) and ((now - start) >= self.timeout)):
            raise LockError(text.format('unable to acquire lock for %s', self.name))
        sleep = self.interval - now + last
        if sleep > 0:
            time.sleep(sleep)
        last = now
    return self
def get_domain_from_url(url):
    """Return normalized domain portion of the URL"""
    from gruntle.memebot.utils import text
    # Lower-cased network location, with any :port suffix removed.
    netloc = text.decode(urlparse.urlparse(url).netloc).lower()
    host = netloc.rsplit(u':', 1)[0]
    # Keep at most the last two dot-separated labels (e.g. example.com).
    labels = host.split(u'.')[-2:]
    cleaned = [label.strip() for label in labels]
    kept = [label for label in cleaned if label]
    return text.encode(u'.'.join(kept))
def __str__(self):
    """Render the failed pattern match as an encoded byte string."""
    from gruntle.memebot.utils.text import encode, format
    pattern, flags = self.pattern
    flag_list = ", ".join(flags)
    message = format("No match (%s): %s %r != %r [%s]",
                     self.url, self.field, self.val, pattern, flag_list)
    return encode(message)
def __enter__(self):
    """Enter context: Attempt to acquire lock"""
    lock_file = self.get_lock_file()
    start = time.time()
    last = 0
    while True:
        # 'trapped' appears to swallow exceptions from the body (such as the
        # exclusive os.open failing when the lock file exists), letting the
        # loop retry below -- TODO confirm its semantics.
        with trapped:
            if self.reentrant and os.path.exists(lock_file):
                # Reentrant path: the lock counts as acquired if the file
                # already holds this process's pid.
                with open(lock_file, 'rb') as fp:
                    if text.cast(fp.read()) == self.pid:
                        break
            # NOTE(review): relies on the private tempfile._text_openflags,
            # presumably O_CREAT|O_EXCL-style flags for exclusive creation --
            # verify on Python upgrades.
            fd = os.open(lock_file, tempfile._text_openflags, self.lock_perms)
            try:
                # Stamp our pid into the freshly created lock file.
                os.write(fd, text.encode(self.pid))
            finally:
                os.close(fd)
            break
        # Reached only when acquisition was trapped: check the deadline,
        # then sleep for whatever is left of the polling interval.
        now = time.time()
        if ((self.timeout is not None) and ((now - start) >= self.timeout)):
            raise LockError(
                    text.format('unable to acquire lock for %s', self.name))
        sleep = self.interval - now + last
        if sleep > 0:
            time.sleep(sleep)
        last = now
    return self
def __str__(self):
    """Describe the content-parsing failure as an encoded string."""
    from gruntle.memebot.utils.text import encode, format
    if self.msg is None:
        reason = "Unknown error"
    else:
        reason = self.msg
    return encode(format("Invalid content parsing %s: %s",
                         self.response.url, reason))
def _open(self, url, data=None, referer=None):
    """Issue a urllib2 request for url, optionally sending data and a Referer.

    An HTTPError raised by the opener is kept as the response object instead
    of propagating (urllib2.HTTPError can also act as a response -- the
    caller presumably inspects its status; confirm against usage).
    """
    request = urllib2.Request(text.encode(url), data)
    if referer is not None:
        request.add_header('Referer', referer)
    try:
        response = self.opener.open(request, timeout=self.timeout)
    except urllib2.HTTPError, exc:
        response = exc
    # NOTE(review): 'response' is assigned but never returned within this
    # visible span; the method body may continue beyond this chunk -- confirm.
def get_db_prep_value(self, value):
    """Serialize data before storing to database"""
    if value is None:
        return value
    # Normalize to a byte string before compressing.
    if not isinstance(value, str):
        value = text.encode(value, settings.TEXT_ENCODING)
    compressed = self.engine.compress(value, self.level)
    payload = base64.encodestring(compressed)
    # Prefix with the header magic and the engine id so to_python() can
    # recognize and route the blob back to the right engine.
    blob = self.HEADER + self.engine.id + payload
    return text.decode(blob, settings.TEXT_ENCODING)
def __str__(self):
    """Encoded description of which blacklist rule matched."""
    from gruntle.memebot.utils.text import encode, decode, format
    url_part = "" if self.url is None else u" (%s)" % decode(self.url)
    message = format("%s%s matched blacklist rule %r for host %r",
                     self.blacklist.host, url_part,
                     self.blacklist.rule, self.blacklist.match)
    return encode(message)
def to_python(self, value):
    """Convert serialized data into python object"""
    if value is None:
        return value
    if not isinstance(value, str):
        value = text.encode(value, settings.TEXT_ENCODING)
    header_len = len(self.HEADER)
    # Only attempt decompression when the value carries our header magic
    # plus at least an engine id and a minimal payload.
    if value.startswith(self.HEADER) and len(value) > header_len + 6:
        engine_id = value[header_len]
        engine = self.engine_ids.get(engine_id)
        if engine is not None:
            # buffer() gives a zero-copy view of the payload past the
            # header and engine-id byte.
            compressed = base64.decodestring(buffer(value, header_len + 1))
            value = engine.decompress(compressed, self.level)
    return value
def prettify_node(node):
    """Try to turn a soup node into something resembling readable html"""
    # Strings pass through as-is; soup nodes are pretty-printed first.
    raw = node if isinstance(node, (str, unicode)) else node.prettify()
    markup = text.decode(raw)
    # Blank out language tags and comments, then trim outer whitespace.
    markup = comment_re.sub(u' ', lang_tag_re.sub(u' ', markup))
    markup = markup.strip()
    # Drop blank lines and trailing whitespace; end with a single newline.
    kept = [line.rstrip() for line in markup.splitlines() if line.rstrip()]
    return text.encode(u'\n'.join(kept) + u'\n')
def get_domain_from_url(url):
    """Return normalized domain portion of the URL"""
    from gruntle.memebot.utils import text
    host = text.decode(urlparse.urlparse(url).netloc).lower()
    # Strip any :port suffix, then keep the last two domain labels.
    host = host.rsplit(u':', 1)[0]
    parts = [part.strip() for part in host.split(u'.')[-2:]]
    return text.encode(u'.'.join(part for part in parts if part))
def __str__(self):
    """Format the HTTP failure as an encoded status line."""
    from gruntle.memebot.utils.text import encode, format
    status_line = format("%s responded with status: %d %s",
                         self.link.url, self.response.code, self.response.msg)
    return encode(status_line)
def __str__(self):
    """Describe the blacklist match as an encoded string."""
    from gruntle.memebot.utils.text import encode, decode, format
    if self.url is None:
        suffix = ''
    else:
        suffix = u' (%s)' % decode(self.url)
    return encode(format('%s%s matched blacklist rule %r for host %r',
                         self.blacklist.host, suffix,
                         self.blacklist.rule, self.blacklist.match))
def __str__(self):
    """Return the response status as an encoded message."""
    from gruntle.memebot.utils.text import encode, format
    code = self.response.code
    reason = self.response.msg
    return encode(format('%s responded with status: %d %s',
                         self.link.url, code, reason))
def normalize(self, host):
    """Clean up hostname"""
    # Lower-case, drop any :port suffix, and split into dot labels.
    labels = text.decode(host).lower().rsplit(u':', 1)[0].split(u'.')
    stripped = (label.strip() for label in labels)
    # Re-join, skipping labels that were empty or whitespace-only.
    return text.encode(u'.'.join(label for label in stripped if label))
def __str__(self):
    """Encoded key=value summary of every field except 'content'."""
    pairs = []
    for key in self._fields:
        if key != "content":
            pairs.append(text.format("%s=%r", key, getattr(self, key, None)))
    return text.encode(", ".join(pairs))
def __str__(self):
    """Summarize all fields but 'content' as an encoded string."""
    shown = (key for key in self._fields if key != 'content')
    rendered = (text.format('%s=%r', key, getattr(self, key, None))
                for key in shown)
    return text.encode(', '.join(rendered))
def __str__(self):
    """Encoded description of the failed pattern match."""
    from gruntle.memebot.utils.text import encode, format
    regex, regex_flags = self.pattern
    detail = format('No match (%s): %s %r != %r [%s]', self.url, self.field,
                    self.val, regex, ', '.join(regex_flags))
    return encode(detail)
def __str__(self):
    """Encoded field summary, omitting the bulky 'data' and 'raw' fields."""
    skip = ('data', 'raw')
    parts = [text.format('%s=%r', key, getattr(self, key, None))
             for key in self._fields if key not in skip]
    return text.encode(', '.join(parts))
def __str__(self):
    """Encoded description of the content-parsing failure."""
    from gruntle.memebot.utils.text import encode, format
    reason = self.msg
    if reason is None:
        reason = 'Unknown error'
    return encode(format('Invalid content parsing %s: %s',
                         self.response.url, reason))