def __init__(self, index=0, start=None, end=None, text='', position=''):
    """
    Build a subtitle item from its index, timestamps, text and position.

    index -> stored as an int when it converts cleanly, unchanged otherwise.
    start, end -> handed to SubRipTime.coerce; falsy values are replaced by 0.
    text, position -> stored after conversion with str().
    """
    try:
        index = int(index)
    except (TypeError, ValueError):
        # A non-numeric index is tolerated and kept exactly as given.
        pass
    self.index = index

    self.start = SubRipTime.coerce(start if start else 0)
    self.end = SubRipTime.coerce(end if end else 0)
    self.position = str(position)
    self.text = str(text)
def _mkarg(self, kargs):
    """Prepare the request arguments (encode values, add api key/secret).

    Works on a copy of ``kargs``: the API key and secret are added,
    non-string iterables are comma-joined, numeric values are converted with
    ``str``, and entries that are ``File`` instances or ``None`` are removed.

    :return: the new argument list"""
    prepared = kargs.copy()
    prepared['api_key'] = self._api.key
    prepared['api_secret'] = self._api.secret

    # Iterate over a snapshot of the keys because entries may be deleted.
    for name in list(prepared):
        value = prepared[name]
        if isinstance(value, Iterable) and not isinstance(value, basestring):
            prepared[name] = ','.join(value)
            continue
        if isinstance(value, File) or value is None:
            del prepared[name]
            continue
        if isinstance(value, numeric_types):
            prepared[name] = str(value)
    return prepared
def __call__(self, *args, **kargs):
    """Send the API request described by ``kargs`` and return the response.

    ``File`` values are attached as file parts of a multipart form; all other
    values are passed through ``self._mkarg`` and added as plain fields.

    Connection-level failures (``socket.error`` / ``URLError``) are retried
    after sleeping ``self._api.retry_delay`` until the ``self._api.max_retries``
    budget is used up, at which point the last error is re-raised.

    :return: the raw response body, or the ``json.loads`` result (built with
        ``ObjectDict`` as object hook) when ``self._api.decode_result`` is set
    :raise TypeError: if positional arguments are given
    :raise APIError: on an HTTP error status, or when the body cannot be
        decoded as JSON
    """
    if args:
        raise TypeError('Only keyword arguments are allowed')

    form = _MultiPartForm()
    for k, v in kargs.items():
        if isinstance(v, File):
            form.add_file(k, v.get_filename(), v.content)

    url = self._urlbase
    for k, v in self._mkarg(kargs).items():
        form.add_field(k, v)

    body = form.bytes
    request = Request(url, data=body)
    request.add_header('Content-type', form.get_content_type())
    request.add_header('Content-length', str(len(body)))

    self._api.update_request(request)

    retry = self._api.max_retries
    while True:
        retry -= 1
        try:
            ret = urlopen(request, timeout=self._api.timeout).read()
            break
        except HTTPError as e:
            # HTTPError must be handled before URLError (its base class):
            # an HTTP status error is reported immediately, never retried.
            raise APIError(e.code, url, e.read())
        except (socket.error, URLError) as e:
            if retry < 0:
                raise
            _print_debug('caught error: {}; retrying'.format(e))
            time.sleep(self._api.retry_delay)

    if self._api.decode_result:
        try:
            ret = json.loads(ret, object_hook=ObjectDict)
        except Exception:
            # Only real decoding failures are reported as APIError;
            # KeyboardInterrupt / SystemExit are left to propagate.
            raise APIError(-1, url,
                           'json decode error, value={0!r}'.format(ret))
    return ret
def write_into(self, output_file, eol=None):
    """
    write_into(output_file [, eol])

    Write every item of this collection to `output_file`, one after the
    other, using `eol` (or `self.eol` when `eol` is not given) as line ending.

    `output_file` -> Any object exposing a `write()` method, typically a
    file object.
    """
    line_end = eol if eol else self.eol
    blank_line = 2 * line_end
    needs_translation = line_end != '\n'

    for entry in self:
        rendered = str(entry)
        if needs_translation:
            rendered = rendered.replace('\n', line_end)
        output_file.write(rendered)

        # Items coming from existing applications may already end with the
        # separating blank line; only add the trailing eol when it is missing.
        if rendered.endswith(blank_line):
            continue
        output_file.write(line_end)
def build_digest_header(self, method, url):
    """Build the value of an HTTP Digest ``Authorization`` header.

    The challenge parameters are read from ``self.chal`` (``realm`` and
    ``nonce`` are required; ``qop``, ``algorithm`` and ``opaque`` are
    optional) and the credentials from ``self.username`` / ``self.password``.
    ``self.nonce_count`` and ``self.last_nonce`` are updated so that a
    repeated nonce gets an increasing ``nc`` value.

    :param method: HTTP method of the request being authenticated.
    :param url: URL of the request; its path and query form the ``uri`` field.
    :return: the header value starting with ``'Digest '``, or ``None`` when
        the challenge uses an unsupported algorithm or an unsupported ``qop``
        (anything that does not offer ``auth``).
    """
    realm = self.chal['realm']
    nonce = self.chal['nonce']
    qop = self.chal.get('qop')
    algorithm = self.chal.get('algorithm')
    opaque = self.chal.get('opaque')

    _algorithm = 'MD5' if algorithm is None else algorithm.upper()

    # Digest constructors per supported algorithm; hashlib is expected to be
    # imported at module level.
    digest_factories = {
        'MD5': hashlib.md5,
        'MD5-SESS': hashlib.md5,
        'SHA': hashlib.sha1,
    }
    digest_factory = digest_factories.get(_algorithm)
    if digest_factory is None:
        # Unsupported algorithm: report "cannot build a header" to the caller
        # instead of failing on an undefined hash function.
        return None

    def hash_utf8(x):
        if isinstance(x, str):
            x = x.encode('utf-8')
        return digest_factory(x).hexdigest()

    def KD(s, d):
        return hash_utf8("%s:%s" % (s, d))

    # XXX not implemented yet
    entdig = None
    p_parsed = urlparse(url)
    path = p_parsed.path
    if p_parsed.query:
        path += '?' + p_parsed.query

    A1 = '%s:%s:%s' % (self.username, realm, self.password)
    A2 = '%s:%s' % (method, path)

    HA1 = hash_utf8(A1)
    HA2 = hash_utf8(A2)

    if nonce == self.last_nonce:
        self.nonce_count += 1
    else:
        self.nonce_count = 1
    ncvalue = '%08x' % self.nonce_count

    # Client nonce: derived from the counter, the server nonce, the current
    # time and random bytes.
    s = str(self.nonce_count).encode('utf-8')
    s += nonce.encode('utf-8')
    s += time.ctime().encode('utf-8')
    s += os.urandom(8)
    cnonce = hashlib.sha1(s).hexdigest()[:16]

    if _algorithm == 'MD5-SESS':
        HA1 = hash_utf8('%s:%s:%s' % (HA1, nonce, cnonce))

    if qop is None:
        respdig = KD(HA1, "%s:%s" % (nonce, HA2))
    elif qop == 'auth' or 'auth' in qop.split(','):
        noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth', HA2)
        respdig = KD(HA1, noncebit)
    else:
        # XXX handle auth-int.
        return None

    self.last_nonce = nonce

    # XXX should the partial digests be encoded too?
    # Every field, including the username, is a %s placeholder so the number
    # of placeholders matches the number of values supplied.
    base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
           'response="%s"' % (self.username, realm, nonce, path, respdig)
    if opaque:
        base += ', opaque="%s"' % opaque
    if algorithm:
        base += ', algorithm="%s"' % algorithm
    if entdig:
        base += ', digest="%s"' % entdig
    if qop:
        base += ', qop="auth", nc=%s, cnonce="%s"' % (ncvalue, cnonce)

    return 'Digest %s' % (base)
try: from collections import UserList except ImportError: from UserList import UserList from itertools import chain from copy import copy from srtexc import Error from srtitem import SubRipItem from compat import str BOMS = ((codecs.BOM_UTF32_LE, 'utf_32_le'), (codecs.BOM_UTF32_BE, 'utf_32_be'), (codecs.BOM_UTF16_LE, 'utf_16_le'), (codecs.BOM_UTF16_BE, 'utf_16_be'), (codecs.BOM_UTF8, 'utf_8')) CODECS_BOMS = dict((codec, str(bom, codec)) for bom, codec in BOMS) BIGGER_BOM = max(len(bom) for bom, encoding in BOMS) class SubRipFile(UserList, object): """ SubRip file descriptor. Provide a pure Python mapping on all metadata. SubRipFile(items, eol, path, encoding) items -> list of SubRipItem. Default to []. eol -> str: end of line character. Default to linesep used in opened file if any else to os.linesep. path -> str: path where file will be saved. To open an existant file see
def _print_debug(msg):
    """Write ``msg`` plus a newline to stderr when ``DEBUG_LEVEL`` is truthy."""
    if not DEBUG_LEVEL:
        return
    line = str(msg) + '\n'
    sys.stderr.write(line)
def cfcookie(netloc, ua, timeout):
    """Try to obtain cookies by answering the challenge page served at ``netloc``.

    The page is requested with ``ua`` as User-Agent. When the server answers
    with an HTTP error, the error body is parsed for the ``jschl_vc`` token
    and the script-built answer, the answer URL is requested after a
    5 second pause, and the cookies collected by that request are returned.

    Note: this installs a process-wide opener via ``urllib2.install_opener``.

    :param netloc: base URL (scheme and host) to request.
    :param ua: User-Agent header value to send.
    :param timeout: timeout in seconds; converted with ``int``.
    :return: a ``'name=value; name=value'`` cookie string, or ``None`` when
        no HTTP error page was returned or any step failed.
    """
    try:
        headers = {'User-Agent': ua}
        req = urllib2.Request(netloc, headers=headers)
        try:
            urllib2.urlopen(req, timeout=int(timeout))
        except urllib2.HTTPError as response:
            result = response.read(5242880)
        else:
            # The request succeeded, so there is no challenge page to solve.
            return None

        jschl = re.findall('name="jschl_vc" value="(.+?)"/>', result)[0]
        init = re.findall(
            r'setTimeout\(function\(\){\s*.*?.*:(.*?)};', result)[-1]
        builder = re.findall(r"challenge-form\'\);\s*(.*)a.v", result)[0]

        decryptVal = parseJSString(init)
        for line in builder.split(';'):
            if len(line) > 0 and '=' in line:
                sections = line.split('=')
                line_val = parseJSString(sections[1])
                # SECURITY: eval() is applied to text assembled from the
                # remote response (operator character plus parsed values).
                # TODO(review): replace with an explicit operator table.
                decryptVal = int(
                    eval(
                        str(decryptVal) + str(sections[0][-1]) +
                        str(line_val)))

        answer = decryptVal + len(urlparse(netloc).netloc)
        query = '%s/cdn-cgi/l/chk_jschl?jschl_vc=%s&jschl_answer=%s' % (
            netloc, jschl, answer)

        if 'type="hidden" name="pass"' in result:
            passval = re.findall('name="pass" value="(.*?)"', result)[0]
            query = '%s/cdn-cgi/l/chk_jschl?pass=%s&jschl_vc=%s&jschl_answer=%s' % (
                netloc, quote_plus(passval), jschl, answer)
            time.sleep(5)

        cookies = cookielib.LWPCookieJar()
        handlers = [
            urllib2.HTTPHandler(),
            urllib2.HTTPSHandler(),
            urllib2.HTTPCookieProcessor(cookies)
        ]
        opener = urllib2.build_opener(*handlers)
        urllib2.install_opener(opener)

        try:
            req = urllib2.Request(query, headers=headers)
            urllib2.urlopen(req, timeout=int(timeout))
        except Exception:
            # Best effort: cookies set before the failure are still returned.
            pass

        return '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
    except Exception:
        # Best effort: any parsing or network failure yields None.
        # KeyboardInterrupt / SystemExit are deliberately not swallowed.
        return None
def request(url, close=True, redirect=True, error=False, proxy=None,
            post=None, headers=None, mobile=False, limit=None, referer=None,
            cookie=None, output='', timeout='30', username=None,
            password=None):
    """Fetch ``url`` and return data selected by ``output``.

    Notes visible from the implementation:
      * openers are installed process-wide with ``urllib2.install_opener``;
      * a ``headers`` dict passed by the caller is filled in place
        (User-Agent, Referer, Accept-Language, Cookie);
      * any exception is swallowed and ``None`` is returned.

    :param close: when True the response is closed before returning; any
        other value also enables a cookie jar.
    :param redirect: when False, 300/301/302/303/307 responses are returned
        instead of followed, and the Referer header is dropped.
    :param error: when False, HTTP error responses make the function return
        ``None`` (except the 503 verification page, which is retried with a
        cookie from ``cfcookie``); otherwise the error response is processed
        like a normal one.
    :param proxy: proxy address used for ``http`` requests.
    :param post: request body; a dict is url-encoded.
    :param mobile: when True a fixed mobile User-Agent is used instead of
        ``randomagent()`` (only if no User-Agent header was supplied).
    :param limit: read limit in KiB for the body (``'0'`` means 224 KiB,
        ``None`` means 5242880 bytes).
    :param output: one of
        ``'cookie'`` (cookie string), ``'response'`` ((status, body) tuple),
        ``'chunk'`` (first 16 KiB, or ``None`` when Content-Length is below
        2048 KiB), ``'extended'`` ((body, request headers, response headers,
        cookie) tuple), ``'geturl'`` (final URL), ``'headers'`` (response
        headers); anything else returns the body.
    :param timeout: timeout in seconds; converted with ``int``.
    :param username, password: credentials for basic auth (or proxy basic
        auth when ``proxy`` is given).
    :return: see ``output``; ``None`` on failure.
    """
    if isinstance(post, dict):
        if is_py2:
            post = urlencode(post)
        elif is_py3:
            post = bytes(urlencode(post), encoding='utf-8')
    elif isinstance(post, basestring) and is_py3:
        post = bytes(post, encoding='utf-8')

    try:
        handlers = []

        if username is not None and password is not None and not proxy:
            passmgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
            passmgr.add_password(None, uri=url, user=username,
                                 passwd=password)
            handlers += [urllib2.HTTPBasicAuthHandler(passmgr)]
            opener = urllib2.build_opener(*handlers)
            urllib2.install_opener(opener)

        if proxy is not None:
            if username is not None and password is not None:
                passmgr = urllib2.ProxyBasicAuthHandler()
                passmgr.add_password(None, uri=url, user=username,
                                     passwd=password)
                handlers += [
                    urllib2.ProxyHandler({'http': '{0}'.format(proxy)}),
                    urllib2.HTTPHandler,
                    urllib2.ProxyBasicAuthHandler(passmgr)
                ]
            else:
                handlers += [
                    urllib2.ProxyHandler({'http': '{0}'.format(proxy)}),
                    urllib2.HTTPHandler
                ]
            opener = urllib2.build_opener(*handlers)
            urllib2.install_opener(opener)

        if output == 'cookie' or output == 'extended' or close is not True:
            cookies = cookielib.LWPCookieJar()
            handlers += [
                urllib2.HTTPHandler(),
                urllib2.HTTPSHandler(),
                urllib2.HTTPCookieProcessor(cookies)
            ]
            opener = urllib2.build_opener(*handlers)
            urllib2.install_opener(opener)

        try:
            # Raising here skips the unverified-SSL handler on interpreters
            # whose version_info compares greater than (2, 7, 9).
            if (2, 7, 9) < sys.version_info:
                raise BaseException
            import ssl
            try:
                import _ssl
                CERT_NONE = _ssl.CERT_NONE
            except ImportError:
                CERT_NONE = ssl.CERT_NONE
            ssl_context = ssl.create_default_context()
            ssl_context.check_hostname = False
            ssl_context.verify_mode = CERT_NONE
            handlers += [urllib2.HTTPSHandler(context=ssl_context)]
            opener = urllib2.build_opener(*handlers)
            urllib2.install_opener(opener)
        except BaseException:
            pass

        # Self-update is a no-op that only checks ``headers`` is dict-like;
        # anything else (including None) is replaced by a fresh dict.
        try:
            headers.update(headers)
        except BaseException:
            headers = {}

        if 'User-Agent' in headers:
            pass
        elif mobile is not True:
            headers['User-Agent'] = randomagent()
        else:
            headers['User-Agent'] = 'Apple-iPhone/701.341'

        if 'Referer' in headers:
            pass
        elif referer is None:
            headers['Referer'] = '%s://%s/' % (urlparse(url).scheme,
                                               urlparse(url).netloc)
        else:
            headers['Referer'] = referer

        if 'Accept-Language' not in headers:
            headers['Accept-Language'] = 'en-US'

        if 'Cookie' in headers:
            pass
        elif cookie is not None:
            headers['Cookie'] = cookie

        if redirect is False:

            class NoRedirectHandler(urllib2.HTTPRedirectHandler):
                def http_error_302(self, reqst, fp, code, msg, head):
                    infourl = addinfourl(fp, head, reqst.get_full_url())
                    infourl.status = code
                    infourl.code = code
                    return infourl

                http_error_300 = http_error_302
                http_error_301 = http_error_302
                http_error_303 = http_error_302
                http_error_307 = http_error_302

            opener = urllib2.build_opener(NoRedirectHandler())
            urllib2.install_opener(opener)

            try:
                del headers['Referer']
            except Exception:
                pass

        req = urllib2.Request(url, data=post, headers=headers)

        try:
            response = urllib2.urlopen(req, timeout=int(timeout))
        except urllib2.HTTPError as http_error:
            # Python 3 unbinds the ``as`` target when the handler ends, so
            # the error response is kept under a separate name to stay
            # usable by the output handling below.
            response = http_error
            if response.code == 503:
                if 'cf-browser-verification' in response.read(5242880):
                    netloc = '%s://%s' % (urlparse(url).scheme,
                                          urlparse(url).netloc)
                    cf = cfcookie(netloc, headers['User-Agent'], timeout)
                    headers['Cookie'] = cf
                    req = urllib2.Request(url, data=post, headers=headers)
                    response = urllib2.urlopen(req, timeout=int(timeout))
                elif error is False:
                    return None
            elif error is False:
                return None

        if output == 'cookie':
            # ``cookies`` / ``cf`` may be unbound; the resulting NameError is
            # swallowed so whichever value exists wins (``cf`` last).
            try:
                result = '; '.join(
                    ['%s=%s' % (i.name, i.value) for i in cookies])
            except BaseException:
                pass
            try:
                result = cf
            except BaseException:
                pass

        elif output == 'response':
            if limit == '0':
                result = (str(response.code), response.read(224 * 1024))
            elif limit is not None:
                result = (str(response.code),
                          response.read(int(limit) * 1024))
            else:
                result = (str(response.code), response.read(5242880))

        elif output == 'chunk':
            try:
                content = int(response.headers['Content-Length'])
            except BaseException:
                # Unknown length is treated as just above the threshold.
                content = (2049 * 1024)
            if content < (2048 * 1024):
                return None
            result = response.read(16 * 1024)

        elif output == 'extended':
            try:
                cookie = '; '.join(
                    ['%s=%s' % (i.name, i.value) for i in cookies])
            except BaseException:
                pass
            try:
                cookie = cf
            except BaseException:
                pass
            content = response.headers
            result = response.read(5242880)
            return result, headers, content, cookie

        elif output == 'geturl':
            result = response.geturl()

        elif output == 'headers':
            content = response.headers
            return content

        else:
            if limit == '0':
                result = response.read(224 * 1024)
            elif limit is not None:
                result = response.read(int(limit) * 1024)
            else:
                result = response.read(5242880)

        if close is True:
            response.close()

        return result
    except BaseException:
        return None
def __str__(self):
    """Format this time with TIME_PATTERN; negative times render as zero."""
    if self.ordinal >= 0:
        return self.TIME_PATTERN % tuple(self)
    # Represent negative times as zero
    zero = SubRipTime.from_ordinal(0)
    return str(zero)
class SubRipItem(ComparableMixin):
    """
    SubRipItem(index, start, end, text, position)

    A single subtitle entry.

    index -> int: index of item in file. 0 by default.
    start, end -> SubRipTime or coercible.
    text -> unicode: text content for item.
    position -> unicode: raw srt/vtt "display coordinates" string
    """
    ITEM_PATTERN = str('%s\n%s --> %s%s\n%s\n')
    TIMESTAMP_SEPARATOR = '-->'

    def __init__(self, index=0, start=None, end=None, text='', position=''):
        try:
            index = int(index)
        except (TypeError, ValueError):
            # A non-numeric index is tolerated and kept exactly as given.
            pass
        self.index = index

        self.start = SubRipTime.coerce(start if start else 0)
        self.end = SubRipTime.coerce(end if end else 0)
        self.position = str(position)
        self.text = str(text)

    @property
    def duration(self):
        """Difference between the end and start times."""
        return self.end - self.start

    @property
    def text_without_tags(self):
        """Item text with every ``<...>`` tag removed."""
        return re.sub(r'<[^>]*?>', '', self.text)

    @property
    def characters_per_second(self):
        """Tag-free, newline-free character count divided by the duration
        in seconds; 0.0 when the duration is zero."""
        visible_text = self.text_without_tags.replace('\n', '')
        characters_count = len(visible_text)
        try:
            seconds = self.duration.ordinal / 1000.0
            return characters_count / seconds
        except ZeroDivisionError:
            return 0.0

    def __str__(self):
        position = ''
        if self.position.strip():
            position = ' %s' % self.position
        fields = (self.index, self.start, self.end, position, self.text)
        return self.ITEM_PATTERN % fields

    if is_py2:
        # On Python 2 the text rendering lives in __unicode__ and str() is
        # explicitly rejected.
        __unicode__ = __str__

        def __str__(self):
            raise NotImplementedError('Use unicode() instead!')

    def _cmpkey(self):
        return (self.start, self.end)

    def shift(self, *args, **kwargs):
        """
        shift(hours, minutes, seconds, milliseconds, ratio)

        Forward the given arguments to the `shift` method of both the start
        and end attributes.

        All arguments are optional and have a default value of 0.
        """
        for boundary in (self.start, self.end):
            boundary.shift(*args, **kwargs)

    @classmethod
    def from_string(cls, source):
        """Build an item from a string by splitting it into lines."""
        return cls.from_lines(source.splitlines(True))

    @classmethod
    def from_lines(cls, lines):
        """Build an item from a list of lines.

        The first line is taken as the index unless it already contains the
        timestamp separator. Raises InvalidItem when fewer than two lines
        are given.
        """
        if len(lines) < 2:
            raise InvalidItem()
        stripped = [line.rstrip() for line in lines]

        if cls.TIMESTAMP_SEPARATOR in stripped[0]:
            index = None
        else:
            index = stripped.pop(0)

        start, end, position = cls.split_timestamps(stripped[0])
        return cls(index, start, end, '\n'.join(stripped[1:]), position)

    @classmethod
    def split_timestamps(cls, line):
        """Split a timestamp line into stripped start, end and position
        strings. Raises InvalidItem unless the line holds exactly one
        separator."""
        halves = line.split(cls.TIMESTAMP_SEPARATOR)
        if len(halves) != 2:
            raise InvalidItem()
        start, remainder = halves

        # Everything after the first space following the end time is the
        # position string (empty when absent).
        end, _, position = remainder.lstrip().partition(' ')
        return (part.strip() for part in (start, end, position))