def _parse_user_comment(val):
    """
    Interprets a UserComment field value, supplied either as the raw value
    or as a string containing comma separated byte codes.
    """
    # Use the same data type conversion as the parse for field type 7
    if CHAR_LIST_REGEX.match(val):
        val = [parse_int(num) for num in val.split(',')]
    # First 8 bytes give the string encoding - ASCII, JIS, or Unicode
    encoding_char_1 = val[0:1]
    val = val[8:]
    # Decode the rest
    if type(val) != list:
        # val was already a string
        return val
    else:
        # val is a list of byte codes
        encoding_char_1 = _safe_string(encoding_char_1)
        if encoding_char_1 == '' or encoding_char_1 == 'A':
            # ASCII
            return _safe_string(val)
        elif encoding_char_1 == 'J':
            # JIS
            return unicode(str(bytearray(val)), 'SJIS')
        else:
            # Unicode
            return unicode(str(bytearray(val)))
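# A usage sketch for _parse_user_comment (hypothetical inputs; assumes the
# module-level CHAR_LIST_REGEX matches comma separated digit lists and that
# _safe_string converts a list of byte codes to text):
#
#   _parse_user_comment('ASCII\x00\x00\x00Hi')          # -> 'Hi' (8-byte marker stripped)
#   _parse_user_comment('65,83,67,73,73,0,0,0,72,105')  # -> 'Hi' (byte-code form)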
def listen(self, tail=''):
    parts = (tail + ' x x').split()[0:2]
    nap_count = util.parse_int(parts[0], 30)
    nap_time = util.parse_float(parts[1], 1)
    for i in range(0, nap_count):
        log.i([chr(b) for b in self.transport.peek()])
        time.sleep(nap_time)
def handle_message(self, event, query):
    number = parse_int(query)
    if not number:
        response = requests.get(_XKCD_API_CURRENT_URL)
        response.raise_for_status()
        number = response.json()['num']
    return _XKCD_URL_TEMPLATE % number
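# Hypothetical calls (the event argument is unused by this handler):
#
#   handler.handle_message(None, '353')  # URL for comic 353, via _XKCD_URL_TEMPLATE
#   handler.handle_message(None, '')     # non-numeric query: falls back to the
#                                        # current comic number from the xkcd API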
def handle_message(self, event, query):
    times = parse_int(query, default=1)
    dice_gen = (random.randrange(1, 7) for _ in xrange(times))
    if times > 10:
        counter = Counter(dice_gen)
        return ', '.join('%s: %s' % (k, counter[k]) for k in xrange(1, 7))
    else:
        return ', '.join(map(str, dice_gen))
def get_port(request):
    """
    Returns the port number in use on a Flask/Werkzeug request object.
    """
    sep_idx = request.host.find(':')
    if sep_idx == -1:
        return 443 if request.is_secure else 80
    else:
        return parse_int(request.host[sep_idx + 1:])
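# A quick check of get_port, assuming a Flask application object named `app`:
#
#   with app.test_request_context(base_url='https://example.com:8443/'):
#       get_port(request)  # -> 8443 (explicit port in the Host header)
#   with app.test_request_context(base_url='http://example.com/'):
#       get_port(request)  # -> 80 (default for a non-secure request)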
def _get_prop_val(prop_handlers, prop_val):
    """
    Returns the string value of prop_val, as determined by the handlers in
    prop_handlers (use _get_prop_handlers to obtain this object)
    """
    try:
        (_, field_type, print_fn, field_options) = prop_handlers
        # Check for empty values
        if not prop_val:
            return ''
        # Use custom handler if one is set
        if print_fn:
            return print_fn(prop_val)
        if field_type:
            if field_type in (5, 10):
                # Ratios
                sep_idx = prop_val.find('/')
                if sep_idx > -1:
                    prop_val = Ratio(
                        parse_long(prop_val[0:sep_idx]),
                        parse_long(prop_val[sep_idx + 1:])
                    )
            elif field_options:
                # All those with pre-defined options are numeric, types 3, 4, or 7
                vals = prop_val.split(',')
                prop_val = ', '.join([field_options.get(parse_long(v), v) for v in vals])
            elif field_type == 7:
                # Custom encoding. Assume we have either a straight string or a
                # string of comma separated byte codes. i.e. "ABC" or "65, 66, 67"
                if CHAR_LIST_REGEX.match(prop_val):
                    prop_val = _safe_string([parse_int(num) for num in prop_val.split(',')])
                else:
                    prop_val = _safe_string(prop_val)
            else:
                # Anything else we can just convert straight to a string
                pass
        # Return the string form of whatever we now have
        return str(prop_val).strip()
    except:
        # Fall back to returning the value as-is
        return str(prop_val).strip()
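# A usage sketch for _get_prop_val; the handler tuple below is illustrative
# (name, EXIF field type, custom print function, pre-defined options) and
# assumes parse_long('1') yields 1:
#
#   _get_prop_val(('Orientation', 3, None, {1: 'Top left'}), '1')
#   # -> 'Top left', via the field_options lookup branch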
def diagnostics_submit():
    required = set(('device', 'operating-system', 'udid', 'version',
                    'git-revision', 'build-date-epoch', 'time-since-launch',
                    'preprocessor-avg-runtime'))
    if not required.issubset(set(request.form.keys())):
        abort(404)
    report = DiagnosticReport()
    report.submit_date = datetime.datetime.utcnow()
    report.device = request.form['device'].rstrip()
    report.system = request.form['operating-system'].rstrip()
    report.udid = request.form['udid'].rstrip()
    report.version = request.form['version'].rstrip()
    report.gitrev = request.form['git-revision'].rstrip()
    report.build_date = parse_date_epoch(request.form['build-date-epoch'])
    report.time_since_launch = parse_float(request.form['time-since-launch'])
    report.preprocessor_avg_runtime = parse_int(request.form['preprocessor-avg-runtime'])
    report.put()
    return ''
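# A hypothetical submission via Flask's test client (the route path is a
# placeholder; the form keys must cover the `required` set or the handler
# aborts with 404):
#
#   client = app.test_client()
#   client.post('/diagnostics/submit', data={
#       'device': 'iPhone6,1', 'operating-system': 'iOS 9.3', 'udid': 'ABC123',
#       'version': '1.2.0', 'git-revision': 'deadbeef',
#       'build-date-epoch': '1456790400', 'time-since-launch': '12.5',
#       'preprocessor-avg-runtime': '42',
#   })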
def decompress(source, target):
    # some info
    logging.debug("Starting decompression of %s to %s", repr(source), repr(target))
    with open(source, "rb") as fsource:
        # some info
        logging.debug("Parsing header")
        magic, method, majorversion, minorversion, pagesize, uncompressed_size = util.parse_header(
            fsource)
        logging.debug(" Magic number: %s", repr(magic))
        logging.debug(" Method: %s", repr(method))
        logging.debug(" Major version number: %d", majorversion)
        logging.debug(" Minor version number: %d", minorversion)
        logging.debug(" Page size: %d", pagesize)
        logging.debug(" Uncompressed size: %d", uncompressed_size)
        nointra, delta, inner = parse_method_name(method)
        fsource.flush()
        tmphandle, tmpfile = None, None
        if inner == "gzip":
            fsource = gzip.GzipFile(fileobj=fsource, mode="rb", compresslevel=9)
        elif inner == "bzip2":
            fsource = bz2file.BZ2File(filename=fsource, mode="rb", compresslevel=9)
        elif inner == "7zip":
            util.create_dir(".tmp")
            tmphandle, tmpfile = tempfile.mkstemp(dir=".tmp")
            with open(tmpfile, "wb") as ftmp:
                p = subprocess.Popen(["7za", "x", "-txz", "-si", "-so"],
                                     stdin=fsource, stdout=ftmp,
                                     stderr=subprocess.PIPE)
                p.communicate()
            fsource = open(tmpfile, "rb")
        try:
            reference = util.parse_string(fsource)
            logging.debug("Reference dump: %s", reference)
            # parse deduplicated pages
            fills = {}
            reference_list = list(util.parse_pagenr_list(fsource))
            for i in xrange(len(reference_list)):
                for left, right in util.parse_interval_list(fsource):
                    for pagenr in xrange(left, right + 1):
                        fills[pagenr] = reference_list[i]
            # parse diffs
            if delta:
                diffs = {}
                pagenrs = list(util.parse_pagenr_list(fsource))
                for i in xrange(len(pagenrs)):
                    diffs[pagenrs[i]] = util.parse_diff(fsource, pagesize)
            # parse new pages
            newpages = {}
            newdistinct = set()
            if nointra:
                for left, right in list(util.parse_interval_list(fsource)):
                    for j in xrange(left, right + 1):
                        page = fsource.read(pagesize)
                        newdistinct.add(page)
                        newpages[j] = page
            else:
                newcnt = util.parse_int(fsource, 4)
                intervals = []
                for _ in xrange(newcnt):
                    intervals.append(list(util.parse_interval_list(fsource)))
                for i in xrange(newcnt):
                    page = fsource.read(pagesize)
                    for left, right in intervals[i]:
                        for j in xrange(left, right + 1):
                            newdistinct.add(page)
                            newpages[j] = page
        finally:
            if tmphandle is not None:
                os.close(tmphandle)
                os.remove(tmpfile)
            if inner is not None:
                fsource.close()
        # reconstruct file
        pagenr = 0
        final = uncompressed_size / pagesize
        same_distinct, same_total = set(), 0
        different_distinct, different_total = set(), 0
        seen = set()
        diff_seen = set()
        with open(reference, "rb") as freference:
            with open(target, "wb") as ftarget:
                while pagenr < final:
                    if pagenr in fills:
                        # page deduplicated against a different offset in the reference
                        freference.seek(pagesize * fills[pagenr])
                        page = freference.read(pagesize)
                        seen.add(page)
                        different_distinct.add(page)
                        different_total += 1
                        ftarget.write(page)
                    elif delta and pagenr in diffs:
                        # page stored as a diff against the same offset in the reference
                        freference.seek(pagenr * pagesize)
                        page = freference.read(pagesize)
                        newpage = util.apply_diff(page, diffs[pagenr])
                        diff_seen.add(newpage)
                        ftarget.write(newpage)
                    elif pagenr in newpages:
                        # page stored verbatim in the compressed stream
                        seen.add(newpages[pagenr])
                        ftarget.write(newpages[pagenr])
                    else:
                        # page identical to the same offset in the reference
                        freference.seek(pagesize * pagenr)
                        page = freference.read(pagesize)
                        seen.add(page)
                        same_distinct.add(page)
                        same_total += 1
                        ftarget.write(page)
                    pagenr += 1
        # some info
        logging.debug("New pages: %d/%d (%d/%d)",
                      len(newpages), final, len(newdistinct), len(seen))
        logging.debug("Deduplicated pages at the same offset: %d/%d (%d/%d)",
                      same_total, final, len(same_distinct), len(seen))
        logging.debug("Deduplicated pages at different offsets: %d/%d (%d/%d)",
                      len(fills), final, len(different_distinct), len(seen))
        logging.debug("Deduplicated pages in total: %d/%d (%d/%d)",
                      same_total + len(fills), final,
                      len(same_distinct | different_distinct), len(seen))
        if delta:
            logging.debug("Diffed pages: %d/%d (%d/%d)",
                          len(diffs), final, len(diff_seen), len(seen))
    logging.debug("Done")
    return 0
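# A sketch of the expected call (file names are placeholders):
#
#   decompress("memdump.compressed", "memdump.raw")
#
# Note that the reference dump whose path is stored in the compressed stream
# must still exist on disk: deduplicated and diffed pages are re-read from it
# during reconstruction.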
def image():
    logger.debug(request.method + ' ' + request.url)
    try:
        logged_in = session_logged_in()
        allow_uncache = app.config['BENCHMARKING'] or app.config['DEBUG']
        args = request.args
        # Get URL parameters for the image
        src = args.get('src', '')
        page = args.get('page', None)
        iformat = args.get('format', None)
        template = args.get('tmp', None)
        width = args.get('width', None)
        height = args.get('height', None)
        halign = args.get('halign', None)
        valign = args.get('valign', None)
        autosizefit = args.get('autosizefit', None)
        rotation = args.get('angle', None)
        flip = args.get('flip', None)
        top = args.get('top', None)
        left = args.get('left', None)
        bottom = args.get('bottom', None)
        right = args.get('right', None)
        autocropfit = args.get('autocropfit', None)
        fill = args.get('fill', None)
        quality = args.get('quality', None)
        sharpen = args.get('sharpen', None)
        ov_src = args.get('overlay', None)
        ov_size = args.get('ovsize', None)
        ov_opacity = args.get('ovopacity', None)
        ov_pos = args.get('ovpos', None)
        icc_profile = args.get('icc', None)
        icc_intent = args.get('intent', None)
        icc_bpc = args.get('bpc', None)
        colorspace = args.get('colorspace', None)
        strip = args.get('strip', None)
        dpi = args.get('dpi', None)
        tile = args.get('tile', None)
        # Get URL parameters for handling options
        attach = args.get('attach', None)
        xref = args.get('xref', None)
        stats = args.get('stats', None)
        # Get protected admin/internal parameters
        cache = args.get('cache', '1') if logged_in or allow_uncache else '1'
        recache = args.get('recache', None) if allow_uncache else None
        # eRez compatibility mode
        src = erez_params_compat(src)
        # Tweak strings as necessary and convert non-string parameters
        # to the correct data types
        try:
            # Image options
            if page is not None:
                page = parse_int(page)
            if iformat is not None:
                iformat = iformat.lower()
            if template is not None:
                template = template.lower()
            if width is not None:
                width = parse_int(width)
            if height is not None:
                height = parse_int(height)
            if halign is not None:
                halign = halign.lower()
            if valign is not None:
                valign = valign.lower()
            if autosizefit is not None:
                autosizefit = parse_boolean(autosizefit)
            if rotation is not None:
                rotation = parse_float(rotation)
            if flip is not None:
                flip = flip.lower()
            if top is not None:
                top = parse_float(top)
            if left is not None:
                left = parse_float(left)
            if bottom is not None:
                bottom = parse_float(bottom)
            if right is not None:
                right = parse_float(right)
            if autocropfit is not None:
                autocropfit = parse_boolean(autocropfit)
            if fill is not None:
                fill = parse_colour(fill)
            if quality is not None:
                quality = parse_int(quality)
            if sharpen is not None:
                sharpen = parse_int(sharpen)
            if ov_size is not None:
                ov_size = parse_float(ov_size)
            if ov_pos is not None:
                ov_pos = ov_pos.lower()
            if ov_opacity is not None:
                ov_opacity = parse_float(ov_opacity)
            if icc_profile is not None:
                icc_profile = icc_profile.lower()
            if icc_intent is not None:
                icc_intent = icc_intent.lower()
            if icc_bpc is not None:
                icc_bpc = parse_boolean(icc_bpc)
            if colorspace is not None:
                colorspace = colorspace.lower()
            if strip is not None:
                strip = parse_boolean(strip)
            if dpi is not None:
                dpi = parse_int(dpi)
            if tile is not None:
                tile = parse_tile_spec(tile)
            # Handling options
            if attach is not None:
                attach = parse_boolean(attach)
            if xref is not None:
                validate_string(xref, 0, 1024)
            if stats is not None:
                stats = parse_boolean(stats)
            # Admin/internal options
            if cache is not None:
                cache = parse_boolean(cache)
            if recache is not None:
                recache = parse_boolean(recache)
        except (ValueError, TypeError) as e:
            raise httpexc.BadRequest(unicode(e))
        # Package and validate the parameters
        try:
            # #2694 Enforce public image limits - perform easy parameter checks
            if not logged_in:
                width, height, autosizefit = _public_image_limits_pre_image_checks(
                    width, height, autosizefit, tile, template
                )
            # Store and normalise all the parameters
            image_attrs = ImageAttrs(src, -1, page, iformat, template,
                                     width, height, halign, valign,
                                     rotation, flip,
                                     top, left, bottom, right, autocropfit,
                                     autosizefit, fill, quality, sharpen,
                                     ov_src, ov_size, ov_pos, ov_opacity,
                                     icc_profile, icc_intent, icc_bpc,
                                     colorspace, strip, dpi, tile)
            image_engine.finalise_image_attrs(image_attrs)
        except ValueError as e:
            raise httpexc.BadRequest(unicode(e))
        # Get/create the database ID (from cache, validating path on create)
        image_id = data_engine.get_or_create_image_id(
            image_attrs.filename(),
            return_deleted=False,
            on_create=on_image_db_create_anon_history
        )
        if (image_id == 0):
            raise DoesNotExistError()  # Deleted
        elif (image_id < 0):
            raise DBError('Failed to add image to database')
        image_attrs.set_database_id(image_id)
        # Require view permission or file admin
        permissions_engine.ensure_folder_permitted(
            image_attrs.folder_path(),
            FolderPermission.ACCESS_VIEW,
            get_session_user()
        )
        # Ditto for overlays
        if ov_src:
            permissions_engine.ensure_folder_permitted(
                filepath_parent(ov_src),
                FolderPermission.ACCESS_VIEW,
                get_session_user()
            )
        # v1.17 If this is a conditional request with an ETag, see if we can just return a 304
        if 'If-None-Match' in request.headers and not recache:
            etag_valid, modified_time = _etag_is_valid(
                image_attrs, request.headers['If-None-Match'], False
            )
            if etag_valid:
                # Success HTTP 304
                return make_304_response(image_attrs, False, modified_time)
        # Get the requested image data
        image_wrapper = image_engine.get_image(
            image_attrs,
            'refresh' if recache else cache
        )
        if (image_wrapper is None):
            raise DoesNotExistError()
        # #2694 Enforce public image limits - check the dimensions
        # of images that passed the initial parameter checks
        if not logged_in:
            try:
                _public_image_limits_post_image_checks(
                    image_attrs.width(), image_attrs.height(),
                    image_attrs.template(), image_wrapper.data(),
                    image_wrapper.attrs().format()
                )
            except ValueError as e:
                raise httpexc.BadRequest(unicode(e))  # As for the pre-check
        # Success HTTP 200
        return make_image_response(image_wrapper, False, stats, attach, xref)
    except httpexc.HTTPException:
        # Pass through HTTP 4xx and 5xx
        raise
    except ServerTooBusyError:
        logger.warn(u'503 Too busy for ' + request.url)
        raise httpexc.ServiceUnavailable()
    except ImageError as e:
        logger.warn(u'415 Invalid image file \'' + src + '\' : ' + unicode(e))
        raise httpexc.UnsupportedMediaType(unicode(e))
    except SecurityError as e:
        if app.config['DEBUG']:
            raise
        log_security_error(e, request)
        raise httpexc.Forbidden()
    except DoesNotExistError as e:
        # First time around the ID will be set. Next time around it
        # won't but we should check whether the disk file now exists.
        if image_attrs.database_id() > 0 or path_exists(image_attrs.filename(), require_file=True):
            image_engine.reset_image(image_attrs)
        logger.warn(u'404 Not found: ' + unicode(e))
        raise httpexc.NotFound(unicode(e))
    except Exception as e:
        if app.config['DEBUG']:
            raise
        logger.error(u'500 Error for ' + request.url + '\n' + unicode(e))
        raise httpexc.InternalServerError(unicode(e))
async def fetch_playlist(playlistId: str, chunk_size: int = 1024):
    """
    Fetch all metadata of a YouTube playlist.

    Performs multiple asynchronous HTTP requests to obtain all metadata and
    playlist items. Does not provide rich YouTube metadata: fields that are
    included in browse_request queries for modern JS YT-Clients.

    Result:
    {
        'id': string,
        'title': string,
        'description': string,
        'thumbnail': string,
        'length': integer,
        'views': integer,
        'uploader': {
            'name': string,
            'url': string
        },
        'items': [
            {
                'id': string,
                'title': string,
                'uploader': {
                    'name': string,
                    'url': string
                },
                'lengthSeconds': integer,
            }
        ]
    }
    """
    #
    # Initialize the result
    #
    playlist = {'id': playlistId, 'items': []}

    #
    # Build the headers for the ClientSession.
    #
    headers = get_default_headers()
    # Only accept HTML.
    headers['Accept'] = 'text/html'

    #
    # Retrieve landing page.
    #
    # Open an auto-raising ClientSession with default cookie handling.
    async with ClientSession(headers=headers, raise_for_status=True) as session:
        # Step 1: Get the initial landing page.
        async with session.get('https://www.youtube.com/playlist',
                               params={'list': playlistId}) as response:
            # Assert that this really worked the way we wanted.
            assert response.status == 200
            assert response.content_type == 'text/html'
            encoding = response.get_encoding()
            assert is_valid_encoding(encoding)

            # Retrieve the '//div[@id=""]' node.
            is_content = lambda x: (x.tag, x.get('id')) == ('div', '')
            parser = etree.HTMLPullParser(events=('start', 'end'))
            content, discard = None, True
            while content is None and not response.content.at_eof():
                # Feed the parser the next chunk of data.
                parser.feed(
                    (await response.content.read(chunk_size)).decode(encoding))
                for event, node in parser.read_events():
                    if event == 'start':
                        if is_content(node):
                            # Content node reached, stop discarding.
                            discard = False
                        continue
                    if is_content(node):
                        # Content node finished, exit.
                        content = node
                        break
                    if discard:
                        # Discard everything before this point.
                        node.clear()
                        for ancestor in node.xpath('ancestor-or-self::*'):
                            while ancestor.getprevious() is not None:
                                del ancestor.getparent()[0]

        #
        # Parse the playlist header.
        #
        pl_header = content[0]
        assert pl_header.get('id') == 'pl-header'

        # Get the thumbnail.
        pl_thumb = pl_header[0][0]
        assert pl_thumb.tag == 'img'
        playlist['thumbnail'] = pl_thumb.get('src')

        # Get the title.
        pl_title = pl_header[1][0]
        assert pl_title.tag == 'h1'
        playlist['title'] = pl_title.text.strip()

        # Get the uploader.
        pl_uploader = pl_header[1][1][0][0]
        assert pl_uploader.tag == 'a'
        playlist['uploader'] = {
            'name': pl_uploader.text,
            'url': 'https://www.youtube.com' + pl_uploader.get('href')
        }

        # Get the length.
        pl_length = pl_header[1][1][1]
        assert pl_length.tag == 'li'
        playlist['length'] = parse_int(pl_length.text, aggressive=True)

        # Get the view count.
        pl_views = pl_header[1][1][2]
        assert pl_views.tag == 'li'
        playlist['views'] = parse_int(pl_views.text, aggressive=True)

        # Get the description.
        pl_description = pl_header[1][2][0]
        assert pl_description.tag == 'span'
        playlist['description'] = pl_description.text.strip()

        #
        # Parse the playlist items.
        #
        def parse_item(node):
            item = {'id': node.get('data-video-id')}

            # Get the video thumbnail.
            vid_thumb = node[2][0][0][0][0][0][0]
            assert vid_thumb.tag == 'img'
            item['thumbnail'] = vid_thumb.get('data-thumb')

            # Get video title.
            vid_title = node[3][0]
            assert vid_title.tag == 'a'
            item['title'] = vid_title.text.strip()

            # Get video uploader.
            vid_uploader = node[3][1][0]
            assert vid_uploader.tag == 'a'
            item['uploader'] = {
                'name': vid_uploader.text,
                'url': 'https://www.youtube.com' + vid_uploader.get('href')
            }

            # Get video length.
            vid_length = node[6][0][0][0]
            assert vid_length.tag == 'span'
            item['lengthSeconds'] = parse_ts(vid_length.text)

            return item

        pl_items = content[1][0][0][0][0]
        assert pl_items.get('id') == 'pl-load-more-destination'
        playlist['items'] += map(parse_item, pl_items)

        #
        # Fetch and parse all continuations.
        #
        load_more = index_s(content, 1, 0, 0, 1)
        assert load_more.tag == 'button'
        load_more = load_more.get(
            'data-uix-load-more-href') if load_more is not None else None
        while load_more is not None:
            # Request the continuation contents.
            async with session.get('https://www.youtube.com' + load_more,
                                   headers={'Accept': 'application/json'}) as response:
                # Assert that this really worked the way we wanted.
                assert response.status == 200
                assert response.content_type == 'application/json'
                encoding = response.get_encoding()
                assert is_valid_encoding(encoding)

                # Parse the result data (large!)
                # TODO: This ought to be streamed as well.
                data = await response.json()
                assert 'content_html' in data
                assert 'load_more_widget_html' in data

                # Parse all new items.
                parser = etree.HTMLPullParser(events=('end', ))
                parser.feed(data['content_html'])
                for _, node in parser.read_events():
                    if node.tag == 'tr':
                        playlist['items'] += [parse_item(node)]
                        # Discard everything before this point.
                        node.clear()
                        for ancestor in node.xpath('ancestor-or-self::*'):
                            while ancestor.getprevious() is not None:
                                del ancestor.getparent()[0]

                # Extract the next continuation link
                match = re.search(r'data-uix-load-more-href=\"(.+?)\"',
                                  data['load_more_widget_html'])
                if match:
                    # Next continuation link.
                    load_more = match.group(1)
                else:
                    # No more continuations.
                    load_more = None

    return playlist
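# A hypothetical driver for fetch_playlist; needs a running asyncio event loop
# and network access (the playlist ID below is a placeholder):
#
#   import asyncio
#
#   async def main():
#       playlist = await fetch_playlist('PLxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
#       print(playlist['title'], playlist['length'], len(playlist['items']))
#
#   asyncio.get_event_loop().run_until_complete(main())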
def decompress(source, target):
    # some info
    logging.debug("Starting decompression of %s to %s", repr(source), repr(target))
    with open(source, "rb") as fsource:
        # some info
        logging.debug("Parsing header")
        magic, method, majorversion, minorversion, pagesize, uncompressed_size = util.parse_header(
            fsource)
        logging.debug(" Magic number: %s", repr(magic))
        logging.debug(" Method: %s", repr(method))
        logging.debug(" Major version number: %d", majorversion)
        logging.debug(" Minor version number: %d", minorversion)
        logging.debug(" Page size: %d", pagesize)
        logging.debug(" Uncompressed size: %d", uncompressed_size)
        inner = method.split("intradedup")[1]
        if not method.startswith("intradedup") or inner not in ("", "gzip", "bzip2", "7zip"):
            logging.error("Invalid method %s", repr(method))
            return -1
        fsource.flush()
        tmphandle, tmpfile = None, None
        if inner == "gzip":
            fsource = gzip.GzipFile(fileobj=fsource, mode="rb", compresslevel=9)
        elif inner == "bzip2":
            fsource = bz2file.BZ2File(filename=fsource, mode="rb", compresslevel=9)
        elif inner == "7zip":
            util.create_dir(".tmp")
            tmphandle, tmpfile = tempfile.mkstemp(dir=".tmp")
            with open(tmpfile, "wb") as ftmp:
                p = subprocess.Popen(["7za", "x", "-txz", "-si", "-so"],
                                     stdin=fsource, stdout=ftmp,
                                     stderr=subprocess.PIPE)
                p.communicate()
            fsource = open(tmpfile, "rb")
        try:
            # parse dictionary
            distinct = util.parse_int(fsource, 4)
            fills = {}
            pagelist = []
            for _ in xrange(distinct):
                page = fsource.read(pagesize)
                pagelist.append(page)
            for i in xrange(distinct):
                for left, right in util.parse_interval_list(fsource):
                    for pagenr in xrange(left, right + 1):
                        fills[pagenr] = pagelist[i]
            # reconstruct file
            pagenr = 0
            seen = set()
            with open(target, "wb") as ftarget:
                while True:
                    if pagenr in fills:
                        ftarget.write(fills[pagenr])
                        seen.add(fills[pagenr])
                    else:
                        page = fsource.read(pagesize)
                        seen.add(page)
                        if not page:
                            pagenr += 1
                            break
                        ftarget.write(page)
                    pagenr += 1
                # flush any remaining fill pages located after the last new page
                while pagenr in fills:
                    ftarget.write(fills[pagenr])
                    seen.add(fills[pagenr])
                    pagenr += 1
            logging.debug("Deduplicated pages: %d/%d (%d/%d)",
                          len(fills), uncompressed_size / pagesize, distinct, len(seen))
        finally:
            if tmphandle is not None:
                os.close(tmphandle)
                os.remove(tmpfile)
            if inner is not None:
                fsource.close()
    logging.debug("Done")
    return 0
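# A sketch of the expected call for this "intradedup" variant (file names are
# placeholders):
#
#   status = decompress("memdump.intradedup", "memdump.raw")
#   # status is 0 on success, or -1 if the header's method string is not one of
#   # the recognised "intradedup" / "intradedupgzip" / "intradedupbzip2" /
#   # "intradedup7zip" values.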