def fix_image_url(url, repo_name, branch="master"):
    """Rewrite a GitHub image link so that it points at the raw file.

    Links on ``github.com`` are invalid as image sources, because they
    return *html* content. The image bytes are served from
    ``raw.githubusercontent.com`` instead. For example:

    - This returns html:
      https://github.com/Retrothopter/Niobium-Nanotech/blob/master/Preview.png
    - This returns a png:
      https://raw.githubusercontent.com/Retrothopter/Niobium-Nanotech/master/Preview.png

    Links without a domain are relative and therefore also invalid; they
    are resolved against the root of the repository. For example:

    - preview.png
    - sprites/preview.png
    - /sprites/preview.png

    Args:
        url: The image link to fix.
        repo_name: Repository in ``owner/name`` form, used both to recognise
            ``github.com`` blob links and to resolve relative links.
        branch: Branch the link refers to. Defaults to ``"master"``.

    Returns:
        The ``raw.githubusercontent.com`` link for ``github.com`` blob links
        of this repository/branch and for relative links; any other link is
        returned unchanged. Query strings and fragments of rewritten links
        are dropped.
    """
    # FIXME: the default assumes `master` is always the branch we want, while in
    # reality callers should pass the `default_branch` of the repository, which
    # could also for example be `main`.
    from urllib.parse import urlparse

    parsed = urlparse(url)
    raw_root = f"https://raw.githubusercontent.com/{repo_name}/{branch}/"

    if parsed.netloc == "raw.githubusercontent.com":
        # Already a raw link, nothing to fix.
        return url

    if parsed.netloc == "github.com":
        # Accept at most one leading slash in front of `<repo>/blob/<branch>/`.
        path = parsed.path[1:] if parsed.path.startswith("/") else parsed.path
        blob_prefix = f"{repo_name}/blob/{branch}/"
        # Require a non-empty file path after the prefix; otherwise leave the
        # link alone (it is not a blob link of this repository/branch).
        if path.startswith(blob_prefix) and len(path) > len(blob_prefix):
            return raw_root + path[len(blob_prefix):]
        return url

    if parsed.netloc == "":
        # Relative link: strip leading slashes so that `/sprites/x.png` does not
        # become `.../<branch>//sprites/x.png`.
        return raw_root + parsed.path.lstrip("/")

    return url
def fix_image_url(url, repo_name, branch="master"):
    """Fix a GitHub url that should point to an image.

    Any links with ``github.com`` are invalid, because they're html links,
    while image links have ``raw.githubusercontent.com``, for example:

    - https://github.com/Retrothopter/Niobium-Nanotech/blob/master/Preview.png

    Any links that don't have a domain are relative and as such invalid,
    for example:

    - preview.png
    - sprites/preview.png
    - /sprites/preview.png

    This is also why a repo name is required: relative links are resolved
    against the root of that repository.

    Args:
        url: The image link to fix.
        repo_name: Repository in ``owner/name`` form.
        branch: Branch the link refers to. Defaults to ``"master"``.

    Returns:
        The ``raw.githubusercontent.com`` link for ``github.com`` blob links
        of this repository/branch and for relative links; any other link is
        returned unchanged. Query strings and fragments of rewritten links
        are dropped.
    """
    from urllib.parse import urlparse

    parsed = urlparse(url)
    raw_root = f"https://raw.githubusercontent.com/{repo_name}/{branch}/"

    if parsed.netloc == "raw.githubusercontent.com":
        # Already a raw link, nothing to fix.
        return url

    if parsed.netloc == "github.com":
        # Accept at most one leading slash in front of `<repo>/blob/<branch>/`.
        path = parsed.path[1:] if parsed.path.startswith("/") else parsed.path
        blob_prefix = f"{repo_name}/blob/{branch}/"
        # Require a non-empty file path after the prefix; otherwise the link
        # is not a blob link of this repository/branch and is left alone.
        if path.startswith(blob_prefix) and len(path) > len(blob_prefix):
            return raw_root + path[len(blob_prefix):]
        return url

    if parsed.netloc == "":
        # Relative link: strip leading slashes so that `/sprites/x.png` does not
        # become `.../<branch>//sprites/x.png`.
        return raw_root + parsed.path.lstrip("/")

    # Not a GitHub link: returned untouched.
    return url
from parsec import string, none_of, sepBy, many

# Small CSV grammar assembled from parsec combinators.

# One character inside a quoted cell: anything except a double quote, or a
# doubled double quote, which is turned into a single literal quote.
quoted_char = none_of('"') | string('""').result('"')

# A quoted cell: the characters between an opening and a closing double quote,
# joined into one string.
quoted = (
    string('"') >> many(quoted_char) << string('"')
).parsecmap("".join)

# A cell is either quoted, or a bare run of characters that stops at the next
# comma or newline (joined into one string).
cell = quoted | many(none_of(",\n")).parsecmap("".join)

# A row is a comma-separated list of cells.
cells = sepBy(cell, string(","))

# A table is a newline-separated list of rows.
table = sepBy(cells, string("\n"))
def quoted_string():
    """Parse a double-quoted string and return the text between the quotes.

    Generator-style parser body: each ``yield`` hands a sub-parser to the
    driver and receives that sub-parser's result back. There is no escape
    handling; the body ends at the first ``"`` encountered.
    """
    quote = string('"')
    yield quote                            # opening quote
    body = yield many(none_of('"'))        # list of the enclosed characters
    yield quote                            # closing quote
    return "".join(body)
def rest_line():
    """Parse remainder of line.

    Generator-style parser body: yields one parser that collects every
    character up to (not including) the next ``\\n`` or ``\\r`` and returns
    those characters joined into a single string.
    """
    # `ignore` is defined elsewhere in the file; it is chained after the line
    # body so that we still function as a lexeme.
    remainder = many(none_of("\n\r")) << ignore
    collected = yield remainder
    return "".join(collected)
def rest_line():
    """Parse remainder of line.

    Generator-style parser body: yields one parser that collects every
    character up to (not including) the next ``\\n`` or ``\\r`` and returns
    those characters joined into a single string.
    """
    # `ignore` is chained after the line body so that we still function as a
    # lexeme.
    remainder = many(none_of("\n\r")) << ignore
    collected = yield remainder
    return "".join(collected)


# Consume the rest of the current line and replace the result with a marker.
skip_line = lexeme(rest_line).result('(Skipped.)')

# Bare name characters: letters, digits and the punctuation `_ / . ( ) # -`.
name_only = regex(r"[_a-zA-Z0-9/\.()#-]+")
name = word(name_only)

# Starts with a letter or underscore, then runs until whitespace, a
# parenthesis or a square bracket.
symbol = lexeme(regex(r"[a-zA-Z_][^\s()\[\]]*"))

# Boolean literals, mapped to the corresponding Python values.
true = lexeme(string("True")).result(True)
false = lexeme(string("False")).result(False)

# Double-quoted text with no embedded double quote; the regex match includes
# the surrounding quotes.
quoted_string = lexeme(regex(r'"[^"]*"'))

# NOTE(review): presumably a parser that never succeeds, since it is given an
# empty set of acceptable characters - confirm against `one_of`.
fail = one_of("")

# Skip over everything until the next keyword begins: the current line, then
# every following line whose first character is not "[".
skip_keyword = (
    skip_line >> many(none_of("[") >> skip_line)
).result('(Skipped.)')

# Magnitude suffix letter -> exponent text.
IBIS_num_suf = {
    'T': 'e12',
    'k': 'e3',
    'n': 'e-9',
    'G': 'e9',
    'm': 'e-3',
    'p': 'e-12',
    'M': 'e6',
    'u': 'e-6',
    'f': 'e-15',
}
def parse_string():
    """Parse a double-quoted string literal.

    Generator-style parser body: yields a parser for the opening ``"``, one
    for every following character that is not a ``"``, and one for the
    closing ``"``. There is no escape handling.

    Returns:
        ``wtypes.String`` wrapping the text found between the two quotes.
    """
    quote = parsec.string('"')
    yield quote                                          # opening quote
    body = yield parsec.many(parsec.none_of('"'))        # enclosed characters
    yield quote                                          # closing quote
    return wtypes.String("".join(body))