示例#1
0
        for ch in "1" + string.lowercase:  # '1' for #
            url = "http://www.noslang.com/{}/{}".format(resource, ch)
            print("Processing " + url)
            r = requests.get(url)
            if not r.ok:
                print("Skipping {} (status code {})".format(ch, r.status_code),
                      file=sys.stderr)

            page = html.fromstring(r.text)
            for abbr in page.cssselect("abbr"):
                a = abbr.getprevious()
                definition = abbr.attrib["title"].lower()
                if definition in fucking_shit:
                    definition = fucking_shit[definition]
                else:
                    for stars, replacement in fucking_shit.iteritems():
                        definition = definition.replace(stars, replacement)

                yield a.attrib["name"].decode("utf-8"), definition


if __name__ == "__main__":
    # Exactly one positional argument is expected: the file the trie is
    # written to. Anything else prints the usage line and exits with 1.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: [prog] path/to/trie", file=sys.stderr)
        sys.exit(1)
    path = cli_args[0]

    # Build the trie straight from the pairs yielded by iter_noslang()
    # and persist it at the requested location.
    BytesTrie(iter_noslang()).save(path)
示例#2
0
import json
import sys
from marisa_trie import BytesTrie

if __name__ == "__main__":
    # Convert ./mention_stat_<lang>.json into a marisa BytesTrie and save
    # it as mention_stat_<lang>.marisa in the current directory.
    if len(sys.argv) < 2:
        # Fail with a usage hint instead of an IndexError traceback.
        print("Usage: {} LANG".format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)
    lang = sys.argv[1]

    print("load mention_stat")
    # Decode explicitly as UTF-8 rather than relying on the locale default,
    # which may mis-decode non-ASCII keys on some platforms.
    with open("./mention_stat_{}.json".format(lang), encoding="utf-8") as f:
        data = json.load(f)

    print("mention_stat to trie")
    # Each value is stored as its JSON text encoded to UTF-8 bytes. A
    # generator avoids materialising a list of every (key, bytes) pair
    # before the trie is built.
    trie = BytesTrie((key, json.dumps(value).encode("utf-8"))
                     for key, value in data.items())

    print("saving...")
    trie.save("mention_stat_{}.marisa".format(lang))

    print("Done!")