Exemplo n.º 1
0
def swefn_model(
        out: ModelOutput = ModelOutput("lexical_classes/swefn.pickle")):
    """Download and build SweFN model."""
    # Download swefn.xml and build swefn.pickle
    raw_file = Model("lexical_classes/swefn.xml")
    raw_file.download(
        "https://svn.spraakdata.gu.se/sb-arkiv/pub/lmf/swefn/swefn.xml")
    lexicon = read_swefn(raw_file.path)
    out.write_pickle(lexicon)

    # Clean up
    raw_file.remove()
Exemplo n.º 2
0
def build_model(out: ModelOutput = ModelOutput("sensaldo/sensaldo.pickle")):
    """Download and build SenSALDO model."""
    # Download and extract sensaldo-base-v02.txt
    zip_model = Model("sensaldo/sensaldo-v02.zip")
    zip_model.download(
        "https://svn.spraakdata.gu.se/sb-arkiv/pub/lexikon/sensaldo/sensaldo-v02.zip"
    )
    zip_model.unzip()
    tsv_model = Model("sensaldo/sensaldo-base-v02.txt")

    # Read sensaldo tsv dictionary and save as a pickle file
    lexicon = read_sensaldo(tsv_model)
    out.write_pickle(lexicon)

    # Clean up
    zip_model.remove()
    tsv_model.remove()
    Model("sensaldo/sensaldo-fullform-v02.txt").remove()
Exemplo n.º 3
0
def blingbring_model(
        out: ModelOutput = ModelOutput("lexical_classes/blingbring.pickle")):
    """Download and build Blingbring model."""
    # Download roget hierarchy
    classmap = Model("lexical_classes/roget_hierarchy.xml")
    classmap.download(
        "https://github.com/spraakbanken/sparv-models/raw/master/lexical_classes/roget_hierarchy.xml"
    )

    # Download blingbring.txt and build blingbring.pickle
    raw_file = Model("lexical_classes/blingbring.txt")
    raw_file.download(
        "https://svn.spraakdata.gu.se/sb-arkiv/pub/lexikon/bring/blingbring.txt"
    )
    lexicon = read_blingbring(raw_file.path, classmap.path)
    out.write_pickle(lexicon)

    # Clean up
    raw_file.remove()
    classmap.remove()
Exemplo n.º 4
0
def build_diapivot(out: ModelOutput = ModelOutput("hist/diapivot.pickle")):
    """Download diapivot XML dictionary and save as a pickle file."""
    # Download diapivot.xml
    xml_model = Model("hist/diapivot.xml")
    xml_model.download(
        "https://svn.spraakdata.gu.se/sb-arkiv/pub/lmf/diapivot/diapivot.xml")

    # Create pickle file
    xml_lexicon = read_xml(xml_model.path)
    log.info("Saving cross lexicon in Pickle format")
    picklex = {}
    for lem in xml_lexicon:
        lemgrams = []
        for saldo, match in list(xml_lexicon[lem].items()):
            lemgrams.append(PART_DELIM1.join([saldo, match]))
        picklex[lem] = sorted(lemgrams)

    out.write_pickle(picklex)

    # Clean up
    xml_model.remove()