def build_model(out: ModelOutput = ModelOutput("malt/swemalt-1.7.2.mco"),
                _maltjar: Binary = Binary("[malt.jar]")):
    """Fetch the Swedish model file for MALT Parser.

    The model is not supposed to be downloaded unless the MALT jar has been
    installed; ``_maltjar`` is declared for that reason and is never read
    inside the function body.

    Args:
        out: Model target that receives the downloaded file.
        _maltjar: Reference to the MALT Parser jar binary (unused here).
    """
    model_url = "http://maltparser.org/mco/swedish_parser/swemalt-1.7.2.mco"
    out.download(model_url)
def build_model(sense_model: ModelOutput = ModelOutput(
                    "wsd/ALL_512_128_w10_A2_140403_ctx1.bin"),
                context_model: ModelOutput = ModelOutput(
                    "wsd/lem_cbow0_s512_w10_NEW2_ctx.bin")):
    """Fetch the two model files used for SALDO-based word sense disambiguation.

    Args:
        sense_model: Model target that receives the sense model.
        context_model: Model target that receives the context model.
    """
    # Sense model first, then context model (same order as before).
    downloads = (
        (
            sense_model,
            "https://github.com/spraakbanken/sparv-wsd/raw/master/models/scouse/ALL_512_128_w10_A2_140403_ctx1.bin",
        ),
        (
            context_model,
            "https://github.com/spraakbanken/sparv-wsd/raw/master/models/scouse/lem_cbow0_s512_w10_NEW2_ctx.bin",
        ),
    )
    for target, url in downloads:
        target.download(url)
def morphtable_inputs(
        suc: ModelOutput = ModelOutput("hunpos/suc3_morphtable.words"),
        morphtable_base: ModelOutput = ModelOutput("hunpos/suc.morphtable"),
        morphtable_patterns: ModelOutput = ModelOutput("hunpos/suc.patterns")):
    """Fetch the input files required for building the SALDO morphtable.

    Args:
        suc: Model target that receives ``suc3_morphtable.words``.
        morphtable_base: Model target that receives ``suc.morphtable``.
        morphtable_patterns: Model target that receives ``suc.patterns``.
    """
    # Each target is paired with its source URL; downloaded in this order.
    downloads = (
        (
            suc,
            "https://github.com/spraakbanken/sparv-models/raw/master/hunpos/suc3_morphtable.words",
        ),
        (
            morphtable_base,
            "https://github.com/spraakbanken/sparv-models/raw/master/hunpos/suc.morphtable",
        ),
        (
            morphtable_patterns,
            "https://github.com/spraakbanken/sparv-models/raw/master/hunpos/suc.patterns",
        ),
    )
    for target, url in downloads:
        target.download(url)
def download_saldo(out: ModelOutput = ModelOutput("saldo/saldom.xml")):
    """Fetch the SALDO morphology XML file.

    Args:
        out: Model target that receives the downloaded XML.
    """
    saldom_url = "https://svn.spraakdata.gu.se/sb-arkiv/pub/lexikon/saldom/saldom.xml"
    out.download(saldom_url)
def download_dalin_wordlist(
        out: ModelOutput = ModelOutput("hunpos/hist/dalinm.hunpos")):
    """Fetch the Dalin wordlist.

    Args:
        out: Model target that receives the downloaded wordlist.
    """
    wordlist_url = "https://github.com/spraakbanken/sparv-models/raw/master/hunpos/hist/dalinm.hunpos"
    out.download(wordlist_url)
def download_swedberg_wordlist(
        out: ModelOutput = ModelOutput("hunpos/hist/swedberg-gender.hunpos")):
    """Fetch the Swedberg wordlist.

    Args:
        out: Model target that receives the downloaded wordlist.
    """
    wordlist_url = "https://github.com/spraakbanken/sparv-models/raw/master/hunpos/hist/swedberg-gender.hunpos"
    out.download(wordlist_url)
def download_bettertokenizer(
        out: ModelOutput = ModelOutput("segment/bettertokenizer.sv")):
    """Fetch the model file used with BetterWordTokenizer.

    Args:
        out: Model target that receives the downloaded file.
    """
    tokenizer_url = "https://github.com/spraakbanken/sparv-models/raw/master/segment/bettertokenizer.sv"
    out.download(tokenizer_url)
def download_punkt_model(
        out: ModelOutput = ModelOutput("segment/punkt-nltk-svenska.pickle")):
    """Fetch the model file used with PunktSentenceTokenizer.

    Args:
        out: Model target that receives the downloaded pickle file.
    """
    punkt_url = "https://github.com/spraakbanken/sparv-models/raw/master/segment/punkt-nltk-svenska.pickle"
    out.download(punkt_url)
def hunpos_model(model: ModelOutput = ModelOutput(
        "hunpos/suc3_suc-tags_default-setting_utf8.model")):
    """Fetch the Hunpos model file.

    Args:
        model: Model target that receives the downloaded file.
    """
    hunpos_url = "https://github.com/spraakbanken/sparv-models/raw/master/hunpos/suc3_suc-tags_default-setting_utf8.model"
    model.download(hunpos_url)
def download_nst_comp(
        out: ModelOutput = ModelOutput("saldo/nst_comp_pos.pickle")):
    """Fetch the compound POS model from the sparv-models repository.

    Args:
        out: Model target that receives the downloaded pickle file.
    """
    nst_comp_url = "https://github.com/spraakbanken/sparv-models/raw/master/saldo/nst_comp_pos.pickle"
    out.download(nst_comp_url)
def metashare_template(model: ModelOutput = ModelOutput(
        "sbx_metadata/sbx-metashare-template.xml")):
    """Fetch the SBX META-SHARE template file.

    Args:
        model: Model target that receives the downloaded XML template.
    """
    template_url = "https://raw.githubusercontent.com/spraakbanken/sparv-sbx-metadata/main/data/sbx-metashare-template.xml"
    model.download(template_url)
def download_korp_stats(out: ModelOutput = ModelOutput("saldo/stats.pickle")):
    """Fetch the ``stats.pickle`` model file.

    Args:
        out: Model target that receives the downloaded pickle file.
    """
    stats_url = "https://github.com/spraakbanken/sparv-models/raw/master/saldo/stats.pickle"
    out.download(stats_url)
def swefn_freq_model(out: ModelOutput = ModelOutput(
        "lexical_classes/swefn.freq.gp2008+suc3+romi.pickle")):
    """Fetch the SweFN frequency model file.

    Args:
        out: Model target that receives the downloaded pickle file.
    """
    freq_model_url = "https://github.com/spraakbanken/sparv-models/raw/master/lexical_classes/swefn.freq.gp2008+suc3+romi.pickle"
    out.download(freq_model_url)