Exemplo n.º 1
0
def ctx2vec(gpu, skip_pip):
    """Fetch and prepare the context2vec system under ``systems/context2vec``.

    Steps, in order: clone the upstream repository into ``systems``, copy the
    support files from ``support/context2vec`` over the checkout, optionally
    run ``pipenv install`` inside it, then retrieve the pre-trained model
    archive and unpack it into the checkout directory.

    Args:
        gpu: When truthy the Pipfiles are taken from the ``gpu`` support
            sub-directory, otherwise from ``nogpu``.
        skip_pip: When truthy the Pipfiles are not copied and
            ``pipenv install`` is not run.

    NOTE(review): the clone is attempted unconditionally, so a pre-existing
    ``systems/context2vec`` checkout presumably makes ``git clone`` fail --
    confirm against how this is invoked.
    """
    os.makedirs("systems", exist_ok=True)
    with local.cwd("systems"):
        git("clone", "https://github.com/orenmel/context2vec.git")

    if not skip_pip:
        subdir = "gpu" if gpu else "nogpu"
        for fn in ["Pipfile", "Pipfile.lock"]:
            copyfile(
                "support/context2vec/{}/{}".format(subdir, fn),
                "systems/context2vec/{}".format(fn),
            )

    # Replace the upstream WSD evaluation scripts with the local versions.
    for fn in ["test.py", "train.py"]:
        copyfile(
            "support/context2vec/{}".format(fn),
            "systems/context2vec/context2vec/eval/wsd/{}".format(fn),
        )

    with local.cwd("systems/context2vec"), local.env(
            PIPENV_IGNORE_VIRTUALENVS="1"):
        if not skip_pip:
            pipenv("install")

        # urlretrieve is used here as returning a local file path (it is fed
        # straight to ZipFile and os.remove).
        tmp_zipped_model_fn = urlretrieve(
            "https://archive.org/download/ctx2vec-b100-3epoch/ctx2vec-b100-3epoch.zip"
        )
        try:
            # Close the archive before deleting it: an open handle leaks a
            # file descriptor and blocks removal on platforms that refuse to
            # delete open files.
            with ZipFile(tmp_zipped_model_fn) as tmp_zip:
                tmp_zip.extractall(".")
        finally:
            os.remove(tmp_zipped_model_fn)
Exemplo n.º 2
0
 def _bootstrap(self, res):
     """Retrieve the zipped Elmo word vectors and unpack them.

     The archive referenced by ``self.URL`` is fetched to a temporary file,
     extracted into the directory returned by ``self.get_res("")`` and the
     temporary file is removed afterwards, whether or not extraction
     succeeded.

     Args:
         res: Not used by this implementation.
     """
     logger.info("Downloading Elmo word vectors")
     zipped_tmp_fn = urlretrieve(self.URL)
     try:
         # The with-block closes the archive before the file is deleted;
         # leaving it open leaks the handle and can make os.remove fail on
         # platforms that do not allow deleting open files.
         with zipfile.ZipFile(zipped_tmp_fn) as tmp_zip:
             tmp_zip.extractall(self.get_res(""))
     finally:
         os.remove(zipped_tmp_fn)
Exemplo n.º 3
0
 def _download(self, lang, url, dest):
     """Fetch the word vectors at ``url`` and save the loaded model to ``dest``.

     Args:
         lang: Label interpolated into the progress log messages.
         url: Location handed to ``urlretrieve``.
         dest: Target passed to the loaded model's ``save`` method.

     The retrieved temporary file is always removed, even when loading or
     saving raises.
     """
     logger.info("Downloading {} word vectors".format(lang))
     downloaded_path = urlretrieve(url)
     try:
         logger.info("Converting {} word vectors".format(lang))
         # Load in word2vec format and immediately persist to the destination.
         load_word2vec_format(downloaded_path).save(dest)
     finally:
         os.remove(downloaded_path)
Exemplo n.º 4
0
 def _bootstrap(self, res):
     """Retrieve the zipped Word2Vec vectors and save them as the "vecs" resource.

     The archive at ``self.URL`` is fetched to a temporary file, its
     ``model.txt`` member is copied out to a second temporary file, that file
     is loaded with ``load_word2vec_format`` (undecodable bytes replaced) and
     the result is saved to ``self._get_res_path("vecs")``. Both temporary
     files are removed afterwards.

     Args:
         res: Not used by this implementation.
     """
     logger.info("Downloading Word2Vec word vectors")
     zipped_tmp_fn = urlretrieve(self.URL)
     try:
         with zipfile.ZipFile(zipped_tmp_fn) as tmp_zip:
             tmp_fn = get_tmpfile("word2vec-fi.txt")
             try:
                 # Close both ends of the copy before reading tmp_fn back so
                 # the written bytes are flushed deterministically instead of
                 # whenever the file objects happen to be garbage collected.
                 with tmp_zip.open("model.txt") as src, \
                         open(tmp_fn, "wb") as dst:
                     copyfileobj(src, dst)
                 logger.info("Converting Word2Vec word vectors")
                 fi = load_word2vec_format(tmp_fn, unicode_errors="replace")
                 fi.save(self._get_res_path("vecs"))
             finally:
                 try:
                     os.remove(tmp_fn)
                 except FileNotFoundError:
                     # The extraction failed before tmp_fn was created; do not
                     # let the cleanup mask the original error.
                     pass
     finally:
         os.remove(zipped_tmp_fn)
Exemplo n.º 5
0
 def _bootstrap(self, res):
     """Retrieve the zipped FiWN ConceptNet data and store its two resources.

     The archive at ``self.URL`` is fetched to a temporary file. Its
     ``outputVectors.txt`` member is copied to a second temporary file, loaded
     with ``load_word2vec_format`` and saved to ``self._get_res_path("vecs")``.
     Its ``synsets.txt`` member is then copied verbatim to
     ``self._get_res_path("synsets")``. Both temporary files are removed
     afterwards.

     Args:
         res: Not used by this implementation.
     """
     logger.info("Downloading FiWN ConceptNet word vectors")
     zipped_tmp_fn = urlretrieve(self.URL)
     try:
         with zipfile.ZipFile(zipped_tmp_fn) as tmp_zip:
             tmp_fn = get_tmpfile("fiwn-conceptnet.txt")
             try:
                 # Close both ends of the copy before reading tmp_fn back so
                 # the written bytes are flushed deterministically instead of
                 # whenever the file objects happen to be garbage collected.
                 with tmp_zip.open("outputVectors.txt") as src, \
                         open(tmp_fn, "wb") as dst:
                     copyfileobj(src, dst)
                 logger.info("Converting FiWN ConceptNet word vectors")
                 fi = load_word2vec_format(tmp_fn)
                 fi.save(self._get_res_path("vecs"))
             finally:
                 try:
                     os.remove(tmp_fn)
                 except FileNotFoundError:
                     # The extraction failed before tmp_fn was created; do not
                     # let the cleanup mask the original error.
                     pass
             # The synsets file is stored as-is; close the output explicitly
             # so the resource is complete on disk when this method returns.
             with tmp_zip.open("synsets.txt") as src, \
                     open(self._get_res_path("synsets"), "wb") as dst:
                 copyfileobj(src, dst)
     finally:
         os.remove(zipped_tmp_fn)
Exemplo n.º 6
0
 def _bootstrap(self, _res):
     """Retrieve the gzipped word vectors and save them as the "vecs" resource.

     The file at ``self.URL`` is fetched to a temporary file, decompressed
     into a second temporary file, loaded with ``load_word2vec_format`` and
     saved to ``self._get_res_path("vecs")``. Removal of both temporary files
     is best-effort: an ``OSError`` during cleanup is ignored.

     Args:
         _res: Not used by this implementation.
     """
     logger.info("Downloading word vectors")
     gzipped_glove_tmp_fn = urlretrieve(self.URL)
     try:
         glove_tmp_fn = get_tmpfile("glove.txt")
         try:
             # Close both the gzip reader and the output file before the
             # output is read back, so the decompressed bytes are flushed
             # deterministically rather than at garbage-collection time.
             with gzip.open(gzipped_glove_tmp_fn) as src, \
                     open(glove_tmp_fn, "wb") as dst:
                 copyfileobj(src, dst)
             logger.info("Converting word vectors")
             fi = load_word2vec_format(glove_tmp_fn)
             fi.save(self._get_res_path("vecs"))
         finally:
             # Best-effort cleanup: the file may never have been created.
             try:
                 os.remove(glove_tmp_fn)
             except OSError:
                 pass
     finally:
         # Best-effort cleanup of the downloaded archive.
         try:
             os.remove(gzipped_glove_tmp_fn)
         except OSError:
             pass
Exemplo n.º 7
0
from finntk.utils import urlretrieve
from conceptnet5.readers.wiktionary import prepare_db

# Location of the gzipped English Wiktionary dump to process.
URL = "https://conceptnet.s3.amazonaws.com/precomputed-data/2016/wiktionary/parsed-2/en.jsons.gz"
# Fetch the dump, requesting the local name "en.jsons.gz"; the return value is
# passed on as the input file below.
en_wiktionary_gz = urlretrieve(URL, filename="en.jsons.gz")
# Hand the downloaded file to ConceptNet's Wiktionary reader together with the
# target name "wiktionary.db" (presumably the database it builds -- confirm
# against prepare_db's documentation).
prepare_db([en_wiktionary_gz], "wiktionary.db")