Exemplo n.º 1
0
    def setUp(self):
        """Build a SQLite database file from the Stave example SQL script.

        The script is expected in a temporary directory after the
        ``maybe_download`` call and is executed against ``db.sqlite3`` in
        that same directory.
        """
        sql_url = "https://raw.githubusercontent.com/asyml/stave/master" \
                  "/simple-backend/example_db.sql"

        # Table names are taken from the readers' default configurations.
        self.datapack_table: str = StaveMultiDocSqlReader.default_configs(
        )['datapack_table']
        self.multipack_table: str = StaveMultiDocSqlReader.default_configs(
        )['multipack_table']
        self.project_table: str = StaveDataPackSqlReader.default_configs(
        )['project_table']

        # NOTE(review): the temporary directory is not cleaned up in the
        # visible lines -- presumably tearDown does it; confirm.
        self.temp_dir = tempfile.TemporaryDirectory()
        maybe_download(sql_url, self.temp_dir.name, 'example_db.sql')
        sql_script: str = os.path.join(self.temp_dir.name, 'example_db.sql')
        self.sql_db: str = os.path.join(self.temp_dir.name, 'db.sqlite3')

        # Bare annotations: these names are declared here but not assigned in
        # the visible part of the method.
        pack_count: int
        mp_count: int

        with open(sql_script) as q_file:
            # Build the example database by executing the sample sql script.
            q = q_file.read()
            conn = sqlite3.connect(self.sql_db)
            c = conn.cursor()
            c.executescript(q)
            conn.commit()
            # NOTE(review): the connection is not closed in the visible lines;
            # the snippet appears to stop here, so confirm against the full
            # file.
Exemplo n.º 2
0
    def test_reader_with_dir(self):
        """Run the pipeline over a directory holding two downloaded pages.

        Both pages are stored in a temporary directory, the directory path is
        passed to ``self.pl1.process_dataset`` and every item it yields must
        be a ``DataPack``.
        """
        # The context manager removes the directory even when a download or
        # an assertion raises; a trailing ``cleanup()`` call would be skipped
        # in that case and leave the directory behind.
        with tempfile.TemporaryDirectory() as tmp_dir:
            maybe_download('https://en.wikipedia.org/wiki/Machine_learning',
                           tmp_dir, 'test_wikipedia.html')
            maybe_download('https://www.yahoo.com/', tmp_dir,
                           'test_yahoo.html')

            for pack in self.pl1.process_dataset(tmp_dir):
                self.assertIsInstance(pack, DataPack)
Exemplo n.º 3
0
def main():
    """Fetch the CliNER model file and its helper files.

    The model goes to ``examples/Cliner/CliNER/models`` and the remaining
    files to ``examples/Cliner/CliNER/tools``; each file is requested with
    its own ``maybe_download`` call, in the order listed below.
    """
    models_dir = Path("examples/Cliner/CliNER/models")
    tools_dir = Path("examples/Cliner/CliNER/tools")

    # Make sure the model directory exists before anything is requested.
    models_dir.mkdir(parents=True, exist_ok=True)

    # One row per download: (url, destination directory, file name).
    downloads = (
        # the pre-trained model
        (
            "https://drive.google.com/file/d/1Jlm2wdmNA-GotTWF60zZRUs1MbnzYox2",
            models_dir,
            "train_full.model",
        ),
        # the dependency package for evaluation
        (
            "https://drive.google.com/file/d/1ZVgJ7EQtMjPpg_v-lCycCLdFgVzmdTxI",
            tools_dir,
            "i2b2va-eval.jar",
        ),
        (
            "https://drive.google.com/file/d/1QvenOvRx7R9XjA5tUxeZnDdoroW94R-N"
            "/view?usp=sharing",
            tools_dir,
            "py3_maxent_treebank_pos_tagger.pickle",
        ),
        (
            "https://drive.google.com/file/d/162jCbpzf5Jez8h0zzA-lAyMkCLcjgAep"
            "/view?usp=sharing",
            tools_dir,
            "py2_maxent_treebank_pos_tagger.pickle",
        ),
    )

    for url, target_dir, filename in downloads:
        maybe_download(urls=[url], path=target_dir, filenames=[filename])
Exemplo n.º 4
0
    def test_data_utils(self):
        """Download one file and check where it landed and what it contains.

        After ``data_utils.maybe_download`` returns, ``self.test_path`` must
        exist, must contain no sub-directories, and the file named
        ``self.file_name`` must hold exactly the lines in ``self.text``.
        """
        drive_url = (
            "https://drive.google.com/file/d/1YHXMiIne5MjSBePsPHPWO6hdRj4w"
            "EnSk/view?usp=sharing"
        )
        data_utils.maybe_download(
            urls=[drive_url],
            path=self.test_path,
            filenames=[self.file_name],
        )

        target_dir = Path(self.test_path)
        self.assertEqual(target_dir.exists(), True)

        # os.walk yields one entry per directory visited, so a single entry
        # means the target directory has no sub-directories.
        walked = list(os.walk(target_dir))
        self.assertEqual(len(walked), 1)

        with open(f"{self.test_path}/{self.file_name}", "r") as handle:
            content = handle.readlines()
        self.assertEqual(content, self.text)
Exemplo n.º 5
0
    def test_reader_with_filepath(self):
        """Run the pipeline on the path returned for a single downloaded page.

        The value returned by ``maybe_download`` is passed to
        ``self.pl1.process_dataset`` and every item it yields must be a
        ``DataPack``.
        """
        # The context manager removes the directory even when the download or
        # an assertion raises; a trailing ``cleanup()`` call would be skipped
        # in that case and leave the directory behind.
        with tempfile.TemporaryDirectory() as tmp_dir:
            filepath = maybe_download("https://www.yahoo.com/", tmp_dir,
                                      "test_yahoo.html")

            for pack in self.pl1.process_dataset(filepath):
                self.assertIsInstance(pack, DataPack)
Exemplo n.º 6
0
from forte.data.data_utils import maybe_download

# Resources to download: each target file name paired with its URL, so the
# two lists handed to maybe_download cannot drift out of step.
_RESOURCES = [
    (
        "config.json",
        "https://drive.google.com/file/d/15RSfFkW9syQKtx-_fQ9KshN3BJ27Jf8t/"
        "view?usp=sharing",
    ),
    (
        "pytorch_model.bin",
        "https://drive.google.com/file/d/1Nh7D6Xam5JefdoSXRoL7S0DZK1d4i2UK/"
        "view?usp=sharing",
    ),
    (
        "special_tokens_map.json",
        "https://drive.google.com/file/d/1YWcI60lGKtTFH01Ai1HnwOKBsrFf2r29/"
        "view?usp=sharing",
    ),
    (
        "tokenizer_config.json",
        "https://drive.google.com/file/d/1ElHUEMPQIuWmV0GimroqFphbCvFKskYj/"
        "view?usp=sharing",
    ),
    (
        "vocab.txt",
        "https://drive.google.com/file/d/1EhMXlieoEg-bGUbbQ2vN-iyNJvC4Dajl/"
        "view?usp=sharing",
    ),
]

filenames = [name for name, _ in _RESOURCES]
urls = [url for _, url in _RESOURCES]

maybe_download(urls=urls, path="resources/NCBI-disease", filenames=filenames)
Exemplo n.º 7
0
        "url from where the model will be directly"
        "downloaded",
    )

    parser.add_argument(
        "--model_dir",
        default=default_model_dir,
        help="Directory to which the model will be downloaded",
    )

    args = parser.parse_args()

    resource_path = Path(args.model_dir)

    # create the path if it doesn't exist
    resource_path.mkdir(parents=True, exist_ok=True)

    # download data
    if args.pretrained_model_name.lower() == "bert-base-uncased":
        gd_id = "1cyUrhs7JaCJTTu-DjFUqP6Bs4f8a6JTX"
        url = f"https://drive.google.com/file/d/{gd_id}/view"

    elif args.pretrained_model_name.lower() == "bert-large-uncased":
        gd_id = "1crlASTMlsihALlkabAQP6JTYIZwC1Wm8"
        url = f"https://drive.google.com/file/d/{gd_id}/view"

    else:
        url = args.pretrained_model_name

    maybe_download(urls=[url], path=resource_path, extract=True)
Exemplo n.º 8
0
from forte.data.data_utils import maybe_download

# Resources to download: each target file name paired with its URL, so the
# two lists handed to maybe_download cannot drift out of step.
_RESOURCES = [
    (
        "model.pkl",
        "https://drive.google.com/file/d/1j3i5U1YffYqKTdSbnlsrMAm9j86nLjxC/view"
        "?usp=sharing",
    ),
    (
        "word_embedding_table.pkl",
        "https://drive.google.com/file/d/1aRPS_b4AFaZTsk9uZ41tkWIBvWbO_s_V/"
        "view?usp=sharing",
    ),
    (
        "word_alphabet.pkl",
        "https://drive.google.com/file/d/1SYpcWEDeTlbOsXlRevS8YS_dyP_k29g0/"
        "view?usp=sharing",
    ),
    (
        "ner_alphabet.pkl",
        "https://drive.google.com/file/d/1S2UMDBX7Ci-Mrm30434t0LOBL__Db92Y/"
        "view?usp=sharing",
    ),
    (
        "char_alphabet.pkl",
        "https://drive.google.com/file/d/1O4iFhBPuogwEgz7bpJjEqDqAlYf5caP4/"
        "view?usp=sharing",
    ),
]

filenames = [name for name, _ in _RESOURCES]
urls = [url for _, url in _RESOURCES]

maybe_download(urls=urls, path="resources/", filenames=filenames)
Exemplo n.º 9
0
            embedding_path / "char_vocab.english.txt",
            "https://drive.google.com/file/d/1hgwmUBk8Mb3iZYiHi1UpCpPFOCfOQLLB/"
            "view?usp=sharing":
            embedding_path / "glove.840B.300d.05.filtered",
            "https://drive.google.com/file/d/1H4PZhJhGoFBqrSMRPufjJ-9zwROw8hAK/"
            "view?usp=sharing":
            embedding_path / "glove_50_300_2.filtered",
            "https://drive.google.com/file/d/1uoA5EnZMWl5m5DMevGcI7UjiXxQRlD9W/"
            "view?usp=sharing":
            embedding_path / "word_vocab.english.txt",
            "https://drive.google.com/file/d/1UZc8x-mhdXg7Rtt6FSBDlEoJb_nHxDAQ/"
            "view?usp=sharing":
            pretrained_path / "model.pt"
        })
        # Keys of the mapping are the URLs and values the matching file
        # names; both lists come from the same dict, so they stay aligned.
        maybe_download(urls=list(urls_to_file_names.keys()),
                       path=model_path,
                       filenames=list(urls_to_file_names.values()))

    # Download the indexer model files.
    elif args.model_name.lower() == "indexer":
        # URL -> file name, in the order the files are requested.
        urls_to_file_names = OrderedDict({
            "https://drive.google.com/file/d/14lL6AoyjdCp-fj8DOlyZhQZrNwoJBfQm/"
            "view?usp=sharing":
            "index.faiss",
            "https://drive.google.com/file/d/1DdgMA7jttgA113EIlebVb33JpGCGbEYf/"
            "view?usp=sharing":
            "index.meta_data"
        })
        # The "chatbot" sub-directory of model_path is passed as the target.
        maybe_download(urls=list(urls_to_file_names.keys()),
                       path=model_path / "chatbot",
                       filenames=list(urls_to_file_names.values()))
Exemplo n.º 10
0
from pathlib import Path

from forte.data.data_utils import maybe_download

if __name__ == "__main__":
    # Target directories: one for the model, one for the helper files.
    model_path = Path("examples/Cliner/CliNER/models")
    pkg_path = Path("examples/Cliner/CliNER/tools")

    # Create the model directory (and any missing parents) if it is absent.
    # NOTE(review): pkg_path is not created here -- presumably maybe_download
    # creates missing target directories; confirm.
    model_path.mkdir(parents=True, exist_ok=True)

    # Download the pre-trained model.
    maybe_download(urls=[
        "https://drive.google.com/file/d/1Jlm2wdmNA-GotTWF60zZRUs1MbnzYox2"],
        path=model_path,
        filenames=["train_full.model"])

    # Download the dependency package for evaluation.
    maybe_download(urls=[
        "https://drive.google.com/file/d/1ZVgJ7EQtMjPpg_v-lCycCLdFgVzmdTxI"],
        path=pkg_path,
        filenames=["i2b2va-eval.jar"])

    # Download py3_maxent_treebank_pos_tagger.pickle into the tools directory.
    maybe_download(urls=[
        "https://drive.google.com/file/d/1QvenOvRx7R9XjA5tUxeZnDdoroW94R-N"
        "/view?usp=sharing"],
        path=pkg_path,
        filenames=["py3_maxent_treebank_pos_tagger.pickle"])

    maybe_download(urls=[
        "https://drive.google.com/file/d/162jCbpzf5Jez8h0zzA-lAyMkCLcjgAep"
Exemplo n.º 11
0
            os.path.join(str(embedding_path), "char_vocab.english.txt"),
            "https://drive.google.com/file/d/1hgwmUBk8Mb3iZYiHi1UpCpPFOCfOQLLB/"
            "view?usp=sharing":
            os.path.join(str(embedding_path), "glove.840B.300d.05.filtered"),
            "https://drive.google.com/file/d/1H4PZhJhGoFBqrSMRPufjJ-9zwROw8hAK/"
            "view?usp=sharing":
            os.path.join(str(embedding_path), "glove_50_300_2.filtered"),
            "https://drive.google.com/file/d/1uoA5EnZMWl5m5DMevGcI7UjiXxQRlD9W/"
            "view?usp=sharing":
            os.path.join(str(embedding_path), "word_vocab.english.txt"),
            "https://drive.google.com/file/d/1UZc8x-mhdXg7Rtt6FSBDlEoJb_nHxDAQ/"
            "view?usp=sharing":
            os.path.join(str(pretrained_path), "model.pt")
        })
        # Keys of the mapping are the URLs and values the matching file
        # paths; both lists come from the same dict, so they stay aligned.
        maybe_download(urls=list(urls_to_file_names.keys()),
                       path=model_path,
                       filenames=list(urls_to_file_names.values()))

    # download ner model
    elif args.model_name.lower() == "ner":
        urls = [
            "https://drive.google.com/file/d/1j3i5U1YffYqKTdSbnlsrMAm9j86nL"
            "jxC/view?usp=sharing",
            "https://drive.google.com/file/d/1aRPS_b4AFaZTsk9uZ41tkWIBvWbO"
            "_s_V/view?usp=sharing",
            "https://drive.google.com/file/d/1SYpcWEDeTlbOsXlRevS8YS_dyP_k2"
            "9g0/view?usp=sharing",
            "https://drive.google.com/file/d/1S2UMDBX7Ci-Mrm30434t0LOBL__Db"
            "92Y/view?usp=sharing",
            "https://drive.google.com/file/d/1O4iFhBPuogwEgz7bpJjEqDqAlYf5"
            "caP4/view?usp=sharing"