Example #1
def connect_to_server(
        host: str = typer.Option(...),
        username: str = typer.Option(...),
        tls: bool = True,
        password: str = typer.Option(..., prompt=True, hide_input=True),
):
    global client
    client = Client(host)
    if not client.connect(username, password, starttls=tls):
        msg.fail("Couldn't connect to server. Wrong password?")
        raise typer.Exit(code=1)
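For context, a minimal sketch of how such a command can be wired up with typer and wasabi (the host check below is a stand-in for a real connection attempt):
import typer
from wasabi import msg

app = typer.Typer()

@app.command()
def connect(host: str = typer.Option(...)):
    connected = host.startswith("smtp.")  # stand-in for a real connection attempt
    if not connected:
        msg.fail("Couldn't connect to server.")
        raise typer.Exit(code=1)
    msg.good(f"Connected to {host}")

if __name__ == "__main__":
    app()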
Example #2
def import_code(code_path: Optional[Union[Path, str]]) -> None:
    """Helper to import Python file provided in training commands / commands
    using the config. This makes custom registered functions available.
    """
    if code_path is not None:
        if not Path(code_path).exists():
            msg.fail("Path to Python code not found", code_path, exits=1)
        try:
            import_file("python_code", code_path)
        except Exception as e:
            msg.fail(f"Couldn't load Python code: {code_path}", e, exits=1)
Example #3
def get_gh():
    token_path = ROOT / SECRET_FILE
    if ENV.GH_SECRET in os.environ:
        token = os.environ[ENV.GH_SECRET]
    elif token_path.exists():
        with token_path.open("r", encoding="utf-8") as f:
            token = f.read().strip()
    else:
        err = f"Can't find GitGub token. Not {ENV.GH_SECRET} envvar or in {token_path}"
        msg.fail(err, exits=1)
    return github.Github(token)
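A hedged usage sketch, assuming the PyGithub package is installed and a token is available in an environment variable (the variable name GH_TOKEN is illustrative):
import os
import github  # PyGithub

token = os.environ["GH_TOKEN"]  # illustrative variable name
gh = github.Github(token)
print(gh.get_user().login)  # verify the token by fetching the authenticated user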
Example #4
def get_json(url, desc):
    r = requests.get(url)
    if r.status_code != 200:
        msg.fail(
            "Server error ({})".format(r.status_code),
            "Couldn't fetch {}. Please find a model for your spaCy "
            "installation (v{}), and download it manually. For more "
            "details, see the documentation: "
            "https://spacy.io/usage/models".format(desc, about.__version__),
            exits=1,
        )
    return r.json()
Example #5
def main(self, args: BaseArgumentParser) -> int:
    list_devices_response = self.get_client().list_devices()
    msg.divider("Registered Devices")
    for device in list_devices_response.devices:
        if device.is_available:
            msg.good(f"{device.name}")
        else:
            msg.fail(f"{device.name}:")
            msg.text(
                f"  {color(device.error_type, bold=True)}: {device.error_message}"
            )
    return 0
Example #6
def check_hash(f0):
    key = f0.stem + ".xml.gz"

    if key not in md5:
        msg.fail(
            f"Can't find md5 hash for {key}; this is suspicious and worth looking into!")
        return

    if file_md5sum(f0) != md5[key]:
        msg.fail(f"Checksum failed {key}, should delete!")
        print(f0)
        return
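The file_md5sum helper is not shown in the snippet; a minimal sketch of what it might look like, streaming the file in chunks to keep memory bounded:
import hashlib
from pathlib import Path

def file_md5sum(path: Path, chunk_size: int = 1 << 20) -> str:
    """Hypothetical helper: return the md5 hex digest of a file."""
    digest = hashlib.md5()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()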
Example #7
def project_run(
    project_dir: Path,
    subcommand: str,
    *,
    force: bool = False,
    dry: bool = False,
    capture: bool = False,
) -> None:
    """Run a named script defined in the project.yml. If the script is part
    of the default pipeline (defined in the "run" section), DVC is used to
    execute the command, so it can determine whether to rerun it. It then
    calls into "exec" to execute it.

    project_dir (Path): Path to project directory.
    subcommand (str): Name of command to run.
    force (bool): Force re-running, even if nothing changed.
    dry (bool): Perform a dry run and don't execute commands.
    capture (bool): Whether to capture the output and errors of individual commands.
        If False, the stdout and stderr will not be redirected, and if there's an error,
        sys.exit will be called with the return code. You should use capture=False
        when you want to turn over execution to the command, and capture=True
        when you want to run the command more like a function.
    """
    config = load_project_config(project_dir)
    commands = {cmd["name"]: cmd for cmd in config.get("commands", [])}
    workflows = config.get("workflows", {})
    validate_subcommand(commands.keys(), workflows.keys(), subcommand)
    if subcommand in workflows:
        msg.info(f"Running workflow '{subcommand}'")
        for cmd in workflows[subcommand]:
            project_run(project_dir, cmd, force=force, dry=dry)
    else:
        cmd = commands[subcommand]
        for dep in cmd.get("deps", []):
            if not (project_dir / dep).exists():
                err = f"Missing dependency specified by command '{subcommand}': {dep}"
                err_help = "Maybe you forgot to run the 'project assets' command or a previous step?"
                err_kwargs = {"exits": 1} if not dry else {}
                msg.fail(err, err_help, **err_kwargs)
        check_spacy_commit = check_bool_env_var(
            ENV_VARS.PROJECT_USE_GIT_VERSION)
        with working_dir(project_dir) as current_dir:
            msg.divider(subcommand)
            rerun = check_rerun(current_dir,
                                cmd,
                                check_spacy_commit=check_spacy_commit)
            if not rerun and not force:
                msg.info(f"Skipping '{cmd['name']}': nothing changed")
            else:
                run_commands(cmd["script"], dry=dry, capture=capture)
                if not dry:
                    update_lockfile(current_dir, cmd)
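A hedged usage sketch, assuming a project directory whose project.yml defines a 'train' command and an 'all' workflow (both names are illustrative):
from pathlib import Path

# Force re-running a single command even if nothing changed
project_run(Path("my_project"), "train", force=True)

# Dry-run a workflow: prints the steps without executing them
project_run(Path("my_project"), "all", dry=True)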
Example #8
def swarm_solve(
    problems: Union[List[str], str],
    config: SwarmConfig,
    max_steps: Union[List[int], int] = 128,
    silent: bool = False,
) -> Swarm:
    single_problem: bool = isinstance(problems, str)
    if single_problem:
        problems = [problems]
    if isinstance(max_steps, int):
        max_steps = [max_steps] if single_problem else [max_steps] * len(problems)
    assert len(problems) > 0, "no problems to solve"
    assert len(problems) == len(max_steps)
    assert isinstance(problems, list)
    current_problem: str = problems.pop(0)
    current_max_moves: int = max_steps.pop(0)

    def env_callable():
        nonlocal current_problem, current_max_moves
        return FragileMathyEnv(
            name="mathy_v0",
            problem=current_problem,
            repeat_problem=True,
            max_steps=current_max_moves,
        )

    mathy_env: MathyEnv = env_callable()._env._env.mathy
    swarm: Swarm = mathy_swarm(config, env_callable)
    while True:
        if not silent:
            with msg.loading(f"Solving {current_problem} ..."):
                swarm.run()
        else:
            swarm.run()

        if not silent:
            if swarm.walkers.best_reward > EnvRewards.WIN:
                last_state = MathyEnvState.from_np(
                    swarm.walkers.states.best_state)
                msg.good(
                    f"Solved! {current_problem} = {last_state.agent.problem}")
                mathy_env.print_history(last_state)
            else:
                msg.fail(f"Failed to find a solution :(")

        if len(max_steps) > 0:
            current_max_moves = max_steps.pop(0)
            current_problem = problems.pop(0)
        else:
            break
    return swarm
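A hedged usage sketch (the problem strings and step budget are illustrative):
config = SwarmConfig()  # assumes the default configuration is acceptable
swarm = swarm_solve(["4x + 2x", "7j + j * 2"], config, max_steps=256)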
Example #9
def putscript(
    path: Path = typer.Argument(
        ...,
        exists=True,
        readable=True,
        file_okay=True,
        dir_okay=False,
        resolve_path=True,
    ),
):
    content = path.read_text()
    if client.putscript(path.name, content):
        msg.good(f"Put script {path.name}")
    else:
        msg.fail(f"Failed while putting script {path.name}")
Example #10
def resolve_version(wiki, version):
    url = _WIKI_BASE_DL_PATH.format(w=wiki)
    response = requests.get(url)
    versions = re.findall(r"href=\"(\d+)/\"", response.text)
    if version in versions:
        return version
    if version == "latest":
        return max(versions)
    msg.fail(
        "Wikipedia dump version not found",
        f"Pick one of these: {', '.join(versions)}.",
        exits=1,
    )
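A hedged usage sketch (the wiki name and version strings are illustrative):
latest = resolve_version("enwiki", "latest")    # newest dump listed on the server
pinned = resolve_version("enwiki", "20200401")  # exits with an error if not listed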
Example #11
def run_test(command, directory):
    """Execute a command that runs a test"""
    wrapped_command = "cd %s && %s" % (directory, command)
    pipe = subprocess.Popen(
        wrapped_command,
        shell=True,
    )
    pipe.wait()
    if pipe.returncode == 0:
        msg.good("TEST PASSED")
    else:
        msg.fail("TEST FAILED")
    return pipe.returncode
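An alternative sketch using subprocess.run with its cwd argument, which avoids wrapping the command in a shell-level cd (a variant for comparison, not the original code):
import subprocess
from wasabi import msg

def run_test_alt(command: str, directory: str) -> int:
    """Variant of run_test: subprocess.run handles waiting and the working directory."""
    result = subprocess.run(command, shell=True, cwd=directory)
    if result.returncode == 0:
        msg.good("TEST PASSED")
    else:
        msg.fail("TEST FAILED")
    return result.returncode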
Example #12
def read_vectors(vectors_loc):
    f = open_file(vectors_loc)
    shape = tuple(int(size) for size in next(f).split())
    vectors_data = numpy.zeros(shape=shape, dtype="f")
    vectors_keys = []
    for i, line in enumerate(tqdm(f)):
        line = line.rstrip()
        pieces = line.rsplit(" ", vectors_data.shape[1])
        word = pieces.pop(0)
        if len(pieces) != vectors_data.shape[1]:
            msg.fail(Errors.E094.format(line_num=i, loc=vectors_loc), exits=1)
        vectors_data[i] = numpy.asarray(pieces, dtype="f")
        vectors_keys.append(word)
    return vectors_data, vectors_keys
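The loader expects the standard word2vec text format: a header line giving the number of rows and the vector width, followed by one key and its values per line. A small illustration of that layout (values are made up):
2 3
apple 0.1 0.2 0.3
banana 0.4 0.5 0.6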
Example #13
def verify_cli_args(config_path, output_dir, resume_path, epoch_resume):
    if not config_path or (str(config_path) != "-" and not config_path.exists()):
        msg.fail("Config file not found", config_path, exits=1)
    if output_dir.exists() and [p for p in output_dir.iterdir()]:
        if resume_path:
            msg.warn(
                "Output directory is not empty.",
                "If you're resuming a run in this directory, the old weights "
                "for the consecutive epochs will be overwritten with the new ones.",
            )
        else:
            msg.warn(
                "Output directory is not empty. ",
                "It is better to use an empty directory or refer to a new output path, "
                "then the new directory will be created for you.",
            )
    if resume_path is not None:
        if resume_path.is_dir():
            # This is necessary because Windows gives a Permission Denied when we
            # try to open the directory later, which is confusing. See #7878
            msg.fail(
                "--resume-path should be a weights file, but {resume_path} is a directory.",
                exits=True,
            )
        model_name = re.search(r"model\d+\.bin", str(resume_path))
        if not model_name and not epoch_resume:
            msg.fail(
                "You have to use the --epoch-resume setting when using a renamed weight file for --resume-path",
                exits=True,
            )
        elif not model_name and epoch_resume < 0:
            msg.fail(
                f"The argument --epoch-resume has to be greater or equal to 0. {epoch_resume} is invalid",
                exits=True,
            )
Example #14
def run_test(command, directory):
    """Execute a command that runs a test"""
    msg.text("RUNNING  " + command)
    wrapped_command = f"cd {directory} && {command}"
    pipe = subprocess.Popen(
        wrapped_command, shell=True,
    )
    pipe.wait()
    if pipe.returncode == 0:
        msg.good("TEST PASSED")
    else:
        msg.fail("TEST FAILED")
    msg.text('')
    return pipe.returncode
Example #15
def main(in_file,
         out_dir,
         spacy_model="en_core_web_sm",
         n_process=1,
         max_docs=10**7):
    """
    Step 1: Parse raw text with spaCy

    Expects an input file with one sentence per line and will output a .spacy
    file of the parsed collection of Doc objects (DocBin).
    """
    input_path = Path(in_file)
    output_path = Path(out_dir)
    if not input_path.exists():
        msg.fail("Can't find input file", in_file, exits=1)
    if not output_path.exists():
        output_path.mkdir(parents=True)
        msg.good(f"Created output directory {out_dir}")
    nlp = spacy.load(spacy_model)
    msg.info(f"Using spaCy model {spacy_model}")
    doc_bin = DocBin(attrs=["POS", "TAG", "DEP", "ENT_TYPE", "ENT_IOB"])
    msg.text("Preprocessing text...")
    count = 0
    batch_num = 0
    with input_path.open("r", encoding="utf8") as texts:
        docs = nlp.pipe(texts, n_process=n_process)
        for doc in tqdm.tqdm(docs, desc="Docs", unit=""):
            if count < max_docs:
                doc_bin.add(doc)
                count += 1
                output_file = output_path / f"{input_path.stem}.spacy"
            else:
                batch_num += 1
                count = 0
                msg.good(f"Processed {len(doc_bin)} docs")
                doc_bin_bytes = doc_bin.to_bytes()
                output_file = output_path / f"{input_path.stem}-{batch_num}.spacy"
                with output_file.open("wb") as f:
                    f.write(doc_bin_bytes)
                msg.good(f"Saved parsed docs to file", output_file.resolve())
                doc_bin = DocBin(
                    attrs=["POS", "TAG", "DEP", "ENT_TYPE", "ENT_IOB"])
        batch_num += 1
        output_file = output_path / f"{input_path.stem}-{batch_num}.spacy"
        with output_file.open("wb") as f:
            # The path must be set before opening; the original opened the old
            # output_file first and wrote the final batch to the previous file.
            f.write(doc_bin.to_bytes())
        msg.good("Complete. Saved final parsed docs to file",
                 output_file.resolve())
Example #16
def main(
    # fmt: off
    in_file: str = typer.Argument(..., help="Vectors file (text-based)"),
    vocab_file: str = typer.Argument(..., help="Vocabulary file"),
    out_dir: str = typer.Argument(..., help="Path to output directory"),
    min_freq_ratio: float = typer.Option(0.0, "--min-freq-ratio", "-r", help="Frequency ratio threshold for discarding minority senses or casings"),
    min_distance: float = typer.Option(0.0, "--min-distance", "-s", help="Similarity threshold for discarding redundant keys"),
    # fmt: on
):
    """
    Step 5: Export a sense2vec component

    Expects a vectors.txt and a vocab file trained with GloVe and exports
    a component that can be loaded with Sense2vec.from_disk.
    """
    input_path = Path(in_file)
    vocab_path = Path(vocab_file)
    output_path = Path(out_dir)
    if not input_path.exists():
        msg.fail("Can't find input file", in_file, exits=1)
    if input_path.suffix == ".bin":
        msg.fail("Need text-based vectors file, not binary", in_file, exits=1)
    if not vocab_path.exists():
        msg.fail("Can't find vocab file", vocab_file, exits=1)
    if not output_path.exists():
        output_path.mkdir(parents=True)
        msg.good(f"Created output directory {out_dir}")
    with input_path.open("r", encoding="utf8") as f:
        (n_vectors, vector_size), f = _get_shape(f)
        vectors_data = f.readlines()
    with vocab_path.open("r", encoding="utf8") as f:
        vocab = read_vocab(f)
    vectors = {}
    all_senses = set()
    for item in vectors_data:
        item = item.rstrip().rsplit(" ", vector_size)
        key = item[0]
        try:
            _, sense = split_key(key)
        except ValueError:
            continue
        vec = item[1:]
        if len(vec) != vector_size:
            msg.fail(f"Wrong vector size: {len(vec)} (expected {vector_size})", exits=1)
        all_senses.add(sense)
        vectors[key] = numpy.asarray(vec, dtype=numpy.float32)
    discarded = set()
    discarded.update(get_minority_keys(vocab, min_freq_ratio))
    discarded.update(get_redundant_keys(vocab, vectors, min_distance))
    n_vectors = len(vectors) - len(discarded)
    s2v = Sense2Vec(shape=(n_vectors, vector_size), senses=all_senses)
    for key, vector in vectors.items():
        if key not in discarded:
            s2v.add(key, vector)
            s2v.set_freq(key, vocab[key])
    msg.good("Created the sense2vec model")
    msg.info(f"{n_vectors} vectors, {len(all_senses)} total senses")
    s2v.to_disk(output_path)
    msg.good("Saved model to directory", out_dir)
Example #17
def download(f0, f1):
    url = base_url + f0

    r = sess.get(url)
    if not r.ok:
        print(r.content)
        print(r.status_code)
        msg.fail(f"Failed {url}")
        exit()

    with open(f1, "wb") as FOUT:
        FOUT.write(r.content)

    msg.good(f"Saved {f0}")
    time.sleep(5)
Example #18
def _read_inputs(loc, msg):
    if loc == "-":
        msg.info("Reading input from sys.stdin")
        file_ = sys.stdin
        file_ = (line.encode("utf8") for line in file_)
    else:
        input_path = Path(loc)
        if not input_path.exists() or not input_path.is_file():
            msg.fail("Not a valid input data file", loc, exits=1)
        msg.info("Using data from {}".format(input_path.parts[-1]))
        file_ = input_path.open()
    for line in file_:
        data = srsly.json_loads(line)
        text = data["text"]
        yield text
Example #19
def _read_inputs(loc: Union[Path, str], msg: Printer) -> Iterator[str]:
    if loc == "-":
        msg.info("Reading input from sys.stdin")
        file_ = sys.stdin
        file_ = (line.encode("utf8") for line in file_)
    else:
        input_path = Path(loc)
        if not input_path.exists() or not input_path.is_file():
            msg.fail("Not a valid input data file", loc, exits=1)
        msg.info(f"Using data from {input_path.parts[-1]}")
        file_ = input_path.open()  # type: ignore[assignment]
    for line in file_:
        data = srsly.json_loads(line)
        text = data["text"]
        yield text
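Both variants expect newline-delimited JSON with a "text" field per line. A small illustration of a valid input file (contents are made up):
{"text": "First example sentence."}
{"text": "Second example sentence."}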
Example #20
def validate_project_version(config: Dict[str, Any]) -> None:
    """If the project defines a compatible spaCy version range, chec that it's
    compatible with the current version of spaCy.
    config (Dict[str, Any]): The loaded config.
    """
    spacy_version = config.get("spacy_version", None)
    if spacy_version and not is_compatible_version(about.__version__,
                                                   spacy_version):
        err = (
            f"The {PROJECT_FILE} specifies a spaCy version range ({spacy_version}) "
            f"that's not compatible with the version of spaCy you're running "
            f"({about.__version__}). You can edit version requirement in the "
            f"{PROJECT_FILE} to load it, but the project may not run as expected."
        )
        msg.fail(err, exits=1)
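A hedged usage sketch (the version range is illustrative):
config = {"spacy_version": ">=3.0.0,<4.0.0"}
validate_project_version(config)  # exits with an error if the running spaCy doesn't match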
Example #21
def git_sparse_checkout(repo, subpath, dest, branch):
    # We're using Git, partial clone and sparse checkout to
    # only clone the files we need
    # This ends up being RIDICULOUS. omg.
    # So, every tutorial and SO post talks about 'sparse checkout'...But they
    # go and *clone* the whole repo. Worthless. And cloning part of a repo
    # turns out to be completely broken. The only way to specify a "path" is..
    # a path *on the server*? The contents of which, specifies the paths. Wat.
    # Obviously this is hopelessly broken and insecure, because you can query
    # arbitrary paths on the server! So nobody enables this.
    # What we have to do is disable *all* files. We could then just checkout
    # the path, and it'd "work", but be hopelessly slow...Because it goes and
    # transfers every missing object one-by-one. So the final piece is that we
    # need to use some weird git internals to fetch the missings in bulk, and
    # *that* we can do by path.
    with make_tempdir() as tmp_dir:
        # This is the "clone, but don't download anything" part.
        cmd = (f"git clone {repo} {tmp_dir} --no-checkout --depth 1 "
               f"-b {branch} --filter=blob:none")
        run_command(cmd)
        # Now we need to find the missing filenames for the subpath we want.
        # Looking for this 'rev-list' command in the git --help? Hah.
        cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}"
        ret = run_command(cmd, capture=True)
        git_repo = _http_to_git(repo)
        # Now pass those missings into another bit of git internals
        missings = " ".join(
            [x[1:] for x in ret.stdout.split() if x.startswith("?")])
        if not missings:
            err = (
                f"Could not find any relevant files for '{subpath}'. "
                f"Did you specify a correct and complete path within repo '{repo}' "
                f"and branch {branch}?")
            msg.fail(err, exits=1)
        cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
        run_command(cmd, capture=True)
        # And finally, we can checkout our subpath
        cmd = f"git -C {tmp_dir} checkout {branch} {subpath}"
        run_command(cmd, capture=True)

        # Get a subdirectory of the cloned path, if appropriate
        source_path = tmp_dir / Path(subpath)
        if not is_subpath_of(tmp_dir, source_path):
            err = f"'{subpath}' is a path outside of the cloned repository."
            msg.fail(err, repo, exits=1)

        shutil.move(str(source_path), str(dest))
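A hedged usage sketch (the repository, subpath, and branch are illustrative):
from pathlib import Path

git_sparse_checkout(
    "https://github.com/explosion/projects",  # illustrative repository
    "pipelines/ner_demo",                     # illustrative subpath within the repo
    Path("ner_demo"),                         # local destination
    "v3",                                     # branch
)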
Example #22
def main(in_file, out_dir, spacy_model="en_core_web_sm", n_process=4):
    """
    Step 2: Preprocess text in sense2vec's format

    Expects a binary .spacy input file consisting of the parsed Docs (DocBin)
    and outputs a text file with one sentence per line in the expected sense2vec
    format (merged noun phrases, concatenated phrases with underscores and
    added "senses").

    Example input:
    Rats, mould and broken furniture: the scandal of the UK's refugee housing

    Example output:
    Rats|NOUN ,|PUNCT mould|NOUN and|CCONJ broken_furniture|NOUN :|PUNCT
    the|DET scandal|NOUN of|ADP the|DET UK|GPE 's|PART refugee_housing|NOUN
    """
    input_path = Path(in_file)
    output_path = Path(out_dir)
    if not input_path.exists():
        msg.fail("Can't find input file", in_file, exits=1)
    if not output_path.exists():
        output_path.mkdir(parents=True)
        msg.good(f"Created output directory {out_dir}")
    nlp = spacy.load(spacy_model)
    msg.info(f"Using spaCy model {spacy_model}")
    with input_path.open("rb") as f:
        doc_bin_bytes = f.read()
    doc_bin = DocBin().from_bytes(doc_bin_bytes)
    msg.good(f"Loaded {len(doc_bin)} parsed docs")
    docs = doc_bin.get_docs(nlp.vocab)
    output_file = output_path / f"{input_path.stem}.s2v"
    lines_count = 0
    words_count = 0
    with output_file.open("w", encoding="utf8") as f:
        for doc in tqdm.tqdm(docs, desc="Docs", unit=""):
            doc = merge_phrases(doc)
            words = []
            for token in doc:
                if not token.is_space:
                    word, sense = make_spacy_key(token, prefer_ents=True)
                    words.append(make_key(word, sense))
            f.write(" ".join(words) + "\n")
            lines_count += 1
            words_count += len(words)
    msg.good(
        f"Successfully preprocessed {lines_count} docs ({words_count} words)",
        output_file.resolve(),
    )
Example #23
def main(in_file, out_dir, spacy_model="en_core_web_sm", n_process=1):
    """
    Step 1: Parse raw text with spaCy

    Expects an input file with one sentence per line and will output a .spacy
    file of the parsed collection of Doc objects (DocBin).
    """
    input_path = Path(in_file)
    output_path = Path(out_dir)
    if not input_path.exists():
        msg.fail("Can't find input file", in_file, exits=1)
    if not output_path.exists():
        output_path.mkdir(parents=True)
        msg.good(f"Created output directory {out_dir}")
    nlp = spacy.load(spacy_model)
    msg.info(f"Using spaCy model {spacy_model}")
    msg.text("Preprocessing text...")
    with input_path.open("r", encoding="utf8") as fin:
        texts = [line.rstrip() for line in fin]
    docs = nlp.pipe(texts, n_process=n_process)
    output_file = output_path / f"{input_path.stem}.s2v"
    lines_count = 0
    words_count = 0
    wn_lemmas = set(wordnet.all_lemma_names())
    with output_file.open("w", encoding="utf8") as f:
        for doc in tqdm.tqdm(docs, desc="Docs", unit=""):
            # print(doc)
            spans = get_phrases(doc, wn_lemmas)
            spans = filter_spans(spans)
            # print('NOUN SPAN', str(spans))
            doc = merge_phrases(doc, spans)
            spans = get_adjective_phrases(doc)
            spans = filter_spans(spans)
            # print('ADJ SPAN', str(spans))
            # print('*-----------------------------------------*')
            doc = merge_phrases(doc, spans)
            words = []
            for token in doc:
                if not token.is_space:
                    word, sense = make_spacy_key(token, prefer_ents=True)
                    words.append(make_key(word, sense))
            f.write(" ".join(words) + "\n")
            lines_count += 1
            words_count += len(words)
    msg.good(
        f"Successfully preprocessed {lines_count} docs ({words_count} words)",
        output_file.resolve(),
    )
Example #24
def check_private_asset(dest: Path, checksum: Optional[str] = None) -> None:
    """Check and validate assets without a URL (private assets that the user
    has to provide themselves) and give feedback about the checksum.

    dest (Path): Destination path of the asset.
    checksum (Optional[str]): Optional checksum of the expected file.
    """
    if not Path(dest).exists():
        err = f"No URL provided for asset. You need to add this file yourself: {dest}"
        msg.warn(err)
    else:
        if not checksum:
            msg.good(f"Asset already exists: {dest}")
        elif checksum == get_checksum(dest):
            msg.good(f"Asset exists with matching checksum: {dest}")
        else:
            msg.fail(f"Asset available but with incorrect checksum: {dest}")
Example #25
def init_spacy_model(
    lang: str = "en", size: str = "md", model_type: str = "core"
) -> Language:
    name = ""
    for model_name in available_models:
        if (
            lang in model_name
            and size in model_name
            and model_type in model_name
        ):
            msg.good(f"Found model {model_name}")
            name = model_name
            break
        else:
            msg.fail("No compatible model of your choice.")
    model = load_spacy_model(name)
    return model
Example #26
def get_checksum(path: Union[Path, str]) -> str:
    """Get the checksum for a file or directory given its file path. If a
    directory path is provided, this uses all files in that directory.
    path (Union[Path, str]): The file or directory path.
    RETURNS (str): The checksum.
    """
    path = Path(path)
    if path.is_file():
        return hashlib.md5(Path(path).read_bytes()).hexdigest()
    if path.is_dir():
        # TODO: this is currently pretty slow
        dir_checksum = hashlib.md5()
        for sub_file in sorted(fp for fp in path.rglob("*") if fp.is_file()):
            dir_checksum.update(sub_file.read_bytes())
        return dir_checksum.hexdigest()
    msg.fail(f"Can't get checksum for {path}: not a file or directory",
             exits=1)
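A hedged usage sketch (paths are illustrative):
print(get_checksum("model.bin"))      # md5 of a single file
print(get_checksum(Path("corpus/")))  # combined md5 over all files in a directory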
Example #27
def read_vectors(vectors_loc):
    # temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200
    from tqdm import tqdm

    f = open_file(vectors_loc)
    shape = tuple(int(size) for size in next(f).split())
    vectors_data = numpy.zeros(shape=shape, dtype="f")
    vectors_keys = []
    for i, line in enumerate(tqdm(f)):
        line = line.rstrip()
        pieces = line.rsplit(" ", vectors_data.shape[1])
        word = pieces.pop(0)
        if len(pieces) != vectors_data.shape[1]:
            msg.fail(Errors.E094.format(line_num=i, loc=vectors_loc), exits=1)
        vectors_data[i] = numpy.asarray(pieces, dtype="f")
        vectors_keys.append(word)
    return vectors_data, vectors_keys
Example #28
def _load_file(file_path: Path, msg: Printer) -> None:
    file_name = file_path.parts[-1]
    if file_path.suffix == ".json":
        with msg.loading(f"Loading {file_name}..."):
            data = srsly.read_json(file_path)
        msg.good(f"Loaded {file_name}")
        return data
    elif file_path.suffix == ".jsonl":
        with msg.loading(f"Loading {file_name}..."):
            data = srsly.read_jsonl(file_path)
        msg.good(f"Loaded {file_name}")
        return data
    msg.fail(
        f"Can't load file extension {file_path.suffix}",
        "Expected .json or .jsonl",
        exits=1,
    )
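A hedged usage sketch (the file name is illustrative):
from pathlib import Path
from wasabi import Printer

msg = Printer()
data = _load_file(Path("annotations.jsonl"), msg)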
Example #29
    def commons_files(self):

        if self.wikidata_commons_category():
            category = pwb.Category(self.commons, self.wikidata_commons_category())

            try:
                return sum(1 for image in category.members(recurse=3, namespaces="File"))

            except RecursionError as error:
                msg.fail(f"Ha ocurrido un error de recursión: {error}")
                return "Demasiadas subcategorías"

            except (ValueError, AttributeError) as error:
                msg.fail(f"Ha ocurrido un error inespereado: {error}")
                return "0"
        else:
            return 0
Example #30
def get_compatibility() -> dict:
    version = get_minor_version(about.__version__)
    r = requests.get(about.__compatibility__)
    if r.status_code != 200:
        msg.fail(
            f"Server error ({r.status_code})",
            f"Couldn't fetch compatibility table. Please find a package for your spaCy "
            f"installation (v{about.__version__}), and download it manually. "
            f"For more details, see the documentation: "
            f"https://spacy.io/usage/models",
            exits=1,
        )
    comp_table = r.json()
    comp = comp_table["spacy"]
    if version not in comp:
        msg.fail(f"No compatible packages found for v{version} of spaCy", exits=1)
    return comp[version]