Пример #1
0
    def to_disk(self, path, exclude=tuple(), disable=None):
        """Save the current state to a directory.  If a model is loaded, this
        will include the model.

        path (unicode or Path): Path to a directory, which will be created if
            it doesn't exist.
        exclude (list): Names of components or serialization fields to exclude.
        disable (list): Deprecated alias for `exclude`. If it is not None, a
            deprecation warning (W014) is issued and it replaces `exclude`.

        DOCS: https://spacy.io/api/language#to_disk
        """
        if disable is not None:
            deprecation_warning(Warnings.W014)
            exclude = disable
        path = util.ensure_path(path)

        def write_meta(p):
            # A context manager closes the handle deterministically; the
            # previous `p.open("w").write(...)` left that to garbage collection.
            with p.open("w") as file_:
                file_.write(srsly.json_dumps(self.meta))

        serializers = OrderedDict()
        serializers["tokenizer"] = lambda p: self.tokenizer.to_disk(p, exclude=["vocab"])
        serializers["meta.json"] = write_meta
        for name, proc in self.pipeline:
            # Skip components that have no `name` attribute, that were
            # explicitly excluded, or that cannot be written to disk.
            if not hasattr(proc, "name"):
                continue
            if name in exclude:
                continue
            if not hasattr(proc, "to_disk"):
                continue
            # `proc=proc` binds the current component; a plain closure would
            # see only the last component of the loop.
            serializers[name] = lambda p, proc=proc: proc.to_disk(p, exclude=["vocab"])
        serializers["vocab"] = lambda p: self.vocab.to_disk(p)
        util.to_disk(path, serializers, exclude)
Пример #2
0
def package(input_dir, output_dir, meta_path=None, create_meta=False, force=False):
    """
    Generate Python package for model data, including meta and required
    installation files. A new directory will be created in the specified
    output directory, and model data will be copied over. If --create-meta is
    set and a meta.json already exists in the output directory, the existing
    values will be used as the defaults in the command-line prompt.

    input_dir: Directory containing the model data.
    output_dir: Existing directory in which the package directory is created.
    meta_path: Optional path to a meta.json; defaults to `input_dir/meta.json`.
    create_meta: If True, pass the loaded meta through `generate_meta`.
    force: If True, remove an already existing package directory first.

    Every validation failure is reported through `msg.fail(..., exits=1)`.
    """
    msg = Printer()
    input_path = util.ensure_path(input_dir)
    output_path = util.ensure_path(output_dir)
    meta_path = util.ensure_path(meta_path)
    if not input_path or not input_path.exists():
        msg.fail("Can't locate model data", input_path, exits=1)
    if not output_path or not output_path.exists():
        msg.fail("Output directory not found", output_path, exits=1)
    if meta_path and not meta_path.exists():
        msg.fail("Can't find model meta.json", meta_path, exits=1)

    meta_path = meta_path or input_path / "meta.json"
    if not meta_path.is_file():
        # Without this guard `meta` stayed unbound and the key check below
        # crashed with an UnboundLocalError instead of a readable message.
        msg.fail("Can't load model meta.json", meta_path, exits=1)
    meta = srsly.read_json(meta_path)
    if not create_meta:  # only print if user doesn't want to overwrite
        msg.good("Loaded meta.json from file", meta_path)
    else:
        meta = generate_meta(input_dir, meta, msg)
    for key in ("lang", "name", "version"):
        if key not in meta or meta[key] == "":
            msg.fail(
                "No '{}' setting found in meta.json".format(key),
                "This setting is required to build your package.",
                exits=1,
            )
    model_name = meta["lang"] + "_" + meta["name"]
    model_name_v = model_name + "-" + meta["version"]
    main_path = output_path / model_name_v
    package_path = main_path / model_name

    if package_path.exists():
        if force:
            shutil.rmtree(path2str(package_path))
        else:
            # The message has no placeholders, so the former `.format(path=...)`
            # call was a no-op and has been dropped.
            msg.fail(
                "Package directory already exists",
                "Please delete the directory and try again, or use the "
                "`--force` flag to overwrite existing "
                "directories.",
                exits=1,
            )
    package_path.mkdir(parents=True)
    shutil.copytree(path2str(input_path), path2str(package_path / model_name_v))
    create_file(main_path / "meta.json", srsly.json_dumps(meta, indent=2))
    create_file(main_path / "setup.py", TEMPLATE_SETUP)
    create_file(main_path / "MANIFEST.in", TEMPLATE_MANIFEST)
    create_file(package_path / "__init__.py", TEMPLATE_INIT)
    msg.good("Successfully created package '{}'".format(model_name_v), main_path)
    msg.text("To build the package, run `python setup.py sdist` in this directory.")
Пример #3
0
 def _save_model(epoch, is_temp=False):
     """Write the tok2vec weights for `epoch` and append a line to log.jsonl.

     Both writes happen while `optimizer.averages` are applied to the model
     through `model.use_params`. When `is_temp` is true, ".temp" is inserted
     before the ".bin" extension of the weights file.
     """
     suffix = ".temp" if is_temp else ""
     with model.use_params(optimizer.averages):
         weights_path = output_dir / ("model%d%s.bin" % (epoch, suffix))
         with weights_path.open("wb") as file_:
             file_.write(model.tok2vec.to_bytes())
         log = dict(
             nr_word=tracker.nr_word,
             loss=tracker.loss,
             epoch_loss=tracker.epoch_loss,
             epoch=epoch,
         )
         log_line = srsly.json_dumps(log) + "\n"
         # Append mode: one JSON record per call accumulates in the file.
         with (output_dir / "log.jsonl").open("a") as file_:
             file_.write(log_line)
def callback():
    """Exchange the `code` query argument for a token and print the user info.

    Steps: build a token request with `client.prepare_token_request`, POST it
    with the client credentials, let `client` parse the token response, then
    GET the user-info endpoint with the token added and print the JSON body.
    NOTE(review): looks like a Flask view for a Google OAuth redirect that
    returns nothing — confirm against the route registration.
    """
    auth_code = request.args.get("code")
    token_url, token_headers, token_body = client.prepare_token_request(
        google_config["token_endpoint"],
        authorization_response=request.url,
        redirect_url=request.base_url,
        code=auth_code,
    )
    token_response = requests.post(
        token_url,
        headers=token_headers,
        data=token_body,
        auth=(CLIENT_ID, CLIENT_SECRET),
    )
    client.parse_request_body_response(json_dumps(token_response.json()))
    # Now get the user email
    uri, userinfo_headers, userinfo_body = client.add_token(
        google_config["userinfo_endpoint"]
    )
    userinfo_response = requests.get(
        uri, headers=userinfo_headers, data=userinfo_body
    )
    print(userinfo_response.json())
Пример #5
0
    def to_bytes(self, *, exclude=tuple()):
        """Serialize the pipe to a bytestring.

        The fields are registered in the order cfg (only when `self.cfg`
        exists and is not None), vocab, kb, model.

        exclude (Iterable[str]): String names of serialization fields to exclude.
        RETURNS (bytes): The serialized object.

        DOCS: https://spacy.io/api/entitylinker#to_bytes
        """
        self._validate_serialization_attrs()
        serialize = {}
        if getattr(self, "cfg", None) is not None:
            serialize["cfg"] = lambda: srsly.json_dumps(self.cfg)
        # Keyword arguments keep their order, so the field order is unchanged.
        serialize.update(
            vocab=lambda: self.vocab.to_bytes(exclude=exclude),
            kb=self.kb.to_bytes,
            model=self.model.to_bytes,
        )
        return util.to_bytes(serialize, exclude)
Пример #6
0
 def to_str(self) -> str:
     """Write the config to a string.

     Nested mappings become dotted section names; every leaf value is stored
     as its JSON dump. A leaf found at the top level (outside any section)
     trips the `assert`.
     """
     parser = get_configparser()
     pending: List[Tuple[tuple, "Config"]] = [(tuple(), self)]
     # `pending` is extended while it is iterated: the loop keeps running
     # until every nested mapping appended below has been visited.
     for path, node in pending:
         for key, value in node.items():
             if hasattr(value, "items"):
                 pending.append((path + (key, ), value))
                 continue
             assert path
             section = ".".join(path)
             if not parser.has_section(section):
                 parser.add_section(section)
             parser.set(section, key, srsly.json_dumps(value))
     buffer = io.StringIO()
     parser.write(buffer)
     return buffer.getvalue().strip()
Пример #7
0
def get_ner_stats(data: List[Example],
                  serialize: bool = False,
                  return_examples: bool = False) -> Union[NERStats, str, None]:
    """Compute statistics for NER data

    Args:
        data (List[Example]): Data as a List of examples
        serialize (bool, optional): Return the stats as a JSON string
            (indent of 4) instead of a NERStats object.
        return_examples (bool, optional): Attach the examples grouped by
            label to the stats as `examples_with_type`.

    Returns:
        Union[NERStats, str, None]:
            JSON string if serialize is True, otherwise the NERStats object
    """
    annotations_per_type: DefaultDict[str, Any] = defaultdict(int)
    examples: DefaultDict[str, Any] = defaultdict(list)
    n_examples_no_entities = 0
    for example in data:
        if example.spans:
            # An example is listed once per span, so it can appear several
            # times under the same label.
            for span in example.spans:
                annotations_per_type[span.label] += 1
                examples[span.label].append(example)
        else:
            n_examples_no_entities += 1
            examples[NONE].append(example)

    # Most frequent label first; dict insertion order keeps that ranking.
    ranked = sorted(
        annotations_per_type.items(), key=lambda pair: pair[1], reverse=True)
    sorted_anns_by_count = {label: count for label, count in ranked}

    stats = NERStats(
        n_examples=len(data),
        n_examples_no_entities=n_examples_no_entities,
        n_annotations=sum(annotations_per_type.values()),
        n_annotations_per_type=sorted_anns_by_count,
    )
    if return_examples:
        stats.examples_with_type = examples

    if not serialize:
        return stats
    return srsly.json_dumps(stats.dict(), indent=4)
Пример #8
0
 def _save_model(epoch, is_temp=False):
     """Write tok2vec and full-model weights for `epoch` and log the progress.

     All writes happen while `optimizer.averages` are applied to the model
     through `model.use_params`. When `is_temp` is true, ".temp" is inserted
     before the ".bin" extension of both weight files.
     """
     suffix = ".temp" if is_temp else ""
     with model.use_params(optimizer.averages):
         tok2vec_path = output_dir / ("model%d%s.bin" % (epoch, suffix))
         with tok2vec_path.open("wb") as file_:
             file_.write(model.tok2vec.to_bytes())
         full_path = output_dir / ("full_model%d%s.bin" % (epoch, suffix))
         with full_path.open("wb") as file_:
             file_.write(model.to_bytes())
         log = dict(
             nr_word=tracker.nr_word,
             loss=tracker.loss,
             epoch_loss=tracker.epoch_loss,
             epoch=epoch,
         )
         log_line = srsly.json_dumps(log) + "\n"
         # Append mode: one JSON record per call accumulates in the file.
         with (output_dir / "log.jsonl").open("a") as file_:
             file_.write(log_line)
Пример #9
0
def try_dump_json(value: Any,
                  data: Union[Dict[str, dict], Config, str] = "") -> str:
    """Dump a config value as JSON and output user-friendly error if it fails.

    `data` is only used as context for the ConfigValidationError raised when
    the value cannot be serialized.
    """
    if isinstance(value, str):
        # Special case if we have a variable: it's already a string so don't
        # dump to preserve ${x:y} vs. "${x:y}"
        if VARIABLE_RE.search(value):
            return value
        # Work around values that are strings but numbers: wrap them in an
        # extra pair of quotes before dumping.
        if value.replace(".", "", 1).isdigit():
            value = f'"{value}"'
    try:
        dumped = srsly.json_dumps(value)
    except Exception as e:
        err_msg = (
            f"Couldn't serialize config value of type {type(value)}: {e}. Make "
            f"sure all values in your config are JSON-serializable. If you want "
            f"to include Python objects, use a registered function that returns "
            f"the object instead.")
        raise ConfigValidationError(data, [], message=err_msg) from e
    return dumped
Пример #10
0
 def to_str(self) -> str:
     """Write the config to a string.

     Nested mappings become dotted section names; every leaf value is stored
     as its JSON dump.
     """
     parser = get_configparser()
     pending: List[Tuple[tuple, "Config"]] = [(tuple(), self)]
     # `pending` is extended while it is iterated: the loop keeps running
     # until every nested mapping appended below has been visited.
     for path, node in pending:
         section = ".".join(path)
         # Always create sections for non-'*' sections, not only if
         # they have leaf entries, as we don't want to expand
         # blocks that are undefined
         wants_section = bool(path) and path[-1] != "*"
         if wants_section and not parser.has_section(section):
             parser.add_section(section)
         for key, value in node.items():
             if not hasattr(value, "items"):
                 parser.set(section, key, srsly.json_dumps(value))
                 continue
             pending.append((path + (key, ), value))
     buffer = io.StringIO()
     parser.write(buffer)
     return buffer.getvalue().strip()
Пример #11
0
    def to_bytes(self, exclude=tuple(), disable=None, **kwargs):
        """Serialize the current state to a binary string.

        exclude (list): Names of components or serialization fields to exclude.
        disable (list): Deprecated alias for `exclude`. If it is not None, a
            deprecation warning (W014) is issued and it replaces `exclude`.
        **kwargs: Forwarded to `util.get_serialization_exclude` together with
            the serializers and `exclude`.
        RETURNS (bytes): The serialized form of the `Language` object.

        DOCS: https://spacy.io/api/language#to_bytes
        """
        if disable is not None:
            deprecation_warning(Warnings.W014)
            exclude = disable
        serializers = OrderedDict(
            [
                ("vocab", lambda: self.vocab.to_bytes()),
                ("tokenizer", lambda: self.tokenizer.to_bytes(exclude=["vocab"])),
                ("meta.json", lambda: srsly.json_dumps(self.meta)),
            ]
        )
        for name, proc in self.pipeline:
            if name in exclude or not hasattr(proc, "to_bytes"):
                continue
            # `proc=proc` binds the current component to its own lambda.
            serializers[name] = lambda proc=proc: proc.to_bytes(exclude=["vocab"])
        exclude = util.get_serialization_exclude(serializers, exclude, kwargs)
        return util.to_bytes(serializers, exclude)
Пример #12
0
    def to_bytes(self, exclude=tuple(), disable=None, **kwargs):
        """Serialize the current state to a binary string.

        exclude (list): Names of components or serialization fields to exclude.
        disable (list): Deprecated alias for `exclude`. If it is not None, a
            deprecation warning (W014) is issued and it replaces `exclude`.
        **kwargs: Forwarded to `util.get_serialization_exclude` together with
            the serializers and `exclude`.
        RETURNS (bytes): The serialized form of the `Language` object.

        DOCS: https://spacy.io/api/language#to_bytes
        """
        if disable is not None:
            deprecation_warning(Warnings.W014)
            exclude = disable
        serializers = OrderedDict()
        serializers["vocab"] = lambda: self.vocab.to_bytes()
        serializers["tokenizer"] = lambda: self.tokenizer.to_bytes(exclude=["vocab"])
        serializers["meta.json"] = lambda: srsly.json_dumps(self.meta)
        for component_name, component in self.pipeline:
            if component_name not in exclude and hasattr(component, "to_bytes"):
                # The default argument binds the current component to its
                # own lambda.
                serializers[component_name] = (
                    lambda proc=component: proc.to_bytes(exclude=["vocab"])
                )
        final_exclude = util.get_serialization_exclude(serializers, exclude, kwargs)
        return util.to_bytes(serializers, final_exclude)
Пример #13
0
def dataset_stats(data: List[Dict[str, object]], serialize=False):
    """Count entity labels over `data` and group the examples by label.

    Each example is a dict with a 'spans' list whose items carry a 'label'.
    Examples without spans are grouped under the key 'NONE'. An example is
    listed once per span, so it can appear several times under one label.

    Returns a JSON string (indent of 4) of the counts when `serialize` is
    true; otherwise the counts dict extended with 'examples_with_type'.
    """
    label_counts = defaultdict(int)
    examples_by_label = defaultdict(list)
    without_entities = 0
    for example in data:
        spans = example['spans']
        if spans:
            for span in spans:
                span_label = span['label']
                label_counts[span_label] += 1
                examples_by_label[span_label].append(example)
        else:
            without_entities += 1
            examples_by_label['NONE'].append(example)

    summary = {
        'n_examples': len(data),
        'n_examples_no_entities': without_entities,
        'ents_per_type': label_counts
    }
    if not serialize:
        summary['examples_with_type'] = examples_by_label
        return summary
    return srsly.json_dumps(summary, indent=4)
Пример #14
0
def package(input_dir,
            output_dir,
            meta_path=None,
            create_meta=False,
            force=False):
    """
    Generate Python package for model data, including meta and required
    installation files. A new directory will be created in the specified
    output directory, and model data will be copied over. If --create-meta is
    set and a meta.json already exists in the output directory, the existing
    values will be used as the defaults in the command-line prompt.

    input_dir: Directory containing the model data.
    output_dir: Existing directory in which the package directory is created.
    meta_path: Optional path to a meta.json; defaults to `input_dir/meta.json`.
    create_meta: If True, pass the loaded meta through `generate_meta`.
    force: If True, remove an already existing package directory first.

    Every validation failure is reported through `msg.fail(..., exits=1)`.
    """
    msg = Printer()
    input_path = util.ensure_path(input_dir)
    output_path = util.ensure_path(output_dir)
    meta_path = util.ensure_path(meta_path)
    if not input_path or not input_path.exists():
        msg.fail("Can't locate model data", input_path, exits=1)
    if not output_path or not output_path.exists():
        msg.fail("Output directory not found", output_path, exits=1)
    if meta_path and not meta_path.exists():
        msg.fail("Can't find model meta.json", meta_path, exits=1)

    meta_path = meta_path or input_path / "meta.json"
    if not meta_path.is_file():
        # Without this guard `meta` stayed unbound and the key check below
        # crashed with an UnboundLocalError instead of a readable message.
        msg.fail("Can't load model meta.json", meta_path, exits=1)
    meta = srsly.read_json(meta_path)
    if not create_meta:  # only print if user doesn't want to overwrite
        msg.good("Loaded meta.json from file", meta_path)
    else:
        meta = generate_meta(input_dir, meta, msg)
    for key in ("lang", "name", "version"):
        if key not in meta or meta[key] == "":
            msg.fail(
                "No '{}' setting found in meta.json".format(key),
                "This setting is required to build your package.",
                exits=1,
            )
    model_name = meta["lang"] + "_" + meta["name"]
    model_name_v = model_name + "-" + meta["version"]
    main_path = output_path / model_name_v
    package_path = main_path / model_name

    if package_path.exists():
        if force:
            shutil.rmtree(path2str(package_path))
        else:
            # The message has no placeholders, so the former `.format(path=...)`
            # call was a no-op and has been dropped.
            msg.fail(
                "Package directory already exists",
                "Please delete the directory and try again, or use the "
                "`--force` flag to overwrite existing "
                "directories.",
                exits=1,
            )
    package_path.mkdir(parents=True)
    shutil.copytree(path2str(input_path),
                    path2str(package_path / model_name_v))
    create_file(main_path / "meta.json", srsly.json_dumps(meta, indent=2))
    create_file(main_path / "setup.py", TEMPLATE_SETUP)
    create_file(main_path / "MANIFEST.in", TEMPLATE_MANIFEST)
    create_file(package_path / "__init__.py", TEMPLATE_INIT)
    msg.good("Successfully created package '{}'".format(model_name_v),
             main_path)
    msg.text(
        "To build the package, run `python setup.py sdist` in this directory.")
 def to_bytes(self, exclude=tuple(), disable=None, **kwargs):
     """Return a msgpack dump holding one "dummy" entry; arguments are unused."""
     payload = {"dummy": srsly.json_dumps(None)}
     return srsly.msgpack_dumps(payload)
Пример #16
0
def create_wikigraph(
    output_path: Path,
    wiki="en",
    version="latest",
    dumps_path: Path = None,
    max_workers: int = None,
    silent: bool = None,
    force: bool = None,
):
    """
    Build a `WikiGraph` from a specific dump and store it on disk.

    The graph is written to `<output_path>/<wiki>wiki_core` in the "picklez"
    format, next to a `meta.json` describing it. The result can be loaded
    directly or packaged with the `package-wikigraph` command.

    Parameters
    ----------
    output_path : Path
        Path in which to store the `WikiGraph`.
    wiki : str, optional
        Wikipedia dump type to use, by default "en".
    version : str, optional
        Wikipedia dump version to use, by default "latest".
    dumps_path : Path, optional
        Path in which to find previously downloaded dumps,
        or where to save dumps downloaded in this call, by default None.
    max_workers : int, optional
        Maximum number of processes to use, by default None.
    silent : bool, optional
        Do not print anything to stdout, by default None.
    force : bool, optional
        Overwrite content in output_path, if any, by default None.
    """
    if not output_path.exists():
        output_path.mkdir()
        msg.good(f"Created output directory: {output_path}")
    graph_name = f"{wiki}wiki_core"
    graph_path = output_path.joinpath(graph_name)
    if not force and graph_path.exists():
        msg.fail(
            f"Output path already contains {graph_name} directory",
            "Use --force to overwrite it",
            exits=1,
        )
    wg = WikiGraph.build(
        dumps_path=dumps_path,
        max_workers=max_workers,
        wiki=wiki,
        version=version,
        verbose=not silent,
    )
    if not graph_path.exists():
        graph_path.mkdir()
    graph_format = "picklez"
    with msg.loading("dump to disk..."):
        wg.dump(graph_path, graph_format=graph_format)
    # Describe the stored graph in a meta.json next to the dump.
    meta = get_meta()
    meta["name"] = graph_name
    meta["version"] = wg.version
    meta["graph_format"] = graph_format
    meta["spikex_version"] = f">={spikex_version}"
    meta["fullname"] = f"{graph_name}-{spikex_version}"
    meta["sources"].append("Wikipedia")
    serialized_meta = json_dumps(meta, indent=2)
    graph_path.joinpath("meta.json").write_text(serialized_meta)
    msg.good(f"Successfully created {graph_name}.")
Пример #17
0
def pretrain(
    texts_loc,
    vectors_model,
    output_dir,
    width=96,
    depth=4,
    embed_rows=2000,
    loss_func="cosine",
    use_vectors=False,
    dropout=0.2,
    nr_iter=1000,
    batch_size=3000,
    max_length=500,
    min_length=5,
    seed=0,
):
    """
    Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components,
    using an approximate language-modelling objective. Specifically, we load
    pre-trained vectors, and train a component like a CNN, BiLSTM, etc to predict
    vectors which match the pre-trained ones. The weights are saved to a directory
    after each epoch. You can then pass a path to one of these pre-trained weights
    files to the 'spacy train' command.

    This technique may be especially helpful if you have little labelled data.
    However, it's still quite experimental, so your mileage may vary.

    To load the weights back in during 'spacy train', you need to ensure
    all settings are the same between pretraining and training. The API and
    errors around this need some improvement.

    Files written to `output_dir`: `config.json` (the call arguments),
    `model<epoch>.bin` (tok2vec weights, one per epoch) and `log.jsonl`
    (one appended JSON record per epoch). Pass "-" as `texts_loc` to read
    the JSONL input from stdin instead of a file.
    """
    # Must stay the first statement: at this point locals() holds exactly the
    # call arguments, which are saved to config.json below.
    config = dict(locals())
    msg = Printer()
    util.fix_random_seed(seed)

    has_gpu = prefer_gpu()
    msg.info("Using GPU" if has_gpu else "Not using GPU")

    output_dir = Path(output_dir)
    if not output_dir.exists():
        output_dir.mkdir()
        msg.good("Created output directory")
    srsly.write_json(output_dir / "config.json", config)
    msg.good("Saved settings to config.json")

    # Load texts from file or stdin
    if texts_loc != "-":  # reading from a file
        texts_loc = Path(texts_loc)
        if not texts_loc.exists():
            msg.fail("Input text file doesn't exist", texts_loc, exits=1)
        with msg.loading("Loading input texts..."):
            texts = list(srsly.read_jsonl(texts_loc))
        msg.good("Loaded input texts")
        random.shuffle(texts)
    else:  # reading from stdin
        msg.text("Reading input text from stdin...")
        # Unlike the file branch, the result is neither wrapped in a list
        # nor shuffled.
        texts = srsly.read_jsonl("-")

    with msg.loading("Loading model '{}'...".format(vectors_model)):
        nlp = util.load_model(vectors_model)
    msg.good("Loaded model '{}'".format(vectors_model))
    pretrained_vectors = None if not use_vectors else nlp.vocab.vectors.name
    model = create_pretraining_model(
        nlp,
        Tok2Vec(
            width,
            embed_rows,
            conv_depth=depth,
            pretrained_vectors=pretrained_vectors,
            bilstm_depth=0,  # Requires PyTorch. Experimental.
            cnn_maxout_pieces=3,  # You can try setting this higher
            subword_features=True,  # Set to False for Chinese etc
        ),
    )
    optimizer = create_default_optimizer(model.ops)
    tracker = ProgressTracker(frequency=10000)
    msg.divider("Pre-training tok2vec layer")
    # Column layout shared by the header row and every progress row.
    row_settings = {
        "widths": (3, 10, 10, 6, 4),
        "aligns": ("r", "r", "r", "r", "r")
    }
    msg.row(("#", "# Words", "Total Loss", "Loss", "w/s"), **row_settings)
    for epoch in range(nr_iter):
        for batch in util.minibatch_by_words(((text, None) for text in texts),
                                             size=batch_size):
            docs = make_docs(
                nlp,
                [text for (text, _) in batch],
                max_length=max_length,
                min_length=min_length,
            )
            loss = make_update(model,
                               docs,
                               optimizer,
                               objective=loss_func,
                               drop=dropout)
            progress = tracker.update(epoch, loss, docs)
            if progress:
                msg.row(progress, **row_settings)
                # When reading from stdin, end the epoch once 10**7 words have
                # been counted for it. The break leaves only the batch loop,
                # so the weights and log entry below are still written.
                if texts_loc == "-" and tracker.words_per_epoch[epoch] >= 10**7:
                    break
        # Write this epoch's weights and log record while the optimizer's
        # averages are applied to the model.
        with model.use_params(optimizer.averages):
            with (output_dir / ("model%d.bin" % epoch)).open("wb") as file_:
                file_.write(model.tok2vec.to_bytes())
            log = {
                "nr_word": tracker.nr_word,
                "loss": tracker.loss,
                "epoch_loss": tracker.epoch_loss,
                "epoch": epoch,
            }
            with (output_dir / "log.jsonl").open("a") as file_:
                file_.write(srsly.json_dumps(log) + "\n")
        tracker.epoch_loss = 0.0
        if texts_loc != "-":
            # Reshuffle the texts if texts were loaded from a file
            random.shuffle(texts)
Пример #18
0
def main(path):
    """Print the JSON dump of every comment yielded by `Reddit(path)`."""
    for comment in Reddit(path):
        serialized = srsly.json_dumps(comment)
        print(serialized)
Пример #19
0
def get_emails():
    """Execute `default_query` on `schema` and return its data as a JSON string."""
    execution = schema.execute(default_query)
    payload = execution.data
    return srsly.json_dumps(payload)
Пример #20
0
 def to_bytes(self, **kwargs) -> bytes:
     """Serialize only the config ("cfg") via `util.to_bytes`; kwargs are unused."""

     def dump_cfg():
         return srsly.json_dumps(self._get_config())

     return util.to_bytes({"cfg": dump_cfg}, [])
Пример #21
0
def main(path):
    """Print the JSON dump of every comment yielded by `Reddit(path)`."""
    comments = Reddit(path)
    for entry in comments:
        print(srsly.json_dumps(entry))
Пример #22
0
def package(
    input_dir: Path,
    output_dir: Path,
    meta_path: Optional[Path] = None,
    code_paths: List[Path] = [],
    name: Optional[str] = None,
    version: Optional[str] = None,
    create_meta: bool = False,
    create_sdist: bool = True,
    create_wheel: bool = False,
    force: bool = False,
    silent: bool = True,
) -> None:
    """Create an installable Python package directory from pipeline data.

    The pipeline data in `input_dir` is copied into
    `<output_dir>/<model_name>-<version>/<model_name>/`, and `meta.json`,
    `setup.py`, `MANIFEST.in`, `__init__.py` and (if none was copied from the
    pipeline directory) a generated `README.md` are written alongside it.
    Optionally `setup.py sdist` and/or `setup.py bdist_wheel` are run in the
    package directory.

    input_dir: Directory containing the pipeline data.
    output_dir: Existing directory in which the package directory is created.
    meta_path: Optional path to a meta.json; defaults to `input_dir/meta.json`.
    code_paths: Python files to import now and copy into the package.
    name: Overrides the "name" entry of the meta if not None.
    version: Overrides the "version" entry of the meta if not None.
    create_meta: If True, pass the meta through `generate_meta`.
    create_sdist: If True, run `setup.py sdist` after creating the directory.
    create_wheel: If True, run `setup.py bdist_wheel` (requires `wheel`).
    force: If True, remove an already existing package directory first.
    silent: If True, the printer is created with `no_print=True`.

    Validation failures are reported through `msg.fail(..., exits=1)`; an
    invalid meta additionally prints the schema errors and calls `sys.exit(1)`.
    """
    msg = Printer(no_print=silent, pretty=not silent)
    input_path = util.ensure_path(input_dir)
    output_path = util.ensure_path(output_dir)
    meta_path = util.ensure_path(meta_path)
    if create_wheel and not has_wheel():
        err = "Generating a binary .whl file requires wheel to be installed"
        msg.fail(err, "pip install wheel", exits=1)
    if not input_path or not input_path.exists():
        msg.fail("Can't locate pipeline data", input_path, exits=1)
    if not output_path or not output_path.exists():
        msg.fail("Output directory not found", output_path, exits=1)
    if create_sdist or create_wheel:
        opts = ["sdist" if create_sdist else "", "wheel" if create_wheel else ""]
        msg.info(f"Building package artifacts: {', '.join(opt for opt in opts if opt)}")
    for code_path in code_paths:
        if not code_path.exists():
            msg.fail("Can't find code file", code_path, exits=1)
        # Import the code here so it's available when model is loaded (via
        # get_meta helper). Also verifies that everything works
        util.import_file(code_path.stem, code_path)
    if code_paths:
        msg.good(f"Including {len(code_paths)} Python module(s) with custom code")
    if meta_path and not meta_path.exists():
        msg.fail("Can't find pipeline meta.json", meta_path, exits=1)
    # Use the normalised `input_path`: joining the raw `input_dir` with "/"
    # raises a TypeError when the caller passes a string.
    meta_path = meta_path or input_path / "meta.json"
    if not meta_path.exists() or not meta_path.is_file():
        msg.fail("Can't load pipeline meta.json", meta_path, exits=1)
    meta = srsly.read_json(meta_path)
    meta = get_meta(input_dir, meta)
    if meta["requirements"]:
        msg.good(
            f"Including {len(meta['requirements'])} package requirement(s) from "
            f"meta and config",
            ", ".join(meta["requirements"]),
        )
    if name is not None:
        if not name.isidentifier():
            msg.fail(
                f"Model name ('{name}') is not a valid module name. "
                "This is required so it can be imported as a module.",
                "We recommend names that use ASCII A-Z, a-z, _ (underscore), "
                "and 0-9. "
                "For specific details see: https://docs.python.org/3/reference/lexical_analysis.html#identifiers",
                exits=1,
            )
        if not _is_permitted_package_name(name):
            msg.fail(
                f"Model name ('{name}') is not a permitted package name. "
                "This is required to correctly load the model with spacy.load.",
                "We recommend names that use ASCII A-Z, a-z, _ (underscore), "
                "and 0-9. "
                "For specific details see: https://www.python.org/dev/peps/pep-0426/#name",
                exits=1,
            )
        meta["name"] = name
    if version is not None:
        meta["version"] = version
    if not create_meta:  # only print if user doesn't want to overwrite
        msg.good("Loaded meta.json from file", meta_path)
    else:
        meta = generate_meta(meta, msg)
    errors = validate(ModelMetaSchema, meta)
    if errors:
        msg.fail("Invalid pipeline meta.json")
        print("\n".join(errors))
        sys.exit(1)
    model_name = meta["name"]
    # Prefix the name with the language code unless it already has it.
    if not model_name.startswith(meta["lang"] + "_"):
        model_name = f"{meta['lang']}_{model_name}"
    model_name_v = model_name + "-" + meta["version"]
    # Same reasoning as for `input_path` above: `output_dir` may be a string.
    main_path = output_path / model_name_v
    package_path = main_path / model_name
    if package_path.exists():
        if force:
            shutil.rmtree(str(package_path))
        else:
            msg.fail(
                "Package directory already exists",
                "Please delete the directory and try again, or use the "
                "`--force` flag to overwrite existing directories.",
                exits=1,
            )
    Path.mkdir(package_path, parents=True)
    shutil.copytree(str(input_dir), str(package_path / model_name_v))
    # Surface documentation files from the copied data at the package root.
    for file_name in FILENAMES_DOCS:
        file_path = package_path / model_name_v / file_name
        if file_path.exists():
            shutil.copy(str(file_path), str(main_path))
    readme_path = main_path / "README.md"
    if not readme_path.exists():
        readme = generate_readme(meta)
        create_file(readme_path, readme)
        create_file(package_path / model_name_v / "README.md", readme)
        msg.good("Generated README.md from meta.json")
    else:
        msg.info("Using existing README.md from pipeline directory")
    imports = []
    for code_path in code_paths:
        imports.append(code_path.stem)
        shutil.copy(str(code_path), str(package_path))
    create_file(main_path / "meta.json", srsly.json_dumps(meta, indent=2))
    create_file(main_path / "setup.py", TEMPLATE_SETUP)
    create_file(main_path / "MANIFEST.in", TEMPLATE_MANIFEST)
    # The generated __init__.py imports every custom code module that was
    # copied into the package.
    init_py = TEMPLATE_INIT.format(
        imports="\n".join(f"from . import {m}" for m in imports)
    )
    create_file(package_path / "__init__.py", init_py)
    msg.good(f"Successfully created package directory '{model_name_v}'", main_path)
    if create_sdist:
        with util.working_dir(main_path):
            util.run_command([sys.executable, "setup.py", "sdist"], capture=False)
        zip_file = main_path / "dist" / f"{model_name_v}{SDIST_SUFFIX}"
        msg.good("Successfully created zipped Python package", zip_file)
    if create_wheel:
        with util.working_dir(main_path):
            util.run_command([sys.executable, "setup.py", "bdist_wheel"], capture=False)
        # The wheel file name is looked up with runs of underscores collapsed.
        wheel_name_squashed = re.sub("_+", "_", model_name_v)
        wheel = main_path / "dist" / f"{wheel_name_squashed}{WHEEL_SUFFIX}"
        msg.good("Successfully created binary wheel", wheel)
    if "__" in model_name:
        msg.warn(
            f"Model name ('{model_name}') contains a run of underscores. "
            "Runs of underscores are not significant in installed package names.",
        )
Пример #23
0
def json_dumps(data, indent=0, sort_keys=False):
    """Thin wrapper that forwards `data`, `indent` and `sort_keys` to `srsly.json_dumps`."""
    return srsly.json_dumps(data, indent=indent, sort_keys=sort_keys)
Пример #24
0
def package(
    input_dir: Path,
    output_dir: Path,
    meta_path: Optional[Path] = None,
    code_paths: List[Path] = [],  # only read, never mutated in this function
    name: Optional[str] = None,
    version: Optional[str] = None,
    create_meta: bool = False,
    create_sdist: bool = True,
    create_wheel: bool = False,
    force: bool = False,
    silent: bool = True,
) -> None:
    """Generate a Python package directory for pipeline data.

    A directory `<lang>_<name>-<version>` is created inside the output
    directory. The pipeline data is copied into it, `meta.json`, `setup.py`,
    `MANIFEST.in` and the package `__init__.py` are written, and optionally
    `setup.py sdist` / `setup.py bdist_wheel` are run in the new directory.
    Problems are reported through `msg.fail(..., exits=1)`.

    input_dir (Path): Directory containing the pipeline data.
    output_dir (Path): Existing directory to create the package directory in.
    meta_path (Optional[Path]): meta.json to use. Defaults to the meta.json
        inside the input directory.
    code_paths (List[Path]): Python files that are imported up front, copied
        into the package and imported from the generated `__init__.py`.
    name (Optional[str]): If set, overrides `meta["name"]`.
    version (Optional[str]): If set, overrides `meta["version"]`.
    create_meta (bool): Pass the loaded meta through `generate_meta` instead
        of using it as loaded.
    create_sdist (bool): Run `setup.py sdist` in the package directory.
    create_wheel (bool): Run `setup.py bdist_wheel` in the package directory.
    force (bool): Remove an already existing package directory instead of
        failing.
    silent (bool): Passed to the printer as `no_print` (and `pretty` is set
        to the opposite).
    """
    msg = Printer(no_print=silent, pretty=not silent)
    input_path = util.ensure_path(input_dir)
    output_path = util.ensure_path(output_dir)
    meta_path = util.ensure_path(meta_path)
    if create_wheel and not has_wheel():
        err = "Generating a binary .whl file requires wheel to be installed"
        msg.fail(err, "pip install wheel", exits=1)
    if not input_path or not input_path.exists():
        msg.fail("Can't locate pipeline data", input_path, exits=1)
    if not output_path or not output_path.exists():
        msg.fail("Output directory not found", output_path, exits=1)
    if create_sdist or create_wheel:
        artifacts = [
            label
            for label, wanted in (("sdist", create_sdist), ("wheel", create_wheel))
            if wanted
        ]
        msg.info(f"Building package artifacts: {', '.join(artifacts)}")
    for code_path in code_paths:
        if not code_path.exists():
            msg.fail("Can't find code file", code_path, exits=1)
        # Import the code here so it's available when model is loaded (via
        # get_meta helper). Also verifies that everything works
        util.import_file(code_path.stem, code_path)
    if code_paths:
        msg.good(f"Including {len(code_paths)} Python module(s) with custom code")
    if meta_path and not meta_path.exists():
        msg.fail("Can't find pipeline meta.json", meta_path, exits=1)
    # From here on only the normalized paths are used, so the raw arguments
    # are never combined with "/" or handed to helpers directly.
    meta_path = meta_path or input_path / "meta.json"
    if not meta_path.exists() or not meta_path.is_file():
        msg.fail("Can't load pipeline meta.json", meta_path, exits=1)
    meta = srsly.read_json(meta_path)
    meta = get_meta(input_path, meta)
    if name is not None:
        meta["name"] = name
    if version is not None:
        meta["version"] = version
    if not create_meta:  # only print if user doesn't want to overwrite
        msg.good("Loaded meta.json from file", meta_path)
    else:
        meta = generate_meta(meta, msg)
    errors = validate(ModelMetaSchema, meta)
    if errors:
        msg.fail("Invalid pipeline meta.json")
        print("\n".join(errors))
        sys.exit(1)
    model_name = meta["lang"] + "_" + meta["name"]
    model_name_v = model_name + "-" + meta["version"]
    main_path = output_path / model_name_v
    package_path = main_path / model_name
    if package_path.exists():
        if force:
            shutil.rmtree(str(package_path))
        else:
            msg.fail(
                "Package directory already exists",
                "Please delete the directory and try again, or use the "
                "`--force` flag to overwrite existing directories.",
                exits=1,
            )
    package_path.mkdir(parents=True)
    shutil.copytree(str(input_path), str(package_path / model_name_v))
    license_path = package_path / model_name_v / "LICENSE"
    if license_path.exists():
        # Move onto an explicit file path: moving into the directory raises
        # shutil.Error when a LICENSE from a previous (forced) build is
        # still present in main_path.
        shutil.move(str(license_path), str(main_path / "LICENSE"))
    imports = []
    for code_path in code_paths:
        imports.append(code_path.stem)
        shutil.copy(str(code_path), str(package_path))
    create_file(main_path / "meta.json", srsly.json_dumps(meta, indent=2))
    create_file(main_path / "setup.py", TEMPLATE_SETUP)
    create_file(main_path / "MANIFEST.in", TEMPLATE_MANIFEST)
    init_py = TEMPLATE_INIT.format(
        imports="\n".join(f"from . import {m}" for m in imports)
    )
    create_file(package_path / "__init__.py", init_py)
    msg.good(f"Successfully created package '{model_name_v}'", main_path)
    if create_sdist:
        with util.working_dir(main_path):
            util.run_command([sys.executable, "setup.py", "sdist"], capture=False)
        zip_file = main_path / "dist" / f"{model_name_v}{SDIST_SUFFIX}"
        msg.good("Successfully created zipped Python package", zip_file)
    if create_wheel:
        with util.working_dir(main_path):
            util.run_command(
                [sys.executable, "setup.py", "bdist_wheel"], capture=False
            )
        wheel = main_path / "dist" / f"{model_name_v}{WHEEL_SUFFIX}"
        msg.good("Successfully created binary wheel", wheel)
Пример #25
0
def dqn_training(num_episodes, max_steps=500, display_action=False):
    """
    Run the DQN training loop and report per-episode metrics.

    num_episodes: int
        number of episodes
    max_steps: int
        an episode is cut off once the zero-based step index `t` exceeds
        this value, even if the environment has not reported `done`
    display_action: bool
        if true, print the selected action at every step

    After every episode the score, rolling mean score, step index, duration
    and steps per second are printed as JSON lines (once with "metric" keys,
    once with "chart" keys) and the recorded per-step history is written to
    a JSON file in STATE_DIR.
    """
    score_history = scoreAverage(report_mean_score_over_n)
    for i_episode in range(num_episodes):
        epoch = i_episode + 1
        # Initialize the environment and state
        env.reset()
        last_screen = get_screen()
        current_screen = get_screen()
        # NOTE(review): the initial state is a screen difference, but every
        # later state is the raw current screen - confirm this is intended.
        state = current_screen - last_screen
        total_reward = 0
        episode_start_time = datetime.now()
        statememory = []
        for t in count():
            # Select and perform an action
            action = select_action(state, SELECT_ACTION_BIAS_LIST)
            if display_action:
                print("action: ", action.squeeze())
            _, reward, done, info = env.step(action)

            # reward[0] is stored as the scaled reward (also fed to the
            # replay memory), reward[1] is accumulated into the score.
            statememory.append({
                "action": action.data[0].item(),
                "reward": reward[1],
                "scaled_reward": reward[0],
                "info": info,
                "done": done
            })
            total_reward += reward[1]

            reward = torch.tensor([reward[0]], device=device)

            # Observe new state
            current_screen = get_screen().to(device)
            next_state = None if done else current_screen

            # Store the transition in memory
            memory.push(state, action, next_state, reward)

            # Move to the next state
            state = next_state

            # Perform one step of the optimization (on the target network)
            optimize_model()
            if done or t > max_steps:
                episode_time = (datetime.now() -
                                episode_start_time).total_seconds()
                score_history.push(total_reward)
                rolling_mean = score_history.mean()
                # A coarse clock can report a zero duration; report 0.0
                # instead of raising ZeroDivisionError in that case.
                if episode_time > 0:
                    steps_per_second = float(t) / episode_time
                else:
                    steps_per_second = 0.0
                metrics = [
                    ("score", total_reward),
                    ("rolling mean score", rolling_mean),
                    ("steps this episode", t),
                    ("episode duration", episode_time),
                    ("steps per second", steps_per_second),
                ]

                # floyd metrics
                for label, value in metrics:
                    print(
                        f'{{"metric": "{label}", "value": {value}, "epoch": {epoch}}}'
                    )

                # paperspace
                # {"chart": "<identifier>", "y": <value>, "x": <value>}
                for label, value in metrics:
                    chart = label.replace(" ", "_")
                    print(f'{{"chart": "{chart}", "y": {value}, "x": {epoch}}}')

                filename = os.path.join(
                    STATE_DIR,
                    f'gamedata-{GAME_NAME}-{LEVEL}-{epoch:06}.json')
                print(f"Writing game history to '{filename}'")
                # The with-block closes the file; no explicit close needed.
                with open(filename, "w") as f:
                    f.write(json_dumps(statememory))

                break

        # Update the target network
        if i_episode % TARGET_UPDATE == 0:
            target_net.load_state_dict(policy_net.state_dict())

    print('Completed training')

    # env.render(close=True)
    env.close()
Пример #26
0
 def to_bytes(self, **_kwargs):
     """Serialize this object's config to a bytestring.

     Only a single "cfg" entry is registered; it lazily JSON-dumps the
     result of `self._get_config()`. Keyword arguments are accepted but
     not used.

     RETURNS: The result of `util.to_bytes` for that serializer mapping,
         called with an empty list as second argument.
     """
     def _dump_cfg():
         return srsly.json_dumps(self._get_config())

     serializers = OrderedDict()
     serializers["cfg"] = _dump_cfg
     return util.to_bytes(serializers, [])
Пример #27
0
 def to_bytes(self, **kwargs):
     """Serialize this object's config to a bytestring.

     Only a single "cfg" entry is registered; it lazily JSON-dumps
     `self.cfg`. Keyword arguments are accepted but not used.

     RETURNS: The result of `util.to_bytes` for that serializer mapping,
         called with an empty list as second argument.
     """
     def _dump_cfg():
         return srsly.json_dumps(self.cfg)

     serializers = {"cfg": _dump_cfg}
     return util.to_bytes(serializers, [])