def to_disk(self, path, exclude=tuple(), disable=None):
    """Save the current state to a directory. If a model is loaded, this
    will include the model.

    path (unicode or Path): Path to a directory, which will be created if
        it doesn't exist.
    exclude (list): Names of components or serialization fields to exclude.

    DOCS: https://spacy.io/api/language#to_disk
    """
    if disable is not None:
        deprecation_warning(Warnings.W014)
        exclude = disable
    path = util.ensure_path(path)
    serializers = OrderedDict()
    serializers["tokenizer"] = lambda p: self.tokenizer.to_disk(p, exclude=["vocab"])
    serializers["meta.json"] = lambda p: p.open("w").write(srsly.json_dumps(self.meta))
    for name, proc in self.pipeline:
        if not hasattr(proc, "name"):
            continue
        if name in exclude:
            continue
        if not hasattr(proc, "to_disk"):
            continue
        serializers[name] = lambda p, proc=proc: proc.to_disk(p, exclude=["vocab"])
    serializers["vocab"] = lambda p: self.vocab.to_disk(p)
    util.to_disk(path, serializers, exclude)
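# A minimal usage sketch for the `to_disk` method above, assuming a spaCy
# installation; the pipeline and output path are hypothetical examples.
import spacy

nlp = spacy.blank("en")
nlp.to_disk("/tmp/my_model", exclude=["tokenizer"])  # skip the tokenizer files
nlp2 = spacy.blank("en").from_disk("/tmp/my_model", exclude=["tokenizer"])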
def package(input_dir, output_dir, meta_path=None, create_meta=False, force=False):
    """
    Generate Python package for model data, including meta and required
    installation files. A new directory will be created in the specified
    output directory, and model data will be copied over. If --create-meta
    is set and a meta.json already exists in the output directory, the
    existing values will be used as the defaults in the command-line prompt.
    """
    msg = Printer()
    input_path = util.ensure_path(input_dir)
    output_path = util.ensure_path(output_dir)
    meta_path = util.ensure_path(meta_path)
    if not input_path or not input_path.exists():
        msg.fail("Can't locate model data", input_path, exits=1)
    if not output_path or not output_path.exists():
        msg.fail("Output directory not found", output_path, exits=1)
    if meta_path and not meta_path.exists():
        msg.fail("Can't find model meta.json", meta_path, exits=1)

    meta_path = meta_path or input_path / "meta.json"
    if meta_path.is_file():
        meta = srsly.read_json(meta_path)
        if not create_meta:  # only print if user doesn't want to overwrite
            msg.good("Loaded meta.json from file", meta_path)
        else:
            meta = generate_meta(input_dir, meta, msg)
    for key in ("lang", "name", "version"):
        if key not in meta or meta[key] == "":
            msg.fail(
                "No '{}' setting found in meta.json".format(key),
                "This setting is required to build your package.",
                exits=1,
            )
    model_name = meta["lang"] + "_" + meta["name"]
    model_name_v = model_name + "-" + meta["version"]
    main_path = output_path / model_name_v
    package_path = main_path / model_name

    if package_path.exists():
        if force:
            shutil.rmtree(path2str(package_path))
        else:
            msg.fail(
                "Package directory already exists",
                "Please delete the directory and try again, or use the "
                "`--force` flag to overwrite existing directories.",
                exits=1,
            )
    Path.mkdir(package_path, parents=True)
    shutil.copytree(path2str(input_path), path2str(package_path / model_name_v))
    create_file(main_path / "meta.json", srsly.json_dumps(meta, indent=2))
    create_file(main_path / "setup.py", TEMPLATE_SETUP)
    create_file(main_path / "MANIFEST.in", TEMPLATE_MANIFEST)
    create_file(package_path / "__init__.py", TEMPLATE_INIT)
    msg.good("Successfully created package '{}'".format(model_name_v), main_path)
    msg.text("To build the package, run `python setup.py sdist` in this directory.")
def _save_model(epoch, is_temp=False):
    is_temp_str = ".temp" if is_temp else ""
    with model.use_params(optimizer.averages):
        with (output_dir / ("model%d%s.bin" % (epoch, is_temp_str))).open("wb") as file_:
            file_.write(model.tok2vec.to_bytes())
        log = {
            "nr_word": tracker.nr_word,
            "loss": tracker.loss,
            "epoch_loss": tracker.epoch_loss,
            "epoch": epoch,
        }
        with (output_dir / "log.jsonl").open("a") as file_:
            file_.write(srsly.json_dumps(log) + "\n")
def callback():
    code = request.args.get("code")
    token_endpoint = google_config["token_endpoint"]
    token_url, headers, body = client.prepare_token_request(
        token_endpoint,
        authorization_response=request.url,
        redirect_url=request.base_url,
        code=code,
    )
    token_response = requests.post(
        token_url, headers=headers, data=body, auth=(CLIENT_ID, CLIENT_SECRET)
    )
    client.parse_request_body_response(json_dumps(token_response.json()))
    # Now get the user email
    userinfo_endpoint = google_config["userinfo_endpoint"]
    uri, headers, body = client.add_token(userinfo_endpoint)
    userinfo_response = requests.get(uri, headers=headers, data=body)
    print(userinfo_response.json())
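# Minimal hedged setup assumed by the OAuth callback above (Flask + oauthlib +
# requests); the client credentials are illustrative placeholders, and
# json_dumps is assumed to be srsly.json_dumps.
import requests
from flask import Flask, request
from oauthlib.oauth2 import WebApplicationClient
from srsly import json_dumps

app = Flask(__name__)
CLIENT_ID = "your-google-client-id"          # placeholder
CLIENT_SECRET = "your-google-client-secret"  # placeholder
client = WebApplicationClient(CLIENT_ID)
# Google's published OpenID Connect configuration (token/userinfo endpoints)
google_config = requests.get(
    "https://accounts.google.com/.well-known/openid-configuration"
).json()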
def to_bytes(self, *, exclude=tuple()):
    """Serialize the pipe to a bytestring.

    exclude (Iterable[str]): String names of serialization fields to exclude.
    RETURNS (bytes): The serialized object.

    DOCS: https://spacy.io/api/entitylinker#to_bytes
    """
    self._validate_serialization_attrs()
    serialize = {}
    if hasattr(self, "cfg") and self.cfg is not None:
        serialize["cfg"] = lambda: srsly.json_dumps(self.cfg)
    serialize["vocab"] = lambda: self.vocab.to_bytes(exclude=exclude)
    serialize["kb"] = self.kb.to_bytes
    serialize["model"] = self.model.to_bytes
    return util.to_bytes(serialize, exclude)
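# The serializers-dict pattern used above can be sketched without spaCy: each
# key maps to a zero-argument callable, and the results are packed into a
# single msgpack blob. This is an illustrative stand-in, not the actual
# spaCy util.to_bytes helper.
import srsly

def pack_to_bytes(serializers, exclude=()):
    # Call every serializer not listed in `exclude` and msgpack the results.
    return srsly.msgpack_dumps(
        {key: getter() for key, getter in serializers.items() if key not in exclude}
    )

blob = pack_to_bytes({"cfg": lambda: srsly.json_dumps({"labels": ["PERSON"]})})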
def to_str(self) -> str:
    """Write the config to a string."""
    flattened = get_configparser()
    queue: List[Tuple[tuple, "Config"]] = [(tuple(), self)]
    for path, node in queue:
        for key, value in node.items():
            if hasattr(value, "items"):
                queue.append((path + (key,), value))
            else:
                assert path
                section_name = ".".join(path)
                if not flattened.has_section(section_name):
                    flattened.add_section(section_name)
                flattened.set(section_name, key, srsly.json_dumps(value))
    string_io = io.StringIO()
    flattened.write(string_io)
    return string_io.getvalue().strip()
def get_ner_stats(
    data: List[Example], serialize: bool = False, return_examples: bool = False
) -> Union[NERStats, str, None]:
    """Compute statistics for NER data

    Args:
        data (List[Example]): Data as a List of examples
        serialize (bool, optional): Serialize to a JSON string for printing.
        return_examples (bool, optional): Whether to return examples per type

    Returns:
        Union[NERStats, str, None]: NERStats with counts per type, or a JSON
            string if serialize is True
    """
    annotations_per_type: DefaultDict[str, Any] = defaultdict(int)
    examples: DefaultDict[str, Any] = defaultdict(list)
    n_examples_no_entities = 0
    for e in data:
        if not e.spans:
            n_examples_no_entities += 1
            examples[NONE].append(e)
        else:
            for s in e.spans:
                annotations_per_type[s.label] += 1
                examples[s.label].append(e)
    sorted_anns_by_count = {
        a[0]: a[1]
        for a in sorted(annotations_per_type.items(), key=lambda x: x[1], reverse=True)
    }
    stats = NERStats(
        n_examples=len(data),
        n_examples_no_entities=n_examples_no_entities,
        n_annotations=sum(annotations_per_type.values()),
        n_annotations_per_type=sorted_anns_by_count,
    )
    if return_examples:
        stats.examples_with_type = examples
    if serialize:
        return srsly.json_dumps(stats.dict(), indent=4)
    else:
        return stats
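# Hypothetical usage sketch for `get_ner_stats` above. The real library defines
# its own Example/Span types; the dataclasses below are illustrative stand-ins
# that only provide the `spans` and `label` attributes the function reads.
from dataclasses import dataclass, field
from typing import List

@dataclass
class Span:
    label: str

@dataclass
class Example:
    text: str
    spans: List[Span] = field(default_factory=list)

data = [
    Example("Apple hired Tim", spans=[Span("ORG"), Span("PERSON")]),
    Example("nothing annotated here"),
]
print(get_ner_stats(data, serialize=True))  # JSON string with per-type counts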
def _save_model(epoch, is_temp=False):
    is_temp_str = ".temp" if is_temp else ""
    with model.use_params(optimizer.averages):
        with (output_dir / ("model%d%s.bin" % (epoch, is_temp_str))).open("wb") as file_:
            file_.write(model.tok2vec.to_bytes())
        with (output_dir / ("full_model%d%s.bin" % (epoch, is_temp_str))).open("wb") as file_:
            file_.write(model.to_bytes())
        log = {
            "nr_word": tracker.nr_word,
            "loss": tracker.loss,
            "epoch_loss": tracker.epoch_loss,
            "epoch": epoch,
        }
        with (output_dir / "log.jsonl").open("a") as file_:
            file_.write(srsly.json_dumps(log) + "\n")
def try_dump_json(value: Any, data: Union[Dict[str, dict], Config, str] = "") -> str:
    """Dump a config value as JSON and output user-friendly error if it fails."""
    # Special case if we have a variable: it's already a string so don't dump
    # to preserve ${x:y} vs. "${x:y}"
    if isinstance(value, str) and VARIABLE_RE.search(value):
        return value
    if isinstance(value, str) and value.replace(".", "", 1).isdigit():
        # Work around values that are strings but numbers
        value = f'"{value}"'
    try:
        return srsly.json_dumps(value)
    except Exception as e:
        err_msg = (
            f"Couldn't serialize config value of type {type(value)}: {e}. Make "
            f"sure all values in your config are JSON-serializable. If you want "
            f"to include Python objects, use a registered function that returns "
            f"the object instead."
        )
        raise ConfigValidationError(data, [], message=err_msg) from e
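# A small hedged sketch of how `try_dump_json` above behaves, assuming it is
# called inside the config system that defines VARIABLE_RE and
# ConfigValidationError: ordinary values are dumped as JSON, while
# interpolation variables like ${paths:train} pass through unchanged so they
# are not written to the config as quoted strings.
print(try_dump_json(True))              # JSON literal: true
print(try_dump_json(["a", "b"]))        # JSON list
print(try_dump_json("${paths:train}"))  # returned verbatim, not quoted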
def to_str(self) -> str:
    """Write the config to a string."""
    flattened = get_configparser()
    queue: List[Tuple[tuple, "Config"]] = [(tuple(), self)]
    for path, node in queue:
        section_name = ".".join(path)
        if path and path[-1] != "*" and not flattened.has_section(section_name):
            # Always create sections for non-'*' sections, not only if they
            # have leaf entries, as we don't want to expand blocks that are
            # undefined
            flattened.add_section(section_name)
        for key, value in node.items():
            if hasattr(value, "items"):
                queue.append((path + (key,), value))
            else:
                flattened.set(section_name, key, srsly.json_dumps(value))
    string_io = io.StringIO()
    flattened.write(string_io)
    return string_io.getvalue().strip()
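# Hedged usage sketch for `to_str` above, assuming thinc's Config class (which
# this method belongs to in some versions): nested sections become dotted
# [section.subsection] headers and leaf values are JSON-encoded with
# srsly.json_dumps.
from thinc.api import Config

cfg = Config({"training": {"dropout": 0.2, "optimizer": {"learn_rate": 0.001}}})
print(cfg.to_str())
# Expected shape (exact ordering may vary):
# [training]
# dropout = 0.2
# [training.optimizer]
# learn_rate = 0.001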
def to_bytes(self, exclude=tuple(), disable=None, **kwargs):
    """Serialize the current state to a binary string.

    exclude (list): Names of components or serialization fields to exclude.
    RETURNS (bytes): The serialized form of the `Language` object.

    DOCS: https://spacy.io/api/language#to_bytes
    """
    if disable is not None:
        deprecation_warning(Warnings.W014)
        exclude = disable
    serializers = OrderedDict()
    serializers["vocab"] = lambda: self.vocab.to_bytes()
    serializers["tokenizer"] = lambda: self.tokenizer.to_bytes(exclude=["vocab"])
    serializers["meta.json"] = lambda: srsly.json_dumps(self.meta)
    for name, proc in self.pipeline:
        if name in exclude:
            continue
        if not hasattr(proc, "to_bytes"):
            continue
        serializers[name] = lambda proc=proc: proc.to_bytes(exclude=["vocab"])
    exclude = util.get_serialization_exclude(serializers, exclude, kwargs)
    return util.to_bytes(serializers, exclude)
def dataset_stats(data: List[Dict[str, object]], serialize=False):
    labels = defaultdict(int)
    examples = defaultdict(list)
    n_examples_no_entities = 0
    for e in data:
        if not e['spans']:
            n_examples_no_entities += 1
            examples['NONE'].append(e)
        else:
            for s in e['spans']:
                label = s['label']
                labels[label] += 1
                examples[label].append(e)
    res = {
        'n_examples': len(data),
        'n_examples_no_entities': n_examples_no_entities,
        'ents_per_type': labels,
    }
    if serialize:
        return srsly.json_dumps(res, indent=4)
    else:
        res['examples_with_type'] = examples
        return res
def to_bytes(self, exclude=tuple(), disable=None, **kwargs):
    return srsly.msgpack_dumps({"dummy": srsly.json_dumps(None)})
def create_wikigraph(
    output_path: Path,
    wiki="en",
    version="latest",
    dumps_path: Path = None,
    max_workers: int = None,
    silent: bool = None,
    force: bool = None,
):
    """
    Create a `WikiGraph` from a specific dump.

    It can then be used by directly loading it, or it can be packaged
    with the `package-wikigraph` command.

    Parameters
    ----------
    output_path : Path
        Path in which to store the `WikiGraph`.
    wiki : str, optional
        Wikipedia dump type to use, by default "en".
    version : str, optional
        Wikipedia dump version to use, by default "latest".
    dumps_path : Path, optional
        Path in which to find previously downloaded dumps, or where to
        save dumps downloaded in this call, by default None.
    max_workers : int, optional
        Maximum number of processes to use, by default None.
    silent : bool, optional
        Do not print anything to stdout, by default None.
    force : bool, optional
        Overwrite content in output_path, if any, by default None.
    """
    if not output_path.exists():
        output_path.mkdir()
        msg.good(f"Created output directory: {output_path}")
    graph_name = f"{wiki}wiki_core"
    graph_path = output_path.joinpath(graph_name)
    if not force and graph_path.exists():
        msg.fail(
            f"Output path already contains {graph_name} directory",
            "Use --force to overwrite it",
            exits=1,
        )
    kwargs = {
        "dumps_path": dumps_path,
        "max_workers": max_workers,
        "wiki": wiki,
        "version": version,
        "verbose": not silent,
    }
    wg = WikiGraph.build(**kwargs)
    if not graph_path.exists():
        graph_path.mkdir()
    graph_format = "picklez"
    with msg.loading("dump to disk..."):
        wg.dump(graph_path, graph_format=graph_format)
    meta = get_meta()
    meta["name"] = graph_name
    meta["version"] = wg.version
    meta["graph_format"] = graph_format
    meta["spikex_version"] = f">={spikex_version}"
    meta["fullname"] = f"{graph_name}-{spikex_version}"
    meta["sources"].append("Wikipedia")
    meta_path = graph_path.joinpath("meta.json")
    meta_path.write_text(json_dumps(meta, indent=2))
    msg.good(f"Successfully created {graph_name}.")
def pretrain(
    texts_loc,
    vectors_model,
    output_dir,
    width=96,
    depth=4,
    embed_rows=2000,
    loss_func="cosine",
    use_vectors=False,
    dropout=0.2,
    nr_iter=1000,
    batch_size=3000,
    max_length=500,
    min_length=5,
    seed=0,
):
    """
    Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components,
    using an approximate language-modelling objective. Specifically, we load
    pre-trained vectors, and train a component like a CNN, BiLSTM, etc to
    predict vectors which match the pre-trained ones. The weights are saved
    to a directory after each epoch. You can then pass a path to one of
    these pre-trained weights files to the 'spacy train' command.

    This technique may be especially helpful if you have little labelled
    data. However, it's still quite experimental, so your mileage may vary.

    To load the weights back in during 'spacy train', you need to ensure
    all settings are the same between pretraining and training. The API and
    errors around this need some improvement.
    """
    config = dict(locals())
    msg = Printer()
    util.fix_random_seed(seed)

    has_gpu = prefer_gpu()
    msg.info("Using GPU" if has_gpu else "Not using GPU")

    output_dir = Path(output_dir)
    if not output_dir.exists():
        output_dir.mkdir()
        msg.good("Created output directory")
    srsly.write_json(output_dir / "config.json", config)
    msg.good("Saved settings to config.json")

    # Load texts from file or stdin
    if texts_loc != "-":  # reading from a file
        texts_loc = Path(texts_loc)
        if not texts_loc.exists():
            msg.fail("Input text file doesn't exist", texts_loc, exits=1)
        with msg.loading("Loading input texts..."):
            texts = list(srsly.read_jsonl(texts_loc))
        msg.good("Loaded input texts")
        random.shuffle(texts)
    else:  # reading from stdin
        msg.text("Reading input text from stdin...")
        texts = srsly.read_jsonl("-")

    with msg.loading("Loading model '{}'...".format(vectors_model)):
        nlp = util.load_model(vectors_model)
    msg.good("Loaded model '{}'".format(vectors_model))
    pretrained_vectors = None if not use_vectors else nlp.vocab.vectors.name
    model = create_pretraining_model(
        nlp,
        Tok2Vec(
            width,
            embed_rows,
            conv_depth=depth,
            pretrained_vectors=pretrained_vectors,
            bilstm_depth=0,  # Requires PyTorch. Experimental.
            cnn_maxout_pieces=3,  # You can try setting this higher
            subword_features=True,  # Set to False for Chinese etc
        ),
    )
    optimizer = create_default_optimizer(model.ops)
    tracker = ProgressTracker(frequency=10000)
    msg.divider("Pre-training tok2vec layer")
    row_settings = {"widths": (3, 10, 10, 6, 4), "aligns": ("r", "r", "r", "r", "r")}
    msg.row(("#", "# Words", "Total Loss", "Loss", "w/s"), **row_settings)
    for epoch in range(nr_iter):
        for batch in util.minibatch_by_words(
            ((text, None) for text in texts), size=batch_size
        ):
            docs = make_docs(
                nlp,
                [text for (text, _) in batch],
                max_length=max_length,
                min_length=min_length,
            )
            loss = make_update(model, docs, optimizer, objective=loss_func, drop=dropout)
            progress = tracker.update(epoch, loss, docs)
            if progress:
                msg.row(progress, **row_settings)
                if texts_loc == "-" and tracker.words_per_epoch[epoch] >= 10 ** 7:
                    break
        with model.use_params(optimizer.averages):
            with (output_dir / ("model%d.bin" % epoch)).open("wb") as file_:
                file_.write(model.tok2vec.to_bytes())
            log = {
                "nr_word": tracker.nr_word,
                "loss": tracker.loss,
                "epoch_loss": tracker.epoch_loss,
                "epoch": epoch,
            }
            with (output_dir / "log.jsonl").open("a") as file_:
                file_.write(srsly.json_dumps(log) + "\n")
        tracker.epoch_loss = 0.0
        if texts_loc != "-":
            # Reshuffle the texts if texts were loaded from a file
            random.shuffle(texts)
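# Hedged usage sketch for the pretraining function above; the paths and the
# vectors model name are hypothetical examples, and the call assumes the
# function's own dependencies (Printer, util, Tok2Vec, etc.) are importable.
pretrain(
    texts_loc="texts.jsonl",            # JSONL file of raw texts
    vectors_model="en_vectors_web_lg",  # any spaCy model with word vectors
    output_dir="pretrain-out",
    nr_iter=10,
)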
def main(path):
    reddit = Reddit(path)
    for comment in reddit:
        print(srsly.json_dumps(comment))
def get_emails():
    result = schema.execute(default_query)
    return srsly.json_dumps(result.data)
def to_bytes(self, **kwargs) -> bytes:
    serializers = {"cfg": lambda: srsly.json_dumps(self._get_config())}
    return util.to_bytes(serializers, [])
def package(
    input_dir: Path,
    output_dir: Path,
    meta_path: Optional[Path] = None,
    code_paths: List[Path] = [],
    name: Optional[str] = None,
    version: Optional[str] = None,
    create_meta: bool = False,
    create_sdist: bool = True,
    create_wheel: bool = False,
    force: bool = False,
    silent: bool = True,
) -> None:
    msg = Printer(no_print=silent, pretty=not silent)
    input_path = util.ensure_path(input_dir)
    output_path = util.ensure_path(output_dir)
    meta_path = util.ensure_path(meta_path)
    if create_wheel and not has_wheel():
        err = "Generating a binary .whl file requires wheel to be installed"
        msg.fail(err, "pip install wheel", exits=1)
    if not input_path or not input_path.exists():
        msg.fail("Can't locate pipeline data", input_path, exits=1)
    if not output_path or not output_path.exists():
        msg.fail("Output directory not found", output_path, exits=1)
    if create_sdist or create_wheel:
        opts = ["sdist" if create_sdist else "", "wheel" if create_wheel else ""]
        msg.info(f"Building package artifacts: {', '.join(opt for opt in opts if opt)}")
    for code_path in code_paths:
        if not code_path.exists():
            msg.fail("Can't find code file", code_path, exits=1)
        # Import the code here so it's available when model is loaded (via
        # get_meta helper). Also verifies that everything works
        util.import_file(code_path.stem, code_path)
    if code_paths:
        msg.good(f"Including {len(code_paths)} Python module(s) with custom code")
    if meta_path and not meta_path.exists():
        msg.fail("Can't find pipeline meta.json", meta_path, exits=1)
    meta_path = meta_path or input_dir / "meta.json"
    if not meta_path.exists() or not meta_path.is_file():
        msg.fail("Can't load pipeline meta.json", meta_path, exits=1)
    meta = srsly.read_json(meta_path)
    meta = get_meta(input_dir, meta)
    if meta["requirements"]:
        msg.good(
            f"Including {len(meta['requirements'])} package requirement(s) from "
            f"meta and config",
            ", ".join(meta["requirements"]),
        )
    if name is not None:
        if not name.isidentifier():
            msg.fail(
                f"Model name ('{name}') is not a valid module name. "
                "This is required so it can be imported as a module.",
                "We recommend names that use ASCII A-Z, a-z, _ (underscore), "
                "and 0-9. "
                "For specific details see: https://docs.python.org/3/reference/lexical_analysis.html#identifiers",
                exits=1,
            )
        if not _is_permitted_package_name(name):
            msg.fail(
                f"Model name ('{name}') is not a permitted package name. "
                "This is required to correctly load the model with spacy.load.",
                "We recommend names that use ASCII A-Z, a-z, _ (underscore), "
                "and 0-9. "
                "For specific details see: https://www.python.org/dev/peps/pep-0426/#name",
                exits=1,
            )
        meta["name"] = name
    if version is not None:
        meta["version"] = version
    if not create_meta:  # only print if user doesn't want to overwrite
        msg.good("Loaded meta.json from file", meta_path)
    else:
        meta = generate_meta(meta, msg)
    errors = validate(ModelMetaSchema, meta)
    if errors:
        msg.fail("Invalid pipeline meta.json")
        print("\n".join(errors))
        sys.exit(1)
    model_name = meta["name"]
    if not model_name.startswith(meta["lang"] + "_"):
        model_name = f"{meta['lang']}_{model_name}"
    model_name_v = model_name + "-" + meta["version"]
    main_path = output_dir / model_name_v
    package_path = main_path / model_name
    if package_path.exists():
        if force:
            shutil.rmtree(str(package_path))
        else:
            msg.fail(
                "Package directory already exists",
                "Please delete the directory and try again, or use the "
                "`--force` flag to overwrite existing directories.",
                exits=1,
            )
    Path.mkdir(package_path, parents=True)
    shutil.copytree(str(input_dir), str(package_path / model_name_v))
    for file_name in FILENAMES_DOCS:
        file_path = package_path / model_name_v / file_name
        if file_path.exists():
            shutil.copy(str(file_path), str(main_path))
    readme_path = main_path / "README.md"
    if not readme_path.exists():
        readme = generate_readme(meta)
        create_file(readme_path, readme)
        create_file(package_path / model_name_v / "README.md", readme)
        msg.good("Generated README.md from meta.json")
    else:
        msg.info("Using existing README.md from pipeline directory")
    imports = []
    for code_path in code_paths:
        imports.append(code_path.stem)
        shutil.copy(str(code_path), str(package_path))
    create_file(main_path / "meta.json", srsly.json_dumps(meta, indent=2))
    create_file(main_path / "setup.py", TEMPLATE_SETUP)
    create_file(main_path / "MANIFEST.in", TEMPLATE_MANIFEST)
    init_py = TEMPLATE_INIT.format(
        imports="\n".join(f"from . import {m}" for m in imports)
    )
    create_file(package_path / "__init__.py", init_py)
    msg.good(f"Successfully created package directory '{model_name_v}'", main_path)
    if create_sdist:
        with util.working_dir(main_path):
            util.run_command([sys.executable, "setup.py", "sdist"], capture=False)
        zip_file = main_path / "dist" / f"{model_name_v}{SDIST_SUFFIX}"
        msg.good(f"Successfully created zipped Python package", zip_file)
    if create_wheel:
        with util.working_dir(main_path):
            util.run_command([sys.executable, "setup.py", "bdist_wheel"], capture=False)
        wheel_name_squashed = re.sub("_+", "_", model_name_v)
        wheel = main_path / "dist" / f"{wheel_name_squashed}{WHEEL_SUFFIX}"
        msg.good(f"Successfully created binary wheel", wheel)
    if "__" in model_name:
        msg.warn(
            f"Model name ('{model_name}') contains a run of underscores. "
            "Runs of underscores are not significant in installed package names.",
        )
def json_dumps(data, indent=0, sort_keys=False):
    return srsly.json_dumps(data, indent, sort_keys)
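# Usage sketch for the thin wrapper above: srsly.json_dumps accepts the data
# plus optional indent and sort_keys arguments, so forwarding them positionally
# works. The exact whitespace of the compact output depends on the srsly/ujson
# version in use.
print(json_dumps({"b": 2, "a": 1}, indent=2, sort_keys=True))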
def package(
    input_dir: Path,
    output_dir: Path,
    meta_path: Optional[Path] = None,
    code_paths: List[Path] = [],
    name: Optional[str] = None,
    version: Optional[str] = None,
    create_meta: bool = False,
    create_sdist: bool = True,
    create_wheel: bool = False,
    force: bool = False,
    silent: bool = True,
) -> None:
    msg = Printer(no_print=silent, pretty=not silent)
    input_path = util.ensure_path(input_dir)
    output_path = util.ensure_path(output_dir)
    meta_path = util.ensure_path(meta_path)
    if create_wheel and not has_wheel():
        err = "Generating a binary .whl file requires wheel to be installed"
        msg.fail(err, "pip install wheel", exits=1)
    if not input_path or not input_path.exists():
        msg.fail("Can't locate pipeline data", input_path, exits=1)
    if not output_path or not output_path.exists():
        msg.fail("Output directory not found", output_path, exits=1)
    if create_sdist or create_wheel:
        opts = ["sdist" if create_sdist else "", "wheel" if create_wheel else ""]
        msg.info(f"Building package artifacts: {', '.join(opt for opt in opts if opt)}")
    for code_path in code_paths:
        if not code_path.exists():
            msg.fail("Can't find code file", code_path, exits=1)
        # Import the code here so it's available when model is loaded (via
        # get_meta helper). Also verifies that everything works
        util.import_file(code_path.stem, code_path)
    if code_paths:
        msg.good(f"Including {len(code_paths)} Python module(s) with custom code")
    if meta_path and not meta_path.exists():
        msg.fail("Can't find pipeline meta.json", meta_path, exits=1)
    meta_path = meta_path or input_dir / "meta.json"
    if not meta_path.exists() or not meta_path.is_file():
        msg.fail("Can't load pipeline meta.json", meta_path, exits=1)
    meta = srsly.read_json(meta_path)
    meta = get_meta(input_dir, meta)
    if name is not None:
        meta["name"] = name
    if version is not None:
        meta["version"] = version
    if not create_meta:  # only print if user doesn't want to overwrite
        msg.good("Loaded meta.json from file", meta_path)
    else:
        meta = generate_meta(meta, msg)
    errors = validate(ModelMetaSchema, meta)
    if errors:
        msg.fail("Invalid pipeline meta.json")
        print("\n".join(errors))
        sys.exit(1)
    model_name = meta["lang"] + "_" + meta["name"]
    model_name_v = model_name + "-" + meta["version"]
    main_path = output_dir / model_name_v
    package_path = main_path / model_name
    if package_path.exists():
        if force:
            shutil.rmtree(str(package_path))
        else:
            msg.fail(
                "Package directory already exists",
                "Please delete the directory and try again, or use the "
                "`--force` flag to overwrite existing directories.",
                exits=1,
            )
    Path.mkdir(package_path, parents=True)
    shutil.copytree(str(input_dir), str(package_path / model_name_v))
    license_path = package_path / model_name_v / "LICENSE"
    if license_path.exists():
        shutil.move(str(license_path), str(main_path))
    imports = []
    for code_path in code_paths:
        imports.append(code_path.stem)
        shutil.copy(str(code_path), str(package_path))
    create_file(main_path / "meta.json", srsly.json_dumps(meta, indent=2))
    create_file(main_path / "setup.py", TEMPLATE_SETUP)
    create_file(main_path / "MANIFEST.in", TEMPLATE_MANIFEST)
    init_py = TEMPLATE_INIT.format(
        imports="\n".join(f"from . import {m}" for m in imports)
    )
    create_file(package_path / "__init__.py", init_py)
    msg.good(f"Successfully created package '{model_name_v}'", main_path)
    if create_sdist:
        with util.working_dir(main_path):
            util.run_command([sys.executable, "setup.py", "sdist"], capture=False)
        zip_file = main_path / "dist" / f"{model_name_v}{SDIST_SUFFIX}"
        msg.good(f"Successfully created zipped Python package", zip_file)
    if create_wheel:
        with util.working_dir(main_path):
            util.run_command([sys.executable, "setup.py", "bdist_wheel"], capture=False)
        wheel = main_path / "dist" / f"{model_name_v}{WHEEL_SUFFIX}"
        msg.good(f"Successfully created binary wheel", wheel)
def dqn_training(num_episodes, max_steps=500, display_action=False):
    """
    num_episodes: int
        number of episodes to run
    max_steps: int
        maximum number of steps per episode
    display_action: bool
        if True, print the selected action at each step
    """
    score_history = scoreAverage(report_mean_score_over_n)
    for i_episode in range(num_episodes):
        # Initialize the environment and state
        env.reset()
        last_screen = get_screen()
        current_screen = get_screen()
        state = current_screen - last_screen
        # state = get_screen().to(device)
        total_reward = 0
        episode_start_time = datetime.now()
        statememory = []  # initialise state memory
        for t in count():
            # Select and perform an action
            action = select_action(state, SELECT_ACTION_BIAS_LIST)
            if display_action:
                print("action: ", action.squeeze())
            _, reward, done, info = env.step(action)
            # Record ('action', 'reward', 'info', 'done') for this step
            this_state = {
                "action": action.data[0].item(),
                "reward": reward[1],
                "scaled_reward": reward[0],
                "info": info,
                "done": done,
            }
            statememory.append(this_state)
            total_reward += reward[1]
            reward = torch.tensor([reward[0]], device=device)
            # Observe new state
            last_screen = current_screen
            current_screen = get_screen().to(device)
            if not done:
                next_state = current_screen
            else:
                next_state = None
            # Store the transition in memory
            memory.push(state, action, next_state, reward)
            # Move to the next state
            state = next_state
            # Perform one step of the optimization (on the target network)
            optimize_model()
            if done or t > max_steps:
                episode_end_time = datetime.now()
                episode_time = (episode_end_time - episode_start_time).total_seconds()
                score_history.push(total_reward)
                # floyd metrics
                print(f'{{"metric": "score", "value": {total_reward}, "epoch": {i_episode+1}}}')
                print(f'{{"metric": "rolling mean score", "value": {score_history.mean()}, "epoch": {i_episode+1}}}')
                print(f'{{"metric": "steps this episode", "value": {t}, "epoch": {i_episode+1}}}')
                print(f'{{"metric": "episode duration", "value": {episode_time}, "epoch": {i_episode+1}}}')
                print(f'{{"metric": "steps per second", "value": {float(t) / float(episode_time)}, "epoch": {i_episode+1}}}')
                # paperspace metrics: {"chart": "<identifier>", "y": <value>, "x": <value>}
                print(f'{{"chart": "score", "y": {total_reward}, "x": {i_episode+1}}}')
                print(f'{{"chart": "rolling_mean_score", "y": {score_history.mean()}, "x": {i_episode+1}}}')
                print(f'{{"chart": "steps_this_episode", "y": {t}, "x": {i_episode+1}}}')
                print(f'{{"chart": "episode_duration", "y": {episode_time}, "x": {i_episode+1}}}')
                print(f'{{"chart": "steps_per_second", "y": {float(t) / float(episode_time)}, "x": {i_episode+1}}}')
                filename = os.path.join(
                    STATE_DIR, f'gamedata-{GAME_NAME}-{LEVEL}-{(i_episode+1):06}.json'
                )
                print(f"Writing game history to '{filename}'")
                with open(filename, "w") as f:
                    f.write(json_dumps(statememory))
                break
        # Update the target network
        if i_episode % TARGET_UPDATE == 0:
            target_net.load_state_dict(policy_net.state_dict())
    print('Completed training')
    # env.render(close=True)
    env.close()
def to_bytes(self, **_kwargs):
    serializers = OrderedDict(
        (("cfg", lambda: srsly.json_dumps(self._get_config())),)
    )
    return util.to_bytes(serializers, [])
def to_bytes(self, **kwargs):
    serializers = {
        "cfg": lambda: srsly.json_dumps(self.cfg),
    }
    return util.to_bytes(serializers, [])