# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import tensorflow as tf from absl import app, logging import neurst.utils.flags_core as flags_core from neurst.data.data_pipelines.data_pipeline import lowercase_and_remove_punctuations from neurst.data.text import Tokenizer, build_tokenizer FLAG_LIST = [ flags_core.Flag("input", dtype=flags_core.Flag.TYPE.STRING, default=None, help="The path to the input text file."), flags_core.Flag("output", dtype=flags_core.Flag.TYPE.STRING, default=None, help="The path to the output text file."), flags_core.Flag("lowercase", dtype=flags_core.Flag.TYPE.BOOLEAN, default=None, help="Whether to lowercase."), flags_core.Flag("remove_punctuation", dtype=flags_core.Flag.TYPE.BOOLEAN, default=None, help="Whether to remove the punctuations."), flags_core.ModuleFlag(Tokenizer.REGISTRY_NAME, help="The tokenizer."), ] def _main(_): arg_parser = flags_core.define_flags(FLAG_LIST, with_config_file=False) args, remaining_argv = flags_core.intelligent_parse_flags(FLAG_LIST, arg_parser) flags_core.verbose_flags(FLAG_LIST, args, remaining_argv)
# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import tensorflow as tf from absl import app, logging import neurst.utils.flags_core as flags_core from neurst.metrics import Metric, build_metric from neurst.utils.misc import flatten_string_list FLAG_LIST = [ flags_core.Flag("hypo_file", dtype=flags_core.Flag.TYPE.STRING, default=None, help="The path to hypothesis file."), flags_core.Flag("ref_file", dtype=flags_core.Flag.TYPE.STRING, default=None, multiple=True, help="The path to reference file. "), flags_core.ModuleFlag(Metric.REGISTRY_NAME, help="The metric for evaluation."), ] def evaluate(metric, hypo_file, ref_file): assert metric is not None assert hypo_file assert ref_file with tf.io.gfile.GFile(hypo_file) as fp: hypo = [line.strip() for line in fp] ref_list = [] for one_ref_file in flatten_string_list(ref_file):
# NOTE(review): this chunk begins mid-function — the enclosing `def` (a
# vocabulary-writing routine) is not visible here, so the original
# indentation of the leading statements is unknown; formatted at top level.
word_with_counts = word_with_counts[:max_vocab_size]
with tf.io.gfile.GFile(output, "w") as foutput:
    # extra slots: written first, each with a fixed count of 1000.
    for x in extra_slots_list:
        foutput.write("{}\t{}\n".format(x, 1000))
    logging.info("Plus extra %d slots to the vocabulary in the front.", len(extra_slots_list))
    # Then the vocabulary entries proper, as "word<TAB>count" lines.
    for word, count in word_with_counts:
        foutput.write("{}\t{}\n".format(word, count))


# Flags for vocabulary generation.
FLAG_LIST = [
    flags_core.Flag(
        "min_frequency", dtype=flags_core.Flag.TYPE.INTEGER, default=0,
        help="Minimum frequency of a word to be included in the vocabulary."),
    flags_core.Flag("max_vocab_size", dtype=flags_core.Flag.TYPE.INTEGER, default=None,
                    help="Maximum number of tokens in the vocabulary."),
    flags_core.Flag(
        "lowercase", dtype=flags_core.Flag.TYPE.BOOLEAN, default=None,
        help="If set to true, downcase all text before processing."),
    flags_core.Flag("input", dtype=flags_core.Flag.TYPE.STRING, default=None,
                    help="Input full vocabulary file."),
    # NOTE(review): FLAG_LIST continues past this chunk; the closing bracket
    # is not visible here.
# limitations under the License. import os import random import numpy import tensorflow as tf from absl import app, logging import neurst.utils.flags_core as flags_core from neurst.data.datasets import Dataset, build_dataset from neurst.tasks import Task, build_task from neurst.utils.compat import ModeKeys FLAG_LIST = [ flags_core.Flag("processor_id", dtype=flags_core.Flag.TYPE.INTEGER, default=None, help="The processor id."), flags_core.Flag( "num_processors", dtype=flags_core.Flag.TYPE.INTEGER, default=None, help= "The number of processors. Must be divisible by `num_output_shards`."), flags_core.Flag("num_output_shards", dtype=flags_core.Flag.TYPE.INTEGER, default=None, help="The total number of output shards."), flags_core.Flag( "output_range_begin", dtype=flags_core.Flag.TYPE.INTEGER, default=None,
import os

import tensorflow as tf
from absl import app, logging

import neurst.utils.flags_core as flags_core
from neurst.utils.compat import wrapper_var_name
from neurst.utils.configurable import ModelConfigs
from neurst.utils.flags_core import Flag
from neurst.utils.misc import flatten_string_list

# Flags for checkpoint averaging.
FLAG_LIST = [
    flags_core.Flag(
        "checkpoints", dtype=Flag.TYPE.STRING, default=None, multiple=True,
        help="A list or comma-separated string of checkpoints to be averaged. "
             "The averaged checkpoint will be saved to `output_path`."),
    flags_core.Flag("output_path", dtype=flags_core.Flag.TYPE.STRING,
                    help="The path to the averaged checkpoint."),
]


def checkpoint_exists(path):
    # A checkpoint may be referenced by a prefix: accept either the bare path
    # or its ".meta" / ".index" companion files.
    return (tf.io.gfile.exists(path)
            or tf.io.gfile.exists(path + ".meta")
            or tf.io.gfile.exists(path + ".index"))


# NOTE(review): the body of this function is truncated in this chunk.
def checkpoint_list_checking(path_list):
from absl import app, logging

import neurst.utils.flags_core as flags_core
from neurst.data.datasets import Dataset, build_dataset
from neurst.exps import BaseExperiment, build_exp
from neurst.layers.quantization import QuantLayer
from neurst.models import BaseModel
from neurst.tasks import Task, build_task
from neurst.training import training_utils
from neurst.utils.configurable import ModelConfigs, deep_merge_dict, load_from_config_path, yaml_load_checking
from neurst.utils.hparams_sets import get_hyper_parameters
from neurst.utils.misc import flatten_string_list

# Top-level run flags: distribution strategy, compute dtype, numerics
# checking, and XLA.
FLAG_LIST = [
    flags_core.Flag("distribution_strategy", dtype=flags_core.Flag.TYPE.STRING, default="mirrored",
                    help="The distribution strategy."),
    flags_core.Flag("dtype", dtype=flags_core.Flag.TYPE.STRING, default="float16",
                    help="The computation type of the whole model."),
    flags_core.Flag(
        "enable_check_numerics", dtype=flags_core.Flag.TYPE.BOOLEAN, default=None,
        help="Whether to open the tf.debugging.enable_check_numerics. "
             "Note that this may lower down the training speed."),
    flags_core.Flag("enable_xla", dtype=flags_core.Flag.TYPE.BOOLEAN, default=None,
                    help="Whether to enable XLA for training."),
    # NOTE(review): FLAG_LIST continues past this chunk; the closing bracket
    # is not visible here.
# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from absl import app, logging import neurst.utils.flags_core as flags_core from neurst.utils.converters import Converter, build_converter FLAG_LIST = [ flags_core.Flag("from", dtype=flags_core.Flag.TYPE.STRING, default=None, required=True, help="The path to pretrained model directory " "or a key indicating the publicly available model name."), flags_core.Flag("to", dtype=flags_core.Flag.TYPE.STRING, default=None, required=True, help="The path to save the converted checkpoint."), flags_core.Flag("model_name", dtype=flags_core.Flag.TYPE.STRING, default=None, required=True, help="The name of pretrained model, e.g. google_bert."), ]
import neurst.utils.flags_core as flags_core
from neurst.data.datasets import Dataset, build_dataset
from neurst.data.datasets.audio.audio_dataset import AudioTripleTFRecordDataset
from neurst.data.datasets.parallel_text_dataset import ParallelTextDataset
from neurst.exps import build_exp
from neurst.exps.sequence_generator import SequenceGenerator
from neurst.layers.search import SequenceSearch
from neurst.metrics.metric import Metric
from neurst.tasks import build_task
from neurst.training import training_utils
from neurst.utils.configurable import ModelConfigs

# Run flags; presumably for cascaded ASR -> MT prediction, given the separate
# asr/mt model-dir flags — TODO confirm against the full file.
FLAG_LIST = [
    flags_core.Flag("distribution_strategy", dtype=flags_core.Flag.TYPE.STRING, default="mirrored",
                    help="The distribution strategy."),
    flags_core.Flag("dtype", dtype=flags_core.Flag.TYPE.STRING, default="float16",
                    help="The computation type of the whole model."),
    flags_core.Flag(
        "enable_check_numerics", dtype=flags_core.Flag.TYPE.BOOLEAN, default=None,
        help="Whether to open the tf.debugging.enable_check_numerics. "
             "Note that this may lower down the training speed."),
    flags_core.Flag("asr_model_dir", dtype=flags_core.Flag.TYPE.STRING,
                    help="The path to the ASR model checkpoint."),
    flags_core.Flag("mt_model_dir",
                    # NOTE(review): truncated in this chunk — the remaining
                    # arguments of this flag and the rest of FLAG_LIST are not
                    # visible.
# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import tensorflow as tf from absl import app, logging import neurst.utils.flags_core as flags_core from neurst.data.datasets import Dataset, build_dataset from neurst.data.datasets.audio.audio_dataset import RawAudioDataset FLAG_LIST = [ flags_core.Flag("output_transcript_file", dtype=flags_core.Flag.TYPE.STRING, required=True, help="The path to save transcriptions."), flags_core.Flag("output_translation_file", dtype=flags_core.Flag.TYPE.STRING, default=None, help="The path to save transcriptions."), flags_core.ModuleFlag(Dataset.REGISTRY_NAME, help="The raw dataset."), ] def main(dataset, output_transcript_file, output_translation_file=None): assert isinstance(dataset, RawAudioDataset) transcripts = dataset.transcripts translations = dataset.translations assert transcripts, "Fail to extract transcripts." with tf.io.gfile.GFile(output_transcript_file, "w") as fw: