Exemplo n.º 1
0
    def parse_action(self, chat_id, text):
        """Route one incoming chat message to the matching handler.

        Loads the user document for ``chat_id`` from ``self.users`` (building a
        default one when none is stored), makes sure it has a ``train``
        sub-document, dispatches ``text`` and finally saves the document.

        Dispatch order: a message starting with ``/`` is looked up in
        ``self.comands`` (unknown commands are ignored); otherwise an active
        training session (``user['train']['type'] != 0``) receives the text;
        otherwise the handler is chosen by ``user['state']``.

        :param chat_id: value used to look the user up in ``self.users``.
        :param text: message text; an empty or ``None`` text is not dispatched.
        """
        self.logger.warning("%s - %s" % (chat_id, text))

        def new_train_state():
            # Built fresh on every call so documents never share the list.
            return {
                'type': 0,
                'words': 0,
                'correct': 0,
                'cadidacies': []
            }

        user = self.users.find_one({'chat_id': chat_id})
        if user is None:
            user = {'chat_id': chat_id,
                    'state': States.idle,
                    'words': [],
                    'train': new_train_state()}
        if 'train' not in user:
            user['train'] = new_train_state()

        if not text:
            # Indexing text[0] on an empty message used to raise before the
            # user document was saved; there is nothing to dispatch here.
            self.users.save(user)
            return

        if text.startswith('/'):  # Command
            cmd = text[1:].lower().split(' ')[0]
            if cmd in self.comands:
                # Handlers are stored as plain functions, hence the explicit self.
                self.comands[cmd](self, user, text)
        elif user['train']['type'] != 0:
            train.do_train(user, text)
        elif user['state'] == States.idle:
            self.add_word(user, text)
        elif user['state'] == States.langs_asked:
            self.langs_ask(user, text)
        self.users.save(user)
Exemplo n.º 2
0
 def search(self, opt, train=False, test=False):
     """Run training and/or testing with options supplied as a mapping.

     ``opt`` is copied onto a fresh ``argparse.Namespace`` because that is
     the object handed to ``do_train`` / ``do_test``.  When both flags are
     set, training runs before testing.
     """
     namespace = argparse.Namespace()
     # vars() exposes the namespace's attribute dict, so updating it sets
     # one attribute per entry of ``opt``.
     vars(namespace).update(opt)
     if train:
         do_train(namespace)
     if test:
         do_test(namespace)
Exemplo n.º 3
0
def train_crf(train_path, pattern_path, model_path):
    """
    Train a CRF model through pywapiti.

    Starts from ``create_default_opt()``, fills in the three paths, sets
    ``maxiter`` to 10 and ``nthread`` to 4, then builds a model around a new
    reader and passes it to ``do_train``.

    :param train_path: stored in ``opt.input``
    :param pattern_path: stored in ``opt.pattern``
    :param model_path: stored in ``opt.output``
    :return: None
    """
    options = create_default_opt()
    overrides = (
        ("input", train_path),
        ("pattern", pattern_path),
        ("output", model_path),
        ("maxiter", 10),
        ("nthread", 4),
    )
    for field, value in overrides:
        setattr(options, field, value)

    reader = pywapiti.rdr_new(options.maxent)
    model = pywapiti.mdl_new(reader)
    model.opt = options
    do_train(model)
Exemplo n.º 4
0
def train_crf(train_path, pattern_path, model_path):
    """
    Configure pywapiti options and train a CRF model with them.

    The options returned by ``create_default_opt()`` get the input, pattern
    and output paths plus ``maxiter = 10`` and ``nthread = 4``; a model is
    then created from a reader, given those options and passed to
    ``do_train``.

    :param train_path: value assigned to ``opt.input``
    :param pattern_path: value assigned to ``opt.pattern``
    :param model_path: value assigned to ``opt.output``
    :return: None
    """
    settings = create_default_opt()

    # File locations.
    settings.input = train_path
    settings.pattern = pattern_path
    settings.output = model_path

    # Training limits.
    settings.maxiter = 10
    settings.nthread = 4

    data_reader = pywapiti.rdr_new(settings.maxent)
    trained_model = pywapiti.mdl_new(data_reader)
    trained_model.opt = settings
    do_train(trained_model)
Exemplo n.º 5
0
    def remove(self, user, text):
        """Delete a word from the user's list and report it through Telegram.

        While a training session is active (``user['train']['type'] != 0``)
        every stored word equal to ``user['train']['word']`` is removed and
        training continues through ``train.do_train``.  Otherwise the word is
        picked by the optional number after the command: a positive number N
        selects the N-th word (1-based), zero or a negative number is used
        directly as a list index, and no number removes the last word.

        :param user: user document; reads ``train``, ``words`` and ``chat_id``.
        :param text: full command text, optionally followed by an index.
        """
        if user['train']['type'] != 0:
            # Iterate over a snapshot: removing from the list that is being
            # iterated makes the loop skip the element after each removal.
            for w in list(user['words']):
                if w == user['train']['word']:
                    user['words'].remove(w)
                    str_out = "%s - %s" % (w['en'], w['ru'])
                    telegram.send_message(user['chat_id'], "Deleted:\n%s" % (str_out,))

            train.do_train(user, text)
        else:
            tokens = text.split(" ")
            if len(tokens) > 1:
                # NOTE(review): a non-numeric argument raises ValueError here.
                cnt = int(tokens[1])
                if cnt > 0:
                    cnt -= 1  # user-facing positions are 1-based
            else:
                cnt = -1  # default: the last word
            # NOTE(review): an empty list or out-of-range index raises IndexError.
            str_out = "%s - %s" % (user['words'][cnt]['en'], user['words'][cnt]['ru'])
            del user['words'][cnt]
            telegram.send_message(user['chat_id'], "Word with index %s removed\n%s" % (cnt, str_out))
Exemplo n.º 6
0
def main():
    """Run the stages selected on the command line.

    With ``args.do_init`` set, only ``init`` runs and the function returns.
    Otherwise ``do_train`` and then ``do_eval`` run according to
    ``args.do_train`` / ``args.do_eval``.  Every path argument goes through
    ``os.path.normpath`` first, and the torch device is CUDA when it is
    available and ``args.no_cuda`` is not set, CPU otherwise.
    """
    args = get_args()
    norm = os.path.normpath
    cuda_ok = torch.cuda.is_available() and not args.no_cuda
    device = torch.device("cuda" if cuda_ok else "cpu")

    if args.do_init:
        init_paths = (args.train_file, args.dev_file, args.test_file,
                      args.word_dict_path, args.tag_dict_path)
        init(*[norm(path) for path in init_paths])
        return

    if args.do_train:
        train_file = norm(args.train_file)
        output_dir = norm(args.output_dir)
        word_dict = norm(args.word_dict_path)
        tag_dict = norm(args.tag_dict_path)
        do_train(train_file, output_dir, word_dict, tag_dict,
                 args.max_seq_len, args.embed_dim, args.hidden_dim,
                 args.lr, args.batch_size, args.epochs,
                 args.print_step, device)

    if args.do_eval:
        test_file = norm(args.test_file)
        word_dict = norm(args.word_dict_path)
        tag_dict = norm(args.tag_dict_path)
        do_eval(test_file, word_dict, tag_dict,
                args.max_seq_len, args.embed_dim, args.hidden_dim,
                norm(args.output_dir), norm(args.eval_log_dir), device)
Exemplo n.º 7
0
import paddle
import paddle.fluid as fluid

from eval import do_eval
from train import do_train
from predict import do_predict
from inference_model import do_save_inference_model

from dgu.utils.configure import PDConfig

if __name__ == "__main__":

    # Load the YAML configuration and print it before running anything.
    args = PDConfig(yaml_file="./data/config/dgu.yaml")
    args.build()
    args.Print()

    # Each stage runs only when its flag is set, in this fixed order.
    stages = (
        ("do_train", do_train),
        ("do_predict", do_predict),
        ("do_eval", do_eval),
        ("do_save_inference_model", do_save_inference_model),
    )
    for flag_name, run_stage in stages:
        if getattr(args, flag_name):
            run_stage(args)

# vim: set ts=4 sw=4 sts=4 tw=100:
Exemplo n.º 8
0
# Command-line options: --mode is mandatory ('train' or 'eval'); --epochs
# defaults to 5.
parser.add_argument('--mode', required=True, choices=['train', 'eval'])
parser.add_argument('--epochs', default=5, type=int)

args = parser.parse_args()

print('args:', args)

# The mode is handed to the bus; the visible code below does not branch on it.
bus = ng_bus.NeurogenBus(args.mode)
io = ng_input_alex_net.NeurogenIO(bus)
# NOTE: this rebinds the name `train` (presumably the imported module) to the
# controller instance; every later `train.` call goes to the instance.
train = train.TrainController()

# Collected from both components; not used again in the visible code.
cli_vars = []
cli_vars += io.get_vars()
cli_vars += train.get_vars()

io.read_inputs()
X_train, y_train = io.get_train_xy()
X_test, y_test = io.get_test_xy()

print(X_train.shape)

train.do_compile()

# Sample predictions are printed before and after training for comparison.
train.print_sample_predictions(X_test, y_test)
result_of_train = train.do_train(X_train, y_train, X_test, y_test, args.epochs,
                                 'weights')
print(result_of_train.history)
# print(np.mean(result_of_train.history["val_acc"]))

train.print_sample_predictions(X_test, y_test)
Exemplo n.º 9
0
def main(_):
    """Load or initialise the model, then run the action named by ``FLAGS.mode``.

    If ``config.json`` exists under ``FLAGS.model_dir`` the model is loaded
    from there; otherwise a new ``Transducer`` is built from the size flags
    and its config and vocabulary are saved.  The ``eval``,
    ``transcribe-file`` and ``realtime`` modes require an existing model.

    Modes handled: ``export`` (SavedModel written to a new numbered version
    directory), ``transcribe-file``, ``realtime`` (microphone input through
    PyAudio) and ``eval``; any other mode trains the model.

    :param _: unused.
    :raises Exception: if the mode needs an existing model and none is found.
    :raises ValueError: if ``FLAGS.dataset_name`` is not a supported dataset.
    """

    logging.info('Running with parameters:')
    logging.info(json.dumps(FLAGS.flag_values_dict(), indent=4))

    if os.path.exists(os.path.join(FLAGS.model_dir, 'config.json')):

        expect_partial = False
        if FLAGS.mode in ['transcribe-file', 'realtime']:
            expect_partial = True

        model = load_model(FLAGS.model_dir,
            checkpoint=FLAGS.checkpoint, expect_partial=expect_partial)

    else:

        if FLAGS.mode in ['eval', 'transcribe-file', 'realtime']:
            raise Exception('Model not found at path: {}'.format(
                FLAGS.model_dir))

        logging.info('Initializing model from scratch.')

        os.makedirs(FLAGS.model_dir, exist_ok=True)
        model_config_filepath = os.path.join(FLAGS.model_dir, 'config.json')

        vocab = vocabulary.init_vocab()
        vocabulary.save_vocab(vocab, os.path.join(FLAGS.model_dir, 'vocab'))

        model = Transducer(vocab=vocab,
                           encoder_layers=FLAGS.encoder_layers,
                           encoder_size=FLAGS.encoder_size,
                           pred_net_layers=FLAGS.pred_net_layers,
                           pred_net_size=FLAGS.pred_net_size,
                           joint_net_size=FLAGS.joint_net_size,
                           softmax_size=FLAGS.softmax_size)

        model.save_config(model_config_filepath)

        logging.info('Initialized model from scratch.')

    distribution_strategy = None

    if FLAGS.tpu is not None:

        tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
            tpu=FLAGS.tpu)
        distribution_strategy = tf.distribute.experimental.TPUStrategy(
            tpu_cluster_resolver=tpu_cluster_resolver)

    if FLAGS.mode == 'export':

        saved_model_dir = os.path.join(FLAGS.model_dir, 'saved_model')
        os.makedirs(saved_model_dir, exist_ok=True)

        # Only purely numeric entries are version directories; anything else
        # (hidden files, stray folders) used to crash int().
        all_versions = [int(ver) for ver in os.listdir(saved_model_dir)
                        if ver.isdigit()]

        if len(all_versions) > 0:
            version = max(all_versions) + 1
        else:
            version = 1

        export_path = os.path.join(saved_model_dir, str(version))
        os.makedirs(export_path)

        tf.saved_model.save(model, export_path, signatures={
            'serving_default': model.predict
        })

    elif FLAGS.mode == 'transcribe-file':

        transcription = transcribe_file(model, FLAGS.input)

        print('Input file: {}'.format(FLAGS.input))
        print('Transcription: {}'.format(transcription))

    elif FLAGS.mode == 'realtime':

        audio_buf = []
        last_result = None

        def stream_callback(in_data, frame_count, time_info, status):
            # Called by PyAudio for each captured chunk; just queue it.
            audio_buf.append(in_data)
            return None, pyaudio.paContinue

        def audio_gen():
            # NOTE(review): this spins without sleeping while the buffer is empty.
            while True:
                if len(audio_buf) > 0:
                    # Consume the oldest chunk. Reading audio_buf[0] without
                    # removing it yielded the first chunk forever.
                    audio_data = audio_buf.pop(0)
                    audio_arr = np.frombuffer(audio_data, dtype=np.float32)
                    yield audio_arr

        FORMAT = pyaudio.paFloat32
        CHANNELS = 1
        RATE = 16000
        CHUNK = 2048

        audio = pyaudio.PyAudio()
        stream = audio.open(format=FORMAT,
                            channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK,
                            stream_callback=stream_callback)

        try:
            stream.start_stream()

            outputs = transcribe_stream(model, audio_gen(), RATE)

            print('Transcribing live audio (press CTRL+C to stop)...')

            for (output, is_final) in outputs:
                if output != last_result and output != '' and not is_final:
                    print('Partial Result: {}'.format(output))
                    last_result = output
                if is_final:
                    print('# Final Result: {}'.format(output))
                    last_result = None
        finally:
            # Release the audio device even when the loop is interrupted.
            stream.stop_stream()
            stream.close()
            audio.terminate()

    else:

        if FLAGS.dataset_name == 'common-voice':
            data_utils = utils.data.common_voice
        else:
            # Any other name used to fall through to an unbound `data_utils`.
            raise ValueError('Unsupported dataset: {}'.format(
                FLAGS.dataset_name))

        train_dataset, dev_dataset = data_utils.create_datasets(FLAGS.dataset_path,
            max_data=FLAGS.max_data)

        if dev_dataset is None:
            # No dedicated dev split: carve the first eval_size items off the
            # training data.
            dev_dataset = train_dataset.take(FLAGS.eval_size)
            train_dataset = train_dataset.skip(FLAGS.eval_size)

        if FLAGS.mode == 'eval':

            logging.info('Begin evaluation...')

            loss, acc = do_eval(model, dev_dataset,
                                batch_size=FLAGS.batch_size,
                                shuffle_buffer_size=FLAGS.shuffle_buffer_size,
                                distribution_strategy=distribution_strategy)

            logging.info('Evaluation complete: Loss {} Accuracy {}'.format(
                loss, acc))

        else:

            optimizer = tf.keras.optimizers.Adam(FLAGS.learning_rate)

            checkpoints_path = os.path.join(FLAGS.model_dir, 'checkpoints')
            os.makedirs(checkpoints_path, exist_ok=True)

            do_train(model, train_dataset, optimizer,
                     FLAGS.epochs, FLAGS.batch_size,
                     eval_dataset=dev_dataset,
                     steps_per_checkpoint=FLAGS.steps_per_checkpoint,
                     checkpoint_path=checkpoints_path,
                     steps_per_log=FLAGS.steps_per_log,
                     tb_log_dir=FLAGS.tb_log_dir,
                     keep_top_n=FLAGS.keep_top,
                     shuffle_buffer_size=FLAGS.shuffle_buffer_size,
                     distribution_strategy=distribution_strategy)
Exemplo n.º 10
0
# Training data is only read in 'train' mode; the model is compiled and the
# weights are loaded in every mode.
if args.mode == 'train':
    train_data, val_data = io.read_train_data(args.sample_count)

    if not ng_config.use_generator_fit:
        logging.info('X_train shape: %s', train_data[0].shape)

    logging.info('y_train shape: %s', train_data[1].shape)

train.do_compile()
train.try_load_weights(args.weights)

if args.mode == 'train':
    # Sample predictions are printed before and after training for comparison.
    train.print_sample_predictions(val_data)
    result_of_train = train.do_train(
        train_data,
        val_data,
        args.epochs,
        args.weights,
    )
    metrics_output = json.dumps({'history': result_of_train.history})
    logging.info('history: %s', metrics_output)
    if args.metrics_output:
        # Use a context manager so the file is flushed and closed right away
        # rather than whenever the handle happens to be collected.
        with open(args.metrics_output, 'w') as metrics_file:
            metrics_file.write(metrics_output)
    # print(np.mean(result_of_train.history["val_acc"]))
    train.print_sample_predictions(val_data)
elif args.mode == 'eval':
    # Both options are required for evaluation.
    assert args.eval_data
    assert args.network_output
    # Imported here so it is only loaded in 'eval' mode.
    import cli_util
    cli_util.do_eval(args, io, train)

from tensorflow.keras import backend as K
Exemplo n.º 11
0
 def start_train(self, user, text):
     """Set the user's training type to 0, then pass the message to ``train.do_train``."""
     session = user['train']
     session['type'] = 0
     train.do_train(user, text)
Exemplo n.º 12
0
def main(_):
    """Load or initialise the model, then run the action named by ``FLAGS.mode``.

    If ``config.json`` exists under ``FLAGS.model_dir`` the model is loaded
    from there; otherwise a new ``Transducer`` is built from the size flags
    and its config and vocabulary are saved.  The ``eval``,
    ``transcribe-file`` and ``realtime`` modes require an existing model.

    Modes handled: ``export`` (TFLite file written to ``<model_dir>/lite``),
    ``transcribe-file``, ``realtime`` (microphone input through PyAudio) and
    ``eval``; any other mode trains the model.

    :param _: unused.
    :raises Exception: if the mode needs an existing model and none is found.
    """

    logging.info('Running with parameters:')
    logging.info(json.dumps(FLAGS.flag_values_dict(), indent=4))

    if os.path.exists(os.path.join(FLAGS.model_dir, 'config.json')):

        expect_partial = False
        if FLAGS.mode in ['transcribe-file', 'realtime', 'export']:
            expect_partial = True

        model = load_model(FLAGS.model_dir,
            checkpoint=FLAGS.checkpoint, expect_partial=expect_partial)

    else:

        if FLAGS.mode in ['eval', 'transcribe-file', 'realtime']:
            raise Exception('Model not found at path: {}'.format(
                FLAGS.model_dir))

        logging.info('Initializing model from scratch.')

        os.makedirs(FLAGS.model_dir, exist_ok=True)
        model_config_filepath = os.path.join(FLAGS.model_dir, 'config.json')

        vocab = vocabulary.init_vocab()
        vocabulary.save_vocab(vocab, os.path.join(FLAGS.model_dir, 'vocab'))

        model = Transducer(vocab=vocab,
                           encoder_layers=FLAGS.encoder_layers,
                           encoder_size=FLAGS.encoder_size,
                           pred_net_layers=FLAGS.pred_net_layers,
                           joint_net_size=FLAGS.joint_net_size,
                           softmax_size=FLAGS.softmax_size)

        model.save_config(model_config_filepath)

        logging.info('Initialized model from scratch.')

    distribution_strategy = None

    if FLAGS.tpu is not None:

        tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
            tpu=FLAGS.tpu)
        distribution_strategy = tf.distribute.experimental.TPUStrategy(
            tpu_cluster_resolver=tpu_cluster_resolver)

    if FLAGS.mode == 'export':

        # Earlier SavedModel export path, kept for reference:
        # saved_model_dir = os.path.join(FLAGS.model_dir, 'saved_model')
        # os.makedirs(saved_model_dir, exist_ok=True)

        # all_versions = [int(ver) for ver in os.listdir(saved_model_dir)]

        # if len(all_versions) > 0:
        #     version = max(all_versions) + 1
        # else:
        #     version = 1

        # export_path = os.path.join(saved_model_dir, str(version))
        # os.makedirs(export_path)

        # tf.saved_model.save(model, export_path, signatures={
        #     'serving_default': model.predict
        # })

        # print(model.predict(tf.zeros((1, 1024)), tf.constant([16000]), tf.constant(['hell']), tf.zeros((1, 2, 1, 2048))))

        tflite_dir = os.path.join(FLAGS.model_dir, 'lite')
        os.makedirs(tflite_dir, exist_ok=True)

        # Trace model.predict with fixed input signatures for the converter.
        concrete_func = model.predict.get_concrete_function(
            audio=tf.TensorSpec([1, 1024], dtype=tf.float32),
            sr=tf.TensorSpec([1], dtype=tf.int32),
            pred_inp=tf.TensorSpec([1], dtype=tf.string),
            enc_state=tf.TensorSpec([1, 2, 1, model.encoder_size], dtype=tf.float32))

        converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS,
                                               tf.lite.OpsSet.SELECT_TF_OPS]
        converter.experimental_new_converter = True
        converter.experimental_new_quantizer = True
        converter.allow_custom_ops = True

        # Quantisation settings are disabled below, so the exported model is
        # converted without them.
        # def representative_dataset_gen():
        #     dataset, _ = load_datasets()
        #     for i in range(10):
        #         yield [next(dataset)]

        # converter.optimizations = [tf.lite.Optimize.DEFAULT]
        # converter.representative_dataset = representative_dataset_gen
        # converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
        # converter.inference_input_type = tf.uint8
        # converter.inference_output_type = tf.uint8

        tflite_model = converter.convert()

        with open(os.path.join(tflite_dir, 'model.tflite'), 'wb') as f:
            f.write(tflite_model)

        print('Exported model to TFLite.')

    elif FLAGS.mode == 'transcribe-file':

        transcription = transcribe_file(model, FLAGS.input)

        print('Input file: {}'.format(FLAGS.input))
        print('Transcription: {}'.format(transcription))

    elif FLAGS.mode == 'realtime':

        # Imported here so PyAudio is only required for the realtime mode.
        import pyaudio

        audio_buf = []
        last_result = None

        def stream_callback(in_data, frame_count, time_info, status):
            # Called by PyAudio for each captured chunk; just queue it.
            audio_buf.append(in_data)
            return None, pyaudio.paContinue

        def audio_gen():
            # NOTE(review): this spins without sleeping while the buffer is empty.
            while True:
                if len(audio_buf) > 0:
                    # Consume the oldest chunk. Reading audio_buf[0] without
                    # removing it yielded the first chunk forever.
                    audio_data = audio_buf.pop(0)
                    audio_arr = np.frombuffer(audio_data, dtype=np.float32)
                    yield audio_arr

        FORMAT = pyaudio.paFloat32
        CHANNELS = 1
        RATE = 16000
        CHUNK = 2048

        audio = pyaudio.PyAudio()
        stream = audio.open(format=FORMAT,
                            channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK,
                            stream_callback=stream_callback)

        try:
            stream.start_stream()

            outputs = transcribe_stream(model, audio_gen(), RATE)

            print('Transcribing live audio (press CTRL+C to stop)...')

            for (output, is_final) in outputs:
                if output != last_result and output != '' and not is_final:
                    print('Partial Result: {}'.format(output))
                    last_result = output
                if is_final:
                    print('# Final Result: {}'.format(output))
                    last_result = None
        finally:
            # Release the audio device even when the loop is interrupted.
            stream.stop_stream()
            stream.close()
            audio.terminate()

    else:

        train_dataset, dev_dataset = load_datasets()

        if dev_dataset is None:
            # No dedicated dev split: carve the first eval_size items off the
            # training data.
            dev_dataset = train_dataset.take(FLAGS.eval_size)
            train_dataset = train_dataset.skip(FLAGS.eval_size)

        if FLAGS.eval_size:
            # Cap the dev set at eval_size items.
            dev_dataset = dev_dataset.take(FLAGS.eval_size)

        if FLAGS.mode == 'eval':

            logging.info('Begin evaluation...')

            loss, acc = do_eval(model, dev_dataset,
                                batch_size=FLAGS.batch_size,
                                shuffle_buffer_size=FLAGS.shuffle_buffer_size,
                                distribution_strategy=distribution_strategy)

            logging.info('Evaluation complete: Loss {} Accuracy {}'.format(
                loss, acc))

        else:

            optimizer = tf.keras.optimizers.Adam(FLAGS.learning_rate)

            checkpoints_path = os.path.join(FLAGS.model_dir, 'checkpoints')
            os.makedirs(checkpoints_path, exist_ok=True)

            do_train(model, train_dataset, optimizer,
                     FLAGS.epochs, FLAGS.batch_size,
                     eval_dataset=dev_dataset,
                     steps_per_checkpoint=FLAGS.steps_per_checkpoint,
                     checkpoint_path=checkpoints_path,
                     steps_per_log=FLAGS.steps_per_log,
                     tb_log_dir=FLAGS.tb_log_dir,
                     keep_top_n=FLAGS.keep_top,
                     shuffle_buffer_size=FLAGS.shuffle_buffer_size,
                     distribution_strategy=distribution_strategy)
Exemplo n.º 13
0
 def do_POST(self):
     """Handle an image upload posted to ``/get`` or ``/train``.

     ``/get`` stores the upload temporarily, runs ``crop.ssd_process`` on it
     and runs the model from ``model/trash.h5`` on every file found in the
     crop directory; the JSON reply carries the raw predictions.
     ``/train`` stores the upload under ``train/<label>/`` (label chosen by
     the ``type`` form field).  When more than 86400 seconds have passed
     since the configured ``last`` timestamp and the number of files under
     ``./train`` exceeds the configured ``in`` value, it also runs
     ``train.do_train()`` and deletes the collected files.

     Replies 404 for any other path, and 403 when the ``file`` field is
     missing, larger than 20000000 bytes, or not named ``.jpg``, ``.png``,
     ``.jpeg`` or ``.gif``.
     """
     if self.path not in ("/get", "/train"):
         self.send_response(404)
         self.end_headers()
         return
     form = cgi.FieldStorage(fp=self.rfile,
                             headers=self.headers,
                             environ={
                                 "REQUEST_METHOD": "POST",
                                 "CONTENT_TYPE":
                                 self.headers["Content-Type"]
                             })
     try:
         field_item = form["file"]
     except (KeyError, TypeError):
         # KeyError: no "file" field; TypeError: the body was not a form.
         field_item = None
     # A plain (non-upload) field has no filename; treat that as missing too.
     filename = getattr(field_item, "filename", None)
     if not filename:
         self.send_response(403)
         self.end_headers()
         return
     # Keep only the last path component so a crafted name cannot point
     # outside the upload/train directories.
     filename = os.path.basename(filename)
     fileValue = field_item.value
     print("[UPLOAD]", "file: ", filename, ", size:", len(fileValue),
           "bytes")
     allowed_suffixes = (".jpg", ".png", ".jpeg", ".gif")
     if len(fileValue) > 20000000 or not filename.endswith(allowed_suffixes):
         self.send_response(403)
         self.end_headers()
         return

     # The status line is sent only after validation; sending 200 up front
     # put two status lines on the wire whenever the upload was rejected.
     self.send_response(200)
     self.send_header("Access-Control-Allow-Origin", "*")
     self.send_header("Content-type", "application/json")
     self.end_headers()
     if self.path == "/get":
         upload_path = "upload/" + filename
         # One name for the directory that ssd_process writes and glob reads.
         crop_dir = "crop"
         with open(upload_path, "wb") as f:
             f.write(fileValue)
         crop.ssd_process(img_path=upload_path,
                          crop_path=crop_dir,
                          show=False)
         os.remove(upload_path)
         files = glob.glob(crop_dir + "/*")
         pred_result = []
         pred_probabilities = []
         if files:
             # Load the model once per request instead of once per crop.
             model = load_model(
                 "model/trash.h5",
                 custom_objects={"leaky_relu": leaky_relu})
         for f in files:
             img = image.load_img(f, target_size=(256, 256))
             image_tensor = image.img_to_array(img)
             image_tensor = np.expand_dims(image_tensor, axis=0)
             image_tensor /= 255.
             pred = model.predict(image_tensor)
             pred_class = np.argmax(pred, axis=1)
             # ndarrays are not JSON serialisable; convert to nested lists.
             # NOTE(review): "result" carries the same data as
             # "probabilities", as in the original; pred_class may have
             # been intended here - confirm against the client.
             pred_result.append(pred.tolist())
             pred_probabilities.append(pred.tolist())
             print("[PREDICT] Crop name:", f)
             print("[PREDICT] Raw prediction data:", pred)
             print("[PREDICT] Raw prediction class data: ", pred_class)
         response = {
             "status": "success",
             "result": pred_result,
             "probabilities": pred_probabilities
         }
     elif self.path == "/train":
         # NOTE(review): a missing or non-numeric "type" field raises here,
         # after the 200 headers have already been sent.
         with open(
                 "train/" + labels_index[int(form["type"].value)] +
                 "/" + filename, "wb") as f:
             f.write(fileValue)
         response = {"status": "success"}
         if int(time.time()) - conf.read_config()["last"] > 86400:
             train_path = list(pathlib.Path("./train").glob("*/*"))
             train_count = len(train_path)
             if train_count > int(conf.read_config()["in"]):
                 conf.write_config("last", int(time.time()))
                 conf.write_config("in", weight.get_new_n(train_count))
                 train.do_train()
                 for item in train_path:
                     os.remove(item)
     responseBody = json.dumps(response)
     self.wfile.write(responseBody.encode("utf-8"))
     print()
Exemplo n.º 14
0
import os
import sys
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from options.train_options import TrainOptions
from train import do_train

# NOTE: this parser is configured but never used by the visible code; its
# only parse_args() call is in the commented-out lines at the end of the file.
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
parser.add_argument('--path', type=str, help='Path to datasets')
parser.add_argument('--dataset', type=str, default='ade20k', help='which dataset to process')
parser.add_argument('--norm', type=str, default='norm', help='normalization mode of dist map')

if __name__ == '__main__':
  # Options come from TrainOptions, not from `parser` above.
  options = TrainOptions().parse()
  # Overwrite norm_mode with 'clade' after parsing, whatever was parsed.
  options.norm_mode = 'clade'
  do_train(options)

  # calculate_fid_given_paths
  
  # args = parser.parse_args()
  # make_dist_train_val_ade_datasets(args.path, args.norm)