Example 1
# Imports this snippet relies on; TrainData and lines_hash ship with
# padatious itself (padatious.train_data and padatious.util upstream).
import multiprocessing as mp
from functools import partial
from os import mkdir
from os.path import isdir, isfile, join, splitext

import padatious
from padatious.train_data import TrainData
from padatious.util import lines_hash

# _train_and_save is a module-level helper defined next to this class in the
# padatious source; it trains one object and then saves it into the cache folder.


class TrainingManager(object):
    """
    Manages parallel (multi-process) training of either Intents or Entities

    Args:
        cls (Type[Trainable]): Class to wrap
        cache_dir (str): Place to store cache files
    """
    def __init__(self, cls, cache_dir):
        self.cls = cls
        self.cache = cache_dir
        self.objects = []
        self.objects_to_train = []

        self.train_data = TrainData()

    def add(self, name, lines, reload_cache=False):
        hash_fn = join(self.cache, name + '.hash')
        old_hsh = None
        if isfile(hash_fn):
            with open(hash_fn, 'rb') as g:
                old_hsh = g.read()
        min_ver = splitext(padatious.__version__)[0]
        new_hsh = lines_hash([min_ver] + lines)
        if reload_cache or old_hsh != new_hsh:
            self.objects_to_train.append(self.cls(name=name, hsh=new_hsh))
        else:
            self.objects.append(self.cls.from_file(name=name, folder=self.cache))
        self.train_data.add_lines(name, lines)

    def load(self, name, file_name, reload_cache=False):
        with open(file_name) as f:
            self.add(name, f.read().split('\n'), reload_cache)

    def remove(self, name):
        self.objects = [i for i in self.objects if i.name != name]
        self.objects_to_train = [i for i in self.objects_to_train if i.name != name]
        self.train_data.remove_lines(name)

    def train(self, debug=True, single_thread=False, timeout=20):
        if not isdir(self.cache):
            mkdir(self.cache)

        train = partial(
            _train_and_save, cache=self.cache, data=self.train_data, print_updates=debug
        )

        if single_thread:
            for i in self.objects_to_train:
                train(i)
        else:
            # Train in multiple processes to disk
            pool = mp.Pool()
            try:
                pool.map_async(train, self.objects_to_train).get(timeout)
            except mp.TimeoutError:
                # AsyncResult.get(timeout) raises multiprocessing.TimeoutError,
                # which is not the builtin TimeoutError of the same name
                if debug:
                    print('Some objects timed out while training')
            finally:
                pool.close()

        # Load saved objects from disk
        for obj in self.objects_to_train:
            try:
                self.objects.append(self.cls.from_file(name=obj.name, folder=self.cache))
            except IOError:
                if debug:
                    print('Took too long to train', obj.name)
        self.objects_to_train = []
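
For context, here is a minimal usage sketch; it is not part of the padatious source. It assumes a padatious version matching these snippets is installed, so that TrainingManager can be imported from padatious.training_manager, and it substitutes a deliberately trivial EchoTrainable stub for the Intent/Entity classes the manager normally wraps. The stub only mimics the interface the manager exercises (a name attribute plus train, save and from_file).

from padatious.training_manager import TrainingManager


class EchoTrainable:
    """Illustrative stand-in for a Trainable such as padatious' Intent."""
    def __init__(self, name, hsh=b''):
        self.name = name
        self.hsh = hsh

    @classmethod
    def from_file(cls, name, folder):
        return cls(name)       # a real Trainable reloads its trained model from `folder`

    def train(self, train_data):
        pass                   # a real Trainable fits its model on the TrainData

    def save(self, folder):
        pass                   # a real Trainable writes its model and hash to `folder`


manager = TrainingManager(EchoTrainable, 'intent_cache')
manager.add('hello.intent', ['hi', 'hello there', 'good morning'])
manager.train(single_thread=True)   # single_thread skips the worker pool entirely
print([obj.name for obj in manager.objects])   # expect ['hello.intent'] on a fresh cache
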
Example 2
# Same imports as Example 1, minus functools.partial, which this version
# does not use.
import multiprocessing as mp
from os import mkdir
from os.path import isdir, isfile, join, splitext

import padatious
from padatious.train_data import TrainData
from padatious.util import lines_hash


class TrainingManager(object):
    """
    Manages parallel (multi-process) training of either Intents or Entities

    Args:
        cls (Type[Trainable]): Class to wrap
        cache_dir (str): Place to store cache files
    """
    def __init__(self, cls, cache_dir):
        self.cls = cls
        self.cache = cache_dir
        self.objects = []
        self.objects_to_train = []

        self.train_data = TrainData()

    def add(self, name, lines, reload_cache=False):
        hash_fn = join(self.cache, name + '.hash')
        old_hsh = None
        if isfile(hash_fn):
            with open(hash_fn, 'rb') as g:
                old_hsh = g.read()
        min_ver = splitext(padatious.__version__)[0]
        new_hsh = lines_hash([min_ver] + lines)
        if reload_cache or old_hsh != new_hsh:
            self.objects_to_train.append(self.cls(name=name, hsh=new_hsh))
        else:
            self.objects.append(
                self.cls.from_file(name=name, folder=self.cache))
        self.train_data.add_lines(name, lines)

    def load(self, name, file_name, reload_cache=False):
        with open(file_name) as f:
            self.add(name, f.read().split('\n'), reload_cache)

    def remove(self, name):
        self.objects = [i for i in self.objects if i.name != name]
        self.objects_to_train = [
            i for i in self.objects_to_train if i.name != name
        ]
        self.train_data.remove_lines(name)

    def train(self, debug=True, single_thread=False):
        if not isdir(self.cache):
            mkdir(self.cache)

        def args(i):
            return i, self.cache, self.train_data, debug

        if single_thread:
            for i in self.objects_to_train:
                _train_and_save(*args(i))
        else:
            # Train in multiple processes to disk
            pool = mp.Pool()
            try:
                results = [
                    pool.apply_async(_train_and_save, args(i))
                    for i in self.objects_to_train
                ]

                for i in results:
                    i.get()
            finally:
                pool.close()

        # Load saved objects from disk
        for obj in self.objects_to_train:
            self.objects.append(
                self.cls.from_file(name=obj.name, folder=self.cache))
        self.objects_to_train = []
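
The main difference between the two versions is how train() drives the pool: Example 1 submits the whole batch with map_async(), bounds the wait with a timeout, and tolerates IOError for any object whose model never reached the cache, while this version submits each object with apply_async(), blocks on every result with no timeout (so an exception raised inside a worker propagates out of train()), and reloads the results without an IOError guard. Below is a small sketch of calling this variant, reusing the illustrative EchoTrainable stub from the sketch after Example 1; the __main__ guard matters here because train() starts worker processes.

if __name__ == '__main__':
    manager = TrainingManager(EchoTrainable, 'intent_cache')
    manager.add('goodbye.intent', ['bye', 'see you later', 'farewell'])
    try:
        # Blocks until every worker result has been fetched; an exception
        # raised inside _train_and_save re-raises here.
        manager.train()
    except Exception as exc:
        print('Training failed:', exc)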