def load_data(preparer, directory='data/sequences/model_train_seq/simulated'):
    """Build flat training data and target lists from a directory of sequences.

    Every record yielded by ``DataLoader().loadDirectory(directory)`` is
    unpacked into five fields; the last four are handed to
    ``preparer.prepare_training_data``. The first two items of each result
    are appended to the running ``data`` and ``target`` lists; the third
    item is ignored.

    Args:
        preparer: Object exposing ``prepare_training_data(s_x, a_x, s_y, a_y)``
            that returns a 3-item result.
        directory: Directory passed to ``DataLoader.loadDirectory``. Defaults
            to the simulated training set; another corpus such as
            ``'data/sequences/train_sequences'`` can be passed instead.

    Returns:
        A ``(data, target)`` tuple of lists.
    """
    data, target = [], []
    for _, s_x, a_x, s_y, a_y in DataLoader().loadDirectory(directory):
        d, t, _ = preparer.prepare_training_data(s_x, a_x, s_y, a_y)
        data.extend(d)
        target.extend(t)
    return data, target
def load_data(preparer, directory='data/sequences/model_train_seq/simulated'):
    """Load sequences from ``directory`` and turn them into training lists.

    Each loaded record is a 5-tuple whose first field is skipped; the other
    four (``s_x, a_x, s_y, a_y``) go to ``preparer.prepare_training_data``.
    That call returns three items: the first extends ``data``, the second
    extends ``target``, and the third is dropped.

    Args:
        preparer: Provides ``prepare_training_data(s_x, a_x, s_y, a_y)``.
        directory: Path given to ``DataLoader.loadDirectory``. The default is
            the simulated training set that was previously hard-coded; pass
            e.g. ``'data/sequences/train_sequences'`` to use another corpus.

    Returns:
        tuple: ``(data, target)``, both plain lists.
    """
    loader = DataLoader()
    data = []
    target = []
    for _, s_x, a_x, s_y, a_y in loader.loadDirectory(directory):
        samples, labels, _ = preparer.prepare_training_data(
            s_x, a_x, s_y, a_y
        )
        data.extend(samples)
        target.extend(labels)
    return data, target
def __init__(
    self,
    preparer,
    filename="data/clf/randomforest.clf",
    training_data_dir="data/sequences/train_sequences",
    params=None,
    autotrain=True,
    memoization=False,
    inverted=False,
    use_global_classifier=False,
):
    """Set up the classifier, loading, training or reusing a model as needed.

    Args:
        preparer: Assigned to ``self.preparer``; its
            ``prepare_training_data`` is used when training.
        filename: Stored as ``self.default_filename``. With ``autotrain``
            on, an existing file at this path is loaded; otherwise a freshly
            trained model is saved there.
        training_data_dir: Directory passed to ``DataLoader.loadDirectory``
            when training.
        params: Classifier parameters. When ``None``,
            ``config.classifiers[config.classifier_index][2]`` is used.
        autotrain: If true, load the model from ``filename`` when that path
            exists, else train from ``training_data_dir`` and save. If
            false, only an untrained classifier from
            ``self._get_classifier()`` is created.
        memoization: Stored on the instance; not otherwise used here.
        inverted: Stored on the instance; not otherwise used here.
        use_global_classifier: If true, reuse the module-level
            ``_global_classifier`` when it is already set; when it is not
            set yet, publish the classifier built here to it.
    """
    global _global_classifier

    self._preparer = None
    self.preparer = preparer
    self.default_filename = filename
    self.training_data_dir = training_data_dir
    if params is None:
        params = config.classifiers[config.classifier_index][2]
    self.params = params
    self.mem = {}
    self.memoization = memoization
    self.inverted = inverted

    # Reuse the shared classifier when requested and already available.
    if use_global_classifier and _global_classifier is not None:
        self.classifier = _global_classifier
        return

    if autotrain and path.exists(self.default_filename):
        # NOTE(review): when the path exists but is not a regular file,
        # nothing is loaded and self.classifier is not assigned in this
        # method -- confirm whether that case can occur.
        if path.isfile(self.default_filename):
            self.load(self.default_filename)
    else:
        self.classifier = self._get_classifier()
        if autotrain:
            sys.stderr.write('Training clasifier\n')
            data, target, weights = [], [], []
            sequences = DataLoader().loadDirectory(self.training_data_dir)
            for _, s_x, a_x, s_y, a_y in sequences:
                d, t, w = self.preparer.prepare_training_data(
                    s_x, a_x, s_y, a_y
                )
                data.extend(d)
                target.extend(t)
                weights.extend(w)
            self.fit(data, target, array(weights))
            self.save(self.default_filename)

    if use_global_classifier:
        _global_classifier = self.classifier
def __init__(
    self,
    preparer,
    filename="data/clf/randomforest.clf",
    training_data_dir="data/sequences/train_sequences",
    params=None,
    autotrain=True,
    memoization=False,
    inverted=False,
    use_global_classifier=False,
):
    """Initialise the instance and obtain its underlying classifier.

    The classifier comes from one of three places:

    * the module-level ``_global_classifier``, when
      ``use_global_classifier`` is true and that global is already set;
    * ``self.load(filename)``, when ``autotrain`` is true and ``filename``
      exists as a regular file;
    * ``self._get_classifier()``, which is additionally fitted on the
      sequences in ``training_data_dir`` and saved to ``filename`` when
      ``autotrain`` is true.

    Args:
        preparer: Stored via ``self.preparer``; supplies
            ``prepare_training_data`` for training.
        filename: Model file path, kept as ``self.default_filename``.
        training_data_dir: Directory of training sequences, read through
            ``DataLoader.loadDirectory``.
        params: Classifier parameters; ``None`` selects
            ``config.classifiers[config.classifier_index][2]``.
        autotrain: Enables loading from / training into ``filename``.
        memoization: Stored as ``self.memoization``.
        inverted: Stored as ``self.inverted``.
        use_global_classifier: Share one classifier through the
            module-level ``_global_classifier``.
    """
    global _global_classifier

    self._preparer = None
    self.preparer = preparer
    self.default_filename = filename
    self.training_data_dir = training_data_dir
    self.params = (
        config.classifiers[config.classifier_index][2]
        if params is None
        else params
    )
    self.mem = {}
    self.memoization = memoization
    self.inverted = inverted

    build_own = _global_classifier is None or not use_global_classifier
    if not build_own:
        self.classifier = _global_classifier
        return

    if autotrain and path.exists(self.default_filename):
        # NOTE(review): an existing non-file path (e.g. a directory) loads
        # nothing and leaves self.classifier unassigned by this method --
        # confirm this is intended.
        if path.isfile(self.default_filename):
            self.load(self.default_filename)
    else:
        self.classifier = self._get_classifier()
        if autotrain:
            sys.stderr.write('Training clasifier\n')
            loader = DataLoader()
            data, target, weights = [], [], []
            for _, s_x, a_x, s_y, a_y in loader.loadDirectory(
                self.training_data_dir
            ):
                d, t, w = self.preparer.prepare_training_data(
                    s_x, a_x, s_y, a_y
                )
                data.extend(d)
                target.extend(t)
                weights.extend(w)
            self.fit(data, target, array(weights))
            self.save(self.default_filename)

    # Publish the classifier just built or loaded for later instances.
    if use_global_classifier:
        _global_classifier = self.classifier
def train(self, dirname):
    """Train on all sequences loaded from ``dirname``.

    The directory is read with ``DataLoader().loadDirectory`` and the
    loaded sequences are passed to ``self.train_multi``; that call's
    result is returned unchanged.
    """
    return self.train_multi(DataLoader().loadDirectory(dirname))