def main(hypes_file, output_dir, override):
    """
    Orchestrate.

    Parameters
    ----------
    hypes_file : str
        Path to a JSON file
    output_dir : str
        Path where the output should be stored
    override : bool
        If True, then override the model if it exists.
    """
    # Load hyperparameters
    with open(hypes_file, 'r') as f:
        hypes = json.load(f)

    # Set serialization path
    base = os.path.dirname(hypes_file)
    model_file_path = os.path.join(base, '%s.json' % hypes['model']['name'])
    model_file_path = os.path.abspath(model_file_path)

    if not os.path.isfile(model_file_path) or override:
        if not os.path.isfile(model_file_path):
            logging.info("Did not find '%s'. Start training...",
                         model_file_path)
        else:
            logging.info("Override '%s'. Start training...",
                         model_file_path)

        # Get training data
        x_files, y_files = get_file_list(hypes, 'train')

        # "Train" "classifier" (it just counts the classes)
        model = {}
        for i in range(len(hypes['classes'])):
            model[i] = 0
        for y_file in y_files:
            logging.info("Read '%s'...", y_file)
            mask = load_segmentation_mask(hypes, y_file)
            for row in mask:
                for pixel in row:
                    model[pixel] += 1

        # Save model as JSON file
        with open(model_file_path, 'w') as f:
            json.dump(model, f)
    else:
        # Load model from JSON file. JSON object keys are always strings,
        # so map them back to integer class indices.
        with open(model_file_path) as f:
            model = {int(cls): count
                     for cls, count in json.load(f).items()}

    # Evaluate
    data = get_file_list(hypes, 'test')
    analyze.evaluate(hypes,
                     data,
                     output_dir,
                     model,
                     elements=[0, 1],
                     get_segmentation=get_segmentation)
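# The counting "model" above is just a dict mapping class index -> pixel
# count. A minimal sketch (illustrative only, not part of the original code)
# of turning those counts into class priors for a trivial baseline segmenter;
# `class_counts` is assumed to be the dict produced by the training loop:
def counts_to_priors(class_counts):
    """Normalize absolute pixel counts to class probabilities."""
    total = float(sum(class_counts.values()))
    return {cls: count / total for cls, count in class_counts.items()}

# Example: {0: 900, 1: 100} -> {0: 0.9, 1: 0.1}; a baseline segmenter can
# then predict the argmax of these priors for every pixel.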
def main(hypes_file, output_dir, override):
    """
    Orchestrate.

    Parameters
    ----------
    hypes_file : str
        Path to a JSON file
    output_dir : str
        Path where the output should be stored
    override : bool
        If True, then override the model if it exists.
    """
    # Load hyperparameters
    with open(hypes_file, 'r') as f:
        hypes = json.load(f)

    # Set serialization path
    base = os.path.dirname(hypes_file)
    model_file_path = os.path.join(base, '%s.pickle' % hypes['model']['name'])
    model_file_path = os.path.abspath(model_file_path)

    if not os.path.isfile(model_file_path) or override:
        if not os.path.isfile(model_file_path):
            logging.info("Did not find '%s'. Start training...",
                         model_file_path)
        else:
            logging.info("Override '%s'. Start training...",
                         model_file_path)

        # Get training data
        x_files, y_files = get_file_list(hypes, 'train')

        # "Train" "classifier" (it just sums the masks, pixel position
        # by pixel position)
        model = {'positions': None, 'files': 0}
        for y_file in y_files:
            logging.info("Read '%s'...", y_file)
            mask = load_segmentation_mask(hypes, y_file)
            if model['positions'] is None:
                model['positions'] = mask
            else:
                model['positions'] += mask
            model['files'] += 1

        # Save model as pickle file
        scipy.misc.imsave("instruments.png", model['positions'])
        with open(model_file_path, 'wb') as handle:
            pickle.dump(model, handle, protocol=pickle.HIGHEST_PROTOCOL)
    else:
        # Load model from pickle file
        with open(model_file_path, 'rb') as handle:
            model = pickle.load(handle)

    # Evaluate
    data = get_file_list(hypes, 'test')
    analyze.evaluate(hypes,
                     data,
                     output_dir,
                     model,
                     elements=[0, 1],
                     get_segmentation=get_segmentation)
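# The pickled "model" above stores the element-wise sum of all training
# masks plus the number of files. A minimal sketch (an assumption, not part
# of the original code) of turning that into a per-pixel location prior:
import numpy as np

def positions_to_probability(model):
    """Return P(class == 1) per pixel from the accumulated mask sums."""
    # Cast to float first so integer mask sums do not truncate on division.
    return np.asarray(model['positions'], dtype=np.float64) / model['files']

# A pixel that was labeled 1 in 40 of 100 training masks gets probability
# 0.4; thresholding this map at 0.5 yields a simple location prior.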
def setUp(self):
    super(TestHeap, self).setUp()
    self.threads = set()
    self.content_stream = []
    self.content_counter = 0
    self.forum = forum.forum(self.feed)
    heap.priority_list(self.forum, lambda x: analyze.evaluate(x))
    heap.priority_list(self.forum, lambda x: -analyze.evaluate(x))
def main(hypes_file, data_dir, override):
    """Orchestrate."""
    with open(hypes_file, 'r') as f:
        hypes = json.load(f)
    model_file_path = os.path.abspath('%s.pkl' % hypes['model']['name'])
    color_changes = {0: (0, 0, 0, 0),
                     1: (0, 255, 0, 127),
                     'default': (0, 0, 0, 0)}
    if not os.path.isfile(model_file_path) or override:
        if not os.path.isfile(model_file_path):
            logging.info("Did not find '%s'. Start training...",
                         model_file_path)
        else:
            logging.info("Override '%s'. Start training...",
                         model_file_path)

        # Get data
        # x_files, y_files = inputs(hypes, None, 'train', data_dir)
        x_files, y_files = get_file_list(hypes, 'train')
        x_files, y_files = sklearn.utils.shuffle(x_files,
                                                 y_files,
                                                 random_state=0)
        x_train, y_train = get_traindata_single_file(hypes,
                                                     x_files[0],
                                                     y_files[0])
        nb_features = x_train[0].shape[0]
        logging.info("Input gets %i features", nb_features)

        # Make model. Only the last assignment takes effect; the earlier
        # classifiers are kept around for quick experimentation.
        from sklearn.svm import LinearSVC, SVC
        from sklearn.tree import DecisionTreeClassifier
        model = SVC(probability=False,
                    # cache_size=200,
                    kernel="linear",
                    C=2.8,
                    gamma=.0073)
        model = LinearSVC(C=2.8)
        model = DecisionTreeClassifier()

        print("Start fitting. This may take a while")
        generator = generate_training_data(hypes, x_files, y_files)
        t0 = time.time()
        if False:
            # Dead branch: scikit-learn estimators have no `fit_generator`.
            sep = hypes['solver']['samples_per_epoch']
            model.fit_generator(generator,
                                samples_per_epoch=sep,
                                nb_epoch=hypes['solver']['epochs'],
                                verbose=1,
                                # callbacks=[callb],
                                validation_data=(x_train, y_train))
        else:
            logging.info("Fit with .fit")
            x_train, y_train = inputs(hypes, None, 'train', data_dir)
            print(len(y_train))
            model.fit(x_train, y_train)
        t1 = time.time()
        print("Training Time: %0.4f" % (t1 - t0))

        # Save the fitted model with joblib
        joblib.dump(model, model_file_path)

        # Evaluate
        data = get_file_list(hypes, 'test')
        logging.info("Start segmentation")
        analyze.evaluate(hypes,
                         data,
                         data_dir,
                         model,
                         elements=[0, 1],
                         load_label_seg=load_label_seg,
                         color_changes=color_changes,
                         get_segmentation=get_segmentation)
    else:
        model = joblib.load(model_file_path)
        data = get_file_list(hypes, 'test')
        analyze.evaluate(hypes,
                         data,
                         data_dir,
                         model,
                         elements=[0, 1],
                         load_label_seg=load_label_seg,
                         color_changes=color_changes,
                         get_segmentation=get_segmentation)
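# A minimal sketch of how a fitted scikit-learn classifier is typically
# applied per pixel (an assumption; the project's actual `get_segmentation`
# may differ): one feature vector per pixel, predict, reshape to the image.
import numpy as np

def segment_with_classifier(model, features, image_shape):
    """features: array of shape (n_pixels, n_features)."""
    predictions = model.predict(features)   # one class label per pixel
    return predictions.reshape(image_shape)  # back to (height, width)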
def main(hypes_file, out_dir, override):
    """Orchestrate."""
    with open(hypes_file, 'r') as f:
        hypes = json.load(f)
    model_file_path = '%s.yaml' % hypes['model']['name']
    weights_file_path = '%s.hdf5' % hypes['model']['name']
    if not os.path.isfile(model_file_path) or override:
        patch_size = hypes['arch']['patch_size']
        img_channels = hypes['arch']['num_channels']
        nb_out = hypes['arch']['stride']**len(hypes['classes'])

        model = Sequential()
        model.add(Convolution2D(64, 3, 3,
                                border_mode='valid',
                                init='glorot_normal',
                                activation='sigmoid',
                                input_shape=(img_channels,
                                             patch_size,
                                             patch_size)))
        model.add(Convolution2D(32, 3, 3,
                                activation='relu',
                                init='glorot_normal'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.5))
        # model.add(Convolution2D(64, 3, 3, border_mode='same'))
        # model.add(Activation('relu'))
        # model.add(Convolution2D(64, 3, 3))
        # model.add(Activation('relu'))
        # model.add(MaxPooling2D(pool_size=(2, 2)))
        # model.add(Dropout(0.25))

        model.add(Flatten())
        # model.add(Dense(64, activation='sigmoid'))
        # # model.add(Dropout(0.5))
        # model.add(Dense(64, activation='relu'))
        # model.add(Dropout(0.5))
        model.add(Dense(nb_out,
                        activation='sigmoid',
                        init='glorot_normal'))
        model.add(Reshape((hypes['arch']['stride'],
                           hypes['arch']['stride'])))

        # sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        opt = keras.optimizers.Adadelta(lr=hypes['solver']['learning_rate'],
                                        rho=0.95,
                                        epsilon=1e-08)
        model.compile(loss=hypes['solver']['loss'],
                      optimizer=opt)  # hypes['solver']['optimizer']
        logging.info("model compiled")

        # while 1:
        #     b = generate_batch(hypes, 'train')
        #     for e in range(10):
        #         print 'Epoch', e
        #         batches = 0
        #         for X_batch, Y_batch in generate_batch(hypes, 'train'):
        #             Y_batch = np.reshape(Y_batch, (-1, 400))
        #             loss = model.fit(X_batch,
        #                              Y_batch,
        #                              batch_size=hypes['solver']['batch_size'])
        #             print(loss)
        #             batches += 1
        #             if e > 2:
        #                 # we need to break the loop by hand because
        #                 # the generator loops indefinitely
        #                 break

        # Train
        g = generate_batch(hypes, 'train')
        logging.info("generate_batch")
        X_test, Y_test = next(g)
        # print("#" * 80)
        # print(X_test.shape)
        # print(Y_test.shape)
        logging.info("start fit_generator")
        model.fit_generator(
            generate_batch(hypes, 'train'),
            samples_per_epoch=hypes['solver']['samples_per_epoch'],
            nb_epoch=hypes['solver']['epochs'],
            verbose=1,
            validation_data=(X_test, Y_test))
        x_files, y_files = get_file_list(hypes, 'train')
        x_files, y_files = sklearn.utils.shuffle(x_files,
                                                 y_files,
                                                 random_state=0)
        # ij = 0
        # for epoch in range(1, hypes['solver']['epochs'] + 1):
        #     print("#" * 80)
        #     print("# Epoch %i" % epoch)
        #     print("#" * 80)
        #     x_files, y_files = sklearn.utils.shuffle(x_files,
        #                                              y_files,
        #                                              random_state=epoch)
        #     for x_train_file, y_train_file in zip(x_files, y_files):
        #         x_train, y_train = get_traindata_single_file(hypes,
        #                                                      x_train_file,
        #                                                      y_train_file)
        #         # Reduce data
        #         # x_train, y_train = reduce_data_equal(x_train, y_train)
        #         t0 = time.time()
        #         model.fit(x_train, y_train,
        #                   batch_size=128,
        #                   nb_epoch=1)
        #         ij += 1
        #         print("%i of %i" %
        #               (ij, hypes['solver']['epochs'] * len(x_files)))
        #         t1 = time.time()
        #         print("Training Time: %0.4f" % (t1 - t0))
        print("done with fit_generator")

        # Save architecture as YAML, weights as HDF5
        yaml_string = model.to_yaml()
        with open(model_file_path, 'w') as f:
            f.write(yaml_string)
        model.save_weights(weights_file_path)

        # Evaluate
        data = get_file_list(hypes, 'test')
        analyze.evaluate(hypes,
                         data,
                         out_dir,
                         model,
                         elements=[0, 1],
                         get_segmentation=get_segmentation,
                         verbose=True)
    else:
        with open(model_file_path) as f:
            yaml_string = f.read()
        model = model_from_yaml(yaml_string)
        model.load_weights(weights_file_path)
        model.compile(optimizer=hypes['solver']['optimizer'],
                      loss='binary_crossentropy')
        data = get_file_list(hypes, 'test')
        analyze.evaluate(hypes,
                         data,
                         out_dir,
                         model,
                         elements=[0, 1],
                         get_segmentation=get_segmentation,
                         verbose=True)
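# `fit_generator` above expects an endless generator of (X, Y) batches.
# A minimal sketch of the contract `generate_batch` has to fulfil (an
# assumption; the project's real implementation may differ):
import numpy as np

def batch_generator_sketch(x_data, y_data, batch_size=32):
    """Yield (X, Y) batches forever, as fit_generator requires."""
    x_data, y_data = np.asarray(x_data), np.asarray(y_data)
    n = len(x_data)
    while True:  # fit_generator stops pulling after samples_per_epoch
        idx = np.random.randint(0, n, size=batch_size)
        yield x_data[idx], y_data[idx]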
def main(hypes_file, data_dir, override):
    """Orchestrate."""
    with open(hypes_file, 'r') as f:
        hypes = json.load(f)
    if 'training' not in hypes:
        hypes['training'] = {}
    if 'make_equal' not in hypes['training']:
        hypes['training']['make_equal'] = False

    base = os.path.dirname(hypes_file)
    model_file_path = os.path.join(base, '%s.yaml' % hypes['model']['name'])
    model_file_path = os.path.abspath(model_file_path)
    weights_file_path = os.path.join(base, '%s.hdf5' % hypes['model']['name'])
    weights_file_path = os.path.abspath(weights_file_path)

    if not os.path.isfile(model_file_path) or override:
        if not os.path.isfile(model_file_path):
            logging.info("Did not find '%s'. Start training...",
                         model_file_path)
        else:
            logging.info("Override '%s'. Start training...",
                         model_file_path)

        # Get data
        # x_files, y_files = inputs(hypes, None, 'train', data_dir)
        x_files, y_files = get_file_list(hypes, 'train')
        x_files, y_files = sklearn.utils.shuffle(x_files,
                                                 y_files,
                                                 random_state=0)
        x_train, y_train = get_traindata_single_file(hypes,
                                                     x_files[0],
                                                     y_files[0])
        nb_features = x_train[0].shape[0]
        logging.info("Input gets %i features", nb_features)

        # Make model
        model = Sequential()
        model.add(Dense(64,
                        input_dim=nb_features,
                        init='uniform',
                        activation='sigmoid'))
        model.add(Dropout(0.5))
        model.add(Dense(64, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy',
                      optimizer='adagrad',  # rmsprop
                      metrics=['accuracy'])

        generator = generate_training_data(hypes, x_files, y_files)
        t0 = time.time()
        sep = hypes['solver']['samples_per_epoch']
        if True:
            class_weight = get_class_weight(hypes)
            logging.info("class_weights = %s", class_weight)
            model.fit_generator(generator,
                                samples_per_epoch=sep,
                                nb_epoch=hypes['solver']['epochs'],
                                verbose=1,
                                validation_data=(x_train, y_train),
                                class_weight=class_weight)
        else:
            logging.info("Fit with .fit")
            x_train, y_train = inputs(hypes, None, 'train', data_dir)
            model.fit(x_train, y_train, batch_size=128, nb_epoch=1)
        t1 = time.time()
        print("Training Time: %0.4f" % (t1 - t0))

        # save as YAML
        yaml_string = model.to_yaml()
        with open(model_file_path, 'w') as f:
            f.write(yaml_string)
        model.save_weights(weights_file_path)

        # Evaluate
        data = get_file_list(hypes, 'test')
        logging.info("Start segmentation")
        analyze.evaluate(hypes,
                         data,
                         data_dir,
                         model,
                         elements=[0, 1],
                         get_segmentation=get_segmentation)
    else:
        logging.info("## Found '%s'.", model_file_path)
        with open(model_file_path) as f:
            yaml_string = f.read()
        model = model_from_yaml(yaml_string)
        model.load_weights(weights_file_path)
        model.compile(optimizer='adagrad', loss='binary_crossentropy')
        data = get_file_list(hypes, 'test')
        analyze.evaluate(hypes,
                         data,
                         data_dir,
                         model,
                         elements=[0, 1],
                         get_segmentation=get_segmentation)
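# `get_class_weight` is called above but defined elsewhere. A plausible
# sketch (an assumption, not the project's actual implementation) is
# inverse-frequency weighting, so the rare class contributes more to the
# loss:
def inverse_frequency_class_weight(class_counts):
    """class_counts: dict class -> pixel count, e.g. {0: 900, 1: 100}."""
    total = float(sum(class_counts.values()))
    return {cls: total / count for cls, count in class_counts.items()}

# {0: 900, 1: 100} -> {0: 1.11, 1: 10.0}; Keras multiplies each sample's
# loss contribution by the weight of its class.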
def main(data_dir):
    """Orchestrate."""
    model_file_path = 'basic_local_classifier.yaml'
    weights_file_path = 'basic_local_classifier.hdf5'
    color_changes = {(255, 255, 255): (0, 255, 0),
                     'default': (0, 0, 0)}
    if not os.path.isfile(model_file_path):
        # Get data
        x_train, y_train = inputs({}, None, 'train', data_dir)
        x_train, y_train = sklearn.utils.shuffle(x_train,
                                                 y_train,
                                                 random_state=0)

        # Reduce data: keep at most n samples per class, where n is the
        # total number of positive samples.
        logging.info("Start reducing data...")
        n = sum(y_train)
        print("n=%i" % n)
        true_count, false_count = 0, 0
        x_train_n, y_train_n = [], []
        for x, y in zip(x_train, y_train):
            if y == 1 and true_count < n:
                x_train_n.append(x)
                y_train_n.append(y)
                true_count += 1
            elif y == 0 and false_count < n:
                x_train_n.append(x)
                y_train_n.append(y)
                false_count += 1
            if true_count >= n and false_count >= n:
                break
        x_train = numpy.array(x_train_n)
        y_train = numpy.array(y_train_n)
        logging.info("Reduced data...")

        # Make model
        model = Sequential()
        model.add(Dense(64,
                        input_dim=5,
                        init='uniform',
                        activation='sigmoid'))
        model.add(Dropout(0.5))
        model.add(Dense(64, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy',
                      optimizer='adagrad',  # rmsprop
                      metrics=['accuracy'])

        t0 = time.time()
        model.fit(x_train, y_train, batch_size=128, nb_epoch=1)
        t1 = time.time()
        print("Training Time: %0.4f" % (t1 - t0))

        # save as YAML
        yaml_string = model.to_yaml()
        with open(model_file_path, 'w') as f:
            f.write(yaml_string)
        model.save_weights(weights_file_path)

        # Evaluate
        data = get_file_list('test', data_dir)
        logging.info("Start segmentation")
        analyze.evaluate(data,
                         data_dir,
                         model,
                         elements=[0, 1],
                         load_label_seg=load_label_seg,
                         color_changes=color_changes,
                         get_segmentation=get_segmentation)
    else:
        with open(model_file_path) as f:
            yaml_string = f.read()
        model = model_from_yaml(yaml_string)
        model.load_weights(weights_file_path)
        model.compile(optimizer='adagrad', loss='binary_crossentropy')
        data = get_file_list('test', data_dir)
        analyze.evaluate(data,
                         data_dir,
                         model,
                         elements=[0, 1],
                         load_label_seg=load_label_seg,
                         color_changes=color_changes,
                         get_segmentation=get_segmentation)
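# The hand-rolled balancing loop above can be expressed more compactly with
# numpy. A sketch under the same assumption of binary labels (not part of
# the original code):
import numpy as np

def balance_binary(x, y):
    """Subsample so both classes appear equally often."""
    x, y = np.asarray(x), np.asarray(y)
    pos, neg = np.where(y == 1)[0], np.where(y == 0)[0]
    n = min(len(pos), len(neg))
    keep = np.concatenate([pos[:n], neg[:n]])
    return x[keep], y[keep]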