def train_model(x_data, y_data):
    """Fit a linear-kernel SVC, pickle it to disk, and return it.

    Args:
        x_data: Training samples, passed unchanged to ``SVC.fit``.
        y_data: Training labels, passed unchanged to ``SVC.fit``.

    Returns:
        The fitted ``SVC`` instance.

    Side effects:
        Writes the pickled model to ``config.saved_model`` and prints
        progress messages to stdout.
    """
    print(
        "Training the SVM classifier ... this may take a while, go grab another coffee."
    )
    # Only model construction and fitting are timed, so the reported figure
    # matches the "Training took" message below.
    with Timer(verbose=False) as t:
        model = SVC(kernel='linear')
        model.fit(x_data, y_data)

    print(f"Saving the model as {config.saved_model}")
    # A context manager guarantees the file is flushed and closed even if
    # pickling raises; a bare open() left that to the garbage collector.
    with open(config.saved_model, 'wb') as model_file:
        pickle.dump(model, model_file)

    print("Training took {} mins".format(t.mins))
    return model
def main(argv):
    """Find the image/segmentation headers, load them, and train the network.

    Args:
        argv: Unused; accepted so the function can be used as an entry point
            that receives command-line arguments.

    Raises:
        ValueError: If the number of image files and label files differ.

    Reads ``FLAGS.data_dir``, ``FLAGS.patch_size``, ``FLAGS.batch_size``,
    ``FLAGS.samples_per_epoch`` and ``FLAGS.nb_epochs``, plus the module-level
    names ``input_shape`` and ``nb_classes``.
    """
    # end=' ' keeps the timer output on the same line as the message; the
    # previous trailing comma after print(...) is a no-op in Python 3.
    print('Finding data ...', end=' ', flush=True)
    with Timer():
        # glob() returns paths in arbitrary order. Images and labels are
        # paired by position below, so both lists are sorted to make the
        # pairing deterministic and aligned on the shared path prefix.
        image_filenames = sorted(glob(os.path.join(
            FLAGS.data_dir,
            'disc*/OAS1_*_MR1/PROCESSED/MPRAGE/T88_111/OAS1_*_MR1_mpr_n4_anon_111_t88_gfc.hdr')))
        label_filenames = sorted(glob(os.path.join(
            FLAGS.data_dir,
            'disc*/OAS1_*_MR1/FSL_SEG/OAS1_*_MR1_mpr_n4_anon_111_t88_masked_gfc_fseg.hdr')))
        # Explicit check instead of assert, which is stripped under -O.
        if len(image_filenames) != len(label_filenames):
            raise ValueError(
                'Found %i images but %i label files.'
                % (len(image_filenames), len(label_filenames)))
        print('Found %i images.' % len(image_filenames))

    print('Loading images ...', end=' ', flush=True)
    with Timer():
        images = [ReadImage(image_filename) for image_filename in image_filenames]
        labels = [ReadImage(label_filename) for label_filename in label_filenames]
        images_train, images_test, labels_train, labels_test = train_test_split(
            images, labels, train_size=0.66)

    tensor_board = TensorBoard(log_dir='./TensorBoard')
    early_stopping = EarlyStopping(monitor='acc', patience=2, verbose=1)

    model = create_inception_resnet_v2(nb_classes=nb_classes)
    model.compile(optimizer=RMSprop(lr=0.045, rho=0.94, epsilon=1., decay=0.9),
                  loss='categorical_crossentropy',
                  metrics=['acc'])
    # NOTE(review): fit_generator / samples_per_epoch / nb_epoch look like the
    # legacy Keras API -- confirm against the Keras version this project pins
    # before renaming them. The held-out images_test / labels_test split is
    # not used in this function.
    model.fit_generator(
        generator(images_train, labels_train, input_shape, nb_classes,
                  FLAGS.patch_size, FLAGS.batch_size),
        samples_per_epoch=FLAGS.samples_per_epoch,
        nb_epoch=FLAGS.nb_epochs,
        callbacks=[tensor_board, early_stopping],
        verbose=1)
def get_features(train=True):
    """Load a CIFAR-10 split and turn it into (optionally normalized) HOG features.

    Args:
        train: When true, load the "train" split and shuffle the result;
            otherwise load the "test" split and keep its order.

    Returns:
        A ``(x_data, y_data)`` tuple: the HOG feature array and the labels.

    Raises:
        ValueError: If the number of samples and labels differ.

    Reads ``config.path_to_cifar``, ``config.min_max_norm`` and
    ``config.unit_vector_norm``.
    """
    if train:
        # Load cifar10 train data and labels
        print("Reading training data...")
        x_data, y_data = cifar10(config.path_to_cifar, "train")
    else:
        print("Reading testing data...")
        x_data, y_data = cifar10(config.path_to_cifar, "test")

    N_data = len(x_data)
    # The original `assert cond and "msg"` folded the message into the
    # condition, so it was never shown (and assert is stripped under -O).
    if x_data.shape[0] != len(y_data):
        raise ValueError("Both data and labels should be the same set size")
    print("Num of training samples: ", N_data)

    # Convert every image to grayscale and rescale pixel values by 1/255.
    x_data = np.array(
        [cv2.cvtColor(x, cv2.COLOR_RGB2GRAY) for x in x_data],
        dtype=np.float32)
    x_data /= 255.

    # Extract features
    print("Extracting HOG Features, go grab a coffee...", end=" ")
    with Timer(verbose=False) as t:
        x_data = hog(x_data)
    print("HoG extraction for train set took {} mins".format(t.mins))

    if config.min_max_norm:
        # Normalize the HOG feature vectors by rescaling the value range to [-1, 1]
        print(
            "Normalize the HOG features by rescaling the value range to [-1, 1]"
        )
        # One fit_transform is enough; the previous code fitted the scaler twice.
        # NOTE(review): the scaler is fitted on whichever split is loaded, so
        # train and test are scaled independently -- confirm this is intended.
        x_data = MinMaxScaler(feature_range=(-1, 1)).fit_transform(x_data)
    elif config.unit_vector_norm:
        # Normalize the HOG feature vectors by converting them to unit vectors (vector has length of 1)
        print("Normalize the HOG features by converting them to unit vectors")
        # Normalize each feature vector on its own. Without `axis`, the norm is
        # taken over the whole array, which does not give unit-length rows.
        # Assumes hog() returns one feature row per sample (2-D), the same
        # layout the MinMaxScaler branch requires -- TODO confirm.
        row_norms = np.linalg.norm(x_data, axis=1, keepdims=True)
        row_norms[row_norms == 0] = 1.0  # leave all-zero vectors untouched
        x_data = x_data / row_norms

    if train:
        # Randomly shuffle the data if training.
        rand_idx = np.random.permutation(N_data)
        x_data = x_data[rand_idx]
        y_data = y_data[rand_idx]

    return x_data, y_data
# Benchmark inputs: one generated dataset per (size, mean1, mean2) combination,
# in the same order the equivalent nested loops would produce them.
jobs = [
    generate_data(size, mean1, mean2)
    for size in [500, 5000, 10000]
    for mean1 in [(-1, 0), (1, 1)]
    for mean2 in [(0, -1), (1, -1)]
]


def worker(model, args):
    """Unpack one ``(X, Y)`` job and compute ``model``'s coefficients on it."""
    X, Y = args
    model.compute_coefficients(X, Y)


# Profile the python implementation
model = LogisticRegressionPy()
timer = Timer()
for job in jobs:  # the loop index was never used, so enumerate() is dropped
    worker(model, job)
py_time = timer.split()

# Profile the C++ implementation
model = LogisticRegressionCpp()
timer = Timer()
for job in jobs:
    worker(model, job)
cpp_time = timer.split()

# Build a nice graph
labels = ["Python", "C++"]
times = [py_time, cpp_time]
index = range(len(labels))
# NOTE(review): `size`, `mean1` and `jobs` are not bound anywhere in the
# visible code -- presumably this loop is the innermost level of an enclosing
# loop nest that starts above this point; confirm before moving it.
for mean2 in [(0, -1), (1, -1)]:
    jobs.append(generate_data(size, mean1, mean2))

# Profile with differing numbers of python threads with a model that is
# single-threaded.
model = LogisticRegressionCpp()


def worker(args):
    """Unpack one ``(X, Y)`` job and fit the shared module-level ``model``."""
    X, Y = args
    model.compute_coefficients(X, Y)


timings = []
for num_threads in range(1, 9):
    # The context manager shuts each pool down once its measurement is done,
    # so idle worker threads from earlier pools do not accumulate while the
    # later configurations are being timed.
    with ThreadPool(num_threads) as pool:
        # The timer starts after the pool exists, so only the map() call is
        # measured.
        timer = Timer()
        pool.map(worker, jobs)
        timings.append((num_threads, timer.split()))

# Build a nice graph
labels = [n for n, _ in timings]
times = [t for _, t in timings]
index = range(len(labels))
plt.bar(index, times)
plt.xlabel('Number of threads')
plt.ylabel('Runtime')
plt.xticks(index, labels)
plt.title("C++ speed with varying numbers of python(!) threads")
plt.savefig("profile_pythreads.png")
def profiler(payload_size, num_calls):
    """Time repeated ``noop`` calls on a list of floats.

    Args:
        payload_size: Number of floats (0.0, 1.0, ...) in the list passed
            to ``noop``.
        num_calls: How many times ``noop`` is called with that list.

    Returns:
        The value of ``Timer.split()`` taken right after the last call
        (presumably the elapsed time -- verify against ``Timer``).
    """
    # The payload is built before the timer is created, so its construction
    # is not part of the measurement.
    floats = list(map(float, range(payload_size)))
    stopwatch = Timer()
    remaining = num_calls
    while remaining > 0:
        noop(floats)
        remaining -= 1
    return stopwatch.split()
from multiprocessing.pool import ThreadPool

from data import Timer
from module import noop

# One list of a million floats, built once and shared by every worker call.
payload = [float(i) for i in range(1000000)]


def worker(arg):
    """Call ``noop`` on the shared payload; ``arg`` only drives ``pool.map``."""
    noop(payload)


jobs = range(200)

print("Threads", "Runtime", sep="\t")
for num_threads in range(1, 9):
    # The context manager shuts each pool down after its measurement, so idle
    # threads from earlier pools do not pile up during later measurements.
    with ThreadPool(num_threads) as pool:
        # The timer starts after the pool exists, so only the map() call is
        # measured.
        timer = Timer()
        pool.map(worker, jobs)
        elapsed_time = timer.split()
    print(num_threads, elapsed_time, sep="\t")