def save_images(image_links, query):
    """Download every image in ``image_links`` into ``images/<query>/``.

    Args:
        image_links: Mapping of image URL -> file extension. A falsy
            extension falls back to ``'jpg'``.
        query: Search term. It is used (stripped) as the file-name prefix
            and (with spaces replaced by underscores) as the sub-folder
            name under ``images/``.

    Downloading is best-effort: a failing URL is skipped and reported once
    the loop has finished, so a single bad link does not abort the batch.
    """
    total = len(image_links)
    failures = []  # (url, error) pairs for downloads that did not succeed
    ppb(0, total, prefix='Progress:', suffix='Complete', length=50)
    for i, (url, ext) in enumerate(image_links.items()):
        if not ext:
            ext = 'jpg'
        filename = f"{query.strip()}_{i}.{ext}"
        try:
            urllib.request.urlretrieve(url, f"images/{query.replace(' ', '_')}/{filename}")
        except Exception as err:
            # ``Exception`` (not a bare ``except``) keeps the loop best-effort
            # while still letting KeyboardInterrupt / SystemExit stop it.
            failures.append((url, err))
        ppb(i + 1, total, prefix='Progress:', suffix='Complete', length=50)
    if failures:
        # Reported after the loop so the progress bar output is not broken up.
        print(f"{len(failures)} of {total} downloads failed:")
        for url, err in failures:
            print(f"  {url}: {err}")
def reset_images(image_dest, image_base):
    """Empty ``image_dest`` of regular files, then refill it from ``image_base``.

    Args:
        image_dest: Folder whose files are removed and then replaced.
        image_base: Folder whose files are copied into ``image_dest``.

    Only regular files directly inside each folder are considered;
    sub-directories are left untouched. Progress is reported through ``ppb``.
    """
    # delete all images in the image_dest folder
    stale = [name for name in listdir(image_dest) if isfile(join(image_dest, name))]
    if stale:
        ppb(0, len(stale), prefix='Deleting current dataset: ', suffix='Complete')
        for done, name in enumerate(stale, start=1):
            os.remove(join(image_dest, name))
            ppb(done, len(stale), prefix='Deleting current dataset: ', suffix='Complete')

    # copy all images from image_base to image_dest
    originals = [name for name in listdir(image_base) if isfile(join(image_base, name))]
    ppb(0, len(originals), prefix='Copying original images: ', suffix='Complete')
    for done, name in enumerate(originals, start=1):
        copyfile(join(image_base, name), join(image_dest, name))
        ppb(done, len(originals), prefix='Copying original images: ', suffix='Complete')
def train_and_predict_with_best_CNN(img_folder, base_folder, image_width, x_new, y_raw, test_pct, n_cats, n_iterations):
    """Train a CNN ``n_iterations`` times and classify new images with the best one.

    Args:
        img_folder: Folder holding the images to classify; it is reset from
            ``base_folder`` and resized in place before being read.
        base_folder: Folder with the untouched originals of those images.
        image_width: Side length passed to ``d.resize_images`` and
            ``d.get_lists_from_tuples``.
        x_new, y_raw, test_pct, n_cats: Forwarded to ``split_format_CNN`` to
            build the train/test split.
        n_iterations: Number of times the CNN is trained; must be >= 1.

    Returns:
        ``(filepaths, results, predictions)`` where ``filepaths`` are the
        image paths that were classified, ``results`` holds ``'avg_accuracy'``
        and ``'avg_time'`` averaged over all iterations, and ``predictions``
        is the argmax (last axis) of the best model's output per image.

    Raises:
        ValueError: If no model was trained (``n_iterations < 1``).
    """
    # use functions from dataset.py to preprocess new prediction images
    d.reset_images(img_folder, base_folder)     # delete preprocessed images & recopy
    d.resize_images(img_folder, image_width)    # resize images for prediction

    # List the folder only AFTER the reset: reset_images deletes and recopies
    # its content, so a listing taken earlier could name files that no longer
    # exist (or miss the ones that were just copied in).
    filepaths = [img_folder + '/' + f for f in listdir(img_folder) if isfile(join(img_folder, f))]

    x_tuple = []
    for fp in filepaths:
        # context manager closes the file handle once the pixels are read
        with Image.open(fp, 'r') as im:
            x_tuple.append(list(im.getdata()))
    image_data = d.get_lists_from_tuples(x_tuple, image_width)  # format pixel data for prediction
    image_data = np.array(image_data).astype('float32') / 255   # to array, scaled by 1/255

    x_train, y_train, x_test, y_test = split_format_CNN(x_new, y_raw, test_pct, n_cats)
    cnn_params, _ = get_CNN_params([2])
    cnn_params = cnn_params[2][0]

    ppb(0, n_iterations, prefix = 'Training and testing CNNs: ', suffix = 'Complete', length = 100)
    best_cnn = None
    best_acc = float('-inf')
    results = {}
    iteration_accs = []     # accuracy of every iteration
    iteration_times = []    # training time of every iteration

    for n in range(1, n_iterations + 1):
        model, elapsed = train_CNN(x_train, y_train, cnn_params, 2, 5)
        acc = model.evaluate(x_test, y_test)[1]     # second value of evaluate(), used as accuracy
        iteration_accs.append(acc)
        iteration_times.append(elapsed)
        # ``best_cnn is None`` guarantees the first model is kept even when
        # its accuracy is 0 (or NaN), so a model is always available below.
        if best_cnn is None or acc > best_acc:
            best_cnn = model
            best_acc = acc
        ppb(n, n_iterations, prefix = 'Training and testing CNNs: ', suffix = 'Complete', length = 100)

    if best_cnn is None:
        raise ValueError("n_iterations must be at least 1 to train a model")

    results['avg_accuracy'] = np.mean(iteration_accs)
    results['avg_time'] = np.mean(iteration_times)

    predictions = np.argmax(best_cnn.predict(image_data), axis=-1)  # predicted class per image
    return filepaths, results, predictions
def run_CNN_suite(x_raw, y_raw, test_pct, n_cats, iterations, phases, n_phases):
    """Train and evaluate a CNN for every parameter set of every requested phase.

    Args:
        x_raw, y_raw, test_pct, n_cats: Forwarded to ``split_format_CNN``.
        iterations: How many times each parameter set is trained.
        phases: Phase numbers to run.
        n_phases: Total number of phases; phases ``0 .. n_phases - 1`` are
            visited in order.

    Returns:
        A list with one entry per phase in ``range(n_phases)``: ``None`` for a
        phase not in ``phases``, otherwise the list of that phase's parameter
        dictionaries, each extended in place with ``'avg_accuracy'`` and
        ``'avg_time'``.
    """
    x_train, y_train, x_test, y_test = split_format_CNN(x_raw, y_raw, test_pct, n_cats)
    cnn_params, num_params = get_CNN_params(phases)

    trained = 0                                         # models trained so far
    total_runs = len(phases) * num_params * iterations  # progress bar denominator
    ppb(0, total_runs, prefix = 'Training and testing CNNs: ', suffix = 'Complete', length = 100)

    suite_results = []
    for phase in range(n_phases):
        # phases that were not requested only get a placeholder
        if phase not in phases:
            suite_results.append(None)
            continue

        phase_grid = cnn_params[phase]
        phase_results = []
        for idx in range(len(phase_grid)):
            params = phase_grid[idx]
            accuracies = []
            durations = []
            for _ in range(iterations):
                model, elapsed = train_CNN(x_train, y_train, params, phase)
                accuracies.append(model.evaluate(x_test, y_test)[1])
                durations.append(elapsed)
                trained += 1
                ppb(trained, total_runs, prefix = 'Training and testing CNNs: ', suffix = 'Complete', length = 100)

            # averaged results are stored on the parameter dictionary itself
            params['avg_accuracy'] = np.mean(accuracies)
            params['avg_time'] = np.mean(durations)
            phase_results.append(params)

        suite_results.append(phase_results)

    return suite_results
def resize_images(image_dest, nw):
    """Resize every file in ``image_dest`` to ``nw`` x ``nw`` pixels, in place.

    Args:
        image_dest: Folder whose regular files are opened as images and
            overwritten with the resized version.
        nw: Target width and height in pixels.

    Progress is reported through ``ppb``.
    """
    filenames = [f for f in listdir(image_dest) if isfile(join(image_dest, f))]
    ppb(0, len(filenames), prefix='Resizing multiplied images:', suffix='Complete')
    for done, fn in enumerate(filenames, start=1):
        path = join(image_dest, fn)
        # Close the source handle before overwriting the same path; resize()
        # returns a new image, so it stays usable after the block.
        with Image.open(path) as im:
            resized_im = im.resize((nw, nw))    # every image becomes "nw" pixels wide & tall
        resized_im.save(path)
        ppb(done, len(filenames), prefix='Resizing multiplied images:', suffix='Complete')
def multiply_images(img_folder):
    """Run the contrast, blur and exposure helpers over the files in ``img_folder``.

    Three passes are made. The folder is listed again before each pass, so
    files present after an earlier pass are included in the next one.

    Args:
        img_folder: Folder containing the source images.

    NOTE(review): ``l`` (the progress-bar total) is not defined in this
    function; presumably it is a module-level value set elsewhere - confirm.
    """
    def _current_files():
        # regular files directly inside img_folder at the time of the call
        return [f for f in listdir(img_folder) if isfile(join(img_folder, f))]

    contrast_variants = (
        (0.2, "-ct"),   # ct = contrast tiny
        (0.4, "-cl"),   # cl = contrast low
        (1.5, "-ch"),   # ch = contrast high
    )
    exposure_variants = (
        (0.2, '-et'),   # et = exposure tiny
        (0.5, '-el'),   # el = exposure low
        (1.5, '-eh'),   # eh = exposure high
    )

    p = 1   # running counter handed to every helper call
    ppb(0, l, prefix='Creating filtered images: ', suffix='Complete', length=100)

    # pass 1: three contrast calls per file
    for fn in _current_files():
        for factor, suffix in contrast_variants:
            create_new_contrast(img_folder + '/' + fn, factor, suffix, p)
            p += 1

    # pass 2: one blur call per file (b = blurred)
    for fn in _current_files():
        create_new_blur(img_folder + '/' + fn, '-b', p)
        p += 1

    # pass 3: three exposure calls per file
    for fn in _current_files():
        for factor, suffix in exposure_variants:
            create_new_exposure(img_folder + '/' + fn, factor, suffix, p)
            p += 1
def run_CNN_suite_layers(x_raw, y_raw, test_pct, n_cats, iterations, nns):
    """Train and evaluate the phase-2 CNN once per value in ``nns``.

    Args:
        x_raw, y_raw, test_pct, n_cats: Forwarded to ``split_format_CNN``.
        iterations: How many times the CNN is trained for each ``nn`` value.
        nns: Sized iterable of values passed as the last argument of
            ``train_CNN`` (presumably a layer count - confirm against
            ``train_CNN``).

    Returns:
        A list with one dictionary per entry of ``nns``, in the same order:
        a copy of the base CNN parameters extended with ``'avg_accuracy'``
        and ``'avg_time'`` averaged over ``iterations`` runs.
    """
    all_params_and_results = []
    x_train, y_train, x_test, y_test = split_format_CNN(x_raw, y_raw, test_pct, n_cats)
    cnn_params, _ = get_CNN_params([2])
    cnn_params = cnn_params[2][0]

    n = 0                           # models trained so far
    l = len(nns) * iterations       # total number of trainings
    ppb(0, l, prefix = 'Training and testing CNNs: ', suffix = 'Complete', length = 100)

    # one independent result dictionary per nn value, copied up front
    nn_results = [cnn_params.copy() for _ in nns]

    # Pair each nn with ITS OWN result slot by position. Indexing the list
    # with the nn value itself only works when nns is exactly 0..k-1 and
    # raises IndexError (or hits the wrong slot) otherwise.
    for nn, nn_result in zip(nns, nn_results):
        iteration_accs = []     # accuracy of every iteration
        iteration_times = []    # training time of every iteration

        for _ in range(iterations):
            model, elapsed = train_CNN(x_train, y_train, cnn_params, 2, nn)
            acc = model.evaluate(x_test, y_test)[1]     # second value of evaluate(), used as accuracy
            iteration_accs.append(acc)
            iteration_times.append(elapsed)
            n += 1
            ppb(n, l, prefix = 'Training and testing CNNs: ', suffix = 'Complete', length = 100)

        nn_result['avg_accuracy'] = np.mean(iteration_accs)
        nn_result['avg_time'] = np.mean(iteration_times)
        all_params_and_results.append(nn_result)

    return all_params_and_results
def split_filename(filename, p):
    """Advance the progress bar to ``p`` and split ``filename`` at its extension.

    Args:
        filename: Path or file name to split.
        p: Current progress value forwarded to ``ppb``.

    Returns:
        The ``(name, ext)`` pair produced by ``os.path.splitext``.

    NOTE(review): ``l`` (the progress-bar total) is not defined in this
    function; presumably it is a module-level value - confirm.
    """
    ppb(p, l, prefix='Creating filtered images: ', suffix='Complete')
    return os.path.splitext(filename)
def train_and_predict_with_best_MLP(img_folder, base_folder, image_width, x_new, y_raw, test_pct, n_cats, n_iterations):
    """Train the 1-vs-all MLP set ``n_iterations`` times and predict with the best set.

    Args:
        img_folder: Folder holding the images to classify; it is reset from
            ``base_folder`` and resized in place before being read.
        base_folder: Folder with the untouched originals of those images.
        image_width: Side length passed to ``d.resize_images`` and
            ``d.get_lists_from_tuples``.
        x_new, y_raw, n_cats: Forwarded to ``format_MLP``; ``n_cats`` is also
            the weight used to average the per-classifier accuracies.
        test_pct: Forwarded to ``split_MLP``.
        n_iterations: Number of times the full classifier set is trained;
            must be >= 1.

    Returns:
        ``(filepaths, results, predictions)`` where ``filepaths`` are the
        image paths that were classified, ``results`` holds ``'avg_accuracy'``
        and ``'avg_time'`` averaged over all iterations, and ``predictions``
        is a list with one ``model.predict`` output per classifier of the
        iteration that reached the highest mean accuracy.

    Raises:
        ValueError: If no classifier set was trained (``n_iterations < 1``).
    """
    # use functions from dataset.py to preprocess new prediction images
    d.reset_images(img_folder, base_folder)     # delete preprocessed images & recopy
    d.resize_images(img_folder, image_width)    # resize images for prediction

    # List the folder only AFTER the reset: reset_images deletes and recopies
    # its content, so a listing taken earlier could name files that no longer
    # exist (or miss the ones that were just copied in).
    filepaths = [img_folder + '/' + f for f in listdir(img_folder) if isfile(join(img_folder, f))]

    x_tuple = []
    for fp in filepaths:
        # context manager closes the file handle once the pixels are read
        with Image.open(fp, 'r') as im:
            x_tuple.append(list(im.getdata()))
    image_data = d.get_lists_from_tuples(x_tuple, image_width)  # format pixel data for prediction
    image_data = np.array(image_data).astype('float32') / 255   # to array, scaled by 1/255

    # Flatten every image to one row for the MLP. reshape keeps the same
    # row-major element order as nested loops over the remaining axes.
    if image_data.size:
        flat_image_data = image_data.reshape(len(image_data), -1)
    else:
        flat_image_data = np.array([])

    x_formatted, y_all_34 = format_MLP(x_new, y_raw, n_cats)   # one label vector per classifier
    mlp_params, _ = get_MLP_params([2])
    # NOTE(review): parameters are requested for phase [2] but index 4 is
    # read here (the CNN counterpart reads index 2) - confirm this is intended.
    mlp_params = mlp_params[4][0]

    n = 0                           # classifiers trained so far
    l = n_iterations * n_cats       # progress bar denominator
    ppb(0, l, prefix='Training and testing MLPs: ', suffix='Complete', length=100)

    best_mlp = None
    best_acc = float('-inf')
    results = {}
    iteration_accs = []     # mean accuracy of every iteration
    iteration_times = []    # total training time of every iteration

    for _ in range(n_iterations):
        it_acc = 0          # mean accuracy of the current iteration
        it_time = 0         # summed training time of the current iteration
        models_34 = []

        # run all 1-vs-all classifiers
        for y_labels in y_all_34:
            x_train, y_train, x_test, y_test = split_MLP(x_formatted, y_labels, test_pct)
            model, elapsed = train_MLP(x_train, y_train, mlp_params)
            acc = model.score(x_test, y_test)
            models_34.append(model)
            it_acc += acc / n_cats      # weight 1/n_cats turns the sum into a mean
            it_time += elapsed
            n += 1
            ppb(n, l, prefix='Training and testing MLPs: ', suffix='Complete', length=100)

        iteration_accs.append(it_acc)
        iteration_times.append(it_time)

        # Rank iterations by their MEAN accuracy (not by whichever classifier
        # happened to be trained last) and keep the whole classifier set.
        if best_mlp is None or it_acc > best_acc:
            best_mlp = models_34
            best_acc = it_acc

    if best_mlp is None:
        raise ValueError("n_iterations must be at least 1 to train a model")

    results['avg_accuracy'] = np.mean(iteration_accs)
    results['avg_time'] = np.mean(iteration_times)

    # predict with the best classifier set, not merely the last one trained
    predictions = [model.predict(flat_image_data) for model in best_mlp]
    return filepaths, results, predictions
def run_MLP_suite(x_raw, y_raw, test_pct, n_cats, iterations, phases, n_phases):
    """Train and score the 1-vs-all MLP set for every parameter set of every requested phase.

    Args:
        x_raw, y_raw, n_cats: Forwarded to ``format_MLP``; ``n_cats`` is also
            the weight used to average the per-classifier accuracies.
        test_pct: Forwarded to ``split_MLP``.
        iterations: How many times each parameter set is trained.
        phases: Phase numbers to run.
        n_phases: Total number of phases; phases ``0 .. n_phases - 1`` are
            visited in order.

    Returns:
        A list with one entry per phase in ``range(n_phases)``: ``None`` for a
        phase not in ``phases``, otherwise the list of that phase's parameter
        dictionaries, each extended in place with ``'avg_accuracy'`` and
        ``'avg_time'``.
    """
    x_formatted, y_all_34 = format_MLP(x_raw, y_raw, n_cats)   # one label vector per classifier
    mlp_params, num_params = get_MLP_params(phases)

    total_runs = len(phases) * num_params * n_cats * iterations     # progress bar denominator
    ppb(0, total_runs, prefix='Training and testing MLPs: ', suffix='Complete', length=100)
    trained = 0     # classifiers trained so far

    suite_results = []
    for phase in range(n_phases):
        # phases that were not requested only get a placeholder
        if phase not in phases:
            suite_results.append(None)
            continue

        phase_grid = mlp_params[phase]
        phase_results = []
        for idx in range(len(phase_grid)):
            params = phase_grid[idx]
            accuracies = []     # mean accuracy of every iteration
            durations = []      # total training time of every iteration

            for _ in range(iterations):
                mean_acc = 0
                total_time = 0
                # run all 1-vs-all classifiers
                for y_labels in y_all_34:
                    x_train, y_train, x_test, y_test = split_MLP(x_formatted, y_labels, test_pct)
                    model, elapsed = train_MLP(x_train, y_train, params)
                    mean_acc += model.score(x_test, y_test) / n_cats    # weight 1/n_cats gives the mean
                    total_time += elapsed
                    trained += 1
                    ppb(trained, total_runs, prefix='Training and testing MLPs: ', suffix='Complete', length=100)
                accuracies.append(mean_acc)
                durations.append(total_time)

            # averaged results are stored on the parameter dictionary itself
            params['avg_accuracy'] = np.mean(accuracies)
            params['avg_time'] = np.mean(durations)
            phase_results.append(params)

        suite_results.append(phase_results)

    return suite_results