def data_augmentation_create_X_y(data):
    """Collect decoded greyscale digit images (digits 1-9) with their metadata.

    Parameters
    ----------
    data : iterable of tuples
        Rows from the split-image table; index 2 is the encoded image and
        indices 1, 4, 5, 6, 7, 8 are name, row, position, actual digit(s),
        number of digits and source.
        # assumed from the indexing below -- TODO confirm against the DB schema

    Returns
    -------
    list of tuple
        ``(image, orig_name, row, pos, actual_digits, number_of_digits, source)``
        for every valid greyscale image, ready for augmentation and re-upload.
    """
    decoded_images = []
    for digit in range(1, 10):
        # images = db.select_by_actual_digit('split_grey', str(digit))
        images = [d for d in data if d[6] == str(digit)]
        for image in images:
            # Decode the image bytes into a numpy array
            im = decode_image(image[2])
            # Add a trailing channel axis. Used for greyscale and black and
            # white; if you want to do this for color images, this line is
            # not needed
            im = np.expand_dims(im, axis=-1)
            # Get the original variables that are needed to upload the
            # augmented images back into the training set
            orig_name = image[1]
            row = image[4]
            pos = image[5]
            actual_digits = image[6]
            number_of_digits = image[7]
            source = image[8]
            # Make sure only valid, greyscale images, are kept (a color image
            # would be 4-D after the channel axis was added). If working with
            # color images, this check is not needed
            if len(im.shape) < 4:
                decoded_images.append(
                    (im, orig_name, row, pos, actual_digits,
                     number_of_digits, source)
                )
    return decoded_images
def decode_and_convert(names_and_images, batch_number):
    """Decode, resize and greyscale one batch of images, then save to .npy files.

    Parameters
    ----------
    names_and_images : sequence
        Two-element sequence: ``[names, images]`` where ``images`` holds the
        encoded image blobs and ``names`` their identifiers.
    batch_number : int or str
        Used only in log output and in the output file names.

    Side effects
    ------------
    Writes ``batch_<n>_names.npy`` and ``batch_<n>_images.npy`` under
    ``C:\\Production Images``.
    """
    print('Starting conversion and logging of batch number {}'.format(batch_number))
    # Need to convert the images into the correct format
    # TODO: This functionality could probably be added to the utklipp function
    width = 200
    height = 115
    dim = (width, height)
    names = names_and_images[0]
    images = names_and_images[1]
    total = len(images)
    decoded_images = []
    # Decode the images; count from 1 so the last line reads "total out of total"
    for index, img in enumerate(images, start=1):
        decoded = decode_image(img)
        decoded = cv2.resize(decoded, dim, interpolation=cv2.INTER_AREA)
        decoded = cv2.cvtColor(decoded, cv2.COLOR_BGR2GRAY)
        decoded_images.append(decoded)
        print('Completed number {} out of {}.'.format(index, total))
    decoded_images = np.array(decoded_images)
    # Raw strings: the original literal 'C:\Production ...' relied on '\P'
    # being an invalid (and therefore literal) escape sequence; these raw
    # strings produce the exact same path bytes without the warning.
    np.save(r'C:\Production Images\batch_{}_names'.format(batch_number), names)
    np.save(r'C:\Production Images\batch_{}_images'.format(batch_number), decoded_images)
def split_3digit_into_1digit_training(output_db):
    """Split every 3-digit cell image in the training DB into three 1-digit images.

    Reads all rows from the ``cells`` table of the remote
    ``full_3digit_trainingset.db``, splits each cell image into its three
    digits via ``splitter.split_and_convert``, and stores each split digit
    through ``output_db.store_single_splits_training``.

    Parameters
    ----------
    output_db : object
        Database wrapper exposing ``store_single_splits_training(...)``.

    Returns
    -------
    pandas.DataFrame
        The dataframe of source rows that was iterated over.

    Side effects
    ------------
    Appends one entry per failed split to ``splitting errors.txt`` and prints
    per-image progress.
    """
    splitting_error = 0
    images_completed = 0
    # UNC path to the shared training-set database
    conn = sqlite3.connect('\\\\129.242.140.132\\remote\\UtklippsDatabaser\\full_3digit_trainingset.db')
    query = 'SELECT * FROM cells'
    df = pd.read_sql_query(query, conn)
    df = df[['name', 'original', 'row', 'code', 'source']]
    total_images = len(df)
    # Iterate over each row in the dataframe, to get needed information from
    # the original 3-digit images that will be split
    for index, row in df.iterrows():
        name = row['name']
        image = row['original']
        image_row = row['row']
        code = list(row['code'])  # To get easy access to each individual digit
        source = row['source']
        # Convert the image into a numpy array instead of a bytes-object
        image = decode.decode_image(image)
        # Get the split versions of the cell image, and all the different conversions
        split_result = splitter.split_and_convert(image)
        # If a split image exists in the 'split_orig' table, then it will also
        # exist in the other cell tables.
        # NOTE(review): this calls module-level `db_output`, not the `output_db`
        # parameter -- confirm that is intentional and not a typo.
        if db_output.test_exists_any_source(name, 'split_orig'):
            images_completed += 1
            perc_done = ((images_completed + splitting_error) / total_images) * 100
            print('Skipping image {} that already exists in the database, - {}% done'.format(name, perc_done))
            continue
        # Check if an error occured during the splitting of the image
        if split_result is None:
            splitting_error += 1
            with open('splitting errors.txt', 'a') as file:
                file.write('Error number: {} - Cell image: {} - Original image: {}\n\n'.format(str(splitting_error), name, source))
            perc_done = ((images_completed + splitting_error) / total_images) * 100
            print('An error occured with splitting the cell image: {} - From the original image: {}, - {}% done'.format(name, source, perc_done))
            continue
        # Store each of the three split digits; split_result[x][i] picks
        # conversion x of digit position i
        i = 0
        while i < 3:
            split_name = code[i] + '-' + str(i) + '-' + name
            split_imgs = [split_result[x][i] for x in range(3)]
            # Else, upload the split images
            output_db.store_single_splits_training(split_name, split_imgs, image_row, str(i), code[i], len(code), name)
            i += 1
        perc_done = ((images_completed + splitting_error) / total_images) * 100
        print('Completed image {}, - {}% done'.format(name, perc_done))
        images_completed += 1
    conn.close()
    return df
def decode(names_and_images, batch_number, oneDigit = False, training = False):
    """Decode a batch of images and convert them to black & white.

    Parameters
    ----------
    names_and_images : sequence
        ``[names, images]`` -- image identifiers and their encoded blobs.
    batch_number : int or str
        Used only for progress logging.
    oneDigit : bool
        When True (and not training), additionally attempt to split each
        image into single digits.
    training : bool
        When True, images whose decoded array is 2-D are skipped instead of
        resized/converted.

    Returns
    -------
    tuple
        ``(names, images)`` normally; when splitting is requested,
        ``(names, images, splittable, non_splittable_names, non_splittable)``.
    """
    print('Starting conversion of batch number {}'.format(batch_number))
    # Need to convert the images into the correct format
    # TODO: This functionality could probably be added to the utklipp function
    target_dim = (200, 115)  # (width, height) for the resized cell images
    names = names_and_images[0]
    images = names_and_images[1]
    total = len(images)
    decoded_names = []
    decoded_images = []
    splittable_images = []
    non_splittable_names = []
    non_splittable_images = []
    for position, raw in enumerate(images):
        img = decode_image(raw)
        # Optionally split the decoded image into its individual digits,
        # keeping track of any image the splitter cannot handle
        if oneDigit == True and training == False:
            parts = splitter.split_and_convert(img, onlyGrey = True)
            if parts is None:
                non_splittable_images.append(img)
                non_splittable_names.append(names[position])
            else:
                splittable_images.append((parts[0], parts[1], parts[2]))
        # Resize and convert to black & white (training mode only processes
        # images that still have a color/channel dimension)
        if training == False or len(img.shape) > 2:
            img = cv2.resize(img, target_dim, interpolation = cv2.INTER_AREA)
            img = convert_img_bw(img)
            decoded_images.append(img)
            decoded_names.append(names[position])
        print('Complated decoding and conversion of image number {} out of {}.'.format(position, total))
    if oneDigit == False or training == True:
        return (decoded_names, decoded_images)
    return (decoded_names, decoded_images, splittable_images,
            non_splittable_names, non_splittable_images)
def create_X_y(data, db, table, color):
    """Build training arrays X (images) and y (labels) from database rows.

    Parameters
    ----------
    data : iterable of tuples
        Rows where index 0 is the encoded image and index 1 is the label,
        which is also used as the name key when removing bad rows.
        # assumed from the dual use of digit[1] below -- TODO confirm schema
    db : object
        Database wrapper exposing ``remove_by_name(table, name)``.
    table : str
        Source table name; ``'cells'`` triggers the 3-digit code handling.
    color : str
        Image color space: ``'orig'``/``'original'``, greyscale, or ``'bw'``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Images and labels; for ``'cells'`` the labels are remapped to
        consecutive integers and the mapping saved to disk.
    """
    X = []
    y = []
    # Color names whose images keep their raw multi-channel form
    original_colors = ('orig', 'original')
    for digit in data:
        image = decode_image(digit[0])
        # If any original color images managed to sneak into the
        # greyscale/B&W database tables, remove them and skip
        if len(image.shape) > 2 and color not in original_colors:
            name = digit[1]
            db.remove_by_name(table, name)
            continue
        # Should have done the transformation into bitwise_not when
        # uploading the images to the Black & White database
        if color == 'bw':
            image = cv2.bitwise_not(image)
        # TODO: Move this functionality. 3-digit images did not get
        # standardized on upload, do that manually here for now.
        if table == 'cells':
            image = cv2.resize(image, (200, 115), interpolation=cv2.INTER_AREA)
        X.append(image)
        y.append(digit[1])
    X = np.array(X)
    y = np.array(y)
    # If we are working with 3-digit codes, we need to remap each code to a
    # number between 0-<max number of unique codes> to fit in our model's
    # softmax output layer, and persist the mapping for later inference
    if table == 'cells':
        unique_labels = np.unique(y)
        label_to_index = {label: idx for idx, label in enumerate(unique_labels)}
        y = np.array([label_to_index.get(elem) for elem in y])
        np.save('C:\\Models\\Ground_truth_arrays\\3_digit_{}_ground_truth_mapping'.format(color), unique_labels)
    # Reshape X for later use in Keras: (N, H, W) becomes (N, H, W, 1) for
    # B&W and Greyscale; original color images already have a channel axis
    if color not in original_colors:
        X = np.expand_dims(X, axis=-1)
    return X, y