def get_data(frame_range=None, num_train=2168, num_validation=400,
             num_test=200, feature_list=('LEO', 'area', 'angle'),
             reshape_frames=False, add_flip=True, crop_at_constr=False,
             blur_im=False):
    """Build shuffled train/validation/test splits from video frames.

    If ``feature_list`` is None, raw grayscale pixel data is returned
    instead of extracted features: flattened row vectors when
    ``reshape_frames`` is True, otherwise (N, 1, H, W) arrays.

    Parameters
    ----------
    frame_range : list of int, optional
        Frame indices to pull from each video; defaults to [3].
        NOTE: in feature mode the extracted values end up coming from
        the last frame in the range (each frame overwrites the same rows).
    num_train, num_validation, num_test : int
        Sizes of the three partitions.
    feature_list : sequence or None
        When not None, exactly three features are extracted per frame:
        LEO (parsed from the dict key), polygon area, leading angle.
    reshape_frames : bool
        Raw-pixel mode only: True flattens each frame to a row vector.
    add_flip : bool
        True doubles the data with a flipped copy of each frame
        (passed through to pull_frame_range).
    crop_at_constr : bool
        True crops all frames at the constriction (passed through).
    blur_im : bool
        Blur option passed through to pull_frame_range.

    Returns
    -------
    X_train, y_train, X_val, y_val, X_test, y_test
        Labels are 0 for 'break' and 1 for 'nobreak'.
    """
    import extract_features

    # Avoid a shared mutable default argument.
    if frame_range is None:
        frame_range = [3]

    # Ask for a balanced break/nobreak sample count; if the corpus has
    # fewer samples than requested, fall back to "take everything".
    num_break = (num_train + num_validation + num_test) // 2
    num_nobreak = (num_train + num_validation + num_test) // 2
    try:
        my_frames = pull_frame_range(frame_range=frame_range,
                                     num_break=num_break,
                                     num_nobreak=num_nobreak,
                                     add_flip=add_flip,
                                     crop_at_constr=crop_at_constr,
                                     blur_im=blur_im)
    except Exception:
        my_frames = pull_frame_range(frame_range=frame_range,
                                     num_break=None,
                                     num_nobreak=None,
                                     add_flip=add_flip,
                                     crop_at_constr=crop_at_constr,
                                     blur_im=blur_im)

    # add_flip doubles the frames per sample; used to stride the output
    # arrays correctly.
    frames_per_sample = (2 if add_flip else 1) * len(frame_range)
    n_samples = len(my_frames) * frames_per_sample

    # Construct X matrix and y vector.  (The original dispatched on a
    # TypeError from len(None) inside a bare except; branch explicitly.)
    y_data = np.zeros((n_samples, 1))
    if feature_list is not None:
        X_data = np.zeros((n_samples, len(feature_list)))
    else:
        # Raw-pixel mode: size X from the first available frame.
        dummy = my_frames[next(iter(my_frames))][0]
        if reshape_frames:
            X_data = np.zeros((n_samples, dummy.shape[0] * dummy.shape[1]))
        else:
            # one color channel for grayscale
            X_data = np.zeros((n_samples, 1,
                               dummy.shape[0], dummy.shape[1]))

    # Step over the first axis of X_data by the number of frames per
    # sample; zip truncates to the number of keys.
    for i, key in zip(range(0, n_samples + 1, frames_per_sample),
                      my_frames.keys()):
        if i % 100 == 0:
            print('sampling dataset', i)
        for f, frame in enumerate(my_frames[key]):
            if feature_list is not None:
                # LEO is encoded in the dict key after 'LEO_'.
                LEO = float(key.split('LEO_')[-1])
                centroids, _ = extract_features.\
                    get_n_leading_droplets_centroids(frame, n=3)
                area = extract_features.polygon_area(centroids=centroids)
                leading_angle = \
                    extract_features.leading_angle(centroids=centroids)
                X_data[i + f, 0] = LEO
                X_data[i + f, 1] = area
                X_data[i + f, 2] = leading_angle
            elif reshape_frames:
                X_data[i + f, :] = np.reshape(frame, -1)
            else:
                X_data[i + f, 0, :, :] = frame
            # classify a break as 0 and nobreak as 1
            y_data[i + f] = 1 if 'nobreak' in key.split('_')[0] else 0

    # Masks for partitioning the shuffled data.
    mask_train = list(range(0, num_train))
    mask_val = list(range(num_train, num_train + num_validation))
    mask_test = list(range(num_train + num_validation,
                           num_train + num_validation + num_test))
    rand_i = np.random.permutation(len(y_data))

    # Shuffle and partition.  Index generically on the first axis: the
    # original hard-coded a 4-D index for reshape_frames=False, which
    # crashed on the 2-D feature matrix produced by the defaults.
    X_data = X_data[rand_i]
    X_train = X_data[mask_train]
    X_val = X_data[mask_val]
    X_test = X_data[mask_test]
    if reshape_frames or X_data.ndim == 2:
        # flatten each sample to a row vector (no-op for 2-D data)
        X_train = X_train.reshape(num_train, -1)
        X_val = X_val.reshape(num_validation, -1)
        X_test = X_test.reshape(num_test, -1)

    # And the targets vector y, shuffled the same way.
    y_data = y_data[rand_i, :]
    y_train = y_data[mask_train]
    y_val = y_data[mask_val]
    y_test = y_data[mask_test]
    return X_train, y_train, X_val, y_val, X_test, y_test
def get_data(num_train=1000, num_validation=200, num_test=100,
             feature_list=('LEO', 'area', 'angle')):
    """Build shuffled, mean-centered train/val/test splits from frame 3.

    If ``feature_list`` is None, raw pixel values are returned as
    flattened row vectors instead of extracted features.

    Parameters
    ----------
    num_train, num_validation, num_test : int
        Sizes of the three partitions.
    feature_list : sequence or None
        When not None, exactly three features are extracted per frame:
        LEO (parsed from the dict key), polygon area, leading angle.

    Returns
    -------
    X_train, y_train, X_val, y_val, X_test, y_test
        Labels are 0 for 'break' and 1 for 'nobreak'.
    """
    import extract_features

    # load data (a single frame per video)
    my_frames = pull_frame_range(frame_range=[3])

    # Construct X matrix and y vector.  (The original dispatched on a
    # TypeError from len(None) inside a bare except; branch explicitly.)
    y_data = np.zeros((len(my_frames), 1))
    if feature_list is not None:
        X_data = np.zeros((len(my_frames), len(feature_list)))
    else:
        # Raw-pixel mode: size X from the first available frame.
        dummy = my_frames[next(iter(my_frames))][0]
        X_data = np.zeros((len(my_frames), dummy.shape[0] * dummy.shape[1]))

    for i, key in enumerate(my_frames):
        frame = my_frames[key][0]
        if feature_list is not None:
            # LEO is encoded in the dict key after 'LEO_'.
            LEO = float(key.split('LEO_')[-1])
            centroids, _ = extract_features.\
                get_n_leading_droplets_centroids(frame, n=3)
            area = extract_features.polygon_area(centroids=centroids)
            leading_angle = extract_features.leading_angle(
                centroids=centroids)
            X_data[i, 0] = LEO
            X_data[i, 1] = area
            X_data[i, 2] = leading_angle
        else:
            X_data[i, :] = np.reshape(frame, -1)
        # classify a break as 0 and nobreak as 1
        y_data[i] = 1 if 'nobreak' in key.split('_')[0] else 0

    # Masks for partitioning the shuffled data.
    mask_train = list(range(0, num_train))
    mask_val = list(range(num_train, num_train + num_validation))
    mask_test = list(range(num_train + num_validation,
                           num_train + num_validation + num_test))
    rand_i = np.random.permutation(len(y_data))
    X_data = X_data[rand_i, :]
    y_data = y_data[rand_i, :]

    # train set
    X_train = X_data[mask_train]
    y_train = y_data[mask_train]
    # validation set
    X_val = X_data[mask_val]
    y_val = y_data[mask_val]
    # test set
    X_test = X_data[mask_test]
    y_test = y_data[mask_test]

    # normalize the data: subtract the mean image/features of the
    # training split from all splits
    mean_feats = np.mean(X_train, axis=0)
    X_train -= mean_feats
    X_val -= mean_feats
    X_test -= mean_feats

    # reshape data to rows
    X_train = X_train.reshape(num_train, -1)
    X_val = X_val.reshape(num_validation, -1)
    X_test = X_test.reshape(num_test, -1)
    return X_train, y_train, X_val, y_val, X_test, y_test
# number of clusters
k = 5
n_features = 3

# load data (a single frame per video)
my_frames = data_utils.pull_frame_range(frame_range=[3])

# construct X matrix and y vector
X = np.zeros((len(my_frames), n_features))
y = np.zeros((len(my_frames), 1))
for i, key in enumerate(my_frames):
    frame = my_frames[key][0]
    # LEO is encoded in the dict key after 'LEO_'
    LEO = key.split('LEO_')[-1]
    # BUG FIX: get_n_leading_droplets_centroids returns a
    # (centroids, ...) tuple everywhere else in this file; unpack it
    # before handing the centroids to the geometry helpers.
    centroids, _ = extract_features.get_n_leading_droplets_centroids(
        frame, n=3)
    area = extract_features.polygon_area(centroids=centroids)
    leading_angle = extract_features.leading_angle(centroids=centroids)
    X[i, 0] = LEO
    X[i, 1] = area
    X[i, 2] = leading_angle
    # classify a break as 0 and nobreak as 1
    my_class = key.split('_')[0]
    if 'nobreak' in my_class:
        y[i] = 1
    else:
        y[i] = 0
## shuffle X and y in the same way
# Overlay diagnostics on each frame of the current video: contours,
# droplet centroids with their ids, the polygon over the n leading
# droplets, and the detected constriction location.
for i, frame in enumerate(frames[frame_key]):
    # add contours to show_frame
    show_frame = data_utils.show_my_countours(frame, contours=-1,
                                              resize_frame=1, show=False)
    # add centroids to show_frame
    centroids, _ = extract_features.get_droplets_centroids(frame)
    for c in centroids:
        cX = centroids[c][0]
        cY = centroids[c][1]
        cv2.circle(show_frame, (cX, cY), 1, (0, 0, 255), 7)
        cv2.putText(show_frame, str(c), (cX + 4, cY - 4),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
    # add polygon with n vertices to show_frame
    leading_centroids, _ = \
        extract_features.get_n_leading_droplets_centroids(frame, n)
    print('area: ', extract_features.polygon_area(leading_centroids),
          '\t angle: ',
          extract_features.leading_angle(leading_centroids) * 180 / np.pi,
          '\t frame key: ', frame_key)
    # close the polygon by repeating the first vertex, then draw it
    leading_centroids = [(coord) for coord in leading_centroids.values()]
    leading_centroids.append(leading_centroids[0])
    leading_centroids = np.int32(np.array(leading_centroids))
    cv2.polylines(show_frame, [leading_centroids], True, (255, 60, 255))
    # add constriction location to show_frame (vertical tick over the
    # middle third of the frame height)
    constric_loc = data_utils.find_constriction(frame)
    y1 = int(frame.shape[0] / 3)
    y2 = int(frame.shape[0] / 3 * 2)
    cv2.line(show_frame, (constric_loc, y1), (constric_loc, y2),
             (0, 150, 255), 2)
    # label for this frame (a dead assignment of frame_key.split('_')[0]
    # that was immediately overwritten has been removed)
    frame_str = frame_key + ', frame ' + str(i)