Пример #1
0
def get_data(frame_range=[3],
             num_train=2168,
             num_validation=400,
             num_test=200,
             feature_list=['LEO', 'area', 'angle'],
             reshape_frames=False,
             add_flip=True,
             crop_at_constr=False,
             blur_im=False):
    """
    Get data as shuffled train / validation / test splits.

    Frames are pulled with pull_frame_range and turned into a design
    matrix X and a target column y, which are shuffled row-wise with the
    same permutation and then cut into three consecutive partitions.

    feature_list: if not None, X has len(feature_list) columns and columns
        0-2 of every row are filled with the LEO value parsed from the
        sample key, the polygon area and the leading angle of the 3 leading
        droplets. The entries of feature_list are not consulted one by one
        (only its length is used), so it must have at least three entries.
        If None, raw image data will be returned (i.e. pixel values).

    reshape_frames: only changes the layout of raw image data. True
        produces a matrix with each row being a frame's flattened pixel
        values; False produces an array of shape (n, 1, height, width)
        (one color channel for grayscale).

    add_flip: True to double the data set by providing a reflected frame
        too that is effectively the same (passed to pull_frame_range)

    crop_at_constr: True to crop all frames at the constriction (passed to
        pull_frame_range)

    blur_im: passed unchanged to pull_frame_range

    Targets are 1 when the part of the sample key before the first
    underscore contains 'nobreak' and 0 otherwise.

    Returns X_train, y_train, X_val, y_val, X_test, y_test.

    Raises IndexError if num_train + num_validation + num_test exceeds the
    number of rows that were actually built.

    NOTE: this is not set up to pull frames from a range for raw image
        processing; this will extract one frame (with its flipped version if
        specified) across all videos
    """

    import extract_features

    # The list defaults in the signature are only ever read, never mutated,
    # so sharing them between calls is harmless.

    # load data, asking for an even number of break and nobreak samples
    n_requested = num_train + num_validation + num_test
    num_break = int(n_requested / 2)
    num_nobreak = int(n_requested / 2)
    try:
        # exception will be thrown if you call on more break/nobreak samples
        # than is available
        my_frames = pull_frame_range(frame_range=frame_range,
                                     num_break=num_break,
                                     num_nobreak=num_nobreak,
                                     add_flip=add_flip,
                                     crop_at_constr=crop_at_constr,
                                     blur_im=blur_im)
    except Exception:
        # best-effort fallback: retry without explicit sample counts.
        # Catching Exception (not a bare except) lets Ctrl-C and SystemExit
        # propagate instead of triggering a second load.
        my_frames = pull_frame_range(frame_range=frame_range,
                                     num_break=None,
                                     num_nobreak=None,
                                     add_flip=add_flip,
                                     crop_at_constr=crop_at_constr,
                                     blur_im=blur_im)

    # if add_flip is true you expect twice as many frames per sample;
    # frames_per_sample is used for correct indexing of the output arrays
    if add_flip is True:
        frames_per_sample = 2 * len(frame_range)
    else:
        frames_per_sample = 1 * len(frame_range)

    # construct X matrix and y vector
    use_features = feature_list is not None
    n_rows = len(my_frames) * frames_per_sample
    y_data = np.zeros((n_rows, 1))
    if use_features:
        X_data = np.zeros((n_rows, len(feature_list)))
    else:
        # size the pixel arrays from the first frame of the first sample
        first_key = list(my_frames.keys())[0]
        template = my_frames[first_key][0]
        if reshape_frames:
            X_data = np.zeros((n_rows,
                               template.shape[0] * template.shape[1]))
        else:
            # one color channel for grayscale
            X_data = np.zeros((n_rows, 1,
                               template.shape[0], template.shape[1]))

    # each sample owns a run of frames_per_sample consecutive rows
    for sample_idx, key in enumerate(my_frames.keys()):
        i = sample_idx * frames_per_sample
        if i % 100 == 0:
            print('sampling dataset', i)

        # classify a break as 0 and nobreak as 1
        my_class = key.split('_')[0]
        label = 1 if 'nobreak' in my_class else 0

        # loop over each frame of the sample
        for f, frame in enumerate(my_frames[key]):
            row = i + f
            if use_features:
                LEO = key.split('LEO_')[-1]
                centroids, _ = extract_features.\
                               get_n_leading_droplets_centroids(frame, n=3)
                area = extract_features.polygon_area(centroids=centroids)
                leading_angle = \
                    extract_features.leading_angle(centroids=centroids)

                # LEO is a string here; numpy converts it on assignment
                X_data[row, 0] = LEO
                X_data[row, 1] = area
                X_data[row, 2] = leading_angle
            elif reshape_frames:
                X_data[row, :] = np.reshape(frame, -1)
            else:
                X_data[row, 0, :, :] = frame

            y_data[row] = label

    # make sure the requested partitions fit into what was actually loaded
    m = len(y_data)
    if n_requested > m:
        raise IndexError(
            'requested %d rows (train + validation + test) but only %d '
            'are available' % (n_requested, m))

    # make masks for partitioning data sets
    mask_train = np.arange(0, num_train)
    mask_val = np.arange(num_train, num_train + num_validation)
    mask_test = np.arange(num_train + num_validation, n_requested)

    # shuffle X and y with the same permutation. Indexing only the first
    # axis works for the 2-D feature matrix as well as for the 4-D image
    # array (the former 4-index form failed on the feature matrix).
    rand_i = np.random.permutation(m)
    X_data = X_data[rand_i]
    y_data = y_data[rand_i, :]

    X_train = X_data[mask_train]
    X_val = X_data[mask_val]
    X_test = X_data[mask_test]
    if reshape_frames:
        # reshape data to rows
        X_train = X_train.reshape(num_train, -1)
        X_val = X_val.reshape(num_validation, -1)
        X_test = X_test.reshape(num_test, -1)

    # and the targets vector y
    y_train = y_data[mask_train]
    y_val = y_data[mask_val]
    y_test = y_data[mask_test]

    return X_train, y_train, X_val, y_val, X_test, y_test
Пример #2
0
def get_data(num_train=1000,
             num_validation=200,
             num_test=100,
             feature_list=['LEO', 'area', 'angle']):
    """
    Get mean-centred, shuffled train / validation / test splits.

    One frame per sample is taken from pull_frame_range(frame_range=[3]).

    feature_list: if not None, X has len(feature_list) columns and columns
        0-2 of every row are filled with the LEO value parsed from the
        sample key, the polygon area and the leading angle of the 3 leading
        droplets. Only the length of feature_list is used, so it must have
        at least three entries. If None, raw image data will be returned
        (i.e. pixel values) as vectors of reshaped data, one row per frame.

    Targets are 1 when the part of the sample key before the first
    underscore contains 'nobreak' and 0 otherwise.

    The column-wise mean of the training partition is subtracted from all
    three partitions.

    Returns X_train, y_train, X_val, y_val, X_test, y_test.

    Raises IndexError if num_train + num_validation + num_test exceeds the
    number of samples that were loaded.
    """

    import extract_features

    # The list default in the signature is only read, never mutated.

    # load data
    my_frames = pull_frame_range(frame_range=[3])
    n_samples = len(my_frames)

    # construct X matrix and y vector; an explicit None test replaces the
    # former bare except, which also hid unrelated errors
    use_features = feature_list is not None
    y_data = np.zeros((n_samples, 1))
    if use_features:
        X_data = np.zeros((n_samples, len(feature_list)))
    else:
        # size the pixel matrix from the first frame of the first sample
        first_key = list(my_frames.keys())[0]
        template = my_frames[first_key][0]
        X_data = np.zeros((n_samples,
                           template.shape[0] * template.shape[1]))

    for i, key in enumerate(my_frames):
        frame = my_frames[key][0]
        if use_features:
            LEO = key.split('LEO_')[-1]
            centroids, _ = extract_features.\
                           get_n_leading_droplets_centroids(frame, n=3)
            area = extract_features.polygon_area(centroids=centroids)
            leading_angle = extract_features.leading_angle(centroids=centroids)

            # LEO is a string here; numpy converts it on assignment
            X_data[i, 0] = LEO
            X_data[i, 1] = area
            X_data[i, 2] = leading_angle
        else:
            X_data[i, :] = np.reshape(frame, -1)

        # classify a break as 0 and nobreak as 1
        my_class = key.split('_')[0]
        y_data[i] = 1 if 'nobreak' in my_class else 0

    # make sure the requested partitions fit into what was actually loaded
    m = len(y_data)
    n_requested = num_train + num_validation + num_test
    if n_requested > m:
        raise IndexError(
            'requested %d rows (train + validation + test) but only %d '
            'are available' % (n_requested, m))

    mask_train = np.arange(0, num_train)
    mask_val = np.arange(num_train, num_train + num_validation)
    mask_test = np.arange(num_train + num_validation, n_requested)

    # shuffle X and y with the same permutation
    rand_i = np.random.permutation(m)
    X_data = X_data[rand_i, :]
    y_data = y_data[rand_i, :]

    # train set
    X_train = X_data[mask_train]
    y_train = y_data[mask_train]
    # validation set
    X_val = X_data[mask_val]
    y_val = y_data[mask_val]
    # test set
    X_test = X_data[mask_test]
    y_test = y_data[mask_test]

    # normalize the data: subtract the training mean from every partition
    mean_feats = np.mean(X_train, axis=0)
    X_train -= mean_feats
    X_val -= mean_feats
    X_test -= mean_feats

    # reshape data to rows
    X_train = X_train.reshape(num_train, -1)
    X_val = X_val.reshape(num_validation, -1)
    X_test = X_test.reshape(num_test, -1)

    return X_train, y_train, X_val, y_val, X_test, y_test
Пример #3
0
# number of clusters
k = 5

# one column each for LEO, polygon area and leading angle
n_features = 3

# load data: one frame per sample
my_frames = data_utils.pull_frame_range(frame_range=[3])

# construct X matrix (one row per sample) and y column vector
X = np.zeros((len(my_frames), n_features))
y = np.zeros((len(my_frames), 1))

for i, key in enumerate(my_frames):
    frame = my_frames[key][0]

    # the LEO value is whatever follows the last 'LEO_' in the sample key
    LEO = key.split('LEO_')[-1]

    # NOTE(review): the helper's return value is used as-is here; confirm
    # whether it returns the centroids alone or a tuple that needs unpacking
    centroids = extract_features.get_n_leading_droplets_centroids(frame, n=3)
    area = extract_features.polygon_area(centroids=centroids)
    leading_angle = extract_features.leading_angle(centroids=centroids)

    X[i, 0], X[i, 1], X[i, 2] = LEO, area, leading_angle

    # classify a break as 0 and nobreak as 1, based on the key's prefix
    my_class = key.split('_')[0]
    y[i] = 1 if 'nobreak' in my_class else 0

## shuffle X and y in the same way
#m = len(y)
Пример #4
0
 show_frame = data_utils.show_my_countours(frame,contours=-1,
                                           resize_frame=1,show=False)
 # add centroids to show_frame
 centroids, _ = extract_features.get_droplets_centroids(frame)
 for c in centroids:
     cX = centroids[c][0]
     cY = centroids[c][1]
     cv2.circle(show_frame, (cX,cY), 1, (0,0,255), 7)
     cv2.putText(show_frame, str(c), (cX + 4, cY - 4),
                 cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
 # add polygon with n vertices to show_frame
 leading_centroids, _ = \
     extract_features.get_n_leading_droplets_centroids(frame,n)
 print('area: ', extract_features.polygon_area(leading_centroids), 
       '\t angle: ', 
       extract_features.leading_angle(leading_centroids)*180/np.pi,
       '\t frame key: ', frame_key)
 leading_centroids = [(coord) for coord in leading_centroids.values()]
 leading_centroids.append(leading_centroids[0])
 leading_centroids = np.int32(np.array(leading_centroids))        
 cv2.polylines(show_frame, [leading_centroids], True, (255,60,255))
 # add constriction location to show_frame
 constric_loc = data_utils.find_constriction(frame)
 y1 = int(frame.shape[0]/3)
 y2 = int(frame.shape[0]/3*2)
 cv2.line(show_frame, (constric_loc, y1), 
          (constric_loc, y2), (0,150,255), 2)
 frame_str = frame_key.split('_')[0]
 frame_str = frame_key + ', frame ' + str(i)
 # add frame label to show_frame
 show_frame = cv2.putText(show_frame, frame_str,