Example #1
def videos_to_frames_pipe(data_dir,
                          ext='.avi',
                          target_ext='.allframes.npy',
                          classes=None,
                          min_size=128,
                          max_elements=13320):
    """
    Creates and executes an mPyPl pipe that loads all videos from 'data_dir' (each subfolder is a separate class),
    extracts all frames and saves them in numpy format.
    
    Parameters
    ----------
    data_dir : str, required
        The directory where all the videos are organised in subfolders (subfolder name = class name)
    ext : str, optional
        Extension of video files to search for, by default '.avi'
    target_ext : str, optional
        Target extension for video frames to be serialized to, by default '.allframes.npy'
    classes : dict, optional
        Dictionary mapping class names to numeric labels. Keys must match the folder names in 'data_dir'.
        If set to None, classes are inferred automatically from the folder structure in 'data_dir'.
        Example: {'Class1': 1, 'Class2': 2}
        Defaults to None
    min_size : int, optional
        Minimum size of frames based on the shorter edge, by default 128
    max_elements : int, optional
        Max elements for silly progress indicator, by default 13320
    """
    (mp.get_datastream(data_dir, classes=classes, ext=ext)
     # load each video file as a moviepy clip
     | mp.apply('filename',
                'clip',
                lambda fn: VideoFileClip(fn),
                eval_strategy=mp.EvalStrategies.Value)
     # resize each clip so that its shorter edge equals min_size
     | mp.apply('clip',
                'clip',
                lambda clip: clip.fx(vfx.resize, width=min_size)
                if clip.w <= clip.h else clip.fx(vfx.resize, height=min_size),
                eval_strategy=mp.EvalStrategies.Value)
     # collect all frames of the clip into a single numpy array
     | mp.apply('clip',
                'allframes',
                lambda c: np.asarray(list(c.iter_frames())),
                eval_strategy=mp.EvalStrategies.Value)
     # release moviepy readers and drop the clip from the stream
     | mp.iter('clip', close_clip)
     | mp.delfield('clip')
     # serialize frames to .npy, skipping files that are already cached
     | cachecomputex(ext, target_ext,
                     lambda x, nfn: np.save(nfn, x['allframes']),
                     lambda x, nfn: print("Skipping saving 'allframes' for {}".format(x['filename'])))
     | mp.silly_progress(elements=max_elements)
     | mp.execute)
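
The close_clip helper used above is not shown in this snippet; a minimal sketch of what it presumably does, following the reader-closing pattern that appears verbatim in Examples #6 and #8:

def close_clip(clip):
    # release moviepy's ffmpeg reader processes so file handles are not leaked
    clip.reader.close()
    if clip.audio is not None:
        clip.audio.reader.close_proc()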
Example #2
def get_features_from_files(data_dir,
                            features_ext='.proc.c3d-avg.npy',
                            test_split=[],
                            classes=None,
                            max_elements=13320):
    """
    Creates and executes an mPyPl pipe that loads feature vectors from serialized files and returns a preprocessed
    data stream, with a train/test split and a class label assigned to each element of the stream
    
    Parameters
    ----------
    data_dir : str, required
        The directory where all the videos are organised in subfolders (subfolder name = class name)
    features_ext : str, optional
        Extension of serialized feature vectors, by default '.proc.c3d-avg.npy'
    test_split : list, optional
        List of filenames belonging to the test subset.
        If empty, no elements are assigned to the test subset; by default []
    classes : dict, optional
        Dictionary mapping class names to numeric labels. Keys must match the folder names in 'data_dir'.
        If set to None, classes are inferred automatically from the folder structure in 'data_dir'.
        Example: {'Class1': 1, 'Class2': 2}
        Defaults to None
    max_elements : int, optional
        Max elements for silly progress indicator, by default 13320
    
    Returns
    -------
    list of mPyPl.mdict.mdict
        List of dictionaries that can be used to access the data
    """

    data = (mp.get_datastream(data_dir, classes=classes, ext=features_ext)
            | mp.datasplit_by_pattern(test_pattern=test_split)
            | mp.pshuffle
            | mp.apply('filename', 'c3d_avg', lambda fn: np.load(fn))
            | mp.silly_progress(elements=max_elements)
            | mp.select_fields(['c3d_avg', 'class_id', 'split'])
            | mp.as_list)
    return data
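
A hypothetical usage sketch; 'test_names' is illustrative, and only the fields selected by the pipe ('c3d_avg', 'class_id', 'split') are accessed:

test_names = ['video_001.avi']  # hypothetical filenames reserved for the test subset
data = get_features_from_files('data', test_split=test_names)
X = np.array([x['c3d_avg'] for x in data])
y = np.array([x['class_id'] for x in data])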
Example #3
def frames_to_features_pipe(data_dir,
                            mean_std,
                            model,
                            ext='.allframes.npy',
                            target_ext='.proc.c3d-avg.npy',
                            classes=None,
                            frames_per_clip=16,
                            frames_step=8,
                            batch_size=32,
                            max_elements=13320):
    """
    Creates and executes an mPyPl pipe that loads all video frames, resizes and crops them, preprocesses them,
    runs inference against a Keras model, and serializes the resulting feature vectors in npy format
    
    Parameters
    ----------
    data_dir : str, required
        The directory where all the videos are organised in subfolders (subfolder name = class name)
    mean_std : array, required
        Array of per-channel mean and std values used for preprocessing frames.
        Template: array[ [mean_R, mean_G, mean_B], [std_R, std_G, std_B] ]
        Example: array[ [123, 112, 145], [60, 62, 64] ]
    model : Keras model obj, required
        Keras model object ready for running predictions
    ext : str, optional
        Extension of frames files to search for, by default '.allframes.npy'
    target_ext : str, optional
        Target extension for feature vectors to be serialized to, by default '.proc.c3d-avg.npy'
    classes : dict, optional
        Dictionary mapping class names to numeric labels. Keys must match the folder names in 'data_dir'.
        If set to None, classes are inferred automatically from the folder structure in 'data_dir'.
        Example: {'Class1': 1, 'Class2': 2}
        Defaults to None
    frames_per_clip : int, optional
        When extracting shorter clips from a longer video, the number of frames in each clip, by default 16
    frames_step : int, optional
        When extracting shorter clips from a longer video, the step between clip start frames, by default 8
    batch_size : int, optional
        Mini batch size used when pushing data to the model for scoring, by default 32
    max_elements : int, optional
        Max elements for silly progress indicator, by default 13320
    """

    (mp.get_datastream(data_dir, classes=classes, ext=ext)
     # load all frames for each video file
     | mp.apply('filename',
                'allframes',
                lambda fn: np.load(fn),
                eval_strategy=mp.EvalStrategies.OnDemand)
     # cut each video into multiple shorter clips, defined by the frames_per_clip and frames_step parameters
     | mp.apply('allframes',
                'clips16-8',
                lambda v: extract_clips(
                    v, frames_per_clip=frames_per_clip, step=frames_step),
                eval_strategy=mp.EvalStrategies.OnDemand)
     # center crop frames into 112x112
     | mp.apply('clips16-8',
                'cropped16-8',
                lambda v: np.asarray([[crop_center(frame) for frame in clip]
                                      for clip in v]),
                eval_strategy=mp.EvalStrategies.OnDemand)
     # preprocess frames by subtracting the channel-wise mean
     | mp.apply('cropped16-8',
                'proc_cropped16-8',
                lambda v: preprocess_input(v, mean_std, divide_std=False),
                eval_strategy=mp.EvalStrategies.OnDemand)
     # run batch predictions on c3d model to get feature vectors for each clip
     | mp.apply_batch('proc_cropped16-8',
                      'c3d16-8',
                      lambda x: predict_c3d(x, model),
                      batch_size=batch_size)
     # for each full video take feature vectors for all the extracted clips and average them
     | mp.apply('c3d16-8',
                'c3d_avg',
                lambda v: np.average(v, axis=0),
                eval_strategy=mp.EvalStrategies.OnDemand)
     # draw silly progress
     | mp.silly_progress(elements=max_elements)
     # save averaged feature vectors into .npy files
     | cachecomputex(ext, target_ext,
                     lambda x, nfn: np.save(nfn, x['c3d_avg']),
                     lambda x, nfn: print("Skipping saving 'c3d_avg' for {}".format(x['filename'])))
     | mp.execute)
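
extract_clips and crop_center are helpers defined elsewhere; a minimal sketch of plausible implementations, given the frames_per_clip/frames_step semantics in the docstring and the 112x112 center crop mentioned in the comments:

def extract_clips(frames, frames_per_clip=16, step=8):
    # slide a window of frames_per_clip frames over the video with the given step
    return np.asarray([frames[i:i + frames_per_clip]
                       for i in range(0, len(frames) - frames_per_clip + 1, step)])

def crop_center(frame, size=112):
    # cut a size x size patch from the middle of the frame
    h, w = frame.shape[:2]
    y, x = (h - size) // 2, (w - size) // 2
    return frame[y:y + size, x:x + size]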
Example #4
# Create a session with the above options specified.
k.tensorflow_backend.set_session(tf.Session(config=config))
###################################

vgg = keras.applications.vgg16.VGG16(include_top=False,
                                     weights='imagenet',
                                     input_shape=(video_size[1], video_size[0],
                                                  3))


def get_vgg(video):
    res = vgg.predict(keras.applications.vgg16.preprocess_input(video))
    return res


def calcvgg(x, nfn):
    print("Creating VGG descriptors for {}".format(x['filename']))
    clp = x['video']
    df = get_vgg(np.array(list(clp.iter_frames())))
    np.save(nfn, df)


if __name__ == "__main__":
    (mp.get_datastream(data_dir, ext=".resized.mp4")
     | load_moviepy_video()
     | cachecomputex(".resized.mp4", ".vgg.npy", calcvgg,
                     lambda x, nx: print("Skipping {}".format(x)))
     | close_moviepy_video()
     | execute)
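
load_moviepy_video and close_moviepy_video are not shown here; a rough sketch of what they might look like as pipe components, mirroring the 'clip' handling in Example #1 (this is an assumption, not the original implementation):

from pipe import Pipe

@Pipe
def load_moviepy_video(datastream):
    # attach a moviepy clip to each element of the stream
    return datastream | mp.apply('filename', 'video', lambda fn: VideoFileClip(fn))

@Pipe
def close_moviepy_video(datastream):
    # close ffmpeg readers once the cached computation has consumed the clip
    def close(v):
        v.reader.close()
        if v.audio is not None:
            v.audio.reader.close_proc()
    return datastream | mp.iter('video', close)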
Example #5

        raise ValueError("Data file '%s' not found" % scene_detection_file)

    return scene_changes


if __name__ == "__main__":

    if len(sys.argv) > 2:
        k = int(sys.argv[1])
        n = int(sys.argv[2])
        config.base_dir = config.base_dir_batch
        config.data_dir = config.data_dir_batch
    else:
        k, n = 0, 1

    (mp.get_datastream(data_dir, ext=".full.mp4")
     | batch(k, n)
     | mp.fapply('video', resize_video.load_resize)
     | execute)

    resized_file_names = (mp.get_datastream(data_dir, ext=".resized.mp4")
                          | mp.select_field("filename")
                          | mp.as_list)

    # use only the first threshold
    scene_changes = get_scene_changes(resized_file_names, data_dir)[40]

    (mp.get_datastream(data_dir, ext=".resized.mp4")
     | mp.filter("filename", lambda f: os.path.abspath(f) not in scene_changes)
     | cachecomputex(".resized.mp4", ".optflow.npy", create_denseflow.calcflow,
                     functools.partial(skip, s="creating dense flow"))
     | execute)
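
The batch and skip helpers are defined elsewhere; a plausible sketch, assuming batch splits the stream across parallel jobs by element index and skip is a cache-hit callback for cachecomputex:

from pipe import Pipe

@Pipe
def batch(datastream, k, n):
    # keep every n-th element starting at offset k, so job k of n processes its share
    return (x for i, x in enumerate(datastream) if i % n == k)

def skip(x, nx, s=""):
    # cache-hit callback: report which step is being skipped for this element
    print("Skipping {} for {}".format(s, x['filename']))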
Example #6
def calc_sub(filename, new_filename, model, get_func):
    print("Processing {}".format(filename))
    clp = VideoFileClip(filename)
    frames = list(clp.iter_frames())
    boxes = pickle.load(open(filename.replace('.resized.mp4','.boxes.pickle'), 'rb'))
    poses = []
    for f, bs in zip(frames, boxes):
        fposes = []
        for box in bs:
            x1, y1, x2, y2 = box.astype(int)
            # expand the box by a 30px margin, clamped to the frame bounds
            sub = f[max(y1 - 30, 0):min(y2 + 30, f.shape[0]),
                    max(x1 - 30, 0):min(x2 + 30, f.shape[1])]
            fposes.append(get_func(model, sub))
        poses.append(fposes)
    pickle.dump(poses, open(new_filename, 'wb'))                        
    clp.reader.close()
    if clp.audio is not None:
        clp.audio.reader.close_proc()


# Dense Pose Calculation

pose_model = TfPoseEstimator(get_graph_path('cmu'), target_size=(432, 368))
pcalc_pose = partial(calc_sub, model=pose_model, get_func=get_poses)

stream = (
    mp.get_datastream(data_dir, ext='.resized.mp4')
    | mp.select_field('filename')
    | cachecompute('.resized.mp4','.poses.pickle', pcalc_pose, lambda x, nx: print("Skipping {}".format(x)))    
    | execute
)
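
get_poses is assumed to wrap tf-pose-estimation's TfPoseEstimator.inference call; a minimal sketch (the upsample_size value is illustrative):

def get_poses(model, img):
    # run pose inference on a single person crop; returns the detected humans
    return model.inference(img, resize_to_default=True, upsample_size=4.0)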
Example #7
    conf = yaml.safe_load(f)

parser = argparse.ArgumentParser()

parser.add_argument("dir", help="Directory of people photos", default=".")
parser.add_argument("--facegroup",
                    help="Name of face group",
                    default="maingroup")

args = parser.parse_args()

face.BaseUrl.set(conf['FaceApi']['Endpoint'])
face.Key.set(conf['FaceApi']['Key'])

classes = mp.get_classes(args.dir)
data = mp.get_datastream(args.dir, classes=classes) | mp.as_list

print("Person Group Trainer Utility")
print(" + found {} people".format(len(classes)))
print(" + Creating face group {}".format(args.facegroup))
face.person_group.create(args.facegroup, name=args.facegroup)

people = {}

for p in classes.keys():
    photos = data | mp.filter('class_name', lambda x: x == p) | mp.as_list
    print("Adding person {} - {} pics".format(p, len(photos)))
    pers = face.person.create(args.facegroup, p)
    people[pers['personId']] = p
    for x in photos:
        print(" + Adding photo {}".format(x['filename']), end='')
Example #8
    flow = fflow.run(frames, boxes)
    pickle.dump(flow, open(new_filename, 'wb'))
    clp.reader.close()
    if clp.audio is not None:
        clp.audio.reader.close_proc()


# params for ShiTomasi corner detection
feature_params = dict(maxCorners=50,
                      qualityLevel=0.1,
                      minDistance=5,
                      blockSize=5)

# Parameters for lucas kanade optical flow
lk_params = dict(winSize=(15, 15),
                 maxLevel=3,
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10,
                           0.03))
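
These two dictionaries match the keyword arguments of OpenCV's corner detector and Lucas-Kanade tracker, which FocusedFlow presumably calls internally. A sketch of how they are typically consumed (prev_gray and next_gray are hypothetical grayscale frames):

p0 = cv2.goodFeaturesToTrack(prev_gray, mask=None, **feature_params)
p1, st, err = cv2.calcOpticalFlowPyrLK(prev_gray, next_gray, p0, None, **lk_params)
good_new = p1[st == 1]  # keep only the successfully tracked points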

fflow = FocusedFlow(feature_params, lk_params)
pcalc_fflow = partial(calc_fflow, fflow=fflow, fps=5)

stream = (mp.get_datastream(
    data_dir, ext=".full.mp4", classes={
        'noshot': 1,
        'shot': 2,
        'attack': 0
    })
          | mp.select_field('filename')
          | cachecompute(".full.mp4", ".fflow.pickle", pcalc_fflow,
                         lambda x, nx: print("Skipping {}".format(x)))
          | execute)
Example #9
import os
import cv2
import mPyPl as mp
import mPyPl.utils.image as mpui
from mPyPl.utils.pipeutils import *
from pipe import *
import functools as fn
import keras

print(mp.__version__)

train_dir = os.path.join(base_dir, 'training_set')
test_dir = os.path.join(base_dir, 'test_set')

classes = mp.get_classes(train_dir)
# we need to get classes explicitly so that the class-to-int mapping is the same for the train and test sets

# Show first few images from the training set
seq = (mp.get_datastream(train_dir, classes=classes)
       | take(10)
       | mp.apply(
           'filename', 'image',
           lambda fn: mpui.im_resize_pad(cv2.imread(fn), size=(100, 100)))
       | mp.select_field('image')
       | pexec(fn.partial(mpui.show_images, cols=2)))

transform = keras.preprocessing.image.ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')
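
A hypothetical usage of the augmenting generator defined above (batch size and class_mode are illustrative):

train_gen = transform.flow_from_directory(train_dir,
                                          target_size=(100, 100),
                                          batch_size=32,
                                          class_mode='categorical')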

def resize_save(x, nfn):
    # assumed header (the snippet is truncated here): 'resize' below stores
    # (original_clip, resized_clip) in x['video']
    clip, fx = x['video']
    fx.write_videofile(nfn)
    clip.close()

def load_resize(x):
    fn = x['filename']
    nfn = fn.replace('.full.mp4','.resized.mp4')
    x['filename'] = nfn
    if os.path.isfile(nfn):
        print("Loading resized {}".format(nfn))
        vc = VideoFileClip(nfn)
        return vc
    else:
        print("Resizing {}".format(fn))
        vc = VideoFileClip(fn).fx(vfx.resize, width=video_width)
        vc.write_videofile(nfn)
        return vc
    
def resize(x):
    v = VideoFileClip(x)
    vfxc = v.fx(vfx.resize, width=video_width)
    return (v, vfxc)

if __name__ == "__main__":
    (mp.get_datastream(data_dir, ext=".full.mp4")
     | where(lambda f: not os.path.isfile(f['filename'].replace(".full.mp4", ".resized.mp4")))
     | mp.apply('filename', 'video', resize)
     | cachecomputex(".full.mp4", ".resized.mp4", resize_save,
                     lambda x, nx: print("Skipping {}".format(x['filename'])))
     | execute)

def main(data_dir):
    x = (mp.get_datastream(data_dir, ext=".resized.mp4")
         | mp.select_field("filename")
         | mp.as_list)
    return detect_and_write(x, filename=os.path.join(data_dir, "scene.changes.pkl"))