示例#1
0
def eval3():
    """Evaluation 3: compare the intelligent (distance), random and oracle samplers.

    Runs `num` randomized trials per (target, sampler) configuration in a
    worker pool, tags the pooled rows so they can be told apart, and writes
    both the plot data (pkl) and the plot (pdf).
    """
    print("Evaluation 3: comparison of intelligent, random and oracle sampler")
    # Manually curated target imagesets for the two people of interest.
    target_s, target_j = tutl.load_pkl(TARGET_MANUAL), tutl.load_pkl(
        TARGET_MANUAL_JAMES)
    d0_s, d0_j = tutl.load_pkl(DATA0_SANDEEP), tutl.load_pkl(DATA0_JAMES)
    # learning data
    ldata_s, ldata_j = tutl.load_pkl(LEARNING_DATA_SANDEEP), tutl.load_pkl(
        LEARNING_DATA_JAMES)
    # learning data for oracle (target-only streams)
    oracle_s, oracle_j = tutl.load_pkl(ORACLE_SANDEEP), tutl.load_pkl(
        ORACLE_JAMES)

    num = 20  # independent trials per configuration
    # Arg tuple layout follows wrapper(); position 5 is the threshold and
    # position 6 the sampler mode -- confirm the rest against wrapper().
    arg1 = [(target_s, d0_s, shuffle_data(ldata_s), 10, 0.2, 'RAND', None,
             None) for _ in range(num)]
    arg2 = [(target_s, d0_s, shuffle_data(ldata_s), 10, 0.2, 'DIST', None,
             None) for _ in range(num)]
    arg3 = [(target_s, d0_s, shuffle_data(oracle_s), 10, 0.2, 'RAND', None,
             None) for _ in range(num)]
    arg4 = [(target_j, d0_j, shuffle_data(ldata_j), 10, 0.2, 'RAND', None,
             None) for _ in range(num)]
    arg5 = [(target_j, d0_j, shuffle_data(ldata_j), 10, 0.2, 'DIST', None,
             None) for _ in range(num)]
    arg6 = [(target_j, d0_j, shuffle_data(oracle_j), 10, 0.2, 'RAND', None,
             None) for _ in range(num)]

    # Context manager guarantees worker cleanup even if map() raises.
    with multiprocessing.Pool(num_cpus) as pool:
        result = pool.map(wrapper, arg1 + arg2 + arg3 + arg4 + arg5 + arg6)

    add_overall_accuracy(result)

    # One tag per result row; per-arg-list row count is assumed to be
    # num * _MAX_RETRAIN * 10 -- confirm against gen_data_frame's layout.
    tags = dict(
        target=np.hstack([
            np.repeat(tag, num * FirstSampler._MAX_RETRAIN * 10 * 3)
            for tag in ['sandeep', 'james']
        ]),
        sampler=np.hstack([
            np.repeat(tag, num * FirstSampler._MAX_RETRAIN * 10) for tag in
            ['random', 'distance', 'oracle', 'random', 'distance', 'oracle']
        ]))
    # Keep only rows whose label column matches the run's target.
    plot_data = gen_data_frame(result, tags).query(
        '(label == "sandeep" and target == "sandeep") or (label == "james" and target == "james")'
    )

    tutl.write_pkl(os.path.join(RESULT_OUT_PATH, 'eval3_plot.pkl'), plot_data)
    plot_file = os.path.join(RESULT_OUT_PATH, 'eval3_plot.pdf')
    ts_plot_with_distribution(plot_data,
                              plot_file,
                              'round#',
                              'accuracy',
                              hue='sampler',
                              style='target',
                              dashes=True,
                              ylim=ylim,
                              xticks=xticks)
示例#2
0
def eval5():
    """Evaluation 5: learn two targets at once with different priority weights.

    Builds a combined target (sandeep + james), runs `num` trials per weight
    setting with the 'WEIGHT' sampler, and plots per-label accuracy.
    """
    print("Evaluation 5: Learning two targets with priority")

    target_s, target_j = tutl.load_pkl(TARGET_MANUAL), tutl.load_pkl(
        TARGET_MANUAL_JAMES)
    # Concatenate the two single-person targets into one combined target.
    target = dict(names=target_s['names'] + target_j['names'],
                  embeddings=target_s['embeddings'] + target_j['embeddings'],
                  video=target_s['video'] + target_j['video'],
                  frame=target_s['frame'] + target_j['frame'])
    d0 = tutl.load_pkl(DATA0_2TARGETs)
    learningset = tutl.load_pkl(LEARNING_DATA_BOTH)

    num = 20  # independent trials per weight configuration

    # Same configuration, three different per-person weight dicts.
    arg1 = [(target, d0, shuffle_data(learningset), 10, 0.2, 'WEIGHT',
             dict(sandeep=1, james=1), None) for _ in range(num)]
    arg2 = [(target, d0, shuffle_data(learningset), 10, 0.2, 'WEIGHT',
             dict(sandeep=10, james=1), None) for _ in range(num)]
    arg3 = [(target, d0, shuffle_data(learningset), 10, 0.2, 'WEIGHT',
             dict(sandeep=1, james=10), None) for _ in range(num)]

    # Context manager guarantees worker cleanup even if map() raises.
    with multiprocessing.Pool(num_cpus) as pool:
        result = pool.map(wrapper, arg1 + arg2 + arg3)

    add_overall_accuracy(result)

    # One tag per result row; per-arg-list row count is assumed to be
    # num * _MAX_RETRAIN * 10 -- confirm against gen_data_frame's layout.
    tags = dict(weight=np.hstack([
        np.repeat(tag, num * FirstSampler._MAX_RETRAIN * 10)
        for tag in ['S1:J1', 'S10:J1', 'S1:J10']
    ]))
    plot_data = gen_data_frame(
        result, tags).query('label in ["sandeep", "james", "overall"]')

    tutl.write_pkl(os.path.join(RESULT_OUT_PATH, 'eval5_plot.pkl'), plot_data)

    plot_file = os.path.join(RESULT_OUT_PATH, 'eval5_plot.pdf')
    ts_plot_with_distribution(plot_data,
                              plot_file,
                              'round#',
                              'accuracy',
                              hue='label',
                              style='weight',
                              dashes=True,
                              ylim=ylim,
                              xticks=xticks)
示例#3
0
def eval4():
    """Evaluation 4: impact of the sampling threshold (0.15 / 0.2 / 0.3 / 0.5).

    Runs `num` trials per (target, threshold) pair with the distance sampler
    and plots accuracy per threshold and target.
    """
    print("Evaluation 4: evaluate the impact of threshold")
    target_s, target_j = tutl.load_pkl(TARGET_MANUAL), tutl.load_pkl(
        TARGET_MANUAL_JAMES)
    d0_s, d0_j = tutl.load_pkl(DATA0_SANDEEP), tutl.load_pkl(DATA0_JAMES)
    ldata_s, ldata_j = tutl.load_pkl(LEARNING_DATA_SANDEEP), tutl.load_pkl(
        LEARNING_DATA_JAMES)

    num = 20  # independent trials per configuration
    # Only the threshold (position 5) varies between the arg lists.
    arg1 = [(target_s, d0_s, shuffle_data(ldata_s), 10, 0.15, 'DIST', None,
             None) for _ in range(num)]
    arg2 = [(target_s, d0_s, shuffle_data(ldata_s), 10, 0.20, 'DIST', None,
             None) for _ in range(num)]
    arg3 = [(target_s, d0_s, shuffle_data(ldata_s), 10, 0.30, 'DIST', None,
             None) for _ in range(num)]
    arg4 = [(target_s, d0_s, shuffle_data(ldata_s), 10, 0.50, 'DIST', None,
             None) for _ in range(num)]
    arg5 = [(target_j, d0_j, shuffle_data(ldata_j), 10, 0.15, 'DIST', None,
             None) for _ in range(num)]
    arg6 = [(target_j, d0_j, shuffle_data(ldata_j), 10, 0.20, 'DIST', None,
             None) for _ in range(num)]
    arg7 = [(target_j, d0_j, shuffle_data(ldata_j), 10, 0.30, 'DIST', None,
             None) for _ in range(num)]
    arg8 = [(target_j, d0_j, shuffle_data(ldata_j), 10, 0.50, 'DIST', None,
             None) for _ in range(num)]

    # Context manager guarantees worker cleanup even if map() raises.
    with multiprocessing.Pool(num_cpus) as pool:
        result = pool.map(
            wrapper, arg1 + arg2 + arg3 + arg4 + arg5 + arg6 + arg7 + arg8)

    add_overall_accuracy(result)

    # One tag per result row; per-arg-list row count is assumed to be
    # num * _MAX_RETRAIN * 10 -- confirm against gen_data_frame's layout.
    tags = dict(threshold=np.hstack([
        np.repeat(tag, num * FirstSampler._MAX_RETRAIN * 10) for tag in [
            't=0.15', 't=0.2', 't=0.3', 't=0.5', 't=0.15', 't=0.2', 't=0.3',
            't=0.5'
        ]
    ]),
                target=np.hstack([
                    np.repeat(tag, num * FirstSampler._MAX_RETRAIN * 10 * 4)
                    for tag in ['sandeep', 'james']
                ]))
    # Keep only rows whose label column matches the run's target.
    plot_data = gen_data_frame(result, tags).query(
        '(label == "sandeep" and target == "sandeep") or (label == "james" and target == "james")'
    )
    # (removed leftover debug `print(tags)`)

    tutl.write_pkl(os.path.join(RESULT_OUT_PATH, 'eval4_plot.pkl'), plot_data)
    plot_file = os.path.join(RESULT_OUT_PATH, 'eval4_plot.pdf')
    ts_plot_with_distribution(plot_data,
                              plot_file,
                              'round#',
                              'accuracy',
                              hue='threshold',
                              style='target',
                              dashes=True,
                              ylim=ylim,
                              xticks=xticks)
示例#4
0
def main():
    """Generate a dataset pickle according to the parsed CLI options."""
    opts = setup_args()

    # Load an explicit data source when one was supplied; otherwise let
    # gen_data fall back to its default.
    if opts['data']:
        print("[INFO] loading data source...")
        source = tutl.load_pkl(opts['data'])
    else:
        source = None

    data = gen_data(source, opts['data_size'], opts['label'],
                    opts['exclude_label'], opts['shuffle'], opts['balanced'])

    print("[INFO] create pickle file...")
    tutl.write_pkl(opts['output_file'], data)
示例#5
0
def eval1():
    """Evaluation 1: accuracy vs amount of data (single random-sampler run)."""
    print("Evaluation 1: Accuracy vs Amount of data")
    eval_cases = ['sandeep', 'james', 'overall']
    dummy_target = tutl.load_pkl(TARGET_RSS)

    # Single task; the context manager guarantees worker cleanup.
    with multiprocessing.Pool(num_cpus) as pool:
        result = pool.map(
            wrapper,
            [(dummy_target, empty, ldata, 10, 0.2, 'RAND', None, None)])

    add_overall_accuracy(result)

    # Per-case accuracy series; `rnd` renamed from `round`, which shadowed
    # the builtin of the same name.
    plot_data = {
        case: [rnd[case] for rnd in result[0]]
        for case in eval_cases
    }
    # NOTE(review): plot_data is built but never written or plotted here --
    # confirm whether a write_pkl/plot step is missing.
示例#6
0
def eval2():
    """Evaluation 2: accuracy distribution over all labels (100 random trials)."""
    print("Evaluation 2: Accuracy distribution for all labels")
    dummy_target = tutl.load_pkl(TARGET_RSS)

    # 100 independent trials of the random sampler on shuffled data; the
    # context manager guarantees worker cleanup even if map() raises.
    with multiprocessing.Pool(num_cpus) as pool:
        result = pool.map(wrapper, [
            (dummy_target, empty, shuffle_data(ldata), 10, 0.2, 'RAND', None,
             None) for _ in range(100)
        ])

    add_overall_accuracy(result)

    # All labels are kept (no .query() filter) -- this plot shows the
    # full distribution.
    plot_data = gen_data_frame(result)
    tutl.write_pkl(os.path.join(RESULT_OUT_PATH, 'eval2_plot.pkl'), plot_data)
    plot_file = os.path.join(RESULT_OUT_PATH, 'eval2_plot.pdf')
    ts_plot_with_distribution(plot_data,
                              plot_file,
                              'round#',
                              'accuracy',
                              hue='label',
                              ylim=ylim,
                              xticks=xticks)
示例#7
0
# CLI: dump the face crops referenced by a target pickle as jpg files.
ap = argparse.ArgumentParser()
ap.add_argument("-d",
                "--data",
                help="path to serialized db of facial embeddings")
ap.add_argument("-t",
                "--target",
                required=True,
                help="path to target data pkl to visualize")
ap.add_argument("-o", "--output", help="directory path to output images")
args = vars(ap.parse_args())

print("[INFO] loading face embeddings...")
# Fall back to the repo-local default db when -d is not given.
DB_PATH = args['data'] if args['data'] else os.path.join(
    os.environ["HARVESTNET_ROOT_DIR"], 'tmp', "dataset", 'face_db.pkl')
db = tutl.load_pkl(DB_PATH)

target = tutl.load_pkl(args["target"])
OUT_DIR = args['output'] if args['output'] else os.path.join(
    os.environ["HARVESTNET_ROOT_DIR"], 'tmp', 'images')

print("[INFO] generate images...")
# Only video/frame are needed for the db lookup; the original also
# unpacked names/embeddings but never used them.
for video, frame in zip(target['video'], target['frame']):
    try:
        face_pixel = db[video][frame][0]['face_pixels']
        cv2.imwrite(os.path.join(OUT_DIR, video + '_' + str(frame) + '.jpg'),
                    face_pixel)
    except Exception as inst:
        # Best-effort export: report missing frames and keep going.
        print("[ERR] Image search failed!")
        print(str(inst))
示例#8
0
def eval6():
    """Evaluation 6: compare the impact of how the target imageset is built.

    Target imagesets come from five strategies per person (oracle stream,
    manual set, random subset, SSIM quantile, SSIM diversity); each strategy
    runs `num` times and per-person accuracy is plotted.
    """
    print("Evaluation 6: comparison of impact of target imageset")
    target_s, target_j = tutl.load_pkl(TARGET_MANUAL), tutl.load_pkl(
        TARGET_MANUAL_JAMES)
    d0_s, d0_j = tutl.load_pkl(DATA0_SANDEEP), tutl.load_pkl(DATA0_JAMES)
    ldata_s, ldata_j = tutl.load_pkl(LEARNING_DATA_SANDEEP), tutl.load_pkl(
        LEARNING_DATA_JAMES)
    oracle_s, oracle_j = tutl.load_pkl(ORACLE_SANDEEP), tutl.load_pkl(
        ORACLE_JAMES)

    db = tutl.load_pkl(os.path.join(DATA_PATH, 'face_db.pkl'))

    def preprocess(img, size=(100, 100)):
        # Grayscale + fixed size so images are comparable for SSIM.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        return cv2.resize(gray, size)

    def subset(data, idxes):
        # Build a target dict by picking the rows at `idxes` out of `data`.
        # Replaces four near-identical dict-comprehension blocks.
        return {
            k: [data[k][i] for i in idxes]
            for k in ('names', 'embeddings', 'video', 'frame')
        }

    pixels_s = [
        preprocess(db[video][frame][0]['face_pixels'])
        for video, frame in zip(oracle_s['video'], oracle_s['frame'])
    ]
    extr_s = TargetExtractor(pixels_s)

    pixels_j = [
        preprocess(db[video][frame][0]['face_pixels'])
        for video, frame in zip(oracle_j['video'], oracle_j['frame'])
    ]
    extr_j = TargetExtractor(pixels_j)

    num = 20  # independent trials per configuration

    # SSIM-diversity targets (one fresh index draw per trial).
    ssim_targets_s = [
        subset(oracle_s, extr_s.diversity(min_tile=0, rank_by='mean'))
        for _ in range(num)
    ]
    ssim_targets_j = [
        subset(oracle_j, extr_j.diversity(min_tile=0, rank_by='mean'))
        for _ in range(num)
    ]

    # SSIM-quantile targets.
    ssimq_targets_s = [subset(oracle_s, extr_s.quantile()) for _ in range(num)]
    ssimq_targets_j = [subset(oracle_j, extr_j.quantile()) for _ in range(num)]

    # Random 18-sample subsets of the oracle streams.
    rand_targets_s = [gen_data(oracle_s, 18) for _ in range(num)]
    rand_targets_j = [gen_data(oracle_j, 18) for _ in range(num)]

    arg1 = [(target_s, d0_s, shuffle_data(oracle_s), 10, 0.3, 'RAND', None,
             None) for _ in range(num)]
    arg2 = [(target_s, d0_s, shuffle_data(ldata_s), 10, 0.3, 'DIST', None,
             None) for _ in range(num)]
    arg3 = [(target, d0_s, shuffle_data(ldata_s), 10, 0.3, 'DIST', None, None)
            for target in rand_targets_s]
    arg4 = [(target, d0_s, shuffle_data(ldata_s), 10, 0.3, 'DIST', None, None)
            for target in ssimq_targets_s]
    arg5 = [(target, d0_s, shuffle_data(ldata_s), 10, 0.3, 'DIST', None, None)
            for target in ssim_targets_s]
    arg6 = [(target_j, d0_j, shuffle_data(oracle_j), 10, 0.3, 'RAND', None,
             None) for _ in range(num)]
    arg7 = [(target_j, d0_j, shuffle_data(ldata_j), 10, 0.3, 'DIST', None,
             None) for _ in range(num)]
    arg8 = [(target, d0_j, shuffle_data(ldata_j), 10, 0.3, 'DIST', None, None)
            for target in rand_targets_j]
    arg9 = [(target, d0_j, shuffle_data(ldata_j), 10, 0.3, 'DIST', None, None)
            for target in ssimq_targets_j]
    arg10 = [(target, d0_j, shuffle_data(ldata_j), 10, 0.3, 'DIST', None, None)
             for target in ssim_targets_j]

    # Context manager guarantees worker cleanup even if map() raises.
    with multiprocessing.Pool(num_cpus) as pool:
        result = pool.map(
            wrapper, arg1 + arg2 + arg3 + arg4 + arg5 + arg6 + arg7 + arg8 +
            arg9 + arg10)

    add_overall_accuracy(result)

    # One tag per result row; per-arg-list row count is assumed to be
    # num * _MAX_RETRAIN * 10 -- confirm against gen_data_frame's layout.
    tags = dict(target_generated_by=np.hstack([
        np.repeat(tag, num * FirstSampler._MAX_RETRAIN * 10) for tag in [
            'oracle', 'manual', 'random', 'ssim(quantile)', 'ssim(diversity)',
            'oracle', 'manual', 'random', 'ssim(quantile)', 'ssim(diversity)'
        ]
    ]),
                target=np.hstack([
                    np.repeat(tag, num * FirstSampler._MAX_RETRAIN * 10 * 5)
                    for tag in ['sandeep', 'james']
                ]))
    # Keep only rows whose label column matches the run's target.
    plot_data = gen_data_frame(result, tags).query(
        '(label == "sandeep" and target == "sandeep") or (label == "james" and target == "james")'
    )

    tutl.write_pkl(os.path.join(RESULT_OUT_PATH, 'eval6_plot.pkl'), plot_data)
    plot_file = os.path.join(RESULT_OUT_PATH, 'eval6_plot.pdf')
    ts_plot_with_distribution(plot_data,
                              plot_file,
                              'round#',
                              'accuracy',
                              hue='target_generated_by',
                              style='target',
                              ylim=ylim,
                              xticks=xticks)
示例#9
0
def eval0():
    """Evaluation 0: oracle stream, random vs static vs dynamic distance sampling."""
    print("Evaluation 0: Oracle")
    # Only the sandeep d0/oracle sets are actually used below; the original
    # also loaded target_s/j, d0_j, ldata_s/j and oracle_j and never read them.
    d0_s = tutl.load_pkl(DATA0_SANDEEP)
    oracle_s = tutl.load_pkl(ORACLE_SANDEEP)

    db = tutl.load_pkl(os.path.join(DATA_PATH, 'face_db.pkl'))

    def preprocess(img, size=(100, 100)):
        # Grayscale + fixed size so images are comparable for SSIM.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        return cv2.resize(gray, size)

    pixels = [
        preprocess(db[video][frame][0]['face_pixels'])
        for video, frame in zip(oracle_s['video'], oracle_s['frame'])
    ]
    extr = TargetExtractor(pixels)
    # Shallow copy on purpose: oracle_s_wp shares the original lists and
    # only adds the extra 'pixels' key.
    oracle_s_wp = copy.copy(oracle_s)
    oracle_s_wp['pixels'] = pixels

    num = 20  # independent trials per configuration

    # One fresh SSIM-diversity index draw (and target) per trial.
    ssim_idxes = [
        extr.diversity(min_tile=0, rank_by='mean') for _ in range(num)
    ]
    ssim_targets = [{
        'names': [oracle_s['names'][idx] for idx in ssim_idx],
        'embeddings': [oracle_s['embeddings'][idx] for idx in ssim_idx],
        'video': [oracle_s['video'][idx] for idx in ssim_idx],
        'frame': [oracle_s['frame'][idx] for idx in ssim_idx]
    } for ssim_idx in ssim_idxes]

    arg1 = [(target, d0_s, shuffle_data(oracle_s), 10, 0.2, 'RAND', None, None)
            for target in ssim_targets]
    arg2 = [(target, d0_s, shuffle_data(oracle_s), 10, 0.2, 'DIST', None, None)
            for target in ssim_targets]
    # Dynamic variant: the extractor is passed through so the target can
    # be updated during the run -- confirm against wrapper().
    arg3 = [(target, d0_s, shuffle_data(oracle_s_wp), 10, 0.2, 'DIST', None,
             extr) for target in ssim_targets]

    # Context manager guarantees worker cleanup even if map() raises.
    with multiprocessing.Pool(num_cpus) as pool:
        result = pool.map(wrapper, arg1 + arg2 + arg3)
    add_overall_accuracy(result)

    # One tag per result row; per-arg-list row count is assumed to be
    # num * _MAX_RETRAIN * 10 -- confirm against gen_data_frame's layout.
    tags = dict(sampler=np.hstack([
        np.repeat(tag, num * FirstSampler._MAX_RETRAIN * 10)
        for tag in ['random', 'distance(static)', 'distance(dynamic)']
    ]))
    plot_data = gen_data_frame(result, tags).query('label == "sandeep"')

    tutl.write_pkl(os.path.join(RESULT_OUT_PATH, 'eval0_plot.pkl'), plot_data)
    plot_file = os.path.join(RESULT_OUT_PATH, 'eval0_plot.pdf')
    ts_plot_with_distribution(plot_data,
                              plot_file,
                              'round#',
                              'accuracy',
                              hue='sampler',
                              ylim=ylim,
                              xticks=xticks)
示例#10
0
from facenet.evaluation.generator import gen_data
import utils.textfile_utils as tutl
import utils.calculation_utils as cutl

# All dataset pickles live under $HARVESTNET_ROOT_DIR/tmp/dataset; result
# pickles/plots go to $HARVESTNET_ROOT_DIR/tmp/results.
DATA_PATH = os.path.join(os.environ["HARVESTNET_ROOT_DIR"], 'tmp', 'dataset')
RESULT_OUT_PATH = os.path.join(os.environ["HARVESTNET_ROOT_DIR"], 'tmp',
                               'results')

#LEARNING DATA
ALL_DATA = os.path.join(DATA_PATH,
                        'face_learning_data.pkl')  #This includes the testset.
ALL_LEARNING_DATA = os.path.join(DATA_PATH, 'learningset_all.pkl')
LEARNING_DATA = os.path.join(
    DATA_PATH,
    'learningset.pkl')  #Balanced learning dataset that is randomly sampled.
# NOTE: loaded at import time -- the pickle must exist before this module
# is imported.
ldata = tutl.load_pkl(LEARNING_DATA)

DATA0_SANDEEP = os.path.join(
    DATA_PATH, 'learningset_w_o_sandeep.pkl')  #learning data except sandeep
DATA0_JAMES = os.path.join(DATA_PATH, 'learningset_w_o_james.pkl')
DATA0_2TARGETs = os.path.join(DATA_PATH,
                              'd0_2targets.pkl')  #d0 data except targets

LEARNING_DATA_SANDEEP = os.path.join(DATA_PATH, 'ldata_d0_U_target.pkl')
LEARNING_DATA_JAMES = os.path.join(DATA_PATH, 'ldata_d0_U_james.pkl')
LEARNING_DATA_BOTH = os.path.join(DATA_PATH, 'ldata_d0_U_2targets.pkl')

ORACLE_SANDEEP = os.path.join(
    DATA_PATH, 'oracle_sandeep.pkl')  #contains sandeep embeddings ONLY
ORACLE_JAMES = os.path.join(
    DATA_PATH, 'oracle_james.pkl')  #contains james embeddings ONLY
示例#11
0
def gen_data(source=None,
             data_size=None,
             label=None,
             exclude_label=None,
             shuffle=True,
             balanced=True):
    """Build a (possibly filtered, shuffled, label-balanced) dataset dict.

    Args:
        source: dataset dict with 'names', 'embeddings', 'video', 'frame'
            lists; when None the default learningset pickle is loaded.
        data_size: cap on the number of samples kept per label (None = no cap).
        label: if given, keep only samples whose name is in this collection.
        exclude_label: if `label` is not given, drop samples whose name is in
            this collection.
        shuffle: shuffle the source (via sklearn) before filtering/balancing.
        balanced: equalize per-label sample counts (further capped by
            data_size).

    Returns:
        dict(names=..., embeddings=..., video=..., frame=...)
    """
    if source is None:
        print("[INFO] loading default data source...")
        SOURCE_PATH = os.path.join(os.environ["HARVESTNET_ROOT_DIR"], 'tmp',
                                   "dataset", 'learningset_all.pkl')
        source = tutl.load_pkl(SOURCE_PATH)

    KEYS = ['names', 'embeddings', 'video', 'frame']
    out = {k: [] for k in KEYS}

    if shuffle:
        print("[INFO] shuffling...")
        # Shuffle all four parallel lists with the same permutation.
        snames, sembeddings, svideo, sframe = sklearn.utils.shuffle(
            source['names'], source['embeddings'], source['video'],
            source['frame'])
        source = dict(names=snames,
                      embeddings=sembeddings,
                      video=svideo,
                      frame=sframe)

    print("[INFO] filtering...")
    for i, name in enumerate(source['names']):
        # A `label` whitelist takes precedence over the `exclude_label`
        # blacklist, matching the original branch order.
        if label:
            keep = name in label
        else:
            keep = not exclude_label or name not in exclude_label
        if keep:
            for k in KEYS:
                out[k].append(source[k][i])

    labels = set(out['names'])

    if data_size or balanced:
        print("[INFO] balancing...")
        desired_size = data_size if data_size else float('inf')
        # Per-label size every label can reach; min over labels caps it.
        counts = {lbl: out['names'].count(lbl) for lbl in labels}
        size = min(desired_size, min(counts.values()))

        names, embeddings, video, frame = [], [], [], []
        # Incremental counts instead of the original O(n^2) list.count()
        # calls inside the loop; same selection order, same result.
        taken = dict.fromkeys(labels, 0)
        idx = 0
        while min(taken.values()) < size:
            name = out['names'][idx]
            if taken[name] < size:
                taken[name] += 1
                names.append(name)
                embeddings.append(out['embeddings'][idx])
                video.append(out['video'][idx])
                frame.append(out['frame'][idx])
            idx += 1
    else:
        # BUG FIX: the original indexed out['vide'] here, raising KeyError
        # whenever balancing was disabled.
        names, embeddings, video, frame = (out['names'], out['embeddings'],
                                           out['video'], out['frame'])

    return dict(names=names, embeddings=embeddings, video=video, frame=frame)
示例#12
0
        'james_apoorva_sandeep_1224'
    ]
]
# One embedding pickle per input video prefix (PREFIXES is defined earlier
# in this file).
INPUT_EMB_PATHES = [
    os.path.join(os.environ["HARVESTNET_ROOT_DIR"], 'tmp', 'EMBEDDINGS',
                 prefix + '.pkl') for prefix in PREFIXES
]
# Reference embeddings compared against below.
SAMPLE_EMB_PATH = os.path.join(os.environ["HARVESTNET_ROOT_DIR"], 'tmp',
                               'EMBEDDINGS', 'edge_embeddings.pickle')

# Outputs: labeled-embedding pickle and directory for face pictures.
OUTPUT_PKL_PATH = os.path.join(os.environ["HARVESTNET_ROOT_DIR"], 'tmp',
                               'temp_labeled.pkl')
OUTPUT_PIC_DIR = os.path.join(os.environ["HARVESTNET_ROOT_DIR"], 'tmp',
                              'face_pictures')

sample_emb = tutl.load_pkl(SAMPLE_EMB_PATH)

# Accumulator keyed by video prefix, filled by the loop below.
out_emb = {}

for path in INPUT_EMB_PATHES:
    embeddings = tutl.load_pkl(path)
    prefix = path.split(
        os.sep)[-1].split('.')[0].split('_embeddings_detections')[0]
    out_emb[prefix] = {}

    frame_num = 0
    while frame_num < len(embeddings):
        if len(embeddings[frame_num]) > 0:
            distances = np.array([
                cutl.distance(embeddings[frame_num][0]["embedding"], sample)
                for sample in sample_emb["embeddings"]
示例#13
0
"""

# Video-name prefixes recognized in ground-truth image file names.
PREFIXES = [
    'whole_lab_training', 'abi_sandeep', 'csandeep_amine_andrew',
    'james_apoorva_sandeep_1224'
]
SUFFIX = '.jpg'  # image extension that follows the frame number
# Intermediate labeled-embedding pickle produced by an earlier step.
TEMP_PKL_PATH = os.path.join(os.environ["HARVESTNET_ROOT_DIR"], 'tmp',
                             'temp_labeled.pkl')
# Ground-truth images, one subdirectory per label.
IMAGE_FILE_PATH = os.path.join(os.environ["HARVESTNET_ROOT_DIR"], 'tmp',
                               'face_pictures', 'ground_truth')
# Labels come from the subdirectory names; dotfiles are skipped.
LABELS = [label for label in os.listdir(IMAGE_FILE_PATH) if label[0] != '.']
OUTPUT_PKL_PATH = os.path.join(os.environ["HARVESTNET_ROOT_DIR"], 'tmp',
                               'face_learning_data.pkl')

temp_emb = tutl.load_pkl(TEMP_PKL_PATH)

# Output accumulator in the standard dataset-dict shape used elsewhere.
out_emb = dict(names=[], embeddings=[], video=[], frame=[])

def get_video_file_name_and_frame_num(fname):
    """Split an image file name into its (video prefix, frame number) pair.

    Raises if no known prefix from PREFIXES occurs in the name.
    """
    for prefix in PREFIXES:
        if prefix in fname:
            frame_num = int(fname.split(prefix)[1].split(SUFFIX)[0])
            return (prefix, frame_num)
    raise Exception('[ERR] No video file name found! ' + str(fname))


for label in LABELS:
    for vname_frame in [
            get_video_file_name_and_frame_num(fname)
            for fname in os.listdir(os.path.join(IMAGE_FILE_PATH, label))
示例#14
0
# CLI options (the ArgumentParser `ap` is created earlier in this file).
ap.add_argument("-e",
                "--embeddings",
                help="path to serialized db of facial embeddings")
ap.add_argument("-r",
                "--recognizer",
                help="path to output model trained to recognize faces")
ap.add_argument("-n",
                "--data_size",
                help="number of embeddings per label for learning")
args = vars(ap.parse_args())

print("[INFO] loading face embeddings...")
# Fall back to the repo-local learning-data pickle when -e is not given.
LEARNING_DATA_PKL_PATH = args['embeddings'] if args[
    'embeddings'] else os.path.join(os.environ["HARVESTNET_ROOT_DIR"], 'tmp',
                                    "dataset", 'face_learning_data.pkl')
ldata = tutl.load_pkl(LEARNING_DATA_PKL_PATH)
# Flatten each embedding to a 128-vector; keep labels as plain strings.
orig_embeddings, label_str = np.array([
    embedding.reshape(128) for embedding in ldata['embeddings']
]), ldata['names']

LABEL_LIST = set(label_str)

print("[INFO] shuffle data...")
# Group the embeddings by label and shuffle within each group.
# NOTE(review): `shuffle` is presumably sklearn.utils.shuffle imported
# elsewhere in this file -- confirm.
emb_dict = {
    label: shuffle([
        embedding for i, embedding in enumerate(orig_embeddings)
        if label == label_str[i]
    ])
    for label in LABEL_LIST
}
示例#15
0
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from utils.plotting_utils import *
import utils.textfile_utils as tutl

# CLI: one or more trained model pickles, an optional graph path, and a
# required testset path.
ap = argparse.ArgumentParser()
ap.add_argument("-m", "--model", nargs='+', required=True,
	help="path to model trained")
ap.add_argument("-g", "--graph",
    help="path to graph of accuracy plot")
ap.add_argument("-t", "--testset", required=True,
    help="path to testset piclke")
args = vars(ap.parse_args())

print("[INFO] get testdata...")
# NOTE(review): the hard-coded testset path ignores the required
# -t/--testset argument -- confirm whether args["testset"] should be used.
testdata = tutl.load_pkl(os.path.join(os.environ["HARVESTNET_ROOT_DIR"], 'tmp', "dataset", 'testset.pkl'))
# Flatten embeddings to 128-vectors; labels stay as strings.
X, labelstr = [emb.reshape(128) for emb in testdata['embeddings']], testdata['names']

print("[INFO] loading model...")
# Parallel lists: models[i] pairs with label encoder les[i].
models, les = [], []
for mpath in args["model"]:
    model_obj = tutl.load_pkl(mpath)
    if isinstance(model_obj, list):
        for model_obj_elem in model_obj:
            models.append(model_obj_elem[0])
            les.append(model_obj_elem[1])
    elif isinstance(model_obj, tuple):
        (model_elem, le_elem) = tutl.load_pkl(mpath)
        models.append(model_elem)
        les.append(le_elem)
    else: