示例#1
0
def main():
    """Filter a face dataset by quality scores and save images/labels as .npz.

    Reads metadata from ./<db>_crop/<db>.mat, keeps samples with a passing
    face score, no second face, a valid age (0-100) and a known gender, then
    resizes each image to img_size x img_size and writes everything with
    np.savez.
    """
    args = get_args()
    output_path = args.output
    db = args.db
    img_size = args.img_size
    min_score = args.min_score

    root_path = "./{}_crop/".format(db)
    mat_path = root_path + "{}.mat".format(db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(mat_path, db)

    out_genders = []
    out_ages = []
    out_imgs = []

    for i in tqdm(range(len(face_score))):
        if face_score[i] < min_score:
            continue

        # skip samples where a second face was detected
        if (not np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue

        # FIX: use logical `not`, not bitwise `~` — `~` only acts as logical
        # negation on numpy bools and is always truthy on plain Python bools.
        if not (0 <= age[i] <= 100):
            continue

        if np.isnan(gender[i]):
            continue

        img = cv2.imread(root_path + str(full_path[i][0]))
        if img is None:  # FIX: skip unreadable files; also keeps labels in
            continue     # sync with images (labels were appended first)
        out_genders.append(int(gender[i]))
        out_ages.append(age[i])
        out_imgs.append(cv2.resize(img, (img_size, img_size)))

    np.savez(output_path, image=np.array(out_imgs), gender=np.array(out_genders), age=np.array(out_ages), img_size=img_size)
示例#2
0
def plot_age_dist():
    """Plot the age histogram of the wiki dataset after filtering out
    unusual samples."""
    db = "wiki"
    mat_path = r"D:\wiki_crop\wiki.mat"
    meta = get_meta(mat_path, db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = meta
    ok_idx = filter_unusual(full_path, gender, face_score, second_face_score, age)

    plot.histgram_demo(age[ok_idx])
def main():
    """Filter dataset samples by quality and write a tab-separated label file.

    Each output line is "filename<TAB>age<TAB>gender" for every sample that
    passes the face-score / second-face / age-range / gender checks.
    """
    args = get_args()
    rootpath = args.rootpath
    outfile = args.outfile
    metafile = args.metafile
    min_score = args.minscore
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = utils.get_meta(
        os.path.join(rootpath, '%s.mat' % metafile), metafile)

    total = 0

    label = []
    print("%d images " % len(face_score))
    for i in range(len(face_score)):
        if face_score[i] < min_score:
            continue

        # skip samples where a second face was detected
        if (not np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue

        # FIX: logical `not` instead of bitwise `~` (the latter is always
        # truthy on plain Python bools).
        if not (0 <= age[i] <= 100):
            continue

        if np.isnan(gender[i]):
            continue
        fname = str(full_path[i][0])

        label.append([fname, age[i], gender[i]])
        total += 1

    with open(os.path.join(rootpath, outfile), 'w') as f:
        for line in label:
            f.write(line[0] + '\t' + str(line[1]) + '\t' + str(line[2]) + '\n')
    print("filter data")
    print("total: %d image" % (total))
    print('Done!!!')
示例#4
0
def main():
    """Re-detect, crop and resize faces into a new database folder.

    Images passing the quality filters are run through face_recognition;
    the first detected face is cropped, resized to 224x224 and written
    either into one flat folder or into per-age subfolders (args.folders).
    """
    args = get_args()

    parent_folder = args.input + "/"
    db = args.db

    newdb_path = parent_folder + "new_database/"

    mat_path = parent_folder + "{}.mat".format(db)

    # create folder for new dataset
    if not os.path.exists(newdb_path):
        os.makedirs(newdb_path)

    # create a folder for every age (000..100)
    if args.folders:
        for i in range(101):
            new_folder_path = newdb_path + ('%03d' % i)
            if not os.path.exists(new_folder_path):
                os.makedirs(new_folder_path)

    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)

    for i in tqdm(range(len(full_path))):
        if face_score[i] < 1.0:
            continue

        # skip samples where a second face was detected
        if (not np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue

        # FIX: logical `not` instead of bitwise `~` (only worked by accident
        # on numpy bools).
        if not (0 <= age[i] <= 100):
            continue

        if np.isnan(gender[i]):
            continue

        image = face_recognition.load_image_file(parent_folder +
                                                 full_path[i][0])
        face_locations = face_recognition.face_locations(image)
        if len(face_locations) == 0:
            continue
        # face_locations entries are (top, right, bottom, left)
        top, right, bottom, left = face_locations[0]
        img = image[top:bottom, left:right, :]
        # FIX: guard against degenerate (empty) crops that crash cv2.resize
        if img.size == 0:
            continue

        resized_image = cv2.resize(img, (224, 224))

        name_of_file = full_path[i][0].split('/')[-1].split('.')[0]

        new_name = name_of_file + "A" + str(age[i]) + ".jpg"

        if args.folders:
            cv2.imwrite(newdb_path + ('%03d' % age[i]) + "/" + new_name,
                        resized_image)
        else:
            cv2.imwrite(newdb_path + new_name, resized_image)
def main():
    """Build train/validation datasets in parallel.

    Filters samples with get_passed, shuffles the surviving indexes, splits
    them into train/validation by train_ratio (optionally capped at
    max_count), batches the metadata into chunks of max_num_per_file, and
    processes/saves each chunk with joblib across all cores.
    """
    args = get_args()
    output_path = args.output
    db = args.db
    max_count = args.max_count
    img_size = args.img_size
    min_score = args.min_score
    max_num_per_file = args.max_num_per_file
    train_ratio = args.train_ratio

    root_path = "data/{}_crop/".format(db)
    mat_path = root_path + "{}.mat".format(db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)

    length = len(face_score)

    # make out filepath: directory prefix + base filename without extension
    outpath_prefix = output_path.split('/')[:-1]
    outpath_prefix = path_concat(outpath_prefix)
    filename = output_path.split('/')[-1].split('.')[0]

    # filter bad images and get passed indexes
    indexes = get_passed(length, min_score, age, face_score, gender)

    # shuffle indexes
    random.shuffle(indexes)
    effective_length = len(indexes)
    # train set size: a fraction of max_count when capped, otherwise a
    # fraction of all surviving samples
    train_length = int(max_count *
                       train_ratio) if max_count is not None else int(
                           effective_length * train_ratio)
    print('train_length=%s' % train_length)

    # split meta data indexes into train/validation
    train_idxs, val_idxs = split_meta_indexes(indexes,
                                              train_length,
                                              max_count=max_count)

    # get meta data from filtered indexes
    train_metas = make_meta(train_idxs, full_path, age, gender)
    val_metas = make_meta(val_idxs, full_path, age, gender)

    # split into mini batches of at most max_num_per_file entries
    mini_list = minibatch_gen(train_metas, max_num_per_file)

    # process and save train data (one job per mini batch)
    Parallel(n_jobs=-1, verbose=5)([
        delayed(process)('train', root_path, img_size, outpath_prefix,
                         filename, i, mini) for i, mini in enumerate(mini_list)
    ])

    # process and save validation data
    mini_list = minibatch_gen(val_metas, max_num_per_file)
    Parallel(n_jobs=-1, verbose=5)([
        delayed(process)('val', root_path, img_size, outpath_prefix, filename,
                         i, mini) for i, mini in enumerate(mini_list)
    ])
示例#6
0
def eval(env, num):
  """Fire `num` requests spread over roughly one minute, with
  Poisson-distributed inter-arrival gaps (in milliseconds)."""
  meta = utils.get_meta(env)
  sender = utils.sender

  pool = ThreadPoolExecutor(max_workers=1000)

  logging.info('request num: {}'.format(num))
  mean_gap_ms = (60 * 1000.0) / num
  gaps = np.random.poisson(mean_gap_ms, num)
  for gap in gaps:
    pool.submit(sender, meta)
    time.sleep(gap / 1000.0)
示例#7
0
def main_process(data_path, db, limit=None):
    """Write a "<db>_crop/<path> <age>" meta file for filtered samples.

    Samples are selected by filter_unusual; at most `limit` lines are
    written when limit is set. Returns the path of the written meta file.
    """
    mat_path = os.path.join(data_path, db + "_crop", db + ".mat")
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)
    ok_idx = filter_unusual(full_path, gender, face_score, second_face_score,
                            age)
    meta_file = os.path.join(data_path, db + ".txt")
    with open(meta_file, 'w') as f:
        for i in trange(len(ok_idx)):
            if limit and i >= int(limit):
                break
            # BUG FIX: index metadata with the filtered index ok_idx[i],
            # not the loop counter i (which merely enumerates positions in
            # ok_idx) — otherwise the wrong samples are written.
            idx = ok_idx[i]
            f.write("%s_crop/%s %d\n" % (db, full_path[idx][0], age[idx]))
    return meta_file
示例#8
0
def main():
    """Filter face images, normalize them to CHW RGB in [0, 1], and save the
    dataset (optionally capped at max_count samples) as a .mat file."""
    args = get_args()
    output_path = args.output
    db = args.db
    max_count = args.max_count
    img_size = args.img_size
    min_score = args.min_score

    root_path = "data/{}_crop/".format(db)
    mat_path = root_path + "{}.mat".format(db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)

    out_genders = []
    out_ages = []
    out_imgs = []

    length = len(face_score)
    for i in tqdm(range(length)):
        if face_score[i] < min_score:
            continue

        # second-face filtering deliberately disabled:
        #if (~np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
        #    continue

        # FIX: logical `not` instead of bitwise `~` (always truthy on plain
        # Python bools).
        if not (0 <= age[i] <= 100):
            continue

        if np.isnan(gender[i]):
            continue

        img = cv2.imread(root_path + str(full_path[i][0]), 1)
        if img is None:  # FIX: skip unreadable files; keeps labels in sync
            continue
        out_genders.append(int(gender[i]))
        out_ages.append(age[i])
        img = cv2.resize(img, (img_size, img_size))
        img = img[..., ::-1]  # BGR -> RGB
        # HWC -> CHW and scale to [0, 1]
        img = np.around(np.transpose(img, (2, 0, 1)) / 255.0, decimals=12)
        out_imgs.append(img)
        if max_count is not None and len(out_imgs) >= max_count:
            break

    output = {
        "image": np.array(out_imgs),
        "gender": np.array(out_genders),
        "age": np.array(out_ages),
        "db": db,
        "img_size": img_size,
        "min_score": min_score
    }
    scipy.io.savemat(output_path, output)
def main():
    """Filter wiki face images and save images/labels (including truncated
    face scores) as a .mat file, plus a one-row CSV dump of the output."""
    args = get_args()
    output_path = args.output
    db = args.db
    img_size = args.img_size
    min_score = args.min_score

    # FIX: paths are hard-coded to the wiki dataset; the previous
    # `.format(db)` calls on these strings were no-ops (no placeholders).
    root_path = "data/wiki_crop/"
    mat_path = root_path + "wiki.mat"
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)

    out_genders = []
    out_ages = []
    out_imgs = []
    out_faceScore = []

    for i in tqdm(range(len(face_score))):
        if face_score[i] < min_score:
            continue

        # skip samples where a second face was detected
        if (not np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue

        # FIX: logical `not` instead of bitwise `~`
        if not (0 <= age[i] <= 100):
            continue

        if np.isnan(gender[i]):
            continue

        img = cv2.imread(root_path + str(full_path[i][0]))
        if img is None:  # FIX: skip unreadable files; keeps labels in sync
            continue
        out_genders.append(int(gender[i]))
        out_faceScore.append(int(face_score[i]))  # score truncated to int
        out_ages.append(age[i])
        out_imgs.append(cv2.resize(img, (img_size, img_size)))

    output = {
        "image": np.array(out_imgs),
        "gender": np.array(out_genders),
        "age": np.array(out_ages),
        "db": db,
        "img_size": img_size,
        "fscore": np.array(out_faceScore)
    }
    scipy.io.savemat(output_path, output)

    with open('mycsvfilet.csv', 'w') as f:  # Just use 'w' mode in 3.x
        w = csv.DictWriter(f, output.keys())
        w.writeheader()
        w.writerow(output)
示例#10
0
def main():
    """Filter face images and write train/validation TFRecord files.

    The first `train_fraction` of the filtered samples become the training
    split; the remainder is the validation split.
    """
    args = get_args()
    output_path = args.output
    db = args.db
    img_size = args.img_size
    min_score = args.min_score
    fr = args.train_fraction

    root_path = "data/{}_crop/".format(db)
    mat_path = root_path + "{}.mat".format(db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)

    out_genders = []
    out_ages = []
    out_imgs = []

    for i in tqdm(range(len(face_score))):
        if face_score[i] < min_score:
            continue

        # skip samples where a second face was detected
        if (not np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue

        # FIX: logical `not` instead of bitwise `~` (always truthy on plain
        # Python bools).
        if not (0 <= age[i] <= 100):
            continue

        if np.isnan(gender[i]):
            continue

        img = cv2.imread(root_path + str(full_path[i][0]))
        if img is None:  # FIX: skip unreadable files; keeps labels in sync
            continue
        out_genders.append(int(gender[i]))
        out_ages.append(age[i])
        out_imgs.append(cv2.resize(img, (img_size, img_size)))

    number_of_train = int(fr * len(out_imgs))
    train_images = out_imgs[0:number_of_train]
    labels_train_gender = out_genders[0:number_of_train]
    labels_train_age = out_ages[0:number_of_train]

    valid_images = out_imgs[number_of_train:]
    labels_valid_gender = out_genders[number_of_train:]
    labels_valid_age = out_ages[number_of_train:]

    tfrecord_train = 'train_' + str(img_size) + '.tfrecords'
    tfrecord_valid = 'valid_' + str(img_size) + '.tfrecords'
    create_tfrecord(tfrecord_train, train_images, labels_train_age,
                    labels_train_gender)
    create_tfrecord(tfrecord_valid, valid_images, labels_valid_age,
                    labels_valid_gender)
def main():
    """Filter face images into a preallocated uint8 array and save as .mat."""
    logging.debug("Alaa -->  start creating dataBase ...")
    args = get_args()
    output_path = args.output
    db = args.db
    img_size = args.img_size
    min_score = args.min_score

    root_path = "data/{}_crop/".format(db)
    mat_path = root_path + "{}.mat".format(db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)

    out_genders = []
    out_ages = []
    sample_num = len(face_score)
    # preallocate for the worst case; trimmed to valid_sample_num at the end
    out_imgs = np.empty((sample_num, img_size, img_size, 3), dtype=np.uint8)
    valid_sample_num = 0

    for i in tqdm(range(sample_num)):
        if face_score[i] < min_score:
            continue

        # skip samples where a second face was detected
        if (not np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue

        # FIX: logical `not` instead of bitwise `~`
        if not (0 <= age[i] <= 100):
            continue

        if np.isnan(gender[i]):
            continue

        img = cv2.imread(root_path + str(full_path[i][0]))
        if img is None:  # FIX: skip unreadable files; keeps labels in sync
            continue
        out_genders.append(int(gender[i]))
        out_ages.append(age[i])
        out_imgs[valid_sample_num] = cv2.resize(img, (img_size, img_size))
        valid_sample_num += 1

    output = {
        "image": out_imgs[:valid_sample_num],
        "gender": np.array(out_genders),
        "age": np.array(out_ages),
        "db": db,
        "img_size": img_size,
        "min_score": min_score
    }
    scipy.io.savemat(output_path, output)
    logging.debug("Alaa -->  succes to create dataBase and save it ...")
示例#12
0
def main():
    """Filter face images by quality and save images/labels as a .mat file."""
    args = get_args()
    output_path = args.output
    db = args.db
    img_size = args.img_size
    min_score = args.min_score

    root_path = "/data/chercheurs/agarwals/DEX/{}_crop/".format(db)
    mat_path = root_path + "{}.mat".format(db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)

    out_genders = []
    out_ages = []
    out_imgs = []

    for i in tqdm(range(len(face_score))):
        if face_score[i] < min_score:
            continue

        # skip samples where a second face was detected
        if (not np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue

        # FIX: logical `not` instead of bitwise `~`
        if not (0 <= age[i] <= 100):
            continue

        if np.isnan(gender[i]):
            continue

        img = cv2.imread(root_path + str(full_path[i][0]))
        if img is None:  # FIX: skip unreadable files; keeps labels in sync
            continue
        out_genders.append(int(gender[i]))
        out_ages.append(age[i])
        out_imgs.append(cv2.resize(img, (img_size, img_size)))

    output = {
        "image": np.array(out_imgs),
        "gender": np.array(out_genders),
        "age": np.array(out_ages),
        "db": db,
        "img_size": img_size,
        "min_score": min_score
    }
    scipy.io.savemat(output_path, output)
def main():
    """Filter face images and save images/labels as a .mat file, stopping
    early when args.max_samples is set."""
    args = get_args()
    output_path = args.output
    db = args.db
    img_size = args.img_size
    min_score = args.min_score

    root_path = args.input
    mat_path = "{}/{}.mat".format(root_path, db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(mat_path, db)

    out_genders = []
    out_ages = []
    out_imgs = []

    for i in tqdm(range(len(face_score))):
        if args.max_samples and i >= args.max_samples:
            # FIX: call print as a function so the file also parses on
            # Python 3 (single-argument form is valid on Python 2 too).
            print('Reached maximum samples {}'.format(args.max_samples))
            break

        if face_score[i] < min_score:
            continue

        # skip samples where a second face was detected
        if (not np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue

        # FIX: logical `not` instead of bitwise `~`
        if not (0 <= age[i] <= 100):
            continue

        if np.isnan(gender[i]):
            continue

        img = cv2.imread(root_path + str(full_path[i][0]))
        if img is None:  # FIX: skip unreadable files; keeps labels in sync
            continue
        out_genders.append(int(gender[i]))
        out_ages.append(age[i])
        out_imgs.append(cv2.resize(img, (img_size, img_size)))

    output = {"image": np.array(out_imgs), "gender": np.array(out_genders), "age": np.array(out_ages),
              "db": db, "img_size": img_size, "min_score": min_score}
    scipy.io.savemat(output_path, output)
示例#14
0
def eval(env):
  """Replay a per-minute workload trace: for each minute's request count,
  submit that many requests with Poisson-distributed inter-arrival gaps."""
  meta = utils.get_meta(env)
  sender = utils.sender

  pool = ThreadPoolExecutor(max_workers=10000)

  # per-minute request counts come from the 2-hour workload trace
  nums = []
  with open('{}/workload/test_2h.csv'.format(folder), 'r') as f:
    nums = [int(row['tweets']) for row in csv.DictReader(f)]
    print(sum(nums))

  for num in nums:
    logging.info('request num: {}'.format(num))
    mean_gap_ms = (60 * 1000.0) / num
    for gap in np.random.poisson(mean_gap_ms, num):
      pool.submit(sender, meta)
      time.sleep(gap / 1000.0)
    def _load_db(self, db_dir, db):
        """Scan one dataset's .mat metadata and collect [path, age, gender]
        entries for samples passing the quality filters.

        Appends one entry per accepted sample to
        self.image_path_and_age_gender.
        """
        root_path = db_dir
        mat_path = str(root_path) + "/{}.mat".format(db)
        full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
            mat_path, db)

        for i in tqdm(range(len(face_score))):
            if face_score[i] < self.minscore:
                continue

            # skip samples where a second face was detected
            if (not np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
                continue

            # FIX: logical `not` instead of bitwise `~` (always truthy on
            # plain Python bools).
            if not (0 <= age[i] <= 100):
                continue

            if np.isnan(gender[i]):
                continue

            self.image_path_and_age_gender.append(
                [str(root_path + "/" + str(full_path[i][0])), age[i], gender[i]])
示例#16
0
def useful_image_generate(crop_name, db_name):
    """Write "<path>,<age>,<gender>" CSV rows for usable dataset images.

    An image is usable when its face score passes 1.30, no second face was
    detected, its age is in [0, 100], its gender is known, and the file
    actually exists on disk.
    """
    imdb_mat_path = crop_name + os.sep + db_name + '.mat'

    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        imdb_mat_path, db_name)

    images_full_path = []
    images_ages = []
    images_genders = []
    for i in range(len(face_score)):

        if face_score[i] < 1.30:
            continue
        # skip samples where a second face was detected
        if (not np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue
        # FIX: logical `not` instead of bitwise `~`
        if not (0 <= age[i] <= 100):
            continue
        if np.isnan(gender[i]):
            continue
        if not os.path.exists(crop_name + os.sep + str(full_path[i][0])):
            continue

        images_full_path.append(crop_name + os.sep + str(full_path[i][0]))
        images_ages.append(str(age[i]))
        images_genders.append(str(int(gender[i])))

    with open(crop_name + os.sep + db_name + '.csv', 'w') as f:
        # the three lists are appended in lockstep, so zip is safe here
        for path, img_age, img_gender in zip(images_full_path, images_ages,
                                             images_genders):
            f.write(path + ',' + img_age + ',' + img_gender + '\n')
示例#17
0
def main():
    """Build a .mat dataset of resized images and ages (no quality filtering
    beyond the age range — this metadata variant has no face scores)."""
    args = get_args()
    output_path = args.output
    db = args.db
    img_size = args.img_size

    root_path = "data/{}_crop/".format(db)
    mat_path = root_path + "{}.mat".format(db)
    full_path, dob, age = get_meta(mat_path, db)

    out_ages = []
    out_imgs = []

    for i in tqdm(range(len(age))):
        # FIX: logical `not` instead of bitwise `~` (always truthy on plain
        # Python bools).
        if not (0 <= age[i] <= 100):
            continue

        img = cv2.imread(root_path + str(full_path[i][0]))
        if img is None:  # FIX: skip unreadable files; keeps ages/images in sync
            continue
        out_ages.append(age[i])
        out_imgs.append(cv2.resize(img, (img_size, img_size)))

    output = {"image": np.array(out_imgs), "age": np.array(out_ages),
              "db": db, "img_size": img_size}
    scipy.io.savemat(output_path, output)
示例#18
0
import matplotlib.pyplot as plt
import numpy as np
import cv2
from utils import get_meta

# Which dataset to inspect ("wiki" or "imdb").
db = "wiki"
# db = "imdb"
mat_path = "data/{}_crop/{}.mat".format(db, db)
full_path, dob, gender, photo_taken, face_score, second_face_score, age\
    = get_meta(mat_path, db)
print("#images: {}".format(len(face_score)))
print("#images with inf scores: {}".format(np.isinf(face_score).sum()))
# Histogram of (positive) primary face-detector scores.
hist = plt.hist(face_score[face_score > 0],
                bins=np.arange(0, 8, 0.2),
                color='b')
plt.xlabel("face score")
# second_face_score is NaN when no second face was detected.
print("#images with second face scores: {}".format(
    (~np.isnan(second_face_score)).sum()))
hist = plt.hist(second_face_score[~np.isnan(second_face_score)],
                bins=np.arange(0, 8, 0.2),
                color='b')
plt.xlabel("second face score")
示例#19
0
def main():
    """Split a filtered face dataset into train/test image folders plus
    matching .mat label files.

    Samples passing the quality filters are shuffled, split by
    (1 - validation_split), copied into <root>/train and <root>/test, and
    their labels saved with scipy.io.savemat.
    """
    args = get_args()
    db = args.db
    min_score = args.min_score
    validation = args.validation_split

    root_path = "data/{}_crop/".format(db)
    mat_path = root_path + "{}.mat".format(db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)

    out_genders = []
    out_ages = []
    out_fullpath = []
    for i in tqdm(range(len(face_score))):
        if face_score[i] < min_score:
            continue

        # skip samples where a second face was detected
        if (not np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue

        # FIX: logical `not` instead of bitwise `~` (always truthy on plain
        # Python bools).
        if not (0 <= age[i] <= 100):
            continue

        if np.isnan(gender[i]):
            continue

        out_genders.append(int(gender[i]))
        out_ages.append(age[i])
        out_fullpath.append(full_path[i][0])

    # shuffle all three arrays with the same permutation
    # (FIX: removed stray debug print of type(out_genders))
    data_num = len(out_ages)
    indexes = np.arange(data_num)
    np.random.shuffle(indexes)
    out_fullpath = np.array(out_fullpath)[indexes]
    out_genders = np.array(out_genders)[indexes]
    out_ages = np.array(out_ages)[indexes]

    train_num = int(data_num * (1 - validation))
    genders_train = out_genders[:train_num]
    genders_test = out_genders[train_num:]
    age_train = out_ages[:train_num]
    age_test = out_ages[train_num:]
    fullpath_train = out_fullpath[:train_num]
    fullpath_test = out_fullpath[train_num:]

    # bare filenames; assumes paths look like "<subdir>/<name>" — TODO confirm
    out_name = [name.split("/")[1] for name in out_fullpath]
    name_train = out_name[:train_num]
    name_test = out_name[train_num:]

    train_dir = Path(__file__).resolve().parent.joinpath(root_path, "train")
    train_dir.mkdir(parents=True, exist_ok=True)

    train_mat = {"full_path": np.array(name_train), "age": np.array(age_train),
                 "gender": np.array(genders_train), "db": db, "min_score": min_score}
    print(train_dir)

    for i in tqdm(range(len(fullpath_train))):
        copy2(root_path + fullpath_train[i], train_dir)
    # NOTE(review): savemat receives the directory path; scipy appends
    # ".mat", so this writes "train.mat"/"test.mat" next to the folders —
    # confirm that destination is intended.
    scipy.io.savemat(str(train_dir), train_mat)

    test_dir = Path(__file__).resolve().parent.joinpath(root_path, "test")
    test_dir.mkdir(parents=True, exist_ok=True)
    test_mat = {"full_path": np.array(name_test), "age": np.array(age_test),
                "gender": np.array(genders_test), "db": db, "min_score": min_score}

    scipy.io.savemat(str(test_dir), test_mat)
    for i in tqdm(range(len(fullpath_test))):
        copy2(root_path + fullpath_test[i], test_dir)
示例#20
0
def main():
    """Build train/valid/test file lists for the VCTK corpus.

    Reads speaker metadata, selects speakers by gender/accent, collects
    per-speaker acoustic feature files (filtered by microphone), pairs them
    with phone transcriptions, caches the flattened list on disk, then
    splits each selected speaker's utterances into 10 validation / 20 test /
    rest training and writes the three list files.
    """

    datadir = '/data/evs/VCTK'

    version = 'VCTK-Corpus-0.92'  # 'VCTK-Corpus-0.92' or 'VCTK-sie'
    sr = '48k'  # '22k' or '48k'

    # version = 'VCTK-sie' # 'VCTK-Corpus-0.92' or 'VCTK-sie'
    # sr = '22k' # '22k' or '48k'

    # speakers that the original authors selected (another source)
    spk_file = os.path.join(datadir, version, 'speaker-selected.txt')
    sids_sel = ''.join(open(spk_file).readlines()).split('\n')

    # # runtime mode
    # args = parse_args()

    # interactive mode (comment out before running the script)
    # NOTE(review): an ArgumentParser instance is used here as a plain
    # attribute bag instead of parsing real CLI arguments.
    args = argparse.ArgumentParser()
    # args.acoustic_dir = '{}/{}/wav{}_silence_trimmed'.format(datadir, version, sr[:2])
    args.acoustic_dir = '{}/{}/spec'.format(datadir, version)
    args.text_dir = '{}/{}/text'.format(datadir, version)
    args.list_dir = '{}/{}/list'.format(datadir, version)
    args.metafile = '{}/{}/speaker-info.txt'.format(datadir, version)
    args.delimiter = '|'
    args.seed = 0
    args.ordered = True
    args.gender = 'both'
    args.accents_excluded = ['Indian']
    args.mic = 'dual'

    # print out arguments
    print('acoustic dir: {}'.format(args.acoustic_dir))
    print('text dir: {}'.format(args.text_dir))
    print('list dir: {}'.format(args.list_dir))
    print('meta file: {}'.format(args.metafile))
    print('delimiter: {}'.format(args.delimiter))
    print('list random seed: {}'.format(args.seed))
    print('sort list by duration: {}'.format(args.ordered))
    print('gender: {}'.format(args.gender))
    print('accents excluded: {}'.format(args.accents_excluded))
    print('microphone: {}'.format(args.mic))

    # get speaker info (list of dicts) from meta file
    speakers = get_meta(args.metafile)

    # accent-to-speaker distribution from speaker info
    accent2nspkr = get_accent2nspk(speakers)

    speakers = sel_speaker(speakers,
                           gender=args.gender,
                           accents_excluded=args.accents_excluded)
    nspeakers = len(speakers)
    print('{} speakers selected by gender ({}) and accents (no {})'.format(
        nspeakers, args.gender, ' '.join(args.accents_excluded)))

    # get speaker IDs for those being selected; every pre-selected speaker
    # must be present in the filtered pool
    sids = sorted(([spk['id'] for spk in speakers]))
    sids_intersected = sorted([s for s in sids if s in sids_sel])
    assert len(sids_intersected)==len(sids_sel), \
      'selected speakers not found in speaker pool!'
    del sids_intersected

    # get speaker IDs for those not being selected (printed for inspection)
    sids_unsel = [spk['id'] for spk in speakers if spk['id'] not in sids_sel]
    for spk in speakers:
        if spk['id'] in sids_unsel:
            print(spk)

    ext = '.spec.npy'  # '.spec.npy' or '-feats_norm.npy'
    # ext = '-feats_norm.npy'  # '.spec.npy' or '-feats_norm.npy'

    listname = 'audio-txt-nframe-nphone_no-indian.txt'
    # listname = 'wgannorm-txt-nframe-nphone_no-indian.txt'

    # reuse the cached file list when it exists; otherwise rebuild and save
    listpath = os.path.join(args.list_dir, listname)
    if os.path.isfile(listpath):
        flists_flat = read_flist(listpath)
    else:

        # get file list per speaker
        flists = []
        for i, sid in enumerate(sids):
            print('processing speaker {} ({}/{}) ...'.format(
                sid, i, nspeakers))
            flist = []
            feats = sorted(
                glob.glob(
                    os.path.join(args.acoustic_dir, sid, '*{}'.format(ext))))
            # filter by microphone
            if args.mic != 'dual':
                feats = [
                    f for f in feats if 'mic{}{}'.format(args.mic, ext) in f
                ]
            for j, feat in enumerate(feats):
                feat_no_ext = os.path.join(
                    os.path.dirname(feat),
                    os.path.basename(feat).split('.')[0])
                # feature filenames look like "<sid>_<uid>...."
                uid = os.path.basename(feat).split('.')[0].split('_')[1]
                txtfile = '{}_{}.phones'.format(sid, uid)
                txtpath = os.path.join(args.text_dir, sid, txtfile)
                # only keep features that have a matching phone transcription
                if os.path.isfile(txtpath):
                    phone_str = open(txtpath).readline().strip()
                    nphones = len(phone_str.split())
                    log_spectrogram = np.load(feat)
                    nframes = log_spectrogram.shape[0]
                    flist.append([feat_no_ext, phone_str, nframes, nphones])

            # append file list for current speaker
            flists.append(flist)

        # flatten 2-layer file list (flists) to 1-layer
        flists_flat = [item for sublist in flists for item in sublist]

        # write file list
        write_flist(flists_flat, args.delimiter, listpath)

    # find sid-uid with both mics
    if args.mic == 'dual':
        flists_dual, flists_mono = exclude_mono(flists_flat)
        flists_all = flists_dual[:]
    else:
        flists_all = flists_flat[:]

    # # find common utterance ids (potentially to choose valid, test set from)
    # uid2text = get_uid2text(flists_all)
    # uid2ntexts = get_uid2ntexts(uid2text)
    # nuids_uniq_text = len([1 for (uid, ntexts) in uid2ntexts.items() if ntexts==1])
    # msg = 'uids with single text / # of uids: {} / {}'
    # print(msg.format(nuids_uniq_text, len(uid2ntexts)))
    #
    # # find the dictionary of {sid:ntexts}
    # sid2nuids = find_sid2nuids(flists_all)
    #
    # # select sid with enough utterances
    # nuids_cutoff = 600 # select sids with at least 600 utterances
    # nuids_sorted = sorted(sid2nuids.values(), reverse=True)
    # sids_selected = [sid for sid in sid2nuids.keys() if sid2nuids[sid]>=nuids_cutoff]

    # get 2-layer file lists (file list per sid)
    flists = get_2dflist(flists_all)

    # randomly select 10/20 utterances for validation/testing per speaker
    flists_train, flists_valid, flists_test = [], [], []
    for i, sid in enumerate(sids_sel):
        flists_sid = flists[sid]
        uids_sid = get_uids(flists_sid)

        # randomize utterance ids with fixed random seed (reproducible split)
        random.seed(args.seed)
        random.shuffle(uids_sid)

        uids_valid = uids_sid[:10]
        uids_test = uids_sid[10:30]
        uids_train = uids_sid[30:]

        flist_train = sel_flist(uids_train, flists_sid)
        flist_valid = sel_flist(uids_valid, flists_sid)
        flist_test = sel_flist(uids_test, flists_sid)

        flists_train += flist_train
        flists_valid += flist_valid
        flists_test += flist_test

    # write out split file list
    # listname = 'wgannorm-txt-nframe-nphone_no-indian_train.txt'
    listname = 'audio-txt-nframe-nphone_no-indian_train.txt'
    write_flist(flists_train, args.delimiter,
                os.path.join(args.list_dir, listname))
    # listname = 'wgannorm-txt-nframe-nphone_no-indian_valid.txt'
    listname = 'audio-txt-nframe-nphone_no-indian_valid.txt'
    write_flist(flists_valid, args.delimiter,
                os.path.join(args.list_dir, listname))
    # listname = 'wgannorm-txt-nframe-nphone_no-indian_test.txt'
    listname = 'audio-txt-nframe-nphone_no-indian_test.txt'
    write_flist(flists_test, args.delimiter,
                os.path.join(args.list_dir, listname))
示例#21
0
def show_imgs(img_paths):
    """Show `img_num` randomly chosen images in a grid, each titled with its
    model prediction."""
    chosen_ids = np.random.choice(len(img_paths), img_num, replace=False)
    for plot_idx, img_id in enumerate(chosen_ids, start=1):
        print(img_id)
        plt.subplot(rows, cols, plot_idx)
        image = cv2.imread(path_root + str(img_paths[img_id]))
        plt.title(str(prediction(image)))
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.axis('off')

    plt.show()


mat_path = str(path_root + 'wiki.mat')


full_path, dob, gender, photo_taken, face_score, second_face_score, age\
    = get_meta(mat_path, 'wiki')

img_paths = []

# Keep images whose face score is confident enough and where no second face
# was detected (second_face_score is NaN when absent).
for i in range(len(face_score)):
    if face_score[i] >= 1.0 and np.isnan(second_face_score[i]):
        img_paths.append(full_path[i][0])

print("#images with scores >= than 1.0 and no second face: {}".format(
    len(img_paths)))

show_imgs(img_paths)
示例#22
0
from utils import get_meta
import cv2
import os

# Load the IMDB face metadata once at module import time.
full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
    "imdb_crop/imdb.mat", "imdb")

# preprocess for gender classification

def preprocess_for_gen():
    # Copy up to ~5000 female-labelled (gender == 0.0) and ~5000 male-labelled
    # (gender == 1.0) crops into the FF/ and MM/ folders for gender training.
    # NOTE(review): Python 2 print statement; this block appears truncated in
    # this excerpt — the male branch ends without the imread/imwrite calls.
    print str(full_path.shape[0]) + " images found"

    # Counters for copied female/male images (countnan is never incremented
    # in the visible portion of this function).
    countf = 0
    countm = 0
    countnan = 0

    for i in range(full_path.shape[0]):
        # tail is the bare filename of the crop.
        head, tail = os.path.split(full_path[i][0])

        if gender[i] == 0.0 and countf <= 5000:
            newtail = "FF/" + tail
            _path = "imdb_crop/" + str(full_path[i][0])
            t = cv2.imread(_path)
            # imwrite returns False on failure; only count successful copies.
            tru = cv2.imwrite(newtail, t)
            if tru:
                countf += 1

        elif gender[i] == 1.0 and countm <= 5000:
            newtail = "MM/" + tail
            _path = "imdb_crop/" + str(full_path[i][0])
示例#23
0
    def __init__(self, source_path, cache_folder, destination, use_in_feeds,
        translations, default_lang, blog_url, messages):
        """Initialize post.

        The base path is the .txt post file. From it we calculate
        the meta file, as well as any translations available, and
        the .html fragment file path.

        `compile_html` is a function that knows how to compile this Post to
        html.
        """
        self.prev_post = None
        self.next_post = None
        self.blog_url = blog_url
        self.is_draft = False
        self.source_path = source_path  # posts/blah.txt
        self.post_name = os.path.splitext(source_path)[0]  # posts/blah
        # cache/posts/blah.html
        self.base_path = os.path.join(cache_folder, self.post_name + ".html")
        self.metadata_path = self.post_name + ".meta"  # posts/blah.meta
        self.folder = destination
        self.translations = translations
        self.default_lang = default_lang
        self.messages = messages
        if os.path.isfile(self.metadata_path):
            with codecs.open(self.metadata_path, "r", "utf8") as meta_file:
                meta_data = meta_file.readlines()
            # Pad to six fields so the unpack below cannot fail.
            while len(meta_data) < 6:
                meta_data.append("")
            (default_title, default_pagename, self.date, self.tags,
                self.link, default_description) = \
                    [x.strip() for x in meta_data][:6]
        else:
            # No .meta file: extract metadata from the post source itself.
            (default_title, default_pagename, self.date, self.tags,
                self.link, default_description) = \
                    utils.get_meta(self.source_path)

        if not default_title or not default_pagename or not self.date:
            raise OSError("You must set a title and slug and date!")

        self.date = utils.to_datetime(self.date)
        self.tags = [x.strip() for x in self.tags.split(',')]
        # BUGFIX: was `filter(None, self.tags)`, which on Python 3 returns a
        # one-shot iterator; the repeated membership tests below would then
        # consume it and misbehave. The list comprehension is equivalent on
        # Python 2 and correct on Python 3.
        self.tags = [tag for tag in self.tags if tag]

        # While draft comes from the tags, it's not really a tag
        self.use_in_feeds = use_in_feeds and "draft" not in self.tags
        self.is_draft = 'draft' in self.tags
        self.tags = [t for t in self.tags if t != 'draft']

        self.pagenames = {}
        self.titles = {}
        self.descriptions = {}
        # Load internationalized titles
        # TODO: this has gotten much too complicated. Rethink.
        for lang in translations:
            if lang == default_lang:
                self.titles[lang] = default_title
                self.pagenames[lang] = default_pagename
                self.descriptions[lang] = default_description
            else:
                metadata_path = self.metadata_path + "." + lang
                source_path = self.source_path + "." + lang
                try:
                    if os.path.isfile(metadata_path):
                        with codecs.open(
                                metadata_path, "r", "utf8") as meta_file:
                            meta_data = [x.strip() for x in
                                meta_file.readlines()]
                            while len(meta_data) < 6:
                                meta_data.append("")
                            self.titles[lang] = meta_data[0] or default_title
                            self.pagenames[lang] = meta_data[1] or\
                                default_pagename
                            self.descriptions[lang] = meta_data[5] or\
                                default_description
                    else:
                        ttitle, ppagename, tmp1, tmp2, tmp3, ddescription = \
                            utils.get_meta(source_path)
                        self.titles[lang] = ttitle or default_title
                        self.pagenames[lang] = ppagename or default_pagename
                        self.descriptions[lang] = ddescription or\
                            default_description
                except Exception:
                    # BUGFIX: was a bare `except:`, which also swallowed
                    # KeyboardInterrupt/SystemExit. Fall back to defaults.
                    self.titles[lang] = default_title
                    self.pagenames[lang] = default_pagename
                    self.descriptions[lang] = default_description
示例#24
0
Age -> idade
dob -> data de nascimento (não é necessário para o pré-processamento)
second_face_score -> imagens em que há mais de um rosto detectado (descartar)

'''

# Dataset selection and derived paths for the cropped IMDB metadata file.
db = "imdb"
root_path = "{}_crop/".format(db)
mat_path = root_path + "{}.mat".format(db)
min_score = 1.00  # Minimum face-detector confidence required to keep a sample
img_size = 64  # Target (square) resolution for resized crops

# Accumulators for the filtered labels and images.
out_genders = []
out_ages = []
out_imgs = []
# Load the metadata arrays for the whole dataset from the .mat file.
full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
    mat_path, db)

# Filter out low-confidence, multi-face, out-of-range-age and unknown-gender
# samples. NOTE(review): this loop is truncated in this excerpt — it ends
# right after the gender append.
for i in tqdm(range(len(face_score))):
    if face_score[i] < min_score:
        continue

    # Reject images where a second face was detected with a positive score.
    if (~np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
        continue

    # `~` works as logical NOT here only because the comparison yields a
    # NumPy bool scalar (assumes age is a NumPy array — TODO confirm).
    if ~(0 <= age[i] <= 100):
        continue

    if np.isnan(gender[i]):
        continue

    out_genders.append(int(gender[i]))
def main():
    """Build shuffled train/test .mat shards of face crops from an IMDB/WIKI db.

    Reads CLI options, selects sample indices passing the score/age/gender
    filters (via get_passed), shuffles them, and writes chunks of at most
    `max_num_per_file` images per output file with write_mat, restarting the
    shard counter at the train/test boundary.
    """
    args = get_args()
    output_path = args.output
    db = args.db
    max_count = args.max_count
    img_size = args.img_size
    min_score = args.min_score
    max_num_per_file = args.max_num_per_file
    train_ratio = args.train_ratio

    root_path = "data/{}_crop/".format(db)
    mat_path = root_path + "{}.mat".format(db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)

    out_genders = []
    out_ages = []
    out_imgs = []

    length = len(face_score)
    file_count = 0
    # Split "dir/name.ext" into directory prefix, stem and extension.
    outpath_prefix = output_path.split('/')[:-1]
    outpath_prefix = path_concat(outpath_prefix)
    filename = output_path.split('/')[-1].split('.')[0]
    ext = output_path.split('/')[-1].split('.')[1]
    total_count = 0
    # Indices of samples passing all filters, visited in random order.
    indexes = get_passed(length, min_score, age, face_score, gender)
    random.shuffle(indexes)
    effective_length = len(indexes)
    train_length = int(max_count *
                       train_ratio) if max_count is not None else int(
                           effective_length * train_ratio)
    print('train_length=%s' % train_length)
    for i in tqdm(indexes):
        out_genders.append(int(gender[i]))
        out_ages.append(age[i])
        # Load as color (flag 1), resize, BGR->RGB, then CHW scaled to [0, 1].
        img = cv2.imread(root_path + str(full_path[i][0]), 1)
        img = cv2.resize(img, (img_size, img_size))
        img = img[..., ::-1]
        img = np.around(np.transpose(img, (2, 0, 1)) / 255.0, decimals=12)

        out_imgs.append(img)
        total_count += 1
        if max_count is not None and total_count >= max_count:
            break

        # Flush a shard when it is full, or when the train partition ends.
        if (len(out_imgs) % max_num_per_file == 0 and len(out_imgs) > 0) or \
                total_count == train_length:
            write_mat(out_imgs, out_genders, out_ages, db, img_size, min_score,
                      total_count, train_length, outpath_prefix, filename,
                      file_count, ext)
            file_count += 1
            if total_count == train_length:
                print('train range end')
                # Test shards restart numbering from zero.
                file_count = 0
            out_imgs = []
            out_genders = []
            out_ages = []
    # Flush whatever remains after the loop.
    write_mat(out_imgs, out_genders, out_ages, db, img_size, min_score,
              total_count, train_length, outpath_prefix, filename, file_count,
              ext)
示例#26
0
def main():
    """Split filtered IMDB/WIKI samples into train and validation path lists.

    Samples passing the score/single-face/age/gender filters are assigned to
    the training set except for the last 10000 metadata rows, which become the
    validation set. Image paths (not pixel data) are collected, and female/male
    training counts are reported. The savemat calls are currently disabled.
    """
    args = get_args()
    output_path = args.train_out
    valout_path = args.val_out
    db = args.db
    img_size = args.img_size
    min_score = args.min_score

    # NOTE(review): image root is absolute while the .mat lives under ./data —
    # confirm this asymmetry is intentional.
    root_path = "/data/{}_crop/".format(db)
    mat_path = "./data/{}_crop/".format(db)
    mat_path = mat_path + "{}.mat".format(db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)

    out_genders = []
    out_ages = []
    #out_imgs = []
    img_path = []
    val_genders = []
    val_ages = []
    val_path = []
    f_count = 0
    m_count = 0

    for i in tqdm(range(len(face_score))):
        if face_score[i] < min_score:
            continue

        # Reject images with a positively scored second face.
        if (~np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue

        # `~` acts as logical NOT on the NumPy bool comparison result.
        if ~(0 <= age[i] <= 100):
            continue

        if np.isnan(gender[i]):
            continue
        # Everything but the last 10000 rows goes to the training split.
        if i < (len(face_score) - 10000):
            out_genders.append(int(gender[i]))
            if int(gender[i]) == 0:
                f_count += 1
            elif int(gender[i]) == 1:
                m_count += 1
            out_ages.append(age[i])
            #img = cv2.imread(root_path + str(full_path[i][0]))
            img_path.append(root_path + str(full_path[i][0]))
            #print(full_path[i][0])
            #out_imgs.append(cv2.resize(img, (img_size, img_size)))
        else:
            val_genders.append(int(gender[i]))
            val_ages.append(age[i])
            val_path.append(root_path + str(full_path[i][0]))

    output = {
        "image_path": np.array(img_path),
        "gender": np.array(out_genders),
        "age": np.array(out_ages),
        "db": db,
        "img_size": img_size,
        "min_score": min_score
    }
    valout = {
        "image_path": np.array(val_path),
        "gender": np.array(val_genders),
        "age": np.array(val_ages),
        "db": db,
        "img_size": img_size,
        "min_score": min_score
    }
    #scipy.io.savemat(output_path, output)
    #scipy.io.savemat(valout_path, valout)
    # BUGFIX: corrected "femal" typo in the user-facing report.
    print("the female num ", f_count)
    print("the male   num ", m_count)
示例#27
0
    def __init__(self, source_path, destination, use_in_feeds,
        translations, default_lang, blog_url, messages):
        """Initialize post.

        The base path is the .txt post file. From it we calculate
        the meta file, as well as any translations available, and
        the .html fragment file path.

        `compile_html` is a function that knows how to compile this Post to
        html.
        """
        self.prev_post = None
        self.next_post = None
        self.blog_url = blog_url
        self.is_draft = False
        self.source_path = source_path  # posts/blah.txt
        self.post_name = os.path.splitext(source_path)[0]  # posts/blah
        # cache/posts/blah.html
        self.base_path = os.path.join('cache', self.post_name + ".html")
        self.metadata_path = self.post_name + ".meta"  # posts/blah.meta
        self.folder = destination
        self.translations = translations
        self.default_lang = default_lang
        self.messages = messages
        if os.path.isfile(self.metadata_path):
            with codecs.open(self.metadata_path, "r", "utf8") as meta_file:
                meta_data = meta_file.readlines()
            # Pad to six fields so the unpack below cannot fail.
            while len(meta_data) < 6:
                meta_data.append("")
            (default_title, default_pagename, self.date, self.tags,
                self.link, default_description) = \
                    [x.strip() for x in meta_data][:6]
        else:
            # No .meta file: extract metadata from the post source itself.
            (default_title, default_pagename, self.date, self.tags,
                self.link, default_description) = \
                    utils.get_meta(self.source_path)

        if not default_title or not default_pagename or not self.date:
            raise OSError("You must set a title and slug and date!")

        self.date = utils.to_datetime(self.date)
        self.tags = [x.strip() for x in self.tags.split(',')]
        # BUGFIX: was `filter(None, self.tags)`, which on Python 3 returns a
        # one-shot iterator; the repeated membership tests below would then
        # consume it and misbehave. The list comprehension is equivalent on
        # Python 2 and correct on Python 3.
        self.tags = [tag for tag in self.tags if tag]

        # While draft comes from the tags, it's not really a tag
        self.use_in_feeds = use_in_feeds and "draft" not in self.tags
        self.is_draft = 'draft' in self.tags
        self.tags = [t for t in self.tags if t != 'draft']

        self.pagenames = {}
        self.titles = {}
        self.descriptions = {}
        # Load internationalized titles
        # TODO: this has gotten much too complicated. Rethink.
        for lang in translations:
            if lang == default_lang:
                self.titles[lang] = default_title
                self.pagenames[lang] = default_pagename
                self.descriptions[lang] = default_description
            else:
                metadata_path = self.metadata_path + "." + lang
                source_path = self.source_path + "." + lang
                try:
                    if os.path.isfile(metadata_path):
                        with codecs.open(
                                metadata_path, "r", "utf8") as meta_file:
                            meta_data = [x.strip() for x in
                                meta_file.readlines()]
                            while len(meta_data) < 6:
                                meta_data.append("")
                            self.titles[lang] = meta_data[0] or default_title
                            self.pagenames[lang] = meta_data[1] or\
                                default_pagename
                            self.descriptions[lang] = meta_data[5] or\
                                default_description
                    else:
                        ttitle, ppagename, tmp1, tmp2, tmp3, ddescription = \
                            utils.get_meta(source_path)
                        self.titles[lang] = ttitle or default_title
                        self.pagenames[lang] = ppagename or default_pagename
                        self.descriptions[lang] = ddescription or\
                            default_description
                except Exception:
                    # BUGFIX: was a bare `except:`, which also swallowed
                    # KeyboardInterrupt/SystemExit. Fall back to defaults.
                    self.titles[lang] = default_title
                    self.pagenames[lang] = default_pagename
                    self.descriptions[lang] = default_description
示例#28
0
文件: nikola.py 项目: svankie/nikola
    def __init__(self, source_path, destination, use_in_feeds, translations, default_lang, blog_url, compile_html):
        """Initialize post.

        The base path is the .txt post file. From it we calculate
        the meta file, as well as any translations available, and
        the .html fragment file path.

        `compile_html` is a function that knows how to compile this Post to
        html.
        """
        self.prev_post = None
        self.next_post = None
        self.use_in_feeds = use_in_feeds
        self.blog_url = blog_url
        self.source_path = source_path  # posts/blah.txt
        self.post_name = os.path.splitext(source_path)[0]  # posts/blah
        self.base_path = os.path.join("cache", self.post_name + ".html")  # cache/posts/blah.html
        self.metadata_path = self.post_name + ".meta"  # posts/blah.meta
        self.folder = destination
        self.translations = translations
        self.default_lang = default_lang
        if os.path.isfile(self.metadata_path):
            with codecs.open(self.metadata_path, "r", "utf8") as meta_file:
                meta_data = meta_file.readlines()
            # Pad to five fields so the unpack below cannot fail.
            while len(meta_data) < 5:
                meta_data.append("")
            default_title, default_pagename, self.date, self.tags, self.link = [x.strip() for x in meta_data][:5]
        else:
            # No .meta file: extract metadata from the post source itself.
            default_title, default_pagename, self.date, self.tags, self.link = utils.get_meta(self.source_path)

        if not default_title or not default_pagename or not self.date:
            # BUGFIX: was Python-2-only `raise OSError, "..."` syntax; the
            # call form is valid on both Python 2 and 3.
            raise OSError("You must set a title and slug and date!")

        self.date = utils.to_datetime(self.date)
        self.tags = [x.strip() for x in self.tags.split(",")]
        # BUGFIX: was `filter(None, self.tags)` — a one-shot iterator on
        # Python 3; the list comprehension is equivalent on Python 2.
        self.tags = [tag for tag in self.tags if tag]
        self.compile_html = compile_html

        self.pagenames = {}
        self.titles = {}
        # Load internationalized titles
        for lang in translations:
            if lang == default_lang:
                self.titles[lang] = default_title
                self.pagenames[lang] = default_pagename
            else:
                metadata_path = self.metadata_path + "." + lang
                source_path = self.source_path + "." + lang
                try:
                    if os.path.isfile(metadata_path):
                        with codecs.open(metadata_path, "r", "utf8") as meta_file:
                            meta_data = [x.strip() for x in meta_file.readlines()]
                            while len(meta_data) < 2:
                                meta_data.append("")
                            self.titles[lang] = meta_data[0] or default_title
                            self.pagenames[lang] = meta_data[1] or default_pagename
                    else:
                        ttitle, ppagename, tmp1, tmp2, tmp3 = utils.get_meta(source_path)
                        self.titles[lang] = ttitle or default_title
                        self.pagenames[lang] = ppagename or default_pagename
                except Exception:
                    # BUGFIX: was a bare `except:`, which also swallowed
                    # KeyboardInterrupt/SystemExit. Fall back to defaults.
                    self.titles[lang] = default_title
                    self.pagenames[lang] = default_pagename
示例#29
0
def main():
    """Filter IMDB/WIKI face crops and pack them into an .h5 or .mat dataset.

    Samples must pass the detector-score, single-face, age-range and
    known-gender filters; surviving images are resized to img_size x img_size
    and written together with their gender/age labels.
    """
    args = get_args()
    output_path = args.output
    db = args.db
    img_size = args.img_size
    min_score = args.min_score

    if args.db_path:
        root_path = args.db_path
    else:
        root_path = "data/{}_crop/".format(db)
    mat_path = os.path.join(root_path, "{}.mat".format(db))

    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)

    out_genders = []
    out_ages = []
    sample_num = len(face_score)
    # Pre-allocate for the worst case; only the first valid_sample_num rows
    # are actually written out.
    out_imgs = np.empty((sample_num, img_size, img_size, 3), dtype=np.uint8)
    valid_sample_num = 0

    print(f"root_path = {root_path}")

    for i in tqdm(range(sample_num)):
        if face_score[i] < min_score:
            continue

        # Reject images with a positively scored second face.
        if (~np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue

        # `~` acts as logical NOT on the NumPy bool comparison result.
        if ~(0 <= age[i] <= 100):
            continue

        if np.isnan(gender[i]):
            continue

        out_genders.append(int(gender[i]))
        out_ages.append(age[i])
        img_path = os.path.join(root_path, str(full_path[i][0]))
        # NOTE(review): cv2.imread returns None for missing/unreadable files,
        # which would make resize raise — confirm all listed files exist.
        img = cv2.imread(img_path)
        out_imgs[valid_sample_num] = cv2.resize(img, (img_size, img_size))
        valid_sample_num += 1

    # BUGFIX: report the number of valid samples kept, not the length of the
    # pre-allocated buffer (len(out_imgs) is always sample_num).
    print(f"Saving {valid_sample_num} items")

    if args.h5:
        base, ext = os.path.splitext(output_path)
        output_path = base + '.h5'
        h5 = h5py.File(output_path, mode='w')
        h5.create_dataset('image', data=out_imgs[:valid_sample_num])
        h5.create_dataset('gender', data=np.array(out_genders))
        h5.create_dataset('age', data=np.array(out_ages))
        h5.attrs['db'] = db
        h5.attrs['img_size'] = img_size
        h5.attrs['min_score'] = min_score
        h5.close()

        print(f"Data has been written to {output_path}.")
    else:
        output = {
            "image": out_imgs[:valid_sample_num],
            "gender": np.array(out_genders),
            "age": np.array(out_ages),
            "db": db,
            "img_size": img_size,
            "min_score": min_score
        }
        scipy.io.savemat(output_path, output, do_compression=True)