Example #1
def download_synthetic(version, images_dir):
    temp = temp_dir()
    file_handle, _ = urlretrieve(version["url"])
    with zipfile.ZipFile(file_handle, 'r') as zipObj:
        zipObj.extractall(temp)

    labels_dir = path.join(temp, "hand_labels_synth")

    for split in ['synth1', 'synth2', 'synth3', 'synth4']:
        original_split_dir = path.join(labels_dir, split)

        split_dir = path.join(images_dir, split)
        makedir(split_dir)

        files = sorted(
            [f for f in os.listdir(original_split_dir) if f.endswith('.json')])
        for file in files:
            content = json.load(open(path.join(original_split_dir, file), "r"))
            fname = file.replace(".json", ".jpg")

            copyfile(path.join(original_split_dir, fname),
                     path.join(split_dir, fname))

            yield split, {
                "image": "/".join(["images", split, fname]),
                "hand": "left" if content["is_left"] else "right",
                "pose": content["hand_pts"]
            }
Example #2
def pose_video(datum):
    if not exists(datum['video']):
        try:
            URLopener().retrieve(datum["video_url"], datum["video"])
        except Exception:
            # Download failed: create an empty pose directory so this datum is skipped below
            makedir(datum["pose_dir"])

    if not exists(datum["pose_dir"]):
        gpu = get_empty_gpu()

        # Create Container
        container_id = Docker.create_container(DOCKER_NAME, "-it -v " + datum["video"] + ":/video.mp4")

        def remove_container():
            Docker.remove_container(container_id)

        try:
            # Start Container
            Docker.start_container(container_id)

            cmd = "./build/examples/openpose/openpose.bin --video /video.mp4 --model_pose BODY_25 --display 0 --render_pose 0 --write_json /out/ --hand --face --num_gpu 1 "
            cmd += " --num_gpu_start " + str(gpu)
            Docker.exec_container(container_id, "bash -c 'cd /openpose && " + cmd + "'")

            # Copy files
            Docker.cp_container_directory(container_id, datum["pose_dir"], "/out/")
        finally:
            # Always remove the container; any exception still propagates to the caller
            remove_container()

    return True
Example #3
def cp_container_directory(container_id: str, local_dir: str,
                           docker_dir: str):
    makedir(local_dir)
    d_cp = "nvidia-docker cp " + container_id + ":" + docker_dir + ". " + local_dir
    print(d_cp)
    status = os.system(d_cp)
    if int(status) != 0:
        raise Exception("CP Status " + str(status))
Example #4
def download(version, directory: str, dataset: list):
    if version["version"] != "Mediapipe":
        raise ValueError("Running this addon version is not implemented")

    poses_dir = path.join(directory, "poses")
    makedir(poses_dir)

    Docker.verify_image_exists(DOCKER_NAME)

    should_cleanup = False
    while True:
        # Re-scan the poses directory and queue every datum whose pose output is still missing
        existing = {path.join(poses_dir, di) for di in os.listdir(poses_dir)}
        missing_data = []
        for datum in dataset:
            datum["pose_dir"] = path.join(poses_dir, datum["id"])
            if datum["pose_dir"] not in existing:
                missing_data.append(datum)

        # Break when finished
        if len(missing_data) == 0:
            break

        print(missing_data)

        should_cleanup = True
        print("Done",
              len(dataset) - len(missing_data), "/", len(dataset), "tasks")

        # should_cleanup = False
        # for datum in tqdm(missing_data):
        #     pose_video(datum)

        distributed.clear_tasks()
        distributed.kill_slaves()
        clean_dockers()
        distributed.spawn_workers().flower()
        distributed.run(pose_video, missing_data[:50000])

    if should_cleanup:
        distributed.kill_slaves()
        clean_dockers()

    with jsonlines.open(path.join(directory, "index.jsonl"),
                        mode='w') as writer:
        for datum in tqdm(dataset):
            writer.write({
                "id": datum["id"],
                "poses": get_directory_hands(datum["pose_dir"])
            })
Example #5
def download(directory: str, version, module_path: str, dataset=None):
    makedir(directory)
    version_dir = path.join(directory, version["version"])
    index_path = path.join(version_dir, 'index.jsonl')
    if not exists(version_dir) or not exists(index_path):
        makedir(version_dir)
        module = modular_import("module", module_path)
        if dataset is None:
            module.download(version, version_dir)
        else:
            module.download(version, version_dir, dataset)

    data = list(jsonlines.open(index_path))

    return version_dir, data
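
A minimal usage sketch for this wrapper (the directory, version dict, and module path are illustrative assumptions, not taken from the source):

# Hypothetical call: fetch (or reuse) the "synthetic" version of a hand-images dataset,
# delegating the actual download logic to the module at the given path.
version = {"version": "synthetic", "url": "http://example.com/hand_labels_synth.zip"}
version_dir, data = download("datasets/hands/versions", version,
                             "datasets/hands/download.py")
print("Loaded", len(data), "records from", version_dir)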
Example #6
def download_digits(images_dir):
    from keras.datasets import mnist
    mnist_data = mnist.load_data()

    for split, (x, y) in zip(["train", "test"], mnist_data):
        split_dir = path.join(images_dir, split)
        makedir(split_dir)

        for i, (image, label) in enumerate(zip(x, y)):
            f_name = str(label) + "_" + str(i) + ".png"
            Image.fromarray(image).save(path.join(split_dir, f_name))

            yield split, {
                "label": str(label),
                "image": "/".join([split, f_name])
            }
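
The generator above can be consumed the same way the hands download is consumed in Example #7; a minimal sketch, assuming the dataset root is the illustrative `directory` below (not from the source):

import json
import os
from collections import defaultdict
from os import path

import jsonlines

directory = "datasets/digits"  # illustrative path
os.makedirs(directory, exist_ok=True)

splits = defaultdict(list)
with jsonlines.open(path.join(directory, "index.jsonl"), mode='w') as writer:
    # Record each yielded row in the index and remember its line number per split
    for i, (split, row) in enumerate(download_digits(path.join(directory, "images"))):
        splits[split].append(i)
        writer.write(row)
json.dump(splits, open(path.join(directory, "split.json"), "w"))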
Example #7
def download(version, directory):
    images_dir = path.join(directory, "images")
    makedir(images_dir)

    if version["version"] == "manual":
        res = download_manual(version, images_dir)
    elif version["version"] == "synthetic":
        res = download_synthetic(version, images_dir)
    else:
        raise ValueError("Downloading this version is not implemented")

    splits = defaultdict(list)

    with jsonlines.open(path.join(directory, "index.jsonl"),
                        mode='w') as writer:
        for i, (split, row) in tqdm(enumerate(res)):
            splits[split].append(i)
            writer.write(row)

    json.dump(splits, open(path.join(directory, 'split.json'), "w"))
Example #8
def download_manual(version, images_dir):
    temp = temp_dir()

    file_handle, _ = urlretrieve(version["url"])
    with zipfile.ZipFile(file_handle, 'r') as zipObj:
        zipObj.extractall(temp)

    labels_dir = path.join(temp, "hand_labels")

    for split in ["train", "test"]:
        original_split_dir = path.join(labels_dir, "manual_" + split)

        split_dir = path.join(images_dir, split)
        makedir(split_dir)

        files = sorted(
            [f for f in os.listdir(original_split_dir) if f.endswith('.json')])
        for file in files:
            content = json.load(open(path.join(original_split_dir, file), "r"))

            fname = file.replace(".json", ".jpg")

            # Crop image
            all_x, all_y, _ = zip(*content["hand_pts"])
            size = round(
                max(max(all_x) - min(all_x),
                    max(all_y) - min(all_y)) / 2)

            x = min(all_x) - size
            y = min(all_y) - size

            im = Image.open(path.join(original_split_dir, fname))
            crop = im.crop((x, y, x + 4 * size, y + 4 * size))
            crop.save(path.join(split_dir, fname))

            yield split, {
                "image": "/".join(["images", split, fname]),
                "hand": "left" if content["is_left"] else "right",
                "pose":
                [(x1 - x, y1 - y, z) for x1, y1, z in content["hand_pts"]]
            }
Example #9
def download_SpreadTheSign(directory):
    makedir(path.join(directory, "videos"))

    # Initialize MultiProcessing Pool
    processes = multiprocessing.cpu_count() - 1
    pool = Pool(processes)

    # First gets the list of words and their languages
    print("Indexing SpreadTheSign...")
    words = index_words(directory, pool, processes)

    # For every ID and language, get the metadata
    print("Getting metadata for each sign...")
    data_index_path = path.join(directory, "data_index.json")
    data = json.load(open(data_index_path)) if exists(data_index_path) else []
    existing = {"_".join(d["id"].split("_")[:-1]) for d in data}

    videos = [(i, l) for i, languages in words.items() for l in languages if str(i) + "_" + str(l) not in existing]

    for chunk in tqdm(chunks(videos, processes * 10)):
        data += list(itertools.chain.from_iterable(pool.imap(get_video, list(chunk))))
        json.dump(data, open(data_index_path, "w"), indent=2)

    return data
Example #10
def download_sign_language(version, images_dir):
    labels = string.ascii_lowercase

    temp = temp_dir()

    file_handle, _ = urlretrieve(version["url"])
    with zipfile.ZipFile(file_handle, 'r') as zipObj:
        zipObj.extractall(temp)

    for split in ["train", "test"]:
        split_dir = path.join(images_dir, split)
        makedir(split_dir)

        csv = [[int(r) for r in row.split(",")]
               for row in open(path.join(temp, "sign_mnist_" + split +
                                         ".csv")).readlines()[1:]]
        for i, row in enumerate(csv):
            label = labels[row.pop(0)]
            image = np.array(row, dtype=np.uint8).reshape((28, 28))

            f_name = label + "_" + str(i) + ".png"
            Image.fromarray(image).save(path.join(split_dir, f_name))

            yield split, {"label": label, "image": "/".join([split, f_name])}
Example #11
import os
from os import path
from os.path import exists

from jsonlines import jsonlines
from tqdm import tqdm

from addons.OpenPose.pose_util import get_directory_person
from utils.dataset import load
from utils.file_system import makedir, listdir

if __name__ == "__main__":
    version = {"version": "BODY_25"}
    directory = "/home/nlp/amit/PhD/meta-scholar/datasets/SLCrawl/versions/SpreadTheSign/OpenPose/BODY_25"
    # dataset = load("SLCrawl", version="SpreadTheSign")

    poses_dir = path.join(directory, "poses")
    makedir(poses_dir)

    existing = {path.join(poses_dir, d) for d in os.listdir(poses_dir)}
    print("Finished", len(existing))
Example #12
    def execute(self,
                run_name=None,
                tabs=0,
                x_params=None,
                previous_name=None,
                cache_name: str = None):
        # Every execution should refresh initial params
        params = CachedDict().union(self.initial_params) \
            if isinstance(self.initial_params, CachedDict) \
            else CachedDict(self.initial_params if self.initial_params else {})

        self.local_timer = Time.now()
        self.global_timer = Time.now()

        if not x_params:
            x_params = CachedDict()

        x_params = x_params.union(params)  # Add F to X

        if not previous_name:
            previous_name = cache_dir

        makedir(previous_name)

        if cache_name:
            previous_name = path.join(previous_name, cache_name)
            makedir(previous_name)

        if run_name:
            print("  " * tabs, run_name)

        key_len = max([len(qi.key) for qi in self.queue] + [0]) + 5
        name_len = max([len(qi.name) for qi in self.queue] + [0]) + 5

        for qi in self.queue:
            # key, name, method, load_cache, load_self
            if qi.key != "out" and not isinstance(qi.method, Pipeline):
                print(("  " * (tabs + 1)) +
                      ("%-" + str(key_len) + "s %-" + str(name_len) + "s") %
                      (qi.key, qi.name),
                      end=" ")

            pn = path.join(previous_name, qi.key)
            pnf = pn + "." + qi.ext

            if qi.load_cache and path.isfile(pnf):
                params.add_cache(qi.key, pnf)
                if qi.load_self:
                    params.load_cache(qi.key)
            else:
                if isinstance(qi.method, Pipeline):
                    params[qi.key] = qi.method.execute(
                        run_name=qi.name,
                        tabs=tabs + 1,
                        x_params=x_params.union(params),
                        previous_name=pn)
                    if isinstance(params[qi.key],
                                  CachedDict) and "out" in params[qi.key]:
                        params.copy_key(qi.key, params[qi.key], "out")
                else:
                    if self.mute:
                        Silencer.mute()
                    params[qi.key] = qi.method(params, x_params)
                    if self.mute:
                        Silencer.unmute()

                    f = open(pnf,
                             "wb" if qi.ext not in ["txt", "json"] else "w")
                    if qi.ext == "pkl":
                        pickle.dump(params[qi.key], f)
                    else:
                        if qi.ext in ["png", "jpg", "wav", "mp4"]:
                            params[qi.key] = get_file_bytes(params[qi.key],
                                                            format=qi.ext)
                        f.write(params[qi.key])
                    f.close()

            if qi.key != "out" and not isinstance(qi.method, Pipeline):
                local_passed, global_passed = self.timer_report()
                report = params[qi.key].report() \
                    if qi.key in params.val_dict and hasattr(params[qi.key], "report") else ""
                print(("%-15s\t\t" + report) % (local_passed))

        return params
Example #13
def download(version, directory):
    raw_dir = path.join(directory, "raw")
    makedir(raw_dir)

    print("The PIG dataset requires authentication, which is granted to everyone who requests it.")
    print("You can register in: http://beam.kisarazu.ac.jp/~saito/research/PianoFingeringDataset/register.php")
    print("")

    username = input("Insert your username:")
    password = input("Insert your password:")
    url = version["url"]

    # Make further requests authenticated
    password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    password_mgr.add_password(None, url, username, password)
    handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
    urllib.request.install_opener(urllib.request.build_opener(handler))

    file_handle, _ = urlretrieve(url)
    with zipfile.ZipFile(file_handle, 'r') as zipObj:
        zipObj.extractall(raw_dir)

    print("Download done")

    raw_dataset_dir = path.join(raw_dir, os.listdir(raw_dir)[0])

    metadata = csv.reader(open(path.join(raw_dataset_dir, "List.csv"), 'r'))
    next(metadata)
    metadata_id = {int(r[0]): r for r in metadata}

    fingering_dir = path.join(raw_dataset_dir, "FingeringFiles")
    with jsonlines.open(path.join(directory, "index.jsonl"), mode='w') as writer:
        for file in listdir(fingering_dir, full=False):
            file_id, tagger_id = map(int, file.split("_")[0].split("-"))
            meta = metadata_id[file_id]

            datum = {
                "id": file.split("_")[0],
                "piece": meta[2],
                "composer": meta[1],
                "tagger": meta[5 + tagger_id],
                "#bars": int(meta[3]),
                "#notes": int(meta[4]),
                "notes": []
            }

            f = open(path.join(fingering_dir, file), "r")
            notes = [r.split() for r in f.read().splitlines()[1:]]
            f.close()

            for note in notes:
                datum["notes"].append({
                    "on_event": {
                        "time": float(note[1]),
                        "velocity": int(note[4])
                    },
                    "off_event": {
                        "time": float(note[2]),
                        "velocity": int(note[5])
                    },
                    "spelled_pitch": note[3],
                    "midi_pitch": note_to_midi(note[3]),
                    "channel": int(note[6]),
                    "fingers": [{"finger": abs(f), "hand": "right" if f > 0 else "left"}
                                for f in [int(f) for f in note[7].strip('_').split("_")]]
                })

            writer.write(datum)
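
The "fingers" field packs hand and finger number into one signed token per entry; a small illustrative parse of a hypothetical token, using the same expression as above:

# Hypothetical fingering token: the sign selects the hand, the absolute value the finger number.
sample = "-1_2"
fingers = [{"finger": abs(f), "hand": "right" if f > 0 else "left"}
           for f in [int(x) for x in sample.strip('_').split("_")]]
# fingers == [{'finger': 1, 'hand': 'left'}, {'finger': 2, 'hand': 'right'}]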
Example #14
import json
from collections import Counter
from functools import lru_cache
from itertools import chain
from os.path import isfile
import requests

from utils.file_system import makedir

cache = "/tmp/dbpedia/"
makedir(cache)

DBPEDIA = "http://dbpedia.org/"


@lru_cache(maxsize=None)
def normalize_entity(entity: str):
    return entity \
        .replace("/", "%2F") \
        .replace("&", "%26") \
        .replace("+", "%2B")


@lru_cache(maxsize=None)
def get_dbpedia_entity(entity: str):
    entity = normalize_entity(entity)

    cache_ent = cache + entity + ".json"
    if isfile(cache_ent):
        f = open(cache_ent, "r")
        content = json.load(f)
Example #15
def download(version, directory: str):
    FFmpeg.check_installed()

    dataset_directory = path.join(directory, "dataset")
    if not path.exists(dataset_directory):
        dataset_file = path.join(directory, "dataset.tgz")
        if not path.exists(dataset_file):
            print("Downloading Archive")
            wget.download(version["url"], dataset_file)

        print("Extracting Archive")
        tar = tarfile.open(dataset_file)
        tar.extractall(path=dataset_directory)
        tar.close()

        remove(dataset_file)

    archive_directory = path.join(dataset_directory, version["version"]) \
        if version["version"] == "ChicagoFSWild" else dataset_directory

    frames_directory = path.join(archive_directory, "frames")
    if not path.exists(frames_directory):
        print("Extracting Frames Archive")
        tar = tarfile.open(
            path.join(archive_directory, version["version"] + "-Frames.tgz"))
        tar.extractall(path=frames_directory)
        tar.close()

    if version["version"] == "ChicagoFSWildPlus":
        frames_directory = path.join(frames_directory, version["version"])

    videos_directory = path.join(directory, "videos")
    makedir(videos_directory)

    splits = defaultdict(list)
    data = []

    if version["version"] == "ChicagoFSWild":
        print(
            "Note! While ChicagoFSWild contains hand bounding boxes, we do not load them at this time."
        )

    with open(path.join(archive_directory,
                        version["version"] + ".csv")) as csv_file:
        csv_data = csv.reader(csv_file, delimiter=',')
        next(csv_data)  # Ignore the header
        for i, row in tqdm(enumerate(csv_data)):
            # As this ID will be used for file naming, let's make it work by default
            datum_id = row[1].replace("/",
                                      "-").replace("_(youtube)",
                                                   "").replace("_(nad)", "")

            # Convert Frames to a video
            datum_frames = path.join(frames_directory, row[1])
            datum_video = path.join(videos_directory, datum_id + ".mp4")
            if not path.exists(datum_video):
                FFmpeg.video_from_frames(datum_frames, 4, datum_video)

            data.append({
                "id": datum_id,
                "texts": [{
                    "text": row[7]
                }],
                "gloss": word2chars(row[7]),
                "description": row[9],
                "video_url": row[2],
                "video": datum_video,
                "timing": {
                    "start": row[3],
                },
                "sign_language": "en.us",
                "text_language": "en",
                "signer": {
                    "name": row[-1]
                },
                "metadata": {
                    "frames": row[4],
                    "width": int(row[5]),
                    "height": int(row[6])
                }
            })

            splits[row[10]].append(i)

    with jsonlines.open(path.join(directory, "index.jsonl"),
                        mode='w') as writer:
        for datum in data:
            writer.write(datum)

    json.dump(dict(splits), open(path.join(directory, "split.json"), "w"))
Example #16
    def execute(self,
                run_name=None,
                tabs=0,
                x_params=None,
                previous_name=None,
                cache_name: str = None):
        self.local_timer = Time.now()
        self.global_timer = Time.now()

        if not x_params:
            x_params = CachedDict()

        if not previous_name:
            previous_name = cache_dir

        makedir(previous_name)

        if cache_name:
            previous_name = path.join(previous_name, cache_name)
            makedir(previous_name)

        if run_name:
            print("  " * tabs, run_name)

        key_len = max([len(qi.key) for qi in self.queue] + [0]) + 5
        name_len = max([len(qi.name) for qi in self.queue] + [0]) + 5

        for qi in self.queue:
            # key, name, method, load_cache, load_self
            if qi.key != "out" and not isinstance(qi.method, Pipeline):
                print(("  " * (tabs + 1)) +
                      ("%-" + str(key_len) + "s %-" + str(name_len) + "s") %
                      (qi.key, qi.name),
                      end=" ")

            pn = path.join(previous_name, qi.key)
            pnf = pn + "." + qi.ext

            if qi.load_cache and qi.key != "out" and path.isfile(pnf):
                self.params.add_cache(qi.key, pnf)
                if qi.load_self:
                    self.params.load_cache(qi.key)
            else:
                if isinstance(qi.method, Pipeline):
                    self.params[qi.key] = qi.method.execute(
                        run_name=qi.name,
                        tabs=tabs + 1,
                        x_params=x_params.union(self.params),
                        previous_name=pn)
                    if "out" in self.params[qi.key]:
                        self.params.copy_key(qi.key, self.params[qi.key],
                                             "out")
                else:
                    if self.mute:
                        Silencer.mute()
                    self.params[qi.key] = qi.method(self.params, x_params)
                    if self.mute:
                        Silencer.unmute()

                    f = open(pnf, "wb" if qi.ext != "txt" else "w")
                    if qi.ext == "sav":
                        pickle.dump(self.params[qi.key], f)
                    else:
                        f.write(self.params[qi.key])
                    f.close()

            if qi.key != "out" and not isinstance(qi.method, Pipeline):
                local_passed, global_passed = self.timer_report()
                report = self.params[qi.key].report() \
                    if qi.key in self.params.val_dict and hasattr(self.params[qi.key], "report") else ""
                print(("%-15s\t\t" + report) % (local_passed))

        return self.params
Example #17
def download_FingerSpell(version, directory):
    FFmpeg.check_installed()

    letters = ['rest'] + list(string.ascii_lowercase)

    animated = {"j": temp_name(".jpg"), "z": temp_name(".jpg")}

    for l, f in list(animated.items()):
        urlretrieve(version["url"] + l + "-begin_" + l + "-end.jpg", f)
        animated[l] = cv2.imread(f)

    videos_path = path.join(directory, "videos")
    makedir(videos_path)

    for l1 in tqdm(letters):
        for l2 in tqdm(letters):
            is_l2_animated = l2 in animated
            is_l1_animated = l1 in animated

            text = (l1 + l2).replace("rest", "")
            if l1 == l2 == "rest":
                gloss = ""
            elif l1 == "rest":
                gloss = l2 + "#"
            elif l2 == "rest":
                gloss = "#" + l1
            else:
                gloss = "#" + l1 + "# #" + l2 + "#"

            download_l1 = l1
            download_l2 = l2
            if is_l2_animated:
                download_l2 = download_l2 + "-begin"
            if is_l1_animated:
                download_l1 = download_l1 + "-end"

            full_url = version["url"] + download_l1 + "_" + download_l2 + ".jpg"

            video_path = path.join(videos_path, text + ".mp4")
            if not path.exists(video_path):
                temp = temp_name(".jpg")
                urlretrieve(full_url, temp)
                img = cv2.imread(temp)

                if is_l2_animated and not is_l1_animated:
                    img = np.concatenate((img, animated[l2]))
                if is_l1_animated and not is_l2_animated:
                    img = np.concatenate((animated[l1], img))

                imgs = img.reshape((int(img.shape[0] / 256), 256, 256, 3))

                temp_dir_name = temp_dir()
                for i, im in enumerate(imgs):
                    cv2.imwrite(temp_dir_name + str(i).zfill(2) + ".jpg", im)

                FFmpeg.video_from_frames(temp_dir_name, 2, video_path)

            yield {
                "id": text if text != "" else "rest",
                "texts": [{
                    "text": text
                }],
                "gloss": gloss,
                "video": video_path,
                "video_url": full_url,
                "sign_language": "en.us",
                "text_language": "English",
                "metadata": {
                    "width": 256,
                    "height": 256
                }
            }
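
A possible driver for this generator, mirroring the index-writing pattern used in the other examples (the version dict and directory below are illustrative, not from the source):

import os
from os import path

import jsonlines

version = {"version": "FingerSpell", "url": "http://example.com/fingerspell/"}  # illustrative
directory = "datasets/FingerSpell"  # illustrative
os.makedirs(directory, exist_ok=True)

# Write every yielded record as one line of the dataset index
with jsonlines.open(path.join(directory, "index.jsonl"), mode='w') as writer:
    for row in download_FingerSpell(version, directory):
        writer.write(row)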