def pre_process(self):
    """Run OpenNMT ``preprocess.py`` over the train/dev splits and return
    the preprocessed data directory as gzipped-tar bytes.

    Reads ``self.train_data`` / ``self.dev_data`` (each a (src, tgt) pair);
    when ``self.features`` is truthy, the source sides are first passed
    through ``add_features``.

    Returns:
        bytes: contents of the ``gztar`` archive of the save-data directory.
    """
    save_data = temp_dir()
    train_src, train_tgt = self.train_data
    dev_src, dev_tgt = self.dev_data
    if self.features:
        train_src = list(map(add_features, train_src))
        dev_src = list(map(add_features, dev_src))
    run_param(
        'preprocess.py', {
            "train_src": save_temp(train_src),
            "train_tgt": save_temp(train_tgt),
            "valid_src": save_temp(dev_src),
            "valid_tgt": save_temp(dev_tgt),
            "save_data": save_data + "data",
            "dynamic_dict": None  # This will add a dynamic-dict parameter
        })
    data_zip = shutil.make_archive(base_name=temp_name(),
                                   format="gztar",
                                   root_dir=save_data)
    # Context manager guarantees the archive handle is closed even if
    # read() raises (the original open/read/close could leak it).
    with open(data_zip, "rb") as f:
        return f.read()
def download_synthetic(version, images_dir):
    """Download the synthetic hand-keypoint archive and yield annotations.

    Extracts the zip at ``version["url"]``, copies every labelled image
    into ``images_dir/<split>``, and yields ``(split, record)`` pairs where
    the record holds the relative image path, handedness and keypoints.

    Args:
        version: dict with at least a ``"url"`` key pointing at the zip.
        images_dir: destination root for the per-split image folders.

    Yields:
        tuple[str, dict]: split name and annotation record.
    """
    temp = temp_dir()
    file_handle, _ = urlretrieve(version["url"])
    with zipfile.ZipFile(file_handle, 'r') as zipObj:
        zipObj.extractall(temp)
    labels_dir = path.join(temp, "hand_labels_synth")
    for split in ['synth1', 'synth2', 'synth3', 'synth4']:
        original_split_dir = path.join(labels_dir, split)
        split_dir = path.join(images_dir, split)
        makedir(split_dir)
        files = sorted(
            [f for f in os.listdir(original_split_dir) if f.endswith('.json')])
        for file in files:
            # `with` closes the label file; the original
            # json.load(open(...)) leaked the handle.
            with open(path.join(original_split_dir, file), "r") as label_file:
                content = json.load(label_file)
            fname = file.replace(".json", ".jpg")
            copyfile(path.join(original_split_dir, fname),
                     path.join(split_dir, fname))
            yield split, {
                "image": "/".join(["images", split, fname]),
                "hand": "left" if content["is_left"] else "right",
                "pose": content["hand_pts"]
            }
def pre_process(self):
    """Run OpenNMT ``preprocess.py`` on plan/delex pairs and return the
    preprocessed data directory as gzipped-tar bytes.

    Sources are the feature-annotated ``plan`` fields and targets the
    ``delex`` fields of ``self.train_reader.data`` / ``self.dev_reader.data``.

    Returns:
        bytes: contents of the ``gztar`` archive of the save-data directory.
    """
    save_data = temp_dir()
    train_src = save_temp(
        [add_features(d.plan) for d in self.train_reader.data])
    train_tgt = save_temp([d.delex for d in self.train_reader.data])
    valid_src = save_temp(
        [add_features(d.plan) for d in self.dev_reader.data])
    valid_tgt = save_temp([d.delex for d in self.dev_reader.data])
    run_param(
        'preprocess.py', {
            "train_src": train_src,
            "train_tgt": train_tgt,
            "valid_src": valid_src,
            "valid_tgt": valid_tgt,
            "save_data": save_data + "data",
            "dynamic_dict": None  # This will add a dynamic-dict parameter
        })
    data_zip = shutil.make_archive(base_name=temp_name(),
                                   format="gztar",
                                   root_dir=save_data)
    # Context manager guarantees the archive handle is closed even if
    # read() raises (the original open/read/close could leak it).
    with open(data_zip, "rb") as f:
        return f.read()
def train(self, save_data, opt):
    """Unpack a preprocessed data archive and launch OpenNMT ``train.py``.

    Args:
        save_data: gzipped-tar bytes produced by pre-processing.
        opt: option dict forwarded to ``train.py`` (mutated in place with
            the data path, model path and — on CUDA — GPU settings).

    Returns:
        str: the temp directory the model checkpoints are written to.
    """
    archive_path = save_temp_bin(save_data)
    data_dir = temp_dir()
    shutil.unpack_archive(filename=archive_path,
                          extract_dir=data_dir,
                          format="gztar")
    model_dir = temp_dir()
    opt.update({"data": data_dir + "data", "save_model": model_dir})
    if is_cuda:
        # Single-process, single-GPU training.
        opt.update({"world_size": 1, "gpu_ranks": 0})
    run_param('train.py', opt)
    return model_dir
def download_manual(version, images_dir):
    """Download the manually-labelled hand dataset, crop each image around
    its keypoints, and yield annotations.

    Args:
        version: dict with at least a ``"url"`` key pointing at the zip.
        images_dir: destination root for the per-split image folders.

    Yields:
        tuple[str, dict]: split name ("train"/"test") and a record with the
        relative image path, handedness, and keypoints shifted into the
        cropped image's coordinate frame.
    """
    temp = temp_dir()
    file_handle, _ = urlretrieve(version["url"])
    with zipfile.ZipFile(file_handle, 'r') as zipObj:
        zipObj.extractall(temp)
    labels_dir = path.join(temp, "hand_labels")
    for split in ["train", "test"]:
        original_split_dir = path.join(labels_dir, "manual_" + split)
        split_dir = path.join(images_dir, split)
        makedir(split_dir)
        files = sorted(
            [f for f in os.listdir(original_split_dir) if f.endswith('.json')])
        for file in files:
            # `with` closes the label file; the original
            # json.load(open(...)) leaked the handle.
            with open(path.join(original_split_dir, file), "r") as label_file:
                content = json.load(label_file)
            fname = file.replace(".json", ".jpg")
            # Crop a square window around the keypoints: `size` is half the
            # larger side of the keypoint bounding box, so the 4*size-wide
            # window is twice the box, anchored one `size` above/left of it.
            all_x, all_y, _ = zip(*content["hand_pts"])
            size = round(
                max(max(all_x) - min(all_x), max(all_y) - min(all_y)) / 2)
            x = min(all_x) - size
            y = min(all_y) - size
            # `with` releases the source image's file handle; the crop is an
            # independent copy, so saving after the block is safe.
            with Image.open(path.join(original_split_dir, fname)) as im:
                crop = im.crop((x, y, x + 4 * size, y + 4 * size))
            crop.save(path.join(split_dir, fname))
            yield split, {
                "image": "/".join(["images", split, fname]),
                "hand": "left" if content["is_left"] else "right",
                # Shift keypoints into crop coordinates.
                "pose": [(x1 - x, y1 - y, z)
                         for x1, y1, z in content["hand_pts"]]
            }
def download_sign_language(version, images_dir):
    """Download the Sign-Language-MNIST CSVs, render each row as a 28x28
    grayscale PNG, and yield annotations.

    Args:
        version: dict with at least a ``"url"`` key pointing at the zip.
        images_dir: destination root for the per-split image folders.

    Yields:
        tuple[str, dict]: split name ("train"/"test") and a record with the
        letter label and relative image path.
    """
    labels = string.ascii_lowercase
    temp = temp_dir()
    file_handle, _ = urlretrieve(version["url"])
    with zipfile.ZipFile(file_handle, 'r') as zipObj:
        zipObj.extractall(temp)
    for split in ["train", "test"]:
        split_dir = path.join(images_dir, split)
        makedir(split_dir)
        # `with` closes the CSV; the bare open(...).readlines() leaked the
        # handle. (`rows` also avoids shadowing the stdlib `csv` module.)
        with open(path.join(temp, "sign_mnist_" + split + ".csv")) as csv_file:
            rows = [[int(r) for r in line.split(",")]
                    for line in csv_file.readlines()[1:]]  # [1:] skips header
        for i, row in enumerate(rows):
            label = labels[row.pop(0)]  # first column is the class index
            image = np.array(row, dtype=np.uint8).reshape((28, 28))
            f_name = label + "_" + str(i) + ".png"
            Image.fromarray(image).save(path.join(split_dir, f_name))
            yield split, {"label": label, "image": "/".join([split, f_name])}
def download_FingerSpell(version, directory):
    """Build fingerspelling videos for every ordered letter pair.

    Downloads sprite strips from ``version["url"]``, stitches in the extra
    frame strips for the animated letters (j, z), renders each pair as a
    2-fps mp4 of 256x256 frames under ``directory``/videos, and yields one
    metadata record per pair. Existing videos are reused, not re-rendered.
    """
    FFmpeg.check_installed()
    letters = ['rest'] + list(string.ascii_lowercase)
    # j and z are animated: fetch their begin->end frame strips up front
    # and replace the temp path by the decoded image.
    animated = {"j": temp_name(".jpg"), "z": temp_name(".jpg")}
    for letter, dest in list(animated.items()):
        urlretrieve(version["url"] + letter + "-begin_" + letter + "-end.jpg",
                    dest)
        animated[letter] = cv2.imread(dest)
    videos_path = path.join(directory, "videos")
    makedir(videos_path)
    for l1 in tqdm(letters):
        for l2 in tqdm(letters):
            is_l1_animated = l1 in animated
            is_l2_animated = l2 in animated
            text = (l1 + l2).replace("rest", "")
            # NOTE(review): single-letter glosses are asymmetric
            # ("a#" vs "#a") while pairs are "#a# #b#" — confirm intended.
            if l1 == l2 == "rest":
                gloss = ""
            elif l1 == "rest":
                gloss = l2 + "#"
            elif l2 == "rest":
                gloss = "#" + l1
            else:
                gloss = "#" + l1 + "# #" + l2 + "#"
            # Animated letters are published under begin/end sprite names.
            download_l1 = l1 + "-end" if is_l1_animated else l1
            download_l2 = l2 + "-begin" if is_l2_animated else l2
            full_url = version["url"] + download_l1 + "_" + download_l2 + ".jpg"
            video_path = path.join(videos_path, text + ".mp4")
            if not path.exists(video_path):
                sprite = temp_name(".jpg")
                urlretrieve(full_url, sprite)
                img = cv2.imread(sprite)
                # When exactly one side is animated, splice that letter's
                # frame strip onto the matching end of the sprite.
                if is_l2_animated and not is_l1_animated:
                    img = np.concatenate((img, animated[l2]))
                if is_l1_animated and not is_l2_animated:
                    img = np.concatenate((animated[l1], img))
                # The sprite is a vertical strip of 256x256 RGB frames.
                frames = img.reshape((int(img.shape[0] / 256), 256, 256, 3))
                frames_dir = temp_dir()
                for idx, frame in enumerate(frames):
                    cv2.imwrite(frames_dir + str(idx).zfill(2) + ".jpg",
                                frame)
                FFmpeg.video_from_frames(frames_dir, 2, video_path)
            yield {
                "id": text if text != "" else "rest",
                "texts": [{
                    "text": text
                }],
                "gloss": gloss,
                "video": video_path,
                "video_url": full_url,
                "sign_language": "en.us",
                "text_language": "English",
                "metadata": {
                    "width": 256,
                    "height": 256
                }
            }