def init_interactive(
    defaults: Dict[str, str],
    provided: Dict[str, str],
    validator: Callable[[str, str], Union[str, Tuple[str, str]]] = None,
    stream: Optional[TextIO] = None,
) -> Dict[str, str]:
    """Interactively ask for the experiment settings missing from ``provided``.

    Prompts are issued in three groups: the command, the dependencies
    (``code``, ``data``, ``params``) and the outputs (``models`` plus, when
    ``live`` was not provided, ``metrics`` and ``plots``). Only keys absent
    from ``provided`` are prompted for.

    Args:
        defaults: Default values forwarded to ``_prompts``.
        provided: Values already supplied by the caller.
        validator: Optional validator forwarded to ``_prompts``.
        stream: Optional stream forwarded to ``_prompts``.

    Returns:
        The provided ``cmd`` (if any) merged with every prompt response
        after it has been passed through ``compact``.
    """
    given = provided.keys()
    wanted_outputs = (
        ["models"] if "live" in provided else ["models", "metrics", "plots"]
    )

    # (heading, keys still to ask for, whether the answer may be omitted)
    sections = (
        ("", lremove(given, ["cmd"]), False),
        (
            "Enter experiment dependencies.",
            lremove(given, ["code", "data", "params"]),
            True,
        ),
        ("Enter experiment outputs.", lremove(given, wanted_outputs), True),
    )

    answers: Dict[str, str] = {}
    if "cmd" in provided:
        answers["cmd"] = provided["cmd"]

    for title, missing, optional in sections:
        if title and missing:
            ui.error_write(title, styled=True)
        reply = _prompts(
            missing,
            defaults=defaults,
            allow_omission=optional,
            validator=validator,
            stream=stream,
        )
        answers.update(compact(reply))
        # Separate consecutive groups with an empty line.
        if missing:
            ui.error_write(styled=True)
    return answers
def init_interactive(
    name: str,
    defaults: Dict[str, str],
    provided: Dict[str, str],
    validator: Callable[[str, str], Union[str, Tuple[str, str]]] = None,
    live: bool = False,
    stream: Optional[TextIO] = None,
) -> Dict[str, str]:
    """Guide the user through the stage settings missing from ``provided``.

    Args:
        name: Stage kind shown in the introductory message.
        defaults: Default values shown in the workspace tree and forwarded
            to ``_prompts``.
        provided: Values already supplied. NOTE: ``"cmd"`` is popped from
            this dict, so the caller's mapping is modified.
        validator: Optional validator forwarded to ``_prompts``.
        live: When true, ``live`` is prompted for instead of ``metrics``
            and ``plots``.
        stream: Optional stream forwarded to ``_prompts``.

    Returns:
        The command (provided or prompted) merged with the prompt responses
        after they have been passed through ``compact``.
    """
    command = provided.pop("cmd", None)
    given = provided.keys()
    path_keys = lremove(given, ["code", "data", "models", "params"])
    tracking_keys = lremove(given, ["live"] if live else ["metrics", "plots"])
    prompts = path_keys + tracking_keys

    # Workspace shown to the user: defaults overridden by provided values,
    # minus the tracking entries that do not apply to the chosen mode.
    workspace = {**defaults, **provided}
    hidden = ("plots", "metrics") if live else ("live",)
    for key in hidden:
        if key not in provided:
            workspace.pop(key, None)

    result: Dict[str, str] = {"cmd": command} if command else {}
    if command and not prompts:
        # Nothing left to ask.
        return result

    ui.error_write(
        f"This command will guide you to set up a [bright_blue]{name}[/]",
        "stage in [green]dvc.yaml[/].",
        f"\nSee [repr.url]{PIPELINE_FILE_LINK}[/].\n",
        styled=True,
    )

    if not command:
        cmd_reply = _prompts(["cmd"], allow_omission=False, stream=stream)
        result.update(compact(cmd_reply))
        if prompts:
            ui.error_write(styled=True)

    if prompts:
        ui.error_write(
            "Enter the paths for dependencies and outputs of the command.",
            styled=True,
        )
        if workspace:
            ui.error_write(build_workspace_tree(workspace), styled=True)
            ui.error_write(styled=True)
        path_reply = _prompts(
            prompts, defaults, validator=validator, stream=stream
        )
        result.update(compact(path_reply))
    return result
def push_pop():
    """Benchmark push/pop for growing key counts and plot the timings.

    Runs ``expr_result(n, 3)`` for every ``n`` in ``range(10, 200000, 5000)``,
    prints the collected results, then shows two figures: one with every
    series whose key contains neither 'merge' nor 'pop', and one with every
    series whose key contains neither 'merge' nor 'push'. For each plotted
    series the slope of a degree-1 ``np.polyfit`` is printed.
    """
    # push, pop: asc / dec / rand
    sizes = list(range(10, 200000, 5000))
    measurements = list(
        tqdm((expr_result(n, 3) for n in sizes), total=len(sizes)))

    exprs = F.join_with(list, measurements)
    pprint(exprs)
    print(sizes)

    def draw_without(excluded):
        """Plot every series whose key contains neither 'merge' nor `excluded`."""
        selected = F.lremove(
            lambda key: 'merge' in key or excluded in key, exprs.keys())
        slopes = {}
        for key in sorted(selected):
            is_bh = 'bh' in key
            plt.plot(sizes,
                     exprs[key],
                     label=key,
                     marker='.' if is_bh else 'x',
                     linestyle='-' if is_bh else '--')
            slopes[key] = np.polyfit(sizes, exprs[key], 1)[0]
        pprint(slopes)
        plt.xlabel('number of keys')
        plt.ylabel('milli seconds')
        plt.legend()
        plt.show()

    draw_without('pop')
    draw_without('push')
def main(args):
    """Split a COCO annotation file into train/test files and copy the images.

    Args:
        args: Namespace-like object. Attributes read here: ``annotations``
            (path of the source COCO JSON), ``traindir`` / ``testdir``
            (output directories, created when missing), ``train`` / ``test``
            (JSON file names written inside those directories), ``inputdir``
            (directory the image files are copied from), ``split`` (passed
            to ``train_test_split`` as ``train_size``) and
            ``having_annotations`` (drop images without any annotation).
    """
    with open(args.annotations, 'rt', encoding='UTF-8') as annotations_file:
        coco = json.load(annotations_file)

    info = coco['info']
    licenses = coco['licenses']
    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    for directory in (args.traindir, args.testdir):
        if not os.path.exists(directory):
            pl.Path(directory).mkdir(parents=True, exist_ok=True)

    if args.having_annotations:
        # A set gives O(1) membership tests; the previous list made this
        # filter O(images * annotations).
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [image for image in images if image['id'] in annotated_ids]

    x, y = train_test_split(images, train_size=args.split)

    save_coco(os.path.join(args.traindir, args.train), info, licenses, x,
              filter_annotations(annotations, x), categories)
    save_coco(os.path.join(args.testdir, args.test), info, licenses, y,
              filter_annotations(annotations, y), categories)

    for image in x:
        shutil.copy(os.path.join(args.inputdir, image["file_name"]),
                    args.traindir)
    for image in y:
        shutil.copy(os.path.join(args.inputdir, image["file_name"]),
                    args.testdir)

    print("Saved {} entries in {} and {} in {}".format(
        len(x), args.train, len(y), args.test))
def split_coco_annotation(annotations,
                          split_ratio,
                          train_json='train_anno.json',
                          test_json='test_anno.json',
                          is_having=True):
    """Split a COCO annotation file into a train and a test annotation file.

    Args:
        annotations: Path of the source COCO JSON file.
        split_ratio: Passed to ``train_test_split`` as ``train_size``.
        train_json: Output path for the train split.
        test_json: Output path for the test split.
        is_having: When true, images without any annotation are dropped
            before splitting.
    """
    with open(annotations, 'rt', encoding='UTF-8') as anno:
        coco = json.load(anno)

    images = coco['images']
    coco_annotations = coco['annotations']
    categories = coco['categories']

    if is_having:
        # A set gives O(1) membership tests; the previous list made this
        # filter O(images * annotations).
        annotated_ids = {int(a['image_id']) for a in coco_annotations}
        images = [image for image in images if image['id'] in annotated_ids]

    x, y = train_test_split(images, train_size=split_ratio)

    save_coco(train_json, x, filter_annotations(coco_annotations, x),
              categories)
    save_coco(test_json, y, filter_annotations(coco_annotations, y),
              categories)

    print("Saved {} entries in {} and {} in {}".format(
        len(x), train_json, len(y), test_json))
def main(args):
    """Split a COCO annotation file into a train and a test annotation file.

    Args:
        args: Namespace-like object. Attributes read here: ``annotations``
            (path of the source COCO JSON), ``train`` / ``test`` (output
            paths), ``split`` (passed to ``train_test_split`` as
            ``train_size``) and ``having_annotations`` (drop images without
            any annotation before splitting).
    """
    with open(args.annotations, "rt", encoding="UTF-8") as annotations_file:
        coco = json.load(annotations_file)

    info = coco["info"]
    licenses = coco["licenses"]
    images = coco["images"]
    annotations = coco["annotations"]
    categories = coco["categories"]

    if args.having_annotations:
        # A set gives O(1) membership tests; the previous list made this
        # filter O(images * annotations).
        annotated_ids = {int(a["image_id"]) for a in annotations}
        images = [image for image in images if image["id"] in annotated_ids]

    x, y = train_test_split(images, train_size=args.split)

    save_coco(
        args.train,
        info,
        licenses,
        x,
        filter_annotations(annotations, x),
        categories,
    )
    save_coco(
        args.test,
        info,
        licenses,
        y,
        filter_annotations(annotations, y),
        categories,
    )

    print("Saved {} entries in {} and {} in {}".format(
        len(x), args.train, len(y), args.test))
def find_executables():
    """Locate the Shotcut executables, asking the user for help when needed.

    Looks up ffmpeg, ffprobe, ffplay and melt through
    ``find_shotcut_executable``. While any of ffmpeg, ffprobe or melt is
    missing, an error box is shown and the user may pick the Shotcut
    location, after which the lookup is retried.

    Returns:
        A dict mapping each executable name to the lookup result once all
        required executables are found, or ``False`` if the user cancels
        the message box or the file dialog.
    """
    candidates = ['ffmpeg', 'ffprobe', 'ffplay', 'melt']
    mandatory = ['ffmpeg', 'ffprobe', 'melt']

    shotcut_dir = None
    while True:
        executables = {}
        for exe_name in candidates:
            executables[exe_name] = find_shotcut_executable(
                exe_name, shotcut_dir=shotcut_dir)

        # With a mapping as predicate, lremove drops the names whose looked-up
        # value is truthy, leaving the ones that were not found.
        missing_exes = F.lremove(executables, mandatory)
        if not missing_exes:
            return executables

        answer = wx.MessageBox(
            f"Unable to find {util.oxford_join(missing_exes)} (part of Shotcut)! "
            "Please install Shotcut, or locate Shotcut in your filesystem.",
            "Error",
            style=wx.OK | wx.CANCEL | wx.ICON_ERROR)
        if answer == wx.CANCEL:
            return False

        chosen = dialogs.file_dialog(None,
                                     message="Select Shotcut",
                                     style=wx.FD_OPEN | wx.FD_FILE_MUST_EXIST)
        if not chosen:
            return False

        # Retry from the chosen directory, or the chosen file's directory.
        chosen_path = Path(chosen)
        if chosen_path.is_dir():
            shotcut_dir = chosen
        else:
            shotcut_dir = str(chosen_path.parent)
def main(annotation_path,
         split_ratio,
         having_annotations,
         train_save_path,
         test_save_path,
         random_state=None):
    """Split a COCO annotation file into a train and a test annotation file.

    Args:
        annotation_path: Path of the source COCO JSON file.
        split_ratio: Passed to ``train_test_split`` as ``train_size``.
        having_annotations: When true, images without any annotation are
            dropped before splitting.
        train_save_path: Output path for the train split.
        test_save_path: Output path for the test split.
        random_state: Passed to ``train_test_split`` as ``random_state``.
    """
    with open(annotation_path, 'rt', encoding='UTF-8') as annotations_file:
        coco = json.load(annotations_file)

    info = coco['info']
    licenses = coco['licenses']
    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    if having_annotations:
        # A set gives O(1) membership tests; the previous list made this
        # filter O(images * annotations).
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [image for image in images if image['id'] in annotated_ids]

    x, y = train_test_split(images,
                            train_size=split_ratio,
                            random_state=random_state)

    save_coco(train_save_path, info, licenses, x,
              filter_annotations(annotations, x), categories)
    save_coco(test_save_path, info, licenses, y,
              filter_annotations(annotations, y), categories)

    print("Saved {} entries in {} and {} in {}".format(
        len(x), train_save_path, len(y), test_save_path))
def main(args):
    """Split a COCO annotation file into a train and a test annotation file.

    The ``info`` and ``licenses`` sections of the source file are not
    forwarded; ``save_coco`` is called with images, annotations and
    categories only.

    Args:
        args: Namespace-like object. Attributes read here: ``annotations``
            (path of the source COCO JSON), ``train`` / ``test`` (output
            paths), ``split`` (passed to ``train_test_split`` as
            ``train_size``) and ``having_annotations`` (drop images without
            any annotation before splitting).
    """
    with open(args.annotations, 'rt', encoding='UTF-8') as annotations_file:
        coco = json.load(annotations_file)

    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    if args.having_annotations:
        # A set gives O(1) membership tests; the previous list made this
        # filter O(images * annotations).
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [image for image in images if image['id'] in annotated_ids]

    x, y = train_test_split(images, train_size=args.split)

    save_coco(args.train, x, filter_annotations(annotations, x), categories)
    save_coco(args.test, y, filter_annotations(annotations, y), categories)

    print("Saved {} entries in {} and {} in {}".format(
        len(x), args.train, len(y), args.test))
def init_interactive(
    name: str,
    defaults: Dict[str, str],
    provided: Dict[str, str],
    show_tree: bool = False,
    live: bool = False,
) -> Dict[str, str]:
    """Prompt for the stage settings that are missing from ``provided``.

    Args:
        name: Stage kind shown in the introductory message.
        defaults: Default values forwarded to ``_prompts`` and listed in the
            optional workspace tree.
        provided: Values already supplied by the caller.
        show_tree: When true, print a tree of the assumed workspace paths
            before prompting.
        live: When true, ``live`` is prompted for instead of ``metrics``
            and ``plots``.

    Returns:
        An empty dict when nothing is missing; otherwise the prompt
        responses after they have been passed through ``compact``.
    """
    given = provided.keys()
    primary = lremove(given, ["cmd", "code", "data", "models", "params"])
    secondary = lremove(given, ["live"] if live else ["metrics", "plots"])
    if not primary and not secondary:
        return {}

    intro = ui.rich_text.assemble(
        "This command will guide you to set up a ",
        (name, "bright_blue"),
        " stage in ",
        ("dvc.yaml", "green"),
        ".",
    )
    reference = ui.rich_text.assemble(
        "See ", (PIPELINE_FILE_LINK, "repr.url"), "."
    )
    ui.error_write(intro, reference, "", sep="\n", styled=True)

    if show_tree:
        from rich.tree import Tree

        # Defaults overridden by provided values, without the command itself.
        layout = {**defaults, **provided}
        layout.pop("cmd", None)
        if not live and "live" not in provided:
            layout.pop("live", None)

        tree = Tree(
            "DVC assumes the following workspace structure:",
            highlight=True,
        )
        for path in sorted(layout.values()):
            tree.add(f"[green]{path}[/green]")
        ui.error_write(tree, styled=True)
        ui.error_write()

    answers = {}
    answers.update(_prompts(primary, defaults))
    answers.update(_prompts(secondary, defaults))
    return compact(answers)
def init_interactive(
    defaults: Dict[str, str],
    provided: Iterable[str],
    show_heading: bool = False,
    live: bool = False,
) -> Dict[str, str]:
    """Prompt for the stage settings that are missing from ``provided``.

    Args:
        defaults: Default values forwarded to ``_prompts``.
        provided: Keys already supplied by the caller; used as the
            ``lremove`` predicate to drop keys that need no prompt.
        show_heading: When true, print an introductory message first.
        live: When true, ``live`` is prompted for instead of ``metrics``
            and ``plots``.

    Returns:
        An empty dict when nothing is missing; otherwise the prompt
        responses after they have been passed through ``compact``.
    """
    primary = lremove(provided, ["cmd", "code", "data", "models", "params"])
    tracking = ["live"] if live else ["metrics", "plots"]
    secondary = lremove(provided, tracking)
    if not primary and not secondary:
        return {}

    if show_heading:
        ui.error_write(
            "This command will guide you to set up your first stage in "
            "[green]dvc.yaml[/green].\n",
            styled=True,
        )

    answers = {}
    answers.update(_prompts(primary, defaults))
    answers.update(_prompts(secondary, defaults))
    return compact(answers)
def main(annotation_path,
         split_ratio,
         having_annotations,
         train_save_path,
         test_save_path,
         random_state=None):
    """Split a COCO annotation file into a train and a test annotation file.

    A ``__background__`` category with id 0 is inserted at the front of the
    category list when no category has id 0. When ``split_ratio`` equals 1
    nothing is split: everything is written to ``train_save_path`` and the
    function returns.

    Args:
        annotation_path: Path of the source COCO JSON file.
        split_ratio: Passed to ``train_test_split`` as ``train_size``.
        having_annotations: When true, images without any annotation are
            dropped before splitting.
        train_save_path: Output path for the train split.
        test_save_path: Output path for the test split.
        random_state: Passed to ``train_test_split`` as ``random_state``.
    """
    with open(annotation_path, 'rt', encoding='UTF-8') as annotations_file:
        coco = json.load(annotations_file)

    info = coco.get('info', '')
    licenses = coco.get('licenses', '')
    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    if not any(category['id'] == 0 for category in categories):
        categories.insert(0, {
            'supercategory': '',
            'id': 0,
            'name': '__background__'
        })

    if split_ratio == 1:
        save_coco(train_save_path, info, licenses, images, annotations,
                  categories)
        print("Saved {} entries in {}.".format(len(images), train_save_path))
        return None

    if having_annotations:
        # A set gives O(1) membership tests; the previous list made this
        # filter O(images * annotations).
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [image for image in images if image['id'] in annotated_ids]

    x, y = train_test_split(images,
                            train_size=split_ratio,
                            random_state=random_state)

    save_coco(train_save_path, info, licenses, x,
              filter_annotations(annotations, x), categories)
    save_coco(test_save_path, info, licenses, y,
              filter_annotations(annotations, y), categories)

    print("Saved {} entries in {} and {} in {}".format(
        len(x), train_save_path, len(y), test_save_path))
def main(args):
    """Clean up a COCO annotation file and split it into train/test files.

    Before splitting, the data is normalized in place:

    * image ``id`` / ``file_name`` and annotation ``image_id`` are reduced
      to the part after the last backslash;
    * the last three categories are dropped and only those named ``'c'`` or
      ``'t'`` are kept;
    * annotation category ids are remapped (5 -> 3, 6 -> 1, 7 -> 2) and only
      annotations whose resulting category id is 1 or 3 are kept.

    Args:
        args: Namespace-like object. Attributes read here: ``annotations``
            (path of the source COCO JSON), ``train`` / ``test`` (output
            paths), ``split`` (passed to ``train_test_split`` as
            ``train_size``) and ``having_annotations`` (drop images without
            any remaining annotation before splitting).
    """
    with open(args.annotations, 'rt', encoding='UTF-8') as annotations_file:
        coco = json.load(annotations_file)

    # Fixed placeholders are written instead of the source file's sections.
    info = 'coco'
    licenses = 'coco'
    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    for image in images:
        image['id'] = image['id'].split('\\')[-1]
        image['file_name'] = image['file_name'].split('\\')[-1]

    categories = [
        cat for cat in categories[:-3] if cat['name'] in ('c', 't')
    ]

    category_remap = {5: 3, 6: 1, 7: 2}
    kept_annotations = []
    for annotation in annotations:
        annotation['image_id'] = annotation['image_id'].split('\\')[-1]
        category_id = annotation['category_id']
        annotation['category_id'] = category_remap.get(category_id,
                                                       category_id)
        if annotation['category_id'] in (1, 3):
            kept_annotations.append(annotation)
    annotations = kept_annotations

    if args.having_annotations:
        # image_id was already reduced to its last path component above, so
        # it can be used directly. A set gives O(1) membership tests; the
        # previous list made this filter O(images * annotations).
        annotated_ids = {a['image_id'] for a in annotations}
        images = [image for image in images if image['id'] in annotated_ids]

    x, y = train_test_split(images, train_size=args.split)

    save_coco(args.train, info, licenses, x,
              filter_annotations(annotations, x), categories)
    save_coco(args.test, info, licenses, y,
              filter_annotations(annotations, y), categories)

    print("Saved {} entries in {} and {} in {}".format(
        len(x), args.train, len(y), args.test))
def main(annotation_path,
         save_path,
         folds,
         having_annotations,
         random_state=None):
    """Write k-fold train/val COCO annotation files.

    For every fold ``k`` a directory ``{save_path}/{k}`` is created (it must
    not exist yet) containing ``train.json`` and ``val.json``. Each
    validation fold holds ``len(images) // folds`` images; images left over
    by the integer division are never used for validation.

    Args:
        annotation_path: Path of the source COCO JSON file.
        save_path: Directory under which the per-fold directories are made.
        folds: Number of folds.
        having_annotations: When true, images without any annotation are
            dropped before the folds are built.
        random_state: Seed passed to ``random.seed`` before shuffling.
    """
    random.seed(random_state)

    with open(annotation_path, 'rt', encoding='UTF-8') as annotations_file:
        coco = json.load(annotations_file)

    info = coco.get('info', '')
    licenses = coco.get('licenses', '')
    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    if having_annotations:
        # A set gives O(1) membership tests; the previous list made this
        # filter O(images * annotations).
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [image for image in images if image['id'] in annotated_ids]

    num_images = len(images)
    fold_size = num_images // folds
    image_indices = list(range(num_images))
    random.shuffle(image_indices)

    for fold in range(folds):
        # Take the fold from the *shuffled* order. Previously the validation
        # indices were the raw range [fold_size * fold, fold_size * (fold + 1)),
        # so the shuffle (and random_state) never changed fold membership.
        val_indices = image_indices[fold_size * fold:fold_size * (fold + 1)]
        val_index_set = set(val_indices)

        train = [
            images[idx] for idx in image_indices if idx not in val_index_set
        ]
        val = [images[idx] for idx in val_indices]

        os.makedirs(f'{save_path}/{fold}')
        train_save_path = f'{save_path}/{fold}/train.json'
        val_save_path = f'{save_path}/{fold}/val.json'

        save_coco(train_save_path, info, licenses, train,
                  filter_annotations(annotations, train), categories)
        save_coco(val_save_path, info, licenses, val,
                  filter_annotations(annotations, val), categories)

        print("[Fold {}] Saved {} entries in {} and {} in {}".format(
            fold, len(train), train_save_path, len(val), val_save_path))
def split_coco(ann_f, split=0.8, remove_empty=False):
    """Split a COCO annotation file into ``*_train.json`` and ``*_val.json``.

    The output files are written next to ``ann_f``; their names are the
    input file name with ``.json`` replaced by ``_train.json`` /
    ``_val.json``.

    Args:
        ann_f: Path of the source COCO JSON file.
        split: Converted with ``float`` and passed to ``train_test_split``
            as ``train_size``.
        remove_empty: When true, images without any annotation are dropped
            before splitting.
    """
    directory = os.path.dirname(ann_f)
    file_name = os.path.basename(ann_f)
    train_f = os.path.join(directory, file_name.replace('.json', '_train.json'))
    val_f = os.path.join(directory, file_name.replace('.json', '_val.json'))

    with open(ann_f, 'rt', encoding='UTF-8') as annotations_file:
        coco = json.load(annotations_file)

    # 'info' and 'licenses' are optional; fall back to an empty string.
    info = coco.get('info', '')
    licenses = coco.get('licenses', '')
    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    number_of_images = len(images)

    if remove_empty:
        # A set gives O(1) membership tests; the previous list made this
        # filter O(images * annotations).
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [image for image in images if image['id'] in annotated_ids]
        print(
            'removed {} images without annotations, all images: {}, now: {}'
            .format(number_of_images - len(images), number_of_images,
                    len(images)))
    else:
        print('all images: {}'.format(number_of_images))

    x, y = train_test_split(images, train_size=float(split))

    save_coco(train_f, info, licenses, x, filter_annotations(annotations, x),
              categories)
    save_coco(val_f, info, licenses, y, filter_annotations(annotations, y),
              categories)

    print("Saved {} entries in {} and {} in {}.".format(
        len(x), train_f, len(y), val_f))
def split_coco(self,
               test_size,
               save_dir,
               image_dir,
               having_annots=True,
               save_image=False):
    '''
    Split the images found in `image_dir` into train/val annotation files.

    Writes `train_annot.json` and `val_annot.json` into `save_dir`; `info`
    and `licenses` are written as None.

    test_size: passed to `train_test_split` as `test_size`.
    save_dir: directory the annotation files (and optionally images) go to.
    image_dir: directory whose file listing selects the images to split.
    having_annots: True if we use only images that have labels.
                   False otherwise.
    save_image: when True, also call `save_images` for both splits, writing
                into `save_dir/train` and `save_dir/val`.
    '''
    info = None  # self.coco_instance.dataset['info']
    licenses = None  # self.coco_instance.dataset['licenses']
    images, annots = self.get_info_for_given_images(os.listdir(image_dir))
    cats = self.coco_instance.dataset['categories']

    print(f"all_images: {len(images)}")

    if having_annots:
        # A set gives O(1) membership tests; the previous list made this
        # filter O(images * annotations).
        annotated_ids = {int(pair['image_id']) for pair in annots}
        images = [
            image for image in images if int(image['id']) in annotated_ids
        ]

    print(f"n_samples: {len(images)}")

    train, val = train_test_split(images, test_size=test_size)

    train_dir = os.path.join(save_dir, 'train_annot.json')
    val_dir = os.path.join(save_dir, 'val_annot.json')
    save_coco(train_dir, info, licenses, train,
              filter_annotations(annots, train), cats)
    save_coco(val_dir, info, licenses, val, filter_annotations(annots, val),
              cats)

    if save_image:
        save_images(train,
                    data_dir=image_dir,
                    save_dir=os.path.join(save_dir, 'train'))
        save_images(val,
                    data_dir=image_dir,
                    save_dir=os.path.join(save_dir, 'val'))
def main(args):
    """Split a COCO annotation file into the ``TRAIN`` and ``VAL`` files.

    Args:
        args: Namespace-like object. Attributes read here: ``annotations``
            (path of the source COCO JSON), ``ratio`` (passed to
            ``train_test_split`` as ``train_size``) and ``annotationed``
            (drop images without any annotation before splitting).
    """
    with open(args.annotations, 'rt', encoding='utf-8') as annos:
        coco = json.load(annos)

    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    if args.annotationed:
        # The previous call was `funcy.lremove(predicate)` with the image list
        # missing, which raised TypeError as soon as this branch was taken.
        # A set also makes each membership test O(1).
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [image for image in images if image['id'] in annotated_ids]

    x, y = train_test_split(images, train_size=args.ratio)

    save_coco(TRAIN, x, filter_annotations(annotations, x), categories)
    save_coco(VAL, y, filter_annotations(annotations, y), categories)

    print('Splited!')
def main(args):
    """Split a COCO annotation file into ``TRAIN_PATH`` and ``VAL_PATH``.

    Args:
        args: Namespace-like object. Attributes read here: ``annotations``
            (path of the source COCO JSON), ``ratio`` (passed to
            ``train_test_split`` as ``train_size``) and
            ``having_annotations`` (drop images without any annotation
            before splitting).
    """
    with open(args.annotations, 'rt', encoding='UTF-8') as annotations_file:
        coco = json.load(annotations_file)

    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    if args.having_annotations:
        # A set gives O(1) membership tests; the previous list made this
        # filter O(images * annotations).
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [image for image in images if image['id'] in annotated_ids]

    x, y = train_test_split(images, train_size=args.ratio)

    save_coco(TRAIN_PATH, x, filter_annotations(annotations, x), categories)
    save_coco(VAL_PATH, y, filter_annotations(annotations, y), categories)

    print('Split completed!')
def split_coco_dataset(list_of_datasets_to_split,
                       dest,
                       test_size=0.2,
                       mode='multi'):
    """Split one or several COCO datasets into a train and a test file.

    Images without any annotation are always dropped. With a single
    category the images are shuffled and cut by ``test_size``; otherwise a
    stratified split helper is used.

    Args:
        list_of_datasets_to_split: Paths of COCO JSON files. Several paths
            are merged with ``concatenate_datasets``; a single path is
            loaded directly.
        dest: Output path prefix; ``_train.json`` and ``_test.json`` are
            appended to it.
        test_size: Fraction of images put into the test split.
        mode: ``'multi'`` selects ``MultiStratifiedShuffleSplit``; any other
            value selects ``PseudoStratifiedShuffleSplit``.

    Returns:
        The pair of values returned by the two ``save_coco`` calls
        ``(train, test)``.
    """
    if len(list_of_datasets_to_split) > 1:
        dataset = concatenate_datasets(list_of_datasets_to_split)
    else:
        with open(list_of_datasets_to_split[0], 'r') as f:
            dataset = json.load(f)

    categories = dataset['categories']
    info = dataset['info']
    licenses = dataset['licenses']
    annotations = dataset['annotations']
    images = dataset['images']

    # A set gives O(1) membership tests; the previous list made this filter
    # O(images * annotations).
    annotated_ids = {int(ann['image_id']) for ann in annotations}
    images = [image for image in images if image['id'] in annotated_ids]

    if len(categories) == 1:
        # Nothing to stratify on: shuffle in place and cut.
        np.random.shuffle(images)
        n_test = int(len(images) * test_size)
        x = images[n_test:]
        y = images[0:n_test]
        print('Train:', len(x), 'images, valid:', len(y))
    elif mode == 'multi':
        x, y = MultiStratifiedShuffleSplit(images, annotations, test_size)
    else:
        x, y = PseudoStratifiedShuffleSplit(images, annotations, test_size)

    train = save_coco(dest + '_train.json', info, licenses, x,
                      filter_annotations(annotations, x), categories)
    test = save_coco(dest + '_test.json', info, licenses, y,
                     filter_annotations(annotations, y), categories)

    print('Finished stratified shuffle split. Results saved in:',
          dest + '_train.json', dest + '_test.json')
    return train, test
def merge():
    """Benchmark merge for growing key counts and plot the timings.

    Runs ``expr_result(3, n)`` for every ``n`` in ``range(10, 5000, 100)``,
    replaces the two ``*.merge.2rand`` entries of each result by the mean of
    their ``'time'`` values, prints the results and plots every series whose
    key contains neither 'pop' nor 'push'. For each plotted series the slope
    of a degree-1 ``np.polyfit`` is printed.
    """
    sizes = list(range(10, 5000, 100))
    raw_results = list(
        tqdm((expr_result(3, n) for n in sizes), total=len(sizes)))

    def mean_time(samples):
        """Average the 'time' entry over a list of sample dicts."""
        return sum(sample['time'] for sample in samples) / len(samples)

    def average_merge_timings(result):
        """Collapse both merge sample lists of `result` into their means."""
        for merge_key in ('h.merge.2rand', 'bh.merge.2rand'):
            result[merge_key] = mean_time(result[merge_key])
        return result

    result_dics = F.lmap(average_merge_timings, raw_results)
    print(result_dics)

    exprs = F.join_with(list, result_dics)
    selected = F.lremove(lambda key: 'pop' in key or 'push' in key,
                         exprs.keys())

    slopes = {}
    for key in sorted(selected):
        is_bh = 'bh' in key
        plt.plot(sizes,
                 exprs[key],
                 label=key,
                 marker='.' if is_bh else 'x',
                 linestyle='-' if is_bh else '--')
        slopes[key] = np.polyfit(sizes, exprs[key], 1)[0]
    pprint(slopes)

    plt.xlabel('max number of keys')
    plt.ylabel('milli seconds')
    plt.legend()
    plt.show()
def create_cocosplit(args):
    """
    Downloaded from github.com/akarazniewicz/cocosplit.git@master and modified
    to just handle annotations.

    Create a new train/test split from an original COCO dataset, keeping only
    the first ``args.percentage`` percent of each split.

    :param args: namespace-like object. Attributes read here: ``annotations``
        (path of the source COCO JSON), ``having_annotations`` (drop images
        without any annotation), ``split`` (passed to ``train_test_split`` as
        ``train_size``), ``percentage`` (clamped in place to [0, 100]) and
        ``train`` / ``test`` (output paths).
    :return: None
    """
    with open(args.annotations, 'rt', encoding='UTF-8') as annotations_file:
        coco = json.load(annotations_file)

    info = coco['info']
    licenses = coco['licenses']
    images = coco['images']
    annotations = coco['annotations']

    if args.having_annotations:
        # A set gives O(1) membership tests; the previous list made this
        # filter O(images * annotations).
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [image for image in images if image['id'] in annotated_ids]

    x, y = train_test_split(images, train_size=args.split, shuffle=True)

    if args.percentage < 0:
        args.percentage = 0
    if args.percentage > 100:
        args.percentage = 100

    # Number of images to keep from each split. The previous code subtracted 1
    # and then used the value as an exclusive slice end, which dropped one
    # image too many (e.g. 100% kept len - 1 images, 10% of 10 kept none).
    keep_x = int(len(x) * args.percentage / 100)
    keep_y = int(len(y) * args.percentage / 100)
    train_images = x[:keep_x]
    test_images = y[:keep_y]

    save_coco(args.train, info, licenses, train_images,
              filter_annotations(annotations, train_images))
    save_coco(args.test, info, licenses, test_images,
              filter_annotations(annotations, test_images))

    # Report what was actually written, not the pre-truncation split sizes.
    print("Saved {} entries in {} and {} in {}".format(
        len(train_images), args.train, len(test_images), args.test))
def main(args):
    """Split a COCO annotation file into train, test and validation files.

    The test split is taken first using the module-level ``ratio_test``;
    the validation split is then taken from the remainder with a ratio
    rescaled so that it amounts to ``ratio_valid`` of the whole dataset.

    Args:
        args: Namespace-like object. Attributes read here: ``annotations``
            (path of the source COCO JSON) and ``trainJson_name`` /
            ``testJson_name`` / ``validJson_name`` (output paths).
    """
    with open(args.annotations, 'rt', encoding='UTF-8') as annotations_file:
        coco = json.load(annotations_file)

    info = coco['info']
    licenses = coco['licenses']
    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    # NOTE(review): this tests the annotation *path*, which is necessarily
    # non-empty once the file above was opened, so images without annotations
    # are always dropped. A separate boolean option may have been intended;
    # confirm against the argument parser before changing it.
    if args.annotations:
        # A set gives O(1) membership tests; the previous list made this
        # filter O(images * annotations).
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [image for image in images if image['id'] in annotated_ids]

    train_before, test = train_test_split(images, test_size=ratio_test)

    # Express the validation share relative to what is left after the test
    # split has been removed.
    ratio_remaining = 1 - ratio_test
    ratio_valid_adjusted = ratio_valid / ratio_remaining
    train_after, valid = train_test_split(train_before,
                                          test_size=ratio_valid_adjusted)

    save_coco(args.trainJson_name, info, licenses, train_after,
              filter_annotations(annotations, train_after), categories)
    save_coco(args.testJson_name, info, licenses, test,
              filter_annotations(annotations, test), categories)
    save_coco(args.validJson_name, info, licenses, valid,
              filter_annotations(annotations, valid), categories)

    print("Saved {} entries in {} and {} in {} and {} in {}".format(
        len(train_after), args.trainJson_name, len(test), args.testJson_name,
        len(valid), args.validJson_name))
def reduce_cocosplit(args):
    """
    Downloaded from github.com/akarazniewicz/cocosplit.git@master

    Reduce a pre-cleaned COCO annotation file so that only a certain
    percentage of its images (and their annotations) is kept.

    :param args: namespace-like object. Attributes read here: ``annotations``
        (path of the source COCO JSON), ``having_annotations`` (drop images
        without any annotation), ``percentage`` (clamped in place to
        [0, 100]) and ``train`` (output path).
    :return: None
    """
    with open(args.annotations, 'rt', encoding='UTF-8') as annotations_file:
        coco = json.load(annotations_file)

    info = coco['info']
    licenses = coco['licenses']
    images = coco['images']
    annotations = coco['annotations']

    if args.having_annotations:
        # A set gives O(1) membership tests; the previous list made this
        # filter O(images * annotations).
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [image for image in images if image['id'] in annotated_ids]

    if args.percentage < 0:
        args.percentage = 0.0
    if args.percentage > 100:
        args.percentage = 100.0

    # NOTE(review): if train_test_split is scikit-learn's, a float train_size
    # of exactly 0.0 or 1.0 is rejected, so the clamped extremes 0 and 100
    # would still fail here - confirm the intended behaviour.
    x, _ = train_test_split(images,
                            train_size=args.percentage / 100,
                            shuffle=True)

    save_coco(args.train, info, licenses, x,
              filter_annotations(annotations, x))

    print("Saved")
def main(args):
    """Convert a COCO annotation file into per-image text label files.

    Category names are overwritten from the module-level ``CLASSES`` list,
    annotations with a non-positive ``area`` are dropped, and the images are
    divided into train/val/test lists using the module-level ``test_set`` and
    ``exclude_set`` (matched against the second ``/``-separated component of
    ``file_name``). For every image one text file is written under
    ``<root>/labels/<mode>`` with one line per kept annotation:
    ``<class> <cx> <cy> <w> <h>``, where the box is clipped to the image and
    the centre/size are divided by the image width/height (this looks like
    the YOLO label layout - TODO confirm). Unless ``args.label_only`` is set
    the image is also copied to ``<root>/images/<mode>``.

    Args:
        args: Namespace-like object. Attributes read here: ``annotations``,
            ``having_annotations``, ``split``, ``sample``, ``coco_category``,
            ``root`` and ``label_only``.
    """
    with open(args.annotations, 'rt', encoding='UTF-8') as annotations_file:
        coco = json.load(annotations_file)

    images = coco['images']
    annotations = coco['annotations']
    for item in coco["categories"]:
        item['name'] = CLASSES[int(item['id']) - 1]
    categories = coco['categories']
    annotations = [item for item in annotations if item['area'] > 0]

    # Group the annotations by image once. Previously every image scanned the
    # full annotation list, which is O(images * annotations).
    annotations_by_image = {}
    for ann in annotations:
        annotations_by_image.setdefault(int(ann['image_id']), []).append(ann)

    if args.having_annotations:
        images = [
            image for image in images if image['id'] in annotations_by_image
        ]

    held_out = test_set + exclude_set
    x = [
        item for item in images
        if item['file_name'].split('/')[1] not in held_out
    ]
    y = [
        item for item in images if item['file_name'].split('/')[1] in test_set
    ]

    if 0 < args.split < 1:
        x_train, x_val = train_test_split(x, train_size=args.split)
    else:
        # No usable split ratio: validate on the test images.
        x_val = y.copy()
        x_train = x.copy()

    if args.sample:
        random.shuffle(x_train)
        random.shuffle(x_val)
        random.shuffle(y)
        x_train = x_train[:int(len(x_train) * args.sample)]
        x_val = x_val[:int(len(x_val) * args.sample)]
        y = y[:int(len(y) * args.sample)]

    # Maps a COCO category id to the class index written to the label file.
    if not args.coco_category:
        category_map = {
            item['id']: select_classes.index(item['name'])
            for item in categories if item['name'] in select_classes
        }
    else:
        category_map = {
            item['id']: int(item['id'] - 1)
            for item in categories if item['name'] in select_classes
        }

    root = args.root
    label_folder = os.path.join(root, 'labels')
    os.makedirs(label_folder, exist_ok=True)
    image_folder = os.path.join(root, 'images')
    os.makedirs(image_folder, exist_ok=True)

    suffix = '_thermal' if 'thermal' in args.annotations else '_rgb'
    lists = [x_train, x_val, y]
    lists_mode = [mode + suffix for mode in ('train', 'val', 'test')]

    for items, mode in zip(lists, lists_mode):
        os.makedirs(os.path.join(label_folder, mode), exist_ok=True)
        os.makedirs(os.path.join(image_folder, mode), exist_ok=True)

        for item in items:
            txt_name = os.path.join(
                label_folder, mode,
                item['file_name'].replace('png', 'txt').replace('/', '_'))
            if not args.label_only:
                image_name = os.path.join(
                    image_folder, mode, item['file_name'].replace('/', '_'))
                shutil.copyfile(os.path.join(root, item['file_name']),
                                image_name)

            # The context manager closes the label file even when writing
            # fails; it used to be opened and closed by hand.
            with open(txt_name, 'w') as fid:
                for ann in annotations_by_image.get(item['id'], []):
                    if ann['category_id'] not in category_map:
                        continue
                    # Clip the box to the image (modifies the annotation's
                    # bbox list in place, as before).
                    bbox = ann['bbox']
                    bbox[0] = np.max([0., bbox[0]])
                    bbox[1] = np.max([0., bbox[1]])
                    bbox[2] = np.min([bbox[0] + bbox[2], item['width'] - 1
                                      ]) - bbox[0]
                    bbox[3] = np.min([bbox[1] + bbox[3], item['height'] - 1
                                      ]) - bbox[1]
                    if bbox[2] * bbox[3] > 0:
                        fid.write(
                            '%d %f %f %f %f\n' %
                            (category_map[ann['category_id']],
                             (bbox[0] + bbox[2] / 2.0) / item['width'],
                             (bbox[1] + bbox[3] / 2.0) / item['height'],
                             bbox[2] / item['width'],
                             bbox[3] / item['height']))

    print("Saved {} entries in train {} in val, and {} in test".format(
        len(x_train), len(x_val), len(y)))
# Benchmark script: run `expr_result` for growing data sizes, store the raw
# results as YAML, plot every measured series against the number of items and
# store the slope of a linear fit per series.

# Number of experiment steps: first command-line argument, default 10.
# num_expr = 3 if len(sys.argv) != 2 else int(sys.argv[1])
num_expr = 10 if len(sys.argv) != 2 else int(sys.argv[1])
# block = 2**29 // num_expr
block = 2**10 // num_expr

data_sizes = [n * block for n in range(1, num_expr)]
# `total` now matches the number of experiments actually run; it used to be
# num_expr, one more than the number of items, so the bar never completed.
exprs = list(tqdm(map(expr_result, data_sizes), total=len(data_sizes)))

with open('result_%d.yml' % (num_expr,), 'w') as f:
    yaml.dump(F.join_with(list, exprs), f)

with open('result_%d.yml' % (num_expr,)) as f:
    result_dic = yaml.safe_load(f)

# A string predicate is treated by funcy as a regular expression, so this
# drops the x-axis key and keeps the measured series.
y_keys = F.lremove('data.num', result_dic.keys())

import numpy as np

xs = result_dic['data.num']
gradient_dic = {}
for key in y_keys:
    plt.plot(
        xs,
        result_dic[key],
        label=key,
        marker='x' if 'q' in key else 'o',
        linestyle='--' if 'q' in key else '-',
    )
    # Convert the numpy scalar to a plain float: yaml.dump would otherwise
    # emit a python-object tag that yaml.safe_load cannot read back.
    gradient_dic[key] = float(np.polyfit(xs, result_dic[key], 1)[0])
print(gradient_dic)

with open('result_%d_gradient.yml' % (num_expr,), 'w') as f:
    yaml.dump(gradient_dic, f)

plt.xlabel('number of items')
def newcols(df):
    """Return the columns of ``df`` that are not known yet.

    A column is known when it is listed in any ``SCOPE_COLUMNS`` entry or in
    ``TRASH_COLUMS``. The order of ``df.columns`` is preserved.
    """
    scoped = set(cat(cols for _, cols in SCOPE_COLUMNS))
    known = scoped | TRASH_COLUMS
    return [column for column in df.columns if column not in known]
def main(args):
    """Optionally build a COCO annotation file, then split it into train/test.

    Args:
        args: Namespace-like object. Attributes read here: ``train_folder``,
            ``test_folder``, ``seg``, ``ann``, ``train_ann``, ``test_ann``
            (all appended to the current working directory), ``ann_bool``
            (create the annotation file first), ``train_split`` (passed to
            ``train_test_split`` as ``train_size``) and ``dataset_type``
            (1: keep annotated images plus an equally sized random draw of
            non-annotated ones; 2: keep annotated images only; anything
            else: keep all images).
    """
    # 1.
    # Read in the data.
    PATH = os.path.abspath(os.getcwd())
    TRAIN = PATH + args.train_folder  # ARG
    TEST = PATH + args.test_folder  # ARG
    SEGMENTATION = PATH + args.seg  # ARG
    ANNOTATIONS_JSON = PATH + args.ann  # ARG
    TRAIN_JSON = PATH + args.train_ann  # ARG
    TEST_JSON = PATH + args.test_ann  # ARG
    create_new_annotations = args.ann_bool  # ARG
    split_size = args.train_split  # ARG
    dataset_type = args.dataset_type

    if create_new_annotations == True:
        print("Creating new annotations")
        # NOTE(review): the names below are locals and create_annotations()
        # is called without arguments, so it cannot see them from here -
        # confirm how create_annotations obtains its inputs.
        dataset_train = TRAIN
        csv_train = SEGMENTATION
        IMAGE_DIR = dataset_train
        df = pd.read_csv(csv_train)
        df = df.dropna(axis=0)  # Drop where there are no ships.
        INFO = {
            "description": "Kaggle Dataset",
            "url": "https://github.com/pascal1129",
            "version": "0.1.0",
            "year": 2018,
            "contributor": "pascal1129",
            "date_created": datetime.datetime.utcnow().isoformat(' ')
        }
        LICENSES = [{
            "id": 1,
            "name": "Attribution-NonCommercial-ShareAlike License",
            "url": "http://creativecommons.org/licenses/by-nc-sa/2.0/"
        }]
        CATEGORIES = [
            {
                'id': 1,
                'name': 'ship',
                'supercategory': 'ship',
            },
        ]
        test = create_annotations()
        # ANNOTATIONS_JSON already contains PATH. The file used to be written
        # to PATH + ANNOTATIONS_JSON (prefix applied twice), i.e. not to the
        # location that is read back in step 2.
        with open(ANNOTATIONS_JSON, 'w') as output_json_file:
            json.dump(test, output_json_file, indent=4)

    # 2.
    with open(ANNOTATIONS_JSON, 'rt', encoding='UTF-8') as annotations_file:
        print("Creating new train/test split")
        coco = json.load(annotations_file)
        print("Loaded annotataions file:", ANNOTATIONS_JSON)

    info = coco['info']
    licenses = coco['licenses']
    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    images_with_annotations = [int(a['image_id']) for a in annotations]

    if dataset_type == 1:
        print("Dataset equal annotated/not annotated images.")
        img_ids = [int(image['id']) for image in images]
        # Choose the same amount of images without annotations.
        img_ids_no_ann = np.setdiff1d(img_ids, images_with_annotations)
        img_ids_no_ann = np.random.choice(img_ids_no_ann,
                                          len(images_with_annotations))
        # A set gives O(1) membership tests instead of scanning the array
        # once per image.
        kept_ids = set(img_ids_no_ann.tolist())
        kept_ids.update(images_with_annotations)
        # Removes all images not in the consolidated id set.
        images = [image for image in images if image['id'] in kept_ids]
    elif dataset_type == 2:
        print("Dataset remove not annotated images.")
        annotated_ids = set(images_with_annotations)
        # Removes all images that are not annotated.
        images = [image for image in images if image['id'] in annotated_ids]

    # The first returned part is the one sized by train_size, so it is the
    # training set; the two names used to be swapped.
    train, val = train_test_split(images, train_size=split_size)

    save_coco(TRAIN_JSON, info, licenses, train,
              filter_annotations(annotations, train), categories)
    save_coco(TEST_JSON, info, licenses, val,
              filter_annotations(annotations, val), categories)

    print("Saved {} entries in {} and {} in {}".format(
        len(train), TRAIN_JSON, len(val), TEST_JSON))
def get_train_val_test(annot_file,
                       valid_size=0.2,
                       test_size=0.2,
                       with_randomsampler=False):
    """Randomly divide the annotated images of a COCO file into three splits.

    Images without any annotation are dropped. The remaining image indices
    are shuffled with ``np.random.shuffle``; the first ``test_size`` fraction
    becomes the test split and the last ``valid_size`` fraction of the rest
    becomes the validation split. For each split the per-category annotation
    counts and the number of zero counts are printed.

    Args:
        annot_file: Path of the COCO JSON file.
        valid_size: Fraction of the non-test images used for validation.
        test_size: Fraction of all annotated images used for testing.
        with_randomsampler: When true, also return one
            ``SubsetRandomSampler`` per split.

    Returns:
        ``[train_ids, valid_ids, test_ids]`` (lists of image ids), or a
        tuple of that list and ``[train_sampler, valid_sampler,
        test_sampler]`` when ``with_randomsampler`` is true.
    """
    print(annot_file)
    # The context manager closes the file even if loading fails; it used to
    # be opened by hand and closed only after the statistics were printed.
    with open(annot_file, 'rt', encoding='UTF-8') as annotations_file:
        raw = json.load(annotations_file)
    images = raw['images']
    annotations = raw['annotations']
    coco = COCO(annot_file)

    # A set gives O(1) membership tests; the previous list made this filter
    # O(images * annotations).
    annotated_ids = {int(a['image_id']) for a in annotations}
    images = [image for image in images if image['id'] in annotated_ids]

    dataset_size = len(images)
    indices = list(range(dataset_size))
    np.random.shuffle(indices)

    test_split = int(np.floor(test_size * dataset_size))
    train_indices, test_indices = indices[test_split:], indices[:test_split]

    train_size = len(train_indices)
    valid_split = int(np.floor((1 - valid_size) * train_size))
    train_indices, valid_indices = (train_indices[:valid_split],
                                    train_indices[valid_split:])

    # Check for category in each splits
    for split in (train_indices, valid_indices, test_indices):
        cats = [0] * 91
        for index in split:
            image_id = images[index]['id']
            for ann in coco.loadAnns(coco.getAnnIds(image_id)):
                cats[ann['category_id']] += 1
        print("training")
        print(cats[1:])
        # Number of category slots (index 0 included) with no annotation.
        print(cats.count(0))

    train_ids = [images[index]['id'] for index in train_indices]
    valid_ids = [images[index]['id'] for index in valid_indices]
    test_ids = [images[index]['id'] for index in test_indices]

    if with_randomsampler:
        train_sampler = SubsetRandomSampler(train_indices)
        valid_sampler = SubsetRandomSampler(valid_indices)
        test_sampler = SubsetRandomSampler(test_indices)
        return [train_ids, valid_ids,
                test_ids], [train_sampler, valid_sampler, test_sampler]
    return [train_ids, valid_ids, test_ids]
def lremove(f, *seq):
    """Curried wrapper around ``F.lremove``.

    When at least one extra argument is given, ``F.lremove(f, *seq)`` is
    called immediately. With only ``f``, a function is returned that accepts
    the remaining arguments later and then calls ``F.lremove(f, ...)``.
    """
    if seq:
        return F.lremove(f, *seq)

    def remove_later(*xs):
        return F.lremove(f, *xs)

    return remove_later
def main(args):
    """Filter a COCO annotation file and write the result back in place.

    Steps, each followed by a progress print:

    1. report annotations whose ``segmentation`` is an empty list;
    2. drop annotations whose box has its longer side below 50 or its
       shorter side below 30;
    3. drop the categories named "human" and "car" and their annotations;
    4. drop images without any remaining annotation;
    5. drop images whose file name contains "copy" (case-insensitive);
    6. keep the last 300 images when sorted by file name;
    7. report kept images that have an annotation without segmentation.

    The result overwrites ``args.annotations``.

    Args:
        args: Namespace-like object; only ``annotations`` (path of the COCO
            JSON file, read and then overwritten) is used.
    """
    with open(args.annotations, "rt", encoding="UTF-8") as annotations_file:
        coco = json.load(annotations_file)

    info = coco["info"]
    licenses = coco["licenses"]
    images = coco["images"]
    annotations = coco["annotations"]
    categories = coco["categories"]

    print(coco.keys())
    print("Original", len(images))

    # First image per id, for the diagnostic below. The previous code ran
    # next(...) inside funcy.lmap; a missing image raised StopIteration there,
    # which silently ended the whole pass. A missing image now prints None.
    first_image_by_id = {}
    for image in images:
        first_image_by_id.setdefault(image["id"], image)
    for annotation in annotations:
        if annotation["segmentation"] == []:
            print(annotation, first_image_by_id.get(annotation["image_id"]))

    print("Annotations", len(annotations))

    annotations = [
        a for a in annotations
        if not (max(a["bbox"][2], a["bbox"][3]) < 50
                or min(a["bbox"][2], a["bbox"][3]) < 30)
    ]
    print("Annotations filtered by size", len(annotations))

    excluded_category_ids = set()
    kept_categories = []
    for category in categories:
        if category["name"] == "human" or category["name"] == "car":
            excluded_category_ids.add(category["id"])
        else:
            kept_categories.append(category)
    print(len(kept_categories), len(categories))
    categories = kept_categories

    annotations = [
        a for a in annotations
        if a["category_id"] not in excluded_category_ids
    ]
    print("Annotations filtered cars and humans", len(annotations))

    # Sets give O(1) membership tests; the previous lists made these filters
    # O(images * annotations).
    annotated_ids = {int(a["image_id"]) for a in annotations}
    images = [image for image in images if image["id"] in annotated_ids]
    print("Removed empty images", len(images))

    images = [
        image for image in images if "copy" not in image["file_name"].lower()
    ]
    print("Removed copy", len(images))

    images.sort(key=lambda image: image["file_name"])
    images = images[-300:]
    print(len(images))

    no_segm = [a for a in annotations if len(a["segmentation"]) == 0]
    print(len(no_segm), len(annotations))

    no_segm_image_ids = {a["image_id"] for a in no_segm}
    for image in images:
        if image["id"] in no_segm_image_ids:
            print("! no segm annot in #" + image["file_name"])

    save_coco(
        args.annotations,
        info,
        licenses,
        images,
        filter_annotations(annotations, images),
        categories,
    )