Пример #1
0
def init_interactive(
    defaults: Dict[str, str],
    provided: Dict[str, str],
    validator: Callable[[str, str], Union[str, Tuple[str, str]]] = None,
    stream: Optional[TextIO] = None,
) -> Dict[str, str]:
    """Prompt for every setting whose key is missing from ``provided``.

    Prompts are issued in three groups (command, dependencies, outputs), each
    through one ``_prompts`` call. Only the command group is run with
    ``allow_omission=False``. "metrics" and "plots" are asked for only when
    "live" is absent from ``provided``.

    Args:
        defaults: Forwarded to ``_prompts`` as ``defaults``.
        provided: Already-known values; their keys are not prompted for.
        validator: Forwarded unchanged to ``_prompts``.
        stream: Forwarded unchanged to ``_prompts``.

    Returns:
        The provided "cmd" (when present) merged with the ``compact``-ed
        response of each prompt group.
    """
    known = provided.keys()
    if "live" in provided:
        wanted_outputs = ["models"]
    else:
        wanted_outputs = ["models", "metrics", "plots"]

    # One row per group: (heading, keys still to ask for, allow_omission).
    sections = (
        ("", lremove(known, ["cmd"]), False),
        (
            "Enter experiment dependencies.",
            lremove(known, ["code", "data", "params"]),
            True,
        ),
        ("Enter experiment outputs.", lremove(known, wanted_outputs), True),
    )

    answers: Dict[str, str] = {}
    if "cmd" in provided:
        answers["cmd"] = provided["cmd"]

    for title, missing, optional in sections:
        if title and missing:
            ui.error_write(title, styled=True)
        reply = _prompts(
            missing,
            defaults=defaults,
            allow_omission=optional,
            validator=validator,
            stream=stream,
        )
        answers.update(compact(reply))
        if missing:
            # Blank separator line after a group that actually prompted.
            ui.error_write(styled=True)
    return answers
Пример #2
0
def init_interactive(
    name: str,
    defaults: Dict[str, str],
    provided: Dict[str, str],
    validator: Callable[[str, str], Union[str, Tuple[str, str]]] = None,
    live: bool = False,
    stream: Optional[TextIO] = None,
) -> Dict[str, str]:
    """Interactively collect the stage settings missing from ``provided``.

    Note that "cmd" is popped from ``provided`` (the caller's dict is
    mutated). When a command was provided and nothing else is missing, the
    function returns without writing anything to the UI.

    Args:
        name: Interpolated into the introductory message.
        defaults: Default values; shown in the workspace tree and forwarded
            to ``_prompts``.
        provided: Already-known values; their keys are not prompted for.
        validator: Forwarded to ``_prompts`` for the path prompts.
        live: Selects which secondary keys are prompted for: "live" when
            true, "metrics" and "plots" otherwise.
        stream: Forwarded to ``_prompts``.

    Returns:
        The command (provided or prompted) merged with the ``compact``-ed
        prompt responses.
    """
    command = provided.pop("cmd", None)
    known = provided.keys()
    secondary_keys = ["live"] if live else ["metrics", "plots"]
    prompts = (lremove(known, ["code", "data", "models", "params"]) +
               lremove(known, secondary_keys))

    # The tree shows defaults overlaid with provided values, minus the
    # entries of the mode that is not active (unless explicitly provided).
    workspace = {**defaults, **provided}
    if live:
        hidden = [key for key in ("plots", "metrics") if key not in provided]
    elif "live" not in provided:
        hidden = ["live"]
    else:
        hidden = []
    for key in hidden:
        workspace.pop(key, None)

    ret: Dict[str, str] = {"cmd": command} if command else {}

    if command and not prompts:
        return ret

    ui.error_write(
        f"This command will guide you to set up a [bright_blue]{name}[/]",
        "stage in [green]dvc.yaml[/].",
        f"\nSee [repr.url]{PIPELINE_FILE_LINK}[/].\n",
        styled=True,
    )

    if not command:
        cmd_reply = _prompts(["cmd"], allow_omission=False, stream=stream)
        ret.update(compact(cmd_reply))
        if prompts:
            ui.error_write(styled=True)

    if not prompts:
        return ret

    ui.error_write(
        "Enter the paths for dependencies and outputs of the command.",
        styled=True,
    )
    if workspace:
        ui.error_write(build_workspace_tree(workspace), styled=True)
    ui.error_write(styled=True)

    path_reply = _prompts(prompts, defaults, validator=validator,
                          stream=stream)
    ret.update(compact(path_reply))
    return ret
Пример #3
0
def push_pop():
    """Run the push/pop experiments and show one timing plot for each.

    ``expr_result(n, 3)`` is evaluated for n = 10, 5010, ... (< 200000) and
    the per-run dicts are merged key-wise into lists. The first figure shows
    every series whose key contains neither 'merge' nor 'pop'; the second
    every series whose key contains neither 'merge' nor 'push'. For each
    plotted series the slope of a degree-1 ``np.polyfit`` is printed.
    """
    sizes = list(range(10, 200000, 5000))
    per_size = list(
        tqdm(map(lambda n: expr_result(n, 3), sizes), total=len(sizes)))
    exprs = F.join_with(list, per_size)

    pprint(exprs)
    print(sizes)

    # Same figure twice, differing only in the operation that is left out.
    for excluded in ('pop', 'push'):
        series = [
            key for key in exprs.keys()
            if 'merge' not in key and excluded not in key
        ]

        slopes = {}
        for key in sorted(series):
            is_bh = 'bh' in key
            plt.plot(num_datas := sizes,
                     exprs[key],
                     label=key,
                     marker='.' if is_bh else 'x',
                     linestyle='-' if is_bh else '--')
            slopes[key] = np.polyfit(num_datas, exprs[key], 1)[0]

        pprint(slopes)
        plt.xlabel('number of keys')
        plt.ylabel('milli seconds')
        plt.legend()
        plt.show()
Пример #4
0
def main(args):
    """Split a COCO annotation file into train/test parts and copy the images.

    Reads the JSON at ``args.annotations``, optionally keeps only images that
    are referenced by at least one annotation (``args.having_annotations``),
    splits the images with ``train_test_split(train_size=args.split)``, hands
    each part to ``save_coco`` under ``args.traindir`` / ``args.testdir`` and
    copies every image file from ``args.inputdir`` into the matching directory.
    """
    # Only the JSON parse needs the handle; a distinct name also avoids
    # shadowing it with the annotation list below.
    with open(args.annotations, 'rt', encoding='UTF-8') as annotation_file:
        coco = json.load(annotation_file)

    info = coco['info']
    licenses = coco['licenses']
    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    # exist_ok=True already tolerates directories that are present.
    pl.Path(args.traindir).mkdir(parents=True, exist_ok=True)
    pl.Path(args.testdir).mkdir(parents=True, exist_ok=True)

    if args.having_annotations:
        # A set gives O(1) membership; the former list made this filter
        # O(images * annotations).
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [img for img in images if img['id'] in annotated_ids]

    x, y = train_test_split(images, train_size=args.split)
    save_coco(os.path.join(args.traindir, args.train), info, licenses, x,
              filter_annotations(annotations, x), categories)
    save_coco(os.path.join(args.testdir, args.test), info, licenses, y,
              filter_annotations(annotations, y), categories)

    for subset, target_dir in ((x, args.traindir), (y, args.testdir)):
        for image in subset:
            shutil.copy(os.path.join(args.inputdir, image["file_name"]),
                        target_dir)

    print("Saved {} entries in {} and {} in {}".format(len(x), args.train, len(y), args.test))
Пример #5
0
def split_coco_annotation(annotations,
                          split_ratio,
                          train_json='train_anno.json',
                          test_json='test_anno.json',
                          is_having=True):
    """Split a COCO annotation file into a train and a test annotation file.

    Args:
        annotations: Path of the COCO JSON file to read.
        split_ratio: Passed to ``train_test_split`` as ``train_size``.
        train_json: Path handed to ``save_coco`` for the train part.
        test_json: Path handed to ``save_coco`` for the test part.
        is_having: When true, images not referenced by any annotation are
            dropped before splitting.
    """
    with open(annotations, 'rt', encoding='UTF-8') as anno:
        coco = json.load(anno)

    images = coco['images']
    # Separate name: ``annotations`` is the path parameter.
    image_annotations = coco['annotations']
    categories = coco['categories']

    if is_having:
        # Set membership is O(1); the former list lookup made the filter
        # quadratic in the dataset size.
        annotated_ids = {int(a['image_id']) for a in image_annotations}
        images = [img for img in images if img['id'] in annotated_ids]

    x, y = train_test_split(images, train_size=split_ratio)

    save_coco(train_json, x, filter_annotations(image_annotations, x),
              categories)
    save_coco(test_json, y, filter_annotations(image_annotations, y),
              categories)

    print("Saved {} entries in {} and {} in {}".format(
        len(x), train_json, len(y), test_json))
Пример #6
0
def main(args):
    """Split the COCO file ``args.annotations`` into train and test files.

    When ``args.having_annotations`` is set, images not referenced by any
    annotation are dropped first. The remaining images are split with
    ``train_test_split(train_size=args.split)`` and each part is handed to
    ``save_coco`` (targets ``args.train`` and ``args.test``) together with its
    ``filter_annotations`` result.
    """
    # Close the file as soon as it is parsed and avoid rebinding the handle's
    # name to the annotation list.
    with open(args.annotations, "rt", encoding="UTF-8") as annotation_file:
        coco = json.load(annotation_file)

    info = coco["info"]
    licenses = coco["licenses"]
    images = coco["images"]
    annotations = coco["annotations"]
    categories = coco["categories"]

    if args.having_annotations:
        # O(1) set lookups instead of scanning a list once per image.
        annotated_ids = {int(a["image_id"]) for a in annotations}
        images = [img for img in images if img["id"] in annotated_ids]

    x, y = train_test_split(images, train_size=args.split)

    save_coco(args.train, info, licenses, x,
              filter_annotations(annotations, x), categories)
    save_coco(args.test, info, licenses, y,
              filter_annotations(annotations, y), categories)

    print("Saved {} entries in {} and {} in {}".format(
        len(x), args.train, len(y), args.test))
Пример #7
0
def find_executables():
    """Look up the Shotcut executables, asking the user for help if needed.

    ffmpeg, ffprobe, ffplay and melt are looked up with
    ``find_shotcut_executable``; ffplay is the only one allowed to be missing.
    While a required one is missing, an error box is shown and the user may
    pick a Shotcut location, which is used as ``shotcut_dir`` for the next
    attempt.

    Returns:
        A dict mapping each executable name to its lookup result, or ``False``
        when the user cancels the message box or the file dialog.
    """
    mandatory = ('ffmpeg', 'ffprobe', 'melt')
    lookup_names = ('ffmpeg', 'ffprobe', 'ffplay', 'melt')
    search_dir = None
    while True:
        found = {}
        for exe in lookup_names:
            found[exe] = find_shotcut_executable(exe, shotcut_dir=search_dir)

        # A falsy lookup result means the executable was not found.
        not_found = [exe for exe in mandatory if not found[exe]]
        if not not_found:
            return found

        answer = wx.MessageBox(
            f"Unable to find {util.oxford_join(not_found)} (part of Shotcut)! "
            "Please install Shotcut, or locate Shotcut in your filesystem.",
            "Error",
            style=wx.OK | wx.CANCEL | wx.ICON_ERROR)
        if answer == wx.CANCEL:
            return False

        picked = dialogs.file_dialog(None,
                                     message="Select Shotcut",
                                     style=wx.FD_OPEN | wx.FD_FILE_MUST_EXIST)
        if not picked:
            return False

        # A picked file stands for the directory that contains it.
        if Path(picked).is_dir():
            search_dir = picked
        else:
            search_dir = str(Path(picked).parent)
Пример #8
0
def main(annotation_path,
         split_ratio,
         having_annotations,
         train_save_path,
         test_save_path,
         random_state=None):
    """Split a COCO annotation file into train and test annotation files.

    Args:
        annotation_path: Path of the COCO JSON file to read.
        split_ratio: Passed to ``train_test_split`` as ``train_size``.
        having_annotations: When true, images not referenced by any
            annotation are dropped before splitting.
        train_save_path: Target handed to ``save_coco`` for the train part.
        test_save_path: Target handed to ``save_coco`` for the test part.
        random_state: Passed through to ``train_test_split``.
    """
    # Parse, then release the file; a distinct handle name avoids shadowing
    # it with the annotation list.
    with open(annotation_path, 'rt', encoding='UTF-8') as annotation_file:
        coco = json.load(annotation_file)

    info = coco['info']
    licenses = coco['licenses']
    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    if having_annotations:
        # Set membership keeps the filter linear in the dataset size.
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [img for img in images if img['id'] in annotated_ids]

    x, y = train_test_split(images,
                            train_size=split_ratio,
                            random_state=random_state)

    save_coco(train_save_path, info, licenses, x,
              filter_annotations(annotations, x), categories)
    save_coco(test_save_path, info, licenses, y,
              filter_annotations(annotations, y), categories)

    print("Saved {} entries in {} and {} in {}".format(
        len(x), train_save_path, len(y), test_save_path))
Пример #9
0
def main(args):
    """Split the COCO file ``args.annotations`` into train and test files.

    The 'info' and 'licenses' sections are not read; ``save_coco`` is called
    here with (path, images, annotations, categories) only. When
    ``args.having_annotations`` is set, images not referenced by any
    annotation are dropped before ``train_test_split(train_size=args.split)``.
    """
    with open(args.annotations, 'rt', encoding='UTF-8') as annotation_file:
        coco = json.load(annotation_file)

    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    if args.having_annotations:
        # Set lookups are O(1); the former list made the filter quadratic.
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [img for img in images if img['id'] in annotated_ids]

    x, y = train_test_split(images, train_size=args.split)

    save_coco(args.train, x, filter_annotations(annotations, x), categories)
    save_coco(args.test, y, filter_annotations(annotations, y), categories)

    print("Saved {} entries in {} and {} in {}".format(
        len(x), args.train, len(y), args.test))
Пример #10
0
def init_interactive(
    name: str,
    defaults: Dict[str, str],
    provided: Dict[str, str],
    show_tree: bool = False,
    live: bool = False,
) -> Dict[str, str]:
    """Prompt for the stage settings whose keys are missing from ``provided``.

    Returns an empty dict without any output when nothing is missing.
    Otherwise an introduction is written, optionally followed by a tree of
    the workspace paths (``show_tree``), and the missing keys are prompted
    for.

    Args:
        name: Shown in the introductory message.
        defaults: Defaults forwarded to ``_prompts``; also listed in the tree.
        provided: Already-known values; their keys are not prompted for.
        show_tree: Whether to print the assumed workspace structure.
        live: Prompt for "live" instead of "metrics" and "plots".

    Returns:
        The ``compact``-ed union of the primary and secondary prompt
        responses.
    """
    known = provided.keys()
    primary = lremove(known, ["cmd", "code", "data", "models", "params"])
    secondary = lremove(known, ["live"] if live else ["metrics", "plots"])

    if not primary and not secondary:
        return {}

    intro = ui.rich_text.assemble(
        "This command will guide you to set up a ",
        (name, "bright_blue"),
        " stage in ",
        ("dvc.yaml", "green"),
        ".",
    )
    see_also = ui.rich_text.assemble(
        "See ",
        (PIPELINE_FILE_LINK, "repr.url"),
        ".",
    )
    ui.error_write(intro, see_also, "", sep="\n", styled=True)

    if show_tree:
        from rich.tree import Tree

        tree = Tree(
            "DVC assumes the following workspace structure:",
            highlight=True,
        )
        workspace = {**defaults, **provided}
        workspace.pop("cmd", None)
        # Hide the "live" default unless live mode is on or it was provided.
        if not (live or "live" in provided):
            workspace.pop("live", None)
        for path in sorted(workspace.values()):
            tree.add(f"[green]{path}[/green]")
        ui.error_write(tree, styled=True)
        ui.error_write()

    primary_reply = _prompts(primary, defaults)
    secondary_reply = _prompts(secondary, defaults)
    return compact({**primary_reply, **secondary_reply})
Пример #11
0
def init_interactive(
    defaults: Dict[str, str],
    provided: Iterable[str],
    show_heading: bool = False,
    live: bool = False,
) -> Dict[str, str]:
    """Prompt for the stage settings whose keys are not in ``provided``.

    Args:
        defaults: Defaults forwarded to ``_prompts``.
        provided: Names of the settings that are already known. Any iterable
            of strings is accepted (set, dict keys view, list, generator).
        show_heading: Whether to write the introductory message first.
        live: Prompt for "live" instead of "metrics" and "plots".

    Returns:
        An empty dict when nothing is missing (nothing is written or
        prompted), otherwise the ``compact``-ed union of the primary and
        secondary prompt responses.
    """
    # Materialise once so that lists, tuples and one-shot iterators work as
    # well as sets; membership is then tested explicitly rather than by
    # handing ``provided`` to funcy as a predicate, which only set-likes
    # support.
    known = set(provided)
    primary = [
        key for key in ("cmd", "code", "data", "models", "params")
        if key not in known
    ]
    secondary = [
        key for key in (("live",) if live else ("metrics", "plots"))
        if key not in known
    ]

    if not (primary or secondary):
        return {}

    message = ("This command will guide you to set up your first stage in "
               "[green]dvc.yaml[/green].\n")
    if show_heading:
        ui.error_write(message, styled=True)

    return compact({
        **_prompts(primary, defaults),
        **_prompts(secondary, defaults),
    })
Пример #12
0
def main(annotation_path,
         split_ratio,
         having_annotations,
         train_save_path,
         test_save_path,
         random_state=None):
    """Split a COCO annotation file into train and test annotation files.

    A '__background__' category with id 0 is inserted at the front of the
    category list when no category has id 0. With ``split_ratio == 1`` the
    whole dataset is handed to ``save_coco`` for ``train_save_path`` and
    nothing is written for the test part.

    Args:
        annotation_path: Path of the COCO JSON file to read.
        split_ratio: Passed to ``train_test_split`` as ``train_size``.
        having_annotations: When true, images not referenced by any
            annotation are dropped before splitting.
        train_save_path: Target handed to ``save_coco`` for the train part.
        test_save_path: Target handed to ``save_coco`` for the test part.
        random_state: Passed through to ``train_test_split``.

    Returns:
        None.
    """
    with open(annotation_path, 'rt', encoding='UTF-8') as annotation_file:
        coco = json.load(annotation_file)

    # 'info' and 'licenses' are optional in the input.
    info = coco.get('info', '')
    licenses = coco.get('licenses', '')
    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    if not any(category['id'] == 0 for category in categories):
        categories.insert(0, {
            'supercategory': '',
            'id': 0,
            'name': '__background__'
        })

    if split_ratio == 1:
        save_coco(train_save_path, info, licenses, images, annotations,
                  categories)
        print("Saved {} entries in {}.".format(len(images),
                                               train_save_path))
        return None

    if having_annotations:
        # Set membership keeps this filter linear in the dataset size.
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [img for img in images if img['id'] in annotated_ids]

    x, y = train_test_split(images,
                            train_size=split_ratio,
                            random_state=random_state)

    save_coco(train_save_path, info, licenses, x,
              filter_annotations(annotations, x), categories)
    save_coco(test_save_path, info, licenses, y,
              filter_annotations(annotations, y), categories)

    print("Saved {} entries in {} and {} in {}".format(
        len(x), train_save_path, len(y), test_save_path))
Пример #13
0
def main(args):
    """Normalise a COCO file with path-like ids, then split it in two.

    Steps, in order:
      * image 'id' / 'file_name' and annotation 'image_id' are reduced to the
        part after the last backslash;
      * the last three categories are dropped and, of the rest, only those
        named 'c' or 't' are kept;
      * annotation category ids are remapped 5 -> 3, 6 -> 1, 7 -> 2 and only
        annotations whose resulting category id is 1 or 3 are kept;
      * optionally (``args.having_annotations``) images without a kept
        annotation are dropped;
      * the images are split with ``train_test_split(train_size=args.split)``
        and handed to ``save_coco`` for ``args.train`` / ``args.test``.

    'info' and 'licenses' are written as the literal string 'coco'.
    """
    with open(args.annotations, 'rt', encoding='UTF-8') as annotation_file:
        coco = json.load(annotation_file)

    info = 'coco'
    licenses = 'coco'
    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    for image in images:
        image['id'] = image['id'].split('\\')[-1]
        image['file_name'] = image['file_name'].split('\\')[-1]

    categories = [
        cat for cat in categories[:-3] if cat['name'] in ('c', 't')
    ]

    category_remap = {5: 3, 6: 1, 7: 2}
    kept_annotations = []
    for annotation in annotations:
        annotation['image_id'] = annotation['image_id'].split('\\')[-1]
        category_id = annotation['category_id']
        annotation['category_id'] = category_remap.get(category_id,
                                                       category_id)
        if annotation['category_id'] in (1, 3):
            kept_annotations.append(annotation)
    annotations = kept_annotations

    if args.having_annotations:
        # 'image_id' was already reduced to its last path component above,
        # so it can be collected as is. A set gives O(1) lookups.
        annotated_ids = {a['image_id'] for a in annotations}
        images = [img for img in images if img['id'] in annotated_ids]

    x, y = train_test_split(images, train_size=args.split)

    save_coco(args.train, info, licenses, x,
              filter_annotations(annotations, x), categories)
    save_coco(args.test, info, licenses, y,
              filter_annotations(annotations, y), categories)

    print("Saved {} entries in {} and {} in {}".format(
        len(x), args.train, len(y), args.test))
Пример #14
0
def main(annotation_path,
         save_path,
         folds,
         having_annotations,
         random_state=None):
    """Write ``folds`` train/val splits of a COCO annotation file.

    The image order is shuffled once (seeded with ``random_state``) and cut
    into ``folds`` consecutive validation slices of ``len(images) // folds``
    images each. For every fold, ``{save_path}/{fold}/train.json`` and
    ``{save_path}/{fold}/val.json`` are produced through ``save_coco``. Images
    left over by the integer division are never part of a validation slice.

    Args:
        annotation_path: Path of the COCO JSON file to read.
        save_path: Directory under which one sub-directory per fold is
            created.
        folds: Number of folds.
        having_annotations: When true, images not referenced by any
            annotation are dropped first.
        random_state: Seed passed to ``random.seed``.

    Raises:
        FileExistsError: From ``os.makedirs`` if a fold directory already
            exists.
    """
    random.seed(random_state)

    with open(annotation_path, 'rt', encoding='UTF-8') as annotation_file:
        coco = json.load(annotation_file)

    info = coco.get('info', '')
    licenses = coco.get('licenses', '')
    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    if having_annotations:
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [img for img in images if img['id'] in annotated_ids]

    num_images = len(images)
    fold_size = num_images // folds

    image_indices = list(range(num_images))
    random.shuffle(image_indices)

    for fold in range(folds):
        start = fold_size * fold
        stop = start + fold_size

        # Slice the *shuffled* order. Selecting by original index range (as
        # before) made every fold independent of the shuffle and the seed.
        val = [images[idx] for idx in image_indices[start:stop]]
        train = [
            images[idx]
            for idx in image_indices[:start] + image_indices[stop:]
        ]

        os.makedirs(f'{save_path}/{fold}')
        train_save_path = f'{save_path}/{fold}/train.json'
        val_save_path = f'{save_path}/{fold}/val.json'

        save_coco(train_save_path, info, licenses, train,
                  filter_annotations(annotations, train), categories)
        save_coco(val_save_path, info, licenses, val,
                  filter_annotations(annotations, val), categories)

        print("[Fold {}] Saved {} entries in {} and {} in {}".format(
            fold, len(train), train_save_path, len(val), val_save_path))
Пример #15
0
def split_coco(ann_f, split=0.8, remove_empty=False):
    """Split a COCO annotation file into ``*_train.json`` and ``*_val.json``.

    The output files are placed next to ``ann_f`` and named after it with the
    extension replaced by ``_train.json`` / ``_val.json``.

    Args:
        ann_f: Path of the COCO JSON file to read.
        split: Train fraction; converted with ``float`` and passed to
            ``train_test_split`` as ``train_size``.
        remove_empty: When true, images not referenced by any annotation are
            dropped before splitting.
    """
    # Derive the names from the extension-less stem. A plain
    # str.replace('.json', ...) left the name unchanged for inputs not ending
    # in lowercase '.json', so both outputs would have overwritten the input.
    directory, filename = os.path.split(ann_f)
    stem, _ext = os.path.splitext(filename)
    train_f = os.path.join(directory, stem + '_train.json')
    val_f = os.path.join(directory, stem + '_val.json')

    with open(ann_f, 'rt', encoding='UTF-8') as annotation_file:
        coco = json.load(annotation_file)

    # 'info' and 'licenses' are optional in the input.
    info = coco.get('info', '')
    licenses = coco.get('licenses', '')
    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    number_of_images = len(images)

    if remove_empty:
        # Set membership keeps this filter linear in the dataset size.
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [img for img in images if img['id'] in annotated_ids]
        print(
            'removed {} images without annotations, all images: {}, now: {}'
            .format(number_of_images - len(images), number_of_images,
                    len(images)))
    else:
        print('all images: {}'.format(number_of_images))

    x, y = train_test_split(images, train_size=float(split))

    save_coco(train_f, info, licenses, x,
              filter_annotations(annotations, x), categories)
    save_coco(val_f, info, licenses, y, filter_annotations(annotations, y),
              categories)

    print("Saved {} entries in {} and {} in {}.".format(
        len(x), train_f, len(y), val_f))
Пример #16
0
    def split_coco(self,
                   test_size,
                   save_dir,
                   image_dir,
                   having_annots=True,
                   save_image=False):
        '''
        Split the images found in ``image_dir`` into train and val sets.

        The image and annotation records come from
        ``self.get_info_for_given_images(os.listdir(image_dir))``. The two
        parts are handed to ``save_coco`` for ``train_annot.json`` and
        ``val_annot.json`` inside ``save_dir``; 'info' and 'licenses' are
        passed as ``None``.

        test_size: passed to ``train_test_split`` as ``test_size``.
        save_dir: directory receiving the annotation files (and the 'train'
            / 'val' image folders when ``save_image`` is set).
        image_dir: directory whose file listing selects the images.
        having_annots: True if we use only images that have labels, False
            otherwise.
        save_image: when True, ``save_images`` is also called for each part.
        '''
        info = None
        licenses = None

        images, annots = self.get_info_for_given_images(os.listdir(image_dir))
        cats = self.coco_instance.dataset['categories']

        n_images = len(images)
        print(f"all_images: {n_images}")

        if having_annots:
            # A set gives O(1) membership; scanning a list for every image
            # made this filter quadratic.
            annotated_ids = {int(annot['image_id']) for annot in annots}
            images = [
                image for image in images
                if int(image['id']) in annotated_ids
            ]

        print(f"n_samples: {len(images)}")
        train, val = train_test_split(images, test_size=test_size)
        train_dir = os.path.join(save_dir, 'train_annot.json')
        val_dir = os.path.join(save_dir, 'val_annot.json')

        save_coco(train_dir, info, licenses, train,
                  filter_annotations(annots, train), cats)
        save_coco(val_dir, info, licenses, val,
                  filter_annotations(annots, val), cats)

        if save_image:
            save_images(train,
                        data_dir=image_dir,
                        save_dir=os.path.join(save_dir, 'train'))
            save_images(val,
                        data_dir=image_dir,
                        save_dir=os.path.join(save_dir, 'val'))
Пример #17
0
def main(args):
    """Split the COCO file ``args.annotations`` into train and val files.

    When ``args.annotationed`` is set, images not referenced by any
    annotation are dropped first. The images are split with
    ``train_test_split(train_size=args.ratio)`` and the parts handed to
    ``save_coco`` for the module-level ``TRAIN`` and ``VAL`` targets.
    """
    with open(args.annotations, 'rt', encoding='utf-8') as annos:
        coco = json.load(annos)

    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    if args.annotationed:
        # The original called funcy.lremove without the sequence argument,
        # which raises TypeError; filter the image list explicitly, using a
        # set for O(1) membership.
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [img for img in images if img['id'] in annotated_ids]

    x, y = train_test_split(images, train_size=args.ratio)

    save_coco(TRAIN, x, filter_annotations(annotations, x), categories)
    save_coco(VAL, y, filter_annotations(annotations, y), categories)

    print('Splited!')
Пример #18
0
def main(args):
    """Split the COCO file ``args.annotations`` into train and val files.

    When ``args.having_annotations`` is set, images not referenced by any
    annotation are dropped first. The images are split with
    ``train_test_split(train_size=args.ratio)`` and the parts handed to
    ``save_coco`` for the module-level ``TRAIN_PATH`` and ``VAL_PATH``.
    """
    with open(args.annotations, 'rt', encoding='UTF-8') as annotation_file:
        coco = json.load(annotation_file)

    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    if args.having_annotations:
        # Set lookups are O(1); the former list made the filter quadratic.
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [img for img in images if img['id'] in annotated_ids]

    x, y = train_test_split(images, train_size=args.ratio)

    save_coco(TRAIN_PATH, x, filter_annotations(annotations, x), categories)
    save_coco(VAL_PATH, y, filter_annotations(annotations, y), categories)

    print('Split completed!')
Пример #19
0
def split_coco_dataset(list_of_datasets_to_split,
                       dest,
                       test_size=0.2,
                       mode='multi'):
    """Split one or several COCO datasets into a train and a test file.

    Several inputs are first merged with ``concatenate_datasets``; a single
    input is read from disk as JSON. Images not referenced by any annotation
    are always dropped. With exactly one category the images are shuffled
    in place with ``np.random.shuffle`` and cut at ``test_size``; otherwise
    the split is delegated to ``MultiStratifiedShuffleSplit`` (``mode ==
    'multi'``) or ``PseudoStratifiedShuffleSplit``.

    Args:
        list_of_datasets_to_split: Non-empty sequence of dataset paths.
        dest: Output prefix; '_train.json' and '_test.json' are appended.
        test_size: Fraction of the images assigned to the test part.
        mode: 'multi' or anything else, selecting the stratified splitter.

    Returns:
        A tuple with the two ``save_coco`` return values (train, test).
    """
    if len(list_of_datasets_to_split) > 1:
        dataset = concatenate_datasets(list_of_datasets_to_split)
    else:
        # Read as UTF-8 explicitly rather than with the platform default.
        with open(list_of_datasets_to_split[0], 'r', encoding='UTF-8') as f:
            dataset = json.load(f)

    categories = dataset['categories']
    info = dataset['info']
    licenses = dataset['licenses']
    annotations = dataset['annotations']
    images = dataset['images']

    # Set membership keeps this filter linear in the dataset size.
    annotated_ids = {int(ann['image_id']) for ann in annotations}
    images = [img for img in images if img['id'] in annotated_ids]

    if len(categories) == 1:
        np.random.shuffle(images)
        n_test = int(len(images) * test_size)
        x = images[n_test:]
        y = images[:n_test]
        print('Train:', len(x), 'images, valid:', len(y))
    elif mode == 'multi':
        x, y = MultiStratifiedShuffleSplit(images, annotations, test_size)
    else:
        x, y = PseudoStratifiedShuffleSplit(images, annotations, test_size)

    train = save_coco(dest + '_train.json', info, licenses, x,
                      filter_annotations(annotations, x), categories)
    test = save_coco(dest + '_test.json', info, licenses, y,
                     filter_annotations(annotations, y), categories)

    print('Finished stratified shuffle split. Results saved in:',
          dest + '_train.json', dest + '_test.json')
    return train, test
Пример #20
0
def merge():
    """Run the merge experiments and plot the averaged timings.

    ``expr_result(3, n)`` is evaluated for n = 10, 110, ... (< 5000). In each
    result the 'h.merge.2rand' and 'bh.merge.2rand' entries (lists of dicts
    with a 'time' field) are replaced by their mean time. Every series whose
    key contains neither 'pop' nor 'push' is plotted, and the slope of a
    degree-1 ``np.polyfit`` is printed for each.
    """
    sizes = list(range(10, 5000, 100))
    results = list(
        tqdm(map(lambda n: expr_result(3, n), sizes), total=len(sizes)))

    def average_merge_times(record):
        # Replace each list of timing samples by its mean, in place.
        for key in ('h.merge.2rand', 'bh.merge.2rand'):
            samples = record[key]
            record[key] = sum(s['time'] for s in samples) / len(samples)
        return record

    results = [average_merge_times(record) for record in results]
    print(results)
    exprs = F.join_with(list, results)

    series = [
        key for key in exprs.keys()
        if 'pop' not in key and 'push' not in key
    ]

    slopes = {}
    for key in sorted(series):
        is_bh = 'bh' in key
        plt.plot(sizes,
                 exprs[key],
                 label=key,
                 marker='.' if is_bh else 'x',
                 linestyle='-' if is_bh else '--')
        slopes[key] = np.polyfit(sizes, exprs[key], 1)[0]

    pprint(slopes)
    plt.xlabel('max number of keys')
    plt.ylabel('milli seconds')
    plt.legend()
    plt.show()
def create_cocosplit(args):
    """
    Downloaded from github.com/akarazniewicz/cocosplit.git@master and modified to just handle annotations
    Function used to create new data split from an original COCO Dataset

    The images are split with ``train_test_split(train_size=args.split,
    shuffle=True)`` and only the first ``args.percentage`` percent of each
    part is handed to ``save_coco``. ``args.percentage`` is clamped to
    [0, 100] in place.

    :param args: namespace providing ``annotations``, ``having_annotations``,
        ``split``, ``percentage``, ``train`` and ``test``
    :return: None
    """
    with open(args.annotations, 'rt', encoding='UTF-8') as annotation_file:
        coco = json.load(annotation_file)

    info = coco['info']
    licenses = coco['licenses']
    images = coco['images']
    annotations = coco['annotations']

    if args.having_annotations:
        # Set membership keeps this filter linear in the dataset size.
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [img for img in images if img['id'] in annotated_ids]

    x, y = train_test_split(images, train_size=args.split, shuffle=True)

    if args.percentage < 0:
        args.percentage = 0
    if args.percentage > 100:
        args.percentage = 100

    # Slice ends are exclusive, so the count itself is the end index. The
    # former "- 1" dropped one extra image (100% kept only n - 1).
    kept_x = x[:int(len(x) * args.percentage / 100)]
    kept_y = y[:int(len(y) * args.percentage / 100)]

    save_coco(args.train, info, licenses, kept_x,
              filter_annotations(annotations, kept_x))
    save_coco(args.test, info, licenses, kept_y,
              filter_annotations(annotations, kept_y))

    # Report what was actually saved, not the size before the reduction.
    print("Saved {} entries in {} and {} in {}".format(
        len(kept_x), args.train, len(kept_y), args.test))
def main(args):
    """Split a COCO annotation file into train, test and validation files.

    The test part is taken first with ``test_size=ratio_test``; the validation
    part is then taken from the remainder with a ratio rescaled so that it is
    ``ratio_valid`` of the whole. ``ratio_test`` and ``ratio_valid`` are
    module-level names. The three parts are handed to ``save_coco`` for
    ``args.trainJson_name``, ``args.testJson_name`` and
    ``args.validJson_name``.
    """
    with open(args.annotations, 'rt', encoding='UTF-8') as annotation_file:
        coco = json.load(annotation_file)

    info = coco['info']
    licenses = coco['licenses']
    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    # NOTE(review): this tests the annotation *path*, which is truthy whenever
    # the file could be opened, so un-annotated images are always dropped.
    # Presumably a dedicated flag was intended -- confirm against the argument
    # parser before changing it.
    if args.annotations:
        # Set membership keeps this filter linear in the dataset size.
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [img for img in images if img['id'] in annotated_ids]

    train_before, test = train_test_split(images, test_size=ratio_test)

    # Express the validation share relative to what is left after the test
    # part has been removed.
    ratio_remaining = 1 - ratio_test
    ratio_valid_adjusted = ratio_valid / ratio_remaining

    train_after, valid = train_test_split(train_before,
                                          test_size=ratio_valid_adjusted)

    save_coco(args.trainJson_name, info, licenses, train_after,
              filter_annotations(annotations, train_after), categories)
    save_coco(args.testJson_name, info, licenses, test,
              filter_annotations(annotations, test), categories)
    save_coco(args.validJson_name, info, licenses, valid,
              filter_annotations(annotations, valid), categories)

    print("Saved {} entries in {} and {} in {} and {} in {}".format(
        len(train_after), args.trainJson_name, len(test),
        args.testJson_name, len(valid), args.validJson_name))
def reduce_cocosplit(args):
    """
    Downloaded from github.com/akarazniewicz/cocosplit.git@master
    Function used to reduce pre-cleaned COCO annotation to keep only a certain percentage of
    COCO examples with annotations.

    ``args.percentage`` is clamped to [0, 100] in place. 100 keeps every
    image (in input order) and 0 keeps none; any value in between is sampled
    with ``train_test_split(..., shuffle=True)``.

    :param args: namespace providing ``annotations``, ``having_annotations``,
        ``percentage`` and ``train``
    :return: None
    """
    with open(args.annotations, 'rt', encoding='UTF-8') as annotation_file:
        coco = json.load(annotation_file)

    info = coco['info']
    licenses = coco['licenses']
    images = coco['images']
    annotations = coco['annotations']

    if args.having_annotations:
        # Set membership keeps this filter linear in the dataset size.
        annotated_ids = {int(a['image_id']) for a in annotations}
        images = [img for img in images if img['id'] in annotated_ids]

    if args.percentage < 0:
        args.percentage = 0.0
    if args.percentage > 100:
        args.percentage = 100.0

    # A float train_size must lie strictly between 0 and 1 (assuming
    # scikit-learn's train_test_split), so the clamped end points are
    # handled here instead of being passed through as 0.0 / 1.0.
    if args.percentage >= 100:
        x = list(images)
    elif args.percentage <= 0:
        x = []
    else:
        x, _ = train_test_split(images,
                                train_size=args.percentage / 100,
                                shuffle=True)

    save_coco(args.train, info, licenses, x,
              filter_annotations(annotations, x))

    print("Saved")
def _clip_bbox_to_yolo(bbox, width, height):
    """Clip a COCO ``[x, y, w, h]`` box to the image and convert it to YOLO form.

    :param bbox: sequence ``[x, y, w, h]`` in pixels (not modified).
    :param width: image width in pixels.
    :param height: image height in pixels.
    :return: ``(x_center, y_center, w, h)`` divided by the image size, or
        ``None`` when no positive-area part of the box lies inside the image.
    """
    # The far edges are taken from the unclipped origin so that a box that
    # starts outside the image shrinks instead of being shifted.
    left = max(0., bbox[0])
    top = max(0., bbox[1])
    right = min(bbox[0] + bbox[2], width - 1)
    bottom = min(bbox[1] + bbox[3], height - 1)
    box_w = right - left
    box_h = bottom - top
    # Test each side separately: two negative sides have a positive product.
    if box_w <= 0 or box_h <= 0:
        return None
    return ((left + box_w / 2.0) / width, (top + box_h / 2.0) / height,
            box_w / width, box_h / height)


def main(args):
    """Convert a COCO annotation file into per-image YOLO label files.

    Images are assigned to train/val/test lists by the second component of
    their ``file_name`` (compared against the module-level ``test_set`` and
    ``exclude_set``), optionally subsampled, and then one ``.txt`` label file
    per image is written under ``<root>/labels/<mode>``. Unless
    ``args.label_only`` is set, the image itself is copied to
    ``<root>/images/<mode>``.

    :param args: namespace providing ``annotations`` (COCO JSON path),
        ``having_annotations``, ``split`` (train fraction; values outside
        (0, 1) use the test images as validation set), ``sample`` (fraction
        of every list to keep, falsy to keep all), ``coco_category``,
        ``root`` and ``label_only``.
    :return: None
    """
    with open(args.annotations, 'rt', encoding='UTF-8') as annotations_file:
        coco = json.load(annotations_file)

    images = coco['images']
    categories = coco['categories']
    # Category names are overwritten from the module-level CLASSES table.
    for category in categories:
        category['name'] = CLASSES[int(category['id']) - 1]

    # Degenerate annotations are dropped before anything else looks at them.
    annotations = [ann for ann in coco['annotations'] if ann['area'] > 0]

    # Index annotations by image once instead of rescanning them per image.
    anns_by_image = {}
    for ann in annotations:
        anns_by_image.setdefault(int(ann['image_id']), []).append(ann)

    if args.having_annotations:
        images = [img for img in images if img['id'] in anns_by_image]

    non_train_folders = test_set + exclude_set
    x = []
    y = []
    for img in images:
        folder = img['file_name'].split('/')[1]
        if folder not in non_train_folders:
            x.append(img)
        if folder in test_set:
            y.append(img)

    if 0 < args.split < 1:
        x_train, x_val = train_test_split(x, train_size=args.split)
    else:
        # No usable split ratio: validate on the test images.
        x_val = y.copy()
        x_train = x.copy()

    if args.sample:
        random.shuffle(x_train)
        random.shuffle(x_val)
        random.shuffle(y)
        x_train = x_train[:int(len(x_train) * args.sample)]
        x_val = x_val[:int(len(x_val) * args.sample)]
        y = y[:int(len(y) * args.sample)]

    if args.coco_category:
        # Keep the COCO numbering, shifted to start at zero.
        category_map = {
            category['id']: int(category['id'] - 1)
            for category in categories if category['name'] in select_classes
        }
    else:
        # Renumber by position in select_classes.
        category_map = {
            category['id']: select_classes.index(category['name'])
            for category in categories if category['name'] in select_classes
        }

    root = args.root
    label_folder = os.path.join(root, 'labels')
    image_folder = os.path.join(root, 'images')

    suffix = '_thermal' if 'thermal' in args.annotations else '_rgb'
    lists = [x_train, x_val, y]
    lists_mode = [mode + suffix for mode in ('train', 'val', 'test')]

    for items, mode in zip(lists, lists_mode):
        # Existing folders are reused; previously written labels are not
        # wiped.
        label_dir = os.path.join(label_folder, mode)
        image_dir = os.path.join(image_folder, mode)
        os.makedirs(label_dir, exist_ok=True)
        os.makedirs(image_dir, exist_ok=True)

        for item in items:
            flat_name = item['file_name'].replace('/', '_')
            txt_name = os.path.join(label_dir,
                                    flat_name.replace('png', 'txt'))

            if not args.label_only:
                shutil.copyfile(os.path.join(root, item['file_name']),
                                os.path.join(image_dir, flat_name))

            with open(txt_name, 'w') as fid:
                for ann in anns_by_image.get(item['id'], ()):
                    if ann['category_id'] not in category_map:
                        continue
                    yolo_box = _clip_bbox_to_yolo(ann['bbox'], item['width'],
                                                  item['height'])
                    if yolo_box is None:
                        continue
                    fid.write('%d %f %f %f %f\n' %
                              ((category_map[ann['category_id']], ) +
                               yolo_box))

    print("Saved {} entries in train {} in val, and {} in test".format(
        len(x_train), len(x_val), len(y)))
Пример #25
0
import numpy as np

# Number of experiments; a single command-line argument overrides the default.
# (An earlier configuration used 3 here.)
num_expr = 10 if len(sys.argv) != 2 else int(sys.argv[1])
# Step between consecutive experiment sizes.
# (An earlier configuration used 2**29 // num_expr.)
block = 2**10 // num_expr

# range(1, num_expr) yields num_expr - 1 sizes, so the progress bar total is
# taken from the list itself rather than from num_expr.
sizes = [n * block for n in range(1, num_expr)]
exprs = list(tqdm(map(expr_result, sizes), total=len(sizes)))

# Round-trip the merged results through YAML so that the plot is drawn from
# exactly what was written to disk.
result_path = 'result_%d.yml' % (num_expr,)
with open(result_path, 'w') as f:
    yaml.dump(F.join_with(list, exprs), f)
with open(result_path) as f:
    result_dic = yaml.safe_load(f)

# Everything except the x axis ('data.num') is a measured series.
y_keys = F.lremove('data.num', result_dic.keys())

xs = result_dic['data.num']
gradient_dic = {}
for key in y_keys:
    # Series whose name contains 'q' are drawn dashed with 'x' markers.
    has_q = 'q' in key
    plt.plot(xs, result_dic[key], label=key,
             marker='x' if has_q else 'o',
             linestyle='--' if has_q else '-',)
    # Slope of the least-squares line through the series.
    gradient_dic[key] = np.polyfit(xs, result_dic[key], 1)[0]

print(gradient_dic)
with open('result_%d_gradient.yml' % (num_expr,), 'w') as f:
    yaml.dump(gradient_dic, f)
plt.xlabel('number of items')
Пример #26
0
def newcols(df):
    """Return the columns of ``df`` that are neither scoped nor trash columns.

    A column counts as known when it appears in any column group of
    ``SCOPE_COLUMNS`` or in ``TRASH_COLUMS``.
    """
    scoped = set()
    for _, cols in SCOPE_COLUMNS:
        scoped.update(cols)
    recognised = scoped | TRASH_COLUMS
    return lremove(recognised, df.columns)
def main(args):
    """Optionally regenerate the COCO annotation file, then split it in two.

    Step 1 (only when ``args.ann_bool`` equals ``True``) writes the result of
    ``create_annotations()`` to the annotation file. Step 2 loads that file,
    optionally rebalances or filters the images according to
    ``args.dataset_type`` and saves a train and a test COCO file.

    :param args: namespace providing ``train_folder``, ``test_folder``,
        ``seg``, ``ann``, ``train_ann``, ``test_ann`` (all appended to the
        current working directory), ``ann_bool``, ``train_split`` (share of
        images that goes to the train file) and ``dataset_type`` (1: as many
        unannotated as annotated images, 2: annotated images only, anything
        else: all images).
    :return: None
    """
    # 1.
    # Read in the data.
    PATH = os.path.abspath(os.getcwd())

    # The arguments are appended to PATH as plain strings, so they presumably
    # start with a path separator -- TODO confirm against the CLI defaults.
    TRAIN = PATH + args.train_folder  #ARG
    TEST = PATH + args.test_folder  #ARG
    SEGMENTATION = PATH + args.seg  #ARG
    ANNOTATIONS_JSON = PATH + args.ann  #ARG
    TRAIN_JSON = PATH + args.train_ann  #ARG
    TEST_JSON = PATH + args.test_ann  #ARG
    create_new_annotations = args.ann_bool  #Arg
    split_size = args.train_split  #ARG
    dataset_type = args.dataset_type

    # Kept as an equality test so non-boolean values behave as before.
    if create_new_annotations == True:
        print("Creating new annotations")

        # NOTE(review): IMAGE_DIR, df, INFO, LICENSES and CATEGORIES are
        # locals that create_annotations() is not passed; confirm how it is
        # meant to receive them.
        IMAGE_DIR = TRAIN

        df = pd.read_csv(SEGMENTATION)
        df = df.dropna(axis=0)  # Drop where there are no ships.

        INFO = {
            "description": "Kaggle Dataset",
            "url": "https://github.com/pascal1129",
            "version": "0.1.0",
            "year": 2018,
            "contributor": "pascal1129",
            "date_created": datetime.datetime.utcnow().isoformat(' ')
        }

        LICENSES = [{
            "id": 1,
            "name": "Attribution-NonCommercial-ShareAlike License",
            "url": "http://creativecommons.org/licenses/by-nc-sa/2.0/"
        }]

        CATEGORIES = [
            {
                'id': 1,
                'name': 'ship',
                'supercategory': 'ship',
            },
        ]

        new_annotations = create_annotations()

        # ANNOTATIONS_JSON already carries the PATH prefix; write to the same
        # location that step 2 reads from.
        with open(ANNOTATIONS_JSON, 'w') as output_json_file:
            json.dump(new_annotations, output_json_file, indent=4)

    # 2.
    with open(ANNOTATIONS_JSON, 'rt', encoding='UTF-8') as annotations_file:
        print("Creating new train/test split")
        coco = json.load(annotations_file)
    print("Loaded annotataions file:", ANNOTATIONS_JSON)
    info = coco['info']
    licenses = coco['licenses']
    images = coco['images']
    annotations = coco['annotations']
    categories = coco['categories']

    # Ids of images that carry at least one annotation.
    annotated_ids = {int(ann['image_id']) for ann in annotations}

    if dataset_type == 1:
        print("Dataset equal annotated/not annotated images.")

        all_ids = {int(image['id']) for image in images}
        unannotated_ids = sorted(all_ids - annotated_ids)

        # Choose the same amount of no annotations: sample without
        # replacement so duplicates cannot shrink the selection, and never
        # ask for more images than exist.
        sample_size = min(len(annotated_ids), len(unannotated_ids))
        if sample_size:
            sampled = np.random.choice(unannotated_ids,
                                       sample_size,
                                       replace=False)
        else:
            sampled = []

        keep_ids = annotated_ids | {int(image_id) for image_id in sampled}
        # Removes all not in consolidated.
        images = [image for image in images if image['id'] in keep_ids]
    elif dataset_type == 2:
        print("Dataset remove not annotated images.")
        # Removes all not annotated.
        images = [image for image in images if image['id'] in annotated_ids]

    # train_test_split returns the train_size portion first.
    train, val = train_test_split(images, train_size=split_size)

    save_coco(TRAIN_JSON, info, licenses, train,
              filter_annotations(annotations, train), categories)
    save_coco(TEST_JSON, info, licenses, val,
              filter_annotations(annotations, val), categories)

    print("Saved {} entries in {} and {} in {}".format(
        len(train), TRAIN_JSON, len(val), TEST_JSON))
Пример #28
0
def get_train_val_test(annot_file,
                       valid_size=0.2,
                       test_size=0.2,
                       with_randomsampler=False):
    """Randomly split the annotated images of a COCO file into three sets.

    Images without any annotation are ignored. ``test_size`` is the share of
    all remaining images that goes to the test set; ``valid_size`` is the
    share of the non-test images that goes to the validation set. For every
    split a per-category annotation histogram is printed.

    :param annot_file: path to the COCO annotation JSON file.
    :param valid_size: fraction of the non-test images used for validation.
    :param test_size: fraction of the annotated images used for testing.
    :param with_randomsampler: when truthy, also return one
        ``SubsetRandomSampler`` per split, built from positions in the
        filtered image list.
    :return: ``[train_ids, valid_ids, test_ids]`` (image ids), or a tuple of
        that list and ``[train_sampler, valid_sampler, test_sampler]`` when
        ``with_randomsampler`` is truthy.
    """
    print(annot_file)
    with open(annot_file, 'rt', encoding='UTF-8') as annotations_file:
        raw = json.load(annotations_file)
    coco = COCO(annot_file)

    # A set keeps the membership test O(1) per image.
    annotated_ids = {int(ann['image_id']) for ann in raw['annotations']}
    images = [img for img in raw['images'] if img['id'] in annotated_ids]

    dataset_size = len(images)
    indices = list(range(dataset_size))
    np.random.shuffle(indices)

    test_split = int(np.floor(test_size * dataset_size))
    train_indices, test_indices = indices[test_split:], indices[:test_split]

    train_size = len(train_indices)
    valid_split = int(np.floor((1 - valid_size) * train_size))
    train_indices, valid_indices = (train_indices[:valid_split],
                                    train_indices[valid_split:])

    # At least the 91 slots used so far, but large enough for every category
    # id in the file so that counting cannot run past the end of the list.
    num_slots = max([91] +
                    [ann['category_id'] + 1 for ann in raw['annotations']])

    # Check for category in each splits
    named_splits = (("training", train_indices),
                    ("validation", valid_indices),
                    ("test", test_indices))
    for split_name, split in named_splits:
        cats = [0] * num_slots

        for index in split:
            annotation_ids = coco.getAnnIds(images[index]['id'])
            for ann in coco.loadAnns(annotation_ids):
                cats[ann['category_id']] += 1
        print(split_name)
        print(cats[1:])
        # Number of category slots (slot 0 included) with no annotation.
        print(cats.count(0))

    train_ids = [images[index]['id'] for index in train_indices]
    valid_ids = [images[index]['id'] for index in valid_indices]
    test_ids = [images[index]['id'] for index in test_indices]
    if with_randomsampler:
        train_sampler = SubsetRandomSampler(train_indices)
        valid_sampler = SubsetRandomSampler(valid_indices)
        test_sampler = SubsetRandomSampler(test_indices)
        return [train_ids, valid_ids,
                test_ids], [train_sampler, valid_sampler, test_sampler]
    else:
        return [train_ids, valid_ids, test_ids]
Пример #29
0
def lremove(f, *seq):
    """Call ``F.lremove`` directly, or defer it until sequences are supplied.

    With one or more positional sequences the call is forwarded to
    ``F.lremove(f, *seq)`` at once. With none, a callable is returned that
    accepts the sequences later and forwards them the same way.
    """
    if seq:
        return F.lremove(f, *seq)

    def deferred(*xs):
        return F.lremove(f, *xs)

    return deferred
Пример #30
0
def main(args):
    """Clean a COCO annotation file in place.

    The steps, each followed by a progress printout, are: report annotations
    with an empty segmentation, drop small boxes, drop the "human" and "car"
    categories together with their annotations, drop images without
    annotations, drop images whose file name contains "copy", keep the last
    300 images by file name, report kept images that have an annotation
    without segmentation, and finally save the result back to
    ``args.annotations``.

    :param args: namespace providing ``annotations``, the path of the COCO
        JSON file that is read and then overwritten.
    :return: None
    """
    # Boxes are dropped when the longer side is below min_long_side or the
    # shorter side is below min_short_side (pixels).
    min_long_side = 50
    min_short_side = 30
    # Number of images (last by file name) that survive.
    keep_last = 300

    with open(args.annotations, "rt", encoding="UTF-8") as annotations_file:
        coco = json.load(annotations_file)
        info = coco["info"]
        licenses = coco["licenses"]
        images = coco["images"]
        annotations = coco["annotations"]
        categories = coco["categories"]

        print(coco.keys())

        print("Original", len(images))

        # First image per id. An annotation pointing at a missing image is
        # reported with None instead of raising StopIteration, which the
        # former lmap/next combination swallowed, cutting the report short.
        first_image_by_id = {}
        for image in images:
            first_image_by_id.setdefault(image["id"], image)
        for ann in annotations:
            if ann["segmentation"] == []:
                print(ann, first_image_by_id.get(ann["image_id"]))

        print("Annotations", len(annotations))

        def too_small(ann):
            box_w, box_h = ann["bbox"][2], ann["bbox"][3]
            return (max(box_w, box_h) < min_long_side
                    or min(box_w, box_h) < min_short_side)

        annotations = [ann for ann in annotations if not too_small(ann)]
        print("Annotations filtered by size", len(annotations))

        unwanted_names = ("human", "car")
        unwanted_ids = {
            cat["id"]
            for cat in categories if cat["name"] in unwanted_names
        }
        kept_categories = [
            cat for cat in categories if cat["name"] not in unwanted_names
        ]
        print(len(kept_categories), len(categories))
        categories = kept_categories

        annotations = [
            ann for ann in annotations
            if ann["category_id"] not in unwanted_ids
        ]
        print("Annotations filtered cars and humans", len(annotations))

        # A set keeps the membership test O(1) per image.
        annotated_ids = {int(ann["image_id"]) for ann in annotations}
        images = [image for image in images if image["id"] in annotated_ids]

        print("Removed empty images", len(images))

        images = [
            image for image in images
            if "copy" not in image["file_name"].lower()
        ]

        print("Removed copy", len(images))

        images.sort(key=lambda image: image["file_name"])
        images = images[-keep_last:]

        print(len(images))

        no_segm = [ann for ann in annotations if len(ann["segmentation"]) == 0]
        print(len(no_segm), len(annotations))
        no_segm_image_ids = {ann["image_id"] for ann in no_segm}
        for image in images:
            if image["id"] in no_segm_image_ids:
                print("! no segm annot in #" + image["file_name"])

        # The input file is overwritten with the cleaned data.
        save_coco(
            args.annotations,
            info,
            licenses,
            images,
            filter_annotations(annotations, images),
            categories,
        )