Exemplo n.º 1
0
    def _extract_images(self):
        """Extract the downloaded image archive into the dataset folder.

        Runs ``tar xvfj <archive> -C <dataset path>``. The arguments are
        handed to ``tar`` as a list instead of being pasted into a shell
        command line, so paths containing spaces or shell metacharacters are
        passed through intact. The output of ``tar`` is discarded unless
        verbose mode is enabled.

        Raises:
            subprocess.CalledProcessError: if ``tar`` exits with a non-zero
                status.
            FileNotFoundError: if the ``tar`` executable cannot be found.
        """
        import subprocess

        if self._verbose:
            _L("Extracting the images in " + _P(self._im_extracted_path))

        # None lets tar inherit our stdout/stderr; DEVNULL silences it.
        sink = None if self._verbose else subprocess.DEVNULL
        subprocess.run(
            ["tar", "xvfj", self._im_tar_path, "-C", self._path],
            stdout=sink,
            stderr=sink,
            check=True,
        )
Exemplo n.º 2
0
    def _resolve_dirs(self):
        """Resolve directories: delete old ones and create new ones.

        Removes whatever currently sits at the test-images and train-images
        paths, renames the freshly extracted folder to the test-images
        directory, and creates an empty train-images directory.

        The work is done with the standard library instead of shell command
        strings, so paths containing spaces or shell metacharacters are
        handled correctly and failures are reported instead of being ignored.

        Raises:
            OSError: if a stale path cannot be removed or the extracted
                folder cannot be moved (for example because it is missing).
        """
        import shutil

        if self._verbose:
            _L("Resolving directories")

        # Delete existing directories (or stray files) from an earlier run.
        for stale in (self._im_test_path, self._im_train_path):
            if os.path.isdir(stale) and not os.path.islink(stale):
                shutil.rmtree(stale)
            elif os.path.lexists(stale):
                os.remove(stale)

        # Rename the intermediate folder to test_images
        shutil.move(self._im_extracted_path, self._im_test_path)
        os.makedirs(self._im_train_path, exist_ok=True)
Exemplo n.º 3
0
    def _process_images(self):
        """Split the extracted images into the test and train directories.

        Every image named in the train list file (one name per line, without
        the ``.png`` extension) is moved from the test-images directory to
        the train-images directory; whatever stays behind in the test-images
        directory is the test split.

        Surrounding whitespace on each line is stripped and blank lines are
        ignored. A listed image that is not present in the test-images
        directory is skipped rather than aborting the whole split.
        """
        import shutil

        # Separate files according to the train list
        if self._verbose:
            _L("Seperating files from {} to {}".format(_P(
                self._im_test_path), _S(self._im_train_path)))

        with open(self._im_train_list_path) as training_images_list:
            for line in training_images_list:
                image = line.strip()
                if not image:
                    # A blank line would otherwise turn into a bogus ".png"
                    continue

                # move the file from from_ to to_
                from_ = os.path.join(self._im_test_path, image) + ".png"
                to_ = os.path.join(self._im_train_path, image) + ".png"

                if not os.path.isfile(from_):
                    if self._verbose:
                        _L("{} not found, skipped".format(_P(from_)))
                    continue

                # In-process move: no shell per image, no quoting pitfalls.
                shutil.move(from_, to_)

                if self._verbose:
                    _L("{} moved to {}".format(_P(from_), _S(to_)))

        if self._verbose:
            _L("Extracted " + _P(len(os.listdir(self._im_test_path))) +
               " Images in " + _S(self._im_test_path))
            _L("Extracted " + _P(len(os.listdir(self._im_train_path))) +
               " Images in " + _S(self._im_train_path))
Exemplo n.º 4
0
def download_dataset(urls, path):
    """Download dataset files from the web with ``wget``.

    Each URL is passed to ``wget`` as its own argument rather than being
    interpolated into a shell command line, so URLs containing characters
    such as ``&``, ``?`` or ``;`` and target paths containing spaces are
    handled correctly.

    Args:
        urls (dict): mapping whose values are the URLs to download; the keys
            are not used here.
        path (str): directory the files are downloaded into. It is created
            if it does not exist yet.

    Raises:
        subprocess.CalledProcessError: if ``wget`` exits with a non-zero
            status.
        FileNotFoundError: if the ``wget`` executable cannot be found.
    """
    import subprocess

    # Make sure the target directory exists
    os.makedirs(os.path.normpath(path), exist_ok=True)

    # Download the dataset
    for url in urls.values():
        _L("Downloading " + _P(url) + " in " + _S(path))
        subprocess.run(["wget", url, "-P", path], check=True)
Exemplo n.º 5
0
    def __init__(self, path="./data/daquar", force=False, verbose=False):
        """Set up a DAQUAR data folder, building it on disk when required.

        All file and directory locations are derived from ``path`` and the
        configured download URLs. The download/extract/process pipeline runs
        when ``force`` is set, or when neither the train-images nor the
        test-images directory exists yet.

        Args:
            path (str, optional): folders path. Defaults to "./data/daquar".
            force (bool, optional): to force download. Defaults to False.
            verbose (bool, optional): detailed logs. Defaults to False.
        """
        root = os.path.abspath(path)

        self._path = root
        self._force = force
        self._verbose = verbose
        self._urls = DAQUAR_URLS

        self._IM_DIR_TEST = "test_images"
        self._IM_DIR_TRAIN = "train_images"
        self._QA_JSON_TRAIN = "qa_train.json"

        def remote_name(key):
            # Last "/"-separated component of the URL registered under key.
            return self._urls[key].rsplit("/", 1)[-1]

        # ---- image locations ----
        archive_name = remote_name(DAQUAR_IM)
        # The extracted folder is the archive name up to its first dot.
        self._im_extracted_path = os.path.join(
            root, archive_name.partition(".")[0])
        self._im_test_path = os.path.join(root, self._IM_DIR_TEST)
        self._im_train_path = os.path.join(root, self._IM_DIR_TRAIN)
        self._im_tar_path = os.path.join(root, archive_name)
        self._im_train_list_path = os.path.join(
            root, remote_name(DAQUAR_IM_TRAIN))

        # ---- question/answer pair locations ----
        self._qa_train_txt_path = os.path.join(
            root, remote_name(DAQUAR_QA_TRAIN))
        self._qa_train_json_path = os.path.join(root, self._QA_JSON_TRAIN)

        # Report every derived location when verbose logging is requested.
        if verbose:
            report = (
                ("Images .tar path ", self._im_tar_path),
                ("Images extraction path ", self._im_extracted_path),
                ("Test images path ", self._im_test_path),
                ("Train images path ", self._im_train_path),
                ("Train pairs text ", self._qa_train_txt_path),
                ("Train processed json ", self._qa_train_json_path),
                ("Images train list path", self._im_train_list_path),
            )
            for label, location in report:
                _L(label + _P(location))

        self.paths = {
            self._IM_DIR_TEST: self._im_test_path,
            self._IM_DIR_TRAIN: self._im_train_path,
            "qa_train": self._qa_train_json_path,
        }

        # Nothing to build when an image directory is already present,
        # unless the caller explicitly forces a rebuild.
        if not force and (os.path.exists(self._im_train_path)
                          or os.path.exists(self._im_test_path)):
            return

        self._download()
        self._extract_images()
        self._resolve_dirs()
        self._process_images()
        self._process_questions()
Exemplo n.º 6
0
 def _download(self):
     """Download the dataset from the web, urls are predefined in the config
     """
     if self._verbose:
         _L("Downloading " + _P("DAQUAR") + " in " + _S(self._path))