def data_shape_files(name, cache=".", load=True):
    """
    Downloads shape files.

    :param name: name of the shape file (see below)
    :param cache: cache folder
    :param load: if True, loads the shape files; the function relies on
        :epkg:`geopandas`
    :return: shape files

    List of available shape files:

    * `'depfr2018'`: see `Contours des départements français issus d'OpenStreetMap
      <https://www.data.gouv.fr/en/datasets/contours-des-departements-francais-issus-d-openstreetmap/>`_
    """
    if name == 'depfr2018':
        url = 'https://www.data.gouv.fr/en/datasets/r/eb36371a-761d-44a8-93ec-3d728bec17ce'
        dest = os.path.join(cache, 'departements-20180101-shp.zip')
        if not os.path.exists(dest):
            get_url_content_timeout(url, output=dest, encoding=None)
        res = unzip_files(dest, where_to=cache)
        shp = [f for f in res if f.endswith('.shp')]
        if len(shp) == 0:
            raise FileNotFoundError(  # pragma: no cover
                "Unable to find shp file in '{}'.".format(cache))
        import geopandas
        df = geopandas.read_file(shp[0])
        df['centroid'] = df['geometry'].apply(lambda r: r.centroid)
        df['DEPLONG'] = df['centroid'].apply(lambda r: r.x)
        df['DEPLAT'] = df['centroid'].apply(lambda r: r.y)
        return df
    raise ValueError("Unpexpected value for shape files: '{}'.".format(name))
Example #2
    def test_compress_helper(self):
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        typbytes = bytes
        f = os.path.abspath(__file__).replace(".pyc", ".py")

        rz = zip_files(None, [f], fLOG=fLOG)
        fLOG(len(rz), type(rz))
        if not isinstance(rz, (typbytes, str)):
            raise TypeError(type(rz))

        res = unzip_files(rz)
        self.assertTrue(isinstance(res, list))
        self.assertEqual(len(res), 1)
        if not isinstance(res[0][1], (typbytes, str)):
            raise TypeError(type(res[0][1]))
        self.assertTrue(res[0][0].endswith(
            "_unittests/ut_filehelper/test_compress_helper.py"))

        # binary
        rg = gzip_files(None, [f], fLOG=fLOG)
        fLOG(len(rg), type(rg))
        if not isinstance(rg, typbytes):
            raise TypeError(type(rg))

        res = ungzip_files(rg)
        self.assertTrue(isinstance(res, list))
        self.assertEqual(len(res), 1)
        if not isinstance(res[0][1], (typbytes, str)):
            raise TypeError(type(res[0][1]))
        self.assertTrue(res[0][0].endswith(
            "_unittests/ut_filehelper/test_compress_helper.py"))
Example #4
    def test_unzip_bug(self):
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        fold = get_temp_folder(__file__, "temp_unzip_bug")
        data = os.path.join(fold, "..", "data", "dada.zip")
        self.assertExists(data)
        files = unzip_files(data, where_to=fold,
                            fLOG=fLOG, fail_if_error=False)
        self.assertEqual(len(files), 5)
Example #5
    def test_plot_gallery(self):
        temp = get_temp_folder(__file__, "temp_plot_gallery")
        zipimg = os.path.join(temp, "..", "..", "..", "_doc", "notebooks",
                              "explore", "data", "dog-cat-pixabay.zip")
        files = unzip_files(zipimg, where_to=temp)

        fix_tkinter_issues_virtualenv(fLOG=noLOG)
        from matplotlib import pyplot as plt

        fig, _ = plot_gallery_images(files[:2], return_figure=True)
        img = os.path.join(temp, "gallery.png")
        fig.savefig(img)
        plt.close('all')
Example #6
def load_movielens_dataset(name='small', cache=None, fLOG=None):
    """
    Returns a dataset extracted from the
    `movielens <https://grouplens.org/datasets/movielens/>`_ page.
    Notebooks related to this dataset:

    .. runpython::
        :rst:

        from papierstat.datasets.documentation import list_notebooks_rst_links
        links = list_notebooks_rst_links('lectures', 'movielens')
        links = ['    * %s' % s for s in links]
        print('\\n'.join(links))

    @param      name    name of the dataset to download
    @param      cache   caches the files with :epkg:`pickle`
    @param      fLOG    logging function
    @return             dictionary of dataframes

    *cache* is a filename; if the file exists, it is loaded
    with the :epkg:`pickle` module.
    """
    if cache is not None and os.path.exists(cache):
        with open(cache, 'rb') as f:
            return pickle.load(f)
    if name == 'small':
        url = 'http://files.grouplens.org/datasets/movielens/ml-latest-small.zip'
    else:
        raise ValueError(  # pragma: no cover
            "Value '{0}' is not implemented.".format(name))
    if fLOG:
        fLOG("[load_movielens_dataset] download '{0}'".format(url))
    res = get_url_content_timeout(url, encoding=None, fLOG=fLOG)
    if fLOG:
        fLOG("[load_movielens_dataset] unzip {0} bytes".format(len(res)))
    found = unzip_files(res, fLOG=fLOG)
    if fLOG:
        fLOG("[load_movielens_dataset] found {0} files".format(len(found)))
    dfiles = {}
    for name_, text in found:
        if name_.endswith('.csv'):
            df = pandas.read_csv(StringIO(text.decode('utf-8')), sep=',')
            key = os.path.splitext(os.path.split(name_)[-1])[0]
            dfiles[key] = df
    if cache is not None:
        with open(cache, 'wb') as f:
            pickle.dump(dfiles, f)
    return dfiles
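# --- Editor's illustrative sketch (not part of the original example) ---
# Assuming load_movielens_dataset defined above is in scope, this downloads
# the small MovieLens archive, caches the parsed dataframes with pickle
# ('movielens_small.pkl' is a hypothetical cache filename) and lists the CSV
# files found in the zip.
dfs = load_movielens_dataset(name='small', cache='movielens_small.pkl')
print(sorted(dfs))            # one key per CSV file, e.g. 'ratings', 'movies'
print(dfs['ratings'].head())  # ratings.csv ships with ml-latest-small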
Example #7
def any_local_file(name,
                   subfolder,
                   local=True,
                   cache_folder=".",
                   filename=True,
                   unzip=False,
                   encoding=None):
    """
    Returns a local data file, reads its content or returns its content.

    @param          name            file to download
    @param          subfolder       sub folder
    @param          local           local data or web
    @param          cache_folder    where to cache the data if downloaded a second time
    @param          filename        return the filename (True) or the content (False)
    @param          unzip           unzip as well
    @param          encoding        encoding
    @return                         text content (str)
    """
    if local:
        this = os.path.abspath(os.path.dirname(__file__))
        this = os.path.join(this, subfolder, name)
        if not os.path.exists(this):
            raise FileNotFoundError(this)
    else:
        import pyensae
        if not unzip and name.endswith(".zip"):
            raise ValueError(
                "The file will be unzipped anyway: {0}".format(name))
        this = pyensae.download_data(name, whereTo=cache_folder)
        unzip = False
    if unzip:
        this = unzip_files(this, where_to=cache_folder)
    if filename:
        return this
    else:
        if isinstance(this, list):
            if len(this) > 1:
                raise ValueError("more than one file for: {0}\n{1}".format(
                    name, this))
            else:
                this = this[0]
        if os.path.splitext(this)[-1] in (".zip", ".gz", ".tar", ".7z"):
            raise ValueError("Cannot read file as text: {0}".format(this))
        with open(this, "r", encoding=encoding) as f:
            return f.read()
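# --- Editor's illustrative sketch (not part of the original example) ---
# Assuming any_local_file defined above is in scope, this reads the text
# content of a packaged data file instead of returning its path;
# 'example.csv' and the 'data' subfolder are hypothetical names used only
# for illustration.
content = any_local_file('example.csv', 'data', local=True,
                         filename=False, encoding='utf-8')
print(content[:200])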
Example #8
def villes_geo(folder=".", as_df=False, fLOG=noLOG):
    """
    Retrieves data about voting places (*bureaux de vote* in French)
    with geocodes.

    @param      folder  where to download
    @param      as_df   return as a dataframe
    @param      fLOG    logging function
    @return             the filename of the unzipped file, or a dataframe if *as_df* is True
    """
    this = os.path.abspath(os.path.dirname(__file__))
    data = os.path.join(this, "data_elections", "villesgeo.zip")
    geo = unzip_files(data, where_to=folder)
    if isinstance(geo, list):
        res = geo[0]
    else:
        res = geo
    if as_df:
        return pandas.read_csv(res, encoding="utf-8", sep="\t")
    return res
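# --- Editor's illustrative sketch (not part of the original example) ---
# Assuming villes_geo defined above is in scope, this unzips the packaged
# geocoded voting-place data into a temporary folder and loads it as a
# dataframe.
import tempfile
with tempfile.TemporaryDirectory() as tmp:
    df_geo = villes_geo(folder=tmp, as_df=True)
    print(df_geo.shape)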
Example #9
def any_local_file(name, subfolder, local=True, cache_folder=".",
                   filename=True, unzip=False, encoding=None):
    """
    Returns a local data file, reads its content or returns its content.

    @param          name            file to download
    @param          subfolder       sub folder
    @param          local           local data or web
    @param          cache_folder    where to cache the data if downloaded a second time
    @param          filename        return the filename (True) or the content (False)
    @param          unzip           unzip as well
    @param          encoding        encoding
    @return                         text content (str)
    """
    if local:
        this = os.path.abspath(os.path.dirname(__file__))
        this = os.path.join(this, subfolder, name)
        if not os.path.exists(this):
            raise FileNotFoundError(this)
    else:
        import pyensae
        if not unzip and name.endswith(".zip"):
            raise ValueError(
                "The file will be unzipped anyway: {0}".format(name))
        this = pyensae.download_data(name, whereTo=cache_folder)
        unzip = False
    if unzip:
        this = unzip_files(this, where_to=cache_folder)
    if filename:
        return this
    else:
        if isinstance(this, list):
            if len(this) > 1:
                raise ValueError(
                    "more than one file for: {0}\n{1}".format(name, this))
            else:
                this = this[0]
        if os.path.splitext(this)[-1] in (".zip", ".gz", ".tar", ".7z"):
            raise ValueError("Cannot read file as text: {0}".format(this))
        with open(this, "r", encoding=encoding) as f:
            return f.read()
Example #10
def load_tweet_dataset(cache="."):
    """
    Returns a few tweets extracted in 2016.
    The data is available in the
    `data <https://github.com/sdpython/papierstat/tree/master/src/papierstat/datasets/data>`_ folder.
    Notebooks related to this dataset:

    .. runpython::
        :rst:

        from papierstat.datasets.documentation import list_notebooks_rst_links
        links = list_notebooks_rst_links('lectures', 'artificiel_tokenize_features')
        links = ['    * %s' % s for s in links]
        print('\\n'.join(links))

    @param          cache       where to cache or unzip the data if downloaded a second time
    @return                     dataframe with the tweets
    """
    data = get_data_folder()
    name = os.path.join(data, 'tweets_macron_sijetaispresident_201609.zip')
    one = unzip_files(name, where_to=cache)
    return pandas.read_csv(one[0], encoding='utf-8', sep='\t')
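# --- Editor's illustrative sketch (not part of the original example) ---
# Assuming load_tweet_dataset defined above is in scope, this unzips the
# packaged tweets into the current folder and returns a dataframe.
tweets = load_tweet_dataset(cache='.')
print(tweets.shape)
print(tweets.head())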
Example #11
    def test_search_predictions_keras(self):
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")

        from mlinsights.search_rank import SearchEnginePredictionImages

        # We delay the import as keras backend is not necessarily available.
        with redirect_stderr(StringIO()):
            try:
                from keras.applications.mobilenet import MobileNet  # pylint: disable=E0401
            except (SyntaxError, ModuleNotFoundError) as e:
                warnings.warn(
                    "tensorflow is probably not available yet on python 3.7: {0}"
                    .format(e))
                return
            from keras.preprocessing.image import ImageDataGenerator  # pylint: disable=E0401
            from keras.preprocessing.image import img_to_array, load_img  # pylint: disable=E0401

        # deep learning model
        model = MobileNet(input_shape=None,
                          alpha=1.0,
                          depth_multiplier=1,
                          dropout=1e-3,
                          include_top=True,
                          weights='imagenet',
                          input_tensor=None,
                          pooling=None,
                          classes=1000)
        self.assertEqual(model.name, 'mobilenet_1.00_224')

        # images
        temp = get_temp_folder(__file__, "temp_search_predictions_keras")
        dest = os.path.join(temp, "simages")
        os.mkdir(dest)
        zipname = os.path.join(temp, "..", "..", "..", "_doc", "notebooks",
                               "explore", "data", "dog-cat-pixabay.zip")
        files = unzip_files(zipname, where_to=dest)
        self.assertTrue(len(files) > 0)

        # iterator
        gen = ImageDataGenerator(rescale=1. / 255)
        with redirect_stdout(StringIO()):
            iterim = gen.flow_from_directory(temp,
                                             batch_size=1,
                                             target_size=(224, 224),
                                             classes=['simages'],
                                             shuffle=False)

        # search
        se = SearchEnginePredictionImages(
            model, fct_params=dict(layer=len(model.layers) - 4), n_neighbors=5)
        r = repr(se)
        self.assertIn("SearchEnginePredictionImages", r)

        # fit
        se.fit(iterim, fLOG=fLOG)

        # neighbors
        score, ind, meta = se.kneighbors(iterim)

        # assert
        self.assertIsInstance(ind, (list, numpy.ndarray))
        self.assertEqual(len(ind), 5)
        self.assertEqual(ind[0], 0)

        self.assertIsInstance(score, numpy.ndarray)
        self.assertEqual(score.shape, (5, ))
        self.assertEqual(score[0], 0)

        self.assertIsInstance(meta, (numpy.ndarray, pandas.DataFrame))
        self.assertEqual(meta.shape, (5, 2))
        self.assertEqual(meta.loc[0, 'name'].replace('\\', '/'),
                         'simages/cat-1151519__480.jpg')

        # neighbors 2
        img = load_img(os.path.join(temp, 'simages', 'cat-2603300__480.jpg'),
                       target_size=(224, 224))
        x = img_to_array(img)
        gen = ImageDataGenerator(rescale=1. / 255)
        iterim = gen.flow(x[numpy.newaxis, :, :, :], batch_size=1)
        score, ind, meta = se.kneighbors(iterim)

        self.assertIsInstance(ind, (list, numpy.ndarray))
        self.assertIsInstance(score, numpy.ndarray)
        self.assertIsInstance(meta, (numpy.ndarray, pandas.DataFrame))
Example #12
    def test_search_predictions_torch(self):
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")

        from mlinsights.search_rank import SearchEnginePredictionImages

        # We delay the import as torch is not necessarily available.
        with redirect_stderr(StringIO()):
            try:
                import torchvision.models as tmodels  # pylint: disable=E0401
            except (SyntaxError, ModuleNotFoundError) as e:
                warnings.warn("torch is not available: {0}".format(e))
                return
            from torchvision import datasets, transforms  # pylint: disable=E0401
            from torch.utils.data import DataLoader  # pylint: disable=E0401

        # deep learning model
        model = tmodels.squeezenet1_1(pretrained=True)

        # images
        temp = get_temp_folder(__file__, "temp_search_predictions_torch")
        dest = os.path.join(temp, "simages")
        os.mkdir(dest)
        zipname = os.path.join(temp, "..", "..", "..", "_doc", "notebooks",
                               "explore", "data", "dog-cat-pixabay.zip")
        files = unzip_files(zipname, where_to=dest)
        self.assertTrue(len(files) > 0)

        # sequence of images
        trans = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.CenterCrop(224),
            transforms.ToTensor()
        ])
        imgs_ = datasets.ImageFolder(temp, trans)
        dataloader = DataLoader(imgs_,
                                batch_size=1,
                                shuffle=False,
                                num_workers=1)
        img_seq = iter(dataloader)
        imgs = list(img[0] for img in img_seq)

        # search
        se = SearchEnginePredictionImages(model, n_neighbors=5)
        r = repr(se)
        self.assertIn("SearchEnginePredictionImages", r)

        # fit
        fLOG('[fit]')
        se.fit(imgs_, fLOG=fLOG)

        # neighbors
        fLOG('[test]', type(imgs[0]), imgs[0].shape)
        score, ind, meta = se.kneighbors(imgs[0])

        # assert
        self.assertIsInstance(ind, (list, numpy.ndarray))
        self.assertEqual(len(ind), 5)
        self.assertEqual(ind[0], 0)

        self.assertIsInstance(score, numpy.ndarray)
        self.assertEqual(score.shape, (5, ))
        self.assertLess(score[0], 50)

        self.assertIsInstance(meta, (numpy.ndarray, pandas.DataFrame))
        self.assertEqual(meta.shape, (5, 2))
        self.assertEndsWith('simages/cat-1151519__480.jpg',
                            meta.iloc[0, 1].replace('\\', '/'))

        # neighbors 2
        score, ind, meta = se.kneighbors(imgs)

        self.assertIsInstance(ind, (list, numpy.ndarray))
        self.assertIsInstance(score, numpy.ndarray)
        self.assertIsInstance(meta, (numpy.ndarray, pandas.DataFrame))
Example #13
    def unzip_files(self, group):
        """
        Unzips files and converts notebooks into :epkg:`HTML`.

        @param          group       group name
        @return                     list of new files
        """
        def fvalid(zip_name, local_name):
            if "__pycache__" in zip_name:
                return False
            if zip_name.endswith(".pyc"):
                return False
            return True

        names = list(self.enumerate_group_files(group))
        files = []
        for name in names:
            if "attachments" not in name:
                continue
            ext = os.path.splitext(name)[-1]
            if ext == ".zip":
                folder = os.path.splitext(name)[0] + "_zip"
                folder = folder.replace(" ", "_").replace(",", "_")
                if not os.path.exists(folder):
                    self.fLOG(
                        "[ProjectsRepository.unzip_files] unzip '{0}'".format(
                            name))
                    self.fLOG(
                        "[ProjectsRepository.unzip_files] creating '{0}'".
                        format(folder))
                    os.mkdir(folder)
                    try:
                        lf = unzip_files(name,
                                         folder,
                                         fLOG=self.fLOG,
                                         fvalid=fvalid,
                                         fail_if_error=False)
                    except zipfile.BadZipFile as e:
                        self.fLOG(
                            "[ProjectsRepository.unzip_files]    ERROR: unable to unzip '{0}' because of '{1}']"
                            .format(name, e))
                        lf = []
                    files.extend(lf)
                else:
                    # already done, we do not do it again
                    pass
            elif ext == ".7z":
                folder = os.path.splitext(name)[0] + "_7z"
                folder = folder.replace(" ", "_").replace(",", "_")
                if not os.path.exists(folder):
                    self.fLOG("[ProjectsRepository.un7zip_files] un7zip '{0}'".
                              format(name))
                    self.fLOG(
                        "[ProjectsRepository.un7zip_files] creating '{0}'".
                        format(folder))
                    os.mkdir(folder)
                    lf = un7zip_files(name,
                                      folder,
                                      fLOG=self.fLOG,
                                      fvalid=fvalid)
                    files.extend(lf)
                else:
                    # already done, we do not do it again
                    pass
            elif ext == ".rar":
                folder = os.path.splitext(name)[0] + "_rar"
                folder = folder.replace(" ", "_").replace(",", "_")
                if not os.path.exists(folder):
                    self.fLOG(
                        "[ProjectsRepository.unrar_files] unrar '{0}'".format(
                            name))
                    self.fLOG(
                        "[ProjectsRepository.unrar_files] creating '{0}'".
                        format(folder))
                    os.mkdir(folder)
                    lf = unrar_files(name,
                                     folder,
                                     fLOG=self.fLOG,
                                     fvalid=fvalid)
                    files.extend(lf)
                else:
                    # already done, we do not do it again
                    pass
            elif ext == ".gz":
                folder = os.path.splitext(name)[0] + "_gz"
                folder = folder.replace(" ", "_").replace(",", "_")
                if not os.path.exists(folder):
                    self.fLOG("[ProjectsRepository.ungzip_files] ungzip '{0}'".
                              format(name))
                    self.fLOG(
                        "[ProjectsRepository.ungzip_files] creating '{0}'".
                        format(folder))
                    os.mkdir(folder)
                    unzip = "pkl.gz" not in name
                    lf = ungzip_files(name,
                                      folder,
                                      fLOG=self.fLOG,
                                      fvalid=fvalid,
                                      unzip=unzip)
                    files.extend(lf)
                else:
                    # already done, we do not do it again
                    pass
            elif ext == ".tar.gz":
                raise Exception("unable to process such a file: " + name)
        return files
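# --- Editor's illustrative sketch (not part of the original example) ---
# The method above filters archive members through a callable; a standalone
# filter with the same shape could be passed to unzip_files directly
# (assuming unzip_files accepts fvalid and fail_if_error as shown above;
# 'attachments.zip' and the destination folder are hypothetical names).
def skip_python_cache(zip_name, local_name):
    # mirror fvalid above: drop bytecode files and __pycache__ entries
    return "__pycache__" not in zip_name and not zip_name.endswith(".pyc")

# files = unzip_files("attachments.zip", "attachments_zip",
#                     fLOG=print, fvalid=skip_python_cache, fail_if_error=False)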
Example #14
    def unzip_files(self, group):
        """
        Unzips files and converts notebooks into html.

        @param          group       group name
        @return                     list of new files
        """
        def fvalid(zip_name, local_name):
            if "__pycache__" in zip_name:
                return False
            if zip_name.endswith(".pyc"):
                return False
            return True

        names = list(self.enumerate_group_files(group))
        files = []
        for name in names:
            if "attachments" not in name:
                continue
            ext = os.path.splitext(name)[-1]
            if ext == ".zip":
                folder = os.path.splitext(name)[0] + "_zip"
                folder = folder.replace(" ", "_").replace(",", "_")
                if not os.path.exists(folder):
                    self.fLOG(
                        "ProjectsRepository.unzip_files [unzip {0}]".format(
                            name))
                    self.fLOG(
                        "ProjectsRepository.unzip_files [creating {0}]".format(
                            folder))
                    os.mkdir(folder)
                    lf = unzip_files(name,
                                     folder,
                                     fLOG=self.fLOG,
                                     fvalid=fvalid)
                    files.extend(lf)
                else:
                    # already done, we do not do it again
                    pass
            elif ext == ".7z":
                folder = os.path.splitext(name)[0] + "_7z"
                folder = folder.replace(" ", "_").replace(",", "_")
                if not os.path.exists(folder):
                    self.fLOG(
                        "ProjectsRepository.un7zip_files [un7zip {0}]".format(
                            name))
                    self.fLOG("ProjectsRepository.un7zip_files [creating {0}]".
                              format(folder))
                    os.mkdir(folder)
                    lf = un7zip_files(name,
                                      folder,
                                      fLOG=self.fLOG,
                                      fvalid=fvalid)
                    files.extend(lf)
                else:
                    # already done, we do not do it again
                    pass
            elif ext == ".gz":
                folder = os.path.splitext(name)[0] + "_gz"
                folder = folder.replace(" ", "_").replace(",", "_")
                if not os.path.exists(folder):
                    self.fLOG(
                        "ProjectsRepository.ungzip_files [ungzip {0}]".format(
                            name))
                    self.fLOG("ProjectsRepository.ungzip_files [creating {0}]".
                              format(folder))
                    os.mkdir(folder)
                    lf = ungzip_files(name,
                                      folder,
                                      fLOG=self.fLOG,
                                      fvalid=fvalid)
                    files.extend(lf)
                else:
                    # already done, we do not do it again
                    pass
            elif ext == ".tar.gz":
                raise Exception("unable to process such a file: " + name)
        return files
Example #15
    def unzip_files(self, group):
        """
        Unzips files and converts notebooks into :epkg:`HTML`.

        @param          group       group name
        @return                     list of new files
        """
        def fvalid(zip_name, local_name):
            if "__pycache__" in zip_name:
                return False
            if zip_name.endswith(".pyc"):
                return False
            return True

        names = list(self.enumerate_group_files(group))
        files = []
        for name in names:
            if "attachments" not in name:
                continue
            ext = os.path.splitext(name)[-1]
            if ext == ".zip":
                folder = os.path.splitext(name)[0] + "_zip"
                folder = folder.replace(" ", "_").replace(",", "_")
                if not os.path.exists(folder):
                    self.fLOG(
                        "[ProjectsRepository.unzip_files] unzip '{0}'".format(name))
                    self.fLOG(
                        "[ProjectsRepository.unzip_files] creating '{0}'".format(folder))
                    os.mkdir(folder)
                    try:
                        lf = unzip_files(
                            name, folder, fLOG=self.fLOG, fvalid=fvalid, fail_if_error=False)
                    except zipfile.BadZipFile as e:
                        self.fLOG(
                            "[ProjectsRepository.unzip_files]    ERROR: unable to unzip '{0}' because of '{1}']".format(name, e))
                        lf = []
                    files.extend(lf)
                else:
                    # already done, we do not do it again
                    pass
            elif ext == ".7z":
                folder = os.path.splitext(name)[0] + "_7z"
                folder = folder.replace(" ", "_").replace(",", "_")
                if not os.path.exists(folder):
                    self.fLOG(
                        "[ProjectsRepository.un7zip_files] un7zip '{0}'".format(name))
                    self.fLOG(
                        "[ProjectsRepository.un7zip_files] creating '{0}'".format(folder))
                    os.mkdir(folder)
                    lf = un7zip_files(
                        name, folder, fLOG=self.fLOG, fvalid=fvalid)
                    files.extend(lf)
                else:
                    # already done, we do not do it again
                    pass
            elif ext == ".rar":
                folder = os.path.splitext(name)[0] + "_rar"
                folder = folder.replace(" ", "_").replace(",", "_")
                if not os.path.exists(folder):
                    self.fLOG(
                        "[ProjectsRepository.unrar_files] unrar '{0}'".format(name))
                    self.fLOG(
                        "[ProjectsRepository.unrar_files] creating '{0}'".format(folder))
                    os.mkdir(folder)
                    lf = unrar_files(
                        name, folder, fLOG=self.fLOG, fvalid=fvalid)
                    files.extend(lf)
                else:
                    # already done, we do not do it again
                    pass
            elif ext == ".gz":
                folder = os.path.splitext(name)[0] + "_gz"
                folder = folder.replace(" ", "_").replace(",", "_")
                if not os.path.exists(folder):
                    self.fLOG(
                        "[ProjectsRepository.ungzip_files] ungzip '{0}'".format(name))
                    self.fLOG(
                        "[ProjectsRepository.ungzip_files] creating '{0}'".format(folder))
                    os.mkdir(folder)
                    unzip = "pkl.gz" not in name
                    lf = ungzip_files(
                        name, folder, fLOG=self.fLOG, fvalid=fvalid, unzip=unzip)
                    files.extend(lf)
                else:
                    # already done, we do not do it again
                    pass
            elif ext == ".tar.gz":
                raise Exception("unable to process such a file: " + name)
        return files
Example #16
    def unzip_files(self, group):
        """
        Unzips files and converts notebooks into html.

        @param          group       group name
        @return                     list of new files
        """
        def fvalid(zip_name, local_name):
            if "__pycache__" in zip_name:
                return False
            if zip_name.endswith(".pyc"):
                return False
            return True

        names = list(self.enumerate_group_files(group))
        files = []
        for name in names:
            if "attachments" not in name:
                continue
            ext = os.path.splitext(name)[-1]
            if ext == ".zip":
                folder = os.path.splitext(name)[0] + "_zip"
                folder = folder.replace(" ", "_").replace(",", "_")
                if not os.path.exists(folder):
                    self.fLOG(
                        "ProjectsRepository.unzip_files [unzip {0}]".format(name))
                    self.fLOG(
                        "ProjectsRepository.unzip_files [creating {0}]".format(folder))
                    os.mkdir(folder)
                    lf = unzip_files(
                        name, folder, fLOG=self.fLOG, fvalid=fvalid)
                    files.extend(lf)
                else:
                    # already done, we do not do it again
                    pass
            elif ext == ".7z":
                folder = os.path.splitext(name)[0] + "_7z"
                folder = folder.replace(" ", "_").replace(",", "_")
                if not os.path.exists(folder):
                    self.fLOG(
                        "ProjectsRepository.un7zip_files [un7zip {0}]".format(name))
                    self.fLOG(
                        "ProjectsRepository.un7zip_files [creating {0}]".format(folder))
                    os.mkdir(folder)
                    lf = un7zip_files(
                        name, folder, fLOG=self.fLOG, fvalid=fvalid)
                    files.extend(lf)
                else:
                    # already done, we do not do it again
                    pass
            elif ext == ".gz":
                folder = os.path.splitext(name)[0] + "_gz"
                folder = folder.replace(" ", "_").replace(",", "_")
                if not os.path.exists(folder):
                    self.fLOG(
                        "ProjectsRepository.ungzip_files [ungzip {0}]".format(name))
                    self.fLOG(
                        "ProjectsRepository.ungzip_files [creating {0}]".format(folder))
                    os.mkdir(folder)
                    lf = ungzip_files(
                        name, folder, fLOG=self.fLOG, fvalid=fvalid)
                    files.extend(lf)
                else:
                    # already done, we do not do it again
                    pass
            elif ext == ".tar.gz":
                raise Exception("unable to process such a file: " + name)
        return files