Exemplo n.º 1
0
 def test_damir(self):
     """
     Downloads ``A201612_small.csv.gz`` into a temporary folder and
     checks that one result is returned and that both
     ``A201612_small.csv`` and ``A201612_small.csv.gz`` exist there.
     """
     folder = get_temp_folder(__file__, "temp_damir")
     downloaded = download_data("A201612_small.csv.gz", whereTo=folder)
     self.assertEqual(len(downloaded), 1)
     # The .csv is checked first, then the .csv.gz.
     for filename in ("A201612_small.csv", "A201612_small.csv.gz"):
         self.assertExists(os.path.join(folder, filename))
Exemplo n.º 2
0
 def test_download_data2(self):
     """
     Downloads ``voeux.zip`` into a temporary folder and checks that
     14 results are returned, that the first one mentions
     ``VOEUX01.txt`` and that both ``VOEUX01.txt`` and ``voeux.zip``
     exist in the folder.
     """
     target = get_temp_folder(__file__, "temp_download_data2")
     expected_names = ["VOEUX01.txt", "voeux.zip"]
     results = download_data(
         ["voeux.zip"], website=["xd"], whereTo=target, timeout=10)
     self.assertEqual(len(results), 14)
     self.assertIn("VOEUX01.txt", results[0])
     for name in expected_names:
         self.assertExists(os.path.join(target, name))
Exemplo n.º 3
0
 def test_damir(self):
     """
     Checks that downloading ``A201612_small.csv.gz`` returns a single
     result and leaves ``A201612_small.csv`` and
     ``A201612_small.csv.gz`` in the temporary folder.
     """
     dest = get_temp_folder(__file__, "temp_damir")
     outcome = download_data("A201612_small.csv.gz", whereTo=dest)
     self.assertEqual(len(outcome), 1)
     csv_path = os.path.join(dest, "A201612_small.csv")
     gz_path = os.path.join(dest, "A201612_small.csv.gz")
     self.assertExists(csv_path)
     self.assertExists(gz_path)
Exemplo n.º 4
0
 def test_download_data2(self):
     """
     Checks that downloading ``voeux.zip`` returns 14 results, the
     first of which contains ``VOEUX01.txt``, and that
     ``VOEUX01.txt`` and ``voeux.zip`` both exist in the temporary
     folder.
     """
     workdir = get_temp_folder(__file__, "temp_download_data2")
     fetched = download_data(["voeux.zip"], website=["xd"],
                             whereTo=workdir, timeout=10)
     self.assertEqual(len(fetched), 14)
     self.assertIn("VOEUX01.txt", fetched[0])
     for expected in ("VOEUX01.txt", "voeux.zip"):
         location = os.path.join(workdir, expected)
         self.assertExists(location)
Exemplo n.º 5
0
 def test_download_data_failures(self):
     """
     Checks that requesting ``voeux2.zip`` raises
     ``DownloadDataException``.
     """
     target = get_temp_folder(__file__, "temp_download_data_failures")
     archive = "voeux2.zip"

     def attempt():
         # Deferred call so that assertRaise can trigger and catch it.
         return download_data(
             archive, website="xd", whereTo=target, timeout=10)

     self.assertRaise(attempt, DownloadDataException)
Exemplo n.º 6
0
 def test_download_data_failures(self):
     """
     Expects ``DownloadDataException`` when ``voeux2.zip`` is
     requested.
     """
     dest = get_temp_folder(__file__, "temp_download_data_failures")
     missing = "voeux2.zip"

     def failing_download():
         # Invoked by assertRaise, which checks the raised exception type.
         return download_data(missing, website="xd",
                              whereTo=dest, timeout=10)

     self.assertRaise(failing_download, DownloadDataException)
Exemplo n.º 7
0
 def test_gz(self):
     """
     Downloads ``facebook_combined.txt.gz`` into a temporary folder
     and checks that the returned result is not empty.
     """
     target = get_temp_folder(__file__, "temp_gz")
     downloaded = download_data(
         "facebook_combined.txt.gz", website="xd", whereTo=target)
     self.assertNotEmpty(downloaded)
Exemplo n.º 8
0
 def test_tar_gz(self):
     """
     Downloads ``facebook.tar.gz`` into a temporary folder and checks
     that the result is not empty and contains exactly one entry
     ending with ``3980.egofeat``.
     """
     target = get_temp_folder(__file__, "temp_tar_gz")
     extracted = download_data(
         "facebook.tar.gz", website="xd", whereTo=target)
     # Filter before asserting, as the original does.
     matching = [path for path in extracted
                 if path.endswith("3980.egofeat")]
     self.assertNotEmpty(extracted)
     self.assertEqual(len(matching), 1)
Exemplo n.º 9
0
 def test_download_data_failures(self):
     """
     Checks that requesting ``voeux2.zip`` raises one of the accepted
     exceptions: ``DownloadDataException``, ``zipfile.BadZipFile``,
     ``RuntimeError`` or ``RetrieveDataException``.
     """
     target = get_temp_folder(__file__, "temp_download_data_failures")
     archive = "voeux2.zip"
     accepted = (DownloadDataException, zipfile.BadZipFile,
                 RuntimeError, RetrieveDataException)

     def attempt():
         # Deferred call so that assertRaise can trigger and catch it.
         return download_data(
             archive, website="xd", whereTo=target, timeout=10)

     self.assertRaise(attempt, accepted)
Exemplo n.º 10
0
def download_pig_standalone(pig_version=PIG_VERSION,
                            hadoop_version=HADOOP_VERSION,
                            fLOG=noLOG):
    """
    Downloads a standalone version of Pig together with the
    :epkg:`Hadoop` distribution and the ``winutils`` archive.
    The files are stored in the subfolders ``winutils``,
    ``hadoopjar`` and ``pigjar`` next to this module; the subfolders
    are created if they do not exist.

    By default, :epkg:`Hadoop` version ``HADOOP_VERSION`` is used
    in order to fit the cluster's version.

    @param      pig_version         version of Pig to download
    @param      hadoop_version      version of Hadoop to download
    @param      fLOG                logging function
    @return                         list of three items, the results
                                    returned by ``download_data`` for
                                    winutils, Hadoop and Pig, in that
                                    order

    This function might need to be run twice if the first try
    fails; this might be due to very long paths when unzipping the
    downloaded file.

    :epkg:`Hadoop` is downloaded from one of the websites
    referenced at
    `Apache Software Foundation <http://www.apache.org/dyn/closer.cgi/hadoop/common/>`_.
    Check the source to see which one was chosen.
    """
    # Resolve the module folder once so every subfolder shares the same root.
    here = os.path.abspath(os.path.dirname(__file__))

    def _local_folder(name):
        # exist_ok=True avoids the race between an existence check
        # and the creation of the folder.
        folder = os.path.join(here, name)
        os.makedirs(folder, exist_ok=True)
        return folder

    fbs = []

    # download winutils.exe
    d = _local_folder("winutils")
    exe = download_data(name="winutils.zip",
                        whereTo=d,
                        website="xd",
                        fLOG=fLOG)
    fbs.append(exe)
    change_file_status(d)

    # download hadoop
    fLOG("download hadoop", hadoop_version)
    d = _local_folder("hadoopjar")
    fn = download_data(
        name="hadoop-%s.tar.gz" % hadoop_version,
        whereTo=d,
        website="http://apache.crihan.fr/dist/hadoop/common/hadoop-%s/" %
        hadoop_version,
        fLOG=fLOG)
    fbs.append(fn)
    change_file_status(d)

    # download pig (unlike the other two downloads, this one passes silent=True)
    fLOG("download pig", pig_version)
    d = _local_folder("pigjar")
    fn = download_data(name="pig-%s.tar.gz" % pig_version,
                       whereTo=d,
                       silent=True,
                       website="http://apache.crihan.fr/dist/pig/pig-%s/" %
                       pig_version,
                       fLOG=fLOG)
    fbs.append(fn)
    change_file_status(d)
    return fbs