def test_damir(self):
    """
    Downloads ``A201612_small.csv.gz`` into a temporary folder and checks
    that one entry is returned and that both expected files are on disk.
    """
    folder = get_temp_folder(__file__, "temp_damir")
    downloaded = download_data("A201612_small.csv.gz", whereTo=folder)
    self.assertEqual(len(downloaded), 1)

    # Same order as before: the ``.csv`` file first, then the ``.csv.gz`` one.
    for filename in ("A201612_small.csv", "A201612_small.csv.gz"):
        self.assertExists(os.path.join(folder, filename))
def test_download_data2(self):
    """
    Downloads ``voeux.zip`` from website ``xd`` (both given as lists) and
    checks the number of returned entries, the first entry and the files
    found in the temporary folder.
    """
    folder = get_temp_folder(__file__, "temp_download_data2")
    downloaded = download_data(
        ["voeux.zip"],
        website=["xd"],
        whereTo=folder,
        timeout=10,
    )

    self.assertEqual(len(downloaded), 14)
    self.assertIn("VOEUX01.txt", downloaded[0])

    # Files which must be present in the folder after the download.
    for filename in ("VOEUX01.txt", "voeux.zip"):
        self.assertExists(os.path.join(folder, filename))
def test_damir(self):
    """
    Fetches ``A201612_small.csv.gz`` and verifies that a single entry is
    returned and that the two expected files exist in the target folder.
    """
    target = get_temp_folder(__file__, "temp_damir")
    result = download_data("A201612_small.csv.gz", whereTo=target)
    self.assertEqual(len(result), 1)

    csv_path = os.path.join(target, "A201612_small.csv")
    gz_path = os.path.join(target, "A201612_small.csv.gz")
    self.assertExists(csv_path)
    self.assertExists(gz_path)
def test_download_data_failures(self):
    """
    Checks through ``assertRaise`` that requesting ``voeux2.zip`` from
    website ``xd`` ends with a ``DownloadDataException``.
    """
    folder = get_temp_folder(__file__, "temp_download_data_failures")
    archive = "voeux2.zip"

    def attempt():
        # Deferred call: ``assertRaise`` is the one triggering the download.
        return download_data(archive, website="xd", whereTo=folder, timeout=10)

    self.assertRaise(attempt, DownloadDataException)
def test_download_data_failures(self):
    """
    Requests ``voeux2.zip`` from website ``xd`` and expects, through
    ``assertRaise``, a ``DownloadDataException``.
    """
    target = get_temp_folder(__file__, "temp_download_data_failures")
    missing = "voeux2.zip"
    # Named arguments shared by the single download attempt below.
    options = {"website": "xd", "whereTo": target, "timeout": 10}
    self.assertRaise(
        lambda: download_data(missing, **options),
        DownloadDataException,
    )
def test_gz(self):
    """
    Downloads ``facebook_combined.txt.gz`` from website ``xd`` and checks
    that the returned value is not empty.
    """
    folder = get_temp_folder(__file__, "temp_gz")
    downloaded = download_data(
        "facebook_combined.txt.gz",
        website="xd",
        whereTo=folder,
    )
    self.assertNotEmpty(downloaded)
def test_tar_gz(self):
    """
    Downloads ``facebook.tar.gz`` from website ``xd`` and checks that the
    result is not empty and holds exactly one entry ending with
    ``3980.egofeat``.
    """
    folder = get_temp_folder(__file__, "temp_tar_gz")
    extracted = download_data(
        "facebook.tar.gz",
        website="xd",
        whereTo=folder,
    )
    # Filtering happens before the assertions, as in the previous version.
    matching = [path for path in extracted if path.endswith("3980.egofeat")]
    self.assertNotEmpty(extracted)
    self.assertEqual(len(matching), 1)
def test_download_data_failures(self):
    """
    Requests ``voeux2.zip`` from website ``xd`` and expects, through
    ``assertRaise``, one of several exception types.
    """
    folder = get_temp_folder(__file__, "temp_download_data_failures")
    archive = "voeux2.zip"
    # Exception types handed to ``assertRaise`` for the download attempt.
    accepted_errors = (
        DownloadDataException,
        zipfile.BadZipFile,
        RuntimeError,
        RetrieveDataException,
    )

    def attempt():
        return download_data(archive, website="xd", whereTo=folder, timeout=10)

    self.assertRaise(attempt, accepted_errors)
def _download_next_to_module(subfolder, fLOG, **download_options):
    """
    Downloads one file into a folder located next to this module.

    @param      subfolder           name of the folder, created if missing
    @param      fLOG                logging function given to ``download_data``
    @param      download_options    other named arguments for ``download_data``
                                    (``name``, ``website``, ...)
    @return                         whatever ``download_data`` returns
    """
    here = os.path.abspath(os.path.dirname(__file__))
    target = os.path.join(here, subfolder)
    # ``exist_ok`` avoids the check-then-create race of ``exists`` + ``mkdir``.
    os.makedirs(target, exist_ok=True)
    fetched = download_data(whereTo=target, fLOG=fLOG, **download_options)
    change_file_status(target)
    return fetched


def download_pig_standalone(pig_version=PIG_VERSION, hadoop_version=HADOOP_VERSION,
                            fLOG=noLOG):
    """
    Downloads the standalone version of Pig, along with ``winutils``
    and :epkg:`Hadoop`. Version ``HADOOP_VERSION`` is used by default
    in order to fit the cluster's version.

    @param      pig_version         pig version to download
    @param      hadoop_version      hadoop version to download
    @param      fLOG                logging function
    @return                         list of the three results returned by
                                    ``download_data``, in this order:
                                    winutils, hadoop, pig

    Each archive is stored in its own folder (``winutils``, ``hadoopjar``,
    ``pigjar``) next to this module; a folder is created if it does not
    exist yet.

    This function might need to be run twice if the first try fails,
    it might be due to a very long path when unzipping the downloaded file.
    :epkg:`Hadoop` is downloaded from one of the websites
    referenced at `Apache Software Foundation
    <http://www.apache.org/dyn/closer.cgi/hadoop/common/>`_.
    Check the source to see which one was chosen.
    """
    # download winutils.exe
    downloads = [
        _download_next_to_module(
            "winutils", fLOG, name="winutils.zip", website="xd")
    ]

    # download hadoop
    fLOG("download hadoop", hadoop_version)
    downloads.append(_download_next_to_module(
        "hadoopjar", fLOG,
        name="hadoop-%s.tar.gz" % hadoop_version,
        website="http://apache.crihan.fr/dist/hadoop/common/hadoop-%s/" % hadoop_version))

    # download pig, this is the only download done with ``silent=True``
    fLOG("download pig", pig_version)
    downloads.append(_download_next_to_module(
        "pigjar", fLOG,
        name="pig-%s.tar.gz" % pig_version,
        silent=True,
        website="http://apache.crihan.fr/dist/pig/pig-%s/" % pig_version))

    return downloads