Exemplo n.º 1
0
def get_forex():
    """Download the latest ECB Forex archive, unpack it and rename the CSV.

    The download URL, the zip destination and the final raw-file path are all
    read from ``source_config``. The archive is extracted into
    ``source_config.ecb_raw`` and the ``eurofxref-hist.csv`` file found there
    is renamed to the configured raw path.
    """
    print(">> Downloading the latest ECB Forex data...")
    latest_url = source_config.forex_data_url['latest']
    latest_files = source_config.forex_data_files['latest']
    archive_path = latest_files['zip']

    download_file_from_url(latest_url, archive_path)
    unzip_file(archive_path, source_config.ecb_raw)

    # The extracted CSV carries a fixed name; move it to the configured path.
    extracted_csv = os.path.join(source_config.ecb_raw, 'eurofxref-hist.csv')
    rename_file(extracted_csv, latest_files['raw'])
Exemplo n.º 2
0
def fetch_data():
    """Download a Landsat 8 median composite over the Chesapeake Bay rectangle.

    Builds an Earth Engine image collection restricted to the date filter
    2019-01-01 / 2019-06-01 and to the rectangle, maps ``masker`` over it,
    takes the median and keeps bands 4 and 5. A download URL is then requested
    at 30 m scale in EPSG:3857, printed, and the archive is saved as
    ``landsat8_image.zip`` under ``tmp_dir``.
    """
    period_start: datetime = datetime(2019, 1, 1)
    period_end: datetime = datetime(2019, 6, 1)
    bay_region: Geometry = ee.Geometry.Rectangle([-78, 40, -75, 36])

    # Build the collection step by step: dataset -> date window -> bounds.
    collection: ImageCollection = ee.ImageCollection(DataSets.landsat8_reflectance_30m.value)
    collection = collection.filter(ee.Filter.date(period_start, period_end))
    collection = collection.filterBounds(bay_region)

    median_image = collection.map(masker).median()
    band4 = Landsat8Bands.band4_645nm.value
    band5 = Landsat8Bands.band5_859nm.value
    composite: Image = median_image.select([band4, band5])
    scale_m: int = 30

    download_params = {
        "name": f"high_resolution_landsat8_{scale_m}m_{datetime.now().isoformat()}",
        "region": bay_region,
        "crs": "EPSG:3857",
        "scale": scale_m,
        "bands": [band4, band5],
        "maxPixels": 210313503,
    }
    download_url: str = composite.getDownloadURL(download_params)
    print(download_url)

    archive_path = tmp_dir / Path("landsat8_image.zip")
    download_file_from_url(download_url, archive_path)

    # NOTE: a batch variant based on Export.image.toDrive (start the task,
    # poll its status, then download each destination URI) was sketched here
    # previously but is disabled; only the direct getDownloadURL path runs.

    print("finished")
Exemplo n.º 3
0
def get_insee_couple_famille_menages(decoupage_geo=None, verbose=False):
    """Download every configured INSEE 'couple famille menages' dataset.

    Args:
        decoupage_geo: Geographic granularity key used to index
            ``source_config``; defaults to ``'commune'``, which is the only
            granularity this function actually downloads.
        verbose: When exactly ``True``, print a progress line per year.

    Returns:
        A list with one entry per year that was downloaded: the value
        returned by ``download_insee_excel`` for Excel URLs, or the
        configured raw path for zip URLs. Years whose URL has an unsupported
        extension (or an unsupported granularity) contribute no entry.
    """
    if decoupage_geo is None:
        decoupage_geo = 'commune'
    output_datas = []
    for year in source_config.couple_famille_menages_url[decoupage_geo].keys():
        if verbose is True:
            print('Downloading INSEE couple-famille-menage / commune / '
                  '{}...'.format(year))
        input_url = source_config.couple_famille_menages_url[
            decoupage_geo][year]
        if decoupage_geo not in ('commune',):
            # Nothing is downloaded for other granularities, so there is
            # nothing to record for this year.
            continue
        if input_url.endswith(('xls', 'xlsx')):
            output_file = source_config.couple_famille_menages_files[
                decoupage_geo][year]['raw']
            output_data = download_insee_excel(input_url, output_file)
        elif input_url.endswith('zip'):
            output_file = source_config.couple_famille_menages_files[
                decoupage_geo][year]['zip']
            output_zip = download_file_from_url(input_url, output_file)
            unzip_file(output_zip, source_config.insee_raw)
            output_data = source_config.couple_famille_menages_files[
                decoupage_geo][year]['raw']
        else:
            # Unknown extension: skip instead of appending a stale value
            # left over from a previous iteration (or hitting an unbound
            # local on the first one).
            continue
        output_datas.append(output_data)
    return output_datas
Exemplo n.º 4
0
def fetch_data():
    """Download a reduced-resolution MODIS water-mask image of Chesapeake Bay.

    Filters the MODIS Terra land/water collection to the date filter
    2015-01-01 / 2015-02-01 and to the rectangle, keeps the water-mask and
    water-mask-QA bands and limits the collection to one image. The water-mask
    band of the first image is passed through ``reduceResolution`` with a mean
    reducer, a download URL is requested and printed, and the archive is
    saved under ``tmp_dir`` and unzipped.
    """
    period_start: datetime = datetime(2015, 1, 1)
    period_end: datetime = datetime(2015, 2, 1)
    bay_region: Geometry = ee.Geometry.Rectangle([-78, 40, -75, 36])

    # Build the collection step by step: dataset -> dates -> bounds -> bands.
    collection: ImageCollection = ee.ImageCollection(DataSets.modis_terra_land_water.value)
    collection = collection.filter(ee.Filter.date(period_start, period_end))
    collection = collection.filterBounds(bay_region)
    collection = collection.select(
        [ModisTerraLandWater.water_mask.value, ModisTerraLandWater.water_mask_QA.value])
    collection = collection.limit(1)

    water_mask: Image = collection.first().select([ModisTerraLandWater.water_mask.value])
    mean_reducer = ee.Reducer.mean()
    reduced_mask = water_mask.reduceResolution(reducer=mean_reducer, bestEffort=True)

    # NOTE: a getVideoThumbURL variant over the whole collection was sketched
    # here previously but is disabled; only the single-image download runs.

    download_params: Dict = {
        "name": "landmask_image",
        "dimensions": 768,
        "region": bay_region,
        "crs": "EPSG:3857",
        "bands": [ModisTerraLandWater.water_mask.value],
        "min": 0,
        "max": 1,
        "palette": ['blue', 'purple', 'cyan', 'green', 'yellow', 'red'],
    }
    download_url: str = reduced_mask.getDownloadURL(download_params)
    print(download_url)

    download_file_from_url(download_url, tmp_dir / Path("landmask_image.zip"))
    unzip_file(tmp_dir / Path("landmask_image.zip"))

    print("finished")
Exemplo n.º 5
0
def get_laposte_base_code_postaux(verbose=False):
    """Download the LA POSTE postal-code base and return the download result.

    The source URL and the destination path are read from ``source_config``.
    A progress line is printed only when ``verbose`` is exactly ``True``.
    The return value is whatever ``download_file_from_url`` returns.
    """
    if verbose is True:
        print('Downloading LA POSTE base des codes postaux...')
    return download_file_from_url(
        source_config.laposte_code_postaux_url,
        source_config.laposte_code_postaux_files['raw'],
    )
Exemplo n.º 6
0
def get_divers_idh2_ile_de_france(decoupage_geo=None, verbose=False):
    """Download the latest 'IDH2 for Ile de France' dataset.

    Args:
        decoupage_geo: Geographic granularity key used to index
            ``source_config``; defaults to ``'commune'``, which is the only
            granularity this function downloads.
        verbose: When exactly ``True``, print a progress line.

    Returns:
        Whatever ``download_file_from_url`` returns for the 'commune'
        granularity, or ``None`` when ``decoupage_geo`` is configured but is
        not a granularity this function downloads.
    """
    if decoupage_geo is None:
        decoupage_geo = 'commune'
    if verbose is True:
        print('Downloading "IDH2 / Ile-de-France / commune"...')
    input_url = source_config.idh2_idf_url[decoupage_geo]['latest']
    if decoupage_geo not in ('commune', ):
        # Previously this path fell through to ``return output_data`` with
        # the name never assigned, raising UnboundLocalError.
        return None
    output_path = source_config.idh2_idf_files[decoupage_geo]['latest'][
        'raw']
    return download_file_from_url(input_url, output_path)
Exemplo n.º 7
0
def get_caf_alloc_foyers_bas_revenus(decoupage_geo=None, verbose=False):
    """Download every configured CAF 'allocations foyers bas revenus' CSV.

    Iterates over the years configured in ``source_config`` for the chosen
    granularity (``'commune'`` when ``decoupage_geo`` is ``None``). For the
    'commune' granularity, each URL ending in ``csv`` is downloaded to its
    configured raw path. Returns a dict mapping year to the value returned
    by ``download_file_from_url``; other years are omitted.
    """
    geo = 'commune' if decoupage_geo is None else decoupage_geo
    downloaded = {}
    urls_by_year = source_config.foyers_alloc_bas_revenus_url[geo]
    for year, input_url in urls_by_year.items():
        if verbose is True:
            print('Downloading CAF foyers allocations bas '
                  'revenus / commune / {}...'.format(year))
        # Only CSV sources at the 'commune' granularity are fetched.
        if geo not in ('commune', ) or not input_url.endswith('csv'):
            continue
        target_path = source_config.foyers_alloc_bas_revenus_files[geo][year]['raw']
        downloaded[year] = download_file_from_url(input_url, target_path)
    return downloaded
Exemplo n.º 8
0
def get_insee_diplome_formation(decoupage_geo=None, verbose=False):
    """Download every configured INSEE 'diplome formation' dataset.

    Args:
        decoupage_geo: Geographic granularity key used to index
            ``source_config``; defaults to ``'commune'``, which is the only
            granularity this function actually downloads.
        verbose: When exactly ``True``, print a progress line per year.

    Returns:
        A list with one entry per year that was downloaded: the value
        returned by ``download_insee_excel`` for Excel URLs, or the
        configured raw path for zip URLs. Years whose URL has an unsupported
        extension (or an unsupported granularity) contribute no entry.
    """
    if decoupage_geo is None:
        decoupage_geo = 'commune'
    output_datas = []
    for year in source_config.diplome_formations_url[decoupage_geo].keys():
        if verbose is True:
            print('Downloading INSEE diplome-formation / commune / {}...'.format(year))
        input_url = source_config.diplome_formations_url[decoupage_geo][year]
        if decoupage_geo not in ('commune',):
            # Nothing is downloaded for other granularities, so there is
            # nothing to record for this year.
            continue
        if input_url.endswith(('xls', 'xlsx')):
            output_path = source_config.diplome_formation_files[decoupage_geo][year]['raw']
            output_data = download_insee_excel(input_url, output_path)
        elif input_url.endswith('zip'):
            output_path = source_config.diplome_formation_files[decoupage_geo][year]['zip']
            output_zip = download_file_from_url(input_url, output_path)
            unzip_file(output_zip, source_config.insee_raw)
            output_data = source_config.diplome_formation_files[decoupage_geo][year]['raw']
        else:
            # Unknown extension: skip instead of appending a stale value
            # left over from a previous iteration (or hitting an unbound
            # local on the first one).
            continue
        output_datas.append(output_data)
    # The collected results were previously built but never returned.
    return output_datas
Exemplo n.º 9
0
def process_links(movie_folder, links):
    """Download each gzipped subtitle link and write it out decompressed.

    For every link, the payload is fetched through the anonymous urllib
    provided by ``TorDownloader``, decompressed with
    ``utils.decompress_gzip`` and written to ``movie_folder`` under the
    link's last path segment with ``.gz`` replaced by ``.srt``.

    Args:
        movie_folder: Destination directory; created on demand.
        links: Iterable of subtitle URLs (``len()`` is called on it).

    Returns:
        The list of file paths that were written, in link order.

    Raises:
        urllib.error.HTTPError: For any HTTP error other than 410. A 410 is
            treated as "download limit reached": the Tor IP is refreshed and
            the same link is retried.
    """
    decompressed_files = []
    print('{0} files expected in folder {1}'.format(len(links), movie_folder))
    for link in links:
        sub_file_name = movie_folder + '/' + link.split('/')[-1].replace('.gz', '.srt')
        sub_data = None
        while not sub_data:
            print('Going to download: {0}'.format(link))
            try:
                sub_data = utils.download_file_from_url(link, lib=TorDownloader.get_anonymous_urllib())
            except urllib.error.HTTPError as exc:
                print('An error occurred while processing subtitle {0}: {1}'.format(sub_file_name, exc))
                if exc.code != 410:
                    # Nothing changes between attempts for other errors, so
                    # retrying would loop forever on a persistent failure.
                    raise
                print('Download limit reached: a new IP is required. Tor IP will be changed and operation retried.')
                TorDownloader.refresh_ip()

        os.makedirs(movie_folder, exist_ok=True)
        print('Going to decompress: {0}\n'.format(sub_file_name))
        with open(sub_file_name, 'wb') as out_file:
            out_file.write(utils.decompress_gzip(sub_data))
        decompressed_files.append(sub_file_name)
    return decompressed_files
Exemplo n.º 10
0
    def install_mitmproxy_cert(self, mitmproxy_proc, browser_path):
        """Install the CA certificate generated by mitmproxy into geckoview android.

        If running locally:
        1. Will use the `certutil` tool from the local Firefox desktop build

        If running in production:
        1. Get the tooltool manifest file for downloading hostutils (contains certutil)
        2. Get the `certutil` tool by downloading hostutils using the tooltool manifest

        Then, both locally and in production:
        1. Create an NSS certificate database in the geckoview browser profile dir, only
           if it doesn't already exist. Use this certutil command:
           `certutil -N -d sql:<path to profile> --empty-password`
        2. Import the mitmproxy certificate into the database, i.e.:
           `certutil -A -d sql:<path to profile> -n "some nickname" -t TC,, -a -i <path to CA.pem>`

        Args:
            mitmproxy_proc: Not used by this method.
            browser_path: Not used by this method.

        Raises:
            RuntimeError: If a required environment variable is missing, the
                hostutils manifest cannot be downloaded, certutil cannot be
                found, or the cert db creation command has not finished after
                the forced pause.

        If the certificate is not reported as installed at the end, mitmproxy
        playback is stopped and the process exits via ``sys.exit()``.
        """
        self.CERTUTIL_SLEEP = 10
        if self.config['run_local']:
            # when running locally, it is found in the Firefox desktop build (..obj../dist/bin)
            self.certutil = os.path.join(self.config['obj_path'], 'dist',
                                         'bin')
            os.environ['LD_LIBRARY_PATH'] = self.certutil
        else:
            # must download certutil inside hostutils via tooltool; use this manifest:
            # mozilla-central/testing/config/tooltool-manifests/linux64/hostutils.manifest
            # after it will be found here inside the worker/bitbar container:
            # /builds/worker/workspace/build/hostutils/host-utils-66.0a1.en-US.linux-x86_64
            LOG.info("downloading certutil binary (hostutils)")

            # get path to the hostutils tooltool manifest; was set earlier in
            # mozharness/configs/raptor/android_hw_config.py, to the path i.e.
            # mozilla-central/testing/config/tooltool-manifests/linux64/hostutils.manifest
            # the bitbar container is always linux64
            #
            # Each abort below raises an explicit RuntimeError: a bare `raise`
            # outside an `except` block has no exception to re-raise and only
            # produces a generic, message-less failure.
            if os.environ.get('GECKO_HEAD_REPOSITORY', None) is None:
                LOG.critical('Abort: unable to get GECKO_HEAD_REPOSITORY')
                raise RuntimeError('Abort: unable to get GECKO_HEAD_REPOSITORY')

            if os.environ.get('GECKO_HEAD_REV', None) is None:
                LOG.critical('Abort: unable to get GECKO_HEAD_REV')
                raise RuntimeError('Abort: unable to get GECKO_HEAD_REV')

            if os.environ.get('HOSTUTILS_MANIFEST_PATH', None) is not None:
                manifest_url = os.path.join(
                    os.environ['GECKO_HEAD_REPOSITORY'], "raw-file",
                    os.environ['GECKO_HEAD_REV'],
                    os.environ['HOSTUTILS_MANIFEST_PATH'])
            else:
                LOG.critical("Abort: unable to get HOSTUTILS_MANIFEST_PATH!")
                raise RuntimeError(
                    "Abort: unable to get HOSTUTILS_MANIFEST_PATH!")

            # first need to download the hostutils tooltool manifest file itself
            _dest = os.path.join(self.raptor_dir, 'hostutils.manifest')
            have_manifest = download_file_from_url(manifest_url, _dest)
            if not have_manifest:
                LOG.critical(
                    'failed to download the hostutils tooltool manifest')
                raise RuntimeError(
                    'failed to download the hostutils tooltool manifest')

            # now use the manifest to download hostutils so we can get certutil
            tooltool_download(_dest, self.config['run_local'], self.raptor_dir)

            # the production bitbar container host is always linux
            self.certutil = os.path.join(
                self.raptor_dir, 'host-utils-66.0a1.en-US.linux-x86_64')

            # must add hostutils/certutil to the path
            os.environ['LD_LIBRARY_PATH'] = self.certutil

        # up to here self.certutil is the containing directory; from now on
        # it is the full path of the certutil binary itself
        bin_suffix = mozinfo.info.get('bin_suffix', '')
        self.certutil = os.path.join(self.certutil, "certutil" + bin_suffix)

        if os.path.isfile(self.certutil):
            LOG.info("certutil is found at: %s" % self.certutil)
        else:
            LOG.critical("unable to find certutil at %s" % self.certutil)
            raise RuntimeError(
                "unable to find certutil at %s" % self.certutil)

        # DEFAULT_CERT_PATH has local path and name of mitmproxy cert i.e.
        # /home/cltbld/.mitmproxy/mitmproxy-ca-cert.cer
        self.local_cert_path = DEFAULT_CERT_PATH

        # check if the nss ca cert db already exists in the device profile
        LOG.info(
            "checking if the nss cert db already exists in the android browser profile"
        )
        param1 = "sql:%s/" % self.config['local_profile_dir']
        command = [self.certutil, '-d', param1, '-L']

        try:
            subprocess.check_output(command, env=os.environ.copy())
            LOG.info("the nss cert db already exists")
            cert_db_exists = True
        except subprocess.CalledProcessError:
            # this means the nss cert db doesn't exist yet
            LOG.info("nss cert db doesn't exist yet")
            cert_db_exists = False

        # try a forced pause between certutil cmds; possibly reduce later
        time.sleep(self.CERTUTIL_SLEEP)

        if not cert_db_exists:
            # create cert db if it doesn't already exist; it may exist already
            # if a previous pageload test ran in the same test suite
            param1 = "sql:%s/" % self.config['local_profile_dir']
            command = [
                self.certutil, '-N', '-v', '-d', param1, '--empty-password'
            ]

            LOG.info("creating nss cert database using command: %s" %
                     ' '.join(command))
            cmd_proc = subprocess.Popen(command, env=os.environ.copy())
            time.sleep(self.CERTUTIL_SLEEP)
            cmd_terminated = cmd_proc.poll()
            if cmd_terminated is None:  # None value indicates process hasn't terminated
                LOG.critical("nss cert db creation command failed to complete")
                raise RuntimeError(
                    "nss cert db creation command failed to complete")

        # import mitmproxy cert into the db
        command = [
            self.certutil, '-A', '-d', param1, '-n', 'mitmproxy-cert', '-t',
            'TC,,', '-a', '-i', self.local_cert_path
        ]

        LOG.info("importing mitmproxy cert into db using command: %s" %
                 ' '.join(command))
        cmd_proc = subprocess.Popen(command, env=os.environ.copy())
        time.sleep(self.CERTUTIL_SLEEP)
        cmd_terminated = cmd_proc.poll()
        if cmd_terminated is None:  # None value indicates process hasn't terminated
            # only logged here: the installation check below decides whether
            # to abort
            LOG.critical(
                "command to import mitmproxy cert into cert db failed to complete"
            )

        # cannot continue if failed to add CA cert to Firefox, need to check
        if not self.is_mitmproxy_cert_installed():
            LOG.error(
                "Aborting: failed to install mitmproxy CA cert into Firefox")
            self.stop_mitmproxy_playback()
            sys.exit()