Example #1
    def _response_hook(self, response, *args, **kwargs):
        loglevel = log.getEffectiveLevel()

        if loglevel >= 10:
            # Log request at DEBUG severity
            request_hdrs = '\n'.join(
                f'{k}: {v}' for k, v in response.request.headers.items())
            request_log = textwrap.indent(
                f"-----------------------------------------\n"
                f"{response.request.method} {response.request.url}\n"
                f"{request_hdrs}\n"
                f"\n"
                f"{response.request.body}\n"
                f"-----------------------------------------", '\t')
            log.debug(f"HTTP request\n{request_log}")
        if loglevel >= 5:
            # Log response at super-DEBUG severity
            response_hdrs = '\n'.join(f'{k}: {v}'
                                      for k, v in response.headers.items())
            if kwargs.get('stream'):
                response_log = textwrap.indent(
                    f"-----------------------------------------\n"
                    f"{response.status_code} {response.reason} {response.url}\n"
                    f"{response_hdrs}\n"
                    "Streaming Data\n"
                    f"-----------------------------------------", '\t')
            else:
                response_log = textwrap.indent(
                    f"-----------------------------------------\n"
                    f"{response.status_code} {response.reason} {response.url}\n"
                    f"{response_hdrs}\n"
                    f"\n"
                    f"{response.text}\n"
                    f"-----------------------------------------", '\t')
            log.log(5, f"HTTP response\n{response_log}")
Example #2
def to_cache(response, cache_file):
    log.debug("Caching data to {0}".format(cache_file))
    response = copy.deepcopy(response)
    if hasattr(response, 'request'):
        for key in tuple(response.request.hooks.keys()):
            del response.request.hooks[key]
    with open(cache_file, "wb") as f:
        pickle.dump(response, f)
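A usage sketch for the helper above (the URL and cache path are hypothetical; the helper itself additionally needs `copy` and `pickle` imported at module level):

import logging

import requests

log = logging.getLogger(__name__)      # the module-level logger the helper uses
logging.basicConfig(level=logging.DEBUG)

resp = requests.get("https://example.org")          # hypothetical URL
to_cache(resp, "/tmp/astroquery_example.pickle")    # hypothetical cache path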
Example #3
    def query_surveys(self, surveys='', cache=True,
                      help=False, open_form=False, **kwargs):
        """
        Query survey Phase 3 data contained in the ESO archive.

        Parameters
        ----------
        surveys : string or list
            Name of the survey(s) to query.  Should be one or more of the
            names returned by `~astroquery.eso.EsoClass.list_surveys`.  If
            specified as a string, should be a comma-separated list of
            survey names.
        cache : bool
            Cache the response for faster subsequent retrieval

        Returns
        -------
        table : `~astropy.table.Table` or `None`
            A table representing the data available in the archive for the
            specified survey, matching the constraints specified in ``kwargs``.
            The number of rows returned is capped by the ROW_LIMIT
            configuration item. `None` is returned when the query has no
            results.

        """

        url = "http://archive.eso.org/wdb/wdb/adp/phase3_main/form"
        if open_form:
            webbrowser.open(url)
        elif help:
            self._print_surveys_help(url, cache=cache)
        else:
            survey_form = self._request("GET", url, cache=cache)
            query_dict = kwargs
            query_dict["wdbo"] = "csv/download"
            if isinstance(surveys, str):
                surveys = surveys.split(",")
            query_dict['collection_name'] = surveys
            if self.ROW_LIMIT >= 0:
                query_dict["max_rows_returned"] = int(self.ROW_LIMIT)
            else:
                query_dict["max_rows_returned"] = 10000

            survey_response = self._activate_form(survey_form, form_index=0,
                                                  form_id='queryform',
                                                  inputs=query_dict, cache=cache)

            content = survey_response.content
            # First line is always garbage
            content = content.split(b'\n', 1)[1]
            log.debug("Response content:\n{0}".format(content))
            if _check_response(content):
                table = Table.read(BytesIO(content), format="ascii.csv",
                                   comment="^#")
                return table
            else:
                warnings.warn("Query returned no results", NoResultsWarning)
Example #4
    def _parse_result(self, response, verbose=False):
        """
        Parses the results from the HTTP response to `~astropy.table.Table`.

        Parameters
        ----------
        response : `requests.Response`
            The HTTP response object
        verbose : bool, optional
            Defaults to `False`. When `True`, it will display warnings whenever
            the VOtable returned from the Service doesn't conform to the
            standard.

        Returns
        -------
        table : `~astropy.table.Table`
        """
        if not verbose:
            commons.suppress_vo_warnings()

        content = response.text
        log.debug(content)

        # Check if results were returned
        if 'The catalog is not in the list' in content:
            raise Exception("Catalogue not found")

        # Check that object name was not malformed
        if 'Either wrong or missing coordinate/object name' in content:
            raise Exception("Malformed coordinate/object name")

        # Check that the results are not of length zero
        if len(content) == 0:
            raise Exception("The LCOGT server sent back an empty reply")

        # Read it in using the astropy VO table reader
        try:
            first_table = votable.parse(six.BytesIO(response.content),
                                        pedantic=False).get_first_table()
        except Exception as ex:
            self.response = response
            self.table_parse_error = ex
            raise TableParseError("Failed to parse LCOGT votable! The raw "
                                  " response can be found in self.response,"
                                  " and the error in self.table_parse_error.")

        # Convert to astropy.table.Table instance
        table = first_table.to_table()

        # Check if table is empty
        if len(table) == 0:
            warnings.warn(
                "Query returned no results, so the table will "
                "be empty", NoResultsWarning)

        return table
Example #5
    def from_cache(self, cache_location):
        request_file = self.request_file(cache_location)
        try:
            with open(request_file, "rb") as f:
                response = pickle.load(f)
            if not isinstance(response, requests.Response):
                response = None
        except IOError:  # TODO: change to FileNotFoundError once drop py2 support
            response = None
        if response:
            log.debug("Retrieving data from {0}".format(request_file))
        return response
Example #6
    def query_tap(self, query, maxrec=None):
        """
        Send query to the ALMA TAP. Results in pyvo.dal.TapResult format.
        result.table in Astropy table format

        Parameters
        ----------
        maxrec : int
            maximum number of records to return

        """
        log.debug('TAP query: {}'.format(query))
        return self.tap.search(query, language='ADQL', maxrec=maxrec)
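A hedged usage sketch, assuming the surrounding class is `astroquery.alma.Alma` (the ADQL table and target name are illustrative):

from astroquery.alma import Alma

result = Alma().query_tap(
    "SELECT TOP 5 target_name, t_exptime FROM ivoa.obscore "
    "WHERE target_name = 'M83'", maxrec=5)
print(result.to_table())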
Example #7
    def _query(self,
               url,
               column_filters={},
               columns=[],
               open_form=False,
               help=False,
               cache=True,
               **kwargs):

        table = None
        if open_form:
            webbrowser.open(url)
        elif help:
            self._print_query_help(url)
        else:
            instrument_form = self._request("GET", url, cache=cache)
            query_dict = {}
            query_dict.update(column_filters)
            # TODO: replace this with individually parsed kwargs
            query_dict.update(kwargs)
            query_dict["wdbo"] = "csv/download"

            # Default to returning the DP.ID since it is needed for header
            # acquisition
            query_dict['tab_dp_id'] = kwargs.pop('tab_dp_id', 'on')

            for k in columns:
                query_dict["tab_" + k] = True
            if self.ROW_LIMIT >= 0:
                query_dict["max_rows_returned"] = int(self.ROW_LIMIT)
            else:
                query_dict["max_rows_returned"] = 10000
            # used to be form 0, but now there's a new 'logout' form at the top
            # (form_index = -1 and 0 both work now that form_id is included)
            instrument_response = self._activate_form(instrument_form,
                                                      form_index=-1,
                                                      form_id='queryform',
                                                      inputs=query_dict,
                                                      cache=cache)

            content = instrument_response.content
            # First line is always garbage
            content = content.split(b'\n', 1)[1]
            log.debug("Response content:\n{0}".format(content))
            if _check_response(content):
                table = Table.read(BytesIO(content),
                                   format="ascii.csv",
                                   comment='^#')
                return table
            else:
                warnings.warn("Query returned no results", NoResultsWarning)
Example #8
    def get_images(self,
                   coordinates,
                   radius,
                   collection=None,
                   get_url_list=False,
                   show_progress=False):
        """
        A coordinate-based query function that returns a list of
        fits files with cutouts around the passed in coordinates.

        Parameters
        ----------
        coordinates : str or `astropy.coordinates`.
            Coordinates around which to query.
        radius : str or `astropy.units.Quantity`
            The radius of the cone search AND cutout area.
        collection : str, optional
            Name of the CADC collection to query.
        get_url_list : bool, optional
            If ``True``, returns the list of data urls rather than
            the downloaded FITS files. Default is ``False``.
        show_progress : bool, optional
            Whether to display a progress bar if the file is downloaded
            from a remote server.  Default is ``False``.

        Returns
        -------
        list : A list of `~astropy.io.fits.HDUList` objects (or a list of
        str if returning urls).
        """

        filenames = self.get_images_async(coordinates, radius, collection,
                                          get_url_list, show_progress)

        if get_url_list:
            return filenames

        images = []

        for fn in filenames:
            try:
                images.append(fn.get_fits())
            except requests.exceptions.HTTPError as err:
                # Catch HTTPError if user is unauthorized to access file
                log.debug("{} - Problem retrieving the file: {}".format(
                    str(err), str(err.url)))
                pass

        return images
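A hedged usage sketch, assuming this method belongs to `astroquery.cadc.Cadc` (coordinates, radius and collection are illustrative):

from astroquery.cadc import Cadc

cadc = Cadc()
images = cadc.get_images('08h45m07.5s +54d18m00s', radius='0.01 deg',
                         collection='CFHT')
for hdulist in images:
    print(hdulist[0].header.get('OBJECT'))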
Example #9
    def _HEADER_data_size(self, files):
        """
        Given a list of file URLs, return the data size.  This is useful for
        assessing how much data you might be downloading!
        (This is discouraged by the ALMA archive, as it puts unnecessary load
        on their system)
        """
        totalsize = 0 * u.B
        data_sizes = {}
        pb = ProgressBar(len(files))
        for index, fileLink in enumerate(files):
            response = self._request('HEAD', fileLink, stream=False,
                                     cache=False, timeout=self.TIMEOUT)
            filesize = (int(response.headers['content-length']) * u.B).to(u.GB)
            totalsize += filesize
            data_sizes[fileLink] = filesize
            log.debug("File {0}: size {1}".format(fileLink, filesize))
            pb.update(index + 1)
            response.raise_for_status()

        return data_sizes, totalsize.to(u.GB)
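The same bookkeeping can be sketched without the class's `_request` wrapper: issue a HEAD request per file and sum the Content-Length headers (the URLs below are hypothetical):

import astropy.units as u
import requests

urls = ["https://example.org/a.fits", "https://example.org/b.fits"]
total = 0 * u.B
for url in urls:
    head = requests.head(url, timeout=30)
    head.raise_for_status()
    total += int(head.headers['content-length']) * u.B
print(total.to(u.GB))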
Example #10
def get_mockreturn(self, method, url, data=None, timeout=10,
                   files=None, params=None, headers=None, **kwargs):
    log.debug("get_mockreturn url:{} params:{} kwargs:{}".format(url, params, kwargs))
    if kwargs and 'auth' in kwargs:
        auth = kwargs['auth']
        if auth and (auth[0] != 'user' or auth[1] != 'password'):
            log.debug("Rejecting credentials")
            return create_auth_failure_response()

    if 'data/async' in str(url):
        # Responses for an asynchronous SODA job
        if str(url).endswith('data/async'):
            self.first_job_pass = True
            return create_soda_create_response('111-000-111-000')
        elif str(url).endswith('/phase') and method == 'POST':
            key = "RUN_JOB"
        elif str(url).endswith('111-000-111-000') and method == 'GET':
            key = "RUN_JOB" if self.first_job_pass else "COMPLETED_JOB"
            self.first_job_pass = False
        else:
            raise ValueError("Unexpected SODA async {} call to url {}".format(method, url))
    elif 'datalink' in str(url):
        if 'cube-244' in str(url):
            key = 'DATALINK'
        else:
            key = 'DATALINK_NOACCESS'
    else:
        key = params['POS'].split()[0] if params['POS'] else None
    filename = data_path(DATA_FILES[key])
    log.debug('providing ' + filename)
    with open(filename, 'rb') as f:
        content = f.read()
    return MockResponse(content)
Example #11
def pyregion_subset(region, data, mywcs):
    """
    Return a subset of an image (``data``) given a region.

    Parameters
    ----------
    region : `~pyregion.Shape`
        A Shape from a pyregion-parsed region file
    data : np.ndarray
        An array with shape described by WCS
    mywcs : `astropy.wcs.WCS`
        A world coordinate system describing the data
    """
    import pyregion

    shapelist = pyregion.ShapeList([region])
    if shapelist[0].coord_format not in ('physical', 'image'):
        celhdr = mywcs.sub([wcs.WCSSUB_CELESTIAL]).to_header()
        pixel_regions = shapelist.as_imagecoord(celhdr)
    else:
        # For this to work, we'd need to change the reference pixel after
        # cropping.  Alternatively, we can just make the full-sized
        # mask... todo....
        raise NotImplementedError("Can't use non-celestial coordinates "
                                  "with regions.")
        pixel_regions = shapelist

    # This is a hack to use mpl to determine the outer bounds of the regions
    # (but it's a legit hack - pyregion needs a major internal refactor
    # before we can approach this any other way, I think -AG)
    mpl_objs = pixel_regions.get_mpl_patches_texts()[0]

    # Find the minimal enclosing box containing all of the regions
    # (this will speed up the mask creation below)
    extent = mpl_objs[0].get_extents()
    xlo, ylo = extent.min
    xhi, yhi = extent.max
    all_extents = [obj.get_extents() for obj in mpl_objs]
    for ext in all_extents:
        xlo = int(np.round(xlo if xlo < ext.min[0] else ext.min[0]))
        ylo = int(np.round(ylo if ylo < ext.min[1] else ext.min[1]))
        xhi = int(np.round(xhi if xhi > ext.max[0] else ext.max[0]))
        yhi = int(np.round(yhi if yhi > ext.max[1] else ext.max[1]))

    log.debug("Region boundaries: ")
    log.debug("xlo={xlo}, ylo={ylo}, xhi={xhi}, yhi={yhi}".format(xlo=xlo,
                                                                  ylo=ylo,
                                                                  xhi=xhi,
                                                                  yhi=yhi))

    subwcs = mywcs[int(ylo):int(yhi), int(xlo):int(xhi)]
    subhdr = subwcs.sub([wcs.WCSSUB_CELESTIAL]).to_header()
    subdata = data[int(ylo):int(yhi), int(xlo):int(xhi)]

    mask = shapelist.get_mask(header=subhdr, shape=subdata.shape)
    log.debug("Shapes: data={0}, subdata={2}, mask={1}".format(
        data.shape, mask.shape, subdata.shape))
    return (xlo, xhi, ylo, yhi), mask
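A hedged usage sketch (the FITS file name and DS9-style region string are illustrative):

import pyregion
from astropy import wcs
from astropy.io import fits

hdu = fits.open('image.fits')[0]   # hypothetical image
mywcs = wcs.WCS(hdu.header)
region = pyregion.parse("fk5;circle(201.365,-43.019,0.01)")[0]
(xlo, xhi, ylo, yhi), mask = pyregion_subset(region, hdu.data, mywcs)
print(mask.shape, mask.sum())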
Example #12
    def _response_hook(self, response, *args, **kwargs):
        # Log request at DEBUG severity
        request_hdrs = '\n'.join(f'{k}: {v}'
                                 for k, v in response.request.headers.items())
        request_log = textwrap.indent(
            f"-----------------------------------------\n"
            f"{response.request.method} {response.request.url}\n"
            f"{request_hdrs}\n"
            f"\n"
            f"{response.request.body}\n"
            f"-----------------------------------------", '\t')
        log.debug(f"HTTP request\n{request_log}")
        # Log response at super-DEBUG severity (custom level 5)
        response_hdrs = '\n'.join(f'{k}: {v}'
                                  for k, v in response.headers.items())
        response_log = textwrap.indent(
            f"-----------------------------------------\n"
            f"{response.status_code} {response.reason} {response.url}\n"
            f"{response_hdrs}\n"
            f"\n"
            f"{response.text}\n"
            f"-----------------------------------------", '\t')
        log.log(5, f"HTTP response\n{response_log}")
Example #13
def get_mockreturn(self,
                   method,
                   url,
                   data=None,
                   timeout=10,
                   files=None,
                   params=None,
                   headers=None,
                   **kwargs):
    log.debug("get_mockreturn url:{} params:{} kwargs:{}".format(
        url, params, kwargs))
    if kwargs and 'auth' in kwargs:
        auth = kwargs['auth']
        if auth and (auth[0] != USERNAME or auth[1] != PASSWORD):
            log.debug("Rejecting credentials")
            return create_auth_failure_response()

    if 'data/async' in str(url):
        # Responses for an asynchronous SODA job
        if str(url).endswith('data/async'):
            self.first_job_pass = True
            self.completed_job_key = "COMPLETED_JOB"
            return create_soda_create_response('111-000-111-000')
        elif str(url).endswith('/phase') and method == 'POST':
            key = "RUN_JOB"
        elif str(url).endswith(
                '111-000-111-000/parameters') and method == 'POST':
            assert "POS" in data
            print(data['POS'])
            pos_parts = data['POS'].split(' ')
            assert len(pos_parts) == 4
            self.completed_job_key = 'cutout_{}_{:.4f}_{:.4f}_{:.4f}'.format(
                pos_parts[0], float(pos_parts[1]), float(pos_parts[2]),
                float(pos_parts[3]))
            return create_soda_create_response('111-000-111-000')
        elif str(url).endswith('111-000-111-000') and method == 'GET':
            key = "RUN_JOB" if self.first_job_pass else self.completed_job_key
            self.first_job_pass = False
        else:
            raise ValueError("Unexpected SODA async {} call to url {}".format(
                method, url))
    elif 'datalink' in str(url):
        if 'cube-244' in str(url):
            key = 'DATALINK'
        else:
            key = 'DATALINK_NOACCESS'
    elif str(
            url) == 'https://data.csiro.au/casda_vo_proxy/vo/tap/availability':
        key = 'AVAILABILITY'
    else:
        key = params['POS'].split()[0] if params['POS'] else None
    filename = data_path(DATA_FILES[key])
    log.debug('providing ' + filename)
    with open(filename, 'rb') as f:
        content = f.read()
    return MockResponse(content)
Example #14
def pyregion_subset(region, data, mywcs):
    """
    Return a subset of an image (`data`) given a region.
    """
    shapelist = pyregion.ShapeList([region])
    if shapelist[0].coord_format not in ('physical', 'image'):
        # Requires astropy >0.4...
        # pixel_regions = shapelist.as_imagecoord(self.wcs.celestial.to_header())
        # convert the regions to image (pixel) coordinates
        celhdr = mywcs.sub([wcs.WCSSUB_CELESTIAL]).to_header()
        pixel_regions = shapelist.as_imagecoord(celhdr)
    else:
        # For this to work, we'd need to change the reference pixel after cropping.
        # Alternatively, we can just make the full-sized mask... todo....
        raise NotImplementedError(
            "Can't use non-celestial coordinates with regions.")
        pixel_regions = shapelist

    # This is a hack to use mpl to determine the outer bounds of the regions
    # (but it's a legit hack - pyregion needs a major internal refactor
    # before we can approach this any other way, I think -AG)
    mpl_objs = pixel_regions.get_mpl_patches_texts()[0]

    # Find the minimal enclosing box containing all of the regions
    # (this will speed up the mask creation below)
    extent = mpl_objs[0].get_extents()
    xlo, ylo = extent.min
    xhi, yhi = extent.max
    all_extents = [obj.get_extents() for obj in mpl_objs]
    for ext in all_extents:
        xlo = int(xlo if xlo < ext.min[0] else ext.min[0])
        ylo = int(ylo if ylo < ext.min[1] else ext.min[1])
        xhi = int(xhi if xhi > ext.max[0] else ext.max[0])
        yhi = int(yhi if yhi > ext.max[1] else ext.max[1])

    log.debug("Region boundaries: ")
    log.debug("xlo={xlo}, ylo={ylo}, xhi={xhi}, yhi={yhi}".format(xlo=xlo,
                                                                  ylo=ylo,
                                                                  xhi=xhi,
                                                                  yhi=yhi))

    subwcs = mywcs[ylo:yhi, xlo:xhi]
    subhdr = subwcs.sub([wcs.WCSSUB_CELESTIAL]).to_header()
    subdata = data[ylo:yhi, xlo:xhi]

    mask = shapelist.get_mask(header=subhdr, shape=subdata.shape)
    log.debug("Shapes: data={0}, subdata={2}, mask={1}".format(
        data.shape, mask.shape, subdata.shape))
    return (xlo, xhi, ylo, yhi), mask
Example #15
    def _parse_kwargs(self, min_frequency=None, max_frequency=None,
                      band='any', top20=None, chemical_name=None,
                      chem_re_flags=0, energy_min=None, energy_max=None,
                      energy_type=None, intensity_lower_limit=None,
                      intensity_type=None, transition=None, version=None,
                      exclude=None,
                      only_astronomically_observed=None,
                      only_NRAO_recommended=None,
                      line_lists=None, line_strengths=None, energy_levels=None,
                      export=None, export_limit=None, noHFS=None,
                      displayHFS=None, show_unres_qn=None,
                      show_upper_degeneracy=None, show_molecule_tag=None,
                      show_qn_code=None, show_lovas_labref=None,
                      show_lovas_obsref=None, show_orderedfreq_only=None,
                      show_nrao_recommended=None,
                      parse_chemistry_locally=True):
        """
        The Splatalogue service returns lines with rest frequencies in the
        range [min_frequency, max_frequency].

        Parameters
        ----------
        min_frequency : `astropy.units`
            Minimum frequency (or any spectral() equivalent)
        max_frequency : `astropy.units`
            Maximum frequency (or any spectral() equivalent)
        band : str
            The observing band.  If it is not 'any', it overrides
            minfreq/maxfreq.
        top20 : str
            One of ``'comet'``, ``'planet'``, ``'top20'``, ``'ism_hotcore'``,
            ``'ism_darkcloud'``, ``'ism_diffusecloud'``.
            Overrides chemical_name
        chemical_name : str
            Name of the chemical to search for. Treated as a regular
            expression.  An empty set ('', (), [], {}) will match *any*
            species. Examples:

            ``'H2CO'`` - 13 species have H2CO somewhere in their formula.

            ``'Formaldehyde'`` - There are 8 isotopologues of Formaldehyde
                                 (e.g., H213CO).

            ``'formaldehyde'`` - Thioformaldehyde, Cyanoformaldehyde.

            ``'formaldehyde',chem_re_flags=re.I`` - Formaldehyde, thioformaldehyde,
                                                    and Cyanoformaldehyde.

            ``' H2CO '`` - Just 1 species, H2CO. The spaces prevent including
                           others.
        parse_chemistry_locally : bool
            Attempt to determine the species ID #'s locally before sending the
            query?  This will prevent queries that have no matching species.
            It also performs a more flexible regular expression match to the
            species IDs.  See the examples in `get_species_ids`
        chem_re_flags : int
            See the `re` module
        energy_min : `None` or float
            Energy range to include.  See energy_type
        energy_max : `None` or float
            Energy range to include.  See energy_type
        energy_type : ``'el_cm1'``, ``'eu_cm1'``, ``'eu_k'``, ``'el_k'``
            Type of energy to restrict.  L/U for lower/upper state energy,
            cm/K for *inverse* cm, i.e. wavenumber, or K for Kelvin
        intensity_lower_limit : `None` or float
            Lower limit on the intensity.  See intensity_type
        intensity_type : `None` or ``'sij'``, ``'cdms_jpl'``, ``'aij'``
            The type of intensity on which to place a lower limit
        transition : str
            e.g. 1-0
        version : ``'v1.0'``, ``'v2.0'``, ``'v3.0'`` or ``'vall'``
            Data version
        exclude : list
            Types of lines to exclude.  Default is:
            (``'potential'``, ``'atmospheric'``, ``'probable'``)
            Can also exclude ``'known'``.
            To exclude nothing, use 'none', not the python object None, since
            the latter is meant to indicate 'leave as default'
        only_astronomically_observed : bool
            Show only astronomically observed species?
        only_NRAO_recommended : bool
            Show only NRAO recommended species?
        line_lists : list
            Options:
            Lovas, SLAIM, JPL, CDMS, ToyoMA, OSU, Recomb, Lisa, RFI
        line_strengths : list
            * CDMS/JPL Intensity : ls1
            * Sij : ls3
            * Aij : ls4
            * Lovas/AST : ls5
        energy_levels : list
            * E_lower (cm^-1) : el1
            * E_lower (K) : el2
            * E_upper (cm^-1) : el3
            * E_upper (K) : el4
        export : bool
            Set up arguments for the export server (as opposed to the HTML
            server)?
        export_limit : int
            Maximum number of lines in output file
        noHFS : bool
            No HFS Display
        displayHFS : bool
            Display HFS Intensity
        show_unres_qn : bool
            Display Unresolved Quantum Numbers
        show_upper_degeneracy : bool
            Display Upper State Degeneracy
        show_molecule_tag : bool
            Display Molecule Tag
        show_qn_code : bool
            Display Quantum Number Code
        show_lovas_labref : bool
            Display Lab Ref
        show_lovas_obsref : bool
            Display Obs Ref
        show_orderedfreq_only : bool
            Display Ordered Frequency ONLY
        show_nrao_recommended : bool
            Display NRAO Recommended Frequencies

        Returns
        -------
        payload : dict
            Dictionary of the parameters to send to the SPLAT page

        """

        payload = {'submit': 'Search',
                   'frequency_units': 'GHz',
                   }

        if band != 'any':
            if band not in self.FREQUENCY_BANDS:
                raise ValueError("Invalid frequency band.")
            if min_frequency is not None or max_frequency is not None:
                warnings.warn("Band was specified, so the frequency "
                              "specification is overridden")
            payload['band'] = band
        elif min_frequency is not None and max_frequency is not None:
            # allow setting payload without having *ANY* valid frequencies set
            min_frequency = min_frequency.to(u.GHz, u.spectral())
            max_frequency = max_frequency.to(u.GHz, u.spectral())
            if min_frequency > max_frequency:
                min_frequency, max_frequency = max_frequency, min_frequency

            payload['from'] = min_frequency.value
            payload['to'] = max_frequency.value

        if top20 is not None:
            if top20 in self.TOP20_LIST:
                payload['top20'] = top20
            else:
                raise ValueError("Top20 is not one of the allowed values")
        elif chemical_name in ('', {}, (), [], set()):
            # include all
            payload['sid[]'] = []
        elif chemical_name is not None:
            if parse_chemistry_locally:
                species_ids = self.get_species_ids(chemical_name, chem_re_flags)
                if len(species_ids) == 0:
                    raise ValueError("No matching chemical species found.")
                payload['sid[]'] = list(species_ids.values())
            else:
                payload['chemical_name'] = chemical_name

        if energy_min is not None:
            payload['energy_range_from'] = float(energy_min)
        if energy_max is not None:
            payload['energy_range_to'] = float(energy_max)
        if energy_type is not None:
            validate_energy_type(energy_type)
            payload['energy_range_type'] = energy_type

        if intensity_type is not None:
            payload['lill'] = 'lill_' + intensity_type
            if intensity_lower_limit is not None:
                payload[payload['lill']] = intensity_lower_limit

        if transition is not None:
            payload['tran'] = transition

        if version in self.versions:
            payload['data_version'] = version
        elif version is not None:
            raise ValueError("Invalid version specified.  Allowed versions "
                             "are {vers}".format(vers=str(self.versions)))

        if exclude == 'none':
            for e in ('potential', 'atmospheric', 'probable', 'known'):
                # Setting a keyword value to 'None' removes it (see query_lines_async)
                log.debug("Setting no_{0} to None".format(e))
                payload['no_' + e] = None
        elif exclude is not None:
            for e in exclude:
                payload['no_' + e] = 'no_' + e

        if only_astronomically_observed:
            payload['include_only_observed'] = 'include_only_observed'
        if only_NRAO_recommended:
            payload['include_only_nrao'] = 'include_only_nrao'

        if line_lists is not None:
            if type(line_lists) not in (tuple, list):
                raise TypeError("Line lists should be a list of linelist "
                                "names.  See Splatalogue.ALL_LINE_LISTS")
            for L in self.ALL_LINE_LISTS:
                kwd = 'display' + L
                if L in line_lists:
                    payload[kwd] = kwd
                else:
                    payload[kwd] = ''

        if line_strengths is not None:
            for LS in line_strengths:
                payload[LS] = LS

        if energy_levels is not None:
            for EL in energy_levels:
                payload[EL] = EL

        for b in ("noHFS", "displayHFS", "show_unres_qn",
                  "show_upper_degeneracy", "show_molecule_tag",
                  "show_qn_code", "show_lovas_labref",
                  "show_orderedfreq_only", "show_lovas_obsref",
                  "show_nrao_recommended"):
            if locals()[b]:
                payload[b] = b

        # default arg, unmodifiable...
        payload['jsMath'] = 'font:symbol,warn:0'
        payload['__utma'] = ''
        payload['__utmc'] = ''

        if export:
            payload['submit'] = 'Export'
            payload['export_delimiter'] = 'colon'  # or tab or comma
            payload['export_type'] = 'current'
            payload['offset'] = 0
            payload['range'] = 'on'

        if export_limit is not None:
            payload['limit'] = export_limit
        else:
            payload['limit'] = self.LINES_LIMIT

        return payload
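A hedged sketch of exercising this payload builder directly, assuming the surrounding class is `astroquery.splatalogue.Splatalogue` (the frequency window and species are illustrative, and the local chemistry parsing performs a remote species lookup):

import astropy.units as u
from astroquery.splatalogue import Splatalogue

payload = Splatalogue()._parse_kwargs(min_frequency=114 * u.GHz,
                                      max_frequency=116 * u.GHz,
                                      chemical_name=' CO ')
print(payload['from'], payload['to'])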
Example #16
def make_finder_chart_from_image_and_catalog(
    image,
    catalog,
    save_prefix,
    alma_kwargs={
        'public': False,
        'science': False
    },
    bands=(3, 4, 5, 6, 7, 8, 9, 10),
    private_band_colors=(
        'maroon',
        'red',
        'orange',
        'coral',
        'brown',
        'yellow',
        'mediumorchid',
        'palegoldenrod',
    ),
    public_band_colors=(
        'blue',
        'cyan',
        'green',
        'turquoise',
        'teal',
        'darkslategrey',
        'chartreuse',
        'lime',
    ),
    integration_time_contour_levels=np.logspace(0, 5, base=2, num=6),
    save_masks=False,
    use_saved_masks=False,
    linewidth=1,
):
    """
    Create a "finder chart" showing where ALMA has pointed in various bands,
    including different color coding for public/private data and each band.

    Contours are set at various integration times.

    Parameters
    ----------
    image : fits.PrimaryHDU or fits.ImageHDU object
        The image to overlay onto
    catalog : astropy.Table object
        The catalog of ALMA observations
    save_prefix : str
        The prefix for the output files.  Both .reg and .png files will be
        written.  The .reg files will have the band numbers and
        public/private appended, while the .png file will be named
        prefix_almafinderchart.png
    alma_kwargs : dict
        Keywords to pass to the ALMA archive when querying.
    private_band_colors / public_band_colors : tuple
        A tuple or list of colors to be associated with private/public
        observations in the various bands
    integration_time_contour_levels : list or np.array
        The levels at which to draw contours in units of seconds.  Default is
        log-spaced (2^n) seconds: [1., 2., 4., 8., 16., 32.]
    """
    import aplpy

    import pyregion

    all_bands = bands
    bands = used_bands = [band for band in np.unique(catalog['band_list'])]
    log.info("The bands used include: {0}".format(used_bands))
    band_colors_priv = dict(zip(all_bands, private_band_colors))
    band_colors_pub = dict(zip(all_bands, public_band_colors))
    log.info("Color map private: {0}".format(band_colors_priv))
    log.info("Color map public: {0}".format(band_colors_pub))

    if use_saved_masks:
        hit_mask_public = {}
        hit_mask_private = {}

        for band in bands:
            pubfile = '{0}_band{1}_public.fits'.format(save_prefix, band)
            if os.path.exists(pubfile):
                hit_mask_public[band] = fits.getdata(pubfile)
            privfile = '{0}_band{1}_private.fits'.format(save_prefix, band)
            if os.path.exists(privfile):
                hit_mask_private[band] = fits.getdata(privfile)

    else:
        today = np.datetime64('today')

        # At least temporarily obsolete
        # private_circle_parameters = {
        #     band: [(row['RA'], row['Dec'], np.mean(rad).to(u.deg).value)
        #            for row, rad in zip(catalog, primary_beam_radii)
        #            if not row['Release date'] or
        #            (np.datetime64(row['Release date']) > today and row['Band'] == band)]
        #     for band in bands}

        # public_circle_parameters = {
        #     band: [(row['RA'], row['Dec'], np.mean(rad).to(u.deg).value)
        #            for row, rad in zip(catalog, primary_beam_radii)
        #            if row['Release date'] and
        #            (np.datetime64(row['Release date']) <= today and row['Band'] == band)]
        #     for band in bands}

        # unique_private_circle_parameters = {
        #     band: np.array(list(set(private_circle_parameters[band])))
        #     for band in bands}
        # unique_public_circle_parameters = {
        #     band: np.array(list(set(public_circle_parameters[band])))
        #     for band in bands}

        release_dates = np.array(catalog['obs_release_date'],
                                 dtype=np.datetime64)

        for band in bands:
            log.info("BAND {0}".format(band))
            privrows = sum((catalog['band_list'] == band)
                           & (release_dates > today))
            pubrows = sum((catalog['band_list'] == band)
                          & (release_dates <= today))
            log.info("PUBLIC:  Number of rows: {0}".format(pubrows, ))
            log.info("PRIVATE: Number of rows: {0}.".format(privrows))

        log.debug('Creating regions')
        prv_regions = {
            band: pyregion.ShapeList([
                add_meta_to_reg(fp, {'integration': row['t_exptime']})
                for row in catalog for fp in footprint_to_reg(row['s_region'])
                if (not row['obs_release_date']) or (
                    np.datetime64(row['obs_release_date']) > today
                    and row['band_list'] == band)
            ])
            for band in bands
        }
        pub_regions = {
            band: pyregion.ShapeList([
                add_meta_to_reg(fp, {'integration': row['t_exptime']})
                for row in catalog for fp in footprint_to_reg(row['s_region'])
                if row['obs_release_date'] and (
                    np.datetime64(row['obs_release_date']) <= today
                    and row['band_list'] == band)
            ])
            for band in bands
        }

        log.debug('Creating masks')
        prv_mask = {
            band:
            fits.PrimaryHDU(prv_regions[band].get_mask(image).astype('int'),
                            header=image.header)
            for band in bands if prv_regions[band]
        }
        pub_mask = {
            band:
            fits.PrimaryHDU(pub_regions[band].get_mask(image).astype('int'),
                            header=image.header)
            for band in bands if pub_regions[band]
        }

        hit_mask_public = {
            band: np.zeros_like(image.data)
            for band in pub_mask
        }
        hit_mask_private = {
            band: np.zeros_like(image.data)
            for band in prv_mask
        }
        mywcs = wcs.WCS(image.header)

        for band in bands:
            log.debug(
                'Adding integration-scaled masks for Band: {0}'.format(band))

            shapes = prv_regions[band]
            for shape in shapes:
                # private: release_date = 'sometime' says when it will be released
                (xlo, xhi, ylo,
                 yhi), mask = pyregion_subset(shape, hit_mask_private[band],
                                              mywcs)
                log.debug("{0},{1},{2},{3}: {4}".format(
                    xlo, xhi, ylo, yhi, mask.sum()))
                hit_mask_private[band][
                    ylo:yhi, xlo:xhi] += shape.meta['integration'] * mask

            if save_masks:
                shapes.write('{0}_band{1}_private.reg'.format(
                    save_prefix, band))

            shapes = pub_regions[band]
            for shape in shapes:
                # public: release_date = '' should mean already released
                (xlo, xhi, ylo,
                 yhi), mask = pyregion_subset(shape, hit_mask_public[band],
                                              mywcs)
                log.debug("{0},{1},{2},{3}: {4}".format(
                    xlo, xhi, ylo, yhi, mask.sum()))
                hit_mask_public[band][
                    ylo:yhi, xlo:xhi] += shape.meta['integration'] * mask

            if save_masks:
                shapes.write('{0}_band{1}_public.reg'.format(
                    save_prefix, band))

        if save_masks:
            for band in bands:
                if band in hit_mask_public:
                    if hit_mask_public[band].any():
                        hdu = fits.PrimaryHDU(data=hit_mask_public[band],
                                              header=image.header)
                        hdu.writeto('{0}_band{1}_public.fits'.format(
                            save_prefix, band),
                                    overwrite=True)
                if band in hit_mask_private:
                    if hit_mask_private[band].any():
                        hdu = fits.PrimaryHDU(data=hit_mask_private[band],
                                              header=image.header)
                        hdu.writeto('{0}_band{1}_private.fits'.format(
                            save_prefix, band),
                                    overwrite=True)

    fig = aplpy.FITSFigure(fits.HDUList(image), convention='calabretta')
    fig.show_grayscale(stretch='arcsinh', vmid=np.nanmedian(image.data))
    for band in bands:
        if band in hit_mask_public:
            if hit_mask_public[band].any():
                fig.show_contour(fits.PrimaryHDU(data=hit_mask_public[band],
                                                 header=image.header),
                                 levels=integration_time_contour_levels,
                                 colors=[band_colors_pub[int(band)]] *
                                 len(integration_time_contour_levels),
                                 linewidth=linewidth,
                                 convention='calabretta')
        if band in hit_mask_private:
            if hit_mask_private[band].any():
                fig.show_contour(fits.PrimaryHDU(data=hit_mask_private[band],
                                                 header=image.header),
                                 levels=integration_time_contour_levels,
                                 colors=[band_colors_priv[int(band)]] *
                                 len(integration_time_contour_levels),
                                 linewidth=linewidth,
                                 convention='calabretta')

    fig.save('{0}_almafinderchart.png'.format(save_prefix))

    return image, catalog, hit_mask_public, hit_mask_private
Example #17
def to_cache(response, cache_file):
    log.debug("Caching data to {0}".format(cache_file))
    with open(cache_file, "wb") as f:
        pickle.dump(response, f)
Example #18
def parse_lamda_lines(data):
    """
    Extract a LAMDA datafile into a dictionary of tables

    (non-pythonic!  more like, fortranic)
    """

    meta_rad = {}
    meta_mol = {}
    meta_coll = {}
    levels = []
    radtrans = []
    collider = None
    ncolltrans = None
    for ii, line in enumerate(data):
        if line[0] == '!':
            continue
        if 'molecule' not in meta_mol:
            meta_mol['molecule'] = _cln(line)
            continue
        if 'molwt' not in meta_mol:
            meta_mol['molwt'] = float(_cln(line))
            continue
        if 'nenergylevels' not in meta_mol:
            meta_mol['nenergylevels'] = int(_cln(line))
            continue
        if len(levels) < meta_mol['nenergylevels']:
            lev, en, wt = _cln(line).split()[:3]
            jul = " ".join(_cln(line).split()[3:])
            levels.append([int(lev), float(en), int(float(wt)), jul])
            continue
        if 'radtrans' not in meta_rad:
            meta_rad['radtrans'] = int(_cln(line))
            continue
        if len(radtrans) < meta_rad['radtrans']:
            # Can have wavenumber at the end.  Ignore that.
            trans, up, low, aval, freq, eu = _cln(line).split()[:6]
            radtrans.append([
                int(trans),
                int(up),
                int(low),
                float(aval),
                float(freq),
                float(eu)
            ])
            continue
        if 'ncoll' not in meta_coll:
            meta_coll['ncoll'] = int(_cln(line))
            collrates = {}
            continue
        if collider is None:
            collider = int(line[0])
            collname = collider_ids[collider]
            collrates[collider] = []
            meta_coll[collname] = {
                'collider': collname,
                'collider_id': collider
            }
            continue
        if ncolltrans is None:
            ncolltrans = int(_cln(line))
            meta_coll[collname]['ntrans'] = ncolltrans
            continue
        if 'ntemp' not in meta_coll[collname]:
            meta_coll[collname]['ntemp'] = int(_cln(line))
            continue
        if 'temperatures' not in meta_coll[collname]:
            meta_coll[collname]['temperatures'] = [
                int(float(x)) for x in _cln(line).split()
            ]
            continue
        if len(collrates[collider]) < meta_coll[collname]['ntrans']:
            trans, up, low = [int(x) for x in _cln(line).split()[:3]]
            temperatures = [float(x) for x in _cln(line).split()[3:]]
            collrates[collider].append([trans, up, low] + temperatures)
        if len(collrates[collider]) == meta_coll[collname]['ntrans']:
            # meta_coll[collider_ids[collider]+'_collrates'] = collrates
            log.debug("{ii} Finished loading collider {0:d}: "
                      "{1}".format(collider, collider_ids[collider], ii=ii))
            collider = None
            ncolltrans = None
            if len(collrates) == meta_coll['ncoll']:
                # All done!
                break

    if len(levels[0]) == 4:
        mol_table_names = ['Level', 'Energy', 'Weight', 'J']
    elif len(levels[0]) == 5:
        mol_table_names = ['Level', 'Energy', 'Weight', 'J', 'F']
    else:
        raise ValueError("Unrecognized levels structure.")
    mol_table_columns = [
        table.Column(name=name, data=data)
        for name, data in zip(mol_table_names, zip(*levels))
    ]
    mol_table = table.Table(data=mol_table_columns, meta=meta_mol)

    rad_table_names = [
        'Transition', 'Upper', 'Lower', 'EinsteinA', 'Frequency', 'E_u(K)'
    ]
    rad_table_columns = [
        table.Column(name=name, data=data)
        for name, data in zip(rad_table_names, zip(*radtrans))
    ]
    rad_table = table.Table(data=rad_table_columns, meta=meta_rad)

    coll_tables = {collider_ids[collider]: None for collider in collrates}
    for collider in collrates:
        collname = collider_ids[collider]
        coll_table_names = (['Transition', 'Upper', 'Lower'] + [
            'C_ij(T={0:d})'.format(tem)
            for tem in meta_coll[collname]["temperatures"]
        ])
        coll_table_columns = [
            table.Column(name=name, data=data)
            for name, data in zip(coll_table_names, zip(*collrates[collider]))
        ]
        coll_table = table.Table(data=coll_table_columns,
                                 meta=meta_coll[collname])
        coll_tables[collname] = coll_table

    return coll_tables, rad_table, mol_table
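A hedged usage sketch (the datafile name is hypothetical; LAMDA files such as co.dat can be downloaded from the Leiden molecular database):

with open('co.dat') as fh:   # hypothetical local LAMDA datafile
    coll_tables, rad_table, mol_table = parse_lamda_lines(fh.readlines())
print(mol_table.colnames, len(rad_table), sorted(coll_tables))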
Example #19
    def load_data(self, ids, data_release=None, data_structure='INDIVIDUAL', retrieval_type="ALL", valid_data=True,
                  band=None, avoid_datatype_check=False, format="votable", output_file=None,
                  overwrite_output_file=False, verbose=False):
        """Loads the specified table
        TAP+ only

        Parameters
        ----------
        ids : str list, mandatory
            list of identifiers
        data_release: str, optional, default None
            data release from which data should be taken. E.g. 'Gaia DR2'
            By default, it takes the current default one.
        data_structure: str, optional, default 'INDIVIDUAL'
            it can be 'INDIVIDUAL', 'COMBINED', 'RAW':
            'INDIVIDUAL' means...
            'COMBINED' means...
            'RAW' means...
        retrieval_type : str, optional, default 'ALL'
            retrieval type identifier. It can be either 'epoch_photometry'
            for compatibility reasons or 'ALL' to retrieve all data from
            the list of sources.
        valid_data : bool, optional, default True
            By default, the epoch photometry service returns only valid data,
            that is, all data rows where flux is not null and
            rejected_by_photometry flag is not true. In order to retrieve
            all data associated to a given source without this filter,
            this request parameter should be included (valid_data=False)
        band : str, optional, default None, valid values: G, BP, RP
            By default, the epoch photometry service returns all the
            available photometry bands for the requested source.
            This parameter allows filtering the output lightcurve by band.
        avoid_datatype_check : boolean, optional, default False
            By default this is False. If set to True, the Datalink item tags
            will not be checked.
        format : str, optional, default 'votable'
            loading format
        output_file : string, optional, default None
            file where the results are saved.
            If it is not provided, the http response contents are returned.
        overwrite_output_file : boolean, optional, default False
            To overwrite the output_file if it already exists.
        verbose : bool, optional, default 'False'
            flag to display information about the process

        Returns
        -------
        A table object
        """
        if retrieval_type is None:
            raise ValueError("Missing mandatory argument 'retrieval_type'")

        now = datetime.now()
        now_formatted = now.strftime("%Y%m%d_%H%M%S")
        temp_dirname = "temp_" + now_formatted
        downloadname_formated = "download_" + now_formatted

        output_file_specified = False
        if output_file is None:
            output_file = os.path.join(os.getcwd(), temp_dirname, downloadname_formated)
        else:
            output_file_specified = True
            output_file = os.path.abspath(output_file)
            if not overwrite_output_file and os.path.exists(output_file):
                raise ValueError(f"{output_file} file already exists. Please use overwrite_output_file='False' to "
                                 f"overwrite output file.")

        path = os.path.dirname(output_file)

        if ids is None:
            raise ValueError("Missing mandatory argument 'ids'")

        if avoid_datatype_check is False:
            # we need to check params
            rt = str(retrieval_type).upper()
            if rt != 'ALL' and rt not in self.VALID_DATALINK_RETRIEVAL_TYPES:
                raise ValueError(f"Invalid mandatory argument 'retrieval_type'. Found {retrieval_type}, "
                                 f"expected: 'ALL' or any of {self.VALID_DATALINK_RETRIEVAL_TYPES}")

        params_dict = {}

        if not valid_data or str(retrieval_type) == 'ALL':
            params_dict['VALID_DATA'] = "false"
        elif valid_data:
            params_dict['VALID_DATA'] = "true"

        if band is not None:
            if band != 'G' and band != 'BP' and band != 'RP':
                raise ValueError("Invalid band value '%s' (Valid values: " +
                                 "'G', 'BP' and 'RP)" % band)
            else:
                params_dict['BAND'] = band
        if isinstance(ids, str):
            ids_arg = ids
        else:
            if isinstance(ids, int):
                ids_arg = str(ids)
            else:
                ids_arg = ','.join(str(item) for item in ids)
        params_dict['ID'] = ids_arg
        if data_release is not None:
            params_dict['RELEASE'] = data_release
        params_dict['DATA_STRUCTURE'] = data_structure
        params_dict['FORMAT'] = str(format)
        params_dict['RETRIEVAL_TYPE'] = str(retrieval_type)
        params_dict['USE_ZIP_ALWAYS'] = 'true'

        if path != '':
            try:
                os.mkdir(path)
            except FileExistsError:
                log.error("Path %s already exist" % path)
            except OSError:
                log.error("Creation of the directory %s failed" % path)

        try:
            self.__gaiadata.load_data(params_dict=params_dict,
                                      output_file=output_file,
                                      verbose=verbose)
            files = Gaia.__get_data_files(output_file=output_file, path=path)
        except Exception as err:
            raise err
        finally:
            if not output_file_specified:
                shutil.rmtree(path)

        if verbose:
            if output_file_specified:
                log.info("output_file = %s" % output_file)

        log.debug("List of products available:")
        # for key, value in files.items():
        # print("Product =", key)

        items = [key for key in files.keys()]
        items.sort()
        for item in items:
            # print(f'* {item}')
            if verbose:
                log.debug("Product = " + item)

        return files
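A hedged usage sketch (the source identifier and data release are illustrative; `Gaia` is the ready-made instance exported by `astroquery.gaia`):

from astroquery.gaia import Gaia

files = Gaia.load_data(ids=[4295806720],
                       data_release='Gaia DR3',
                       retrieval_type='epoch_photometry',
                       format='votable',
                       verbose=True)
for product in sorted(files):
    print(product)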
Example #20
def get_access_url(service, capability=None):
    """
    Returns the URL corresponding to a service by doing a lookup in the cadc
    registry. It returns the access URL corresponding to cookie authentication.
    :param service: the service the capability belongs to. It can be identified
    by a CADC uri ('ivo://cadc.nrc.ca/') which is looked up in the CADC registry
    or by the URL where the service capabilities are found.
    :param capability: uri representing the capability for which the access
    url is sought
    :return: the access url

    Note
    ------
    This function implements the functionality of a CADC registry as defined
    by the IVOA. It should be eventually moved to its own directory.

    Caching should be considered to reduce the number of remote calls to
    CADC registry
    """

    caps_url = ''
    if service.startswith('http'):
        if not capability:
            return service
        caps_url = service
    else:
        # get caps from the CADC registry
        if not get_access_url.caps:
            try:
                response = requests.get(conf.CADC_REGISTRY_URL)
                response.raise_for_status()
            except requests.exceptions.HTTPError as err:
                log.debug("ERROR getting the CADC registry: {}".format(
                    str(err)))
                raise err
            for line in response.text.splitlines():
                if len(line) > 0 and not line.startswith('#'):
                    service_id, capabilities_url = line.split('=')
                    get_access_url.caps[service_id.strip()] = \
                        capabilities_url.strip()
        # lookup the service
        service_uri = service
        if not service.startswith('ivo'):
            # assume short form of CADC service
            service_uri = 'ivo://cadc.nrc.ca/{}'.format(service)
        if service_uri not in get_access_url.caps:
            raise AttributeError(
                "Cannot find the capabilities of service {}".format(service))
        # look up in the CADC reg for the service capabilities
        caps_url = get_access_url.caps[service_uri]
        if not capability:
            return caps_url
    try:
        response2 = requests.get(caps_url)
        response2.raise_for_status()
    except Exception as e:
        log.debug("ERROR getting the service capabilities: {}".format(str(e)))
        raise e

    soup = BeautifulSoup(response2.text, features="html5lib")
    for cap in soup.find_all('capability'):
        if cap.get("standardid", None) == capability:
            if len(cap.find_all('interface')) == 1:
                return cap.find_all('interface')[0].accessurl.text
            for i in cap.find_all('interface'):
                if hasattr(i, 'securitymethod'):
                    sm = i.securitymethod
                    if not sm or sm.get("standardid", None) is None or\
                       sm['standardid'] == "ivo://ivoa.net/sso#cookie":
                        return i.accessurl.text
    raise RuntimeError("ERROR - capability {} not found or not working with "
                       "anonymous or cookie access".format(capability))
Example #21
    def _activate_form(self, response, form_index=0, form_id=None, inputs={},
                       cache=True, method=None):
        """
        Parameters
        ----------
        method: None or str
            Can be used to override the form-specified method
        """
        # Extract form from response
        root = BeautifulSoup(response.content, 'html5lib')
        if form_id is None:
            form = root.find_all('form')[form_index]
        else:
            form = root.find_all('form', id=form_id)[form_index]
        # Construct base url
        form_action = form.get('action')
        if "://" in form_action:
            url = form_action
        elif form_action.startswith('/'):
            url = '/'.join(response.url.split('/', 3)[:3]) + form_action
        else:
            url = response.url.rsplit('/', 1)[0] + '/' + form_action
        # Identify payload format
        fmt = None
        form_method = form.get('method').lower()
        if form_method == 'get':
            fmt = 'get'  # get(url, params=payload)
        elif form_method == 'post':
            if 'enctype' in form.attrs:
                if form.attrs['enctype'] == 'multipart/form-data':
                    fmt = 'multipart/form-data'  # post(url, files=payload)
                elif form.attrs['enctype'] == 'application/x-www-form-urlencoded':
                    fmt = 'application/x-www-form-urlencoded'  # post(url, data=payload)
                else:
                    raise Exception("enctype={0} is not supported!".format(form.attrs['enctype']))
            else:
                fmt = 'application/x-www-form-urlencoded'  # post(url, data=payload)
        # Extract payload from form
        payload = []
        for form_elem in form.find_all(['input', 'select', 'textarea']):
            value = None
            is_file = False
            tag_name = form_elem.name
            key = form_elem.get('name')
            if tag_name == 'input':
                is_file = (form_elem.get('type') == 'file')
                value = form_elem.get('value')
                if form_elem.get('type') in ['checkbox', 'radio']:
                    if form_elem.has_attr('checked'):
                        if not value:
                            value = 'on'
                    else:
                        value = None
            elif tag_name == 'select':
                if form_elem.get('multiple') is not None:
                    value = []
                    if form_elem.select('option[value]'):
                        for option in form_elem.select('option[value]'):
                            if option.get('selected') is not None:
                                value.append(option.get('value'))
                    else:
                        for option in form_elem.select('option'):
                            if option.get('selected') is not None:
                                # bs4 NavigableString types have bad,
                                # undesirable properties that result
                                # in recursion errors when caching
                                value.append(str(option.string))
                else:
                    if form_elem.select('option[value]'):
                        for option in form_elem.select('option[value]'):
                            if option.get('selected') is not None:
                                value = option.get('value')
                        # select the first option field if none is selected
                        if value is None:
                            value = form_elem.select(
                                'option[value]')[0].get('value')
                    else:
                        # survey form just uses text, not value
                        for option in form_elem.select('option'):
                            if option.get('selected') is not None:
                                value = str(option.string)
                        # select the first option field if none is selected
                        if value is None:
                            value = str(form_elem.select('option')[0].string)

            if key in inputs:
                if isinstance(inputs[key], list):
                    # list input is accepted (for array uploads)
                    value = inputs[key]
                else:
                    value = str(inputs[key])

            if (key is not None):  # and (value is not None):
                if fmt == 'multipart/form-data':
                    if is_file:
                        payload.append(
                            (key, ('', '', 'application/octet-stream')))
                    else:
                        if type(value) is list:
                            for v in value:
                                entry = (key, ('', v))
                                # Prevent redundant key, value pairs
                                # (can happen if the form repeats them)
                                if entry not in payload:
                                    payload.append(entry)
                        elif value is None:
                            entry = (key, ('', ''))
                            if entry not in payload:
                                payload.append(entry)
                        else:
                            entry = (key, ('', value))
                            if entry not in payload:
                                payload.append(entry)
                else:
                    if type(value) is list:
                        for v in value:
                            entry = (key, v)
                            if entry not in payload:
                                payload.append(entry)
                    else:
                        entry = (key, value)
                        if entry not in payload:
                            payload.append(entry)

        # for future debugging
        self._payload = payload
        log.debug("Form: payload={0}".format(payload))

        if method is not None:
            fmt = method

        log.debug("Method/format = {0}".format(fmt))

        # Send payload
        if fmt == 'get':
            response = self._request("GET", url, params=payload, cache=cache)
        elif fmt == 'multipart/form-data':
            response = self._request("POST", url, files=payload, cache=cache)
        elif fmt == 'application/x-www-form-urlencoded':
            response = self._request("POST", url, data=payload, cache=cache)

        return response
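
Reduced to a standalone sketch, the scrape-and-submit pattern that _activate_form implements looks roughly like the following; the URL and field names are placeholders, and only a relative form action with an urlencoded POST is handled:

# Standalone sketch of the scrape-and-submit pattern (placeholder URL and
# field names; only a relative 'action' and an urlencoded POST are handled).
import requests
from bs4 import BeautifulSoup

resp = requests.get('https://example.org/archive/query_form')
form = BeautifulSoup(resp.content, 'html5lib').find('form')
payload = {elem.get('name'): elem.get('value', '')
           for elem in form.find_all('input') if elem.get('name')}
payload['target_name'] = 'M31'       # override one field, like inputs={...}
url = resp.url.rsplit('/', 1)[0] + '/' + form.get('action')
result = requests.post(url, data=payload)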
Example #22
0
    def _download_file(self,
                       url,
                       local_filepath,
                       timeout=None,
                       auth=None,
                       continuation=True,
                       cache=False,
                       method="GET",
                       head_safe=False,
                       **kwargs):
        """
        Download a file.  Resembles `astropy.utils.data.download_file` but uses
        the local ``_session``

        Parameters
        ----------
        url : string
        local_filepath : string
        timeout : int
        auth : dict or None
        continuation : bool
            If the file has already been partially downloaded *and* the server
            supports HTTP "range" requests, the download will be continued
            where it left off.
        cache : bool
        method : "GET" or "POST"
        head_safe : bool
        """

        if head_safe:
            response = self._session.request("HEAD",
                                             url,
                                             timeout=timeout,
                                             stream=True,
                                             auth=auth,
                                             **kwargs)
        else:
            response = self._session.request(method,
                                             url,
                                             timeout=timeout,
                                             stream=True,
                                             auth=auth,
                                             **kwargs)

        response.raise_for_status()
        if 'content-length' in response.headers:
            length = int(response.headers['content-length'])
            if length == 0:
                log.warning('URL {0} has length=0'.format(url))
        else:
            length = None

        if ((os.path.exists(local_filepath)
             and ('Accept-Ranges' in response.headers) and continuation)):
            open_mode = 'ab'

            existing_file_length = os.stat(local_filepath).st_size
            if length is not None and existing_file_length >= length:
                # all done!
                log.info(
                    "Found cached file {0} with expected size {1}.".format(
                        local_filepath, existing_file_length))
                return
            elif existing_file_length == 0:
                open_mode = 'wb'
            else:
                if length is not None:
                    log.info("Continuing download of file {0}, with {1} bytes "
                             "to go ({2}%)".format(
                                 local_filepath, length - existing_file_length,
                                 (length - existing_file_length) / length * 100))
                else:
                    log.info("Continuing download of file {0} (total size "
                             "unknown)".format(local_filepath))

                # bytes are indexed from 0:
                # https://en.wikipedia.org/wiki/List_of_HTTP_header_fields#range-request-header
                end = "{0}".format(length - 1) if length is not None else ""
                self._session.headers['Range'] = "bytes={0}-{1}".format(
                    existing_file_length, end)

                response = self._session.request(method,
                                                 url,
                                                 timeout=timeout,
                                                 stream=True,
                                                 auth=auth,
                                                 **kwargs)
                response.raise_for_status()
                del self._session.headers['Range']

        elif cache and os.path.exists(local_filepath):
            if length is not None:
                statinfo = os.stat(local_filepath)
                if statinfo.st_size != length:
                    log.warning("Found cached file {0} with size {1} that is "
                                "different from expected size {2}".format(
                                    local_filepath, statinfo.st_size, length))
                    open_mode = 'wb'
                else:
                    log.info(
                        "Found cached file {0} with expected size {1}.".format(
                            local_filepath, statinfo.st_size))
                    response.close()
                    return
            else:
                log.info("Found cached file {0}.".format(local_filepath))
                response.close()
                return
        else:
            open_mode = 'wb'
            if head_safe:
                response = self._session.request(method,
                                                 url,
                                                 timeout=timeout,
                                                 stream=True,
                                                 auth=auth,
                                                 **kwargs)
                response.raise_for_status()

        blocksize = astropy.utils.data.conf.download_block_size

        log.debug(
            f"Downloading URL {url} to {local_filepath} with size {length} "
            f"by blocks of {blocksize}")

        bytes_read = 0

        # Only show progress bar if logging level is INFO or lower.
        if log.getEffectiveLevel() <= 20:
            progress_stream = None  # Astropy default
        else:
            progress_stream = io.StringIO()

        with ProgressBarOrSpinner(
                length,
                ('Downloading URL {0} to {1} ...'.format(url, local_filepath)),
                file=progress_stream) as pb:
            with open(local_filepath, open_mode) as f:
                for block in response.iter_content(blocksize):
                    f.write(block)
                    # count the bytes actually written; the final block is
                    # usually shorter than blocksize
                    bytes_read += len(block)
                    if length is not None:
                        pb.update(
                            bytes_read if bytes_read <= length else length)
                    else:
                        pb.update(bytes_read)

        response.close()
        return response
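
The continuation logic above rests on the HTTP Range header; stripped of session, auth and progress reporting, the mechanism is roughly this (placeholder URL, and the server must advertise Accept-Ranges):

# Reduced sketch of HTTP "Range" continuation (placeholder URL).
import os
import requests

url = 'https://example.org/data/big_file.fits'
local = 'big_file.fits'
already = os.path.getsize(local) if os.path.exists(local) else 0
headers = {'Range': 'bytes={0}-'.format(already)} if already else {}
with requests.get(url, headers=headers, stream=True) as resp:
    resp.raise_for_status()
    mode = 'ab' if resp.status_code == 206 else 'wb'  # 206 = Partial Content honoured
    with open(local, mode) as f:
        for block in resp.iter_content(65536):
            f.write(block)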
Example #23
0
    def load_data(self,
                  ids,
                  data_release=None,
                  data_structure='INDIVIDUAL',
                  retrieval_type="ALL",
                  valid_data=False,
                  band=None,
                  avoid_datatype_check=False,
                  format="votable",
                  output_file=None,
                  overwrite_output_file=False,
                  verbose=False):
        """Loads the specified table
        TAP+ only

        Parameters
        ----------
        ids : str list, mandatory
            list of identifiers
        data_release: str, optional, default None
            data release from which data should be taken. E.g. 'Gaia DR2'
            By default, it takes the current default one.
        data_structure : str, optional, default 'INDIVIDUAL'
            it can be 'INDIVIDUAL', 'COMBINED' or 'RAW':
            'INDIVIDUAL' means products are provided in separate files for
            each sourceId. All files are zipped in a single bundle, even if
            only one source/file is considered.
            'COMBINED' means products are provided in a single file
            concatenating the data of all sourceIds together. How this is
            organised depends on the chosen format.
            'RAW' means products are provided following a Data Model similar
            to that used in the MDB, meaning in particular that parameters
            stored as arrays will remain as such. Like in the COMBINED
            structure, a single file is provided for the data of all
            sourceIds together, but in this case there will always be one
            row per sourceId.
        retrieval_type : str, optional, default 'ALL'
            retrieval type identifier; 'ALL' retrieves all data from the list
            of sources. For Gaia DR2 possible values are ['EPOCH_PHOTOMETRY'].
            For the future Gaia DR3 (once published), possible values will be
            ['EPOCH_PHOTOMETRY', 'RVS', 'XP_CONTINUOUS', 'XP_SAMPLED',
            'MCMC_GSPPHOT' or 'MCMC_MSC']
        valid_data : bool, optional, default False
            By default, the epoch photometry service returns all available data, including
            data rows where flux is null and/or the rejected_by_photometry flag is set to True.
            In order to retrieve only valid data (data rows where flux is not null and/or the
            rejected_by_photometry flag is set to False) this request parameter should be included
            with valid_data=True.
        band : str, optional, default None, valid values: G, BP, RP
            By default, the epoch photometry service returns all the
            available photometry bands for the requested source.
            This parameter allows filtering the output lightcurve by band.
        avoid_datatype_check : boolean, optional, default False
            If set to True, the Datalink item tags will not be checked.
        format : str, optional, default 'votable'
            loading format. Other available formats are 'csv', 'ecsv',
            'json', 'votable_plain' and 'fits'.
        output_file : string, optional, default None
            file where the results are saved.
            If it is not provided, the http response contents are returned.
        overwrite_output_file : boolean, optional, default False
            Whether to overwrite the output_file if it already exists.
        verbose : bool, optional, default False
            flag to display information about the process

        Returns
        -------
        A dictionary with the downloaded products, keyed by product name
        """

        if retrieval_type is None:
            raise ValueError("Missing mandatory argument 'retrieval_type'")

        now = datetime.now()
        now_formatted = now.strftime("%Y%m%d_%H%M%S")
        temp_dirname = "temp_" + now_formatted
        downloadname_formated = "download_" + now_formatted

        output_file_specified = False
        if output_file is None:
            output_file = os.path.join(os.getcwd(), temp_dirname,
                                       downloadname_formated)
        else:
            output_file_specified = True
            output_file = os.path.abspath(output_file)
            if not overwrite_output_file and os.path.exists(output_file):
                raise ValueError(
                    f"{output_file} already exists. Please set "
                    f"overwrite_output_file=True to overwrite it.")

        path = os.path.dirname(output_file)

        if ids is None:
            raise ValueError("Missing mandatory argument 'ids'")

        if avoid_datatype_check is False:
            # we need to check params
            rt = str(retrieval_type).upper()
            if rt != 'ALL' and rt not in self.VALID_DATALINK_RETRIEVAL_TYPES:
                raise ValueError(
                    f"Invalid mandatory argument 'retrieval_type'. Found {retrieval_type}, "
                    f"expected: 'ALL' or any of {self.VALID_DATALINK_RETRIEVAL_TYPES}"
                )

        params_dict = {}

        if not valid_data or str(retrieval_type) == 'ALL':
            params_dict['VALID_DATA'] = "false"
        elif valid_data:
            params_dict['VALID_DATA'] = "true"

        if band is not None:
            if band not in ('G', 'BP', 'RP'):
                raise ValueError("Invalid band value '%s' (valid values: "
                                 "'G', 'BP' and 'RP')" % band)
            else:
                params_dict['BAND'] = band
        if isinstance(ids, str):
            ids_arg = ids
        else:
            if isinstance(ids, int):
                ids_arg = str(ids)
            else:
                ids_arg = ','.join(str(item) for item in ids)
        params_dict['ID'] = ids_arg
        if data_release is not None:
            params_dict['RELEASE'] = data_release
        params_dict['DATA_STRUCTURE'] = data_structure
        params_dict['FORMAT'] = str(format)
        params_dict['RETRIEVAL_TYPE'] = str(retrieval_type)
        params_dict['USE_ZIP_ALWAYS'] = 'true'

        if path != '':
            try:
                os.mkdir(path)
            except FileExistsError:
                log.error("Path %s already exist" % path)
            except OSError:
                log.error("Creation of the directory %s failed" % path)

        try:
            self.__gaiadata.load_data(params_dict=params_dict,
                                      output_file=output_file,
                                      verbose=verbose)
            files = Gaia.__get_data_files(output_file=output_file, path=path)
        except Exception as err:
            raise err
        finally:
            if not output_file_specified:
                shutil.rmtree(path)

        if verbose:
            if output_file_specified:
                log.info("output_file = %s" % output_file)

        log.debug("List of products available:")
        # for key, value in files.items():
        # print("Product =", key)

        items = [key for key in files.keys()]
        items.sort()
        for item in items:
            # print(f'* {item}')
            if verbose:
                log.debug("Product = " + item)

        return files
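
A hedged usage example of the method above; the source id is made up and the available retrieval types and releases depend on what the archive currently serves:

# Illustrative call only -- the source id is invented and the available
# retrieval types / releases depend on the archive.
from astroquery.gaia import Gaia

files = Gaia.load_data(ids=[1234567890123456789],
                       data_release='Gaia DR2',
                       retrieval_type='EPOCH_PHOTOMETRY',
                       data_structure='INDIVIDUAL',
                       format='votable',
                       verbose=True)
for product_name in sorted(files):
    print(product_name)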
Example #24
0
    def retrieve_data(self, datasets, continuation=False, destination=None,
                      with_calib='none', request_all_objects=False,
                      unzip=True, request_id=None):
        """
        Retrieve a list of datasets from the ESO archive.

        Parameters
        ----------
        datasets : list of strings or string
            List of datasets strings to retrieve from the archive.
        destination : string
            Directory where the files are copied.
            Files already found in the destination directory are skipped,
            unless continuation=True.
            Defaults to the astropy cache.
        continuation : bool
            Force the retrieval of data that are present in the destination
            directory.
        with_calib : string
            Retrieve associated calibration files: 'none' (default), 'raw' for
            raw calibrations, or 'processed' for processed calibrations.
        request_all_objects : bool
            When retrieving associated calibrations (``with_calib != 'none'``),
            this allows requesting all the objects, including those already
            downloaded, to be sure to retrieve all calibration files.
            This is useful when the download was interrupted. `False` by
            default.
        unzip : bool
            Unzip compressed files from the archive after download. `True` by
            default.
        request_id : str, int
            Retrieve from an existing request number rather than sending a new
            query, using the identifier from the URL in the email sent by
            the archive for the earlier request, as in:

                https://dataportal.eso.org/rh/requests/[USERNAME]/[request_id]

        Returns
        -------
        files : list of strings or string
            List of files that have been locally downloaded from the archive.

        Examples
        --------
        >>> dptbl = Eso.query_instrument('apex', pi_coi='ginsburg')
        >>> dpids = [row['DP.ID'] for row in dptbl if 'Map' in row['Object']]
        >>> files = Eso.retrieve_data(dpids)

        """
        calib_options = {'none': '', 'raw': 'CalSelectorRaw2Raw',
                         'processed': 'CalSelectorRaw2Master'}

        if with_calib not in calib_options:
            raise ValueError("invalid value for 'with_calib', "
                             "it must be 'none', 'raw' or 'processed'")

        if isinstance(datasets, str):
            return_list = False
            datasets = [datasets]
        else:
            return_list = True
        if not isinstance(datasets, (list, tuple, np.ndarray)):
            raise TypeError("Datasets must be given as a list of strings.")

        # First: Detect datasets already downloaded
        if with_calib != 'none' and request_all_objects:
            datasets_to_download, files = list(datasets), []
        else:
            log.info("Detecting already downloaded datasets...")
            datasets_to_download, files = self._check_existing_files(
                datasets, continuation=continuation, destination=destination)

        # Second: Check that the datasets to download are in the archive
        if request_id is None:
            log.info("Checking availability of datasets to download...")
            valid_datasets = [self.verify_data_exists(ds)
                              for ds in datasets_to_download]
        else:
            # Assume all valid if a request_id was provided
            valid_datasets = [(ds, True) for ds in datasets_to_download]

        if not all(valid_datasets):
            invalid_datasets = [ds for ds, v in zip(datasets_to_download,
                                                    valid_datasets) if not v]
            raise ValueError("The following data sets were not found on the "
                             "ESO servers: {0}".format(invalid_datasets))

        # Third: Download the other datasets
        log.info("Downloading datasets...")
        if datasets_to_download:
            if not self.authenticated():
                self.login()
            url = "http://archive.eso.org/cms/eso-data/eso-data-direct-retrieval.html"
            with suspend_cache(self):  # Never cache staging operations
                if request_id is None:
                    log.info("Contacting retrieval server...")
                    retrieve_data_form = self._request("GET", url,
                                                        cache=False)
                    retrieve_data_form.raise_for_status()
                    log.info("Staging request...")
                    inputs = {"list_of_datasets": "\n".join(datasets_to_download)}
                    data_confirmation_form = self._activate_form(
                        retrieve_data_form, form_index=-1, inputs=inputs,
                        cache=False)

                    data_confirmation_form.raise_for_status()

                    root = BeautifulSoup(data_confirmation_form.content,
                                         'html5lib')
                    login_button = root.select('input[value=LOGIN]')
                    if login_button:
                        raise LoginError("Not logged in. "
                                    "You must be logged in to download data.")
                    inputs = {}
                    if with_calib != 'none':
                        inputs['requestCommand'] = calib_options[with_calib]

                    # TODO: There may be another screen for Not Authorized;
                    # that should be included too
                    # form name is "retrieve"; no id
                    data_download_form = self._activate_form(
                        data_confirmation_form, form_index=-1, inputs=inputs,
                        cache=False)
                else:
                    # Build URL by hand
                    request_url = 'https://dataportal.eso.org/rh/requests/'
                    request_url += f'{self.USERNAME}/{request_id}'
                    data_download_form = self._request("GET", request_url,
                                                       cache=False)

                    _content = data_download_form.content.decode('utf-8')
                    if ('Request Handler - Error' in _content):
                        # Likely a problem with the request_url
                        msg = (f"The form at {request_url} returned an error."
                                " See your recent requests at "
                                "https://dataportal.eso.org/rh/requests/"
                                f"{self.USERNAME}/recentRequests")

                        raise RemoteServiceError(msg)

                log.info("Staging form is at {0}"
                         .format(data_download_form.url))
                root = BeautifulSoup(data_download_form.content, 'html5lib')
                state = root.select('span[id=requestState]')[0].text
                t0 = time.time()
                while state not in ('COMPLETE', 'ERROR'):
                    time.sleep(2.0)
                    data_download_form = self._request("GET",
                                                       data_download_form.url,
                                                       cache=False)
                    root = BeautifulSoup(data_download_form.content,
                                         'html5lib')
                    state = root.select('span[id=requestState]')[0].text
                    print("{0:20.0f}s elapsed"
                          .format(time.time() - t0), end='\r')
                    sys.stdout.flush()
                if state == 'ERROR':
                    raise RemoteServiceError("There was a remote service "
                                             "error; perhaps the requested "
                                             "file could not be found?")

            if with_calib != 'none':
                # when requested files with calibrations, some javascript is
                # used to display the files, which prevent retrieving the files
                # directly. So instead we retrieve the download script provided
                # in the web page, and use it to extract the list of files.
                # The benefit of this is also that in the download script the
                # list of files is de-duplicated, whereas on the web page the
                # calibration files would be duplicated for each exposure.
                link = root.select('a[href$="/script"]')[0]
                if 'downloadRequest' not in link.text:
                    # Make sure that we found the correct link
                    raise RemoteServiceError(
                        "A link was found in the download file for the "
                        "calibrations that is not a downloadRequest link "
                        "and therefore appears invalid.")

                href = link.attrs['href']
                script = self._request("GET", href, cache=False)
                fileLinks = re.findall(
                    r'"(https://dataportal\.eso\.org/dataPortal/api/requests/.*)"',
                    script.text)

                # urls with api/ require using Basic Authentication, though
                # it's easier for us to reuse the existing requests session (to
                # avoid asking again for a username/password if it is not
                # stored). So we remove api/ from the urls:
                fileLinks = [
                    f.replace('https://dataportal.eso.org/dataPortal/api/requests',
                              'https://dataportal.eso.org/dataPortal/requests')
                    for f in fileLinks]

                log.info("Detecting already downloaded datasets, "
                         "including calibrations...")
                fileIds = [f.rsplit('/', maxsplit=1)[1] for f in fileLinks]
                filteredIds, files = self._check_existing_files(
                    fileIds, continuation=continuation,
                    destination=destination)

                fileLinks = [f for f, fileId in zip(fileLinks, fileIds)
                             if fileId in filteredIds]
            else:
                fileIds = root.select('input[name=fileId]')
                fileLinks = ["http://dataportal.eso.org/dataPortal" +
                             fileId.attrs['value'].split()[1]
                             for fileId in fileIds]

            nfiles = len(fileLinks)
            log.info("Downloading {} files...".format(nfiles))
            log.debug("Files:\n{}".format('\n'.join(fileLinks)))
            for i, fileLink in enumerate(fileLinks, 1):
                fileId = fileLink.rsplit('/', maxsplit=1)[1]

                if request_id is not None:
                    # Since we fetched the script directly without sending
                    # a new request, check here that the file in the list
                    # is among those requested in the input list
                    if fileId.split('.fits')[0] not in datasets_to_download:
                        continue

                log.info("Downloading file {}/{}: {}..."
                         .format(i, nfiles, fileId))
                filename = self._request("GET", fileLink, save=True,
                                         continuation=True)

                if filename.endswith(('.gz', '.7z', '.bz2', '.xz', '.Z')) and unzip:
                    log.info("Unzipping file {0}...".format(fileId))
                    filename = system_tools.gunzip(filename)

                if destination is not None:
                    log.info("Copying file {0} to {1}...".format(fileId, destination))
                    destfile = os.path.join(destination, os.path.basename(filename))
                    shutil.move(filename, destfile)
                    files.append(destfile)
                else:
                    files.append(filename)

        # Empty the redirect cache of this request session
        # Only available and needed for requests versions < 2.17
        try:
            self._session.redirect_cache.clear()
        except AttributeError:
            pass
        log.info("Done!")
        if (not return_list) and (len(files) == 1):
            files = files[0]
        return files
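
At its core, the staging step above polls a request-state element until the archive reports COMPLETE; a reduced sketch of that loop, with a placeholder request URL and an unauthenticated session:

# Reduced sketch of the staging poll loop (placeholder URL; a real run
# needs an authenticated session and an existing request).
import time
import requests
from bs4 import BeautifulSoup

session = requests.Session()
status_url = 'https://dataportal.eso.org/rh/requests/USERNAME/REQUEST_ID'
state = None
while state not in ('COMPLETE', 'ERROR'):
    page = session.get(status_url)
    root = BeautifulSoup(page.content, 'html5lib')
    state = root.select('span[id=requestState]')[0].text
    if state not in ('COMPLETE', 'ERROR'):
        time.sleep(2.0)
if state == 'ERROR':
    raise RuntimeError("remote staging failed")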
Example #25
0
    def download_files(self, files, *, savedir=None, cache=True,
                       continuation=True, skip_unauthorized=True,
                       verify_only=False):
        """
        Given a list of file URLs, download them

        Note: Given a list with repeated URLs, each will only be downloaded
        once, so the return may have a different length than the input list

        Parameters
        ----------
        files : list
            List of URLs to download
        savedir : None or str
            The directory to save to.  Default is the cache location.
        cache : bool
            Cache the download?
        continuation : bool
            Attempt to continue where the download left off (if it was broken)
        skip_unauthorized : bool
            If you receive "unauthorized" responses for some of the download
            requests, skip over them.  If this is False, an exception will be
            raised.
        verify_only : bool
            Option to go through the process of checking the files to see if
            they're the right size, but not actually download them.  This
            option may be useful if a previous download run failed partway.
        """

        if self.USERNAME:
            auth = self._get_auth_info(self.USERNAME)
        else:
            auth = None

        downloaded_files = []
        if savedir is None:
            savedir = self.cache_location
        for file_link in unique(files):
            log.debug("Downloading {0} to {1}".format(file_link, savedir))
            try:
                check_filename = self._request('HEAD', file_link, auth=auth)
                check_filename.raise_for_status()
            except requests.HTTPError as ex:
                if ex.response.status_code == 401:
                    if skip_unauthorized:
                        log.info("Access denied to {url}.  Skipping to"
                                 " next file".format(url=file_link))
                        continue
                    else:
                        raise ex

            try:
                filename = re.search("filename=(.*)",
                                     check_filename.headers['Content-Disposition']).groups()[0]
            except KeyError:
                log.info(f"Unable to find filename for {file_link}  "
                         "(missing Content-Disposition in header).  "
                         "Skipping to next file.")
                continue

            if savedir is not None:
                filename = os.path.join(savedir,
                                        filename)

            if verify_only:
                existing_file_length = os.stat(filename).st_size
                if 'content-length' in check_filename.headers:
                    length = int(check_filename.headers['content-length'])
                    if length == 0:
                        warnings.warn('URL {0} has length=0'.format(file_link))
                    elif existing_file_length == length:
                        log.info(f"Found cached file {filename} with expected size {existing_file_length}.")
                    elif existing_file_length < length:
                        log.info(f"Found cached file {filename} with size {existing_file_length} < expected "
                                 f"size {length}.  The download should be continued.")
                    elif existing_file_length > length:
                        warnings.warn(f"Found cached file {filename} with size {existing_file_length} > expected "
                                      f"size {length}.  The download is likely corrupted.",
                                      CorruptDataWarning)
                else:
                    warnings.warn(f"Could not verify {url} because it has no 'content-length'")

            try:
                if not verify_only:
                    self._download_file(file_link,
                                        filename,
                                        timeout=self.TIMEOUT,
                                        auth=auth,
                                        cache=cache,
                                        method='GET',
                                        head_safe=False,
                                        continuation=continuation)

                downloaded_files.append(filename)
            except requests.HTTPError as ex:
                if ex.response.status_code == 401:
                    if skip_unauthorized:
                        log.info("Access denied to {url}.  Skipping to"
                                 " next file".format(url=file_link))
                        continue
                    else:
                        raise ex
                elif ex.response.status_code == 403:
                    log.error("Access denied to {url}".format(url=file_link))
                    if 'dataPortal' in file_link and 'sso' not in file_link:
                        log.error("The URL may be incorrect.  Try using "
                                  "{0} instead of {1}"
                                  .format(file_link.replace('dataPortal/',
                                                            'dataPortal/sso/'),
                                          file_link))
                    raise ex
                elif ex.response.status_code == 500:
                    # empirically, this works the second time most of the time...
                    self._download_file(file_link,
                                        filename,
                                        timeout=self.TIMEOUT,
                                        auth=auth,
                                        cache=cache,
                                        method='GET',
                                        head_safe=False,
                                        continuation=continuation)

                    downloaded_files.append(filename)
                else:
                    raise ex
        return downloaded_files
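
The filename handling above hinges on the Content-Disposition header returned by the HEAD request; in isolation the extraction looks like this (placeholder URL, and a real server has to send the header):

# Sketch of the Content-Disposition filename extraction (placeholder URL).
import re
import requests

resp = requests.head('https://example.org/dataPortal/some_file')
content_disposition = resp.headers.get('Content-Disposition', '')
match = re.search(r'filename=(.*)', content_disposition)
filename = match.group(1) if match else 'download.dat'
print(filename)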