Exemplo n.º 1
0
def get_requestUrl(dl_url, server, _options, **options):
    """Submit the request and build the url used to poll its status.

    The reply of ``dl_url`` is parsed as XML and its first
    ``statusModeResponse`` element is inspected.

    dl_url: the complete request url to open
    server: base url the status query string is appended to
    _options: options object; ``service_id`` and ``product_id`` are read
    options: keyword arguments forwarded to ``utils_http.open_url``

    Returns the status url, or None when the reply carries status "2"
    (the ``msg`` attribute of the reply is then logged as an error)."""
    stopWatch = stop_watch.localThreadStopWatch()
    stopWatch.start('get_request')
    try:
        log.info("Requesting file to download (this can take a while)...")

        # Get request id
        m = utils_http.open_url(dl_url, **options)
        try:
            responseStr = m.read()
        finally:
            # release the connection even when reading fails
            m.close()

        dom = minidom.parseString(responseStr)
        node = dom.getElementsByTagName('statusModeResponse')[0]
        if node.getAttribute('status') == "2":
            log.error(node.getAttribute('msg'))
            return None

        # Get request url
        requestId = node.getAttribute('requestId')
        return (server + '?action=getreqstatus&requestid=' + requestId +
                "&service=" + _options.service_id +
                "&product=" + _options.product_id)
    finally:
        # stop the timer on every exit path, including exceptions
        stopWatch.stop('get_request')
Exemplo n.º 2
0
def try_get_data(url):
    """Authenticate against CAS, download ``url`` and open it as a dataset.

    The connection check (``CheckConnection.is_online``) is repeated before
    authentication, after opening the url and after reading the body.

    url: the service url to download from

    Returns the result of ``xr.open_dataset`` on the downloaded bytes.

    Raises ValueError on any failure. Its arguments are
    ``('Error:', <error paragraph or None>, 'Request: ', url, response)``;
    the error paragraph is the ``<p class="error">`` element of the body
    when a body was read, and ``response`` is None when the url was never
    opened."""
    # Pre-bind the names used by the error handler so that a failure before
    # they are assigned cannot raise UnboundLocalError and hide the real error.
    response = None
    read_bytes = None
    try:
        CheckConnection.is_online()
        url_auth = authenticate_CAS_for_URL(url, config['UN_CMEMS'],
                                            config['PW_CMEMS'])
        response = open_url(url_auth)
        CheckConnection.is_online()
        read_bytes = response.read()
        CheckConnection.is_online()
        return xr.open_dataset(read_bytes)
    except Exception:
        logger.error(traceback.format_exc())
        server_error = None
        if read_bytes is not None:
            # the body may be an HTML error page rather than a dataset
            server_error = BeautifulSoup(read_bytes, 'html.parser').find(
                'p', {"class": "error"})
        raise ValueError('Error:', server_error, 'Request: ', url, response)
def authenticate_CAS_for_URL(url, user, pwd, **url_config):
    """Performs a CAS authentication for the given URL service and returns
    the service url with the obtained credential.

    The following algorithm is done:
    1) A connection is opened on the given URL
    2) We check that the response is an HTTP redirection
    3) Redirected URL contains the CAS address
    4) We ask for a ticket for the given user and password
    5) We ask for a service ticket for the given service
    6) Then we return a new url with the ticket attached

    url: the url of the service to invoke
    user: the username
    pwd: the password
    url_config: keyword arguments forwarded to ``utils_http.open_url``
                (the ``data`` entry is overwritten by this function)

    Raises Exception when the service does not redirect, when the redirected
    url does not match ``CAS_URL_PATTERN`` or when no ticket granting ticket
    is found in the CAS reply. Errors raised while opening a url propagate
    unchanged."""

    log = logging.getLogger("utils_cas:authenticate_CAS_for_URL")

    server = url.partition('?')[0]

    log.info('Authenticating user %s for service %s' % (user, server))

    connexion = utils_http.open_url(url, **url_config)

    # connexion response code must be a redirection, else, there's an error (user can't be already connected since no cookie or ticket was sent)
    if connexion.url == url:
        raise Exception(
            utils_messages.get_external_messages()
            ['motuclient.exception.authentication.not-redirected'] % server)

    # find the cas url from the redirected url
    redirected_url = connexion.url
    p = parse_qs(urlparse(redirected_url).query, keep_blank_values=False)
    redirectServiceUrl = p['service'][0]

    m = re.search(CAS_URL_PATTERN, redirected_url)

    if m is None:
        raise Exception(utils_messages.get_external_messages()
                        ['motuclient.exception.authentication.unfound-url'] %
                        redirected_url)

    url_cas = m.group(1) + '/v1/tickets'

    opts = utils_http.encode(
        utils_collection.ListMultimap(username=quote(user),
                                      password=quote(pwd)))

    # NOTE(review): 'opts' carries the encoded password; confirm that
    # utils_log.log_url does not write it in clear text.
    utils_log.log_url(log, "login user into CAS:\t", url_cas + '?' + opts)
    url_config['data'] = opts.encode()
    try:
        connexion = utils_http.open_url(url_cas, **url_config)
    except Exception as e:
        # Not every exception carries a 'code' attribute; reading it blindly
        # would raise AttributeError and hide the real failure.
        if getattr(e, 'code', None) == 400:
            log.error("""Error: Bad user login or password:
            
                 On *nix OS, you must use the single quote, otherwise it may expand specific characters.
                 [...] -u 'string' or --user 'string' [...]
                 
                 On Windows OS, you must use the double quote, because single quotes are treated literally.
                 [...] -p "string" or --pwd "string" [...]
                 """)

        raise

    fp = utils_html.FounderParser()
    for line in connexion:
        log.log(utils_log.TRACE_LEVEL, 'utils_html.FounderParser() line: %s',
                line)
        # py3 compatibility
        if (isinstance(line, bytes)):
            fp.feed(line.decode())
        else:
            fp.feed(line)

    # The TGT is the last path segment of the form action found by the parser.
    # presumably 'action_' stays empty/None when the reply holds no form - TODO confirm
    action = getattr(fp, 'action_', None)
    tgt = action[action.rfind('/') + 1:] if action else ''
    log.log(utils_log.TRACE_LEVEL, 'TGT: %s', tgt)

    if not tgt:
        raise Exception(utils_messages.get_external_messages()
                        ['motuclient.exception.authentication.tgt'])

    # WARNING : don't use 'fp.action_' as url : it seems protocol is always http never https
    # use 'url_cas', extract TGT from 'fp.action_' , then construct url_ticket.
    url_ticket = url_cas + '/' + tgt

    utils_log.log_url(log, "found url ticket:\t", url_ticket)

    opts = utils_http.encode(
        utils_collection.ListMultimap(service=quote_plus(redirectServiceUrl)))

    utils_log.log_url(log, 'Granting user for service\t',
                      url_ticket + '?' + opts)
    url_config['data'] = opts.encode()

    ticket = utils_http.open_url(url_ticket, **url_config).readline()

    # py3 compatibility
    if (isinstance(ticket, bytes)):
        ticket = ticket.decode()

    utils_log.log_url(log, "found service ticket:\t", ticket)

    # we append the download url with the ticket and return the result
    service_url = redirectServiceUrl + '&ticket=' + ticket

    utils_log.log_url(log, "service url is:\t", service_url)

    return service_url
Exemplo n.º 4
0
def execute_request(_options):
    """
    The main function that submits a request to motu.

    The request url is built from the options and, in 'cas' mode,
    authenticated first. When ``sync``, ``describe`` or ``size`` is set the
    reply is fetched directly with ``dl_2_file``. Otherwise the request is
    submitted asynchronously and its status is polled every 10 seconds while
    it is "0" or "3"; status "1" triggers the download of the returned
    ``remoteUri``, status "2" logs the server message and an unparsable
    reply is reported as an invalid server response.

    If an exception occurs while downloading, the output file is removed
    (when it exists) and the exception is re-raised. The module globals
    ``log`` and ``init_time`` are (re)assigned by this function.

    Available options are:

    * Proxy configuration (with eventually user credentials)
      - proxy_server: 'http://my-proxy.site.com:8080'
      - proxy_user  : '******'
      - proxy_pwd   :'doe'

    * Authorization mode: 'cas', 'basic', 'none'
      - auth_mode: 'cas'

    * User credentials for authentication 'cas' or 'basic'
      - user: '******'
      - pwd:  'doe'

    * Motu service URL
      - motu: 'http://atoll-dev.cls.fr:30080/mis-gateway-servlet/Motu'

    * Dataset identifier to download
      - product_id: 'dataset-duacs-global-nrt-madt-merged-h'

    * Service identifier to use for retrieving dataset
      - service_id: 'http://purl.org/myocean/ontology/service/database#yourduname'

    * Geographic extraction parameters
      - latitude_max :  10.0
      - latitude_min : -10.0
      - longitude_max: -0.333333333369
      - longitude_min:  0.0

    * Vertical extraction parameters
      - depth_max: 1000
      - depth_min: 0

    * Temporal extraction parameters, as a datetime instance or a string (format: '%Y-%m-%d %H:%M:%S')
      - date_max: 2010-04-25 12:05:36
      - date_min: 2010-04-25

    * Variable extraction
      - variable: ['variable1','variable2']

    * The file name and the directory of the downloaded dataset
      - out_dir : '.'
      - out_name: 'dataset'

    * The block size used to perform download
      - block_size: 12001

    * The socket timeout configuration
      - socket_timeout: 515

    * The user agent to use when performing http requests
      - user_agent: 'motu-api-client'

    """
    global log
    global init_time

    init_time = datetime.datetime.now()
    stopWatch = stop_watch.localThreadStopWatch()
    stopWatch.start()
    try:
        log = logging.getLogger("motu_api")

        # at first, we check given options are ok
        check_options(_options)

        # print some trace info about the options set
        log.log(utils_log.TRACE_LEVEL, '-' * 60)

        for option in dir(_options):
            if not option.startswith('_'):
                log.log(utils_log.TRACE_LEVEL,
                        "%s=%s" % (option, getattr(_options, option)))

        log.log(utils_log.TRACE_LEVEL, '-' * 60)

        # start of url to invoke
        url_service = _options.motu

        # parameters of the invoked service
        url_params = build_params(_options)

        url_config = get_url_config(_options)

        # add the question mark separator unless the service url already ends with it
        questionMark = '?'
        if url_service.endswith(questionMark):
            questionMark = ''
        url = url_service + questionMark + url_params

        # describe/size requests produce an XML document, not a NetCDF file
        if _options.describe == True or _options.size == True:
            _options.out_name = _options.out_name.replace('.nc', '.xml')

        # set-up the socket timeout if any
        if _options.socket_timeout is not None:
            log.debug("Setting timeout %s" % _options.socket_timeout)
            socket.setdefaulttimeout(_options.socket_timeout)

        if _options.auth_mode == AUTHENTICATION_MODE_CAS:
            stopWatch.start('authentication')
            # perform authentication before acceding service
            download_url = utils_cas.authenticate_CAS_for_URL(
                url, _options.user, _options.pwd, **url_config)
            url_service = download_url.split("?")[0]
            stopWatch.stop('authentication')
        else:
            # if none, we do nothing more, in basic, we let the url requester doing the job
            download_url = url

        # path of the file storing the downloaded stream ("console" means stdout)
        fh = os.path.join(_options.out_dir, _options.out_name)
        if _options.console_mode:
            fh = "console"

        try:
            # Synchronous mode
            if _options.sync == True or _options.describe == True or _options.size == True:
                is_a_download_request = False
                if _options.describe == False and _options.size == False:
                    is_a_download_request = True
                dl_2_file(download_url, fh, _options.block_size,
                          is_a_download_request, **url_config)
                log.info("Done")
            # Asynchronous mode
            else:
                stopWatch.start('wait_request')
                requestUrl = get_requestUrl(download_url, url_service,
                                            _options, **url_config)

                if requestUrl is not None:
                    # asynchronous mode
                    status = "0"
                    dwurl = ""
                    msg = ""

                    while True:
                        if _options.auth_mode == AUTHENTICATION_MODE_CAS:
                            stopWatch.start('authentication')
                            # perform authentication before acceding service
                            requestUrlCas = utils_cas.authenticate_CAS_for_URL(
                                requestUrl, _options.user, _options.pwd,
                                **url_config)
                            stopWatch.stop('authentication')
                        else:
                            # if none, we do nothing more, in basic, we let the url requester doing the job
                            requestUrlCas = requestUrl

                        m = utils_http.open_url(requestUrlCas, **url_config)
                        try:
                            motu_reply = m.read()
                        finally:
                            # one connection is opened per poll: release it
                            m.close()

                        try:
                            dom = minidom.parseString(motu_reply)
                        except Exception:
                            # an unparsable reply is reported as status "4";
                            # KeyboardInterrupt/SystemExit are not swallowed
                            log.error(motu_reply)
                            dom = None

                        if dom:
                            for node in dom.getElementsByTagName(
                                    'statusModeResponse'):
                                status = node.getAttribute('status')
                                dwurl = node.getAttribute('remoteUri')
                                msg = node.getAttribute('msg')
                        else:
                            status = "4"

                        # Check status
                        if status == "0" or status == "3":  # in progress/pending
                            log.info(
                                'Product is not yet available (request in progress)'
                            )
                            time.sleep(10)
                        else:  # finished (error|success)
                            break

                    if status == "2":
                        log.error(msg)
                    if status == "4":
                        log.error(
                            "Motu server API interaction appears to have failed, server response is invalid"
                        )
                    if status == "1":
                        log.info('The product is ready for download')
                        if dwurl != "":
                            dl_2_file(dwurl, fh, _options.block_size,
                                      not (_options.describe or _options.size),
                                      **url_config)
                            log.info("Done")
                        else:
                            log.error("Couldn't retrieve file")

                stopWatch.stop('wait_request')

        except BaseException:
            # best-effort removal of a partial output file, whatever the
            # reason of the failure (interruption included), then re-raise
            try:
                if (os.path.isfile(fh)):
                    os.remove(fh)
            except OSError:
                pass
            raise
    finally:
        stopWatch.stop()
Exemplo n.º 5
0
def dl_2_file(dl_url,
              fh,
              block_size=65535,
              isADownloadRequest=None,
              **options):
    """ Download the file with the main url (of Motu) file.

    Motu can return an error message in the response stream without setting an
    appropriate http error code. So, in that case, the content-type response is
    checked, and if it is a text or html type for a download request, we
    consider this as an error.

    dl_url: the complete download url of Motu
    fh: path of the file to write the stream to; a value starting with
        "console" prints to stdout instead of writing a file
    block_size: block size handed to ``utils_stream.copy``
    isADownloadRequest: true when a data file is expected (enables the
        content-type check; in console mode only the final url is printed)
    options: keyword arguments forwarded to ``utils_http.open_url``

    Raises Exception when the url is redirected to a CAS server, when Motu
    answers a download request with a text/html body, or when fewer bytes
    than announced by Content-Length were written to the file. Any failure
    during the transfer is logged with diagnostic details and re-raised."""

    stopWatch = stop_watch.localThreadStopWatch()
    start_time = datetime.datetime.now()
    # Mutable holder shared with the progress callbacks below; a plain local
    # could not be rebound from the nested functions.
    progress = {'last_percent': 0.0}
    # Defaults so the final length check never reads unbound names.
    size = -1
    read = 0
    log.info("Downloading file (this can take a while)...")

    # download file
    temp = None
    if not fh.startswith("console"):
        temp = open(fh, 'w+b')

    try:
        stopWatch.start('processing')

        m = utils_http.open_url(dl_url, **options)
        try:
            # check the real url (after potential redirection) is not a CAS Url scheme
            match = re.search(utils_cas.CAS_URL_PATTERN, m.url)
            if match is not None:
                service, _, _ = dl_url.partition('?')
                redirection, _, _ = m.url.partition('?')
                raise Exception(
                    utils_messages.get_external_messages()
                    ['motuclient.exception.authentication.redirected'] %
                    (service, redirection))

            # check that content type is not text/plain
            headers = m.info()
            content_type = None
            if "Content-Type" in headers:
                content_type = headers['Content-Type']
            if content_type and isADownloadRequest and (
                    content_type.startswith('text')
                    or content_type.find('html') != -1):
                raise Exception(utils_messages.get_external_messages()
                                ['motuclient.exception.motu.error'] % m.read())

            log.info('File type: %s' % content_type)
            # check if a content length (size of the file) has been send
            if "Content-Length" in headers:
                try:
                    # it should be an integer
                    size = int(headers["Content-Length"])
                    log.info('File size: %s (%i B)' %
                             (utils_unit.convert_bytes(size), size))
                except Exception:
                    size = -1
                    log.warning('File size is not an integer: %s' %
                                headers["Content-Length"])
            elif temp is not None:
                log.warning('File size: %s' % 'unknown')

            processing_time = datetime.datetime.now()
            stopWatch.stop('processing')
            stopWatch.start('downloading')

            # performs the download
            log.info('Downloading file %s' % os.path.abspath(fh))

            def progress_function(sizeRead):
                # log when progress grew by more than 1% or first reaches 100%
                if size:
                    percent = sizeRead * 100. / size
                else:
                    # an announced length of 0 is complete by definition
                    percent = 100.
                last_percent = progress['last_percent']
                if percent - last_percent > 1 or (last_percent != 100
                                                  and percent >= 100):
                    log.info("- %s (%.1f%%)",
                             utils_unit.convert_bytes(size).rjust(8), percent)
                    progress['last_percent'] = percent

            def none_function(sizeRead):
                # total size unknown: every call is reported as 100%
                percent = 100
                log.info("- %s (%.1f%%)",
                         utils_unit.convert_bytes(size).rjust(8), percent)
                progress['last_percent'] = percent

            if temp is not None:
                read = utils_stream.copy(
                    m, temp,
                    progress_function if size != -1 else none_function,
                    block_size)
            else:
                if isADownloadRequest:
                    #Console mode, only display the NC file URL on stdout
                    read = len(m.url)
                    print((m.url))
                else:
                    import io
                    # NOTE(review): a text buffer receives the response body
                    # here; confirm utils_stream.copy hands it str, not bytes.
                    output = io.StringIO()
                    utils_stream.copy(
                        m, output,
                        progress_function if size != -1 else none_function,
                        block_size)
                    read = len(output.getvalue())
                    print((output.getvalue()))

            end_time = datetime.datetime.now()
            stopWatch.stop('downloading')

            log.info("Processing  time : %s", str(processing_time - init_time))
            log.info("Downloading time : %s", str(end_time - processing_time))
            log.info("Total time       : %s", str(end_time - init_time))
            log.info(
                "Download rate    : %s/s",
                utils_unit.convert_bytes(
                    (read / total_milliseconds(end_time - start_time)) *
                    10**3))
        except Exception as e:
            log.error("Download failed: %s", e)
            if hasattr(e, 'reason'):
                log.info(' . reason: %s', e.reason)
            if hasattr(e, 'code'):
                log.info(' . code  %s: ', e.code)
            if hasattr(e, 'read'):
                try:
                    log.log(utils_log.TRACE_LEVEL, ' . detail:\n%s', e.read())
                except Exception:
                    # the error body is optional diagnostic information
                    pass

            log.debug('-' * 60)
            log.debug("Stack trace exception is detailed herafter:")
            exc_type, exc_value, exc_tb = sys.exc_info()
            x = traceback.format_exception(exc_type, exc_value, exc_tb)
            for stack in x:
                log.debug(' . %s', stack.replace('\n', ''))
            log.debug('-' * 60)
            log.log(utils_log.TRACE_LEVEL,
                    'System info is provided hereafter:')
            system, node, release, version, machine, processor = platform.uname(
            )
            log.log(utils_log.TRACE_LEVEL, ' . system   : %s', system)
            log.log(utils_log.TRACE_LEVEL, ' . node     : %s', node)
            log.log(utils_log.TRACE_LEVEL, ' . release  : %s', release)
            log.log(utils_log.TRACE_LEVEL, ' . version  : %s', version)
            log.log(utils_log.TRACE_LEVEL, ' . machine  : %s', machine)
            log.log(utils_log.TRACE_LEVEL, ' . processor: %s', processor)
            log.log(utils_log.TRACE_LEVEL, ' . python   : %s', sys.version)
            log.log(utils_log.TRACE_LEVEL, ' . client   : %s',
                    get_client_version())
            log.log(utils_log.TRACE_LEVEL, '-' * 60)
            # Propagate the failure: swallowing it would let the caller
            # report success and keep a partial file.
            raise

        finally:
            m.close()
    finally:
        if temp is not None:
            temp.flush()
            temp.close()

    # raise exception if actual size does not match content-length header
    if temp is not None and size >= 0 and read < size:
        raise Exception(utils_messages.get_external_messages()
                        ['motuclient.exception.download.too-short'] %
                        (read, size))
Exemplo n.º 6
0
def authenticate_CAS_for_URL(url, user, pwd, **url_config):
    """Performs a CAS authentication for the given URL service and returns
    the service url with the obtained credential.

    The following algorithm is done:
    1) A connection is opened on the given URL
    2) We check that the response is an HTTP redirection
    3) Redirected URL contains the CAS address
    4) We ask for a ticket for the given user and password
    5) We ask for a service ticket for the given service
    6) Then we return a new url with the ticket attached

    Each network step is retried after 10, 30, 100 and 300 seconds when it
    raises; the error of the last attempt is re-raised. An error carrying
    ``code == 400`` during steps 1 and 4 is treated as bad credentials and
    is re-raised immediately. A reply that is not a CAS redirection is an
    error and is not retried.

    url: the url of the service to invoke
    user: the username
    pwd: the password
    url_config: keyword arguments forwarded to ``utils_http.open_url``
                (the ``data`` entry is overwritten by this function)"""

    log = logging.getLogger("utils_cas:authenticate_CAS_for_URL")

    server = url.partition('?')[0]

    log.info('Authenticating user %s for service %s' % (user, server))
    delays = [10, 30, 100, 300]

    def run_with_retries(operation, failure_label, check_credentials):
        """Call operation(), sleeping through 'delays' between failed attempts."""
        tries = 0
        while True:
            try:
                return operation()
            except Exception as e:
                if check_credentials and getattr(e, 'code', None) == 400:
                    log_error_password(log)
                    raise
                if tries >= len(delays):
                    # every delay has been used: give up with the last error
                    raise
                log.warning("Warning: " + failure_label + ", retrying in " +
                            str(delays[tries]) + " seconds ...")
                time.sleep(delays[tries])
                tries = tries + 1

    connexion = run_with_retries(
        lambda: utils_http.open_url(url, **url_config),
        "CAS connection failed", True)

    # connexion response code must be a redirection, else, there's an error (user can't be already connected since no cookie or ticket was sent)
    # This is checked once, outside the retry helper: a successful answer
    # without redirection raises nothing, so retrying on it would never end.
    if connexion.url == url:
        raise Exception(
            utils_messages.get_external_messages()['motuclient.exception.authentication.not-redirected'] % server)

    # find the cas url from the redirected url
    redirected_url = connexion.url
    p = parse_qs(urlparse(redirected_url).query, keep_blank_values=False)
    redirectServiceUrl = p['service'][0]

    m = re.search(CAS_URL_PATTERN, redirected_url)
    if m is None:
        raise Exception(
            utils_messages.get_external_messages()['motuclient.exception.authentication.unfound-url'] % redirected_url)

    url_cas = m.group(1) + '/v1/tickets'
    opts = utils_http.encode(utils_collection.ListMultimap(username=quote(user), password=quote(pwd)))

    # NOTE(review): 'opts' carries the encoded password; confirm that
    # utils_log.log_url does not write it in clear text.
    utils_log.log_url(log, "login user into CAS:\t", url_cas + '?' + opts)
    url_config['data'] = opts.encode()

    connexion = run_with_retries(
        lambda: utils_http.open_url(url_cas, **url_config),
        "Authentication failed", True)

    fp = utils_html.FounderParser()
    for line in connexion:
        log.log(utils_log.TRACE_LEVEL, 'utils_html.FounderParser() line: %s', line)
        # py3 compatibility
        if (isinstance(line, bytes)):
            fp.feed(line.decode())
        else:
            fp.feed(line)

    # The TGT is the last path segment of the form action found by the parser.
    # presumably 'action_' stays empty/None when the reply holds no form - TODO confirm
    action = getattr(fp, 'action_', None)
    tgt = action[action.rfind('/') + 1:] if action else ''
    log.log(utils_log.TRACE_LEVEL, 'TGT: %s', tgt)

    if not tgt:
        raise Exception(utils_messages.get_external_messages()['motuclient.exception.authentication.tgt'])

    # WARNING : don't use 'fp.action_' as url : it seems protocol is always http never https
    # use 'url_cas', extract TGT from 'fp.action_' , then construct url_ticket.
    url_ticket = url_cas + '/' + tgt

    utils_log.log_url(log, "found url ticket:\t", url_ticket)

    opts = utils_http.encode(utils_collection.ListMultimap(service=quote_plus(redirectServiceUrl)))

    utils_log.log_url(log, 'Granting user for service\t', url_ticket + '?' + opts)
    url_config['data'] = opts.encode()

    # opening the url and reading the ticket line are retried together
    ticket = run_with_retries(
        lambda: utils_http.open_url(url_ticket, **url_config).readline(),
        "Ticket validation failed", False)

    # py3 compatibility
    if (isinstance(ticket, bytes)):
        ticket = ticket.decode()

    utils_log.log_url(log, "found service ticket:\t", ticket)

    # we append the download url with the ticket and return the result
    service_url = redirectServiceUrl + '&ticket=' + ticket

    utils_log.log_url(log, "service url is:\t", service_url)

    return service_url