def get_requestUrl(dl_url, server, **options):
    """ Submit the request and return the url used to poll its status.

    The reply of dl_url is parsed as XML; the 'requestId' attribute of its
    'statusModeResponse' element is appended to the server url as a
    'getreqstatus' action.

    dl_url: the complete request url to open
    server: the url of the service, without any parameter
    options: keyword arguments forwarded to utils_http.open_url

    Returns the url to invoke for getting the status of the request.
    Raises an Exception when the reply carries no request id.
    """
    stopWatch = stop_watch.localThreadStopWatch()
    stopWatch.start('get_request')
    try:
        log.info("Requesting file to download (this can take a while)...")

        # Get request id
        m = utils_http.open_url(dl_url, **options)
        try:
            motu_reply = m.read()
        finally:
            # the reply is fully read, the connection is not needed anymore
            m.close()

        dom = minidom.parseString(motu_reply)
        requestId = None
        detail = 'no statusModeResponse element in the reply'
        for node in dom.getElementsByTagName('statusModeResponse'):
            requestId = node.getAttribute('requestId')
            # keep the message of the server (if any) for error reporting
            detail = node.getAttribute('msg') or 'empty requestId attribute'

        # without an id, the status url would be meaningless: fail here
        # rather than polling a request that does not exist
        if not requestId:
            raise Exception(
                'Unable to get a request id from the server: %s' % detail)

        # Get request url
        return server + '?action=getreqstatus&requestid=' + requestId
    finally:
        # stop the watch whatever happens
        stopWatch.stop('get_request')
def _remove_partial_file(path):
    """ Best-effort removal of the (possibly partial) file at path."""
    try:
        if os.path.isfile(path):
            os.remove(path)
    except OSError:
        # the cleanup must never hide the error that triggered it
        pass


def _authenticate_if_cas(url, _options, url_config, stopWatch):
    """ Return url with a CAS ticket when auth_mode is CAS, else url."""
    if _options.auth_mode != AUTHENTICATION_MODE_CAS:
        # if none, we do nothing more, in basic, we let the url requester
        # doing the job
        return url
    stopWatch.start('authentication')
    # perform authentication before acceding service
    authenticated_url = utils_cas.authenticate_CAS_for_URL(
        url, _options.user, _options.pwd, **url_config)
    stopWatch.stop('authentication')
    return authenticated_url


def execute_request(_options):
    """ The main function that submits a request to motu.

    Available options are:
    * Proxy configuration (possibly with user credentials)
        - proxy_server: 'http://my-proxy.site.com:8080'
        - proxy_user  : '******'
        - proxy_pwd   : 'doe'

    * Authorization mode: 'cas', 'basic', 'none'
        - auth_mode: 'cas'

    * User credentials for authentication 'cas' or 'basic'
        - user: '******'
        - pwd:  'doe'

    * Motu service URL
        - motu: 'http://atoll-dev.cls.fr:30080/mis-gateway-servlet/Motu'

    * Dataset identifier to download
        - product_id: 'dataset-duacs-global-nrt-madt-merged-h'

    * Service identifier to use for retrieving dataset
        - service_id: 'http://purl.org/myocean/ontology/service/'
                      'database#yourduname'

    * Geographic extraction parameters
        - latitude_max :  10.0
        - latitude_min : -10.0
        - longitude_max: -0.333333333369
        - longitude_min:  0.0

    * Vertical extraction parameters
        - depth_max: 1000
        - depth_min: 0

    * Temporal extraction parameters, as a datetime instance or a string
      (format: '%Y-%m-%d %H:%M:%S')
        - date_max: 2010-04-25 12:05:36
        - date_min: 2010-04-25

    * Variable extraction
        - variable: ['variable1','variable2']

    * The file name and the directory of the downloaded dataset
        - out_dir : '.'
        - out_name: 'dataset'

    * The block size used to perform download
        - block_size: 12001

    * The socket timeout configuration
        - socket_timeout: 515

    * The user agent to use when performing http requests
        - user_agent: 'motu-api-client'

    The 'sync' option selects a direct download (True) or a submitted
    request whose status is polled every 10 seconds until it is finished.
    When the 'describe' option is True, the '.nc' part of out_name is
    replaced by '.xml'.

    If anything fails while the output file is being produced, the file is
    removed and the error is raised again.
    """
    global log

    stopWatch = stop_watch.localThreadStopWatch()
    stopWatch.start()
    try:
        log = logging.getLogger("motu_api")

        # at first, we check given options are ok
        check_options(_options)

        # print some trace info about the options set
        log.log(utils_log.TRACE_LEVEL, '-' * 60)
        for option in dir(_options):
            if not option.startswith('_'):
                log.log(utils_log.TRACE_LEVEL, "%s=%s",
                        option, getattr(_options, option))
        log.log(utils_log.TRACE_LEVEL, '-' * 60)

        # start of url to invoke
        url_service = _options.motu

        # parameters of the invoked service
        url_params = build_params(_options)
        url_config = get_url_config(_options)

        # check if question mark is in the url
        questionMark = '?'
        if url_service.endswith(questionMark):
            questionMark = ''
        url = url_service + questionMark + url_params

        if _options.describe is True:
            _options.out_name = _options.out_name.replace('.nc', '.xml')

        # set-up the socket timeout if any
        if _options.socket_timeout is not None:
            log.debug("Setting timeout %s", _options.socket_timeout)
            socket.setdefaulttimeout(_options.socket_timeout)

        download_url = _authenticate_if_cas(
            url, _options, url_config, stopWatch)

        # path of the file storing the downloaded stream
        fh = os.path.join(_options.out_dir, _options.out_name)

        try:
            if _options.sync is True:
                # Synchronous mode
                dl_2_file(download_url, fh, _options.block_size,
                          **url_config)
                log.info("Done")
            else:
                # Asynchronous mode
                stopWatch.start('wait_request')
                requestUrl = get_requestUrl(download_url, url_service,
                                            **url_config)

                status = 0
                dwurl = ""
                while True:
                    # a new ticket is requested for each status request
                    requestUrlCas = _authenticate_if_cas(
                        requestUrl, _options, url_config, stopWatch)

                    m = utils_http.open_url(requestUrlCas, **url_config)
                    try:
                        dom = minidom.parseString(m.read())
                    finally:
                        # one connection per poll: do not leak them
                        m.close()

                    for node in dom.getElementsByTagName(
                            'statusModeResponse'):
                        status = node.getAttribute('status')
                        dwurl = node.getAttribute('msg')

                    # Check status
                    if status == "0" or status == "3":
                        # in progress/pending
                        log.info('Product is not yet available '
                                 '(request in process)')
                        time.sleep(10)
                    else:
                        # finished (error|success)
                        break

                stopWatch.stop('wait_request')

                if status == "2":
                    # in that case, the message is the error description
                    log.error(dwurl)
                elif status == "1":
                    log.info('The product is ready for download')
                    if dwurl != "":
                        dl_2_file(dwurl, fh, _options.block_size,
                                  _options.describe, **url_config)
                        log.info("Done")
                    else:
                        log.error("Couldn't retrieve file")
                else:
                    # no usable status in the reply: say it rather than
                    # ending silently without any file
                    log.error("Unexpected request status: %r", status)
        except BaseException:
            # whatever the failure (interruption included), do not leave a
            # partial file behind. The removal is done in a helper so that
            # a failure while removing cannot replace the error re-raised
            # below.
            _remove_partial_file(fh)
            raise
    finally:
        stopWatch.stop()
def dl_2_file(dl_url, fh, block_size=65535, describe='None', **options):
    """ Download the file with the main url (of Motu) file.

    Motu can return an error message in the response stream without setting
    an appropriate http error code. So, in that case, the content-type
    response is checked, and if it is text/plain (or any text or html
    content), we consider this as an error.

    dl_url: the complete download url of Motu
    fh: the path of the file to write the downstream to (the file is
        created or truncated)
    block_size: the block size given to utils_stream.copy
    describe: accepted for compatibility with callers, not used here
    options: keyword arguments forwarded to utils_http.open_url

    Raises an Exception when the final url matches the CAS url pattern
    (authentication redirection), when the content type denotes an error
    message, or when less data than announced by the Content-Length header
    has been read.
    """
    stopWatch = stop_watch.localThreadStopWatch()
    start_time = datetime.datetime.now()

    log.info("Downloading file (this can take a while)...")

    # download file
    with open(fh, 'w+b') as temp:
        stopWatch.start('processing')

        m = utils_http.open_url(dl_url, **options)
        try:
            # check the real url (after potential redirection) is not a CAS
            # Url scheme
            match = re.search(utils_cas.CAS_URL_PATTERN, m.url)
            if match is not None:
                service, _, _ = dl_url.partition('?')
                redirection, _, _ = m.url.partition('?')
                raise Exception(
                    utils_messages.get_external_messages()[
                        'motu-client.exception.authentication.redirected'
                    ] % (service, redirection))

            # check that content type is not text/plain
            headers = m.info()
            if "Content-Type" in headers:
                content_type = headers['Content-Type']
                if len(content_type) > 0:
                    if (content_type.startswith('text') or
                            content_type.find('html') != -1):
                        # the body is the error message of the server
                        raise Exception(
                            utils_messages.get_external_messages()[
                                'motu-client.exception.motu.error'
                            ] % m.read())

                log.info('File type: %s' % content_type)

            # check if a content length (size of the file) has been send;
            # -1 stands for an unknown size
            if "Content-Length" in headers:
                try:
                    # it should be an integer
                    size = int(headers["Content-Length"])
                except (TypeError, ValueError):
                    size = -1
                    log.warning(
                        'File size is not an integer: %s' % headers[
                            "Content-Length"])
                else:
                    log.info('File size: %s (%i B)' % (
                        utils_unit.convert_bytes(size), size))
            else:
                size = -1
                log.warning('File size: %s' % 'unknown')

            processing_time = datetime.datetime.now()
            stopWatch.stop('processing')
            stopWatch.start('downloading')

            # performs the download
            log.info('Downloading file %s' % os.path.abspath(fh))

            def progress_function(sizeRead):
                # only used when the expected size is strictly positive
                percent = sizeRead * 100. / size
                log.info("- %s (%.1f%%)",
                         utils_unit.convert_bytes(size).rjust(8), percent)

            def none_function(sizeRead):
                # no usable expected size: report what has been read
                percent = 100
                log.info("- %s (%.1f%%)",
                         utils_unit.convert_bytes(sizeRead).rjust(8),
                         percent)

            # a null (or unknown) size can not be used to compute a percent
            read = utils_stream.copy(
                m, temp,
                progress_function if size > 0 else none_function,
                block_size)

            end_time = datetime.datetime.now()
            stopWatch.stop('downloading')

            log.info("Processing  time : %s",
                     str(processing_time - start_time))
            log.info("Downloading time : %s",
                     str(end_time - processing_time))
            log.info("Total time       : %s", str(end_time - start_time))
            elapsed_ms = total_milliseconds(end_time - start_time)
            if elapsed_ms > 0:
                log.info("Download rate    : %s/s",
                         utils_unit.convert_bytes(
                             (read / elapsed_ms) * 10**3))
        finally:
            m.close()

    # raise exception if actual size does not match content-length header
    if size >= 0 and read < size:
        raise Exception(
            utils_messages.get_external_messages()[
                'motu-client.exception.download.too-short'
            ] % (read, size))
def authenticate_CAS_for_URL(url, user, pwd, **url_config):
    """Performs a CAS authentication for the given URL service and returns
    the service url with the obtained credential.

    The following algorithm is done:
    1) A connection is opened on the given URL
    2) We check that the response is an HTTP redirection
    3) Redirected URL contains the CAS address
    4) We ask for a ticket for the given user and password
    5) We ask for a service ticket for the given service
    6) Then we return a new url with the ticket attached

    url: the url of the service to invoke
    user: the username
    pwd: the password
    url_config: keyword arguments forwarded to utils_http.open_url

    Raises an Exception when the service does not redirect, when the
    redirected url does not match the CAS url pattern, or when no ticket
    granting ticket can be extracted from the CAS reply.
    """
    log = logging.getLogger("utils_cas:authenticate_CAS_for_URL")

    server, _, _ = url.partition('?')

    log.info('Authenticating user %s for service %s', user, server)

    # only the final url of this first connection is of interest
    connexion = utils_http.open_url(url, **url_config)
    try:
        redirected_url = connexion.url
    finally:
        connexion.close()

    # connexion response code must be a redirection, else, there's an error
    # (user can't be already connected since no cookie or ticket was sent)
    if redirected_url == url:
        raise Exception(
            utils_messages.get_external_messages()[
                'motu-client.exception.authentication.not-redirected'
            ] % server)

    # find the cas url from the redirected url
    m = re.search(CAS_URL_PATTERN, redirected_url)
    if m is None:
        raise Exception(
            utils_messages.get_external_messages()[
                'motu-client.exception.authentication.unfound-url'
            ] % redirected_url)

    url_cas = m.group(1) + '/v1/tickets'

    opts = utils_http.encode(
        utils_collection.ListMultimap(username=user, password=pwd))

    # the password must not end up in the logs: log a masked copy of the
    # parameters instead of the ones really sent
    masked_opts = utils_http.encode(
        utils_collection.ListMultimap(username=user, password='****'))
    utils_log.log_url(log, "login user into CAS:\t",
                      url_cas + '?' + masked_opts)

    url_config['data'] = opts
    connexion = utils_http.open_url(url_cas, **url_config)

    fp = utils_html.FounderParser()
    try:
        for line in connexion:
            log.log(utils_log.TRACE_LEVEL,
                    'utils_html.FounderParser() line: %s', line)
            fp.feed(line)
    finally:
        connexion.close()

    # the TGT is the last path element of the action found by the parser;
    # no action, or an action without a last element, means no TGT
    action = getattr(fp, 'action_', None)
    tgt = action[action.rfind('/') + 1:] if action else ''
    if not tgt:
        raise Exception(
            utils_messages.get_external_messages()[
                'motu-client.exception.authentication.tgt'
            ])
    log.log(utils_log.TRACE_LEVEL, 'TGT: %s', tgt)

    # WARNING : don't use 'fp.action_' as url : it seems protocol is always
    # http never https use 'url_cas', extract TGT from 'fp.action_' , then
    # construct url_ticket.
    url_ticket = url_cas + '/' + tgt

    utils_log.log_url(log, "found url ticket:\t", url_ticket)

    opts = utils_http.encode(
        utils_collection.ListMultimap(service=urllib.quote_plus(url)))

    utils_log.log_url(log, 'Granting user for service\t',
                      url_ticket + '?' + opts)

    url_config['data'] = opts
    connexion = utils_http.open_url(url_ticket, **url_config)
    try:
        ticket = connexion.readline()
    finally:
        connexion.close()

    utils_log.log_url(log, "found service ticket:\t", ticket)

    # we append the download url with the ticket and return the result
    service_url = url + '&ticket=' + ticket

    utils_log.log_url(log, "service url is:\t", service_url)

    return service_url