def download(output_path, hostname, catalog=1, token=None, kwargs=None, config_file=None, credential_file=None):
    """Run a ``GenericDownloader`` against one server and return its result.

    :param output_path: passed to the downloader as ``output_dir``.
    :param hostname: either a bare host name or an ``http://`` / ``https://``
        URL. For a URL, the scheme becomes the protocol and the network
        location becomes the host; a bare host name is used with ``https``.
    :param catalog: stored in the server description as ``catalog_id``.
    :param token: optional; when truthy it is wrapped with
        ``format_credential`` and handed to ``setCredentials``.
    :param kwargs: forwarded unchanged to the downloader.
    :param config_file: forwarded unchanged to the downloader.
    :param credential_file: forwarded unchanged to the downloader.
    :return: the value returned by ``downloader.download()``.
    :raises AssertionError: if ``hostname`` is empty.
    """
    assert hostname, "A hostname is required!"

    server = dict()
    server["catalog_id"] = catalog
    # Only parse the value as a URL when it really carries an http(s) scheme.
    # A bare host that merely begins with "http" (e.g. "httpd.example.org")
    # would otherwise be fed to urlparse, which yields an empty scheme and
    # netloc for it and leaves the server description blank.
    if hostname.startswith(("http://", "https://")):
        url = urlparse(hostname)
        server["protocol"] = url.scheme
        server["host"] = url.netloc
    else:
        server["protocol"] = "https"
        server["host"] = hostname

    downloader = GenericDownloader(server,
                                   output_dir=output_path,
                                   kwargs=kwargs,
                                   config_file=config_file,
                                   credential_file=credential_file)
    if token:
        downloader.setCredentials(format_credential(token))
    return downloader.download()
def host_to_url(host, path="/"):
    """Build a lower-cased URL from a host (or base URL) and a path.

    :param host: a bare host name, or a URL that already has both a scheme
        and a network location.
    :param path: path to attach; defaults to ``"/"``.
    :return: ``None`` when ``host`` is empty. Otherwise the combined URL,
        lower-cased in its entirety (the path portion included). When ``host``
        is already a URL the result is ``urljoin(host, path)``; otherwise the
        host is prefixed with ``https://``.
    """
    if not host:
        return None

    upr = urlparse(host)
    if upr.scheme and upr.netloc:
        url = urljoin(host, path)
    else:
        if host.endswith("/"):
            # Drop only the duplicate separator. Previously the whole path was
            # discarded whenever the host ended with "/", so a non-default
            # path such as "/authn/session" silently vanished.
            path = path.lstrip("/") if path else ""
        url = "https://%s%s" % (host, path)
    return url.lower()
def infer(self, limit=None, confidence=.75):
    """
    Infer a type for every column from the cell values and write it to the schema.

    The rows are read uncast; each cell is folded into a per-column running
    type via ``__get_type``. The resulting Python types are then translated
    into schema ``type`` / ``format`` pairs.

    :param limit: when not None, stop scanning once the row index equals it.
    :param confidence: accepted but not used by this method.
    :raises DerivaCSVError: if a column ends up with a type that has no
        schema equivalent (this includes a column for which no type was found).
    """
    # The base-class infer sets up the headers and an initial schema.
    Table.infer(self)
    rows = self.read(cast=False)

    # One descriptor per header; type and format are filled in below.
    fields = [{'name': header} for header in self.headers]

    # Column index -> running Python type for that column.
    column_types = {}
    rindex = 0
    for rindex, row in enumerate(rows):
        if limit is not None and rindex == limit:
            break
        for cindex, value in enumerate(row):
            column_types[cindex] = self.__get_type(value, column_types.get(cindex, None))

    # NOTE(review): this stores the last index reached, not a count of rows;
    # confirm which one consumers of row_count expect.
    self.row_count = rindex

    url_type = type(urlparse('foo'))
    schema_types = {
        bool: ('boolean', 'default'),
        int: ('integer', 'default'),
        float: ('number', 'default'),
        str: ('string', 'default'),
        datetime.datetime: ('datetime', 'any'),
        url_type: ('string', 'uri'),
    }
    for index, inferred in column_types.items():
        if inferred not in schema_types:
            raise DerivaCSVError(msg='Bad type in infer')
        type_name, type_format = schema_types[inferred]
        fields[index].update({'type': type_name, 'format': type_format})

    # Publish the inferred fields, then restore the key constraints, which the
    # base-class infer wiped out.
    self.schema.descriptor['fields'] = fields
    self.__set_key_constraints()
    self.schema.commit()
    return
def download(cls, args):
    """Build a downloader from parsed arguments and run it.

    :param cls: object providing ``get_downloader(server, **kwargs)``.
    :param args: argument namespace; ``args.host`` and ``args.catalog`` are
        read here, and every attribute is forwarded to ``get_downloader`` as
        a keyword argument via ``vars(args)``.
    :return: the value returned by the downloader's ``download()``.
    :raises AssertionError: if ``args.host`` is empty.
    """
    assert args.host, "A hostname is required!"

    server = dict()
    server["catalog_id"] = args.catalog
    # Only parse the value as a URL when it really carries an http(s) scheme.
    # A bare host that merely begins with "http" (e.g. "httpd.example.org")
    # would otherwise go through urlparse and come back with an empty scheme
    # and netloc.
    if args.host.startswith(("http://", "https://")):
        url = urlparse(args.host)
        server["protocol"] = url.scheme
        server["host"] = url.netloc
    else:
        server["protocol"] = "https"
        server["host"] = args.host

    downloader = cls.get_downloader(server, **vars(args))
    # Tag the downloader with its own class name as the client context id.
    downloader.set_dcctx_cid(downloader.__class__.__name__)
    return downloader.download()
def __get_type(val, prev_type):
    """Fold one cell value into the running type of its column.

    :param val: the cell text.
    :param prev_type: the type inferred for the column so far, or None when
        nothing has been inferred yet.
    :return: the new running type: ``bool``, ``int``, ``float``, ``str``,
        ``datetime.datetime``, the result type of ``urlparse`` (for text with
        both a scheme and a network location), or ``prev_type`` unchanged
        when the cell is empty or the column is already ``str``.
    """
    # Empty cells carry no information, and once a column is str it stays str.
    if val == '' or prev_type is str:
        return prev_type

    if val.upper() in ('TRUE', 'FALSE'):
        # Recognise booleans by spelling so they are not mistaken for strings.
        val_type = bool
    else:
        # See whether the text is a Python literal (number, quoted string...).
        # literal_eval is documented to raise any of these on malformed input.
        try:
            literal = ast.literal_eval(val)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            literal = val

        literal_type = type(literal)
        if literal_type is str:
            val_type = str
            # Text that parses as a date is a datetime ...
            try:
                dateutil.parser.parse(literal, ignoretz=True)
                val_type = datetime.datetime
            except (ValueError, OverflowError):
                # OverflowError is raised for out-of-range date components.
                pass
            # ... otherwise text with a scheme and a netloc is a URL.
            if val_type is str:
                try:
                    url_result = urlparse(literal)
                except ValueError:
                    # e.g. a malformed bracketed host; treat as plain text.
                    url_result = None
                if url_result is not None and url_result.scheme != '' and url_result.netloc != '':
                    val_type = type(url_result)
        elif literal_type in (bool, int, float):
            val_type = literal_type
        else:
            # Lists, tuples, dicts, sets, None, bytes and complex numbers have
            # no column type of their own; treat the cell as plain text rather
            # than handing the caller a type it cannot map.
            val_type = str

    # Promotion / demotion against what the column held before.
    if prev_type is None:
        return val_type
    if (val_type == float and prev_type == int) or (val_type == int and prev_type == float):
        # Float overrides integer.
        return float
    if val_type != prev_type:
        # Types are different, so fall back to text.
        return str
    return val_type
def upload(uploader, data_path, hostname, catalog=1, token=None, config_file=None, credential_file=None,
           no_update=False, purge=False):
    """Instantiate an uploader class and upload the contents of a directory.

    :param uploader: a ``DerivaUpload`` subclass (the class, not an instance).
    :param data_path: directory handed to ``scanDirectory``.
    :param hostname: either a bare host name or an ``http://`` / ``https://``
        URL. For a URL, the scheme becomes the protocol and the network
        location becomes the host; a bare host name is used with ``https``.
    :param catalog: stored in the server description as ``catalog_id``.
    :param token: optional; when truthy it is wrapped with
        ``format_credential`` and handed to ``setCredentials``.
    :param config_file: passed to the uploader constructor; when it is not
        given (and ``no_update`` is false) an updated configuration is
        requested and, if one is returned, written to the deployed path.
    :param credential_file: passed to the uploader constructor.
    :param no_update: when true, skip the configuration update step.
    :param purge: passed to ``scanDirectory`` as ``purge_state``.
    :raises TypeError: if ``uploader`` is not a ``DerivaUpload`` subclass.
    :raises AssertionError: if ``hostname`` is empty.
    :raises RuntimeError: if the uploader reports a version incompatibility.
    """
    if not issubclass(uploader, DerivaUpload):
        raise TypeError("DerivaUpload subclass required")
    assert hostname

    server = dict()
    server["catalog_id"] = catalog
    # Only parse the value as a URL when it really carries an http(s) scheme.
    # A bare host that merely begins with "http" (e.g. "httpd.example.org")
    # would otherwise go through urlparse and come back with an empty scheme
    # and netloc.
    if hostname.startswith(("http://", "https://")):
        url = urlparse(hostname)
        server["protocol"] = url.scheme
        server["host"] = url.netloc
    else:
        server["protocol"] = "https"
        server["host"] = hostname

    deriva_uploader = uploader(config_file, credential_file, server)
    # Tag the uploader with its own class name as the client context id.
    deriva_uploader.set_dcctx_cid(deriva_uploader.__class__.__name__)
    if token:
        deriva_uploader.setCredentials(format_credential(token))

    # Without an explicit config file, refresh the deployed configuration
    # unless the caller opted out.
    if not config_file and not no_update:
        config = deriva_uploader.getUpdatedConfig()
        if config:
            write_config(deriva_uploader.getDeployedConfigFilePath(), config)

    if not deriva_uploader.isVersionCompatible():
        raise RuntimeError("Version incompatibility detected",
                           "Current version: [%s], required version(s): %s." % (
                               deriva_uploader.getVersion(), deriva_uploader.getVersionCompatibility()))

    deriva_uploader.scanDirectory(data_path, abort_on_invalid_input=False, purge_state=purge)
    deriva_uploader.uploadFiles(file_callback=deriva_uploader.defaultFileCallback)
    deriva_uploader.cleanup()
def export(config=None, base_dir=None, service_url=None, public=False, files_only=False, quiet=False,
           propagate_logs=True, require_authentication=True, allow_anonymous_download=False,
           max_payload_size_mb=None, timeout=None, dcctx_cid="export/unknown", request_ip="ip-unknown"):
    """Create an export in ``base_dir`` by running a ``GenericDownloader``.

    :param config: export configuration; must contain a ``"catalog"`` mapping
        with a ``"host"`` entry. May be modified in place (the ``"bag"`` entry
        is removed or given a default archiver).
    :param base_dir: output directory; also where the ``.log`` file is placed.
    :param service_url: when truthy, passed to the downloader through its
        environment variables under ``GenericDownloader.SERVICE_URL_KEY``.
    :param public: marks the access descriptor public.
    :param files_only: when true, any ``"bag"`` section is dropped from
        ``config``.
    :param quiet: log at WARN instead of INFO.
    :param propagate_logs: forwarded to ``configure_logging``.
    :param require_authentication: when true, credential failures and a
        missing identity or wallet raise ``Unauthorized``; when false, they
        are tolerated and the access descriptor is marked public.
    :param allow_anonymous_download: forwarded to the downloader as
        ``allow_anonymous``.
    :param max_payload_size_mb: forwarded to the downloader.
    :param timeout: forwarded to the downloader.
    :param dcctx_cid: forwarded to the downloader.
    :param request_ip: logged and passed to the downloader's environment.
    :return: the value returned by ``downloader.download()``.
    :raises BadRequest: missing or malformed configuration, or a failure
        while fetching the client wallet.
    :raises Unauthorized: authentication problems (see above).
    :raises Forbidden: the downloader reported an authorization error.
    :raises Conflict: the downloader reported a configuration error.
    :raises BadGateway: any other failure while downloading.
    """
    log_handler = configure_logging(logging.WARN if quiet else logging.INFO,
                                    log_path=os.path.abspath(os.path.join(base_dir, '.log')),
                                    propagate=propagate_logs)
    try:
        if not config:
            raise BadRequest("No configuration specified.")

        server = dict()
        try:
            # parse host/catalog params
            catalog_config = config["catalog"]
            host = catalog_config["host"]
            # Only parse the value as a URL when it really carries an http(s)
            # scheme; a bare host that merely begins with "http" would
            # otherwise come back from urlparse with an empty scheme and
            # netloc. A non-string host still raises AttributeError here and
            # is reported as a BadRequest below.
            if host.startswith(("http://", "https://")):
                url = urlparse(host)
                server["protocol"] = url.scheme
                server["host"] = url.netloc
            else:
                server["protocol"] = "https"
                server["host"] = host
            server["catalog_id"] = catalog_config.get('catalog_id', "1")

            # parse credential params, if found in the request payload (unlikely)
            token = catalog_config.get("token", None)
            oauth2_token = catalog_config.get("oauth2_token", None)
            username = catalog_config.get("username", "anonymous")
            password = catalog_config.get("password", None)

            # sanity-check some bag params
            if "bag" in config:
                if files_only:
                    del config["bag"]
                else:
                    if not config["bag"].get("bag_archiver"):
                        config["bag"]["bag_archiver"] = "zip"
        except (KeyError, AttributeError) as e:
            raise BadRequest('Error parsing configuration: %s' % format_exception(e))

        credentials = None
        session = get_new_requests_session()
        try:
            if token:
                # A token supplied in the payload is checked against the
                # server's session endpoint before it is used.
                auth_url = ''.join([server["protocol"], "://", server["host"], "/authn/session"])
                session.cookies.set("webauthn", token, domain=server["host"], path='/')
                response = session.get(auth_url)
                response.raise_for_status()
            if not oauth2_token:
                oauth2_token = get_bearer_token(web.ctx.env.get('HTTP_AUTHORIZATION'))
            credentials = format_credential(token=token if token else web.cookies().get("webauthn"),
                                            oauth2_token=oauth2_token,
                                            username=username,
                                            password=password)
        except (ValueError, HTTPError) as e:
            # Without required authentication the failure is tolerated and
            # credentials stay None.
            if require_authentication:
                raise Unauthorized(format_exception(e))
        finally:
            if session:
                session.close()
                del session

        wallet = None
        identity = get_client_identity()
        if identity:
            try:
                wallet = get_client_wallet()
            except (KeyError, AttributeError) as e:
                raise BadRequest(format_exception(e))
        if require_authentication and not (identity and wallet):
            raise Unauthorized()

        user_id = username if not identity else identity.get('display_name', identity.get('id'))
        create_access_descriptor(base_dir,
                                 identity=None if not identity else identity.get('id'),
                                 public=public or not require_authentication)
        try:
            sys_logger.info("Creating export at [%s] on behalf of %s at %s" % (base_dir, user_id, request_ip))
            envars = {"request_ip": request_ip}
            if service_url:
                envars.update({GenericDownloader.SERVICE_URL_KEY: service_url})
            downloader = GenericDownloader(server=server,
                                           output_dir=base_dir,
                                           envars=envars,
                                           config=config,
                                           credentials=credentials,
                                           allow_anonymous=allow_anonymous_download,
                                           max_payload_size_mb=max_payload_size_mb,
                                           timeout=timeout,
                                           dcctx_cid=dcctx_cid)
            return downloader.download(identity=identity, wallet=wallet)
        except DerivaDownloadAuthenticationError as e:
            raise Unauthorized(format_exception(e))
        except DerivaDownloadAuthorizationError as e:
            raise Forbidden(format_exception(e))
        except DerivaDownloadConfigurationError as e:
            raise Conflict(format_exception(e))
        except Exception as e:
            raise BadGateway(format_exception(e))
    finally:
        # Always detach the per-export log handler, whatever the outcome.
        if log_handler:
            logger.removeHandler(log_handler)
def export(config=None, base_dir=None, quiet=False, files_only=False):
    """Create an export in ``base_dir`` by running a ``GenericDownloader``.

    :param config: export configuration; must contain a ``"catalog"`` mapping
        with a ``"host"`` entry. May be modified in place (the ``"bag"`` entry
        is removed or given a default archiver).
    :param base_dir: output directory; also where the ``.log`` file is placed.
    :param quiet: log at WARN instead of INFO.
    :param files_only: when true, any ``"bag"`` section is dropped from
        ``config``.
    :return: the value returned by ``downloader.download()``.
    :raises BadRequest: missing or malformed configuration, or a failure
        while resolving the client identity or wallet.
    :raises Unauthorized: the credential could not be formatted, or the
        downloader reported an authentication error.
    :raises Forbidden: the downloader reported an authorization error.
    :raises Conflict: the downloader reported a configuration error.
    :raises BadGateway: any other failure while downloading.
    """
    log_handler = configure_logging(logging.WARN if quiet else logging.INFO,
                                    log_path=os.path.abspath(os.path.join(base_dir, '.log')))
    try:
        if not config:
            raise BadRequest("No configuration specified.")

        server = dict()
        try:
            # parse host/catalog params
            catalog_config = config["catalog"]
            host = catalog_config["host"]
            # Only parse the value as a URL when it really carries an http(s)
            # scheme; a bare host that merely begins with "http" would
            # otherwise come back from urlparse with an empty scheme and
            # netloc. A non-string host still raises AttributeError here and
            # is reported as a BadRequest below.
            if host.startswith(("http://", "https://")):
                url = urlparse(host)
                server["protocol"] = url.scheme
                server["host"] = url.netloc
            else:
                server["protocol"] = "https"
                server["host"] = host
            server["catalog_id"] = catalog_config.get('catalog_id', "1")

            # parse credential params
            token = catalog_config.get("token", None)
            username = catalog_config.get("username", "Anonymous")
            password = catalog_config.get("password", None)

            # sanity-check some bag params
            if "bag" in config:
                if files_only:
                    del config["bag"]
                else:
                    if not config["bag"].get("bag_archiver"):
                        config["bag"]["bag_archiver"] = "zip"
        except (KeyError, AttributeError) as e:
            raise BadRequest('Error parsing configuration: %s' % format_exception(e))

        try:
            # A token in the payload wins over the request's webauthn cookie.
            auth_token = token if token else web.cookies().get("webauthn")
            credentials = format_credential(token=auth_token, username=username, password=password)
        except ValueError as e:
            raise Unauthorized(format_exception(e))

        try:
            identity = get_client_identity()
            user_id = username if not identity else identity.get('display_name', identity.get('id'))
            create_access_descriptor(base_dir, identity=username if not identity else identity.get('id'))
            wallet = get_client_wallet()
        except (KeyError, AttributeError) as e:
            raise BadRequest(format_exception(e))

        try:
            sys_logger.info("Creating export at [%s] on behalf of user: %s" % (base_dir, user_id))
            downloader = GenericDownloader(server, output_dir=base_dir, config=config, credentials=credentials)
            return downloader.download(identity=identity, wallet=wallet)
        except DerivaDownloadAuthenticationError as e:
            raise Unauthorized(format_exception(e))
        except DerivaDownloadAuthorizationError as e:
            raise Forbidden(format_exception(e))
        except DerivaDownloadConfigurationError as e:
            raise Conflict(format_exception(e))
        except Exception as e:
            raise BadGateway(format_exception(e))
    finally:
        # Always detach the per-export log handler, whatever the outcome.
        logger.removeHandler(log_handler)