Exemplo n.º 1
0
    def download(output_path,
                 hostname,
                 catalog=1,
                 token=None,
                 kwargs=None,
                 config_file=None,
                 credential_file=None):
        """
        Build a ``GenericDownloader`` for one catalog and run it.

        :param output_path: handed to the downloader as ``output_dir``.
        :param hostname: bare host name (``example.org``) or an
            ``http://`` / ``https://`` URL. For a URL the scheme and network
            location are used; for a bare host name the protocol is ``https``.
        :param catalog: stored as ``catalog_id`` in the server description.
        :param token: optional; when truthy it is wrapped with
            ``format_credential`` and installed via ``setCredentials``.
        :param kwargs: forwarded unchanged as the ``kwargs`` keyword.
        :param config_file: forwarded unchanged to the downloader.
        :param credential_file: forwarded unchanged to the downloader.
        :return: whatever ``downloader.download()`` returns.
        :raises AssertionError: if ``hostname`` is empty.
        """
        assert hostname, "A hostname is required!"
        server = {"catalog_id": catalog}
        # Only a real "http://" / "https://" prefix marks a URL. A plain
        # startswith("http") test also matches host names such as
        # "httpbin.org", which urlparse() splits into an empty scheme and an
        # empty netloc, leaving the server description blank.
        if hostname.lower().startswith(("http://", "https://")):
            url = urlparse(hostname)
            server["protocol"] = url.scheme
            server["host"] = url.netloc
        else:
            server["protocol"] = "https"
            server["host"] = hostname

        downloader = GenericDownloader(server,
                                       output_dir=output_path,
                                       kwargs=kwargs,
                                       config_file=config_file,
                                       credential_file=credential_file)
        if token:
            downloader.setCredentials(format_credential(token))

        return downloader.download()
Exemplo n.º 2
0
 def host_to_url(host, path="/"):
     """
     Combine a host (or full URL) and a path into a lower-cased URL string.

     :param host: bare host name or a URL with scheme and network location.
         A bare host name is prefixed with ``https://``.
     :param path: path to append; defaults to ``"/"``.
     :return: the lower-cased URL, or ``None`` when ``host`` is empty.
     """
     if not host:
         return None
     upr = urlparse(host)
     if upr.scheme and upr.netloc:
         url = urljoin(host, path)
     else:
         if host.endswith("/"):
             # The host already supplies the separating slash: drop only the
             # duplicate leading slash of the path instead of discarding the
             # whole path.
             suffix = path or ""
             if suffix.startswith("/"):
                 suffix = suffix[1:]
         else:
             suffix = path
         url = "https://%s%s" % (host, suffix)
     return url.lower()
    def infer(self, limit=None, confidence=.75):
        """
        Infer the column types of the table by looking at its cell values.

        ``Table.infer`` runs first to set up headers and schema. The rows are
        then read uncast and every cell is folded, column by column, through
        ``__get_type``. The resulting types replace the ``fields`` entry of
        the schema descriptor, the key constraints are re-applied and the
        schema is committed. ``self.row_count`` is set to the number of rows
        examined.

        :param limit: maximum number of rows to examine; ``None`` examines
            every row.
        :param confidence: accepted but not used by this implementation.
        :raises DerivaCSVError: if a column ends up with a type that has no
            schema mapping. This includes a column whose examined cells were
            all empty, because its inferred type stays ``None``.
        """
        # Do initial infer to set up headers and schema.
        Table.infer(self)

        rows = self.read(cast=False)
        headers = self.headers
        # Get descriptor
        fields = [{'name': header} for header in headers]
        type_matches = {}

        # Count the rows actually examined. Using the last enumerate() index
        # reported N-1 after a full read but `limit` after an early break.
        examined = 0
        for rindex, row in enumerate(rows):
            if limit is not None and rindex == limit:
                break
            # build a column-wise lookup of type matches
            for cindex, value in enumerate(row):
                type_matches[cindex] = self.__get_type(value, type_matches.get(cindex, None))
            examined = rindex + 1
        self.row_count = examined

        # Python type -> (table schema type name, format).
        url_type = type(urlparse('foo'))
        schema_types = {
            bool: ('boolean', 'default'),
            int: ('integer', 'default'),
            float: ('number', 'default'),
            str: ('string', 'default'),
            datetime.datetime: ('datetime', 'any'),
            url_type: ('string', 'uri'),
        }

        for index, results in type_matches.items():
            mapping = schema_types.get(results)
            if mapping is None:
                raise DerivaCSVError(msg='Bad type in infer')
            type_name, type_format = mapping
            fields[index].update({'type': type_name, 'format': type_format})

        # Now update the schema to have the inferred values.
        self.schema.descriptor['fields'] = fields

        # Reset the key constraints as they were blasted away by the infer.
        self.__set_key_constraints()
        self.schema.commit()
        return
Exemplo n.º 4
0
    def download(cls, args):
        """
        Build a downloader from parsed arguments and run it.

        :param args: object with at least ``host`` and ``catalog``
            attributes; all of its attributes are also forwarded to
            ``cls.get_downloader`` as keyword arguments via ``vars(args)``.
            ``host`` may be a bare host name or an ``http://`` / ``https://``
            URL; a bare host name gets the ``https`` protocol.
        :return: whatever ``downloader.download()`` returns.
        :raises AssertionError: if ``args.host`` is empty.
        """
        assert args.host, "A hostname is required!"
        server = {"catalog_id": args.catalog}
        # Only a real "http://" / "https://" prefix marks a URL. A plain
        # startswith("http") test also matches host names such as
        # "httpbin.org", which urlparse() splits into an empty scheme and an
        # empty netloc.
        if args.host.lower().startswith(("http://", "https://")):
            url = urlparse(args.host)
            server["protocol"] = url.scheme
            server["host"] = url.netloc
        else:
            server["protocol"] = "https"
            server["host"] = args.host

        downloader = cls.get_downloader(server, **vars(args))
        # The downloader's class name is used as its dcctx client id.
        downloader.set_dcctx_cid(downloader.__class__.__name__)

        return downloader.download()
    def __get_type(val, prev_type):
        """
        Work out the column type implied by one cell, given the type inferred
        from the cells seen so far.

        :param val: raw cell text. ``''`` and ``None`` count as an empty cell.
        :param prev_type: type inferred from earlier cells of the column, or
            ``None`` when nothing has been inferred yet.
        :return: ``prev_type`` unchanged for an empty cell or once the column
            is ``str``; otherwise one of ``bool``, ``int``, ``float``,
            ``str``, ``datetime.datetime`` or the ``urlparse`` result type.
            ``int`` and ``float`` merge to ``float``; any other disagreement
            with ``prev_type`` yields ``str``.
        """
        # Skip over empty cells or if you have already gotten to string type.
        if val is None or val == '' or prev_type is str:
            return prev_type

        # Deal with booleans so you don't confuse with strings.
        if val.upper() in ('TRUE', 'FALSE'):
            val_type = bool
        else:
            # Now see if you can turn into python numeric type...
            # NOTE: literal_eval is safe against code execution, but the
            # exceptions below can all be triggered by malformed cell text.
            try:
                v = ast.literal_eval(val)
            except (ValueError, TypeError, SyntaxError, MemoryError,
                    RecursionError):
                v = val
            # literal_eval also accepts tuples, lists, dicts, sets, None,
            # bytes and complex numbers (a cell such as "1,2" is a tuple).
            # None of those is a column type, so keep the cell as plain text.
            if type(v) not in (int, float, str):
                v = val
            val_type = type(v)

            if val_type is str:
                try:
                    dateutil.parser.parse(v, ignoretz=True)
                    val_type = datetime.datetime
                except (ValueError, OverflowError):
                    # Not a date: OverflowError is raised for out-of-range
                    # numeric components.
                    pass

            if val_type is str:
                try:
                    url_result = urlparse(v)
                except ValueError:
                    # e.g. an unbalanced IPv6 bracket; treat as plain text.
                    url_result = None
                if (url_result is not None and url_result.scheme != ''
                        and url_result.netloc != ''):
                    val_type = type(url_result)

        next_type = val_type

        # Do promotion/demotion.
        if prev_type is not None:
            # Float overrides integer.
            if (val_type == float
                    and prev_type == int) or (val_type == int
                                              and prev_type == float):
                next_type = float
            elif val_type != prev_type:  # Types are different, so pick text
                next_type = str

        return next_type
Exemplo n.º 6
0
    def upload(uploader,
               data_path,
               hostname,
               catalog=1,
               token=None,
               config_file=None,
               credential_file=None,
               no_update=False,
               purge=False):
        """
        Instantiate an uploader for one catalog, scan a directory and upload
        the files it finds.

        :param uploader: a ``DerivaUpload`` subclass (the class itself); it
            is called as ``uploader(config_file, credential_file, server)``.
        :param data_path: directory passed to ``scanDirectory``.
        :param hostname: bare host name or an ``http://`` / ``https://`` URL;
            a bare host name gets the ``https`` protocol.
        :param catalog: stored as ``catalog_id`` in the server description.
        :param token: optional; when truthy it is wrapped with
            ``format_credential`` and installed via ``setCredentials``.
        :param config_file: passed to the uploader; when it is falsy and
            ``no_update`` is false, an updated configuration is requested
            and, if one is returned, written to the deployed config path.
        :param credential_file: passed to the uploader.
        :param no_update: suppresses the configuration update described above.
        :param purge: forwarded to ``scanDirectory`` as ``purge_state``.
        :raises TypeError: if ``uploader`` is not a ``DerivaUpload`` subclass.
        :raises AssertionError: if ``hostname`` is empty.
        :raises RuntimeError: if ``isVersionCompatible()`` returns a falsy
            value.
        """
        if not issubclass(uploader, DerivaUpload):
            raise TypeError("DerivaUpload subclass required")

        assert hostname
        server = {"catalog_id": catalog}
        # Only a real "http://" / "https://" prefix marks a URL. A plain
        # startswith("http") test also matches host names such as
        # "httpbin.org", which urlparse() splits into an empty scheme and an
        # empty netloc.
        if hostname.lower().startswith(("http://", "https://")):
            url = urlparse(hostname)
            server["protocol"] = url.scheme
            server["host"] = url.netloc
        else:
            server["protocol"] = "https"
            server["host"] = hostname

        deriva_uploader = uploader(config_file, credential_file, server)
        # The uploader's class name is used as its dcctx client id.
        deriva_uploader.set_dcctx_cid(deriva_uploader.__class__.__name__)
        if token:
            deriva_uploader.setCredentials(format_credential(token))
        if not config_file and not no_update:
            config = deriva_uploader.getUpdatedConfig()
            if config:
                write_config(deriva_uploader.getDeployedConfigFilePath(), config)
        if not deriva_uploader.isVersionCompatible():
            raise RuntimeError("Version incompatibility detected", "Current version: [%s], required version(s): %s." % (
                deriva_uploader.getVersion(), deriva_uploader.getVersionCompatibility()))
        deriva_uploader.scanDirectory(data_path, abort_on_invalid_input=False, purge_state=purge)
        deriva_uploader.uploadFiles(file_callback=deriva_uploader.defaultFileCallback)
        # NOTE(review): cleanup() is skipped if scanning or uploading raises;
        # confirm whether it should run in a finally block.
        deriva_uploader.cleanup()
Exemplo n.º 7
0
def export(config=None,
           base_dir=None,
           service_url=None,
           public=False,
           files_only=False,
           quiet=False,
           propagate_logs=True,
           require_authentication=True,
           allow_anonymous_download=False,
           max_payload_size_mb=None,
           timeout=None,
           dcctx_cid="export/unknown",
           request_ip="ip-unknown"):
    """
    Run a ``GenericDownloader`` export on behalf of a web request.

    :param config: export configuration. It must contain a ``"catalog"``
        mapping with a ``"host"`` entry; ``"catalog_id"`` (default ``"1"``),
        ``"token"``, ``"oauth2_token"``, ``"username"`` and ``"password"``
        are optional. The mapping is modified in place: ``"bag"`` is removed
        when ``files_only`` is set, otherwise a missing ``"bag_archiver"`` is
        set to ``"zip"``.
    :param base_dir: output directory; the log file ``.log`` is written
        inside it.
    :param service_url: when truthy, added to the downloader environment
        under ``GenericDownloader.SERVICE_URL_KEY``.
    :param public: passed to ``create_access_descriptor``; the descriptor is
        also public whenever ``require_authentication`` is false.
    :param files_only: drop the ``"bag"`` section of ``config``.
    :param quiet: log at ``WARN`` instead of ``INFO``.
    :param propagate_logs: forwarded to ``configure_logging`` as
        ``propagate``.
    :param require_authentication: when true, credential failures raise
        ``Unauthorized`` instead of being ignored.
    :param allow_anonymous_download: forwarded to the downloader as
        ``allow_anonymous``.
    :param max_payload_size_mb: forwarded to the downloader.
    :param timeout: forwarded to the downloader.
    :param dcctx_cid: forwarded to the downloader.
    :param request_ip: logged and added to the downloader environment.
    :return: whatever ``downloader.download(...)`` returns.
    :raises BadRequest: missing or malformed configuration, or a failure
        while reading the client wallet.
    :raises Unauthorized: authentication failed or is missing.
    :raises Forbidden: the downloader reported an authorization error.
    :raises Conflict: the downloader reported a configuration error.
    :raises BadGateway: any other failure while downloading.
    """
    log_handler = configure_logging(logging.WARN if quiet else logging.INFO,
                                    log_path=os.path.abspath(
                                        os.path.join(base_dir, '.log')),
                                    propagate=propagate_logs)
    try:
        if not config:
            raise BadRequest("No configuration specified.")
        server = dict()
        try:
            # parse host/catalog params
            catalog_config = config["catalog"]
            host = catalog_config["host"]
            # Only a real "http://" / "https://" prefix marks a URL. A plain
            # startswith("http") test also matches host names such as
            # "httpbin.org", which urlparse() splits into an empty scheme and
            # an empty netloc. A non-string host still raises AttributeError
            # here and is reported as BadRequest below.
            if host.lower().startswith(("http://", "https://")):
                url = urlparse(host)
                server["protocol"] = url.scheme
                server["host"] = url.netloc
            else:
                server["protocol"] = "https"
                server["host"] = host
            server["catalog_id"] = catalog_config.get('catalog_id', "1")

            # parse credential params, if found in the request payload (unlikely)
            token = catalog_config.get("token", None)
            oauth2_token = catalog_config.get("oauth2_token", None)
            username = catalog_config.get("username", "anonymous")
            password = catalog_config.get("password", None)

            # sanity-check some bag params
            if "bag" in config:
                if files_only:
                    del config["bag"]
                else:
                    if not config["bag"].get("bag_archiver"):
                        config["bag"]["bag_archiver"] = "zip"

        except (KeyError, AttributeError) as e:
            raise BadRequest('Error parsing configuration: %s' %
                             format_exception(e))

        credentials = None
        session = get_new_requests_session()
        try:
            if token:
                # Validate a payload-supplied token against the server's
                # session endpoint before using it.
                auth_url = ''.join([
                    server["protocol"], "://", server["host"], "/authn/session"
                ])
                session.cookies.set("webauthn",
                                    token,
                                    domain=server["host"],
                                    path='/')
                response = session.get(auth_url)
                response.raise_for_status()
            if not oauth2_token:
                # Fall back to the bearer token of the incoming request.
                oauth2_token = get_bearer_token(
                    web.ctx.env.get('HTTP_AUTHORIZATION'))
            credentials = format_credential(
                token=token if token else web.cookies().get("webauthn"),
                oauth2_token=oauth2_token,
                username=username,
                password=password)
        except (ValueError, HTTPError) as e:
            # Without require_authentication the failure is deliberately
            # ignored and credentials stay None.
            if require_authentication:
                raise Unauthorized(format_exception(e))
        finally:
            if session:
                session.close()
                del session

        wallet = None
        identity = get_client_identity()
        if identity:
            try:
                wallet = get_client_wallet()
            except (KeyError, AttributeError) as e:
                raise BadRequest(format_exception(e))
            # NOTE(review): this check only runs when an identity exists, so
            # a request without an identity is not rejected here even when
            # require_authentication is true - confirm this is intended.
            if require_authentication and not (identity and wallet):
                raise Unauthorized()

        user_id = username if not identity else identity.get(
            'display_name', identity.get('id'))
        create_access_descriptor(
            base_dir,
            identity=None if not identity else identity.get('id'),
            public=public or not require_authentication)
        try:
            sys_logger.info("Creating export at [%s] on behalf of %s at %s" %
                            (base_dir, user_id, request_ip))
            envars = {"request_ip": request_ip}
            if service_url:
                envars.update({GenericDownloader.SERVICE_URL_KEY: service_url})
            downloader = GenericDownloader(
                server=server,
                output_dir=base_dir,
                envars=envars,
                config=config,
                credentials=credentials,
                allow_anonymous=allow_anonymous_download,
                max_payload_size_mb=max_payload_size_mb,
                timeout=timeout,
                dcctx_cid=dcctx_cid)
            return downloader.download(identity=identity, wallet=wallet)
        # Translate downloader failures into the matching web errors.
        except DerivaDownloadAuthenticationError as e:
            raise Unauthorized(format_exception(e))
        except DerivaDownloadAuthorizationError as e:
            raise Forbidden(format_exception(e))
        except DerivaDownloadConfigurationError as e:
            raise Conflict(format_exception(e))
        except Exception as e:
            raise BadGateway(format_exception(e))

    finally:
        # Always detach the per-export log handler, whatever the outcome.
        if log_handler:
            logger.removeHandler(log_handler)
Exemplo n.º 8
0
def export(config=None, base_dir=None, quiet=False, files_only=False):
    """
    Run a ``GenericDownloader`` export on behalf of a web request.

    :param config: export configuration. It must contain a ``"catalog"``
        mapping with a ``"host"`` entry; ``"catalog_id"`` (default ``"1"``),
        ``"token"``, ``"username"`` and ``"password"`` are optional. The
        mapping is modified in place: ``"bag"`` is removed when
        ``files_only`` is set, otherwise a missing ``"bag_archiver"`` is set
        to ``"zip"``.
    :param base_dir: output directory; the log file ``.log`` is written
        inside it.
    :param quiet: log at ``WARN`` instead of ``INFO``.
    :param files_only: drop the ``"bag"`` section of ``config``.
    :return: whatever ``downloader.download(...)`` returns.
    :raises BadRequest: missing or malformed configuration, or a failure
        while reading the client identity or wallet.
    :raises Unauthorized: credentials could not be formatted, or the
        downloader reported an authentication error.
    :raises Forbidden: the downloader reported an authorization error.
    :raises Conflict: the downloader reported a configuration error.
    :raises BadGateway: any other failure while downloading.
    """
    log_handler = configure_logging(logging.WARN if quiet else logging.INFO,
                                    log_path=os.path.abspath(
                                        os.path.join(base_dir, '.log')))
    try:
        if not config:
            raise BadRequest("No configuration specified.")
        server = dict()
        try:
            # parse host/catalog params
            catalog_config = config["catalog"]
            host = catalog_config["host"]
            # Only a real "http://" / "https://" prefix marks a URL. A plain
            # startswith("http") test also matches host names such as
            # "httpbin.org", which urlparse() splits into an empty scheme and
            # an empty netloc. A non-string host still raises AttributeError
            # here and is reported as BadRequest below.
            if host.lower().startswith(("http://", "https://")):
                url = urlparse(host)
                server["protocol"] = url.scheme
                server["host"] = url.netloc
            else:
                server["protocol"] = "https"
                server["host"] = host
            server["catalog_id"] = catalog_config.get('catalog_id', "1")

            # parse credential params
            token = catalog_config.get("token", None)
            username = catalog_config.get("username", "Anonymous")
            password = catalog_config.get("password", None)

            # sanity-check some bag params
            if "bag" in config:
                if files_only:
                    del config["bag"]
                else:
                    if not config["bag"].get("bag_archiver"):
                        config["bag"]["bag_archiver"] = "zip"

        except (KeyError, AttributeError) as e:
            raise BadRequest('Error parsing configuration: %s' %
                             format_exception(e))

        try:
            # A token from the payload wins over the request's webauthn cookie.
            auth_token = token if token else web.cookies().get("webauthn")
            credentials = format_credential(token=auth_token,
                                            username=username,
                                            password=password)
        except ValueError as e:
            raise Unauthorized(format_exception(e))

        try:
            identity = get_client_identity()
            user_id = username if not identity else identity.get(
                'display_name', identity.get('id'))
            create_access_descriptor(
                base_dir,
                identity=username if not identity else identity.get('id'))
            wallet = get_client_wallet()
        except (KeyError, AttributeError) as e:
            raise BadRequest(format_exception(e))

        try:
            sys_logger.info("Creating export at [%s] on behalf of user: %s" %
                            (base_dir, user_id))
            downloader = GenericDownloader(server,
                                           output_dir=base_dir,
                                           config=config,
                                           credentials=credentials)
            return downloader.download(identity=identity, wallet=wallet)
        # Translate downloader failures into the matching web errors.
        except DerivaDownloadAuthenticationError as e:
            raise Unauthorized(format_exception(e))
        except DerivaDownloadAuthorizationError as e:
            raise Forbidden(format_exception(e))
        except DerivaDownloadConfigurationError as e:
            raise Conflict(format_exception(e))
        except Exception as e:
            raise BadGateway(format_exception(e))

    finally:
        # Always detach the per-export log handler, whatever the outcome.
        logger.removeHandler(log_handler)