Example #1
    def bundle_ident(self):
        if not self._bundle:
            raise ConfigurationError(
                "Must assign bundle or bundle_dir to repostitory before this operation"
            )

        return self._bundle.identity
Example #2
    def install_packages(self, module_name, pip_name, force=False):
        from ambry.util.packages import install

        python_dir = self._fs.python()

        if not python_dir:
            raise ConfigurationError(
                "Can't install python requirements without a configuration item for filesystems.python"
            )

        if not os.path.exists(python_dir):
            os.makedirs(python_dir)

        sys.path.append(python_dir)

        if force:
            self.logger.info('Upgrading required package: {}->{}'.format(
                module_name, pip_name))
            install(python_dir, module_name, pip_name)
        else:
            try:
                imp.find_module(module_name)
                return  # self.log("Required package already installed: {}->{}".format(module_name, pip_name))
            except ImportError:
                self.logger.info('Installing required package: {}->{}'.format(
                    module_name, pip_name))
                install(python_dir, module_name, pip_name)
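
Note that imp.find_module comes from the deprecated imp module; a quick equivalent check with importlib (the module name below is just an illustration):

    import importlib.util

    # find_spec() returns None rather than raising ImportError when the
    # module is not installed.
    if importlib.util.find_spec('requests') is None:
        print('requests is not installed')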
Example #3
    def _expand_each(self, each):
        """Generate a set of dicts from the cross product of each of the arrays
        of 'each' group."""

        # Normalize the 'each' group, particularly for the case where there is
        # only one dimension

        if not isinstance(each, list):
            raise ConfigurationError(
                "The 'each' key must have a list. Got a {} ".format(
                    type(each)))

        elif len(each) == 0:
            each = [[{}]]
        if not isinstance(each[0], list):
            each = [each]

        # Now the top level arrays of each are dimensions, and we can do a
        # multi dimensional iteration over them.
        # This is essentially a cross-product, where out <- out X dim(i)

        out = []
        for i, dim in enumerate(each):
            if i == 0:
                out = dim
            else:
                o2 = []
                for d in dim:
                    for o in out:
                        merged = dict(d)
                        merged.update(o)  # entries already in 'out' win on key conflicts
                        o2.append(merged)
                out = o2

        return out
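
For comparison, the expansion above is essentially a cross product over the dimension lists followed by a dict merge; a minimal standalone sketch using itertools.product (the sample 'each' data is hypothetical, not from the Ambry source):

    import itertools

    def expand_each(each):
        # Cross product of the dimension lists, merging one dict per dimension.
        out = []
        for combo in itertools.product(*each):
            merged = {}
            for d in combo:
                merged.update(d)
            out.append(merged)
        return out

    each = [
        [{'year': 2010}, {'year': 2011}],
        [{'state': 'CA'}, {'state': 'OR'}],
    ]
    print(expand_each(each))
    # [{'year': 2010, 'state': 'CA'}, {'year': 2010, 'state': 'OR'},
    #  {'year': 2011, 'state': 'CA'}, {'year': 2011, 'state': 'OR'}]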
Example #4
    def _get_dependencies(self):

        if not self._bundle:
            raise ConfigurationError("Can't use the dep() method for a library that is not attached to a bundle");

        errors = 0

        deps = self._bundle.metadata.dependencies

        if not deps:
            return {}

        out = {}
        for k, v in deps.items():

            ident = self.resolve(v)
            if not ident:
                raise DependencyError("Failed to resolve {} ".format(v))

            if ident.partition:
                out[k] = ident.partition
            else:
                out[k] = ident

        return out
Example #5
def calling_code(f, f_name=None, raise_for_missing=True):
    """Return the code string for calling a function. """
    import inspect
    from ambry.dbexceptions import ConfigurationError

    if inspect.isclass(f):
        try:
            args = inspect.getargspec(f.__init__).args
        except TypeError as e:
            raise TypeError("Failed to inspect {}: {}".format(f, e))

    else:
        args = inspect.getargspec(f).args

    if len(args) > 1 and args[0] == 'self':
        args = args[1:]

    for a in args:
        if a not in all_args + (
                'exception', ):  # exception arg is only for exception handlers
            if raise_for_missing:
                raise ConfigurationError(
                    'Caster code {} has unknown argument '
                    'name: \'{}\'. Must be one of: {} '.format(
                        f, a, ','.join(all_args)))

    arg_map = {e: e for e in var_args}

    args = [arg_map.get(a, a) for a in args]

    return "{}({})".format(f_name if f_name else f.__name__, ','.join(args))
Example #6
def new_service(config):

    if config['service'] == 'github':
        from github import GitHubService  # @UnresolvedImport

        return GitHubService(**config)
    else:
        from ambry.dbexceptions import ConfigurationError
        raise ConfigurationError('No source service for name {}'.format(config['service']))
Example #7
            def resolve_data_type(value_type):
                from ambry.valuetype import resolve_value_type
                vt_class = resolve_value_type(value_type)

                if not vt_class:
                    raise ConfigurationError(
                        "Row error: unknown valuetype '{}'".format(value_type))

                return vt_class.python_type().__name__
Example #8
    def bundle_dir(self):
        if not self._bundle and not self._bundle_dir:
            raise ConfigurationError(
                "Must assign bundle or bundle_dir to repostitory before this operation"
            )

        if self._bundle_dir:
            return self._bundle_dir
        else:
            return self.bundle.bundle_dir
Example #9
def config_edit(args, l, rc):
    from ambry.dbexceptions import ConfigurationError
    from ambry.util import AttrDict

    edit_args = ' '.join(args.args)

    if args.yaml or args.json:
        if args.yaml:
            import yaml
            v = yaml.load(edit_args)
        elif args.json:
            import json
            v = json.loads(edit_args)

        d = AttrDict()
        d.update(v)

        print(d)

        rc.config.update_flat(d.flatten())

    else:
        key, value = edit_args.split('=', 1)

        value = value.strip()
        key = key.strip()
        key_parts = key.split('.')
        e = rc.config
        for k in key_parts:
            k = k.strip()
            #print(k, str(key_parts[-1]))
            if str(k) == str(key_parts[-1]):
                e[k] = value
            else:
                e = e[k]


    configs = rc.config['loaded']['configs']

    if len(configs) != 1:
        raise ConfigurationError("Configuration was loaded from multiple files; don't know which to edit; "
                                 "'{}'".format(configs))

    try:
        del rc.config['accounts']
    except KeyError:
        pass

    try:
        del rc.config['loaded']
    except KeyError:
        pass

    with open(configs[0], 'w') as f:
        rc.config.dump(f)
Example #10
 def usgeo(self):
     try:
         usgeo = self.library.dep('usgeo')
     except ConfigurationError:
         raise ConfigurationError(
             "MISSING DEPENDENCY: To use the US geo datasets, the bundle (or library) "
             "must specify a dependency with a set named 'usgeo', in build.dependencies.usgeo"
         )
     return usgeo
Example #11
    def widths(self):
        widths = [c.width for c in self.columns]
        if not all(bool(e) for e in widths):
            from ambry.dbexceptions import ConfigurationError
            raise ConfigurationError(
                'The widths array for source table {} has zero or null entries '
                .format(self.name))

        widths = [int(w) for w in widths]

        return widths
Example #12
    def __init__(self, cache, database,
                 name=None, remotes=None,
                 source_dir=None,
                 require_upload=False,
                 doc_cache=None,
                 warehouse_cache=None,
                 host=None, port=None, urlhost=None):

        '''Libraries are constructed on the root cache name for the library.
        If the cache does not exist, it will be created.

        Args:

        cache: a path name to a directory where bundle files will be stored
        database:
        remotes: URLs of remote libraries, used as a fallback for get and put.

        '''
        from ..util import get_logger

        assert database is not None

        self.name = name
        self.cache = cache
        self._doc_cache = doc_cache
        self._warehouse_cache = warehouse_cache
        self.source_dir = source_dir

        self._database = database
        self._bundle = None  # Set externally in bundle.library()
        self.host = host
        self.port = port
        self.urlhost = urlhost if urlhost else ('{}:{}'.format(self.host, self.port) if self.port else self.host)
        self.dep_cb = None  # Callback for dependency resolution
        self.require_upload = require_upload
        self._dependencies = None
        self._remotes = remotes

        self._all_vids = None

        if not self.cache:
            raise ConfigurationError("Must specify library.cache for the "
                                     "library in bundles.yaml")

        self.logger = get_logger(__name__)
        self.logger.setLevel(logging.DEBUG)

        self.needs_update = False

        self.bundles = weakref.WeakValueDictionary()

        self._search = None
Example #13
 def _places(self):
     try:
         places = self.library.dep('places').partition
     except ConfigurationError:
         raise ConfigurationError(
             "MISSING DEPENDENCY: To use the US county datasets, the bundle (or library) "
             "must specify a dependency with a set named 'places', in build.dependencies.places. "
             "See https://github.com/clarinova/ambry/wiki/Error-Messages#geoanalysisareasget_analysis_area"
         )
     return places
Example #14
def new_repository(config):

    from ..service import new_service, GitServiceMarker  # @UnresolvedImport

    if 'account' not in config:
        config['account'] = {'user': None, 'password': None}

    service_config = config['account']
    service_config.update(config)

    service = new_service(service_config)

    if isinstance(service, GitServiceMarker):
        from .git import GitRepository  # @UnresolvedImport

        return GitRepository(service=service, dir=config['dir'])
    else:
        from ambry.dbexceptions import ConfigurationError
        raise ConfigurationError('Unknown service type: {}'.format(type(service)))
Example #15
    def include(self, node):
        if not self.dir:
            return "ConfigurationError: Can't include file: wasn't able to set base directory"

        relpath = self.construct_scalar(node)
        abspath = os.path.join(self.dir, relpath)

        if not os.path.exists(abspath):
            raise ConfigurationError(
                "Can't include file '{}': Does not exist".format(abspath))

        with open(abspath, 'r') as f:

            parts = abspath.split('.')
            ext = parts.pop()

            if ext == 'yaml':
                return yaml.load(f, OrderedDictYAMLLoader)
            else:
                return IncludeFile(abspath, relpath, f.read())
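
This constructor is normally registered for a custom YAML tag; a minimal standalone sketch with PyYAML's add_constructor (the '!include' tag name and base directory are assumptions, not taken from the Ambry source):

    import os
    import yaml

    def include_constructor(loader, node):
        # Resolve the include target relative to a hypothetical base directory.
        relpath = loader.construct_scalar(node)
        abspath = os.path.join('/etc/ambry', relpath)  # base dir is a placeholder
        with open(abspath) as f:
            return yaml.safe_load(f)

    yaml.add_constructor('!include', include_constructor, Loader=yaml.SafeLoader)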
Example #16
    def record_to_objects(self):
        """Write from the stored file data to the source records"""
        from ambry.orm import SourceTable

        bsfile = self.record

        failures = set()

        # Clear out all of the columns from existing tables. We don't clear out the
        # tables, since they may be referenced by sources

        for row in bsfile.dict_row_reader:
            st = self._dataset.source_table(row['table'])

            if st:
                st.columns[:] = []

        self._dataset.commit()

        for row in bsfile.dict_row_reader:
            st = self._dataset.source_table(row['table'])

            if not st:
                st = self._dataset.new_source_table(row['table'])
                # table_number += 1

            if 'datatype' not in row:
                row['datatype'] = 'unknown'

            del row['table']

            st.add_column(**row)  # Create or update

        if failures:
            raise ConfigurationError(
                'Failed to load source schema, missing sources: {} '.format(
                    failures))

        self._dataset.commit()
Example #17
    def _compose(self, name, args, mkdir=True):
        """Get a named filesystem entry, and extend it into a path with additional
        path arguments"""
        from os import makedirs
        from os.path import join, isdir, normpath
        from ambry.dbexceptions import ConfigurationError

        root = p = self._config.filesystem[name].format(root=self._root)

        if args:
            args = [e.strip() for e in args]
            p = join(p, *args)

        if not isdir(p) and mkdir:
            makedirs(p)

        p = normpath(p)

        if not p.startswith(root):
            raise ConfigurationError(
                "Path for name='{}', args={} resolves outside of the defined filesystem root"
                .format(name, args))

        return p
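
The startswith() guard at the end is a standard containment check against path traversal; a minimal standalone sketch of the same idea (paths below are hypothetical):

    import os

    def compose(root, *args):
        # Join the parts onto the root, then verify the normalized result
        # still lies inside the root (guards against '..' escaping it).
        p = os.path.normpath(os.path.join(root, *args))
        if not p.startswith(os.path.normpath(root)):
            raise ValueError('{} resolves outside of root {}'.format(p, root))
        return p

    print(compose('/data/build', 'source', 'table.csv'))  # /data/build/source/table.csv
    # compose('/data/build', '..', 'etc', 'passwd') raises ValueError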
Example #18
    def set_api(self):
        import ambry.client.ckan
        repo_group = self.bundle.config.group('datarepo')

        if not repo_group.get(self.repo_name):
            raise ConfigurationError(
                "'repository' group in configuration is either nonexistent "
                "or missing the {} sub-group".format(self.repo_name))

        repo_config = repo_group.get(self.repo_name)

        self._api = ambry.client.ckan.Ckan(repo_config.url, repo_config.key)

        # Look for an S3 filestore

        fs_config = repo_config.get('filestore', False)

        if fs_config is not False:
            raise Exception("Deprecated?")
        else:
            self.filestore = None

        return self.remote
Example #19
    def accounts(self):
        """
        Return an account reference
        :param account_id:
        :param accounts_password: The password for decrypting the secret
        :return:
        """
        d = {}

        if False and not self._account_password:
            from ambry.dbexceptions import ConfigurationError
            raise ConfigurationError(
                "Can't access accounts without setting an account password"
                " either in the accounts.password config, or in the AMBRY_ACCOUNT_PASSWORD"
                " env var.")

        for act in self.database.session.query(Account).all():
            if self._account_password:
                act.secret_password = self._account_password
            e = act.dict
            a_id = e['account_id']
            d[a_id] = e

        return d
Example #20
    def number(self, assignment_class=None, namespace='d'):
        """
        Return a new number.

        :param assignment_class: Determines the length of the number. Possible values are 'authority' (3 characters),
            'registered' (5), 'unregistered' (7) and 'self' (9). Self-assigned numbers are random and acquired locally,
            while the other assignment classes use the number server defined in the configuration. If None,
            then look in the number server configuration for one of the class keys, starting
            with the longest class and working to the shortest.
        :param namespace: The namespace character, the first character in the number. Can be one of 'd', 'x' or 'b'
        :return:
        """
        if assignment_class == 'self':
            # When 'self' is explicit, don't look for number server config
            return str(DatasetNumber())

        elif assignment_class is None:

            try:
                nsconfig = self.services['numbers']

            except ConfigurationError:
                # A missing configuration is equivalent to 'self'
                self.logger.error(
                    'No number server configuration; returning self assigned number'
                )
                return str(DatasetNumber())

            for assignment_class in ('self', 'unregistered', 'registered',
                                     'authority'):
                if assignment_class + '-key' in nsconfig:
                    break

            # For the case where the number configuration references a self-assigned key
            if assignment_class == 'self':
                return str(DatasetNumber())

        else:
            try:
                nsconfig = self.services['numbers']

            except ConfigurationError:
                raise ConfigurationError('No number server configuration')

            if assignment_class + '-key' not in nsconfig:
                raise ConfigurationError(
                    'Assignment class {} not in number server config'.format(
                        assignment_class))

        try:

            key = nsconfig[assignment_class + '-key']
            config = {
                'key': key,
                'host': nsconfig['host'],
                'port': nsconfig.get('port', 80)
            }

            ns = NumberServer(**config)

            n = str(next(ns))
            self.logger.info('Got number from number server: {}'.format(n))

        except HTTPError as e:
            self.logger.error(
                'Failed to get number from number server for key {}: {}'.format(
                    key, e))
            self.logger.error(
                'Using self-generated number. There is no problem with this, '
                'but they are longer than centrally generated numbers.')
            n = str(DatasetNumber())

        return n
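
For reference, the shape of the services['numbers'] configuration this method reads; the key names come from the code above, while the values are placeholders:

    nsconfig = {
        'host': 'numbers.example.com',    # placeholder hostname
        'port': 80,                       # optional; defaults to 80
        'unregistered-key': 'XXXXXXXX',   # one of the '<assignment_class>-key' entries
    }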
Example #21
    def record_to_objects(self):
        """Create config records to match the file metadata"""
        from six import string_types
        from ambry.orm import Column, Table, Dataset

        def _clean_int(i):
            if i is None:
                return None
            elif isinstance(i, int):
                return i
            elif isinstance(i, string_types):
                if len(i) == 0:
                    return None

                return int(i.strip())

        bsfile = self.record

        contents = bsfile.unpacked_contents

        if not contents:
            return

        line_no = 1  # Accounts for file header. Data starts on line 2

        errors = []
        warnings = []

        extant_tables = {t.name: t for t in self._dataset.tables}

        old_types_map = {
            'varchar': Column.DATATYPE_STR,
            'integer': Column.DATATYPE_INTEGER,
            'real': Column.DATATYPE_FLOAT,
        }

        def run_progress_f(line_no):
            self._bundle.log(
                'Loading tables from file. Line #{}'.format(line_no))

        from ambry.bundle.process import CallInterval
        run_progress_f = CallInterval(run_progress_f, 10)

        table_number = self._dataset._database.next_sequence_id(
            Dataset, self._dataset.vid, Table)
        for row in bsfile.dict_row_reader:

            line_no += 1

            run_progress_f(line_no)

            # Skip blank lines
            if not row.get('column', False) and not row.get('table', False):
                continue

            if not row.get('column', False):
                raise ConfigurationError(
                    'Row error: no column on line {}'.format(line_no))

            if not row.get('table', False):
                raise ConfigurationError(
                    'Row error: no table on line {}'.format(line_no))

            if not row.get('datatype', False) and not row.get(
                    'valuetype', False):
                raise ConfigurationError(
                    'Row error: no type on line {}'.format(line_no))

            value_type = row.get('valuetype', '').strip() if row.get(
                'valuetype', False) else None
            data_type = row.get('datatype', '').strip() if row.get(
                'datatype', False) else None

            def resolve_data_type(value_type):
                from ambry.valuetype import resolve_value_type
                vt_class = resolve_value_type(value_type)

                if not vt_class:
                    raise ConfigurationError(
                        "Row error: unknown valuetype '{}'".format(value_type))

                return vt_class.python_type().__name__

            # If we have a value type field, and not the datatype,
            # the value type is as specified, and the data type is derived from it.
            if value_type and not data_type:
                data_type = resolve_data_type(value_type)

            elif data_type and not value_type:
                value_type = data_type
                data_type = resolve_data_type(value_type)

            # There are still some old data types hanging around
            data_type = old_types_map.get(data_type.lower(), data_type)

            table_name = row['table']

            try:
                table = extant_tables[table_name]
            except KeyError:
                table = self._dataset.new_table(
                    table_name,
                    sequence_id=table_number,
                    description=row.get('description')
                    if row['column'] == 'id' else '')

                table_number += 1
                extant_tables[table_name] = table

            data = {
                k.replace('d_', '', 1): v
                for k, v in list(row.items()) if k and k.startswith('d_') and v
            }

            if row['column'] == 'id':
                table.data.update(data)
                data = {}

            table.add_column(
                row['column'],
                fk_vid=row['is_fk'] if row.get('is_fk', False) else None,
                description=(row.get('description', '') or '').strip(),
                datatype=data_type,
                valuetype=value_type,
                parent=row.get('parent'),
                proto_vid=row.get('proto_vid'),
                size=_clean_int(row.get('size', None)),
                width=_clean_int(row.get('width', None)),
                data=data,
                keywords=row.get('keywords'),
                measure=row.get('measure'),
                transform=row.get('transform'),
                derivedfrom=row.get('derivedfrom'),
                units=row.get('units', None),
                universe=row.get('universe'),
                update_existing=True)

        self._dataset.t_sequence_id = table_number

        return warnings, errors
Example #22
    def impl(self):
        if not self._impl:
            raise ConfigurationError(
                "Must assign bundle to repostitory before this operation")

        return self._impl
Example #23
    def convert(self, table_name, progress_f=None):
        """Convert a spatialite geo partition to a regular partition by
        extracting the geometry and re-projecting it to WGS84.

        :param table_name: name of the new table to create for the converted partition
        :param progress_f: optional callback, called with the row number as rows are copied

        """
        import subprocess
        import csv
        from ambry.orm import Column
        from ambry.dbexceptions import ConfigurationError

        #
        # Duplicate the geo partition's table for the new partition,
        # then create the new partition
        #

        t = self.bundle.schema.add_table(table_name)

        ot = self.table

        for c in ot.columns:
            self.bundle.schema.add_column(t, c.name, datatype=c.datatype)

        #
        # Open a connection to spatialite and run the query to
        # extract CSV.
        #
        # It would be a lot more efficient to connect to the
        # Spatialite process, attach the new database, then copy the
        # records in SQL.
        #

        try:
            subprocess.check_output('spatialite -version', shell=True)
        except Exception:
            raise ConfigurationError(
                'Did not find spatialite on path. Install spatialite')

        # Check the type of geometry:
        p = subprocess.Popen(
            ('spatialite {file} "select GeometryType(geometry) FROM {table} LIMIT 1;"' .format(
                file=self.database.path,
                table=self.identity.table)),
            stdout=subprocess.PIPE,
            shell=True)

        out, _ = p.communicate()
        out = out.strip()

        if out == 'POINT':
            self.bundle.schema.add_column(
                t,
                '_db_lon',
                datatype=Column.DATATYPE_REAL)
            self.bundle.schema.add_column(
                t,
                '_db_lat',
                datatype=Column.DATATYPE_REAL)

            command_template = """spatialite -csv -header {file} "select *,
            X(Transform(geometry, 4326)) AS _db_lon, Y(Transform(geometry, 4326)) AS _db_lat
            FROM {table}" """
        else:
            self.bundle.schema.add_column(
                t,
                '_wkb',
                datatype=Column.DATATYPE_TEXT)

            command_template = """spatialite -csv -header {file} "select *,
            AsBinary(Transform(geometry, 4326)) AS _wkb
            FROM {table}" """

        self.bundle.database.commit()

        pid = self.identity
        pid.table = table_name
        arg = self.bundle.partitions.new_partition(pid)
        arg.create_with_tables()

        #
        # Now extract the data into a new database.
        #

        command = command_template.format(file=self.database.path,
                                          table=self.identity.table)

        self.bundle.log("Running: {}".format(command))

        p = subprocess.Popen(command, stdout=subprocess.PIPE, shell=True)
        stdout, stderr = p.communicate()

        #
        # Finally we can copy the data.
        #

        # local csv module shadows root #@UndefinedVariable
        rdr = csv.reader(stdout.decode('ascii').splitlines())
        header = next(rdr)

        if not progress_f:
            progress_f = lambda x: x

        with arg.database.inserter(table_name) as ins:
            for i, line in enumerate(rdr):
                ins.insert(line)
                progress_f(i)