Example #1
File: query.py Project: DOAJ/doaj
    def _validate_query(self, cfg, query):
        validator = cfg.get("query_validator")
        if validator is None:
            return True

        filters = app.config.get("QUERY_FILTERS", {})
        validator_path = filters.get(validator)
        fn = plugin.load_function(validator_path)
        if fn is None:
            msg = "Unable to load query validator for {x}".format(x=validator)
            raise exceptions.ConfigurationException(msg)

        return fn(query)
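
The query.py snippets here all follow the same indirection: the per-endpoint config (cfg) names a validator or filter, the QUERY_FILTERS app setting maps that name to a dotted function path, and plugin.load_function turns the path into a callable, returning None when it cannot. A minimal sketch of such a loader built on importlib; the real DOAJ helper may well differ:

import importlib

def load_function(path):
    # Resolve a dotted "package.module.function" string to a callable,
    # returning None on any failure, as the callers above expect.
    if not path:
        return None
    module_name, _, func_name = path.rpartition(".")
    try:
        module = importlib.import_module(module_name)
    except (ImportError, ValueError):
        return None
    return getattr(module, func_name, None)

With a hypothetical setting such as QUERY_FILTERS = {"my_validator": "myapp.filters.my_validator"}, the name stored against the endpoint resolves to a loadable path.
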
Example #2
File: query.py Project: DOAJ/doaj
    def _post_filter_search_results(self, cfg, res, unpacked=False):
        filters = app.config.get("QUERY_FILTERS", {})
        result_filter_names = cfg.get("result_filters", [])
        for result_filter_name in result_filter_names:
            fn = plugin.load_function(filters.get(result_filter_name))
            if fn is None:
                msg = "Unable to load result filter for {x}".format(x=result_filter_name)
                raise exceptions.ConfigurationException(msg)

            # apply the result filter
            res = fn(res, unpacked=unpacked)

        return res
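
A result filter is called as fn(res, unpacked=unpacked) and must return the (possibly modified) result set; the unpacked flag tells it whether res is the raw Elasticsearch response or already-unpacked records. A hypothetical filter matching that contract (the field name and the shape assumed for res are invented):

def strip_admin_section(res, unpacked=False):
    # Work on unpacked record dicts directly, or dig the sources out of a
    # raw Elasticsearch response body.
    if unpacked:
        records = res
    else:
        records = [hit.get("_source", {}) for hit in res.get("hits", {}).get("hits", [])]
    for record in records:
        record.pop("admin", None)
    return res
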
Example #3
File: query.py Project: DOAJ/doaj
    def _pre_filter_search_query(self, cfg, query):
        # now run the query through the filters
        filters = app.config.get("QUERY_FILTERS", {})
        filter_names = cfg.get("query_filters", [])
        for filter_name in filter_names:
            # because of back-compat, we have to do a few tricky things here...
            # filter may be the name of a filter in the list of query filters
            fn = plugin.load_function(filters.get(filter_name))
            if fn is None:
                msg = "Unable to load query filter for {x}".format(x=filter_name)
                raise exceptions.ConfigurationException(msg)

            # run the filter
            fn(query)

        return query
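
Note that _pre_filter_search_query discards each filter's return value and returns the original query object, so a query filter must mutate the query in place. A hypothetical filter written against a plain dict-shaped Elasticsearch query body (DOAJ's real filters may operate on a query wrapper object instead):

def restrict_to_public(query):
    # Append a term clause in place; the return value is ignored by the caller.
    bool_clause = query.setdefault("query", {}).setdefault("bool", {})
    bool_clause.setdefault("must", []).append({"term": {"admin.in_doaj": True}})
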
Example #4
File: upgrade.py Project: mauromsl/doaj
def do_upgrade(definition, verbose):
    # get the source and target es definitions
    source = definition.get("source")
    target = definition.get("target")

    if source is None:
        source = {
            "host": app.config.get("ELASTIC_SEARCH_HOST"),
            "index": app.config.get("ELASTIC_SEARCH_DB")
        }

    if target is None:
        target = {
            "host": app.config.get("ELASTIC_SEARCH_HOST"),
            "index": app.config.get("ELASTIC_SEARCH_DB"),
            "mappings": False
        }

    sconn = esprit.raw.Connection(source.get("host"), source.get("index"))
    tconn = esprit.raw.Connection(target.get("host"), target.get("index"))

    if verbose:
        print("Source", source)
        print("Target", target)

    # get the defined batch size
    batch_size = definition.get("batch", 500)

    for tdef in definition.get("types", []):
        print("Upgrading", tdef.get("type"))
        batch = []
        total = 0
        first_page = esprit.raw.search(sconn, tdef.get("type"))
        max = first_page.json().get("hits", {}).get("total", 0)
        type_start = datetime.now()

        default_query = {
            "query": {"match_all": {}}
        }

        try:
            for result in esprit.tasks.scroll(sconn, tdef.get("type"), q=tdef.get("query", default_query), keepalive=tdef.get("keepalive", "1m"), page_size=tdef.get("scroll_size", 1000), scan=True):
                # learn what kind of model we've got
                model_class = MODELS.get(tdef.get("type"))

                original = deepcopy(result)
                if tdef.get("init_with_model", True):
                    # instantiate an object with the data
                    try:
                        result = model_class(**result)
                    except DataStructureException as e:
                        print("Could not create model for {0}, Error: {1}".format(result['id'], str(e)))
                        continue

                for function_path in tdef.get("functions", []):
                    fn = plugin.load_function(function_path)
                    result = fn(result)

                data = result
                _id = result.get("id", "id not specified")
                if isinstance(result, model_class):
                    # run the tasks specified with this object type
                    tasks = tdef.get("tasks", None)
                    if tasks:
                        for func_call, kwargs in tasks.items():
                            getattr(result, func_call)(**kwargs)

                    # run the prep routine for the record
                    try:
                        result.prep()
                    except AttributeError:
                        if verbose:
                            print(tdef.get("type"), result.id, "has no prep method - no, pre-save preparation being done")
                        pass

                    data = result.data
                    _id = result.id

                # add the data to the batch
                data = _diff(original, data)
                if "id" not in data:
                    data["id"] = _id
                data = {"doc" : data}

                batch.append(data)
                if verbose:
                    print("added", tdef.get("type"), _id, "to batch update")

                # When we have enough, do some writing
                if len(batch) >= batch_size:
                    total += len(batch)
                    print(datetime.now(), "writing ", len(batch), "to", tdef.get("type"), ";", total, "of", max)
                    esprit.raw.bulk(tconn, batch, idkey="doc.id", type_=tdef.get("type"), bulk_type="update")
                    batch = []
                    # do some timing predictions
                    batch_tick = datetime.now()
                    time_so_far = batch_tick - type_start
                    seconds_so_far = time_so_far.total_seconds()
                    estimated_seconds_remaining = ((seconds_so_far * max) / total) - seconds_so_far
                    estimated_finish = batch_tick + timedelta(seconds=estimated_seconds_remaining)
                    print('Estimated finish time for this type {0}.'.format(estimated_finish))
        except esprit.tasks.ScrollTimeoutException:
            # Try to write the part-batch to index
            if len(batch) > 0:
                total += len(batch)
                print(datetime.now(), "scroll timed out / writing ", len(batch), "to", tdef.get("type"), ";", total, "of", max)
                esprit.raw.bulk(tconn, batch, idkey="doc.id", type_=tdef.get("type"), bulk_type="update")
                batch = []

        # Write the last part-batch to index
        if len(batch) > 0:
            total += len(batch)
            print(datetime.now(), "final result set / writing ", len(batch), "to", tdef.get("type"), ";", total, "of", max)
            esprit.raw.bulk(tconn, batch, idkey="doc.id", type_=tdef.get("type"), bulk_type="update")
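
Reading off the keys that do_upgrade consumes, a migration definition is a dict shaped roughly as follows; the hosts, index names, function path and task name below are purely illustrative:

definition = {
    "source": {"host": "http://localhost:9200", "index": "doaj"},
    "target": {"host": "http://localhost:9200", "index": "doaj-new", "mappings": False},
    "batch": 500,                                  # flush size for bulk updates
    "types": [
        {
            "type": "journal",                     # also selects the class in MODELS
            "query": {"query": {"match_all": {}}}, # optional; falls back to default_query
            "keepalive": "1m",                     # scroll keepalive
            "scroll_size": 1000,
            "init_with_model": True,               # wrap each hit in the model class
            "functions": ["myapp.migrations.fix_journal"],  # hypothetical dotted paths
            "tasks": {"set_in_doaj": {"value": True}}       # method name -> kwargs, hypothetical
        }
    ]
}
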
Example #5
File: upgrade.py Project: DOAJ/doaj
def do_upgrade(definition, verbose):
    # get the source and target es definitions
    source = definition.get("source")
    target = definition.get("target")

    if source is None:
        source = {
            "host": app.config.get("ELASTIC_SEARCH_HOST"),
            "index": app.config.get("ELASTIC_SEARCH_DB")
        }

    if target is None:
        target = {
            "host": app.config.get("ELASTIC_SEARCH_HOST"),
            "index": app.config.get("ELASTIC_SEARCH_DB"),
            "mappings": False
        }

    sconn = esprit.raw.Connection(source.get("host"), source.get("index"))
    tconn = esprit.raw.Connection(target.get("host"), target.get("index"))

    if verbose:
        print "Source", source
        print "Target", target

    # get the defined batch size
    batch_size = definition.get("batch", 500)

    for tdef in definition.get("types", []):
        print "Upgrading", tdef.get("type")
        batch = []
        total = 0
        first_page = esprit.raw.search(sconn, tdef.get("type"))
        max = first_page.json().get("hits", {}).get("total", 0)
        type_start = datetime.now()

        try:
            for result in esprit.tasks.scroll(sconn, tdef.get("type"), keepalive=tdef.get("keepalive", "1m"), page_size=tdef.get("scroll_size", 1000), scan=True):
                # learn what kind of model we've got
                model_class = MODELS.get(tdef.get("type"))

                original = deepcopy(result)
                if tdef.get("init_with_model", True):
                    # instantiate an object with the data
                    try:
                        result = model_class(**result)
                    except DataStructureException as e:
                        print "Could not create model for {0}, Error: {1}".format(result['id'], e.message)
                        continue

                for function_path in tdef.get("functions", []):
                    fn = plugin.load_function(function_path)
                    result = fn(result)

                data = result
                _id = result.get("id", "id not specified")
                if isinstance(result, model_class):
                    # run the tasks specified with this object type
                    tasks = tdef.get("tasks", None)
                    if tasks:
                        for func_call, kwargs in tasks.items():
                            getattr(result, func_call)(**kwargs)

                    # run the prep routine for the record
                    try:
                        result.prep()
                    except AttributeError:
                        if verbose:
                            print tdef.get("type"), result.id, "has no prep method - no, pre-save preparation being done"
                        pass

                    data = result.data
                    _id = result.id

                # add the data to the batch
                data = _diff(original, data)
                if "id" not in data:
                    data["id"] = _id
                data = {"doc" : data}

                batch.append(data)
                if verbose:
                    print "added", tdef.get("type"), _id, "to batch update"

                # When we have enough, do some writing
                if len(batch) >= batch_size:
                    total += len(batch)
                    print datetime.now(), "writing ", len(batch), "to", tdef.get("type"), ";", total, "of", max
                    esprit.raw.bulk(tconn, batch, idkey="doc.id", type_=tdef.get("type"), bulk_type="update")
                    batch = []
                    # do some timing predictions
                    batch_tick = datetime.now()
                    time_so_far = batch_tick - type_start
                    seconds_so_far = time_so_far.total_seconds()
                    estimated_seconds_remaining = ((seconds_so_far * max) / total) - seconds_so_far
                    estimated_finish = batch_tick + timedelta(seconds=estimated_seconds_remaining)
                    print('Estimated finish time for this type {0}.'.format(estimated_finish))
        except esprit.tasks.ScrollTimeoutException:
            # Try to write the part-batch to index
            if len(batch) > 0:
                total += len(batch)
                print datetime.now(), "scroll timed out / writing ", len(batch), "to", tdef.get("type"), ";", total, "of", max
                esprit.raw.bulk(tconn, batch, idkey="doc.id", type_=tdef.get("type"), bulk_type="update")
                batch = []

        # Write the last part-batch to index
        if len(batch) > 0:
            total += len(batch)
            print datetime.now(), "final result set / writing ", len(batch), "to", tdef.get("type"), ";", total, "of", max
            esprit.raw.bulk(tconn, batch, idkey="doc.id", type_=tdef.get("type"), bulk_type="update")