Example no. 1
def _load(self):
    """
    Loads data relating to the recovery of a fragment for this request
    """
    super(FragmentSink, self)._load()
    self._graph_pattern = GraphPattern(r.smembers('{}gp'.format(self._request_key)))
    self._fragment_pattern = GraphPattern(r.smembers('{}:gp'.format(self._fragment_key)))
    self._filter_mapping = r.hgetall('{}filters'.format(self._request_key))
    self._dict_fields['mapping'] = r.hgetall('{}map'.format(self._request_key))
    self._dict_fields['preferred_labels'] = set(r.smembers('{}pl'.format(self._request_key)))
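
The key formats above imply that self._request_key already ends with a separator (e.g. ':'), while self._fragment_key does not. A minimal sketch of the writes this loader expects, with illustrative values; the save-side calls and key contents below are assumptions, not taken from the source:

# Hypothetical save-side writes matching the keys read by _load:
r.sadd('{}gp'.format(request_key), '?s foaf:name ?name')     # request graph pattern
r.sadd('{}:gp'.format(fragment_key), '?s foaf:name ?name')   # fragment graph pattern
r.hset('{}filters'.format(request_key), '?name', 'FILTER(?name = "Alice")')
r.hset('{}map'.format(request_key), '?name', '?v0')          # request-to-fragment variable map
r.sadd('{}pl'.format(request_key), '?name')                  # preferred labels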
Example no. 2
def __get_fragment(fid):
    if not r.sismember('{}:fragments'.format(AGENT_ID), fid):
        raise NotFound('The fragment {} does not exist'.format(fid))

    f_dict = {
        'id': fid,
        'gp': list(r.smembers('{}:fragments:{}:gp'.format(AGENT_ID, fid))),
        'synced': r.exists('{}:fragments:{}:sync'.format(AGENT_ID, fid)),
        'requests': list(r.smembers('{}:fragments:{}:requests'.format(AGENT_ID, fid)))
    }

    return f_dict
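
Since Example no. 15 below exposes fragments through Flask's jsonify, this helper plausibly backs a single-fragment endpoint. A minimal sketch, assuming a Flask app object; the route path is an assumption:

# Hypothetical endpoint built on __get_fragment:
@app.route('/fragments/<fid>')
def fragment(fid):
    # The NotFound raised by __get_fragment maps naturally to a 404 response
    return jsonify(__get_fragment(fid))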
Example no. 3
def load(self):
    dict_fields = r.hgetall(self._enrichment_key)
    target = dict_fields.get('target', None)
    # Guard against a missing target: URIRef(None) would raise
    self.target = URIRef(target) if target is not None else None
    self.fragment_id = dict_fields.get('fragment_id', None)
    # Each stored pair is the string form of a (link, variable) tuple;
    # ast.literal_eval is a safer substitute for eval here (requires `import ast`)
    self.links = [(URIRef(link), v) for (link, v) in
                  (ast.literal_eval(pair_str) for pair_str in
                   r.smembers('{}:links'.format(self._enrichment_key)))]
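
For load to parse the links set, each member must be the string form of a (link, variable) 2-tuple. A minimal sketch of the save-side write this implies; the variable names are illustrative:

# Hypothetical counterpart write: store each link pair as a tuple literal
r.sadd('{}:links'.format(enrichment_key), str(('http://example.org/resource/1', '?v0')))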
Example no. 4
def __notify_completion(fid, sinks):
    """
    Notify the end of a fragment collection to all registered plugins
    :param fid: Fragment id
    :param sinks: Set of dependent sinks
    :return:
    """

    for sink in sinks.values():
        if sink.delivery == 'accepted':
            sink.delivery = 'ready'

    plugins = FragmentPlugin.plugins()
    if plugins:
        fragment_gp = GraphPattern(r.smembers('{}:fragments:{}:gp'.format(AGENT_ID, fid)))
        for plugin in plugins:
            try:
                filtered_sinks = filter(lambda rid: isinstance(sinks[rid], plugin.sink_class), sinks)
                for rid in filtered_sinks:
                    sink = sinks[rid]
                    if plugin.sink_aware:
                        plugin.complete(fid, sink)
                # Plugins that are not sink-aware get a single, fragment-level notification
                if not plugin.sink_aware:
                    plugin.complete(fid, fragment_gp)
            except Exception as e:
                log.warning(e.message)
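
The sink_aware flag decides whether a plugin is notified once per matching sink or once per fragment. A minimal, hypothetical plugin illustrating that dispatch; the FragmentPlugin interface is inferred from the code above:

class LoggingPlugin(FragmentPlugin):
    sink_class = FragmentSink  # only requests with sinks of this class are considered
    sink_aware = True          # complete() receives each matching sink

    def complete(self, fid, sink_or_gp):
        # With sink_aware = False, this would be called once with the fragment GP instead
        log.info('Fragment {} completed: {}'.format(fid, sink_or_gp))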
Example no. 5
def __purge(self):
    while True:
        self.__lock.acquire()
        try:
            # Cached resources whose backing key has expired
            obsolete = filter(lambda x: not r.exists('{}:cache:{}'.format(AGENT_ID, x)),
                              r.smembers(self.__cache_key))

            if obsolete:
                with r.pipeline(transaction=True) as p:
                    p.multi()
                    log.info('Removing {} resources from cache...'.format(len(obsolete)))
                    for uuid in obsolete:
                        uuid_lock = self.uuid_lock(uuid)
                        uuid_lock.acquire()
                        try:
                            gid = r.hget(self.__gids_key, uuid)
                            counter_key = '{}:cache:{}:cnt'.format(AGENT_ID, uuid)
                            usage_counter = r.get(counter_key)
                            if usage_counter is None or int(usage_counter) <= 0:
                                try:
                                    resources_cache.remove_context(resources_cache.get_context(uuid))
                                    p.srem(self.__cache_key, uuid)
                                    p.hdel(self.__gids_key, uuid)
                                    p.hdel(self.__gids_key, gid)
                                    p.delete(counter_key)
                                    g = self.__uuid_dict[uuid]
                                    del self.__uuid_dict[uuid]
                                    del self.__graph_dict[g]
                                except Exception as e:
                                    traceback.print_exc()
                                    log.error('Error purging resource {} with uuid {}: {}'.format(gid, uuid, e))
                            p.execute()
                        finally:
                            uuid_lock.release()
        finally:
            # Release the global lock even if a purge pass fails
            self.__lock.release()
        time.sleep(1)  # pause between purge passes (interval assumed)
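
__purge only evicts a resource when its usage counter is absent or non-positive, which suggests that readers pin cached resources by incrementing it. A minimal sketch of that protocol; the reader-side code is an assumption:

# Hypothetical reader-side pinning around the counter key used above
counter_key = '{}:cache:{}:cnt'.format(AGENT_ID, uuid)
r.incr(counter_key)        # pin: __purge will now skip this uuid
try:
    pass  # ... read from resources_cache ...
finally:
    r.decr(counter_key)    # unpin: the resource is eligible for eviction again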
Example no. 6
def __deliver_responses():
    import time
    __log.info('Delivery daemon started')

    # Declare in-progress deliveries dictionary
    futures = {}
    while True:
        try:
            # Get all ready deliveries
            ready = r.smembers(__ready_key)
            for rid in ready:
                # If the delivery is not in the thread pool, just submit it
                if rid not in futures:
                    __log.info('Response delivery of request {} is ready. Putting it in queue...'.format(rid))
                    futures[rid] = __thp.submit(__deliver_response, rid)

            # Clear futures that have already ceased to be ready
            for obsolete_rid in set.difference(set(futures.keys()), ready):
                if obsolete_rid in futures and futures[obsolete_rid].done():
                    del futures[obsolete_rid]

            # Deliveries marked as 'sent' are cleared here, along with their request data
            sent = r.smembers(__sent_key)
            for rid in sent:
                r.srem(__ready_key, rid)
                r.srem(__deliveries_key, rid)
                try:
                    response = build_response(rid)
                    response.sink.remove()  # Its lock is removed too
                    __log.info('Request {} was sent and cleared'.format(rid))
                except AttributeError:
                    traceback.print_exc()
                    __log.warning('Request number {} was deleted by other means'.format(rid))
                r.srem(__sent_key, rid)
        except Exception as e:
            __log.error(e.message)
            traceback.print_exc()
        finally:
            time.sleep(0.1)
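
The daemon is driven entirely by three Redis sets. A minimal sketch of the producer side that feeds it; the key names come from the code above, but the surrounding calls are assumptions:

# Hypothetical producer: register a response, mark it ready, then acknowledge it
r.sadd(__deliveries_key, rid)  # track the delivery
r.sadd(__ready_key, rid)       # picked up by __deliver_responses
# ... once __deliver_response has streamed the data ...
r.sadd(__sent_key, rid)        # cleared, along with its request data, on the next pass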
Example no. 7
def __check_gp_mappings(self, gp=None):
    """
    Used in the _save method. Seeks a match with some already registered fragment
    :param gp: By default, the _graph_pattern attribute is used when gp is None
    :return: The matching fragment id and the mapping dictionary, or None if there is no match
    """
    if gp is None:
        gp = self._graph_pattern
    gp_keys = r.keys('{}:*:gp'.format(self._fragments_key))
    for gpk in gp_keys:
        stored_gp = GraphPattern(r.smembers(gpk))
        mapping = stored_gp.mapping(gp)
        if mapping:
            return gpk.split(':')[-2], mapping
    return None
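
A caller would use the returned pair to reuse an existing fragment instead of registering a new one. A minimal sketch of that use, assuming the return convention above; the surrounding _save logic is not shown in the source:

# Hypothetical caller inside _save:
match = self.__check_gp_mappings()
if match is not None:
    fid, mapping = match
    # Bind this request to fragment fid, translating variables through mapping
else:
    pass  # ... register a brand-new fragment ...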
Example no. 8
def __load_fragment_requests(fid):
    """
    Load all requests and their sinks that are related to a given fragment id
    :param fid: Fragment id
    :return: A dictionary of sinks of all fragment requests
    """
    sinks_ = {}
    fragment_requests_key = '{}:{}:requests'.format(fragments_key, fid)
    for rid in r.smembers(fragment_requests_key):
        try:
            sinks_[rid] = build_response(rid).sink
        except Exception as e:
            log.warning(e.message)
            # Drop requests whose response can no longer be built
            with r.pipeline(transaction=True) as p:
                p.multi()
                p.srem(fragment_requests_key, rid)
                p.execute()
    return sinks_
Example no. 9
def __collect_fragments():
    registered_fragments = r.scard(fragments_key)
    synced_fragments = len(r.keys('{}:*:sync'.format(fragments_key)))
    log.info("""Collector daemon started:
                    - Fragments: {}
                    - Synced: {}""".format(registered_fragments, synced_fragments))

    futures = {}
    while True:
        for fid in filter(
                lambda x: r.get('{}:{}:sync'.format(fragments_key, x)) is None and r.get(
                    '{}:{}:pulling'.format(fragments_key, x)) is None,
                r.smembers(fragments_key)):
            if fid in futures:
                if futures[fid].done():
                    del futures[fid]
            if fid not in futures:
                futures[fid] = thp.submit(__pull_fragment, fid)
        time.sleep(1)
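
The filter expresses the collection condition: a fragment is pulled only when it is neither synced nor already being pulled. Both gating keys are managed by __pull_fragment (see Examples no. 13 and 14):

# Keys checked by the filter above, as written by __pull_fragment:
# '{fragments_key}:{fid}:pulling' -- set while a pull is in progress
# '{fragments_key}:{fid}:sync'    -- set with a TTL (the durability) after a pull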
Example no. 10
def __pull_fragment(fid):
    fragment_key = '{}:{}'.format(fragments_key, fid)

    tps = r.smembers('{}:gp'.format(fragment_key))
    r_sinks = __load_fragment_requests(fid)
    log.info("""Starting collection of fragment {}:
                    - GP: {}
                    - Supporting: ({}) {}""".format(fid, list(tps), len(r_sinks), list(r_sinks.keys())))

    try:
        prefixes, gen = get_query_generator(*tps,
                                            broker_host=BROKER['host'],
                                            agora_host=AGORA['host'],
                                            broker_port=BROKER['port'],
                                            agora_port=AGORA['port'], wait=True)
    except Exception:
        traceback.print_exc()
        log.error('Scholar is not available')
        return
Example no. 11
def get_fragment_enrichments(fid):
    return [EnrichmentData(eid) for eid in r.smembers('{}:fragments:{}:enrichments'.format(AGENT_ID, fid))]
Example no. 12
def fragment_contexts(fid):
    return r.smembers('{}:{}:contexts'.format(fragments_key, fid))
Example no. 13
def __pull_fragment(fid):
    """
    Pull and replace (if needed) a given fragment
    :param fid: Fragment id
    """

    fragment_key = '{}:{}'.format(fragments_key, fid)

    # Load fragment graph pattern
    tps = r.smembers('{}:gp'.format(fragment_key))
    # Load fragment requests (including their sinks)
    r_sinks = __load_fragment_requests(fid)
    log.info("""Starting collection of fragment {}:
                    - GP: {}
                    - Supporting: ({}) {}""".format(fid, list(tps), len(r_sinks), list(r_sinks)))

    # Prepare the corresponding fragment generator and fetch the search plan
    start_time = datetime.now()
    try:
        fgm_gen, _, graph = agora_client.get_fragment_generator('{ %s }' % ' . '.join(tps), workers=N_COLLECTORS,
                                                                provider=graph_provider, queue_size=N_COLLECTORS)
    except Exception:
        log.error('Agora is not available')
        return

    # If there is no SearchTree in the plan, notify, remove and abort the collection
    if not list(graph.subjects(RDF.type, AGORA.SearchTree)):
        log.info('There is no search plan for fragment {}. Removing...'.format(fid))
        # TODO: Send additional headers notifying the reason to end
        __notify_completion(fid, r_sinks)
        __remove_fragment(fid)
        return

    # Update cache graph prefixes
    __bind_prefixes(graph)

    # Extract triple patterns' dictionary from the search plan
    context_tp = {tpn: __extract_tp_from_plan(graph, tpn) for tpn in
                  graph.subjects(RDF.type, AGORA.TriplePattern)}
    frag_contexts = {tpn: (fid, context_tp[tpn]) for tpn in context_tp}

    lock = fragment_lock(fid)
    lock.acquire()

    # Update fragment contexts
    with r.pipeline(transaction=True) as p:
        p.multi()
        p.set('{}:pulling'.format(fragment_key), True)
        contexts_key = '{}:contexts'.format(fragment_key)
        p.delete(contexts_key)
        for tpn in context_tp.keys():
            p.sadd(contexts_key, frag_contexts[tpn])
        p.execute()
    lock.release()

    # Init fragment collection counters
    n_triples = 0
    fragment_weight = 0
    fragment_delta = 0

    log.info('Collecting fragment {}...'.format(fid))
    try:
        # Iterate all fragment triples and their contexts
        for (c, s, p, o) in fgm_gen:
            pre_ts = datetime.now()
            # Update weights and counters
            triple_weight = len(u'{}{}{}'.format(s, p, o))
            fragment_weight += triple_weight
            fragment_delta += triple_weight

            # Store the triple if it was not obtained before and notify related requests
            try:
                lock.acquire()
                try:
                    new_triple = add_stream_triple(fid, context_tp[c], (s, p, o))
                finally:
                    # Always release, or a failure here would deadlock later acquires
                    lock.release()
                if new_triple:
                    __consume_quad(fid, (context_tp[c], s, p, o), graph, sinks=r_sinks)
                n_triples += 1
            except Exception as e:
                log.warning(e.message)
                traceback.print_exc()

            if fragment_delta > 10000:
                fragment_delta = 0
                log.info('Pulling fragment {} [{} kB]'.format(fid, fragment_weight / 1000.0))

            if n_triples % 100 == 0:
                # Update fragment requests
                if r.scard('{}:requests'.format(fragment_key)) != len(r_sinks):
                    r_sinks = __load_fragment_requests(fid)

            post_ts = datetime.now()
            elapsed = (post_ts - pre_ts).total_seconds()
            throttling = THROTTLING_TIME - elapsed
            if throttling > 0:
                sleep(throttling)
    except Exception as e:
        log.warning(e.message)
        traceback.print_exc()
Example no. 14
def __pull_fragment(fid):
    """
    Pull and replace (if needed) a given fragment
    :param fid: Fragment id
    """

    fragment_key = '{}:{}'.format(fragments_key, fid)
    on_events = r.get('{}:events'.format(fragment_key))

    if on_events == 'True' and not change_in_fragment_resource(fid, int(r.get('{}:ud'.format(fragment_key)))):
        with r.pipeline(transaction=True) as p:
            p.multi()
            sync_key = '{}:sync'.format(fragment_key)
            p.set(sync_key, True)
            durability = int(r.get('{}:ud'.format(fragment_key)))
            p.expire(sync_key, durability)
            p.set('{}:updated'.format(fragment_key), calendar.timegm(datetime.utcnow().timetuple()))
            p.delete('{}:pulling'.format(fragment_key))
            p.execute()
        return

    # Load fragment graph pattern
    tps = r.smembers('{}:gp'.format(fragment_key))
    # Load fragment requests (including their sinks)
    r_sinks = __load_fragment_requests(fid)
    log.info("""Starting collection of fragment {}:
                    - GP: {}
                    - Supporting: ({}) {}""".format(fid, list(tps), len(r_sinks), list(r_sinks)))

    init_fragment_resources(fid)

    # Prepare the corresponding fragment generator and fetch the search plan
    start_time = datetime.utcnow()
    try:
        fgm_gen, _, graph = agora_client.get_fragment_generator('{ %s }' % ' . '.join(tps), workers=N_COLLECTORS,
                                                                provider=graph_provider, queue_size=N_COLLECTORS*100)

    except Exception:
        traceback.print_exc()
        log.error('Agora is not available')
        return

    # If there is no SearchTree in the plan, notify, remove and abort the collection
    if not list(graph.subjects(RDF.type, AGORA.SearchTree)):
        log.info('There is no search plan for fragment {}. Removing...'.format(fid))
        # TODO: Send additional headers notifying the reason to end
        __notify_completion(fid, r_sinks)
        __remove_fragment(fid)
        return

    # Update cache graph prefixes
    __bind_prefixes(graph)

    # Extract triple patterns' dictionary from the search plan
    context_tp = {tpn: __extract_tp_from_plan(graph, tpn) for tpn in
                  graph.subjects(RDF.type, AGORA.TriplePattern)}
    frag_contexts = {tpn: (fid, context_tp[tpn]) for tpn in context_tp}

    lock = fragment_lock(fid)
    lock.acquire()

    # Update fragment contexts
    with r.pipeline(transaction=True) as p:
        p.multi()
        p.set('{}:pulling'.format(fragment_key), True)
        contexts_key = '{}:contexts'.format(fragment_key)
        p.delete(contexts_key)
        clear_fragment_stream(fid)
        for tpn in context_tp.keys():
            p.sadd(contexts_key, frag_contexts[tpn])
        p.execute()
    lock.release()

    # Init fragment collection counters
    n_triples = 0
    fragment_weight = 0
    fragment_delta = 0

    log.info('Collecting fragment {}...'.format(fid))
    try:
        # Iterate all fragment triples and their contexts
        pre_ts = datetime.utcnow()
        for (c, s, p, o) in fgm_gen:
            # Update weights and counters
            triple_weight = len(u'{}{}{}'.format(s, p, o))
            fragment_weight += triple_weight
            fragment_delta += triple_weight

            # Store the triple if it was not obtained before and notify related requests
            try:
                lock.acquire()
                try:
                    new_triple = add_stream_triple(fid, context_tp[c], (s, p, o))
                finally:
                    # Always release, or a failure here would deadlock later acquires
                    lock.release()
                if new_triple:
                    if isinstance(s, URIRef):
                        if s not in resource_in_fragment:
                            resource_in_fragment[s] = set([])
                        resource_in_fragment[s].add(fid)
                        fragment_resources[fid].add(s)
                    __consume_quad(fid, (context_tp[c], s, p, o), graph, sinks=r_sinks)
                n_triples += 1
            except Exception as e:
                log.warning(e.message)
                traceback.print_exc()

            if fragment_delta > 10000:
                fragment_delta = 0
                log.info('Pulling fragment {} [{} kB]'.format(fid, fragment_weight / 1000.0))

            if n_triples % 100 == 0:
                # Update fragment requests
                if r.scard('{}:requests'.format(fragment_key)) != len(r_sinks):
                    r_sinks = __load_fragment_requests(fid)

            post_ts = datetime.utcnow()
            elapsed = (post_ts - pre_ts).total_seconds()
            throttling = THROTTLING_TIME - elapsed
            if throttling > 0:
                sleep(throttling)
            pre_ts = datetime.utcnow()
    except Exception as e:
        log.warning(e.message)
        traceback.print_exc()

    elapsed = (datetime.utcnow() - start_time).total_seconds()
    log.info(
        '{} triples retrieved for fragment {} in {} s [{} kB]'.format(n_triples, fid, elapsed,
                                                                      fragment_weight / 1000.0))

    # Update fragment cache and its contexts
    lock.acquire()
    try:
        __update_fragment_cache(fid, tps)
        log.info('Fragment {} data has been replaced with the recently collected'.format(fid))
        __cache_plan_context(fid, graph)
        log.info('BGP context of fragment {} has been cached'.format(fid))
        log.info('Updating result set for fragment {}...'.format(fid))

        # Calculate sync times and update fragment flags
        with r.pipeline(transaction=True) as p:
            p.multi()
            sync_key = '{}:sync'.format(fragment_key)
            demand_key = '{}:on_demand'.format(fragment_key)
            # Fragment is now synced
            p.set(sync_key, True)
            # If the fragment collection time has not exceeded the threshold, switch to on-demand mode
            # if elapsed < ON_DEMAND_TH and elapsed * random.random() < ON_DEMAND_TH / 4:
            #     p.set(demand_key, True)
            #     log.info('Fragment {} has been switched to on-demand mode'.format(fid))
            # else:
            p.delete(demand_key)

            updated_delay = int(r.get('{}:ud'.format(fragment_key)))
            last_requests_ts = [int(x) for x in r.lrange('{}:hist'.format(fragment_key), 0, -1)]
            log.debug('Request history of fragment {}: {}'.format(fid, last_requests_ts))
            current_ts = calendar.timegm(datetime.utcnow().timetuple())
            first_collection = r.get('{}:updated'.format(fragment_key)) is None
            base_ts = last_requests_ts[:]
            if not first_collection:
                if current_ts - base_ts[0] <= updated_delay:
                    current_ts += updated_delay  # Force the latest interval to span at least the update delay
                base_ts = [current_ts] + base_ts
            request_intervals = [i - j for i, j in zip(base_ts[:-1], base_ts[1:])]
            if request_intervals:
                avg_gap = sum(request_intervals) / len(request_intervals)
                durability = avg_gap - elapsed if avg_gap > updated_delay else updated_delay - elapsed
            else:
                durability = updated_delay - elapsed

            durability = int(max(durability, 1))
            log.debug('Estimated durability of fragment {}: {} s'.format(fid, durability))
            if durability <= updated_delay - elapsed:
                p.expire(sync_key, durability)
                log.info('Fragment {} is considered synced for {} s'.format(fid, durability))
            else:
                clear_fragment_stream(fid)
                p.delete('{}:updated'.format(fragment_key))
                p.delete('{}:hist'.format(fragment_key))
                log.info('Fragment {} will no longer be automatically updated'.format(fid))

            p.set('{}:updated'.format(fragment_key), calendar.timegm(datetime.utcnow().timetuple()))
            p.delete('{}:pulling'.format(fragment_key))
            p.execute()

        __notify_completion(fid, r_sinks)
    finally:
        lock.release()

    log.info('Fragment {} collection is complete!'.format(fid))
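
A worked instance of the durability estimate above, with assumed values and ignoring the first-collection adjustment: if the request history holds timestamps 40, 100 and 160 seconds old, then request_intervals = [60, 60] and avg_gap = 60. With updated_delay = 30 and elapsed = 5, avg_gap > updated_delay gives durability = 60 - 5 = 55; since 55 > updated_delay - elapsed = 25, the fragment's history is cleared and it stops being auto-updated. Conversely, avg_gap = 20 would give durability = 30 - 5 = 25, the sync key would expire after 25 seconds, and a new pull would be scheduled.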
Example no. 15
def get_fragments():
    fragment_ids = list(r.smembers('{}:fragments'.format(AGENT_ID)))
    f_list = [{'id': fid, 'gp': list(r.smembers('{}:fragments:{}:gp'.format(AGENT_ID, fid)))} for fid in fragment_ids]
    return jsonify(fragments=f_list)