def add_ops(self, roots, after_exop=None):
    """
    Insert the exops required to compute every op in ``roots``.

    Ops are added through ``self.add_op`` only once all of their
    not-yet-inserted dependencies have been added.

    Args:
        roots: A collection of ops whose values are needed.
        after_exop: The exop after which new exops are inserted. When
            ``None``, ``self.prev_exop`` is used.

    Raises:
        ValueError: If some ops are still waiting on dependencies once
            nothing more can be scheduled.
    """
    insert_after = self.prev_exop if after_exop is None else after_exop

    def current_version(node):
        # An op may have been replaced by another op; the exop (when one
        # exists) holds the current version.
        exop = self.computation_decl.get_exop(node, None)
        return node if exop is None else exop.op

    pending = OrderedSet()               # ops still to be examined
    unmet = {}                           # op -> number of missing deps
    dependents = defaultdict(OrderedSet)  # dep -> ops waiting on it
    runnable = OrderedSet()              # ops with no missing deps

    for root in roots:
        pending.add(current_version(root))

    # Phase 1: walk the dependencies and count what each op waits for.
    while pending:
        node = pending.pop()
        if node in unmet or node in self.all_ops:
            continue

        missing = 0
        for dep in node.all_deps:
            dep = current_version(dep)
            if dep in self.all_ops:
                continue
            dependents[dep].add(node)
            pending.add(dep)
            missing += 1

        if missing:
            unmet[node] = missing
        else:
            runnable.add(node)

    # Phase 2: emit runnable ops and release the ops waiting on them.
    while runnable:
        node = runnable.pop()
        insert_after = self.add_op(node, after_exop=insert_after)
        for waiter in dependents.get(node, []):
            remaining = unmet[waiter] - 1
            if remaining:
                unmet[waiter] = remaining
            else:
                runnable.add(waiter)
                del unmet[waiter]

    if unmet:
        raise ValueError("Graph not a DAG")
class OrderedSetQueue(Queue):
    """
    ``Queue`` whose backing store is an ``OrderedSet``.

    Items are stored with ``OrderedSet.add`` and retrieved with
    ``pop(last=False)``; membership can be tested with ``in``.

    Based on
    https://stackoverflow.com/questions/16506429/check-if-element-is-already-in-a-queue
    """

    def _init(self, maxsize):
        # Called by Queue.__init__ to create the underlying storage.
        self.cacheLock = Lock()
        self.queue = OrderedSet()

    def _put(self, item):
        # Invoked by Queue.put() while ``self.mutex`` is already held.
        with self.cacheLock:
            self.queue.add(item)

    def _get(self):
        # Invoked by Queue.get() while ``self.mutex`` is already held.
        with self.cacheLock:
            return self.queue.pop(last=False)

    def __contains__(self, item):
        """Return True if ``item`` is currently stored in the queue."""
        # Lock order must be mutex -> cacheLock, the same order that
        # put()/get()/qsize() use (they hold ``mutex`` and then call
        # _put/_get/_qsize).  Taking the locks in the opposite order
        # could deadlock against a concurrent put() or get().
        with self.mutex:
            with self.cacheLock:
                return item in self.queue

    def _qsize(self):
        # Invoked by Queue.qsize()/empty()/full() with ``self.mutex`` held.
        with self.cacheLock:
            return len(self.queue)

    def size(self):
        """Return the number of stored items (alias for ``qsize()``)."""
        return self.qsize()

    def toList(self):
        """Return the result of ``queueToList`` for this queue."""
        return queueToList(self, maxsize=self.maxsize)
class Poller(object):
    """ROS node that polls base stations on request and publishes the
    measurements they return."""

    def __init__(self):
        rospy.init_node('poller_node')
        self.rate = rospy.Rate(3)  # 3hz
        # Must exist before the subscriber is created: the subscriber
        # callback (pushbackRequest) writes to it and may fire as soon as
        # the subscription is registered.
        self.request_list = OrderedSet([])
        self.extractBasestationFromParams()
        self.createCommunicators()

    def createCommunicators(self):
        """Create the polling client, the publisher and the subscriber."""
        self.client = Client(10019)
        self.measurements_publisher = rospy.Publisher(
            'measurements', MeasurementList, queue_size=10)
        self.request_subscriber = rospy.Subscriber(
            "measurements_request", String, self.pushbackRequest)

    def extractBasestationFromParams(self):
        """Read the base station list from the ROS parameter server."""
        stations = rospy.get_param("/poller_node/basestations")
        self.storeBasestation(stations)

    def storeBasestation(self, stations):
        """Build ``self.basestations`` from ``stations``.

        Each entry is indexed as ``station[0]``, ``station[1]`` and
        ``station[2]``; the last two are converted to ``float``.
        """
        self.basestations = [
            Basestation(station[0], float(station[1]), float(station[2]))
            for station in stations
        ]

    def pushbackRequest(self, msg):
        """Subscriber callback: queue ``msg.data`` for polling."""
        self.request_list.add(msg.data)

    def measurementsLoop(self):
        """Serve queued requests until ROS shuts down, sleeping at
        ``self.rate`` between rounds."""
        while not rospy.is_shutdown():
            while not self.request_list.isEmpty():
                station_address = self.request_list.pop()
                self.serveRequest(station_address)
            self.rate.sleep()

    def pollStation(self, station_address):
        """Poll one base station through the client and return its reply."""
        return self.client.pollBasestation(station_address)

    def serveRequest(self, station_address):
        """Poll ``station_address`` and publish any measurements returned.

        Socket errors are treated as best-effort failures: they are
        logged and the request is dropped.
        """
        try:
            data = self.pollStation(station_address)
            if containsMeasurements(data):
                self.publishMeasuements(extractJson(data), station_address)
        except socket.error as err:
            rospy.logwarn("Polling base station %s failed: %s",
                          station_address, err)

    def publishMeasuements(self, measurs, station):
        """Publish a ``MeasurementList`` built from ``measurs``."""
        msg = MeasurementList()
        for el in measurs:
            msg.data.append(self.generateMeasurement(el))
        msg.basestation = station
        msg.header.stamp = rospy.Time.now()
        self.measurements_publisher.publish(msg)

    def generateMeasurement(self, element):
        """Convert one decoded entry into a ``Measurement`` message.

        Reads ``element['id_tag']`` (UTF-8 encoded into ``tag``) and
        ``element['rssid']`` (converted to ``int``).
        """
        tmp = Measurement()
        tmp.tag = element['id_tag'].encode('utf-8')
        tmp.measurement = int(element['rssid'])
        return tmp
class SetStack(queue.Queue):
    """``queue.Queue`` variant that keeps its items in an ``OrderedSet``.

    Items are removed with ``OrderedSet.pop(True)`` -- presumably
    ``last=True``, i.e. stack order; confirm against the OrderedSet
    implementation in use.
    """

    def _init(self, maxsize):
        # Storage hook called from Queue.__init__.
        storage = OrderedSet()
        self.queue = storage

    def _put(self, item):
        # Storage hook called from Queue.put.
        self.queue.add(item)

    def _get(self):
        # Storage hook called from Queue.get.
        popped = self.queue.pop(True)
        return popped

    def hasElement(self, element):
        """Return whether ``element`` is currently stored."""
        present = element in self.queue
        return present
class SimpleBreadthFirstLearner(TemplateLearner):
    """Template learner whose candidate pool is an ``OrderedSet`` consumed
    from the front (index 0)."""

    def __init__(self,
                 solver_instance: LPSolver,
                 eval_fn: EvalFunction,
                 max_body_literals=4,
                 do_print=False):
        super().__init__(solver_instance, eval_fn, do_print=do_print)
        self._max_body_literals = max_body_literals

    def initialise_pool(self):
        """Start with an empty candidate pool."""
        self._candidate_pool = OrderedSet()

    def put_into_pool(
            self, candidates: typing.Union[Clause, Procedure,
                                           typing.Sequence]) -> None:
        """Add a single clause, or merge a collection, into the pool."""
        if not isinstance(candidates, Clause):
            self._candidate_pool |= candidates
            return
        self._candidate_pool.add(candidates)

    def get_from_pool(self) -> Clause:
        """Remove and return the candidate at index 0 of the pool."""
        next_candidate = self._candidate_pool.pop(0)
        return next_candidate

    def stop_inner_search(self, eval: typing.Union[int, float],
                          examples: Task, clause: Clause) -> bool:
        """Stop as soon as a clause evaluates to a positive score."""
        return bool(eval > 0)

    def process_expansions(
            self, examples: Task, exps: typing.Sequence[Clause],
            hypothesis_space: TopDownHypothesisSpace
    ) -> typing.Sequence[Clause]:
        """Filter expansions, pruning unsatisfiable ones from the space.

        Clauses longer than ``max_body_literals`` are dropped.  Of the
        rest, those whose body has a solution are returned; the others
        are removed from ``hypothesis_space``.
        """
        # eliminate every clause with more body literals than allowed
        short_enough = [
            clause for clause in exps
            if len(clause) <= self._max_body_literals
        ]

        # query the solver for all clauses first, then act on the results
        solvable = [
            self._solver.has_solution(*clause.get_body().get_literals())
            for clause in short_enough
        ]

        kept = []
        for clause, has_solution in zip(short_enough, solvable):
            if has_solution:
                kept.append(clause)
            else:
                hypothesis_space.remove(clause)
        return kept
def find_recvs(fro):
    """
    Collect every ``RecvOp`` reachable from ``fro``.

    The walk follows ``args`` of ordinary ops and, for a ``RecvOp``,
    continues through the node(s) returned by its ``send_node()``.

    Returns:
        An ``OrderedSet`` of the ``RecvOp`` instances encountered.
    """
    pending = OrderedSet()
    found = OrderedSet()
    pending.add(fro)

    while pending:
        node = pending.pop()
        if not isinstance(node, RecvOp):
            if hasattr(node, 'args'):
                pending.update(node.args)
            continue
        found.add(node)
        pending |= get_iterable(node.send_node())

    return found
class __TransactionSet:
    """Insertion-ordered set of transactions backed by an ``OrderedSet``."""

    def __init__(self):
        """Initialize transaction store."""
        self.store = OrderedSet()

    def add(self, transaction):
        """Add a single transaction to the set."""
        self.store.add(transaction)

    def add_multiple(self, transaction_list):
        """Add multiple transactions to the set.

        Since the transactions are re-added to the set (they were in it
        once), we add them at the front of the existing set.
        """
        self.store = OrderedSet(transaction_list).union(self.store)

    def contains(self, transaction):
        """Return True if ``transaction`` is in the set."""
        return transaction in self.store

    def __contains__(self, transaction):
        # Lets ``tx in transaction_set`` use the store's own membership
        # test instead of falling back to a scan through __iter__.
        return transaction in self.store

    def pop(self):
        """Remove and return the transaction at index 0.

        Returns:
            The removed transaction, or ``None`` if the set is empty.
        """
        try:
            return self.store.pop(0)
        # Catch KeyError if set is empty
        except KeyError:
            return None

    def discard(self, transaction):
        """Remove the transaction if it was present in the set."""
        self.store.discard(transaction)

    def discard_multiple(self, transaction_list):
        """Remove multiple transactions from the set."""
        for tx in transaction_list:
            self.discard(tx)

    def clear(self):
        """Remove all transactions from the set."""
        self.store.clear()

    def __len__(self):
        return len(self.store)

    def __iter__(self):
        yield from self.store

    def __repr__(self):
        return repr(self.store)
def comm_path_exists(fro, to):
    """
    Report whether ``to`` can be reached from ``fro``.

    Besides ordinary ``args`` edges, the search follows the non-explicit
    edge from a Receiver to its Sender (``send_node()``).  Note that this
    is a non-standard traversal, as most traversals stop at a Receiver.

    Returns:
        True if ``to`` is encountered during the walk, False otherwise.
    """
    # TODO: Issue #1865 does this correctly handle traversing multiple
    # send-recv junctions from fro to to?
    frontier = OrderedSet(fro.args)
    frontier.add(fro)

    while frontier:
        node = frontier.pop()
        if node == to:
            return True
        if isinstance(node, RecvOp):
            frontier |= get_iterable(node.send_node())
            continue
        frontier.update(node.args)

    return False
class Propagator:
    """Propagation and forwarding of client requests.

    The methods rely on attributes that are not set here (``name``,
    ``send``, ``quorums``, ``replicas``, ``monitor``, ``nodestack``,
    ``request_msg``), so this class is presumably mixed into a node
    class that provides them.
    """

    # Upper bound on the number of digests remembered in
    # ``requested_propagates_for``.
    MAX_REQUESTED_KEYS_TO_KEEP = 1000

    def __init__(self, metrics: MetricsCollector = NullMetricsCollector()):
        self.requests = Requests()
        # Digests for which a PROPAGATE was requested recently (oldest
        # first).
        self.requested_propagates_for = OrderedSet()
        self.metrics = metrics

    # noinspection PyUnresolvedReferences
    def propagate(self, request: Request, clientName):
        """
        Broadcast a PROPAGATE to all other nodes

        :param request: the REQUEST to propagate
        :param clientName: name of the client the request came from
        """
        if self.requests.has_propagated(request, self.name):
            logger.trace("{} already propagated {}".format(self, request))
        else:
            with self.metrics.measure_time(MetricsName.SEND_PROPAGATE_TIME):
                self.requests.add_propagate(request, self.name)
                propagate = self.createPropagate(request, clientName)
                logger.debug("{} propagating request {} from client {}".format(
                    self, request.key, clientName),
                    extra={
                        "cli": True,
                        "tags": ["node-propagate"]
                    })
                self.send(propagate)

    @staticmethod
    def createPropagate(request: Union[Request, dict],
                        client_name) -> Propagate:
        """
        Create a new PROPAGATE for the given REQUEST.

        :param request: the client REQUEST
        :param client_name: client name; ``bytes`` values are decoded
        :return: a new PROPAGATE msg, or ``None`` (after logging an error)
            when ``request`` is neither a ``Request`` nor a ``dict``
        """
        if not isinstance(request, (Request, dict)):
            logger.error(
                "{}Request not formatted properly to create propagate".format(
                    THREE_PC_PREFIX))
            return None
        logger.trace("Creating PROPAGATE for REQUEST {}".format(request))
        request = request.as_dict if isinstance(request, Request) else \
            request
        if isinstance(client_name, bytes):
            client_name = client_name.decode()
        return Propagate(request, client_name)

    # noinspection PyUnresolvedReferences
    def canForward(self, request: Request):
        """
        Determine whether to forward client REQUESTs to replicas,
        based on the following logic:

        - If exactly f+1 PROPAGATE requests are received, then forward.
        - If less than f+1 of requests then probably there's no consensus on
            the REQUEST, don't forward.
        - If more than f+1 then already forwarded to replicas, don't forward

        Even if the node hasn't received the client REQUEST itself, if it has
        received enough number of PROPAGATE messages for the same, the REQUEST
        can be forwarded.

        :param request: the client REQUEST
        :return: ``None`` when the request can be forwarded (it is marked
            as finalised as a side effect), otherwise a string giving the
            reason it cannot
        """
        if self.requests.forwarded(request):
            return 'already forwarded'

        # If not enough Propagates, don't bother comparing
        if not self.quorums.propagate.is_reached(self.requests.votes(request)):
            return 'not finalised'

        req = self.requests.req_with_acceptable_quorum(request,
                                                       self.quorums.propagate)
        if req:
            self.requests.set_finalised(req)
            return None
        else:
            return 'not finalised'

    # noinspection PyUnresolvedReferences
    def forward(self, request: Request):
        """
        Forward the specified client REQUEST to the other replicas on this node

        :param request: the REQUEST to propagate
        """
        key = request.key
        num_replicas = self.replicas.num_replicas
        logger.debug('{} forwarding request {} to {} replicas'.format(
            self, key, num_replicas))
        self.replicas.pass_message(ReqKey(key))
        self.monitor.requestUnOrdered(key)
        self.requests.mark_as_forwarded(request, num_replicas)

    # noinspection PyUnresolvedReferences
    def recordAndPropagate(self, request: Request, clientName):
        """
        Record the request in the list of requests and propagate.

        :param request:
        :param clientName:
        """
        self.requests.add(request)
        self.propagate(request, clientName)
        self.tryForwarding(request)

    def tryForwarding(self, request: Request):
        """
        Try to forward the request if the required conditions are met.
        See the method `canForward` for the conditions to check before
        forwarding a request.
        """
        cannot_reason_msg = self.canForward(request)
        if cannot_reason_msg is None:
            # If haven't got the client request(REQUEST) for the corresponding
            # propagate request(PROPAGATE) but have enough propagate requests
            # to move ahead
            self.forward(request)
        else:
            logger.trace("{} not forwarding request {} to its replicas "
                         "since {}".format(self, request, cannot_reason_msg))

    def request_propagates(self, req_keys):
        """
        Request PROPAGATEs for the given request keys. Since replicas can
        request PROPAGATEs independently of each other, check if it has
        been requested recently

        :param req_keys: digests of the requests to ask PROPAGATEs for
        :return: the number of digests a PROPAGATE was requested for
        """
        i = 0
        for digest in req_keys:
            if digest not in self.requested_propagates_for:
                if digest not in self.requests:
                    # Request from all nodes
                    self.request_msg(PROPAGATE, {f.DIGEST.nm: digest})
                else:
                    # Request from nodes that didn't send
                    already_sent = self.requests[digest].propagates.keys()
                    send_to = [
                        conn for conn in self.nodestack.connecteds
                        if conn not in already_sent
                    ]
                    self.request_msg(PROPAGATE, {f.DIGEST.nm: digest},
                                     frm=send_to)

                self._add_to_recently_requested(digest)
                i += 1
            else:
                logger.debug(
                    '{} already requested PROPAGATE recently for {}'.format(
                        self, digest))
        return i

    def _add_to_recently_requested(self, key):
        """Remember ``key`` as recently requested, evicting the oldest
        entries so that at most ``MAX_REQUESTED_KEYS_TO_KEEP`` are kept."""
        # ``>=`` (not ``>``): room must be made *before* adding, otherwise
        # the set settles at MAX_REQUESTED_KEYS_TO_KEEP + 1 entries.
        while len(self.requested_propagates_for
                  ) >= self.MAX_REQUESTED_KEYS_TO_KEEP:
            self.requested_propagates_for.pop(last=False)
        self.requested_propagates_for.add(key)
def main():
    """Generate training rows by expanding clauses breadth-first.

    Candidate clauses are expanded from a top-down hypothesis space; a
    random subset of them is evaluated on randomly sampled training
    examples, and the resulting rows are appended to
    ``processeddata_average.csv`` and ``processeddata_covered.csv``.
    Stops once ``amount_of_clauses`` clauses have been used.
    """
    background_file = (
        "../inputfiles/StringTransformations_BackgroundKnowledge.pl")
    amount_of_clauses = 500
    chosen_pred = "t"
    minlength = 0
    max_factor_per_length = 4

    # Context managers guarantee both output files are flushed and closed,
    # also when an exception interrupts the loop.
    with open("processeddata_average.csv", "a") as f1, \
            open("processeddata_covered.csv", "a") as f2:
        prolog.consult(background_file)

        _, predicates = createKnowledge(background_file, chosen_pred)
        train = readPositiveOfType(
            "../inputfiles/StringTransformationProblems", "train_task")

        totalextension = []
        filtered_predicates = []
        for predicate in predicates:
            if predicate.name not in ["s", chosen_pred
                                      ] and predicate not in filtered_predicates:
                # ``predicate=predicate`` binds the current value; a plain
                # closure would see only the last predicate of the loop.
                totalextension.append(
                    lambda x, predicate=predicate: plain_extension(
                        x, predicate, connected_clauses=True))
                filtered_predicates.append(predicate)

        # create the hypothesis space
        hs = TopDownHypothesisSpace(
            primitives=totalextension,
            head_constructor=c_pred("train_task", 1),
            # TODO: connected_clause may cause problems because it does
            # not want to introduce new variables; not sure (check)
            expansion_hooks_keep=[lambda x, y: connected_clause(x, y)],
            expansion_hooks_reject=[
                lambda x, y: has_singleton_vars(x, y),
                lambda x, y: has_duplicated_literal(x, y),
                # TODO: check for errors
                lambda x, y: has_all_same_vars_in_literal(x, y)
            ])

        clauses_used = 0
        possible_candidates = OrderedSet()
        put_into_pool(possible_candidates, hs.get_current_candidate())
        amount_of_length = 0
        previouslength = 0

        while clauses_used < amount_of_clauses:
            current_cand = possible_candidates.pop(0)

            # expand the candidate
            hs.expand(current_cand)
            # this is important: .expand() method returns candidates only
            # the first time it is called; if the same node is expanded the
            # second time, it returns the empty list
            # it is safer than to use the .get_successors_of method
            exps = hs.get_successors_of(current_cand)
            expa = process_expansions(exps, hs)
            put_into_pool(possible_candidates, expa)

            cand_length = len(current_cand)
            # Reset the per-length counter when a longer clause shows up.
            if previouslength < cand_length:
                amount_of_length = 0

            # NOTE(review): the two counters below are assumed to advance
            # once per selected clause (not once per written row) -- the
            # original nesting should be confirmed.
            if (random.random() < 0.5 and cand_length > minlength
                    and amount_of_length < max_factor_per_length * cand_length):
                for problem in train:
                    for example in train.get(problem):
                        if random.random() < 0.25:
                            input_row = get_input_data(
                                current_cand, example, filtered_predicates)
                            output, output2 = get_output_data(
                                current_cand, expa, example,
                                filtered_predicates)
                            f1.write(input_row + "," + output + "\n")
                            f2.write(input_row + "," + output2 + "\n")
                clauses_used += 1
                amount_of_length += 1

            previouslength = cand_length
class DownloadManager(object):
    """Queue of download tasks with a cap on concurrent downloads.

    Tasks move between four sets: ``queue`` (waiting), ``active``
    (running), ``done`` (finished) and ``error`` (failed).
    """

    def __init__(self):
        # type: () -> None
        self.loop = asyncio.get_event_loop()
        self.timeout = aiohttp.ClientTimeout(total=None, sock_read=60)
        self.session = aiohttp.ClientSession(loop=self.loop,
                                             timeout=self.timeout,
                                             auto_decompress=False)
        self.concurrent_downloads = 3
        self.chunksize = 1024 * 1024  # file write buffer

        self.queue = OrderedSet()
        self.active = OrderedSet()
        self.done = OrderedSet()
        self.error = OrderedSet()

    def status(self):
        # type: () -> str
        """Return a two-line summary: task counts per set and the summed
        ``downloaded`` values for active, done and failed tasks."""
        total_active = sum(t.downloaded for t in self.active)
        total_done = sum(t.downloaded for t in self.done)
        total_error = sum(t.downloaded for t in self.error)

        return "Queued: {}, active: {}, done: {}, error: {}\nDownload active: {}, done: {}, error: {}".format(
            len(self.queue), len(self.active), len(self.done),
            len(self.error), total_active, total_done, total_error)

    def _enqueue(self, task, priority):
        # type: (DownloadTask, Any) -> None
        """Put ``task`` into the waiting queue (``priority`` is currently
        not used)."""
        self.queue.add(task)

    def _start(self, task):
        # type: (DownloadTask, ) -> asyncio.Task
        """Mark ``task`` active and schedule its download coroutine."""
        self.active.add(task)
        atask = asyncio.ensure_future(self._download(task))
        return atask

    def _trystart(self):
        # type: () -> Optional[asyncio.Task]
        """Start the next queued task if a download slot is free.

        Returns:
            The scheduled ``asyncio.Task``, or ``None`` when all slots
            are busy or nothing is queued.
        """
        if len(self.active) >= self.concurrent_downloads:
            return None

        try:
            task = self.queue.pop()
        except KeyError:
            # Queue is empty.
            if not self.active:
                logger.info("all done")
            return None
        return self._start(task)

    async def _download(self, task):
        # type: (DownloadTask, ) -> None
        """Download ``task.url`` into ``task.path``.

        On success the task is added to ``done``; on any failure it is
        added to ``error``.  Timeouts are absorbed, every other exception
        is re-raised after bookkeeping.  In all cases the task is removed
        from ``active`` and the next queued task is started.
        """
        task.start()

        try:
            async with self.session.get(task.url, headers={}) as response:
                stream = response.content
                try:
                    size = int(response.headers.get("content-length", ""))  # type: Optional[int]
                except (ValueError, TypeError):
                    size = None

                accept_range = response.headers.get('Accept-Ranges',
                                                    'none').lower()

                # Status 200: range not supported, plain full download.
                if response.status == 206:  # range supported
                    if accept_range != "bytes":
                        raise RuntimeError(
                            "Only bytes content ranges are supported")
                    bytes_range = response.headers.get(
                        'Content-Range')  # 'bytes 0-10/46239'
                    raise RuntimeError(
                        "Range requests are not supported yet: {}".format(
                            bytes_range))

                with open(task.path, "wb", buffering=self.chunksize) as fw:
                    async for data in stream.iter_any():
                        task.downloaded += len(data)
                        fw.write(data)

                if size and size != task.downloaded:
                    print("incomplete", task.downloaded, "of", size)

        except asyncio.TimeoutError:
            self.error.add(task)
        except Exception:
            # Record the failure, then let the caller / asyncio see it.
            self.error.add(task)
            raise
        else:
            self.done.add(task)
        finally:
            # Always release the download slot; otherwise a failed task
            # would stay in ``active`` forever and block queued tasks.
            # NOTE(review): ``task.done()`` is assumed to belong to the
            # common exit path rather than the success branch only.
            task.done()
            self.active.remove(task)
            self._trystart()

    def download(self, url, path="tmp.txt", priority=0, force=False):
        # type: (str, str, int, bool) -> Optional[asyncio.Task]
        """Create a download task for ``url``.

        With ``force`` the task starts immediately, ignoring the
        concurrency cap; otherwise it is queued and started only if a
        slot is free.

        Returns:
            The ``asyncio.Task`` that was started, if any.
        """
        logger.info("starting download")
        task = DownloadTask(url, path)
        if force:
            return self._start(task)
        self._enqueue(task, priority)
        return self._trystart()

    async def _close(self):
        """Close the underlying HTTP session."""
        await self.session.close()
class SimpleBreadthFirstLearner(TemplateLearner):
    """Template learner whose candidate pool is an ``OrderedSet`` consumed
    from the front (index 0); clauses are scored directly against the
    solver."""

    def __init__(self, solver_instance: Prolog, max_body_literals=4):
        super().__init__(solver_instance)
        self._max_body_literals = max_body_literals

    def initialise_pool(self):
        """Start with an empty candidate pool."""
        self._candidate_pool = OrderedSet()

    def put_into_pool(
            self, candidates: typing.Union[Clause, Procedure,
                                           typing.Sequence]) -> None:
        """Add a single clause, or merge a collection, into the pool."""
        if isinstance(candidates, Clause):
            self._candidate_pool.add(candidates)
        else:
            self._candidate_pool |= candidates

    def get_from_pool(self) -> Clause:
        """Remove and return the candidate at index 0 of the pool."""
        return self._candidate_pool.pop(0)

    def evaluate(self, examples: Task, clause: Clause):
        """Score ``clause`` on the positive and negative examples.

        The clause is temporarily asserted into the solver and always
        retracted again, even if a query raises.

        Returns:
            ``[score, negatives_covered]`` where ``score`` is
            ``covered_pos / (covered_pos + covered_neg) * covered_pos /
            len(pos)``; ``[0, 0]`` when the clause covers no example.
        """
        pos, neg = examples.get_examples()

        self._solver.assertz(clause)
        try:
            positives_covered = sum(
                1 for example in pos if self._solver.has_solution(example))
            negatives_covered = sum(
                1 for example in neg if self._solver.has_solution(example))
        finally:
            # Never leave the candidate clause behind in the solver.
            self._solver.retract(clause)

        total_covered = positives_covered + negatives_covered
        if total_covered == 0:
            return [0, 0]
        if not pos:
            # No positive examples: the score is 0 and the formula below
            # would divide by zero.
            return [0, negatives_covered]
        return [
            positives_covered / total_covered * positives_covered / len(pos),
            negatives_covered
        ]

    def stop_inner_search(self, eval, examples: Task, clause: Clause) -> bool:
        """Stop when the clause covers at least one negative example
        (``eval[1]`` is the negative coverage returned by ``evaluate``)."""
        return bool(eval[1] > 0)

    def process_expansions(
            self, examples: Task, exps: typing.Sequence[Clause],
            hypothesis_space: TopDownHypothesisSpace
    ) -> typing.Sequence[Clause]:
        """Keep the expansions with a positive score.

        Clauses longer than ``max_body_literals`` are dropped; clauses
        whose score is not positive are removed from
        ``hypothesis_space``.
        """
        # eliminate every clause with more body literals than allowed
        exps = [cl for cl in exps if len(cl) <= self._max_body_literals]

        new_exps = []
        for cl in exps:
            # Evaluate against the examples passed in (the original
            # referenced an undefined name ``task`` here).
            score = self.evaluate(examples, cl)
            if score[0] > 0:
                new_exps.append(cl)
            else:
                hypothesis_space.remove(cl)
        return new_exps
def add_ops(self, roots, after_exop=None):
    """
    Add exops needed to compute ops in roots.

    Two scheduling algorithms are available, selected through the
    NGRAPH_TOPOSORT_ALGO environment variable: the default (any value
    greater than 0) emits ready ops from a priority queue keyed on
    ``op.metadata['order']``; the value 0 selects the naive algorithm
    that emits ready ops in the order they pop from an OrderedSet.

    Note that the priority algorithm writes ``'order'`` entries into the
    ``metadata`` of the ops it visits.

    Args:
        roots: A collection of ops whose values are needed.
        after_exop: Where in the list to add the ops. Defaults to the end.

    Raises:
        ValueError: If ops with unsatisfied dependency counts remain after
            scheduling ("Graph not a DAG").
    """
    if after_exop is None:
        after_exop = self.prev_exop

    # Get computation graph ops that are already inserted.
    available = OrderedSet()            # ops still to be examined
    counts = dict()                     # op -> number of not-yet-added deps
    parents = defaultdict(OrderedSet)   # dep -> ops that depend on it
    ready = OrderedSet()                # ops with no not-yet-added deps

    # Setting the environmental variable below to 0 can be used to disable toposort
    # with priorities and switch to naive algo in case something went wrong unexpectedly
    algo_num = int(os.getenv('NGRAPH_TOPOSORT_ALGO', 1))
    pqueue = PriorityQueue()
    # Decremented before every queue insertion; used as the second tuple
    # element so queue entries with equal order stay distinguishable.
    op_counter = 0
    wait_order = 100000   # order given to ops whose priority is not 'high'
    std_order = 2         # order given to ops without priority or order
    start_order = 1       # order given to ops with priority 'high'

    # Some ops in roots may have been replaced by other ops; if so, they
    # are in the graph already, although maybe not in this block. Get the
    # op from the exop so we have the current version.
    for op in roots:
        exop = self.computation_decl.get_exop(op, None)
        if exop is not None:
            op = exop.op
        available.add(op)

    while available:
        op = available.pop()
        if algo_num > 0:
            # Make sure the op carries an 'order' before its children
            # derive theirs from it below.
            if 'priority' in op.metadata:
                if op.metadata['priority'] == 'high':
                    op.metadata['order'] = start_order
                else:
                    op.metadata['order'] = wait_order
            elif 'order' not in op.metadata:
                op.metadata['order'] = std_order
        if op in counts or op in self.all_ops:
            continue

        nchildren = 0

        op_deps = op.all_deps
        # For MLSL gather/scatter receive ops, also wait for the matching
        # send node when that node is part of the current work list.
        # NOTE(review): if ``all_deps`` returns the op's own set rather
        # than a copy, this ``add`` mutates the op -- confirm.
        if (isinstance(op, CPUMlslGatherRecvOp) or isinstance(
                op, CPUMlslScatterRecvOp)) and op.send_node() in available:
            op_deps.add(op.send_node())
        for child in op_deps:
            exop = self.computation_decl.get_exop(child, None)
            if exop is not None:
                child = exop.op
            if child not in self.all_ops:
                parents[child].add(op)
                available.add(child)
                if algo_num > 0:
                    # A child without an explicit priority takes the
                    # smallest ``parent order + 1`` seen so far.
                    ch_order = child.metadata[
                        'order'] if 'order' in child.metadata else -1
                    new_order = op.metadata['order'] + 1
                    if 'priority' not in child.metadata and \
                            ('order' not in child.metadata or new_order < ch_order):
                        child.metadata['order'] = new_order
                nchildren += 1
        if nchildren > 0:
            counts[op] = nchildren
        else:
            # NOTE(review): the queue insertion is assumed to be nested
            # under the ``not in ready`` guard so that an op is queued
            # only once -- confirm against the original layout.
            if op not in ready:
                ready.add(op)
                if algo_num > 0:
                    op_counter = op_counter - 1
                    pqueue.put((op.metadata['order'], op_counter, op))

    if algo_num == 0:
        # Naive algorithm: emit ready ops and release their parents.
        while ready:
            op = ready.pop()
            after_exop = self.add_op(op, after_exop=after_exop)
            for p in parents.get(op, []):
                count = counts[p] - 1
                if count == 0:
                    ready.add(p)
                    del counts[p]
                else:
                    counts[p] = count

    else:
        # Priority algorithm: always emit the queued op with the smallest
        # (order, counter) key.
        while len(pqueue.queue) > 0:
            _, _, op = pqueue.get()
            after_exop = self.add_op(op, after_exop=after_exop)
            for p in parents.get(op, []):
                count = counts[p] - 1
                if count == 0:
                    op_counter = op_counter - 1
                    # Shouldn't happen, but we have a way to get back to naive scheduling
                    assert 'order' in p.metadata, \
                        "Something went wrong with the scheduling. \
                        Please try NGRAPH_TOPOSORT_ALGO=0"
                    if p.metadata['order'] == wait_order:
                        # Ops at wait_order get a positive counter, the
                        # others a negative one.
                        pqueue.put(
                            (p.metadata['order'], int(-op_counter), p))
                    else:
                        pqueue.put((p.metadata['order'], op_counter, p))
                    del counts[p]
                else:
                    counts[p] = count
    if len(counts) > 0:
        raise ValueError("Graph not a DAG")
class Aleph(TemplateLearner):
    """
    Implements the Aleph learner in loreleai. See https://www.cs.ox.ac.uk/activities/programinduction/Aleph/aleph.html#SEC45.
    Aleph efficiently searches the hypothesis space by bounding the search from above (X :- true) and below (using the bottom clause),
    and by using mode declarations for predicates. It iteratively adds new clauses that maximize the evalfn. Searching for a new clause
    is done using a branch-and-bound algorithm, where clauses that are guaranteed to not lead to improvements are immediately pruned.

    Aleph currently only supports eval functions that can define an upper bound on the quality of a clause, such as Coverage
    and Compression.
    """

    def __init__(
        self,
        solver: LPSolver,
        eval_fn: EvalFunction,
        max_body_literals=5,
        do_print=False,
    ):
        super().__init__(solver, eval_fn, do_print)
        # Expansions with more body literals than this are discarded in
        # process_expansions().
        self._max_body_literals = max_body_literals

    def learn(
        self, examples: Task, knowledge: Knowledge, hypothesis_space: HypothesisSpace,
        initial_clause: typing.Union[Body,Clause] = None, minimum_freq: int = 0
    ):
        """
        To find a hypothesis, Aleph uses the following set covering approach:
        1.  Select a positive example to be generalised. If none exists, stop; otherwise proceed to the next step.
        2.  Construct the most specific clause (the bottom clause) (Muggleton, 1995) that entails the selected example
            and that is consistent with the mode declarations.
        3.  Search for a clause more general than the bottom clause and that has the best score.
        4.  Add the clause to the current hypothesis and remove all the examples made redundant by it.
        Return to step 1.
        (Description from Cropper and Dumancic )

        Args:
            examples: task providing the positive and negative examples.
            knowledge: background knowledge asserted into the solver.
            hypothesis_space: not referenced in this method; a fresh
                TopDownHypothesisSpace is built instead.
            initial_clause: passed on to the hypothesis space.
            minimum_freq: when > 0, constants are restricted to those
                returned by find_frequent_constants(knowledge, minimum_freq).

        Returns:
            The LearnResult filled with the learned program and statistics.
        """

        # Variables for learning statistics
        start_time = datetime.datetime.now()
        i = 0
        # NOTE(review): ``stop`` is never set to True in this method.
        stop = False
        self._learnresult = LearnResult()

        # Reset in case the learner is reused
        self._prolog_queries = 0
        self._intermediate_coverage = []
        self._eval_fn._clauses_evaluated = 0

        # Assert all BK into engines
        self._solver.retract_all()
        self._assert_knowledge(knowledge)

        # Start with all examples
        examples_to_use = examples
        pos, _ = examples_to_use.get_examples()

        # List of clauses we're learning
        prog = []

        # parameters for aleph_extension()
        allowed_positions = find_allowed_positions(knowledge)
        allowed_reflexivity = find_allowed_reflexivity(knowledge)
        if minimum_freq > 0:
            allowed_constants = find_frequent_constants(knowledge,minimum_freq)
        else:
            allowed_constants = None

        # Create HypothesisSpace: primitives will be different in each iteration
        # (based on the chosen positive example)
        # NOTE(review): ``list(pos)[0]`` raises IndexError when there are
        # no positive examples.
        hs = TopDownHypothesisSpace(
            primitives=[],
            head_constructor=list(pos)[0].get_predicate(),
            expansion_hooks_reject=[
                lambda x, y: has_duplicated_literal(x, y),
            ],
            initial_clause=initial_clause
        )

        while len(pos) > 0 and not stop:
            i += 1

            # Pick example from pos
            pos_ex = Clause(list(pos)[0], [])
            bk = knowledge.as_clauses()
            bottom = compute_bottom_clause(bk, pos_ex)
            if self._print:
                print("Next iteration: generalizing example {}".format(str(pos_ex)))
                # print("Bottom clause: " + str(bottom))

            # Predicates can only be picked from the body of the bottom clause
            body_predicates = list(
                set(map(
                    lambda l: l.get_predicate(),
                    bottom.get_body().get_literals()))
            )

            # Constants can only be picked from the literals in the bottom clause,
            # and from constants that are frequent enough in bk (if applicable)
            if allowed_constants is None:
                allowed = lambda l: isinstance(l,Constant) or isinstance(l,int)
            else:
                allowed = lambda l: (isinstance(l,Constant) and l in allowed_constants) or isinstance(l,int)

            constants = list(set(list(filter(
                allowed,
                bottom.get_body().get_arguments(),))))
            if self._print:
                print("Constants in bottom clause: {}".format(constants))
                print("Predicates in bottom clause: {}".format(body_predicates))

            # IMPORTANT: use VALUES of pred and constants, not the variables.
            # Default arguments bind the current values; a plain closure
            # would look the names up later (late binding).
            extensions = [
                lambda x,a=pred,b=allowed_positions,c=constants,d=allowed_reflexivity: aleph_extension(x,a,b,c,d) for pred in body_predicates
            ]

            # Update hypothesis space for this iteration
            hs._primitives = extensions
            hs.remove_all_edges()

            # Learn 1 clause and add to program
            cl = self._learn_one_clause(examples_to_use, hs)
            prog.append(cl)
            if self._print:
                print("- New clause: " + str(cl))

            # update covered positive examples
            # NOTE(review): _execute_program returns None for a clause with
            # an empty body, which the set operations below cannot handle.
            covered = self._execute_program(cl)
            if self._print:
                print(
                    "Clause covers {} pos examples: {}".format(
                        len(pos.intersection(covered)), pos.intersection(covered)
                    )
                )

            # Find intermediate quality of program at this point, add to learnresult (don't count these as Prolog queries)
            c = set()
            for cl in prog:
                c = c.union(self._execute_program(cl,count_as_query=False))
            pos_covered = len(c.intersection(examples._positive_examples))
            neg_covered = len(c.intersection(examples._negative_examples))
            self._intermediate_coverage.append((pos_covered,neg_covered))

            # Remove covered examples and start next iteration
            pos, neg = examples_to_use.get_examples()
            pos = pos.difference(covered)
            examples_to_use = Task(pos, neg)

            if self._print:
                print("Finished iteration {}".format(i))
                # print("Current program: {}".format(str(prog)))

        # Wrap results into learnresult and return
        self._learnresult['learner'] = "Aleph"
        self._learnresult["total_time"] = (datetime.datetime.now() - start_time).total_seconds()
        self._learnresult["final_program"] = prog
        self._learnresult["num_iterations"] = i
        self._learnresult["evalfn_evaluations"] = self._eval_fn._clauses_evaluated
        self._learnresult["prolog_queries"] = self._prolog_queries
        self._learnresult["intermediate_coverage"] = self._intermediate_coverage

        return self._learnresult

    def initialise_pool(self):
        """Start with an empty candidate pool."""
        self._candidate_pool = OrderedSet()

    def put_into_pool(
        self, candidates: Tuple[typing.Union[Clause, Procedure, typing.Sequence], float]
    ) -> None:
        """
        Adds candidates to the pool: a single tuple is added as one entry,
        anything else is merged into the pool as a collection of entries.
        Entries are built elsewhere in this class as
        (clause, value, upper bound) tuples.
        """
        if isinstance(candidates, Tuple):
            self._candidate_pool.add(candidates)
        else:
            self._candidate_pool |= candidates

    def prune_pool(self, minValue):
        """
        Removes all clauses with upper bound on value < minValue from pool
        """
        # t[2] is the upper bound stored as third element of a pool entry
        self._candidate_pool = OrderedSet(
            [t for t in self._candidate_pool if not t[2] < minValue]
        )

    def get_from_pool(self) -> Clause:
        """Removes and returns the pool entry at index 0."""
        # NOTE(review): entries are put in as tuples, so the returned value
        # looks like a tuple rather than the annotated Clause -- confirm.
        return self._candidate_pool.pop(0)

    def stop_inner_search(
        self, eval: typing.Union[int, float], examples: Task, clause: Clause
    ) -> bool:
        """Not supported by this learner; always raises NotImplementedError."""
        raise NotImplementedError()

    def process_expansions(
        self,
        examples: Task,
        exps: typing.Sequence[Clause],
        hypothesis_space: TopDownHypothesisSpace,
    ) -> typing.Sequence[Clause]:
        """
        Filters the expansions: clauses with more than max_body_literals
        literals are dropped, clauses whose body has no solution are removed
        from the hypothesis space, and the remaining clauses are returned.
        """
        # eliminate every clause with more body literals than allowed
        exps = [cl for cl in exps if len(cl) <= self._max_body_literals]

        # check if every clause has solutions
        exps = [
            (cl, self._solver.has_solution(*cl.get_body().get_literals()))
            for cl in exps
        ]
        new_exps = []

        for ind in range(len(exps)):
            if exps[ind][1]:
                # keep it if it has solutions
                new_exps.append(exps[ind][0])
                # print(f"Not removed: {exps[ind][0]}")
            else:
                # remove from hypothesis space if it does not
                hypothesis_space.remove(exps[ind][0])
                # print(f"Removed: {exps[ind][0]}")

        return new_exps

    def _execute_program(self, clause: Clause, count_as_query: bool = True) -> typing.Sequence[Atom]:
        """
        Evaluates a clause using the Prolog engine and background knowledge

        Returns a list of atoms that the clause covers, or None when the
        clause has an empty body. The query counter is only incremented
        when count_as_query is True.
        """
        if len(clause.get_body().get_literals()) == 0:
            # Covers all possible examples because trivial hypothesis
            return None
        else:
            head_predicate = clause.get_head().get_predicate()
            head_args = clause.get_head_arguments()
            # print("{}({})".format(head_predicate, *head_args))

            sols = self._solver.query(*clause.get_body().get_literals())
            self._prolog_queries += 1 if count_as_query else 0

            # Build a solution by substituting Variables with their found value
            # and copying constants without change
            sols = [head_predicate(*[s[v] if isinstance(v,Variable) else v for v in head_args]) for s in sols]

            return sols

    def _learn_one_clause(
        self, examples: Task, hypothesis_space: TopDownHypothesisSpace
    ) -> Clause:
        """
        Learns a single clause to add to the theory.
        Algorithm from https://www.cs.ox.ac.uk/activities/programinduction/Aleph/aleph.html#SEC45

        Returns the best clause found, or None when no expansion was ever
        accepted.
        """
        # reset the search space
        hypothesis_space.reset_pointer()

        # empty the pool just in case
        self.initialise_pool()

        # Add first clauses into pool (active)
        # Pool entries are (clause, value, upper bound); index 0 and 1 of
        # the evaluate() result are used for the latter two.
        # NOTE(review): evaluate() is called twice per clause here.
        initial_clauses = hypothesis_space.get_current_candidate()
        self.put_into_pool(
            [
                (cl, self.evaluate(examples, cl,hypothesis_space)[0], self.evaluate(examples, cl,hypothesis_space)[1])
                for cl in initial_clauses
            ]
        )
        # print(self._candidate_pool)
        currentbest = None
        currentbestvalue = -99999

        i = 0

        while len(self._candidate_pool) > 0:
            # Optimise: pick smart according to evalFn (e.g. shorter clause when using compression)
            k = self.get_from_pool()
            if self._print:
                print("Expanding clause {}".format(k[0]))
            # Generate children of k
            new_clauses = hypothesis_space.expand(k[0])

            # Remove clauses that are too long...
            new_clauses = self.process_expansions(
                examples, new_clauses, hypothesis_space
            )
            # Compute costs for these children
            # NOTE(review): as above, evaluate() runs twice per clause.
            value = {cl: self.evaluate(examples, cl, hypothesis_space)[0] for cl in new_clauses}
            upperbound_value = {
                cl: self.evaluate(examples, cl, hypothesis_space)[1] for cl in new_clauses
            }

            for c in new_clauses:
                # If upper bound too low, don't bother expanding
                if upperbound_value[c] <= currentbestvalue and not c == currentbest:
                    hypothesis_space.remove(c)
                else:
                    if value[c] > currentbestvalue:
                        # New best clause: pool entries whose upper bound
                        # is below its value are pruned.
                        currentbestvalue = value[c]
                        currentbest = c
                        len_before = len(self._candidate_pool)
                        self.prune_pool(value[c])
                        len_after = len(self._candidate_pool)

                        if self._print:
                            print("Found new best: {}: {} {}".format(c,self._eval_fn.name(),value[c]))
                            print("Pruning to upperbound {} >= {}: {} of {} clauses removed".format(self._eval_fn.name(),value[c],(len_before-len_after),len_before))

                    # NOTE(review): assumed to run for every clause that
                    # was not rejected above, best or not -- confirm.
                    self.put_into_pool((c, value[c], upperbound_value[c]))
                    if self._print:
                        print("Put {} into pool, contains {} clauses".format(str(c),len(self._candidate_pool)))

            i += 1

        if self._print:
            print("New clause: {} with score {}".format(currentbest,currentbestvalue))
        return currentbest
class Propagator:
    """
    Keeps track of client REQUESTs and the PROPAGATEs seen for them, sends
    PROPAGATEs, and forwards requests to the replicas once enough PROPAGATEs
    have been collected.

    Several methods use attributes that are not defined in this class
    (``name``, ``send``, ``quorums``, ``replicas``, ``monitor``,
    ``request_msg``); they have to be provided by the class this one is
    combined with.
    """

    # Upper bound on the number of request keys remembered as
    # "PROPAGATE already requested".
    MAX_REQUESTED_KEYS_TO_KEEP = 1000

    def __init__(self, metrics: MetricsCollector = NullMetricsCollector()):
        """
        :param metrics: collector used to time the sending of PROPAGATEs
        """
        self.requests = Requests()
        # Keys for which a PROPAGATE was requested, oldest first
        self.requested_propagates_for = OrderedSet()
        self.metrics = metrics

    # noinspection PyUnresolvedReferences
    def propagate(self, request: Request, clientName):
        """
        Broadcast a PROPAGATE to all other nodes

        Nothing is sent when this node has already propagated the request.

        :param request: the REQUEST to propagate
        :param clientName: name of the client the REQUEST came from
        """
        if self.requests.has_propagated(request, self.name):
            logger.trace("{} already propagated {}".format(self, request))
        else:
            with self.metrics.measure_time(MetricsName.SEND_PROPAGATE_TIME):
                self.requests.add_propagate(request, self.name)
                propagate = self.createPropagate(request, clientName)
                logger.debug("{} propagating request {} from client {}".format(self, request.key, clientName),
                             extra={"cli": True, "tags": ["node-propagate"]})
                self.send(propagate)

    @staticmethod
    def createPropagate(
            request: Union[Request, dict], client_name) -> Propagate:
        """
        Create a new PROPAGATE for the given REQUEST.

        :param request: the client REQUEST, either a Request or its dict form
        :param client_name: name of the client; bytes are decoded to str
        :return: a new PROPAGATE msg, or None (after logging an error) when
            ``request`` is neither a Request nor a dict
        """
        if not isinstance(request, (Request, dict)):
            logger.error("{}Request not formatted properly to create propagate"
                         .format(THREE_PC_PREFIX))
            return None
        logger.trace("Creating PROPAGATE for REQUEST {}".format(request))
        request = request.as_dict if isinstance(request, Request) else \
            request
        if isinstance(client_name, bytes):
            client_name = client_name.decode()
        return Propagate(request, client_name)

    # noinspection PyUnresolvedReferences
    def canForward(self, request: Request):
        """
        Determine whether to forward client REQUESTs to replicas, based on the
        following logic:

        - If exactly f+1 PROPAGATE requests are received, then forward.
        - If less than f+1 of requests then probably there's no consensus on
            the REQUEST, don't forward.
        - If more than f+1 then already forwarded to replicas, don't forward

        Even if the node hasn't received the client REQUEST itself, if it has
        received enough number of PROPAGATE messages for the same, the REQUEST
        can be forwarded.

        When a request with an acceptable quorum is found it is marked as
        finalised as a side effect.

        :param request: the client REQUEST
        :return: None when the request can be forwarded, otherwise a string
            giving the reason why it cannot
        """
        if self.requests.forwarded(request):
            return 'already forwarded'

        # If not enough Propagates, don't bother comparing
        if not self.quorums.propagate.is_reached(self.requests.votes(request)):
            return 'not finalised'

        req = self.requests.req_with_acceptable_quorum(request,
                                                       self.quorums.propagate)
        if req:
            self.requests.set_finalised(req)
            return None
        else:
            return 'not finalised'

    # noinspection PyUnresolvedReferences
    def forward(self, request: Request):
        """
        Forward the specified client REQUEST to the other replicas on this node

        :param request: the REQUEST to propagate
        """
        key = request.key
        num_replicas = self.replicas.num_replicas
        logger.debug('{} forwarding request {} to {} replicas'
                     .format(self, key, num_replicas))
        self.replicas.pass_message(ReqKey(key))
        self.monitor.requestUnOrdered(key)
        self.requests.mark_as_forwarded(request, num_replicas)

    # noinspection PyUnresolvedReferences
    def recordAndPropagate(self, request: Request, clientName):
        """
        Record the request in the list of requests and propagate.

        Afterwards an attempt is made to forward the request.

        :param request: the client REQUEST
        :param clientName: name of the client the REQUEST came from
        """
        self.requests.add(request)
        self.propagate(request, clientName)
        self.tryForwarding(request)

    def tryForwarding(self, request: Request):
        """
        Try to forward the request if the required conditions are met.
        See the method `canForward` for the conditions to check before
        forwarding a request.

        :param request: the client REQUEST
        """
        cannot_reason_msg = self.canForward(request)
        if cannot_reason_msg is None:
            # If haven't got the client request(REQUEST) for the corresponding
            # propagate request(PROPAGATE) but have enough propagate requests
            # to move ahead
            self.forward(request)
        else:
            logger.trace("{} not forwarding request {} to its replicas "
                         "since {}".format(self, request, cannot_reason_msg))

    def request_propagates(self, req_keys):
        """
        Request PROPAGATEs for the given request keys. Since replicas can
        request PROPAGATEs independently of each other, check if it has
        been requested recently

        :param req_keys: request keys (digests) to request PROPAGATEs for
        :return: number of keys a PROPAGATE was actually requested for
        """
        i = 0
        for digest in req_keys:
            if digest not in self.requested_propagates_for:
                self.request_msg(PROPAGATE, {f.DIGEST.nm: digest})
                self._add_to_recently_requested(digest)
                i += 1
            else:
                logger.debug('{} already requested PROPAGATE recently for {}'.
                             format(self, digest))
        return i

    def _add_to_recently_requested(self, key):
        """
        Remember ``key`` as recently requested, evicting the oldest keys so
        that at most MAX_REQUESTED_KEYS_TO_KEEP keys are kept.

        :param key: request key to remember
        """
        recently_requested = self.requested_propagates_for
        # Make room *before* adding, so the size after the add does not exceed
        # the limit. The emptiness check avoids popping from an empty set when
        # the limit is configured as 0 or less.
        while recently_requested and \
                len(recently_requested) >= self.MAX_REQUESTED_KEYS_TO_KEEP:
            recently_requested.pop(last=False)
        recently_requested.add(key)
def get_JSONSchema_requirements(se, root, schema_name):
    """
    Build a JSON Schema (draft-07) dictionary for the requirements reachable
    from ``root`` in the data model graph.

    Starting at ``root``, nodes are taken from a work queue one at a time:

    - a node flagged with ``requires_child`` gets an ``enum`` property listing
      its children, and the children and their dependencies are queued;
    - the dependencies of ``root`` are added to the schema's ``required``
      list, and the dependencies of every processed node are queued.

    Each node is processed at most once, so the traversal also terminates when
    the graph contains cycles (e.g. a child whose dependencies lead back to
    its parent).

    :param se: object whose ``get_nx_schema()`` returns the data model graph
        (node attributes are read through ``graph.nodes[node]``)
    :param root: node to start from
    :param schema_name: used for the schema's ``title`` and appended to its
        ``$id``
    :return: the JSON Schema as a dict; the ``allOf`` key is removed when it
        is empty
    """
    json_schema = {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "$id":"http://example.com/" + schema_name,
        "title": schema_name,
        "type": "object",
        "properties":{},
        "required":[],
        "allOf":[]
    }

    # get graph corresponding to data model schema
    mm_graph = se.get_nx_schema()

    # nodes to check for dependencies, starting with the provided root
    nodes_to_process = OrderedSet()
    nodes_to_process.add(root)

    # nodes that were already taken off the queue; they are never queued again
    visited_nodes = set()

    # keep track of nodes with processed dependencies
    nodes_with_processed_dependencies = set()

    # NOTE: conditional (if/then) requirements are not emitted by this
    # function, so "allOf" stays empty and is dropped before returning.

    # keep checking for dependencies until there are no nodes left to process
    while nodes_to_process:
        process_node = nodes_to_process.pop()
        visited_nodes.add(process_node)

        # get allowable values for this node; each of these values is a node
        # that in turn is processed for dependencies and allowed values
        node_attributes = mm_graph.nodes[process_node]
        if requires_child in node_attributes and node_attributes[requires_child]:
            children = get_node_children(mm_graph, process_node)
            print(children)

            if children:
                # set allowable values based on children nodes
                json_schema["properties"][process_node] = {"enum": children}

                # add children for requirements processing
                nodes_to_process.update(
                    [child for child in children if child not in visited_nodes]
                )

                # queue the dependencies of every child and remember that the
                # child's dependencies have been handled here
                for child in children:
                    child_dependencies = get_node_neighbor_dependencies(mm_graph, child)
                    if child_dependencies:
                        nodes_with_processed_dependencies.add(child)
                        nodes_to_process.update(
                            [dep for dep in child_dependencies
                             if dep not in visited_nodes]
                        )

        # get required nodes by this node (e.g. other terms/nodes that need to
        # be specified based on a data model, if the given term is specified);
        # each of these node/terms needs to be processed for dependencies in
        # turn.
        if process_node not in nodes_with_processed_dependencies:
            process_node_dependencies = get_node_neighbor_dependencies(mm_graph, process_node)

            if process_node_dependencies:
                if process_node == root:
                    # these are unconditional dependencies
                    json_schema["required"] += process_node_dependencies

                nodes_to_process.update(
                    [dep for dep in process_node_dependencies
                     if dep not in visited_nodes]
                )
                nodes_with_processed_dependencies.add(process_node)

    print("=================")
    print("JSONSchema successfully generated from Schema.org schema!")
    print("=================")

    # if no conditional dependencies were added we can't have an empty 'AllOf' block in the schema, so remove it
    if not json_schema["allOf"]:
        del json_schema["allOf"]

    return json_schema