def probe(self, tuple1, resource, rjttable):
    """Probe ``tuple1`` against ``rjttable`` and emit one answer per match.

    Args:
        tuple1: Incoming tuple; its ``data``, ``ready``, ``done`` and
            ``sources`` attributes are combined with every stored record.
        resource: Key under which matching records are looked up. The
            literal ``"EOF"`` makes every produced payload ``"EOF"``.
        rjttable: Mapping from resource key to an entry exposing
            ``setRJTProbeTS`` and ``records``.

    Returns:
        The timestamp taken when the probe started.
    """
    probe_ts = time()

    # No entry for this key: nothing to join with.
    if resource not in rjttable:
        return probe_ts

    entry = rjttable[resource]
    entry.setRJTProbeTS(probe_ts)

    is_eof = resource == "EOF"
    for record in entry.records:
        stored = record.tuple

        if is_eof:
            data = "EOF"
        else:
            # Values of the incoming tuple win on overlapping keys.
            data = dict(stored.data)
            data.update(tuple1.data)

        # Combine the ready/done bit vectors and flag this operator as done.
        ready = stored.ready | tuple1.ready
        done = stored.done | tuple1.done | 2 ** self.id_operator
        sources = list(set(stored.sources) | set(tuple1.sources))

        # Hand the joined tuple to the configured eddy queue.
        self.qresults[self.eddy].put(
            Tuple(data, ready, done, sources, self.id_operator))

    return probe_ts
def __init__(self, id_operator, joinvars, eddies, eddy=None, **kwargs):
    """Initialise tables, routing target, counters and per-type limits.

    Args:
        id_operator: Identifier of this operator; also passed to the EOF
            tuple created here.
        joinvars: Join variables, stored unchanged as ``self.vars``.
        eddies: Upper bound used when an eddy is drawn at random.
        eddy: Eddy to use. Any falsy value (``None``, ``0``) makes the
            constructor draw ``randint(1, eddies)`` instead.
        **kwargs: Optional ``brtpf_mappings`` (default 30) and
            ``sparql_mappings`` (default 50) overriding the limit table.
    """
    # Identity and join configuration.
    self.id_operator = id_operator
    self.vars = joinvars
    self.eddies = eddies
    self.eddy = eddy if eddy else randint(1, self.eddies)

    # One table per input side.
    self.left_table = {}
    self.right_table = {}

    # End-of-stream marker tagged with this operator's id.
    self.eof = Tuple("EOF", 0, 0, set(), self.id_operator)

    # Wiring that is filled in after construction.
    self.left = self.right = None
    self.qresults = None
    self.sources = None

    # Runtime state and statistics.
    self.probing = Value('i', 1)
    self.independent_inputs = 1
    self.results_per_source = {}
    self.produced_tuples = 0
    self.requests = {}

    # Config
    brtpf_limit = kwargs.get("brtpf_mappings", 30)
    sparql_limit = kwargs.get("sparql_mappings", 50)
    self.__type2limit = {
        "tpf": 1,
        "brtpf": brtpf_limit,
        "sparql": sparql_limit,
    }
def __init__(self, id_operator, variables, eddies, left_leaf, right_leaf,
             eddy=None, hj_request_cost_factor=1.0):
    """Initialise tables, routing target and poly-join bookkeeping.

    Args:
        id_operator: Identifier of this operator; also passed to the EOF
            tuple created here.
        variables: Join variables, stored unchanged as ``self.vars``.
        eddies: Upper bound used when an eddy is drawn at random.
        left_leaf: Stored as ``self.left_leaf``.
        right_leaf: Stored as ``self.right_leaf``.
        eddy: Eddy to use. Any falsy value (``None``, ``0``) makes the
            constructor draw ``randint(1, eddies)`` instead.
        hj_request_cost_factor: Stored as ``self.hj_request_cost_factor``.
    """
    self.left_table = dict()
    self.right_table = dict()
    self.id_operator = id_operator
    self.vars = variables
    self.eof = Tuple("EOF", 0, 0, set(), self.id_operator)
    self.eddies = eddies

    # Honour an explicitly requested eddy; previously the argument was
    # accepted but always replaced by a random choice.
    if eddy:
        self.eddy = eddy
    else:
        self.eddy = randint(1, self.eddies)

    self.left = None
    self.right = None
    self.qresults = None
    self.probing = Value('i', 1)
    self.independent_inputs = 2
    self.left_leaf = left_leaf
    self.right_leaf = right_leaf

    # Poly Specific Properties
    self.__right_pid = None
    self.tuples_to_probe = []
    self.produced_tuples_list = []
    self.requests = {}
    self.produced_tuples = 0
    self.hj_request_cost_factor = hj_request_cost_factor
    self.switched = False
def __init__(self, id_operator, joinvars, eddies, eddy=None, left_card=-1):
    """Initialise tables, routing target, counters and fixed per-type limits.

    Args:
        id_operator: Identifier of this operator; also passed to the EOF
            tuple created here.
        joinvars: Join variables, stored unchanged as ``self.vars``.
        eddies: Upper bound used when an eddy is drawn at random.
        eddy: Eddy to use. Any falsy value (``None``, ``0``) makes the
            constructor draw ``randint(1, eddies)`` instead.
        left_card: Stored as ``self.left_est_card`` (default ``-1``).
    """
    # Identity and join configuration.
    self.id_operator = id_operator
    self.vars = joinvars
    self.eddies = eddies
    self.eddy = eddy if eddy else randint(1, self.eddies)

    # One table per input side.
    self.left_table = {}
    self.right_table = {}

    # End-of-stream marker tagged with this operator's id.
    self.eof = Tuple("EOF", 0, 0, set(), self.id_operator)

    # Wiring that is filled in after construction.
    self.left = self.right = None
    self.qresults = None
    self.sources = None

    # Runtime state and statistics.
    self.probing = Value('i', 1)
    self.independent_inputs = 1
    self.results_per_source = {}
    self.left_est_card = left_card
    self.produced_tuples = 0

    # Config
    self.__type2limit = {"tpf": 1, "brtpf": 30, "sparql": 50}
def __init__(self, id_operator, variables, eddies):
    """Initialise an operator with two independent inputs.

    Args:
        id_operator: Identifier of this operator; also passed to the EOF
            tuple created here.
        variables: Join variables; stored as a ``set`` in ``self.vars``.
        eddies: Upper bound for the randomly drawn eddy,
            ``randint(1, eddies)``.
    """
    # Identity and join configuration.
    self.id_operator = id_operator
    self.vars = set(variables)
    self.eddies = eddies
    self.eddy = randint(1, self.eddies)

    # One table per input side.
    self.left_table = {}
    self.right_table = {}

    # End-of-stream marker tagged with this operator's id.
    self.eof = Tuple("EOF", 0, 0, set(), self.id_operator)

    # Wiring that is filled in after construction.
    self.left = self.right = None
    self.qresults = None

    # Runtime state.
    self.probing = Value('i', 1)
    self.independent_inputs = 2
def __init__(self, id_operator, joinvars, eddies):
    """Initialise an operator with one independent input.

    Args:
        id_operator: Identifier of this operator; also passed to the EOF
            tuple created here.
        joinvars: Join variables, stored unchanged as ``self.vars``.
        eddies: Upper bound for the randomly drawn eddy,
            ``randint(1, eddies)``.
    """
    # Identity and join configuration.
    self.id_operator = id_operator
    self.vars = joinvars
    self.eddies = eddies
    self.eddy = randint(1, self.eddies)

    # One table per input side.
    self.left_table = {}
    self.right_table = {}

    # End-of-stream marker tagged with this operator's id.
    self.eof = Tuple("EOF", 0, 0, set(), self.id_operator)

    self.empty_answers = []

    # Wiring that is filled in after construction.
    self.left = self.right = None
    self.qresults = None
    self.sources = None

    # Runtime state.
    self.probing = Value('i', 1)
    self.independent_inputs = 1
def probe(self, tuple, resource, rjttable):
    """Probe ``tuple`` against ``rjttable``, emit matches and prune the bag.

    When ``resource`` has an entry, the probing tuple and every stored tuple
    it is joined with are removed from ``self.bag`` (missing items are
    ignored), and one joined tuple per stored record is put on the eddy
    queue.

    Args:
        tuple: Incoming tuple to probe with.
        resource: Lookup key; the literal ``"EOF"`` makes every produced
            payload ``"EOF"``.
        rjttable: Mapping from resource key to an entry exposing
            ``setRJTProbeTS`` and ``records``.

    Returns:
        The timestamp taken when the probe started.
    """
    def drop_from_bag(item):
        # Best-effort removal: the item may already be gone.
        try:
            self.bag.remove(item)
        except ValueError:
            pass

    probe_ts = time()

    # No entry for this key: the bag is left untouched.
    if resource not in rjttable:
        return probe_ts

    entry = rjttable[resource]
    entry.setRJTProbeTS(probe_ts)
    matches = entry.records

    # The probing tuple has found a partner table entry.
    drop_from_bag(tuple)

    is_eof = resource == "EOF"
    for record in matches:
        stored = record.tuple

        if is_eof:
            data = "EOF"
        else:
            # Values of the incoming tuple win on overlapping keys.
            data = dict(stored.data)
            data.update(tuple.data)

        # Combine the ready/done bit vectors and flag this operator as done.
        ready = stored.ready | tuple.ready
        done = stored.done | tuple.done | 2 ** self.id_operator
        sources = list(set(stored.sources) | set(tuple.sources))

        # Hand the joined tuple to the configured eddy queue.
        self.qresults[self.eddy].put(
            Tuple(data, ready, done, sources, self.id_operator))

        # The stored tuple has now been matched as well.
        drop_from_bag(record.tuple)

    return probe_ts
def stage3(self):
    """Emit every tuple still in ``self.bag``, padded with empty right values.

    For each tuple in the bag, every variable in ``self.vars_right`` is bound
    to the empty string and then overlaid with the tuple's own bindings. The
    result is marked done for this operator and put on the eddy queue.
    """
    for unmatched in self.bag:
        # Start from blank right-hand bindings, then overlay the real data.
        padded = dict.fromkeys(self.vars_right, '')
        padded.update(unmatched.data)

        ready = unmatched.ready
        done = unmatched.done | 2 ** self.id_operator
        sources = list(set(unmatched.sources))

        self.qresults[self.eddy].put(
            Tuple(padded, ready, done, sources, self.id_operator))
def __init__(self, id_operator, joinvars, eddies, eddy=None):
    """Initialise an operator with one independent input.

    Args:
        id_operator: Identifier of this operator; also passed to the EOF
            tuple created here.
        joinvars: Join variables, stored unchanged as ``self.vars``.
        eddies: Upper bound used when an eddy is drawn at random.
        eddy: Eddy to use. Any falsy value (``None``, ``0``) makes the
            constructor draw ``randint(1, eddies)`` instead.
    """
    # Identity and join configuration.
    self.id_operator = id_operator
    self.vars = joinvars
    self.eddies = eddies
    self.eddy = eddy if eddy else randint(1, self.eddies)

    # One table per input side.
    self.left_table = {}
    self.right_table = {}

    # End-of-stream marker tagged with this operator's id.
    self.eof = Tuple("EOF", 0, 0, set(), self.id_operator)

    self.empty_answers = []

    # Wiring that is filled in after construction.
    self.left = self.right = None
    self.qresults = None
    self.sources = None

    # Runtime state and statistics.
    self.probing = Value('i', 1)
    self.independent_inputs = 1
    self.produced_tuples = 0
def probe_table_of_source(self, rtuple, right, ldf_server, tuple_rjttable):
    """Answer ``rtuple`` from ``tuple_rjttable`` if its key is already stored.

    The lookup key is the concatenation of ``str(rtuple.data[var])`` for
    every variable in ``self.vars``.

    Args:
        rtuple: Tuple to probe with.
        right: Unused by this method.
        ldf_server: Forwarded to ``self.to_queue`` with every result.
        tuple_rjttable: Mapping from lookup key to an entry exposing
            ``setRJTProbeTS`` and ``records``.

    Returns:
        ``None`` when the key was found (results, if any, have been queued);
        otherwise ``rtuple`` itself.
    """
    # Build the lookup key from the values of the operator variables.
    resource = ''.join(str(rtuple.data[var]) for var in self.vars)
    probe_ts = time()

    # Miss: hand the tuple back to the caller.
    if resource not in tuple_rjttable:
        return rtuple

    entry = tuple_rjttable[resource]
    entry.setRJTProbeTS(probe_ts)

    for record in entry.records:
        stored = record.tuple
        # An EOF record ends the stored results.
        if stored.data == "EOF":
            break

        # Values of the probing tuple win on overlapping keys.
        merged = dict(stored.data)
        merged.update(rtuple.data)

        # Combine the ready/done bit vectors and flag this operator as done.
        ready = stored.ready | rtuple.ready
        done = stored.done | rtuple.done | 2 ** self.id_operator
        sources = list(set(stored.sources) | set(rtuple.sources))

        self.to_queue(
            Tuple(merged, ready, done, sources, self.id_operator),
            ldf_server)

    return None
def __init__(self, id_operator, vars_left, vars_right, eddies, eddy=None):
    """Initialise an operator with two inputs and a bag of pending tuples.

    Args:
        id_operator: Identifier of this operator; also passed to the EOF
            tuple created here.
        vars_left: Variables of the left input; stored as a ``set``.
        vars_right: Variables of the right input; stored as a ``set``.
        eddies: Upper bound used when an eddy is drawn at random.
        eddy: Eddy to use. Any falsy value (``None``, ``0``) makes the
            constructor draw ``randint(1, eddies)`` instead.
    """
    # Identity and routing.
    self.id_operator = id_operator
    self.eddies = eddies
    self.eddy = eddy if eddy else randint(1, self.eddies)

    # One table per input side.
    self.left_table = {}
    self.right_table = {}

    # End-of-stream marker; built with ready and done both set to 1.
    self.eof = Tuple("EOF", 1, 1, set(), self.id_operator)

    self.bag = []

    # Wiring that is filled in after construction.
    self.left = self.right = None
    self.qresults = None
    self.sources = None

    # Runtime state.
    self.probing = Value('i', 1)
    self.independent_inputs = 2

    # The join variables are those shared by both sides.
    self.vars_left = set(vars_left)
    self.vars_right = set(vars_right)
    self.vars = self.vars_left & self.vars_right
def probe(self, tuple1, resource, rjttable, other_rjttable):
    """Join ``tuple1`` with stored records, or fetch them from the right source.

    On a hit, every stored record up to the first EOF record is merged with
    ``tuple1`` and queued. On a miss, ``self.right`` is executed with the
    tuple's bindings for ``self.vars``; every answer is merged, queued and
    recorded in ``rjttable`` under ``resource``. An empty answer set is
    recorded as a single entry holding the EOF tuple.

    Args:
        tuple1: Incoming tuple to probe with.
        resource: Lookup key for ``rjttable``.
        rjttable: Mapping from resource key to an entry exposing
            ``setRJTProbeTS``, ``updateRecords`` and ``records``.
        other_rjttable: Unused by this method.

    Returns:
        The timestamp taken when the probe started.
    """
    probe_ts = time()

    # Hit: answer from the records already stored.
    if resource in rjttable:
        entry = rjttable[resource]
        entry.setRJTProbeTS(probe_ts)

        for record in entry.records:
            stored = record.tuple
            # An EOF record ends the stored results.
            if stored.data == "EOF":
                break

            # Values of the probing tuple win on overlapping keys.
            merged = dict(stored.data)
            merged.update(tuple1.data)

            ready = stored.ready | tuple1.ready
            done = stored.done | tuple1.done | 2 ** self.id_operator
            sources = list(set(stored.sources) | set(tuple1.sources))

            self.to_queue(
                Tuple(merged, ready, done, sources, self.id_operator))

        return probe_ts

    # Miss: ask the right-hand source with the tuple's bindings.
    instances = {v: tuple1.data[v] for v in self.vars}
    qright = Queue()
    self.right.execute(self.vars, instances, qright)

    tuple2 = qright.get(True)
    self.sources = tuple2.sources

    # Empty result set: remember it so the source is not asked again.
    if (tuple2 == "EOF") or (tuple2.data == "EOF"):
        empty_record = Record(tuple2, probe_ts, time(), float("inf"))
        rjttable[resource] = RJTTail(empty_record, float("inf"))

    # Non-empty result set: drain the queue until EOF arrives.
    while (tuple2 != "EOF") and (tuple2.data != "EOF"):
        merged = dict(tuple2.data)
        merged.update(tuple1.data)

        ready = tuple2.ready | tuple1.ready
        done = tuple2.done | tuple1.done | 2 ** self.id_operator
        sources = list(set(tuple2.sources) | set(tuple1.sources))

        self.to_queue(Tuple(merged, ready, done, sources, self.id_operator))

        # Store the fetched answer under this resource.
        record = Record(tuple2, probe_ts, time(), float("inf"))
        if resource in rjttable:
            entry = rjttable[resource]
            entry.updateRecords(record)
            entry.setRJTProbeTS(probe_ts)
        else:
            rjttable[resource] = RJTTail(record, float("inf"))

        tuple2 = qright.get(True)

    # Close queue for this sources.
    qright.close()
    return probe_ts
def probe_tuples_from_source(self, tuple_list, right, ldf_server, tuple_rjttable):
    """Send a batch of tuples to ``right`` and join them with its answers.

    The bindings of every tuple for ``self.vars`` are sent in one
    ``right.execute`` call. Each answer is merged with every compatible
    tuple of the batch, queued via ``self.to_queue`` and recorded in
    ``tuple_rjttable``. An empty answer set is recorded for every tuple of
    the batch. Finally the request count carried by the last tuple read is
    added to ``self.requests``.

    Args:
        tuple_list: Batch of tuples to probe; nothing happens when empty.
        right: Operator whose ``execute`` is called with the batch.
        ldf_server: Forwarded to ``right.execute`` and ``self.to_queue``.
        tuple_rjttable: Mapping from lookup key to table entry.
    """
    probe_ts = time()
    if len(tuple_list) == 0:
        return

    # One binding dictionary per probed tuple.
    instances = [
        {v: rtuple.data[v] for v in self.vars} for rtuple in tuple_list
    ]

    # Contact the sources.
    qright = Queue()
    right.execute(self.vars, instances, qright, ldf_server=ldf_server)

    tuple2 = qright.get(True)
    self.sources = tuple2.sources

    # Empty result set: record it for every tuple of the batch.
    if (tuple2 == "EOF") or (tuple2.data == "EOF"):
        for tested_tuple in tuple_list:
            resource = ''.join(
                str(tested_tuple.data[var]) for var in self.vars)
            record = Record(tuple2, probe_ts, time(), float("inf"))
            tuple_rjttable[resource] = RJTTail(record, float("inf"))

    # Non-empty result set: drain the queue until EOF arrives.
    while (tuple2 != "EOF") and (tuple2.data != "EOF"):
        rtuple_added = False
        for rtuple in tuple_list:
            if not compatible_solutions(rtuple.data, tuple2.data):
                continue

            # Values of the probed tuple win on overlapping keys.
            merged = dict(tuple2.data)
            merged.update(rtuple.data)

            ready = tuple2.ready | rtuple.ready
            done = tuple2.done | rtuple.done | 2 ** self.id_operator
            sources = list(set(tuple2.sources) | set(rtuple.sources))
            res = Tuple(merged, ready, done, sources, self.id_operator)

            record = Record(tuple2, probe_ts, time(), float("inf"))
            resource = ''.join(str(rtuple.data[var]) for var in self.vars)

            # Send tuple to eddy operators.
            self.to_queue(res, ldf_server)

            # Once an entry has been (re)created for this answer, every
            # further match in the same pass also replaces its entry.
            if rtuple_added or resource not in tuple_rjttable:
                tuple_rjttable[resource] = RJTTail(record, float("inf"))
                rtuple_added = True
            else:
                entry = tuple_rjttable[resource]
                entry.updateRecords(record)
                entry.setRJTProbeTS(probe_ts)

        # Get next solution.
        tuple2 = qright.get(True)

    # Account for the requests reported by the last tuple read.
    r_source_id = self.right.source_id
    self.requests[r_source_id] += tuple2.requests.get(r_source_id, 0)

    # Close queue for this sources.
    qright.close()
def probe_tuple(self, tuple_list, right, ldf_server, tuple_rjttable):
    """Send a batch of tuples to ``right`` and join them with its answers.

    EOF tuples in ``tuple_list`` carry no bindings and are skipped
    everywhere. Previously they were skipped only while building the request,
    so the later loops indexed or merged the ``"EOF"`` string.

    The bindings of the remaining tuples for ``self.vars`` are sent in one
    ``right.execute`` call. Each answer is merged with every compatible
    tuple, queued via ``self.to_queue`` unless it is already in
    ``self.produced_tuples_list``, and recorded in ``tuple_rjttable``. An
    empty answer set is recorded for every probed tuple. Finally the request
    count carried by the last tuple read is added to ``self.requests``.

    Args:
        tuple_list: Batch of tuples to probe; nothing happens when it holds
            no non-EOF tuple.
        right: Operator whose ``execute`` is called with the batch.
        ldf_server: Forwarded to ``right.execute``.
        tuple_rjttable: Mapping from lookup key to table entry.
    """
    probe_ts = time()

    # Only tuples with real bindings can be probed.
    candidates = [t for t in tuple_list if t.data != "EOF"]
    if not candidates:
        return

    instances = [{v: t.data[v] for v in self.vars} for t in candidates]

    # Contact the sources.
    qright = Queue()
    right.execute(self.vars, instances, qright, ldf_server=ldf_server)

    # Get the tuples from right_plan queue.
    tuple2 = qright.get(True)
    self.sources = tuple2.sources

    # Empty result set: record it for every probed tuple.
    if (tuple2 == "EOF") or (tuple2.data == "EOF"):
        for tested_tuple in candidates:
            resource = ''.join(
                str(tested_tuple.data[var]) for var in self.vars)
            record = Record(tuple2, probe_ts, time(), float("inf"))
            tuple_rjttable[resource] = RJTTail(record, float("inf"))

    # Non-empty result set: drain the queue until EOF arrives.
    while (tuple2 != "EOF") and (tuple2.data != "EOF"):
        rtuple_added = False
        for rtuple in candidates:
            if not compatible_solutions(rtuple.data, tuple2.data):
                continue

            # Values of the probed tuple win on overlapping keys.
            data = dict(tuple2.data)
            data.update(rtuple.data)

            # Update ready and done vectors of solution mapping.
            ready = tuple2.ready | rtuple.ready
            done = tuple2.done | rtuple.done | pow(2, self.id_operator)
            sources = list(set(tuple2.sources) | set(rtuple.sources))
            res = Tuple(data, ready, done, sources, self.id_operator)

            record = Record(tuple2, probe_ts, time(), float("inf"))
            resource = ''.join(str(rtuple.data[var]) for var in self.vars)

            # Send it only if it has not been produced before.
            # TODO: Is this always correct? Several identical mappings for
            # the same variable from the left_plan side would need tracking
            # of the producing triple, or removal from the table.
            if res not in self.produced_tuples_list:
                self.to_queue(res)

            # Once an entry has been (re)created for this answer, every
            # further match in the same pass also replaces its entry.
            if resource in tuple_rjttable and not rtuple_added:
                entry = tuple_rjttable[resource]
                entry.updateRecords(record)
                entry.setRJTProbeTS(probe_ts)
            else:
                tuple_rjttable[resource] = RJTTail(record, float("inf"))
                rtuple_added = True

        # Get next solution.
        tuple2 = qright.get(True)

    # NOTE(review): the counter is keyed by self.right_operator while the
    # request went to the `right` argument - confirm they are the same.
    r_source_id = self.right_operator.source_id
    self.requests[r_source_id] += tuple2.requests.get(r_source_id, 0)
    qright.close()
def probe(self, tuple, resource, rjttable, other_rjttable):
    """Join ``tuple`` with stored records, or fetch them from the right source.

    On a hit in ``rjttable``, every stored record up to the first EOF record
    is merged with ``tuple`` and put on the eddy queue. On a miss,
    ``self.right`` is executed with the tuple's bindings for ``self.vars``.
    Every answer is merged, queued and recorded in ``other_rjttable``. If the
    source returns nothing, ``tuple`` is emitted once with every variable of
    ``self.right.vars`` bound to the empty string.

    Args:
        tuple: Incoming tuple to probe with.
        resource: Lookup key for both tables.
        rjttable: Table that is probed.
        other_rjttable: Table that receives the answers fetched on a miss.

    Returns:
        The timestamp taken when the probe started.
    """
    probeTS = time()

    # Hit: answer from the records already stored.
    if resource in rjttable:
        entry = rjttable[resource]
        entry.setRJTProbeTS(probeTS)

        for record in entry.records:
            stored = record.tuple
            # An EOF record ends the stored results.
            if stored.data == "EOF":
                break

            # Values of the probing tuple win on overlapping keys.
            merged = dict(stored.data)
            merged.update(tuple.data)

            ready = stored.ready | tuple.ready
            done = stored.done | tuple.done | pow(2, self.id_operator)
            sources = list(set(stored.sources) | set(tuple.sources))

            self.qresults[self.eddy].put(
                Tuple(merged, ready, done, sources, self.id_operator))

        return probeTS

    # Miss: ask the right-hand source with the tuple's bindings.
    instances = {v: tuple.data[v] for v in self.vars}
    qright = Queue()
    self.right.execute(self.vars, instances, qright)

    rtuple = qright.get(True)
    self.sources = rtuple.sources

    if rtuple.data == "EOF":
        # No partner: emit the tuple with blank right-hand bindings.
        padded = dict.fromkeys(self.right.vars, '')
        padded.update(tuple.data)

        sources = list(set(tuple.sources))
        done = tuple.done | pow(2, self.id_operator)
        ready = tuple.ready
        self.qresults[self.eddy].put(
            Tuple(padded, ready, done, sources, self.id_operator))
        return probeTS

    while not (rtuple.data == "EOF"):
        # Create solution mapping.
        data = dict(rtuple.data)
        data.update(tuple.data)

        ready = rtuple.ready | tuple.ready
        done = rtuple.done | tuple.done | pow(2, self.id_operator)
        sources = list(set(rtuple.sources) | set(tuple.sources))

        self.qresults[self.eddy].put(
            Tuple(data, ready, done, sources, self.id_operator))

        # Record the answer in the table that is written to. Testing
        # `rjttable` here was always false on this branch, so each answer
        # replaced the previous entry instead of extending it.
        record = Record(rtuple, probeTS, time(), float("inf"))
        if resource in other_rjttable:
            tail = other_rjttable[resource]
            tail.updateRecords(record)
            tail.setRJTProbeTS(probeTS)
        else:
            other_rjttable[resource] = RJTTail(record, float("inf"))

        rtuple = qright.get(True)

    return probeTS