def __init__(self, id, if_subject, if_condition, then_subject, then_condition, if_and=False, if_subject2=None, if_condition2=None):
    """Build a rule: IF fact (optionally ANDed with a second IF fact) implies THEN fact.

    :param id: rule identifier, stored as-is
    :param if_subject, if_condition: subject/condition of the first IF fact
    :param then_subject, then_condition: subject/condition of the THEN fact
    :param if_and: when truthy, the rule is a conjunction of two IF facts
    :param if_subject2, if_condition2: second IF fact, only used when if_and is truthy
    """
    self.id = id
    self.if_fact = Fact(if_subject, if_condition)
    self.then_fact = Fact(then_subject, then_condition)
    self.if_and = if_and
    # Idiomatic truthiness test instead of `== True`. Note: if_fact2 only
    # exists on conjunctive rules; readers must check self.if_and first.
    if if_and:
        self.if_fact2 = Fact(if_subject2, if_condition2)
def convert_to_fact_instances(engine, tokens):
    """Convert fact literals to instances of Fact, mutating *tokens* in place.

    Handles three string shapes:
      "(A" -> "(" followed by Fact("A")
      "A)" -> Fact("A") followed by ")"
      "A"  -> Fact("A")
    Non-string tokens and non-matching strings are kept unchanged.

    BUGFIX: the original spliced two-element replacements into the list it
    was enumerating, shifting indices mid-iteration; we now build a new
    list and slice-assign so the caller's list object is still updated.
    """
    converted = []
    for elem in tokens:
        if isinstance(elem, str):
            # Case : "(A"
            if re.match(r"\([a-zA-Z]$", elem):
                converted.extend(["(", Fact(elem[1], engine)])
                continue
            # Case : "A)"
            if re.match(r"[a-zA-Z]\)$", elem):
                converted.extend([Fact(elem[0], engine), ")"])
                continue
            # Normal case : "A"
            if re.match(r"[a-zA-Z]$", elem):
                converted.append(Fact(elem, engine))
                continue
        converted.append(elem)
    tokens[:] = converted
def __init__(self, conclusion=None, premises=None):
    """Build a rule from a conclusion fact and its premise facts.

    BUGFIX: the original signature was `conclusion=Fact(), premises=[]` —
    both defaults are evaluated once at definition time, so every instance
    created without arguments shared the SAME Fact and the SAME list
    (which this constructor then sorts in place). Fresh objects are now
    created per call.

    :param conclusion: inferred fact (a new empty Fact when omitted)
    :param premises: list of condition facts (new empty list when omitted)
    """
    self.conclusion = conclusion if conclusion is not None else Fact()  # Inferred fact
    self.premises = premises if premises is not None else []  # Conditions: list of facts
    self.ops = self.get_ops()  # List of related relations and functions
    self.premises.sort()
    self.dup_predicate = self.detect_dup_predicate()
def main(args):
    """Match args.text against all stored facts and print the best match as JSON.

    Output: {'fact': <json object>, 'similarity': <float>} when the best
    match clears the threshold, else {'fact': None, 'similarity': 0.0}.
    """
    test_fact = Fact(args.text, pre_processor.preprocess(args.text), FactType.TRUTH)
    all_facts = db_manager.get_all_facts()
    text_comp.load_model(cfg.DEFAULT_MODEL_FILE)
    matches = text_comp.match_fact(test_fact, all_facts)

    # CLI threshold overrides the configured default.
    threshold = cfg.SIMILARITY_THRESHOLD if args.threshold is None else args.threshold

    # BUGFIX: the original indexed matches[0] unconditionally, raising
    # IndexError on an empty database; fall through to the "no match"
    # payload instead.
    output = {'fact': None, 'similarity': 0.0}
    if matches:
        best_fact, best_score = matches[0]
        if best_score >= threshold:
            output = {
                'fact': best_fact.to_json_object(),
                'similarity': float(best_score)
            }
    print(json.dumps(output))
def main(args):
    """Create a new fact, or (with --append) merge it into a similar existing fact.

    Without --append: store the fact and print its JSON. With --append:
    show the most similar stored facts, ask the admin to pick one, and
    either store the new fact or append its novel processed tokens to the
    chosen fact.
    """
    # preprocess
    text = args.text
    fact_type = FactType.FAKE if args.fact == "fake" else FactType.TRUTH
    new_fact = Fact(text, pre_processor.preprocess(text), fact_type)
    if not args.append:
        # just create the new fact and print the json
        print(json.dumps(add_new_fact(new_fact).to_json_object()))
        return

    # compare with facts
    text_comp.load_model(cfg.DEFAULT_MODEL_FILE)
    similar_fact_tuples = text_comp.match_fact(new_fact, db_manager.get_all_facts())
    similar_fact_tuples = similar_fact_tuples[:cfg.ADD_FACT_CHOOSE_AMOUNT]
    similar_fact_tuples = [
        t for t in similar_fact_tuples if t[1] >= cfg.SIMILARITY_THRESHOLD
    ]
    similar_facts = [fact_tuple[0] for fact_tuple in similar_fact_tuples]

    if not similar_facts:
        # no similar fact yet -> create new fact -> retrain model
        add_new_fact(new_fact)
        return

    # there are similar facts -> print top facts
    print("These are facts that show similarities to the given text:")
    for fact in similar_facts:
        print(fact.id, fact.content)

    # get input from admin.
    # BUGFIX: ids are ints — the original compared the raw input string to
    # f.id, so no fact ever matched.
    matched_fact_id = int(input("Type id of fact (or -1): "))
    if matched_fact_id == -1:
        # admin: doesn't match any of the given facts -> create new fact
        add_new_fact(new_fact)
        return

    # BUGFIX: next() without a default raises StopIteration when nothing
    # matches, so the original `is not None` branch was unreachable.
    matched_fact = next((f for f in similar_facts if f.id == matched_fact_id), None)
    if matched_fact is None:
        print(
            "The given id does not match with any of the similar facts"
        )
        return

    # append preprocess to given fact.
    # BUGFIX: the original read `fact.processed` — the leftover loop
    # variable from the print loop — instead of the new fact's tokens.
    new_processed = [
        proc for proc in new_fact.processed
        if proc not in matched_fact.processed
    ]
    db_manager.add_to_processed(matched_fact_id, new_processed)
    # -> retrain model TODO (not currently possible)
    print("Added following key words to the fact: ", new_processed)
def compare_test_to_db(test_query):
    """Load facts from facts.txt and return the top-3 matches for *test_query*.

    Every line of facts.txt becomes a TRUTH fact; the pre-trained "test"
    model is loaded before matching.
    """
    preprocessor = PreProcessor()
    with open('facts.txt', 'r') as fact_file:
        lines = fact_file.read().split("\n")
    facts = [
        Fact(line, preprocessor.preprocess(line), FactType.TRUTH)
        for line in lines
    ]

    text_comp = TextComparator()
    # text_comp.train_model(facts)
    file_name = "test"
    # text_comp.save_model(file_name)
    text_comp.load_model(file_name)

    test_fact = Fact(test_query, preprocessor.preprocess(test_query), FactType.TRUTH)
    return text_comp.match_fact(test_fact, facts, topn=3)
def handleFactForParentheses(self, fact):
    """Normalize an operand: wrap a literal 0/1 in a throwaway Fact, pass Facts through.

    BUGFIX: the original called `tmp.setValue(b)` where `b` is an undefined
    name — a guaranteed NameError whenever a literal operand reached this
    method. The literal value itself (`fact`) is what must be stored.

    :param fact: either an int literal 0/1 or an existing Fact instance
    :return: a Fact carrying the operand's value
    """
    if fact == 1 or fact == 0:
        tmp = Fact("-")
        tmp.setValue(fact)
        return tmp
    return fact
def test():
    """Smoke-test DBManager: insert two facts, read them back, extend processed words."""
    manager = DBManager()
    print(manager.get_all_facts())

    # One TRUTH fact and one FAKE fact, printing the table after each insert.
    true_fact = Fact("some content that is raw and true",
                     ["some", "content", "true"], FactType.TRUTH)
    manager.add_fact(true_fact)
    print(manager.get_all_facts())

    fake_fact = Fact("some content that is raw and false",
                     ["some", "content", "false"], FactType.FAKE)
    manager.add_fact(fake_fact)
    print(manager.get_all_facts().__str__())

    # Extend fact #1's processed tokens and show the before/after rows.
    print(manager.get_fact(1))
    manager.add_to_processed(1, ["HELO", "250"])
    print(manager.get_fact(1))
def opn_xor(self, l, r, facts):
    """Evaluate the negated XOR ("!^") of two operands.

    Returns a Fact valued -1 when either operand is undetermined,
    1 when the operands are equal (NOT-XOR), else 0.
    """
    l = self.handleFactForParentheses(l)
    r = self.handleFactForParentheses(r)
    result = Fact(l.getName())
    if l.getValue() == -1 or r.getValue() == -1:
        # Propagate "undetermined".
        result.setValue(-1)
    # BUGFIX: `^` binds tighter than `==`, so the original
    # `l.getValue() == 1 ^ r.getValue() == 0` parsed as the chained
    # comparison `l.getValue() == (1 ^ r.getValue()) == 0`, which is true
    # only for (l=0, r=1). Parenthesizing restores the intended
    # (l == 1) XOR (r == 0), i.e. true exactly when the operands are equal.
    elif (l.getValue() == 1) ^ (r.getValue() == 0):
        result.setValue(1)
    else:
        result.setValue(0)
    return result
def __init__(self):
    """Register the wall-manipulation and teleport skills as Fact entries."""
    # (display name, use function, targeting type, targeting prompt)
    specs = [
        ('Create Wall', create_wall, 'single', 'Select tile to make wall'),
        ('Break Wall', break_wall, 'single', 'Select tile to destoy wall'),
        ('Move Wall', move_wall, 'multi', 'Select tile to move wall'),
        ('Teleport', teleport, 'single', 'Select tile to teleport'),
    ]
    self.skills = []
    for name, use_fn, target_type, prompt in specs:
        component = Skill(use_function=use_fn,
                          targeting_skill=True,
                          targeting_type=target_type,
                          targeting_message=Message(prompt, tcod.cyan))
        self.skills.append(Fact(name, 'Self explanatory', skill=component))
def extractFacts(article):
    """Split an article body into sentences and keep the checkable ones as Facts.

    A sentence becomes a Fact when its checkability score exceeds
    CHECK_THRESHOLD; each Fact records the article's date, title and link.
    """
    source = {'title': tryTitle(article), 'link': tryLink(article)}
    facts = []
    for sentence in article.body.split('.'):
        confidence = checkability(sentence)
        if confidence <= CHECK_THRESHOLD:
            continue
        facts.append(
            Fact(published_date=tryDate(article),
                 distilledFact=str(distillFact(sentence)),
                 factStrings=[sentence],
                 factHash=hashFact(sentence),
                 confidence=confidence,
                 sources=[source]))
    return facts
def add_facts(self, tokenized_line, empty=False):
    """
    Add facts to self.facts array
    Input: "=ABC"
    """
    if empty:
        self.facts = set()
    # Lines not starting with "=" are not fact declarations; ignore them.
    if not tokenized_line.startswith("="):
        return
    for char in tokenized_line[1:]:
        # Reject a letter that is already registered.
        if char in self.facts:
            raise ParsingError(
                f"Facts must be unique : {char} is present twice!")
        self.facts.add(Fact(char, self))
def add_query(self, tokenized_line, empty=False):
    """
    Add query item to self.query array
    Input: ?XZY
    """
    if empty:
        self.query = []
    # Lines not starting with "?" are not query declarations; ignore them.
    if not tokenized_line.startswith("?"):
        return
    for char in tokenized_line[1:]:
        # NOTE(review): the duplicate test runs against self.facts, while
        # the error message claims the *query* contains a repeat — looks
        # like this should be `char in self.query`; confirm intent.
        # Also note the membership test compares a str against Fact
        # instances, so it presumably relies on Fact.__eq__/__hash__
        # matching the bare letter — verify in the Fact class.
        if char in self.facts:
            raise ParsingError(
                f"Query must be unique : {char} is present twice!")
        new_fact = Fact(char, self)
        self.query.append(new_fact)
def create_fact(fact_line: str) -> Fact:
    """Create a fact from a string like ``name(Arg1, arg2)``.

    Arguments whose first character is uppercase become CONSTANT,
    the rest VARIABLE.

    BUGFIX/robustness: an empty argument list ("name()") used to split
    into [''] and crash on ``x.strip()[0]``; it now yields a fact with no
    arguments.

    :param fact_line: string containing the fact
    :return: the parsed Fact
    :raises ValueError: if either bracket is missing (from str.index)
    """
    open_bracket_idx = fact_line.index('(')
    close_bracket_idx = fact_line.index(')')
    fact_name = fact_line[:open_bracket_idx].strip()
    args_text = fact_line[open_bracket_idx + 1:close_bracket_idx].strip()
    arg_names = [a.strip() for a in args_text.split(',')] if args_text else []
    fact_args = [
        Argument(name=a,
                 atype=Argument.CONSTANT if a[0].isupper() else Argument.VARIABLE)
        for a in arg_names
    ]
    return Fact(name=fact_name, arguments=fact_args)
def main(args):
    """Admin CLI: reset the DB, load facts from a file, retrain the model, or dump the DB."""
    if args.reset:
        really = input(
            "Do you really want to reset the database? This action cannot be undone (y/N): ")
        if really.lower() == "y":
            db_manager.create_table(True)
            print("database successfully reset")

    if args.load is not None:
        # load to database: one fact per line; "[fake]"/"[0]" tag a line as
        # FAKE, and all "[...]" tags are stripped before storing.
        with open(args.load, 'r') as text_file:
            fact_data = text_file.read().split("\n")
        facts = []
        for data in fact_data:
            # Robustness: a trailing newline used to produce an empty fact.
            if not data.strip():
                continue
            fact_type = FactType.TRUTH
            if "[fake]" in data or "[0]" in data:
                fact_type = FactType.FAKE
            # raw string: avoids invalid "\[" escape warnings (Python 3.6+).
            data = re.sub(r"\[([^\]|]*)\]", '', data)
            facts.append(Fact(data, pre_processor.preprocess(data), fact_type))
        for fact in facts:
            db_manager.add_fact(fact)
        print("loaded " + str(len(facts)) + " facts to database")

    if args.retrain:
        # train and save the NN
        print("Starting to retrain the model ...")
        all_facts = db_manager.get_all_facts()
        text_comp.train_model(all_facts)
        text_comp.save_model(cfg.DEFAULT_MODEL_FILE)
        print("... retrained the model and saved to " + cfg.DEFAULT_MODEL_FILE)

    if args.printdb:
        all_facts = db_manager.get_all_facts()
        print("===== START OF DATABASE (" + str(len(all_facts)) + " rows) =====")
        for fact in all_facts:
            print(fact)
        print("===== END OF DATABASE =====")
def actualize_result(self, variables: Dict[str, str]) -> Tuple[bool, Fact]:
    """Actualize the rule's result (replace variables with constants).

    :param variables: mapping of variable names to actualized constants
    :return: (whether any substitution happened, the actualized fact)
    """
    substituted = False
    new_arguments = []
    for arg in self.result.arguments:
        if arg.atype == Argument.CONSTANT:
            # Constants pass through unchanged.
            new_arguments.append(arg)
        else:
            # Variable: substitute its bound constant.
            # NOTE(review): variables.get() yields None for an unbound
            # name — presumably callers guarantee a binding; confirm.
            new_arguments.append(
                Argument(name=variables.get(arg.name), atype=Argument.CONSTANT))
            substituted = True
    return substituted, Fact(name=self.result.name, arguments=new_arguments)
def fact_from_tuple(fact_tuple):
    """Build a Fact from a DB row tuple ``(id, content, processed, type)``.

    ``processed`` is stored as a space-separated string and is split back
    into tokens; ``type`` is converted to a FactType.
    """
    row_id = fact_tuple[0]
    content = fact_tuple[1]
    processed_tokens = fact_tuple[2].split()
    fact_type = FactType(fact_tuple[3])
    return Fact(content, processed_tokens, fact_type, row_id)
def __init__(self, conclusion=None, premises=None):
    """Build a rule from a conclusion fact and its premise facts.

    BUGFIX: the original signature was `conclusion=Fact(), premises=[]` —
    defaults are evaluated once at definition time, so all no-argument
    instances shared one Fact and one list (sorted in place below).
    Fresh objects are now created per call.

    :param conclusion: inferred fact (a new empty Fact when omitted)
    :param premises: list of condition facts (new empty list when omitted)
    """
    self.conclusion = conclusion if conclusion is not None else Fact()
    self.premises = premises if premises is not None else []
    self.predicates = self.GetPredicates()
    self.premises.sort()
    self.dup_predicate = self.FindDupPredicate()
def fillVar(facts, letter):
    """Ensure *letter* has a Fact entry in *facts* when it denotes a variable.

    :param facts: dict mapping letters to Fact instances (mutated in place)
    :param letter: candidate symbol; only added if isVar() accepts it and
                   it is not already present
    """
    # Idiom fix: `not letter in facts` -> `letter not in facts`; the two
    # nested ifs collapse into one guard.
    if isVar(letter) and letter not in facts:
        facts[letter] = Fact(letter)
def calcul(self, query, facts): ptr = { "+": self.op_add, "|": self.op_or, "^": self.op_xor, "!+": self.opn_add, "!|": self.opn_or, "!^": self.opn_xor } # print("Query:" + str( query ) ) index = self.getNextOperatorIndexInQuery(query) while (index != -1): # print() # print ("index="+str(index)) l = str(query[index-2]) r = str(query[index-1]) op = query[index] # print("fef"+ l+" "+op+" "+r ) left = None right = None if (l[0] == '!'): tmpFact = Fact(l[1]) if (l[1] != '0' and l[1] != '1'): tmpFact.setValue( facts[l[1]].searchValue(facts)) if (tmpFact.getValue() == 0): tmpFact.setValue(1) else: tmpFact.setValue(0) left = tmpFact else: left = Fact(l) if (l == '0' or l == '1') else facts[l] if (r[0] == '!'): tmpFact = Fact(r[1]) if (r[1] != '0' and r[1] != '1'): tmpFact.setValue( facts[r[1]].searchValue(facts)) if (tmpFact.getValue() == 0): tmpFact.setValue(1) else: tmpFact.setValue(0) right = tmpFact else: right = Fact(r) if (r == '0' or r == '1') else facts[r] # print("Query1:" + str( query ) +"result="+query[index]) query[index] = str(ptr[op](left, right, facts).getValue()) # print("Query2:" + str( query ) +"result="+query[index]) # print("peek-2:"+query[index-2] + "peek1" + query[index-1] ) query.pop(index-2) query.pop(index-1 -1) index = self.getNextOperatorIndexInQuery(query) # print("new query:" + str( query ) ) if (query[0] == '0'): return 0 elif (query[0] == '1'): return 1 # print ("##"+query[0] + " str:" + str(query)[0] +" - " +str(len(query[0]))) if len(query[0]) > 1: res = facts[query[0][1]].searchValue(facts); if res == 1: return 0 else: return 1 else: return facts[query[0]].searchValue(facts)
from elasticsearch import Elasticsearch from pyspark.sql import SparkSession from fact import Fact from product_dimension import ProductDimension spark = SparkSession.builder.appName("ProductApp").getOrCreate() ProductDimension(spark_session=spark).csv_to_tmp_view() Fact(spark_session=spark).csv_to_tmp_view() fact_sample = spark.sql(""" SELECT a.order_date, a.product, a.item_sold, b.gender, b.cmc_business_unit, a.product_net_cost, a.product_net_revenue, a.average_net_cost, a.absolute_margin, a.percentage_margin FROM ( SELECT f.order_date, f.product, SUM(f.item_sold) item_sold, SUM(f.product_net_cost) product_net_cost, SUM(f.product_net_revenue) product_net_revenue, SUM(CASE WHEN f.item_sold > 0 THEN f.product_net_cost/f.item_sold ELSE 0 end) as average_net_cost, SUM(f.product_net_revenue - f.product_net_cost) as absolute_margin, SUM(CASE WHEN f.product_net_revenue > 0
#!/usr/bin/python
"""Write a random fact to a scratch DB file, then filter the main fact DB against it."""
import json
import filter_f, make_db
from fact import Fact

# write random fact to database.
# Renamed from `random` to avoid shadowing the stdlib module name.
random_fact = Fact()
r_hash = {}
r_hash[random_fact.factHash] = make_db.make_json(random_fact)
with open('facts_db_t.json', 'w') as t:
    t.write(json.dumps(r_hash))

# MODERNIZATION: the original used Python-2-only `print "..."` statements;
# the parenthesized single-argument form below runs under both Python 2 and 3.
print("loading database...")
facts_db = filter_f.load_db('facts_db.json')
print("filtering facts...")
filter_f.filter_facts(random_fact, facts_db)