from datetime import datetime
from os.path import join as pjoin

from problog.program import PrologString
from problog.learning import lfi

# get_untrained_model is assumed to be defined elsewhere in this project;
# it should return the ProbLog program text for the given fold, with t(_)
# placeholders for the learnable parameters.


def learn_model(fold_i):
    fold_n = fold_i + 1
    print(f"Learning fold {fold_n} @ {datetime.now()}")
    model = get_untrained_model(fold_n)
    score, weights, atoms, iteration, lfi_problem = lfi.run_lfi(
        PrologString(model), examples=[])
    learned_model = lfi_problem.get_model()
    with open(pjoin("models", f"model{fold_n}.pl"), "w") as f:
        f.write(learned_model + "\n")
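# A minimal driver sketch for the function above, assuming it belongs to a
# k-fold cross-validation script; NUM_FOLDS is a hypothetical fold count,
# not taken from the original source.
import os

NUM_FOLDS = 10  # hypothetical

if __name__ == "__main__":
    os.makedirs("models", exist_ok=True)  # learn_model writes into models/
    for fold_i in range(NUM_FOLDS):
        learn_model(fold_i)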
from problog.program import PrologFile
from problog.learning import lfi

# process_error is assumed to be defined elsewhere; this function is written
# like a web-service handler that wraps exceptions into a JSON-able payload.


def main(filename, examplefile):
    try:
        examples = list(lfi.read_examples(examplefile))
        program = PrologFile(filename)
        # NOTE: this unpacks four values; the other snippets in this
        # collection unpack five (..., lfi_problem). Adjust to match the
        # ProbLog version in use.
        score, weights, names, iterations = lfi.run_lfi(program, examples)
        new_names = []
        for n in names:
            new_names.append(
                (str(n.with_probability()),) + program.lineno(n.location)[1:]
            )
        return True, (score, weights, new_names, iterations)
    except Exception as err:
        return False, {"err": process_error(err)}
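# A hedged usage sketch: because main returns (success, payload) rather than
# raising, callers branch on the flag. The file names are placeholders.
ok, payload = main("model.pl", "examples.pl")
if ok:
    score, weights, new_names, iterations = payload
    print("log-likelihood:", score)
else:
    print("learning failed:", payload["err"])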
"""
@author: PasqualeDeMarinis
"""
from problog.logic import Term
from problog.program import PrologString
from problog.learning import lfi

model = PrologString("""
t(_)::l1.
t(_)::l2.
t(_)::l3.
t(_)::win.
win :- l1, l2, \+l3.
""")

l1 = Term('l1')
l2 = Term('l2')
l3 = Term('l3')
win = Term('win')

examples = [
    [(l1, True), (l2, True), (l3, True), (win, False)],
    [(l1, True), (l2, True), (l3, False), (win, True)],
    [(l1, True), (l2, False), (l3, False), (win, False)],
    [(l1, True), (l2, True), (l3, False), (win, True)],
    [(l1, False), (l2, True), (l3, False), (win, False)],
]

score, weights, atoms, iteration, lfi_problem = lfi.run_lfi(model, examples)
print(lfi_problem.get_model())
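# Sketch of evaluating the fitted program, assuming the standard ProbLog
# pipeline: append a query to the learned model text and compute P(win).
from problog import get_evaluatable

learned = lfi_problem.get_model()
result = get_evaluatable().create_from(
    PrologString(learned + "\nquery(win).")).evaluate()
print(result)  # maps each query term to its probability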
""" from problog.logic import Term from problog.program import PrologString from problog.learning import lfi model = """ t(0.5)::burglary. 0.2::earthquake. t(_)::p_alarm1. t(_)::p_alarm2. t(_)::p_alarm3. alarm :- burglary, earthquake, p_alarm1. alarm :- burglary, \+earthquake, p_alarm2. alarm :- \+burglary, earthquake, p_alarm3. """ alarm = Term('alarm') burglary = Term('burglary') earthquake = Term('earthquake') examples = [[(burglary, False), (alarm, False)], [(earthquake, False), (alarm, True), (burglary, True)], [(burglary, False)]] score, weights, atoms, iteration, lfi_problem = lfi.run_lfi( PrologString(model), examples) print(lfi_problem.get_model())
import os
import collections
from collections import Counter

from problog.logic import Term
from problog.program import PrologString
from problog.learning import lfi
# SentimentIntensityAnalyzer comes from VADER (vaderSentiment, or
# nltk.sentiment.vader); the helpers readCSV, sentiment_analyzer,
# sentiment_analyzer_scores, readRelatedWords, readRelatedWordsDict,
# checkPlace and incrementalLearning are assumed to be defined elsewhere
# in this project.
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


def createModel(fileName):
    tweetsList = readCSV(fileName)
    analyzer = SentimentIntensityAnalyzer()
    # collect all the random variables in `facts`
    facts = "t(_)::userLocation.\n"
    neg = False
    pos = False
    keywordsList = ['']
    examples = []
    randomVariables = {}
    # create random variables based on the dataset
    for i in range(1, len(tweetsList)):
        # if the csv contains a negative sentiment, declare negativeSentiment once
        if sentiment_analyzer(tweetsList[i].text, analyzer) == -1 and not neg:
            facts = facts + "t(_)::negativeSentiment.\n"
            neg = True
        # if the csv contains a positive sentiment, declare positiveSentiment once
        if sentiment_analyzer(tweetsList[i].text, analyzer) == 1 and not pos:
            facts = facts + "t(_)::positiveSentiment.\n"
            pos = True
        keywordsList.append(readRelatedWords(tweetsList[i].text))
    keywordsList = list(dict.fromkeys(keywordsList))  # delete all duplicates
    keywordsList.remove('')
    # declare the keyword random variables in ProbLog syntax
    for i in range(0, len(keywordsList)):
        facts = facts + "t(_)::" + keywordsList[i] + ".\n"
    for line in facts.splitlines():
        if ":-" not in line:
            temp = line.split("t(_)::")
            variable = temp[1].split(".")
            randomVariables[variable[0]] = False
    for i in range(0, len(keywordsList)):
        randomVariables[keywordsList[i]] = False
    initialDict = randomVariables.copy()
    # collect the evidence of every tweet into `examples`
    for i in range(1, len(tweetsList)):
        tempDict = initialDict.copy()
        tempDict = sentiment_analyzer_scores(
            tweetsList[i].text, analyzer, tempDict)  # sentiment analysis per tweet
        tempDict = readRelatedWordsDict(tweetsList[i].text, tempDict)  # NER on tweets
        tempDict = checkPlace(tweetsList[i].location, tempDict)
        orderedDictionary = collections.OrderedDict(sorted(tempDict.items()))
        # convert each key from string to Term
        orderedDictionary = {Term(k): v for k, v in orderedDictionary.items()}
        # append the evidence instance to the examples
        examples.append([(key, value) for key, value in orderedDictionary.items()])
    c = Counter(tuple(x) for x in examples)
    newString = ""
    newExamples = []
    # create the rules based on the evidence set
    for key, value in c.most_common():
        rules = "visitLocation:-"
        final = value / len(tweetsList)
        probability = "t(" + str(final) + ")::"
        rules = probability + rules
        for (i, j) in key:
            if j:  # keep only the atoms observed as true in the rule body
                rules = rules + str(i) + ","
        newString += rules[:-1]
        newString = newString + ".\n"
    finalModel = facts + newString
    # train the model on the evidence and the current structure
    score, weights, atoms, iteration, lfi_problem = lfi.run_lfi(
        PrologString(finalModel), examples)
    trainedModel = lfi_problem.get_model()
    tempForSplit = trainedModel.split("\n")
    currentRandomVars = {}
    for i in range(0, len(tempForSplit)):
        if ":-" in tempForSplit[i]:
            break
        parts = tempForSplit[i].split('::')
        key = parts[1].split(".")
        currentRandomVars[key[0]] = parts[0]
    # begin incremental learning
    newExamples, facts, lenOfSet = incrementalLearning(
        currentRandomVars, len(tweetsList), examples)
    convertArr = []
    for i in range(0, len(newExamples)):
        convertArr.append([(str(k), v) for (k, v) in newExamples[i]])
    arrayForNewEvidence = []
    for i in range(0, len(newExamples)):
        arrayForNewEvidence.append(
            [(str(k), v) for k, v in newExamples[i] if v is True])
    rulesAndNum = Counter(tuple(y) for y in arrayForNewEvidence)
    # create the rules based on the new evidence set
    finalRules = ""
    for key, value in rulesAndNum.most_common():
        rules = "visitLocation:-"
        final = value / (lenOfSet - 1)
        probability = str(final) + "::"
        rules = probability + rules
        for (i, j) in key:
            if j:
                rules = rules + str(i) + ","
        finalRules += rules[:-1]
        finalRules = finalRules + ".\n"
    finalModelIncremental = facts + finalRules
    # store the trained model in a txt file
    if os.path.exists('models/model.txt'):
        os.remove('models/model.txt')
    with open("models/model.txt", "w") as text_file:
        text_file.write(str(lenOfSet) + "\n")
        text_file.write(finalModelIncremental)
    print('the model trained successfully')
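# A hedged sketch of reading the stored model back for the next incremental
# round; the file layout (first line = number of examples seen so far, rest =
# ProbLog program) mirrors what createModel writes above. loadModel is a
# hypothetical helper, not part of the original source.
def loadModel(path="models/model.txt"):
    with open(path) as f:
        lenOfSet = int(f.readline())
        model = f.read()
    return lenOfSet, model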
# Excerpt: inside a loop over the training rows that builds one list of
# (Term, truth-value) evidence per dog; the branch for tall dogs above this
# point is truncated in the source.
        sample_curent.append((Term('height', Term('low')), False))
    elif row["Height(cm)"] > 37:
        height_curr = 'medium'
        sample_curent.append((Term('height', Term('high')), False))
        sample_curent.append((Term('height', Term('medium')), True))
        sample_curent.append((Term('height', Term('low')), False))
    else:
        height_curr = 'low'
        sample_curent.append((Term('height', Term('high')), False))
        sample_curent.append((Term('height', Term('medium')), False))
        sample_curent.append((Term('height', Term('low')), True))
    if y_train["Breed Name"][index].lower().replace(" ", "") == 'amstaff':
        interpretari_amstaff.append(sample_curent)

score, weights, atoms, iteration, lfi_problem = lfi.run_lfi(
    p_amstaff, interpretari_amstaff)
print(lfi_problem.get_model())

# The learned parameters, written back as a ground ProbLog program
# (presumably p_amstaff declared these probabilities as t(_) placeholders
# before learning):
p_amstaff = PrologString('''
0.059459459459459::weight(low).
0.783783783783784::weight(medium).
0.156756756756757::weight(high).
0.0::height(low).
1.0::height(medium).
0.0::height(high).
amstaff(C,D) :- weight(C), height(D).
query(amstaff(A,B)).
''')
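# Since the learned program already ends in query(amstaff(A,B)), it can be
# evaluated directly; this assumes the standard ProbLog pipeline.
from problog import get_evaluatable

for term, prob in get_evaluatable().create_from(p_amstaff).evaluate().items():
    print(term, prob)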
def run():
    logging.info("STARTING AUTOMATED LEARNING FOR PROBLOG")
    structure_filepath, dataset_filepaths, relational_data = \
        find_dataset_and_structure_files()
    results = {'filename': [], 'time': [], 'log-likelihood': []}
    experiment_dir_name = os.path.basename(experiment_dir)
    time_ = int(time.time())
    results_filepath = './results/{}/problog/{}/problog___{}___{}.csv'.format(
        experiment_dir_name, time_, experiment_dir_name, time_)
    problog_structure = read_structure_from_file(structure_filepath)
    min_improv = 0.001
    for problog_dataset_filepath in dataset_filepaths:
        dataset_filename = problog_dataset_filepath.split('/')[-1]
        logging.info("Learning for dataset '{}'...".format(dataset_filename))
        # begin counting time
        start_time = time.time()
        if relational_data:
            # RELATIONAL CASE: the examples file is already in ProbLog syntax
            ll, weights, atoms, iteration, lfi_problem = lfi.run_lfi(
                PrologString(problog_structure),
                lfi.read_examples(problog_dataset_filepath),
                min_improv=min_improv)
        else:
            # PROPOSITIONAL CASE: convert the csv dataset to ProbLog examples
            problog_structure = parse_structure_to_problog(problog_structure)
            dataset = pd.read_csv(problog_dataset_filepath)
            dataset = parse_dataset_to_problog(dataset)
            ll, weights, atoms, iteration, lfi_problem = lfi.run_lfi(
                PrologString(problog_structure), dataset,
                min_improv=min_improv)
        string_model = lfi_problem.get_model()
        model = read_structure_not_file(string_model)
        # end counting time
        end_time = time.time()
        learning_time = end_time - start_time
        logging.info(
            "Learned:\nDATASET '{}'\nTime: {}\nLog-Likelihood: {}\nModel: {}"
            .format(dataset_filename, learning_time, ll, string_model))
        results = store_results(results, dataset_filename, learning_time,
                                ll, model)
        save_results_to_file(results, results_filepath)
    return results
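# Entry-point sketch, assuming this script is run directly; the logging level
# is an assumption, not taken from the original source.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    run()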