def test_answer_set_to_str_complex():
    """Check that a nested atom is rendered back to its original source text."""
    source = 'a(a(2,3),(2,),b(c((d,),(e,f)))).'

    # Models obtained with argument parsing enabled; the first one is rendered.
    parsed_models = tuple(ASP(source).parse_args)
    print('CAREFUL:', parsed_models)
    first_model = parsed_models[0]

    # Default representation, printed only for visual comparison.
    raw_models = tuple(ASP(source))
    print('NORMAL :', raw_models)

    rendered = ' '.join(
        utils.generate_answer_set_as_str(first_model, atom_end='.'))
    assert rendered == source
def test_file_saving_api_with_as_pyasp(asp_code):
    """Answers saved from their ``as_pyasp`` form reload as regular answers."""
    # Save the answer sets using their ``as_pyasp`` representation...
    pyasp_answers = tuple(ASP(asp_code).as_pyasp)
    saved_path = utils.save_answers_in_file(pyasp_answers)
    reloaded = frozenset(utils.load_answers_from_file(saved_path))

    # ...and expect to read back the regular representation of answer sets.
    expected = frozenset(ASP(asp_code))
    assert expected == reloaded
def test_solving():
    """Prove that the program is valid"""
    models = tuple(ASP(ASP_SOURCE).by_predicate)
    assert len(models) == 1

    for model in models:
        assert len(model['p']) == 4
        relations = model['rel']
        assert len(relations) == 4
        assert relations == {('a', 'b'), ('a', 'c'), ('a', 'd'), ('a', 'e')}
def test_api_asp(asp_code):
    """Check the concepts found and the statistics reported without the clingo module."""
    # clingo module offers a *really* different statistics set
    answers = ASP(asp_code, use_clingo_module=False)

    found = {
        ''.join(answer['obj']) + '×' + ''.join(answer['att'])
        for answer in answers.by_predicate.sorted.first_arg_only
    }
    assert found == {'a×cd', 'b×de', 'ab×d'}

    assert len(answers.statistics) == 4
    for stat_name, stat_value in (('Calls', '1'), ('Models', '3')):
        assert answers.statistics[stat_name] == stat_value
    expected_keys = {'CPU Time', 'Calls', 'Models', 'Time'}
    assert set(answers.statistics.keys()) == expected_keys
def test_save_in_file(asp_code):
    """Check that answer sets written one per line are read back unchanged.

    The temporary file is created with ``delete=False`` so that it can be
    reopened by name after being closed. It is therefore removed explicitly
    once the test is done, whether or not the test succeeds.
    """
    import os  # local import: only needed for the cleanup below

    answers = frozenset(ASP(asp_code))
    ofd = tempfile.NamedTemporaryFile('w', delete=False)
    fname = ofd.name
    try:
        with ofd:
            ofd.write('\n'.join(
                utils.answer_set_to_str(answer) for answer in answers))
        with open(fname) as ifd:
            read_answers = frozenset(
                frozenset(utils.answer_set_from_str(line)) for line in ifd)
    finally:
        # Do not leave one stray temporary file behind per test run.
        os.remove(fname)
    assert read_answers == answers
def test_file_saving_api(asp_code):
    """Answers saved with the file API must be identical once loaded back."""
    expected = frozenset(ASP(asp_code))
    saved_path = utils.save_answers_in_file(expected)
    reloaded = frozenset(utils.load_answers_from_file(saved_path))
    assert expected == reloaded
def predict(experiment_type=None):
    """Run the ASP program on every test sentence of an experiment.

    The experiment table is read from the CSV file registered under
    ``experiment_type`` in ``configuration_dict``; its ``train`` and ``test``
    columns hold sentence ids. Each test sentence representation is combined,
    in turn, with every background knowledge principle attached to a train
    sentence (plus the person facts, the semantic role rules derived from the
    train set and the reasoning rules), and the resulting program is given to
    ``ASP``.

    Args:
        experiment_type: key of ``configuration_dict`` giving the path of the
            experiment CSV file.

    Returns:
        dict: maps each test sentence id to a list of strings. There is one
        entry per background knowledge principle for which an answer holds an
        ``ans`` predicate: ``str()`` of that predicate's value in the first
        such answer.
    """
    ex_table = pd.read_csv(configuration_dict[experiment_type])

    def extract_train_pid():
        """Report the train/test overlap and return the ids usable for rules."""
        # check the duplicates between the train set and the test set
        # (the test ids are collected once instead of once per train id)
        test_pids = set(ex_table['test'].tolist())
        duplicates_list = [
            pid for pid in ex_table['train'] if pid in test_pids
        ]
        if not duplicates_list:
            print('No duplicates between the train set and the test set')
        else:
            print('Duplicates: ', str(duplicates_list))
        print('The number of sentences in the train set: ',
              str(len(ex_table['train'].unique())))
        print('The number of sentences in the test set: ',
              str(len(ex_table['test'].unique())))
        # train_pid_list has indexes of the sentences in the train set
        train_pid_list = ex_table['train'].tolist()
        # add the general rules indexes to train_pid_list
        train_pid_list.append('general')
        train_pid_list.append('general_semantic')
        return train_pid_list

    train_pid_set = set(extract_train_pid())

    with open('facts_check_person.lp', 'rt') as file:
        facts_check_person = file.read()
    with open('rules_reasoning.lp', 'rt') as file:
        rules_reasoning = file.read()

    # add the semantic role rules which can be derived from the train set only
    df = pd.read_excel('rules_semantic_roles.xlsx')
    rules_semantic = ''.join(
        rule + '\n'
        for pid, rule in zip(df['pID'], df['rules'])
        if pid in train_pid_set)

    # add the background knowledge principles which can be derived from the
    # train set only; pd.notna() detects missing cells whatever object
    # represents them, unlike an identity test against np.nan.
    # Sorting makes the order of the predictions reproducible between runs.
    bg_id_list = sorted({
        bg_id
        for pid, bg_id in zip(df['pID'], df['bg'])
        if pid in train_pid_set and pd.notna(bg_id)
    })
    print('The number of the derived background knowledge principles: ',
          str(len(bg_id_list)))
    print('Waiting for the predictions...')

    # prediction results are saved in pred_dict
    pred_dict = {}
    for test_pid in ex_table['test']:
        print(test_pid)
        pred_dict[test_pid] = []
        with open('./sentences/' + test_pid + '.lp', 'rt') as file:
            test_representation = file.read()
        # compare one Winograd schema sentence with each background knowledge
        # principle
        for bg_id in bg_id_list:
            with open('./background_knowledge/high_level_' + bg_id + '.lp',
                      'rt') as file:
                bg_representation = file.read()
            whole_representation = ''.join([
                test_representation, bg_representation, facts_check_person,
                rules_semantic, rules_reasoning
            ])
            answers = ASP(whole_representation)
            for answer in answers.by_predicate.first_arg_only:
                # Keep only the first answer holding an 'ans' predicate.
                # Only the missing-key case is skipped, so that Ctrl-C and
                # real errors are no longer silently swallowed.
                try:
                    prediction = str(answer['ans'])
                except KeyError:
                    continue
                pred_dict[test_pid].append(prediction)
                break
    print(pred_dict)
    return pred_dict
return 'Perform 1000 graphs in {} seconds.'.format( timeit(run, number=number)) def time_efficiency_alt(): program = (ASP_CODE) func = partial(asp_parsing.program_to_dependancy_graph, program, have_comments=False) return 'Perform 1000 graphs in ' + str(timeit(func, number=1000)) + ' seconds.' # last time: 0.01s if __name__ == '__main__': answers = ASP(ASP_CODE) for answer in answers.by_predicate.first_arg_only: print('{' + ','.join(answer['obj']) + '} × {' + ','.join(answer['att']) + '}') print() print('Dependancy graph:') pprint(asp_parsing.program_to_dependancy_graph(ASP_CODE)) print() print('Benchmark:') for parser in ( asp_parsing.precise_parser. parse_asp_program_by_arpeggio, # last time: 4.0s, 5.8s asp_parsing.precise_parser.parse_asp_program_by_pypeg ): # last time: 8.5s, 9.5s
def test_answer_set_to_str_with_tuple():
    """Check that an atom holding nested tuples is rendered back to its source."""
    source = 'a(b,(2,3,(a,b))).'
    first_model = next(ASP(source).parse_args)
    rendered = ' '.join(
        utils.generate_answer_set_as_str(first_model, atom_end='.'))
    assert rendered == source
def run(ASP=ASP, ASP_CODE=ASP_CODE):
    """Call ``ASP`` on ``ASP_CODE`` and return everything it yields as a tuple.

    Both names are bound as default arguments, so they are looked up as
    locals when the function runs.
    """
    results = ASP(ASP_CODE)
    return tuple(results)
import time if __name__ == "__main__": # ASP input from the C# settings if len(sys.argv) != 3: exit() answerSet = [ x.strip() for x in re.split( "(?<=[A-Za-z0-9\)\}])\.{1}(?=[A-Za-z0-9\(\{:%])", sys.argv[1]) ] # Solve using clyngor wrapper; Get the peg atoms from the answer count = 0 maxCount = 1000 answers = ASP(sys.argv[1], options="--rand-freq=1 --seed=1") shapeList = [] t_0 = time.time() for answer in answers: shapeList.append(answer) count += 1 if count >= maxCount: break t_1 = time.time() print("Created {0} Answer Sets...".format(len(shapeList))) print("It took {0} seconds.".format(t_1 - t_0)) if (sys.argv[2] == '0'):
def test_api_asp(asp_code):
    """Check the object/attribute pairs found in the answers of ``asp_code``."""
    answers = ASP(asp_code)
    found = {
        ''.join(answer['obj']) + '×' + ''.join(answer['att'])
        for answer in answers.by_predicate.sorted.first_arg_only
    }
    assert found == {'a×cd', 'b×de', 'ab×d'}
def pathmodel_pathway_picture(asp_code, picture_name, input_filename):
    """
    Create the pathway picture using ASP results code from PathModel inference.

    Known compounds and reactions are drawn in green, inferred ones in blue.
    Molecules listed by the ``absentmolecules`` predicate of the intermediary
    file are left out of the known compounds and reactions.

    Args:
        asp_code (str): string containing PathModel results
        picture_name (str): path to the output picture file; its extension
            selects the output format
        input_filename (str): path to PathModel intermediary file
    """
    DG = nx.DiGraph()

    known_compounds = []
    inferred_compounds = []
    known_reactions = []
    inferred_reactions = []
    absent_molecules = set()

    with open(input_filename, 'r') as intermediate_file:
        intermediate_code = intermediate_file.read()

    for answer in ASP(intermediate_code, use_clingo_module=False
                      ).parse_args.by_predicate.discard_quotes:
        for predicate in answer:
            if predicate == "absentmolecules":
                for atom in answer[predicate]:
                    absent_molecules.add(atom[0])

    for answer in ASP(
            asp_code,
            use_clingo_module=False).parse_args.by_predicate.discard_quotes:
        for predicate in answer:
            # Check the predicate name before indexing the arguments: atoms
            # of other predicates may have fewer than three arguments.
            if predicate not in ("reaction", "newreaction"):
                continue
            for atom in answer[predicate]:
                reaction = atom[0]
                reactant = atom[1]
                product = atom[2]
                if predicate == "reaction":
                    reactant_present = reactant not in absent_molecules
                    product_present = product not in absent_molecules
                    # NOTE(review): a compound whose partner is absent is
                    # listed here without being added to the graph — confirm
                    # this cannot happen with real PathModel outputs.
                    if reactant_present:
                        known_compounds.append(reactant)
                    if product_present:
                        known_compounds.append(product)
                    if product_present and reactant_present:
                        known_reactions.append((reactant, product))
                        DG.add_edge(reactant, product, label=reaction)
                else:  # "newreaction"
                    if 'Prediction_' in reactant:
                        inferred_compounds.append(reactant)
                    if 'Prediction_' in product:
                        inferred_compounds.append(product)
                    inferred_reactions.append((reactant, product))
                    DG.add_edge(reactant, product, label=reaction)

    plt.figure(figsize=(25, 25))

    # Compute the layout once so that nodes, edges and labels all share the
    # same positions (and graphviz is run a single time).
    positions = graphviz_layout(DG, prog='neato')

    nx.draw_networkx_nodes(DG,
                           positions,
                           nodelist=known_compounds,
                           node_color="green",
                           node_size=3000,
                           node_shape='s',
                           alpha=0.5)
    nx.draw_networkx_nodes(DG,
                           positions,
                           nodelist=inferred_compounds,
                           node_color="blue",
                           node_size=2000,
                           node_shape='s',
                           alpha=0.5)
    nx.draw_networkx_edges(DG,
                           positions,
                           edgelist=known_reactions,
                           edge_color="green",
                           alpha=0.5,
                           width=2.0,
                           arrows=True,
                           arrowstyle='->',
                           arrowsize=14)
    nx.draw_networkx_edges(DG,
                           positions,
                           edgelist=inferred_reactions,
                           edge_color="blue",
                           alpha=0.5,
                           width=2.0,
                           arrows=True,
                           arrowstyle='->',
                           arrowsize=14)
    nx.draw_networkx_labels(DG, positions, font_size=15)

    ax = plt.gca()
    ax.set_axis_off()

    # The picture format is taken from the extension of the output path.
    extension = os.path.splitext(picture_name)[1].strip('.')
    plt.savefig(picture_name, dpi=144, format=extension)
def create_2dmolecule(input_filename, output_directory, align_domain=None):
    '''
    From an ASP input file create 2d representation of molecules.

    To use align_domain, you need the intermediate file created by
    pathmodel_wrapper.py. With align_domain, rdkit will use domain to align
    molecules.

    Args:
        input_filename (str): path to PathModel output file
        output_directory (str): output folder containing pictures of the
            molecules and of the inferred pathway
        align_domain (bool): if True, rdkit will use domain to align molecules
    '''
    with open(input_filename, 'r') as input_file:
        asp_code = input_file.read()

    # Set bond types transformation from ASP to rdkit.
    bondtypes = {
        'single': Chem.BondType.SINGLE,
        'singleS': Chem.BondType.SINGLE,
        'singleR': Chem.BondType.SINGLE,
        'double': Chem.BondType.DOUBLE,
        'triple': Chem.BondType.TRIPLE,
        'variable': Chem.BondType.UNSPECIFIED
    }

    # Set atomic number transformation from ASP to rdkit.
    atomicNumber = {'carb': 6, 'nitr': 7, 'oxyg': 8, 'phos': 15, 'variable': 0}

    # Domain containers are only filled when align_domain is set.
    domain_molecules = {}
    domain_molecule_numberings = {}
    domain_bonds = {}
    molecule_domains = {}

    molecules = {}
    molecule_numberings = {}
    bonds = {}

    def record_atom(args, atoms_by_molecule, numbers_by_molecule):
        """Store an atom given as (molecule, atom number, atom type name)."""
        molecule = args[0]
        number = args[1]
        atom_type = atomicNumber[args[2]]
        atoms_by_molecule.setdefault(molecule, []).append((number, atom_type))
        numbers_by_molecule.setdefault(molecule, []).append(number)

    def record_bond(args, bonds_by_molecule):
        """Store a bond given as (molecule, bond type name, atom 1, atom 2)."""
        molecule = args[0]
        first_atom = args[2]
        second_atom = args[3]
        bond_type = bondtypes[args[1]]
        bonds_by_molecule.setdefault(molecule, []).append(
            (first_atom, second_atom, bond_type))

    # Parse ASP input file and extract molecules, atoms and bonds.
    for answer_set in ASP(asp_code,
                          use_clingo_module=False).parse_args.discard_quotes:
        for atom in answer_set:
            predicate = atom[0]
            if predicate in ('atom', 'predictatom'):
                record_atom(atom[1], molecules, molecule_numberings)
            elif predicate in ('bond', 'predictbond'):
                record_bond(atom[1], bonds)
            elif align_domain:
                # Extract domain information.
                if predicate == 'atomDomain':
                    record_atom(atom[1], domain_molecules,
                                domain_molecule_numberings)
                elif predicate == 'bondDomain':
                    record_bond(atom[1], domain_bonds)
                elif predicate == 'domain':
                    molecule_name = atom[1][0]
                    domain_name = atom[1][1]
                    molecule_domains[molecule_name] = domain_name

    # For each domain, create the corresponding rdkit molecule.
    rddomains = {}
    if align_domain:
        for domain_name in domain_molecules:
            rddomains[domain_name] = create_rdkit_molecule(
                domain_name, domain_molecules, domain_molecule_numberings,
                domain_bonds)

    # For each molecule, create a rdkit molecule and draw it.
    for molecule_name in molecules:
        rdmol = create_rdkit_molecule(molecule_name, molecules,
                                      molecule_numberings, bonds)

        if align_domain:
            # Use domain to align molecule.
            template = rddomains[molecule_domains[molecule_name]]
            AllChem.Compute2DCoords(rdmol)
            AllChem.Compute2DCoords(template)
            AllChem.GenerateDepictionMatching2DStructure(rdmol, template)

        # Remove Atom with atomic number == 0
        # Source: https://sourceforge.net/p/rdkit/mailman/message/28157259/
        rdmol = Chem.DeleteSubstructs(rdmol, Chem.MolFromSmarts('[#0]'))

        print(molecule_name)
        output_molecule_path = os.path.join(output_directory,
                                            molecule_name + '.svg')

        # Add atom numbering to molecule: the atom at rdkit index i gets the
        # i-th smallest atom number recorded for this molecule.
        # Source: https://iwatobipen.wordpress.com/2017/02/25/draw-molecule-with-atom-index-in-rdkit/
        sorted_numbers = sorted(molecule_numberings[molecule_name])
        for idx in range(rdmol.GetNumAtoms()):
            rdmol.GetAtomWithIdx(idx).SetProp('molAtomMapNumber',
                                              str(sorted_numbers[idx]))

        # Draw molecule.
        Draw.MolToFile(rdmol,
                       output_molecule_path,
                       size=(800, 800),
                       includeAtomNumbers=True)