# Optional command-line switches. `parser` is created earlier in the script
# (outside this section), as are the `sot` and `indices` arguments read below.
parser.add_argument(
    '-origins',
    metavar='origins',
    type=str,
    default=None,
    help=('Will use the indices of the unlabeled trees instead, and visualize '
          'the corresponding hypotheses. Provide the file path to the origins file.')
)
parser.add_argument(
    '-totree',
    metavar='-totree',
    type=int,
    default=0,
    help=('If a projection was used, merge the individual branches '
          'to a new tree (for vizualization)')
)
parser.add_argument(
    '-s',
    metavar='-s',
    type=int,
    default=0,
    help=('Also print the leaves of the tree. For unordered trees they are '
          'printed in alphabetical order (default=0|1)')
)
parser.add_argument(
    '-show',
    metavar='-show',
    type=int,
    default=1,
    help='Shows the trees of each set in an nltk display (default=1|0)'
)
parser.add_argument(
    '-p',
    metavar='-p',
    type=int,
    default=1,
    help='Prints the trees in each set in ptb format (default=1|0)'
)
parser.add_argument(
    '-latex',
    metavar='-latex',
    type=int,
    default=0,
    help=('Prints the trees in latex (1) forest, or (2) qtree format '
          '(1|2|default=0). Forest package: https://www.ctan.org/pkg/forest?lang=en')
)

args = parser.parse_args()
sots = read_sot(args.sot)

# Translate the `indices` argument into the selection of set indices.
# Accepted forms: 'all', an inclusive range 'n-m', or a single index.
if args.indices == 'all':
    # Left empty here; the origins filter below tests for 'all' explicitly.
    # NOTE(review): code after this section presumably treats 'all' specially
    # as well -- confirm.
    selection = []
elif '-' in args.indices:
    n, m = args.indices.split('-')
    # Inclusive upper bound, clamped to [0, len(sots)).
    selection = range(max(int(n), 0), min(int(m) + 1, len(sots)))
else:
    n = args.indices
    selection = [int(n)]

if args.origins:
    # With an origins file, the indices chosen above are looked up as keys of
    # `has_hyps`, and the selection becomes the union of the entries stored
    # under the chosen keys (or under every key for 'all').
    origins_from, has_hyps = read_origins(args.origins)
    selection = {
        sot
        for i in has_hyps
        for sot in has_hyps[i]
        if i in selection or args.indices == 'all'
    }
if __name__ == "__main__":
    # ---- command-line interface -------------------------------------------
    parser = argparse.ArgumentParser(
        description=('Makes a binary classification (positive or negative) of sets of '
                     'trees based on the lazy positive hypothesis classification with '
                     'the syntactic tree pattern structure.')
    )

    # Required positional arguments.
    parser.add_argument(
        'unlabeled_trees',
        metavar='unlabeled_trees',
        type=str,
        help='File path to the unlabeled sets of trees.'
    )
    parser.add_argument(
        'labeled_trees',
        metavar='labeled_trees',
        type=str,
        help='File path to the sets of labeled trees.'
    )
    parser.add_argument(
        'labels',
        metavar='labels',
        type=str,
        help='File path to the class labels.'
    )
    parser.add_argument(
        'positive',
        metavar='positive',
        type=str,
        help='Indicate the positive labels (string separated by ;).'
    )
    parser.add_argument(
        'negative',
        metavar='negative',
        type=str,
        help='Indicate the negative labels (string separated by ;).'
    )

    # Optional switches.
    parser.add_argument(
        '-projection',
        metavar='projection',
        type=int,
        default=1,
        help='Use the maximal branch projection (default=1|0).'
    )
    parser.add_argument(
        '-output',
        metavar='output',
        type=str,
        default='./out.labs',
        help=('File where the output labels should be written to '
              '(default=./out.labs).')
    )
    parser.add_argument(
        '-min_pos_hypotheses',
        metavar='min_pos_hypotheses',
        type=int,
        default=1,
        help=('Number of positive hypotheses needed for a positive '
              'classification (default=1).')
    )
    parser.add_argument(
        '-min_counterexamples',
        metavar='min_counterexamples',
        type=int,
        default=1,
        help=('Number of negative counter examples that are to be found '
              'before a hypothesis is rejected (default=1).')
    )
    parser.add_argument(
        '-output_hypotheses',
        metavar='output_hypotheses',
        type=str,
        default=None,
        help=('Output the positive hypotheses that are found and used for '
              'positive classifications. Provide the file path (default=None).')
    )
    parser.add_argument(
        '-output_origins',
        metavar='output_origins',
        type=str,
        default=None,
        help=('outputs pairs of unlabeled and labeled object indices for which '
              'a positive hypothesis is found, and maps them to their '
              'corresponding index of the output_hypotheses file (in the format '
              '(unlabled,labeled):output_hypothesis_index). Provide the output '
              'file path (default=None).')
    )

    args = parser.parse_args()

    # ---- input data -------------------------------------------------------
    unlabeled_trees = read_sot(args.unlabeled_trees)
    labeled_trees = read_sot(args.labeled_trees)
    labels = read_labels(args.labels)

    # The label arguments are ';'-separated strings; keep them as sets.
    positive_labels = set(args.positive.split(';'))
    negative_labels = set(args.negative.split(';'))

    # Constraint predicate that accepts every input.
    constraints = lambda x: True

    # Object descriptions are sets of trees, or sets of maximal branches if
    # the projection is used. `op` records which of the two is in effect.
    if not args.projection:
        describe = lambda treeSet: treeSet
        op = 'trees'
    else:
        describe = lambda treeSet: {
            branch
            for tree in treeSet
            for branch in to_tuples(tree.projection())
        }
        op = 'branches'

    # Description of every labeled object, keyed like `labeled_trees`.
    object_descriptions = {
        idx: describe(trees) for idx, trees in labeled_trees.items()
    }