Пример #1
0
    def sr_parse(self, doc, bcvocab=None):
        """ Shift-reduce RST parsing based on model prediction

        :type doc: object accepted by ``SRParser.init``
        :param doc: document to parse; it is also handed to the
                    feature generator on every parsing step

        :type bcvocab: dict
        :param bcvocab: brown clusters

        :return: an ``RSTTree`` holding the predicted parse tree
        :raises RuntimeError: if none of the ranked labels maps to an
                              action the parser accepts
        """
        # Initialize parser
        srparser = SRParser([], [])
        srparser.init(doc)
        # Parsing
        while not srparser.endparsing():
            # Generate features
            stack, queue = srparser.getstatus()
            # Make sure call the generator with
            # same arguments as in data generation part
            fg = FeatureGenerator(stack, queue, doc, bcvocab)
            feat = fg.features()
            labels = self.rank_labels(feat)
            # Walk the candidates in ranked order and keep the first
            # one the parser accepts
            for label in labels:
                action = label2action(label)
                try:
                    srparser.operate(action)
                except ActionError:
                    # Illegal in the current state: try the next label
                    continue
                break
            else:
                # Every candidate was rejected. Another iteration would
                # presumably see the same state and the same candidates,
                # so fail loudly instead of spinning forever.
                raise RuntimeError(
                    "No legal parsing action among the ranked labels")
        tree = srparser.getparsetree()
        rst = RSTTree()
        rst.asign_tree(tree)
        return rst
Пример #2
0
    def sr_parse(self, doc, bcvocab=None):
        """ Shift-reduce RST parsing based on model prediction

        :type doc: object accepted by ``SRParser.init``
        :param doc: document to parse; it is also handed to the
                    feature generator on every parsing step

        :type bcvocab: dict
        :param bcvocab: brown clusters

        :return: an ``RSTTree`` holding the predicted parse tree
        :raises RuntimeError: if none of the ranked labels maps to an
                              action the parser accepts
        """
        # Initialize parser
        srparser = SRParser([], [])
        srparser.init(doc)
        # Parsing
        while not srparser.endparsing():
            # Generate features
            stack, queue = srparser.getstatus()
            # Make sure call the generator with
            # same arguments as in data generation part
            fg = FeatureGenerator(stack, queue, doc, bcvocab)
            feat = fg.features()
            labels = self.rank_labels(feat)
            # Walk the candidates in ranked order and keep the first
            # one the parser accepts
            for label in labels:
                action = label2action(label)
                try:
                    srparser.operate(action)
                except ActionError:
                    # Illegal in the current state: try the next label
                    continue
                break
            else:
                # Every candidate was rejected. Another iteration would
                # presumably see the same state and the same candidates,
                # so fail loudly instead of spinning forever.
                raise RuntimeError(
                    "No legal parsing action among the ranked labels")
        tree = srparser.getparsetree()
        rst = RSTTree()
        rst.asign_tree(tree)
        return rst
Пример #3
0
    def builddata(self, path):
        """ Collect training samples from every ``.dis`` file in a folder

        Each matching file is loaded into an ``RSTTree``, built, and
        asked for its samples; the results are appended to
        ``self.actionlist`` and ``self.samplelist``.

        :type path: string
        :param path: data path, where all data files are saved
        """
        for entry in os.listdir(path):
            # Only RST tree files are of interest
            if not entry.endswith('.dis'):
                continue
            tree = RSTTree(fname=os.path.join(path, entry))
            tree.build()
            actions, samples = tree.generate_samples()
            self.actionlist += actions
            self.samplelist += samples
Пример #4
0
def evalparser(path='./examples', report=False,
               bcvocab=None, draw=True,
               withdp=False, fdpvocab=None, fprojmat=None):
    """ Test the parsing performance

    Every ``*.merge`` file found directly under ``path`` is read,
    parsed, and its predicted brackets are written next to it as
    ``*.brackets``. With ``draw`` set, the predicted tree is also drawn
    to ``*.ps``.

    :type path: string
    :param path: path to the evaluation data

    :type report: boolean
    :param report: whether to report (calculate) the f1 score

    :param bcvocab: passed through to ``ParsingModel.sr_parse``

    :type draw: boolean
    :param draw: whether to draw each predicted tree

    :param withdp: forwarded to the ``ParsingModel`` constructor
    :param fdpvocab: forwarded to the ``ParsingModel`` constructor
    :param fprojmat: forwarded to the ``ParsingModel`` constructor
    """
    suffix = '.merge'
    # ----------------------------------------
    # Load the parsing model
    print('Load parsing model ...')
    pm = ParsingModel(withdp=withdp,
        fdpvocab=fdpvocab, fprojmat=fprojmat)
    pm.loadmodel("model/parsing-model.pickle.gz")
    # ----------------------------------------
    # Evaluation
    met = Metrics(levels=['span','nuclearity','relation'])
    # ----------------------------------------
    # Read all files from the given path
    doclist = [joinpath(path, fname) for fname in listdir(path)
               if fname.endswith(suffix)]
    for fmerge in doclist:
        # Strip only the trailing extension: str.replace would also
        # rewrite any earlier ".merge" in the directory or file name
        stem = fmerge[:-len(suffix)]
        # ----------------------------------------
        # Read *.merge file
        dr = DocReader()
        doc = dr.read(fmerge)
        # ----------------------------------------
        # Parsing
        pred_rst = pm.sr_parse(doc, bcvocab)
        if draw:
            strtree = pred_rst.parse()
            drawrst(strtree, stem + ".ps")
        # Get brackets from parsing results and write them into file
        pred_brackets = pred_rst.bracketing()
        writebrackets(stem + '.brackets', pred_brackets)
        # ----------------------------------------
        # Evaluate with gold RST tree
        if report:
            gold_rst = RSTTree(stem + '.dis', fmerge)
            gold_rst.build()
            # NOTE(review): the return value is unused; the call is kept
            # in case bracketing() has side effects -- confirm and drop
            gold_rst.bracketing()
            met.eval(gold_rst, pred_rst)
    if report:
        met.report()
Пример #5
0
    def sr_parse(self, texts):
        """ Shift-reduce RST parsing based on model prediction

        If none of the predicted labels yields a legal action, an error
        message is printed and the process exits via ``sys.exit()``.

        :type texts: list of string
        :param texts: list of EDUs for parsing

        :return: an ``RSTTree`` built from the resulting parse tree
        """
        # Initialize parser
        srparser = SRParser([], [])
        srparser.init(texts)
        # Parsing
        while not srparser.endparsing():
            # Generate features
            stack, queue = srparser.getstatus()
            # Make sure call the generator with
            # same arguments as in data generation part
            fg = FeatureGenerator(stack, queue)
            features = fg.features()
            labels = self.predict(features)
            # Enumerate through all possible actions ranked based on
            # prediction scores and apply the first legal one
            action = None
            for label in labels:
                action = label2action(label)
                try:
                    srparser.operate(action)
                except ActionError:
                    # Not a legal action: try the next candidate
                    continue
                break
            else:
                # No candidate was legal. The previous index check
                # (i < len(labels)) was always true, so this exit was
                # unreachable and the outer loop could spin forever.
                print("Parsing action error with {}".format(action))
                sys.exit()

        tree = srparser.getparsetree()
        rst = RSTTree(tree=tree)
        return rst
            
Пример #6
0
    def builddata(self, rpath):
        """ Build a list of feature list from a given path

        For each ``*.dis`` file the sibling ``*.merge`` file is paired
        with it, the tree is built, and the generated actions and
        samples are appended to ``self.actionlist`` and
        ``self.samplelist``.

        :type rpath: string
        :param rpath: data path, where all data files are saved
        """
        suffix = '.dis'
        # Read RST tree file
        files = [os.path.join(rpath, fname) for fname in os.listdir(rpath)
                 if fname.endswith(suffix)]
        for fdis in files:
            print('Processing data from file: {}'.format(fdis))
            # Swap only the trailing extension: str.replace would also
            # rewrite an earlier ".dis" (e.g. in "x.discussion.dis")
            fmerge = fdis[:-len(suffix)] + '.merge'
            rst = RSTTree(fdis, fmerge)
            rst.build()
            actionlist, samplelist = rst.generate_samples(self.bcvocab)
            self.actionlist += actionlist
            self.samplelist += samplelist
Пример #7
0
    def sr_parse(self, texts):
        """ Run the shift-reduce parser using the model's single best guess

        At every step only one label is predicted; if the action it maps
        to is rejected by the parser, a message is printed and the
        process exits.

        :type texts: list of string
        :param texts: list of EDUs for parsing

        :return: an ``RSTTree`` built from the resulting parse tree
        """
        parser = SRParser([], [])
        parser.init(texts)
        while True:
            if parser.endparsing():
                break
            # Features must be generated with the same arguments
            # as in the data generation part
            stack, queue = parser.getstatus()
            feats = FeatureGenerator(stack, queue).features()
            action = label2action(self.predict(feats))
            # The best choice here would be to choose the first
            #   legal action
            try:
                parser.operate(action)
            except ActionError:
                print("Parsing action error with {}".format(action))
                sys.exit()
        return RSTTree(tree=parser.getparsetree())
Пример #8
0
def evalparser(path='./examples', report=False):
    """ Test the parsing performance

    Every ``*.edus`` file found directly under ``path`` is parsed and
    the string form of the predicted tree is written to ``test.dis``
    in the current working directory.

    :type path: string
    :param path: path to the evaluation data

    :type report: boolean
    :param report: whether to report (calculate) the f1 score
    """
    from os import listdir
    from os.path import join as joinpath
    suffix = '.edus'
    # ----------------------------------------
    # Load the parsing model
    pm = ParsingModel()
    pm.loadmodel("parsing-model.pickle.gz")
    # ----------------------------------------
    # Evaluation
    met = Metrics(levels=['span', 'nuclearity', 'relation'])
    # ----------------------------------------
    # Read all files from the given path
    doclist = [
        joinpath(path, fname) for fname in listdir(path)
        if fname.endswith(suffix)
    ]
    for fedus in doclist:
        # ----------------------------------------
        # Parsing
        pred_rst = parse(pm, fedus=fedus)
        # NOTE(review): every document overwrites the same test.dis --
        # confirm this is intended. The context manager makes sure the
        # handle is closed (and the data flushed) on each iteration.
        with open("test.dis", "w") as fout:
            fout.write(str(pred_rst))
        # ----------------------------------------
        # Evaluate with gold RST tree
        if report:
            # Swap only the trailing extension: str.replace('edus', ...)
            # would also rewrite "edus" anywhere else in the path
            fdis = fedus[:-len(suffix)] + '.dis'
            gold_rst = RSTTree(fname=fdis)
            gold_rst.build()
            # NOTE(review): the return value is unused; the call is kept
            # in case bracketing() has side effects -- confirm and drop
            gold_rst.bracketing()
            met.eval(gold_rst, pred_rst)
    if report:
        met.report()
Пример #9
0
def evalparser(path='./examples', report=False):
    """ Test the parsing performance

    Every ``*.edus`` file found directly under ``path`` is parsed
    together with its ``.pos`` and ``.dep`` side files (same name with
    the extra extension appended), and the predicted brackets are
    written next to it as ``*.brackets``.

    :type path: string
    :param path: path to the evaluation data

    :type report: boolean
    :param report: whether to report (calculate) the f1 score
    """
    from os import listdir
    from os.path import join as joinpath
    suffix = '.edus'
    # ----------------------------------------
    # Load the parsing model
    pm = ParsingModel()
    pm.loadmodel("parsing-model.pickle.gz")
    # ----------------------------------------
    # Evaluation
    met = Metrics(levels=['span','nuclearity','relation'])
    # ----------------------------------------
    # Read all files from the given path
    doclist = [joinpath(path, fname) for fname in listdir(path)
               if fname.endswith(suffix)]
    for fedus in doclist:
        # Strip only the trailing extension: str.replace('edus', ...)
        # would also rewrite "edus" anywhere else in the path
        stem = fedus[:-len(suffix)]
        # ----------------------------------------
        # Parsing
        fpos = fedus + ".pos"
        d_pos = get_d_pos(fpos)
        fdep = fedus + ".dep"
        d_dep = get_d_dep(fdep)
        pred_rst = parse(pm, fedus=fedus, d_pos=d_pos, d_dep=d_dep)
        # Get brackets from parsing results
        pred_brackets = pred_rst.bracketing()
        writebrackets(stem + '.brackets', pred_brackets)
        # ----------------------------------------
        # Evaluate with gold RST tree
        if report:
            gold_rst = RSTTree(fname=stem + '.dis')
            gold_rst.build()
            # NOTE(review): the return value is unused; the call is kept
            # in case bracketing() has side effects -- confirm and drop
            gold_rst.bracketing()
            met.eval(gold_rst, pred_rst)
    if report:
        met.report()
Пример #10
0
    def sr_parse(self, texts, fname):
        """ Shift-reduce RST parsing based on model prediction

        Three side files are loaded before parsing and handed to the
        parser: ``<stem>.dep``, ``<stem>.pos`` and ``<stem>.line``,
        where ``<stem>`` is ``fname`` without its ``.out.edus`` or
        ``.edus`` suffix. Each line of those files is split on the
        ``@#%^&*`` delimiter into a key and a value.

        If none of the predicted labels yields a legal action, an error
        message is printed and the process exits via ``sys.exit()``.

        :type texts: list of string
        :param texts: list of EDUs for parsing

        :type fname: string
        :param fname: path of the EDU file being parsed

        :return: an ``RSTTree`` built from the resulting parse tree
        """
        def _read_table(table_path, strip_value=False):
            # Load one "key@#%^&*value" side file into a mapping
            table = defaultdict()
            with open(table_path, "r") as handle:
                for line in handle.read().splitlines():
                    fields = line.split('@#%^&*')
                    value = fields[1]
                    table[fields[0]] = value.strip() if strip_value else value
            return table

        # Initialize parser
        srparser = SRParser([], [])

        # Strip the suffix from the end of the name only; splitting on
        # the first ".edus" would truncate paths that contain it earlier
        if fname.endswith(".out.edus"):
            stem = fname[:-len(".out.edus")]
        elif fname.endswith(".edus"):
            stem = fname[:-len(".edus")]
        else:
            # Previous behaviour for names that do not end in ".edus"
            stem = fname.split(".edus")[0]

        dep = _read_table(stem + '.dep')
        pos = _read_table(stem + '.pos', strip_value=True)
        lines = _read_table(stem + '.line')

        srparser.init(texts, pos, dep, lines)
        # Parsing
        while not srparser.endparsing():
            # Generate features
            stack, queue = srparser.getstatus()
            # Make sure call the generator with
            # same arguments as in data generation part
            fg = FeatureGenerator(stack, queue)
            features = fg.features()
            labels = self.predict(features)
            # Enumerate through all possible actions ranked based on
            # prediction scores and apply the first legal one
            action = None
            for label in labels:
                action = label2action(label)
                try:
                    srparser.operate(action)
                except ActionError:
                    # Not a legal action: try the next candidate
                    continue
                break
            else:
                # No candidate was legal. The previous index check
                # (i < len(labels)) was always true, so this exit was
                # unreachable and the outer loop could spin forever.
                print("Parsing action error with {}".format(action))
                sys.exit()

        tree = srparser.getparsetree()
        rst = RSTTree(tree=tree)
        return rst