Exemplo n.º 1
0
    def builddata(self, path):
        """ Collect training actions and samples from the RST trees in a folder

        Every entry of ``path`` whose name ends with '.dis' is loaded as
        an RSTTree; the actions and samples it generates are appended to
        ``self.actionlist`` and ``self.samplelist``.

        :type path: string
        :param path: data path, where all data files are saved
        """
        for entry in os.listdir(path):
            # Only the '.dis' files are read as trees
            if not entry.endswith('.dis'):
                continue
            tree = RSTTree(fname=os.path.join(path, entry))
            tree.build()
            actions, samples = tree.generate_samples()
            # Accumulate over all files instead of overwriting
            self.actionlist += actions
            self.samplelist += samples
Exemplo n.º 2
0
def evalparser(path='./examples', report=False,
               bcvocab=None, draw=True,
               withdp=False, fdpvocab=None, fprojmat=None):
    """ Test the parsing performance

    Every '*.merge' file found in ``path`` is parsed with the model
    loaded from "model/parsing-model.pickle.gz". The predicted brackets
    are written to a '*.brackets' file with the same base name and,
    when ``draw`` is set, the predicted tree is drawn to a '*.ps' file.

    :type path: string
    :param path: path to the evaluation data

    :type report: boolean
    :param report: whether to report (calculate) the f1 score; the gold
                   tree is then read from the '*.dis' file that shares
                   the base name of each '*.merge' file

    :param bcvocab: passed unchanged to the model's sr_parse()

    :type draw: boolean
    :param draw: whether to draw each predicted tree

    :param withdp: forwarded to the ParsingModel constructor
    :param fdpvocab: forwarded to the ParsingModel constructor
    :param fprojmat: forwarded to the ParsingModel constructor
    """
    # ----------------------------------------
    # Load the parsing model
    # Parenthesised single-argument form prints the same text on
    # Python 2 and is valid syntax on Python 3
    print('Load parsing model ...')
    pm = ParsingModel(withdp=withdp,
        fdpvocab=fdpvocab, fprojmat=fprojmat)
    pm.loadmodel("model/parsing-model.pickle.gz")
    # ----------------------------------------
    # Evaluation
    met = Metrics(levels=['span','nuclearity','relation'])
    # ----------------------------------------
    # Read all files from the given path
    suffix = '.merge'
    doclist = [joinpath(path, fname) for fname in listdir(path) if fname.endswith(suffix)]
    for fmerge in doclist:
        # Strip only the trailing suffix; str.replace() would also
        # rewrite a '.merge' that appears earlier in the path
        stem = fmerge[:-len(suffix)]
        # ----------------------------------------
        # Read *.merge file
        dr = DocReader()
        doc = dr.read(fmerge)
        # ----------------------------------------
        # Parsing
        pred_rst = pm.sr_parse(doc, bcvocab)
        if draw:
            strtree = pred_rst.parse()
            drawrst(strtree, stem + '.ps')
        # Get brackets from parsing results
        pred_brackets = pred_rst.bracketing()
        # Write brackets into file
        writebrackets(stem + '.brackets', pred_brackets)
        # ----------------------------------------
        # Evaluate with gold RST tree
        if report:
            gold_rst = RSTTree(stem + '.dis', fmerge)
            gold_rst.build()
            # NOTE(review): the returned brackets are not used here; the
            # call is kept in case bracketing() has side effects that
            # met.eval() relies on -- confirm and drop if it has none
            gold_rst.bracketing()
            met.eval(gold_rst, pred_rst)
    if report:
        met.report()
Exemplo n.º 3
0
    def builddata(self, rpath):
        """ Build a list of feature list from a given path

        Each '*.dis' file in ``rpath`` is paired with the '*.merge' file
        that shares its base name. The actions and samples generated
        from the resulting tree (using ``self.bcvocab``) are appended to
        ``self.actionlist`` and ``self.samplelist``.

        :type rpath: string
        :param rpath: data path, where all data files are saved
        """
        # Read RST tree file
        suffix = '.dis'
        files = [os.path.join(rpath, fname) for fname in os.listdir(rpath) if fname.endswith(suffix)]
        for fdis in files:
            # Parenthesised single-argument form prints the same text on
            # Python 2 and is valid syntax on Python 3
            print('Processing data from file: {}'.format(fdis))
            # Swap only the trailing suffix; str.replace() would also
            # rewrite a '.dis' that appears earlier in the path
            fmerge = fdis[:-len(suffix)] + '.merge'
            rst = RSTTree(fdis, fmerge)
            rst.build()
            actionlist, samplelist = rst.generate_samples(self.bcvocab)
            self.actionlist += actionlist
            self.samplelist += samplelist
Exemplo n.º 4
0
def evalparser(path='./examples', report=False):
    """ Test the parsing performance

    Every '*.edus' file found in ``path`` is parsed with the model
    loaded from "parsing-model.pickle.gz", and the string form of the
    predicted tree is written to "test.dis" in the current directory.

    :type path: string
    :param path: path to the evaluation data

    :type report: boolean
    :param report: whether to report (calculate) the f1 score; the gold
                   tree is then read from the '*.dis' file that shares
                   the base name of each '*.edus' file
    """
    from os import listdir
    from os.path import join as joinpath
    # ----------------------------------------
    # Load the parsing model
    pm = ParsingModel()
    pm.loadmodel("parsing-model.pickle.gz")
    # ----------------------------------------
    # Evaluation
    met = Metrics(levels=['span', 'nuclearity', 'relation'])
    # ----------------------------------------
    # Read all files from the given path
    suffix = '.edus'
    doclist = [
        joinpath(path, fname) for fname in listdir(path)
        if fname.endswith(suffix)
    ]
    for fedus in doclist:
        # ----------------------------------------
        # Parsing
        pred_rst = parse(pm, fedus=fedus)
        # Dump the predicted tree as text. The file is reopened in "w"
        # mode for every document, so after the loop it holds only the
        # last prediction. The context manager closes the handle, which
        # was previously left open on every iteration.
        with open("test.dis", "w") as fout:
            fout.write(str(pred_rst))
        # ----------------------------------------
        # Evaluate with gold RST tree
        if report:
            # Swap only the trailing suffix; str.replace('edus', 'dis')
            # would also rewrite 'edus' anywhere else in the path
            fdis = fedus[:-len(suffix)] + '.dis'
            gold_rst = RSTTree(fname=fdis)
            gold_rst.build()
            # NOTE(review): the returned brackets are not used here; the
            # call is kept in case bracketing() has side effects that
            # met.eval() relies on -- confirm and drop if it has none
            gold_rst.bracketing()
            met.eval(gold_rst, pred_rst)
    if report:
        met.report()
Exemplo n.º 5
0
def evalparser(path='./examples', report=False):
    """ Test the parsing performance

    Every '*.edus' file found in ``path`` is parsed with the model
    loaded from "parsing-model.pickle.gz". The companion files
    '<name>.edus.pos' and '<name>.edus.dep' are read through
    get_d_pos() / get_d_dep() and handed to the parser. The predicted
    brackets are written to a '*.brackets' file with the same base name.

    :type path: string
    :param path: path to the evaluation data

    :type report: boolean
    :param report: whether to report (calculate) the f1 score; the gold
                   tree is then read from the '*.dis' file that shares
                   the base name of each '*.edus' file
    """
    from os import listdir
    from os.path import join as joinpath
    # ----------------------------------------
    # Load the parsing model
    pm = ParsingModel()
    pm.loadmodel("parsing-model.pickle.gz")
    # ----------------------------------------
    # Evaluation
    met = Metrics(levels=['span','nuclearity','relation'])
    # ----------------------------------------
    # Read all files from the given path
    suffix = '.edus'
    doclist = [joinpath(path, fname) for fname in listdir(path) if fname.endswith(suffix)]
    for fedus in doclist:
        # Strip only the trailing suffix; str.replace('edus', ...) would
        # also rewrite 'edus' anywhere else in the path
        stem = fedus[:-len(suffix)]
        # ----------------------------------------
        # Parsing
        fpos = fedus + ".pos"
        d_pos = get_d_pos(fpos)
        fdep = fedus + ".dep"
        d_dep = get_d_dep(fdep)
        pred_rst = parse(pm, fedus=fedus, d_pos=d_pos, d_dep=d_dep)
        # Get brackets from parsing results
        pred_brackets = pred_rst.bracketing()
        fbrackets = stem + '.brackets'
        writebrackets(fbrackets, pred_brackets)
        # ----------------------------------------
        # Evaluate with gold RST tree
        if report:
            fdis = stem + '.dis'
            gold_rst = RSTTree(fname=fdis)
            gold_rst.build()
            # NOTE(review): the returned brackets are not used here; the
            # call is kept in case bracketing() has side effects that
            # met.eval() relies on -- confirm and drop if it has none
            gold_rst.bracketing()
            met.eval(gold_rst, pred_rst)
    if report:
        met.report()