Пример #1
0
def main():
    parser = OptionParser()
    parser.add_option("--result-fname", dest="result_fname")
    parser.add_option("--state-type", dest="state_type")
    (options, args) = parser.parse_args()
    results = ResultsFile(options.result_fname)
    entries = resultsModel.Entry.entries_from_results_file(results)

    annotation_id_to_entries = {}

    for entry in entries:
        annotation_id_to_entries.setdefault(entry.annotation.id, [])
        annotation_id_to_entries[entry.annotation.id].append(entry)

    new_corpus = []

    for i, (aid, entries) in enumerate(annotation_id_to_entries.iteritems()):
        annotation = entries[0].annotation

        for entry in entries:
            assert entry.annotation.id == annotation.id
            new_corpus.extend(
                make_annotation_for_result(annotation,
                                           entry.best_result_as_entry()))
        if i > 10:
            break
    annotationIo.save(new_corpus, "negative_from_inference.yaml")

    print "entries", len(entries)
Пример #2
0
def main():
    """
    Splits a corpus yaml file into multiple smaller files, for faster
    annotation saving and loading.

    Command-line options (all required):
      --corpus_fname  path of the corpus to split; must end in ".yaml"
      --page_size     maximum number of annotations per page file (>= 1)
      --result_dir    directory for the page files; created if missing

    Page ``i`` is written to ``<result_dir>/<name>.page_<i>.yaml`` where
    ``<name>`` is the corpus file's base name without the ".yaml" suffix.
    Invalid options are reported through ``parser.error``, which prints a
    usage message and exits, before anything is loaded or written.
    """
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option("--corpus_fname", dest="corpus_fname")
    parser.add_option("--page_size", dest="page_size", type="int")
    parser.add_option("--result_dir", dest="result_dir", metavar="FILE")
    (options, args) = parser.parse_args()

    # Validate everything up front so a bad invocation has no side effects.
    if not options.corpus_fname:
        parser.error("--corpus_fname is required")
    if not options.result_dir:
        parser.error("--result_dir is required")
    if options.page_size is None or options.page_size < 1:
        parser.error("--page_size must be an integer >= 1")

    suffix = ".yaml"
    basename = os.path.basename(options.corpus_fname)
    if not basename.endswith(suffix):
        parser.error("--corpus_fname must end in %s" % suffix)
    name = basename[:-len(suffix)]

    corpus = annotationIo.load(options.corpus_fname)

    # Cut the corpus into consecutive pages of at most page_size annotations.
    pages = []
    current_page = []
    for annotation in corpus:
        current_page.append(annotation)
        if len(current_page) >= options.page_size:
            pages.append(current_page)
            current_page = []
    if current_page:
        # Trailing, partially filled page.
        pages.append(current_page)

    if not os.path.exists(options.result_dir):
        os.makedirs(options.result_dir)
    for page_i, page in enumerate(pages):
        fname = "%s/%s.page_%d.yaml" % (options.result_dir, name, page_i)
        annotationIo.save(page, fname)
Пример #3
0
def main():
    """Save a thinned copy of the negative ESDC corpus to ``test.yaml``.

    Loads ``negativeEsdcs.yaml`` from under ``SLU_HOME`` and keeps, in corpus
    order, only the first two annotations seen for each ``id``.  The kept
    annotations are wrapped in ``annotationIo.Corpus`` before saving.
    """
    corpus_path = (
        "%s/tools/forklift/dataAnnotation/data/negativeEsdcs.yaml" % SLU_HOME)

    # Number of annotations seen so far for each annotation id.
    seen_per_id = collections.defaultdict(int)

    kept = []
    for annotation in annotationIo.load(corpus_path):
        seen_per_id[annotation.id] += 1
        if seen_per_id[annotation.id] > 2:
            # Third or later annotation with this id: drop it.
            continue
        kept.append(annotation)

    annotationIo.save(annotationIo.Corpus(kept), "test.yaml")
Пример #4
0
    def testSource(self):
        """A source set on an ESDC is still there after a save/load cycle."""
        original_path = "data/corpusWithPathsSmall.v0.yaml"
        saved_path = "data/corpusWithPathsSmall.v1.yaml"

        # Tag the first ESDC of the first annotation and read the tag back.
        loaded = annotationIo.load(original_path)
        first_annotation = loaded[0]
        first_esdc = first_annotation.esdcs[0]
        first_annotation.setSource(first_esdc, "person 1")
        self.assertEqual(first_annotation.getSource(first_esdc), "person 1")

        # Write the modified corpus out and load it again.
        annotationIo.save(loaded, saved_path)
        reloaded_annotation = annotationIo.load(saved_path)[0]
        reloaded_esdc = reloaded_annotation.esdcs[0]

        # The reloaded ESDC must report the same source as the in-memory one.
        self.assertEqual(first_annotation.getSource(first_esdc),
                         reloaded_annotation.getSource(reloaded_esdc))
Пример #5
0
def main():

    extractor = Extractor()
    oldCorpus = annotationIo.load("%s/dataAnnotation/data/forkliftMturkEsdcs.stefie10.groundings.withPaths.yaml" % os.environ['FORKLIFT_HOME'])
    annotations = []
    for i, a in enumerate(oldCorpus):
#        if i != 140:
#            continue
        print "doing", i, a.entireText
        automatic_esdcs_groups = extractor.extractTopNEsdcs(a.entireText, n=10)

        
        for automatic_esdc_group in automatic_esdcs_groups:
            annotation = annotationIo.Annotation(a.assignmentId,
                                                 automatic_esdc_group)
            
            for automatic_esdc in automatic_esdc_group.flattenedEsdcs:
                if automatic_esdc in a.flattenedEsdcs:
                    annotation.setGroundingIsCorrect(automatic_esdc, True)
                else:
                    annotation.setGroundingIsCorrect(automatic_esdc, False)

            annotations.append(annotation)
    annotationIo.save(annotationIo.Corpus(annotations), "negativeEsdcs.yaml")
Пример #6
0
    def testLoadSave(self):
        """Saving a corpus and loading it back keeps the same length."""
        source_path = "data/corpusWithPathsSmall.v0.yaml"
        copy_path = "data/corpusWithPathsSmall.v1.yaml"

        original = annotationIo.load(source_path)
        annotationIo.save(original, copy_path)
        reloaded = annotationIo.load(copy_path)

        self.assertEqual(len(original), len(reloaded))
Пример #7
0
 def save(self, fname=None):
     """Save this object's annotations.

     When ``fname`` is a non-empty value, it replaces ``self.fnames`` and
     the annotations are written to that file.  In every case the
     annotations are then passed to ``annotationIo.save_separate_files``.

     fname -- optional path of a single file to write to; ``None`` or ""
              skips the single-file save.
     """
     if fname is not None and fname != "":
         self.fnames = [fname]
         # annotationIo.save takes (corpus, fname) at its other call sites;
         # the previous one-argument call passed the file name as the corpus.
         # NOTE(review): confirm self.annotations is the object to write here.
         annotationIo.save(self.annotations, fname)
     annotationIo.save_separate_files(self.annotations)