def __init__(self, corpus_folder, taxonomy: Taxonomy):
    """Set up the AMI corpus and parse its utterances.

    Runs the ``Corpus`` base initializer under the name "AMI", records
    the ``test_files`` identifiers, then loads the raw corpus from
    ``corpus_folder`` and stores the parsed result in ``self.utterances``.

    Args:
        corpus_folder: Corpus location, forwarded to ``Corpus.__init__``
            and ``load_corpus``.
        taxonomy: Taxonomy forwarded to ``Corpus.__init__``.
    """
    Corpus.__init__(self, "AMI", corpus_folder, taxonomy)
    # Assigned before loading/parsing, as in the original ordering;
    # presumably these are the meetings reserved for testing -- confirm.
    self.test_files = [
        "ES2004",
        "ES2014",
        "IS1009",
        "TS3003",
        "TS3007",
        "EN2002",
    ]
    self.utterances = self.parse_corpus(self.load_corpus(corpus_folder))
def __init__(self, midas_folder, taxonomy: Taxonomy):
    """Set up the MIDAS corpus and parse its utterances.

    Runs the ``Corpus`` base initializer under the name "MIDAS", records
    the per-split file names in ``self.files``, then loads the raw corpus
    from ``midas_folder`` and stores the parsed result in
    ``self.utterances``.

    Args:
        midas_folder: Corpus location, forwarded to ``Corpus.__init__``
            and ``load_corpus``.
        taxonomy: Taxonomy forwarded to ``Corpus.__init__``.
    """
    Corpus.__init__(self, "MIDAS", midas_folder, taxonomy)
    # Assigned before loading/parsing, as in the original ordering.
    self.files = {
        "train": "train.txt",
        "dev": "dev.txt",
        "test": "test.txt",
    }
    self.utterances = self.parse_corpus(self.load_corpus(midas_folder))
def __init__(self, corpus_folder, taxonomy: Taxonomy):
    """Set up the AMI corpus and parse its utterances.

    Runs the ``Corpus`` base initializer under the name "AMI", then loads
    the raw corpus from ``corpus_folder`` and stores the parsed result in
    ``self.utterances``.

    Args:
        corpus_folder: Corpus location, forwarded to ``Corpus.__init__``
            and ``load_corpus``.
        taxonomy: Taxonomy forwarded to ``Corpus.__init__``.
    """
    Corpus.__init__(self, "AMI", corpus_folder, taxonomy)
    self.utterances = self.parse_corpus(self.load_corpus(corpus_folder))
def __init__(self, maptask_folder, taxonomy: Taxonomy):
    """Set up the Maptask corpus and parse its utterances.

    Runs the ``Corpus`` base initializer under the name "Maptask", then
    loads the raw corpus from ``maptask_folder`` and stores the parsed
    result in ``self.utterances``.

    Args:
        maptask_folder: Corpus location, forwarded to ``Corpus.__init__``
            and ``load_corpus``.
        taxonomy: Taxonomy forwarded to ``Corpus.__init__``.
    """
    Corpus.__init__(self, "Maptask", maptask_folder, taxonomy)
    self.utterances = self.parse_corpus(self.load_corpus(maptask_folder))
def __init__(self, switchboard_folder, taxonomy: Taxonomy):
    """Set up the Switchboard corpus and parse its utterances.

    Runs the ``Corpus`` base initializer under the name "Switchboard",
    then loads the raw corpus from ``switchboard_folder`` and stores the
    parsed result in ``self.utterances``.

    Args:
        switchboard_folder: Corpus location, forwarded to
            ``Corpus.__init__`` and ``load_corpus``.
        taxonomy: Taxonomy forwarded to ``Corpus.__init__``.
    """
    Corpus.__init__(self, "Switchboard", switchboard_folder, taxonomy)
    self.utterances = self.parse_corpus(
        self.load_corpus(switchboard_folder)
    )
def __init__(self, switchboard_folder, taxonomy: Taxonomy):
    """Set up the Switchboard corpus and parse its utterances.

    Runs the ``Corpus`` base initializer under the name "Switchboard",
    records the ``test_files`` prefixes, then loads the raw corpus from
    ``switchboard_folder`` and stores the parsed result in
    ``self.utterances``.

    Args:
        switchboard_folder: Corpus location, forwarded to
            ``Corpus.__init__`` and ``load_corpus``.
        taxonomy: Taxonomy forwarded to ``Corpus.__init__``.
    """
    Corpus.__init__(self, "Switchboard", switchboard_folder, taxonomy)
    # Assigned before loading/parsing, as in the original ordering;
    # presumably these select the conversations used for testing -- confirm.
    self.test_files = [
        "sw11",
        "sw12",
        "sw13",
    ]
    self.utterances = self.parse_corpus(
        self.load_corpus(switchboard_folder)
    )
def __init__(self, daily_dialog_folder, taxonomy: Taxonomy):
    """Set up the DailyDialog corpus and parse its utterances.

    Runs the ``Corpus`` base initializer under the name "DailyDialog",
    then loads the raw corpus from ``daily_dialog_folder`` and stores the
    parsed result in ``self.utterances``.

    Args:
        daily_dialog_folder: Corpus location, forwarded to
            ``Corpus.__init__`` and ``load_corpus``.
        taxonomy: Taxonomy forwarded to ``Corpus.__init__``.
    """
    Corpus.__init__(self, "DailyDialog", daily_dialog_folder, taxonomy)
    self.utterances = self.parse_corpus(
        self.load_corpus(daily_dialog_folder)
    )
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse

from corpora.corpus import Corpus


def _parse_args():
    """Parse the two positional command-line arguments of this script.

    Returns:
        The argparse namespace with ``parsed_document`` (input file) and
        ``dictionary`` (output file) attributes.
    """
    cli = argparse.ArgumentParser(
        description="generate dictionary for a corpus")
    cli.add_argument(
        'parsed_document',
        help="python pickle file, containing tokens and metadata")
    cli.add_argument('dictionary', help="output dictionary")
    return cli.parse_args()


if __name__ == '__main__':
    # Script flow: load a previously parsed corpus, build its dictionary,
    # and write the dictionary to the requested output path.
    options = _parse_args()

    print("loading corpus")
    loaded = Corpus.load(options.parsed_document)

    print("generate dictionary")
    loaded.generate_dictionary()

    print("saving dictionary")
    loaded.save_dictionary(options.dictionary)