def train_and_validate_viterbi2(
    _inputFile, _outputFile, _devFile, _devOutputFile, _validateFile
):
    """Train on ``_inputFile``, label ``_devFile`` with Viterbi, then evaluate.

    Args:
        _inputFile: Training file handed to ``Preprocessor``.
        _outputFile: Accepted but not used by this function.
        _devFile: Dev file passed to ``getAllTokens`` and to ``label_viterbi``.
        _devOutputFile: Passed to ``label_viterbi`` and then to ``evaluate``
            (presumably the path the predicted labels are written to; the
            original note mentions ``dev.p3.out`` -- confirm with caller).
        _validateFile: First argument to ``evaluate`` (presumably the gold
            labels for the dev set -- confirm).

    Returns:
        None. The result of ``evaluate`` is not returned.
    """
    # Training side: the preprocessor supplies the representer, vocabulary
    # and states; the dev tokens are collected last, as in the original order.
    trainer = Preprocessor(_inputFile)
    emission_args = (
        trainer.get_representer(),
        trainer.get_vocabulary(),
        trainer.get_states(),
        getAllTokens(_devFile),
    )

    # Build the emission and transition models, then label the dev file.
    emission_model = Emission(*emission_args)
    transition_model = Transition2()
    transition_model.compute_params(trainer)
    label_viterbi(_devFile, _devOutputFile, emission_model, transition_model)

    # Compare the labelled output against the validation file
    # (the original note describes this as the validation error).
    evaluate(_validateFile, _devOutputFile)
def train_and_validate_emission(
    _inputFile, _outputFile, _devFile, _devOutputFile, _validateFile
):
    """Train on ``_inputFile``, label ``_devFile`` by emission only, then evaluate.

    Args:
        _inputFile: Training file handed to ``Preprocessor``.
        _outputFile: Accepted but not used by this function.
        _devFile: Dev file passed to ``Emission.labelSequence``.
        _devOutputFile: Passed to ``Emission.labelSequence`` and then to
            ``evaluate`` (presumably the path the predicted labels are written
            to; the original note mentions ``dev.p2.out`` -- confirm with
            caller).
        _validateFile: First argument to ``evaluate`` (presumably the gold
            labels for the dev set -- confirm).

    Returns:
        None. The result of ``evaluate`` is not returned.
    """
    # Training side: the preprocessor supplies the representer, vocabulary
    # and states that parameterise the emission model.
    trainer = Preprocessor(_inputFile)
    emission_model = Emission(
        trainer.get_representer(),
        trainer.get_vocabulary(),
        trainer.get_states(),
    )

    # Label the dev file using the emission model alone.
    emission_model.labelSequence(_devFile, _devOutputFile)

    # Compare the labelled output against the validation file
    # (the original note describes this as the validation error).
    evaluate(_validateFile, _devOutputFile)