def move_and_parse(indir='xml-form'):
    """
    Use corpkit/CoreNLP to parse the corpus

    Creates a corpkit project named ``rsc-proj``, copies ``indir`` to
    ``rsc-proj/data``, changes the process working directory to
    ``rsc-proj`` and parses the corpus named ``rsc-form``.

    Note that the working directory stays changed after this returns.

    :param indir: directory tree to copy into the new project
    :returns: the result of ``Corpus.parse``
    """
    import shutil
    import os
    from corpkit import Corpus, new_project

    # make a new project and move into it
    new_project('rsc-proj')
    # Copy the directory the caller asked for; 'xml-form' used to be
    # hard-coded here, which silently ignored the ``indir`` argument.
    # NOTE(review): copytree fails if 'rsc-proj/data' already exists, and
    # the corpus is opened below as 'rsc-form' -- confirm whether the
    # destination should be 'rsc-proj/data/rsc-form' instead.
    shutil.copytree(indir, 'rsc-proj/data')
    os.chdir('rsc-proj')
    corpus = Corpus('rsc-form')
    # Hand the parse result back instead of discarding it in a local.
    return corpus.parse(metadata=True,
                        speaker_segmentation=False,
                        multiprocess=15)
import corpkit
from corpkit import Corpus

# Location of the raw (not yet parsed) corpus on the local machine.
_corpus_dir = 'C:\\Users\\jbjb\\Documents\\DATA\\weird corpus\\corpkit\\explit\\data'

# Wrap the directory in a Corpus object and run the parser over it.
unparsed = Corpus(_corpus_dir)
unparsed.parse()