def get_childes_files(root_location, file_name):
    """Read one CHILDES XML file and return its utterances.

    The corpus root is resolved with ``nltk.data.find`` and handed, together
    with ``file_name``, to ``CHILDESCorpusReaderX``; the reader's ``sents()``
    result is returned.

    Side effect: the module-level ``ordered_utterance_list`` is rebound to
    the same result on every call.

    Args:
        root_location: Resource name passed to ``nltk.data.find``.
        file_name: File id(s) passed to ``CHILDESCorpusReaderX``.

    Returns:
        Whatever ``CHILDESCorpusReaderX.sents()`` yields.
        NOTE(review): per the original inline comment each utterance carries
        the speaker at index 0 -- confirm against the reader implementation.
    """
    global ordered_utterance_list

    corpus_path = nltk.data.find(root_location)
    reader = CHILDESCorpusReaderX(corpus_path, file_name)

    ordered_utterance_list = reader.sents()
    return ordered_utterance_list
def get_childes_stemmed(root_location, file_name):
    """Read one CHILDES XML file and stem the interior tokens of each utterance.

    The corpus root is looked up with ``nltk.data.find`` restricted to the
    current directory (``paths=['.']``). Every utterance returned by the
    reader's ``sents()`` is then modified by index assignment: elements at
    positions ``1 .. len - 2`` are replaced by their English Snowball stem.
    The first and the last element of each utterance are left untouched.

    NOTE(review): index 0 is presumably the speaker label; why the final
    element is also skipped is not visible from this function -- confirm it
    is intentional and not an off-by-one.

    Side effect: the module-level ``ordered_utterance_list`` is rebound to
    the utterance collection on every call.

    Args:
        root_location: Resource name passed to ``nltk.data.find``.
        file_name: File id(s) passed to ``CHILDESCorpusReaderX``.

    Returns:
        The utterance collection obtained from ``sents()`` after stemming.
    """
    global ordered_utterance_list

    english_stemmer = SnowballStemmer("english")
    corpus_path = nltk.data.find(root_location, paths=['.'])
    reader = CHILDESCorpusReaderX(corpus_path, file_name)
    ordered_utterance_list = reader.sents()

    for tokens in ordered_utterance_list:
        # Stop one short of the end so the final element is never stemmed.
        stop = len(tokens) - 1
        for pos in range(1, stop):
            tokens[pos] = english_stemmer.stem(tokens[pos])

    return ordered_utterance_list
def get_childes_stemmed(root_location, file_name):
    """Return the utterances of a CHILDES XML file with interior tokens stemmed.

    The corpus root is resolved through ``nltk.data.find`` using its default
    search paths. For each utterance produced by the reader's ``sents()``,
    the elements at positions ``1 .. len - 2`` are overwritten with their
    English Snowball stem; position 0 and the last position are not touched.

    NOTE(review): position 0 is presumably the speaker label; the reason the
    last element is excluded is not shown here -- verify it is deliberate.

    Side effect: the module-level ``ordered_utterance_list`` is rebound to
    the utterance collection on every call.

    Args:
        root_location: Resource name passed to ``nltk.data.find``.
        file_name: File id(s) passed to ``CHILDESCorpusReaderX``.

    Returns:
        The utterance collection obtained from ``sents()`` after stemming.
    """
    global ordered_utterance_list

    snowball = SnowballStemmer("english")
    located_root = nltk.data.find(root_location)
    corpus_reader = CHILDESCorpusReaderX(located_root, file_name)
    ordered_utterance_list = corpus_reader.sents()

    for words in ordered_utterance_list:
        # Interior positions only: skip index 0 and the final index.
        last_index = len(words) - 1
        for idx in range(1, last_index):
            words[idx] = snowball.stem(words[idx])

    return ordered_utterance_list
def get_childes_files(root_location, file_name):
    """Read one CHILDES XML file, optionally logging participant roles.

    The corpus root is looked up with ``nltk.data.find`` restricted to the
    current directory (``paths=["."]``) and wrapped in a
    ``CHILDESCorpusReaderX`` for ``file_name``.

    When the module-level flag ``extractRoles`` is truthy, the first two
    entries of ``reader.participants()`` are walked and, for every
    participant (in sorted key order), one comma-joined row is written to the
    module-level writer ``wrf``: ``file_name`` followed by the participant's
    values in sorted field order. Before the very first row a header line
    (``docId`` plus the sorted field names) is written, after which the
    module-level ``writeRoleHeader`` flag is cleared. Values are joined
    verbatim, with no CSV quoting or escaping.

    Side effects: rebinds the module-level ``ordered_utterance_list``; may
    write to ``wrf`` and clear ``writeRoleHeader``.

    Args:
        root_location: Resource name passed to ``nltk.data.find``.
        file_name: File id(s) passed to ``CHILDESCorpusReaderX``; also
            written as the first column of each role row.

    Returns:
        Whatever ``CHILDESCorpusReaderX.sents()`` yields.
        NOTE(review): per the original inline comment each utterance carries
        the speaker at index 0 -- confirm against the reader implementation.
    """
    global ordered_utterance_list, writeRoleHeader

    corpus_path = nltk.data.find(root_location, paths=["."])
    reader = CHILDESCorpusReaderX(corpus_path, file_name)

    if extractRoles:
        for participants in reader.participants()[:2]:
            for code in sorted(participants.keys()):
                info = participants[code]
                if writeRoleHeader:
                    # Header goes out once, ahead of the first data row.
                    wrf.write("docId,")
                    wrf.write(",".join(sorted(info.keys())) + "\n")
                    writeRoleHeader = False
                wrf.write(file_name + ",")
                wrf.write(",".join(info[field] for field in sorted(info.keys())) + "\n")

    ordered_utterance_list = reader.sents()
    return ordered_utterance_list
def get_childes_files(root_location, file_name):
    """Load a CHILDES XML file and, if enabled, dump participant metadata.

    ``nltk.data.find`` resolves ``root_location`` by searching only the
    current directory (``paths=['.']``); the result and ``file_name`` are
    given to ``CHILDESCorpusReaderX``.

    If the module-level ``extractRoles`` flag is truthy, the first two
    participant mappings returned by the reader are iterated. Each
    participant, visited in sorted key order, produces one line on the
    module-level writer ``wrf``: ``file_name``, a comma, then the
    participant's values ordered by sorted field name. A single header line
    (``docId`` followed by the sorted field names) precedes the first row and
    is suppressed afterwards by clearing the module-level
    ``writeRoleHeader``. Fields are joined with bare commas; nothing is
    quoted or escaped.

    Side effects: rebinds the module-level ``ordered_utterance_list``; may
    write to ``wrf`` and clear ``writeRoleHeader``.

    Args:
        root_location: Resource name passed to ``nltk.data.find``.
        file_name: File id(s) passed to ``CHILDESCorpusReaderX``; also used
            as the leading column of every role row.

    Returns:
        Whatever ``CHILDESCorpusReaderX.sents()`` yields.
        NOTE(review): the original inline comment says the speaker sits at
        index 0 of each utterance -- verify against the reader.
    """
    global ordered_utterance_list, writeRoleHeader

    located_root = nltk.data.find(root_location, paths=['.'])
    corpus_reader = CHILDESCorpusReaderX(located_root, file_name)

    if extractRoles:
        participant_maps = corpus_reader.participants()
        for participant_map in participant_maps[:2]:
            for participant_id in sorted(participant_map.keys()):
                record = participant_map[participant_id]
                if writeRoleHeader:
                    # Emit the column names exactly once per run.
                    wrf.write('docId,')
                    wrf.write(','.join(sorted(record.keys())) + '\n')
                    writeRoleHeader = False
                wrf.write(file_name + ',')
                row_values = [record[name] for name in sorted(record.keys())]
                wrf.write(','.join(row_values) + '\n')

    ordered_utterance_list = corpus_reader.sents()
    return ordered_utterance_list
def get_childes_files(root_location, file_name):
    """Fetch a CHILDES XML file and return it as a collection of utterances.

    ``root_location`` is resolved with ``nltk.data.find`` (default search
    paths), a ``CHILDESCorpusReaderX`` is built for ``file_name``, and the
    reader's ``sents()`` result is returned.

    Side effect: the module-level ``ordered_utterance_list`` is rebound to
    the returned value on every call.

    Args:
        root_location: Resource name passed to ``nltk.data.find``.
        file_name: File id(s) passed to ``CHILDESCorpusReaderX``.

    Returns:
        Whatever ``CHILDESCorpusReaderX.sents()`` yields.
        NOTE(review): the original inline comment says the speaker sits at
        index 0 of each utterance -- verify against the reader.
    """
    global ordered_utterance_list

    located_root = nltk.data.find(root_location)
    corpus_reader = CHILDESCorpusReaderX(located_root, file_name)

    ordered_utterance_list = corpus_reader.sents()
    return ordered_utterance_list
def get_childes_files(root_location, file_name):
    """Open a CHILDES XML file and return the collected child information.

    ``root_location`` is resolved with ``nltk.data.find`` (default search
    paths) and a ``CHILDESCorpusReaderX`` is built for ``file_name``. The
    reader is passed to ``get_child_info`` and the module-level ``info_list``
    is returned afterwards.

    NOTE(review): ``get_child_info`` is presumably what populates
    ``info_list``; its return value is ignored here -- confirm against its
    definition.

    Args:
        root_location: Resource name passed to ``nltk.data.find``.
        file_name: File id(s) passed to ``CHILDESCorpusReaderX``.

    Returns:
        The current value of the module-level ``info_list``.
    """
    global info_list

    corpus_path = nltk.data.find(root_location)
    reader = CHILDESCorpusReaderX(corpus_path, file_name)

    get_child_info(reader)
    return info_list