def make_parse_dictionary():
    '''
    Creates a python dictionary structure from the corpus in the file
    annotated-dictionary.xls, omitting nonwords of various types.

    The workbook is located under ``baampath`` and opened with
    ``form.loadexcel``. Only the 'dictionary' sheet is read: column 0 is
    taken as the stem, column 1 as the root and column 2 as the pattern.

    Returns:
    ---------------
    dict
        The keys are the valid stems and the values are the corresponding
        parsed (root, pattern) pairs, each element passed through
        ``.encode()``. If a stem appears in more than one row, the last
        row wins.
    '''
    # Rows whose pattern cell equals one of these labels are skipped.
    # A frozenset gives constant-time membership tests inside the loop.
    nonwordtypes = frozenset([
        'abbreviation', 'compound', 'dialect_word', 'foreign_word',
        'function_word', 'interjection', 'letter_name', 'proper_name',
    ])

    baam_parsings = baampath + 'annotated-dictionary.xls'
    parsings = form.loadexcel(baam_parsings)
    source_parsings = parsings.sheet_by_name('dictionary')

    entries = source_parsings.col_values(0)
    roots = source_parsings.col_values(1)
    patterns = source_parsings.col_values(2)

    # NOTE(review): .encode() relies on the interpreter's default encoding;
    # confirm that is what the consumers of this dictionary expect.
    dictionarydic = {}
    for entry, root, pattern in zip(entries, roots, patterns):
        if pattern not in nonwordtypes:
            dictionarydic[entry] = (root.encode(), pattern.encode())
    return dictionarydic
def make_BANARM_dictionary(source):
    '''
    Create a BANARM dictionary for a named tab ('source') in the
    spreadsheet 'root-pattern-frequencies.xls'.

    Requires module 'form': the BAAM data is read from the Excel
    spreadsheet with ``form.loadexcel``. The workbook is looked up under
    ``baampath``.

    Parameter:
    ---------------
    source: string
        Name of the sheet (tab) to read from the workbook.

    Returns:
    ---------------
    Whatever ``make_dictionary(names, tokens)`` returns, where ``names``
    are the column-0 values of the sheet passed through ``.encode()`` and
    ``tokens`` are the column-1 values unchanged.
    '''
    baam_databook = baampath + r'root-pattern-frequencies.xls'
    databook = form.loadexcel(baam_databook)
    source_sheet = databook.sheet_by_name(source)

    # NOTE(review): .encode() relies on the interpreter's default encoding;
    # confirm that matches what make_dictionary expects for its keys.
    source_names = [name.encode() for name in source_sheet.col_values(0)]
    source_tokens = source_sheet.col_values(1)
    return make_dictionary(source_names, source_tokens)