def main():
    """Command-line entry point: print the pronunciations of a word.

    The language and the word are read through
    ``commandline.LanguageAndWordInput``, looked up with
    ``get_pronunciations`` and the result is handed to
    ``commandline.output_list``.

    Raises:
        RuntimeError: when the lookup result is empty (falsy).
    """
    # NOTE(review): the description text mentions phonemes although this
    # entry point reports pronunciations -- confirm the intended wording.
    cli_args = commandline.LanguageAndWordInput.parse_arguments(
        "Get the phonemes from a language")
    found = get_pronunciations(cli_args.language, cli_args.word)
    if found:
        commandline.output_list(found)
        return
    template = "No pronunciations found for word '{}' in language '{}'"
    raise RuntimeError(template.format(cli_args.word, cli_args.language))
self.all_data.append(row) def _get_language_code(self): return language_codes.Phoibe.map(self.language) def get_all_phonemes(self): """ get a set of all phonemes for the language of the phonemeCollector """ return set(row['Phoneme'].decode('utf8') for row in self.all_data) def get_phonemes(language): """Main entry point for the module will return a set of phonemes for the given language Arguments: language=language for which to get the phonemes Returns: set of phonemes """ phoibe_data = resources.phoible_database phonemes_collector = PhonemesCollector(language) phonemes_collector.parse_source(phoibe_data) return phonemes_collector.get_all_phonemes() if __name__ == '__main__': description = 'Get the phonemes from a language' args = commandline.LanguageInput.parse_arguments(description) phonemes = get_phonemes(args.language) commandline.output_list(phonemes)
pronunciation_entries = (entry.get(key, []) for entry in wiktionary_entry) pronunciations = itertools.chain.from_iterable(pronunciation_entries) return pronunciations def list_pronunciations(pronunciation_entries): """ Parses all the pronunciations from the entries Arguments: pronunciation_entries = a iteratable of entries to parse Returns: set of pronunciations : set(pron1, pron2, ...) """ pronunciations = set() pattern = re.compile('IPA: */(.*?)/') for entry in pronunciation_entries: found_pronunciations = pattern.findall(entry) for pronunciation in found_pronunciations: pronunciations.add(pronunciation) return pronunciations if __name__ == '__main__': description = 'Get the phonemes from a language' args = commandline.LanguageAndWordInput.parse_arguments( description, extra_arguments=['local']) pronunciations = get_pronunciations(args.language, args.word, args.local) if not pronunciations: message = "No pronunciations found for word '{}' in language '{}'" raise RuntimeError(message.format(args.word, args.language)) commandline.output_list(pronunciations)
text = requests.get(page_path).text stream = StringIO.StringIO(text) return stream def _get_frequency_list_from_file(file_pointer): """Take a pointer to a file and get the frequency list from it """ with open(file_pointer) as instream: freq_list = _get_frequency_list_from_filestream(instream) return freq_list def _get_frequency_list_from_filestream(instream): """Take a file stream and get the frequency list from it """ freq_list = [] for line in instream: if not line: break word, freq = line.split() freq_list.append(word) return freq_list if __name__ == '__main__': description = 'Get the word frequencies for a language' args = commandline.LanguageInput.parse_arguments(description) frequency_list = get_frequency_list(args.language) commandline.output_list(frequency_list[:5])
[resources.hermit_dave_github, language_code, page_name]) text = requests.get(page_path).text stream = StringIO.StringIO(text) return stream class FrequencySources(object): language_code = staticmethod(language_codes.HermitDave.map) frequency_filestream = staticmethod(_get_hermitdave_page) def _frequency_list_from_filestream(filestream, extended_return_value=False): """Take a filestream and get the frequency list from it if extended_return -> list of (word, ranking, occurances) """ freq_list = (line.strip().split() for line in filestream if line.strip()) freq_list = [ word if not extended_return_value else (word, i + 1, int(freq)) for i, (word, freq) in enumerate(freq_list) ] if not freq_list: raise RuntimeError("No entries found for creating a frequency list") return freq_list if __name__ == '__main__': description = 'Get the word frequencies for a language' args = commandline.LanguageInput.parse_arguments(description) frequency_list = get_frequency_list(args.language) commandline.output_list(frequency_list[:5])