def test_add_credit_no_reference(self):
    # Run two fixture entries through add_wiktionary_credit against a stubbed
    # wiki page and check that each returned entry carries a 'reference' key.
    page_stub = MagicMock()
    page_stub.title.return_value = 'test'
    page_stub.site.language = 'en'

    credited = Translation.add_wiktionary_credit(
        [self.entry1, self.entry2], page_stub)
    print(credited)

    for position in (0, 1):
        assert 'reference' in credited[position].additional_data
def import_translation_batch(self, batch_number=1):
    """Open the CSV file of the requested translation batch for reading.

    The batch folder is created when it is missing. When the file
    ``batch-<batch_number>.csv`` exists in that folder, it is opened in read
    mode and stored on ``self._current_file``; otherwise nothing is opened.

    :param batch_number: index used to build the batch file name.
    """
    folder = 'user_data/translation_batch'
    batch_path = f'{folder}/batch-{batch_number}.csv'

    translator = Translation()
    translator.output.wikipage_renderer.pages_to_link = self.malagasy_words_to_link

    folder_missing = not os.path.exists(folder)
    if folder_missing:
        os.mkdir(folder)

    if not os.path.exists(batch_path):
        return
    self._current_file = open(batch_path, 'r')
def run(self, word_additional_data_info, counter=0):
    """Interactively review one translated entry and upload it on approval.

    Reads ``word``, ``word_id`` and ``part_of_speech`` from
    *word_additional_data_info*, fetches the definitions and translated
    definitions for that word id, and prints them. Nothing further happens
    when there is no translated definition or when the first one is ``'.'``.
    Otherwise the operator is prompted: ``y`` publishes and saves the entry
    and marks the definition ``'done'``; ``n`` marks it ``'rejected'``; any
    other answer leaves it untouched.

    :param word_additional_data_info: mapping providing the keys listed above.
    :param counter: zero-based position of the entry, shown one-based in the
        prompt.
    """
    translator = Translation()
    translator.output.wikipage_renderer.pages_to_link = self.malagasy_words_to_link

    info = word_additional_data_info
    word = info['word']
    word_id = info['word_id']
    part_of_speech = info['part_of_speech']

    definitions = self.get_definitions(word_id)
    translated_definitions = self.get_translated_definitions(word_id)
    print(f'{word} ({word_id}) ->', definitions, translated_definitions)

    candidate = Entry(
        entry=word,
        part_of_speech=part_of_speech,
        language=self.language,
        definitions=translated_definitions,
    )

    # Nothing to review: no translation at all, or a lone '.' as first item.
    if not translated_definitions or translated_definitions[0] == '.':
        return

    prompt = f'Entry # {counter + 1}: Accept and upload? (y/n)'
    answer = pywikibot.input(prompt).strip()

    if answer == 'y':
        translator.publish_to_wiktionary(candidate.entry, [candidate])
        translator._save_translation_from_page([candidate])
        self.mark_definition(word_id, 'done')
        return
    if answer == 'n':
        self.mark_definition(word_id, 'rejected')
def build_translation_batches(self, word_additional_data_info, counter=0):
    """Append one word's first definition/translation pair to a batch file.

    Batch files live in ``user_data/translation_batch`` and are named
    ``batch-<n>.csv``. Once ``self._current_file_size`` (the running sum of
    the lengths of the written source definitions) reaches 5000, the size is
    reset, ``self._batch_file_counter`` is incremented and output moves on to
    the next batch file.

    The handle stored on ``self._current_file`` is reused across calls when
    it is already the append-mode handle of the target file. Any other handle
    is closed before a new one is opened, so repeated calls no longer leak a
    file descriptor per word.

    :param word_additional_data_info: mapping providing ``word`` and
        ``word_id``.
    :param counter: unused; kept for interface compatibility.
    """
    translation = Translation()
    batch_folder = 'user_data/translation_batch'
    translation.output.wikipage_renderer.pages_to_link = self.malagasy_words_to_link
    word = word_additional_data_info['word']
    word_id = word_additional_data_info['word_id']
    definitions = self.get_definitions(word_id)
    translated_definitions = self.get_translated_definitions(word_id)

    # Also creates a missing parent directory and tolerates the folder
    # appearing between a check and the creation.
    os.makedirs(batch_folder, exist_ok=True)

    # Rotate to the next batch file once the current one is full.
    if self._current_file_size >= 5000:
        self._current_file_size = 0
        self._batch_file_counter += 1

    filename = f'{batch_folder}/batch-{self._batch_file_counter}.csv'
    current = self._current_file
    reusable = (
        current is not None
        and not current.closed
        and current.name == filename
        and 'a' in current.mode
    )
    if not reusable:
        if current is not None:
            # close() is a no-op on an already closed file object.
            current.close()
        self._current_file = open(filename, 'a')

    if not definitions or not translated_definitions:
        return
    # NOTE(review): a lone '.' is presumably a placeholder for "no usable
    # translation" -- confirm with whatever produces the translations.
    if translated_definitions[0] == '.':
        return

    line = f'"{word}"/"{word_id}"/"{definitions[0]}"/"{translated_definitions[0]}"'
    self._current_file.write(f'{line}\n')
    self._current_file_size += len(definitions[0])
import sys
from csv import writer

import pywikibot
import redis

from api.entryprocessor import WiktionaryProcessorFactory
from api.page_lister import redis_get_pages_from_category as get_pages_from_category
from api.translation_v2.core import Translation
from redis_wikicache import RedisSite

# Script entry point: process every page of the English Wiktionary category
# named by the first command-line argument and report how many pages failed.
if __name__ == '__main__':
    t = Translation(use_configured_postprocessors=True)
    # t.post_processors = postprocessors
    site = RedisSite('en', 'wiktionary', offline=False)
    errored = []   # pages whose processing raised one of the handled errors
    errors = 0     # number of such pages
    processed = 0  # number of pages yielded by the category listing
    k = 100
    entries = 0
    wiktionary_processor_class = WiktionaryProcessorFactory.create('en')
    category = sys.argv[1]
    with open(f'user_data/translations/{category}.csv', 'w') as output_file:
        csv_writer = writer(output_file)
        for wiki_page in get_pages_from_category('en', category):
            print(wiki_page)
            processed += 1
            try:
                entries = t.process_wiktionary_wiki_page(wiki_page)
            except (pywikibot.Error, redis.exceptions.TimeoutError):
                # Record the failure so the reported rate reflects it; the
                # counter was previously never updated and the rate was
                # therefore always 0.
                errors += 1
                errored.append(wiki_page)
                continue

    # Percentage of pages that failed; 0.0 when the category yielded no page.
    error_rate = errors * 100. / processed if processed else 0.
    print('process error rate:', error_rate)
def test_generate_summary(self):
    # The summary generated for three fixture entries must mention the
    # language of each of them.
    fixtures = [self.entry1, self.entry2, self.entry3]
    summary = Translation().generate_summary(fixtures)
    for fixture in fixtures:
        self.assertIn(fixture.language, summary)
default='/opt/botjagwar/user_data/entry_translator.log') parser.add_argument('--host', dest='HOST', type=str, default='0.0.0.0') parser.add_argument('--log-level', dest='LOG_LEVEL', type=str, default='/opt/botjagwar/user_data/entry_translator.log') args = parser.parse_args() try: LOG_LEVEL = log._nameToLevel[args.LOG_LEVEL.upper()] except KeyError: LOG_LEVEL = 10 log.basicConfig(filename=args.LOG, level=LOG_LEVEL) translations = Translation() routes = web.RouteTableDef() # Throttle Config def set_throttle(i): from pywikibot import throttle t = throttle.Throttle(pwbot.Site('mg', 'wiktionary'), mindelay=0, maxdelay=1) pwbot.config.put_throttle = 1 t.setDelays(i) def _get_page(name, lang): page = pwbot.Page(pwbot.Site(lang, 'wiktionary'), name)