Exemplo n.º 1
0
 def test_add_credit_no_reference(self):
     """Check that add_wiktionary_credit puts a 'reference' on both entries.

     The wiki page is a MagicMock whose ``title()`` returns ``'test'`` and
     whose ``site.language`` is ``'en'``.
     """
     page_stub = MagicMock()
     page_stub.title.return_value = 'test'
     page_stub.site.language = 'en'

     source_entries = [self.entry1, self.entry2]
     entries = Translation.add_wiktionary_credit(source_entries, page_stub)
     print(entries)

     # Index explicitly so a result shorter than two entries still fails.
     for position in (0, 1):
         assert 'reference' in entries[position].additional_data
    def import_translation_batch(self, batch_number=1):
        """Open the CSV file of a translation batch for reading.

        Makes sure the batch folder exists and, when
        ``batch-<batch_number>.csv`` is present in it, stores a read-mode
        handle to that file in ``self._current_file``. When the file is
        missing, ``self._current_file`` is left untouched.

        :param batch_number: number used to build the batch file name.
        """
        translation = Translation()
        batch_folder = 'user_data/translation_batch'
        translation.output.wikipage_renderer.pages_to_link = self.malagasy_words_to_link

        # makedirs(exist_ok=True) also creates a missing parent folder and
        # avoids the check-then-create race of exists() followed by mkdir().
        os.makedirs(batch_folder, exist_ok=True)

        batch_path = f'{batch_folder}/batch-{batch_number}.csv'
        if os.path.exists(batch_path):
            # NOTE(review): the handle is kept open on the instance; nothing
            # visible in this method closes it -- confirm the caller does.
            self._current_file = open(batch_path, 'r')
    def run(self, word_additional_data_info, counter=0):
        """Ask the operator whether to publish one translated entry.

        Reads ``'word'``, ``'word_id'`` and ``'part_of_speech'`` from
        *word_additional_data_info*, looks up the definitions and translated
        definitions for the word id, and prints them. Nothing more happens
        when there is no translated definition or the first one is ``'.'``.
        Otherwise the operator is prompted: ``y`` publishes and saves the
        entry and marks the definition ``'done'``; ``n`` marks it
        ``'rejected'``; any other answer leaves it unmarked.

        :param word_additional_data_info: mapping describing the word.
        :param counter: zero-based position shown (plus one) in the prompt.
        """
        publisher = Translation()
        publisher.output.wikipage_renderer.pages_to_link = self.malagasy_words_to_link

        word = word_additional_data_info['word']
        word_id = word_additional_data_info['word_id']
        part_of_speech = word_additional_data_info['part_of_speech']

        source_definitions = self.get_definitions(word_id)
        translated = self.get_translated_definitions(word_id)
        print(f'{word} ({word_id}) ->', source_definitions, translated)

        candidate = Entry(
            entry=word,
            part_of_speech=part_of_speech,
            language=self.language,
            definitions=translated,
        )

        # Skip words without a translation, or whose translation is the
        # '.' placeholder.
        if not translated or translated[0] == '.':
            return

        prompt = f'Entry # {counter + 1}: Accept and upload? (y/n)'
        answer = pywikibot.input(prompt).strip()

        if answer == 'y':
            publisher.publish_to_wiktionary(candidate.entry, [candidate])
            publisher._save_translation_from_page([candidate])
            self.mark_definition(word_id, 'done')
        elif answer == 'n':
            self.mark_definition(word_id, 'rejected')
    def build_translation_batches(self, word_additional_data_info, counter=0):
        """Append one word's first definition and translation to a batch file.

        Batch files are named ``batch-<n>.csv`` inside
        ``user_data/translation_batch``, where ``<n>`` is
        ``self._batch_file_counter``. A new file is started once
        ``self._current_file_size`` (the accumulated length of the first
        definitions written so far) reaches 5000. Nothing is written when
        either definition list is empty or the first translated definition
        is ``'.'``.

        :param word_additional_data_info: mapping providing ``'word'`` and
            ``'word_id'``.
        :param counter: not used by this method.
        """
        translation = Translation()
        batch_folder = 'user_data/translation_batch'
        translation.output.wikipage_renderer.pages_to_link = self.malagasy_words_to_link
        word = word_additional_data_info['word']
        word_id = word_additional_data_info['word_id']
        definitions = self.get_definitions(word_id)
        translated_definitions = self.get_translated_definitions(word_id)

        # makedirs(exist_ok=True) also creates a missing parent folder and
        # avoids the check-then-create race of exists() followed by mkdir().
        os.makedirs(batch_folder, exist_ok=True)

        existing_path = f'{batch_folder}/batch-{self._batch_file_counter}.csv'
        if os.path.exists(existing_path):
            # Close the handle from the previous call before reopening, so
            # the old handle is flushed and released deterministically
            # instead of being left for the garbage collector.
            previous_file = getattr(self, '_current_file', None)
            if previous_file is not None:
                previous_file.close()
            self._current_file = open(existing_path, 'a')

        if self._current_file is None:
            # NOTE(review): this branch advances the counter without
            # resetting _current_file_size, unlike the rotation below --
            # confirm whether that is intended.
            if self._current_file_size >= 5000:
                self._batch_file_counter += 1

            filename = f'{batch_folder}/batch-{self._batch_file_counter}.csv'
            self._current_file = open(filename, 'a')
        elif self._current_file_size >= 5000:
            # Current batch is full: start the next file from size zero.
            self._current_file_size = 0
            self._current_file.close()
            self._batch_file_counter += 1
            filename = f'{batch_folder}/batch-{self._batch_file_counter}.csv'
            self._current_file = open(filename, 'a')

        if not definitions or not translated_definitions:
            return

        # '.' is treated as a placeholder translation and is not exported.
        if translated_definitions[0] == '.':
            return

        # Fields are double-quoted and separated by '/'; quotes inside the
        # values are not escaped.
        line = f'"{word}"/"{word_id}"/"{definitions[0]}"/"{translated_definitions[0]}"'
        self._current_file.write(f'{line}\n')
        self._current_file_size += len(definitions[0])
Exemplo n.º 5
0
import sys
from csv import writer

import pywikibot
import redis

from api.entryprocessor import WiktionaryProcessorFactory
from api.page_lister import redis_get_pages_from_category as get_pages_from_category
from api.translation_v2.core import Translation
from redis_wikicache import RedisSite

if __name__ == '__main__':
    # Process every page of the English Wiktionary category named by the
    # first command-line argument, then report the percentage of pages whose
    # processing raised a pywikibot or redis timeout error.
    t = Translation(use_configured_postprocessors=True)
    # t.post_processors = postprocessors
    site = RedisSite('en', 'wiktionary', offline=False)
    errored = []  # pages whose processing failed
    errors = 0
    processed = 0
    entries = 0
    wiktionary_processor_class = WiktionaryProcessorFactory.create('en')
    category = sys.argv[1]
    # newline='' is what the csv module expects of files handed to writer().
    with open(f'user_data/translations/{category}.csv', 'w',
              newline='') as output_file:
        csv_writer = writer(output_file)
        for wiki_page in get_pages_from_category('en', category):
            print(wiki_page)
            processed += 1
            try:
                entries = t.process_wiktionary_wiki_page(wiki_page)
            except (pywikibot.Error, redis.exceptions.TimeoutError):
                # Count the failure so the reported rate reflects it.
                errors += 1
                errored.append(wiki_page)
                continue

    # Rate is relative to the pages actually seen; an empty category
    # reports 0 instead of dividing by zero.
    error_rate = errors * 100. / processed if processed else 0.
    print('process error rate:', error_rate)
Exemplo n.º 6
0
 def test_generate_summary(self):
     """Check that the generated summary mentions each entry's language."""
     translator = Translation()
     summary = translator.generate_summary(
         [self.entry1, self.entry2, self.entry3])

     for sample in (self.entry1, self.entry2, self.entry3):
         self.assertIn(sample.language, summary)
Exemplo n.º 7
0
                    default='/opt/botjagwar/user_data/entry_translator.log')
parser.add_argument('--host', dest='HOST', type=str, default='0.0.0.0')
# The default must be a level name, not a file path. 'debug' resolves to
# level 10, the same level the fallback below picks for unrecognised names.
parser.add_argument('--log-level',
                    dest='LOG_LEVEL',
                    type=str,
                    default='debug')

args = parser.parse_args()

# Translate the case-insensitive level name into its numeric value; an
# unrecognised name falls back to DEBUG.
try:
    LOG_LEVEL = log._nameToLevel[args.LOG_LEVEL.upper()]
except KeyError:
    LOG_LEVEL = log.DEBUG

log.basicConfig(filename=args.LOG, level=LOG_LEVEL)
translations = Translation()
routes = web.RouteTableDef()


# Throttle Config
def set_throttle(i):
    """Configure pywikibot throttling for the Malagasy Wiktionary.

    Builds a ``Throttle`` for the ``('mg', 'wiktionary')`` site with
    ``mindelay=0`` and ``maxdelay=1``, sets ``pwbot.config.put_throttle``
    to 1 and passes *i* to the throttle's ``setDelays``.

    NOTE(review): the Throttle object is local to this function; whether
    ``setDelays`` on it has any lasting effect is not visible here.
    """
    from pywikibot import throttle

    mg_wiktionary = pwbot.Site('mg', 'wiktionary')
    site_throttle = throttle.Throttle(mg_wiktionary, mindelay=0, maxdelay=1)
    pwbot.config.put_throttle = 1
    site_throttle.setDelays(i)


def _get_page(name, lang):
    page = pwbot.Page(pwbot.Site(lang, 'wiktionary'), name)