Exemplo n.º 1
0
 def handle_noargs(self, **options):
     """
     Translate every pending Google translation request.

     For each ``TranslationRequest`` whose translator is ``GOOGLE`` the
     article title and every source sentence are passed to the translator,
     the results are wrapped in a ``TranslatedArticle`` and a list of
     ``TranslatedSentence`` objects, and both are handed to
     ``self._save_article_info``.  Requests for which that call returns a
     truthy value are deleted after all requests have been processed.

     ``options`` is accepted for the command interface and is not used.
     """
     t = google_translator()
     reqs = TranslationRequest.objects.filter(translator=GOOGLE)
     completed_reqs = []
     for req in reqs:
         # A fresh list per request: sharing one list across requests would
         # hand the sentences of earlier articles to every later article.
         ta_sentences = []
         target = req.target_language
         translated_title = t.translate(req.article.title, source=req.article.language, target=target)
         for s in req.article.sourcesentence_set.all():
             if s.text.strip():
                 translated = t.translate(s.text, source=s.article.language, target=target)
             else:
                 # Whitespace-only text is copied through untranslated.
                 translated = s.text
             ta_sentences.append(
                 TranslatedSentence(
                     segment_id=s.segment_id,
                     source_sentence=s,
                     text=translated,
                     translated_by=t.name,
                     translation_date=datetime.now(),
                     language=target,
                     end_of_paragraph=s.end_of_paragraph,
                 )
             )
         ta = TranslatedArticle()
         ta.article = req.article
         ta.title = translated_title
         ta.timestamp = datetime.now()
         ta.language = target
         if self._save_article_info(ta, ta_sentences):
             completed_reqs.append(req)
     # Only requests whose article was stored are removed from the queue.
     for cr in completed_reqs:
         cr.delete()
Exemplo n.º 2
0
 def handle_noargs(self, **options):
     """
     Run the Google translator over all queued translation requests.

     Every ``TranslationRequest`` with ``translator=GOOGLE`` gets its article
     title and each of its source sentences translated into the request's
     target language.  The resulting ``TranslatedArticle`` and its
     ``TranslatedSentence`` objects (all flagged ``best=True``) are passed to
     ``self._save_article_info``; when that returns a truthy value the
     request is remembered and deleted after the loop.

     ``options`` is accepted for the command interface and is not used.
     """
     t = google_translator()
     reqs = TranslationRequest.objects.filter(translator=GOOGLE)
     completed_reqs = []
     for req in reqs:
         # Reset per request so that sentences collected for a previous
         # article are not saved again as part of this one.
         ta_sentences = []
         req_sentences = req.article.sourcesentence_set.all()
         translated_title = t.translate(req.article.title,
                                        source=req.article.language,
                                        target=req.target_language)
         for s in req_sentences:
             translated = t.translate(s.text,
                                      source=s.article.language,
                                      target=req.target_language)
             ts = TranslatedSentence(segment_id=s.segment_id,
                                     source_sentence=s,
                                     text=translated,
                                     translated_by=t.name,
                                     translation_date=datetime.now(),
                                     language=req.target_language,
                                     best=True,
                                     end_of_paragraph=s.end_of_paragraph)
             ta_sentences.append(ts)
         ta = TranslatedArticle()
         ta.article = req.article
         ta.title = translated_title
         ta.timestamp = datetime.now()
         ta.language = req.target_language
         if self._save_article_info(ta, ta_sentences):
             completed_reqs.append(req)
     # Remove only the requests whose translation was stored.
     for cr in completed_reqs:
         cr.delete()
Exemplo n.º 3
0
 def handle_noargs(self, **options):
     """
     Run the Apertium translator over all queued translation requests.

     For every ``TranslationRequest`` with ``translator=APERTIUM`` the
     article title and each source sentence are translated, then the
     ``TranslatedArticle`` and its ``TranslatedSentence`` objects are saved
     and the request is deleted.  If anything in that save/delete sequence
     raises, the exception type and arguments are printed, the article and
     sentences built for this request are deleted again, and the exception
     is re-raised.

     ``options`` is accepted for the command interface and is not used.
     """
     t = apertium_translator()
     reqs = TranslationRequest.objects.filter(translator=APERTIUM)
     for req in reqs:
         # Reset per request so that sentences collected for a previous
         # article are not attached to (or rolled back with) this one.
         ta_sentences = []
         req_sentences = req.article.sourcesentence_set.all()
         translated_title = t.translate(req.article.title,
                                        source=req.article.language,
                                        target=req.target_language)
         for s in req_sentences:
             translated = t.translate(s.text,
                                      source=s.article.language,
                                      target=req.target_language)
             ts = TranslatedSentence(segment_id=s.segment_id,
                                     source_sentence=s,
                                     text=translated,
                                     translated_by=t.name,
                                     translation_date=datetime.now(),
                                     language=req.target_language,
                                     best=True,
                                     end_of_paragraph=s.end_of_paragraph)
             ta_sentences.append(ts)
         ta = TranslatedArticle()
         ta.article = req.article
         ta.title = translated_title
         ta.timestamp = datetime.now()
         ta.language = req.target_language
         try:
             # The article is saved first so it has a primary key before the
             # sentences are attached to it.
             ta.save()
             for ts in ta_sentences:
                 ts.save()
             ta.sentences = ta_sentences
             ta.save()
             # Delete only the request just completed; requests finished in
             # earlier iterations have already been deleted.
             req.delete()
         except Exception as e:
             # Parenthesised single-argument print works as a statement on
             # Python 2 and as a function call on Python 3.
             print(type(e))
             print(e.args)
             # Undo whatever was stored for this request, then propagate.
             ta.delete()
             for ts in ta_sentences:
                 ts.delete()
             raise
Exemplo n.º 4
0
def translate_from_scratch(request, source, target, title, aid, template_name="wt_articles/translate_form.html"):
    """
    Loads a source article by provided article id (aid) and generates formsets
    to contain each sentence in the requested translation.

    On GET one form per source sentence is rendered, labelled with the
    sentence text.  On a valid POST a ``TranslatedArticle`` in language
    ``target`` is created for the article identified by ``aid`` together with
    one ``TranslatedSentence`` per submitted form, and the client is
    redirected to the new article's URL.  An invalid POST re-renders the
    bound formset.  If no article has id ``aid`` the template is rendered
    with ``{"no_match": True}``.

    ``source`` and ``title`` are part of the signature but are not used by
    this function.
    """
    sa_set = SourceArticle.objects.filter(id=aid)
    if not sa_set:
        return render_to_response(template_name, {"no_match": True}, context_instance=RequestContext(request))
    article = sa_set[0]
    ss_list = article.sourcesentence_set.all()
    TranslatedSentenceSet = formset_factory(TranslatedSentenceMappingForm, extra=0)

    if request.method == "POST":
        formset = TranslatedSentenceSet(request.POST, request.FILES)
        if formset.is_valid():
            ts_list = []
            for form in formset.forms:
                ss = form.cleaned_data["source_sentence"]
                ts_list.append(
                    TranslatedSentence(
                        segment_id=ss.segment_id,
                        source_sentence=ss,
                        text=form.cleaned_data["text"],
                        translated_by=request.user.username,
                        translation_date=datetime.now(),
                        language=target,
                        best=True,  ### TODO figure something better out
                        end_of_paragraph=ss.end_of_paragraph,
                    )
                )
            # Use the article loaded from ``aid`` rather than the loop
            # variable of the form loop, which is undefined when the formset
            # contains no forms.
            ta = TranslatedArticle()
            ta.article = article
            ta.title = article.title
            ta.timestamp = datetime.now()
            ta.language = target
            # The article is saved before the sentences are attached to it.
            ta.save()
            for ts in ts_list:
                ts.save()
            ta.sentences = ts_list
            ta.save()
            return HttpResponseRedirect(ta.get_absolute_url())
    else:
        initial_ss_set = [{"source_sentence": s} for s in ss_list]
        formset = TranslatedSentenceSet(initial=initial_ss_set)
    # Show each source sentence as the label of its translation field.
    for form, s in zip(formset.forms, ss_list):
        form.fields["text"].label = s.text

    return render_to_response(
        template_name, {"formset": formset, "title": article.title}, context_instance=RequestContext(request)
    )
Exemplo n.º 5
0
    def parse_result_file(self, result_file, source_lang, target_lang):
        """
        Import source sentences and their translations from a CSV result file.

        The first row of ``result_file`` is a header row; columns are looked
        up by name, so their order does not matter.  Required columns are
        ``Input.seg_id1``..``Input.seg_id10``, ``Input.seg1``..``Input.seg10``,
        ``Answer.translation1``..``Answer.translation10``,
        ``AssignmentStatus``, ``WorkerId`` and ``SubmitTime``; a missing one
        raises ``KeyError``.  ``Input.article`` (title) and ``Input.tagN``
        columns are optional.

        Each segment id cell has the form ``<article id>_<segment id>``; a
        cell that does not split into exactly two parts ends processing of
        that row.  For every segment the matching ``SourceArticle``,
        ``TranslatedArticle``, ``SourceSentence`` and ``TranslatedSentence``
        are updated if they already exist and created otherwise.

        :param result_file: path of the CSV file to read.
        :param source_lang: language stored on source articles.
        :param target_lang: language stored on translated articles/sentences.
        """
        month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

        def parse_submit_time(date_string):
            # Space-separated fields: [1] month abbreviation, [2] day,
            # [3] HH:MM:SS, [5] year.
            df = date_string.split(' ')
            tf = df[3].split(':')
            return datetime(int(df[5]), month_names.index(df[1]) + 1,
                            int(df[2]), int(tf[0]), int(tf[1]), int(tf[2]))

        sa = None
        ta = None
        cur_aid = -1
        # The context manager closes the file even if a row fails to import.
        with open(result_file, 'r') as f:
            csv_reader = unicode_csv_reader(f)
            headers = next(csv_reader)
            # not assuming a specific order for the fields
            header_map = dict((h, i) for i, h in enumerate(headers))

            segment_ids = [
                header_map['Input.seg_id%d' % i] for i in range(1, 11)
            ]
            segments = [header_map['Input.seg%d' % i] for i in range(1, 11)]
            translations = [
                header_map['Answer.translation%d' % i] for i in range(1, 11)
            ]
            has_title = 'Input.article' in header_map
            for line in csv_reader:
                if has_title:
                    title = line[header_map['Input.article']] + ' (translated)'
                else:
                    title = 'Noname (translated)'
                approved = (line[header_map['AssignmentStatus']] == 'Approved')
                submit_time = line[header_map['SubmitTime']]
                worker_id = line[header_map['WorkerId']]
                for i in range(10):
                    try:
                        (aid, seg_id) = line[segment_ids[i]].split('_')
                    except ValueError:
                        # treating this basically like an eof
                        break

                    if cur_aid != int(aid):
                        if sa:
                            # save the previous SourceArticle
                            sa.save(manually_splitting=True)
                        if not has_title:
                            title = aid + ' ' + title
                        # check if the document is already imported
                        try:
                            sa = SourceArticle.objects.filter(
                                language=source_lang).get(doc_id=aid)
                        except SourceArticle.DoesNotExist:
                            sa = SourceArticle()
                        sa.sentences_processed = True
                        cur_aid = int(aid)
                        sa.language = source_lang
                        sa.doc_id = aid
                        sa.timestamp = datetime.now()
                        sa.title = title
                        # saving gives a new SourceArticle its id
                        sa.save(manually_splitting=True)

                        if ta:
                            # save the previous target article
                            ta.save()
                        # check if the target article has been translated
                        # and imported
                        try:
                            ta = TranslatedArticle.objects.filter(
                                article=sa).get(language=target_lang)
                        except TranslatedArticle.DoesNotExist:
                            ta = TranslatedArticle()
                            ta.article = sa
                        # fields not listed here are left untouched on an
                        # existing TranslatedArticle
                        ta.title = title
                        ta.timestamp = datetime.now()
                        ta.language = target_lang
                        ta.approved = approved
                        ta.save()

                    end_of_paragraph = True
                    # NOTE(review): the segment columns are numbered 1..10
                    # while i runs 0..9 here, so this looks up Input.tag0..9;
                    # possibly an off-by-one -- confirm against the CSV layout.
                    tag_id = 'Input.tag%d' % i
                    if tag_id in header_map:
                        tag = line[header_map[tag_id]]
                        # Store a real boolean, not the regex match object.
                        end_of_paragraph = bool(
                            re.search("LastSentence", tag))

                    seg = line[segments[i]]
                    try:
                        ss = sa.sourcesentence_set.get(segment_id=seg_id)
                        new_source_sentence = False
                    except SourceSentence.DoesNotExist:
                        ss = SourceSentence()
                        ss.article = sa
                        new_source_sentence = True
                    ss.text = seg
                    ss.segment_id = seg_id
                    ss.end_of_paragraph = end_of_paragraph
                    ss.save()
                    if new_source_sentence:
                        # only newly created sentences extend the article text
                        sa.source_text += seg + u'\n'

                    try:
                        ts = ta.sentences.get(segment_id=seg_id)
                        new_translation = False
                    except TranslatedSentence.DoesNotExist:
                        ts = TranslatedSentence()
                        ts.segment_id = seg_id
                        new_translation = True
                    ts.source_sentence = ss
                    ts.text = line[translations[i]]
                    ts.translated_by = worker_id
                    ts.language = target_lang
                    ts.translation_date = parse_submit_time(submit_time)
                    ts.approved = approved
                    ts.end_of_paragraph = ss.end_of_paragraph
                    ts.save()
                    if new_translation:
                        ta.sentences.add(ts)
        # persist the last article pair that was being processed
        if sa:
            sa.save(manually_splitting=True)
        if ta:
            ta.save()
Exemplo n.º 6
0
    def parse_result_file(self, result_file, source_lang, target_lang):
        """
        Load a CSV result file into source and translated articles.

        Columns are located through the header row, so no column order is
        assumed.  The row must provide ``AssignmentStatus``, ``WorkerId``,
        ``SubmitTime`` and, for N in 1..10, ``Input.seg_idN``, ``Input.segN``
        and ``Answer.translationN`` (``KeyError`` otherwise).
        ``Input.article`` supplies the title when present; ``Input.tagN``
        columns, when present, decide the end-of-paragraph flag.

        A segment id cell is ``<article id>_<segment id>``; the first cell in
        a row that does not split into exactly two parts stops processing of
        that row.  Existing ``SourceArticle``, ``TranslatedArticle``,
        ``SourceSentence`` and ``TranslatedSentence`` rows are updated in
        place, missing ones are created.

        :param result_file: path of the CSV file to read.
        :param source_lang: language stored on source articles.
        :param target_lang: language stored on translated articles/sentences.
        """
        months = ("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")

        def submit_time_to_datetime(date_string):
            # Fields after splitting on spaces: [1] month abbreviation,
            # [2] day of month, [3] HH:MM:SS, [5] year.
            df = date_string.split(" ")
            hour, minute, second = [int(part) for part in df[3].split(":")[:3]]
            return datetime(int(df[5]), months.index(df[1]) + 1, int(df[2]), hour, minute, second)

        sa = None
        ta = None
        cur_aid = -1
        # Open through a context manager so the handle is released even when
        # importing a row raises.
        with open(result_file, "r") as f:
            csv_reader = unicode_csv_reader(f)
            headers = next(csv_reader)
            header_map = {}
            for i, h in enumerate(headers):
                header_map[h] = i

            # not assuming a specific order for the fields
            segment_ids = [header_map["Input.seg_id%d" % i] for i in range(1, 11)]
            segments = [header_map["Input.seg%d" % i] for i in range(1, 11)]
            translations = [header_map["Answer.translation%d" % i] for i in range(1, 11)]
            has_title = "Input.article" in header_map
            for line in csv_reader:
                if has_title:
                    title = line[header_map["Input.article"]] + " (translated)"
                else:
                    title = "Noname (translated)"
                approved = line[header_map["AssignmentStatus"]] == "Approved"
                for i in range(10):
                    try:
                        (aid, seg_id) = line[segment_ids[i]].split("_")
                    except ValueError:
                        # treating this basically like an eof
                        break

                    if cur_aid != int(aid):
                        if sa:
                            # save the previous SourceArticle
                            sa.save(manually_splitting=True)
                        if not has_title:
                            title = aid + " " + title
                        # check if the document is already imported
                        try:
                            sa = SourceArticle.objects.filter(language=source_lang).get(doc_id=aid)
                        except SourceArticle.DoesNotExist:
                            # make a new sa object
                            sa = SourceArticle()
                        sa.sentences_processed = True
                        cur_aid = int(aid)
                        sa.language = source_lang
                        sa.doc_id = aid
                        sa.timestamp = datetime.now()
                        sa.title = title
                        # get an id for the SourceArticle instance
                        sa.save(manually_splitting=True)

                        if ta:
                            # save the previous target article
                            ta.save()
                        # check if the target article has been translated and imported
                        try:
                            ta = TranslatedArticle.objects.filter(article=sa).get(language=target_lang)
                        except TranslatedArticle.DoesNotExist:
                            # make a new TranslatedArticle object
                            ta = TranslatedArticle()
                            ta.article = sa
                        # if there is one, do not touch unknown fields.
                        ta.title = title
                        ta.timestamp = datetime.now()
                        ta.language = target_lang
                        ta.approved = approved
                        ta.save()

                    end_of_paragraph = True
                    # NOTE(review): segment columns are numbered 1..10 but i is
                    # 0..9 here, so Input.tag0..9 are consulted; this may be an
                    # off-by-one -- verify against the CSV layout.
                    tag_id = "Input.tag%d" % i
                    if tag_id in header_map:
                        tag = line[header_map[tag_id]]
                        # Keep a plain boolean instead of a regex match object.
                        end_of_paragraph = re.search("LastSentence", tag) is not None

                    seg = line[segments[i]]
                    try:
                        ss = sa.sourcesentence_set.get(segment_id=seg_id)
                        created_ss = False
                    except SourceSentence.DoesNotExist:
                        ss = SourceSentence()
                        ss.article = sa
                        created_ss = True
                    ss.text = seg
                    ss.segment_id = seg_id
                    ss.end_of_paragraph = end_of_paragraph
                    ss.save()
                    if created_ss:
                        # the article text grows only for newly created sentences
                        sa.source_text += seg + u"\n"

                    translation = line[translations[i]]
                    try:
                        ts = ta.sentences.get(segment_id=seg_id)
                        created_ts = False
                    except TranslatedSentence.DoesNotExist:
                        ts = TranslatedSentence()
                        ts.segment_id = seg_id
                        created_ts = True
                    ts.source_sentence = ss
                    ts.text = translation
                    ts.translated_by = line[header_map["WorkerId"]]
                    ts.language = target_lang
                    ts.translation_date = submit_time_to_datetime(line[header_map["SubmitTime"]])
                    ts.approved = approved
                    ts.end_of_paragraph = ss.end_of_paragraph
                    ts.save()
                    if created_ts:
                        ta.sentences.add(ts)
        # flush the last article pair that was being processed
        if sa:
            sa.save(manually_splitting=True)
        if ta:
            ta.save()
Exemplo n.º 7
0
def translate_from_scratch(request,
                           source,
                           target,
                           title,
                           aid,
                           template_name="wt_articles/translate_form.html"):
    """
    Loads a source article by provided article id (aid) and generates formsets
    to contain each sentence in the requested translation.

    GET renders one form per source sentence, using the sentence text as the
    field label.  A valid POST stores a ``TranslatedArticle`` in language
    ``target`` for the article identified by ``aid`` plus one
    ``TranslatedSentence`` per submitted form, then redirects to the new
    article.  An invalid POST re-renders the bound formset.  When no article
    has id ``aid`` the template is rendered with ``{"no_match": True}``.

    ``source`` and ``title`` are accepted but not used by this function.
    """
    sa_set = SourceArticle.objects.filter(id=aid)
    if not sa_set:
        return render_to_response(template_name, {"no_match": True},
                                  context_instance=RequestContext(request))
    article = sa_set[0]
    ss_list = article.sourcesentence_set.all()
    TranslatedSentenceSet = formset_factory(TranslatedSentenceMappingForm,
                                            extra=0)

    if request.method == "POST":
        formset = TranslatedSentenceSet(request.POST, request.FILES)
        if formset.is_valid():
            now = datetime.now
            ts_list = []
            for form in formset.forms:
                ss = form.cleaned_data['source_sentence']
                text = form.cleaned_data['text']
                ts = TranslatedSentence(
                    segment_id=ss.segment_id,
                    source_sentence=ss,
                    text=text,
                    translated_by=request.user.username,
                    translation_date=now(),
                    language=target,
                    best=True,  ### TODO figure something better out
                    end_of_paragraph=ss.end_of_paragraph)
                ts_list.append(ts)
            # Take article and title from the article looked up by ``aid``;
            # relying on the last form's sentence fails with NameError when
            # the formset has no forms.
            ta = TranslatedArticle()
            ta.article = article
            ta.title = article.title
            ta.timestamp = now()
            ta.language = target
            # Save the article before attaching the saved sentences to it.
            ta.save()
            for ts in ts_list:
                ts.save()
            ta.sentences = ts_list
            ta.save()
            return HttpResponseRedirect(ta.get_absolute_url())
    else:
        initial_ss_set = [{'source_sentence': s} for s in ss_list]
        formset = TranslatedSentenceSet(initial=initial_ss_set)
    # Label every translation field with the sentence it translates.
    for form, s in zip(formset.forms, ss_list):
        form.fields['text'].label = s.text

    return render_to_response(template_name, {
        "formset": formset,
        "title": article.title,
    },
                              context_instance=RequestContext(request))