def handle_noargs(self, **options):
    """Import every article of interest and queue it for translation.

    For each ``ArticleOfInterest`` the rendered text is fetched with
    ``query_text_rendered``.  Articles whose revision id is already stored
    as a ``SourceArticle`` in the same language are skipped.  Otherwise a
    ``SourceArticle`` and a ``TranslationRequest`` (translator
    ``DEFAULT_TRANNY``) are saved.

    If creating or saving either object raises, the exception type and
    args are printed, whatever was saved for *this* article is deleted
    again, and the loop continues with the next article.

    ``options`` is not used.
    """
    for article in ArticleOfInterest.objects.all():
        article_dict = query_text_rendered(article.title,
                                           language=article.title_language)
        # don't import articles we already have
        if SourceArticle.objects.filter(doc_id__exact='%s' % article_dict['revid'],
                                        language=article.title_language):
            continue

        # Reset per iteration: without this, a failure before assignment
        # would leave these names bound to the *previous* article's rows,
        # and the cleanup below would delete data that imported fine.
        source_article = None
        tr = None
        try:
            source_article = SourceArticle(title=article.title,
                                           language=article.title_language,
                                           source_text=article_dict['html'],
                                           timestamp=datetime.now(),
                                           doc_id=article_dict['revid'])
            source_article.save()
            tr = TranslationRequest(article=source_article,
                                    target_language=article.target_language,
                                    date=datetime.now(),
                                    translator=DEFAULT_TRANNY)
            tr.save()
        except Exception as e:
            print(type(e))
            print(e.args)
            # Best-effort rollback.  The request references the article, so
            # it goes first; each delete is attempted independently.
            for created in (tr, source_article):
                if created is None or created.pk is None:
                    # Never constructed or never saved: nothing to remove.
                    continue
                try:
                    created.delete()
                except Exception as cleanup_error:
                    print('Cleanup failed: %s' % (cleanup_error,))
def handle_noargs(self, **options):
    """Import every article of interest and queue it for translation.

    For each ``ArticleOfInterest`` the rendered text is fetched with
    ``query_text_rendered``.  Articles whose revision id is already stored
    as a ``SourceArticle`` in the same language are skipped.  Otherwise a
    ``SourceArticle`` and a ``TranslationRequest`` (translator
    ``DEFAULT_TRANNY``) are saved.

    If creating or saving either object raises, the exception type and
    args are printed, whatever was saved for *this* article is deleted
    again, and the loop continues with the next article.

    ``options`` is not used.
    """
    for article in ArticleOfInterest.objects.all():
        article_dict = query_text_rendered(article.title,
                                           language=article.title_language)
        # don't import articles we already have
        if SourceArticle.objects.filter(doc_id__exact='%s' % article_dict['revid'],
                                        language=article.title_language):
            continue

        # Reset per iteration so the except-branch can only ever see rows
        # that belong to the article currently being imported, never the
        # ones left over from an earlier, successful iteration.
        source_article = None
        tr = None
        try:
            source_article = SourceArticle(
                title=article.title,
                language=article.title_language,
                source_text=article_dict['html'],
                timestamp=datetime.now(),
                doc_id=article_dict['revid'])
            source_article.save()
            tr = TranslationRequest(
                article=source_article,
                target_language=article.target_language,
                date=datetime.now(),
                translator=DEFAULT_TRANNY)
            tr.save()
        except Exception as e:
            print(type(e))
            print(e.args)
            # Best-effort rollback.  The request references the article, so
            # it goes first; each delete is attempted independently.
            for created in (tr, source_article):
                if created is None or created.pk is None:
                    # Never constructed or never saved: nothing to remove.
                    continue
                try:
                    created.delete()
                except Exception as cleanup_error:
                    print('Cleanup failed: %s' % (cleanup_error,))
def handle_noargs(self, **options):
    """Import the rendered text of every article of interest.

    Each ``ArticleOfInterest`` that does not yet have a ``SourceArticle``
    with the same title and language is fetched with
    ``query_text_rendered`` (using ``article.title_language.code``) and
    stored as a new ``SourceArticle``.

    If creating or saving the article raises, the exception type and args
    are printed, the row saved for *this* article (if any) is deleted, and
    the loop continues with the next article.

    ``options`` is not used.
    """
    for article in ArticleOfInterest.objects.all():
        # don't import articles we already have
        if SourceArticle.objects.filter(title__exact='%s' % article.title,
                                        language=article.title_language):
            continue

        article_dict = query_text_rendered(
            article.title, language=article.title_language.code)

        # Reset per iteration: otherwise a failure inside the constructor
        # call would leave this name bound to the previous article, and
        # the cleanup below would delete a row that imported fine.
        source_article = None
        try:
            source_article = SourceArticle(
                title=article.title,
                language=article.title_language,
                source_text=article_dict['html'],
                timestamp=datetime.now(),
                doc_id=article_dict['revid']
            )
            source_article.save()
        except Exception as e:
            print("Looks like we have an exception of type %s" % type(e))
            print("Exception args: %s" % (e.args,))
            # Best-effort rollback of a row that was actually saved.
            if source_article is not None and source_article.pk is not None:
                try:
                    source_article.delete()
                except Exception as cleanup_error:
                    print('Cleanup failed: %s' % (cleanup_error,))
def parse_source_file(self, source_file, article_id_map):
    """Load segmented source articles from a CSV file into the database.

    The first CSV row is a header; the columns ``seg_id1`` .. ``seg_id10``
    are located through it.  In every data row, each ``seg_idN`` cell is
    split on ``'_'`` into an article id and a segment id, the cell after
    it is read as the tag and the one after that as the segment text.
    One ``SourceSentence`` is saved per segment and attached to the
    current ``SourceArticle``.

    A segment id of 0 finishes the current article through
    ``self.save_sentence`` and starts a fresh ``SourceArticle``.  A row
    too short to contain the next ``seg_idN`` cell saves the current
    article and ends processing of that row.

    Args:
        source_file: path of the CSV file to read.
        article_id_map: mapping indexed by the article-id string; the
            looked-up value is forwarded to ``self.save_sentence``.

    Raises:
        KeyError: if a ``seg_idN`` header is missing, or an article id is
            absent from ``article_id_map``.
        ValueError: if a ``seg_idN`` cell does not split into exactly two
            parts, or the segment id is not an integer.
    """
    segments = ['seg_id%s' % i for i in range(1, 11)]

    # The context manager closes the file even when a row raises.
    with open(source_file, 'r') as f:
        csv_reader = csv.reader(f)
        headers = next(csv_reader)
        header_map = dict((h, i) for i, h in enumerate(headers))

        # The headers are uniform in this file
        # lang,(seg_id1,tag1,seg1,img_url1,machine_translation1),...,(seg_idn,...)
        # so the column offsets are computed once rather than per row.
        segment_offsets = [header_map[seg] for seg in segments]

        sa = SourceArticle()
        for line in csv_reader:
            for offs in segment_offsets:
                try:
                    (aid, seg_id) = line[offs].split('_')
                except IndexError:
                    # treating this basically like an eof
                    try:
                        sa.save(manually_splitting=True)
                    except UnicodeDecodeError:
                        print('Argh! Unicode issues (1)...')
                        sa.delete()
                    break

                if int(seg_id) == 0:
                    sa.sentences_processed = True
                    try:
                        self.save_sentence(sa, line[0], aid,
                                           article_id_map[aid])
                    except UnicodeDecodeError:
                        print('Argh! Unicode issues...(2)')
                        sa.delete()
                    # make a new sa object
                    sa = SourceArticle()
                    sa.save(manually_splitting=True)  # get an id

                tag = line[offs + 1]
                seg = line[offs + 2]

                ss = SourceSentence()
                ss.article = sa
                ss.text = seg
                ss.segment_id = seg_id
                # Store a real boolean rather than a regex match object.
                ss.end_of_paragraph = bool(re.search("LastSentence", seg))
                ss.save()
                print('%s :: %s :: %s' % (aid, seg_id, tag))
def request_translation(request, form_class=TranslationRequestForm,
                        template_name="wt_articles/request_form.html",
                        deletedId=-1, deleteAll=False, update=False):
    """Render the translation-request page, optionally changing data first.

    The steps run in this order:

    1. If ``update`` is true: on a request with POST data the user form is
       validated and saved; otherwise every ``ArticleOfInterest`` without
       a ``SourceArticle`` of the same title and language is fetched with
       ``query_text_rendered`` and stored together with a
       ``TranslationRequest``.
    2. If ``deletedId`` is not -1, the ``ArticleOfInterest`` with that id
       is deleted.
    3. If ``deleteAll`` is true, all ``ArticleOfInterest`` rows are deleted.
    4. On POST, a valid request form is saved unless an
       ``ArticleOfInterest`` with the same title, title language and
       target language already exists.

    Args:
        request: the incoming request; ``POST``, ``method`` and ``user``
            are read from it.
        form_class: form class used for the request form.
        template_name: template to render.
        deletedId: in this context, the id of the article of interest to
            delete; -1 means "delete nothing".
        deleteAll: delete every article of interest when true.
        update: run step 1 when true.

    Returns:
        The response from ``render_to_response`` with ``request_form`` and
        ``articles`` in the template context.
    """
    # Update
    if update:
        # query_text_raw is not used below; the import line is left as it
        # was so importing behaves exactly as before.
        from wikipydia import query_text_rendered, query_text_raw
        from wt_articles import DEFAULT_TRANNY

        if request.POST:
            post = request.POST.copy()
            user_form = UserForm(post, instance=request.user)
            if user_form.is_valid():
                user_form.save()
                # NOTE(review): this redirect is built but never returned,
                # so the view still falls through to the render below.
                # Confirm whether a `return` was intended.
                response = redirect('/accounts/' + request.user.username)
        else:
            for article in ArticleOfInterest.objects.all():
                # don't import articles we already have
                if SourceArticle.objects.filter(title=article.title,
                                                language=article.title_language):
                    continue

                article_dict = query_text_rendered(
                    article.title, language=article.title_language)
                print(article.title, article.title_language)

                # Reset per iteration: otherwise a failure before
                # assignment would leave these names bound to the previous
                # article's rows, and the cleanup would delete good data.
                source_article = None
                tr = None
                try:
                    source_article = SourceArticle(
                        title=article.title,
                        language=article.title_language,
                        source_text=article_dict['html'],
                        timestamp=datetime.now(),
                        doc_id=article_dict['revid'])
                    source_article.save()
                    tr = TranslationRequest(
                        article=source_article,
                        target_language=article.target_language,
                        date=datetime.now(),
                        translator=DEFAULT_TRANNY)
                    tr.save()
                except Exception as e:
                    print(type(e))
                    print(e.args)
                    # Best-effort rollback.  The request references the
                    # article, so it goes first; each delete is attempted
                    # independently.
                    for created in (tr, source_article):
                        if created is None or created.pk is None:
                            # Never constructed or never saved.
                            continue
                        try:
                            created.delete()
                        except Exception as cleanup_error:
                            print('Cleanup failed: %s' % (cleanup_error,))

    # Delete
    if deletedId != -1:
        ArticleOfInterest.objects.filter(id=deletedId).delete()

    if deleteAll:
        ArticleOfInterest.objects.all().delete()

    if request.method == "POST":
        request_form = form_class(request.POST)
        if request_form.is_valid():
            title = request_form.cleaned_data['title']
            title_language = request_form.cleaned_data['title_language']
            target_language = request_form.cleaned_data['target_language']
            duplicates = ArticleOfInterest.objects.filter(
                title__exact=title,
                title_language__exact=title_language,
                target_language__exact=target_language)
            # Only store the request when no identical one exists yet.
            if not duplicates:
                translation_request = request_form.save(commit=False)
                translation_request.date = datetime.now()
                translation_request.save()
    else:
        request_form = form_class()

    articles = all_articles_of_interest()
    return render_to_response(template_name, {
        "request_form": request_form,
        "articles": articles,
    }, context_instance=RequestContext(request))