Exemplo n.º 1
0
def _read_scored_lines(path, basename):
    """Read *path* line by line and score each line with ``fetchwordsfromsentence``.

    Returns three parallel lists: the raw lines (line endings kept), the
    ``'avg'`` entry and the ``'values'`` entry of each line's score dict.
    The file is closed even if scoring raises.
    """
    lines = []
    averages = []
    word_values = []
    with open(path, 'r') as handle:
        for line in handle:
            lines.append(line)
            point = fetchwordsfromsentence(line, basename)
            averages.append(point['avg'])
            word_values.append(point['values'])
    return lines, averages, word_values


def process_files_law(basename, name):
    """Filter, align and store the Hindi/English sentence pairs for *basename*.

    Steps, in order:
      1. Filter and post-process ``<basename>_hin.txt`` / ``<basename>_eng.txt``
         from ``UPLOAD_FOLDER``.
      2. Translate the filtered Hindi text with ``translatewithgoogle``.
      3. Run ``./helpers/bleualign.py`` on the files under ``<cwd>/upload``.
      4. Score every aligned line, insert one ``Sentence`` per Hindi line,
         then delete every ``UPLOAD_FOLDER`` file whose name starts with
         *basename*.

    Args:
        basename: File-name prefix of the working files.
        name: Value stored in the ``basename`` field of each ``Sentence``.

    Returns:
        The result of ``CustomResponse(Status.SUCCESS.value, data).getres()``,
        where ``data`` holds the aligned lines and their average scores.

    Raises:
        IndexError: If an aligned English line has no ``:::::`` separator or
            there are fewer English lines than Hindi lines.
    """
    # Local import so this block does not depend on the file's import list.
    import subprocess

    prefix = app.config['UPLOAD_FOLDER'] + '/' + basename
    hin_filtered = prefix + '_hin_filtered.txt'
    eng_filtered = prefix + '_eng_filtered.txt'

    filtertext(prefix + '_hin.txt', hin_filtered)
    filtertext(prefix + '_eng.txt', eng_filtered)
    processhindi(hin_filtered)
    processenglish(eng_filtered)
    translatewithgoogle(hin_filtered, prefix + '_eng_tran.txt')

    # Argument list instead of a shell string: basename is never interpreted
    # by a shell, so spaces or metacharacters cannot alter the command.
    # The exit status is deliberately not checked (same as the os.system call
    # this replaces); a failed run surfaces when the output files are opened.
    aligner_prefix = os.getcwd() + '/upload/' + basename
    subprocess.run(
        [
            './helpers/bleualign.py',
            '-s', aligner_prefix + '_hin_filtered.txt',
            '-t', aligner_prefix + '_eng_filtered.txt',
            '--srctotarget', aligner_prefix + '_eng_tran.txt',
            '-o', aligner_prefix + '_output',
        ],
        check=False,
    )

    english_res, english_points, english_points_words = _read_scored_lines(
        prefix + '_output-t', basename)
    hindi_res, hindi_points, hindi_points_words = _read_scored_lines(
        prefix + '_output-s', basename)

    data = {'hindi': hindi_res, 'english': english_res,
            'english_scores': english_points, 'hindi_scores': hindi_points}

    sentences = []
    for i, source in enumerate(hindi_res):
        # Each English line is split on ':::::' into the target text (part 0)
        # and the value stored as alignment_accuracy (part 1).
        parts = english_res[i].split(':::::')
        sentences.append(Sentence(
            status=STATUS_PENDING,
            alignment_accuracy=parts[1],
            basename=name,
            source=source,
            target=parts[0],
            source_ocr_words=hindi_points_words[i],
            source_ocr=str(hindi_points[i]),
            target_ocr_words=english_points_words[i],
            target_ocr=str(english_points[i]),
        ))
    Sentence.objects.insert(sentences)

    # Remove every working file created for this basename.
    for leftover in glob.glob(prefix + '*'):
        os.remove(leftover)

    res = CustomResponse(Status.SUCCESS.value, data)
    return res.getres()
Exemplo n.º 2
0
def getfiledata():
    """Extract the text of an uploaded PDF and return it as a downloadable file.

    The uploaded ``file`` is saved as ``<timestamp>.pdf`` in ``UPLOAD_FOLDER``,
    passed to ``converttoimage`` in a worker process (with ``capturetext`` as
    the success callback), then filtered with ``filtertext`` and
    post-processed with ``processenglish``.

    Returns:
        The ``flask.send_file`` response for ``upload/<timestamp>_eng_filtered.txt``
        sent as an attachment, with an ``x-suggested-filename`` header of
        ``<timestamp>.txt``.

    Raises:
        Whatever ``converttoimage`` raised in the worker process; it is
        re-raised here instead of being dropped.
    """
    # NOTE(review): second-resolution timestamps collide when two uploads
    # arrive within the same second — confirm whether that can happen.
    basename = str(int(time.time()))
    upload = request.files['file']
    folder = app.config['UPLOAD_FOLDER']
    filepath = os.path.join(folder, basename + '.pdf')
    upload.save(filepath)

    # The pool is created only once the upload is on disk, so an invalid
    # request cannot leave worker processes behind. One task needs one worker.
    with mp.Pool(processes=1) as pool:
        task = pool.apply_async(
            converttoimage,
            args=(filepath, folder, basename, '_eng'),
            callback=capturetext)
        pool.close()
        pool.join()
    # apply_async stores a worker exception on the result; get() re-raises it.
    task.get()

    filtered_path = folder + '/' + basename + '_eng_filtered.txt'
    filtertext(folder + '/' + basename + '_eng.txt', filtered_path)
    processenglish(filtered_path)

    # The working files are not removed here; the filtered text is what is sent.
    result = flask.send_file(
        os.path.join('upload/', basename + '_eng_filtered.txt'),
        as_attachment=True)
    result.headers["x-suggested-filename"] = basename + '.txt'
    return result
Exemplo n.º 3
0
def translate():
    """Translate an uploaded PDF and store the source/target line pairs.

    The uploaded ``file`` is saved as ``<timestamp>.pdf`` in ``UPLOAD_FOLDER``
    and a ``TranslationProcess`` record is created with ``STATUS_PROCESSING``.
    ``converttoimage`` runs in a worker process (with ``capturetext`` as the
    success callback), the ``_hin`` text is filtered and post-processed, and
    ``translatewithanuvadaeng`` writes ``<timestamp>_eng_tran.txt``. One
    ``Translation`` is inserted per source line, the working files are
    deleted and the process record is set to ``STATUS_PROCESSED``.

    Returns:
        The result of ``CustomResponse(Status.SUCCESS.value, data).getres()``,
        where ``data`` has the ``'hindi'`` and ``'english'`` line lists.

    Raises:
        IndexError: If the translated file has fewer lines than the source.
        Whatever ``converttoimage`` raised in the worker process.
    """
    # NOTE(review): second-resolution timestamps collide when two uploads
    # arrive within the same second — confirm whether that can happen.
    basename = str(int(time.time()))
    current_time = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
    upload = request.files['file']
    folder = app.config['UPLOAD_FOLDER']
    filepath = os.path.join(folder, basename + '.pdf')

    TranslationProcess(
        status=STATUS_PROCESSING, name=upload.filename,
        created_on=current_time, basename=basename).save()
    upload.save(filepath)

    # The pool is created only once the upload is on disk, so an invalid
    # request cannot leave worker processes behind. One task needs one worker.
    with mp.Pool(processes=1) as pool:
        task = pool.apply_async(
            converttoimage,
            args=(filepath, folder, basename, '_hin'),
            callback=capturetext)
        pool.close()
        pool.join()
    # apply_async stores a worker exception on the result; get() re-raises it.
    task.get()

    prefix = folder + '/' + basename
    hin_filtered = prefix + '_hin_filtered.txt'
    eng_translated = prefix + '_eng_tran.txt'

    filtertext(prefix + '_hin.txt', hin_filtered)
    # NOTE(review): processenglish is applied to the Hindi file here —
    # confirm this is intended rather than a Hindi-specific step.
    processenglish(hin_filtered)
    translatewithanuvadaeng(hin_filtered, eng_translated)

    with open(eng_translated, 'r') as eng_file:
        english_res = eng_file.readlines()
    with open(hin_filtered, 'r') as hin_file:
        hindi_res = hin_file.readlines()

    data = {'hindi': hindi_res, 'english': english_res}

    # Lines are paired by position; a shorter translation raises IndexError.
    translations = [
        Translation(basename=basename, source=source, target=english_res[i])
        for i, source in enumerate(hindi_res)
    ]
    Translation.objects.insert(translations)

    # Remove every working file created for this basename.
    for leftover in glob.glob(prefix + '*'):
        os.remove(leftover)

    res = CustomResponse(Status.SUCCESS.value, data)
    TranslationProcess.objects(basename=basename).update(
        set__status=STATUS_PROCESSED)
    return res.getres()
Exemplo n.º 4
0
def translateFile():
    """Translate an uploaded PDF and place each translated line on its page image.

    The upload is saved as ``<timestamp>.pdf`` in ``UPLOAD_FOLDER`` and a
    ``TranslationProcess`` record is created with ``STATUS_PROCESSING``.
    ``converttoimage`` runs in a worker process (with ``capturetext`` as the
    success callback), the ``_hin`` text is filtered and post-processed, and
    ``translatewithanuvadaeng`` writes ``<timestamp>_eng_tran.txt``.

    For every source line, ``fetchwordhocrfromsentence`` supplies word boxes.
    The first box that has a ``height`` is used for a single ``puttext`` call
    with the whole translated line, at x=200 and the current y offset of that
    box's image. The y offset per image starts at 200 and is replaced by the
    second value ``puttext`` returns.

    One ``Translation`` is inserted per source line and the process record is
    set to ``STATUS_PROCESSED``. Working files are left in ``UPLOAD_FOLDER``.

    Returns:
        The result of ``CustomResponse(Status.SUCCESS.value, data).getres()``,
        where ``data`` has the ``'hindi'`` and ``'english'`` line lists.

    Raises:
        IndexError: If the translated file has fewer lines than the source.
        Whatever ``converttoimage`` raised in the worker process.
    """
    # NOTE(review): second-resolution timestamps collide when two uploads
    # arrive within the same second — confirm whether that can happen.
    basename = str(int(time.time()))
    current_time = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
    upload = request.files['file']
    folder = app.config['UPLOAD_FOLDER']
    filepath = os.path.join(folder, basename + '.pdf')

    TranslationProcess(
        status=STATUS_PROCESSING, name=upload.filename,
        created_on=current_time, basename=basename).save()
    upload.save(filepath)

    # The pool is created only once the upload is on disk, so an invalid
    # request cannot leave worker processes behind. One task needs one worker.
    with mp.Pool(processes=1) as pool:
        task = pool.apply_async(
            converttoimage,
            args=(filepath, folder, basename, '_hin'),
            callback=capturetext)
        pool.close()
        pool.join()
    # apply_async stores a worker exception on the result; get() re-raises it.
    task.get()

    prefix = folder + '/' + basename
    hin_filtered = prefix + '_hin_filtered.txt'
    eng_translated = prefix + '_eng_tran.txt'

    filtertext(prefix + '_hin.txt', hin_filtered)
    # NOTE(review): processenglish is applied to the Hindi file here —
    # confirm this is intended rather than a Hindi-specific step.
    processenglish(hin_filtered)
    translatewithanuvadaeng(hin_filtered, eng_translated)

    with open(eng_translated, 'r') as eng_file:
        english_res = eng_file.readlines()
    with open(hin_filtered, 'r') as hin_file:
        hindi_res = hin_file.readlines()

    # Next y offset to use on each page image, keyed by image path.
    text_y = {}
    for index, line in enumerate(hindi_res):
        print(line)
        point = fetchwordhocrfromsentence(line, basename)
        english = english_res[index]

        # The translated line's word count only bounds how many boxes are
        # probed; the whole line is placed at the first usable box.
        for word_index in range(len(english.split(' '))):
            try:
                boxes = point['values']
                if boxes is None:
                    continue
                box = boxes[word_index]
                if box is None or box['height'] is None:
                    continue
                image_path = box['imagepath']
                if text_y.get(image_path) is None:
                    text_y[image_path] = 200
                _, vertical = puttext(
                    box['height'], 200, text_y[image_path],
                    english, image_path)
                text_y[image_path] = vertical
                break
            except Exception:
                # Best effort: a missing or malformed box, or a failed
                # puttext call, just moves on to the next box.
                continue

    data = {'hindi': hindi_res, 'english': english_res}

    # Lines are paired by position; a shorter translation raises IndexError.
    translations = [
        Translation(basename=basename, source=source, target=english_res[i])
        for i, source in enumerate(hindi_res)
    ]
    Translation.objects.insert(translations)

    res = CustomResponse(Status.SUCCESS.value, data)
    TranslationProcess.objects(basename=basename).update(
        set__status=STATUS_PROCESSED)
    return res.getres()