Exemplo n.º 1
0
def get_status(request):
    """Return the stored status of a conversion request as JSON.

    The request id is read from the ``id`` query parameter and used to build
    a ``ConvertRequest`` rooted at the first configured media directory. The
    JSON payload has the keys ``answer`` (always 200) and ``status`` (whatever
    ``loadStatus()`` returns for that request).
    """
    request_id = request.GET.get("id")
    tracked_request = ConvertRequest(request_id, settings.MEDIA_DIR[0])
    return JsonResponse({
        "answer": 200,
        "status": tracked_request.loadStatus(),
    })
Exemplo n.º 2
0
def pdf_txt_converter(file_request: ConvertRequest):
    """Convert the request's PDF to plain text by OCR-ing each rendered page.

    The PDF is rasterized with ``pdftoppm`` into PNG pages next to the
    request's working directory, each page is run through ``tesseract``
    (Portuguese language data), the per-page texts are concatenated in page
    order, the temporary files are removed, and the result is written to the
    request's text location.

    ``file_request.incrementStatus`` is called once per page with ``30 / pages``.

    Args:
        file_request: request object providing ``getPDFLocation()``,
            ``getTextLocation()``, ``id``, ``path`` and ``incrementStatus()``.

    Returns:
        The concatenated OCR text (empty string when no page was rendered).
    """
    import contextlib
    import glob
    import shutil
    import subprocess

    print("pdf_txt_converter...")
    filename = file_request.getPDFLocation()
    outputFile = file_request.getTextLocation()
    print(filename)
    print(outputFile)

    output = "output-" + file_request.id
    outputPath = file_request.path + '/' + output
    text_prefix = file_request.path + "/texto-" + output + "-"

    # Argument lists (no shell) so that paths containing spaces or shell
    # metacharacters are passed to the tools literally.
    subprocess.run(["pdftoppm", filename, outputPath, "-png"],
                   stdout=subprocess.DEVNULL,
                   check=False)

    # Discover the page images actually produced instead of reconstructing
    # pdftoppm's zero-padding by hand (which breaks from 100 pages upwards).
    page_images = []
    for image_path in glob.glob(glob.escape(outputPath) + "-*.png"):
        suffix = os.path.basename(image_path)[len(output) + 1:-len(".png")]
        if suffix.isdigit():
            page_images.append((int(suffix), suffix, image_path))
    page_images.sort()

    if not page_images:
        print("pdf_txt_converter: no page images were produced for", filename)
    # Guard against dividing by zero when rasterization yielded nothing.
    step = 30 / len(page_images) if page_images else 0

    page_texts = []
    for _, suffix, image_path in page_images:
        text_base = text_prefix + suffix
        # tesseract appends ".txt" to the output base name itself.
        subprocess.run(["tesseract", image_path, text_base, "-l", "por"],
                       stdout=subprocess.DEVNULL,
                       check=False)
        with open(text_base + ".txt", 'r', encoding='utf-8') as txt_file:
            page_texts.append(txt_file.read())
        file_request.incrementStatus(step)
    texto = "".join(page_texts)

    # Best-effort removal of the temporary images and per-page text files,
    # matching the same name patterns the previous "rm -rf" calls used.
    for pattern in (glob.escape(outputPath) + "*",
                    glob.escape(text_prefix) + "*"):
        for leftover in glob.glob(pattern):
            if os.path.isdir(leftover) and not os.path.islink(leftover):
                shutil.rmtree(leftover, ignore_errors=True)
            else:
                with contextlib.suppress(OSError):
                    os.remove(leftover)

    with open(outputFile, "w", encoding='utf-8') as out_file:
        out_file.write(texto)
    return texto
Exemplo n.º 3
0
def result(request):
    """Render the result page showing the abstract saved for a request.

    The request id comes from the ``id`` query parameter. The file at the
    request's ``getAbsPath()`` is read as UTF-8 and passed to the
    ``dashboard/result.html`` template under the ``abstract`` context key.
    """
    request_id = request.GET.get("id")
    finished_request = ConvertRequest(request_id, settings.MEDIA_DIR[0])

    with open(finished_request.getAbsPath(), 'r', encoding='utf8') as abs_file:
        abstract_text = abs_file.read()

    return render(request, 'dashboard/result.html',
                  {"abstract": abstract_text})
Exemplo n.º 4
0
def text_reading(file_request: ConvertRequest):
    """Split the request body into sentences and store them as a CSV file.

    ``file_request.body`` is split on ``'.'`` and every piece becomes one row
    of a single ``Sentences`` column written (without index) to
    ``<file_request.path>/database.csv`` in UTF-8.

    Args:
        file_request: request object providing ``getTextLocation()``,
            ``body`` and ``path``.
    """
    print("text_reading...")
    print("File path: ", file_request.getTextLocation())

    sentences = file_request.body.split('.')
    COLS = ['Sentences']
    database = pd.DataFrame(sentences, columns=COLS)

    # Hand pandas the path so it opens and closes the file itself; the
    # previous manually opened handle was never closed.
    database.to_csv(file_request.path + "/database.csv",
                    mode='w',
                    columns=COLS,
                    index=False,
                    encoding="utf-8")
Exemplo n.º 5
0
def process_text(request):
    """Run the digest pipeline on text submitted via POST.

    For a POST request, a ``ConvertRequest`` is built from the ``rid`` field,
    its ``portion`` is set from the integer ``percent`` field and its ``body``
    from the ``text`` field; the status is saved, the output of ``__digest``
    is stored through ``saveAbs`` and the request id is returned under
    ``result``. Any other method yields ``result: None``. ``answer`` is 200 in
    both cases.
    """
    print("Process Text!")

    # Anything that is not a POST gets the empty answer straight away.
    if request.method != 'POST':
        return JsonResponse({"answer": 200, "result": None})

    submitted = ConvertRequest(request.POST.get("rid"), settings.MEDIA_DIR[0])
    submitted.portion = int(request.POST.get("percent"))
    submitted.body = request.POST.get("text")
    submitted.saveStatus()
    submitted.saveAbs(__digest(submitted))

    return JsonResponse({"answer": 200, "result": submitted.id})
Exemplo n.º 6
0
def __handle_uploaded_file(f, rid) -> ConvertRequest:
    """Wrap an uploaded file in a ConvertRequest and save it as the PDF.

    Args:
        f: the uploaded file object, stored on the request as ``pdfFile``.
        rid: identifier used to build the ``ConvertRequest``.

    Returns:
        The request, with ``isSaved`` holding the result of ``savePDFFile()``
        and ``isPdf`` set to True.
    """
    print("Here!")
    # TODO validation (.pdf) 20 pages only too
    upload = ConvertRequest(rid, settings.MEDIA_DIR[0])
    upload.pdfFile = f
    saved = upload.savePDFFile()
    upload.isSaved = saved
    upload.isPdf = True
    return upload
Exemplo n.º 7
0
def __digest(file_request: ConvertRequest):
    """Run every processing stage for a request and return the final output.

    PDF requests are first converted to text (and cleaned up when ``isSBC``
    is set, otherwise the full text becomes the body). After that the stages
    run in a fixed order, each followed by ``incrementStatus(10)``:
    sentence extraction, name dictionary, preprocessing, position-weighted
    metric, graph methodology and finally the k-medoids method, whose result
    is returned.
    """
    if not file_request.isPdf:
        file_request.status = 30
    else:
        file_request.fullText = pdf_txt_converter(file_request)
        file_request.status = 30

        if not file_request.isSBC:
            file_request.body = file_request.fullText
        else:
            file_request.cleanupText()
        file_request.incrementStatus(10)

    # Stage: split the body into sentences (writes database.csv).
    text_reading(file_request)
    file_request.incrementStatus(10)

    # The remaining file-based stages all work inside the request directory.
    workdir = file_request.path

    names_dict = get_name_dict(workdir)
    file_request.incrementStatus(10)

    preprocessing(names_dict, workdir)
    file_request.incrementStatus(10)

    position_weighted_metric(workdir)
    file_request.incrementStatus(10)

    graph_methodology(workdir)
    file_request.incrementStatus(10)

    # NOTE: the tfidf_cossine_euclidean and brush_path stages are currently
    # disabled and therefore not part of this pipeline.

    summary = k_medoids_method(file_request)
    file_request.incrementStatus(10)
    return summary