def update_figure(selectedFile):
    """Build the bar-chart figure of bigram values for one analysis file.

    Reads ``analysis/<selectedFile>``, parses it as JSON and plots the
    entries stored under the ``"bigrams"`` key, ordered from the highest
    value to the lowest. When the ``"bigrams"`` lookup raises ``KeyError``
    or ``TypeError`` the ``"digrams"`` key is used instead.

    Args:
        selectedFile: Name of a file inside the ``analysis`` directory.

    Returns:
        A dict with a ``"data"`` list holding a single ``go.Bar`` trace and
        a ``"layout"`` entry holding a ``go.Layout`` with the axis titles.

    Raises:
        KeyError, TypeError: Propagated from the fallback ``"digrams"``
            lookup when that fails as well.
        ValueError, SyntaxError: If the stored value is not a plain Python
            literal.
    """
    import ast  # local import: only needed for the literal parsing below

    fileContent = utils.readAllText("analysis/" + selectedFile)
    jsonObject = json.loads(fileContent)

    # Only the key lookup is guarded; both failure modes share one fallback.
    try:
        rawBigrams = jsonObject["bigrams"]
    except (TypeError, KeyError):
        rawBigrams = jsonObject["digrams"]

    # NOTE(security): this used to be eval(str(...)), which executes any
    # expression found in the file. literal_eval accepts the same
    # dict/str/number literals but cannot run code.
    bigramsJson = ast.literal_eval(str(rawBigrams))

    # Highest value first; ties keep their original relative order.
    ranked = sorted(bigramsJson.items(),
                    key=lambda item: item[1],
                    reverse=True)
    literals = [literal for literal, _ in ranked]
    freq = [value for _, value in ranked]

    figure = [
        go.Bar(x=literals,
               y=freq,
               name="Bigrams frequency",
               text="The exact value of occurrences of selected bigram"),
    ]
    return {
        "data": figure,
        'layout': go.Layout(
            xaxis={'title': 'Bigram'},
            yaxis={'title': 'Proportions of occurrences'},
        ),
    }
def update_figure(selectedFile):
    """Build the bar-chart figure of trigram values for one analysis file.

    Reads ``analysis/<selectedFile>``, parses it as JSON and plots the
    entries stored under the ``"trigrams"`` key, ordered alphabetically by
    trigram.

    Args:
        selectedFile: Name of a file inside the ``analysis`` directory.

    Returns:
        A dict with a ``"data"`` list holding a single ``go.Bar`` trace and
        a ``"layout"`` entry holding a ``go.Layout`` with the axis titles.

    Raises:
        KeyError: If the parsed JSON object has no ``"trigrams"`` key.
        ValueError, SyntaxError: If the stored value is not a plain Python
            literal.
    """
    import ast  # local import: only needed for the literal parsing below

    fileContent = utils.readAllText("analysis/" + selectedFile)
    jsonObject = json.loads(fileContent)

    # NOTE(security): this used to be eval(str(...)), which executes any
    # expression found in the file. literal_eval accepts the same
    # dict/str/number literals but cannot run code.
    trigramsJson = ast.literal_eval(str(jsonObject["trigrams"]))

    # Alphabetical order by the trigram itself.
    alphabetical = sorted(trigramsJson.items(), key=lambda item: item[0])
    literals = [literal for literal, _ in alphabetical]
    freq = [value for _, value in alphabetical]

    figure = [
        go.Bar(
            x=literals,
            y=freq,
            name="Trigram frequency",
            text="The exact value of occurrences of selected trigram",
        ),
    ]
    return {
        "data": figure,
        'layout': go.Layout(
            xaxis={'title': 'Trigram'},
            yaxis={'title': 'Proportions of occurrences'},
        ),
    }
def detectLang(filenameToAnalyse):
    """Pick the closest-matching analysis file for ``filenameToAnalyse``.

    Every other file in the ``analysis`` directory is compared with
    ``filenameToAnalyse`` through ``lettersFactor``. The file with the
    smallest factor wins, provided that factor is strictly below 10.

    Args:
        filenameToAnalyse: Name of the file inside ``analysis`` to classify.

    Returns:
        ``"Best match: "`` followed by ``utils.mapFileToLanguage`` of the
        winning file name, or ``"No match found."`` when no file scored
        below the threshold.
    """
    candidates = [
        fileName for fileName in os.listdir("analysis")
        if fileName != filenameToAnalyse
    ]
    if not candidates:
        # Nothing to compare against; do not touch the target file at all.
        return "No match found."

    # The analysed file is the same for every comparison, so read it once
    # instead of once per candidate.
    targetContent = utils.readAllText("analysis/" + filenameToAnalyse)

    result = "No match found."
    score = 10  # smaller factor = better; factors >= 10 never match
    for fileName in candidates:
        similarity = lettersFactor(
            utils.readAllText("analysis/" + fileName), targetContent)
        if similarity < score:
            score = similarity
            result = "Best match: " + utils.mapFileToLanguage(fileName)

    return result
# Example #4
# 0
import time
import datetime
import os
import json

# custom modules
import utils
import analyzer

try:
    # parsing files
    filesList = os.listdir("text samples")
    for fileName in filesList:
        # reading files and clearing data
        fileContent = utils.readAllText("text samples/" + fileName)
        clearedData = analyzer.clearData(fileContent)

        if not os.path.isdir("cleared data"):
            os.mkdir("cleared data")
        utils.writeAllText("cleared data/" + fileName, clearedData)

        # counting frequency
        lettersDict = utils.listToDict(analyzer.countLettersFreq(clearedData))
        if not os.path.isdir("letter frequency"):
            os.mkdir("letter frequency")
        utils.writeAllText("letter frequency/" + fileName,
                           json.dumps(lettersDict))

        bigramsDict = utils.listToDict(analyzer.countBigramsFreq(clearedData))
        if not os.path.isdir("bigrams"):
            os.mkdir("bigrams")