Exemplo n.º 1
0
def loadarticles():
    """Load the article files in ``ARTICLES_FOLDER`` into the module lists.

    For every file name returned by ``listdir(ARTICLES_FOLDER)`` a new
    sentence list is appended to the global ``articles`` and the file name
    is appended to ``article_names``. The file text is cleaned (literal
    ``\\n`` sequences, ``'b'`` / ``b'`` markers, square brackets and all
    digits are removed) and then split with ``utils.delim_sentences``.

    A file that cannot be read or processed is reported on stdout and
    skipped; the sentence list already appended for it is left in place.
    """
    global articles

    article_ctr = 0
    for file in listdir(ARTICLES_FOLDER):
        # NOTE(review): with `>` the loop handles MAX_ARTICLE_COUNT + 1 files
        # before stopping; kept unchanged - confirm whether `>=` was intended.
        if article_ctr > MAX_ARTICLE_COUNT:
            break

        try:
            # Keep a direct reference to this article's sentence list so the
            # sentences land in the right slot even when `articles` already
            # held entries before this call.
            current_sentences = []
            articles.append(current_sentences)
            article_names.append(file)

            # The context manager closes the file even if reading fails.
            with open(ARTICLES_FOLDER + file, 'r') as f:
                article = f.read()

            # Strip the serialisation residue, then drop every digit.
            article = article.replace("\\n", "").replace("'b'", "").replace(
                "b'", "").replace("[", "").replace("]", "")
            article = ''.join(ch for ch in article if not ch.isdigit())

            current_sentences.extend(utils.delim_sentences(article))

        except Exception as e:
            # Best-effort loader: report the problem and move on.
            print("Error in " + file)
            print(e)

        article_ctr += 1
Exemplo n.º 2
0
def evaluate(testfiletxt, answerfilejson):
    """Print every sentence found in a test text file.

    Args:
        testfiletxt: Path of the text file to read. Literal ``\\n``
            sequences, ``'b'`` / ``b'`` markers and square brackets are
            removed before the text is split with ``utils.delim_sentences``.
        answerfilejson: Path of a JSON answer file. It is parsed, so a
            missing or malformed file still raises, but its content is not
            used by this function.
    """
    # The context manager guarantees the test file is closed.
    with open(testfiletxt, 'r') as f:
        raw = f.read()
    text = raw.replace("\\n", "").replace("'b'", "").replace(
        "b'", "").replace("[", "").replace("]", "")

    sentences = utils.delim_sentences(text)

    # Parsed only to keep the original failure behaviour for a bad answer
    # file; the parsed data itself is not needed here.
    with open(answerfilejson) as data_ans:
        json.loads(data_ans.read())

    for sentence in sentences:
        print(sentence)
Exemplo n.º 3
0
def getResults(article, relation):
    """Classify each sentence of ``article`` with a ``Model`` for ``relation``.

    Args:
        article: Text that is split with ``utils.delim_sentences``.
        relation: Value passed to the ``Model`` constructor.

    Returns:
        A list with one dict per sentence, holding the sentence under
        ``"sentence"`` and the result of ``Model.predict`` under
        ``"classification"``.
    """
    classifier = Model(relation)
    return [
        {"sentence": text, "classification": classifier.predict(text)}
        for text in utils.delim_sentences(article)
    ]
Exemplo n.º 4
0
def main(relation):
    """Interactively build the word answer file for ``relation``.

    Sentences are read from ``sen_<relation>.txt``. For each one the words
    returned by ``ModelWords(relation).getWords`` are shown and the user is
    asked to confirm them; pressing ENTER (empty input) accepts the item,
    any other input rejects it. Accepted items are written as a JSON array
    to ``ans_file2_<relation>.json``.

    Args:
        relation: Relation name; used in both file names and passed to the
            ``ModelWords`` constructor.
    """
    import json  # local import: needed only to escape the output strings

    print('Creating test file for', relation)

    # Both files are opened up front (as before) so an unwritable output
    # path fails before the user spends time labelling; the context manager
    # closes and flushes them on every exit path.
    with open('sen_' + relation + '.txt', 'r') as f, \
            open('ans_file2_' + relation + '.json', 'w') as fans:
        wordmodel = ModelWords(relation)
        text = f.read()

        data = []
        for sentence in utils.delim_sentences(text):
            print('-------\nSen:', sentence)
            baseword, subword = wordmodel.getWords(sentence)
            print(':::', baseword, subword)
            isCorrect = input("Is correct? [ENTER] YES --- [NO] NO")

            # Only an empty answer (plain ENTER) accepts the suggestion.
            if not isCorrect:
                data.append({
                    'sentence': sentence,
                    'baseword': baseword,
                    'subword': subword,
                })

        rows = []
        for item in data:
            record = {
                'baseword': str(item['baseword']),
                'subword': str(item['subword']),
                # Double quotes are still stripped from the sentence so the
                # stored text matches what earlier versions wrote.
                'sentence': str(item['sentence'].replace('"', '')),
            }
            # json.dumps yields the same `{"k": "v", ...}` layout as the old
            # hand-built string but escapes backslashes, control characters
            # and quotes, so the file is always valid JSON.
            rows.append('\t' + json.dumps(record, ensure_ascii=False))

        fans.write("[\n")
        if rows:
            fans.write(",\n".join(rows) + "\n")
        fans.write("]")
Exemplo n.º 5
0
def evaluate(testfiletxt, answerfilejson, relation, logfile):
    """Score ``Model(relation)`` against a labelled answer file.

    Every sentence of the test file is classified with ``Model.predict``
    and compared with the expected answer, which is ``True`` when
    ``getType(data, sentence)`` equals the numeric marker of ``relation``.
    Progress is printed and one summary line
    ``"<accuracy>, <score>, <sentence count>"`` is written to ``logfile``.

    Args:
        testfiletxt: Path of the text file holding the test sentences.
        answerfilejson: Path of the JSON answer file passed to ``getType``.
        relation: Relation name; selects the marker and is passed to the
            ``Model`` constructor.
        logfile: Object with a ``write`` method that receives the summary.
    """
    # Marker per relation; any other relation name maps to 0.
    # NOTE(review): 'Holynym' looks like a misspelling of 'Holonym'; it is
    # kept because callers pass this exact string - confirm before renaming.
    rel_markers = {'Synonymy': 1, 'Hyponym': 2, 'Holynym': 3, 'Meronym': 4}
    rel_index = rel_markers.get(relation, 0)

    model = Model(relation)

    # The context manager guarantees the test file is closed.
    with open(testfiletxt, 'r') as f:
        raw = f.read()
    text = raw.replace("\\n", "").replace("'b'", "").replace(
        "b'", "").replace("[", "").replace("]", "")

    sentences = utils.delim_sentences(text)

    with open(answerfilejson) as data_ans:
        data = json.loads(data_ans.read())

    print("-------------------------")
    print("\n", relation)
    score = 0
    for sentence in sentences:
        print('\n\nClassifying:', sentence)
        prediction = model.predict(sentence)
        sen_type = getType(data, sentence)
        print(':: Classification if', relation, ':\t', prediction)

        # NOTE(review): compared against an int marker - confirm that
        # getType returns an int and not a string.
        ans = sen_type == rel_index
        print(':: From ANS file is', relation, ':\t', ans)

        if ans == prediction:
            score += 1
            print('Match')

    total = len(sentences)
    print('\n\nScore for', relation, ':', score, '/', total)
    # An empty test file would otherwise raise ZeroDivisionError; report an
    # accuracy of 0.0 in that case.
    accuracy = score / total if total else 0.0
    logfile.write("%f, %d, %d\n" % (accuracy, score, total))
Exemplo n.º 6
0
def getResults(article, relation):
    """Classify each sentence of ``article`` and extract its word pair.

    Args:
        article: Text that is split with ``utils.delim_sentences``.
        relation: Value passed to both the ``Model`` and ``ModelWords``
            constructors.

    Returns:
        A list with one dict per sentence with the keys ``"sentence"``,
        ``"classification"`` (result of ``Model.predict``), ``"baseword"``
        and ``"subword"`` (the pair returned by ``ModelWords.getWords``).
    """
    classifier = Model(relation)
    word_finder = ModelWords(relation)

    results = []
    for text in utils.delim_sentences(article):
        # Predict first, then look up the words - same call order as before.
        entry = {"sentence": text, "classification": classifier.predict(text)}
        entry["baseword"], entry["subword"] = word_finder.getWords(text)
        results.append(entry)

    return results
def create_test_file(filepath, filename):
    """Interactively label the sentences of a text file.

    The text of ``filepath`` is cleaned (literal ``\\n`` sequences,
    ``'b'`` / ``b'`` markers and square brackets are removed) and split
    with ``utils.delim_sentences``. The user is asked for a type for every
    sentence. Two files are then written to the current directory:

    * ``ANSF<filename with .txt replaced by .json>`` - a JSON array of
      ``{"type": ..., "sentence": ...}`` objects, the type stored as the
      string the user typed;
    * ``TESTF<filename>`` - the cleaned text.

    Args:
        filepath: Path of the source text file.
        filename: Base file name used to derive both output file names.
    """
    import json  # local import: needed only to escape the output strings

    # The context manager closes the source file even if reading fails.
    with open(filepath, 'r') as f:
        raw = f.read()
    fileans = 'ANSF' + filename.replace('.txt', '.json')
    text = raw.replace("\\n", "").replace("'b'", "").replace(
        "b'", "").replace("[", "").replace("]", "")

    data = []
    for sentence in utils.delim_sentences(text):
        print(sentence)
        sen_type = input(
            "Enter type: [0] None, [1] Syn, [2] Hyp, [3] Holy, [4] Mero \nType: "
        )
        data.append({'sentence': sentence, 'type': sen_type})

    # json.dumps produces the same `{"type": "...", "sentence": "..."}`
    # layout as the old hand-built string, but escapes quotes, backslashes
    # and control characters so the answer file is always valid JSON.
    rows = [
        '\t' + json.dumps(
            {'type': str(item['type']), 'sentence': str(item['sentence'])},
            ensure_ascii=False,
        )
        for item in data
    ]

    with open(fileans, 'w') as fans:
        fans.write("[\n")
        if rows:
            fans.write(",\n".join(rows) + "\n")
        fans.write("]")

    with open('TESTF' + filename, 'w') as ftxt:
        ftxt.write(text)
Exemplo n.º 8
0
def run():
    """Collect the sentences classified as synonymy from every article.

    Each file in ``./articles`` is split with ``utils.delim_sentences``.
    Every sentence for which ``ModelSentence('Synonymy').predict`` is
    truthy is written on its own line to ``sen_syn.txt``, and the file name
    together with the result of ``ModelWords('Synonymy').getWords`` is
    printed.
    """
    sentencemodel = ModelSentence('Synonymy')
    wordmodel = ModelWords('Synonymy')

    # The context managers make sure the output is flushed and every
    # article file is closed, even when a model call raises.
    with open('sen_syn.txt', 'w') as fsyn:
        for name in os.listdir('./articles'):
            with open('./articles/' + name, 'r') as f:
                article = f.read()

            for sentence in utils.delim_sentences(article):
                # A sentence with synonymy is found
                if sentencemodel.predict(sentence):
                    fsyn.write(sentence + '\n')
                    print(name, ":", wordmodel.getWords(sentence))
def run(relation, n=100):
    """Collect sentences of ``relation`` from randomly sampled articles.

    ``n`` times a file is picked at random (with replacement) from
    ``./articles`` and split with ``utils.delim_sentences``. Every sentence
    for which ``ModelSentence(relation).predict`` is truthy is written,
    followed by ``'.'`` and a newline, to ``sen_<relation>.txt``.

    Args:
        relation: Relation name; used in the output file name and passed to
            the ``ModelSentence`` and ``ModelWords`` constructors.
        n: Number of random draws. Defaults to 100, the previously
            hard-coded value.
    """
    print(relation)
    sentencemodel = ModelSentence(relation)
    # Constructed as before although not used below.
    # NOTE(review): confirm whether ModelWords construction is required here.
    wordmodel = ModelWords(relation)

    # The context manager makes sure the output is flushed and closed.
    with open('sen_' + relation + '.txt', 'w') as fsyn:
        files = os.listdir('./articles')
        if not files:
            # Nothing to sample from; leave the (empty) output file behind.
            return

        for _ in range(n):
            # random.randint(0, len(files)) included len(files) itself and
            # could index past the end of the list; random.choice cannot.
            name = random.choice(files)
            with open('./articles/' + name, 'r') as f:
                article = f.read()

            for sentence in utils.delim_sentences(article):
                # A sentence with the relation is found
                if sentencemodel.predict(sentence):
                    fsyn.write(sentence + '.\n')