Exemplo n.º 1
0
def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator`` as a float, or 0 if it cannot be computed."""
    try:
        return float(numerator) / float(denominator)
    except (TypeError, ValueError, ArithmeticError):
        # Non-numeric input or a zero denominator (e.g. an empty poem).
        return 0


def _weka_class(category_words):
    """Map a poem's category words to the class label written to the ARFF row.

    Words are checked in order; within a word the keywords are tried in the
    order listed below.  The first hit decides the label ('R' or 'NR').
    Returns 'unknown' when no keyword occurs in any word.
    """
    keyword_labels = (
        ("Free", 'NR'),
        ("Rhymed", 'R'),
        ("Couplet", 'R'),
        ("Rhymes", 'R'),
        ("Blank", 'NR'),
        ("Sonnet", 'R'),
        ("Villanelle", 'R'),
        ("Terza", 'R'),
    )
    for word in category_words:
        for keyword, label in keyword_labels:
            # The keywords are plain literals, so a case-sensitive substring
            # test is equivalent to re.search(keyword, word).
            if keyword in word:
                return label
    return 'unknown'


def process(file_name):
    """Extract rhyme/meter features from one poem file and record the results.

    Steps, in order:
      1. Read the file matched by ``file_name`` (resolved through
         ``glob.glob``).  Lines from index 9 onward are taken as the poem;
         the line at index 5 is split into category words.
      2. Change into the hard-coded ``features`` directory and import the
         project modules ``functions``, ``rhyme_detection`` and
         ``poem_vector`` from there.
      3. Build the feature vector and append it, followed by the class label,
         as one comma-separated row to the module-level ``arff`` file object.
      4. Write a per-poem report into ``./output`` (relative to the features
         directory).
      5. Change into the hard-coded ``test_site`` directory and rename the
         poem file, replacing "txt" with "done" in its name.

    Relies on module-level names ``glob``, ``os`` and ``arff`` and changes the
    process working directory as a side effect.

    Args:
        file_name: Filename or glob pattern of the poem, relative to the
            working directory at call time.

    Raises:
        IndexError: if ``file_name`` matches nothing, or the file has fewer
            than six lines.

    NOTE(review): the original referred to an undefined name ``file`` for the
    feature vector's first entry, the report name and the rename; the resolved
    poem path is used here on the assumption that this was the intent.
    NOTE(review): the original class-label loop could not run (undefined
    names, reversed ``re.search`` arguments); matching the category words
    against the keyword list is the presumed intent -- confirm.
    """
    # Resolve once and reuse the same name for reading, reporting and renaming.
    poem_path = glob.glob(file_name).pop()
    with open(poem_path) as source:
        data = source.readlines()

    poem = data[9:]
    p_categories = data[5].split()

    # The project modules are imported only after changing into their
    # directory, exactly as the original did.
    os.chdir(r'/media/bryan/Work/graph_poetry/features/')
    import functions
    import rhyme_detection as rd
    import poem_vector as pv

    poem = functions.mutateRemoveJunk(poem)
    linecount = len(poem)
    metrical_stress = functions.getStress(poem)
    last_words_ll = functions.get_last_words(poem)
    # The results of these two calls are unused here, but the calls are kept
    # because they may annotate the linked list in place -- TODO confirm.
    functions.make_rhyme_pattern(last_words_ll)
    functions.get_last_words_list(last_words_ll)

    features = [poem_path]
    features.append(rd.get_rhyme_pattern_vector(last_words_ll))

    # Rhyme-type counts, normalised by the number of lines.
    features.extend(
        _safe_ratio(count, linecount)
        for count in pv.get_rhyme_type_count(last_words_ll)
    )

    features.append(pv.get_ratio(last_words_ll))

    # Rhyme-placement counts, each normalised by the number of lines.
    features.append(_safe_ratio(pv.get_couplets(last_words_ll), linecount))
    features.append(
        _safe_ratio(pv.get_alternating_rhymes(last_words_ll), linecount))
    features.append(
        _safe_ratio(pv.get_rhymes_two_lines_apart(last_words_ll), linecount))

    features.append(linecount)

    foot_count = pv.foot_count(metrical_stress)
    total = sum(foot_count)

    features.extend(pv.get_feet_per_line(metrical_stress))
    features.extend(_safe_ratio(count, total) for count in foot_count)

    # The helper calls below sat inside the original try blocks, so any
    # failure in them still falls back to 0 instead of aborting the poem.
    try:
        features.append(float(pv.get_streak(metrical_stress)))
    except Exception:
        features.append(0)

    try:
        features.append(float(pv.get_switches(metrical_stress)) / float(total))
    except Exception:
        features.append(0)

    weka_class = _weka_class(p_categories)

    # One row: every feature followed by ", ", then the class label.
    arff.write("".join(str(value) + ", " for value in features))
    arff.write(str(weka_class))
    arff.write('\n')

    os.chdir('./output')
    # This is where the results are stored for each poem.
    with open(str(poem_path)[:-3] + 'out.txt', 'w') as output:
        output.write("Results" + '\n' + '\n')
        output.write("Line Count : " + '\n')
        output.write(str(linecount))
        output.write('\n')
        output.write('Metrical Stress : ' + '\n')
        for stress in metrical_stress:
            output.write(str(stress))
            output.write('\n')
        output.write('\n')

        output.write("Rhyme Pattern : " + '\n')
        # Follow nodebelow links from the head until they lead back to it;
        # the head node itself is not written.
        node = last_words_ll.head
        while node.nodebelow is not last_words_ll.head:
            node = node.nodebelow
            output.write(str(node.outer_letter))
            output.write("-")
            output.write(str(node.letter_designation))
            output.write("----")
            output.write(str(node.word))
            output.write("----")
            output.write(str(node.rhyme_type))
            output.write('\n')
        output.write('\n')
        output.write("Poem : " + '\n')
        for line in poem:
            output.write(str(line))

    os.chdir(r'/media/bryan/Work/graph_poetry/poetry_collection/test_site/')
    os.rename(poem_path, poem_path.replace("txt", "done"))
Exemplo n.º 2
0
        for item in foot_count:
            try:
                poem_vector.append(float(item)/float(total))
            except:
                poem_vector.append("0")

        # 28 max same feet streak
        try:
            # poem_vector.append(float(pv.get_streak(metrical_stress))/float(total))
            poem_vector.append(float(pv.get_streak(metrical_stress)))
        except:
            poem_vector.append("0")

        # 29 number of switches
        try:
                poem_vector.append(float(pv.get_switches(metrical_stress))/float(total))
        except:
                poem_vector.append("0")


        # use regex to find these features:

        # 30 date

        # 31 author
        # 31 title

        # ord() gets the ascii number for the letter
        ascii_words = []
        for item in last_words_list:
            for ch in item: