Пример #1
0
def clean_koran_chapters_to_file(book_filename, output_path, results_filename, book_name):
    """Write each chapter of a book to its own file, minus Koran header boilerplate.

    Chapter rows for ``book_name`` are read from the ``religion`` database.
    Each chapter's text is the slice of ``book_filename`` that starts at the
    row's ``chapter_name_end`` offset and spans ``chapter_length`` characters.
    A fixed list of header phrases is removed from that text before it is
    written to ``output_path + title + '.txt'``.

    Args:
        book_filename: Path of the text file holding the whole book.
        output_path: Prefix prepended verbatim to every chapter file name.
            No separator is inserted, so pass a trailing slash when this is
            a directory.
        results_filename: Path of a tab-separated summary file that receives
            one ``title<TAB>cleaned_length`` line per chapter.
        book_name: Value matched against ``books.title`` in the query.
    """
    # Phrases stripped from every chapter; applied in this order.
    boilerplate = (
        'REVEALED AT MECCA',
        'WHERE IT WAS REVEALED IS DISPUTED',
        'REVEALED AT MEDINA',
        'REVEALED PARTLY AT MECCA, AND PARTLY AT MEDINA',
        'IN THE NAME OF THE MOST MERCIFUL GOD.\n\n',
        'A.L.M.\n',
    )
    query = ('select chapters.title, chapter_name_start, chapter_name_end, chapter_length '
             'from chapters JOIN books ON chapters.book_id=books.book_id '
             'where books.title=%s')

    db = connect_db('religion')
    cursor = db.cursor(MySQLdb.cursors.DictCursor)
    try:
        cursor.execute(query, (book_name,))
        chapters = cursor.fetchall()
    finally:
        # Release the cursor even when the query fails.
        # NOTE(review): the connection itself is left open because this
        # block cannot see whether connect_db() hands out a shared handle.
        cursor.close()

    with open(book_filename, 'r') as book_file:
        txt = book_file.read()

    with open(results_filename, 'w') as results_file:
        for row in chapters:
            start = row['chapter_name_end']
            verse = txt[start:start + row['chapter_length']]
            for phrase in boilerplate:
                verse = verse.replace(phrase, '')

            with open(output_path + row['title'] + '.txt', 'w') as chapter_file:
                chapter_file.write(verse)

            # Record the length of the cleaned text, not of the raw slice.
            results_file.write('%s\t%d\n' % (row['title'], len(verse)))
Пример #2
0
def find_koran_verses():
    """Locate the numbered verses of every Koran chapter and write their offsets.

    Chapter rows are read from the ``religion`` database.  Each chapter's
    text is the slice of ``../../data/books/Koran.txt`` that starts at the
    row's ``chapter_name_end`` offset and spans ``chapter_length`` characters.
    Verse boundaries are the runs of digits found at the start of a line.

    Output goes to ``../../data/books/Koran_verses.txt`` as tab-separated
    lines with the columns ``chapter_id``, ``number``, ``start`` and ``end``.
    ``start`` and ``end`` are offsets into the whole book text.  The text
    between the chapter header and the first verse number is emitted with
    number ``'0'``.

    Returns:
        None.
    """
    sql = 'select chapters.title, chapters.chapter_id, chapter_name_start, chapter_name_end, chapter_length \
    from chapters JOIN books ON chapters.book_id=books.book_id where books.title="Koran"'

    db = connect_db('religion')
    cursor = db.cursor(MySQLdb.cursors.DictCursor)
    try:
        cursor.execute(sql)
        chapters = cursor.fetchall()
    finally:
        # Release the cursor even when the query fails.
        # NOTE(review): the connection itself is left open because this
        # block cannot see whether connect_db() hands out a shared handle.
        cursor.close()

    with open('../../data/books/Koran.txt', 'r') as book_file:
        txt = book_file.read()

    # Compile once; the same three patterns are applied to every chapter.
    regex_flags = re.M | re.S | re.U
    header_re = re.compile(r'IN THE NAME OF THE MOST MERCIFUL GOD.+?\s', regex_flags)
    sentence_end_re = re.compile(r'\.\s', regex_flags)
    verse_number_re = re.compile(r'^\d+', regex_flags)

    output_filename = '../../data/books/Koran_verses.txt'
    fields = ['chapter_id', 'number', 'start', 'end']
    line_format = '%d\t%s\t%d\t%d\n'

    with open(output_filename, 'w') as o_file:
        o_file.write('\t'.join(fields) + '\n')

        for row in chapters:
            chapter_id = row['chapter_id']
            chapter_start = row['chapter_name_end']
            verse = txt[chapter_start:chapter_start + row['chapter_length']]

            # The verses begin after the invocation line; chapters without
            # it fall back to the first sentence end.
            header = header_re.search(verse)
            if header is None:
                header = sentence_end_re.search(verse)
            # With no header at all, start at the top of the chapter text
            # instead of failing on a None match.
            header_end = header.end() if header is not None else 0

            # Tracked as offsets into the whole book, not into the chapter.
            previous_verse_location = header_end + chapter_start
            previous_verse_number = '0'

            for m in verse_number_re.finditer(verse):
                # A verse ends one character before the next verse number.
                o_file.write(line_format % (chapter_id, previous_verse_number,
                                            previous_verse_location,
                                            chapter_start + m.start() - 1))
                previous_verse_location = m.end() + chapter_start
                previous_verse_number = m.group(0)

            # The last verse runs to the next blank line.  ``verse`` is
            # chapter-relative, so convert the book offset back before
            # searching, and use the end of the chapter when no blank line
            # follows.
            blank_line = verse.find('\n\n', previous_verse_location - chapter_start)
            if blank_line == -1:
                blank_line = len(verse)
            verse_end_location = blank_line + chapter_start
            o_file.write(line_format % (chapter_id, previous_verse_number,
                                        previous_verse_location, verse_end_location))
Пример #3
0
def save_chapters_to_file_mysql(book_filename, output_path, book_name):
    """Split a book into one text file per chapter using offsets from MySQL.

    Chapter rows for ``book_name`` are read from the ``religion`` database.
    Each chapter's text is the slice of ``book_filename`` that starts at the
    row's ``chapter_name_end`` offset and spans ``chapter_length`` characters.
    It is written unchanged to ``output_path + title + '.txt'``.

    Args:
        book_filename: Path of the text file holding the whole book.
        output_path: Prefix prepended verbatim to every chapter file name.
            No separator is inserted, so pass a trailing slash when this is
            a directory.
        book_name: Value matched against ``books.title`` in the query.
    """
    query = ('select chapters.title, chapter_name_start, chapter_name_end, chapter_length '
             'from chapters JOIN books ON chapters.book_id=books.book_id '
             'where books.title=%s')

    db = connect_db('religion')
    cursor = db.cursor(MySQLdb.cursors.DictCursor)
    try:
        cursor.execute(query, (book_name,))
        chapters = cursor.fetchall()
    finally:
        # Release the cursor even when the query fails.
        # NOTE(review): the connection itself is left open because this
        # block cannot see whether connect_db() hands out a shared handle.
        cursor.close()

    with open(book_filename, 'r') as book_file:
        txt = book_file.read()

    for row in chapters:
        start = row['chapter_name_end']
        with open(output_path + row['title'] + '.txt', 'w') as chapter_file:
            chapter_file.write(txt[start:start + row['chapter_length']])
Пример #4
0
def test_koran_verses():
    """Sanity-check the Koran chapter slices and print a pass/fail summary.

    Chapter rows are read from the ``religion`` database.  Each chapter's
    text is the slice of ``../../data/books/Koran.txt`` that starts at the
    row's ``chapter_name_end`` offset and spans ``chapter_length`` characters.

    Two checks are selected by local flags:

    * ``numerical_index_test`` (off): print the row index of any chapter
      that has no line starting with digits.
    * ``end_of_chapter_test`` (on): a chapter passes when a blank line
      (``'\\n\\n'``) is found after its first character; otherwise its title
      is printed and it counts as a failure.

    A chapter in which neither the invocation line nor a sentence end can be
    found is reported as a failure and skipped, rather than raising.

    The number of passing chapters and then ``failures:<n>`` are printed at
    the end.

    Returns:
        None.
    """
    db = connect_db('religion')
    cursor = db.cursor(MySQLdb.cursors.DictCursor)
    sql = 'select chapters.title, chapters.chapter_id, chapter_name_start, chapter_name_end, chapter_length \
    from chapters JOIN books ON chapters.book_id=books.book_id where books.title="Koran"'
    try:
        cursor.execute(sql)
        chapters = cursor.fetchall()
    finally:
        # Release the cursor even when the query fails.
        # NOTE(review): the connection itself is left open because this
        # block cannot see whether connect_db() hands out a shared handle.
        cursor.close()

    with open('../../data/books/Koran.txt', 'r') as book_file:
        txt = book_file.read()

    # Compile once; the same patterns are applied to every chapter.
    regex_flags = re.M | re.S | re.U
    header_re = re.compile(r'IN THE NAME OF THE MOST MERCIFUL GOD.+?\s', regex_flags)
    sentence_end_re = re.compile(r'\.\s', regex_flags)
    verse_number_re = re.compile(r'^\d+', regex_flags)

    numerical_index_test = False
    end_of_chapter_test = True
    check = 0
    fail_check = 0

    for row_index, row in enumerate(chapters):
        chapter_start = row['chapter_name_end']
        verse = txt[chapter_start:chapter_start + row['chapter_length']]

        # Every chapter should have a recognisable header; report the ones
        # that do not.
        header = header_re.search(verse)
        if header is None:
            header = sentence_end_re.search(verse)
        if header is None:
            print(row['title'])
            fail_check += 1
            continue

        if numerical_index_test:
            if not verse_number_re.findall(verse):
                print(row_index)

        if end_of_chapter_test:
            # An index of 0 (blank line at the very start) also counts as a
            # failure.
            if verse.find('\n\n') <= 0:
                print(row['title'])
                fail_check += 1
            else:
                check += 1

    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(check)
    print('failures:%d' % fail_check)