Example #1
0
def post_the_text(ja):
    testing_file = codecs.open("testing_file.txt", 'w', 'utf-8')
    util.jagged_array_to_file(testing_file, ja, ['Perek', 'Mishna', 'Comment'])
    testing_file.close()
    ref = create_ref()
    text = create_text(ja)
    functions.post_text(ref, text)
Example #2
0
def post():
    minchat = {'name': 'Minchat Chinuch', 'text': produce_parsed_data(filename)}
    sefer = {'name': 'Sefer HaChinukh', 'text': Ref('Sefer HaChinukh').text('he').text}

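    # Build links between Minchat Chinuch and the Sefer HaChinukh text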
    chinukh_links = find_links(minchat, sefer, grab_dh, u'<b>', u'</b>')

    with codecs.open('links.txt', 'w', 'utf-8') as outfile:
        for each_link in chinukh_links:
            outfile.write(u'{}\n'.format(each_link['refs']))

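    # Build the alternate structure from the parsha and mitzva-name CSVs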
    alt = construct_alt_struct('Chinukh_by_Parsha.csv', 'Chinukh Mitzva names.csv')

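    # Strip the parsing markup (@-codes, stray newlines) from the text before posting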
    cleaned = util.clean_jagged_array(minchat['text'], [m_pattern, comment_pattern, u'@[0-9]{2}',
                                      u'\n', u'\r'])
    with codecs.open('parsed.txt', 'w', 'utf-8') as outfile:
        util.jagged_array_to_file(outfile, cleaned, [u'Mitzva', u'Seif', u'Paragraph'])

    full_text = {
        'versionTitle': 'Minchat Chinuch, Piotrków, 1902',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001175092',
        'language': 'he',
        'text': cleaned
    }

    index = construct_index(alt)
    functions.post_index(index)
    functions.post_text('Minchat Chinuch', full_text)
    functions.post_link(chinukh_links)
Example #3
0
def post_the_text(ja):
    testing_file = codecs.open("testing_file.txt", 'w', 'utf-8')
    util.jagged_array_to_file(testing_file, ja, ['Perek', 'Mishna', 'Comment'])
    testing_file.close()
    ref = create_ref()
    text = create_text(ja)
    functions.post_text(ref, text)
Example #4
0
def post():
    minchat = {
        'name': 'Minchat Chinuch',
        'text': produce_parsed_data(filename)
    }
    sefer = {
        'name': 'Sefer HaChinukh',
        'text': Ref('Sefer HaChinukh').text('he').text
    }

    chinukh_links = find_links(minchat, sefer, grab_dh, u'<b>', u'</b>')

    with codecs.open('links.txt', 'w', 'utf-8') as outfile:
        for each_link in chinukh_links:
            outfile.write(u'{}\n'.format(each_link['refs']))

    alt = construct_alt_struct('Chinukh_by_Parsha.csv',
                               'Chinukh Mitzva names.csv')

    cleaned = util.clean_jagged_array(
        minchat['text'],
        [m_pattern, comment_pattern, u'@[0-9]{2}', u'\n', u'\r'])
    with codecs.open('parsed.txt', 'w', 'utf-8') as outfile:
        util.jagged_array_to_file(outfile, cleaned,
                                  [u'Mitzva', u'Seif', u'Paragraph'])

    full_text = {
        'versionTitle': 'Minchat Chinuch, Piotrków, 1902',
        'versionSource':
        'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001175092',
        'language': 'he',
        'text': cleaned
    }

    index = construct_index(alt)
    functions.post_index(index)
    functions.post_text('Minchat Chinuch', full_text)
    functions.post_link(chinukh_links)
Example #5
0
# -*- coding: utf-8 -*-
import codecs
import regex
from sefaria.model import *
from sources import functions
from data_utilities import util
from sources.Targum_Jerusalem_English import tje_functions

english_book_names = ['Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy']

all_five_books = tje_functions.parse_targum_jerusalem_english()

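# Post each of the five books under its own ref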
for book, book_name in zip(all_five_books, english_book_names):
    print(book_name)
    ref = 'Targum Jerusalem,_{}'.format(book_name)
    text = tje_functions.create_text(book)
    functions.post_text(ref, text)

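# Dump the full parse to a file so it can be checked by hand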
testing_file = codecs.open("testing_file.txt", 'w', 'utf-8')
util.jagged_array_to_file(testing_file, all_five_books, ['Book', 'Chapter', 'Verse'])
testing_file.close()
Example #6
0
    return index


def post_text_and_index(text_struct, section_names):

    index = build_index(section_names)
    functions.post_index(index)

    for section_num, section in enumerate(section_names):

        new_text = {
            "versionTitle": 'Noda BeYehuda Warsaw 1880',
            "versionSource": 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001983501',
            "language": 'he',
            "text": text_struct[section_num]
        }
        functions.post_text('Noda BeYehuda, {}'.format(section), new_text)

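# Parse the source file into a jagged array, write it out for a manual check, then post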
patterns = [u'@00', u'@22']
names = [u'חלק', u'סימן', u'טקסט']
section_names = ['Orach Chaim', 'Yoreh Deah', 'Even HaEzer', 'Choshen Mishpat']
parsed = util.file_to_ja([[[]]], noda_file, patterns, clean_and_align)
with codecs.open('testfile.txt', 'w', 'utf-8') as check_parse:
    util.jagged_array_to_file(check_parse, parsed.array(), names)

post_text_and_index(parsed.array(), section_names)

noda_file.close()
os.remove('errors.html')
Example #7
0
import codecs
import regex
from sefaria.model import *
from sources import functions
from data_utilities import util
from sources.GRA_on_pirkei_avot import gra_functions

"""
index record
parse text
text record
link
clean
"""

index = gra_functions.create_index()
functions.post_index(index)

gra_on_pirkei_avot = gra_functions.parse()

ref = 'Gra on Pirkei Avot'
text = gra_functions.create_text(gra_on_pirkei_avot)
functions.post_text(ref, text)

list_of_links = gra_functions.create_links(gra_on_pirkei_avot)
functions.post_link(list_of_links)

testing_file = codecs.open("testing_file.txt", 'w', 'utf-8')
util.jagged_array_to_file(testing_file, gra_on_pirkei_avot, ['Perek', 'Mishna', 'Comment'])
testing_file.close()
Example #8
0
# -*- coding: utf-8 -*-
import codecs
import regex
from sefaria.model import *
from sources import functions
from data_utilities import util
from sources.Targum_Jerusalem_English import tje_functions

english_book_names = [
    'Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy'
]

all_five_books = tje_functions.parse_targum_jerusalem_english()

for book, book_name in zip(all_five_books, english_book_names):
    print(book_name)
    ref = 'Targum Jerusalem,_{}'.format(book_name)
    text = tje_functions.create_text(book)
    functions.post_text(ref, text)

testing_file = codecs.open("testing_file.txt", 'w', 'utf-8')
util.jagged_array_to_file(testing_file, all_five_books,
                          ['Book', 'Chapter', 'Verse'])
testing_file.close()
Example #9
0
# -*- coding: utf-8 -*-
import codecs
from sefaria.model import *
import regex
from sources import functions
from data_utilities import util
from sources.Rif_on_Nedarim import rif_nedarim_functions

"""
index record
parse text
text record
link
clean
"""

index = rif_nedarim_functions.create_index()
functions.post_index(index)

rif_nedarim = rif_nedarim_functions.parse()

ref = 'Rif_Nedarim'
text = rif_nedarim_functions.create_text(rif_nedarim)
functions.post_text(ref, text)

testing_file = codecs.open("testing_file.txt", 'w', 'utf-8')
util.jagged_array_to_file(testing_file, rif_nedarim, ['Daf', 'Line'])
testing_file.close()

util.ja_to_xml(rif_nedarim, ['Daf', 'Line'])
Example #10
0
import codecs
from sources import functions
from data_utilities import util
from sources.Eben_Ezra_on_Eicha import eee_functions

"""
index record
parse text
text record
link
clean
"""

index = eee_functions.create_index()
functions.post_index(index)

eben_ezra = eee_functions.parse()

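# The first element of the parse is the introduction and gets its own ref; the rest goes to the base ref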
for index, each_text in enumerate(eben_ezra):
    ref = 'Eben Ezra on Lamentations'
    if index == 0:
        ref = 'Eben Ezra on Lamentations,_Introduction'
    text = eee_functions.create_text(each_text)
    functions.post_text(ref, text)


list_of_links = eee_functions.create_links(eben_ezra[1])
functions.post_link(list_of_links)

testing_file = codecs.open("testing_file.txt", 'w', 'utf-8')
util.jagged_array_to_file(testing_file, eben_ezra, ["AHHHHH", 'PEREK', 'MISHNA', 'COMMENT'])
testing_file.close()
Example #11
0

def post_text_and_index(text_struct, section_names):

    index = build_index(section_names)
    functions.post_index(index)

    for section_num, section in enumerate(section_names):

        new_text = {
            "versionTitle": 'Noda BeYehuda Warsaw 1880',
            "versionSource":
            'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001983501',
            "language": 'he',
            "text": text_struct[section_num]
        }
        functions.post_text('Noda BeYehuda, {}'.format(section), new_text)


patterns = [u'@00', u'@22']
names = [u'חלק', u'סימן', u'טקסט']
section_names = ['Orach Chaim', 'Yoreh Deah', 'Even HaEzer', 'Choshen Mishpat']
parsed = util.file_to_ja([[[]]], noda_file, patterns, clean_and_align)
with codecs.open('testfile.txt', 'w', 'utf-8') as check_parse:
    util.jagged_array_to_file(check_parse, parsed.array(), names)

post_text_and_index(parsed.array(), section_names)

noda_file.close()
os.remove('errors.html')
Example #12
0
# -*- coding: utf-8 -*-
import codecs
from sefaria.model import *
import regex
from sources import functions
from data_utilities import util
from sources.Rif_on_Megillah import rif_megillah_functions
"""
index record
parse text
text record
link
clean
"""

index = rif_megillah_functions.create_index()
functions.post_index(index)

rif_megillah = rif_megillah_functions.parse()

ref = 'Rif_Megillah'
text = rif_megillah_functions.create_text(rif_megillah)
functions.post_text(ref, text)

testing_file = codecs.open("testing_file.txt", 'w', 'utf-8')
util.jagged_array_to_file(testing_file, rif_megillah, ['Daf', 'Line'])
testing_file.close()
Example #13
0
# -*- coding: utf-8 -*-
import codecs
from sefaria.model import *
import regex
from sources import functions
from data_utilities import util
from sources.Rif_on_Megillah import rif_megillah_functions

"""
index record
parse text
text record
link
clean
"""

index = rif_megillah_functions.create_index()
functions.post_index(index)

rif_megillah = rif_megillah_functions.parse()

ref = 'Rif_Megillah'
text = rif_megillah_functions.create_text(rif_megillah)
functions.post_text(ref, text)

testing_file = codecs.open("testing_file.txt", 'w', 'utf-8')
util.jagged_array_to_file(testing_file, rif_megillah, ['Daf', 'Line'])
testing_file.close()
Example #14
0
# -*- coding: utf-8 -*-
import codecs
import regex
from sefaria.model import *
from sources import functions
from data_utilities import util
from sources.Targum_Isaiah_English import ti_functions

targum_isaiah = ti_functions.parse_targum_isaiah_english()

# ref = 'Targum Isaiah'
# text = ti_functions.create_text(targum_isaiah)
# functions.post_text(ref, text)

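# Posting is disabled above; for now just dump the parse for review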
testing_file = codecs.open("testing_file.txt", 'w', 'utf-8')
util.jagged_array_to_file(testing_file, targum_isaiah, ['Chapter', 'Verse'])
testing_file.close()
Example #15
0
import codecs
from sources import functions
from data_utilities import util
from sources.Lev_Sameach import ls_functions

"""
index record
parse text
text record
link
clean
"""

index = ls_functions.create_index()
functions.post_index(index)

lev_sameach = ls_functions.parse()

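# Post each of the three sections under its own ref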
a = ['Shorashim', 'Positive_Commandments', 'Negative_Commandments']
for index, each_depth_two in enumerate(lev_sameach):
    ref = 'Lev Sameach,_{}'.format(a[index])
    text = ls_functions.create_text(each_depth_two)
    functions.post_text(ref, text)


list_of_links = ls_functions.create_links(lev_sameach[0])
functions.post_link(list_of_links)

testing_file = codecs.open("testing_file.txt", 'w', 'utf-8')
util.jagged_array_to_file(testing_file, lev_sameach, ['DEPTH ONE', 'DEPTH TWO', 'DEPTH THREE'])
testing_file.close()
Example #16
0
# -*- coding: utf-8 -*-
import codecs
import regex
from sefaria.model import *
from sources import functions
from data_utilities import util
from sources.Targum_Jerusalem_Hebrew import tjh_functions

english_names = ['Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy']
index = tjh_functions.create_index_record()
functions.post_index(index)

all_of_humash = tjh_functions.parse()

for book, book_name in zip(all_of_humash, english_names):
    ref = 'Targum Jerusalem, {}'.format(book_name)
    text = tjh_functions.create_text(book)
    functions.post_text(ref, text)

list_of_links = tjh_functions.create_links(all_of_humash)
functions.post_link(list_of_links)

testing_file = codecs.open("testing_file.txt", 'w', 'utf-8')
util.jagged_array_to_file(testing_file, all_of_humash, ['Book', 'Chapter', 'Verse'])
testing_file.close()
Example #17
0
# -*- coding: utf-8 -*-
import codecs
import regex
from sefaria.model import *
from sources import functions
from data_utilities import util
from sources.Targum_Jerusalem_Hebrew import tjh_functions

english_names = ['Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy']
index = tjh_functions.create_index_record()
functions.post_index(index)

all_of_humash = tjh_functions.parse()

for book, book_name in zip(all_of_humash, english_names):
    ref = 'Targum Jerusalem, {}'.format(book_name)
    text = tjh_functions.create_text(book)
    functions.post_text(ref, text)

list_of_links = tjh_functions.create_links(all_of_humash)
functions.post_link(list_of_links)

testing_file = codecs.open("testing_file.txt", 'w', 'utf-8')
util.jagged_array_to_file(testing_file, all_of_humash,
                          ['Book', 'Chapter', 'Verse'])
testing_file.close()
Example #18
0
# -*- coding: utf-8 -*-
import codecs
from sefaria.model import *
import regex
from sources import functions
from data_utilities import util
from sources.Rif_on_Nedarim import rif_nedarim_functions
"""
index record
parse text
text record
link
clean
"""

index = rif_nedarim_functions.create_index()
functions.post_index(index)

rif_nedarim = rif_nedarim_functions.parse()

ref = 'Rif_Nedarim'
text = rif_nedarim_functions.create_text(rif_nedarim)
functions.post_text(ref, text)

testing_file = codecs.open("testing_file.txt", 'w', 'utf-8')
util.jagged_array_to_file(testing_file, rif_nedarim, ['Daf', 'Line'])
testing_file.close()

util.ja_to_xml(rif_nedarim, ['Daf', 'Line'])