示例#1
0
def parse1(text):
    if os.path.isfile('source/Korban_Netanel_on_{}.txt'.format(masechet)) or os.path.isfile('source/Pilpula_Charifta_on_{}.txt'.format(masechet)):
        nose_kelim = nosekelim.open_file()
        fixed = nosekelim.parse(nose_kelim)
        links_netanel = []
        netanel = 0
    rosh = []
    chapters = re.split(ur'@00', text)
    for chapter_num, chapter in enumerate(chapters):
        print chapter_num, chapter[0:10]
        if len(chapter)<=1:
            pass
        else:
            perek = []
            a = re.split(ur'@22([^@]*)', chapter)
            for seif, cont in zip(a[1::2], a[2::2]):
                si = []
                korban =[]
                print seif
                if ur'[*]' in seif:
                    print "hello1"
                if ur'[*]' in seif and (os.path.isfile('source/Korban_Netanel_on_{}.txt'.format(masechet)) or os.path.isfile('source/Pilpula_Charifta_on_{}.txt'.format(masechet))) and netanel <= len(fixed):

                    print "hello", seif, netanel, len(fixed)
                    if os.path.isfile('source/Korban_Netanel_on_{}.txt'.format(masechet)):
                        commentator = "Korban Netanel"
                    if os.path.isfile('source/PilPula_Charifta_on_{}.txt'.format(masechet)):
                        commentator = "Pilpula Charifta"
                    korban.append(fixed[netanel])
                    roash = "Rosh on %s." % masechet  +str(len(rosh)+2) + "." + str(len(perek)+1) + ".1"
                    netanelink = commentator + " on " +  masechet +"."+ str(len(links_netanel)+1) + ".1"
                    print roash, netanelink
                    links.append(link(netanelink, roash))
                    netanel += 1
                content = re.split('@66', cont)
                seif = re.sub(ur'[^א-ת]',"", seif)
                seif = hebrew.heb_string_to_int(seif.strip())
                for num, co in enumerate(content):
                    a = re.findall('\[\*\]', co)
                    for b in a:
                        print b, seif
                        if (os.path.isfile('source/Korban_netanel_on_{}.txt'.format(masechet)) or os.path.isfile('source/Pilpula_Charifta_on_{}.txt'.format(masechet))) and netanel < len(fixed):
                            if os.path.isfile('source/Korban_netanel_on_{}.txt'.format(masechet)):
                                commentator = "Korban Netanel "
                            if os.path.isfile('source/Pilpula_Charifta_on_{}.txt'.format(masechet)):
                                commentator = "Pilpula Charifta "
                            korban.append(fixed[netanel])
                            roash = "Rosh on %s." % masechet + str(len(rosh)+2) + "." + str(len(perek)+1) + "." + str(num+1)
                            netanelink = commentator + "on " + masechet + "." + str(len(links_netanel)+1)+ "."+ str(len(korban))
                            print roash, netanelink
                            links.append(link(netanelink, roash))
                            netanel +=1
                    si.append(co)
                if os.path.isfile('source/Korban_Netanel_on_{}.txt'.format(masechet)) or os.path.isfile('source/Pilpula_Charifta_on_{}.txt'.format(masechet)):
                    links_netanel.append(korban)
                perek.append(si)
            rosh.append(perek)
示例#2
0
def parse1(text):
    if os.path.isfile('source/Korban_Netanel_on_{}.txt'.format(masechet)) or os.path.isfile('source/Pilpula_Charifta_on_{}.txt'.format(masechet)):
        nose_kelim = nosekelim.open_file()
        fixed = nosekelim.parse(nose_kelim)
        links_netanel = []
        netanel = 0
    rosh = []
    chapters = re.split(ur'@00', text)
    for chapter_num, chapter in enumerate(chapters):
        print chapter_num, chapter[0:10]
        if len(chapter)<=1:
            pass
        else:
            perek = []
            a = re.split(ur'@22([^@]*)', chapter)
            for seif, cont in zip(a[1::2], a[2::2]):
                si = []
                korban =[]
                print seif
                if ur'[*]' in seif:
                    print "hello1"
                if ur'[*]' in seif and (os.path.isfile('source/Korban_Netanel_on_{}.txt'.format(masechet)) or os.path.isfile('source/Pilpula_Charifta_on_{}.txt'.format(masechet))) and netanel <= len(fixed):

                    print "hello", seif, netanel, len(fixed)
                    if os.path.isfile('source/Korban_Netanel_on_{}.txt'.format(masechet)):
                        commentator = "Korban Netanel"
                    if os.path.isfile('source/PilPula_Charifta_on_{}.txt'.format(masechet)):
                        commentator = "Pilpula Charifta"
                    korban.append(fixed[netanel])
                    roash = "Rosh on %s." % masechet  +str(len(rosh)+2) + "." + str(len(perek)+1) + ".1"
                    netanelink = commentator + " on " +  masechet +"."+ str(len(links_netanel)+1) + ".1"
                    print roash, netanelink
                    links.append(link(netanelink, roash))
                    netanel += 1
                content = re.split('@66', cont)
                seif = re.sub(ur'[^א-ת]',"", seif)
                seif = hebrew.heb_string_to_int(seif.strip())
                for num, co in enumerate(content):
                    a = re.findall('\[\*\]', co)
                    for b in a:
                        print b, seif
                        if (os.path.isfile('source/Korban_netanel_on_{}.txt'.format(masechet)) or os.path.isfile('source/Pilpula_Charifta_on_{}.txt'.format(masechet))) and netanel < len(fixed):
                            if os.path.isfile('source/Korban_netanel_on_{}.txt'.format(masechet)):
                                commentator = "Korban Netanel "
                            if os.path.isfile('source/Pilpula_Charifta_on_{}.txt'.format(masechet)):
                                commentator = "Pilpula Charifta "
                            korban.append(fixed[netanel])
                            roash = "Rosh on %s." % masechet + str(len(rosh)+2) + "." + str(len(perek)+1) + "." + str(num+1)
                            netanelink = commentator + "on " + masechet + "." + str(len(links_netanel)+1)+ "."+ str(len(korban))
                            print roash, netanelink
                            links.append(link(netanelink, roash))
                            netanel +=1
                    si.append(co)
                if os.path.isfile('source/Korban_Netanel_on_{}.txt'.format(masechet)) or os.path.isfile('source/Pilpula_Charifta_on_{}.txt'.format(masechet)):
                    links_netanel.append(korban)
                perek.append(si)
            rosh.append(perek)
示例#3
0
def parse(text):
    if os.path.isfile("../source/Korban_Netanel_on_{}.txt".format(masechet)) or os.path.isfile(
        "../source/Pilpula_Charifta_on_{}.txt".format(masechet)
    ):
        # print "has korban netanel 2"
        nose_kelim = nosekelim.open_file()
        fixed = nosekelim.parse(nose_kelim)
        links_netanel = []
        netanel = 0
    rosh = []
    a = re.split(ur"@22([^@]*)", text)
    for seif, cont in zip(a[1::2], a[2::2]):
        si = []
        korban = []
        if ur"[*]" in seif and (
            os.path.isfile(
                "../source/Korban_Netanel_on_{}.txt".format(masechet)
                or os.path.isfile("../source/PilPula_Charifta_on_{}.txt".format(masechet))
            )
            and netanel <= len(fixed)
        ):
            if os.path.isfile("../source/Korban_Netanel_on_{}.txt".format(masechet)):
                commentator = "Korban Netanel on "
            if os.path.isfile("../source/PilPula_Charifta_on_{}.txt".format(masechet)):
                commentator = "Pilpula Charifta on "
            korban.append(fixed[netanel])
            # print len(links_netanel)
            roash = "Rosh on %s." % masechet + str(len(links_netanel) + 1) + ".1"
            netanelink = commentator + masechet + "." + str(len(links_netanel) + 1) + ".1"
            links.append(link(netanelink, roash))
            netanel += 1
            # print "netanel one seif", seif, netanel
            # print fixed[netanel]
        content = re.split("@66", cont)
        seif = re.sub(ur"[^א-ת]", "", seif)
        seif = hebrew.heb_string_to_int(seif.strip())
        for num, co in enumerate(content):
            if ur"[*]" in co:
                # print co
                a = re.findall("\[\*\](.{6})", co)
                for b in a:
                    if (
                        os.path.isfile("../source/Korban_netanel_on_{}.txt".format(masechet))
                        or os.path.isfile("../source/Pilpula_Charifta_on_{}.txt".format(masechet))
                    ) and netanel < len(fixed):
                        if os.path.isfile("../source/Korban_netanel_on_{}.txt".format(masechet)):
                            commentator = "Korban Netanel "
                        if os.path.isfile("../source/Pilpula_Charifta_on_{}.txt".format(masechet)):
                            commentator = "Pilpula Charifta "
                        korban.append(fixed[netanel])
                        roash = "Rosh on %s." % masechet + str(len(links_netanel) + 1) + "." + str(num + 1)
                        netanelink = (
                            commentator + "on " + masechet + "." + str(len(links_netanel) + 1) + "." + str(len(korban))
                        )
                        links.append(link(netanelink, roash))
                        netanel += 1
            si.append(co)
示例#4
0
def parse(text):
    if os.path.isfile('source/Korban_Netanel_on_{}.txt'.format(masechet)) or os.path.isfile('source/Pilpula_Charifta_on_{}.txt'.format(masechet)):
       # print "has korban netanel 2"
        nose_kelim = nosekelim.open_file()
        fixed = nosekelim.parse(nose_kelim)
        links_netanel = []
        netanel = 0
    rosh = []
    a = re.split(ur'@22([^@]*)', text)
    for seif, cont in zip(a[1::2], a[2::2]):
        si = []
        korban =[]
        if ur'[*]' in seif and (os.path.isfile('source/Korban_Netanel_on_{}.txt'.format(masechet) or os.path.isfile('source/PilPula_Charifta_on_{}.txt'.format(masechet))) and netanel <= len(fixed)):
            if os.path.isfile('source/Korban_Netanel_on_{}.txt'.format(masechet)):
                commentator = "Korban Netanel on "
            if os.path.isfile('source/PilPula_Charifta_on_{}.txt'.format(masechet)):
                commentator = "Pilpula Charifta on "
            korban.append(fixed[netanel])
            #print len(links_netanel)
            roash = "Rosh on %s." % masechet + str(len(links_netanel)+1) + ".1"
            netanelink = commentator + masechet +"."+ str(len(links_netanel)+1) + ".1"
            links.append(link(netanelink, roash))
            netanel += 1
            #print "netanel one seif", seif, netanel
            #print fixed[netanel]
        content = re.split('@66', cont)
        seif = re.sub(ur'[^א-ת]',"", seif)
        seif = hebrew.heb_string_to_int(seif.strip())
        for num, co in enumerate(content):
            if ur'[*]' in co:
                print co
                a = re.findall('\[\*\](.{6})', co)
                for b in a:
                    if (os.path.isfile('source/Korban_netanel_on_{}.txt'.format(masechet)) or os.path.isfile('source/Pilpula_Charifta_on_{}.txt'.format(masechet))) and netanel < len(fixed):
                        if os.path.isfile('source/Korban_netanel_on_{}.txt'.format(masechet)):
                            commentator = "Korban Netanel "
                        if os.path.isfile('source/Pilpula_Charifta_on_{}.txt'.format(masechet)):
                            commentator = "Pilpula Charifta "
                        korban.append(fixed[netanel])
                        roash = "Rosh on %s." % masechet + str(len(links_netanel)+1) + "." + str(num+1)
                        netanelink = commentator + "on " +masechet + "." + str(len(links_netanel)+1)+ "."+ str(len(korban))
                        links.append(link(netanelink, roash))
                        netanel +=1
            si.append(co)
示例#5
0
def parse1(text):
    old_numeri = 0
    if os.path.isfile('source/Korban_Netanel_on_{}.txt'.format(
            masechet)) or os.path.isfile(
                'source/Pilpula_Charifta_on_{}.txt'.format(masechet)):
        nose_kelim = nosekelim.open_file()
        fixed = nosekelim.parse(nose_kelim)
        links_netanel = []
        netanel = 0
    rosh = []
    chapters = re.split(ur'(?:@00|@99)([^@]*)', text)
    for chapter_num, chapter in zip(chapters[1::2], chapters[2::2]):
        mispar = chapter_num.strip().split(" ")[1]
        if mispar.encode('utf-8') in misparim.keys():
            mispar_numeri = misparim[mispar.encode('utf-8')]
            print mispar_numeri
            if mispar_numeri - old_numeri > 1:
                for i in range(1, mispar_numeri - old_numeri):
                    rosh.append([])
                    #print "length of rosh", len(rosh)
            old_numeri = mispar_numeri
        print mispar
        #if len(chapter)<=1:
        #   pass
        #else:
        perek = []
        a = re.split(ur'@22([^@]*)', chapter)
        for seif, cont in zip(a[1::2], a[2::2]):
            si = []
            korban = []
            #print seif
            if ur'[*]' in seif and (
                    os.path.isfile(
                        'source/Korban_Netanel_on_{}.txt'.format(masechet))
                    or os.path.isfile(
                        'source/Pilpula_Charifta_on_{}.txt'.format(masechet))
            ) and netanel < len(fixed):
                # print "hello", seif, netanel, len(fixed)
                if os.path.isfile(
                        'source/Korban_Netanel_on_{}.txt'.format(masechet)):
                    commentator = "Korban Netanel"
                if os.path.isfile(
                        'source/PilPula_Charifta_on_{}.txt'.format(masechet)):
                    commentator = "Pilpula Charifta"
                korban.append(fixed[netanel])
                roash = "Rosh on %s." % masechet + str(
                    len(rosh) + 1) + "." + str(len(perek) + 1) + ".1"
                netanelink = commentator + " on " + masechet + "." + str(
                    len(links_netanel) + 1) + ".1"
                #print roash, netanelink
                links.append(link(netanelink, roash))
                netanel += 1
            content = re.split('@66', cont)
            seif = re.sub(ur'[^א-ת]', "", seif)
            seif = hebrew.heb_string_to_int(seif.strip())
            for num, co in enumerate(content):
                a = re.findall('\[\*\]', co)
                for b in a:
                    #   print b, seif
                    if (os.path.isfile(
                            'source/Korban_netanel_on_{}.txt'.format(masechet))
                            or os.path.isfile(
                                'source/Pilpula_Charifta_on_{}.txt'.format(
                                    masechet))) and netanel < len(fixed):
                        if os.path.isfile(
                                'source/Korban_netanel_on_{}.txt'.format(
                                    masechet)):
                            commentator = "Korban Netanel "
                        if os.path.isfile(
                                'source/Pilpula_Charifta_on_{}.txt'.format(
                                    masechet)):
                            commentator = "Pilpula Charifta "
                        korban.append(fixed[netanel])
                        roash = "Rosh on %s." % masechet + str(
                            len(rosh) + 1) + "." + str(len(perek) +
                                                       1) + "." + str(num + 1)
                        netanelink = commentator + "on " + masechet + "." + str(
                            len(links_netanel) + 1) + "." + str(len(korban))
                        #print roash, netanelink
                        links.append(link(netanelink, roash))
                        netanel += 1
                si.append(co)
            if os.path.isfile('source/Korban_Netanel_on_{}.txt'.format(
                    masechet)) or os.path.isfile(
                        'source/Pilpula_Charifta_on_{}.txt'.format(masechet)):
                links_netanel.append(korban)
            perek.append(si)
        if len(perek) is not 0:
            rosh.append(perek)
示例#6
0
def parse1(text):
    old_numeri = 0
    if os.path.isfile('source/Korban_Netanel_on_{}.txt'.format(masechet)) or os.path.isfile('source/Pilpula_Charifta_on_{}.txt'.format(masechet)):
        nose_kelim = nosekelim.open_file()
        fixed = nosekelim.parse(nose_kelim)
        links_netanel = []
        netanel = 0
    rosh = []
    chapters = re.split(ur'(?:@00|@99)([^@]*)', text)
    for chapter_num, chapter in zip(chapters[1::2], chapters[2::2]):
        mispar = chapter_num.strip().split(" ")[1]
        if mispar.encode('utf-8') in misparim.keys():
            mispar_numeri = misparim[mispar.encode('utf-8')]
            print mispar_numeri
            if mispar_numeri - old_numeri > 1:
               for i in range(1,mispar_numeri-old_numeri):
                    rosh.append([])
                    #print "length of rosh", len(rosh)
            old_numeri = mispar_numeri
        print mispar
        #if len(chapter)<=1:
         #   pass
        #else:
        perek = []
        a = re.split(ur'@22([^@]*)', chapter)
        for seif, cont in zip(a[1::2], a[2::2]):
            si = []
            korban =[]
            #print seif
            if ur'[*]' in seif and (os.path.isfile('source/Korban_Netanel_on_{}.txt'.format(masechet)) or os.path.isfile('source/Pilpula_Charifta_on_{}.txt'.format(masechet))) and netanel < len(fixed):
               # print "hello", seif, netanel, len(fixed)
                if os.path.isfile('source/Korban_Netanel_on_{}.txt'.format(masechet)):
                    commentator = "Korban Netanel"
                if os.path.isfile('source/PilPula_Charifta_on_{}.txt'.format(masechet)):
                    commentator = "Pilpula Charifta"
                korban.append(fixed[netanel])
                roash = "Rosh on %s." % masechet  +str(len(rosh)+1) + "." + str(len(perek)+1) + ".1"
                netanelink = commentator + " on " +  masechet +"."+ str(len(links_netanel)+1) + ".1"
                #print roash, netanelink
                links.append(link(netanelink, roash))
                netanel += 1
            content = re.split('@66', cont)
            seif = re.sub(ur'[^א-ת]',"", seif)
            seif = hebrew.heb_string_to_int(seif.strip())
            for num, co in enumerate(content):
                a = re.findall('\[\*\]', co)
                for b in a:
                 #   print b, seif
                    if (os.path.isfile('source/Korban_netanel_on_{}.txt'.format(masechet)) or os.path.isfile('source/Pilpula_Charifta_on_{}.txt'.format(masechet))) and netanel < len(fixed):
                        if os.path.isfile('source/Korban_netanel_on_{}.txt'.format(masechet)):
                            commentator = "Korban Netanel "
                        if os.path.isfile('source/Pilpula_Charifta_on_{}.txt'.format(masechet)):
                            commentator = "Pilpula Charifta "
                        korban.append(fixed[netanel])
                        roash = "Rosh on %s." % masechet + str(len(rosh)+1) + "." + str(len(perek)+1) + "." + str(num+1)
                        netanelink = commentator + "on " + masechet + "." + str(len(links_netanel)+1)+ "."+ str(len(korban))
                        #print roash, netanelink
                        links.append(link(netanelink, roash))
                        netanel +=1
                si.append(co)
            if os.path.isfile('source/Korban_Netanel_on_{}.txt'.format(masechet)) or os.path.isfile('source/Pilpula_Charifta_on_{}.txt'.format(masechet)):
                links_netanel.append(korban)
            perek.append(si)
        if len(perek) is not 0:
            rosh.append(perek)