def parse1(text):
    """Split a Rosh text into chapters/seifim and link its ``[*]`` markers.

    ``text`` is split on ``@00`` into chapters, each chapter on ``@22``
    into (heading, body) pairs, and each body on ``@66`` into segments.
    For every ``[*]`` marker (one in the heading, any number per segment)
    the next unused entry of the parsed commentary (``fixed``) is consumed
    and ``link(commentary_ref, rosh_ref)`` is appended to the module-level
    ``links`` list.

    Relies on module-level names: ``masechet``, ``nosekelim``, ``hebrew``,
    ``links`` and ``link``.

    NOTE(review): the visible code has no ``return``; ``rosh`` and
    ``links_netanel`` are only built locally here.
    """
    # The commentary files do not change while parsing, so look them up once
    # instead of hitting the filesystem for every marker.  The original mixed
    # spellings ("PilPula_Charifta", "Korban_netanel"); on a case-sensitive
    # filesystem that left ``commentator`` unbound/stale or silently skipped
    # links.  The spellings below are the ones that gate loading ``fixed``.
    has_korban = os.path.isfile(
        'source/Korban_Netanel_on_{}.txt'.format(masechet))
    has_pilpula = os.path.isfile(
        'source/Pilpula_Charifta_on_{}.txt'.format(masechet))
    has_commentary = has_korban or has_pilpula

    fixed = []
    if has_commentary:
        nose_kelim = nosekelim.open_file()
        fixed = nosekelim.parse(nose_kelim)
    # When both files exist Pilpula Charifta wins, matching the order of the
    # original pair of ``if`` statements.
    commentator = "Pilpula Charifta" if has_pilpula else "Korban Netanel"

    links_netanel = []
    netanel = 0  # index of the next unused commentary entry in ``fixed``
    rosh = []
    for chapter_num, chapter in enumerate(re.split(u'@00', text)):
        print("%s %s" % (chapter_num, chapter[0:10]))
        if len(chapter) <= 1:
            continue
        perek = []
        pieces = re.split(u'@22([^@]*)', chapter)
        # With one capture group re.split alternates heading, body, ...
        for seif, cont in zip(pieces[1::2], pieces[2::2]):
            si = []
            korban = []
            print(seif)
            heading_marked = u'[*]' in seif
            if heading_marked:
                print("hello1")
            # ``<`` (not ``<=``): fixed[len(fixed)] would raise IndexError.
            if heading_marked and has_commentary and netanel < len(fixed):
                print("%s %s %s %s" % ("hello", seif, netanel, len(fixed)))
                korban.append(fixed[netanel])
                # NOTE(review): chapter index is len(rosh) + 2 here, kept
                # as in the original -- confirm this offset is intended.
                roash = "Rosh on %s.%d.%d.1" % (
                    masechet, len(rosh) + 2, len(perek) + 1)
                netanelink = "%s on %s.%d.1" % (
                    commentator, masechet, len(links_netanel) + 1)
                print("%s %s" % (roash, netanelink))
                links.append(link(netanelink, roash))
                netanel += 1
            content = re.split('@66', cont)
            seif = re.sub(u'[^א-ת]', "", seif)
            seif = hebrew.heb_string_to_int(seif.strip())
            for num, co in enumerate(content):
                for marker in re.findall(r'\[\*\]', co):
                    print("%s %s" % (marker, seif))
                    if has_commentary and netanel < len(fixed):
                        korban.append(fixed[netanel])
                        roash = "Rosh on %s.%d.%d.%d" % (
                            masechet, len(rosh) + 2, len(perek) + 1, num + 1)
                        netanelink = "%s on %s.%d.%d" % (
                            commentator, masechet,
                            len(links_netanel) + 1, len(korban))
                        print("%s %s" % (roash, netanelink))
                        links.append(link(netanelink, roash))
                        netanel += 1
                si.append(co)
            if has_commentary:
                links_netanel.append(korban)
            perek.append(si)
        rosh.append(perek)
def parse(text):
    """Link the ``[*]`` markers of a single Rosh chapter to its commentary.

    ``text`` is split on ``@22`` into (heading, body) pairs and each body
    on ``@66`` into segments.  For a ``[*]`` in the heading, and for every
    ``[*]`` followed by six characters in a segment, the next unused entry
    of the parsed commentary (``fixed``) is consumed and
    ``link(commentary_ref, rosh_ref)`` is appended to the module-level
    ``links`` list.

    Relies on module-level names: ``masechet``, ``nosekelim``, ``hebrew``,
    ``links`` and ``link``.

    NOTE(review): in the visible code nothing is returned, and
    ``links_netanel`` is never appended to, so every reference uses
    index 1 -- confirm against the rest of the file.
    """
    # Check the commentary files once.  The original repeated the checks per
    # marker with inconsistent spellings ("PilPula_Charifta",
    # "Korban_netanel"), which breaks on case-sensitive filesystems.
    has_korban = os.path.isfile(
        "../source/Korban_Netanel_on_{}.txt".format(masechet))
    has_pilpula = os.path.isfile(
        "../source/Pilpula_Charifta_on_{}.txt".format(masechet))
    has_commentary = has_korban or has_pilpula

    fixed = []
    if has_commentary:
        nose_kelim = nosekelim.open_file()
        fixed = nosekelim.parse(nose_kelim)
    # When both files exist Pilpula Charifta wins, as in the original order
    # of the two ``if`` statements.
    commentator = "Pilpula Charifta" if has_pilpula else "Korban Netanel"

    links_netanel = []
    netanel = 0  # index of the next unused commentary entry in ``fixed``
    rosh = []
    pieces = re.split(u"@22([^@]*)", text)
    # With one capture group re.split alternates heading, body, ...
    for seif, cont in zip(pieces[1::2], pieces[2::2]):
        si = []
        korban = []
        # Two fixes here: the original had ``or os.path.isfile(...)`` inside
        # the first isfile() argument, so the Pilpula Charifta file was never
        # consulted; and ``netanel <= len(fixed)`` allowed an IndexError on
        # fixed[netanel].
        if u"[*]" in seif and has_commentary and netanel < len(fixed):
            korban.append(fixed[netanel])
            roash = "Rosh on %s.%d.1" % (masechet, len(links_netanel) + 1)
            netanelink = "%s on %s.%d.1" % (
                commentator, masechet, len(links_netanel) + 1)
            links.append(link(netanelink, roash))
            netanel += 1
        content = re.split("@66", cont)
        seif = re.sub(u"[^א-ת]", "", seif)
        seif = hebrew.heb_string_to_int(seif.strip())
        for num, co in enumerate(content):
            if u"[*]" in co:
                # Only markers followed by at least six characters match.
                for _marker in re.findall(r"\[\*\](.{6})", co):
                    if has_commentary and netanel < len(fixed):
                        korban.append(fixed[netanel])
                        roash = "Rosh on %s.%d.%d" % (
                            masechet, len(links_netanel) + 1, num + 1)
                        netanelink = "%s on %s.%d.%d" % (
                            commentator, masechet,
                            len(links_netanel) + 1, len(korban))
                        links.append(link(netanelink, roash))
                        netanel += 1
            si.append(co)
def parse(text):
    """Link the ``[*]`` markers of a single Rosh chapter to its commentary.

    ``text`` is split on ``@22`` into (heading, body) pairs and each body
    on ``@66`` into segments.  For a ``[*]`` in the heading, and for every
    ``[*]`` followed by six characters in a segment, the next unused entry
    of the parsed commentary (``fixed``) is consumed and
    ``link(commentary_ref, rosh_ref)`` is appended to the module-level
    ``links`` list.  Segments containing a marker are echoed to stdout.

    Relies on module-level names: ``masechet``, ``nosekelim``, ``hebrew``,
    ``links`` and ``link``.

    NOTE(review): in the visible code nothing is returned, and
    ``links_netanel`` is never appended to, so every reference uses
    index 1 -- confirm against the rest of the file.
    """
    # Check the commentary files once.  The original repeated the checks per
    # marker with inconsistent spellings ("PilPula_Charifta",
    # "Korban_netanel"), which breaks on case-sensitive filesystems.
    has_korban = os.path.isfile(
        'source/Korban_Netanel_on_{}.txt'.format(masechet))
    has_pilpula = os.path.isfile(
        'source/Pilpula_Charifta_on_{}.txt'.format(masechet))
    has_commentary = has_korban or has_pilpula

    fixed = []
    if has_commentary:
        nose_kelim = nosekelim.open_file()
        fixed = nosekelim.parse(nose_kelim)
    # When both files exist Pilpula Charifta wins, as in the original order
    # of the two ``if`` statements.
    commentator = "Pilpula Charifta" if has_pilpula else "Korban Netanel"

    links_netanel = []
    netanel = 0  # index of the next unused commentary entry in ``fixed``
    rosh = []
    pieces = re.split(u'@22([^@]*)', text)
    # With one capture group re.split alternates heading, body, ...
    for seif, cont in zip(pieces[1::2], pieces[2::2]):
        si = []
        korban = []
        # Two fixes here: the original had ``or os.path.isfile(...)`` inside
        # the first isfile() argument, so the Pilpula Charifta file was never
        # consulted; and ``netanel <= len(fixed)`` allowed an IndexError on
        # fixed[netanel].
        if u'[*]' in seif and has_commentary and netanel < len(fixed):
            korban.append(fixed[netanel])
            roash = "Rosh on %s.%d.1" % (masechet, len(links_netanel) + 1)
            netanelink = "%s on %s.%d.1" % (
                commentator, masechet, len(links_netanel) + 1)
            links.append(link(netanelink, roash))
            netanel += 1
        content = re.split('@66', cont)
        seif = re.sub(u'[^א-ת]', "", seif)
        seif = hebrew.heb_string_to_int(seif.strip())
        for num, co in enumerate(content):
            if u'[*]' in co:
                print(co)
                # Only markers followed by at least six characters match.
                for _marker in re.findall(r'\[\*\](.{6})', co):
                    if has_commentary and netanel < len(fixed):
                        korban.append(fixed[netanel])
                        roash = "Rosh on %s.%d.%d" % (
                            masechet, len(links_netanel) + 1, num + 1)
                        netanelink = "%s on %s.%d.%d" % (
                            commentator, masechet,
                            len(links_netanel) + 1, len(korban))
                        links.append(link(netanelink, roash))
                        netanel += 1
            si.append(co)
def parse1(text):
    """Split a Rosh text into numbered chapters and link its ``[*]`` markers.

    ``text`` is split on ``@00``/``@99`` chapter headings.  The second word
    of each heading is looked up (UTF-8 encoded) in ``misparim``; when the
    resulting number skips ahead, empty chapters are appended to ``rosh``
    to fill the gap.  Each chapter is split on ``@22`` into (heading, body)
    pairs and each body on ``@66`` into segments.  For every ``[*]`` marker
    the next unused entry of the parsed commentary (``fixed``) is consumed
    and ``link(commentary_ref, rosh_ref)`` is appended to the module-level
    ``links`` list.

    Relies on module-level names: ``masechet``, ``misparim``,
    ``nosekelim``, ``hebrew``, ``links`` and ``link``.

    NOTE(review): the visible code has no ``return``; ``rosh`` and
    ``links_netanel`` are only built locally here.
    """
    # Check the commentary files once instead of once per marker.  The
    # original also mixed spellings ("PilPula_Charifta", "Korban_netanel"),
    # which on a case-sensitive filesystem left ``commentator`` unbound/stale
    # or silently skipped links.
    has_korban = os.path.isfile(
        'source/Korban_Netanel_on_{}.txt'.format(masechet))
    has_pilpula = os.path.isfile(
        'source/Pilpula_Charifta_on_{}.txt'.format(masechet))
    has_commentary = has_korban or has_pilpula

    fixed = []
    if has_commentary:
        nose_kelim = nosekelim.open_file()
        fixed = nosekelim.parse(nose_kelim)
    # When both files exist Pilpula Charifta wins, as in the original order
    # of the two ``if`` statements.
    commentator = "Pilpula Charifta" if has_pilpula else "Korban Netanel"

    old_numeri = 0  # number of the last chapter heading that was recognised
    links_netanel = []
    netanel = 0  # index of the next unused commentary entry in ``fixed``
    rosh = []
    chapters = re.split(u'(?:@00|@99)([^@]*)', text)
    # With one capture group re.split alternates heading, body, ...
    for chapter_num, chapter in zip(chapters[1::2], chapters[2::2]):
        mispar = chapter_num.strip().split(" ")[1]
        mispar_key = mispar.encode('utf-8')
        if mispar_key in misparim:
            mispar_numeri = misparim[mispar_key]
            print(mispar_numeri)
            # Pad with one empty chapter per skipped chapter number.
            if mispar_numeri - old_numeri > 1:
                rosh.extend(
                    [] for _ in range(1, mispar_numeri - old_numeri))
            old_numeri = mispar_numeri
        print(mispar)
        perek = []
        pieces = re.split(u'@22([^@]*)', chapter)
        for seif, cont in zip(pieces[1::2], pieces[2::2]):
            si = []
            korban = []
            if u'[*]' in seif and has_commentary and netanel < len(fixed):
                korban.append(fixed[netanel])
                roash = "Rosh on %s.%d.%d.1" % (
                    masechet, len(rosh) + 1, len(perek) + 1)
                netanelink = "%s on %s.%d.1" % (
                    commentator, masechet, len(links_netanel) + 1)
                links.append(link(netanelink, roash))
                netanel += 1
            content = re.split('@66', cont)
            seif = re.sub(u'[^א-ת]', "", seif)
            seif = hebrew.heb_string_to_int(seif.strip())
            for num, co in enumerate(content):
                for _marker in re.findall(r'\[\*\]', co):
                    if has_commentary and netanel < len(fixed):
                        korban.append(fixed[netanel])
                        roash = "Rosh on %s.%d.%d.%d" % (
                            masechet, len(rosh) + 1, len(perek) + 1, num + 1)
                        netanelink = "%s on %s.%d.%d" % (
                            commentator, masechet,
                            len(links_netanel) + 1, len(korban))
                        links.append(link(netanelink, roash))
                        netanel += 1
                si.append(co)
            if has_commentary:
                links_netanel.append(korban)
            perek.append(si)
        # Truthiness instead of ``len(perek) is not 0`` (identity comparison
        # with an int literal only works by CPython accident).
        if perek:
            rosh.append(perek)
def parse1(text):
    """Split a Rosh text into numbered chapters and link its ``[*]`` markers.

    ``text`` is split on ``@00``/``@99`` chapter headings.  The second word
    of each heading is looked up (UTF-8 encoded) in ``misparim``; when the
    resulting number skips ahead, empty chapters are appended to ``rosh``
    to fill the gap.  Each chapter is split on ``@22`` into (heading, body)
    pairs and each body on ``@66`` into segments.  For every ``[*]`` marker
    the next unused entry of the parsed commentary (``fixed``) is consumed
    and ``link(commentary_ref, rosh_ref)`` is appended to the module-level
    ``links`` list.

    Relies on module-level names: ``masechet``, ``misparim``,
    ``nosekelim``, ``hebrew``, ``links`` and ``link``.

    NOTE(review): the visible code has no ``return``; ``rosh`` and
    ``links_netanel`` are only built locally here.
    """
    # Check the commentary files once instead of once per marker.  The
    # original also mixed spellings ("PilPula_Charifta", "Korban_netanel"),
    # which on a case-sensitive filesystem left ``commentator`` unbound/stale
    # or silently skipped links.
    has_korban = os.path.isfile(
        'source/Korban_Netanel_on_{}.txt'.format(masechet))
    has_pilpula = os.path.isfile(
        'source/Pilpula_Charifta_on_{}.txt'.format(masechet))
    has_commentary = has_korban or has_pilpula

    fixed = []
    if has_commentary:
        nose_kelim = nosekelim.open_file()
        fixed = nosekelim.parse(nose_kelim)
    # When both files exist Pilpula Charifta wins, as in the original order
    # of the two ``if`` statements.
    commentator = "Pilpula Charifta" if has_pilpula else "Korban Netanel"

    old_numeri = 0  # number of the last chapter heading that was recognised
    links_netanel = []
    netanel = 0  # index of the next unused commentary entry in ``fixed``
    rosh = []
    chapters = re.split(u'(?:@00|@99)([^@]*)', text)
    # With one capture group re.split alternates heading, body, ...
    for chapter_num, chapter in zip(chapters[1::2], chapters[2::2]):
        mispar = chapter_num.strip().split(" ")[1]
        mispar_key = mispar.encode('utf-8')
        if mispar_key in misparim:
            mispar_numeri = misparim[mispar_key]
            print(mispar_numeri)
            # Pad with one empty chapter per skipped chapter number.
            if mispar_numeri - old_numeri > 1:
                rosh.extend(
                    [] for _ in range(1, mispar_numeri - old_numeri))
            old_numeri = mispar_numeri
        print(mispar)
        perek = []
        pieces = re.split(u'@22([^@]*)', chapter)
        for seif, cont in zip(pieces[1::2], pieces[2::2]):
            si = []
            korban = []
            if u'[*]' in seif and has_commentary and netanel < len(fixed):
                korban.append(fixed[netanel])
                roash = "Rosh on %s.%d.%d.1" % (
                    masechet, len(rosh) + 1, len(perek) + 1)
                netanelink = "%s on %s.%d.1" % (
                    commentator, masechet, len(links_netanel) + 1)
                links.append(link(netanelink, roash))
                netanel += 1
            content = re.split('@66', cont)
            seif = re.sub(u'[^א-ת]', "", seif)
            seif = hebrew.heb_string_to_int(seif.strip())
            for num, co in enumerate(content):
                for _marker in re.findall(r'\[\*\]', co):
                    if has_commentary and netanel < len(fixed):
                        korban.append(fixed[netanel])
                        roash = "Rosh on %s.%d.%d.%d" % (
                            masechet, len(rosh) + 1, len(perek) + 1, num + 1)
                        netanelink = "%s on %s.%d.%d" % (
                            commentator, masechet,
                            len(links_netanel) + 1, len(korban))
                        links.append(link(netanelink, roash))
                        netanel += 1
                si.append(co)
            if has_commentary:
                links_netanel.append(korban)
            perek.append(si)
        # Truthiness instead of ``len(perek) is not 0`` (identity comparison
        # with an int literal only works by CPython accident).
        if perek:
            rosh.append(perek)