def LIST(murl):
    """List one day's MLB condensed games or highlights as directory items.

    The day is taken from the first ``<digits>-NN-NN`` sequence in ``murl``
    and used to build the ``grid.json`` URL for that day.  The user is then
    asked which kind of item to list; each game found in the grid is added
    through ``main.addPlayMs`` (condensed games) or ``main.addDir``
    (highlights).

    Args:
        murl: String expected to contain a date such as ``2013-04-07``.

    Returns:
        None.  Returns early, without prompting, when ``murl`` holds no
        date, and when the selection dialog is cancelled.
    """
    date = re.search(r'(\d+)-(\d{2})-(\d{2})', murl)
    if date is None:
        # No date means no grid URL can be built; previously this crashed
        # with AttributeError on ``date.group``.
        return
    xurl = ('http://mlb.mlb.com/gdcross/components/game/mlb/year_'
            + date.group(1) + '/month_' + date.group(2)
            + '/day_' + date.group(3) + '/grid.json')

    dialog = xbmcgui.Dialog()
    ret = dialog.select('Choose Type', ['Condensed Games', 'Highlights'])
    if ret == -1:
        # Dialog was dismissed without a choice.
        return

    logo_prefix = 'http://mlb.mlb.com/mlb/images/team_logos/logo_'
    if ret == 0:
        link = main.OPENURL(xurl)
        match = re.compile(
            '{"id":"([^"]+)","playback_scenario":"FLASH_1800K_.+?","state":"MEDIA_ARCHIVE","type":"condensed_game"}.+?"away_team_name":"([^"]+)".+?"home_team_name":"([^"]+)".+?"home_file_code":"([^"]+)"',
            re.DOTALL).findall(link)
        for media_id, away, home, fname in match:
            thumb = logo_prefix + fname + '_79x76.jpg'
            # The detail XML path is sharded by the last three characters
            # of the media id.
            mod = media_id[-3:]
            url = ('http://m.mlb.com/gen/multimedia/detail/' + mod[0] + '/'
                   + mod[1] + '/' + mod[2] + '/' + media_id + '.xml')
            main.addPlayMs(away + " at " + home, url, 449, thumb,
                           '', '', '', '', '')
    if ret == 1:
        link = main.OPENURL(xurl)
        match = re.compile(
            '"away_team_name":"([^"]+)".+?"home_team_name":"([^"]+)".+?"game_pk":"([^"]+)".+?"home_file_code":"([^"]+)"',
            re.DOTALL).findall(link)
        for away, home, game_pk, fname in match:
            thumb = logo_prefix + fname + '_79x76.jpg'
            main.addDir(away + " at " + home, game_pk, 450, thumb)
def extractDate(row):
    """Store the Day/Month/Year found in ``row['text']`` on ``row``.

    The patterns in the module-level ``date_regex`` sequence are tried in
    order.  The first one that matches supplies the named groups ``Day``,
    ``Month`` and ``Year``, which are written to ``row`` under those same
    keys; later patterns are not tried.

    Args:
        row: Mapping-like record with a ``'text'`` entry that supports item
            assignment.

    Returns:
        The same ``row`` object, unchanged when no pattern matches.
    """
    for pattern in date_regex:
        found = re.search(pattern, row['text'])
        if found is not None:
            row['Day'] = found.group('Day')
            row['Month'] = found.group('Month')
            row['Year'] = found.group('Year')
            break  # first matching pattern wins
    return row
def treat_date(date):
    """Split the first ``M/D/Y`` token found in ``date`` into its parts.

    Args:
        date: Text containing a slash-separated date such as ``3/14/21``.

    Returns:
        A tuple ``(token, day, month, year)`` of strings, where ``token`` is
        the matched text and ``year`` is the third field prefixed with
        ``'20'``.
    """
    found = re.search(r'(\d+/\d+/\d+)', date)
    token = found.group(1)
    fields = token.split("/")
    month = fields[0]
    day = fields[1]
    year = '20' + fields[2]
    return token, day, month, year
def whatday(date):
    """Return the weekday index of the date held in a regex match.

    Args:
        date: Match object whose third group is a dot-separated
            ``year.month.day`` string, or a falsy value when nothing
            matched.

    Returns:
        The ``calendar.weekday`` result (0 = Monday ... 6 = Sunday), or
        ``None`` when ``date`` is falsy.
    """
    if not date:
        # Previously this path fell through to ``return whatday`` with the
        # local never assigned, raising UnboundLocalError.
        return None
    parts = date.group(3).split('.')
    year = int(parts[0])
    month = int(parts[1])
    day = int(parts[2])
    return calendar.weekday(year, month, day)
def whatday(date):
    """Return the weekday index for the date captured by a regex match.

    Args:
        date: Match object whose group 3 holds ``year.month.day`` separated
            by dots, or a falsy value when there was no match.

    Returns:
        ``calendar.weekday(year, month, day)`` (Monday is 0), or ``None``
        when ``date`` is falsy.
    """
    if not date:
        # Without this guard the function reached its ``return`` with the
        # result variable unassigned and raised UnboundLocalError.
        return None
    fields = date.group(3).split(".")
    year, month, day = int(fields[0]), int(fields[1]), int(fields[2])
    return calendar.weekday(year, month, day)
def forwardedMailSearch(body):
    """Pull the flight year and departure entries out of a forwarded mail.

    The confirmation date is read from the ``Confirmation Date:`` line.  If
    the first departure, taken with the confirmation year, falls before the
    confirmation date, the flight year is bumped by one.

    Args:
        body: Full text of the mail.

    Returns:
        A tuple ``(flightyear, flightdate)``: the year as a string and a
        de-duplicated list of ``(date text, airport code, time)`` tuples.
    """
    confirmation = re.search(r'Confirmation Date:.*?(\d+/\d+/)(\d+)', body,
                             flags=re.S)
    flightyear = confirmation.group(2)
    confdate = parser.parse(confirmation.group(1) + flightyear)

    departureblock = re.findall(
        r"""([A-Z][a-z]{2}\ [A-Z][a-z]{2}\ \d{1,2}) .*? Depart .*? \(([A-Z]{3})\) .*?at.*? (\d+:\d+\ [AP]M)""",
        body, flags=re.DOTALL|re.VERBOSE)
    # Going through a set drops repeated departure entries.
    flightdate = list(set(departureblock))

    first_departure_text = departureblock[0][0]
    departuredate = parser.parse(first_departure_text + ' ' + flightyear)
    if departuredate - confdate < timedelta(0):
        # Departure would precede the booking, so it belongs to next year.
        flightyear = str(int(flightyear) + 1)
    return flightyear, flightdate
def getDate(S):
    ''' Return the date component found in string S.

    ``date1pattern`` (numeric ``d/m/Y`` or ``d-m-Y``) is tried first, then
    ``date2pattern`` (space-separated day, month name, year).  Returns a
    ``date`` object, or -1 when neither pattern matches.
    '''
    numeric = re.search(date1pattern, S)
    if numeric:
        text = numeric.group()
        if '-' in text:
            # Normalise dashes so one strptime format covers both spellings.
            text = text.replace('-', '/')
        return datetime.strptime(text, '%d/%m/%Y').date()

    worded = re.search(date2pattern, S)
    if not worded:
        return -1

    pieces = worded.group().split(' ')
    # Only the first three letters of the month name are kept.
    pieces[1] = pieces[1][0:3]
    key = pieces[1].lower()
    if key in months_synonym:
        pieces[1] = months_synonym[key]
    return datetime.strptime(" ".join(pieces), '%d %b %Y').date()
def get_date(self, response):
    """Return the release date found on the page as an ISO string.

    The text node containing "RELEASED" under ``div#details`` is searched
    for a ``Month D, YYYY`` date.  When the node is missing, lacks the
    marker, or holds no such date, today's date is returned instead.
    """
    node = self.process_xpath(
        response,
        '//div[@id="details"]//div[contains(text(), "RELEASED")]/text()')
    if node:
        text = node.get()
        if "RELEASED" in text:
            found = re.search(r'(\w+ \d{1,2}, \d{4})', text)
            if found:
                parsed = self.parse_date(found.group(1),
                                         date_formats=['%B %d, %Y'])
                return parsed.isoformat()
    # Fallback for every path that did not yield a date.
    return self.parse_date('today').isoformat()
def addTodayTasks(file): """Add tasks occuring today from a file to the todo list""" rem = todo.getDict(file) for k,v in rem.iteritems(): if verbose: print "%3d: %s" % (k, v) re_date = re.compile(r"{([^}]+)} ") date = re.search(re_date, v) if date: isToday = parseREM(date.group(1)) # date.group(1) = date in Remind format: Wed, 18 +3, Jan 26 +4 if isToday: task = re.sub(re_date, "", v) if taskExists(task): if verbose: print "Exists: " + task continue todo.add(task) else: if verbose: print "No date found for ", v
def toRange(str,pos):
    """Classify the token ``str`` and build a dict for the bound ``pos``.

    ``str`` is matched against several patterns: an event reference
    (optional ``<``/``>``/``~`` operator, ``e`` plus digits, optional dot,
    1-2 word characters), a leading operator, a duration (digits followed
    by d/m/y), a bare year, ``year-month`` and ``year-month-day``.

    * A reference returns (and stores in the global ``refObj``) a dict with
      keys ``"o"``, ``"ref"`` and ``"pos"``.
    * Year, month and date tokens build ``rangeObj`` from ``gcal2jd``
      results, keyed by ``pos``.
    * Operator and duration tokens only print.

    NOTE(review): on the operator/duration paths, and for ``pos`` values a
    branch does not handle, ``rangeObj`` is never assigned, so the final
    ``return`` raises -- confirm callers never reach those paths.
    """
    global refObj,durs
    print 'in toRange(), str='+str+', pos='+pos
    # All candidate patterns are evaluated up front; the if/elif chain below
    # decides which one wins.
    bef=re.match(r'^<',str); aft=re.match(r'^>',str); abt=re.match(r'^\~',str)
    neg=re.match(r'^-',str); year=re.match(r'^\d{1,4}$',str)
    month=re.match(r'(\d{1,4})-(\d{1,2})$',str); \
        date=re.match(r'^(\d{1,4})-(\d{1,2})-(\d{1,2})',str)
    dur=re.match(r'\d{1,3}(d|m|y)$',str); \
        ref=re.match(r'([<,>,~])?(e\d{1,3})\.?(\w{1,2})',str)
    if (ref):
        # NOTE(review): group(1) is None when no operator precedes the
        # reference, which makes this string concatenation raise -- confirm.
        refObj={"o":'"'+ref.group(1)+'"', "ref":ref.group(2), "pos": ref.group(3)}
        screwthis=refObj
        return refObj
    if (bef or aft or abt):
        print 'an operator, must parse ' + str
        # periods[12]['tSpans'][0]['e']
        # >e8.e
    elif (dur):
        #durs +=1
        print dur.group();
        # get s:, which could be date, month or year
    elif (year):
        # NOTE(review): gcal2jd receives ONE comma-joined string here but
        # three separate arguments in the date branch below -- confirm which
        # form the helper accepts.  Both bounds use January (01..31).
        if pos == 's':
            s=gcal2jd(year.group()+',01,01')[0]+gcal2jd(year.group()+',01,01')[1]
            ls=gcal2jd(year.group()+',01,31')[0]+gcal2jd(year.group()+',01,31')[1]
            rangeObj={"s":s,"ls":ls}
        elif pos == 'e':
            ee=gcal2jd(year.group()+',01,01')[0]+gcal2jd(year.group()+',01,01')[1]
            e=gcal2jd(year.group()+',1,31')[0]+gcal2jd(year.group()+',1,31')[1]
            rangeObj={"ee":ee,"e":e}
    elif (month):
        # month.group() is the whole "year-month" text; day 01 / 31 is
        # appended after a comma regardless of the month's real length.
        if pos == 's':
            s=gcal2jd(month.group()+',01')[0]+gcal2jd(month.group()+',01')[1]
            ls=gcal2jd(month.group()+',31')[0]+gcal2jd(month.group()+',31')[1]
            rangeObj={"s":s,"ls":ls}
        elif pos == 'e':
            ee=gcal2jd(month.group()+',01')[0]+gcal2jd(month.group()+',01')[1]
            e=gcal2jd(month.group()+',31')[0]+gcal2jd(month.group()+',31')[1]
            rangeObj={"ee":ee,"e":e}
    elif (date):
        print 'it\'s a date'
        # Sum of the first two items returned by gcal2jd (presumably the two
        # halves of a Julian date -- confirm against the helper's docs).
        foo=gcal2jd(date.group(1),date.group(2),date.group(3))[0] + \
            gcal2jd(date.group(1),date.group(2),date.group(3))[1]
        if pos == 's':
            rangeObj={"s":foo}
        elif pos == 'ls':
            rangeObj={"ls":foo}
        elif pos == 'e':
            rangeObj={"e":foo}
        elif pos == 'ee':
            rangeObj={"ee":foo}
    return rangeObj
def get_date(self, response):
    """Return the scene date as an ISO string, or None without a date node.

    The node selected by the ``'date'`` selector is searched with the
    ``'re_date'`` pattern, then with two ``active_from`` fallbacks.  If the
    node exists but no pattern matches, today's date is returned.
    """
    node = self.process_xpath(response, self.get_selector_map('date'))
    if not node:
        return None

    # Slashes may arrive as the literal escape sequence \u002F.
    datestring = node.get().replace(r"\u002F", "/")

    # Match objects are always truthy, so ``or`` tries each pattern in turn
    # and stops at the first hit.
    found = (
        re.search(self.get_selector_map('re_date'), datestring)
        or re.search(r'active_from=\"(\d{4}-\d{2}-\d{2})', datestring)
        or re.search(r'active_from:\"(\d{1,2}/\d{1,2}/\d{2})', datestring)
    )
    if not found:
        return self.parse_date('today').isoformat()

    parsed = self.parse_date(found.group(1),
                             date_formats=['%Y-%m-%d', '%m/%d/%Y'])
    return parsed.isoformat()
def addTodayTasks(file): """Add tasks occuring today from a file to the todo list""" rem = todo.getDict(file) for k, v in rem.iteritems(): if verbose: print "%3d: %s" % (k, v) re_date = re.compile(r"{([^}]+)} ") date = re.search(re_date, v) if date: isToday = parseREM( date.group(1) ) # date.group(1) = date in Remind format: Wed, 18 +3, Jan 26 +4 if isToday: task = re.sub(re_date, "", v) if taskExists(task): if verbose: print "Exists: " + task continue todo.add(task) else: if verbose: print "No date found for ", v
def LIST(murl):
    """Add one day's MLB condensed games or highlights to the listing.

    A date of the form ``<digits>-NN-NN`` is extracted from ``murl`` to
    build that day's ``grid.json`` URL.  After the user picks a type in a
    selection dialog, every game found in the grid is registered with
    ``main.addPlayMs`` (condensed games) or ``main.addDir`` (highlights).

    Args:
        murl: String expected to contain a date such as ``2013-04-07``.

    Returns:
        None.  Nothing is shown when ``murl`` contains no date; nothing is
        listed when the dialog is cancelled.
    """
    date = re.search(r'(\d+)-(\d{2})-(\d{2})', murl)
    if date is None:
        # Guard added: the original dereferenced the failed match and
        # crashed with AttributeError.
        return
    year, month, day = date.group(1), date.group(2), date.group(3)
    xurl = ('http://mlb.mlb.com/gdcross/components/game/mlb/year_' + year
            + '/month_' + month + '/day_' + day + '/grid.json')

    dialog = xbmcgui.Dialog()
    ret = dialog.select('Choose Type', ['Condensed Games', 'Highlights'])
    if ret == -1:
        # User backed out of the dialog.
        return

    if ret == 0:
        link = main.OPENURL(xurl)
        match = re.compile('{"id":"([^"]+)","playback_scenario":"FLASH_1800K_.+?","state":"MEDIA_ARCHIVE","type":"condensed_game"}.+?"away_team_name":"([^"]+)".+?"home_team_name":"([^"]+)".+?"home_file_code":"([^"]+)"', re.DOTALL).findall(link)
        for media_id, away, home, fname in match:
            thumb = 'http://mlb.mlb.com/mlb/images/team_logos/logo_' + fname + '_79x76.jpg'
            # The last three characters of the id form the directory levels
            # of the detail XML path.
            mod = media_id[-3:]
            url = 'http://m.mlb.com/gen/multimedia/detail/' + mod[0] + '/' + mod[1] + '/' + mod[2] + '/' + media_id + '.xml'
            main.addPlayMs(away + " at " + home, url, 449, thumb, '', '', '', '', '')
    if ret == 1:
        link = main.OPENURL(xurl)
        match = re.compile('"away_team_name":"([^"]+)".+?"home_team_name":"([^"]+)".+?"game_pk":"([^"]+)".+?"home_file_code":"([^"]+)"', re.DOTALL).findall(link)
        for away, home, game_pk, fname in match:
            thumb = 'http://mlb.mlb.com/mlb/images/team_logos/logo_' + fname + '_79x76.jpg'
            main.addDir(away + " at " + home, game_pk, 450, thumb)
# Generate dummy random files at PIC-INJECT all_path = random_line(filename, rses, number=15) #print(rses, all_path, 'this is list', len(all_path)) print(rses, 'this is list', len(all_path)) i = 1 for x in range(len(all_path)): path = all_path[x] import time from datetime import date try: date = re.search('\d{4}_\d{2}_\d{2}', path) date = datetime.strptime(date.group(), '%Y_%m_%d').date() date = date.strftime('%Y_%m_%d') today = str(time.strftime('%Y_%m_%d')) path = os.path.join('/', path.replace(date, today)) except: pass try: base, name = os.path.split(path) file_name = re.split(r'[`\-=~!@#$%^&*()_+\[\]{};\'\\:"|<,./<>?]', name) date = datetime.strptime(file_name[0], "%Y%m%d").date() date = date.strftime('%Y%m%d') today = str(time.strftime('%Y%m%d')) path = os.path.join('/', path.replace(date, today)) except: pass
def toRange(str, pos):
    """Classify the token ``str`` and build a dict for the bound ``pos``.

    Recognised tokens, in priority order:

    * event reference -- optional ``<``/``>``/``~`` operator, ``e`` plus
      1-3 digits, optional dot, 1-2 word characters.  Returns, and stores
      in the global ``refObj``, ``{"o": ..., "ref": ..., "pos": ...}``.
    * operator-prefixed value or duration (digits + d/m/y) -- only printed.
    * bare year, ``year-month`` or ``year-month-day`` -- a dict of values
      computed from ``gcal2jd`` and keyed according to ``pos``.

    Args:
        str: Token to classify.
        pos: Requested bound: ``'s'``, ``'ls'``, ``'e'`` or ``'ee'``.

    Returns:
        The reference dict, the range dict, or ``None`` when the token
        produced no range (operator, duration, unhandled ``pos``, or no
        pattern matched).
    """
    global refObj, durs
    # Single-argument parenthesised print behaves the same on Python 2/3.
    print('in toRange(), str=' + str + ', pos=' + pos)

    # The original joined two assignments with stray line continuations
    # (a syntax error); every pattern now has its own statement.
    bef = re.match(r'^<', str)
    aft = re.match(r'^>', str)
    abt = re.match(r'^\~', str)
    year = re.match(r'^\d{1,4}$', str)
    month = re.match(r'(\d{1,4})-(\d{1,2})$', str)
    date = re.match(r'^(\d{1,4})-(\d{1,2})-(\d{1,2})', str)
    dur = re.match(r'\d{1,3}(d|m|y)$', str)
    ref = re.match(r'([<,>,~])?(e\d{1,3})\.?(\w{1,2})', str)

    if ref:
        # group(1) is None when no operator precedes the reference; treat
        # that as an empty operator instead of raising TypeError.
        refObj = {
            "o": '"' + (ref.group(1) or '') + '"',
            "ref": ref.group(2),
            "pos": ref.group(3)
        }
        return refObj

    def _jd(*args):
        # Sum of the first two items returned by gcal2jd.
        parts = gcal2jd(*args)
        return parts[0] + parts[1]

    # Default result so paths that build no range return None rather than
    # raising UnboundLocalError at the final return.
    rangeObj = None

    if bef or aft or abt:
        print('an operator, must parse ' + str)
        # periods[12]['tSpans'][0]['e']
        # >e8.e
    elif dur:
        print(dur.group())
    elif year:
        # NOTE(review): gcal2jd gets one comma-joined string here but three
        # separate arguments in the date branch -- confirm which form the
        # helper accepts.  Kept as in the original.
        token = year.group()
        if pos == 's':
            rangeObj = {"s": _jd(token + ',01,01'),
                        "ls": _jd(token + ',01,31')}
        elif pos == 'e':
            rangeObj = {"ee": _jd(token + ',01,01'),
                        "e": _jd(token + ',1,31')}
    elif month:
        token = month.group()
        if pos == 's':
            rangeObj = {"s": _jd(token + ',01'), "ls": _jd(token + ',31')}
        elif pos == 'e':
            rangeObj = {"ee": _jd(token + ',01'), "e": _jd(token + ',31')}
    elif date:
        print("it's a date")
        foo = _jd(date.group(1), date.group(2), date.group(3))
        if pos in ('s', 'ls', 'e', 'ee'):
            rangeObj = {pos: foo}
    return rangeObj
async def user(peer_id: int, command: str, msg_id: int):
    """Answer the "who is" command with a profile summary of a VK user.

    When ``command`` (case-insensitively) starts with the command prefix
    checked below, the newest message of the conversation ``peer_id`` is
    fetched, the author of the message it replies to is looked up, and
    message ``msg_id`` is edited to contain that user's details.  For any
    other command nothing happens.

    NOTE(review): the lookup assumes the newest message has a
    ``reply_message`` and that the FOAF page contains a ``ya:created``
    tag; either being absent raises here -- confirm callers guarantee it.
    """
    if command.lower().startswith(".л кто"):
        # Only the single newest message is requested.
        history = vk.method('messages.getHistory', {
            'count': 1,
            'peer_id': peer_id,
            'rev': 0
        })
        user_id = history['items'][0]['reply_message']['from_id']

        # The registration date is scraped from the user's FOAF page.
        url = f'https://vk.com/foaf.php?id={user_id}'
        response = (urllib.request.urlopen(url)).read()
        soup = BeautifulSoup(response, "html.parser")
        date = re.search('<ya:created dc:date="(.+)"></ya:created>',
                         str(soup))
        # Text between the first pair of double quotes of the matched tag.
        date = (date.group().split('"')[1])
        dt = datetime.strptime(date, "%Y-%m-%dT%H:%M:%S%z")
        new_date = datetime.strftime(dt, '%d.%m.%Y')
        date_reg = str(new_date)

        user = vk.method(
            'users.get', {
                'user_ids': user_id,
                'fields': 'photo_50,status,bdate,blacklisted_by_me,'
                          'blacklisted,photo_max_orig,is_friend,'
                          'last_name_abl,first_name_abl,domain,'
                          'city,followers_count,last_seen,online,sex,is_closed'
            })[0]

        # Optional fields fall back to placeholder texts when absent.
        city_name: str = user.get('city', {}).get('title', "Мухосранск")
        followers: str = user.get('followers_count', "Их нет...")
        date_dr: str = user.get('bdate', "Ты когда родился?0_о")
        platform = user.get('last_seen', {}).get('platform', "Не официальное ПО")

        # Flag fields are converted for display with the b2s helper.
        user["blacklisted_by_me"] = b2s(user["blacklisted_by_me"])
        user["blacklisted"] = b2s(user["blacklisted"])
        user["is_closed"] = b2s(user["is_closed"])
        user["is_friend"] = b2s(user["is_friend"])
        user["online"] = 'Online' if user["online"] else 'Offline'
        # sex: 1 and 2 get an emoji, any other value a placeholder text.
        if user['sex'] == 1:
            user['sex'] = "👱♀️"
        elif user['sex'] == 2:
            user['sex'] = "👨"
        else:
            user["sex"] = "Ламинат"

        # NOTE(review): as written, the trailing .replace(' ', '') removes
        # EVERY space from the message, including spaces inside the
        # interpolated values -- confirm this is intended.
        msg = f""" Информация о {user["first_name_abl"]} {user["last_name_abl"]} {user["online"]}, {platform} ID: {user["id"]} Короткая ссылка: {user["domain"]} Имя: {user["first_name"]} Фамилия: {user["last_name"]} Дата регистрации: {date_reg} Дата рождение: {date_dr} Город: {city_name} Друзья: {user["is_friend"]} Подписчики: {followers} Пол: {user["sex"]} Закрытый прoфиль: {user["is_closed"]} Статус: {user["status"]} Я в чс: {user["blacklisted"]} Он в чс: {user["blacklisted_by_me"]} Фото: {user["photo_max_orig"]} 
""".replace(' ', '')
        edit_msg(peer_id, msg, msg_id)
def update_db():
    """Create missing tables and import new characters from the sheet.

    Steps, in order:

    1. Read the known origins and the next free id from ``personnages``;
       create the table when the query fails.
    2. Make sure ``serveurs``, ``utilisateurs`` and
       ``appartenances_serveurs`` exist, printing their row counts when
       they do, and register every guild of ``bot`` not yet in
       ``serveurs``.
    3. For every worksheet of "Tableau Date de Naissance" whose title is
       neither a known origin nor "Front"/"Template", turn each row with a
       ``d/m`` or ``d/m/yyyy`` date into a ``personnages`` row.
    4. Insert the collected rows and commit.

    Returns:
        ``[nb_anime, nb_perso]``: number of worksheets imported and number
        of characters added.
    """
    # c.execute('DROP TABLE personnages')
    # c.execute('DROP TABLE serveurs')
    # c.execute('DROP TABLE utilisateurs')
    # c.execute('DROP TABLE appartenances_serveurs')
    i = 0  # next id to assign in personnages
    anime = []  # origins (worksheet titles) already stored
    try:
        for row in c.execute('SELECT distinct origine FROM personnages'):
            anime.append(row[0])
        # NOTE(review): max(id) is NULL on an empty table, which would make
        # ``x[0] + 1`` raise -- confirm the table is never empty here.
        for x in c.execute('SELECT max(id) FROM personnages'):
            i = x[0] + 1
    except sqlite3.OperationalError:
        # The query failing is taken to mean the table does not exist yet.
        print("create personnages")
        c.execute(
            '''CREATE TABLE personnages(id integer PRIMARY KEY, nom text, origine text,jour integer, mois integer, annee integer, signe text)'''
        )
    serveurs = []
    try:
        for row in c.execute('SELECT count(id) FROM serveurs'):
            print(f"Serveurs : {row[0]}")
    except sqlite3.OperationalError:
        print("create serveurs")
        c.execute(
            '''CREATE TABLE serveurs(id integer PRIMARY KEY, canal integer)''')
    for row in c.execute('SELECT id FROM serveurs'):
        serveurs.append(row[0])
    # Register guilds that are not stored yet, using their first channel.
    for guild in bot.guilds:
        if guild.id not in serveurs:
            # NOTE(review): the statement is built by string interpolation;
            # a parameterised query would be the safer form.
            c.execute(
                f'INSERT INTO serveurs VALUES ({guild.id},{guild.channels[0].id})'
            )
    try:
        for row in c.execute('SELECT count(id) FROM utilisateurs'):
            print(f"Utilisateurs : {row[0]}")
    except sqlite3.OperationalError:
        print("create utilisateurs")
        c.execute(
            '''CREATE TABLE utilisateurs(id integer PRIMARY KEY, jour integer, mois integer)'''
        )
    try:
        for row in c.execute(
                'SELECT count(id_util) FROM appartenances_serveurs'):
            print(f"Liens : {row[0]}")
    except sqlite3.OperationalError:
        print("create appartenance_serveur")
        c.execute(
            '''CREATE TABLE appartenances_serveurs(id_serveur integer, id_util, PRIMARY KEY(id_serveur,id_util))'''
        )
    sheets = client.open("Tableau Date de Naissance").worksheets()
    db = []  # rows waiting to be inserted into personnages
    nb_anime = 0
    nb_perso = 0
    for sh in sheets:
        delta = time.time()  # start time of this worksheet's processing
        title = sh.title
        if title not in anime and title not in ["Front", "Template"]:
            nb_anime += 1
            for x in sh.get_all_values():
                # Skip header rows and rows without a name or a known date.
                if x[0] == "Personnage" or x[0] == "" or x[1] == "" or x[
                        1] == "???":
                    continue
                # Groups: 1 = day, 2 = month, 4 = optional four-digit year.
                date = re.search("([0-9]{1,2})/([0-9]{1,2})(/([0-9]{4}))?",
                                 x[1])
                if date is None:
                    continue
                jour = date.group(1)
                mois = date.group(2)
                annee = date.group(4)
                if annee is None:
                    # Rows without a year are stored with 2019.
                    annee = 2019
                row = (i, x[0], title, jour, mois, annee, x[2])
                db.append(row)
                i += 1
                nb_perso += 1
            print(f"{title} : {time.time()-delta}")
            # Wait until at least 1.05 s have passed since this worksheet
            # was started (presumably to stay under an API rate limit --
            # confirm).
            while time.time() - delta <= 1.05:
                time.sleep(.01)
    c.executemany('INSERT INTO personnages VALUES (?,?,?,?,?,?,?)', db)
    conn.commit()
    print("fini")
    return [nb_anime, nb_perso]
def extract_text_from_pdf(pdf_path):
    """Print the text of a PDF invoice and the fields scraped from it.

    The PDF at ``pdf_path`` is converted to text page by page (the class
    names used are presumably pdfminer's -- confirm against the imports).
    The text is printed, then one of four vendor-specific branches is
    chosen by a marker substring in the text.  Each branch prints, in
    order: PO reference, date, vendor name, address, invoice number, GST
    number, PAN, total, tax and item description.

    Every field is found by a regex anchored on labels from that vendor's
    layout and then cleaned by replacing known label texts with a space.
    A missing label makes ``re.search`` return None and the following
    ``.group()`` call raise.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        None; all results are printed.
    """
    resource_manager = PDFResourceManager()
    fake_file_handle = io.StringIO()  # in-memory sink for extracted text
    converter = TextConverter(resource_manager, fake_file_handle)
    page_interpreter = PDFPageInterpreter(resource_manager, converter)
    with open(pdf_path, 'rb') as fh:
        for page in PDFPage.get_pages(fh,
                                      caching=True,
                                      check_extractable=True):
            page_interpreter.process_page(page)
        text = fake_file_handle.getvalue()
    converter.close()
    fake_file_handle.close()
    print(text)
    print('\n\n')
    if "Paramount Trading Corporation" in text:
        PO = re.search("PO(.*?)Invoice", text)
        PO = PO.group()
        PO = PO.replace("PO Ref : ", " ")
        PO = PO.replace(" Invoice", " ")
        print(PO)
        # The date is the 8 characters following the "Date :" label.
        date = re.search("Date :.{8}", text)
        date = date.group()
        date = date.replace("Date :", " ")
        print(date)
        name = "Paramount Trading Corporation"
        print(name)
        add = re.search("Billing Address (.*?)Date", text)
        add = add.group().replace("Billing Address ", " ").replace("Date", " ")
        print(add)
        inv = re.search("Invoice No(.*?)%", text)
        inv = inv.group().replace("Invoice No:- ", " ").replace("%", " ")
        print(inv)
        #cpan = re.search("Customer PAN (.*?)Ship",text)
        #cpan = cpan.group().replace("Customer PAN No"," ").replace("Ship"," ")
        #print(cpan)
        #cgst = re.search("Customer GST (.*?)Customer", text)
        #cgst = cgst.group().replace("Customer GST No"," ").replace("Customer"," ")
        #print(cgst)
        gst = re.search("GST No : (.*?)PAN", text)
        gst = gst.group().replace("GST No : ", " ").replace("PAN", " ")
        gst = gst.replace("Paramount Trading Corporation ", " ")
        print(gst)
        pan = re.search("PAN No : (.*?)Declaration", text)
        pan = pan.group().replace("PAN No : ", " ").replace("Declaration", " ")
        print(pan)
        # Total and tax are both cut out of the 300 characters after "18%",
        # split on "." -- the slicing depends on this vendor's exact layout.
        total = re.search("18%.{300}", text)
        total = total.group().split(".")
        total = total[1][2:] + "." + total[2][:2]
        print(total)
        tax = re.search("18%.{300}", text)
        tax = tax.group().split(".")
        tax[0] = tax[0].replace("18%", " ")
        tax = tax[0] + "." + tax[1][:2]
        print(tax)
        # The description is what remains after blanking every known label,
        # address line and previously extracted value.
        des = re.search("Paramount Trading Corporation(.*?)#8", text)
        des = des.group()
        des = des.replace("Description", " ")
        des = des.replace("#8", " ")
        des = des.replace("Commercial Invoice", " ")
        des = des.replace("Shipping Method", " ")
        des = des.replace("Mode of Payment", " ")
        des = des.replace("Shipment Date", " ")
        des = des.replace("Hero MotoCorp Ltd.C/o", " ")
        des = des.replace(
            "The Grand New Delhi, Nelson Mandel Road, Vasant Kunj, Phase IINew Delhi, India. Pin - 110070",
            " ")
        des = des.replace("Contact : Avinash +919557971063", " ")
        des = des.replace("Total", " ")
        des = des.replace("Paramount Trading Corporation", " ")
        des = des.replace("Road", " ")
        des = des.replace("11th June 2019", " ")
        des = des.replace("Hero MotoCorp Ltd.", " ")
        des = des.replace("Customer PO Ref : ", " ")
        des = des.replace(PO, " ")
        des = des.replace("Invoice No:- ", " ")
        des = des.replace("GST No : ", " ")
        des = des.replace(gst, " ")
        des = des.replace("PAN No : ", " ")
        des = des.replace("%", " ")
        des = des.replace(
            "Declaration:We declare that this invoice shows the actual price of the goodsdescribed and that all particulars are true and correct.",
            " ")
        des = des.replace("Authorised Signatory", " ")
        des = des.replace("advance balance 60 ", " ")
        des = des.replace("against delivery", " ")
        des = des.replace(inv, " ")
        des = des.replace(pan, " ")
        des = des.replace("(round off)", " ")
        print(des)
    elif "SONATA" in text:
        PO = re.search("Cust PO Ref & Date(.*?)/", text)
        PO = PO.group().replace("Cust PO Ref & Date: ", " ").replace("/", " ")
        print(PO)
        date = re.search("Invoice Date: (.*?)BILL", text)
        date = date.group().replace("Invoice Date: ", " ").replace("BILL", " ")
        print(date)
        name = "SONATA INFORMATION TECHNOLOGY LIMITED"
        print(name)
        add = re.search("INVOICESONATA INFORMATION TECHNOLOGY LIMITED(.*?)TEL",
                        text)
        add = add.group().replace(
            "INVOICESONATA INFORMATION TECHNOLOGY LIMITED",
            " ").replace("TEL", " ")
        print(add)
        inv = re.search("Invoice No.:(.*?)Invoice", text)
        inv = inv.group().replace("Invoice No.:", " ").replace("Invoice", " ")
        print(inv)
        gst = re.search("GSTIN : (.*?)PAN", text)
        gst = gst.group().replace("GSTIN : ", " ").replace("PAN", " ")
        print(gst)
        pan = re.search("Our PAN is (.*?)and", text)
        pan = pan.group().replace("Our PAN is ", " ").replace("and", " ")
        print(pan)
        # Keep the integer part plus two decimals of the total.
        total = re.search("Total Invoice Value (.*?)of", text)
        total = total.group().split(".")
        total[0] = total[0].replace("Total Invoice Value ", " ")
        total = total[0] + "." + total[1][:2]
        print(total)
        tax = re.search("Total Tax Value(.*?)Total", text)
        tax = tax.group().replace("Total Tax Value", " ").replace("Total", " ")
        print(tax)
        des = re.search("Description of Goods/Services(.*?)Each", text)
        des = des.group()
        des = des.replace("Description of Goods/Services", " ")
        des = des.replace("Each", " ")
        des = des.replace("Qty", " ")
        des = des.replace("UOM", " ")
        des = des.replace("Rate", " ")
        des = des.replace("(INR)", " ")
        des = des.replace("Amount", " ")
        print(des)
    elif "Concoct Human Resources Practitioners India" in text:
        # PO and date are fixed space-separated positions (13 and 12) in
        # the 300 characters after the "eWay Bill No#" label.
        PO = re.search("eWay Bill No#.{300}", text)
        PO = PO.group().split(" ")
        PO = PO[13]
        print(PO)
        date = re.search("eWay Bill No#.{300}", text)
        date = date.group().split(" ")
        date = date[12]
        print(date)
        name = "Concoct Human Resources Practitioners India"
        print(name)
        add = re.search("#(.*?)Proforma", text)
        add = add.group().replace("Proforma", " ")
        print(add)
        inv = re.search("Invoice No: (.*?)PAN", text)
        inv = inv.group().replace("Invoice No: ", " ").replace("PAN", " ")
        print(inv)
        gst = re.search("IGST No#:(.*?)IEC", text)
        gst = gst.group().replace("IGST No#:", " ").replace("IEC", " ")
        print(gst)
        pan = re.search("PAN No: (.*?)GSTIN", text)
        pan = pan.group().replace("PAN No: ", " ").replace("GSTIN", " ")
        print(pan)
        total = re.search("Total Inc. of GST @ 18%(.*?)Amount", text)
        total = total.group().replace("Total Inc. of GST @ 18%",
                                      " ").replace("Amount", " ")
        print(total)
        tax = "Not given separately"
        print(tax)
        des = re.search("Particulars(.*?)Total", text)
        des = des.group()
        des = des.replace("Particulars", " ")
        des = des.replace("Product", " ")
        des = des.replace("S/N", " ")
        des = des.replace("No# of Units", " ")
        des = des.replace("Price Per Unit", " ")
        des = des.replace("GST @ 18%", " ")
        des = des.replace("Amount", " ")
        des = des.replace("(INR)", " ")
        des = des.split(".")
        #des = re.findall("[a-z]",des)
        l = len(des)
        # Print the text before "Unit" for each "."-separated piece that
        # contains it; the last piece is not examined.
        for i in range(0, l - 1):
            if "Unit" in des[i]:
                desi = des[i].split("Unit")
                desi = desi[0]
                print(desi)
    elif "MicroGenesis CADSoft" in text:
        PO = "Not given"
        print(PO)
        date = re.search("Despatched throughDated(.*?)Mode", text)
        date = date.group().replace("Despatched throughDated",
                                    " ").replace("Mode", " ")
        print(date)
        name = "MicroGenesis CADSoft"
        print(name)
        add = re.search("MicroGenesis CADSoft(.*?)MSMED", text)
        add = add.group().replace("MSMED",
                                  " ").replace("MicroGenesis CADSoft Pvt Ltd",
                                               " ")
        print(add)
        inv = re.search("Invoice No.(.*?)Delivery", text)
        inv = inv.group().replace("Invoice No.", " ").replace("Delivery", " ")
        print(inv)
        gst = re.search("GSTIN/UIN:(.*?)State", text)
        gst = gst.group().replace("GSTIN/UIN:", " ").replace("State", " ")
        print(gst)
        pan = re.search("Company's PAN :(.*?)Dec", text)
        pan = pan.group().replace("Company's PAN :", " ").replace("Dec", " ")
        print(pan)
        total = re.search("Total₹(.*?)No", text)
        total = total.group().replace("Total", " ").replace("No", " ")
        print(total)
        tax = re.search("IGST @ 18%(.*?)%", text)
        tax = tax.group().replace("IGST @ 18%", " ").replace("%", " ")
        print(tax)
        des = re.search("SACNo.Services(.*?)No", text)
        des = des.group().replace("SACNo.Services", " ").replace("No", " ")
        print(des)