def offset_link(html_str, url, querystring, court_name):
    """Parse the first result page, then walk the remaining pages 200 rows at a time.

    Returns True when all pages parsed (or an emergency stop was requested),
    False when any page fails to parse or an exception occurs.
    """
    try:
        if not parse_html(html_str, court_name):
            return False
        querystring['sort_by'] = "1"
        querystring['etal'] = "-1"
        soup = BeautifulSoup(html_str, "html.parser")
        div_tag = soup.find_all('div', {'class': 'browse_range'})[0]
        # The last number in the "browse range" text is the total record count.
        # BUG FIX: use a raw string for the regex — '\d' in a plain string is an
        # invalid escape sequence (SyntaxWarning on modern Python).
        total_records = int(re.findall(r'\d+', str(div_tag.text))[-1])
        total_calls = ceil(total_records / 200)
        next_num = 0
        for _ in range(total_calls):
            # The first page (offset 0) was already parsed above; start at 200.
            next_num += 200
            emergency_exit = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
            if emergency_exit['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True
            querystring['offset'] = str(next_num)
            # NOTE(review): `headers` is not a parameter — presumably a module-level
            # constant in this file; confirm before moving this function.
            response = requests.request("GET", url, headers=headers,
                                        params=querystring, proxies=proxy_dict)
            res = response.text
            if not parse_html(res, court_name):
                logging.error("Failed for url: " + str(next_num))
                return False
        return True
    except Exception as e:
        logging.error("Error in offset_link. %s", e)
        return False
def request_data(court_name, bench, start_date, end_date_):
    """Fetch tribunal orders one day at a time from start_date to end_date_.

    Dates are dd/mm/YYYY strings.  Returns True on completion or emergency
    stop, False on an unhandled exception.
    """
    try:
        url = base_url + "/tribunalorders"
        headers = {
            'Content-Type': "application/x-www-form-urlencoded",
            'Cache-Control': "no-cache"
        }
        i = 0
        while True:
            i += 1
            emergency_exit = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
            if emergency_exit['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True
            # Window is a single day: [start_date, start_date + 1).
            end_date = (datetime.datetime.strptime(str(start_date), "%d/%m/%Y") +
                        datetime.timedelta(days=1)).strftime("%d/%m/%Y")
            if datetime.datetime.strptime(str(end_date_), "%d/%m/%Y") + datetime.timedelta(days=1) < \
                    datetime.datetime.strptime(str(end_date), "%d/%m/%Y"):
                logging.error("DONE")
                break
            update_query("UPDATE Tracker SET Start_Date = '" + str(start_date) +
                         "', End_Date = '" + str(end_date) + "' WHERE Name = '" +
                         str(court_name) + "'")
            payload = "bench=" + str(bench) + \
                      "&appeal_type=" \
                      "&hearingdate=" \
                      "&pronouncementdate=" \
                      "&orderdate=" + str(start_date) + \
                      "&member=" \
                      "&assesseename="
            response = requests.request("POST", url, data=payload, headers=headers,
                                        verify=False, proxies=proxy_dict)
            res = response.text
            # BUG FIX: Response.text is never None, so `res is None` was dead code;
            # treat an empty body as "no data" instead of handing it to parse_html.
            if not res:
                logging.error("NO data Found.")
                update_query("UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '" +
                             str(court_name) + "'")
                start_date = end_date
                continue
            if not parse_html(res, court_name, bench):
                logging.error("Failed to parse data from bench: " + str(bench))
            start_date = end_date
        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from bench: " + str(bench))
        logging.error("Failed to request: %s", e)
        return False
def request_data(court_name, headers, start_date, end_date_):
    """Walk dd-mm-YYYY dates one day at a time, posting the coram judgment search
    form for each day and delegating result pages to offset_link.

    Returns True on completion or emergency stop, False on an unhandled exception.
    """
    try:
        url = base_url + "coram-reported-judgment.php"
        attempt = 0
        while True:
            attempt += 1
            # Honor an operator-requested stop before each request.
            tracker_row = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
            if tracker_row['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True
            window_end = (datetime.datetime.strptime(str(start_date), "%d-%m-%Y") +
                          datetime.timedelta(days=1)).strftime("%d-%m-%Y")
            end_date = window_end
            limit = datetime.datetime.strptime(str(end_date_), "%d-%m-%Y") + datetime.timedelta(days=1)
            if limit < datetime.datetime.strptime(str(end_date), "%d-%m-%Y"):
                logging.error("DONE")
                break
            update_query("UPDATE Tracker SET Start_Date = '" + str(start_date) +
                         "', End_Date = '" + str(end_date) + "' WHERE Name = '" +
                         str(court_name) + "'")
            payload = ("coram=0"
                       "&ojtype=1"
                       "&bench_type=0"
                       "&reported=Y"
                       "&startdate=" + str(start_date) +
                       "&enddate=" + str(end_date) +
                       "&coramqueryreported=0")
            page = requests.request("POST", url, data=payload,
                                    headers=headers, proxies=proxy_dict).text
            if "NO ROWS" in page.upper():
                logging.error("NO data Found for start date: " + str(start_date))
                update_query("UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '" +
                             str(court_name) + "'")
                start_date = end_date
                continue
            if not offset_link(page, payload, court_name, headers):
                logging.error("Failed to parse data from date: " + str(start_date))
            start_date = end_date
        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
def request_data(court_name, start_date, end_date_):
    """Fetch monthly "JDMT<MonYY>.html" judgment pages for each month in range.

    Months ending in a two-digit year below 10 are treated as having no data.
    Returns True on completion or emergency stop, False on an unhandled exception.
    """
    try:
        headers = {
            'Cache-Control': "no-cache",
        }
        # Data only exists from two-digit year 10 onward.
        if int(start_date[-2:]) < 10:
            update_query("UPDATE Tracker SET status = 'IN_NO_DATA_FOUND', emergency_exit=true WHERE Name = '" +
                         str(court_name) + "'")
            if int(end_date_[-2:]) < 10:
                update_history_tracker(court_name)
                return True
        for month_year in month_list_([str(start_date), str(end_date_)]):
            month_year = date_fix(month_year)
            emergency_exit = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
            if emergency_exit['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True
            url = base_url + "JDMT" + str(month_year) + ".html"
            update_query("UPDATE Tracker SET Start_Date = '" + str(month_year) +
                         "', End_Date = '" + str(end_date_) + "' WHERE Name = '" +
                         str(court_name) + "'")
            response = requests.request("GET", url, headers=headers, proxies=proxy_dict)
            res = response.text
            # BUG FIX: the needle is lowercase but the haystack was uppercased
            # (`res.upper()`), so the check could never match and 404-style pages
            # were handed to parse_html; compare lowercase-to-lowercase.
            if "file or directory not found" in res.lower():
                logging.error("NO data Found for start date: " + str(month_year))
                update_query("UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '" +
                             str(court_name) + "'")
                continue
            # Early pages (year '10' and Jan11) use a different layout flag.
            if str(month_year[-2:]) == '10' or str(month_year) == 'Jan11':
                if not parse_html(res, court_name, True):
                    logging.error("Failed to parse data from date: " + str(month_year))
            else:
                if not parse_html(res, court_name, False):
                    logging.error("Failed to parse data from date: " + str(month_year))
        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
def request_data(court_name, bench, start_date, end_date_):
    """Fetch yearly section-wise index pages for a bench (years 2010-2016 only).

    start_date / end_date_ are integer years.  Returns True on completion or
    emergency stop, False on an unhandled exception.
    """
    try:
        for year in range(start_date, end_date_ + 1):
            # Site only hosts data for 2010-2016.
            if int(year) < 2010 or int(year) > 2016:
                logging.error("NO data Found for start date: " + str(start_date))
                update_query(
                    "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '" +
                    str(court_name) + "'")
                continue
            section_types = ['111_111_A', '397_398', 'Others']
            for section_type in section_types:
                child_url = str(bench) + '/' + str(year) + '/' + str(section_type) + '/'
                url = base_url + child_url + 'index.html'
                emergency_exit = select_one_query(
                    "SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
                if emergency_exit['emergency_exit'] == 1:
                    update_history_tracker(court_name)
                    return True
                update_query("UPDATE Tracker SET Start_Date = '" + str(year) +
                             "', End_Date = '" + str(year) + "' WHERE Name = '" +
                             str(court_name) + "'")
                response = requests.request("GET", url, proxies=proxy_dict)
                res = response.text
                # BUG FIX: Response.text is never None, so `res is None` was dead
                # code; treat an empty body as "no data" instead.
                if not res:
                    logging.error("NO data Found for year: " + str(year))
                    update_query(
                        "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '" +
                        str(court_name) + "'")
                    continue
                if not parse_html(res, court_name, bench, child_url):
                    logging.error("Failed to parse data for year: " + str(year))
        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
def request_data(court_name, start_date, end_date_):
    """Fetch yearly "DecisionsHeadline<year>.html" pages from start_date to end_date_.

    start_date / end_date_ are year strings; 2018 maps to the un-suffixed page.
    Returns True on completion or emergency stop, False on an unhandled exception.
    """
    try:
        # Nothing exists before 2012; mark the tracker and bail out early if the
        # whole requested range predates it.
        if int(start_date) < 2012:
            update_query("UPDATE Tracker SET status = 'IN_NO_DATA_FOUND', emergency_exit=true WHERE Name = '" +
                         str(court_name) + "'")
            if int(end_date_) < 2012:
                update_history_tracker(court_name)
                return True
        for year_ in range(int(start_date), int(end_date_) + 1):
            row = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
            if row['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True
            # The current (2018) listing lives at the bare, un-suffixed URL.
            if int(year_) == 2018:
                year_ = ''
            url = base_url + "DecisionsHeadline" + str(year_) + ".html"
            update_query("UPDATE Tracker SET Start_Date = '" + str(year_) +
                         "', End_Date = '" + str(end_date_) + "' WHERE Name = '" +
                         str(court_name) + "'")
            page = requests.request("GET", url, proxies=proxy_dict).text
            if "file or directory not found" in page.lower():
                logging.error("NO data Found for start date: " + str(year_))
                update_query("UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '" +
                             str(court_name) + "'")
                continue
            if not parse_html(page, court_name):
                logging.error("Failed to parse data from date: " + str(year_))
        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
def start_scrap():
    """Flask endpoint: launch a scrape for the posted court, then sync artifacts.

    Cancels any running job, marks this one running, runs the controller, uploads
    the produced PDF/text files to the bucket, and records success/failure.
    """
    # NOTE(review): these form values are concatenated straight into SQL below —
    # injection risk if update_query does not sanitize; confirm and parameterize.
    court_name = request.form['court_name']
    bench = request.form['bench']
    start_date = request.form['start_date']
    end_date = request.form['end_date']
    update_query(
        "UPDATE Tracker SET status='IN_CANCELLED', emergency_exit=true WHERE status='IN_RUNNING'")
    update_query(
        "UPDATE Tracker SET status='IN_RUNNING', emergency_exit=false, No_Cases=0, No_Year_NoData=0, "
        "No_Year_Error=0, No_Error=0, Start_Date='" + start_date + "', End_Date='" + end_date +
        "', bench='" + str(bench) + "' WHERE Name='" + court_name + "'")
    res = court_controller(court_name, bench, start_date, end_date)
    update_query("UPDATE Tracker SET status = 'IN_BUCKET_TRANSFER' WHERE Name = '" +
                 str(court_name) + "'")
    # Ship generated files to the bucket; delete local copies only on success.
    for pdf_path in glob(
            "/home/karaa_krypt/CourtScrappingWebApp/Data_Files/PDF_Files/" +
            str(court_name) + "*.pdf"):
        if transfer_to_bucket('PDF_Files', pdf_path):
            os.remove(pdf_path)
    for txt_path in glob(
            "/home/karaa_krypt/CourtScrappingWebApp/Data_Files/Text_Files/" +
            str(court_name) + "*.txt"):
        if transfer_to_bucket('Text_Files', txt_path):
            os.remove(txt_path)
    if res:
        update_query("UPDATE Tracker SET status = 'IN_SUCCESS', emergency_exit=true WHERE Name = '" +
                     str(court_name) + "'")
    else:
        update_query(
            "UPDATE Tracker SET No_Year_Error = No_Year_Error + 1, status = 'IN_FAILED', "
            "emergency_exit=true WHERE Name = '" + str(court_name) + "'")
    update_history_tracker(court_name)
    return jsonify(res)
def request_data_old(court_name, start_date, end_date):
    """Single-shot browse of reportable judgments (legacy path).

    Performs one GET, parses the result, and finalizes the tracker row either
    way.  Returns True on success, False on an unhandled exception.
    """
    try:
        url = base_url + "/judgments/browse"
        update_query("UPDATE Tracker SET Start_Date = '" + start_date +
                     "', End_Date = '" + end_date + "' WHERE Name = '" +
                     str(court_name) + "'")
        querystring = {
            "type": "reported",
            "value": "Reportable",
            "sort_by": "1",
            "order": "ASC",
            "rpp": "357",
            "etal": "0",
            "submit_browse": "Update",
        }
        # NOTE(review): `headers` is not defined here — presumably a module-level
        # constant; confirm before moving this function.
        page = requests.request("GET", url, headers=headers,
                                params=querystring, proxies=proxy_dict).text
        if "NO ROWS" in page.upper():
            update_query("UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '" +
                         str(court_name) + "'")
        if not parse_html(page, court_name):
            logging.error("Failed to parse data old")
        update_query("UPDATE Tracker SET status = 'IN_SUCCESS', emergency_exit=true WHERE Name = '" +
                     str(court_name) + "'")
        update_history_tracker(court_name)
        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        update_query("UPDATE Tracker SET No_Year_Error = No_Year_Error + 1, status = 'IN_FAILED' WHERE Name = '" +
                     str(court_name) + "'")
        update_history_tracker(court_name)
        return False
def request_data(court_name, dc, headers, start_date, end_date_):
    """Walk dd/mm/YYYY dates one day at a time, posting the judgment-date form
    for court code `dc` and delegating result pages to offset_link.

    Returns True on completion or emergency stop, False on an unhandled exception.
    """
    try:
        url = base_url + "/juddt1.php"
        attempt = 0
        while True:
            attempt += 1
            row = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
            if row['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True
            end_date = (datetime.datetime.strptime(str(start_date), "%d/%m/%Y") +
                        datetime.timedelta(days=1)).strftime("%d/%m/%Y")
            past_limit = (datetime.datetime.strptime(str(end_date_), "%d/%m/%Y") +
                          datetime.timedelta(days=1) <
                          datetime.datetime.strptime(str(end_date), "%d/%m/%Y"))
            if past_limit:
                logging.error("DONE")
                break
            update_query("UPDATE Tracker SET Start_Date = '" + str(start_date) +
                         "', End_Date = '" + str(end_date) + "' WHERE Name = '" +
                         str(court_name) + "'")
            querystring = {"dc": str(dc), "fflag": "1"}
            payload = "juddt=" + str(start_date) + "&Submit=Submit"
            page = requests.request("POST", url, data=payload, headers=headers,
                                    params=querystring, proxies=proxy_dict).text
            if "NO ROWS" in page.upper():
                logging.error("NO data Found for start date: " + str(start_date))
                update_query(
                    "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '" +
                    str(court_name) + "'")
                start_date = end_date
                continue
            if not offset_link(page, headers, court_name, dc):
                logging.error("Failed to parse data from date: " + str(start_date))
            start_date = end_date
        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
def request_data(court_name, court_id, start_date, end_date_):
    """Walk dd/mm/YYYY dates in 180-day windows, posting the party-query form
    for `court_id` and parsing each response.

    Returns True on completion or emergency stop, False on an unhandled exception.
    """
    try:
        url = base_url + "dtquery_new_v1.asp"
        headers = {
            'Content-Type': "application/x-www-form-urlencoded",
            'Cache-Control': "no-cache"
        }
        attempt = 0
        while True:
            attempt += 1
            row = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
            if row['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True
            end_date = (datetime.datetime.strptime(str(start_date), "%d/%m/%Y") +
                        datetime.timedelta(days=180)).strftime("%d/%m/%Y")
            limit = datetime.datetime.strptime(end_date_, "%d/%m/%Y") + datetime.timedelta(days=180)
            if limit < datetime.datetime.strptime(str(end_date), "%d/%m/%Y"):
                logging.error("DONE")
                break
            update_query("UPDATE Tracker SET Start_Date = '" + str(start_date) +
                         "', End_Date = '" + str(end_date) + "' WHERE Name = '" +
                         str(court_name) + "'")
            payload = ("action=validate_login"
                       "&Court_Id=" + str(court_id) +
                       "&party=jus"
                       "&FromDt=" + str(start_date) +
                       "&ToDt=" + str(end_date))
            page = requests.request("POST", url, data=payload,
                                    headers=headers, proxies=proxy_dict).text
            if "no data found" in page.lower():
                logging.error("NO data Found for start date: " + str(start_date))
                update_query(
                    "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '" +
                    str(court_name) + "'")
                start_date = end_date
                continue
            if not parse_html(page, court_name, court_id):
                logging.error("Failed to parse data from date: " + str(start_date))
            start_date = end_date
        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
def request_data(court_name, bench, headers, start_date, end_date_):
    """Walk YYYY-mm-dd dates in 30-day windows, posting the judgement-status
    form for `bench` and parsing each response.

    Returns True on completion or emergency stop, False on an unhandled exception.
    """
    try:
        url = base_url + '/' + str(bench) + "/services/judgement_status.php"
        i = 0
        while True:
            i += 1
            emergency_exit = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
            if emergency_exit['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True
            end_date = (datetime.datetime.strptime(str(start_date), "%Y-%m-%d") +
                        datetime.timedelta(days=30)).strftime("%Y-%m-%d")
            if datetime.datetime.strptime(str(end_date_), "%Y-%m-%d") + datetime.timedelta(days=30) < \
                    datetime.datetime.strptime(str(end_date), "%Y-%m-%d"):
                logging.error("DONE")
                break
            update_query("UPDATE Tracker SET Start_Date = '" + str(start_date) +
                         "', End_Date = '" + str(end_date) + "' WHERE Name = '" +
                         str(court_name) + "'")
            payload = "case_no=" \
                      "&case_type=0" \
                      "&case_year=" \
                      "&filing_no=" \
                      "&from_date=" \
                      "&from_date1=" + str(start_date) + \
                      "&judge_detail=0" \
                      "&search_type=3" \
                      "&to_date=" \
                      "&to_date1=" + str(end_date) + \
                      "&txtState=" \
                      "&txtSubject="
            response = requests.request("POST", url, data=payload,
                                        headers=headers, proxies=proxy_dict)
            res = response.text
            # BUG FIX: Response.text is never None, so `res is None` was dead code;
            # treat an empty body as "no data" instead of handing it to parse_html.
            if not res:
                logging.error("NO data Found for start date: " + str(start_date))
                update_query(
                    "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '" +
                    str(court_name) + "'")
                start_date = end_date
                continue
            if not parse_html(res, court_name, bench, start_date):
                logging.error("Failed to parse data from date: " + str(start_date))
            start_date = end_date
        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
def request_data(court_name, headers, start_date, end_date_):
    """Walk dd/mm/YYYY dates one day at a time via the free-text judgment search,
    delegating result pages to offset_link.

    Returns True on completion or emergency stop, False on an unhandled exception.
    """
    try:
        url = base_url + "/home.php"
        attempt = 0
        while True:
            attempt += 1
            row = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
            if row['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True
            end_date = (datetime.datetime.strptime(str(start_date), "%d/%m/%Y") +
                        datetime.timedelta(days=1)).strftime("%d/%m/%Y")
            limit = datetime.datetime.strptime(end_date_, "%d/%m/%Y") + datetime.timedelta(days=1)
            if limit < datetime.datetime.strptime(str(end_date), "%d/%m/%Y"):
                logging.error("DONE")
                break
            update_query("UPDATE Tracker SET Start_Date = '" + str(start_date) +
                         "', End_Date = '" + str(end_date) + "' WHERE Name = '" +
                         str(court_name) + "'")
            querystring = {"search_param": "free_text_search_judgment"}
            payload = ("t_case_type="
                       "&t_case_year="
                       "&submit=Search%20Case"
                       "&from_date=" + str(start_date) +
                       "&to_date=" + str(end_date) +
                       "&pet_name="
                       "&res_name="
                       "&free_text=Justice")
            page = requests.request("POST", url, data=payload, headers=headers,
                                    params=querystring, verify=False,
                                    proxies=proxy_dict).text
            if "no data found" in page.lower():
                logging.error("NO data Found for start date: " + str(start_date))
                update_query(
                    "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '" +
                    str(court_name) + "'")
                # Be gentle with the server between empty-day probes.
                sleep(2)
                start_date = end_date
                continue
            if not offset_link(page, headers, court_name):
                logging.error("Failed to parse data from date: " + str(start_date))
            start_date = end_date
        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
def request_data(court_name, start_date, end_date_):
    """For each month in range and each appeal type, post the judgments form
    and parse the response.  Data only exists from 2013 onward.

    Returns True on completion or emergency stop, False on an unhandled exception.
    """
    try:
        headers = {
            'Content-Type': "application/x-www-form-urlencoded",
            'Cache-Control': "no-cache",
        }
        url = base_url + '/judgementsdetails.asp'
        appeal_types = [
            'NDPS/FPA/ND', 'PMLA/FPA-PMLA', 'SAFEMA/FPA-1', 'FPA/BP',
            'FEMA/FERA/FPA-FE'
        ]
        if int(start_date[-4:]) < 2013:
            update_query(
                "UPDATE Tracker SET status = 'IN_NO_DATA_FOUND', emergency_exit=true WHERE Name = '" +
                str(court_name) + "'")
            if int(end_date_[-4:]) < 2013:
                update_history_tracker(court_name)
                return True
        for month_year in month_list_([str(start_date), str(end_date_)]):
            for appeal_type in appeal_types:
                row = select_one_query(
                    "SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
                if row['emergency_exit'] == 1:
                    update_history_tracker(court_name)
                    return True
                update_query("UPDATE Tracker SET Start_Date = '" + str(month_year) +
                             "', End_Date = '" + str(month_year) + "' WHERE Name = '" +
                             str(court_name) + "'")
                # month_year is "<month><YYYY>": the last 4 chars are the year.
                payload = ("ACTAPPEALTYPE=" + appeal_type +
                           "&DDMONTH=" + str(month_year[:-4]) +
                           "&DDYEAR=" + str(month_year[-4:]))
                page = requests.request("POST", url, data=payload,
                                        headers=headers, proxies=proxy_dict).text
                if 'there are no records at present' in page.lower():
                    logging.error("NO data Found for year: " + str(month_year))
                    update_query(
                        "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '" +
                        str(court_name) + "'")
                    continue
                if not parse_html(page, court_name, appeal_type):
                    logging.error("Failed to parse data for year: " + str(month_year))
        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
def request_data(court_name, start_date, end_date_):
    """Walk dd-mm-YYYY dates in 30-day windows, posting the judge-by-judgment
    form and parsing each response.

    Returns True on completion or emergency stop, False on an unhandled exception.
    """
    try:
        url = base_url + 'php/getJBJ.php'
        headers = {
            'Content-Type': "application/x-www-form-urlencoded; charset=UTF-8",
            'Cache-Control': "no-cache"
        }
        attempt = 0
        while True:
            attempt += 1
            row = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
            if row['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True
            end_date = (datetime.datetime.strptime(str(start_date), "%d-%m-%Y") +
                        datetime.timedelta(days=30)).strftime("%d-%m-%Y")
            limit = datetime.datetime.strptime(end_date_, "%d-%m-%Y") + datetime.timedelta(days=30)
            if limit < datetime.datetime.strptime(str(end_date), "%d-%m-%Y"):
                logging.error("END date Exceed.")
                break
            update_query("UPDATE Tracker SET Start_Date = '" + str(start_date) +
                         "', End_Date = '" + str(end_date) + "' WHERE Name = '" +
                         str(court_name) + "'")
            payload = ("jorrop=J"
                       "&JBJfrom_date=" + str(start_date) +
                       "&JBJto_date=" + str(end_date))
            page = requests.request("POST", url, data=payload, headers=headers,
                                    verify=False, proxies=proxy_dict).text
            if "no data found" in page.lower():
                logging.error("NO data Found for start date: " + str(start_date))
                update_query(
                    "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '" +
                    str(court_name) + "'")
                start_date = end_date
                continue
            if not parse_html(page, court_name):
                logging.error("Failed to parse data from date: " + str(start_date))
            start_date = end_date
        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
def request_data(court_name, start_date, end_date_):
    """For each month in range, replay the ASP.NET judgment form (ViewState et al.)
    and parse the response.  Data only exists from 2010 onward.

    Returns True on completion or emergency stop, False on an unhandled exception.
    """
    try:
        if int(start_date[-4:]) < 2010:
            update_query(
                "UPDATE Tracker SET status = 'IN_NO_DATA_FOUND', emergency_exit=true WHERE Name = '" +
                str(court_name) + "'")
            if int(end_date_[-4:]) < 2010:
                update_history_tracker(court_name)
                return True
        for month_year in month_list_([str(start_date), str(end_date_)]):
            row = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
            if row['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True
            form_fields = {
                'ctl00$CPHBody$DropDownListYear': str(month_year[-4:]),
                'ctl00$CPHBody$DropDownListMonth': str(month_year[:-4]).lstrip("0"),
                'ctl00$CPHBody$TextBox1': '',
                'ctl00$CPHBody$SM1': 'ctl00$CPHBody$SM1|ctl00$CPHBody$DropDownListMonth'
            }
            with requests.Session() as session:
                # GET first to harvest the ASP.NET hidden state fields the
                # server requires on postback.
                landing = session.get(base_url + 'judgement.aspx')
                soup = BeautifulSoup(landing.content, "html.parser")
                form_fields["__VIEWSTATE"] = soup.select_one("#__VIEWSTATE")["value"]
                form_fields["__VIEWSTATEGENERATOR"] = soup.select_one("#__VIEWSTATEGENERATOR")["value"]
                form_fields["__EVENTVALIDATION"] = soup.select_one("#__EVENTVALIDATION")["value"]
                update_query("UPDATE Tracker SET Start_Date = '" + str(month_year) +
                             "' WHERE Name = '" + str(court_name) + "'")
                page = session.post(base_url + 'judgement.aspx', data=form_fields).text
                lowered = page.lower()
                if "no records were found." in lowered or "application error" in lowered:
                    logging.error("NO data Found for start date: " + str(month_year))
                    update_query(
                        "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '" +
                        str(court_name) + "'")
                    continue
                if not parse_html(page, court_name):
                    logging.error("Failed to parse data")
        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
def request_data(headers, start_date, end_date_):
    """Walk dd-mm-YYYY dates in 180-day windows, posting the by-date search form
    and delegating result pages to offset_link.

    Returns True on completion or emergency stop, False on an unhandled exception.

    NOTE(review): `court_name` is referenced throughout but is not a parameter —
    presumably a module-level global in this file; confirm before reuse.
    """
    try:
        url = base_url + "/ByDate.php"
        i = 0
        while True:
            i += 1
            emergency_exit = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
            if emergency_exit['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True
            end_date = (datetime.datetime.strptime(str(start_date), "%d-%m-%Y") +
                        datetime.timedelta(days=180)).strftime("%d-%m-%Y")
            if datetime.datetime.strptime(str(end_date_), "%d-%m-%Y") + datetime.timedelta(days=180) < \
                    datetime.datetime.strptime(str(end_date), "%d-%m-%Y"):
                logging.error("DONE")
                break
            update_query("UPDATE Tracker SET Start_Date = '" + str(start_date) +
                         "', End_Date = '" + str(end_date) + "' WHERE Name = '" +
                         str(court_name) + "'")
            # BUG FIX: the form wants day/month without a LEADING zero.  The old
            # code used .replace("0", ""), which removed every zero — day "20"
            # became "2" and month "10" became "1".  lstrip("0") only strips
            # leading zeros ("01" -> "1", "20" -> "20").
            payload = "date_day=" + str(start_date[0:2]).lstrip("0") + \
                      "&date_month=" + str(start_date[3:5]).lstrip("0") + \
                      "&date_year=" + str(start_date[6:]) + \
                      "&date_day1=" + str(end_date[0:2]).lstrip("0") + \
                      "&date_month1=" + str(end_date[3:5]).lstrip("0") + \
                      "&date_year1=" + str(end_date[6:]) + \
                      "&submit=Submit"
            response = requests.request("POST", url, data=payload,
                                        headers=headers, proxies=proxy_dict)
            res = response.text
            if "invalid inputs given" in res.lower():
                logging.error("NO data Found for start date: " + str(start_date))
                update_query(
                    "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '" +
                    str(court_name) + "'")
                start_date = end_date
                continue
            if not offset_link(res, headers):
                logging.error("Failed to parse data from date: " + str(start_date))
            start_date = end_date
        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
def request_data(court_name, start_date, end_date_):
    """For each month in range, replay the Drupal AJAX judgment-search form and
    parse the HTML fragment returned in the JSON response.  Data only exists
    from two-digit year 11 onward.

    Returns True on completion or emergency stop, False on an unhandled exception.
    """
    try:
        url = base_url + "/hcs/hcourt/hg_judgement_search"
        headers = {
            'Content-Type': "application/x-www-form-urlencoded",
            'Accept': "application/json",
            'Cache-Control': "no-cache"
        }
        if int(start_date[-2:]) < 11:
            update_query(
                "UPDATE Tracker SET status = 'IN_NO_DATA_FOUND', emergency_exit=true WHERE Name = '" +
                str(court_name) + "'")
            if int(end_date_[-2:]) < 11:
                update_history_tracker(court_name)
                return True
        for month_year in month_list_([str(start_date), str(end_date_)]):
            # The site's year dropdown is offset by 10 from the two-digit year.
            year = int(month_year[-2:]) - 10
            row = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
            if row['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True
            update_query("UPDATE Tracker SET Start_Date = '" + str(month_year) +
                         "', End_Date = '" + str(end_date_) + "' WHERE Name = '" +
                         str(court_name) + "'")
            querystring = {"ajax_form": "1", "_wrapper_format": "drupal_ajax"}
            payload = ("form_build_id=form-BS37MKVfuGmv9fgHWUqr3U9nFCjolonq-Nnenj3Ks24"
                       "&form_id=ajax_example_form"
                       "&ordermonth=" + str(month_year[:-2]).lstrip("0") +
                       "&orderyear=" + str(year) +
                       "&_triggering_element_name=op"
                       "&_triggering_element_value=Search"
                       "&_drupal_ajax=1"
                       "&ajax_page_state%5Btheme%5D=mytheme"
                       "&ajax_page_state%5Btheme_token%5D=%20"
                       "&ajax_page_state%5Blibraries%5D=asset_injector%2Fcss%2Fanimation_accordin%2Casset_injector"
                       "%2Fcss%2Fside_bar%2Casset_injector%2Fcss%2Ftable%2Casset_injector%2Fjs%2Fseperate_tab_%2C"
                       "core%2Fdrupal.ajax%2Ccore%2Fhtml5shiv%2Ccore%2Fjquery.form%2Cmytheme%2Fmylibrarynew%2C"
                       "system%2Fbase%2Cviews%2Fviews.module")
            response = requests.request("POST", url, data=payload, headers=headers,
                                        params=querystring, proxies=proxy_dict)
            # The Drupal AJAX reply is a list of commands; the rendered result
            # table rides in the first entry that carries a "data" key.
            json_res = json.loads(response.text)
            res = None
            for command in json_res:
                if "data" in command:
                    res = BeautifulSoup(str(command['data']), "html.parser")
                    break
            if res is None:
                logging.error("NO data Found for start date: " + str(month_year))
                update_query(
                    "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '" +
                    str(court_name) + "'")
                continue
            if not parse_html(res, court_name):
                logging.error("Failed to parse data from date: " + str(month_year))
        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
def request_data(court_name, m_sideflg, start_date, end_date_):
    """Walk dd-mm-YYYY dates in 180-day windows, posting the order-query form
    for side flag `m_sideflg` and parsing each response.

    Returns True on completion or emergency stop, False on an unhandled exception.
    """
    try:
        url = base_url + "ordqryrepact_action.php"
        headers = {
            'Content-Type': "application/x-www-form-urlencoded",
            'Cache-Control': "no-cache"
        }
        attempt = 0
        while True:
            attempt += 1
            row = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
            if row['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True
            end_date = (datetime.datetime.strptime(str(start_date), "%d-%m-%Y") +
                        datetime.timedelta(days=180)).strftime("%d-%m-%Y")
            limit = datetime.datetime.strptime(end_date_, "%d-%m-%Y") + datetime.timedelta(days=180)
            if limit < datetime.datetime.strptime(str(end_date), "%d-%m-%Y"):
                logging.error("DONE")
                break
            update_query("UPDATE Tracker SET Start_Date = '" + str(start_date) +
                         "', End_Date = '" + str(end_date) + "' WHERE Name = '" +
                         str(court_name) + "'")
            payload = ("pageno=1"
                       "&frmaction="
                       "&m_sideflg=" + str(m_sideflg) +
                       "&actcode=0"
                       "&frmdate=" + str(start_date) +
                       "&todate=" + str(end_date))
            # NOTE(review): unlike the sibling scrapers, this request is sent
            # without proxies=proxy_dict — confirm whether that is intentional.
            page = requests.request("POST", url, data=payload, headers=headers).text
            if "invalid inputs given" in page.lower():
                logging.error("NO data Found for start date: " + str(start_date))
                update_query(
                    "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '" +
                    str(court_name) + "'")
                start_date = end_date
                continue
            if not parse_html(page, court_name, m_sideflg):
                logging.error("Failed to parse data from date: " + str(start_date))
            start_date = end_date
        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
def request_data(court_name, start_date, end_date_):
    """Walk dd-mm-YYYY dates one day at a time, posting the day/month/year
    judgment-query form and parsing each response.

    Returns True on completion or emergency stop, False on an unhandled exception.
    """
    try:
        url = base_url + "date_JQ.asp"
        headers = {
            'Content-Type': "application/x-www-form-urlencoded",
            'Cache-Control': "no-cache"
        }
        attempt = 0
        while True:
            attempt += 1
            row = select_one_query(
                "SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
            if row['emergency_exit'] == 1:
                update_history_tracker(court_name)
                return True
            end_date = (datetime.datetime.strptime(str(start_date), "%d-%m-%Y") +
                        datetime.timedelta(days=1)).strftime("%d-%m-%Y")
            limit = datetime.datetime.strptime(end_date_, "%d-%m-%Y") + datetime.timedelta(days=1)
            if limit < datetime.datetime.strptime(str(end_date), "%d-%m-%Y"):
                logging.error("DONE")
                break
            update_query("UPDATE Tracker SET Start_Date = '" + str(start_date) +
                         "', End_Date = '" + str(end_date) + "' WHERE Name = '" +
                         str(court_name) + "'")
            # The form wants day/month without leading zeros.
            payload = ("txtday=" + str(start_date[0:2]).lstrip('0') +
                       "&txtmonth=" + str(start_date[3:5]).lstrip('0') +
                       "&txtyear=" + str(start_date[-4:]))
            page = requests.request("POST", url, data=payload,
                                    headers=headers, proxies=proxy_dict).text
            if "no judgement found for your search" in page.lower():
                logging.error("NO data Found for start date: " + str(start_date))
                update_query(
                    "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '" +
                    str(court_name) + "'")
                start_date = end_date
                continue
            if not parse_html(page, court_name):
                logging.error("Failed to parse data from date: " + str(start_date))
            start_date = end_date
        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False
def request_data(court_name, headers, start_date, end_date_):
    """Walk dd-mm-YYYY dates one day at a time, posting the judgment form for
    each of the three benches (IND, JBP, GWL) and parsing each response.

    Returns True on completion or emergency stop, False on an unhandled exception.
    """
    try:
        url = base_url + '/php/hc/judgement/judgement_pro_all.php'
        attempt = 0
        while True:
            attempt += 1
            end_date = (datetime.datetime.strptime(str(start_date), "%d-%m-%Y") +
                        datetime.timedelta(days=1)).strftime("%d-%m-%Y")
            limit = datetime.datetime.strptime(str(end_date_), "%d-%m-%Y") + datetime.timedelta(days=1)
            if limit < datetime.datetime.strptime(str(end_date), "%d-%m-%Y"):
                logging.error("END date Exceed.")
                break
            benches = ['IND', 'JBP', 'GWL']
            for bench in benches:
                row = select_one_query(
                    "SELECT emergency_exit FROM Tracker WHERE Name='" + court_name + "'")
                if row['emergency_exit'] == 1:
                    update_history_tracker(court_name)
                    return True
                update_query("UPDATE Tracker SET Start_Date = '" + str(start_date) +
                             "', End_Date = '" + str(end_date) + "' WHERE Name = '" +
                             str(court_name) + "'")
                payload = ("lst_judge=0"
                           "&lst_pet="
                           "&txtparty="
                           "&lst_counsel="
                           "&txtcounsel="
                           "&date1=" + str(start_date) +
                           "&date2=" + str(end_date) +
                           "&court=" + str(bench) +
                           "&lst_judge1=0"
                           "&lst_judge2=0"
                           "&btn_search=is"
                           "&bench="
                           "&sort=jo"
                           "&ad=DESC"
                           "&code=")
                # The AFR-only flag flips for judgments after 2014.
                if int(end_date[-4:]) <= 2014 and int(start_date[-4:]) <= 2014:
                    payload += "&onlyafr=N"
                else:
                    payload += "&onlyafr=Y"
                page = requests.request("POST", url, data=payload,
                                        headers=headers, proxies=proxy_dict).text
                # Sentinel string is reproduced exactly as the site emits it
                # (including its "jugdement" misspelling).
                if "no jugdement or order found that you want to search" in page.lower():
                    logging.error("NO data Found for start date: " + str(start_date))
                    update_query(
                        "UPDATE Tracker SET No_Year_NoData = No_Year_NoData + 1 WHERE Name = '" +
                        str(court_name) + "'")
                    sleep(2)
                    start_date = end_date
                    continue
                if not parse_html(page, court_name, bench):
                    logging.error("Failed to parse data from date: " + str(start_date))
                start_date = end_date
        return True
    except Exception as e:
        traceback.print_exc()
        logging.error("Failed to get data from date: " + str(start_date))
        logging.error("Failed to request: %s", e)
        return False