def GroupDBMaster(self):
    logger.debug('Dynamic Scrapping Consumer --- Group Master Called')
    Groupdb = DynamicCommonConnection.MySQLConnection()
    group = Groupdb.cursor()
    group.execute("select * from tbl_Bli_GroupMaster")
    grouplist = []
    for row in group.fetchall():
        BusinessType = row[6]
        # Retail and Hotel rows are handled identically: map group name to consumer count.
        if "Retail" in BusinessType or "Hotel" in BusinessType:
            grouplist.append((row[1], row[2] or 0))
    grouplist_dict = dict(grouplist)
    group.close()
    Groupdb.close()
    return grouplist_dict
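# Usage sketch (assumption, not from the source): GroupDBMaster() maps each
# group name (column 1 of tbl_Bli_GroupMaster) to its consumer count
# (column 2), e.g. {'RS': 2, 'Arrow': 1}; Main() then starts that many
# consumers per queue. Sample names and counts here are invented.
groups = DynamicConsumer().GroupDBMaster()
for group_name, consumer_count in groups.items():
    print(group_name, '->', consumer_count, 'consumer(s)')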
def DatabaseQueryCall(self):
    logger.debug("Dynamic Parser Producer --- Database Called")
    '''
    Query Logic task
    :return:
    '''
    print("Parser Database Called")
    last_updated_date = self.mongodb.ParserQueueUpdate.find_one({'_id': 1})
    Last_Parser_update_date = last_updated_date['QueueUpdateDateTime']
    # records = db.HTMLRepository.find({'$and': [{'TimeStamp': {'$gte': Last_Parser_update_date}}]})
    # Materialise the cursor so the emptiness check below works; a PyMongo
    # cursor object is always truthy.
    records = list(self.mongodb.HTMLRepository.find({'parsingStatus': 1}))
    print(records)
    if not records:
        SYSdate = datetime.datetime.now()
        self.mongodb.ParserQueueUpdate.update({'PARSER': '1'}, {
            "$set": {
                'QueueUpdateDateTime':
                    datetime.datetime.strftime(SYSdate, '%Y-%m-%d %H:%M:%S')
            }
        })
    return records
def callback(self, ch, method, properties, body):
    print("Receiving Messages -- %r" % body)
    print("Consumer running Time", datetime.datetime.now())
    try:
        data = body.decode('utf-8')
        consume_data = data.replace("'", "\"")
        consume_data = json.loads(consume_data)  # convert string to python dict
    except Exception as e:
        # Fallback for bodies that are python-dict reprs rather than JSON;
        # ast.literal_eval (requires `import ast`) is a safe replacement for
        # the original eval() here.
        data = body.decode('utf-8')
        consume_data = ast.literal_eval(data)
        logger.error("Consumer Input JSON not properly serialized: " + str(e))
    scexec = ScriptsExecution(consume_data)
    data = scexec.run()
    ch.basic_ack(delivery_tag=method.delivery_tag)
    logger.debug('Crawling done')
def Main(self):
    '''
    Main will be called for all the functioning
    '''
    try:
        logger.debug('Dynamic Scrapping Consumer ---main Function called')
        grouplistDict = self.GroupDBMaster()
        channel = self.RabbitConnection()
        print(grouplistDict)
        for key, value in grouplistDict.items():
            print("Sequence", key)
            # Register as many consumers on each queue as the group master specifies.
            for _ in range(int(value)):
                try:
                    channel.basic_consume(DynamicConsumer.callback,
                                          queue=str(key),
                                          consumer_tag=None)
                except pika.exceptions.ChannelClosed:
                    logger.error('Closed or no Queue ' + key)
                    print('Closed or no Queue ' + key)
                    # A closed channel is unusable; open a fresh one and continue.
                    channel = self.RabbitConnection()
        channel.start_consuming()
    except pika.exceptions.ConnectionClosed:
        DynamicConsumer().Main()
def newconsume():
    logger.debug('Dynamic Scrapping Reparse --- Ready to start')
    obj = ReparseConsumer()
    # t1 = threading.Thread(target=obj.Main, args=())
    # t1.start()
    obj.Main()
    print("Consumer running Time", datetime.datetime.now())
    time.sleep(0.1)
def Classification(self):
    logger.debug("Classification Called")
    if self.IsCategory in (1, '1'):
        return ScriptsExecution.Category(self)
    else:
        # return self.Product()
        return ScriptsExecution.Product(self)
def RabbitConnection(self):
    logger.debug('Dynamic Scrapping Consumer --- rabbitMQ Connection called')
    connection = pika.BlockingConnection(pika.ConnectionParameters('localhost'))
    channel = connection.channel()
    # Deliver one unacknowledged message at a time to each consumer.
    channel.basic_qos(prefetch_count=1)
    return channel
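# Minimal wiring sketch (not in the source): pair RabbitConnection() with the
# callback above using the pika 0.x basic_consume signature this code relies
# on. Queue name 'RS' is a placeholder taken from QueueGetCount() below.
consumer = DynamicConsumer()
channel = consumer.RabbitConnection()
channel.basic_consume(DynamicConsumer.callback, queue='RS', consumer_tag=None)
channel.start_consuming()  # blocks; messages are acked inside the callback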
def __init__(self):
    # threading.Thread.__init__(self)
    self.connection = pika.BlockingConnection(
        pika.ConnectionParameters(host='localhost'))
    self.channel = self.connection.channel()
    # Queue arguments; applied where the queues are declared.
    args = {}
    args["x-max-length"] = 100000000
    args['x-max-priority'] = 9
    logger.debug("Dynamic Queue Connection established")
def getRequest():
    # Connection details come from configuration rather than being hard-coded.
    db = pymysql.connect(**crawling_producer_config.get_pymysql_kwargs)
    cur = db.cursor()
    print("running")
    logger.debug('Database Connection Established')
    cur.callproc('spGetRequestRunDetail')
    res = cur.fetchall()
    for r in res:
        print(r)
        print(r[0], r[1], r[-1])
        SaveRequest(r[0], r[1], r[-1], cur, db)
        UpdateStatus(r[0], cur, db)
    cur.close()
def __init__(self):
    threading.Thread.__init__(self)
    self.connection = pika.BlockingConnection(
        pika.ConnectionParameters(host='localhost'))
    self.channel = self.connection.channel()
    args = {}
    args["x-max-length"] = 10000000
    args['x-max-priority'] = 9
    self.mongodb = CommonConnection.MongoConnection()
    self.db = CommonConnection.MySQLConnection()
    self.RabbitCon = CommonConnection.RabbitMQConnection()
    self.IPAddr = CommonConnection.ServivesIP()
    logger.debug("Dynamic Queue Connection established")
def ParserDynamicDBConnection(self):
    '''
    DB Connection here
    :return:
    '''
    logger.debug('Dynamic Parsing Consumer --- Group Master Called')
    Groupdb = CommonConnection.MySQLConnection()
    group = Groupdb.cursor()
    group.execute("select * from tbl_Bli_GroupMaster")
    grouplist = []
    for row in group.fetchall():
        grouplist.append((row[1], row[2] or 0))
    grouplist_dict = dict(grouplist)
    group.close()
    Groupdb.close()
    return grouplist_dict
def UpdateStatus(RequestRunId, cur, db):
    print('updated')
    try:
        args = [RequestRunId]
        cur.callproc('spUpdateRequestStatus', args)
        db.commit()
    except Exception as e:
        logger.debug('Error Returned by spUpdateRequestStatus Query: %s', str(e))
        return jsonify({'StatusCode': 500, 'ResultData': str(e)})
def MessageQueryCall(self):
    logger.debug("Dynamic Scrapping Producer -- Queue Called")
    db = CommonConnection.MySQLConnection()
    DBMessages = db.cursor()
    '''
    SP Called Location
    '''
    DBMessages.callproc("MessagingHotelQueue")
    message = DBMessages.fetchall()
    logger.debug("Dynamic Queue HOTEL Db Connection called")
    DBMessages.close()
    db.close()
    return message
def QueueGetCount(self):
    logger.debug('Dynamic Scrapping Consumer --- To Get Queue Count called')
    '''
    To get Count How many Messages is in Queue
    :return:
    '''
    args = {}
    args["x-max-length"] = 100000000
    args['x-max-priority'] = 9
    # Re-declaring an existing queue is idempotent and returns its message count.
    self.Count_QUEUE1 = self.channel.queue_declare(
        queue='RS', durable=True, arguments=args).method.message_count
    self.Count_QUEUE2 = self.channel.queue_declare(
        queue='Arrow', durable=True, arguments=args).method.message_count
    self.Count_QUEUE3 = self.channel.queue_declare(
        queue='Conrad', durable=True, arguments=args).method.message_count
    return self.Count_QUEUE1, self.Count_QUEUE2, self.Count_QUEUE3
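# Usage sketch (assumption): poll the three hard-coded queues and report any
# backlog; 'consumer' is a hypothetical DynamicConsumer instance.
rs_count, arrow_count, conrad_count = consumer.QueueGetCount()
for name, count in (('RS', rs_count), ('Arrow', arrow_count), ('Conrad', conrad_count)):
    if count:
        logger.debug('%s queue backlog: %d message(s)', name, count)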
def __init__(self, consume_data):
    self.RequestInputs = consume_data
    self.IsPreview = "No"
    self.StartTime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    self.RequestId = consume_data["RequestId"]
    self.RequestRunId = consume_data["RequestRunId"]
    self.SubRequestId = consume_data["SubRequestId"]
    self.RequestUrl = consume_data['RequestUrl']
    self.DomainName = consume_data["DomainName"]
    self.PointOfSale = consume_data["PointOfSale"]
    self.IsCategory = consume_data["IsCategory"]
    self.ScraperScript = consume_data["ScraperScript"]
    self.ParserScript = consume_data["ParserScript"]
    self.Country = consume_data["Country"]
    self.ScraperModuleName = ''
    # Added by Ankush for Dynamic Queuing
    self.GroupName = consume_data['GroupName']
    if consume_data["ScraperScript"]:
        # Module name is the script name without its .py extension; anchor the
        # pattern so only the trailing extension is stripped.
        self.ScraperModuleName = re.sub(r"\.py$", "", consume_data["ScraperScript"])
    if consume_data["ParserScript"]:
        self.ParserModuleName = re.sub(r"\.py$", "", consume_data["ParserScript"])
    logger.debug('Initialisation complete...:' + str(self.RequestUrl))
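# Illustrative message shape (all values invented): the dict a producer
# publishes and that ScriptsExecution.__init__ above expects.
example_consume_data = {
    "RequestId": 1, "RequestRunId": 1, "SubRequestId": 1,
    "RequestUrl": "http://www.example.com/product/123",
    "DomainName": "example.com", "PointOfSale": "IT", "IsCategory": "0",
    "ScraperScript": "ScrapperExample_IT.py", "ParserScript": "ParserExample_IT.py",
    "Country": "Italy", "GroupName": "RS",
}
scexec = ScriptsExecution(example_consume_data)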
def Product(self):
    logger.debug("Product Called from Category :" + str(self.RequestUrl))
    Instance = ScriptsExecution.getInstance(self)
    result_dict = Instance.getProductCrawl()  # renamed from `dict` to avoid shadowing the builtin
    result_dict['startDT'] = self.StartTime
    EndTime = datetime.datetime.now()
    result_dict['endDT'] = EndTime.strftime('%Y-%m-%d %H:%M:%S')
    result_dict['requestId'] = int(self.RequestId)
    result_dict['subRequestId'] = int(self.SubRequestId)
    result_dict['RequestRunId'] = int(self.RequestRunId)  # Added by Ankush
    result_dict['ParserScript'] = str(self.ParserScript)  # added by ankush
    result_dict['groupName'] = self.GroupName
    result_dict['parsingStatus'] = 1  # Message ready for Parsing
    r = None
    try:
        r = json.dumps(result_dict)
    except Exception:
        logger.error('JSON Dump error: ', exc_info=True)
    logger.debug('Scrapping Done : ' + self.RequestUrl)
    self.result = requests.post(
        'http://192.168.8.7/site3/api/v1/SaveSourceHtml', json=r)
    logger.debug("Database Saving Response for " + str(self.RequestId) + " : " +
                 self.RequestUrl + " : " + str(self.result.content))
    print("Saved Response------", self.result.content)
    try:
        Data = json.dumps(self.RequestInputs)
        updateSubID = requests.post(
            'http://192.168.8.7/site7/api/v1/update_subrequest_details', json=Data)
        print("UPDATE SubRequestID Service", updateSubID)
    except Exception:
        pass
    return self.result.content
def run(self):
    SUBREQUESTID = []
    logger.debug("Dynamic Parser Producer --- Main Function called")
    records = ParserDynamicProducer.DatabaseQueryCall(self)
    for msg in records:
        if msg:
            # Route each record to its group-specific parser queue.
            self.channel.basic_publish(exchange='',
                                       routing_key="Parser" + str(msg['groupName']),
                                       body=str(msg))
            print("Queue Sending Messages", msg)
            SUBREQUESTID.append(msg['subRequestId'])
    self.connection.close()
    ParserDynamicProducer.UpdateSUBRequest(self, SUBREQUESTID)
    logger.debug("Dynamic Queue Connection closed successfully")
elif data == "Access Denied": AccessDenied = DATA_MAKER(consume_data) print("Access Denied", AccessDenied) DataInsert = db.PNFData.insert(AccessDenied) ch.basic_ack(delivery_tag=method.delivery_tag) else: ch.basic_ack(delivery_tag=method.delivery_tag) ''' Below Code commented and added in startParserConsumerServices.py file ''' print("Connected") channel = Rabbit_connection.ParserQueueConnection( "") # calling Category Queue Connection class channel.basic_qos(prefetch_count=1) channel.basic_consume(callback, queue='Parser') channel.start_consuming() logger.debug('Consumer Ready to start') t1 = threading.Thread(target=ParserConsumer, args=[]) t1.start() time.sleep(0.08)
def SaveRequest(RequestRunId, RequestId, ReqModeId, cur, db):
    try:
        args = [RequestId, RequestRunId, ReqModeId]
        # Code Change done for Hotel by Shrikant 04-06-2017 19:52
        cur.callproc(procname='spInsertRequestDetails', args=args)
        if ReqModeId in (2, 3):
            args1 = [RequestId]
            print("args1 1")
            print(args1)
            cur.callproc('spGetPreCrawlDetails', args=args1)
            res = cur.fetchall()
            for i in res:
                if i[6] == 1:  # Based on Boardtype ID: crawl each matching weekday in the date range.
                    startDate = i[4]
                    endDate = i[5]
                    for n in range((endDate - startDate).days + 1):
                        thisdate = startDate + timedelta(n)
                        if thisdate.strftime('%A') in i[7]:
                            args2 = [i[1], RequestRunId, i[0], thisdate]
                            print('Arg2', args2)
                            if ReqModeId == 2:
                                cur.callproc('spInsertHotelCrawlRequestDetails', args=args2)
                            elif ReqModeId == 3:
                                cur.callproc('spInsertHotelFlightCrawlRequestDetails', args=args2)
                            db.commit()
                elif i[6] == 2:  # Offsets in days from today.
                    for advancedt in i[10].split(','):
                        thisdate = date.today() + timedelta(int(advancedt))
                        args2 = [i[1], RequestRunId, i[0], thisdate]
                        print('Arg2 1', args2)
                        if ReqModeId == 2:
                            cur.callproc('spInsertHotelCrawlRequestDetails', args=args2)
                        elif ReqModeId == 3:
                            cur.callproc('spInsertHotelFlightCrawlRequestDetails', args=args2)
                        db.commit()
                elif i[6] == 3:  # Explicit comma-separated m/d/Y dates.
                    for advancedt in i[10].split(','):
                        thisdate = datetime.strptime(advancedt, '%m/%d/%Y').date()
                        args2 = [i[1], RequestRunId, i[0], thisdate]
                        print('Arg2 2', args2)
                        if ReqModeId == 2:
                            cur.callproc('spInsertHotelCrawlRequestDetails', args=args2)
                        elif ReqModeId == 3:
                            cur.callproc('spInsertHotelFlightCrawlRequestDetails', args=args2)
                        db.commit()
                elif i[6] == 4:  # Offsets in weeks from today.
                    for advancedt in i[10].split(','):
                        thisdate = date.today() + timedelta(int(advancedt) * 7)
                        args2 = [i[1], RequestRunId, i[0], thisdate]
                        print('Arg2 3', args2)
                        if ReqModeId == 2:
                            cur.callproc('spInsertHotelCrawlRequestDetails', args=args2)
                        elif ReqModeId == 3:
                            cur.callproc('spInsertHotelFlightCrawlRequestDetails', args=args2)
                        db.commit()
    except Exception as e:
        logger.debug('Error Returned by spInsertRequestDetails Query: %s', str(e))
        return jsonify({'StatusCode': 500, 'ResultData': str(e)})
# channel = Rabbit_connection.CrawlerQueueConnection("")  # calling Category Queue Connection class
# channel.basic_qos(prefetch_count=1)
# channel.basic_consume(callback, queue='Crawler')
# channel.start_consuming()

'''
logger.debug('Consumer Ready to start')
t1 = threading.Thread(target=CrawlerConsumer, args=[])
t1.start()
print("Consumer running Time", datetime.datetime.now())
time.sleep(0.1)
'''
def callback(ch, method, properties, body):
    print("parser Consumer Message time", datetime.datetime.now())
    logger.debug('Consumer Ready to start')
    print("Receiving Messages --", body)
    data = body.decode('utf-8')
    # The body is a python-dict repr, so parse it safely with literal_eval.
    consume_data = ast.literal_eval(data)
    subRequestId = consume_data['subRequestId']
    client = MongoClient('localhost', 27017)
    # client = MongoClient('192.168.7.134', 27017)
    db = client.HTMLDumps
    consume_data = db.HTMLRepository.find_one({'subRequestId': subRequestId})
    if consume_data['IsCategory'] == "Yes":
        json_data = json.dumps(consume_data)
        result = None  # initialised so the checks below are safe if the post fails
        try:
            result = requests.post(
                'http://192.168.8.7/site3/api/v1/SaveResponseData', json=consume_data)
        except Exception:
            pass
        print(result)
        if result:
            ch.basic_ack(delivery_tag=method.delivery_tag)
    else:
        data = ScriptsExecution.ConsumerRequestData('', **consume_data)
        if data == "PNF":
            PNFData = DATA_MAKER(consume_data)
            print("PNFDATA", PNFData)
            ch.basic_ack(delivery_tag=method.delivery_tag)
            DataInsert = db.PNFData.insert(PNFData)
        elif data == "Access Denied":
            AccessDenied = DATA_MAKER(consume_data)
            print("Access Denied", AccessDenied)
            DataInsert = db.PNFData.insert(AccessDenied)
            ch.basic_ack(delivery_tag=method.delivery_tag)
        else:
            ch.basic_ack(delivery_tag=method.delivery_tag)
def run(self):
    logger.debug("Dynamic Scrapping Producer --- Main function Called")
    '''
    Database Query Call
    '''
    message = DynamicProducer.MessageQueryCall(self)
    if message:
        DumpDict = []
        domain_ids = []
        for row in message:
            row_list = [
                ("RequestId", row[0]),
                ("SubRequestId", row[1]),
                ("RequestRunId", row[2]),
                ("RequestUrl", str(row[3]) or ''),
                ('IsCategory', row[4]),
                ("DomainName", row[5] or ''),
                ('ParserScript', row[6] or ''),
                ('ScraperScript', row[7] or ''),
                ("PointOfSale", row[8] or ''),
                ('Country', row[9] or ''),
                ('Region', "India"),
                ('GroupName', row[10] or ""),
                ("DomainID", row[11] or ''),
                ("BusinessType", "Retail")
            ]
            data_row_dict = dict(row_list)
            data_row_dict.update({
                "RequestInput": {"RequestUrl": data_row_dict['RequestUrl']}
            })
            DumpDict.append(data_row_dict)
        for newDump in DumpDict:
            domain_ids.append(newDump['DomainID'])
        # Interleave domain-wise when more than one domain is queued.
        if len(set(domain_ids)) > 1:
            NewMessageDump = DynamicProducer.DomainWiseFetchALL("", DumpDict)
            Multi_SubRequest = []
            for message in NewMessageDump:
                self.channel.basic_publish(exchange='',
                                           routing_key=str(message['GroupName']),
                                           body=str(message))
                print("Queue Sending Multiple Domain Messages -----------", message)
                Multi_SubRequest.append(message['SubRequestId'])
            MultiStatusUpdate = DynamicProducer.UpdateStatusPushedToQueue(Multi_SubRequest)
            print("Messages Pushed in and Status updated in MySQL crawl table")
        else:
            Single_SubRequest = []
            SingleFetch = DynamicProducer.SingleDomainEntry("", DumpDict)
            for single_message in SingleFetch:
                self.channel.basic_publish(exchange='',
                                           routing_key=str(single_message['GroupName']),
                                           body=str(single_message))
                print("Queue Sending Single Domain Messages ------------", single_message)
                Single_SubRequest.append(single_message['SubRequestId'])
            MultiStatusUpdate = DynamicProducer.UpdateStatusPushedToQueue(Single_SubRequest)
            if MultiStatusUpdate == "Successfully updated Status":
                time.sleep(10)
                t2 = DynamicProducer()
                # t2.start()
    self.connection.close()
    logger.debug("Dynamic Queue Connection closed successfully")
def run(self):
    logger.debug("Dynamic Scrapping Producer --- Main function Called")
    '''
    Database Query Call
    '''
    message = HotelCrawling.MessageQueryCall(self)
    if message:
        DumpDict = []
        domain_ids = []
        for row in message:
            row_list = [
                ("requestId", row[0]),
                ("subRequestId", row[1]),
                ("requestRunId", row[2]),
                ('IsCategory', row[3]),
                ("DomainName", row[4] or ''),
                ('ParserScript', row[5] or ''),
                ('ScraperScript', row[6] or ''),
                ('GroupName', row[7] or ""),
                ("DomainID", row[8] or ''),
                ("RequestUrl", ''),
                ("PointOfSale", ''),
                ("BusinessType", "Hotel"),
                ("country", row[19] or "")
            ]
            data_row_dict = dict(row_list)
            data_row_dict.update({
                "RequestInputs": {
                    "RequestUrl": data_row_dict['RequestUrl'],
                    "checkIn": str(row[9]) or "",
                    "nights": row[10] or "",
                    "CompetitorName": row[11] or "",
                    "pos": row[12] or "",
                    "adults": row[13] or "",
                    "children": row[14] or "",
                    "CrawlMode": row[15] or "",
                    "hotelName": row[16] or "",
                    "webSiteHotelId": row[17] or "",
                    "city": row[18] or "",
                    "starRating": row[20] or "",
                    "board": row[21] or "",
                    "room": row[22] or ""
                },
            })
            DumpDict.append(data_row_dict)
        for newDump in DumpDict:
            domain_ids.append(newDump['DomainID'])
        if len(set(domain_ids)) > 1:
            NewMessageDump = HotelCrawling.DomainWiseFetchALL("", DumpDict)
            Multi_SubRequest = []
            for message in NewMessageDump:
                self.channel.basic_publish(exchange='',
                                           routing_key=str(message['GroupName']),
                                           body=str(message))
                print("Queue Sending Multiple Domain Messages -----------", message)
                Multi_SubRequest.append(message['subRequestId'])
            MultiStatusUpdate = HotelCrawling.UpdateStatusPushedToQueue(Multi_SubRequest)
            print("Messages Pushed in and Status updated in MySQL crawl table")
        else:
            Single_SubRequest = []
            SingleFetch = HotelCrawling.SingleDomainEntry("", DumpDict)
            for single_message in SingleFetch:
                self.channel.basic_publish(exchange='',
                                           routing_key=str(single_message['GroupName']),
                                           body=str(single_message))
                print("Queue Sending Single Domain Messages ------------", single_message)
                Single_SubRequest.append(single_message['subRequestId'])
            MultiStatusUpdate = HotelCrawling.UpdateStatusPushedToQueue(Single_SubRequest)
            if MultiStatusUpdate == "Successfully updated Status":
                time.sleep(10)
                t2 = HotelCrawling()
    self.connection.close()
    logger.debug("Dynamic Queue Connection closed successfully")
def callback(ch, method, properties, body):
    logger.debug('Dynamic Scrapping Consumer ---CallBack function called')
    '''
    :param ch: connection channel
    :param method: method name
    :param properties: priority Properties
    :param body: message
    :return:
    '''
    try:
        data = body.decode('utf-8')
        consume_data = data.replace("'", "\"")
        consume_data = json.loads(consume_data)  # convert string to python dict
    except Exception as e:
        # Fall back to parsing the python-dict repr when the body is not valid JSON.
        data = body.decode('utf-8')
        consume_data = ast.literal_eval(data)
    print('queue message for SR:%s' % consume_data['subRequestId'])
    if "Retail" in consume_data['BusinessType']:
        try:
            scexec = ScriptsExecution(consume_data)
            data = scexec.run()
            print("Received as a response", data)
        except Exception as e:
            print("Retail Script Executer return error", str(e))
    elif "Hotel" in consume_data['BusinessType']:
        error = None
        error_code = ''
        ch.basic_ack(delivery_tag=method.delivery_tag)
        try:
            crawled_data = ScriptHandler(consume_data).execute_crawl(method.redelivered)
        except core_exceptions.ScriptPNF:
            error = True
            error_code = 'script_timeout'
            crawled_data = consume_data.copy()
        crawled_hotel_count = len(crawled_data.get('hotels', list()))
        if not crawled_hotel_count and not error_code == 'script_timeout':
            error = True
            error_code = 'empty_hotels'
        print("\n\n\ncrawled hotels, error")
        print(crawled_hotel_count, bool(error), error_code)
        if error and error_code == 'empty_hotels':
            print('\n\n\nEmpty Hotels')
            core_services.MongoHandler().save_pnf(crawled_data, error)
            # Parameterised query: never interpolate the id into the SQL string.
            pnf_update_query = ("UPDATE tbl_HotelCrawlRequestDetail SET StatusId = 8 "
                                "WHERE HotelCrawlRequestDetailId = %s AND StatusId = 11")
            conn = DynamicCommonConnection.MySQLConnection()
            cur = conn.cursor()
            cur.execute(pnf_update_query, (crawled_data['subRequestId'],))
            conn.commit()
            cur.close()
            conn.close()
        elif error and error_code == 'script_timeout':
            print('\n\n\nScript Timeout')
            connection2 = pika.BlockingConnection(
                pika.ConnectionParameters(host='localhost'))
            channel2 = connection2.channel()
            channel2.queue_declare("Reparse")
            channel2.basic_publish(exchange='', routing_key='Reparse',
                                   body=json.dumps(crawled_data))
            connection2.close()
        else:
            print('\n\n\nGREAT SUCCESS')
            core_services.MongoHandler().save_successful_crawl(crawled_data)
            connection2 = pika.BlockingConnection(
                pika.ConnectionParameters(host='localhost'))
            channel2 = connection2.channel()
            channel2.queue_declare("Parser")
            channel2.basic_publish(exchange='', routing_key='Parser',
                                   body=json.dumps(crawled_data))
            connection2.close()
print("Queue Sending Single Domain Messages ------------", single_message) Single_SubRequest.append(single_message['subRequestId']) cnt += 1 MultiStatusUpdate = HotelCrawling.UpdateStatusPushedToQueue( Single_SubRequest) if MultiStatusUpdate == "Successfully updated Status": time.sleep(10) t2 = HotelCrawling() self.connection.close() logger.debug("Dynamic Queue Connection closed successfully") if __name__ == '__main__': logger.debug("Dynamic Producer Scrapping started ") # t1 = DynamicProducer() # t1.start() print("SCRIPT CALLED --- ") while True: t1 = HotelCrawling() t1.run() # t1.setDaemon(True) # t1.start() time.sleep(10)
def DBFetchData():
    '''
    Database Selection Query call function
    :return:
    '''
    logger.debug("Priority Parser Producer --- Database Function called")
    client = MongoClient('localhost', 27017)
    mongoDB = client.HTMLDumps
    db = CommonConnection.MySQLConnection()
    group = db.cursor()
    '''
    Status ID - 8 Is "Reparse" status in tbl_RequestRunDetail table
    '''
    group.execute(
        "select RequestRunId from tbl_RequestRunDetail where FK_StatusId = 10")
    data = group.fetchall()
    requestRunIDList = []
    if data:
        for row in data:
            requestRunIDList.append(row[0])
    newRequestRunId = list(set(requestRunIDList))
    messages = []
    for requestRunID in newRequestRunId:
        records = mongoDB.HTMLRepository.find({
            '$and': [{'RequestRunId': {'$eq': str(requestRunID)}},
                     {'Error': "0"}]
        })
        for row in records:
            messages.append(row)
    if messages:
        SYSdate = datetime.datetime.now()
        mongoDB.ParserQueueUpdate.update({'PARSER': '1'}, {
            "$set": {
                'ReParseQueueUpdateDateTime':
                    datetime.datetime.strftime(SYSdate, '%Y-%m-%d %H:%M:%S')
            }
        })
    if data:
        for UpdateRunID in data:
            UpdateRequestRunId = UpdateRunID[0]
            '''
            update tbl_RequestRunDetail Status = Push to Queue
            After Adding Reparse status records into ReParse Queue
            '''
            group.execute(
                "update tbl_RequestRunDetail set ReParseStatus = 'Running' "
                "where RequestRunId = %s", (UpdateRequestRunId,))
            db.commit()
    group.close()
    db.close()
    return messages
    :return:
    '''
    return True


def ProducerMain():
    messages = DBFetchData()
    channel = RabbitConnection()
    if messages:
        for row in messages:
            message = row
            priority = 9
            print(message)
            channel.basic_publish(exchange='',
                                  routing_key='parser',
                                  body=str(message),
                                  properties=pika.BasicProperties(
                                      delivery_mode=2,  # persistent message
                                      priority=priority))
    logger.debug("Priority Parser Producer called")


main = ProducerMain()
# while True:
#     print("Start:", datetime.datetime.now())
#     main = ProducerMain()
#     sleep(60)
#     print("Sleep Time ")