import pandas as pd

from db_client import DB_Client


class File:

    def __init__(self, file_path):
        self.file_path = file_path
        # Table name is the file name without directory or extension.
        self.file_name = self.file_path.split('/')[-1].split('.')[0]
        self.db = DB_Client()

    def create_table(self):
        """Read the file data and create a database table named after the file."""
        file_df = pd.read_csv(self.file_path)
        columns = file_df.columns
        data_type = []
        for col in columns:
            # Map pandas dtypes to MySQL column types; other dtypes
            # (e.g. float) are skipped by this mapping.
            if file_df[col].dtype == int:
                data_type.append((col, 'int(10)'))
            elif file_df[col].dtype == object:
                data_type.append((col, 'varchar(20)'))
        # Build "col1 type1, col2 type2" and strip the trailing ", ".
        variables = ''.join(['{} {}, '.format(col, dtype) for col, dtype in data_type])[:-2]
        try:
            # Drop any stale table of the same name; ignore the error if it
            # does not exist.
            drop_query = """DROP TABLE {table_name}""".format(table_name=self.file_name)
            self.db.exec_query(drop_query)
        except Exception:
            pass
        query = """CREATE TABLE {file_name} ({variables});""".format(
            file_name=self.file_name, variables=variables)
        print(query)  # query that creates the table
        self.db.exec_query(query)

    def upload_to_db(self):
        """Bulk-load the file into its table, skipping the header row."""
        query = """LOAD DATA LOCAL INFILE '{file_path}' INTO TABLE {table_name}
                   FIELDS TERMINATED BY ',' LINES TERMINATED BY '\\n'
                   IGNORE 1 ROWS;""".format(file_path=self.file_path,
                                            table_name=self.file_name)
        result = self.db.exec_query(query)
        print(result)

    def read_records(self):
        """Read all records from the table."""
        query = """SELECT * FROM {table_name}""".format(table_name=self.file_name)
        return self.db.exec_query(query)


# ob = File("/home/shivam/Downloads/paygapp-master_version4/uploads/Glass_Door_data.csv")
# ob.create_table()
# ob.upload_to_db()
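# Illustration (hypothetical CSV 'employees.csv' with a string column "name"
# and an integer column "age"): create_table() would print and execute
# roughly the following, with the table named after the file:
#
#   DROP TABLE employees;
#   CREATE TABLE employees (name varchar(20), age int(10));
#
# upload_to_db() then bulk-loads the rows with LOAD DATA LOCAL INFILE,
# skipping the CSV header line.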
def main():
    # msg is a module-level global holding the last error message.
    global msg
    db = DB_Client()
    logger.info('program started:------------------------------------')
    while True:
        try:
            record = db.get_new_feature()
            # logger.info('get %d records' % len(records))
            processed = False
            if record:
                processed = True
                logger.info('processing record:' + str(record))
                try:
                    result, features = process_feature(db, record['bbiid'])
                    logger.info('processing record %s, result:%s' % (record, result))
                    if result == 4:
                        db.update_new_feature(record['bbiid'], features)
                    db.update_new_feature_result(record['bbiid'], result, msg)
                except Exception:
                    logger.error('record process failed: record:%s, exception:%s'
                                 % (record, traceback.format_exc()))
                    msg = traceback.format_exc()
                    # 5 marks the record as failed.
                    db.update_new_feature_result(record['bbiid'], 5, msg)
            if not processed:
                # Nothing to do: release the connection and poll again shortly.
                db.close_connection()
                time.sleep(2)
        except Exception as e:
            logger.error(e)
            time.sleep(2)
import io
import subprocess
from datetime import datetime, timedelta

from db_client import DB_Client

# Path-building constants (values inferred from the example log path in
# get_log_paths below).
FILE_SEPARATOR = '/'
MONTH_LOG_SEPARATOR = '_'
LOG_FILE_DATE_SEPARATOR = '-'


class LogParser:
    '''
    Base class to parse cron log files
    '''

    def __init__(self, start_time, end_time):
        # mapping between job name and job details
        self.cron_job_meta = {}
        # mapping between job id and job name
        self.job_id_dict = {}
        self.db = DB_Client()
        results = self.db.execute_sql_command('select * from jobs')
        for job in results:
            self.cron_job_meta[job['job_name']] = job
            self.job_id_dict[job['id']] = job['job_name']
        self.start_time = start_time
        self.end_time = end_time

    def execute_command(self, command):
        '''
        command = the command to be executed; returns the running process,
        whose stdout holds the command output.
        '''
        try:
            process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
            return process
        except subprocess.CalledProcessError:
            # TO-DO: handle exception - should we retry, or raise a custom exception?
            pass

    def get_log_paths(self, job_name, start_time, end_time):
        '''
        For a given job and date range, get a month-wise dict of cron log
        file prefixes to be grepped (the caller appends the '*' wildcard):
        {'2016-10': '/home/csep/operations/dispatcher/logs/2016_10/dailyANSS1985_2016-10-'}
        '''
        job_metadata = self.cron_job_meta[job_name]
        parent_dir = job_metadata['base_path'] + job_metadata['log_dir']
        file_dict = {}
        month_directory = []
        # Logs are stored month-wise, so collect the first day of every
        # month in the range.
        if start_time.year == end_time.year and start_time.month == end_time.month:
            month_directory.append(start_time)
        else:
            st_first_day = start_time.replace(day=1)
            et_first_day = end_time.replace(day=1)
            # Walk backwards month by month from end_time to start_time.
            temp_date = et_first_day
            while temp_date != st_first_day:
                month_directory.append(temp_date)
                temp_date = temp_date - timedelta(days=1)
                temp_date = temp_date.replace(day=1)
            month_directory.append(temp_date)
        for m in month_directory:
            cur_dir = parent_dir
            cur_dir += (FILE_SEPARATOR + str(m.year) + MONTH_LOG_SEPARATOR + str(m.month))
            file_meta_path = cur_dir + FILE_SEPARATOR + job_metadata['file_base_name'] + \
                str(m.year) + LOG_FILE_DATE_SEPARATOR + str(m.month) + LOG_FILE_DATE_SEPARATOR
            month = str(m.year) + LOG_FILE_DATE_SEPARATOR + str(m.month)
            file_dict[month] = file_meta_path
        return file_dict

    def get_log_files(self, dir_list):
        '''
        Get the list of log files matching the patterns in dir_list,
        ignoring directories.
        Ex: i/p ['/home/csep-op/operations/dispatcher/logs/2016_10/dailyANSS1985_2016-10-4-*']
            o/p [file1, file2, ...]
        '''
        command = "ls -p " + ' '.join(map(str, dir_list))
        proc = self.execute_command(command)
        log_files = []
        for line in io.TextIOWrapper(proc.stdout, encoding="utf-8"):
            log_files.append(line.strip())
        return log_files

    def get_db_entries(self, job_data, start_time, end_time):
        command = ("select log_file_path from job_history "
                   "where job_id = %s and start_time > %s and end_time < %s")
        st = start_time.strftime('%Y-%m-%d %H:%M:%S')
        et = end_time.strftime('%Y-%m-%d %H:%M:%S')
        query_t = (job_data['id'], st, et)
        results = self.db.execute_sql_command(command, query_t)
        db_files = []
        for lg in results:
            db_files.append(lg['log_file_path'])
        return db_files

    def get_cron_log_files_for_job(self, job_name, start_time, end_time):
        '''
        For the given job, get the cron log files between the given dates.
        Assumes start_time and end_time are datetime objects.
        '''
        lp = self.get_log_paths(job_name, start_time, end_time)
        lpv = list(lp.values())
        # Append the shell wildcard to each month prefix.
        lpv = [i + '*' for i in lpv]
        # TO-DO: filter files from start date to end date
        log_files = self.get_log_files(lpv)
        return log_files

    def filter_logs_by_time_stamp(self, job_name, logs, start_time, end_time):
        '''
        Given the month's log files, keep only those whose file-name
        timestamp falls between start_time and end_time.
        '''
        job_metadata = self.cron_job_meta[job_name]
        base_name = job_metadata['file_base_name']
        log_files = []
        for file_path in logs:
            pos = file_path.find(base_name)
            if pos != -1:
                # e.g. ts = '2016-12-9-001001'
                ts = file_path[pos + len(base_name):]
                dt = datetime.strptime(ts, "%Y-%m-%d-%H%M%S")
                if start_time <= dt <= end_time:
                    log_files.append(file_path)
        return log_files

    def get_job_ids(self, job_data):
        job_ids = []
        for job_name, job_details in job_data.items():
            job_ids.append(job_details['id'])
        return job_ids

    def get_file_metadata(self, files):
        command = "ls -l --full-time " + ' '.join(map(str, files))
        # TO-DO: add the above files in config settings
        proc = self.execute_command(command)
        return proc

    def create_job_history_db_entries(self, log_entries):
        command = 'insert ignore into job_history(job_id,status,start_time,end_time,log_file_path)'
        command += ' values("{job_id}","{status}","{start_time}","{end_time}","{log_file_path}")'.format(
            job_id=log_entries[0]['job_id'], status=log_entries[0]['status'],
            start_time=log_entries[0]['start_time'], end_time=log_entries[0]['end_time'],
            log_file_path=log_entries[0]['file_path'])
        for entry in log_entries[1:]:
            command += ',("{job_id}","{status}","{start_time}","{end_time}","{log_file_path}")'.format(
                job_id=entry['job_id'], status=entry['status'],
                start_time=entry['start_time'], end_time=entry['end_time'],
                log_file_path=entry['file_path'])
        print('Creating Database entries\n', command)
        results = self.db.execute_sql_command(command)
        return results

    def get_job_name_from_log_file(self, log):
        for job_name, job_details in self.cron_job_meta.items():
            if job_details['file_base_name'] in log:
                # Two jobs share the 'global_one_year_' base name and are
                # disambiguated by their version string.
                if job_details['file_base_name'] == 'global_one_year_':
                    if 'global_one_year_V12.1_' in log:
                        return 'Dispatcher_Global_One_Year_V12.1'
                    elif 'global_one_year_V14.1_' in log:
                        return 'Dispatcher_Global_One_Year_V14.1'
                    else:
                        return job_name
                else:
                    return job_name
        return None

    # FIX-THIS: use base-file-name and extract timestamp
    def extract_time_from_file_name(self, file_name):
        start_time_stamp = file_name.split('_')[-1].split('-')
        seconds = start_time_stamp[-1]
        if len(seconds) != 6:
            return None
        # Turn 'HHMMSS' into 'HH:MM:SS'.
        formatted_seconds_timestamp = seconds[0:2] + ":" + seconds[2:4] + ":" + seconds[4:]
        t1 = '-'.join(start_time_stamp[:-1])
        st = t1 + ' ' + formatted_seconds_timestamp
        return st

    def build_log_entries(self, log_files, status):
        proc = self.get_file_metadata(log_files)
        log_entries = []
        for line in io.TextIOWrapper(proc.stdout, encoding="utf-8"):
            line = line.strip()
            # Split on runs of whitespace so aligned 'ls -l' columns parse cleanly.
            file_data = line.split()
            log_details = {}
            log_details['file_path'] = file_data[-1]
            file_name = log_details['file_path'].split('/')[-1]
            job_name = self.get_job_name_from_log_file(file_name)
            if job_name is None:
                continue
            log_details['start_time'] = self.extract_time_from_file_name(file_name)
            if log_details['start_time'] is None:
                continue
            # 'ls -l --full-time' lists modification date and time before the path.
            log_details['end_time'] = file_data[-4] + ' ' + file_data[-3]
            log_details['job_id'] = self.cron_job_meta[job_name]['id']
            log_details['status'] = status
            log_entries.append(log_details)
        return log_entries

    def parse_new_cron_output(self, new_files):
        command = "grep -l 'SUCCESS' " + ' '.join(map(str, new_files))
        # TO-DO: add the above files in config settings
        proc = self.execute_command(command)
        success_cron_logs = []
        for line in io.TextIOWrapper(proc.stdout, encoding="utf-8"):
            success_cron_logs.append(line.strip())
        # Any file that does not contain 'SUCCESS' is treated as a failure.
        failure_cron_logs = set(new_files) - set(success_cron_logs)
        log_entries = []
        success_entries = self.build_log_entries(success_cron_logs, 'SUCCESS')
        failure_entries = self.build_log_entries(list(failure_cron_logs), 'FAILURE')
        log_entries.extend(success_entries)
        log_entries.extend(failure_entries)
        self.create_job_history_db_entries(log_entries)

    def get_results_from_db(self, start_time, end_time):
        '''
        Get DB entries for the given time range and jobs.
        '''
        command = ("select * from job_history "
                   "where job_id in %s and start_time > %s and end_time < %s")
        st = start_time.strftime('%Y-%m-%d %H:%M:%S')
        et = end_time.strftime('%Y-%m-%d %H:%M:%S')
        job_ids = tuple(self.get_job_ids(self.cron_job_meta))
        query_t = (job_ids, st, et)
        results = self.db.execute_sql_command(command, query_t)
        return results

    def parse(self):
        '''
        Parse the cron log files within the given time range.
        '''
        remain_files = []
        for job_name, job_details in self.cron_job_meta.items():
            log_files = self.get_cron_log_files_for_job(job_name, self.start_time,
                                                        self.end_time)
            logs = self.filter_logs_by_time_stamp(job_name, log_files,
                                                  self.start_time, self.end_time)
            print('Current log files :\n', logs)
            # Get log files already recorded in the DB.
            db_files = self.get_db_entries({'id': job_details['id']},
                                           self.start_time, self.end_time)
            print('Log entries in DB :\n', db_files)
            diff = set(logs) - set(db_files)
            print('Log file entries to be created : \n', diff)
            remain_files.extend(list(diff))
        if len(remain_files) > 0:
            # Parse these files and create DB entries.
            self.parse_new_cron_output(remain_files)
        # Change start time to start of day.
        st = self.start_time.replace(hour=0, minute=0, second=0, microsecond=0)
        result = self.get_results_from_db(st, self.end_time)
        response = self.build_response(result, self.start_time, self.end_time)
        return response

    def build_response(self, result, start_time, end_time):
        response = {'jobs': {}, 'days': []}
        # Add all dates from start_time up to (but excluding) end_time.
        start_date = start_time.date()
        end_date = end_time.date()
        temp_date = start_date
        while temp_date != end_date:
            response['days'].append(temp_date)
            temp_date = temp_date + timedelta(days=1)
        for job_name in self.cron_job_meta.keys():
            response['jobs'][job_name] = {}
        for db_entry in result:
            job_id = db_entry['job_id']
            job_name = self.job_id_dict[job_id]
            result_date = db_entry['start_time'].date()
            if result_date in response['jobs'][job_name]:
                # Prefer a SUCCESS entry over an earlier FAILURE for the same day.
                if db_entry['status'] == 'SUCCESS' and \
                        response['jobs'][job_name][result_date]['status'] == 'FAILURE':
                    response['jobs'][job_name][result_date] = db_entry
            else:
                response['jobs'][job_name][result_date] = db_entry
        return response
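# A minimal usage sketch (hypothetical dates; assumes the jobs table and the
# log directories referenced above exist):
#
# from datetime import datetime
# parser = LogParser(datetime(2016, 10, 1), datetime(2016, 10, 31, 23, 59, 59))
# response = parser.parse()
# # response['days'] lists the dates in range; response['jobs'][job_name]
# # maps each date to its job_history row.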
from flask import Flask, render_template, request
from db_client import DB_Client
import json
from main import export_outline_tan, parse_file

db = DB_Client()
app = Flask(__name__)


# Live list of records waiting to be downloaded.
@app.route('/info')
def info():
    result = db.get_outline_queue()
    print(result)
    db.close_connection()
    return json.dumps(result)


# Live removal of a record once its download has finished.
@app.route('/finish/<int:id>')
def finish(id):
    result = {'status': 1, 'msg': ''}
    try:
        db.del_outline_queue(id)
        db.close_connection()
    except Exception as e:
        result['status'] = -1
        result['msg'] = str(e)
    return json.dumps(result)
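# To serve the two routes above during development (host and port here are
# assumptions, not part of the original source):
#
# if __name__ == '__main__':
#     app.run(host='0.0.0.0', port=5000)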
def main():
    db = DB_Client()
    logger.info('program started:------------------------------------')
    while True:
        try:
            records = db.get_queue_records()
            # logger.info('get %d records' % len(records))
            processed = False
            for record in records:
                processed = True
                logger.info('processing record:' + str(record))
                try:
                    data, folder, body_id = process_record(db, record['bbiid'])
                    logger.info('processing result, input:%s, output:%s ' % (record, data))
                    if data:
                        db.process_result(record['id'], data)
                        exp_status = export_outline_tan(folder, body_id, record['bbiid'])
                        db.insert_outline_queue(record['id'], record['bbiid'],
                                                folder, body_id, exp_status)
                    else:
                        # No output: drop the record from the queue.
                        db.del_queue(record['id'])
                except Exception as e:
                    logger.error('record process failed: record:%s, exception:%s'
                                 % (record, str(e)))
                    db.fail_queue(record['id'], traceback.format_exc())
                    db.del_queue(record['id'])
            if not processed:
                db.close_connection()
                time.sleep(2)
        except Exception as e:
            logger.error('exception occurred:' + str(e))
            time.sleep(2)
def main():
    # msg is a module-level global holding the last error message.
    global msg
    db = DB_Client()
    logger.info('program started:------------------------------------')
    sleep_time = 2
    while True:
        try:
            record = db.get_new_outline()
            # logger.info('get %d records' % len(records))
            processed = False
            if record:
                processed = True
                logger.info('processing record:' + str(record))
                try:
                    result, front, side, back, f_bd_features, s_bd_features, b_bd_features = \
                        process_outline(db, record['bbiid'])
                    logger.info('processing record %s, result:%s' % (record, result))
                    if result == 4:
                        db.update_new_outline(record['bbiid'], json.dumps(front),
                                              json.dumps(side), json.dumps(back),
                                              json.dumps(f_bd_features),
                                              json.dumps(s_bd_features),
                                              json.dumps(b_bd_features))
                    db.update_new_outline_result(record['bbiid'], result, msg)
                    sleep_time = 2  # reset the back-off after a successful run
                except Exception:
                    logger.error('record process failed: record:%s, exception:%s'
                                 % (record, traceback.format_exc()))
                    msg = traceback.format_exc()
                    # The result column doubles as a retry counter (< 4);
                    # 5 marks the record as permanently failed.
                    retries = int(record['result'])
                    if retries >= 3:
                        db.update_new_outline_result(
                            record['bbiid'], 5,
                            'retries:%d, exception trace:%s' % (retries, msg))
                    else:
                        db.update_new_outline_result(
                            record['bbiid'], retries + 1,
                            'retries:%d, exception trace:%s' % (retries, msg))
                    # Back off between failed attempts; the wait doubles until
                    # it reaches 20 seconds or more.
                    time.sleep(sleep_time)
                    if sleep_time < 20:
                        sleep_time = 2 * sleep_time
            if not processed:
                db.close_connection()
                time.sleep(2)
        except Exception as e:
            logger.error(e)
            time.sleep(20)
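# For reference, the back-off schedule produced by the doubling rule above
# (a standalone sketch, no database needed): waits double after each failed
# attempt and stop growing once they reach 20 seconds or more.
#
# t, waits = 2, []
# for _ in range(6):
#     waits.append(t)
#     if t < 20:
#         t = 2 * t
# # waits == [2, 4, 8, 16, 32, 32]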