def upload_unuploaded_rows(rds_connection, local_connection, table_name):
    dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
    cloud_pkey_name = rds_connection.get_primary_key_name(table_name)
    cloud_columns = rds_connection.get_column_names(table_name)
    local_columns = local_connection.get_column_names(table_name)
    local_columns = [item[0] for item in local_columns]
    local_pkey_name = local_connection.get_primary_key_name(table_name)
    next_unuploaded_pkey = get_local_next_unuploaded_pkey(
        local_connection, table_name, local_pkey_name)
    # upload local rows one at a time until none are left un-uploaded
    while (next_unuploaded_pkey != -1):
        col_names = "dep_id"
        val_list = "\'" + str(dep_id) + "\',"
        # build column/value lists for columns present in both DBs,
        # skipping the primary key and dep_id
        for column in cloud_columns:
            if ((column[0] == cloud_pkey_name) or (column[0] == 'dep_id')
                    or not (column[0] in local_columns)):
                continue
            col_names += (',' + column[0])
            val = local_connection.get_rows_with_value(
                column[0], table_name, local_pkey_name, next_unuploaded_pkey)
            val = val[0][0]
            val_list += "\'" + str(val) + "\',"
        # strip the trailing comma
        val_list = val_list[:-1]
        res = rds_connection.insert_row(table_name, col_names, val_list)
        # mark the local row as uploaded and move on to the next one
        res = local_connection.set_column(table_name, local_pkey_name,
                                          next_unuploaded_pkey, 'Uploaded', '1')
        next_unuploaded_pkey = get_local_next_unuploaded_pkey(
            local_connection, table_name, local_pkey_name)
    print("No (more) data to upload")
def read_file(f):
    try:
        root_dir=file_system_tasks.get_project_dir(-3)
        path=root_dir[0:-1]+file_system_tasks.get_parameters('parameters.json')['param']['m2g_log_dir']+'/'
    except Exception as e:
        print(e)
    with open(path+f, 'r') as file:
        lines = file.readlines()
    return lines
def insert_heart_beat(rds_connection):
    try:
        dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        ts = time.time()
        values = str(dep_id) + ',' + str(int(ts))
        res = rds_connection.insert_row(table_name, col_names, values)
    except:
        print('exception when inserting heart beat')
        return -1
    return res
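# Note: `table_name` and `col_names` in insert_heart_beat() are module-level
# constants defined elsewhere in this file, not locals. A hypothetical example
# of their shape (the actual table/column names are the repo's, not these):
#   table_name = 'heart_beat'
#   col_names = 'dep_id,ts'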
def get_start_date():
    start_date = -1
    try:
        project_dir = file_system_tasks.get_project_dir(-3)
        start_time = get_start_time(project_dir) / 1000.0
        start_date = datetime.datetime.fromtimestamp(start_time).strftime(
            '%Y-%m-%d')
    except:
        print('exception when getting start date')
    return start_date
def upload_zip_file():
    project_dir = file_system_tasks.get_project_dir(level_up=-4)
    parent_dir = file_system_tasks.get_project_dir(level_up=-5)
    out_file = parent_dir + project_dir.split('/')[-2] + ".zip"
    print("creating .zip file....")
    file_paths = get_all_file_paths(project_dir)
    # writing files to a zipfile
    with ZipFile(out_file, 'w', allowZip64=True) as zip:
        # writing each file one by one
        for file in file_paths:
            zip.write(file)
    #shutil.make_archive(out_file, 'zip', project_dir,verbose=True)
    print("done")
    s3.get_bucket()
    print("uploading .zip file to cloud")
    s3.upload_file(out_file, "project_dir", is_progress=True)
    print("done")
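# get_all_file_paths() is defined elsewhere in this module. A minimal sketch of
# what upload_zip_file() relies on it doing (recursively collecting the path of
# every file under the project directory); this is an illustrative assumption
# with a hypothetical name, not the repo's actual implementation:
def _get_all_file_paths_sketch(directory):
    import os
    file_paths = []
    # walk the whole tree and record every file's full path
    for root, _, files in os.walk(directory):
        for name in files:
            file_paths.append(os.path.join(root, name))
    return file_paths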
def get_local_files():
    paths = file_system_tasks.get_parameters(
        'parameters.json')['param']['s3_upload_dirs'].split(',')
    project_dir = file_system_tasks.get_project_dir(-3)[:-1]
    paths = [project_dir + path for path in paths]
    file_list = []
    for path in paths:
        files = [(path + "/" + f) for f in listdir(path) if isfile(join(path, f))]
        file_list += files
    return file_list
def get_sorted_file_names():
    try:
        root_dir=file_system_tasks.get_project_dir(-3)
        path=root_dir[0:-1]+file_system_tasks.get_parameters('parameters.json')['param']['m2g_log_dir']
    except Exception as e:
        print(e)
    try:
        file_names = [f for f in listdir(path) if isfile(join(path, f))]
        file_names.sort(reverse=False)
    except Exception as e:
        print(e)
        return -1
    return file_names
def upload_all_rows(local_connection, rds_connection, table_name):
    dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
    try:
        rows = local_connection.get_all_rows(table_name)
        for row in rows:
            insert_row_to_cloud(local_connection, rds_connection, table_name,
                                row, dep_id)
        print('done')
    except Exception as e:
        print(e)
        return -1
    return 0
def insert_missing_data(rds_connection,local_connection,table_name,missing_table_name):
    res=-1
    try:
        dep_id=dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        cloud_count=rds_connection.get_num_rows(table_name,dep_id)
        local_count=local_connection.get_num_rows(table_name)
        col_names='dep_id,ts,local_count,cloud_count'
        ts=str(datetime.datetime.fromtimestamp(time.time()))
        values="\'"+str(dep_id)+"\',"+"\'"+str(ts)+"\',"+"\'"+str(local_count)+"\',"+"\'"+str(cloud_count)+"\'"
        res=rds_connection.insert_row(missing_table_name,col_names,values)
    except:
        print('Exception in insert_missing_data')
    return res
def get_s3_files():
    s3.get_bucket()
    dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
    all_objects = s3.list_items()
    dep_items = []
    for name in all_objects:
        ar = name.split('/')
        if (len(ar) > 2 and len(ar[-1]) > 0):
            if (str(ar[0]) == str(dep_id)):
                dep_items.append(ar[1:])
    cloud_files = get_paths(dep_items)
    return cloud_files
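# get_paths() is defined elsewhere in this module; create_report_file() compares
# its output against 'dir/file' strings, so it presumably re-joins each item's
# remaining path components. An illustrative sketch of that assumed behaviour,
# under a hypothetical name:
def _get_paths_sketch(items):
    return ['/'.join(parts) for parts in items]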
def upoload_missing_data_ts(rds_connection, local_connection, table_name):
    try:
        dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        start_date = dep_data.get_start_date()
        print('uploading data. ' + table_name)
        cloud_unique_ts_list = rds_connection.get_unique_row_list(
            table_name, 'ts', dep_id)
        local_unique_ts_list = local_connection.get_unique_row_list(
            table_name, 'ts')
        # only consider local timestamps from this deployment's start date onwards
        local_unique_ts_list = [
            ts for ts in local_unique_ts_list if str(ts)[0:15] > start_date
        ]
        cloud_unique_ts_list.sort()
        if (len(cloud_unique_ts_list) > 2):
            final_cloud_ts = cloud_unique_ts_list[-2]
            selected_local_unique_ts_list = [
                ts for ts in local_unique_ts_list if (ts > final_cloud_ts)
            ]
        else:
            selected_local_unique_ts_list = local_unique_ts_list
        for ts in selected_local_unique_ts_list:
            print(ts)
            ts_upload = False
            # upload this timestamp if it is missing in the cloud or incomplete there
            if (ts in cloud_unique_ts_list):
                num_cloud = rds_connection.get_num_rows_with_value(
                    table_name, 'ts', ts, dep_id)
                num_local = local_connection.get_num_rows_with_value(
                    table_name, 'ts', ts)
                if (num_local > num_cloud):
                    ts_upload = True
            else:
                ts_upload = True
            if (ts_upload):
                #delete_rows_with_value(conn_cloud,cursor_cloud,table_name,col_name,ts)
                #upload all rows with this ts
                rows = local_connection.get_rows_with_value(
                    -1, table_name, 'ts', ts)
                print('uploading ' + str(len(rows)) + ' rows')
                for i, row in enumerate(rows):
                    res = insert_row_to_cloud(local_connection, rds_connection,
                                              table_name, row, dep_id)
                    if (res == -1):
                        print('did not upload...')
        print('finished uploading data.')
    except Exception as e:
        print(e)
def insert_missing_files_row(rds_connection):
    res=-1
    try:
        dep_id=dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        local_files=s3_upload.get_local_files()
        cloud_files=s3_upload.get_s3_files()
        missing_files=s3_upload.list_diff(local_files,cloud_files)
        col_names='dep_id,ts,local_count,cloud_count,missing'
        ts=str(datetime.datetime.fromtimestamp(time.time()))
        values="\'"+str(dep_id)+"\',"+"\'"+str(ts)+"\',"+"\'"+str(len(local_files))+"\',"+"\'"+str(len(cloud_files))+"\',"+"\'"+str(len(missing_files))+"\'"
        res=rds_connection.insert_row('missing_files',col_names,values)
    except:
        print('exception when inserting to missing_files')
    return res
def insert_missing_M2G(rds_connection):
    res=-1
    try:
        file_names=m2g.get_sorted_file_names()
        local_count=0
        if(isinstance(file_names,list)):
            for file in file_names:
                lines=m2g.read_file(file)
                local_count+=len(lines)
        dep_id=dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        cloud_count=rds_connection.get_num_rows('M2G',dep_id)
        col_names='dep_id,ts,local_count,cloud_count'
        ts=str(datetime.datetime.fromtimestamp(time.time()))
        values="\'"+str(dep_id)+"\',"+"\'"+str(ts)+"\',"+"\'"+str(local_count)+"\',"+"\'"+str(cloud_count)+"\'"
        res=rds_connection.insert_row('missing_M2G',col_names,values)
    except:
        print('Exception in insert_missing_M2G')
    return res
def create_report_file():
    report_file=file_system_tasks.get_project_dir(-3)+'generated_data/report.txt'
    local_files=s3_upload.get_local_files()
    cloud_files=s3_upload.get_s3_files()
    report=[]
    for file in local_files:
        splt=file.split('/')[-2:]
        local_file=splt[0]+'/'+splt[1]
        in_cloud=local_file in cloud_files
        report.append(local_file+' - '+str(in_cloud))
    #remove the file
    if(os.path.exists(report_file)):
        os.remove(report_file)
    #write new data to file
    with open(report_file, 'w') as f:
        for item in report:
            f.write("%s\n" % item)
def upload_dep_data_table(rds_connection):
    project_path = file_system_tasks.get_project_dir(-3)[:-1]
    try:
        local_con = sqlite3.connect(project_path + '/DeploymentInformation.db')
    except:
        print('cannot connect to local DB')
        return -1
    try:
        local_cols = get_local_columns(local_con, "DEPLOYMENT_DATA")
        local_cols = [str(col[1]) for col in local_cols]
        cloud_cols = rds_connection.get_column_names("DEPLOYMENT_DATA")
        cloud_cols = [col[0] for col in cloud_cols]
        cursorObj = local_con.cursor()
        cursorObj.execute('SELECT * from DEPLOYMENT_DATA')
        rows = cursorObj.fetchall()
    except:
        print(
            'Error when reading from databases. upload_dep_data_table() in dep_data.py'
        )
        return -1
    # build the comma-separated list of columns shared by both databases
    str_cols = ""
    for i, col in enumerate(local_cols):
        if (col in cloud_cols):
            str_cols += (col + ",")
    str_cols = str_cols[:-1]
    for row in rows:
        str_row = ""
        for i, item in enumerate(row):
            if (local_cols[i] in cloud_cols):
                str_row += ("\'" + str(item) + "\',")
        str_row = str_row[:-1]
        res = rds_connection.insert_row('DEPLOYMENT_DATA', str_cols, str_row)
        if (res != 1):
            print('error inserting row')
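# get_local_columns() is defined elsewhere in dep_data.py; given the col[1]
# indexing above it most likely wraps sqlite's PRAGMA table_info. A minimal
# sketch of that assumed behaviour, under a hypothetical name (illustrative
# only, not the repo's actual code):
def _get_local_columns_sketch(connection, table_name):
    cursor = connection.cursor()
    cursor.execute("PRAGMA table_info({})".format(table_name))
    # each returned tuple is (cid, name, type, notnull, dflt_value, pk)
    return cursor.fetchall()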
def upload_file(file_name, dir_name, is_progress=False):
    name = file_name.split('/')[-1]
    try:
        dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        key = str(dep_id) + '/' + str(dir_name) + '/' + str(name)
        if (is_progress):
            res = pcr_storage.upload_file(
                Filename=file_name,
                Key=key,
                Callback=ProgressPercentage(file_name))
        else:
            res = pcr_storage.upload_file(Filename=file_name, Key=key)
        short_file = file_name.split('/')[-1]
        Log.log_s3('uploaded ' + dir_name + '/' + short_file)
        #compare checksums
        #print('checking checksum....')
        same = is_checksum_ok(file_name, key)
        if (not same):
            log_entry = ('checksum failed after uploading ' + dir_name + '/' +
                         short_file + ' in upload_file of s3_functions')
            Log.log_s3(log_entry)
            #delete the file in cloud
            response = pcr_storage.delete_objects(
                Delete={'Objects': [{
                    'Key': key
                }]})
            return -1
        else:
            Log.log_s3('checksum success ' + dir_name + '/' + short_file)
            return 0
    except Exception as e:
        print(str(e))
        short_file = file_name.split('/')[-1]
        log_entry = ('exception uploading ' + dir_name + '/' + short_file +
                     ' in upload_file of s3_functions exception=' + str(e))
        Log.log_s3(log_entry)
        return -1
def get_rds_log_path():
    rds_log_file = file_system_tasks.get_project_dir(
        -3) + 'generated_data/cloud_logs/rds_log.txt'
    return rds_log_file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 6 10:52:22 2019

@author: sleek_eagle
"""
import file_system_tasks
from os import listdir
from os.path import isfile, join
import dep_data

dep_id=dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
col_names=file_system_tasks.get_parameters('parameters.json')['param']['m2g_fields']

#read the log file
def read_file(f):
    try:
        root_dir=file_system_tasks.get_project_dir(-3)
        path=root_dir[0:-1]+file_system_tasks.get_parameters('parameters.json')['param']['m2g_log_dir']+'/'
    except Exception as e:
        print(e)
    with open(path+f, 'r') as file:
        lines = file.readlines()
    return lines

def get_sorted_file_names():
    try:
        root_dir=file_system_tasks.get_project_dir(-3)