Example #1
def upload_unuploaded_rows(rds_connection, local_connection, table_name):
    dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
    cloud_pkey_name = rds_connection.get_primary_key_name(table_name)
    cloud_columns = rds_connection.get_column_names(table_name)
    local_columns = local_connection.get_column_names(table_name)
    local_columns = [item[0] for item in local_columns]
    local_pkey_name = local_connection.get_primary_key_name(table_name)
    next_unuploaded_pkey = get_local_next_unuploaded_pkey(
        local_connection, table_name, local_pkey_name)

    while next_unuploaded_pkey != -1:
        # Build the column list and quoted value list for the cloud insert.
        col_names = "dep_id"
        val_list = "'" + str(dep_id) + "',"
        for column in cloud_columns:
            # Skip the cloud primary key, dep_id (already added) and any
            # column that does not exist locally.
            if (column[0] == cloud_pkey_name or column[0] == 'dep_id'
                    or column[0] not in local_columns):
                continue
            col_names += ',' + column[0]
            val = local_connection.get_rows_with_value(column[0], table_name,
                                                       local_pkey_name,
                                                       next_unuploaded_pkey)
            val = val[0][0]
            val_list += "'" + str(val) + "',"
        val_list = val_list[:-1]  # drop the trailing comma

        rds_connection.insert_row(table_name, col_names, val_list)
        # Mark the local row as uploaded so it is not picked up again.
        local_connection.set_column(table_name, local_pkey_name,
                                    next_unuploaded_pkey, 'Uploaded', '1')
        next_unuploaded_pkey = get_local_next_unuploaded_pkey(
            local_connection, table_name, local_pkey_name)

    print("No (more) data to upload")
Example #2
def read_file(f):
    try:
        root_dir = file_system_tasks.get_project_dir(-3)
        path = (root_dir[0:-1] + file_system_tasks.get_parameters(
            'parameters.json')['param']['m2g_log_dir'] + '/')
    except Exception as e:
        # Without a valid path, opening the file below would fail anyway.
        print(e)
        return []
    with open(path + f, 'r') as file:
        lines = file.readlines()
    return lines
Example #3
def insert_heart_beat(rds_connection):
    try:
        dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        ts = time.time()
        # table_name and col_names were undefined in the original; the names
        # below are assumptions matching the two values being inserted.
        table_name = 'heart_beat'
        col_names = 'dep_id,ts'
        values = str(dep_id) + ',' + str(int(ts))
        res = rds_connection.insert_row(table_name, col_names, values)
    except Exception as e:
        print('exception when inserting heart beat: ' + str(e))
        return -1
    return res
Example #4
def get_start_date():
    start_date = -1
    try:
        project_dir = file_system_tasks.get_project_dir(-3)
        start_time = get_start_time(project_dir) / 1000.0  # ms -> s
        start_date = datetime.datetime.fromtimestamp(start_time).strftime(
            '%Y-%m-%d')
    except Exception as e:
        print('exception when getting start date: ' + str(e))
    return start_date
Example #5
def upload_zip_file():
    project_dir = file_system_tasks.get_project_dir(level_up=-4)
    parent_dir = file_system_tasks.get_project_dir(level_up=-5)
    out_file = parent_dir + project_dir.split('/')[-2] + ".zip"
    print("creating .zip file....")

    file_paths = get_all_file_paths(project_dir)
    # writing files to a zipfile; 'zip_file' avoids shadowing the zip() builtin
    with ZipFile(out_file, 'w', allowZip64=True) as zip_file:
        # writing each file one by one
        for file in file_paths:
            zip_file.write(file)

    #shutil.make_archive(out_file, 'zip', project_dir, verbose=True)
    print("done")
    s3.get_bucket()
    print("uploading .zip file to cloud")
    s3.upload_file(out_file, "project_dir", is_progress=True)
    print("done")
Example #6
def get_local_files():
    paths = file_system_tasks.get_parameters(
        'parameters.json')['param']['s3_upload_dirs'].split(',')
    project_dir = file_system_tasks.get_project_dir(-3)[:-1]
    paths = [project_dir + path for path in paths]
    file_list = []
    for path in paths:
        files = [(path + "/" + f) for f in listdir(path)
                 if isfile(join(path, f))]
        file_list += files
    return file_list
Example #7
def get_sorted_file_names():
    try:
        root_dir = file_system_tasks.get_project_dir(-3)
        path = (root_dir[0:-1] + file_system_tasks.get_parameters(
            'parameters.json')['param']['m2g_log_dir'])
    except Exception as e:
        print(e)
        return -1
    try:
        # List the log files and sort them in ascending order.
        file_names = [f for f in listdir(path) if isfile(join(path, f))]
        file_names.sort()
    except Exception as e:
        print(e)
        return -1
    return file_names
Example #8
def upload_all_rows(local_connection, rds_connection, table_name):
    dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
    try:
        rows = local_connection.get_all_rows(table_name)
        for row in rows:
            insert_row_to_cloud(local_connection, rds_connection, table_name,
                                row, dep_id)
        print('done')
    except Exception as e:
        print(e)
        return -1
    return 0
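# insert_row_to_cloud is referenced here (and in Example #11) but not shown.
# A minimal sketch, assuming local rows come from SELECT * (so values align
# with the local column order) and mirroring the column handling of
# upload_unuploaded_rows in Example #1 -- an assumption, not the source's
# implementation:
def insert_row_to_cloud_sketch(local_connection, rds_connection, table_name,
                               row, dep_id):
    cloud_pkey_name = rds_connection.get_primary_key_name(table_name)
    cloud_columns = [c[0] for c in rds_connection.get_column_names(table_name)]
    local_columns = [c[0] for c in local_connection.get_column_names(table_name)]
    col_names = "dep_id"
    val_list = "'" + str(dep_id) + "'"
    for i, col in enumerate(local_columns):
        # Skip the cloud primary key and columns the cloud table lacks.
        if col == cloud_pkey_name or col == 'dep_id' or col not in cloud_columns:
            continue
        col_names += ',' + col
        val_list += ",'" + str(row[i]) + "'"
    return rds_connection.insert_row(table_name, col_names, val_list)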
Example #9
def insert_missing_data(rds_connection, local_connection, table_name,
                        missing_table_name):
    res = -1
    try:
        dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        cloud_count = rds_connection.get_num_rows(table_name, dep_id)
        local_count = local_connection.get_num_rows(table_name)
        col_names = 'dep_id,ts,local_count,cloud_count'
        ts = str(datetime.datetime.fromtimestamp(time.time()))
        values = ("'" + str(dep_id) + "','" + str(ts) + "','" +
                  str(local_count) + "','" + str(cloud_count) + "'")
        res = rds_connection.insert_row(missing_table_name, col_names, values)
    except Exception as e:
        print('Exception in insert_missing_data: ' + str(e))
    return res
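# The quoted value string above is rebuilt by hand again in Examples #12 and
# #13. A small helper (hypothetical, not part of the source) that produces
# the same "'a','b','c'" string:
def quote_values_sketch(*values):
    # Wrap each value in single quotes and join with commas.
    return ','.join("'" + str(v) + "'" for v in values)

# e.g. quote_values_sketch(dep_id, ts, local_count, cloud_count)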
Example #10
def get_s3_files():
    s3.get_bucket()
    dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
    all_objects = s3.list_items()

    # Keep only non-empty keys under this deployment's prefix, with the
    # deployment id stripped off.
    dep_items = []
    for name in all_objects:
        ar = name.split('/')
        if len(ar) > 2 and len(ar[-1]) > 0 and str(ar[0]) == str(dep_id):
            dep_items.append(ar[1:])
    cloud_files = get_paths(dep_items)
    return cloud_files
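# get_paths is referenced above but not shown. Given that dep_items holds
# lists of key components, and that create_report_file (Example #14) compares
# entries against 'dir/file' strings, a plausible sketch simply rejoins the
# components (an assumption, not the source's implementation):
def get_paths_sketch(items):
    # Hypothetical helper: ['logs', 'a.txt'] -> 'logs/a.txt'
    return ['/'.join(parts) for parts in items]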
Example #11
def upload_missing_data_ts(rds_connection, local_connection, table_name):
    try:
        dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        start_date = dep_data.get_start_date()

        print('uploading data. ' + table_name)

        cloud_unique_ts_list = rds_connection.get_unique_row_list(
            table_name, 'ts', dep_id)
        local_unique_ts_list = local_connection.get_unique_row_list(
            table_name, 'ts')
        # Keep only timestamps from the deployment start date onwards
        # (lexicographic comparison on the timestamp string).
        local_unique_ts_list = [
            ts for ts in local_unique_ts_list if str(ts)[0:15] > start_date
        ]
        cloud_unique_ts_list.sort()
        if len(cloud_unique_ts_list) > 2:
            # Resume from the second-to-last cloud timestamp; the newest one
            # may have been only partially uploaded.
            final_cloud_ts = cloud_unique_ts_list[-2]
            selected_local_unique_ts_list = [
                ts for ts in local_unique_ts_list if ts > final_cloud_ts
            ]
        else:
            selected_local_unique_ts_list = local_unique_ts_list
        for ts in selected_local_unique_ts_list:
            print(ts)
            ts_upload = False
            if ts in cloud_unique_ts_list:
                # Re-upload only if the cloud is missing rows for this ts.
                num_cloud = rds_connection.get_num_rows_with_value(
                    table_name, 'ts', ts, dep_id)
                num_local = local_connection.get_num_rows_with_value(
                    table_name, 'ts', ts)
                if num_local > num_cloud:
                    ts_upload = True
            else:
                ts_upload = True
            if ts_upload:
                #delete_rows_with_value(conn_cloud,cursor_cloud,table_name,col_name,ts)
                #upload all rows with this ts
                rows = local_connection.get_rows_with_value(
                    -1, table_name, 'ts', ts)
                print('uploading ' + str(len(rows)) + ' rows')
                for row in rows:
                    res = insert_row_to_cloud(local_connection, rds_connection,
                                              table_name, row, dep_id)
                    if res == -1:
                        print('did not upload...')
        print('finished uploading data.')
    except Exception as e:
        print(e)
Example #12
def insert_missing_files_row(rds_connection):
    res = -1
    try:
        dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        local_files = s3_upload.get_local_files()
        cloud_files = s3_upload.get_s3_files()
        missing_files = s3_upload.list_diff(local_files, cloud_files)
        col_names = 'dep_id,ts,local_count,cloud_count,missing'
        ts = str(datetime.datetime.fromtimestamp(time.time()))
        values = ("'" + str(dep_id) + "','" + str(ts) + "','" +
                  str(len(local_files)) + "','" + str(len(cloud_files)) +
                  "','" + str(len(missing_files)) + "'")
        res = rds_connection.insert_row('missing_files', col_names, values)
    except Exception as e:
        print('exception when inserting to missing_files: ' + str(e))
    return res
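# list_diff is referenced above but not shown. A plausible sketch: since
# get_local_files returns full local paths while get_s3_files returns
# 'dir/file' strings, the helper presumably reduces local paths the same way
# create_report_file (Example #14) does before comparing (an assumption):
def list_diff_sketch(local_files, cloud_files):
    # Hypothetical: local entries with no counterpart in the cloud list.
    return [f for f in local_files
            if '/'.join(f.split('/')[-2:]) not in cloud_files]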
Example #13
def insert_missing_M2G(rds_connection):
    res = -1
    try:
        file_names = m2g.get_sorted_file_names()
        local_count = 0
        # get_sorted_file_names returns -1 on failure, a list otherwise.
        if isinstance(file_names, list):
            for file in file_names:
                lines = m2g.read_file(file)
                local_count += len(lines)
        dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        cloud_count = rds_connection.get_num_rows('M2G', dep_id)
        col_names = 'dep_id,ts,local_count,cloud_count'
        ts = str(datetime.datetime.fromtimestamp(time.time()))
        values = ("'" + str(dep_id) + "','" + str(ts) + "','" +
                  str(local_count) + "','" + str(cloud_count) + "'")
        res = rds_connection.insert_row('missing_M2G', col_names, values)
    except Exception as e:
        print('Exception in insert_missing_M2G: ' + str(e))
    return res
Example #14
def create_report_file():
    report_file = file_system_tasks.get_project_dir(
        -3) + 'generated_data/report.txt'
    local_files = s3_upload.get_local_files()
    cloud_files = s3_upload.get_s3_files()

    # For each local file, record whether its 'dir/file' suffix exists in S3.
    report = []
    for file in local_files:
        splt = file.split('/')[-2:]
        local_file = splt[0] + '/' + splt[1]
        in_cloud = local_file in cloud_files
        report.append(local_file + ' - ' + str(in_cloud))

    # Remove any previous report ('w' mode would truncate it anyway).
    if os.path.exists(report_file):
        os.remove(report_file)
    # Write the new data to the file.
    with open(report_file, 'w') as f:
        for item in report:
            f.write("%s\n" % item)
Example #15
def upload_dep_data_table(rds_connection):
    project_path = file_system_tasks.get_project_dir(-3)[:-1]
    try:
        local_con = sqlite3.connect(project_path +
                                    '/DeploymentInformation.db')
    except Exception:
        print('cannot connect to local DB')
        return -1
    try:
        local_cols = get_local_columns(local_con, "DEPLOYMENT_DATA")
        local_cols = [str(col[1]) for col in local_cols]

        cloud_cols = rds_connection.get_column_names("DEPLOYMENT_DATA")
        cloud_cols = [col[0] for col in cloud_cols]

        cursorObj = local_con.cursor()
        cursorObj.execute('SELECT * from DEPLOYMENT_DATA')
        rows = cursorObj.fetchall()
    except Exception:
        print(
            'Error when reading from databases. upload_dep_data_table() in dep_data.py'
        )
        return -1

    # Upload only the columns that exist both locally and in the cloud.
    str_cols = ""
    for col in local_cols:
        if col in cloud_cols:
            str_cols += col + ","
    str_cols = str_cols[:-1]

    for row in rows:
        str_row = ""
        for i, item in enumerate(row):
            if local_cols[i] in cloud_cols:
                str_row += "'" + str(item) + "',"
        str_row = str_row[:-1]
        res = rds_connection.insert_row('DEPLOYMENT_DATA', str_cols, str_row)
        if res != 1:
            print('error inserting row')
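# get_local_columns is referenced above but not shown. Since the column name
# is read from index 1 of each returned row, it is presumably a sqlite
# PRAGMA table_info query; a minimal sketch (an assumption, not the source's
# implementation):
def get_local_columns_sketch(con, table_name):
    cursor = con.cursor()
    cursor.execute('PRAGMA table_info(' + table_name + ')')
    # Each row is (cid, name, type, notnull, dflt_value, pk).
    return cursor.fetchall()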
Example #16
def upload_file(file_name, dir_name, is_progress=False):
    name = file_name.split('/')[-1]
    try:
        dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
        key = str(dep_id) + '/' + str(dir_name) + '/' + str(name)
        if is_progress:
            pcr_storage.upload_file(
                Filename=file_name,
                Key=key,
                Callback=ProgressPercentage(file_name))
        else:
            pcr_storage.upload_file(Filename=file_name, Key=key)
        short_file = file_name.split('/')[-1]
        Log.log_s3('uploaded ' + dir_name + '/' + short_file)
        # Compare checksums to verify the upload.
        same = is_checksum_ok(file_name, key)
        if not same:
            log_entry = ('checksum failed after uploading ' + dir_name + '/' +
                         short_file + ' in upload_file of s3_functions')
            Log.log_s3(log_entry)
            # Delete the corrupt file in the cloud.
            pcr_storage.delete_objects(
                Delete={'Objects': [{
                    'Key': key
                }]})
            return -1
        else:
            Log.log_s3('checksum success ' + dir_name + '/' + short_file)
            return 0
    except Exception as e:
        print(str(e))
        short_file = file_name.split('/')[-1]
        log_entry = ('exception uploading ' + dir_name + '/' + short_file +
                     ' in upload_file of s3_functions exception=' + str(e))
        Log.log_s3(log_entry)
        return -1
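# is_checksum_ok is referenced above but not shown. A minimal sketch under
# two assumptions: pcr_storage is a boto3 Bucket (consistent with the
# upload_file/delete_objects calls above), and the object was uploaded in a
# single part, so its ETag is the hex MD5 of the file (this does not hold
# for multipart uploads):
import hashlib

def is_checksum_ok_sketch(file_name, key):
    md5 = hashlib.md5()
    with open(file_name, 'rb') as f:
        for chunk in iter(lambda: f.read(8192), b''):
            md5.update(chunk)
    # boto3 returns the ETag wrapped in double quotes.
    etag = pcr_storage.Object(key).e_tag.strip('"')
    return etag == md5.hexdigest()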
Example #17
def get_rds_log_path():
    rds_log_file = file_system_tasks.get_project_dir(
        -3) + 'generated_data/cloud_logs/rds_log.txt'
    return rds_log_file
Example #18
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Nov  6 10:52:22 2019

@author: sleek_eagle
"""
import file_system_tasks
from os import listdir
from os.path import isfile, join
import dep_data


dep_id = dep_data.get_dep_id(file_system_tasks.get_project_dir(-3))
col_names = file_system_tasks.get_parameters(
    'parameters.json')['param']['m2g_fields']

#read the log file
def read_file(f):
    try:
        root_dir = file_system_tasks.get_project_dir(-3)
        path = (root_dir[0:-1] + file_system_tasks.get_parameters(
            'parameters.json')['param']['m2g_log_dir'] + '/')
    except Exception as e:
        # Without a valid path, opening the file below would fail anyway.
        print(e)
        return []
    with open(path + f, 'r') as file:
        lines = file.readlines()
    return lines


def get_sorted_file_names():
    try:
        root_dir = file_system_tasks.get_project_dir(-3)