Пример #1
0
def main():
    """Scan the configured folder and record matching files in the pickle store.

    Reads 'visio_settings.json' via ``load_config``; the 'folders' entry is a
    sequence of path components joined into the folder to scan, and 'filter'
    is the file extension (as returned by ``os.path.splitext``) to keep.

    For each matching file the SHA1 hash is recorded under ``data['hashes']``.
    A file whose path and SHA1 already appear in ``data['files']`` is skipped;
    a file whose path is known but whose hash differs has its old record moved
    to ``data['archives']`` and is processed again. The resulting data is
    written back with ``save_pickle_data``.

    Raises:
        FileNotFoundError: if the configured folder does not exist.
    """
    config_path = 'visio_settings.json'
    pickle_file = 'visio_data.pickle'
    logger.debug('Config: {}'.format(config_path))
    logger.debug('Pickle: {}'.format(pickle_file))

    cnf_data = load_config(config_path)
    logger.debug(cnf_data)

    path = cnf_data['folders']
    filepath = os.path.join(*path)
    filepath = os.path.expanduser(filepath)
    logger.info(filepath)

    if not os.path.exists(filepath):
        logger.critical('Missing Path {}'.format(filepath))
        # Raising a bare string is itself a TypeError in Python 3; raise a
        # real exception that names the missing folder.
        raise FileNotFoundError('missing path: {}'.format(filepath))

    data = get_pickle_data(pickle_file)
    if not data:
        # First run: seed the store with the expected top-level keys.
        data['folders'] = path
        data['filter'] = cnf_data['filter']
        data['files'] = []
        data['hashes'] = {}

    for f in scanFiles(filepath):
        filename, file_extension = os.path.splitext(f['file'])

        if file_extension != cnf_data['filter']:
            continue

        logger.info('Scanning filename: {}{}'.format(
            filename, file_extension))

        f_path = os.path.normpath(os.path.join(f['folder'], f['file']))
        file_hash = make_hash(f_path)
        file_hash_sha = file_hash['SHA1']

        # Map each hash to every path that has been seen with that content.
        known_paths = data['hashes'].setdefault(file_hash_sha, [])
        if f_path not in known_paths:
            logger.info('\tadding hash {}'.format(file_hash_sha))
            known_paths.append(f_path)

        scan_file = True
        for scan_idx, s in enumerate(data['files']):
            s_path = os.path.normpath(os.path.join(s['folder'], s['file']))
            if f_path != s_path:
                continue

            if s['hash']['SHA1'] == file_hash_sha:
                # file has been scanned and has not changed by hash
                logger.info('\tfile already scanned!')
                scan_file = False
            else:
                logger.info('\tFile is out of date, updating!')
                data.setdefault('archives', []).append(s)
                # Safe to delete during iteration only because we break
                # out of the loop immediately afterwards.
                del data['files'][scan_idx]

            # we found the file... so no need to search anymore
            break

        if not scan_file:
            continue

        dwg_file = process_visiofile(f_path)

        # add the file details...
        dwg_file['folder'] = f['folder']
        dwg_file['file'] = f['file']
        dwg_file['modified'] = parse(f['modified'])
        dwg_file['accessed'] = parse(f['accessed'])
        dwg_file['size'] = f['size']
        dwg_file['hash'] = file_hash
        dwg_file['GUID'] = uuid.uuid4()

        data['files'].append(dwg_file)

    save_pickle_data(data, pickle_file)
Пример #2
0
from getFiles import scanFiles, get_info, get_pickle_data, save_pickle_data
import os

# Walk the current directory, and for every '.pickle' file found print a
# summary of its top-level keys followed by any stored file entries whose
# name contains '.zip'.
for f in scanFiles('./'):

    filename, file_extension = os.path.splitext(f['file'])
    if file_extension != '.pickle':
        continue

    print(f, filename, file_extension)
    # NOTE(review): only the bare file name is passed here, not the folder the
    # file was found in -- confirm get_pickle_data resolves it as intended.
    data = get_pickle_data(f['file'])
    for item in data:
        print(item, len(data[item]))

    # Use a distinct name so the outer loop variable `f` is not rebound.
    for entry in data['files']:
        if '.zip' in entry['file'].lower():
            print(entry)
Пример #3
0
        rt_row['name'] = rt['name']
        rt_row['recordTypeId'] = rt['recordTypeId']

        rt_rows.append(rt_row)

    data['record_type'] = rt_rows

    return data


# Select the instance to work on and load its settings from the config file.
instance = 'KaptioStaging'
config_path = r'sf.secrets.json'
config_data = get_config(config_path, instance)

# Load previously pickled data and make sure this instance has an entry.
pickle_file = 'salesforce_data.pickle'
data = get_pickle_data(pickle_file)
if instance not in data:
    data[instance] = {}

# Open the connection using the loaded settings.
sf = connect_sf(config_data)

# Replace this instance's entry with the metadata fetched over the connection.
data[instance] = get_metadata(sf)

for obj_name in data[instance]:
    row = data[instance][obj_name]
    result = get_object_metadata(sf, row['name'])

    data[instance][obj_name] = {
        **row,
        **result, 'scanned_date': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
Пример #4
0
from getFiles import scanFiles, get_info, get_pickle_data, save_pickle_data
from collections import defaultdict
import os
import logging
from logging.config import fileConfig

# Configure logging from the ini file, then create this module's logger.
fileConfig('logging_config.ini')
logger = logging.getLogger(__name__)

picklename = "{}.pickle".format("scan_files")
files_found = 0
data = get_pickle_data(picklename=picklename)

# Only scan when the loaded data has no non-empty "extension" entry.
if not data.get("extension"):
    data_files = defaultdict(list)
    data_ext = defaultdict(list)

    # Group every scanned entry both by file name and by extension.
    for f in scanFiles('c:\\'):
        files_found += 1
        file_extension = os.path.splitext(f['file'])[1]
        data_files[f['file']].append(f)
        data_ext[file_extension].append(f)

    logger.info("files scanned:{}".format(files_found))

    data = {'extension': data_ext, 'files': data_files}

    save_pickle_data(data=data, picklename=picklename)
    logger.info("Data Saved:{}".format(picklename))