def run(self):
    """ Strip the ARM prefix from all unpacked files """
    manager = PluginManager()
    config = self.config
    f = Files(config)
    cwd = os.getcwd()
    stage = config['stage']
    collection = dir_pattern(3).format(stage, config['job'], 'collection')

    # Make sure files are supposed to be renamed
    if not config['rename']:
        return config, self.files

    # Verify there are no file collisions
    if self.check_for_collisions():
        return config, self.files

    # Strip the ARM prefix from all of the files
    print("\nStripping ARM prefix from files... ", end="")
    sys.stdout.flush()

    manager.callPluginCommand('hook_rename_preprocess', {'config': config})

    os.chdir(collection)
    sites = set(os.listdir('.'))
    for site in sites:
        os.chdir(site)
        instruments = set(os.listdir('.'))
        for ins in instruments:
            os.chdir(ins)
            files = set(os.listdir('.'))
            for i in files:
                new_name = f.rename_file(i)
                if new_name is not None and i != new_name:
                    # Track the file under its new name
                    self.files[site][ins][new_name] = self.files[site][ins][i]
                    self.files[site][ins].pop(i)
                    self.files[site][ins][new_name]['current_name'] = new_name
                    self.files[site][ins][new_name]['stripped_name'] = new_name
            os.chdir('..')
        os.chdir('..')

    manager.callPluginCommand('hook_renamed_files_alter', {'config': config})

    # Restore the working directory
    os.chdir(cwd)

    print("Done\n")
    sys.stdout.flush()

    return config, self.files
def check_for_collisions(self):
    """ Check all unpacked files for file naming collisions """
    print("Checking for file naming collisions...", end="")
    sys.stdout.flush()

    config = self.config
    f = Files(config, self.files)
    cwd = os.getcwd()
    collection = dir_pattern(3).format(config['stage'], config['job'], 'collection')

    os.chdir(collection)
    sites = os.listdir('.')
    for site in sites:
        os.chdir(site)
        instruments = set(os.listdir('.'))
        for ins in instruments:
            os.chdir(ins)
            files = set(os.listdir('.'))
            names = self.files[site][ins]

            # Mark files as deleted
            for k, v in names.items():
                if k not in files:
                    names[k]['deleted'] = True

            # Check for duplicates
            for k, v in names.items():
                if len(v['duplicate_files']) > 0 and not v['deleted']:
                    for i in v['duplicate_files']:
                        name = f.get_file_by_uuid(i)
                        if names[name]['uuid'] == i and not names[name]['deleted']:
                            config['duplicates'] = True
                            os.chdir(cwd)
                            print("Fail")
                            print("Files with naming collisions still exist.\n"
                                  "Please resolve these issues before continuing.\n")
                            return True
            os.chdir('..')
        os.chdir('..')

    os.chdir(cwd)
    config['duplicates'] = False
    print("Done")
    sys.stdout.flush()
    return False
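# The methods above and below lean heavily on dir_pattern() to build paths. A minimal
# sketch of what it appears to do, assuming it simply returns a '/'-joined format
# string with the requested number of placeholders (an illustration only, not the
# project's actual helper):
#
#     def dir_pattern(levels=2):
#         return '/'.join(['{}'] * levels)
#
#     dir_pattern(3).format(stage, job, 'collection')   # -> '<stage>/<job>/collection'
#     dir_pattern().format(datastream_path, site)       # -> '<datastream_path>/<site>'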
def run(self):
    """ Run the cleanup portion of the cleanup phase """
    if not self.config['cleanup_status']['archive']['status']:
        print("Data files must be archived before they can be cleaned up.")
        self.config['exit'] = True
        return self.config, self.files

    stage = self.config['stage']
    job = self.config['job']

    ################################################################################
    # Update local archive database
    ################################################################################
    if not self.config['cleanup_status']['cleanup']['files_archived']:
        print("Updating local copy of the archive...", end="")

        # Setup the datastreams to update
        datastreams = []
        datastream_path = dir_pattern(3).format(stage, job, 'datastream')
        for site in os.listdir(datastream_path):
            path = dir_pattern().format(datastream_path, site)
            for folder in os.listdir(path):
                abs_folder = dir_pattern().format(path, folder)
                if os.path.isdir(abs_folder) and not os.path.islink(abs_folder):
                    datastreams.append(folder)

        # Update the local copy of the archive db
        if not DEVEL:
            update_archive(datastreams)

        print("Done")

        ################################################################################
        # Verify that all files to be added to the archive were added
        ################################################################################
        print("Verifying processed and bundled files have been archived...", end="")
        cwd = os.getcwd()
        archive_files = {}
        db_file = '/apps/ds/conf/datainv/.db_connect'
        alias = 'inv_read'
        if not os.path.exists(db_file):
            print("Failed")
            print("Unable to connect to the archive database. Please try again later.")
            self.config['exit'] = True
            return self.config, self.files

        db = DB(self.config, db_file=db_file, alias=alias)

        # Store the query
        query = ("SELECT * FROM get_remote_files_by_tag('%s') "
                 "WHERE file_stamp >= %d AND file_stamp <= %d AND file_active = true "
                 "ORDER BY file_stamp, file_version;")

        # List the column names so the values can be mapped in a dictionary
        cols = [
            'file_tag', 'file_name', 'file_version', 'file_size', 'file_stored',
            'file_md5', 'file_stamp', 'file_checked', 'file_active'
        ]

        # Convert the start and end dates to a unix timestamp
        start = convert_date_to_timestamp(self.config['begin'])
        end = convert_date_to_timestamp(self.config['end'])

        archive_file = dir_pattern(3).format(stage, job, 'current_archive.json')
        with open(archive_file, 'r') as fp:
            oArch = json.loads(fp.read())

        os.chdir(datastream_path)
        for site in os.listdir('.'):
            path = dir_pattern().format(datastream_path, site)
            os.chdir(site)
            for folder in os.listdir('.'):
                os.chdir(folder)
                args = (folder, start, end)
                result = db.query(query % args, columns=cols)
                for f in os.listdir('.'):
                    if not os.path.isdir(dir_pattern().format(os.getcwd(), f)):
                        try:
                            new_version = next(d['file_version'] for d in result
                                               if d['file_name'] == f)
                            old_version = next(o['file_version'] for o in oArch[folder]
                                               if o['file_name'] == f)
                            if not new_version > old_version:
                                print("Failed")
                                print("Not all files have been successfully archived. "
                                      "Please try again later.")
                                self.config['exit'] = True
                                return self.config, self.files
                        except StopIteration:
                            pass
                os.chdir('..')
            os.chdir('..')

        os.chdir(cwd)
        self.config['cleanup_status']['cleanup']['files_archived'] = True
        print("Done")

    ################################################################################
    # Remove all files from <job>/datastream
    ################################################################################
    if not self.config['cleanup_status']['cleanup']['files_cleaned_up']:
        print("Cleaning up project files...", end="")

        # Remove archive.json
        # Remove current_archive.json
        # Remove <job>.deletion-list.txt
        f = Files(self.config)
        path = dir_pattern().format(stage, job)
        delete = [
            "datastream",
            "collection",
            "file_comparison/raw",
            "file_comparison/tar",
            'archive.json',
            'current_archive.json',
            '%s.deletion-list.txt' % job,
        ]
        try:
            for i in delete:
                item = dir_pattern().format(path, i)
                if os.path.exists(item):
                    if os.path.isdir(item):
                        f.empty_dir(item)
                    elif os.path.isfile(item):
                        os.remove(item)
        except Exception:
            print("Failed")
            print("Unable to cleanup all files. Please try again, or cleanup project manually.")
            self.config['exit'] = True
            return self.config, self.files

        print("Done")
        self.config['cleanup_status']['cleanup']['files_cleaned_up'] = True

    self.config['cleanup_status']['cleanup']['status'] = True
    return self.config, self.files
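# The db.query(..., columns=cols) calls above are assumed to return one dict per row,
# keyed by the names in 'cols' (that is how the results are consumed, e.g.
# d['file_version'] and d['file_name']). An illustrative row, not real data:
#
#     [{'file_tag': 'sgpmfrsrC1.00', 'file_name': 'sgpmfrsrC1.00.20140101.000000.raw.tar',
#       'file_version': 2, 'file_md5': '...', 'file_stamp': 1388534400, 'file_active': True, ...}]
#
# The verification step then compares each file's 'file_version' against the version
# recorded in current_archive.json to confirm a newer copy reached the archive.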
def run(self):
    """ Run the remove portion of the cleanup phase """
    self.start_time = datetime.now()

    if not self.config['cleanup_status']['review']['status']:
        print("\nData must be reviewed before it can be removed from the archive.")
        self.config['exit'] = True
        return self.config, self.files

    stage = self.config['stage']
    job = self.config['job']
    del_file = '%s.deletion-list.txt' % job
    job_folder = dir_pattern().format(stage, job)
    exists = False
    replace = False

    # Check to see if the deletion file exists
    if os.path.exists(dir_pattern().format(job_folder, del_file)):
        exists = True
        ui = UI()
        replace = ui.yn_choice(
            '%s already exists.\n Would you like to overwrite this file?' % del_file, 'n')

    if exists and not replace:
        return self.config, self.files

    # Either the file doesn't exist or the user has chosen to overwrite it.
    # Create <job>.deletion-list.txt and reset the statuses for this run.
    for k in self.config['cleanup_status']['remove']:
        self.config['cleanup_status']['remove'][k] = False

    contents = []

    ##################################################
    # Get list of files from datastream folder
    ##################################################
    datastreams = []
    datastream_path = dir_pattern(3).format(stage, job, 'datastream')
    for site in os.listdir(datastream_path):
        path = dir_pattern().format(datastream_path, site)
        for folder in os.listdir(path):
            abs_folder = dir_pattern().format(path, folder)
            if os.path.isdir(abs_folder) and not os.path.islink(abs_folder):
                datastreams.append(folder)

    # Processed files
    p_files = {}
    for k, v in enumerate(datastreams):
        if v not in p_files:
            p_files[v] = []
        p_files[v] = os.listdir(dir_pattern(3).format(datastream_path, site, v))

    ##################################################
    # Update the local copy of the archive db
    ##################################################
    # print("\nUpdating list of files stored at the archive...", end="")
    # if not DEVEL:
    #     update_archive(datastreams)
    # print("Done")

    ##################################################
    # Get list of files from archive db
    ##################################################
    print("\nRetrieving list of relevant files stored at the archive...", end="")

    # Connect to the database
    archive_files = {}
    db_file = '/apps/ds/conf/datainv/.db_connect'
    alias = 'inv_read'
    if not os.path.exists(db_file):
        print("\nUnable to connect to the archive database. Please try again later.")
        self.config['exit'] = True
        return self.config, self.files

    db = DB(self.config, db_file=db_file, alias=alias)

    # Store the query
    query = ("SELECT * FROM get_remote_files_by_tag('%s') "
             "WHERE file_stamp >= %d AND file_stamp <= %d AND file_active = true "
             "ORDER BY file_stamp, file_version;")

    # List the column names so the values can be mapped in a dictionary
    cols = [
        'file_tag', 'file_name', 'file_version', 'file_size', 'file_stored',
        'file_md5', 'file_stamp', 'file_checked', 'file_active'
    ]

    # Convert the start and end dates to a unix timestamp
    start = convert_date_to_timestamp(self.config['begin'])
    end = convert_date_to_timestamp(self.config['end'])

    # Query the database for each of the datastreams
    for k, v in enumerate(datastreams):
        args = (v, start, end)
        result = db.query(query % args, columns=cols)
        if len(result) > 0:
            archive_files[v] = result
        else:
            print("\nNo results for %s" % v)

    # Unset loop variables
    if len(datastreams) > 0:
        del k, v, args, result

    print("Done")

    print("Map original tar bundle structure...", end="")
    self.maps['orig']['tar'] = self.get_tar_structure(
        dir_pattern(3).format(stage, job, "file_comparison/tar"))
    print("Done")

    if self.config['ingest']:
        # Add files to the list that should be removed from the archive
        print("\nGenerating list of files to remove from the archive...")
        sys.stdout.flush()

        ##################################################
        # Compare raw files to see if they changed
        ##################################################
        # Setup variables for the following code to use
        # List of files as they have traveled from tar file through the ingest,
        # mapped by their current name
        file_history = self.files
        # The datastreams that contain the raw files (ex. sgpmfrsrC1.00)
        raw_streams = []

        # Setup the paths for the ingested and untarred raw files
        new_folder = dir_pattern(3).format(stage, job, 'datastream')
        old_folder = dir_pattern(3).format(stage, job, 'file_comparison/raw')

        # Container to hold a mapping of raw files in the <job>/datastream folder
        raw_files = {}
        # Container to hold a list of tar files at the archive
        archive_tars = {}
        # Does the raw data in "datastream" need to be bundled
        bundle_data = False

        # Get a list of the sites in "datastream"
        for site in os.listdir(new_folder):
            raw_files[site] = {}

        # Establish a structure for the raw files in "datastream"
        # This structure follows the same pattern as 'file_history'
        for site in raw_files:
            for instrument in glob(dir_pattern(3).format(new_folder, site, '*.00')):
                instrument = instrument.split('/')[-1]
                raw_files[site][instrument] = {}
                raw_streams.append(instrument)
                for f in os.listdir(dir_pattern(3).format(new_folder, site, instrument)):
                    raw_files[site][instrument][f] = {}

        # Compare all of the existing files.
        # By comparing existing files instead of files that were unpacked we make sure
        # to include all files and can check for files that are not being tracked
        # (this should never happen).
        c = Files(self.config)
        for i, s in raw_files.items():  # i = site, s = site dict
            for j, p in s.items():  # j = process/instrument, p = file dict
                pbar = UI()
                percent = 0
                pbar.progress(percent)
                count = len(p)
                l = 1
                for k, f in p.items():  # k = file name, f = file info
                    # Compare the file in 'datastream' with its counterpart
                    # in 'file_comparison/raw'
                    if k not in file_history[i][j]:
                        # This branch should never be reached: the file is not being
                        # tracked, so the raw files in datastream need to be rebundled
                        # and the tar file containing this raw file must be added
                        # to the archive.
                        bundle_data = True

                        # Make sure the site is in the dict
                        if i not in self.archive['add']['raw']:
                            self.archive['add']['raw'][i] = {j: {}}
                        # Make sure the process is in the dict
                        if j not in self.archive['add']['raw'][i]:
                            self.archive['add']['raw'][i][j] = {}
                        # Add the file to the dict
                        self.archive['add']['raw'][i][j][k] = {}

                        # Go to the next iteration of the loop
                        # (file cannot be compared because there is no counterpart)
                        continue

                    # Compare the ingested raw file with the unpacked raw file
                    file_path = dir_pattern(5).format(stage, job, '%s', i, j)
                    file_1 = dir_pattern().format(file_path % 'datastream', k)
                    file_2 = dir_pattern().format(
                        file_path % 'file_comparison/raw',
                        file_history[i][j][k]['original_name'])

                    if not c.is_same_file(file_1, file_2):
                        # The files are not the same.
                        # Raw files in datastream need to be rebundled.
                        bundle_data = True

                        # Ensure self.archive['remove']['raw'] has the proper structure
                        if i not in self.archive['remove']['raw']:
                            self.archive['remove']['raw'][i] = {j: []}
                        if j not in self.archive['remove']['raw'][i]:
                            self.archive['remove']['raw'][i][j] = []

                        self.archive['remove']['raw'][i][j].append(k)
                        # Make self.archive['remove']['raw'][i][j] a unique list
                        self.archive['remove']['raw'][i][j] = list(
                            set(self.archive['remove']['raw'][i][j]))

                    percent = int((float(l) / float(count)) * 100)
                    pbar.progress(percent)
                    l = l + 1

                percent = int((float(l) / float(count)) * 100)
                pbar.progress(percent)
                print("")
                sys.stdout.flush()

        # Unset loop variables
        if len(raw_files) > 0:
            del i, j, k, s, p, f, c

        if bundle_data:
            # Fill self.maps['orig']['history'] and bundle the data
            for site in file_history:
                if site not in self.maps['orig']['history']:
                    self.maps['orig']['history'][site] = {}
                for process in file_history[site]:
                    if process not in self.maps['orig']['history'][site]:
                        self.maps['orig']['history'][site][process] = {}
                    for f, d in file_history[site][process].items():
                        if d['original_name'] not in self.maps['orig']['history'][site][process]:
                            self.maps['orig']['history'][site][process][d['original_name']] = d

            # Find any orig/bad files and copy them over (correcting names as necessary)
            other_files_path = dir_pattern(3).format(
                stage, job, 'file_comparison/raw/%s/%s/%s')
            for i, s in self.maps['orig']['history'].items():
                for j, p in s.items():
                    bad_files = glob(other_files_path % (i, j, '*.bad.*'))
                    orig_files = glob(other_files_path % (i, j, '*.orig.*'))
                    edit_files = glob(other_files_path % (i, j, '*.edit*.*'))

                    # if len(orig_files) > 0:
                    #     pbar = UI()
                    #     count = len(orig_files)
                    #     pbar.progress(0)
                    for k, of in enumerate(orig_files):
                        oFile = of.split('/')[-1]
                        if oFile in p:
                            key = oFile.replace('orig', 'raw')
                            if key in p:
                                filename = p[key]['current_name'].replace('.raw.', '.orig.')
                                filename = dir_pattern(6).format(
                                    stage, job, 'datastream', i, j, filename)
                                shutil.copy(of, filename)
                    if len(orig_files) > 0:
                        del k, of, oFile, key
                    # print("")
                    # sys.stdout.flush()

                    # if len(bad_files) > 0:
                    #     pbar = UI()
                    #     count = len(bad_files)
                    #     pbar.progress(0)
                    for k, bf in enumerate(bad_files):
                        bFile = bf.split('/')[-1]
                        if bFile in p:
                            key = bFile.replace('bad', 'raw')
                            if key in p:
                                filename = p[key]['current_name'].replace('.raw.', '.bad.')
                            else:
                                filename = bFile
                            filename = dir_pattern(6).format(
                                stage, job, 'datastream', i, j, filename)
                            shutil.copy(bf, filename)
                            # # Update progress bar
                            # pbar.progress(int((float(k + 1) / float(count)) * 100))
                    if len(bad_files) > 0:
                        del k, bf, bFile, key
                    # print("")
                    # sys.stdout.flush()

                    # if len(edit_files) > 0:
                    #     pbar = UI()
                    #     count = len(edit_files)
                    #     pbar.progress(0)
                    for k, ef in enumerate(edit_files):
                        eFile = ef.split('/')[-1]
                        temp = eFile.split('.')
                        edit = None
                        for t in temp:
                            if t.startswith('edit'):
                                edit = t
                                break
                        if eFile in p:
                            key = eFile.replace(edit, 'raw')
                            if key in p:
                                filename = p[key]['current_name'].replace(
                                    '.raw.', ".%s." % edit)
                                filename = dir_pattern(6).format(
                                    stage, job, 'datastream', i, j, filename)
                                shutil.copy(ef, filename)
                                # # Update progress bar
                                # pbar.progress(int((float(k + 1) / float(count)) * 100))
                    if len(edit_files) > 0:
                        del k, ef, eFile, edit, t, key
                    # print("")
                    # sys.stdout.flush()

                del j, p
            del i, s

            # Create any needed orig files
            print("Create needed orig files...")
            sys.stdout.flush()
            for i, s in self.archive['remove']['raw'].items():
                for j, p in s.items():
                    path = dir_pattern(5).format(stage, job, "datastream", i, j)
                    k = 0
                    count = len(p)
                    for f in p:
                        orig = f.replace('.raw.', '.orig.')
                        if not os.path.exists(dir_pattern().format(path, orig)):
                            src = dir_pattern(6).format(
                                stage, job, "file_comparison/raw", i, j,
                                file_history[i][j][f]['unpacked_name'])
                            dst = dir_pattern().format(path, orig)
                            shutil.copy(src, dst)
                            # del src, dst
                        percent = int((float(k) / float(count)) * 100)
                        pbar.progress(percent)
                        k = k + 1
                    if percent < 100:
                        percent = int((float(k) / float(count)) * 100)
                        pbar.progress(percent)
                    print("")
            # Unset loop variables
            # del i, s, j, p, path, f, orig, src, dst
            print("Done")

            # Bundle the data
            self.bundle_raw_data(raw_streams)
            self.config['cleanup_status']['remove']['files_bundled'] = True

        print("Map new tar bundle structure...", end="")
        self.maps['new']['tar'] = self.get_tar_structure(
            dir_pattern(3).format(stage, job, "datastream"))
        print("Done")

        print("")
        print("Mapping raw structure from original tar files...", end="")
        self.maps['orig']['raw'] = self.map_raw_structure(self.maps['orig']['tar'])
        print("Done")

        print("Mapping raw structure from new tar files...", end="")
        self.maps['new']['raw'] = self.map_raw_structure(self.maps['new']['tar'])
        print("Done")

        ##################################################
        # Find all of the tar files that need
        # to be removed from the archive
        ##################################################
        print("")
        print("Generating list of tar files to be removed from the archive...")
        sys.stdout.flush()

        for i, s in self.archive['remove']['raw'].items():
            percent = 0
            for j, p in s.items():
                pbar = UI()
                count = len(p)
                pbar.progress(percent)
                k = 1
                for raw_file in p:
                    tar_files = self.find_original_tar_bundle(
                        file_history[i][j][raw_file]['original_name'], i, j)
                    for f in tar_files:
                        # Check by file name so the same tar is not queued twice
                        if not any(d['file_name'] == f
                                   for d in self.archive['remove']['tar']):
                            tar = {
                                'site': i,
                                'instrument': j,
                                'file_name': f
                            }
                            self.archive['remove']['tar'].append(tar)
                    percent = int((float(k) / float(count)) * 100)
                    pbar.progress(percent)
                    k = k + 1
                if percent == 99:
                    pbar.progress(100)
                print("")
                sys.stdout.flush()

        # Unset loop variables
        if len(self.archive['remove']['raw']) > 0:
            del i, s, j, p, raw_file, tar_files, f, tar
        print("Done")

        ##################################################
        # Find all of the tar files that need
        # to be added to the archive
        ##################################################
        print("")
        print("Generating list of tar files to be added to the archive...")
        pbar = UI()
        pbar.progress(0)
        count = len(self.archive['remove']['tar'])
        percent = 0
        i = 1

        for tar_file in self.archive['remove']['tar']:
            files = self.find_all_files_from_original_tar(
                tar_file['file_name'], tar_file['site'], tar_file['instrument'])
            for f in files:
                if not any(d['file_name'] == f for d in self.archive['add']['tar']):
                    tar = {
                        'site': tar_file['site'],
                        'instrument': tar_file['instrument'],
                        'file_name': f
                    }
                    self.archive['add']['tar'].append(tar)
            percent = int((float(i) / float(count)) * 100)
            pbar.progress(percent)
            i = i + 1
        if percent == 99:
            pbar.progress(100)
        print("")
        sys.stdout.flush()

        # Unset loop variables
        if len(self.archive['remove']['tar']) > 0:
            del tar_file, files, f

        for i, s in self.archive['add']['raw'].items():
            for j, p in s.items():
                pbar = UI()
                pbar.progress(0)
                percent = 0
                count = len(p)
                n = 1  # progress counter (kept separate so the site key 'i' is not clobbered)
                for raw_file, info in p.items():
                    tar_files = self.find_original_tar_bundle(raw_file, i, j)
                    for f in tar_files:
                        if not any(d['file_name'] == f
                                   for d in self.archive['add']['tar']):
                            tar = {
                                'site': i,
                                'instrument': j,
                                'file_name': f
                            }
                            self.archive['add']['tar'].append(tar)
                    percent = int((float(n) / float(count)) * 100)
                    pbar.progress(percent)
                    n = n + 1
                if percent == 99:
                    pbar.progress(100)
                print("")
                sys.stdout.flush()

        # Unset loop variables
        if len(self.archive['add']['raw']) > 0:
            del i, s, j, p, raw_file, info, tar_files
        if 'f' in locals():
            del f
        if 'tar' in locals():
            del tar

        ##################################################
        # Update archive db for raw datastream
        ##################################################
        if not DEVEL:
            update_archive(raw_streams)

        # Get list of tar files from the archive
        for k, v in enumerate(raw_streams):
            stream = dir_pattern(5).format(stage, job, 'file_comparison/tar', site, v)
            files = os.listdir(stream)
            files = "','".join(files)
            args = (v, files)
            query = ("SELECT * FROM get_remote_files_by_tag('%s') "
                     "WHERE file_active = true and file_name in ('%s')")
            result = db.query(query % args, columns=cols)
            if len(result) > 0:
                archive_tars[v] = result
            else:
                print("\nNo results for %s" % v)

        # Unset loop variables
        if len(raw_streams) > 0:
            del k, v, args, result
        print("Done generating tar file list")

        # Find data on tar files in list and add it to 'contents'
        print("")
        print("Adding tar files to deletion list...", end="")
        for f in self.archive['remove']['tar']:
            files = archive_tars[f['instrument']]
            for k, v in enumerate(files):
                if v['file_name'] == f['file_name']:
                    index = k
                    break
            else:
                print("\nUnable to find %s in archive db" % f['file_name'])
                self.config['exit'] = True
                return self.config, self.files

            temp = f['file_name']
            if not any(d['filename'] == temp for d in contents):
                contents.append({
                    'datastream': f['instrument'],
                    'filename': f['file_name'],
                    'hash': files[index]['file_md5'],
                    'version': files[index]['file_version']
                })
        if len(self.archive['remove']['tar']) > 0:
            del f, files, k, v, index
        print("Done")

    # Set proper file names in deletion list
    print("Setting proper file names in deletion list...", end="")
    for k, v in archive_files.items():
        if k.split('.')[-1] != '00':
            for key, f in enumerate(v):
                if f['file_name'] not in p_files[k]:
                    temp = f['file_name']
                    if not any(d['filename'] == temp for d in contents):
                        contents.append({
                            'datastream': k,
                            'filename': f['file_name'],
                            'hash': f['file_md5'],
                            'version': f['file_version']
                        })
    print("Done")

    # Store the list of files that need to be archived to file
    archive_json_file = dir_pattern(3).format(stage, job, 'archive.json')
    with open(archive_json_file, 'w') as fp:
        fp.write(json.dumps(self.archive['add']['tar'],
                            indent=2,
                            sort_keys=False,
                            separators=(',', ': ')))

    # Update the saved status
    self.config['cleanup_status']['remove']['archive_list'] = True

    ##################################################
    # Write the results to file
    # (Use '\r\n' for Windows line endings)
    ##################################################
    print("\nEmailing deletion list...", end="")
    sys.stdout.flush()

    file_contents = []
    contents = sorted(contents, key=self.get_sort_key)
    for line in contents:
        l = "%s.v%s %s" % (line['filename'], line['version'], line['hash'])
        file_contents.append(l)

    with open(dir_pattern().format(job_folder, del_file), 'w') as fp:
        fp.write("\r\n".join(file_contents))

    # Update the saved status
    self.config['cleanup_status']['remove']['deletion_list'] = True

    # Send the deletion list to the appropriate place
    # (currently email, may be upload at a later time)
    self.email_del_list("%s.deletion-list.txt" % self.config['job'])
    # self.upload_del_list()
    print("Done")

    # Update the saved status
    self.config['cleanup_status']['remove']['status'] = True

    duration = datetime.now() - self.start_time
    print(duration)

    return self.config, self.files
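# The deletion list written above is a plain text file with one entry per line in the
# form "<file_name>.v<version> <md5>", joined with '\r\n'. For example (illustrative
# values only, not taken from a real archive):
#
#     sgpmfrsrC1.00.20140101.000000.raw.20140101000000.tar.v1 d41d8cd98f00b204e9800998ecf8427e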
def run(self):
    """ Stage the files needed for this job """
    config = self.config
    manager = self.manager
    duplicates = {}  # Files that had naming collisions when unpacked

    if config['ingest']:
        # If staging for Ingest
        # Make sure collection does not have any files that might get overwritten
        empty = self.check_collection_empty()
        if not empty:
            print("\nFiles currently exist in your collection directory.\n"
                  "Please empty {}/{}/collection and try again.\n".format(
                      config['stage'], config['job']))
            config['exit'] = True
            return config, self.files

        # cd to the stage directory
        os.chdir(config['stage'])

        # Check to see if a plugin needs to modify the datastream
        temp = manager.callPluginCommand('hook_datastream_alter', {'config': config})
        config = temp if temp is not None else config

        # Check to see if a plugin needs to modify the SIF data
        temp = manager.callPluginCommand('hook_sif_alter', {'config': config})
        config = temp if temp is not None else config

        # Establish a database connection
        db = DB(config)

        # Get the data_paths
        data_paths = db.get_data_paths()

        # Check to see if a plugin needs to modify the data_paths
        temp = manager.callPluginCommand('hook_data_paths_alter', {
            'config': config,
            'data_paths': data_paths
        })
        data_paths = temp if temp is not None else data_paths

        # For each instrument
        for k, v in enumerate(data_paths):
            archive_path = v['output']
            stage_path = v['input']

            # Set tar_path and check for plugin modifications
            tar_path = '{}/{}'.format(config['source'], archive_path)
            temp = manager.callPluginCommand('hook_tar_path_alter', {
                'config': config,
                'tar_path': tar_path
            })
            tar_path = temp if temp is not None else tar_path

            if os.path.exists(tar_path):
                # Get a list of tar files that match specified dates
                tar = UnPack(config, archive_path, stage_path)
                tar_files = tar.get_tar_files()

                temp = manager.callPluginCommand('hook_tar_files_alter', {'config': config})
                tar_files = temp if temp is not None else tar_files

                if tar_files and len(tar_files) > 0:
                    # compare_path = '{}/{}/.compare/{}'.format(config['stage'], config['job'], stage_path)
                    compare_path = dir_pattern(5).format(
                        config['stage'], config['job'], 'file_comparison', 'raw', stage_path)
                    tar_backup = dir_pattern(5).format(
                        config['stage'], config['job'], 'file_comparison', 'tar', stage_path)
                    collection_path = '{}/{}/collection/{}'.format(
                        config['stage'], config['job'], stage_path)

                    # Make the above paths if they don't already exist
                    if not os.path.exists(compare_path):
                        os.makedirs(compare_path)
                    if not os.path.exists(tar_backup):
                        os.makedirs(tar_backup)
                    if not os.path.exists(collection_path):
                        os.makedirs(collection_path)

                    # Copy the tar files to the backup location
                    if not tar.copy_files(tar_files, tar_backup):
                        print("Unable to copy tar files")

                    # Unpack the tar files
                    tar.extract_tar_files(tar_files)

                    has_dups = tar.handle_duplicate_files()
                    if has_dups:
                        config['duplicates'] = True
                        for i in has_dups:
                            duplicates[i] = has_dups[i]
                else:
                    temp = tar_path.split('/')
                    if not config['quiet']:
                        print('\nData not available for {} using the dates specified'
                              .format(temp[-1]))
            else:
                temp = tar_path.split('/')
                if not config['quiet']:
                    print('\nData for {} does not exist.'.format(temp[-1]))

            site, process = stage_path.split('/')

            if self.files is None:
                self.files = {}
            if site not in self.files:
                self.files[site] = {}
            site = self.files[site]
            if process not in site:
                site[process] = {}
            process = site[process]

            if os.path.exists(dir_pattern(4).format(
                    self.config['stage'], self.config['job'], 'collection', stage_path)):
                files = os.listdir(dir_pattern(4).format(
                    self.config['stage'], self.config['job'], 'collection', stage_path))
                dup_uuid = {}
                for i in files:
                    original_name = i
                    temp = i.split('.')
                    # Files renamed to resolve collisions end in a version suffix
                    # like ".v1"; strip it to recover the original name
                    if temp[-1][0] == 'v':
                        try:
                            int(temp[-1][1:])
                            original_name = '.'.join(temp[:-1])
                        except ValueError:
                            pass
                    process[i] = {
                        "uuid": str(uuid.uuid4()),
                        "current_name": i,
                        "original_name": original_name,
                        "stripped_name": None,
                        "processed_name": None,
                        "unpacked_name": i,
                        "duplicate_files": [],
                        "deleted": False,
                    }
                    if original_name != i:
                        dup_uuid[i] = process[i]['uuid']

                for i in duplicates:
                    if i.startswith(data_paths[k]['input']):
                        for j in duplicates[i]:
                            site, process, name = j.split('/')
                            for l in duplicates[i]:
                                temp = l.split('/')
                                if j != l:
                                    self.files[site][process][name][
                                        'duplicate_files'].append(dup_uuid[temp[2]])

        # Copy the config files from /data/conf to /<stage>/<job>/conf
        conf_path = "/data/conf/{0}/{0}{1}{2}".format(
            self.config['site'], self.config['instrument'], self.config['facility'])
        conf_dest = "{0}/{1}/conf/{2}".format(
            self.config['stage'], self.config['job'], self.config['site'])
        dest_folder = "{}{}{}".format(
            self.config['site'], self.config['instrument'], self.config['facility'])

        if not os.path.exists(conf_path):
            conf_path = "/data/conf/{0}/{1}{2}".format(
                self.config['site'], self.config['instrument'], self.config['facility'])
            conf_dest = "{0}/{1}/conf/{2}".format(
                self.config['stage'], self.config['job'], self.config['site'])
            dest_folder = "{}{}".format(self.config['instrument'], self.config['facility'])

        if os.path.exists(conf_path):
            if not os.path.exists(conf_dest):
                os.makedirs(conf_dest)
            if os.path.exists(dir_pattern().format(conf_dest, dest_folder)):
                try:
                    os.rmdir(dir_pattern().format(conf_dest, dest_folder))
                except OSError as e:
                    if e.errno == errno.ENOTEMPTY:
                        exit("Unable to copy config files to {}. Destination is not empty."
                             .format(dir_pattern().format(conf_dest, dest_folder)))
                    else:
                        raise e
            shutil.copytree(conf_path, dir_pattern().format(conf_dest, dest_folder))

        f = Files(self.config)
        src = dir_pattern(3).format(config['stage'], config['job'], 'collection')
        # dst = dir_pattern(3).format(config['stage'], config['job'], '.compare')
        dst = dir_pattern(4).format(config['stage'], config['job'], 'file_comparison', 'raw')
        if os.path.exists(dst):
            f.empty_dir(dst)
            os.rmdir(dst)
        shutil.copytree(src, dst)

        if len(duplicates) > 0:
            print('')
            print('The following files had naming collisions when unpacked.\n'
                  'Please verify the contents and keep only the appropriate file(s).')
            print('Please do not rename files, simply delete any unwanted files.')
            for i in duplicates:
                print('')
                for j in duplicates[i]:
                    print(j)
            print('')

        f.save_env()

    elif config['vap']:
        f = Files(self.config)
        f.save_env()
        vap = VapMgr(self.config)
        vap.add_to_env()

    return config, self.files
def run(self):
    """ Unpack the tar file """
    # Setup vars
    st_files = self.tar.st_files
    file_names = self.tar.file_names
    files = []
    for i in range(len(st_files)):
        files.append([])

    # Open the tar file
    tar = tarfile.open(dir_pattern().format(self.tar.archive_path, self.file), 'r')

    # Get the content of the tar file and check for duplicate file names
    members = tar.getmembers()
    f = Files(self.config)

    # Iterate over each entry in the tar file
    for i, m in enumerate(members):
        # Make sure arrays are not 0 length
        if len(file_names) == 0:
            file_names.append([])
        if len(files) == 0:
            files.append([])
        if len(st_files) == 0:
            st_files.append([])

        # Iterate over each entry in file_names
        # Add the file name to the first bucket where it does not collide
        for k, v in enumerate(file_names):
            sf_names = st_files[k]
            sn = f.strip_name(m.name)
            if sn is None or sn == 'orig' or sn == 'bad':
                sn = m.name
            if not (m.name in v or sn in sf_names):
                file_names[k].append(m.name)
                files[k].append(m)
                st_files[k].append(sn)
                break
        else:
            # The name collides in every existing bucket, so start a new one
            file_names.append([m.name])
            files.append([m])
            st_files.append([sn])

    duplicates = {}
    stripped = st_files[0]
    full_names = file_names[0]
    for i in range(1, len(file_names)):
        for k, v in enumerate(file_names[i]):
            try:
                myIndex = stripped.index(st_files[i][k])
            except (IndexError, ValueError):
                print("\nOOPS\n")
                print("\nI: {}\nK: {}".format(i, k))
                continue
            try:
                key = full_names[myIndex]
            except IndexError:
                print("\nOOPS 2\n")
                continue
            if key not in duplicates:
                duplicates[key] = []
            duplicates[key].append(v)

    # Extract all files; each additional bucket goes into a dup_N subdirectory
    for i in range(len(files)):
        if i > 0:
            path = 'dup_{}'.format(i)
        else:
            path = ''
        tar.extractall(path=path, members=files[i])

    tar.close()
    self.tar.duplicates = duplicates
    return
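# Rough illustration of the bucketing above (file names are made up): if a tar contains
# two members whose names or stripped names collide, they end up in separate buckets,
# e.g. file_names -> [['sgpmfrsrC1.00.data.raw'], ['sgpmfrsrC1.00.data.raw']]. Bucket 0
# is extracted into the working directory, bucket 1 into dup_1, bucket 2 into dup_2, and
# so on, and self.tar.duplicates maps each colliding name from bucket 0 to the matching
# names found in the later buckets so handle_duplicate_files() can reconcile them.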
def handle_duplicate_files(self):
    """ Handle duplicates that were extracted into dup_N directories """
    f = Files(self.config)
    dup_list = {}
    duplicates = {}
    files = self.file_names
    dups = self.duplicates

    if len(dups) > 0:
        for i, n in dups.items():
            for j, v in enumerate(n):
                folder = 'dup_{}'.format(j + 1)

                # If the duplicate is identical to the original it can simply be
                # deleted; otherwise it must be kept under a versioned name
                if f.is_same_file(
                        dir_pattern().format(self.stage_path, i),
                        dir_pattern(3).format(self.stage_path, folder, v)):
                    delete = True
                    move = False
                else:
                    delete = False
                    move = True

                if delete:
                    os.remove(dir_pattern(3).format(self.stage_path, folder, v))
                elif move:
                    if i not in dup_list:
                        # Rename the original file with a '.v1' suffix
                        name = '{}.v1'.format(i)
                        dup_list[i] = [name]
                        src = dir_pattern().format(self.stage_path, i)
                        dst = dir_pattern().format(self.stage_path, name)
                        try:
                            os.rename(src, dst)
                        except OSError:
                            shutil.move(src, dst)

                    # Move the duplicate in with the next version number
                    num = len(dup_list[i]) + 1
                    name = '{}.v{}'.format(v, num)
                    dup_list[i].append(name)
                    src = dir_pattern(3).format(self.stage_path, folder, v)
                    dst = dir_pattern().format(self.stage_path, name)
                    try:
                        os.rename(src, dst)
                    except OSError:
                        shutil.move(src, dst)

    for i in dup_list:
        if len(dup_list[i]) > 1:
            key = dir_pattern().format(self.local, i)
            duplicates[key] = []
            for j in dup_list[i]:
                duplicates[key].append(dir_pattern().format(self.local, j))

    self.dups = duplicates

    # Delete the dup_* directories now that their contents have been handled
    dupdirs = glob('{}/dup_*'.format(self.stage_path))
    for i in dupdirs:
        f.empty_dir(i)
        os.rmdir(i)

    return False if duplicates == {} else duplicates
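# Example of the renaming scheme above (hypothetical file name): if 'data.20140101.raw'
# is unpacked from two tar files with differing contents, the copy in the working
# directory is renamed 'data.20140101.raw.v1' and the copy from dup_1 becomes
# 'data.20140101.raw.v2'. handle_duplicate_files() then returns a dict mapping
# '<local>/data.20140101.raw' to both versioned paths so the caller can report the
# collision and ask the user to keep only the appropriate file.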