def main():
    """Command-line entry point: convert AMI Excel workbooks to JSON.

    Workbook paths come from ``args.excel`` (a single file) and/or from
    every ``*.xls*`` file directly inside ``args.directory``. For each
    workbook the preservation-sheet values are passed to the edit sheet's
    ``add_PMDataToEM`` and then ``convert_amiExcelToJSON`` is called on
    both sheets with the output directory.
    """
    parser = _make_parser()
    args = parser.parse_args()
    _configure_logging(args)

    excel_paths = []
    if args.excel:
        excel_paths.append(args.excel)
    if args.directory:
        directory_path = os.path.abspath(args.directory)
        # abspath() strips any trailing separator, so the pattern must be
        # joined onto the directory; formatting it straight after the
        # directory name would match siblings named "<dir>*.xls*" instead
        # of the files inside the directory. glob already returns full
        # paths for an absolute pattern, so no further join is needed.
        excel_paths.extend(glob.glob(os.path.join(directory_path, "*.xls*")))

    # NOTE(review): output_path stays unbound when args.output is empty;
    # presumably the parser makes the option required -- confirm.
    if args.output:
        output_path = os.path.abspath(args.output)

    for excel_path in excel_paths:
        excel = ami_excel(excel_path)
        print(excel_path)
        excel.edit_sheet.add_PMDataToEM(excel.pres_sheet.sheet_values)
        excel.edit_sheet.convert_amiExcelToJSON(output_path)
        excel.pres_sheet.convert_amiExcelToJSON(output_path)
def main():
    """Command-line entry point: convert AMI Excel workbooks to CSV.

    Workbook paths come from ``args.excel`` (a single file) and/or from
    every ``*.xls*`` file directly inside ``args.directory``. For each
    workbook, ``convert_amiExcelToCSV`` is called on the preservation
    sheet with ``<output dir>/<workbook basename>.csv``.
    """
    parser = _make_parser()
    args = parser.parse_args()
    _configure_logging(args)

    excel_paths = []
    if args.excel:
        excel_paths.append(args.excel)
    if args.directory:
        directory_path = os.path.abspath(args.directory)
        # abspath() strips any trailing separator, so the pattern must be
        # joined onto the directory; otherwise it matches siblings named
        # "<dir>*.xls*" rather than the files inside the directory.
        excel_paths.extend(glob.glob(os.path.join(directory_path, "*.xls*")))

    # NOTE(review): output_path stays unbound when args.output is empty;
    # presumably the parser makes the option required -- confirm.
    if args.output:
        output_path = os.path.abspath(args.output)

    for excel_path in excel_paths:
        csv_name = os.path.splitext(os.path.split(excel_path)[1])[0]
        # Use a per-file variable: rebinding output_path here would nest
        # every later CSV path under the previous file's name.
        csv_path = os.path.join(output_path, csv_name + '.csv')
        excel = ami_excel(excel_path)
        print(excel_path)
        print(csv_path)
        excel.pres_sheet.convert_amiExcelToCSV(csv_path)
def add_json_from_excel(self):
    """Generate JSON metadata from every ``.xlsx`` file listed in the bag.

    Sets ``self.excel_metadata`` to the entries of ``self.data_files``
    whose extension is exactly ``.xlsx``. For each of those workbooks:

    * if it has an edit sheet, the preservation-sheet values are passed to
      ``add_PMDataToEM``; on success the edit sheet is converted with the
      ``.json`` paths derived from the media files under
      ``data/EditMasters``; on failure an error is logged and the edit
      sheet is skipped;
    * the preservation sheet is always converted with the ``.json`` paths
      derived from the media files under ``data/PreservationMasters``.
    """
    self.excel_metadata = [
        filename for filename in self.data_files
        if os.path.splitext(filename)[1] == ".xlsx"
    ]

    for filename in self.excel_metadata:
        excel = ami_excel(os.path.join(self.path, filename))

        if excel.edit_sheet:
            em_path = os.path.join(self.path, "data/EditMasters")
            # TODO: decide where to raise when the files don't match
            try:
                excel.edit_sheet.add_PMDataToEM(
                    excel.pres_sheet.sheet_values)
            except Exception:
                # Deliberately best-effort, but only for ordinary errors:
                # a bare except would also swallow KeyboardInterrupt and
                # SystemExit.
                LOGGER.error(
                    "EM's and PM's do not have 1-1 correspondence")
            else:
                em_filepaths = [
                    x + ".json" for x in self.media_filepaths
                    if em_path in x
                ]
                excel.edit_sheet.convert_amiExcelToJSON(
                    em_path, filepaths=em_filepaths)

        pm_path = os.path.join(self.path, "data/PreservationMasters")
        pm_filepaths = [
            x + ".json" for x in self.media_filepaths if pm_path in x
        ]
        excel.pres_sheet.convert_amiExcelToJSON(pm_path,
                                                filepaths=pm_filepaths)
def set_metadata_excel(self):
    """Record the bag's Excel files and the media filenames they list.

    Sets ``self.metadata_files`` to the entries of ``self.data_files``
    whose extension is exactly ``.xlsx`` and ``self.media_files_md`` to
    the set of ``asset.referenceFilename`` values found in each
    workbook's preservation sheet and, when that column exists, its edit
    sheet.
    """
    filename_column = "asset.referenceFilename"

    self.metadata_files = [
        name for name in self.data_files
        if os.path.splitext(name)[1] == ".xlsx"
    ]
    self.media_files_md = []

    for name in self.metadata_files:
        workbook = ami_excel(os.path.join(self.path, name))

        # Gather the filenames referenced by the metadata sheets.
        if workbook.pres_sheet:
            self.media_files_md.extend(
                workbook.pres_sheet.sheet_values[filename_column].tolist())

        # The edit sheet is only read when it carries the column.
        if (workbook.edit_sheet
                and filename_column
                in workbook.edit_sheet.sheet_values.columns):
            self.media_files_md.extend(
                workbook.edit_sheet.sheet_values[filename_column].tolist())

    # Collapse duplicates once everything has been collected.
    self.media_files_md = set(self.media_files_md)
def main():
    """Command-line entry point: validate an AMI Excel workbook.

    Logs ``<path>: valid`` when the workbook in ``args.excel`` validates.
    Otherwise, if ``args.output`` is given, the workbook is re-saved there
    via ``load_workbook(..., data_only=True)`` and the copy is validated,
    logging either ``valid`` or ``invalid`` for the output path.
    """
    parser = _make_parser()
    args = parser.parse_args()
    _configure_logging(args)

    # Nothing to do without an input workbook.
    if not args.excel:
        return

    original = ami_excel(args.excel)
    if original and original.validate_workbook():
        LOGGER.info("{}: valid".format(args.excel))
        return

    # The re-save attempt only happens when an output path was supplied.
    if not args.output:
        return

    # NOTE(review): presumably openpyxl's load_workbook, where data_only
    # keeps cached cell values instead of formulas -- confirm.
    resaved_book = load_workbook(args.excel, data_only=True)
    resaved_book.save(args.output)

    candidate = ami_excel(args.output)
    if candidate.validate_workbook():
        LOGGER.info("{}: valid".format(args.output))
    else:
        LOGGER.error("{}: invalid".format(args.output))
def main():
    """Validate the workbook named by ``args.excel``, retrying on a copy.

    A workbook that validates is reported at INFO level. One that does
    not is, when ``args.output`` is set, loaded with ``data_only=True``,
    saved to ``args.output`` and validated again; the result for the
    saved copy is reported at INFO (valid) or ERROR (invalid) level.
    """
    parser = _make_parser()
    args = parser.parse_args()
    _configure_logging(args)

    if args.excel:
        workbook = ami_excel(args.excel)
        if workbook and workbook.validate_workbook():
            LOGGER.info("{}: valid".format(args.excel))
        elif args.output:
            # Write a copy to the output path, then validate that copy.
            load_workbook(args.excel, data_only=True).save(args.output)
            rewritten = ami_excel(args.output)
            if rewritten.validate_workbook():
                LOGGER.info("{}: valid".format(args.output))
            else:
                LOGGER.error("{}: invalid".format(args.output))
def check_metadata_excel(self):
    """Validate every Excel metadata file in the bag.

    Returns:
        True when ``self.metadata_files`` is non-empty and every workbook
        passes ``validate_workbook()``.

    Raises:
        ami_bagError: if there are no metadata files, or if at least one
            workbook fails validation.
    """
    if not self.metadata_files:
        raise ami_bagError(
            "Excel bag does not contain any files with xlsx extension")

    # Every workbook is validated (no short-circuit), matching a plain
    # loop over all files.
    failed = [
        name for name in self.metadata_files
        if not ami_excel(os.path.join(self.path, name)).validate_workbook()
    ]

    if failed:
        raise ami_bagError("Excel files contain formatting errors")

    return True
def check_metadata_excel(self):
    """Validate every Excel metadata file in the bag.

    Problems are reported through ``self.raise_bagerror``: once when
    ``self.metadata_files`` is empty and once when at least one workbook
    fails ``validate_workbook()``. Returns True whenever control reaches
    the end of the method.
    """
    if not self.metadata_files:
        self.raise_bagerror(
            "Excel bag does not contain any files with xlsx extension")

    # Every workbook is validated (no short-circuit), matching a plain
    # loop over all files.
    failed = [
        name for name in self.metadata_files
        if not ami_excel(os.path.join(self.path, name)).validate_workbook()
    ]

    if failed:
        self.raise_bagerror("Excel files contain formatting errors")

    return True
def add_json_from_excel(self):
    """Create JSON metadata from the ``.xlsx`` files among the bag's data.

    ``self.excel_metadata`` is set to the ``self.data_files`` entries
    whose extension is exactly ``.xlsx``. For each workbook, the edit
    sheet (when present) receives the preservation-sheet values through
    ``add_PMDataToEM`` and, if that succeeds, is converted using the
    ``.json`` paths of media files under ``data/EditMasters``. The
    preservation sheet is then converted using the ``.json`` paths of
    media files under ``data/PreservationMasters``.
    """
    self.excel_metadata = [
        filename for filename in self.data_files
        if os.path.splitext(filename)[1] == ".xlsx"
    ]

    for filename in self.excel_metadata:
        excel = ami_excel(os.path.join(self.path, filename))

        if excel.edit_sheet:
            em_path = os.path.join(self.path, "data/EditMasters")
            # TODO: decide where to raise when the files don't match
            try:
                excel.edit_sheet.add_PMDataToEM(excel.pres_sheet.sheet_values)
            except Exception:
                # Log and skip the edit sheet, but let KeyboardInterrupt
                # and SystemExit propagate (a bare except would trap them).
                LOGGER.error("EM's and PM's do not have 1-1 correspondence")
            else:
                em_filepaths = [
                    x + ".json" for x in self.media_filepaths if em_path in x
                ]
                excel.edit_sheet.convert_amiExcelToJSON(
                    em_path, filepaths=em_filepaths)

        pm_path = os.path.join(self.path, "data/PreservationMasters")
        pm_filepaths = [
            x + ".json" for x in self.media_filepaths if pm_path in x
        ]
        excel.pres_sheet.convert_amiExcelToJSON(
            pm_path, filepaths=pm_filepaths)
def set_metadata_excel(self):
    """Populate ``metadata_files`` and ``media_files_md`` from Excel files.

    ``self.metadata_files`` becomes the ``self.data_files`` entries whose
    extension is exactly ``.xlsx``. ``self.media_files_md`` becomes the
    set of ``asset.referenceFilename`` values read from each workbook's
    preservation sheet and, if the column is present, its edit sheet.
    """
    self.metadata_files = [
        candidate for candidate in self.data_files
        if os.path.splitext(candidate)[1] == ".xlsx"
    ]
    self.media_files_md = []

    for candidate in self.metadata_files:
        book = ami_excel(os.path.join(self.path, candidate))

        # Referenced filenames from the preservation sheet.
        if book.pres_sheet:
            pres_names = book.pres_sheet.sheet_values[
                "asset.referenceFilename"].tolist()
            self.media_files_md += pres_names

        # Referenced filenames from the edit sheet, when it has the column.
        if not book.edit_sheet:
            continue
        if "asset.referenceFilename" in book.edit_sheet.sheet_values.columns:
            edit_names = book.edit_sheet.sheet_values[
                "asset.referenceFilename"].tolist()
            self.media_files_md += edit_names

    # De-duplicate after all workbooks have been read.
    self.media_files_md = set(self.media_files_md)
    return None