Пример #1
0
 def test_id(self):
     """The ID field is the main ID of a record and names its file (<ID>.mods.xml).

     For each of the ID column header variants below, processing a one-record
     sheet must leave exactly one file, ``1.mods.xml``, in the output directory.
     """
     sheet_variants = (
         'ID,<mods:note>\n1,asdf\n',
         'MODS ID,<mods:note>\n1,asdf\n',
         '<mods:mods id="">,<mods:note>\n1,asdf\n',
     )
     for sheet_text in sheet_variants:
         # Keep the subTest keyword name so failure reports read the same.
         with self.subTest(csv_info=sheet_text), tempfile.TemporaryDirectory() as out_dir:
             stream = io.BytesIO(sheet_text.encode('utf8'))
             process(spreadsheet=stream, xml_files_dir=out_dir)
             expected_file = os.path.join(out_dir, '1.mods.xml')
             self.assertTrue(os.path.exists(expected_file))
             self.assertEqual(len(os.listdir(out_dir)), 1)
Пример #2
0
 def test_process_spreadsheet_file_obj(self):
     """process() accepts an open binary file object for both .xls and .csv input.

     Each fixture in ``test_files`` is processed into its own temporary
     directory, which must then contain ``test1.mods.xml``.
     """
     for fixture_name in ('data.xls', 'data.csv'):
         fixture_path = os.path.join('test_files', fixture_name)
         with tempfile.TemporaryDirectory() as out_dir:
             with open(fixture_path, 'rb') as handle:
                 process(spreadsheet=handle, xml_files_dir=out_dir)
             # Checked after the input file is closed, before the temp dir is removed.
             produced = os.path.join(out_dir, 'test1.mods.xml')
             self.assertTrue(os.path.exists(produced))

if __name__ == '__main__':
    # Command-line entry point: parse the options below and pass them to process().
    # Output directory handed to process() as xml_files_dir (relative path).
    XML_FILES_DIR = "xml_files"
    parser = ArgumentParser()
    parser.add_argument('file_name')
    parser.add_argument('-t', '--type',
                    action='store', dest='type', default='parent',
                    help='type of records (parent or child, default is parent)')
    parser.add_argument('--force-dates',
                    action='store_true', dest='force_dates', default=False,
                    help='force date conversion even if ambiguous')
    parser.add_argument('--copy-parent-to-children',
                    action='store_true', dest='copy_parent_to_children', default=False,
                    help='copy parent data into children')
    # type=int lets argparse reject non-numeric values with a usage error
    # instead of a ValueError traceback from a later int() call.
    parser.add_argument('-s', '--sheet',
                    action='store', dest='sheet', type=int, default=1,
                    help='specify the sheet number (starting at 1) in an Excel spreadsheet')
    parser.add_argument('-r', '--ctrl_row',
                    action='store', dest='row', type=int, default=2,
                    help='specify the control row number (starting at 1) in an Excel spreadsheet')
    parser.add_argument('-i', '--input-encoding',
                    action='store', dest='in_enc', default='utf-8',
                    help='specify the input encoding for CSV files (default is UTF-8)')
    args = parser.parse_args()
    # args.sheet and args.row are already ints (converted by argparse, or the int defaults).
    process(file_name=args.file_name, xml_files_dir=XML_FILES_DIR, sheet=args.sheet,
            control_row=args.row, force_dates=args.force_dates, object_type=args.type,
            input_encoding=args.in_enc,
            copy_parent_to_children=args.copy_parent_to_children)
    sys.exit()

Пример #4
0
 def test_missing_id(self):
     """A sheet whose columns are <mods:abstract> and <mods:note> only must raise ControlRowError."""
     sheet_text = '<mods:abstract>,<mods:note>\nasdf,jkl;\n'
     stream = io.BytesIO(sheet_text.encode('utf8'))
     with tempfile.TemporaryDirectory() as out_dir, self.assertRaises(ControlRowError):
         process(spreadsheet=stream, xml_files_dir=out_dir)
Пример #5
0
 def test_no_ctrl_row(self):
     """A sheet headed only by the plain labels 'MODS,Note' must raise ControlRowError."""
     sheet_text = 'MODS,Note\n1,asdf\n2,jkl;'
     stream = io.BytesIO(sheet_text.encode('utf8'))
     with tempfile.TemporaryDirectory() as out_dir, self.assertRaises(ControlRowError):
         process(spreadsheet=stream, xml_files_dir=out_dir)
Пример #6
0
 def test_find_ctrl_row_second_row(self):
     """With 'mods id,<mods:note>' on the second line, exactly one file (1.mods.xml) is written."""
     sheet_text = 'MODS,Note\nmods id,<mods:note>\n1,asdf\n'
     stream = io.BytesIO(sheet_text.encode('utf8'))
     with tempfile.TemporaryDirectory() as out_dir:
         process(spreadsheet=stream, xml_files_dir=out_dir)
         produced = os.path.join(out_dir, '1.mods.xml')
         self.assertTrue(os.path.exists(produced))
         # Only the single data row may produce output.
         self.assertEqual(len(os.listdir(out_dir)), 1)
Пример #7
0
 def test_process_duplicate_ids(self):
     """Two data rows sharing the ID value 1 must make process() raise DataError."""
     sheet_text = 'ID,<mods:note>\n1,asdf\n1,jkl\n'
     stream = io.BytesIO(sheet_text.encode('utf8'))
     with tempfile.TemporaryDirectory() as out_dir, self.assertRaises(DataError):
         process(spreadsheet=stream, xml_files_dir=out_dir)
Пример #8
0
 def test_process_record_missing_data(self):
     """A sheet in which the row with ID 2 has an empty note cell must raise DataError."""
     sheet_text = 'ID,<mods:note>\n1,asdf\n2,\n3,jkl'
     stream = io.BytesIO(sheet_text.encode('utf8'))
     with tempfile.TemporaryDirectory() as out_dir, self.assertRaises(DataError):
         process(spreadsheet=stream, xml_files_dir=out_dir)
Пример #9
0
 def test_process_minimal_spreadsheet(self):
     """A header line plus one record, processed with control_row=1, yields 1.mods.xml."""
     sheet_text = 'mods id,<mods:note>\n1,asdf\n'
     stream = io.BytesIO(sheet_text.encode('utf8'))
     with tempfile.TemporaryDirectory() as out_dir:
         process(spreadsheet=stream, xml_files_dir=out_dir, control_row=1)
         produced = os.path.join(out_dir, '1.mods.xml')
         self.assertTrue(os.path.exists(produced))
Пример #10
0
 def test_process_no_xml_files_dir(self):
     """process() is pointed at an output directory that has not been created yet.

     The target is a fresh ``xml_files`` path inside a new temporary directory;
     after processing, ``test1.mods.xml`` must exist there.
     """
     source_path = os.path.join('test_files', 'data.xls')
     with tempfile.TemporaryDirectory() as scratch:
         target_dir = os.path.join(scratch, 'xml_files')
         process(spreadsheet=source_path, xml_files_dir=target_dir)
         produced = os.path.join(target_dir, 'test1.mods.xml')
         self.assertTrue(os.path.exists(produced))
Пример #11
0
        dest='sheet',
        default=1,
        help='specify the sheet number (starting at 1) in an Excel spreadsheet'
    )
    parser.add_argument(
        '-r',
        '--ctrl_row',
        action='store',
        dest='row',
        default=2,
        help=
        'specify the control row number (starting at 1) in an Excel spreadsheet'
    )
    parser.add_argument(
        '-i',
        '--input-encoding',
        action='store',
        dest='in_enc',
        default='utf-8',
        help='specify the input encoding for CSV files (default is UTF-8)')
    args = parser.parse_args()
    process(file_name=args.file_name,
            xml_files_dir=XML_FILES_DIR,
            sheet=int(args.sheet),
            control_row=int(args.row),
            force_dates=args.force_dates,
            object_type=args.type,
            input_encoding=args.in_enc,
            copy_parent_to_children=args.copy_parent_to_children)
    sys.exit()