def get_scancode_file(file_dict):
    '''Given a file dictionary from the scancode results, return a
    FileData object with the results'''
    # scancode records paths from the target directory onwards
    # which in tern's case is tern.utils.constants.untar_dir
    # removing that portion of the file path
    fspath = file_dict['path'].replace(
        constants.untar_dir + os.path.sep, '')
    fd = FileData(
        file_dict['name'], fspath, file_dict['date'], file_dict['file_type'])
    fd.short_file_type = get_file_type(file_dict)
    fd.add_checksums({'sha1': file_dict['sha1'], 'md5': file_dict['md5']})
    if file_dict['licenses']:
        fd.licenses = [li['short_name'] for li in file_dict['licenses']]
    fd.license_expressions = file_dict['license_expressions']
    if file_dict['copyrights']:
        fd.copyrights = [c['value'] for c in file_dict['copyrights']]
    if file_dict['urls']:
        fd.urls = [u['url'] for u in file_dict['urls']]
    fd.packages = file_dict['packages']
    fd.authors = [a['value'] for a in file_dict['authors']]
    if file_dict['scan_errors']:
        # for each scan error make a notice
        for err in file_dict['scan_errors']:
            fd.origins.add_notice_to_origins(
                'File: ' + fd.path, Notice(err, 'error'))
    return fd


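# Illustrative sketch (not part of the source): how a single entry from a
# scancode JSON report maps onto a FileData object via get_scancode_file().
# The dictionary below is hypothetical example data; it assumes the keys
# emitted by 'scancode -ilpcu --json' and that get_file_type() only reads
# the is_* flags shown here.
def _example_get_scancode_file():
    example_entry = {
        'name': 'hello.c',
        'path': constants.untar_dir + os.path.sep + 'usr/src/hello.c',
        'date': '2020-01-01',
        'file_type': 'C source, ASCII text',
        'sha1': 'da39a3ee5e6b4b0d3255bfef95601890afd80709',
        'md5': 'd41d8cd98f00b204e9800998ecf8427e',
        'is_binary': False, 'is_text': True, 'is_source': True,
        'is_script': False, 'is_archive': False, 'is_media': False,
        'licenses': [{'short_name': 'GPL-2.0'}],
        'license_expressions': ['gpl-2.0'],
        'copyrights': [{'value': 'Copyright (c) Example Author'}],
        'urls': [],
        'packages': [],
        'authors': [],
        'scan_errors': []}
    fd = get_scancode_file(example_entry)
    # the untar_dir prefix is stripped and license data is flattened
    assert fd.path == 'usr/src/hello.c'
    assert fd.licenses == ['GPL-2.0']

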
def testInstance(self):
    file1 = FileData('file1', 'path/to/file1')
    self.assertEqual(file1.name, 'file1')
    self.assertEqual(file1.path, 'path/to/file1')
    self.assertFalse(file1.checksum_type)
    self.assertFalse(file1.checksum)
    self.assertFalse(file1.date)
    self.assertFalse(file1.version_control)
    self.assertFalse(file1.version)
    self.assertFalse(file1.file_type)
    self.assertFalse(file1.licenses)
    self.assertFalse(file1.license_expressions)
    self.assertFalse(file1.copyrights)
    self.assertFalse(file1.authors)
    self.assertFalse(file1.packages)
    self.assertFalse(file1.urls)

    with self.assertRaises(ValueError):
        file2 = FileData('file2', 'path/to/file2', '12355')

    file2 = FileData('file2', 'path/to/file2', '2020-01-01', 'binary')
    self.assertEqual(file2.date, '2020-01-01')
    self.assertEqual(file2.file_type, 'binary')
    file2.licenses = ['MIT', 'GPL']
    file2.license_expressions = ['GPLv2 or MIT', 'MIT and GPLv2']
    file2.copyrights = ['copyrights']
    file2.authors = ['author1', 'author2']
    file2.packages = ['package1', 'package2']
    self.assertEqual(file2.licenses, ['MIT', 'GPL'])
    self.assertEqual(file2.license_expressions,
                     ['GPLv2 or MIT', 'MIT and GPLv2'])
    self.assertEqual(file2.copyrights, ['copyrights'])
    self.assertEqual(file2.authors, ['author1', 'author2'])
    self.assertEqual(file2.packages, ['package1', 'package2'])


def testMerge(self):
    file1 = FileData('switch_root', 'sbin/switch_root')
    file1.set_checksum('sha256', '123abc456def')
    file1.extattrs = '-rwxr-xr-x|1000|1000|14408|1'

    file2 = FileData('switch_root', 'sbin/switch_root')
    file2.add_checksums({
        'SHA1': '12345abcde',
        'MD5': '1ff38cc592c4c5d0c8e3ca38be8f1eb1'})
    file2.set_checksum('sha256', '123abc456def')
    file2.extattrs = '-rwxr-xr-x|1000|1000|14408|1'
    file2.date = '2012-02-02'
    file2.file_type = 'binary'
    file2.short_file_type = 'BINARY'
    file2.licenses = ['MIT', 'GPL']
    file2.license_expressions = ['MIT or GPL']
    file2.copyrights = ['copyrights']
    file2.authors = ['author1', 'author2']
    file2.packages = ['package1', 'package2']
    file2.urls = ['url1', 'url2']
    file2.origins.add_notice_to_origins(
        'scanning', Notice('something happened', 'error'))

    file3 = FileData('switch_root', 'sbin/switch_root')
    file3.set_checksum('sha1', '456def123abc')

    file4 = FileData('e2image', 'sbin/e2image')

    # merging a file with a different name and path fails
    self.assertFalse(file1.merge(file4))
    # a file with the same name and path merges; file1 keeps its own checksum
    self.assertTrue(file1.merge(file3))
    self.assertEqual(file1.checksum, '123abc456def')
    self.assertEqual(file1.extattrs, '-rwxr-xr-x|1000|1000|14408|1')
    # merging with something that is not a FileData object fails
    self.assertFalse(file1.merge('astring'))
    # a successful merge copies over file2's checksums, metadata and notices
    self.assertTrue(file1.merge(file2))
    self.assertEqual(file1.checksums, {
        'sha1': '12345abcde',
        'md5': '1ff38cc592c4c5d0c8e3ca38be8f1eb1'})
    self.assertEqual(file1.date, '2012-02-02')
    self.assertEqual(file1.file_type, 'binary')
    self.assertEqual(file1.short_file_type, 'BINARY')
    self.assertEqual(file1.licenses, ['MIT', 'GPL'])
    self.assertEqual(file1.license_expressions, ['MIT or GPL'])
    self.assertEqual(file1.copyrights, ['copyrights'])
    self.assertEqual(file1.authors, ['author1', 'author2'])
    self.assertEqual(file1.packages, ['package1', 'package2'])
    self.assertEqual(file1.urls, ['url1', 'url2'])
    self.assertEqual(len(file1.origins.origins), 1)
    self.assertEqual(file1.origins.origins[0].origin_str, 'scanning')
    self.assertEqual(len(file1.origins.origins[0].notices), 1)
    self.assertEqual(
        file1.origins.origins[0].notices[0].message, 'something happened')


def collect_layer_data(layer_obj):
    '''Use scancode to collect data from a layer filesystem. This function
    will create a FileData object for every file found. After scanning,
    it will return a list of FileData objects.'''
    files = []
    # run scancode against a directory
    command = 'scancode -ilpcu --quiet --json -'
    full_cmd = get_filesystem_command(layer_obj, command)
    origin_layer = 'Layer: ' + layer_obj.fs_hash[:10]
    result, error = rootfs.shell_command(True, full_cmd)
    if not result:
        logger.error("No scancode results for this layer: %s", str(error))
        layer_obj.origins.add_notice_to_origins(
            origin_layer, Notice(str(error), 'error'))
    else:
        # make FileData objects for each result
        data = json.loads(result)
        for f in data['files']:
            if f['type'] == 'file':
                # scancode records paths from the target directory onwards
                # which in tern's case is tern.utils.constants.untar_dir
                # removing that portion of the file path
                fspath = f['path'].replace(
                    constants.untar_dir + os.path.sep, '')
                fd = FileData(f['name'], fspath, f['date'], f['file_type'])
                if f['licenses']:
                    fd.licenses = [li['short_name'] for li in f['licenses']]
                fd.license_expressions = f['license_expressions']
                if f['copyrights']:
                    fd.copyrights = [c['value'] for c in f['copyrights']]
                if f['urls']:
                    fd.urls = [u['url'] for u in f['urls']]
                fd.packages = f['packages']
                fd.authors = [a['value'] for a in f['authors']]
                if f['scan_errors']:
                    # for each scan error make a notice
                    for err in f['scan_errors']:
                        fd.origins.add_notice_to_origins(
                            'File: ' + fd.path, Notice(err, 'error'))
                files.append(fd)
    return files
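

# Illustrative sketch (not part of the source): driving collect_layer_data()
# over every layer of an already-unpacked image and printing the license
# findings. 'image_obj.layers' and the FileData attributes follow the code
# above; the rest is hypothetical glue code, not tern's own reporting.
def _example_report_licenses(image_obj):
    for layer in image_obj.layers:
        for fd in collect_layer_data(layer):
            if fd.licenses:
                print('{}: {}'.format(fd.path, ', '.join(fd.licenses)))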