def load_javas(self, analysis_json, containing_image):
    """Build ImagePackage records for java archives found in the analysis output.

    :param analysis_json: analyzer output document containing a 'package_list' section
    :param containing_image: image record providing user_id/id for the new packages
    :return: list of ImagePackage records (pkg_type 'java')
    """
    pkgs_json = analysis_json.get('package_list', {}).get('pkgs.java', {}).get('base')
    if not pkgs_json:
        return []

    pkgs = []
    for path, pkg_str in list(pkgs_json.items()):
        pkg_json = json.loads(pkg_str)
        n = ImagePackage()

        # primary keys
        # TODO - some java names have a version in it, need to clean that up
        n.name = pkg_json.get('name')
        n.pkg_type = 'java'
        n.arch = 'N/A'
        n.pkg_path = path

        # Pick the first available manifest version, but record all of them.
        version = None
        versions_json = {}
        for k in ['maven-version', 'implementation-version', 'specification-version']:
            if not version and pkg_json.get(k, 'N/A') != 'N/A':
                version = pkg_json.get(k)
            versions_json[k] = pkg_json.get(k, 'N/A')

        if version:
            n.version = version
        else:
            n.version = 'N/A'

        n.image_user_id = containing_image.user_id
        n.image_id = containing_image.id

        # other non-PK values
        n.pkg_path_hash = hashlib.sha256(ensure_bytes(path)).hexdigest()
        n.distro_name = 'java'
        n.distro_version = 'N/A'
        n.like_distro = 'java'
        n.fullversion = n.version

        # Fix: 'metadata' may be missing/null in the analyzer output; previously
        # the subscript assignment below raised TypeError on None.
        m = pkg_json.get('metadata') or {}
        m['java_versions'] = versions_json
        n.metadata_json = m

        # Prefer the maven groupId:artifactId as the source package name when
        # pom.properties data is available.
        fullname = n.name
        pomprops = n.get_pom_properties()
        if pomprops:
            fullname = "{}:{}".format(pomprops.get('groupId'), pomprops.get('artifactId'))

        n.normalized_src_pkg = fullname
        n.src_pkg = fullname
        pkgs.append(n)

    return pkgs
def load_npms(self, analysis_json, containing_image):
    """Build ImagePackage records for npm packages found in the analysis output.

    :param analysis_json: analyzer output document containing a 'package_list' section
    :param containing_image: image record providing user_id/id for the new packages
    :return: tuple of (list of ImagePackage records, list of handled package type keys)
    """
    handled_pkgtypes = ['pkgs.npms']
    raw_npms = analysis_json.get('package_list', {}).get('pkgs.npms', {}).get('base')
    if not raw_npms:
        return [], handled_pkgtypes

    image_packages = []
    for pkg_path, raw_record in list(raw_npms.items()):
        record = json.loads(raw_record)

        # TODO: remove this usage of ImageNPM, that is deprecated
        npm = ImageNpm()
        npm.path_hash = hashlib.sha256(ensure_bytes(pkg_path)).hexdigest()
        npm.path = pkg_path
        npm.name = record.get('name')
        npm.src_pkg = record.get('src_pkg')
        npm.origins_json = record.get('origins')
        npm.licenses_json = record.get('lics')
        npm.latest = record.get('latest')
        npm.versions_json = record.get('versions')
        npm.image_user_id = containing_image.user_id
        npm.image_id = containing_image.id

        pkg = ImagePackage()
        # primary keys
        pkg.name = npm.name
        pkg.version = npm.versions_json[0] if len(npm.versions_json) else "N/A"
        pkg.pkg_type = 'npm'
        pkg.arch = 'N/A'
        pkg.image_user_id = npm.image_user_id
        pkg.image_id = npm.image_id
        pkg.pkg_path = npm.path
        # other
        pkg.pkg_path_hash = npm.path_hash
        pkg.distro_name = 'npm'
        pkg.distro_version = 'N/A'
        pkg.like_distro = 'npm'
        pkg.fullversion = pkg.version
        pkg.license = ' '.join(npm.licenses_json)
        pkg.origin = ' '.join(npm.origins_json)
        pkg.normalized_src_pkg = pkg.name
        pkg.src_pkg = pkg.name
        image_packages.append(pkg)

    return image_packages, handled_pkgtypes
def load_gems(self, analysis_json, containing_image):
    """Build ImagePackage records for ruby gems found in the analysis output.

    :param analysis_json: analyzer output document containing a 'package_list' section
    :param containing_image: image record providing user_id/id for the new packages
    :return: list of ImagePackage records (pkg_type 'gem')
    """
    gems_json = analysis_json.get('package_list', {}).get('pkgs.gems', {}).get('base')
    if not gems_json:
        return []

    image_packages = []
    for path, gem_str in list(gems_json.items()):
        gem_json = json.loads(gem_str)

        # NOTE(review): ImageGem intermediate appears deprecated (mirrors the
        # deprecated ImageNpm usage in load_npms) - confirm before removing.
        n = ImageGem()
        n.path_hash = hashlib.sha256(ensure_bytes(path)).hexdigest()
        n.path = path
        n.name = gem_json.get('name')
        n.src_pkg = gem_json.get('src_pkg')
        n.origins_json = gem_json.get('origins')
        n.licenses_json = gem_json.get('lics')
        n.versions_json = gem_json.get('versions')
        n.latest = gem_json.get('latest')
        n.image_user_id = containing_image.user_id
        n.image_id = containing_image.id

        np = ImagePackage()
        # primary keys
        np.name = n.name
        if len(n.versions_json):
            version = n.versions_json[0]
        else:
            version = "N/A"
        np.version = version
        np.pkg_type = 'gem'
        np.arch = 'N/A'
        np.image_user_id = n.image_user_id
        np.image_id = n.image_id
        np.pkg_path = n.path
        # other
        np.pkg_path_hash = n.path_hash
        np.distro_name = 'gem'
        np.distro_version = 'N/A'
        np.like_distro = 'gem'
        np.fullversion = np.version
        np.license = ' '.join(n.licenses_json)
        np.origin = ' '.join(n.origins_json)
        fullname = np.name
        np.normalized_src_pkg = fullname
        np.src_pkg = fullname
        image_packages.append(np)

    return image_packages
def load_generic_packages(self, analysis_json, containing_image, excludes=None):
    """Load any remaining 'pkgs.*' package types into ImagePackage records.

    :param analysis_json: analyzer output document containing a 'package_list' section
    :param containing_image: image record providing user_id/id for the new packages
    :param excludes: iterable of package-type keys (e.g. 'pkgs.java') to skip
    :return: tuple of (list of ImagePackage records, list of handled type names)
    """
    # Fix: avoid a mutable default argument; behavior is unchanged for callers.
    if excludes is None:
        excludes = []

    pkgs = []
    handled_pkgtypes = []
    package_types = analysis_json.get('package_list', {})

    for package_type in package_types:
        if package_type in excludes:
            continue
        patt = re.match(r"pkgs\.(.*)", package_type)
        if not patt:
            continue

        ptype = patt.group(1)
        handled_pkgtypes.append(ptype)

        pkgs_json = analysis_json.get('package_list', {}).get(package_type, {}).get('base', {})
        if not pkgs_json:
            # Fix: previously this returned early with an empty list, discarding
            # packages already collected and skipping all remaining types.
            continue

        for path, pkg_str in list(pkgs_json.items()):
            pkg_json = json.loads(pkg_str)
            n = ImagePackage()
            # primary keys
            n.name = pkg_json.get('name')
            n.pkg_path = path
            n.version = pkg_json.get('version')
            n.pkg_type = pkg_json.get('type', 'N/A')
            n.arch = 'N/A'
            # Fix: these were self-assignments (leaving the fields unset); take
            # ownership from the containing image, as the other loaders do.
            n.image_user_id = containing_image.user_id
            n.image_id = containing_image.id
            # other
            n.pkg_path_hash = hashlib.sha256(ensure_bytes(path)).hexdigest()
            n.distro_name = n.pkg_type
            n.distro_version = 'N/A'
            n.like_distro = n.pkg_type
            n.fullversion = n.version
            n.license = pkg_json.get('license', 'N/A')
            n.origin = pkg_json.get('origin', 'N/A')
            fullname = n.name
            n.normalized_src_pkg = fullname
            n.src_pkg = fullname
            pkgs.append(n)

    return pkgs, handled_pkgtypes
def load_pythons(self, analysis_json, containing_image):
    """Build ImagePackage records for python distributions found in the analysis output.

    :param analysis_json: analyzer output document containing a 'package_list' section
    :param containing_image: image record providing user_id/id for the new packages
    :return: list of ImagePackage records (pkg_type 'python')
    """
    pkgs_json = analysis_json.get('package_list', {}).get('pkgs.python', {}).get('base')
    if not pkgs_json:
        return []

    pkgs = []
    for path, pkg_str in list(pkgs_json.items()):
        pkg_json = json.loads(pkg_str)
        n = ImagePackage()
        # primary keys
        n.name = pkg_json.get('name')
        n.pkg_path = path
        n.version = pkg_json.get('version')
        n.pkg_type = 'python'
        n.arch = 'N/A'
        # Fix: these were self-assignments (leaving the fields unset); take
        # ownership from the containing image, as the other loaders do.
        n.image_user_id = containing_image.user_id
        n.image_id = containing_image.id
        # other
        n.pkg_path_hash = hashlib.sha256(ensure_bytes(path)).hexdigest()
        n.distro_name = 'python'
        n.distro_version = 'N/A'
        n.like_distro = 'python'
        n.fullversion = n.version
        n.license = pkg_json.get('license')
        n.origin = pkg_json.get('origin')
        n.metadata_json = {
            'python_distribution_metadata': pkg_json.get('metadata'),
            'files': pkg_json.get('files'),
        }
        fullname = n.name
        n.normalized_src_pkg = fullname
        n.src_pkg = fullname
        pkgs.append(n)

    return pkgs
def load_and_normalize_packages(self, package_analysis_json, image_obj):
    """
    Loads and normalizes package data from all distros

    :param image_obj:
    :param package_analysis_json:
    :return: list of Package objects that can be added to an image
    """
    pkgs = []
    img_distro = DistroNamespace.for_obj(image_obj)

    # pkgs.allinfo handling
    # Fix: dict.values() returns a non-subscriptable view on python3; it must
    # be materialized before indexing (previously raised TypeError).
    pkgs_all = list(package_analysis_json.get('pkgs.allinfo', {}).values())
    if not pkgs_all:
        return []
    else:
        pkgs_all = pkgs_all[0]

    for pkg_name, metadata_str in pkgs_all.items():
        metadata = json.loads(metadata_str)
        p = ImagePackage()
        p.distro_name = image_obj.distro_name
        p.distro_version = image_obj.distro_version
        p.like_distro = image_obj.like_distro
        p.name = pkg_name
        p.version = metadata.get('version')
        p.origin = metadata.get('origin')
        p.size = metadata.get('size')
        p.arch = metadata.get('arch')
        p.license = metadata.get('license') if metadata.get('license') else metadata.get('lics')
        p.release = metadata.get('release', 'N/A')
        p.pkg_type = metadata.get('type')
        p.src_pkg = metadata.get('sourcepkg')
        p.image_user_id = image_obj.user_id
        p.image_id = image_obj.id
        if 'files' in metadata:
            # Handle file data
            p.files = metadata.get('files')

        if p.release != 'N/A':
            p.fullversion = p.version + '-' + p.release
        else:
            p.fullversion = p.version

        if img_distro.flavor == 'DEB':
            # debian versions may carry a binary-rebuild suffix (+bN); strip it
            # before removing the version from the source package name
            cleanvers = re.sub(re.escape("+b") + r"\d+.*", "", p.version)
            spkg = re.sub(re.escape("-" + cleanvers), "", p.src_pkg)
        else:
            spkg = re.sub(re.escape("-" + p.version) + ".*", "", p.src_pkg)

        p.normalized_src_pkg = spkg
        pkgs.append(p)

    if pkgs:
        return pkgs
    else:
        log.warn('Pkg Allinfo not found, reverting to using pkgs.all')
        all_pkgs = package_analysis_json['pkgs.all']['base']
        all_pkgs_src = package_analysis_json['pkgs_plus_source.all']['base']

        for pkg_name, version in all_pkgs.items():
            p = ImagePackage()
            p.image_user_id = image_obj.user_id
            p.image_id = image_obj.id
            p.name = pkg_name
            p.version = version
            p.fullversion = all_pkgs_src[pkg_name]

            if img_distro.flavor == 'RHEL':
                name, parsed_version, release, epoch, arch = split_rpm_filename(pkg_name + '-' + version + '.tmparch.rpm')
                p.version = parsed_version
                p.release = release
                p.pkg_type = 'RPM'
                p.origin = 'N/A'
                p.src_pkg = 'N/A'
                p.license = 'N/A'
                p.arch = 'N/A'
            elif img_distro.flavor == 'DEB':
                try:
                    p.version, p.release = version.split('-')
                except ValueError:
                    # version had no (or multiple) dashes; keep it whole
                    p.version = version
                    p.release = None

            # Fix: fallback records were built but never collected, so this
            # branch always returned an empty list.
            pkgs.append(p)

        return pkgs
def policy_engine_packages_upgrade_007_008():
    """Policy-engine DB upgrade 0.0.7 -> 0.0.8 for the package tables.

    Adds pkg_path columns, rebuilds primary/foreign keys to include them, and
    migrates deprecated ImageNpm/ImageGem rows into ImagePackage records in
    seq_id-ordered chunks of bulk inserts.
    """
    from anchore_engine.db import session_scope, ImagePackage, ImageNpm, ImageGem, Image

    # 'if True:' retained from the original to minimize structural churn.
    if True:
        engine = anchore_engine.db.entities.common.get_engine()
        file_path_length = 512
        hash_length = 80

        # New columns that participate in the widened primary keys.
        new_columns = [
            {
                'table_name': 'image_packages',
                'columns': [
                    Column('pkg_path', String(file_path_length), primary_key=True),
                    Column('pkg_path_hash', String(hash_length)),
                    Column('metadata_json', StringJSON),
                ]
            },
            {
                'table_name': 'image_package_vulnerabilities',
                'columns': [
                    Column('pkg_path', String(file_path_length), primary_key=True),
                ]
            },
            {
                'table_name': 'image_package_db_entries',
                'columns': [
                    Column('pkg_path', String(file_path_length), primary_key=True),
                ]
            },
        ]

        log.err("creating new table columns")
        for table in new_columns:
            for column in table['columns']:
                log.err("creating new column ({}) in table ({})".format(column.name, table.get('table_name', "")))
                try:
                    cn = column.compile(dialect=engine.dialect)
                    ct = column.type.compile(engine.dialect)
                    engine.execute('ALTER TABLE %s ADD COLUMN IF NOT EXISTS %s %s' % (table['table_name'], cn, ct))
                except Exception as e:
                    log.err('failed to perform DB upgrade on {} adding column - exception: {}'.format(table, str(e)))
                    raise Exception('failed to perform DB upgrade on {} adding column - exception: {}'.format(table, str(e)))

        # populate the new columns
        log.err("updating new column (pkg_path) - this may take a while")
        for table in ['image_packages', 'image_package_vulnerabilities']:
            log.err("updating table ({}) column (pkg_path)".format(table))
            done = False
            while not done:
                startts = time.time()
                rc = engine.execute("UPDATE {} set pkg_path='pkgdb' where pkg_path is null".format(table))
                log.err("updated {} records in {} (time={}), performing next range".format(rc.rowcount, table, time.time() - startts))
                done = True

        # image_package_db_entries is updated one image at a time to bound the
        # size of each UPDATE.
        with session_scope() as dbsession:
            db_image_ids = dbsession.query(Image.id).distinct().all()

            total_records = len(db_image_ids)
            record_count = 0
            for record in db_image_ids:
                db_image_id = record[0]
                startts = time.time()
                rc = engine.execute("UPDATE image_package_db_entries set pkg_path='pkgdb' where image_id='{}' and pkg_path is null".format(db_image_id))
                record_count = record_count + 1
                log.err("updated {} image ({} / {}) in {} (time={}), performing next image update".format(db_image_id, record_count, total_records, 'image_package_db_entries', time.time() - startts))

        exec_commands = [
            'ALTER TABLE image_package_vulnerabilities DROP CONSTRAINT IF EXISTS image_package_vulnerabilities_pkg_image_id_fkey',
            'ALTER TABLE image_package_db_entries DROP CONSTRAINT IF EXISTS image_package_db_entries_image_id_fkey',
            'ALTER TABLE image_packages DROP CONSTRAINT IF EXISTS image_packages_pkey',
            'ALTER TABLE image_package_db_entries DROP CONSTRAINT IF EXISTS image_package_db_entries_pkey',
            'ALTER TABLE image_package_vulnerabilities DROP CONSTRAINT IF EXISTS image_package_vulnerabilities_pkey',
        ]

        log.err("dropping primary key / foreign key relationships for new column")
        cmdcount = 1
        for command in exec_commands:
            log.err("running update operation {} of {}: {}".format(cmdcount, len(exec_commands), command))
            engine.execute(command)
            cmdcount = cmdcount + 1

        exec_commands = [
            'ALTER TABLE image_packages ADD PRIMARY KEY (image_id,image_user_id,name,version,pkg_type,arch,pkg_path)',
            'ALTER TABLE image_package_vulnerabilities ADD PRIMARY KEY (pkg_user_id,pkg_image_id,pkg_name,pkg_version,pkg_type,pkg_arch,vulnerability_id,pkg_path)',
            'ALTER TABLE image_package_db_entries ADD PRIMARY KEY (image_id, image_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path,file_path)',
            'ALTER TABLE image_package_vulnerabilities ADD CONSTRAINT image_package_vulnerabilities_pkg_image_id_fkey FOREIGN KEY (pkg_image_id, pkg_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path) REFERENCES image_packages (image_id, image_user_id, name, version, pkg_type, arch, pkg_path) MATCH SIMPLE',
            'ALTER TABLE image_package_db_entries ADD CONSTRAINT image_package_db_entries_image_id_fkey FOREIGN KEY (image_id, image_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path) REFERENCES image_packages (image_id, image_user_id, name, version, pkg_type, arch, pkg_path) MATCH SIMPLE',
            # These are helpers for the upgrade itself, not needed by the functioning system. Needed for large npm/gem tables and pagination support
            "CREATE SEQUENCE IF NOT EXISTS image_npms_seq_id_seq",
            "ALTER TABLE image_npms add column IF NOT EXISTS seq_id int DEFAULT nextval('image_npms_seq_id_seq')",
            "CREATE INDEX IF NOT EXISTS idx_npm_seq ON image_npms using btree (seq_id)",
            "CREATE SEQUENCE IF NOT EXISTS image_gems_seq_id_seq",
            "ALTER TABLE image_gems add column IF NOT EXISTS seq_id int DEFAULT nextval('image_gems_seq_id_seq')",
            "CREATE INDEX IF NOT EXISTS idx_gem_seq ON image_gems using btree (seq_id)",
            "ALTER TABLE image_packages ALTER COLUMN origin TYPE varchar",
        ]

        log.err("updating primary key / foreign key relationships for new column - this may take a while")
        cmdcount = 1
        for command in exec_commands:
            log.err("running update operation {} of {}: {}".format(cmdcount, len(exec_commands), command))
            engine.execute(command)
            cmdcount = cmdcount + 1

        log.err("converting ImageNpm and ImageGem records into ImagePackage records - this may take a while")
        # migrate ImageNpm and ImageGem records into ImagePackage records
        with session_scope() as dbsession:
            total_npms = dbsession.query(ImageNpm).count()
            total_gems = dbsession.query(ImageGem).count()

        log.err("will migrate {} image npm records".format(total_npms))
        npms = []
        chunk_size = 8192
        record_count = 0
        skipped_count = 0
        with session_scope() as dbsession:
            try:
                last_seq = -1
                while record_count < total_npms:
                    chunk_time = time.time()
                    log.err('Processing next chunk of records')
                    for n in dbsession.query(ImageNpm).filter(ImageNpm.seq_id > last_seq).limit(chunk_size):
                        np = ImagePackage()
                        # primary keys
                        np.name = n.name
                        if len(n.versions_json):
                            version = n.versions_json[0]
                        else:
                            version = "N/A"
                        np.version = version
                        np.pkg_type = 'npm'
                        np.arch = 'N/A'
                        np.image_user_id = n.image_user_id
                        np.image_id = n.image_id
                        np.pkg_path = n.path
                        # other
                        np.pkg_path_hash = n.path_hash
                        np.distro_name = 'npm'
                        np.distro_version = 'N/A'
                        np.like_distro = 'npm'
                        np.fullversion = np.version
                        np.license = ' '.join(n.licenses_json)
                        np.origin = ' '.join(n.origins_json)
                        fullname = np.name
                        np.normalized_src_pkg = fullname
                        np.src_pkg = fullname
                        npms.append(np)
                        last_seq = n.seq_id

                    if len(npms):
                        log.err('Inserting {} new records'.format(len(npms)))
                        startts = time.time()
                        try:
                            with session_scope() as dbsession2:
                                dbsession2.bulk_save_objects(npms)
                        except Exception as err:
                            log.err("skipping duplicates: {}".format(err))
                            skipped_count += 1
                        record_count = record_count + len(npms)
                        log.err("merged {} / {} npm records (time={})".format(record_count, total_npms, time.time() - startts))
                        log.err('Chunk took: {} seconds to process {} records'.format(time.time() - chunk_time, len(npms)))
                    else:
                        # Fix: if the query returns no rows while record_count is
                        # still below the precomputed total (e.g. rows deleted
                        # mid-upgrade), the loop would previously spin forever.
                        break
                    npms = []
            except Exception as err:
                log.err('Error during npm migration: {}'.format(err))
                raise err

        log.err("will migrate {} image gem records".format(total_gems))
        gems = []
        record_count = 0
        skipped_count = 0
        with session_scope() as dbsession:
            try:
                last_seq = -1
                while record_count < total_gems:
                    chunk_time = time.time()
                    log.err('Processing next chunk of records')
                    for n in dbsession.query(ImageGem).filter(ImageGem.seq_id > last_seq).limit(chunk_size):
                        np = ImagePackage()
                        # primary keys
                        np.name = n.name
                        if len(n.versions_json):
                            version = n.versions_json[0]
                        else:
                            version = "N/A"
                        np.version = version
                        np.pkg_type = 'gem'
                        np.arch = 'N/A'
                        np.image_user_id = n.image_user_id
                        np.image_id = n.image_id
                        np.pkg_path = n.path
                        # other
                        np.pkg_path_hash = n.path_hash
                        np.distro_name = 'gem'
                        np.distro_version = 'N/A'
                        np.like_distro = 'gem'
                        np.fullversion = np.version
                        np.license = ' '.join(n.licenses_json)
                        np.origin = ' '.join(n.origins_json)
                        fullname = np.name
                        np.normalized_src_pkg = fullname
                        np.src_pkg = fullname
                        gems.append(np)
                        last_seq = n.seq_id

                    if len(gems):
                        log.err('Inserting {} new records'.format(len(gems)))
                        startts = time.time()
                        try:
                            with session_scope() as dbsession2:
                                dbsession2.bulk_save_objects(gems)
                        except Exception as err:
                            log.err("skipping duplicates: {}".format(err))
                            skipped_count += 1
                        record_count = record_count + len(gems)
                        log.err("merged {} / {} gem records (time={})".format(record_count, total_gems, time.time() - startts))
                        # Fix: the chunk-timing log previously reported len(npms)
                        # (always 0 at this point) instead of len(gems).
                        log.err('Chunk took: {} seconds to process {} records'.format(time.time() - chunk_time, len(gems)))
                    else:
                        break
                    gems = []
            except Exception as err:
                log.err('Error during gem migration: {}'.format(err))
                raise err
def load_javas(self, analysis_json, containing_image):
    """Build ImagePackage records for java archives found in the analysis output.

    Version precedence: pom.properties version, then the first manifest version
    (maven/implementation/specification), then a version parsed out of the
    archive name, else 'N/A'.

    :param analysis_json: analyzer output document containing a 'package_list' section
    :param containing_image: image record providing user_id/id for the new packages
    :return: tuple of (list of ImagePackage records, list of handled package type keys)
    """
    handled_pkgtypes = ['pkgs.java']
    pkgs_json = analysis_json.get('package_list', {}).get('pkgs.java', {}).get('base')
    if not pkgs_json:
        return [], handled_pkgtypes

    pkgs = []
    for path, pkg_str in list(pkgs_json.items()):
        pkg_json = json.loads(pkg_str)
        n = ImagePackage()

        # primary keys
        # TODO - some java names have a version in it, need to clean that up
        n.name = pkg_json.get('name')
        n.pkg_type = 'java'
        n.arch = 'N/A'
        n.pkg_path = path

        # First available manifest version; all candidates recorded in metadata.
        metaversion = None
        versions_json = {}
        for k in ['maven-version', 'implementation-version', 'specification-version']:
            if not metaversion and pkg_json.get(k, 'N/A') != 'N/A':
                metaversion = pkg_json.get(k)
            versions_json[k] = pkg_json.get(k, 'N/A')

        n.image_user_id = containing_image.user_id
        n.image_id = containing_image.id

        # other non-PK values
        n.pkg_path_hash = hashlib.sha256(ensure_bytes(path)).hexdigest()
        n.distro_name = 'java'
        n.distro_version = 'N/A'
        n.like_distro = 'java'

        # Fix: 'metadata' may be missing/null in the analyzer output; previously
        # the subscript assignment below raised TypeError on None.
        m = pkg_json.get('metadata') or {}
        m['java_versions'] = versions_json
        n.metadata_json = m

        fullname = n.name
        pomprops = n.get_pom_properties()
        pomversion = None
        if pomprops:
            fullname = "{}:{}".format(pomprops.get('groupId'), pomprops.get('artifactId'))
            pomversion = pomprops.get('version', None)
        n.normalized_src_pkg = fullname
        n.src_pkg = fullname

        # final version decision - try our best to get an accurate version/name pair
        n.version = 'N/A'
        if pomversion:
            n.version = pomversion
        elif metaversion:
            n.version = metaversion
        else:
            # last resort: split a trailing dotted-numeric version off the name
            try:
                patt = re.match(r"(.*)-(([\d]\.)+.*)", n.name)
                if patt and patt.group(1):
                    n.version = patt.group(2)
                    n.name = patt.group(1)
            except Exception:
                # best-effort parse; leave version as 'N/A' on any failure
                pass

        n.fullversion = n.version
        pkgs.append(n)

    return pkgs, handled_pkgtypes
def policy_engine_packages_upgrade_007_008():
    """Policy-engine DB upgrade 0.0.7 -> 0.0.8 for the package tables.

    Adds pkg_path columns, rebuilds primary/foreign keys to include them, and
    converts deprecated ImageNpm/ImageGem rows into ImagePackage records in
    chunked bulk inserts while streaming the source rows from an open session.
    """
    from anchore_engine.db import session_scope, ImagePackage, ImageNpm, ImageGem, Image

    # 'if True:' retained from the original to minimize structural churn.
    if True:
        engine = anchore_engine.db.entities.common.get_engine()
        file_path_length = 512
        hash_length = 80

        # New columns that participate in the widened primary keys.
        new_columns = [
            {
                'table_name': 'image_packages',
                'columns': [
                    Column('pkg_path', String(file_path_length), primary_key=True),
                    Column('pkg_path_hash', String(hash_length)),
                    Column('metadata_json', StringJSON),
                ]
            },
            {
                'table_name': 'image_package_vulnerabilities',
                'columns': [
                    Column('pkg_path', String(file_path_length), primary_key=True),
                ]
            },
            {
                'table_name': 'image_package_db_entries',
                'columns': [
                    Column('pkg_path', String(file_path_length), primary_key=True),
                ]
            },
        ]

        log.err("creating new table columns")
        for table in new_columns:
            for column in table['columns']:
                log.err("creating new column ({}) in table ({})".format(column.name, table.get('table_name', "")))
                try:
                    cn = column.compile(dialect=engine.dialect)
                    ct = column.type.compile(engine.dialect)
                    engine.execute('ALTER TABLE %s ADD COLUMN IF NOT EXISTS %s %s' % (table['table_name'], cn, ct))
                except Exception as e:
                    log.err('failed to perform DB upgrade on {} adding column - exception: {}'.format(table, str(e)))
                    raise Exception('failed to perform DB upgrade on {} adding column - exception: {}'.format(table, str(e)))

        # populate the new columns
        log.err("updating new column (pkg_path) - this may take a while")
        for table in ['image_packages', 'image_package_vulnerabilities']:
            log.err("updating table ({}) column (pkg_path)".format(table))
            done = False
            while not done:
                startts = time.time()
                rc = engine.execute("UPDATE {} set pkg_path='pkgdb' where pkg_path is null".format(table))
                log.err("updated {} records in {} (time={}), performing next range".format(rc.rowcount, table, time.time() - startts))
                done = True

        # image_package_db_entries is updated one image at a time to bound the
        # size of each UPDATE.
        with session_scope() as dbsession:
            db_image_ids = dbsession.query(Image.id).distinct().all()

            total_records = len(db_image_ids)
            record_count = 0
            for record in db_image_ids:
                db_image_id = record[0]
                startts = time.time()
                rc = engine.execute("UPDATE image_package_db_entries set pkg_path='pkgdb' where image_id='{}' and pkg_path is null".format(db_image_id))
                record_count = record_count + 1
                log.err("updated {} image ({} / {}) in {} (time={}), performing next image update".format(db_image_id, record_count, total_records, 'image_package_db_entries', time.time() - startts))

        exec_commands = [
            'ALTER TABLE image_package_vulnerabilities DROP CONSTRAINT IF EXISTS image_package_vulnerabilities_pkg_image_id_fkey',
            'ALTER TABLE image_package_db_entries DROP CONSTRAINT IF EXISTS image_package_db_entries_image_id_fkey',
            'ALTER TABLE image_packages DROP CONSTRAINT IF EXISTS image_packages_pkey',
            'ALTER TABLE image_package_db_entries DROP CONSTRAINT IF EXISTS image_package_db_entries_pkey',
            'ALTER TABLE image_package_vulnerabilities DROP CONSTRAINT IF EXISTS image_package_vulnerabilities_pkey',
        ]

        log.err("dropping primary key / foreign key relationships for new column")
        cmdcount = 1
        for command in exec_commands:
            log.err("running update operation {} of {}: {}".format(cmdcount, len(exec_commands), command))
            engine.execute(command)
            cmdcount = cmdcount + 1

        exec_commands = [
            'ALTER TABLE image_packages ADD PRIMARY KEY (image_id,image_user_id,name,version,pkg_type,arch,pkg_path)',
            'ALTER TABLE image_package_vulnerabilities ADD PRIMARY KEY (pkg_user_id,pkg_image_id,pkg_name,pkg_version,pkg_type,pkg_arch,vulnerability_id,pkg_path)',
            'ALTER TABLE image_package_db_entries ADD PRIMARY KEY (image_id, image_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path,file_path)',
            'ALTER TABLE image_package_vulnerabilities ADD CONSTRAINT image_package_vulnerabilities_pkg_image_id_fkey FOREIGN KEY (pkg_image_id, pkg_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path) REFERENCES image_packages (image_id, image_user_id, name, version, pkg_type, arch, pkg_path) MATCH SIMPLE',
            'ALTER TABLE image_package_db_entries ADD CONSTRAINT image_package_db_entries_image_id_fkey FOREIGN KEY (image_id, image_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path) REFERENCES image_packages (image_id, image_user_id, name, version, pkg_type, arch, pkg_path) MATCH SIMPLE',
        ]

        log.err("updating primary key / foreign key relationships for new column - this may take a while")
        cmdcount = 1
        for command in exec_commands:
            log.err("running update operation {} of {}: {}".format(cmdcount, len(exec_commands), command))
            engine.execute(command)
            cmdcount = cmdcount + 1

        log.err("converting ImageNpm and ImageGem records into ImagePackage records - this may take a while")
        # migrate ImageNpm and ImageGem records into ImagePackage records
        with session_scope() as dbsession:
            db_npms = dbsession.query(ImageNpm)
            total_npms = dbsession.query(ImageNpm).count()
            db_gems = dbsession.query(ImageGem)
            total_gems = dbsession.query(ImageGem).count()

            npms = []
            chunk_size = 8192
            record_count = 0
            try:
                for n in db_npms:
                    np = ImagePackage()
                    # primary keys
                    np.name = n.name
                    if len(n.versions_json):
                        version = n.versions_json[0]
                    else:
                        version = "N/A"
                    np.version = version
                    np.pkg_type = 'npm'
                    np.arch = 'N/A'
                    np.image_user_id = n.image_user_id
                    np.image_id = n.image_id
                    np.pkg_path = n.path
                    # other
                    np.pkg_path_hash = n.path_hash
                    np.distro_name = 'npm'
                    np.distro_version = 'N/A'
                    np.like_distro = 'npm'
                    np.fullversion = np.version
                    np.license = ' '.join(n.licenses_json)
                    np.origin = ' '.join(n.origins_json)
                    fullname = np.name
                    np.normalized_src_pkg = fullname
                    np.src_pkg = fullname
                    npms.append(np)

                    if len(npms) >= chunk_size:
                        startts = time.time()
                        try:
                            # Fix: the inner session was previously bound to the
                            # same name 'dbsession', clobbering (and closing) the
                            # outer session whose query cursor is being iterated.
                            with session_scope() as dbsession2:
                                dbsession2.bulk_save_objects(npms)
                            record_count = record_count + chunk_size
                        except Exception:
                            log.err("skipping duplicates")
                            record_count = record_count + chunk_size
                        log.err("merged {} / {} npm records (time={}), performing next range".format(record_count, total_npms, time.time() - startts))
                        npms = []

                if len(npms):
                    startts = time.time()
                    try:
                        with session_scope() as dbsession2:
                            dbsession2.bulk_save_objects(npms)
                        record_count = record_count + len(npms)
                    except Exception:
                        log.err("skipping duplicates")
                        record_count = record_count + len(npms)
                    log.err("final merged {} / {} npm records (time={})".format(record_count, total_npms, time.time() - startts))
            except Exception as err:
                raise err

            gems = []
            chunk_size = 8192
            record_count = 0
            try:
                for n in db_gems:
                    np = ImagePackage()
                    # primary keys
                    np.name = n.name
                    if len(n.versions_json):
                        version = n.versions_json[0]
                    else:
                        version = "N/A"
                    np.version = version
                    np.pkg_type = 'gem'
                    np.arch = 'N/A'
                    np.image_user_id = n.image_user_id
                    np.image_id = n.image_id
                    np.pkg_path = n.path
                    # other
                    np.pkg_path_hash = n.path_hash
                    np.distro_name = 'gem'
                    np.distro_version = 'N/A'
                    np.like_distro = 'gem'
                    np.fullversion = np.version
                    np.license = ' '.join(n.licenses_json)
                    np.origin = ' '.join(n.origins_json)
                    fullname = np.name
                    np.normalized_src_pkg = fullname
                    np.src_pkg = fullname
                    gems.append(np)

                    if len(gems) >= chunk_size:
                        startts = time.time()
                        try:
                            with session_scope() as dbsession2:
                                dbsession2.bulk_save_objects(gems)
                            record_count = record_count + chunk_size
                        except Exception:
                            log.err("skipping duplicates")
                            record_count = record_count + chunk_size
                        log.err("merged {} / {} gem records (time={}), performing next range".format(record_count, total_gems, time.time() - startts))
                        gems = []

                if len(gems):
                    startts = time.time()
                    try:
                        with session_scope() as dbsession2:
                            dbsession2.bulk_save_objects(gems)
                        record_count = record_count + len(gems)
                    except Exception:
                        log.err("skipping duplicates")
                        record_count = record_count + len(gems)
                    log.err("final merged {} / {} gem records (time={})".format(record_count, total_gems, time.time() - startts))
            except Exception as err:
                raise err
def db_upgrade_007_008():
    """DB upgrade 0.0.7 -> 0.0.8.

    Adds pkg_path columns, rebuilds primary/foreign keys to include them, and
    merges deprecated ImageNpm/ImageGem rows into ImagePackage records.
    """
    from anchore_engine.db import session_scope, ImagePackage, ImageNpm, ImageGem

    # 'if True:' retained from the original to minimize structural churn.
    if True:
        engine = anchore_engine.db.entities.common.get_engine()
        file_path_length = 512
        hash_length = 80

        # New columns that participate in the widened primary keys.
        new_columns = [
            {
                'table_name': 'image_packages',
                'columns': [
                    Column('pkg_path', String(file_path_length), primary_key=True),
                    Column('pkg_path_hash', String(hash_length)),
                    Column('metadata_json', StringJSON),
                ]
            },
            {
                'table_name': 'image_package_vulnerabilities',
                'columns': [
                    Column('pkg_path', String(file_path_length), primary_key=True),
                ]
            },
            {
                'table_name': 'image_package_db_entries',
                'columns': [
                    Column('pkg_path', String(file_path_length), primary_key=True),
                ]
            },
        ]

        log.err("creating new table columns")
        for table in new_columns:
            for column in table['columns']:
                log.err("creating new column ({}) in table ({})".format(column.name, table.get('table_name', "")))
                try:
                    cn = column.compile(dialect=engine.dialect)
                    ct = column.type.compile(engine.dialect)
                    engine.execute('ALTER TABLE %s ADD COLUMN IF NOT EXISTS %s %s' % (table['table_name'], cn, ct))
                except Exception as e:
                    log.err('failed to perform DB upgrade on {} adding column - exception: {}'.format(table, str(e)))
                    raise Exception('failed to perform DB upgrade on {} adding column - exception: {}'.format(table, str(e)))

        # populate the new columns
        for table in ['image_packages', 'image_package_vulnerabilities', 'image_package_db_entries']:
            log.err("updating table ({}) column (pkg_path)".format(table))
            engine.execute("UPDATE {} set pkg_path='pkgdb' where pkg_path is null".format(table))

        exec_commands = [
            'ALTER TABLE image_package_vulnerabilities DROP CONSTRAINT IF EXISTS image_package_vulnerabilities_pkg_image_id_fkey',
            'ALTER TABLE image_package_db_entries DROP CONSTRAINT IF EXISTS image_package_db_entries_image_id_fkey',
            'ALTER TABLE image_packages DROP CONSTRAINT IF EXISTS image_packages_pkey',
            'ALTER TABLE image_package_db_entries DROP CONSTRAINT IF EXISTS image_package_db_entries_pkey',
            'ALTER TABLE image_package_vulnerabilities DROP CONSTRAINT IF EXISTS image_package_vulnerabilities_pkey',
            'ALTER TABLE image_packages ADD PRIMARY KEY (image_id,image_user_id,name,version,pkg_type,arch,pkg_path)',
            'ALTER TABLE image_package_vulnerabilities ADD PRIMARY KEY (pkg_user_id,pkg_image_id,pkg_name,pkg_version,pkg_type,pkg_arch,vulnerability_id,pkg_path)',
            'ALTER TABLE image_package_db_entries ADD PRIMARY KEY (image_id, image_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path,file_path)',
            'ALTER TABLE image_package_vulnerabilities ADD CONSTRAINT image_package_vulnerabilities_pkg_image_id_fkey FOREIGN KEY (pkg_image_id, pkg_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path) REFERENCES image_packages (image_id, image_user_id, name, version, pkg_type, arch, pkg_path) MATCH SIMPLE',
            'ALTER TABLE image_package_db_entries ADD CONSTRAINT image_package_db_entries_image_id_fkey FOREIGN KEY (image_id, image_user_id, pkg_name, pkg_version, pkg_type, pkg_arch, pkg_path) REFERENCES image_packages (image_id, image_user_id, name, version, pkg_type, arch, pkg_path) MATCH SIMPLE',
        ]

        log.err("updating primary key / foreign key relationships for new column")
        cmdcount = 1
        for command in exec_commands:
            log.err("running update operation {} of {}: {}".format(cmdcount, len(exec_commands), command))
            engine.execute(command)
            cmdcount = cmdcount + 1

        log.err("converting ImageNpm and ImageGem records into ImagePackage records")
        # migrate ImageNpm and ImageGem records into ImagePackage records
        with session_scope() as dbsession:
            db_npms = dbsession.query(ImageNpm)
            db_gems = dbsession.query(ImageGem)

            gems = []
            npms = []
            try:
                for n in db_npms:
                    np = ImagePackage()
                    # primary keys
                    np.name = n.name
                    # Fix: guard empty version lists; indexing [0] unguarded
                    # raised IndexError (later upgrade code uses this guard).
                    np.version = n.versions_json[0] if len(n.versions_json) else "N/A"
                    np.pkg_type = 'npm'
                    np.arch = 'N/A'
                    np.image_user_id = n.image_user_id
                    np.image_id = n.image_id
                    np.pkg_path = n.path
                    # other
                    np.pkg_path_hash = n.path_hash
                    np.distro_name = 'npm'
                    np.distro_version = 'N/A'
                    np.like_distro = 'npm'
                    np.fullversion = np.version
                    np.license = ' '.join(n.licenses_json)
                    np.origin = ' '.join(n.origins_json)
                    fullname = np.name
                    np.normalized_src_pkg = fullname
                    np.src_pkg = fullname
                    npms.append(np)
            except Exception as err:
                raise err

            try:
                for n in db_gems:
                    np = ImagePackage()
                    # primary keys
                    np.name = n.name
                    # Fix: same empty-version-list guard as the npm loop above.
                    np.version = n.versions_json[0] if len(n.versions_json) else "N/A"
                    np.pkg_type = 'gem'
                    np.arch = 'N/A'
                    np.image_user_id = n.image_user_id
                    np.image_id = n.image_id
                    np.pkg_path = n.path
                    # other
                    np.pkg_path_hash = n.path_hash
                    np.distro_name = 'gem'
                    np.distro_version = 'N/A'
                    np.like_distro = 'gem'
                    np.fullversion = np.version
                    np.license = ' '.join(n.licenses_json)
                    np.origin = ' '.join(n.origins_json)
                    fullname = np.name
                    np.normalized_src_pkg = fullname
                    np.src_pkg = fullname
                    gems.append(np)
            except Exception as err:
                raise err

        with session_scope() as dbsession:
            log.err("merging npms: {} records to merge".format(len(npms)))
            try:
                for npm in npms:
                    dbsession.merge(npm)
            except Exception as err:
                raise err

        with session_scope() as dbsession:
            log.err("merging gems: {} records to merge".format(len(gems)))
            try:
                for gem in gems:
                    dbsession.merge(gem)
            except Exception as err:
                raise err