def __init__(self):
    CopyInterface.__init__(self)
    DeletionInterface.__init__(self)
    SiteInfoSourceInterface.__init__(self)
    ReplicaInfoSourceInterface.__init__(self)
    DatasetInfoSourceInterface.__init__(self)

    self._mysql = MySQL(**config.mysqlregistry.db_params)
def __init__(self):
    self._last_update = 0  # unix time of last update

    self._mysqlreg = MySQL(**config.registry.db_params)
    self._mysqlhist = MySQL(**config.mysqlhistory.db_params)
def main(site): """ Gets the listing from the dynamo database, and remote XRootD listings of a given site. The differences are compared to deletion queues and other things. .. Note:: If you add things, list them in the module docstring. The differences that should be acted on are copied to the summary webpage and entered into the dynamoregister database. :param str site: The site to run the check over :returns: missing files, size, orphan files, size :rtype: list, long, list, long """ start = time.time() prev_missing = '%s_compare_missing.txt' % site prev_set = set() if os.path.exists(prev_missing): with open(prev_missing, 'r') as prev_file: for line in prev_file: prev_set.add(line.strip()) if int(config.config_dict().get('SaveCache')): prev_new_name = '%s.%s' % ( prev_missing, datetime.datetime.fromtimestamp( os.stat(prev_missing).st_mtime).strftime('%y%m%d')) else: prev_new_name = prev_missing shutil.move( prev_missing, os.path.join(config.config_dict()['CacheLocation'], prev_new_name)) # All of the files and summary will be dumped here webdir = config.config_dict()['WebDir'] # Open a connection temporarily to make sure we only list good sites status_check = MySQL(config_file='/etc/my.cnf', db='dynamo', config_group='mysql-dynamo') status = status_check.query('SELECT status FROM sites WHERE name = %s', site)[0] if status != 'ready': LOG.error('Site %s status is %s', site, status) # Note the attempt to do listing conn = sqlite3.connect(os.path.join(webdir, 'stats.db')) curs = conn.cursor() curs.execute( """ REPLACE INTO stats VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, DATETIME(DATETIME(), "-4 hours"), ?, ?) """, (site, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)) conn.commit() conn.close() exit(0) # Close the connection while we are getting the trees together status_check.close() inv_tree = getinventorycontents.get_db_listing(site) # Reset the DirectoryList for the XRootDLister to run on config.DIRECTORYLIST = [ directory.name for directory in inv_tree.directories ] site_tree = getsitecontents.get_site_tree(site) # Get whether or not the site is debugged conn = sqlite3.connect(os.path.join(webdir, 'stats.db')) curs = conn.cursor() curs.execute('SELECT isgood FROM sites WHERE site = ?', (site, )) is_debugged = curs.fetchone()[0] conn.close() # Create the function to check orphans and missing # First, datasets in the deletions queue can be missing acceptable_missing = checkphedex.set_of_deletions(site) # Orphan files cannot belong to any dataset that should be at the site inv_sql = MySQL(config_file='/etc/my.cnf', db='dynamo', config_group='mysql-dynamo') acceptable_orphans = set( inv_sql.query( """ SELECT datasets.name FROM sites INNER JOIN dataset_replicas ON dataset_replicas.site_id=sites.id INNER JOIN datasets ON dataset_replicas.dataset_id=datasets.id WHERE sites.name=%s """, site)) # Orphan files may be a result of deletion requests acceptable_orphans.update(acceptable_missing) # Ignored datasets will not give a full listing, so they can't be accused of having orphans acceptable_orphans.update( inv_sql.query('SELECT name FROM datasets WHERE status=%s', 'IGNORED')) # Do not delete anything that is protected by Unified protected_unmerged = get_json('cmst2.web.cern.ch', '/cmst2/unified/listProtectedLFN.txt') acceptable_orphans.update(['/%s/%s-%s/%s' % (split_name[4], split_name[3], split_name[6], split_name[5]) \ for split_name in \ [name.split('/') for name in protected_unmerged['protected']] ]) LOG.debug('Acceptable orphans: \n%s\n', '\n'.join(acceptable_orphans)) ignore_list = 
config.config_dict().get('IgnoreDirectories', []) def double_check(file_name, acceptable): """ Checks the file name against a list of datasets to not list files from. :param str file_name: LFN of the file :param set acceptable: Datasets to not list files from (Acceptable orphans or missing) :returns: Whether the file belongs to a dataset in the list or not :rtype: bool """ LOG.debug('Checking file_name: %s', file_name) # Skip over paths that include part of the list of ignored directories for pattern in ignore_list: if pattern in file_name: return True split_name = file_name.split('/') try: return '/%s/%s-%s/%s' % (split_name[4], split_name[3], split_name[6], split_name[5]) in acceptable except IndexError: LOG.warning('Strange file name: %s', file_name) return True check_orphans = lambda x: double_check(x, acceptable_orphans) check_missing = lambda x: double_check(x, acceptable_missing) # Do the comparison missing, m_size, orphan, o_size = datatypes.compare( inv_tree, site_tree, '%s_compare' % site, orphan_check=check_orphans, missing_check=check_missing) LOG.debug('Missing size: %i, Orphan size: %i', m_size, o_size) # Enter things for site in registry if os.environ['USER'] == 'dynamo': reg_sql = MySQL(config_file='/etc/my.cnf', db='dynamoregister', config_group='mysql-dynamo') else: reg_sql = MySQL(config_file=os.path.join(os.environ['HOME'], 'my.cnf'), db='dynamoregister', config_group='mysql-register-test') # Determine if files should be entered into the registry many_missing = len(missing) > int(config.config_dict()['MaxMissing']) many_orphans = len(orphan) > int(config.config_dict()['MaxOrphan']) if is_debugged and not many_missing and not many_orphans: def execute(query, *args): """ Executes the query on the registry and outputs a log message depending on query :param str query: The SQL query to execute :param args: The arguments to the SQL query """ reg_sql.query(query, *args) if 'transfer_queue' in query: LOG.info('Copying %s from %s', args[0], args[1]) elif 'deletion_queue' in query: LOG.info('Deleting %s', args[0]) else: if many_missing: LOG.error('Too many missing files: %i, you should investigate.', len(missing)) if many_orphans: LOG.error( 'Too many orphan files: %i out of %i, you should investigate.', len(orphan), site_tree.get_num_files()) execute = lambda *_: 0 # Then do entries, if the site is in the debugged status def add_transfers(line, sites): """ Add the file into the transfer queue for multiple sites. 
:param str line: The file LFN to transfer :param list sites: Sites to try to transfer from :returns: Whether or not the entry was a success :rtype: bool """ # Don't add transfers if too many missing files if line in prev_set or not prev_set: for location in sites: execute( """ INSERT IGNORE INTO `transfer_queue` (`file`, `site_from`, `site_to`, `status`, `reqid`) VALUES (%s, %s, %s, 'new', 0) """, line, location, site) return bool(sites) # Setup a query for sites, with added condition at the end site_query = """ SELECT sites.name FROM sites INNER JOIN block_replicas ON sites.id = block_replicas.site_id INNER JOIN files ON block_replicas.block_id = files.block_id WHERE files.name = %s AND sites.name != %s AND sites.status = 'ready' AND block_replicas.is_complete = 1 AND group_id != 0 {0} """ # Track files with no sources no_source_files = [] for line in missing: # Get sites that are not tape sites = inv_sql.query( site_query.format('AND sites.storage_type != "mss"'), line, site) if not add_transfers(line, sites): # Track files without disk source no_source_files.append(line) # Get sites that are tape sites = inv_sql.query( site_query.format('AND sites.storage_type = "mss"'), line, site) add_transfers(line, sites) # Only get the empty nodes that are not in the inventory tree for line in orphan + \ [empty_node for empty_node in site_tree.empty_nodes_list() \ if not inv_tree.get_node('/'.join(empty_node.split('/')[2:]), make_new=False)]: execute( """ INSERT IGNORE INTO `deletion_queue` (`file`, `site`, `status`) VALUES (%s, %s, 'new') """, line, site) reg_sql.close() with open('%s_missing_nosite.txt' % site, 'w') as nosite: for line in no_source_files: nosite.write(line + '\n') # We want to track which blocks missing files are coming from track_missing_blocks = defaultdict( lambda: { 'errors': 0, 'blocks': defaultdict(lambda: { 'group': '', 'errors': 0 }) }) blocks_query = """ SELECT blocks.name, IFNULL(groups.name, 'Unsubscribed') FROM blocks INNER JOIN files ON files.block_id = blocks.id INNER JOIN block_replicas ON block_replicas.block_id = files.block_id INNER JOIN sites ON block_replicas.site_id = sites.id LEFT JOIN groups ON block_replicas.group_id = groups.id WHERE files.name = %s AND sites.name = %s """ with open('%s_compare_missing.txt' % site, 'r') as input_file: for line in input_file: split_name = line.split('/') dataset = '/%s/%s-%s/%s' % (split_name[4], split_name[3], split_name[6], split_name[5]) output = inv_sql.query(blocks_query, line.strip(), site) if not output: LOG.warning('The following SQL statement failed: %s', blocks_query % (line.strip(), site)) LOG.warning( 'Most likely cause is dynamo update between the listing and now' ) from_phedex = get_json( 'cmsweb.cern.ch', '/phedex/datasvc/json/prod/filereplicas', params={ 'node': site, 'LFN': line.strip() }, use_cert=True) try: output = [(from_phedex['phedex']['block'][0]['name'].split( '#')[1], from_phedex['phedex']['block'][0]['replica'] [0]['group'])] except IndexError: LOG.error('File replica not in PhEDEx either!') LOG.error('Skipping block level report for this file.') continue block, group = output[0] track_missing_blocks[dataset]['errors'] += 1 track_missing_blocks[dataset]['blocks'][block]['errors'] += 1 track_missing_blocks[dataset]['blocks'][block]['group'] = group inv_sql.close() # Output file with the missing datasets with open('%s_missing_datasets.txt' % site, 'w') as output_file: for dataset, vals in \ sorted(track_missing_blocks.iteritems(), key=lambda x: x[1]['errors'], reverse=True): for block_name, block 
in sorted(vals['blocks'].iteritems()): output_file.write('%10i %-17s %s#%s\n' % \ (block['errors'], block['group'], dataset, block_name)) # If there were permissions or connection issues, no files would be listed # Otherwise, copy the output files to the web directory shutil.copy('%s_missing_datasets.txt' % site, webdir) shutil.copy('%s_missing_nosite.txt' % site, webdir) shutil.copy('%s_compare_missing.txt' % site, webdir) shutil.copy('%s_compare_orphan.txt' % site, webdir) if (os.environ.get('ListAge') is None) and (os.environ.get('InventoryAge') is None): # Update the runtime stats on the stats page if the listing settings are not changed conn = sqlite3.connect(os.path.join(webdir, 'stats.db')) curs = conn.cursor() curs.execute( 'INSERT INTO stats_history SELECT * FROM stats WHERE site=?', (site, )) curs.execute( """ REPLACE INTO stats VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, DATETIME(DATETIME(), "-4 hours"), ?, ?) """, (site, time.time() - start, site_tree.get_num_files(), site_tree.count_nodes(), len( site_tree.empty_nodes_list()), config.config_dict().get( 'NumThreads', config.config_dict().get('MinThreads', 0)), len(missing), m_size, len(orphan), o_size, len(no_source_files), site_tree.get_num_files(unlisted=True))) conn.commit() conn.close()
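# Invocation sketch (added for illustration, not part of the original script;
# the site name below is purely an assumption). main() is written to be called
# once per site, e.g. from a command-line wrapper that loops over the sites to
# check:
#
#     main('T2_US_MIT')
#
# Note that the docstring advertises a (missing, size, orphan, size) return
# value, but the body shown here ends without an explicit return statement.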
class MySQLHistory(TransactionHistoryInterface): """ Transaction history interface implementation using MySQL as the backend. """ def __init__(self): super(self.__class__, self).__init__() self._mysql = MySQL(**config.mysqlhistory.db_params) self._site_id_map = {} self._dataset_id_map = {} def _do_acquire_lock(self, blocking): #override while True: # Use the system table to "software-lock" the database self._mysql.query('LOCK TABLES `lock` WRITE') self._mysql.query( 'UPDATE `lock` SET `lock_host` = %s, `lock_process` = %s WHERE `lock_host` LIKE \'\' AND `lock_process` = 0', socket.gethostname(), os.getpid()) # Did the update go through? host, pid = self._mysql.query( 'SELECT `lock_host`, `lock_process` FROM `lock`')[0] self._mysql.query('UNLOCK TABLES') if host == socket.gethostname() and pid == os.getpid(): # The database is locked. break if blocking: logger.warning('Failed to lock database. Waiting 30 seconds..') time.sleep(30) else: logger.warning('Failed to lock database.') return False return True def _do_release_lock(self, force): #override self._mysql.query('LOCK TABLES `lock` WRITE') if force: self._mysql.query( 'UPDATE `lock` SET `lock_host` = \'\', `lock_process` = 0') else: self._mysql.query( 'UPDATE `lock` SET `lock_host` = \'\', `lock_process` = 0 WHERE `lock_host` LIKE %s AND `lock_process` = %s', socket.gethostname(), os.getpid()) # Did the update go through? host, pid = self._mysql.query( 'SELECT `lock_host`, `lock_process` FROM `lock`')[0] self._mysql.query('UNLOCK TABLES') if host != '' or pid != 0: raise TransactionHistoryInterface.LockError( 'Failed to release lock from ' + socket.gethostname() + ':' + str(os.getpid())) def _do_make_snapshot(self, tag): #override new_db = self._mysql.make_snapshot(tag) self._mysql.query( 'UPDATE `%s`.`lock` SET `lock_host` = \'\', `lock_process` = 0' % new_db) def _do_remove_snapshot(self, tag, newer_than, older_than): #override self._mysql.remove_snapshot(tag=tag, newer_than=newer_than, older_than=older_than) def _do_list_snapshots(self, timestamp_only): #override return self._mysql.list_snapshots(timestamp_only) def _do_recover_from(self, tag): #override self._mysql.recover_from(tag) def _do_new_run(self, operation, partition, policy_version, is_test, comment): #override part_ids = self._mysql.query( 'SELECT `id` FROM `partitions` WHERE `name` LIKE %s', partition) if len(part_ids) == 0: part_id = self._mysql.query( 'INSERT INTO `partitions` (`name`) VALUES (%s)', partition) else: part_id = part_ids[0] if operation == HistoryRecord.OP_COPY: if is_test: operation_str = 'copy_test' else: operation_str = 'copy' else: if is_test: operation_str = 'deletion_test' else: operation_str = 'deletion' return self._mysql.query( 'INSERT INTO `runs` (`operation`, `partition_id`, `policy_version`, `comment`, `time_start`) VALUES (%s, %s, %s, %s, NOW())', operation_str, part_id, policy_version, comment) def _do_close_run(self, operation, run_number): #override self._mysql.query( 'UPDATE `runs` SET `time_end` = FROM_UNIXTIME(%s) WHERE `id` = %s', time.time(), run_number) def _do_make_copy_entry(self, run_number, site, operation_id, approved, dataset_list, size): #override """ Site and datasets are expected to be already in the database. 
""" if len(self._site_id_map) == 0: self._make_site_id_map() if len(self._dataset_id_map) == 0: self._make_dataset_id_map() self._mysql.query( 'INSERT INTO `copy_requests` (`id`, `run_id`, `timestamp`, `approved`, `site_id`, `size`) VALUES (%s, %s, NOW(), %s, %s, %s)', operation_id, run_number, approved, self._site_id_map[site.name], size) self._mysql.insert_many( 'copied_replicas', ('copy_id', 'dataset_id'), lambda d: (operation_id, self._dataset_id_map[d.name]), dataset_list) def _do_make_deletion_entry(self, run_number, site, operation_id, approved, datasets, size): #override """ site and dataset are expected to be already in the database (save_deletion_decisions should be called first). """ site_id = self._mysql.query( 'SELECT `id` FROM `sites` WHERE `name` LIKE %s', site.name)[0] dataset_ids = self._mysql.select_many('datasets', ('id', ), 'name', [d.name for d in datasets]) self._mysql.query( 'INSERT INTO `deletion_requests` (`id`, `run_id`, `timestamp`, `approved`, `site_id`, `size`) VALUES (%s, %s, NOW(), %s, %s, %s)', operation_id, run_number, approved, site_id, size) self._mysql.insert_many('deleted_replicas', ('deletion_id', 'dataset_id'), lambda did: (operation_id, did), dataset_ids) def _do_update_copy_entry(self, copy_record): #override self._mysql.query( 'UPDATE `copy_requests` SET `approved` = %s, `size` = %s, `completed` = %s WHERE `id` = %s', copy_record.approved, copy_record.size, copy_record.completed, copy_record.operation_id) def _do_update_deletion_entry(self, deletion_record): #override self._mysql.query( 'UPDATE `deletion_requests` SET `approved` = %s, `size` = %s WHERE `id` = %s', deletion_record.approved, deletion_record.size, deletion_record.operation_id) def _do_save_sites(self, run_number, inventory): #override if len(self._site_id_map) == 0: self._make_site_id_map() sites_to_insert = [] for site_name in inventory.sites.keys(): if site_name not in self._site_id_map: sites_to_insert.append(site_name) if len(sites_to_insert) != 0: self._mysql.insert_many('sites', ('name', ), lambda n: (n, ), sites_to_insert) self._make_site_id_map() sites_in_record = set() insert_query = 'INSERT INTO `site_status_snapshots` (`site_id`, `run_id`, `status`) VALUES (%s, {run_number}, %s)'.format( run_number=run_number) query = 'SELECT s.`name`, ss.`status`+0 FROM `site_status_snapshots` AS ss INNER JOIN `sites` AS s ON s.`id` = ss.`site_id`' query += ' WHERE ss.`run_id` = (SELECT MAX(ss2.`run_id`) FROM `site_status_snapshots` AS ss2 WHERE ss2.`site_id` = ss.`site_id` AND ss2.`run_id` <= %d)' % run_number record = self._mysql.query(query) sites_in_record = set() for site_name, status in record: try: site = inventory.sites[site_name] except KeyError: continue sites_in_record.add(site) if site.status != status: self._mysql.query(insert_query, self._site_id_map[site.name], site.status) for site in inventory.sites.values(): if site not in sites_in_record: self._mysql.query(insert_query, self._site_id_map[site.name], site.status) def _do_get_sites(self, run_number): #override partition_id = self._mysql.query( 'SELECT `partition_id` FROM runs WHERE `id` = %s', run_number)[0] query = 'SELECT s.`name`, ss.`status`+0 FROM `site_status_snapshots` AS ss INNER JOIN `sites` AS s ON s.`id` = ss.`site_id`' query += ' WHERE ss.`run_id` = (SELECT MAX(ss2.`run_id`) FROM `site_status_snapshots` AS ss2 WHERE ss2.`site_id` = ss.`site_id` AND ss2.`run_id` <= %d)' % run_number record = self._mysql.query(query) status_map = dict([(site_name, status) for site_name, status in record]) query = 'SELECT 
s.`name`, q.`quota` FROM `quota_snapshots` AS q INNER JOIN `sites` AS s ON s.`id` = q.`site_id`' query += ' WHERE q.`partition_id` = %d' % partition_id query += ' AND q.`run_id` = (SELECT MAX(q2.`run_id`) FROM `quota_snapshots` AS q2 WHERE q2.`partition_id` = %d AND q2.`site_id` = q.`site_id` AND q2.`run_id` <= %d)' % ( partition_id, run_number) quota_map = dict(self._mysql.query(query)) sites_dict = {} for site_name, status in status_map.items(): try: quota = quota_map[site_name] except KeyError: quota = 0 sites_dict[site_name] = (status, quota) return sites_dict def _do_save_datasets(self, run_number, inventory): #override if len(self._dataset_id_map) == 0: self._make_dataset_id_map() datasets_to_insert = [] for dataset_name in inventory.datasets.keys(): if dataset_name not in self._dataset_id_map: datasets_to_insert.append(dataset_name) if len(datasets_to_insert) == 0: return self._mysql.insert_many('datasets', ('name', ), lambda n: (n, ), datasets_to_insert) self._make_dataset_id_map() def _do_save_quotas(self, run_number, quotas): #override if len(self._site_id_map) == 0: self._make_site_id_map() partition_id = self._mysql.query( 'SELECT `partition_id` FROM runs WHERE `id` = %s', run_number)[0] insert_query = 'INSERT INTO `quota_snapshots` (`site_id`, `partition_id`, `run_id`, `quota`) VALUES (%s, {partition_id}, {run_number}, %s)'.format( partition_id=partition_id, run_number=run_number) query = 'SELECT s.`name`, q.`quota` FROM `quota_snapshots` AS q INNER JOIN `sites` AS s ON s.`id` = q.`site_id` WHERE' query += ' q.`partition_id` = %d' % partition_id query += ' AND q.`run_id` = (SELECT MAX(q2.`run_id`) FROM `quota_snapshots` AS q2 WHERE q2.`partition_id` = %d AND q2.`site_id` = q.`site_id` AND q2.`run_id` <= %d)' % ( partition_id, run_number) record = self._mysql.query(query) sites_in_record = set() for site_name, last_quota in record: try: site, quota = next(item for item in quotas.items() if item[0].name == site_name) except StopIteration: continue sites_in_record.add(site) if last_quota != quota: self._mysql.query(insert_query, self._site_id_map[site.name], quota) for site, quota in quotas.items(): if site not in sites_in_record: self._mysql.query(insert_query, self._site_id_map[site.name], quota) def _do_save_conditions(self, policies): for policy in policies: text = re.sub('\s+', ' ', policy.condition.text) ids = self._mysql.query( 'SELECT `id` FROM `policy_conditions` WHERE `text` LIKE %s', text) if len(ids) == 0: policy.condition_id = self._mysql.query( 'INSERT INTO `policy_conditions` (`text`) VALUES (%s)', text) else: policy.condition_id = ids[0] def _do_save_copy_decisions(self, run_number, copies): #override pass def _do_save_deletion_decisions(self, run_number, deleted, kept, protected): #override # First save the size snapshots of the replicas, which will be referenced when reconstructing the history. 
# Decisions are saved only if they changed from the last run if len(self._site_id_map) == 0: self._make_site_id_map() if len(self._dataset_id_map) == 0: self._make_dataset_id_map() # (site_id, dataset_id) -> replica in inventory indices_to_replicas = {} for replica in deleted.keys(): indices_to_replicas[( self._site_id_map[replica.site.name], self._dataset_id_map[replica.dataset.name])] = replica for replica in kept.keys(): indices_to_replicas[( self._site_id_map[replica.site.name], self._dataset_id_map[replica.dataset.name])] = replica for replica in protected.keys(): indices_to_replicas[( self._site_id_map[replica.site.name], self._dataset_id_map[replica.dataset.name])] = replica partition_id = self._mysql.query( 'SELECT `partition_id` FROM `runs` WHERE `id` = %s', run_number)[0] # size snapshots # size NULL means the replica is deleted query = 'SELECT t1.`site_id`, t1.`dataset_id`, t1.`size` FROM `replica_size_snapshots` AS t1' query += ' WHERE t1.`partition_id` = %d' % partition_id query += ' AND t1.`size` IS NOT NULL' query += ' AND t1.`run_id` = (' query += ' SELECT MAX(t2.`run_id`) FROM `replica_size_snapshots` AS t2 WHERE t2.`site_id` = t1.`site_id` AND t2.`dataset_id` = t1.`dataset_id`' query += ' AND t2.`partition_id` = %d AND t2.`run_id` <= %d' % ( partition_id, run_number) query += ' )' in_record = set() insertions = [] # existing replicas that changed size or disappeared for site_id, dataset_id, size in self._mysql.query(query): index = (site_id, dataset_id) try: replica = indices_to_replicas[index] except KeyError: # this replica is not in the inventory any more insertions.append((site_id, dataset_id, None)) continue in_record.add(replica) if size != replica.size(): insertions.append((site_id, dataset_id, replica.size())) # new replicas for index, replica in indices_to_replicas.items(): if replica not in in_record: insertions.append((index[0], index[1], replica.size())) fields = ('site_id', 'dataset_id', 'partition_id', 'run_id', 'size') mapping = lambda (site_id, dataset_id, size): ( site_id, dataset_id, partition_id, run_number, size) self._mysql.insert_many('replica_size_snapshots', fields, mapping, insertions) # deletion decisions decisions = {} for replica, condition_id in deleted.items(): decisions[replica] = ('delete', condition_id) for replica, condition_id in kept.items(): decisions[replica] = ('keep', condition_id) for replica, condition_id in protected.items(): decisions[replica] = ('protect', condition_id) query = 'SELECT dd1.`site_id`, dd1.`dataset_id`, dd1.`decision`, dd1.`matched_condition` FROM `deletion_decisions` AS dd1' query += ' INNER JOIN `replica_size_snapshots` AS rs1 ON (rs1.`site_id`, rs1.`partition_id`, rs1.`dataset_id`) = (dd1.`site_id`, dd1.`partition_id`, dd1.`dataset_id`)' query += ' WHERE dd1.`partition_id` = %d' % partition_id query += ' AND rs1.`size` IS NOT NULL' query += ' AND rs1.`run_id` = (' query += ' SELECT MAX(rs2.`run_id`) FROM `replica_size_snapshots` AS rs2' query += ' WHERE (rs2.`site_id`, rs2.`partition_id`, rs2.`dataset_id`) = (rs1.`site_id`, rs1.`partition_id`, rs1.`dataset_id`)' query += ' AND rs2.`partition_id` = %d' % partition_id query += ' AND rs2.`run_id` <= %d' % run_number query += ' )' query += ' AND dd1.`run_id` = (' query += ' SELECT MAX(dd2.`run_id`) FROM `deletion_decisions` AS dd2' query += ' WHERE (dd2.`site_id`, dd2.`partition_id`, dd2.`dataset_id`) = (dd1.`site_id`, dd1.`partition_id`, dd1.`dataset_id`)' query += ' AND dd2.`partition_id` = %d' % partition_id query += ' AND dd2.`run_id` <= %d' % run_number query 
+= ' )' insertions = [] for site_id, dataset_id, rec_decision, rec_condition_id in self._mysql.query( query): replica = indices_to_replicas.pop((site_id, dataset_id)) decision, condition_id = decisions[replica] if decision != rec_decision or condition_id != rec_condition_id: insertions.append( (site_id, dataset_id, decision, condition_id)) # replicas with no past decision entries for index, replica in indices_to_replicas.items(): insertions.append(index + decisions[replica]) fields = ('site_id', 'dataset_id', 'partition_id', 'run_id', 'decision', 'matched_condition') mapping = lambda (site_id, dataset_id, decision, condition_id ): (site_id, dataset_id, partition_id, run_number, decision, condition_id) self._mysql.insert_many('deletion_decisions', fields, mapping, insertions) # now fill the cache self._fill_snapshot_cache(run_number) def _do_get_deletion_decisions(self, run_number, size_only): #override self._fill_snapshot_cache(run_number) partition_id = self._mysql.query( 'SELECT `partition_id` FROM `runs` WHERE `id` = %s', run_number)[0] if size_only: # return {site_name: (protect_size, delete_size, keep_size)} volumes = {} sites = set() query = 'SELECT s.`name`, SUM(r.`size`) * 1.e-12 FROM `replica_snapshot_cache` AS c' query += ' INNER JOIN `replica_size_snapshots` AS r ON r.`id` = c.`size_snapshot_id`' query += ' INNER JOIN `deletion_decisions` AS d ON d.`id` = c.`decision_id`' query += ' INNER JOIN `sites` AS s ON s.`id` = r.`site_id`' query += ' WHERE c.`run_id` = %d' % run_number query += ' AND d.`decision` LIKE %s' query += ' GROUP BY r.`site_id`' for decision in ['protect', 'delete', 'keep']: volumes[decision] = dict(self._mysql.query(query, decision)) sites.update(set(volumes[decision].keys())) self._mysql.query( 'INSERT INTO `replica_snapshot_cache_usage` VALUES (%s, NOW())', run_number) product = {} for site_name in sites: v = {} for decision in ['protect', 'delete', 'keep']: try: v[decision] = volumes[decision][site_name] except: v[decision] = 0 product[site_name] = (v['protect'], v['delete'], v['keep']) return product else: # return {site_name: [(dataset_name, size, decision, reason)]} query = 'SELECT s.`name`, d.`name`, r.`size`, l.`decision`, p.`text` FROM `replica_snapshot_cache` AS c' query += ' INNER JOIN `sites` AS s ON s.`id` = c.`site_id`' query += ' INNER JOIN `datasets` AS d ON d.`id` = c.`dataset_id`' query += ' INNER JOIN `replica_size_snapshots` AS r ON r.`id` = c.`size_snapshot_id`' query += ' INNER JOIN `deletion_decisions` AS l ON l.`id` = c.`decision_id`' query += ' INNER JOIN `policy_conditions` AS p ON p.`id` = l.`matched_condition`' query += ' WHERE c.`run_id` = %d' % run_number query += ' ORDER BY s.`name` ASC, r.`size` DESC' product = {} _site_name = '' for site_name, dataset_name, size, decision, reason in self._mysql.query( query): if site_name != _site_name: product[site_name] = [] current = product[site_name] _site_name = site_name current.append((dataset_name, size, decision, reason)) return product def _do_save_dataset_popularity(self, run_number, datasets): #override if len(self._dataset_id_map) == 0: self._make_dataset_id_map() fields = ('run_id', 'dataset_id', 'popularity') mapping = lambda dataset: (run_number, self._dataset_id_map[ dataset.name], dataset.demand['request_weight'] if 'request_weight' in dataset.demand else 0.) 
self._mysql.insert_many('dataset_popularity_snapshots', fields, mapping, datasets) def _do_get_incomplete_copies(self, partition): #override query = 'SELECT h.`id`, UNIX_TIMESTAMP(h.`timestamp`), h.`approved`, s.`name`, h.`size`' query += ' FROM `copy_requests` AS h' query += ' INNER JOIN `runs` AS r ON r.`id` = h.`run_id`' query += ' INNER JOIN `partitions` AS p ON p.`id` = r.`partition_id`' query += ' INNER JOIN `sites` AS s ON s.`id` = h.`site_id`' query += ' WHERE h.`id` > 0 AND p.`name` LIKE \'%s\' AND h.`completed` = 0 AND h.`run_id` > 0' % partition history_entries = self._mysql.query(query) id_to_record = {} for eid, timestamp, approved, site_name, size in history_entries: id_to_record[eid] = HistoryRecord(HistoryRecord.OP_COPY, eid, site_name, timestamp=timestamp, approved=approved, size=size) id_to_dataset = dict( self._mysql.query('SELECT `id`, `name` FROM `datasets`')) id_to_site = dict( self._mysql.query('SELECT `id`, `name` FROM `sites`')) replicas = self._mysql.select_many('copied_replicas', ('copy_id', 'dataset_id'), 'copy_id', id_to_record.keys()) current_copy_id = 0 for copy_id, dataset_id in replicas: if copy_id != current_copy_id: record = id_to_record[copy_id] current_copy_id = copy_id record.replicas.append( HistoryRecord.CopiedReplica( dataset_name=id_to_dataset[dataset_id])) return id_to_record.values() def _do_get_copied_replicas(self, run_number): #override query = 'SELECT s.`name`, d.`name` FROM `copied_replicas` AS p' query += ' INNER JOIN `copy_requests` AS r ON r.`id` = p.`copy_id`' query += ' INNER JOIN `datasets` AS d ON d.`id` = p.`dataset_id`' query += ' INNER JOIN `sites` AS s ON s.`id` = r.`site_id`' query += ' WHERE r.`run_id` = %d' % run_number return self._mysql.query(query) def _do_get_site_name(self, operation_id): #override result = self._mysql.query( 'SELECT s.name FROM `sites` AS s INNER JOIN `copy_requests` AS h ON h.`site_id` = s.`id` WHERE h.`id` = %s', operation_id) if len(result) != 0: return result[0] result = self._mysql.query( 'SELECT s.name FROM `sites` AS s INNER JOIN `deletion_requests` AS h ON h.`site_id` = s.`id` WHERE h.`id` = %s', operation_id) if len(result) != 0: return result[0] return '' def _do_get_deletion_runs(self, partition, first, last): #override result = self._mysql.query( 'SELECT `id` FROM `partitions` WHERE `name` LIKE %s', partition) if len(result) == 0: return 0 partition_id = result[0] if first < 0: sql = 'SELECT MAX(`id`)' else: sql = 'SELECT `id`' sql += ' FROM `runs` WHERE `partition_id` = %d AND `time_end` NOT LIKE \'0000-00-00 00:00:00\' AND `operation` IN (\'deletion\', \'deletion_test\')' % partition_id if first >= 0: sql += ' AND `id` >= %d' % first if last >= 0: sql += ' AND `id` <= %d' % last return self._mysql.query(sql) def _do_get_copy_runs(self, partition, first, last): #override result = self._mysql.query( 'SELECT `id` FROM `partitions` WHERE `name` LIKE %s', partition) if len(result) == 0: return 0 partition_id = result[0] if first < 0: sql = 'SELECT MAX(`id`)' else: sql = 'SELECT `id`' sql += ' FROM `runs` WHERE `partition_id` = %d AND `time_end` NOT LIKE \'0000-00-00 00:00:00\' AND `operation` IN (\'copy\', \'copy_test\')' % partition_id if first >= 0: sql += ' AND `id` >= %d' % first if last >= 0: sql += ' AND `id` <= %d' % last return self._mysql.query(sql) def _do_get_run_timestamp(self, run_number): #override result = self._mysql.query( 'SELECT UNIX_TIMESTAMP(`time_start`) FROM `runs` WHERE `id` = %s', run_number) if len(result) == 0: return 0 return result[0] def _do_get_next_test_id(self): 
#override copy_result = self._mysql.query( 'SELECT MIN(`id`) FROM `copy_requests`')[0] if copy_result == None: copy_result = 0 deletion_result = self._mysql.query( 'SELECT MIN(`id`) FROM `deletion_requests`')[0] if deletion_result == None: deletion_result = 0 return min(copy_result, deletion_result) - 1 def _make_site_id_map(self): self._site_id_map = {} for name, site_id in self._mysql.query( 'SELECT `name`, `id` FROM `sites`'): self._site_id_map[name] = int(site_id) def _make_dataset_id_map(self): self._dataset_id_map = {} for name, dataset_id in self._mysql.query( 'SELECT `name`, `id` FROM `datasets`'): self._dataset_id_map[name] = int(dataset_id) def _fill_snapshot_cache(self, run_number): if self._mysql.query( 'SELECT COUNT(*) FROM `replica_snapshot_cache` WHERE `run_id` = %s', run_number)[0] == 0: partition_id = self._mysql.query( 'SELECT `partition_id` FROM `runs` WHERE `id` = %s', run_number)[0] query = 'INSERT INTO `replica_snapshot_cache`' query += ' SELECT %d, dd1.`site_id`, dd1.`dataset_id`, rs1.`id`, dd1.`id` FROM `deletion_decisions` AS dd1, `replica_size_snapshots` AS rs1' % run_number query += ' WHERE (dd1.`site_id`, dd1.`partition_id`, dd1.`dataset_id`) = (rs1.`site_id`, rs1.`partition_id`, rs1.`dataset_id`)' query += ' AND dd1.`partition_id` = %d' % partition_id query += ' AND rs1.`size` IS NOT NULL' query += ' AND rs1.`run_id` = (' query += ' SELECT MAX(rs2.`run_id`) FROM `replica_size_snapshots` AS rs2' query += ' WHERE (rs2.`site_id`, rs2.`partition_id`, rs2.`dataset_id`) = (rs1.`site_id`, rs1.`partition_id`, rs1.`dataset_id`)' query += ' AND rs2.`partition_id` = %d' % partition_id query += ' AND rs2.`run_id` <= %d' % run_number query += ' )' query += ' AND dd1.`run_id` = (' query += ' SELECT MAX(dd2.`run_id`) FROM `deletion_decisions` AS dd2' query += ' WHERE (dd2.`site_id`, dd2.`partition_id`, dd2.`dataset_id`) = (dd1.`site_id`, dd1.`partition_id`, dd1.`dataset_id`)' query += ' AND dd2.`partition_id` = %d' % partition_id query += ' AND dd2.`run_id` <= %d' % run_number query += ' )' self._mysql.query(query) self._mysql.query( 'INSERT INTO `replica_snapshot_cache_usage` VALUES (%s, NOW())', run_number) num_deleted = self._mysql.query( 'DELETE FROM `replica_snapshot_cache` WHERE `run_id` NOT IN (SELECT `run_id` FROM `replica_snapshot_cache_usage` WHERE `timestamp` > DATE_SUB(NOW(), INTERVAL 1 WEEK))' ) if num_deleted != 0: self._mysql.query('OPTIMIZE TABLE `replica_snapshot_cache`') num_deleted = self._mysql.query( 'DELETE FROM `replica_snapshot_cache_usage` WHERE `timestamp` < DATE_SUB(NOW(), INTERVAL 1 WEEK)' ) if num_deleted != 0: self._mysql.query('OPTIMIZE TABLE `replica_snapshot_cache_usage`')
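# Usage sketch (illustrative; it assumes TransactionHistoryInterface exposes
# public wrappers such as acquire_lock()/new_run()/close_run() around the
# _do_* overrides above -- those wrapper names are inferred from the _do_
# prefix convention and are not confirmed in this file; the partition name
# 'AnalysisOps' is also an assumption):
#
#     history = MySQLHistory()
#     if history.acquire_lock(blocking=True):
#         run = history.new_run(HistoryRecord.OP_COPY, 'AnalysisOps',
#                               policy_version='v1', is_test=False, comment='')
#         # ... record copy/deletion entries for this run ...
#         history.close_run(HistoryRecord.OP_COPY, run)
#         history.release_lock()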
class ActivityLock(object):
    """
    Web-based activity lock using registry.
    """

    def __init__(self, application, service='dynamo', asuser='', db_params=config.registry.db_params):
        self._mysql = MySQL(**db_params)

        self.application = application
        self.service = service
        if asuser:
            self.user = asuser
        else:
            self.user = config.activitylock.default_user

    def __enter__(self):
        self.lock()

    def __exit__(self, exc_type, exc_value, traceback):
        if not self.unlock():
            raise RuntimeError('Failed to unlock')

        return exc_type is None and exc_value is None and traceback is None

    def lock(self):
        while True:
            self._mysql.query('LOCK TABLES `activity_lock` WRITE, `users` WRITE, `services` WRITE')

            query = 'SELECT `users`.`name`, `services`.`name` FROM `activity_lock`'
            query += ' INNER JOIN `users` ON `users`.`id` = `activity_lock`.`user_id`'
            query += ' INNER JOIN `services` ON `services`.`id` = `activity_lock`.`service_id`'
            query += ' WHERE `application` = %s'

            result = self._mysql.query(query, self.application)
            if len(result) == 0:
                break
            elif result[0] == (self.user, self.service):
                query = 'DELETE FROM `activity_lock` WHERE `application` = %s'
                self._mysql.query(query, self.application)
                break

            logger.info('Activity lock for %s in place: user = %s, service = %s', self.application, *result[0])

            self._mysql.query('UNLOCK TABLES')

            time.sleep(60)

        query = 'INSERT INTO `activity_lock` (`user_id`, `service_id`, `application`, `timestamp`, `note`)'
        query += ' SELECT `users`.`id`, `services`.`id`, %s, NOW(), \'Dynamo running\' FROM `users`, `services`'
        query += ' WHERE `users`.`name` = %s AND `services`.`name` = %s'

        self._mysql.query(query, self.application, self.user, self.service)

        self._mysql.query('UNLOCK TABLES')

        logger.info('Locked system for %s', self.application)

    def unlock(self):
        self._mysql.query('LOCK TABLES `activity_lock` WRITE, `users` WRITE, `services` WRITE')

        query = 'SELECT `users`.`name`, `services`.`name` FROM `activity_lock`'
        query += ' INNER JOIN `users` ON `users`.`id` = `activity_lock`.`user_id`'
        query += ' INNER JOIN `services` ON `services`.`id` = `activity_lock`.`service_id`'
        query += ' WHERE `application` = %s'

        result = self._mysql.query(query, self.application)
        if len(result) == 0:
            self._mysql.query('UNLOCK TABLES')
            return True

        if result[0] == (self.user, self.service):
            query = 'DELETE FROM `activity_lock` WHERE `application` = %s'
            self._mysql.query(query, self.application)
            self._mysql.query('UNLOCK TABLES')
            return True
        else:
            logger.error('Lock logic error: some process obtained the activity lock for %s', self.application)
            self._mysql.query('UNLOCK TABLES')
            return False
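# Usage sketch (added for illustration, not from the original module). The
# application name 'detox' and the guarded block are assumptions; __enter__
# does not return the lock object, so the `with` statement is used bare.
if __name__ == '__main__':
    with ActivityLock('detox'):
        # work that must not overlap with other services holding the
        # 'detox' application lock would go here
        pass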
                    help='Logging level.')

args = parser.parse_args()
sys.argv = []

# Need to setup logging before loading other modules
log_level = getattr(logging, args.log_level.upper())
logging.basicConfig(level=log_level)

logger = logging.getLogger(__name__)

from common.interface.mysql import MySQL

store = MySQL(config_file='/etc/my.cnf', config_group='mysql-dynamo', db='dynamoregister')

if args.command[0] == 'update':
    logger.info('Synchronizing the user list to SiteDB.')

    from common.interface.sitedb import SiteDB

    sitedb = SiteDB()

    domain_id = store.query('SELECT `id` FROM `domains` WHERE `name` = \'cern.ch\'')[0]

    query = 'INSERT INTO `users` (`name`, `domain_id`, `email`, `dn`) VALUES (%s, ' + str(domain_id) + ', %s, %s) ON DUPLICATE KEY UPDATE `email` = `email`, `dn` = `dn`'
def get_phedex_tree(site):
    """
    Get the file list tree from PhEDEx.
    Uses the InventoryAge configuration to determine when to refresh cache.

    :param str site: The site to get information from PhEDEx for.
    :returns: A tree containing file replicas that are supposed to be at the site
    :rtype: ConsistencyCheck.datatypes.DirectoryInfo
    """

    tree = datatypes.DirectoryInfo('/store')

    valid_list = config.config_dict().get('DirectoryList', [])

    sql = MySQL(config_file='/etc/my.cnf', db='dynamo', config_group='mysql-dynamo')

    datasets = sql.query(
        'SELECT datasets.name '
        'FROM sites INNER JOIN dataset_replicas INNER JOIN datasets '
        'WHERE dataset_replicas.dataset_id=datasets.id AND '
        'dataset_replicas.site_id=sites.id and sites.name=%s', site)

    def add_files(dataset, retries):
        """
        :param str dataset: Dataset to get from PhEDEx
        :param int retries: The number of times to retry PhEDEx call
        :returns: Whether or not the addition was successful
        :rtype: bool
        """

        LOG.info('Getting PhEDEx contents for %s', dataset)

        phedex_response = get_json('cmsweb.cern.ch',
                                   '/phedex/datasvc/json/prod/filereplicas',
                                   {'node': site, 'dataset': dataset},
                                   retries=retries, use_https=True)

        report = 0

        if not phedex_response:
            LOG.warning('Bad response from PhEDEx for %s', dataset)
            return False

        for block in phedex_response['phedex']['block']:
            LOG.debug('%s', block)
            replica_list = [(replica['name'], replica['bytes'],
                             int(replica['replica'][0]['time_create'] or time.time()),
                             block['name']) \
                            for replica in block['file'] \
                            if replica['name'].split('/')[2] in valid_list]

            report += len(replica_list)
            tree.add_file_list(replica_list)

        LOG.info('%i files', report)

        return True

    separate = []

    for primary in set([d.split('/')[1][:3] for d in datasets]):
        success = add_files('/%s*/*/*' % primary, 0)

        if not success:
            separate.append(primary)

    # Separate loop to retry datasets individually
    for dataset in [d for d in datasets if d.split('/')[1][:3] in separate]:
        success = add_files(dataset, 5)

        if not success:
            LOG.critical('Cannot get %s from PhEDEx. Do not trust results...', dataset)

    return tree
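# Usage sketch (illustrative; the site name is an assumption): the returned
# DirectoryInfo tree is the PhEDEx-side input to datatypes.compare() in the
# consistency-check main() routine.
#
#     phedex_tree = get_phedex_tree('T2_US_MIT')
#     LOG.info('PhEDEx expects %i files', phedex_tree.get_num_files())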
def get_db_listing(site):
    """
    Get the list of files from dynamo database directly from MySQL.

    :param str site: The name of the site to load
    :returns: The file replicas that are supposed to be at a site
    :rtype: ConsistencyCheck.datatypes.DirectoryInfo
    """

    inv_sql = MySQL(config_file='/etc/my.cnf', db='dynamo', config_group='mysql-dynamo')

    # Get list of files
    curs = inv_sql._connection.cursor()

    LOG.info('About to make MySQL query for files at %s', site)

    tree = datatypes.DirectoryInfo('/store')

    def add_to_tree(curs):
        """
        Add cursor contents to the dynamo listing tree

        :param MySQLdb.cursor curs: The cursor which just completed a query to fetch
        """

        dirs_to_look = iter(sorted(config.config_dict()['DirectoryList']))
        files_to_add = []
        look_dir = ''

        row = curs.fetchone()

        while row:
            name, size = row[0:2]
            timestamp = time.mktime(row[2].timetuple()) if len(row) == 3 else 0

            current_directory = name.split('/')[2]

            try:
                while look_dir < current_directory:
                    look_dir = next(dirs_to_look)
            except StopIteration:
                break

            if current_directory == look_dir:
                LOG.debug('Adding file: %s, %i', name, size)
                files_to_add.append((name, size, timestamp))

            row = curs.fetchone()

        tree.add_file_list(files_to_add)

    curs.execute(
        """
        SELECT files.name, files.size FROM block_replicas
        INNER JOIN sites ON block_replicas.site_id = sites.id
        INNER JOIN files ON block_replicas.block_id = files.block_id
        WHERE block_replicas.is_complete = 1 AND sites.name = %s AND group_id != 0
        ORDER BY files.name ASC
        """, (site, ))

    add_to_tree(curs)

    curs.execute(
        """
        SELECT files.name, files.size, NOW() FROM block_replicas
        INNER JOIN sites ON block_replicas.site_id = sites.id
        INNER JOIN files ON block_replicas.block_id = files.block_id
        WHERE (block_replicas.is_complete = 0 OR group_id = 0) AND sites.name = %s
        ORDER BY files.name ASC
        """, (site, ))

    add_to_tree(curs)

    LOG.info('MySQL query returned')

    return tree
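# Usage sketch (illustrative; the site name is an assumption): get_db_listing()
# and get_phedex_tree() return the same kind of DirectoryInfo tree, so either
# can serve as the inventory side of the comparison. The second line mirrors
# how main() seeds the directory list for the XRootD lister.
#
#     inv_tree = get_db_listing('T2_US_MIT')
#     config.DIRECTORYLIST = [d.name for d in inv_tree.directories]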
def make_request(self, resource='', options=[], method=GET, format='url', cache_lifetime=0): url = self.url_base if resource: url += '/' + resource if method == GET and len(options) != 0: if type(options) is list: url += '?' + '&'.join(options) elif type(options) is str: url += '?' + options if logger.getEffectiveLevel() == logging.DEBUG: logger.debug(url) # first check the cache if method == GET and self._cache_lock is not None and cache_lifetime > 0: with self._cache_lock: try: db = MySQL(**config.webservice.cache_db_params) cache = db.query( 'SELECT UNIX_TIMESTAMP(`timestamp`), `content` FROM `webservice` WHERE `url` = %s', url) db.close() except: logger.error( 'Connection to cache DB failed when fetching the timestamp for %s.', url) cache = [] if len(cache) != 0: timestamp, content = cache[0] if time.time() - timestamp < cache_lifetime: logger.debug('Using cache for %s', url) if self.accept == 'application/json': result = json.loads(content) unicode2str(result) elif self.accept == 'application/xml': # TODO implement xml -> dict result = content return result # now query the URL request = urllib2.Request(url) if method == POST: if format == 'url': # Options can be a dict or a list of key=value strings or 2-tuples. The latter case allows repeated keys (e.g. dataset=A&dataset=B) if type(options) is list: # convert key=value strings to (key, value) 2-tuples optlist = [] for opt in options: if type(opt) is tuple: optlist.append(opt) elif type(opt) is str: key, eq, value = opt.partition('=') if eq == '=': optlist.append((key, value)) options = optlist data = urllib.urlencode(options) elif format == 'json': # Options must be jsonizable. request.add_header('Content-type', 'application/json') data = json.dumps(options) request.add_data(data) wait = 1. exceptions = [] while len(exceptions) != config.webservice.num_attempts: try: if self.auth_handler: opener = urllib2.build_opener(self.auth_handler()) else: opener = urllib2.build_opener() if 'Accept' not in self.headers: opener.addheaders.append(('Accept', self.accept)) opener.addheaders.extend(self.headers) response = opener.open(request) # clean up - break reference cycle so python can free the memory up for handler in opener.handlers: handler.parent = None del opener content = response.read() del response if method == GET and self._cache_lock is not None: with tempfile.NamedTemporaryFile(mode='w', delete=False) as tmpfile: filename = tmpfile.name tmpfile.write('\'%s\',\'%s\',\'%s\'' % (MySQL.escape_string(url), time.strftime('%Y-%m-%d %H:%M:%S'), MySQL.escape_string(content))) os.chmod(filename, 0644) with self._cache_lock: try: db = MySQL(**config.webservice.cache_db_params) db.query( 'DELETE FROM `webservice` WHERE `url` = %s', url) db.query( r"LOAD DATA LOCAL INFILE '%s' INTO TABLE `dynamocache`.`webservice` FIELDS TERMINATED BY ',' ENCLOSED BY '\''" % filename) db.close() except: logger.error( 'Connection to cache DB failed when writing the response of %s.', url) pass os.remove(filename) if self.accept == 'application/json': result = json.loads(content) unicode2str(result) elif self.accept == 'application/xml': # TODO implement xml -> dict result = content del content return result except urllib2.HTTPError as err: last_except = (str(err)) + '\nBody:\n' + err.read() except: last_except = sys.exc_info()[:2] exceptions.append(last_except) logger.info( 'Exception "%s" occurred in webservice. 
Trying again in %.1f seconds.', str(last_except), wait) time.sleep(wait) wait *= 1.5 else: # exhausted allowed attempts logger.error('Too many failed attempts in webservice') logger.error('%s' % ' '.join(map(str, exceptions))) raise RuntimeError('webservice too many attempts')
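# Usage sketch (illustrative; `phedex_interface` stands in for whatever
# webservice-style object owns this make_request() method -- the owning class
# is not shown in this fragment, and the resource and options are assumptions):
#
#     data = phedex_interface.make_request('filereplicas',
#                                          ['node=T2_US_MIT', 'dataset=/A/B/C'],
#                                          cache_lifetime=3600)
#
# GET responses are cached in the `webservice` table keyed by URL and reused
# while they are younger than cache_lifetime seconds.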
class MySQLReplicaLock(object):
    """
    A plugin for DemandManager that appends lists of block replicas that are locked.
    Sets one demand value:
      locked_blocks: {site: set of blocks}
    """

    def __init__(self, db_params=config.registry.db_params):
        self._mysql = MySQL(**db_params)

    def load(self, inventory):
        self.update(inventory)

    def update(self, inventory):
        query = 'SELECT `item`, `sites`, `groups` FROM `detox_locks` WHERE `unlock_date` IS NULL'
        if len(config.mysqllock.users) != 0:
            query += ' AND (`user_id`, `service_id`) IN ('
            query += 'SELECT u.`id`, s.`id` FROM `users` AS u, `services` AS s WHERE '
            query += ' OR '.join('(u.`name` LIKE "%s" AND s.`name` LIKE "%s")' % us for us in config.mysqllock.users)
            query += ')'

        entries = self._mysql.query(query)

        for item_name, sites_pattern, groups_pattern in entries:
            if '#' in item_name:
                dataset_name, block_real_name = item_name.split('#')
            else:
                dataset_name = item_name
                block_real_name = None

            try:
                dataset = inventory.datasets[dataset_name]
            except KeyError:
                logger.debug('Cannot lock unknown dataset %s', dataset_name)
                continue

            if dataset.replicas is None:
                continue

            if dataset.blocks is None:
                inventory.store.load_blocks(dataset)

            if block_real_name is None:
                blocks = list(dataset.blocks)
            else:
                block = dataset.find_block(Block.translate_name(block_real_name))
                if block is None:
                    logger.debug('Cannot lock unknown block %s#%s', dataset_name, block_real_name)
                    continue

                blocks = [block]

            sites = set()
            if sites_pattern:
                if '*' in sites_pattern:
                    sites.update(s for n, s in inventory.sites.items() if fnmatch.fnmatch(n, sites_pattern))
                else:
                    try:
                        sites.add(inventory.sites[sites_pattern])
                    except KeyError:
                        pass

            if len(sites) == 0:
                # if no site matches the pattern, we will be on the safe side and treat it as a global lock
                sites.update(r.site for r in dataset.replicas)

            groups = set()
            if groups_pattern:
                if '*' in groups_pattern:
                    groups.update(g for n, g in inventory.groups.items() if fnmatch.fnmatch(n, groups_pattern))
                else:
                    try:
                        groups.add(inventory.groups[groups_pattern])
                    except KeyError:
                        pass

            if len(groups) == 0:
                # if no group matches the pattern, we will be on the safe side and treat it as a global lock
                for replica in dataset.replicas:
                    groups.update(brep.group for brep in replica.block_replicas)

            try:
                locked_blocks = dataset.demand['locked_blocks']
            except KeyError:
                locked_blocks = dataset.demand['locked_blocks'] = {}

            for replica in dataset.replicas:
                if replica.site not in sites:
                    continue

                if replica.site not in locked_blocks:
                    locked_blocks[replica.site] = set()

                for block_replica in replica.block_replicas:
                    if block_replica.group not in groups:
                        continue

                    if block_replica.block in blocks:
                        locked_blocks[replica.site].add(block_replica.block)
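# Usage sketch (illustrative, not from the original module): after update(),
# each affected dataset carries demand['locked_blocks'] mapping a site to the
# set of blocks locked there. The inventory object is assumed to come from the
# caller (e.g. a DemandManager-style driver).
#
#     plugin = MySQLReplicaLock()
#     plugin.update(inventory)
#     for dataset in inventory.datasets.values():
#         for site, blocks in dataset.demand.get('locked_blocks', {}).items():
#             logger.debug('%s: %i blocks locked at %s', dataset.name, len(blocks), site.name)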
def main(site): """ Gets the listing from the dynamo database, and remote XRootD listings of a given site. The differences are compared to deletion queues and other things. .. Note:: If you add things, list them in the module docstring. The differences that should be acted on are copied to the summary webpage and entered into the dynamoregister database. :param str site: The site to run the check over :returns: missing files, size, orphan files, size :rtype: list, long, list, long """ start = time.time() prev_missing = '%s_compare_missing.txt' % site prev_set = set() if os.path.exists(prev_missing): with open(prev_missing, 'r') as prev_file: for line in prev_file: prev_set.add(line.strip()) if int(config.config_dict().get('SaveCache')): prev_new_name = '%s.%s' % (prev_missing, datetime.datetime.fromtimestamp( os.stat(prev_missing).st_mtime).strftime('%y%m%d') ) else: prev_new_name = prev_missing shutil.move(prev_missing, os.path.join(config.config_dict()['CacheLocation'], prev_new_name) ) # All of the files and summary will be dumped here webdir = config.config_dict()['WebDir'] # Open a connection temporarily to make sure we only list good sites status_check = MySQL(config_file='/etc/my.cnf', db='dynamo', config_group='mysql-dynamo') status = status_check.query('SELECT status FROM sites WHERE name = %s', site)[0] if status != 'ready': LOG.error('Site %s status is %s', site, status) # Note the attempt to do listing conn = sqlite3.connect(os.path.join(webdir, 'stats.db')) curs = conn.cursor() curs.execute( """ REPLACE INTO stats VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, DATETIME(DATETIME(), "-4 hours"), ?, ?) """, (site, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)) conn.commit() conn.close() exit(0) # Close the connection while we are getting the trees together status_check.close() inv_tree = getinventorycontents.get_db_listing(site) # Reset the DirectoryList for the XRootDLister to run on config.DIRECTORYLIST = [directory.name for directory in inv_tree.directories] site_tree = getsitecontents.get_site_tree(site) # Get whether or not the site is debugged conn = sqlite3.connect(os.path.join(webdir, 'stats.db')) curs = conn.cursor() curs.execute('SELECT isgood FROM sites WHERE site = ?', (site, )) is_debugged = curs.fetchone()[0] conn.close() # Create the function to check orphans and missing # First, datasets in the deletions queue can be missing acceptable_missing = checkphedex.set_of_deletions(site) # Orphan files cannot belong to any dataset that should be at the site inv_sql = MySQL(config_file='/etc/my.cnf', db='dynamo', config_group='mysql-dynamo') acceptable_orphans = set( inv_sql.query( """ SELECT datasets.name FROM sites INNER JOIN dataset_replicas ON dataset_replicas.site_id=sites.id INNER JOIN datasets ON dataset_replicas.dataset_id=datasets.id WHERE sites.name=%s """, site) ) # Orphan files may be a result of deletion requests acceptable_orphans.update(acceptable_missing) # Ignored datasets will not give a full listing, so they can't be accused of having orphans acceptable_orphans.update( inv_sql.query('SELECT name FROM datasets WHERE status=%s', 'IGNORED') ) # Do not delete anything that is protected by Unified protected_unmerged = get_json('cmst2.web.cern.ch', '/cmst2/unified/listProtectedLFN.txt') acceptable_orphans.update(['/%s/%s-%s/%s' % (split_name[4], split_name[3], split_name[6], split_name[5]) \ for split_name in \ [name.split('/') for name in protected_unmerged['protected']] ]) LOG.debug('Acceptable orphans: \n%s\n', '\n'.join(acceptable_orphans)) ignore_list = 
config.config_dict().get('IgnoreDirectories', []) def double_check(file_name, acceptable): """ Checks the file name against a list of datasets to not list files from. :param str file_name: LFN of the file :param set acceptable: Datasets to not list files from (Acceptable orphans or missing) :returns: Whether the file belongs to a dataset in the list or not :rtype: bool """ LOG.debug('Checking file_name: %s', file_name) # Skip over paths that include part of the list of ignored directories for pattern in ignore_list: if pattern in file_name: return True split_name = file_name.split('/') try: return '/%s/%s-%s/%s' % (split_name[4], split_name[3], split_name[6], split_name[5]) in acceptable except IndexError: LOG.warning('Strange file name: %s', file_name) return True check_orphans = lambda x: double_check(x, acceptable_orphans) check_missing = lambda x: double_check(x, acceptable_missing) # Do the comparison missing, m_size, orphan, o_size = datatypes.compare( inv_tree, site_tree, '%s_compare' % site, orphan_check=check_orphans, missing_check=check_missing) LOG.debug('Missing size: %i, Orphan size: %i', m_size, o_size) # Enter things for site in registry if os.environ['USER'] == 'dynamo': reg_sql = MySQL(config_file='/etc/my.cnf', db='dynamoregister', config_group='mysql-dynamo') else: reg_sql = MySQL(config_file=os.path.join(os.environ['HOME'], 'my.cnf'), db='dynamoregister', config_group='mysql-register-test') # Determine if files should be entered into the registry many_missing = len(missing) > int(config.config_dict()['MaxMissing']) many_orphans = len(orphan) > int(config.config_dict()['MaxOrphan']) if is_debugged and not many_missing and not many_orphans: def execute(query, *args): """ Executes the query on the registry and outputs a log message depending on query :param str query: The SQL query to execute :param args: The arguments to the SQL query """ reg_sql.query(query, *args) if 'transfer_queue' in query: LOG.info('Copying %s from %s', args[0], args[1]) elif 'deletion_queue' in query: LOG.info('Deleting %s', args[0]) else: if many_missing: LOG.error('Too many missing files: %i, you should investigate.', len(missing)) if many_orphans: LOG.error('Too many orphan files: %i out of %i, you should investigate.', len(orphan), site_tree.get_num_files()) execute = lambda *_: 0 # Then do entries, if the site is in the debugged status def add_transfers(line, sites): """ Add the file into the transfer queue for multiple sites. 
        :param str line: The file LFN to transfer
        :param list sites: Sites to try to transfer from
        :returns: Whether or not the entry was a success
        :rtype: bool
        """
        # Only queue a transfer if the file was also missing in the previous run
        # (or if there is no previous list to compare against)
        if line in prev_set or not prev_set:
            for location in sites:
                execute(
                    """
                    INSERT IGNORE INTO `transfer_queue`
                    (`file`, `site_from`, `site_to`, `status`, `reqid`)
                    VALUES (%s, %s, %s, 'new', 0)
                    """,
                    line, location, site)

        return bool(sites)

    # Set up a query for sites, with an added condition at the end
    site_query = """
                 SELECT sites.name FROM sites
                 INNER JOIN block_replicas ON sites.id = block_replicas.site_id
                 INNER JOIN files ON block_replicas.block_id = files.block_id
                 WHERE files.name = %s AND sites.name != %s
                 AND sites.status = 'ready' AND block_replicas.is_complete = 1
                 AND group_id != 0
                 {0}
                 """

    # Track files with no sources
    no_source_files = []

    for line in missing:
        # Get sites that are not tape
        sites = inv_sql.query(
            site_query.format('AND sites.storage_type != "mss"'),
            line, site)

        if not add_transfers(line, sites):
            # Track files without a disk source
            no_source_files.append(line)

            # Fall back to sites that are tape
            sites = inv_sql.query(
                site_query.format('AND sites.storage_type = "mss"'),
                line, site)

            add_transfers(line, sites)

    # Delete orphans, plus any empty nodes that are not in the inventory tree
    for line in orphan + \
            [empty_node for empty_node in site_tree.empty_nodes_list()
             if not inv_tree.get_node('/'.join(empty_node.split('/')[2:]),
                                      make_new=False)]:
        execute(
            """
            INSERT IGNORE INTO `deletion_queue`
            (`file`, `site`, `status`)
            VALUES (%s, %s, 'new')
            """,
            line, site)

    reg_sql.close()

    with open('%s_missing_nosite.txt' % site, 'w') as nosite:
        for line in no_source_files:
            nosite.write(line + '\n')

    # We want to track which blocks the missing files are coming from
    track_missing_blocks = defaultdict(
        lambda: {'errors': 0,
                 'blocks': defaultdict(lambda: {'group': '',
                                                'errors': 0})
                })

    blocks_query = """
                   SELECT blocks.name, IFNULL(groups.name, 'Unsubscribed') FROM blocks
                   INNER JOIN files ON files.block_id = blocks.id
                   INNER JOIN block_replicas ON block_replicas.block_id = files.block_id
                   INNER JOIN sites ON block_replicas.site_id = sites.id
                   LEFT JOIN groups ON block_replicas.group_id = groups.id
                   WHERE files.name = %s AND sites.name = %s
                   """

    with open('%s_compare_missing.txt' % site, 'r') as input_file:
        for line in input_file:
            split_name = line.split('/')
            dataset = '/%s/%s-%s/%s' % (split_name[4], split_name[3],
                                        split_name[6], split_name[5])

            output = inv_sql.query(blocks_query, line.strip(), site)

            if not output:
                LOG.warning('The following SQL statement failed: %s',
                            blocks_query % (line.strip(), site))
                LOG.warning('Most likely cause is a dynamo update between the listing and now')

                from_phedex = get_json('cmsweb.cern.ch', '/phedex/datasvc/json/prod/filereplicas',
                                       params={'node': site, 'LFN': line.strip()},
                                       use_cert=True)

                try:
                    output = [(from_phedex['phedex']['block'][0]['name'].split('#')[1],
                               from_phedex['phedex']['block'][0]['replica'][0]['group'])]
                except IndexError:
                    LOG.error('File replica not in PhEDEx either!')
                    LOG.error('Skipping block level report for this file.')
                    continue

            block, group = output[0]

            track_missing_blocks[dataset]['errors'] += 1
            track_missing_blocks[dataset]['blocks'][block]['errors'] += 1
            track_missing_blocks[dataset]['blocks'][block]['group'] = group

    inv_sql.close()

    # Output file with the missing datasets
    with open('%s_missing_datasets.txt' % site, 'w') as output_file:
        for dataset, vals in \
                sorted(track_missing_blocks.iteritems(),
                       key=lambda x: x[1]['errors'], reverse=True):

            for block_name, block in sorted(vals['blocks'].iteritems()):
                output_file.write('%10i %-17s %s#%s\n' %
                                  (block['errors'], block['group'],
                                   dataset, block_name))

    # If there were permission or connection issues, no files would be listed
    # Otherwise, copy the output files to the web directory
    shutil.copy('%s_missing_datasets.txt' % site, webdir)
    shutil.copy('%s_missing_nosite.txt' % site, webdir)
    shutil.copy('%s_compare_missing.txt' % site, webdir)
    shutil.copy('%s_compare_orphan.txt' % site, webdir)

    if (os.environ.get('ListAge') is None) and (os.environ.get('InventoryAge') is None):

        # Update the runtime stats on the stats page if the listing settings are not changed
        conn = sqlite3.connect(os.path.join(webdir, 'stats.db'))
        curs = conn.cursor()

        curs.execute('INSERT INTO stats_history SELECT * FROM stats WHERE site=?', (site, ))
        curs.execute(
            """
            REPLACE INTO stats VALUES
            (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, DATETIME(DATETIME(), "-4 hours"), ?, ?)
            """,
            (site, time.time() - start, site_tree.get_num_files(),
             site_tree.count_nodes(), len(site_tree.empty_nodes_list()),
             config.config_dict().get('NumThreads',
                                      config.config_dict().get('MinThreads', 0)),
             len(missing), m_size, len(orphan), o_size,
             len(no_source_files), site_tree.get_num_files(unlisted=True)))

        conn.commit()
        conn.close()
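
# Illustrative sketch only; it is not called by the checks above.  It shows the
# LFN-to-dataset mapping that double_check() and the missing-block report rely on.
# The sample LFN is hypothetical; only the index order (4, 3, 6, 5) comes from the
# code above.
def _example_lfn_to_dataset():
    lfn = '/store/mc/RunIISummer16/SomeSample/MINIAODSIM/PUMoriond17-v1/00000/ABCD1234.root'
    split_name = lfn.split('/')
    # Reorder the LFN fields into the dataset name used by the inventory
    dataset = '/%s/%s-%s/%s' % (split_name[4], split_name[3],
                                split_name[6], split_name[5])
    # dataset == '/SomeSample/RunIISummer16-PUMoriond17-v1/MINIAODSIM'
    return dataset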

def get_phedex_tree(site):
    """
    Get the file list tree from PhEDEx.
    Uses the InventoryAge configuration to determine when to refresh the cache.

    :param str site: The site to get information from PhEDEx for.
    :returns: A tree containing file replicas that are supposed to be at the site
    :rtype: ConsistencyCheck.datatypes.DirectoryInfo
    """
    tree = datatypes.DirectoryInfo('/store')

    valid_list = config.config_dict().get('DirectoryList', [])

    sql = MySQL(config_file='/etc/my.cnf', db='dynamo', config_group='mysql-dynamo')

    datasets = sql.query('SELECT datasets.name '
                         'FROM sites INNER JOIN dataset_replicas INNER JOIN datasets '
                         'WHERE dataset_replicas.dataset_id=datasets.id AND '
                         'dataset_replicas.site_id=sites.id AND sites.name=%s',
                         site)

    def add_files(dataset, retries):
        """
        Add the file replicas of a dataset to the tree.

        :param str dataset: Dataset to get from PhEDEx
        :param int retries: The number of times to retry the PhEDEx call
        :returns: Whether or not the addition was successful
        :rtype: bool
        """
        LOG.info('Getting PhEDEx contents for %s', dataset)

        phedex_response = get_json(
            'cmsweb.cern.ch', '/phedex/datasvc/json/prod/filereplicas',
            {'node': site, 'dataset': dataset},
            retries=retries, use_https=True)

        report = 0

        if not phedex_response:
            LOG.warning('Bad response from PhEDEx for %s', dataset)
            return False

        # The expected nesting of the response is sketched after this function
        for block in phedex_response['phedex']['block']:
            LOG.debug('%s', block)
            replica_list = [(replica['name'], replica['bytes'],
                             int(replica['replica'][0]['time_create'] or time.time()),
                             block['name'])
                            for replica in block['file']
                            if replica['name'].split('/')[2] in valid_list]

            report += len(replica_list)
            tree.add_file_list(replica_list)

        LOG.info('%i files', report)
        return True

    separate = []

    # First try whole groups of datasets at once, keyed on the first three
    # characters of the primary dataset name
    for primary in set([d.split('/')[1][:3] for d in datasets]):
        success = add_files('/%s*/*/*' % primary, 0)
        if not success:
            separate.append(primary)

    # Separate loop to retry datasets individually
    for dataset in [d for d in datasets if d.split('/')[1][:3] in separate]:
        success = add_files(dataset, 5)
        if not success:
            LOG.critical('Cannot get %s from PhEDEx. Do not trust results...', dataset)

    return tree
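
# Sketch of the response nesting that add_files() above expects from the PhEDEx
# 'filereplicas' service.  The values are invented placeholders; only the keys the
# code actually reads ('phedex' -> 'block' -> 'name'/'file', then each file's
# 'name', 'bytes' and 'replica'[0]['time_create']) are meaningful.
_EXAMPLE_FILEREPLICAS_RESPONSE = {
    'phedex': {
        'block': [
            {'name': '/SomeSample/RunIISummer16-PUMoriond17-v1/MINIAODSIM#0000-aaaa',
             'file': [
                 {'name': ('/store/mc/RunIISummer16/SomeSample/MINIAODSIM/'
                           'PUMoriond17-v1/00000/ABCD1234.root'),
                  'bytes': 1024,
                  'replica': [{'time_create': 1483228800}]}
             ]}
        ]
    }
}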

class QueueManager(object):
    """
    Processes 'done' entries in the registry transfer and deletion queues and
    propagates finished requests to the inventory and the history database.
    (A usage sketch follows the class definition.)
    """

    def __init__(self, inventory, history):
        self._inventory = inventory
        self._history = history
        self._mysql = MySQL(**config.registry.db_params)

    def getTransfers(self, requests):
        # Single-file transfers (reqid = 0) that are done can simply be dropped
        sql = "select tq.reqid,tq.file,tq.site_from,tq.site_to,tq.status"
        sql = sql + " from transfer_queue as tq"
        sql = sql + " where tq.status='done' and tq.reqid=0"
        entries = self._mysql.query(sql)

        singlesToDelete = []
        for line in entries:
            reqid = int(line[0])
            (fileName, siteFrom, siteTo, status) = line[1:]
            if reqid == 0:
                singlesToDelete.append((fileName, siteTo))

        self._mysql.delete_many('transfer_queue', ('file', 'site_to'), singlesToDelete)

        # Transfers attached to a unified request are marked done in that request
        sql = "select tq.reqid,tq.file,tq.site_from,tq.site_to,tq.status"
        sql = sql + " from transfer_queue as tq,requests_unified as ru"
        sql = sql + " where tq.status='done' and tq.reqid=ru.reqid"
        entries = self._mysql.query(sql)

        for line in entries:
            reqid = int(line[0])
            (fileName, siteFrom, siteTo, status) = line[1:]
            if reqid == 0:
                continue
            stripedName = (fileName.split('/'))[-1]
            uRequest = requests[reqid]
            dsetObj = self._inventory.datasets[uRequest._itemName]
            fileObj = None
            for filef in dsetObj.files:
                if filef.name == stripedName:
                    fileObj = filef
                    break
            uRequest.markDone(stripedName, fileObj)

    def getDeletions(self, requests):
        # Single-file deletions (reqid = 0) that are done can simply be dropped
        sql = "select dq.reqid,dq.file,dq.site,dq.status"
        sql = sql + " from deletion_queue as dq where dq.status='done' and dq.reqid=0"
        entries = self._mysql.query(sql)

        singlesToDelete = []
        for line in entries:
            reqid = int(line[0])
            (fileName, site, status) = line[1:]
            if reqid == 0:
                singlesToDelete.append((fileName, site))

        self._mysql.delete_many('deletion_queue', ('file', 'site'), singlesToDelete)

        # Deletions attached to a unified request are marked done in that request
        sql = "select dq.reqid,dq.file,dq.site,dq.status"
        sql = sql + " from deletion_queue as dq,requests_unified as ru"
        sql = sql + " where dq.status='done' and dq.reqid=ru.reqid"
        entries = self._mysql.query(sql)

        print '------------'
        for line in entries:
            reqid = int(line[0])
            (fileName, site, status) = line[1:]
            if reqid == 0:
                continue
            stripedName = (fileName.split('/'))[-1]
            uRequest = requests[reqid]
            dsetObj = self._inventory.datasets[uRequest._itemName]
            fileObj = None
            for filef in dsetObj.files:
                if filef.name == stripedName:
                    fileObj = filef
                    break
            uRequest.markDone(stripedName, fileObj)

    def fillDoneTransfers(self, requests):
        # for finished requests we update history first, then delete
        # for unfinished requests we update status and timestamps
        new_dataset_replicas = []
        replica_timestamps = {}
        done_requests = []

        for reqid in sorted(requests):
            uRequest = requests[reqid]
            dataset = self._inventory.datasets[uRequest._itemName]
            site = self._inventory.sites[uRequest._siteTo]
            reqtype = uRequest._reqType
            if reqtype != 'copy':
                continue

            dsetRep = dataset.find_replica(site)

            # a new block replica is derived from an existing replica
            targetGroup = None
            for someRep in dataset.replicas:
                if someRep == dsetRep:
                    continue
                else:
                    targetGroup = someRep.block_replicas[0].group
                    break

            # ask for the size, update only if the size is changing
            if dsetRep is None:
                dsetRep = DatasetReplica(dataset, site)
                self._inventory.update(dsetRep)
                for block in dataset.blocks:
                    blockRep = BlockReplica(block, site, targetGroup,
                                            size=0, last_update=0)
                    self._inventory.update(blockRep)
                print "making new dataset replica"

            dsetDone = True
            for blockRep in dsetRep.block_replicas:
                block = blockRep.block
                size = uRequest.getSize(block)
                complete = False
                if size == block.size:
                    complete = True
                else:
                    dsetDone = False

                if size > blockRep.size:
                    print 'updating block replica ...'
                    blockRep.group = targetGroup
                    blockRep.is_complete = complete
                    blockRep.is_custodial = False
                    blockRep.size = size
                    blockRep.last_update = time.time()

            # here we enter done requests into the history database
            # and delete them from the ongoing activities
            print dsetRep.dataset.name
            print "dset done status = " + str(dsetDone)
            if 0 in uRequest._files.itervalues():
                print "reqid=" + str(reqid) + " request not finished"
            else:
                print "reqid=" + str(reqid) + " request is done !!!"
                new_dataset_replicas.append(dsetRep)
                replica_timestamps[dsetRep] = uRequest._created
                done_requests.append(uRequest._reqid)

        # save completed requests into the history
        self._history.save_dataset_transfers(new_dataset_replicas, replica_timestamps)
        # and delete them from the registry
        self._mysql.delete_many('requests_unified', 'reqid', done_requests)
        self._mysql.delete_many('transfer_queue', 'reqid', done_requests)

    def fillDoneDeletions(self, requests):
        # for finished requests we update history first, then delete
        # for unfinished requests we update status and timestamps
        gone_dataset_replicas = []
        replica_timestamps = {}
        done_requests = []

        for reqid in sorted(requests):
            uRequest = requests[reqid]
            dataset = self._inventory.datasets[uRequest._itemName]
            site = self._inventory.sites[uRequest._siteTo]
            reqtype = uRequest._reqType
            if reqtype != 'delete':
                continue

            print '------------'
            print reqid

            dsetRep = dataset.find_replica(site)

            # deleting something that does not exist
            if dsetRep is None:
                print "..!!.. trying to delete non-existing dataset"
                print site.name
                print dataset.name
                continue

            # here we enter done requests into the history database
            # and delete them from the ongoing activities
            print dsetRep.dataset.name
            if 0 in uRequest._files.itervalues():
                print "reqid=" + str(reqid) + " request not finished"
            else:
                print "reqid=" + str(reqid) + " request is done !!!"
                gone_dataset_replicas.append(dsetRep)
                replica_timestamps[dsetRep] = uRequest._created
                done_requests.append(uRequest._reqid)
                print dsetRep.block_replicas
                dataset.replicas.remove(dsetRep)
                site.remove_dataset_replica(dsetRep)

        # save completed requests into the history
        self._history.save_dataset_deletions(gone_dataset_replicas, replica_timestamps)
        # and delete them from the registry
        self._mysql.delete_many('requests_unified', 'reqid', done_requests)
        self._mysql.delete_many('deletion_queue', 'reqid', done_requests)

    def run(self, comment=''):
        requests = {}

        logger.info('QueueManager run starting at %s',
                    time.strftime('%Y-%m-%d %H:%M:%S'))

        # Build a UserRequest for every queued request, with all files initially not done
        sql = "select * from requests_unified where status='queued'"
        entries = self._mysql.query(sql)
        for line in entries:
            reqid = int(line[0])
            (itemName, datatype, siteTo, reqtype, rank, status, created, updated) = line[1:]
            dsetObj = self._inventory.datasets[itemName]
            requests[reqid] = UserRequest(reqid, itemName, siteTo, reqtype, created)
            for fileObj in dsetObj.files:
                requests[reqid]._files[fileObj.name] = 0

        self.getTransfers(requests)
        self.getDeletions(requests)
        self.fillDoneTransfers(requests)
        self.fillDoneDeletions(requests)

        logger.info('Finished QueueManager run at %s\n',
                    time.strftime('%Y-%m-%d %H:%M:%S'))
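
# Minimal usage sketch for QueueManager, with assumptions marked.  The inventory
# and history objects are constructed elsewhere in dynamo, so load_inventory() and
# load_history() below are hypothetical placeholders rather than real dynamo calls.
# run() expects each requests_unified row to unpack as
# (reqid, item, datatype, site_to, reqtype, rank, status, created, updated).
#
#     inventory = load_inventory()    # hypothetical
#     history = load_history()        # hypothetical
#     manager = QueueManager(inventory, history)
#     manager.run()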