def add_files(self, data_type, file_list):
    """
    Insert a batch of new file records into the DataFile table

    Parameters:
        data_type (str): the data type recorded on every new row
        file_list (list): dictionaries describing the files. Each one must
            provide:
                name (str): the filename
                local_path (str): path to the file
                case (str): the case the file belongs to
            and may provide (a default is stored otherwise):
                local_status: defaults to FileStatus.NOT_PRESENT.value
                year (int): defaults to 0
                month (int): defaults to 0
                remote_uuid (str): defaults to ''
                remote_hostname (str): defaults to ''
                remote_path (str): defaults to ''
                transfer_type (str): defaults to 'local'
    """
    batch_size = 50
    # the mutex is held while the rows are built and inserted
    with self._mutex:
        rows = [
            {
                'name': entry['name'],
                'local_path': entry['local_path'],
                'local_status': entry.get(
                    'local_status', FileStatus.NOT_PRESENT.value),
                'datatype': data_type,
                'case': entry['case'],
                'year': entry.get('year', 0),
                'month': entry.get('month', 0),
                'remote_uuid': entry.get('remote_uuid', ''),
                'remote_hostname': entry.get('remote_hostname', ''),
                'remote_path': entry.get('remote_path', ''),
                # new rows always start as not present remotely, size 0
                'remote_status': FileStatus.NOT_PRESENT.value,
                'local_size': 0,
                'transfer_type': entry.get('transfer_type', 'local')
            }
            for entry in file_list
        ]
        # insert in slices of batch_size rows per statement
        for offset in range(0, len(rows), batch_size):
            chunk = rows[offset:offset + batch_size]
            DataFile.insert_many(chunk).execute()
def populate_file_list(self):
    """
    Populate the database with the required DataFile entries

    Reads the 'simulations' and 'data_types' sections of self._config.
    Every key of 'simulations' other than 'start_year', 'end_year' and
    'comparisons' is treated as a case. For each case and each configured
    data type that the case requests (or every type, when the case lists
    'all'), one row is created per month of every year in
    [start_year, end_year] if the data type is flagged monthly, otherwise
    a single row with year and month set to 0.

    The directory of the first generated local path is created if it does
    not exist, and the rows are inserted in batches of 50 inside
    DataFile._meta.database.atomic().
    """
    print_line(
        line='Creating file table',
        event_list=self._event_list)

    simulations = self._config['simulations']
    data_types = self._config['data_types']
    start_year = int(simulations['start_year'])
    end_year = int(simulations['end_year'])

    # keys of the simulations section that hold settings, not cases
    non_case_keys = ('start_year', 'end_year', 'comparisons')
    # config values accepted as "monthly is enabled"
    truthy_values = ('True', 'true', '1', 1)
    step = 50

    with DataFile._meta.database.atomic():
        for case in simulations:
            if case in non_case_keys:
                continue
            case_config = simulations[case]

            for _type in data_types:
                data_types_for_case = case_config['data_types']
                if ('all' not in data_types_for_case
                        and _type not in data_types_for_case):
                    continue

                # setup the base local_path
                local_path = self.render_file_string(
                    data_type=_type,
                    data_type_option='local_path',
                    case=case)

                monthly = data_types[_type].get('monthly')
                if monthly and monthly in truthy_values:
                    # monthly data: one file per month of every year
                    periods = [
                        (year, month)
                        for year in range(start_year, end_year + 1)
                        for month in range(1, 13)]
                else:
                    # one-off data: a single file with no date attached
                    periods = [(None, None)]

                new_files = []
                for year, month in periods:
                    render_args = {'data_type': _type, 'case': case}
                    if year is not None:
                        # year/month are only passed for monthly data
                        render_args['year'] = year
                        render_args['month'] = month
                    filename = self.render_file_string(
                        data_type_option='file_format', **render_args)
                    r_path = self.render_file_string(
                        data_type_option='remote_path', **render_args)
                    new_files.append({
                        'name': filename,
                        'remote_path': os.path.join(r_path, filename),
                        'local_path': os.path.join(local_path, filename),
                        'local_status': FileStatus.NOT_PRESENT.value,
                        'case': case,
                        'remote_status': FileStatus.NOT_PRESENT.value,
                        'year': 0 if year is None else year,
                        'month': 0 if month is None else month,
                        'datatype': _type,
                        'local_size': 0,
                        'transfer_type': case_config['transfer_type'],
                        'remote_uuid': case_config.get('remote_uuid', ''),
                        'remote_hostname': case_config.get(
                            'remote_hostname', '')
                    })

                if not new_files:
                    # a monthly type with end_year < start_year produces
                    # no files; indexing new_files[0] would raise
                    continue

                # make sure the local directory for this type exists
                tail, _ = os.path.split(new_files[0]['local_path'])
                if not os.path.exists(tail):
                    os.makedirs(tail)

                for idx in range(0, len(new_files), step):
                    DataFile.insert_many(
                        new_files[idx: idx + step]).execute()

    msg = 'Database update complete'
    print_line(msg, self._event_list)
def populate_file_list(self, simstart, simend, experiment):
    """
    Populate the database with the required DataFile entries

    For every entry of self.types that is a key of file_type_map, file
    records are collected into one list and then inserted into the
    DataFile table in batches of 50. The types 'rest', the mpas
    stream/namelist types and 'meridionalHeatTransport' are delegated to
    populate_handle_rest, populate_handle_mpas and populate_heat_transport
    (presumably these append to the list they are given; confirm against
    their definitions). Every other type gets one file per month of every
    year in [simstart, simend], with the name built from the
    file_type_map template by replacing YEAR and MONTH (and EXPERIMENT
    for 'atm').

    Parameters:
        simstart (int): the start year of the simulation,
        simend (int): the end year of the simulation,
        experiment (str): the name of the experiment
            ex: 20170915.beta2.A_WCYCL1850S.ne30_oECv3_ICG.edison
    """
    # print('...') with one string argument produces the same output as
    # the Python 2 print statement and as the Python 3 print function
    print('Creating file table')
    if self.sta:
        print('Using short term archive')
    else:
        print('Short term archive turned off')
    if not self.start_year:
        self.start_year = simstart

    mpas_types = (
        'streams.ocean', 'streams.cice', 'mpas-o_in', 'mpas-cice_in')
    newfiles = []
    with DataFile._meta.database.atomic():
        for _type in self.types:
            if _type not in file_type_map:
                continue
            if _type == 'rest':
                self.populate_handle_rest(simstart, newfiles)
            elif _type in mpas_types:
                self.populate_handle_mpas(_type, newfiles)
            elif _type == 'meridionalHeatTransport':
                self.populate_heat_transport(newfiles)
            else:
                local_base = os.path.join(self.local_path, _type)
                if not os.path.exists(local_base):
                    os.makedirs(local_base)
                for year in range(simstart, simend + 1):
                    yearstr = '{0:04d}'.format(year)
                    for month in range(1, 13):
                        if _type == 'atm':
                            name = file_type_map[_type].replace(
                                'EXPERIMENT', experiment)
                        else:
                            name = file_type_map[_type]
                        monthstr = '{0:02d}'.format(month)
                        name = name.replace('YEAR', yearstr)
                        name = name.replace('MONTH', monthstr)
                        local_path = os.path.join(local_base, name)
                        if self.sta:
                            # short term archive layout on the remote side
                            remote_path = os.path.join(
                                self.remote_path,
                                'archive',
                                _type,
                                'hist',
                                name)
                        else:
                            remote_path = os.path.join(
                                self.remote_path, name)
                        newfiles = self._add_file(
                            newfiles=newfiles,
                            name=name,
                            local_path=local_path,
                            remote_path=remote_path,
                            _type=_type,
                            year=year,
                            month=month)

        print('Inserting file data into the table')
        self.mutex.acquire()
        try:
            step = 50
            for idx in range(0, len(newfiles), step):
                DataFile.insert_many(newfiles[idx:idx + step]).execute()
        except Exception as e:
            # best effort: an insert failure is reported, not propagated
            print_debug(e)
        finally:
            # the lock was acquired just above, so it is always ours to
            # release; no locked() check is needed
            self.mutex.release()
        print('Database update complete')