def get_time_box_work(self, prev_exec_time, exec_time):
    """
    Collect the paths matching '/caom2pipe_test/*' whose modification time
    lies inside the time box. Both ends of the box are inclusive.

    :param prev_exec_time: start of the time box, compared directly with
        os.stat().st_mtime
    :param exec_time: end of the time box, compared directly with
        os.stat().st_mtime
    :return: a list of dsc.StateRunnerMeta, one per matching path, each
        carrying the path and its st_mtime, in the order glob returned
        the paths
    """
    # Lazy on purpose: each path is stat'ed only when the comprehension
    # below reaches it.
    stamped_paths = (
        (path, os.stat(path).st_mtime)
        for path in glob.glob('/caom2pipe_test/*')
    )
    return [
        dsc.StateRunnerMeta(path, modified)
        for path, modified in stamped_paths
        if prev_exec_time <= modified <= exec_time
    ]
def get_time_box_work(self, prev_exec_time, exec_time):
    """
    Query archive.gemini.edu for the JSON summary records whose entry time
    falls inside the time box, and turn them into work items.

    If the query returns exactly 10000 records, the instance remembers it
    by setting self._max_records_encountered to True and storing the
    bounds of the time box in self._encounter_start and
    self._encounter_end.

    :param prev_exec_time float timestamp start of the time-boxed chunk
    :param exec_time float timestamp end of the time-boxed chunk
    :return: a deque of dsc.StateRunnerMeta, one per returned record, built
        from the record's 'name' and the timestamp of its 'entrytime'. The
        deque is empty when the query fails or returns no records.
    """
    self._logger.debug(
        f'Begin get_time_box_work from {prev_exec_time} to {exec_time}.'
    )
    # datetime format 2019-12-01T00:00:00.000000
    prev_dt_str = mc.make_time_tz(prev_exec_time).strftime(
        mc.ISO_8601_FORMAT
    )
    exec_dt_str = mc.make_time_tz(exec_time).strftime(mc.ISO_8601_FORMAT)
    # needs to be ordered by timestamps when processed
    url = (
        f'https://archive.gemini.edu/jsonsummary/canonical/'
        f'NotFail/notengineering/'
        f'entrytimedaterange={prev_dt_str}%20{exec_dt_str}/'
        f'?orderby=entrytime'
    )
    self._logger.info(f'Querying {url}')

    entries = deque()
    response = None
    try:
        response = mc.query_endpoint(url)
        if response is None:
            # Report through the instance logger like every other message
            # in this method, instead of the root logger.
            self._logger.warning(f'Could not query {url}.')
        else:
            metadata = response.json()
            if metadata is not None:
                if len(metadata) == 0:
                    self._logger.warning(
                        f'No query results returned for '
                        f'interval from {prev_exec_time} '
                        f'to {exec_time}.'
                    )
                else:
                    for entry in metadata:
                        file_name = entry.get('name')
                        entrytime = mc.make_time_tz(entry.get('entrytime'))
                        entries.append(
                            dsc.StateRunnerMeta(
                                file_name, entrytime.timestamp()
                            )
                        )
    finally:
        # Single point of release for the response, on success and on
        # failure alike.
        if response is not None:
            response.close()

    # NOTE(review): 10000 is presumably the archive's per-query record
    # cap, i.e. the result may be truncated - confirm against the archive
    # documentation.
    if len(entries) == 10000:
        self._max_records_encountered = True
        self._encounter_start = prev_exec_time
        self._encounter_end = exec_time
    self._logger.debug('End get_time_box_work.')
    return entries
def get_time_box_work(self, prev_exec_time, exec_time):
    """
    Time-box the file urls held in self._todo_list, a mapping from a
    timestamp to the collection of entries recorded for that timestamp.

    :param prev_exec_time: start of the time box, exclusive
    :param exec_time: end of the time box, inclusive
    :return: a list of dsc.StateRunnerMeta instances, one per entry whose
        timestamp lies in the time box, each carrying the entry and that
        timestamp
    """
    return [
        dsc.StateRunnerMeta(file_url, modified)
        for modified, file_urls in self._todo_list.items()
        if prev_exec_time < modified <= exec_time
        for file_url in file_urls
    ]
def get_time_box_work(self, prev_exec_time, exec_time):
    """
    Ask the TAP service for the artifacts to process in a time box.

    The query returns the uri and lastModified of artifacts on planes
    that (a) belong to the configured collection and have exactly one
    artifact, and (b) have a dataRelease later than the start of the time
    box and no later than its end. Rows are ordered by the observation's
    maxLastModified, ascending.

    :param prev_exec_time: POSIX timestamp, start of the time box
        (exclusive); rendered as a UTC date-time string for the query
    :param exec_time: POSIX timestamp, end of the time box (inclusive);
        rendered as a UTC date-time string for the query
    :return: a deque of dsc.StateRunnerMeta, one per returned row, holding
        the file name taken from the artifact uri and the timestamp of the
        artifact's lastModified value
    """
    self._logger.debug('Entering get_time_box_work')

    def _as_utc_text(posix_seconds):
        # datetime format 2019-12-01T00:00:00.000000
        moment = datetime.fromtimestamp(posix_seconds, tz=timezone.utc)
        return moment.strftime(mc.ISO_8601_FORMAT)

    window_start = _as_utc_text(prev_exec_time)
    window_end = _as_utc_text(exec_time)
    query = (
        "SELECT A.uri, A.lastModified "
        "FROM caom2.Observation AS O "
        "JOIN caom2.Plane AS P ON O.obsID = P.obsID "
        "JOIN caom2.Artifact AS A ON P.planeID = A.planeID "
        "WHERE P.planeID IN ( "
        " SELECT A.planeID "
        " FROM caom2.Observation AS O "
        " JOIN caom2.Plane AS P ON O.obsID = P.obsID "
        " JOIN caom2.Artifact AS A ON P.planeID = A.planeID "
        f" WHERE O.collection = '{self._config.collection}' "
        " GROUP BY A.planeID "
        " HAVING COUNT(A.artifactID) = 1 ) "
        f"AND P.dataRelease > '{window_start}' "
        f"AND P.dataRelease <= '{window_end}' "
        "ORDER BY O.maxLastModified ASC "
    )
    rows = clc.query_tap_client(query, self._query_client)
    # results look like:
    # gemini:GEM/N20191202S0125.fits, ISO 8601
    return deque(
        dsc.StateRunnerMeta(
            mc.CaomName(row['uri']).file_name,
            mc.make_time(row['lastModified']).timestamp(),
        )
        for row in rows
    )
def get_time_box_work(self, prev_exec_time, exec_time):
    """
    Time-box the file list returned from the site scrape. The list is
    self._todo_list, a dict whose keys are the entries to retrieve and
    whose values are the timestamps associated with the respective
    entries.

    :param prev_exec_time: start of the time box, exclusive; compared
        directly with the values of self._todo_list
    :param exec_time: end of the time box, inclusive; compared directly
        with the values of self._todo_list
    :return: a list of dsc.StateRunnerMeta instances, one per entry whose
        timestamp lies in the time box, each carrying the entry and its
        timestamp
    """
    self._logger.debug('Entering get_time_box_work')
    return [
        dsc.StateRunnerMeta(name, modified)
        for name, modified in self._todo_list.items()
        if prev_exec_time < modified <= exec_time
    ]