def test_get_and_create_download_dir():
    """Check the download directory for the default and an updated config.

    The ``downloads/download_dir`` setting is put back and the directory used
    for the second check is removed afterwards, so the test neither leaks
    global config state into other tests nor leaves a stray directory behind.
    """
    default_dir = os.path.join(USER, 'sunpy', 'data')

    # test default config
    path = get_and_create_download_dir()
    assert path == default_dir

    # test updated config
    new_path = os.path.join(USER, 'data_here_please')
    config.set('downloads', 'download_dir', new_path)
    try:
        path = get_and_create_download_dir()
        # os.path.join drops its first argument when the second one is
        # absolute, so this compares against new_path itself
        # (assumes USER is an absolute path -- TODO confirm).
        assert path == os.path.join(USER, new_path)
    finally:
        # Set the config back, even when the assertion above fails.
        config.set('downloads', 'download_dir', default_dir)
        if os.path.isdir(new_path):
            os.rmdir(new_path)
def test_get_and_create_download_dir(undo_download_dir_patch):
    """Check the download directory for the default and an updated config.

    The config is restored and the temporary directory removed in a
    ``finally`` block, so a failing assertion cannot leak the patched
    ``downloads/download_dir`` setting into later tests.
    """
    default_dir = os.path.join(USER, 'sunpy', 'data')

    # test default config
    path = get_and_create_download_dir()
    assert Path(path) == Path(USER) / 'sunpy' / 'data'

    # test updated config
    new_path = os.path.join(USER, 'sunpy_data_here_please')
    config.set('downloads', 'download_dir', new_path)
    try:
        path = get_and_create_download_dir()
        # os.path.join drops its first argument when the second one is
        # absolute, so this compares against new_path itself
        # (assumes USER is an absolute path -- TODO confirm).
        assert path == os.path.join(USER, new_path)
    finally:
        # Set the config back
        config.set('downloads', 'download_dir', default_dir)
        if os.path.isdir(new_path):
            os.rmdir(new_path)
def test_get_and_create_download_dir(undo_download_dir_patch):
    """Check the download directory for the default and an updated config.

    The config is restored and the temporary directory removed in a
    ``finally`` block, so a failing assertion cannot leak the patched
    ``downloads/download_dir`` setting into later tests.
    """
    default_dir = os.path.join(USER, 'sunpy', 'data')

    # test default config
    path = get_and_create_download_dir()
    assert Path(path) == Path(USER) / 'sunpy' / 'data'

    # test updated config
    new_path = os.path.join(USER, 'sunpy_data_here_please')
    config.set('downloads', 'download_dir', new_path)
    try:
        path = get_and_create_download_dir()
        # os.path.join drops its first argument when the second one is
        # absolute, so this compares against new_path itself
        # (assumes USER is an absolute path -- TODO confirm).
        assert path == os.path.join(USER, new_path)
    finally:
        # Set the config back
        config.set('downloads', 'download_dir', default_dir)
        if os.path.isdir(new_path):
            os.rmdir(new_path)
def test_print_config_files(tmpdir, tmp_path, undo_download_dir_patch):
    """The text printed by ``print_config`` names the config file and data dirs."""
    # Capture everything print_config writes to stdout.
    captured = io.StringIO()
    with redirect_stdout(captured):
        print_config()
    printed = captured.getvalue()

    assert "time_format = %Y-%m-%d %H:%M:%S" in printed

    config_file = _find_config_files()[0]
    assert config_file in printed

    download_dir = get_and_create_download_dir()
    assert download_dir in printed

    sample_dir = get_and_create_sample_dir()
    assert sample_dir in printed
def from_url(cls, url):
    """
    Download ``url`` and build an object from the downloaded file.

    The file is fetched with ``download_file`` into the directory returned
    by ``get_and_create_download_dir``; the resulting path is handed to
    ``cls.read``.

    Parameters
    ----------
    url : str
        URL to retrieve the data from

    Returns
    -------
    object
        Whatever ``cls.read`` returns for the downloaded path.
    """
    download_dir = get_and_create_download_dir()
    local_path = download_file(url, download_dir)
    return cls.read(local_path)
def from_url(cls, url):
    """
    Download ``url`` and build an object from the downloaded file.

    The file is fetched with ``download_file`` into the directory returned
    by ``get_and_create_download_dir``; the resulting path is handed to
    ``cls.read``.

    Parameters
    ----------
    url : str
        URL to retrieve the data from

    Returns
    -------
    object
        Whatever ``cls.read`` returns for the downloaded path.
    """
    download_dir = get_and_create_download_dir()
    local_path = download_file(url, download_dir)
    return cls.read(local_path)
def get(self, qres, path=None, error_callback=None, **kwargs):
    """
    Download a set of results.

    Parameters
    ----------
    qres : `~sunpy.net.dataretriever.QueryResponse`
        Results to download.
    path : str, optional
        Where to save the files. ``None`` uses the directory returned by
        ``get_and_create_download_dir``. A string without ``'{file}'`` is
        treated as a directory and ``'{file}'`` is appended to it. A string
        that already contains ``'{file}'`` is used as the filename template
        as given. The template is filled in with ``str.format`` from each
        result's ``map_`` plus a ``file`` entry holding the remote filename.
    error_callback : callable, optional
        Forwarded unchanged to ``Downloader.download`` for every file.
    **kwargs
        Accepted for interface compatibility; not used here.

    Returns
    -------
    Results Object
    """
    urls = [qrblock.url for qrblock in qres]
    filenames = [url.split('/')[-1] for url in urls]

    # Resolve the filename template once; it does not depend on the file.
    # Previously a path that already contained '{file}' matched neither
    # branch and left ``fname`` unassigned.
    if path is None:
        template = os.path.join(get_and_create_download_dir(), '{file}')
    elif isinstance(path, six.string_types) and '{file}' not in path:
        template = os.path.join(path, '{file}')
    else:
        template = path

    paths = []
    for i, filename in enumerate(filenames):
        fields = qres[i].map_.copy()
        fields['file'] = filename
        fname = os.path.expanduser(template.format(**fields))
        # Do not clobber a file that is already on disk.
        if os.path.exists(fname):
            fname = replacement_filename(fname)
        paths.append(partial(simple_path, fname))

    res = Results(lambda x: None, 0, lambda map_: self._link(map_))
    dobj = Downloader(max_conn=len(urls), max_total=len(urls))

    # Call res.require([url]) for every URL up front, before the first
    # download is started.
    callbacks = [res.require([url]) for url in urls]
    for aurl, ncall, fname in zip(urls, callbacks, paths):
        dobj.download(aurl, fname, ncall, error_callback)

    return res
def get(self, qres, path=None, error_callback=None, **kwargs):
    """
    Download a set of results.

    Parameters
    ----------
    qres : `~sunpy.net.dataretriever.QueryResponse`
        Results to download.
    path : str, optional
        Where to save the files. ``None`` uses the directory returned by
        ``get_and_create_download_dir``. A string without ``'{file}'`` is
        treated as a directory and ``'{file}'`` is appended to it. A string
        that already contains ``'{file}'`` is used as the filename template
        as given. The template is filled in with ``str.format`` from each
        result's ``map_`` plus a ``file`` entry holding the remote filename.
    error_callback : callable, optional
        Forwarded unchanged to ``Downloader.download`` for every file.
    **kwargs
        Accepted for interface compatibility; not used here.

    Returns
    -------
    Results Object
    """
    urls = [qrblock.url for qrblock in qres]
    filenames = [url.split('/')[-1] for url in urls]

    # Resolve the filename template once; it does not depend on the file.
    # Previously a path that already contained '{file}' matched neither
    # branch and left ``fname`` unassigned.
    if path is None:
        template = os.path.join(get_and_create_download_dir(), '{file}')
    elif isinstance(path, six.string_types) and '{file}' not in path:
        template = os.path.join(path, '{file}')
    else:
        template = path

    paths = []
    for i, filename in enumerate(filenames):
        fields = qres[i].map_.copy()
        fields['file'] = filename
        fname = os.path.expanduser(template.format(**fields))
        # Do not clobber a file that is already on disk.
        if os.path.exists(fname):
            fname = replacement_filename(fname)
        paths.append(partial(simple_path, fname))

    res = Results(lambda x: None, 0, lambda map_: self._link(map_))
    dobj = Downloader(max_conn=len(urls), max_total=len(urls))

    # Call res.require([url]) for every URL up front, before the first
    # download is started.
    callbacks = [res.require([url]) for url in urls]
    for aurl, ncall, fname in zip(urls, callbacks, paths):
        dobj.download(aurl, fname, ncall, error_callback)

    return res
def test_print_config_files():
    """The text printed by ``print_config`` names the config file and data dirs."""
    # Imported locally so the test does not depend on a module-level import.
    from contextlib import redirect_stdout

    # redirect_stdout puts sys.stdout back even if print_config() raises,
    # which the previous manual swap did not.
    with io.StringIO() as buf, redirect_stdout(buf):
        print_config()
        printed = buf.getvalue()

    assert "time_format = %Y-%m-%d %H:%M:%S" in printed
    assert _find_config_files()[0] in printed
    assert get_and_create_download_dir() in printed
    assert get_and_create_sample_dir() in printed
def test_print_config_files(undo_download_dir_patch):
    """The text printed by ``print_config`` names the config file and data dirs."""
    # Imported locally so the test does not depend on a module-level import.
    from contextlib import redirect_stdout

    # redirect_stdout puts sys.stdout back even if print_config() raises,
    # which the previous manual swap did not.
    with io.StringIO() as buf, redirect_stdout(buf):
        print_config()
        printed = buf.getvalue()

    assert "time_format = %Y-%m-%d %H:%M:%S" in printed
    assert _find_config_files()[0] in printed
    assert get_and_create_download_dir() in printed
    assert get_and_create_sample_dir() in printed
def get_lytaf_event_types(lytaf_path=None, print_event_types=True):
    """Prints the different event types in each of the LYTAF databases.

    Parameters
    ----------
    lytaf_path : `str`
        Path location where LYTAF files are stored.
        Default = directory returned by ``get_and_create_download_dir``.

    print_event_types : `bool`
        If True, prints the artifacts in each lytaf database to screen.

    Returns
    -------
    all_event_types : `list`
        List of all events types in all lytaf databases.

    """
    # Set lytaf_path if not done by user
    if not lytaf_path:
        lytaf_path = get_and_create_download_dir()
    suffixes = ["lyra", "manual", "ppt", "science"]
    all_event_types = []
    # For each database file extract the event types and print them.
    if print_event_types:
        print("\nLYTAF Event Types\n-----------------\n")
    for suffix in suffixes:
        dbname = "annotation_{0}.db".format(suffix)
        # Check database file exists, else download it.
        check_download_file(dbname, LYTAF_REMOTE_PATH, lytaf_path)
        # Open SQLITE3 LYTAF files
        connection = sqlite3.connect(os.path.join(lytaf_path, dbname))
        try:
            # Create cursor to manipulate data in annotation file
            cursor = connection.cursor()
            cursor.execute("select type from eventType;")
            event_types = cursor.fetchall()
            cursor.close()
        finally:
            # Always release the database handle, also when the query fails.
            connection.close()
        # Each fetched row is a 1-tuple; keep only the type itself.
        all_event_types.extend(event_type[0] for event_type in event_types)
        if print_event_types:
            print("----------------\n{0} database\n----------------".format(
                suffix))
            for event_type in event_types:
                print(str(event_type[0]))
            print(" ")
    return all_event_types
def get_lytaf_event_types(lytaf_path=None, print_event_types=True):
    """Prints the different event types in each of the LYTAF databases.

    Parameters
    ----------
    lytaf_path : `str`
        Path location where LYTAF files are stored.
        Default = directory returned by ``get_and_create_download_dir``.

    print_event_types : `bool`
        If True, prints the artifacts in each lytaf database to screen.

    Returns
    -------
    all_event_types : `list`
        List of all events types in all lytaf databases.

    """
    # Set lytaf_path if not done by user
    if not lytaf_path:
        lytaf_path = get_and_create_download_dir()
    suffixes = ["lyra", "manual", "ppt", "science"]
    all_event_types = []
    # For each database file extract the event types and print them.
    if print_event_types:
        print("\nLYTAF Event Types\n-----------------\n")
    for suffix in suffixes:
        dbname = "annotation_{0}.db".format(suffix)
        # Check database file exists, else download it.
        check_download_file(dbname, LYTAF_REMOTE_PATH, lytaf_path)
        # Open SQLITE3 LYTAF files
        connection = sqlite3.connect(os.path.join(lytaf_path, dbname))
        try:
            # Create cursor to manipulate data in annotation file
            cursor = connection.cursor()
            cursor.execute("select type from eventType;")
            event_types = cursor.fetchall()
            cursor.close()
        finally:
            # Always release the database handle, also when the query fails.
            connection.close()
        # Each fetched row is a 1-tuple; keep only the type itself.
        all_event_types.extend(event_type[0] for event_type in event_types)
        if print_event_types:
            print("----------------\n{0} database\n----------------"
                  .format(suffix))
            for event_type in event_types:
                print(str(event_type[0]))
            print(" ")
    return all_event_types
def download(self, url, path=None, callback=None, errback=None):
    """Start (or queue) the download of a single URL.

    Parameters
    ----------
    url : str
        URL of file to download
    path : function, str
        Location to save file to. Either a directory given as a string or a
        callable; a directory (or, for ``None``, the directory returned by
        ``get_and_create_download_dir``) is bound to ``default_name``.
    callback : function
        Called when the download completes successfully; defaults to
        ``self._default_callback``.
    errback : function
        Called when the download fails; defaults to
        ``self._default_error_callback``.

    Returns
    -------
    out : None

    Raises
    ------
    ValueError
        If ``path`` is neither ``None``, a string nor a callable.
    """
    # Server key under which this request is queued if it cannot start now.
    server = self._get_server(url)

    # Work out which directory, if any, must be bound to default_name.
    directory = None
    if path is None:
        directory = get_and_create_download_dir()
    elif isinstance(path, str):
        directory = path
    elif not callable(path):
        raise ValueError("path must be: None, string or callable")
    if directory is not None:
        path = partial(default_name, directory)

    # Fall back to the instance-level callbacks.
    callback = self._default_callback if callback is None else callback
    errback = self._default_error_callback if errback is None else errback

    # Try to start right away; otherwise remember the request for later.
    started = self._attempt_download(url, path, callback, errback)
    if not started:
        self.q[server].append((url, path, callback, errback))
def download(self, url, path=None, callback=None, errback=None):
    """Start (or queue) the download of a single URL.

    Parameters
    ----------
    url : string
        URL of file to download
    path : function, string
        Location to save file to. Either a directory given as a string or a
        callable; a directory (or, for ``None``, the directory returned by
        ``get_and_create_download_dir``) is bound to ``default_name``.
    callback : function
        Called when the download completes successfully; defaults to
        ``self._default_callback``.
    errback : function
        Called when the download fails; defaults to
        ``self._default_error_callback``.

    Returns
    -------
    out : None

    Raises
    ------
    ValueError
        If ``path`` is neither ``None``, a string nor a callable.
    """
    # Server key under which this request is queued if it cannot start now.
    server = self._get_server(url)

    # Work out which directory, if any, must be bound to default_name.
    directory = None
    if path is None:
        directory = get_and_create_download_dir()
    elif isinstance(path, six.string_types):
        directory = path
    elif not callable(path):
        raise ValueError("path must be: None, string or callable")
    if directory is not None:
        path = partial(default_name, directory)

    # Fall back to the instance-level callbacks.
    callback = self._default_callback if callback is None else callback
    errback = self._default_error_callback if errback is None else errback

    # Try to start right away; otherwise remember the request for later.
    started = self._attempt_download(url, path, callback, errback)
    if not started:
        self.q[server].append((url, path, callback, errback))
def _download(uri, kwargs, err='Unable to download data at specified URL'): """Attempts to download data at the specified URI. Parameters ---------- **kwargs : uri A url """ _filename = os.path.basename(uri).split("?")[0] # user specifies a download directory if "directory" in kwargs: download_dir = os.path.expanduser(kwargs["directory"]) else: download_dir = get_and_create_download_dir() # overwrite the existing file if the keyword is present if "overwrite" in kwargs: overwrite = kwargs["overwrite"] else: overwrite = False # If the file is not already there, download it filepath = os.path.join(download_dir, _filename) if not(os.path.isfile(filepath)) or (overwrite and os.path.isfile(filepath)): try: response = urllib.request.urlopen(uri) except (urllib.error.HTTPError, urllib.error.URLError): raise urllib.error.URLError(err) with open(filepath, 'wb') as fp: shutil.copyfileobj(response, fp) else: warnings.warn("Using existing file rather than downloading, use " "overwrite=True to override.", RuntimeWarning) return filepath
def _download(uri, kwargs, err='Unable to download data at specified URL'): """Attempts to download data at the specified URI. Parameters ---------- **kwargs : uri A url """ _filename = os.path.basename(uri).split("?")[0] # user specifies a download directory if "directory" in kwargs: download_dir = os.path.expanduser(kwargs["directory"]) else: download_dir = get_and_create_download_dir() # overwrite the existing file if the keyword is present if "overwrite" in kwargs: overwrite = kwargs["overwrite"] else: overwrite = False # If the file is not already there, download it filepath = os.path.join(download_dir, _filename) if not(os.path.isfile(filepath)) or (overwrite and os.path.isfile(filepath)): try: response = urllib.request.urlopen(uri) except (urllib.error.HTTPError, urllib.error.URLError): raise urllib.error.URLError(err) with open(filepath, 'wb') as fp: shutil.copyfileobj(response, fp) else: warnings.warn("Using existing file rather than downloading, use " "overwrite=True to override.", RuntimeWarning) return filepath
def _remove_lytaf_events(time, channels=None, artifacts=None,
                         return_artifacts=False, fitsfile=None,
                         csvfile=None, filecolumns=None,
                         lytaf_path=None, force_use_local_lytaf=False):
    """
    Removes periods of LYRA artifacts from a time series.

    This functions removes periods corresponding to certain artifacts recorded
    in the LYRA annotation file from an array of times given by the time input.
    If a list of arrays of other properties is supplied through the channels
    kwarg, then the relevant values from these arrays are also removed.  This
    is done by assuming that each element in each array supplied corresponds to
    the time in the same index in time array.  The artifacts to be removed are
    given via the artifacts kwarg, which must name at least one artifact type.

    Parameters
    ----------
    time : `numpy.ndarray` of `datetime.datetime`
        Gives the times of the timeseries.

    channels : `list` of `numpy.array` convertible to float64.
        Contains arrays of the irradiances taken at the times in the time
        variable.  Each element in the list must have the same number of
        elements as time.

    artifacts : `list` of strings
        Contain the artifact types to be removed.  For list of artifact types
        see reference [1].  For example, if user wants to remove only large
        angle rotations, listed at reference [1] as LAR, let artifacts=["LAR"].
        A single string is treated as a one-element list.  There is no usable
        default: a `ValueError` is raised if this is None or empty.

    return_artifacts : `bool`
        Set to True to return a dict describing the artifacts found, removed,
        etc. (see Returns).
        Default=False

    fitsfile : `str`
        file name (including file path and suffix, .fits) of output fits file
        which is generated if this kwarg is not None.
        Default=None, i.e. no fits file is output.

    csvfile : `str`
        file name (including file path and suffix, .csv) of output csv file
        which is generated if this kwarg is not None.
        Default=None, i.e. no csv file is output.

    filecolumns : `list` of strings
        Gives names of columns of any output files produced.  Although
        initially set to None above, the default is in fact
        ["time", "channel0", "channel1",..."channelN"]
        where N is the number of irradiance arrays in the channels input
        (assuming 0-indexed counting).

    lytaf_path : `str`
        directory path where the LYRA annotation files are stored.
        Default = directory returned by ``get_and_create_download_dir``.

    force_use_local_lytaf : `bool`
        Ensures current local version of lytaf files are not replaced by
        up-to-date online versions even if current local lytaf files do not
        cover entire input time range etc.
        Default=False

    Returns
    -------
    clean_time : `numpy.ndarray` of `datetime.datetime`
        time array with artifact periods removed.

    clean_channels : `list` ndarrays/array-likes convertible to float64
        list of irradiance arrays with artifact periods removed.  Only
        returned when channels was supplied.

    artifact_status : `dict`
        Only returned when return_artifacts is True.  Contains information on
        what artifacts were found, removed, etc. from the time series.
        artifact_status["lytaf"] = artifacts found : `numpy.recarray`
            The full LYRA annotation file for the time series time range
            output by get_lytaf_events().
        artifact_status["removed"] = artifacts removed : `numpy.recarray`
            Artifacts which were found and removed from from time series.
        artifact_status["not_removed"] = artifacts found but not removed :
              `numpy.recarray`
            Artifacts which were found but not removed as they were not
            included when user defined artifacts kwarg.
        artifact_status["not_found"] = artifacts not found : `list` of strings
            Artifacts listed to be removed by user when defining artifacts
            kwarg which were not found in time series time range.

    Raises
    ------
    TypeError
        If channels is given but is not a list, or if artifacts contains a
        non-string element.
    ValueError
        If no artifacts are given, or if one of them is not an event type
        returned by get_lytaf_event_types().

    References
    ----------
    [1] http://proba2.oma.be/data/TARDIS

    Example
    -------
    Sample data for example
        >>> from datetime import datetime, timedelta
        >>> from sunpy.instr.lyra import _remove_lytaf_events

        >>> time = np.array([datetime(2013, 2, 1)+timedelta(minutes=i)
        ...                  for i in range(120)])
        >>> channel_1 = np.zeros(len(time))+0.4
        >>> channel_2 = np.zeros(len(time))+0.1

    Remove LARs (Large Angle Rotations) from time series.
        >>> time_clean, channels_clean = _remove_lytaf_events(
        ...   time, channels=[channel_1, channel_2], artifacts=['LAR'])  # doctest: +REMOTE_DATA

    """
    # Check inputs
    if not lytaf_path:
        lytaf_path = get_and_create_download_dir()
    if channels and not isinstance(channels, list):
        raise TypeError("channels must be None or a list of numpy arrays "
                        "of dtype 'float64'.")
    if not artifacts:
        raise ValueError("User has supplied no artifacts to remove.")
    if isinstance(artifacts, str):
        artifacts = [artifacts]
    if not all(isinstance(artifact_type, str) for artifact_type in artifacts):
        raise TypeError("All elements in artifacts must in strings.")
    all_lytaf_event_types = get_lytaf_event_types(lytaf_path=lytaf_path,
                                                  print_event_types=False)
    for artifact in artifacts:
        if artifact not in all_lytaf_event_types:
            # Show the valid types so the error message below makes sense.
            print(all_lytaf_event_types)
            raise ValueError("{0} is not a valid artifact type. See above.".format(artifact))
    # Define outputs
    clean_time = np.array([parse_time(t) for t in time])
    clean_channels = copy.deepcopy(channels)
    artifacts_not_found = []
    # Get LYTAF file for given time range
    lytaf = get_lytaf_events(time[0], time[-1], lytaf_path=lytaf_path,
                             force_use_local_lytaf=force_use_local_lytaf)

    # Find events in lytaf which are to be removed from time series.
    artifact_indices = np.empty(0, dtype="int64")
    for artifact_type in artifacts:
        indices = np.where(lytaf["event_type"] == artifact_type)[0]
        # If none of a given type of artifact is found, record this
        # type in artifact_not_found list.
        if len(indices) == 0:
            artifacts_not_found.append(artifact_type)
        else:
            # Else, record the indices of the artifacts of this type
            artifact_indices = np.concatenate((artifact_indices, indices))
    artifact_indices.sort()

    # Remove relevant artifacts from timeseries. If none of the
    # artifacts the user wanted removed were found, raise a warning and
    # continue with code.
    if not len(artifact_indices):
        warn("None of user supplied artifacts were found.")
        artifacts_not_found = artifacts
    else:
        # Remove periods corresponding to artifacts from flux and time
        # arrays.
        bad_indices = np.empty(0, dtype="int64")
        all_indices = np.arange(len(time))
        for index in artifact_indices:
            bad_period = np.logical_and(time >= lytaf["begin_time"][index],
                                        time <= lytaf["end_time"][index])
            bad_indices = np.append(bad_indices, all_indices[bad_period])
        clean_time = np.delete(clean_time, bad_indices)
        if channels:
            for i, f in enumerate(clean_channels):
                clean_channels[i] = np.delete(f, bad_indices)
    # If return_artifacts kwarg is True, return a list containing
    # information on what artifacts found, removed, etc. See docstring.
    if return_artifacts:
        artifact_status = {"lytaf": lytaf,
                           "removed": lytaf[artifact_indices],
                           "not_removed": np.delete(lytaf, artifact_indices),
                           "not_found": artifacts_not_found}
    # NOTE(review): both output files below are built from the *input*
    # time/channels, not from clean_time/clean_channels -- confirm that
    # writing the uncleaned series is intended.
    # Output FITS file if fits kwarg is set
    if fitsfile:
        # Create time array of time strings rather than datetime objects
        # and verify filecolumns have been correctly input.  If None,
        # generate generic filecolumns (see docstring of function called
        # below.
        string_time, filecolumns = _prep_columns(time, channels, filecolumns)
        # Prepare column objects.
        cols = [fits.Column(name=filecolumns[0], format="26A",
                            array=string_time)]
        if channels:
            for i, f in enumerate(channels):
                cols.append(fits.Column(name=filecolumns[i+1], format="D",
                                        array=f))
        coldefs = fits.ColDefs(cols)
        # fits.new_table has been removed from astropy;
        # BinTableHDU.from_columns is its replacement.
        tbhdu = fits.BinTableHDU.from_columns(coldefs)
        hdu = fits.PrimaryHDU()
        tbhdulist = fits.HDUList([hdu, tbhdu])
        # Write data to fits file.
        tbhdulist.writeto(fitsfile)
    # Output csv file if csv kwarg is set.
    if csvfile:
        # Create time array of time strings rather than datetime objects
        # and verify filecolumns have been correctly input.  If None,
        # generate generic filecolumns (see docstring of function called
        # below.
        string_time, filecolumns = _prep_columns(time, channels, filecolumns)
        # Open and write data to csv file.
        with open(csvfile, 'w') as openfile:
            csvwriter = csv.writer(openfile, delimiter=';')
            # Write header.
            csvwriter.writerow(filecolumns)
            # Write data.  The row is always a list: handing writerow a bare
            # string would split the timestamp into one column per character.
            for i in range(len(time)):
                row = [string_time[i]]
                if channels:
                    row.extend(f[i] for f in channels)
                csvwriter.writerow(row)
    # Return values.
    if return_artifacts:
        if not channels:
            return clean_time, artifact_status
        else:
            return clean_time, clean_channels, artifact_status
    else:
        if not channels:
            return clean_time
        else:
            return clean_time, clean_channels
def get_lytaf_events(start_time, end_time, lytaf_path=None,
                     combine_files=("lyra", "manual", "ppt", "science"),
                     csvfile=None, force_use_local_lytaf=False):
    """
    Extracts combined lytaf file for given time range.

    Given a time range defined by start_time and end_time, this function
    extracts the segments of each LYRA annotation file and combines them.

    Parameters
    ----------
    start_time : `astropy.time.Time` or `str`
        Start time of period for which annotation file is required.

    end_time : `astropy.time.Time` or `str`
        End time of period for which annotation file is required.

    lytaf_path : `str`
        directory path where the LYRA annotation files are stored.
        Default = directory returned by ``get_and_create_download_dir``.

    combine_files : `tuple` of strings
        States which LYRA annotation files are to be combined.
        Default is all four, i.e. lyra, manual, ppt, science.
        See Notes section for an explanation of each.

    csvfile : `str`
        If set, the combined annotations are also written to this file as
        ';'-delimited csv with a header row.
        Default=None, i.e. no csv file is output.

    force_use_local_lytaf : `bool`
        Ensures current local version of lytaf files are not replaced by
        up-to-date online versions even if current local lytaf files do not
        cover entire input time range etc.
        Default=False

    Returns
    -------
    lytaf : `numpy.recarray`
        Containing the various parameters stored in the LYTAF files.

    Raises
    ------
    ValueError
        If combine_files contains anything other than 'lyra', 'manual',
        'ppt' or 'science'.

    Notes
    -----
    There are four LYRA annotation files which mark different types of events
    or artifacts in the data.  They are named annotation_suffix.db where
    suffix is a variable equalling either lyra, manual, ppt, or science.

    annotation_lyra.db : contains entries regarding possible effects to
        the data due to normal operation of LYRA instrument.

    annotation_manual.db : contains entries regarding possible effects
        to the data due to unusual or manually logged events.

    annotation_ppt.db : contains entries regarding possible effects to
        the data due to pointing or positioning of PROBA2.

    annotation_science.db : contains events in the data scientifically
        interesting, e.g. GOES flares.

    References
    ----------
    Further documentation: http://proba2.oma.be/data/TARDIS

    Examples
    --------
    Get all events in the LYTAF files for January 2014
        >>> from sunpy.instr.lyra import get_lytaf_events
        >>> lytaf = get_lytaf_events('2014-01-01', '2014-02-01')  # doctest: +REMOTE_DATA

    """
    # Check inputs
    # Check lytaf path
    if not lytaf_path:
        lytaf_path = get_and_create_download_dir()
    # Parse start_time and end_time
    start_time = parse_time(start_time)
    end_time = parse_time(end_time)
    # Check combine_files contains correct inputs
    if not all(suffix in ["lyra", "manual", "ppt", "science"]
               for suffix in combine_files):
        raise ValueError("Elements in combine_files must be strings equalling "
                         "'lyra', 'manual', 'ppt', or 'science'.")
    # Remove any duplicates from combine_files input
    combine_files = sorted(set(combine_files))
    # Convert input times to UNIX timestamp format since this is the
    # time format in the annotation files
    start_time_uts = (start_time - Time('1970-1-1')).sec
    end_time_uts = (end_time - Time('1970-1-1')).sec

    # Define numpy record array which will hold the information from
    # the annotation file.
    lytaf = np.empty((0,), dtype=[("insertion_time", object),
                                  ("begin_time", object),
                                  ("reference_time", object),
                                  ("end_time", object),
                                  ("event_type", object),
                                  ("event_definition", object)])
    # Access annotation files
    for suffix in combine_files:
        # Check database files are present
        dbname = "annotation_{0}.db".format(suffix)
        check_download_file(dbname, LYTAF_REMOTE_PATH, lytaf_path)
        # Open SQLITE3 annotation files
        connection = sqlite3.connect(os.path.join(lytaf_path, dbname))
        try:
            # Create cursor to manipulate data in annotation file
            cursor = connection.cursor()
            # Check if lytaf file spans the start and end times defined by
            # user.  If not, download newest version.
            # First get start time of first event and end time of last
            # event in lytaf.
            # The timestamps are converted as UTC, like every other
            # timestamp in this function; a local-time conversion would
            # shift this check by the machine's UTC offset.
            cursor.execute("select begin_time from event order by begin_time asc "
                           "limit 1;")
            db_first_begin_time = cursor.fetchone()[0]
            db_first_begin_time = datetime.datetime.utcfromtimestamp(
                db_first_begin_time)
            cursor.execute("select end_time from event order by end_time desc "
                           "limit 1;")
            db_last_end_time = cursor.fetchone()[0]
            db_last_end_time = datetime.datetime.utcfromtimestamp(
                db_last_end_time)
            # If lytaf does not include entire input time range...
            if not force_use_local_lytaf:
                if end_time > db_last_end_time or start_time < db_first_begin_time:
                    # ...close lytaf file...
                    cursor.close()
                    connection.close()
                    # ...Download latest lytaf file...
                    check_download_file(dbname, LYTAF_REMOTE_PATH, lytaf_path,
                                        replace=True)
                    # ...and open new version of lytaf database.
                    connection = sqlite3.connect(os.path.join(lytaf_path, dbname))
                    cursor = connection.cursor()
            # Select and extract the data from event table within file within
            # given time range.  The bounds are bound as query parameters
            # instead of being formatted into the SQL text.
            cursor.execute("select insertion_time, begin_time, reference_time, "
                           "end_time, eventType_id from event where end_time >= "
                           "? and begin_time <= ?",
                           (float(start_time_uts), float(end_time_uts)))
            event_rows = cursor.fetchall()
            # Select and extract the event types from eventType table
            cursor.row_factory = sqlite3.Row
            cursor.execute("select * from eventType")
            eventType_rows = cursor.fetchall()
            eventType_id = []
            eventType_type = []
            eventType_definition = []
            for eventType_row in eventType_rows:
                eventType_id.append(eventType_row["id"])
                eventType_type.append(eventType_row["type"])
                eventType_definition.append(eventType_row["definition"])
            # Collect this file's records and append them in one go;
            # np.append copies the whole array on every call.
            new_records = []
            for event_row in event_rows:
                id_index = eventType_id.index(event_row[4])
                new_records.append(
                    (Time(datetime.datetime.utcfromtimestamp(event_row[0]),
                          format='datetime'),
                     Time(datetime.datetime.utcfromtimestamp(event_row[1]),
                          format='datetime'),
                     Time(datetime.datetime.utcfromtimestamp(event_row[2]),
                          format='datetime'),
                     Time(datetime.datetime.utcfromtimestamp(event_row[3]),
                          format='datetime'),
                     eventType_type[id_index],
                     eventType_definition[id_index]))
            lytaf = np.append(lytaf, np.array(new_records, dtype=lytaf.dtype))
            cursor.close()
        finally:
            # Close file, also when a query or the re-download fails.
            connection.close()
    # Sort lytaf in ascending order of begin time
    np.recarray.sort(lytaf, order="begin_time")

    # If csvfile kwarg is set, write out lytaf to csv file
    if csvfile:
        # Open and write data to csv file.
        with open(csvfile, 'w') as openfile:
            csvwriter = csv.writer(openfile, delimiter=';')
            # Write header.
            csvwriter.writerow(lytaf.dtype.names)
            # Write data: the four time fields as ISO-like strings, then the
            # event type and definition unchanged.
            for row in lytaf:
                new_row = [row[k].strftime("%Y-%m-%dT%H:%M:%S")
                           for k in range(4)]
                new_row.append(row[4])
                new_row.append(row[5])
                csvwriter.writerow(new_row)

    return lytaf
def remove_lytaf_events_from_timeseries(ts, artifacts=None,
                                        return_artifacts=False,
                                        lytaf_path=None,
                                        force_use_local_lytaf=False):
    """
    Return a copy of a TimeSeries with LYTAF artifact periods removed.

    Every column of ``ts.data`` is passed, together with the index of
    ``ts.data``, to `_remove_lytaf_events`; the cleaned arrays are put into
    a new `pandas.DataFrame` on a deep copy of ``ts``.

    Parameters
    ----------
    ts : `sunpy.timeseries.TimeSeries`

    artifacts : list of strings
        Sets the artifact types to be removed.  For a list of artifact types
        see reference [1].  For example, if a user wants to remove only large
        angle rotations, listed at reference [1] as LAR, set artifacts=["LAR"].

    return_artifacts : `bool`
        Set to True to also return the ``artifact_status`` dict described
        below.
        Default=False

    lytaf_path : `str`
        directory path where the LYRA annotation files are stored.
        Default = directory returned by ``get_and_create_download_dir``.

    force_use_local_lytaf : `bool`
        Ensures current local version of lytaf files are not replaced by
        up-to-date online versions even if current local lytaf files do not
        cover entire input time range etc.
        Default=False

    Returns
    -------
    ts_new : `sunpy.timeseries.TimeSeries`
        copy of input TimeSeries with periods corresponding to artifacts
        removed.

    artifact_status : `dict`
        Only returned when return_artifacts is True.  Information on what
        artifacts were found, removed, etc. from the time series.

        | **artifact_status["lytaf"]** : `numpy.recarray`
        |     The full LYRA annotation file for the time series time range
        |     output by get_lytaf_events().
        | **artifact_status["removed"]** : `numpy.recarray`
        |     Artifacts which were found and removed from from time series.
        | **artifact_status["not_removed"]** : `numpy.recarray`
        |     Artifacts which were found but not removed as they were not
        |     included when user defined artifacts kwarg.
        | **artifact_status["not_found"]** : `list` of strings
        |     Artifacts listed to be removed by user when defining
        |     artifacts kwarg which were not found in time series time range.

    References
    ----------
    [1] http://proba2.oma.be/data/TARDIS

    Examples
    --------
    Remove LARs (Large Angle Rotations) from TimeSeries for 4-Dec-2014:

        >>> import sunpy.timeseries as ts
        >>> import sunpy.data.sample  # doctest: +REMOTE_DATA
        >>> from sunpy.instr.lyra import remove_lytaf_events_from_timeseries
        >>> lyrats = ts.TimeSeries(sunpy.data.sample.LYRA_LEVEL3_TIMESERIES, source='LYRA')  # doctest: +REMOTE_DATA
        >>> ts_nolars = remove_lytaf_events_from_timeseries(lyrats, artifacts=["LAR"])  # doctest: +REMOTE_DATA

    To also retrieve information on the artifacts during that day:

        >>> ts_nolars, artifact_status = remove_lytaf_events_from_timeseries(
        ...        lyrats, artifacts=["LAR"], return_artifacts=True)  # doctest: +REMOTE_DATA

    """
    # Fall back to the configured download directory.
    lytaf_path = lytaf_path or get_and_create_download_dir()

    # Hand the index and one array per column to the array-level routine.
    data_columns = ts.data.columns
    timestamps = ts.data.index
    column_arrays = [np.asanyarray(ts.data[col]) for col in data_columns]
    time, channels, artifact_status = _remove_lytaf_events(
        timestamps,
        channels=column_arrays,
        artifacts=artifacts,
        return_artifacts=True,
        lytaf_path=lytaf_path,
        force_use_local_lytaf=force_use_local_lytaf)

    # Leave the input untouched: swap the data on a deep copy instead.
    ts_new = copy.deepcopy(ts)
    cleaned = {col: channels[i] for i, col in enumerate(data_columns)}
    ts_new.data = pandas.DataFrame(index=time, data=cleaned)

    if return_artifacts:
        return ts_new, artifact_status
    return ts_new
def _remove_lytaf_events(time, channels=None, artifacts=None,
                         return_artifacts=False, fitsfile=None,
                         csvfile=None, filecolumns=None,
                         lytaf_path=None, force_use_local_lytaf=False):
    """
    Removes periods of LYRA artifacts from a time series.

    This function removes periods corresponding to certain artifacts recorded
    in the LYRA annotation file from an array of times given by the time
    input. If a list of arrays of other properties is supplied through the
    channels kwarg, then the relevant values from these arrays are also
    removed. This is done by assuming that each element in each array
    supplied corresponds to the time in the same index in time array. The
    artifacts to be removed are given via the artifacts kwarg, which is
    required: a `ValueError` is raised when it is empty or None.

    Parameters
    ----------
    time : `numpy.ndarray` of `astropy.time.Time`
        Gives the times of the timeseries.
    channels : `list` of `numpy.array` convertible to float64.
        Contains arrays of the irradiances taken at the times in the time
        variable. Each element in the list must have the same number of
        elements as time.
    artifacts : `list` of strings
        Contain the artifact types to be removed. For list of artifact types
        see reference [1]. For example, if user wants to remove only large
        angle rotations, listed at reference [1] as LAR, let
        artifacts=["LAR"]. A single string is treated as a one-element list.
    return_artifacts : `bool`
        Set to True to also return the ``artifact_status`` dictionary
        described below.
        Default=False
    fitsfile : `str`
        file name (including file path and suffix, .fits) of output fits file
        which is generated if this kwarg is not None.
        Default=None, i.e. no fits file is output.
    csvfile : `str`
        file name (including file path and suffix, .csv) of output csv file
        which is generated if this kwarg is not None.
        Default=None, i.e. no csv file is output.
    filecolumns : `list` of strings
        Gives names of columns of any output files produced. Passed to
        ``_prep_columns`` together with ``time`` and ``channels``.
    lytaf_path : `str`
        directory path where the LYRA annotation files are stored. When
        falsy, the result of ``get_and_create_download_dir()`` is used.
    force_use_local_lytaf : `bool`
        Ensures current local version of lytaf files are not replaced by
        up-to-date online versions even if current local lytaf files do not
        cover entire input time range etc.
        Default=False

    Returns
    -------
    clean_time : `numpy.ndarray` of `astropy.time.Time`
        time array with artifact periods removed.
    clean_channels : `list` ndarrays/array-likes convertible to float64
        list of irradiance arrays with artifact periods removed. Only
        returned when ``channels`` is truthy.
    artifact_status : `dict`
        Only returned when ``return_artifacts`` is True. Holds information
        on what artifacts were found, removed, etc. from the time series.
        artifact_status["lytaf"] : `numpy.recarray`
            The full LYRA annotation file for the time series time range
            output by get_lytaf_events().
        artifact_status["removed"] : `numpy.recarray`
            Artifacts which were found and removed from the time series.
        artifact_status["not_removed"] : `numpy.recarray`
            Artifacts which were found but not removed as they were not
            included when user defined artifacts kwarg.
        artifact_status["not_found"] : `list` of strings
            Artifacts listed to be removed by user when defining artifacts
            kwarg which were not found in time series time range.

    Raises
    ------
    TypeError
        If ``channels`` is truthy but not a list, or if any element of
        ``artifacts`` is not a string.
    ValueError
        If ``artifacts`` is empty/None or names a type that is not returned
        by ``get_lytaf_event_types``.

    References
    ----------
    [1] http://proba2.oma.be/data/TARDIS

    Example
    -------
    Sample data for example

    >>> from sunpy.time import parse_time
    >>> from sunpy.instr.lyra import _remove_lytaf_events
    >>> time = parse_time(np.arange('2005-02-01T00:00:00', '2005-02-01T02:00:00',
    ...                             dtype='datetime64[m]'))
    >>> channel_1 = np.zeros(len(time))+0.4
    >>> channel_2 = np.zeros(len(time))+0.1

    Remove LARs (Large Angle Rotations) from time series.

    >>> time_clean, channels_clean = _remove_lytaf_events(
    ...   time, channels=[channel_1, channel_2], artifacts=['LAR'])  # doctest: +REMOTE_DATA

    """
    # Check inputs
    if not lytaf_path:
        lytaf_path = get_and_create_download_dir()
    if channels and not isinstance(channels, list):
        raise TypeError("channels must be None or a list of numpy arrays "
                        "of dtype 'float64'.")
    if not artifacts:
        raise ValueError("User has supplied no artifacts to remove.")
    if isinstance(artifacts, str):
        artifacts = [artifacts]
    if not all(isinstance(artifact_type, str) for artifact_type in artifacts):
        raise TypeError("All elements in artifacts must in strings.")
    all_lytaf_event_types = get_lytaf_event_types(lytaf_path=lytaf_path,
                                                  print_event_types=False)
    for artifact in artifacts:
        if artifact not in all_lytaf_event_types:
            # Show the valid types so the error message below makes sense.
            print(all_lytaf_event_types)
            raise ValueError(
                "{0} is not a valid artifact type. See above.".format(
                    artifact))

    # Define outputs
    clean_time = parse_time(time)
    clean_channels = copy.deepcopy(channels)
    artifacts_not_found = []

    # Get LYTAF file for given time range
    lytaf = get_lytaf_events(time[0], time[-1], lytaf_path=lytaf_path,
                             force_use_local_lytaf=force_use_local_lytaf)

    # Find events in lytaf which are to be removed from time series.
    artifact_indices = np.empty(0, dtype="int64")
    for artifact_type in artifacts:
        indices = np.where(lytaf["event_type"] == artifact_type)[0]
        # If none of a given type of artifact is found, record this
        # type in artifact_not_found list.
        if len(indices) == 0:
            artifacts_not_found.append(artifact_type)
        else:
            # Else, record the indices of the artifacts of this type
            artifact_indices = np.concatenate((artifact_indices, indices))
    artifact_indices.sort()

    # Remove relevant artifacts from timeseries. If none of the
    # artifacts the user wanted removed were found, raise a warning and
    # continue with code.
    if not len(artifact_indices):
        warn("None of user supplied artifacts were found.")
        artifacts_not_found = artifacts
    else:
        # Remove periods corresponding to artifacts from flux and time
        # arrays.
        bad_indices = np.empty(0, dtype="int64")
        all_indices = np.arange(len(time))
        for index in artifact_indices:
            bad_period = np.logical_and(
                time >= lytaf["begin_time"][index].datetime,
                time <= lytaf["end_time"][index].datetime)
            bad_indices = np.append(bad_indices, all_indices[bad_period])
        clean_time = np.delete(clean_time, bad_indices)
        if channels:
            for i, f in enumerate(clean_channels):
                clean_channels[i] = np.delete(f, bad_indices)

    # If return_artifacts kwarg is True, build the dictionary describing
    # which artifacts were found, removed, etc. See docstring.
    if return_artifacts:
        artifact_status = {
            "lytaf": lytaf,
            "removed": lytaf[artifact_indices],
            "not_removed": np.delete(lytaf, artifact_indices),
            "not_found": artifacts_not_found
        }

    # NOTE(review): both file outputs below are built from the *input*
    # ``time``/``channels`` rather than ``clean_time``/``clean_channels``.
    # That is the pre-existing behaviour and is kept; confirm it is intended.

    # Output FITS file if fits kwarg is set
    if fitsfile:
        # Create time array of time strings rather than Time objects
        # and verify filecolumns have been correctly input.
        string_time, filecolumns = _prep_columns(time, channels, filecolumns)
        # Prepare column objects.
        cols = [
            fits.Column(name=filecolumns[0], format="26A", array=string_time)
        ]
        if channels:
            for i, f in enumerate(channels):
                cols.append(
                    fits.Column(name=filecolumns[i + 1], format="D", array=f))
        coldefs = fits.ColDefs(cols)
        # ``fits.new_table`` no longer exists in astropy; ``from_columns``
        # is its replacement.
        tbhdu = fits.BinTableHDU.from_columns(coldefs)
        hdu = fits.PrimaryHDU()
        tbhdulist = fits.HDUList([hdu, tbhdu])
        # Write data to fits file.
        tbhdulist.writeto(fitsfile)

    # Output csv file if csv kwarg is set.
    if csvfile:
        # Create time array of time strings rather than Time objects
        # and verify filecolumns have been correctly input.
        string_time, filecolumns = _prep_columns(time, channels, filecolumns)
        # newline='' lets the csv module control line endings itself.
        with open(csvfile, 'w', newline='') as openfile:
            csvwriter = csv.writer(openfile, delimiter=';')
            # Write header.
            csvwriter.writerow(filecolumns)
            # Write data. Each row must be a sequence of fields; passing the
            # bare time string would emit one field per character.
            if not channels:
                for i in range(len(time)):
                    csvwriter.writerow([string_time[i]])
            else:
                for i in range(len(time)):
                    csvwriter.writerow(
                        [string_time[i]] + [f[i] for f in channels])

    # Return values.
    if return_artifacts:
        if not channels:
            return clean_time, artifact_status
        return clean_time, clean_channels, artifact_status
    if not channels:
        return clean_time
    return clean_time, clean_channels
def remove_lytaf_events_from_lightcurve(lc, artifacts=None,
                                        return_artifacts=False,
                                        lytaf_path=None,
                                        force_use_local_lytaf=False):
    """
    Return a copy of a LightCurve with LYTAF artifact periods removed.

    The index and every column of ``lc.data`` are handed to
    `_remove_lytaf_events`; the cleaned arrays it returns are used to build
    the data frame of a deep copy of ``lc``.

    Parameters
    ----------
    lc : `sunpy.lightcurve.LightCurve`
        Light curve to clean. Must be a ``lightcurve.LightCurve`` instance.
    artifacts : list of strings
        Contain the artifact types to be removed. For list of artifact types
        see reference [1]. For example, if user wants to remove only large
        angle rotations, listed at reference [1] as LAR, let
        artifacts=["LAR"]. The value is forwarded unchanged to
        `_remove_lytaf_events`, which performs the validation.
    return_artifacts : `bool`
        Set to True to also return the ``artifact_status`` dictionary
        described below.
        Default=False
    lytaf_path : `str`
        directory path where the LYRA annotation files are stored. When
        falsy, the result of ``get_and_create_download_dir()`` is used.
    force_use_local_lytaf : `bool`
        Ensures current local version of lytaf files are not replaced by
        up-to-date online versions even if current local lytaf files do not
        cover entire input time range etc.
        Default=False

    Returns
    -------
    lc_new : `sunpy.lightcurve.LightCurve`
        copy of input light curve with periods corresponding to artifacts
        removed.
    artifact_status : `dict`
        Only returned when ``return_artifacts`` is True. Holds information
        on what artifacts were found, removed, etc. from the time series.
        artifact_status["lytaf"] : `numpy.recarray`
            The full LYRA annotation file for the time series time range
            output by get_lytaf_events().
        artifact_status["removed"] : `numpy.recarray`
            Artifacts which were found and removed from the time series.
        artifact_status["not_removed"] : `numpy.recarray`
            Artifacts which were found but not removed as they were not
            included when user defined artifacts kwarg.
        artifact_status["not_found"] : `list` of strings
            Artifacts listed to be removed by user when defining artifacts
            kwarg which were not found in time series time range.

    Raises
    ------
    TypeError
        If ``lc`` is not a ``lightcurve.LightCurve`` instance.

    References
    ----------
    [1] http://proba2.oma.be/data/TARDIS

    Examples
    --------
    Remove LARs (Large Angle Rotations) from LYRALightCurve for 2-Dec-2014:

    >>> from sunpy.lightcurve import LYRALightCurve  # doctest: +SKIP
    >>> lyra_lc = LYRALightCurve.create("2014-12-02")  # doctest: +SKIP
    >>> lc_nolars = remove_lytaf_events_from_lightcurve(
    ...     lyra_lc, artifacts=["LAR"])  # doctest: +SKIP

    To also retrieve information on the artifacts during that day:

    >>> lc_nolars, artifact_status = remove_lytaf_events_from_lightcurve(
    ...     lyra_lc, artifacts=["LAR"], return_artifacts=True)  # doctest: +SKIP

    """
    # Resolve the annotation directory first, then validate the input type
    # (this order matches the established behaviour).
    lytaf_path = lytaf_path or get_and_create_download_dir()
    if not isinstance(lc, lightcurve.LightCurve):
        raise TypeError("lc must be a LightCurve object.")

    # One irradiance array per data column, in column order.
    data_columns = lc.data.columns
    column_arrays = [np.asanyarray(lc.data[name]) for name in data_columns]

    # The status dictionary is always requested from the helper; whether it
    # is handed back to the caller is decided at the end.
    time, channels, artifact_status = _remove_lytaf_events(
        lc.data.index,
        channels=column_arrays,
        artifacts=artifacts,
        return_artifacts=True,
        lytaf_path=lytaf_path,
        force_use_local_lytaf=force_use_local_lytaf)

    # Leave the input untouched: work on a deep copy and swap in a data
    # frame built from the artifact-free arrays.
    lc_new = copy.deepcopy(lc)
    cleaned = {name: channels[pos] for pos, name in enumerate(data_columns)}
    lc_new.data = pandas.DataFrame(index=time, data=cleaned)

    return (lc_new, artifact_status) if return_artifacts else lc_new
def _parse_url(self, request, **kwargs):
    """
    Download the resource behind ``request`` and parse the local copy.

    Parameters
    ----------
    request
        Object exposing a ``full_url`` attribute naming the resource.
    **kwargs
        Forwarded unchanged to ``self._parse_path``.

    Returns
    -------
    Whatever ``self._parse_path`` returns for the downloaded file.
    """
    remote_url = request.full_url
    target_dir = get_and_create_download_dir()
    local_file = download_file(remote_url, target_dir)
    # ``_parse_path`` is given a ``pathlib.Path``, so wrap the returned path.
    return self._parse_path(pathlib.Path(local_file), **kwargs)
def remove_lytaf_events_from_timeseries(ts, artifacts=None,
                                        return_artifacts=False,
                                        lytaf_path=None,
                                        force_use_local_lytaf=False):
    """
    Strip LYTAF-defined LYRA artifact periods from a TimeSeries.

    Every column of ``ts.data`` is cleaned in lockstep with the index by
    `_remove_lytaf_events`, and the result is placed in a deep copy of
    ``ts``; the input object is not modified.

    Parameters
    ----------
    ts : `sunpy.timeseries.TimeSeries`
        Time series to clean. Only ``ts.data`` is read.
    artifacts : list of strings
        Sets the artifact types to be removed. For a list of artifact types
        see reference [1]. For example, if a user wants to remove only large
        angle rotations, listed at reference [1] as LAR, set
        artifacts=["LAR"]. The value is passed straight through to
        `_remove_lytaf_events`, which validates it.
    return_artifacts : `bool`
        Set to True to additionally return the ``artifact_status``
        dictionary described below.
        Default=False
    lytaf_path : `str`
        directory path where the LYRA annotation files are stored. A falsy
        value selects ``get_and_create_download_dir()``.
    force_use_local_lytaf : `bool`
        Ensures current local version of lytaf files are not replaced by
        up-to-date online versions even if current local lytaf files do not
        cover entire input time range etc.
        Default=False

    Returns
    -------
    ts_new : `sunpy.timeseries.TimeSeries`
        copy of input TimeSeries with periods corresponding to artifacts
        removed.
    artifact_status : `dict`
        Present only when ``return_artifacts`` is True. Holds information
        on what artifacts were found, removed, etc. from the time series.

        | **artifact_status["lytaf"]** : `numpy.recarray`
        |     The full LYRA annotation file for the time series time range
        |     output by get_lytaf_events().
        | **artifact_status["removed"]** : `numpy.recarray`
        |     Artifacts which were found and removed from the time series.
        | **artifact_status["not_removed"]** : `numpy.recarray`
        |     Artifacts which were found but not removed as they were not
        |     included when user defined artifacts kwarg.
        | **artifact_status["not_found"]** : `list` of strings
        |     Artifacts listed to be removed by user when defining
        |     artifacts kwarg which were not found in time series time range.

    Notes
    -----
    This function is intended to take TimeSeries objects as input, but the
    deprecated LightCurve is still supported here.

    References
    ----------
    [1] http://proba2.oma.be/data/TARDIS

    Examples
    --------
    Remove LARs (Large Angle Rotations) from TimeSeries for 4-Dec-2014:

    >>> import sunpy.timeseries as ts
    >>> import sunpy.data.sample  # doctest: +REMOTE_DATA
    >>> from sunpy.instr.lyra import remove_lytaf_events_from_timeseries
    >>> lyrats = ts.TimeSeries(sunpy.data.sample.LYRA_LEVEL3_TIMESERIES, source='LYRA')  # doctest: +REMOTE_DATA
    >>> ts_nolars = remove_lytaf_events_from_timeseries(lyrats, artifacts=["LAR"])  # doctest: +REMOTE_DATA

    To also retrieve information on the artifacts during that day:

    >>> ts_nolars, artifact_status = remove_lytaf_events_from_timeseries(
    ...        lyrats, artifacts=["LAR"], return_artifacts=True)  # doctest: +REMOTE_DATA

    """
    # Default the annotation directory to the configured download directory.
    if not lytaf_path:
        lytaf_path = get_and_create_download_dir()

    frame = ts.data
    data_columns = frame.columns
    irradiances = [np.asanyarray(frame[label]) for label in data_columns]

    # Always ask the helper for the status dictionary; it is only passed on
    # to the caller when requested.
    time, channels, artifact_status = _remove_lytaf_events(
        frame.index,
        channels=irradiances,
        artifacts=artifacts,
        return_artifacts=True,
        lytaf_path=lytaf_path,
        force_use_local_lytaf=force_use_local_lytaf)

    # Build the artifact-free frame and install it on a deep copy.
    ts_new = copy.deepcopy(ts)
    ts_new.data = pandas.DataFrame(
        index=time,
        data={label: channels[k] for k, label in enumerate(data_columns)})

    if not return_artifacts:
        return ts_new
    return ts_new, artifact_status
def _parse_args(self, *args, **kwargs):
    """
    Sort a heterogeneous argument list into data-header pairs and maps.

    ``args`` may contain any mixture of the following entries:

    * tuples of data,header
    * data, header not in a tuple
    * data, wcs object in a tuple
    * data, wcs object not in a tuple
    * filename, which will be read
    * directory, from which all files will be read
    * glob, from which all files will be read
    * url, which will be downloaded and read
    * lists containing any of the above.

    ``kwargs`` are forwarded to ``self._read_file``.

    Returns
    -------
    data_header_pairs : `list`
        Pairs collected from array/header arguments and from every file
        that was read.
    already_maps : `list`
        Arguments that were already ``GenericMap`` instances.

    Raises
    ------
    ValueError
        If an argument matches none of the supported kinds.

    Example
    -------
    self._parse_args(data, header,
                     (data, header),
                     ['file1', 'file2', 'file3'],
                     'file4',
                     'directory1',
                     '*.fits')

    """
    data_header_pairs = []
    already_maps = []

    # Flatten nested lists so every entry can be inspected in turn.
    args = expand_list(args)

    idx = 0
    while idx < len(args):
        arg = args[idx]
        is_text = isinstance(arg, str)
        expanded = os.path.expanduser(arg) if is_text else None
        # Paths that the matching branch wants read; processed after the
        # dispatch so the read logic lives in one place.
        to_read = []

        if isinstance(arg, SUPPORTED_ARRAY_TYPES):
            # Data followed by its header (or a WCS turned into a header).
            arg_header = args[idx + 1]
            if isinstance(arg_header, WCS):
                arg_header = args[idx + 1].to_header()
            if self._validate_meta(arg_header):
                data_header_pairs.append((args[idx], OrderedDict(arg_header)))
                idx += 1  # the header entry has been consumed as well
        elif is_text and os.path.isfile(expanded):
            # Single file
            to_read = [expanded]
        elif is_text and os.path.isdir(expanded):
            # Every entry of a directory
            to_read = [os.path.join(expanded, elem)
                       for elem in os.listdir(expanded)]
        elif is_text and '*' in arg:
            # Glob pattern
            to_read = glob.glob(expanded)
        elif isinstance(arg, GenericMap):
            already_maps.append(arg)
        elif is_text and _is_url(arg):
            # Remote file: fetch it into the download directory first.
            to_read = [download_file(arg, get_and_create_download_dir())]
        elif isinstance(arg, DatabaseEntry):
            to_read = [arg.path]
        else:
            raise ValueError("File not found or invalid input")

        for afile in to_read:
            data_header_pairs += self._read_file(afile, **kwargs)

        idx += 1

    # TODO:
    # In the end, if there are already maps it should be put in the same
    # order as the input, currently they are not.
    return data_header_pairs, already_maps
def _parse_args(self, *args, **kwargs):
    """
    Sort a heterogeneous `args` list into TimeSeries building blocks.

    `args` can contain any mixture of the following entries:

    * tuples of (data, header, unit) (1)
    * data, header not in a tuple (1)
    * filename, which will be read
    * directory, from which all files will be read
    * glob, from which all files will be read
    * url, which will be downloaded and read
    * lists containing any of the above.

    (1) header/unit are optional and in either order, but data should be the
    first entry in each group.

    ``kwargs`` are forwarded to ``self._read_file``.

    Returns
    -------
    data_header_unit_tuples : `list`
        ``(data, meta, units)`` tuples built from in-memory data arguments.
    data_header_pairs : `list`
        Updated by ``_apply_result`` for every file that was read.
    already_timeseries : `list`
        Arguments that were already ``GenericTimeSeries`` instances.
    filepaths : `list`
        Updated by ``_apply_result`` for every file that was read.

    Raises
    ------
    NoMatchError
        If an argument matches none of the supported kinds.

    Examples
    --------
    self._parse_args(data, header,
                     (data, header),
                     ['file1', 'file2', 'file3'],
                     'file4',
                     'directory1',
                     '*.fits')

    """
    data_header_unit_tuples = []
    data_header_pairs = []
    already_timeseries = []
    filepaths = []

    # Types that mark the start of an in-memory data group.
    data_types = (np.ndarray, Table, pd.DataFrame)

    # Flatten nested lists so every entry can be inspected in turn.
    args = expand_list(args)

    idx = 0
    while idx < len(args):
        arg = args[idx]
        is_text = isinstance(arg, str)
        expanded = os.path.expanduser(arg) if is_text else None
        # Paths that the matching branch wants read; processed after the
        # dispatch so the read logic lives in one place.
        to_read = []

        if isinstance(arg, data_types):
            # Start from the assumption that a pandas DataFrame was given.
            data = arg
            units = OrderedDict()
            meta = MetaDict()

            if isinstance(data, Table):
                # Astropy Table
                data, meta, units = self._from_table(data)
            elif isinstance(data, np.ndarray):
                # numpy ndarray: the first column is taken as the time index
                data = pd.DataFrame(data=data[:, 1:], index=Time(data[:, 0]))

            # Up to two trailing entries may carry units and/or metadata.
            for _ in range(2):
                if len(args) <= idx + 1:
                    continue
                if isinstance(args[idx + 1], data_types):
                    # The next entry starts a new data group.
                    continue
                if self._validate_units(args[idx + 1]):
                    units.update(args[idx + 1])
                    idx += 1  # the units entry has been consumed
                elif self._validate_meta(args[idx + 1]):
                    # Convert astropy.io FITS headers to preserve
                    # multi-line comments.
                    if isinstance(args[idx + 1], astropy.io.fits.header.Header):
                        args[idx + 1] = MetaDict(
                            sunpy.io.header.FileHeader(args[idx + 1]))
                    meta.update(args[idx + 1])
                    idx += 1  # the meta entry has been consumed

            data_header_unit_tuples.append((data, meta, units))
        elif is_text and os.path.isfile(expanded):
            # Single file
            to_read = [expanded]
        elif is_text and os.path.isdir(expanded):
            # Every entry of a directory
            to_read = [os.path.join(expanded, elem)
                       for elem in os.listdir(expanded)]
        elif is_text and '*' in arg:
            # Glob pattern
            to_read = glob.glob(expanded)
        elif isinstance(arg, GenericTimeSeries):
            already_timeseries.append(arg)
        elif is_text and _is_url(arg):
            # Remote file: fetch it into the download directory first.
            to_read = [download_file(arg, get_and_create_download_dir())]
        else:
            raise NoMatchError("File not found or invalid input")

        for afile in to_read:
            # ``_apply_result`` files the outcome under either the parsed
            # pairs or the paths left for a source class to read.
            result = self._read_file(afile, **kwargs)
            data_header_pairs, filepaths = _apply_result(
                data_header_pairs, filepaths, result)

        idx += 1

    # TODO:
    # In the end, if there are already TimeSeries it should be put in the
    # same order as the input, currently they are not.
    return data_header_unit_tuples, data_header_pairs, already_timeseries, filepaths
def _parse_args(self, *args, **kwargs):
    """
    Split an args list into in-memory data groups, files and TimeSeries.

    args can contain any mixture of the following entries:

    * tuples of (data, header, unit) (1)
    * data, header not in a tuple (1)
    * filename, which will be read
    * directory, from which all files will be read
    * glob, from which all files will be read
    * url, which will be downloaded and read
    * lists containing any of the above.

    (1) Note that header/unit are optional and in either order, but data
    must be the first entry in each group.

    ``kwargs`` are forwarded to ``self._read_file``.

    Returns
    -------
    data_header_unit_tuples : `list`
        ``(data, meta, units)`` tuples built from in-memory data arguments.
    data_header_pairs : `list`
        Updated by ``_apply_result`` for every file that was read.
    already_timeseries : `list`
        Arguments that were already ``GenericTimeSeries`` instances.
    filepaths : `list`
        Updated by ``_apply_result`` for every file that was read.

    Raises
    ------
    NoMatchError
        If an argument matches none of the supported kinds.

    Example
    -------
    self._parse_args(data, header,
                     (data, header),
                     ['file1', 'file2', 'file3'],
                     'file4',
                     'directory1',
                     '*.fits')

    """
    data_header_unit_tuples = []
    data_header_pairs = []
    already_timeseries = []
    filepaths = []

    # Types that mark the start of an in-memory data group.
    in_memory_types = (np.ndarray, Table, pd.DataFrame)

    # Nested lists are expanded to element level, giving one flat list.
    args = expand_list(args)

    pos = 0
    while pos < len(args):
        arg = args[pos]
        is_text = isinstance(arg, str)
        user_path = os.path.expanduser(arg) if is_text else None
        # Files queued by the matching branch; they are read once the
        # dispatch below has finished.
        queued = []

        if isinstance(arg, in_memory_types):
            # Start from the assumption that a pandas DataFrame was given.
            data = arg
            units = OrderedDict()
            meta = MetaDict()

            if isinstance(data, Table):
                # Astropy Table
                data, meta, units = self._from_table(data)
            elif isinstance(data, np.ndarray):
                # numpy ndarray: the first column is taken as the time index
                data = pd.DataFrame(data=data[:, 1:], index=Time(data[:, 0]))

            # Up to two trailing entries may carry units and/or metadata.
            for _ in range(2):
                if len(args) <= pos + 1:
                    continue
                if isinstance(args[pos + 1], in_memory_types):
                    # The next entry starts a new data group.
                    continue
                if self._validate_units(args[pos + 1]):
                    units.update(args[pos + 1])
                    pos += 1  # the units entry has been consumed
                elif self._validate_meta(args[pos + 1]):
                    # Convert astropy.io FITS headers to preserve
                    # multi-line comments.
                    if isinstance(args[pos + 1], astropy.io.fits.header.Header):
                        args[pos + 1] = MetaDict(
                            sunpy.io.header.FileHeader(args[pos + 1]))
                    meta.update(args[pos + 1])
                    pos += 1  # the meta entry has been consumed

            data_header_unit_tuples.append((data, meta, units))
        elif is_text and os.path.isfile(user_path):
            # Single file
            queued = [user_path]
        elif is_text and os.path.isdir(user_path):
            # Every entry of a directory
            queued = [os.path.join(user_path, elem)
                      for elem in os.listdir(user_path)]
        elif is_text and '*' in arg:
            # Glob pattern
            queued = glob.glob(user_path)
        elif isinstance(arg, GenericTimeSeries):
            already_timeseries.append(arg)
        elif is_text and _is_url(arg):
            # Remote file: fetch it into the download directory first.
            queued = [download_file(arg, get_and_create_download_dir())]
        else:
            raise NoMatchError("File not found or invalid input")

        for afile in queued:
            # ``_apply_result`` files the outcome under either the parsed
            # pairs or the paths left for a source class to read.
            result = self._read_file(afile, **kwargs)
            data_header_pairs, filepaths = _apply_result(
                data_header_pairs, filepaths, result)

        pos += 1

    # TODO:
    # In the end, if there are already TimeSeries it should be put in the
    # same order as the input, currently they are not.
    return data_header_unit_tuples, data_header_pairs, already_timeseries, filepaths
def get_goes_data(t=None, sat_num=None):
    ''' Reads GOES data from https://umbra.nascom.nasa.gov/ repository, for date
        and satellite number provided.  If sat_num is None, data for all available
        satellites are downloaded, with some sanity check used to decide the best.
        If the Time() object t is None, data for the day before the current date
        are read (since there is a delay of 1 day in availability of the data).

        The data returned by get_goes() are used directly when they contain the
        requested day; the remote FITS archive is only consulted otherwise, with
        https://hesperia.gsfc.nasa.gov/goes/ as the fallback host.

        This routine depends on the Python 2 ``urllib2`` module.

        Parameters:
           t          Time object selecting the day, or None for yesterday
           sat_num    None (try every satellite listed for that day), an int,
                      or a list of two-character satellite number strings

        Returns:
           goes_t     GOES time array in plot_date format
           goes_data  GOES 1-8 A lightcurve

        (None, None) is returned when no usable data could be obtained.
    '''
    # Resolve the default date *before* it is used below; previously the
    # short-circuit dereferenced t.mjd while t could still be None.
    if t is None:
        t = Time(Time.now().mjd - 1, format='mjd')

    # Can short-circuit the entire code below this block by using the
    # get_goes() routine.
    lo, _hi, goes_t = get_goes()
    if len(goes_t) != 0:
        # Got the data, now check that the requested day is present.
        # NOTE(review): the full arrays are returned, not just the samples
        # selected by ``good`` - confirm that this is intended.
        good, = np.where(np.floor(goes_t.mjd) == np.floor(t.mjd))
        if len(good) != 0:
            return goes_t.plot_date, lo

    # Imported lazily so the short-circuit above works without them.
    from sunpy.util.config import get_and_create_download_dir
    import os
    import shutil
    from astropy.io import fits
    import urllib2

    yr = t.iso[:4]
    datstr = t.iso[:10].replace('-', '')
    primary = 'https://umbra.nascom.nasa.gov/goes/fits/' + yr
    fallback = 'https://hesperia.gsfc.nasa.gov/goes/' + yr

    try:
        if sat_num is None:
            # Scrape the year's directory listing for files of this date;
            # the two characters before the date are the satellite number.
            try:
                f = urllib2.urlopen(primary, timeout=3)
            except Exception:
                f = urllib2.urlopen(fallback, timeout=3)
            try:
                lines = f.readlines()
            finally:
                f.close()
            sat_num = []
            for line in lines:
                idx = line.find(datstr)
                if idx != -1:
                    sat_num.append(line[idx - 2:idx])
        if isinstance(sat_num, int):
            sat_num = [str(sat_num)]

        download_dir = get_and_create_download_dir()
        filenames = []
        for sat in sat_num:
            filename = 'go' + sat + datstr + '.fits'
            try:
                f = urllib2.urlopen(primary + '/' + filename, timeout=3)
            except Exception:
                f = urllib2.urlopen(fallback + '/' + filename, timeout=3)
            local_path = os.path.join(download_dir, filename)
            try:
                with open(local_path, 'wb') as g:
                    shutil.copyfileobj(f, g)
            finally:
                f.close()
            filenames.append(local_path)

        # Keep the file with the most samples above 1e-8 in the first
        # FLUX column.
        pmerit = 0
        best_t = None
        best_data = None
        for path in filenames:
            gfits = fits.open(path)
            data = gfits[2].data['FLUX'][0][:, 0]
            good, = np.where(data > 1.e-8)
            tsecs = gfits[2].data['TIME'][0]
            merit = len(good)
            date_elements = gfits[0].header['DATE-OBS'].split('/')
            if merit > pmerit:
                print('File: ' + path + ' is best')
                pmerit = merit
                best_data = data
                # DATE-OBS is split on '/' and reassembled in reverse order;
                # TIME is in seconds, converted to days for plot_date.
                best_t = Time(date_elements[2] + '-' + date_elements[1] + '-'
                              + date_elements[0]).plot_date + tsecs / 86400.

        if best_data is None:
            print('No good GOES data for ' + datstr)
            return None, None
        return best_t, best_data
    except Exception:
        # Best effort: any failure while fetching or reading is reported
        # as an unreachable site.
        print('GOES site unreachable?')
        return None, None
def get_lytaf_events(start_time, end_time, lytaf_path=None,
                     combine_files=("lyra", "manual", "ppt", "science"),
                     csvfile=None, force_use_local_lytaf=False):
    """
    Extracts combined lytaf file for given time range.

    Given a time range defined by start_time and end_time, this function
    extracts the segments of each LYRA annotation file and combines them.

    Parameters
    ----------
    start_time : `datetime.datetime` or `str`
        Start time of period for which annotation file is required.
    end_time : `datetime.datetime` or `str`
        End time of period for which annotation file is required.
    lytaf_path : `str`
        directory path where the LYRA annotation files are stored. When
        falsy, the result of ``get_and_create_download_dir()`` is used.
    combine_files : `tuple` of strings
        States which LYRA annotation files are to be combined.
        Default is all four, i.e. lyra, manual, ppt, science.
        See Notes section for an explanation of each.
    csvfile : `str`
        If set, the combined events are also written to this file as
        ';'-delimited CSV with a header row.
        Default=None, i.e. no csv file is output.
    force_use_local_lytaf : `bool`
        Ensures current local version of lytaf files are not replaced by
        up-to-date online versions even if current local lytaf files do not
        cover entire input time range etc.
        Default=False

    Returns
    -------
    lytaf : `numpy.recarray`
        Containing the various parameters stored in the LYTAF files, sorted
        by ``begin_time``.

    Raises
    ------
    ValueError
        If ``combine_files`` contains anything other than 'lyra', 'manual',
        'ppt' or 'science'.

    Notes
    -----
    There are four LYRA annotation files which mark different types of events
    or artifacts in the data. They are named annotation_suffix.db where
    suffix is a variable equalling either lyra, manual, ppt, or science.

    annotation_lyra.db : contains entries regarding possible effects to
        the data due to normal operation of LYRA instrument.

    annotation_manual.db : contains entries regarding possible effects
        to the data due to unusual or manually logged events.

    annotation_ppt.db : contains entries regarding possible effects to
        the data due to pointing or positioning of PROBA2.

    annotation_science.db : contains events in the data scientifically
        interesting, e.g. GOES flares.

    References
    ----------
    Further documentation: http://proba2.oma.be/data/TARDIS

    Examples
    --------
    Get all events in the LYTAF files for January 2014

    >>> from sunpy.instr.lyra import get_lytaf_events
    >>> lytaf = get_lytaf_events('2014-01-01', '2014-02-01')  # doctest: +REMOTE_DATA

    """
    # Check inputs
    # Check lytaf path
    if not lytaf_path:
        lytaf_path = get_and_create_download_dir()
    # Check start_time and end_time is a date string or datetime object
    start_time = parse_time(start_time)
    end_time = parse_time(end_time)
    # Check combine_files contains correct inputs
    if not all(suffix in ["lyra", "manual", "ppt", "science"]
               for suffix in combine_files):
        raise ValueError("Elements in combine_files must be strings equalling "
                         "'lyra', 'manual', 'ppt', or 'science'.")
    # Remove any duplicates from combine_files input
    combine_files = sorted(set(combine_files))
    # Convert input times to UNIX timestamp format since this is the
    # time format in the annotation files
    epoch = datetime.datetime(1970, 1, 1)
    start_time_uts = (start_time - epoch).total_seconds()
    end_time_uts = (end_time - epoch).total_seconds()
    utc = datetime.datetime.utcfromtimestamp

    # Layout of the record array which will hold the information from
    # the annotation files.
    lytaf_dtype = [("insertion_time", object),
                   ("begin_time", object),
                   ("reference_time", object),
                   ("end_time", object),
                   ("event_type", object),
                   ("event_definition", object)]
    # Rows are collected in a list and converted once at the end instead of
    # re-allocating the array for every event.
    records = []

    # Access annotation files
    for suffix in combine_files:
        # Check database files are present
        dbname = "annotation_{0}.db".format(suffix)
        check_download_file(dbname, LYTAF_REMOTE_PATH, lytaf_path)
        db_path = os.path.join(lytaf_path, dbname)
        # Open SQLITE3 annotation files
        connection = sqlite3.connect(db_path)
        try:
            cursor = connection.cursor()
            # Check if lytaf file spans the start and end times defined by
            # user.  If not, download newest version.
            # The stored values are UNIX timestamps and the requested range
            # is handled as UTC above, so convert with utcfromtimestamp;
            # a local-time conversion would shift the comparison by the
            # machine's UTC offset.
            cursor.execute("select begin_time from event order by begin_time asc "
                           "limit 1;")
            db_first_begin_time = utc(cursor.fetchone()[0])
            cursor.execute("select end_time from event order by end_time desc "
                           "limit 1;")
            db_last_end_time = utc(cursor.fetchone()[0])
            # If lytaf does not include entire input time range...
            if not force_use_local_lytaf and (
                    end_time > db_last_end_time or
                    start_time < db_first_begin_time):
                # ...close lytaf file...
                cursor.close()
                connection.close()
                # ...Download latest lytaf file...
                check_download_file(dbname, LYTAF_REMOTE_PATH, lytaf_path,
                                    replace=True)
                # ...and open new version of lytaf database.
                connection = sqlite3.connect(db_path)
                cursor = connection.cursor()
            # Select and extract the data from event table within file within
            # given time range (values are bound, not formatted into the SQL).
            cursor.execute("select insertion_time, begin_time, reference_time, "
                           "end_time, eventType_id from event where end_time >= "
                           "? and begin_time <= ?",
                           (start_time_uts, end_time_uts))
            event_rows = cursor.fetchall()
            # Select and extract the event types from eventType table
            cursor.row_factory = sqlite3.Row
            cursor.execute("select * from eventType")
            eventType_rows = cursor.fetchall()
            eventType_id = [row["id"] for row in eventType_rows]
            eventType_type = [row["type"] for row in eventType_rows]
            eventType_definition = [row["definition"] for row in eventType_rows]
            # Collect desired information for the lytaf record array
            for event_row in event_rows:
                id_index = eventType_id.index(event_row[4])
                records.append((utc(event_row[0]),
                                utc(event_row[1]),
                                utc(event_row[2]),
                                utc(event_row[3]),
                                eventType_type[id_index],
                                eventType_definition[id_index]))
            cursor.close()
        finally:
            # Closing an already-closed connection is harmless, so this is
            # safe on every path, including a failed re-download.
            connection.close()

    if records:
        lytaf = np.array(records, dtype=lytaf_dtype)
    else:
        lytaf = np.empty((0,), dtype=lytaf_dtype)
    # Sort lytaf in ascending order of begin time
    lytaf.sort(order="begin_time")

    # If csvfile kwarg is set, write out lytaf to csv file
    if csvfile:
        time_format = "%Y-%m-%dT%H:%M:%S"
        # Open and write data to csv file.
        with open(csvfile, 'w') as openfile:
            csvwriter = csv.writer(openfile, delimiter=';')
            # Write header.
            csvwriter.writerow(lytaf.dtype.names)
            # Write data: the four time fields as strings, then the event
            # type and definition unchanged.
            for row in lytaf:
                new_row = [row[k].strftime(time_format) for k in range(4)]
                new_row.append(row[4])
                new_row.append(row[5])
                csvwriter.writerow(new_row)

    return lytaf
def _parse_args(self, *args, **kwargs):
    """
    Parses an args list for data-header pairs.

    args can contain any mixture of the following entries:

    * tuples of data,header
    * data, header not in a tuple
    * data, wcs object in a tuple
    * data, wcs object not in a tuple
    * filename, which will be read
    * directory, from which all files will be read
    * glob, from which all files will be read
    * url, which will be downloaded and read
    * an existing map (`GenericMap`), which is passed through untouched
    * a `DatabaseEntry`, whose ``path`` will be read
    * lists containing any of the above.

    Parameters
    ----------
    *args
        The inputs to classify, in any of the forms listed above.
    **kwargs
        Forwarded unchanged to ``self._read_file`` for every file read.

    Returns
    -------
    data_header_pairs : `list`
        ``(data, OrderedDict(header))`` tuples built from array inputs,
        together with whatever ``self._read_file`` returned for each
        file-like input, in the order the inputs were encountered.
    already_maps : `list`
        The arguments that were already `GenericMap` instances.

    Raises
    ------
    ValueError
        If an argument matches none of the supported forms, or if a data
        array is the final argument and therefore has no header or WCS
        object following it.

    Example
    -------
    self._parse_args(data, header,
                     (data, header),
                     ['file1', 'file2', 'file3'],
                     'file4',
                     'directory1',
                     '*.fits')
    """
    data_header_pairs = []
    already_maps = []

    # Account for nested lists of items
    args = expand_list(args)

    # Walk the flattened arguments by index rather than by iteration,
    # because an array consumes the item that follows it as its header.
    i = 0
    while i < len(args):
        arg = args[i]

        # Every path-like branch needs these two values, so compute them
        # once per argument instead of once per branch.
        is_string = isinstance(arg, six.string_types)
        expanded = os.path.expanduser(arg) if is_string else None

        # Data-header or data-WCS pair
        if isinstance(arg, SUPPORTED_ARRAY_TYPES):
            if i + 1 >= len(args):
                # Without this guard the lookup below leaks an IndexError
                # that says nothing about what the caller did wrong.
                raise ValueError("Data array must be followed by a header "
                                 "or WCS object")
            arg_header = args[i + 1]
            if isinstance(arg_header, WCS):
                arg_header = arg_header.to_header()

            # If the header fails validation the array is skipped and the
            # following item is examined on its own in the next iteration.
            if self._validate_meta(arg_header):
                data_header_pairs.append((arg, OrderedDict(arg_header)))
                i += 1  # an extra increment to account for the data-header pairing

        # File name
        elif is_string and os.path.isfile(expanded):
            data_header_pairs += self._read_file(expanded, **kwargs)

        # Directory
        elif is_string and os.path.isdir(expanded):
            for elem in os.listdir(expanded):
                afile = os.path.join(expanded, elem)
                data_header_pairs += self._read_file(afile, **kwargs)

        # Glob
        elif is_string and '*' in arg:
            for afile in glob.glob(expanded):
                data_header_pairs += self._read_file(afile, **kwargs)

        # Already a Map
        elif isinstance(arg, GenericMap):
            already_maps.append(arg)

        # A URL
        elif is_string and _is_url(arg):
            path = download_file(arg, get_and_create_download_dir())
            data_header_pairs += self._read_file(path, **kwargs)

        # A database Entry
        elif isinstance(arg, DatabaseEntry):
            data_header_pairs += self._read_file(arg.path, **kwargs)

        else:
            raise ValueError("File not found or invalid input")

        i += 1

    # TODO:
    # In the end, if there are already maps it should be put in the same
    # order as the input, currently they are not.
    return data_header_pairs, already_maps
def remove_lytaf_events_from_lightcurve(lc, artifacts=None,
                                        return_artifacts=False,
                                        lytaf_path=None,
                                        force_use_local_lytaf=False):
    """
    Removes periods of LYRA artifacts defined in LYTAF from a LYRALightCurve.

    Parameters
    ----------
    lc : `sunpy.lightcurve.LightCurve`
        The light curve to clean. It is not modified; a deep copy is
        returned.

    artifacts : list of strings
        Contain the artifact types to be removed.  For list of artifact types
        see reference [1].  For example, if user wants to remove only large
        angle rotations, listed at reference [1] as LAR, let artifacts=["LAR"].
        Default=None.  The value is forwarded unchanged to
        ``_remove_lytaf_events``, which decides how an empty selection is
        handled.

    return_artifacts : `bool`
        Set to True to also return ``artifact_status`` (see Returns).
        Default=False

    lytaf_path : `str`
        directory path where the LYRA annotation files are stored.  If not
        given (or empty), the directory returned by
        ``get_and_create_download_dir()`` is used.

    force_use_local_lytaf : `bool`
        Ensures current local version of lytaf files are not replaced by
        up-to-date online versions even if current local lytaf files do not
        cover entire input time range etc.
        Default=False

    Returns
    -------
    lc_new : `sunpy.lightcurve.LightCurve`
        copy of input LYRALightCurve with periods corresponding to artifacts
        removed.

    artifact_status : `dict`
        Only returned when ``return_artifacts`` is True.  Produced by
        ``_remove_lytaf_events``; contains four entries describing what
        artifacts were found, removed, etc. from the time series.
        artifact_status["lytaf"] = artifacts found : `numpy.recarray`
            The full LYRA annotation file for the time series time range
            output by get_lytaf_events().
        artifact_status["removed"] = artifacts removed : `numpy.recarray`
            Artifacts which were found and removed from the time series.
        artifact_status["not_removed"] = artifacts found but not removed :
              `numpy.recarray`
            Artifacts which were found but not removed as they were not
            included when user defined artifacts kwarg.
        artifact_status["not_found"] = artifacts not found : `list` of strings
            Artifacts listed to be removed by user when defining artifacts
            kwarg which were not found in time series time range.

    Raises
    ------
    TypeError
        If ``lc`` is not a `sunpy.lightcurve.LightCurve`.

    References
    ----------
    [1] http://proba2.oma.be/data/TARDIS

    Examples
    --------
    Remove LARs (Large Angle Rotations) from LYRALightCurve for 2-Dec-2014:

        >>> import sunpy.lightcurve as lightcurve
        >>> lyra_lc = lightcurve.LYRALightCurve.create("2014-12-02")
        >>> lc_nolars = remove_lytaf_events_from_lightcurve(
        ...     lyra_lc, artifacts=["LAR"])

    To also retrieve information on the artifacts during that day:

        >>> lc_nolars, artifact_status = remove_lytaf_events_from_lightcurve(
        ...     lyra_lc, artifacts=["LAR"], return_artifacts=True)

    """
    # Validate the input first so that a bad call fails before the default
    # download directory is looked up (and possibly created on disk).
    if not isinstance(lc, lightcurve.LightCurve):
        raise TypeError("lc must be a LightCurve object.")
    if not lytaf_path:
        lytaf_path = get_and_create_download_dir()

    # Remove artifacts from time series.  The status is always requested
    # from the helper; whether it is handed back is decided at the end.
    data_columns = lc.data.columns
    time, channels, artifact_status = _remove_lytaf_events(
        lc.data.index,
        channels=[np.asanyarray(lc.data[col]) for col in data_columns],
        artifacts=artifacts, return_artifacts=True, lytaf_path=lytaf_path,
        force_use_local_lytaf=force_use_local_lytaf)

    # Create a new copy of the lightcurve and replace its data with the
    # artifact-free time series, keeping the original column names.
    lc_new = copy.deepcopy(lc)
    lc_new.data = pandas.DataFrame(
        index=time,
        data={col: channels[i] for i, col in enumerate(data_columns)})

    if return_artifacts:
        return lc_new, artifact_status
    return lc_new