Example #1
 def _extract_timeseries_from_video(vid, scalars, channels):
     res = [[] for k in range(len(scalars))]
     sample_time = []
     tf = drms.to_datetime(vid.attrs['end_time'])
     first_frame = None
     for frame_key in sorted(list(vid.keys()), key=lambda frame_key : float(frame_key[5:])):
         if('channels' in vid[frame_key].keys() and
            len(vid[frame_key]['channels'].shape) == 3):
             ti = drms.to_datetime(vid[frame_key].attrs['T_REC'])
             sample_time += [(tf - ti).total_seconds()/60]
             if(first_frame is None):
                 first_frame = Data_Gen._extract_frame(vid[frame_key]['channels'], vid[frame_key].attrs['SEGS'], channels)
             i = 0
             for scalar in scalars:
                 if(scalar == 'RMS'):
                     l1_err = 0
                     try:
                         for c in range(first_frame.shape[2]):
                             if(vid[frame_key].attrs['SEGS'][c].decode() in channels):
                                 this_frame = Data_Gen._extract_frame(vid[frame_key]['channels'], vid[frame_key].attrs['SEGS'], channels)
                                 l1_err += np.sum(np.abs(sk.resize(this_frame[:,:,c], first_frame.shape[:2], preserve_range=True)-first_frame[:,:,c]))
                         # Mean absolute difference per pixel and per channel
                         res[i] += [l1_err / (np.prod(first_frame.shape[0:2]) * len(channels))]
                     except:
                         sample_time = sample_time[:-1]
                         print('Frame {} not extracted.'.format(frame_key))
                         print(traceback.format_exc())
                 else:
                     res[i] += [vid[frame_key].attrs[scalar]]
                 i += 1
     return np.array(res), np.array(sample_time)
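
The helper Data_Gen._extract_frame is not shown in this snippet. A minimal sketch (assumed, not from the source) of what it appears to do, based on how it is called here and in Example #28: select the requested channels from a (H, W, n_segments) HDF5 dataset, using the frame's SEGS byte strings as channel names.

import numpy as np

def _extract_frame(channels_dset, segs, channels):
    # Assumed behaviour: return a (H, W, n_selected) float array containing
    # only the segments whose decoded name appears in `channels`.
    data = np.asarray(channels_dset, dtype=np.float32)
    if channels is None:
        return data
    idx = [i for i, s in enumerate(segs) if s.decode() in channels]
    return data[:, :, idx]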
Example #2
    def _get_frames_key_from_query(ar_nb, peak_time, keys):
        list_keys = []
        for k in range(len(keys.NOAA_AR)):
            if (keys.NOAA_AR[k] == ar_nb and abs(keys.LAT_FWT[k]) <= 68
                    and abs(keys.LON_FWT[k]) <= 68 and drms.to_datetime(
                        keys.T_REC[k]) <= drms.to_datetime(peak_time)):
                list_keys += [k]

        return list_keys
Example #3
 def extract_B_flares_from_goes(goes_data_path, output_path, time_window):
     with open(goes_data_path, 'r', newline='') as file:
         reader = csv.reader(file, delimiter=',')
         counter_init_B = 0
         counter_final_B = 0
         counter_M_X = 0
         dict_M_X = {}
         dict_B = {}
         # First, store the M- and X-class flares in a dictionary
         [date, noaa] = [2, 1]  # we assume this format
         for event in reader:
             if (re.match(r'(M|X)[1-9]\.[0-9],[1-9][0-9]*,.*,.*,.*,.*',
                          str.join(',', event))):
                 counter_M_X += 1
                 event_date = drms.to_datetime(event[date])
                 if (event_date in dict_M_X):
                     dict_M_X[event_date] += [event[noaa]]
                 else:
                     dict_M_X[event_date] = [event[noaa]]
         file.seek(0)
         # Then analyze all B-class flares and store them
         with open(output_path, 'w', newline='') as out:
             writer = csv.writer(out, delimiter=',')
             for event in reader:
                 if (re.match(r'B[1-9]\.[0-9],[1-9][0-9]*,.*,.*,.*,.*',
                              str.join(',', event))):
                     counter_init_B += 1
                     event_date = drms.to_datetime(event[date])
                     exclude_event = False
                     for time_delta in range(-time_window, time_window + 1):
                         event_date_window = event_date + timedelta(
                             days=time_delta)
                         if (event_date_window in dict_M_X and event[noaa]
                                 in dict_M_X[event_date_window]):
                             #print('Event {} ignored because of M-X flare {}'.format(event,event_date_window))
                             exclude_event = True
                             break
                         elif (event_date_window in dict_B and event[noaa]
                               in dict_B[event_date_window]):
                             #print('Event {} ignored because of B flare {}'.format(event, event_date_window))
                             exclude_event = True
                             break
                     if (not exclude_event):
                         writer.writerow(event)
                         counter_final_B += 1
                         if (event_date in dict_B):
                             dict_B[event_date] += [event[noaa]]
                         else:
                             dict_B[event_date] = [event[noaa]]
                 elif (not re.match(
                         r'(B|C|M|X)[1-9]\.[0-9],[1-9][0-9]*,.*,.*,.*,.*',
                         str.join(',', event))):
                     writer.writerow(event)
             print('Total number of M-X flares: {}'.format(counter_M_X))
             print('Total number of B flares: {}'.format(counter_init_B))
             print('Number of output B flares: {}'.format(counter_final_B))
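
A hypothetical call, assuming a GOES .csv whose rows start with the flare class, then the NOAA AR number, then the date (the column indices [date, noaa] = [2, 1] used above); file names are placeholders and time_window is in days around each B flare.

extract_B_flares_from_goes('GOES_data.csv', 'GOES_B_only.csv', time_window=2)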
Example #4
def unduh_fits(j, z):  # "unduh_fits" = "download FITS" (Indonesian)
    time = (tanggal_jsoc(j)[1] - timedelta(hours = int(z))).strftime('%Y.%m.%d_%H:%M:%S_TAI')
    k = c.query('%s[%d][%s]' % (series, sharpnum,time), key=kwlist, rec_index=True)
    # Find the record that is closest to the central meridian, using the minimum of the patch's absolute longitude:
    rec_cm = k.LON_FWT.abs().idxmin()
    t_cm = drms.to_datetime(k.T_REC[rec_cm])
    # print(rec_cm, '@', k.LON_FWT[rec_cm], 'deg')
    t_cm_str = t_cm.strftime('%Y%m%d_%H%M%S_TAI')
    os.chdir(wdir)
    k.to_csv(path('meta',time)+'/k_'+t_cm_str[9:15]+'.csv',index_label='query')
    os.chdir(path('Fits',time))
    fname_mask = '{series}.{sharpnum}.{tstr}.{segment}.fits'
    fnames = {
        s: fname_mask.format(
            series=series, sharpnum=sharpnum, tstr=t_cm_str, segment=s)
        for s in segments}
    download_segments = []
    for w, v in fnames.items():
        if not os.path.exists(v):
            os.chdir(wdir)
            download_segments.append(w)
            print('{} terunduh.'.format(v))  # '{} downloaded.' (file queued for download)
        else:
            print('{} sudah ada di folder.'.format(v))  # '{} is already in the folder.'
    if download_segments:
        exp_query = '%s{%s}' % (rec_cm, ','.join(download_segments))
        r = c.export(exp_query)
        r.download(path('Fits',time))
Example #5
File: web.py  Project: MSTEM-QUDA/swmfpy
def _get_urls_hmi_b720(client, mag_time):
    """Returns for #download_magnetogram_hmi needed urls for hmi.B_720s

    Args:
        client (drms.Client): To query and return urls.
        mag_time (datetime.datetime): To find nearest magnetogram.

    Returns:
        generator that yields (datetime.datetime, str): Time of magnetogram,
            suffix url of magnetogram
    """
    import drms
    query_string = 'hmi.B_720s'
    query_string += f'[{mag_time.year}.'
    query_string += f'{str(mag_time.month).zfill(2)}.'
    query_string += f'{str(mag_time.day).zfill(2)}_'
    query_string += f'{str(mag_time.hour).zfill(2)}'
    query_string += '/1h]'
    data = client.query(query_string, key='T_REC', seg='field')
    times = drms.to_datetime(data[0].T_REC)
    nearest_time = _nearest(mag_time, times)
    # Generator to find the nearest time
    urls = ((data_time, mag_url) for (data_time, mag_url)
            in zip(times, data[1].field) if data_time == nearest_time)
    return urls
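
The _nearest() helper used above is defined elsewhere in web.py and is not shown here. A minimal sketch of the assumed behaviour (pick the element of `times` closest to mag_time):

def _nearest(pivot, items):
    # Assumed helper: return the item of `items` with the smallest absolute
    # time difference to `pivot`.
    return min(items, key=lambda item: abs(item - pivot))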
Example #6
def convert_time_2015(k):
    #change T_REC to datetime type
    k.T_REC = drms.to_datetime(k.T_REC)

    #convert tai time to utc
    t1 = Time(k.T_REC, format='datetime64', scale='tai')
    t2 = t1.utc
    t3 = t2.iso
    k.T_REC = t3
    k.T_REC = pd.to_datetime(k.T_REC)

    #delete first row of df from previous year
    k = k[(k['T_REC'].dt.year != 2014)]
    return (k)
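
The conversion above shifts timestamps by the TAI-UTC offset. A minimal standalone check of that shift (assuming a timestamp from early 2015, when TAI led UTC by 35 s):

import pandas as pd
from astropy.time import Time

t_tai = Time(pd.Timestamp('2015-01-01 00:00:00').to_pydatetime(),
             format='datetime', scale='tai')
print(t_tai.utc.iso)  # expected: 2014-12-31 23:59:25.000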
Example #7
 def _in_time_window(time, start_time, end_time):
     if (start_time is None and end_time is None):
         return True
     try:
         if (start_time is None):
             before_end = (drms.to_datetime(time) <=
                           drms.to_datetime(end_time))
             return before_end
         elif (end_time is None):
             after_start = (drms.to_datetime(time) >=
                            drms.to_datetime(start_time))
             return after_start
         else:
             after_start = (drms.to_datetime(time) >=
                            drms.to_datetime(start_time))
             before_end = (drms.to_datetime(time) <=
                           drms.to_datetime(end_time))
             return (after_start and before_end)
     except:
         print('Impossible to determine if time {} is in [{}, {}]'.format(
             time, start_time, end_time))
         return False
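
A hypothetical call (the method takes no self, so it is presumably a static helper); any string that drms.to_datetime() understands works for the three arguments.

_in_time_window('2014.03.29_17:48:00_TAI',
                '2014.01.01_00:00:00_TAI',
                '2015.01.01_00:00:00_TAI')  # -> True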
Example #8
def fnames(i, j, m):
    os.chdir(wdir)
    os.chdir(path_folder('meta', i, j))
    k = pd.read_csv(os.listdir()[m], index_col='query')
    rec_cm = k.LON_FWT.abs().idxmin()
    k_cm = k.loc[rec_cm]
    t_cm = drms.to_datetime(k.T_REC[rec_cm])
    t_cm_str = t_cm.strftime('%Y%m%d_%H%M%S_TAI')
    os.chdir(wdir)
    os.chdir(path_folder('Fits', i, j))
    fname_mask = '{series}.{sharpnum}.{tstr}.{segment}.fits'
    fname = {
        s: fname_mask.format(series=series,
                             sharpnum=ar_sharpnum(i)[1],
                             tstr=t_cm_str,
                             segment=s)
        for s in segments
    }
    # os.chdir(wdir)
    return fname, k_cm, t_cm_str, t_cm
Example #9
def get_Hmi_sharp():
    c = drms.Client()
    startDate = datetime.strptime('2010.05.01 00:00:00', "%Y.%m.%d %H:%M:%S")

    while startDate.year < 2020:
        startDateString = startDate.strftime('%Y.%m.%d_%H:%M:%S')
        startDate = startDate + relativedelta(weeks=+1)
        endDateString = startDate.strftime('%Y.%m.%d_%H:%M:%S')

        dateString = startDateString + '-' + endDateString
        print(dateString)

        # variable = 'T_REC,HARPNUM,TOTUSJH,TOTPOT,TOTUSJZ,ABSNJZH,SAVNCPP,USFLUX,AREA_ACR,MEANPOT,R_VALUE,SHRGT45,NOAA_AR,NOAA_NUM,NOAA_ARS,QUALITY'
        variable = 'T_REC,HARPNUM,TOTUSJH,TOTPOT,TOTUSJZ,ABSNJZH,SAVNCPP,USFLUX,AREA_ACR,MEANPOT,R_VALUE,SHRGT45,MEANSHR,MEANGAM,MEANGBT,MEANGBZ,MEANGBH,MEANJZH,MEANJZD,MEANALP,NOAA_AR,NOAA_NUM,NOAA_ARS,QUALITY'
        df = c.query('hmi.sharp_720s[][' + dateString + ']', key=variable)

        if (df.size == 0):
            continue
        df.T_REC = drms.to_datetime(df.T_REC)

        conn = sqlite3.connect('HMI_SHARP_SWPC_FINAL.db')
        df.to_sql('02_HMI_SHARP', conn, if_exists='append', index=False)
Example #10
def SHARPtime(sharpnum, method='cm', maxlon=90):
    """
    For a given SHARP, identify a single frame with (if possible) longitude centroid within +-maxlon of Central Meridian.
    - If method='cm', use the frame with timestamp closest to central meridian.
    - If method='maxflux', use the frame with maximum unsigned flux.
    Returns (1) timestamp of chosen frame, and (2) corresponding emergence time (next noon) as a datetime object, and (3) ivalid flag True if valid frame exists and False if frame is outside +-maxlon.
    """

    c = drms.Client()
    # Get time series of unsigned fluxes and longitudes of this SHARP (0 is central meridian):
    k = c.query('hmi.sharp_cea_720s[%i][]' % sharpnum,
                key='HARPNUM, T_REC, USFLUXL, LON_FWT')

    # Find individual record:
    if (method == 'cm'):
        rec_cm = k.LON_FWT.abs().idxmin()
        k_cm = k.loc[rec_cm]
        if (np.abs(k.LON_FWT[rec_cm]) <= maxlon):
            ivalid = True
        else:
            ivalid = False
    if (method == 'maxflux'):
        usfluxl = k.USFLUXL.where(np.abs(k.LON_FWT) <= maxlon, other=0)
        if (usfluxl.max() > 0):
            rec_cm = usfluxl.abs().idxmax()
            k_cm = k.loc[rec_cm]
            ivalid = True
        else:
            rec_cm = 0
            ivalid = False

    t_cm = drms.to_datetime(k.T_REC[rec_cm])

    # Identify emergence (completion) time - next noon:
    twelve_hrs = datetime.timedelta(hours=12)
    t_em = t_cm + twelve_hrs
    t_em = t_em.replace(hour=12, minute=0, second=0)

    return k.T_REC[rec_cm], t_em, ivalid
Example #11
def get_cgemLorentz():
    c = drms.Client()
    startDate = datetime.strptime('2010.05.01 00:00:00', "%Y.%m.%d %H:%M:%S")

    while startDate.year < 2020:
        startDateString = startDate.strftime('%Y.%m.%d_%H:%M:%S')
        startDate = startDate + relativedelta(weeks=+1)
        endDateString = startDate.strftime('%Y.%m.%d_%H:%M:%S')

        dateString = startDateString + '-' + endDateString
        print(dateString)

        variable = 'HARPNUM, T_REC, TOTBSQ, TOTFZ, EPSZ, TOTFY, TOTFX, EPSY, EPSX, QUALITY, NOAA_ARS, NOAA_AR, NOAA_NUM'
        df = c.query('cgem.lorentz[][' + dateString + ']', key=variable)

        if (df.size == 0):
            continue

        df.T_REC = drms.to_datetime(df.T_REC)

        conn = sqlite3.connect('HMI_SHARP_SWPC_FINAL.db')
        df.to_sql('02_CGEM_LORENTZ', conn, if_exists='append', index=False)
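
A hypothetical read-back of the tables written by the two functions above (database and table names as used in the snippets):

import sqlite3
import pandas as pd

conn = sqlite3.connect('HMI_SHARP_SWPC_FINAL.db')
lorentz = pd.read_sql_query('SELECT * FROM "02_CGEM_LORENTZ"', conn,
                            parse_dates=['T_REC'])
conn.close()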
Example #12
# entries from aia.lev1 in this case.
print('Querying series info...')
si = c.info(series)
si_lev1 = c.info(series_lev1)
for k in keys:
    linkinfo = si.keywords.loc[k].linkinfo
    if linkinfo is not None and linkinfo.startswith('lev1->'):
        note_str = si_lev1.keywords.loc[k].note
    else:
        note_str = si.keywords.loc[k].note
    print('%10s : %s' % (k, note_str))

# Get keyword values for the selected timespan and wavelength
print('Querying keyword data...\n -> %s' % qstr)
res = c.query(qstr, key=keys)
print(' -> %d lines retrieved.' % len(res))

# Only use entries with QUALITY==0
res = res[res.QUALITY == 0]
print(' -> %d lines after QUALITY selection.' % len(res))

# Convert T_REC strings to datetime and use it as index for the series
res.index = drms.to_datetime(res.T_REC)

# Create some simple plots
ax = res[['DATAMIN', 'DATAMAX', 'DATAMEAN', 'DATARMS', 'DATASKEW']].plot(
    figsize=(8, 10), subplots=True)
ax[0].set_title(qstr, fontsize='medium')
plt.tight_layout()
plt.show()
Example #13
tsel = '2010.05.01_TAI-2016.04.01_TAI@12h'

# DRMS query string
qstr = '%s[%s]' % (series, tsel)


# Create DRMS JSON client, use debug=True to see the query URLs
c = drms.Client()

# Send request to the DRMS server
print('Querying keyword data...\n -> %s' % qstr)
res = c.query(qstr, key=['T_REC', 'CAPN2', 'CAPS2'])
print(' -> %d lines retrieved.' % len(res))

# Convert T_REC strings to datetime and use it as index for the series
res.index = drms.to_datetime(res.pop('T_REC'))

# Determine smallest timestep
dt = np.diff(res.index.to_pydatetime()).min()

# Make sure the time series contains all time steps (fills gaps with NaNs)
# Note: This does not seem to work with old pandas versions (e.g. v0.14.1)
a = res.asfreq(dt)

# Compute 30d moving average and standard deviation using a boxcar window
win_size = int(30*24*3600/dt.total_seconds())
if tuple(map(int, pd.__version__.split('.')[:2])) >= (0, 18):
    a_avg = a.rolling(win_size, min_periods=1, center=True).mean()
    a_std = a.rolling(win_size, min_periods=1, center=True).std()
else:
    # this is deprecated since pandas v0.18.0
    a_avg = pd.rolling_mean(a, win_size, min_periods=1, center=True)
    a_std = pd.rolling_std(a, win_size, min_periods=1, center=True)
Example #14
import drms

date0 = '2012-07-12T16:00:00Z'

# Convert a DRMS time to an astropy Time object; this can then be converted to TAI.
from astropy.time import Time
td = drms.to_datetime(date0)
print(td)
ta = Time(td, format='datetime', scale='utc')
print(ta)
tt = Time(ta, format='datetime', scale='tai')
print(tt)

from sunpy.time import parse_time
print(parse_time(date0.split('Z')[0]))

print(ta.tai)
print(parse_time(ta.isot))

# test out time differencing
d0 = '2012-07-12T11:00:00.000'
d1 = '2012-07-12T13:00:00.000'
t0 = Time(d0, scale='tai')
t1 = Time(d1, scale='tai')
delta_t = t1 - t0
print()
print('-----------------------')
print(t0)
print(t1)
print(delta_t.sec)
Example #15
    'USFLUX',
    'ERRVF',
    'CRPIX1',
    'CRPIX2',
    'CDELT1',
    'CDELT2',
    'CRVAL1',
    'CRVAL2',
]

# Create DRMS client, use debug=True to see the query URLs.
c = drms.Client(verbose=True)

print('Querying metadata...')
kw = c.query(f'{series}[{int(sharpnum)}]', key=kwlist, rec_index=True)
t = drms.to_datetime(kw.T_REC)

print('Finding central meridian crossing...')
rec_cm = kw.LON_FWT.abs().idxmin()
k_cm = kw.loc[rec_cm]
t_cm = drms.to_datetime(kw.T_REC[rec_cm])
print('-> rec_cm:', rec_cm, '@', kw.LON_FWT[rec_cm], 'deg')

# Check if any files were already downloaded.
fnames = {}
download_segments = []
t_cm_str = t_cm.strftime('%Y%m%d_%H%M%S_TAI')
for s in segments:
    fnames[s] = fname_fmt_str.format(series=series,
                                     sharpnum=sharpnum,
                                     tstr=t_cm_str,
Example #16
from matplotlib import dates
import drms

import numpy
numpy.set_printoptions(threshold=1600)

file = open('testfile.txt', 'w')
series = 'hmi.sharp_cea_720s'
sharpnum = 5298  # NOAA12297
kwlist = ['T_REC', 'LON_FWT', 'TOTPOT', 'TOTUSJH', 'TOTUSJZ', 'AREA_ACR']

c = drms.Client()
k = c.query('%s[%d]' % (series, sharpnum), key=kwlist, n='none')
file.write(str(k))
file.close()
k.index = drms.to_datetime(k.T_REC)
t_cm = k.LON_FWT.abs().argmin()
print(k)
plt.rc('axes', titlesize='medium')
plt.rc('axes.formatter', use_mathtext=True)
plt.rc('mathtext', default='regular')
plt.rc('legend', fontsize='medium')

fig, ax = plt.subplots(2, 2, sharex=True, figsize=(10, 6))

axi = ax[0, 0]
axi.plot(k.index, k.TOTPOT, '.', ms=2, label='TOTPOT')
axi.set_title('Total Photospheric Magnetic Free Energy')
axi.set_ylabel(r'Ergs $cm^{-1}$', size=15)

axi = ax[0, 1]
Example #17
tsel = '2010.05.01_TAI-2016.04.01_TAI@6h'

# DRMS query string
qstr = '%s[%s]' % (series, tsel)


# Create DRMS JSON client, use debug=True to see the query URLs
c = drms.Client()

# Send request to the DRMS server
print('Querying keyword data...\n -> %s' % qstr)
res = c.query(qstr, key=['T_REC', 'DATAMEAN', 'DATARMS'])
print(' -> %d lines retrieved.' % len(res))

# Convert T_REC strings to datetime and use it as index for the series
res.index = drms.to_datetime(res.pop('T_REC'))

# Note: DATARMS contains the standard deviation, not the RMS!
t = res.index
avg = res.DATAMEAN/1e3
std = res.DATARMS/1e3

# Create plot
fig, ax = plt.subplots(1, 1, figsize=(15, 7))
ax.set_title(qstr, fontsize='medium')
ax.fill_between(
    t, avg+std, avg-std, edgecolor='none', facecolor='b', alpha=0.3,
    interpolate=True)
ax.plot(t, avg, color='b')
ax.set_xlabel('Time')
ax.set_ylabel('Disk-averaged continuum intensity [kDN/s]')
Example #18
def test_corner_case_series(time_series, expected):
    assert pd.isnull(drms.to_datetime(time_series)).equals(expected)
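
A hypothetical parametrization for the fixture-style arguments used in these tests, assuming drms.to_datetime() coerces unparseable entries to NaT and simply drops the '_TAI' suffix (no TAI-to-UTC conversion is applied):

import drms
import pandas as pd
import pytest

@pytest.mark.parametrize('time_series, expected', [
    (pd.Series(['2010.05.01_00:00:00_TAI', 'not-a-time']),
     pd.Series([False, True])),
])
def test_corner_case_series_sketch(time_series, expected):
    assert pd.isnull(drms.to_datetime(time_series)).equals(expected)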
Example #19
def test_force_string(time_string, expected):
    assert drms.to_datetime(time_string, force=True) == expected
Example #20
def test_z_leap_string(time_string, expected):
    assert drms.to_datetime(time_string) == expected
Example #21
def main(image_size_output, path_to_mag_cube, mag_cube_name, base, mission):

    warnings.simplefilter(action="ignore", category=FutureWarning)
    warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

    ### This puts all the FITS header keywords, for all the MDI or HMI data products available, into a pandas dataframe called mag_keys. ###
    ### This part takes about 10 minutes ###

    client = drms.Client()
    query_mag = 'mdi.fd_M_96m_lev182[]'  ### or ('hmi.M_720s') if want SDO HMI instead of SOHO MDI
    mag_keys = client.query(query_mag, key=drms.const.all)
    print('len(mag_keys):', len(mag_keys))

    print('image_size_output:', image_size_output)
    print('path_to_mag_cube:', path_to_mag_cube)
    print('mag_cube_name:', mag_cube_name)
    print('base:', base)
    print('mission:', mission)

    mag_keys_list = list(
        client.keys('mdi.fd_M_96m_lev182'
                    ))  #or ('hmi.M_720s') if want SDO HMI instead of SOHO MDI
    print('mag_keys_list:', mag_keys_list)

    cube_orig = h5py.File(f'{path_to_mag_cube}{mag_cube_name}', 'r')
    print(list(cube_orig.keys()))

    cube_orig_data = cube_orig[list(cube_orig.keys(
    ))[0]][:]  #cube_orig[f'{base}_{mission}_{image_size_output}'][:]
    print('np.shape(cube_orig_data):', np.shape(cube_orig_data))

    times_list = csv_times_reader(path_to_mag_cube,
                                  pattern=f'*{base}*{mission}*[!sync].csv')

    print('times_list[0:10]:', times_list[0:10])
    print('times_list[-10:]:', times_list[-10:])
    print('np.shape(times_list):', np.shape(times_list))

    ### Create a cube copy with the data from the original cube and add the metadata via attributes, which we can now write ###
    full_mag_cube_name = f'{path_to_mag_cube}{mag_cube_name}'
    mag_cube_name_new = full_mag_cube_name.split(
        '.')[0] + '_retroactivemetadata.h5'
    print(mag_cube_name_new)

    data_cube_new = h5py.File(mag_cube_name_new, 'w')
    data_cube_new.create_dataset(f'{base}_{mission}_{image_size_output}',
                                 data=cube_orig_data,
                                 compression="gzip")

    counter = 0
    meta_data_dict = {}

    for t_pre in tqdm(times_list):  # [0:2] safety check

        t_drms_split = str(drms.to_datetime(t_pre)).split(' ')
        t_tai = '_'.join(
            (t_drms_split[0].replace('-', '.'), t_drms_split[1])) + '_TAI'

        ### The original method (line below) never completes on JSOC: it starts fast but after ~800 files slows down exponentially ###
        #query = client.query(f'mdi.fd_M_96m_lev182[{t_tai}]', key = client.keys('mdi.fd_M_96m_lev182')) with client = drms.Client(email,verbose=False)

        query_pre = mag_keys.loc[mag_keys['T_REC'] == t_tai]
        query = mag_keys.loc[query_pre.index[0]]

        query_metadata_update = downsample_header_local(
            mission, image_size_output, query, mag_keys)

        for j, key in enumerate(mag_keys):
            if (key == 'COMMENT') or (key == 'HISTORY'):
                key1 = f'{key}{counter}'
                ##########data_cube_new.attrs[f'{key1}_{counter}'] = query_metadata_update[key] #[0]
                meta_data_dict[f'{key1}_{counter}'] = query_metadata_update[
                    key]
            else:
                ##########data_cube_new.attrs[f'{key}_{counter}'] = query_metadata_update[key] #[0]
                meta_data_dict[f'{key}_{counter}'] = query_metadata_update[key]

        #########data_cube_new.attrs[f'COMMENT_{counter}'] = f'Zeros outside solar disk for {base}'
        meta_data_dict[
            f'COMMENT_{counter}'] = f'Zeros outside solar disk for {base}'

        counter += 1

    ########data_cube_new.attrs.update(meta_data_dict)
    data_cube_new.create_dataset(
        f'{base}_{mission}_{image_size_output}_metadata',
        data=json.dumps(meta_data_dict, cls=NpEncoder))
    data_cube_new.attrs['NOTE'] = 'JSON serialization'
    data_cube_new.close()
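
The NpEncoder passed to json.dumps() above is not shown; a minimal sketch of the assumed numpy-aware JSON encoder:

import json
import numpy as np

class NpEncoder(json.JSONEncoder):
    # Assumed helper: make numpy scalars/arrays JSON-serializable so the
    # metadata dict can be stored as a string dataset in the HDF5 file.
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)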
Example #22
def test_time_series(time_series, expected):
    assert drms.to_datetime(time_series).equals(expected)
Example #23
    def download_jsoc_data(
            self,
            files_core_name='jsoc_data',
            directory=None,
            goes_data_path=None,
            goes_row_pattern=r'(B|C|M|X)[1-9]\.[0-9],[1-9][0-9]*,.*,.*,.*,.*',
            start_time=None,
            end_time=None,
            hours_before_event=24,
            sample_time='@1h',
            limit=400):

        if (directory is None and
                not os.path.isdir(os.path.join(self.main_path, 'JSOC-Data'))):
            os.mkdir(os.path.join(self.main_path, 'JSOC-Data'))
            os.chdir(os.path.join(self.main_path, 'JSOC-Data'))
        elif (os.path.isdir(os.path.join(self.main_path, directory))):
            os.chdir(os.path.join(self.main_path, directory))
        else:
            print('The path {} does not exist.'.format(
                os.path.join(self.main_path, directory)))
            return False

        essential_ar_attrs = {'NOAA_AR', 'HARPNUM', 'LAT_FWT', 'LON_FWT'}
        essential_goes_attrs = {
            'start_time', 'peak_time', 'noaa_active_region', 'event_class'
        }
        jsoc_serie = 'hmi.sharp_cea_720s[1-7256]'

        # Verifications of the path to GOES data and the format of the .csv
        if (goes_data_path is None):
            if (os.path.exists(os.path.join(self.main_path, 'GOES_data.csv'))):
                goes_data_path = os.path.join(self.main_path, 'GOES_data.csv')
            else:
                print('Please enter a valid path to the GOES data.')
                return False
        if (not os.path.exists(goes_data_path)):
            print('Please enter a valid path to the GOES data.')
            return False

        missing_goes_attrs = self._check_essential_attributes(
            set(self.goes_attrs), essential_goes_attrs)
        if (len(missing_goes_attrs) > 0):
            print('Missing attributes in GOES file : {}.'.format(
                missing_goes_attrs))
            return False

        [start, peak, noaa_ar] = [
            self.goes_attrs.index('start_time'),
            self.goes_attrs.index('peak_time'),
            self.goes_attrs.index('noaa_active_region')
        ]

        total_length = sum(1 for line in open(goes_data_path, 'r'))
        self.ar_attrs += self._check_essential_attributes(
            set(self.ar_attrs), essential_ar_attrs)

        # Estimation of the number of solar eruption videos considered.
        # Limit the number of videos if 'limit' is reached.
        nb_positive = 0
        considered_events = []
        with open(goes_data_path, 'r', newline='') as file:
            reader = csv.reader(file, delimiter=',')
            counter = 0
            for event in reader:
                counter += 1
                if (self._in_time_window(event[start], start_time, end_time)
                        and re.match(goes_row_pattern, str.join(',', event))
                        and int(event[noaa_ar]) > 0):
                    nb_positive += 1
                    considered_events += [counter]
        if (limit is not None and nb_positive > limit):
            events_really_considered = np.random.choice(
                considered_events, limit)

        # Summary
        print('Nb of videos to download: {}/{}'.format(nb_positive, counter))
        print('Look up of pictures until {}h before an event.'.format(
            hours_before_event))

        with open(goes_data_path, 'r', newline='') as file:
            reader = csv.reader(file, delimiter=',')
            client = drms.Client()
            mem = 0  # Set a counter for the current cache memory (in bytes) used by videos
            part_counter = 0
            vid_counter = 0
            counter = 0
            current_save_file = h5py.File(
                '{}_part_{}.hdf5'.format(files_core_name, part_counter), 'w')
            for event in reader:
                counter += 1
                if (re.match(goes_row_pattern, str.join(',', event))
                        and (limit is None or nb_positive <= limit or
                             (counter in events_really_considered))
                        and self._in_time_window(event[start], start_time,
                                                 end_time)):
                    ar_nb = int(event[noaa_ar])
                    # We process only numbered flares
                    if (ar_nb > 0):
                        peak_time = drms.to_datetime(event[peak])
                        start_time = peak_time - timedelta(
                            hours=hours_before_event)
                        # Change the date format
                        peak_time = self._UTC2JSOC_time(str(peak_time))
                        start_time = self._UTC2JSOC_time(str(start_time))
                        # Do the request to JSOC database
                        query = '{}[{}-{}{}]'.format(jsoc_serie, start_time,
                                                     peak_time, sample_time)
                        if (len(self.ar_segs) == 0):
                            keys = client.query(query, key=self.ar_attrs)
                        else:
                            keys, segments = client.query(query,
                                                          key=self.ar_attrs,
                                                          seg=self.ar_segs)
                        try:
                            # Downloads the video of this solar flare and construct
                            # the HDF5 file.
                            nb_frame = len(keys.NOAA_AR) - 1
                            dumping = False
                            current_vid = current_save_file.create_group(
                                'video{}'.format(vid_counter))
                            vid_counter += 1
                            for k in range(len(self.goes_attrs)):
                                current_vid.attrs[
                                    self.goes_attrs[k]] = event[k]

                            print(
                                'Trying to extract data for video {} corresponding to event {}'
                                .format(vid_counter, event[peak]))
                            frame_counter = 0
                            while (nb_frame > -1 and not dumping):
                                right_pic  = (keys.NOAA_AR[nb_frame] == ar_nb)\
                                   and abs(keys.LAT_FWT[nb_frame]) <= 68\
                                   and abs(keys.LON_FWT[nb_frame]) <= 68

                                #Creates a new frame and add it to the video
                                if (right_pic):
                                    current_frame = current_vid.create_group(
                                        'frame{}'.format(frame_counter))
                                    frame_counter += 1
                                    for k in range(len(self.ar_attrs)):
                                        current_frame.attrs[
                                            self.ar_attrs[k]] = keys[
                                                self.ar_attrs[k]][nb_frame]
                                    current_frame.attrs['SEGS'] = np.string_(
                                        list(self.ar_segs))
                                    data_shape = None  # unknown
                                    frame = None
                                    seg_counter = 0
                                    for seg in self.ar_segs:
                                        url = 'http://jsoc.stanford.edu' + segments[
                                            seg][nb_frame]
                                        data = np.array(fits.getdata(
                                            url, cache=False),
                                                        dtype=np.float32)
                                        if (data_shape is None):
                                            data_shape = data.shape
                                            frame = np.zeros(
                                                data_shape +
                                                (len(self.ar_segs), ),
                                                dtype=np.float32)
                                        frame[:, :, seg_counter] = data
                                        seg_counter += 1
                                        mem += data.nbytes
                                    current_frame.create_dataset('channels',
                                                                 data=frame)

                                    if (mem /
                                        (1024 * 1024) > 2 * self.mem_limit):
                                        print(
                                            'Memory usage > {}MB. Dumping...'.
                                            format(2 * self.mem_limit))
                                        dumping = True
                                nb_frame -= 1
                            if (frame_counter == 0):
                                #    del current_save_file['video{}'.format(vid_counter)]
                                #    vid_counter -=1
                                print('No frame downloaded, video erased.')
                            else:
                                print(
                                    'Video {} associated to event {} extracted ({} frames)'
                                    .format(vid_counter, event[peak],
                                            frame_counter))

                        except:
                            print(
                                'Impossible to extract data for event {0} (nb {1})'
                                .format(event[peak], counter))
                            print(traceback.format_exc())
                else:  # if the row pattern does not match
                    print('Row ignored: ' + str.join(',', event))

                if (int(counter * 100.0 / total_length) % 5 == 0):
                    print(
                        str(counter * 100.0 / total_length) +
                        '% of GOES data set analyzed')

                # Save the current HDF5 file. Reset vid_counter for the next HDF5 file.
                if (mem / (1024 * 1024) > self.mem_limit):
                    current_save_file.close()
                    part_counter += 1
                    vid_counter = 0
                    mem = 0
                    current_save_file = h5py.File(
                        '{}_part_{}.hdf5'.format(files_core_name,
                                                 part_counter), 'w')

        # After the downloading, close the last file !
        current_save_file.close()
        print('Data base has been downloaded successfully !')
        return True
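
The self._UTC2JSOC_time() helper used above is not shown. A plausible sketch, assuming it rewrites str(datetime) output ('YYYY-MM-DD HH:MM:SS') into the 'YYYY.MM.DD_HH:MM:SS' form used in the JSOC record-set queries elsewhere in this listing:

def _UTC2JSOC_time(utc_str):
    # Assumed conversion: '2014-03-29 17:48:00' -> '2014.03.29_17:48:00'
    date_part, time_part = utc_str.split(' ')
    return date_part.replace('-', '.') + '_' + time_part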
Example #24
import numpy as n
#%matplotlib inline


series = 'hmi.sharp_cea_720s'
sharpnum = 377 #sharp number
segments = ['magnetogram', 'continuum']
kwlist = ['T_REC', 'LON_FWT', 'OBS_VR', 'CROTA2',
          'CRPIX1', 'CRPIX2', 'CDELT1', 'CDELT2', 'CRVAL1', 'CRVAL2']

c = drms.Client(email='*****@*****.**', verbose=True) ##Use your own email address.
k = c.query('%s[%d]' % (series, sharpnum), key=kwlist, rec_index=True)

rec_cm = k.LON_FWT.abs().idxmin()
k_cm = k.loc[rec_cm]
t_cm = drms.to_datetime(k.T_REC[rec_cm])
print(rec_cm, '@', k.LON_FWT[rec_cm], 'deg')
print('Timestamp:', t_cm)

t_cm_str = t_cm.strftime('%Y%m%d_%H%M%S_TAI')
fname_mask = '{series}.{sharpnum}.{tstr}.{segment}.fits'
fnames = {
    s: fname_mask.format(
        series=series, sharpnum=sharpnum, tstr=t_cm_str, segment=s)
    for s in segments}

download_segments = []
for k, v in fnames.items():
    if not os.path.exists(v):
        download_segments.append(k)
Example #25
        note_str = series_info_lev1.keywords.loc[key].note
    else:
        note_str = series_info.keywords.loc[key].note
    print(f'{key:>10} : {note_str}')

###############################################################################
# Construct the DRMS query string: "Series[timespan][wavelength]"

qstr = 'aia.lev1_euv_12s[2014-01-01T00:00:01Z/365d@1d][335]'

# Get keyword values for the selected timespan and wavelength
print(f'Querying keyword data...\n -> {qstr}')
result = client.query(qstr, key=keys)
print(f' -> {len(result)} lines retrieved.')

# Only use entries with QUALITY==0
result = result[result.QUALITY == 0]
print(f' -> {len(result)} lines after QUALITY selection.')

# Convert T_REC strings to datetime and use it as index for the series
result.index = drms.to_datetime(result.T_REC)

###############################################################################
# Create some simple plots

ax = result[['DATAMIN', 'DATAMAX', 'DATAMEAN', 'DATARMS',
             'DATASKEW']].plot(figsize=(8, 10), subplots=True)
ax[0].set_title(qstr, fontsize='medium')
plt.tight_layout()
plt.show()
Example #26
def test_corner_case(time_string, expected):
    assert pd.isnull(drms.to_datetime(time_string)) == expected
    assert isinstance(drms.to_datetime([]), pd.Series)
    assert drms.to_datetime([]).empty
Example #27
    def download_jsoc_data(
            self,
            files_core_name='jsoc_data',
            directory=None,
            goes_data_path=None,
            goes_row_pattern=r'(B|C|M|X)[1-9]\.[0-9],[1-9][0-9]*,.*,.*,.*,.*',
            start_time=None,
            end_time=None,
            nb_frames_before_event=24,
            sample_time=1,  # in hours
            limit=400):

        if (directory is None and
                not os.path.isdir(os.path.join(self.main_path, 'JSOC-Data'))):
            os.mkdir(os.path.join(self.main_path, 'JSOC-Data'))
            os.chdir(os.path.join(self.main_path, 'JSOC-Data'))
        elif (os.path.isdir(os.path.join(self.main_path, directory))):
            os.chdir(os.path.join(self.main_path, directory))
        else:
            print('The path {} does not exist.'.format(
                os.path.join(self.main_path, directory)))
            return False

        essential_ar_attrs = {
            'NOAA_AR', 'HARPNUM', 'LAT_FWT', 'LON_FWT', 'T_REC'
        }
        essential_goes_attrs = {
            'start_time', 'peak_time', 'noaa_active_region', 'event_class'
        }
        jsoc_serie = 'hmi.sharp_cea_720s[1-7256]'

        # Verifications of the path to GOES data and the format of the .csv
        if (goes_data_path is None):
            if (os.path.exists(os.path.join(self.main_path, 'GOES_data.csv'))):
                goes_data_path = os.path.join(self.main_path, 'GOES_data.csv')
            else:
                print('Please enter a valid path to the GOES data.')
                return False
        if (not os.path.exists(goes_data_path)):
            print('Please enter a valid path to the GOES data.')
            return False

        missing_goes_attrs = self._check_essential_attributes(
            set(self.goes_attrs), essential_goes_attrs)
        if (len(missing_goes_attrs) > 0):
            print('Missing attributes in GOES file : {}.'.format(
                missing_goes_attrs))
            return False

        [start, peak, noaa_ar] = [
            self.goes_attrs.index('start_time'),
            self.goes_attrs.index('peak_time'),
            self.goes_attrs.index('noaa_active_region')
        ]

        total_length = sum(1 for line in open(goes_data_path, 'r'))
        self.ar_attrs += self._check_essential_attributes(
            set(self.ar_attrs), essential_ar_attrs)

        # Estimation of the number of solar eruption videos considered.
        # Limit the number of videos if 'limit' is reached.
        nb_positive = 0
        considered_events = []
        with open(goes_data_path, 'r', newline='') as file:
            reader = csv.reader(file, delimiter=',')
            counter = 0
            for event in reader:
                counter += 1
                if (self._in_time_window(event[start], start_time, end_time)
                        and re.match(goes_row_pattern, str.join(',', event))
                        and int(event[noaa_ar]) > 0):
                    nb_positive += 1
                    considered_events += [counter]
        if (limit is not None and nb_positive > limit):
            events_really_considered = np.random.choice(
                considered_events, limit)

        # Summary
        print('Nb of videos to download: {}/{}'.format(nb_positive, counter))
        print('Look up of pictures until {}h before an event.'.format(
            sample_time * nb_frames_before_event))

        with open(goes_data_path, 'r', newline='') as file:
            reader = csv.reader(file, delimiter=',')
            client = drms.Client()
            mem = 0  # Set a counter for the current cache memory (in bytes) used by videos
            part_counter = 0
            vid_counter = 0
            counter = 0
            current_save_file = h5py.File(
                '{}_part_{}.hdf5'.format(files_core_name, part_counter), 'w')
            # Get the time delta for the look-up in the JSOC database (with a margin)
            dt = timedelta(hours=sample_time * (nb_frames_before_event + 1))
            # Change the sampling rate format
            sample_time = '@{}h'.format(sample_time)

            for event in reader:
                counter += 1
                if (re.match(goes_row_pattern, str.join(',', event))
                        and (limit is None or nb_positive <= limit or
                             (counter in events_really_considered))
                        and self._in_time_window(event[start], start_time,
                                                 end_time)):
                    ar_nb = int(event[noaa_ar])
                    # We process only numbered flares
                    if (ar_nb > 0):
                        peak_time = drms.to_datetime(event[peak])
                        start_time = peak_time - dt
                        # Change the date format
                        peak_time = self._UTC2JSOC_time(str(peak_time))
                        start_time = self._UTC2JSOC_time(str(start_time))

                        # Do the request to JSOC database
                        query = '{}[{}-{}{}]'.format(jsoc_serie, start_time,
                                                     peak_time, sample_time)
                        if (len(self.ar_segs) == 0):
                            keys = client.query(query, key=self.ar_attrs)
                        else:
                            keys, segments = client.query(query,
                                                          key=self.ar_attrs,
                                                          seg=self.ar_segs)
                        try:
                            # Get only the frames that are:
                            # * related to our AR (same NOAA)
                            # * within +/- 68deg from the central meridian
                            # * before the peak time
                            frames_keys = self._get_frames_key_from_query(
                                ar_nb, peak_time, keys)

                            # Do not download videos with missing data
                            if (len(frames_keys) < nb_frames_before_event):
                                print(
                                    'Only {} (< {}) frames found for the SF produced on {}. Ignored.'
                                    .format(len(frames_keys),
                                            nb_frames_before_event,
                                            event[peak]))
                            else:
                                current_vid = current_save_file.create_group(
                                    'video{}'.format(vid_counter))
                                vid_counter += 1
                                for k in range(len(self.goes_attrs)):
                                    current_vid.attrs[
                                        self.goes_attrs[k]] = event[k]
                                if (len(frames_keys) > nb_frames_before_event):
                                    print(
                                        '{} frames are found for the SF produced on {}, only the last {} are considered'
                                        .format(len(frames_keys), event[peak],
                                                nb_frames_before_event))
                                    frames_keys = frames_keys[
                                        len(frames_keys) -
                                        nb_frames_before_event:]
                                # We download each video with the LAST frame corresponding to the eruption
                                for i in range(nb_frames_before_event):
                                    current_frame = current_vid.create_group(
                                        'frame{}'.format(i))
                                    # Includes the specific attributes to the frame
                                    current_frame.attrs['SEGS'] = np.string_(
                                        list(self.ar_segs))
                                    for a in self.ar_attrs:
                                        current_frame.attrs[a] = keys[a][
                                            frames_keys[i]]

                                    # Downloads the specific segments
                                    data_frame = []
                                    for seg in self.ar_segs:
                                        url = 'http://jsoc.stanford.edu' + segments[
                                            seg][frames_keys[i]]
                                        data = np.array(fits.getdata(
                                            url, cache=False),
                                                        dtype=np.float32)
                                        data_frame += [data]
                                        mem += data.nbytes
                                    data_frame = np.array(data_frame,
                                                          dtype=np.float32)
                                    # Creates the actual data set in the hdf5 file
                                    current_frame.create_dataset(
                                        'channels', data=data_frame)

                        except:
                            print('Impossible to extract data for event {0}.'.
                                  format(event[peak]))
                            print(traceback.format_exc())
                else:  # if the row pattern does not match
                    print('Row ignored: ' + str.join(',', event))

                if (counter % 20 == 0):
                    print('{:0.2f}% of GOES data set analyzed'.format(
                        counter * 100.0 / total_length))

                # Save the current HDF5 file. Reset vid_counter for the next HDF5 file.
                if (mem / (1024 * 1024) > self.mem_limit):
                    current_save_file.close()
                    part_counter += 1
                    vid_counter = 0
                    mem = 0
                    current_save_file = h5py.File(
                        '{}_part_{}.hdf5'.format(files_core_name,
                                                 part_counter), 'w')

        # After the downloading, close the last file !
        current_save_file.close()
        print('The data base has been downloaded successfully !')
        return True
Example #28
    def _extract_timeseries_from_video(vid,
                                       scalars,
                                       channels,
                                       time_event_last_frame=True):
        res = [[] for k in range(len(scalars))]
        sample_time = []
        tf = drms.to_datetime(vid.attrs['end_time'])
        last_frame = None
        for frame_key in sorted(list(vid.keys()),
                                key=lambda frame_key: float(frame_key[5:])):
            if ('channels' in vid[frame_key].keys()
                    and len(vid[frame_key]['channels'].shape) == 3):
                ti = drms.to_datetime(vid[frame_key].attrs['T_REC'])
                sample_time += [(tf - ti).total_seconds() / 60]
                i = 0
                for scalar in scalars:
                    if (scalar == 'l1_err' or scalar == 'TV'):
                        l1_err = 0
                        TV = 0
                        try:
                            this_frame = Data_Gen._extract_frame(
                                vid[frame_key]['channels'],
                                vid[frame_key].attrs['SEGS'], channels)
                            if (last_frame is None):
                                last_frame = this_frame
                            for c in range(last_frame.shape[2]):
                                if (channels is None
                                        or vid[frame_key].attrs['SEGS']
                                    [c].decode() in channels):
                                    if (scalar == 'l1_err'):
                                        l1_err += np.sum(
                                            np.abs(
                                                sk.resize(this_frame[:, :, c],
                                                          last_frame.shape[:2],
                                                          preserve_range=True)
                                                - last_frame[:, :, c]))
                                    else:
                                        # isotropic total variation:
                                        # sum over pixels of sqrt(dx^2 + dy^2)
                                        TV += np.sum(
                                            np.sqrt(
                                                np.square(
                                                    np.diff(this_frame[:, :, c],
                                                            axis=0)[:, 1:]) +
                                                np.square(
                                                    np.diff(this_frame[:, :, c],
                                                            axis=1)[1:, :])))
                            if (channels is None):
                                nb_channels = last_frame.shape[2]
                            else:
                                nb_channels = len(channels)
                            normalization = np.prod(
                                last_frame.shape[0:2]) * nb_channels
                            if (scalar == 'TV'):
                                res[i] += [TV / normalization]
                            else:
                                res[i] += [l1_err / normalization]
                        except:
                            sample_time = sample_time[:-1]
                            print('Frame {} not extracted.'.format(frame_key))
                            print(traceback.format_exc())
                    else:
                        res[i] += [vid[frame_key].attrs[scalar]]
                    i += 1
                last_frame = this_frame

        if (time_event_last_frame):
            return np.flip(np.array(res),
                           axis=1), np.flip(np.array(sample_time), axis=0)
        return np.array(res), np.array(sample_time)
Example #29
    def _download_images(self, fits_directory: str, records: List[Tuple[str,
                                                                        str]]):
        fits_directory = os.path.join(fits_directory, "_fits_temp")
        os.makedirs(fits_directory, exist_ok=True)

        logger.debug(
            f'Downloading {len(records)} FITS files into {fits_directory}...')

        for record, url, extra_keys in records:
            record_match = self.RECORD_PARSE_REGEX.match(record)

            if record_match is None:
                raise Exception(f"Invalid record format '{record}'")

            record_date_raw, record_wavelength = record_match.groups()

            record_date = dt.datetime.strptime(
                record_date_raw, self.RECORD_DATE_FORMAT_HMI
                if len(record_wavelength) == 1 else self.RECORD_DATE_FORMAT)

            if len(record_wavelength) == 1:
                record_wavelength = self.HMI_PARSE_REGEX.match(
                    record).groups()[0]

            output_file_name = f"{record_date:%Y-%m-%dT%H%M%S}_{record_wavelength}.fits"
            fp = os.path.join(fits_directory, output_file_name)
            if not os.path.isfile(
                    fp):  #TODO: Check for corruption, incomplete files
                retries = 0
                while True:
                    try:
                        urllib.request.urlretrieve(url, fp)
                    except Exception as e:
                        retries += 1
                        if retries % 100 == 0:
                            logger.info(
                                f'Failed fetching FITS %s after {retries} retries: %s',
                                url, e)
                            if isinstance(e, URLError) and isinstance(
                                    e.reason, ConnectionRefusedError):
                                logger.info('waiting for a while longer...')
                            else:
                                break
                        time.sleep(0.5)
                    else:
                        logger.info(f'{retries} retries')
                        # extend HMI Fits with extra keys
                        if extra_keys is not None:
                            try:
                                data, header = fits.getdata(fp, header=True)
                                if header['BITPIX'] == -32 or header[
                                        'BITPIX'] == -64:
                                    del header[
                                        'BLANK']  # https://github.com/astropy/astropy/issues/7253
                                for k in extra_keys.items():  # .iteritems() was removed in pandas 2.0
                                    if k[1] == 'Invalid KeyLink':
                                        logger.warning(
                                            f'Invalid KeyLink for {k[0]}, {fp}'
                                        )
                                        continue
                                    if k[0].upper() not in self.DATE_KEYS:
                                        header[k[0]] = k[1]
                                    else:
                                        pdt = drms.to_datetime(
                                            k[1]).to_pydatetime()
                                        if pdt is not pd.NaT:
                                            header[k[0]] = pdt.strftime(
                                                "%Y-%m-%dT%H%M%S")
                                fits.writeto(fp, data, header, overwrite=True)
                            except Exception as e:
                                logger.error(
                                    f"Unable to extend HMI file {fp}, removing & skipping... {e}"
                                )
                                try:
                                    os.remove(fp)
                                except Exception as e2:
                                    logger.error(
                                        f'Was unable to delete file {fp}, {e2}'
                                    )
                                continue

                        break
            else:
                logger.debug(f'Already found {fp}')

        logger.debug("Downloaded %d files to %s", len(records), fits_directory)
Example #30
# entries from aia.lev1 in this case.
print('Querying series info...')
si = c.info(series)
si_lev1 = c.info(series_lev1)
for k in keys:
    linkinfo = si.keywords.loc[k].linkinfo
    if linkinfo is not None and linkinfo.startswith('lev1->'):
        note_str = si_lev1.keywords.loc[k].note
    else:
        note_str = si.keywords.loc[k].note
    print('%10s : %s' % (k, note_str))

# Get keyword values for the selected timespan and wavelength
print('Querying keyword data...\n -> %s' % qstr)
res = c.query(qstr, key=keys)
print(' -> %d lines retrieved.' % len(res))

# Only use entries with QUALITY==0
res = res[res.QUALITY == 0]
print(' -> %d lines after QUALITY selection.' % len(res))

# Convert T_REC strings to datetime and use it as index for the series
res.index = drms.to_datetime(res.T_REC)

# Create some simple plots
ax = res[['DATAMIN', 'DATAMAX', 'DATAMEAN', 'DATARMS',
          'DATASKEW']].plot(figsize=(8, 10), subplots=True)
ax[0].set_title(qstr, fontsize='medium')
plt.tight_layout()
plt.show()