Example #1
def main():
    """
    """
    file_base = '/store/msrad/radar/pyrad_products/rad4alp_hydro_PHA/'
    time_dir_list = ['2017-06-29']
    trt_cell_id = '2017062913000174'

    datatype_list = ['dBZc', 'ZDRc', 'RhoHVc', 'KDPc', 'TEMP', 'hydro']
    dataset_list = [
        'reflectivity', 'ZDRc', 'RhoHVc', 'KDPc', 'temperature', 'hydroclass'
    ]

    print("====== Plot time-hist started: %s" %
          datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"))
    atexit.register(_print_end_msg, "====== Plot time-hist finished: ")

    for time_dir in time_dir_list:
        for i, datatype in enumerate(datatype_list):
            dataset = dataset_list[i]
            file_path = file_base + time_dir + '/' + dataset + '_trt_traj/HISTOGRAM/'
            flist = glob.glob(file_path + '*_' + trt_cell_id +
                              '_histogram_*_' + datatype + '.csv')

            if not flist:
                warn('No histogram files found in ' + file_path +
                     ' for TRT cell ' + trt_cell_id)
                continue

            tbin_edges, bin_edges, data_ma = read_histogram_ts(flist, datatype)

            basepath_out = os.path.dirname(flist[0])
            fname = (basepath_out + '/' + trt_cell_id + '_trt_HISTOGRAM_' +
                     datatype + '.png')
            field_name = get_fieldname_pyart(datatype)
            field_dict = get_metadata(field_name)
            titl = 'TRT cell ' + trt_cell_id + '\n' + get_field_name(
                field_dict, field_name)

            _plot_time_range(tbin_edges,
                             bin_edges,
                             data_ma,
                             'frequency_of_occurrence', [fname],
                             titl=titl,
                             ylabel=get_colobar_label(field_dict, field_name),
                             vmin=0.,
                             vmax=np.max(data_ma),
                             figsize=[10, 8],
                             dpi=72)

            print("----- plot to '%s'" % fname)
Example #2
def main():
    """
    """

    # parse the arguments
    parser = argparse.ArgumentParser(
        description='Entry to Pyrad processing framework')

    # positional arguments
    parser.add_argument('proc_cfgfile',
                        type=str,
                        help='name of main configuration file')

    parser.add_argument('days',
                        nargs='+',
                        type=str,
                        help='Dates to process. Format YYYY-MM-DD')

    # keyword arguments
    parser.add_argument('--trtbase',
                        type=str,
                        default='/store/msrad/radar/trt/',
                        help='name of folder containing the TRT cell data')

    parser.add_argument(
        '--radarbase',
        type=str,
        default='/store/msrad/radar/pyrad_products/rad4alp_hydro_PHA/',
        help='name of folder containing the radar data')

    parser.add_argument('--cfgpath',
                        type=str,
                        default=os.path.expanduser('~') +
                        '/pyrad/config/processing/',
                        help='configuration file path')

    parser.add_argument(
        '--datatypes',
        type=str,
        default='hydro,KDPc,dBZc,RhoHVc,TEMP,ZDRc',
        help='Names of the polarimetric moments to process. Comma separated')

    parser.add_argument(
        '--datasets',
        type=str,
        default='hydroclass,KDPc,reflectivity,RhoHVc,temperature,ZDRc',
        help='Names of the dataset directories. Comma separated')

    parser.add_argument('--hres',
                        type=float,
                        default=250.,
                        help='Height resolution')

    args = parser.parse_args()

    print("====== PYRAD TRT data processing started: %s" %
          datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"))
    atexit.register(_print_end_msg,
                    "====== PYRAD TRT data processing finished: ")

    print('config path: ' + args.cfgpath)
    print('config file: ' + args.proc_cfgfile)
    print('trt path: ' + args.trtbase)
    print('radar data path: ' + args.radarbase)

    cfgfile_proc = args.cfgpath + args.proc_cfgfile
    trajtype = 'trt'

    time_dir_list = args.days
    datatype_list = args.datatypes.split(',')
    dataset_list = args.datasets.split(',')

    if np.size(datatype_list) != np.size(dataset_list):
        warn(
            str(np.size(datatype_list)) + ' datatypes but ' +
            str(np.size(dataset_list)) +
            ' dataset directories. Their number must be equal')
        return

    # Find all TRT files in directory
    trt_list = []
    for time_dir in time_dir_list:
        trt_list.extend(
            glob.glob(args.trtbase + time_dir + '/TRTC_cell_plots/All/*.trt'))
        trt_list.extend(
            glob.glob(args.trtbase + time_dir + '/TRTC_cell_plots/Some/*.trt'))

    # Pyrad data processing
    trt_cell_id_list = []
    trt_file_list = []
    for fname in trt_list:
        print('processing TRT cell file ' + fname)
        try:
            infostr = os.path.basename(fname).split('.')[0]
            pyrad_main(cfgfile_proc,
                       trajfile=fname,
                       infostr=infostr,
                       trajtype=trajtype)
            trt_cell_id_list.append(infostr)
            trt_file_list.append(fname)
        except ValueError as ee:
            print(ee)

    # plot time series and get altitude of graupel column
    if 'hydro' in datatype_list:
        cell_ID_list = np.asarray([], dtype=int)
        time_list = np.asarray([], dtype=datetime.datetime)
        lon_list = np.asarray([], dtype=float)
        lat_list = np.asarray([], dtype=float)
        area_list = np.asarray([], dtype=float)
        rank_list = np.asarray([], dtype=float)
        rm_hmin_list = np.ma.asarray([], dtype=float)
        rm_hmax_list = np.ma.asarray([], dtype=float)

    for i, trt_cell_id in enumerate(trt_cell_id_list):
        print('\n\nPost-processing cell: ' + trt_cell_id)
        dt_str = trt_cell_id[0:12]
        dt_cell = datetime.datetime.strptime(dt_str, "%Y%m%d%H%M")
        time_dir = dt_cell.strftime("%Y-%m-%d")
        for j, datatype in enumerate(datatype_list):
            dataset = dataset_list[j]
            file_base2 = args.radarbase + time_dir + '/' + dataset + '_trt_traj/'

            field_name = get_fieldname_pyart(datatype)
            field_dict = get_metadata(field_name)
            titl = 'TRT cell ' + trt_cell_id + '\n' + get_field_name(
                field_dict, field_name)

            # plot time-height
            flist = glob.glob(file_base2 + 'PROFILE/*_' + trt_cell_id +
                              '_rhi_profile_*_' + datatype + '_hres' +
                              str(int(args.hres)) + '.csv')
            if not flist:
                warn('No profile files found in ' + file_base2 +
                     'PROFILE/ for TRT cell ' + trt_cell_id +
                     ' with resolution ' + str(args.hres))
            else:
                labels = [
                    '50.0-percentile', '25.0-percentile', '75.0-percentile'
                ]
                if datatype == 'RhoHVc':
                    labels = [
                        '80.0-percentile', '65.0-percentile', '95.0-percentile'
                    ]
                elif datatype == 'hydro':
                    labels = [
                        'Mode', '2nd most common', '3rd most common',
                        '% points mode', '% points 2nd most common',
                        '% points 3rd most common'
                    ]
                elif datatype == 'entropy' or 'prop' in datatype:
                    labels = ['Mean', 'Min', 'Max']

                tbin_edges, hbin_edges, _, data_ma, start_time = (
                    read_profile_ts(flist, labels, hres=args.hres))

                basepath_out = os.path.dirname(flist[0])
                fname = (basepath_out + '/' + trt_cell_id +
                         '_trt_TIME_HEIGHT_' + datatype + '_hres' +
                         str(args.hres) + '.png')

                vmin = vmax = None
                if datatype == 'RhoHVc':
                    vmin = 0.95
                    vmax = 1.00

                xlabel = ('time (s from ' +
                          start_time.strftime("%Y-%m-%d %H:%M:%S") + ')')
                _plot_time_range(tbin_edges,
                                 hbin_edges,
                                 data_ma,
                                 field_name, [fname],
                                 titl=titl,
                                 xlabel=xlabel,
                                 ylabel='height (m MSL)',
                                 figsize=[10, 8],
                                 vmin=vmin,
                                 vmax=vmax,
                                 dpi=72)

                print("----- plot to '%s'" % fname)

                # Get min and max altitude of graupel/hail area
                if datatype == 'hydro':
                    (traj_ID, yyyymmddHHMM, lon, lat, _, _, _, area, _, _, _,
                     RANKr, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _,
                     _) = read_trt_traj_data(trt_file_list[i])

                    hmin, hmax = get_graupel_column(tbin_edges, hbin_edges,
                                                    data_ma, start_time,
                                                    yyyymmddHHMM)

                    cell_ID_list = np.append(cell_ID_list, traj_ID)
                    time_list = np.append(time_list, yyyymmddHHMM)
                    lon_list = np.append(lon_list, lon)
                    lat_list = np.append(lat_list, lat)
                    area_list = np.append(area_list, area)
                    rank_list = np.append(rank_list, RANKr)
                    rm_hmin_list = np.ma.append(rm_hmin_list, hmin)
                    rm_hmax_list = np.ma.append(rm_hmax_list, hmax)

            # plot time-hist
            flist = glob.glob(file_base2 + 'HISTOGRAM/*_' + trt_cell_id +
                              '_histogram_*_' + datatype + '.csv')

            if not flist:
                warn('No histogram files found in ' + file_base2 +
                     'HISTOGRAM/ for TRT cell ' + trt_cell_id)
            else:
                tbin_edges, bin_edges, data_ma, start_time = read_histogram_ts(
                    flist, datatype)

                basepath_out = os.path.dirname(flist[0])
                fname = (basepath_out + '/' + trt_cell_id + '_trt_HISTOGRAM_' +
                         datatype + '.png')

                data_ma[data_ma == 0.] = np.ma.masked
                xlabel = ('time (s from ' +
                          start_time.strftime("%Y-%m-%d %H:%M:%S") + ')')
                _plot_time_range(tbin_edges,
                                 bin_edges,
                                 data_ma,
                                 'frequency_of_occurrence', [fname],
                                 titl=titl,
                                 xlabel=xlabel,
                                 ylabel=get_colobar_label(
                                     field_dict, field_name),
                                 vmin=0.,
                                 vmax=np.max(data_ma),
                                 figsize=[10, 8],
                                 dpi=72)

                print("----- plot to '%s'" % fname)

            # plot quantiles
            flist = glob.glob(file_base2 + 'QUANTILES/*_' + trt_cell_id +
                              '_quantiles_*_' + datatype + '.csv')

            if not flist:
                warn('No quantiles files found in ' + file_base2 +
                     'QUANTILES/ for TRT cell ' + trt_cell_id)
                continue

            tbin_edges, qbin_edges, data_ma, start_time = read_quantiles_ts(
                flist, step=5., qmin=0., qmax=100.)

            basepath_out = os.path.dirname(flist[0])
            fname = (basepath_out + '/' + trt_cell_id + '_trt_QUANTILES_' +
                     datatype + '.png')

            vmin = vmax = None
            if datatype == 'RhoHVc':
                vmin = 0.95
                vmax = 1.00
            xlabel = ('time (s from ' +
                      start_time.strftime("%Y-%m-%d %H:%M:%S") + ')')
            _plot_time_range(tbin_edges,
                             qbin_edges,
                             data_ma,
                             field_name, [fname],
                             titl=titl,
                             xlabel=xlabel,
                             ylabel='Quantile',
                             vmin=vmin,
                             vmax=vmax,
                             figsize=[10, 8],
                             dpi=72)

            print("----- plot to '%s'" % fname)

    if 'hydro' in datatype_list:
        fname = args.trtbase + 'cell_rimmed_particles_column.csv'
        write_trt_cell_lightning(cell_ID_list, time_list, lon_list, lat_list,
                                 area_list, rank_list, rm_hmin_list,
                                 rm_hmax_list, fname)

        print("----- written to '%s'" % fname)
Example #3
def main():
    """
    """

    # parse the arguments
    parser = argparse.ArgumentParser(
        description='Entry to Pyrad processing framework')

    # keyword arguments
    parser.add_argument('--database',
                        type=str,
                        default='/store/msrad/radar/pyrad_products/',
                        help='base path to the radar data')

    parser.add_argument(
        '--datadirs',
        type=str,
        default=(
            'mals_sha_windmills_point_psr_filtered_WM1_20200304-20200311,'
            'mals_sha_windmills_point_psr_filtered_WM1_20200312-20200315,'
            'mals_sha_windmills_point_psr_filtered_WM1_20200316-20200320,'
            'mals_sha_windmills_point_psr_filtered_WM1_20200321-20200325'),
        help='directories containing data')

    parser.add_argument(
        '--datatypes',
        type=str,
        default='dBuZ,dBuZv,rcs_h,rcs_v,ZDRu,RhoHVu,uPhiDPu,Vu,Wu',
        help='Data types. Comma separated')

    args = parser.parse_args()

    print("====== PYRAD windmill data processing started: %s" %
          datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"))
    atexit.register(_print_end_msg,
                    "====== PYRAD windmill data processing finished: ")

    datadirs = args.datadirs.split(',')
    datatypes = args.datatypes.split(',')

    # Read periods of processing
    for datatype in datatypes:
        first_read = False
        for datadir in datadirs:
            # Read data time series files
            flist = glob.glob(args.database + datadir + '/' + datatype +
                              '_TS/TS/ts_POINT_MEASUREMENT_hist_' + datatype +
                              '.csv')
            if not flist:
                continue

            hist_aux, bin_edges_aux = read_histogram(flist[0])
            if not first_read:
                hist = hist_aux
                bin_edges = bin_edges_aux
                first_read = True
                continue

            hist += hist_aux

        if not first_read:
            warn('No files found for data type ' + datatype)
            continue

        basepath = os.path.dirname(flist[0]) + '/'

        # Histogram plots
        field_name = get_fieldname_pyart(datatype)
        field_dict = get_metadata(field_name)

        fname = args.database + 'ts_POINT_MEASUREMENT_hist_' + datatype + '.png'

        bin_centers = bin_edges[:-1] + ((bin_edges[1] - bin_edges[0]) / 2.)
        fname = plot_histogram2(bin_centers,
                                hist, [fname],
                                labelx=get_colobar_label(
                                    field_dict, field_name),
                                titl=datatype)
        print('Plotted ' + ' '.join(fname))

        fname = args.database + 'ts_POINT_MEASUREMENT_hist_' + datatype + '.csv'
        fname = write_histogram(bin_edges, hist, fname)
        print('Written ' + fname)
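
The accumulation loop above sums per-period histogram counts that share the same bin edges, keeping the first file's edges as reference. A minimal sketch of the same idea with plain NumPy (accumulate_histograms is an illustrative helper, not part of Pyrad):

import numpy as np


def accumulate_histograms(hist_list):
    """Sum a list of (counts, bin_edges) pairs that share bin edges."""
    total = None
    bin_edges = None
    for counts, edges in hist_list:
        if total is None:
            total, bin_edges = np.asarray(counts, dtype=float), edges
        else:
            total += counts
    return total, bin_edges


# toy usage: two periods with identical binning
edges = np.arange(0., 6., 1.)
h1 = np.histogram([1.2, 2.5, 2.7], bins=edges)[0]
h2 = np.histogram([0.5, 2.1, 4.9], bins=edges)[0]
hist, _ = accumulate_histograms([(h1, edges), (h2, edges)])
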
Example #4
def main():
    """
    """

    # parse the arguments
    parser = argparse.ArgumentParser(
        description='Entry to Pyrad processing framework')

    # keyword arguments
    parser.add_argument('--database',
                        type=str,
                        default='/store/msrad/radar/pyrad_products/',
                        help='base path to the radar data')

    parser.add_argument(
        '--datadirs',
        type=str,
        default=(
            'mals_sha_windmills_point_psr_filtered_WM1_20200304-20200311,'
            'mals_sha_windmills_point_psr_filtered_WM1_20200312-20200315,'
            'mals_sha_windmills_point_psr_filtered_WM1_20200316-20200320,'
            'mals_sha_windmills_point_psr_filtered_WM1_20200321-20200325'),
        help='directories containing data')

    parser.add_argument(
        '--datatypes',
        type=str,
        default='dBuZ,dBuZv,rcs_h,rcs_v,uPhiDPu,RhoHVu,ZDRu,Vu,Wu',
        help='Data types. Comma separated')

    parser.add_argument(
        '--orientations',
        type=str,
        default=('0,10,20,30,40,50,60,70,80,90,100,110,120,130,140,150,160,'
                 '170,180,190,200,210,220,230,240,250,260,270,280,290,300,'
                 '310,320,330,340,350'),
        help='Orientations with respect to the radar. Comma separated')

    parser.add_argument('--span', type=float, default=10., help='Span')

    parser.add_argument('--vel_limit',
                        type=float,
                        default=0.,
                        help='Velocity limit')

    args = parser.parse_args()

    print("====== PYRAD windmill data processing started: %s" %
          datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"))
    atexit.register(_print_end_msg,
                    "====== PYRAD windmill data processing finished: ")

    datadirs = args.datadirs.split(',')
    datatypes = args.datatypes.split(',')

    orientations = np.asarray(args.orientations.split(','), dtype=float)
    speeds = [
        'speed_GT' + str(args.vel_limit), 'speed_LE' + str(args.vel_limit)
    ]

    scan_type = 'ppi'

    for ori in orientations:
        for speed in speeds:
            for datatype in datatypes:
                first_read = False
                for datadir in datadirs:
                    # Read data time series files
                    flist = glob.glob(args.database + datadir + '/' +
                                      datatype + '_TS/TS/' + datatype +
                                      '_span' + str(args.span) + '_ori' +
                                      str(ori) + '_' + speed + '_hist.csv')

                    if not flist:
                        continue

                    hist_aux, bin_edges_aux = read_histogram(flist[0])
                    if not first_read:
                        hist = hist_aux
                        bin_edges = bin_edges_aux
                        first_read = True
                        continue

                    hist += hist_aux

                if not first_read:
                    warn('No files for orientation ' + str(ori) + ' and ' +
                         speed)
                    continue

                # Histogram plots
                field_name = get_fieldname_pyart(datatype)
                field_dict = get_metadata(field_name)

                fname = (args.database + datatype + '_span' + str(args.span) +
                         '_ori' + str(ori) + '_' + speed + '_hist.png')

                titl = (datatype + ' span ' + str(args.span) + ' ori ' +
                        str(ori) + ' ' + speed)

                bin_centers = bin_edges[:-1] + (
                    (bin_edges[1] - bin_edges[0]) / 2.)
                fname = plot_histogram2(bin_centers,
                                        hist, [fname],
                                        labelx=get_colobar_label(
                                            field_dict, field_name),
                                        titl=titl)
                print('Plotted ' + ' '.join(fname))

                fname = (args.database + datatype + '_span' + str(args.span) +
                         '_ori' + str(ori) + '_' + speed + '_hist.csv')
                fname = write_histogram(bin_edges, hist, fname)
                print('Written ' + fname)
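
The plotting step converts bin edges to bin centers by adding half of the (assumed constant) bin width to the lower edges. The equivalent general form, which also handles unevenly spaced edges, is shown for comparison:

import numpy as np

bin_edges = np.arange(0., 6., 1.)
# assumes a constant bin width, as in the snippet above
bin_centers = bin_edges[:-1] + (bin_edges[1] - bin_edges[0]) / 2.
# general form, valid for uneven bins as well
bin_centers_general = 0.5 * (bin_edges[:-1] + bin_edges[1:])
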
Example #5
def main():
    """
    """
    # parse the arguments
    parser = argparse.ArgumentParser(
        description='Entry to Pyrad processing framework')

    # positional arguments
    parser.add_argument('days',
                        nargs='+',
                        type=str,
                        help='Dates to process. Format YYYYMMDD')

    # keyword arguments
    parser.add_argument(
        '--basepath',
        type=str,
        default='/store/msrad/radar/pyrad_products/rad4alp_hydro_PHA/',
        help='name of folder containing the radar data')

    parser.add_argument(
        '--datatypes',
        type=str,
        default='hydro,KDPc,dBZc,RhoHVc,TEMP,ZDRc',
        help='Names of the polarimetric moments to process. Comma separated')

    parser.add_argument('--steps',
                        type=str,
                        default='None,0.05,0.5,0.001,1.,0.1',
                        help='Step of the histogram for each data type')

    args = parser.parse_args()

    print("====== LMA trajectories radar data processing started: %s" %
          datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"))
    atexit.register(
        _print_end_msg,
        "====== LMA trajectories radar data processing finished: ")

    day_vec = []
    for day in args.days:
        day_vec.append(datetime.datetime.strptime(day, '%Y%m%d'))

    datatype_vec = args.datatypes.split(',')
    steps = args.steps.split(',')

    if np.size(datatype_vec) != np.size(steps):
        warn(
            str(np.size(datatype_vec)) + ' datatypes but ' +
            str(np.size(steps)) + ' steps. Their number must be equal')
        return

    step_list = []
    for step in steps:
        if step == 'None':
            step_list.append(None)
        else:
            step_list.append(float(step))

    for j, datatype in enumerate(datatype_vec):
        step = step_list[j]

        field_name = get_fieldname_pyart(datatype)
        field_dict = get_metadata(field_name)

        labelx = get_colobar_label(field_dict, field_name)

        values_list = []
        values_first_list = []
        flash_cnt = 0
        source_cnt = 0
        for day in day_vec:
            day_dir = day.strftime('%Y-%m-%d')
            day_str = day.strftime('%Y%m%d')

            fname_test = (args.basepath + day_dir + '/*_traj/AT_FLASH/' +
                          day_str + '*_allflash_ts_trajlightning_' + datatype +
                          '.csv')
            fname_list = glob.glob(fname_test)
            if not fname_list:
                warn('No file found in ' + fname_test)
                continue

            fname = fname_list[0]

            basepath_out = os.path.dirname(fname)
            fname_first_source = (basepath_out + '/' + day_str +
                                  '_firstsource_ts_trajlightning_' + datatype +
                                  '.png')

            fname_all_sources = (basepath_out + '/' + day_str +
                                 '_allsources_ts_trajlightning_' + datatype +
                                 '.png')

            print('\nReading file ' + fname)
            time_flash, flashnr, _, val_at_flash, _, _, _, _ = (
                read_lightning_traj(fname))

            print('N sources: ' + str(flashnr.size))
            source_cnt += flashnr.size

            # Plot all sources histogram
            bins, values = compute_histogram(val_at_flash,
                                             field_name,
                                             step=step)
            print('Valid values: ' + str(values.size))

            values_list.extend(values)

            plot_histogram(bins,
                           values, [fname_all_sources],
                           labelx=labelx,
                           titl=("Trajectory Histogram %s" %
                                 time_flash[0].strftime("%Y-%m-%d")))

            print("----- plot to '%s'" % fname_all_sources)

            # Get and plot first sources histogram
            flashnr_first, unique_ind = np.unique(flashnr, return_index=True)

            print('N flashes: ' + str(flashnr_first.size))
            flash_cnt += flashnr_first.size

            val_first = val_at_flash[unique_ind]
            time_flash_first = time_flash[unique_ind]

            bins, values = compute_histogram(val_first, field_name, step=step)

            values_first_list.extend(values)

            plot_histogram(bins,
                           values, [fname_first_source],
                           labelx=labelx,
                           titl=("Trajectory Histogram First Source %s" %
                                 time_flash_first[0].strftime("%Y-%m-%d")))

            print("----- plot to '%s'" % fname_first_source)

        print('N sources total: ' + str(source_cnt))
        print('N flashes total: ' + str(flash_cnt))

        values_list = np.asarray(values_list)
        values_first_list = np.asarray(values_first_list)

        print('Valid values total: ' + str(values_list.size))
        print('Valid flashes total: ' + str(values_first_list.size))

        # Plot all sources histogram
        fname_all_sources = (args.basepath + '/allsources_ts_trajlightning_' +
                             datatype + '.png')
        plot_histogram(bins,
                       values_list, [fname_all_sources],
                       labelx=labelx,
                       titl="Trajectory Histogram All Sources")

        print("----- plot to '%s'" % fname_all_sources)

        # store histogram
        fname_all_sources = (args.basepath + 'allsources_ts_trajlightning_' +
                             datatype + '.csv')
        hist_values, _ = np.histogram(values_list, bins=bins)
        write_histogram(bins, hist_values, fname_all_sources)
        print('Written ' + fname_all_sources)

        # Plot first source histogram
        fname_first_source = (args.basepath + 'firstsource_ts_trajlightning_' +
                              datatype + '.png')
        plot_histogram(bins,
                       values_first_list, [fname_first_source],
                       labelx=labelx,
                       titl="Trajectory Histogram First Source")

        print("----- plot to '%s'" % fname_first_source)

        # store histogram
        fname_first_source = (args.basepath + 'firstsource_ts_trajlightning_' +
                              datatype + '.csv')
        hist_values_first, _ = np.histogram(values_first_list, bins=bins)
        write_histogram(bins, hist_values_first, fname_first_source)
        print('Written ' + fname_first_source)
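
Selecting the first source of each flash relies on np.unique returning the index of the first occurrence of every flash number, which corresponds to the earliest source when the records are time-ordered within each flash. A toy illustration with made-up values:

import numpy as np

flashnr = np.array([7, 7, 7, 9, 9, 12])
power = np.array([-3.0, 1.5, 0.2, 4.1, 2.2, -1.0])

flashnr_first, unique_ind = np.unique(flashnr, return_index=True)
power_first = power[unique_ind]   # value at the first source of each flash
# flashnr_first -> [ 7  9 12], power_first -> [-3.   4.1 -1. ]
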
Example #6
def main():
    """
    """
    # basepath = '/data/pyrad_products/rad4alp_hydro_PHA/'
    basepath = '/store/msrad/radar/pyrad_products/rad4alp_hydro_PHA/'
    day_vec = [
        datetime.datetime(2017, 6, 29),
        datetime.datetime(2017, 6, 30),
        datetime.datetime(2017, 7, 10),
        datetime.datetime(2017, 7, 14),
        datetime.datetime(2017, 7, 18),
        datetime.datetime(2017, 7, 19),
        datetime.datetime(2017, 7, 30),
        datetime.datetime(2017, 8, 1)
    ]

    #    day_vec = [
    #        datetime.datetime(2017, 7, 14)]

    basename = 'Santis_data_entropy_CGpn'
    filt_type = 'keep_all'
    nsources_min = 10

    if 'entropy' in basename:
        pol_vals_labels = [
            'hydro', 'entropy', 'propAG', 'propCR', 'propIH', 'propLR',
            'propMH', 'propRN', 'propRP', 'propVI', 'propWS'
        ]

        datatype_vec = [
            'hydro', 'entropy', 'propAG', 'propCR', 'propIH', 'propLR',
            'propMH', 'propRN', 'propRP', 'propVI', 'propWS'
        ]

        step_list = [None, 0.1, 1., 1., 1., 1., 1., 1., 1., 1., 1.]
    else:
        pol_vals_labels = [
            'hydro [-]', 'KDPc [deg/Km]', 'dBZc [dBZ]', 'RhoHVc [-]',
            'TEMP [deg C]', 'ZDRc [dB]'
        ]

        datatype_vec = ['hydro', 'KDPc', 'dBZc', 'RhoHVc', 'TEMP', 'ZDRc']

        step_list = [None, 0.05, 0.5, 0.001, 1., 0.1]

    for label in pol_vals_labels:
        if 'hydro' in label:
            hydro_label = label
            break

    print("====== Lightning post-processing started: %s" %
          datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"))
    atexit.register(_print_end_msg,
                    "====== Lightning post-processing finished: ")

    # read all the data to analyze
    flashnr, time_data, time_in_flash, lat, lon, alt, dBm, pol_vals_dict = (
        read_data(basepath,
                  day_vec,
                  basename=basename,
                  pol_vals_labels=pol_vals_labels))

    #    flashnr, time_data, time_in_flash, lat, lon, alt, dBm, pol_vals_dict = (
    #        read_data_two_sources(
    #            basepath, day_vec, basename1='Santis_data_entropy',
    #            basename2='Santis_data_entropy_CGt', basename_out='Santis_data_entropy_no_CG',
    #            keep_common=False,
    #            pol_vals_labels=pol_vals_labels))

    # Get indices of data to keep
    if filt_type == 'keep_all':
        ind, data_ID, subtitl = get_indices_all_data(flashnr,
                                                     nsources_min=nsources_min)
    elif filt_type == 'keep_solid':
        ind, data_ID, subtitl = get_indices_solid_phase(
            flashnr, pol_vals_dict[hydro_label], nsources_min=nsources_min)
    elif filt_type == 'keep_liquid':
        ind, data_ID, subtitl = get_indices_liquid_phase(
            flashnr, pol_vals_dict[hydro_label], nsources_min=nsources_min)
    elif filt_type == 'keep_liquid_origin':
        ind, data_ID, subtitl = get_indices_liquid_phase_origin(
            flashnr, pol_vals_dict[hydro_label], nsources_min=nsources_min)
    else:
        warn('Unknown filter type ' + filt_type)
        return

    flashnr_filt = flashnr[ind]
    time_data_filt = time_data[ind]
    time_in_flash_filt = time_in_flash[ind]
    lat_filt = lat[ind]
    lon_filt = lon[ind]
    alt_filt = alt[ind]
    dBm_filt = dBm[ind]
    pol_vals_dict_filt = deepcopy(pol_vals_dict)
    for key in pol_vals_dict.keys():
        pol_vals_dict_filt[key] = pol_vals_dict[key][ind]


    # # write the filtered data in a file
    # vals_list = []
    # for label in pol_vals_labels:
    #     vals_list.append(pol_vals_dict_filt[label])
    #
    # fname = basepath + basename + '_' + data_ID + '.csv'
    # write_ts_lightning(
    #     flashnr_filt, time_data_filt, time_in_flash_filt, lat_filt,
    #     lon_filt, alt_filt, dBm_filt, vals_list, fname, pol_vals_labels)
    # print('written to ' + fname)

    # get the origin (first source) of each filtered flash
    flashnr_first, ind_first = np.unique(flashnr_filt, return_index=True)
    time_data_first = time_data_filt[ind_first]
    time_in_flash_first = time_in_flash_filt[ind_first]
    lat_first = lat_filt[ind_first]
    lon_first = lon_filt[ind_first]
    alt_first = alt_filt[ind_first]
    dBm_first = dBm_filt[ind_first]
    pol_vals_dict_first = deepcopy(pol_vals_dict_filt)
    for key in pol_vals_dict_filt.keys():
        pol_vals_dict_first[key] = pol_vals_dict_filt[key][ind_first]

    # get duration and area of flash
    duration_filt = np.ma.masked_all(flashnr_first.size)
    area_filt = np.ma.masked_all(flashnr_first.size)

    chy_filt, chx_filt, _ = wgs84_to_swissCH1903(lon_filt,
                                                 lat_filt,
                                                 alt_filt,
                                                 no_altitude_transform=True)

    for i, flash_ID in enumerate(flashnr_first):
        time_data_flash = time_data_filt[flashnr_filt == flash_ID]
        duration_filt[i] = (
            1e3 * (time_data_flash[-1] - time_data_flash[0]).total_seconds())

        chy_flash = chy_filt[flashnr_filt == flash_ID]
        chx_flash = chx_filt[flashnr_filt == flash_ID]

        points_flash = shapely.geometry.MultiPoint(
            list(zip(chy_flash, chx_flash)))
        area_filt[i] = points_flash.minimum_rotated_rectangle.area * 1e-6

    print('N flashes: ' + str(flashnr_first.size))
    print('N sources: ' + str(flashnr_filt.size))

    # Analyse the data

    # create histograms of hydrometeor proportions
    if 'propAG' in pol_vals_dict_filt:
        bins_centers = np.arange(0, 10, 1)
        bins_edges = np.arange(-0.5, 10.5, 1)

        # Create histogram of the number of different hydrometeor types in
        # each radar range gate. All sources
        nhydros_hist = hist_nhydros_gate(pol_vals_dict_filt, percent_min=10.)

        fname = (basepath + data_ID +
                 '_allsources_ts_trajlightning_nhydro.png')
        plot_histogram2(bins_centers,
                        nhydros_hist, [fname],
                        labelx='Number of hydrometeors in radar range gate',
                        labely='occurrence',
                        titl='Trajectory Histogram All Sources' + subtitl)

        print("----- plot to '%s'" % fname)

        # store histogram
        fname = (basepath + data_ID +
                 '_allsources_ts_trajlightning_nhydro.csv')
        write_histogram(bins_edges, nhydros_hist, fname)
        print('Written ' + fname)

        # Create histogram of the number of different hydrometeor types in
        # each radar range gate. First source
        nhydros_hist = hist_nhydros_gate(pol_vals_dict_first, percent_min=10.)

        fname = (basepath + data_ID +
                 '_firstsource_ts_trajlightning_nhydro.png')
        plot_histogram2(bins_centers,
                        nhydros_hist, [fname],
                        labelx='Number of hydrometeors in radar range gate',
                        labely='occurrence',
                        titl='Trajectory Histogram First Sources' + subtitl)

        print("----- plot to '%s'" % fname)

        # store histogram
        fname = (basepath + data_ID +
                 '_firstsource_ts_trajlightning_nhydro.csv')
        write_histogram(bins_edges, nhydros_hist, fname)
        print('Written ' + fname)

        # NOTE: this early return skips the dominant-hydrometeor and
        # hydrometeor-proportion histograms below; they are kept for reference
        return

        # Create histograms of dominant hydrometeors all sources
        hydro_hist2 = hist_dominant_hydrometeors(pol_vals_dict_filt,
                                                 percent_min=10.)

        fname_hist = basepath + data_ID + '_allsources_ts_trajlightning_hydro_dominant.png'
        fname_hist = _plot_time_range(bins_edges,
                                      bins_edges,
                                      hydro_hist2,
                                      None, [fname_hist],
                                      titl='Trajectory Histogram All Sources' +
                                      subtitl,
                                      xlabel='Dominant hydrometeor',
                                      ylabel='2nd most dominant hydrometeor',
                                      vmin=0,
                                      clabel='Occurrence',
                                      figsize=[10, 8],
                                      dpi=72)
        print('Plotted ' + ' '.join(fname_hist))

        # Create histogram of dominant hydrometeors first sources
        hydro_hist2 = hist_dominant_hydrometeors(pol_vals_dict_first,
                                                 percent_min=10.)

        fname_hist = basepath + data_ID + '_firstsource_ts_trajlightning_hydro_dominant.png'
        fname_hist = _plot_time_range(
            bins_edges,
            bins_edges,
            hydro_hist2,
            None, [fname_hist],
            titl='Trajectory Histogram First Sources' + subtitl,
            xlabel='Dominant hydrometeor',
            ylabel='2nd most dominant hydrometeor',
            vmin=0,
            clabel='Occurrence',
            figsize=[10, 8],
            dpi=72)
        print('Plotted ' + ' '.join(fname_hist))

        # create histogram of percentage of dominant hydrometeor all sources
        hydro_hist = hist_hydrometeor_mixtures(pol_vals_dict_filt)

        fname = (basepath + data_ID +
                 '_allsources_ts_trajlightning_hydro_prop.png')
        plot_histogram2(bins_centers,
                        hydro_hist, [fname],
                        labelx='radar echo classification (-)',
                        labely='percentage',
                        titl='Trajectory Histogram All Sources' + subtitl)

        print("----- plot to '%s'" % fname)

        # store histogram
        fname = (basepath + data_ID +
                 '_allsources_ts_trajlightning_hydro_prop.csv')
        write_histogram(bins_edges, hydro_hist, fname)
        print('Written ' + fname)

        # create histogram of percentage of dominant hydrometeor first sources
        hydro_hist = hist_hydrometeor_mixtures(pol_vals_dict_first)

        fname = (basepath + data_ID +
                 '_firstsource_ts_trajlightning_hydro_prop.png')
        plot_histogram2(bins_centers,
                        hydro_hist, [fname],
                        labelx='radar echo classification (-)',
                        labely='percentage',
                        titl='Trajectory Histogram First Sources' + subtitl)

        print("----- plot to '%s'" % fname)

        # store histogram
        fname = (basepath + data_ID +
                 '_firstsource_ts_trajlightning_hydro_prop.csv')
        write_histogram(bins_edges, hydro_hist, fname)
        print('Written ' + fname)

    for i, key in enumerate(pol_vals_labels):
        step = step_list[i]
        datatype = datatype_vec[i]

        field_name = get_fieldname_pyart(datatype)
        field_dict = get_metadata(field_name)

        labelx = get_colobar_label(field_dict, field_name)

        vals = pol_vals_dict_filt[key]
        bins, values = compute_histogram(vals, field_name, step=step)

        print(datatype + ' min: ' + str(vals.min()))
        print(datatype + ' max: ' + str(vals.max()))

        # Plot all sources histogram
        fname_first_source = (basepath + data_ID +
                              '_allsources_ts_trajlightning_' + datatype +
                              '.png')
        plot_histogram(bins,
                       values, [fname_first_source],
                       labelx=labelx,
                       titl='Trajectory Histogram All Sources' + subtitl)

        print("----- plot to '%s'" % fname_first_source)

        # store histogram
        fname_first_source = (basepath + data_ID +
                              '_allsources_ts_trajlightning_' + datatype +
                              '.csv')
        hist_values, _ = np.histogram(values, bins=bins)
        write_histogram(bins, hist_values, fname_first_source)
        print('Written ' + fname_first_source)

        # First sources
        vals = pol_vals_dict_first[key]
        bins, values = compute_histogram(vals, field_name, step=step)

        # Plot first source histogram
        fname_first_source = (basepath + data_ID +
                              '_firstsource_ts_trajlightning_' + datatype +
                              '.png')
        plot_histogram(bins,
                       values, [fname_first_source],
                       labelx=labelx,
                       titl='Trajectory Histogram First Source' + subtitl)

        print("----- plot to '%s'" % fname_first_source)

        # store histogram
        fname_first_source = (basepath + data_ID +
                              '_firstsource_ts_trajlightning_' + datatype +
                              '.csv')

        hist_values_first, _ = np.histogram(values, bins=bins)
        write_histogram(bins, hist_values_first, fname_first_source)
        print('Written ' + fname_first_source)

    # Get histograms of sources altitude and power

    # define histogram bin edges
    bin_edges_alt = np.arange(-50., 14150., 100.)
    bin_edges_dBm = np.arange(-17., 47., 1.)
    bin_edges_time = np.arange(0, 25, 1)
    bin_edges_area = np.arange(0., 2100., 100.)
    bin_edges_duration = np.arange(0., 1100., 100.)

    # Plot histogram of LMA flash area
    _, area_filt_values = compute_histogram(area_filt,
                                            None,
                                            bin_edges=bin_edges_area)
    fname_hist = basepath + data_ID + '_Santis_hist_area.png'
    fname_hist = plot_histogram(bin_edges_area,
                                area_filt_values, [fname_hist],
                                labelx='Area [km2]',
                                titl='Flash area' + subtitl)
    print('Plotted ' + ' '.join(fname_hist))

    fname_hist = basepath + data_ID + '_Santis_hist_area.csv'
    hist_area, _ = np.histogram(area_filt_values, bins=bin_edges_area)
    fname_hist = write_histogram(bin_edges_area, hist_area, fname_hist)
    print('Written ' + fname_hist)

    # Plot histogram of LMA flash duration
    _, duration_filt_values = compute_histogram(duration_filt,
                                                None,
                                                bin_edges=bin_edges_duration)
    fname_hist = basepath + data_ID + '_Santis_hist_duration.png'
    fname_hist = plot_histogram(bin_edges_duration,
                                duration_filt_values, [fname_hist],
                                labelx='Duration [ms]',
                                titl='Flash duration' + subtitl)
    print('Plotted ' + ' '.join(fname_hist))

    fname_hist = basepath + data_ID + '_Santis_hist_duration.csv'
    hist_duration, _ = np.histogram(duration_filt_values,
                                    bins=bin_edges_duration)
    fname_hist = write_histogram(bin_edges_duration, hist_duration, fname_hist)
    print('Written ' + fname_hist)

    # Plot histogram time of occurrence
    time_hour_first = occurrence_time(time_data_first)

    fname_hist = basepath + data_ID + '_Santis_hist_time.png'
    fname_hist = plot_histogram(bin_edges_time,
                                time_hour_first, [fname_hist],
                                labelx='Hour [UTC]',
                                titl='Flash occurrence time' + subtitl)
    print('Plotted ' + ' '.join(fname_hist))

    fname_hist = basepath + data_ID + '_Santis_hist_time.csv'
    hist_time, _ = np.histogram(time_hour_first, bins=bin_edges_time)
    fname_hist = write_histogram(bin_edges_time, hist_time, fname_hist)
    print('Written ' + fname_hist)

    # Plot histogram altitude all sources
    _, alt_filt_values = compute_histogram(alt_filt,
                                           None,
                                           bin_edges=bin_edges_alt)
    fname_hist = basepath + data_ID + '_Santis_hist_alt.png'
    fname_hist = plot_histogram(bin_edges_alt,
                                alt_filt_values, [fname_hist],
                                labelx='Altitude [m MSL]',
                                titl='Flash sources altitude' + subtitl)
    print('Plotted ' + ' '.join(fname_hist))

    fname_hist = basepath + data_ID + '_Santis_hist_alt.csv'
    hist_alt, _ = np.histogram(alt_filt_values, bins=bin_edges_alt)
    fname_hist = write_histogram(bin_edges_alt, hist_alt, fname_hist)
    print('Written ' + fname_hist)

    # Plot histogram altitude first sources
    _, alt_first_values = compute_histogram(alt_first,
                                            None,
                                            bin_edges=bin_edges_alt)
    fname_hist = basepath + data_ID + '_Santis_hist_alt_first_source.png'
    fname_hist = plot_histogram(bin_edges_alt,
                                alt_first_values, [fname_hist],
                                labelx='Altitude [m MSL]',
                                titl='Flash first source altitude' + subtitl)
    print('Plotted ' + ' '.join(fname_hist))

    fname_hist = basepath + data_ID + '_Santis_hist_alt_first_source.csv'
    hist_alt_first, _ = np.histogram(alt_first_values, bins=bin_edges_alt)
    fname_hist = write_histogram(bin_edges_alt, hist_alt_first, fname_hist)
    print('Written ' + fname_hist)

    # Plot histogram power all sources
    _, dBm_filt_values = compute_histogram(dBm_filt,
                                           None,
                                           bin_edges=bin_edges_dBm)
    fname_hist = basepath + data_ID + '_Santis_hist_dBm.png'
    fname_hist = plot_histogram(bin_edges_dBm,
                                dBm_filt_values, [fname_hist],
                                labelx='Power [dBm]',
                                titl='Flash sources power' + subtitl)
    print('Plotted ' + ' '.join(fname_hist))

    fname_hist = basepath + data_ID + '_Santis_hist_dBm.csv'
    hist_dBm, _ = np.histogram(dBm_filt_values, bins=bin_edges_dBm)
    fname_hist = write_histogram(bin_edges_dBm, hist_dBm, fname_hist)
    print('Written ' + fname_hist)

    # Plot histogram power first sources
    _, dBm_first_values = compute_histogram(dBm_first,
                                            None,
                                            bin_edges=bin_edges_dBm)
    fname_hist = basepath + data_ID + '_Santis_hist_dBm_first_source.png'
    fname_hist = plot_histogram(bin_edges_dBm,
                                dBm_first_values, [fname_hist],
                                labelx='Power [dBm]',
                                titl='Flash first source power' + subtitl)
    print('Plotted ' + ' '.join(fname_hist))

    fname_hist = basepath + data_ID + '_Santis_hist_dBm_first_source.csv'
    hist_dBm_first, _ = np.histogram(dBm_first_values, bins=bin_edges_dBm)
    fname_hist = write_histogram(bin_edges_dBm, hist_dBm_first, fname_hist)
    print('Written ' + fname_hist)

    # Plot 2D histogram all sources
    H, _, _ = np.histogram2d(alt_filt_values,
                             dBm_filt_values,
                             bins=[bin_edges_alt, bin_edges_dBm])

    # set 0 values to blank
    H = np.ma.asarray(H)
    H[H == 0] = np.ma.masked

    fname_hist = basepath + data_ID + '_Santis_2Dhist_alt_dBm.png'
    fname_hist = _plot_time_range(bin_edges_alt,
                                  bin_edges_dBm,
                                  H,
                                  None, [fname_hist],
                                  titl='LMA sources Altitude-Power histogram' +
                                  subtitl,
                                  xlabel='Altitude [m MSL]',
                                  ylabel='Power [dBm]',
                                  clabel='Occurrence',
                                  vmin=0,
                                  vmax=None,
                                  figsize=[10, 8],
                                  dpi=72)
    print('Plotted ' + ' '.join(fname_hist))

    # Plot 2D histogram first sources
    H, _, _ = np.histogram2d(alt_first_values,
                             dBm_first_values,
                             bins=[bin_edges_alt, bin_edges_dBm])

    # set 0 values to blank
    H = np.ma.asarray(H)
    H[H == 0] = np.ma.masked

    fname_hist = basepath + data_ID + '_Santis_2Dhist_alt_dBm_first_source.png'
    fname_hist = _plot_time_range(
        bin_edges_alt,
        bin_edges_dBm,
        H,
        None, [fname_hist],
        titl='LMA first sources Altitude-Power histogram' + subtitl,
        xlabel='Altitude [m MSL]',
        ylabel='Power [dBm]',
        clabel='Occurrence',
        vmin=0,
        vmax=None,
        figsize=[10, 8],
        dpi=72)
    print('Plotted ' + ' '.join(fname_hist))

    # plot position all sources
    figfname = basepath + data_ID + '_Santis_LMA_sources_pos_max_height_on_top.png'
    figfname = plot_pos(lat_filt,
                        lon_filt,
                        alt_filt, [figfname],
                        sort_altitude='Highest_on_top',
                        cb_label='Source height [m MSL]',
                        titl='Flash sources position. Highest on top' +
                        subtitl)
    print('Plotted ' + ' '.join(figfname))

    figfname = basepath + data_ID + '_Santis_LMA_sources_pos_min_height_on_top.png'
    figfname = plot_pos(lat_filt,
                        lon_filt,
                        alt_filt, [figfname],
                        sort_altitude='Lowest_on_top',
                        cb_label='Source height [m MSL]',
                        titl='Flash sources position. Lowest on top' + subtitl)
    print('Plotted ' + ' '.join(figfname))

    # plot position first source
    figfname = (basepath + data_ID +
                '_Santis_LMA_first_source_pos_max_height_on_top.png')
    figfname = plot_pos(lat_first,
                        lon_first,
                        alt_first, [figfname],
                        sort_altitude='Highest_on_top',
                        cb_label='Source height [m MSL]',
                        titl='First flash source position. Highest on top' +
                        subtitl)
    print('Plotted ' + ' '.join(figfname))

    figfname = (basepath + data_ID +
                '_Santis_LMA_first_source_pos_min_height_on_top.png')
    figfname = plot_pos(lat_first,
                        lon_first,
                        alt_first, [figfname],
                        sort_altitude='Lowest_on_top',
                        cb_label='Source height [m MSL]',
                        titl='First flash source position. Lowest on top' +
                        subtitl)
    print('Plotted ' + ' '.join(figfname))
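
In Example #6 the footprint of each flash is measured as the area of the minimum rotated rectangle enclosing its sources, computed in projected Swiss coordinates so that the area comes out in m2 and is scaled to km2. A self-contained sketch with made-up coordinates (the values are illustrative only):

import shapely.geometry

# projected source coordinates of one flash, in metres (made-up values)
chy = [600000., 600800., 601500., 600300.]
chx = [200000., 200500., 199800., 200900.]

points_flash = shapely.geometry.MultiPoint(list(zip(chy, chx)))
area_km2 = points_flash.minimum_rotated_rectangle.area * 1e-6
print('flash area: %.2f km2' % area_km2)
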