Example No. 1
# Module-level imports assumed by the examples below. `qcu` is taken to be
# the project's QC utility module (the import name is an assumption), and
# gps2dist_azimuth comes from ObsPy; other helpers called in Example No. 2
# (basic_cat_sum, match_events, etc.) are defined elsewhere in the module.
import argparse
import errno
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from obspy.geodetics import gps2dist_azimuth

import QCutils as qcu  # assumed module name
def cat_dup_search(catalog, dirname):
    """Graph possible number of duplicate events given various distances
    and time differences.
    """
    epochtimes = [qcu.to_epoch(row.time) for row in catalog.itertuples()]
    tdifsec = np.abs(np.diff(epochtimes))

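    # Great-circle distance (km) between each pair of consecutive events.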
    lat1 = np.asarray(catalog.latitude[:-1])
    lon1 = np.asarray(catalog.longitude[:-1])
    lat2 = np.asarray(catalog.latitude[1:])
    lon2 = np.asarray(catalog.longitude[1:])
    ddelkm = [
        gps2dist_azimuth(lat1[i], lon1[i], lat2[i], lon2[i])[0] / 1000.
        for i in range(len(lat1))
    ]

    diffdf = pd.DataFrame({'tdifsec': tdifsec, 'ddelkm': ddelkm})

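    # Distance thresholds (km) and the time bins used to bucket candidate
    # duplicate pairs.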
    kmlimits = [1, 2, 4, 8, 16, 32, 64, 128, 256]
    tmax = 16
    dtime = 0.05
    timebins = np.arange(0, tmax + dtime / 2, dtime)

    numevents = np.empty([len(kmlimits), len(timebins) - 1])

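    # Count the pairs falling in each time bin for every distance threshold.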
    for jdx in range(len(kmlimits)):

        cat_subset = diffdf[diffdf.ddelkm <= kmlimits[jdx]]

        for idx in range(len(timebins) - 1):

            numevents[jdx][idx] = len(cat_subset[cat_subset.tdifsec.between(
                timebins[idx], timebins[idx + 1])])

    totmatch = np.cumsum(numevents, axis=1)

    plt.figure(figsize=(10, 6))
    for idx in range(len(kmlimits)):
        times = timebins[1:]
        matches = totmatch[idx]
        lab = str(kmlimits[idx]) + ' km'
        plt.plot(times, matches, label=lab)

    plt.xlabel('Time (s)', fontsize=14)
    plt.ylabel('Possible duplicate events', fontsize=14)
    plt.xlim(0, tmax)
    plt.ylim(0, np.amax(totmatch) + 0.5)
    plt.legend(loc='upper left', numpoints=1)
    plt.title(('Cumulative number of events within X seconds\n'
               'and Z km (Z specified in legend)'),
              fontsize=20)

    plt.savefig('%s_catdupsearch.png' % dirname, dpi=300)
    plt.close()
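
A minimal usage sketch; 'mycatalog.csv' and the 'testdir' prefix are
hypothetical names, and the catalog must carry 'time', 'latitude', and
'longitude' columns:

catalog = pd.read_csv('mycatalog.csv')
cat_dup_search(catalog, 'testdir')  # writes testdir_catdupsearch.png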
Example No. 2
def create_figures():
    """Generate and save all relevant figures and text files."""
    parser = argparse.ArgumentParser()

    parser.add_argument('catalog1',
                        nargs='?',
                        type=str,
                        help='pick first catalog to download data from; to \
                        download data from all catalogs, use "preferred"; if \
                        using -sf, give catalog name')
    parser.add_argument('catalog2',
                        nargs='?',
                        type=str,
                        help='pick second catalog to download data from; to \
                        download data from all catalogs, use "preferred"; if \
                        using -sf, give catalog name')
    parser.add_argument('startyear',
                        nargs='?',
                        type=int,
                        help='pick starting year; if using -sf, give first \
                        year in catalog')
    parser.add_argument('endyear',
                        nargs='?',
                        type=int,
                        help='pick end year (to get a single year of data, \
                        enter same year as startyear); if using -sf, give \
                        last year in catalog')

    parser.add_argument('-mr',
                        '--magrange',
                        nargs=2,
                        type=float,
                        default=[-5, 12],
                        help='give the magnitude range for downloading data \
                        (default range is from -5 to 12)')
    parser.add_argument('-sf',
                        '--specifyfiles',
                        nargs=2,
                        type=str,
                        help='specify two existing .csv files to use')
    parser.add_argument('-fd',
                        '--forcedownload',
                        action='store_true',
                        help='forces downloading of data even if .csv file \
                        exists')
    parser.add_argument('-nm',
                        '--nomatches',
                        action='store_false',
                        help='do not include list of matching events in HTML \
                        report')

    args = parser.parse_args()

    minmag, maxmag = args.magrange

    if args.specifyfiles is None:

        if not args.catalog1:
            sys.stdout.write('No first catalog specified. Exiting...\n')
            sys.exit()
        elif not args.catalog2:
            sys.stdout.write('No second catalog specified. Exiting...\n')
            sys.exit()
        elif not args.startyear:
            sys.stdout.write('No starting year specified. Exiting...\n')
            sys.exit()
        elif not args.endyear:
            sys.stdout.write('No ending year specified. Exiting...\n')
            sys.exit()

        cat1, cat2 = args.catalog1.lower(), args.catalog2.lower()
        startyear, endyear = map(int, [args.startyear, args.endyear])
        download = args.forcedownload

        dirname = '%s-%s%s-%s' % (cat1, cat2, startyear, endyear)

        if download:
            try:
                os.makedirs(dirname)
            except OSError as exception:
                if exception.errno != errno.EEXIST:
                    raise
            datadf1 = qcu.get_data(cat1,
                                   dirname,
                                   startyear=startyear,
                                   endyear=endyear,
                                   minmag=minmag,
                                   maxmag=maxmag)
            datadf2 = qcu.get_data(cat2,
                                   dirname,
                                   startyear=startyear,
                                   endyear=endyear,
                                   minmag=minmag,
                                   maxmag=maxmag)
        else:
            # pandas raises IOError (Python 2) or FileNotFoundError
            # (Python 3) for a missing file; since IOError aliases OSError
            # in Python 3, a single IOError handler covers both versions.
            try:
                datadf1 = pd.read_csv('%s/%s%s-%s.csv' %
                                      (dirname, cat1, startyear, endyear))
                datadf2 = pd.read_csv('%s/%s%s-%s.csv' %
                                      (dirname, cat2, startyear, endyear))
            except IOError:
                try:
                    os.makedirs(dirname)
                except OSError as exception:
                    if exception.errno != errno.EEXIST:
                        raise
                datadf1 = qcu.get_data(cat1,
                                       dirname,
                                       startyear=startyear,
                                       endyear=endyear,
                                       minmag=minmag,
                                       maxmag=maxmag)
                datadf2 = qcu.get_data(cat2,
                                       dirname,
                                       startyear=startyear,
                                       endyear=endyear,
                                       minmag=minmag,
                                       maxmag=maxmag)

    else:
        from shutil import copy2, SameFileError

        sfcat1, sfcat2 = args.specifyfiles
        cat1, cat2 = args.catalog1, args.catalog2
        dirname = '%s-%s%s-%s' % (cat1, cat2, args.startyear, args.endyear)

        try:
            os.makedirs(dirname)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                raise

        datadf1, datadf2 = pd.read_csv(sfcat1), pd.read_csv(sfcat2)
        try:
            copy2(sfcat1, dirname)
            copy2(sfcat2, dirname)
        except SameFileError:
            pass

    if len(datadf1) == 0:
        sys.stdout.write(('%s catalog has no data available for that time '
                          'period. Quitting...\n') % cat1.upper())
        sys.exit()

    if len(datadf2) == 0:
        sys.stdout.write(('%s catalog has no data available for that time '
                          'period. Quitting...\n') % cat2.upper())
        sys.exit()

    cat1, cat2 = cat1.upper(), cat2.upper()

    os.chdir(dirname)
    basic_cat_sum(datadf1, cat1, dirname)
    basic_cat_sum(datadf2, cat2, dirname)

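    # Normalize times in both catalogs: ISO 8601 strings become datetimes
    # (convtime) and epoch seconds (time).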
    datadf1.loc[:, 'convtime'] = [
        ' '.join(x.split('T')) for x in datadf1['time'].tolist()
    ]
    datadf1.loc[:, 'convtime'] = datadf1['convtime'].astype('datetime64[ns]')
    datadf1.loc[:, 'time'] = [qcu.to_epoch(x) for x in datadf1['time']]
    datadf2.loc[:, 'convtime'] = [
        ' '.join(x.split('T')) for x in datadf2['time'].tolist()
    ]
    datadf2.loc[:, 'convtime'] = datadf2['convtime'].astype('datetime64[ns]')
    datadf2.loc[:, 'time'] = [qcu.to_epoch(x) for x in datadf2['time']]
    datadf1, datadf2 = qcu.trim_times(datadf1, datadf2)

    cat1ids, cat2ids, newcat1, newcat2 = match_events(datadf1, datadf2,
                                                      dirname)

    if len(cat1ids) == 0:
        sys.stdout.write('*** No matching events found ***\n')

    comp_criteria(datadf1, cat1, cat1ids, datadf2, cat2, cat2ids, dirname)
    #find_closest(datadf1, cat1, cat1ids, datadf2, dirname)

    map_events(newcat1, cat1, newcat2, cat2, cat1ids, cat2ids, dirname)
    map_unique_events(datadf1, cat1, cat1ids)
    map_unique_events(datadf2, cat2, cat2ids)
    make_az_dist(newcat1, cat1, newcat2, cat2, cat1ids, cat2ids, dirname)
    compare_params(newcat1, cat1, newcat2, cat2, cat1ids, cat2ids, 'mag',
                   dirname)
    compare_params(newcat1, cat1, newcat2, cat2, cat1ids, cat2ids, 'depth',
                   dirname)
    make_diff_hist(newcat1,
                   newcat2,
                   cat1ids,
                   cat2ids,
                   'time',
                   0.5,
                   dirname,
                   xlabel='%s-%s time differences (sec)' %
                   (cat1.upper(), cat2.upper()))
    make_diff_hist(newcat1,
                   newcat2,
                   cat1ids,
                   cat2ids,
                   'mag',
                   0.1,
                   dirname,
                   xlabel='%s-%s magnitude differences' %
                   (cat1.upper(), cat2.upper()))
    make_diff_hist(newcat1,
                   newcat2,
                   cat1ids,
                   cat2ids,
                   'depth',
                   2,
                   dirname,
                   xlabel='%s-%s depth differences (km)' %
                   (cat1.upper(), cat2.upper()))
    make_diff_hist(newcat1,
                   newcat2,
                   cat1ids,
                   cat2ids,
                   'distance',
                   2,
                   dirname,
                   xlabel='%s-%s distances (km)' %
                   (cat1.upper(), cat2.upper()))

    return dirname, args.nomatches
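
Because create_figures() parses sys.argv, it runs from the command line; a
hypothetical invocation (the script and catalog names are illustrative, not
taken from the source):

# compare two catalogs over 2015-2016, magnitudes 4 to 9:
#   python compare_catalogs.py us iscgem 2015 2016 -mr 4 9
# or reuse two existing .csv files:
#   python compare_catalogs.py us iscgem 2015 2016 -sf us.csv iscgem.csv
dirname, nomatches = create_figures()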
Example No. 3
def list_duplicates(catalog,
                    dirname,
                    timewindow=2,
                    distwindow=15,
                    magwindow=None,
                    minmag=-5,
                    locfilter=None):
    """Make a list of possible duplicate events."""
    catalog.loc[:, 'convtime'] = [
        ' '.join(x.split('T')) for x in catalog['time'].tolist()
    ]
    catalog.loc[:, 'convtime'] = catalog['convtime'].astype('datetime64[ns]')
    catalog = catalog[catalog['mag'] >= minmag]
    if locfilter:
        catalog = catalog[catalog['place'].str.contains(locfilter, na=False)]
    cat = catalog[[
        'time', 'convtime', 'id', 'latitude', 'longitude', 'depth', 'mag'
    ]].copy()
    cat.loc[:, 'time'] = [qcu.to_epoch(x) for x in cat['time']]

    duplines1 = [('Possible duplicates using %ss time threshold and %skm '
                  'distance threshold\n') % (timewindow, distwindow),
                 '***********************\n'
                 'date time id latitude longitude depth magnitude '
                 '(distance) (Δ time) (Δ magnitude)\n']
    duplines2 = [('\n\nPossible duplicates using 16s time threshold and 100km '
                  'distance threshold\n'), '***********************\n'
                 'date time id latitude longitude depth magnitude '
                 '(distance) (Δ time) (Δ magnitude)\n']
    sep = '-----------------------\n'

    thresh1dupes, thresh2dupes = 0, 0
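    # Scan ahead of each event within a 16 s window; log any pair closer
    # than 100 km, and log it again if it also meets the user thresholds.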
    for event in cat.itertuples():

        trimdf = cat[cat['convtime'].between(event.convtime,
                                             event.convtime +
                                             pd.Timedelta(seconds=16),
                                             inclusive='neither')]

        if len(trimdf) != 0:
            for tevent in trimdf.itertuples():
                dist = gps2dist_azimuth(event.latitude, event.longitude,
                                        tevent.latitude,
                                        tevent.longitude)[0] / 1000.
                if dist < 100:
                    dtime = (event.convtime - tevent.convtime).total_seconds()
                    dmag = event.mag - tevent.mag
                    diffs = map('{:.2f}'.format, [dist, dtime, dmag])

                    dupline1 = ' '.join([str(x) for x in event[1:]]) + ' ' +\
                               ' '.join(diffs) + '\n'
                    dupline2 = ' '.join([str(x) for x in tevent[1:]]) + '\n'
                    duplines2.extend((sep, dupline1, dupline2))

                    thresh2dupes += 1

                    if (dist < distwindow) and (abs(dtime) < timewindow):
                        duplines1.extend((sep, dupline1, dupline2))
                        thresh1dupes += 1

    with open('%s_duplicates.txt' % dirname, 'w') as dupfile:
        for dupline in duplines1:
            dupfile.write(dupline)
        for dupline in duplines2:
            dupfile.write(dupline)

    return thresh1dupes, thresh2dupes
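
A usage sketch with hypothetical names; the catalog needs 'time', 'id',
'latitude', 'longitude', 'depth', and 'mag' columns, plus 'place' if
locfilter is used:

catalog = pd.read_csv('mycatalog.csv')
ndup1, ndup2 = list_duplicates(catalog, 'testdir', timewindow=2,
                               distwindow=15, locfilter='Alaska')
# writes testdir_duplicates.txt and returns counts at both thresholds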