Example No. 1
def match_from_biglist(self, cv, iter, match):
    """Write two files to the Lexicons directory: an _info.txt file with
    matching info, and a simple lexicon file, _lex.txt."""
    # Read the CSV, drop the header row, and shuffle the data rows.
    a = [i.strip().split(",") for i in open(self.f).readlines()]
    a = a[1:]
    random.shuffle(a)
    # The word form sits in the last column for disc(retized) input.
    word_spot = -1 if self.disc == 1 else 0
    word, blick, ngram, realword, realblick, realngram = [], [], [], [], [], []
    for row in a:
        if row[3] != "Real":
            word.append(row[word_spot])
            blick.append(float(row[1]))
            ngram.append(float(row[2]))
        else:
            realword.append(row[word_spot])
            realblick.append(float(row[1]))
            realngram.append(float(row[2]))
    # Choose which score dimensions to match on; list() around zip() gives
    # the k-d tree a concrete sequence (zip is an iterator in Python 3).
    minpoint = .03
    if match == "ngram":
        p = list(zip(ngram)); realpoints = list(zip(realngram))
    elif match == "blick":
        p = list(zip(blick)); realpoints = list(zip(realblick))
    else:
        p = list(zip(blick, ngram)); realpoints = list(zip(realblick, realngram))
    tree = kd(p)
    if cv == 0:
        minpoint = minpoint * .75
    print(self.f)
    base = ("Lexicons/lex_" + self.f.split("/")[-1][:-12] + "_cv" + str(cv) +
            "_iter" + str(iter) + "_mbigmatch_" + match)
    outfile = open(base + "_info.txt", "w")
    outfile2 = open(base + "_lex.txt", "w")
    self.match_loop(outfile, outfile2, realpoints, realword, realblick,
                    realngram, self.f, cv, iter, minpoint, blick, ngram,
                    word, tree)
    outfile.close()
    outfile2.close()
    return
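This method leans on names defined elsewhere in its module, notably `kd` (presumably `scipy.spatial.cKDTree`) and `self.match_loop`, which does the actual matching. A minimal sketch of the nearest-neighbour lookup the method sets up, under that cKDTree assumption and with hypothetical toy scores:

from scipy.spatial import cKDTree as kd

blick = [0.12, 0.55, 0.31]      # hypothetical pseudo-word scores
realblick = [0.14, 0.50]        # hypothetical real-word scores
minpoint = .03

tree = kd(list(zip(blick)))     # 1-D points, as in the "blick" branch above
for rb in realblick:
    # Find the nearest pseudo-word score and keep it if within minpoint.
    dist, idx = tree.query([rb])
    if dist <= minpoint:
        print("real %.2f -> pseudo %.2f" % (rb, blick[idx]))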
Example No. 2
def evalNovKDTree(data, archive):
    # Score the novelty of each row of `data` against the union of the
    # current data and the archive of behaviour descriptors.
    tot_data = np.vstack((data, archive))
    tree = kd(tot_data)

    nov = np.zeros(data.shape[0])
    for idx in range(data.shape[0]):
        nov[idx] = eval_ind_k(tree, data[idx])

    return nov
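`eval_ind_k` is not shown here. In novelty-search code it typically returns the mean distance from a behaviour descriptor to its k nearest neighbours in the tree; a hypothetical sketch matching the call above:

def eval_ind_k(tree, behavior, k=15):
    # Query k + 1 neighbours because `behavior` itself is in the tree
    # (it appears at distance 0 and is skipped).
    dists, _ = tree.query(behavior, k=k + 1)
    return dists[1:].mean()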
Example No. 3
# Assumed bindings for the names used below: `kd` for scipy's cKDTree and
# `geodesic` from cartopy.
import numpy as np
import pandas as pd
from scipy.spatial import cKDTree as kd
from cartopy import geodesic


def find_duplicates(craters,
                    radius=1737.4,
                    k=10,
                    rcd=5.,
                    ddiam=0.25,
                    filter_pairs=False):
    """Finds duplicate pairs within crater catalog.

    Triples or more will show up as multiple pairs.

    Parameters
    ----------
    craters : pandas.DataFrame
        Crater catalogue.
    radius : float, optional
        Radius of the world.
    k : int, optional
        Nearest neighbours to search for duplicates.  Default is 10.
    rcd : float, optional
        Minimum value of min(crater_pair_diameters) / pair_distance for two
        craters to be considered a pair.  The minimum rather than the average
        diameter is used to help weed out satellite craters.  This criterion
        is asymmetric between the two members of a pair, so when
        filter_pairs=False a pair may appear as only a single entry.
    ddiam : float, optional
        Maximum value of abs(diameter1 - diameter2) / avg(diameters) to be
        considered a crater pair.
    filter_pairs : bool, optional
        If `True`, filters data frame and keeps only one entry per crater
        pair.

    Returns
    -------
    outframe : pandas.DataFrame
        Data frame of crater duplicate pairs.
    """

    mgeod = geodesic.Geodesic(radius=radius, flattening=0.)

    # Convert to 3D (<https://en.wikipedia.org/wiki/
    # Spherical_coordinate_system#Cartesian_coordinates>); phi = [-180, 180) is
    # equivalent to [0, 360).
    craters['phi'] = np.pi / 180. * craters['Long']
    craters['theta'] = np.pi / 180. * (90 - craters['Lat'])

    craters['x'] = radius * np.sin(craters['theta']) * np.cos(craters['phi'])
    craters['y'] = radius * np.sin(craters['theta']) * np.sin(craters['phi'])
    craters['z'] = radius * np.cos(craters['theta'])

    # Create tree.
    kdt = kd(craters[["x", "y", "z"]].to_numpy(), leafsize=10)

    # Loop over all craters to find duplicates.  First, find k + 1 nearest
    # neighbours (k + 1 because query will include self).
    Lnn, inn = kdt.query(craters[["x", "y", "z"]].to_numpy(), k=k + 1)
    # Remove crater matching with itself (by checking id).
    Lnn_remove_self = np.empty([Lnn.shape[0], Lnn.shape[1] - 1])
    inn_remove_self = np.empty([Lnn.shape[0], Lnn.shape[1] - 1], dtype=int)
    for i in range(Lnn_remove_self.shape[0]):
        not_self = (inn[i] != i)
        inn_remove_self[i] = inn[i][not_self]
        Lnn_remove_self[i] = Lnn[i][not_self]
    craters['Lnn'] = list(Lnn_remove_self)
    craters['inn'] = list(inn_remove_self)

    # Get diameters, coordinates and dataset labels of the k nearest
    # neighbours (self already filtered out above).
    inn_ravel = inn_remove_self.ravel()
    craters['dnn'] = list(
        craters['Diameter (km)'].to_numpy()[inn_ravel].reshape(-1, k))
    craters['long_nn'] = list(
        craters['Long'].to_numpy()[inn_ravel].reshape(-1, k))
    craters['lat_nn'] = list(
        craters['Lat'].to_numpy()[inn_ravel].reshape(-1, k))
    craters['set_nn'] = list(
        craters['Dataset'].to_numpy()[inn_ravel].reshape(-1, k))

    # Prepare empty lists.
    dup_id1 = []
    dup_id2 = []
    dup_D1 = []
    dup_D2 = []
    dup_L = []
    dup_LEuclid = []
    dup_ll1 = []
    dup_ll2 = []
    dup_source1 = []
    dup_source2 = []

    # Iterate over craters to determine if any are duplicate pairs.
    for index, row in craters.iterrows():
        # For each pair, record the smaller crater diameter.
        pair_diameter_min = np.array(
            [min(x, row['Diameter (km)']) for x in row['dnn']])
        proper_dist = np.asarray(
            mgeod.inverse(np.array([row['Long'], row['Lat']]),
                          np.vstack([row['long_nn'], row['lat_nn']]).T))[:, 0]
        # Duplicate pair criteria: 1). min(diameter) / distance > rcd; 2).
        # abs(diameter1 - diameter2) / average(diameter) < ddiam - i.e. the
        # separation distance of the centres must be much smaller than either
        # diameter, and the diameters should be very similar.
        rcd_crit = (pair_diameter_min / row['Lnn'] > rcd)
        diam_sim_crit = ((2. * abs(row['dnn'] - row['Diameter (km)']) /
                          (row['dnn'] + row['Diameter (km)'])) < ddiam)
        dup_candidates, = np.where(rcd_crit & diam_sim_crit)
        if dup_candidates.size:
            for i in dup_candidates:
                if index == row['inn'][i]:
                    raise AssertionError("Two craters with identical IDs.")
                dup_id1.append(index)
                dup_id2.append(row['inn'][i])
                dup_D1.append(row['Diameter (km)'])
                dup_D2.append(row['dnn'][i])
                dup_L.append(proper_dist[i])
                dup_LEuclid.append(row['Lnn'][i])
                dup_ll1.append((row['Long'], row['Lat']))
                dup_ll2.append((row['long_nn'][i], row['lat_nn'][i]))
                dup_source1.append(row['Dataset'])
                dup_source2.append(row['set_nn'][i])

    # Assemble the output table with an explicit column order.
    outframe = pd.DataFrame(
        {
            'ID1': dup_id1,
            'ID2': dup_id2,
            'Diameter1 (km)': dup_D1,
            'Diameter2 (km)': dup_D2,
            'Separation (km)': dup_L,
            'Euclidean Separation (km)': dup_LEuclid,
            'Lat/Long1': dup_ll1,
            'Lat/Long2': dup_ll2,
            'Dataset1': dup_source1,
            'Dataset2': dup_source2
        },
        columns=('ID1', 'ID2', 'Diameter1 (km)', 'Diameter2 (km)',
                 'Separation (km)', 'Euclidean Separation (km)', 'Lat/Long1',
                 'Lat/Long2', 'Dataset1', 'Dataset2'))

    # Hacky, O(N^2) duplicate entry removal.
    if filter_pairs:
        osub = outframe[["ID1", "ID2"]].to_numpy()
        osub = np.array([set(x) for x in osub])
        indices_to_remove = []
        for i in range(osub.shape[0]):
            if i not in indices_to_remove:
                dups = np.where(osub[i + 1:] == osub[i])[0] + i + 1
                indices_to_remove += list(dups)
        indices_to_remove = list(set(indices_to_remove))
        outframe.drop(indices_to_remove, inplace=True)
        outframe.reset_index(inplace=True, drop=True)

    return outframe
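A minimal usage sketch under the import assumptions above, with a hypothetical three-crater catalogue containing one near-coincident pair. Note that find_duplicates adds its working columns (phi, theta, x, y, z, and the *_nn lists) to the input frame in place.

craters = pd.DataFrame({
    'Long': [10.00, 10.01, 45.0],
    'Lat': [20.00, 20.01, -5.0],
    'Diameter (km)': [30.0, 29.5, 8.0],
    'Dataset': ['A', 'B', 'A'],
})
# Craters 0 and 1 nearly coincide and have similar diameters, so they
# should be reported as a duplicate pair; crater 2 is far away.
pairs = find_duplicates(craters, k=2, filter_pairs=True)
print(pairs[['ID1', 'ID2', 'Separation (km)']])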
Example No. 4
def make_density_map(craters,
                     img,
                     kernel=None,
                     k_support=8,
                     k_sig=4.,
                     knn=10,
                     beta=0.3,
                     kdict={},
                     truncate=True):
    """Makes Gaussian kernel density maps.

    Parameters
    ----------
    craters : pandas.DataFrame
        craters dataframe that includes pixel x and y columns
    img : numpy.ndarray
        original image; assumes colour channel is last axis (tf standard)
    kernel : function, "knn" or None
        If a function is passed, it must return an array of length
        craters.shape[0].  If "knn", uses a variable kernel with
            sigma = beta*<d_knn>,
        where <d_knn> is the mean Euclidean distance to the k = knn nearest
        neighbouring craters.  If anything else is passed, a constant
        kernel size with sigma = k_sig is used.
    k_support : int
        Kernel support (i.e. size of kernel stencil) coefficient.  Support
        is determined by kernel_support = k_support*sigma.  Defaults to 8.
    k_sig : float
        Sigma for the constant-sigma kernel.  Defaults to 4.
    knn : int
        k nearest neighbours, used for "knn" kernel.  Defaults to 10.
    beta : float
        Beta value used to calculate sigma for "knn" kernel.  Default 
        is 0.3.
    kdict : dict
        If kernel is custom function, dictionary of arguments passed to kernel.
    truncate : bool
        If True, truncate mask where image truncates
    """

    # Load blank density map
    imgshape = img.shape[:2]
    dmap = np.zeros(imgshape)

    # Get number of craters
    N_ctrs = craters.shape[0]

    # Obtain gaussian kernel sigma values
    # callable checks if kernel is function
    if callable(kernel):
        sigma = kernel(**kdict)
    # If the "knn" kernel is used
    elif kernel == "knn":
        # With more than one crater, average the distances to the nearest
        # min(N_ctrs, knn + 1) - 1 craters; the extra neighbour in the
        # query is the crater itself, which is sliced off.
        if N_ctrs > 1:
            kdt = kd(craters[["x", "y"]].to_numpy(), leafsize=10)
            dnn = kdt.query(craters[["x", "y"]].to_numpy(),
                            k=min(N_ctrs, knn + 1))[0][:, 1:].mean(axis=1)
        # Otherwise, assume there are craters "offscreen" half an image away
        else:
            dnn = 0.5 * imgshape[0] * np.ones(1)
        sigma = beta * dnn
    else:
        sigma = k_sig * np.ones(N_ctrs)

    # Gaussian adding loop
    for i in range(N_ctrs):
        cx = int(craters["x"][i])
        cy = int(craters["y"][i])

        # A bit convoluted, but ensures that kernel_support is always odd,
        # so that the centre of the Gaussian falls on a pixel.
        ks_half = int(k_support * sigma[i] / 2)
        kernel_support = ks_half * 2 + 1
        # Gaussian stencil for this crater (named distinctly from the
        # `kernel` argument so the argument is not clobbered).
        gauss_kernel = gkern(kernel_support, sigma[i])

        # Calculate indices on image where kernel should be added
        [imxl, imxr, gxl, gxr] = get_merge_indices(cx, imgshape[1], ks_half,
                                                   kernel_support)
        [imyl, imyr, gyl, gyr] = get_merge_indices(cy, imgshape[0], ks_half,
                                                   kernel_support)

        # Add kernel to image
        dmap[imyl:imyr, imxl:imxr] += gauss_kernel[gyl:gyr, gxl:gxr]

    # Zero out the density map wherever the image itself is blank.
    if truncate:
        if img.ndim == 3:
            dmap[img[:, :, 0] == 0] = 0
        else:
            dmap[img == 0] = 0

    return dmap
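make_density_map calls two helpers that are not shown, gkern and get_merge_indices. Minimal sketches consistent with how they are called above (assumptions, not the repository's originals):

import numpy as np

def gkern(kernlen, sigma):
    # Unnormalised 2-D Gaussian kernel of side `kernlen`, centred on the
    # middle pixel.
    ax = np.arange(kernlen) - kernlen // 2
    xx, yy = np.meshgrid(ax, ax)
    return np.exp(-(xx ** 2 + yy ** 2) / (2. * sigma ** 2))

def get_merge_indices(c, imglen, ks_half, ker_shp):
    # Clip a kernel of width ker_shp = 2 * ks_half + 1, centred at pixel c,
    # against an image axis of length imglen.  Returns the image slice
    # [iml, imr) and the matching kernel slice [gl, gr).
    iml = max(c - ks_half, 0)
    imr = min(c + ks_half + 1, imglen)
    gl = iml - (c - ks_half)
    gr = ker_shp - ((c + ks_half + 1) - imr)
    return [iml, imr, gl, gr]

With these in place, make_density_map runs on any craters frame that carries pixel "x" and "y" columns.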
Example No. 5
def eval_pop(population):
    # `archive` is a module-level list of previously archived individuals.
    data = numpy.vstack([k.behavior for k in population + archive])
    tree = kd(data)
    for art in population:
        eval_ind_k(art, tree)
    return tree
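Here eval_ind_k takes the individual first (the argument order is flipped relative to Example No. 2) and its return value is discarded, so it presumably stores the novelty score on the individual. A hypothetical, self-contained sketch, again assuming `kd` is scipy.spatial.cKDTree:

import numpy
from scipy.spatial import cKDTree as kd

def eval_ind_k(art, tree, k=2):
    # Novelty = mean distance to the k nearest behaviour descriptors;
    # index 0 is the individual itself (distance 0) and is skipped.
    dists, _ = tree.query(art.behavior, k=k + 1)
    art.novelty = dists[1:].mean()

class Ind:
    def __init__(self, b):
        self.behavior = numpy.asarray(b, dtype=float)

archive = [Ind([0., 0.]), Ind([1., 1.])]
population = [Ind([.1, .2]), Ind([.9, .8])]
eval_pop(population)
print([round(a.novelty, 3) for a in population])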