Example #1
  def _testBasic(self, dtype):
    indices = np.asarray([0, 2, -1, 1], dtype=np.int64)
    depth = 3
    on_value = np.asarray(1.0, dtype=dtype)
    off_value = np.asarray(-1.0, dtype=dtype)

    truth = np.asarray(
        [[1.0, -1.0, -1.0],
         [-1.0, -1.0, 1.0],
         [-1.0, -1.0, -1.0],
         [-1.0, 1.0, -1.0]],
        dtype=dtype)

    # axis == -1
    self._testBothOneHot(
        indices=indices,
        depth=depth,
        on_value=on_value,
        off_value=off_value,
        dtype=dtype,
        truth=truth)

    # axis == 0
    self._testBothOneHot(
        indices=indices,
        depth=depth,
        on_value=on_value,
        off_value=off_value,
        axis=0,
        dtype=dtype,
        truth=truth.T)  # Output is transpose version in this case
Example #2
File: try2.py  Project: Beirdo/misc-stuff
def resample(oldrate, newrate, x, n, dtype, factor):
    print("Resampling from", oldrate, "Hz to", newrate, "Hz, amplification factor", factor)
    rategcd = gcd(oldrate, newrate)
    uprate = newrate // rategcd
    dnrate = oldrate // rategcd

    oldcount = len(x)
    midcount = oldcount * uprate
    newcount = midcount // dnrate

    # Upsample by zero-stuffing: place each input sample every `uprate` slots
    print("Upsampling by", uprate)
    if uprate == 1:
        yout = np.asarray(x, dtype=dtype)
    else:
        yout = np.zeros(midcount, dtype=dtype)
        for i in range(oldcount):
            yout[i * uprate] = x[i] * uprate

    # Anti-alias/anti-image low-pass filter at the tighter of the two Nyquist limits
    wl = min(1.0 / uprate, 1.0 / dnrate)
    print("Antialias filtering at", wl)

    midrate = oldrate * uprate
    filt = firfilter(0, (midrate * wl) / 2.0, midrate, n)
    y = signal.lfilter(filt, 1, yout)

    # Downsample by keeping every `dnrate`-th filtered sample
    print("Downsampling by", dnrate)
    if dnrate == 1:
        yout = np.asarray(y, dtype=dtype)
    else:
        yout = np.zeros(newcount, dtype=dtype)
        for i in range(newcount):
            yout[i] = y[i * dnrate] * factor

    return yout
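
A minimal usage sketch for the function above (hypothetical driver; it assumes numpy as np, scipy.signal as signal, and the gcd/firfilter helpers from the same file are in scope):

import numpy as np
from scipy import signal

oldrate, newrate = 44100, 48000
t = np.arange(0, 0.05, 1.0 / oldrate)
x = np.sin(2 * np.pi * 1000.0 * t)                       # 1 kHz test tone
y = resample(oldrate, newrate, x, n=64, dtype=np.float64, factor=1.0)
print(len(x), "->", len(y))                              # length scales by newrate/oldrate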
Example #3
def compress(data):
    """
    Convert 4-byte integer value to semi-logarithmic 2-byte integer.

    The storage format for numbers up to 32767 is the number itself
    The storage format for numbers above 32767 is
         - (mantissa + 10000*10**power)
    where the mantissa is 4 digits and power is 1, 2 or 3.
    
    add an extra integer at 0, 1024
    """
    data = numpy.asarray(data.flatten(), 'int32')
    assert len(data) == 16384

    # Logarithmic compression
    base = 10000
    erridx = data > 2767000
    idx = data > 32767
    power = numpy.ceil(numpy.log10(data[idx]))-4
    mantissa = data[idx] // (10**power)
    data[idx] = numpy.asarray(-(mantissa + power*base), data.dtype)
    data[erridx] = -777
    
    # Add values at 0, 1022, 2*1022, ...
    fulldata = numpy.zeros(16384 + 17, 'i')
    idx = numpy.arange(len(fulldata),dtype='i')%1022 != 0
    fulldata[idx] = data
    return fulldata
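
A worked check of the encoding rule above (hypothetical driver; assumes numpy is imported as in the original module):

import numpy

data = numpy.zeros((128, 128), dtype='int32')   # 128*128 == 16384 values
data[0, 0] = 1234567
# power = ceil(log10(1234567)) - 4 = 3, mantissa = 1234567 // 10**3 = 1234,
# so the stored value is -(1234 + 3*10000) = -31234
packed = compress(data)
print(packed[1])   # -31234; index 0 of the padded output is a filler slot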
Example #4
  def _testDefaultBasic(self, dtype):
    indices = np.asarray([0, 2, -1, 1], dtype=dtype)
    depth = 3

    truth = np.asarray(
            [[1.0, 0.0, 0.0],
             [0.0, 0.0, 1.0],
             [0.0, 0.0, 0.0],
             [0.0, 1.0, 0.0]],
            dtype=dtype)

    # axis == -1
    self._testBothOneHot(
            indices=indices,
            depth=depth,
            dtype=dtype,
            truth=truth)

    # axis == 0
    self._testBothOneHot(
            indices=indices,
            depth=depth,
            axis=0,
            dtype=dtype,
            truth=truth.T)  # Output is transpose version in this case
Example #5
def load_adm_sat_school_data(return_X_y=False):

    with open("./merged_adm_sat_data.csv") as csv_file:
        data_file = csv.reader(csv_file)
        temp = next(data_file)
        n_samples = int(temp[0])
        n_features = int(temp[1])
        target_names = np.array(temp[2:])


    df = pd.read_csv("./merged_adm_sat_data.csv", sep=",", usecols=(0, 1, 2, 3), skiprows=0)
    data = np.empty((n_samples, n_features), dtype=int)
    target = np.ma.empty((n_samples,), dtype=int)

    for index, row in df.iterrows():
        data[index] = np.asarray([df.iloc[index][0], df.iloc[index][1], df.iloc[index][2]], dtype=float)
        target[index] = np.asarray(df.iloc[index][3], dtype=int)

    feature_names = np.array(['ACT_AVG','SAT_AVG','GRAD_DEBT','REGION'])

    if return_X_y:
        return data, target

    return datasets.base.Bunch(data=data, target=target,
                 target_names=target_names,
                 DESCR='School Data set',
                 feature_names=feature_names)
Example #6
 def test_layer_mul(self):
     # Ensure layer multiplication gives the correct output
     layer_o = self.layer6 * self.layer7
     array1 = np.asarray(layer_o.get_nparray())
     res = np.asarray([[5]*3]*3)
     self.assertEqual(np.all(array1 == 5), True)
     self.assertTrue(allequal(layer_o._data, res))
Example #7
def time_column(table, ifo=None):
    """Extract the 'time' column from the given table.

    This function uses time_func to determine the correct column to
    use as a proxy for 'time' and returns that column.
    The following mappings are used:
    - `sngl_inspiral` -> 'end' time
    - `sngl_burst` -> 'peak' time
    - `sngl_ringdown` -> 'start' time

    @param table
        any `LIGO_LW` table
    @param ifo
        an interferometer prefix if you want single-detector times

    @returns a numpy array object with a 'time' element for each row in
    the table
    """
    if hasattr(table, "get_time"):
        return numpy.asarray(table.get_time())
    func_name = time_func(ligolw_table.StripTableName(table.tableName)).__name__
    if hasattr(table, func_name):
        return numpy.asarray(getattr(table, func_name)())
    else:
        return numpy.asarray(map(func_name, table))
Example #8
 def test_layer_add(self):
     # Ensure layer addition gives the correct output
     layer_o = self.layer6 + self.layer7
     array1 = np.asarray(layer_o.get_nparray())
     res = np.asarray([[6]*3]*3)
     self.assertEqual(np.all(array1 == 6), True)
     self.assertTrue(allequal(layer_o._data, res))
Example #9
 def test_layer_sub(self):
     # Ensure layer subtraction gives the correct output
     layer_o = self.layer6 - self.layer7
     array1 = np.asarray(layer_o.get_nparray())
     res = np.asarray([[-4]*3]*3)
     self.assertEqual(np.all(array1 == -4), True)
     self.assertTrue(allequal(layer_o._data, res))
def learn(tuned_parameters,model):

	# produceFeature(trainfile)
	dataset = genfromtxt(open('Data/'+trainfile,'r'), delimiter=',',dtype='f8')[0:]
	target = [x[0] for x in dataset]
	train = [x[1:] for x in dataset]
	# print train[1:10]
	# print target
	# print len(train)

	# produceFeature(testfile)
	test = genfromtxt(open('Data/'+testfile,'r'),delimiter=',',dtype='f8')[0:]
	test_target = [x[1:] for x in test]


	# X, y = digits.data, digits.target
	trainnp = np.asarray(train)
	targetnp = np.asarray(target)


	# turn the data in a (samples, feature) matrix:
	X, y = trainnp, targetnp
	# X = digits.images.reshape((n_samples, -1))
	# y = digits.target

	# Split the dataset in two equal parts
	X_train, X_test, y_train, y_test = train_test_split(
	    X, y, test_size=0.5, random_state=0)



	scores = ['precision', 'recall']

	for score in scores:
	    print("# Tuning hyper-parameters for %s" % score)
	    print()

	    clf = GridSearchCV(model, tuned_parameters, cv=5,
	                       scoring='%s_weighted' % score)
	    clf.fit(X_train, y_train)

	    print("Best parameters set found on development set:")
	    print()
	    print(clf.best_params_)
	    print()
	    print("Grid scores on development set:")
	    print()
	    for params, mean_score, scores in clf.grid_scores_:
	        print("%0.3f (+/-%0.03f) for %r"
	              % (mean_score, scores.std() * 2, params))
	    print()

	    print("Detailed classification report:")
	    print()
	    print("The model is trained on the full development set.")
	    print("The scores are computed on the full evaluation set.")
	    print()
	    y_true, y_pred = y_test, clf.predict(X_test)
	    print(classification_report(y_true, y_pred))
	    print()
Example #11
  def test_layer_div(self):
      # Ensure layer division gives the correct output
      layer_o = self.layer6 / self.layer7
      array1 = np.asarray(layer_o.get_nparray())
      res = np.asarray([[0.2]*3]*3)
      self.assertEqual(np.all(array1 == 0.2), True)
      self.assertTrue(allequal(layer_o._data, res))
def sample_every_two_correlation_times(energy_data, magnetization_data, correlation_time, no_of_sites):
    """Sample the given data every 2 correlation times and determine value and error."""
    magnet_samples = []
    energy_samples = []

    for t in np.arange(0, len(energy_data), 2 * int(np.ceil(correlation_time))):
        magnet_samples.append(magnetization_data[t])
        energy_samples.append(energy_data[t])

    magnet_samples = np.asarray(magnet_samples)
    energy_samples = np.asarray(energy_samples)

    abs_magnetization = np.mean(np.absolute(magnet_samples))
    abs_magnetization_error = calculate_error(magnet_samples)
    print("<m> (<|M|/N>) = {0} +/- {1}".format(abs_magnetization, abs_magnetization_error))

    magnetization = np.mean(magnet_samples)
    magnetization_error = calculate_error(magnet_samples)
    print("<M/N> = {0} +/- {1}".format(magnetization, magnetization_error))

    energy = np.mean(energy_samples)
    energy_error = calculate_error(energy_samples)
    print("<E/N> = {0} +/- {1}".format(energy, energy_error))

    magnetization_squared = np.mean((magnet_samples * no_of_sites)**2)
    magnetization_squared_error = calculate_error((magnet_samples * no_of_sites)**2)
    print("<M^2> = {0} +/- {1}".format(magnetization_squared, magnetization_squared_error))
Example #13
 def GetAllData(self):
     Cal = self.GetCal()
     pars = self.query('CALC:PAR:CAT?')
     pars = pars.strip('\n').strip("'").split(',')
     parnames = pars[1::2]
     pars = pars[::2]
     names = ['Frequency (Hz)']
     alltrc = [self.GetFrequency()]
     for pp in parnames:
         names.append('%sre ()' % pp)
         names.append('%sim ()' % pp)
     if Cal:
         for pp in parnames:
             names.append('%sre unc ()' % pp)
             names.append('%sim unc ()' % pp)
     for par in pars:
         yy = self.query("CALC:DATA:TRAC? '%s', SDAT" % par)
         yy = np.asarray([float(xx) for xx in yy.split(',')])
         yyre = yy[::2]
         yyim = yy[1::2]
         alltrc.append(yyre)
         alltrc.append(yyim)
     if Cal:
         for par in pars:
             yy = self.query("CALC:DATA:TRAC? '%s', NCD" % par)
             yy = np.asarray([float(xx) for xx in yy.split(',')])
             yyre = yy[::2]
             yyim = yy[1::2]
             alltrc.append(yyre)
             alltrc.append(yyim)
     final = OrderedDict()
     for name,data in zip(names,alltrc):
         final[name]=data
     return final
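
The SDAT/NCD queries above return a flat, comma-separated list of interleaved real/imaginary values; the [::2] / [1::2] slicing splits them. A minimal sketch (assuming numpy as np):

import numpy as np

yy = np.asarray([1.0, 0.1, 2.0, 0.2, 3.0, 0.3])   # re, im, re, im, ...
print(yy[::2])    # real parts      -> [1. 2. 3.]
print(yy[1::2])   # imaginary parts -> [0.1 0.2 0.3]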
Example #14
    def add_lines(self, levels, colors, linewidths, erase=True):
        """
        Draw lines on the colorbar.

        *colors* and *linewidths* must be scalars or
        sequences the same length as *levels*.

        Set *erase* to False to add lines without first
        removing any previously added lines.
        """
        y = self._locate(levels)
        igood = (y < 1.001) & (y > -0.001)
        y = y[igood]
        if cbook.iterable(colors):
            colors = np.asarray(colors)[igood]
        if cbook.iterable(linewidths):
            linewidths = np.asarray(linewidths)[igood]
        N = len(y)
        x = np.array([0.0, 1.0])
        X, Y = np.meshgrid(x, y)
        if self.orientation == "vertical":
            xy = [list(zip(X[i], Y[i])) for i in range(N)]
        else:
            xy = [list(zip(Y[i], X[i])) for i in range(N)]
        col = collections.LineCollection(xy, linewidths=linewidths)

        if erase and self.lines:
            for lc in self.lines:
                lc.remove()
            self.lines = []
        self.lines.append(col)
        col.set_color(colors)
        self.ax.add_collection(col)
Example #15
  def save_nodes_to_store(self, store, queue):
    for node_id, node in self.nodes.items():
      features = {}
      features['neighbors'] = node['neighbors']
      if 'soft_label' in self.nodes_features:
        features['soft_label'] = node['soft_label']
      if 'size' in self.nodes_features:
        features['size'] = len(node['pos'])
      if 'pos' in self.nodes_features:
        features['pos'] = np.asarray(node['pos'])
        if features['pos'].shape == (0,):
          features['pos'] = np.zeros(shape=(0,3))
      if 'mesh' in self.nodes_features:
        #Because ml includes the overlap, it is possible
        #that a node has a mesh in the overlap
        #but not a single voxel in the non-overlap region
        vertices, triangles = mesh.marche_cubes( node_id , self.ml )
        vertices += np.asarray(self.start).astype(np.uint16) * 2 #translate mesh
        features['mesh'] = mesh.get_adjacent( vertices, triangles )
      if 'semantic_sum' in self.nodes_features:
        features['semantic_sum'] = node['semantic_sum']

      features['tree'] = Tree(node_id)
      existent_node_features = store.get_node(node_id)
      if existent_node_features:
        features = self.sum_nodes_features(existent_node_features, features )
      store.put_node(node_id, features)
Example #16
def estimate_transition_matrix(count_matrix):
    """
    Simple Maximum Likelihood estimator of transition matrix.

    Parameters
    ----------
    count_matrix : array or sparse matrix
        A square matrix of transition counts

    Returns
    -------
    tProb : array or sparse matrix
         Most likely transition matrix given `count_matrix`
    """
    # 1.  Make sure you don't modify count_matrix.
    # 2.  Make sure you handle both floats and ints
    if scipy.sparse.isspmatrix(count_matrix):
        C = scipy.sparse.csr_matrix(count_matrix).asfptype()
        weights = np.asarray(C.sum(axis=1)).flatten()
        inv_weights = np.zeros(len(weights))
        inv_weights[weights != 0] = 1.0 / weights[weights != 0]
        D = scipy.sparse.dia_matrix((inv_weights, 0), C.shape).tocsr()
        tProb = D.dot(C)
    else:
        tProb = np.asarray(count_matrix.astype(float))  # astype creates a copy
        weights = tProb.sum(axis=1)
        inv_weights = np.zeros(len(weights))
        inv_weights[weights != 0] = 1.0 / weights[weights != 0]
        tProb = tProb * inv_weights.reshape((weights.shape[0], 1))

    return tProb
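
A quick sanity check of the dense branch above (hypothetical counts; assumes numpy and scipy are imported as in the original module):

import numpy as np

counts = np.array([[5, 5, 0],
                   [2, 0, 2],
                   [0, 0, 0]])          # an unvisited state keeps an all-zero row
t = estimate_transition_matrix(counts)
print(t)
# [[0.5 0.5 0. ]
#  [0.5 0.  0.5]
#  [0.  0.  0. ]]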
 def shared(data):
     """ Place the data into shared variables. This allows Theano to copy
     the data to the GPU, if one is available.
     """
     shared_x = theano.shared(numpy.asarray(data[:,0].tolist(), dtype=theano.config.floatX), borrow=True)
     shared_y = theano.shared(numpy.asarray(data[:,1].tolist(), dtype=theano.config.floatX), borrow=True)
     return shared_x, T.cast(shared_y, "int32")
Example #18
def Seuil_var(img):
    """
    This fonction compute threshold value. In first the image's histogram is calculated. The threshold value is set to the first indexe of histogram wich respect the following criterion : DH > 0, DH(i)/H(i) > 0.1 , H(i) < 0.01 % of the Norm. 

    In : img : ipl Image : image to treated
    Out: seuil : Int : Value of the threshold 
    """
    dim=255
    MaxValue=np.amax(np.asarray(img[:]))
    Norm = np.asarray(img[:]).shape[0]*np.asarray(img[:]).shape[1]
    scale=MaxValue/dim
    Wdim=dim*scale
    MaxValue=np.amax(np.asarray(img[:]))
    bins= [float(x) for x in range(dim)]
    hist,bin_edges = np.histogram(np.asarray(img[:]), bins)
    Norm = Norm -hist[0]
    median=np.median(hist)
    mean=0
    var=0
    i=1
    som = 0
    while (som < 0.8*Norm and i <len(hist)-1):
      som = som + hist[i]
      i=i+1
    while ((hist[i]-hist[i-1] < 0 or (hist[i]-hist[i-1])/hist[i-1]>0.1 or hist[i]> 0.01*Norm ) and i < len(hist)-1):
      i=i+1
    if i == len(hist)-1:
      # criteria never satisfied: no threshold found
      seuil = 0
    else:
      seuil = i
    return seuil
def test_point_in_poly3(point):
    """
    tests points that should be in the polygon
    """

    assert point_in_poly(poly2_ccw, np.asarray(point, dtype=np.float64))
    assert point_in_poly(poly2_cw, np.asarray(point, dtype=np.float64))
Example #20
File: geometry.py  Project: ratnania/caid
    def Draw(self, nrb=None, MeshColor=None, NurbsColor=None, PointsColor=None, alpha=ALPHA, blend=False):

        if NurbsColor is None:
            if self.NurbsColor is None:
                NurbsColor = list(asarray(Theme().color_viewer("default_patch")).copy())
            else:
                NurbsColor = list(asarray(self.NurbsColor).copy())
        if self.show:
            if nrb is not None:
                list_nrb = [nrb]
            else:
                list_nrb = self._list

            for i in range(0, len(list_nrb)):
                nrb = list_nrb[i]
                nrbInfo = self.list_patchInfo[i]
                if nrbInfo.show:
                    _NurbsColor = asarray(NurbsColor).copy()
                    if nrbInfo.NurbsColor is not None:
                        _NurbsColor = asarray(nrbInfo.NurbsColor).copy()
                    NurbsSteps = nrbInfo.steps
                    evaluator = self.GetEvaluator(
                        nrb, MeshColor=MeshColor, NurbsColor=_NurbsColor, alpha=alpha, steps=NurbsSteps
                    )
                    showMesh = self.showMesh or nrbInfo.showMesh
                    evaluator.draw(mesh=showMesh, nurbs=True, blend=blend)
                if self.showPoints or nrbInfo.showPoints:
                    # Draw control points
                    self.DrawControlPoints(nrb, PointsColor=PointsColor, alpha=alpha, blend=blend)
Example #21
def _threshold_brier_score_vectorized(observations, forecasts, thresholds):
    observations = np.asarray(observations)
    thresholds = np.asarray(thresholds)
    forecasts = np.asarray(forecasts)

    def exceedances(x):
        # NaN safe calculation of threshold exceedances
        # add an extra dimension to `x` and broadcast `thresholds` so that it
        # varies along that new dimension
        with suppress_warnings('invalid value encountered in greater'):
            exceeds = (x[..., np.newaxis] >
                       thresholds.reshape((1,) * x.ndim + (-1,))
                       ).astype(float)
        if x.ndim == 0 and np.isnan(x):
            exceeds[:] = np.nan
        else:
            exceeds[np.where(np.isnan(x))] = np.nan
        return exceeds

    binary_obs = exceedances(observations)
    if observations.shape == forecasts.shape:
        prob_forecast = exceedances(forecasts)
    elif observations.shape == forecasts.shape[:-1]:
        # axis=-2 should be the 'realization' axis, after swapping that axes
        # to the end of forecasts and inserting one extra axis
        with suppress_warnings('Mean of empty slice'):
            prob_forecast = np.nanmean(exceedances(forecasts), axis=-2)
    else:
        raise AssertionError
    return brier_score(binary_obs, prob_forecast)
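
The broadcasting trick inside exceedances() can be seen in isolation; a minimal sketch (assuming numpy as np):

import numpy as np

x = np.asarray([0.2, 1.5, 3.0])
thresholds = np.asarray([1.0, 2.0])
exceeds = (x[..., np.newaxis] > thresholds.reshape((1,) * x.ndim + (-1,))).astype(float)
print(exceeds.shape)   # (3, 2): one column per threshold
print(exceeds)
# [[0. 0.]
#  [1. 0.]
#  [1. 1.]]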
def test_cross_validator_with_default_indices():
    n_samples = 4
    n_unique_labels = 4
    n_folds = 2
    p = 2
    n_iter = 10  # (the default value)

    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    X_1d = np.array([1, 2, 3, 4])
    y = np.array([1, 1, 2, 2])
    labels = np.array([1, 2, 3, 4])
    loo = LeaveOneOut()
    lpo = LeavePOut(p)
    kf = KFold(n_folds)
    skf = StratifiedKFold(n_folds)
    lolo = LeaveOneLabelOut()
    lopo = LeavePLabelOut(p)
    ss = ShuffleSplit(random_state=0)
    ps = PredefinedSplit([1, 1, 2, 2])  # n_splits = no. of unique folds = 2

    n_splits = [n_samples, comb(n_samples, p), n_folds, n_folds,
                n_unique_labels, comb(n_unique_labels, p), n_iter, 2]

    for i, cv in enumerate([loo, lpo, kf, skf, lolo, lopo, ss, ps]):
        # Test if get_n_splits works correctly
        assert_equal(n_splits[i], cv.get_n_splits(X, y, labels))

        # Test if the cross-validator works as expected even if
        # the data is 1d
        np.testing.assert_equal(list(cv.split(X, y, labels)),
                                list(cv.split(X_1d, y, labels)))
        # Test that train, test indices returned are integers
        for train, test in cv.split(X, y, labels):
            assert_equal(np.asarray(train).dtype.kind, 'i')
            assert_equal(np.asarray(test).dtype.kind, 'i')
Example #23
def encode_doc(doc, max_len):
    if doc is None:
        return np.asarray([])

    # enc = np.asarray([max(min(ord(c), max_char-1), 0) for c in doc[:max_len]])
    enc = np.asarray([vocab.token2id.get(c, default_id) for c in itertools.islice(gensim.utils.tokenize(doc, to_lower=True), max_len)])
    return enc
Example #24
File: image.py  Project: CTPUG/matplotlib
    def set_data(self, x, y, A):
        A = cbook.safe_masked_invalid(A)
        if x is None:
            x = np.arange(0, A.shape[1]+1, dtype=np.float64)
        else:
            x = np.asarray(x, np.float64).ravel()
        if y is None:
            y = np.arange(0, A.shape[0]+1, dtype=np.float64)
        else:
            y = np.asarray(y, np.float64).ravel()

        if A.shape[:2] != (y.size-1, x.size-1):
            print(A.shape)
            print(y.size)
            print(x.size)
            raise ValueError("Axes don't match array shape")
        if A.ndim not in [2, 3]:
            raise ValueError("A must be 2D or 3D")
        if A.ndim == 3 and A.shape[2] == 1:
            A.shape = A.shape[:2]
        self.is_grayscale = False
        if A.ndim == 3:
            if A.shape[2] in [3, 4]:
                if (A[:,:,0] == A[:,:,1]).all() and (A[:,:,0] == A[:,:,2]).all():
                    self.is_grayscale = True
            else:
                raise ValueError("3D arrays must have RGB or RGBA as last dim")
        self._A = A
        self._Ax = x
        self._Ay = y
        self._rgbacache = None
def test_sym_matrix_to_vec():
    sym = np.ones((3, 3))
    sqrt2 = 1. / sqrt(2.)
    vec = np.array([sqrt2, 1., sqrt2, 1., 1., sqrt2])
    assert_array_almost_equal(sym_matrix_to_vec(sym), vec)

    vec = np.array([1., 1., 1.])
    assert_array_almost_equal(sym_matrix_to_vec(sym, discard_diagonal=True),
                              vec)

    # Check sym_matrix_to_vec is the inverse function of vec_to_sym_matrix
    n = 5
    p = n * (n + 1) // 2
    rand_gen = np.random.RandomState(0)
    # when diagonal is included
    vec = rand_gen.rand(p)
    sym = vec_to_sym_matrix(vec)
    assert_array_almost_equal(sym_matrix_to_vec(sym), vec)

    # when diagonal is given separately
    diagonal = rand_gen.rand(n + 1)
    sym = vec_to_sym_matrix(vec, diagonal=diagonal)
    assert_array_almost_equal(sym_matrix_to_vec(sym, discard_diagonal=True),
                              vec)

    # multiple matrices case when diagonal is included
    vecs = np.asarray([vec, 2. * vec, 0.5 * vec])
    syms = vec_to_sym_matrix(vecs)
    assert_array_almost_equal(sym_matrix_to_vec(syms), vecs)

    # multiple matrices case when diagonal is given separately
    diagonals = np.asarray([diagonal, 3. * diagonal, -diagonal])
    syms = vec_to_sym_matrix(vecs, diagonal=diagonals)
    assert_array_almost_equal(sym_matrix_to_vec(syms, discard_diagonal=True),
                              vecs)
  def _testTensorArrayWriteConcat(self, tf_dtype):
    dtype = tf_dtype.as_numpy_dtype()
    with self.test_session(use_gpu=self._use_gpu):
      ta = tensor_array_ops.TensorArray(
          dtype=tf_dtype, tensor_array_name="foo", size=3, infer_shape=False)

      if tf_dtype == tf.string:
        # In Python3, np.str is unicode, while we always want bytes
        convert = lambda x: np.asarray(x).astype("|S")
      else:
        convert = lambda x: np.asarray(x).astype(dtype)

      w0 = ta.write(0, convert([[4.0, 5.0], [104.0, 105.0], [204.0, 205.0]]))
      w1 = w0.write(1, convert([[6.0, 7.0], [106.0, 107.0]]))
      w2 = w1.write(2, convert([[8.0, 9.0]]))

      c0 = w2.concat()

      self.assertAllEqual(
          convert([[4.0, 5.0],
                   [104.0, 105.0],
                   [204.0, 205.0],
                   [6.0, 7.0],
                   [106.0, 107.0],
                   [8.0, 9.0]]), c0.eval())
Example #27
File: haar.py  Project: zengfengbo/cnvkit
def segment_haar(cnarr):
    """Do segmentation for CNVkit.

    Calculate copy number segmentation by HaarSeg
    (http://haarseg.r-forge.r-project.org/)
    Input: log2 coverage data in Nexus 'basic' format
    Output: the CBS data table

    """
    chrom_tables = []
    # Segment each chromosome individually
    # ENH - skip large gaps (segment chrom. arms separately)
    for chrom, subprobes in cnarr.by_chromosome():
        # echo(chrom, ':')  # DBG
        segtable = haarSeg(subprobes['log2'])
        chromtable = pd.DataFrame({
            'chromosome': chrom,
            'start': np.asarray(subprobes['start']).take(segtable['start']),
            'end': np.asarray(subprobes['end']
                             ).take(segtable['start']+segtable['size']-1),
            'gene': '.',
            'log2': segtable['log2'],
            'probes': segtable['size'],
        })
        # echo(chromtable)  # DBG
        chrom_tables.append(chromtable)
    result = pd.concat(chrom_tables)
    echo("haar: Found", len(result), "segments")
    segarr = cnarr.as_dataframe(result)
    segarr.sort_columns()
    return segarr
Example #28
File: rpa.py  Project: rhambach/EELcalc
  def __init__(self, qdir='GM', verbosity=1, filepattern=None):
    """
      qdir      ... (opt) 'GM' or 'GK' for symmetry direction of q
      verbosity ... (opt) 0 silent, 1 minimal output, 3 debug, >3 debug interpolation
      filepattern.. (opt) read eps2D from files matching filepattern
                          qdir has no effect in this case
    """
    self.qdir      = qdir;
    self.verbosity = verbosity;

    # read DP-output files containing EPS2D (sorted by momentum transfer)
    if filepattern is None:
      self.path   = os.path.dirname(os.path.abspath(__file__))+'/data/'+qdir;
      filepattern = self.path+'/CUTOFF_R12.6_grapheneAA-2d0-HIGH-RPA*-high-%s-q*_outlf.eps'%(self.qdir);
    self.spectra= dp_mdf.GetMDF(filepattern);
    self.spectra.sort(key=lambda mdf: np.linalg.norm(mdf.get_q('cc','au')));

    # check that we have eps2D
    assert len(self.spectra)>0
    for mdf in self.spectra:
      assert mdf.param['quantity']=='mdf';
      assert 'eps2D' in ''.join(mdf.param['comment']);

    # extract data
    self.eps2D  = np.asarray([ mdf.eps for mdf in self.spectra ]);
    q           = [ np.linalg.norm(mdf.get_q('cc','au')) for mdf in self.spectra ];
    self.q      = np.asarray(q, dtype=float);  # in 1/bohr
    self.E      = self.spectra[0].get_E();     # in eV
    self.calc_param = deepcopy(self.spectra[0].param);
    self.set_qprecision();
Example #29
def access_Measurement(lat, long, year):
    path_ccsm4 = '/Users/DavidKMYang/ClimateResearch/WBGT/ccsm4_tasmax_nepal/'

    os.chdir(path_ccsm4)
    file_names_ccsm4 = glob.glob("tasmax_" + str(year)+"*.mat")

    for i in range(len(file_names_ccsm4)):
        lat_index = 0
        long_index = 0
        print (file_names_ccsm4[i])
        tempData = scipy.io.loadmat(path_ccsm4 + file_names_ccsm4[i])
        tempData = tempData[file_names_ccsm4[i][:-4]][0]

        tempLatList = []
        for k in range(len(tempData[0])):
            tempLatList.append(tempData[0][k][0])
        tempLatList = np.asarray(tempLatList)
        lat_index = find_nearest(tempLatList, lat)

        tempLongList = tempData[1][0]
        tempLongList = np.asarray(tempLongList)

        long_index = find_nearest(tempLongList, long)
        print (tempLatList[lat_index])
        print (tempLongList[long_index])
        print (tempData[2][lat_index][long_index])


# example driver call
access_Measurement(25, 30, 2001)
def test_stratified_shuffle_split_init():
    X = np.arange(7)
    y = np.asarray([0, 1, 1, 1, 2, 2, 2])
    # Check that error is raised if there is a class with only one sample
    assert_raises(ValueError, next,
                  StratifiedShuffleSplit(3, 0.2).split(X, y))

    # Check that error is raised if the test set size is smaller than n_classes
    assert_raises(ValueError, next, StratifiedShuffleSplit(3, 2).split(X, y))
    # Check that error is raised if the train set size is smaller than
    # n_classes
    assert_raises(ValueError, next,
                  StratifiedShuffleSplit(3, 3, 2).split(X, y))

    X = np.arange(9)
    y = np.asarray([0, 0, 0, 1, 1, 1, 2, 2, 2])
    # Check that errors are raised if there is not enough samples
    assert_raises(ValueError, StratifiedShuffleSplit, 3, 0.5, 0.6)
    assert_raises(ValueError, next,
                  StratifiedShuffleSplit(3, 8, 0.6).split(X, y))
    assert_raises(ValueError, next,
                  StratifiedShuffleSplit(3, 0.6, 8).split(X, y))

    # Train size or test size too small
    assert_raises(ValueError, next,
                  StratifiedShuffleSplit(train_size=2).split(X, y))
    assert_raises(ValueError, next,
                  StratifiedShuffleSplit(test_size=2).split(X, y))
Example #31
def extract_emission_features(sentence_tokens: List[Token], word_index: int,
                              tag: str, feature_indexer: Indexer,
                              words_to_tag_counters, tf_idf_score,
                              add_to_indexer: bool):
    """
    Extracts emission features for tagging the word at word_index with tag.
    :param sentence_tokens: sentence to extract over
    :param word_index: word index to consider
    :param tag: the tag that we're featurizing for
    :param feature_indexer: Indexer over features
    :param add_to_indexer: boolean variable indicating whether we should be expanding the indexer or not. This should
    be True at train time (since we want to learn weights for all features) and False at test time (to avoid creating
    any features we don't have weights for).
    :return: an ndarray
    """
    feats = []
    # curr_word = lemmatizer.lemmatize(sentence_tokens[word_index].word)
    curr_word = sentence_tokens[word_index].word
    # Lexical and POS features on this word, the previous, and the next (Word-1, Word0, Word1)
    for idx_offset in range(-1, 2):
        if word_index + idx_offset < 0:
            active_word = "<s>"
        elif word_index + idx_offset >= len(sentence_tokens):
            active_word = "</s>"
        else:
            active_word = sentence_tokens[word_index + idx_offset].word
        if word_index + idx_offset < 0:
            active_pos = "<S>"
        elif word_index + idx_offset >= len(sentence_tokens):
            active_pos = "</S>"
        else:
            active_pos = sentence_tokens[word_index + idx_offset].pos
        maybe_add_feature(feats, feature_indexer, add_to_indexer,
                          tag + ":Word" + repr(idx_offset) + "=" + active_word)
        maybe_add_feature(feats, feature_indexer, add_to_indexer,
                          tag + ":Pos" + repr(idx_offset) + "=" + active_pos)
    # Character n-grams of the current word
    max_ngram_size = 3
    for ngram_size in range(1, max_ngram_size + 1):
        start_ngram = curr_word[0:min(ngram_size, len(curr_word))]
        maybe_add_feature(feats, feature_indexer, add_to_indexer,
                          tag + ":StartNgram=" + start_ngram)
        end_ngram = curr_word[max(0, len(curr_word) - ngram_size):]
        maybe_add_feature(feats, feature_indexer, add_to_indexer,
                          tag + ":EndNgram=" + end_ngram)
    # Look at a few word shape features
    maybe_add_feature(feats, feature_indexer, add_to_indexer,
                      tag + ":IsCap=" + repr(curr_word[0].isupper()))
    # Compute word shape
    new_word = []
    for i in range(0, len(curr_word)):
        if curr_word[i].isupper():
            new_word += "X"
        elif curr_word[i].islower():
            new_word += "x"
        elif curr_word[i].isdigit():
            new_word += "0"
        else:
            new_word += "?"
    maybe_add_feature(feats, feature_indexer, add_to_indexer,
                      tag + ":WordShape=" + repr(new_word))
    maybe_add_feature(
        feats, feature_indexer, add_to_indexer,
        tag + ":WordCount=" + repr(words_to_tag_counters[curr_word]))

    if tf_idf_score >= 0.75:
        maybe_add_feature(feats, feature_indexer, add_to_indexer,
                          tag + ":TF-IDF=" + "1-TFIDF")
    elif tf_idf_score >= 0.5:
        maybe_add_feature(feats, feature_indexer, add_to_indexer,
                          tag + ":TF-IDF=" + "0.75-TFIDF")
    elif tf_idf_score >= 0.25:
        maybe_add_feature(feats, feature_indexer, add_to_indexer,
                          tag + ":TF-IDF=" + "0.5-TFIDF")
    else:
        maybe_add_feature(feats, feature_indexer, add_to_indexer,
                          tag + ":TF-IDF=" + "0.25-TFIDF")
    return np.asarray(feats, dtype=int)
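
The word-shape mapping used above (X/x/0/? for upper/lower/digit/other characters) can be illustrated on its own; a small sketch with hypothetical inputs:

def word_shape(word):
    # same character classes as the loop in extract_emission_features
    return "".join("X" if c.isupper() else
                   "x" if c.islower() else
                   "0" if c.isdigit() else "?" for c in word)

print(word_shape("McDonald's"))   # XxXxxxxx?x
print(word_shape("A1-2"))         # X0?0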
def get_kmeans_random_batch(model, X, X_noisy, X_adv, dataset, k=10, batch_size=100, pca=False):
    """
    Get the mean distance of each Xi in X_adv to its k nearest neighbors.

    :param model:
    :param X: normal images
    :param X_noisy: noisy images
    :param X_adv: adversarial images
    :param dataset: 'mnist', 'cifar', 'svhn', has different DNN architectures  
    :param k: the number of nearest neighbours for LID estimation  
    :param batch_size: default 100
    :param pca: using pca or not, if True, apply pca to the referenced sample and a 
            minibatch of normal samples, then compute the knn mean distance of the referenced sample.
    :return: kms_normal: kmean of normal images (num_examples, 1)
            kms_noisy: kmean of noisy images (num_examples, 1)
            kms_adv: kmean of adv images (num_examples, 1)
    """
    # get deep representations
    funcs = [K.function([model.layers[0].input, K.learning_phase()], [model.layers[-2].output])]
    km_dim = len(funcs)
    print("Number of layers to use: ", km_dim)

    def estimate(i_batch):
        start = i_batch * batch_size
        end = np.minimum(len(X), (i_batch + 1) * batch_size)
        n_feed = end - start
        km_batch = np.zeros(shape=(n_feed, km_dim))
        km_batch_adv = np.zeros(shape=(n_feed, km_dim))
        km_batch_noisy = np.zeros(shape=(n_feed, km_dim))
        for i, func in enumerate(funcs):
            X_act = func([X[start:end], 0])[0]
            X_act = np.asarray(X_act, dtype=np.float32).reshape((n_feed, -1))
            # print("X_act: ", X_act.shape)

            X_adv_act = func([X_adv[start:end], 0])[0]
            X_adv_act = np.asarray(X_adv_act, dtype=np.float32).reshape((n_feed, -1))
            # print("X_adv_act: ", X_adv_act.shape)

            X_noisy_act = func([X_noisy[start:end], 0])[0]
            X_noisy_act = np.asarray(X_noisy_act, dtype=np.float32).reshape((n_feed, -1))
            # print("X_noisy_act: ", X_noisy_act.shape)

            # Mean distance to the k nearest neighbours (optionally computed in PCA space)
            if pca:
                km_batch[:, i] = kmean_pca_batch(X_act, X_act, k=k)
            else:
                km_batch[:, i] = kmean_batch(X_act, X_act, k=k)
            # print("lid_batch: ", lid_batch.shape)
            if pca:
                km_batch_adv[:, i] = kmean_pca_batch(X_act, X_adv_act, k=k)
            else:
                km_batch_adv[:, i] = kmean_batch(X_act, X_adv_act, k=k)
            # print("lid_batch_adv: ", lid_batch_adv.shape)
            if pca:
                km_batch_noisy[:, i] = kmean_pca_batch(X_act, X_noisy_act, k=k)
            else:
                km_batch_noisy[:, i] = kmean_batch(X_act, X_noisy_act, k=k)
                # print("lid_batch_noisy: ", lid_batch_noisy.shape)
        return km_batch, km_batch_noisy, km_batch_adv

    kms = []
    kms_adv = []
    kms_noisy = []
    n_batches = int(np.ceil(X.shape[0] / float(batch_size)))
    for i_batch in tqdm(range(n_batches)):
        km_batch, km_batch_noisy, km_batch_adv = estimate(i_batch)
        kms.extend(km_batch)
        kms_adv.extend(km_batch_adv)
        kms_noisy.extend(km_batch_noisy)
        # print("kms: ", kms.shape)
        # print("kms_adv: ", kms_noisy.shape)
        # print("kms_noisy: ", kms_noisy.shape)

    kms = np.asarray(kms, dtype=np.float32)
    kms_noisy = np.asarray(kms_noisy, dtype=np.float32)
    kms_adv = np.asarray(kms_adv, dtype=np.float32)

    return kms, kms_noisy, kms_adv
def get_lids_random_batch(model, X, X_noisy, X_adv, dataset, k=10, batch_size=100):
    """
    Get the local intrinsic dimensionality of each Xi in X_adv
    estimated by k close neighbours in the random batch it lies in.
    :param model:
    :param X: normal images
    :param X_noisy: noisy images
    :param X_adv: adversarial images
    :param dataset: 'mnist', 'cifar', 'svhn', has different DNN architectures  
    :param k: the number of nearest neighbours for LID estimation  
    :param batch_size: default 100
    :return: lids: LID of normal images of shape (num_examples, lid_dim)
            lids_adv: LID of advs images of shape (num_examples, lid_dim)
    """
    # get deep representations
    funcs = [K.function([model.layers[0].input, K.learning_phase()], [out])
                 for out in get_layer_wise_activations(model, dataset)]
    lid_dim = len(funcs)
    print("Number of layers to estimate: ", lid_dim)

    def estimate(i_batch):
        start = i_batch * batch_size
        end = np.minimum(len(X), (i_batch + 1) * batch_size)
        n_feed = end - start
        lid_batch = np.zeros(shape=(n_feed, lid_dim))
        lid_batch_adv = np.zeros(shape=(n_feed, lid_dim))
        lid_batch_noisy = np.zeros(shape=(n_feed, lid_dim))
        for i, func in enumerate(funcs):
            X_act = func([X[start:end], 0])[0]
            X_act = np.asarray(X_act, dtype=np.float32).reshape((n_feed, -1))
            # print("X_act: ", X_act.shape)

            X_adv_act = func([X_adv[start:end], 0])[0]
            X_adv_act = np.asarray(X_adv_act, dtype=np.float32).reshape((n_feed, -1))
            # print("X_adv_act: ", X_adv_act.shape)

            X_noisy_act = func([X_noisy[start:end], 0])[0]
            X_noisy_act = np.asarray(X_noisy_act, dtype=np.float32).reshape((n_feed, -1))
            # print("X_noisy_act: ", X_noisy_act.shape)

            # random clean samples
            # Maximum likelihood estimation of local intrinsic dimensionality (LID)
            lid_batch[:, i] = mle_batch(X_act, X_act, k=k)
            # print("lid_batch: ", lid_batch.shape)
            lid_batch_adv[:, i] = mle_batch(X_act, X_adv_act, k=k)
            # print("lid_batch_adv: ", lid_batch_adv.shape)
            lid_batch_noisy[:, i] = mle_batch(X_act, X_noisy_act, k=k)
            # print("lid_batch_noisy: ", lid_batch_noisy.shape)
        return lid_batch, lid_batch_noisy, lid_batch_adv

    lids = []
    lids_adv = []
    lids_noisy = []
    n_batches = int(np.ceil(X.shape[0] / float(batch_size)))
    for i_batch in tqdm(range(n_batches)):
        lid_batch, lid_batch_noisy, lid_batch_adv = estimate(i_batch)
        lids.extend(lid_batch)
        lids_adv.extend(lid_batch_adv)
        lids_noisy.extend(lid_batch_noisy)
        # print("lids: ", lids.shape)
        # print("lids_adv: ", lids_noisy.shape)
        # print("lids_noisy: ", lids_noisy.shape)

    lids = np.asarray(lids, dtype=np.float32)
    lids_noisy = np.asarray(lids_noisy, dtype=np.float32)
    lids_adv = np.asarray(lids_adv, dtype=np.float32)

    return lids, lids_noisy, lids_adv
Example #34
    def test3(self):

        #define nodal points and triangles of a small test grid
        #got this grid from http://matplotlib.org/examples/pylab_examples/triplot_demo.html
        xy = numpy.asarray([[-0.101, 0.872], [-0.080, 0.883], [-0.069, 0.888],
                            [-0.054, 0.890], [-0.045, 0.897], [-0.057, 0.895],
                            [-0.073, 0.900], [-0.087, 0.898], [-0.090, 0.904],
                            [-0.069, 0.907], [-0.069, 0.921], [-0.080, 0.919],
                            [-0.073, 0.928], [-0.052, 0.930], [-0.048, 0.942],
                            [-0.062, 0.949], [-0.054, 0.958], [-0.069, 0.954],
                            [-0.087, 0.952], [-0.087, 0.959], [-0.080, 0.966],
                            [-0.085, 0.973], [-0.087, 0.965], [-0.097, 0.965],
                            [-0.097, 0.975], [-0.092, 0.984], [-0.101, 0.980],
                            [-0.108, 0.980], [-0.104, 0.987], [-0.102, 0.993],
                            [-0.115, 1.001], [-0.099, 0.996], [-0.101, 1.007],
                            [-0.090, 1.010], [-0.087, 1.021], [-0.069, 1.021],
                            [-0.052, 1.022], [-0.052, 1.017], [-0.069, 1.010],
                            [-0.064, 1.005], [-0.048, 1.005], [-0.031, 1.005],
                            [-0.031, 0.996], [-0.040, 0.987], [-0.045, 0.980],
                            [-0.052, 0.975], [-0.040, 0.973], [-0.026, 0.968],
                            [-0.020, 0.954], [-0.006, 0.947], [0.003, 0.935],
                            [0.006, 0.926], [0.005, 0.921], [0.022, 0.923],
                            [0.033, 0.912], [0.029, 0.905], [0.017, 0.900],
                            [0.012, 0.895], [0.027, 0.893], [0.019, 0.886],
                            [0.001, 0.883], [-0.012, 0.884], [-0.029, 0.883],
                            [-0.038, 0.879], [-0.057, 0.881], [-0.062, 0.876],
                            [-0.078, 0.876], [-0.087, 0.872], [-0.030, 0.907],
                            [-0.007, 0.905], [-0.057, 0.916], [-0.025, 0.933],
                            [-0.077, 0.990], [-0.059, 0.993]])
        triangles = numpy.asarray([[67, 66, 1], [65, 2, 66], [1, 66, 2],
                                   [64, 2, 65], [63, 3, 64], [60, 59, 57],
                                   [2, 64, 3], [3, 63, 4], [0, 67, 1],
                                   [62, 4, 63], [57, 59, 56], [59, 58, 56],
                                   [61, 60, 69], [57, 69, 60], [4, 62, 68],
                                   [6, 5, 9], [61, 68, 62], [69, 68, 61],
                                   [9, 5, 70], [6, 8, 7], [4, 70, 5],
                                   [8, 6, 9], [56, 69, 57], [69, 56, 52],
                                   [70, 10, 9], [54, 53, 55], [56, 55, 53],
                                   [68, 70, 4], [52, 56, 53], [11, 10, 12],
                                   [69, 71, 68], [68, 13, 70], [10, 70, 13],
                                   [51, 50, 52], [13, 68, 71], [52, 71, 69],
                                   [12, 10, 13], [71, 52, 50], [71, 14, 13],
                                   [50, 49, 71], [49, 48, 71], [14, 16, 15],
                                   [14, 71, 48], [17, 19, 18], [17, 20, 19],
                                   [48, 16, 14], [48, 47, 16], [47, 46, 16],
                                   [16, 46, 45], [23, 22, 24], [21, 24, 22],
                                   [17, 16, 45], [20, 17, 45], [21, 25, 24],
                                   [27, 26, 28], [20, 72, 21], [25, 21, 72],
                                   [45, 72, 20], [25, 28, 26], [44, 73, 45],
                                   [72, 45, 73], [28, 25, 29], [29, 25, 31],
                                   [43, 73, 44], [73, 43, 40], [72, 73, 39],
                                   [72, 31, 25], [42, 40, 43], [31, 30, 29],
                                   [39, 73, 40], [42, 41, 40], [72, 33, 31],
                                   [32, 31, 33], [39, 38, 72], [33, 72, 38],
                                   [33, 38, 34], [37, 35, 38], [34, 38, 35],
                                   [35, 37, 36]])

        num_elems = len(triangles)
        elements = UnstructuredGrid(num_elems)
        elements.n1 = triangles[:, 0] - 1
        elements.n2 = triangles[:, 1] - 1
        elements.n3 = triangles[:, 2] - 1

        nodes = UnstructuredGrid(len(xy))
        nodes.lon = (xy[:, 0] | units.rad)
        nodes.lat = (xy[:, 1] | units.rad)

        grid = StaggeredGrid(elements, nodes)

        values = numpy.random.random(num_elems)
        print(values)
        elements.values = values

        nodes.values = grid.map_elements_to_nodes(values)
        print(nodes.values)

        remapped_values = grid.map_nodes_to_elements(nodes.values)
        print(remapped_values)

        before_sum = values.sum()
        after_sum = remapped_values.sum()

        print('before', before_sum, 'after', after_sum)

        self.assertAlmostEqual(
            after_sum,
            before_sum,
            msg="Sum of values before and after remapping should be the same")
Example #35
def _symbols_to_ints(symbols, unit_dict):

    ints = [unit_dict[symbol] for symbol in symbols]
    return np.asarray(ints, dtype=np.int32)
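
A small usage sketch (hypothetical unit dictionary; assumes numpy as np):

import numpy as np

unit_dict = {"m": 0, "s": 1, "kg": 2}
print(_symbols_to_ints(["kg", "m", "m"], unit_dict))   # [2 0 0]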
Example #36
import os

import cv2
import matplotlib.image as mpimg
import numpy as np
from PIL import Image
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier
from sklearn.externals import joblib

path = 'Dataset'
imagePaths = [os.path.join(path, f) for f in os.listdir(path)]
print(imagePaths)
faces = []
IDs = []
data = []
for imagePath in imagePaths:
    if imagePath != 'Dataset/.DS_Store':
        faceImg = Image.open(imagePath).convert('L')
        faceNp = np.array(faceImg, 'uint8')
        faceNp = cv2.resize(faceNp, (40, 40))
        dataNp = faceNp.reshape((1600,))
        ID = int(os.path.split(imagePath)[-1].split('.')[1])
        faces.append(faceNp)
        IDs.append(ID)
        data.append(dataNp)
final_images = np.asarray(faces)
final_data = np.asarray(data)
n_features = final_data.shape[1]
y = np.asarray(IDs)
n_classes = 3
np.save('final_data', final_data)
np.save('final_labels', y)
Example #37
def main():
    parser = argparse.ArgumentParser()

    # filename of the image
    parser.add_argument('-f', action='store', dest='fname', help='Image to detect edges on.', required=True)

    # user inputs what sigma value they want to use for Gaussian kernel
    parser.add_argument('-s', action='store', dest='sigma', type=float, help='Sigma value for the Gaussian kernel.', required=True)

    # low value for hysteresis thresholding
    parser.add_argument('-L', action='store', dest='low', type=int, help='Lower end of the threshold.', required=True)

    # high value for hysteresis thresholding
    parser.add_argument('-H', action='store', dest='high', type=int, help='Higher end of the threshold.', required=True)

    # threshold to determine if an R score belongs to a corner or not
    parser.add_argument('-R', action='store', dest='rval', type=int, help='Threshold for the R scores.', required=True)

    # desired dimensions of the kernel
    parser.add_argument('-S', action='store', dest='size', type=int, default=5, help='Size of the Gaussian kernel. Default is 5x5 kernel.')

    args = parser.parse_args()

    if(args.low >= args.high):
        raise ValueError("High value threshold must be greater than low value!")

    if(args.high > 255):
        raise ValueError("High threshold cannot be greater than 255!")

    if(args.low < 0):
        raise ValueError("Low threshold cannot be lower than 0!")

    img = Image.open(args.fname)

    # need image to be gray scale for algorithm to work
    img = np.asarray( ImageOps.grayscale(img) )

    # get spatially separated kernel
    dGx, dGy = gaussian_kernel_d(args.sigma, args.size)

    # get x and y gradients
    Ix = convolve(img, dGx)
    Iy = convolve(img, dGy)

    # get the R scores for the image
    R = harris_scores(Ix, Iy, args.sigma)

    # will colour the spots that are greater than the R threshold as white
    corn = show_corners(R, img, args.rval)

    # calculate the magnitudes of the gradient at each pixel
    dG = np.sqrt(Ix**2 + Iy**2)

    # this calculates the angle of the gradient at each pixel
    angles = np.arctan2(Iy, Ix)

    # we only want the largest value along the gradient to be visible
    nm = non_max_spr(dG, angles)

    corn_sup = non_max_spr(corn, angles)

    # extracts the most important pixels based on the threshold values
    thresholding(nm, args.low, args.high)

    edgeImg = Image.fromarray(nm)

    cornerImg = Image.fromarray(corn_sup)

    # show edges
    edgeImg.show()

    # show corners
    cornerImg.show()
Example #38
    validation2_labels = validation2[:, 1]
    validation2 = train_scaler.transform((validation2[:, 0]).reshape(-1,1))
    test_labels = test[:, 1]
    test = train_scaler.transform((test[:, 0]).reshape(-1,1))

    # prepare sequence data and labels
    X_train, y_train = prepare_seq2seq_data(train, look_back, look_ahead)
    X_validation1, y_validation1 = prepare_seq2seq_data(validation1, look_back, look_ahead)
    X_validation2, y_validation2 = prepare_seq2seq_data(validation2, look_back, look_ahead)
    X_validation2_labels, y_validation2_labels = prepare_seq2seq_data(validation2_labels, look_back, look_ahead)
    X_test, y_test = prepare_seq2seq_data(test, look_back, look_ahead)
    X_test_labels, y_test_labels = prepare_seq2seq_data(test_labels, look_back, look_ahead)
    return train_scaler, X_train, y_train, X_validation1, y_validation1, X_validation2, y_validation2, y_validation2_labels, X_test, y_test, y_test_labels


if __name__ == "__main__":
    print(22)
    arr = np.array([[1,2,3,4,5],[2,3,4,5,6],[3,4,5,6,7],[4,5,6,7,8],[5,6,7,8,9],[6,7,8,9,10],[7.1,8.1,9.1,10,11]])
    train_diagonals = get_diagonals(arr)
    # diagonals contains a reading's values calculated at different points in time


    # the top left and bottom right predictions do not contain predictions for all timesteps
    # fill the missing prediction values in diagonals. currently using the first predicted value for all missing timesteps
    for idx, diag in enumerate(train_diagonals):
        diag = diag.flatten()
        # missing value filled with the first value

        train_diagonals[idx] = np.hstack((diag, np.full(5 - len(diag), diag[0])))
    train_diagonals = np.asarray(train_diagonals)
    print(train_diagonals)
Example #39
 def __call__(self, a):
     self.variables = (a,)
     self.back = np.asarray(a > 0, dtype=a.dtype)
     return a.data * self.back
Example #40
	model.load_state_dict(torch.load(best_path))

	data_loader_test = DataLoader(TID_test,batch_size=16,num_workers=2)
	batch_test_losses = []
	model.eval()
	i=0
	mos_idx = torch.arange(10)
	mos_op_list = []
	mos_gt_list = []
	mos_idx = mos_idx.to(device)
	for i,data in enumerate(data_loader_test):
		images = data['img'].to(device)
		labels = data['label'].to(device)
		with torch.no_grad():
			outputs = model(images)
		outputs = outputs.view(-1, 10)
		test_loss = emd_loss(labels, outputs)
		mos_op = torch.mean(outputs*mos_idx,1)
		mos_gt = torch.mean(labels*mos_idx,1)
		mos_op_list.extend(mos_op.cpu().detach().numpy())
		mos_gt_list.extend(mos_gt.cpu().detach().numpy())
		batch_test_losses.append((test_loss.item())*(images.size()[0]))
	avg_test_loss = sum(batch_test_losses)/len(TID_test)
	print('Test EMD loss is {:.4f}'.format(avg_test_loss))
	mos_op_list = np.asarray(mos_op_list)
	mos_gt_list = np.asarray(mos_gt_list)
	r,_= scipy.stats.spearmanr(mos_op_list, mos_gt_list, axis=0)
	num = mos_gt_list.shape[0]
	upper,lower = srcc_conf_bounds(r,num)
	print(r)
	print("SRCC  95 % confidence interval, lower {} upper {}".format(lower, upper))
Example #41
def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any:
    """
    Check if `indexer` is a valid array indexer for `array`.

    For a boolean mask, `array` and `indexer` are checked to have the same
    length. The dtype is validated, and if it is an integer or boolean
    ExtensionArray, it is checked if there are missing values present, and
    it is converted to the appropriate numpy array. Other dtypes will raise
    an error.

    Non-array indexers (integer, slice, Ellipsis, tuples, ..) are passed
    through as is.

    .. versionadded:: 1.0.0

    Parameters
    ----------
    array : array-like
        The array that is being indexed (only used for the length).
    indexer : array-like or list-like
        The array-like that's used to index. List-like input that is not yet
        a numpy array or an ExtensionArray is converted to one. Other input
        types are passed through as is.

    Returns
    -------
    numpy.ndarray
        The validated indexer as a numpy array that can be used to index.

    Raises
    ------
    IndexError
        When the lengths don't match.
    ValueError
        When `indexer` cannot be converted to a numpy ndarray to index
        (e.g. presence of missing values).

    See Also
    --------
    api.types.is_bool_dtype : Check if `key` is of boolean dtype.

    Examples
    --------
    When checking a boolean mask, a boolean ndarray is returned when the
    arguments are all valid.

    >>> mask = pd.array([True, False])
    >>> arr = pd.array([1, 2])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    An IndexError is raised when the lengths don't match.

    >>> mask = pd.array([True, False, True])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    Traceback (most recent call last):
    ...
    IndexError: Boolean index has wrong length: 3 instead of 2.

    NA values in a boolean array are treated as False.

    >>> mask = pd.array([True, pd.NA])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    A numpy boolean mask will get passed through (if the length is correct):

    >>> mask = np.array([True, False])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    Similarly for integer indexers, an integer ndarray is returned when it is
    a valid indexer, otherwise an error is raised (for integer indexers, a matching
    length is not required):

    >>> indexer = pd.array([0, 2], dtype="Int64")
    >>> arr = pd.array([1, 2, 3])
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    array([0, 2])

    >>> indexer = pd.array([0, pd.NA], dtype="Int64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    ValueError: Cannot index with an integer indexer containing NA values

    For non-integer/boolean dtypes, an appropriate error is raised:

    >>> indexer = np.array([0., 2.], dtype="float64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    IndexError: arrays used as indices must be of integer or boolean type
    """
    from pandas.core.construction import array as pd_array

    # whatever is not an array-like is returned as-is (possible valid array
    # indexers that are not array-like: integer, slice, Ellipsis, None)
    # In this context, tuples are not considered as array-like, as they have
    # a specific meaning in indexing (multi-dimensional indexing)
    if is_list_like(indexer):
        if isinstance(indexer, tuple):
            return indexer
    else:
        return indexer

    # convert list-likes to array
    if not is_array_like(indexer):
        indexer = pd_array(indexer)
        if len(indexer) == 0:
            # empty list is converted to float array by pd.array
            indexer = np.array([], dtype=np.intp)

    dtype = indexer.dtype
    if is_bool_dtype(dtype):
        if is_extension_array_dtype(dtype):
            indexer = indexer.to_numpy(dtype=bool, na_value=False)
        else:
            indexer = np.asarray(indexer, dtype=bool)

        # GH26658
        if len(indexer) != len(array):
            raise IndexError(
                f"Boolean index has wrong length: "
                f"{len(indexer)} instead of {len(array)}"
            )
    elif is_integer_dtype(dtype):
        try:
            indexer = np.asarray(indexer, dtype=np.intp)
        except ValueError as err:
            raise ValueError(
                "Cannot index with an integer indexer containing NA values"
            ) from err
    else:
        raise IndexError("arrays used as indices must be of integer or boolean type")

    return indexer
Example #42
def naive_bayes_multiclass(samples_file, outfile, mkplots=False):
    """Naive Bayes training function for two or more classes from sampled pixel RGB values
    
    Inputs:
    samples_file = Input text file containing sampled pixel RGB values for each training class. The file should be a
                   tab-delimited table with one training class per column. The required first row must contain header
                   labels for each class. The row values for each class must be comma-delimited RGB values. See the
                   file plantcv/tests/data/sampled_rgb_points.txt for an example.
    outfile      = Name of the output text file that will store the color channel probability density functions.
    mkplots      = Make PDF plots (True or False).
    
    :param samples_file: str
    :param outfile: str
    :param mkplots: bool
    """
    # Initialize a dictionary to store sampled RGB pixel values for each input class
    sample_points = {}
    # Open the sampled points text file
    f = open(samples_file, "r")
    # Read the first line and use the column headers as class labels
    header = f.readline()
    header = header.rstrip("\n")
    class_list = header.split("\t")
    # Initialize a dictionary for the red, green, and blue channels for each class
    for cls in class_list:
        sample_points[cls] = {"red": [], "green": [], "blue": []}
    # Loop over the rest of the data in the input file
    for row in f:
        # Remove newlines and quotes
        row = row.rstrip("\n")
        row = row.replace('"', '')
        # If this is not a blank line, parse the data
        if len(row) > 0:
            # Split the row into a list of points per class
            points = row.split("\t")
            # For each point per class
            for i, point in enumerate(points):
                if len(point) > 0:
                    # Split the point into red, green, and blue integer values
                    red, green, blue = map(int, point.split(","))
                    # Append each intensity value into the appropriate class list
                    sample_points[class_list[i]]["red"].append(red)
                    sample_points[class_list[i]]["green"].append(green)
                    sample_points[class_list[i]]["blue"].append(blue)
    f.close()
    # Initialize a dictionary to store probability density functions per color channel in HSV colorspace
    pdfs = {"hue": {}, "saturation": {}, "value": {}}
    # For each class
    for cls in class_list:
        # Create a blue, green, red-formatted image ndarray with the class RGB values
        bgr_img = cv2.merge((np.asarray(sample_points[cls]["blue"], dtype=np.uint8),
                             np.asarray(sample_points[cls]["green"], dtype=np.uint8),
                             np.asarray(sample_points[cls]["red"], dtype=np.uint8)))
        # Convert the BGR ndarray to an HSV ndarray
        hsv_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2HSV)
        # Split the HSV ndarray into the component HSV channels
        hue, saturation, value = cv2.split(hsv_img)
        # Create an HSV channel dictionary that stores the channels as lists (horizontally stacked ndarrays)
        channels = {"hue": np.hstack(hue), "saturation": np.hstack(saturation), "value": np.hstack(value)}
        # For each channel
        for channel in channels.keys():
            # Create a kernel density estimator for the channel values (Gaussian kernel)
            kde = stats.gaussian_kde(channels[channel])
            # Use the KDE to calculate a probability density function for the channel
            # Sample at each of the possible 8-bit values
            pdfs[channel][cls] = kde(range(0, 256))
    if mkplots:
        # If mkplots is True, generate a density curve plot per channel for each class
        for channel, cls in pdfs.items():
            _plot_pdf(channel, os.path.dirname(outfile), **cls)
    # Write the PDFs to a text file
    out = open(outfile, "w")
    # Write the column labels
    out.write("class\tchannel\t" + "\t".join(map(str, range(0, 256))) + "\n")
    # For each channel
    for channel, cls in pdfs.items():
        # For each class
        for class_name, pdf in cls.items():
            # Each row is the PDF for the given class and color channel
            out.write(class_name + "\t" + channel + "\t" + "\t".join(map(str, pdf)) + "\n")
    out.close()
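# Added usage sketch (not part of the original example). It assumes the function above
# is available together with the modules it relies on (cv2, numpy as np, scipy.stats as
# stats, os); in the PlantCV project it is provided by the learn module. The toy file
# follows the layout described in the docstring: one tab-separated column per class,
# each cell holding comma-delimited R,G,B values. Real training data should contain
# many sampled pixels per class so the kernel density estimates are meaningful.
rows = [
    "plant\tbackground",
    '"10,200,30"\t"200,60,50"',
    '"40,180,60"\t"180,90,70"',
    '"20,160,90"\t"220,40,80"',
    '"60,220,10"\t"160,110,40"',
]
with open("sampled_rgb_points.txt", "w") as fh:
    fh.write("\n".join(rows) + "\n")

naive_bayes_multiclass(samples_file="sampled_rgb_points.txt",
                       outfile="naive_bayes_pdfs.txt",
                       mkplots=False)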
Example #43
0
def dsm_gen(dsms):
    """Generator for DSMs"""
    for dsm in dsms:
        yield np.asarray(dsm)
Example #44
0
    def hotspot2sine(self, element, x_size, y_size, hs_group, posMat, data, thetas):
        '''
        aligns projections to a sine curve based on hotspot information

        Variables
        -----------
        element: int
            element index
        x_size: int
            ROI pixel dimension in x
        y_size: int
            ROI pixel dimension in y
        hs_group: int
            hotspot group number
        posMat: ndarray
            position matrix (2 rows: x and y positions)
        data: ndarray
            4D xrf dataset ndarray [elements, theta, y,x]
        thetas: ndarray
            sorted projection angle list
        '''
        self.posMat = posMat
        self.posMat[0] = posMat[0] + x_size//2
        self.posMat[1] = posMat[1] + y_size//2

        hs_x_pos, hs_y_pos, firstPosOfHotSpot, hotSpotX, hotSpotY, data = self.alignment_parameters(element, x_size, y_size, hs_group, self.posMat, data)
#****************
        num_projections = data.shape[1]
        y_shifts = np.zeros(num_projections)
        x_shifts = np.zeros(num_projections)
        thetas  = np.asarray(thetas)
        for j in range(num_projections):
            # initialize shifts so they are always defined, even when a hotspot is missing
            xxshift, yyshift = 0, 0
            if hs_x_pos[j] != 0 and hs_y_pos[j] != 0:
                xxshift = int(round(x_size//2 - hotSpotX[j]))
                yyshift = int(round(y_size//2 - hotSpotY[j]))
            if hs_x_pos[j] == 0:
                xxshift = 0
            if hs_y_pos[j] == 0:
                yyshift = 0

            x_shifts[j] = xxshift
            y_shifts[j] = yyshift

        hotspotXPos = np.zeros(num_projections, dtype=int)  # np.int is removed in modern NumPy
        hotspotYPos = np.zeros(num_projections, dtype=int)
        for i in range(num_projections):
            hotspotYPos[i] = int(round(hs_y_pos[i]))
            hotspotXPos[i] = int(round(hs_x_pos[i]))
        hotspotProj = np.where(hotspotXPos != 0)[0]

        theta_tmp = thetas[hotspotProj]
        com = hotspotXPos[hotspotProj]

        if hs_group == 0:
            self.fitCenterOfMass(com, x=theta_tmp)
        else:
            self.fitCenterOfMass2(com, self.centers, x=theta_tmp)
        self.alignCenterOfMass2(hotspotProj, data)

        ## yfit
        for i in hotspotProj:
            y_shifts[i] = int(hotspotYPos[hotspotProj[0]]) - int(hotspotYPos[i])
            # data[:, i] = np.roll(data[:, i], y_shifts[i], axis=1)
            data = self.shiftProjection(data, 0,y_shifts[i],i)

        #update reconstruction slider value
        # self.recon.sld.setValue(self.centers[2])

        print("align done")
        self.centers = list(np.round(self.centers))
        return data, x_shifts, y_shifts
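    # Added note (illustration, not in the original code): in a tomographic scan, a
    # feature sitting at radius r and phase phi from the rotation axis traces
    # x(theta) = center + r*cos(theta - phi) across projections; this is the sine
    # curve that fitCenterOfMass / fitCenterOfMass2 are presumably fitting to the
    # hotspot x-positions collected above.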
Example #45
0
def deepLPI_train(dataset, dataset_str, lncRNA_len, mRNA_len, lncRNA_struct_len, mRNA_struct_len):
    train_bags = dataset['train']
    test_bags = dataset['test']

    train_mRNA_bags = dataset['train_mRNA']
    test_mRNA_bags = dataset['test_mRNA']
    train_lncRNA_bags = dataset['train_lncRNA']
    test_lncRNA_bags = dataset['test_lncRNA']

    train_bags_nm = dataset['train_bags_nm']
    train_ins_nm = dataset['train_ins_nm']
    test_bags_nm = dataset['test_bags_nm']
    test_ins_nm = dataset['test_ins_nm']


    train_bags_str = dataset_str['train']
    test_bags_str = dataset_str['test']

    train_mRNA_bags_str = dataset_str['train_mRNA']
    test_mRNA_bags_str = dataset_str['test_mRNA']
    train_lncRNA_bags_str = dataset_str['train_lncRNA']
    test_lncRNA_bags_str = dataset_str['test_lncRNA']

    train_bags_nm_str = dataset_str['train_bags_nm']
    train_ins_nm_str = dataset_str['train_ins_nm']
    test_bags_nm_str = dataset_str['test_bags_nm']
    test_ins_nm_str = dataset_str['test_ins_nm']

    # convert bag to batch
    train_mRNA_set = convertToBatch(train_mRNA_bags)
    test_mRNA_set = convertToBatch(test_mRNA_bags)
    train_lncRNA_set = convertToBatch(train_lncRNA_bags)
    test_lncRNA_set = convertToBatch(test_lncRNA_bags)
    train_set = convertToBatch(train_bags)
    test_set = convertToBatch(test_bags)
    dimension = train_set[0][0].shape[0]

    train_mRNA_set_str = convertToBatch(train_mRNA_bags_str)
    test_mRNA_set_str = convertToBatch(test_mRNA_bags_str)
    train_lncRNA_set_str = convertToBatch(train_lncRNA_bags_str)
    test_lncRNA_set_str = convertToBatch(test_lncRNA_bags_str)
    train_set_str = convertToBatch(train_bags_str)
    test_set_str = convertToBatch(test_bags_str)
    dimension_str = train_set_str[0][0].shape[0]

    model = model_func(lncRNA_len, mRNA_len, lncRNA_struct_len, mRNA_struct_len)

    # train model
    t1 = time.time()
    num_batch = len(train_set)
    all_auc=[]
    all_auprc=[]
    iso_expr_data_all=get_expr_data("./dataset/isoform_expression_data.txt")
    lnc_expr_data_all=get_expr_data("./dataset/lncRNA_expression_data.txt")
    lncRNA_feature_colum=188 #small dataset    
    for epoch in range(args.max_epoch):
        #Training
        initial_score_all = np.array([])
        crf_bag_index=[]
        y_all=np.array([])
        lnc_expr_data=[]
        iso_expr_data=[]
        num_train_batch = len(train_set)
        train_loss = np.zeros((num_train_batch, 1), dtype=float)
        train_acc = np.zeros((num_train_batch, 1), dtype=float)
        for ibatch, batch in enumerate(train_mRNA_set):
            if train_set[ibatch][0].shape[0]!=train_set_str[ibatch][0].shape[0]:
                continue
            y_all=np.hstack((y_all, train_mRNA_set[ibatch][1]))
            initial_score_all_ = model.predict_on_batch([train_lncRNA_set[ibatch][0], train_mRNA_set[ibatch][0], train_lncRNA_set_str[ibatch][0], train_mRNA_set_str[ibatch][0]])
            initial_score_all = np.hstack((initial_score_all, np.transpose(initial_score_all_)[0]))
            i=0
            for i in range(train_mRNA_set[ibatch][0].shape[0]):
                crf_bag_index.append(ibatch)
            ibag_name = train_bags_nm[ibatch].encode('ascii', 'ignore').strip()
            # str.replace returns a new string; the original code discarded the result
            ibag_name = ibag_name.replace("'", "-")
            if len(ibag_name.split('-'))>2:
                lncRNA_name=ibag_name.split('-')[0]+'-'+ibag_name.split('-')[1]
            else:
                lncRNA_name=ibag_name.split('-')[0]
            for ins in train_ins_nm[ibatch]:
                if lncRNA_name in lnc_expr_data_all:
                    lnc_expr_data.append(lnc_expr_data_all[lncRNA_name])
                else:
                    lnc_expr_data.append([0] * lncRNA_feature_colum)
                iso_expr_data.append(iso_expr_data_all[ins.encode('ascii', 'ignore').strip()])  # unicodedata.normalize("NFKD", ins)

        y_all = np.asarray(y_all, dtype=int)

        #WGCNA for isoform expression data
        iso_expr_data=np.asarray(iso_expr_data)
        co_exp_net=np.corrcoef(iso_expr_data)
        # Set nan to be zero
        nan_where = np.isnan(co_exp_net)
        co_exp_net[nan_where] = 0
        # Diagonal set to zero
        for ii in range(co_exp_net.shape[0]):
            co_exp_net[ii, ii] = 0
        # Apply soft threshold
        co_exp_net = np.fabs(co_exp_net)
        co_exp_net = pow(co_exp_net, 6)
        co_exp_net_isoform=co_exp_net

        #WGCNA for lncRNA expression data
        lnc_expr_data=np.asarray(lnc_expr_data)
        lnc_co_exp_net=np.corrcoef(lnc_expr_data)
        # Set nan to be zero
        lnc_nan_where = np.isnan(lnc_co_exp_net)
        lnc_co_exp_net[lnc_nan_where] = 0
        # Diagonal set to zero
        for ii in range(lnc_co_exp_net.shape[0]):
            lnc_co_exp_net[ii, ii] = 0
        # Apply soft threshold
        lnc_co_exp_net = np.fabs(lnc_co_exp_net)
        lnc_co_exp_net = pow(lnc_co_exp_net, 6)
        co_exp_net_lncRNA=lnc_co_exp_net

        crf_bag_index=np.asarray(crf_bag_index)
        K_training_size=y_all.shape[0]
        K_testing_size=0
        theta = np.array([1.0, 1.0])
        new_label, theta, pos_prob_crf, unary_potential, pairwise_potential = run_crf(epoch, initial_score_all, y_all, crf_bag_index, co_exp_net_isoform, co_exp_net_lncRNA, K_training_size, K_testing_size, theta, sigma=0.1)
        if epoch > 0:
            s_index=0
            updated_train_label=[]
            for ibatch, batch in enumerate(train_mRNA_set):
                e_index=s_index+train_lncRNA_set[ibatch][1].shape[0]
                updated_train_label.append((train_lncRNA_set[ibatch][0], np.asarray(new_label[s_index:e_index])))
                s_index=e_index
            train_lncRNA_set=updated_train_label
        for ibatch, batch in enumerate(train_mRNA_set):
            if train_set[ibatch][0].shape[0]!=train_set_str[ibatch][0].shape[0] : continue
            if train_set[ibatch][0].shape[0]!=train_lncRNA_set[ibatch][1].shape[0]: continue
            result = model.train_on_batch([train_lncRNA_set[ibatch][0], train_mRNA_set[ibatch][0], train_lncRNA_set_str[ibatch][0], train_mRNA_set_str[ibatch][0]], train_lncRNA_set[ibatch][1])
            train_loss[ibatch] = result[0]
            train_acc[ibatch] = result[1]
            model, mean_train_loss, mean_train_acc = model, np.mean(train_loss), np.mean(train_acc)
    
    return model
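# Added standalone sketch (not part of the original example) of the WGCNA-style
# soft-thresholding used twice in the training loop above: Pearson correlation across
# rows, NaNs and the diagonal zeroed, then |r| raised to a soft-threshold power (6 above).
import numpy as np

def soft_threshold_coexpression(expr, beta=6):
    """expr: 2D array with one row per isoform/lncRNA and one column per sample."""
    net = np.corrcoef(expr)        # pairwise Pearson correlation between rows
    net[np.isnan(net)] = 0         # constant rows yield NaN correlations -> treat as no edge
    np.fill_diagonal(net, 0)       # drop self-edges
    return np.fabs(net) ** beta    # unsigned co-expression network with soft threshold

# toy usage
adj = soft_threshold_coexpression(np.random.rand(5, 10))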
Example #46
0
import pickle
import numpy as np
# db = pickle.load(open('bert_fine_tune.p', 'rb'))
from utils import Config, safe_pickle_dump, strip_version
db = pickle.load(open(Config.db_path, 'rb'))
orig = pickle.load(open('elmo_embed.p', 'rb'))
# db = pickle.load(open('bert_out.p', 'rb'))
# print(len(db))
# X = np.array(list(db.values()))
# normalization
X = orig / np.linalg.norm(orig, axis=1, keepdims=1)
# print(X.shape)
pids = list(db.keys())
# B = N
ds = -np.asarray(np.dot(X, X.T))  #NxD * DxB => NxB
# print(ds[0][0])
IX = np.argsort(ds, axis=0)  # NxB
# pid = '1407.2515'
# pid = '1904.05856'
# pid = '1904.07460'
# ID = pids.index(pid)
# print(IX.shape)
ARXIV_PATH = 'https://arxiv.org/abs/'
# print(ARXIV_PATH + pids[ID])
# print(orig[ID])
# for i in range(0,6):
#     # print(IX[ID][i])
#     # print(orig[IX[i][ID]])
#     # print(1+ds[ID][IX[i][ID]], end=' ')
#     sim_pid = pids[IX[i][ID]]
#     print(ARXIV_PATH + sim_pid)
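# Added helper sketch (not in the original snippet) packaging the lookup hinted at in
# the commented-out block above: column ID of IX holds the neighbours of pids[ID],
# nearest first, with the paper itself in row 0.
def top_k_similar(pid, k=5):
    ID = pids.index(pid)
    return [pids[IX[i][ID]] for i in range(1, k + 1)]  # skip row 0 (the query itself)

# example call (requires the pickles loaded above):
# print([ARXIV_PATH + p for p in top_k_similar('1407.2515')])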
Example #47
0
                    
                    mindata1 = halfmin[str(table.stock1[pair])][32:332].values
                    mindata2 = halfmin[str(table.stock2[pair])][32:332].values

                    # print(mindata1)
                    # print(mindata2)
                    mindata1 = preprocessing.scale(mindata1)
                    mindata2 = preprocessing.scale(mindata2)
                    
                            
                    new_spread[1,106:406] = mindata1
                    new_spread[2,106:406] = mindata2

                    whole_day.append(new_spread)

                whole_day = np.asarray(whole_day)

                whole_day = torch.FloatTensor(whole_day)
                output = model(whole_day)

                if table.shape[0] == 1:
                    _, predicted = torch.max(output,0)
                else:
                    _, predicted = torch.max(output,1)
                action_choose = predicted.cpu().numpy()
                action_choose = action_choose.tolist()
                table["action"] = pd.Series(action_choose)



Example #48
0
matData = scipy.io.loadmat('rat3_all.mat')
matrixA = matData['EEGandEMG']
matrixB = matData['labels'] 
matrixRowSize = matrixA.shape[1]

A = matrixA
B = matrixB
A = A.T
B = B.T
train_len = 10000
x_train = A[0:train_len,:]
y_train = B[0:train_len,:]
x_test = A[train_len:matrixRowSize,:]
y_test = B[train_len:matrixRowSize,:]

x_train = np.asarray(x_train).flatten()
y_train = np.asarray(y_train).flatten()
x_train = x_train.reshape(train_len, 1, 4000)
y_train = y_train.reshape(train_len, 1, 6)
x_test = np.asarray(x_test).flatten()
x_test = x_test.reshape(matrixRowSize-train_len, 1, 4000)
y_test =  y_test.reshape(matrixRowSize-train_len, 1, 6)

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Reshape, LSTM
from keras import regularizers  # regularizers is a top-level keras module, not a layer
from keras.optimizers import SGD
from keras.optimizers import Adam

print('LSTMaspiration2')
model = Sequential()
model.add(LSTM(6, return_sequences = True, input_shape = (1, 4000), activation = 'softsign'))
model.add(Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
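# Added sketch (not in the original fragment): one plausible way to finish the model so
# its output matches the (samples, 1, 6) one-hot labels built above -- a 6-unit softmax
# head, categorical cross-entropy, and a short fit/evaluate run. The epoch count and
# batch size here are illustrative guesses, not values from the original script.
model.add(Dense(6, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])
model.fit(x_train, y_train, epochs=10, batch_size=64, validation_data=(x_test, y_test))
print(model.evaluate(x_test, y_test, batch_size=64))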
Example #49
0
name_array = []
mean_array = []
sd_array = []
median_array = []
diff = False
scal = False

#looking through each directory
for dir in directoryname_list:
    print("First argument is", sys.argv[1])

    #change directory to each directory in argument
    os.chdir(dir)

    #create a list of all the led fits files we are interested in
    initial_img_list = np.asarray(glob.glob("*exp_"+exposure_time+"_t00*_940nm_led.*.1.1.fits")) 
    final_img_list = np.asarray(glob.glob("*exp_"+exposure_time+"_t00*_940nm_led.*.1.103.fits"))
    
    print(initial_img_list)
    print(final_img_list)

    
    name = initial_img_list[0].split('.')[0]
    initial_list = name+"_initial.lis"
    final_list = name+"_final.lis"
    sub_list = name+"_sub_image.lis"
    gain_sub_list = name+"_gain_sub_image.lis"
    
    np.savetxt(initial_list,initial_img_list,fmt='%s')
    np.savetxt(name+"_final.lis",final_img_list,fmt='%s')
    os.system("awk '{gsub(/.fits/,\".diff.fits\");print$0}' "+initial_list+" > "+sub_list)
示例#50
0
    other_outputs = []

    with torch.no_grad():
        for f in tqdm(listdir(test_dir)):
            if '.png' in f:
                img = cv2.imread(path.join(test_dir, f), cv2.IMREAD_COLOR)
                img2 = cv2.imread(path.join(test_dir2, f), cv2.IMREAD_COLOR)
                img3 = cv2.imread(path.join(test_dir3, f), cv2.IMREAD_COLOR)
                img = np.concatenate([img, img2, img3], axis=2)
                img = cv2.copyMakeBorder(img, 14, 14, 14, 14,
                                         cv2.BORDER_REFLECT_101)

                inp = []
                inp.append(img)
                inp = np.asarray(inp, dtype='float')

                inp = preprocess_inputs(inp)

                inp = torch.from_numpy(inp.transpose((0, 3, 1, 2))).float()

                inp = Variable(inp).cuda()

                nadir, cat_inp, coord_inp = parse_img_id(f)
                nadir = torch.from_numpy(np.asarray([nadir / 60.0
                                                     ]).copy()).float()
                cat_inp = torch.from_numpy(cat_inp.copy()[np.newaxis,
                                                          ...]).float()
                coord_inp = torch.from_numpy(coord_inp.copy()[np.newaxis,
                                                              ...]).float()
Example #51
0
W = [0]
W = W + [np.deg2rad(angle) for angle in range(-180, 180, 5)]  # heading/angular velocity
# print('actions:', len(W))

stepSize = 0.5  # velocity
tau = 0.1  # discretization
U = [0]
U = U + [stepSize] * (len(W) - 1)
# print('vel:', len(U))

control_in = []
for i in range(len(U)):
    control_in.append([W[i], U[i]])

control_in = np.asarray(
    control_in)  # convention: the first control action should correspond to "null" action (stay idle)

# obstacle_space = [[2, 8], [35, 15], [5, 65], [95, 2]]  # points occupied by obstacles


############################################------ Main Algorithm------- #############################

count = 0
replan_step = 10 # how often to replan the trajectory
robot_pose = p_init
robot_ori  = theta_init
actual_path = []
future_step = 20


# initiate the ros node for publishing trajectory msg
Example #52
0
MAX_NUM_WORDS = 2000
NUM_CATEGORIES = dp.categories_size()

# ----------------------
# prepare dataset:
# ----------------------
tokenizer = Tokenizer(num_words=MAX_NUM_WORDS)
tokenizer.fit_on_texts(dp.fetch_dataset_train().data)
word_index = tokenizer.word_index
# integer encode documents
X_train = tokenizer.texts_to_matrix(dp.fetch_dataset_train().data,
                                    mode='tfidf')
X_test = tokenizer.texts_to_matrix(dp.fetch_dataset_test().data, mode='tfidf')

y_train = dp.fetch_dataset_train().target
y_train = to_categorical(np.asarray(y_train))
y_test = dp.fetch_dataset_test().target
y_test = to_categorical(np.asarray(y_test))

keras_model = Sequential()
keras_model.add(Embedding(MAX_NUM_WORDS, 512, input_length=MAX_NUM_WORDS))
keras_model.add(Dense(64, activation='relu'))
keras_model.add(Flatten())
keras_model.add(Dense(NUM_CATEGORIES, activation='softmax'))
keras_model.compile(loss='categorical_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy'])

print(keras_model.summary())
keras_model.fit(X_train, y_train, epochs=10, batch_size=128)
loss, accuracy = keras_model.evaluate(X_test, y_test, batch_size=128)
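# Added sketch (not in the original snippet): scoring a new document with the fitted
# tokenizer and model. The sample text is purely illustrative, and np is assumed to be
# imported as numpy in the surrounding script (it is already used for np.asarray above).
new_docs = ["an example document to classify"]
new_X = tokenizer.texts_to_matrix(new_docs, mode='tfidf')
probs = keras_model.predict(new_X)
print(np.argmax(probs, axis=1))  # predicted category index per document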
Example #53
0
    img = cv2.resize(img, dsize=None, fx=f, fy=f)  # scale proportionally (keep aspect ratio)
    data = np.zeros([max_size, max_size, 3])
    w_new, h_new, c = img.shape
    s_x = int((max_size - w_new) / 2)
    s_y = int((max_size - h_new) / 2)
    data[s_x:s_x + w_new, s_y:s_y + h_new, :] = img
    return data

if __name__ == "__main__":



    labels = {'NILM': 0, 'HSIL': 1}
    img = cv2.imread('./2017-07-24-16_51_57_0.png')
    img = size_process(img, 224)
    data = img[:, :, ::-1]
    data = np.asarray(data, dtype=np.float32)
    data = data_process(data)
    data = np.expand_dims(data, axis=0)

    fr = open('./checkpoint_dir/weights.pkl', 'rb')
    weights = pickle.load(fr)
    with tf.Session() as sess:
        model = ResNet50_fpn(sess=sess, inputs=(None, 224, 224, 3), blocks=[3, 4, 6, 3], weights=weights)
        init = tf.global_variables_initializer()
        sess.run(init)
        print(model.var_list)
        saver = tf.train.Saver(var_list=model.var_list)
        save_path = saver.save(sess, "./checkpoint_dir/MyModel")
        output = model.predict(data)
        print(output)
Example #54
0
    def find_nearest(centers, point):
        centers = np.asarray(centers)
        # for center in centers:

        idx = np.array((centers - point) ** 2).sum(axis=1).argmin()
        return centers[idx]
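    # Added note (illustration): with centers [[0, 0], [3, 4]] and point [2, 3], the
    # squared distances are [13, 2], so find_nearest returns array([3, 4]).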
Example #55
0
    def sampleZ(self, NUMBER_OF_MCMC_SAMPLES_TOTAL):

        invEst = numpy.linalg.inv(self.X.transpose() @ self.X +
                                  1.0 * numpy.eye(self.p))
        ridgeBetaEst = (invEst @ self.X.transpose()) @ self.y

        z = numpy.zeros(self.p, dtype=int)  # numpy.int is removed in modern NumPy
        z[numpy.absolute(ridgeBetaEst) > self.delta] = 1

        beta = ridgeBetaEst

        # get a sparse initial solution in order to ensure faster convergence
        maxNrInitialSelectedVars = int(self.p * 0.01)
        if maxNrInitialSelectedVars > 0 and numpy.sum(
                z) > maxNrInitialSelectedVars:
            largestIds = numpy.argsort(
                -numpy.absolute(ridgeBetaEst))[0:maxNrInitialSelectedVars]
            z = numpy.zeros(self.p, dtype=int)
            z[largestIds] = 1

        beta[z == 0] = 0

        sigmaSquareR = numpy.mean(numpy.square(self.y - self.X @ beta))

        print("beta = ")
        print(beta)

        print("sigmaSquareR = ")
        print(sigmaSquareR)

        print("z = ")
        print(z)

        BURN_IN_SAMPLES = int(0.1 * NUMBER_OF_MCMC_SAMPLES_TOTAL)
        assert (BURN_IN_SAMPLES >= 1)
        NUMBER_OF_MCMC_SAMPLES_USED = NUMBER_OF_MCMC_SAMPLES_TOTAL - BURN_IN_SAMPLES

        print("BURN_IN_SAMPLES = ", BURN_IN_SAMPLES)
        print("NUMBER_OF_MCMC_SAMPLES_USED = ", NUMBER_OF_MCMC_SAMPLES_USED)

        posteriorAssignments = numpy.zeros(
            (NUMBER_OF_MCMC_SAMPLES_USED, self.p))
        averagePosteriorBeta = numpy.zeros(self.p)
        averageSigmaSquareR = 0.0

        spikeAndSlabVar = numpy.asarray([self.sigmaSquare0, self.etaSquare1])

        print("spikeAndSlabVar = ", spikeAndSlabVar)

        for mcmcIt in range(NUMBER_OF_MCMC_SAMPLES_TOTAL):
            print("mcmcIt = ", mcmcIt)

            # if self.delta > 0:
            for j in range(self.p):
                # sample p(z_j | beta, z_-j, y, sigmaSquareR, X)
                z[j] = self.sampleZjConditionedOnRest(sigmaSquareR,
                                                      spikeAndSlabVar, beta, z,
                                                      j)

                # sample p(beta_j | beta_-j, z, y, sigmaSquareR, X)
                meanTilde, sigmaSquareTilde, _ = self.getMeanAndVarOfBetaConditional(
                    sigmaSquareR, spikeAndSlabVar, beta, z, j)
                beta[j] = scipy.stats.norm.rvs(
                    loc=meanTilde, scale=numpy.sqrt(sigmaSquareTilde))

            if self.delta == 0:
                # safety check for delta == 0
                assert (numpy.all(beta[z == 0] == 0)
                        and numpy.all(beta[z == 1] != 0))

            # sample p(sigmaSquareR | beta, z, y, X)
            etaSquareForsigmaSquareR = (
                SpikeAndSlabProposedModelSearch.NU_R *
                SpikeAndSlabProposedModelSearch.ETA_SQUARE_R +
                numpy.sum(numpy.square(self.y - numpy.matmul(self.X, beta)))
            ) / (SpikeAndSlabProposedModelSearch.NU_R + self.n)
            sigmaSquareR = samplingHelper.getScaledInvChiSquareSample(
                nu=SpikeAndSlabProposedModelSearch.NU_R + self.n,
                etaSquare=etaSquareForsigmaSquareR,
                numberOfSamples=1)[0]

            # sample p(sigmaSquare_0 | beta, z, y, X) and p(sigmaSquare_1 | beta, z, y, X)
            spikeAndSlabVar[1] = self.sampleSigmaSquareConditional(beta, z)

            print("slab variance = ", spikeAndSlabVar[1])

            if mcmcIt >= BURN_IN_SAMPLES:
                posteriorAssignments[mcmcIt - BURN_IN_SAMPLES] = z
                averagePosteriorBeta += beta
                averageSigmaSquareR += sigmaSquareR

        averagePosteriorBeta = averagePosteriorBeta / float(
            NUMBER_OF_MCMC_SAMPLES_USED)
        averageSigmaSquareR = averageSigmaSquareR / float(
            NUMBER_OF_MCMC_SAMPLES_USED)

        # print("posteriorAssignments = ")
        # print(posteriorAssignments)

        # print("averagePosteriorBeta = ")
        # print(averagePosteriorBeta)

        countAssignments = defaultdict(lambda: 0)
        for mcmcIt in range(NUMBER_OF_MCMC_SAMPLES_USED):
            nonZeroPos = numpy.where(posteriorAssignments[mcmcIt] != 0)[0]
            nonZeroPosAsStr = [str(num) for num in nonZeroPos]
            nonZeroPosAsStr = " ".join(nonZeroPosAsStr)
            countAssignments[nonZeroPosAsStr] += 1

        sortedAssignmentsByFrequency = sorted(countAssignments.items(),
                                              key=lambda kv: kv[1],
                                              reverse=True)
        print("sortedAssignmentsByFrequency = ")
        print(sortedAssignmentsByFrequency)

        mostFrequentAssignment = showResultsText.getNumpyArray(
            sortedAssignmentsByFrequency[0][0])
        # print("mostFrequentAssignment = ", mostFrequentAssignment)

        # see "Optimal predictive model selection", 2004
        assignmentProbs = numpy.mean(posteriorAssignments, axis=0)
        medianProbabilityModel = numpy.where(assignmentProbs > 0.5)[0]
        # print("assignmentProbs = ", assignmentProbs)
        # print("medianProbabilityModel = ", medianProbabilityModel)

        return mostFrequentAssignment, medianProbabilityModel, assignmentProbs, averagePosteriorBeta, averageSigmaSquareR, sortedAssignmentsByFrequency
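    # Added note (illustration, not in the original code): the median probability model
    # keeps exactly the variables whose posterior inclusion probability exceeds 0.5; e.g.
    # if the sampled z columns average to [0.9, 0.3, 0.6], it selects variables {0, 2}.
    # The 2004 reference cited above is Barbieri & Berger, "Optimal predictive model selection".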
Example #56
0
def main():
	save_plots = False

	### Set parameters ###
	nugget = 1.e-10
	all_n_clusters = [1,2]
	corr_kernel = 'exponential_periodic'
	GCP_mapWithNoise= False
	sampling_model = 'GCP'
	integratedPrediction = False
	coef_latent_mapping = 0.1
	prediction_size = 1000

	### Set parameters ###
	parameter_bounds = np.asarray( [[0,400]] )
	training_size = 40

	if (save_plots):
		if not os.path.exists('data_UCB'):
			os.mkdir('data_UCB')

	abs = np.atleast_2d(range(0,400)).T
	f_plot = [scoring_function(i) for i in abs[:,0]]

	x_training = []
	y_training = []
	for i in range(training_size):
		x = np.random.uniform(0,400)
		x_training.append(x)
		y_training.append(scoring_function(x))
	x_training = np.atleast_2d(x_training).T

	candidates = []
	real_y = []
	for i in range(prediction_size):
		x = [np.random.uniform(0,400)]
		candidates.append(x)
		real_y.append(scoring_function(x[0]))
	real_y = np.asarray(real_y)
	candidates = np.asarray(candidates)

	count = -1
	fig = plt.figure()

	for n_clusters in all_n_clusters:

		count += 2
		ax = fig.add_subplot(len(all_n_clusters),2,count)
		ax.set_title("GCP prediction")

		gcp = GaussianCopulaProcess(nugget = nugget,
									corr = corr_kernel,
									random_start = 5,
									n_clusters = n_clusters,
		                            coef_latent_mapping = coef_latent_mapping,
								 	mapWithNoise = GCP_mapWithNoise,
					 				useAllNoisyY = False,
					 				model_noise = None,
									try_optimize = True)
		gcp.fit(x_training,y_training)

		print '\nGCP fitted'
		print 'Likelihood', np.exp(gcp.reduced_likelihood_function_value_)

		predictions,MSE,boundL,boundU = \
							gcp.predict(candidates,
										eval_MSE=True,
										eval_confidence_bounds=True,
										coef_bound = 1.96,
										integratedPrediction=integratedPrediction)

		pred_error = np.mean( (predictions - np.asarray(real_y) ) **2. )
		print 'SMSE', pred_error / (np.std(real_y) **2.)

		idx = np.argsort(candidates[:,0])
		s_candidates = candidates[idx,0]
		s_boundL = boundL[idx]
		s_boundU = boundU[idx]

		pred,MSE_bis = gcp.predict(np.atleast_2d(s_candidates).T,
								   eval_MSE=True,
								   transformY=False,
								   eval_confidence_bounds=False,
								   coef_bound = 1.96)

		gp_boundL = pred - 1.96*np.sqrt(MSE_bis)
		gp_boundU = pred + 1.96*np.sqrt(MSE_bis)
		t_f_plot =  [gcp.mapping(abs[i],f_plot[i],normalize=True) for i in range(len(f_plot))]
		t_y_training =  [gcp.mapping(x_training[i],y_training[i],normalize=True) for i in range(len(y_training))]

		if(save_plots):
			save_data = np.asarray([s_candidates,boundL,boundU,predictions,f_plot]).T
			np.savetxt('data_UCB/data_plot.csv',save_data,delimiter=',')

		ax.plot(abs,f_plot)
		l1, = ax.plot(candidates,predictions,'r+',label='GCP predictions')
		l3, = ax.plot(x_training,y_training,'bo',label='Training points')
		ax.fill(np.concatenate([s_candidates,s_candidates[::-1]]),np.concatenate([s_boundL,s_boundU[::-1]]),alpha=.5, fc='c', ec='None')


		ax = fig.add_subplot(len(all_n_clusters),2,count+1)
		ax.set_title('GP space')
		ax.plot(abs,t_f_plot)
		ax.plot(s_candidates,pred,'r+',label='GCP predictions')
		ax.plot(x_training,t_y_training,'bo',label='Training points')
		ax.fill(np.concatenate([s_candidates,s_candidates[::-1]]),np.concatenate([gp_boundL,gp_boundU[::-1]]),alpha=.5, fc='c', ec='None')

		if(save_plots):
			t_save_data = np.asarray([s_candidates,gp_boundL,gp_boundU,pred,np.asarray(t_f_plot)[:,0]]).T
			np.savetxt('data_UCB/gpspace_data_plot.csv',t_save_data,delimiter=',')
			training_points = np.asarray([x_training[:,0],y_training,np.asarray(t_y_training)[:,0]]).T
			np.savetxt('data_UCB/train_data_plot.csv',training_points,delimiter=',')

	plt.legend()
	plt.show()
Example #57
0
class TestiLocBaseIndependent:
    """Tests Independent Of Base Class"""

    @pytest.mark.parametrize(
        "key",
        [
            slice(None),
            slice(3),
            range(3),
            [0, 1, 2],
            Index(range(3)),
            np.asarray([0, 1, 2]),
        ],
    )
    @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc])
    def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manager):
        frame = DataFrame({0: range(3)}, dtype=object)

        cat = Categorical(["alpha", "beta", "gamma"])

        if not using_array_manager:
            assert frame._mgr.blocks[0]._can_hold_element(cat)

        df = frame.copy()
        orig_vals = df.values
        indexer(df)[key, 0] = cat

        overwrite = isinstance(key, slice) and key == slice(None)

        if overwrite or using_array_manager:
            # TODO(ArrayManager) we always overwrite because ArrayManager takes
            #  the "split" path, which still overwrites
            # TODO: GH#39986 this probably shouldn't behave differently
            expected = DataFrame({0: cat})
            assert not np.shares_memory(df.values, orig_vals)
        else:
            expected = DataFrame({0: cat}).astype(object)
            if not using_array_manager:
                assert np.shares_memory(df[0].values, orig_vals)

        tm.assert_frame_equal(df, expected)

        # check we don't have a view on cat (may be undesired GH#39986);
        # both the overwrite and non-overwrite paths leave the original Categorical untouched
        df.iloc[0, 0] = "gamma"
        assert cat[0] != "gamma"

        # TODO with mixed dataframe ("split" path), we always overwrite the column
        frame = DataFrame({0: np.array([0, 1, 2], dtype=object), 1: range(3)})
        df = frame.copy()
        orig_vals = df.values
        indexer(df)[key, 0] = cat
        expected = DataFrame({0: cat, 1: range(3)})
        tm.assert_frame_equal(df, expected)

    # TODO(ArrayManager) does not yet update parent
    @td.skip_array_manager_not_yet_implemented
    @pytest.mark.parametrize("box", [array, Series])
    def test_iloc_setitem_ea_inplace(self, frame_or_series, box, using_array_manager):
        # GH#38952 Case with not setting a full column
        #  IntegerArray without NAs
        arr = array([1, 2, 3, 4])
        obj = frame_or_series(arr.to_numpy("i8"))

        if frame_or_series is Series or not using_array_manager:
            values = obj.values
        else:
            values = obj[0].values

        obj.iloc[:2] = box(arr[2:])
        expected = frame_or_series(np.array([3, 4, 3, 4], dtype="i8"))
        tm.assert_equal(obj, expected)

        # Check that we are actually in-place
        if frame_or_series is Series:
            assert obj.values is values
        else:
            if using_array_manager:
                assert obj[0].values is values
            else:
                assert obj.values.base is values.base and values.base is not None

    def test_is_scalar_access(self):
        # GH#32085 index with duplicates doesn't matter for _is_scalar_access
        index = Index([1, 2, 1])
        ser = Series(range(3), index=index)

        assert ser.iloc._is_scalar_access((1,))

        df = ser.to_frame()
        assert df.iloc._is_scalar_access((1, 0))

    def test_iloc_exceeds_bounds(self):

        # GH6296
        # iloc should allow indexers that exceed the bounds
        df = DataFrame(np.random.random_sample((20, 5)), columns=list("ABCDE"))

        # lists of positions should raise IndexError!
        msg = "positional indexers are out-of-bounds"
        with pytest.raises(IndexError, match=msg):
            df.iloc[:, [0, 1, 2, 3, 4, 5]]
        with pytest.raises(IndexError, match=msg):
            df.iloc[[1, 30]]
        with pytest.raises(IndexError, match=msg):
            df.iloc[[1, -30]]
        with pytest.raises(IndexError, match=msg):
            df.iloc[[100]]

        s = df["A"]
        with pytest.raises(IndexError, match=msg):
            s.iloc[[100]]
        with pytest.raises(IndexError, match=msg):
            s.iloc[[-100]]

        # still raise on a single indexer
        msg = "single positional indexer is out-of-bounds"
        with pytest.raises(IndexError, match=msg):
            df.iloc[30]
        with pytest.raises(IndexError, match=msg):
            df.iloc[-30]

        # GH10779
        # single positive/negative indexer exceeding Series bounds should raise
        # an IndexError
        with pytest.raises(IndexError, match=msg):
            s.iloc[30]
        with pytest.raises(IndexError, match=msg):
            s.iloc[-30]

        # slices are ok
        result = df.iloc[:, 4:10]  # 0 < start < len < stop
        expected = df.iloc[:, 4:]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, -4:-10]  # stop < 0 < start < len
        expected = df.iloc[:, :0]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, 10:4:-1]  # 0 < stop < len < start (down)
        expected = df.iloc[:, :4:-1]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, 4:-10:-1]  # stop < 0 < start < len (down)
        expected = df.iloc[:, 4::-1]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, -10:4]  # start < 0 < stop < len
        expected = df.iloc[:, :4]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, 10:4]  # 0 < stop < len < start
        expected = df.iloc[:, :0]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, -10:-11:-1]  # stop < start < 0 < len (down)
        expected = df.iloc[:, :0]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, 10:11]  # 0 < len < start < stop
        expected = df.iloc[:, :0]
        tm.assert_frame_equal(result, expected)

        # slice bounds exceeding is ok
        result = s.iloc[18:30]
        expected = s.iloc[18:]
        tm.assert_series_equal(result, expected)

        result = s.iloc[30:]
        expected = s.iloc[:0]
        tm.assert_series_equal(result, expected)

        result = s.iloc[30::-1]
        expected = s.iloc[::-1]
        tm.assert_series_equal(result, expected)

        # doc example
        def check(result, expected):
            str(result)
            result.dtypes
            tm.assert_frame_equal(result, expected)

        dfl = DataFrame(np.random.randn(5, 2), columns=list("AB"))
        check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index))
        check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]])
        check(dfl.iloc[4:6], dfl.iloc[[4]])

        msg = "positional indexers are out-of-bounds"
        with pytest.raises(IndexError, match=msg):
            dfl.iloc[[4, 5, 6]]
        msg = "single positional indexer is out-of-bounds"
        with pytest.raises(IndexError, match=msg):
            dfl.iloc[:, 4]

    @pytest.mark.parametrize("index,columns", [(np.arange(20), list("ABCDE"))])
    @pytest.mark.parametrize(
        "index_vals,column_vals",
        [
            ([slice(None), ["A", "D"]]),
            (["1", "2"], slice(None)),
            ([datetime(2019, 1, 1)], slice(None)),
        ],
    )
    def test_iloc_non_integer_raises(self, index, columns, index_vals, column_vals):
        # GH 25753
        df = DataFrame(
            np.random.randn(len(index), len(columns)), index=index, columns=columns
        )
        msg = ".iloc requires numeric indexers, got"
        with pytest.raises(IndexError, match=msg):
            df.iloc[index_vals, column_vals]

    @pytest.mark.parametrize("dims", [1, 2])
    def test_iloc_getitem_invalid_scalar(self, dims):
        # GH 21982

        if dims == 1:
            s = Series(np.arange(10))
        else:
            s = DataFrame(np.arange(100).reshape(10, 10))

        with pytest.raises(TypeError, match="Cannot index by location index"):
            s.iloc["a"]

    def test_iloc_array_not_mutating_negative_indices(self):

        # GH 21867
        array_with_neg_numbers = np.array([1, 2, -1])
        array_copy = array_with_neg_numbers.copy()
        df = DataFrame(
            {"A": [100, 101, 102], "B": [103, 104, 105], "C": [106, 107, 108]},
            index=[1, 2, 3],
        )
        df.iloc[array_with_neg_numbers]
        tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy)
        df.iloc[:, array_with_neg_numbers]
        tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy)

    def test_iloc_getitem_neg_int_can_reach_first_index(self):
        # GH10547 and GH10779
        # negative integers should be able to reach index 0
        df = DataFrame({"A": [2, 3, 5], "B": [7, 11, 13]})
        s = df["A"]

        expected = df.iloc[0]
        result = df.iloc[-3]
        tm.assert_series_equal(result, expected)

        expected = df.iloc[[0]]
        result = df.iloc[[-3]]
        tm.assert_frame_equal(result, expected)

        expected = s.iloc[0]
        result = s.iloc[-3]
        assert result == expected

        expected = s.iloc[[0]]
        result = s.iloc[[-3]]
        tm.assert_series_equal(result, expected)

        # check the length 1 Series case highlighted in GH10547
        expected = Series(["a"], index=["A"])
        result = expected.iloc[[-1]]
        tm.assert_series_equal(result, expected)

    def test_iloc_getitem_dups(self):
        # GH 6766
        df1 = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}])
        df2 = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}])
        df = concat([df1, df2], axis=1)

        # cross-sectional indexing
        result = df.iloc[0, 0]
        assert isna(result)

        result = df.iloc[0, :]
        expected = Series([np.nan, 1, 3, 3], index=["A", "B", "A", "B"], name=0)
        tm.assert_series_equal(result, expected)

    def test_iloc_getitem_array(self):
        df = DataFrame(
            [
                {"A": 1, "B": 2, "C": 3},
                {"A": 100, "B": 200, "C": 300},
                {"A": 1000, "B": 2000, "C": 3000},
            ]
        )

        expected = DataFrame([{"A": 1, "B": 2, "C": 3}])
        tm.assert_frame_equal(df.iloc[[0]], expected)

        expected = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 100, "B": 200, "C": 300}])
        tm.assert_frame_equal(df.iloc[[0, 1]], expected)

        expected = DataFrame([{"B": 2, "C": 3}, {"B": 2000, "C": 3000}], index=[0, 2])
        result = df.iloc[[0, 2], [1, 2]]
        tm.assert_frame_equal(result, expected)

    def test_iloc_getitem_bool(self):
        df = DataFrame(
            [
                {"A": 1, "B": 2, "C": 3},
                {"A": 100, "B": 200, "C": 300},
                {"A": 1000, "B": 2000, "C": 3000},
            ]
        )

        expected = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 100, "B": 200, "C": 300}])
        result = df.iloc[[True, True, False]]
        tm.assert_frame_equal(result, expected)

        expected = DataFrame(
            [{"A": 1, "B": 2, "C": 3}, {"A": 1000, "B": 2000, "C": 3000}], index=[0, 2]
        )
        result = df.iloc[lambda x: x.index % 2 == 0]
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("index", [[True, False], [True, False, True, False]])
    def test_iloc_getitem_bool_diff_len(self, index):
        # GH26658
        s = Series([1, 2, 3])
        msg = f"Boolean index has wrong length: {len(index)} instead of {len(s)}"
        with pytest.raises(IndexError, match=msg):
            s.iloc[index]

    def test_iloc_getitem_slice(self):
        df = DataFrame(
            [
                {"A": 1, "B": 2, "C": 3},
                {"A": 100, "B": 200, "C": 300},
                {"A": 1000, "B": 2000, "C": 3000},
            ]
        )

        expected = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 100, "B": 200, "C": 300}])
        result = df.iloc[:2]
        tm.assert_frame_equal(result, expected)

        expected = DataFrame([{"A": 100, "B": 200}], index=[1])
        result = df.iloc[1:2, 0:2]
        tm.assert_frame_equal(result, expected)

        expected = DataFrame(
            [{"A": 1, "C": 3}, {"A": 100, "C": 300}, {"A": 1000, "C": 3000}]
        )
        result = df.iloc[:, lambda df: [0, 2]]
        tm.assert_frame_equal(result, expected)

    def test_iloc_getitem_slice_dups(self):

        df1 = DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"])
        df2 = DataFrame(
            np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"]
        )

        # axis=1
        df = concat([df1, df2], axis=1)
        tm.assert_frame_equal(df.iloc[:, :4], df1)
        tm.assert_frame_equal(df.iloc[:, 4:], df2)

        df = concat([df2, df1], axis=1)
        tm.assert_frame_equal(df.iloc[:, :2], df2)
        tm.assert_frame_equal(df.iloc[:, 2:], df1)

        exp = concat([df2, df1.iloc[:, [0]]], axis=1)
        tm.assert_frame_equal(df.iloc[:, 0:3], exp)

        # axis=0
        df = concat([df, df], axis=0)
        tm.assert_frame_equal(df.iloc[0:10, :2], df2)
        tm.assert_frame_equal(df.iloc[0:10, 2:], df1)
        tm.assert_frame_equal(df.iloc[10:, :2], df2)
        tm.assert_frame_equal(df.iloc[10:, 2:], df1)

    def test_iloc_setitem(self):
        df = DataFrame(
            np.random.randn(4, 4), index=np.arange(0, 8, 2), columns=np.arange(0, 12, 3)
        )

        df.iloc[1, 1] = 1
        result = df.iloc[1, 1]
        assert result == 1

        df.iloc[:, 2:3] = 0
        expected = df.iloc[:, 2:3]
        result = df.iloc[:, 2:3]
        tm.assert_frame_equal(result, expected)

        # GH5771
        s = Series(0, index=[4, 5, 6])
        s.iloc[1:2] += 1
        expected = Series([0, 1, 0], index=[4, 5, 6])
        tm.assert_series_equal(s, expected)

    def test_iloc_setitem_list(self):

        # setitem with an iloc list
        df = DataFrame(
            np.arange(9).reshape((3, 3)), index=["A", "B", "C"], columns=["A", "B", "C"]
        )
        df.iloc[[0, 1], [1, 2]]
        df.iloc[[0, 1], [1, 2]] += 100

        expected = DataFrame(
            np.array([0, 101, 102, 3, 104, 105, 6, 7, 8]).reshape((3, 3)),
            index=["A", "B", "C"],
            columns=["A", "B", "C"],
        )
        tm.assert_frame_equal(df, expected)

    def test_iloc_setitem_pandas_object(self):
        # GH 17193
        s_orig = Series([0, 1, 2, 3])
        expected = Series([0, -1, -2, 3])

        s = s_orig.copy()
        s.iloc[Series([1, 2])] = [-1, -2]
        tm.assert_series_equal(s, expected)

        s = s_orig.copy()
        s.iloc[Index([1, 2])] = [-1, -2]
        tm.assert_series_equal(s, expected)

    def test_iloc_setitem_dups(self):

        # GH 6766
        # iloc with a mask aligning from another iloc
        df1 = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}])
        df2 = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}])
        df = concat([df1, df2], axis=1)

        expected = df.fillna(3)
        inds = np.isnan(df.iloc[:, 0])
        mask = inds[inds].index
        df.iloc[mask, 0] = df.iloc[mask, 2]
        tm.assert_frame_equal(df, expected)

        # del a dup column across blocks
        expected = DataFrame({0: [1, 2], 1: [3, 4]})
        expected.columns = ["B", "B"]
        del df["A"]
        tm.assert_frame_equal(df, expected)

        # assign back to self
        df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]]
        tm.assert_frame_equal(df, expected)

        # reversed x 2
        df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True)
        df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True)
        tm.assert_frame_equal(df, expected)

    def test_iloc_setitem_frame_duplicate_columns_multiple_blocks(
        self, using_array_manager
    ):
        # Same as the "assign back to self" check in test_iloc_setitem_dups
        #  but on a DataFrame with multiple blocks
        df = DataFrame([[0, 1], [2, 3]], columns=["B", "B"])

        df.iloc[:, 0] = df.iloc[:, 0].astype("f8")
        if not using_array_manager:
            assert len(df._mgr.blocks) == 2
        expected = df.copy()

        # assign back to self
        df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]]

        tm.assert_frame_equal(df, expected)

    # TODO: GH#27620 this test used to compare iloc against ix; check if this
    #  is redundant with another test comparing iloc against loc
    def test_iloc_getitem_frame(self):
        df = DataFrame(
            np.random.randn(10, 4), index=range(0, 20, 2), columns=range(0, 8, 2)
        )

        result = df.iloc[2]
        exp = df.loc[4]
        tm.assert_series_equal(result, exp)

        result = df.iloc[2, 2]
        exp = df.loc[4, 4]
        assert result == exp

        # slice
        result = df.iloc[4:8]
        expected = df.loc[8:14]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, 2:3]
        expected = df.loc[:, 4:5]
        tm.assert_frame_equal(result, expected)

        # list of integers
        result = df.iloc[[0, 1, 3]]
        expected = df.loc[[0, 2, 6]]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[[0, 1, 3], [0, 1]]
        expected = df.loc[[0, 2, 6], [0, 2]]
        tm.assert_frame_equal(result, expected)

        # neg indices
        result = df.iloc[[-1, 1, 3], [-1, 1]]
        expected = df.loc[[18, 2, 6], [6, 2]]
        tm.assert_frame_equal(result, expected)

        # dups indices
        result = df.iloc[[-1, -1, 1, 3], [-1, 1]]
        expected = df.loc[[18, 18, 2, 6], [6, 2]]
        tm.assert_frame_equal(result, expected)

        # with index-like
        s = Series(index=range(1, 5), dtype=object)
        result = df.iloc[s.index]
        expected = df.loc[[2, 4, 6, 8]]
        tm.assert_frame_equal(result, expected)

    def test_iloc_getitem_labelled_frame(self):
        # try with labelled frame
        df = DataFrame(
            np.random.randn(10, 4), index=list("abcdefghij"), columns=list("ABCD")
        )

        result = df.iloc[1, 1]
        exp = df.loc["b", "B"]
        assert result == exp

        result = df.iloc[:, 2:3]
        expected = df.loc[:, ["C"]]
        tm.assert_frame_equal(result, expected)

        # negative indexing
        result = df.iloc[-1, -1]
        exp = df.loc["j", "D"]
        assert result == exp

        # out-of-bounds exception
        msg = "single positional indexer is out-of-bounds"
        with pytest.raises(IndexError, match=msg):
            df.iloc[10, 5]

        # trying to use a label
        msg = (
            r"Location based indexing can only have \[integer, integer "
            r"slice \(START point is INCLUDED, END point is EXCLUDED\), "
            r"listlike of integers, boolean array\] types"
        )
        with pytest.raises(ValueError, match=msg):
            df.iloc["j", "D"]

    def test_iloc_getitem_doc_issue(self, using_array_manager):

        # multi axis slicing issue with single block
        # surfaced in GH 6059

        arr = np.random.randn(6, 4)
        index = date_range("20130101", periods=6)
        columns = list("ABCD")
        df = DataFrame(arr, index=index, columns=columns)

        # defines ref_locs
        df.describe()

        result = df.iloc[3:5, 0:2]
        str(result)
        result.dtypes

        expected = DataFrame(arr[3:5, 0:2], index=index[3:5], columns=columns[0:2])
        tm.assert_frame_equal(result, expected)

        # for dups
        df.columns = list("aaaa")
        result = df.iloc[3:5, 0:2]
        str(result)
        result.dtypes

        expected = DataFrame(arr[3:5, 0:2], index=index[3:5], columns=list("aa"))
        tm.assert_frame_equal(result, expected)

        # related
        arr = np.random.randn(6, 4)
        index = list(range(0, 12, 2))
        columns = list(range(0, 8, 2))
        df = DataFrame(arr, index=index, columns=columns)

        if not using_array_manager:
            df._mgr.blocks[0].mgr_locs
        result = df.iloc[1:5, 2:4]
        str(result)
        result.dtypes
        expected = DataFrame(arr[1:5, 2:4], index=index[1:5], columns=columns[2:4])
        tm.assert_frame_equal(result, expected)

    def test_iloc_setitem_series(self):
        df = DataFrame(
            np.random.randn(10, 4), index=list("abcdefghij"), columns=list("ABCD")
        )

        df.iloc[1, 1] = 1
        result = df.iloc[1, 1]
        assert result == 1

        df.iloc[:, 2:3] = 0
        expected = df.iloc[:, 2:3]
        result = df.iloc[:, 2:3]
        tm.assert_frame_equal(result, expected)

        s = Series(np.random.randn(10), index=range(0, 20, 2))

        s.iloc[1] = 1
        result = s.iloc[1]
        assert result == 1

        s.iloc[:4] = 0
        expected = s.iloc[:4]
        result = s.iloc[:4]
        tm.assert_series_equal(result, expected)

        s = Series([-1] * 6)
        s.iloc[0::2] = [0, 2, 4]
        s.iloc[1::2] = [1, 3, 5]
        result = s
        expected = Series([0, 1, 2, 3, 4, 5])
        tm.assert_series_equal(result, expected)

    def test_iloc_setitem_list_of_lists(self):

        # GH 7551
        # list-of-list is set incorrectly in mixed vs. single dtyped frames
        df = DataFrame(
            {"A": np.arange(5, dtype="int64"), "B": np.arange(5, 10, dtype="int64")}
        )
        df.iloc[2:4] = [[10, 11], [12, 13]]
        expected = DataFrame({"A": [0, 1, 10, 12, 4], "B": [5, 6, 11, 13, 9]})
        tm.assert_frame_equal(df, expected)

        df = DataFrame(
            {"A": ["a", "b", "c", "d", "e"], "B": np.arange(5, 10, dtype="int64")}
        )
        df.iloc[2:4] = [["x", 11], ["y", 13]]
        expected = DataFrame({"A": ["a", "b", "x", "y", "e"], "B": [5, 6, 11, 13, 9]})
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize("indexer", [[0], slice(None, 1, None), np.array([0])])
    @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])])
    def test_iloc_setitem_with_scalar_index(self, indexer, value):
        # GH #19474
        # assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated
        # elementwisely, not using "setter('A', ['Z'])".

        df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
        df.iloc[0, indexer] = value
        result = df.iloc[0, 0]

        assert is_scalar(result) and result == "Z"

    def test_iloc_mask(self):

        # GH 3631, iloc with a mask (of a series) should raise
        df = DataFrame(list(range(5)), index=list("ABCDE"), columns=["a"])
        mask = df.a % 2 == 0
        msg = "iLocation based boolean indexing cannot use an indexable as a mask"
        with pytest.raises(ValueError, match=msg):
            df.iloc[mask]
        mask.index = range(len(mask))
        msg = "iLocation based boolean indexing on an integer type is not available"
        with pytest.raises(NotImplementedError, match=msg):
            df.iloc[mask]

        # ndarray ok
        result = df.iloc[np.array([True] * len(mask), dtype=bool)]
        tm.assert_frame_equal(result, df)

        # the possibilities
        locs = np.arange(4)
        nums = 2 ** locs
        reps = [bin(num) for num in nums]
        df = DataFrame({"locs": locs, "nums": nums}, reps)

        expected = {
            (None, ""): "0b1100",
            (None, ".loc"): "0b1100",
            (None, ".iloc"): "0b1100",
            ("index", ""): "0b11",
            ("index", ".loc"): "0b11",
            ("index", ".iloc"): (
                "iLocation based boolean indexing cannot use an indexable as a mask"
            ),
            ("locs", ""): "Unalignable boolean Series provided as indexer "
            "(index of the boolean Series and of the indexed "
            "object do not match).",
            ("locs", ".loc"): "Unalignable boolean Series provided as indexer "
            "(index of the boolean Series and of the "
            "indexed object do not match).",
            ("locs", ".iloc"): (
                "iLocation based boolean indexing on an "
                "integer type is not available"
            ),
        }

        # UserWarnings from reindex of a boolean mask
        with catch_warnings(record=True):
            simplefilter("ignore", UserWarning)
            for idx in [None, "index", "locs"]:
                mask = (df.nums > 2).values
                if idx:
                    mask = Series(mask, list(reversed(getattr(df, idx))))
                for method in ["", ".loc", ".iloc"]:
                    try:
                        if method:
                            accessor = getattr(df, method[1:])
                        else:
                            accessor = df
                        answer = str(bin(accessor[mask]["nums"].sum()))
                    except (ValueError, IndexingError, NotImplementedError) as e:
                        answer = str(e)

                    key = (
                        idx,
                        method,
                    )
                    r = expected.get(key)
                    if r != answer:
                        raise AssertionError(
                            f"[{key}] does not match [{answer}], received [{r}]"
                        )

    def test_iloc_non_unique_indexing(self):

        # GH 4017, non-unique indexing (on the axis)
        df = DataFrame({"A": [0.1] * 3000, "B": [1] * 3000})
        idx = np.arange(30) * 99
        expected = df.iloc[idx]

        df3 = concat([df, 2 * df, 3 * df])
        result = df3.iloc[idx]

        tm.assert_frame_equal(result, expected)

        df2 = DataFrame({"A": [0.1] * 1000, "B": [1] * 1000})
        df2 = concat([df2, 2 * df2, 3 * df2])

        with pytest.raises(KeyError, match="with any missing labels"):
            df2.loc[idx]

    def test_iloc_empty_list_indexer_is_ok(self):

        df = tm.makeCustomDataframe(5, 2)
        # vertical empty
        tm.assert_frame_equal(
            df.iloc[:, []],
            df.iloc[:, :0],
            check_index_type=True,
            check_column_type=True,
        )
        # horizontal empty
        tm.assert_frame_equal(
            df.iloc[[], :],
            df.iloc[:0, :],
            check_index_type=True,
            check_column_type=True,
        )
        # horizontal empty
        tm.assert_frame_equal(
            df.iloc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True
        )

    def test_identity_slice_returns_new_object(self, using_array_manager):
        # GH13873
        original_df = DataFrame({"a": [1, 2, 3]})
        sliced_df = original_df.iloc[:]
        assert sliced_df is not original_df

        # should be a shallow copy
        original_df["a"] = [4, 4, 4]
        if using_array_manager:
            # TODO(ArrayManager) verify it is expected that the original didn't change
            # setitem is replacing full column, so doesn't update "viewing" dataframe
            assert not (sliced_df["a"] == 4).all()
        else:
            assert (sliced_df["a"] == 4).all()

        original_series = Series([1, 2, 3, 4, 5, 6])
        sliced_series = original_series.iloc[:]
        assert sliced_series is not original_series

        # should also be a shallow copy
        original_series[:3] = [7, 8, 9]
        assert all(sliced_series[:3] == [7, 8, 9])

    def test_indexing_zerodim_np_array(self):
        # GH24919
        df = DataFrame([[1, 2], [3, 4]])
        result = df.iloc[np.array(0)]
        s = Series([1, 2], name=0)
        tm.assert_series_equal(result, s)

    def test_series_indexing_zerodim_np_array(self):
        # GH24919
        s = Series([1, 2])
        result = s.iloc[np.array(0)]
        assert result == 1

    @pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/33457")
    def test_iloc_setitem_categorical_updates_inplace(self):
        # Mixed dtype ensures we go through take_split_path in setitem_with_indexer
        cat = Categorical(["A", "B", "C"])
        df = DataFrame({1: cat, 2: [1, 2, 3]})

        # This should modify our original values in-place
        df.iloc[:, 0] = cat[::-1]

        expected = Categorical(["C", "B", "A"])
        tm.assert_categorical_equal(cat, expected)

    def test_iloc_with_boolean_operation(self):
        # GH 20627
        result = DataFrame([[0, 1], [2, 3], [4, 5], [6, np.nan]])
        result.iloc[result.index <= 2] *= 2
        expected = DataFrame([[0, 2], [4, 6], [8, 10], [6, np.nan]])
        tm.assert_frame_equal(result, expected)

        result.iloc[result.index > 2] *= 2
        expected = DataFrame([[0, 2], [4, 6], [8, 10], [12, np.nan]])
        tm.assert_frame_equal(result, expected)

        result.iloc[[True, True, False, False]] *= 2
        expected = DataFrame([[0, 4], [8, 12], [8, 10], [12, np.nan]])
        tm.assert_frame_equal(result, expected)

        result.iloc[[False, False, True, True]] /= 2
        expected = DataFrame([[0.0, 4.0], [8.0, 12.0], [4.0, 5.0], [6.0, np.nan]])
        tm.assert_frame_equal(result, expected)

    def test_iloc_getitem_singlerow_slice_categoricaldtype_gives_series(self):
        # GH#29521
        df = DataFrame({"x": Categorical("a b c d e".split())})
        result = df.iloc[0]
        raw_cat = Categorical(["a"], categories=["a", "b", "c", "d", "e"])
        expected = Series(raw_cat, index=["x"], name=0, dtype="category")

        tm.assert_series_equal(result, expected)

    def test_iloc_getitem_categorical_values(self):
        # GH#14580
        # test iloc() on Series with Categorical data

        ser = Series([1, 2, 3]).astype("category")

        # get slice
        result = ser.iloc[0:2]
        expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
        tm.assert_series_equal(result, expected)

        # get list of indexes
        result = ser.iloc[[0, 1]]
        expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
        tm.assert_series_equal(result, expected)

        # get boolean array
        result = ser.iloc[[True, False, False]]
        expected = Series([1]).astype(CategoricalDtype([1, 2, 3]))
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("value", [None, NaT, np.nan])
    def test_iloc_setitem_td64_values_cast_na(self, value):
        # GH#18586
        series = Series([0, 1, 2], dtype="timedelta64[ns]")
        series.iloc[0] = value
        expected = Series([NaT, 1, 2], dtype="timedelta64[ns]")
        tm.assert_series_equal(series, expected)

    def test_iloc_setitem_empty_frame_raises_with_3d_ndarray(self):
        idx = Index([])
        obj = DataFrame(np.random.randn(len(idx), len(idx)), index=idx, columns=idx)
        nd3 = np.random.randint(5, size=(2, 2, 2))

        msg = f"Cannot set values with ndim > {obj.ndim}"
        with pytest.raises(ValueError, match=msg):
            obj.iloc[nd3] = 0

    @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc])
    def test_iloc_getitem_read_only_values(self, indexer):
        # GH#10043 this is fundamentally a test for iloc, but test loc while
        #  we're here
        rw_array = np.eye(10)
        rw_df = DataFrame(rw_array)

        ro_array = np.eye(10)
        ro_array.setflags(write=False)
        ro_df = DataFrame(ro_array)

        tm.assert_frame_equal(indexer(rw_df)[[1, 2, 3]], indexer(ro_df)[[1, 2, 3]])
        tm.assert_frame_equal(indexer(rw_df)[[1]], indexer(ro_df)[[1]])
        tm.assert_series_equal(indexer(rw_df)[1], indexer(ro_df)[1])
        tm.assert_frame_equal(indexer(rw_df)[1:3], indexer(ro_df)[1:3])

    def test_iloc_getitem_readonly_key(self):
        # GH#17192 iloc with read-only array raising TypeError
        df = DataFrame({"data": np.ones(100, dtype="float64")})
        indices = np.array([1, 3, 6])
        indices.flags.writeable = False

        result = df.iloc[indices]
        expected = df.loc[[1, 3, 6]]
        tm.assert_frame_equal(result, expected)

        result = df["data"].iloc[indices]
        expected = df["data"].loc[[1, 3, 6]]
        tm.assert_series_equal(result, expected)

    # TODO(ArrayManager) setting single item with an iterable doesn't work yet
    # in the "split" path
    @td.skip_array_manager_not_yet_implemented
    def test_iloc_assign_series_to_df_cell(self):
        # GH 37593
        df = DataFrame(columns=["a"], index=[0])
        df.iloc[0, 0] = Series([1, 2, 3])
        expected = DataFrame({"a": [Series([1, 2, 3])]}, columns=["a"], index=[0])
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize("klass", [list, np.array])
    def test_iloc_setitem_bool_indexer(self, klass):
        # GH#36741
        df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]})
        indexer = klass([True, False, False])
        df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2
        expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]})
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize("indexer", [[1], slice(1, 2)])
    def test_iloc_setitem_pure_position_based(self, indexer):
        # GH#22046
        df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]})
        df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
        df2.iloc[:, indexer] = df1.iloc[:, [0]]
        expected = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]})
        tm.assert_frame_equal(df2, expected)

    def test_iloc_setitem_dictionary_value(self):
        # GH#37728
        df = DataFrame({"x": [1, 2], "y": [2, 2]})
        rhs = {"x": 9, "y": 99}
        df.iloc[1] = rhs
        expected = DataFrame({"x": [1, 9], "y": [2, 99]})
        tm.assert_frame_equal(df, expected)

        # GH#38335 same thing, mixed dtypes
        df = DataFrame({"x": [1, 2], "y": [2.0, 2.0]})
        df.iloc[1] = rhs
        expected = DataFrame({"x": [1, 9], "y": [2.0, 99.0]})
        tm.assert_frame_equal(df, expected)

    def test_iloc_getitem_float_duplicates(self):
        df = DataFrame(
            np.random.randn(3, 3), index=[0.1, 0.2, 0.2], columns=list("abc")
        )
        expect = df.iloc[1:]
        tm.assert_frame_equal(df.loc[0.2], expect)

        expect = df.iloc[1:, 0]
        tm.assert_series_equal(df.loc[0.2, "a"], expect)

        df.index = [1, 0.2, 0.2]
        expect = df.iloc[1:]
        tm.assert_frame_equal(df.loc[0.2], expect)

        expect = df.iloc[1:, 0]
        tm.assert_series_equal(df.loc[0.2, "a"], expect)

        df = DataFrame(
            np.random.randn(4, 3), index=[1, 0.2, 0.2, 1], columns=list("abc")
        )
        expect = df.iloc[1:-1]
        tm.assert_frame_equal(df.loc[0.2], expect)

        expect = df.iloc[1:-1, 0]
        tm.assert_series_equal(df.loc[0.2, "a"], expect)

        df.index = [0.1, 0.2, 2, 0.2]
        expect = df.iloc[[1, -1]]
        tm.assert_frame_equal(df.loc[0.2], expect)

        expect = df.iloc[[1, -1], 0]
        tm.assert_series_equal(df.loc[0.2, "a"], expect)

    def test_iloc_setitem_custom_object(self):
        # iloc with an object
        class TO:
            def __init__(self, value):
                self.value = value

            def __str__(self) -> str:
                return f"[{self.value}]"

            __repr__ = __str__

            def __eq__(self, other) -> bool:
                return self.value == other.value

            def view(self):
                return self

        df = DataFrame(index=[0, 1], columns=[0])
        df.iloc[1, 0] = TO(1)
        df.iloc[1, 0] = TO(2)

        result = DataFrame(index=[0, 1], columns=[0])
        result.iloc[1, 0] = TO(2)

        tm.assert_frame_equal(result, df)

        # remains object dtype even after setting it back
        df = DataFrame(index=[0, 1], columns=[0])
        df.iloc[1, 0] = TO(1)
        df.iloc[1, 0] = np.nan
        result = DataFrame(index=[0, 1], columns=[0])

        tm.assert_frame_equal(result, df)

    def test_iloc_getitem_with_duplicates(self):

        df = DataFrame(np.random.rand(3, 3), columns=list("ABC"), index=list("aab"))

        result = df.iloc[0]
        assert isinstance(result, Series)
        tm.assert_almost_equal(result.values, df.values[0])

        result = df.T.iloc[:, 0]
        assert isinstance(result, Series)
        tm.assert_almost_equal(result.values, df.values[0])

    def test_iloc_getitem_with_duplicates2(self):
        # GH#2259
        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[1, 1, 2])
        result = df.iloc[:, [0]]
        expected = df.take([0], axis=1)
        tm.assert_frame_equal(result, expected)

    def test_iloc_interval(self):
        # GH#17130
        df = DataFrame({Interval(1, 2): [1, 2]})

        result = df.iloc[0]
        expected = Series({Interval(1, 2): 1}, name=0)
        tm.assert_series_equal(result, expected)

        result = df.iloc[:, 0]
        expected = Series([1, 2], name=Interval(1, 2))
        tm.assert_series_equal(result, expected)

        result = df.copy()
        result.iloc[:, 0] += 1
        expected = DataFrame({Interval(1, 2): [2, 3]})
        tm.assert_frame_equal(result, expected)

    def posteriorParameterSamples(self, z, NUMBER_OF_MCMC_SAMPLES_TOTAL,
                                  fixedSlabVar, fixedSigmaSquareR,
                                  numberOfFreeBeta, fixedBetaPart):
        assert (fixedSlabVar is None or fixedSlabVar > 0.0)
        assert (fixedSigmaSquareR is None or fixedSigmaSquareR > 0.0)
        assert (numberOfFreeBeta + fixedBetaPart.shape[0] == self.p)

        invEst = numpy.linalg.inv(self.X.transpose() @ self.X +
                                  1.0 * numpy.eye(self.p))
        ridgeBetaEst = (invEst @ self.X.transpose()) @ self.y

        beta = ridgeBetaEst
        beta[numberOfFreeBeta:self.p] = fixedBetaPart

        if fixedSigmaSquareR is None:
            sigmaSquareR = numpy.mean(
                numpy.square(self.y - self.X @ ridgeBetaEst))
        else:
            sigmaSquareR = fixedSigmaSquareR

        # print("z = ")
        # print(z)
        # print("beta = ")
        # print(beta)
        # assert(False)

        # print("sigmaSquareR = ")
        # print(sigmaSquareR)

        BURN_IN_SAMPLES = int(0.1 * NUMBER_OF_MCMC_SAMPLES_TOTAL)
        assert (BURN_IN_SAMPLES >= 1)
        NUMBER_OF_MCMC_SAMPLES_USED = NUMBER_OF_MCMC_SAMPLES_TOTAL - BURN_IN_SAMPLES

        # print("BURN_IN_SAMPLES = ", BURN_IN_SAMPLES)
        # print("NUMBER_OF_MCMC_SAMPLES_USED = ", NUMBER_OF_MCMC_SAMPLES_USED)

        posteriorBeta = numpy.zeros((NUMBER_OF_MCMC_SAMPLES_USED, self.p))
        posteriorSigmaSquareR = numpy.zeros(NUMBER_OF_MCMC_SAMPLES_USED)
        posteriorSlabVar = numpy.zeros(NUMBER_OF_MCMC_SAMPLES_USED)

        spikeAndSlabVar = numpy.asarray([self.sigmaSquare0, self.etaSquare1])
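        # spikeAndSlabVar[0] is the spike variance (sigmaSquare0);
        # spikeAndSlabVar[1] is the slab variance (etaSquare1), which may be fixed below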
        if fixedSlabVar is not None:
            spikeAndSlabVar[1] = fixedSlabVar

        for mcmcIt in range(NUMBER_OF_MCMC_SAMPLES_TOTAL):
            print("mcmcIt = ", mcmcIt)

            for j in range(numberOfFreeBeta):

                # sample p(beta_j | beta_-j, z, y, sigmaSquareR, X)
                meanTilde, sigmaSquareTilde, _ = self.getMeanAndVarOfBetaConditional(
                    sigmaSquareR, spikeAndSlabVar, beta, z, j)
                beta[j] = scipy.stats.norm.rvs(
                    loc=meanTilde, scale=numpy.sqrt(sigmaSquareTilde))

            if fixedSigmaSquareR is None:
                # sample p(sigmaSquareR | beta, z, y, X)
                etaSquareForsigmaSquareR = (
                    SpikeAndSlabProposedModelSearch.NU_R *
                    SpikeAndSlabProposedModelSearch.ETA_SQUARE_R + numpy.sum(
                        numpy.square(self.y - numpy.matmul(self.X, beta)))) / (
                            SpikeAndSlabProposedModelSearch.NU_R + self.n)
                sigmaSquareR = samplingHelper.getScaledInvChiSquareSample(
                    nu=SpikeAndSlabProposedModelSearch.NU_R + self.n,
                    etaSquare=etaSquareForsigmaSquareR,
                    numberOfSamples=1)[0]

            if fixedSlabVar is None:
                # sample p(sigmaSquare_1 | beta, z, y, X)
                spikeAndSlabVar[1] = self.sampleSigmaSquareConditional(beta, z)

            if mcmcIt >= BURN_IN_SAMPLES:
                posteriorBeta[mcmcIt - BURN_IN_SAMPLES] = beta
                posteriorSigmaSquareR[mcmcIt - BURN_IN_SAMPLES] = sigmaSquareR
                posteriorSlabVar[mcmcIt - BURN_IN_SAMPLES] = spikeAndSlabVar[1]

        return posteriorBeta, posteriorSigmaSquareR, posteriorSlabVar
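The Gibbs sampler above returns the post-burn-in draws of beta, the residual variance and the slab variance. A minimal sketch, not part of the original code, of how such draws are commonly summarized; the draw array below is synthetic and the names are illustrative:

import numpy

posteriorBeta = numpy.random.randn(900, 5)       # synthetic draws of shape (samples, p)
betaMean = numpy.mean(posteriorBeta, axis=0)      # posterior mean of each coefficient
betaInterval = numpy.percentile(posteriorBeta, [2.5, 97.5], axis=0)  # 95% credible bounds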
        k = k + 20
        j = j + 20


    # 7-bin histograms of the x/y/z displacements for each of the 20 joints
    for l in range(20):
        hx = np.histogram(dx[l, 0:], 7)
        hy = np.histogram(dy[l, 0:], 7)
        hz = np.histogram(dz[l, 0:], 7)

        # normalize the bin counts by the number of frames
        HX[l, :] = hx[0] / T1
        HY[l, :] = hy[0] / T1
        HZ[l, :] = hz[0] / T1

    # concatenate the per-joint histograms into one 420-dimensional feature row
    rx = HX.reshape(1, 140)
    ry = HY.reshape(1, 140)
    rz = HZ.reshape(1, 140)

    rx = (np.append(rx, ry)).reshape(1, 280)
    rx = (np.append(rx, rz)).reshape(1, 420)

    rx = np.asarray(rx)
    vectors[dd, :] = rx
    dd = dd + 1
    # note: reshape returns a new array, so this call is a no-op as written
    vectors.reshape((48, 420))

np.savetxt('hjpd_d1.t', vectors, delimiter=' ')
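The fragment above concatenates normalized 7-bin displacement histograms for 20 joints into a 420-dimensional row of vectors. A minimal, self-contained sketch of that pattern, with assumed shapes and illustrative variable names (not part of the original script):

import numpy as np

T1 = 100                              # assumed number of frames per sequence
dx = np.random.randn(20, T1)          # per-joint x displacements (synthetic)
dy = np.random.randn(20, T1)          # per-joint y displacements (synthetic)
dz = np.random.randn(20, T1)          # per-joint z displacements (synthetic)

HX = np.zeros((20, 7))
HY = np.zeros((20, 7))
HZ = np.zeros((20, 7))
for l in range(20):
    HX[l, :] = np.histogram(dx[l, :], 7)[0] / T1   # normalized 7-bin histogram
    HY[l, :] = np.histogram(dy[l, :], 7)[0] / T1
    HZ[l, :] = np.histogram(dz[l, :], 7)[0] / T1

feature_row = np.concatenate([HX.ravel(), HY.ravel(), HZ.ravel()])   # shape (420,)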

Example #60
0
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pickle, os, time
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from numpy import linalg as LA


init_time= time.time()


#################################### Loading the Dataset
ads_data_list= pickle.load( open("Ads400_3keyframes_july19.p", "rb") )
ads_data= np.asarray(ads_data_list) ## 12k samples of 112x112x3
vid_data_list= pickle.load( open("Vid500_10shots_3keyframes.p", "rb") )
vid_data= np.asarray(vid_data_list) ## 12k samples of 112x112x3
ads_audio_list= pickle.load( open("Ads400_audio_ad_mfcc_july19.p", "rb") )
ads_audio= np.asarray(ads_audio_list) ## MFCC audio features for the ads
vid_audio_list= pickle.load( open("Vid500_10shots_mfcc.p", "rb") )
vid_audio= np.asarray(vid_audio_list) ## MFCC audio features for the video shots


################################### Audio normalization and repeating
for i in range(ads_audio.shape[0]):
    ads_audio[i,:]= ( ads_audio[i,:]- np.mean(ads_audio[i,:]) ) / ( np.std(ads_audio[i,:])+ 1e-6 )
for i in range(vid_audio.shape[0]):
    vid_audio[i,:]= ( vid_audio[i,:]- np.mean(vid_audio[i,:]) ) / ( np.std(vid_audio[i,:])+ 1e-6 )
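# The per-row loops above z-score each MFCC vector (zero mean, unit variance,
# with a small epsilon for numerical safety).  A vectorized equivalent, shown
# only as an illustrative sketch and not called on the data:
def zscore_rows(a, eps=1e-6):
    # subtract the per-row mean and divide by the per-row std, as in the loops above
    return (a - a.mean(axis=1, keepdims=True)) / (a.std(axis=1, keepdims=True) + eps)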
     
ads_audio_3x= np.zeros([ads_audio.shape[0]*3, ads_audio.shape[1]])