Example No. 1
def get_randname(l=10, _type='a', length_of_chunk=10):
    """
    a - all
    d - digits
    w - letters
    r - russian letters
    p - punctuation
    s - whitespace
    """
    if 'a' == _type:
        text = string.printable
    else:
        text = ''
        letters_dict = {'d': string.digits,
                        'w': string.ascii_letters,
                        'r': 'абвгдеёжзийклмнопрстуфхцчшщъыьэюяАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ',
                        'p': string.punctuation,
                        's': string.whitespace}
        for t in _type:
            text += letters_dict.get(t, t)

    count_of_chunks = l // length_of_chunk
    n = ''.join([random.choice(text) for _ in xrange(length_of_chunk)]) * count_of_chunks + \
        ''.join([random.choice(text) for _ in xrange(l % length_of_chunk)])
    return n
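A quick usage sketch for the function above, assuming the module-level import random / import string that it relies on:

import random
import string

print(get_randname(10, 'd'))    # 10 characters drawn from digits only
print(get_randname(15, 'wp'))   # 15 characters drawn from letters and punctuation
print(get_randname(5, '-'))     # unknown type codes fall back to the literal character: '-----'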
Example No. 2
def visualize_grid(Xs, ubound=255.0, padding=1):
    """
    Reshape a 4D tensor of image data to a grid for easy visualization.

    Inputs:
    - Xs: Data of shape (N, H, W, C)
    - ubound: Output grid will have values scaled to the range [0, ubound]
    - padding: The number of blank pixels between elements of the grid
    """
    (N, H, W, C) = Xs.shape
    grid_size = int(ceil(sqrt(N)))
    grid_height = H * grid_size + padding * (grid_size - 1)
    grid_width = W * grid_size + padding * (grid_size - 1)
    grid = np.zeros((grid_height, grid_width, C))
    next_idx = 0
    y0, y1 = 0, H
    for y in xrange(grid_size):
        x0, x1 = 0, W
        for x in xrange(grid_size):
            if next_idx < N:
                img = Xs[next_idx]
                low, high = np.min(img), np.max(img)
                grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low)
                # grid[y0:y1, x0:x1] = Xs[next_idx]
                next_idx += 1
            x0 += W + padding
            x1 += W + padding
        y0 += H + padding
        y1 += H + padding
    # grid_max = np.max(grid)
    # grid_min = np.min(grid)
    # grid = ubound * (grid - grid_min) / (grid_max - grid_min)
    return grid
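A small sanity check of the resulting grid shape, assuming numpy and math are imported as in the original module (np, ceil, sqrt):

import numpy as np
from math import ceil, sqrt

Xs = np.random.rand(10, 8, 8, 3)          # ten random 8x8 RGB "images"
grid = visualize_grid(Xs, ubound=255.0, padding=1)
print(grid.shape)                         # (35, 35, 3): 4 cells of 8px plus 3 padding pixels per side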
Example No. 3
    def change_sectors(self, before, after):
        """ Move from sector `before` to sector `after`. A sector is a
        contiguous x, y sub-region of world. Sectors are used to speed up
        world rendering.

        """
        before_set = set()
        after_set = set()
        pad = 4
        for dx in xrange(-pad, pad + 1):
            for dy in [0]:  # xrange(-pad, pad + 1):
                for dz in xrange(-pad, pad + 1):
                    if dx ** 2 + dy ** 2 + dz ** 2 > (pad + 1) ** 2:
                        continue
                    if before:
                        x, y, z = before
                        before_set.add((x + dx, y + dy, z + dz))
                    if after:
                        x, y, z = after
                        after_set.add((x + dx, y + dy, z + dz))
        show = after_set - before_set
        hide = before_set - after_set
        for sector in show:
            self.show_sector(sector)
        for sector in hide:
            self.hide_sector(sector)
Example No. 4
def combineData(xdata,ydata,xlabel):
    #if ydata is a simple vector, encapsulate it into a 2D list
    if type(ydata[1]) is not list:
        ydata = [[val] for val in ydata]

    #if xdata is time data, add HH:MM:SS if it is missing (just 00:00:00)
    if type(xdata[1]) is str:
        #check if first 4 characters of xdata is a valid year
        if len(xdata[1]) == 10 and int(xdata[1][:4]) > 0 and int(xdata[1][:4]) < 3000:
            xdata[1:] = [val+' 00:00:00' for val in xdata[1:]]

    #figure out independent variable headers
    # if there is a title row, use that title
    if type(ydata[0][0]) is str:
        data = [[xdata[0]] + ydata[0]]
        for i in xrange(1,len(xdata)):
            data.append([xdata[i]]+ydata[i])
    # otherwise, use a default labeling
    else:
        header = [xlabel]
        for i in xrange(len(ydata[0])):
            header.append('data'+str(i+1))

        data = [header]
        for i in xrange(len(xdata)):
            data.append([xdata[i]]+ydata[i])
    
    return data
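A short illustration of both branches (title row present vs. default headers); no extra imports are needed:

print(combineData(['date', '2020-01-01', '2020-01-02'],
                  [['sales'], [3], [5]], 'date'))
# [['date', 'sales'], ['2020-01-01 00:00:00', 3], ['2020-01-02 00:00:00', 5]]

print(combineData([0, 1, 2], [10, 20, 30], 'x'))
# [['x', 'data1'], [0, 10], [1, 20], [2, 30]]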
Example No. 5
def read_file_draw_graph():
    """Create the graph and returns the networkx version of it 'G'."""
    global pos
    global G
    array2d = readFile()

    ROW, COLUMN = len(array2d), len(array2d[0])
    count = 0

    G = nx.Graph()

    for j in xrange(COLUMN):
        for i in xrange(ROW):
            if array2d[ROW - 1 - i][j] == 0:
                G.add_node(count, pos=(j, i))
                count += 1

    pos = nx.get_node_attributes(G, 'pos')

    for index in pos.keys():
        for index2 in pos.keys():
            if pos[index][0] == pos[index2][0] and pos[index][1] == pos[index2][1] - 1:
                G.add_edge(index, index2, weight=1)
            if pos[index][1] == pos[index2][1] and pos[index][0] == pos[index2][0] - 1:
                G.add_edge(index, index2, weight=1)

    return G
Example No. 6
  def compute_distances_two_loops(self, X):
    """
    Compute the distance between each test point in X and each training point
    in self.X_train using a nested loop over both the training data and the 
    test data.

    Inputs:
    - X: A numpy array of shape (num_test, D) containing test data.

    Returns:
    - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
      is the Euclidean distance between the ith test point and the jth training
      point.
    """
    num_test = X.shape[0]
    num_train = self.X_train.shape[0]
    dists = np.zeros((num_test, num_train))
    for i in xrange(num_test):
      for j in xrange(num_train):
        #####################################################################
        # TODO:                                                             #
        # Compute the l2 distance between the ith test point and the jth    #
        # training point, and store the result in dists[i, j]. You should   #
        # not use a loop over dimension.                                    #
        #####################################################################
        dists[i,j] = np.sqrt(np.sum(np.square(X[i] - self.X_train[j])))
        #####################################################################
        #                       END OF YOUR CODE                            #
        #####################################################################
    return dists
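For reference, the same pairwise distances can be cross-checked without the nested loops; this is only a sketch that assumes X and X_train are plain NumPy arrays:

import numpy as np

def compute_distances_vectorized(X, X_train):
    # ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2, evaluated for all pairs at once
    sq_test = np.sum(X ** 2, axis=1)[:, np.newaxis]          # (num_test, 1)
    sq_train = np.sum(X_train ** 2, axis=1)[np.newaxis, :]   # (1, num_train)
    cross = X.dot(X_train.T)                                 # (num_test, num_train)
    return np.sqrt(np.maximum(sq_test - 2.0 * cross + sq_train, 0.0))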
Example No. 7
def svm_loss_naive(W, X, y, reg):
  """
  Structured SVM loss function, naive implementation (with loops).

  Inputs have dimension D, there are C classes, and we operate on minibatches
  of N examples.

  Inputs:
  - W: A numpy array of shape (D, C) containing weights.
  - X: A numpy array of shape (N, D) containing a minibatch of data.
  - y: A numpy array of shape (N,) containing training labels; y[i] = c means
    that X[i] has label c, where 0 <= c < C.
  - reg: (float) regularization strength

  Returns a tuple of:
  - loss as single float
  - gradient with respect to weights W; an array of same shape as W
  """
  dW = np.zeros(W.shape) # initialize the gradient as zero

  # compute the loss and the gradient
  num_classes = W.shape[1]
  num_train = X.shape[0]
  loss = 0.0
  margin_count = 0  # the number of classes which did not reach desired margin
  for i in xrange(num_train):
    scores = X[i].dot(W)
    correct_class_score = scores[y[i]]
    margin_count = 0
    for j in xrange(num_classes):
      if j == y[i]:
        continue
      margin = scores[j] - correct_class_score + 1 # note delta = 1
      if margin > 0:
        loss += margin
        margin_count += 1
        dW.T[j] += X[i]            # accumulate gradient for incorrect classes

    dW.T[y[i]] -= margin_count * X[i]   # accumulate gradient of correct class

  # Right now the loss is a sum over all training examples, but we want it
  # to be an average instead so we divide by num_train.
  loss /= num_train

  # Add regularization to the loss.
  loss += 0.5 * reg * np.sum(W * W)

  dW = dW/num_train + reg * W     # average gradient + regularization gradient 

  #############################################################################
  # TODO:                                                                     #
  # Compute the gradient of the loss function and store it dW.                #
  # Rather than first computing the loss and then computing the derivative,   #
  # it may be simpler to compute the derivative at the same time that the     #
  # loss is being computed. As a result you may need to modify some of the    #
  # code above to compute the gradient.                                       #
  #############################################################################


  return loss, dW
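The analytic gradient above can be sanity-checked against a centered finite difference on a tiny random problem; a sketch assuming only NumPy:

import numpy as np

np.random.seed(0)
W = np.random.randn(5, 3) * 0.01                 # D=5 features, C=3 classes
X = np.random.randn(4, 5)                        # N=4 examples
y = np.array([0, 2, 1, 2])

loss, dW = svm_loss_naive(W, X, y, reg=0.1)
h, i, j = 1e-5, 2, 1                             # probe a single weight entry
W[i, j] += h
loss_plus, _ = svm_loss_naive(W, X, y, reg=0.1)
W[i, j] -= 2 * h
loss_minus, _ = svm_loss_naive(W, X, y, reg=0.1)
print(abs((loss_plus - loss_minus) / (2 * h) - dW[i, j]))   # should be close to 0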
Example No. 8
    def plot_redsequence_offdiags(self, fig, bands):
        """
        Plot the off-diagonal elements of the red-sequence model

        Parameters
        ----------
        fig: `matplotlib.Figure`
           Figure to add subplots to plot red-sequence model
        bands: `list`
           List of string names of bands for labeling
        """

        noff = (self.ncol * self.ncol - self.ncol) / 2

        nrow = (noff + 1) / 2

        not_extrap, = np.where(~self.extrapolated)

        ctr = 1
        for j in xrange(self.ncol):
            for k in xrange(j + 1, self.ncol):
                ax = fig.add_subplot(nrow, 2, ctr)
                ax.plot(self.z[: -1], self.sigma[j, k, : -1], 'r--')
                ax.plot(self.z[not_extrap], self.sigma[j, k, not_extrap], 'r-')
                ax.set_xlabel('Redshift')
                ax.set_ylabel('Corr %s-%s / %s-%s' % (bands[j], bands[j + 1], bands[k], bands[k + 1]))
                ctr += 1

        fig.tight_layout()
Example No. 9
    def _scanningPoll(self):
        if self._device_status.value == RUNNING:
            self._DLL.cbGetStatus(
                self.device_number, byref(
                    self._device_status), byref(
                    self._samples_received_count), byref(
                    self._current_sample_buffer_index))  # ,AIFUNCTION)

            logged_time = currentSec()

            currentIndex = self._current_sample_buffer_index.value
            currentSampleCount = self._samples_received_count.value

            if currentSampleCount > 0 and currentIndex > 0:
                lastIndex = self._last_sample_buffer_index.value
                samples = self._local_sample_buffer

                if lastIndex != currentIndex:
                    self._last_sample_buffer_index = c_long(currentIndex)

                    if lastIndex > currentIndex:
                        for v in xrange(
                                lastIndex, self._input_sample_buffer_size):
                            self._saveScannedEvent(logged_time, samples, v)
                        lastIndex = 0

                    for v in xrange(lastIndex, currentIndex):
                        self._saveScannedEvent(logged_time, samples, v)
        else:
            print2err('Error: MC DAQ not responding. Exiting...')
            self.getConfiguration['_ioServer'].shutDown()
            sys.exit(1)
Example No. 10
def matTransposed(mat):
    """Return the transposed of a nxn matrix.

    >>> matTransposed(((1, 2), (3, 4)))
    ((1, 3), (2, 4))"""
    dim = len(mat)
    return tuple(tuple(mat[i][j] for i in xrange(dim)) for j in xrange(dim))
Example No. 11
    def generate_grid(self):
        """Generates the grid of hyperparameter value combinations."""

        options = dict(self.options)
        params = {}

        # Remove 'p' to hold as a constant in the parameter combinations
        p = options.pop('p')
        params['p'] = [p for _ in xrange(self.n_selection_iters)]

        # Assign generators based on parameter type
        param_generators = {
            'c1': np.random.uniform,
            'c2': np.random.uniform,
            'w': np.random.uniform,
            'k': np.random.randint
        }

        # Generate random values for hyperparameters 'c1', 'c2', 'w', and 'k'
        for idx, bounds in options.items():
            params[idx] = param_generators[idx](
                              *bounds, size=self.n_selection_iters)

        # Return list of dicts of hyperparameter combinations
        return [{'c1': params['c1'][i],
                 'c2': params['c2'][i],
                 'w': params['w'][i],
                 'k': params['k'][i],
                 'p': params['p'][i]}
                for i in xrange(self.n_selection_iters)]
Example No. 12
def computeNormals(vertices, faces):
    numVertices = len(vertices)
    numFaces = len(faces)
    normalsPerFace = [None] * numFaces
    areasPerFace = [0.0] * numFaces
    normalsPerVertex = np.zeros(vertices.shape, dtype=vertices.dtype)

    for i in xrange(0, numFaces):
        face = faces[i]
        v0 = vertices[face[0]]
        v1 = vertices[face[1]]
        v2 = vertices[face[2]]

        normal = np.cross(c3d.subtract(v1, v0), c3d.subtract(v2, v0))

        area = triangleArea(v0, v1)
        areasPerFace[i] = area
        normalsPerFace[i] = normal

    for i in xrange(0, numFaces):
        face = faces[i]
        weightedNormal = [c * areasPerFace[i] for c in normalsPerFace[i]]
        for j in face:
            normalsPerVertex[j] = c3d.add(normalsPerVertex[j], weightedNormal)

    for i in xrange(0, numVertices):
        normalsPerVertex[i] = c3d.normalize(normalsPerVertex[i])

    return normalsPerVertex
Example No. 13
def partition(lst, n):
    """
        Divide lst into n nearly equal parts; also return the matching index lists
    """
    q, r = divmod(len(lst), n)
    indices = [q*i + min(i,r) for i in xrange(n+1)]
    return [lst[indices[i]:indices[i+1]] for i in xrange(n)], \
           [list(xrange(indices[i],indices[i+1])) for i in xrange(n)]
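For example, splitting a seven-element list into three parts:

parts, index_lists = partition(list('abcdefg'), 3)
print(parts)        # [['a', 'b', 'c'], ['d', 'e'], ['f', 'g']]
print(index_lists)  # [[0, 1, 2], [3, 4], [5, 6]]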
Example No. 14
def svm_loss_naive(W, X, y, reg, delta=1):
    """
    Structured SVM loss function, naive implementation (with loops).

    Inputs have dimension D, there are C classes, and we operate on minibatches
    of N examples.

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c means
      that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    dW = np.zeros(W.shape)  # initialize the gradient as zero

    # compute the loss and the gradient
    num_classes = W.shape[1]
    num_train = X.shape[0]
    loss = 0.0
    for i in xrange(num_train):
        scores = X[i].dot(W)
        correct_class_score = scores[y[i]]
        for j in xrange(num_classes):
            if j == y[i]:
                continue
            margin = scores[j] - correct_class_score + delta
            if margin > 0:
                loss += margin

                # Partial derivative of L_i wrt w_y_i = -x_i.
                # Partial derivative of L_i wrt w_j = x_i.
                # In both cases, take the mean slope of all training examples.
                dW[:, y[i]] -= (X[i, :] / num_train)
                dW[:, j] += (X[i, :] / num_train)

    # Right now the loss is a sum over all training examples, but we want it
    # to be an average instead so we divide by num_train.
    loss /= num_train

    # Add regularization to the loss.
    loss += reg * np.sum(W * W)
    dW += reg * W

    #############################################################################
    # TODO:                                                                     #
    # Compute the gradient of the loss function and store it dW.                #
    # Rather than first computing the loss and then computing the derivative,   #
    # it may be simpler to compute the derivative at the same time that the     #
    # loss is being computed. As a result you may need to modify some of the    #
    # code above to compute the gradient.                                       #
    #############################################################################

    return loss, dW
Example No. 15
    def decode_message_body(self, data_iter):
        """
        Decodes the MMS message body

        :param data_iter: an iterator over the sequence of bytes of the MMS
                          body
        :type data_iter: iter
        """
        ######### MMS body: headers ###########
        # Get the number of data parts in the MMS body
        try:
            num_entries = self.decode_uint_var(data_iter)
        except StopIteration:
            return

        #print 'Number of data entries (parts) in MMS body:', num_entries

        ########## MMS body: entries ##########
        # For every data "part", we have to read the following sequence:
        # <length of content-type + other possible headers>,
        # <length of data>,
        # <content-type + other possible headers>,
        # <data>
        for part_num in xrange(num_entries):
            #print '\nPart %d:\n------' % part_num
            headers_len = self.decode_uint_var(data_iter)
            data_len = self.decode_uint_var(data_iter)

            # Prepare to read content-type + other possible headers
            ct_field_bytes = []
            for i in xrange(headers_len):
                ct_field_bytes.append(data_iter.next())

            ct_iter = PreviewIterator(ct_field_bytes)
            # Get content type
            ctype, ct_parameters = self.decode_content_type_value(ct_iter)
            headers = {'Content-Type': (ctype, ct_parameters)}

            # Now read other possible headers until <headers_len> bytes
            # have been read
            while True:
                try:
                    hdr, value = self.decode_header(ct_iter)
                    headers[hdr] = value
                except StopIteration:
                    break

            # Data (note: this is not null-terminated)
            data = array.array('B')
            for i in xrange(data_len):
                data.append(data_iter.next())

            part = message.DataPart()
            part.set_data(data, ctype)
            part.content_type_parameters = ct_parameters
            part.headers = headers
            self._mms_message.add_data_part(part)
Example No. 16
 def to_association_matrix(self, bias='none', progress_callback=None):
     """Return a table with Markov associativities between columns
     (cf. Bavaud & Xanthos 2005, Deneulin et al. 2014)
     """
     freq = self.to_numpy()
     total_freq = freq.sum()
     sum_col = freq.sum(axis=0)
     sum_row = freq.sum(axis=1)
     exchange = np.dot(
         np.transpose(freq),
         np.dot(
             np.diag(1 / sum_row),
             freq
         )
     ) / total_freq
     if bias == 'frequent':
         output_matrix = exchange
     elif bias == 'none':
         sqrt_pi_inv = np.diag(1 / np.sqrt(sum_col / total_freq))
         output_matrix = np.dot(sqrt_pi_inv, np.dot(exchange, sqrt_pi_inv))
     else:
         pi_inv = np.diag(1 / (sum_col / total_freq))
         output_matrix = np.dot(pi_inv, np.dot(exchange, pi_inv))
     col_ids = self.col_ids
     values = dict()
     for col_id_idx1 in xrange(len(col_ids)):
         col_id1 = col_ids[col_id_idx1]
         values.update(
             dict(
                 (
                     (col_id1, col_ids[i]),
                     output_matrix[col_id_idx1, i]
                 )
                 for i in xrange(len(col_ids))
             )
         )
         if progress_callback:
             progress_callback()
     new_header_row_id = (
         self.header_row_id[:-2]
         + "2"
         + self.header_row_id[-2:]
     )
     return (
         PivotCrosstab(
             self.col_ids[:],
             self.col_ids[:],
             values,
             new_header_row_id,
             self.header_row_type,
             self.header_row_id,
             self.header_row_type,
             col_type=self.col_type.copy(),
         )
     )
Example No. 17
 def test_correspond_2_and_up(self):
     # Tests correspond(Z, y) on linkage and CDMs over observation sets of
     # different sizes.
     for i in xrange(2, 4):
         y = np.random.rand(i*(i-1)//2)
         Z = linkage(y)
         self.assertTrue(correspond(Z, y))
     for i in xrange(4, 15, 3):
         y = np.random.rand(i*(i-1)//2)
         Z = linkage(y)
         self.assertTrue(correspond(Z, y))
Example No. 18
    def iterGrid(self, minZoom, maxZoom):
        "Yields the tileBounds, zoom, tileCol and tileRow"
        assert minZoom in range(0, len(self.RESOLUTIONS))
        assert maxZoom in range(0, len(self.RESOLUTIONS))
        assert minZoom <= maxZoom

        for zoom in xrange(minZoom, maxZoom + 1):
            [minRow, minCol, maxRow, maxCol] = self.getExtentAddress(zoom)
            for row in xrange(minRow, maxRow + 1):
                for col in xrange(minCol, maxCol + 1):
                    tileBounds = self.tileBounds(zoom, col, row)
                    yield (tileBounds, zoom, col, row)
Example No. 19
 def to_association_matrix(self, bias='none', progress_callback=None):
     """Return a table with Markov associativities between columns
     (cf. Bavaud & Xanthos 2005, Deneulin et al. 2014)
     """
     # orange_table = self.to_orange_table('utf8')
     # freq_table = Orange.data.preprocess.RemoveDiscrete(orange_table)
     # freq = freq_table.to_numpy()[0]
     freq = self.to_numpy()
     if self.header_col_type == 'continuous':
         freq = freq[::, 1::]
     total_freq = freq.sum()
     sum_col = freq.sum(axis=0)
     sum_row = freq.sum(axis=1)
     exchange = np.dot(
         np.transpose(freq),
         np.dot(
             np.diag(1 / sum_row),
             freq
         )
     ) / total_freq
     if bias == 'frequent':
         output_matrix = exchange
     elif bias == 'none':
         sqrt_pi_inv = np.diag(1 / np.sqrt(sum_col / total_freq))
         output_matrix = np.dot(sqrt_pi_inv, np.dot(exchange, sqrt_pi_inv))
     else:
         pi_inv = np.diag(1 / (sum_col / total_freq))
         output_matrix = np.dot(pi_inv, np.dot(exchange, pi_inv))
     col_ids = self.col_ids
     values = dict()
     for col_id_idx1 in xrange(len(col_ids)):
         col_id1 = text(col_ids[col_id_idx1])
         values.update(
             dict(
                 (
                     (col_id1, text(col_ids[i])),
                     output_matrix[col_id_idx1, i]
                 )
                 for i in xrange(len(col_ids))
             )
         )
         if progress_callback:
             progress_callback()
     return (
         PivotCrosstab(
             self.col_ids[:],
             self.col_ids[:],
             values,
             header_col_id='__unit__',
             header_col_type='string',
             col_type=self.col_type.copy(),
         )
     )
Example No. 20
def softmax_loss_naive(W, X, y, reg):
  """
  Softmax loss function, naive implementation (with loops)

  Inputs have dimension D, there are C classes, and we operate on minibatches
  of N examples.

  Inputs:
  - W: A numpy array of shape (D, C) containing weights.
  - X: A numpy array of shape (N, D) containing a minibatch of data.
  - y: A numpy array of shape (N,) containing training labels; y[i] = c means
    that X[i] has label c, where 0 <= c < C.
  - reg: (float) regularization strength

  Returns a tuple of:
  - loss as single float
  - gradient with respect to weights W; an array of same shape as W
  """
  # Initialize the loss and gradient to zero.
  loss = 0.0
  dW = np.zeros_like(W)
  num_train = X.shape[0]
  num_classes = W.shape[1]
  #############################################################################
  # TODO: Compute the softmax loss and its gradient using explicit loops.     #
  # Store the loss in loss and the gradient in dW. If you are not careful     #
  # here, it is easy to run into numeric instability. Don't forget the        #
  # regularization!                                                           #
  #############################################################################
  for i in xrange(num_train):
    scores = X[i].dot(W)
    scores -= np.max(scores) 
    scores = np.exp(scores)
    sum_scores = np.sum(scores)
    for j in xrange(num_classes):      
      if j == y[i]:
        loss_i = scores[y[i]] / sum_scores
        dW[:, j] = dW[:, j] - (loss_i - 1) * X[i].T
      else:
        dW[:, j] = dW[:, j] - (scores[j] / sum_scores) * X[i].T
    loss -= np.log(loss_i)
  
  loss /= num_train
  dW /= num_train
  loss += 0.5 * reg * np.sum(W * W)
  dW = -dW + reg * W 
  #############################################################################
  #                          END OF YOUR CODE                                 #
  #############################################################################

  return loss, dW
Example No. 21
 def to_flat(self, progress_callback=None):
     """Return a copy of the crosstab in 'flat' format"""
     new_col_ids = list([c for c in self.col_ids if c != '__weight__'])
     new_col_type = dict(self.col_type)
     del new_col_type['__weight__']
     row_counter = 1
     new_values = dict()
     new_row_ids = list()
     if len(self.col_ids) > 1:
         first_col_id = self.col_ids[0]
         second_col_id = self.col_ids[1]
         for row_id in self.row_ids:
             count = self.values[(row_id, '__weight__')]
             first_col_value = self.values[row_id, first_col_id]
             second_col_value = self.values[row_id, second_col_id]
             for i in xrange(count):
                 new_row_id = text(row_counter)
                 new_row_ids.append(new_row_id)
                 new_values[(new_row_id, first_col_id)] = first_col_value
                 new_values[(new_row_id, second_col_id)] = second_col_value
                 row_counter += 1
             if progress_callback:
                 progress_callback()
     else:
         col_id = self.col_ids[0]
         for row_id in self.row_ids:
             count = self.values[(row_id, '__weight__')]
             col_value = self.values[row_id, col_id]
             for i in xrange(count):
                 new_row_id = text(row_counter)
                 new_row_ids.append(new_row_id)
                 new_values[(new_row_id, col_id)] = col_value
                 row_counter += 1
             if progress_callback:
                 progress_callback()
     return (
         FlatCrosstab(
             new_row_ids,
             new_col_ids,
             new_values,
             self.header_row_id,
             self.header_row_type,
             self.header_col_id,
             self.header_col_type,
             new_col_type,
             None,
             self.missing,
             self._cached_row_id,
         )
     )
Example No. 22
def _mating_selection(population, mu, tournament_n):
    """Returns the n_of_parents individuals with the best fitness"""

    parents = []
    for _ in xrange(mu):
        winner = _choice(population)
        for _ in xrange(tournament_n - 1):
            individual = _choice(population)
            # The winner is the element with the smallest fitness
            if individual.ibea_fitness < winner.ibea_fitness:
                winner = individual
        parents.append(winner)

    return parents
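A tiny usage sketch; here _choice is assumed to be random.choice, and the Indiv namedtuple is a hypothetical stand-in for whatever individual type carries an ibea_fitness attribute:

import collections
import random

_choice = random.choice                                        # assumption: _choice is random.choice
Indiv = collections.namedtuple('Indiv', 'name ibea_fitness')   # hypothetical individual type

population = [Indiv('a', 3.0), Indiv('b', 1.5), Indiv('c', 2.2)]
parents = _mating_selection(population, mu=4, tournament_n=2)
print([p.name for p in parents])   # four winners of size-2 tournaments; lower fitness wins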
Example No. 23
def getBoundingBox(veclist):
    """Calculate bounding box (pair of vectors with minimum and maximum
    coordinates).

    >>> getBoundingBox([(0,0,0), (1,1,2), (0.5,0.5,0.5)])
    ((0, 0, 0), (1, 1, 2))"""
    if not veclist:
        # Assume 3 dimensions if veclist is empty
        return (0, 0, 0), (0, 0, 0)

    # Find bounding box
    dim = len(veclist[0])
    return (
        tuple((min(vec[i] for vec in veclist) for i in xrange(dim))),
        tuple((max(vec[i] for vec in veclist) for i in xrange(dim))))
Example No. 24
    def __init__(self, filename):
        """
        Instantiate a Zred Background

        Parameters
        ----------
        filename: `string`
           Zred background filename
        """
        obkg = Entry.from_fits_file(filename, ext='ZREDBKG')

        # Will want to make configurable
        self.refmagbinsize = 0.01
        self.zredbinsize = 0.001

        # Create the refmag bins
        refmagbins = np.arange(obkg.refmagrange[0], obkg.refmagrange[1], self.refmagbinsize)
        nrefmagbins = refmagbins.size

        # Leave the zred bins the same
        nzredbins = obkg.zredbins.size

        # Set up arrays to populate
        sigma_g_new = np.zeros((nrefmagbins, nzredbins))

        floor = np.min(obkg.sigma_g)

        for i in xrange(nzredbins):
            sigma_g_new[:, i] = np.clip(interpol(obkg.sigma_g[:, i], obkg.refmagbins, refmagbins), floor, None)

        sigma_g = sigma_g_new.copy()

        # And update zred
        zredbins = np.arange(obkg.zredrange[0], obkg.zredrange[1], self.zredbinsize)
        nzredbins = zredbins.size

        sigma_g_new = np.zeros((nrefmagbins, nzredbins))

        for i in xrange(nrefmagbins):
            sigma_g_new[i, :] = np.clip(interpol(sigma_g[i, :], obkg.zredbins, zredbins), floor, None)

        self.zredbins = zredbins
        self.zredrange = obkg.zredrange
        self.zred_index = 0
        self.refmag_index = 1
        self.refmagbins = refmagbins
        self.refmagrange = obkg.refmagrange
        self.sigma_g = sigma_g_new
Example No. 25
 def sendResponse(self, data, address):
     reply_data_sz = -1
     max_pkt_sz = int(MAX_PACKET_SIZE / 2 - 20)
     pkt_cnt = -1
     p = si = -1
     try:
         reply_data = self.pack(data)
         reply_data_sz = len(reply_data)
         if reply_data_sz >= max_pkt_sz:
             pkt_cnt = int(reply_data_sz // max_pkt_sz) + 1
             mpr_payload = ('IOHUB_MULTIPACKET_RESPONSE', pkt_cnt)
             self.sendResponse(mpr_payload, address)
             for p in xrange(pkt_cnt - 1):
                 si = p*max_pkt_sz
                 self.socket.sendto(reply_data[si:si+max_pkt_sz], address)
             si = (p+1)*max_pkt_sz
             self.socket.sendto(reply_data[si:reply_data_sz], address)
         else:
             self.socket.sendto(reply_data, address)
     except Exception:
         print2err('=============================')
         print2err('Error trying to send data to experiment process:')
         print2err('max_pkt_sz: ', max_pkt_sz)
         print2err('reply_data_sz: ', reply_data_sz)
         print2err('pkt_cnt: ', pkt_cnt)
         print2err('packet index, byte index: ', p, si)
         printExceptionDetailsToStdErr()
         print2err('=============================')
         pktdata = self.pack('IOHUB_SERVER_RESPONSE_ERROR')
         self.socket.sendto(pktdata, address)
Example No. 26
 def test_num_obs_linkage_4_and_up(self):
     # Tests num_obs_linkage(Z) on linkage on observation sets between sizes
     # 4 and 15 (step size 3).
     for i in xrange(4, 15, 3):
         y = np.random.rand(i*(i-1)//2)
         Z = linkage(y)
         self.assertTrue(num_obs_linkage(Z) == i)
Example No. 27
 def test_num_obs_linkage_multi_matrix(self):
     # Tests num_obs_linkage with observation matrices of multiple sizes.
     for n in xrange(2, 10):
         X = np.random.rand(n, 4)
         Y = pdist(X)
         Z = linkage(Y)
         self.assertTrue(num_obs_linkage(Z) == n)
Example No. 28
def test_arm_postprocess_ret():

    for i in xrange(3):
        # e91ba8f0
        # ldmdb  R11, {R4,R11,SP,PC}
        irsb = pyvex.IRSB(data=b'\xe9\x1b\xa8\xf0',
                          mem_addr=0xed4028,
                          arch=archinfo.ArchARMEL(endness=archinfo.Endness.BE),
                          num_inst=1,
                          opt_level=i
                          )
        nose.tools.assert_equal(irsb.jumpkind, 'Ijk_Ret')

        # e91badf0
        # ldmdb  R11, {R4-R8,R10,R11,SP,PC}
        irsb = pyvex.IRSB(data=b'\xe9\x1b\xad\xf0',
                          mem_addr=0x4d4028,
                          arch=archinfo.ArchARMEL(endness=archinfo.Endness.BE),
                          num_inst=1,
                          opt_level=i
                          )
        nose.tools.assert_equal(irsb.jumpkind, 'Ijk_Ret')

        # 00a89de8
        # ldmfd SP, {R11,SP,PC}
        # Fixed by Fish in the VEX fork, commit 43c78f608490f9a5c71c7fca87c04759c1b93741
        irsb = pyvex.IRSB(data=b'\x00\xa8\x9d\xe8',
                          mem_addr=0xc800b57c,
                          arch=archinfo.ArchARMEL(endness=archinfo.Endness.LE),
                          num_inst=1,
                          opt_level=1
                          )
        nose.tools.assert_equal(irsb.jumpkind, 'Ijk_Ret')
Example No. 29
def softmax_loss_naive(W, X, y, reg):
  """
  Softmax loss function, naive implementation (with loops)

  Inputs have dimension D, there are C classes, and we operate on minibatches
  of N examples.

  Inputs:
  - W: A numpy array of shape (D, C) containing weights.
  - X: A numpy array of shape (N, D) containing a minibatch of data.
  - y: A numpy array of shape (N,) containing training labels; y[i] = c means
    that X[i] has label c, where 0 <= c < C.
  - reg: (float) regularization strength

  Returns a tuple of:
  - loss as single float
  - gradient with respect to weights W; an array of same shape as W
  """
  # Initialize the loss and gradient to zero.
  loss = 0.0
  dW = np.zeros_like(W)

  #############################################################################
  # TODO: Compute the softmax loss and its gradient using explicit loops.     #
  # Store the loss in loss and the gradient in dW. If you are not careful     #
  # here, it is easy to run into numeric instability. Don't forget the        #
  # regularization!                                                           #
  #############################################################################

  # compute the loss and the gradient
  num_classes = W.shape[1]
  num_train = X.shape[0]
  loss = 0.0
  dscores = np.zeros((num_train, num_classes))

  for i in xrange(num_train):
    scores = X[i].dot(W)
    scores -= max(scores)  # for numerical stability of following equation
    probs = np.exp(scores) / np.sum(np.exp(scores))

    loss -= np.log(probs[y[i]])

    dscores[i] = probs
    dscores[i, y[i]] -= 1

    dW += np.outer(X[i], dscores[i])
   
  # Right now the loss is a sum over all training examples, but we want it
  # to be an average instead so we divide by num_train.
  loss /= num_train

  # Add regularization to the loss.
  loss += 0.5 * reg * np.sum(W * W)

  dW = dW/num_train + reg * W     # average gradient + regularization gradient 
  #############################################################################
  #                          END OF YOUR CODE                                 #
  #############################################################################

  return loss, dW
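For comparison, the same loss and gradient can be written without the explicit loops; a vectorized sketch that follows the 0.5 * reg convention used above:

import numpy as np

def softmax_loss_vectorized_sketch(W, X, y, reg):
    num_train = X.shape[0]
    scores = X.dot(W)
    scores -= scores.max(axis=1, keepdims=True)            # numerical stability
    probs = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)
    loss = -np.log(probs[np.arange(num_train), y]).mean() + 0.5 * reg * np.sum(W * W)
    dscores = probs.copy()
    dscores[np.arange(num_train), y] -= 1
    dW = X.T.dot(dscores) / num_train + reg * W
    return loss, dW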
Example No. 30
 def test_is_valid_linkage_4_and_up(self):
     # Tests is_valid_linkage(Z) on linkage on observation sets between
     # sizes 4 and 15 (step size 3).
     for i in xrange(4, 15, 3):
         y = np.random.rand(i*(i-1)//2)
         Z = linkage(y)
         assert_(is_valid_linkage(Z) == True)
Example No. 31
def softmax_loss_naive(W, X, y, reg):
    """
  Softmax loss function, naive implementation (with loops)

  Inputs have dimension D, there are C classes, and we operate on minibatches
  of N examples.

  Inputs:
  - W: A numpy array of shape (D, C) containing weights.
  - X: A numpy array of shape (N, D) containing a minibatch of data.
  - y: A numpy array of shape (N,) containing training labels; y[i] = c means
    that X[i] has label c, where 0 <= c < C.
  - reg: (float) regularization strength

  Returns a tuple of:
  - loss as single float
  - gradient with respect to weights W; an array of same shape as W
  """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)

    #############################################################################
    # TODO: Compute the softmax loss and its gradient using explicit loops.     #
    # Store the loss in loss and the gradient in dW. If you are not careful     #
    # here, it is easy to run into numeric instability. Don't forget the        #
    # regularization!                                                           #
    #############################################################################
    denominator = 0
    num_train = X.shape[0]
    num_classes = W.shape[1]
    for i in xrange(num_train):
        scores = X[i].dot(W)  # predict a matrix of shape (1, C)
        denominator = 0
        # shift scores for numerical stability (avoids overflow in exp)
        scores -= np.max(scores)
        for j in xrange(num_classes):
            denominator += np.exp(scores[j])
        correct_score = scores[y[i]]
        loss += -1 * np.log(np.exp(correct_score) / denominator)
        for j in xrange(num_classes):
            numerator = np.exp(scores[j])
            probability = numerator / denominator
            # Reference: https://math.stackexchange.com/questions/945871/derivative-of-softmax-loss-function
            if j == y[i]:
                # For matching class
                dW[:, j] += (-X[i].T * (1 - probability))
            else:
                # For non-matching class
                dW[:, j] += X[i].T * probability

    loss /= num_train
    dW /= num_train
    # add regularization
    loss += reg * np.sum(W * W)
    dW += 2 * reg * W
    #############################################################################
    #                          END OF YOUR CODE                                 #
    #############################################################################

    return loss, dW
Example No. 32
    def optimize(self, objective_func, iters, print_step=1, verbose=1):
        """Optimizes the swarm for a number of iterations.

        Performs the optimization to evaluate the objective
        function :code:`f` for a number of iterations :code:`iter.`

        Parameters
        ----------
        objective_func : function
            objective function to be evaluated
        iters : int
            number of iterations
        print_step : int (default is 1)
            amount of steps for printing into console.
        verbose : int  (default is 1)
            verbosity setting.

        Returns
        -------
        tuple
            the global best cost and the global best position.
        """
        for i in xrange(iters):
            # Compute cost for current position and personal best
            current_cost = objective_func(self.pos)
            pbest_cost = objective_func(self.personal_best_pos)

            # Update personal bests if the current position is better
            # Create 1-D mask then update pbest_cost
            m = (current_cost < pbest_cost)
            pbest_cost = np.where(~m, pbest_cost, current_cost)
            # Create 2-D mask to update positions
            _m = np.repeat(m[:, np.newaxis], self.dimensions, axis=1)
            self.personal_best_pos = np.where(~_m, self.personal_best_pos,
                                              self.pos)

            # Get the minima of the pbest and check if it's less than
            # the saved gbest
            if np.min(pbest_cost) < self.best_cost:
                self.best_cost = np.min(pbest_cost)
                self.best_pos = self.personal_best_pos[np.argmin(pbest_cost)]

            # Print to console
            if i % print_step == 0:
                cli_print('Iteration %s/%s, cost: %s' %
                          (i+1, iters, self.best_cost), verbose, 2,
                          logger=self.logger)

            # Save to history
            hist = self.ToHistory(
                best_cost=self.best_cost,
                mean_pbest_cost=np.mean(pbest_cost),
                mean_neighbor_cost=self.best_cost,
                position=self.pos,
                velocity=self.velocity
            )
            self._populate_history(hist)

            # Verify stop criteria based on the relative acceptable cost ftol
            if np.min(self.best_cost) < self.ftol:
                break

            # Perform velocity and position updates
            self._update_velocity()
            self._update_position()

        # Obtain the final best_cost and the final best_position
        final_best_cost = self.best_cost.copy()  # Make deep copies
        final_best_pos = self.best_pos.copy()

        end_report(final_best_cost, final_best_pos, verbose,
                   logger=self.logger)
        return final_best_cost, final_best_pos
Example No. 33
def svm_loss_naive(W, X, y, reg):
    """
    Structured SVM loss function, naive implementation (with loops).

    Inputs have dimension D, there are C classes, and we operate on minibatches
    of N examples.

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c means
    that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    dW = np.zeros(W.shape)  # initialize the gradient as zero

    # compute the loss and the gradient
    num_classes = W.shape[1]
    num_train = X.shape[0]
    loss = 0.0
    #i from 0 to 499
    for i in xrange(num_train):
        scores = X[i].dot(W)
        correct_class_score = scores[y[i]]
        loss_count = 0
        for j in xrange(num_classes):
            if j == y[i]:
                continue
            margin = scores[j] - correct_class_score + 1  # note delta = 1
            if margin > 0:
                loss += margin
                # incorrect class gradient part
                dW[:, j] += X[i]
                # count contribution of terms to loss function max(0,X) count number of X
                loss_count += 1

        dW[:, y[i]] += (-1) * loss_count * X[i]

    # Right now the loss is a sum over all training examples, but we want it
    # to be an average instead so we divide by num_train.
    loss /= num_train
    dW /= num_train

    # Add regularization to the loss.
    loss += reg * np.sum(W * W)
    # Add regularization to the gradient
    dW += 2 * reg * W

    #############################################################################
    # TODO:                                                                     #
    # Compute the gradient of the loss function and store it dW.                #
    # Rather than first computing the loss and then computing the derivative,   #
    # it may be simpler to compute the derivative at the same time that the     #
    # loss is being computed. As a result you may need to modify some of the    #
    # code above to compute the gradient.                                       #
    #############################################################################

    return loss, dW
Example No. 34
def svm_loss_naive(W, X, y, reg):
    """
  Structured SVM loss function, naive implementation (with loops).

  Inputs have dimension D, there are C classes, and we operate on minibatches
  of N examples.

  Inputs:
  - W: A numpy array of shape (D, C) containing weights.                          3073*10
  - X: A numpy array of shape (N, D) containing a minibatch of data.              minibatch*3073
  - y: A numpy array of shape (N,) containing training labels; y[i] = c means     minibatch        
    that X[i] has label c, where 0 <= c < C.
  - reg: (float) regularization strength

  Returns a tuple of:
  - loss as single float
  - gradient with respect to weights W; an array of same shape as W
  """
    dW = np.zeros(W.shape)  # initialize the gradient as zero

    # compute the loss and the gradient
    num_classes = W.shape[1]
    num_train = X.shape[0]  # minibatch
    loss = 0.0
    for i in xrange(num_train):
        #print('wshape:',X[i])
        scores = X[i].dot(W)  #3073 .dot 3073*10 = 10x1
        #print('scores.shape:',scores.shape)
        correct_class_score = scores[y[i]]
        for j in xrange(num_classes):
            if j == y[i]:  # y[i] correct class
                continue
            margin = scores[j] - correct_class_score + 1  # note delta = 1
            if margin > 0:
                loss += margin
                v = np.zeros((1, num_classes))  #1x10
                v[0, j] = 1  #incorrect class
                v[0, y[i]] = -1  #correct class
                u = np.resize(X[i], (X[i].shape[0], 1))  #X[i]=3073  u = 3073x1
                #print('ushape:',u.shape)
                dW += u.dot(v)  #3073x1 .dot 1||-1

    # Right now the loss is a sum over all training examples, but we want it
    # to be an average instead so we divide by num_train.
    loss /= num_train
    dW /= num_train
    # Add regularization to the loss.
    #loss += reg * np.sum(W * W)
    loss += 0.5 * reg * np.sum(
        W * W)  # add the 1/2 factor to the loss so its derivative is simpler
    dW += reg * W
    #############################################################################
    # TODO:                                                                     #
    # Compute the gradient of the loss function and store it dW.                #
    # Rather than first computing the loss and then computing the derivative,   #
    # it may be simpler to compute the derivative at the same time that the     #
    # loss is being computed. As a result you may need to modify some of the    #
    # code above to compute the gradient.                                       #
    #############################################################################
    # read the optimization notes

    return loss, dW
Example No. 35
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the fully-connected net, computing  #
        # the class scores for X and storing them in the scores variable.          #
        #                                                                          #
        # When using dropout, you'll need to pass self.dropout_param to each       #
        # dropout forward pass.                                                    #
        #                                                                          #
        # When using batch normalization, you'll need to pass self.bn_params[0] to #
        # the forward pass for the first batch normalization layer, pass           #
        # self.bn_params[1] to the forward pass for the second batch normalization #
        # layer, etc.                                                              #
        ############################################################################
        # 1. Create a variable to store the cache data from every layer
        cache_list = []

        # 2. Create a copy of the input data(X)
        input_data = X.copy()

        # 3. If dropout in use
        if self.use_dropout:
            for i in xrange(1, self.num_layers):
                # Do Affine Forward Pass
                a, fc_cache = affine_forward(input_data,
                                             self.params['W' + str(i)],
                                             self.params['b' + str(i)])
                # Do Relu activation for Forward Pass
                r, relu_cache = relu_forward(a)
                # Relu activation's output used to perform dropout forward pass
                dropout_output, dropout_cache = dropout_forward(
                    r, self.dropout_param)
                # Store the cached values
                cache = (fc_cache, relu_cache, dropout_cache)
                # Store all the cached values into the cache_list
                cache_list.append(cache)
                # Set value of input data as the current output for next layer
                input_data = dropout_output

            # Perform the affine forward pass for the last layer
            scores, dr_cache_ln = affine_forward(
                input_data, self.params['W' + str(self.num_layers)],
                self.params['b' + str(self.num_layers)])
            # Store current cache to Cache list
            cache_list.append(dr_cache_ln)

        # 4. If dropout not in use, run normal mode
        else:
            for i in xrange(1, self.num_layers):
                # Do the forward pass with Relu activation
                layer_output, layer_cache = affine_relu_forward(
                    input_data, self.params['W' + str(i)],
                    self.params['b' + str(i)])
                # Store current cached values into the cache list
                cache_list.append(layer_cache)
                # Set input data as current output for next layer
                input_data = layer_output

            # Do the Affine Forward Pass into the last layer
            scores, cache_ln = affine_forward(
                input_data, self.params['W' + str(self.num_layers)],
                self.params['b' + str(self.num_layers)])
            # Store the value of current cache to Cache list
            cache_list.append(cache_ln)

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If test mode return early
        if mode == 'test':
            return scores

        loss, grads = 0.0, {}
        ############################################################################
        # TODO: Implement the backward pass for the fully-connected net. Store the #
        # loss in the loss variable and gradients in the grads dictionary. Compute #
        # data loss using softmax, and make sure that grads[k] holds the gradients #
        # for self.params[k]. Don't forget to add L2 regularization!               #
        #                                                                          #
        # When using batch normalization, you don't need to regularize the scale   #
        # and shift parameters.                                                    #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        # 1. Using softmax_loss() to get the loss and dout value
        loss, dout = softmax_loss(scores, y)

        # 2. Initialize variable to store derivatives of weights (dw) and bias (db)
        list_dw = []
        list_db = []

        # 3. Extract and delete the last layer entry in the Cache List
        cache = cache_list.pop()

        # 4. Do the affine backward pass on cached data for the last layer
        dx, dw, db = affine_backward(dout, cache)

        # 5. Store the derivative of weights and biases at index 0
        list_dw.insert(0, dw)
        list_db.insert(0, db)
        dout = dx

        # 6. If dropout is in used
        if self.use_dropout:
            # Loop through the cached entries for all the intermediary layers
            for i in xrange(len(cache_list)):
                cache = cache_list.pop()
                # Get data by extracting it from the cache file
                fc_cache, relu_cache, dropout_cache = cache
                # Do the dropout backward pass
                dd = dropout_backward(dout, dropout_cache)
                # Do the Relu activation for backward pass
                dr = relu_backward(dd, relu_cache)
                # Perform the affine backward pass
                dx, dw, db = affine_backward(dr, fc_cache)
                # Update list of derivatives of weights and biases
                list_dw.insert(0, dw)
                list_db.insert(0, db)
                # Set derivative of output as derivative of x
                dout = dx

            para_loss = 0

            # Loop through the values in list of derivatives of weights
            for i in xrange(len(list_dw)):
                # Apply regularization to the weights
                W = self.params['W' + str(i + 1)]
                list_dw[i] += self.reg * W
                # Use para_loss variable to store the iterative penalty terms for the regularization
                para_loss += np.sum(W**2)
            # Regularize the loss
            loss += 0.5 * self.reg * para_loss

            # Loop through and update the grads dictionary entries for derivatives of weights and biases
            for i in xrange(len(list_dw)):
                grads['W' + str(i + 1)] = list_dw[i]
                grads['b' + str(i + 1)] = list_db[i]

            # If dropout is not specified, run normal mode
        else:
            # Loop through the cached entries for all the intermediary layers
            for i in range(len(cache_list)):
                # Extract and remove the last entry in the cache list
                cache = cache_list.pop()
                # Perform Backward pass with Relu activation
                dx, dw, db = affine_relu_backward(dout, cache)
                # Update list of derivatives of weights and biases
                list_dw.insert(0, dw)
                list_db.insert(0, db)
                # Set derivative of output as derivative of x
                dout = dx
            para_loss = 0

            # Loop through the values in list of derivatives of weights
            for i in xrange(len(list_dw)):
                # Apply regularization to the weights
                W = self.params['W' + str(i + 1)]
                list_dw[i] += self.reg * W
                # Use para_loss variable to store the iterative penalty terms for the regularization
                para_loss += np.sum(W**2)
            # Regularize the loss
            loss += 0.5 * self.reg * para_loss

            # Loop through and update the grads dictionary entries for derivatives of weights and biases
            for i in xrange(len(list_dw)):
                grads['W' + str(i + 1)] = list_dw[i]
                grads['b' + str(i + 1)] = list_db[i]

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
Example No. 36
    def test(self, data):
        source_data, source_loc_data, target_data, target_label, _ = data
        N = int(math.ceil(len(source_data) / self.batch_size))
        cost = 0

        x = np.ndarray([self.batch_size, 1], dtype=np.int32)
        time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        target = np.zeros([self.batch_size, 3])  # one-hot-encoded
        context = np.ndarray([self.batch_size, self.mem_size])
        context.fill(self.pad_idx)

        m, acc = 0, 0
        for i in xrange(N):
            target.fill(0)
            time.fill(self.mem_size)
            context.fill(self.pad_idx)

            emb_a = self.A.eval()
            emb_a[self.pad_idx, :] = 0
            emb_b = self.B.eval()
            emb_b[self.pad_idx, :] = 0
            emb_c = self.C.eval()
            emb_c[self.pad_idx, :] = 0
            emb_ta = self.T_A.eval()
            emb_ta[self.mem_size, :] = 0
            emb_tb = self.T_B.eval()
            emb_tb[self.mem_size, :] = 0
            self.sess.run(self.A.assign(emb_a))
            self.sess.run(self.B.assign(emb_b))
            self.sess.run(self.C.assign(emb_c))
            self.sess.run(self.T_A.assign(emb_ta))
            self.sess.run(self.T_B.assign(emb_tb))

            raw_labels = []
            for b in xrange(self.batch_size):
                x[b][0] = target_data[m]
                target[b][target_label[m]] = 1
                time[b, :len(source_loc_data[m])] = source_loc_data[m]
                context[b, :len(source_data[m])] = source_data[m]
                # Record the label for this example before advancing m so that
                # raw_labels stays aligned with the predictions for this batch
                raw_labels.append(target_label[m])
                m += 1

            loss = self.sess.run(
                [self.loss],
                feed_dict={
                    self.input: x,
                    self.time: time,
                    self.target: target,
                    self.context: context
                })
            cost += np.sum(loss)

            predictions = self.sess.run(self.correct_prediction,
                                        feed_dict={
                                            self.input: x,
                                            self.time: time,
                                            self.target: target,
                                            self.context: context
                                        })

            for b in xrange(self.batch_size):
                if raw_labels[b] == predictions[b]:
                    acc = acc + 1

        return cost, acc / float(len(source_data))
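
For reference, a hedged sketch of the `data` tuple this test routine unpacks; the toy values and names below are assumptions inferred from the unpacking and indexing in the method, not from the original project:

# Two toy examples with a 3-class label space (the method one-hot-encodes into 3 columns).
source_data = [[4, 7, 2], [9, 1]]        # context word ids per example
source_loc_data = [[0, 1, 2], [0, 1]]    # location/position ids per example
target_data = [5, 3]                     # target (aspect) word id per example
target_label = [2, 0]                    # integer class label per example
data = (source_data, source_loc_data, target_data, target_label, None)

# With a trained model instance (hypothetical name `model`) one would then call:
# cost, accuracy = model.test(data)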
Exemplo n.º 37
0
 def _chunker(self, seq, size):
     return [seq[pos:pos + size] for pos in xrange(0, len(seq), size)]
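
A quick standalone illustration of the same chunking logic, rewritten with Python 3's range; the trailing chunk is simply shorter when the length is not a multiple of size:

def chunker(seq, size):
    # Slice the sequence into consecutive pieces of at most `size` elements.
    return [seq[pos:pos + size] for pos in range(0, len(seq), size)]

print(chunker(list(range(7)), 3))   # -> [[0, 1, 2], [3, 4, 5], [6]]
print(chunker("abcdefg", 2))        # -> ['ab', 'cd', 'ef', 'g']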
Exemplo n.º 38
0
def softmax_loss_naive(W, X, y, reg):
    """
    Softmax loss function, naive implementation (with loops)

    Inputs have dimension D, there are C classes, and we operate on minibatches
    of N examples.

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c means
      that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)

    #############################################################################
    # TODO: Compute the softmax loss and its gradient using explicit loops.     #
    # Store the loss in loss and the gradient in dW. If you are not careful     #
    # here, it is easy to run into numeric instability. Don't forget the        #
    # regularization!                                                           #
    #############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    num_train = X.shape[0]
    num_classes = W.shape[1]
    
    for i in xrange(num_train):
        
        # loss
        scores = X[i].dot(W)
        # shift values for scores for numeric reason (overflow cautious)
        scores -= scores.max()
        scores_expsum = np.sum(np.exp(scores))
        cor_ex = np.exp(scores[y[i]])
        loss += -np.log(cor_ex / scores_expsum)
        
        # Gradient
        # for correct class
        dW[:, y[i]] += (-1) * (scores_expsum - cor_ex) / scores_expsum * X[i]
        for j in xrange(num_classes):
            # pass correct class gradient
            if j == y[i]:
                continue
            # for incorrect classes
            dW[:, j] += np.exp(scores[j]) / scores_expsum * X[i]
            
    loss /= num_train
    # Use a 0.5 factor on the L2 penalty so that its gradient is simply reg * W
    loss += 0.5 * reg * np.sum(W * W)
    dW /= num_train
    dW += reg * W

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    return loss, dW
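
A hedged sanity-check sketch for the loss above; the tiny random problem and the -log(1/C) check are illustrative assumptions (and, since the snippet uses xrange, it assumes Python 2 or an `xrange = range` alias defined in the same module):

import numpy as np

np.random.seed(0)
D, C, N = 4, 3, 5
W = 0.01 * np.random.randn(D, C)
X = np.random.randn(N, D)
y = np.random.randint(C, size=N)

loss, dW = softmax_loss_naive(W, X, y, reg=0.0)
# With near-zero weights and no regularization the loss should be close to
# -log(1/C), a standard first sanity check.
print(loss, np.log(C))
print(dW.shape)   # same shape as W: (4, 3)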
Exemplo n.º 39
0
    def _calc_corrections(self, gals, mode2=False):
        """
        Calculate zred afterburner correction parameters.

        Sets self.pars.corr, self.pars.corr_slope, self.pars.corr_r or
        self.pars.corr2, self.pars.corr2_slope, self.pars.corr2_r

        Parameters
        ----------
        gals: `redmapper.GalaxyCatalog`
           Galaxy catalog being fit.  Must contain zred_uncorr information.
        mode2: `bool`, optional
           Default is False.  When False, corrections are computed such that
           <zred|ztrue> is unbiased.  When True, corrections are computed
           such that <ztrue|zred> is unbiased.
        """

        # p or pcol
        if self.config.calib_use_pcol:
            probs = gals.pcol
        else:
            probs = gals.p

        # Set a threshold removing 5% worst lkhd outliers
        st = np.argsort(gals.lkhd)
        thresh = gals.lkhd[st[int(0.05 * gals.size)]]

        # This is an arbitrary 2sigma cut...
        guse, = np.where((gals.lkhd > thresh)
                         & (np.abs(gals.z - gals.zred) < 2. * gals.zred_e))

        spl = CubicSpline(self.pars.pivotmag_z, self.pars.pivotmag)
        pivotmags = spl(gals.z)

        w = 1. / (np.exp((thresh - gals.lkhd[guse]) / 0.2) + 1.0)

        # The offset cvals
        cvals = np.zeros(self.pars.corr_z.size)
        # The slope svals
        svals = np.zeros(self.pars.corr_slope_z.size)
        # And the r value to be multiplied by error
        rvals = np.ones(self.pars.corr_slope_z.size)
        # And the background vals
        bkg_cvals = np.zeros(self.pars.corr_slope_z.size)

        cvals[:], _ = self._compute_startvals(self.pars.corr_z,
                                              gals.z,
                                              gals.z - gals.zred,
                                              median=True)

        # Initial guess for bkg_cvals is trickier and not generalizable (sadly)
        for i in xrange(self.pars.corr_slope_z.size):
            if i == 0:
                zlo = self.pars.corr_slope_z[0]
            else:
                zlo = (self.pars.corr_slope_z[i - 1] +
                       self.pars.corr_slope_z[i]) / 2.
            if i == (self.pars.corr_slope_z.size - 1):
                zhi = self.pars.corr_slope_z[i]
            else:
                zhi = (self.pars.corr_slope_z[i] +
                       self.pars.corr_slope_z[i + 1]) / 2.

            if mode2:
                u, = np.where((gals.zred[guse] > zlo)
                              & (gals.zred[guse] < zhi))
            else:
                u, = np.where((gals.z[guse] > zlo) & (gals.z[guse] < zhi))

            if u.size < 3:
                if i > 0:
                    bkg_cvals[i] = bkg_cvals[i - 1]
            else:
                st = np.argsort(probs[guse[u]])
                uu = u[st[0:u.size // 2]]
                bkg_cvals[i] = np.std(gals.z[guse[uu]] -
                                      gals.zred[guse[uu]])**2.

        if mode2:
            self.config.logger.info("Fitting zred2 corrections...")
            z = gals.zred
        else:
            self.config.logger.info("Fitting zred corrections...")
            z = gals.z

        corrfitter = CorrectionFitter(self.pars.corr_z,
                                      z[guse],
                                      gals.z[guse] - gals.zred[guse],
                                      gals.zred_e[guse],
                                      slope_nodes=self.pars.corr_slope_z,
                                      probs=np.clip(probs[guse], None, 0.99),
                                      dmags=gals.refmag[guse] -
                                      pivotmags[guse],
                                      ws=w)

        # self.config.calib_corr_nocorrslope
        # first fit the mean
        cvals, = corrfitter.fit(cvals, svals, rvals, bkg_cvals, fit_mean=True)
        # fit the slope (if desired)
        if not self.config.calib_corr_nocorrslope:
            svals, = corrfitter.fit(cvals,
                                    svals,
                                    rvals,
                                    bkg_cvals,
                                    fit_slope=True)
        # Fit r
        rvals, = corrfitter.fit(cvals, svals, rvals, bkg_cvals, fit_r=True)
        # Fit bkg
        bkg_cvals, = corrfitter.fit(cvals,
                                    svals,
                                    rvals,
                                    bkg_cvals,
                                    fit_bkg=True)

        # Combined fit
        if not self.config.calib_corr_nocorrslope:
            cvals, svals, rvals, bkg_cvals = corrfitter.fit(cvals,
                                                            svals,
                                                            rvals,
                                                            bkg_cvals,
                                                            fit_mean=True,
                                                            fit_slope=True,
                                                            fit_r=True,
                                                            fit_bkg=True)
        else:
            cvals, rvals, bkg_cvals = corrfitter.fit(cvals,
                                                     svals,
                                                     rvals,
                                                     bkg_cvals,
                                                     fit_mean=True,
                                                     fit_r=True,
                                                     fit_bkg=True)

        # And record the values
        if mode2:
            self.pars.corr2 = cvals
            self.pars.corr2_slope = svals
            self.pars.corr2_r = rvals
        else:
            self.pars.corr = cvals
            self.pars.corr_slope = svals
            self.pars.corr_r = rvals
Exemplo n.º 40
0
    def _calc_offdiagonal_pars(self, gals, doRaise=True):
        """
        Calculate the off-diagonal elements of the covariance matrix.

        Sets self.pars.sigma, self.pars.covmat_amp (off-diagonal).

        Parameters
        ----------
        gals: `redmapper.GalaxyCatalog`
           Galaxy catalog with fields required for fit.
        doRaise: `bool`, optional
           Raise if there's a problem with the background?  Default is True.
        """
        # The routine to compute the off-diagonal elements

        ncol = self.config.nmag - 1

        galcolor = gals.galcol
        galcolor_err = gals.galcol_err

        # compute the pivot mags
        spl = CubicSpline(self.pars.pivotmag_z, self.pars.pivotmag)
        pivotmags = spl(gals.z)

        # And set the right probabilities
        if self.config.calib_use_pcol:
            probs = gals.pcol
        else:
            probs = gals.p

        # Compute c, slope, and median and width for all galaxies/colors
        ci = np.zeros((gals.size, ncol))
        si = np.zeros_like(ci)
        medci = np.zeros_like(ci)
        medwidthi = np.zeros_like(ci)
        gsig = np.zeros_like(ci)

        for j in xrange(ncol):
            spl = CubicSpline(self.pars._ndarray[self.ztag[j]],
                              self.pars._ndarray[self.ctag[j]])
            ci[:, j] = spl(gals.z)
            spl = CubicSpline(self.pars._ndarray[self.zstag[j]],
                              self.pars._ndarray[self.stag[j]])
            si[:, j] = spl(gals.z)
            spl = CubicSpline(self.pars.pivotmag_z, self.pars.medcol[:, j])
            medci[:, j] = spl(gals.z)
            spl = CubicSpline(self.pars.pivotmag_z,
                              self.pars.medcol_width[:, j])
            medwidthi[:, j] = spl(gals.z)
            spl = CubicSpline(self.pars.covmat_z, self.pars.sigma[j, j, :])
            gsig[:, j] = spl(gals.z)

        if self.do_lupcorr:
            mags = np.zeros((gals.size, self.config.nmag))
            lups = np.zeros_like(mags)

            mags[:, self.config.ref_ind] = gals.refmag

            for j in xrange(self.config.ref_ind + 1, self.config.nmag):
                mags[:, j] = mags[:, j - 1] - (ci[:, j - 1] + si[:, j - 1] *
                                               (gals.refmag - pivotmags))
            for j in xrange(self.config.ref_ind - 1, -1, -1):
                mags[:, j] = mags[:, j + 1] + (ci[:, j] + si[:, j] *
                                               (gals.refmag - pivotmags))

            for j in xrange(self.config.nmag):
                flux = 10.**((mags[:, j] - self.lupzp) / (-2.5))
                # asinh (luptitude) magnitudes; note 2.5 / ln(10) == 1 / (0.4 * ln(10))
                lups[:, j] = (2.5 * np.log10(1.0 / self.config.b[j]) -
                              np.arcsinh(0.5 * flux / self.bnmgy[j]) /
                              (0.4 * np.log(10.0)))

            magcol = mags[:, :-1] - mags[:, 1:]
            lupcol = lups[:, :-1] - lups[:, 1:]

            lupcorr = lupcol - magcol
        else:
            lupcorr = np.zeros((gals.size, ncol))

        template_col = np.zeros((gals.size, ncol))
        for j in xrange(ncol):
            template_col[:, j] = (ci[:, j] +
                                  si[:, j] * (gals.refmag - pivotmags) +
                                  lupcorr[:, j])

        res = galcolor - template_col

        # figure out order with a ranking based on the configured order
        bits = 2**np.arange(ncol, dtype=np.int32)
        covmat_rank = np.zeros((ncol * ncol - ncol) // 2, dtype=np.int32)
        covmat_order = self.config.calib_color_order
        ctr = 0
        for j in xrange(ncol):
            for k in xrange(j + 1, ncol):
                covmat_rank[ctr] = bits[covmat_order[j]] + bits[
                    covmat_order[k]]
                ctr += 1

        covmat_rank = np.sort(covmat_rank)

        full_covmats = self.pars.covmat_amp.copy()

        for ctr in xrange(covmat_rank.size):
            starttime = time.time()

            j = -1
            k = -1
            for tctr in xrange(ncol):
                if ((covmat_rank[ctr] & bits[tctr]) > 0):
                    if j < 0:
                        j = covmat_order[tctr]
                    else:
                        k = covmat_order[tctr]

            # swap if necessary so that j < k
            if k < j:
                j, k = k, j

            self.config.logger.info("Working on %d, %d" % (j, k))

            u, = np.where((galcolor[:, j] > medci[:, j] -
                           self.config.calib_color_nsig * medwidthi[:, j])
                          & (galcolor[:, j] < medci[:, j] +
                             self.config.calib_color_nsig * medwidthi[:, j])
                          & (galcolor[:, k] > medci[:, k] -
                             self.config.calib_color_nsig * medwidthi[:, k])
                          & (galcolor[:, k] < medci[:, k] +
                             self.config.calib_color_nsig * medwidthi[:, k]))

            bvals = self.bkg.lookup_offdiag(j,
                                            k,
                                            galcolor[:, j],
                                            galcolor[:, k],
                                            gals.refmag,
                                            doRaise=doRaise)

            odfitter = RedSequenceOffDiagonalFitter(
                self.pars.covmat_z,
                gals.z[u],
                res[u, j],
                res[u, k],
                gsig[u, j],
                gsig[u, k],
                gals.mag_err[u, :],
                j,
                k,
                probs[u],
                bvals[u],
                self.config.calib_covmat_prior,
                min_eigenvalue=self.config.calib_covmat_min_eigenvalue)

            rvals = odfitter.fit(np.zeros(self.pars.covmat_z.size),
                                 full_covmats=full_covmats)

            self.pars.sigma[j, k, :] = rvals
            self.pars.sigma[k, j, :] = rvals

            self.pars.covmat_amp[j, k, :] = rvals * self.pars.sigma[
                j, j, :] * self.pars.sigma[k, k, :]
            self.pars.covmat_amp[k, j, :] = self.pars.covmat_amp[j, k, :]

            full_covmats[j, k, :] = self.pars.covmat_amp[j, k, :]
            full_covmats[k, j, :] = self.pars.covmat_amp[k, j, :]

            self.config.logger.info("Done in %.2f seconds." %
                                    (time.time() - starttime))
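
The pair bookkeeping in the routine above encodes each off-diagonal color pair (j, k) as the sum of two bit flags so the pairs can be sorted and decoded later; a small standalone sketch of that encoding (the color order list is an illustrative assumption):

import numpy as np

ncol = 4
covmat_order = [2, 0, 1, 3]              # assumed stand-in for calib_color_order
bits = 2**np.arange(ncol, dtype=np.int32)

# Encode every unordered pair as bits[order[j]] + bits[order[k]], then sort.
ranks = np.sort([bits[covmat_order[j]] + bits[covmat_order[k]]
                 for j in range(ncol) for k in range(j + 1, ncol)])

# Decode each rank back into its two color indices, as the fitting loop does.
for r in ranks:
    j, k = sorted(c for c in range(ncol) if r & bits[c])
    print(r, (j, k))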
Exemplo n.º 41
0
def lcs(X, Y, m, n):
    L = [[0 for _ in xrange(n + 1)] for _ in xrange(m + 1)]
    a = 20
    b = 30

    for i in xrange(m + 1):
        a = a + 40
        b = 30
        for j in xrange(n + 1):
            if i == 0:
                L[i][j] = 0
                cv2.putText(image, str(L[i][j]), (a, b),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.2, (80, 170, 89), 1, 1)
            elif j == 0:
                L[i][j] = 0
                cv2.putText(image, str(L[i][j]), (a, b),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.2, (80, 170, 89), 1, 1)
            elif X[i - 1] == Y[j - 1]:
                L[i][j] = L[i - 1][j - 1] + 1
                cv2.putText(image, str(L[i][j]), (a, b),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.2, (80, 170, 89), 1, 1)
                cv2.arrowedLine(image, (a - 5, b - 5), (a - 30, b - 20),
                                (0, 0, 255), 1)
            else:
                L[i][j] = max(L[i - 1][j], L[i][j - 1])
                cv2.putText(image, str(L[i][j]), (a, b),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.2, (80, 170, 89), 1, 1)
                if (L[i - 1][j] > L[i][j - 1]):
                    cv2.arrowedLine(image, (a - 5, b - 5), (a - 30, b - 5),
                                    (0, 0, 255), 1)
                elif (L[i - 1][j] < L[i][j - 1]):
                    cv2.arrowedLine(image, (a - 5, b - 5), (a - 5, b - 20),
                                    (0, 0, 255), 1)
                else:
                    cv2.arrowedLine(image, (a - 7, b - 5), (a - 30, b - 5),
                                    (0, 0, 255), 1)
                    cv2.arrowedLine(image, (a - 5, b - 7), (a - 5, b - 20),
                                    (0, 0, 255), 1)
            b = b + 20

    index = L[m][n]
    lcs = [""] * (index + 1)
    lcs[index] = ""
    i = m
    j = n
    b = b - 20
    cv2.putText(image, str(L[i][j]), (a, b), cv2.FONT_HERSHEY_SIMPLEX, 0.2,
                (255, 70, 89), 1, 1)
    while i > 0 and j > 0:
        if X[i - 1] == Y[j - 1]:
            lcs[index - 1] = X[i - 1]
            i -= 1
            j -= 1
            a = a - 40
            b = b - 20
            cv2.putText(image, str(L[i][j]), (a, b), cv2.FONT_HERSHEY_SIMPLEX,
                        0.2, (255, 70, 89), 1, 1)
            index -= 1
        elif L[i - 1][j] > L[i][j - 1]:
            i -= 1
            a = a - 40
            cv2.putText(image, str(L[i][j]), (a, b), cv2.FONT_HERSHEY_SIMPLEX,
                        0.2, (255, 70, 89), 1, 1)
        else:
            b = b - 20
            j -= 1
            cv2.putText(image, str(L[i][j]), (a, b), cv2.FONT_HERSHEY_SIMPLEX,
                        0.2, (255, 70, 89), 1, 1)
    cv2.putText(image, "".join(lcs), (9, 21), cv2.FONT_HERSHEY_SIMPLEX, 0.4,
                (25, 70, 189), 1, 1)
    print("LCS of " + X + " and " + Y + " is " + "".join(lcs))
Exemplo n.º 42
0
    def __init__(self,
                 hidden_dims,
                 input_dim=3 * 32 * 32,
                 num_classes=10,
                 dropout=0,
                 use_batchnorm=False,
                 reg=0.0,
                 weight_scale=1e-2,
                 dtype=np.float32,
                 seed=None):
        """
        Initialize a new FullyConnectedNet.

        Inputs:
        - hidden_dims: A list of integers giving the size of each hidden layer.
        - input_dim: An integer giving the size of the input.
        - num_classes: An integer giving the number of classes to classify.
        - dropout: Scalar between 0 and 1 giving dropout strength. If dropout=0 then
          the network should not use dropout at all.
        - use_batchnorm: Whether or not the network should use batch normalization.
        - reg: Scalar giving L2 regularization strength.
        - weight_scale: Scalar giving the standard deviation for random
          initialization of the weights.
        - dtype: A numpy datatype object; all computations will be performed using
          this datatype. float32 is faster but less accurate, so you should use
          float64 for numeric gradient checking.
        - seed: If not None, then pass this random seed to the dropout layers. This
          will make the dropout layers deteriminstic so we can gradient check the
          model.
        """
        self.use_batchnorm = use_batchnorm
        self.use_dropout = dropout > 0
        self.reg = reg
        self.num_layers = 1 + len(hidden_dims)
        self.dtype = dtype
        self.params = {}

        ############################################################################
        # TODO: Initialize the parameters of the network, storing all values in    #
        # the self.params dictionary. Store weights and biases for the first layer #
        # in W1 and b1; for the second layer use W2 and b2, etc. Weights should be #
        # initialized from a normal distribution with standard deviation equal to  #
        # weight_scale and biases should be initialized to zero.                   #
        #                                                                          #
        # When using batch normalization, store scale and shift parameters for the #
        # first layer in gamma1 and beta1; for the second layer use gamma2 and     #
        # beta2, etc. Scale parameters should be initialized to one and shift      #
        # parameters should be initialized to zero.                                #
        ############################################################################
        # Find random weights (with mean 0) for the first layer based on the scale
        # defined while maintaining the right dimensions
        W1_layer1 = np.random.normal(0, weight_scale,
                                     input_dim * hidden_dims[0])
        # Store first layer weights in the params dictionary, reshaping just to be sure
        self.params["W1"] = W1_layer1.reshape((input_dim, hidden_dims[0]))
        # Initialize first layer biases with zeros
        self.params['b1'] = np.zeros((hidden_dims[0]))

        # Loop through initialization for intermediary layers
        for i in xrange(1, self.num_layers - 1):
            # Initialize the indexed weights in the dictionary with random numbers with mean 0 and defined weight scale,
            # trying to maintain dimensionality
            self.params['W' + str(i + 1)] = np.random.normal(
                0, weight_scale, hidden_dims[i - 1] * hidden_dims[i]).reshape(
                    (hidden_dims[i - 1], hidden_dims[i]))
            # Initialize the indexed biases in the dictionary with zeros
            self.params['b' + str(i + 1)] = np.zeros((hidden_dims[i]))

        # Initialize the final layer weights with random numbers with mean 0 and defined weight scale.
        self.params['W' + str(self.num_layers)] = np.random.normal(
            0, weight_scale, hidden_dims[-1] * num_classes).reshape(
                (hidden_dims[-1], num_classes))
        # Initialize the final layer biases in the dictionary with zeros
        self.params['b' + str(self.num_layers)] = np.zeros((num_classes))

        # If batchnorm is being used, initialize the relevant parameters (gamma and beta).
        # Maintain the right dimensionality
        if self.use_batchnorm:
            for i in xrange(1, self.num_layers):
                self.params['beta' + str(i)] = np.zeros(hidden_dims[i - 1])
                self.params['gamma' + str(i)] = np.ones(hidden_dims[i - 1])

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # When using dropout we need to pass a dropout_param dictionary to each
        # dropout layer so that the layer knows the dropout probability and the mode
        # (train / test). You can pass the same dropout_param to each dropout layer.
        self.dropout_param = {}
        if self.use_dropout:
            self.dropout_param = {'mode': 'train', 'p': dropout}
            if seed is not None:
                self.dropout_param['seed'] = seed

        # With batch normalization we need to keep track of running means and
        # variances, so we need to pass a special bn_param object to each batch
        # normalization layer. You should pass self.bn_params[0] to the forward pass
        # of the first batch normalization layer, self.bn_params[1] to the forward
        # pass of the second batch normalization layer, etc.
        self.bn_params = []
        if self.use_batchnorm:
            self.bn_params = [{
                'mode': 'train'
            } for i in range(self.num_layers - 1)]

        # Cast all parameters to the correct datatype
        for k, v in self.params.items():
            self.params[k] = v.astype(dtype)
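
A hedged construction sketch for this initializer (assuming it lives in the usual CS231n-style FullyConnectedNet class; the layer sizes and hyperparameters below are illustrative):

import numpy as np

# Two hidden layers (100 and 50 units) on flattened 3x32x32 inputs,
# with dropout and batch normalization enabled.
net = FullyConnectedNet([100, 50],
                        input_dim=3 * 32 * 32,
                        num_classes=10,
                        dropout=0.5,
                        use_batchnorm=True,
                        reg=1e-2,
                        weight_scale=5e-2,
                        dtype=np.float64,   # float64 when gradient checking
                        seed=0)
print(net.params['W1'].shape)   # (3072, 100)
print(net.params['W3'].shape)   # (50, 10)
print(sorted(k for k in net.params if k.startswith('gamma')))   # ['gamma1', 'gamma2']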
Exemplo n.º 43
0
  '百': 100,
  '千': 1000
}

MULTIPLES = {  # noqa
  '万': 10000,
  '億': 100000000,
  '兆': 1000000000000
}

NUMBERS = {  # noqa
  x[1]: x[0] + 1
  for x in enumerate(('一', '二', '三', '四', '五', '六', '七', '八', '九', '十'))
}

NUMERICS = list(map(str, xrange(0, 10)))

KANJI_NUMBER_MAP = {  # noqa
  x[1]: x[0]
  for x in enumerate(('〇', '一', '二', '三', '四', '五', '六', '七', '八', '九'))
}

MULTIBYTE_NUMBER_MAP = {  # noqa
  x[1]: x[0]
  for x in enumerate(('0', '1', '2', '3', '4', '5', '6', '7', '8', '9'))
}


class Tokenized(object):

  def __init__(self, val):
Exemplo n.º 44
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        Input / output: Same as TwoLayerNet above.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'
        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode
        scores = None
        ############################################################################
        # TODO: Implement the forward pass for the fully-connected net, computing  #
        # the class scores for X and storing them in the scores variable.          #
        #                                                                          #
        # When using dropout, you'll need to pass self.dropout_param to each       #
        # dropout forward pass.                                                    #
        #                                                                          #
        # When using batch normalization, you'll need to pass self.bn_params[0] to #
        # the forward pass for the first batch normalization layer, pass           #
        # self.bn_params[1] to the forward pass for the second batch normalization #
        # layer, etc.                                                              #
        ############################################################################
        # Container to store the cache data from each layer
        list_cache = []
        # Make a copy of the input data to make it easier to handle during iterations
        input_data = X.copy()
        # If dropout is being used
        if self.use_dropout:
            # Loop through the layers
            for i in xrange(1, self.num_layers):
                # Perform Affine Forward Pass
                a, fc_cache = affine_forward(input_data,
                                             self.params['W' + str(i)],
                                             self.params['b' + str(i)])
                # Perform Relu activation for forward pass
                r, relu_cache = relu_forward(a)
                # Use the output from Relu activation to perform dropout forward pass
                dropout_output, dropout_cache = dropout_forward(
                    r, self.dropout_param)
                # Store the cached values
                cache = (fc_cache, relu_cache, dropout_cache)
                # Store current cache to the Cache list
                list_cache.append(cache)
                # Set input data as current output for next layer
                input_data = dropout_output

            # Perform the affine forward pass for the last layer
            scores, dr_cache_ln = affine_forward(
                input_data, self.params['W' + str(self.num_layers)],
                self.params['b' + str(self.num_layers)])
            # Store current cache to Cache list
            list_cache.append(dr_cache_ln)

        # If use of batchnorm is desired
        elif self.use_batchnorm:
            # Loop through the layers
            for i in xrange(1, self.num_layers):
                # Perform Affine forward
                affine_out, fc_cache = affine_forward(
                    input_data, self.params['W' + str(i)],
                    self.params['b' + str(i)])
                # Perform Batchnorm first then Relu (Combo 1)
                bn_out, bn_cache = batchnorm_forward(
                    affine_out, self.params['gamma' + str(i)],
                    self.params['beta' + str(i)], self.bn_params[i - 1])
                relu_out, relu_cache = relu_forward(bn_out)
                # Perform Relu activation & then Batchnorm (Combo 2) for Question 2
                # relu_out, relu_cache = relu_forward(affine_out)
                # bn_out, bn_cache = batchnorm_forward(relu_out, self.params['gamma'+str(i)], self.params['beta'+str(i)], self.bn_params[i-1])
                # Store all the cache
                batchnorm_cache = (fc_cache, bn_cache, relu_cache)
                list_cache.append(batchnorm_cache)
                input_data = relu_out

        # Otherwise run normal mode
        else:
            # Loop through the layers
            for i in xrange(1, self.num_layers):
                # Perform forward pass with Relu Activation at given indexed layer
                layer_output, layer_cache = affine_relu_forward(
                    input_data, self.params['W' + str(i)],
                    self.params['b' + str(i)])
                # Store current cache to the cache list
                list_cache.append(layer_cache)
                # Set input data as current output for next layer
                input_data = layer_output

        # Perform the affine forward pass for the last layer
        scores, cache_ln = affine_forward(
            input_data, self.params['W' + str(self.num_layers)],
            self.params['b' + str(self.num_layers)])
        # Store current cache to Cache list
        list_cache.append(cache_ln)

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If test mode return early
        if mode == 'test':
            return scores

        loss, grads = 0.0, {}
        ############################################################################
        # TODO: Implement the backward pass for the fully-connected net. Store the #
        # loss in the loss variable and gradients in the grads dictionary. Compute #
        # data loss using softmax, and make sure that grads[k] holds the gradients #
        # for self.params[k]. Don't forget to add L2 regularization!               #
        #                                                                          #
        # When using batch normalization, you don't need to regularize the scale   #
        # and shift parameters.                                                    #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        # Evaluate the Softmax loss and derivative of scores
        loss, dout = softmax_loss(scores, y)
        # Initialize containers to store Derivatives of weights and biases
        list_dw = []
        list_db = []
        list_dgamma = []
        list_dbeta = []

        # Extract and remove the last layer entry in the Cache List
        cache = list_cache.pop()
        # Perform affine backward pass on the cached data for the last layer
        dx, dw, db = affine_backward(dout, cache)
        # Store the derivative of weights and biases at index 0 of respective lists
        list_dw.insert(0, dw)
        list_db.insert(0, db)
        dout = dx

        # If Dropout is being used
        if self.use_dropout:

            # Loop through the cached entries for all the intermediary layers
            for i in xrange(len(list_cache)):
                # Extract and remove the last entry in the cache list
                cache = list_cache.pop()
                # Extract the data from the cache file
                fc_cache, relu_cache, dropout_cache = cache
                # Perform dropout backward pass
                dd = dropout_backward(dout, dropout_cache)
                # Perform Relu activation for backward pass
                dr = relu_backward(dd, relu_cache)
                # Perform Affine backward Pass
                dx, dw, db = affine_backward(dr, fc_cache)
                # Update list of derivatives of weights and biases
                list_dw.insert(0, dw)
                list_db.insert(0, db)
                # Set derivative of output as derivative of x
                dout = dx

            para_loss = 0

            # Loop through the values in list of derivatives of weights
            for i in xrange(len(list_dw)):
                # Apply regularization to the weights
                W = self.params['W' + str(i + 1)]
                list_dw[i] += self.reg * W
                # Use para_loss variable to store the iterative penalty terms for the regularization
                para_loss += np.sum(W**2)
            # Regularize the loss
            loss += 0.5 * self.reg * para_loss

            # Loop through and update the grads dictionary entries for derivatives of weights and biases
            for i in xrange(len(list_dw)):
                grads['W' + str(i + 1)] = list_dw[i]
                grads['b' + str(i + 1)] = list_db[i]

        # If use of batchnorm is desired
        elif self.use_batchnorm:
            # Loop through the cached entries for all the intermediary layers
            for i in xrange(len(list_cache)):
                # Get the last entry from the cache list and store relevant layer caches
                cache = list_cache.pop()
                fc_cache, bn_cache, relu_cache = cache
                # Perform Relu Backward Pass then Batchnorm Backward (Combo1)
                drelu_out = relu_backward(dout, relu_cache)
                dbn_out, dgamma, dbeta = batchnorm_backward(
                    drelu_out, bn_cache)
                dx, dw, db = affine_backward(dbn_out, fc_cache)
                # Perform Batchnorm Backward then Relu Backward Pass  (Combo2) for Question 2
                # dbn_out, dgamma, dbeta = batchnorm_backward(dout, bn_cache)
                # drelu_out = relu_backward(dbn_out, relu_cache)
                # dx, dw, db = affine_backward(drelu_out, fc_cache)
                # Store relevant gradients in the containers and update computed dx for next layer
                list_dw.insert(0, dw)
                list_db.insert(0, db)
                list_dgamma.insert(0, dgamma)
                list_dbeta.insert(0, dbeta)
                dout = dx

            # Regularize the loss and compute the parametric loss for all layers
            para_loss = 0
            for i in xrange(len(list_dw)):
                list_dw[i] += self.reg * self.params['W' + str(i + 1)]
                para_loss += np.sum((self.params['W' + str(i + 1)])**2)
                # Store the gradients of Weights and biases for all the layers
                grads['W' + str(i + 1)] = list_dw[i]
                grads['b' + str(i + 1)] = list_db[i]

            # Add the gamma/beta penalties to the loss and store the respective gradients of gamma and beta
            for i in xrange(len(list_dgamma)):
                list_dgamma[i] += self.reg * self.params['gamma' + str(i + 1)]
                list_dbeta[i] += self.reg * self.params['beta' + str(i + 1)]
                para_loss += np.sum(
                    (self.params['gamma' + str(i + 1)])**2) + np.sum(
                        (self.params['beta' + str(i + 1)])**2)
                grads['gamma' + str(i + 1)] = list_dgamma[i]
                grads['beta' + str(i + 1)] = list_dbeta[i]

            # Put it all together to find the total loss.
            loss += 0.5 * self.reg * para_loss

        # If dropout is not specified, run normal mode
        else:
            # Loop through the cached entries for all the intermediary layers
            for i in xrange(len(list_cache)):
                # Extract and remove the last entry in the cache list
                cache = list_cache.pop()
                # Perform Backward pass with Relu activation
                dx, dw, db = affine_relu_backward(dout, cache)
                # Update list of derivatives of weights and biases
                list_dw.insert(0, dw)
                list_db.insert(0, db)
                # Set derivative of output as derivative of x
                dout = dx
            para_loss = 0

            # Loop through the values in list of derivatives of weights
            for i in xrange(len(list_dw)):
                # Apply regularization to the weights
                W = self.params['W' + str(i + 1)]
                list_dw[i] += self.reg * W
                # Use para_loss variable to store the iterative penalty terms for the regularization
                para_loss += np.sum(W**2)
            # Regularize the loss
            loss += 0.5 * self.reg * para_loss

            # Loop through and update the grads dictionary entries for derivatives of weights and biases
            for i in xrange(len(list_dw)):
                grads['W' + str(i + 1)] = list_dw[i]
                grads['b' + str(i + 1)] = list_db[i]
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        return loss, grads
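
A short usage sketch for the loss method above, assuming `net` is a FullyConnectedNet instance like the one sketched earlier; the random minibatch is illustrative:

N = 4
X = np.random.randn(N, 3 * 32 * 32)
y = np.random.randint(10, size=N)

# With labels: training mode, returns the loss and a gradient for every parameter.
loss, grads = net.loss(X, y)
print(loss, sorted(grads.keys())[:4])

# Without labels: test mode, returns only the class scores.
scores = net.loss(X)
print(scores.shape)   # (4, 10)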
Exemplo n.º 45
0
    def select_by_expr(
        self,
        expr,
        uniprots=None,
        return_uniprots=False,
    ):
        """
        Selects UniProts based on an expression of Gene Ontology terms.
        Operator precedence not considered, please use parentheses.
        Return indices of the selected elements in the ``uniprots`` list
        or the set of selected UniProt IDs.
        
        :param str expr:
            An expression of Gene Ontology terms. E.g.
            ``'(GO:0005576 and not GO:0070062) or GO:0005887'``. Parentheses
            and operators ``and``, ``or`` and ``not`` can be used.
        :param bool return_uniprots:
            By default returns list of indices; if ``True`` returns a set of
            the selected UniProt IDs.
        """

        ops = {
            'and': 'intersection',
            'or': 'union',
        }

        # if no UniProts provided does not make sense to return indices
        return_uniprots = return_uniprots or uniprots is None

        uniprots = uniprots or sorted(self.all_uniprots())

        if isinstance(expr, basestring):

            # tokenizing expression if it is a string
            # (method is recursive)
            expr = _rego.findall(expr)

        # initial values
        result = set()
        stack = []
        sub = False
        negate = False
        op = None
        this_set = None

        for it in expr:

            # processing expression by tokens

            # we are in a sub-selection part
            if sub:

                if it == ')':

                    # token is a closing parenthesis
                    # execute sub-selection
                    this_set = self.select_by_expr(
                        expr=stack,
                        uniprots=uniprots,
                    )
                    # empty the stack and leave sub-selection mode
                    stack = []
                    sub = False

                else:

                    # token is something else
                    # add to sub-selection stack
                    stack.append(it)

            # we do actual processing of the expression
            elif it == 'not':

                # token is negation
                # turn on negation for the next set
                negate = True
                continue

            # open a sub-selection part
            elif it == '(':

                # token is a parenthesis
                # start a new sub-selection
                sub = True
                continue

            elif it[:3] == 'GO:':

                # token is a GO term
                # get the vertex selection by the single term method
                this_set = self._select_by_go(it)

                if negate:

                    # take the inverse of the current set
                    this_set = set(xrange(len(uniprots))) - this_set
                    # set negation again to False
                    negate = False

            elif it in ops:

                # token is an operator
                # set it for use at the next operation
                op = ops[it]

            # we found a set
            if this_set is not None:

                # and an operator
                if op is not None:

                    result = getattr(result, op)(this_set)

                # this normally happens only at the first set
                else:

                    result = this_set

                this_set = None
                op = None

        return self._uniprot_return(result, uniprots, return_uniprots)
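
A hedged usage sketch of the selector above, assuming `annot` is the pypath annotation object that carries this method; the GO term IDs are the ones from the docstring:

# UniProt IDs annotated as extracellular but not exosome-associated,
# or as integral components of the plasma membrane.
selected = annot.select_by_expr(
    '(GO:0005576 and not GO:0070062) or GO:0005887',
    return_uniprots=True,
)
print(len(selected), sorted(selected)[:5])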
Exemplo n.º 46
0
    def _calc_diagonal_pars(self, gals, doRaise=True):
        """
        Calculate the model parameters and diagonal elements of the covariance
        matrix (one color at a time).

        Sets self.pars.sigma, self.pars.covmat_amp, self.pars.cXX, self.pars.slopeXX

        Parameters
        ----------
        gals: `redmapper.GalaxyCatalog`
           Galaxy catalog with fields required for fit.
        doRaise: `bool`, optional
           Raise if there's a problem with the background?  Default is True.
        """

        # The main routine to compute the red sequence on the diagonal

        ncol = self.config.nmag - 1

        galcolor = gals.galcol
        galcolor_err = gals.galcol_err

        # compute the pivot mags
        spl = CubicSpline(self.pars.pivotmag_z, self.pars.pivotmag)
        pivotmags = spl(gals.z)

        # And set the right probabilities
        if self.config.calib_use_pcol:
            probs = gals.pcol
        else:
            probs = gals.p

        # Figure out the order of the colors for luptitude corrections
        mags = np.zeros((gals.size, self.config.nmag))

        if self.do_lupcorr:
            col_indices = np.zeros(ncol, dtype=np.int32)
            sign_indices = np.zeros(ncol, dtype=np.int32)
            mind_indices = np.zeros(ncol, dtype=np.int32)

            c = 0
            for j in xrange(self.config.ref_ind, self.config.nmag):
                col_indices[c] = j - 1
                sign_indices[c] = -1
                mind_indices[c] = j
                c += 1
            for j in xrange(self.config.ref_ind - 2, -1, -1):
                col_indices[c] = j
                sign_indices[c] = 1
                mind_indices[c] = j
                c += 1

            lups = np.zeros_like(mags)

            mags[:, self.config.ref_ind] = gals.mag[:, self.config.ref_ind]
            flux = 10.**((mags[:, self.config.ref_ind] - self.lupzp) / (-2.5))
            lups[:, self.config.ref_ind] = 2.5 * np.log10(
                1.0 / self.config.b[self.config.ref_ind]) - np.arcsinh(
                    0.5 * flux /
                    self.bnmgy[self.config.ref_ind]) / (0.4 * np.log(10.0))
        else:
            col_indices = np.arange(ncol)
            sign_indices = np.ones(ncol, dtype=np.int32)
            mind_indices = col_indices

        # One color at a time along the diagonal
        for c in xrange(ncol):
            starttime = time.time()

            # The order is given by col_indices, which ensures that we work from the
            # reference mag outward
            j = col_indices[c]
            sign = sign_indices[c]
            mind = mind_indices[c]

            self.config.logger.info("Working on diagonal for color %d" % (j))

            col = galcolor[:, j]
            col_err = galcolor_err[:, j]

            # Need to go through the _ndarray because ztag and zstag are strings
            cvals = np.zeros(self.pars._ndarray[self.ztag[j]].size)
            svals = np.zeros(self.pars._ndarray[self.zstag[j]].size)
            scvals = np.zeros(self.pars.covmat_z.size) + 0.05
            photo_err = np.zeros_like(cvals)

            # Calculate median truncation
            spl = CubicSpline(self.pars.pivotmag_z, self.pars.medcol[:, j])
            med = spl(gals.z)
            spl = CubicSpline(self.pars.pivotmag_z,
                              self.pars.medcol_width[:, j])
            sc = spl(gals.z)

            # What is the maximum scatter in each node?
            # This is based on the median fit, which does not include photometric
            # error, and should always be larger.  This helps regularize the edges
            # where things otherwise can run away.
            scatter_max = spl(self.pars.covmat_z)

            u, = np.where(
                (galcolor[:, j] > (med - self.config.calib_color_nsig * sc))
                & (galcolor[:, j] < (med + self.config.calib_color_nsig * sc)))
            trunc = self.config.calib_color_nsig * sc[u]

            dmags = gals.refmag - pivotmags

            # And the starting values...
            # Note that this returns the slope values (svals) at the cvals nodes,
            # but those might not be the same as the slope nodes, so we approximate
            cvals_temp, svals_temp, _ = self._compute_startvals(
                self.pars._ndarray[self.ztag[j]],
                gals.z[u],
                col[u],
                xval=dmags[u],
                fit=True,
                mincomp=5)
            cvals[:] = cvals_temp
            inds = np.searchsorted(self.pars._ndarray[self.ztag[j]],
                                   self.pars._ndarray[self.zstag[j]])
            svals[:] = svals_temp[inds]

            # And do the luptitude correction if necessary.
            if self.do_lupcorr:
                lupcorr = self._compute_single_lupcorr(j, cvals, svals, gals,
                                                       dmags, mags, lups, mind,
                                                       sign)
            else:
                lupcorr = np.zeros(gals.size)

            # We fit in stages: first the mean, then the slope, then the scatter,
            # and finally all three
            rsfitter = RedSequenceFitter(
                self.pars._ndarray[self.ztag[j]],
                gals.z[u],
                col[u],
                col_err[u],
                dmags=dmags[u],
                trunc=trunc,
                slope_nodes=self.pars._ndarray[self.zstag[j]],
                scatter_nodes=self.pars.covmat_z,
                lupcorrs=lupcorr[u],
                probs=probs[u],
                bkgs=self.bkg.lookup_diagonal(j,
                                              col[u],
                                              gals.refmag[u],
                                              doRaise=doRaise),
                scatter_max=scatter_max,
                use_scatter_prior=True)

            # fit the mean
            cvals, = rsfitter.fit(cvals, svals, scvals, fit_mean=True)
            # Update the lupcorr...
            if self.do_lupcorr:
                rsfitter._lupcorrs[:] = self._compute_single_lupcorr(
                    j, cvals, svals, gals, dmags, mags, lups, mind, sign)[u]
            # fit the slope
            svals, = rsfitter.fit(cvals, svals, scvals, fit_slope=True)
            # fit the scatter
            scvals, = rsfitter.fit(cvals, svals, scvals, fit_scatter=True)
            # fit combined
            cvals, svals, scvals = rsfitter.fit(cvals,
                                                svals,
                                                scvals,
                                                fit_mean=True,
                                                fit_slope=True,
                                                fit_scatter=True)
            # Re-fit...
            #cvals, svals, scvals = rsfitter.fit(cvals, svals, scvals,
            #                                    fit_mean=True, fit_slope=True, fit_scatter=True)

            # And record in the parameters
            self.pars._ndarray[self.ctag[j]] = cvals
            self.pars._ndarray[self.stag[j]] = svals
            self.pars.sigma[j, j, :] = scvals
            self.pars.covmat_amp[j, j, :] = scvals**2.

            # And print the time taken
            self.config.logger.info('Done in %.2f seconds.' %
                                    (time.time() - starttime))
Exemplo n.º 47
0
def softmax_loss_naive(W, X, y, reg):
    """
  Softmax loss function, naive implementation (with loops)

  Inputs have dimension D, there are C classes, and we operate on minibatches
  of N examples.

  Inputs:
  - W: A numpy array of shape (D, C) containing weights.
  - X: A numpy array of shape (N, D) containing a minibatch of data.
  - y: A numpy array of shape (N,) containing training labels; y[i] = c means
    that X[i] has label c, where 0 <= c < C.
  - reg: (float) regularization strength

  Returns a tuple of:
  - loss as single float
  - gradient with respect to weights W; an array of same shape as W
  """
    # Initialize the loss and gradient to zero.

    loss = 0.0
    dW = np.zeros_like(W)

    #############################################################################
    # TODO:                                                                     #
    # Compute the gradient of the loss function and store it dW.                #
    # Rather that first computing the loss and then computing the derivative,   #
    # it may be simpler to compute the derivative at the same time that the     #
    # loss is being computed. You may need to modify some of the                #
    # code above to compute the gradient.                                       #
    #############################################################################

    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)
    num_train = X.shape[0]
    num_classes = W.shape[1]

    for i in xrange(num_train):
        # Compute the scores
        scores = X[i].dot(W)
        scores -= np.max(scores)  # To avoid numerical issues

        # Apply softmax to the scores
        q = np.exp(scores) / np.sum(np.exp(scores))

        # Compute the cross-entropy loss
        loss += -np.log(q[y[i]])

        # Compute the gradient of W using the chain rule and the gradient
        # of the scores (q_i - y_i).
        for j in xrange(num_classes):
            dW[:, j] += (q - (j == y[i]))[j] * X[i]

    # Divide by the number of training examples
    loss /= num_train
    dW /= num_train

    # Add the regularization to the loss
    loss += 0.5 * reg * np.sum(W * W)

    # Add the regularization to the gradient
    dW += reg * W

    #############################################################################
    #                          END OF YOUR CODE                                 #
    #############################################################################

    return loss, dW
Exemplo n.º 48
0
    def _compute_startvals(self,
                           nodes,
                           z,
                           val,
                           xval=None,
                           err=None,
                           median=False,
                           fit=False,
                           mincomp=3):
        """
        Compute the starting fit values using a simple algorithm.

        Must select one (and only one) of median=True (median fit) or
        fit=True (weighted mean fit).

        Parameters
        ----------
        nodes: `np.array`
           Float array of redshift nodes
        z: `np.array`
           Float array of redshifts
        val: `np.array`
           Float array of values to fit (e.g. refmag, color)
        xval: `np.array`, optional
           X-axis value for color-magnitude relation if fitting slope.
           Usually refmag.
           Default is None, which means not fitting a slope.
        err: `np.array`, optional
           Float array of error on val.  Not used if fitting median.
           Default is None.
        median: `bool`, optional
           Perform median fit.  Default is False.
        fit: `bool`, optional
           Perform weighted mean fit.  Default is False.
        """
        def _linfunc(p, x, y):
            return (p[1] + p[0] * x) - y

        if (not median and not fit) or (median and fit):
            raise RuntimeError(
                "Must select one and only one of median and fit")

        if median:
            mvals = np.zeros(nodes.size)
            scvals = np.zeros(nodes.size)
        else:
            cvals = np.zeros(nodes.size)
            svals = np.zeros(nodes.size)

        if err is not None:
            if err.size != val.size:
                raise ValueError("val and err must be the same length")

            # default all to 0.1
            evals = np.zeros(nodes.size) + 0.1
        else:
            evals = None

        for i in xrange(nodes.size):
            if i == 0:
                zlo = nodes[0]
            else:
                zlo = (nodes[i - 1] + nodes[i]) / 2.
            if i == nodes.size - 1:
                zhi = nodes[i]
            else:
                zhi = (nodes[i] + nodes[i + 1]) / 2.

            u, = np.where((z > zlo) & (z < zhi))

            if u.size < mincomp:
                if i > 0:
                    if median:
                        mvals[i] = mvals[i - 1]
                        scvals[i] = scvals[i - 1]
                    else:
                        cvals[i] = cvals[i - 1]
                        svals[i] = svals[i - 1]

                    if err is not None:
                        evals[i] = evals[i - 1]
            else:
                if median:
                    mvals[i] = np.median(val[u])
                    scvals[i] = np.median(np.abs(val[u] - mvals[i]))
                else:
                    # Use a separate name so the `fit` flag argument is not shadowed
                    res = least_squares(_linfunc, [0.0, 0.0],
                                        loss='soft_l1',
                                        args=(xval[u], val[u]))
                    cvals[i] = res.x[1]
                    svals[i] = np.clip(res.x[0], None, 0.0)

                if err is not None:
                    evals[i] = np.median(err[u])

        if median:
            return mvals, scvals
        else:
            return cvals, svals, evals
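
A hedged call sketch for the helper above, using the median branch on synthetic data; `calibrator` stands in for whatever instance of the containing class is at hand, and the toy arrays are assumptions:

import numpy as np

nodes = np.linspace(0.1, 0.6, 6)                    # redshift nodes
z = np.random.uniform(0.1, 0.6, size=2000)          # synthetic galaxy redshifts
val = 1.5 * z + 0.02 * np.random.randn(z.size)      # synthetic values to fit

# median=True returns per-node medians and median absolute deviations.
mvals, scvals = calibrator._compute_startvals(nodes, z, val, median=True)
print(mvals)
print(scvals)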
Exemplo n.º 49
0
def _cigar_fix_lengths(cigar, sequence):
    """

    :return:
    """
    # Assign length to -1's
    #
    # Since N's aren't mapped we look at the surrounding M's to find the length of the N's
    #
    # Example: 35M49N65M ==> 4M150D31M-1N65M, the -1 will be corrected by finding the last position of the previous
    # M and first position of the next M
    #
    # there are a few special cases that are handled
    # since there were multiple mappings, we will need to figure out the location on the N's
    done = False

    while not done:
        done = True

        # find first element without a length
        i = 0
        for cm in cigar:
            if cm.length == -1:
                break
            i += 1

        if i == len(cigar):
            done = True
            break

        LOG.debug("Found '{0}' at {1}: {2}".format(cm.code, i, cm))

        before = None
        after = None

        # Simple case is surrounded by mapping positions, but might not be the case
        for x in reversed(xrange(i)):
            if cigar[x].code == CIGAR_M:
                before = cigar[x]
                break

        for x in xrange(i + 1, len(cigar)):
            if cigar[x].code == CIGAR_M:
                after = cigar[x]
                break

        # special case of 89M2000N11M
        # what happens when this is converted to 89M-1N11S (no M at end)
        # we should have 89M11S

        LOG.debug("Before: {0}".format(before))
        LOG.debug("After: {0}".format(after))

        # check if all cigar elements from here to end do not have a length
        a = i
        while a < len(cigar) - 1:
            if cigar[a].length != -1:
                break
            a += 1

        # if a == len(cigar) - 1 then all the rest have no length
        LOG.debug("a={0}, len(cigar) - 1={1}".format(
            a,
            len(cigar) - 1))
        if (a == len(cigar) - 1
                and cigar[a].start == -1) or not after or not before:
            # take the rest as a clip
            LOG.debug("Found a clip")
            temp_cigar_mappings = cigar[:i]
            temp_total = 0
            for t in temp_cigar_mappings:
                if t.code in [CIGAR_M, CIGAR_I, CIGAR_S]:
                    temp_total += t.length

            temp_cigar_mappings.append(
                Cigar(CIGAR_S,
                      len(sequence) - temp_total, -1, -1))
            cigar = temp_cigar_mappings
            done = True
        else:
            c = cigar[i]
            new_c = Cigar(c.code, after.start - before.end, before.end,
                          after.start)
            LOG.debug("Replacing, old = {0}, new = {1}".format(c, new_c))
            cigar[i] = new_c

            done = False

    LOG.debug("Removing 0 length elements, if any")
    new_cigar = []
    for cm in cigar:
        if cm.length == 0:
            LOG.debug("Removing {}".format(cm))
            continue
        new_cigar.append(cm)

    return new_cigar
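
A standalone sketch of the length fix, under the assumption (suggested by the constructor calls above) that Cigar is a namedtuple with fields (code, length, start, end): the unknown length between two mapped blocks is recovered from the end of the previous M and the start of the next M.

from collections import namedtuple

Cigar = namedtuple('Cigar', ['code', 'length', 'start', 'end'])
CIGAR_M, CIGAR_N = 'M', 'N'   # hypothetical codes

cigar = [
    Cigar(CIGAR_M, 35, 100, 135),   # mapped block ending at 135
    Cigar(CIGAR_N, -1, -1, -1),     # gap whose length is not yet known
    Cigar(CIGAR_M, 65, 285, 350),   # next mapped block starting at 285
]

before, gap, after = cigar
fixed = Cigar(gap.code, after.start - before.end, before.end, after.start)
print(fixed)   # Cigar(code='N', length=150, start=135, end=285)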
Exemplo n.º 50
0
    def _make_diagnostic_plots(self, gals):
        """
        Make diagnostic plots.

        Parameters
        ----------
        gals: `redmapper.GalaxyCatalog`
           Galaxy catalog being fit.  Must contain zred information.
        """

        import matplotlib.pyplot as plt

        # what plots do we want?
        # Do we want to split this out into different modules?

        # For each color, plot
        #  Color(z)
        #  Slope(z)
        #  scatter(z)
        # And a combined
        #  All off-diagonal r value plots

        zredstr = RedSequenceColorPar(self.config.parfile, zbinsize=0.005)

        for j in xrange(self.config.nmag - 1):
            fig = plt.figure(figsize=(10, 6))
            fig.clf()

            zredstr.plot_redsequence_diag(fig, j, self.config.bands)
            fig.savefig(
                os.path.join(
                    self.config.outpath, self.config.plotpath, '%s_%s-%s.png' %
                    (self.config.d.outbase, self.config.bands[j],
                     self.config.bands[j + 1])))
            plt.close(fig)

        fig = plt.figure(figsize=(10, 6))
        fig.clf()
        zredstr.plot_redsequence_offdiags(fig, self.config.bands)
        fig.savefig(
            os.path.join(self.config.outpath, self.config.plotpath,
                         '%s_offdiags.png' % (self.config.d.outbase)))

        # And two panel plot with
        #  left panel is offset, scatter, outliers as f(z)
        #  Right panel is zred vs z (whichever)
        # We need to do this for both zred and zred2.

        zbinsize = 0.02
        pcut = 0.9
        ntrial = 1000

        mlim = zredstr.mstar(gals.zred) - 2.5 * np.log10(0.2)

        use, = np.where((gals.p > pcut) & (gals.refmag < mlim)
                        & (gals.zred < 2.0))

        ugals = gals[use]

        zbins = np.arange(self.config.zrange[0], self.config.zrange[1],
                          zbinsize)

        dtype = [('ztrue', 'f4'), ('zuse', 'f4'), ('delta', 'f4'),
                 ('delta_err', 'f4'), ('delta_std', 'f4'), ('zuse_e', 'f4'),
                 ('f_out', 'f4')]

        # There are two modes to plot
        for mode in xrange(2):
            if mode == 0:
                zuse = ugals.z
                dzuse = ugals.zred - ugals.z
                zuse_e = ugals.zred_e
                xlabel = r'$z_{\mathrm{true}}$'
                ylabel_left = r'$z_{\mathrm{red}} - z_{\mathrm{true}}$'
                ylabel_right = r'$z_{\mathrm{red}}$'
                xcol = 'ztrue'
                modename = 'zred'
            else:
                zuse = ugals.zred2
                dzuse = ugals.z - ugals.zred2
                zuse_e = ugals.zred2_e
                xlabel = r'$z_{\mathrm{red2}}$'
                ylabel_left = r'$z_{\mathrm{true}} - z_{\mathrm{red2}}$'
                ylabel_right = r'$z_{\mathrm{true}}$'
                xcol = 'zuse'
                modename = 'zred2'

            zstr = np.zeros(zbins.size, dtype=dtype)

            for i, z in enumerate(zbins):
                # Get all the galaxies in the bin
                u1, = np.where((zuse >= z) & (zuse < (z + zbinsize)))

                if u1.size < 3:
                    self.config.logger.info(
                        'Warning: not enough galaxies in zbin: %.2f, %.2f' %
                        (z, z + zbinsize))
                    continue

                med = np.median(dzuse[u1])
                gsigma = 1.4826 * np.abs(dzuse[u1] - med) / zuse_e[u1]

                u2, = np.where(np.abs(gsigma) < 3.0)
                if u2.size < 3:
                    self.config.logger.info(
                        'Warning: not enough galaxies in zbin: %.2f, %.2f' %
                        (z, z + zbinsize))

                use = u1[u2]

                zstr['ztrue'][i] = np.median(ugals.z[use])
                zstr['zuse'][i] = np.median(zuse[use])
                zstr['delta'][i] = np.median(dzuse[use])

                barr = np.zeros(ntrial)
                for t in xrange(ntrial):
                    r = np.random.choice(dzuse[use],
                                         size=use.size,
                                         replace=True)
                    barr[t] = np.median(r)

                # Error on median as determined from bootstrap resampling
                zstr['delta_err'][i] = np.std(barr)

                # The typical error
                zstr['delta_std'][i] = 1.4826 * np.median(
                    np.abs(dzuse[use] - zstr['delta'][i]))

                # And outliers ...
                u4, = np.where(
                    np.abs(dzuse[u1] - zstr['delta'][i]) > 4.0 *
                    zstr['delta_std'][i])
                zstr['f_out'][i] = float(u4.size) / float(u1.size)

                zstr['zuse_e'][i] = np.median(zuse_e[use])

            # Cut out bins that didn't get a fit
            cut, = np.where(zstr['ztrue'] > 0.0)
            zstr = zstr[cut]

            # Now we can make the plots
            fig = plt.figure(figsize=(10, 6))
            fig.clf()

            # Left panel is offset, scatter, etc.
            ax = fig.add_subplot(121)
            ax.errorbar(zstr[xcol],
                        zstr['delta'],
                        yerr=zstr['delta_err'],
                        fmt='k^')
            ax.plot(self.config.zrange, [0.0, 0.0], 'k:')
            ax.plot(zstr[xcol], zstr['delta_std'], 'r-')
            ax.plot(zstr[xcol], zstr['zuse_e'], 'b-')
            ax.plot(zstr[xcol], zstr['f_out'], 'm-')
            ax.set_xlim(self.config.zrange)
            ax.set_ylim(-0.05, 0.05)
            ax.set_xlabel(xlabel)
            ax.set_ylabel(ylabel_left)

            ax = fig.add_subplot(122)
            if mode == 0:
                ax.hexbin(ugals.z,
                          ugals.zred,
                          bins='log',
                          extent=[
                              self.config.zrange[0], self.config.zrange[1],
                              self.config.zrange[0], self.config.zrange[1]
                          ])
            else:
                ax.hexbin(ugals.zred2,
                          ugals.z,
                          bins='log',
                          extent=[
                              self.config.zrange[0], self.config.zrange[1],
                              self.config.zrange[0], self.config.zrange[1]
                          ])
            ax.plot(self.config.zrange, self.config.zrange, 'r--')
            ax.set_xlim(self.config.zrange)
            ax.set_ylim(self.config.zrange)
            ax.set_xlabel(xlabel)
            ax.set_ylabel(ylabel_right)

            fig.tight_layout()
            fig.savefig(
                os.path.join(
                    self.config.outpath, self.config.plotpath,
                    '%s_%s_plots.png' % (self.config.d.outbase, modename)))

            plt.close(fig)
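
A minimal numpy sketch of the per-bin statistics computed in the loop above: the error on the bin's median offset comes from bootstrap resampling, and the robust scatter comes from the scaled median absolute deviation. The residuals here are synthetic.

import numpy as np

rng = np.random.RandomState(0)
dz = rng.normal(loc=0.001, scale=0.02, size=200)   # hypothetical zred - ztrue residuals in one bin

ntrial = 1000
boot_medians = np.zeros(ntrial)
for t in range(ntrial):
    resample = rng.choice(dz, size=dz.size, replace=True)
    boot_medians[t] = np.median(resample)

delta = np.median(dz)                                # bin offset
delta_err = np.std(boot_medians)                     # bootstrap error on the median
delta_std = 1.4826 * np.median(np.abs(dz - delta))   # MAD scaled to a Gaussian sigma
print(delta, delta_err, delta_std)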
Exemplo n.º 51
0
    def build_memory(self):
        self.global_step = tf.Variable(0, name="global_step")

        self.A = tf.Variable(
            tf.random_normal([self.nwords, self.edim], stddev=self.init_std))
        self.B = tf.Variable(
            tf.random_normal([self.nwords, self.edim], stddev=self.init_std))
        self.ASP = tf.Variable(
            tf.random_normal([self.pre_trained_target_wt.shape[0], self.edim],
                             stddev=self.init_std))
        self.C = tf.Variable(
            tf.random_normal([self.edim, self.edim], stddev=self.init_std))
        self.BL_W = tf.Variable(
            tf.random_normal([2 * self.edim, 1], stddev=self.init_std))
        self.BL_B = tf.Variable(tf.zeros([1, 1]))

        # Location Encoding
        self.T_A = tf.Variable(
            tf.random_normal([self.mem_size + 1, self.edim],
                             stddev=self.init_std))
        self.T_B = tf.Variable(
            tf.random_normal([self.mem_size + 1, self.edim],
                             stddev=self.init_std))

        # m_i = sum A_ij * x_ij + T_A_i
        Ain_c = tf.nn.embedding_lookup(self.A, self.context)
        Ain_t = tf.nn.embedding_lookup(self.T_A, self.time)
        Ain = tf.add(Ain_c, Ain_t)

        # c_i = sum B_ij * u + T_B_i
        Bin_c = tf.nn.embedding_lookup(self.B, self.context)
        Bin_t = tf.nn.embedding_lookup(self.T_B, self.time)
        Bin = tf.add(Bin_c, Bin_t)

        ASPin = tf.nn.embedding_lookup(self.ASP, self.input)
        ASPout2dim = tf.reshape(ASPin, [-1, self.edim])
        self.hid.append(ASPout2dim)

        for h in xrange(self.nhop):
            '''
            Bi-linear scoring function for a context word and aspect term
            '''
            til_hid = tf.tile(self.hid[-1], [1, self.mem_size])
            til_hid3dim = tf.reshape(til_hid, [-1, self.mem_size, self.edim])
            a_til_concat = tf.concat(axis=2, values=[til_hid3dim, Ain])
            til_bl_wt = tf.tile(self.BL_W, [self.batch_size, 1])
            til_bl_3dim = tf.reshape(til_bl_wt,
                                     [self.batch_size, -1, 2 * self.edim])
            att = tf.matmul(a_til_concat, til_bl_3dim, adjoint_b=True)
            til_bl_b = tf.tile(self.BL_B, [self.batch_size, self.mem_size])
            til_bl_3dim = tf.reshape(til_bl_b, [-1, self.mem_size, 1])
            g = tf.nn.tanh(tf.add(att, til_bl_3dim))
            g_2dim = tf.reshape(g, [-1, self.mem_size])
            P = tf.nn.softmax(g_2dim)

            probs3dim = tf.reshape(P, [-1, 1, self.mem_size])
            Bout = tf.matmul(probs3dim, Bin)
            Bout2dim = tf.reshape(Bout, [-1, self.edim])

            Cout = tf.matmul(self.hid[-1], self.C)
            Dout = tf.add(Cout, Bout2dim)

            if self.lindim == self.edim:
                self.hid.append(Dout)
            elif self.lindim == 0:
                self.hid.append(tf.nn.relu(Dout))
            else:
                F = tf.slice(Dout, [0, 0], [self.batch_size, self.lindim])
                G = tf.slice(Dout, [0, self.lindim],
                             [self.batch_size, self.edim - self.lindim])
                K = tf.nn.relu(G)
                self.hid.append(tf.concat(axis=1, values=[F, K]))
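
A numpy sketch (hypothetical shapes, no TensorFlow) of what one hop above computes: each memory slot is scored against the current aspect vector with a concatenation-based scoring layer, the scores are softmaxed over slots, and the output memory is summed with those attention weights.

import numpy as np

rng = np.random.RandomState(1)
batch, mem_size, edim = 4, 7, 8

hid = rng.randn(batch, edim)             # current aspect/query vector
Ain = rng.randn(batch, mem_size, edim)   # input memory (context embeddings)
Bin = rng.randn(batch, mem_size, edim)   # output memory
BL_W = rng.randn(2 * edim, 1)            # scoring weights
BL_B = np.zeros((1, 1))                  # scoring bias

til_hid = np.repeat(hid[:, None, :], mem_size, axis=1)   # (batch, mem, edim)
concat = np.concatenate([til_hid, Ain], axis=2)          # (batch, mem, 2*edim)
g = np.tanh(concat @ BL_W + BL_B)[..., 0]                # (batch, mem)

P = np.exp(g) / np.exp(g).sum(axis=1, keepdims=True)     # softmax over memory slots
Bout = (P[:, :, None] * Bin).sum(axis=1)                 # (batch, edim)
print(Bout.shape)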
Exemplo n.º 52
0
    def train(self,
              X,
              y,
              X_val,
              y_val,
              learning_rate=1e-3,
              learning_rate_decay=0.95,
              reg=5e-6,
              num_iters=100,
              batch_size=200,
              verbose=False):
        """
    Train this neural network using stochastic gradient descent.

    Inputs:
    - X: A numpy array of shape (N, D) giving training data.
    - y: A numpy array of shape (N,) giving training labels; y[i] = c means that
      X[i] has label c, where 0 <= c < C.
    - X_val: A numpy array of shape (N_val, D) giving validation data.
    - y_val: A numpy array of shape (N_val,) giving validation labels.
    - learning_rate: Scalar giving learning rate for optimization.
    - learning_rate_decay: Scalar giving factor used to decay the learning rate
      after each epoch.
    - reg: Scalar giving regularization strength.
    - num_iters: Number of steps to take when optimizing.
    - batch_size: Number of training examples to use per step.
    - verbose: boolean; if true print progress during optimization.
    """
        num_train = X.shape[0]
        iterations_per_epoch = max(num_train / batch_size, 1)

        # Use SGD to optimize the parameters in self.model
        loss_history = []
        train_acc_history = []
        val_acc_history = []

        for it in xrange(num_iters):
            X_batch = None
            y_batch = None

            #########################################################################
            # TODO: Create a random minibatch of training data and labels, storing  #
            # them in X_batch and y_batch respectively.                             #
            #########################################################################
            indices = np.random.choice(num_train, batch_size, replace=True)
            X_batch = X[indices]
            y_batch = y[indices]
            #########################################################################
            #                             END OF YOUR CODE                          #
            #########################################################################

            # Compute loss and gradients using the current minibatch
            loss, grads = self.loss(X_batch, y=y_batch, reg=reg)
            loss_history.append(loss)

            #########################################################################
            # TODO: Use the gradients in the grads dictionary to update the         #
            # parameters of the network (stored in the dictionary self.params)      #
            # using stochastic gradient descent. You'll need to use the gradients   #
            # stored in the grads dictionary defined above.                         #
            #########################################################################
            self.params['W1'] -= learning_rate * grads['W1']
            self.params['b1'] -= learning_rate * grads['b1']
            self.params['W2'] -= learning_rate * grads['W2']
            self.params['b2'] -= learning_rate * grads['b2']

            #########################################################################
            #                             END OF YOUR CODE                          #
            #########################################################################

            if verbose and it % 100 == 0:
                print('iteration %d / %d: loss %f' % (it, num_iters, loss))

            # Every epoch, check train and val accuracy and decay learning rate.
            if it % iterations_per_epoch == 0:
                # Check accuracy
                train_acc = np.mean(self.predict(X_batch) == y_batch)
                val_acc = np.mean(self.predict(X_val) == y_val)
                train_acc_history.append(train_acc)
                val_acc_history.append(val_acc)

                # Decay learning rate
                learning_rate *= learning_rate_decay

        return {
            'loss_history': loss_history,
            'train_acc_history': train_acc_history,
            'val_acc_history': val_acc_history,
        }
Exemplo n.º 53
0
def createCoordsPairs(l):
    coordsPairs = []
    for i in xrange(0, len(l)):
        coordsPairs.append([l[i], l[(i + 2) % len(l)]])
    return coordsPairs
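
For illustration, under Python 2 (where xrange is a builtin) the function pairs each element with the element two positions ahead, wrapping around the end of the list:

print(createCoordsPairs(['A', 'B', 'C', 'D']))
# [['A', 'C'], ['B', 'D'], ['C', 'A'], ['D', 'B']]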
Exemplo n.º 54
0
    def train(self, data):
        source_data, source_loc_data, target_data, target_label, _ = data
        N = int(math.ceil(len(source_data) / self.batch_size))
        cost = 0

        x = np.ndarray([self.batch_size, 1], dtype=np.float32)
        time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        target = np.zeros([self.batch_size, 3])  # one-hot-encoded
        context = np.ndarray([self.batch_size, self.mem_size])

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar('Train', max=N)

        rand_idx, cur = np.random.permutation(len(source_data)), 0
        for idx in xrange(N):
            if self.show: bar.next()

            context.fill(self.pad_idx)
            time.fill(self.mem_size)
            target.fill(0)
            '''
            Initialize all the padding vectors to 0 before backprop.
            TODO: Code is 5x slower due to the following initialization.
            '''
            emb_a = self.A.eval()
            emb_a[self.pad_idx, :] = 0
            emb_b = self.B.eval()
            emb_b[self.pad_idx, :] = 0
            emb_c = self.C.eval()
            emb_c[self.pad_idx, :] = 0
            emb_ta = self.T_A.eval()
            emb_ta[self.mem_size, :] = 0
            emb_tb = self.T_B.eval()
            emb_tb[self.mem_size, :] = 0
            self.sess.run(self.A.assign(emb_a))
            self.sess.run(self.B.assign(emb_b))
            self.sess.run(self.C.assign(emb_c))
            self.sess.run(self.T_A.assign(emb_ta))
            self.sess.run(self.T_B.assign(emb_tb))

            for b in xrange(self.batch_size):
                m = rand_idx[cur]
                x[b][0] = target_data[m]
                target[b][target_label[m]] = 1
                time[b, :len(source_loc_data[m])] = source_loc_data[m]
                context[b, :len(source_data[m])] = source_data[m]
                cur = cur + 1

            a, loss, self.step = self.sess.run(
                [self.optim, self.loss, self.global_step],
                feed_dict={
                    self.input: x,
                    self.time: time,
                    self.target: target,
                    self.context: context
                })
            cost += np.sum(loss)

        if self.show: bar.finish()
        _, train_acc = self.test(data)
        return cost / N / self.batch_size, train_acc
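
A small numpy sketch of the padding trick noted in the comment above: the embedding row for the padding index is reset to zero so that padded context positions contribute nothing to the memory.

import numpy as np

nwords, edim, pad_idx = 10, 4, 0
emb = np.random.RandomState(3).randn(nwords, edim)

emb[pad_idx, :] = 0                            # zero the padding row

context = np.array([5, 2, pad_idx, pad_idx])   # a padded context window
looked_up = emb[context]                       # "embedding lookup"
print(looked_up[2:])                           # the padded slots are all zeros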
Exemplo n.º 55
0
  def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
            batch_size=200, verbose=False):
    """
    Train this linear classifier using stochastic gradient descent.

    Inputs:
    - X: A numpy array of shape (N, D) containing training data; there are N
      training samples each of dimension D.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c
      means that X[i] has label 0 <= c < C for C classes.
    - learning_rate: (float) learning rate for optimization.
    - reg: (float) regularization strength.
    - num_iters: (integer) number of steps to take when optimizing
    - batch_size: (integer) number of training examples to use at each step.
    - verbose: (boolean) If true, print progress during optimization.

    Outputs:
    A list containing the value of the loss function at each training iteration.
    """
    num_train, dim = X.shape
    num_classes = np.max(y) + 1 # assume y takes values 0...K-1 where K is number of classes
    if self.W is None:
      # lazily initialize W
      self.W = 0.001 * np.random.randn(dim, num_classes)

    # Run stochastic gradient descent to optimize W
    loss_history = []
    for it in xrange(num_iters):
      X_batch = None
      y_batch = None

      #########################################################################
      # TODO:                                                                 #
      # Sample batch_size elements from the training data and their           #
      # corresponding labels to use in this round of gradient descent.        #
      # Store the data in X_batch and their corresponding labels in           #
      # y_batch; after sampling X_batch should have shape (batch_size, dim)   #
      # and y_batch should have shape (batch_size,)                           #
      #                                                                       #
      # Hint: Use np.random.choice to generate indices. Sampling with         #
      # replacement is faster than sampling without replacement.              #
      #########################################################################
      batch_indices = np.random.choice(num_train, batch_size, replace=True)
      X_batch = X[batch_indices, :]
      y_batch = y[batch_indices]
      #########################################################################
      #                       END OF YOUR CODE                                #
      #########################################################################

      # evaluate loss and gradient
      loss, grad = self.loss(X_batch, y_batch, reg)
      loss_history.append(loss)

      # perform parameter update
      #########################################################################
      # TODO:                                                                 #
      # Update the weights using the gradient and the learning rate.          #
      #########################################################################
      self.W -= learning_rate * grad
      #########################################################################
      #                       END OF YOUR CODE                                #
      #########################################################################

      if verbose and it % 100 == 0:
        print('iteration %d / %d: loss %f' % (it, num_iters, loss))

    return loss_history
Exemplo n.º 56
0
    def run(self, doRaise=True):
        """
        Run the red-sequence calibration.

        Parameters
        ----------
        doRaise: `bool`, optional
           Raise an error if background cannot be computed for any galaxies
           Default is True. Can be set to False for certain testing.
        """

        gals = GalaxyCatalog.from_galfile(self._galfile)

        if self.config.calib_use_pcol:
            use, = np.where((gals.z > self.config.zrange[0])
                            & (gals.z < self.config.zrange[1])
                            & (gals.pcol > self.config.calib_pcut))
        else:
            use, = np.where((gals.z > self.config.zrange[0])
                            & (gals.z < self.config.zrange[1])
                            & (gals.p > self.config.calib_pcut))

        if use.size == 0:
            raise RuntimeError("No good galaxies in %s!" % (self._galfile))

        gals = gals[use]

        nmag = self.config.nmag
        ncol = nmag - 1

        # Reference mag nodes for pivot
        pivotnodes = make_nodes(self.config.zrange,
                                self.config.calib_pivotmag_nodesize)

        # Covmat nodes
        covmatnodes = make_nodes(self.config.zrange,
                                 self.config.calib_covmat_nodesize)

        # correction nodes
        corrnodes = make_nodes(self.config.zrange,
                               self.config.calib_corr_nodesize)

        # correction slope nodes
        corrslopenodes = make_nodes(self.config.zrange,
                                    self.config.calib_corr_slope_nodesize)

        # volume factor (hard coded)
        volnodes = make_nodes(self.config.zrange, 0.01)

        # Start building the par dtype
        dtype = [('pivotmag_z', 'f4', pivotnodes.size),
                 ('pivotmag', 'f4', pivotnodes.size),
                 ('minrefmag', 'f4', pivotnodes.size),
                 ('maxrefmag', 'f4', pivotnodes.size),
                 ('medcol', 'f4', (pivotnodes.size, ncol)),
                 ('medcol_width', 'f4', (pivotnodes.size, ncol)),
                 ('covmat_z', 'f4', covmatnodes.size),
                 ('sigma', 'f4', (ncol, ncol, covmatnodes.size)),
                 ('covmat_amp', 'f4', (ncol, ncol, covmatnodes.size)),
                 ('covmat_slope', 'f4', (ncol, ncol, covmatnodes.size)),
                 ('corr_z', 'f4', corrnodes.size),
                 ('corr', 'f4', corrnodes.size),
                 ('corr_slope_z', 'f4', corrslopenodes.size),
                 ('corr_slope', 'f4', corrslopenodes.size),
                 ('corr_r', 'f4', corrslopenodes.size),
                 ('corr2', 'f4', corrnodes.size),
                 ('corr2_slope', 'f4', corrslopenodes.size),
                 ('corr2_r', 'f4', corrslopenodes.size),
                 ('volume_factor_z', 'f4', volnodes.size),
                 ('volume_factor', 'f4', volnodes.size)]

        # And for each color, make the nodes
        node_dict = {}
        self.ztag = [None] * ncol
        self.ctag = [None] * ncol
        self.zstag = [None] * ncol
        self.stag = [None] * ncol
        for j in xrange(ncol):
            self.ztag[j] = 'z%02d' % (j)
            self.ctag[j] = 'c%02d' % (j)
            self.zstag[j] = 'zs%02d' % (j)
            self.stag[j] = 'slope%02d' % (j)

            node_dict[self.ztag[j]] = make_nodes(
                self.config.zrange,
                self.config.calib_color_nodesizes[j],
                maxnode=self.config.calib_color_maxnodes[j])
            node_dict[self.zstag[j]] = make_nodes(
                self.config.zrange,
                self.config.calib_slope_nodesizes[j],
                maxnode=self.config.calib_color_maxnodes[j])

            dtype.extend([(self.ztag[j], 'f4', node_dict[self.ztag[j]].size),
                          (self.ctag[j], 'f4', node_dict[self.ztag[j]].size),
                          (self.zstag[j], 'f4', node_dict[self.zstag[j]].size),
                          (self.stag[j], 'f4', node_dict[self.zstag[j]].size)])

        # Make the pars ... and fill them with the defaults
        self.pars = Entry(np.zeros(1, dtype=dtype))

        self.pars.pivotmag_z = pivotnodes
        self.pars.covmat_z = covmatnodes
        self.pars.corr_z = corrnodes
        self.pars.corr_slope_z = corrslopenodes
        self.pars.volume_factor_z = volnodes

        for j in xrange(ncol):
            self.pars._ndarray[self.ztag[j]] = node_dict[self.ztag[j]]
            self.pars._ndarray[self.zstag[j]] = node_dict[self.zstag[j]]

        # And a special subset of color galaxies
        if self.config.calib_use_pcol:
            coluse, = np.where(gals.pcol > self.config.calib_color_pcut)
        else:
            coluse, = np.where(gals.p > self.config.calib_color_pcut)

        colgals = gals[coluse]

        # And a placeholder zredstr which allows us to do stuff
        self.zredstr = RedSequenceColorPar(None, config=self.config)

        # And read the color background
        self.bkg = ColorBackground(self.config.bkgfile_color)

        # And prepare for luptitude corrections
        if self.config.b[0] == 0.0:
            self.do_lupcorr = False
        else:
            self.do_lupcorr = True
            self.bnmgy = self.config.b * 1e9
            self.lupzp = 22.5

        # Compute pivotmags
        self._calc_pivotmags(colgals)

        # Compute median colors
        self._calc_medcols(colgals)

        # Compute diagonal parameters
        self._calc_diagonal_pars(gals, doRaise=doRaise)

        # Compute off-diagonal parameters
        self._calc_offdiagonal_pars(gals, doRaise=doRaise)

        # Compute volume factor
        self._calc_volume_factor(self.config.zrange[1])

        # Write out the parameter file
        self.save_pars(self.config.parfile, clobber=False)

        # Compute zreds without corrections
        # Later will want this parallelized, I think
        self._calc_zreds(gals, do_correction=False)

        # Compute correction (mode1)
        self._calc_corrections(gals)

        # Compute correction (mode2)
        self._calc_corrections(gals, mode2=True)

        # And re-save the parameter file
        self.save_pars(self.config.parfile, clobber=True)

        # Recompute zreds with corrections
        # Later will want this parallelized, I think
        self._calc_zreds(gals, do_correction=True)

        # And want to save galaxies and zreds
        zredfile = os.path.join(
            self.config.outpath,
            os.path.basename(self._galfile.rstrip('.fit') + '_zreds.fit'))
        gals.to_fits_file(zredfile)

        # Make diagnostic plots
        self._make_diagnostic_plots(gals)
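
make_nodes and Entry are redmapper internals not shown here; as a rough stand-in, the structured-array pattern used for the parameters can be sketched with plain numpy, where each field's length is set by its own node array and the nodes themselves live in the matching *_z field (the node spacing below is just np.arange and may differ from the real helper).

import numpy as np

zrange = [0.05, 0.60]
pivotnodes = np.arange(zrange[0], zrange[1], 0.10).astype('f4')
covmatnodes = np.arange(zrange[0], zrange[1], 0.05).astype('f4')

dtype = [('pivotmag_z', 'f4', pivotnodes.size),
         ('pivotmag', 'f4', pivotnodes.size),
         ('covmat_z', 'f4', covmatnodes.size),
         ('sigma', 'f4', (2, 2, covmatnodes.size))]

pars = np.zeros(1, dtype=dtype)
pars['pivotmag_z'] = pivotnodes
pars['covmat_z'] = covmatnodes
print(pars.dtype)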
Exemplo n.º 57
0
def cv_main():
    ##--------------parameters-------------------##
    space = "words"
    is_freeze = True
    hidden_size = 100  ##TODO
    num_layers = 2
    bidirectional = True
    lstm_drop_p = 0.6  ##TODO
    lstm_input_drop_p = 0.6
    linear_hidden_size = 200  ##TODO
    linear_hid_drop_p = 0.3
    batch_size = 512
    folder = 5
    early_stop = 10
    LR = 0.001
    Gamma = 0.99
    num_epochs = 150
    version = "v0.1"
    ##--------------parameters-------------------##

    kf = KFold(n_splits=folder, shuffle=True, random_state=19920618)
    all_train_df = DataSet.load_train()
    test_df = DataSet.load_test()
    test_dg = DataGenerator(data_df=test_df,
                            space=space,
                            bucket_num=5,
                            batch_size=256,
                            is_prefix_pad=False,
                            is_shuffle=False,
                            is_test=True)
    print("prepare test data generator")
    test_dg.prepare()
    item_embed = test_dg.get_item_embed_tensor(space)
    train_eval = np.zeros(len(all_train_df))
    test_eval = np.zeros((len(test_df), folder))
    for i, (train_index, val_index) in enumerate(kf.split(all_train_df)):
        print()
        train_name = version + "_cv_%s" % (i)
        xtr_df = all_train_df.iloc[train_index]
        xval_df = all_train_df.iloc[val_index]
        train_dg = DataGenerator(data_df=xtr_df,
                                 space=space,
                                 bucket_num=5,
                                 batch_size=batch_size,
                                 is_prefix_pad=False,
                                 is_shuffle=True,
                                 is_test=False)
        val_dg = DataGenerator(data_df=xval_df,
                               space=space,
                               bucket_num=5,
                               batch_size=256,
                               is_prefix_pad=False,
                               is_shuffle=False,
                               is_test=False)
        print("prepare train data generator, cv_%s" % i)
        train_dg.prepare()
        print("prepare val data generator, cv_%s" % i)
        val_dg.prepare()
        siamese_lstm = Siamese_LSTM(pre_trained_embedding=item_embed,
                                    is_freeze=is_freeze,
                                    hidden_size=hidden_size,
                                    number_layers=num_layers,
                                    lstm_dropout_p=lstm_drop_p,
                                    bidirectional=bidirectional,
                                    linear_hid_size=linear_hidden_size,
                                    linear_hid_drop_p=linear_hid_drop_p,
                                    input_drop_p=lstm_input_drop_p)
        siamese_model = Model(train_name, siamese_lstm)
        criteria = nn.BCEWithLogitsLoss()
        optimizer_ft = optim.Adam(ifilter(lambda p: p.requires_grad,
                                          siamese_lstm.parameters()),
                                  lr=LR)  ##TODO 0.001
        exp_lr_scheduler = lr_scheduler.ExponentialLR(optimizer_ft,
                                                      gamma=Gamma)  ##TODO 0.99
        ### Train
        siamese_model.train(train_dg=train_dg,
                            valid_dg=val_dg,
                            criterion=criteria,
                            optimizer=optimizer_ft,
                            scheduler=exp_lr_scheduler,
                            num_epochs=num_epochs,
                            early_stop_rounds=early_stop)
        siamese_model.plot_()
        val_pred = siamese_model.predict(val_dg).numpy()
        train_eval[val_index] = val_pred
        test_preds = siamese_model.predict(test_dg).numpy()
        test_eval[:, i] = test_preds
    train_pred_df = pd.DataFrame({"train_pred": train_eval})
    train_pred_df.to_csv(version + "_train_pred.csv", index=False)
    test_pred_df = pd.DataFrame(
        test_eval,
        columns=[version + "_test_pred_cv_%s" % (i) for i in xrange(folder)])
    test_pred_df["y_pre"] = test_pred_df.mean(axis=1)
    test_pred_df.to_csv(version + "_test_pred.csv", index=False)
    test_pred_df[["y_pre"]].to_csv(version + "_submission.csv", index=False)
Exemplo n.º 58
0
def handle_task(task, datasets_dir='/datasets', models_path='/models'):
    """
    Runs a tensorflow task.
    """
    model_config = task['model_config']
    model_type = model_config['type']
    logger.info('loading model with config %s', task['model_config'])
    model = load_from_config(task['model_config'])
    dataset_path = os.path.join(datasets_dir, task['dataset_path'])
    dataset = load_dataset(dataset_path)
    baseline_mse = dataset.get_baseline_mse()

    snapshot_dir = os.path.join(models_path, 'snapshots', model_type,
                                task['task_id'])
    snapshot = SnapshotCallback(model,
                                snapshot_dir=snapshot_dir,
                                score_metric=task.get('score_metric',
                                                      'val_rmse'))

    earlystop = EarlyStopping(monitor=task.get('score_metric', 'val_rmse'),
                              patience=12,
                              mode='min')

    callbacks = [snapshot, earlystop]

    logger.info('Baseline mse = %.4f  rmse = %.4f' %
                (baseline_mse, np.sqrt(baseline_mse)))
    model.fit(dataset,
              task['training_args'],
              final=task.get('final', False),
              callbacks=callbacks)

    output_model_path = os.path.join(models_path, 'output',
                                     '%s.h5' % task['task_id'])
    output_config = model.save(output_model_path)
    logger.info('Maximum snapshot had score %s=%.6f, saved to %s',
                snapshot.score_metric, snapshot.max_score, snapshot.max_path)
    logger.info('Minimum snapshot had score %s=%.6f, saved to %s',
                snapshot.score_metric, snapshot.min_score, snapshot.min_path)
    logger.info('Wrote final model to %s', output_model_path)

    # assume evaluation is mse
    evaluation = model.evaluate(dataset)
    training_mse = evaluation[0]

    improvement = -(training_mse - baseline_mse) / baseline_mse
    logger.info('Evaluation: %s', evaluation)
    logger.info('Baseline MSE %.5f, training MSE %.5f, improvement %.2f%%',
                baseline_mse, training_mse, improvement * 100)
    logger.info('output config: %s' % output_config)

    if model.output_dim() == 1:
        example_ranges = 10
        range_size = 20
        testing_size = dataset.get_testing_size()
        for _ in xrange(example_ranges):
            # print out some sample prediction/label pairs
            skip_to = int(np.random.random() * (testing_size - range_size))
            example_images, example_labels = (
                dataset.sequential_generator(range_size).skip(skip_to).next())

            predictions = model.predict_on_batch(example_images)
            for pred, label in zip(predictions, example_labels):
                logger.info('p=%.5f  l=%.5f', pred, label)
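
As a worked example of the improvement figure logged above, a baseline MSE of 0.040 and a training MSE of 0.030 give an improvement of 25%:

baseline_mse, training_mse = 0.040, 0.030
improvement = -(training_mse - baseline_mse) / baseline_mse
print('%.2f%%' % (improvement * 100))   # 25.00%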
Exemplo n.º 59
0
    def _process_cluster(self, cluster):
        """
        Process a single cluster with RunFirstpass.

        Parameters
        ----------
        cluster: `redmapper.Cluster`
           Cluster to compute richness.
        """
        bad = False
        iteration = 0
        done = False

        zuse = cluster.z_init.copy()

        for i in xrange(self.maxiter):
            if bad:
                done = True
                continue

            lam = cluster.calc_richness(self.mask, calc_err=False)

            if (lam < np.abs(
                    self.config.firstpass_minlambda / cluster.scaleval)):
                bad = True
                done = True
                self._reset_bad_values(cluster)
                continue

            if i < self.maxiter:
                # only on first iteration, compute z_lambda
                # Really, this should be on at most n-1th iteration
                zlam = Zlambda(cluster)
                z_lambda, z_lambda_e = zlam.calc_zlambda(cluster.redshift,
                                                         self.mask,
                                                         calc_err=True,
                                                         calcpz=False)

                if z_lambda < self.config.zrange[
                        0] or z_lambda > self.config.zrange[1]:
                    bad = True
                    done = True
                    self._reset_bad_values(cluster)
                    continue

                if not self.keepz:
                    cluster.redshift = z_lambda

        if bad:
            cluster.z_lambda = -1.0
            cluster.z_lambda_e = -1.0
            cluster.z_lambda_niter = 0
        else:
            cluster.z_lambda = z_lambda
            cluster.z_lambda_e = z_lambda_e
            cluster.z_lambda_niter = zlam.niter

        cind = np.argmin(cluster.neighbors.r)
        cluster.chisq = cluster.neighbors.chisq[cind]

        # and record the .z for the next round
        if (self.specmode):
            cluster.z = cluster.z_spec_init
        else:
            cluster.z = cluster.z_lambda

        # All done
        return bad
Exemplo n.º 60
0
def locate_localizations(
    organism=9606,
    literature=True,
    external=True,
    predictions=False,
):

    record = collections.namedtuple(
        'LocateAnnotation',
        ('source', 'location', 'cls', 'pmid', 'score'),
    )
    record.__new__.__defaults__ = (None, None, None)

    organism_uniprots = set(
        uniprot_input.all_uniprots(organism=organism, swissprot=True))

    organism_str = taxonomy.taxids[organism]
    url = urls.urls['locate']['url'] % organism_str
    fname = url.split('/')[-1][:-4]

    c = curl.Curl(
        url,
        large=True,
        default_mode='rb',
        silent=False,
        files_needed=[fname],
    )
    c.result[fname]

    parser = etree.iterparse(c.result[fname], events=('start', 'end'))

    result = collections.defaultdict(set)
    root = next(parser)
    used_elements = []

    for ev, elem in parser:

        if ev == 'end' and elem.tag == 'LOCATE_protein':

            tag_protein = elem.find('protein')
            this_uniprot = None
            this_uniprots = None
            this_entrez = None
            this_organism = (tag_protein.find('organism').text
                             if tag_protein is not None else None)
            this_class = (tag_protein.find('class').text
                          if tag_protein is not None else None)

            xrefs = elem.find('xrefs')

            if xrefs is None:
                continue

            for xref in xrefs.findall('xref'):
                src = xref.find('source')
                src_name = src.find('source_name').text

                if src_name == 'UniProtKB-SwissProt':
                    this_uniprot = src.find('accn').text

                if src_name == 'Entrez Gene':
                    this_entrez = src.find('accn').text

                if src_name == 'UniProt/SPTrEMBL' and this_uniprot is None:
                    this_uniprot = src.find('accn').text

            # if we don't know what it is, does not make sense to proceed
            if this_uniprot is None and this_entrez is None:
                continue

            if this_uniprot:
                this_uniprots = mapping.map_name(
                    this_uniprot,
                    'uniprot',
                    'uniprot',
                    ncbi_tax_id=organism,
                )

            if not this_uniprots and this_entrez:
                this_uniprots = mapping.map_name(
                    this_entrez,
                    'entrez',
                    'uniprot',
                    ncbi_tax_id=organism,
                )

            this_uniprots = set(this_uniprots) & organism_uniprots

            # if we don't know what it is, does not make sense to proceed
            if not this_uniprots:
                continue

            if external:
                # External database annotations
                extannot = elem.find('externalannot')

                if extannot is not None:
                    for extannotref in extannot.findall('reference'):
                        sources = []

                        for src in extannotref.findall('source'):
                            src_name = src.find('source_name')

                            if src_name is not None:
                                sources.append(src_name.text)

                        sources = ';'.join(sources) if sources else None
                        locations = extannotref.find('locations')

                        if locations is not None:
                            for location in locations.findall('location'):
                                for loc in location.iterchildren():
                                    if loc.tag[:4] == 'tier':
                                        this_loc = loc.text.lower().split(',')

                                        for uniprot in this_uniprots:
                                            for _loc in this_loc:
                                                result[uniprot].add(
                                                    record(
                                                        source=sources,
                                                        location=_loc.strip(),
                                                        cls=this_class,
                                                        score=None,
                                                    ))

            if predictions:
                # Predictions
                sclpred = elem.find('scl_prediction')

                if sclpred is not None:
                    for sclpred_src in sclpred.findall('source'):
                        score = float(sclpred_src.find('evaluation').text)

                        if score == 0.0:
                            continue

                        this_src = sclpred_src.find('method').text
                        this_loc = sclpred_src.find('location').text.lower()

                        if this_loc == 'no prediction':
                            continue

                        for uniprot in this_uniprots:
                            result[uniprot].add(
                                record(
                                    source=this_src,
                                    location=this_loc,
                                    cls=this_class,
                                    score=score,
                                ))

            if literature:
                # Literature curation
                lit = elem.find('literature')

                if lit is not None:

                    for litref in lit.findall('reference'):

                        locs = set()

                        for lloc in (
                                litref.find('locations').findall('location')):

                            for loc in lloc.iterchildren():
                                if loc.tag[:4] == 'tier':
                                    locs.add(loc.text.lower())

                        pmid = litref.find('source')
                        pmid = (None
                                if pmid is None else pmid.find('accn').text)

                        for loc in locs:

                            for uniprot in this_uniprots:

                                result[uniprot].add(
                                    record(
                                        source='literature',
                                        location=loc,
                                        pmid=pmid,
                                        cls=this_class,
                                        score=None,
                                    ))

        used_elements.append(elem)

        # removing used elements to keep memory low
        if len(used_elements) > 1000:
            for _ in xrange(500):
                e = used_elements.pop(0)
                e.clear()

    # closing the XML
    c.fileobj.close()
    del c

    return result
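
A standalone sketch of the streaming pattern used above (the standard library ElementTree is used here; the original relies on lxml): parse the XML incrementally, handle each record on its 'end' event, and periodically clear already-used elements so memory stays bounded.

import io
import xml.etree.ElementTree as etree

xml = io.BytesIO(
    b"<root>" + b"".join(
        b"<LOCATE_protein><protein><class>c</class></protein></LOCATE_protein>"
        for _ in range(2000)
    ) + b"</root>"
)

used_elements = []
count = 0
for ev, elem in etree.iterparse(xml, events=('start', 'end')):
    if ev == 'end' and elem.tag == 'LOCATE_protein':
        count += 1
    used_elements.append(elem)
    # removing used elements to keep memory low
    if len(used_elements) > 1000:
        for _ in range(500):
            used_elements.pop(0).clear()

print(count)   # 2000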