def call(self, input_tensor: tf.Tensor):
    """Take a dense range image and project it onto a sparse 2D grid.

    Args:
        input_tensor (tf.Tensor): Input range image. Expected shape is
            B x H x W x CK x 2, where CK is each mixture model component
            for each class. Innermost values should be the relevant xy pair.

    Returns:
        tuple: ``(presence_tensor, position_tensor)``, both
        ``sparse.SparseTensor``.

        * ``presence_tensor`` holds ``self.presence_vals`` at the quantized
          xy cell of every input point, with dense shape
          ``self.presence_shape``.
        * ``position_tensor`` holds the scaled (un-quantized) x and y values
          at the same cells, with dense shape ``self.position_shape``.
    """
    # Input size B x Hi x Wi x C x 2; reorder to B x C x Hi x Wi x 2.
    reordered_input = tf.transpose(input_tensor, perm=[0, 3, 1, 2, 4])

    # Reshape to B*C*Hi*Wi x 2 (self.input_size is the flattened point count).
    flattened_xy = tf.reshape(reordered_input, [self.input_size, 2])

    # Map xy into grid coordinates, then truncate to integer cell indices.
    scaled_xy = tf.scalar_mul(self.scale_factor, flattened_xy) + self.xy_offset
    quantized_xy = tf.cast(scaled_xy, tf.int64)

    # Index layout: [B, X, Y, C, Hi, Wi]
    # NOTE(review): presence_b / presence_chw are assumed to be precomputed
    # per-point index columns matching this layout -- confirm in the
    # constructor.
    presence_indices = tf.concat(
        [self.presence_b, quantized_xy, self.presence_chw], -1)
    presence_tensor = sparse.SparseTensor(
        indices=presence_indices,
        values=self.presence_vals,
        dense_shape=self.presence_shape)

    # Flattening yields [x0, y0, x1, y1, ...]; each point's cell index is
    # therefore repeated twice (once for x, once for y) along axis 0.
    position_vals = tf.reshape(scaled_xy, [self.input_size * 2])
    position_indices = tf.concat(
        [self.position_b, tf.repeat(quantized_xy, 2, 0), self.position_chwd],
        -1)
    position_tensor = sparse.SparseTensor(
        indices=position_indices,
        values=position_vals,
        dense_shape=self.position_shape)

    # Fixed: the original returned position_tensor twice and silently
    # discarded presence_tensor.
    return presence_tensor, position_tensor
def decode_libsvm(content, num_features, dtype=None, label_dtype=None):
    """Decode Libsvm records into a sparse feature tensor and a label tensor.

    Args:
      content: A `Tensor` of type `string`. Each string is one record/row
        in the Libsvm format.
      num_features: The number of features.
      dtype: The type of the output feature tensor; forwarded unchanged to
        the decode op (documented default: tf.float32).
      label_dtype: The type of the output label tensor; forwarded unchanged
        to the decode op (documented default: tf.int64).

    Returns:
      features: A `SparseTensor` of the shape `[input_shape, num_features]`.
      labels: A `Tensor` of the same shape as content.
    """
    decoded = core_ops.io_decode_libsvm(
        content,
        num_features,
        dtype=dtype,
        label_dtype=label_dtype,
    )
    # The op yields the labels first, followed by the three components of
    # the sparse feature tensor.
    label_tensor, feature_indices, feature_values, feature_shape = decoded
    feature_tensor = sparse.SparseTensor(
        feature_indices, feature_values, feature_shape)
    return feature_tensor, label_tensor
def main():
    """Parse an hmmsearch result into a sparse protein-by-family score tensor.

    Reads ``Pfam-A.hmm``, ``uniprot_sprot.fasta`` and ``hmmresult_full`` from
    the folder given on the command line. Builds a sparse tensor whose rows
    are protein accessions and whose columns are Pfam accessions, adds the
    score found in column 5 of every non-comment result line, and writes
    three files back to the same folder: ``sparse_tensor`` (pickle),
    ``proteins(r)`` (row labels) and ``families(c)`` (column labels).

    Raises:
        ValueError: If a result line references a protein or Pfam accession
            that is not present in the reference lists.
    """
    # Local import so this function does not depend on the file's import block.
    import os

    DATA = path_arg(
        "A script to parse the result of a searchhmm to a sparse tensor for use in the neural network",
        "Path to the folder to be used for i/o").parse_args().path

    # ------------------------------------------------------------------
    # Read files
    # os.path.join works whether or not DATA ends with a path separator.
    with open(os.path.join(DATA, "Pfam-A.hmm"), encoding="utf-8") as pfamfile:
        pfam = pfamfile.readlines()
    with open(os.path.join(DATA, "uniprot_sprot.fasta"),
              encoding="utf-8") as fastafile:
        fasta = fastafile.readlines()
    with open(os.path.join(DATA, "hmmresult_full"),
              encoding="utf-8") as infile:
        result = infile.readlines()

    # ------------------------------------------------------------------
    # Prepare empty sparse tensor
    # Make a list of all Pfam accessions from the HMM file
    pfam_Accessions = getaccessions(pfam, "ACC", r"\s", 3)
    # Make a list of all protein accessions in swissprot from the fasta file
    # (raw string: "\|" is an invalid escape in a normal string literal).
    protein_Accessions = getaccessions(fasta, ">", r"\|", 1)

    # accession -> first index, replacing an O(n) list.index() per result
    # line. setdefault keeps the first occurrence, like list.index().
    protein_index_of = {}
    for position, accession in enumerate(protein_Accessions):
        protein_index_of.setdefault(accession, position)
    pfam_index_of = {}
    for position, accession in enumerate(pfam_Accessions):
        pfam_index_of.setdefault(accession, position)

    dense_shape = [len(protein_Accessions), len(pfam_Accessions)]

    # Create an empty, sparse tensor with the proportions of the lists that
    # were made (a single explicit zero at [0, 0]).
    hmmtensor = sparse.SparseTensor(indices=[[0, 0]],
                                    values=[float(0)],
                                    dense_shape=dense_shape)

    # ------------------------------------------------------------------
    # Add scores to rows and columns denoted by protein and family
    # respectively
    for line in result:
        if line.startswith("#"):
            continue
        # Whitespace-separated columns; equivalent to splitting on r"\s"
        # and dropping the empty strings.
        columns = line.split()
        if not columns:
            # Blank line: nothing to parse (previously an IndexError).
            continue
        protein_accession = columns[0].split("|")[1]
        pfam_accession = columns[3]
        try:
            protein_index = protein_index_of[protein_accession]
            pfam_index = pfam_index_of[pfam_accession]
        except KeyError as err:
            # Keep the ValueError that list.index() used to raise.
            raise ValueError(
                f"{err.args[0]!r} is not in list") from err
        hmmtensor = sparse.add(
            hmmtensor,
            sparse.SparseTensor(indices=[[protein_index, pfam_index]],
                                values=[float(columns[5])],
                                dense_shape=dense_shape))

    # ------------------------------------------------------------------
    # Write lists and sparse tensor to files
    with open(os.path.join(DATA, "sparse_tensor"), "wb") as tensor_file, \
            open(os.path.join(DATA, "proteins(r)"), "w",
                 encoding="utf-8") as rows, \
            open(os.path.join(DATA, "families(c)"), "w",
                 encoding="utf-8") as columns_file:
        pickle.dump(hmmtensor, tensor_file)
        rows.write("\n".join(protein_Accessions))
        columns_file.write("\n".join(pfam_Accessions))
def __init__(self, deviceIdx, KarrayA, inputStoichio, maskA, maskComplementary, derivLeak, isSparse=False):
    """
        Builds the neural network according to the provided masks.
        We begin with a dense coding, but prepare to switch to the sparse case, which will probably appear.
        In such case, tensorflow only enable product of at most 2D sparse matrix (rank==2 in tensorflow terms) against some dense matrix.
    :param deviceIdx: the device on which the computation shall be done
    :param KarrayA: A 2d-array, if sparse
    :param inputStoichio: rank-3 array; converted to tensors in the dense case,
                          its ``.data`` supplies the weights in the sparse case
    :param maskA: rank-3 mask; in the sparse case it must expose ``.coords``
                  as a (ndim, nnz) array
    :param maskComplementary: rank-3 mask, only used in the dense case
    :param derivLeak: converted to a tensor with the dtype of ``KarrayA``
    :param isSparse: if True, build per-mask sparse look-up tensors instead
                     of dense tensors
    """
    super(derivativeNetwork, self).__init__()
    try:
        assert len(KarrayA.shape) == 2
        assert len(inputStoichio.shape) == 3
        assert len(maskA.shape) == 3
        assert len(maskComplementary.shape) == 3
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # are not intercepted; the error is still re-raised unchanged.
        print("Wrong shape for masks")
        raise

    self.repeat = RepeatVector(maskA.shape[1])

    self.maskIdxList = []
    self.maskComplementaryList = []
    self.stoichioList = []
    self.deviceIdx = deviceIdx
    self.isSparse = isSparse

    if not isSparse:
        # Dense case: one tensor per slice along the first axis, plus the
        # full rank-3 tensors.
        for m in range(maskA.shape[0]):
            self.maskIdxList.append(tf.convert_to_tensor(maskA[m]))
            self.maskComplementaryList.append(
                tf.convert_to_tensor(maskComplementary[m]))
            self.stoichioList.append(tf.convert_to_tensor(inputStoichio[m]))
        self.tfmask = tf.convert_to_tensor(maskA)
        self.tfMaskComplementary = tf.convert_to_tensor(maskComplementary)
        self.tfStoichio = tf.convert_to_tensor(inputStoichio)
    else:
        # In tensorflow, the element-wise product is not defined for sparse
        # matrix. Therefore two possible solution is offered to us:
        #   We can either compute a boolean mask for each row (of last axis)
        #   of the 3D masks, and compute on these masks and then aggregate.
        #   We can also try to use the sparse_embedding_lookup function.
        # We implement the second solution.
        self.maskWeightList = []
        # First we need to convert to the look-up made of two sparse matrix.
        # In the sparse matrix module, .coords is defined as a (ndim,nnz)
        # shaped array.
        for m in range(maskA.shape[0]):
            coords = []
            idxValues = []
            weightValues = []
            # Fixed: the original iterated ``for idx, e in maskA.coords[0]``,
            # which tries to unpack each scalar coordinate and fails;
            # enumerate() provides the (position, first-axis index) pairs.
            for idx, e in enumerate(maskA.coords[0]):
                if e == m:
                    coords.append(maskA.coords[1:, idx])
                    idxValues.append(maskA.coords[2, idx])
                    # NOTE(review): assumes inputStoichio stores its
                    # non-zeros in the same order as maskA -- confirm.
                    weightValues.append(inputStoichio.data[idx])
            # Slices without any entry are skipped, so these lists are not
            # necessarily aligned with the slice index m.
            if coords:
                self.maskIdxList.append(
                    sparse.SparseTensor(indices=coords,
                                        values=idxValues,
                                        dense_shape=maskA.shape[1:]))
                self.maskWeightList.append(
                    sparse.SparseTensor(indices=coords,
                                        values=weightValues,
                                        dense_shape=maskA.shape[1:]))

    self.Karray = tf.convert_to_tensor(KarrayA)
    self.derivLeak = tf.convert_to_tensor(derivLeak,
                                          dtype=self.Karray.dtype)