Example No. 1
def arc_by_radian(x, y, height, radian_range, thickness, gaussian_width):
    """
    Radial arc with Gaussian fall-off beyond a solid ring-shaped
    region of the given thickness; the angular extent of the arc is
    given by the (start, end) radian_range.
    """

    # Create a circular ring (copied from the ring function)
    radius = height/2.0
    half_thickness = thickness/2.0

    distance_from_origin = sqrt(x**2+y**2)
    distance_outside_outer_disk = distance_from_origin - radius - half_thickness
    distance_inside_inner_disk = radius - half_thickness - distance_from_origin

    ring = 1.0-bitwise_xor(greater_equal(distance_inside_inner_disk,0.0),greater_equal(distance_outside_outer_disk,0.0))

    sigmasq = gaussian_width*gaussian_width

    if sigmasq==0.0:
        inner_falloff = x*0.0
        outer_falloff = x*0.0
    else:
        with float_error_ignore():
            inner_falloff = exp(divide(-distance_inside_inner_disk*distance_inside_inner_disk, 2.0*sigmasq))
            outer_falloff = exp(divide(-distance_outside_outer_disk*distance_outside_outer_disk, 2.0*sigmasq))
            
    output_ring = maximum(inner_falloff,maximum(outer_falloff,ring))

    # Calculate the angle of each point (in 4 quadrant phases) and mask it to the given radian range

    # RZHACKALERT:
    # float_error_ignore() cannot suppress the error raised when both
    # dividend and divisor are 0.0, and when only the divisor is 0.0 it
    # yields 'Inf' rather than 0.0.  Only one point of
    # distance_from_origin can be 0.0 (the circle centre), and at that
    # point x and y are 0.0 as well.  As a hack to avoid the 'invalid
    # value encountered in divide' error, any 0.0 in
    # distance_from_origin is replaced by 1e-5.
    distance_from_origin += where(distance_from_origin == 0.0, 1e-5, 0)

    with float_error_ignore():
        sines = divide(y, distance_from_origin)
        cosines = divide(x, distance_from_origin)
        arcsines = arcsin(sines)

    phase_1 = where(logical_and(sines >= 0, cosines >= 0), 2*pi-arcsines, 0)
    phase_2 = where(logical_and(sines >= 0, cosines <  0), pi+arcsines,   0)
    phase_3 = where(logical_and(sines <  0, cosines <  0), pi+arcsines,   0)
    phase_4 = where(logical_and(sines <  0, cosines >= 0), -arcsines,     0)
    arcsines = phase_1 + phase_2 + phase_3 + phase_4

    if radian_range[0] <= radian_range[1]:
        return where(logical_and(arcsines >= radian_range[0], arcsines <= radian_range[1]),
                     output_ring, 0.0)
    else:
        return where(logical_or(arcsines >= radian_range[0], arcsines <= radian_range[1]),
                     output_ring, 0.0)
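Below is a minimal usage sketch for the example above. It assumes the bare array names used in the function (sqrt, where, exp, arcsin, maximum, and so on) resolve to NumPy and that float_error_ignore can be stood in for by numpy.errstate; the grid shape and parameter values are purely illustrative.

import numpy as np
from numpy import (sqrt, exp, divide, maximum, where, arcsin, pi,
                   greater_equal, bitwise_xor, logical_and, logical_or)

def float_error_ignore():
    # Assumed stand-in for the context manager used above.
    return np.errstate(divide='ignore', invalid='ignore')

# With arc_by_radian defined as above, evaluate it on a 100x100 grid
# centred on the origin; the result is 1.0 on the selected part of the
# ring, falls off as a Gaussian around it, and is 0 outside the arc.
y, x = np.mgrid[-0.5:0.5:100j, -0.5:0.5:100j]
arc = arc_by_radian(x, y, height=0.6, radian_range=(0.0, pi),
                    thickness=0.1, gaussian_width=0.05)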
Example No. 3
def kNNimputeMA(arr2d, K=20, callback=None):
    """Returns a new 2D MA.array with missing values imputed from K nearest neighbours.
    Find K rows (axis 0) with the most similar values where similarity measure corresponds to weighted Euclidean distance.
    Imputed value = weighted average of the corresponding values of K nearest neighbours,
    where weights equal to tricubic distribution of distances to all rows.
    Impute missing rows by average over all rows.
    Version: 30.8.2005
    """
    arr2d = MA.asarray(arr2d)
    assert len(arr2d.shape) == 2, "2D array expected"
    # make a copy for imputation
    aImp2 = MA.array(arr2d)
    # leave out columns with 0 known values (columnInd: non-zero columns)
    columnCond = Numeric.greater(MA.count(arr2d, axis=0), 0)
    columnIndAll = Numeric.arange(arr2d.shape[1])
    columnInd = Numeric.compress(columnCond, columnIndAll)
    # impute the rows where 0 < #known_values < #non_zero_columns, i.e. skip rows with no known values and rows where all (non-zero-column) values are known
    countByRows = MA.count(arr2d, axis=1)
    for rowIdx in Numeric.compress(Numeric.logical_and(Numeric.greater(countByRows, 0), Numeric.less(countByRows, columnInd.shape[0])), Numeric.arange(arr2d.shape[0])):
        rowResized = MA.resize(arr2d[rowIdx], arr2d.shape)
        diff = arr2d - rowResized
        distances = MA.sqrt(MA.add.reduce((diff)**2, 1) / MA.count(diff, axis=1))
        # nearest neighbours row indices (without the current row index)
        indSorted = MA.argsort(distances)[1:]
        distSorted = distances.take(indSorted)
        # number of distances different from MA.masked
        numNonMasked = distSorted.shape[0] - Numeric.add.reduce(Numeric.asarray(MA.getmaskarray(distSorted), Numeric.Int))
        # number of distances to account for (K or less)
        if numNonMasked > 1:
            weightsSorted = MA.power(1-MA.power(distSorted/distSorted[numNonMasked-1],3),3) # tricubic distribution of all weights
        else:
            weightsSorted = Numeric.ones(distSorted.shape[0])
        # compute average for each column separately in order to account for K non-masked values
        colInd4CurrRow = Numeric.compress(Numeric.logical_and(MA.getmaskarray(arr2d[rowIdx]), columnCond), columnIndAll)
        for colIdx in colInd4CurrRow:
            # column values sorted by distances
            columnVals = arr2d[:,colIdx].take(indSorted)
            # take only those weights where columnVals does not equal MA.masked
            weightsSortedCompressed = MA.compress(1-MA.getmaskarray(columnVals), weightsSorted)
            # impute from K (or possibly less) values
            aImp2[rowIdx,colIdx] = MA.average(columnVals.compressed()[:K], weights=weightsSortedCompressed[:K])
        if callback:
            callback()
    # impute the unknown rows with average profile
    avrgRow = MA.average(arr2d, 0)
    for rowIdx in Numeric.compress(Numeric.equal(countByRows, 0), Numeric.arange(arr2d.shape[0])):
        aImp2[rowIdx] = avrgRow
        if callback:
            callback()
    return aImp2
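A conceptual sketch of the imputation step using modern numpy.ma in place of the legacy MA/Numeric modules. It imputes a single missing cell rather than looping over all rows; the toy matrix, the chosen row/column and K are illustrative only.

import numpy as np
import numpy.ma as ma

# Toy 4x3 matrix with two missing (masked) entries.
X = ma.masked_invalid([[1.0, 2.0,    3.0],
                       [1.1, np.nan, 3.2],
                       [0.9, 2.1,    2.9],
                       [5.0, 6.0,    7.0]])
row = 1                                    # row with a missing value
diff = X - ma.resize(X[row], X.shape)
dist = ma.sqrt(ma.mean(diff ** 2, axis=1))  # masked Euclidean row distances
order = ma.argsort(dist)[1:]                # nearest rows, excluding the row itself

w = (1 - (dist[order] / dist[order][-1]) ** 3) ** 3   # tricubic weights
col = 1                                    # the missing column
vals = X[:, col][order]                    # candidate donor values, nearest first
keep = ~ma.getmaskarray(vals)
K = 2
imputed = np.average(vals.compressed()[:K],
                     weights=np.asarray(w[keep][:K]))
# imputed is roughly 2.05: a distance-weighted average of the two nearest rows.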
Example No. 4
def histogram(data, nbins, range=None):
    """
    Create a histogram.
    Comes from Konrad Hinsen: Scientific Python

    @param data: data list or array
    @type  data: [any]
    @param nbins: number of bins
    @type  nbins: int
    @param range: data range to create histogram from (min val, max val)
    @type  range: (float, float) OR None

    @return: array of shape (nbins, 2) with the bin centre and the bin count.
    @rtype: array
    """
    data = Numeric.array(data, Numeric.Float)
    if range is None:
        min = Numeric.minimum.reduce(data)
        max = Numeric.maximum.reduce(data)
    else:
        min, max = range
        data = Numeric.repeat(
            data,
            Numeric.logical_and(Numeric.less_equal(data, max),
                                Numeric.greater_equal(data, min)))
    bin_width = (max - min) / nbins
    data = Numeric.floor((data - min) / bin_width).astype(Numeric.Int)
    histo = Numeric.add.reduce(
        Numeric.equal(Numeric.arange(nbins)[:, Numeric.NewAxis], data), -1)
    histo[-1] = histo[-1] + Numeric.add.reduce(Numeric.equal(nbins, data))
    bins = min + bin_width * (Numeric.arange(nbins) + 0.5)
    return Numeric.transpose(Numeric.array([bins, histo]))
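For orientation, modern NumPy provides the same computation directly; a small sketch (numpy.histogram returns counts and bin edges, whereas the function above returns bin centres paired with counts):

import numpy as np

data = np.random.normal(size=1000)
counts, edges = np.histogram(data, bins=20)
centres = 0.5 * (edges[:-1] + edges[1:])
result = np.transpose(np.array([centres, counts]))   # shape (20, 2), like the return value above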
Example No. 5
def histogram(data, nbins, range = None):
    """
    Comes from Konrad Hinsen: Scientific Python
    """
    
    data = Numeric.array(data, Numeric.Float)
    
    if range is None:
        min = Numeric.minimum.reduce(data)
        max = Numeric.maximum.reduce(data)
    else:
        min, max = range
        data = Numeric.repeat(data,
                  Numeric.logical_and(Numeric.less_equal(data, max),
                          Numeric.greater_equal(data,
                                    min)))
    # end if
    bin_width = (max-min)/nbins
    
    data = Numeric.floor((data - min)/bin_width).astype(Numeric.Int)
    histo = Numeric.add.reduce(Numeric.equal(
        Numeric.arange(nbins)[:,Numeric.NewAxis], data), -1)
    histo[-1] = histo[-1] + Numeric.add.reduce(Numeric.equal(nbins, data))
    bins = min + bin_width*(Numeric.arange(nbins)+0.5)
    return Numeric.transpose(Numeric.array([bins, histo]))
Example No. 6
def histogram(data, nbins, range=None):
    """
    Comes from Konrad Hinsen: Scientific Python
    """

    data = Numeric.array(data, Numeric.Float)

    if range is None:
        min = Numeric.minimum.reduce(data)
        max = Numeric.maximum.reduce(data)
    else:
        min, max = range
        data = Numeric.repeat(
            data,
            Numeric.logical_and(Numeric.less_equal(data, max),
                                Numeric.greater_equal(data, min)))
    # end if
    bin_width = (max - min) / nbins

    data = Numeric.floor((data - min) / bin_width).astype(Numeric.Int)
    histo = Numeric.add.reduce(
        Numeric.equal(Numeric.arange(nbins)[:, Numeric.NewAxis], data), -1)
    histo[-1] = histo[-1] + Numeric.add.reduce(Numeric.equal(nbins, data))
    bins = min + bin_width * (Numeric.arange(nbins) + 0.5)
    return Numeric.transpose(Numeric.array([bins, histo]))
Example No. 7
def histogram(data, nbins, range = None):
    """
    Create a histogram.
    Comes from Konrad Hinsen: Scientific Python

    @param data: data list or array
    @type  data: [any]
    @param nbins: number of bins
    @type  nbins: int
    @param range: data range to create histogram from (min val, max val)
    @type  range: (float, float) OR None

    @return: array of shape (nbins, 2) with the bin centre and the bin count.
    @rtype: array
    """
    data = Numeric.array(data, Numeric.Float)
    if range is None:
        min = Numeric.minimum.reduce(data)
        max = Numeric.maximum.reduce(data)
    else:
        min, max = range
        data = Numeric.repeat(data,
                              Numeric.logical_and(Numeric.less_equal(data, max),
                                                  Numeric.greater_equal(data, min)))
    bin_width = (max-min)/nbins
    data = Numeric.floor((data - min)/bin_width).astype(Numeric.Int)
    histo = Numeric.add.reduce(Numeric.equal(
        Numeric.arange(nbins)[:,Numeric.NewAxis], data), -1)
    histo[-1] = histo[-1] + Numeric.add.reduce(Numeric.equal(nbins, data))
    bins = min + bin_width*(Numeric.arange(nbins)+0.5)
    return Numeric.transpose(Numeric.array([bins, histo]))
Example No. 8
def divide_unary(a, b):
    """Returns a*b with masked values only in places where both a and b are masked.
    """
    a = MA.asarray(a)
    b = MA.asarray(b)
    el = MA.divide(a.filled(1), b.filled(1))
    mask = Numeric.logical_and(MA.getmaskarray(a), MA.getmaskarray(b))
    return MA.array(el, mask=mask)
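A small usage sketch, assuming the function above lives in a module where MA and Numeric resolve to numpy.ma and numpy (stand-ins for the legacy modules); the input values are illustrative.

import numpy as np
import numpy.ma as MA       # stand-in for the legacy MA module
Numeric = np                # stand-in for the legacy Numeric module

a = MA.array([4.0, 8.0, 6.0], mask=[0, 1, 1])
b = MA.array([2.0, 2.0, 3.0], mask=[0, 0, 1])
c = divide_unary(a, b)
# c -> [2.0, 0.5, --]: masked inputs are filled with 1 before dividing, so
# position 1 uses the fill value; only the position masked in BOTH inputs
# stays masked in the result.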
Example No. 9
def subtract_unary(a, b):
    """Returns a-b with masked values only in places where both a and b are masked.
    """
    a = MA.asarray(a)
    b = MA.asarray(b)
    el = MA.subtract(a.filled(0), b.filled(0))
    mask = Numeric.logical_and(MA.getmaskarray(a), MA.getmaskarray(b))
    return MA.array(el, mask=mask)
Example No. 10
    def fractionNativeSurface(self, cont, contRef ):
        """
        fraction of atoms/residues that are involved in B{any} contacts
        in both complexes.

        @param cont: contact matrix
        @type  cont: matrix
        @param contRef: reference contact matrix
        @type  contRef: matrix
        
        @return: (fractRec, fractLig), fraction of atoms/residues that
                  are involved in any contacts in both complexes
        @rtype: (float, float)
           
        """
        lig, ligRef = N.clip( N.sum(cont),0,1),  N.clip( N.sum(contRef), 0,1)
        rec    = N.clip( N.sum(cont, 1),0,1)
        recRef = N.clip( N.sum(contRef, 1), 0,1)

        fLig = N.sum( N.logical_and( lig, ligRef )) *1./ N.sum( ligRef )
        fRec = N.sum( N.logical_and( rec, recRef )) *1./ N.sum( recRef )

        return (fRec, fLig)
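The clip/sum trick above turns per-atom contact counts into a 0/1 "has any contact" flag per row and per column. A toy NumPy sketch with 0/1 arrays standing in for Biskit contact matrices (the values are illustrative only):

import numpy as np

cont    = np.array([[0, 1, 0],      # rows: receptor atoms, cols: ligand atoms
                    [0, 0, 0],
                    [1, 1, 0]])
contRef = np.array([[1, 0, 0],
                    [0, 0, 1],
                    [1, 0, 0]])

lig, ligRef = np.clip(cont.sum(axis=0), 0, 1), np.clip(contRef.sum(axis=0), 0, 1)
rec, recRef = np.clip(cont.sum(axis=1), 0, 1), np.clip(contRef.sum(axis=1), 0, 1)

fLig = np.logical_and(lig, ligRef).sum() * 1.0 / ligRef.sum()
fRec = np.logical_and(rec, recRef).sum() * 1.0 / recRef.sum()
# fRec, fLig: fraction of contacting rows/columns that are contacting in both matrices.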
Example No. 11
 def contactsShared(self, reference, cutoff=None):
     """
     Number of equal B{residue-residue} contacts in this and
     reference complex.
     
     @param reference: reference complex
     @type  reference: Complex
     @param cutoff: cutoff for atom-atom contact to be counted
     @type  cutoff: float
      @return: the number of residue-residue contacts that are common to
               both this and reference::
                 N.sum( N.sum( N.logical_and( contactMatrix_a, contactMatrix_b )))
     @rtype: int
     """
     equality = N.logical_and(self.resContacts( cutoff=cutoff ),
                            reference.resContacts( cutoff=cutoff ) )
     return abs(N.sum(N.sum( equality )))
Example No. 12
    def contactsOverlap(self, ref, cutoff=None):
        """
        Fraction of overlapping B{residue-residue} contacts between this and
        reference complex.
        
        @param ref: reference complex
        @type  ref: Complex
        @param cutoff: maximal atom-atom distance, None .. previous setting
        @type  cutoff: float
        
        @return: fraction of contacts shared between this and ref
                 (normalized to number of all contacts)
        @rtype: float
        """
        equal = N.logical_and(self.resContacts( cutoff=cutoff ),
                            ref.resContacts( cutoff=cutoff ) )
        total = N.logical_or( self.resContacts(cutoff),
                              ref.resContacts(cutoff) )

        return N.sum(N.sum( equal )) * 1.0 / N.sum(N.sum( total ))
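The quantity computed above is the intersection-over-union of the two boolean residue contact maps; a minimal NumPy sketch with toy 0/1 matrices standing in for resContacts():

import numpy as np

a = np.array([[1, 0], [1, 1]])
b = np.array([[1, 1], [0, 1]])
overlap = np.logical_and(a, b).sum() / float(np.logical_or(a, b).sum())
# 2 shared contacts out of 4 total contacts -> 0.5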
Example No. 13
 def filter(self, dlg):
     fptr = open(dlg)
     dlg_lines = fptr.readlines()
     fptr.close()
     #STEP 1:accumulate lines of various poses
     model_lines = []
     #keep all of them
     all_models = []
     in_model = False
     for ll in dlg_lines:
         if ll.find("DOCKED:") == 0:
             #check for a new model
             if ll.find("DOCKED: MODEL") == 0:
                 model_lines = []
             in_model = True
             model_lines.append(ll)
         if ll.find("_") == 0 and in_model:
             all_models.append(model_lines)
             model_lines = []
             in_model = False
     #initialize this ligand
     # loop over the models:
     for model_lines in all_models:
         self.setup_ligand(model_lines)
         bigR = self.bigRC[:self.lenK]
         bigM = self.bigC[:self.lenK]
         cutoff = bigR + self.keyRadii
         d = bigM - self.smallM
         dSQ = d * d
         dSQMAT = Numeric.sum(dSQ, 2)
         cutoffSQMAT = cutoff * cutoff
         ansMat = Numeric.logical_and(Numeric.less(dSQMAT, cutoffSQMAT),
                                      Numeric.not_equal(dSQMAT, 0.))
         rowIndices = Numeric.nonzero(Numeric.sum(ansMat, 1))
         num_contacts = 0
         for ind in rowIndices:
             for j in ansMat[ind]:
                 if j: num_contacts += 1
         if num_contacts > 0:
             break
     return num_contacts
Example No. 14
 def filter(self, dlg):
     fptr = open(dlg)
     dlg_lines = fptr.readlines()
     fptr.close()
     #STEP 1:accumulate lines of various poses
     model_lines = []
     #keep all of them
     all_models = []
     in_model = False
     for ll in dlg_lines:
         if ll.find("DOCKED:")==0:
             #check for a new model
             if ll.find("DOCKED: MODEL")==0:
                 model_lines = []
             in_model = True
             model_lines.append(ll)
         if ll.find("_")==0 and in_model:
             all_models.append(model_lines)
             model_lines = []
             in_model = False
     #initialize this ligand 
     # loop over the models:
     for model_lines in all_models:
         self.setup_ligand(model_lines)
         bigR = self.bigRC[:self.lenK]
         bigM = self.bigC[:self.lenK]
         cutoff = bigR + self.keyRadii
         d = bigM - self.smallM
         dSQ = d*d
         dSQMAT = Numeric.sum(dSQ,2)
         cutoffSQMAT = cutoff*cutoff
         ansMat = Numeric.logical_and(Numeric.less(dSQMAT, cutoffSQMAT),Numeric.not_equal(dSQMAT, 0.))
         rowIndices = Numeric.nonzero(Numeric.sum(ansMat,1))
         num_contacts = 0
         for ind in rowIndices:
             for j in ansMat[ind]: 
                 if j: num_contacts+=1
         if num_contacts > 0:
             break 
     return num_contacts
Example No. 15
      Y = Numeric.ones(N).astype('f')    
      for i in range(numproc):
        Y = Y*Numeric.array(range(N))*(i+1)    
      #print X_float
      #print Y  
      assert Numeric.allclose(X_float, Y)
      print "Raw reduce using pypar.PROD OK"
  else:
    if myid == 0:
      print "Skipping product-reduce - try again with numproc < 20"    

  pypar.raw_reduce(testArray, X, pypar.LAND, 0, 0)
  if myid == 0:  
    Y = Numeric.ones(N)    
    for i in range(numproc):
      Y = Numeric.logical_and(Y, Numeric.array(range(N))*(i+1))  
    assert Numeric.allclose(X, Y)
    print "Raw reduce using pypar.LAND OK"    
    
  pypar.raw_reduce(testArray, X, pypar.BAND, 0, 0)
  if myid == 0:
    Y = Numeric.ones(N)*255  #Neutral element for &   
    for i in range(numproc):
      Y = Numeric.bitwise_and(Y, Numeric.array(range(N))*(i+1))
    assert Numeric.allclose(X, Y)
    print "Raw reduce using pypar.BAND OK"    

  pypar.raw_reduce(testArray, X, pypar.LOR, 0, 0)
  if myid == 0:  
    Y = Numeric.zeros(N)    
    for i in range(numproc):
Example No. 16
def logical_unary_and(m1, m2):
    """Returns element-wise logical AND of m1 and m2 (masked values filled with 1),
    with masked values only in places where both m1 and m2 are masked.
    """
    el = Numeric.logical_and(m1.filled(1), m2.filled(1))
    mask = Numeric.logical_and(MA.getmaskarray(m1), MA.getmaskarray(m2))
    return MA.array(el, mask=mask)
Example No. 17
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing outliers.
    Superimposed array y would be C{ N.dot(y, N.transpose(r)) + t }.

    @param n_iterations: number of calculations::
                           1 .. no iteration 
                           0 .. until convergence
    @type  n_iterations: 1|0
    @param z: number of standard deviations for outlier definition (default: 2)
    @type  z: float
    @param eps_rmsd: tolerance in rmsd (default: 0.5)
    @type  eps_rmsd: float
    @param eps_stdv: tolerance in standard deviations (default: 0.05)
    @type  eps_stdv: float

    @return: (r,t), [ [percent_considered, rmsd_for_iteration, outlier_indices] ]
    @rtype: (array, array), [[float, float, array]]
    """
    iter_trace = []

    rmsd_old = 0
    stdv_old = 0

    n = 0
    converged = 0

    mask = N.ones(len(y), N.int32)

    while not converged:

        ## find transformation for best match
        r, t = findTransformation(N.compress(mask, x, 0),
                                  N.compress(mask, y, 0))

        ## transform coordinates
        xt = N.dot(y, N.transpose(r)) + t

        ## calculate row distances
        d = N.sqrt(N.sum(N.power(x - xt, 2), 1)) * mask

        ## calculate rmsd and stdv
        rmsd = N.sqrt(N.average(N.compress(mask, d)**2))
        stdv = MU.SD(N.compress(mask, d))

        ## check conditions for convergence
        d_rmsd = abs(rmsd - rmsd_old)
        d_stdv = abs(1 - stdv_old / stdv)

        if d_rmsd < eps_rmsd and d_stdv < eps_stdv:
            converged = 1
        else:
            rmsd_old = rmsd
            stdv_old = stdv

        ## store result
        perc = round(float(N.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows
        mask = N.logical_and(mask, N.less(d, rmsd + z * stdv))
        outliers = N.nonzero(N.logical_not(mask))
        iter_trace.append([perc, round(rmsd, 3), outliers])

        n += 1

        if n_iterations and n >= n_iterations:
            break

    return (r, t), iter_trace
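Below is a hedged usage sketch. findTransformation and MU.SD are external Biskit helpers that are not part of the snippet, so the sketch supplies minimal stand-ins (a Kabsch-style rigid fit and numpy's std) purely to make the call pattern concrete; N is assumed to resolve to NumPy.

import numpy as np
N = np                      # stand-in for the snippet's Numeric/NumPy import

def findTransformation(x, y):
    # Minimal Kabsch-style rigid fit (stand-in for the Biskit helper):
    # returns (r, t) such that N.dot(y, N.transpose(r)) + t approximates x.
    cx, cy = x.mean(0), y.mean(0)
    u, s, vt = np.linalg.svd(np.dot((x - cx).T, (y - cy)))
    d = np.sign(np.linalg.det(np.dot(u, vt)))
    r = np.dot(u * [1.0, 1.0, d], vt)
    return r, cx - np.dot(cy, r.T)

class MU:                   # stand-in for the statistics helper used above
    SD = staticmethod(np.std)

# Toy data: y rotated/translated into x, with a little noise and 3 outliers.
rng = np.random.RandomState(0)
y = rng.rand(30, 3)
theta = 0.3
rot = np.array([[np.cos(theta), -np.sin(theta), 0.0],
                [np.sin(theta),  np.cos(theta), 0.0],
                [0.0,            0.0,           1.0]])
x = np.dot(y, rot.T) + np.array([1.0, 0.0, -2.0]) + 0.01 * rng.randn(30, 3)
x[:3] += 5.0                # three outlier points

(r, t), trace = match(x, y, n_iterations=10)   # at most 10 refinement passes
# Each trace entry is [fraction_of_points_kept, rmsd, outlier_indices].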
Example No. 18
    def senddata(self):
        """computes selectionList, partitions the examples and updates infoc;
        sends out selectionList and selected/other dataStructure or None;
        """
        if self.dataStructure and self.ps.shape[1]:
            # set selectionList
            alphas = [self.alphaA, self.alphaB, self.alphaI]
            selectors = [self.selectorA, self.selectorB, self.selectorI]
            selectionList = Numeric.ones((self.numExamples,))
            boxSelectors = [self.boxSelectorA, self.boxSelectorB, self.boxSelectorI]
            for si in range(3):
                try:
##                    if selectors[si] and self.anovaType in [[0,1,3,4],[2,3,4],[4]][si]:
                    if selectors[si] and boxSelectors[si].isEnabled():
                        selectionList = Numeric.logical_and(selectionList, Numeric.less(self.ps[si], float(alphas[si])))
                except ValueError:
                    print "Warning: cannot convert %s to float" % str(alphas[si])
                    pass
            self.infoc.setText('Sending out data...')
            
            if self.sendProbabilities:
                # create example table with probabilities
##                print self.ps
##                print Numeric.transpose(self.ps).shape
                etProb = orange.ExampleTable(orange.Domain([orange.FloatVariable("Factor A p-val"),orange.FloatVariable("Factor B p-val"),orange.FloatVariable("Interaction p-val")]), Numeric.transpose(self.ps))
                # in etProb, convert p-val to meta attribute
                domProb = orange.Domain([])
                domProb.addmetas(dict(zip([orange.newmetaid(),orange.newmetaid(),orange.newmetaid()], etProb.domain.variables)))
                etProb = orange.ExampleTable(domProb, etProb)
            else:
                # create new etProb without attributes/metas and of length equal to etProb
                etProb = orange.ExampleTable(orange.Domain([]), Numeric.zeros((selectionList.shape[0],0)))

            # partition dataStructure and send out data
            selectionList = selectionList.tolist()
            self.send("Example Selection", (self.selectorName, selectionList))
            dataStructS = []
            dataStructN = []
            self.progressBarInit()

            if self.sendNotSelectedData:
                pbStep = 50./len(self.dataStructure)
            else:
                pbStep = 100./len(self.dataStructure)

            for (dsName, etList) in self.dataStructure:
                etListS = [et.select(selectionList) for et in etList]
                for i in range(len(etList)):
                    # append probabilities (if etProb not empty)
                    etListS[i] = orange.ExampleTable([etListS[i], etProb.select(selectionList)])
                    # add name
                    etListS[i].name = etList[i].name
                dataStructS.append((dsName, etListS))
                self.progressBarAdvance(pbStep)
            self.send("Selected Structured Data", dataStructS)

            if self.sendNotSelectedData:
                for (dsName, etList) in self.dataStructure:
                    etListN = [et.select(selectionList, negate=1) for et in etList]
                    for i in range(len(etList)):
                        # append probabilities (if etProb not empty)
                        etListN[i] = orange.ExampleTable([etListN[i], etProb.select(selectionList, negate=1)])
                        # add name
                        etListN[i].name = etList[i].name
                    dataStructN.append((dsName, etListN))
                    self.progressBarAdvance(pbStep)
                self.send("Other Structured Data", dataStructN)
            else:
                self.send("Other Structured Data", None)

            self.progressBarFinished()
            # report the number of selected examples
            numExamples = Numeric.add.reduce(Numeric.greater(selectionList, 0))
            self.infoc.setText('Total of %d example%s match criteria.' % (numExamples, ['', 's'][int(numExamples!=1)]))
        else:
            self.send("Example Selection", None)
            self.send("Selected Structured Data", None)
            self.send("Other Structured Data", None)
Example No. 19
    def select(self, keyAts, checkAts, cutoff=3.0, percentCutoff=1.0, 
                keyMat=None, checkMat=None):
        """ keyAts, checkAts, cutoff, percentCutoff
            keyAts: first set of atoms
            checkAts: a second set of atoms which is checked vs. keyAts
            cutoff: 
                either a single float by default 3.0 
                or a matrix with shape:
            (max(len(keyAts),len(checkAts)), min(len(keyAts),len(checkAts)))
            percentCutoff: by default 1.0 (cutoff is multiplied by this value)
            keyMat: transformation of keyAts
            checkMat: transformation of checkAts

        returns 'pairDict' whose keys are atoms used as reference points and whose 
        values are atoms within cutoff distance of corresponding key. 

        If 'return_dist' flag is set, 
        'distDict' is returned also, whose keys are 
        the same atoms which are used as reference points and whose values are 
        lists of distances to atoms  within cutoff distance of corresponding key

        """
        lenK = len(keyAts)
        lenC = len(checkAts)

        #data arrays are used to find atoms with given indices quickly
        atar = Numeric.array(checkAts.data)
        keyAtar = Numeric.array(keyAts.data)

        #basic arrays of coords used to build others
        c = Numeric.array(checkAts.coords, 'f')
        if checkMat:
            c = self.mul(c, checkMat)
        k = Numeric.array(keyAts.coords, 'f')
        if keyMat:
            k = self.mul(k, keyMat)

        # first build matrix of distances between all pairs of ats
        # rows correspond to ats in larger set, columns to those in smaller
        # first build square matrix
        if lenC >= lenK:
            bigC = Numeric.resize(c, (lenC, lenC, 3))
            k.shape = (lenK,1,3)
            bigM = bigC[:lenK]
            smallM = k

            cutoff = self.setupCutoff(checkAts, keyAts, cutoff)
            #print "0a:cutoff[0][0]=", cutoff[0][0]
            cutoff.shape = (lenK, -1)

        else:
            bigK = Numeric.resize(k, (lenK, lenK, 3))
            c.shape = (lenC,1,3)
            bigM = bigK[:lenC]
            smallM = c
            cutoff = self.setupCutoff(keyAts, checkAts, cutoff)
            #print "0b:cutoff[0][0]=", cutoff[0][0]
            cutoff.shape = (lenC, -1)

        # distance matrix
        d = bigM - smallM
        # distance squared matrix
        dSQ = d * d
        # next step sums deltaX**2, deltaY**2, deltaZ**2
        dSQMAT = Numeric.sum(dSQ,2)

        #percentCutoff lets user relax sum of radii
        #the smaller the percentCutoff the smaller the key 
        #dSQ has to be less than
        cutoff = cutoff * percentCutoff
        cutoffSQMAT = cutoff * cutoff
        #cutoffSQMAT = cutoffSQMAT * percentCutoff

        # ansMat has 1 where sq dist. is smaller than cutoff
        ansMat = Numeric.logical_and(self.func(dSQMAT, cutoffSQMAT) , \
                    Numeric.not_equal(dSQMAT, 0.))
        if lenK > lenC:
            # in this case need to rearrange matrix
            # which got shuffled in if-else above
            ansMat = Numeric.swapaxes(ansMat, 0, 1)
            dSQMAT = Numeric.swapaxes(dSQMAT, 0, 1)

        # finally, build result dictionaries which have atom keys:
        #   pairDict has values which are lists of close atoms
        #   distDict has values which are lists of distances
        pairDict = {}
        distDict = {}
        # get a list of rows which have non-zero entries 
        # to loop over in next section
        rowIndices = Numeric.nonzero(Numeric.sum(ansMat,1))
        # rows correspond to ats in keyAts
        # columns correspond to ats in checkAts
        for i in rowIndices:
            # atindex is a list [7 8 9] indexing into checkAts 
            atindex = Numeric.nonzero(ansMat[i])
            # keyAtar[i] is ith atom in keyAts
            keyAt = keyAtar[i]
            pairDict[keyAt] = Numeric.take(atar, atindex)
            if self.return_dist:
                distDict[keyAt] = []
                for ind in atindex:
                    distDict[keyAt].append(math.sqrt(dSQMAT[i][ind]))
        
        #getting distDict back is optional
        if self.return_dist: return pairDict, distDict
        else: return pairDict
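A minimal NumPy sketch of the core test above: build the (key x check) matrix of squared distances by broadcasting, compare it against squared per-pair cutoffs (sums of radii), and exclude exact zero distances (an atom paired with itself). The coordinates and radii are made up.

import numpy as np

key   = np.random.rand(5, 3)            # toy "keyAts" coordinates
check = np.random.rand(8, 3)            # toy "checkAts" coordinates
key_r, check_r = np.full(5, 1.5), np.full(8, 1.5)   # toy radii

d = key[:, None, :] - check[None, :, :]             # (5, 8, 3) coordinate differences
dSQ = (d * d).sum(axis=2)                           # squared distances, shape (5, 8)
cutoffSQ = (key_r[:, None] + check_r[None, :]) ** 2
ansMat = np.logical_and(dSQ < cutoffSQ, dSQ != 0.0)

# pairDict analogue: for each key atom with any hit, indices of close check atoms.
rowIndices = np.nonzero(ansMat.any(axis=1))[0]
pairs = {i: np.nonzero(ansMat[i])[0] for i in rowIndices}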
Example No. 20
def kNNimputeMA(arr2d, K=20, callback=None):
    """Returns a new 2D MA.array with missing values imputed from K nearest neighbours.
    Find K rows (axis 0) with the most similar values where similarity measure corresponds to weighted Euclidean distance.
    Imputed value = weighted average of the corresponding values of K nearest neighbours,
    where weights equal to tricubic distribution of distances to all rows.
    Impute missing rows by average over all rows.
    Version: 30.8.2005
    """
    arr2d = MA.asarray(arr2d)
    assert len(arr2d.shape) == 2, "2D array expected"
    # make a copy for imputation
    aImp2 = MA.array(arr2d)
    # leave out columns with 0 known values (columnInd: non-zero columns)
    columnCond = Numeric.greater(MA.count(arr2d, axis=0), 0)
    columnIndAll = Numeric.arange(arr2d.shape[1])
    columnInd = Numeric.compress(columnCond, columnIndAll)
    # impute the rows where 0 < #known_values < #non_zero_columns, i.e. skip rows with no known values and rows where all (non-zero-column) values are known
    countByRows = MA.count(arr2d, axis=1)
    for rowIdx in Numeric.compress(
            Numeric.logical_and(Numeric.greater(countByRows, 0),
                                Numeric.less(countByRows, columnInd.shape[0])),
            Numeric.arange(arr2d.shape[0])):
        rowResized = MA.resize(arr2d[rowIdx], arr2d.shape)
        diff = arr2d - rowResized
        distances = MA.sqrt(
            MA.add.reduce((diff)**2, 1) / MA.count(diff, axis=1))
        # nearest neighbours row indices (without the current row index)
        indSorted = MA.argsort(distances)[1:]
        distSorted = distances.take(indSorted)
        # number of distances different from MA.masked
        numNonMasked = distSorted.shape[0] - Numeric.add.reduce(
            Numeric.asarray(MA.getmaskarray(distSorted), Numeric.Int))
        # number of distances to account for (K or less)
        if numNonMasked > 1:
            weightsSorted = MA.power(
                1 - MA.power(distSorted / distSorted[numNonMasked - 1], 3),
                3)  # tricubic distribution of all weights
        else:
            weightsSorted = Numeric.ones(distSorted.shape[0])
        # compute average for each column separately in order to account for K non-masked values
        colInd4CurrRow = Numeric.compress(
            Numeric.logical_and(MA.getmaskarray(arr2d[rowIdx]), columnCond),
            columnIndAll)
        for colIdx in colInd4CurrRow:
            # column values sorted by distances
            columnVals = arr2d[:, colIdx].take(indSorted)
            # take only those weights where columnVals does not equal MA.masked
            weightsSortedCompressed = MA.compress(
                1 - MA.getmaskarray(columnVals), weightsSorted)
            # impute from K (or possibly less) values
            aImp2[rowIdx,
                  colIdx] = MA.average(columnVals.compressed()[:K],
                                       weights=weightsSortedCompressed[:K])
        if callback:
            callback()
    # impute the unknown rows with average profile
    avrgRow = MA.average(arr2d, 0)
    for rowIdx in Numeric.compress(Numeric.equal(countByRows, 0),
                                   Numeric.arange(arr2d.shape[0])):
        aImp2[rowIdx] = avrgRow
        if callback:
            callback()
    return aImp2
Example No. 21
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing outliers.
    Superimposed array y would be C{ N.dot(y, N.transpose(r)) + t }.

    @param n_iterations: number of calculations::
                           1 .. no iteration 
                           0 .. until convergence
    @type  n_iterations: 1|0
    @param z: number of standard deviations for outlier definition (default: 2)
    @type  z: float
    @param eps_rmsd: tolerance in rmsd (default: 0.5)
    @type  eps_rmsd: float
    @param eps_stdv: tolerance in standard deviations (default: 0.05)
    @type  eps_stdv: float

    @return: (r,t), [ [percent_considered, rmsd_for_iteration, outlier_indices] ]
    @rtype: (array, array), [[float, float, array]]
    """
    iter_trace = []

    rmsd_old = 0
    stdv_old = 0

    n = 0
    converged = 0

    mask = N.ones(len(y), N.int32 )

    while not converged:

        ## find transformation for best match
        r, t = findTransformation(N.compress(mask, x, 0),
                                  N.compress(mask, y, 0))

        ## transform coordinates
        xt = N.dot(y, N.transpose(r)) + t

        ## calculate row distances
        d = N.sqrt(N.sum(N.power(x - xt, 2), 1)) * mask

        ## calculate rmsd and stdv
        rmsd = N.sqrt(N.average(N.compress(mask, d)**2))
        stdv = MU.SD(N.compress(mask, d))

        ## check conditions for convergence
        d_rmsd = abs(rmsd - rmsd_old)
        d_stdv = abs(1 - stdv_old / stdv)

        if d_rmsd < eps_rmsd and d_stdv < eps_stdv:
            converged = 1
        else:
            rmsd_old = rmsd
            stdv_old = stdv

        ## store result
        perc = round(float(N.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows
        mask = N.logical_and(mask, N.less(d, rmsd + z * stdv))
        outliers = N.nonzero( N.logical_not( mask ) )
        iter_trace.append([perc, round(rmsd, 3), outliers])

        n += 1

        if n_iterations and n >= n_iterations:
            break

    return (r, t), iter_trace