示例#1
0
def load_PCA_Subspace(catalog):
  """Load pre-computed PCA data and publish it, with a KD tree, to the catalog.

  Steps, in order:
    1. Load the PCA vectors from ``settings.PCA_VECTOR_FILE`` and store the
       leading ``num_pc`` entries under the key ``'pcaVectors'``.
    2. Load the pre-calculated projections from ``data/pca_applied.npy``,
       keep the leading ``num_pc`` columns, and push one entry per point
       onto the list ``'subspace:pca'`` (replacing any previous content).
    3. Build a KD tree over those points and store its JSON encoding under
       ``'hcube:pca'``.

  Args:
    catalog: Catalog client; this function uses its ``storeNPArray``,
      ``pipeline``, ``delete`` and ``set`` methods.
  """
  settings = systemsettings()
  vectfile = settings.PCA_VECTOR_FILE

  logging.info("Loading PCA Vectors from %s", vectfile)
  pc_vect = np.load(vectfile)
  # NOTE(review): the bound is read from axis 1 while the slice below is
  # taken along axis 0 -- confirm which axis indexes the components.
  max_pc = pc_vect.shape[1]
  num_pc = min(settings.PCA_NUMPC, max_pc)
  pc = pc_vect[:num_pc]
  logging.info("Storing PCA Vectors to key:  %s", 'pcaVectors')
  catalog.storeNPArray(pc, 'pcaVectors')

  logging.info("Loading Pre-Calculated PCA projections from Historical BPTI Trajectory")
  pre_calc_deshaw = np.load('data/pca_applied.npy')

  # Extract only the necessary PCs (leading num_pc columns of every point).
  pts = pre_calc_deshaw.T[:num_pc].T

  # The KD tree stored below is built from `pts` alone, so the point list
  # must hold exactly these points: drop entries left by an earlier run
  # instead of appending after them.
  catalog.delete('subspace:pca')
  pipe = catalog.pipeline()
  for si in pts:
    pipe.rpush('subspace:pca', bytes(si))
  pipe.execute()
  logging.debug("PCA Subspace stored in Catalog")

  # HCube leaf size of 500 points
  logging.info('Creating KD Tree')
  kd = KDTree(500, maxdepth=8, data=pts)
  logging.info('Encoding KD Tree')
  encoded = json.dumps(kd.encode())
  logging.info('Storing in catalog')
  catalog.delete('hcube:pca')
  catalog.set('hcube:pca', encoded)
  logging.info('PCA Complete')
示例#2
0
文件: initialize.py 项目: DaMSL/ddc
def calcDEShaw_PCA(catalog, force=False):
  """Project the DEShaw source points onto stored PCA vectors and index them.

  If the list ``'subspace:pca'`` is empty (or ``force`` is set), every
  conformation loaded from ``DESHAW_PTS_FILE`` is reduced to its heavy atoms
  and projected onto the first ``numPC`` vectors stored under
  ``'pcaVectors'``; the projections are pushed to ``'subspace:pca'`` and a
  reference entry per point is pushed to ``'xid:reference'``.  Otherwise the
  existing projections are read back from ``'subspace:pca'``.

  In both cases a KD tree is built over the projected points and its JSON
  encoding is stored under ``'hcube:pca'``.

  Args:
    catalog: Catalog client; this function uses its ``llen``, ``lrange``,
      ``delete``, ``set``, ``pipeline`` and ``loadNPArray`` methods.
    force: When true, recompute the projections even if ``'subspace:pca'``
      already holds points.
  """
  numPC = 3

  numpts = catalog.llen('subspace:pca')
  if numpts == 0 or force:
    catalog.delete('subspace:pca')
    logging.debug("Projecting DEshaw PCA Vectors (assuming PC's are pre-calc'd")
    pcavect = catalog.loadNPArray('pcaVectors')
    logging.debug("Loaded PCA Vectors: %s, %s", str(pcavect.shape), str(pcavect.dtype))
    src = np.load(DESHAW_PTS_FILE)
    logging.debug("Loaded source points: %s, %s", str(src.shape), str(src.dtype))
    pcalist = np.zeros(shape=(len(src), numPC))
    start = dt.datetime.now()

    # The first frame of the historical trajectory is loaded only to obtain
    # the topology from which the heavy-atom indices are selected.
    pdbfile, dcdfile = deshaw.getHistoricalTrajectory(0)
    traj = md.load(dcdfile, top=pdbfile, frame=0)
    filt = traj.top.select_atom_indices(selection='heavy')

    components = pcavect[:numPC]  # loop-invariant; sliced once
    pipe = catalog.pipeline()
    # NOTE(review): 'xid:reference' is appended to but not cleared here, so a
    # forced re-run may leave duplicate reference entries -- confirm whether
    # that key is reset elsewhere.
    for i, conform in enumerate(src):
      if i % 10000 == 0:
        logging.debug("Projecting: %d", i)
      heavy = np.array([conform[k] for k in filt])
      # One dot product per component; the conformation is reshaped to the
      # component's shape so both operands line up.
      np.copyto(pcalist[i], np.array([np.dot(heavy.reshape(pc.shape), pc) for pc in components]))
      raw_index = i * DESHAW_SAMPLE_FACTOR
      pipe.rpush('xid:reference', '(-1, %d)' % raw_index)
    end = dt.datetime.now()
    # total_seconds() covers the whole interval; .seconds drops whole days.
    logging.debug("Projection time = %d", (end - start).total_seconds())

    for si in pcalist:
      pipe.rpush('subspace:pca', bytes(si))
    pipe.execute()
    logging.debug("R_Index Created (pca)")
  else:
    logging.info('PCA Already created. Retrieving existing lower dim pts')
    X = catalog.lrange('subspace:pca', 0, -1)
    # np.frombuffer replaces the deprecated binary mode of np.fromstring;
    # np.array() copies the read-only views into one writable array.
    pcalist = np.array([np.frombuffer(si) for si in X])

  # HCube leaf size of 500 points
  logging.info('Creating KD Tree')
  kd = KDTree(500, data=pcalist)
  logging.info('Encoding KD Tree')
  encoded = json.dumps(kd.encode())
  logging.info('Storing in catalog')
  catalog.delete('hcube:pca')
  catalog.set('hcube:pca', encoded)
  logging.info('PCA Complete')