示例#1
0
 def write():
     """Append squared losses and fold assignments of new experiments to the HDF5 cache.

     Takes no arguments; it reads `cache_path`, `result_coords`, `expids`,
     `lso`, `dset`, `feats`, `model`, `calibration` and `verbose` from the
     enclosing scope.

     Layout of the group `result_coords` inside the cache file:
       - 'molids': written once, from the first successfully loaded result
         (assumed to be the same for every experiment).
       - 'expids': int32 (n, 2) index; column 0 is the expid, column 1 the row
         of that expid in 'losses'/'folds', or -1 if its results failed to load.
       - 'losses': float64, one row per successful expid, (labels - scores) ** 2.
       - 'folds': int32, one row per successful expid.

     Expids already present in 'expids' (including failed ones) are skipped.
     """

     def append_rows(group, name, first_row, rows, num_cols, dtype):
         """Grow resizable dataset `name` to first_row + len(rows) rows and store `rows` at its tail."""
         num_rows = first_row + len(rows)
         if name in group:
             # require_dataset() insists on an exact shape match with the existing
             # dataset, so a dataset that must grow is opened and resized instead
             dataset = group[name]
             dataset.resize((num_rows, num_cols))
         else:
             dataset = group.create_dataset(name,
                                            shape=(num_rows, num_cols),
                                            dtype=dtype,
                                            maxshape=(None, num_cols))
         dataset[first_row:] = rows

     with h5py.File(cache_path, 'a') as h5:
         group = h5.require_group(result_coords)

         # What is already in the file: only column 0 of the index is the expid
         if 'expids' in group:
             stored_index = group['expids'][:]
             infile_expids = set(stored_index[:, 0].tolist())
             num_stored = len(stored_index)
         else:
             infile_expids = set()
             num_stored = 0

         expidss = []   # new (expid, row) index entries, row == -1 on failure
         losses = []    # one array of squared losses per successful expid
         foldss = []    # one array of fold assignments per successful expid
         molids = None

         for expid in expids:
             if verbose:
                 print('%s %s' % (expid, lso))
             if expid in infile_expids:
                 if verbose:
                     print('\tAlready done, skipping...')
                 continue
             try:
                 # look for the results corresponding to the desired expid, lso
                 res = ManysourcesResult(expid=expid, dset=dset, feats=feats, model=model)
                 res = res.lsocv() if lso else res.crscv()
                 # Merge the "CV" scores to have one score per compound in the dataset
                 scores, labels, folds = res.merge_scores(calibration=calibration)
                 # NOTE(review): this diagnostic runs inside the try, so if it raises
                 # the expid is recorded as failed only in verbose mode - confirm intended
                 if verbose:
                     print(roc_auc_score(labels, scores, average='samples'))
                 loss = (labels - scores) ** 2
                 res_molids = res.molids() if molids is None else molids
             except Exception:
                 # We guess that this happens when the external set only contains one class, but we need to check
                 print('Warning, had troubles with %s %s' % (expid, lso))
                 expidss.append((expid, -1))
                 continue
             # Record the experiment only once every fallible step succeeded, so
             # 'losses', 'folds' and the row index can never get out of step
             molids = res_molids
             expidss.append((expid, num_stored + len(losses)))
             losses.append(loss)
             foldss.append(folds)

         if not expidss:
             # every requested expid was already cached: nothing to write
             return

         # write molids - N.B. assume same for all of them, which is reasonable
         if molids is not None and 'molids' not in group:
             group['molids'] = molids

         # write expids index
         append_rows(group, 'expids', num_stored, expidss, 2, np.int32)

         if losses:
             # write losses
             append_rows(group, 'losses', num_stored, losses, len(molids), np.float64)
             # write folds (should be optional)
             append_rows(group, 'folds', num_stored, foldss, len(molids), np.int32)