def write():
    """Compute the losses missing from the HDF5 cache and append them to it.

    Takes no arguments; it relies on names from the enclosing scope:
    ``cache_path``, ``result_coords``, ``expids``, ``lso``, ``dset``,
    ``feats``, ``model``, ``calibration`` and ``verbose``.

    Datasets kept in the HDF5 group ``result_coords``:
      - ``molids``: what ``res.molids()`` returns, written once (assumed to be
        the same for every expid, which is reasonable).
      - ``expids``: int32 rows ``(expid, row)``; ``row`` is the row of that
        expid in ``losses``/``folds`` or -1 if its results could not be
        processed.
      - ``losses``: float64, one row ``(labels - scores) ** 2`` per
        successfully processed expid.
      - ``folds``: int32, the folds returned by ``merge_scores``, row-aligned
        with ``losses``.

    Every expid already present in the ``expids`` index (including the ones
    recorded as failed) is skipped.
    """

    def append_rows(group, name, rows, num_cols, dtype):
        """Append `rows` to the 2D dataset group[name], creating it if needed."""
        rows = np.asarray(rows, dtype=dtype)
        if name in group:
            # require_dataset refuses an existing dataset whose shape differs
            # from the requested one, so grow the dataset explicitly instead.
            target = group[name]
            first_new_row = target.shape[0]
            target.resize(first_new_row + len(rows), axis=0)
        else:
            first_new_row = 0
            target = group.create_dataset(name,
                                          shape=(len(rows), num_cols),
                                          dtype=dtype,
                                          maxshape=(None, num_cols))
        target[first_new_row:] = rows

    with h5py.File(cache_path, 'a') as h5:
        group = h5.require_group(result_coords)

        # Only the first column of the index holds expids; the rows themselves
        # are numpy arrays and cannot be put in a set.
        if 'expids' in group:
            infile_expids = set(group['expids'][:, 0].tolist())
        else:
            infile_expids = set()
        # Failed expids are in the index but have no row in losses/folds, so
        # the offset for new rows must come from the losses dataset itself.
        num_infile_losses = group['losses'].shape[0] if 'losses' in group else 0

        new_index = []  # (expid, row in losses) pairs, row == -1 on failure
        losses = []
        foldss = []
        molids = None
        for expid in expids:
            if verbose:
                print('%s %s' % (expid, lso))
            if expid in infile_expids:
                if verbose:
                    print('\tAlready done, skipping...')
                continue
            try:
                # look for the results corresponding to the desired expid, lso
                result = ManysourcesResult(expid=expid, dset=dset, feats=feats, model=model)
                res = result.lsocv() if lso else result.crscv()
                # Merge the "CV" scores to have one score per compound in the dataset
                scores, labels, folds = res.merge_scores(calibration=calibration)
                # NOTE(review): the AUC is only computed when verbose, so a failure
                # raised here marks the expid as failed only in verbose runs - confirm
                # this is intended.
                if verbose:
                    print(roc_auc_score(labels, scores, average='samples'))
                loss = (labels - scores) ** 2
                if molids is None:
                    molids = res.molids()
            except Exception:
                # We guess that this happens when the external set only contains one class,
                # but we need to check
                print('Warning, had troubles with %s %s' % (expid, lso))
                new_index.append((expid, -1))
            else:
                # Commit only once everything succeeded, so that index, losses
                # and folds can never get out of step.
                new_index.append((expid, num_infile_losses + len(losses)))
                losses.append(loss)
                foldss.append(folds)

        if losses:
            # write molids - N.B. assume same for all of them, which is reasonable
            if 'molids' not in group:
                group['molids'] = molids
            # write losses
            append_rows(group, 'losses', losses, len(molids), np.float64)
            # write folds (should be optional)
            append_rows(group, 'folds', foldss, len(molids), np.int32)

        # write expids index (last, so it never points to rows not yet written)
        if new_index:
            append_rows(group, 'expids', new_index, 2, np.int32)