def _copy_if_needed(source_file=None, target_file=None, name='build'):
    """Copy ``source_file`` to ``target_file`` only when missing or changed.

    Falls back to ``env.input_file`` / ``env.output_file`` when the
    arguments are None. Progress messages are printed via ``puts`` unless
    ``name`` is None.

    Raises:
        InvalidPath: when the source file does not exist.
    """
    if source_file is None:
        source_file = env.input_file
    if target_file is None:
        target_file = env.output_file

    if not os.path.isfile(source_file):
        puts("[{0}]: Input file '{1}' does not exist.".format(name, source_file))
        raise InvalidPath
    elif not os.path.isfile(target_file):
        target_dir = os.path.dirname(target_file)
        # FIX: guard against os.makedirs('') when target_file has no
        # directory component (dirname returns '' in that case).
        if target_dir and not os.path.exists(target_dir):
            os.makedirs(target_dir)
        shutil.copyfile(source_file, target_file)
        if name is not None:
            puts('[{0}]: created "{1}" which did not exist.'.format(name, source_file))
    else:
        # Target exists: copy only when content differs (MD5 comparison).
        if md5_file(source_file) == md5_file(target_file):
            if name is not None:
                puts('[{0}]: "{1}" not changed.'.format(name, source_file))
        else:
            shutil.copyfile(source_file, target_file)
            if name is not None:
                puts('[{0}]: "{1}" changed. Updated: {2}'.format(name, source_file, target_file))
def average_positions(filenames, chi2cutoff=1.15, write=True, plot=1):
    """Filter and average over positions in a capillary.

    Stacks the curves read from `filenames`, keeps the cluster of mutually
    consistent curves selected by `cluster_reps` (threshold `chi2cutoff`)
    and returns the mean of the accepted curves. When `write` is true, an
    .ydat file with the averaged data plus filtering metadata is written
    next to the first input file.
    """
    filenames.sort()
    stack = stack_datafiles(filenames)
    # incinds: indices of accepted curves; cdm: chi2 distance matrix;
    # links: clustering linkage (shapes per cluster_reps — not visible here).
    incinds, cdm, links = cluster_reps(stack, threshold=chi2cutoff, plot=plot)
    ms = mean_stack(stack[incinds,...])
    # Complement of incinds. NOTE: relies on Python 2 range() returning a
    # list (list.remove below).
    disinds = range(len(filenames))
    for i in incinds:
        disinds.remove(i)
    # Record file names with their MD5 sums for provenance.
    included = [ [filenames[i], md5_file(filenames[i])] for i in incinds ]
    discarded = [ [filenames[i], md5_file(filenames[i])] for i in disinds ]
    # Metadata dictionary written alongside the data (Python 2 map()
    # returns lists, so these values are plain lists in the output).
    ad = { 'chi2cutoff': float(chi2cutoff),
           'included': included,
           'discarded': discarded,
           'chi2matrix' : map(float, list(cdm)),
           'incinds' : map(int, list(incinds)),
           'linkage' : [ map(float, ll) for ll in list(links) ] }
    # Rows: q/I/Ierr of the filtered mean, I/Ierr of the first curve,
    # I/Ierr of the unfiltered mean over all curves.
    outarr = np.zeros((7, ms.shape[1]))
    outarr[0:3,:] = ms
    outarr[3:5,:] = stack[0,1:3,:]
    outarr[5:7,:] = mean_stack(stack)[1:3,:]
    if write:
        fname = filenames[0]
        # Output name: input stem up to the '.p' position suffix + ".clu.ydat".
        # NOTE(review): if '.p' is absent, find() returns -1 and truncates
        # the last character — presumably inputs always contain '.p'.
        fname = "%s.clu.ydat" % fname[:(fname.find('.p'))]
        print(fname)
        write_ydat(outarr, fname, addict=ad,
                   cols=['q', 'I', 'Ierr', 'I_first', 'Ierr_first', 'I_all', 'Ierr_all'])
    return ms
def subtract_background_from_ydats(scanfile, indir, outdir, scannumber=-1, highqnorm=False):
    """Subtract backround from SAXS data in .ydat files.

    If `highqnorm` is True, normalize the buffer to the sample intensity
    in q-range [4.0, 5.0] 1/nm and adjust with a constant before
    subtracting.

    Processes either a single scan (`scannumber` > 0) or every scan listed
    in `scanfile`; writes one '.sub.ydat' file per position to `outdir`.
    """
    scans = read_yaml(scanfile)
    if scannumber > 0:
        scannos = [ scannumber ]
    else:
        # Python 2: dict.keys() returns a sortable list.
        scannos = scans.keys()
        scannos.sort()
    for scanno in scannos:
        print("Scan #%03d" % scanno)
        try:
            bufscan = scans[scanno][0]
        except TypeError:
            # Buffer scans have no [bufscan, conc] entry of their own.
            print("Scan #%03d is a buffer" % scanno)
            continue
        try:
            conc = scans[scanno][1]
        except TypeError:
            # FIX: the original message "#02d" was missing the '%' conversion
            # and interpolated `conc`, which is unassigned in this branch
            # (raising TypeError). Report the scan number instead and fall
            # back to unit concentration.
            print("No concentration for scan #%03d." % scanno)
            conc = 1.0
        print("Using concentration %g g/l." % conc)
        filelist = glob.glob(indir+"/s%03d.*.fil.ydat" % scanno)
        # One filtered file per capillary position.
        for posno in xrange(len(filelist)):
            bufname = indir + "/bufs%03d.p%02d.out.ydat" % (bufscan, posno)
            buf, dbuf = read_ydat(bufname, addict=1)
            fname = indir + "/s%03d.p%02d.fil.ydat" % (scanno, posno)
            sam, dsam = read_ydat(fname, addict=1)
            outname = os.path.basename(fname)
            outname = outdir+'/'+outname[:outname.find('.fil.ydat')]+'.sub.ydat'
            # Provenance and unit metadata copied from the sample header.
            ad = { 'samfile': [os.path.basename(fname), md5_file(fname)],
                   'buffile': [os.path.basename(bufname), md5_file(bufname)],
                   'position' : dsam.get('inputposition', "unknown"),
                   'q~unit' : dsam.get('q~unit', "unknown"),
                   'I~unit' : dsam.get('I~unit', "unknown"),
                   'Ierr~unit' : dsam.get('Ierr~unit', "unknown"),
                   }
            if highqnorm:
                # 1 + 0.007 1/(g/l) is the excess of scattered intensity
                # in a protein sample versus buffer in the q-range
                # used [4.0, 5.0] 1/nm per concentration.
                scale = highq_scale(sam, buf)
                bufscale = scale * 1.0/(1.0 + 0.007*conc)
                print("scale: %g, bufscale: %g" % (scale, bufscale))
                buf[1,:] = bufscale * buf[1,:]
                buf[2,:] = bufscale * buf[2,:]
                ad['normalization'] = float(bufscale)
            else:
                ad['normalization'] = 'transmission'
            # Assumes the standard q, I, Ierr ordering in index 0 columns
            sub = errsubtract(sam, buf)
            # Scale to unit concentration.
            sub[1:3,:] = sub[1:3,:] / conc
            write_ydat(sub, outname, addict=ad, attributes=['~unit'])
            print(os.path.basename(outname))
def copy_if_needed(builder='build'):
    """Copy ``env.input_file`` to ``env.output_file`` when missing or changed.

    Aborts when the input file does not exist; creates the output
    directory on demand and skips the copy when the MD5 sums match.
    """
    if not os.path.isfile(env.input_file):
        abort("[{0}]: Input file does not exist.".format(builder))
    elif not os.path.isfile(env.output_file):
        out_dir = os.path.dirname(env.output_file)
        # FIX: guard against os.makedirs('') when the output path has no
        # directory component (dirname returns '' in that case).
        if out_dir and not os.path.exists(out_dir):
            os.makedirs(out_dir)
        shutil.copyfile(env.input_file, env.output_file)
        puts('[{0}]: created "{1}" which did not exist.'.format(builder, env.input_file))
    else:
        # Target exists: copy only when content differs (MD5 comparison).
        if md5_file(env.input_file) == md5_file(env.output_file):
            puts('[{0}]: "{1}" not changed.'.format(builder, env.input_file))
        else:
            shutil.copyfile(env.input_file, env.output_file)
            puts('[{0}]: "{1}" changed.'.format(builder, env.input_file))
def copy_if_needed(builder='build'):
    """Copy ``env.input_file`` to ``env.output_file`` if needed.

    The copy happens when the output is missing or its MD5 differs from
    the input's; a missing input aborts the build.
    """
    if not os.path.isfile(env.input_file):
        abort("[{0}]: Input file does not exist.".format(builder))
    elif not os.path.isfile(env.output_file):
        out_dir = os.path.dirname(env.output_file)
        # FIX: dirname('') is possible for a bare filename; os.makedirs('')
        # would raise, so only create a real directory path.
        if out_dir and not os.path.exists(out_dir):
            os.makedirs(out_dir)
        shutil.copyfile(env.input_file, env.output_file)
        puts('[{0}]: created "{1}" which did not exist.'.format(
            builder, env.input_file))
    else:
        # Compare content by MD5 before overwriting.
        if md5_file(env.input_file) == md5_file(env.output_file):
            puts('[{0}]: "{1}" not changed.'.format(builder, env.input_file))
        else:
            shutil.copyfile(env.input_file, env.output_file)
            puts('[{0}]: "{1}" changed.'.format(builder, env.input_file))
def fetch_egg(self, egg, force=False):
    """
    fetch an egg, i.e. copy or download the distribution into local dir

    force: force download or copy if MD5 mismatches

    When not forcing, an existing local file short-circuits the fetch,
    and a patch-based update (self.patch_egg) is attempted before a full
    fetch. (Python 2 code: print statements.)
    """
    if not isdir(self.local_dir):
        os.makedirs(self.local_dir)
    info = self.remote.get_metadata(egg)
    path = self.path(egg)
    # if force is used, make sure the md5 is the expected, otherwise
    # merely see if the file exists
    if isfile(path):
        if force:
            # Forced fetch still skips the download when the local MD5
            # already matches the remote metadata.
            if md5_file(path) == info.get('md5'):
                if self.verbose:
                    print "Not refetching, %r MD5 match" % path
                return
        else:
            # Not forcing: mere existence of the file is good enough.
            if self.verbose:
                print "Not forcing refetch, %r exists" % path
            return
    # Try a cheaper binary-patch update first; fall back to a full fetch.
    if not force and self.patch_egg(egg):
        return
    self.fetch(egg)
def fetch_egg(self, egg, force=False, execution_aborted=None):
    """
    fetch an egg, i.e. copy or download the distribution into local dir

    force: force download or copy if MD5 mismatches

    execution_aborted: a threading.Event object which signals when the
        execution needs to be aborted, or None, if we don't want to
        abort the fetching at all.  It is passed through to self.fetch.

    (Python 2 code: print statements.)
    """
    if not isdir(self.local_dir):
        os.makedirs(self.local_dir)
    info = self.remote.get_metadata(egg)
    path = self.path(egg)
    # if force is used, make sure the md5 is the expected, otherwise
    # merely see if the file exists
    if isfile(path):
        if force:
            # Forced fetch still skips the download when the local MD5
            # already matches the remote metadata.
            if md5_file(path) == info.get('md5'):
                if self.verbose:
                    print "Not refetching, %r MD5 match" % path
                return
        else:
            # Not forcing: mere existence of the file is good enough.
            if self.verbose:
                print "Not forcing refetch, %r exists" % path
            return
    self.fetch(egg, execution_aborted)
def execute (self):
    """Download, decrypt and tag one file from the remote channel.

    (Python 2 code: octal literal 0700 below.)
    """
    op = self._build_operation ('download file', {'channel': self.channel, 'file': self.filename})
    f = self._get_url_handler (op)
    # Customize the HTTP handler
    # Wrap f.read so every chunk read updates a running byte count and
    # triggers the optional progress callback.
    def download_f_custom_read (self_f, *args):
        tmp = self_f.read_orig (*args)
        download_f_custom_read.received += len(tmp)
        if self.callback_step:
            self.callback_step (download_f_custom_read.received)
        return tmp
    f.read_orig = f.read
    f.read = types.MethodType (download_f_custom_read, f)
    # Byte counter lives on the function object itself.
    download_f_custom_read.received = 0
    # Decrypt
    out_dir = os.path.join (self.download_dir, self.channel)
    if not os.path.exists (out_dir):
        os.makedirs (out_dir, 0700)
    out_fullpath = os.path.join (out_dir, self.filename)
    # Streams from the wrapped handler straight into the decrypted file.
    self.keys.decrypt_file_to_path (f, out_fullpath)
    # Set file attributes
    # Cache the MD5 (and when it was computed) as extended attributes.
    xattr.setxattr (out_fullpath, 'md5_time', str(time.time()))
    xattr.setxattr (out_fullpath, 'md5', utils.md5_file(out_fullpath))
def execute(self):
    """Download, decrypt and tag one file from the remote channel.

    (Python 2 code: octal literal 0700 below.)
    """
    op = self._build_operation('download file', {
        'channel': self.channel,
        'file': self.filename
    })
    f = self._get_url_handler(op)

    # Customize the HTTP handler
    # Wrap f.read so every chunk read updates a running byte count and
    # triggers the optional progress callback.
    def download_f_custom_read(self_f, *args):
        tmp = self_f.read_orig(*args)
        download_f_custom_read.received += len(tmp)
        if self.callback_step:
            self.callback_step(download_f_custom_read.received)
        return tmp

    f.read_orig = f.read
    f.read = types.MethodType(download_f_custom_read, f)
    # Byte counter lives on the function object itself.
    download_f_custom_read.received = 0

    # Decrypt
    out_dir = os.path.join(self.download_dir, self.channel)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir, 0700)
    out_fullpath = os.path.join(out_dir, self.filename)
    # Streams from the wrapped handler straight into the decrypted file.
    self.keys.decrypt_file_to_path(f, out_fullpath)

    # Set file attributes
    # Cache the MD5 (and when it was computed) as extended attributes.
    xattr.setxattr(out_fullpath, 'md5_time', str(time.time()))
    xattr.setxattr(out_fullpath, 'md5', utils.md5_file(out_fullpath))
def get_binstore_filename(self, filename):
    """Return the real path of `filename` inside the binstore.

    Note: this assumes `filename` is already tracked by the binstore;
    check membership first if unsure.
    """
    # Symlinked entries already point into the binstore — resolve them.
    if os.path.islink(filename):
        return os.path.realpath(filename)
    # Regular files are stored under their content digest.
    return os.path.join(self.localpath, utils.md5_file(filename))
def download_to_cache(meta):
    """Ensure the source archive described by `meta` is in SRC_CACHE.

    Downloads meta['url'] to SRC_CACHE/meta['fn'] when missing, then
    verifies the optional expected MD5 (also for a pre-existing cached
    file).  Returns the cached path.

    Raises:
        Exception: when the file's MD5 does not match meta['md5'].
    """
    if not isdir(SRC_CACHE):
        os.makedirs(SRC_CACHE)
    fn = meta['fn']
    md5 = meta.get('md5')
    path = join(SRC_CACHE, fn)
    if not isfile(path):
        download(meta['url'], path, md5)
    # Verify even an already-cached file against the expected checksum.
    # (Idiom fix: `x != y` instead of `not x == y`.)
    if md5 and md5_file(path) != md5:
        raise Exception("MD5 mismatch: %r" % meta)
    return path
def _copy_file_to_external_images(path):
    """copy file with unique name into extrenal images dir"""
    extension = os.path.splitext(path)[1].lower()
    # Normalize the two common JPEG spellings to a single extension.
    if extension == ".jpeg":
        extension = ".jpg"
    # Content hash makes the destination name unique per file content.
    unique_name = f"{md5_file(path)}{extension}"
    destination = f"{images_out_dir}/{unique_name}"
    # Identical content was already copied under the same name — skip.
    if not os.path.isfile(destination):
        shutil.copyfile(path, destination)
    return unique_name
def write_filtered(filtered, first, aver, incmap, fname, inputfile=None, pos=-1): """Write an 'ydat' YAML file `fname` with filtered data and index array. `filtered` contains the filtered data, `incmap` the point by point inclusion array (bool matrix) of points used in the averaging and `first` the data set used in comparison for the filtering. """ # FIXME: take a list of filenames which are filtered as an argument # and write them to the file with open(fname, "w") as fp: indent = ' ' fp.write('incmap: !!seq [\n' + indent) slist = incmap_to_strings(incmap.T) perrow = 1 + (80 / len(slist[0])) i = 0 while i < perrow * ((len(slist)-1)/perrow): # until last row fp.write(slist[i]) if (i+1) % perrow or not i: fp.write(', ') else: fp.write(',\n' + indent) i += 1 while i < len(slist): # last row fp.write(slist[i]) if i < len(slist)-1: fp.write(', ') else: fp.write(']\n') i += 1 ad = { 'method' : "filter_repetitions", 'q~unit' : '1/nm', 'I~unit' : 'arb.', 'Ierr~unit' : 'arb.', 'I_first~unit' : 'arb.', 'Ierr_first~unit' : 'arb.', 'I_all~unit' : 'arb.', 'Ierr_all~unit' : 'arb.', } if inputfile: ad['inputfile'] = [ inputfile, md5_file(inputfile) ] if pos >= 0: ad['inputposition'] = int(pos) outarr = np.zeros((7, filtered.shape[1])) outarr[0:3,:] = filtered outarr[3:5,:] = first[1:3,:] outarr[5:7,:] = aver[1:3,:] cols = ['q', 'I', 'Ierr', 'I_first', 'Ierr_first', 'I_all', 'Ierr_all'] write_ydat(outarr, fp, cols=cols, addict=ad, attributes=['~unit'])
def checkout(self, filenames):
    """ Revert local modifications to a list of files

    Directories are walked recursively; staged or unchanged files are
    skipped.  (Python 2 code: print statement below.)
    """
    printv("GitBin.checkout(%s)" % filenames)
    for filename in filenames:
        # if the filename is a directory, recurse into it.
        # TODO: maybe make recursive directory crawls optional/configurable
        if os.path.isdir(filename):
            printv("\trecursing into %s" % filename)
            for root, dirs, files in os.walk(filename):
                # first checkout_dashdash all directories recursively
                # (`len(x) and f()` is a short-circuit "call only if non-empty")
                len(dirs) and self.checkout([os.path.join(root, dn) for dn in dirs])
                # now checkout_dashdash all the files
                len(files) and self.checkout([os.path.join(root, fn) for fn in files])
            continue
        status = self.gitrepo.status(filename)
        if (status & git.STATUS_STAGED_MASK) == git.STATUS_STAGED:
            # staged, skip it.
            print "you probably meant to do: git bin reset %s" % filename
            continue
        if not (status & git.STATUS_CHANGED_MASK):
            # the file hasn't changed, skip it.
            continue
        # The first two cases can just be passed through to regular git
        # checkout --.
        # {1} (GBAdded[MSs] -> Reset[MS])
        # {2} (GBEdit[TF])
        # In the third case, there is some local modification that we should
        # save 'just in case' first.
        # {3} (GBEdit[TF] -> Modified[TF]) (*)
        if (status & git.STATUS_TYPECHANGED) and not self.binstore.has(filename):
            # Stash a copy under /tmp keyed by the file's MD5 before reverting.
            justincase_filename = os.path.join(
                "/tmp", "%s.%s.justincase" % (filename, utils.md5_file(filename)))
            commands = cmd.CompoundCommand(
                cmd.CopyFileCommand(
                    self.binstore.get_binstore_filename(filename),
                    filename,
                    justincase_filename),
            )
            commands.execute()
        # NOTE(review): source indentation was lost; restore() is placed at
        # loop level (runs for every changed file) — confirm against upstream.
        self.gitrepo.restore(filename)
def checkout(self, filenames):
    """ Revert local modifications to a list of files

    Directories are walked recursively; staged or unchanged files are
    skipped.  (Python 2 code: print statement below.)
    """
    printv("GitBin.checkout(%s)" % filenames)
    for filename in filenames:
        # if the filename is a directory, recurse into it.
        # TODO: maybe make recursive directory crawls optional/configurable
        if os.path.isdir(filename):
            printv("\trecursing into %s" % filename)
            for root, dirs, files in os.walk(filename):
                # first checkout_dashdash all directories recursively
                # (`len(x) and f()` is a short-circuit "call only if non-empty")
                len(dirs) and self.checkout(
                    [os.path.join(root, dn) for dn in dirs])
                # now checkout_dashdash all the files
                len(files) and self.checkout(
                    [os.path.join(root, fn) for fn in files])
            continue
        status = self.gitrepo.status(filename)
        if (status & git.STATUS_STAGED_MASK) == git.STATUS_STAGED:
            # staged, skip it.
            print "you probably meant to do: git bin reset %s" % filename
            continue
        if not (status & git.STATUS_CHANGED_MASK):
            # the file hasn't changed, skip it.
            continue
        # The first two cases can just be passed through to regular git
        # checkout --.
        # {1} (GBAdded[MSs] -> Reset[MS])
        # {2} (GBEdit[TF])
        # In the third case, there is some local modification that we should
        # save 'just in case' first.
        # {3} (GBEdit[TF] -> Modified[TF]) (*)
        if (status & git.STATUS_TYPECHANGED
                ) and not self.binstore.has(filename):
            # Stash a copy under /tmp keyed by the file's MD5 before reverting.
            justincase_filename = os.path.join(
                "/tmp", "%s.%s.justincase" % (filename,
                                              utils.md5_file(filename)))
            commands = cmd.CompoundCommand(
                cmd.CopyFileCommand(
                    self.binstore.get_binstore_filename(filename),
                    filename,
                    justincase_filename),
            )
            commands.execute()
        # NOTE(review): source indentation was lost; restore() is placed at
        # loop level (runs for every changed file) — confirm against upstream.
        self.gitrepo.restore(filename)
def diff(src_path, dst_path, patch_path):
    """Write a zip-based binary patch turning `src_path` into `dst_path`.

    Each changed entry is stored as a bsdiff4 delta (starts with
    "BSDIFF4") when that is smaller, otherwise as bz2-compressed content
    (starts with "BZ"); entries removed in dst are stored as 'RM'.  A
    '__zdiff_info__.json' member records name/size/mtime/md5 of both
    archives.  Returns the number of changed entries.
    """
    src_zip = zipfile.ZipFile(src_path)
    dst_zip = zipfile.ZipFile(dst_path)
    patch = zipfile.ZipFile(patch_path, 'w', zipfile.ZIP_STORED)

    src_names = set(src_zip.namelist())
    dst_names = set(dst_zip.namelist())
    count = 0
    for name in src_names | dst_names:
        old = src_zip.read(name) if name in src_names else None
        new = dst_zip.read(name) if name in dst_names else None
        if old == new:
            continue
        if new is None:
            # Entry exists only in src: mark it for removal.
            zdata = 'RM'
        else:
            bz2_data = bz2.compress(new)  # startswith BZ
            if old is None:
                # New entry: store compressed content.
                zdata = bz2_data
            else:
                # Changed entry: keep whichever encoding is smaller.
                diff_data = bsdiff4.diff(old, new)  # startswith BSDIFF4
                zdata = diff_data if len(diff_data) < len(bz2_data) else bz2_data
        patch.writestr(name, zdata)
        count += 1

    info = {}
    for path, pre in (src_path, 'src'), (dst_path, 'dst'):
        info.update({pre: basename(path),
                     pre + '_size': getsize(path),
                     pre + '_mtime': getmtime(path),
                     pre + '_md5': md5_file(path)})
    patch.writestr('__zdiff_info__.json',
                   json.dumps(info, indent=2, sort_keys=True))

    patch.close()
    dst_zip.close()
    src_zip.close()
    return count
def filter_matfile(fname, outstem, p_reject=0.001, plot=1):
    """Filter outlier repetitions per position in a .mat file and write
    one '.out.ydat' file per position to paths starting with `outstem`.

    The chi-squared rejection threshold is derived from `p_reject` and
    the number of valid channels.
    """
    stack = read_mat(fname)
    md5 = md5_file(fname)
    print("Rejection probability: %0.3g" % p_reject)
    # Count channels that are not NaN in the first repetition's I row.
    N = np.sum(np.logical_not(np.isnan(stack[0,0,1,:])))
    print("Number of valid channels: %d" % N)
    # Per-channel chi2 threshold at survival probability p_reject.
    threshold = chi2.ppf(1.0 - p_reject, N) / N
    print("Chisq rejection threshold: %0.3g" % threshold)
    for pos in range(stack.shape[0]):
        reps = stack[pos,...]
        incinds, cdm = filter_outliers(reps, threshold=threshold, plot=plot)
        ms = mean_stack(reps[incinds,...])
        # Complement of incinds. NOTE: relies on Python 2 range()
        # returning a list (list.remove below).
        disinds = range(reps.shape[0])
        for i in incinds:
            disinds.remove(i)
        print("Pos %d, discarded: %s" % (pos, str(disinds)))
        # Metadata written with the data (Python 2 map() returns lists).
        ad = { 'chi2cutoff' : float(threshold),
               'rejection_prob' : float(p_reject),
               'incinds' : map(int, list(incinds)),
               'disinds' : map(int, list(disinds)),
               'chi2matrix' : map(float, list(cdm)),
               'method' : "filter_outliers",
               'inputfile' : [ fname, md5 ],
               'inputposition' : int(pos),
               'q~unit' : '1/nm',
               'I~unit' : 'arb.',
               'Ierr~unit' : 'arb.',
               'I_first~unit' : 'arb.',
               'Ierr_first~unit' : 'arb.',
               'I_all~unit' : 'arb.',
               'Ierr_all~unit' : 'arb.',
               }
        # Rows: q/I/Ierr (filtered mean), I/Ierr of the first repetition,
        # I/Ierr of the unfiltered mean over all repetitions.
        outarr = np.zeros((7, ms.shape[1]))
        outarr[0:3,:] = ms
        outarr[3:5,:] = reps[0,1:3,:]
        outarr[5:7,:] = mean_stack(reps)[1:3,:]
        outname = "%s.p%02d.out.ydat" % (outstem, pos)
        print(outname)
        write_ydat(outarr, outname, addict=ad,
                   cols=['q','I','Ierr','I_first','Ierr_first','I_all','Ierr_all'],
                   attributes=['~unit'])
def force_linked_actions(dists, index, prefix):
    """Build an action plan that re-fetches, re-extracts and re-links
    every distribution in `dists` into `prefix`.
    """
    plan = defaultdict(list)
    plan[PREFIX] = prefix
    plan['op_order'] = (RM_FETCHED, FETCH, RM_EXTRACTED, EXTRACT,
                       UNLINK, LINK)
    for dist in dists:
        fn = dist + '.tar.bz2'
        pkg_path = join(config.pkgs_dir, fn)
        # Fetch when the cached package is missing; remove and re-fetch
        # when it is present but fails the MD5 check against the index.
        if not isfile(pkg_path):
            plan[FETCH].append(dist)
        elif md5_file(pkg_path) != index[fn]['md5']:
            plan[RM_FETCHED].append(dist)
            plan[FETCH].append(dist)
        # Always re-extract.
        plan[RM_EXTRACTED].append(dist)
        plan[EXTRACT].append(dist)
        # Unlink first only when the dist is currently linked into prefix.
        if isfile(join(prefix, 'conda-meta', dist + '.json')):
            plan[UNLINK].append(dist)
        plan[LINK].append(dist)
    return plan