Exemplo n.º 1
0
def _copy_if_needed(source_file=None, target_file=None, name='build'):
    """Copy ``source_file`` to ``target_file`` only when needed.

    Falls back to ``env.input_file`` / ``env.output_file`` when the
    arguments are omitted.  Creates the target's parent directory when
    missing, and skips the copy when source and target already have
    identical MD5 checksums.

    Raises:
        InvalidPath: if ``source_file`` does not exist.
    """
    if source_file is None:
        source_file = env.input_file

    if target_file is None:
        target_file = env.output_file

    if not os.path.isfile(source_file):
        puts("[{0}]: Input file '{1}' does not exist.".format(name, source_file))
        raise InvalidPath
    elif not os.path.isfile(target_file):
        # Hoisted: dirname was previously computed twice.
        target_dir = os.path.dirname(target_file)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        shutil.copyfile(source_file, target_file)

        if name is not None:
            puts('[{0}]: created "{1}" which did not exist.'.format(name, source_file))
    else:
        if md5_file(source_file) == md5_file(target_file):
            if name is not None:
                puts('[{0}]: "{1}" not changed.'.format(name, source_file))
        else:
            shutil.copyfile(source_file, target_file)

            if name is not None:
                puts('[{0}]: "{1}" changed. Updated: {2}'.format(name, source_file, target_file))
Exemplo n.º 2
0
def average_positions(filenames, chi2cutoff=1.15, write=True, plot=1):
    """Filter and average over positions in a capillary.

    Clusters the datasets read from `filenames`, averages the accepted
    cluster members, and optionally writes the result (with provenance
    metadata) to a '.clu.ydat' file named after the first input file.

    Returns the mean stack (q, I, Ierr) of the included datasets.
    """
    filenames.sort()
    stack = stack_datafiles(filenames)

    incinds, cdm, links = cluster_reps(stack, threshold=chi2cutoff, plot=plot)
    ms = mean_stack(stack[incinds,...])

    # Complement of incinds.  The old code called .remove() on a bare
    # range(), which is an AttributeError under Python 3.
    incset = set(int(i) for i in incinds)
    disinds = [i for i in range(len(filenames)) if i not in incset]
    included  = [ [filenames[i], md5_file(filenames[i])]
        for i in incinds ]
    discarded = [ [filenames[i], md5_file(filenames[i])]
        for i in disinds ]
    # List comprehensions instead of map(): map() returns a lazy iterator
    # under Python 3 and would not serialize as a sequence.
    ad = { 'chi2cutoff': float(chi2cutoff),
        'included': included,
        'discarded': discarded,
        'chi2matrix' : [float(c) for c in cdm],
        'incinds' : [int(i) for i in incinds],
        'linkage' : [ [float(x) for x in ll] for ll in links ] }

    # Rows: q, I, Ierr (filtered mean), then first-repetition and all-mean
    # intensity/error pairs for comparison.
    outarr = np.zeros((7, ms.shape[1]))
    outarr[0:3,:] = ms
    outarr[3:5,:] = stack[0,1:3,:]
    outarr[5:7,:] = mean_stack(stack)[1:3,:]

    if write:
        fname = filenames[0]
        fname = "%s.clu.ydat" % fname[:(fname.find('.p'))]
        print(fname)
        write_ydat(outarr, fname, addict=ad, cols=['q', 'I', 'Ierr', 'I_first', 'Ierr_first', 'I_all', 'Ierr_all'])
    return ms
Exemplo n.º 3
0
def subtract_background_from_ydats(scanfile, indir, outdir, scannumber=-1, highqnorm=False):
    """Subtract backround from SAXS data in .ydat files.

    If `highqnorm` is True, normalize the buffer to the sample intensity
    in q-range [4.0, 5.0] 1/nm and adjust with a constant before subtracting.
    """
    scans = read_yaml(scanfile)
    if scannumber > 0:
        scannos = [ scannumber ]
    else:
        # sorted() works on dict key views (Python 3) as well as lists;
        # the old keys(); .sort() two-step does not.
        scannos = sorted(scans.keys())
    for scanno in scannos:
        print("Scan #%03d" % scanno)
        try:
            bufscan = scans[scanno][0]
        except TypeError:
            # scans[scanno] not indexable -> this scan is itself a buffer.
            print("Scan #%03d is a buffer" % scanno)
            continue
        try:
            conc = scans[scanno][1]
        except TypeError:
            # Fixed format string: the original "#02d" was missing the '%'
            # and made this print raise TypeError.
            print("No concentration for scan #%02d." % scanno)
            conc = 1.0
        print("Using concentration %g g/l." % conc)
        filelist = glob.glob(indir+"/s%03d.*.fil.ydat" % scanno)
        # range() instead of Python-2-only xrange().
        for posno in range(len(filelist)):
            bufname = indir + "/bufs%03d.p%02d.out.ydat" % (bufscan, posno)
            buf, dbuf = read_ydat(bufname, addict=1)
            fname = indir + "/s%03d.p%02d.fil.ydat" % (scanno, posno)
            sam, dsam = read_ydat(fname, addict=1)
            outname = os.path.basename(fname)
            outname = outdir+'/'+outname[:outname.find('.fil.ydat')]+'.sub.ydat'
            # Provenance metadata written into the output ydat header.
            ad = {
                'samfile': [os.path.basename(fname), md5_file(fname)],
                'buffile': [os.path.basename(bufname), md5_file(bufname)],
                'position' : dsam.get('inputposition', "unknown"),
                'q~unit' : dsam.get('q~unit', "unknown"),
                'I~unit' : dsam.get('I~unit', "unknown"),
                'Ierr~unit' : dsam.get('Ierr~unit', "unknown"),
                }
            if highqnorm:
                # 1 + 0.007 1/(g/l) is the excess of scattered intensity
                # in a protein sample versus buffer in the q-range
                # used [4.0, 5.0] 1/nm per concentration.
                scale = highq_scale(sam, buf)
                bufscale = scale * 1.0/(1.0 + 0.007*conc)
                print("scale: %g, bufscale: %g" % (scale, bufscale))
                buf[1,:] = bufscale * buf[1,:]
                buf[2,:] = bufscale * buf[2,:]
                ad['normalization'] = float(bufscale)
            else:
                ad['normalization'] = 'transmission'
            # Assumes the standard q, I, Ierr ordering in index 0 columns
            sub = errsubtract(sam, buf)
            # Scale intensity and error to 1 g/l.
            sub[1:3,:] = sub[1:3,:] / conc
            write_ydat(sub, outname, addict=ad, attributes=['~unit'])
            print(os.path.basename(outname))
Exemplo n.º 4
0
def copy_if_needed(builder='build'):
    """Copy env.input_file to env.output_file only if it is missing or stale."""
    src = env.input_file
    dst = env.output_file

    if os.path.isfile(src) is False:
        abort("[{0}]: Input file does not exist.".format(builder))
    elif os.path.isfile(dst) is False:
        # Target missing entirely: make room and copy.
        dst_dir = os.path.dirname(dst)
        if not os.path.exists(dst_dir):
            os.makedirs(dst_dir)
        shutil.copyfile(src, dst)
        puts('[{0}]: created "{1}" which did not exist.'.format(builder, src))
    elif md5_file(src) == md5_file(dst):
        # Checksums agree, nothing to do.
        puts('[{0}]: "{1}" not changed.'.format(builder, src))
    else:
        shutil.copyfile(src, dst)
        puts('[{0}]: "{1}" changed.'.format(builder, src))
Exemplo n.º 5
0
def copy_if_needed(builder='build'):
    """Ensure env.output_file is an up-to-date copy of env.input_file."""
    if not os.path.isfile(env.input_file):
        abort("[{0}]: Input file does not exist.".format(builder))
        return

    if not os.path.isfile(env.output_file):
        # First build: create the target directory and copy the file over.
        parent = os.path.dirname(env.output_file)
        if not os.path.exists(parent):
            os.makedirs(parent)
        shutil.copyfile(env.input_file, env.output_file)
        puts('[{0}]: created "{1}" which did not exist.'.format(
            builder, env.input_file))
        return

    # Both files exist; copy only when contents differ.
    if md5_file(env.input_file) == md5_file(env.output_file):
        puts('[{0}]: "{1}" not changed.'.format(builder, env.input_file))
    else:
        shutil.copyfile(env.input_file, env.output_file)
        puts('[{0}]: "{1}" changed.'.format(builder, env.input_file))
Exemplo n.º 6
0
    def fetch_egg(self, egg, force=False):
        """
        fetch an egg, i.e. copy or download the distribution into local dir
        force: force download or copy if MD5 mismatches
        """
        # Make sure the destination directory exists before fetching.
        if not isdir(self.local_dir):
            os.makedirs(self.local_dir)
        # Remote metadata; expected to carry the egg's reference MD5.
        info = self.remote.get_metadata(egg)
        path = self.path(egg)

        # if force is used, make sure the md5 is the expected, otherwise
        # merely see if the file exists
        if isfile(path):
            if force:
                if md5_file(path) == info.get('md5'):
                    if self.verbose:
                        print "Not refetching, %r MD5 match" % path
                    return
            else:
                if self.verbose:
                    print "Not forcing refetch, %r exists" % path
                return

        # NOTE(review): patch_egg presumably reconstructs the egg from a
        # delta patch, making the full fetch below unnecessary — confirm.
        if not force and self.patch_egg(egg):
            return

        self.fetch(egg)
Exemplo n.º 7
0
    def fetch_egg(self, egg, force=False, execution_aborted=None):
        """
        fetch an egg, i.e. copy or download the distribution into local dir
        force: force download or copy if MD5 mismatches
        execution_aborted: a threading.Event object which signals when the execution
            needs to be aborted, or None, if we don't want to abort the fetching at all.
        """
        # Make sure the destination directory exists before fetching.
        if not isdir(self.local_dir):
            os.makedirs(self.local_dir)
        # Remote metadata; expected to carry the egg's reference MD5.
        info = self.remote.get_metadata(egg)
        path = self.path(egg)

        # if force is used, make sure the md5 is the expected, otherwise
        # merely see if the file exists
        if isfile(path):
            if force:
                if md5_file(path) == info.get('md5'):
                    if self.verbose:
                        print "Not refetching, %r MD5 match" % path
                    return
            else:
                if self.verbose:
                    print "Not forcing refetch, %r exists" % path
                return

        # The abort event is forwarded so the download can be cancelled.
        self.fetch(egg, execution_aborted)
Exemplo n.º 8
0
    def execute (self):
        """Download the channel file, decrypt it into the download dir and
        record its MD5 (and a timestamp) as extended file attributes."""
        op = self._build_operation ('download file', {'channel': self.channel, 'file': self.filename})
        f = self._get_url_handler (op)

        # Customize the HTTP handler
        # Wrap f.read so every chunk read updates a running byte count and
        # drives the optional progress callback.
        def download_f_custom_read (self_f, *args):
            tmp = self_f.read_orig (*args)
            download_f_custom_read.received += len(tmp)

            if self.callback_step:
                self.callback_step (download_f_custom_read.received)

            return tmp

        f.read_orig = f.read
        f.read = types.MethodType (download_f_custom_read, f)
        # Progress state is kept on the function object itself.
        download_f_custom_read.received = 0

        # Decrypt
        out_dir = os.path.join (self.download_dir, self.channel)
        if not os.path.exists (out_dir):
            os.makedirs (out_dir, 0700)

        out_fullpath = os.path.join (out_dir, self.filename)
        self.keys.decrypt_file_to_path (f, out_fullpath)

        # Set file attributes
        xattr.setxattr (out_fullpath, 'md5_time', str(time.time()))
        xattr.setxattr (out_fullpath, 'md5', utils.md5_file(out_fullpath))
Exemplo n.º 9
0
    def execute(self):
        """Download the channel file, decrypt it into the download dir and
        record its MD5 (and a timestamp) as extended file attributes."""
        op = self._build_operation('download file', {
            'channel': self.channel,
            'file': self.filename
        })
        f = self._get_url_handler(op)

        # Customize the HTTP handler
        # Wrap f.read so every chunk read updates a running byte count and
        # drives the optional progress callback.
        def download_f_custom_read(self_f, *args):
            tmp = self_f.read_orig(*args)
            download_f_custom_read.received += len(tmp)

            if self.callback_step:
                self.callback_step(download_f_custom_read.received)

            return tmp

        f.read_orig = f.read
        f.read = types.MethodType(download_f_custom_read, f)
        # Progress state is kept on the function object itself.
        download_f_custom_read.received = 0

        # Decrypt
        out_dir = os.path.join(self.download_dir, self.channel)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir, 0700)

        out_fullpath = os.path.join(out_dir, self.filename)
        self.keys.decrypt_file_to_path(f, out_fullpath)

        # Set file attributes
        xattr.setxattr(out_fullpath, 'md5_time', str(time.time()))
        xattr.setxattr(out_fullpath, 'md5', utils.md5_file(out_fullpath))
Exemplo n.º 10
0
 def get_binstore_filename(self, filename):
     """ get the real filename of a given file in the binstore. """
     # Note: this function assumes that the filename is in the binstore. You
     # probably want to check that first.
     if os.path.islink(filename):
         # Symlinks already point at their binstore blob.
         return os.path.realpath(filename)
     return os.path.join(self.localpath, utils.md5_file(filename))
Exemplo n.º 11
0
 def get_binstore_filename(self, filename):
     """ get the real filename of a given file in the binstore. """
     # Assumes `filename` is already tracked by the binstore; callers
     # should verify that before asking for the backing path.
     if not os.path.islink(filename):
         digest = utils.md5_file(filename)
         return os.path.join(self.localpath, digest)
     # Symlinks already point at their binstore blob.
     return os.path.realpath(filename)
Exemplo n.º 12
0
def download_to_cache(meta):
    """Return the local cache path of the source described by ``meta``.

    Downloads ``meta['url']`` into ``SRC_CACHE`` when the file is not
    already cached, then verifies the MD5 checksum when one is provided.

    Raises:
        Exception: if the cached file's MD5 does not match ``meta['md5']``.
    """
    if not isdir(SRC_CACHE):
        os.makedirs(SRC_CACHE)

    fn = meta['fn']
    md5 = meta.get('md5')
    path = join(SRC_CACHE, fn)
    if not isfile(path):
        download(meta['url'], path, md5)

    # Verify even pre-existing cache entries so a corrupted or truncated
    # earlier download is still detected.
    if md5 and md5_file(path) != md5:
        raise Exception("MD5 mismatch: %r" % meta)
    return path
Exemplo n.º 13
0
def _copy_file_to_external_images(path):
    """Copy *path* into the external images dir under an MD5-based name."""

    digest = md5_file(path)
    extension = os.path.splitext(path)[1].lower()
    # Normalize the two JPEG spellings to a single extension.
    if extension == ".jpeg":
        extension = ".jpg"

    new_file_name = f"{digest}{extension}"
    new_file_path = f"{images_out_dir}/{new_file_name}"

    # Content-addressed name: identical files collapse to one copy.
    if not os.path.isfile(new_file_path):
        shutil.copyfile(path, new_file_path)

    return new_file_name
Exemplo n.º 14
0
def write_filtered(filtered, first, aver, incmap, fname, inputfile=None, pos=-1):
    """Write an 'ydat' YAML file `fname` with filtered data and index array.

    `filtered` contains the filtered data, `incmap` the point by point inclusion
    array (bool matrix) of points used in the averaging and `first` the
    data set used in comparison for the filtering.
    """
    # FIXME: take a list of filenames which are filtered as an argument
    #        and write them to the file
    with open(fname, "w") as fp:
        indent = '  '
        fp.write('incmap: !!seq [\n' + indent)
        slist = incmap_to_strings(incmap.T)
        # Floor division: '/' would yield a float under Python 3, breaking
        # both the row width and the loop bound below.
        perrow = 1 + (80 // len(slist[0]))
        i = 0
        while i < perrow * ((len(slist)-1)//perrow): # until last row
            fp.write(slist[i])
            if (i+1) % perrow or not i:
                fp.write(', ')
            else:
                fp.write(',\n' + indent)
            i += 1
        while i < len(slist): # last row
            fp.write(slist[i])
            if i < len(slist)-1:
                fp.write(', ')
            else:
                fp.write(']\n')
            i += 1
        # Per-column unit metadata written to the ydat header.
        ad = {
            'method' : "filter_repetitions",
            'q~unit' : '1/nm',
            'I~unit' : 'arb.',
            'Ierr~unit' : 'arb.',
            'I_first~unit' : 'arb.',
            'Ierr_first~unit' : 'arb.',
            'I_all~unit' : 'arb.',
            'Ierr_all~unit' : 'arb.',
            }
        if inputfile:
            ad['inputfile'] = [ inputfile, md5_file(inputfile) ]
        if pos >= 0:
            ad['inputposition'] = int(pos)
        # Rows: filtered (q, I, Ierr), then first-repetition and all-mean
        # intensity/error pairs for comparison.
        outarr = np.zeros((7, filtered.shape[1]))
        outarr[0:3,:] = filtered
        outarr[3:5,:] = first[1:3,:]
        outarr[5:7,:] = aver[1:3,:]
        cols = ['q', 'I', 'Ierr', 'I_first', 'Ierr_first', 'I_all', 'Ierr_all']
        write_ydat(outarr, fp, cols=cols, addict=ad, attributes=['~unit'])
Exemplo n.º 15
0
    def checkout(self, filenames):
        """ Revert local modifications to a list of files """
        printv("GitBin.checkout(%s)" % filenames)
        for filename in filenames:

            # if the filename is a directory, recurse into it.
            # TODO: maybe make recursive directory crawls optional/configurable
            if os.path.isdir(filename):
                printv("\trecursing into %s" % filename)
                for root, dirs, files in os.walk(filename):
                    # first checkout_dashdash all directories recursively
                    # `len(x) and f(x)` short-circuits: recurse only when non-empty.
                    len(dirs) and self.checkout([os.path.join(root, dn) for dn in dirs])
                    # now checkout_dashdash all the files
                    len(files) and self.checkout([os.path.join(root, fn) for fn in files])
                continue

            status = self.gitrepo.status(filename)
            if (status & git.STATUS_STAGED_MASK) == git.STATUS_STAGED:
                # staged, skip it.
                print "you probably meant to do: git bin reset %s" % filename
                continue

            if not (status & git.STATUS_CHANGED_MASK):
                # the file hasn't changed, skip it.
                continue

            # The first two cases can just be passed through to regular git
            # checkout --.
            # {1} (GBAdded[MSs] -> Reset[MS])
            # {2} (GBEdit[TF])
            # In the third case, there is some local modification that we should
            # save 'just in case' first.
            # {3} (GBEdit[TF] -> Modified[TF]) (*)

            if (status & git.STATUS_TYPECHANGED) and not self.binstore.has(filename):
                # Back up the modified file under /tmp, keyed by its MD5, before
                # the restore below discards the local changes.
                justincase_filename = os.path.join(
                    "/tmp",
                    "%s.%s.justincase" % (filename,
                                          utils.md5_file(filename)))
                commands = cmd.CompoundCommand(
                    cmd.CopyFileCommand(
                        self.binstore.get_binstore_filename(filename),
                        filename,
                        justincase_filename),
                )
                commands.execute()

            self.gitrepo.restore(filename)
Exemplo n.º 16
0
    def checkout(self, filenames):
        """ Revert local modifications to a list of files """
        printv("GitBin.checkout(%s)" % filenames)
        for filename in filenames:

            # if the filename is a directory, recurse into it.
            # TODO: maybe make recursive directory crawls optional/configurable
            if os.path.isdir(filename):
                printv("\trecursing into %s" % filename)
                for root, dirs, files in os.walk(filename):
                    # first checkout_dashdash all directories recursively
                    # `len(x) and f(x)` short-circuits: recurse only when non-empty.
                    len(dirs) and self.checkout(
                        [os.path.join(root, dn) for dn in dirs])
                    # now checkout_dashdash all the files
                    len(files) and self.checkout(
                        [os.path.join(root, fn) for fn in files])
                continue

            status = self.gitrepo.status(filename)
            if (status & git.STATUS_STAGED_MASK) == git.STATUS_STAGED:
                # staged, skip it.
                print "you probably meant to do: git bin reset %s" % filename
                continue

            if not (status & git.STATUS_CHANGED_MASK):
                # the file hasn't changed, skip it.
                continue

            # The first two cases can just be passed through to regular git
            # checkout --.
            # {1} (GBAdded[MSs] -> Reset[MS])
            # {2} (GBEdit[TF])
            # In the third case, there is some local modification that we should
            # save 'just in case' first.
            # {3} (GBEdit[TF] -> Modified[TF]) (*)

            if (status & git.STATUS_TYPECHANGED
                ) and not self.binstore.has(filename):
                # Back up the modified file under /tmp, keyed by its MD5, before
                # the restore below discards the local changes.
                justincase_filename = os.path.join(
                    "/tmp",
                    "%s.%s.justincase" % (filename, utils.md5_file(filename)))
                commands = cmd.CompoundCommand(
                    cmd.CopyFileCommand(
                        self.binstore.get_binstore_filename(filename),
                        filename, justincase_filename), )
                commands.execute()

            self.gitrepo.restore(filename)
Exemplo n.º 17
0
def diff(src_path, dst_path, patch_path):
    """Write a member-by-member binary patch turning src_path into dst_path.

    Returns the number of zip members that differ between the two archives.
    """
    src_zip = zipfile.ZipFile(src_path)
    dst_zip = zipfile.ZipFile(dst_path)
    patch_zip = zipfile.ZipFile(patch_path, 'w', zipfile.ZIP_STORED)

    src_names = set(src_zip.namelist())
    dst_names = set(dst_zip.namelist())

    count = 0
    for name in src_names | dst_names:
        xdata = src_zip.read(name) if name in src_names else None
        ydata = dst_zip.read(name) if name in dst_names else None
        if xdata == ydata:
            continue

        if ydata is not None:
            bz2_data = bz2.compress(ydata) # startswith BZ

        if xdata is None:
            # Member only present in dst: store it whole, compressed.
            zdata = bz2_data
        elif ydata is None:
            # Member removed in dst.
            zdata = 'RM'
        else:
            # Member changed: keep whichever encoding is smaller.
            diff_data = bsdiff4.diff(xdata, ydata) # startswith BSDIFF4
            zdata = diff_data if len(diff_data) < len(bz2_data) else bz2_data

        #print zdata[:2], name
        patch_zip.writestr(name, zdata)
        count += 1

    # Record provenance for both archives in the patch itself.
    info = {}
    for path, prefix in (src_path, 'src'), (dst_path, 'dst'):
        info[prefix] = basename(path)
        info[prefix + '_size'] = getsize(path)
        info[prefix + '_mtime'] = getmtime(path)
        info[prefix + '_md5'] = md5_file(path)
    patch_zip.writestr('__zdiff_info__.json',
                       json.dumps(info, indent=2, sort_keys=True))
    patch_zip.close()
    dst_zip.close()
    src_zip.close()
    return count
Exemplo n.º 18
0
def diff(src_path, dst_path, patch_path):
    """Write a member-by-member binary patch turning src_path into dst_path.

    Returns the number of zip members that differ between the two archives.
    """
    x = zipfile.ZipFile(src_path)
    y = zipfile.ZipFile(dst_path)
    z = zipfile.ZipFile(patch_path, 'w', zipfile.ZIP_STORED)

    xnames = set(x.namelist())
    ynames = set(y.namelist())

    count = 0
    # Walk the union of member names; identical members are skipped.
    for name in xnames | ynames:
        xdata = x.read(name) if name in xnames else None
        ydata = y.read(name) if name in ynames else None
        if xdata == ydata:
            continue

        if ydata is not None:
            bz2_data = bz2.compress(ydata) # startswith BZ

        if xdata is not None and ydata is not None:
            # Changed member: keep whichever encoding is smaller.
            diff_data = bsdiff4.diff(xdata, ydata)
            if len(diff_data) < len(bz2_data):
                zdata = diff_data # startswith BSDIFF4
            else:
                zdata = bz2_data
        elif xdata is not None and ydata is None:
            # Member removed in dst.
            zdata = 'RM'
        elif ydata is not None and xdata is None:
            # Member only present in dst: store it whole, compressed.
            zdata = bz2_data
        else:
            # Unreachable: both None would have hit the xdata == ydata skip.
            raise Exception("Hmm, didn't expect to get here.")

        #print zdata[:2], name
        z.writestr(name, zdata)
        count += 1

    # Record provenance for both archives in the patch itself.
    info = {}
    for path, pre in (src_path, 'src'), (dst_path, 'dst'):
        info.update({pre: basename(path),
                     pre + '_size': getsize(path),
                     pre + '_mtime': getmtime(path),
                     pre + '_md5': md5_file(path)})
    z.writestr('__zdiff_info__.json',
               json.dumps(info, indent=2, sort_keys=True))
    z.close()
    y.close()
    x.close()
    return count
Exemplo n.º 19
0
def filter_matfile(fname, outstem, p_reject=0.001, plot=1):
    """Filter outlier repetitions in a .mat stack and write averaged ydats.

    For every position in the stack read from `fname`, repetitions whose
    chi-squared deviation exceeds the threshold derived from `p_reject`
    are discarded before averaging.  One '<outstem>.pNN.out.ydat' file is
    written per position.
    """
    stack = read_mat(fname)
    md5 = md5_file(fname)
    print("Rejection probability: %0.3g" % p_reject)
    N = np.sum(np.logical_not(np.isnan(stack[0,0,1,:])))
    print("Number of valid channels: %d" % N)
    threshold = chi2.ppf(1.0 - p_reject, N) / N
    print("Chisq rejection threshold: %0.3g" % threshold)

    for pos in range(stack.shape[0]):
        reps = stack[pos,...]
        incinds, cdm = filter_outliers(reps, threshold=threshold, plot=plot)
        ms = mean_stack(reps[incinds,...])
        # Complement of incinds.  The old code called .remove() on a bare
        # range(), which is an AttributeError under Python 3.
        incset = set(int(i) for i in incinds)
        disinds = [i for i in range(reps.shape[0]) if i not in incset]
        print("Pos %d, discarded: %s" % (pos, str(disinds)))
        # List comprehensions instead of map(): map() returns a lazy
        # iterator under Python 3 and would not serialize as a sequence.
        ad = { 'chi2cutoff' : float(threshold),
            'rejection_prob' : float(p_reject),
            'incinds' : [int(i) for i in incinds],
            'disinds' : [int(i) for i in disinds],
            'chi2matrix' : [float(c) for c in cdm],
            'method' : "filter_outliers",
            'inputfile' : [ fname, md5 ],
            'inputposition' : int(pos),
            'q~unit' : '1/nm',
            'I~unit' : 'arb.',
            'Ierr~unit' : 'arb.',
            'I_first~unit' : 'arb.',
            'Ierr_first~unit' : 'arb.',
            'I_all~unit' : 'arb.',
            'Ierr_all~unit' : 'arb.',
            }
        # Rows: filtered mean (q, I, Ierr), then first-repetition and
        # all-mean intensity/error pairs for comparison.
        outarr = np.zeros((7, ms.shape[1]))
        outarr[0:3,:] = ms
        outarr[3:5,:] = reps[0,1:3,:]
        outarr[5:7,:] = mean_stack(reps)[1:3,:]

        outname = "%s.p%02d.out.ydat" % (outstem, pos)
        print(outname)
        write_ydat(outarr, outname, addict=ad,
            cols=['q','I','Ierr','I_first','Ierr_first','I_all','Ierr_all'],
            attributes=['~unit'])
Exemplo n.º 20
0
def force_linked_actions(dists, index, prefix):
    """Build an action plan that re-fetches, re-extracts and re-links
    every distribution in `dists` into `prefix`."""
    actions = defaultdict(list)
    actions[PREFIX] = prefix
    actions['op_order'] = (RM_FETCHED, FETCH, RM_EXTRACTED, EXTRACT,
                           UNLINK, LINK)
    for dist in dists:
        tarball = dist + '.tar.bz2'
        pkg_path = join(config.pkgs_dir, tarball)
        if not isfile(pkg_path):
            actions[FETCH].append(dist)
        elif md5_file(pkg_path) != index[tarball]['md5']:
            # Cached package is corrupt/stale: drop it and fetch again.
            actions[RM_FETCHED].append(dist)
            actions[FETCH].append(dist)
        # Always re-extract, and unlink first when already installed.
        actions[RM_EXTRACTED].append(dist)
        actions[EXTRACT].append(dist)
        if isfile(join(prefix, 'conda-meta', dist + '.json')):
            actions[UNLINK].append(dist)
        actions[LINK].append(dist)
    return actions