Exemplo n.º 1
0
	def remove_tmp_file(self):
		"""
		Delete the temporary database dictionary, if one currently exists.

		The dictionary name is read from self.__tmp_db_name; nothing is done
		when db_check_dict() reports that it is absent.
		"""
		tmp_name = self.__tmp_db_name
		if not db_check_dict(tmp_name):
			return
		db_remove_dict(tmp_name)
Exemplo n.º 2
0
    def remove_tmp_file(self):
        """
        Delete the temporary database dictionary, if one currently exists.

        The dictionary name is read from self.__tmp_db_name; nothing is done
        when db_check_dict() reports that it is absent.
        """
        tmp_name = self.__tmp_db_name
        if not db_check_dict(tmp_name):
            return
        db_remove_dict(tmp_name)
Exemplo n.º 3
0
def main():
    """Command-line entry point for the BDB database utility.

    Parses the command line and then either performs a one-shot action
    (``--cleanup`` or ``--merge``, both of which end with ``sys.exit``) or
    walks every path/database named on the command line and applies the
    selected operations to the (optionally filtered) list of dictionaries
    found there: virtual stack creation, ``--restore``, plot extraction,
    dumps, CTF checks, listings, consistency checks and deletion.

    When no path is given, ``bdb:.`` is used.  Returns None; ``sys.exit`` is
    called on invalid options or when the user declines a confirmation.
    """
    global debug
    progname = os.path.basename(sys.argv[0])
    usage = """prog [options] <path or db> ...
	
Various utilities related to BDB databases.

examples :
e2bdb.py -c   Is perhaps the most critical function, as it cleans up the database cache. See the Wiki for more.
e2bdb.py <path> -s    will list the contents of the database in a directory in bdb: notation
e2bdb.py <path> -l    Will give useful summary info about stacks in a directory
e2bdb.py <database> --dump    Gives a mechanism to dump all of the metadata in a database, even if the database contains no images
"""

    parser = EMArgumentParser(usage=usage, version=EMANVERSION)

    parser.add_argument(
        "--cleanup",
        "-c",
        action="store_true",
        default=False,
        help=
        "This option will clean up the database cache so files can safely be moved or accessed on another computer via NFS."
    )
    parser.add_argument(
        "--force",
        "-F",
        action="store_true",
        default=False,
        help=
        "This will force an action that would normally fail due to failed checks."
    )
    parser.add_argument(
        "--delete",
        action="store_true",
        default=False,
        help="This will delete (or at least empty) the named database(s)")
    parser.add_argument("--all",
                        "-a",
                        action="store_true",
                        help="List per-particle info",
                        default=False)
    parser.add_argument("--long",
                        "-l",
                        action="store_true",
                        help="Long listing",
                        default=False)
    parser.add_argument("--short",
                        "-s",
                        action="store_true",
                        help="Dense listing of names only",
                        default=False)
    parser.add_argument(
        "--filt",
        type=str,
        help="Only include dictionary names containing the specified string",
        default=None)
    parser.add_argument(
        "--filtexclude",
        type=str,
        help="Exclude dictionary names containing the specified string",
        default=None)
    parser.add_argument(
        "--match",
        type=str,
        help=
        "Only include dictionaries matching the provided Python regular expression",
        default=None)
    parser.add_argument(
        "--exclude",
        type=str,
        help="The name of a database containing a list of exclusion keys",
        default=None)
    parser.add_argument(
        "--dump",
        "-D",
        action="store_true",
        help=
        "List contents of an entire database, eg 'e2bdb.py -D refine_01#register",
        default=False)
    parser.add_argument(
        "--smalldump",
        action="store_true",
        help=
        "Lists contents of an entire database, but only list 2 items per dictionary to better see headers",
        default=False)
    parser.add_argument(
        "--extractplots",
        action="store_true",
        help=
        "If a database contains sets of plots, such as bdb:refine_xx#convergence.results, this will extract the plots as text files."
    )
    parser.add_argument(
        "--check",
        action="store_true",
        help=
        "Check for self-consistency and errors in the structure of specified databases",
        default=False)
    parser.add_argument("--nocache",
                        action="store_true",
                        help="Don't use the database cache for this operation",
                        default=False)
    parser.add_argument(
        "--merge",
        action="store_true",
        help=
        "This will merge the contents of BDB 2-N into BDB 1 (including BDB 1's contents)",
        default=False)

    parser.add_argument(
        "--makevstack",
        type=str,
        help=
        "Creates a 'virtual' BDB stack with its own metadata, but the binary data taken from the (filtered) list of stacks",
        default=None)
    parser.add_argument(
        "--appendvstack",
        type=str,
        help=
        "Appends to/creates a 'virtual' BDB stack with its own metadata, but the binary data taken from the (filtered) list of stacks",
        default=None)
    parser.add_argument(
        "--verbose",
        "-v",
        dest="verbose",
        action="store",
        metavar="n",
        type=int,
        default=0,
        help=
        "verbose level [0-9], higher number means higher level of verboseness")
    parser.add_argument(
        "--list",
        type=str,
        help=
        "Specify the name of a file with a list of images to use in creation of virtual stacks. Please see source for details.",
        default=None)
    parser.add_argument(
        "--exlist",
        type=str,
        help=
        "Specify the name of a file with a list of images to exclude in creation of virtual stacks. Please see source for details.",
        default=None)
    parser.add_argument(
        "--restore",
        type=str,
        help=
        "Write changes in the derived virtual stack back to the original stack",
        default=None)
    parser.add_argument(
        "--ppid",
        type=int,
        help="Set the PID of the parent process, used for cross platform PPID",
        default=-1)
    parser.add_argument(
        "--checkctf",
        action="store_true",
        help=
        "Verfies that all images in the file contain CTF information, and gives some basic statistics",
        default=False)

    parser.add_argument(
        "--step",
        type=str,
        default="0,1",
        help=
        "Specify <init>,<step>[,<max>]. Processes only a subset of the input data. For example, 0,2 would process only the even numbered particles"
    )
    (options, args) = parser.parse_args()

    if options.nocache: EMAN2db.BDB_CACHE_DISABLE = True

    if options.cleanup:
        db_cleanup(options.force)
        sys.exit(0)

    # --step is "<init>,<step>[,<max>]".  Only the first three fields are
    # used; if the 3-value form cannot be parsed the 2-value form is tried.
    step_fields = options.step.split(",")
    try:
        options.step = int(step_fields[0]), int(step_fields[1]), int(
            step_fields[2])  # convert strings to tuple
    except (ValueError, IndexError):
        try:
            options.step = int(step_fields[0]), int(step_fields[1])
        except (ValueError, IndexError):
            print("Invalid --step specification")
            sys.exit(1)
    if options.step[1] == 0:
        # a zero stride would make range() and slicing raise ValueError below
        print("Invalid --step specification")
        sys.exit(1)

    if options.all: options.long = 1
    if len(args) == 0: args.append("bdb:.")

    # Open the output virtual stack (if requested) and remember where the
    # next record goes: 0 for a new stack, its current length when appending.
    logid = 0
    if options.makevstack:
        logid = E2init(sys.argv)
        vstack = db_open_dict(options.makevstack)
        vstackn = 0
    elif options.appendvstack:
        logid = E2init(sys.argv)
        vstack = db_open_dict(options.appendvstack)
        vstackn = len(vstack)
    else:
        vstack = None

    if options.merge:
        print("WARNING: Merge mode\nCombining contents of: ",
              ", ".join(args[1:]))
        print("into ", args[0])

        if input("Proceed (y/n) :").lower() != "y":
            print("Aborting")
            sys.exit(1)

        for i, path in enumerate(args):
            if path.lower()[:4] == "bdb:" and not "#" in path:
                path = "bdb:.#" + path[4:]
            if path.lower()[:4] != "bdb:": path = "bdb:" + path

            # the first argument is the destination, all others are sources
            if i == 0:
                outdb = db_open_dict(path)
                continue

            indb = db_open_dict(path, True)
            for k in list(indb.keys()):
                outdb[k] = indb[k]

        print("Merging complete")
        sys.exit(0)

    for path in args:
        # Normalise the argument to "bdb:<dir>#" plus a list of dictionary
        # names (a single name when one was given explicitly).
        if path.lower()[:4] == "bdb:" and not "#" in path:
            uu = os.path.split(path)
            if (uu[0] == ''): path = "bdb:.#" + path[4:]
            else: path = uu[0] + "#" + uu[1]
        if path.lower()[:4] != "bdb:": path = "bdb:" + path
        if '#' in path:
            if len(args) > 1: print("\n", path, ":")
            path, dbs = path.rsplit("#", 1)
            path += "#"
            dbs = [dbs]
        else:
            if not '#' in path and path[-1] != '/': path += '#'
            if len(args) > 1: print("\n", path[:-1], ":")
            dbs = db_list_dicts(path)

        dbs.sort()
        if options.filt:
            dbs = [db for db in dbs if options.filt in db]

        if options.filtexclude:
            dbs = [db for db in dbs if options.filtexclude not in db]

        if options.match != None:
            dbs = [db for db in dbs if re.match(options.match, db)]

        if options.list:
            if options.makevstack == None and options.appendvstack == None:
                print(
                    "ERROR, this option is used for virtual stack creation, please add makevstack or appendvstack options, and restart"
                )
                sys.exit(1)
            slist = _read_index_file(options.list)

        if options.exlist:
            # accept --appendvstack as well, as the message and --list do
            if options.makevstack == None and options.appendvstack == None:
                print(
                    "ERROR, this option is used for virtual stack creation, please add makevstack or appendvstack options, and restart"
                )
                sys.exit(1)
            excluded = _read_index_file(options.exlist)
            # keep every image number of the first argument not listed
            nimg = EMUtil.get_image_count(args[0])
            slist = sorted(set(range(nimg)) - set(excluded))

        if options.makevstack != None or options.appendvstack != None:

            vspath = os.path.realpath(vstack.path) + "/"
            if options.verbose > 2: print("vspath: ", vspath)
            for db in dbs:
                dct, keys = db_open_dict(path + db, ro=True, with_keys=True)
                if dct == vstack: continue
                if len(options.step) == 2:
                    if keys == None:
                        vals = list(
                            range(options.step[0], len(dct), options.step[1]))
                    else:
                        vals = keys[options.step[0]::options.step[
                            1]]  # we apply --step even if we have a list of keys
                else:
                    if keys == None:
                        vals = list(
                            range(options.step[0], options.step[2],
                                  options.step[1]))
                    else:
                        vals = keys[options.step[0]:options.step[2]:options.step[
                            1]]  # we apply --step even if we have a list of keys

                # an explicit --list/--exlist selection overrides --step
                if options.list != None or options.exlist != None: vals = slist
                for n in vals:
                    try:
                        d = dct.get(n, nodata=1).get_attr_dict()
                    except:
                        traceback.print_exc()
                        print("---\nerror reading ", db, n)
                        continue
                    # This block converts an absolute path to the actual data to a relative path
                    try:
                        dpath = os.path.realpath(dct.get_data_path(n))
                        if options.verbose > 2: print("dpath: ", dpath)
                        if os.name == 'nt':
                            vspath = vspath.replace("\\", '/')
                            dpath = dpath.replace('\\', '/')
                        rpath = makerelpath(vspath, dpath)
                        if options.verbose > 2: print("rpath: ", rpath)
                    except:
                        print("error with data_path ", db, n)
                        continue
                    d["data_path"] = rpath
                    d["data_n"] = n
                    d["data_source"] = path + db
                    if d["data_path"] == None:
                        print("error with data_path ", db, n)
                        continue
                    vstack[vstackn] = d
                    vstackn += 1
                    if vstackn % 100 == 0:
                        try:
                            print("\r  ", vstackn, "     ", end=' ')
                            sys.stdout.flush()
                        except:
                            pass
                print("\r  ", vstackn, "     ")
                dct.close()

        # Longest dictionary name, used to size the listing columns.  It is
        # always (re)assigned so an empty result can neither leave it
        # undefined nor reuse the value computed for a previous path.
        if dbs:
            maxname = max([len(s) for s in dbs])
        else:
            print("Error reading ", path)
            maxname = 0

        if options.restore:
            nima = EMUtil.get_image_count(options.restore)
            IB = db_open_dict(options.restore)
            source_old = None
            DB = None  # source database currently open, if any
            if len(options.step) == 3: nima = min(options.step[2], nima)
            for i in range(options.step[0], nima, options.step[1]):
                source = IB.get_header(i)
                source_path = source["source_path"]
                ID = source["source_n"]
                # only reopen when the source database changes
                if (source_old != source_path):
                    if (source_old != None): DB.close()
                    DB = db_open_dict(source_path, ro=True)
                    source_old = source_path
                target = DB.get_header(ID)
                try:
                    source["data_path"] = target["data_path"]
                    source["data_n"] = target["data_n"]
                    source["source_path"] = target["source_path"]
                    source["source_n"] = target["source_n"]
                except:
                    #  top level does not have data_path
                    del source['data_path']
                    del source['data_n']
                    source["source_path"] = target["source_path"]
                    source["source_n"] = target["source_n"]
                DB.set_header(ID, source)
            # DB is still None when the loop body never ran
            if DB is not None: DB.close()

        if options.extractplots:
            for db in dbs:
                print("####  Extracting plots from ", db)
                dct = db_open_dict(path + db, ro=True)

                #### Dump
                keys = list(dct.keys())
                keys.sort()
                for k in keys:
                    v = dct[k]
                    # best effort: entries that are not a sequence of
                    # equally indexable columns are silently skipped
                    try:
                        ns = [len(i) for i in v]
                        fsp = db + "-" + k + ".txt"
                        print("%s  (%d columns)" % (fsp, len(ns)))
                        # the context manager closes the file even when a
                        # column turns out to be shorter than the first one
                        with open(fsp, "w") as out:
                            for i in range(ns[0]):
                                for j in range(len(ns)):
                                    out.write(str(v[j][i]))
                                    if j < len(ns) - 1: out.write("\t")
                                out.write("\n")
                    except:
                        continue
                dct.close()

        if options.smalldump:
            for db in dbs:
                print("##### ", db)
                dct = db_open_dict(path + db, ro=True)

                #### Dump
                keys = list(dct.keys())
                keys.sort()
                if len(options.step) == 3: keys = keys[:options.step[2]]
                for k in keys[options.step[0]::options.step[1]]:
                    v = dct[k]
                    print("%s : " % k, end=' ')
                    if isinstance(v, list) or isinstance(v, tuple):
                        for i in v:
                            print("\n\t%s" % str(i), end=' ')
                        print("")
                    elif isinstance(v, dict):
                        ks2 = list(v.keys())
                        ks2.sort()
                        kc = 0
                        for i in ks2:
                            # show at most two entries per dictionary
                            if kc >= 2:
                                print("...")
                                break
                            print("\n\t%s : %s" % (i, v[i]), end=' ')
                            kc += 1
                        print("")
                    else:
                        print(str(v))
                dct.close()
        if options.checkctf:
            for db in dbs:
                print("##### CTF -> ", db)
                dct = db_open_dict(path + db, ro=True)
                keys = list(dct.keys())
                if len(options.step) == 3: keys = keys[:options.step[2]]
                defocus = set()
                for k in keys[options.step[0]::options.step[1]]:
                    v = dct.get_header(k)
                    try:
                        ctf = v["ctf"]
                    except:
                        if k != "maxrec": print("CTF missing on image %s" % k)
                        continue

                    defocus.add(ctf.defocus)

                defocus = list(defocus)
                print("Defocuses found: ", end=' ')
                for i in defocus:
                    print("%1.3f, " % i, end=' ')
                if defocus:
                    print("\n\nRange: %1.3f - %1.3f  (%d unique values)" %
                          (min(defocus), max(defocus), len(defocus)))
                else:
                    # min()/max() raise ValueError on an empty sequence
                    print("\n\nNo CTF information found")
                dct.close()

        if options.dump:
            for db in dbs:
                print("##### ", db)
                dct = db_open_dict(path + db, ro=True)

                #### Dump
                keys = list(dct.keys())
                if len(options.step) == 3: keys = keys[:options.step[2]]
                keys.sort()
                for k in keys[options.step[0]::options.step[1]]:
                    v = dct[k]
                    print("%s : " % k, end=' ')
                    if isinstance(v, list) or isinstance(v, tuple):
                        for i in v:
                            print("\n\t%s" % str(i), end=' ')
                        print("")
                    elif isinstance(v, dict):
                        ks2 = list(v.keys())
                        ks2.sort()
                        for i in ks2:
                            print("\n\t%s : %s" % (i, v[i]), end=' ')
                        print("")
                    else:
                        print(str(v))
                dct.close()

        # long listing, one db per line
        elif options.long:
            width = maxname + 3
            fmt = "%%-%ds %%-07d %%14s  %%s" % width
            fmt2 = "%%-%ds (not an image stack)" % width
            total = [0, 0]  # [number of images, estimated size in bytes]
            for db in dbs:
                dct = db_open_dict(path + db, True)

                ### Info on all particles
                if options.all:
                    mx = len(dct)
                    if len(options.step) == 3: mx = min(mx, options.step[2])
                    for i in range(options.step[0], mx, options.step[1]):
                        try:
                            im = dct[i]
                            if im == None: raise Exception
                        except:
                            continue
                        print(
                            "%d. %d x %d x %d\tA/pix=%1.2f\tM=%1.4f\tS=%1.4f\tSk=%1.4f"
                            % (i, im["nx"], im["ny"], im["nz"], im["apix_x"],
                               im["mean"], im["sigma"], im["skewness"]),
                            end=' ')
                        try:
                            print("\t%s" % str(im["model_id"]))
                        except:
                            pass
                        try:
                            print("\tdf=%1.3f\tB=%1.1f" %
                                  (im["ctf"].defocus, im["ctf"].bfactor))
                        except:
                            print(" ")

                first = EMData()
                try:
                    first.read_image(path + db, 0, True)
                    # size estimate: voxels per image * image count * 4
                    size = first.get_xsize() * first.get_ysize(
                    ) * first.get_zsize() * len(dct) * 4
                    total[0] += len(dct)
                    total[1] += size
                    print(fmt % (db, len(dct), "%dx%dx%d   apix: %1.2f" %
                                 (first.get_xsize(), first.get_ysize(),
                                  first.get_zsize(), first["apix_x"]),
                                 human_size(size)),
                          end=' ')
                except:
                    print(fmt2 % db)
                try:
                    print("\tdf: %1.3f\tB: %1.0f" %
                          (first["ctf"].defocus, first["ctf"].bfactor))
                except:
                    print("")
                dct.close()
            print(fmt % ("TOTAL", total[0], "", human_size(total[1])))
        elif options.check:
            from pickle import loads
            for db in dbs:
                dct = db_open_dict(path + db, ro=True)
                dct.realopen()
                keys = list(dct.bdb.keys())
                allkvp = {}
                # NOTE(review): on Python 3 pickle.loads() requires bytes, so
                # the str "\x80" handling below presumably only works if the
                # keys are str - confirm against the type dct.bdb.keys() returns.
                for k in keys:
                    s1, s2 = k.split("\x80", 1)  # start of a pickled string.
                    s2 = loads("\x80" + s2)  # the pickled part
                    if len(
                            s1
                    ) > 0:  # If anything unpickled, then it is an axbxc prefix identifying the location of a binary
                        st = allkvp.setdefault(
                            s1, set())  # set of all positions seen so far
                        v = loads(dct.bdb.get(k))  # position in binary file
                        if v in st:
                            print(
                                "Error: value %d seen multiple times in %s (%s,%s)"
                                % (v, db, s1, s2))
                        st.add(v)
                print("%s : " % db, end=' ')
                for i in list(allkvp.keys()):
                    if options.verbose > 0:
                        print("%s %d/%d\t" %
                              (i, len(allkvp[i]), int(max(allkvp[i])) + 1),
                              end=' ')
                    if len(allkvp[i]) != int(max(allkvp[i]) + 1):
                        print(
                            "\nMismatch found in %s. Could be normal if file has been rewritten multiple times, but is unusual"
                            % db)
                if options.verbose > 0: print("")
                else: print(" done")
                dct.close()

        elif options.short:
            for db in dbs:
                print(path + db, end=' ')
            print(" ")

        elif not options.makevstack and not options.appendvstack:
            # Nicely formatted 'ls' style display
            # at least one column, so names longer than 77 characters do not
            # cause a division by zero below
            cols = max(1, int(floor(old_div(80.0, (maxname + 3)))))
            width = old_div(80, cols)
            rows = int(ceil(old_div(float(len(dbs)), cols)))

            fmt = "%%-%ds" % width
            for r in range(rows):
                for c in range(cols):
                    # the last column may be shorter than the others
                    try:
                        print(fmt % dbs[r + c * rows], end=' ')
                    except:
                        pass
                print(" ")

        if options.delete:
            if not options.force:
                print("You are requesting to delete the following databases:")
                for db in dbs:
                    print(db, " ", end=' ')
                reply = input("\nAre you sure (y/n) ? ")
                # reply[:1] is '' for an empty answer, which counts as "no"
                if reply[:1].lower() != 'y':
                    print("Aborted")
                    sys.exit(1)

            for db in dbs:
                db_remove_dict(path + db)

    if logid: E2end(logid)


def _read_index_file(fsp):
    """Return the integers listed in the text file *fsp* as one flat list.

    The number of whitespace-separated columns is taken from the first line;
    every line contributes at most that many integers, in file order.  Blank
    lines contribute nothing and an empty file yields an empty list.  A
    non-numeric field raises ValueError.
    """
    with open(fsp, 'r') as listfile:
        lines = listfile.readlines()
    if not lines:
        return []
    ncol = len(lines[0].split())
    indices = []
    for line in lines:
        indices.extend(int(tok) for tok in line.split()[:ncol])
    return indices
Exemplo n.º 4
0
			for r in range(rows):
				for c in range(cols):
					try: print fmt%dbs[r+c*rows],
					except: pass
				print " "

		if options.delete :
			if not options.force :
				print "You are requesting to delete the following databases:"
				for db in dbs:
					print db," ",
				if raw_input("\nAre you sure (y/n) ? ")[0].lower()!='y' :
					print "Aborted"
					sys.exit(1)
			
			for db in dbs: db_remove_dict(path+db)
			

	if logid : E2end(logid)

def makerelpath(p1,p2):
	"""Takes a pair of paths /a/b/c/d and /a/b/e/f/g and returns a relative path to b from a, ../../e/f/g"""
	
	p1s=[i for i in p1.split("/") if len(i)>0]
	p2s=[i for i in p2.split("/") if len(i)>0]

	for dv in range(min(len(p1s),len(p2s))):
		if p1s[dv]!=p2s[dv] : break
	else: dv+=1

	p1s=p1s[dv:]
Exemplo n.º 5
0
                        print fmt % dbs[r + c * rows],
                    except:
                        pass
                print " "

        if options.delete:
            if not options.force:
                print "You are requesting to delete the following databases:"
                for db in dbs:
                    print db, " ",
                if raw_input("\nAre you sure (y/n) ? ")[0].lower() != 'y':
                    print "Aborted"
                    sys.exit(1)

            for db in dbs:
                db_remove_dict(path + db)

    if logid: E2end(logid)


def makerelpath(p1, p2):
    """Takes a pair of paths /a/b/c/d and /a/b/e/f/g and returns a relative path to b from a, ../../e/f/g"""

    p1s = [i for i in p1.split("/") if len(i) > 0]
    p2s = [i for i in p2.split("/") if len(i) > 0]

    for dv in range(min(len(p1s), len(p2s))):
        if p1s[dv] != p2s[dv]: break
    else: dv += 1

    p1s = p1s[dv:]