Exemplo n.º 1
0
    def restore(self, log=False):
        """Restore a database from an existing root.

        Walks the entries directly under ``self.root``.  The '.xapers'
        entry, non-directories, directories whose name is not an integer
        and empty directories are skipped.  For every remaining directory
        the integer name is used as the docid: the document is looked up
        with ``self[docid]`` (a new ``Document`` is created when that
        raises ``xapian.DocNotFoundError``) and populated from the files
        found in the directory:

          'bibtex'  -> doc.add_bibtex(path)
          '*.pdf'   -> doc.add_file(path)
          'tags'    -> doc.add_tags(tags), one tag per line

        Any other file is ignored.  The document is synced once all of
        its files have been processed.

        If `log` is true, progress is written to stderr.
        """
        docdirs = os.listdir(self.root)
        docdirs.sort()
        for ddir in docdirs:
            if ddir == '.xapers':
                continue
            docdir = os.path.join(self.root, ddir)
            if not os.path.isdir(docdir):
                # skip things that aren't directories
                continue

            if log:
                print >>sys.stderr, docdir

            # if we can't convert the directory name into an integer,
            # assume it's not relevant to us and continue
            try:
                docid = int(ddir)
            except ValueError:
                continue

            docfiles = os.listdir(docdir)
            if not docfiles:
                # skip empty directories
                continue

            if log:
                print >>sys.stderr, '  docid:', docid

            try:
                doc = self[docid]
            except xapian.DocNotFoundError:
                doc = Document(self, docid=docid)

            for dfile in docfiles:
                dpath = os.path.join(docdir, dfile)
                if dfile == 'bibtex':
                    if log:
                        print >>sys.stderr, '  adding bibtex'
                    doc.add_bibtex(dpath)
                elif os.path.splitext(dpath)[1] == '.pdf':
                    if log:
                        print >>sys.stderr, '  adding file:', dfile
                    doc.add_file(dpath)
                elif dfile == 'tags':
                    if log:
                        print >>sys.stderr, '  adding tags'
                    with open(dpath, 'r') as f:
                        # One tag per line.  Blank lines are dropped so
                        # that an empty tags file (or a stray empty line)
                        # does not turn into the empty tag ''.
                        tags = [tag for tag in f.read().strip().splitlines()
                                if tag.strip()]
                    if tags:
                        doc.add_tags(tags)
            doc.sync()
Exemplo n.º 2
0
    def restore(self, log=False):
        """Restore a database from an existing root.

        Walks the entries directly under ``self.root``.  The '.xapers'
        entry, non-directories, directories whose name is not an integer
        and empty directories are skipped.  For every remaining directory
        the integer name is used as the docid: the document is looked up
        with ``self[docid]`` (a new ``Document`` is created when that
        raises ``xapian.DocNotFoundError``) and populated from the files
        found in the directory:

          'bibtex'  -> doc.add_bibtex(path)
          '*.pdf'   -> doc.add_file(path)
          'tags'    -> doc.add_tags(tags), one tag per line

        Any other file is ignored.  The document is synced once all of
        its files have been processed.

        If `log` is true, progress is written to stderr.
        """
        docdirs = os.listdir(self.root)
        docdirs.sort()
        for ddir in docdirs:
            if ddir == '.xapers':
                continue
            docdir = os.path.join(self.root, ddir)
            if not os.path.isdir(docdir):
                # skip things that aren't directories
                continue

            if log:
                print >> sys.stderr, docdir

            # if we can't convert the directory name into an integer,
            # assume it's not relevant to us and continue
            try:
                docid = int(ddir)
            except ValueError:
                continue

            docfiles = os.listdir(docdir)
            if not docfiles:
                # skip empty directories
                continue

            if log:
                print >> sys.stderr, '  docid:', docid

            try:
                doc = self[docid]
            except xapian.DocNotFoundError:
                doc = Document(self, docid=docid)

            for dfile in docfiles:
                dpath = os.path.join(docdir, dfile)
                if dfile == 'bibtex':
                    if log:
                        print >> sys.stderr, '  adding bibtex'
                    doc.add_bibtex(dpath)
                elif os.path.splitext(dpath)[1] == '.pdf':
                    if log:
                        print >> sys.stderr, '  adding file:', dfile
                    doc.add_file(dpath)
                elif dfile == 'tags':
                    if log:
                        print >> sys.stderr, '  adding tags'
                    with open(dpath, 'r') as f:
                        # One tag per line.  Blank lines are dropped so
                        # that an empty tags file (or a stray empty line)
                        # does not turn into the empty tag ''.
                        tags = [tag for tag in f.read().strip().splitlines()
                                if tag.strip()]
                    if tags:
                        doc.add_tags(tags)
            doc.sync()
Exemplo n.º 3
0
def add(db, query_string, infile=None, sid=None, tags=None, prompt=False):
    """Add a document to the database, or update an existing one.

    Arguments:
      db            database object; queried here through count(),
                    search() and doc_for_source()
      query_string  if non-empty, must match exactly one document, which
                    then becomes the document to update
      infile        path of a file to add ('~' is expanded), or True to
                    retrieve the file from the source via fetch_file()
      sid           source identifier string, or the path of an existing
                    file, in which case that path is handed to the
                    document's add_bibtex()
      tags          tags handed to the document's add_tags()
      prompt        if true, interactively prompt for the file, the
                    source and the tags

    Progress and error messages are written to stderr.  Every handled
    error terminates the process with sys.exit(1); unexpected exceptions
    are re-raised after the progress line has been terminated.

    Returns the docid of the added/updated document.
    """

    doc = None
    bibtex = None

    sources = Sources()
    doc_sid = sid
    source = None
    file_data = None

    if infile and infile is not True:
        infile = os.path.expanduser(infile)

    ##################################
    # if query provided, find single doc to update

    if query_string:
        if db.count(query_string) != 1:
            print >> sys.stderr, "Search '%s' did not match a single document." % query_string
            print >> sys.stderr, "Aborting."
            sys.exit(1)

        # grab the first (and, per the count check above, only) match
        for doc in db.search(query_string):
            break

    ##################################
    # do fancy option prompting

    if prompt:
        doc_sids = []
        if doc_sid:
            doc_sids = [doc_sid]
        # scan the file for source info
        if infile is not True:
            infile = prompt_for_file(infile)

            print >> sys.stderr, "Scanning document for source identifiers..."
            try:
                ss = sources.scan_file(infile)
            except ParseError as e:
                print >> sys.stderr, "\n"
                print >> sys.stderr, "Parse error: %s" % e
                sys.exit(1)
            if len(ss) == 0:
                print >> sys.stderr, "0 source ids found."
            else:
                if len(ss) == 1:
                    print >> sys.stderr, "1 source id found:"
                else:
                    print >> sys.stderr, "%d source ids found:" % (len(ss))
                # loop variable deliberately not called 'sid', so the
                # function argument of that name is not clobbered
                for found in ss:
                    print >> sys.stderr, "  %s" % (found)
                doc_sids += [s.sid for s in ss]
        doc_sid = prompt_for_source(db, doc_sids)
        tags = prompt_for_tags(db, tags)

    if not query_string and not infile and not doc_sid:
        print >> sys.stderr, "Must specify file or source to import, or query to update existing document."
        sys.exit(1)

    ##################################
    # process source and get bibtex

    # check if source is a file, in which case interpret it as bibtex
    if doc_sid and os.path.exists(doc_sid):
        bibtex = doc_sid

        # a bibtex file gives us no source object to retrieve the
        # document file from; bail out here instead of falling through
        # to open(True) below
        if infile is True:
            print >> sys.stderr, "Must specify source with retrieve file option."
            sys.exit(1)

    elif doc_sid:
        # get source object for sid string
        try:
            source = sources.match_source(doc_sid)
        except SourceError as e:
            print >> sys.stderr, e
            sys.exit(1)

        # check that the source doesn't match an existing doc
        sdoc = db.doc_for_source(source.sid)
        if sdoc:
            if doc and sdoc != doc:
                print >> sys.stderr, "A different document already exists for source '%s'." % (
                    doc_sid)
                print >> sys.stderr, "Aborting."
                sys.exit(1)
            print >> sys.stderr, "Source '%s' found in database.  Updating existing document..." % (
                doc_sid)
            doc = sdoc

        try:
            print >> sys.stderr, "Retrieving bibtex...",
            bibtex = source.fetch_bibtex()
            print >> sys.stderr, "done."
        except SourceError as e:
            print >> sys.stderr, "\n"
            print >> sys.stderr, "Could not retrieve bibtex: %s" % e
            sys.exit(1)

        if infile is True:
            try:
                print >> sys.stderr, "Retrieving file...",
                file_name, file_data = source.fetch_file()
                print >> sys.stderr, "done."
            except SourceError as e:
                print >> sys.stderr, "\n"
                print >> sys.stderr, "Could not retrieve file: %s" % e
                sys.exit(1)

    elif infile is True:
        print >> sys.stderr, "Must specify source with retrieve file option."
        sys.exit(1)

    if infile and not file_data:
        # the document is raw file data, not text: read it in binary
        # mode so no newline translation is applied to it
        with open(infile, 'rb') as f:
            file_data = f.read()
        file_name = os.path.basename(infile)

    ##################################

    # if we still don't have a doc, create a new one
    if not doc:
        doc = Document(db)

    ##################################
    # add stuff to the doc

    if bibtex:
        try:
            print >> sys.stderr, "Adding bibtex...",
            doc.add_bibtex(bibtex)
            print >> sys.stderr, "done."
        except BibtexError as e:
            print >> sys.stderr, "\n"
            print >> sys.stderr, e
            print >> sys.stderr, "Bibtex must be a plain text file with a single bibtex entry."
            sys.exit(1)
        except:
            # terminate the pending progress line, then propagate
            print >> sys.stderr, "\n"
            raise

    # add source sid if it hasn't been added yet
    if source and not doc.get_sids():
        doc.add_sid(source.sid)

    if infile:
        try:
            print >> sys.stderr, "Adding file...",
            doc.add_file_data(file_name, file_data)
            print >> sys.stderr, "done."
        except ParseError as e:
            print >> sys.stderr, "\n"
            print >> sys.stderr, "Parse error: %s" % e
            sys.exit(1)
        except:
            # terminate the pending progress line, then propagate
            print >> sys.stderr, "\n"
            raise

    if tags:
        try:
            print >> sys.stderr, "Adding tags...",
            doc.add_tags(tags)
            print >> sys.stderr, "done."
        except:
            # terminate the pending progress line, then propagate
            print >> sys.stderr, "\n"
            raise

    ##################################
    # sync the doc to db and disk

    try:
        print >> sys.stderr, "Syncing document...",
        doc.sync()
        print >> sys.stderr, "done.\n",
    except:
        # terminate the pending progress line, then propagate
        print >> sys.stderr, "\n"
        raise

    print_doc_summary(doc)
    return doc.docid
Exemplo n.º 4
0
def add(db, query_string, infile=None, sid=None, tags=None, prompt=False):
    """Add a document to the database, or update an existing one.

    Arguments:
      db            database object; queried here through count(),
                    search() and doc_for_source()
      query_string  if non-empty, must match exactly one document, which
                    then becomes the document to update
      infile        path of a file to add ('~' is expanded), or True to
                    retrieve the file from the source via fetch_file()
      sid           source identifier string, or the path of an existing
                    file, in which case that path is handed to the
                    document's add_bibtex()
      tags          tags handed to the document's add_tags()
      prompt        if true, interactively prompt for the file, the
                    source and the tags

    Progress and error messages are written to stderr.  Every handled
    error terminates the process with sys.exit(1); unexpected exceptions
    are re-raised after the progress line has been terminated.

    Returns the docid of the added/updated document.
    """

    doc = None
    bibtex = None

    sources = Sources()
    doc_sid = sid
    source = None
    file_data = None

    if infile and infile is not True:
        infile = os.path.expanduser(infile)

    ##################################
    # if query provided, find single doc to update

    if query_string:
        if db.count(query_string) != 1:
            print >>sys.stderr, "Search '%s' did not match a single document." % query_string
            print >>sys.stderr, "Aborting."
            sys.exit(1)

        # grab the first (and, per the count check above, only) match
        for doc in db.search(query_string):
            break

    ##################################
    # do fancy option prompting

    if prompt:
        doc_sids = []
        if doc_sid:
            doc_sids = [doc_sid]
        # scan the file for source info
        if infile is not True:
            infile = prompt_for_file(infile)

            print >>sys.stderr, "Scanning document for source identifiers..."
            try:
                ss = sources.scan_file(infile)
            except ParseError as e:
                print >>sys.stderr, "\n"
                print >>sys.stderr, "Parse error: %s" % e
                sys.exit(1)
            if len(ss) == 0:
                print >>sys.stderr, "0 source ids found."
            else:
                if len(ss) == 1:
                    print >>sys.stderr, "1 source id found:"
                else:
                    print >>sys.stderr, "%d source ids found:" % (len(ss))
                # loop variable deliberately not called 'sid', so the
                # function argument of that name is not clobbered
                for found in ss:
                    print >>sys.stderr, "  %s" % (found)
                doc_sids += [s.sid for s in ss]
        doc_sid = prompt_for_source(db, doc_sids)
        tags = prompt_for_tags(db, tags)

    if not query_string and not infile and not doc_sid:
        print >>sys.stderr, "Must specify file or source to import, or query to update existing document."
        sys.exit(1)

    ##################################
    # process source and get bibtex

    # check if source is a file, in which case interpret it as bibtex
    if doc_sid and os.path.exists(doc_sid):
        bibtex = doc_sid

        # a bibtex file gives us no source object to retrieve the
        # document file from; bail out here instead of falling through
        # to open(True) below
        if infile is True:
            print >>sys.stderr, "Must specify source with retrieve file option."
            sys.exit(1)

    elif doc_sid:
        # get source object for sid string
        try:
            source = sources.match_source(doc_sid)
        except SourceError as e:
            print >>sys.stderr, e
            sys.exit(1)

        # check that the source doesn't match an existing doc
        sdoc = db.doc_for_source(source.sid)
        if sdoc:
            if doc and sdoc != doc:
                print >>sys.stderr, "A different document already exists for source '%s'." % (doc_sid)
                print >>sys.stderr, "Aborting."
                sys.exit(1)
            print >>sys.stderr, "Source '%s' found in database.  Updating existing document..." % (doc_sid)
            doc = sdoc

        try:
            print >>sys.stderr, "Retrieving bibtex...",
            bibtex = source.fetch_bibtex()
            print >>sys.stderr, "done."
        except SourceError as e:
            print >>sys.stderr, "\n"
            print >>sys.stderr, "Could not retrieve bibtex: %s" % e
            sys.exit(1)

        if infile is True:
            try:
                print >>sys.stderr, "Retrieving file...",
                file_name, file_data = source.fetch_file()
                print >>sys.stderr, "done."
            except SourceError as e:
                print >>sys.stderr, "\n"
                print >>sys.stderr, "Could not retrieve file: %s" % e
                sys.exit(1)

    elif infile is True:
        print >>sys.stderr, "Must specify source with retrieve file option."
        sys.exit(1)

    if infile and not file_data:
        # the document is raw file data, not text: read it in binary
        # mode so no newline translation is applied to it
        with open(infile, 'rb') as f:
            file_data = f.read()
        file_name = os.path.basename(infile)

    ##################################

    # if we still don't have a doc, create a new one
    if not doc:
        doc = Document(db)

    ##################################
    # add stuff to the doc

    if bibtex:
        try:
            print >>sys.stderr, "Adding bibtex...",
            doc.add_bibtex(bibtex)
            print >>sys.stderr, "done."
        except BibtexError as e:
            print >>sys.stderr, "\n"
            print >>sys.stderr, e
            print >>sys.stderr, "Bibtex must be a plain text file with a single bibtex entry."
            sys.exit(1)
        except:
            # terminate the pending progress line, then propagate
            print >>sys.stderr, "\n"
            raise

    # add source sid if it hasn't been added yet
    if source and not doc.get_sids():
        doc.add_sid(source.sid)

    if infile:
        try:
            print >>sys.stderr, "Adding file...",
            doc.add_file_data(file_name, file_data)
            print >>sys.stderr, "done."
        except ParseError as e:
            print >>sys.stderr, "\n"
            print >>sys.stderr, "Parse error: %s" % e
            sys.exit(1)
        except:
            # terminate the pending progress line, then propagate
            print >>sys.stderr, "\n"
            raise

    if tags:
        try:
            print >>sys.stderr, "Adding tags...",
            doc.add_tags(tags)
            print >>sys.stderr, "done."
        except:
            # terminate the pending progress line, then propagate
            print >>sys.stderr, "\n"
            raise

    ##################################
    # sync the doc to db and disk

    try:
        print >>sys.stderr, "Syncing document...",
        doc.sync()
        print >>sys.stderr, "done.\n",
    except:
        # terminate the pending progress line, then propagate
        print >>sys.stderr, "\n"
        raise

    print_doc_summary(doc)
    return doc.docid