def doc_metadata (repo, response, params):
    """
    Return the metadata for the specified document.

    :param doc_id: the document to fetch the info for
    :type doc_id: an UpLib doc ID string
    :param format: optional; can be specified as "xml" to return the results as \
    an XML document instead of the default plain-text form.  Or you can specify the \
    HTTP Accept header as "application/xml" to obtain the same result.
    :type format: string constant "xml"
    :return: the metadata for the specified document
    :rtype: an XML data structure, if the "Accept: application/xml" header \
    was passed in the request, otherwise a value of MIME type "text/rfc822-headers"
    """
    id = params.get("doc_id")
    if not id:
        response.error(HTTPCodes.BAD_REQUEST, "No doc_id parameter specified for request.\n")
        return
    if not repo.valid_doc_id(id):
        response.error(HTTPCodes.NOT_FOUND, "Invalid doc_id parameter %s specified for request.\n" % id)
        return
    doc = repo.get_document(id)
    if response.xml_request or (params.get("format") == "xml"):
        # XML form: one <document> element carrying the metadata as attributes
        retval = getDOMImplementation().createDocument(None, "result", None)
        d = retval.createElement('document')
        d.setAttribute('id', doc.id)
        title = doc.get_metadata("title") or u""
        title = title.replace("\r", " ")
        d.setAttribute('title', title)
        md = retval.createElement('metadata')
        dmd = doc.get_metadata()
        for element in dmd:
            md.setAttribute(element, dmd[element])
        d.appendChild(md)
        retval.documentElement.appendChild(d)
        fp = response.open("application/xml;charset=utf-8")
        fp.write(retval.toxml("UTF-8") + "\n")
        fp.close()
    else:
        # plain-text form: one RFC 822 style header line per metadata element
        fp = response.open("text/rfc822-headers")
        write_metadata(fp, doc.get_metadata())
        fp.close()
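
# A client-side usage sketch for the handler above (not part of the original
# module).  It assumes the function is exposed as an UpLib action URL of the
# form /action/externalAPI/doc_metadata; the host, port, and doc ID below are
# placeholders -- adjust them to your deployment.
def _example_fetch_doc_metadata_xml(host, port, doc_id):
    import urllib2
    # ask for the XML form via both the format parameter and the Accept header
    url = "https://%s:%s/action/externalAPI/doc_metadata?doc_id=%s&format=xml" % (host, port, doc_id)
    req = urllib2.Request(url)
    req.add_header("Accept", "application/xml")
    return urllib2.urlopen(req).read()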

def repo_properties (repo, response, params):
    """
    Return the properties of the repository.  These include values like
    `name`, `port`, `uplib-home`, `uplib-bin`, `uplib-lib`, `uplib-version`,
    `categories` (a comma-separated list of category names),
    `docs` (a comma-separated list of doc IDs),
    `collections` (a comma-separated list of collection IDs), and
    `last-modified-time` (the last-modified time of the repository,
    as a floating-point string giving seconds past the Unix epoch).

    :return: the repository properties specified above
    :rtype: either an XML-formatted data set, if "Accept: application/xml" is specified, \
    or a plain-text list of properties, one per line (lines can be very long)
    """
    conf = configurator.default_configurator()
    d = {}
    d['name'] = repo.name()
    d['port'] = repo.port()
    d['uplib-home'] = conf.get("uplib-home")
    d['uplib-bin'] = conf.get("uplib-bin")
    d['uplib-lib'] = conf.get("uplib-lib")
    d['uplib-version'] = conf.get("UPLIB_VERSION")
    c = repo.categories()
    c.sort(key=lambda x: x.lower())     # case-insensitive sort
    d['categories'] = ','.join(c)
    d['docs'] = ','.join([doc.id for doc in repo.generate_docs()])
    d['collections'] = ','.join([x.id for x in repo.list_collections()])
    d['last-modified-time'] = str(repo.mod_time())
    if response.xml_request or (params.get("format") == "xml"):
        # XML form: a single <properties> element with one attribute per property
        retval = getDOMImplementation().createDocument(None, "repository", None)
        e = retval.createElement('properties')
        for element in d:
            e.setAttribute(element, str(d[element]))
        retval.documentElement.appendChild(e)
        fp = response.open("application/xml;charset=utf-8")
        fp.write(retval.toxml("UTF-8") + "\n")
        fp.close()
    else:
        fp = response.open("text/plain")
        write_metadata(fp, d)
        fp.close()
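
# A client-side sketch (not part of the original module) showing how the XML
# form produced by repo_properties can be read back into a dict: every
# property is an attribute of the single <properties> element.
def _example_parse_repo_properties(xml_text):
    from xml.dom.minidom import parseString
    props = parseString(xml_text).getElementsByTagName("properties")[0]
    return dict(props.attributes.items())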

def main(argv):
    global _IGNORE_KEYBOARD_INTERRUPTS
    try:
        import feedparser
    except ImportError:
        sys.stderr.write("RSSReader: Python feedparser module not available -- can't run RSS scanner.\n")
        sys.exit(1)
    if not argv:
        sys.stderr.write("RSSReader: no subcommand specified (expected 'run' or 'scan')\n")
        sys.exit(1)
    if argv[0] == "run":
        # run the scanner as a long-lived process, optionally against a repository
        sys.path.append("/local/share/UpLib-1.7.9/code")
        from uplib.plibUtil import set_verbosity, set_note_sink, uthread
        from uplib.repository import Repository
        uthread.initialize()
        set_note_sink(sys.stderr)
        set_verbosity(4)
        _IGNORE_KEYBOARD_INTERRUPTS = False
        if len(argv) > 1:
            repo = Repository("1.7.9", argv[1], {})
        else:
            repo = None
        _scan_rss_sites(repo)
    elif argv[0] == "scan":
        # one-shot mode: just report what would be fetched from each site
        sys.path.append("/local/share/UpLib-1.7.9/code")
        from uplib.plibUtil import write_metadata
        for arg in argv[1:]:
            for feed in find_feeds(arg):
                print feed.feed.title, feed.href, len(feed.entries)
                for entry in feed.entries:
                    d = process_entry(entry)
                    if d:
                        print (u'%s, by %s, at %s' % (
                            d.get("title"), d.get("authors"),
                            time.ctime(int(d.get("rss-timestamp"))))).encode("UTF-8", "strict")
                        if "'" in d.get("title"):
                            mdoutput = StringIO.StringIO()
                            write_metadata(mdoutput, d)
                            md = mdoutput.getvalue()
                            mdoutput.close()
                            for line in md.split("\n"):
                                print '    ' + line.strip()
    else:
        sys.stderr.write("RSSReader: unknown subcommand %s\n" % argv[0])
        sys.exit(1)
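
# Hedged sketch of the usual entry point (the original file's __main__ block
# is not shown here): hand the subcommand and its arguments straight to
# main(), so that, e.g., "python RSSReader.py scan http://example.com/blog/"
# reports the entries found at that site without touching a repository.
if __name__ == "__main__":
    main(sys.argv[1:])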

def _scan_rss_sites(repo):
    global _ADDED_SITES, _REMOVED_SITES
    try:
        from uplib.plibUtil import configurator, note, write_metadata, id_to_time, create_new_id
        from uplib.extensions import find_and_load_extension
        conf = configurator.default_configurator()
        if repo:
            sys_inits_path = os.path.join(conf.get('uplib-lib'), 'site-extensions')
            repo_inits_path = os.path.join(repo.root(), "overhead", "extensions", "active")
            upload_m = find_and_load_extension("UploadDocument", "%s|%s" % (repo_inits_path, sys_inits_path), None, True)
            if not upload_m:
                note(0, "Can't load UploadDocument extension!")
                sys.exit(1)
            else:
                note("UploadDocument extension is %s", upload_m)
        scan_period = conf.get_int("rss-scan-period", 60 * 2)
        startup_delay = conf.get_int("rss-startup-delay", 0)
        del conf
        import feedparser
        if startup_delay > 0:
            note(3, "startup delay is %d", startup_delay)
            time.sleep(startup_delay)
    except:
        note(0, "RSSReader: exception starting RSS scan thread:\n%s",
             ''.join(traceback.format_exception(*sys.exc_info())))
        return
    rss_sites = -1
    while True:
        try:
            conf = configurator()       # re-read the uplibrc file
            old_rss_sites = rss_sites
            rss_sites = conf.get("rss-sites")
            if (old_rss_sites == -1) or (old_rss_sites != rss_sites):
                note(2, "rss_sites are %s", rss_sites)
            scan_period = conf.get_int("rss-scan-period", scan_period)
            expiration_period = conf.get_int("rss-expiration-period", 30 * 24 * 60 * 60)    # 30 days
            # merge the configured sites with dynamically added/removed ones
            if rss_sites:
                rss_sites = rss_sites.split() + _ADDED_SITES
            else:
                rss_sites = _ADDED_SITES[:]
            for site in _REMOVED_SITES:
                if site in rss_sites:
                    rss_sites.remove(site)
            if rss_sites:
                feeds = []
                for site in rss_sites:
                    if site.startswith("feed:"):
                        feeds.append(feedparser.parse(site))
                    elif site.startswith("http:") or site.startswith("https:"):
                        feeds += find_feeds(site)
                note("feeds are:\n%s", [(x.feed.title, x.href, len(x.entries)) for x in feeds])
                for feed in feeds:
                    note("RSSReader: %s: %s entries in feed %s", time.ctime(), len(feed.entries), feed.feed.title)
                    for entry in feed.entries:
                        d = process_entry(entry)
                        if not d:
                            continue
                        id = d.get("rss-id")
                        if repo:
                            if repo.do_query('+rss-id:"%s"' % id):
                                # already in the repository
                                continue
                            response = FakeResponse(repo)
                            mdoutput = StringIO.StringIO()
                            write_metadata(mdoutput, d)
                            md = mdoutput.getvalue()
                            mdoutput.close()
                            upload_m.add(repo, response, {
                                'URL': d.get("original-url"),
                                'wait': "true",
                                'no-redirect': "true",
                                'metadata': md,
                                'md-categories': "RSSReader/%s" % feed.feed.title,
                                })
                            if response.thread:
                                while response.thread.isAlive():
                                    response.thread.join(1.0)
                            note("RSSReader: %s: %s (%s: %s)", time.ctime(), repr(d.get("title")),
                                 response.code, response.message)
                        else:
                            note("RSSReader: %s: %s (%s)\n    %s", time.ctime(), repr(d.get("title")),
                                 d.get("date"), d.get("summary"))
                if repo:
                    # now do expiries: anything older than the expiration period
                    # that hasn't been marked no-expire is a candidate
                    old_id = create_new_id(time.time() - expiration_period)[:-5]
                    hits = repo.do_query("categories:RSSReader AND id:[00000-00-0000-000 TO %s] "
                                         "AND NOT categories:RSSReader/_noexpire_" % old_id)
                    for score, doc in hits:
                        if os.path.exists(os.path.join(doc.folder(), "activity")):
                            # the user has looked at it, so keep it around
                            doc.add_category("RSSReader/_noexpire_", True)
                        else:
                            # untouched and stale, so remove it
                            repo.delete_document(doc.id)
            time.sleep(scan_period)
        except KeyboardInterrupt:
            if _IGNORE_KEYBOARD_INTERRUPTS:
                note(0, "RSSReader: %s", ''.join(traceback.format_exception(*sys.exc_info())))
            else:
                sys.exit(0)
        except:
            note(0, "RSSReader: %s", ''.join(traceback.format_exception(*sys.exc_info())))
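
# FakeResponse is used above but defined elsewhere in this module.  The sketch
# below (an assumption, not the original class) shows the minimal shape the
# code above relies on: a .thread to wait on, plus .code and .message fields
# that UploadDocument.add() can fill in through the usual response methods.
class _ExampleFakeResponse(object):
    def __init__(self, repo):
        self.repo = repo
        self.thread = None      # set if the upload runs in a worker thread
        self.code = None
        self.message = None
    def error(self, code, message):
        self.code, self.message = code, message
    def reply(self, message, code=200):
        self.code, self.message = code, message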