def _common(self): global REPO_EXTENSION_DIRS, ALLOW_OLD_EXTENSIONS if not REPO_EXTENSION_DIRS: conf = configurator.default_configurator() REPO_EXTENSION_DIRS = PATH_SEPARATOR.join(( os.path.join(self.repo.overhead_folder(), "extensions", "active"), os.path.join(conf.get('uplib-lib'), 'site-extensions'))) ALLOW_OLD_EXTENSIONS = conf.get_bool("allow-old-extensions") module_name, function_name = self.angel_action[0] exception = None callable = None try: callable = find_action_function(module_name, function_name, self.repo.get_actions_path()) except: t, v, b = sys.exc_info() exception = ''.join(traceback.format_exception(t, v, b)) note(0, "find_action_function(%s/%s) raised an exception:\n%s", module_name, function_name, exception) if callable: field_values = request_to_field_dict(self.request) or {} try: resp = response(self, self.current_user is not None) callable(self.repo, resp, field_values) return True except ForkRequestInNewThread, x: note(4, "forked off request") self._auto_finish = False return False except:
def update_configuration():
    """Refresh this module's indexing configuration from the UpLib config.

    Two Lucene back ends are supported, selected by the module-level
    ``LUCENE`` flag:  "java" spawns an external Java process (tool paths
    and command templates are cached in module globals), while "jcc"
    initializes the in-process uplib.indexing binding.
    """
    global JAVA, INDEXING_ADD_CMD, INDEXING_REMOVE_CMD, INDEXING_JAR, LUCENE_JAR, INDEXING_PROPERTIES, INDEXING_BATCHADD_CMD, DEBUG_FLAGS
    conf = configurator.default_configurator()
    props = conf.get("indexing-properties")
    if LUCENE == "java":
        JAVA = conf.get("java")
        LUCENE_JAR = conf.get("lucene-jarfile")
        INDEXING_JAR = conf.get("uplib-indexing-jarfile")
        INDEXING_ADD_CMD = conf.get("indexing-add-command")
        INDEXING_BATCHADD_CMD = conf.get("indexing-batch-add-command")
        INDEXING_REMOVE_CMD = conf.get("indexing-remove-command")
        # pass a JVM debug property through when UpLib verbosity is raised
        if plibUtil._verbosity > 1:
            DEBUG_FLAGS = " -Dcom.parc.uplib.indexing.debugMode=true"
        else:
            DEBUG_FLAGS = ""
        # quoted so the property value survives shell command assembly
        if props:
            INDEXING_PROPERTIES = "\"-Dcom.parc.uplib.indexing.indexProperties=%s\"" % props
        else:
            INDEXING_PROPERTIES = ""
    elif LUCENE == 'jcc':
        import uplib.indexing
        uplib.indexing.initialize()
def get_default_rippers(repo):
    """Returns a default set of Ripper instances.

    :param repo: the repository instance
    :type repo: uplib.repository.Repository
    :return: the default set of rippers, in order
    :rtype: list(uplib.ripper.Ripper)
    """
    from uplib import createThumbnails, createHTML, createIndexEntry, createPageBboxes, paragraphs
    conf = configurator.default_configurator()
    # summary length: per-repository param wins over site config, default 250
    default = [SimpleSummaryRipper(repo, int(repo.get_param("summary-length") or conf.get_int("summary-length") or 250)),
               paragraphs.ParagraphRipper(repo),
               createThumbnails.ThumbnailRipper(repo),
               createPageBboxes.BboxesRipper(repo),
               createHTML.HTMLRipper(repo),
               createIndexEntry.LuceneRipper(repo),
               ]
    if (sys.platform == "darwin") and conf.get_bool("install-finder-icon-ripper", True):
        from macstuff import MacRipper
        # add the Mac ripper just before the Lucene ripper
        # NOTE(review): insert(-2) actually places it before the HTML ripper
        # (two from the end), not directly before the Lucene ripper — confirm
        # which position is intended.
        default.insert(-2, MacRipper(repo))
    # if we have language support, add that ripper, too
    try:
        from uplib.language import GuessLanguageRipper
    except ImportError:
        pass
    else:
        # language guessing runs first so later rippers can use the result
        default.insert(0, GuessLanguageRipper(repo))
    return default
def after_repository_instantiation(repo):
    """Start the RSS reader for *repo*, unless disabled by configuration.

    Honors the boolean config option "enable-rss-reader" (default True);
    when it is off, logs a note and does nothing.
    """
    from uplib.plibUtil import note, configurator, uthread
    enabled = configurator.default_configurator().get_bool("enable-rss-reader", True)
    if not enabled:
        note("RSSReader: explicitly disabled -- not initializing.")
        return
    start(repo)
def repo_properties (repo, response, params):
    """
    Return the properties of the repository.  These include values like
    `name`, `port`, `uplib-home`, `uplib-bin`, `uplib-lib`, `uplib-version`,
    `categories` (a comma-separated list of category names), `docs`
    (a comma-separated list of doc IDs), `collections` (a comma-separated
    list of collection IDs), `last-modified-time` (a timestamp with the
    last-modified time of the repository, as a floating point string giving
    seconds past the Unix epoch).

    :return: the repository properties specified above
    :rtype: either an XML-formatted data set, if "Accept: application/xml" is specified, \
    or a plain text list of properties, with one per line (lines can be very long)
    """
    d = {}
    d['name'] = repo.name()
    d['port'] = repo.port()
    # each lookup re-fetches the config singleton; cheap for a status call
    d['uplib-home'] = configurator.default_configurator().get("uplib-home")
    d['uplib-bin'] = configurator.default_configurator().get("uplib-bin")
    d['uplib-lib'] = configurator.default_configurator().get("uplib-lib")
    d['uplib-version'] = configurator.default_configurator().get("UPLIB_VERSION")
    c = repo.categories()
    # case-insensitive in-place sort (Python 2 cmp-style comparator)
    c.sort(lambda x, y: cmp(string.lower(x), string.lower(y)))
    d['categories'] = ','.join(c)
    d['docs'] = ','.join([doc.id for doc in repo.generate_docs()])
    d['collections'] = ','.join([x.id for x in repo.list_collections()])
    d['last-modified-time'] = str(repo.mod_time())
    if response.xml_request or (params.get("format") == "xml"):
        # XML form: <repository><properties name="..." port="..." .../></repository>
        retval = getDOMImplementation().createDocument(None, "repository", None)
        e = retval.createElement('properties')
        for element in d:
            e.setAttribute(element, str(d[element]))
        retval.documentElement.appendChild(e)
        fp = response.open("application/xml;charset=utf-8")
        fp.write(retval.toxml("UTF-8") + "\n")
        fp.close()
        return
    else:
        # plain-text form: one "key: value" line per property
        fp = response.open("text/plain")
        write_metadata(fp, d)
        fp.close()
def __init__(self, filesystem, repository):
    """Wrap the Medusa default_handler with repository-aware state.

    :param filesystem: filesystem object passed through to default_handler
    :param repository: the uplib repository this handler serves
    """
    default_handler.default_handler.__init__(self, filesystem)
    self.__repo__ = repository
    conf = configurator.default_configurator()
    # caching is on by default; the 'no-caching' config option disables it
    if conf.get('no-caching'):
        self.allow_cache = False
    else:
        self.allow_cache = True
    self.version = repository.get_version()
    # normalize a unicode version string to an ASCII byte string
    # (and/or idiom: falls back to the original value when not unicode)
    self.version = (isinstance(self.version, unicode) and self.version.encode("ASCII", "replace")) or self.version
def after_repository_instantiation(repo):
    """Register the CiteSeer ripper when a parser command is configured.

    Caches the configured citation/header parser commands in module
    globals; if either is present, inserts a CiteSeerParserRipper into the
    repository's ripper chain, three slots from the end.
    """
    global CITATION_PARSER, HEADER_PARSER
    config = configurator.default_configurator()
    CITATION_PARSER = config.get("citeseer-citation-parser")
    HEADER_PARSER = config.get("citeseer-header-parser")
    if not (CITATION_PARSER or HEADER_PARSER):
        return
    chain = repo.rippers()
    chain.insert(-3, CiteSeerParserRipper(repo))
def update_configuration():
    """Cache external-tool paths and tuning values from the UpLib config.

    Populates the module globals for the TIFF utilities, tar commands,
    the summary length, and the code-timer switch.
    """
    global TIFFINFO, TIFFCP, TIFFSET, TAR, UNTAR_CMD, SUMMARY_LENGTH, CODETIMER_ON
    cfg = configurator.default_configurator()
    # external tool locations
    TIFFINFO = cfg.get("tiffinfo")
    TIFFCP = cfg.get("tiffcp")
    TIFFSET = cfg.get("tiffset")
    TAR = cfg.get("tar")
    UNTAR_CMD = cfg.get("untar-command")
    # tuning values
    SUMMARY_LENGTH = cfg.get_int("summary-length")
    CODETIMER_ON = cfg.get_bool("codetimer-on", False)
def update_configuration():
    """Re-read thumbnail-generation settings from the UpLib configuration.

    Caches external-tool paths, font files, navigation icons, and
    page-image sizing parameters in this module's globals.

    :raises IOError: if a required icon file is unconfigured or unreadable
    """
    global TIFFSPLIT, TIFFCP, THUMBNAIL_TYPE, TIFF_SPLIT_CMD, NUMBERING_FONT, LEGEND_FONT, PREVIOUS_ICON, NEXT_ICON, MAX_SCALING_FACTOR
    global PAGEIMAGE_MAXWIDTH, PAGEIMAGE_MAXHEIGHT, TOP_ICON, CONSTANT_AREA_FACTOR, USE_VIRTUAL_INK, UNDER_CONSTRUCTION
    global AUTO_CROP_BIG_THUMBNAILS, DISTORT_VERY_SMALL_THUMBNAILS
    note(3, "in createThumbnails.update_configuration()")
    conf = configurator.default_configurator()
    TIFFSPLIT = conf.get("tiffsplit")
    TIFFCP = conf.get("tiffcp")
    TIFF_SPLIT_CMD = conf.get("tiff-split-command")
    THUMBNAIL_TYPE = conf.get("thumbnail-strategy", "log-area")
    NUMBERING_FONT = conf.get("numbering-font-file")
    LEGEND_FONT = conf.get("legend-font-file")
    # the previous/next page icons are mandatory; fail loudly if missing
    previous_page_icon_file = conf.get("previous-page-icon-file")
    if not previous_page_icon_file:
        note(0, "No previous-page-icon-file parameter in site.config nor .uplibrc")
        note(0, "Aborting update_configuration!")
        raise IOError("No previous-page-icon-file parameter in site.config nor .uplibrc")
    try:
        PREVIOUS_ICON = Image.open(previous_page_icon_file)
    except IOError:
        note(0, "Could not load %s as an image." % previous_page_icon_file)
        note(0, "Aborting update_configuration!")
        raise IOError("Could not load %s." % previous_page_icon_file)
    next_page_icon_file = conf.get("next-page-icon-file")
    if not next_page_icon_file:
        note(0, "No next-page-icon-file parameter was found in site.config nor .uplibrc.")
        note(0, "Aborting update_configuration!")
        raise IOError("No next-page-icon-file parameter in config")
    try:
        NEXT_ICON = Image.open(next_page_icon_file)
    except IOError:
        note(0, "Could not load %s as an image." % next_page_icon_file)
        note(0, "Aborting update_configuration!")
        # bug fix: error message read "Cound not load %s."
        raise IOError("Could not load %s." % next_page_icon_file)
    # composite the "top" icon over an opaque white background, using the
    # icon itself as the paste mask
    temp = Image.open(conf.get("top-icon-file"))
    TOP_ICON = Image.new(temp.mode, temp.size, (255, 255, 255))
    TOP_ICON.paste(temp, (0, 0), temp)
    # page-image sizing parameters, with documented defaults
    MAX_SCALING_FACTOR = float(conf.get("page-image-max-scaling-factor") or "0.33")
    PAGEIMAGE_MAXWIDTH = float(conf.get("page-image-max-width-pixels") or "680")
    PAGEIMAGE_MAXHEIGHT = float(conf.get("page-image-max-height-pixels") or "880")
    CONSTANT_AREA_FACTOR = float(conf.get("constant-area-factor") or "4.5")
    USE_VIRTUAL_INK = conf.get_bool("use-alpha-channel-thumbnails", false)
    AUTO_CROP_BIG_THUMBNAILS = conf.get_bool("auto-crop-big-thumbnails", true)
    DISTORT_VERY_SMALL_THUMBNAILS = conf.get_bool("keep-very-small-thumbnails", false)
    # NOTE(review): images_dir is unused in the visible code — presumably the
    # function continues (e.g. to load UNDER_CONSTRUCTION); confirm against
    # the full file.
    images_dir = os.path.join(conf.get("uplib-share"), "images")
def excluded_categories (repo):
    """Return a dict mapping category name strings to whether or not they
    match the "excluded-categories" RE.

    :param repo: the repository
    :type repo: uplib.repository.Repository
    :return: mapping of category names to whether they are excluded
    :rtype: dict
    """
    conf = configurator.default_configurator()
    # default exclusion pattern covers email subcategories
    excluded_pattern = re.compile(conf.get("excluded-categories") or "email/.*")
    categories = {}
    for category in repo.categories():
        # bug fix: the original tested the bound method object
        # ``excluded_pattern.match`` (always truthy) instead of calling it,
        # so every category was reported as excluded.  Apply the RE.
        categories[category] = bool(excluded_pattern.match(category))
    return categories
def do_action (self, request, field_values, content):
    """Dispatch an HTTP action request to its extension function.

    Looks up the (module, function) pair from request.angel_action on the
    repository's actions path, then invokes it with a response wrapper.
    Returns true when the request has been fully handled (including error
    signalling), false when the handler forked the work into a new thread.
    """
    note(4, "in do_action (%s)", request.angel_action)
    global REPO_EXTENSION_DIRS, ALLOW_OLD_EXTENSIONS
    # lazily compute the extension search path on first call
    if not REPO_EXTENSION_DIRS:
        conf = configurator.default_configurator()
        REPO_EXTENSION_DIRS = PATH_SEPARATOR.join((
            os.path.join(self.__repo__.overhead_folder(), "extensions", "active"),
            os.path.join(conf.get('uplib-lib'), 'site-extensions')))
        ALLOW_OLD_EXTENSIONS = conf.get_bool("allow-old-extensions")
    module_name, function_name = request.angel_action[0]
    exception = None
    callable = None
    try:
        callable = find_action_function(module_name, function_name, self.__repo__.get_actions_path())
    except:
        # lookup failure is logged but not fatal; callable stays None
        t, v, b = sys.exc_info()
        exception = ''.join(traceback.format_exception(t, v, b))
        note(0, "find_action_function(%s/%s) raised an exception:\n%s", module_name, function_name, exception)
    if callable:
        try:
            self.action_counter.increment()
            if field_values == None:
                field_values = {}
            logged_in = is_logged_in(self.__repo__, request)
            resp = response(request, content, self.__repo__, logged_in)
            if module_name == 'basic' and function_name == 'repo_status_json':
                # try not to log this call
                request.log = _ignore_request_logging
            callable(self.__repo__, resp, field_values)
            return true
        except ForkRequestInNewThread, x:
            # handler continues in another thread; caller must not finish
            # the request here
            note(4, "forked off request")
            return false
        except Exception, x:
            # report the handler's failure back to the client as a 500
            note(0, "signalling exception <%s> at point 1a:", x)
            excn_data = sys.exc_info()
            signal_python_exception(request, excn_data)
            # s2 = python_exception_html (excn_data, None)
            # request.reply_code = 500
            # request['Content-Type'] = 'text/html'
            # request['Content-Length'] = len(s2)
            # request.push(s2)
            return true
def matching_ids_and_filenames(repo, response, params): """Search for documents matching query=terms and return zero or more matches. Each line lists a matching document as: doc_id ' ' filename """ # This is an abbreviated version of basicPlugins.py _repo_search(). query = params['query'] global INTERACTION_CHARSET if not INTERACTION_CHARSET: conf = configurator.default_configurator() INTERACTION_CHARSET = conf.get('interaction-charset', 'UTF-8') query = unicode(query, INTERACTION_CHARSET, 'replace') cutoff = 0.0 coll = PrestoCollection(repo, None, query, None, None, cutoff) result = [] for doc in coll.docs(): title, mtype = doc_title_and_type(doc) result.append('%s %s' % (doc.id, title)) response.reply('\n'.join(result), 'text/plain')
def __init__(self, doc, options):
    """Parse a video document, extracting size, duration, and metadata.

    :param doc: path to the video file
    :param options: parser options; "video" may carry a pre-loaded
        pyglet media object, otherwise the file is loaded here
    :raises ValueError: if pyglet cannot identify a video stream
    """
    global UPLIB_SHARE
    DocumentParser.__init__(self, doc, options)
    self.video = options.get("video")
    if not self.video:
        self.video = pyglet.media.load(doc)
    if not self.video.video_format:
        raise ValueError("Unknown video format encountered")
    self.size = self.get_video_size(self.video.video_format.width,
                                    self.video.video_format.height,
                                    self.video.video_format.sample_aspect)
    # lazily cache config-derived values shared by all instances
    if (UPLIB_SHARE is None) or (self.NSAMPLES is None):
        c = configurator.default_configurator()
        UPLIB_SHARE = c.get("uplib-share")
        self.NSAMPLES = c.get_int("number-of-video-sample-frames", 5)
    duration = self.video.duration
    if duration:
        self.metadata['duration'] = str(duration)
    if have_hachoir:
        # best-effort metadata extraction; any hachoir failure is ignored
        try:
            md = hachoir_metadata.extractMetadata(hachoir_parser.createParser(unicode(doc), doc))
            d = {}
            for v in md:
                if v.values:
                    d[v.key] = v.values[0].value
            v = d.get("last_modification")
            if v:
                # NOTE(review): isoformat('Z') uses 'Z' as the date/time
                # separator character, not a UTC suffix — confirm intended
                self.metadata['last-modified'] = v.isoformat('Z')
                note("last-modified is %s", self.metadata['last-modified'])
            # fall back to last_modification when no creation_date exists
            v = d.get("creation_date") or v
            if v:
                self.metadata['date'] = v.strftime("%m/%d/%Y")
            mime_type = d.get("mime_type")
            if mime_type:
                self.metadata['apparent-mime-type'] = mime_type
        except:
            pass
    # don't try to optimize away blank frames if we don't have many frames
    self.saveblanks = self.saveblanks or (self.NSAMPLES < 2)
def update_configuration():
    """Re-read HTML controls-panel settings from the UpLib configuration.

    Reloads the controls template file when its path or modification time
    has changed since the last call, and refreshes layout tuning globals.
    """
    global CONTROLS_TEMPLATE_FILE, CONTROLS_TEMPLATE, CONTROLS_TEMPLATE_FILE_MODDATE, CONTROLS_HEIGHT
    global THUMBNAIL_COLWIDTH, USE_VIRTUAL_INK
    conf = configurator.default_configurator()
    template = conf.get("default-html-controls-template-file")
    if template:
        template = os.path.expanduser(template)
    if template and os.path.exists(template):
        moddate = os.path.getmtime(template)
    note(3, "default-html-controls-template-file is %s (was %s)", template, CONTROLS_TEMPLATE_FILE)
    # re-read only when the file exists and is new or newer than our cache
    # (moddate is guaranteed bound here by the identical existence guard)
    if (template and os.path.exists(template) and
            (CONTROLS_TEMPLATE_FILE != template or CONTROLS_TEMPLATE_FILE_MODDATE < moddate)):
        note(3, "re-reading controls template file")
        fp = open(template, 'r')
        CONTROLS_TEMPLATE = fp.read()
        fp.close()
        CONTROLS_TEMPLATE_FILE = template
        CONTROLS_TEMPLATE_FILE_MODDATE = moddate
    CONTROLS_HEIGHT = conf.get_int('html-controls-panel-height') or 200
    THUMBNAIL_COLWIDTH = conf.get_int('html-thumbnails-column-width') or 130
    USE_VIRTUAL_INK = conf.get_bool("use-alpha-channel-thumbnails") or false
    # NOTE(review): plib_path is unused in the visible code — the function
    # presumably continues beyond this excerpt; confirm against the full file
    plib_path = conf.get("plib-path")
def doc_categorize (repo, response, params):
    """Emit an HTML page for interactively categorizing one document.

    Shows the document's abstract, a list of likely categories (from
    find_likely_tags), and a tag cloud of all repository categories, each
    linked to an add/remove-category action.  Requires a "doc_id" request
    parameter; replies with 400 when it is missing or invalid.
    """
    from uplib.basicPlugins import show_abstract, _is_sensible_browser
    from uplib.basicPlugins import show_title, STANDARD_BACKGROUND_COLOR, STANDARD_TOOLS_COLOR, STANDARD_LEGEND_COLOR
    from uplib.basicPlugins import __issue_javascript_head_boilerplate as issue_javascript_head_boilerplate
    from uplib.basicPlugins import __issue_menu_definition as issue_menu_definition
    from uplib.basicPlugins import __issue_title_styles as issue_title_styles
    global _CONFIGURATION
    # lazily compile the excluded-category REs from the config, once
    if _CONFIGURATION is None:
        _CONFIGURATION = {
            "exclusions": [re.compile(x.strip())
                           for x in configurator.default_configurator().get("categorize-excluded-categories", "").split(",")
                           if x.strip()]}
    def figure_size(count, avgsize):
        # font size for a tag-cloud entry, growing with category size
        if avgsize < 0.0001:
            return 0.0001
        return math.sqrt(math.log((count * (math.e - 1))/avgsize + 1))
    doc_id = params.get("doc_id")
    if not doc_id:
        response.error(HTTPCodes.BAD_REQUEST, "No doc_id parameter specified.")
        return
    doc = repo.valid_doc_id(doc_id) and repo.get_document(doc_id)
    if not doc:
        response.error(HTTPCodes.BAD_REQUEST, "Invalid doc_id parameter '%s' specified." % doc_id)
        return
    fp = response.open()
    title = (doc.get_metadata("title") or doc.id).encode("UTF-8", "strict")
    fp.write("<head><title>Categorizing '%s'</title>\n" % htmlescape(title))
    fp.write('<meta http-equiv="Content-Script-Type" content="text/javascript">\n')
    fp.write('<link REL="SHORTCUT ICON" HREF="/favicon.ico">\n')
    fp.write('<link REL="ICON" type="image/ico" HREF="/favicon.ico">\n')
    issue_javascript_head_boilerplate(fp)
    issue_title_styles(fp)
    fp.write('</head><body bgcolor="%s">\n' % STANDARD_BACKGROUND_COLOR)
    issue_menu_definition(fp)
    show_abstract(repo, doc, fp, _is_sensible_browser(response.user_agent), showpagesearch=False)
    fp.write("<hr />\n")
    # collect the document's categories, plus every ancestor of a
    # hierarchical category ("a/b/c" also yields "a" and "a/b")
    doccats = [x.lower() for x in doc.get_category_strings()]
    for cat in doccats[:]:
        if cat.find('/') >= 0:
            parts = cat.split('/')
            for i in range(1, len(parts)):
                doccats.append('/'.join(parts[:i]))
    tags = find_likely_tags(doc)
    if tags:
        # try to remove duplicates
        stags = min(10, len(tags))
        # tagnames = [tag[0].split('/')[0] for tag in tags[:stags] if tag[0].find('/') >= 0]
        # count = 0
        # i = 0
        # while tagnames and (i < stags):
        #     if tags[i][0] in tagnames:
        #         del tags[i]
        #         stags = min(10, len(tags))
        #         tagnames = [tag[0].split('/')[0] for tag in tags[:stags] if tag[0].find('/') >= 0]
        #     else:
        #         i += 1
        fp.write("<center><small><i>Likely categories</i></small><br />")
        count = 0
        topscore = _adjust_score(*tags[0][1][:2])
        exclusions = _CONFIGURATION and _CONFIGURATION.get("exclusions")
        for name, (score, ndocs, ascore) in tags:
            if count > stags:
                break
            # skip categories matching an exclusion pattern
            skip = False
            for exclusion in exclusions:
                if exclusion.match(name.lower()):
                    skip = True
                    break
            if skip:
                continue
            if count > 0:
                fp.write(" · ")
            #size = max(0.5, (2/topscore) * ascore)
            size = 1
            # red when the document already has the category
            color = (name.lower() in doccats) and "red" or "black"
            action = '/'.join(response.request_path.split('/')[:3]) + '/doc_%s_category?doc_id=%s&tag=%s' % (
                (name.lower() in doccats) and "remove" or "add", doc.id, urllib.quote_plus(name))
            fp.write('<a style="font-size: %fem; color: %s;" href="%s" title="%s the \'%s\' category (score=%.3f)">%s</a>' % (
                size, color, action, (name.lower() in doccats) and "remove" or "add", htmlescape(name), ascore, htmlescape(name)))
            count += 1
        fp.write("</center></p><hr />\n")
    fp.write('<form action="%s" method=get><center>Add a new category to this document: '
             % ('/'.join(response.request_path.split('/')[:3]) + '/doc_add_category'))
    fp.write('<input type=hidden name="doc_id" value="%s">\n' % doc.id)
    fp.write('<input type=text name="tag" value="" size=40></form></center>\n')
    note(4, "doc_categorize: retrieving repository categories... (%s)", time.ctime())
    cats = repo.get_categories_with_docs()
    note(4, "doc_categorize: have categories (%s)", time.ctime())
    if cats:
        # tag cloud of all categories, sized by category population
        fp.write("<hr>\n<center><small><i>All categories</i></small><br />")
        avgsize = sum([len(x) for x in cats.values()]) / float(len(cats))
        catkeys = cats.keys()
        catkeys.sort(lambda x, y: cmp(x.lower(), y.lower()))
        first = True
        exclusions = _CONFIGURATION and _CONFIGURATION.get("exclusions")
        for name in catkeys:
            skip = False
            for exclusion in exclusions:
                if exclusion.match(name.lower()):
                    skip = True
                    break
            if skip:
                continue
            if not first:
                fp.write(" · ")
            else:
                first = False
            size = max(0.5, figure_size(len(cats[name]), avgsize))
            color = (name.lower() in doccats) and "red" or "black"
            action = '/'.join(response.request_path.split('/')[:3]) + '/doc_%s_category?doc_id=%s&tag=%s' % (
                (name.lower() in doccats) and "remove" or "add", doc.id, urllib.quote_plus(name))
            actionsee = '/action/basic/repo_search?query=%s' % (
                urllib.quote_plus('categories:"%s"' % name))
            fp.write('<a style="font-size: %fem; color: %s;" href="%s" title="%s the \'%s\' category">%s</a>' % (
                size, color, action, (name.lower() in doccats) and "remove" or "add", htmlescape(name), htmlescape(name)))
            # superscript link showing the category's document count
            fp.write('<a style="font-size: %fem; color: %s; vertical-align: super;" href="%s" ' % (
                max(0.4, size/2), STANDARD_LEGEND_COLOR, actionsee)
                + 'title="see the %s document%s in the \'%s\' category" target="_blank">%d</a>' % (
                    (len(cats[name]) == 1) and "one" or str(len(cats[name])),
                    (len(cats[name]) != 1) and "s" or "",
                    htmlescape(name), len(cats[name])))
    # close the page (assumed to be at function level so the body tag is
    # always closed — original indentation was ambiguous; confirm)
    fp.write("</body>\n")
def upload_document (repository, response, fields):
    """Upload a complete UpLib document folder to the repository.  Used by `uplib-add-document`.

    :Parameters:
        file
            the folder, as a zip file or tar file.  Using a tar file is considered obsolete.
        filetype
            must be either 'tarred-folder' or 'zipped-folder'
        format
            the format of the response to send back.  If 'xml', an XML document will be
            generated, containing a single text node called 'id', containing the document
            ID of the new document, but the default is to send back simply the new doc ID
            as a plain text string.
        title
            optionally, the title of the document
        id
            optionally, a pre-assigned doc ID to use.  If there is no folder with this ID
            in the `pending` directory, this will raise an error.
        authors
            optionally, a list of authors, each name separated from the next with the
            string " and "
        source
            optionally, a string describing the source of the document
        date
            optionally, an UpLib-format date string `[[DD/]MM/]YYYY`
        keywords
            optionally, a comma-separated list of keywords to associate with the document
        categories
            optionally, a comma-separated list of categories (tags) to associate with the document
        abstract
            optionally, an abstract for the document
        citation
            optionally, a citation in some citation format for the document
        comment
            optionally, some text giving a comment on the document

    :return: the document ID for the new document
    :rtype: plain text string, or if XML is specified, an XML ``result`` element
        containing an ``id`` node with the ID as its text
    """
    global INTERACTION_CHARSET
    def possibly_set (db, fields, valuename, unfold_lines=false):
        # copy an optional form field into db, decoded to unicode;
        # unfold_lines flattens CR/LF so multi-line fields become one line
        if fields.has_key(valuename):
            if unfold_lines:
                value = string.replace(string.replace(fields[valuename], '\n', ' '), '\r', ' ')
            else:
                value = fields[valuename]
            value = unicode(value, INTERACTION_CHARSET, "replace")
            db[valuename] = value
    # lazily resolve the charset used for decoding client input
    if not INTERACTION_CHARSET:
        conf = configurator.default_configurator()
        INTERACTION_CHARSET = conf.get('interaction-charset', 'UTF-8')
    if (not fields.has_key('newfile')) or (not fields.has_key('filetype')):
        response.error(HTTPCodes.BAD_REQUEST, "Badly formed upload request.\n")
        return
    known_content_types = repository.content_types()
    msgtag = ""
    try:
        doc_bits = fields['newfile']
        doc_type = fields['filetype']
        if not doc_type in known_content_types:
            response.error(HTTPCodes.UNSUPPORTED_MEDIA_TYPE, "Can't upload files of type '%s'.\n" % doc_type)
            return
        # gather the optional metadata fields into one dict
        metadata = {}
        possibly_set(metadata, fields, "title")
        possibly_set(metadata, fields, "id")
        possibly_set(metadata, fields, "authors")
        possibly_set(metadata, fields, "source")
        possibly_set(metadata, fields, "date")
        possibly_set(metadata, fields, "keywords")
        possibly_set(metadata, fields, "categories")
        possibly_set(metadata, fields, "abstract", true)
        possibly_set(metadata, fields, "citation", true)
        possibly_set(metadata, fields, "comment", true)
        possibly_set(metadata, fields, "name")
        note(2, "Adding new document; len(bits) = %d, type='%s'", len(doc_bits), doc_type)
        id = repository.create_new_document(doc_bits, doc_type, metadata)
        # update the global list of categories
        categories_value = fields.has_key('categories') and fields['categories']
        cleaned_categories = (categories_value and map(lambda x: string.strip(x), string.split(categories_value, ','))) or []
        db_categories = repository.categories()
        for category in cleaned_categories:
            if not category in db_categories:
                repository.add_category(category)
        if response.xml_request or (fields.get("format") == "xml"):
            # XML reply: a text node carrying the new doc ID
            retval = getDOMImplementation().createDocument(None, "result", None)
            e = retval.createTextNode('id')
            e.data = id
            retval.documentElement.appendChild(e)
            fp = response.open("application/xml;charset=utf-8")
            fp.write(retval.toxml("UTF-8") + "\n")
            fp.close()
            return
        else:
            # plain-text reply: just the new doc ID
            fp = response.open("text/plain")
            fp.write(id)
            fp.close()
            return
    except:
        # re-raise with the original traceback (Python 2 three-arg raise)
        typ, ex, tb = sys.exc_info()
        raise ex, None, tb
def _add_icalendar_file (repo, response, tfile):
    """Parse an uploaded iCalendar file and add each VEVENT as a document.

    For every parsed event: skip it if an identical original (by SHA
    fingerprint) is already in the repository; otherwise render it with
    iCalendarEventParser and fold the result into the repository.  The
    temp file *tfile* is always removed; progress is streamed back as
    plain text, and any top-level failure becomes a 500 response.
    """
    try:
        conf = configurator.default_configurator()
        update_configuration(conf)
        tal = ensure_assembly_line(conf.get("assembly-line"))
        try:
            parsed = iCalendar.myformat(tfile)
            if not isinstance(parsed, dict):
                note(0, "Can't parse supposed iCalendar file %s", tfile)
                response.error(HTTPCodes.INTERNAL_SERVER_ERROR, "Can't parse file")
                return
            resp = response.open("text/plain")
            for event, name, uid in parsed.get('parsed-events'):
                # human-readable identifier for log/progress messages
                if hasattr(event, "dtstart"):
                    identifier = "%s @ %s" % (name, event.dtstart.value)
                else:
                    identifier = name
                # see if there's already a event for this name
                query = 'apparent-mime-type:"%s" AND event-uid:"%s"' % (
                    iCalendarEventParser.format_mimetype, uid)
                hits = repo.do_query(query)
                if hits:
                    # mark the new document as a newer version of the old one
                    if 'metadata' not in parsed:
                        parsed['metadata'] = {}
                    parsed['metadata']['version-of'] = hits[0][1].id
                if event.name == "VEVENT":
                    p = iCalendarEventParser(name, {"icsname": name,
                                                    "icsuid": uid,
                                                    "icsevent": event,
                                                    "upload": False,
                                                    "usepng": True,
                                                    "metadata": parsed.get("metadata") or {},
                                                    })
                else:
                    note(3, "No supported iCalendar subtype found in %s", identifier)
                    p = None
                if p:
                    # calculate fingerprint
                    fd, filename = tempfile.mkstemp(".ics")
                    fp = os.fdopen(fd, "wb")
                    p.write_to_file(fp)
                    fp.close()
                    fingerprint = calculate_originals_fingerprint(filename)
                    # look up fingerprint in repo to see if we already have it
                    hits = repo.do_query('sha-hash:%s' % fingerprint)
                    if hits:
                        # already there, so skip this one
                        note(3, "skipping '%s', already in repo...", identifier)
                        resp.write("skipping '%s', already in repo\n" % identifier)
                        continue
                    # new event, so add it
                    p.metadata["sha-hash"] = fingerprint
                    pinst = p.process()
                    if isinstance(pinst, DocumentParser):
                        try:
                            folder = repo.create_document_folder(repo.pending_folder())
                            id = os.path.basename(folder)
                            # add the tfolder to the repository
                            process_folder(repo, id, pinst.folder, True)
                            flesh_out_folder(id, None, None, repo, None, None)
                            resp.write("added event for %s\n" % identifier)
                        except:
                            msg = "Exception processing event; event is\n%s\nException was\n%s\n" % (
                                event, ''.join(traceback.format_exception(*sys.exc_info())))
                            note(0, msg)
                            resp.write(msg)
        finally:
            # always tear down the assembly line and remove the upload temp file
            if tal:
                from uplib.addDocument import AssemblyLine
                shutil.rmtree(AssemblyLine)
            if os.path.exists(tfile):
                os.unlink(tfile)
    except:
        msg = "Exception processing iCalendar:\n%s\n" % ''.join(traceback.format_exception(*sys.exc_info()))
        note(0, msg)
        response.error(HTTPCodes.INTERNAL_SERVER_ERROR, msg)
def _scan_rss_sites(repo):
    """Daemon loop: periodically poll configured RSS feeds into *repo*.

    Reads "rss-sites" from the config (plus the runtime _ADDED_SITES /
    _REMOVED_SITES adjustments), uploads new entries through the
    UploadDocument extension, and expires old unread RSSReader documents.
    Runs forever; individual scan failures are logged and the loop retried.
    """
    global _ADDED_SITES, _REMOVED_SITES
    try:
        # one-time setup: load the uploader extension and timing parameters
        from uplib.plibUtil import configurator, note, write_metadata, id_to_time, create_new_id
        from uplib.extensions import find_and_load_extension
        conf = configurator.default_configurator()
        if repo:
            sys_inits_path = os.path.join(conf.get('uplib-lib'), 'site-extensions')
            repo_inits_path = os.path.join(repo.root(), "overhead", "extensions", "active")
            upload_m = find_and_load_extension("UploadDocument", "%s|%s" % (repo_inits_path, sys_inits_path), None, True)
            if not upload_m:
                note(0, "Can't load UploadDocument extension!")
                sys.exit(1)
            else:
                note("UploadDocument extension is %s", upload_m)
        scan_period = conf.get_int("rss-scan-period", 60 * 2)
        startup_delay = conf.get_int("rss-startup-delay", 0)
        del conf
        import feedparser
        if startup_delay > 0:
            note(3, "startup delay is %d", startup_delay)
            time.sleep(startup_delay)
    except:
        note(0, "RSSReader: exception starting RSS scan thread:\n%s",
             ''.join(traceback.format_exception(*sys.exc_info())))
        return
    rss_sites = -1          # sentinel: "never read yet"
    while True:
        try:
            conf = configurator()       # re-read uplibrc file
            old_rss_sites = rss_sites
            rss_sites = conf.get("rss-sites")
            if old_rss_sites == -1 or (old_rss_sites != rss_sites):
                note(2, "rss_sites are %s", rss_sites)
            # refresh timing values each pass (original nesting was
            # ambiguous here — assumed loop level; confirm)
            scan_period = conf.get_int("rss-scan-period", scan_period)
            expiration_period = conf.get_int("rss-expiration-period", 30 * 24 * 60 * 60)    # 30 days
            # merge configured sites with runtime additions/removals
            if rss_sites:
                rss_sites = rss_sites.split() + _ADDED_SITES
            else:
                rss_sites = _ADDED_SITES[:]
            if rss_sites:
                for site in _REMOVED_SITES:
                    if site in rss_sites:
                        rss_sites.remove(site)
            if rss_sites:
                feeds = []
                for site in rss_sites:
                    if site.startswith("feed:"):
                        feeds.append(feedparser.parse(site))
                    elif site.startswith("http:") or site.startswith("https:"):
                        feeds += find_feeds(site)
                note("feeds are:\n%s", [(x.feed.title, x.href, len(x.entries)) for x in feeds])
                for feed in feeds:
                    note("RSSReader: %s: %s entries in feed %s", time.ctime(), len(feed.entries), feed.feed.title)
                    for entry in feed.entries:
                        d = process_entry(entry)
                        if not d:
                            continue
                        id = d.get("rss-id")
                        hits = repo.do_query('+rss-id:"%s"' % id)
                        if hits:
                            # already in repo
                            continue
                        if repo:
                            # hand the entry's URL to the UploadDocument
                            # extension and wait for the upload thread
                            response = FakeResponse(repo)
                            mdoutput = StringIO.StringIO()
                            write_metadata(mdoutput, d)
                            md = mdoutput.getvalue()
                            mdoutput.close()
                            upload_m.add(repo, response, {
                                'URL': d.get("original-url"),
                                'wait': "true",
                                'no-redirect': "true",
                                'metadata': md,
                                'md-categories': "RSSReader/%s" % feed.feed.title,
                                })
                            if response.thread:
                                while response.thread.isAlive():
                                    response.thread.join(1.0)
                            note("RSSReader: %s: %s (%s: %s)", time.ctime(), repr(d.get("title")), response.code, response.message)
                        else:
                            note("RSSReader: %s: %s (%s)\n %s", time.ctime(), repr(d.get("title")), d.get("date"), d.get("summary"))
                # now do expiries
                old_id = create_new_id(time.time() - expiration_period)[:-5]
                hits = repo.do_query("categories:RSSReader AND id:[00000-00-0000-000 TO %s] AND NOT categories:RSSReader/_noexpire_" % old_id)
                for score, doc in hits:
                    # check to see if the user has looked at it
                    if os.path.exists(os.path.join(doc.folder(), "activity")):
                        doc.add_category("RSSReader/_noexpire_", True)
                    # and if not, remove it
                    else:
                        repo.delete_document(doc.id)
            time.sleep(scan_period)
        except KeyboardInterrupt:
            if _IGNORE_KEYBOARD_INTERRUPTS:
                note(0, "RSSReader: %s", ''.join(traceback.format_exception(*sys.exc_info())))
            else:
                sys.exit(0)
        except:
            # any other failure: log and keep scanning
            note(0, "RSSReader: %s", ''.join(traceback.format_exception(*sys.exc_info())))
def _add_vcards_file (repo, response, tfile):
    """Parse an uploaded vCard file and add each card as a document.

    Skips cards whose rendered original (by SHA fingerprint) already
    exists in the repository; marks re-parsed cards as new versions of
    matching existing cards.  The temp file *tfile* is always removed.
    On success, replies with a plain-text list of (doc id, card name)
    pairs; any failure becomes a 500 response.
    """
    try:
        fp = response.open("text/plain")
        conf = configurator.default_configurator()
        update_configuration(conf)
        tal = ensure_assembly_line(conf.get("assembly-line"))
        cards = []
        try:
            parsed = vCards.myformat(tfile)
            parsed['upload'] = False
            parsed['usepng'] = True
            for card in parsed.get('parsed-cards'):
                # see if there's already a card for this name
                query = 'apparent-mime-type:"%s" AND vcard-name:"%s"' % (
                    vCard.format_mimetype, card.fn.value)
                hits = repo.do_query(query)
                if hits:
                    if 'metadata' not in parsed:
                        parsed['metadata'] = {}
                    parsed['metadata']['version-of'] = hits[0][1].id
                p = vCard(card, parsed)
                # calculate fingerprint
                # NOTE(review): this rebinds fp (the response stream opened
                # above) to the temp-file handle — the response stream is
                # never written again; confirm this shadowing is intended
                fd, filename = tempfile.mkstemp()
                fp = os.fdopen(fd, "wb")
                p.write_to_file(fp)
                fp.close()
                fingerprint = calculate_originals_fingerprint(filename)
                # look up fingerprint in repo to see if we already have it
                hits = repo.do_query('sha-hash:%s' % fingerprint)
                if hits:
                    # already there, so skip this one
                    note(3, "skipping '%s', already in repo...", card.fn.value)
                    continue
                # new card, so add it
                pinst = p.process()
                if isinstance(pinst, DocumentParser):
                    try:
                        folder = repo.create_document_folder(repo.pending_folder())
                        id = os.path.basename(folder)
                        note("using id %s for %s...", id, card.fn.value)
                        # add the tfolder to the repository
                        process_folder(repo, id, pinst.folder, True)
                        flesh_out_folder(id, None, None, repo, None, None)
                        note("added card for %s\n" % card.fn.value)
                        cards.append((id, card.fn.value))
                    except:
                        msg = "Exception processing vCard; vCard is\n%s\nException was\n%s\n" % (
                            card, ''.join(traceback.format_exception(*sys.exc_info())))
                        note(0, msg)
        finally:
            # always tear down the assembly line and remove the upload temp file
            if tal:
                from uplib.addDocument import AssemblyLine
                shutil.rmtree(AssemblyLine)
            if os.path.exists(tfile):
                os.unlink(tfile)
    except:
        msg = "Exception processing vcards:\n%s\n" % ''.join(traceback.format_exception(*sys.exc_info()))
        note(0, msg)
        response.error(HTTPCodes.INTERNAL_SERVER_ERROR, msg)
    else:
        # success: list the added cards, one "id: name" per line
        response.reply('\n'.join(['%20s: %s' % (x[0], x[1]) for x in cards]))
# # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # # papers over differences between Medusa and Tornado and any other server framework # we may use in the future, like Twisted # import sys, os, re from uplib.plibUtil import configurator conf = configurator.default_configurator() service_framework = conf.get("service-framework") if service_framework == "Medusa": from uplib.angelHandler import ForkRequestInNewThread, run_fn_in_new_thread from uplib.startAngel import darwin_launchd, daemon, unix_mainloop, start_angel def set_top_level_action(handler): if not isinstance(handler, tuple) or (len(handler) != 2): raise RuntimeError("toplevel handler must be tuple of ('MODULENAME', 'FUNCTIONNAME')") import uplib.angelHandler setattr(uplib.angelHandler, "TOP_LEVEL_ACTION", handler) elif service_framework == "Tornado": from uplib.tornadoHandler import ForkRequestInNewThread, run_fn_in_new_thread from uplib.startTornado import darwin_launchd, daemon, unix_mainloop, start_angel