def Filter(self, name, argument, value):
    """
    Strip every occurrence of ``argument`` out of ``value``.

    Both inputs are first coerced to strings (falling back to ""). After
    each removal the search restarts from the beginning of the string, so
    an occurrence that only forms once an earlier one has been cut out is
    removed as well (e.g. removing "ab" from "aabb" leaves "").
    """
    text = bm_extract.coerce_string(value, otherwise = "")
    needle = bm_extract.coerce_string(argument, otherwise = "")

    # an empty needle would match forever, so it removes nothing
    if not needle:
        return text

    while text and needle in text:
        # drop only the first hit, then look again from the start
        text = text.replace(needle, "", 1)

    return text
def ScrubPerson(self, itemd, person_key):
    """
    Normalize the person entries stored under ``person_key`` in ``itemd``.

    Every entry is reduced to a dict with a mandatory "name" plus the
    optional "uri" and "email" values; entries that yield no name are
    dropped. The name is the entry itself coerced to a string or, failing
    that, the entry's "name" value.

    ``itemd`` is modified in place: the key is replaced by the cleaned list,
    or removed entirely when nothing usable is left. Nothing is returned.
    """
    persons = []
    for persond in bm_extract.as_list(itemd, person_key) or []:
        person_name = bm_extract.coerce_string(persond) or bm_extract.as_string(persond, "name")
        if not person_name:
            continue

        npersond = {
            "name" : person_name,
        }
        for key in ( "uri", "email" ):
            value = bm_extract.as_string(persond, key)
            if value:
                npersond[key] = value

        persons.append(npersond)

    if persons:
        itemd[person_key] = persons
        return

    # Best-effort removal: a missing key (or a container that does not
    # support deletion) is fine, but unrelated errors such as
    # KeyboardInterrupt are no longer swallowed.
    try:
        del itemd[person_key]
    except (KeyError, TypeError, AttributeError):
        pass
def Flatten(self, item, path = None):
    """
    Generate a flat sequence of items from a nested "outline" structure.

    item -- a dict-like node; its "outline" entry holds the child nodes
    path -- list of the "text" values of the ancestors (empty at the root)

    Each yielded item is a shallow copy of the node without "outline", plus:
    - "tags": the ancestor path joined with ", " (only when there is one)
    - "@@children": the number of direct children

    When ``self._leaf_only`` is set, only nodes without children are
    yielded. Children are visited depth-first, after their parent.
    """
    if not path:
        path = []

    children = bm_extract.as_list(item, "outline")

    # copy everything but the nested outline
    flat = dict(( key, value ) for key, value in item.iteritems() if key != "outline")

    if path:
        flat["tags"] = bm_extract.coerce_string(path, separator = ", ")

    flat["@@children"] = len(children)

    if not (self._leaf_only and len(children) != 0):
        yield flat

    for child in children:
        child_path = list(path) + [ flat.get("text", "") ]
        for descendant in self.Flatten(child, child_path):
            yield descendant
def Render(self, context = None):
    """
    Render the template and return the result.

    context -- a Context object or a plain dict (wrapped in a Context);
        omitted or None means an empty context. A fresh dict is created per
        call, so state can no longer leak between calls through a shared
        default argument.

    Returns the string made by joining all rendered pieces, with one
    exception: if the first non-empty piece is a ``bm_extract.AsIs``, its
    raw ``value`` is returned unchanged (see Filter_raw).
    """
    if context is None:
        context = {}

    if not isinstance(context, Context):
        assert(isinstance(context, dict)), "context must be a Context object or dict"
        context = Context(context)

    results = []
    self.node.Render(results, context)

    # nodes may append None; those pieces contribute nothing
    results = [ result for result in results if result is not None ]

    #
    #   See Filter_raw
    #   - we need this complicated logic due to empty spaces
    #   sometimes being inserted into the template: leading '' pieces are
    #   skipped, and only the first real piece may short-circuit as AsIs
    #
    for result in results:
        if isinstance(result, bm_extract.AsIs):
            return result.value
        elif result != '':
            break

    return "".join([ bm_extract.coerce_string(result, separator = ", ") for result in results ])
def ScrubCategory(self, itemd):
    """
    Normalize the "category" entries of ``itemd``.

    Every entry is reduced to a dict with a mandatory "@term" plus the
    optional "@scheme" and "@label" values; entries that yield no term are
    dropped. The term is the entry itself coerced to a string or, failing
    that, the entry's "term" value.

    ``itemd`` is modified in place: "category" is replaced by the cleaned
    list, or removed entirely when nothing usable is left. Nothing is
    returned.
    """
    cats = []
    for catd in bm_extract.as_list(itemd, "category") or []:
        cat_name = bm_extract.coerce_string(catd) or bm_extract.as_string(catd, "term")
        if not cat_name:
            continue

        ncatd = {
            "@term" : cat_name,
        }
        for key in ( "scheme", "label" ):
            value = bm_extract.as_string(catd, key)
            if value:
                ncatd["@" + key] = value

        cats.append(ncatd)

    if cats:
        itemd["category"] = cats
        return

    # Best-effort removal: a missing key (or a container that does not
    # support deletion) is fine, but unrelated errors such as
    # KeyboardInterrupt are no longer swallowed.
    try:
        del itemd["category"]
    except (KeyError, TypeError, AttributeError):
        pass
def Filter(self, name, argument, value):
    """
    Returns a plural suffix if the value is not 1. By default, this
    suffix is 's'.

    Example:

        You have {{ num_messages }} message{{ num_messages|pluralize }}.

    For words that require a suffix other than 's', pass the alternate
    suffix as the filter argument.

    Example:

        You have {{ num_walruses }} walrus{{ num_walruses|pluralize:"es" }}.

    For words that don't pluralize by simple suffix, pass both the singular
    and the plural suffix, separated by a comma.

    Example:

        You have {{ num_cherries }} cherr{{ num_cherries|pluralize:"y,ies" }}.

    Empty parts of the argument are ignored, so a lone suffix is always
    taken as the plural one.
    """
    count = bm_extract.coerce_int(value)

    spec = bm_extract.coerce_string(argument, otherwise = "")
    suffixes = [ part for part in spec.split(",", 1) if part ]

    if not suffixes:
        singular, plural = "", "s"
    elif len(suffixes) == 1:
        singular, plural = "", suffixes[0]
    else:
        singular, plural = suffixes[0], suffixes[1]

    if count == 1:
        return singular

    return plural
def Filter(self, name, argument, value):
    """
    Turn the value into a lower-case, underscore-separated slug.

    The value is coerced to a string, every match of ``sluggie_rex`` and
    then of ``underscore_rex`` is replaced by "_", leading and trailing
    underscores are trimmed and the result is lower-cased.
    """
    slug = bm_extract.coerce_string(value)
    for rex in ( sluggie_rex, underscore_rex ):
        slug = rex.sub("_", slug)

    return slug.strip("_").lower()
def Filter(self, name, argument, value):
    """
    Same as default, without the empty string rule: the value is returned
    when it coerces to True, otherwise the argument (as a string, "" when
    it cannot be coerced) is returned.
    """
    if bm_extract.coerce_bool(value, otherwise = False):
        return value

    return bm_extract.coerce_string(argument, otherwise = "")
def Render(self, results, context):
    """
    Resolve this variable against ``context``, run its filters and append
    the outcome to ``results``.

    results -- list the rendered value is appended to
    context -- the rendering context; ``self.path`` is looked up in it

    For each level of ``self.indirection`` the current value is itself
    treated as template source and rendered in a pushed context scope. If
    that fails, a logged error report wrapped in <pre> is appended instead
    and rendering of this node stops.

    Unless switched off, string results are HTML-escaped (&, <, >, ', ").
    The "safe" filter turns escaping off and "escape" turns it back on; any
    other filter name is looked up with ``djolt_base.Filter.Find``.

    Raises ``djolt_base.DjoltNoSuchFilterError`` for an unknown filter.
    """
    # imported at call time, as in the original (presumably to avoid a
    # circular import -- TODO confirm)
    import djolt

    value = context.get(self.path)

    indirection = self.indirection
    while indirection > 0:
        try:
            indirection -= 1
            if not value:
                break

            value = bm_extract.coerce_string(value, separator = None)

            context.Push()
            try:
                context[SPECIAL_SAFE] = True
                value = djolt.Template(value).Render(context)
            finally:
                context.Pop()
        except Exception:
            # report template errors inline, but let KeyboardInterrupt and
            # SystemExit propagate
            results.append("<pre>\n" + Log(
                "Indirect template exception",
                exception = True,
                indirection = self.indirection,
                path = self.path,
            ).replace("\n", "<br />") + "\n</pre>")
            return

    # NOTE: despite its name, is_safe == True means "escape the output"
    is_safe = context.get(SPECIAL_SAFE, True)
    for filter_name, filter_argument in self.filters:
        if filter_name == "safe":
            is_safe = False
            continue
        elif filter_name == "escape":
            is_safe = True
            continue

        filter_obj = djolt_base.Filter.Find(filter_name)
        if not filter_obj:
            raise djolt_base.DjoltNoSuchFilterError(filter_name)

        value = filter_obj.Filter(filter_name, filter_argument, value)

    if is_safe and type(value) in types.StringTypes:
        # "&" must go first so the entities produced below are not
        # escaped a second time
        for raw, entity in (
            ( "&", "&amp;" ),
            ( "<", "&lt;" ),
            ( ">", "&gt;" ),
            ( "'", "&#39;" ),
            ( "\"", "&quot;" ),
        ):
            value = value.replace(raw, entity)

    results.append(value)
def Filter(self, name, argument, value):
    """
    Converts a string into all lowercase.

    For example:

        {{ value|lower }}

    An empty or missing value produces None.
    """
    text = bm_extract.coerce_string(value, separator = None)
    if not text:
        return None

    return text.lower()
def Filter(self, name, argument, value):
    """\
    URL encodes a string; no character is treated as safe, so "/" is
    encoded too.

    For example:

        {{ value|urlencode }}

    If value is "Joel is a/slug", the output will be "Joel%20is%20a%2Fslug".
    An empty or missing value produces None.
    """
    text = bm_extract.coerce_string(value, separator = None)
    if not text:
        return None

    return urllib.quote(text, safe = '')
def Filter(self, name, argument, value):
    """\
    Make the first letter uppercase, the rest lower

    For example:

        {{ value|capitalize }}

    If value is "joel is a SLUG", the output will be "Joel is a slug".
    An empty or missing value produces None.
    """
    text = bm_extract.coerce_string(value, separator = None)
    if not text:
        return None

    # str.capitalize() touches only the first character; string.capwords()
    # (used previously) capitalized every word, which is what the "title"
    # filter is for and contradicted the documented behaviour.
    return text.capitalize()
def Filter(self, name, argument, value):
    """\
    Converts a string into all uppercase.

    For example:

        {{ value|upper }}

    If value is "Joel is a slug", the output will be "JOEL IS A SLUG".
    An empty or missing value produces None.
    """
    text = bm_extract.coerce_string(value, separator = None)
    if not text:
        return None

    return text.upper()
def Filter(self, name, argument, value):
    """\
    Converts a string into titlecase (every word capitalized, as done by
    string.capwords, which also collapses runs of whitespace).

    For example:

        {{ value|title }}

    If value is "Joel is a slug", the output will be "Joel Is A Slug".
    An empty or missing value produces None.
    """
    text = bm_extract.coerce_string(value, separator = None)
    if not text:
        return None

    return string.capwords(text)
def Filter(self, name, argument, value):
    """
    Joins a list with a string, like Python's str.join(list)

    For example:

        {{ value|join:" // " }}

    If value is the list ['a', 'b', 'c'], the output will be the string
    "a // b // c". Every element is coerced to a string first; an empty
    list produces None.
    """
    pieces = [ bm_extract.coerce_string(piece) for piece in bm_extract.coerce_list(value, separator = None) ]
    glue = bm_extract.coerce_string(argument, otherwise = "")

    if not pieces:
        return None

    return glue.join(pieces)
def Filter(self, name, argument, value):
    """
    If (and only if) value is None, use given default. Otherwise, use the
    value.

    Note that if an empty string is given, the default value will not be
    used. Use the default filter if you want to fallback for empty strings.

    For example:

        {{ value|default_if_none:"nothing" }}

    If value is None, the output will be the string "nothing".
    """
    # identity test: "== None" would also fire for objects whose __eq__
    # claims equality with everything
    if value is None:
        return bm_extract.coerce_string(argument, otherwise = "")

    return value
def ScrubLinks(self, itemd):
    """
    Normalize the "links" and "link" entries of ``itemd`` into one "link"
    list.

    Every entry of "links" is reduced to a dict with a mandatory "@href"
    plus the optional "@rel", "@type", "@hreflang", "@title" and "@length"
    values; entries that yield no href are dropped. The href is the entry
    itself coerced to a string or, failing that, the entry's "href" value.

    If ``itemd`` also has a plain "link" value that is not already among
    the hrefs, it is appended with "@rel" set to "alternate".

    ``itemd`` is modified in place: "link" and "links" are removed and,
    when any link survived, "link" is set to the cleaned list. Nothing is
    returned.
    """
    # always build a fresh list so appending below is safe whatever
    # as_list() handed back
    links = []
    for linkd in bm_extract.as_list(itemd, "links") or []:
        link_href = bm_extract.coerce_string(linkd) or bm_extract.as_string(linkd, "href")
        if not link_href:
            continue

        nlinkd = {
            "@href" : link_href,
        }
        for key in ( "rel", "type", "hreflang", "title", "length" ):
            value = bm_extract.as_string(linkd, key)
            if value:
                nlinkd["@" + key] = value

        links.append(nlinkd)

    link = bm_extract.as_string(itemd, "link")
    if link:
        for linkd in links:
            if link == bm_extract.as_string(linkd, "@href"):
                break
        else:
            # no existing entry points at the plain link
            links.append({
                "@href" : link,
                "@rel" : "alternate",
            })

    # Best-effort removal: a missing key (or a container that does not
    # support deletion) is fine, but unrelated errors such as
    # KeyboardInterrupt are no longer swallowed.
    for key in ( "link", "links" ):
        try:
            del itemd[key]
        except (KeyError, TypeError, AttributeError):
            pass

    if links:
        itemd["link"] = links
def Filter(self, name, argument, value):
    """
    If value evaluates to False, use given default. Otherwise, use the
    value.

    For example:

        {{ value|default:"nothing" }}

    The empty string is a special case: if value is "", the output is the
    empty string and the default is not used.
    """
    if value == "":
        # the empty string rule: passed through, never defaulted
        return ""

    keep_value = bm_extract.coerce_bool(value, otherwise = False)
    if not keep_value:
        return bm_extract.coerce_string(argument, otherwise = "")

    return value
def as_string(self, path, **ad):
    """
    Look up ``path`` with ``self.get`` and return it coerced to a string.

    Any keyword arguments are passed straight to
    ``bm_extract.coerce_string``.
    """
    found = self.get(path)
    return bm_extract.coerce_string(found, **ad)
def ScrubEntry(self, itemd):
    """
    Convert an entry into the "@attribute" form used for outline nodes.

    - a dict becomes a new dict in which every atomic value is stored
      under "@<key>", nested "items"/"outline" values are scrubbed
      recursively into "outline", and "@type" / "@text" get defaults
    - an atomic value becomes { "@title" : <value as string> }
    - a list (or list-like object) is scrubbed element by element
    - anything else is returned unchanged

    Note that a dict argument is modified: its "created" value, if
    present, is re-stored after datetime coercion.
    """
    if bm_extract.is_dict(itemd):
        nd = {}

        # which kinds of URL were seen decides the default "@type" below
        seen_html = False
        seen_rss = False
        seen_url = False

        for key, value in itemd.iteritems():
            if self.AtomLike():
                # rename Atom-style keys to the outline attribute names
                if key == "link":
                    key = "htmlUrl"
                elif key == "feeds":
                    key = "rssUrl"
                elif key == "content":
                    key = "description"
                elif key == "title":
                    key = "text"
                elif key == "category":
                    # categories collapse into one comma-separated string
                    # of their "term" values
                    key = "tags"
                    value = ", ".join(map(lambda d: d["term"], value))
                elif key == "links":
                    # a link with rel == "alternate" supplies rssUrl; if
                    # several match, the last one wins
                    for ld in bm_extract.coerce_list(value):
                        if bm_extract.as_string(ld, "rel") == "alternate":
                            key = "rssUrl"
                            value = bm_extract.as_string(ld, "href")

            #
            #   datetimes (?)
            #   - "created" is re-stored on itemd coerced to a datetime
            #   (rfc822 = True), keeping the original value when coercion
            #   fails. This touches the dict being iterated.
            #   NOTE(review): the indentation of this block was not visible
            #   in the reviewed text; it is placed at loop-body level --
            #   confirm against the original file.
            #
            try:
                created = itemd.pop("created")
                itemd["created"] = bm_extract.coerce_datetime(created, otherwise = created, rfc822 = True)
            except KeyError:
                pass

            # URL-ish values are reduced with FirstInListLikeObject;
            # a None result drops the key altogether
            if key == "rssUrl":
                value = self.FirstInListLikeObject(value, value)
                if value == None:
                    continue

                seen_rss = True
            elif key == "htmlUrl":
                value = self.FirstInListLikeObject(value, value)
                if value == None:
                    continue

                seen_html = True
            elif key == "url":
                seen_url = True

            if key in [ "items", "outline" ]:
                # children are scrubbed recursively
                nd["outline"] = self.ScrubEntry(value)
            elif value == None:
                pass
            elif bm_extract.is_atomic(value):
                # only atomic values become attributes; other values are
                # silently dropped
                nd['@%s' % key] = value

        # default the type from what was seen, without overriding an
        # explicit "type" value
        if seen_rss:
            nd.setdefault("@type", "rss")
        elif seen_html:
            nd.setdefault("@type", "link")
        elif seen_url:
            nd.setdefault("@type", "link")

        nd.setdefault("@text", "")

        return nd
    elif bm_extract.is_atomic(itemd):
        return { "@title" : bm_extract.coerce_string(itemd) }
    elif bm_extract.is_list(itemd) or bm_extract.is_list_like(itemd):
        return map(self.ScrubEntry, itemd)

    return itemd
def Filter(self, name, argument, value):
    """
    Split the value (coerced to a string) into a list.

    With an argument, the string is split on that separator; without one
    it is split on runs of whitespace.
    """
    text = bm_extract.coerce_string(value)
    if not argument:
        return text.split()

    return text.split(argument)
def Filter(self, name, argument, value):
    """
    Escape the value for embedding inside a JSON string literal.

    The value is coerced to a string and JSON-encoded with
    ``bm_work.IterEncoder``; the first and last characters of the encoding
    (the surrounding quotes of a JSON string) are then dropped.
    """
    encoded = json.dumps(bm_extract.coerce_string(value), cls = bm_work.IterEncoder)
    return encoded[1:-1]
def Execute(self, context): import bm_extract id = self.id if id == ID_NAME: ## Log("HERE:B", value = self.value, result = context.get(self.value)) return context.get(self.value) elif id == ID_LITERAL_STRING: return self.value elif id == ID_LITERAL_NUMBER: if self.value.find('.') > -1: return bm_extract.coerce_float(self.value) else: return bm_extract.coerce_int(self.value) elif id == ID_LITERAL_TRUE: return True elif id == ID_LITERAL_FALSE: return False elif id == ID_LITERAL_NONE: return False elif id == '==': return self.first.Execute(context) == self.second.Execute(context) elif id == '!=': return self.first.Execute(context) != self.second.Execute(context) elif id == '<': ## Log("HERE:XXX", ## first = self.first.Execute(context), ## second = self.second.Execute(context), ## ) return self.first.Execute(context) < self.second.Execute(context) elif id == '<=': return self.first.Execute(context) <= self.second.Execute(context) elif id == '>': return self.first.Execute(context) > self.second.Execute(context) elif id == '>=': return self.first.Execute(context) >= self.second.Execute(context) elif id == '+': if self.second == None: return self.first.Execute(context) return self.first.Execute(context) + self.second.Execute(context) elif id == '-': if self.second == None: return -self.first.Execute(context) return self.first.Execute(context) - self.second.Execute(context) elif id == '.': ## key = "%s.%s" % ( self.first.Literal(context), self.second.Literal(context), ) ## value = context.get(key) ## print "<%s>/<%s>" % ( key, value ) return context.get("%s.%s" % ( self.first.Literal(context), self.second.Literal(context), )) elif id == '[': ## key = "%s[%s]" % ( self.first.Literal(context), self.second.Literal(context), ) ## value = context.get(key) ## print >> sys.stderr, "!!! 
<%s>/<%s>" % ( key, value ) return context.get("%s[%s]" % ( self.first.Literal(context), self.second.Literal(context), )) elif id == 'and': a = bm_extract.coerce_bool(self.first.Execute(context)) b = bm_extract.coerce_bool(self.second.Execute(context)) return a and b elif id == 'or': a = bm_extract.coerce_bool(self.first.Execute(context)) b = bm_extract.coerce_bool(self.second.Execute(context)) return a or b elif id == 'not': a = bm_extract.coerce_bool(self.first.Execute(context)) return not a elif id == '(': if len(self.second) != 1: raise NotImplementedError, self.id + ": functions take exactly one argument" if self.first.value == "int": b = bm_extract.coerce_int(self.second[0].Execute(context)) ## Log("HERE:A", b = b, first = self.first, second = self.second[0], valuewas = self.second[0].Execute(context)) elif self.first.value == "string": b = bm_extract.coerce_string(self.second[0].Execute(context)) elif self.first.value == "bool": b = bm_extract.coerce_bool(self.second[0].Execute(context)) elif self.first.value == "float": b = bm_extract.coerce_float(self.second[0].Execute(context)) else: raise NotImplementedError, self.id + ": function can only be int|string|bool|float" return b else: print self.id, self.first, self.second, self.third raise NotImplementedError, self.id
def TranscribeNode(self, e_node, o):
    """
    Convert a dictionary into XML

    e_node -- the ElementTree element being filled in
    o -- the value to transcribe into it

    - lists (and list-like objects) transcribe each member into e_node
    - dicts are handled by key:
      - keys starting with "@@" (or containing ":@@") are hidden
      - the key "@" sets the element text
      - keys starting with "@" (or containing ":@") become attributes,
        named by the key with every "@" removed
      - every other key becomes a child element; a list value yields one
        child per member (an empty list yields one empty child)
    - None adds nothing
    - any other value is appended to the element text as a string, on a
      new line if there already is text
    """
    if bm_extract.is_list(o):
        for member in o:
            self.TranscribeNode(e_node, member)
        return

    if bm_extract.is_dict(o):
        #
        #   First pass: text and attributes
        #
        for key, sub in o.iteritems():
            if key.startswith("@@") or ":@@" in key:
                continue

            if key == '@':
                e_node.text = bm_extract.coerce_string(sub, separator = ",")
                continue

            if not (key.startswith("@") or ":@" in key):
                continue

            if bm_extract.is_atomic(sub):
                e_node.set(key.replace('@', ''), bm_extract.coerce_string(sub))
            elif bm_extract.is_list(sub) or bm_extract.is_list_like(sub):
                e_node.set(key.replace('@', ''), bm_extract.coerce_string(sub, separator = ","))

        #
        #   Second pass: children. Everything "@"-ish (hidden keys, text
        #   and attributes) was dealt with above.
        #
        for key, sub in o.iteritems():
            if key.startswith("@") or ":@" in key:
                continue

            if bm_extract.is_list_like(sub):
                sub = list(sub)

            if bm_extract.is_list(sub):
                # lists result in repeated children, one per member
                emitted = False
                for member in sub:
                    emitted = True
                    self.TranscribeNode(ElementTree.SubElement(e_node, key), member)

                if emitted:
                    continue

                # an empty list still gets a single, empty child
                sub = None

            self.TranscribeNode(ElementTree.SubElement(e_node, key), sub)

        return

    if bm_extract.is_list_like(o):
        for member in list(o):
            self.TranscribeNode(e_node, member)
        return

    if bm_extract.is_none(o):
        return

    if e_node.text:
        e_node.text += "\n"
        e_node.text += bm_extract.coerce_string(o)
    else:
        e_node.text = bm_extract.coerce_string(o)
def ScrubEntry(self, itemd):
    """
    Make sure we look like an RSS entry

    ``itemd`` is split with ``self.Separate`` into ``nd`` (the entry being
    built; presumably the known RSS items -- confirm against Separate) and
    ``xd`` (everything else). Atom-style values found in ``xd`` are then
    copied into ``nd`` under "atom:" keys and used to default the
    corresponding RSS values:

    - links -> "atom:links", and "link" from the alternate (or self) link
    - author.uri -> "source"; author -> "atom:author" and "dc:creator"
    - updated / published -> "atom:updated" / "atom:published", and
      "pubDate" when it is missing
    - body -> "description"; content / summary -> "atom:content" /
      "atom:summary" and a default "description"
    - id -> "atom:id" and a default "guid"

    "title", "link" and "description" always end up present. Whatever is
    left in ``xd`` is kept under "unknown:<key>".

    Returns the new dict ``nd``.
    """
    #
    #   Look for known items and namespaced items
    #
    nd, xd = self.Separate(itemd, self._known_item, "rss")

    #
    #   atom links
    #   - the KeyError handler also covers links lacking "rel" / "href":
    #   in that case no default "link" is derived from them
    #
    try:
        links = xd.pop('links')
        if links:
            nd["atom:links"] = links

            #
            #   default an RSS value
            #
            if not nd.get("link"):
                ld = dict([ ( l["rel"], l ) for l in links ])
                v = ld.get("alternate") or ld.get("self")
                if v:
                    nd["link"] = v["href"]
    except KeyError:
        pass

    #
    #   author.uri
    #
    try:
        value = bm_extract.as_string(xd, 'author.uri')
        if value:
            nd["source"] = value
    except KeyError:
        pass

    #
    #   author
    #
    try:
        value = xd.pop('author')
        if value:
            value = bm_extract.coerce_string(value)
            if value:
                nd["atom:author"] = value
                nd["dc:creator"] = value
    except KeyError:
        pass

    #
    #   atom updated / published
    #   'updated': '2009-01-09T12:20:02+00:00'}
    #
    for key in ( 'updated', 'published' ):
        try:
            value = xd.pop(key)
        except KeyError:
            continue

        if value:
            nd["atom:%s" % key] = value

    #
    #   default a pubDate
    #
    if not nd.get("pubDate"):
        dts = nd.get("atom:updated") or nd.get("atom:published")
        if dts:
            try:
                import dateutil.parser

                dt = dateutil.parser.parse(dts)
                if dt:
                    nd["pubDate"] = dt.strftime("%a, %d %b %Y %H:%M:%S %z")
            except Exception:
                # best effort: a missing dateutil or an unparseable date
                # only costs us the pubDate; KeyboardInterrupt and
                # SystemExit are no longer swallowed
                Log("date could not be parsed - maybe a missing module?", exception = True, dts = dts)

    #
    #   Our fake composite value, body
    #
    try:
        value = xd.pop("body")
        if value:
            nd["description"] = value
    except KeyError:
        pass

    #
    #   Atom content, then Atom summary: each is kept under its "atom:"
    #   key and only fills "description" if nothing has set it yet
    #
    for key in ( "content", "summary" ):
        try:
            value = xd.pop(key)
        except KeyError:
            continue

        if value:
            nd.setdefault("description", value)
            nd["atom:%s" % key] = value

    #
    #   Atom ID
    #
    try:
        value = xd.pop("id")
        if value:
            nd.setdefault("guid", value)
            nd["atom:id"] = value
    except KeyError:
        pass

    #
    #   Required item elements
    #
    nd.setdefault("title", "")
    nd.setdefault("link", "#")
    nd.setdefault("description", "")

    #
    #   Remaining items
    #
    if xd:
        for key, item in xd.iteritems():
            nd["unknown:%s" % key] = item

    return nd