def unix(value):
    """
    Convert `value` to a unix-timestamp string.

    A date/datetime is used as-is; a number below 10000000000 is taken
    as seconds since the epoch, anything larger as milliseconds.
    """
    if not _convert:
        _late_import()
    # Numbers are promoted to a datetime first; the magnitude decides
    # whether they count seconds or milliseconds since the epoch.
    if not isinstance(value, (date, builtin_datetime)):
        if value < 10000000000:
            value = _convert.unix2datetime(value)
        else:
            value = _convert.milli2datetime(value)
    return str(_convert.datetime2unix(value))
def _scrub(value, is_done): type = value.__class__ if type in (NoneType, NullType): return None elif type in (date, datetime): return float(datetime2unix(value)) elif type is timedelta: return value.total_seconds() elif type is Date: return float(value.unix) elif type is Duration: return value.seconds elif type is str: return utf82unicode(value) elif type is Decimal: return float(value) elif isinstance(value, Mapping): _id = id(value) if _id in is_done: Log.error("possible loop in structure detected") is_done.add(_id) output = {} for k, v in value.iteritems(): if not isinstance(k, basestring): Log.error("keys must be strings") v = _scrub(v, is_done) if v != None: output[k] = v is_done.discard(_id) return output elif type in (list, DictList): output = [] for v in value: v = _scrub(v, is_done) output.append(v) return output elif type.__name__ == "bool_": # DEAR ME! Numpy has it's own booleans (value==False could be used, but 0==False in Python. DOH!) if value == False: return False else: return True elif hasattr(value, '__json__'): try: output = json._default_decoder.decode(value.__json__()) return output except Exception, e: Log.error("problem with calling __json__()", e)
def get_changesets(date_range=None, revision_range=None, repo=None):
    """
    Run `hg log` on `repo` and parse each templated output line into a
    changeset document.

    :param date_range: optional object with .min/.max datetimes used to
        build hg's --date filter (either bound may be None for open-ended)
    :param revision_range: optional object with .min/.max revision numbers
        for hg's -r filter
    :param repo: object exposing .directory (working copy path) and .name

    NOTE(review): in this chunk the inner generator `iterator()` is
    defined but never visibly returned or consumed -- presumably the
    rest of the original function does so; confirm.
    """
    # GET ALL CHANGESET INFO
    args = [
        "hg",
        "log",
        "--cwd",
        File(repo.directory).filename,
        "-v",
        # "-p",  # TO GET PATCH CONTENTS
        "--style",
        TEMPLATE_FILE.filename
    ]
    if date_range is not None:
        # Build hg's --date spec.  The trailing " 0" looks like the
        # hgdate timezone offset (assumes UTC) -- TODO confirm against
        # hg's date formats.  max is decremented by one second,
        # presumably to make the upper bound exclusive.
        if date_range.max == None:
            if date_range.min == None:
                drange = ">0 0"
            else:
                drange = ">" + unicode(convert.datetime2unix(date_range.min)) + " 0"
        else:
            if date_range.min == None:
                drange = "<" + unicode(convert.datetime2unix(date_range.max) - 1) + " 0"
            else:
                drange = unicode(convert.datetime2unix(date_range.min)) + " 0 to " + unicode(convert.datetime2unix(date_range.max) - 1) + " 0"
        args.extend(["--date", drange])
    if revision_range is not None:
        args.extend(["-r", str(revision_range.min) + ":" + str(revision_range.max)])

    proc = subprocess.Popen(
        args,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # merge stderr into stdout
        bufsize=-1                 # system-default buffering
    )

    def iterator():
        # Generator: one parsed changeset doc per tab-separated line of
        # hg output.
        try:
            while True:
                try:
                    line = proc.stdout.readline()
                    if line == '':  # EOF: hg has exited
                        proc.wait()
                        if proc.returncode:
                            Log.error("Unable to pull hg log: return code {{return_code}}", {
                                "return_code": proc.returncode
                            })
                        return
                except Exception, e:
                    Log.error("Problem getting another line", e)

                if line.strip() == "":
                    continue
                Log.note(line)

                # Field order follows the log template (see TEMPLATE_FILE):
                # changeset = "{date|hgdate|urlescape}\t{node}\t{rev}\t{author|urlescape}\t{branches}\t\t\t\t{p1rev}\t{p1node}\t{parents}\t{children}\t{tags}\t{desc|urlescape}\n"
                # branch = "{branch}%0A"
                # parent = "{parent}%0A"
                # tag = "{tag}%0A"
                # child = "{child}%0A"
                (
                    date,
                    node,
                    rev,
                    author,
                    branches,
                    files,
                    file_adds,
                    file_dels,
                    p1rev,
                    p1node,
                    parents,
                    children,
                    tags,
                    desc
                ) = (urllib.unquote(c) for c in line.split("\t"))
                # Multi-valued fields are newline-delimited; drop the
                # empty strings left by split()
                file_adds = set(file_adds.split("\n")) - {""}
                file_dels = set(file_dels.split("\n")) - {""}
                files = set(files.split("\n")) - set()
                doc = {
                    "repos": repo.name,
                    # hgdate is "<unixtime> <offset>"; keep only the seconds
                    "date": convert.unix2datetime(convert.value2number(date.split(" ")[0])),
                    "node": node,
                    "revision": rev,
                    "author": author,
                    "branches": set(branches.split("\n")) - {""},
                    # modified files = all touched files minus adds/deletes
                    "file_changes": files - file_adds - file_dels - {""},
                    "file_adds": file_adds,
                    "file_dels": file_dels,
                    # first parent is always included explicitly
                    "parents": set(parents.split("\n")) - {""} | {p1rev+":"+p1node},
                    "children": set(children.split("\n")) - {""},
                    "tags": set(tags.split("\n")) - {""},
                    "description": desc
                }
                doc = elasticsearch.scrub(doc)
                yield doc
        except Exception, e:
            # "need more than ..." is Python 2's tuple-unpack ValueError:
            # the line had fewer tab-separated fields than expected
            if isinstance(e, ValueError) and e.message.startswith("need more than "):
                Log.error("Problem iterating through log ({{message}})", {
                    "message": line
                }, e)
            Log.error("Problem iterating through log", e)
def _scrub(value, is_done): type_ = value.__class__ if type_ in (NoneType, NullType): return None elif type_ is unicode: value_ = value.strip() if value_: return value_ else: return None elif type_ is float: if math.isnan(value) or math.isinf(value): return None return value elif type_ in (int, long, bool): return value elif type_ in (date, datetime): return float(datetime2unix(value)) elif type_ is timedelta: return value.total_seconds() elif type_ is Date: return float(value.unix) elif type_ is Duration: return float(value.seconds) elif type_ is str: return utf82unicode(value) elif type_ is Decimal: return float(value) elif type_ is Dict: return _scrub(unwrap(value), is_done) elif isinstance(value, Mapping): _id = id(value) if _id in is_done: _Log.warning("possible loop in structure detected") return '"<LOOP IN STRUCTURE>"' is_done.add(_id) output = {} for k, v in value.iteritems(): if isinstance(k, basestring): pass elif hasattr(k, "__unicode__"): k = unicode(k) else: _Log.error("keys must be strings") v = _scrub(v, is_done) if v != None or isinstance(v, Mapping): output[k] = v is_done.discard(_id) return output elif type_ in (tuple, list, DictList): output = [] for v in value: v = _scrub(v, is_done) output.append(v) return output elif type_ is type: return value.__name__ elif type_.__name__ == "bool_": # DEAR ME! Numpy has it's own booleans (value==False could be used, but 0==False in Python. DOH!) if value == False: return False else: return True elif hasattr(value, '__json__'): try: output = json._default_decoder.decode(value.__json__()) return output except Exception, e: _Log.error("problem with calling __json__()", e)
def get_changesets(date_range=None, revision_range=None, repo=None):
    """
    Run `hg log` on `repo` and parse each templated output line into a
    changeset document.

    :param date_range: optional object with .min/.max datetimes used to
        build hg's --date filter (either bound may be None for open-ended)
    :param revision_range: optional object with .min/.max revision numbers
        for hg's -r filter
    :param repo: object exposing .directory (working copy path) and .name

    NOTE(review): in this chunk the inner generator `iterator()` is
    defined but never visibly returned or consumed -- presumably the
    rest of the original function does so; confirm.
    """
    # GET ALL CHANGESET INFO
    args = [
        "hg",
        "log",
        "--cwd",
        File(repo.directory).filename,
        "-v",
        # "-p",  # TO GET PATCH CONTENTS
        "--style",
        TEMPLATE_FILE.filename
    ]
    if date_range is not None:
        # Build hg's --date spec.  The trailing " 0" looks like the
        # hgdate timezone offset (assumes UTC) -- TODO confirm against
        # hg's date formats.  max is decremented by one second,
        # presumably to make the upper bound exclusive.
        if date_range.max == None:
            if date_range.min == None:
                drange = ">0 0"
            else:
                drange = ">" + unicode(convert.datetime2unix(date_range.min)) + " 0"
        else:
            if date_range.min == None:
                drange = "<" + unicode(convert.datetime2unix(date_range.max) - 1) + " 0"
            else:
                drange = unicode(convert.datetime2unix(date_range.min)) + " 0 to " + unicode(convert.datetime2unix(date_range.max) - 1) + " 0"
        args.extend(["--date", drange])
    if revision_range is not None:
        args.extend(["-r", str(revision_range.min) + ":" + str(revision_range.max)])

    proc = subprocess.Popen(
        args,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # merge stderr into stdout
        bufsize=-1                 # system-default buffering
    )

    def iterator():
        # Generator: one parsed changeset doc per tab-separated line of
        # hg output.
        try:
            while True:
                try:
                    line = proc.stdout.readline()
                    if line == '':  # EOF: hg has exited
                        proc.wait()
                        if proc.returncode:
                            Log.error("Unable to pull hg log: return code {{return_code}}", {
                                "return_code": proc.returncode
                            })
                        return
                except Exception, e:
                    Log.error("Problem getting another line", e)

                if line.strip() == "":
                    continue
                Log.note(line)

                # Field order follows the log template (see TEMPLATE_FILE):
                # changeset = "{date|hgdate|urlescape}\t{node}\t{rev}\t{author|urlescape}\t{branches}\t\t\t\t{p1rev}\t{p1node}\t{parents}\t{children}\t{tags}\t{desc|urlescape}\n"
                # branch = "{branch}%0A"
                # parent = "{parent}%0A"
                # tag = "{tag}%0A"
                # child = "{child}%0A"
                (
                    date,
                    node,
                    rev,
                    author,
                    branches,
                    files,
                    file_adds,
                    file_dels,
                    p1rev,
                    p1node,
                    parents,
                    children,
                    tags,
                    desc
                ) = (urllib.unquote(c) for c in line.split("\t"))
                # Multi-valued fields are newline-delimited; drop the
                # empty strings left by split()
                file_adds = set(file_adds.split("\n")) - {""}
                file_dels = set(file_dels.split("\n")) - {""}
                files = set(files.split("\n")) - set()
                doc = {
                    "repos": repo.name,
                    # hgdate is "<unixtime> <offset>"; keep only the seconds
                    "date": convert.unix2datetime(convert.value2number(date.split(" ")[0])),
                    "node": node,
                    "revision": rev,
                    "author": author,
                    "branches": set(branches.split("\n")) - {""},
                    # modified files = all touched files minus adds/deletes
                    "file_changes": files - file_adds - file_dels - {""},
                    "file_adds": file_adds,
                    "file_dels": file_dels,
                    # first parent is always included explicitly
                    "parents": set(parents.split("\n")) - {""} | {p1rev + ":" + p1node},
                    "children": set(children.split("\n")) - {""},
                    "tags": set(tags.split("\n")) - {""},
                    "description": desc
                }
                doc = elasticsearch.scrub(doc)
                yield doc
        except Exception, e:
            # "need more than ..." is Python 2's tuple-unpack ValueError:
            # the line had fewer tab-separated fields than expected
            if isinstance(e, ValueError) and e.message.startswith("need more than "):
                Log.error("Problem iterating through log ({{message}})", {
                    "message": line
                }, e)
            Log.error("Problem iterating through log", e)