def _activity_prepare(c, activities): ''' Creates a list of (when, field, removed, added, inconsistent) pairs. It groups those activities that happened at the same time on the same field. The list is sorted descending by when. ''' d = defaultdict(lambda: defaultdict(list)) for act in activities: what = act['what'] when = act['when'] d[when][what].append(act) res = [] for when in d: for what in d[when]: acts = d[when][what] field = c.fieldmap[what] fieldtype = c.get_field_property('type', field) container = c.get_field_property('container', field) try: if container: # we must group the added and removed added = [cast_to_list(act['added'], fieldtype) for act in acts] added = sum(added, []) removed = [cast_to_list(act['removed'], fieldtype) for act in acts] removed = sum(removed, []) else: # there should be only one activity for this, # otherwise it is corrupted removed = [type_cast(act['removed'], fieldtype) for act in acts] added = [type_cast(act['added'], fieldtype) for act in acts] res.append((when, field, removed, added, False)) except: added = [act['added'] for act in acts] removed = [act['removed'] for act in acts] res.append((when, field, removed, added, True)) res.sort(reverse=True) return res
def save_commit(self, commit, uri):
    '''
    Build a document from a commit object and save it.

    Every field of the document is passed through the field's optional
    ``convert`` property and then through ``type_cast`` with the field's
    ``type`` property before saving.

    :param commit: object exposing ``type``, ``message``, ``hexsha``,
        ``parents``, ``author``, ``committer``, ``stats``, ``summary`` and
        the date / tz-offset attributes read below
    :param uri: stored unchanged under the ``'uri'`` key
    :returns: the result of ``save_object2(self.name, obj)``
    :raises TypeError: if ``commit.type`` is not ``'commit'``
    '''
    c = commit
    if c.type != 'commit':
        raise TypeError("Expected 'commit' type objects. Got (%s)" % c.type)

    msg = c.message
    acked_by = acked_by_re.findall(msg)
    signed_off_by = signed_off_by_re.findall(msg)
    resolves = resolves_re.findall(msg)
    related = related_re.findall(msg)

    obj = {
        'uri': uri,
        'hexsha': c.hexsha,
        'parents': [t.hexsha for t in c.parents],
        'author_name': c.author.name,
        'author_email': c.author.email,
        'author_tz_offset': c.author_tz_offset,
        'authored_dt': c.authored_date,
        'committer_name': c.committer.name,
        'committer_email': c.committer.email,
        'committer_tz_offset': c.committer_tz_offset,
        'committed_dt': c.committed_date,
        # FIXME: Can this be sped up?
        # 'count', 'name_rev', 'size' and the per-file stats ('files') are
        # intentionally left out because computing them is very slow.
        # stats is very slow too; but it's useful...
        'stats': c.stats.total,
        'summary': c.summary,
        'message': msg,
        'acked_by': acked_by,
        'signed_off_by': signed_off_by,
        'resolves': resolves,
        'related': related,
    }

    # items() works on both Python 2 and 3 (iteritems() is Python 2 only);
    # list() snapshots the pairs so obj can be rewritten while looping.
    for f, v in list(obj.items()):
        convert = self.get_field_property('convert', f)
        _type = self.get_field_property('type', f)
        if convert:
            v = convert(v)
        obj[f] = type_cast(v, _type)

    return save_object2(self.name, obj)
def save_commit(self, commit, uri):
    """
    Build a document from a commit object and save it.

    Every field of the document is passed through the field's optional
    ``convert`` property and then through ``type_cast`` with the field's
    ``type`` property before saving.

    :param commit: object exposing ``type``, ``message``, ``hexsha``,
        ``parents``, ``author``, ``committer``, ``stats``, ``summary`` and
        the date / tz-offset attributes read below
    :param uri: stored unchanged under the ``"uri"`` key
    :returns: the result of ``save_object2(self.name, obj)``
    :raises TypeError: if ``commit.type`` is not ``"commit"``
    """
    c = commit
    if c.type != "commit":
        raise TypeError("Expected 'commit' type objects. Got (%s)" % c.type)

    msg = c.message
    acked_by = acked_by_re.findall(msg)
    signed_off_by = signed_off_by_re.findall(msg)
    resolves = resolves_re.findall(msg)
    related = related_re.findall(msg)

    obj = {
        "uri": uri,
        "hexsha": c.hexsha,
        "parents": [t.hexsha for t in c.parents],
        "author_name": c.author.name,
        "author_email": c.author.email,
        "author_tz_offset": c.author_tz_offset,
        "authored_dt": c.authored_date,
        "committer_name": c.committer.name,
        "committer_email": c.committer.email,
        "committer_tz_offset": c.committer_tz_offset,
        "committed_dt": c.committed_date,
        # FIXME: Can this be sped up?
        # 'count', 'name_rev', 'size' and the per-file stats ('files') are
        # intentionally left out because computing them is very slow.
        # stats is very slow too; but it's useful...
        "stats": c.stats.total,
        "summary": c.summary,
        "message": msg,
        "acked_by": acked_by,
        "signed_off_by": signed_off_by,
        "resolves": resolves,
        "related": related,
    }

    # items() works on both Python 2 and 3 (iteritems() is Python 2 only);
    # list() snapshots the pairs so obj can be rewritten while looping.
    for f, v in list(obj.items()):
        convert = self.get_field_property("convert", f)
        _type = self.get_field_property("type", f)
        if convert:
            v = convert(v)
        obj[f] = type_cast(v, _type)

    return save_object2(self.name, obj)
def _get_row(self, row, field, convert, token_type):
    '''
    Turn a result row into an ``{'id', 'field', 'tokens'}`` dict.

    :param row: sequence whose first item is the id and whose second item
        is the raw token value
    :param field: stored unchanged under the ``'field'`` key
    :param convert: optional callable invoked as ``convert(self, raw)``;
        skipped when falsy
    :param token_type: type handed to ``type_cast`` for the final cast
    :returns: dict with the keys ``'id'``, ``'field'`` and ``'tokens'``
    '''
    # the id column comes first, the raw token lookup second
    row_id, raw = row[0], row[1]

    if type(raw) is date:
        # plain dates (exact type only, not datetimes) are forced into
        # UTC datetimes... otherwise mongo barfs
        as_datetime = datetime.combine(raw, dt_time())
        raw = as_datetime.replace(tzinfo=UTC)

    # apply the driver defined conversion (if any), then cast the result
    # to the appropriate data type
    tokens = convert(self, raw) if convert else raw
    tokens = type_cast(tokens, token_type)

    return {'id': row_id, 'field': field, 'tokens': tokens}
def _get_row(self, row, field, convert, token_type):
    '''
    Build the ``{'id', 'field', 'tokens'}`` dict for one result row.

    :param row: sequence holding the id at index 0 and the raw token value
        at index 1
    :param field: copied as-is into the result
    :param convert: optional callable invoked as ``convert(self, raw)``;
        when falsy the raw value is used directly
    :param token_type: type passed to ``type_cast`` for the tokens
    :returns: dict with the keys ``'id'``, ``'field'`` and ``'tokens'``
    '''
    row_id = row[0]      # id 'column' is expected first
    raw_value = row[1]   # and the raw token 'lookup' second

    # Exact-type check: only bare dates are promoted to UTC datetimes
    # (using the default dt_time()), otherwise mongo barfs.
    if type(raw_value) is date:
        raw_value = datetime.combine(raw_value, dt_time())
        raw_value = raw_value.replace(tzinfo=UTC)

    # Run the driver defined conversion when one was given...
    if convert:
        converted = convert(self, raw_value)
    else:
        converted = raw_value

    # ...and cast to the appropriate data type.
    return {
        'id': row_id,
        'field': field,
        'tokens': type_cast(converted, token_type),
    }
def _extract_func(cube, field, **kwargs):
    '''
    Extract one field from every item of a cube's reader and save it.

    For each truthy item yielded by ``c._reader`` the id and the raw value
    are obtained through the field's ``id_x`` / ``raw_x`` properties. Each
    may be a callable applied to the item or, if calling it raises
    ``TypeError``, a key used to index the item. The raw value is cast with
    ``type_cast``, optionally passed through the field's ``convert``
    callable, and stored with ``save_doc``.

    :param cube: passed to ``get_cube`` to obtain the cube object
    :param field: name of the field to extract
    :param kwargs: accepted for interface compatibility; unused here
    :returns: dict with ``'saved'`` (sum of the ``save_doc`` results) and,
        only if at least one save returned a falsy result, ``'failed_ids'``
    '''
    c = get_cube(cube)
    # NOTE(review): an earlier comment stated that a None id_x "will become
    # ObjectID()"; nothing in this function does that - confirm where (or
    # whether) that default is applied.
    id_x = c.get_field_property('id_x', field)
    # raw_x falls back to the field name itself when not defined
    raw_x = c.get_field_property('raw_x', field, field)
    # a falsy convert skips the convert step
    convert = c.get_field_property('convert', field)
    _type = c.get_field_property('type', field)

    saved = 0
    failed = []
    for item in c._reader:
        if not item:
            continue

        try:
            id_ = id_x(item)
        except TypeError:
            id_ = item[id_x]

        try:
            raw = raw_x(item)
        except TypeError:
            raw = item[raw_x]

        tokens = type_cast(raw, _type)
        if convert:
            tokens = convert(tokens)

        # Judge each document on its own save result. Testing the running
        # total instead would stop recording failures after the first
        # successful save.
        ok = save_doc(c.name, field, tokens, id_)
        saved += ok
        if not ok:
            failed.append(id_)

    result = {'saved': saved}
    if failed:
        result['failed_ids'] = failed
    return result
def _extract_func(cube, field, **kwargs):
    '''
    Extract one field from every item of a cube's reader and save it.

    For each truthy item yielded by ``c._reader`` the id and the raw value
    are obtained through the field's ``id_x`` / ``raw_x`` properties. Each
    may be a callable applied to the item or, if calling it raises
    ``TypeError``, a key used to index the item. The raw value is cast with
    ``type_cast``, optionally passed through the field's ``convert``
    callable, and stored with ``save_doc``.

    :param cube: passed to ``get_cube`` to obtain the cube object
    :param field: name of the field to extract
    :param kwargs: accepted for interface compatibility; unused here
    :returns: dict with ``'saved'`` (sum of the ``save_doc`` results) and,
        only if at least one save returned a falsy result, ``'failed_ids'``
    '''
    c = get_cube(cube)
    # NOTE(review): an earlier comment stated that a None id_x "will become
    # ObjectID()"; nothing in this function does that - confirm where (or
    # whether) that default is applied.
    id_x = c.get_field_property('id_x', field)
    # raw_x falls back to the field name itself when not defined
    raw_x = c.get_field_property('raw_x', field, field)
    # a falsy convert skips the convert step
    convert = c.get_field_property('convert', field)
    _type = c.get_field_property('type', field)

    saved = 0
    failed = []
    for item in c._reader:
        if not item:
            continue

        try:
            id_ = id_x(item)
        except TypeError:
            id_ = item[id_x]

        try:
            raw = raw_x(item)
        except TypeError:
            raw = item[raw_x]

        tokens = type_cast(raw, _type)
        if convert:
            tokens = convert(tokens)

        # Judge each document on its own save result. Testing the running
        # total instead would stop recording failures after the first
        # successful save.
        ok = save_doc(c.name, field, tokens, id_)
        saved += ok
        if not ok:
            failed.append(id_)

    result = {'saved': saved}
    if failed:
        result['failed_ids'] = failed
    return result
def cast_to_list(value, fieldtype):
    '''
    Split a comma separated string and cast the pieces, always returning
    a list.

    :param value: comma separated string, or None
    :param fieldtype: type handed to ``type_cast`` for the stripped pieces
    :returns: ``[]`` for None; otherwise the ``type_cast`` result, wrapped
        in a one-element list when it is not exactly a ``list``
    '''
    if value is None:
        return []

    pieces = []
    for chunk in value.split(','):
        pieces.append(chunk.strip())

    casted = type_cast(pieces, fieldtype)
    if type(casted) is list:
        return casted
    return [casted]