def reindexItems():
    """Push every fetched row of the 'items' column family to the search index.

    Generator: each ``yield`` hands an asynchronous db/search call to whatever
    drives this function (presumably Twisted's inlineCallbacks -- the
    decorator is not visible here, TODO confirm).

    Only the first 10000 rows (fetched in reverse order) are considered.
    An item is skipped when:
      * it has no ``meta`` or no ``meta.owner``;
      * the owner's ``basic.org`` cannot be read from 'entities';
      * it names a parent that is not among the fetched rows.
    """
    fetchedItems = yield db.get_range_slice('items', count=10000, reverse=True)
    items = dict((row.key, utils.supercolumnsToDict(row.columns))
                 for row in fetchedItems)

    log.msg("Total items:", len(fetchedItems))
    for i, row in enumerate(fetchedItems):
        itemId = row.key
        item = items[itemId]
        log.msg(i + 1, itemId)

        meta = item.get('meta', {})
        if 'owner' not in meta:
            continue

        try:
            col = yield db.get(meta['owner'], "entities", "org", "basic")
            ownerOrgId = col.column.value
        except Exception:
            # Best effort: an owner without an org must not stop the reindex.
            # ``Exception`` (not a bare except) so that closing or
            # interrupting the generator is never swallowed here.
            log.msg("Error when indexing:", itemId)
            continue

        parentId = meta.get('parent', None)
        if not parentId:
            yield search.solr.updateItemIndex(itemId, item, ownerOrgId)
        elif parentId in items:
            yield search.solr.updateItemIndex(itemId, item, ownerOrgId,
                                              conv=items[parentId])
        else:
            # The parent fell outside the fetched slice (or no longer
            # exists); indexing without it used to raise KeyError.
            log.msg("Error when indexing:", itemId,
                    "parent not fetched:", parentId)
def reindexProfileContent():
    """Re-index the profile of every user entity that has an org.

    Generator: each ``yield`` hands an asynchronous db/search call to whatever
    drives this function (presumably Twisted's inlineCallbacks -- the
    decorator is not visible here, TODO confirm).

    Reads up to 1000 rows of 'entities'; rows whose ``basic.type`` is not
    'user', or whose ``basic.org`` is empty or missing, are only logged.
    """
    entityRows = yield db.get_range_slice('entities', count=1000)
    for entityRow in entityRows:
        key = entityRow.key
        log.msg(key)
        profile = Entity(key, utils.supercolumnsToDict(entityRow.columns))

        if profile.basic.get('type', '') != 'user':
            continue

        org = profile.basic.get('org', '')
        if not org:
            continue

        yield search.solr.updatePeopleIndex(key, profile, org)
def _withoutLeadingField(attachments):
    """Return ``{attachmentId: "name:size:ftype"}`` for four-field values.

    ``attachments`` maps attachment ids to colon-separated strings.  Only
    values with exactly four fields are converted (their first field is
    dropped); every other value is left out of the result.
    """
    converted = {}
    for attachmentId, value in attachments.items():
        fields = value.split(':')
        if len(fields) == 4:
            converted[attachmentId] = "%s:%s:%s" % tuple(fields[1:])
    return converted


def updateData():
    """Migrate attachment records to the layout without the leading field.

    Generator: each ``yield`` hands an asynchronous db call to whatever drives
    this function (presumably Twisted's inlineCallbacks -- the decorator is
    not visible here, TODO confirm).

    Steps:
      1. For every value in 'item_files' (five colon-separated fields), insert
         the last four fields into 'attachmentVersions' under the same
         column name, keyed by the attachment id.
      2. For every row of 'items' and then 'mConversations', rewrite the
         'attachments' super column so each four-field value loses its first
         field.

    Raises:
        ValueError: if an 'item_files' value does not have exactly five
            colon-separated fields.
    """
    rows = yield db.get_range_slice('item_files', count=1000)
    for row in rows:
        attachments = utils.supercolumnsToDict(row.columns)
        for attachmentId in attachments:
            versions = attachments[attachmentId]
            for timeuuid in versions:
                # The first field is no longer stored with the version.
                _encodedTimeUUID, aid, name, size, ftype = \
                    versions[timeuuid].split(':')
                yield db.insert(attachmentId, "attachmentVersions",
                                "%s:%s:%s:%s" % (aid, name, size, ftype),
                                timeuuid)

    # Both column families get the same rewrite; only the mConversations
    # pass echoes the row keys it visits.
    for columnFamily, echoKeys in (('items', False), ('mConversations', True)):
        rows = yield db.get_range_slice(columnFamily, count=10000)
        for row in rows:
            rowKey = row.key
            record = utils.supercolumnsToDict(row.columns)
            if echoKeys:
                print(rowKey)

            attachments = _withoutLeadingField(record.get('attachments', {}))
            if attachments:
                # NOTE(review): the whole super column is removed first, so a
                # value that is not in the four-field form is dropped rather
                # than kept -- confirm that is intended.
                yield db.remove(rowKey, columnFamily,
                                super_column='attachments')
                yield db.batch_insert(rowKey, columnFamily,
                                      {"attachments": attachments})
def _renamedMetaFields(itemMeta):
    """Return the meta columns to add for a 'link' or 'poll' item.

    Link fields are copied under a ``link_`` prefix and a poll's ``question``
    is copied to ``comment``.  Fields that are absent are skipped; any other
    item type yields an empty dict.
    """
    updated = {}
    itemType = itemMeta.get('type')
    if itemType == 'link':
        for field in ('url', 'title', 'summary', 'imgSrc', 'embedType',
                      'embedSrc', 'embedHeight', 'embedWidth'):
            if field in itemMeta:
                updated['link_' + field] = itemMeta[field]
    elif itemType == 'poll':
        if 'question' in itemMeta:
            updated['comment'] = itemMeta['question']
    return updated


def _aclWithoutBooleanFriends(pickledAcl):
    """Return the ACL re-pickled without a boolean ``accept.friends``.

    Returns None when the ACL needs no change or cannot be unpacked (the
    latter is logged).
    """
    try:
        # NOTE(review): pickle.loads is only safe on trusted data; this
        # value comes straight from the 'items' column family.
        acl = pickle.loads(pickledAcl)
        if 'accept' in acl and 'friends' in acl['accept'] \
                and isinstance(acl['accept']['friends'], bool):
            del acl['accept']['friends']
            return pickle.dumps(acl)
    except Exception:
        log.msg('cannot unpack acl', pickledAcl)
    return None


def updateData():
    """Run the item/ACL/file/poll/group data migration.

    Generator: each ``yield`` hands an asynchronous db call to whatever drives
    this function (presumably Twisted's inlineCallbacks -- the decorator is
    not visible here, TODO confirm).

    Stages, in order:
      1. Truncate 'user_files' and check that 'entityFeed_files' exists.
      2. For each of up to 10000 items: store the owner's org in ``meta.org``,
         rewrite old-style ACLs of conversations, rebuild 'user_files' /
         'entityFeed_files' from the item's attachments, and copy link/poll
         meta fields to their new names.
      3. Build 'feed_poll' / 'userItems_poll' entries for every user entity.
      4. Rename group access 'public'/'private' to 'open'/'closed'.
      5. Re-key 'entityGroupsMap' columns to ``<lower-cased name>:<groupId>``.

    Raises:
        Exception: if the 'entityFeed_files' column family is missing.
    """
    yield db.truncate('user_files')
    try:
        # Probe read: only the kind of failure matters, not the result.
        yield db.get('asdf', 'entityFeed_files', uuid.uuid1().bytes)
    except ttypes.InvalidRequestException as exception:
        log.msg(exception)
        raise Exception('entityFeed_files CF missing, create the CF')
    except ttypes.NotFoundException:
        pass

    entities = {}   # ownerId -> entity dict, fetched lazily for file owners
    rows = yield db.get_range_slice('items', count=10000, reverse=True)
    items = dict((row.key, utils.supercolumnsToDict(row.columns))
                 for row in rows)

    for itemId in items:
        item = items[itemId]
        log.msg(itemId)
        if 'meta' not in item:
            continue
        meta = item['meta']
        owner = meta.get('owner')
        isConversation = 'parent' not in meta

        # Add org to all items
        ownerOrgId = None
        if 'owner' in meta:
            try:
                col = yield db.get(owner, "entities", 'org', 'basic')
                ownerOrgId = col.column.value
                yield db.insert(itemId, 'items', ownerOrgId, 'org', 'meta')
            except Exception as e:
                # Feedback items fall back to the owner itself as the org.
                # The owner is known to belong to *this* item here; before,
                # a missing owner reused the previous item's owner.
                if meta.get('type', '') == 'feedback':
                    yield db.insert(itemId, 'items', owner, 'org', 'meta')
                else:
                    log.msg('cannot set org on item', itemId, e)

        # Fix ACLs
        if isConversation and 'acl' in meta:
            acl = meta['acl']
            if acl == 'company':
                if ownerOrgId is None:
                    log.msg('cannot fix company acl, owner org unknown',
                            itemId)
                else:
                    acl = pickle.dumps({"accept": {"orgs": [ownerOrgId]}})
                    yield db.insert(itemId, 'items', acl, 'acl', 'meta')
            else:
                fixedAcl = _aclWithoutBooleanFriends(acl)
                if fixedAcl is not None:
                    yield db.insert(itemId, 'items', fixedAcl, 'acl', 'meta')

        # Migrate files: update user_files and entityFeed_files
        # ('user_files' was truncated at the top of this function).
        if 'owner' in meta and item.get('attachments'):
            ownerId = owner
            if ownerId not in entities:
                cols = yield db.get_slice(ownerId, 'entities', ['basic'])
                entities[ownerId] = utils.supercolumnsToDict(cols)

            # The audience is the same for every attachment of the item, so
            # it is expanded once instead of once per attachment.
            # NOTE(review): this passes the ACL as read from the db, not the
            # value rewritten by the "Fix ACLs" step above -- confirm that
            # expandAcl accepts the old form.
            feedEntities = []
            if isConversation and meta.get('acl', ''):
                orgId = entities[ownerId]['basic']['org']
                expanded = yield utils.expandAcl(ownerId, orgId, meta['acl'],
                                                 itemId, ownerId, True)
                feedEntities = list(expanded)

            for attachmentId in item['attachments']:
                timeuuid, name = \
                    item['attachments'][attachmentId].split(':')[:2]
                timeuuid = utils.decodeKey(timeuuid)
                val = '%s:%s:%s:%s' % (attachmentId, name, itemId, ownerId)
                yield db.insert(ownerId, "user_files", val, timeuuid)
                for entityId in feedEntities:
                    yield db.insert(entityId, "entityFeed_files", val,
                                    timeuuid)

        # Migrate items
        # Meta fields in "link", "event" and "poll"
        itemType = meta.get('type', None)
        if itemType == 'event':
            print('Found an event: %s' % (itemId,))
        elif itemType in ('link', 'poll'):
            updated = _renamedMetaFields(meta)
            if updated:
                yield db.batch_insert(itemId, 'items', {'meta': updated})

    #
    # Create poll indexes for feed and userItems
    #
    rows = yield db.get_range_slice('entities', count=10000, reverse=True)
    mutations = {}
    for row in rows:
        entityId = row.key
        entity = utils.supercolumnsToDict(row.columns)

        # Rows without a 'basic' super column are skipped, not fatal.
        if entity.get('basic', {}).get('type') != 'user':
            continue

        # Both reads are issued before waiting on the first one.
        d1 = db.get_slice(entityId, 'feed', count=10000)
        d2 = db.get_slice(entityId, 'userItems', count=10000)

        results = yield d1
        for col in results:
            value = col.column.value
            linked = items.get(value, {})
            if linked.get('meta', {}).get('type', '') == 'poll':
                mutations.setdefault(entityId, {}) \
                         .setdefault('feed_poll', {})[col.column.name] = value

        results = yield d2
        for col in results:
            value = col.column.value
            # The conversation type is the fourth colon-separated field.
            fields = value.split(':', 4)
            if len(fields) > 3 and fields[3] == 'poll':
                mutations.setdefault(entityId, {}) \
                         .setdefault('userItems_poll', {})[col.column.name] = value
    if mutations:
        yield db.batch_mutate(mutations)

    # Group type changed from public-private to open-closed.
    rows = yield db.get_range_slice('entityGroupsMap', count=1000)
    groupIds = set()
    for row in rows:
        for col in row.columns:
            # Column names are "<name>:<groupId>"; split from the right so a
            # colon inside the group name does not break the unpacking.
            groupIds.add(col.column.name.rsplit(':', 1)[-1])

    groups = {}
    if groupIds:
        cols = yield db.multiget_slice(groupIds, "entities")
        groups = utils.multiSuperColumnsToDict(cols)

    renamedAccess = {'public': 'open', 'private': 'closed'}
    for groupId in groups:
        access = groups[groupId].get('basic', {}).get('access', '').lower()
        if access in renamedAccess:
            yield db.insert(groupId, 'entities', renamedAccess[access],
                            'access', 'basic')

    # Fix entityGroupsMap
    rows = yield db.get_range_slice('entityGroupsMap', count=1000)
    for row in rows:
        entityId = row.key
        for col in row.columns:
            groupId = col.column.name.rsplit(':', 1)[-1]
            groupName = groups.get(groupId, {}).get('basic', {}).get('name')
            if groupName is None:
                log.msg('cannot fix entityGroupsMap, unknown group', groupId)
                continue
            expected = '%s:%s' % (groupName.lower(), groupId)
            if col.column.name != expected:
                yield db.remove(entityId, 'entityGroupsMap', col.column.name)
                yield db.insert(entityId, 'entityGroupsMap', '', expected)
def getNewUserCount(startDate, endDate, count=100, column_count=100, mail_to=''):
    """Collect signup/item statistics for a date window and print or mail them.

    Generator: each ``yield`` hands an asynchronous db/mail call to whatever
    drives this function (presumably Twisted's inlineCallbacks -- the
    decorator is not visible here, TODO confirm).

    Column write timestamps (microseconds) are compared against the window
    ``startDate <= t < endDate``; "total" figures count everything written
    before ``endDate``.

    Args:
        startDate: window start, a string in the module's ``dateFormat``.
        endDate: window end (exclusive), same format; must be later than
            ``startDate``.
        count: rows handled per page of each range scan.
        column_count: columns handled per page within a row.
        mail_to: recipients of the report.  Either an iterable of addresses
            or a single address string; when empty the stats are
            pretty-printed instead of mailed.

    Raises:
        Exception: if ``endDate`` is not later than ``startDate``.
        ValueError: if ``count`` or ``column_count`` is below 1 (paging
            could never advance).
    """
    frm_to = startDate + ' ' + endDate
    startDate = datetime.datetime.strptime(startDate, dateFormat)
    endDate = datetime.datetime.strptime(endDate, dateFormat)
    if endDate <= startDate:
        log.msg("end-date should be later than start-date")
        raise Exception("end-date should be later than start-date")
    if count < 1 or column_count < 1:
        raise ValueError("count and column_count must be at least 1")

    startTime = time.mktime(startDate.timetuple())
    endTime = time.mktime(endDate.timetuple())
    # One extra row/column is fetched per page; it is not processed but
    # tells us another page exists and where that page starts.
    toFetchCount = count + 1
    toFetchColumnCount = column_count + 1

    data = {}           # orgId -> {"domain": [(domain, ts)], "users": {...}}
    usersOrgMap = {}    # userId -> orgId
    new_users = {}      # orgId -> [userId] that joined inside the window
    totalUsers = {}     # orgId -> users that joined before endTime

    def seconds(col):
        # Column timestamps are in microseconds.
        return col.column.timestamp / 1000000.0

    def inWindow(stamp):
        return startTime <= stamp < endTime

    def tallyMembers(orgId, columns):
        # Record one page of an org's members (columns of 'orgUsers').
        for col in columns:
            userId = col.column.name
            usersOrgMap[userId] = orgId
            members = data.setdefault(orgId, {}).setdefault("users", {})
            if userId not in members:
                members[userId] = {"newItems": 0, "items": 0}
            joined = seconds(col)
            if inWindow(joined):
                joiners = new_users.setdefault(orgId, [])
                if userId not in joiners:
                    joiners.append(userId)
            if joined < endTime:
                totalUsers[orgId] += 1

    def tallyItems(counters, columns):
        # Add one page of a user's items (columns of 'userItems').
        for col in columns:
            created = seconds(col)
            if inWindow(created):
                counters['newItems'] += 1
            if created < endTime:
                counters['items'] += 1

    # --- domains -----------------------------------------------------------
    new_domains = []
    start = ''
    while 1:
        domains = yield db.get_range_slice('domainOrgMap', count=toFetchCount,
                                           start=start)
        for row in domains[:count]:
            domain = row.key
            for col in row.columns[:count]:
                registered = seconds(col)
                orgDomains = data.setdefault(col.column.name, {}) \
                                 .setdefault("domain", [])
                # Entries are (domain, timestamp) pairs, so compare names.
                if not any(known == domain for known, _ in orgDomains):
                    orgDomains.append((domain, registered))
                if inWindow(registered) and domain not in new_domains:
                    new_domains.append(domain)
        if len(domains) < toFetchCount:
            break
        start = domains[-1].key

    stats = {frm_to: {"newDomains": new_domains,
                      "newDomainCount": len(new_domains)}}

    # --- users per org -----------------------------------------------------
    totalNewUsers = 0
    start = ''
    while 1:
        users = yield db.get_range_slice('orgUsers', start=start,
                                         count=toFetchCount,
                                         column_count=toFetchColumnCount)
        for row in users[:count]:
            orgId = row.key
            totalUsers[orgId] = 0
            columns = row.columns
            tallyMembers(orgId, columns[:column_count])
            # A full page means more columns follow; continue from the
            # extra (unprocessed) column.
            while len(columns) == toFetchColumnCount:
                column_start = columns[-1].column.name
                page = yield db.get_range_slice('orgUsers', count=1,
                                                start=orgId,
                                                column_start=column_start,
                                                column_count=toFetchColumnCount)
                columns = page[0].columns if page else []
                tallyMembers(orgId, columns[:column_count])
            totalNewUsers += len(new_users.get(orgId, []))
        if len(users) < toFetchCount:
            break
        start = users[-1].key
    stats[frm_to]["signups"] = totalNewUsers

    # --- items per user ----------------------------------------------------
    start = ''
    while 1:
        rows = yield db.get_range_slice('userItems', start=start,
                                        count=toFetchCount,
                                        column_count=toFetchColumnCount)
        for row in rows[:count]:
            columns = row.columns
            if not columns:
                continue
            userId = row.key
            # Users missing from 'orgUsers' are pooled under 'no-org'; the
            # counters are created once per user and never reset.
            orgId = usersOrgMap.get(userId, 'no-org')
            counters = data.setdefault(orgId, {}).setdefault('users', {}) \
                           .setdefault(userId, {'items': 0, 'newItems': 0})
            tallyItems(counters, columns[:column_count])
            while len(columns) == toFetchColumnCount:
                cstart = columns[-1].column.name
                page = yield db.get_range_slice('userItems', count=1,
                                                start=userId,
                                                column_start=cstart,
                                                column_count=toFetchColumnCount)
                columns = page[0].columns if page else []
                tallyItems(counters, columns[:column_count])
        if len(rows) < toFetchCount:
            break
        start = rows[-1].key

    # --- per-domain summary, oldest first domain first ---------------------
    def firstDomainTime(orgId):
        orgDomains = data[orgId].get('domain')
        # Orgs without a known domain (e.g. 'no-org') sort last.
        return orgDomains[0][1] if orgDomains else float('inf')

    stats["domain"] = OrderedDict()
    for orgId in sorted(data, key=firstDomainTime):
        org = data[orgId]
        orgDomains = org.get('domain')
        if orgDomains:
            domainName = ",".join([x[0] for x in orgDomains])
        else:
            domainName = orgId
        counters = list(org.get('users', {}).values())
        stats["domain"][domainName] = {
            "newUsers": len(new_users.get(orgId, [])),
            "totalUsers": totalUsers.get(orgId, 0),
            "newItems": sum([c['newItems'] for c in counters]),
            "items": sum([c['items'] for c in counters]),
        }

    if not mail_to:
        # pprint.pprint prints by itself; wrapping it in print only added
        # a stray "None" line.
        pprint.pprint(stats)
        return

    # A single address given as a string must not be iterated per character.
    if isinstance(mail_to, (str, type(u''))):
        mail_to = [mail_to]

    subject = "Stats: %s to %s" % (startDate.strftime(dateFormat),
                                   endDate.strftime(dateFormat))
    textPart = repr(stats)
    rootUrl = config.get('General', 'URL')
    brandName = config.get('Branding', 'Name')
    htmlPart = getBlock("emails.mako", "html_stats",
                        **{"stats": stats, "frm_to": frm_to,
                           'rootUrl': rootUrl, 'brandName': brandName})
    for mailId in mail_to:
        yield utils.sendmail(mailId, subject, textPart, htmlPart)
def migrateFriendsToFollowers():
    """Convert the friends model to followers/subscriptions and clean up.

    Generator: each ``yield`` hands an asynchronous db call to whatever drives
    this function (presumably Twisted's inlineCallbacks -- the decorator is
    not visible here, TODO confirm).

    Steps, in order:
      1. Copy every 'connections' row into 'followers' / 'subscriptions'.
      2. Remove the 'displayNameIndex' and 'nameIndex' rows of all users.
      3. Change activity items with subType 'connection' to 'following'.
      4. Drop pending requests whose name does not start with 'G'.
      5. Remove 'people' from 'latest' for all users.
      6. Remove the ':FA' notifications of every user.
      7. Drop the 'connections' and 'connectionsByTag' column families.

    Destructive and not restartable once step 7 has run.
    """
    # Migrate all friends to followers/subscriptions.
    connectionRows = yield db.get_range_slice('connections', count=10000)
    for connectionRow in connectionRows:
        userId = connectionRow.key
        friends = [x.super_column.name for x in connectionRow.columns]
        if not friends:
            # Nothing to copy; avoid sending empty batches.
            continue
        yield db.batch_insert(userId, "followers", dict.fromkeys(friends, ''))
        yield db.batch_mutate(dict((x, {'subscriptions': {userId: ''}})
                                   for x in friends))
    log.msg('>>>>>>>> Converted all connections to following.')

    # Remove name indices of friends
    entityRows = yield db.get_range_slice('entities', count=10000,
                                          names=['basic'])
    userIds = []
    for entityRow in entityRows:
        # Rows without a 'basic' super column are skipped instead of
        # aborting the migration with a KeyError.
        basic = utils.supercolumnsToDict(entityRow.columns).get('basic', {})
        if basic.get('type') == 'user':
            userIds.append(entityRow.key)
    for userId in userIds:
        yield db.remove(userId, 'displayNameIndex')
        yield db.remove(userId, 'nameIndex')
    log.msg('>>>>>>>> Removed name indices for friends.')

    # Convert all "connection" activity to "follow".
    # We already have two separate items, so subtype conversion should be good.
    itemRows = yield db.get_range_slice('items', count=10000, names=['meta'])
    connectionItems = []
    for itemRow in itemRows:
        meta = utils.supercolumnsToDict(itemRow.columns).get('meta', {})
        if meta.get('type', '') == 'activity' \
                and meta.get('subType') == 'connection':
            connectionItems.append(itemRow.key)
    if connectionItems:
        yield db.batch_mutate(
            dict((x, {'items': {'meta': {'subType': 'following'}}})
                 for x in connectionItems))
    log.msg('>>>>>>>> All connection items converted to following.')

    # Remove all friend requests from pendingConnections
    pendingRows = yield db.get_range_slice('pendingConnections', count=10000)
    for pendingRow in pendingRows:
        userId = pendingRow.key
        pendingFriendRequestIds = [x.column.name for x in pendingRow.columns
                                   if not x.column.name.startswith('G')]
        if pendingFriendRequestIds:
            yield db.batch_remove({'pendingConnections': [userId]},
                                  names=pendingFriendRequestIds)
    log.msg('>>>>>>>> Removed pending friend requests.')

    # Remove all friend requests from latest
    if userIds:
        # ``names`` is a list everywhere else in this function; a bare
        # string would presumably be read as a sequence of one-character
        # names -- TODO confirm against the db client's batch_remove.
        yield db.batch_remove({'latest': userIds}, names=['people'])
    log.msg('>>>>>>>> Removed friend requests from latest.')

    # Remove all friend-request-accepted notifications
    notifyMutations = {}
    for userId in userIds:
        acceptedCols = yield db.get_slice(userId, "notificationItems",
                                          super_column=':FA')
        if acceptedCols:
            names = [col.column.name for col in acceptedCols]
            colmap = dict.fromkeys(names)   # every value is None
            # Timestamp in whole microseconds.
            deletion = Deletion(int(time.time() * 1000000), 'notifications',
                                SlicePredicate(column_names=names))
            notifyMutations[userId] = {'notifications': colmap,
                                       'latest': [deletion]}
            yield db.remove(userId, 'notificationItems', super_column=':FA')
    if notifyMutations:
        yield db.batch_mutate(notifyMutations)
    log.msg('>>>>>>>> Removed friend notifications from notifications and latest.')

    # Finally, remove the connections column family.
    yield db.system_drop_column_family('connections')
    yield db.system_drop_column_family('connectionsByTag')
    log.msg('>>>>>>>> Removed the connections column family.')