def update_trustees(session, charity_collection, lower_limit, upper_limit, batch_size=10000):
    """Write trustee names from the extract tables onto charity documents.

    The registration-number space is walked in windows of ``batch_size``
    starting at ``lower_limit``. For every window the joined
    ExtractMainCharity/ExtractTrustee rows are grouped by the trustee row's
    ``regno`` and one ``UpdateOne`` per charity sets ``trustees.names`` on
    the document whose ``ids.GB-CHC`` equals that number.

    Args:
        session: Object whose ``query()`` builds the extract query.
        charity_collection: Object whose ``bulk_write()`` receives the
            update requests.
        lower_limit: Start of the first window (inclusive).
        upper_limit: A window is processed while its start is ``<=`` this
            value, so the last window may reach past ``upper_limit``.
        batch_size: Width of each registration-number window.

    Returns:
        None. A ``BulkWriteError`` is logged via ``logging.error`` and the
        loop carries on with the next window.
    """
    logging.info("Updating trustees")
    while lower_limit <= upper_limit:
        rows = (
            session
            .query(ExtractMainCharity.regno, ExtractTrustee)
            .join(ExtractTrustee, ExtractTrustee.regno == ExtractMainCharity.regno)
            .filter(ExtractMainCharity.regno >= lower_limit)
            .filter(ExtractMainCharity.regno < lower_limit + batch_size)
        )

        # Group trustee names by charity number for this window.
        trustees = {}
        for row in rows:
            trustee = row.ExtractTrustee
            trustees.setdefault(trustee.regno, []).append(
                stripped_or_none(trustee.trustee, 'title'))

        requests = [
            UpdateOne({'ids.GB-CHC': regno}, {'$set': {'trustees.names': names}})
            for regno, names in trustees.items()
        ]

        # Log and advance before the write so that an empty window (or a
        # failed write) can never stall the loop on the same range.
        logging.info(lower_limit)
        lower_limit += batch_size

        if not requests:
            continue

        try:
            charity_collection.bulk_write(requests)
        except BulkWriteError as bwe:
            logging.error(bwe.details)
def update_beneficiaries(session, charity_collection, lower_limit, upper_limit, batch_size=10000):
    """Write beneficiary classifications onto charity documents.

    The registration-number space is walked in windows of ``batch_size``
    starting at ``lower_limit``. For every window the query joins
    ExtractMainCharity to ExtractClass and ExtractClassRef, keeps only rows
    whose ``ExtractClass.classno`` is in ``[200, 300)``, groups them by
    charity number and issues one ``UpdateOne`` per charity that sets
    ``beneficiaries`` to a list of ``{'id', 'name'}`` dicts.

    Args:
        session: Object whose ``query()`` builds the extract query.
        charity_collection: Object whose ``bulk_write()`` receives the
            update requests.
        lower_limit: Start of the first window (inclusive).
        upper_limit: A window is processed while its start is ``<=`` this
            value, so the last window may reach past ``upper_limit``.
        batch_size: Width of each registration-number window.

    Returns:
        None. A ``BulkWriteError`` is logged via ``logging.error`` and the
        loop carries on with the next window.
    """
    logging.info("Updating beneficiaries.")
    while lower_limit <= upper_limit:
        rows = (
            session
            .query(ExtractMainCharity.regno, ExtractClass.id, ExtractClassRef)
            .join(ExtractClass, ExtractClass.regno == ExtractMainCharity.regno)
            .join(ExtractClassRef, ExtractClassRef.classno == ExtractClass.classno)
            .filter(ExtractClass.classno >= 200)
            .filter(ExtractClass.classno < 300)
            .filter(ExtractMainCharity.regno >= lower_limit)
            .filter(ExtractMainCharity.regno < lower_limit + batch_size)
        )

        # Group the classification entries by charity number.
        beneficiaries = {}
        for row in rows:
            class_ref = row.ExtractClassRef
            beneficiaries.setdefault(row.regno, []).append({
                'id': int(class_ref.classno),
                'name': stripped_or_none(class_ref.classtext, 'sentence'),
            })

        requests = [
            UpdateOne({'ids.GB-CHC': regno}, {'$set': {'beneficiaries': entries}})
            for regno, entries in beneficiaries.items()
        ]

        # Log and advance before the write so that an empty window (or a
        # failed write) can never stall the loop on the same range.
        logging.info(lower_limit)
        lower_limit += batch_size

        if not requests:
            continue

        try:
            charity_collection.bulk_write(requests)
        except BulkWriteError as bwe:
            logging.error(bwe.details)
def create_subsidiary(charity):
    """Build the subsidiary sub-document for one extract charity row.

    Args:
        charity: Row object providing ``subno``, ``name``, ``orgtype``,
            ``gd``, ``aob``, ``corr``, ``phone``, ``postcode`` and
            ``add1``..``add5``.

    Returns:
        dict with the keys ``id``, ``name``, ``isRegistered``,
        ``governingDoc``, ``areaOfBenefit`` and ``contact``. ``contact.email``
        is always ``None`` here, and ``contact.address`` only contains the
        address lines that are not ``None`` on the row.
    """
    # Skip address lines that are absent on the row; the rest keep their order.
    address = []
    for field in ('add1', 'add2', 'add3', 'add4', 'add5'):
        line = getattr(charity, field)
        if line is not None:
            address.append(stripped_or_none(line, 'title'))

    return {
        'id': charity.subno,
        'name': stripped_or_none(charity.name, 'title'),
        # Coerce to a real bool: a bare ``orgtype and ...`` would leak None
        # or '' into the document when orgtype is missing or empty.
        'isRegistered': bool(charity.orgtype) and charity.orgtype.strip() == 'R',
        'governingDoc': stripped_or_none(charity.gd),
        'areaOfBenefit': stripped_or_none(charity.aob),
        'contact': {
            'email': None,
            'person': stripped_or_none(charity.corr, 'title'),
            'phone': stripped_or_none(charity.phone),
            'postcode': stripped_or_none(charity.postcode),
            'address': address,
        },
    }
def create_charity(charity, main_charity):
    """Build the top-level charity document from two extract rows.

    Args:
        charity: Row object providing ``regno``, ``name``, ``orgtype``,
            ``gd``, ``aob``, ``corr``, ``phone``, ``postcode`` and
            ``add1``..``add5``.
        main_charity: Row object providing ``email``, ``welsh``,
            ``trustees``, ``web``, ``grouptype``, ``incomedate``,
            ``income``, ``fyend`` and ``coyno``.

    Returns:
        dict describing the charity. The list-valued fields
        (``trustees.names``, ``income.annual``, ``areasOfOperation``,
        ``causes``, ``beneficiaries``, ``operations``, ``subsidiaries``,
        ``alternativeNames``) start empty and ``activities`` starts as
        ``None``.
    """
    # Skip address lines that are absent on the row; the rest keep their order.
    address = []
    for field in ('add1', 'add2', 'add3', 'add4', 'add5'):
        line = getattr(charity, field)
        if line is not None:
            address.append(stripped_or_none(line, 'title'))

    income = main_charity.income
    latest_total = int(income) if income is not None else None

    return {
        'regulator': 'GB-CHC',
        'ids': {
            'charityId': 'GB-CHC-%d' % charity.regno,
            'GB-CHC': charity.regno,
        },
        'name': stripped_or_none(charity.name, 'title'),
        # Coerce to a real bool: a bare ``orgtype and ...`` would leak None
        # or '' into the document when orgtype is missing or empty.
        'isRegistered': bool(charity.orgtype) and charity.orgtype.strip() == 'R',
        'governingDoc': stripped_or_none(charity.gd),
        'areaOfBenefit': stripped_or_none(charity.aob, 'title'),
        'contact': {
            'email': stripped_or_none(main_charity.email, 'lower'),
            'person': stripped_or_none(charity.corr, 'title'),
            'phone': stripped_or_none(charity.phone),
            'postcode': stripped_or_none(charity.postcode),
            'address': address,
        },
        'isWelsh': boolean_on_value(main_charity.welsh, 'T'),
        'trustees': {
            'incorporated': boolean_on_value(main_charity.trustees, 'T'),
            'names': [],
        },
        'website': stripped_or_none(main_charity.web, 'lower'),
        'isSchool': boolean_on_value(main_charity.grouptype, 'SCH'),
        'income': {
            'latest': {
                'date': main_charity.incomedate,
                'total': latest_total,
            },
            'annual': [],
        },
        'fyend': stripped_or_none(main_charity.fyend),
        'companiesHouseNumber': stripped_or_none(main_charity.coyno),
        'areasOfOperation': [],
        'causes': [],
        'beneficiaries': [],
        'operations': [],
        'subsidiaries': [],
        'alternativeNames': [],
        'activities': None,
    }
def update_aoo(session, charity_collection, lower_limit, upper_limit, batch_size=10000):
    """Write areas of operation onto charity documents.

    The registration-number space is walked in windows of ``batch_size``
    starting at ``lower_limit``. For every window the query joins
    ExtractMainCharity to ExtractCharityAoo and ExtractAooRef (matching on
    both ``aootype`` and ``aookey``), groups the reference rows by charity
    number and issues one ``UpdateOne`` per charity that sets
    ``areasOfOperation``.

    Args:
        session: Object whose ``query()`` builds the extract query.
        charity_collection: Object whose ``bulk_write()`` receives the
            update requests.
        lower_limit: Start of the first window (inclusive).
        upper_limit: A window is processed while its start is ``<=`` this
            value, so the last window may reach past ``upper_limit``.
        batch_size: Width of each registration-number window.

    Returns:
        None. A ``BulkWriteError`` is logged via ``logging.error`` and the
        loop carries on with the next window.

    Raises:
        KeyError: If a reference row carries an area type other than
            ``A``-``E`` (after ``stripped_or_none``).
    """
    # Loop-invariant lookup tables, built once instead of once per row.
    location_types = {
        'A': 'UK Division',
        'B': 'Local Authority',
        'C': 'Metropolitan County',
        'D': 'Country',
        'E': 'Continent',
    }
    # Only types B and D can have a parent; it lives one letter up.
    parent_types = {'B': 'C', 'D': 'E'}

    logging.info("Updating areas of operation.")
    while lower_limit <= upper_limit:
        rows = (
            session
            .query(ExtractMainCharity.regno, ExtractCharityAoo.id, ExtractAooRef)
            .join(ExtractCharityAoo, ExtractCharityAoo.regno == ExtractMainCharity.regno)
            .join(ExtractAooRef, and_(
                ExtractAooRef.aootype == ExtractCharityAoo.aootype,
                ExtractAooRef.aookey == ExtractCharityAoo.aookey))
            .filter(ExtractMainCharity.regno >= lower_limit)
            .filter(ExtractMainCharity.regno < lower_limit + batch_size)
        )

        aoo = {}
        for row in rows:
            ref = row.ExtractAooRef
            aootype = stripped_or_none(ref.aootype)

            parent_aoo_id = None
            if aootype in parent_types and ref.master and ref.master > 0:
                parent_aoo_id = '%s-%d' % (parent_types[aootype], ref.master)

            aoo.setdefault(row.regno, []).append({
                # Use the same normalised type code as parentId and
                # locationType so an id can always be matched by a child's
                # parentId. NOTE(review): assumes stripped_or_none only trims
                # whitespace for these one-letter codes - confirm.
                'id': '%s-%d' % (aootype, ref.aookey),
                'parentId': parent_aoo_id,
                'name': stripped_or_none(ref.aooname, 'title'),
                'alternativeName': stripped_or_none(ref.aoosort, 'title'),
                # Deliberately a hard lookup: an unknown type raises KeyError
                # rather than being written with a missing location type.
                'locationType': location_types[aootype],
                'isWelsh': boolean_on_value(ref.welsh, 'Y'),
            })

        requests = [
            UpdateOne({'ids.GB-CHC': regno}, {'$set': {'areasOfOperation': areas}})
            for regno, areas in aoo.items()
        ]

        # Log and advance before the write so that an empty window (or a
        # failed write) can never stall the loop on the same range.
        logging.info(lower_limit)
        lower_limit += batch_size

        if not requests:
            continue

        try:
            charity_collection.bulk_write(requests)
        except BulkWriteError as bwe:
            logging.error(bwe.details)