def parse_activity(new_identifiers, old_xml, resource):
    """Add each activity parsed from ``resource.document`` to the session.

    ``new_identifiers`` collects the identifiers seen so far and is updated
    in place; an activity whose identifier is already in it is logged as a
    duplicate instead of being added.  ``old_xml`` is indexed by identifier
    and yields a pair: item 0 is reused as ``last_change_datetime`` when
    item 1 equals the hash of the freshly parsed raw XML.
    """
    # NOTE(review): indentation was lost in this view; the flush is assumed
    # to run once per activity and the commit once after the loop -- confirm.
    for parsed in parse.document(resource.document, resource):
        parsed.resource = resource
        identifier = parsed.iati_identifier

        if identifier in new_identifiers:
            duplicate_text = (
                "Duplicate identifier {0} in same resource document".format(
                    identifier))
            parse.log.warn(
                _(duplicate_text,
                  logger='activity_importer',
                  dataset=resource.dataset_id,
                  resource=resource.url),
                exc_info='')
        else:
            new_identifiers.add(identifier)
            try:
                # Keep the old timestamp only when the XML is unchanged.
                if hash(parsed.raw_xml) == old_xml[identifier][1]:
                    changed_at = old_xml[identifier][0]
                else:
                    changed_at = datetime.datetime.now()
            except KeyError:
                # Identifier was not known before.
                changed_at = datetime.datetime.now()
            parsed.last_change_datetime = changed_at
            db.session.add(parsed)
            check_for_duplicates([parsed])

        db.session.flush()

    db.session.commit()
def process(ele):
    """Collect the fields of one transaction element into a dict.

    Plain text and attribute values are read directly with ``xval``; the
    remaining fields are produced by per-field callables invoked as
    ``function(ele, resource)``.  A field whose callable raises
    ``MissingValue``, ``InvalidDateError`` or ``ValueError`` is stored as
    ``None`` and a warning is logged.  ``resource`` and ``xml`` are read
    from the enclosing scope.
    """
    data = {
        'description': xval(ele, "description/text()", None),
        'provider_org_text': xval(ele, "provider-org/text()", None),
        'provider_org_activity_id': xval(
            ele, "provider-org/@provider-activity-id", None),
        'receiver_org_text': xval(ele, "receiver-org/text()", None),
        'receiver_org_activity_id': xval(
            ele, "receiver-org/@receiver-activity-id", None),
        'ref': xval(ele, "@ref", None),
        # Not guarded: an error raised here leaves this function.
        'value_amount': iati_decimal(xval(ele, "value/text()")),
    }
    field_functions = {
        'date': partial(xpath_date, "transaction-date/@iso-date"),
        'flow_type': partial(from_codelist, cl.FlowType, "./flow-type/@code"),
        'finance_type': partial(
            from_codelist, cl.FinanceType, "./finance-type/@code"),
        'aid_type': partial(from_codelist, cl.AidType, "./aid-type/@code"),
        'tied_status': partial(
            from_codelist, cl.TiedStatus, "./tied-status/@code"),
        'disbursement_channel': partial(
            from_codelist, cl.DisbursementChannel,
            "./disbursement-channel/@code"),
        'provider_org': partial(from_org, "./provider-org"),
        'receiver_org': partial(from_org, "./receiver-org"),
        'type': partial(
            from_codelist, cl.TransactionType, "./transaction-type/@code"),
        'value_currency': partial(currency, "value/@currency"),
        'value_date': partial(xpath_date, "value/@value-date"),
    }
    for field, function in field_functions.items():
        try:
            data[field] = function(ele, resource)
        except (MissingValue, InvalidDateError, ValueError) as exe:
            data[field] = None
            iati_identifier = xval(
                xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
            message = (
                "Failed to import a valid {0} in activity {1}, error was: {2}"
                .format(field, iati_identifier, exe))
            log.warn(
                _(message,
                  logger='activity_importer',
                  dataset=resource.dataset_id,
                  resource=resource.url),
                exc_info=exe)
    # NOTE(review): no return statement is visible for this function in this
    # view; confirm that the populated dict is handed back to the caller.
def parse_activity(new_identifiers, old_xml, resource):
    """Store the activities found in ``resource.document``.

    Identifiers already present in ``new_identifiers`` are reported as
    duplicates and skipped; new ones are recorded there.  For a new
    activity, ``last_change_datetime`` is taken from ``old_xml`` when the
    stored hash (item 1 of the entry) matches the hash of the new raw XML,
    and is set to the current time otherwise.
    """
    # NOTE(review): the original indentation is not visible here; flush is
    # taken to be per-activity and commit to follow the loop -- confirm.
    for item in parse.document(resource.document, resource):
        item.resource = resource
        ident = item.iati_identifier
        already_seen = ident in new_identifiers

        if not already_seen:
            new_identifiers.add(ident)
            try:
                unchanged = hash(item.raw_xml) == old_xml[ident][1]
                if unchanged:
                    item.last_change_datetime = old_xml[ident][0]
                else:
                    item.last_change_datetime = datetime.datetime.now()
            except KeyError:
                # No earlier copy of this identifier.
                item.last_change_datetime = datetime.datetime.now()
            db.session.add(item)
            check_for_duplicates([item])
        else:
            warning = _(
                "Duplicate identifier {0} in same resource document".format(
                    ident),
                logger='activity_importer',
                dataset=resource.dataset_id,
                resource=resource.url)
            parse.log.warn(warning, exc_info='')

        db.session.flush()

    db.session.commit()
def reporting_org(element, resource=no_resource, major_version='1'):
    """Build the reporting organisation for an activity element.

    Returns ``None`` when there is no ``reporting-org`` child and
    ``major_version`` is ``'1'``; for any other version the ``IndexError``
    is re-raised.  An organisation type that cannot be resolved is stored
    as ``None`` and a warning is logged.  The result comes from
    ``Organisation.as_unique(db.session, **data)``.
    """
    try:
        org_element = element.xpath("./reporting-org")[0]
    except IndexError:
        if major_version == '1':
            return None
        raise

    data = {
        "ref": xval(org_element, "@ref"),
        "name": xval(org_element, TEXT_ELEMENT[major_version], u""),
    }
    try:
        org_types = codelists.by_major_version[major_version].OrganisationType
        data["type"] = org_types.from_string(xval(org_element, "@type"))
    except (MissingValue, ValueError) as exe:
        data['type'] = None
        # Same absolute path as the other importers; the previous
        # "/iati-identifier/text()" did not match that convention.
        iati_identifier = xval(
            org_element, "/iati-activity/iati-identifier/text()",
            'no_identifier')
        message = (
            u"Failed to import a valid reporting-org.type in activity {0}, "
            u"error was: {1}".format(iati_identifier, exe))
        log.warn(
            _(message,
              logger='activity_importer',
              dataset=resource.dataset_id,
              resource=resource.url),
            exc_info=exe)
    return Organisation.as_unique(db.session, **data)
def reporting_org(element, resource=no_resource, major_version='1'):
    """Return the unique Organisation described by ``./reporting-org``.

    A missing ``reporting-org`` element yields ``None`` for major version
    ``'1'`` and re-raises ``IndexError`` otherwise.  When the ``@type``
    value raises ``MissingValue`` or ``ValueError`` the type is recorded as
    ``None`` and the failure is logged as a warning.
    """
    matches = element.xpath("./reporting-org")
    try:
        xml = matches[0]
    except IndexError:
        if major_version == '1':
            return None
        raise

    data = {
        "ref": xval(xml, "@ref"),
        "name": xval(xml, TEXT_ELEMENT[major_version], u""),
    }
    try:
        versioned = codelists.by_major_version[major_version]
        data["type"] = versioned.OrganisationType.from_string(
            xval(xml, "@type"))
    except (MissingValue, ValueError) as exe:
        data['type'] = None
        # Use the identifier path shared by the sibling importers; the
        # former "/iati-identifier/text()" was inconsistent with them.
        iati_identifier = xval(
            xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
        log.warn(
            _(u"Failed to import a valid reporting-org.type in activity {0}, "
              u"error was: {1}".format(iati_identifier, exe),
              logger='activity_importer',
              dataset=resource.dataset_id,
              resource=resource.url),
            exc_info=exe)
    return Organisation.as_unique(db.session, **data)
def process(ele):
    """Build a ``Budget`` from one budget element.

    Each field is produced by a callable invoked as
    ``function(ele, resource)``.  When the callable raises ``MissingValue``,
    ``InvalidDateError``, ``ValueError`` or ``InvalidOperation`` the field
    is set to ``None`` and a warning is logged.  ``resource`` and ``xml``
    are read from the enclosing scope.
    """
    field_functions = {
        'type': budget_type,
        'value_currency': partial(currency, "value/@currency"),
        'value_amount': partial(xpath_decimal, "value/text()"),
        'period_start': partial(xpath_date, "period-start/@iso-date"),
        'period_end': partial(xpath_date, "period-end/@iso-date"),
    }
    data = {}
    for field, function in field_functions.items():
        try:
            data[field] = function(ele, resource)
        except (MissingValue, InvalidDateError, ValueError,
                InvalidOperation) as exe:
            data[field] = None
            iati_identifier = xval(
                xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
            # The unicode prefix used to sit inside the literal ("uFailed").
            message = (
                u"Failed to import a valid budget:{0} in activity {1}, "
                u"error was: {2}".format(field, iati_identifier, exe))
            log.warn(
                _(message,
                  logger='activity_importer',
                  dataset=resource.dataset_id,
                  resource=resource.url),
                exc_info=exe)
    return Budget(**data)
def activities(xmlfile, resource=no_resource):
    """Yield one parsed activity per ``iati-activity`` element in *xmlfile*.

    The ``version`` attribute of the opening ``iati-activities`` tag is
    captured; a value starting with ``'2.'`` switches ``major_version`` to
    ``'2'``.  An activity that raises ``MissingValue`` is logged as an error
    and skipped.

    Raises:
        XMLError: when the parser reports ``ET.XMLSyntaxError``.
    """
    major_version = '1'
    version = None
    try:
        for event, elem in ET.iterparse(xmlfile, events=('start', 'end')):
            if event == 'start' and elem.tag == 'iati-activities':
                version = elem.attrib.get('version')
                if version and version.startswith('2.'):
                    major_version = '2'
            elif event == 'end' and elem.tag == 'iati-activity':
                try:
                    yield activity(
                        elem,
                        resource=resource,
                        major_version=major_version,
                        version=version)
                except MissingValue as exe:
                    log.error(
                        _("Failed to import a valid Activity error was: {0}"
                          .format(exe),
                          logger='failed_activity',
                          dataset=resource.dataset_id,
                          resource=resource.url),
                        exc_info=exe)
                # Empty the element once handled (presumably to bound memory
                # while streaming).  NOTE(review): nesting inferred; clearing
                # on every event would wipe child content before the
                # activity's own 'end' event -- confirm.
                elem.clear()
    except ET.XMLSyntaxError:
        raise XMLError()
def sector_percentages(xml, resource=no_resource):
    """Build a ``SectorPercentage`` for every ``./sector`` child of *xml*.

    ``sector`` and ``vocabulary`` are resolved through ``from_codelist``;
    a failure there is logged as a warning and the attribute is left
    untouched.  ``percentage`` is parsed with ``int`` and becomes ``None``
    when that raises ``ValueError``.  Only objects with a truthy sector,
    vocabulary or percentage are collected.
    """
    ret = []
    for ele in xml.xpath("./sector"):
        sp = SectorPercentage()
        field_functions = {
            'sector': partial(from_codelist, cl.Sector, "@code"),
            'vocabulary': partial(from_codelist, cl.Vocabulary, "@vocabulary"),
        }
        for field, function in field_functions.items():
            try:
                setattr(sp, field, function(ele, resource))
            except (MissingValue, ValueError) as exe:
                iati_identifier = xval(
                    xml, "/iati-activity/iati-identifier/text()",
                    'no_identifier')
                # The unicode prefix used to sit inside the literal
                # ("uFailed").
                message = (
                    u"Failed to import a valid {0} in activity {1}, "
                    u"error was: {2}".format(field, iati_identifier, exe))
                log.warn(
                    _(message,
                      logger='activity_importer',
                      dataset=resource.dataset_id,
                      resource=resource.url),
                    exc_info=exe)

        if ele.xpath("@percentage"):
            try:
                sp.percentage = int(xval(ele, "@percentage"))
            except ValueError:
                sp.percentage = None
        if ele.xpath("text()"):
            sp.text = xval(ele, "text()")

        if any(getattr(sp, attr)
               for attr in "sector vocabulary percentage".split()):
            ret.append(sp)
    # NOTE(review): no return statement is visible for this function in this
    # view; confirm that ``ret`` is handed back to the caller.
def sector_percentages(xml, resource=no_resource):
    """Collect ``SectorPercentage`` objects from the ``./sector`` elements.

    The codelist-backed attributes (``sector``, ``vocabulary``) are set via
    ``from_codelist``; ``MissingValue`` or ``ValueError`` from those calls
    is logged as a warning.  An unparseable ``@percentage`` is stored as
    ``None``.  An object is kept only when at least one of sector,
    vocabulary or percentage is truthy.
    """
    ret = []
    tracked_attrs = "sector vocabulary percentage".split()
    for ele in xml.xpath("./sector"):
        sp = SectorPercentage()
        field_functions = {
            'sector': partial(from_codelist, cl.Sector, "@code"),
            'vocabulary': partial(from_codelist, cl.Vocabulary, "@vocabulary"),
        }
        for field, function in field_functions.items():
            try:
                setattr(sp, field, function(ele, resource))
            except (MissingValue, ValueError) as exe:
                iati_identifier = xval(
                    xml, "/iati-activity/iati-identifier/text()",
                    'no_identifier')
                # Fixed: the "u" prefix was inside the string, so the log
                # text started with "uFailed".
                log.warn(
                    _(u"Failed to import a valid {0} in activity {1}, "
                      u"error was: {2}".format(field, iati_identifier, exe),
                      logger='activity_importer',
                      dataset=resource.dataset_id,
                      resource=resource.url),
                    exc_info=exe)

        if ele.xpath("@percentage"):
            try:
                sp.percentage = int(xval(ele, "@percentage"))
            except ValueError:
                sp.percentage = None

        if ele.xpath("text()"):
            sp.text = xval(ele, "text()")

        if any(getattr(sp, attr) for attr in tracked_attrs):
            ret.append(sp)
    # NOTE(review): no return statement is visible for this function in this
    # view; confirm that ``ret`` is handed back to the caller.
def activities(xmlfile, resource=no_resource):
    """Yield a parsed activity for each ``iati-activity`` element.

    Elements are streamed with ``ET.iterparse``.  An activity that raises
    ``MissingValue`` is logged as an error and skipped.

    Raises:
        XMLError: when the parser reports ``ET.XMLSyntaxError``.
    """
    try:
        for event, elem in ET.iterparse(xmlfile):
            if elem.tag != 'iati-activity':
                continue
            try:
                yield activity(elem, resource=resource)
            except MissingValue as exe:
                log.error(
                    _("Failed to import a valid Activity error was: {0}"
                      .format(exe),
                      logger='failed_activity',
                      dataset=resource.dataset_id,
                      resource=resource.url),
                    exc_info=exe)
            # Empty the element once handled (presumably to bound memory).
            # NOTE(review): nesting inferred; clearing every element would
            # wipe child content before the activity is parsed -- confirm.
            elem.clear()
    except ET.XMLSyntaxError:
        raise XMLError()
def activity(xml_resource, resource=no_resource):
    """Parse one activity document into a dict of field values.

    The identifier, title, description and raw XML are read directly; all
    other fields come from callables invoked as ``function(xml, resource)``.
    A field whose callable raises ``MissingValue``, ``InvalidDateError``,
    ``ValueError`` or ``InvalidOperation`` is stored as ``None`` and a
    warning is logged.  The identifier lookup is not guarded, so an error
    raised there leaves this function.
    """
    xml = ET.parse(_open_resource(xml_resource))
    data = {
        "iati_identifier": xval(xml.getroot(), "./iati-identifier/text()"),
        "title": xval(xml, "./title/text()", u""),
        "description": xval(xml, "./description/text()", u""),
        "raw_xml": ET.tostring(xml, encoding=unicode)
    }
    field_functions = {
        "default_currency": partial(currency, "@default-currency"),
        "hierarchy": hierarchy,
        "last_updated_datetime": last_updated_datetime,
        "default_language": default_language,
        "reporting_org": reporting_org,
        "websites": websites,
        "participating_orgs": participating_orgs,
        "recipient_country_percentages": recipient_country_percentages,
        "recipient_region_percentages": recipient_region_percentages,
        "transactions": transactions,
        "start_planned": start_planned,
        "end_planned": end_planned,
        "start_actual": start_actual,
        "end_actual": end_actual,
        "sector_percentages": sector_percentages,
        "budgets": budgets,
        "policy_markers": policy_markers,
        "related_activities": related_activities,
        'activity_status': activity_status,
        'collaboration_type': collaboration_type,
        'default_finance_type': default_finance_type,
        'default_flow_type': default_flow_type,
        'default_aid_type': default_aid_type,
        'default_tied_status': default_tied_status,
    }
    for field, function in field_functions.items():
        try:
            data[field] = function(xml, resource)
        except (MissingValue, InvalidDateError, ValueError,
                InvalidOperation) as exe:
            data[field] = None
            message = (
                u"Failed to import a valid {0} in activity {1}, error was: {2}"
                .format(field, data['iati_identifier'], exe))
            log.warn(
                _(message,
                  logger='activity_importer',
                  dataset=resource.dataset_id,
                  resource=resource.url),
                exc_info=exe)
    # NOTE(review): no return statement is visible for this function in this
    # view; confirm how ``data`` is handed back to the caller.
def activity(xml_resource, resource=no_resource):
    """Gather the field values of a single activity document.

    ``iati_identifier``, ``title``, ``description`` and ``raw_xml`` are read
    up front.  Every other field is computed by calling its function with
    ``(xml, resource)``; ``MissingValue``, ``InvalidDateError``,
    ``ValueError`` and ``InvalidOperation`` are logged as warnings and the
    field is set to ``None``.
    """
    xml = ET.parse(_open_resource(xml_resource))
    root = xml.getroot()
    data = {
        "iati_identifier": xval(root, "./iati-identifier/text()"),
        "title": xval(xml, "./title/text()", u""),
        "description": xval(xml, "./description/text()", u""),
        "raw_xml": ET.tostring(xml, encoding=unicode)
    }
    field_functions = {
        "default_currency": partial(currency, "@default-currency"),
        "hierarchy": hierarchy,
        "last_updated_datetime": last_updated_datetime,
        "default_language": default_language,
        "reporting_org": reporting_org,
        "websites": websites,
        "participating_orgs": participating_orgs,
        "recipient_country_percentages": recipient_country_percentages,
        "recipient_region_percentages": recipient_region_percentages,
        "transactions": transactions,
        "start_planned": start_planned,
        "end_planned": end_planned,
        "start_actual": start_actual,
        "end_actual": end_actual,
        "sector_percentages": sector_percentages,
        "budgets": budgets,
        "policy_markers": policy_markers,
        "related_activities": related_activities,
        'activity_status': activity_status,
        'collaboration_type': collaboration_type,
        'default_finance_type': default_finance_type,
        'default_flow_type': default_flow_type,
        'default_aid_type': default_aid_type,
        'default_tied_status': default_tied_status,
    }
    recoverable = (MissingValue, InvalidDateError, ValueError,
                   InvalidOperation)
    for field, function in field_functions.items():
        try:
            data[field] = function(xml, resource)
        except recoverable as exe:
            data[field] = None
            log.warn(
                _(u"Failed to import a valid {0} in activity {1}, "
                  u"error was: {2}".format(
                      field, data['iati_identifier'], exe),
                  logger='activity_importer',
                  dataset=resource.dataset_id,
                  resource=resource.url),
                exc_info=exe)
    # NOTE(review): no return statement is visible for this function in this
    # view; confirm how ``data`` is handed back to the caller.
def related_activities(xml, resource=no_resource):
    """Return a ``RelatedActivity`` for each ``./related-activity`` child.

    An element for which reading ``@ref`` (or building the object) raises
    ``MissingValue`` is logged as a warning and left out of the result.
    """
    found = []
    for node in xml.xpath("./related-activity"):
        label = xval(node, "text()", None)
        try:
            found.append(RelatedActivity(ref=xval(node, "@ref"), text=label))
        except MissingValue as e:
            iati_identifier = xval(
                xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
            warning_text = (
                u"Failed to import a valid related-activity in activity {0}, "
                u"error was: {1}".format(iati_identifier, e))
            log.warn(
                _(warning_text,
                  logger='activity_importer',
                  dataset=resource.dataset_id,
                  resource=resource.url),
                exc_info=e)
    return found
def activities(xmlfile, resource=no_resource):
    """Stream *xmlfile* and yield the parsed form of each ``iati-activity``.

    A ``MissingValue`` raised while parsing an activity is logged as an
    error and that activity is skipped.

    Raises:
        XMLError: when the parser reports ``ET.XMLSyntaxError``.
    """
    try:
        for event, elem in ET.iterparse(xmlfile):
            if elem.tag == 'iati-activity':
                try:
                    yield activity(elem, resource=resource)
                except MissingValue as exe:
                    message = (
                        "Failed to import a valid Activity error was: {0}"
                        .format(exe))
                    log.error(
                        _(message,
                          logger='failed_activity',
                          dataset=resource.dataset_id,
                          resource=resource.url),
                        exc_info=exe)
                # Empty the handled element (presumably to bound memory).
                # NOTE(review): nesting inferred from the collapsed source;
                # clearing non-activity elements would wipe child content
                # before the activity is parsed -- confirm.
                elem.clear()
    except ET.XMLSyntaxError:
        raise XMLError()
def related_activities(xml, resource=no_resource, major_version='1'):
    """Return a ``RelatedActivity`` for each ``./related-activity`` child.

    The text is read through ``TEXT_ELEMENT[major_version]``.  An element
    for which reading ``@ref`` (or building the object) raises
    ``MissingValue`` is logged as a warning and left out of the result.
    """
    text_path = TEXT_ELEMENT
    collected = []
    for related in xml.xpath("./related-activity"):
        text = xval(related, text_path[major_version], None)
        try:
            reference = xval(related, "@ref")
            collected.append(RelatedActivity(ref=reference, text=text))
        except MissingValue as e:
            iati_identifier = xval(
                xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
            log.warn(
                _(u"Failed to import a valid related-activity in activity "
                  u"{0}, error was: {1}".format(iati_identifier, e),
                  logger='activity_importer',
                  dataset=resource.dataset_id,
                  resource=resource.url),
                exc_info=e)
    return collected
def from_codelist(codelist, path, xml, resource=no_resource):
    """Return ``codelist.from_string`` of the value found at *path*.

    Returns ``None`` when the value read with ``xval`` is falsy, or when
    ``from_string`` raises ``MissingValue`` or ``ValueError`` (which is
    also logged as a warning).
    """
    code = xval(xml, path, None)
    if not code:
        return None
    try:
        return codelist.from_string(code)
    except (MissingValue, ValueError) as e:
        iati_identifier = xval(
            xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
        # The two literals used to be joined without a separating space,
        # producing "in activity<identifier>".
        message = (
            u"Failed to import a valid {0} in activity {1}, error was: {2}"
            .format(codelist, iati_identifier, e))
        log.warn(
            _(message,
              logger='activity_importer',
              dataset=resource.dataset_id,
              resource=resource.url),
            exc_info=e)
    return None
def activities(xmlfile, resource=no_resource):
    """Stream *xmlfile*, yielding the parsed form of each ``iati-activity``.

    ``version`` is taken from the opening ``iati-activities`` tag and
    ``major_version`` becomes ``'2'`` when it starts with ``'2.'`` (default
    ``'1'``); both are passed on to ``activity``.  A ``MissingValue`` from
    ``activity`` is logged as an error and that activity is skipped.

    Raises:
        XMLError: when the parser reports ``ET.XMLSyntaxError``.
    """
    major_version = '1'
    version = None
    try:
        for event, elem in ET.iterparse(xmlfile, events=('start', 'end')):
            opening_root = event == 'start' and elem.tag == 'iati-activities'
            if opening_root:
                version = elem.attrib.get('version')
                if version and version.startswith('2.'):
                    major_version = '2'
            elif event == 'end' and elem.tag == 'iati-activity':
                try:
                    yield activity(
                        elem,
                        resource=resource,
                        major_version=major_version,
                        version=version)
                except MissingValue as exe:
                    message = (
                        "Failed to import a valid Activity error was: {0}"
                        .format(exe))
                    log.error(
                        _(message,
                          logger='failed_activity',
                          dataset=resource.dataset_id,
                          resource=resource.url),
                        exc_info=exe)
                # Empty the handled element (presumably to bound memory).
                # NOTE(review): nesting inferred from the collapsed source;
                # clearing on every event would wipe child content before
                # the activity's 'end' event -- confirm.
                elem.clear()
    except ET.XMLSyntaxError:
        raise XMLError()
def participating_orgs(xml, resource=None):
    """Return one ``Participation`` per distinct (role, organisation ref).

    Each ``./participating-org`` element has its ``@role`` title-cased and
    resolved through ``cl.OrganisationRole`` and its organisation parsed
    with ``parse_org``.  Repeated (role, ref) pairs are skipped.  A
    ``ValueError`` is logged as a warning and the element is left out.
    """
    if resource is None:
        # The handler below reads resource.dataset_id / resource.url; fall
        # back to the placeholder the sibling parsers use as their default
        # so logging cannot fail with AttributeError.
        resource = no_resource

    ret = []
    seen = set()
    for ele in xml.xpath("./participating-org"):
        try:
            role = cl.OrganisationRole.from_string(
                xval(ele, "@role").title())
            organisation = parse_org(ele)
            key = (role, organisation.ref)
            if key not in seen:
                seen.add(key)
                ret.append(Participation(role=role, organisation=organisation))
        except ValueError as e:
            iati_identifier = xval(
                xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
            # The message used to say "sector percentage", copied from
            # another parser.
            message = (
                u"Failed to import a valid participating-org:{0} in activity "
                u"{1}, error was: {2}".format(
                    'organisation_role', iati_identifier, e))
            log.warn(
                _(message,
                  logger='activity_importer',
                  dataset=resource.dataset_id,
                  resource=resource.url),
                exc_info=e)
    return ret
def participating_orgs(xml, resource=None, major_version='1'):
    """Return one ``Participation`` per distinct (role, organisation ref).

    The role codelist is taken from
    ``codelists.by_major_version[major_version]`` and the organisation is
    parsed with ``parse_org(ele, major_version=major_version)``.  Repeated
    (role, ref) pairs are skipped.  A ``ValueError`` is logged as a warning
    and the element is left out.
    """
    if resource is None:
        # The handler below reads resource.dataset_id / resource.url; fall
        # back to the placeholder the sibling parsers use as their default
        # so logging cannot fail with AttributeError.
        resource = no_resource

    ret = []
    seen = set()
    for ele in xml.xpath("./participating-org"):
        try:
            roles = codelists.by_major_version[major_version].OrganisationRole
            role = roles.from_string(xval(ele, "@role").title())
            organisation = parse_org(ele, major_version=major_version)
            key = (role, organisation.ref)
            if key not in seen:
                seen.add(key)
                ret.append(Participation(role=role, organisation=organisation))
        except ValueError as e:
            iati_identifier = xval(
                xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
            # The message used to say "sector percentage", copied from
            # another parser.
            message = (
                u"Failed to import a valid participating-org:{0} in activity "
                u"{1}, error was: {2}".format(
                    'organisation_role', iati_identifier, e))
            log.warn(
                _(message,
                  logger='activity_importer',
                  dataset=resource.dataset_id,
                  resource=resource.url),
                exc_info=e)
    return ret
def from_codelist(codelist, path, xml, resource=no_resource):
    """Look up the value at *path* in *codelist*.

    Returns the result of ``codelist.from_string`` for a truthy value.
    Returns ``None`` when the value is falsy or when ``from_string`` raises
    ``MissingValue`` or ``ValueError``; the latter is logged as a warning.
    """
    code = xval(xml, path, None)
    if code:
        try:
            return codelist.from_string(code)
        except (MissingValue, ValueError) as e:
            iati_identifier = xval(
                xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
            # Fixed: the split literal lacked a space, so the identifier
            # was glued to the word "activity".
            log.warn(
                _(u"Failed to import a valid {0} in activity {1}, "
                  u"error was: {2}".format(codelist, iati_identifier, e),
                  logger='activity_importer',
                  dataset=resource.dataset_id,
                  resource=resource.url),
                exc_info=e)
    return None
def process(ele):
    """Collect the fields of one transaction element into a dict.

    Text values are read through ``TEXT_ELEMENT[major_version]``; the
    remaining fields come from callables invoked as
    ``function(ele, resource, major_version)``.  A callable that raises
    ``MissingValue``, ``InvalidDateError``, ``ValueError`` or
    ``InvalidOperation`` leaves its field as ``None`` and a warning is
    logged.  ``resource``, ``xml`` and ``major_version`` are read from the
    enclosing scope.
    """
    text_path = TEXT_ELEMENT[major_version]
    data = {
        'description': xval(ele, "description/" + text_path, None),
        'provider_org_text': xval(ele, "provider-org/" + text_path, None),
        'provider_org_activity_id': xval(
            ele, "provider-org/@provider-activity-id", None),
        'receiver_org_text': xval(ele, "receiver-org/" + text_path, None),
        'receiver_org_activity_id': xval(
            ele, "receiver-org/@receiver-activity-id", None),
        'ref': xval(ele, "@ref", None),
    }
    field_functions = {
        'date': partial(xpath_date, "transaction-date/@iso-date"),
        'flow_type': partial(
            from_codelist_with_major_version, 'FlowType',
            "./flow-type/@code"),
        'finance_type': partial(
            from_codelist_with_major_version, 'FinanceType',
            "./finance-type/@code"),
        'aid_type': partial(
            from_codelist_with_major_version, 'AidType', "./aid-type/@code"),
        'tied_status': partial(
            from_codelist_with_major_version, 'TiedStatus',
            "./tied-status/@code"),
        'disbursement_channel': partial(
            from_codelist_with_major_version, 'DisbursementChannel',
            "./disbursement-channel/@code"),
        'provider_org': partial(from_org, "./provider-org"),
        'receiver_org': partial(from_org, "./receiver-org"),
        'type': partial(
            from_codelist_with_major_version, 'TransactionType',
            "./transaction-type/@code"),
        'value_currency': partial(currency, "value/@currency"),
        'value_date': partial(xpath_date, "value/@value-date"),
        'value_amount': partial(xpath_decimal, "value/text()"),
        "recipient_country_percentages": recipient_country_percentages,
        "recipient_region_percentages": recipient_region_percentages,
        "sector_percentages": sector_percentages,
    }
    for field, function in field_functions.items():
        try:
            data[field] = function(ele, resource, major_version)
        except (MissingValue, InvalidDateError, ValueError,
                InvalidOperation) as exe:
            data[field] = None
            iati_identifier = xval(
                xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
            message = (
                u"Failed to import a valid {0} in activity {1}, error was: {2}"
                .format(field, iati_identifier, exe))
            log.warn(
                _(message,
                  logger='activity_importer',
                  dataset=resource.dataset_id,
                  resource=resource.url),
                exc_info=exe)
    # NOTE(review): no return statement is visible for this function in this
    # view; confirm that the populated dict is handed back to the caller.
def reporting_org(element, resource=no_resource):
    """Return the unique Organisation described by ``./reporting-org``.

    The first ``reporting-org`` child is used; when there is none the
    ``IndexError`` from the lookup propagates.  A ``@type`` that raises
    ``MissingValue`` or ``ValueError`` is stored as ``None`` and logged as
    a warning.
    """
    org_element = element.xpath("./reporting-org")[0]
    data = {
        "ref": xval(org_element, "@ref"),
        "name": xval(org_element, 'text()', u""),
    }
    try:
        data["type"] = cl.OrganisationType.from_string(
            xval(org_element, "@type"))
    except (MissingValue, ValueError) as exe:
        data['type'] = None
        # Same absolute path as the other importers; the previous
        # "/iati-identifier/text()" did not match that convention.
        iati_identifier = xval(
            org_element, "/iati-activity/iati-identifier/text()",
            'no_identifier')
        message = (
            u"Failed to import a valid reporting-org.type in activity {0}, "
            u"error was: {1}".format(iati_identifier, exe))
        log.warn(
            _(message,
              logger='activity_importer',
              dataset=resource.dataset_id,
              resource=resource.url),
            exc_info=exe)
    return Organisation.as_unique(db.session, **data)
def reporting_org(element, resource=no_resource):
    """Build the reporting organisation for an activity element.

    Reads ``@ref``, the text and ``@type`` of the first ``./reporting-org``
    child (an ``IndexError`` propagates when there is none).  When the type
    raises ``MissingValue`` or ``ValueError`` it is recorded as ``None``
    and a warning is logged.  The result comes from
    ``Organisation.as_unique(db.session, **data)``.
    """
    xml = element.xpath("./reporting-org")[0]
    data = {
        "ref": xval(xml, "@ref"),
        "name": xval(xml, 'text()', u""),
    }
    try:
        organisation_type = cl.OrganisationType.from_string(
            xval(xml, "@type"))
        data["type"] = organisation_type
    except (MissingValue, ValueError) as exe:
        data['type'] = None
        # Use the identifier path shared by the sibling importers; the
        # former "/iati-identifier/text()" was inconsistent with them.
        iati_identifier = xval(
            xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
        log.warn(
            _(u"Failed to import a valid reporting-org.type in activity {0}, "
              u"error was: {1}".format(iati_identifier, exe),
              logger='activity_importer',
              dataset=resource.dataset_id,
              resource=resource.url),
            exc_info=exe)
    return Organisation.as_unique(db.session, **data)
def process(ele):
    """Return a ``Budget`` built from one budget element.

    Every field is computed by calling its function with
    ``(ele, resource)``.  ``MissingValue``, ``InvalidDateError``,
    ``ValueError`` and ``InvalidOperation`` are logged as warnings and the
    field is set to ``None``.  ``resource`` and ``xml`` are read from the
    enclosing scope.
    """
    field_functions = {
        'type': budget_type,
        'value_currency': partial(currency, "value/@currency"),
        'value_amount': partial(xpath_decimal, "value/text()"),
        'period_start': partial(xpath_date, "period-start/@iso-date"),
        'period_end': partial(xpath_date, "period-end/@iso-date"),
    }
    recoverable = (MissingValue, InvalidDateError, ValueError,
                   InvalidOperation)
    data = {}
    for field, function in field_functions.items():
        try:
            data[field] = function(ele, resource)
        except recoverable as exe:
            data[field] = None
            iati_identifier = xval(
                xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
            # Fixed: the "u" prefix was inside the string, so the log text
            # started with "uFailed".
            log.warn(
                _(u"Failed to import a valid budget:{0} in activity {1}, "
                  u"error was: {2}".format(field, iati_identifier, exe),
                  logger='activity_importer',
                  dataset=resource.dataset_id,
                  resource=resource.url),
                exc_info=exe)
    return Budget(**data)
def activity(xml_resource, resource=no_resource, major_version='1',
             version=None):
    """Parse one activity document into a dict of field values.

    The ``activity-date`` type codes depend on ``major_version``: ``'2'``
    uses ``1``-``4``, anything else uses the ``start-planned`` style names.
    Title and description are read through ``TEXT_ELEMENT[major_version]``.
    All other fields come from callables invoked as
    ``function(xml, resource, major_version)``; a callable that raises
    ``MissingValue``, ``InvalidDateError``, ``ValueError`` or
    ``InvalidOperation`` leaves its field as ``None`` and a warning is
    logged.
    """
    xml = ET.parse(_open_resource(xml_resource))

    if major_version == '2':
        start_planned = partial(xval_date, "./activity-date[@type='1']")
        start_actual = partial(xval_date, "./activity-date[@type='2']")
        end_planned = partial(xval_date, "./activity-date[@type='3']")
        end_actual = partial(xval_date, "./activity-date[@type='4']")
    else:
        start_planned = partial(
            xval_date, "./activity-date[@type='start-planned']")
        end_planned = partial(
            xval_date, "./activity-date[@type='end-planned']")
        start_actual = partial(
            xval_date, "./activity-date[@type='start-actual']")
        end_actual = partial(
            xval_date, "./activity-date[@type='end-actual']")

    data = {
        "iati_identifier": xval(xml.getroot(), "./iati-identifier/text()"),
        "title": xval(xml, "./title/" + TEXT_ELEMENT[major_version], u""),
        "description": xval(
            xml, "./description/" + TEXT_ELEMENT[major_version], u""),
        "raw_xml": ET.tostring(xml, encoding=unicode)
    }

    # Not referenced again in the visible part of this function.
    cl = codelists.by_major_version[major_version]
    activity_status = partial(
        from_codelist_with_major_version, 'ActivityStatus',
        "./activity-status/@code")
    collaboration_type = partial(
        from_codelist_with_major_version, 'CollaborationType',
        "./collaboration-type/@code")
    default_finance_type = partial(
        from_codelist_with_major_version, 'FinanceType',
        "./default-finance-type/@code")
    default_flow_type = partial(
        from_codelist_with_major_version, 'FlowType',
        "./default-flow-type/@code")
    default_aid_type = partial(
        from_codelist_with_major_version, 'AidType',
        "./default-aid-type/@code")
    default_tied_status = partial(
        from_codelist_with_major_version, 'TiedStatus',
        "./default-tied-status/@code")

    field_functions = {
        "default_currency": partial(currency, "@default-currency"),
        "hierarchy": hierarchy,
        "last_updated_datetime": last_updated_datetime,
        "default_language": default_language,
        "reporting_org": reporting_org,
        "websites": websites,
        "participating_orgs": participating_orgs,
        "recipient_country_percentages": recipient_country_percentages,
        "recipient_region_percentages": recipient_region_percentages,
        "transactions": transactions,
        "start_planned": start_planned,
        "end_planned": end_planned,
        "start_actual": start_actual,
        "end_actual": end_actual,
        "sector_percentages": sector_percentages,
        "budgets": budgets,
        "policy_markers": policy_markers,
        "related_activities": related_activities,
        'activity_status': activity_status,
        'collaboration_type': collaboration_type,
        'default_finance_type': default_finance_type,
        'default_flow_type': default_flow_type,
        'default_aid_type': default_aid_type,
        'default_tied_status': default_tied_status,
        # Constant fields, wrapped so they fit the common call signature.
        'major_version': lambda *args, **kwargs: major_version,
        'version': lambda *args, **kwargs: version,
    }
    for field, function in field_functions.items():
        try:
            data[field] = function(xml, resource, major_version)
        except (MissingValue, InvalidDateError, ValueError,
                InvalidOperation) as exe:
            data[field] = None
            message = (
                u"Failed to import a valid {0} in activity {1}, error was: {2}"
                .format(field, data['iati_identifier'], exe))
            log.warn(
                _(message,
                  logger='activity_importer',
                  dataset=resource.dataset_id,
                  resource=resource.url),
                exc_info=exe)
    # NOTE(review): no return statement is visible for this function in this
    # view; confirm how ``data`` is handed back to the caller.
def activity(xml_resource, resource=no_resource, major_version='1',
             version=None):
    """Gather the field values of a single activity document.

    For ``major_version == '2'`` the activity dates are selected by the
    numeric ``@type`` codes ``1``-``4``; otherwise by the ``start-planned``
    style names.  Title and description use
    ``TEXT_ELEMENT[major_version]``.  Each remaining field is computed by
    calling its function with ``(xml, resource, major_version)``;
    ``MissingValue``, ``InvalidDateError``, ``ValueError`` and
    ``InvalidOperation`` are logged as warnings and the field is set to
    ``None``.
    """
    xml = ET.parse(_open_resource(xml_resource))

    if major_version == '2':
        start_planned = partial(xval_date, "./activity-date[@type='1']")
        start_actual = partial(xval_date, "./activity-date[@type='2']")
        end_planned = partial(xval_date, "./activity-date[@type='3']")
        end_actual = partial(xval_date, "./activity-date[@type='4']")
    else:
        start_planned = partial(
            xval_date, "./activity-date[@type='start-planned']")
        end_planned = partial(
            xval_date, "./activity-date[@type='end-planned']")
        start_actual = partial(
            xval_date, "./activity-date[@type='start-actual']")
        end_actual = partial(
            xval_date, "./activity-date[@type='end-actual']")

    text_path = TEXT_ELEMENT[major_version]
    data = {
        "iati_identifier": xval(xml.getroot(), "./iati-identifier/text()"),
        "title": xval(xml, "./title/" + text_path, u""),
        "description": xval(xml, "./description/" + text_path, u""),
        "raw_xml": ET.tostring(xml, encoding=unicode)
    }

    # Not referenced again in the visible part of this function.
    cl = codelists.by_major_version[major_version]
    versioned = from_codelist_with_major_version
    activity_status = partial(
        versioned, 'ActivityStatus', "./activity-status/@code")
    collaboration_type = partial(
        versioned, 'CollaborationType', "./collaboration-type/@code")
    default_finance_type = partial(
        versioned, 'FinanceType', "./default-finance-type/@code")
    default_flow_type = partial(
        versioned, 'FlowType', "./default-flow-type/@code")
    default_aid_type = partial(
        versioned, 'AidType', "./default-aid-type/@code")
    default_tied_status = partial(
        versioned, 'TiedStatus', "./default-tied-status/@code")

    field_functions = {
        "default_currency": partial(currency, "@default-currency"),
        "hierarchy": hierarchy,
        "last_updated_datetime": last_updated_datetime,
        "default_language": default_language,
        "reporting_org": reporting_org,
        "websites": websites,
        "participating_orgs": participating_orgs,
        "recipient_country_percentages": recipient_country_percentages,
        "recipient_region_percentages": recipient_region_percentages,
        "transactions": transactions,
        "start_planned": start_planned,
        "end_planned": end_planned,
        "start_actual": start_actual,
        "end_actual": end_actual,
        "sector_percentages": sector_percentages,
        "budgets": budgets,
        "policy_markers": policy_markers,
        "related_activities": related_activities,
        'activity_status': activity_status,
        'collaboration_type': collaboration_type,
        'default_finance_type': default_finance_type,
        'default_flow_type': default_flow_type,
        'default_aid_type': default_aid_type,
        'default_tied_status': default_tied_status,
        # Constant fields, wrapped so they fit the common call signature.
        'major_version': lambda *args, **kwargs: major_version,
        'version': lambda *args, **kwargs: version,
    }
    recoverable = (MissingValue, InvalidDateError, ValueError,
                   InvalidOperation)
    for field, function in field_functions.items():
        try:
            data[field] = function(xml, resource, major_version)
        except recoverable as exe:
            data[field] = None
            log.warn(
                _(u"Failed to import a valid {0} in activity {1}, "
                  u"error was: {2}".format(
                      field, data['iati_identifier'], exe),
                  logger='activity_importer',
                  dataset=resource.dataset_id,
                  resource=resource.url),
                exc_info=exe)
    # NOTE(review): no return statement is visible for this function in this
    # view; confirm how ``data`` is handed back to the caller.
def transactions(xml, resource=no_resource, major_version='1'):
    """Return a ``Transaction`` for each ``./transaction`` child of *xml*.

    Individual fields that raise ``MissingValue``, ``InvalidDateError``,
    ``ValueError`` or ``InvalidOperation`` are stored as ``None`` and logged
    as warnings.  A ``MissingValue`` that escapes the per-field handling is
    logged as a warning and that transaction is left out of the result.
    """

    def from_org(path, ele, resource=None, major_version='1'):
        # Parse the first element matching *path*, or return None.
        organisation = ele.xpath(path)
        if organisation:
            return parse_org(organisation[0], major_version=major_version)
        return None

    def process(ele):
        # Build one Transaction; a failing field becomes None.
        text_path = TEXT_ELEMENT[major_version]
        data = {
            'description': xval(ele, "description/" + text_path, None),
            'provider_org_text': xval(ele, "provider-org/" + text_path, None),
            'provider_org_activity_id': xval(
                ele, "provider-org/@provider-activity-id", None),
            'receiver_org_text': xval(ele, "receiver-org/" + text_path, None),
            'receiver_org_activity_id': xval(
                ele, "receiver-org/@receiver-activity-id", None),
            'ref': xval(ele, "@ref", None),
        }
        field_functions = {
            'date': partial(xpath_date, "transaction-date/@iso-date"),
            'flow_type': partial(
                from_codelist_with_major_version, 'FlowType',
                "./flow-type/@code"),
            'finance_type': partial(
                from_codelist_with_major_version, 'FinanceType',
                "./finance-type/@code"),
            'aid_type': partial(
                from_codelist_with_major_version, 'AidType',
                "./aid-type/@code"),
            'tied_status': partial(
                from_codelist_with_major_version, 'TiedStatus',
                "./tied-status/@code"),
            'disbursement_channel': partial(
                from_codelist_with_major_version, 'DisbursementChannel',
                "./disbursement-channel/@code"),
            'provider_org': partial(from_org, "./provider-org"),
            'receiver_org': partial(from_org, "./receiver-org"),
            'type': partial(
                from_codelist_with_major_version, 'TransactionType',
                "./transaction-type/@code"),
            'value_currency': partial(currency, "value/@currency"),
            'value_date': partial(xpath_date, "value/@value-date"),
            'value_amount': partial(xpath_decimal, "value/text()"),
            "recipient_country_percentages": recipient_country_percentages,
            "recipient_region_percentages": recipient_region_percentages,
            "sector_percentages": sector_percentages,
        }
        for field, function in field_functions.items():
            try:
                data[field] = function(ele, resource, major_version)
            except (MissingValue, InvalidDateError, ValueError,
                    InvalidOperation) as exe:
                data[field] = None
                iati_identifier = xval(
                    xml, "/iati-activity/iati-identifier/text()",
                    'no_identifier')
                message = (
                    u"Failed to import a valid {0} in activity {1}, "
                    u"error was: {2}".format(field, iati_identifier, exe))
                log.warn(
                    _(message,
                      logger='activity_importer',
                      dataset=resource.dataset_id,
                      resource=resource.url),
                    exc_info=exe)
        return Transaction(**data)

    ret = []
    for ele in xml.xpath("./transaction"):
        try:
            ret.append(process(ele))
        except MissingValue as exe:
            # Same absolute path as the per-field handler above; the
            # previous "/iati-identifier/text()" was inconsistent with it.
            iati_identifier = xval(
                xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
            message = (
                u"Failed to import a valid transaction in activity {0}, "
                u"error was: {1}".format(iati_identifier, exe))
            log.warn(
                _(message,
                  logger='activity_importer',
                  dataset=resource.dataset_id,
                  resource=resource.url),
                exc_info=exe)
    return ret
dataset=resource.dataset_id, resource=resource.url), exc_info=exe) return Transaction(**data) ret = [] for ele in xml.xpath("./transaction"): try: ret.append(process(ele)) except MissingValue as exe: iati_identifier = xval(xml, "/iati-identifier/text()", 'no_identifier') log.warn(_( u"Failed to import a valid transaction in activity {0}, error was: {1}" .format(iati_identifier, exe), logger='activity_importer', dataset=resource.dataset_id, resource=resource.url), exc_info=exe) return ret def sector_percentages(xml, resource=no_resource, major_version='1'): cl = codelists.by_major_version[major_version] ret = [] for ele in xml.xpath("./sector"): sp = SectorPercentage() field_functions = { 'sector': partial(from_codelist, cl.Sector, "@code"), 'vocabulary': partial(from_codelist, cl.Vocabulary, "@vocabulary"), }
def parse_resource(resource):
    """Replace the stored activities of ``resource`` with freshly parsed ones.

    The activities currently stored for ``resource.url`` are deleted and the
    ones parsed from ``resource.document`` are added in their place. An
    activity whose raw XML hashes to the same value as the stored copy keeps
    its previous ``last_change_datetime``; any other activity is stamped with
    the current time. Identifiers that were stored before but are absent from
    the new document are recorded as ``DeletedActivity`` rows, and
    identifiers present in the new document are removed from that table.

    :param resource: the resource to parse; reads ``url``, ``document`` and
        ``dataset_id`` and updates ``version`` and ``last_parsed``.
    :returns: the same ``resource`` object.
    """
    db.session.add(resource)

    # A single column-only query yields both the previously stored
    # identifiers and what is needed to detect unchanged activities, so the
    # full Activity objects never have to be loaded.
    old_xml = {
        identifier: (last_change, hash(raw_xml))
        for identifier, last_change, raw_xml in db.session.query(
            Activity.iati_identifier,
            Activity.last_change_datetime,
            Activity.raw_xml).filter_by(resource_url=resource.url)
    }
    current_identifiers = set(old_xml)

    db.session.query(Activity).filter_by(resource_url=resource.url).delete()

    new_identifiers = set()
    activities = []
    for activity in parse.document(resource.document, resource):
        activity.resource = resource

        if activity.iati_identifier in new_identifiers:
            parse.log.warn(
                _("Duplicate identifier {0} in same resource document".format(
                    activity.iati_identifier),
                  logger='activity_importer',
                  dataset=resource.dataset_id,
                  resource=resource.url),
                exc_info=''
            )
            continue

        new_identifiers.add(activity.iati_identifier)

        previous = old_xml.get(activity.iati_identifier)
        if previous is not None and hash(activity.raw_xml) == previous[1]:
            # Unchanged since the last import: keep the old change time.
            activity.last_change_datetime = previous[0]
        else:
            # NOTE(review): this is naive local time, whereas deletion_date
            # and last_parsed below use utcnow() - confirm which is intended.
            activity.last_change_datetime = datetime.datetime.now()

        activities.append(activity)
        db.session.add(activity)

        # Commit in batches so the number of pending objects stays bounded.
        if len(db.session.new) > 50:
            check_for_duplicates(activities)
            db.session.commit()
            activities = []

    # Handle whatever is left over from the last, incomplete batch.
    check_for_duplicates(activities)
    db.session.commit()

    resource.version = parse.document_metadata(resource.document)

    # add any identifiers that are no longer present to deleted_activity table
    now = datetime.datetime.utcnow()
    deleted = [
        DeletedActivity(iati_identifier=identifier, deletion_date=now)
        for identifier in current_identifiers - new_identifiers
    ]
    if deleted:
        db.session.add_all(deleted)

    # remove any new identifiers from the deleted_activity table
    if new_identifiers:
        db.session.query(DeletedActivity)\
            .filter(DeletedActivity.iati_identifier.in_(new_identifiers))\
            .delete(synchronize_session="fetch")

    log.info(
        "Parsed %d activities from %s",
        len(resource.activities),
        resource.url)
    resource.last_parsed = now
    return resource
def process(ele):
    """Build one ``Transaction`` from a single transaction element.

    Relies on ``xml``, ``resource``, ``major_version`` and ``from_org`` being
    provided by the surrounding scope.

    :param ele: the transaction element to read.
    :returns: a ``Transaction`` built from the parsed fields. A field whose
        parser raises ``MissingValue``, ``InvalidDateError``, ``ValueError``
        or ``InvalidOperation`` is logged and stored as an empty value.
    """
    # Fields produced by list-returning parsers. ``activity()`` falls back to
    # an empty list for these same parsers, so do the same here.
    list_fields = (
        'recipient_country_percentages',
        'recipient_region_percentages',
        'sector_percentages',
    )

    text_path = TEXT_ELEMENT[major_version]
    data = {
        'description': xval(ele, "description/" + text_path, None),
        'provider_org_text': xval(ele, "provider-org/" + text_path, None),
        'provider_org_activity_id': xval(
            ele, "provider-org/@provider-activity-id", None),
        'receiver_org_text': xval(ele, "receiver-org/" + text_path, None),
        'receiver_org_activity_id': xval(
            ele, "receiver-org/@receiver-activity-id", None),
        'ref': xval(ele, "@ref", None),
    }
    field_functions = {
        'date': partial(xpath_date, "transaction-date/@iso-date"),
        'flow_type': partial(
            from_codelist_with_major_version,
            'FlowType', "./flow-type/@code"),
        'finance_type': partial(
            from_codelist_with_major_version,
            'FinanceType', "./finance-type/@code"),
        'aid_type': partial(
            from_codelist_with_major_version,
            'AidType', "./aid-type/@code"),
        'tied_status': partial(
            from_codelist_with_major_version,
            'TiedStatus', "./tied-status/@code"),
        'disbursement_channel': partial(
            from_codelist_with_major_version,
            'DisbursementChannel', "./disbursement-channel/@code"),
        'provider_org': partial(from_org, "./provider-org"),
        'receiver_org': partial(from_org, "./receiver-org"),
        'type': partial(
            from_codelist_with_major_version,
            'TransactionType', "./transaction-type/@code"),
        'value_currency': partial(currency, "value/@currency"),
        'value_date': partial(xpath_date, "value/@value-date"),
        'value_amount': partial(xpath_decimal, "value/text()"),
        "recipient_country_percentages": recipient_country_percentages,
        "recipient_region_percentages": recipient_region_percentages,
        "sector_percentages": sector_percentages,
    }

    for field, function in field_functions.items():
        try:
            data[field] = function(ele, resource, major_version)
        except (MissingValue, InvalidDateError, ValueError,
                InvalidOperation) as exe:
            # One bad field must not lose the whole transaction: store an
            # empty value, log it and carry on with the next field.
            data[field] = [] if field in list_fields else None
            # ``xml`` is the activity element itself, so the identifier is
            # looked up with a relative path; an absolute path is resolved
            # from the document root and only matches when the activity
            # happens to be that root.
            iati_identifier = xval(
                xml, "./iati-identifier/text()", 'no_identifier')
            log.warn(_(
                u"Failed to import a valid {0} in activity {1}, error was: {2}"
                .format(field, iati_identifier, exe),
                logger='activity_importer',
                dataset=resource.dataset_id,
                resource=resource.url),
                exc_info=exe)
    return Transaction(**data)
def activity(xml, resource=no_resource, major_version='1', version=None):
    """
    Build an ``Activity`` from one activity element.

    Expects xml argument of type lxml.etree._Element

    The identifier, title, description and serialised XML are read directly;
    every other field is produced by a parser called as
    ``parser(xml, resource, major_version)``. A parser that raises
    ``MissingValue``, ``InvalidDateError``, ``ValueError`` or
    ``InvalidOperation`` is logged, and its field is stored as ``[]`` for the
    list-valued fields or ``None`` otherwise. The raw XML is also converted
    with ``xmltodict`` and stored, together with ``version``, as ``raw_json``.
    """
    def coded(codelist_name, xpath):
        # Parser for a code attribute looked up in the named codelist.
        return partial(from_codelist_with_major_version, codelist_name, xpath)

    def dated(xpath):
        # Parser for the activity-date element selected by ``xpath``.
        return partial(xval_date, xpath)

    # Major version 2 identifies the activity dates by numeric codes, other
    # versions by descriptive names.
    if major_version != '2':
        start_planned = dated("./activity-date[@type='start-planned']")
        end_planned = dated("./activity-date[@type='end-planned']")
        start_actual = dated("./activity-date[@type='start-actual']")
        end_actual = dated("./activity-date[@type='end-actual']")
    else:
        start_planned = dated("./activity-date[@type='1']")
        start_actual = dated("./activity-date[@type='2']")
        end_planned = dated("./activity-date[@type='3']")
        end_actual = dated("./activity-date[@type='4']")

    text_path = TEXT_ELEMENT[major_version]
    data = {
        "iati_identifier": xval(xml, "./iati-identifier/text()"),
        "title": xval(xml, "./title/" + text_path, u""),
        "description": xval(xml, "./description/" + text_path, u""),
        "raw_xml": ET.tostring(xml, encoding='utf-8').decode()
    }

    field_functions = {
        "default_currency": partial(currency, "@default-currency"),
        "hierarchy": hierarchy,
        "last_updated_datetime": last_updated_datetime,
        "default_language": default_language,
        "reporting_org": reporting_org,
        "websites": websites,
        "participating_orgs": participating_orgs,
        "recipient_country_percentages": recipient_country_percentages,
        "recipient_region_percentages": recipient_region_percentages,
        "transactions": transactions,
        "start_planned": start_planned,
        "end_planned": end_planned,
        "start_actual": start_actual,
        "end_actual": end_actual,
        "sector_percentages": sector_percentages,
        "budgets": budgets,
        "policy_markers": policy_markers,
        "related_activities": related_activities,
        'activity_status': coded(
            'ActivityStatus', "./activity-status/@code"),
        'collaboration_type': coded(
            'CollaborationType', "./collaboration-type/@code"),
        'default_finance_type': coded(
            'FinanceType', "./default-finance-type/@code"),
        'default_flow_type': coded(
            'FlowType', "./default-flow-type/@code"),
        'default_aid_type': coded(
            'AidType', "./default-aid-type/@code"),
        'default_tied_status': coded(
            'TiedStatus', "./default-tied-status/@code"),
        'major_version': lambda *args, **kwargs: major_version,
        'version': lambda *args, **kwargs: version,
    }

    # Fields whose fallback after a failed parse is an empty list, not None.
    list_fields = frozenset([
        'websites',
        'participating_orgs',
        'recipient_country_percentages',
        'recipient_region_percentages',
        'sector_percentages',
        'transactions',
        'budgets',
        'policy_markers',
        'related_activities',
    ])

    for field, parser in field_functions.items():
        try:
            data[field] = parser(xml, resource, major_version)
        except (MissingValue, InvalidDateError, ValueError,
                InvalidOperation) as exe:
            data[field] = [] if field in list_fields else None
            log.warn(_(
                u"Failed to import a valid {0} in activity {1}, error was: {2}"
                .format(field, data['iati_identifier'], exe),
                logger='activity_importer',
                dataset=resource.dataset_id,
                resource=resource.url),
                exc_info=exe)

    raw_json = xmltodict.parse(
        data['raw_xml'],
        attr_prefix='',
        cdata_key='text',
        strip_whitespace=False)
    raw_json['iati-extra:version'] = data.get('version')
    data["raw_json"] = raw_json

    return Activity(**data)
field, iati_identifier, exe), logger='activity_importer', dataset=resource.dataset_id, resource=resource.url), exc_info=exe ) return Transaction(**data) ret = [] for ele in xml.xpath("./transaction"): try: ret.append(process(ele)) except MissingValue as exe: iati_identifier = xval(xml, "/iati-identifier/text()", 'no_identifier') log.warn( _(u"Failed to import a valid transaction in activity {0}, error was: {1}".format( iati_identifier, exe), logger='activity_importer', dataset=resource.dataset_id, resource=resource.url), exc_info=exe ) return ret def sector_percentages(xml, resource=no_resource): ret = [] for ele in xml.xpath("./sector"): sp = SectorPercentage() field_functions = { 'sector' : partial(from_codelist, cl.Sector, "@code"), 'vocabulary' : partial(from_codelist, cl.Vocabulary, "@vocabulary"), }