Пример #1
0
def parse_activity(new_identifiers, old_xml, resource):
    """Parse every activity of ``resource.document`` and stage it in the session.

    The first time an ``iati_identifier`` is seen it is recorded in
    ``new_identifiers`` and the activity is added to ``db.session``.  Its
    ``last_change_datetime`` is copied from ``old_xml`` when the hash of the
    raw XML equals the stored one; otherwise (or when ``old_xml`` has no entry
    for the identifier) it is set to the current time.  A repeated identifier
    is only reported as a warning.  The session is flushed after each activity
    and committed once the loop has finished.

    ``old_xml`` is indexed by identifier; index 0 of an entry is read as the
    previous change time and index 1 as the previous XML hash.
    """
    for activity in parse.document(resource.document, resource):
        activity.resource = resource

        if activity.iati_identifier in new_identifiers:
            parse.log.warn(
                _("Duplicate identifier {0} in same resource document".format(
                    activity.iati_identifier),
                  logger='activity_importer',
                  dataset=resource.dataset_id,
                  resource=resource.url),
                exc_info='')
        else:
            new_identifiers.add(activity.iati_identifier)
            try:
                unchanged = (hash(activity.raw_xml) ==
                             old_xml[activity.iati_identifier][1])
                if unchanged:
                    changed_at = old_xml[activity.iati_identifier][0]
                else:
                    changed_at = datetime.datetime.now()
            except KeyError:
                # Identifier not present in the previous import.
                changed_at = datetime.datetime.now()
            activity.last_change_datetime = changed_at
            db.session.add(activity)
            check_for_duplicates([activity])

        db.session.flush()
    db.session.commit()
Пример #2
0
    def process(ele):
        """Collect the field values of one element into ``data``.

        Text and attribute values are read directly with ``xval``; the
        remaining fields are produced by the callables in ``field_functions``,
        each called as ``function(ele, resource)``.  A field whose callable
        raises ``MissingValue``, ``InvalidDateError`` or ``ValueError`` is
        stored as ``None`` and a warning is logged.

        ``resource`` and ``xml`` are taken from the enclosing scope.
        """
        data = {
            'description':
            xval(ele, "description/text()", None),
            'provider_org_text':
            xval(ele, "provider-org/text()", None),
            'provider_org_activity_id':
            xval(ele, "provider-org/@provider-activity-id", None),
            'receiver_org_text':
            xval(ele, "receiver-org/text()", None),
            'receiver_org_activity_id':
            xval(ele, "receiver-org/@receiver-activity-id", None),
            'ref':
            xval(ele, "@ref", None),
            'value_amount':
            iati_decimal(xval(ele, "value/text()")),
        }

        field_functions = {
            'date':
            partial(xpath_date, "transaction-date/@iso-date"),
            'flow_type':
            partial(from_codelist, cl.FlowType, "./flow-type/@code"),
            'finance_type':
            partial(from_codelist, cl.FinanceType, "./finance-type/@code"),
            'aid_type':
            partial(from_codelist, cl.AidType, "./aid-type/@code"),
            'tied_status':
            partial(from_codelist, cl.TiedStatus, "./tied-status/@code"),
            'disbursement_channel':
            partial(from_codelist, cl.DisbursementChannel,
                    "./disbursement-channel/@code"),
            'provider_org':
            partial(from_org, "./provider-org"),
            'receiver_org':
            partial(from_org, "./receiver-org"),
            'type':
            partial(from_codelist, cl.TransactionType,
                    "./transaction-type/@code"),
            'value_currency':
            partial(currency, "value/@currency"),
            'value_date':
            partial(xpath_date, "value/@value-date"),
        }

        for field, function in field_functions.items():
            try:
                data[field] = function(ele, resource)
            # "except ... as" works on Python 2.6+ and 3; the comma form is
            # a syntax error on Python 3.
            except (MissingValue, InvalidDateError, ValueError) as exe:
                data[field] = None
                iati_identifier = xval(
                    xml, "/iati-activity/iati-identifier/text()",
                    'no_identifier')
                log.warn(_(
                    "Failed to import a valid {0} in activity {1}, error was: {2}"
                    .format(field, iati_identifier, exe),
                    logger='activity_importer',
                    dataset=resource.dataset_id,
                    resource=resource.url),
                         exc_info=exe)
        # NOTE(review): ``data`` is not returned in the visible code; confirm
        # the tail of this function was not lost.
Пример #3
0
def parse_activity(new_identifiers, old_xml, resource):
    """Import the activities found in ``resource.document``.

    Each activity is tagged with ``resource``.  An activity whose
    ``iati_identifier`` has not been seen yet is remembered in
    ``new_identifiers``, given a ``last_change_datetime`` and added to
    ``db.session``; the timestamp is taken from ``old_xml`` if the hash of the
    raw XML matches the stored hash, and is the current time otherwise or when
    ``old_xml`` has no entry.  An identifier seen before only produces a
    warning.  The session is flushed per activity and committed at the end.
    """
    for activity in parse.document(resource.document, resource):
        activity.resource = resource

        if activity.iati_identifier in new_identifiers:
            parse.log.warn(
                _("Duplicate identifier {0} in same resource document".format(
                    activity.iati_identifier),
                  logger='activity_importer',
                  dataset=resource.dataset_id,
                  resource=resource.url),
                exc_info='')
        else:
            new_identifiers.add(activity.iati_identifier)
            try:
                same_xml = (hash(activity.raw_xml) ==
                            old_xml[activity.iati_identifier][1])
                if same_xml:
                    stamp = old_xml[activity.iati_identifier][0]
                else:
                    stamp = datetime.datetime.now()
            except KeyError:
                # No record of this identifier from an earlier import.
                stamp = datetime.datetime.now()
            activity.last_change_datetime = stamp
            db.session.add(activity)
            check_for_duplicates([activity])

        db.session.flush()
    db.session.commit()
Пример #4
0
def reporting_org(element, resource=no_resource, major_version='1'):
    """Build the ``Organisation`` described by the ``reporting-org`` child.

    Returns ``None`` when ``element`` has no ``reporting-org`` child and
    ``major_version`` is ``'1'``; for any other version the ``IndexError``
    propagates.  ``ref`` is read without a default, ``name`` falls back to an
    empty string.  When reading or converting the organisation type raises
    ``MissingValue`` or ``ValueError`` the type is stored as ``None`` and a
    warning is logged.
    """
    try:
        xml = element.xpath("./reporting-org")[0]
    except IndexError:
        if major_version == '1':
            return None
        raise
    data = {
        "ref": xval(xml, "@ref"),
        "name": xval(xml, TEXT_ELEMENT[major_version], u""),
    }
    try:
        data.update({
            "type": codelists.by_major_version[major_version].OrganisationType.from_string(xval(xml, "@type"))
        })
    except (MissingValue, ValueError) as exe:
        data['type'] = None
        # The previous path "/iati-identifier/text()" is absolute and only
        # matches a document whose root is <iati-identifier>, so the warning
        # always said 'no_identifier'.  Use the same absolute path as the
        # other importers.
        iati_identifier = xval(
            xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
        log.warn(
            _(u"Failed to import a valid reporting-org.type in activity {0}, error was: {1}".format(
                iati_identifier, exe),
            logger='activity_importer', dataset=resource.dataset_id, resource=resource.url),
            exc_info=exe
        )

    return Organisation.as_unique(db.session, **data)
Пример #5
0
def reporting_org(element, resource=no_resource, major_version='1'):
    """Return the unique ``Organisation`` for the ``reporting-org`` child.

    If ``element`` has no ``reporting-org`` child, ``None`` is returned for
    ``major_version == '1'`` and the ``IndexError`` is re-raised otherwise.
    ``ref`` is read without a default and ``name`` defaults to an empty
    string.  A ``MissingValue`` or ``ValueError`` while resolving the
    organisation type stores ``None`` for the type and logs a warning.
    """
    try:
        xml = element.xpath("./reporting-org")[0]
    except IndexError:
        if major_version == '1':
            return None
        raise
    data = {
        "ref": xval(xml, "@ref"),
        "name": xval(xml, TEXT_ELEMENT[major_version], u""),
    }
    try:
        data.update({
            "type":
            codelists.by_major_version[major_version].OrganisationType.
            from_string(xval(xml, "@type"))
        })
    except (MissingValue, ValueError) as exe:
        data['type'] = None
        # "/iati-identifier/text()" is an absolute path that requires the
        # document root itself to be <iati-identifier>; it never matched.
        # Use the absolute path the other importers use.
        iati_identifier = xval(
            xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
        log.warn(_(
            u"Failed to import a valid reporting-org.type in activity {0}, error was: {1}"
            .format(iati_identifier, exe),
            logger='activity_importer',
            dataset=resource.dataset_id,
            resource=resource.url),
                 exc_info=exe)

    return Organisation.as_unique(db.session, **data)
Пример #6
0
    def process(ele):
        """Build a ``Budget`` from one element.

        Every entry of ``field_functions`` is called as
        ``function(ele, resource)``.  A field whose callable raises
        ``MissingValue``, ``InvalidDateError``, ``ValueError`` or
        ``InvalidOperation`` is stored as ``None`` and a warning is logged.

        ``resource`` and ``xml`` are taken from the enclosing scope.
        """
        field_functions = {
            'type': budget_type,
            'value_currency': partial(currency, "value/@currency"),
            'value_amount': partial(xpath_decimal, "value/text()"),
            'period_start': partial(xpath_date, "period-start/@iso-date"),
            'period_end': partial(xpath_date, "period-end/@iso-date"),
        }
        data = {}
        for field, function in field_functions.items():
            try:
                data[field] = function(ele, resource)
            except (MissingValue, InvalidDateError, ValueError,
                    InvalidOperation) as exe:
                data[field] = None
                iati_identifier = xval(
                    xml, "/iati-activity/iati-identifier/text()",
                    'no_identifier')
                # The unicode prefix used to sit inside the literal
                # ("uFailed ..."), so the logged text began with a stray "u".
                log.warn(_(
                    u"Failed to import a valid budget:{0} in activity {1}, error was: {2}"
                    .format(field, iati_identifier, exe),
                    logger='activity_importer',
                    dataset=resource.dataset_id,
                    resource=resource.url),
                         exc_info=exe)

        return Budget(**data)
Пример #7
0
def activities(xmlfile, resource=no_resource):
    """Yield one parsed activity per ``iati-activity`` element of ``xmlfile``.

    The ``version`` attribute is read from the opening ``iati-activities``
    tag; a value starting with ``"2."`` selects major version ``'2'``,
    anything else leaves the default ``'1'``.  An activity that raises
    ``MissingValue`` is logged as an error and skipped.  Each
    ``iati-activity`` element is cleared after it has been handled.

    Raises:
        XMLError: when the parser reports ``ET.XMLSyntaxError``.
    """
    major_version = '1'
    version = None
    try:
        for event, elem in ET.iterparse(xmlfile, events=('start', 'end')):
            if event == 'start' and elem.tag == 'iati-activities':
                version = elem.attrib.get('version')
                if version and version.startswith('2.'):
                    major_version = '2'
            elif event == 'end' and elem.tag == 'iati-activity':
                try:
                    yield activity(elem,
                                   resource=resource,
                                   major_version=major_version,
                                   version=version)
                # "except ... as" is valid on Python 2.6+ and 3; the comma
                # form is a syntax error on Python 3.
                except MissingValue as exe:
                    log.error(
                        _("Failed to import a valid Activity error was: {0}".
                          format(exe),
                          logger='failed_activity',
                          dataset=resource.dataset_id,
                          resource=resource.url),
                        exc_info=exe)
                elem.clear()
    except ET.XMLSyntaxError:
        raise XMLError()
Пример #8
0
def sector_percentages(xml, resource=no_resource):
    """Collect a ``SectorPercentage`` for each ``./sector`` child of ``xml``.

    ``sector`` and ``vocabulary`` are resolved through ``from_codelist``; a
    ``MissingValue`` or ``ValueError`` leaves the attribute unset and logs a
    warning.  ``percentage`` is parsed with ``int`` and becomes ``None`` when
    that fails.  An entry is appended to ``ret`` only if at least one of
    ``sector``, ``vocabulary`` or ``percentage`` is truthy.
    """
    ret = []
    for ele in xml.xpath("./sector"):
        sp = SectorPercentage()
        field_functions = {
            'sector': partial(from_codelist, cl.Sector, "@code"),
            'vocabulary': partial(from_codelist, cl.Vocabulary, "@vocabulary"),
        }

        for field, function in field_functions.items():
            try:
                setattr(sp, field, function(ele, resource))
            # "except ... as" is valid on Python 2.6+ and 3.
            except (MissingValue, ValueError) as exe:
                iati_identifier = xval(
                    xml, "/iati-activity/iati-identifier/text()",
                    'no_identifier')
                # The unicode prefix used to sit inside the literal
                # ("uFailed ..."), so the logged text began with a stray "u".
                log.warn(_(
                    u"Failed to import a valid {0} in activity {1}, error was: {2}"
                    .format(field, iati_identifier, exe),
                    logger='activity_importer',
                    dataset=resource.dataset_id,
                    resource=resource.url),
                         exc_info=exe)

        if ele.xpath("@percentage"):
            try:
                sp.percentage = int(xval(ele, "@percentage"))
            except ValueError:
                sp.percentage = None
        if ele.xpath("text()"):
            sp.text = xval(ele, "text()")
        if any(
                getattr(sp, attr)
                for attr in "sector vocabulary percentage".split()):
            ret.append(sp)
    # NOTE(review): ``ret`` is not returned in the visible code; confirm the
    # trailing ``return`` was not lost.
Пример #9
0
def sector_percentages(xml, resource=no_resource):
    """Collect a ``SectorPercentage`` for each ``./sector`` child of ``xml``.

    ``sector`` and ``vocabulary`` come from ``from_codelist``; when it raises
    ``MissingValue`` or ``ValueError`` the attribute stays unset and a warning
    is logged.  ``percentage`` is converted with ``int`` and set to ``None``
    if the conversion fails.  Only entries with a truthy ``sector``,
    ``vocabulary`` or ``percentage`` are appended to ``ret``.
    """
    ret = []
    for ele in xml.xpath("./sector"):
        sp = SectorPercentage()
        field_functions = {
            'sector': partial(from_codelist, cl.Sector, "@code"),
            'vocabulary': partial(from_codelist, cl.Vocabulary, "@vocabulary"),
        }

        for field, function in field_functions.items():
            try:
                setattr(sp, field, function(ele, resource))
            # "except ... as" is valid on Python 2.6+ and 3.
            except (MissingValue, ValueError) as exe:
                iati_identifier = xval(xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
                # The unicode prefix used to sit inside the literal
                # ("uFailed ..."), so the logged text began with a stray "u".
                log.warn(
                    _(u"Failed to import a valid {0} in activity {1}, error was: {2}".format(
                        field, iati_identifier, exe),
                    logger='activity_importer', dataset=resource.dataset_id, resource=resource.url),
                    exc_info=exe
                )

        if ele.xpath("@percentage"):
            try:
                sp.percentage = int(xval(ele, "@percentage"))
            except ValueError:
                sp.percentage = None
        if ele.xpath("text()"):
            sp.text = xval(ele, "text()")
        if any(getattr(sp, attr) for attr in "sector vocabulary percentage".split()):
            ret.append(sp)
    # NOTE(review): ``ret`` is not returned in the visible code; confirm the
    # trailing ``return`` was not lost.
Пример #10
0
def activities(xmlfile, resource=no_resource):
    """Yield one parsed activity per ``iati-activity`` element of ``xmlfile``.

    An activity that raises ``MissingValue`` is logged as an error and
    skipped.  Each ``iati-activity`` element is cleared after it has been
    handled.

    Raises:
        XMLError: when the parser reports ``ET.XMLSyntaxError``.
    """
    try:
        for event, elem in ET.iterparse(xmlfile):
            if elem.tag == 'iati-activity':
                try:
                    yield activity(elem, resource=resource)
                # "except ... as" is valid on Python 2.6+ and 3; the comma
                # form is a syntax error on Python 3.
                except MissingValue as exe:
                    log.error(_("Failed to import a valid Activity error was: {0}".format(exe),
                            logger='failed_activity', dataset=resource.dataset_id, resource=resource.url),
                            exc_info=exe)
                elem.clear()
    except ET.XMLSyntaxError:
        raise XMLError()
Пример #11
0
def activity(xml_resource, resource=no_resource):
    """Parse one activity document into a dictionary of field values.

    ``iati_identifier`` is read without a default; ``title`` and
    ``description`` default to an empty string and ``raw_xml`` is the
    serialised tree.  Every entry of ``field_functions`` is then called as
    ``function(xml, resource)``; a ``MissingValue``, ``InvalidDateError``,
    ``ValueError`` or ``InvalidOperation`` stores ``None`` for that field and
    logs a warning.
    """

    xml = ET.parse(_open_resource(xml_resource))

    data = {
        "iati_identifier": xval(xml.getroot(), "./iati-identifier/text()"),
        "title": xval(xml, "./title/text()", u""),
        "description": xval(xml, "./description/text()", u""),
        "raw_xml": ET.tostring(xml, encoding=unicode)
    }

    field_functions = {
        "default_currency": partial(currency, "@default-currency"),
        "hierarchy": hierarchy,
        "last_updated_datetime": last_updated_datetime,
        "default_language": default_language,
        "reporting_org": reporting_org,
        "websites": websites,
        "participating_orgs": participating_orgs,
        "recipient_country_percentages": recipient_country_percentages,
        "recipient_region_percentages": recipient_region_percentages,
        "transactions": transactions,
        "start_planned": start_planned,
        "end_planned": end_planned,
        "start_actual": start_actual,
        "end_actual": end_actual,
        "sector_percentages": sector_percentages,
        "budgets": budgets,
        "policy_markers": policy_markers,
        "related_activities": related_activities,
        'activity_status': activity_status,
        'collaboration_type': collaboration_type,
        'default_finance_type': default_finance_type,
        'default_flow_type': default_flow_type,
        'default_aid_type': default_aid_type,
        'default_tied_status': default_tied_status,
    }

    for field, function in field_functions.items():
        try:
            data[field] = function(xml, resource)
        # "except ... as" is valid on Python 2.6+ and 3; the comma form is a
        # syntax error on Python 3.
        except (MissingValue, InvalidDateError, ValueError,
                InvalidOperation) as exe:
            data[field] = None
            log.warn(_(
                u"Failed to import a valid {0} in activity {1}, error was: {2}"
                .format(field, data['iati_identifier'], exe),
                logger='activity_importer',
                dataset=resource.dataset_id,
                resource=resource.url),
                     exc_info=exe)
    # NOTE(review): ``data`` is not used further in the visible code; confirm
    # the tail of this function was not lost.
Пример #12
0
def activity(xml_resource, resource=no_resource):
    """Parse one activity document into a dictionary of field values.

    ``iati_identifier`` is read without a default; ``title`` and
    ``description`` default to an empty string and ``raw_xml`` is the
    serialised tree.  Each callable in ``field_functions`` is invoked as
    ``function(xml, resource)``; if it raises ``MissingValue``,
    ``InvalidDateError``, ``ValueError`` or ``InvalidOperation`` the field is
    stored as ``None`` and a warning is logged.
    """

    xml = ET.parse(_open_resource(xml_resource))

    data = {
        "iati_identifier": xval(xml.getroot(), "./iati-identifier/text()"),
        "title": xval(xml, "./title/text()", u""),
        "description": xval(xml, "./description/text()", u""),
        "raw_xml": ET.tostring(xml, encoding=unicode)
    }

    field_functions = {
        "default_currency": partial(currency, "@default-currency"),
        "hierarchy": hierarchy,
        "last_updated_datetime": last_updated_datetime,
        "default_language": default_language,
        "reporting_org": reporting_org,
        "websites": websites,
        "participating_orgs": participating_orgs,
        "recipient_country_percentages": recipient_country_percentages,
        "recipient_region_percentages": recipient_region_percentages,
        "transactions": transactions,
        "start_planned": start_planned,
        "end_planned": end_planned,
        "start_actual": start_actual,
        "end_actual": end_actual,
        "sector_percentages": sector_percentages,
        "budgets": budgets,
        "policy_markers": policy_markers,
        "related_activities": related_activities,
        'activity_status': activity_status,
        'collaboration_type': collaboration_type,
        'default_finance_type': default_finance_type,
        'default_flow_type': default_flow_type,
        'default_aid_type': default_aid_type,
        'default_tied_status': default_tied_status,
    }

    for field, function in field_functions.items():
        try:
            data[field] = function(xml, resource)
        # "except ... as" is valid on Python 2.6+ and 3; the comma form is a
        # syntax error on Python 3.
        except (MissingValue, InvalidDateError, ValueError, InvalidOperation) as exe:
            data[field] = None
            log.warn(
                _(u"Failed to import a valid {0} in activity {1}, error was: {2}".format(
                    field, data['iati_identifier'], exe),
                logger='activity_importer', dataset=resource.dataset_id, resource=resource.url),
                exc_info=exe
            )
    # NOTE(review): ``data`` is not used further in the visible code; confirm
    # the tail of this function was not lost.
Пример #13
0
def related_activities(xml, resource=no_resource):
    """Return a ``RelatedActivity`` for each ``./related-activity`` child.

    The element text is optional (``None`` when absent).  ``@ref`` is read
    without a default; when that raises ``MissingValue`` the element is
    skipped and a warning is logged.
    """
    found = []
    for node in xml.xpath("./related-activity"):
        label = xval(node, "text()", None)
        try:
            found.append(RelatedActivity(ref=xval(node, "@ref"), text=label))
        except MissingValue as e:
            iati_identifier = xval(
                xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
            log.warn(
                _(u"Failed to import a valid related-activity in activity {0}, error was: {1}".format(
                    iati_identifier, e),
                  logger='activity_importer',
                  dataset=resource.dataset_id,
                  resource=resource.url),
                exc_info=e)
    return found
Пример #14
0
def activities(xmlfile, resource=no_resource):
    """Yield one parsed activity per ``iati-activity`` element of ``xmlfile``.

    An activity that raises ``MissingValue`` is logged as an error and
    skipped.  Each ``iati-activity`` element is cleared after it has been
    handled.

    Raises:
        XMLError: when the parser reports ``ET.XMLSyntaxError``.
    """
    try:
        for event, elem in ET.iterparse(xmlfile):
            if elem.tag == 'iati-activity':
                try:
                    yield activity(elem, resource=resource)
                # "except ... as" is valid on Python 2.6+ and 3; the comma
                # form is a syntax error on Python 3.
                except MissingValue as exe:
                    log.error(
                        _("Failed to import a valid Activity error was: {0}".
                          format(exe),
                          logger='failed_activity',
                          dataset=resource.dataset_id,
                          resource=resource.url),
                        exc_info=exe)
                elem.clear()
    except ET.XMLSyntaxError:
        raise XMLError()
Пример #15
0
def related_activities(xml, resource=no_resource, major_version='1'):
    """Return a ``RelatedActivity`` for each ``./related-activity`` child.

    The text is looked up with the path ``TEXT_ELEMENT[major_version]`` and is
    ``None`` when absent.  ``@ref`` is read without a default; when that
    raises ``MissingValue`` the element is skipped and a warning is logged.
    """
    found = []
    for node in xml.xpath("./related-activity"):
        label = xval(node, TEXT_ELEMENT[major_version], None)
        try:
            found.append(RelatedActivity(ref=xval(node, "@ref"), text=label))
        except MissingValue as e:
            iati_identifier = xval(
                xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
            log.warn(
                _(u"Failed to import a valid related-activity in activity {0}, error was: {1}".format(
                    iati_identifier, e),
                  logger='activity_importer',
                  dataset=resource.dataset_id,
                  resource=resource.url),
                exc_info=e)
    return found
Пример #16
0
def from_codelist(codelist, path, xml, resource=no_resource):
    """Look up the value at ``path`` in ``codelist``.

    Returns ``codelist.from_string(code)`` for a non-empty code.  Returns
    ``None`` when no code is found, or when the lookup raises ``MissingValue``
    or ``ValueError`` (a warning is logged in that case).
    """
    code = xval(xml, path, None)
    if code:
        try:
            return codelist.from_string(code)
        except (MissingValue, ValueError) as e:
            iati_identifier = xval(xml,
                                   "/iati-activity/iati-identifier/text()",
                                   'no_identifier')

            # The two adjacent literals used to join as "...in activity{1}",
            # gluing the identifier onto the word "activity".
            log.warn(_(
                u"Failed to import a valid {0} in activity {1}, error was: {2}"
                .format(codelist, iati_identifier, e),
                logger='activity_importer',
                dataset=resource.dataset_id,
                resource=resource.url),
                     exc_info=e)
    return None
Пример #17
0
def activities(xmlfile, resource=no_resource):
    """Yield one parsed activity per ``iati-activity`` element of ``xmlfile``.

    The ``version`` attribute is read from the opening ``iati-activities``
    tag; a value starting with ``"2."`` selects major version ``'2'``,
    anything else leaves the default ``'1'``.  An activity that raises
    ``MissingValue`` is logged as an error and skipped.  Each
    ``iati-activity`` element is cleared after it has been handled.

    Raises:
        XMLError: when the parser reports ``ET.XMLSyntaxError``.
    """
    major_version = '1'
    version = None
    try:
        for event, elem in ET.iterparse(xmlfile, events=('start', 'end')):
            if event == 'start' and elem.tag == 'iati-activities':
                version = elem.attrib.get('version')
                if version and version.startswith('2.'):
                    major_version = '2'
            elif event == 'end' and elem.tag == 'iati-activity':
                try:
                    yield activity(elem, resource=resource, major_version=major_version, version=version)
                # "except ... as" is valid on Python 2.6+ and 3; the comma
                # form is a syntax error on Python 3.
                except MissingValue as exe:
                    log.error(_("Failed to import a valid Activity error was: {0}".format(exe),
                            logger='failed_activity', dataset=resource.dataset_id, resource=resource.url),
                            exc_info=exe)
                elem.clear()
    except ET.XMLSyntaxError:
        raise XMLError()
Пример #18
0
def participating_orgs(xml, resource=None):
    """Return a ``Participation`` for each distinct (role, organisation ref).

    Every ``./participating-org`` child is resolved to a role (the ``@role``
    attribute, title-cased, looked up in ``cl.OrganisationRole``) and an
    organisation (``parse_org``).  A repeated ``(role, organisation.ref)``
    pair is ignored.  A ``ValueError`` skips the element and logs a warning.
    """
    ret = []
    seen = set()
    for ele in xml.xpath("./participating-org"):
        try:
            role = cl.OrganisationRole.from_string(xval(ele, "@role").title())
            organisation = parse_org(ele)
            if (role, organisation.ref) not in seen:
                seen.add((role, organisation.ref))
                ret.append(Participation(role=role, organisation=organisation))
        except ValueError as e:
            iati_identifier = xval(xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
            # ``resource`` defaults to None; read its attributes defensively
            # so the warning itself cannot fail with AttributeError.
            # NOTE(review): the message says "sector percentage" although
            # this loop handles participating-org roles; the text is left
            # unchanged to keep log output stable.
            log.warn(
                _(u"Failed to import a valid sector percentage:{0} in activity {1}, error was: {2}".format(
                    'organisation_role', iati_identifier, e),
                  logger='activity_importer',
                  dataset=getattr(resource, 'dataset_id', None),
                  resource=getattr(resource, 'url', None)),
                exc_info=e
            )
    return ret
Пример #19
0
def participating_orgs(xml, resource=None, major_version='1'):
    """Return a ``Participation`` for each distinct (role, organisation ref).

    Every ``./participating-org`` child is resolved to a role (the ``@role``
    attribute, title-cased, looked up in the ``OrganisationRole`` codelist of
    ``major_version``) and an organisation (``parse_org``).  A repeated
    ``(role, organisation.ref)`` pair is ignored.  A ``ValueError`` skips the
    element and logs a warning.
    """
    ret = []
    seen = set()
    for ele in xml.xpath("./participating-org"):
        try:
            role = codelists.by_major_version[major_version].OrganisationRole.from_string(xval(ele, "@role").title())
            organisation = parse_org(ele, major_version=major_version)
            if (role, organisation.ref) not in seen:
                seen.add((role, organisation.ref))
                ret.append(Participation(role=role, organisation=organisation))
        except ValueError as e:
            iati_identifier = xval(xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
            # ``resource`` defaults to None; read its attributes defensively
            # so the warning itself cannot fail with AttributeError.
            # NOTE(review): the message says "sector percentage" although
            # this loop handles participating-org roles; the text is left
            # unchanged to keep log output stable.
            log.warn(
                _(u"Failed to import a valid sector percentage:{0} in activity {1}, error was: {2}".format(
                    'organisation_role', iati_identifier, e),
                  logger='activity_importer',
                  dataset=getattr(resource, 'dataset_id', None),
                  resource=getattr(resource, 'url', None)),
                exc_info=e
            )
    return ret
Пример #20
0
def from_codelist(codelist, path, xml, resource=no_resource):
    """Look up the value at ``path`` in ``codelist``.

    Returns ``codelist.from_string(code)`` for a non-empty code.  Returns
    ``None`` when no code is found, or when the lookup raises ``MissingValue``
    or ``ValueError`` (a warning is logged in that case).
    """
    code = xval(xml, path, None)
    if code:
        try:
            return codelist.from_string(code)
        except (MissingValue, ValueError) as e:
            iati_identifier = xval(xml, "/iati-activity/iati-identifier/text()",
                'no_identifier')

            # The two adjacent literals used to join as "...in activity{1}",
            # gluing the identifier onto the word "activity".
            log.warn(
                _(u"Failed to import a valid {0} in activity {1}, error was: {2}".format(
                      codelist, iati_identifier, e),
                   logger='activity_importer',
                   dataset=resource.dataset_id,
                   resource=resource.url
                ),
                exc_info=e
            )
    return None
Пример #21
0
    def process(ele):
        """Collect the field values of one element into ``data``.

        Text and attribute values are read directly with ``xval``; the
        remaining fields are produced by the callables in ``field_functions``,
        each called as ``function(ele, resource, major_version)``.  A field
        whose callable raises ``MissingValue``, ``InvalidDateError``,
        ``ValueError`` or ``InvalidOperation`` is stored as ``None`` and a
        warning is logged.

        ``resource``, ``major_version`` and ``xml`` are taken from the
        enclosing scope.
        """
        data = {
            'description' : xval(ele, "description/" + TEXT_ELEMENT[major_version], None),
            'provider_org_text' : xval(ele, "provider-org/" + TEXT_ELEMENT[major_version], None),
            'provider_org_activity_id' : xval(
                                ele, "provider-org/@provider-activity-id", None),
            'receiver_org_text' : xval(ele, "receiver-org/" + TEXT_ELEMENT[major_version], None),
            'receiver_org_activity_id' : xval(ele, "receiver-org/@receiver-activity-id", None),
            'ref' : xval(ele, "@ref", None),
        }

        field_functions = {
            'date' : partial(xpath_date, "transaction-date/@iso-date"),
            'flow_type' : partial(from_codelist_with_major_version, 'FlowType', "./flow-type/@code"),
            'finance_type' : partial(from_codelist_with_major_version, 'FinanceType', "./finance-type/@code"),
            'aid_type' : partial(from_codelist_with_major_version, 'AidType', "./aid-type/@code"),
            'tied_status' : partial(from_codelist_with_major_version, 'TiedStatus', "./tied-status/@code"),
            'disbursement_channel' : partial(from_codelist_with_major_version, 'DisbursementChannel', "./disbursement-channel/@code"),
            'provider_org' : partial(from_org, "./provider-org"),
            'receiver_org' : partial(from_org, "./receiver-org"),
            'type' : partial(from_codelist_with_major_version, 'TransactionType', "./transaction-type/@code"),
            'value_currency' : partial(currency, "value/@currency"),
            'value_date' : partial(xpath_date, "value/@value-date"),
            'value_amount' : partial(xpath_decimal, "value/text()"),
            "recipient_country_percentages": recipient_country_percentages,
            "recipient_region_percentages": recipient_region_percentages,
            "sector_percentages": sector_percentages,
        }

        for field, function in field_functions.items():
            try:
                data[field] = function(ele, resource, major_version)
            # "except ... as" is valid on Python 2.6+ and 3; the comma form
            # is a syntax error on Python 3.
            except (MissingValue, InvalidDateError, ValueError, InvalidOperation) as exe:
                data[field] = None
                iati_identifier = xval(xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
                log.warn(
                    _(u"Failed to import a valid {0} in activity {1}, error was: {2}".format(
                        field, iati_identifier, exe),
                    logger='activity_importer', dataset=resource.dataset_id, resource=resource.url),
                    exc_info=exe
                )
        # NOTE(review): ``data`` is not returned in the visible code; confirm
        # the tail of this function was not lost.
Пример #22
0
def reporting_org(element, resource=no_resource):
    """Build the ``Organisation`` described by the ``reporting-org`` child.

    The first ``./reporting-org`` child is used; an ``IndexError`` propagates
    when there is none.  ``ref`` is read without a default and ``name``
    defaults to an empty string.  A ``MissingValue`` or ``ValueError`` while
    resolving the organisation type stores ``None`` for the type and logs a
    warning.
    """
    xml = element.xpath("./reporting-org")[0]
    data = {
        "ref": xval(xml, "@ref"),
        "name": xval(xml, 'text()', u""),
    }
    try:
        data.update({
            "type": cl.OrganisationType.from_string(xval(xml, "@type"))
        })
    except (MissingValue, ValueError) as exe:
        data['type'] = None
        # "/iati-identifier/text()" is an absolute path that requires the
        # document root itself to be <iati-identifier>; it never matched.
        # Use the absolute path the other importers use.
        iati_identifier = xval(
            xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
        log.warn(
            _(u"Failed to import a valid reporting-org.type in activity {0}, error was: {1}".format(
                iati_identifier, exe),
            logger='activity_importer', dataset=resource.dataset_id, resource=resource.url),
            exc_info=exe
        )

    return Organisation.as_unique(db.session, **data)
Пример #23
0
def reporting_org(element, resource=no_resource):
    """Return the unique ``Organisation`` for the ``reporting-org`` child.

    The first ``./reporting-org`` child is used; an ``IndexError`` propagates
    when there is none.  ``ref`` is read without a default and ``name``
    defaults to an empty string.  When reading or converting the organisation
    type raises ``MissingValue`` or ``ValueError`` the type is stored as
    ``None`` and a warning is logged.
    """
    xml = element.xpath("./reporting-org")[0]
    data = {
        "ref": xval(xml, "@ref"),
        "name": xval(xml, 'text()', u""),
    }
    try:
        data.update(
            {"type": cl.OrganisationType.from_string(xval(xml, "@type"))})
    except (MissingValue, ValueError) as exe:
        data['type'] = None
        # "/iati-identifier/text()" is an absolute path that requires the
        # document root itself to be <iati-identifier>; it never matched.
        # Use the absolute path the other importers use.
        iati_identifier = xval(
            xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
        log.warn(_(
            u"Failed to import a valid reporting-org.type in activity {0}, error was: {1}"
            .format(iati_identifier, exe),
            logger='activity_importer',
            dataset=resource.dataset_id,
            resource=resource.url),
                 exc_info=exe)

    return Organisation.as_unique(db.session, **data)
Пример #24
0
    def process(ele):
        """Build a ``Budget`` from one element.

        Every entry of ``field_functions`` is called as
        ``function(ele, resource)``.  A field whose callable raises
        ``MissingValue``, ``InvalidDateError``, ``ValueError`` or
        ``InvalidOperation`` is stored as ``None`` and a warning is logged.

        ``resource`` and ``xml`` are taken from the enclosing scope.
        """
        field_functions = {
            'type' : budget_type,
            'value_currency' : partial(currency, "value/@currency"),
            'value_amount' : partial(xpath_decimal, "value/text()"),
            'period_start' : partial(xpath_date, "period-start/@iso-date"),
            'period_end' : partial(xpath_date, "period-end/@iso-date"),
        }
        data = {}
        for field, function in field_functions.items():
            try:
                data[field] = function(ele, resource)
            except (MissingValue, InvalidDateError, ValueError, InvalidOperation) as exe:
                data[field] = None
                iati_identifier = xval(xml, "/iati-activity/iati-identifier/text()", 'no_identifier')
                # The unicode prefix used to sit inside the literal
                # ("uFailed ..."), so the logged text began with a stray "u".
                log.warn(
                    _(u"Failed to import a valid budget:{0} in activity {1}, error was: {2}".format(
                        field, iati_identifier, exe),
                    logger='activity_importer', dataset=resource.dataset_id, resource=resource.url),
                    exc_info=exe
                )

        return Budget(**data)
Пример #25
0
def activity(xml_resource, resource=no_resource, major_version='1', version=None):
    """Parse one activity document into a dictionary of field values.

    The ``activity-date`` type codes and the text paths depend on
    ``major_version``: ``'2'`` uses the numeric codes ``1``-``4``, anything
    else uses the named codes (``start-planned`` etc.).  ``iati_identifier``
    is read without a default; ``title`` and ``description`` default to an
    empty string.  Each callable in ``field_functions`` is invoked as
    ``function(xml, resource, major_version)``; if it raises
    ``MissingValue``, ``InvalidDateError``, ``ValueError`` or
    ``InvalidOperation`` the field is stored as ``None`` and a warning is
    logged.
    """
    xml = ET.parse(_open_resource(xml_resource))

    if major_version == '2':
        start_planned = partial(xval_date, "./activity-date[@type='1']")
        start_actual = partial(xval_date, "./activity-date[@type='2']")
        end_planned = partial(xval_date, "./activity-date[@type='3']")
        end_actual = partial(xval_date, "./activity-date[@type='4']")

    else:
        start_planned = partial(xval_date, "./activity-date[@type='start-planned']")
        end_planned = partial(xval_date, "./activity-date[@type='end-planned']")
        start_actual = partial(xval_date, "./activity-date[@type='start-actual']")
        end_actual = partial(xval_date, "./activity-date[@type='end-actual']")

    data = {
        "iati_identifier": xval(xml.getroot(), "./iati-identifier/text()"),
        "title": xval(xml, "./title/"+TEXT_ELEMENT[major_version], u""),
        "description": xval(xml, "./description/"+TEXT_ELEMENT[major_version], u""),
        "raw_xml": ET.tostring(xml, encoding=unicode)
    }

    # NOTE(review): ``cl`` is not referenced again in the visible code.
    cl = codelists.by_major_version[major_version]
    activity_status = partial(from_codelist_with_major_version, 'ActivityStatus', "./activity-status/@code")
    collaboration_type = partial(from_codelist_with_major_version, 'CollaborationType', "./collaboration-type/@code")
    default_finance_type = partial(from_codelist_with_major_version, 'FinanceType', "./default-finance-type/@code")
    default_flow_type = partial(from_codelist_with_major_version, 'FlowType', "./default-flow-type/@code")
    default_aid_type = partial(from_codelist_with_major_version, 'AidType', "./default-aid-type/@code")
    default_tied_status = partial(from_codelist_with_major_version, 'TiedStatus', "./default-tied-status/@code")

    field_functions = {
        "default_currency" : partial(currency, "@default-currency"),
        "hierarchy": hierarchy,
        "last_updated_datetime" : last_updated_datetime,
        "default_language" : default_language,
        "reporting_org": reporting_org,
        "websites": websites,
        "participating_orgs": participating_orgs,
        "recipient_country_percentages": recipient_country_percentages,
        "recipient_region_percentages": recipient_region_percentages,
        "transactions": transactions,
        "start_planned": start_planned,
        "end_planned": end_planned,
        "start_actual": start_actual,
        "end_actual": end_actual,
        "sector_percentages": sector_percentages,
        "budgets": budgets,
        "policy_markers": policy_markers,
        "related_activities": related_activities,
        'activity_status' : activity_status,
        'collaboration_type' : collaboration_type,
        'default_finance_type' : default_finance_type,
        'default_flow_type' : default_flow_type,
        'default_aid_type' : default_aid_type,
        'default_tied_status' : default_tied_status,
        # Constant "fields": ignore the call arguments and return the value.
        'major_version': lambda *args, **kwargs: major_version,
        'version': lambda *args, **kwargs: version,
    }

    for field, function in field_functions.items():
        try:
            data[field] = function(xml, resource, major_version)
        # "except ... as" is valid on Python 2.6+ and 3; the comma form is a
        # syntax error on Python 3.
        except (MissingValue, InvalidDateError, ValueError, InvalidOperation) as exe:
            data[field] = None
            log.warn(
                _(u"Failed to import a valid {0} in activity {1}, error was: {2}".format(
                    field, data['iati_identifier'], exe),
                logger='activity_importer', dataset=resource.dataset_id, resource=resource.url),
                exc_info=exe
            )
    # NOTE(review): ``data`` is not used further in the visible code; confirm
    # the tail of this function was not lost.
Пример #26
0
def activity(xml_resource,
             resource=no_resource,
             major_version='1',
             version=None):
    """Parse an IATI activity document and collect its field values.

    ``xml_resource`` is opened with ``_open_resource`` and parsed with
    ``ET.parse``.  The identifier, title, description and raw XML are read
    directly; every other field is produced by calling an extractor as
    ``extractor(xml, resource, major_version)``.

    An extractor that raises ``MissingValue``, ``InvalidDateError``,
    ``ValueError`` or ``InvalidOperation`` leaves its field set to ``None``
    and a warning is logged; parsing of the remaining fields continues.

    :param xml_resource: value handed to ``_open_resource``.
    :param resource: object whose ``dataset_id`` and ``url`` are used in
        warning messages.
    :param major_version: IATI major version as a string; selects the
        activity-date ``@type`` codes and the ``TEXT_ELEMENT`` suffix.
    :param version: stored unchanged under the ``version`` field.
    """
    xml = ET.parse(_open_resource(xml_resource))

    # Version 2 identifies activity dates by numeric code, earlier versions
    # by name.
    if major_version == '2':
        start_planned = partial(xval_date, "./activity-date[@type='1']")
        start_actual = partial(xval_date, "./activity-date[@type='2']")
        end_planned = partial(xval_date, "./activity-date[@type='3']")
        end_actual = partial(xval_date, "./activity-date[@type='4']")

    else:
        start_planned = partial(xval_date,
                                "./activity-date[@type='start-planned']")
        end_planned = partial(xval_date,
                              "./activity-date[@type='end-planned']")
        start_actual = partial(xval_date,
                               "./activity-date[@type='start-actual']")
        end_actual = partial(xval_date, "./activity-date[@type='end-actual']")

    data = {
        "iati_identifier":
        xval(xml.getroot(), "./iati-identifier/text()"),
        "title":
        xval(xml, "./title/" + TEXT_ELEMENT[major_version], u""),
        "description":
        xval(xml, "./description/" + TEXT_ELEMENT[major_version], u""),
        "raw_xml":
        ET.tostring(xml, encoding=unicode)
    }

    activity_status = partial(from_codelist_with_major_version,
                              'ActivityStatus', "./activity-status/@code")
    collaboration_type = partial(from_codelist_with_major_version,
                                 'CollaborationType',
                                 "./collaboration-type/@code")
    default_finance_type = partial(from_codelist_with_major_version,
                                   'FinanceType',
                                   "./default-finance-type/@code")
    default_flow_type = partial(from_codelist_with_major_version, 'FlowType',
                                "./default-flow-type/@code")
    default_aid_type = partial(from_codelist_with_major_version, 'AidType',
                               "./default-aid-type/@code")
    default_tied_status = partial(from_codelist_with_major_version,
                                  'TiedStatus', "./default-tied-status/@code")

    field_functions = {
        "default_currency": partial(currency, "@default-currency"),
        "hierarchy": hierarchy,
        "last_updated_datetime": last_updated_datetime,
        "default_language": default_language,
        "reporting_org": reporting_org,
        "websites": websites,
        "participating_orgs": participating_orgs,
        "recipient_country_percentages": recipient_country_percentages,
        "recipient_region_percentages": recipient_region_percentages,
        "transactions": transactions,
        "start_planned": start_planned,
        "end_planned": end_planned,
        "start_actual": start_actual,
        "end_actual": end_actual,
        "sector_percentages": sector_percentages,
        "budgets": budgets,
        "policy_markers": policy_markers,
        "related_activities": related_activities,
        'activity_status': activity_status,
        'collaboration_type': collaboration_type,
        'default_finance_type': default_finance_type,
        'default_flow_type': default_flow_type,
        'default_aid_type': default_aid_type,
        'default_tied_status': default_tied_status,
        # These two ignore the XML and simply echo the call arguments.
        'major_version': lambda *args, **kwargs: major_version,
        'version': lambda *args, **kwargs: version,
    }

    for field, function in field_functions.items():
        try:
            data[field] = function(xml, resource, major_version)
        except (MissingValue, InvalidDateError, ValueError,
                InvalidOperation) as exe:
            # A single bad field must not abort the whole activity.
            data[field] = None
            log.warn(_(
                u"Failed to import a valid {0} in activity {1}, error was: {2}"
                .format(field, data['iati_identifier'], exe),
                logger='activity_importer',
                dataset=resource.dataset_id,
                resource=resource.url),
                     exc_info=exe)
    # NOTE(review): no return statement is visible after this loop; confirm
    # how callers obtain ``data``.
Пример #27
0
def transactions(xml, resource=no_resource, major_version='1'):
    """Build a ``Transaction`` for every ``./transaction`` child of *xml*.

    Each transaction's fields are produced by calling an extractor as
    ``extractor(ele, resource, major_version)``.  An extractor that raises
    ``MissingValue``, ``InvalidDateError``, ``ValueError`` or
    ``InvalidOperation`` leaves its field set to ``None`` and a warning is
    logged.  A ``MissingValue`` escaping a whole transaction is logged and
    that transaction is left out of the result.

    :param xml: node supporting ``.xpath``; queried for ``./transaction``.
    :param resource: object whose ``dataset_id`` and ``url`` are used in
        warning messages.
    :param major_version: IATI major version as a string; selects the
        ``TEXT_ELEMENT`` suffix and is forwarded to the extractors.
    :returns: list of ``Transaction`` objects, possibly empty.
    """
    # Both warning paths must look the activity identifier up the same way.
    # The per-transaction warning previously used "/iati-identifier/text()",
    # which can only match a document whose root element is iati-identifier,
    # so it always fell back to 'no_identifier'.
    identifier_path = "/iati-activity/iati-identifier/text()"

    def from_org(path, ele, resource=None, major_version='1'):
        # Parse the first element matching ``path``; None when there is none.
        organisation = ele.xpath(path)
        if organisation:
            return parse_org(organisation[0], major_version=major_version)
        return None

    def process(ele):
        # Plain text/attribute values: a missing node simply yields None.
        data = {
            'description':
            xval(ele, "description/" + TEXT_ELEMENT[major_version], None),
            'provider_org_text':
            xval(ele, "provider-org/" + TEXT_ELEMENT[major_version], None),
            'provider_org_activity_id':
            xval(ele, "provider-org/@provider-activity-id", None),
            'receiver_org_text':
            xval(ele, "receiver-org/" + TEXT_ELEMENT[major_version], None),
            'receiver_org_activity_id':
            xval(ele, "receiver-org/@receiver-activity-id", None),
            'ref':
            xval(ele, "@ref", None),
        }

        field_functions = {
            'date':
            partial(xpath_date, "transaction-date/@iso-date"),
            'flow_type':
            partial(from_codelist_with_major_version, 'FlowType',
                    "./flow-type/@code"),
            'finance_type':
            partial(from_codelist_with_major_version, 'FinanceType',
                    "./finance-type/@code"),
            'aid_type':
            partial(from_codelist_with_major_version, 'AidType',
                    "./aid-type/@code"),
            'tied_status':
            partial(from_codelist_with_major_version, 'TiedStatus',
                    "./tied-status/@code"),
            'disbursement_channel':
            partial(from_codelist_with_major_version, 'DisbursementChannel',
                    "./disbursement-channel/@code"),
            'provider_org':
            partial(from_org, "./provider-org"),
            'receiver_org':
            partial(from_org, "./receiver-org"),
            'type':
            partial(from_codelist_with_major_version, 'TransactionType',
                    "./transaction-type/@code"),
            'value_currency':
            partial(currency, "value/@currency"),
            'value_date':
            partial(xpath_date, "value/@value-date"),
            'value_amount':
            partial(xpath_decimal, "value/text()"),
            "recipient_country_percentages":
            recipient_country_percentages,
            "recipient_region_percentages":
            recipient_region_percentages,
            "sector_percentages":
            sector_percentages,
        }

        for field, function in field_functions.items():
            try:
                data[field] = function(ele, resource, major_version)
            except (MissingValue, InvalidDateError, ValueError,
                    InvalidOperation) as exe:
                # One bad field must not discard the whole transaction.
                data[field] = None
                iati_identifier = xval(xml, identifier_path, 'no_identifier')
                log.warn(_(
                    u"Failed to import a valid {0} in activity {1}, error was: {2}"
                    .format(field, iati_identifier, exe),
                    logger='activity_importer',
                    dataset=resource.dataset_id,
                    resource=resource.url),
                         exc_info=exe)

        return Transaction(**data)

    ret = []
    for ele in xml.xpath("./transaction"):
        try:
            ret.append(process(ele))
        except MissingValue as exe:
            iati_identifier = xval(xml, identifier_path, 'no_identifier')
            log.warn(_(
                u"Failed to import a valid transaction in activity {0}, error was: {1}"
                .format(iati_identifier, exe),
                logger='activity_importer',
                dataset=resource.dataset_id,
                resource=resource.url),
                     exc_info=exe)
    return ret
Пример #28
0
                    dataset=resource.dataset_id,
                    resource=resource.url),
                         exc_info=exe)

        return Transaction(**data)

    ret = []
    for ele in xml.xpath("./transaction"):
        try:
            ret.append(process(ele))
        except MissingValue as exe:
            iati_identifier = xval(xml, "/iati-identifier/text()",
                                   'no_identifier')
            log.warn(_(
                u"Failed to import a valid transaction in activity {0}, error was: {1}"
                .format(iati_identifier, exe),
                logger='activity_importer',
                dataset=resource.dataset_id,
                resource=resource.url),
                     exc_info=exe)
    return ret


def sector_percentages(xml, resource=no_resource, major_version='1'):
    """Walk the ``./sector`` children of *xml*, preparing a SectorPercentage for each.

    NOTE(review): only the start of this function is visible here; how
    ``ret``, ``sp`` and ``field_functions`` are used afterwards could not be
    confirmed.
    """
    # Codelists are version specific; pick the set for this major version.
    cl = codelists.by_major_version[major_version]
    ret = []
    for ele in xml.xpath("./sector"):
        sp = SectorPercentage()
        # Extractors for the codelist-backed attributes of the sector element.
        field_functions = {
            'sector': partial(from_codelist, cl.Sector, "@code"),
            'vocabulary': partial(from_codelist, cl.Vocabulary, "@vocabulary"),
        }
Пример #29
0
def parse_resource(resource):
    """Re-import all activities of *resource*, replacing its stored rows.

    Steps, in order:

    1. Record the identifiers currently stored for ``resource.url`` and,
       per identifier, the previous ``last_change_datetime`` together with
       a hash of the previous raw XML.
    2. Delete the stored activities for that URL.
    3. Add every activity yielded by ``parse.document``.  An activity whose
       raw XML hash matches the stored one keeps its old
       ``last_change_datetime``; otherwise it is stamped with the current
       time.  A repeated identifier within the document is skipped with a
       warning.  The session is committed whenever it holds more than 50
       new objects, and once more after the loop.
    4. Insert ``DeletedActivity`` rows for identifiers that disappeared and
       remove such rows for identifiers that are present now.

    :param resource: object providing ``url``, ``document`` and
        ``dataset_id``; its ``version`` and ``last_parsed`` are updated.
    :returns: the same ``resource`` object.
    """
    db.session.add(resource)
    current = Activity.query.filter_by(resource_url=resource.url)
    current_identifiers = {i.iati_identifier for i in current.all()}

    # identifier -> (previous last_change_datetime, hash of previous raw XML)
    old_xml = {
        row[0]: (row[1], hash(row[2]))
        for row in db.session.query(
            Activity.iati_identifier, Activity.last_change_datetime,
            Activity.raw_xml).filter_by(resource_url=resource.url)
    }

    db.session.query(Activity).filter_by(resource_url=resource.url).delete()
    new_identifiers = set()
    activities = []
    for activity in parse.document(resource.document, resource):
        activity.resource = resource

        if activity.iati_identifier not in new_identifiers:
            new_identifiers.add(activity.iati_identifier)
            previous = old_xml.get(activity.iati_identifier)
            if previous is not None and hash(activity.raw_xml) == previous[1]:
                # Unchanged content keeps its original change timestamp.
                activity.last_change_datetime = previous[0]
            else:
                # NOTE(review): local time here, while deletion_date and
                # last_parsed below use utcnow(); confirm this is intended.
                activity.last_change_datetime = datetime.datetime.now()
            activities.append(activity)
            db.session.add(activity)
            # Commit in batches so the pending set stays small.
            if len(db.session.new) > 50:
                check_for_duplicates(activities)
                db.session.commit()
                activities = []
        else:
            parse.log.warn(
                _("Duplicate identifier {0} in same resource document".format(
                    activity.iati_identifier),
                logger='activity_importer', dataset=resource.dataset_id, resource=resource.url),
                exc_info=''
            )
    db.session.add_all(activities)
    check_for_duplicates(activities)
    db.session.commit()

    resource.version = parse.document_metadata(resource.document)

    # Add any identifiers that are no longer present to the deleted_activity
    # table.
    now = datetime.datetime.utcnow()
    deleted = [
        DeletedActivity(iati_identifier=deleted_activity, deletion_date=now)
        for deleted_activity in current_identifiers - new_identifiers
    ]
    if deleted:
        db.session.add_all(deleted)

    # Remove any new identifiers from the deleted_activity table.
    if new_identifiers:
        db.session.query(DeletedActivity)\
                .filter(DeletedActivity.iati_identifier.in_(new_identifiers))\
                .delete(synchronize_session="fetch")

    log.info(
        "Parsed %d activities from %s",
        len(resource.activities),
        resource.url)
    resource.last_parsed = now
    return resource
Пример #30
0
    def process(ele):
        """Convert one transaction element into a ``Transaction``.

        Plain text and attribute values are read first, defaulting to
        ``None``.  The remaining fields come from extractors called as
        ``extractor(ele, resource, major_version)``; one that raises
        ``MissingValue``, ``InvalidDateError``, ``ValueError`` or
        ``InvalidOperation`` leaves its field as ``None`` and logs a warning.
        """
        text_node = TEXT_ELEMENT[major_version]

        def coded(codelist_name, path):
            # Codelist extractors differ only in codelist name and xpath.
            return partial(from_codelist_with_major_version, codelist_name,
                           path)

        data = {
            'description': xval(ele, "description/" + text_node, None),
            'provider_org_text': xval(ele, "provider-org/" + text_node, None),
            'provider_org_activity_id': xval(
                ele, "provider-org/@provider-activity-id", None),
            'receiver_org_text': xval(ele, "receiver-org/" + text_node, None),
            'receiver_org_activity_id': xval(
                ele, "receiver-org/@receiver-activity-id", None),
            'ref': xval(ele, "@ref", None),
        }

        extractors = {
            'date': partial(xpath_date, "transaction-date/@iso-date"),
            'flow_type': coded('FlowType', "./flow-type/@code"),
            'finance_type': coded('FinanceType', "./finance-type/@code"),
            'aid_type': coded('AidType', "./aid-type/@code"),
            'tied_status': coded('TiedStatus', "./tied-status/@code"),
            'disbursement_channel': coded('DisbursementChannel',
                                          "./disbursement-channel/@code"),
            'provider_org': partial(from_org, "./provider-org"),
            'receiver_org': partial(from_org, "./receiver-org"),
            'type': coded('TransactionType', "./transaction-type/@code"),
            'value_currency': partial(currency, "value/@currency"),
            'value_date': partial(xpath_date, "value/@value-date"),
            'value_amount': partial(xpath_decimal, "value/text()"),
            "recipient_country_percentages": recipient_country_percentages,
            "recipient_region_percentages": recipient_region_percentages,
            "sector_percentages": sector_percentages,
        }

        for name, extract in extractors.items():
            try:
                data[name] = extract(ele, resource, major_version)
            except (MissingValue, InvalidDateError, ValueError,
                    InvalidOperation) as err:
                # Keep the transaction; only this field is blanked out.
                data[name] = None
                iati_identifier = xval(
                    xml, "/iati-activity/iati-identifier/text()",
                    'no_identifier')
                message = (
                    u"Failed to import a valid {0} in activity {1}, error was: {2}"
                    .format(name, iati_identifier, err))
                log.warn(_(message,
                           logger='activity_importer',
                           dataset=resource.dataset_id,
                           resource=resource.url),
                         exc_info=err)

        return Transaction(**data)
Пример #31
0
def activity(xml, resource=no_resource, major_version='1', version=None):
    """
    Build an ``Activity`` from one activity element.

    Expects xml argument of type lxml.etree._Element.

    The identifier, title, description and raw XML are read directly; all
    other fields come from extractors called as
    ``extractor(xml, resource, major_version)``.  When an extractor raises
    ``MissingValue``, ``InvalidDateError``, ``ValueError`` or
    ``InvalidOperation`` a warning is logged and the field falls back to
    ``[]`` for the collection fields listed below, ``None`` otherwise.
    The raw XML is additionally converted with ``xmltodict`` and stored as
    ``raw_json`` together with the ``version`` value.
    """
    # Version 2 identifies activity dates by numeric code, earlier versions
    # by name.
    if major_version == '2':
        date_paths = {
            "start_planned": "./activity-date[@type='1']",
            "start_actual": "./activity-date[@type='2']",
            "end_planned": "./activity-date[@type='3']",
            "end_actual": "./activity-date[@type='4']",
        }
    else:
        date_paths = {
            "start_planned": "./activity-date[@type='start-planned']",
            "end_planned": "./activity-date[@type='end-planned']",
            "start_actual": "./activity-date[@type='start-actual']",
            "end_actual": "./activity-date[@type='end-actual']",
        }

    text_node = TEXT_ELEMENT[major_version]
    data = {
        "iati_identifier": xval(xml, "./iati-identifier/text()"),
        "title": xval(xml, "./title/" + text_node, u""),
        "description": xval(xml, "./description/" + text_node, u""),
        "raw_xml": ET.tostring(xml, encoding='utf-8').decode()
    }

    def coded(codelist_name, path):
        # Codelist extractors differ only in codelist name and xpath.
        return partial(from_codelist_with_major_version, codelist_name, path)

    extractors = {
        "default_currency": partial(currency, "@default-currency"),
        "hierarchy": hierarchy,
        "last_updated_datetime": last_updated_datetime,
        "default_language": default_language,
        "reporting_org": reporting_org,
        "websites": websites,
        "participating_orgs": participating_orgs,
        "recipient_country_percentages": recipient_country_percentages,
        "recipient_region_percentages": recipient_region_percentages,
        "transactions": transactions,
        "start_planned": partial(xval_date, date_paths["start_planned"]),
        "end_planned": partial(xval_date, date_paths["end_planned"]),
        "start_actual": partial(xval_date, date_paths["start_actual"]),
        "end_actual": partial(xval_date, date_paths["end_actual"]),
        "sector_percentages": sector_percentages,
        "budgets": budgets,
        "policy_markers": policy_markers,
        "related_activities": related_activities,
        'activity_status': coded('ActivityStatus', "./activity-status/@code"),
        'collaboration_type': coded('CollaborationType',
                                    "./collaboration-type/@code"),
        'default_finance_type': coded('FinanceType',
                                      "./default-finance-type/@code"),
        'default_flow_type': coded('FlowType', "./default-flow-type/@code"),
        'default_aid_type': coded('AidType', "./default-aid-type/@code"),
        'default_tied_status': coded('TiedStatus',
                                     "./default-tied-status/@code"),
        # These two ignore the XML and simply echo the call arguments.
        'major_version': lambda *args, **kwargs: major_version,
        'version': lambda *args, **kwargs: version,
    }

    # Fields that fall back to an empty list instead of None on failure.
    collection_fields = (
        'websites', 'participating_orgs', 'recipient_country_percentages',
        'recipient_region_percentages', 'sector_percentages', 'transactions',
        'budgets', 'policy_markers', 'related_activities',
    )

    for name, extract in extractors.items():
        try:
            data[name] = extract(xml, resource, major_version)
        except (MissingValue, InvalidDateError, ValueError,
                InvalidOperation) as err:
            data[name] = [] if name in collection_fields else None
            message = (
                u"Failed to import a valid {0} in activity {1}, error was: {2}"
                .format(name, data['iati_identifier'], err))
            log.warn(_(message,
                       logger='activity_importer',
                       dataset=resource.dataset_id,
                       resource=resource.url),
                     exc_info=err)

    raw_json = xmltodict.parse(data['raw_xml'],
                               attr_prefix='',
                               cdata_key='text',
                               strip_whitespace=False)
    raw_json['iati-extra:version'] = data.get('version')
    data["raw_json"] = raw_json

    return Activity(**data)
Пример #32
0
                        field, iati_identifier, exe),
                    logger='activity_importer', dataset=resource.dataset_id, resource=resource.url),
                    exc_info=exe
                )
        
        return Transaction(**data)

    ret = []
    for ele in xml.xpath("./transaction"):
        try:
            ret.append(process(ele))
        except MissingValue as exe:
            iati_identifier = xval(xml, "/iati-identifier/text()", 'no_identifier')
            log.warn(
                _(u"Failed to import a valid transaction in activity {0}, error was: {1}".format(
                    iati_identifier, exe),
                logger='activity_importer', dataset=resource.dataset_id, resource=resource.url),
                exc_info=exe
            )
    return ret


def sector_percentages(xml, resource=no_resource):
    """Walk the ``./sector`` children of *xml*, preparing a SectorPercentage for each.

    NOTE(review): only the start of this function is visible here; how
    ``ret``, ``sp`` and ``field_functions`` are used afterwards could not be
    confirmed.
    """
    ret = []
    for ele in xml.xpath("./sector"):
        sp = SectorPercentage()
        # Extractors for the codelist-backed attributes of the sector element;
        # ``cl`` is not defined in this function, so it comes from an outer scope.
        field_functions = {
            'sector' : partial(from_codelist, cl.Sector, "@code"),
            'vocabulary' : partial(from_codelist, cl.Vocabulary, "@vocabulary"),
        }