Exemplo n.º 1
0
def get_subjects(tree):
    """Collect the subjects declared by the ``tobject.subject`` elements of a parsed tree.

    Every ``head/tobject/tobject.subject`` element is inspected. For each attribute named in
    ``subject_fields`` that is set on the element, a qcode is derived from the element's
    ``tobject.subject.refnum``: the first two characters followed by ``000000`` for ``SUBJECT_TYPE``,
    the first five characters followed by ``000`` for ``SUBJECT_MATTER``, the refnum itself otherwise.
    The derived qcode is emitted, named after the attribute value, when ``subject_codes`` has a truthy
    entry for it. If the refnum itself has not been emitted but is known to ``subject_codes``, it is
    added with the name stored there.

    Elements without a ``tobject.subject.refnum`` attribute are skipped (slicing the missing value
    would raise ``TypeError``) and every qcode is emitted at most once; the first name found wins.

    :param tree: parsed document exposing ``findall`` (for example an ElementTree element)
    :return: a list of ``{'name': ..., 'qcode': ...}`` dictionaries in document order
    """
    subjects = []
    emitted = set()  # qcodes already present in ``subjects``

    def emit(name, code):
        # append a subject unless its qcode has already been reported
        if code not in emitted:
            emitted.add(code)
            subjects.append({'name': name, 'qcode': code})

    for elem in tree.findall('head/tobject/tobject.subject'):
        qcode = elem.get('tobject.subject.refnum')
        if qcode is None:
            # no reference number, hence nothing to derive a code from
            continue

        for field in subject_fields:
            name = elem.get(field)
            if not name:
                continue

            if field == SUBJECT_TYPE:
                field_qcode = qcode[:2] + '000000'
            elif field == SUBJECT_MATTER:
                field_qcode = qcode[:5] + '000'
            else:
                field_qcode = qcode

            if subject_codes.get(field_qcode):
                emit(name, field_qcode)

        # fall back to the catalogue name when no attribute produced the refnum itself
        if qcode not in emitted and subject_codes.get(qcode):
            emit(subject_codes[qcode], qcode)
    return subjects
Exemplo n.º 2
0
def get_subjects(tree):
    """Return the subjects found in the ``head/tobject/tobject.subject`` elements of ``tree``.

    The ``tobject.subject.refnum`` attribute of each element is the base code. Each attribute listed
    in ``subject_fields`` that carries a value contributes one candidate subject whose qcode is
    computed from the refnum (see ``code_for`` below) and whose name is the attribute value. A
    candidate is kept only when ``subject_codes`` has a truthy entry for its qcode. When the refnum
    itself was not produced by any attribute it is added with the name held in ``subject_codes``,
    provided it is listed there.

    Compared with a plain scan this version ignores elements that have no refnum (they would
    otherwise fail with ``TypeError`` when the refnum is sliced) and never returns the same qcode
    twice.

    :param tree: parsed document exposing ``findall`` (for example an ElementTree element)
    :return: a list of subject dictionaries, each with ``name`` and ``qcode``
    """
    def code_for(field, refnum):
        # the type and matter levels are prefixes of the refnum padded with zeros
        if field == SUBJECT_TYPE:
            return refnum[:2] + '000000'
        if field == SUBJECT_MATTER:
            return refnum[:5] + '000'
        return refnum

    subjects = []
    known = set()  # qcodes that already made it into ``subjects``

    for elem in tree.findall('head/tobject/tobject.subject'):
        refnum = elem.get('tobject.subject.refnum')
        if refnum is None:
            continue

        for field in subject_fields:
            label = elem.get(field)
            if label:
                code = code_for(field, refnum)
                if code not in known and subject_codes.get(code):
                    known.add(code)
                    subjects.append({'name': label, 'qcode': code})

        # the element named none of the subject fields for its own refnum
        if refnum not in known and subject_codes.get(refnum):
            known.add(refnum)
            subjects.append({'name': subject_codes[refnum], 'qcode': refnum})

    return subjects
Exemplo n.º 3
0
    def get_subjects(self, tree):
        """Finds all the subject tags in the passed tree and returns the parsed subjects.

        All entries will have both the name and qcode populated. An element whose
        ``tobject.subject.refnum`` was already seen is ignored, as is an element without that
        attribute (slicing the missing value would raise ``TypeError``). No qcode is returned twice.

        For every attribute named in ``subject_fields`` that is set on an element the qcode is derived
        from the refnum: first two characters plus ``000000`` for ``SUBJECT_TYPE``, first five
        characters plus ``000`` for ``SUBJECT_MATTER``, the refnum itself otherwise. Only qcodes with a
        truthy entry in ``subject_codes`` are returned.

        :param tree: parsed document exposing ``findall`` (for example an ElementTree element)
        :return: a list of subject dictionaries
        """
        subjects = []
        seen_refnums = set()  # reference numbers of the elements handled so far
        emitted = set()  # qcodes already present in ``subjects``

        for elem in tree.findall("head/tobject/tobject.subject"):
            qcode = elem.get("tobject.subject.refnum")
            if qcode is None or qcode in seen_refnums:
                # nothing to derive codes from, or a repeated element
                continue
            seen_refnums.add(qcode)

            for field in subject_fields:
                name = elem.get(field)
                if not name:
                    continue

                if field == SUBJECT_TYPE:
                    field_qcode = qcode[:2] + "000000"
                elif field == SUBJECT_MATTER:
                    field_qcode = qcode[:5] + "000"
                else:
                    field_qcode = qcode

                if field_qcode not in emitted and subject_codes.get(field_qcode):
                    emitted.add(field_qcode)
                    subjects.append({"name": name, "qcode": field_qcode})

            # if the subject_fields are not specified, use the catalogue name of the refnum
            if qcode not in emitted and subject_codes.get(qcode):
                emitted.add(qcode)
                subjects.append({"name": subject_codes[qcode], "qcode": qcode})
        return subjects
Exemplo n.º 4
0
    def format_subjects(self, subjects):
        """
        Translates ingested subject codes into qcode/name pairs using ``subject_codes``.

        The ``FormalName`` of every entry is taken as its qcode. Entries without a ``FormalName``
        and repeated qcodes are dropped; unknown qcodes get an empty name.
        :param subjects: iterable of dicts that may carry a ``FormalName`` key
        :returns [{"qcode": "01001000", "name": "archaeology"}, {"qcode": "01002000", "name": "architecture"}]
        """
        result = []
        seen_qcodes = set()

        for entry in subjects:
            code = entry.get('FormalName')
            if not code or code in seen_qcodes:
                continue

            seen_qcodes.add(code)
            result.append({'qcode': code, 'name': subject_codes.get(code, '')})

        return result
Exemplo n.º 5
0
    def _set_default_item(self, sport_id, comp_id, match_id):
        """
        Construct an event item with the common values as required.

        The occurrence status is a copy of the active ``eocstat:eos5`` entry of ``self.eocstat_map``
        with its ``is_active`` key removed, so neither the vocabulary held on ``self`` nor other
        items are affected by later changes to the returned item.

        :param sport_id: key into ``self.sport_map`` whose ``iptc`` value becomes the subject qcode;
            also part of the guid
        :param comp_id: competition id; ids starting with ``dom`` get ANPA category ``t``, others ``s``
        :param match_id: match id, part of the guid
        :return: dict with the default event fields
        :raises IndexError: when ``self.eocstat_map`` holds no active ``eocstat:eos5`` entry
        """
        item = dict()
        item[ITEM_TYPE] = CONTENT_TYPE.EVENT
        # '/' inside any of the ids is replaced so that the guid stays a flat urn
        item[GUID_FIELD] = 'urn:aapsportsfixtures:{}:{}:{}:{}'.format(
            sport_id, comp_id, self.season, match_id).replace('/', '-')
        item['anpa_category'] = [{'qcode': 't'}] if comp_id.startswith('dom') else [{'qcode': 's'}]

        iptc_code = self.sport_map.get(sport_id, {}).get('iptc', '')
        item['subject'] = [{'qcode': iptc_code, 'name': subject_codes.get(iptc_code, '')}]

        occur_status = [x for x in self.eocstat_map.get('items', [])
                        if x['qcode'] == 'eocstat:eos5' and x.get('is_active', True)][0]
        # copy before popping: the entry belongs to the vocabulary stored on ``self``
        item['occur_status'] = dict(occur_status)
        item['occur_status'].pop('is_active', None)

        item['versioncreated'] = utcnow()
        item['state'] = CONTENT_STATE.INGESTED
        item['pubstatus'] = None

        calendars = superdesk.get_resource_service('vocabularies').find_one(req=None, _id='event_calendars')
        # tolerate a missing vocabulary or entries without a qcode instead of raising AttributeError
        item['calendars'] = [c for c in (calendars or {}).get('items', [])
                             if (c.get('qcode') or '').lower() in ('sport', 'sportgeneral')]

        return item
Exemplo n.º 6
0
    def format_subjects(self, subjects):
        """Pair each ingested subject code with its name from ``subject_codes``.

        The ``FormalName`` value of an entry is used as the qcode. Entries lacking it are skipped,
        a qcode is reported once only (first occurrence) and a qcode missing from ``subject_codes``
        is reported with an empty name.

        :param subjects: list of dicts where each dict gives the category the article is mapped to.
        :type subjects: list
        :returns [{"qcode": "01001000", "name": "archaeology"}, {"qcode": "01002000", "name": "architecture"}]
        :rtype list
        """
        # keyed by qcode; dicts keep insertion order, so the first occurrence fixes the position
        by_qcode = {}

        for subject in subjects:
            qcode = subject.get('FormalName')
            if qcode and qcode not in by_qcode:
                by_qcode[qcode] = {
                    'qcode': qcode,
                    'name': subject_codes.get(qcode, ''),
                }

        return list(by_qcode.values())
Exemplo n.º 7
0
    def format_subjects(self, subjects):
        """Map the ingested Subject Codes to their corresponding names as per IPTC Specification.

        Only subjects whose ``FormalName`` equals the ``qcode`` of an active item of the
        ``iptc_subject_codes`` vocabulary (as returned by ``self._get_cv``) are kept. Each qcode is
        reported once, with the name found in ``subject_codes`` (empty when unknown) and the scheme
        ``iptc_subject_codes``.

        :param subjects: list of dicts where each dict gives the category the article is mapped to.
        :type subjects: list
        :returns [{"qcode": "01001000", "name": "archaeology", "scheme": "iptc_subject_codes"}]
        :rtype list
        """
        formatted_subjects = []
        seen = set()  # qcodes already present in ``formatted_subjects``

        iptcsc_cv = self._get_cv('iptc_subject_codes')
        # Built once, on the first subject, instead of rescanning the vocabulary for every subject;
        # deferring it keeps an empty ``subjects`` from touching the vocabulary items at all.
        active_qcodes = None

        for subject in subjects:
            formal_name = subject.get('FormalName')
            if active_qcodes is None:
                active_qcodes = {item.get('qcode') for item in iptcsc_cv['items'] if item.get('is_active')}

            #: check formal_name, skip duplicates and filter subjects missing from the vocabulary
            if not formal_name or formal_name in seen or formal_name not in active_qcodes:
                continue

            seen.add(formal_name)
            formatted_subjects.append({
                'qcode': formal_name,
                'name': subject_codes.get(formal_name, ''),
                'scheme': 'iptc_subject_codes',
            })

        return formatted_subjects
    def _set_default_item(self, title, _id, thumbprint, country):
        """
        Construct an event item with the common values as required.

        The subject is taken from the first key of ``self.sheet_map`` that occurs in ``title``; when
        no key matches, the returned item has no ``subject`` entry. The occurrence status is a copy
        of the active ``eocstat:eos5`` entry of ``self.eocstat_map`` with ``is_active`` removed, so
        the vocabulary held on ``self`` is left untouched.

        :param title: text searched for the keys of ``self.sheet_map``
        :param _id: identifier used in the guid
        :param thumbprint: second component of the guid
        :param country: ``australia`` or ``aus`` (any case) selects the domestic ANPA category,
            anything else the overseas one
        :return: dict with the default event fields
        :raises IndexError: when ``self.eocstat_map`` holds no active ``eocstat:eos5`` entry
        """
        item = dict()
        item[ITEM_TYPE] = CONTENT_TYPE.EVENT
        item[GUID_FIELD] = 'urn:aapsportssheet:{}:{}'.format(_id, thumbprint)

        if country.lower() in ['australia', 'aus']:
            item['anpa_category'] = [{'qcode': 't', 'subject': '15000000', 'name': 'Domestic Sport'}]
        else:
            item['anpa_category'] = [{'qcode': 's', 'subject': '15000000', 'name': 'Overseas Sport'}]

        for k, v in self.sheet_map.items():
            if k in title:
                item['subject'] = [{'qcode': v,
                                    'name': subject_codes.get(v, ''), 'parent': '15000000'}]
                break

        occur_status = [x for x in self.eocstat_map.get('items', []) if
                        x['qcode'] == 'eocstat:eos5' and x.get('is_active', True)][0]
        # copy before popping: the entry belongs to the vocabulary stored on ``self``
        item['occur_status'] = dict(occur_status)
        item['occur_status'].pop('is_active', None)

        item['versioncreated'] = utcnow()
        item['state'] = CONTENT_STATE.SCHEDULED
        item['pubstatus'] = 'usable'

        calendars = superdesk.get_resource_service('vocabularies').find_one(req=None, _id='event_calendars')
        # tolerate a missing vocabulary or entries without a qcode instead of raising AttributeError
        item['calendars'] = [c for c in (calendars or {}).get('items', [])
                             if (c.get('qcode') or '').lower() in ('sport', 'sportgeneral')]

        return item
Exemplo n.º 9
0
    def get_subjects(self, tree):
        """Finds all the subject tags in the passed tree and returns the parsed subjects.

        All entries will have both the name and qcode populated. Elements are identified by their
        ``tobject.subject.refnum``: a repeated refnum is ignored and so is an element without one
        (the refnum is sliced to build the broader codes, which fails with ``TypeError`` on a
        missing value). A qcode is never returned more than once.

        For each attribute of ``subject_fields`` set on an element, the qcode is the first two
        characters of the refnum plus ``000000`` (``SUBJECT_TYPE``), the first five characters plus
        ``000`` (``SUBJECT_MATTER``) or the refnum itself (any other field); it is returned only if
        ``subject_codes`` has a truthy entry for it.

        :param tree: parsed document exposing ``findall`` (for example an ElementTree element)
        :return: a list of subject dictionaries
        """
        def derive(field, refnum):
            # widen the refnum to the level the attribute describes
            if field == SUBJECT_TYPE:
                return refnum[:2] + '000000'
            if field == SUBJECT_MATTER:
                return refnum[:5] + '000'
            return refnum

        subjects = []
        handled = set()  # refnums of the elements processed so far
        reported = set()  # qcodes already present in ``subjects``

        for elem in tree.findall('head/tobject/tobject.subject'):
            qcode = elem.get('tobject.subject.refnum')
            if qcode is None or qcode in handled:
                # we ignore duplicates and elements without a reference number
                continue
            handled.add(qcode)

            for field in subject_fields:
                name = elem.get(field)
                if name:
                    field_qcode = derive(field, qcode)
                    if field_qcode not in reported and subject_codes.get(field_qcode):
                        reported.add(field_qcode)
                        subjects.append({
                            'name': name,
                            'qcode': field_qcode
                        })

            # if the subject_fields are not specified.
            if qcode not in reported and subject_codes.get(qcode):
                reported.add(qcode)
                subjects.append({'name': subject_codes[qcode], 'qcode': qcode})
        return subjects
Exemplo n.º 10
0
    def format_subjects(self, subjects):
        """
        Builds the qcode/name list for the ingested subject codes.

        The qcode is the ``FormalName`` of an entry and the name comes from ``subject_codes``
        (empty string when the code is not listed). Entries without ``FormalName`` and qcodes
        that were already collected are left out.
        :param subjects: iterable of dicts that may carry a ``FormalName`` key
        :returns [{"qcode": "01001000", "name": "archaeology"}, {"qcode": "01002000", "name": "architecture"}]
        """
        collected = []

        def already_collected(code):
            # True when an earlier subject produced the same qcode
            return any(entry['qcode'] == code for entry in collected)

        for raw in subjects:
            code = raw.get('FormalName')
            if not code:
                continue
            if already_collected(code):
                continue
            name = subject_codes.get(code, '')
            collected.append({'qcode': code, 'name': name})

        return collected
    def _set_default_item(self, sport_id, comp_id, match_id):
        """
        Construct an event item with the common values as required.

        The occurrence status is a copy of the active ``eocstat:eos5`` entry of ``self.eocstat_map``
        without its ``is_active`` key; copying keeps the vocabulary stored on ``self`` intact and
        gives every item its own dict.

        :param sport_id: key into ``self.sport_map`` whose ``iptc`` value becomes the subject qcode;
            also part of the guid
        :param comp_id: competition id; ids starting with ``dom`` get ANPA category ``t``, others ``s``
        :param match_id: match id, part of the guid
        :return: dict with the default event fields
        :raises IndexError: when ``self.eocstat_map`` holds no active ``eocstat:eos5`` entry
        """
        item = dict()
        item[ITEM_TYPE] = CONTENT_TYPE.EVENT
        # '/' inside any of the ids is replaced so that the guid stays a flat urn
        item[GUID_FIELD] = 'urn:aapsportsfixtures:{}:{}:{}:{}'.format(
            sport_id, comp_id, self.season, match_id).replace('/', '-')

        category = 't' if comp_id.startswith('dom') else 's'
        item['anpa_category'] = [{'qcode': category}]

        # looked up once and reused for both the qcode and its name
        iptc_code = self.sport_map.get(sport_id, {}).get('iptc', '')
        item['subject'] = [{
            'qcode': iptc_code,
            'name': subject_codes.get(iptc_code, ''),
        }]

        occur_status = [
            x for x in self.eocstat_map.get('items', [])
            if x['qcode'] == 'eocstat:eos5' and x.get('is_active', True)
        ][0]
        # copy before popping: the entry belongs to the vocabulary stored on ``self``
        item['occur_status'] = dict(occur_status)
        item['occur_status'].pop('is_active', None)

        item['versioncreated'] = utcnow()
        item['state'] = CONTENT_STATE.INGESTED
        item['pubstatus'] = None

        calendars = superdesk.get_resource_service('vocabularies').find_one(
            req=None, _id='event_calendars')
        # tolerate a missing vocabulary or entries without a qcode instead of raising AttributeError
        item['calendars'] = [
            c for c in (calendars or {}).get('items', [])
            if (c.get('qcode') or '').lower() == 'sport'
        ]

        return item
Exemplo n.º 12
0
    def get_subjects(self, tree):
        """Return the IPTC subjects referenced by the passed tree.

        Only ``head/tobject/tobject.subject`` elements whose ``tobject.subject.ipr`` is ``IPTC``
        are considered. The ``tobject.subject.refnum`` of an element is its qcode and the name is
        read from ``subject_codes``; codes without a truthy entry there are left out.

        All entries will have both the name and qcode populated.

        :param tree: parsed document exposing ``findall``
        :return: a list of subject dictionaries
        """
        found = []
        visited = set()  # reference numbers seen so far, to avoid duplicates

        for node in tree.findall('head/tobject/tobject.subject[@tobject.subject.ipr="IPTC"]'):
            refnum = node.get('tobject.subject.refnum')
            if refnum in visited:
                continue
            visited.add(refnum)

            # every refnum gets here once, so it cannot be in ``found`` yet
            if subject_codes.get(refnum):
                found.append({'name': subject_codes[refnum], 'qcode': refnum})

        return found
Exemplo n.º 13
0
    def get_subjects(self, tree):
        """Extract the IPTC subjects of the passed tree as name/qcode dictionaries.

        The reference numbers (``tobject.subject.refnum``) of all ``tobject.subject`` elements
        flagged with ``tobject.subject.ipr="IPTC"`` are gathered in document order, each one once.
        A subject is produced for every reference number that has a truthy entry in
        ``subject_codes``; that entry is its name.

        All entries will have both the name and qcode populated.

        :param tree: parsed document exposing ``findall``
        :return: a list of subject dictionaries
        """
        iptc_elements = tree.findall('head/tobject/tobject.subject[@tobject.subject.ipr="IPTC"]')

        # dict keys keep first-seen order, which drops repeated reference numbers
        unique_refnums = dict.fromkeys(elem.get('tobject.subject.refnum') for elem in iptc_elements)

        return [
            {'name': subject_codes[refnum], 'qcode': refnum}
            for refnum in unique_refnums
            if subject_codes.get(refnum)
        ]
Exemplo n.º 14
0
    def format_subjects(self, subjects):
        """Resolve the ingested Subject Codes to qcode/name pairs via ``subject_codes``.

        ``FormalName`` of each entry is the qcode. Empty or missing values are ignored, repeated
        qcodes are reported once at their first position, and qcodes absent from ``subject_codes``
        get an empty name.

        :param subjects: list of dicts where each dict gives the category the article is mapped to.
        :type subjects: list
        :returns [{"qcode": "01001000", "name": "archaeology"}, {"qcode": "01002000", "name": "architecture"}]
        :rtype list
        """
        # dict keys keep first-seen order and collapse repeated codes
        formal_names = dict.fromkeys(subject.get('FormalName') for subject in subjects)

        return [
            {'qcode': formal_name, 'name': subject_codes.get(formal_name, '')}
            for formal_name in formal_names
            if formal_name
        ]