Пример #1
0
def determine_entry_tag(e):
    """Classify a transcript entry and return its tag name.

    The decision is based on ``e.mp``, ``e.speaker`` and ``e.text``:

    * an entry with an MP whose speaker label is not a government one
      (Primeiro-Ministro, Ministr..., Secretári... with "Estado") is a
      deputy entry;
    * any other entry with a speaker label is tagged from that label;
    * an entry without a speaker label is tagged from how its text starts
      or ends.

    Returns an empty string when no rule matches.

    Side effects: an 'O Orador' / 'A Oradora' entry is handed to
    ``find_cont_speaker``; a Primeiro-Ministro entry gets ``e.mp`` assigned
    and is saved.
    """

    def holds_government_post(label):
        # Labels that must not be tagged as deputy entries even when an MP
        # is attached to the entry.
        return (
            label.startswith('Primeiro-Ministro')
            or label.startswith('Ministr')
            or (label.startswith(u'Secretári') and "Estado" in label)
        )

    def by_length(limit, short_tag, long_tag):
        # Texts shorter than `limit` characters get the "aparte" tag.
        if len(e.text) < limit:
            return short_tag
        return long_tag

    if e.mp and not holds_government_post(e.speaker):
        return by_length(60, 'deputado_aparte', 'deputado_intervencao')

    label = e.speaker
    if label:
        if label in ('O Orador', 'A Oradora'):
            find_cont_speaker(e)
            return 'continuacao'
        if label.startswith('Primeiro-Ministro'):
            # Imported locally, as the original code does.
            from deputados.utils import get_pm_from_date
            from deputados.models import MP
            # The helper's result is used as an MP shortname for the lookup.
            e.mp = MP.objects.get(shortname=get_pm_from_date(e.day.date))
            e.save()
            return by_length(30, 'pm_aparte', 'pm_intervencao')
        if label.startswith(u'Secretári'):
            # With "Estado" it is a secretary of state; without it, the
            # 'secretario' tag applies.
            if "Estado" in label:
                return by_length(30, 'secestado_aparte', 'secestado_intervencao')
            return 'secretario'
        if label.startswith('Ministr'):
            return by_length(30, 'ministro_aparte', 'ministro_intervencao')
        if label.startswith('Presidente'):
            return 'presidente_aparte' if re_concluir.search(e.text) else 'presidente'
        if label.startswith(('Vozes', 'Uma voz d')):
            return 'vozes_aparte'
        return ''

    # No speaker label: classify from the text itself.
    text = e.text
    openings = (
        (u'Aplauso', 'aplauso'),
        (u'Protesto', 'protesto'),
        (u'Risos', 'riso'),
    )
    for opening, tag in openings:
        if text.startswith(opening):
            return tag
    if re_voto.search(text):
        return 'voto'

    trimmed = text.strip()
    if trimmed == 'Pausa.':
        return 'pausa'
    if text.startswith('Entretanto, assumiu'):
        return 'nota'
    if text.startswith((u'SUMÁRIO', u'S U M Á R I O')):
        return 'sumario'
    if text.startswith(('ORDEM DO DIA', 'ANTES DA ORDEM DO DIA')):
        return 'nota'
    if text.startswith('Eram ') and trimmed.endswith('minutos.'):
        return 'hora'
    if (text.strip(' :.').endswith(u'presentes à sessão')
            or text.startswith('Estavam presentes os seguintes Srs. Deputados:')):
        return 'chamada_presentes'
    if text.endswith(u'faltaram à sessão:'):
        return 'chamada_ausentes'
    if text.endswith(u'por se encontrarem em missões internacionais:'):
        return 'chamada_missao'

    return ''
Пример #2
0
    def parse_raw_text(self):
        """Fill in speaker, MP, party, text and type from ``self.raw_text``.

        Returns None without changing anything when ``raw_text`` is empty.
        Otherwise ``parse_mp_from_raw_text`` splits the raw text into a
        speaker marker and the remaining text, and the marker decides what
        is stored:

        * an ``int`` -- used as an MP id; the party abbreviation is taken
          from that MP's mandate in the day's legislature;
        * ``'pm'`` -- the MP returned by ``get_pm_from_date`` for the day;
        * ``'ministro: ...'`` / ``'secestado: ...'`` -- looked up with
          ``get_minister``, by shortname when the marker contains a
          parenthesised name and by post otherwise;
        * any other non-empty string -- stored as the speaker name, cut to
          100 characters;
        * empty -- the raw text is stored as the text unchanged.

        After saving, ``determine_type()`` runs unless the type is already
        'continuacao' or one of the government intervention types. Finally,
        when ``guess_if_continuation`` accepts the entry it is marked as
        'continuacao', saved, and, if it has no MP, passed to
        ``find_cont_speaker``.
        """
        if not self.raw_text:
            return None
        from parsing import parse_mp_from_raw_text, find_cont_speaker
        speaker, text = parse_mp_from_raw_text(self.raw_text)
        self.normalize_text()

        # Marker prefixes for government members and the entry type each
        # one forces.
        gov_markers = (
            ('ministro: ', 'ministro_intervencao'),
            ('secestado: ', 'secestado_intervencao'),
        )
        # NOTE(review): presumably the maximum length of the speaker
        # field -- confirm against the model definition.
        max_speaker_length = 100

        if isinstance(speaker, int):
            self.mp = MP.objects.get(id=speaker)
            self.party = self.mp.mandate_on(self.day.legislature.number).party.abbrev
            self.text = text
            self.save()
        elif speaker:
            # Find the marker on the untouched string, so a stripped name
            # can never be mistaken for the 'pm' marker below.
            gov_match = next(
                ((prefix, entry_type) for prefix, entry_type in gov_markers
                 if speaker.startswith(prefix)),
                None,
            )
            if speaker == 'pm':
                from deputados.utils import get_pm_from_date
                # The helper's result is used as an MP object here
                # (current_party is read from it).
                self.mp = get_pm_from_date(self.day.date)
                self.speaker = 'Primeiro-Ministro'
                self.party = self.mp.current_party
                if self.type == 'deputado_intervencao':
                    self.type = 'pm_intervencao'
                self.save()
            elif gov_match:
                from deputados.utils import get_minister
                prefix, entry_type = gov_match
                speaker = speaker.replace(prefix, '').strip()
                if '(' in speaker:
                    # "Post (Name)": look the person up by the name inside
                    # the parentheses.
                    speaker = speaker.split('(')[1].rstrip(')')
                    govpost = get_minister(self.day.date, shortname=speaker)
                else:
                    govpost = get_minister(self.day.date, post=speaker)
                if govpost:
                    if govpost.mp:
                        self.mp = govpost.mp
                    else:
                        self.speaker = govpost.person_name
                    self.party = govpost.name
                else:
                    self.speaker = speaker
                self.type = entry_type
            else:
                # Always store the (possibly truncated) name. Previously a
                # name over the limit was truncated into a local variable
                # and never assigned, so the speaker was silently lost.
                self.speaker = speaker[:max_speaker_length]
            self.text = text
            self.save()
        else:
            self.text = self.raw_text
            self.save()
        # Special case: these types were decided above (or earlier) and must
        # not be overwritten by determine_type().
        if self.type not in ('continuacao', 'pm_intervencao', 'ministro_intervencao', 'secestado_intervencao'):
            self.determine_type()

        from parsing import guess_if_continuation
        if guess_if_continuation(self):
            self.type = 'continuacao'
            self.save()
            if not self.mp:
                find_cont_speaker(self)