Пример #1
0
 def parse(self, data):
     """Build one Speech from a scraped record and save it when committing.

     ``data`` is unpacked as exactly five items, in this order:
     ``(url, date, title, speaker, text)``. The raw text and speaker are
     passed through ``parse_speech`` before being used, and the speaker
     record is obtained through ``self.get_or_create``.

     The Speech is only saved when ``self.commit`` is truthy.
     """
     source_url, start_date, title, raw_speaker, raw_text = data
     body, speaker_name = parse_speech(raw_text, raw_speaker)
     speaker_obj = self.get_or_create(
         Speaker,
         instance=self.instance,
         name=speaker_name,
     )
     speech = Speech(
         instance=self.instance,
         text=body,
         speaker=speaker_obj,
         start_date=start_date,
         title=title,
         source_url=source_url,
     )
     if not self.commit:
         # Dry run: the Speech object is built but never written.
         return
     speech.save()
Пример #2
0
    def parse(self, data):
        """Create a Speech (and its Speaker) from one scraped speech page.

        ``data`` is a mapping read by the keys ``'url'``, ``'soup'`` and
        ``'title'``. The soup is handed to ``parse_speech``; if that raises
        ``ParsingError`` a ``SKIPPING`` line is printed and nothing is
        created for this page.

        The speaker is always saved here; the Speech itself is only saved
        when ``self.commit`` is truthy.
        """
        page_url = data['url']
        soup = data['soup']

        try:
            parsed = parse_speech(soup)
        except ParsingError as err:
            print('SKIPPING {} - {}'.format(page_url, err.args[0]))
            return
        body, speaker_name, image_path, spoken_on = parsed

        # get_or_create in BaseParser takes no defaults and gives no
        # 'created' flag, so the image is filled in afterwards when empty.
        speaker = self.get_or_create(Speaker, instance=self.instance, name=speaker_name)
        if not speaker.image:
            # image_path is resolved against the index page URL.
            speaker.image = urljoin(self.index_url, image_path)
        speaker.save()

        speech_fields = dict(
            instance=self.instance,
            text=body,
            speaker=speaker,
            start_date=spoken_on,
            title=data['title'],
            source_url=page_url,
            type='speech',
        )
        speech = Speech(**speech_fields)
        if self.commit:
            speech.save()
Пример #3
0
    def parse(self, data):
        """Turn one transcript into Section and Speech objects.

        Returns immediately when ``self.skip_transcript(data)`` is truthy.
        Otherwise a top-level Section is obtained via ``self.get_or_create``
        (keyed on ``data['url']``, the top section heading and the parent
        section), and every non-empty item yielded by
        ``self.parse_transcript(data)`` becomes a Speech.

        Sub-sections and speeches are only saved when ``self.commit`` is
        truthy. A speech with no date of its own uses ``data.get('date')``.
        """
        if self.skip_transcript(data):
            return

        default_date = data.get('date')
        top_section = self.get_or_create(
            Section,
            instance=self.instance,
            source_url=data['url'],
            heading=self.top_section_heading(data),
            parent=self.get_parent_section(data),
        )

        for parsed in self.parse_transcript(data):
            if not parsed:
                continue

            # Choose the section: the top-level one, one already built for
            # this parsed section, or a new child of the top-level section.
            if not parsed.section:
                section = top_section
            elif parsed.section.object:
                section = parsed.section.object
            else:
                section = Section(
                    instance=self.instance,
                    heading=self.prettify(parsed.section.heading),
                    parent=top_section,
                )
                if self.commit:
                    section.save()
                # Remember it so later speeches in this section reuse it.
                parsed.section.object = section

            speaker = None
            if parsed.speaker:
                speaker_name = self.prettify(parsed.speaker)
                speaker = self.get_or_create(
                    Speaker,
                    instance=self.instance,
                    name=speaker_name,
                )

            # Untyped items count as speech when anyone is attributed,
            # otherwise as narrative.
            if not parsed.type:
                if speaker or parsed.speaker_display:
                    parsed.type = 'speech'
                else:
                    parsed.type = 'narrative'

            # Each entry of parsed.text is a sequence of strings forming
            # one paragraph.
            paragraphs = [' '.join(pieces) for pieces in parsed.text]
            html = '<p>%s</p>' % '</p>\n<p>'.join(paragraphs)

            record = Speech(
                instance=self.instance,
                section=section,
                text=html,
                speaker=speaker,
                speaker_display=parsed.speaker_display,
                type=parsed.type,
                start_date=parsed.date or default_date,
                start_time=parsed.time,
            )
            if self.commit:
                record.save()
Пример #4
0
    def parse(self, data):
        """Import a transcript as a tree of Sections holding Speeches.

        Nothing happens when ``self.skip_transcript(data)`` is truthy.
        Otherwise the transcript's top Section comes from
        ``self.get_or_create`` and each non-empty item produced by
        ``self.parse_transcript(data)`` is converted into a Speech placed
        in that Section or in a child Section.

        New child Sections and Speeches are saved only if ``self.commit``
        is truthy.
        """
        if self.skip_transcript(data):
            return

        fallback_date = data.get('date')
        top_section = self.get_or_create(
            Section,
            instance=self.instance,
            source_url=data['url'],
            heading=self.top_section_heading(data),
            parent=self.get_parent_section(data),
        )

        for item in self.parse_transcript(data):
            if not item:
                continue

            # Default to the top section; an item with its own section uses
            # the Section object cached on it, building one on first use.
            section = top_section
            if item.section:
                section = item.section.object
                if not section:
                    heading = self.prettify(item.section.heading)
                    section = Section(
                        instance=self.instance, heading=heading,
                        parent=top_section)
                    if self.commit:
                        section.save()
                    item.section.object = section

            if item.speaker:
                pretty_name = self.prettify(item.speaker)
                speaker = self.get_or_create(
                    Speaker, instance=self.instance, name=pretty_name)
            else:
                speaker = None

            if not item.type:
                attributed = speaker or item.speaker_display
                item.type = 'speech' if attributed else 'narrative'

            # Join each paragraph's pieces with spaces, then wrap the
            # paragraphs in <p> tags.
            joined = '</p>\n<p>'.join(' '.join(piece) for piece in item.text)
            markup = '<p>%s</p>' % joined

            speech_date = item.date or fallback_date
            new_speech = Speech(
                instance=self.instance, section=section, text=markup,
                speaker=speaker, speaker_display=item.speaker_display,
                type=item.type, start_date=speech_date,
                start_time=item.time,
            )
            if self.commit:
                new_speech.save()
Пример #5
0
def submit(request):
    """Store an uploaded audio file as a new Speech owned by the current user.

    On a POST request the upload is read from ``request.FILES['audio']`` and
    the optional ``title`` form field is used as the base of the title. A
    missing or empty title falls back to ``<username>_<timestamp>``; a
    ``_<timestamp>`` suffix is then always appended. Requests with any other
    method create nothing.

    A POST without an ``audio`` upload propagates the lookup error raised by
    ``request.FILES``.

    Returns an HttpResponse whose body is the string ``'200'`` in every case.
    """
    if request.method == 'POST':
        # request.POST is an immutable QueryDict: assigning defaults into it
        # raises AttributeError, so the fallback is resolved into a local.
        # The former accuracy/pacing/transcription defaults were never read
        # by this view and are dropped.
        base_title = request.POST.get('title')
        if not base_title:
            base_title = request.user.username + '_' + str(time.time())
        title = base_title + '_' + str(time.time())
        filefield = request.FILES['audio']
        owner = request.user
        speech = Speech(title=title, filefield=filefield, owner=owner)
        speech.save()

    return HttpResponse('200')
Пример #6
0
 def parse(self, data):
     """Create a 'speech'-typed Speech from one scraped record.

     ``data`` must unpack into five items:
     ``(url, date, heading, speaker, text)``. ``parse_speech`` is applied to
     the text and speaker first, and the speaker record comes from
     ``self.get_or_create``.

     The Speech is saved only when ``self.commit`` is truthy.
     """
     url, date, heading, speaker_raw, text_raw = data
     cleaned_text, speaker_name = parse_speech(text_raw, speaker_raw)
     speaker = self.get_or_create(Speaker, instance=self.instance, name=speaker_name)
     speech_kwargs = {
         'instance': self.instance,
         'text': cleaned_text,
         'speaker': speaker,
         'start_date': date,
         'heading': heading,
         'source_url': url,
         'type': 'speech',
     }
     speech = Speech(**speech_kwargs)
     if self.commit:
         speech.save()
Пример #7
0
    def parse(self, data):
        """Convert one transcript into titled Sections and their Speeches.

        Does nothing when ``self.skip_transcript(data)`` is truthy. Otherwise
        the top Section is obtained via ``self.get_or_create`` from
        ``data['url']`` and ``self.top_section_title(data)``, and each
        non-empty item from ``self.parse_transcript(data)`` becomes a Speech.

        Child Sections and Speeches are saved only when ``self.commit`` is
        truthy. Speeches without their own date use ``data.get('date')``.
        """
        if self.skip_transcript(data):
            return

        transcript_date = data.get('date')
        top_section = self.get_or_create(
            Section,
            instance=self.instance,
            source_url=data['url'],
            title=self.top_section_title(data),
        )

        for entry in self.parse_transcript(data):
            if not entry:
                continue

            if not entry.section:
                section = top_section
            elif entry.section.object:
                # A Section was already built for this parsed section.
                section = entry.section.object
            else:
                section_title = self.prettify(entry.section.title)
                section = Section(
                    instance=self.instance,
                    title=section_title,
                    parent=top_section,
                )
                if self.commit:
                    section.save()
                entry.section.object = section

            speaker = None
            if entry.speaker:
                speaker = self.get_or_create(
                    Speaker,
                    instance=self.instance,
                    name=self.prettify(entry.speaker),
                )

            # One <p> per entry of entry.text, its pieces joined by spaces.
            paragraphs = [' '.join(chunk) for chunk in entry.text]
            body = '<p>%s</p>' % '</p>\n<p>'.join(paragraphs)

            speech_obj = Speech(
                instance=self.instance,
                section=section,
                text=body,
                speaker=speaker,
                speaker_display=entry.speaker_display,
                start_date=entry.date or transcript_date,
                start_time=entry.time,
            )
            if self.commit:
                speech_obj.save()
Пример #8
0
def submit_silent(request):
    """Store an uploaded recording and its scores as a new Speech.

    On a POST request the upload is read from ``request.FILES['data']`` and
    the following optional form fields are used, with these fallbacks:

    - ``title``: ``<username>_<timestamp>`` when missing or empty.
    - ``fname``: the (possibly defaulted) title when missing; this is the
      value stored as the Speech title.
    - ``accuracy`` and ``pacing``: ``0.0`` when missing.
    - ``transcription``: ``''`` when missing.

    Requests with any other method create nothing. A POST without a ``data``
    upload propagates the lookup error raised by ``request.FILES``.

    Returns a redirect to ``/accounts/profile`` in every case.
    """
    if request.method == 'POST':
        # request.POST is an immutable QueryDict: writing defaults back into
        # it raises AttributeError, so they are resolved with .get() instead.
        form = request.POST
        default_title = form.get('title')
        if not default_title:
            default_title = request.user.username + '_' + str(time.time())
        title = form.get('fname', default_title)
        filefield = request.FILES['data']
        speech = Speech(
            title=title,
            filefield=filefield,
            transcription=form.get('transcription', ''),
            owner=request.user,
            accuracy=form.get('accuracy', 0.0),
            pacing=form.get('pacing', 0.0),
        )
        speech.save()

    return HttpResponseRedirect('/accounts/profile')
Пример #9
0
        obj = model(**attrs)
        if commit:
            obj.save()
    return obj

# Everything imported below hangs off this one instance.
instance = get_or_create(Instance, label='charles-taylor')

# Walk every transcript: one section per hearing date, optional child
# sections inside it, and one Speech per parsed item.
for date, url, text in get_transcripts():
    # Day of month without a leading zero, e.g. "Hearing, 7 January 2008".
    hearing_title = 'Hearing, %s' % date.strftime('%d %B %Y').lstrip('0')
    date_section = get_or_create(Section, instance=instance, title=hearing_title)

    # NOTE(review): the date section above is created before this skip, so
    # the garbled date still gets a section with no speeches; confirm that
    # is intended.
    if date.isoformat() == '2006-07-21':
        continue  # Is garbled

    for speech in parse_transcript(text, date):
        if not speech:
            continue

        section = date_section
        if speech.section:
            section = get_or_create(
                Section,
                instance=instance,
                title=prettify(speech.section.title),
                parent=date_section,
            )

        speaker = None
        if speech.speaker:
            speaker = prettify(speech.speaker)
            speaker = get_or_create(Speaker, instance=instance, name=speaker)

        # Paragraphs are separated by blank lines; the pieces of each
        # paragraph are joined with single spaces.
        text = '\n\n'.join(' '.join(s) for s in speech.text)
        speech = Speech(
            instance=instance,
            section=section,
            text=text,
            speaker=speaker,
            start_date=date,
            start_time=speech.time,
        )
        if commit:
            speech.save()