def parse(self, data):
    """Turn one five-field record into a ``Speech`` and optionally save it.

    ``data`` is unpacked as ``(url, date, title, speaker, text)``.  The raw
    text and speaker are passed through ``parse_speech``, and the speaker
    name it returns is resolved to a ``Speaker`` with ``self.get_or_create``.
    ``save()`` is called on the new speech only when ``self.commit`` is
    truthy.  Nothing is returned.
    """
    source_url, start_date, speech_title, raw_speaker, raw_text = data
    body, speaker_name = parse_speech(raw_text, raw_speaker)

    speaker_obj = self.get_or_create(
        Speaker,
        instance=self.instance,
        name=speaker_name,
    )

    new_speech = Speech(
        instance=self.instance,
        text=body,
        speaker=speaker_obj,
        start_date=start_date,
        title=speech_title,
        source_url=source_url,
    )

    # Dry runs build the object but never save it.
    if not self.commit:
        return
    new_speech.save()
def parse(self, data):
    """Build a ``Speech`` from one scraped page and optionally save it.

    ``data`` is a mapping that provides ``'url'``, ``'soup'`` and
    ``'title'``.  ``parse_speech(soup)`` yields
    ``(text, name, image_url, date)``; if it raises ``ParsingError`` the
    page is reported on stdout and skipped.

    ``save()`` is only ever called when ``self.commit`` is truthy.  That
    now includes the speaker image update, which previously saved the
    speaker unconditionally and so wrote data even on a dry run.
    Nothing is returned.
    """
    url = data['url']
    soup = data['soup']

    try:
        text, name, image_url, date = parse_speech(soup)
    except ParsingError as e:
        # Fall back to the exception itself when it carries no arguments,
        # so reporting a skip can never raise IndexError.
        reason = e.args[0] if e.args else e
        # Single parenthesised argument: valid as a Python 2 print
        # statement and as a Python 3 print() call.
        print('SKIPPING {} - {}'.format(url, reason))
        return

    # If get_or_create in BaseParser supported defaults, and returned
    # a 'created' boolean in the usual way, this could be slightly neater
    speaker = self.get_or_create(
        Speaker,
        instance=self.instance,
        name=name,
    )

    if not speaker.image:
        speaker.image = urljoin(self.index_url, image_url)
        # Respect the dry-run flag, exactly as the speech below does.
        if self.commit:
            speaker.save()

    speech = Speech(
        instance=self.instance,
        text=text,
        speaker=speaker,
        start_date=date,
        title=data['title'],
        source_url=url,
        type='speech',
    )

    if self.commit:
        speech.save()
def parse(self, data):
    """Import the speeches of one transcript beneath a top-level section.

    Returns immediately when ``self.skip_transcript(data)`` is truthy.
    Otherwise a top-level ``Section`` is obtained with
    ``self.get_or_create`` (keyed on ``data['url']``, the heading from
    ``self.top_section_heading`` and the parent from
    ``self.get_parent_section``), and every truthy item produced by
    ``self.parse_transcript(data)`` becomes a ``Speech``.

    New sub-sections and speeches have ``save()`` called only when
    ``self.commit`` is truthy.  Nothing is returned.
    """
    if self.skip_transcript(data):
        return

    fallback_date = data.get('date')

    top_section = self.get_or_create(
        Section,
        instance=self.instance,
        source_url=data['url'],
        heading=self.top_section_heading(data),
        parent=self.get_parent_section(data),
    )

    for parsed in self.parse_transcript(data):
        if not parsed:
            continue

        # Work out which section the speech belongs to; speeches without
        # their own section sit directly under the top-level one.
        section = top_section
        if parsed.section:
            existing = parsed.section.object
            if existing:
                section = existing
            else:
                section = Section(
                    instance=self.instance,
                    heading=self.prettify(parsed.section.heading),
                    parent=top_section,
                )
                if self.commit:
                    section.save()
                # Remember the Section on the parsed section so it is
                # reused the next time this parsed section is seen.
                parsed.section.object = section

        speaker = None
        if parsed.speaker:
            speaker = self.get_or_create(
                Speaker,
                instance=self.instance,
                name=self.prettify(parsed.speaker),
            )

        # Fill in a missing type on the parsed item itself.
        if not parsed.type:
            if speaker or parsed.speaker_display:
                parsed.type = 'speech'
            else:
                parsed.type = 'narrative'

        # Each entry of parsed.text is joined with spaces into one
        # paragraph; paragraphs are then wrapped in <p> tags.
        paragraphs = [' '.join(chunk) for chunk in parsed.text]
        html = '<p>%s</p>' % '</p>\n<p>'.join(paragraphs)

        start_date = parsed.date or fallback_date

        record = Speech(
            instance=self.instance,
            section=section,
            text=html,
            speaker=speaker,
            speaker_display=parsed.speaker_display,
            type=parsed.type,
            start_date=start_date,
            start_time=parsed.time,
        )

        if self.commit:
            record.save()
def parse(self, data):
    """Create sections and speeches for a single transcript.

    Does nothing when ``self.skip_transcript(data)`` is truthy.  A
    top-level ``Section`` for ``data['url']`` is fetched or created first;
    each truthy item from ``self.parse_transcript(data)`` is then turned
    into a ``Speech`` placed either in its own sub-section or directly in
    the top-level section.

    ``save()`` is called on new sub-sections and speeches only when
    ``self.commit`` is truthy.  Nothing is returned.
    """
    if self.skip_transcript(data):
        return

    transcript_date = data.get('date')
    root = self.get_or_create(
        Section,
        instance=self.instance,
        source_url=data['url'],
        heading=self.top_section_heading(data),
        parent=self.get_parent_section(data),
    )

    for item in self.parse_transcript(data):
        if not item:
            continue

        if not item.section:
            target_section = root
        elif item.section.object:
            # A Section was already built for this parsed section.
            target_section = item.section.object
        else:
            pretty_heading = self.prettify(item.section.heading)
            target_section = Section(
                instance=self.instance,
                heading=pretty_heading,
                parent=root,
            )
            if self.commit:
                target_section.save()
            # Cache it on the parsed section for later items.
            item.section.object = target_section

        if item.speaker:
            pretty_name = self.prettify(item.speaker)
            who = self.get_or_create(
                Speaker,
                instance=self.instance,
                name=pretty_name,
            )
        else:
            who = None

        # An untyped item is a 'speech' when somebody is credited with it,
        # otherwise a 'narrative'.  The parsed item itself is updated.
        if not item.type:
            credited = who or item.speaker_display
            item.type = 'speech' if credited else 'narrative'

        joined = '</p>\n<p>'.join([' '.join(part) for part in item.text])
        markup = '<p>%s</p>' % joined

        when = item.date or transcript_date

        fields = dict(
            instance=self.instance,
            section=target_section,
            text=markup,
            speaker=who,
            speaker_display=item.speaker_display,
            type=item.type,
            start_date=when,
            start_time=item.time,
        )
        new_speech = Speech(**fields)

        if self.commit:
            new_speech.save()
def submit(request):
    """Store an uploaded audio file as a new ``Speech`` for the current user.

    On a POST request the file under ``request.FILES['audio']`` is saved
    as a ``Speech`` owned by ``request.user``.  The title is the posted
    ``title`` (or ``<username>_<timestamp>`` when it is missing or empty)
    followed by ``_<timestamp>``.

    Returns ``HttpResponse('200')``.  A missing ``'audio'`` upload still
    raises from the ``request.FILES`` lookup, as before.
    """
    if request.method == 'POST':
        # request.POST is an immutable QueryDict, so the previous approach
        # of assigning default values into it raised AttributeError as
        # soon as a field was missing.  Read the value into a local
        # instead.  The old 'accuracy', 'pacing' and 'transcription'
        # defaults were never read by this view, so they are dropped.
        base_title = request.POST.get('title')
        if not base_title:
            base_title = request.user.username + '_' + str(time.time())

        # A timestamp suffix is always appended, as in the original.
        title = base_title + '_' + str(time.time())

        speech = Speech(
            title=title,
            filefield=request.FILES['audio'],
            owner=request.user,
        )
        speech.save()

    # NOTE(review): the source was collapsed onto one line, so it is not
    # certain whether this return sat inside the POST branch.  Returning
    # here means non-POST requests also get a response rather than None.
    return HttpResponse('200')
def parse(self, data):
    """Create a ``Speech`` of type ``'speech'`` from a five-field record.

    ``data`` is unpacked as ``(url, date, heading, speaker, text)``.
    ``parse_speech`` is applied to the text and speaker first, and the
    speaker name it returns is looked up or created through
    ``self.get_or_create``.  The speech has ``save()`` called only when
    ``self.commit`` is truthy.  Nothing is returned.
    """
    page_url, spoken_on, heading, speaker_hint, raw_text = data
    cleaned_text, speaker_name = parse_speech(raw_text, speaker_hint)

    speaker_record = self.get_or_create(
        Speaker,
        instance=self.instance,
        name=speaker_name,
    )

    speech_kwargs = dict(
        instance=self.instance,
        text=cleaned_text,
        speaker=speaker_record,
        start_date=spoken_on,
        heading=heading,
        source_url=page_url,
        type='speech',
    )
    speech = Speech(**speech_kwargs)

    if self.commit:
        speech.save()
def parse(self, data):
    """Load one transcript as a titled section tree of speeches.

    Skipped entirely when ``self.skip_transcript(data)`` is truthy.  A
    top-level ``Section`` (keyed on ``data['url']`` and the title from
    ``self.top_section_title``) is fetched or created, then every truthy
    item from ``self.parse_transcript(data)`` is stored as a ``Speech`` in
    that section or in a titled sub-section of it.

    New sub-sections and speeches have ``save()`` called only when
    ``self.commit`` is truthy.  Nothing is returned.
    """
    if self.skip_transcript(data):
        return

    default_date = data.get('date')

    top_section = self.get_or_create(
        Section,
        instance=self.instance,
        source_url=data['url'],
        title=self.top_section_title(data),
    )

    for entry in self.parse_transcript(data):
        if not entry:
            continue

        # Speeches with no section of their own go under the top section.
        section = top_section
        if entry.section:
            known = entry.section.object
            if known:
                section = known
            else:
                section = Section(
                    instance=self.instance,
                    title=self.prettify(entry.section.title),
                    parent=top_section,
                )
                if self.commit:
                    section.save()
                # Keep the Section on the parsed section for reuse.
                entry.section.object = section

        speaker = None
        if entry.speaker:
            speaker = self.get_or_create(
                Speaker,
                instance=self.instance,
                name=self.prettify(entry.speaker),
            )

        # One <p> per entry of entry.text, each joined with spaces.
        paragraphs = [' '.join(fragment) for fragment in entry.text]
        html = '<p>%s</p>' % '</p>\n<p>'.join(paragraphs)

        start_date = entry.date or default_date

        stored = Speech(
            instance=self.instance,
            section=section,
            text=html,
            speaker=speaker,
            speaker_display=entry.speaker_display,
            start_date=start_date,
            start_time=entry.time,
        )

        if self.commit:
            stored.save()
def submit_silent(request):
    """Save an uploaded recording and its scores as a ``Speech``.

    On a POST request the file under ``request.FILES['data']`` is saved
    as a ``Speech`` owned by ``request.user``.  Fields are taken from
    ``request.POST`` with these fallbacks:

    * title: ``fname`` if posted; otherwise ``title``; otherwise (when
      ``title`` is missing or empty) ``<username>_<timestamp>``.
    * ``transcription``: ``''``.
    * ``accuracy`` and ``pacing``: ``0.0``.

    Returns a redirect to ``/accounts/profile``.  A missing ``'data'``
    upload still raises from the ``request.FILES`` lookup, as before.
    """
    if request.method == 'POST':
        # request.POST is an immutable QueryDict, so the previous approach
        # of assigning default values into it raised AttributeError as
        # soon as a field was missing.  Read with fallbacks instead.
        post = request.POST

        if 'fname' in post:
            title = post['fname']
        else:
            title = post.get('title')
            if not title:
                title = request.user.username + '_' + str(time.time())

        speech = Speech(
            title=title,
            filefield=request.FILES['data'],
            transcription=post.get('transcription', ''),
            owner=request.user,
            accuracy=post.get('accuracy', 0.0),
            pacing=post.get('pacing', 0.0),
        )
        speech.save()

    # NOTE(review): the source was collapsed onto one line, so it is not
    # certain whether this return sat inside the POST branch.  Returning
    # here means non-POST requests also get a response rather than None.
    return HttpResponseRedirect('/accounts/profile')
obj = model(**attrs) if commit: obj.save() return obj # First we need an instance instance = get_or_create(Instance, label='charles-taylor') # And then we need to parse some transcripts for date, url, text in get_transcripts(): date_section = get_or_create(Section, instance=instance, title='Hearing, %s' % date.strftime('%d %B %Y').lstrip('0')) if date.isoformat() == '2006-07-21': continue # Is garbled for speech in parse_transcript(text, date): if not speech: continue if speech.section: section = get_or_create(Section, instance=instance, title=prettify(speech.section.title), parent=date_section) else: section = date_section if speech.speaker: speaker = prettify(speech.speaker) speaker = get_or_create(Speaker, instance=instance, name=speaker) else: speaker = None text = '\n\n'.join([ ' '.join(s) for s in speech.text ]) #print speech.section, speaker, text speech = Speech(instance=instance, section=section, text=text, speaker=speaker, start_date=date, start_time=speech.time) if commit: speech.save()