Пример #1
0
 def test_get_urls(self):
     # get_urls() is expected to return only the URLs found in free-form
     # text, in the order they appear, without the surrounding words.
     sample = """
     Some junk
     http://airmozilla/manage/events/1068/ stuff
     https://etherpad.mozilla.org/sumo-mobile
     """
     expected = [
         'http://airmozilla/manage/events/1068/',
         'https://etherpad.mozilla.org/sumo-mobile',
     ]
     found = list(scraper.get_urls(sample))
     eq_(found, expected)
Пример #2
0
    def handle(self, *args, **options):
        """Print the scraped text of each given event's additional links.

        Each positional argument identifies one event:

        * an all-digit argument is looked up as the primary key;
        * an argument containing ``://`` is treated as a URL whose first
          path segment is the event slug;
        * anything else is used as the slug itself.

        Raises CommandError when no arguments are given, or when a URL
        argument has no path to take a slug from.
        """
        if not args:
            raise CommandError(self.args)

        for arg in args:
            if arg.isdigit():
                event = Event.objects.get(pk=arg)
            else:
                if '://' in arg:
                    # A path like "/<slug>/..." splits into
                    # ['', '<slug>', ...]. A URL with no path at all
                    # splits into [''] and has no slug, so report that
                    # as a usage error instead of a bare IndexError.
                    segments = urlparse(arg).path.split('/')
                    if len(segments) < 2:
                        raise CommandError(
                            'No event slug found in URL: %s' % arg
                        )
                    slug = segments[1]
                else:
                    slug = arg
                event = Event.objects.get(slug=slug)
            # The parenthesised single-argument form prints the same under
            # the Python 2 print statement and the Python 3 print function.
            print(scrape_urls(get_urls(event.additional_links))['text'])
Пример #3
0
def event_transcript(request, id):
    """Show and save the transcript form for a single event.

    POST: validate ``EventTranscriptForm`` against the event; when valid,
    save it, flash a success message and redirect to ``manage:event_edit``.
    An invalid form falls through and is re-rendered.

    GET: when one or more ``urls`` query parameters are present, pass them
    to ``scrape_urls`` and use any returned text as the initial transcript.
    Results that did not work are reported in one error message.

    Either way the ``manage/event_transcript.html`` template is rendered
    with the event, its Amara videos, the form and the URLs found in the
    event's additional links.
    """
    event = get_object_or_404(Event, id=id)

    from airmozilla.manage.scraper import get_urls, scrape_urls
    scrapeable_urls = list(get_urls(event.additional_links))

    if request.method == 'POST':
        form = forms.EventTranscriptForm(instance=event, data=request.POST)
        if form.is_valid():
            form.save()
            messages.success(request, 'Event transcript saved.')
            return redirect('manage:event_edit', event.pk)
    else:
        initial = {}
        requested_urls = request.GET.getlist('urls')
        if requested_urls:
            response = scrape_urls(requested_urls)

            scraped_text = response['text']
            if scraped_text:
                initial['transcript'] = scraped_text

            # One "<url>: <status>" line per result that did not work.
            failures = [
                '%s: %s' % (result['url'], result['status'])
                for result in response['results']
                if not result['worked']
            ]
            if failures:
                headline = 'Some things could not be scraped correctly'
                messages.error(request, '\n'.join([headline] + failures))

        form = forms.EventTranscriptForm(instance=event, initial=initial)

    context = {
        'event': event,
        'amara_videos': AmaraVideo.objects.filter(event=event),
        'form': form,
        'scrapeable_urls': scrapeable_urls,
    }
    return render(request, 'manage/event_transcript.html', context)
Пример #4
0
 def test_get_urls(self):
     # URLs are expected back in order of appearance, and the dots that
     # trail a URL at the end of a sentence must not be part of the result.
     sample = """
     Some junk
     http://airmozilla/manage/events/1068/ stuff
     https://etherpad.mozilla.org/sumo-mobile
     hello, this is madness
     https://docs.python.org/2/library/urlparse.html..
     madness I say https://github.com/mozilla/airmozilla........
     yes http://blog.mozilla.org/devtools/.
     """
     expected = [
         'http://airmozilla/manage/events/1068/',
         'https://etherpad.mozilla.org/sumo-mobile',
         'https://docs.python.org/2/library/urlparse.html',
         'https://github.com/mozilla/airmozilla',
         'http://blog.mozilla.org/devtools/',
     ]
     found = list(scraper.get_urls(sample))
     eq_(found, expected)
Пример #5
0
 def test_get_urls(self):
     # Each URL in the blob should come back once, in the order it occurs,
     # with any run of trailing full stops stripped off.
     blob = """
     Some junk
     http://airmozilla/manage/events/1068/ stuff
     https://etherpad.mozilla.org/sumo-mobile
     hello, this is madness
     https://docs.python.org/2/library/urlparse.html..
     madness I say https://github.com/mozilla/airmozilla........
     yes http://blog.mozilla.org/devtools/.
     """
     wanted = [
         'http://airmozilla/manage/events/1068/',
         'https://etherpad.mozilla.org/sumo-mobile',
         'https://docs.python.org/2/library/urlparse.html',
         'https://github.com/mozilla/airmozilla',
         'http://blog.mozilla.org/devtools/',
     ]
     extracted = list(scraper.get_urls(blob))
     eq_(extracted, wanted)