def add_presentation(url, category):
    """Scrape a presentation listing and store every talk it links to.

    Downloads ``url``, follows each ``div.presentation > h3 > a`` link to the
    talk's own page on us.pycon.org, and saves title, speakers, abstract and
    audience level through ``db.add_talk``. Talks whose title contains
    "canceled", or whose page lists no audience level, are skipped.

    Args:
        url: Address of the listing page.
        category: Category value stored on each ``Talk`` created here.
    """
    print("Collecting from {}".format(url))
    xpath = '//div[contains(@class,"presentation")]/h3/a'
    entries = html.fromstring(requests.get(url).text).xpath(xpath)
    ## Iterate through and extract the relevant content
    for a in entries:
        title = a.text
        if 'canceled' in title.lower():
            continue
        # The listing only carries relative links; details live on the
        # talk's own page.
        root = html.fromstring(
            requests.get('https://us.pycon.org' + a.get('href')).text)
        speakers = root.xpath('//h4/a/text()')
        abstract = root.xpath('//div[@class="abstract"]')[0].text_content()
        try:
            level = root.xpath('//dl/dd/text()')[0]
        except IndexError:
            # xpath() returns a list, so a page without a <dd> entry fails
            # with IndexError (not ValueError); skip such talks.
            continue
        level = 'Beginner' if level == 'Novice' else level
        talk = Talk(category=category,
                    conference_id=conference.id,
                    title=title)
        data = db.TalkData(speakers, [], [])
        # Abstracts are capped at 10000 characters before storing.
        talk.abstract = abstract[:10000]
        talk.level = level
        db.add_talk(talk, **data._asdict())
def add_presentation_from_table(url, category):
    """Scrape a schedule table and store one talk per populated slot.

    Every ``td`` whose class contains "slot" is inspected; cells without a
    title link are reported and skipped, as are canceled talks. The audience
    level is read from the second line of the cell's first HTML comment.

    Args:
        url: Address of the schedule page.
        category: Category value stored on each ``Talk`` created here.
    """
    print("Collecting from {}".format(url))
    page = html.fromstring(requests.get(url).text)
    for cell in page.xpath('//td[contains(@class,"slot")]'):
        link = cell.find('./span[@class="title"]/a')
        if link is None:
            print('skipping...')
            continue
        title = link.text
        abstract = link.get('title')
        if 'canceled' in title.lower():
            continue
        if 'Jasmine Hsu' in title:
            # Special case: for this one entry the title text is split on
            # commas to obtain the speaker list, and the title is hard-coded.
            speakers = title.split(',')
            title = 'Fire, bullets, and productivity'
            level = 'Beginner'
        else:
            raw_speakers = cell.findtext('./span[@class="speaker"]').strip()
            separator = ',' if ',' in raw_speakers else '&'
            speakers = [s for s in raw_speakers.split(separator)
                        if s.strip() and '?' not in s]
            level = cell.xpath('./comment()')[0].text.splitlines()[1].strip()
            if level == 'Novice':
                level = 'Beginner'
        talk = Talk(category=category,
                    conference_id=conference.id,
                    title=title)
        data = db.TalkData(speakers, [], [])
        # Abstracts are capped at 10000 characters before storing.
        talk.abstract = abstract[:10000]
        talk.level = level
        db.add_talk(talk, **data._asdict())
Exemplo n.º 3
0
def add_new_talk(title, abstract, speaker, topic):
    """Store a single talk with one speaker and one topic.

    If the abstract contains the Wayback Machine ``__wbhack.init(...)``
    snippet, only the text after its first occurrence is kept.

    Args:
        title: Talk title.
        abstract: Abstract text; may be empty or ``None``.
        speaker: Name of the only speaker.
        topic: Name of the only topic.
    """
    marker = "__wbhack.init('https://web.archive.org/web');"
    if abstract and marker in abstract:
        # Keep only what follows the first marker occurrence.
        abstract = abstract.partition(marker)[2]
    talk = Talk(category=Talk.TALK,
                conference_id=conference.id,
                title=title,
                abstract=abstract)
    data = db.TalkData([speaker], [topic], [])
    db.add_talk(talk, **data._asdict())
    print("adding new one not in list:", speaker, title, "\n***\n")
def add_presentation(url, category):
    """Scrape a flat "box-content" listing and store each talk.

    From the first ``h2`` onward, the children of the box-content div are
    consumed in groups of three: heading (title), paragraph (speakers on its
    first line) and div (abstract). Canceled talks are skipped.

    Args:
        url: Address of the listing page.
        category: Category value stored on each ``Talk`` created here.
    """
    print("Collecting from {}".format(url))
    xpath = '//div[contains(@class,"box-content")]/*'
    nodes = html.fromstring(requests.get(url).text).xpath(xpath)
    start = next(idx for idx, node in enumerate(nodes) if node.tag == 'h2')
    # Only complete (h2, p, div) groups are processed; leftovers are ignored.
    group_count = (len(nodes) - start) // 3
    for offset in range(start, start + 3 * group_count, 3):
        heading, byline, body = nodes[offset:offset + 3]
        title = heading.text_content()
        if 'canceled' in title.lower():
            continue
        first_line = byline.text_content().strip('\n ').split('\n', 1)[0]
        speakers = [s for s in first_line.split(',')
                    if s.strip() and '?' not in s]
        abstract = body.text_content().strip()
        talk = Talk(category=category,
                    conference_id=conference.id,
                    title=title)
        data = db.TalkData(speakers, [], [])
        # Abstracts are capped at 10000 characters before storing.
        talk.abstract = abstract[:10000]
        db.add_talk(talk, **data._asdict())
def add_presentation_from_table(url, category):
    """Scrape a schedule table and store one talk per populated slot.

    Every ``td`` whose class contains "slot" is inspected; cells without a
    title link and canceled talks are skipped. Speakers and audience level
    come from the cell's ``speaker`` and ``audience_level`` spans.

    Args:
        url: Address of the schedule page.
        category: Category value stored on each ``Talk`` created here.
    """
    print("Collecting from {}".format(url))
    page = html.fromstring(requests.get(url).text)
    for cell in page.xpath('//td[contains(@class,"slot")]'):
        link = cell.find('./span[@class="title"]/a')
        if link is None:
            continue
        title = link.text
        abstract = link.get('title')
        if 'canceled' in title.lower():
            continue
        raw_speakers = cell.findtext('./span[@class="speaker"]').strip()
        # Names are comma-separated when a comma is present, else '&'-separated.
        separator = ',' if ',' in raw_speakers else '&'
        speakers = raw_speakers.split(separator)
        level = cell.findtext('./span[@class="audience_level"]').strip()
        if level == 'Novice':
            level = 'Beginner'
        talk = Talk(category=category,
                    conference_id=conference.id,
                    title=title)
        data = db.TalkData(speakers, [], [])
        # Abstracts are capped at 10000 characters before storing.
        talk.abstract = abstract[:10000]
        talk.level = level
        db.add_talk(talk, **data._asdict())
                volunteer.volunteering.append(conference)

db.session.commit()

## Talks
## ~~~~~~
##
## Keynotes
# Each entry: (speaker names, title, organizations, video URL, topics).
# The only entry is currently commented out, so the tuple is empty.
keynotes = (
    #(['Kelsey Hightower'], 'Kubernetes for Pythonistas', ['Google'], 'http://pyvideo.org/pycon-us-2017/keynote-kubernetes-for-pythonistas.html', ['voice', 'kubernetes', 'containers']),
)
for speaker_names, title, org, url, topics in keynotes:
    data = db.TalkData(speaker_names, topics, org)
    talk = Talk(category=Talk.KEYNOTE, conference_id=conference.id)
    talk.title, talk.video_url = title, url
    db.add_talk(talk, **data._asdict())


## Tutorials, talks, and posters
def add_presentation(url, category):
    """Fetch a "box-content" listing and walk its entries three at a time.

    From the first ``h2`` onward, the children of the box-content div are
    taken in (h2, p, div) groups and each group's title is read.

    NOTE(review): this definition looks cut short. As written, the loop only
    reads the title and skips canceled entries; nothing is stored and
    ``category`` is never used.

    Args:
        url: Address of the listing page.
        category: Unused in the visible body.
    """
    print("Collecting from {}".format(url))
    xpath = '//div[contains(@class,"box-content")]/*'
    entries = html.fromstring(requests.get(url).text).xpath(xpath)
    # Index of the first <h2>; raises StopIteration when the page has none.
    first = next(i for i, e in enumerate(entries) if e.tag == 'h2')
    ## Iterate through and extract the relevant content
    for i in range(int((len(entries) - first) / 3)):
        # Consecutive siblings: heading, paragraph, div.
        h2, p, div = entries[first + 3 * i:first + 3 * (1 + i)]
        title = h2.text_content()
        if 'canceled' in title.lower():
            continue
db.session.commit()


## Talks
# Each entry: (speaker names, title, abstract or None).
keynotes = (
    (['Guido van Rossum'], 'Update on the state of Python', None),
    (['Steve Huffman', 'Alexis Ohanian'], 'Reddit', "Reddit's origin and the switch to Python")
)
for speaker_names, title, abstract in keynotes:
    talk = Talk(category=Talk.KEYNOTE, conference_id=conference.id)
    talk.title = title
    # Create this talk's data before touching it; appending first would
    # modify the previous iteration's object and the organization would be
    # lost when ``data`` is rebound.
    data = db.TalkData(speaker_names, [], [])
    if title == 'Reddit':
        data.organization_names.append('Reddit')
    if abstract:
        talk.abstract = abstract
    db.add_talk(talk, **data._asdict())
    

## Tutorials
##  ==> Ignore these...the links are broken and only the presenters'
##      last names are given, so it is hard to create an entry.
##
#wayback = 'https://web.archive.org/web/20090518174359/'
#url = wayback + 'http://us.pycon.org:80/2009/tutorials/schedule'
#xpath = '//div[@id="tutorials"]//li'
#entries = html.fromstring(requests.get(url).text).xpath(xpath)
## Iterate through and extract the relevant content
#for e in entries:
#    tmp = e.text_content()
#    if 'cancel' in tmp.lower():
Exemplo n.º 8
0
## Talks
## ~~~~~~
##
## Keynotes
# Each entry: (speaker names, title, organizations, video URL, topics).
keynotes = (
    (
        ['Eben Upton'],
        'The Raspberry Pi: providing children around the world the opportunity to learn programming',
        ['Raspberry Pi Foundation'],
        'http://pyvideo.org/pycon-us-2013/keynote-2.html',
        ['education'],
    ),
    (
        ['Raymond Hettinger'],
        'What makes Python Awesome',
        [],
        'http://pyvideo.org/pycon-us-2013/keynote-3.html',
        ['core'],
    ),
    #(['Jessica McKellar'], 'How the Internet works', [], 'http://pyvideo.org/pycon-us-2013/how-the-internet-works.html', ['web', 'twisted', 'scapy']),
    (
        ['Guido van Rossum'],
        'Announcing asyncio for the standard library (PEP 3156)',
        [],
        'http://pyvideo.org/pycon-us-2013/keynote-1.html',
        ['concurrency', 'standard library'],
    ),
)
for speaker_names, title, org, url, topics in keynotes:
    data = db.TalkData(speaker_names, topics, org)
    talk = Talk(category=Talk.KEYNOTE, conference_id=conference.id)
    talk.title, talk.video_url = title, url
    db.add_talk(talk, **data._asdict())



## Tutorials
##  NOTE(review): the remark below appears to be carried over from another
##  year's script; the statements that follow do fetch the 2013 tutorial
##  list. Confirm and drop the remark if it no longer applies.
##  ==> Ignore these...the links are broken and only the presenters'
##      last names are given, so it is hard to create an entry.
##
url = 'https://us.pycon.org/2013/schedule/tutorials/list/'
# One element per div whose class contains "presentation".
xpath = '//div[contains(@class,"presentation")]'
entries = html.fromstring(requests.get(url).text).xpath(xpath)
# Progress output: section name and the page being scraped.
print('tutorials')
print(url)
## Iterate through and extract the relevant content
for e in entries:
print(url)
# Register every listed volunteer for this conference. ``url`` and ``xpath``
# must already point at the volunteer listing.
for volunteer_name in html.fromstring(requests.get(url).text).xpath(xpath):
    volunteer_name = volunteer_name.text_content().strip()
    if not volunteer_name:
        continue
    # There can be multiple comma-separated names.
    for name in volunteer_name.split(','):
        # str.split keeps the whitespace around each piece and yields empty
        # strings for doubled or trailing commas; normalise before storing.
        name = name.strip()
        if not name:
            continue
        volunteer = db.fetch_or_add(Human(name=name))
        # Avoid linking the same conference twice.
        if conference not in volunteer.volunteering:
            volunteer.volunteering.append(conference)

db.session.commit()

## Talks
# Accumulators for the keynote currently being assembled; they are replaced
# with fresh objects each time a completed talk is stored.
talk = Talk(category=Talk.KEYNOTE, conference_id=conference.id)
data = db.TalkData([], [], [])
# The live page is read through a Wayback Machine snapshot.
wayback = 'https://web.archive.org/web/20070207091801/'
url = wayback + 'http://us.pycon.org:80/TX2007/Keynotes'
# All direct children of the wiki text container, in document order.
xpath = '//div[@id="wikitext"]/*'
entries = html.fromstring(requests.get(url).text).xpath(xpath)
# Start at the first <h2> and drop the final element of the container.
first_talk = next(i for i,e in enumerate(entries) if e.tag == 'h2')
entries = entries[first_talk:-1]
print('talks')
print(url)
# NOTE(review): only the <h2> branch of this loop is visible here; the code
# that fills in ``talk.title`` and ``data`` is not shown.
for e in entries:
    if e.tag == 'h2':
        # An <h2> starts a new talk. If the current one already has a title,
        # store it and start over with empty accumulators.
        # Presumably a freshly built Talk has title None - TODO confirm.
        if talk.title is not None:
            # Finished one.
            db.add_talk(talk, **data._asdict())
            data = db.TalkData([], [], [])
            talk = Talk(category=Talk.KEYNOTE, conference_id=conference.id)
Exemplo n.º 10
0
# Each entry: (speaker names, title, organizations, video URL).
keynotes = (
    (['Paul Graham'], 'Frighteningly Ambitious Startup Ideas', ['YCombinator'],
     'http://pyvideo.org/pycon-us-2012/keynote-paul-graham-ycombinator.html'),
    (['Stormy Peters'], 'Growing the web community and the Python community',
     ['Mozilla'],
     'http://pyvideo.org/pycon-us-2012/keynote-stormy-peters-mozilla-corporation.html'),
    (['David Beazley'], 'Tinkering with PyPy', [],
     'http://pyvideo.org/pycon-us-2012/keynote-david-beazley.html'),
    (['Guido van Rossum'], 'Addressing common questions about Python', [],
     'http://pyvideo.org/pycon-us-2012/keynote-guido-van-rossum.html'),
)
for speaker_names, title, org, url in keynotes:
    talk = Talk(category=Talk.KEYNOTE, conference_id=conference.id)
    talk.title = title
    talk.video_url = url
    # Pass the organizations along as the third TalkData field; previously
    # ``org`` was unpacked but dropped, so the affiliations were never stored.
    data = db.TalkData(speaker_names, [], org)
    db.add_talk(talk, **data._asdict())

## Startup Series
url = 'https://us.pycon.org/2012/community/startuprow/'
# All direct children of the page container, in document order.
xpath = '//div[@class="page"]/*'
entries = html.fromstring(requests.get(url).text).xpath(xpath)
# Index of the first <h2>; scanning begins with the element right after it.
first_talk = next(i for i, e in enumerate(entries) if e.tag == 'h2')
i = first_talk + 1
print('startup row')
print(url)
# Manual index loop over the remaining elements.
# NOTE(review): the rest of the loop body is not visible here; only the
# title extraction for <p> elements is shown.
while i < len(entries):
    e = entries[i]
    i += 1
    if e.tag == 'p':
        # Text of the paragraph's first direct <a> child (None if absent).
        title = e.findtext('./a')
            volunteer = db.fetch_or_add(Human(name=new_name))
            if conference not in volunteer.volunteering:
                volunteer.volunteering.append(conference)

db.session.commit()

## Talks
## ~~~~~~
##
## Keynotes
# Each entry: (speaker names, title).
keynotes = (
    (['Hilary Mason'], 'Hello, PyCon'),
    (['Guido van Rossum'], 'A Fireside Chat with Guido van Rossum'),
)
for speaker_names, title in keynotes:
    data = db.TalkData(speaker_names, [], [])
    talk = Talk(category=Talk.KEYNOTE, conference_id=conference.id)
    talk.title = title
    db.add_talk(talk, **data._asdict())

## Startup Series
# Accumulators for the plenary talk being assembled; every talk in this
# section starts with the 'startup' topic.
talk = Talk(category=Talk.PLENARY, conference_id=conference.id)
data = db.TalkData([], ['startup'], [])
# The page is read through a Wayback Machine snapshot.
wayback = 'https://web.archive.org/web/20110316093256/'
url = wayback + 'http://us.pycon.org:80/2011/home/keynotes/'
# All direct children of the page container, in document order.
xpath = '//div[@class="page"]/*'
entries = html.fromstring(requests.get(url).text).xpath(xpath)
# Locate the <h1> whose text starts with 'Startup' and keep only what
# follows it.
# NOTE(review): e.text is None for an <h1> that begins with a child element,
# which would raise AttributeError here - confirm against the page.
first_talk = next(i for i, e in enumerate(entries)
                  if e.tag == 'h1' and e.text.startswith('Startup'))
entries = entries[first_talk + 1:]
# Within that remainder, start at the first <h2>.
first_talk = next(i for i, e in enumerate(entries) if e.tag == 'h2')
i = first_talk
print('startup row')
Exemplo n.º 12
0
print(url)
# Register each entry as a volunteer of this conference.
# NOTE(review): ``entries`` is built outside the visible code; each item is
# passed unchanged as the Human name.
for name in entries:
    volunteer = db.fetch_or_add(Human(name=name))
    # Avoid linking the same conference twice.
    if conference not in volunteer.volunteering:
        volunteer.volunteering.append(conference)

## Talks
# - Keynotes
# Each entry: (title, speaker, organization or None).
entries = (
    ('The virtues of Open Source', 'Mitch Kapor',
     'Open Source Applications Foundation (OSAF)'),
    ('Python State of the Union', 'Guido van Rossum', 'Zope corporation'),
    ('How to argue about typing', 'Bruce Eckel', None),
)
for title, speaker, org in entries:
    data = db.TalkData([], [], [])
    data.speaker_names.append(speaker)
    # An organization is recorded only when one is given.
    if org is not None:
        data.organization_names.append(org)
    talk = Talk(category=Talk.KEYNOTE, conference_id=conference.id)
    talk.title = title
    db.add_talk(talk, **data._asdict())

# - Tutorials
# None? -- True. (PostMortem recommends tutorials for 2005)

# - Regular talks
#wayback = 'https://web.archive.org/web/20050206212138/'
#url = wayback + 'http://www.python.org:80/pycon/dc2004/schedule.html
url = 'https://wiki.python.org/moin/PyConDC2004/TalksByCategory'
#xpath = '//td[@class="body"]/table/tbody/tr'
xpath = '//div[@id="content"]/*'