            volunteer = db.fetch_or_add(Human(name=new_name))
            if conference not in volunteer.volunteering:
                volunteer.volunteering.append(conference)

db.session.commit()
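# NOTE: db.fetch_or_add is assumed (it is defined elsewhere in this project,
# not in this excerpt) to be a get-or-create helper: it returns the existing
# Human row with that name, or adds a new one to the session and returns it.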

## Talks
## ~~~~~~
##
## Keynotes
keynotes = (
    #(['Kelsey Hightower'], 'Kubernetes for Pythonistas', ['Google'], 'http://pyvideo.org/pycon-us-2017/keynote-kubernetes-for-pythonistas.html', ['voice', 'kubernetes', 'containers']),
)
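# The single PyCon US 2017 keynote entry above is commented out, so the loop
# below is currently a no-op; uncomment or extend the tuple to load keynotes.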
for speaker_names, title, org, url, topics in keynotes:
    talk = Talk(category=Talk.KEYNOTE, conference_id=conference.id)
    talk.title = title
    talk.video_url = url
    data = db.TalkData(speaker_names, topics, org)
    db.add_talk(talk, **data._asdict())
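# NOTE: db.TalkData is assumed to behave like a namedtuple of three lists,
# roughly the sketch below (the real definition lives in the db module, which
# is not part of this excerpt); that is why ._asdict() works here and why the
# list fields can be appended to in place:
#
#     TalkData = collections.namedtuple(
#         'TalkData', ['speaker_names', 'topic_names', 'organization_names'])
#
# db.add_talk(talk, **data._asdict()) is then expected to resolve each name
# into a row and attach it to the talk.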


## Tutorials, talks, and posters
def add_presentation(url, category):
    print("Collecting from {}".format(url))
    xpath = '//div[contains(@class,"box-content")]/*'
    entries = html.fromstring(requests.get(url).text).xpath(xpath)
    first = next(i for i, e in enumerate(entries) if e.tag == 'h2')
    ## Iterate through the entries, which appear in (h2, p, div) triples,
    ## and extract the relevant content from each triple.
    for i in range((len(entries) - first) // 3):
        h2, p, div = entries[first + 3 * i:first + 3 * (i + 1)]
        title = h2.text_content()
        if name and len(name) > 1:
            volunteer = db.fetch_or_add(Human(name=name))
            if conference not in volunteer.volunteering:
                volunteer.volunteering.append(conference)

db.session.commit()


## Talks
keynotes = (
    (['Guido van Rossum'], 'Update on the state of Python', None),
    (['Steve Huffman', 'Alexis Ohanian'], 'Reddit', "Reddit's origin and the switch to Python")
)
for speaker_names, title, abstract in keynotes:
    talk = Talk(category=Talk.KEYNOTE, conference_id=conference.id)
    talk.title = title
    # Build the TalkData first so the Reddit credit can be attached to it.
    data = db.TalkData(speaker_names, [], [])
    if title == 'Reddit':
        data.organization_names.append('Reddit')
    if abstract:
        talk.abstract = abstract
    db.add_talk(talk, **data._asdict())
    

## Tutorials
##  ==> Ignore these...the links are broken and only the presenters'
##      last names are given, so it is hard to create an entry.
##
#wayback = 'https://web.archive.org/web/20090518174359/'
#url = wayback + 'http://us.pycon.org:80/2009/tutorials/schedule'
#xpath = '//div[@id="tutorials"]//li'
                new_name = " ".join(name.strip().split()[:3])
            volunteer = db.fetch_or_add(Human(name=new_name))
            if conference not in volunteer.volunteering:
                volunteer.volunteering.append(conference)

db.session.commit()

## Talks
## ~~~~~~
##
## Keynotes
keynotes = ((['Hilary Mason'], 'Hello, PyCon'),
            (['Guido van Rossum'], 'A Fireside Chat with Guido van Rossum'))
for speaker_names, title in keynotes:
    talk = Talk(category=Talk.KEYNOTE, conference_id=conference.id)
    talk.title = title
    data = db.TalkData(speaker_names, [], [])
    db.add_talk(talk, **data._asdict())

## Startup Series
talk = Talk(category=Talk.PLENARY, conference_id=conference.id)
data = db.TalkData([], ['startup'], [])
wayback = 'https://web.archive.org/web/20110316093256/'
url = wayback + 'http://us.pycon.org:80/2011/home/keynotes/'
xpath = '//div[@class="page"]/*'
entries = html.fromstring(requests.get(url).text).xpath(xpath)
first_talk = next(i for i, e in enumerate(entries)
                  if e.tag == 'h1' and e.text.startswith('Startup'))
# Keep only the Startup Series portion of the page, starting at its first h2.
entries = entries[first_talk + 1:]
first_talk = next(i for i, e in enumerate(entries) if e.tag == 'h2')
entries = entries[first_talk:-1]
print('talks')
print(url)
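# NOTE: `separators` is assumed to be a compiled regex defined earlier in the
# script (outside this excerpt) that splits joint speaker credits, e.g.
# something along the lines of:
#
#     separators = re.compile(r'\s*(?:,|&|\band\b)\s*')
#
# The h2 headings hold the speaker name(s) followed by a parenthesised
# qualifier; the split('(') below keeps only the names.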
for e in entries:
    if e.tag == 'h2':
        if talk.title is not None:
            # Finished one.
            db.add_talk(talk, **data._asdict())
            data = db.TalkData([], [], [])
            # Stay in the Startup Series category for the next talk.
            talk = Talk(category=Talk.PLENARY, conference_id=conference.id)
        speaker = e.text_content().split('(')[0].strip()
        data.speaker_names.extend(separators.split(speaker))
    elif e.tag == 'p' and e.text_content().startswith('Topic'):
        talk.title = e.text_content().split(' ', 1)[-1].strip().strip('"')

# don't forget the last one..
if talk.title is not None:
    db.add_talk(talk, **data._asdict())


# Tutorials
talk = Talk(category=Talk.TUTORIAL, conference_id=conference.id)
data = db.TalkData([], [], [])
wayback = 'https://web.archive.org/web/20070205022526/'
url = wayback + 'http://us.pycon.org:80/TX2007/Tutorials'
xpath = '//div[@id="wikitext"]/*'
entries = html.fromstring(requests.get(url).text).xpath(xpath)
header = next(i for i,e in enumerate(entries) if e.tag == 'h2')
entries = entries[header+1:-1]
## Talks
wayback = 'https://web.archive.org/web/20080907065646/'
url = wayback + 'http://us.pycon.org/2008/conference/keynotes/'
xpath = '//div[@id="keynote-talks"]/div[@class="section"]'
entries = html.fromstring(requests.get(url).text).xpath(xpath)
print('talks')
print(url)
for e in entries:
    talk = Talk(category=Talk.KEYNOTE, conference_id=conference.id)
    data = db.TalkData([], [], [])
    data.speaker_names.append(e.findtext('h1'))
    # Split off the abstract, and remove the 'Topic:' prefix
    tmp = e.xpath('*[text()[contains(.,"Topic")]]')
    if len(tmp) == 0:
        talk.title = "Keynote"
    else:
        tmp = re.split('[(:]', tmp[0].text_content()[7:].strip(')'))
        talk.title = tmp[0].strip()
        talk.abstract = ' '.join(tt for t in tmp[1:] for tt in t.split('\n'))
    db.add_talk(talk, **data._asdict())
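# Illustration of the parse above with a made-up heading (not real data):
# "Topic: The State of Python (an overview)" loses the "Topic: " prefix via
# [7:] and the trailing ')' via .strip(')'); re.split('[(:]', ...) then gives
# ['The State of Python ', 'an overview'], so the title becomes
# 'The State of Python' and the abstract 'an overview'.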

# Tutorials
wayback = 'https://web.archive.org/web/20090202113211/'
url = wayback + 'http://us.pycon.org:80/2008/tutorials/schedule/'
xpath = '//div[@id="content"]//li'
entries = html.fromstring(requests.get(url).text).xpath(xpath)
# Iterate through and extract the relevant content
print('tutorials')
print(url)
for e in entries: