floc = p['pdf'].index('NIPS')
            fname = p['pdf'][floc:]
            txt = convertPDF('downloads/'+fname)
            processed = True
            print 'found %s in file!' % (p['title'],)
        except:
            pass
            
        if not processed:
            # Nothing has produced text for this publication yet, so hand the
            # 'pdf' entry straight to convertPDF.
            try:
                # The title is wrapped in a 1-tuple so that a non-string title
                # (e.g. a tuple) cannot be mistaken for the format arguments.
                print('downloading pdf for [%s] and parsing...' % (p.get('title', 'an un-titled paper'),))
                txt = convertPDF(p['pdf'])
                processed = True
                print('processed from url!')
            except Exception:
                # Still best-effort: any ordinary failure skips this paper.
                # `except Exception` (not a bare except) lets Ctrl-C and
                # SystemExit stop a long run. The handler uses .get() because
                # a missing 'pdf' key is one of the failures that lands here,
                # and indexing again would raise KeyError out of the handler.
                print('error: unable to open download the pdf from %s' % (p.get('pdf', '<missing pdf entry>'),))
                print('skipping...')
        
        if processed:
            # Convert the extracted text with stringToWordDictionary and keep
            # the result on the publication record under 'pdf_text'.
            try:
                p['pdf_text'] = stringToWordDictionary(txt)
            except Exception:
                # Best-effort: a failed conversion only loses 'pdf_text' for
                # this paper. `except Exception` (not a bare except) keeps
                # KeyboardInterrupt/SystemExit able to stop the script.
                print('was unable to convert text to bag of words. Skipped.')
                
        
    print '%d/%d = %.2f%% done.' % (i+1, len(pubs), 100*(i+1.0)/len(pubs))
    
savePubs('pubs_nips', pubs_all)
                    
                new_pub['authors'] = [x.strip() for x in author_list]
        
    # Finalize new_pub: record a warning for each missing field, stamp it with
    # the venue and year, and add it to the running list of publications.
    # NOTE(review): per the original TODO this appears to duplicate checks
    # done elsewhere in the script -- confirm before refactoring.
    # TODO LATER_MAYBE: CODE CHUNK DUPLICATION
    # `in` replaces the deprecated dict.has_key(), which Python 3 removed.
    if 'authors' not in new_pub:
        warnings.append("oh oh no authors for publication... ")
    if 'title' not in new_pub:
        warnings.append("oh oh no title for publication... ")
    new_pub['venue'] = venue
    new_pub['year'] = year
    pubs.append(new_pub)

    # old_count is set outside this block; the difference is what gets
    # reported as the number of publications read for this year.
    print("read in %d publications for year %d." % (len(pubs) - old_count, year))
    

# Report whatever warnings were collected, one per line.
if not warnings:
    print("No warnings generated.")
else:
    print("%d warnings:" % len(warnings))
    for msg in warnings:
        print(msg)

# Last step: hand the collected publications to savePubs under a fixed name.
print("read in a total of %d publications." % len(pubs))
fname = "pubs_nips"
print("saving pickle in %s" % fname)
savePubs(fname, pubs)
print("all done.")
                new_pub['authors'] = [x.strip() for x in author_list]

    # Finalize new_pub: record a warning for each missing field, stamp it with
    # the venue and year, and add it to the running list of publications.
    # NOTE(review): per the original TODO this appears to duplicate checks
    # done elsewhere in the script -- confirm before refactoring.
    # TODO LATER_MAYBE: CODE CHUNK DUPLICATION
    # `in` replaces the deprecated dict.has_key(), which Python 3 removed.
    if 'authors' not in new_pub:
        warnings.append("oh oh no authors for publication... ")
    if 'title' not in new_pub:
        warnings.append("oh oh no title for publication... ")
    new_pub['venue'] = venue
    new_pub['year'] = year
    pubs.append(new_pub)

    # old_count is set outside this block; the difference is what gets
    # reported as the number of publications read for this year.
    print("read in %d publications for year %d." % (len(pubs) - old_count,
                                                    year))

# Report whatever warnings were collected, one per line.
if not warnings:
    print("No warnings generated.")
else:
    print("%d warnings:" % len(warnings))
    for msg in warnings:
        print(msg)

# Last step: hand the collected publications to savePubs under a fixed name.
print("read in a total of %d publications." % len(pubs))
fname = "pubs_nips"
print("saving pickle in %s" % fname)
savePubs(fname, pubs)
print("all done.")