# Example #1
0
from lxml import etree
from scrape import PaperFetcher

# Manual smoke test for PaperFetcher: for the first few <article> entries in
# filtered_dblp.xml, print the query (authors + title) followed by whatever
# PaperFetcher.get_paper_from_acm returns, so the output can be eyeballed.

# Number of articles to query.  It is 21 rather than 20 because the limit
# check historically ran after the query (`count > 20`), so 21 articles
# were always processed.
# NOTE(review): confirm whether a cap of 20 was actually intended.
MAX_TESTS = 21

# Read raw bytes so lxml takes the encoding from the XML declaration itself;
# the `with` block closes the file even if the read raises.
with open('filtered_dblp.xml', 'rb') as xml_file:
    root = etree.fromstring(xml_file.read())

fetcher = PaperFetcher()

# xpath() returns a plain list for node-set queries, so slicing caps the run.
for count, article in enumerate(root.xpath('article')[:MAX_TESTS], 1):
    title_nodes = article.xpath('title/text()')
    if not title_nodes:
        # No direct text under <title> (or no <title> at all): there is
        # nothing to query with, so skip instead of dying on IndexError.
        print('Skipping article %d: no title text' % count)
        continue

    # All author names joined into one space-separated, UTF-8 encoded string.
    authors = ' '.join(article.xpath('author/text()')).encode('utf-8')
    # Only the first text node of <title> is used.
    title = title_nodes[0].encode('utf-8')

    print('************************** Test number %d *****************************' % count)
    print('Test query AAAAAAAAAAAAAAAAAAA')
    print('author = %s' % authors)
    print('title  = %s\n' % title)
    print('Test result BBBBBBBBBBBBBBBBBBB')
    print(fetcher.get_paper_from_acm(title, authors))