Example #1
import cgi
import re
import urllib

def main():
    # CGI entry point: report whether the given Wikipedia article exists.
    # cachedFetch() is a project helper (a sketch follows this example).
    form = cgi.FieldStorage()
    if "title" not in form:
        print 'Content-Type: text/plain'
        print ''
        print 'No title specified!'
        return

    title = form["title"].value

    querystring = urllib.urlencode([('titles', title)])
    url = "http://en.wikipedia.org/w/api.php?action=query&format=yaml&prop=info&" + querystring
    data = cachedFetch(url, 3600)  # cache the API response for an hour
    if data != '':
        # A "pageid" in the YAML means the page exists; "missing" means it does not.
        if re.search(r'pageid"?:\s*\d+', data, re.MULTILINE):
            print 'Content-Type: text/plain'
            print ''
            print '1'
        elif re.search(r'missing"?:\s*', data, re.MULTILINE):
            print 'Content-Type: text/plain'
            print ''
            print '0'
        else:
            print 'Content-Type: text/plain'
            print ''
            print 'Data error!'
    else:
        print 'Content-Type: text/plain'
        print ''
        print 'Fail!'
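
Every example on this page calls cachedFetch(url, max_age), which is not shown here. A minimal sketch of what such a helper presumably does, i.e. fetch a URL and reuse an on-disk copy younger than max_age seconds (the cache directory and error handling below are assumptions, not the project's actual code):

import hashlib
import os
import time
import urllib2

CACHE_DIR = '/tmp/fetchcache'  # hypothetical cache location

def cachedFetch(url, max_age):
    # Reuse a cached copy of the response if it is fresh enough.
    if not os.path.isdir(CACHE_DIR):
        os.makedirs(CACHE_DIR)
    path = os.path.join(CACHE_DIR, hashlib.md5(url).hexdigest())
    if os.path.exists(path) and time.time() - os.path.getmtime(path) < max_age:
        return open(path, 'rb').read()
    try:
        data = urllib2.urlopen(url).read()
    except urllib2.URLError:
        return ''  # the callers treat '' as a fetch failure
    open(path, 'wb').write(data)
    return data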
Example #2
import urllib
import xml.dom.minidom

def doiFetchInfo(doi):
    # Look the DOI up via the CrossRef OpenURL resolver and build a citation
    # dict. crossref_id is a project-level account id; getText() is a helper
    # (a sketch follows this example).
    querystring = urllib.urlencode([('id', 'doi:' + doi)])
    url = "http://www.crossref.org/openurl/?pid=" + crossref_id + "&noredirect=true&" + querystring
    xmldata = cachedFetch(url, 3600)
    if xmldata == '':
        error('Bad data from CrossRef')

    xmldoc = xml.dom.minidom.parseString(xmldata)
    citedata = {}

    # Fields that map one-to-one from a CrossRef tag to a citation key.
    simplefields = {
        'doi': 'doi',
        'issn': 'issn',
        'journal': 'journal_title',
        'volume': 'volume',
        'issue': 'issue',
        'year': 'year',
        'title': 'article_title',
        'isbn': 'isbn',
    }

    for key, cr_tagname in simplefields.iteritems():
        cr_node = xmldoc.getElementsByTagName(cr_tagname)
        if cr_node:
            citedata[key] = getText(cr_node[0])

    cr_contributors = xmldoc.getElementsByTagName('contributor')
    authors = []
    for cr_contributor in cr_contributors:
        author = {}
        cr_givenname = cr_contributor.getElementsByTagName('given_name')
        if cr_givenname:
            author['first'] = getText(cr_givenname[0])
        cr_surname = cr_contributor.getElementsByTagName('surname')
        if cr_surname:
            author['last'] = getText(cr_surname[0])
        # Normalize ALL-CAPS names; use .get() so a contributor with a
        # missing name part does not raise KeyError.
        if author.get('first', '').isupper() and author.get('last', '').isupper():
            author['first'] = author['first'].title()
            author['last'] = author['last'].title()
        authors.append(author)
    if authors:
        citedata['authors'] = authors

    # Join the page range with an en dash; only append the last page when a
    # first page exists, so 'pages' is never extended before it is set.
    cr_first_page = xmldoc.getElementsByTagName('first_page')
    cr_last_page = xmldoc.getElementsByTagName('last_page')
    if cr_first_page:
        citedata['pages'] = getText(cr_first_page[0])
        if cr_last_page:
            citedata['pages'] += u"\u2013" + getText(cr_last_page[0])

    return citedata
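
getText() is another helper that is not defined on this page; the standard minidom idiom for collecting an element's text children is a likely guess at what it does:

def getText(node):
    # Concatenate the text children of a minidom element (assumed helper).
    parts = []
    for child in node.childNodes:
        if child.nodeType == child.TEXT_NODE:
            parts.append(child.data)
    return ''.join(parts)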
Example #3
import json
import urllib

def getRedirectTarget(title):
    # The 'redirects' parameter makes the MediaWiki API resolve the title
    # and report where it redirects to.
    querystring = urllib.urlencode({'action': 'query',
                                    'format': 'json',
                                    'redirects': '1',
                                    'titles': title})
    url = "http://en.wikipedia.org/w/api.php?" + querystring
    jsonstring = cachedFetch(url, 3600)
    apidata = json.loads(jsonstring)
    try:
        return apidata['query']['redirects'][0]['to']
    except (KeyError, IndexError):
        # No 'redirects' entry: the title is not a redirect.
        return ''
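
A hypothetical call, assuming the live API (the concrete titles are illustrative):

print getRedirectTarget('UK')       # e.g. 'United Kingdom'
print getRedirectTarget('Physics')  # '' -- not a redirect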
Example #4
import cgi
import json
import re

def main():
    # CGI entry point: look up book data by ISBN and return it as JSONP.
    form = cgi.FieldStorage()
    if "isbn" not in form:
        error('No isbn!')
    isbn = form["isbn"].value
    # Keep digits and the ISBN-10 check character 'X'.
    isbn = re.sub('[^0-9Xx]', '', isbn).upper()

    if "callback" not in form:
        error('No callback.')
    # NB: the callback is echoed back verbatim; a real deployment should
    # validate it (e.g. allow only identifier characters).
    callback = form["callback"].value

    # Fetch a pre-filled citation template from Diberri's template filler.
    url = "http://diberri.dyndns.org/cgi-bin/templatefiller/index.cgi?type=isbn&vertical=1&dont_use_etal=1&format=xml&id=" + isbn

    xmldata = cachedFetch(url, 3600)
    if xmldata == '':
        error('Bad data from Diberri tool')

    # Pull the template's |field=value lines out of the response.
    fields = {'title': 'title', 'publisher': 'publisher',
              'location': 'location', 'year': 'year',
              'isbn': 'isbn', 'authors': 'author'}
    bookdata = {}
    for line in xmldata.splitlines():
        for key, fieldname in fields.iteritems():
            m = re.search(r'\|' + fieldname + '=(.+)$', line, re.I)
            if m:
                bookdata[key] = m.group(1)

    jsonstr = json.dumps(bookdata, sort_keys=True, indent=4, ensure_ascii=False)
    jsonp = callback + '(' + jsonstr + ');'

    print 'Content-Type: text/javascript; charset=utf-8'
    print ''
    print jsonp.encode('utf-8')  # avoid UnicodeEncodeError on non-ASCII data
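
error(), used throughout these scripts, is also not shown; judging from how it is called (execution is not expected to continue afterwards), it presumably prints a plain-text response and exits. A hypothetical sketch:

import sys

def error(message):
    # Emit a plain-text CGI error response and stop (assumed helper).
    print 'Content-Type: text/plain'
    print ''
    print message
    sys.exit(1)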
Example #5
import json
import re
import sys
import urllib

def nytimesFetchInfo(url):
    # Build a citation dict for a New York Times article URL via the
    # NYT Article Search API (v1). nyt_apikey is a project-level key.
    # Strip any query string; the API is queried by the bare article URL.
    urlfixed = re.sub(r'\?.*$', '', url)
    querystring = urllib.urlencode({
        'format': 'json',
        'query': 'url:' + urlfixed,
        'fields': 'title, date, byline, page_facet, section_page_facet, source_facet, column_facet',
        'api-key': nyt_apikey,
    })
    nyturl = "http://api.nytimes.com/svc/search/v1/article?" + querystring
    jsonstring = cachedFetch(nyturl, 3600)
    if jsonstring == '':
        error('Bad data from NYTimes')

    nytdata = json.loads(jsonstring)
    if len(nytdata['results']) == 0:
        sys.stderr.write("No data from " + nyturl + "\n")
        return {}

    result = nytdata['results'][0]
    citedata = {'work': 'The New York Times'}
    citedata['title'] = result['title']
    if 'page_facet' in result:
        citedata['page'] = result['page_facet']
    if 'date' in result:
        # The API returns YYYYMMDD; reformat as ISO YYYY-MM-DD.
        m = re.search(r'(\d{4})(\d{2})(\d{2})', result['date'])
        if m:
            citedata['date'] = m.group(1) + '-' + m.group(2) + '-' + m.group(3)
    if 'byline' in result:
        # "By JANE DOE And JOHN ROE" -> ['Jane Doe', 'John Roe']
        byline = result['byline']
        byline = re.sub(r'(?i)^By\s+', '', byline)
        byline = re.sub('(?i) And ', ', ', byline)
        # Note: .title() also lowercases interior capitals ("McNamara" -> "Mcnamara").
        byline = byline.title()
        citedata['authors'] = re.split(r'\s*,\s*', byline)

    return citedata
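
A hypothetical call, using the article URL from the original code's comments (the field values depend on the live API response):

info = nytimesFetchInfo('http://www.nytimes.com/2009/03/26/garden/26slow.html')
# e.g. {'work': 'The New York Times', 'title': ..., 'date': '2009-03-26',
#       'page': ..., 'authors': [...]}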
Example #6
import cgi
import urllib

def main():
    # CGI entry point: render wikitext to HTML via the MediaWiki parse API
    # and pass the XML response straight through.
    form = cgi.FieldStorage()
    if "wikitext" not in form:
        print 'Content-Type: text/plain'
        print ''
        print 'No wikitext!'
        return

    wikitext = form["wikitext"].value

    querystring = urllib.urlencode([('text', wikitext)])
    url = "http://en.wikipedia.org/w/api.php?action=parse&format=xml&prop=text&" + querystring

    xmldata = cachedFetch(url, 3600)
    if xmldata != '':
        print 'Content-Type: text/xml; charset=utf-8'
        print ''
        print xmldata
    else:
        print 'Content-Type: text/plain'
        print ''
        print 'Fail!'
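
The main() functions in these examples are CGI entry points; each script presumably ends with the standard guard:

if __name__ == '__main__':
    main()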