def main(): form = cgi.FieldStorage() if (not form.has_key("title")): print 'Content-Type: text/plain' print '' print 'No title specified!' return title = form["title"].value querystring = urllib.urlencode([('titles', title)]) url = "http://en.wikipedia.org/w/api.php?action=query&format=yaml&prop=info&" + querystring data = cachedFetch(url, 3600) if not data == '': if re.search('pageid"?:\s*\d+', data, re.MULTILINE): print 'Content-Type: text/plain' print '' print '1' elif re.search('missing"?:\s*', data, re.MULTILINE): print 'Content-Type: text/plain' print '' print '0' else: print 'Content-Type: text/plain' print '' print 'Data error!' return else: print 'Content-Type: text/html' print '' print 'Fail!'
def doiFetchInfo(doi):
    # Look up citation metadata for a DOI via the CrossRef OpenURL resolver.
    querystring = urllib.urlencode([('id', 'doi:' + doi)])
    url = "http://www.crossref.org/openurl/?pid=" + crossref_id + "&noredirect=true&" + querystring
    xmldata = cachedFetch(url, 3600)
    if xmldata == '':
        error('Bad data from CrossRef')
    xmldoc = xml.dom.minidom.parseString(xmldata)

    citedata = {}

    # Fields that map one-to-one from a CrossRef tag to a citation key.
    simplefields = {
        'doi': 'doi',
        'issn': 'issn',
        'journal': 'journal_title',
        'volume': 'volume',
        'issue': 'issue',
        'year': 'year',
        'title': 'article_title',
        'isbn': 'isbn',
    }
    for key, cr_tagname in simplefields.iteritems():
        cr_node = xmldoc.getElementsByTagName(cr_tagname)
        if cr_node:
            citedata[key] = getText(cr_node[0])

    cr_contributors = xmldoc.getElementsByTagName('contributor')
    authors = []
    for cr_contributor in cr_contributors:
        author = {}
        cr_givenname = cr_contributor.getElementsByTagName('given_name')
        if cr_givenname:
            author['first'] = getText(cr_givenname[0])
        cr_surname = cr_contributor.getElementsByTagName('surname')
        if cr_surname:
            author['last'] = getText(cr_surname[0])
        # Normalize all-caps names to title case (guard with .get() so a missing
        # given name or surname does not raise a KeyError).
        if author.get('first', '').isupper() and author.get('last', '').isupper():
            author['first'] = author['first'].title()
            author['last'] = author['last'].title()
        authors.append(author)
    if authors:
        citedata['authors'] = authors

    # Combine first and last page into an en-dash separated page range.
    cr_first_page = xmldoc.getElementsByTagName('first_page')
    cr_last_page = xmldoc.getElementsByTagName('last_page')
    if cr_first_page:
        citedata['pages'] = getText(cr_first_page[0])
        if cr_last_page:
            citedata['pages'] += u"\u2013" + getText(cr_last_page[0])
    return citedata
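# getText() is another helper that is not shown in this excerpt. doiFetchInfo()
# passes it a minidom element and expects that element's text back, so the standard
# minidom recipe below is assumed; the actual helper may differ.
def getText(node):
    # Concatenate the text of all text-node children of the given element.
    parts = []
    for child in node.childNodes:
        if child.nodeType == child.TEXT_NODE:
            parts.append(child.data)
    return ''.join(parts)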
def getRedirectTarget(title):
    querystring = urllib.urlencode({'action': 'query', 'format': 'json', 'redirects': '1', 'titles': title})
    url = "http://en.wikipedia.org/w/api.php?" + querystring
    #url = "http://en.wikipedia.org/w/api.php?action=query&prop=info&titles=appspot.com&redirects&format=json"
    #print >> sys.stderr, "url: " + url
    jsonstring = cachedFetch(url, 3600)
    #print >> sys.stderr, jsonstring
    apidata = json.loads(jsonstring)
    try:
        return apidata['query']['redirects'][0]['to']
    except KeyError, e:
        return ''
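# Example usage (assuming "USA" still redirects to "United States" on the English
# Wikipedia): getRedirectTarget('USA') would return u'United States', while a title
# that is not a redirect returns ''.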
def main(): form = cgi.FieldStorage() if (not form.has_key("isbn")): error('No isbn!') isbn = form["isbn"].value isbn = re.sub('[^0-9]', '', isbn) if (not form.has_key("callback")): error('No callback.') callback = form["callback"].value url = "http://diberri.dyndns.org/cgi-bin/templatefiller/index.cgi?type=isbn&vertical=1&dont_use_etal=1&format=xml&id=" + isbn xmldata = cachedFetch(url, 3600) if xmldata == '': error('Bad data from Diberri tool') #error(xmldata) bookdata = {} for line in xmldata.splitlines(): m = re.search('^\|title=(.+)$', line, re.I) if m: bookdata['title'] = m.group(1) m = re.search('\|publisher=(.+)$', line, re.I) if m: bookdata['publisher'] = m.group(1) m = re.search('\|location=(.+)$', line, re.I) if m: bookdata['location'] = m.group(1) m = re.search('\|year=(.+)$', line, re.I) if m: bookdata['year'] = m.group(1) m = re.search('\|isbn=(.+)$', line, re.I) if m: bookdata['isbn'] = m.group(1) m = re.search('\|author=(.+)$', line, re.I) if m: bookdata['authors'] = m.group(1) jsonstr = json.dumps(bookdata, sort_keys=True, indent=4, ensure_ascii=False) jsonp = callback + '(' + jsonstr + ');' print 'Content-Type: text/javascript' print '' print jsonp
def nytimesFetchInfo(url):
    # Look up citation metadata for a nytimes.com URL via the NYT Article Search API.
    # print >> sys.stderr, 'url: ' + url
    # http://api.nytimes.com/svc/search/v1/article?format=json&query=url%3Ahttp%3A%2F%2Fwww.nytimes.com%2F2009%2F03%2F26%2Fgarden%2F26slow.html&fields=title%2C+nytd_title%2C+date%2C+byline%2C+page_facet%2C+section_page_facet%2C+source_facet&api-key=####
    urlfixed = re.sub("\?.*$", "", url)  # strip any query string before searching
    # print >> sys.stderr, urlfixed
    querystring = urllib.urlencode(
        {
            "format": "json",
            "query": "url:" + urlfixed,
            "fields": "title, date, byline, page_facet, section_page_facet, source_facet, column_facet",
            "api-key": nyt_apikey,
        }
    )
    nyturl = "http://api.nytimes.com/svc/search/v1/article?" + querystring
    jsonstring = cachedFetch(nyturl, 3600)
    if jsonstring == "":
        error("Bad data from NYTimes")
    nytdata = json.loads(jsonstring)
    # print >> sys.stderr, nytdata
    if len(nytdata["results"]) == 0:
        sys.stderr.write("No data from " + nyturl + "\n")
        return {}

    result = nytdata["results"][0]
    citedata = {"work": "The New York Times"}
    citedata["title"] = result["title"]
    if result.has_key("page_facet"):
        citedata["page"] = result["page_facet"]
    if result.has_key("date"):
        # Dates come back as YYYYMMDD; reformat as YYYY-MM-DD.
        m = re.search("(\d\d\d\d)(\d\d)(\d\d)", result["date"])
        if m:
            citedata["date"] = m.group(1) + "-" + m.group(2) + "-" + m.group(3)
    if result.has_key("byline"):
        # Strip the leading "By", turn " and " into a comma, title-case the names,
        # and split the byline into a list of authors.
        byline = result["byline"]
        byline = re.sub("(?i)^By\s+", "", byline)
        byline = re.sub("(?i) And ", ", ", byline)
        byline = byline.title()
        authors = re.split("\s*,\s*", byline)
        citedata["authors"] = authors
    return citedata
def main(): form = cgi.FieldStorage() if (not form.has_key("wikitext")): print 'Content-Type: text/plain' print '' print 'No wikitext!' exit() wikitext = form["wikitext"].value querystring = urllib.urlencode([('text', wikitext)]) url = "http://en.wikipedia.org/w/api.php?action=parse&format=xml&prop=text&" + querystring xmldata = cachedFetch(url, 3600) if not xmldata == '': #xmldata = re.sub('Chicagogsdgsdgsdgsfgsrg', 'ugg' , xmldata) print 'Content-Type: text/xml; charset=utf-8' print '' print xmldata else: print 'Content-Type: text/html' print '' print 'Fail!'