def get_page_id(v):
   """
   Return the Wikipedia page id of the article titled 'v'.

   Builds an 'action=query&titles=<v>&format=json' request against
   w/api.php on WPDOMAIN and reads the first key of the 'pages' object
   in the reply. Example request and reply:

      http://it.wikipedia.org/w/api.php?action=query&titles=Abbazia_di_San_Galgano&format=json
      {"query":{"normalized":[{"from":"Abbazia_di_San_Galgano","to":"Abbazia di San Galgano"}],
       "pages":{"83117":{"pageid":83117,"ns":0,"title":"Abbazia di San Galgano"}}}}

   Makes up to MAXTRIES attempts, sleeping 5 seconds between failed
   attempts. Any error (network failure, invalid JSON, unexpected reply
   structure) is printed and counts as a failed attempt.

   Returns the page id as an int, or None if every attempt failed.

   NOTE(review): for a title that does not exist the API is understood to
   key the entry with a negative placeholder (e.g. "-1"), which would be
   returned here as a negative int -- confirm that callers expect this.
   """
   pageid = None
   queryurl = UrlBuilder(domain=WPDOMAIN,path="w/api.php",params="action=query")
   queryurl.set_attr('titles',v)
   queryurl.set_attr('format','json')
   query = queryurl.build()

   # The upper bound is MAXTRIES + 1 so that exactly MAXTRIES attempts are
   # made; range(1, MAXTRIES) stopped one attempt short.
   for ntry in range(1, MAXTRIES + 1):
      # Single-argument print(...) prints the same text as the print statement.
      print("Request no. %d - Requesting %s" % (ntry, query))
      try:
         # The request itself is inside the try so that transient network
         # errors are retried instead of aborting the whole lookup.
         jsonpage = urllib2.urlopen(query)
         try:
            jobj = json.load(jsonpage)
         finally:
            # Release the connection whether or not parsing succeeded.
            jsonpage.close()
         # 'pages' is keyed by page id; take the first (only expected) key.
         pageid = int(next(iter(jobj['query']['pages'])))
         break
      except Exception as e:
         # Deliberately broad: this is a best-effort retry boundary.
         print(e)
         pageid = None
         # No point in waiting once the last attempt has failed.
         if ntry < MAXTRIES:
            time.sleep(5)

   return pageid
def query_api():
   """
   Return the titles of the pages listed by an 'embeddedin' generator
   query for WPTNAME on WPDOMAIN.

   Requests w/api.php with action=query, generator=embeddedin,
   geititle=WPTNAME, namespace 0, at most 500 results per request, in XML
   format. Collects the 'title' attribute of every <page> element and
   keeps requesting while the reply carries a continuation value,
   sleeping 5 seconds between requests.

   Returns a list of page titles, in the order the API returned them.
   Network and XML parsing errors are not caught and propagate to the
   caller.
   """
   queryurl = UrlBuilder(domain=WPDOMAIN,path="w/api.php",params="action=query")
   queryurl.set_attr('generator','embeddedin')
   queryurl.set_attr('geititle',WPTNAME)
   # When a module is used as a generator its parameters take a 'g' prefix.
   # The un-prefixed 'einamespace' was not recognised, so the namespace
   # filter was silently ignored.
   queryurl.set_attr('geinamespace','0')
   queryurl.set_attr('geilimit','500')
   queryurl.set_attr('format','xml')

   inlist = []

   while True:
      url = queryurl.build()
      # Single-argument print(...) prints the same text as the print statement.
      print("Requesting %s" % url)
      infile = urllib2.urlopen(url)
      try:
         inxml = infile.read()
      finally:
         # Release the connection even if reading fails.
         infile.close()

      xml = parseString(inxml)

      for page in xml.getElementsByTagName("page"):
         inlist.append(page.getAttribute("title"))

      # NOTE(review): this reads the legacy continuation element
      # (<query-continue><embeddedin geicontinue="..."/>). Newer MediaWiki
      # replies are understood to use a <continue> element unless
      # 'rawcontinue' is requested -- confirm against the target wiki,
      # otherwise only the first batch is collected.
      querycont = xml.getElementsByTagName("embeddedin")
      if not querycont:
         break

      geicontinue = querycont[0].getAttribute("geicontinue")
      if not geicontinue:
         # getAttribute returns "" for a missing attribute; without a real
         # continuation value the same request would be repeated forever.
         break
      queryurl.set_attr("geicontinue",geicontinue)

      # Pause between successive requests.
      time.sleep(5)

   return inlist
u'FineCostr',
u'Demolizione',
u'Sito',
u'lat',
u'long'
]

"""
Utility functions
"""
# Base URL for JSONpedia template-annotation requests. The path keeps the
# literal '{wp-page}' placeholder, which get_jsonpedia_page replaces with
# the URL-quoted article title before each request.
_jsonu = UrlBuilder(domain="json.it.dbpedia.org",
                    path="annotate/resource/json/it%3A{wp-page}",
                    params="filter=__type:template")
_jsonu.set_attr('flags', '-Extractors,Structure,')
# Built once at import time and reused for every request.
_jsonbaseurl = _jsonu.build()

def get_jsonpedia_page(v):
   """
   Gets the corresponding JSONpedia page (only templates)
   for Wikipedia article titled 'v'.
   Tries MAXTRIES times or returns None.
   """
   vsafe = v.replace(' ','_')
   jsonurl = _jsonbaseurl.replace('{wp-page}',urllib.quote(vsafe))
   for ntry in range(1,MAXTRIES):
      try:
         print "Request no. %d: requesting: %s" %(ntry,jsonurl)
         jsonpage = urllib2.urlopen(jsonurl)
         break