def get_page_id(v):
   """
   Look up the Wikipedia page id of the article titled 'v'.

   Builds a MediaWiki API request (action=query, titles=v, format=json)
   on WPDOMAIN and takes the first key of "query" -> "pages" in the JSON
   answer as the page id.

   Example request:
     http://it.wikipedia.org/w/api.php?action=query&titles=Abbazia_di_San_Galgano&format=json
   Example answer:
     {"query":{"normalized":[{"from":"Abbazia_di_San_Galgano","to":"Abbazia di San Galgano"}],"pages":{"83117":{"pageid":83117,"ns":0,"title":"Abbazia di San Galgano"}}}}

   One request is made for each value in range(1, MAXTRIES). A failure
   while reading or decoding the answer is printed and followed by a
   5 second pause before the next attempt. Errors raised by
   urllib2.urlopen itself are not caught here and propagate to the caller.

   Returns the page id as an int, or None if no attempt succeeded.
   """
   pageid = None
   queryurl = UrlBuilder(domain=WPDOMAIN,path="w/api.php",params="action=query")
   queryurl.set_attr('titles',v)
   queryurl.set_attr('format','json')
   query = queryurl.build()
   for ntry in range(1,MAXTRIES):
      print("Request no. %d - Requesting %s" %(ntry,query))
      jsonpage = urllib2.urlopen(query)

      try:
         jobj = json.load(jsonpage)
         pageid = int(jobj['query']['pages'].keys()[0])
      except Exception as e:
         print(e)
         pageid = None
      else:
         # Got a usable answer: stop retrying.
         break
      finally:
         # Always release the HTTP response, on success and on failure.
         jsonpage.close()

      # Only reached after a failed attempt: wait before trying again.
      time.sleep(5)

   return pageid
Exemplo n.º 2
0
# Rewrite every row of inlist in place: the first cell becomes an internal
# wiki link to the page and the second cell an external link built from the
# OSM id, type and coordinates found in the row.
for row in inlist:
   # Encode every field to a UTF-8 byte string, replacing the row contents in place.
   row[:] = [r.encode('utf-8') for r in row]
   # Columns read below, by index: page title, OSM id, OSM type, lon, lat.
   wikipage = row[0]
   osm_id = row[1]
   osm_type = row[2]
   osm_lon = row[3]
   osm_lat = row[4]
   
   # ELEMENTS is looked up with the OSM type; the value is used as the
   # parameter name in the URL below.
   osm_element = ELEMENTS[osm_type]

   # URL on OSMURL with "<element>=<id>" as params and the coordinates
   # passed as the 'mlon' / 'mlat' attributes.
   osmUrl = UrlBuilder(domain=OSMURL,
                       path='',
                       params='{osm_element}={osm_id}'.format(
                                                       osm_element=osm_element,
                                                       osm_id=osm_id),
                       attrs={'mlon': osm_lon,
                              'mlat': osm_lat
                             }
                      )
   osmurl = osmUrl.build()
   
   print row
   # Internal wiki link "[[title|label]]"; the label is the title with
   # underscores replaced by spaces.
   row[0] = '[[{wikipage}|{pagename}]]'.format(
                                          wikipage=wikipage,
                                          pagename=wikipage.replace('_',' '))
   
   # External link "[url label]" with the OSM id as label.
   row[1] = '[{osmurl} {osm_id}]'.format(osmurl=osmurl,
                                         osm_id=osm_id)
         
   # NOTE(review): presumably the start of the output text for this row;
   # the rest of this snippet is not visible here.
   txt = '|'
u'Architetto',
u'StileArchitett',
u'InizioCostr',
u'FineCostr',
u'Demolizione',
u'Sito',
u'lat',
u'long'
]

"""
Utility functions
"""
# Template URL for JSONpedia requests, built once at import time.
# The path contains the literal placeholder '{wp-page}', which
# get_jsonpedia_page() replaces with the quoted article title.
_jsonu = UrlBuilder(
               domain="json.it.dbpedia.org",
               path="annotate/resource/json/it%3A{wp-page}",
               params="filter=__type:template"
              )
# Extra 'flags' attribute added to the URL before it is built.
_jsonu.set_attr('flags','-Extractors,Structure,')
_jsonbaseurl=_jsonu.build()

def get_jsonpedia_page(v):
   """
   Gets the corrisponding JSONpedia page (only templates)
   for Wikipedia article titled 'v'.
   Tries MAXTRIES times or returns none.
   """
   vsafe = v.replace(' ','_')
   jsonurl = _jsonbaseurl.replace('{wp-page}',urllib.quote(vsafe))
   for ntry in range(1,MAXTRIES):
      try:
def query_api():
   """
   Collect the titles of the pages that embed the page named WPTNAME.

   Queries the MediaWiki API on WPDOMAIN with generator=embeddedin
   (500 results per request, XML format, namespace 0 only) and keeps
   requesting, with a 5 second pause between requests, for as long as the
   answer contains an "embeddedin" element carrying the "geicontinue"
   value for the next batch.

   Returns a list with the "title" attribute of every "page" element
   found in the answers. Errors raised by urllib2.urlopen or by the XML
   parser are not caught here and propagate to the caller.
   """
   queryurl = UrlBuilder(domain=WPDOMAIN,path="w/api.php",params="action=query")
   queryurl.set_attr('generator','embeddedin')
   queryurl.set_attr('geititle',WPTNAME)
   # Parameters of a module used as generator need the 'g' prefix (like
   # geititle and geilimit); the unprefixed 'einamespace' is not applied
   # to the generator, so the namespace filter had no effect.
   queryurl.set_attr('geinamespace','0')
   queryurl.set_attr('geilimit','500')
   queryurl.set_attr('format','xml')

   inlist = list()

   while True:
      # Build the URL once per request and reuse it for log and fetch.
      requrl = queryurl.build()
      print("Requesting %s" % requrl)
      infile = urllib2.urlopen(requrl)
      try:
         inxml = infile.read()
      finally:
         # Release the HTTP response even if reading fails.
         infile.close()

      xml = parseString(inxml)

      for page in xml.getElementsByTagName("page"):
         inlist.append(page.getAttribute("title"))

      # No "embeddedin" element means there is nothing left to fetch.
      querycont = xml.getElementsByTagName("embeddedin")
      if len(querycont) == 0:
         break

      # Carry the continuation token over to the next request.
      geicontinue = querycont[0].getAttribute("geicontinue")
      queryurl.set_attr("geicontinue",geicontinue)

      time.sleep(5)

   return inlist
Exemplo n.º 5
0
# You should have received a copy of the GNU General Public License
# along with this program (see COPYING).
# If not, see <http://www.gnu.org/licenses/>.
#########################################################################

import logging
from wocmod.wocurlbuilder import UrlBuilder
from wocmod.wocjson import JSONQuerier
from wocmod.wocdb import MySQLConnector,PostgreSQLConnector
from wocmod.wocglobal import WOC

# Module logger, registered under the name 'woc.woccoords'.
logger = logging.getLogger('woc.woccoords')

# Template URL on json.it.dbpedia.org, built once at import time.
# The path contains the literal placeholder '{wppage}'.
# NOTE(review): presumably replaced with the article title by the code that
# uses JSONPEDIABASEURL - that code is not visible here, confirm.
_jsonu = UrlBuilder(
               domain="json.it.dbpedia.org",
               path="annotate/resource/json/it%3A{wppage}",
               params="filter=__type:template"
              )
# Extra 'flags' attribute added to the URL before it is built.
_jsonu.set_attr('flags','-Extractors,Structure,')
JSONPEDIABASEURL=_jsonu.build()


class CoordinateGetter(object):

   def __init__(self,item):
      self.item = item
      self.coords = None
   
   def _from_db(self):
      pass