Пример #1
0
def exploreArticle(query):
  """Search for ``query`` and fetch the article-exploration response for one match.

  The search response is reduced to a label-name -> article-id mapping. The
  id stored in the first entry of that mapping (the first label returned, on
  Python versions whose dicts keep insertion order) is turned into a URL
  with ``wmapiurl.expArtURL`` and that URL is fetched.

  Args:
    query: search text handed to ``wmapiurl.searchURL``.

  Returns:
    Whatever ``wmapifetch.fetchURL`` returns for the article URL
    (presumably the raw XML response -- confirm against wmapifetch).

  Raises:
    IndexError: if the search produced no labels at all.
  """
  import xmlextract
  import wmapiurl
  import wmapifetch
  searchURL = wmapiurl.searchURL(query)
  searchXML = wmapifetch.fetchURL(searchURL)
  searchResult = xmlextract.extractSearch(searchXML)
  labelList = {}  # label name -> article id, or the no-match marker
  for label in searchResult:
    senses = label['labelSenses']
    if len(senses) != 0:
      # Only the first listed sense of a label is considered.
      labelList[label['labelName']] = senses[0]['artID']
    else:
      labelList[label['labelName']] = 'no_matched_article'
  idlist = list(labelList.values())
  if not idlist:
    # Same exception type the bare idlist[0] used to raise, now with context.
    raise IndexError('search for %r returned no labels' % (query,))
  identity = idlist[0]
  # NOTE(review): identity can be the 'no_matched_article' marker when the
  # chosen label has no senses; it is still passed to expArtURL as before.
  articleURL = wmapiurl.expArtURL(identity)
  # Fetch the built URL; the bare article id was previously passed here by
  # mistake, leaving the URL unused.
  return wmapifetch.fetchURL(articleURL)
Пример #2
0
def fetchDefinition(artid):
  """Return the text of the first ``definition`` element for an article id.

  Builds an exploreArticle request (definition=true, definitionLength=LONG)
  for ``artid``, fetches it through ``wmapifetch.fetchURL`` and parses the
  result with ``xml.dom.minidom``.

  Args:
    artid: article identifier; converted with ``str`` and appended to the URL.

  Returns:
    The ``data`` of the first child node of the first ``definition`` element,
    or an empty string when there is no such element or it has no children.
  """
  import xml.dom.minidom
  import wmapifetch
  serviceurl = 'http://wikipedia-miner.cms.waikato.ac.nz/services/exploreArticle?definition=true&definitionLength=LONG&id='
  response = wmapifetch.fetchURL(serviceurl + str(artid))
  # Parse the fetched response and work from the document's root element.
  root = xml.dom.minidom.parse(response).documentElement
  matches = root.getElementsByTagName('definition')
  if len(matches) == 0:
    return ''
  children = matches[0].childNodes
  if len(children) == 0:
    return ''
  return children[0].data
Пример #3
0
def searchWiki(query):
  """Search for ``query`` and map every returned label name to an article id.

  Args:
    query: search text handed to ``wmapiurl.searchURL``.

  Returns:
    A dict keyed by label name. Each value is the ``artID`` of the label's
    first sense, or the string 'no_matched_article' when the label has no
    senses.
  """
  import wmapiurl  # depending modules
  import wmapifetch
  import xmlextract
  response = wmapifetch.fetchURL(wmapiurl.searchURL(query))
  matches = {}  # label name -> article id
  for entry in xmlextract.extractSearch(response):
    senses = entry['labelSenses']
    name = entry['labelName']
    # Only the first listed sense of a label is used.
    matches[name] = senses[0]['artID'] if len(senses) > 0 else 'no_matched_article'
  return matches
Пример #4
0
def article(identity, infotype):
  """Fetch one kind of information about an article.

  Args:
    identity: article identifier handed to ``wmapiurl.expArtURL``.
    infotype: one-letter selector, case-insensitive: 'l' for labels, 'p' for
      parent categories, 'i' for in-links, 'o' for out-links.

  Returns:
    The matching entry of the dict produced by ``xmlextract.extractExplore``,
    or False when ``infotype`` is not one of the selectors above.
  """
  import wmapiurl
  import wmapifetch
  import xmlextract
  # The request is made and parsed before the selector is looked at.
  response = wmapifetch.fetchURL(wmapiurl.expArtURL(identity))
  extracted = xmlextract.extractExplore(response)
  sections = {
    'l': 'labels',
    'p': 'parentCategories',
    'i': 'inLinks',
    'o': 'outLinks',
  }
  selector = infotype.lower()
  if selector not in sections:
    return False
  return extracted[sections[selector]]
Пример #5
0
def category(identity, infotype):
  """Fetch the parent or child categories of a category.

  Args:
    identity: category identifier handed to ``wmapiurl.expCatURL``.
    infotype: one-letter selector, case-insensitive: 'p' for parent
      categories, 'c' for child categories.

  Returns:
    A dict mapping category id to category title for the selected relation,
    or False when ``infotype`` is neither 'p' nor 'c'.
  """
  import wmapiurl
  import wmapifetch
  import xmlextract
  # The request is made and parsed before the selector is looked at.
  response = wmapifetch.fetchURL(wmapiurl.expCatURL(identity))
  extracted = xmlextract.extractCategory(response)
  # selector -> (list key, id key, title key) in the extracted data
  relations = {
    'p': ('parentCategories', 'parentCategoryID', 'parentCategoryTitle'),
    'c': ('childCategories', 'childCategoryID', 'childCategoryTitle'),
  }
  selector = infotype.lower()
  if selector not in relations:
    return False
  listkey, idkey, titlekey = relations[selector]
  titles = {}
  for entry in extracted[listkey]:
    titles[entry[idkey]] = entry[titlekey]
  return titles