Exemplo n.º 1
0
    def test_mqlreaditer(self):
        """Check that mqlreaditer accepts a query as ``{...}`` and as ``[{...}]``.

        Also checks that mqlread accepts the list form and raises
        MetawebError for the bare-dict form of the same query.
        """
        film_query = {
            "id": None,
            "initial_release_date>=": "2009",
            "name": None,
            "type": "/film/film",
        }
        wrapped_query = [film_query]

        from_list = freebase.mqlreaditer(wrapped_query)
        # Same query without the enclosing list; mqlreaditer should be able
        # to handle both spellings.
        from_dict = freebase.mqlreaditer(film_query)
        self.assertNotEqual(from_list, None)
        self.assertEqual(list(from_list), list(from_dict))

        # For mqlread the query must be [{}], because there are lots of
        # matching elements; the bare dict is expected to fail.
        list_result = freebase.mqlread(wrapped_query)
        self.assertRaises(MetawebError, freebase.mqlread, film_query)
        self.assertNotEqual(list_result, None)
Exemplo n.º 2
0
def queryCityTown(taxonomy, tourist_locations):
    """Read city/town topics from Freebase and pass each one to ``buildTBranch``.

    When ``fwiki.settings.DEBUG`` equals True a single ``mqlread`` call is
    made; otherwise ``mqlreaditer`` is attempted up to three times.  If every
    attempt fails the error is logged and the function returns without
    processing anything (previously this ended in a NameError on the
    unbound result variable).

    Args:
        taxonomy: forwarded unchanged to ``buildTBranch``.
        tourist_locations: forwarded unchanged to ``buildTBranch``.
    """
    logger.debug('queryCityTown')
    logger.debug(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()))

    query = [{
        "type": "/location/citytown",
        "name": None,
        "id": None,
        "limit": 10,
        "/common/topic/alias": [],
        "/common/topic/image": [{
            "id": None,
            "optional": True,
            "/common/image/size": {
                "/measurement_unit/rect_size/x": None,
                "/measurement_unit/rect_size/y": None,
            },
        }],
    }]

    if fwiki.settings.DEBUG == True:
        print('in test mode')
        r = freebase.mqlread(query)
    else:
        print('in production mode')
        # NOTE(review): if mqlreaditer is lazy, request errors would surface
        # while iterating below rather than here - confirm.
        for _attempt in range(3):
            try:
                r = freebase.mqlreaditer(query)
            except Exception:
                logger.exception('queryCityTown: mqlreaditer attempt failed')
            else:
                break
        else:
            # All attempts raised: there is nothing to iterate over.
            logger.error('queryCityTown: giving up after 3 failed attempts')
            return

    for i in r:
        buildTBranch(i, taxonomy, tourist_locations)
Exemplo n.º 3
0
def seedGames():
    """Query Freebase for every ``/games/game`` topic and seed the datastore.

    For each result a ``models.GameSeed`` keyed by the topic's ``mid`` is
    created and stored, unless one with that key name already exists.
    Progress is reported through ``logging.info``.

    Returns:
        True, once the result iterator is exhausted.
    """
    logging.info("################## main.py:: seedGames() ################")
    game_query = {
        "type": "/games/game",
        "mid": None,
        "name": None,
        "key": {"namespace": BGG_NAMESPACE, "value": None, "optional": True},
    }
    processed = 0
    for game in freebase.mqlreaditer(game_query, extended=True):
        logging.info("################ result:: " + str(game) + " #################")
        freebase_mid = game.mid
        # The key clause is optional in the query, so it may come back as None.
        bgg_id = None if game.key is None else game.key.value
        title = game.name
        if models.GameSeed.get_by_key_name(freebase_mid) is None:
            new_seed = models.GameSeed(
                key_name=freebase_mid, mid=freebase_mid, bgg_id=bgg_id, name=title
            )
            logging.info("############# game_seed.put() ####################")
            new_seed.put()
        processed += 1
        logging.info("############### Count:: " + str(processed) + " ###############")
    logging.info("################ Total count:: " + str(processed) + " ############")
    return True
Exemplo n.º 4
0
 def test_mqlreaditer(self):
     """Exercise mqlreaditer with a bare-dict query and a list-wrapped query.

     The two iterators must yield equal results.  mqlread is then checked
     to accept the list form and to raise MetawebError for the bare dict.
     """
     single = {
         'id': None,
         'initial_release_date>=': '2009',
         'name': None,
         'type': '/film/film',
     }
     as_list = [single]

     iter_from_list = freebase.mqlreaditer(as_list)
     # The difference between [{}] and {}: mqlreaditer should handle both.
     iter_from_dict = freebase.mqlreaditer(single)
     self.assertNotEqual(iter_from_list, None)
     self.assertEqual(list(iter_from_list), list(iter_from_dict))

     # For mqlread the query must be [{}], because there are lots of
     # elements; the bare dict form is expected to raise.
     read_from_list = freebase.mqlread(as_list)
     self.assertRaises(MetawebError, freebase.mqlread, single)
     self.assertNotEqual(read_from_list, None)
Exemplo n.º 5
0
def queryDivisions(taxonomy, tourist_locations):
    """Read administrative divisions from Freebase and build taxonomy branches.

    The query asks for each division together with its containing countries
    and, optionally, those countries' containing continents.  For every
    division, ``buildTBranch`` is called innermost-container first: each
    continent, then each country, then the division itself.  The lower-cased
    names of the containers seen so far are passed along as ``contained_by``.

    When ``fwiki.settings.DEBUG`` equals True a single ``mqlread`` call is
    made; otherwise ``mqlreaditer`` is attempted up to three times.  If every
    attempt fails the error is logged and the function returns without
    processing anything (previously this ended in a NameError on the
    unbound result variable).

    Args:
        taxonomy: forwarded unchanged to ``buildTBranch``.
        tourist_locations: forwarded unchanged to ``buildTBranch``.
    """
    logger.debug('queryDivisions')
    logger.debug(strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()))
    query = [{
        "type": "/location/administrative_division",
        "name": None,
        "/common/topic/image": [{
            "id": None,
            "optional": True,
            "/common/image/size": {
                "/measurement_unit/rect_size/x": None,
                "/measurement_unit/rect_size/y": None,
            },
        }],
        "id": None,
        "/common/topic/alias": [],
        "/location/location/containedby": [{
            "type": "/location/country",
            "name": None,
            "id": None,
            "/common/topic/alias": [],
            "/location/location/containedby": [{
                "type": "/location/continent",
                "optional": True,
                "name": None,
                "id": None,
                "/common/topic/alias": [],
            }],
        }],
    }]

    if fwiki.settings.DEBUG == True:
        print('in test mode')
        results = freebase.mqlread(query)
    else:
        print('in production mode')
        # NOTE(review): if mqlreaditer is lazy, request errors would surface
        # while iterating below rather than here - confirm.
        for _attempt in range(3):
            try:
                results = freebase.mqlreaditer(query)
            except Exception:
                logger.exception('queryDivisions: mqlreaditer attempt failed')
            else:
                break
        else:
            # All attempts raised: there is nothing to iterate over.
            logger.error('queryDivisions: giving up after 3 failed attempts')
            return

    for i in results:
        contained_by = []
        for c in i[C_BY]:
            for cc in c[C_BY]:
                buildTBranch(cc, taxonomy, tourist_locations)
                contained_by.append(cc[NAME].lower())

            # contained_by is shared and still growing: the country sees the
            # containers collected so far, then is appended itself.
            buildTBranch(c, taxonomy, tourist_locations, contained_by)
            contained_by.append(c[NAME].lower())

        buildTBranch(i, taxonomy, tourist_locations, contained_by)
Exemplo n.º 6
0
def parse_fb(self):
    """Enrich every ``Artiste`` with data from Freebase when the match is unique.

    For each artist a ``/people/person`` query by name is run.  Only when
    Freebase yields exactly one match are the birth date, the birth place
    (both only if not already set) and the official website copied over,
    after which the artist is saved.  A failure on one artist is reported
    on stdout and does not stop the loop.

    Args:
        self: unused by the body.
    """
    createurs = Artiste.objects.all()
    for createur in createurs:
        try:
            print("Scanne l'artiste "+str(createur.name_id).encode('<utf-8>'))
            # Encode once and reuse: str() on a non-ASCII unicode name raises
            # under Python 2, which used to abort the artist before save().
            nom = createur.name.encode('<utf8>')
            requete = {
                "type": "/people/person",
                "name": nom,
                "date_of_birth": None,
                "place_of_birth": None,
                '/common/topic/description': None,
                '/common/topic/official_website': [{}],
                '/common/topic/topic_equivalent_webpage': [{}],
                "profession": [],
            }
            essai = freebase.mqlreaditer(requete, asof=None)
            reponses = 0
            if essai is not None:
                # Count the matches; only a unique match is trusted below.
                for _match in essai:
                    reponses += 1
                print("Freebase trouve "+str(reponses).encode('<utf-8>')+" réponses pour ce créateur")
            if reponses == 1:
                print("debut de collecte des informations")
                i = freebase.mqlread(requete, asof=None)

                if i["date_of_birth"] is not None and createur.date_naissance is None:
                    naissance = _freebase_date_to_datetime(i['date_of_birth'])
                    if naissance is not None:
                        createur.date_naissance = naissance

                if i["place_of_birth"] is not None and createur.lieu_naissance is None:
                    createur.lieu_naissance = i['place_of_birth']

                # Grab the personal website, creating the link only once.
                if len(i['/common/topic/official_website']) > 0:
                    url1 = i['/common/topic/official_website'][0]['value']
                    test = LienWeb.objects.filter(URL=url1)
                    if len(test) == 0:
                        siteweb = LienWeb(URL=url1)
                        siteweb.name = "site personnel de " + nom
                        siteweb.save()
                        createur.productions_web.add(siteweb)
                createur.save()
                print("fini de chercher dans freebase pour "+str(createur.name_id).encode('<utf-8>'))
        except Exception:
            # Best effort per artist; unlike a bare ``except:`` this lets
            # KeyboardInterrupt / SystemExit stop the run.
            print("probleme avec l'artiste "+str(createur.name_id).encode('<utf-8>'))


def _freebase_date_to_datetime(value):
    """Convert 'YYYY', 'YYYY-MM' or 'YYYY-MM-DD' to a datetime at midnight.

    Missing month/day default to 1.  Returns None when the text does not
    have one of those shapes or does not describe a valid date.
    """
    parts = value.split('-')
    if not 1 <= len(parts) <= 3:
        return None
    try:
        numbers = [int(part) for part in parts]
        numbers += [1] * (3 - len(numbers))
        return datetime.datetime(numbers[0], numbers[1], numbers[2], 0, 0)
    except ValueError:
        return None
0
def retrieve_freebase_desc(name, stype):
    """Look up the Freebase description entry for ``name``.

    An exact ``mqlread`` is tried first; if it raises, the first result of
    ``mqlreaditer`` for the same query is used instead.  Freebase is
    optional: when the module is unavailable an empty string is returned,
    and lookup errors are only printed when ``settings.ST_DEBUG`` is set.

    Args:
        name: topic name to search for.
        stype: key into ``settings.FREEBASE_TYPE_MAPPINGS`` selecting the
            Freebase type; an unmapped key leaves the type unconstrained.
    """
    if not freebase:
        return ""

    print("Retrieving the description for %s" % name)

    fb_type = settings.FREEBASE_TYPE_MAPPINGS.get(stype, None)
    value, data = None, ""
    query = {"name": name, "type": fb_type or [], FREEBASE_DESC_KEY: [{"id": None}]}
    try:
        value = freebase.mqlread(query)
    except Exception:
        try:
            values = freebase.mqlreaditer(query)
            # An empty result set is not an error: value simply stays None.
            value = next(values, None)
        except Exception as e:
            # Only print error as freebase is only optional
            if settings.ST_DEBUG:
                print("Error using `freebase`: %s" % e)
    # NOTE(review): the visible excerpt ends here; `value` and `data` are
    # presumably consumed by code that follows - confirm.
Exemplo n.º 8
0
def retrieve_freebase_name(name, stype):
    """Look up the Freebase topic whose key matches ``name``.

    The search key is derived with ``fix_name_for_freebase``.  An exact
    ``mqlread`` is tried first; if it raises, the first result of
    ``mqlreaditer`` for the same query is used instead.  Freebase is
    optional: when the module is unavailable ``name`` is returned as is,
    and lookup errors are only printed when ``settings.ST_DEBUG`` is set.

    Args:
        name: name to resolve.
        stype: key into ``settings.FREEBASE_TYPE_MAPPINGS`` selecting the
            Freebase type; an unmapped key leaves the type unconstrained.
    """
    if not freebase:
        return name

    search_key = fix_name_for_freebase(name)
    fb_type = settings.FREEBASE_TYPE_MAPPINGS.get(stype, None)
    value = None
    query = {"name": None, "type": fb_type or [], "key": {"value": search_key}}
    try:
        # Try to get the exact match
        value = freebase.mqlread(query)
    except Exception:
        try:
            # Fall back to the iterator and take its top result; an empty
            # result set is not an error, value simply stays None.
            values = freebase.mqlreaditer(query)
            value = next(values, None)
        except Exception as e:
            # Only print error as freebase is only optional
            if settings.ST_DEBUG:
                print("Error using `freebase`: %s" % e)
    # NOTE(review): the visible excerpt ends here; `value` is presumably
    # consumed by code that follows - confirm.
Exemplo n.º 9
0
import freebase
import json
import sys
from pprint import pprint as pp

# Usage: <script> QUERY_FILE COUNT
# Reads an MQL query (JSON) from QUERY_FILE and prints up to COUNT results
# as a JSON array on stdout.
with open(sys.argv[1], "r") as f:
    # ``with`` closes the file even when the JSON is malformed.
    q = json.load(f)
limit = int(sys.argv[2])
it = freebase.mqlreaditer(q)
print("[")
for i in range(limit):
    try:
        out = next(it)
    except StopIteration:
        # Fewer results than requested: stop early so the closing bracket
        # is still printed and the output stays valid JSON.
        break
    if i:
        print(",")
    print(json.dumps(out, indent=2))

print("]")
#!/usr/bin/python
import freebase
import pickle

# MQL query: one entry per topic typed both /music/instrument and
# /common/topic, with its families, article, images and instrumentalist count.
query = [{
    "/music/instrument/family": [{"name": None}],
    "id": None,
    "name": None,
    "a:type": "/music/instrument",
    "b:type": "/common/topic",
    "/common/topic/article": {"id": None},
    "/common/topic/image": [{"id": None}],
    "/music/instrument/instrumentalists": {"return": "count", "optional": True},
}]

# data maps instrument name -> record built from the query result.
data = {}
counter = 0
for row in freebase.mqlreaditer(query):
    datum = {
        'name': row['name'],
        'url': 'http://freebase.com/view%s' % (row['id']),
    }
    # Only the first image of the topic is used, for both URLs.
    image_id = row['/common/topic/image'][0]['id']
    datum['image'] = 'http://img.freebase.com/api/trans/raw%s' % (image_id)
    datum['thumbnail'] = 'http://indextank.com/_static/common/demo/%s.jpg' % (image_id.split('/')[2])
    datum['text'] = freebase.blurb(row['/common/topic/article']['id'], maxlength=500)
    datum['variables'] = {0: row['/music/instrument/instrumentalists']}
    families = [f['name'] for f in row['/music/instrument/family']]
    datum['families'] = families

    data[datum['name']] = datum
    print('done %i' % counter)
    counter += 1


family = {}
for datum in data.values():
    for f in datum['families']:
        if family.has_key(f):
    "b:type": "/common/topic",
    "/common/topic/article": {
        "id": None
    },
    "/common/topic/image": [{
        "id": None
    }],
    "/music/instrument/instrumentalists": {
        "return": "count",
        "optional": True
    }
}]

# Build one record per row returned for `query`.
data = {}
counter = 0
for row in freebase.mqlreaditer(query):
    datum = {}
    datum['name'] = row['name']
    datum['url'] = 'http://freebase.com/view%s' % (row['id'])
    # Both image URLs are derived from the id of the row's first image.
    datum['image'] = 'http://img.freebase.com/api/trans/raw%s' % (
        row['/common/topic/image'][0]['id'])
    # The thumbnail file name is the third '/'-separated piece of that id.
    datum['thumbnail'] = 'http://indextank.com/_static/common/demo/%s.jpg' % (
        row['/common/topic/image'][0]['id'].split('/')[2])
    # Article text fetched via freebase.blurb, requested with maxlength=500.
    datum['text'] = freebase.blurb(row['/common/topic/article']['id'],
                                   maxlength=500)
    # Slot 0 holds the instrumentalists value (queried with "return": "count").
    datum['variables'] = {}
    datum['variables'][0] = row['/music/instrument/instrumentalists']
    # Collect the names of the families this row lists.
    families = []
    for f in row['/music/instrument/family']:
        families.append(f['name'])
    datum['families'] = families
Exemplo n.º 12
0
     def get_together_DF(self,ngram,word2):
         """Count matches of ``word2`` in Freebase article text for ``ngram``.

         Topics whose name contains the lower-cased, space-joined ``ngram``
         are fetched; for each result the article blurb of the most recently
         collected topic id is downloaded and the regex matches of the
         lower-cased ``word2`` in it are added up.  The lookup is best
         effort: any error simply leaves the running total as it is.

         Args:
             ngram: iterable of words, joined with single spaces.
             word2: text used as a regular expression (not escaped).

         Returns:
             The total number of matches found (0 on failure); the same
             number is also printed.
         """
         print("in doc_freq")
         word1 = ' '.join(ngram).lower()
         no_of_occ = 0
         query = {
             "name~=": "*" + word1 + "*",
             "id": [],
             "limit": 7,
         }

         try:
             # Compiled once; a bad pattern ends the lookup with a count of
             # 0, the same result the per-result compile used to give.
             pattern = re.compile(word2.lower())
             result = freebase.mqlreaditer(query)

             top_ten = []
             count = 0
             for i in result:
                 if count > 10:
                     break
                 for str1 in i["id"]:
                     top_ten.append(str1)
                     count += 1

                 if not top_ten:
                     # No id collected yet, so there is no article to read.
                     continue

                 # NOTE(review): only the newest id is looked up per result,
                 # matching the original control flow - confirm whether every
                 # collected id was meant to be fetched.
                 article_query = {
                     "id": top_ten[-1],
                     "/common/topic/article": {"id": None, "optional": True, "limit": 1},
                 }
                 try:
                     r = freebase.mqlread(article_query)
                     article_id = r["/common/topic/article"]["id"]
                     text = freebase.blurb(article_id, maxlength=20000).lower()
                     no_of_occ += len(pattern.findall(text))
                 except Exception:
                     # Deliberate best effort: a topic without a readable
                     # article contributes nothing.
                     pass
         except Exception:
             # Deliberate best effort: a failed query yields the count so far.
             pass
         print(no_of_occ)
         return no_of_occ
    return None

# Definitions file: one "<list file>:<label>" line is appended per gazetteer.
f_defs = codecs.open('gazetteer_lists/definitions.def', 'w', encoding="utf-8")

####################################################################################################
# sports
####################################################################################################

sports_query = [{
    "id": None,
    "name": None,
    "type": "/sports/sport",
}]

sports = freebase.mqlreaditer(sports_query, asof=None)

# ``with`` guarantees the list file is closed even if a write fails midway.
with codecs.open('gazetteer_lists/sports.lst', 'w', encoding="utf-8") as f_sports:
    for sport in sports:
        write_perms(sport.name, f_sports)

f_defs.write("sports.lst:sport\n")

####################################################################################################
# teams
####################################################################################################

teams_query = [{
Exemplo n.º 14
0
import freebase
import freebase_fetch
import sys, traceback
import MySQLdb
import httplib

try:
    db = freebase_fetch.get_db()

    # Get all instances of all types in the "/music" domain
    r = freebase.mqlreaditer({"type": [{"type": "/type/type", "domain": "/music", "id": None}], "id": None, "creator": None, "timestamp": None})
    print "Got resource_type results, now iterating through them."
    chunksize = 100
    maxchunks = -1
    chunks = 0
    count = 0
    for i in r:
        if maxchunks == chunks:
            break
        freebase_fetch.insert_resource(i, db)
        resource_key = freebase_fetch.get_key(i["id"], db)
        type_key = freebase_fetch.get_key(i["type"][0]["id"], db)
        freebase_fetch.add_resource_type(resource_key, type_key, db)
        # Commit on each iteration, to avoid huge bulk commits
        db.commit()
        count = count + 1
        if chunksize == count:
            count = 0;
            chunks = chunks + 1
            print str(chunks) + ") Committed " + str(chunksize) + " resource_type records."
#!/usr/bin/python
import freebase, pdb
from urlparse import urlparse
from pprint import pprint

# MQL query for /common/document topics, asking for the (optional) source
# URI value of each.
query = [{
        "type": "/common/document",
        "/common/document/source_uri": {
            "value":       None,
            "optional": True
            },
        "limit" : 1000
        }]


result = freebase.mqlreaditer(query)

# Keyed by import domain name; entries are initialised to 0 in the loop below.
data = { }

# Number of documents seen so far.
c = 0

for blurb in result:
  c += 1

  # Figure out the import domain.
  # Defaults to "unknown"; a source URI starting with "http://wp/" is
  # labelled "wikipedia".
  import_domain = "unknown"
  if blurb.get("/common/document/source_uri") and blurb["/common/document/source_uri"]["value"].startswith("http://wp/"):
      import_domain = "wikipedia"

  # Make sure the domain has an entry before it is used.
  if not data.get(import_domain):
      data[import_domain] = 0
Exemplo n.º 16
0
# MQL query for /common/image topics, asking for each image's licenses
# (optional) and at most one content-import URI (optional).
image_query = [{
        "type": "/common/image",
        "/common/licensed_object/license": [{
            "id":       None,
            "optional": True
            }],
        "!/type/content_import/content": {
            "/type/content_import/uri": None,
            "optional": True,
            "limit" : 1
            },
        }]


result = freebase.mqlreaditer(image_query)

# Filled by the loop that follows; its keys are counted as "domains" in the
# progress message there.
image_data = { }

# Number of images seen so far.
c = 0
licenses = set()
for image in result:
  c += 1

  if not c % 10000:
      v = "%s (%s domains)" % (c, str(len(image_data.keys())))
      print >> sys.stderr, v

  # Figure out the import domain.
  import_domain = "unknown"
  if image.get("!/type/content_import/content") and image["!/type/content_import/content"]["/type/content_import/uri"]: