def load(secure,hostname,url,schema,table,codeset,verbose=False):
    """Reload a code set, with its "maanosat" parent codes, into schema.table.

    The target table is emptied first. The code list is fetched as JSON from
    ``url`` (after ``codeset`` has been substituted into its placeholder).
    For every code the "sisaltyy-ylakoodit" relation is fetched as well, and
    related codes whose koodistoUri is "maanosat" are copied into the row
    before the row is inserted.

    secure        -- use HTTPS when true, plain HTTP otherwise
    hostname      -- host to connect to
    url           -- request path containing a %-placeholder for codeset
    schema, table -- insert target, passed on to dboperator
    codeset       -- value substituted into url
    verbose       -- when true, report progress through show()
    """
    if verbose: show("begin")

    row = makerow()
    dboperator.columns(row)

    if verbose: show("empty %s.%s"%(schema,table))
    dboperator.empty(schema,table)

    url = url % codeset # replace placeholder
    if secure:
        httpconn = httplib.HTTPSConnection(hostname)
        show("load securely from "+hostname+url)
    else:
        httpconn = httplib.HTTPConnection(hostname)
        show("load from "+hostname+url)

    classification = "maanosat"
    try:
        httpconn.request('GET', url)
        codes = json.loads(httpconn.getresponse().read())

        for cnt, code in enumerate(codes, 1):
            row = makerow()
            row["koodi"] = jv(code,"koodiArvo")
            row["nimi"] = getnimi(code,"FI")
            row["nimi_sv"] = getnimi(code,"SV")
            row["nimi_en"] = getnimi(code,"EN")
            row["alkupvm"] = jv(code,"voimassaAlkuPvm")
            row["loppupvm"] = jv(code,"voimassaLoppuPvm")

            httpconn.request('GET', "/koodisto-service/rest/json/relaatio/sisaltyy-ylakoodit/%s" % code["koodiUri"])
            parents = json.loads(httpconn.getresponse().read())

            for parent in parents:
                # only codes of the wanted classification are of interest
                if parent["koodisto"]["koodistoUri"] != classification:
                    continue
                # the length of the code value selects the column group:
                # 3 -> "maanosat3...", 2 -> "maanosat2...", otherwise "maanosat..."
                arvo_length = len(parent["koodiArvo"])
                if arvo_length == 3:
                    level = "3"
                elif arvo_length == 2:
                    level = "2"
                else:
                    level = ""
                row[classification+level+"koodi"] = jv(parent,"koodiArvo")
                row[classification+level+"nimi"] = getnimi(parent,"FI")
                row[classification+level+"nimi_sv"] = getnimi(parent,"SV")
                row[classification+level+"nimi_en"] = getnimi(parent,"EN")

            if verbose: show("%d -- %s"%(cnt,row["koodi"]))
            dboperator.insert(hostname+url,schema,table,row)
    finally:
        # release the socket even when a request or JSON parse fails
        httpconn.close()

    dboperator.close()

    if verbose: show("ready")
def load(secure,hostname,url,schema,table,codeset,verbose=False,debug=False):
    """Reload a code set, with its "julkaisunpaaluokka" parents, into schema.table.

    The target table is emptied first. The code list is fetched as JSON from
    ``url`` (after ``codeset`` has been substituted into its placeholder).
    For every code the "sisaltyy-ylakoodit" relation is fetched and related
    codes whose koodistoUri is "julkaisunpaaluokka" are copied into the
    julkaisunpaaluokka* columns before the row is inserted.

    secure        -- use HTTPS when true, plain HTTP otherwise
    hostname      -- host to connect to
    url           -- request path containing a %-placeholder for codeset
    schema, table -- insert target, passed on to dboperator
    codeset       -- value substituted into url
    verbose       -- when true, print timestamped progress lines
    debug         -- passed through to every dboperator call
    """
    def log(message):
        # timestamped console line; print(...) with one argument behaves the
        # same under Python 2 and Python 3
        print(strftime("%Y-%m-%d %H:%M:%S", localtime())+" "+message)

    if verbose: log("begin")

    row = makerow()
    dboperator.columns(row,debug)

    if verbose: log("empty %s.%s"%(schema,table))
    dboperator.empty(schema,table,debug)

    url = url % codeset # replace placeholder
    if secure:
        httpconn = httplib.HTTPSConnection(hostname)
        log("load securely from "+hostname+url)
    else:
        httpconn = httplib.HTTPConnection(hostname)
        log("load from "+hostname+url)

    try:
        httpconn.request('GET', url)
        codes = json.loads(httpconn.getresponse().read())

        for cnt, code in enumerate(codes, 1):
            row = makerow()
            row["koodi"] = jv(code,"koodiArvo")
            row["nimi"] = getnimi(code,"FI")
            row["nimi_sv"] = getnimi(code,"SV")
            row["nimi_en"] = getnimi(code,"EN")
            row["alkupvm"] = jv(code,"voimassaAlkuPvm")
            row["loppupvm"] = jv(code,"voimassaLoppuPvm")

            httpconn.request('GET', "/koodisto-service/rest/json/relaatio/sisaltyy-ylakoodit/%s" % code["koodiUri"])
            parents = json.loads(httpconn.getresponse().read())

            for parent in parents:
                if parent["koodisto"]["koodistoUri"] == "julkaisunpaaluokka":
                    row["julkaisunpaaluokkakoodi"] = jv(parent,"koodiArvo")
                    row["julkaisunpaaluokkanimi"] = getnimi(parent,"FI")
                    row["julkaisunpaaluokkanimi_sv"] = getnimi(parent,"SV")
                    row["julkaisunpaaluokkanimi_en"] = getnimi(parent,"EN")

            if verbose: log("%d -- %s"%(cnt,row["koodi"]))
            dboperator.insert(hostname+url,schema,table,row,debug)
    finally:
        # release the socket even when a request or JSON parse fails
        httpconn.close()

    dboperator.close(debug)

    if verbose: log("ready")
def load(hostname,url,schema,table,verbose=False,debug=False):
    """Reload hakukohde records into schema.table.

    The target table is emptied first. ``url`` is fetched over HTTPS and is
    expected to return JSON whose "result" list holds items with an "oid".
    Each oid is then fetched from the tarjonta-service hakukohde endpoint.
    Responses with status "OK" are copied column by column into a fresh row
    (list values are stored as their JSON text) and inserted.

    hostname      -- host to connect to (always HTTPS)
    url           -- request path of the oid listing
    schema, table -- insert target, passed on to dboperator
    verbose       -- when true, report progress through show()
    debug         -- print every row and pass the flag on to dboperator
    """
    if verbose: show("begin")

    row = makerow()
    dboperator.columns(row,debug)

    if verbose: show("empty %s.%s"%(schema,table))
    dboperator.empty(schema,table,debug)

    httpconn = httplib.HTTPSConnection(hostname)
    show("load securely from "+hostname+url)

    # get list of oids
    httpconn.request('GET', url)
    listing = json.loads(httpconn.getresponse().read())

    cnt = 0
    for entry in listing["result"]:
        cnt += 1
        # show some sign of being alive
        if cnt%100 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        if cnt%1000 == 0:
            show("-- %d" % (cnt))

        # the per-item path is also what gets recorded as the row's source
        item_url = "/tarjonta-service/rest/v1/hakukohde/%s?populateAdditionalKomotoFields=true"%(entry["oid"])
        try:
            httpconn.request('GET', item_url)
            answer = json.loads(httpconn.getresponse().read())
        except ValueError:
            # body was not valid JSON; report it and go on with the next oid
            show("-- %d -- could not load %s"%(cnt,entry["oid"]))
            continue

        # "NOT FOUND" and any other non-OK status are skipped silently
        if answer["status"] != "OK":
            continue

        item = answer["result"]
        row = makerow()
        for col in row:
            value = None if col not in item else item[col]
            if isinstance(value, list):
                # flatten arrays/lists to their JSON text
                value = json.dumps(value)
            row[col] = value

        if verbose: show("%d -- %s"%(cnt,row["oid"]))
        if debug: print(row)
        dboperator.insert(hostname+item_url,schema,table,row,debug)
def load(hostname,url,schema,table,verbose=False,debug=False):
    """Reload the items of a JSON "result" list into schema.table.

    The target table is emptied first. ``url`` is fetched over HTTPS; every
    item of the response's "result" list becomes one row. Columns are taken
    from makerow(): each column is copied from the item's key of the same
    name (None when absent, list values as JSON text), except that the name
    columns nimi / nimi_sv / nimi_en are filled through getnimi().

    hostname      -- host to connect to (always HTTPS)
    url           -- request path
    schema, table -- insert target, passed on to dboperator
    verbose       -- when true, report progress through show()
    debug         -- print every row and pass the flag on to dboperator
    """
    name_columns = ("nimi", "nimi_sv", "nimi_en")

    if verbose: show("begin")

    row = makerow()
    dboperator.columns(row,debug)

    if verbose: show("empty %s.%s"%(schema,table))
    dboperator.empty(schema,table,debug)

    httpconn = httplib.HTTPSConnection(hostname)
    show("load securely from "+hostname+url)

    httpconn.request('GET', url)
    payload = json.loads(httpconn.getresponse().read())

    cnt = 0
    for item in payload["result"]:
        cnt += 1
        # show some sign of being alive
        if cnt%100 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        if cnt%1000 == 0:
            show("-- %d" % (cnt))

        row = makerow()
        # The name columns are handled after the generic copy. Doing both in
        # one pass let the generic copy overwrite nimi_sv / nimi_en with None
        # whenever the dict happened to yield them after "nimi".
        has_name = "nimi" in row
        for col in row:
            if has_name and col in name_columns:
                continue
            value = None if col not in item else item[col]
            if isinstance(value, list):
                # flatten arrays/lists to their JSON text
                value = json.dumps(value)
            row[col] = value
        if has_name:
            row["nimi"] = getnimi(item,"fi")
            row["nimi_sv"] = getnimi(item,"sv")
            row["nimi_en"] = getnimi(item,"en")

        if verbose: show("%d -- %s"%(cnt,row["oid"]))
        if debug: print(row)
        dboperator.insert(hostname+url,schema,table,row,debug)

    if verbose: show("ready")
except urllib2.URLError, e: show('We failed to reach a server.') show('Reason: %s'%(e.reason)) sys.exit(2) else: # everything is fine show("api call OK") # remove data conditionally, otherwise empty # merge operation could be considered here... if condition: show("remove from %s.%s with condition '%s'"%(schema,table,condition)) dboperator.execute("DELETE FROM %s.%s WHERE %s"%(schema,table,condition)) else: show("empty %s.%s"%(schema,table)) dboperator.empty(schema,table) show("insert data") cnt=0 for row in ijson.items(response,'item'): cnt+=1 # show some sign of being alive if cnt%100 == 0: sys.stdout.write('.') sys.stdout.flush() if cnt%1000 == 0: show("-- %d" % (cnt)) if verbose: show("%d -- %s"%(cnt,row)) # find out which columns to use on insert dboperator.resetcolumns(row)
def load(secure,hostname,url,schema,table,codeset,verbose=False,debug=False):
    """Reload a code set, with its related classification codes, into schema.table.

    The target table is emptied first. The code list is fetched as JSON from
    ``url`` (after ``codeset`` has been substituted into its placeholder).
    For every code the "sisaltyy-alakoodit" relation is fetched; each related
    code whose koodistoUri is listed in ``column_prefixes`` below is copied
    into the columns <prefix>koodi, <prefix>nimi, <prefix>nimi_sv and
    <prefix>nimi_en of the row, which is then inserted.

    secure        -- use HTTPS when true, plain HTTP otherwise
    hostname      -- host to connect to
    url           -- request path containing a %-placeholder for codeset
    schema, table -- insert target, passed on to dboperator
    codeset       -- value substituted into url
    verbose       -- when true, print timestamped progress lines
    debug         -- passed through to every dboperator call
    """
    def log(message):
        # timestamped console line; print(...) with one argument behaves the
        # same under Python 2 and Python 3
        print(strftime("%Y-%m-%d %H:%M:%S", localtime())+" "+message)

    # koodistoUri of a related code -> prefix of the row columns it fills
    column_prefixes = {
        "koulutusasteoph2002": "koulutusaste2002",
        "koulutusalaoph2002": "koulutusala2002",
        "opintoalaoph2002": "opintoala2002",
        "koulutusasteoph1995": "koulutusaste1995",
        "koulutusalaoph1995": "koulutusala1995",
        "opintoalaoph1995": "opintoala1995",
        "tutkinto": "tutkinto",
        "tutkintotyyppi": "tutkintotyyppi",
        "koulutustyyppi": "koulutustyyppi",
        "isced2011koulutusaste": "isced2011koulutusaste",
        "isced2011koulutusastetaso1": "isced2011koulutusastetaso1",
        "isced2011koulutusastetaso2": "isced2011koulutusastetaso2",
        "kansallinenkoulutusluokitus2016koulutusastetaso1": "koulutusluokitus2016koulutusastetaso1",
        # NB! https://www.stat.fi/meta/luokitukset/koulutus/001-2016/kuvaus.html
        # kansallinenkoulutusluokitus2016koulutusastetaso2 -> isced2011koulutusastetaso2
        "kansallinenkoulutusluokitus2016koulutusastetaso2": "koulutusluokitus2016koulutusastetaso2",
        "isced2011koulutusalataso1": "isced2011koulutusalataso1",
        "kansallinenkoulutusluokitus2016koulutusalataso1": "koulutusluokitus2016koulutusalataso1",
        "isced2011koulutusalataso2": "isced2011koulutusalataso2",
        "kansallinenkoulutusluokitus2016koulutusalataso2": "koulutusluokitus2016koulutusalataso2",
        "isced2011koulutusalataso3": "isced2011koulutusalataso3",
        "kansallinenkoulutusluokitus2016koulutusalataso3": "koulutusluokitus2016koulutusalataso3",
        "okmohjauksenala": "okmohjauksenala",
    }

    if verbose: log("begin")

    row = makerow()
    dboperator.columns(row,debug)

    if verbose: log("empty %s.%s"%(schema,table))
    dboperator.empty(schema,table,debug)

    url = url % codeset # replace placeholder
    if secure:
        httpconn = httplib.HTTPSConnection(hostname)
        log("load securely from "+hostname+url)
    else:
        httpconn = httplib.HTTPConnection(hostname)
        log("load from "+hostname+url)

    try:
        httpconn.request('GET', url)
        codes = json.loads(httpconn.getresponse().read())

        for cnt, code in enumerate(codes, 1):
            row = makerow()
            row["koodi"] = code["koodiArvo"]
            row["nimi"] = getnimi(code,"FI")
            row["nimi_sv"] = getnimi(code,"SV")
            row["nimi_en"] = getnimi(code,"EN")
            row["alkupvm"] = code["voimassaAlkuPvm"]
            row["loppupvm"] = code["voimassaLoppuPvm"]

            httpconn.request('GET', "/koodisto-service/rest/json/relaatio/sisaltyy-alakoodit/%s" % code["koodiUri"])
            related = json.loads(httpconn.getresponse().read())

            for item in related:
                prefix = column_prefixes.get(item["koodisto"]["koodistoUri"])
                if prefix is None:
                    continue  # a code set we do not store
                row[prefix+"koodi"] = item["koodiArvo"]
                row[prefix+"nimi"] = getnimi(item,"FI")
                row[prefix+"nimi_sv"] = getnimi(item,"SV")
                row[prefix+"nimi_en"] = getnimi(item,"EN")

            if verbose: log("%d -- %s"%(cnt,row["koodi"]))
            dboperator.insert(hostname+url,schema,table,row,debug)
    finally:
        # release the socket even when a request or JSON parse fails
        httpconn.close()

    dboperator.close(debug)

    if verbose: log("ready")
def load(secure,hostname,url,schema,table,codeset,verbose=False,debug=False):
    """Reload a code set, with its regional classifications, into schema.table.

    The target table is emptied first. The code list is fetched as JSON from
    ``url`` (after ``codeset`` has been substituted into its placeholder).
    For every code two relations are fetched: "sisaltyy-ylakoodit", from
    which "aluehallintovirasto" codes fill the avi* columns, and
    "sisaltyy-alakoodit", from which maakunta, elykeskus, kielisuhde,
    seutukunta, laani and kuntaryhma codes fill their column groups.

    secure        -- use HTTPS when true, plain HTTP otherwise
    hostname      -- host to connect to
    url           -- request path containing a %-placeholder for codeset
    schema, table -- insert target, passed on to dboperator
    codeset       -- value substituted into url
    verbose       -- when true, print timestamped progress lines
    debug         -- passed through to every dboperator call
    """
    def log(message):
        # timestamped console line; print(...) with one argument behaves the
        # same under Python 2 and Python 3
        print(strftime("%Y-%m-%d %H:%M:%S", localtime())+" "+message)

    # koodistoUri -> column prefix, per relation direction
    # (nb! avi is in different direction!)
    parent_prefixes = {"aluehallintovirasto": "avi"}
    child_prefixes = {
        "maakunta": "maakunta",
        "elykeskus": "ely",
        "kielisuhde": "kielisuhde",
        "seutukunta": "seutukunta",
        "laani": "laani",
        "kuntaryhma": "kuntaryhma",
    }

    if verbose: log("begin")

    row = makerow()
    dboperator.columns(row,debug)

    if verbose: log("empty %s.%s"%(schema,table))
    dboperator.empty(schema,table,debug)

    url = url % codeset # replace placeholder
    if secure:
        httpconn = httplib.HTTPSConnection(hostname)
        log("load securely from "+hostname+url)
    else:
        httpconn = httplib.HTTPConnection(hostname)
        log("load from "+hostname+url)

    def copy_related(relation, prefixes, code, target):
        # fetch the codes related to `code` and copy the wanted ones to target
        httpconn.request('GET', "/koodisto-service/rest/json/relaatio/%s/%s" % (relation, code["koodiUri"]))
        for item in json.loads(httpconn.getresponse().read()):
            prefix = prefixes.get(item["koodisto"]["koodistoUri"])
            if prefix is None:
                continue  # a code set we do not store
            target[prefix+"koodi"] = jv(item,"koodiArvo")
            target[prefix+"nimi"] = getnimi(item,"FI")
            target[prefix+"nimi_sv"] = getnimi(item,"SV")
            target[prefix+"nimi_en"] = getnimi(item,"EN")

    try:
        httpconn.request('GET', url)
        codes = json.loads(httpconn.getresponse().read())

        for cnt, code in enumerate(codes, 1):
            row = makerow()
            row["koodi"] = jv(code,"koodiArvo")
            row["nimi"] = getnimi(code,"FI")
            row["nimi_sv"] = getnimi(code,"SV")
            # This column used to be filled with the Finnish name only. Prefer
            # the English name and keep Finnish as the fallback, so no value
            # that was stored before is lost.
            # NOTE(review): assumes getnimi returns a falsy value when the
            # language is missing -- confirm against getnimi.
            row["nimi_en"] = getnimi(code,"EN") or getnimi(code,"FI")
            row["alkupvm"] = jv(code,"voimassaAlkuPvm")
            row["loppupvm"] = jv(code,"voimassaLoppuPvm")

            copy_related("sisaltyy-ylakoodit", parent_prefixes, code, row)
            copy_related("sisaltyy-alakoodit", child_prefixes, code, row)

            if verbose: log("%d -- %s"%(cnt,row["koodi"]))
            dboperator.insert(hostname+url,schema,table,row,debug)
    finally:
        # release the socket even when a request or JSON parse fails
        httpconn.close()

    dboperator.close(debug)

    if verbose: log("ready")
def load(secure,hostname,url,schema,table,verbose=False):
    """Reload organisation records into schema.table.

    The target table is emptied first. Two requests are made up front: the
    "v2/liitokset" listing (turned into an organisation oid -> target oid
    map) and the base address, whose JSON is iterated as a list of oids.
    Every oid is then fetched individually. Organisations of type
    Koulutustoimija, Oppilaitos, Toimipiste or Oppisopimustoimipiste are
    stored with code, names, dates, home municipality, teaching language and
    address; when the address is complete, get_and_set_coordinates() is
    called for the row.

    secure        -- use https:// when true, http:// otherwise
    hostname, url -- joined into the base address
    schema, table -- insert target, passed on to dboperator
    verbose       -- when true, report progress through show()

    Exits the process with status 2 when the initial requests fail.
    """
    if verbose: show("begin")

    # make "columnlist" (type has no meaning as we're not creating table)
    row = makerow()
    # setup dboperator so other calls work
    dboperator.columns(row)

    if verbose: show("empty %s.%s"%(schema,table))
    dboperator.empty(schema,table)

    # Fetching could be as simple and fast as one request per type to
    #   v2/hae?aktiiviset=true&suunnitellut=true&lakkautetut=true&organisaatiotyyppi=<tyyppi>
    # for "Koulutustoimija", "Oppilaitos" and "Toimipiste"
    # ... but results don't contain address information :(
    if secure:
        address = "https://"+hostname+url
    else:
        address = "http://"+hostname+url

    show("load from "+address)
    try:
        # first create a "hash map" of liitokset
        liitosresponse = requests.get(address+"v2/liitokset")
        # actual data
        response = requests.get(address)
    except requests.exceptions.RequestException as e:
        # (was "except e:", which raised NameError instead of reporting)
        show('HTTP GET failed.')
        show('Reason: %s'%(str(e)))
        sys.exit(2)
    else:
        # everything is fine
        show("api call OK")

    # liitokset
    liitosmap = dict()
    for l in liitosresponse.json():
        liitosmap[l["organisaatio"]["oid"]] = l["kohde"]["oid"]
    liitosresponse = None

    oids = response.json()
    cnt = 0
    for o in oids:
        cnt+=1
        # show some sign of being alive
        if cnt%100 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        if cnt%1000 == 0:
            show("-- %d" % (cnt))
        # note: at this point row still holds the previous organisation
        if verbose: show("%d -- %s"%(cnt,row))

        # make another request to actual organization data
        try:
            r = requests.get(address+o)
            i = r.json()

            # make "row" (clear values)
            row = makerow()
            row["oid"] = o
            row["parentoid"] = jv(i,"parentOid")
            # liitokset
            row["liitosoid"] = liitosmap[o] if o in liitosmap else None

            # TODO does the order here matter? if multiple tyyppi's, what to do?
            if "tyypit" in i and "Koulutustoimija" in i["tyypit"]:
                row["tyyppi"] = "Koulutustoimija"
                row["koodi"] = jv(i,"ytunnus")
                if not row["koodi"]:
                    # alternatively try virastotunnus if ytunnus is missing;
                    # the camelCase key is tried first, the old spelling is
                    # kept as a fallback
                    # NOTE(review): assumes jv returns a falsy value for a
                    # missing key -- confirm against jv.
                    row["koodi"] = jv(i,"virastoTunnus") or jv(i,"virastotunnus")
                if not row["koodi"]:
                    row["tyyppi"] = None # cancel this organization from loading
            elif "tyypit" in i and "Oppilaitos" in i["tyypit"]:
                row["tyyppi"] = "Oppilaitos"
                row["koodi"] = jv(i,"oppilaitosKoodi")
                if "oppilaitosTyyppiUri" in i and i["oppilaitosTyyppiUri"]:
                    # => just code, text values separately
                    row["oppilaitostyyppi"] = i["oppilaitosTyyppiUri"].replace("oppilaitostyyppi_","").replace("#1","")
            elif "tyypit" in i and "Toimipiste" in i["tyypit"]:
                row["tyyppi"] = "Toimipiste"
                row["koodi"] = jv(i,"toimipistekoodi")
            elif "tyypit" in i and "Oppisopimustoimipiste" in i["tyypit"]:
                row["tyyppi"] = "Oppisopimustoimipiste"
                row["koodi"] = jv(i,"toimipistekoodi")

            # was current organization of type of interest
            if row["tyyppi"]:
                if "nimi" in i and i["nimi"]:
                    row["nimi"] = jv(jv(i,"nimi"),"fi")
                    row["nimi_sv"] = jv(jv(i,"nimi"),"sv")
                    row["nimi_en"] = jv(jv(i,"nimi"),"en")
                row["alkupvm"] = jv(i,"alkuPvm")
                row["loppupvm"] = jv(i,"lakkautusPvm")
                if "kotipaikkaUri" in i and i["kotipaikkaUri"]:
                    # => just code, text values separately
                    row["kotikunta"] = jv(i,"kotipaikkaUri").replace("kunta_","")
                if "kieletUris" in i and i["kieletUris"]:
                    # todo what if many?
                    # => just code, text values separately
                    row["oppilaitoksenopetuskieli"] = i["kieletUris"][0].replace("oppilaitoksenopetuskieli_","").replace("#1","")

                # address, first kayntiosoite and if not exists then postiosoite
                josoite = None
                if "kayntiosoite" in i:
                    josoite = jv(i,"kayntiosoite")
                    row["osoitetyyppi"] = "kayntiosoite"
                elif "postiosoite" in i:
                    josoite = jv(i,"postiosoite")
                    row["osoitetyyppi"] = "postiosoite"
                if josoite:
                    row["osoite"] = jv(josoite,"osoite")
                    row["postinumero"] = josoite["postinumeroUri"].replace("posti_","") if "postinumeroUri" in josoite and josoite["postinumeroUri"] else None
                    row["postitoimipaikka"] = jv(josoite,"postitoimipaikka")
                    # Coordinates are looked up only for a complete address.
                    # Values are compared by equality; "is not" against a
                    # string or int literal tests identity, not value.
                    # int() raises ValueError for a non-numeric postal code,
                    # which the handler below reports (the row is skipped).
                    if (row["osoite"] not in (None, "")
                            and row["postinumero"] not in (None, "")
                            and int(row["postinumero"]) != 0
                            and row["postitoimipaikka"] not in (None, "")):
                        get_and_set_coordinates(row)

                if verbose: show(" %5d -- %s %s (%s)"%(cnt,row["tyyppi"],row["koodi"],row["nimi"]))
                dboperator.insert(hostname+url,schema,table,row)
        except ValueError as ve:
            print("Error: " + str(ve))
            print("vika: " + str(address) + " oid:" + str(o))
def load(url, schema, table, condition):
    """Load all pages of an ARVO API result into schema.table.

    Results from ARVO-API can come in multiple pages. If that's the case, we
    need to make multiple requests to the ARVO API, using the "next_url"
    value of the response's "pagination" object.

    Before the first page is written the old data is removed: with a
    ``condition`` by ``DELETE ... WHERE <condition>``, otherwise by emptying
    the whole table. Every item of a page's "data" list is inserted as one
    row; list values are stored as their JSON text.

    url       -- address of the first page
    schema    -- target schema
    table     -- target table
    condition -- SQL WHERE clause body for the initial delete, or a falsy
                 value to empty the table

    Exits the process with status 1 (request failed), 2 (HTTP status not
    200), 3 (body is not JSON) or 4 ("pagination" or "data" missing).
    """
    # Credentials (from env vars) and headers are the same for every page.
    reqheaders = {'Content-Type': 'application/json'}
    use_auth = bool(os.getenv("API_USERNAME"))
    if use_auth:
        apiuser = os.getenv("API_USERNAME")
        apipass = os.getenv("API_PASSWORD")
        reqheaders['Authorization'] = 'Basic %s' % base64.b64encode(apiuser + ":" + apipass)

    first_page = True  # the possible DELETE (due to condition) is done only once
    while True:
        show("begin " + url + " " + schema + " " + table + " " + (condition or ""))
        show("load from " + url)
        if use_auth:
            show("using authentication")

        try:
            r = requests.get(url, headers=reqheaders)
        except requests.exceptions.RequestException as e:
            print(e)
            sys.exit(1)

        if r.status_code != 200:
            print("Error! HTTP status code: " + str(r.status_code))
            sys.exit(2)

        try:
            result = json.loads(r.content)
        except ValueError as e:
            print(e)
            sys.exit(3)

        if "pagination" not in result or "data" not in result:
            print("Error! Received JSON-data not valid.")
            sys.exit(4)

        # everything is fine
        show("api call OK")

        # Received data e.g. (abbreviated)
        # {
        #   "data": [
        #     {
        #       "taustakysymykset": true, "koulutustoimija": "xxxx",
        #       "vastausid": 1111, "kyselykertaid": 123,
        #       "kysely_alkupvm": "2016-11-29T22:00:00Z",
        #       "suorituskieli": "fi", "tutkinto_fi": "xxxx",
        #       "numerovalinta": 2, "kysymysid": 1234, "kunta": null,
        #       "vastausaika": "2017-02-05T22:00:00Z",
        #       "valtakunnallinen": true,
        #       ...
        #     }
        #   ],
        #   "pagination": { "next_url": "null" }
        # }
        # Other keys seen in an item: koulutustoimija_fi/_sv/_en,
        # tutkinto_sv/_en, tutkintotunnus, kysely_fi/_sv/_en, kyselyid,
        # kysely_loppupvm, kyselykerta, kysymys_fi/_sv/_en, kysymysjarjestys,
        # kysymysryhma, kysymysryhma_fi/_sv/_en, kysymysryhmaid,
        # kysymysryhmajarjestys, monivalintavaihtoehto, vaihtoehto,
        # vastaustyyppi, vastaajaid, tunnus, koulutusmuoto,
        # valmistavan_koulutuksen_jarjestaja (+ _fi/_sv/_en),
        # valmistavan_koulutuksen_oppilaitos (+ _fi/_sv/_en).

        # Save in DB only the part before ?-mark:
        # https://arvo.csc.fi/api/vipunen?alkupvm=2018-01-01&loppupvm=2018-02-01
        address = url.split("?")[0]

        # remove data conditionally, otherwise empty
        # merge operation could be considered here...
        if first_page:
            # done only on the first go (no matter if Arvo returns one or
            # multiple pages)
            if condition:
                show("remove from %s.%s with condition '%s'" % (schema, table, condition))
                # NOTE(review): the statement is built by string formatting,
                # so schema, table and condition must come from trusted
                # configuration, never from user input.
                dboperator.execute("DELETE FROM %s.%s WHERE %s" % (schema, table, condition))
            else:
                show("empty %s.%s" % (schema, table))
                dboperator.empty(schema, table)

        show("insert data")
        cnt = 0
        for row in result["data"]:
            cnt += 1
            # show some sign of being alive
            if cnt % 100 == 0:
                sys.stdout.write('.')
                sys.stdout.flush()
            if cnt % 1000 == 0:
                show("-- %d" % (cnt))

            # find out which columns to use on insert
            dboperator.resetcolumns(row)

            # flatten arrays/lists to their JSON text
            for col in row:
                if isinstance(row[col], list):
                    row[col] = json.dumps(row[col])

            dboperator.insert(address, schema, table, row)

        show("wrote %d" % (cnt))
        show("ready")

        next_url = result["pagination"]["next_url"]
        if not next_url or next_url == "null":
            break  # exit while-loop. We are done.
        url = next_url
        first_page = False  # do not make the possible DELETE-operation anymore!
def load(secure, hostname, url, schema, table, codeset, verbose=False, debug=False):
    """Reload a code set, with its regional classifications, into schema.table.

    The target table is emptied first. The code list is fetched as JSON from
    ``url`` (after ``codeset`` has been substituted into its placeholder).
    For every code two relations are fetched: "sisaltyy-ylakoodit", from
    which "aluehallintovirasto" codes fill the avi* columns, and
    "sisaltyy-alakoodit", from which maakunta, elykeskus, kielisuhde,
    seutukunta, laani and kuntaryhma codes fill their column groups.

    secure        -- use HTTPS when true, plain HTTP otherwise
    hostname      -- host to connect to
    url           -- request path containing a %-placeholder for codeset
    schema, table -- insert target, passed on to dboperator
    codeset       -- value substituted into url
    verbose       -- when true, print timestamped progress lines
    debug         -- passed through to every dboperator call
    """
    relation_path = "/koodisto-service/rest/json/relaatio/%s/%s"
    # (relation, {koodistoUri: column prefix}) in the order they are fetched
    # (nb! avi is in different direction!)
    relations = (
        ("sisaltyy-ylakoodit", {"aluehallintovirasto": "avi"}),
        ("sisaltyy-alakoodit", {
            "maakunta": "maakunta",
            "elykeskus": "ely",
            "kielisuhde": "kielisuhde",
            "seutukunta": "seutukunta",
            "laani": "laani",
            "kuntaryhma": "kuntaryhma",
        }),
    )

    def log(message):
        # timestamped console line; print(...) with one argument behaves the
        # same under Python 2 and Python 3
        print(strftime("%Y-%m-%d %H:%M:%S", localtime()) + " " + message)

    if verbose:
        log("begin")

    row = makerow()
    dboperator.columns(row, debug)

    if verbose:
        log("empty %s.%s" % (schema, table))
    dboperator.empty(schema, table, debug)

    url = url % codeset  # replace placeholder
    if secure:
        httpconn = httplib.HTTPSConnection(hostname)
        log("load securely from " + hostname + url)
    else:
        httpconn = httplib.HTTPConnection(hostname)
        log("load from " + hostname + url)

    try:
        httpconn.request('GET', url)
        codes = json.loads(httpconn.getresponse().read())

        for cnt, code in enumerate(codes, 1):
            row = makerow()
            row["koodi"] = jv(code, "koodiArvo")
            row["nimi"] = getnimi(code, "FI")
            row["nimi_sv"] = getnimi(code, "SV")
            # This column used to be filled with the Finnish name only. Prefer
            # the English name and keep Finnish as the fallback, so no value
            # that was stored before is lost.
            # NOTE(review): assumes getnimi returns a falsy value when the
            # language is missing -- confirm against getnimi.
            row["nimi_en"] = getnimi(code, "EN") or getnimi(code, "FI")
            row["alkupvm"] = jv(code, "voimassaAlkuPvm")
            row["loppupvm"] = jv(code, "voimassaLoppuPvm")

            for relation, prefixes in relations:
                httpconn.request('GET', relation_path % (relation, code["koodiUri"]))
                for item in json.loads(httpconn.getresponse().read()):
                    prefix = prefixes.get(item["koodisto"]["koodistoUri"])
                    if prefix is None:
                        continue  # a code set we do not store
                    row[prefix + "koodi"] = jv(item, "koodiArvo")
                    row[prefix + "nimi"] = getnimi(item, "FI")
                    row[prefix + "nimi_sv"] = getnimi(item, "SV")
                    row[prefix + "nimi_en"] = getnimi(item, "EN")

            if verbose:
                log("%d -- %s" % (cnt, row["koodi"]))
            dboperator.insert(hostname + url, schema, table, row, debug)
    finally:
        # release the socket even when a request or JSON parse fails
        httpconn.close()

    dboperator.close(debug)

    if verbose:
        log("ready")
def load(secure, hostname, url, schema, table, verbose=False):
    """Reload organisation records into schema.table.

    The target table is emptied first. Two requests are made up front: the
    "v2/liitokset" listing (turned into an organisation oid -> target oid
    map) and the base address, whose JSON is iterated as a list of oids.
    Every oid is then fetched individually. Organisations whose "tyypit"
    contain one of the handled types are stored with code, names, dates,
    home municipality, teaching language and address; when the address is
    complete, get_and_set_coordinates() is called for the row.

    secure        -- use https:// when true, http:// otherwise
    hostname, url -- joined into the base address
    schema, table -- insert target, passed on to dboperator
    verbose       -- when true, report progress through show()

    Exits the process with status 2 when the initial requests fail.
    """
    # the same Caller-Id header is sent with every request
    reqheaders = {'Caller-Id': '1.2.246.562.10.2013112012294919827487.vipunen'}

    if verbose:
        show("begin")

    # make "columnlist" (type has no meaning as we're not creating table)
    row = makerow()
    # setup dboperator so other calls work
    dboperator.columns(row)

    if verbose:
        show("empty %s.%s" % (schema, table))
    dboperator.empty(schema, table)

    # Fetching could be as simple and fast as one request per type to
    #   v2/hae?aktiiviset=true&suunnitellut=true&lakkautetut=true&organisaatiotyyppi=<tyyppi>
    # for "Koulutustoimija", "Oppilaitos" and "Toimipiste"
    # ... but results don't contain address information :(
    if secure:
        address = "https://" + hostname + url
    else:
        address = "http://" + hostname + url

    show("load from " + address)
    try:
        # first create a "hash map" of liitokset
        liitosresponse = requests.get(address + "v2/liitokset", headers=reqheaders)
        # actual data
        response = requests.get(address, headers=reqheaders)
    except Exception as e:
        show('HTTP GET failed.')
        show('Reason: %s' % (str(e)))
        sys.exit(2)
    else:
        # everything is fine
        show("api call OK")

    # liitokset
    liitosmap = dict()
    for l in liitosresponse.json():
        liitosmap[l["organisaatio"]["oid"]] = l["kohde"]["oid"]
    liitosresponse = None

    oids = response.json()
    cnt = 0
    for o in oids:
        cnt += 1
        # show some sign of being alive
        if cnt % 100 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        if cnt % 1000 == 0:
            show("-- %d" % (cnt))
        # note: at this point row still holds the previous organisation
        if verbose:
            show("%d -- %s" % (cnt, row))

        # make another request to actual organization data
        try:
            r = requests.get(address + o, headers=reqheaders)
            i = r.json()

            # make "row" (clear values)
            row = makerow()
            row["oid"] = o
            row["parentoid"] = jv(i, "parentOid")
            # liitokset
            row["liitosoid"] = liitosmap[o] if o in liitosmap else None
            # yritysmuoto 3.11.2021 vha testing
            if 'yritysmuoto' in i:
                row["yritysmuoto"] = jv(i, "yritysmuoto")

            # a missing or null "tyypit" simply matches no type
            tyypit = i["tyypit"] if "tyypit" in i and i["tyypit"] else ()

            # TODO does the order here matter? if multiple tyyppi's, what to do?
            if "Koulutustoimija" in tyypit:
                row["tyyppi"] = "Koulutustoimija"
                row["koodi"] = jv(i, "ytunnus")
                if not row["koodi"]:
                    # alternatively try virastotunnus if ytunnus is missing
                    row["koodi"] = jv(i, "virastoTunnus")
                if not row["koodi"]:
                    row["tyyppi"] = None  # cancel this organization from loading
            elif "Oppilaitos" in tyypit:
                row["tyyppi"] = "Oppilaitos"
                row["koodi"] = jv(i, "oppilaitosKoodi")
                if "oppilaitosTyyppiUri" in i and i["oppilaitosTyyppiUri"]:
                    # => just code, text values separately
                    row["oppilaitostyyppi"] = i["oppilaitosTyyppiUri"].replace(
                        "oppilaitostyyppi_", "").replace("#1", "")
            elif "Toimipiste" in tyypit:
                row["tyyppi"] = "Toimipiste"
                row["koodi"] = jv(i, "toimipistekoodi")
            elif "Oppisopimustoimipiste" in tyypit:
                row["tyyppi"] = "Oppisopimustoimipiste"
                row["koodi"] = jv(i, "toimipistekoodi")
            elif "Varhaiskasvatuksen toimipaikka" in tyypit:
                row["tyyppi"] = "Varhaiskasvatuksen toimipaikka"
                row["koodi"] = jv(i, "toimipistekoodi")
            elif "Varhaiskasvatuksen jarjestaja" in tyypit:
                # NOTE(review): the stored label is spelled differently from
                # the matched type; left untouched because stored data may
                # rely on it -- confirm which spelling is intended.
                row["tyyppi"] = "Varhaiskasvatuksen järjestaja"
                row["koodi"] = jv(i, "toimipistekoodi")

            # was current organization of type of interest
            if row["tyyppi"]:
                if "nimi" in i and i["nimi"]:
                    row["nimi"] = jv(jv(i, "nimi"), "fi")
                    row["nimi_sv"] = jv(jv(i, "nimi"), "sv")
                    row["nimi_en"] = jv(jv(i, "nimi"), "en")
                row["alkupvm"] = jv(i, "alkuPvm")
                row["loppupvm"] = jv(i, "lakkautusPvm")
                if "kotipaikkaUri" in i and i["kotipaikkaUri"]:
                    # => just code, text values separately
                    row["kotikunta"] = jv(i, "kotipaikkaUri").replace("kunta_", "")
                if "kieletUris" in i and i["kieletUris"]:
                    # todo what if many?
                    # => just code, text values separately
                    row["oppilaitoksenopetuskieli"] = i["kieletUris"][0].replace(
                        "oppilaitoksenopetuskieli_", "").replace("#1", "")

                # address, first kayntiosoite and if not exists then postiosoite
                josoite = None
                if "kayntiosoite" in i:
                    josoite = jv(i, "kayntiosoite")
                    row["osoitetyyppi"] = "kayntiosoite"
                elif "postiosoite" in i:
                    josoite = jv(i, "postiosoite")
                    row["osoitetyyppi"] = "postiosoite"
                if josoite:
                    row["osoite"] = jv(josoite, "osoite")
                    if "postinumeroUri" in josoite and josoite["postinumeroUri"]:
                        row["postinumero"] = josoite["postinumeroUri"].replace("posti_", "")
                    else:
                        row["postinumero"] = None
                    row["postitoimipaikka"] = jv(josoite, "postitoimipaikka")
                    # Coordinates are looked up only for a complete address.
                    # Values are compared by equality; "is not" against a
                    # string or int literal tests identity, not value.
                    # int() raises ValueError for a non-numeric postal code,
                    # which the handler below reports (the row is skipped).
                    if (row["osoite"] not in (None, "")
                            and row["postinumero"] not in (None, "")
                            and int(row["postinumero"]) != 0
                            and row["postitoimipaikka"] not in (None, "")):
                        get_and_set_coordinates(row)

                if verbose:
                    show(" %5d -- %s %s (%s)" % (cnt, row["tyyppi"], row["koodi"], row["nimi"]))
                dboperator.insert(hostname + url, schema, table, row)
        except ValueError as ve:
            print("ValueError: " + str(ve))
            print("vika: " + str(address) + " oid:" + str(o))

    dboperator.close()

    if verbose:
        show("ready")
def load(hostname, url, schema, table, verbose=False, debug=False):
    """Load koulutus records from the tarjonta service into ``schema.table``.

    The target table is emptied first. The search listing at ``url`` is
    fetched over HTTPS; for every entry in the nested ``tulokset`` lists the
    full record is fetched from ``/tarjonta-service/rest/v1/koulutus/<oid>``
    and inserted as one row.

    Column mapping, driven by the keys of ``makerow()``:

    * ``nimi``, ``nimi_sv``, ``nimi_en`` come from ``getnimi()`` with the
      languages ``fi``, ``sv`` and ``en``.
    * columns containing ``_uri`` and the columns ``koulutuskoodi_arvo`` /
      ``koulutuskoodi_nimi`` are read as ``result[<key>][<subkey>]``; when
      the nested value is missing the ``makerow()`` default is kept.
    * every other column is copied from the same-named key of the result,
      or set to ``None`` when absent.
    * list values are stored as their JSON text.

    :param hostname: host to connect to (always HTTPS)
    :param url: path of the search listing
    :param schema: target schema name
    :param table: target table name
    :param verbose: log progress for every row
    :param debug: passed through to ``dboperator``; also prints each row
    """
    if verbose:
        show("begin")

    row = makerow()
    dboperator.columns(row, debug)

    if verbose:
        show("empty %s.%s" % (schema, table))
    dboperator.empty(schema, table, debug)

    httpconn = httplib.HTTPSConnection(hostname)
    show("load securely from " + hostname + url)

    # get list of oids
    httpconn.request('GET', url)
    rr = httpconn.getresponse()
    jj = json.loads(rr.read())

    # Name columns and the language each one is read in. Handling all three
    # here keeps nimi_sv / nimi_en from being overwritten with None by the
    # generic copy below when the loop reaches those keys themselves.
    name_langs = {"nimi": "fi", "nimi_sv": "sv", "nimi_en": "en"}
    # <key>_<subkey> columns (besides the *_uri ones) read from a nested object
    nested_cols = ("koulutuskoodi_arvo", "koulutuskoodi_nimi")

    cnt = 0
    for iii in jj["result"]["tulokset"]:
        # NB: iii["oid"] is the organisation's oid; it is stored on each row below
        for ii in iii["tulokset"]:
            cnt += 1
            # show some sign of being alive
            if cnt % 100 == 0:
                sys.stdout.write('.')
                sys.stdout.flush()
            if cnt % 1000 == 0:
                show("-- %d" % (cnt))

            # get one koulutus. skip koodisto meta etc
            detail_url = "/tarjonta-service/rest/v1/koulutus/%s?meta=false&img=false" % (
                ii["oid"])
            try:
                httpconn.request('GET', detail_url)
                r = httpconn.getresponse()
                j = json.loads(r.read())
            except ValueError:
                # response body was not valid JSON; skip this koulutus
                show("-- %d -- could not load %s" % (cnt, ii["oid"]))
                continue

            i = j["result"]
            row = makerow()
            for col in row:
                if col in name_langs:
                    row[col] = getnimi(i, name_langs[col])
                elif "_uri" in col or col in nested_cols:
                    (colkey, subkey) = col.split("_")
                    nested = i.get(colkey)
                    # a missing or null nested object leaves the default in place
                    if isinstance(nested, dict) and subkey in nested:
                        row[col] = nested[subkey]
                else:
                    row[col] = i.get(col)
                # flatten arrays/lists to their JSON text
                if isinstance(row[col], list):
                    row[col] = json.dumps(row[col])

            # add organization oid stored from search results above
            row["organisaatio_oid"] = iii["oid"]

            if verbose:
                show("%d -- %s" % (cnt, row["oid"]))
            if debug:
                print(row)
            # the per-record URL is recorded as the source of the row
            dboperator.insert(hostname + detail_url, schema, table, row, debug)
def load(hostname, url, schema, table, verbose=False, debug=False):
    """Load hakukohde records from the tarjonta service into ``schema.table``.

    Empties the target table, fetches the listing at ``url`` over HTTPS and
    then fetches every listed oid from
    ``/tarjonta-service/rest/v1/hakukohde/<oid>``. Records whose response
    status is ``OK`` are buffered and written with ``dboperator.insertMany``.
    Responses with any other status, or with a body that is not valid JSON,
    are skipped.

    :param hostname: host to connect to (always HTTPS)
    :param url: path of the hakukohde listing
    :param schema: target schema name
    :param table: target table name
    :param verbose: log progress for every buffered row
    :param debug: passed through to ``dboperator``; also prints each row
    """
    if verbose:
        show("begin")

    dboperator.columns(makerow(), debug)

    if verbose:
        show("empty %s.%s" % (schema, table))
    dboperator.empty(schema, table, debug)

    conn = httplib.HTTPSConnection(hostname)
    show("load securely from " + hostname + url)

    # get list of oids; the same Caller-Id header is sent with every request
    caller_headers = {'Caller-Id': '1.2.246.562.10.2013112012294919827487.vipunen'}
    conn.request('GET', url, headers=caller_headers)
    listing = json.loads(conn.getresponse().read())

    cnt = 0
    batch = []
    for cnt, entry in enumerate(listing["result"], 1):
        # show some sign of being alive
        if cnt % 100 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        if cnt % 1000 == 0:
            show("-- %d" % (cnt))

        # url is rebound on purpose: the insertMany calls below label the
        # batch with hostname + the most recently requested url
        url = "/tarjonta-service/rest/v1/hakukohde/%s?populateAdditionalKomotoFields=true" % (
            entry["oid"])
        try:
            conn.request('GET', url, headers=caller_headers)
            payload = json.loads(conn.getresponse().read())
        except ValueError:
            show("-- %d -- could not load %s" % (cnt, entry["oid"]))
            continue

        # only "OK" responses carry a usable result ("NOT FOUND" and others are skipped)
        if payload["status"] != "OK":
            continue

        detail = payload["result"]
        record = makerow()
        for col in record:
            value = detail.get(col)
            # lists are stored as their JSON text
            record[col] = json.dumps(value) if isinstance(value, list) else value

        if verbose:
            show("%d -- %s" % (cnt, record["oid"]))
        if debug:
            print(record)

        batch.append(record)
        # flush whenever the running fetch counter reaches a multiple of 5000
        if cnt % 5000 == 0:
            dboperator.insertMany(hostname + url, schema, table, batch)
            batch = []

    # write the remaining rows (fewer than 5000)
    dboperator.insertMany(hostname + url, schema, table, batch)

    show("Total rows: %d" % (cnt))
    if verbose:
        show("ready")
def load(secure, hostname, url, schema, table, codeset, verbose=False, debug=False):
    """Load one koodisto code set, with its paatieteenala parent, into ``schema.table``.

    The target table is emptied first. ``url`` holds a ``%`` placeholder that
    is filled with ``codeset``. Each code in the response becomes one row with
    its value, Finnish/Swedish/English names and validity dates. For every
    code the containing upper-level codes are fetched from
    ``/koodisto-service/rest/json/relaatio/sisaltyy-ylakoodit/<koodiUri>``;
    a parent belonging to the ``paatieteenala`` koodisto fills the
    ``paatieteenala*`` columns.

    :param secure: use HTTPS when true, plain HTTP otherwise
    :param hostname: host to connect to
    :param url: path template containing a placeholder for ``codeset``
    :param schema: target schema name
    :param table: target table name
    :param codeset: value substituted into ``url``
    :param verbose: print timestamped progress messages
    :param debug: passed through to ``dboperator``
    """
    if verbose:
        print(strftime("%Y-%m-%d %H:%M:%S", localtime()) + " begin")

    row = makerow()
    dboperator.columns(row, debug)

    if verbose:
        print(strftime("%Y-%m-%d %H:%M:%S", localtime()) + " empty %s.%s" % (schema, table))
    dboperator.empty(schema, table, debug)

    url = url % codeset  # replace placeholder
    if secure:
        httpconn = httplib.HTTPSConnection(hostname)
        print(strftime("%Y-%m-%d %H:%M:%S", localtime()) + " load securely from " + hostname + url)
    else:
        httpconn = httplib.HTTPConnection(hostname)
        print(strftime("%Y-%m-%d %H:%M:%S", localtime()) + " load from " + hostname + url)

    httpconn.request('GET', url)
    r = httpconn.getresponse()
    j = json.loads(r.read())

    cnt = 0
    for i in j:
        cnt += 1
        row = makerow()

        row["koodi"] = jv(i, "koodiArvo")
        row["nimi"] = getnimi(i, "FI")
        row["nimi_sv"] = getnimi(i, "SV")
        # English name; this was previously filled with the Finnish one
        row["nimi_en"] = getnimi(i, "EN")
        row["alkupvm"] = jv(i, "voimassaAlkuPvm")
        row["loppupvm"] = jv(i, "voimassaLoppuPvm")

        # fetch the upper-level codes this code is contained in
        httpconn.request(
            'GET',
            "/koodisto-service/rest/json/relaatio/sisaltyy-ylakoodit/%s" % i["koodiUri"])
        rr = httpconn.getresponse()
        jj = json.loads(rr.read())
        for ii in jj:
            if ii["koodisto"]["koodistoUri"] == "paatieteenala":
                row["paatieteenalakoodi"] = jv(ii, "koodiArvo")
                row["paatieteenalanimi"] = getnimi(ii, "FI")
                row["paatieteenalanimi_sv"] = getnimi(ii, "SV")
                row["paatieteenalanimi_en"] = getnimi(ii, "EN")

        if verbose:
            print(strftime("%Y-%m-%d %H:%M:%S", localtime()) + " %d -- %s" % (cnt, row["koodi"]))
        dboperator.insert(hostname + url, schema, table, row, debug)

    dboperator.close(debug)

    if verbose:
        print(strftime("%Y-%m-%d %H:%M:%S", localtime()) + " ready")
show('Reason: %s' % (e.reason)) sys.exit(2) else: # everything is fine show("api call OK") # remove data conditionally, otherwise empty # merge operation could be considered here... if condition: show("remove from %s.%s with condition '%s'" % (schema, table, condition)) dboperator.execute("DELETE FROM %s.%s WHERE %s" % (schema, table, condition)) else: show("empty %s.%s" % (schema, table)) dboperator.empty(schema, table) show("insert data") cnt = 0 for row in ijson.items(response, 'item'): cnt += 1 # show some sign of being alive if cnt % 100 == 0: sys.stdout.write('.') sys.stdout.flush() if cnt % 1000 == 0: show("-- %d" % (cnt)) if verbose: show("%d -- %s" % (cnt, row)) # find out which columns to use on insert dboperator.resetcolumns(row)
def load(secure,hostname,url,schema,table,postdata,condition,verbose,rowcount):
    """Stream a JSON array from an HTTP(S) API into ``schema.table``.

    The request carries a JSON content type and a fixed ``Caller-Id`` header.
    When the ``API_USERNAME`` environment variable is set, HTTP Basic
    credentials built from ``API_USERNAME`` / ``API_PASSWORD`` are added.
    Any request failure is logged and ends the process with exit status 2.

    Before inserting, existing data is removed: rows matching ``condition``
    when one is given, otherwise the whole table. The response is parsed
    incrementally with ``ijson``; list values are stored as their JSON text.
    The first row is written on its own with ``dboperator.insert``, later
    rows are buffered and written with ``insert`` each time ``rowcount`` rows
    have accumulated, and whatever remains is written after the stream ends.

    :param secure: use ``https://`` when true, ``http://`` otherwise
    :param hostname: host part of the address
    :param url: path part of the address
    :param schema: target schema name
    :param table: target table name
    :param postdata: request body; ``None`` means no body is sent
    :param condition: SQL condition for the DELETE, or a false value to
        empty the table
    :param verbose: log every row
    :param rowcount: number of buffered rows that triggers a batch insert
    """
    show("begin "+hostname+" "+url+" "+schema+" "+table+" "+(postdata or "")+" "+(condition or ""))
    if secure:
        address = "https://"+hostname+url
    else:
        address = "http://"+hostname+url
    show("load from "+address)

    reqheaders = {'Content-Type': 'application/json'}
    reqheaders['Caller-Id'] = '1.2.246.562.10.2013112012294919827487.vipunen'
    # api credentials from env vars
    if os.getenv("API_USERNAME"):
        show("using authentication")
        apiuser = os.getenv("API_USERNAME")
        apipass = os.getenv("API_PASSWORD")
        # b64encode takes and returns bytes: encode the credentials first and
        # decode the result so the header value is plain text
        credentials = (apiuser+":"+apipass).encode("utf-8")
        reqheaders['Authorization'] = 'Basic %s' % base64.b64encode(credentials).decode("ascii")

    # automatic POST with (post)data
    print("value used for , -r, --rowcount=", rowcount)
    # urlopen only accepts a bytes body, so text post data is encoded here
    body = postdata.encode("utf-8") if isinstance(postdata, str) else postdata
    request = Request(address, data=body, headers=reqheaders)
    print(request)
    try:
        response = urlopen(request)
    except http.client.IncompleteRead as e:
        show('IncompleteRead exception.')
        # e.partial holds the data read so far; report its length
        show('Received: %d'%(len(e.partial)))
        sys.exit(2)
    except HTTPError as e:
        show('The server couldn\'t fulfill the request.')
        show('Error code: %d'%(e.code))
        sys.exit(2)
    except URLError as e:
        show('We failed to reach a server.')
        show('Reason: %s'%(e.reason))
        sys.exit(2)

    # everything is fine
    show("api call OK")

    # remove data conditionally, otherwise empty
    # merge operation could be considered here...
    if condition:
        show("remove from %s.%s with condition '%s'"%(schema,table,condition))
        # NOTE(review): schema, table and condition are interpolated into the
        # SQL text verbatim -- they must only come from a trusted caller
        dboperator.execute("DELETE FROM %s.%s WHERE %s"%(schema,table,condition))
    else:
        show("empty %s.%s"%(schema,table))
        dboperator.empty(schema,table)

    show("insert data")
    cnt = 0
    rows = []
    for row in ijson.items(response,'item'):
        cnt += 1
        # show some sign of being alive
        if cnt%100 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        if cnt%1000 == 0:
            show("-- %d" % (cnt))
        if verbose:
            show("%d -- %s"%(cnt,row))

        # find out which columns to use on insert
        dboperator.resetcolumns(row)

        # flatten arrays/lists
        for col in row:
            if isinstance(row[col], list):
                row[col] = json.dumps(row[col])

        if cnt == 1:
            # the very first row is written on its own with the single-row insert
            dboperator.insert(address,schema,table,row)
            continue

        rows.append(row)
        if len(rows) == rowcount:
            insert(address,schema,table,rows)
            rows = []

    # write whatever is left over from the last, incomplete batch
    if rows:
        insert(address,schema,table,rows)
        rows = []

    show("wrote %d"%(cnt))
    show("ready")