def load(secure,hostname,url,schema,table,codeset,verbose=False):
  """Reload schema.table from one codeset, adding its "maanosat" parent codes.

  The table is emptied first. Every item of the JSON list returned by
  url (placeholder filled in with codeset) becomes one inserted row. For
  each item the sisaltyy-ylakoodit relations are fetched with a second
  request, and related codes whose koodisto is "maanosat" are copied to
  the columns maanosat<level>koodi / nimi / nimi_sv / nimi_en, where
  <level> is "3" or "2" for code values of that length and empty otherwise.

  secure        -- use HTTPS when true, plain HTTP otherwise
  hostname      -- API host; hostname+url is also handed to dboperator.insert
  url           -- request path with a %-placeholder for codeset
  schema, table -- target table
  codeset       -- value substituted into url
  verbose       -- show progress messages
  """
  if verbose: show("begin")

  # the template row hands the column set over to dboperator
  dboperator.columns(makerow())

  if verbose: show("empty %s.%s"%(schema,table))
  dboperator.empty(schema,table)

  url = url % codeset # replace placeholder
  if secure:
    connect, how = httplib.HTTPSConnection, "load securely from "
  else:
    connect, how = httplib.HTTPConnection, "load from "
  httpconn = connect(hostname)
  show(how+hostname+url)

  classification = "maanosat"
  suffixes = {3: "3", 2: "2"} # length of related code value -> column name infix

  httpconn.request('GET', url)
  codes = json.loads(httpconn.getresponse().read())
  for cnt, code in enumerate(codes, 1):
    row = makerow()

    row["koodi"] = jv(code,"koodiArvo")
    row["nimi"] = getnimi(code,"FI")
    row["nimi_sv"] = getnimi(code,"SV")
    row["nimi_en"] = getnimi(code,"EN")
    row["alkupvm"] = jv(code,"voimassaAlkuPvm")
    row["loppupvm"] = jv(code,"voimassaLoppuPvm")

    # the same connection is reused for the per-code relation lookup
    httpconn.request('GET', "/koodisto-service/rest/json/relaatio/sisaltyy-ylakoodit/%s" % code["koodiUri"])
    parents = json.loads(httpconn.getresponse().read())
    for parent in parents:
      # any other length than 2 or 3 means "no level" (plain prefix)
      prefix = classification + suffixes.get(len(parent["koodiArvo"]), "")
      if parent["koodisto"]["koodistoUri"] != classification:
        continue
      row[prefix+"koodi"] = jv(parent,"koodiArvo")
      row[prefix+"nimi"] = getnimi(parent,"FI")
      row[prefix+"nimi_sv"] = getnimi(parent,"SV")
      row[prefix+"nimi_en"] = getnimi(parent,"EN")

    if verbose: show("%d -- %s"%(cnt,row["koodi"]))
    dboperator.insert(hostname+url,schema,table,row)

  dboperator.close()

  if verbose: show("ready")
def load(secure,hostname,url,schema,table,codeset,verbose=False,debug=False):
  """Reload schema.table from one codeset, adding its "julkaisunpaaluokka" parent.

  The table is emptied first. Every item of the JSON list returned by
  url (placeholder filled in with codeset) becomes one inserted row. For
  each item the sisaltyy-ylakoodit relations are fetched with a second
  request, and related codes whose koodisto is "julkaisunpaaluokka" are
  copied to the julkaisunpaaluokka* columns.

  secure        -- use HTTPS when true, plain HTTP otherwise
  hostname      -- API host; hostname+url is also handed to dboperator.insert
  url           -- request path with a %-placeholder for codeset
  schema, table -- target table
  codeset       -- value substituted into url
  verbose       -- print timestamped progress lines
  debug         -- passed through to every dboperator call
  """
  def stamp():
    # current local time, used as prefix of every printed line
    return strftime("%Y-%m-%d %H:%M:%S", localtime())

  if verbose: print(stamp()+" begin")

  template = makerow()
  dboperator.columns(template,debug)

  if verbose: print(stamp()+" empty %s.%s"%(schema,table))
  dboperator.empty(schema,table,debug)

  url = url % codeset # replace placeholder
  if secure:
    httpconn = httplib.HTTPSConnection(hostname)
    notice = " load securely from "
  else:
    httpconn = httplib.HTTPConnection(hostname)
    notice = " load from "
  # this line is printed regardless of verbose
  print(stamp()+notice+hostname+url)

  httpconn.request('GET', url)
  codes = json.loads(httpconn.getresponse().read())
  for cnt, code in enumerate(codes, 1):
    row = makerow()

    row["koodi"] = jv(code,"koodiArvo")
    row["nimi"] = getnimi(code,"FI")
    row["nimi_sv"] = getnimi(code,"SV")
    row["nimi_en"] = getnimi(code,"EN")
    row["alkupvm"] = jv(code,"voimassaAlkuPvm")
    row["loppupvm"] = jv(code,"voimassaLoppuPvm")

    # the same connection is reused for the per-code relation lookup
    httpconn.request('GET', "/koodisto-service/rest/json/relaatio/sisaltyy-ylakoodit/%s" % code["koodiUri"])
    parents = json.loads(httpconn.getresponse().read())
    for parent in parents:
      if parent["koodisto"]["koodistoUri"] != "julkaisunpaaluokka":
        continue
      row["julkaisunpaaluokkakoodi"] = jv(parent,"koodiArvo")
      row["julkaisunpaaluokkanimi"] = getnimi(parent,"FI")
      row["julkaisunpaaluokkanimi_sv"] = getnimi(parent,"SV")
      row["julkaisunpaaluokkanimi_en"] = getnimi(parent,"EN")

    if verbose: print(stamp()+" %d -- %s"%(cnt,row["koodi"]))
    dboperator.insert(hostname+url,schema,table,row,debug)

  dboperator.close(debug)

  if verbose: print(stamp()+" ready")
def load(hostname,url,schema,table,verbose=False,debug=False):
  """Reload schema.table with hakukohde details fetched one oid at a time.

  The table is emptied first. url returns a JSON object whose "result"
  list carries the oids; for every oid the hakukohde detail is requested
  and, when its status is "OK", one row is inserted. The row has the
  columns of makerow(); each column gets the detail value of the same
  name (None when absent), with list values stored as JSON text.

  Details that cannot be parsed as JSON, or whose status is not "OK", are
  skipped. Earlier an unexpected status fell through to the insert and
  wrote the previous (or the empty template) row once more.

  hostname      -- API host (always contacted over HTTPS)
  url           -- path of the oid listing
  schema, table -- target table
  verbose       -- show one line per inserted row
  debug         -- print each row and pass the flag on to dboperator
  """
  if verbose: show("begin")

  row = makerow()
  dboperator.columns(row,debug)

  if verbose: show("empty %s.%s"%(schema,table))
  dboperator.empty(schema,table,debug)

  httpconn = httplib.HTTPSConnection(hostname)
  show("load securely from "+hostname+url)

  # get list of oids
  httpconn.request('GET', url)
  listing = json.loads(httpconn.getresponse().read())
  for cnt, item in enumerate(listing["result"], 1):
    # show some sign of being alive
    if cnt%100 == 0:
      sys.stdout.write('.')
      sys.stdout.flush()
    if cnt%1000 == 0:
      show("-- %d" % (cnt))

    oid = item["oid"]
    # kept apart from the url parameter; hostname+itemurl is what gets
    # handed to dboperator.insert, exactly as before
    itemurl = "/tarjonta-service/rest/v1/hakukohde/%s?populateAdditionalKomotoFields=true"%(oid)
    try:
      httpconn.request('GET', itemurl)
      detail = json.loads(httpconn.getresponse().read())
    except ValueError:
      # json.loads raises ValueError when the body is not valid JSON
      show("-- %d -- could not load %s"%(cnt,oid))
      continue

    status = detail["status"]
    if status != "OK":
      # "NOT FOUND" is skipped quietly as before; anything else is
      # reported and skipped instead of re-inserting a stale row
      if status != "NOT FOUND":
        show("-- %d -- skipping %s, unexpected status %s"%(cnt,oid,status))
      continue

    result = detail["result"]
    row = makerow()
    for col in row:
      value = result[col] if col in result else None
      if isinstance(value, list):
        # flatten arrays/lists to their JSON text
        value = json.dumps(value)
      row[col] = value

    if verbose: show("%d -- %s"%(cnt,row["oid"]))
    if debug: print(row)
    dboperator.insert(hostname+itemurl,schema,table,row,debug)
def load(hostname,url,schema,table,verbose=False,debug=False):
  """Reload schema.table from the "result" list of one JSON response.

  The table is emptied first. Every item of the "result" list becomes
  one inserted row with the columns of makerow(): each column gets the
  item value of the same name (None when absent), with list values
  stored as JSON text. When the row has a "nimi" column, nimi / nimi_sv /
  nimi_en are filled from getnimi(item, "fi" / "sv" / "en") instead.

  The name columns are now excluded from the generic copy. Earlier the
  copy ran in dict iteration order and could overwrite nimi_sv / nimi_en
  with the (normally absent) item values right after they had been set.

  hostname      -- API host (always contacted over HTTPS)
  url           -- request path; hostname+url is also handed to dboperator.insert
  schema, table -- target table
  verbose       -- show one line per inserted row
  debug         -- print each row and pass the flag on to dboperator
  """
  if verbose: show("begin")

  row = makerow()
  dboperator.columns(row,debug)

  if verbose: show("empty %s.%s"%(schema,table))
  dboperator.empty(schema,table,debug)

  httpconn = httplib.HTTPSConnection(hostname)
  show("load securely from "+hostname+url)

  httpconn.request('GET', url)
  listing = json.loads(httpconn.getresponse().read())
  for cnt, item in enumerate(listing["result"], 1):
    # show some sign of being alive
    if cnt%100 == 0:
      sys.stdout.write('.')
      sys.stdout.flush()
    if cnt%1000 == 0:
      show("-- %d" % (cnt))

    row = makerow()

    # as before, names are derived only when the row has a "nimi" column
    namecols = ("nimi","nimi_sv","nimi_en") if "nimi" in row else ()

    for col in row:
      if col in namecols:
        continue
      row[col] = item[col] if col in item else None

    if namecols:
      row["nimi"] = getnimi(item,"fi")
      row["nimi_sv"] = getnimi(item,"sv")
      row["nimi_en"] = getnimi(item,"en")

    # flatten arrays/lists to their JSON text
    for col in row:
      if isinstance(row[col], list):
        row[col] = json.dumps(row[col])

    if verbose: show("%d -- %s"%(cnt,row["oid"]))
    if debug: print(row)
    dboperator.insert(hostname+url,schema,table,row,debug)

  if verbose: show("ready")
Exemplo n.º 5
0
  except urllib2.URLError, e:
    show('We failed to reach a server.')
    show('Reason: %s'%(e.reason))
    sys.exit(2)
  else:
    # everything is fine
    show("api call OK")

  # remove data conditionally, otherwise empty
  # merge operation could be considered here...
  if condition:
    show("remove from %s.%s with condition '%s'"%(schema,table,condition))
    dboperator.execute("DELETE FROM %s.%s WHERE %s"%(schema,table,condition))
  else:
    show("empty %s.%s"%(schema,table))
    dboperator.empty(schema,table)

  show("insert data")
  cnt=0
  for row in ijson.items(response,'item'):
    cnt+=1
    # show some sign of being alive
    if cnt%100 == 0:
      sys.stdout.write('.')
      sys.stdout.flush()
    if cnt%1000 == 0:
      show("-- %d" % (cnt))
    if verbose: show("%d -- %s"%(cnt,row))

    # find out which columns to use on insert
    dboperator.resetcolumns(row)
def load(secure,hostname,url,schema,table,codeset,verbose=False,debug=False):
  """Reload schema.table from one codeset together with its related classifications.

  The table is emptied first. Every item of the JSON list returned by
  url (placeholder filled in with codeset) becomes one inserted row. For
  each item the sisaltyy-alakoodit relations are fetched with a second
  request; a related code whose koodisto is listed in the table below is
  copied to the columns <prefix>koodi / nimi / nimi_sv / nimi_en.

  secure        -- use HTTPS when true, plain HTTP otherwise
  hostname      -- API host; hostname+url is also handed to dboperator.insert
  url           -- request path with a %-placeholder for codeset
  schema, table -- target table
  codeset       -- value substituted into url
  verbose       -- print timestamped progress lines
  debug         -- passed through to every dboperator call
  """
  # koodistoUri of a related code -> prefix of the columns it fills
  column_prefix = {
    "koulutusasteoph2002": "koulutusaste2002",
    "koulutusalaoph2002": "koulutusala2002",
    "opintoalaoph2002": "opintoala2002",
    "koulutusasteoph1995": "koulutusaste1995",
    "koulutusalaoph1995": "koulutusala1995",
    "opintoalaoph1995": "opintoala1995",
    "tutkinto": "tutkinto",
    "tutkintotyyppi": "tutkintotyyppi",
    "koulutustyyppi": "koulutustyyppi",
    "isced2011koulutusaste": "isced2011koulutusaste",
    "isced2011koulutusastetaso1": "isced2011koulutusastetaso1",
    "isced2011koulutusastetaso2": "isced2011koulutusastetaso2",
    "kansallinenkoulutusluokitus2016koulutusastetaso1": "koulutusluokitus2016koulutusastetaso1",
    # note! https://www.stat.fi/meta/luokitukset/koulutus/001-2016/kuvaus.html
    # kansallinenkoulutusluokitus2016koulutusastetaso2 -> isced2011koulutusastetaso2
    "kansallinenkoulutusluokitus2016koulutusastetaso2": "koulutusluokitus2016koulutusastetaso2",
    "isced2011koulutusalataso1": "isced2011koulutusalataso1",
    "kansallinenkoulutusluokitus2016koulutusalataso1": "koulutusluokitus2016koulutusalataso1",
    "isced2011koulutusalataso2": "isced2011koulutusalataso2",
    "kansallinenkoulutusluokitus2016koulutusalataso2": "koulutusluokitus2016koulutusalataso2",
    "isced2011koulutusalataso3": "isced2011koulutusalataso3",
    "kansallinenkoulutusluokitus2016koulutusalataso3": "koulutusluokitus2016koulutusalataso3",
    "okmohjauksenala": "okmohjauksenala",
  }

  def stamp():
    # current local time, used as prefix of every printed line
    return strftime("%Y-%m-%d %H:%M:%S", localtime())

  if verbose: print(stamp()+" begin")

  template = makerow()
  dboperator.columns(template,debug)

  if verbose: print(stamp()+" empty %s.%s"%(schema,table))
  dboperator.empty(schema,table,debug)

  url = url % codeset # replace placeholder
  if secure:
    httpconn = httplib.HTTPSConnection(hostname)
    notice = " load securely from "
  else:
    httpconn = httplib.HTTPConnection(hostname)
    notice = " load from "
  # this line is printed regardless of verbose
  print(stamp()+notice+hostname+url)

  httpconn.request('GET', url)
  codes = json.loads(httpconn.getresponse().read())
  for cnt, code in enumerate(codes, 1):
    row = makerow()

    row["koodi"] = code["koodiArvo"]
    row["nimi"] = getnimi(code,"FI")
    row["nimi_sv"] = getnimi(code,"SV")
    row["nimi_en"] = getnimi(code,"EN")
    row["alkupvm"] = code["voimassaAlkuPvm"]
    row["loppupvm"] = code["voimassaLoppuPvm"]

    # the same connection is reused for the per-code relation lookup
    httpconn.request('GET', "/koodisto-service/rest/json/relaatio/sisaltyy-alakoodit/%s" % code["koodiUri"])
    related_codes = json.loads(httpconn.getresponse().read())
    for related in related_codes:
      prefix = column_prefix.get(related["koodisto"]["koodistoUri"])
      if prefix is None:
        continue # classification not loaded into this table
      row[prefix+"koodi"] = related["koodiArvo"]
      row[prefix+"nimi"] = getnimi(related,"FI")
      row[prefix+"nimi_sv"] = getnimi(related,"SV")
      row[prefix+"nimi_en"] = getnimi(related,"EN")

    if verbose: print(stamp()+" %d -- %s"%(cnt,row["koodi"]))
    dboperator.insert(hostname+url,schema,table,row,debug)

  dboperator.close(debug)

  if verbose: print(stamp()+" ready")
def load(secure,hostname,url,schema,table,codeset,verbose=False,debug=False):
  """Reload schema.table from one codeset together with its regional classifications.

  The table is emptied first. Every item of the JSON list returned by
  url (placeholder filled in with codeset) becomes one inserted row. For
  each item two relation lookups are made and matching related codes are
  copied to the columns <prefix>koodi / nimi / nimi_sv / nimi_en:
  sisaltyy-ylakoodit for aluehallintovirasto (avi), sisaltyy-alakoodit
  for maakunta, elykeskus (ely), kielisuhde, seutukunta, laani and
  kuntaryhma.

  nimi_en used to be filled with the Finnish name (getnimi(i,"FI")). It
  now takes the English name and keeps the Finnish one only as fallback.
  NOTE(review): the fallback assumes getnimi returns a falsy value when
  the language is missing - confirm against getnimi.

  secure        -- use HTTPS when true, plain HTTP otherwise
  hostname      -- API host; hostname+url is also handed to dboperator.insert
  url           -- request path with a %-placeholder for codeset
  schema, table -- target table
  codeset       -- value substituted into url
  verbose       -- print timestamped progress lines
  debug         -- passed through to every dboperator call
  """
  # relation path -> {koodistoUri of related code: prefix of the columns it fills}
  relations = (
    # classifications (nb! avi is in different direction!)
    ("/koodisto-service/rest/json/relaatio/sisaltyy-ylakoodit/%s", {
      "aluehallintovirasto": "avi",
    }),
    # other classifications
    ("/koodisto-service/rest/json/relaatio/sisaltyy-alakoodit/%s", {
      "maakunta": "maakunta",
      "elykeskus": "ely",
      "kielisuhde": "kielisuhde",
      "seutukunta": "seutukunta",
      "laani": "laani",
      "kuntaryhma": "kuntaryhma",
    }),
  )

  def log(message):
    # timestamped line on stdout
    print(strftime("%Y-%m-%d %H:%M:%S", localtime())+message)

  if verbose: log(" begin")

  row = makerow()
  dboperator.columns(row,debug)

  if verbose: log(" empty %s.%s"%(schema,table))
  dboperator.empty(schema,table,debug)

  url = url % codeset # replace placeholder
  # the "load ... from" line is printed regardless of verbose
  if secure:
    httpconn = httplib.HTTPSConnection(hostname)
    log(" load securely from "+hostname+url)
  else:
    httpconn = httplib.HTTPConnection(hostname)
    log(" load from "+hostname+url)

  httpconn.request('GET', url)
  codes = json.loads(httpconn.getresponse().read())
  for cnt, i in enumerate(codes, 1):
    row = makerow()

    row["koodi"] = jv(i,"koodiArvo")
    row["nimi"] = getnimi(i,"FI")
    row["nimi_sv"] = getnimi(i,"SV")
    # English name, Finnish as fallback (see docstring)
    row["nimi_en"] = getnimi(i,"EN") or getnimi(i,"FI")
    row["alkupvm"] = jv(i,"voimassaAlkuPvm")
    row["loppupvm"] = jv(i,"voimassaLoppuPvm")

    for path, column_prefix in relations:
      # the same connection is reused for every relation lookup
      httpconn.request('GET', path % i["koodiUri"])
      related_codes = json.loads(httpconn.getresponse().read())
      for ii in related_codes:
        prefix = column_prefix.get(ii["koodisto"]["koodistoUri"])
        if prefix is None:
          continue # classification not loaded into this table
        row[prefix+"koodi"] = jv(ii,"koodiArvo")
        row[prefix+"nimi"] = getnimi(ii,"FI")
        row[prefix+"nimi_sv"] = getnimi(ii,"SV")
        row[prefix+"nimi_en"] = getnimi(ii,"EN")

    if verbose: log(" %d -- %s"%(cnt,row["koodi"]))
    dboperator.insert(hostname+url,schema,table,row,debug)

  dboperator.close(debug)

  if verbose: log(" ready")
def load(secure,hostname,url,schema,table,verbose=False):
  """Reload schema.table with organisations fetched one oid at a time.

  The table is emptied first. Two requests are made up front: address +
  "v2/liitokset" (used to map an organisation oid to the oid of its
  "kohde") and address itself (the list of oids). Each oid is then
  requested separately and inserted when its "tyypit" contains one of
  Koulutustoimija, Oppilaitos, Toimipiste or Oppisopimustoimipiste
  (checked in that order). A Koulutustoimija with neither ytunnus nor
  virastotunnus is left out.

  Fixes compared to the earlier version:
  * a failing initial request is reported and ends the run with exit
    code 2; "except e" named an undefined variable, so the failure
    surfaced as NameError instead
  * the address completeness check uses truthiness / != instead of
    identity comparison ("is not") against "" and 0
  * the verbose per-oid line shows the oid being fetched, not the row
    left over from the previous iteration

  secure        -- use https:// when true, http:// otherwise
  hostname, url -- concatenated to the base address; hostname+url is
                   also handed to dboperator.insert
  schema, table -- target table
  verbose       -- show one line per oid and per inserted row
  """
  if verbose: show("begin")

  # make "columnlist" (type has no meaning as we're not creating table)
  row = makerow()
  # setup dboperator so other calls work
  dboperator.columns(row)

  if verbose: show("empty %s.%s"%(schema,table))
  dboperator.empty(schema,table)

  # Fetching per organisation type with
  #   v2/hae?aktiiviset=true&suunnitellut=true&lakkautetut=true&organisaatiotyyppi=<tyyppi>
  # could be simpler and faster, but those results don't contain
  # address information :(
  address = ("https://" if secure else "http://")+hostname+url

  show("load from "+address)

  try:
    # first create a "hash map" of liitokset
    liitosresponse = requests.get(address+"v2/liitokset")
    # actual data
    response = requests.get(address)
  except requests.exceptions.RequestException as e:
    show('HTTP GET failed.')
    show('Reason: %s'%(str(e)))
    sys.exit(2)
  # everything is fine
  show("api call OK")

  # liitokset
  liitosmap = dict()
  for l in liitosresponse.json():
    liitosmap[l["organisaatio"]["oid"]] = l["kohde"]["oid"]
  liitosresponse = None

  oids = response.json()
  for cnt, o in enumerate(oids, 1):
    # show some sign of being alive
    if cnt%100 == 0:
      sys.stdout.write('.')
      sys.stdout.flush()
    if cnt%1000 == 0:
      show("-- %d" % (cnt))
    if verbose: show("%d -- %s"%(cnt,o))

    # make another request to actual organization data; any ValueError
    # raised below (response.json(), int() of the postal code, ...) is
    # reported and the organisation is skipped
    try:
      i = requests.get(address+o).json()

      # make "row" (clear values)
      row = makerow()

      row["oid"] = o
      row["parentoid"] = jv(i,"parentOid")
      # liitokset
      row["liitosoid"] = liitosmap.get(o)

      # TODO does the order here matter? if multiple tyyppi's, what to do?
      tyypit = i["tyypit"] if "tyypit" in i else ()
      if "Koulutustoimija" in tyypit:
        row["tyyppi"] = "Koulutustoimija"
        # alternatively try virastotunnus if ytunnus is missing
        row["koodi"] = jv(i,"ytunnus") or jv(i,"virastotunnus")
        if not row["koodi"]:
          row["tyyppi"] = None # cancel this organization from loading
      elif "Oppilaitos" in tyypit:
        row["tyyppi"] = "Oppilaitos"
        row["koodi"] = jv(i,"oppilaitosKoodi")
        if "oppilaitosTyyppiUri" in i and i["oppilaitosTyyppiUri"]:
          # => just code, text values separately
          row["oppilaitostyyppi"] = i["oppilaitosTyyppiUri"].replace("oppilaitostyyppi_","").replace("#1","")
      elif "Toimipiste" in tyypit:
        row["tyyppi"] = "Toimipiste"
        row["koodi"] = jv(i,"toimipistekoodi")
      elif "Oppisopimustoimipiste" in tyypit:
        row["tyyppi"] = "Oppisopimustoimipiste"
        row["koodi"] = jv(i,"toimipistekoodi")

      # was current organization of type of interest
      if not row["tyyppi"]:
        continue

      if "nimi" in i and i["nimi"]:
        nimi = jv(i,"nimi")
        row["nimi"] = jv(nimi,"fi")
        row["nimi_sv"] = jv(nimi,"sv")
        row["nimi_en"] = jv(nimi,"en")
      row["alkupvm"] = jv(i,"alkuPvm")
      row["loppupvm"] = jv(i,"lakkautusPvm")

      if "kotipaikkaUri" in i and i["kotipaikkaUri"]:
        # => just code, text values separately
        row["kotikunta"] = jv(i,"kotipaikkaUri").replace("kunta_","")

      if "kieletUris" in i and i["kieletUris"]:
        # todo what if many?
        # => just code, text values separately
        row["oppilaitoksenopetuskieli"] = i["kieletUris"][0].replace("oppilaitoksenopetuskieli_","").replace("#1","")

      # address, first kayntiosoite and if not exists then postiosoite
      josoite = None
      if "kayntiosoite" in i:
        josoite = jv(i,"kayntiosoite")
        row["osoitetyyppi"] = "kayntiosoite"
      elif "postiosoite" in i:
        josoite = jv(i,"postiosoite")
        row["osoitetyyppi"] = "postiosoite"
      if josoite:
        row["osoite"] = jv(josoite,"osoite")
        if "postinumeroUri" in josoite and josoite["postinumeroUri"]:
          row["postinumero"] = josoite["postinumeroUri"].replace("posti_","")
        else:
          row["postinumero"] = None
        row["postitoimipaikka"] = jv(josoite,"postitoimipaikka")

        # coordinates are looked up only for a complete address with a
        # non-zero postal code
        if (row["osoite"] and row["postinumero"]
            and int(row["postinumero"]) != 0 and row["postitoimipaikka"]):
          get_and_set_coordinates(row)

      if verbose: show(" %5d -- %s %s (%s)"%(cnt,row["tyyppi"],row["koodi"],row["nimi"]))
      dboperator.insert(hostname+url,schema,table,row)
    except ValueError as ve:
      print("Error: " + str(ve))
      print("vika: " + str(address) + " oid:" + str(o))
Exemplo n.º 9
0
def load(url, schema, table, condition):
    """
    Load the "data" rows of the ARVO API response(s) into schema.table.

    Results from ARVO-API can come in multiple pages. If that's the case,
    we need to make multiple requests to the ARVO API, using the "next_url"
    parameter. Before the rows of the first page are inserted, the target
    is either emptied or, when condition is given, cleaned with
    "DELETE FROM schema.table WHERE condition".

    Every response must be a JSON object with "data" and "pagination", e.g.

        {
          "data": [
            {"vastausid": 1111, "kysymysid": 1234, "numerovalinta": 2,
             "kysymys_fi": "Ik", "tutkinto_en": null,
             "vastausaika": "2017-02-05T22:00:00Z", ...}
          ],
          "pagination": {"next_url": "null"}
        }

    Each element of "data" is inserted as one row; the columns are taken
    from the element itself and list values are stored as JSON text.

    url       -- address of the first page
    schema, table -- target table
    condition -- SQL condition for the initial DELETE, or a false value
                 to empty the whole table

    If the environment variable API_USERNAME is set, HTTP basic
    authentication is sent using it and API_PASSWORD (an unset password
    is now treated as empty instead of failing with TypeError).

    Exits the process with code 1 when the request fails, 2 on a
    non-200 status, 3 on invalid JSON and 4 when "data" or "pagination"
    is missing.
    """
    # headers are the same for every page, so build them once
    reqheaders = {'Content-Type': 'application/json'}
    # api credentials from env vars
    apiuser = os.getenv("API_USERNAME")
    if apiuser:
        apipass = os.getenv("API_PASSWORD") or ""
        reqheaders['Authorization'] = 'Basic %s' % base64.b64encode(apiuser + ":" + apipass)

    first_page = True  # the DELETE/empty step must run only once

    while True:
        show("begin " + url + " " + schema + " " + table + " " + (condition or ""))
        show("load from " + url)
        if 'Authorization' in reqheaders:
            show("using authentication")

        try:
            r = requests.get(url, headers=reqheaders)
        except requests.exceptions.RequestException as e:
            print(e)
            sys.exit(1)

        if r.status_code != 200:
            print("Error! HTTP status code: " + str(r.status_code))
            sys.exit(2)

        try:
            result = json.loads(r.content)
        except ValueError as e:
            print(e)
            sys.exit(3)

        if "pagination" not in result or "data" not in result:
            print("Error! Received JSON-data not valid.")
            sys.exit(4)

        # everything is fine
        show("api call OK")

        # Save in DB only the part before ?-mark:
        # https://arvo.csc.fi/api/vipunen?alkupvm=2018-01-01&loppupvm=2018-02-01
        address = url.split("?")[0]

        # remove data conditionally, otherwise empty
        # merge operation could be considered here...
        if first_page:
            first_page = False
            if condition:
                show("remove from %s.%s with condition '%s'" % (schema, table, condition))
                # NOTE(review): schema, table and condition are spliced into
                # the statement as-is; they must come from trusted configuration
                dboperator.execute("DELETE FROM %s.%s WHERE %s" % (schema, table, condition))
            else:
                show("empty %s.%s" % (schema, table))
                dboperator.empty(schema, table)

        show("insert data")
        cnt = 0
        for cnt, row in enumerate(result["data"], 1):
            # show some sign of being alive
            if cnt % 100 == 0:
                sys.stdout.write('.')
                sys.stdout.flush()
            if cnt % 1000 == 0:
                show("-- %d" % (cnt))

            # find out which columns to use on insert
            dboperator.resetcolumns(row)

            # flatten arrays/lists
            for col in row:
                if isinstance(row[col], list):
                    row[col] = json.dumps(row[col])

            dboperator.insert(address, schema, table, row)

        show("wrote %d" % (cnt))
        show("ready")

        # the last page announces itself with the string "null" or a JSON
        # null; a missing or empty next_url ends the loop as well
        next_url = result["pagination"].get("next_url")
        if not next_url or next_url == "null":
            break
        url = next_url
Exemplo n.º 10
0
def load(secure,
         hostname,
         url,
         schema,
         table,
         codeset,
         verbose=False,
         debug=False):
    """Reload schema.table from one codeset with its regional classifications.

    The table is emptied first. Every item of the JSON list returned by
    url (placeholder filled in with codeset) becomes one inserted row.
    For each item two relation lookups are made and matching related
    codes are copied to the columns <prefix>koodi / nimi / nimi_sv /
    nimi_en: sisaltyy-ylakoodit for aluehallintovirasto (avi),
    sisaltyy-alakoodit for maakunta, elykeskus (ely), kielisuhde,
    seutukunta, laani and kuntaryhma.

    nimi_en used to be filled with the Finnish name (getnimi(i, "FI")).
    It now takes the English name and keeps the Finnish one only as
    fallback.
    NOTE(review): the fallback assumes getnimi returns a falsy value
    when the language is missing - confirm against getnimi.

    secure        -- use HTTPS when true, plain HTTP otherwise
    hostname      -- API host; hostname + url is also handed to
                     dboperator.insert
    url           -- request path with a %-placeholder for codeset
    schema, table -- target table
    codeset       -- value substituted into url
    verbose       -- print timestamped progress lines
    debug         -- passed through to every dboperator call
    """
    relation_path = "/koodisto-service/rest/json/relaatio/%s/%s"
    # relation name -> {koodistoUri of related code: column prefix}
    lookups = (
        # classifications (nb! avi is in different direction!)
        ("sisaltyy-ylakoodit", {
            "aluehallintovirasto": "avi",
        }),
        # other classifications
        ("sisaltyy-alakoodit", {
            "maakunta": "maakunta",
            "elykeskus": "ely",
            "kielisuhde": "kielisuhde",
            "seutukunta": "seutukunta",
            "laani": "laani",
            "kuntaryhma": "kuntaryhma",
        }),
    )

    def now():
        # current local time, used as prefix of every printed line
        return strftime("%Y-%m-%d %H:%M:%S", localtime())

    if verbose:
        print(now() + " begin")

    row = makerow()
    dboperator.columns(row, debug)

    if verbose:
        print(now() + " empty %s.%s" % (schema, table))
    dboperator.empty(schema, table, debug)

    url = url % codeset  # replace placeholder
    # the "load ... from" line is printed regardless of verbose
    if secure:
        httpconn = httplib.HTTPSConnection(hostname)
        print(now() + " load securely from " + hostname + url)
    else:
        httpconn = httplib.HTTPConnection(hostname)
        print(now() + " load from " + hostname + url)

    httpconn.request('GET', url)
    codes = json.loads(httpconn.getresponse().read())
    for cnt, i in enumerate(codes, 1):
        row = makerow()

        row["koodi"] = jv(i, "koodiArvo")
        row["nimi"] = getnimi(i, "FI")
        row["nimi_sv"] = getnimi(i, "SV")
        # English name, Finnish as fallback (see docstring)
        row["nimi_en"] = getnimi(i, "EN") or getnimi(i, "FI")
        row["alkupvm"] = jv(i, "voimassaAlkuPvm")
        row["loppupvm"] = jv(i, "voimassaLoppuPvm")

        for relation, prefixes in lookups:
            # the same connection is reused for every relation lookup
            httpconn.request('GET', relation_path % (relation, i["koodiUri"]))
            related_codes = json.loads(httpconn.getresponse().read())
            for ii in related_codes:
                prefix = prefixes.get(ii["koodisto"]["koodistoUri"])
                if prefix is None:
                    continue  # classification not loaded into this table
                row[prefix + "koodi"] = jv(ii, "koodiArvo")
                row[prefix + "nimi"] = getnimi(ii, "FI")
                row[prefix + "nimi_sv"] = getnimi(ii, "SV")
                row[prefix + "nimi_en"] = getnimi(ii, "EN")

        if verbose:
            print(now() + " %d -- %s" % (cnt, row["koodi"]))
        dboperator.insert(hostname + url, schema, table, row, debug)

    dboperator.close(debug)

    if verbose:
        print(now() + " ready")
Exemplo n.º 11
0
def load(secure, hostname, url, schema, table, verbose=False):
    """Empty ``schema.table`` and refill it with organisation rows from the API.

    The base address (scheme + ``hostname`` + ``url``) must return a JSON list
    of organisation oids, and ``<address>v2/liitokset`` a JSON list of merges.
    Every oid is then fetched individually from ``<address><oid>``; an
    organisation is inserted when it is of a recognised type (a
    "Koulutustoimija" additionally needs a ytunnus or virastoTunnus).

    Args:
        secure: use ``https://`` when truthy, ``http://`` otherwise.
        hostname: host part of the API address.
        url: path part of the API address; oids are appended to it as-is.
        schema: target database schema.
        table: target database table.
        verbose: when truthy, log every organisation that is processed.

    Exits the process with status 2 when either initial request fails or
    answers with an HTTP error status.
    """
    if verbose: show("begin")

    # make "columnlist" (type has no meaning as we're not creating table)
    row = makerow()
    # setup dboperator so other calls work
    dboperator.columns(row)

    if verbose: show("empty %s.%s" % (schema, table))
    dboperator.empty(schema, table)

    # NOTE: one "v2/hae?...&organisaatiotyyppi=<type>" search per organisation
    # type would be simpler and faster, but those results don't contain
    # address information, so each organisation is fetched separately below.

    address = ("https://" if secure else "http://") + hostname + url
    reqheaders = {
        'Caller-Id': '1.2.246.562.10.2013112012294919827487.vipunen'
    }

    show("load from " + address)

    try:
        # merge ("liitos") data, turned into a lookup below
        liitosresponse = requests.get(address + "v2/liitokset",
                                      headers=reqheaders)
        liitosresponse.raise_for_status()
        # actual data: the list of organisation oids
        response = requests.get(address, headers=reqheaders)
        response.raise_for_status()
    except Exception as e:
        # top-level boundary: report whatever went wrong and stop the run
        show('HTTP GET failed.')
        show('Reason: %s' % (str(e)))
        sys.exit(2)
    else:
        # everything is fine
        show("api call OK")

    # liitokset: organisation oid -> oid of the organisation it was merged to
    liitosmap = dict((liitos["organisaatio"]["oid"], liitos["kohde"]["oid"])
                     for liitos in liitosresponse.json())
    liitosresponse = None  # drop the reference, the payload is not needed anymore

    cnt = 0
    for o in response.json():
        cnt += 1
        # show some sign of being alive
        if cnt % 100 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        if cnt % 1000 == 0:
            show("-- %d" % (cnt))
        if verbose: show("%d -- %s" % (cnt, o))

        # make another request to actual organization data
        try:
            r = requests.get(address + o, headers=reqheaders)
            i = r.json()

            # make "row" (clear values)
            row = makerow()

            row["oid"] = o
            row["parentoid"] = jv(i, "parentOid")
            # liitokset
            row["liitosoid"] = liitosmap.get(o)
            # yritysmuoto 3.11.2021 vha testing
            if 'yritysmuoto' in i:
                row["yritysmuoto"] = jv(i, "yritysmuoto")

            # TODO does the order here matter? if multiple tyyppi's, what to do?
            tyypit = (i["tyypit"] if "tyypit" in i else None) or ()
            if "Koulutustoimija" in tyypit:
                row["tyyppi"] = "Koulutustoimija"
                # alternatively try virastotunnus if ytunnus is missing
                row["koodi"] = jv(i, "ytunnus") or jv(i, "virastoTunnus")
                if not row["koodi"]:
                    row["tyyppi"] = None  # cancel this organization from loading
            elif "Oppilaitos" in tyypit:
                row["tyyppi"] = "Oppilaitos"
                row["koodi"] = jv(i, "oppilaitosKoodi")
                if "oppilaitosTyyppiUri" in i and i["oppilaitosTyyppiUri"]:
                    # => just code, text values separately
                    row["oppilaitostyyppi"] = i["oppilaitosTyyppiUri"].replace(
                        "oppilaitostyyppi_", "").replace("#1", "")
            elif "Toimipiste" in tyypit:
                row["tyyppi"] = "Toimipiste"
                row["koodi"] = jv(i, "toimipistekoodi")
            elif "Oppisopimustoimipiste" in tyypit:
                row["tyyppi"] = "Oppisopimustoimipiste"
                row["koodi"] = jv(i, "toimipistekoodi")
            elif "Varhaiskasvatuksen toimipaikka" in tyypit:
                row["tyyppi"] = "Varhaiskasvatuksen toimipaikka"
                row["koodi"] = jv(i, "toimipistekoodi")
            elif "Varhaiskasvatuksen jarjestaja" in tyypit:
                row["tyyppi"] = "Varhaiskasvatuksen järjestaja"
                row["koodi"] = jv(i, "toimipistekoodi")

            # was current organization of type of interest
            if row["tyyppi"]:

                if "nimi" in i and i["nimi"]:
                    nimi = jv(i, "nimi")
                    row["nimi"] = jv(nimi, "fi")
                    row["nimi_sv"] = jv(nimi, "sv")
                    row["nimi_en"] = jv(nimi, "en")
                row["alkupvm"] = jv(i, "alkuPvm")
                row["loppupvm"] = jv(i, "lakkautusPvm")

                if "kotipaikkaUri" in i and i["kotipaikkaUri"]:
                    # => just code, text values separately
                    row["kotikunta"] = jv(i, "kotipaikkaUri").replace(
                        "kunta_", "")

                if "kieletUris" in i and i["kieletUris"]:
                    # todo what if many?
                    # => just code, text values separately
                    row["oppilaitoksenopetuskieli"] = i["kieletUris"][
                        0].replace("oppilaitoksenopetuskieli_",
                                   "").replace("#1", "")

                # address, first kayntiosoite and if not exists then postiosoite
                josoite = None
                if "kayntiosoite" in i:
                    josoite = jv(i, "kayntiosoite")
                    row["osoitetyyppi"] = "kayntiosoite"
                elif "postiosoite" in i:
                    josoite = jv(i, "postiosoite")
                    row["osoitetyyppi"] = "postiosoite"
                if josoite:
                    row["osoite"] = jv(josoite, "osoite")
                    postinumerouri = (josoite["postinumeroUri"]
                                      if "postinumeroUri" in josoite else None)
                    row["postinumero"] = (postinumerouri.replace("posti_", "")
                                          if postinumerouri else None)
                    row["postitoimipaikka"] = jv(josoite, "postitoimipaikka")

                    # Coordinates are only looked up for a complete address.
                    # Compare by value: identity checks against "" and 0 are
                    # implementation-dependent.  int() may raise ValueError on
                    # a non-numeric postal code, which is handled below.
                    if (row["osoite"] not in (None, "")
                            and row["postinumero"] not in (None, "")
                            and int(row["postinumero"]) != 0
                            and row["postitoimipaikka"] not in (None, "")):
                        get_and_set_coordinates(row)

                if verbose:
                    show(" %5d -- %s %s (%s)" %
                         (cnt, row["tyyppi"], row["koodi"], row["nimi"]))
                dboperator.insert(hostname + url, schema, table, row)
        except ValueError as ve:
            # invalid JSON (or an unparsable value): report and skip this oid
            print("ValueError: " + str(ve))
            print("vika: " + str(address) + " oid:" + str(o))

    dboperator.close()

    if verbose: show("ready")
Exemplo n.º 12
0
def load(hostname, url, schema, table, verbose=False, debug=False):
    """Empty ``schema.table`` and refill it with one row per koulutus.

    ``url`` is requested over HTTPS from ``hostname`` and must return JSON
    shaped like ``{"result": {"tulokset": [{"oid": <organisation oid>,
    "tulokset": [{"oid": <koulutus oid>}, ...]}, ...]}}``.  Each koulutus is
    then fetched separately and mapped onto the columns given by ``makerow()``.

    Args:
        hostname: API host, always contacted over HTTPS.
        url: path of the search request that lists the koulutus oids.
        schema: target database schema.
        table: target database table.
        verbose: when truthy, log every inserted koulutus.
        debug: passed on to ``dboperator``; when truthy each row is printed.

    NOTE(review): no ``dboperator.close(debug)`` call is made here -- confirm
    whether the caller is expected to close the connection.
    """
    if verbose: show("begin")

    row = makerow()
    dboperator.columns(row, debug)

    if verbose: show("empty %s.%s" % (schema, table))
    dboperator.empty(schema, table, debug)

    httpconn = httplib.HTTPSConnection(hostname)
    show("load securely from " + hostname + url)

    # columns copied from a nested object: "<key>_<subkey>" <- i[<key>][<subkey>]
    nestedcols = ("koulutuskoodi_arvo", "koulutuskoodi_nimi")

    # get list of oids
    httpconn.request('GET', url)
    rr = httpconn.getresponse()
    jj = json.loads(rr.read())
    cnt = 0
    for iii in jj["result"]["tulokset"]:
        # NB! iii["oid"] is the organisation's oid; it is stored on each row below
        for ii in iii["tulokset"]:
            cnt += 1
            # show some sign of being alive
            if cnt % 100 == 0:
                sys.stdout.write('.')
                sys.stdout.flush()
            if cnt % 1000 == 0:
                show("-- %d" % (cnt))

            # get one koulutus. skip koodisto meta etc
            # (url is rebound on purpose: it is also the source given to insert)
            url = "/tarjonta-service/rest/v1/koulutus/%s?meta=false&img=false" % (
                ii["oid"])
            try:
                httpconn.request('GET', url)
                r = httpconn.getresponse()
                j = json.loads(r.read())
            except ValueError:
                # response body was not valid JSON
                show("-- %d -- could not load %s" % (cnt, ii["oid"]))
                continue

            i = j["result"]
            row = makerow()

            # The name columns are filled after the loop; handling them in the
            # generic branch would overwrite nimi_sv/nimi_en with None whenever
            # they are iterated after "nimi".
            namecols = ("nimi", "nimi_sv", "nimi_en") if "nimi" in row else ()
            for col in row:
                if col in namecols:
                    continue
                if "_uri" in col or col in nestedcols:
                    (colkey, subkey) = col.split("_")
                    if colkey in i and i[colkey] and subkey in i[colkey]:
                        row[col] = i[colkey][subkey]
                else:
                    row[col] = i[col] if col in i else None
                    if isinstance(row[col], list):
                        # flatten arrays/lists to their JSON text
                        row[col] = json.dumps(row[col])
            if namecols:
                row["nimi"] = getnimi(i, "fi")
                row["nimi_sv"] = getnimi(i, "sv")
                row["nimi_en"] = getnimi(i, "en")

            # add organization oid stored from search results above
            row["organisaatio_oid"] = iii["oid"]

            if verbose: show("%d -- %s" % (cnt, row["oid"]))
            if debug: print(row)
            dboperator.insert(hostname + url, schema, table, row, debug)
Exemplo n.º 13
0
def load(hostname, url, schema, table, verbose=False, debug=False):
    """Empty ``schema.table`` and refill it with one row per hakukohde.

    ``url`` is requested over HTTPS from ``hostname`` and must return JSON with
    a ``"result"`` list of objects carrying an ``"oid"``.  Each hakukohde is
    fetched separately; answers whose ``"status"`` is ``"OK"`` are mapped onto
    the columns given by ``makerow()`` and written with
    ``dboperator.insertMany`` in batches.

    Args:
        hostname: API host, always contacted over HTTPS.
        url: path of the request that lists the hakukohde oids.
        schema: target database schema.
        table: target database table.
        verbose: when truthy, log every loaded hakukohde.
        debug: passed on to ``dboperator``; when truthy each row is printed.
    """
    batchsize = 5000

    if verbose: show("begin")

    row = makerow()
    dboperator.columns(row, debug)

    if verbose: show("empty %s.%s" % (schema, table))
    dboperator.empty(schema, table, debug)

    httpconn = httplib.HTTPSConnection(hostname)
    show("load securely from " + hostname + url)

    # get list of oids
    reqheaders = {'Caller-Id': '1.2.246.562.10.2013112012294919827487.vipunen'}
    httpconn.request('GET', url, headers=reqheaders)
    rr = httpconn.getresponse()
    jj = json.loads(rr.read())
    cnt = 0
    rows = []
    for ii in jj["result"]:
        cnt += 1
        # show some sign of being alive
        if cnt % 100 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        if cnt % 1000 == 0:
            show("-- %d" % (cnt))

        # (url is rebound on purpose: it is also the source given to insertMany)
        url = "/tarjonta-service/rest/v1/hakukohde/%s?populateAdditionalKomotoFields=true" % (
            ii["oid"])

        try:
            httpconn.request('GET', url, headers=reqheaders)
            r = httpconn.getresponse()
            j = json.loads(r.read())
        except ValueError:
            # response body was not valid JSON
            show("-- %d -- could not load %s" % (cnt, ii["oid"]))
            continue

        status = j.get("status")
        if status == "NOT FOUND":
            continue
        if status != "OK":
            # Without a fresh row the previously built one would be appended
            # again, so anything but "OK" is skipped.
            show("-- %d -- skipping %s, status %s" % (cnt, ii["oid"], status))
            continue

        i = j["result"]
        row = makerow()
        for col in row:
            row[col] = i[col] if col in i else None
            if isinstance(row[col], list):
                # flatten arrays/lists to their JSON text
                row[col] = json.dumps(row[col])

        if verbose: show("%d -- %s" % (cnt, row["oid"]))
        if debug: print(row)
        rows.append(row)
        # flush on the batch size itself so skipped items cannot delay a flush
        if len(rows) >= batchsize:
            dboperator.insertMany(hostname + url, schema, table, rows)
            rows = []

    # insert whatever is left over from the last, incomplete batch
    if rows:
        dboperator.insertMany(hostname + url, schema, table, rows)
    show("Total rows: %d" % (cnt))

    if verbose: show("ready")
Exemplo n.º 14
0
def load(secure,
         hostname,
         url,
         schema,
         table,
         codeset,
         verbose=False,
         debug=False):
    """Empty ``schema.table`` and refill it from one koodisto code set.

    ``url`` contains a ``%`` placeholder that is filled with ``codeset``.  The
    resulting address must return a JSON list of codes; for every code its
    parent codes are fetched as well and the one belonging to the
    "paatieteenala" koodisto is stored on the same row.

    Args:
        secure: use HTTPS when truthy, plain HTTP otherwise.
        hostname: API host.
        url: request path with a placeholder for ``codeset``.
        schema: target database schema.
        table: target database table.
        codeset: value substituted into ``url``.
        verbose: when truthy, log progress for every code.
        debug: passed on to the ``dboperator`` calls.
    """

    def log(message):
        # timestamped line; the parenthesised single-argument print prints the
        # same text under Python 2 and Python 3
        print(strftime("%Y-%m-%d %H:%M:%S", localtime()) + message)

    if verbose: log(" begin")

    row = makerow()
    dboperator.columns(row, debug)

    if verbose: log(" empty %s.%s" % (schema, table))
    dboperator.empty(schema, table, debug)

    url = url % codeset  # replace placeholder
    if secure:
        httpconn = httplib.HTTPSConnection(hostname)
        log(" load securely from " + hostname + url)
    else:
        httpconn = httplib.HTTPConnection(hostname)
        log(" load from " + hostname + url)

    httpconn.request('GET', url)
    r = httpconn.getresponse()
    j = json.loads(r.read())
    cnt = 0
    for i in j:
        cnt += 1
        row = makerow()

        row["koodi"] = jv(i, "koodiArvo")
        row["nimi"] = getnimi(i, "FI")
        row["nimi_sv"] = getnimi(i, "SV")
        # English name: this used to be filled from the "FI" name
        row["nimi_en"] = getnimi(i, "EN")
        row["alkupvm"] = jv(i, "voimassaAlkuPvm")
        row["loppupvm"] = jv(i, "voimassaLoppuPvm")

        # parent codes ("ylakoodit") of the current code
        httpconn.request(
            'GET',
            "/koodisto-service/rest/json/relaatio/sisaltyy-ylakoodit/%s" %
            i["koodiUri"])
        rr = httpconn.getresponse()
        jj = json.loads(rr.read())
        for ii in jj:
            if ii["koodisto"]["koodistoUri"] == "paatieteenala":
                row["paatieteenalakoodi"] = jv(ii, "koodiArvo")
                row["paatieteenalanimi"] = getnimi(ii, "FI")
                row["paatieteenalanimi_sv"] = getnimi(ii, "SV")
                row["paatieteenalanimi_en"] = getnimi(ii, "EN")

        if verbose: log(" %d -- %s" % (cnt, row["koodi"]))
        dboperator.insert(hostname + url, schema, table, row, debug)

    dboperator.close(debug)

    if verbose: log(" ready")
Exemplo n.º 15
0
        show('Reason: %s' % (e.reason))
        sys.exit(2)
    else:
        # everything is fine
        show("api call OK")

    # remove data conditionally, otherwise empty
    # merge operation could be considered here...
    if condition:
        show("remove from %s.%s with condition '%s'" %
             (schema, table, condition))
        dboperator.execute("DELETE FROM %s.%s WHERE %s" %
                           (schema, table, condition))
    else:
        show("empty %s.%s" % (schema, table))
        dboperator.empty(schema, table)

    show("insert data")
    cnt = 0
    for row in ijson.items(response, 'item'):
        cnt += 1
        # show some sign of being alive
        if cnt % 100 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        if cnt % 1000 == 0:
            show("-- %d" % (cnt))
        if verbose: show("%d -- %s" % (cnt, row))

        # find out which columns to use on insert
        dboperator.resetcolumns(row)
Exemplo n.º 16
0
def load(secure,hostname,url,schema,table,postdata,condition,verbose,rowcount):
  """Fetch a JSON array from an API and insert its items into ``schema.table``.

  The response is streamed item by item with ``ijson``.  Before inserting, the
  target table is either emptied or, when ``condition`` is given, only the
  rows matching it are deleted.

  Args:
    secure: use ``https://`` when truthy, ``http://`` otherwise.
    hostname: API host.
    url: request path.
    schema: target database schema.
    table: target database table.
    postdata: request body as text, or None; a non-None body is sent along
      with the request (urllib then issues a POST).
    condition: SQL predicate placed verbatim after ``WHERE`` in the DELETE
      statement, or a falsy value to empty the whole table.
    verbose: when truthy, log every item.
    rowcount: number of rows collected before each batch insert.

  Exits the process with status 2 when the API call fails.  Basic
  authentication is used when the environment variable API_USERNAME is set
  (password from API_PASSWORD).
  """
  show("begin "+hostname+" "+url+" "+schema+" "+table+" "+(postdata or "")+" "+(condition or ""))
  if secure:
    address = "https://"+hostname+url
  else:
    address = "http://"+hostname+url
  show("load from "+address)

  reqheaders = {'Content-Type': 'application/json'}
  reqheaders['Caller-Id'] = '1.2.246.562.10.2013112012294919827487.vipunen'

  # api credentials from env vars
  if os.getenv("API_USERNAME"):
    show("using authentication")
    apiuser = os.getenv("API_USERNAME")
    apipass = os.getenv("API_PASSWORD", "")
    # b64encode works on bytes and returns bytes: encode first, decode after
    credentials = (apiuser+":"+apipass).encode("utf-8")
    reqheaders['Authorization'] = 'Basic %s' % base64.b64encode(credentials).decode("ascii")

  # automatic POST with (post)data
  print("value used for , -r, --rowcount=", rowcount)
  # the request body has to be bytes, a text body is rejected with TypeError
  data = postdata
  if data is not None and not isinstance(data, bytes):
    data = data.encode("utf-8")
  request = Request(address, data=data, headers=reqheaders)
  print(request)
  try:
    response = urlopen(request)
  except http.client.IncompleteRead as e:
    show('IncompleteRead exception.')
    # e.partial holds the bytes read so far; report how many there were
    show('Received: %d'%(len(e.partial)))
    sys.exit(2)
  except HTTPError as e:
    show('The server couldn\'t fulfill the request.')
    show('Error code: %d'%(e.code))
    sys.exit(2)
  except URLError as e:
    show('We failed to reach a server.')
    show('Reason: %s'%(e.reason))
    sys.exit(2)
  else:
    # everything is fine
    show("api call OK")

  # remove data conditionally, otherwise empty
  # merge operation could be considered here...
  if condition:
    show("remove from %s.%s with condition '%s'"%(schema,table,condition))
    # NOTE: schema, table and condition are pasted into the statement as-is,
    # so they must only ever come from a trusted source.
    dboperator.execute("DELETE FROM %s.%s WHERE %s"%(schema,table,condition))
  else:
    show("empty %s.%s"%(schema,table))
    dboperator.empty(schema,table)

  show("insert data")
  cnt=0
  rows = []

  for row in ijson.items(response,'item'):
    cnt+=1
    # show some sign of being alive
    if cnt%100 == 0:
      sys.stdout.write('.')
      sys.stdout.flush()
    if cnt%1000 == 0:
      show("-- %d" % (cnt))
    if verbose: show("%d -- %s"%(cnt,row))

    # find out which columns to use on insert
    dboperator.resetcolumns(row)

    # flatten arrays/lists to their JSON text
    for col in row:
      if isinstance(row[col], list):
        row[col] = json.dumps(row[col])

    if cnt == 1:
      # the very first row is inserted on its own, not as part of a batch
      dboperator.insert(address,schema,table,row)
      continue

    rows.append(row)
    if len(rows) == rowcount:
      # NOTE(review): bare insert() is not defined in this block; presumably a
      # batch-insert helper imported elsewhere in the file -- confirm.
      insert(address,schema,table,rows)
      rows = []

  # whatever is left over from the last, incomplete batch
  if rows:
    insert(address,schema,table,rows)
    rows = []

  show("wrote %d"%(cnt))
  show("ready")