def index_gcis(gcis_url, es_url, index, alias, dump_dir):
    """Index GCIS articles from a local dump into the PROV-ES ElasticSearch index.

    Every file found under ``<dump_dir>/article/`` is parsed as JSON, turned
    into a PROV-ES document by ``get_doc_prov`` and imported with
    ``import_prov``.

    Args:
        gcis_url: GCIS base URL, passed through to ``get_doc_prov``.
        es_url: ElasticSearch URL used to open the connection.
        index: Name of the ElasticSearch index to import into.
        alias: Index alias, passed to ``get_es_conn`` and ``import_prov``.
        dump_dir: Root directory of the GCIS dump.
    """
    conn = get_es_conn(es_url, index, alias)
    refList = get_refList(dump_dir)
    journalList = get_itemList(dump_dir, "journal")
    personList = get_itemList(dump_dir, "person")
    organizationList = get_itemList(dump_dir, "organization")

    art_path = "%s/article/" % dump_dir
    for root, _dirs, files in os.walk(art_path):
        for name in files:
            # Join with the directory os.walk is currently visiting; joining
            # with art_path would produce a wrong path for nested files.
            path = os.path.join(root, name)
            with open(path) as item:
                article = json.load(item)
            prov = get_doc_prov(article, gcis_url, refList, journalList,
                                organizationList, personList, dump_dir)
            import_prov(conn, index, alias, prov)
def index_gcis(gcis_url, es_url, index, alias, dump_dir):
    """Index GCIS model records from a local dump into the PROV-ES index.

    Every file found under ``<dump_dir>/model/`` is parsed as JSON, turned
    into a PROV-ES document by ``get_doc_prov`` and imported with
    ``import_prov``.

    Args:
        gcis_url: GCIS base URL, passed through to ``get_doc_prov``.
        es_url: ElasticSearch URL used to open the connection.
        index: Name of the ElasticSearch index to import into.
        alias: Index alias, passed to ``get_es_conn`` and ``import_prov``.
        dump_dir: Root directory of the GCIS dump.
    """
    conn = get_es_conn(es_url, index, alias)
    refList = get_refList(dump_dir)

    file_path = "%s/model/" % dump_dir
    for root, _dirs, files in os.walk(file_path):
        for name in files:
            # Join with the directory os.walk is currently visiting; joining
            # with file_path would produce a wrong path for nested files.
            path = os.path.join(root, name)
            with open(path) as item:
                jsonFile = json.load(item)
            prov = get_doc_prov(jsonFile, gcis_url, refList)
            import_prov(conn, index, alias, prov)
def index_gcis(gcis_url, es_url, index, alias, dump_dir):
    """Index GCIS articles from a local dump into the PROV-ES index.

    Every file found under ``<dump_dir>/article/`` is parsed as JSON, turned
    into a PROV-ES document by ``get_doc_prov`` and imported with
    ``import_prov``. The path of each file and the generated PROV-ES JSON
    are printed to stdout as the import proceeds.

    Args:
        gcis_url: GCIS base URL, passed through to ``get_doc_prov``.
        es_url: ElasticSearch URL used to open the connection.
        index: Name of the ElasticSearch index to import into.
        alias: Index alias, passed to ``get_es_conn`` and ``import_prov``.
        dump_dir: Root directory of the GCIS dump.
    """
    conn = get_es_conn(es_url, index, alias)
    # Was `dump_diri`, an undefined name that raised NameError on every call.
    refList = get_itemList(dump_dir, "reference")

    art_path = "%s/article/" % dump_dir
    for root, _dirs, files in os.walk(art_path):
        for name in files:
            # Join with the directory os.walk is currently visiting; joining
            # with art_path would produce a wrong path for nested files.
            path = os.path.join(root, name)
            print("f: %s" % path)
            with open(path) as item:
                article = json.load(item)
            prov = get_doc_prov(article, gcis_url, refList)
            print("prov: %s" % json.dumps(prov, indent=2))
            import_prov(conn, index, alias, prov)
def index_gcis(gcis_url, es_url, index, alias, dump_dir):
    """Index GCIS journals from a local dump into the PROV-ES index.

    Every file found under ``<dump_dir>/journal/`` is parsed as JSON, turned
    into a PROV-ES document by ``get_doc_prov`` (which also receives the
    reference, article, person and organization lists loaded from the dump)
    and imported with ``import_prov``.

    Args:
        gcis_url: GCIS base URL, passed through to ``get_doc_prov``.
        es_url: ElasticSearch URL used to open the connection.
        index: Name of the ElasticSearch index to import into.
        alias: Index alias, passed to ``get_es_conn`` and ``import_prov``.
        dump_dir: Root directory of the GCIS dump.
    """
    conn = get_es_conn(es_url, index, alias)
    refList = get_refList(dump_dir)
    articleList = get_itemList(dump_dir, "article")
    personList = get_itemList(dump_dir, "person")
    organizationList = get_itemList(dump_dir, "organization")

    file_path = "%s/journal/" % dump_dir
    for root, _dirs, files in os.walk(file_path):
        for name in files:
            # Join with the directory os.walk is currently visiting; joining
            # with file_path would produce a wrong path for nested files.
            path = os.path.join(root, name)
            with open(path) as item:
                jsonFile = json.load(item)
            prov = get_doc_prov(jsonFile, gcis_url, refList, articleList,
                                personList, organizationList)
            import_prov(conn, index, alias, prov)
def index_gcis(gcis_url, es_url, index, alias):
    """Index GCIS articles into the PROV-ES ElasticSearch index.

    Fetches ``<gcis_url>/article.json``, then requests the ``href`` of every
    listed entry, converts each response to PROV-ES with ``get_doc_prov``
    and imports it with ``import_prov``.

    Args:
        gcis_url: GCIS base URL.
        es_url: ElasticSearch URL used to open the connection.
        index: Name of the ElasticSearch index to import into.
        alias: Index alias, passed to ``get_es_conn`` and ``import_prov``.

    Raises:
        requests.HTTPError: From ``raise_for_status`` when a request returns
            an error status.
        KeyError: When a listed entry has no ``href``.
    """
    conn = get_es_conn(es_url, index, alias)

    # NOTE(review): verify=False turns off TLS certificate verification for
    # these requests; confirm it is still required for the GCIS endpoint.
    listing = requests.get('%s/article.json' % gcis_url, params={ 'all': 1 },
                           verify=False)
    listing.raise_for_status()

    for doc in listing.json():
        detail = requests.get(doc['href'], params={ 'all': 1 }, verify=False)
        detail.raise_for_status()
        prov = get_doc_prov(detail.json(), gcis_url)
        import_prov(conn, index, alias, prov)
class ImportProvEs(Resource):
    """Import PROV-ES document."""

    resp_model = api.model('ImportResponse', {
        'success': fields.Boolean(required=True, description="if 'false', encountered exception; otherwise no errors occurred"),
        'message': fields.String(required=True, description="message describing success or failure")
    })

    @api.doc(params={ 'prov_es': 'PROV-ES JSON document string'})
    @api.marshal_with(resp_model)
    def post(self):
        """Parse the ``prov_es`` parameter and import it into ElasticSearch.

        The parameter is read from the form body first and from the query
        string as a fallback. The target index name is built from
        ``PROVES_ES_PREFIX`` and the current UTC date.

        Returns:
            A response dict with ``success`` and ``message``. A missing
            parameter is returned with status 400; a parse failure or an
            import failure is returned with status 500.
        """
        # get PROV-ES json
        prov_es = request.form.get('prov_es', request.args.get('prov_es', None))
        if prov_es is None:
            return { 'success': False,
                     'message': "Missing prov_es parameter.",
                     'result': {} }, 400

        # load JSON
        try:
            pej = json.loads(prov_es)
        except Exception:
            message = "Failed to parse PROV-ES json. Check that your PROV-ES JSON conforms to PROV-JSON."
            current_app.logger.debug(message)
            return { 'success': False,
                     'message': message,
                     'result': {} }, 500

        # import prov
        es_url = current_app.config['ES_URL']
        dt = datetime.utcnow()
        es_index = "%s-%04d.%02d.%02d" % (current_app.config['PROVES_ES_PREFIX'],
                                          dt.year, dt.month, dt.day)
        alias = current_app.config['PROVES_ES_ALIAS']
        conn = get_es_conn(es_url, es_index, alias)
        try:
            import_prov(conn, es_index, alias, pej)
        except Exception as e:
            current_app.logger.debug("Got error: %s" % e)
            current_app.logger.debug("Traceback: %s" % traceback.format_exc())
            message = "Failed to import PROV-ES json. Check that your PROV-ES JSON conforms to PROV-JSON."
            current_app.logger.debug(message)
            return { 'success': False,
                     'message': message,
                     'result': {} }, 500

        # Report success explicitly; without this the method returned None
        # and the marshalled response carried null success/message fields.
        return { 'success': True,
                 'message': "Successfully imported PROV-ES json.",
                 'result': {} }
def index_gcis(gcis_url, es_url, index, alias, dump_dir):
    """Index GCIS datasets from a local dump into the PROV-ES index.

    Every file found under ``<dump_dir>/dataset/`` is parsed as JSON, turned
    into a PROV-ES document by ``get_doc_prov`` (which also receives the
    reference, person, report and webpage lists loaded from the dump) and
    imported with ``import_prov``.

    Args:
        gcis_url: GCIS base URL, passed through to ``get_doc_prov``.
        es_url: ElasticSearch URL used to open the connection.
        index: Name of the ElasticSearch index to import into.
        alias: Index alias, passed to ``get_es_conn`` and ``import_prov``.
        dump_dir: Root directory of the GCIS dump.
    """
    conn = get_es_conn(es_url, index, alias)
    refList = get_refList(dump_dir)
    personList = get_itemList(dump_dir, "person")
    # NOTE(review): organizationList and activityList are loaded but never
    # passed to get_doc_prov below; confirm whether they can be dropped.
    organizationList = get_itemList(dump_dir, "organization")
    activityList = get_itemList(dump_dir, "activity")
    reportList = get_itemList(dump_dir, "report")
    webpageList = get_itemList(dump_dir, "webpage")

    dataset_path = "%s/dataset/" % dump_dir
    for root, _dirs, files in os.walk(dataset_path):
        for name in files:
            # Join with the directory os.walk is currently visiting; joining
            # with dataset_path would produce a wrong path for nested files.
            path = os.path.join(root, name)
            with open(path) as item:
                dataset = json.load(item)
            prov = get_doc_prov(dataset, gcis_url, refList, personList,
                                reportList, webpageList)
            import_prov(conn, index, alias, prov)
def index_gcis(gcis_url, es_url, index, alias):
    """Index GCIS images into the PROV-ES ElasticSearch index.

    Fetches ``<gcis_url>/image.json``, then requests the ``href`` of every
    listed entry, converts each response to PROV-ES with ``get_image_prov``
    and imports it with ``import_prov``.

    Args:
        gcis_url: GCIS base URL.
        es_url: ElasticSearch URL used to open the connection.
        index: Name of the ElasticSearch index to import into.
        alias: Index alias, passed to ``get_es_conn`` and ``import_prov``.

    Raises:
        requests.HTTPError: From ``raise_for_status`` when a request returns
            an error status.
        KeyError: When a listed entry has no ``href``.
    """
    conn = get_es_conn(es_url, index, alias)

    listing = requests.get('%s/image.json' % gcis_url, params={ 'all': 1 })
    listing.raise_for_status()

    for img in listing.json():
        detail = requests.get(img['href'], params={ 'all': 1 })
        detail.raise_for_status()
        prov = get_image_prov(detail.json(), gcis_url)
        import_prov(conn, index, alias, prov)
def index_gcis(gcis_url, es_url, index, alias):
    """Index GCIS images into the PROV-ES ElasticSearch index.

    Fetches ``<gcis_url>/image.json``, then requests the ``href`` of every
    listed entry, converts each response to PROV-ES with ``get_image_prov``
    and imports it with ``import_prov``.

    Args:
        gcis_url: GCIS base URL.
        es_url: ElasticSearch URL used to open the connection.
        index: Name of the ElasticSearch index to import into.
        alias: Index alias, passed to ``get_es_conn`` and ``import_prov``.

    Raises:
        requests.HTTPError: From ``raise_for_status`` when a request returns
            an error status.
        KeyError: When a listed entry has no ``href``.
    """
    conn = get_es_conn(es_url, index, alias)

    # NOTE(review): verify=False turns off TLS certificate verification for
    # these requests; confirm it is still required for the GCIS endpoint.
    listing = requests.get('%s/image.json' % gcis_url, params={ 'all': 1 },
                           verify=False)
    listing.raise_for_status()

    for img in listing.json():
        detail = requests.get(img['href'], params={ 'all': '1' }, verify=False)
        detail.raise_for_status()
        prov = get_image_prov(detail.json(), gcis_url)
        import_prov(conn, index, alias, prov)
def index_gcis(gcis_url, es_url, index, alias):
    """Index GCIS articles into the PROV-ES ElasticSearch index.

    Fetches ``<gcis_url>/article.json``, then requests the ``href`` of every
    listed entry, converts each response to PROV-ES with ``get_doc_prov``
    and imports it with ``import_prov``.

    Args:
        gcis_url: GCIS base URL.
        es_url: ElasticSearch URL used to open the connection.
        index: Name of the ElasticSearch index to import into.
        alias: Index alias, passed to ``get_es_conn`` and ``import_prov``.

    Raises:
        requests.HTTPError: From ``raise_for_status`` when a request returns
            an error status.
        KeyError: When a listed entry has no ``href``.
    """
    conn = get_es_conn(es_url, index, alias)

    # NOTE(review): verify=False turns off TLS certificate verification for
    # these requests; confirm it is still required for the GCIS endpoint.
    listing = requests.get('%s/article.json' % gcis_url, params={'all': 1},
                           verify=False)
    listing.raise_for_status()

    for doc in listing.json():
        detail = requests.get(doc['href'], params={'all': 1}, verify=False)
        detail.raise_for_status()
        prov = get_doc_prov(detail.json(), gcis_url)
        import_prov(conn, index, alias, prov)
def index_gcis(gcis_url, es_url, index, alias):
    """Index GCIS images into the PROV-ES ElasticSearch index.

    Fetches ``<gcis_url>/image.json``, then requests the ``href`` of every
    listed entry, converts each response to PROV-ES with ``get_image_prov``
    and imports it with ``import_prov``.

    Args:
        gcis_url: GCIS base URL.
        es_url: ElasticSearch URL used to open the connection.
        index: Name of the ElasticSearch index to import into.
        alias: Index alias, passed to ``get_es_conn`` and ``import_prov``.

    Raises:
        requests.HTTPError: From ``raise_for_status`` when a request returns
            an error status.
        KeyError: When a listed entry has no ``href``.
    """
    conn = get_es_conn(es_url, index, alias)

    # NOTE(review): verify=False turns off TLS certificate verification for
    # these requests; confirm it is still required for the GCIS endpoint.
    listing = requests.get('%s/image.json' % gcis_url, params={'all': 1},
                           verify=False)
    listing.raise_for_status()

    for img in listing.json():
        detail = requests.get(img['href'], params={'all': '1'}, verify=False)
        detail.raise_for_status()
        prov = get_image_prov(detail.json(), gcis_url)
        import_prov(conn, index, alias, prov)
from fv_prov_es.lib.import_utils import get_es_conn, import_prov
from prov_es.model import (get_uuid, ProvEsDocument, GCIS, PROV, PROV_TYPE,
                           PROV_ROLE, PROV_LABEL, PROV_LOCATION, HYSDS)

# Script body: load one PROV-ES JSON file (path given as the first
# command-line argument) and import it into the "<prefix>-gcis" index.

# Select the settings class from PROVES_ENV (defaults to 'prod'), e.g.
# 'prod' -> 'fv_prov_es.settings.ProdConfig'.
env = os.environ.get('PROVES_ENV', 'prod')
app = create_app('fv_prov_es.settings.%sConfig' % env.capitalize(), env=env)
es_url = app.config['ES_URL']
# NOTE(review): gcis_url and dt are only referenced by the commented-out
# code below within this view; confirm they are still needed.
gcis_url = "http://data.globalchange.gov"
dt = datetime.utcnow()
# Previous date-stamped index name, kept for reference:
#index = "%s-%04d.%02d.%02d" % (app.config['PROVES_ES_PREFIX'],
#                               dt.year, dt.month, dt.day)
index = "%s-gcis" % app.config['PROVES_ES_PREFIX']
alias = app.config['PROVES_ES_ALIAS']
conn = get_es_conn(es_url, index, alias)

#get json file
#prov = get_image_prov(img_md, gcis_url)
#print(json.dumps(prov, indent=2))
# NOTE(review): sys.argv[1] is read without checking that an argument was
# supplied; running the script with no argument raises IndexError.
with open(sys.argv[1]) as item:
    prov_es_json = json.load(item)
import_prov(conn, index, alias, prov_es_json)
#index_gcis(gcis_url, es_url, index, alias)