Exemplo n.º 1
0
def index_gcis(gcis_url, es_url, index, alias, dump_dir):
    """Index GCIS articles into PROV-ES ElasticSearch index.

    Loads the reference, journal, person and organization lists from
    `dump_dir`, then walks `<dump_dir>/article/`. Every file found is parsed
    as JSON, converted with get_doc_prov() and imported with import_prov().

    Args:
        gcis_url: GCIS base URL, forwarded to get_doc_prov().
        es_url: ElasticSearch URL, forwarded to get_es_conn().
        index: name of the index to import into.
        alias: index alias, forwarded to get_es_conn() and import_prov().
        dump_dir: directory holding the dumped GCIS items; articles are read
            from its `article/` subdirectory.
    """
    conn = get_es_conn(es_url, index, alias)
    refList = get_refList(dump_dir)
    journalList = get_itemList(dump_dir, "journal")
    personList = get_itemList(dump_dir, "person")
    organizationList = get_itemList(dump_dir, "organization")

    art_path = "%s/article/" % dump_dir
    for (root, dirs, files) in os.walk(art_path):
        for name in files:
            # Join against `root` rather than art_path: os.walk() also yields
            # files that live in subdirectories of art_path.
            with open(os.path.join(root, name)) as item:
                article = json.load(item)
            prov = get_doc_prov(article, gcis_url, refList, journalList,
                                organizationList, personList, dump_dir)
            import_prov(conn, index, alias, prov)
Exemplo n.º 2
0
def index_gcis(gcis_url, es_url, index, alias, dump_dir):
    """Index GCIS models into PROV-ES ElasticSearch index.

    Walks `<dump_dir>/model/`. Every file found is parsed as JSON, converted
    with get_doc_prov() and imported with import_prov().

    Args:
        gcis_url: GCIS base URL, forwarded to get_doc_prov().
        es_url: ElasticSearch URL, forwarded to get_es_conn().
        index: name of the index to import into.
        alias: index alias, forwarded to get_es_conn() and import_prov().
        dump_dir: directory holding the dumped GCIS items; models are read
            from its `model/` subdirectory.
    """
    conn = get_es_conn(es_url, index, alias)
    refList = get_refList(dump_dir)
    file_path = "%s/model/" % dump_dir
    for (root, dirs, files) in os.walk(file_path):
        for name in files:
            # Use the directory os.walk() is currently visiting so that files
            # in nested subdirectories resolve to a path that exists.
            with open(os.path.join(root, name)) as item:
                jsonFile = json.load(item)
            prov = get_doc_prov(jsonFile, gcis_url, refList)
            import_prov(conn, index, alias, prov)
def index_gcis(gcis_url, es_url, index, alias, dump_dir):
    """Index GCIS articles into PROV-ES ElasticSearch index.

    Loads the reference list with get_itemList(dump_dir, "reference"), then
    walks `<dump_dir>/article/`. Every file found is parsed as JSON, converted
    with get_doc_prov() and imported with import_prov(). The path of each file
    and the generated PROV document are printed to stdout along the way.

    Args:
        gcis_url: GCIS base URL, forwarded to get_doc_prov().
        es_url: ElasticSearch URL, forwarded to get_es_conn().
        index: name of the index to import into.
        alias: index alias, forwarded to get_es_conn() and import_prov().
        dump_dir: directory holding the dumped GCIS items.
    """
    conn = get_es_conn(es_url, index, alias)
    # The original referenced the undefined name `dump_diri` here, which
    # raised NameError before anything was indexed.
    refList = get_itemList(dump_dir, "reference")
    art_path = "%s/article/" % dump_dir
    for (root, dirs, files) in os.walk(art_path):
        for name in files:
            # Join against `root` so files in nested subdirectories resolve.
            f = os.path.join(root, name)
            print("f: %s" % f)
            with open(f) as item:
                article = json.load(item)
            prov = get_doc_prov(article, gcis_url, refList)
            print("prov: %s" % json.dumps(prov, indent=2))
            import_prov(conn, index, alias, prov)
Exemplo n.º 4
0
def index_gcis(gcis_url, es_url, index, alias, dump_dir):
    """Index GCIS journals into PROV-ES ElasticSearch index.

    Loads the reference, article, person and organization lists from
    `dump_dir`, then walks `<dump_dir>/journal/`. Every file found is parsed
    as JSON, converted with get_doc_prov() and imported with import_prov().

    Args:
        gcis_url: GCIS base URL, forwarded to get_doc_prov().
        es_url: ElasticSearch URL, forwarded to get_es_conn().
        index: name of the index to import into.
        alias: index alias, forwarded to get_es_conn() and import_prov().
        dump_dir: directory holding the dumped GCIS items; journals are read
            from its `journal/` subdirectory.
    """
    conn = get_es_conn(es_url, index, alias)
    refList = get_refList(dump_dir)
    articleList = get_itemList(dump_dir, "article")
    personList = get_itemList(dump_dir, "person")
    organizationList = get_itemList(dump_dir, "organization")

    file_path = "%s/journal/" % dump_dir
    for (root, dirs, files) in os.walk(file_path):
        for name in files:
            # Use the directory os.walk() is currently visiting so that files
            # in nested subdirectories resolve to a path that exists.
            with open(os.path.join(root, name)) as item:
                jsonFile = json.load(item)
            prov = get_doc_prov(jsonFile, gcis_url, refList, articleList,
                                personList, organizationList)
            import_prov(conn, index, alias, prov)
Exemplo n.º 5
0
def index_gcis(gcis_url, es_url, index, alias):
    """Index GCIS into PROV-ES ElasticSearch index.

    Fetches `<gcis_url>/article.json`, then requests each listed article's
    `href`, converts the returned metadata with get_doc_prov() and imports
    the result with import_prov(). Any HTTP error status raises through
    raise_for_status(). Both requests are made with verify=False.
    """
    es_conn = get_es_conn(es_url, index, alias)

    listing = requests.get('%s/article.json' % gcis_url, params={ 'all': 1 }, verify=False)
    listing.raise_for_status()

    for entry in listing.json():
        # The identifier is looked up but not otherwise used below.
        entry_id = entry['identifier']
        detail = requests.get(entry['href'], params={ 'all': 1 }, verify=False)
        detail.raise_for_status()
        import_prov(es_conn, index, alias, get_doc_prov(detail.json(), gcis_url))
Exemplo n.º 6
0
class ImportProvEs(Resource):
    """Import PROV-ES document."""

    resp_model = api.model('ImportResponse', {
        'success': fields.Boolean(required=True, description="if 'false', encountered exception; otherwise no errors occurred"),
        'message': fields.String(required=True, description="message describing success or failure")
    })

    @api.doc(params={ 'prov_es': 'PROV-ES JSON document string'})
    @api.marshal_with(resp_model)
    def post(self):
        """Parse the submitted PROV-ES JSON and import it into ElasticSearch.

        The `prov_es` parameter is read from the form data, falling back to
        the query string. The target index is named
        `<PROVES_ES_PREFIX>-YYYY.MM.DD` using the current UTC date.

        Returns:
            A response dict, paired with status 400 when `prov_es` is missing
            and status 500 when the JSON cannot be parsed or the import
            raises. On success the dict is returned with `success` set to
            True.
        """
        # get PROV-ES json
        prov_es = request.form.get('prov_es', request.args.get('prov_es', None))
        if prov_es is None:
            return { 'success': False,
                     'message': "Missing prov_es parameter.",
                     'result': {} }, 400

        # load JSON
        try:
            pej = json.loads(prov_es)
        except Exception as e:
            message = "Failed to parse PROV-ES json. Check that your PROV-ES JSON conforms to PROV-JSON."
            current_app.logger.debug("Got error: %s" % e)
            current_app.logger.debug(message)
            return { 'success': False,
                     'message': message,
                     'result': {} }, 500

        # import prov
        es_url = current_app.config['ES_URL']
        dt = datetime.utcnow()
        es_index = "%s-%04d.%02d.%02d" % (current_app.config['PROVES_ES_PREFIX'],
                                          dt.year, dt.month, dt.day)
        alias = current_app.config['PROVES_ES_ALIAS']
        conn = get_es_conn(es_url, es_index, alias)
        try:
            import_prov(conn, es_index, alias, pej)
        except Exception as e:
            current_app.logger.debug("Got error: %s" % e)
            current_app.logger.debug("Traceback: %s" % traceback.format_exc())
            message = "Failed to import PROV-ES json. Check that your PROV-ES JSON conforms to PROV-JSON."
            current_app.logger.debug(message)
            return { 'success': False,
                     'message': message,
                     'result': {} }, 500

        # Report success explicitly: falling off the end would return None,
        # leaving the response model's required fields unset.
        return { 'success': True,
                 'message': "Successfully imported PROV-ES document.",
                 'result': {} }
Exemplo n.º 7
0
def index_gcis(gcis_url, es_url, index, alias, dump_dir):
    """Index GCIS datasets into PROV-ES ElasticSearch index.

    Loads the reference, person, report and webpage lists from `dump_dir`,
    then walks `<dump_dir>/dataset/`. Every file found is parsed as JSON,
    converted with get_doc_prov() and imported with import_prov().

    Args:
        gcis_url: GCIS base URL, forwarded to get_doc_prov().
        es_url: ElasticSearch URL, forwarded to get_es_conn().
        index: name of the index to import into.
        alias: index alias, forwarded to get_es_conn() and import_prov().
        dump_dir: directory holding the dumped GCIS items; datasets are read
            from its `dataset/` subdirectory.
    """
    conn = get_es_conn(es_url, index, alias)
    refList = get_refList(dump_dir)
    personList = get_itemList(dump_dir, "person")
    # The organization and activity lists were loaded here but never passed
    # to get_doc_prov(), so they are no longer read.
    reportList = get_itemList(dump_dir, "report")
    webpageList = get_itemList(dump_dir, "webpage")

    dataset_path = "%s/dataset/" % dump_dir
    for (root, dirs, files) in os.walk(dataset_path):
        for name in files:
            # Use the directory os.walk() is currently visiting so that files
            # in nested subdirectories resolve to a path that exists.
            with open(os.path.join(root, name)) as item:
                dataset = json.load(item)
            prov = get_doc_prov(dataset, gcis_url, refList, personList,
                                reportList, webpageList)
            import_prov(conn, index, alias, prov)
Exemplo n.º 8
0
def index_gcis(gcis_url, es_url, index, alias):
    """Index GCIS into PROV-ES ElasticSearch index.

    Fetches `<gcis_url>/image.json`, then requests each listed image's
    `href`, converts the returned metadata with get_image_prov() and imports
    the result with import_prov(). Any HTTP error status raises through
    raise_for_status().
    """
    conn = get_es_conn(es_url, index, alias)

    def fetch_json(url):
        # GET `url` with all=1 and return the decoded JSON body.
        resp = requests.get(url, params={ 'all': 1 })
        resp.raise_for_status()
        return resp.json()

    for image in fetch_json('%s/image.json' % gcis_url):
        # The identifier is looked up but not otherwise used below.
        image_id = image['identifier']
        image_md = fetch_json(image['href'])
        image_prov = get_image_prov(image_md, gcis_url)
        import_prov(conn, index, alias, image_prov)
Exemplo n.º 9
0
def index_gcis(gcis_url, es_url, index, alias):
    """Index GCIS into PROV-ES ElasticSearch index.

    Fetches `<gcis_url>/image.json`, then requests each listed image's
    `href`, converts the returned metadata with get_image_prov() and imports
    the result with import_prov(). Any HTTP error status raises through
    raise_for_status(). Both requests are made with verify=False.
    """
    es_conn = get_es_conn(es_url, index, alias)

    # Retrieve the listing of all images.
    listing_url = '%s/image.json' % gcis_url
    listing_resp = requests.get(listing_url, params={ 'all': 1 }, verify=False)
    listing_resp.raise_for_status()
    listing = listing_resp.json()

    for record in listing:
        # The identifier is looked up but not otherwise used below.
        record_id = record['identifier']
        record_url = record['href']

        # Retrieve the full metadata for this image.
        record_resp = requests.get(record_url, params={ 'all': '1' }, verify=False)
        record_resp.raise_for_status()
        metadata = record_resp.json()

        import_prov(es_conn, index, alias, get_image_prov(metadata, gcis_url))
Exemplo n.º 10
0
def index_gcis(gcis_url, es_url, index, alias):
    """Index GCIS into PROV-ES ElasticSearch index.

    Fetches `<gcis_url>/article.json`, then requests each listed article's
    `href`, converts the returned metadata with get_doc_prov() and imports
    the result with import_prov(). Any HTTP error status raises through
    raise_for_status(). Both requests are made with verify=False.
    """
    conn = get_es_conn(es_url, index, alias)

    # Keyword arguments shared by the listing request and every per-article
    # request.
    request_opts = {'params': {'all': 1}, 'verify': False}

    index_resp = requests.get('%s/article.json' % gcis_url, **request_opts)
    index_resp.raise_for_status()
    articles = index_resp.json()

    for article in articles:
        # The identifier is looked up but not otherwise used below.
        article_id = article['identifier']
        article_href = article['href']
        article_resp = requests.get(article_href, **request_opts)
        article_resp.raise_for_status()
        article_prov = get_doc_prov(article_resp.json(), gcis_url)
        import_prov(conn, index, alias, article_prov)
Exemplo n.º 11
0
def index_gcis(gcis_url, es_url, index, alias):
    """Index GCIS into PROV-ES ElasticSearch index.

    Fetches `<gcis_url>/image.json`, then requests each listed image's
    `href`, converts the returned metadata with get_image_prov() and imports
    the result with import_prov(). Any HTTP error status raises through
    raise_for_status(). Both requests are made with verify=False.
    """
    conn = get_es_conn(es_url, index, alias)

    def get_json(url, all_flag):
        # GET `url` with the given `all` query value and return decoded JSON.
        reply = requests.get(url, params={'all': all_flag}, verify=False)
        reply.raise_for_status()
        return reply.json()

    for summary in get_json('%s/image.json' % gcis_url, 1):
        # The identifier is looked up but not otherwise used below.
        summary_id = summary['identifier']
        full_md = get_json(summary['href'], '1')
        import_prov(conn, index, alias, get_image_prov(full_md, gcis_url))
Exemplo n.º 12
0
from fv_prov_es.lib.import_utils import get_es_conn, import_prov

from prov_es.model import (get_uuid, ProvEsDocument, GCIS, PROV, PROV_TYPE,
                                   PROV_ROLE, PROV_LABEL, PROV_LOCATION, HYSDS)




# Select the settings class from the PROVES_ENV environment variable,
# falling back to 'prod' when it is unset.
env = os.environ.get('PROVES_ENV', 'prod')
app = create_app(
    'fv_prov_es.settings.%sConfig' % env.capitalize(),
    env=env,
)

es_url = app.config['ES_URL']
gcis_url = "http://data.globalchange.gov"
dt = datetime.utcnow()

# A fixed "<prefix>-gcis" index is used here rather than a per-day index.
index = "%s-gcis" % app.config['PROVES_ES_PREFIX']
alias = app.config['PROVES_ES_ALIAS']

conn = get_es_conn(es_url, index, alias)

# Load the PROV-ES JSON document named by the first command-line argument.
with open(sys.argv[1]) as prov_file:
    prov_es_json = json.load(prov_file)

import_prov(conn, index, alias, prov_es_json)


#index_gcis(gcis_url, es_url, index, alias)