示例#1
0
 def put_mock_log(self, index_name):
     """Index each line of the mock access log as one document in ``index_name``.

     Every line of the log file becomes a ``Document`` whose ``message`` field
     holds the raw line; it is saved through ``self.client``. A progress line
     is printed for row 0 and every 1000th row after that.

     :param index_name: index that receives the documents
     """
     log_path = '../data/logs/njnert_logs/njnet_access_mix.log'
     with open(log_path) as log_file:
         for row_no, raw_line in enumerate(log_file):
             Document(message=raw_line).save(using=self.client, index=index_name)
             if row_no % 1000:
                 # Not a multiple of 1000: no progress output for this row.
                 continue
             print('put {} rows'.format(row_no))
示例#2
0
def pg2es():
    """Save a document to the "places2" index for every ``places`` row and print the row's GeoJSON.

    NOTE(review): the serialized GeoJSON is only printed, never attached to
    the document, and the same field-less ``Document`` instance is saved on
    every iteration -- confirm whether the row data was meant to be indexed.
    """
    shared_doc = Document()

    for place in places.objects.all():
        # Serialize the single row, using its ``geom`` field as the geometry.
        feature_json = serialize('geojson', [place], geometry_field='geom')
        shared_doc.save(index="places2")
        print(feature_json)
def worker_abort(worker):
    """Record a worker timeout as a document in the ``.datashader_tiles`` index.

    Builds an Elasticsearch client from the comma-separated ``ELASTIC`` config
    value of the worker's WSGI application and saves a document describing the
    request that was in flight (URL, host name, process id, timestamp).

    :param worker: worker object exposing ``app.wsgi()`` and ``current_request.uri``
                   (presumably a gunicorn worker -- confirm against the hook registration)
    """
    flask_app = worker.app.wsgi()
    es_hosts = flask_app.config.get("ELASTIC").split(",")
    es_client = Elasticsearch(es_hosts, verify_certs=False, timeout=120)

    timeout_record = {
        "url": worker.current_request.uri,
        "host": socket.gethostname(),
        "pid": os.getpid(),
        "timestamp": datetime.now(),
        "timeout": True,
        "error": "Hit gunicorn timeout prior to request completion",
    }
    Document(**timeout_record).save(using=es_client, index=".datashader_tiles")
示例#4
0
def main():
    """Ensure ``test_index`` exists and is aliased, index one sample document, and print the elapsed time.

    The index is created with a single shard when it does not exist yet. The
    alias is attached afterwards in both cases: the alias API needs an
    explicit alias name and an existing index, so it cannot run before the
    index has been saved.
    """
    import time

    start = time.time()
    index_name = 'test_index'
    alias_name = "sss3"
    es_index = Index(index_name)
    if not es_index.exists():
        # Number of data node
        es_index.settings(number_of_shards=1)
        es_index.save()
    # Runs for both a freshly created and a pre-existing index.
    es_index.put_alias(using='default', name=alias_name)
    doc = Document(first_name='cheng',
                   last_name="unknowname",
                   hometown='China')
    doc.save(using='default', index=index_name)
    # Elapsed wall-clock seconds for the whole run.
    print(time.time() - start)
示例#5
0
    def save(self, es_connection: Elasticsearch, document: Document):
        """
        Persist ``document`` through the given Elasticsearch connection.

        Simply delegates to ``document.save(using=es_connection)``; the
        document DAO is expected to override this method.

        :param es_connection: connection passed to ``document.save`` as ``using``
        :param document: the document to save
        :return: whatever ``document.save`` returns
        """
        outcome = document.save(using=es_connection)
        return outcome
示例#6
0
def insert_or_ignore(job: elasticsearch_dsl.Document, alias='default',
                     index=JOB_INDEX):
    """Save ``job`` unless a job with the same name and creation timestamp is already indexed.

    When ``index`` is ``JOB_INDEX`` and the job's ``raw_log`` does not contain
    ``'slack'``, the job is treated as incomplete and nothing is saved.

    :param job: the training-job document to insert
    :param alias: connection alias passed to ``job.save`` as ``using``
    :param index: object whose ``search()`` is used to look for duplicates
    :return: ``None`` in every case
    """
    incomplete = index == JOB_INDEX and 'slack' not in job.raw_log
    if incomplete:
        print('job is incomplete, returning')
        return

    # Both match clauses are chained onto the same search before counting.
    duplicates = index.search()
    duplicates = duplicates.query('match', job_name=job.job_name)
    duplicates = duplicates.query('match', created_at=job.created_at)

    if duplicates.count() != 0:
        print('job {} created at {} exists'.format(
            job.job_name, job.created_at))
        return

    job.save(using=alias)
 def save(self, **kwargs):
     """Save this instance by calling ``Document.save`` directly with the given keyword arguments.

     :param kwargs: forwarded unchanged to ``Document.save``
     :return: whatever ``Document.save`` returns
     """
     outcome = Document.save(self, **kwargs)
     return outcome
示例#8
0
def merge_generated_parameters(params, idx, hash):
    """
    Attach the generated parameters stored for ``hash`` to ``params``, generating them if needed.

    The parameters are kept in the ``.datashader_layers`` index under the id
    ``"<hash>_<hostname>"``. The function:

    1. fetches that document, creating it when it does not exist;
    2. clears an in-progress entry whose ``generation_start_time`` is more
       than five minutes old;
    3. if no generated parameters are recorded, marks them as in progress,
       computes them with ``generate_global_params(params, idx)`` and stores
       the result; otherwise polls once per second, for at most 45 seconds,
       until the stored entry is flagged ``complete``.

    :param params: request parameters; updated in place with a ``"generated_params"`` key
    :param idx: passed through to ``generate_global_params``
    :param hash: parameter hash used to build the document id
    :return: ``params``; ``params["generated_params"]`` can be ``None`` or
             incomplete when the wait times out or a conflicting writer is detected
    """
    layers_index = ".datashader_layers"
    layer_id = "%s_%s" % (hash, socket.gethostname())
    es = Elasticsearch(
        current_app.config.get("ELASTIC").split(","),
        verify_certs=False,
        timeout=120
    )

    # See if the hash exists
    try:
        doc = Document.get(id=layer_id, using=es, index=layers_index)
    except NotFoundError:
        doc = None

    if not doc:
        # If not, create the hash in the db but only if it does not already exist
        try:
            doc = Document(_id=layer_id,
                           creating_host=socket.gethostname(),
                           creating_pid=os.getpid(),
                           creating_timestamp=datetime.now(),
                           generated_params=None,
                           params=params)
            doc.save(using=es, index=layers_index, op_type="create", skip_empty=False)
            current_app.logger.debug("Created Hash document")
        except ConflictError:
            current_app.logger.debug("Hash document now exists, continuing")

        # Re-fetch to get sequence number correct
        doc = Document.get(id=layer_id, using=es, index=layers_index)

    # Check for generator timeouts
    started_at = _stored_generated_params(doc).get("generation_start_time")
    if started_at and \
            datetime.now() > _parse_generation_start(started_at) + timedelta(seconds=5 * 60):
        # Something caused the worker generating the params to time out so clear that entry
        try:
            doc.update(using=es, index=layers_index, retry_on_conflict=0, refresh=True,
                       generated_params=None)
        except ConflictError:
            current_app.logger.debug("Abandoned resetting parameters due to conflict, other process has completed.")

    # Loop-check if the generated params are in missing/in-process/complete
    timeout_at = datetime.now() + timedelta(seconds=45)
    while not _stored_generated_params(doc).get("complete", False):
        if datetime.now() > timeout_at:
            current_app.logger.info("Hit timeout waiting for generated parameters to be placed into database")
            break
        # If missing, mark them as in generation
        if not _stored_generated_params(doc):
            # Mark them as being generated but do so with concurrency control
            # https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
            current_app.logger.info("Discovering generated parameters")
            generated_params = dict()
            generated_params["complete"] = False
            generated_params["generation_start_time"] = datetime.now()
            generated_params["generating_host"] = socket.gethostname()
            generated_params["generating_pid"] = os.getpid()
            try:
                doc.update(using=es, index=layers_index, retry_on_conflict=0, refresh=True,
                           generated_params=generated_params)
            except ConflictError:
                current_app.logger.debug("Abandoned generating parameters due to conflict, will wait for other process to complete.")
                # NOTE(review): the message says this process will wait, but the
                # break leaves the loop at once and the stale document is used
                # below -- confirm this is intended.
                break
            # Generate and save off parameters
            current_app.logger.warning("Discovering generated params")
            generated_params.update(generate_global_params(params, idx))
            generated_params["generation_complete_time"] = datetime.now()
            generated_params["complete"] = True
            # Store off generated params
            doc.update(using=es, index=layers_index, retry_on_conflict=0, refresh=True,
                       generated_params=generated_params)
            break
        else:
            # Another process is generating; poll the document again shortly.
            time.sleep(1)
            doc = Document.get(id=layer_id, using=es, index=layers_index)

    # We now have params so use them
    params["generated_params"] = doc.to_dict().get("generated_params")
    return params


def _stored_generated_params(doc):
    """Return the document's ``generated_params`` mapping, or an empty dict when absent or null."""
    return doc.to_dict().get("generated_params") or {}


def _parse_generation_start(raw):
    """Parse a stored generation start time, with or without fractional seconds.

    ``datetime.isoformat`` omits the fractional part when the microsecond is
    zero, so a strict ``.%f`` format alone would reject such timestamps.
    """
    if isinstance(raw, datetime):
        return raw
    try:
        return datetime.strptime(raw, "%Y-%m-%dT%H:%M:%S.%f")
    except ValueError:
        return datetime.strptime(raw, "%Y-%m-%dT%H:%M:%S")
示例#9
0
def get_tms(idx, x: int, y: int, z: int):
    """
    Return the PNG tile for ``idx`` at TMS tile coordinates ``z/x/y``.

    A cached tile is returned when ``get_cache`` finds one and the ``force``
    query argument is absent; its ``cache_hits`` counter in
    ``.datashader_tiles`` is incremented. Otherwise the tile is rendered,
    recorded in ``.datashader_tiles`` and stored with ``set_cache``. Failures
    while extracting parameters or rendering are recorded in
    ``.datashader_tiles`` and answered with an error tile.

    :param idx: index name; used in the cache path and tile id and passed to the tile generators
    :param x: TMS tile x coordinate (coerced with ``int``)
    :param y: TMS tile y coordinate (coerced with ``int``)
    :param z: TMS tile z coordinate (coerced with ``int``)
    :return: a 403 ``Response`` when ``TMS_KEY`` is configured and the
             ``TMS_PROXY_KEY`` header does not match; the result of
             ``error_tile_response`` on failure; otherwise a 200
             ``image/png`` ``Response``
    """
    tile_height_px = 256
    tile_width_px = 256
    tiles_index = ".datashader_tiles"

    # Validate request is from proxy if proxy mode is enabled
    tms_key = current_app.config.get("TMS_KEY")
    tms_proxy_key = request.headers.get("TMS_PROXY_KEY")
    if tms_key is not None and tms_key != tms_proxy_key:
        # NOTE(review): this writes both key values to the log -- confirm that
        # exposing them there is acceptable.
        current_app.logger.warning(
            "TMS must be accessed via reverse proxy: keys %s != %s",
            tms_key,
            tms_proxy_key,
        )
        return Response("TMS must be accessed via reverse proxy", status=403)

    # TMS tile coordinates
    x = int(x)
    y = int(y)
    z = int(z)

    es = Elasticsearch(
        current_app.config.get("ELASTIC").split(","),
        verify_certs=False,
        timeout=120,
    )

    # Get hash and parameters
    try:
        parameter_hash, params = extract_parameters(request)
    except Exception as e:
        current_app.logger.exception("Error while extracting parameters")
        params = {"user": request.headers.get("es-security-runas-user", None)}
        # Create an error entry in .datashader_tiles (no hash is known yet)
        _save_tile_error(es, tiles_index, e, idx=idx, x=x, y=y, z=z, params=params)
        # Generate and return an error tile
        return error_tile_response(e, tile_height_px, tile_width_px)

    cache_dir = Path(current_app.config["CACHE_DIRECTORY"])
    tile_name = f"{idx}/{parameter_hash}/{z}/{x}/{y}.png"
    tile_id = "%s_%s_%s_%s_%s" % (idx, parameter_hash, z, x, y)
    force = request.args.get("force")

    # Check if the cached image already exists
    c = get_cache(cache_dir, tile_name)
    if c is not None and force is None:
        current_app.logger.info("Hit cache (%s), returning", parameter_hash)
        # Return cached value
        img = c
        try:
            body = {"script": {"source": "ctx._source.cache_hits++"}}
            es.update(".datashader_tiles", tile_id, body=body, retry_on_conflict=5)
        except NotFoundError:
            current_app.logger.warning("Unable to find cached tile entry in .datashader_tiles")
    else:
        # Generate a tile
        if force is not None:
            current_app.logger.info(
                "Forced cache flush, generating a new tile %s/%s/%s", z, x, y
            )
        else:
            current_app.logger.info(
                "No cache (%s), generating a new tile %s/%s/%s", parameter_hash, z, x, y
            )

        check_cache_dir(cache_dir, idx)

        headers = get_es_headers(request_headers=request.headers, user=params["user"])
        current_app.logger.debug("Loaded input headers %s", request.headers)
        current_app.logger.debug("Loaded elasticsearch headers %s", headers)

        # Get or generate extended parameters
        params = merge_generated_parameters(params, idx, parameter_hash)

        # Separate call for ellipse
        t1 = datetime.now()
        try:
            if params["render_mode"] in ["ellipses", "tracks"]:
                img, metrics = generate_nonaggregated_tile(idx, x, y, z, params)
            else:
                img, metrics = generate_tile(idx, x, y, z, params)
        except Exception as e:
            # Use the application logger, like the rest of this function.
            current_app.logger.exception("Exception Generating Tile for request %s", request)
            # Create an error entry in .datashader_tiles
            _save_tile_error(
                es, tiles_index, e,
                hash=parameter_hash, idx=idx, x=x, y=y, z=z, params=params
            )
            # Generate an error tile; it is not written to the cache
            return error_tile_response(e, tile_height_px, tile_width_px)
        et = (datetime.now() - t1).total_seconds()
        # Make entry into .datashader_tiles
        doc = Document(
            _id=tile_id,
            hash=parameter_hash,
            idx=idx,
            x=x,
            y=y,
            z=z,
            url=request.url,
            host=socket.gethostname(),
            pid=os.getpid(),
            render_time=et,
            timestamp=datetime.now(),
            params=params,
            metrics=metrics,
            cache_hits=0,
        )
        doc.save(using=es, index=tiles_index)

        # Store image as well
        set_cache(cache_dir, tile_name, img)

    resp = Response(img, status=200)
    resp.headers["Content-Type"] = "image/png"
    resp.headers["Access-Control-Allow-Origin"] = "*"
    resp.headers["Datashader-Parameter-Hash"] = parameter_hash
    resp.headers["Datashader-RunAs-User"] = params.get("user", "")
    resp.cache_control.max_age = 60
    return resp


def _save_tile_error(es, index_name, error, **fields):
    """Save a document describing a failed tile request: request URL, host, pid, timestamp, ``repr(error)`` and ``fields``."""
    doc = Document(
        url=request.url,
        host=socket.gethostname(),
        pid=os.getpid(),
        timestamp=datetime.now(),
        error=repr(error),
        **fields
    )
    doc.save(using=es, index=index_name)