Пример #1
0
def update_calculated_properties():
    """Recalculate the ``cp_*`` properties of each domain hit and post them back to ES.

    Searches the ``hqdomain`` type with a ``size`` of 99999, builds the
    calculated-property dict for each hit's ``name`` and posts it to that
    document's ``_update`` endpoint.
    """
    es = get_es()

    #todo: use some sort of ES scrolling/paginating
    search_path = DOMAIN_INDEX + "/hqdomain/_search"
    hits = es.get(search_path, data={"size": 99999})['hits']['hits']
    all_stats = _all_domain_stats()
    for hit in hits:
        dom = hit["_source"]["name"]
        props = {
            "cp_n_web_users": int(all_stats["web_users"][dom]),
            "cp_n_active_cc_users": int(CALC_FNS["mobile_users"](dom)),
            "cp_n_cc_users": int(all_stats["commcare_users"][dom]),
            "cp_n_active_cases": int(CALC_FNS["cases_in_last"](dom, 120)),
            "cp_n_cases": int(all_stats["cases"][dom]),
            "cp_n_forms": int(all_stats["forms"][dom]),
            "cp_first_form": CALC_FNS["first_form_submission"](dom, False),
            "cp_last_form": CALC_FNS["last_form_submission"](dom, False),
            "cp_is_active": CALC_FNS["active"](dom),
            "cp_has_app": CALC_FNS["has_app"](dom),
        }
        # Both form dates are left out of the update when the first-form
        # calculation reports 'No forms'.
        if props['cp_first_form'] == 'No forms':
            for form_key in ('cp_first_form', 'cp_last_form'):
                del props[form_key]
        update_path = "%s/hqdomain/%s/_update" % (DOMAIN_INDEX, hit["_id"])
        es.post(update_path, data={"doc": props})
Пример #2
0
 def _domains_matching(key, value):
     """Return the ``domain.exact`` facet terms for xforms whose ``key`` term is ``value``.

     The same term filter is applied to the search itself and to the terms
     facet; the facet is requested with a ``size`` of 1000.
     """
     es = get_es()
     facet_name = 'facets'
     domain_facet = {
         "terms": {"field": "domain.exact", "size": 1000},
         "facet_filter": {"term": {key: value}},
     }
     query = {
         "filter": {"term": {key: value}},
         "facets": {facet_name: domain_facet},
     }
     response = es['xforms'].post('_search', data=query)
     matched = []
     for entry in response['facets'][facet_name]['terms']:
         matched.append(entry['term'])
     return matched
Пример #3
0
def _check_es_rev(index, doc_id, couch_rev):
    """Check whether the ES copy of ``doc_id`` carries the revision ``couch_rev``.

    index: key used to select the index on the ES client (``es[index]``)
    doc_id: id to look up in ES
    couch_rev: revision the indexed ``_rev`` field is compared against

    Any exception raised while querying or reading the response is caught and
    turned into an ``"ES Error: ..."`` message with a False status.

    NOTE(review): ``status`` and ``message`` are computed but no return
    statement is visible after them here -- confirm how callers get the result.
    """
    es = get_es()
    doc_id_query = {"filter": {"ids": {"values": [doc_id]}}, "fields": ["_id", "_rev"]}

    try:
        res = es[index].get("_search", data=doc_id_query)
        status = False
        message = "Not in sync"

        if "hits" in res:
            if res["hits"].get("total", 0) == 0:
                status = False
                # if doc doesn't exist it's def. not in sync
                message = "Not in sync %s" % index
            elif "hits" in res["hits"]:
                fields = res["hits"]["hits"][0]["fields"]
                if fields["_rev"] == couch_rev:
                    status = True
                    message = "%s OK" % index
                else:
                    status = False
                    # less likely, but if it's there but the rev is off
                    message = "Not in sync - %s stale" % index
        else:
            status = False
            message = "Not in sync - query failed"
    except Exception as ex:
        message = "ES Error: %s" % ex
        status = False
Пример #4
0
def _get_latest_doc_from_index(es_index, sort_field):
    """
    Query elasticsearch index sort descending by the sort field
    and get the doc_id back so we can then do a rev-update check.

    This is because there's no direct view known ahead of time what's inside the report* index,
    so just get it directly from the index and do the modify check workflow.

    Returns the ``_id`` stored in the top hit's ``_source``, or None when the
    response has no ``hits`` section or the query raised (the error is logged).
    """
    recent_query = {
        "filter": {
            "match_all": {}
        },
        "sort": {sort_field: "desc"},
        "size": 1
    }
    es = get_es()

    try:
        res = es[es_index].get('_search', data=recent_query)
        if 'hits' in res and 'hits' in res['hits']:
            # An empty hit list raises IndexError here, which is logged below.
            result = res['hits']['hits'][0]
            return result['_source']['_id']
    except Exception as ex:
        logging.error("Error querying get_latest_doc_from_index[%s]: %s", es_index, ex)
    return None
Пример #5
0
def apps_update_calculated_properties():
    """Recompute ``cp_is_active`` for each app matched by a ``missing`` filter on ``copy_of``.

    Each streamed hit gets a partial update posted to its ``_update`` endpoint.
    """
    es = get_es()
    without_copy_of = {"missing": {"field": "copy_of"}}
    query = {"filter": {"and": [without_copy_of]}}
    hits = stream_es_query(q=query, es_url=ES_URLS["apps"], size=999999, chunksize=500)
    for hit in hits:
        is_active = is_app_active(hit["_id"], hit["_source"]["domain"])
        update_path = "%s/app/%s/_update" % (APP_INDEX, hit["_id"])
        es.post(update_path, data={"doc": {"cp_is_active": is_active}})
Пример #6
0
def update_calculated_properties():
    """Recalculate the ``cp_*`` properties of every non-snapshot ``Domain`` doc in ES.

    Streams the matching hits (only the ``name`` field is requested), builds
    the calculated-property dict for each and posts it to the document's
    ``_update`` endpoint.
    """
    es = get_es()

    domain_filters = [
        {"term": {"doc_type": "Domain"}},
        {"term": {"is_snapshot": False}},
    ]
    query = {"filter": {"and": domain_filters}}
    hits = stream_es_query(q=query, es_url=ES_URLS["domains"], size=999999, chunksize=500, fields=["name"])
    all_stats = _all_domain_stats()
    for hit in hits:
        dom = hit["fields"]["name"]
        # Entries are filled in one by one, in the order they are calculated.
        props = {}
        props["cp_n_web_users"] = int(all_stats["web_users"][dom])
        props["cp_n_active_cc_users"] = int(CALC_FNS["mobile_users"](dom))
        props["cp_n_cc_users"] = int(all_stats["commcare_users"][dom])
        props["cp_n_active_cases"] = int(CALC_FNS["cases_in_last"](dom, 120))
        props["cp_n_users_submitted_form"] = total_distinct_users([dom])
        props["cp_n_inactive_cases"] = int(CALC_FNS["inactive_cases_in_last"](dom, 120))
        props["cp_n_60_day_cases"] = int(CALC_FNS["cases_in_last"](dom, 60))
        props["cp_n_cases"] = int(all_stats["cases"][dom])
        props["cp_n_forms"] = int(all_stats["forms"][dom])
        props["cp_first_form"] = CALC_FNS["first_form_submission"](dom, False)
        props["cp_last_form"] = CALC_FNS["last_form_submission"](dom, False)
        props["cp_is_active"] = CALC_FNS["active"](dom)
        props["cp_has_app"] = CALC_FNS["has_app"](dom)
        props["cp_last_updated"] = datetime.now().strftime(DATE_FORMAT)

        # Leave both form dates out when the first-form calc reports 'No forms'.
        if props['cp_first_form'] == 'No forms':
            del props['cp_first_form']
            del props['cp_last_form']

        update_path = "%s/hqdomain/%s/_update" % (DOMAIN_INDEX, hit["_id"])
        es.post(update_path, data={"doc": props})
Пример #7
0
def apps_update_calculated_properties():
    """Post a fresh ``cp_is_active`` value for each app hit lacking a ``copy_of`` field.

    The hits come from ``stream_es_query`` using a ``missing`` filter on
    ``copy_of``; every hit is updated through its ``_update`` endpoint.
    """
    es = get_es()
    query = {"filter": {"and": [{"missing": {"field": "copy_of"}}]}}
    for app_hit in stream_es_query(q=query, es_url=ES_URLS["apps"], size=999999, chunksize=500):
        partial_doc = {
            "cp_is_active": is_app_active(app_hit["_id"], app_hit["_source"]["domain"]),
        }
        es.post(
            "%s/app/%s/_update" % (APP_INDEX, app_hit["_id"]),
            data={"doc": partial_doc},
        )
Пример #8
0
def _get_latest_doc_from_index(es_index, sort_field):
    """
    Query elasticsearch index sort descending by the sort field
    and get the doc_id back so we can then do a rev-update check.

    This is because there's no direct view known ahead of time what's inside the report* index,
    so just get it directly from the index and do the modify check workflow.

    Returns the ``_id`` stored in the top hit's ``_source``, or None when the
    response has no ``hits`` section or the query raised (the error is logged).
    """
    recent_query = {
        "filter": {
            "match_all": {}
        },
        "sort": {
            sort_field: "desc"
        },
        "size": 1
    }
    es = get_es()

    try:
        res = es[es_index].get('_search', data=recent_query)
        if 'hits' in res and 'hits' in res['hits']:
            # An empty hit list raises IndexError here, which is logged below.
            result = res['hits']['hits'][0]
            return result['_source']['_id']
    except Exception as ex:
        logging.error("Error querying get_latest_doc_from_index[%s]: %s",
                      es_index, ex)
    return None
Пример #9
0
def update_calculated_properties():
    """Recalculate the ``cp_*`` properties for each ``hqdomain`` hit and post them to ES.

    The search is issued with a ``size`` of 99999; each hit's ``name`` is the
    domain the calculations run against.
    """
    es = get_es()

    #todo: use some sort of ES scrolling/paginating
    response = es.get(DOMAIN_INDEX + "/hqdomain/_search", data={"size": 99999})
    domain_hits = response['hits']['hits']
    all_stats = _all_domain_stats()
    for domain_hit in domain_hits:
        dom = domain_hit["_source"]["name"]
        calculated = {
            "cp_n_web_users": int(all_stats["web_users"][dom]),
            "cp_n_active_cc_users": int(CALC_FNS["mobile_users"](dom)),
            "cp_n_cc_users": int(all_stats["commcare_users"][dom]),
            "cp_n_active_cases": int(CALC_FNS["cases_in_last"](dom, 120)),
            "cp_n_inactive_cases": int(CALC_FNS["inactive_cases_in_last"](dom, 120)),
            "cp_n_60_day_cases": int(CALC_FNS["cases_in_last"](dom, 60)),
            "cp_n_cases": int(all_stats["cases"][dom]),
            "cp_n_forms": int(all_stats["forms"][dom]),
            "cp_first_form": CALC_FNS["first_form_submission"](dom, False),
            "cp_last_form": CALC_FNS["last_form_submission"](dom, False),
            "cp_is_active": CALC_FNS["active"](dom),
            "cp_has_app": CALC_FNS["has_app"](dom),
        }
        no_forms = calculated['cp_first_form'] == 'No forms'
        if no_forms:
            # neither form date is sent when there are no forms
            calculated.pop('cp_first_form')
            calculated.pop('cp_last_form')
        es.post(
            "%s/hqdomain/%s/_update" % (DOMAIN_INDEX, domain_hit["_id"]),
            data={"doc": calculated},
        )
Пример #10
0
 def handle(self, *args, **options):
     """Store an ES client on ``self.es`` and parse the ``from_date`` option.

     ``options['from_date']`` must be a ``YYYY-mm-dd`` string; on any failure
     to parse it the error is reported through ``self.printerr`` and the
     process exits via ``sys.exit()``.

     NOTE(review): ``from_date`` is not used in the visible part of this
     method -- presumably the rest of the body follows elsewhere; confirm.
     """
     self.es = get_es()
     try:
         from_date = datetime.strptime(options['from_date'], "%Y-%m-%d")
     except Exception as ex:
         self.printerr("need a valid date string --from_date YYYY-mm-dd: %s" % ex)
         sys.exit()
Пример #11
0
def es_histogram(histo_type, domains=None, startdate=None, enddate=None, tz_diff=None):
    """Return the per-day ``date_histogram`` facet entries for forms or cases.

    histo_type: "forms" or "cases"; selects the date field and search URL
    domains: when not None, the query becomes an ``in`` on ``domain.exact``
    startdate / enddate: bounds of the ``range`` facet filter on the date field
    tz_diff: when truthy, set as the histogram's ``time_zone``

    For "forms" the facet additionally excludes the ``xformduplicate`` and
    ``xformdeleted`` doc types.
    """
    date_fields = {"forms": "received_on", "cases": "opened_on"}
    date_field = date_fields[histo_type]
    search_urls = {"forms": XFORM_INDEX + "/xform/_search", "cases": CASE_INDEX + "/case/_search"}
    es_url = search_urls[histo_type]

    if domains is not None:
        query = {"in": {"domain.exact": domains}}
    else:
        query = {"match_all": {}}

    histogram = {"field": date_field, "interval": "day"}
    if tz_diff:
        histogram["time_zone"] = tz_diff

    facet_filters = [{"range": {date_field: {"from": startdate, "to": enddate}}}]
    if histo_type == "forms":
        facet_filters.append({"not": {"in": {"doc_type": ["xformduplicate", "xformdeleted"]}}})

    q = {
        "query": query,
        "facets": {
            "histo": {
                "date_histogram": histogram,
                "facet_filter": {"and": facet_filters},
            }
        },
        "size": 0,
    }

    response = get_es().get(es_url, data=q)
    return response["facets"]["histo"]["entries"]
Пример #12
0
 def _domains_matching(key, value):
     """List the ``domain.exact`` facet terms of xforms matching the term ``key: value``.

     Posts one search to the ``xforms`` index whose filter and facet filter are
     the same term clause, and returns the ``term`` of each facet entry.
     """
     es = get_es()
     throwaway = 'facets'
     query = {"filter": {"term": {key: value}}}
     query["facets"] = {
         throwaway: {
             "terms": {"field": "domain.exact", "size": 1000},
             "facet_filter": {"term": {key: value}},
         },
     }
     search_result = es['xforms'].post('_search', data=query)
     facet_terms = search_result['facets'][throwaway]['terms']
     return [facet_term['term'] for facet_term in facet_terms]
Пример #13
0
def es_query(params, facets=None, terms=None, q=None):
    """Search ``cc_exchange/domain`` with lowercased ``terms`` filters built from ``params``.

    params: mapping of attribute -> string or iterable of strings; every
        attribute not listed in ``terms`` becomes a ``terms`` clause in the
        ``and`` filter, with its values lowercased
    facets: field names to request a terms facet for (size 9999); each facet is
        filtered by the ``and`` clauses whose ``terms`` do not mention it
    terms: attribute names of ``params`` to skip
    q: optional base query dict; it is modified in place

    Returns whatever the ES client's ``get`` returns.
    """
    terms = [] if terms is None else terms
    q = {} if q is None else q

    q["size"] = 9999
    q["filter"] = q.get("filter", {})
    q["filter"]["and"] = q["filter"].get("and", [])
    and_clauses = q["filter"]["and"]

    for attr in params:
        if attr in terms:
            continue
        raw = params[attr]
        if isinstance(raw, basestring):
            lowered = [raw.lower()]
        else:
            lowered = [item.lower() for item in raw]
        and_clauses.append({"terms": {attr: lowered}})

    if facets:
        q["facets"] = {}
        for facet in facets:
            spec = {"terms": {"field": facet, "size": 9999}}
            applicable = [clause for clause in and_clauses if facet not in clause.get("terms", [])]
            if applicable:
                spec["facet_filter"] = {"and": applicable}
            q["facets"][facet] = spec

    # an empty "and" filter is not sent at all
    if not and_clauses:
        del q["filter"]

    return get_es().get("cc_exchange/domain/_search", data=q)
Пример #14
0
def update_calculated_properties():
    """Push freshly calculated ``cp_*`` properties for each non-snapshot ``Domain`` doc.

    Hits are streamed from the domains URL with only ``name`` requested; the
    resulting dict is posted to each document's ``_update`` endpoint.
    """
    es = get_es()

    is_domain = {"term": {"doc_type": "Domain"}}
    not_snapshot = {"term": {"is_snapshot": False}}
    query = {"filter": {"and": [is_domain, not_snapshot]}}
    hits = stream_es_query(q=query, es_url=ES_URLS["domains"], size=999999, chunksize=500, fields=["name"])
    all_stats = _all_domain_stats()
    for hit in hits:
        dom = hit["fields"]["name"]
        props = {
            "cp_n_web_users": int(all_stats["web_users"][dom]),
            "cp_n_active_cc_users": int(CALC_FNS["mobile_users"](dom)),
            "cp_n_cc_users": int(all_stats["commcare_users"][dom]),
            "cp_n_active_cases": int(CALC_FNS["cases_in_last"](dom, 120)),
            "cp_n_users_submitted_form": total_distinct_users([dom]),
            "cp_n_inactive_cases": int(CALC_FNS["inactive_cases_in_last"](dom, 120)),
            "cp_n_60_day_cases": int(CALC_FNS["cases_in_last"](dom, 60)),
            "cp_n_cases": int(all_stats["cases"][dom]),
            "cp_n_forms": int(all_stats["forms"][dom]),
            "cp_first_form": CALC_FNS["first_form_submission"](dom, False),
            "cp_last_form": CALC_FNS["last_form_submission"](dom, False),
            "cp_is_active": CALC_FNS["active"](dom),
            "cp_has_app": CALC_FNS["has_app"](dom),
            "cp_last_updated": datetime.now().strftime(DATE_FORMAT),
        }
        # 'No forms' as the first-form value means neither form date is sent.
        if props['cp_first_form'] == 'No forms':
            for form_key in ('cp_first_form', 'cp_last_form'):
                del props[form_key]
        update_path = "%s/hqdomain/%s/_update" % (DOMAIN_INDEX, hit["_id"])
        es.post(update_path, data={"doc": props})
Пример #15
0
    def _es_query(self):
        """Count the distinct values of ``self.field`` with an ES terms facet.

        ``self.mode`` ('case' or 'form') picks the index, doc type, field path
        and the names of the date / submission-type fields.  The facet is
        filtered by domain, submission type, date range and, when
        ``self.location_id`` is truthy, location.

        Returns a dict mapping each facet term to its count, plus an
        ``_('Other')`` entry when the facet reports a truthy ``other`` value.
        """
        def case_path(f):
            return '%s.#value' % f

        def form_path(f):
            return 'form.%s.#value' % f

        configs = {
            'case': {
                'index': 'report_cases',
                'type': 'report_case',
                'field_to_path': case_path,
                'fields': {'date': 'server_modified_on', 'submission_type': 'type'},
            },
            'form': {
                'index': 'report_xforms',
                'type': 'report_xform',
                'field_to_path': form_path,
                'fields': {'date': 'received_on', 'submission_type': 'xmlns'},
            },
        }
        es_config = configs[self.mode]
        field_names = es_config['fields']

        MAX_DISTINCT_VALUES = 50

        es = elastic.get_es()
        facet_filters = [
            {"term": {"domain": self.domain}},
            {"term": {field_names['submission_type']: self.submission_type}},
            {"range": {field_names['date']: {"from": self.start_date, "to": self.end_date}}},
        ]
        if self.location_id:
            facet_filters.append({"term": {"location_id": self.location_id}})

        search_path = '%s/_search' % es_config['index']
        facet_field = "%s.%s" % (es_config['type'], es_config['field_to_path'](self.field))
        search_body = {
            "query": {"match_all": {}},
            "size": 0,  # no hits; only aggregated data
            "facets": {
                "blah": {
                    "terms": {"field": facet_field, "size": MAX_DISTINCT_VALUES},
                    "facet_filter": {"and": facet_filters},
                }
            },
        }
        facet = es.get(search_path, data=search_body)['facets']['blah']

        counts = {}
        for entry in facet['terms']:
            counts[entry['term']] = entry['count']
        if facet['other']:
            counts[_('Other')] = facet['other']
        return counts
Пример #16
0
def add_demo_user_to_user_index():
    """Put a ``DemoUser`` document with id ``demo_user`` into the user index."""
    es = get_es()
    demo_doc = {
        "_id": "demo_user",
        "username": "******",
        "doc_type": "DemoUser",
    }
    es.put(USER_INDEX + "/user/demo_user", data=demo_doc)
Пример #17
0
def es_query(params=None, facets=None, terms=None, q=None, es_url=None, start_at=None, size=None, dict_only=False):
    """
        Build an ES query from ``params`` / ``facets`` and run it.

        Any filters you include in your query should be inside an "and" filter.
        todo: intelligently deal with preexisting filters

        params: mapping of attribute -> value or list of values; attributes not
            named in ``terms`` become ``terms`` clauses of the "and" filter
        facets: field names to add a terms facet for (size 9999)
        terms: attribute names of ``params`` to skip
        q: optional base query dict; it is modified in place
        es_url: search URL, defaulting to the hqdomain search of DOMAIN_INDEX
        start_at / size: paging; falsy values fall back to 0 and 9999
        dict_only: when true, return the query dict without contacting ES
    """
    terms = [] if terms is None else terms
    q = {} if q is None else q
    params = {} if params is None else params

    q["size"] = size or 9999
    q["from"] = start_at or 0
    q["filter"] = q.get("filter", {})
    q["filter"]["and"] = q["filter"].get("and", [])
    and_clauses = q["filter"]["and"]

    def _as_es_value(raw):
        #todo: find a better way to handle bools, something that won't break fields that may be 'T' or 'F' but not bool
        if raw == 'T' or raw is True:
            return 1
        if raw == 'F' or raw is False:
            return 0
        return raw

    for attr in params:
        if attr in terms:
            continue
        value = params[attr]
        if isinstance(value, list):
            attr_val = [_as_es_value(item) for item in value]
        else:
            attr_val = [_as_es_value(value)]
        and_clauses.append({"terms": {attr: attr_val}})

    if facets:
        q["facets"] = q.get("facets", {})
        for facet in facets:
            q["facets"][facet] = {"terms": {"field": facet, "size": 9999}}

    # Every facet (new or preexisting) is filtered by the "and" clauses that
    # do not target the facet's own field.
    for facet_name, facet_spec in (q.get('facets') or {}).items():
        applicable = [clause for clause in and_clauses if facet_name not in clause.get("terms", [])]
        if applicable:
            facet_spec["facet_filter"] = {"and": applicable}

    if not and_clauses:
        del q["filter"]

    if dict_only:
        return q

    search_url = es_url or DOMAIN_INDEX + '/hqdomain/_search'
    return get_es().get(search_url, data=q)
Пример #18
0
def es_filter_cases(domain, filters=None):
    """
    Filter cases using elastic search

    Returns the ``get_json()`` output of each hit with a truthy ``_source``,
    wrapped as a CommCareCase.
    """
    case_query = ElasticCaseQuery(domain, filters)
    response = get_es().get('hqcases/_search', data=case_query.get_query())
    # this is ugly, but for consistency / ease of deployment just
    # use this to return everything in the expected format for now
    cases = []
    for hit in response['hits']['hits']:
        if hit["_source"]:
            cases.append(CommCareCase.wrap(hit["_source"]).get_json())
    return cases
Пример #19
0
def check_es_cluster_health():
    """
    Report the ES cluster health status under the CLUSTER_HEALTH key.

    The color state of the cluster health is just a simple indicator for how a cluster is running.
    It'll mainly be useful for finding out if shards are in good/bad state (red).

    There are better realtime tools for monitoring ES clusters which should probably be looked at,
    specifically paramedic or bigdesk.
    """
    health = get_es().get('_cluster/health')
    return {CLUSTER_HEALTH: health['status']}
Пример #20
0
def es_filter_cases(domain, filters=None):
    """
    Filter cases using elastic search
    (Domain, Filters?) -> [CommCareCase]

    Hits whose ``_source`` is falsy are left out of the result.
    """
    case_query = ElasticCaseQuery(domain, filters)
    response = get_es().get('hqcases/_search', data=case_query.get_query())
    # this is ugly, but for consistency / ease of deployment just
    # use this to return everything in the expected format for now
    wrapped = []
    for hit in response['hits']['hits']:
        if hit["_source"]:
            wrapped.append(CommCareCase.wrap(hit["_source"]))
    return wrapped
Пример #21
0
def check_es_cluster_health():
    """
    Fetch ``_cluster/health`` and return its ``status`` keyed by CLUSTER_HEALTH.

    The color state of the cluster health is just a simple indicator for how a cluster is running.
    It'll mainly be useful for finding out if shards are in good/bad state (red).

    There are better realtime tools for monitoring ES clusters which should probably be looked at,
    specifically paramedic or bigdesk.
    """
    es = get_es()
    status = es.get('_cluster/health')['status']
    report = {CLUSTER_HEALTH: status}
    return report
Пример #22
0
 def change_trigger(self, changes_dict):
     """Add the changed doc's ``_id`` and ``name`` to the group fields of its users in ES.

     The ids under ``changes_dict["doc"]["users"]`` are looked up in the users
     index; a user is updated only when the doc's name or id is not already in
     its ``__group_names`` / ``__group_ids`` fields.
     """
     es = get_es()
     member_ids = changes_dict["doc"].get("users", [])
     query = {"filter": {"and": [{"terms": {"_id": member_ids}}]}}
     user_hits = stream_es_query(es_url=ES_URLS["users"], q=query, fields=["__group_ids", "__group_names"])
     for user_hit in user_hits:
         known_ids = set(user_hit.get('fields', {}).get("__group_ids", []))
         known_names = set(user_hit.get('fields', {}).get("__group_names", []))
         if changes_dict["doc"]["name"] in known_names and changes_dict["doc"]["_id"] in known_ids:
             # already recorded on this user
             continue
         known_ids.add(changes_dict["doc"]["_id"])
         known_names.add(changes_dict["doc"]["name"])
         partial_doc = {"__group_ids": list(known_ids), "__group_names": list(known_names)}
         update_path = "%s/user/%s/_update" % (USER_INDEX, user_hit["_id"])
         es.post(update_path, data={"doc": partial_doc})
Пример #23
0
def es_histogram(histo_type,
                 domains=None,
                 startdate=None,
                 enddate=None,
                 tz_diff=None):
    """Query ES for a per-day date histogram of forms or cases and return its entries.

    histo_type: "forms" (dated by ``received_on``) or "cases" (``opened_on``)
    domains: when not None, replaces the match-all query with an ``in`` on
        ``domain.exact``
    startdate / enddate: ``from`` / ``to`` of the facet's range filter
    tz_diff: when truthy, passed as the histogram's ``time_zone``
    """
    sources = {
        "forms": ("received_on", XFORM_INDEX + '/xform/_search'),
        "cases": ("opened_on", CASE_INDEX + '/case/_search'),
    }
    date_field, es_url = sources[histo_type]

    q = {"query": {"match_all": {}}, "size": 0}
    if domains is not None:
        q["query"] = {"in": {"domain.exact": domains}}

    date_range = {"range": {date_field: {"from": startdate, "to": enddate}}}
    histo = {
        "date_histogram": {"field": date_field, "interval": "day"},
        "facet_filter": {"and": [date_range]},
    }
    if tz_diff:
        histo["date_histogram"]["time_zone"] = tz_diff
    if histo_type == "forms":
        # duplicate and deleted forms are kept out of the histogram
        excluded = {"in": {"doc_type": ["xformduplicate", "xformdeleted"]}}
        histo["facet_filter"]["and"].append({"not": excluded})
    q["facets"] = {"histo": histo}

    es = get_es()
    return es.get(es_url, data=q)["facets"]["histo"]["entries"]
Пример #24
0
    def handle(self, *args, **options):
        """Inspect, flip or wipe the ES aliases/indices of the aliased pillows.

        Options read: ``show_info``, ``list_pillows``, ``flip_all``,
        ``pillow_class`` and ``code_red``.  Positional arguments are rejected
        with a CommandError.

        ``code_red`` asks for an interactive "code red" confirmation and, when
        given, deletes every aliased pillow's ES index and its checkpoint doc
        (if it exists); nothing else is processed in that mode.
        """
        if len(args) != 0:
            raise CommandError("This command doesn't expect arguments!")
        show_info = options['show_info']
        list_pillows = options['list_pillows']
        flip_all = options['flip_all']
        flip_single = options['pillow_class']
        code_red = options['code_red']
        es = get_es()

        pillows = get_all_pillow_instances()
        # A comprehension always yields a real list, so the len()/indexing
        # below does not depend on what filter() returns.
        aliased_pillows = [p for p in pillows if isinstance(p, AliasedElasticPillow)]

        if code_red:
            prompt = '\n'.join([
                'CODE RED!!!',
                'Really delete ALL the elastic indices and pillow checkpoints?',
                'The following pillows will be affected:',
                '\n'.join([type(p).__name__ for p in aliased_pillows]),
                'This is a PERMANENT action. (Type "code red" to continue):',
                '',
            ])
            if raw_input(prompt).lower() == 'code red':
                for pillow in aliased_pillows:
                    pillow.get_es_new().indices.delete(pillow.es_index)
                    print('deleted elastic index: {}'.format(pillow.es_index))
                    checkpoint_id = pillow.checkpoint.checkpoint_id
                    if pillow.couch_db.doc_exist(checkpoint_id):
                        pillow.couch_db.delete_doc(checkpoint_id)
                        print('deleted checkpoint: {}'.format(checkpoint_id))
            else:
                print('Safety first!')
            return

        if show_info:
            get_pillow_states(aliased_pillows).dump_info()
        if list_pillows:
            print(aliased_pillows)
        if flip_all:
            for pillow in aliased_pillows:
                assume_alias_for_pillow(pillow)
            print(simplejson.dumps(es.get('_aliases'), indent=4))
        if flip_single is not None:
            pillow_class_name = flip_single
            pillow_to_use = [p for p in aliased_pillows
                             if p.__class__.__name__ == pillow_class_name]
            if len(pillow_to_use) != 1:
                print("Unknown pillow (option --pillow <name>) class string, the options are: \n\t%s" % ', '.join(
                    [x.__class__.__name__ for x in aliased_pillows]))
                sys.exit()

            target_pillow = pillow_to_use[0]
            assume_alias_for_pillow(target_pillow)
            print(es.get('_aliases'))
Пример #25
0
    def results(self):
        """Run the case-list query against Elasticsearch and shape it for the report listing.

        The query matches ``self.domain`` and is filtered by ``self.filter``,
        ``self.case_type``, ``self.status`` and ``self.owner_ids`` when those
        are set; sorting and paging come from ``self.sort_key``,
        ``self.sort_order`` and ``self.params``.

        Returns a dict with ``skip``, ``limit``, ``rows`` and ``total_rows``.
        When the ES response has an ``error`` key, the error is reported
        through ``notify_exception`` and an empty row set is returned.
        """

        # there's no point doing filters that are like owner_id:(x1 OR x2 OR ... OR x612)
        # so past a certain number just exclude
        MAX_IDS = 50

        def _filter_gen(key, ids):
            # ``ids`` used to be named ``list``, which shadowed the builtin.
            if ids and len(ids) < MAX_IDS:
                yield {"terms": {key: [item.lower() if item else "" for item in ids]}}

            # demo user hack
            elif ids and "demo_user" not in ids:
                yield {"not": {"term": {key: "demo_user"}}}

        if self.params.search:
            # these are not supported/implemented on the UI side, so ignoring (dmyung)
            pass

        subterms = [self.filter] if self.filter else []
        if self.case_type:
            subterms.append({"term": {"type": self.case_type}})

        if self.status:
            subterms.append({"term": {"closed": (self.status == "closed")}})

        # the same id list is matched against both owner_id and user_id
        user_filters = list(_filter_gen("owner_id", self.owner_ids)) + list(_filter_gen("user_id", self.owner_ids))
        if user_filters:
            subterms.append({"or": user_filters})

        and_block = {"and": subterms} if subterms else {}

        es_query = {
            "query": {"filtered": {"query": {"match": {"domain.exact": self.domain}}, "filter": and_block}},
            "sort": {self.sort_key: {"order": self.sort_order}},
            "from": self.params.start,
            "size": self.params.count,
        }
        es_results = get_es().get("hqcases/_search", data=es_query)
        if "error" in es_results:
            notify_exception(None, "Error in case list elasticsearch query: %s" % es_results["error"])
            return {"skip": self.params.start, "limit": self.params.count, "rows": [], "total_rows": 0}

        # transform the return value to something compatible with the report listing
        ret = {
            "skip": self.params.start,
            "limit": self.params.count,
            "rows": [{"doc": x["_source"]} for x in es_results["hits"]["hits"]],
            "total_rows": es_results["hits"]["total"],
        }
        return ret
Пример #26
0
 def _domains_matching(key, value):
     """Collect the ``domain.exact`` facet terms of xforms whose ``key`` term equals ``value``.

     One search is posted to the ``xforms`` index; the term clause is used both
     as the search filter and as the facet filter.
     """
     es = get_es()

     def term_clause():
         # a fresh dict per use, as in a literal written out twice
         return {"term": {key: value}}

     facet_key = "facets"
     domain_terms = {"field": "domain.exact", "size": 1000}
     query = {
         "filter": term_clause(),
         "facets": {facet_key: {"terms": domain_terms, "facet_filter": term_clause()}},
     }
     response = es["xforms"].post("_search", data=query)
     domains = []
     for entry in response["facets"][facet_key]["terms"]:
         domains.append(entry["term"])
     return domains
Пример #27
0
 def change_trigger(self, changes_dict):
     """Create a placeholder user doc in ES for a form's user that exists nowhere else.

     Nothing is written when the user id is falsy, when ``self.user_db``
     already has the doc, or when ES already has it.  The placeholder is an
     ``AdminUser`` when the username is "admin", otherwise an ``UnknownUser``.
     """
     user_id, username, domain, xform_id = self.get_fields(changes_dict)
     es = get_es()
     es_path = USER_INDEX + "/user/"

     if not user_id:
         return
     if self.user_db.doc_exist(user_id):
         return
     if es.head(es_path + user_id):
         return

     if username == "admin":
         doc_type = "AdminUser"
     else:
         doc_type = "UnknownUser"
     placeholder = {
         "_id": user_id,
         "domain": domain,
         "username": username,
         "first_form_found_in": xform_id,
         "doc_type": doc_type,
     }
     if domain:
         placeholder["domain_membership"] = {"domain": domain}
     es.put(es_path + user_id, data=placeholder)
Пример #28
0
 def change_trigger(self, changes_dict):
     """Index a stand-in user document when a form's user is unknown to couch and ES.

     The stand-in records the id, domain, username and the form it was first
     seen in; its ``doc_type`` is ``AdminUser`` for the "admin" username and
     ``UnknownUser`` otherwise.  A ``domain_membership`` entry is added when
     the domain is truthy.
     """
     user_id, username, domain, xform_id = self.get_fields(changes_dict)
     es = get_es()
     es_path = USER_INDEX + "/user/"

     # the checks run in this order and stop at the first one that holds
     already_known = (
         not user_id
         or self.user_db.doc_exist(user_id)
         or es.head(es_path + user_id)
     )
     if already_known:
         return

     stand_in = {
         "_id": user_id,
         "domain": domain,
         "username": username,
         "first_form_found_in": xform_id,
         "doc_type": "AdminUser" if username == "admin" else "UnknownUser",
     }
     if domain:
         stand_in["domain_membership"] = {"domain": domain}
     es.put(es_path + user_id, data=stand_in)
Пример #29
0
def es_histogram(histo_type, domains=None, startdate=None, enddate=None, tz_diff=None, interval="day"):
    """Return the ``date_histogram`` facet entries for forms, cases or users.

    histo_type: "forms", "cases" or "users"; selects the date field
        (``received_on`` / ``opened_on`` / ``created_on``) and the search URL
    domains: when not None, the query becomes an ``in`` on ``domain.exact``
    startdate / enddate: ``from`` / ``to`` of the facet's range filter
    tz_diff: when truthy, set as the histogram's ``time_zone``
    interval: histogram interval, "day" by default

    Extra facet filters: forms exclude the ``xformduplicate`` / ``xformdeleted``
    doc types and docs missing ``xmlns`` or ``form.meta.userID``; users are
    restricted to the ``CommCareUser`` doc type.
    """
    date_fields = {
        "forms": "received_on",
        "cases": "opened_on",
        "users": "created_on",
    }
    date_field = date_fields[histo_type]
    search_urls = {
        "forms": XFORM_INDEX + '/xform/_search',
        "cases": CASE_INDEX + '/case/_search',
        "users": USER_INDEX + '/user/_search',
    }
    es_url = search_urls[histo_type]

    if domains is not None:
        query = {"in": {"domain.exact": domains}}
    else:
        query = {"match_all": {}}

    histogram = {"field": date_field, "interval": interval}
    if tz_diff:
        histogram["time_zone"] = tz_diff

    facet_filters = [{"range": {date_field: {"from": startdate, "to": enddate}}}]
    if histo_type == "forms":
        facet_filters.extend([
            {"not": {"in": {"doc_type": ["xformduplicate", "xformdeleted"]}}},
            {"not": {"missing": {"field": "xmlns"}}},
            {"not": {"missing": {"field": "form.meta.userID"}}},
        ])
    if histo_type == "users":
        facet_filters.append({"term": {"doc_type": "CommCareUser"}})

    q = {
        "query": query,
        "facets": {
            "histo": {
                "date_histogram": histogram,
                "facet_filter": {"and": facet_filters},
            },
        },
        "size": 0,
    }

    response = get_es().get(es_url, data=q)
    return response["facets"]["histo"]["entries"]
Пример #30
0
def _check_es_rev(index, doc_id, couch_revs):
    """
    Specific docid and rev checker.

    index: rawes index
    doc_id: id to query in ES
    couch_revs: container of acceptable couch revs; the doc counts as in sync
        when its indexed ``_rev`` is ``in`` this container

    Any exception raised while querying or reading the response is caught and
    turned into an ``"ES Error: ..."`` message with a False status.

    NOTE(review): ``status`` and ``message`` are computed but no return
    statement is visible after them here -- confirm how callers get the result.
    """
    es = get_es()
    doc_id_query = {
        "filter": {
            "ids": {
                "values": [doc_id]
            }
        },
        "fields": ["_id", "_rev"]
    }

    try:
        res = es[index].get('_search', data=doc_id_query)
        status = False
        message = "Not in sync"

        if 'hits' in res:
            if res['hits'].get('total', 0) == 0:
                status = False
                # if doc doesn't exist it's def. not in sync
                message = "Not in sync %s" % index
            elif 'hits' in res['hits']:
                fields = res['hits']['hits'][0]['fields']
                if fields['_rev'] in couch_revs:
                    status = True
                    message = "%s OK" % index
                else:
                    status = False
                    # less likely, but if it's there but the rev is off
                    message = "Not in sync - %s stale" % index
        else:
            status = False
            message = "Not in sync - query failed"
    except Exception as ex:
        message = "ES Error: %s" % ex
        status = False
Пример #31
0
def _check_es_rev(index, doc_id, couch_revs):
    """
    Check whether one document in an ES index is at an accepted couch rev.

    index: rawes index (looked up as ``es[index]``)
    doc_id: id of the document to query in ES
    couch_revs: collection of acceptable couch revs; the document counts as
        in sync when its ES ``_rev`` is one of them

    A response without a top-level 'hits' key is reported via
    ``notify_error``.

    NOTE(review): ``status`` and ``message`` are computed here but no return
    statement is visible in this excerpt - confirm against the full function.
    """
    es = get_es()
    lookup_query = {
        "filter": {"ids": {"values": [doc_id]}},
        "fields": ["_id", "_rev"],
    }

    # Pessimistic defaults; only a matching rev flips status to True.
    status = False
    message = "Not in sync"

    try:
        res = es[index].get('_search', data=lookup_query)

        if 'hits' not in res:
            # response does not even look like a search result
            message = "Not in sync - query failed"
            notify_error("%s: %s" % (message, str(res)))
        elif res['hits'].get('total', 0) == 0:
            # a doc that is missing from ES is by definition out of sync
            message = "Not in sync %s" % index
        elif 'hits' in res['hits']:
            es_rev = res['hits']['hits'][0]['fields']['_rev']
            if es_rev in couch_revs:
                status = True
                message = "%s OK" % index
            else:
                # less likely: the doc is indexed, but at another rev
                message = "Not in sync - %s stale" % index
    except Exception as ex:
        # any failure (ES error, unexpected response shape) counts as
        # "not in sync" rather than propagating
        message = "ES Error: %s" % ex
        status = False
Пример #32
0
 def change_trigger(self, changes_dict):
     """
     Add the changed doc's id and name to the ES docs of its listed users.

     changes_dict: change payload; ``changes_dict["doc"]`` must carry
         "_id" and "name" and may carry "users" (a list of user ids).

     Every user hit whose "__group_ids" / "__group_names" fields do not
     already contain this doc gets a partial ``_update`` with both lists
     extended.
     """
     es = get_es()
     changed_doc = changes_dict["doc"]
     member_ids = changed_doc.get("users", [])
     query = {"filter": {"and": [{"terms": {"_id": member_ids}}]}}
     user_hits = stream_es_query(
         es_url=ES_URLS["users"],
         q=query,
         fields=["__group_ids", "__group_names"])

     for hit in user_hits:
         group_ids = set(hit.get('fields', {}).get("__group_ids", []))
         group_names = set(hit.get('fields', {}).get("__group_names", []))

         # Nothing to do when the user already lists this doc by both
         # name and id.
         if (changed_doc["name"] in group_names
                 and changed_doc["_id"] in group_ids):
             continue

         group_ids.add(changed_doc["_id"])
         group_names.add(changed_doc["name"])
         update_path = "%s/user/%s/_update" % (USER_INDEX, hit["_id"])
         partial_doc = {
             "__group_ids": list(group_ids),
             "__group_names": list(group_names)
         }
         es.post(update_path, data={"doc": partial_doc})
Пример #33
0
def _check_es_rev(index, doc_id, couch_rev):
    """
    Check whether one document in an ES index is at the given couch rev.

    index: rawes index (looked up as ``es[index]``)
    doc_id: id of the document to query in ES
    couch_rev: the couch rev the ES copy's ``_rev`` must equal

    NOTE(review): ``status`` and ``message`` are computed here but no return
    statement is visible in this excerpt - confirm against the full function.
    """
    es = get_es()
    lookup_query = {
        "filter": {"ids": {"values": [doc_id]}},
        "fields": ["_id", "_rev"],
    }

    # Pessimistic defaults; only a matching rev flips status to True.
    status = False
    message = "Not in sync"

    try:
        res = es[index].get('_search', data=lookup_query)

        if 'hits' not in res:
            # response does not even look like a search result
            message = "Not in sync - query failed"
        elif res['hits'].get('total', 0) == 0:
            # a doc that is missing from ES is by definition out of sync
            message = "Not in sync %s" % index
        elif 'hits' in res['hits']:
            es_rev = res['hits']['hits'][0]['fields']['_rev']
            if es_rev == couch_rev:
                status = True
                message = "%s OK" % index
            else:
                # less likely: the doc is indexed, but at another rev
                message = "Not in sync - %s stale" % index
    except Exception as ex:
        # any failure (ES error, unexpected response shape) counts as
        # "not in sync" rather than propagating
        message = "ES Error: %s" % ex
        status = False
Пример #34
0
 def __init__(self, domain):
     """
     Set up the view for one domain.

     domain: domain name; stored lower-cased on ``self.domain``.
     """
     super(ESView, self).__init__()
     lowered_domain = domain.lower()
     self.domain = lowered_domain
     self.es = get_es()
Пример #35
0
    def _es_query(self):
        """
        Count the distinct values of ``self.field`` with an ES terms facet.

        ``self.mode`` ('case' or 'form') selects the index, document type,
        date field and submission-type field. The facet is restricted to
        ``self.domain``, ``self.submission_type``, the
        ``self.start_date``..``self.end_date`` range and, when set,
        ``self.location_id``.

        Returns a dict mapping each facet term to its count. A non-zero
        'other' count from the facet is added under the ``_('Other')`` key.
        """
        MAX_DISTINCT_VALUES = 50

        # Per-mode description of where the data lives in ES.
        mode_settings = {
            'case': {
                'index': 'report_cases',
                'type': 'report_case',
                'field_to_path': lambda f: '%s.#value' % f,
                'fields': {
                    'date': 'server_modified_on',
                    'submission_type': 'type',
                }
            },
            'form': {
                'index': 'report_xforms',
                'type': 'report_xform',
                'field_to_path': lambda f: 'form.%s.#value' % f,
                'fields': {
                    'date': 'received_on',
                    'submission_type': 'xmlns',
                }
            },
        }
        es_config = mode_settings[self.mode]
        field_names = es_config['fields']

        es = elastic.get_es()

        filter_criteria = [
            {"term": {"domain": self.domain}},
            {"term": {field_names['submission_type']: self.submission_type}},
            {"range": {field_names['date']: {
                "from": self.start_date,
                "to": self.end_date,
            }}},
        ]
        if self.location_id:
            filter_criteria.append({"term": {"location_": self.location_id}})

        facet_field = "%s.%s" % (es_config['type'],
                                 es_config['field_to_path'](self.field))
        search_body = {
            "query": {"match_all": {}},
            "size": 0,  # no hits; only aggregated data
            "facets": {
                "blah": {
                    "terms": {
                        "field": facet_field,
                        "size": MAX_DISTINCT_VALUES,
                    },
                    "facet_filter": {"and": filter_criteria},
                }
            },
        }
        response = es.get('%s/_search' % es_config['index'], data=search_body)
        facet = response['facets']['blah']

        raw = {}
        for entry in facet['terms']:
            raw[entry['term']] = entry['count']
        if facet['other']:
            raw[_('Other')] = facet['other']
        return raw
Пример #36
0
def es_filter_cases(domain, filters=None):
    """
    Filter cases using elastic search

    domain: domain whose cases are searched (matched on ``domain.exact``).
    filters: optional dict of filter values. The keys listed in
        ``RESERVED_KEYS`` below control the modified-date ranges and the
        result limit (default 50); every other key/value pair becomes a
        ``term`` clause on the lower-cased value. ``None`` means no filters.

    Returns a list holding ``CommCareCase.wrap(source).get_json()`` for every
    hit with a truthy ``_source``, sorted by ``modified_on`` ascending.
    """
    # The signature advertises filters=None, so make that usable instead of
    # failing on filters.get() below.
    if filters is None:
        filters = {}

    class ElasticCaseQuery(object):
        # this class is currently pretty customized to serve exactly
        # this API. one day it may be worth reconciling our ES interfaces
        # but today is not that day.
        # To be replaced by CaseES framework.
        RESERVED_KEYS = ('date_modified_start', 'date_modified_end',
                         'server_date_modified_start', 'server_date_modified_end',
                         'limit')

        def __init__(self, domain, filters):
            self.domain = domain
            self.filters = filters
            self.limit = int(filters.get('limit', 50))
            self._date_modified_start = filters.get("date_modified_start", None)
            self._date_modified_end = filters.get("date_modified_end", None)
            self._server_date_modified_start = filters.get("server_date_modified_start", None)
            self._server_date_modified_end = filters.get("server_date_modified_end", None)

        @property
        def uses_modified(self):
            # True when either end of the modified_on range was supplied
            return bool(self._date_modified_start or self._date_modified_end)

        @property
        def uses_server_modified(self):
            # True when either end of the server_modified_on range was supplied
            return bool(self._server_date_modified_start or self._server_date_modified_end)

        # The four properties below fall back to an open-ended bound
        # (1970-01-01 / datetime.max) when only one side of a range is given.
        @property
        def date_modified_start(self):
            return self._date_modified_start or datetime(1970, 1, 1).strftime("%Y-%m-%d")

        @property
        def date_modified_end(self):
            return self._date_modified_end or datetime.max.strftime("%Y-%m-%d")

        @property
        def server_date_modified_start(self):
            return self._server_date_modified_start or datetime(1970, 1, 1).strftime("%Y-%m-%d")

        @property
        def server_date_modified_end(self):
            return self._server_date_modified_end or datetime.max.strftime("%Y-%m-%d")

        @property
        def scrubbed_filters(self):
            # everything the caller passed that is not a reserved control key
            return dict((k, v) for k, v in self.filters.items() if k not in self.RESERVED_KEYS)

        def _modified_params(self, key, start, end):
            return {
                'range': {
                    key: {
                        'from': start,
                        'to': end
                    }
                }
            }

        @property
        def modified_params(self):
            return self._modified_params('modified_on',
                                         self.date_modified_start,
                                         self.date_modified_end)

        @property
        def server_modified_params(self):
            return self._modified_params('server_modified_on',
                                         self.server_date_modified_start,
                                         self.server_date_modified_end)

        def get_terms(self):
            """Yield the clauses of the bool/must query, one per constraint."""
            yield {'term': {'domain.exact': self.domain}}
            # each range clause is emitted exactly once (the modified_on
            # clause used to be yielded twice)
            if self.uses_modified:
                yield self.modified_params
            if self.uses_server_modified:
                yield self.server_modified_params
            for k, v in self.scrubbed_filters.items():
                yield {'term': {k: v.lower()}}

        def get_query(self):
            return {
                'query': {
                    'bool': {
                        'must': list(self.get_terms())
                    }
                },
                'sort': {
                    'modified_on': {'order': 'asc'}
                },
                'from': 0,
                'size': self.limit,
            }

    q = ElasticCaseQuery(domain, filters)
    res = get_es().get('hqcases/_search', data=q.get_query())
    # this is ugly, but for consistency / ease of deployment just
    # use this to return everything in the expected format for now
    return [CommCareCase.wrap(r["_source"]).get_json() for r in res['hits']['hits'] if r["_source"]]
Пример #37
0
 def __init__(self, domain):
     """
     Bind this object to one domain and an ES client.

     domain: domain name; stored lower-cased on ``self.domain``.
     """
     lowered_domain = domain.lower()
     self.domain = lowered_domain
     self.es = get_es()
Пример #38
0
 def __init__(self, domain):
     """
     Bind this object to one domain and an ES client.

     domain: domain name; stored lower-cased on ``self.domain``.
     """
     lowered_domain = domain.lower()
     self.domain = lowered_domain
     self.es = get_es()
Пример #39
0
 def __init__(self, domain):
     """
     Bind this object to one domain and an ES client.

     domain: domain name; stored as given (no case normalisation here).
     """
     self.domain = domain
     self.es = get_es()
Пример #40
0
    def handle(self, *args, **options):
        """
        Inspect or flip the ES aliases of the aliased pillows.

        Takes no positional arguments (raises CommandError otherwise).
        Options read: 'show_info', 'list_pillows', 'do_flip' and, when
        flipping, 'pillow_class'.

        - show_info: print the active indices and, per index, whether it is
          a pillow's HEAD index and which aliases it has, then exit.
        - list_pillows: print the aliased pillows, then exit.
        - do_flip: call ``assume_alias()`` on the single pillow whose class
          name equals 'pillow_class' and print the resulting aliases.
        """
        if args:
            raise CommandError("This command doesn't expect arguments!")

        print("")
        show_info = options['show_info']
        list_pillows = options['list_pillows']
        do_flip = options['do_flip']
        es = get_es()

        def _is_aliased(pillow):
            return isinstance(pillow, AliasedElasticPillow)

        aliased_pillows = filter(_is_aliased, import_pillows())

        # Map each pillow's index to its alias. This may be problematic if
        # several pillows point at the same alias or index.
        master_aliases = dict(
            (pillow.es_index, pillow.es_alias) for pillow in aliased_pillows)
        print(master_aliases)

        if show_info:
            system_status = es.get('_status')
            indices = system_status['indices'].keys()
            print("")
            print("\tActive indices")
            for index in indices:
                print("\t\t%s" % index)
            print("")

            print("\n\tAlias Mapping Status")
            active_aliases = es.get('_aliases')
            for idx, alias_dict in active_aliases.items():
                line = ["\t\t", idx]
                if idx in master_aliases:
                    # this index is the HEAD index of one of the pillows
                    line.append('*HEAD')
                    expected_alias = master_aliases[idx]
                    if expected_alias in alias_dict['aliases']:
                        # HEAD index that carries its alias: good
                        line.append('=> %s :)' % expected_alias)
                    else:
                        # HEAD index whose alias is missing: bad
                        line.append('=> Does not have alias yet :(')
                else:
                    # not a HEAD index; list whatever aliases it has
                    line.append(
                        '=> [%s] Non HEAD has alias' % (' '.join(alias_dict['aliases'].keys())))
                print(' '.join(line))

            print("")
            sys.exit()

        if list_pillows:
            print(aliased_pillows)
            sys.exit()

        if not do_flip:
            return

        pillow_class_name = options['pillow_class']

        def _is_requested(pillow):
            return pillow.__class__.__name__ == pillow_class_name

        pillow_to_use = filter(_is_requested, aliased_pillows)
        if len(pillow_to_use) != 1:
            print("Unknown pillow (option --pillow <name>) class string, the options are: \n\t%s" % ', '.join(
                [x.__class__.__name__ for x in aliased_pillows]))
            sys.exit()

        # exactly one pillow matched; point its alias at its index
        target_pillow = pillow_to_use[0]
        target_pillow.assume_alias()

        print(es.get('_aliases'))
Пример #41
0
 def __init__(self, **kwargs):
     """
     Initialise the pillow and cache the handles it works with.

     kwargs: passed through unchanged to the parent initialiser.
     """
     super(UnknownUsersPillow, self).__init__(**kwargs)
     # couch handles: one from XFormInstance, one from CouchUser
     self.couch_db = XFormInstance.get_db()
     self.user_db = CouchUser.get_db()
     # elasticsearch client
     self.es = get_es()
Пример #42
0
def es_query(params=None,
             facets=None,
             terms=None,
             q=None,
             es_url=None,
             start_at=None,
             size=None,
             dict_only=False):
    """
    Build an ES query from simple key/value params and, unless ``dict_only``
    is set, run it.

    params: dict of field -> value (or list of values). Every entry whose key
        is not listed in ``terms`` becomes a ``terms`` clause in the "and"
        filter. 'T'/True map to 1, 'F'/False map to 0, other strings are
        lower-cased and non-string values are passed through unchanged.
    facets: iterable of field names to add as terms facets.
    terms: keys of ``params`` to leave out of the filter.
    q: optional base query dict; it is modified in place.
    es_url: search path to GET; defaults to "cc_exchange/domain/_search".
    start_at: value for "from" (falsy -> 0).
    size: value for "size" (falsy -> 9999, so size=0 is not expressible here).
    dict_only: when true, return the query dict without contacting ES.

    Returns the query dict when ``dict_only`` is true, otherwise the result
    of ``es.get(es_url, data=q)``.
    """
    if terms is None:
        terms = []
    if q is None:
        q = {}
    if params is None:
        params = {}

    q["size"] = size or 9999
    q["from"] = start_at or 0
    q["filter"] = q.get("filter", {})
    q["filter"]["and"] = q["filter"].get("and", [])

    def convert(param):
        #todo: find a better way to handle bools, something that won't break fields that may be 'T' or 'F' but not bool
        if param == 'T' or param is True:
            return 1
        elif param == 'F' or param is False:
            return 0
        try:
            return param.lower()
        except AttributeError:
            # ints, None and other non-string values have no lower();
            # use them as-is instead of failing the whole query
            return param

    for attr in params:
        if attr not in terms:
            attr_val = [convert(params[attr])] if not isinstance(
                params[attr], list) else [convert(p) for p in params[attr]]
            q["filter"]["and"].append({"terms": {attr: attr_val}})

    def facet_filter(facet):
        # A facet is filtered by every clause except the one on its own
        # field, so its counts are not narrowed by its own selection.
        ff = {"facet_filter": {}}
        ff["facet_filter"]["and"] = [
            clause for clause in q["filter"]["and"]
            if facet not in clause.get("terms", [])
        ]
        return ff if ff["facet_filter"]["and"] else {}

    if facets:
        q["facets"] = q.get("facets", {})
        for facet in facets:
            q["facets"][facet] = {"terms": {"field": facet, "size": 9999}}

    if q.get('facets'):
        for facet in q["facets"]:
            q["facets"][facet].update(facet_filter(facet))

    # NOTE(review): this drops the whole "filter" entry when its "and" list
    # is empty, including any other keys a caller-supplied filter carried.
    if not q['filter']['and']:
        del q["filter"]

    if dict_only:
        return q

    es_url = es_url or "cc_exchange/domain/_search"

    es = get_es()
    ret_data = es.get(es_url, data=q)

    return ret_data
Пример #43
0
 def __init__(self, **kwargs):
     """
     Initialise the pillow and cache the handles it works with.

     kwargs: passed through unchanged to the parent initialiser.
     """
     super(UnknownUsersPillow, self).__init__(**kwargs)
     # couch handles: one from XFormInstance, one from CouchUser
     self.couch_db = XFormInstance.get_db()
     self.user_db = CouchUser.get_db()
     # elasticsearch client
     self.es = get_es()
Пример #44
0
def add_demo_user_to_user_index():
    """PUT a fixed "demo_user" document (doc_type DemoUser) into the user index."""
    es = get_es()
    demo_user_doc = {
        "_id": "demo_user",
        "username": "******",
        "doc_type": "DemoUser",
    }
    es.put(USER_INDEX + "/user/demo_user", data=demo_user_doc)
Пример #45
0
 def __init__(self, domain):
     """
     Set up the view for one domain.

     domain: domain name; stored lower-cased on ``self.domain``.
     """
     super(ESView, self).__init__()
     lowered_domain = domain.lower()
     self.domain = lowered_domain
     self.es = get_es()