示例#1
0
def update(args, FIELDS_URL):
    """Change the properties of an existing Field.

    ``args`` is handed to ``parse_opts``; the resulting options must contain
    a ``name`` entry.  That entry selects the Field (it becomes the last path
    segment under ``FIELDS_URL``) and is stripped from the options before the
    rest is sent as the body of a PUT request.

    Raises:
        Exception: if no ``name`` option was supplied.
    """
    options = parse_opts(args)
    if 'name' not in options:
        raise Exception("Updating a Field requires a name")

    # The name addresses the resource, so it is not part of the payload.
    field_name = options.pop('name')
    field_url = FIELDS_URL + "/" + field_name

    json_http(field_url, method='PUT', data=options)
    print("Updated Field: " + field_name)
示例#2
0
def update(args, DS_URL):
    """Modify properties of a datasource.

    ``args`` is parsed with ``parse_opts``.  The datasource is identified by
    an ``id`` or ``name`` option, which ``get_id`` resolves (and removes from
    the options); the remaining options are sent as the body of a PUT request
    to ``DS_URL/<id>``.

    Raises:
        Exception: if neither an id nor a name was supplied.
    """
    data = parse_opts(args)
    # get_id needs the datasource URL so it can look an id up by name.
    ds_id = get_id(data, DS_URL)
    if ds_id is None:
        raise Exception(
            "Updating a DataSource requires either an id or a name")

    json_http(DS_URL + "/" + ds_id, method='PUT', data=data)
    print("Updated DataSource #" + ds_id)
示例#3
0
def delete(args, FIELDS_URL):
    """Remove a Field.

    Exactly one argument is expected: the Field name, optionally written as
    ``name=<value>``.  The argument is popped off ``args`` and a DELETE
    request is issued to ``FIELDS_URL/<name>``.

    Raises:
        Exception: if ``args`` does not hold exactly one element.
    """
    if len(args) != 1:
        raise Exception("Deleting a Field requires a name")

    field_name = args.pop(0)
    # Be helpful if the caller used the key=value syntax by mistake.
    prefix = 'name='
    if field_name.startswith(prefix):
        field_name = field_name[len(prefix):]

    target = FIELDS_URL + "/" + field_name
    json_http(target, method='DELETE')
    print("Deleted Field: " + field_name)
示例#4
0
def schedule(args, DS_URL):
    """Modify the schedule of a datasource.

    ``args`` is parsed with ``parse_opts``.  The datasource is identified by
    an ``id`` or ``name`` option, which ``get_id`` resolves (and removes from
    the options); the remaining options are sent as the body of a PUT request
    to ``DS_URL/<id>/schedule``.

    Raises:
        Exception: if neither an id nor a name was supplied.
    """
    data = parse_opts(args)
    # get_id needs the datasource URL so it can look an id up by name.
    ds_id = get_id(data, DS_URL)
    if ds_id is None:
        raise Exception(
            "Modifying the schedule of a DataSource requires either an id or a name"
        )

    json_http(DS_URL + "/" + ds_id + '/schedule', method='PUT', data=data)
    print("Updated Schedule of DataSource #" + ds_id)
示例#5
0
def delete(args, DS_URL):
    """Remove a datasource.

    Exactly one argument is expected, identifying the datasource by id or by
    name (as understood by ``parse_opts`` / ``get_id``).  A DELETE request is
    issued to ``DS_URL/<id>``.

    Raises:
        Exception: if ``args`` does not hold exactly one element, or if it
            yields neither an id nor a name.
    """
    if len(args) != 1:
        raise Exception("wrong number of args for deleting a DataSource")

    # get_id needs the datasource URL so it can look an id up by name.
    ds_id = get_id(parse_opts(args), DS_URL)
    if ds_id is None:
        raise Exception(
            "Must supply either an id or a name for a DataSource to delete it")

    json_http(DS_URL + '/' + ds_id, method='DELETE')
    print("Deleted DataSource #" + ds_id)
示例#6
0
def create_filters(filters, users):
    """Create one role per filter specification via POST to ``COL_URL/roles``.

    ``filters`` is a ``;``-separated list of specifications, each of the form
    ``rolename=uid1,uid2=query``.  For every specification a payload with the
    keys ``name``, ``users`` and ``filters`` is posted, and both the payload
    and the response are printed.

    ``users`` is not used by this function; it is kept so existing callers
    continue to work.

    Example of the equivalent request:
        curl -H 'Content-type: application/json'
          -d '{"name": "ONLY_PUBLIC","groups": ["group1","group2"],
               "filters": ["status:public"],"users": ["user1"]}'
          http://localhost:8888/api/collections/collection1/roles

    Raises:
        IndexError: if a non-empty specification has fewer than three
            ``=``-separated parts.
    """
    roles_url = COL_URL + "/roles"
    for the_filter in filters.split(";"):
        if not the_filter.strip():
            # Tolerate empty input and stray/trailing ';' separators.
            continue
        print("Applying filter: " + the_filter)
        # Split on the first two '=' only, so a query that itself contains
        # '=' is kept intact instead of being truncated.
        splits = the_filter.split("=", 2)
        the_users = splits[1].split(",")
        # NOTE(review): the example request above sends "filters" as a list,
        # while a plain string is sent here -- confirm what the API expects.
        data = {"name": splits[0], "users": the_users, "filters": splits[2]}
        print("Sending Data to:" + roles_url)
        print(data)
        result = lweutils.json_http(roles_url, method="POST", data=data)
        print("Result:")
        print(result)
示例#7
0
def get_id(opts, DS_URL):
    """Work out which datasource id the caller is referring to.

    An explicit ``id`` entry in ``opts`` wins: it is removed from ``opts``
    and returned as a string.  Otherwise a ``name`` entry, if present, is
    removed from ``opts`` and matched against the datasources fetched from
    ``DS_URL``; the id of the single match is returned as a string.

    Returns:
        The id as a string, or ``None`` when ``opts`` holds neither key.

    Raises:
        Exception: if the name matches no datasource, or more than one.
    """
    if 'id' in opts:
        return str(opts.pop('id'))

    if 'name' not in opts:
        return None

    # The name is consumed before the lookup, mirroring the id handling.
    wanted = opts.pop('name')
    matches = [
        entry['id']
        for entry in json_http(DS_URL)
        if 'name' in entry and entry['name'] == wanted
    ]

    if len(matches) == 0:
        raise Exception("Can't locate a DataSource with name=" + wanted)
    if len(matches) > 1:
        raise Exception("Multiple DataSource's found with name=" + wanted +
                        ": " + str(matches))
    return str(matches[0])
示例#8
0
def status(args, DS_URL):
    """Display the status of one datasource, or of all of them.

    With no argument the status of every datasource is fetched from
    ``DS_URL/all/status``.  With one argument (an id or a name, as understood
    by ``parse_opts`` / ``get_id``) only that datasource's status is fetched
    from ``DS_URL/<id>/status``.

    Raises:
        Exception: if more than one argument is given.
    """
    if len(args) > 1:
        raise Exception("wrong number of args for showing DataSource status")

    # get_id needs the datasource URL so it can look an id up by name.
    ds_id = get_id(parse_opts(args), DS_URL)

    if ds_id is not None:
        url = DS_URL + '/' + ds_id + '/status'
        data = json_http(url)
        print("Status of DataSource #" + ds_id + ": " + url + " => " +
              pretty_json(data))
    else:
        url = DS_URL + "/all/status"
        data = json_http(url)
        print("Status of All DataSources: " + url + " => " + pretty_json(data))
示例#9
0
def show(args, DS_URL):
    """Display one datasource, or all current datasources.

    With no argument every datasource returned by ``DS_URL`` is printed via
    ``print_ds`` (or ``(none)`` when the listing is empty).  With one argument
    (an id or a name, as understood by ``parse_opts`` / ``get_id``) only that
    datasource is fetched and printed.

    Raises:
        Exception: if more than one argument is given.
    """
    if len(args) > 1:
        raise Exception("wrong number of args for showing a DataSource")

    # get_id needs the datasource URL so it can look an id up by name.
    ds_id = get_id(parse_opts(args), DS_URL)

    if ds_id is None:
        print('Data Sources: ' + DS_URL)
        data = json_http(DS_URL)

        if not data:
            print('  (none)')
        else:
            for ds in data:
                # print_ds takes (data, DS_URL, indent); the indent must not
                # be passed in the DS_URL position.
                print_ds(ds, DS_URL, '  ')
    else:
        print_ds(json_http(DS_URL + '/' + ds_id), DS_URL)
示例#10
0
def print_ds(data, DS_URL, indent=''):
    """Print a datasource description together with its status and schedule.

    ``data`` must contain an ``id`` entry.  The description itself is printed
    first; the status and the schedule are then fetched from
    ``DS_URL/<id>/status`` and ``DS_URL/<id>/schedule`` and printed, all
    nested two spaces deeper than ``indent``.
    """
    ds_id = str(data['id'])
    base_url = DS_URL + '/' + ds_id

    print(indent + "Data Source #" + ds_id + ': ')
    nested = indent + '  '

    print(nested + "Info: " + base_url + ' => ' + pretty_json(data, nested))

    # Status first, then schedule: each is fetched right before it is printed.
    for label, suffix in (("Status", "/status"), ("Schedule", "/schedule")):
        detail_url = base_url + suffix
        detail = json_http(detail_url)
        print(nested + label + ": " + detail_url + " => " +
              pretty_json(detail, nested))
示例#11
0
def show(args, SETTINGS_URL):
    """Display index settings.

    Without arguments everything under ``SETTINGS_URL`` is fetched.  With
    arguments, they are appended to the URL as a comma-separated path segment
    and listed in the printed label.
    """
    if len(args) == 0:
        label = 'Index Settings'
        url = SETTINGS_URL
    else:
        label = 'Index Settings' + ' (' + ', '.join(args) + ')'
        url = SETTINGS_URL + '/' + ','.join(args)

    settings = json_http(url)
    print(label + ': ' + url + ' => ' + pretty_json(settings))
示例#12
0
def show(args, COL_URL):
    """Display current collection info.

    Without arguments ``COL_URL/info`` is fetched as a whole.  With arguments,
    they are appended to that URL as a comma-separated path segment and
    listed in the printed label.
    """
    info_url = COL_URL + "/info"
    if len(args) == 0:
        label = 'Collection Info'
        url = info_url
    else:
        label = 'Collection Info' + ' (' + ', '.join(args) + ')'
        url = info_url + '/' + ','.join(args)

    info = json_http(url)
    print(label + ": " + url + " => " + pretty_json(info))
示例#13
0
def create_collection(name):
    """Try to create a collection called ``name``.

    A POST request is sent to ``API_URL/collections``.  Any exception raised
    along the way is not propagated; its traceback is printed instead.
    """
    payload = {"name": name}
    collections_url = API_URL + "/collections"
    try:
        print("Trying: " + name)
        lweutils.json_http(collections_url, method='POST', data=payload)
        print("Created New Collection: " + payload['name'])
    except Exception:
        # Best effort: report the failure and let the caller carry on.
        traceback.print_exc()
示例#14
0
def create(args, FIELDS_URL):
    """Create a field.

    ``args`` is parsed with ``parse_opts``; the result must contain both a
    ``name`` and a ``field_type`` entry and is POSTed to ``FIELDS_URL``.

    Raises:
        Exception: naming the first required option that is missing.
    """
    options = parse_opts(args)

    missing = [key for key in ('name', 'field_type') if key not in options]
    if missing:
        # Report only the first missing option, in declaration order.
        raise Exception("Creating a Field requires a " + missing[0])

    json_http(FIELDS_URL, method='POST', data=options)
    print("Created New Field: " + options['name'] + " at: " + FIELDS_URL)
示例#15
0
def show(args, FIELDS_URL):
    """Display one field, or all current fields.

    With no argument every field returned by ``FIELDS_URL`` is printed (or
    ``(none)`` when the listing is empty).  With one argument -- the field
    name, optionally written as ``name=<value>`` -- only that field is
    fetched and printed; the argument is popped off ``args``.

    Raises:
        Exception: if more than one argument is given.
    """
    if len(args) > 1:
        raise Exception("wrong number of args for showing fields")

    if len(args) == 0:
        print('Fields: ' + FIELDS_URL)
        listing = json_http(FIELDS_URL)
        if len(listing) == 0:
            print('  (none)')
            return
        for entry in listing:
            entry_name = entry['name']
            entry_url = FIELDS_URL + '/' + entry_name
            print("  Field: " + entry_name + ": " + entry_url + " => " +
                  pretty_json(entry, '  '))
        return

    field_name = args.pop(0)
    # Be helpful if the caller used the key=value syntax by mistake.
    prefix = 'name='
    if field_name.startswith(prefix):
        field_name = field_name[len(prefix):]
    field_url = FIELDS_URL + "/" + field_name
    details = json_http(field_url)
    print("Field " + field_name + ": " + field_url + " => " +
          pretty_json(details, '  '))
示例#16
0
def create(args, DS_URL, added_data=None):
    """Create a datasource and return its id.

    ``args`` is parsed with ``parse_opts`` and must yield ``name``, ``type``
    and ``crawler`` entries.  If ``added_data`` is given it is merged into the
    options afterwards (the required-option check looks at the parsed
    arguments only).  The result is POSTed to ``DS_URL``.

    Returns:
        The ``id`` entry of the response, unconverted.

    Raises:
        Exception: naming the first required option that is missing.
    """
    options = parse_opts(args)
    for required in ('name', 'type', 'crawler'):
        if required not in options:
            raise Exception("Creating a DataSource requires a " + required)

    if added_data:
        options.update(added_data)

    response = json_http(DS_URL, method='POST', data=options)
    new_id = response['id']
    print("Created New DataSource: " + str(new_id) + " with name: " +
          options['name'] + " at " + DS_URL)
    return new_id
示例#17
0
def history(args, DS_URL):
    """Display the indexing history of a datasource.

    Exactly one argument is expected, identifying the datasource by id or by
    name (as understood by ``parse_opts`` / ``get_id``).  The history is
    fetched from ``DS_URL/<id>/history`` and printed.

    Raises:
        Exception: if ``args`` does not hold exactly one element, or if it
            yields neither an id nor a name.
    """
    if len(args) != 1:
        raise Exception("wrong number of args for showing a DataSource")

    # get_id needs the datasource URL so it can look an id up by name.
    ds_id = get_id(parse_opts(args), DS_URL)
    if ds_id is None:
        raise Exception(
            "Must supply either an id or a name to view the indexing history of a DataSource"
        )

    url = DS_URL + "/" + ds_id + '/history'
    data = json_http(url)
    print("History of DataSource #" + ds_id + ": " + url + " => " +
          pretty_json(data))
示例#18
0
def create_press_crawler(stock):
    """Create a web datasource for the press releases of ``stock`` and start it.

    The datasource is created through ``ds.create`` with the Yahoo Finance
    press-release page of the symbol as its start URL, and a PUT request is
    then sent to the datasource's ``/job`` endpoint under ``COL_URL``.

    Returns:
        The datasource id exactly as returned by ``ds.create``.
    """
    url = "http://finance.yahoo.com/q/p?s=" + stock + "+Press+Releases"
    # Raw strings keep the regex escapes explicit; the resulting text is the
    # same as with the non-raw literals.
    # NOTE(review): in the second pattern '+' is a regex quantifier rather
    # than a literal plus sign -- confirm how the crawler interprets
    # include_paths before relying on it.
    include_paths = [
        r"http://finance\.yahoo\.com/news/.*",
        r"http://finance\.yahoo\.com/q/p\?s=" + stock + "+Press+Releases",
    ]
    ds_id = ds.create([
        "name=PressRelease_" + stock,
        "type=web",
        "bounds=none",
        "url=" + url,
        "crawler=lucid.aperture",
        "crawl_depth=2",
        "include_paths=" + include_paths[0],
        "include_paths=" + include_paths[1],
    ], DS_URL)
    # The id is not guaranteed to be a string, so convert it before building
    # the URL.
    lweutils.json_http(COL_URL + "/datasources/" + str(ds_id) + "/job",
                       method="PUT")
    return ds_id
示例#19
0
def add_twitter(i, stock_lists, stocks, access_token, consumer_key,
                consumer_secret, token_secret):
    """Create a Twitter stream datasource tracking the given symbols and start it.

    Args:
        i: value used to build the datasource name (``Twitter_<i>``).
        stock_lists: iterable of symbols to track.
        stocks: mapping from symbol to a sequence whose element at index 1 is
            tracked alongside ``$<symbol>`` (presumably the company name --
            confirm against the caller).
        access_token, consumer_key, consumer_secret, token_secret: credential
            strings passed through as datasource options.

    The datasource is created through ``ds.create`` with the mapping from
    ``create_twitter_mappings()`` as additional data, and a PUT request is
    then sent to the datasource's ``/job`` endpoint under ``COL_URL``.
    """
    args = [
        "name=Twitter_" + str(i),
        "access_token=" + access_token,
        "consumer_key=" + consumer_key,
        "consumer_secret=" + consumer_secret,
        "token_secret=" + token_secret,
        "type=twitter_stream",
        "crawler=lucid.twitter.stream",
        "sleep=10000",
    ]
    print(stock_lists)

    # Two terms per symbol: the cashtag and the element at index 1 of its
    # stocks entry.  Joining avoids the dangling separator that manual
    # concatenation plus trimming a single character left behind.
    terms = []
    for symbol in stock_lists:
        terms.append("$" + symbol)
        terms.append(stocks[symbol][1])
    args.append("filter_track=" + ", ".join(terms))

    data = {"mapping": create_twitter_mappings()}
    ds_id = ds.create(args, DS_URL, data)
    # The id is not guaranteed to be a string, so convert it before building
    # the URL.
    lweutils.json_http(COL_URL + "/datasources/" + str(ds_id) + "/job",
                       method="PUT")
示例#20
0
def standard(name=None):
    """Run a search for the current request and render ``standard.jinja2``.

    Request inputs (form values take precedence over query-string values for
    ``search_box``/``q`` and ``user``):
        q / search_box: the query; defaults to the match-all query ``*:*``.
        user: optional user; when set (and not ``'none'``) the user's role is
            looked up and passed along with the search.
        start: result offset passed to the search.
        fq: filter queries; when present they replace the default
            data-source filter.
        active: ``"Results"`` (default) or ``"Historical"``; the latter
            enables grouping by ``symbol``.
        sort_criteria: optional field used for ``group.sort``.

    Paging assumes 10 results per page.

    Args:
        name: passed through to the template unchanged.

    Returns:
        The rendered template.
    """
    # Defaults: match-all query, first page, no user, no explicit sort.
    query = "*:*"
    start = 0
    user = None
    sort_criteria = None

    if request.method == 'POST' and 'search_box' in request.form:
        query = request.form['search_box']
    elif request.args.get('q'):
        query = request.args.get('q')

    if request.method == 'POST' and "user" in request.form:
        user = request.form['user']
    elif request.args.get('user'):
        user = request.args.get('user')

    if request.args.get('start'):
        start = request.args.get('start')
    fq = []
    if request.args.get('fq'):
        fq = request.args.getlist('fq')
    active = "Results"
    if request.args.get('active'):
        active = request.args.get('active')
    if request.args.get('sort_criteria'):
        sort_criteria = request.args.get('sort_criteria')
    dsn_results = "data_source_name:HistoricalPrices"

    source_filters = []
    group = "false"
    group_field = "symbol"
    if active == "Results":
        # Regular results exclude the historical price documents.
        source_filters.append("-" + dsn_results)
    else:  # Historical, do grouping
        source_filters.append(dsn_results)
        group = "true"
        if sort_criteria is None:
            sort_criteria = "trade_date"

    # &facet.date=timestamp&facet.date.start=2013-10-08T14:17:49.04Z&facet.date.end=NOW/DAY%2B1DAY&facet.date.gap=%2B1HOUR
    app.logger.info("Query: " + query)
    kwargs = {
        "qt": "/lucid",
        "facet": "true",
        "start": start,
        "fl": "*,score",
        "facet.date": "timestamp",
        "facet.date.start": "NOW/DAY-30DAY",
        "facet.date.end": "NOW/DAY+1DAY",
        "facet.date.gap": "+1DAY",
        "facet.date.other": "all",
        "facet.range": ["open", "close", "volume"],
        "facet.range.start": "0",
        "facet.range.end": "1000",
        "facet.range.gap": "100",
        "facet.range.other": "all",
        "facet.mincount": "1",
        "f.open.facet.limit": "5",
        "f.close.facet.limit": "5",
        # NOTE(review): this key does not follow the f.<field>.facet.* form
        # of its neighbours -- possibly a typo; left unchanged.
        "f.close.open.limit": "5",
        "f.volume.facet.limit": "5",
        "f.volume.facet.range.gap": "500000",
        "f.volume.facet.range.start": "10000",
        "f.volume.facet.range.end": "5000000",
        "facet.pivot":
        ["open,close,volume", "attr_retweetcount,attr_username"],
        "stats": "true",
        "stats.field": ["open", "close", "volume"],
        "fq": source_filters
    }

    # Explicit filter queries replace the default data-source filter.
    if fq:
        kwargs['fq'] = fq
    if sort_criteria:
        kwargs['group.sort'] = sort_criteria + " desc"

    if active == "Historical":
        kwargs['group'] = group
        kwargs['group.field'] = group_field
        kwargs['group.limit'] = 30

    if user and user != 'none':
        kwargs['user'] = user
        # We have a user, let's see what roles they play:
        # /api/collections/collection/roles/role
        print("User: " + user)
        # NOTE(review): this statement was garbled in the source; it has been
        # reconstructed from the surviving '"/roles", method="GET")' tail and
        # the loop over ``roles`` below.  Confirm that COL_URL is the right
        # base URL and is in scope in this module.
        roles = lweutils.json_http(COL_URL + "/roles", method="GET")
        # Example role entries:
        # {u'groups': [], u'users': [u'admin'], u'filters': [u'*:*'], u'name': u'DEFAULT'}
        # {u'groups': [], u'users': [u'user.10'], u'filters': [u'symbol:AES'], u'name': u'user10'}
        for role in roles:
            for role_user in role['users']:
                if user == role_user:
                    # The last matching role wins.
                    # TODO: Handle multiple roles?
                    kwargs['role'] = role['name']

    params = {'q': query}
    params.update(kwargs)
    solr_rsp = solr._select(params)
    result = solr.decoder.decode(solr_rsp)
    response = result.get('response') or {}
    facets = result.get('facet_counts') or {}
    stats = result.get('stats') or {}
    grouped = result.get("grouped")
    highlights = result.get("highlighting")
    numFound = response.get('numFound', 0)
    result_kwargs = process_solr_rsp(result)

    results = Results(response.get('docs', ()), numFound, **result_kwargs)

    # Paging, 10 results per page.  From here on ``start`` is the offset
    # reported by the search response, not the request parameter.
    page_count = int(math.ceil(numFound / 10.0))
    start = response.get('start', 0)
    current_page_number = int(math.ceil(start / 10.0))
    if page_count > 0:
        current_page_number += 1
    else:
        current_page_number = 1
        page_count = 1
    next_start = start + 10
    prev_start = max(start - 10, 0)

    # For every active filter, build the URL that keeps all the *other*
    # filters.
    filter_urls = {}
    if fq:
        filter_base_url = url_for('standard', start=str(start), q=query)
        for outer in fq:
            filter_urls[outer] = filter_base_url
            for inner in fq:
                if outer != inner:
                    app.logger.info("Inner: " + inner)
                    filter_urls[outer] += "&fq=" + inner

    current_url = url_for('standard',
                          start=str(start),
                          q=query,
                          fq=fq,
                          active=active)
    results_url = url_for('standard',
                          start=str(start),
                          q=query,
                          fq=fq,
                          active="Results")
    historical_url = url_for('standard',
                             start=str(start),
                             q=query,
                             fq=fq,
                             active="Historical")
    next_url = url_for('standard',
                       start=str(next_start),
                       q=query,
                       fq=fq,
                       active=active)
    prev_url = url_for('standard',
                       start=str(prev_start),
                       q=query,
                       fq=fq,
                       active=active)
    app.logger.info("Next: " + next_url)
    return render_template('standard.jinja2',
                           name=name,
                           search_results=results,
                           fq=fq,
                           the_user=user,
                           grouped=grouped,
                           active=active,
                           filter_urls=filter_urls,
                           raw_response=response,
                           start=start,
                           current_url=current_url,
                           historical_url=historical_url,
                           results_url=results_url,
                           the_facets=facets,
                           the_stats=stats,
                           the_query=query,
                           current_page=current_page_number,
                           next_url=next_url,
                           prev_url=prev_url,
                           the_page_count=page_count,
                           highlights=highlights,
                           users=users,
                           sort_criteria=sort_criteria)
示例#21
0
def update(args, SETTINGS_URL):
    """Modify settings.

    The options parsed from ``args`` are sent as the body of a PUT request
    to ``SETTINGS_URL``.
    """
    json_http(SETTINGS_URL, method='PUT', data=parse_opts(args))
    print("Updated Settings")