Example #1
def populateMetadata():
    import json

    import pysharkbite
    # Django helpers this snippet relies on (model registry and cache framework)
    from django.apps import apps
    from django.core.cache import caches

    conf = pysharkbite.Configuration()
    conf.set("FILE_SYSTEM_ROOT", "/accumulo")

    model = apps.get_model(app_label='query', model_name='AccumuloCluster')
    accumulo_cluster = model.objects.first()
    if accumulo_cluster is None:
        return
    zk = pysharkbite.ZookeeperInstance(accumulo_cluster.instance,
                                       accumulo_cluster.zookeeper, 1000, conf)
    user = pysharkbite.AuthInfo("root", "secret", zk.getInstanceId())
    connector = pysharkbite.AccumuloConnector(user, zk)

    indexTableOps = connector.tableOps("DatawaveMetadata")

    auths = pysharkbite.Authorizations()

    indexScanner = indexTableOps.createScanner(auths, 100)
    indexrange = pysharkbite.Range()

    indexScanner.addRange(indexrange)
    # only fetch the "f" (frequency) column family from the metadata table
    indexScanner.fetchColumn("f", "")

    combinertxt = ""
    ## load the combiner from the file system and send it to accumulo
    with open('countgatherer.py', 'r') as file:
        combinertxt = file.read()
    combiner = pysharkbite.PythonIterator("MetadataCounter", combinertxt, 200)
    indexScanner.addIterator(combiner)
    indexSet = indexScanner.getResultSet()

    mapping = {}
    for indexKeyValue in indexSet:
        value = indexKeyValue.getValue()
        key = indexKeyValue.getKey()
        if key.getColumnFamily() == "f":
            # the column qualifier is <datatype>\x00<yyyyMMdd>
            day = key.getColumnQualifier().split("\u0000")[1]
            dayCounts = mapping.setdefault(day, {})
            try:
                dayCounts[key.getRow()] = (dayCounts.get(key.getRow(), 0) +
                                           int(value.get()))
            except (TypeError, ValueError):
                pass  # skip values that are not plain integer counts
    # cache the per-day counts for 48 hours and return them as JSON
    caches['metadata'].set("field", json.dumps(mapping), 3600 * 48)
    return json.dumps(mapping)
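
Example #1 writes the per-day counts into Django's "metadata" cache under the key "field". A minimal sketch of reading that cache back, assuming the same cache alias is configured in the Django settings (the getFieldCounts name is only illustrative):

import json

from django.core.cache import caches


def getFieldCounts():
    # return the mapping written by populateMetadata(), or None if it has expired
    cached = caches['metadata'].get("field")
    if cached is None:
        return None
    return json.loads(cached)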
Example #2
import queue
import sys
import time

import pysharkbite

# CancellationToken, LookupInformation, and scanDoc are assumed to be defined
# elsewhere in the surrounding project.
def getDocuments(cancellationtoken: CancellationToken, name: int,
                 lookupInformation: LookupInformation,
                 input: queue.SimpleQueue, outputQueue: queue.SimpleQueue):
    count = 0
    while cancellationtoken.running():
        docInfo = None
        try:
            try:
                if not input.empty():
                    docInfo = input.get(timeout=1)
            except queue.Empty:
                pass  # nothing queued this iteration
            if docInfo is not None:
                tableOps = lookupInformation.getTableOps()
                scanner = tableOps.createScanner(lookupInformation.getAuths(),
                                                 5)
                startKey = pysharkbite.Key()
                endKey = pysharkbite.Key()
                startKey.setRow(docInfo.getShard())
                docid = docInfo.getDataType() + "\x00" + docInfo.getDocId()
                startKey.setColumnFamily(docid)
                endKey.setRow(docInfo.getShard())
                endKey.setColumnFamily(docid + "\xff")
                print("Searching for " + docInfo.getShard())
                rng = pysharkbite.Range(startKey, True, endKey, True)

                scanner.addRange(rng)

                rangecount = 1

                # batch up to ten document ranges onto one scanner
                while rangecount < 10:
                    try:
                        docInfo = input.get(False)
                        startKey = pysharkbite.Key()
                        endKey = pysharkbite.Key()
                        startKey.setRow(docInfo.getShard())
                        docid = docInfo.getDataType() + "\x00" + docInfo.getDocId()
                        startKey.setColumnFamily(docid)
                        endKey.setRow(docInfo.getShard())
                        endKey.setColumnFamily(docid + "\xff")
                        rng = pysharkbite.Range(startKey, True, endKey, True)
                        print("Searching for " + docInfo.getShard())
                        scanner.addRange(rng)
                        rangecount = rangecount + 1
                    except queue.Empty:
                        break  # no more queued documents to batch

                with open('jsoncombiner.py', 'r') as file:
                    combinertxt = file.read()
                    combiner = pysharkbite.PythonIterator(
                        "PythonCombiner", combinertxt, 100)
                    scanner.addIterator(combiner)
                try:
                    count = count + scanDoc(scanner, outputQueue)
                except Exception:
                    pass  # keep the worker alive even if one batch fails
            else:
                time.sleep(0.5)

        except Exception:
            e = sys.exc_info()[0]
            print("Unexpected error: " + str(e))
    return True
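
getDocuments in Example #2 is written as a queue-driven worker. A minimal sketch of how it might be wired up, assuming CancellationToken and LookupInformation are helper classes from the surrounding project (only the methods used above are relied on) and that startDocumentWorkers is an illustrative name:

import queue
import threading


def startDocumentWorkers(cancellationtoken, lookupInformation, numWorkers=4):
    # feed document-info objects into inputQueue; results appear on outputQueue
    inputQueue = queue.SimpleQueue()
    outputQueue = queue.SimpleQueue()
    workers = []
    for workerId in range(numWorkers):
        worker = threading.Thread(target=getDocuments,
                                  args=(cancellationtoken, workerId,
                                        lookupInformation, inputQueue,
                                        outputQueue))
        worker.start()
        workers.append(worker)
    return inputQueue, outputQueue, workers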
Example #3
    scanner = tableOperations.createScanner(auths, 2)

    startKey = pysharkbite.Key()

    endKey = pysharkbite.Key()

    startKey.setRow("row")

    endKey.setRow("row3")

    range = pysharkbite.Range("a")

    scanner.addRange(range)

    iterator = pysharkbite.PythonIterator("PythonIterator", 100)
    iterator = iterator.onNext(
        "lambda x : sharkbite_iterator.Key( x.getRow(), 'new cf', x.getColumnQualifier()) "
    )

    scanner.addIterator(iterator)
    resultset = scanner.getResultSet()

    for keyvalue in resultset:
        key = keyvalue.getKey()
        value = keyvalue.getValue()
        print(key.getRow() + ":" + key.getColumnFamily() + ":" +
              key.getColumnQualifier() + " [" + key.getColumnVisibility() +
              "] -- " + value.get())
    """ delete your table if user did not create temp """
Example #4
    def mthd(self):

        import sys

        import pysharkbite

        tableOperations = super().getTableOperations()

        if not tableOperations.exists(False):
            print("Creating table")
            if not tableOperations.create(False):
                print("Could not create table")
        else:
            print("Table already exists, so not creating it")

        auths = pysharkbite.Authorizations()
        # Add authorizations here if needed; a put with a visibility would look
        # like mutation.put("cf", "cq", "cv", 1569786960)

        writer = tableOperations.createWriter(auths, 10)

        mutation = pysharkbite.Mutation("sow2")

        mutation.put("cf", "cq", "", 1569786960, "value")
        mutation.put("cf2", "cq", "", 1569786960, "value")
        """ no value """
        mutation.put("cf3", "cq", "", 1569786960, "value")

        writer.addMutation(mutation)

        writer.close()

        writer = tableOperations.createWriter(auths, 10)

        rng = range(0, 1000)
        for i in rng:
            row = ("row%i" % (i + 5))
            mutation = pysharkbite.Mutation(row)
            mutation.put("cf", "cq", "", 1569786960, "value")
            writer.addMutation(mutation)

        writer.close()

        print("written")
        """ auths.addAuthorization("cv") """

        scanner = tableOperations.createScanner(auths, 2)

        accumuloRange = pysharkbite.Range("sow", True, "sow3", False)

        scanner.addRange(accumuloRange)

        iterator = pysharkbite.PythonIterator("PythonIterator", 100)
        iterator = iterator.onNext(
            "lambda x : KeyValue( Key( x.getKey().getRow(), 'new cf', x.getKey().getColumnQualifier()), Value()) "
        )
        scanner.addIterator(iterator)

        resultset = scanner.getResultSet()

        for keyvalue in resultset:
            key = keyvalue.getKey()
            assert ("sow2" == key.getRow())
            value = keyvalue.getValue()
            # the iterator should have rewritten the column family to "new cf";
            # seeing the original "cf" means it did not run, so fail the test
            if "cf" == key.getColumnFamily():
                sys.exit(154)
            if "new cf" == key.getColumnFamily():
                assert ("" == value.get())

        scanner = tableOperations.createScanner(auths, 2)

        accumuloRange = pysharkbite.Range("sow", True, "sow3", False)

        scanner.addRange(accumuloRange)

        iterator = pysharkbite.PythonIterator("PythonIterator", 100)
        iterator = iterator.onNext(
            "lambda x : Key( x.getKey().getRow(), x.getKey().getColumnFamily(), 'new cq') "
        )
        scanner.addIterator(iterator)

        resultset = scanner.getResultSet()

        for keyvalue in resultset:
            key = keyvalue.getKey()
            assert ("sow2" == key.getRow())
            value = keyvalue.getValue()
            # this iterator rewrites the column qualifier to "new cq"; seeing
            # the original "cq" means it did not run, so fail the test
            if "cq" == key.getColumnQualifier():
                sys.exit(154)
        """ delete your table if user did not create temp """

        tableOperations.remove()
Example #5
def populateEventCountMetadata():
    import time

    import pysharkbite
    # Django helpers this snippet relies on (model registry and cache framework);
    # getDateRange is assumed to be defined elsewhere in the project
    from django.apps import apps
    from django.core.cache import caches

    conf = pysharkbite.Configuration()
    conf.set("FILE_SYSTEM_ROOT", "/accumulo")
    model = apps.get_model(app_label='query', model_name='AccumuloCluster')
    accumulo_cluster = model.objects.first()
    if accumulo_cluster is None:
        return
    zoo_keeper = pysharkbite.ZookeeperInstance(accumulo_cluster.instance,
                                               accumulo_cluster.zookeeper,
                                               1000, conf)
    user = pysharkbite.AuthInfo("root", "secret", zoo_keeper.getInstanceId())
    connector = pysharkbite.AccumuloConnector(user, zoo_keeper)
    queryRanges = []
    # only query days from the last 15 that are not already cached
    for dateinrange in getDateRange(-15):
        shardbegin = dateinrange.strftime("%Y%m%d")
        cached = caches['eventcount'].get(shardbegin)
        if cached is None or cached == 0:
            queryRanges.append(shardbegin)

    if len(queryRanges) > 0:
        ## some days are missing from the cache, so query the metrics table;
        ## the connector created above is reused

        indexTableOps = connector.tableOps("DatawaveMetrics")

        auths = pysharkbite.Authorizations()
        auths.addAuthorization("MTRCS")

        indexScanner = indexTableOps.createScanner(auths, 100)
        for dt in queryRanges:
            # one range per uncached day: [yyyyMMdd, yyyyMMdd\uffff)
            indexrange = pysharkbite.Range(dt, True, dt + "\uffff", False)
            indexScanner.addRange(indexrange)
        indexScanner.fetchColumn("EVENT_COUNT", "")

        combinertxt = ""
        ## load the combiner from the file system and send it to accumulo
        with open('metricscombiner.py', 'r') as file:
            combinertxt = file.read()
        combiner = pysharkbite.PythonIterator("MetadataCounter", combinertxt,
                                              200)
        indexScanner.addIterator(combiner)
        indexSet = indexScanner.getResultSet()

        mapping = {}
        try:
            for indexKeyValue in indexSet:
                value = indexKeyValue.getValue()
                key = indexKeyValue.getKey()
                if key.getColumnFamily() == "EVENT_COUNT":
                    # the row is <yyyyMMdd>_<shard>; aggregate counts per day
                    dt = key.getRow().split("_")[0]
                    mapping[dt] = mapping.get(dt, 0) + int(value.get())
            # cache each day's event count for 48 hours
            for field in mapping:
                caches['eventcount'].set(field, str(mapping[field]), 3600 * 48)
        except Exception:
            pass  # leave the cache untouched if the scan fails
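
Example #5 relies on a getDateRange helper that is not shown in the snippet. A plausible sketch, assuming it yields one date per day covering the last abs(days) days, oldest first:

import datetime


def getDateRange(days):
    # yield datetime.date objects for the last abs(days) days, oldest first
    today = datetime.date.today()
    for offset in range(abs(days), -1, -1):
        yield today - datetime.timedelta(days=offset)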