Example #1
def __init__(self, config, hosts=None):
    self.logger = logging.getLogger('browbeat.Rally')
    self.config = config
    self.tools = Tools(self.config)
    self.connmon = Connmon(self.config)
    self.grafana = Grafana(self.config)
    self.elastic = Elastic(self.config)
    self.error_count = 0
    self.pass_count = 0
    self.test_count = 0
    self.scenario_count = 0
Example #2
class Execute:
    def __init__(self):
        self.classification = Classification()
        self.extraction = Extraction()
        self.elastic = Elastic()
        self.z = Database()

    def execute(self, query):
        try:
            json_data = json.loads(query)
            prediction = self.classification.predict(json_data['query'])
            results = self.extraction.processQuery(prediction[0][0],
                                                   json_data['query'])
            if results[0] == 0.0:
                status = self.elastic.process0(results)
                return status
            elif results[0] == 1.0:
                houses = self.elastic.process1(results)
                return houses
            elif results[0] == 2.0:
                houses = self.elastic.process2(results)
                return houses
            elif results[0] == 3.0:
                houses = self.elastic.process3(results)
                return houses
            elif results[0] == 4.0:
                houses = self.elastic.process4(results)
                return houses
            elif results[0] == 5.0:
                houses = self.elastic.process5(results)
                return houses
            elif results[0] == 6.0:
                houses = self.elastic.process6(results)
                return houses
            elif results[0] == 7.0:
                houses = self.elastic.process7(results)
                return houses
            elif results[0] == 8.0:
                status = self.elastic.process8(results)
                return status
            elif results[0] == 9.0:
                status = self.elastic.process9(results)
                return status
            else:
                return "query type " + str(results[0]) + "not supported"
        except Exception as ex:
            print(str(ex))
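
A minimal driver for the Execute class above, assuming the project's Classification, Extraction, Elastic, and Database modules are importable; the query text is illustrative only:

import json

# Hypothetical usage; execute() expects a JSON string with a single
# "query" field, matching the json_data['query'] access above.
executor = Execute()
payload = json.dumps({"query": "two bedroom houses near downtown"})
print(executor.execute(payload))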
Example #3
class Initialize:
    def __init__(self):
        self.index_name = None
        self.conn = None
        self.es = Elasticsearch()
        self.db = Database()
        self.elastic = Elastic()

    def create_db_connection(self, db_file):
        try:
            self.conn = sqlite3.connect(db_file)
            print("Connected to database successfully")
        except Error as e:
            print(e)

    def create_db_table(self):
        create_table = """ CREATE TABLE IF NOT EXISTS zipcodes (
                                            zipcode text primary key,
                                            download_date text
                                        ); """
        if self.conn is not None:
            self.conn.execute(create_table)
            print("Created db table")
        else:
            print("Error! Cannot create the database connection.")

    def create_es_connection(self):
        self.es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
        if self.es.ping():
            print('Elasticsearch Yay Connect')
        else:
            print('Awww it could not connect!')

    def create_es_index(self, zipcode):
        created = False
        # index settings
        settings = {
            "settings": {
                "number_of_shards": 1,
                "number_of_replicas": 0
            }
        }
        try:
            index_name = 'listings_' + str(zipcode)
            if not self.db.exists_db(zipcode):
                # ignore=400 suppresses the "index already exists" error
                self.es.indices.create(index=index_name, ignore=400, body=settings)
                print('Elasticsearch created index', str(index_name))
                records = self.elastic.load_es(zipcode)
                self.db.insert_db(zipcode, time.time())
                print("Loaded ", len(records), " into Elasticsearch")
            created = True
        except Exception as ex:
            print(str(ex))
        finally:
            return created
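
A hedged usage sketch for the Initialize class; the database filename and zipcode are made-up values, and a local Elasticsearch on port 9200 is assumed:

# Hypothetical driver for the class above.
init = Initialize()
init.create_db_connection("zipcodes.db")  # illustrative filename
init.create_db_table()
init.create_es_connection()
if init.create_es_index("98109"):  # illustrative zipcode
    print("Index ready")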
Example #4
def fedweb13_index(index_name, output_file):

    elastic = Elastic(index_name)
    mappings = {
        # "id": Elastic.notanalyzed_field(),
        "title": Elastic.analyzed_field(),
        "content": Elastic.analyzed_field()
    }

    elastic.create_index(mappings=mappings, force=True)

    maindir = "/data/collections/fedwebgh/search_data/fedweb13/FW13-sample-search"
    num_doc = 0
    f_feb = open(output_file, "w")
    for d in os.listdir(maindir):
        if d.startswith("md"):  #ignore md5.txt
            continue
        inpath = os.path.join(maindir, d)
        tar = tarfile.open(inpath, "r")
        tar.extractall()
        docs = {}
        for member_info in tar.getmembers():
            if len(member_info.name) > 23:  #get file instead of folder
                f = open(member_info.name)
                soup = BeautifulSoup(f, "lxml")
                for snippets in soup.find_all("snippet"):
                    doc_dict = {}
                    doc_id = snippets.get("id").lower()
                    f_feb.write(doc_id + " " + "FW13-" + doc_id[5:9] +
                                "\n")  # write output file
                    try:
                        title = snippets.title.text
                    except AttributeError:
                        title = ""
                    try:
                        content = snippets.description.text
                    except AttributeError:
                        # no <description> tag; fall back to the snippet's full text
                        content = snippets.get_text()
                    doc_dict["title"] = title
                    doc_dict["content"] = content
                    docs[doc_id] = doc_dict
                    num_doc += 1
        "continous update docs and add into index"
        elastic.add_docs_bulk(docs)
        print("finish parse and index for file: ", inpath)
    print(num_doc, " indexed")
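
A one-line driver for the function above; both arguments are illustrative rather than paths from the original project:

# Hypothetical call; the index name and output path are made up.
fedweb13_index("fedweb13", "fedweb13_docid_map.txt")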
Example #5
def make_property(parameters):
    """
    Make an instance of Property
    """
    prop_type = parameters['type']
    if prop_type == 'eos':
        return EOS(parameters)
    elif prop_type == 'elastic':
        return Elastic(parameters)
    elif prop_type == 'vacancy':
        return Vacancy(parameters)
    elif prop_type == 'interstitial':
        return Interstitial(parameters)
    elif prop_type == 'surface':
        return Surface(parameters)
    else:
        raise RuntimeError(f'unknown property type {prop_type}')
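
Dispatch happens purely on the 'type' key, so a call can be as small as the sketch below; whatever extra keys each Property constructor reads are not shown in the snippet and are omitted here:

# Hypothetical call; a real parameter dict carries whatever extra
# keys the Elastic(parameters) constructor expects.
prop = make_property({'type': 'elastic'})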
Example #6
class Rally(WorkloadBase):

    def __init__(self, config, hosts=None):
        self.logger = logging.getLogger('browbeat.Rally')
        self.config = config
        self.tools = Tools(self.config)
        self.connmon = Connmon(self.config)
        self.grafana = Grafana(self.config)
        self.elastic = Elastic(self.config)
        self.error_count = 0
        self.pass_count = 0
        self.test_count = 0
        self.scenario_count = 0

    def run_scenario(self, task_file, scenario_args, result_dir, test_name, benchmark):
        self.logger.debug("--------------------------------")
        self.logger.debug("task_file: {}".format(task_file))
        self.logger.debug("scenario_args: {}".format(scenario_args))
        self.logger.debug("result_dir: {}".format(result_dir))
        self.logger.debug("test_name: {}".format(test_name))
        self.logger.debug("--------------------------------")

        from_ts = int(time.time() * 1000)
        if 'sleep_before' in self.config['rally']:
            time.sleep(self.config['rally']['sleep_before'])
        task_args = str(scenario_args).replace("'", "\"")
        plugins = []
        if "plugins" in self.config['rally']:
            if len(self.config['rally']['plugins']) > 0:
                for plugin in self.config['rally']['plugins']:
                    for name in plugin:
                        plugins.append(plugin[name])
        plugin_string = ""
        if len(plugins) > 0:
            plugin_string = "--plugin-paths {}".format(",".join(plugins))
        cmd = "source {}; ".format(self.config['rally']['venv'])
        cmd += "rally {} task start {} --task-args \'{}\' 2>&1 | tee {}.log".format(
            plugin_string, task_file, task_args, test_name)
        from_time = time.time()
        self.tools.run_cmd(cmd)
        to_time = time.time()
        if 'sleep_after' in self.config['rally']:
            time.sleep(self.config['rally']['sleep_after'])
        to_ts = int(time.time() * 1000)
        self.grafana.create_grafana_urls({'from_ts':from_ts, 'to_ts':to_ts})
        self.grafana.print_dashboard_url(test_name)
        self.grafana.log_snapshot_playbook_cmd(from_ts, to_ts, result_dir, test_name)
        self.grafana.run_playbook(from_ts, to_ts, result_dir, test_name)
        return (from_time, to_time)

    def update_tests(self):
        self.test_count += 1

    def update_pass_tests(self):
        self.pass_count += 1

    def update_fail_tests(self):
        self.error_count += 1

    def update_scenarios(self):
        self.scenario_count += 1

    def get_task_id(self, test_name):
        cmd = "grep \"rally task results\" {}.log | awk '{{print $4}}'".format(
            test_name)
        return self.tools.run_cmd(cmd)

    def _get_details(self):
        self.logger.info(
            "Current number of Rally scenarios executed:{}".format(
                self.scenario_count))
        self.logger.info("Current number of Rally tests executed:{}".format(self.test_count))
        self.logger.info("Current number of Rally tests passed:{}".format(self.pass_count))
        self.logger.info("Current number of Rally test failures:{}".format(self.error_count))

    def gen_scenario_html(self, task_ids, test_name):
        all_task_ids = ' '.join(task_ids)
        cmd = "source {}; ".format(self.config['rally']['venv'])
        cmd += "rally task report --task {} --out {}.html".format(
            all_task_ids, test_name)
        return self.tools.run_cmd(cmd)

    def gen_scenario_json(self, task_id):
        cmd = "source {}; ".format(self.config['rally']['venv'])
        cmd += "rally task results {}".format(task_id)
        return self.tools.run_cmd(cmd)

    def gen_scenario_json_file(self, task_id, test_name):
        cmd = "source {}; ".format(self.config['rally']['venv'])
        cmd += "rally task results {} > {}.json".format(task_id, test_name)
        return self.tools.run_cmd(cmd)

    def rally_metadata(self, result, meta) :
        result['rally_metadata'] = meta
        return result

    def json_result(self,task_id):
        rally_data = {}
        rally_errors = []
        rally_sla = []
        self.logger.info("Loadding Task_ID {} JSON".format(task_id))
        rally_json = self.elastic.load_json(self.gen_scenario_json(task_id))
        if len(rally_json) < 1 :
            self.logger.error("Issue with Rally Results")
            return False
        for metrics in rally_json[0]['result']:
            for workload in metrics :
                if type(metrics[workload]) is dict:
                    for value in metrics[workload] :
                        if not type(metrics[workload][value]) is list:
                            if value not in rally_data:
                                rally_data[value] = []
                            rally_data[value].append(metrics[workload][value])
            if len(metrics['error']) > 0 :
                rally_errors.append({'action_name': value,
                                     'error': metrics['error']})
        rally_doc = []
        for workload in rally_data:
            if not type(rally_data[workload]) is dict :
                rally_stats = {'action': workload,
                               '90th':numpy.percentile(rally_data[workload], 90),
                               '95th':numpy.percentile(rally_data[workload], 95),
                               'Max':numpy.max(rally_data[workload]),
                               'Min':numpy.min(rally_data[workload]),
                               'Average':numpy.average(rally_data[workload]),
                               'Median':numpy.median(rally_data[workload]),
                               'Raw':rally_data[workload]}
                rally_doc.append(rally_stats)

        return {'rally_stats' : rally_doc,
                'rally_errors' : rally_errors,
                'rally_setup' : rally_json[0]['key']}

    def start_workloads(self):
        """Iterates through all rally scenarios in browbeat yaml config file"""
        results = OrderedDict()
        self.logger.info("Starting Rally workloads")
        es_ts = datetime.datetime.now()
        dir_ts = es_ts.strftime("%Y%m%d-%H%M%S")
        self.logger.debug("Time Stamp (Prefix): {}".format(dir_ts))
        benchmarks = self.config.get('rally')['benchmarks']
        if len(benchmarks) > 0:
            for benchmark in benchmarks:
                if benchmark['enabled']:
                    self.logger.info("Benchmark: {}".format(benchmark['name']))
                    scenarios = benchmark['scenarios']
                    def_concurrencies = benchmark['concurrency']
                    def_times = benchmark['times']
                    self.logger.debug(
                        "Default Concurrencies: {}".format(def_concurrencies))
                    self.logger.debug("Default Times: {}".format(def_times))
                    for scenario in scenarios:
                        if scenario['enabled']:
                            self.update_scenarios()
                            self.update_total_scenarios()
                            scenario_name = scenario['name']
                            scenario_file = scenario['file']
                            self.logger.info(
                                "Running Scenario: {}".format(scenario_name))
                            self.logger.debug(
                                "Scenario File: {}".format(scenario_file))

                            del scenario['enabled']
                            del scenario['file']
                            del scenario['name']
                            if len(scenario) > 0:
                                self.logger.debug(
                                    "Overriding Scenario Args: {}".format(scenario))

                            result_dir = self.tools.create_results_dir(
                                self.config['browbeat'][
                                    'results'], dir_ts, benchmark['name'],
                                scenario_name)
                            self.logger.debug("Created result directory: {}".format(result_dir))
                            workload = self.__class__.__name__
                            self.workload_logger(result_dir, workload)

                            # Override concurrency/times
                            if 'concurrency' in scenario:
                                concurrencies = scenario['concurrency']
                                del scenario['concurrency']
                            else:
                                concurrencies = def_concurrencies
                            if 'times' not in scenario:
                                scenario['times'] = def_times

                            for concurrency in concurrencies:
                                scenario['concurrency'] = concurrency
                                for run in range(self.config['browbeat']['rerun']):
                                    if run not in results:
                                        results[run] = []
                                    self.update_tests()
                                    self.update_total_tests()
                                    test_name = "{}-browbeat-{}-{}-iteration-{}".format(
                                        dir_ts, scenario_name, concurrency, run)

                                    if not result_dir:
                                        self.logger.error(
                                            "Failed to create result directory")
                                        exit(1)

                                    # Start connmon before rally
                                    if self.config['connmon']['enabled']:
                                        self.connmon.start_connmon()

                                    from_time, to_time = self.run_scenario(
                                        scenario_file, scenario, result_dir, test_name,
                                        benchmark['name'])

                                    # Stop connmon at end of rally task
                                    if self.config['connmon']['enabled']:
                                        self.connmon.stop_connmon()
                                        try:
                                            self.connmon.move_connmon_results(
                                                result_dir, test_name)
                                        except Exception:
                                            self.logger.error(
                                                "Connmon result data missing, "
                                                "Connmon never started")
                                            return False
                                        self.connmon.connmon_graphs(result_dir, test_name)
                                    new_test_name = test_name.split('-')
                                    new_test_name = new_test_name[3:]
                                    new_test_name = "-".join(new_test_name)

                                    # Find task id (if task succeeded in
                                    # running)
                                    task_id = self.get_task_id(test_name)
                                    if task_id:
                                        self.logger.info(
                                            "Generating Rally HTML for task_id : {}".
                                            format(task_id))
                                        self.gen_scenario_html([task_id], test_name)
                                        self.gen_scenario_json_file(task_id, test_name)
                                        results[run].append(task_id)
                                        self.update_pass_tests()
                                        self.update_total_pass_tests()
                                        self.get_time_dict(
                                            to_time, from_time, benchmark['name'], new_test_name,
                                            workload, "pass")
                                        if self.config['elasticsearch']['enabled'] :
                                            # Start indexing
                                            result_json = self.json_result(task_id)
                                            _meta = {'taskid' : task_id,
                                                     'timestamp': es_ts,
                                                     'workload' : {
                                                         'name' : benchmark['name'],
                                                         'scenario' : scenario_name,
                                                         'times' : scenario['times'],
                                                         'concurrency' : scenario['concurrency']},
                                                     'grafana': self.grafana.grafana_urls()
                                                     }
                                            if result_json :
                                                result = self.elastic.combine_metadata(
                                                    self.rally_metadata(result_json,_meta))
                                                if result is False:
                                                    self.logger.error(
                                                        "Error with ElasticSearch connector")
                                                else :
                                                    if len(result) < 1 :
                                                        self.logger.error(
                                                            "Issue with ElasticSearch Data, \
                                                            for task_id {}".format(task_id))
                                                    else :
                                                        self.elastic.index_result(result,
                                                                                  _id=task_id)
                                    else:
                                        self.logger.error("Cannot find task_id")
                                        self.update_fail_tests()
                                        self.update_total_fail_tests()
                                        self.get_time_dict(
                                            to_time, from_time, benchmark['name'], new_test_name,
                                            workload, "fail")

                                    for data in glob.glob("./{}*".format(test_name)):
                                        shutil.move(data, result_dir)

                                    self._get_details()

                        else:
                            self.logger.info(
                                "Skipping scenario {} (enabled: false)".format(scenario['name']))
                else:
                    self.logger.info(
                        "Skipping benchmark {} (enabled: false)".format(benchmark['name']))
            self.logger.debug("Creating Combined Rally Reports")
            for run in results:
                combined_html_name = 'all-rally-run-{}'.format(run)
                self.gen_scenario_html(results[run], combined_html_name)
                if os.path.isfile('{}.html'.format(combined_html_name)):
                    shutil.move('{}.html'.format(combined_html_name),
                                '{}/{}'.format(self.config['browbeat']['results'], dir_ts))
        else:
            self.logger.error("Config file contains no rally benchmarks.")
Example #7
    def nova_create_pbench_uperf(
            self,
            image,
            flavor,
            zones,
            user,
            test_types,
            protocols,
            samples,
            external,
            test_name,
            send_results=True,
            num_pairs=1,
            password="",
            message_sizes=None,
            instances=None,
            elastic_host=None,
            elastic_port=None,
            cloudname=None,
            **kwargs):

        pbench_path = "/opt/pbench-agent"
        pbench_results = "/var/lib/pbench-agent"

        # Create env
        router = self._create_router({}, external_gw=external)
        network = self._create_network({})
        subnet = self._create_subnet(network, {})
        kwargs["nics"] = [{'net-id': network['network']['id']}]
        self._add_interface_router(subnet['subnet'], router['router'])

        # Launch pbench-jump-host
        jh, jip = self._boot_server_with_fip(image,
                                             flavor,
                                             use_floating_ip=True,
                                             floating_network=external['name'],
                                             key_name=self.context["user"]["keypair"]["name"],
                                             **kwargs)

        servers = []
        clients = []
        # Launch Guests
        if num_pairs == 1:
            server = self._boot_server(
                image,
                flavor,
                key_name=self.context["user"]["keypair"]["name"],
                availability_zone=zones['server'],
                **kwargs)
            client = self._boot_server(
                image,
                flavor,
                key_name=self.context["user"]["keypair"]["name"],
                availability_zone=zones['client'],
                **kwargs)

            # IP Addresses
            servers.append(
                str(server.addresses[network['network']['name']][0]["addr"]))
            clients.append(
                str(client.addresses[network['network']['name']][0]["addr"]))
        else:
            for i in range(num_pairs):
                server = self._boot_server(
                    image,
                    flavor,
                    key_name=self.context["user"]["keypair"]["name"],
                    availability_zone=zones['server'],
                    **kwargs)
                client = self._boot_server(
                    image,
                    flavor,
                    key_name=self.context["user"]["keypair"]["name"],
                    availability_zone=zones['client'],
                    **kwargs)

                # IP Addresses
                servers.append(
                    str(server.addresses[network['network']['name']][0]["addr"]))
                clients.append(
                    str(client.addresses[network['network']['name']][0]["addr"]))

        # Wait for ping
        self._wait_for_ping(jip['ip'])

        # Open SSH Connection
        jump_ssh = sshutils.SSH(
            user, jip['ip'], 22,
            self.context["user"]["keypair"]["private"], password)

        # Check for connectivity
        self._wait_for_ssh(jump_ssh)

        # Write id_rsa to get to guests.
        self._run_command_over_ssh(jump_ssh, {'remote_path': "rm -rf ~/.ssh"})
        self._run_command_over_ssh(jump_ssh, {'remote_path': "mkdir ~/.ssh"})
        jump_ssh.run(
            "cat > ~/.ssh/id_rsa",
            stdin=self.context["user"]["keypair"]["private"])

        jump_ssh.execute("chmod 0600 ~/.ssh/id_rsa")

        # Check status of guest
        ready = False
        retry = 10
        while not ready:
            for sip in servers + clients:
                cmd = "ssh -o StrictHostKeyChecking=no {}@{} /bin/true".format(
                    user, sip)
                s1_exitcode, s1_stdout, s1_stderr = jump_ssh.execute(cmd)
                if retry < 1:
                    LOG.error(
                        "Error : Issue reaching {} the guests through the Jump host".format(sip))
                    return 1
                if s1_exitcode is 0:
                    LOG.info("Server: {} ready".format(sip))
                    ready = True
                else:
                    LOG.info("Error reaching server: {} error {}".format(sip,s1_stderr))
                    retry = retry - 1
                    time.sleep(10)

        # Register pbench across FIP
        for sip in servers + clients:
            cmd = "{}/util-scripts/pbench-register-tool-set --remote={}".format(
                pbench_path, sip)
            jump_ssh.execute(cmd)

        # Quick single test
        # debug = "--message-sizes=1024 --instances=1"
        debug = ""

        # Start uperf against private address
        uperf = "{}/bench-scripts/pbench-uperf --clients={} --servers={} --samples={} {}".format(
            pbench_path, ','.join(clients), ','.join(servers), samples, debug)
        uperf += " --test-types={} --protocols={} --config={}".format(
            test_types,
            protocols,
            test_name)

        if message_sizes is not None :
            uperf += " --message-sizes={}".format(message_sizes)

        if instances is not None:
            uperf += " --instances={}".format(instances)

        # Execute pbench-uperf
        # execute returns, exitcode,stdout,stderr
        LOG.info("Starting Rally - PBench UPerf")
        uperf_exitcode, stdout_uperf, stderr = jump_ssh.execute(uperf)

        # Prepare results
        cmd = "cat {}/uperf_{}*/result.csv".format(pbench_results, test_name)
        exitcode, stdout, stderr = jump_ssh.execute(cmd)

        if send_results :
            if uperf_exitcode != 1:
                cmd = "cat {}/uperf_{}*/result.json".format(
                    pbench_results, test_name)
                LOG.info("Running command : {}".format(cmd))
                exitcode, stdout_json, stderr = jump_ssh.execute(cmd)
                LOG.info("Result: {}".format(stderr))

                es_ts = datetime.datetime.utcnow()
                config = {
                    'elasticsearch': {
                        'host': elastic_host, 'port': elastic_port}, 'browbeat': {
                        'cloud_name': cloudname, 'timestamp': es_ts}}
                elastic = Elastic(config, 'pbench')
                json_result = StringIO.StringIO(stdout_json)
                json_data = json.load(json_result)
                for iteration in json_data:
                    elastic.index_result(iteration,test_name,'results/')
            else:
                LOG.error("Error with PBench Results")

        # Parse results
        result = StringIO.StringIO('\n'.join(stdout.split('\n')[1:]))
        creader = csv.reader(result)
        report = []
        for row in creader:
            if len(row) >= 1:
                report.append(["aggregate.{}".format(row[1]), float(row[2])])
                report.append(["single.{}".format(row[1]), float(row[3])])
        if len(report) > 0:
            self.add_output(
                additive={"title": "PBench UPerf Stats",
                          "description": "PBench UPerf Scenario",
                          "chart_plugin": "StatsTable",
                          "axis_label": "Gbps",
                          "label": "Gbps",
                          "data": report})

        cmd = "{}/util-scripts/pbench-move-results".format(pbench_path)
        self._run_command_over_ssh(jump_ssh, {"remote_path": cmd})
Example #8
def CERC_index(index_name):

    elastic = Elastic(index_name)
    mappings = {
        # "id": Elastic.notanalyzed_field(),
        "title": Elastic.analyzed_field(),
        "content": Elastic.analyzed_field()
    }

    elastic.create_index(mappings=mappings, force=True)

    maindir = "/data/collections/cerc/csiro-corpus"
    num_doc = 0
    for d in os.listdir(maindir):
        inpath = os.path.join(maindir, d)
        infile = open(inpath, mode="r", errors="ignore")
        docs = {}
        isParse = False
        for line in infile.readlines():
            #charset = "utf-8"
            line = line.lower()
            if ("<docno>") in line:  # get doc id
                docno = re.sub(("<docno>|</docno>|\n"), "", line.strip(" "))

            elif ("</dochdr>") in line:  # start to parse
                isParse = True
                doc_dict = {}
                doc = ""
            elif ("</doc>") in line:  # finish parse
                isParse = False
                try:
                    soup = BeautifulSoup(doc, "lxml")
                try:
                    title = soup.title.text
                except AttributeError:  # if there is no title, use an empty string instead
                    title = ""
                for script in soup.findAll("script"):
                    script.extract()
                for style in soup.findAll("style"):
                    style.extract()
                    reg1 = re.compile("<[^>]*>")
                    content = reg1.sub('', soup.prettify())
                    doc_dict["title"] = title
                    doc_dict["content"] = content
                    docs[docno] = doc_dict
                    num_doc += 1

                except Exception:
                    # other files apart from html
                    title = ""
                    content = doc
                    print(content)
                    doc_dict["title"] = title
                    doc_dict["content"] = content
                    docs[docno] = doc_dict
                    num_doc += 1

            elif isParse:
                # parse doc
                doc += line

        "continous update docs and add into index"
        elastic.add_docs_bulk(docs)
        print("finish parse and index for file: ", inpath)

    print(num_doc, " documents indexed")
Example #9
from GetCourses import GetCourses

from Elastic import Elastic

el = Elastic()

print("server is ready")

query = GetCourses()

print(query)
Example #10
def __init__(self):
    self.index_name = None
    self.conn = None
    self.es = Elasticsearch()
    self.db = Database()
    self.elastic = Elastic()
Example #11
                if re.findall(r'(\d+\.\d+\.\d+\.\d+)',line):
                    continue
                else :
                    hosts.append(line)
    if len(hosts) < 1 :
        print "Error no hosts could be read in."
        exit(1)

    payload = { "message": _result,
                "hosts": list(set(hosts)),
                }
    if options.owner :
        payload["owner"] = options.owner
    if options.cloud :
        payload["cloud"] = options.cloud
    if options.ticket :
        payload["ticket"] = options.ticket

    if not options.index :
        print "Missing index"
        exit(1)
    if not options.type :
        print "Missing type"
        exit(1)

    es = Elastic(quads_config['elastic_host'],quads_config['elastic_port'])
    es.index(payload,options.index,options.type)

if __name__ == "__main__":
    main(sys.argv[1:])
Example #12
def parse_and_index(index_name):
    
    elastic = Elastic(index_name)

    mappings = {
        # "id": Elastic.notanalyzed_field(),
        "title": Elastic.analyzed_field(),
        "content": Elastic.analyzed_field()
    }
    
    elastic.create_index(mappings=mappings, force=True)
    
    maindir = "/data/collections/Blogs06"
    
    # list directories
    stage1 = '20051230'
    stage2 = '20060129'
    for d in sorted(os.listdir(maindir)):
        if not d.startswith("200"):
            continue
        dirname = os.path.join(maindir, d)
        # stage 1: index the first 24 directories (active by default;
        # comment out these three lines when running stage 2 or 3)
        if dirname[-8:] > stage1:
            continue
        print(dirname, int(dirname[-8:]))

        # stage 2: index the middle 24 directories by uncommenting the next three lines
        # if dirname[-8:] <= stage1 or dirname[-8:] > stage2:
        #     continue
        # print(dirname, int(dirname[-8:]))

        # stage 3: index the last 23 directories by uncommenting the next three lines
        # if dirname[-8:] <= stage2:
        #     continue
        # print(dirname, int(dirname[-8:]))

        for f in os.listdir(dirname):
            if f.startswith("permalinks-"):
                inpath = os.path.join(dirname, f)
                print(inpath)
    
                infile = gzip.GzipFile(inpath, "r")
                isParse = False
                docs = {}
                for line in infile:
                    charset = "utf-8"
                    line = line.lower().decode(charset)  # bytes to string
                    if "<docno>" in line:  # get doc id
                        docno = re.sub("<docno>|</docno>|\n", "", line.strip(" "))

                    elif "charset" in line and ">" not in line:  # charset declaration in the header
                        charset = line[33:]

                    elif "</dochdr>" in line:  # start to parse
                        isParse = True
                        doc_dict = {}
                        doc = ""

                    elif "</doc>" in line:  # finish parse
                        isParse = False
                        try:
                            soup = BeautifulSoup(doc,'lxml')
                            try:
                                title = soup.title.text
                            except AttributeError:  # if there is no title, use an empty string instead
                                title = ""
                            for script in soup.findAll('script'):
                                script.extract()
                            for style in soup.findAll('style'):
                                style.extract()
                            reg1 = re.compile("<[^>]*>")
                            content = reg1.sub('',soup.prettify())
                            doc_dict["title"] = title
                            doc_dict["content"] = content
                            docs[docno] = doc_dict               
                            #print (docno)
                            #print (content)
                        except Exception:
                            # files beautifulsoup can not handle
                            title = ""
                            content = doc
                            doc_dict["title"] = title
                            doc_dict["content"] = content                    
                            docs[docno] = doc_dict                            
                    
                    elif isParse:
                        # parse doc
                        doc += line
                
                "continous update docs and add into index"
                elastic.add_docs_bulk(docs)
                print ("finish parse and index for file: ",inpath)
Example #13
    def pbench_uperf(
            self,
            image,
            flavor,
            user,
            test_types,
            protocols,
            samples,
            test_name,
            external=None,
            send_results=True,
            num_pairs=1,
            password="",
            network_id=None,
            zones=None,
            message_sizes=None,
            instances=None,
            elastic_host=None,
            elastic_port=None,
            cloudname=None,
            **kwargs):

        pbench_path = "/opt/pbench-agent"
        pbench_results = "/var/lib/pbench-agent"

        # Create env
        if not network_id:
            router = self._create_router({}, external_gw=external)
            network = self._create_network({})
            subnet = self._create_subnet(network, {})
            kwargs["nics"] = [{'net-id': network['network']['id']}]
            self._add_interface_router(subnet['subnet'], router['router'])
        else:
            kwargs["nics"] = [{'net-id': network_id}]

        jump_ssh, jump_host_ip, jump_host = self.build_jump_host(
            external, image, flavor, user, **kwargs)
        _clients, _servers = self.create_guest_pairs(
            jump_ssh, num_pairs, image, flavor, user, zones, **kwargs)

        # Register pbench across FIP
        for sip in _servers + _clients:
            cmd = "{}/util-scripts/pbench-register-tool-set --remote={}".format(
                pbench_path, sip)
            exitcode, stdout, stderr = jump_ssh.execute(cmd)

        # Start uperf against private address
        uperf = "{}/bench-scripts/pbench-uperf --clients={} --servers={} --samples={}".format(
            pbench_path, ','.join(_clients), ','.join(_servers), samples)
        uperf += " --test-types={} --protocols={} --config={}".format(
            test_types,
            protocols,
            test_name)

        if message_sizes is not None:
            uperf += " --message-sizes={}".format(message_sizes)

        if instances is not None:
            uperf += " --instances={}".format(instances)

        # Execute pbench-uperf
        # execute returns, exitcode,stdout,stderr
        LOG.info("Starting Rally - PBench UPerf")
        uperf_exitcode, stdout_uperf, stderr = jump_ssh.execute(uperf)

        # Prepare results
        cmd = "cat {}/uperf_{}*/result.csv".format(pbench_results, test_name)
        exitcode, stdout, stderr = jump_ssh.execute(cmd)
        if exitcode == 1:
            return False

        if send_results:
            if uperf_exitcode != 1:
                cmd = "cat {}/uperf_{}*/result.json".format(
                    pbench_results, test_name)
                LOG.info("Running command : {}".format(cmd))
                exitcode, stdout_json, stderr = jump_ssh.execute(cmd)
                LOG.info("Result: {}".format(stderr))

                es_ts = datetime.datetime.utcnow()
                config = {
                    'elasticsearch': {
                        'host': elastic_host,
                        'port': elastic_port},
                    'browbeat': {
                        'cloud_name': cloudname,
                        'timestamp': es_ts,
                        'num_pairs': num_pairs}}
                elastic = Elastic(config, 'pbench')
                json_result = StringIO.StringIO(stdout_json)
                json_data = json.load(json_result)
                for iteration in json_data:
                    elastic.index_result(iteration, test_name, 'results/')
            else:
                LOG.error("Error with PBench Results")

        # Parse results
        result = StringIO.StringIO('\n'.join(stdout.split('\n')[1:]))
        creader = csv.reader(result)
        report = []
        for row in creader:
            if len(row) >= 1:
                report.append(["aggregate.{}".format(row[1]), float(row[2])])
                report.append(["single.{}".format(row[1]), float(row[3])])
        if len(report) > 0:
            self.add_output(
                additive={"title": "PBench UPerf Stats",
                          "description": "PBench UPerf Scenario",
                          "chart_plugin": "StatsTable",
                          "axis_label": "Gbps",
                          "label": "Gbps",
                          "data": report})

        cmd = "{}/util-scripts/pbench-move-results".format(pbench_path)
        self._run_command_over_ssh(jump_ssh, {"remote_path": cmd})
Example #14
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = CUDA_VISIBLE_DEVICES
logger = logging.getLogger('tf-pose')
logger.setLevel(logging.DEBUG)
# file_log = logging.FileHandler("TfPoseEstimator.log")
# file_log.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter(
    '[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s')
# file_log.setFormatter(formatter)
ch.setFormatter(formatter)
logger.addHandler(ch)
# logger.addHandler(file_log)
if ES_ON:
    handler = Elastic()
    logger.addHandler(handler)


def save_to_kafka(now, person_num, is_fall, url, producer, picture):
    """
    保存信息到kafka,字段含义如下
    :param now:
    :param person_num:
    :param is_fall:
    :param url:
    :param producer:
    :param picture:
    :return:
    """
    logger.debug('Upload message save to kafka')
Example #15
#
# custom_store.py
#
# The custom data store hook for the Big Data Storage.
# The Custom data hook can be enabled in the broker.conf
# inside conf/ folder.
#
# The parameter data will be in dict format and the keys are 'sender','topic', 'message', 'unixtime', 'timestamp'
#
################################################################
import os, sys, time
sys.path.append(os.path.join(os.getcwd(), '..', 'extensions'))
# replace the Elastic installed path with next line
sys.path.append('/usr/local/lib/python2.7/dist-packages')
from Elastic import Elastic
global Elas_inst
confpath = "../extensions/plugin.conf"
Elas_inst = Elastic(confpath)


def handle_Received_Payload(data):
    #
    # Write your code here. Use your connection object to
    #
    #
    # finish your code here.
    #
    # Send data to your data store
    result = Elas_inst.data_consumer(data)
    # if result is None, the write failed
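
A sample invocation of the hook; the key set follows the header comment above, while the values are made up:

# Illustrative payload; keys per the header comment, values invented.
sample = {'sender': 'client-01',
          'topic': 'sensors/temperature',
          'message': '22.5',
          'unixtime': 1520000000,
          'timestamp': '2018-03-02 14:13:20'}
handle_Received_Payload(sample)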
Example #16
def __init__(self):
    self.classification = Classification()
    self.extraction = Extraction()
    self.elastic = Elastic()
    self.z = Database()