Exemplo n.º 1
0
def main():
    """Export ClickHouse messages for a date range to CSV and gather analytics.

    Settings come from ``config.ini``; the start and end dates are taken from
    ``sys.argv[1]`` and ``sys.argv[2]``. For each date the query is issued
    repeatedly with a growing row position and ``limit_step`` (presumably
    used as offset and page size by the generated query -- confirm against
    ``GenerateQueryFormat``). Every row is folded into a complex message;
    once a complex message reports itself as fully filled it is matched
    against templates, written to the CSV output, added to the analytics
    and removed from ``complex_messages``. Whatever remains afterwards is
    handed to ``WriteDownAllRemainingData``.

    Rebinds the module globals ``limit_step`` and ``complex_messages``.
    """
    global limit_step
    global complex_messages

    config = LoadConfigs('config.ini')

    def setting(section, option):
        # Shorthand for reading one option from the loaded configuration.
        return GetConfigParameter(config, section, option)

    # Output, analytics and ClickHouse settings.
    template_paths = setting('Templates', 'paths').split(',')
    max_lines = int(setting('OutputFile', 'filemaxlines'))
    delimiter = setting('OutputFile', 'delimiter')
    quotechar = setting('OutputFile', 'quotechar')
    outputdir = setting('OutputFile', 'outputdir')
    analitics_path = outputdir + '/' + setting('Analitics', 'filename')
    # TODO (carried over): check that every metric is present in the query.
    metrics = setting('Analitics', 'metrics').split(',')
    limit_step = int(setting('ClickHouse', 'limit_step'))
    clickhouse_host = setting('ClickHouse', 'host')
    colums = setting('ClickHouse', 'colums').split(',')
    date_start, date_end = sys.argv[1], sys.argv[2]

    complex_messages = []
    # Any value above zero lets the first page of the first date be fetched;
    # the real bound is recomputed after every page.
    total_count = limit_step * 2

    GetTemplatesFromCSV(template_paths)

    client = ClickHouseClient(clickhouse_host, on_progress=OnProgress)
    query_format = GenerateQueryFormat(config, colums)

    filename = GenerateFileName(outputdir, date_start, date_end)
    header = colums + ['FullText', 'MessageType', 'Template']
    csvwriter = CSVWriter(max_lines, filename, delimiter, quotechar, u'\ufeff', header)
    analitics = Analitics(analitics_path, metrics)

    for single_date in DateRange(date_start, date_end):
        date_str = single_date.strftime("%Y-%m-%d")
        ColoredPrint('\n[Determiner]', 'Working on date({})'.format(date_str), bcolors.OKGREEN, bcolors.WARNING)
        offset = 0
        handled = 0
        while offset < total_count:
            query = query_format.format(date_str, offset, limit_step)
            result = client.select(query, on_progress=OnProgress, send_progress_in_http_headers=1)
            print()
            page_size = len(result.data)
            # A full page pushes the bound past the next offset (keep going);
            # a short or empty page leaves it at or below it (stop).
            total_count = offset + page_size + 1
            counter = Counter(page_size, 0.2)
            tag = bcolors.OKGREEN + '[' + date_str + '] ' + bcolors.ENDC
            for row in result.data:
                message = Message(colums, row)
                msg_id, c_message = ToComplexMessage(message)
                if c_message.isFullFiled():
                    templates = GetTemplates(
                        message.get('OperatorGroupId'),
                        message.get('OutgoingOriginator'),
                    )
                    c_message.determineTemplate(templates)
                    c_message.writePartsToCsvFile(csvwriter)
                    analitics.addData(
                        date_str[:-3],  # "YYYY-MM": the date without its day part
                        c_message.getParameters(metrics),
                        c_message.getType(),
                        c_message.getCount(),
                    )
                    complex_messages.pop(msg_id)
                handled += 1
                counter.step(tag + str(handled) + ' messages handled')
            counter.lastTell(tag + str(handled) + ' messages handled')
            # Drop the page before the next one is requested.
            del result
            offset += limit_step

    WriteDownAllRemainingData(csvwriter, analitics, metrics)
    csvwriter.close()
    SaveAnalitics(analitics, delimiter, quotechar)
Exemplo n.º 2
0
    def __init__(self, server, port, table_name, df):
        """Store the arguments, check their types and create a client.

        Args:
            server: a ``ClickHouseServer`` instance.
            port: port passed to ``server.wait_for_request`` and to the client.
            table_name: name of the table this object works with.
            df: a ``DataFrame`` instance.

        Raises:
            Exception: if ``server`` is not a ``ClickHouseServer`` or ``df``
                is not a ``DataFrame``.
        """
        self.server, self.port = server, port
        self.table_name, self.df = table_name, df

        # Type checks run after the attributes are stored, as before.
        if not isinstance(server, ClickHouseServer):
            raise Exception('Expected ClickHouseServer, got {!r}'.format(server))
        if not isinstance(df, DataFrame):
            raise Exception('Expected DataFrame, got {!r}'.format(df))

        server.wait_for_request(port)
        self.client = ClickHouseClient(server.binary_path, port)
Exemplo n.º 3
0
class ClickHouseTable:
    """Context manager that loads a DataFrame into the table ``test.<table_name>``.

    Entering the context (re)creates the table from the frame and inserts its
    rows; leaving the context drops the table. ``apply_model`` runs
    ``modelEvaluate`` over the table's columns.
    """

    # pandas dtype name -> ClickHouse column type; any other dtype is
    # declared as String.
    _TYPES_MAP = {
        'float64': 'Float64',
        'int64': 'Int64',
        'float32': 'Float32',
        'int32': 'Int32',
    }

    def __init__(self, server, port, table_name, df):
        """Store the arguments, check their types and create a client.

        Args:
            server: a ``ClickHouseServer`` instance.
            port: port passed to ``server.wait_for_request`` and to the client.
            table_name: name of the table inside the ``test`` database.
            df: the ``DataFrame`` whose columns and rows fill the table.

        Raises:
            Exception: if ``server`` is not a ``ClickHouseServer`` or ``df``
                is not a ``DataFrame``.
        """
        self.server = server
        self.port = port
        self.table_name = table_name
        self.df = df

        if not isinstance(self.server, ClickHouseServer):
            raise Exception('Expected ClickHouseServer, got ' + repr(self.server))
        if not isinstance(self.df, DataFrame):
            raise Exception('Expected DataFrame, got ' + repr(self.df))

        self.server.wait_for_request(port)
        self.client = ClickHouseClient(server.binary_path, port)

    def _convert(self, name):
        """Return the ClickHouse type for dtype name ``name`` ('String' if unmapped)."""
        return self._TYPES_MAP.get(name, 'String')

    def _create_table_from_df(self):
        """(Re)create ``test.<table_name>`` from the frame and insert its rows as CSV."""
        self.client.query('create database if not exists test')
        self.client.query('drop table if exists test.{}'.format(self.table_name))

        column_types = list(self.df.dtypes)
        column_names = list(self.df)
        schema = ', '.join(
            name + ' ' + self._convert(str(dtype))
            for name, dtype in zip(column_names, column_types)
        )
        # Single-argument call form: prints the same line under the Python 2
        # print statement and the Python 3 print function.
        print('schema: ' + schema)

        create_query = 'create table test.{} (date Date DEFAULT today(), {}) engine = MergeTree(date, (date), 8192)'
        self.client.query(create_query.format(self.table_name, schema))

        insert_query = 'insert into test.{} ({}) format CSV'

        with tempfile.TemporaryFile() as tmp_file:
            self.df.to_csv(tmp_file, header=False, index=False)
            # Rewind so the client reads the CSV from the beginning.
            tmp_file.seek(0)
            self.client.query(insert_query.format(self.table_name, ', '.join(column_names)), pipe=tmp_file)

    def apply_model(self, model_name, float_columns, cat_columns):
        """Evaluate ``model_name`` over the table and return the values as floats.

        Args:
            model_name: model name passed to ``modelEvaluate``.
            float_columns: column names listed first in the call.
            cat_columns: column names listed after ``float_columns``.

        Returns:
            A tuple with one float per whitespace-separated token of the
            query output.
        """
        columns = ', '.join(list(float_columns) + list(cat_columns))
        query = "select modelEvaluate('{}', {}) from test.{} format TSV"
        result = self.client.query(query.format(model_name, columns, self.table_name))
        # split() without arguments already strips whitespace and drops empty
        # tokens, so each token converts directly.
        return tuple(float(token) for token in result.split())

    def _drop_table(self):
        """Drop ``test.<table_name>``."""
        self.client.query('drop table test.{}'.format(self.table_name))

    def __enter__(self):
        """Create and fill the table, then return this object."""
        self._create_table_from_df()
        return self

    def __exit__(self, type, value, traceback):
        """Drop the table; exceptions from the ``with`` body are not suppressed."""
        self._drop_table()
Exemplo n.º 4
0
from client import ClickHouseClient
from errors import Error as ClickHouseError

def on_progress(total, read, progress):
    """Print the three progress values handed to the callback."""
    print(total, read, progress)

# Run one SELECT with progress reporting and print the returned data.
try:
    client = ClickHouseClient(
        'http://ch00.fin.adfox.ru:8123/',
        on_progress=on_progress,
        user='******',
        password='******',
    )
    query = 'SELECT date FROM adfox.dist_elog WHERE date > toDate(0)'
    result = client.select(query, send_progress_in_http_headers=1)
    print(result.data)
except (ClickHouseError, Exception) as e:
    # Client errors and any other failure are reported the same way.
    print(e)
Exemplo n.º 5
0
from client import ClickHouseClient
from errors import Error as ClickHouseError
import logging

# Configure the root logger so records of level DEBUG and above are emitted.
logging.basicConfig(level=logging.DEBUG)


def on_progress(total, read, progress):
    """Print the three progress values handed to the callback."""
    print(total, read, progress)


# Run one aggregate SELECT with progress reporting and print the returned data.
try:
    client = ClickHouseClient(
        'http://example.com:8123/',
        on_progress=on_progress,
        user='******',
        password='******',
    )
    query = 'SELECT sum(load) as loads FROM dbname.table WHERE date = today()'
    result = client.select(query, send_progress_in_http_headers=1)
    print(result.data)
except (ClickHouseError, Exception) as e:
    # Client errors and any other failure are reported the same way.
    print(e)
Exemplo n.º 6
0
from client import ClickHouseClient
from errors import Error as ClickHouseError


def on_progress(total, read, progress):
    """Print the three progress values handed to the callback."""
    print(total, read, progress)


# Run a trivial SELECT with progress reporting and print the returned data.
# NOTE(review): the host string carries no URL scheme, unlike typical
# 'http://host:port/' addresses -- confirm the client accepts this form.
try:
    client = ClickHouseClient(
        'clh.datalight.me:8123/',
        on_progress=on_progress,
        user='******',
        password='******',
    )
    query = 'SELECT 1'
    result = client.select(query, send_progress_in_http_headers=1)
    print(result.data)
except (ClickHouseError, Exception) as e:
    # Client errors and any other failure are reported the same way.
    print(e)