def process(params):
    # load and run pipeline
    datasource = CSVDataSource(params.input_file)
    p = Pipeline('DEV')
    p.load_config(params.conf_file)
    p.load_model(params.model_file)
    p(datasource)
    # save, if necessary
    if params.output_file:
        datasource.save(open(params.output_file, 'w'))
    # push to elasticsearch
    es = Elasticsearch([{'host': 'localhost', 'port': 9200}],
                       http_auth=('admin', 'admin'))
    data = [item for item in datasource]
    for item in data:
        item['model'] = p._scoring_model_name
        item['raw'] = str(item['labels'])
        # normalize NaN values so they serialize cleanly
        for key in item:
            if item[key] == 'NaN' or (is_numeric(item[key]) and np.isnan(item[key])):
                item[key] = None
    helpers.bulk(es, data, index="anomalies", doc_type="type")
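# Illustrative CLI wiring for the scoring process() above. This is a minimal
# sketch: the flag names and the argparse setup are assumptions, not the
# project's actual entry point. It only documents which attributes process()
# reads from its params object: input_file, conf_file, model_file, output_file.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Score a CSV file with a trained OSAS pipeline')
    parser.add_argument('--input-file', dest='input_file', required=True, help='CSV file to score')
    parser.add_argument('--conf-file', dest='conf_file', required=True, help='pipeline configuration file')
    parser.add_argument('--model-file', dest='model_file', required=True, help='pretrained pipeline model (JSON)')
    parser.add_argument('--output-file', dest='output_file', default=None, help='optional CSV output path')
    process(parser.parse_args())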
def process(params):
    # load configuration, build the pipeline model and persist it
    datasource = CSVDataSource(params.input_file)
    p = Pipeline('DEV')
    p.load_config(params.conf_file)
    model = p.build_pipeline(datasource)
    json.dump(model, open(params.model_file, 'w'))
def process(params):
    datasource = CSVDataSource(params.input_file)
    sys.stdout.write('Preprocessing')
    field_type = _detect_field_type(datasource)
    sys.stdout.write('\t::Detected field types:\n')
    for key in field_type:
        sys.stdout.write('\t\t"{0}": {1}\n'.format(key, field_type[key]))
    generators = _get_generators(datasource, field_type)
    sys.stdout.write('\t::Suggested generators:\n')
    for item in generators:
        sys.stdout.write('\t\t{0}: {1}\n'.format(item[0], item[1]))
    _write_conf(generators, params.output_file)
                label_list.append(label)
            all_labels.append(label_list)
        dataset[dest_field_labels] = all_labels
        dataset['_labels'] = all_labels
        if self._detect_anomalies is not None:
            scores = self._detect_anomalies(dataset)
            dataset[dest_field_score] = scores


if __name__ == '__main__':
    # quick smoke test: build a pipeline, apply it and report timings
    p = Pipeline('DEV')
    p.load_config('tests/pipeline_test.conf')
    import time

    ts1 = time.time()
    datasource = CSVDataSource('tests/test_small.csv')
    ts2 = time.time()
    pipeline_model = p.build_pipeline(datasource)
    ts3 = time.time()
    p(datasource)
    ts4 = time.time()
    json.dump(pipeline_model, open('tests/pipeline.json', 'w'), indent=4)
    for item in datasource[:10]:
        print(item)
    print()
    print()
    print(
        "Timing:\n\tLoad dataset: {0}\n\tBuild pipeline: {1}\n\tApply models: {2}\n\tDataset size: {3} entries\n".format(
            ts2 - ts1, ts3 - ts2, ts4 - ts3, len(datasource)))
    @staticmethod
    def from_pretrained(pretrained: str) -> AnomalyDetection:
        # restore the pickled classifier and its metadata from the JSON blob
        tmp = json.loads(pretrained)
        pre_model = pickle.loads(base64.b64decode(tmp['model']))
        model = SupervisedClassifierAnomaly()
        model._encoder = pre_model['encoder']
        model._ind_to_ground_truth = pre_model['ind_to_ground_truth']
        model._is_binary_preds = pre_model['is_binary_preds']
        model._model = pre_model['classifier']
        return model


if __name__ == "__main__":
    from osas.data.datasources import CSVDataSource

    data_source = CSVDataSource('corpus/hubble_test_tags.csv')

    # labels are stored as stringified Python lists in the CSV
    def converter(x):
        return ast.literal_eval(x)

    data_source._data['_labels'] = data_source._data['_labels'].apply(
        lambda x: converter(x))

    # round-trip the model through its pretrained representation
    model = StatisticalNGramAnomaly()
    tmp = model.build_model(data_source)
    tmp = json.dumps(tmp)
    model2 = StatisticalNGramAnomaly.from_pretrained(tmp)
    scores = model(data_source)
    scores2 = model2(data_source)
    import operator
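    # A minimal sanity check (an assumption, not part of the original test):
    # a correct pretrained round-trip should reproduce the scores, so the two
    # scoring passes are compared element-wise.
    for s1, s2 in zip(scores, scores2):
        assert abs(s1 - s2) < 1e-8, 'pretrained round-trip changed the scores'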
    reg_lab = [(regex, label) for regex, label in zip(regex_list, label_list)]
    kblg = KnowledgeBased(reg_lab, field_name)
    return kblg


if __name__ == '__main__':
    mfc = MultinomialFieldCombiner(['user', 'parent_process'],
                                   absolute_threshold=500,
                                   relative_threshold=0.005)
    nfc = NumericField('count')
    tf = TextField('command', lm_mode='token', ngram_range=(3, 5))
    klg = KeywordBased(
        keyword_list=['bash', 'java', 'netcat', 'sudo', 'apache2'],
        field_name='command')

    from osas.data.datasources import CSVDataSource

    dataset = CSVDataSource('corpus/test.csv')
    print("Building model")
    klg.build_model(dataset)
    print("Done")
    # rez = mfc.build_model(dataset)
    for item in dataset[:20]:
        print("\n\n")
        print(item)
        print("")
        print(klg(item))
        print("\n\n")
        print("=" * 20)