示例#1
0
#!/usr/bin/env python
# coding=utf-8
from multiprocessing import Process
import sys
import datetime
import ujson as json
import time
import random
import libpyfeature_extract
import tensorflow as tf

# Module-level feature extractor, built once at import time with the argument
# 'feature_index' and used by process() to turn each input line into a record.
# NOTE(review): 'feature_index' is presumably a config/index path -- confirm.
fe = libpyfeature_extract.PyFeatureExtract('feature_index')


def process(in_path):
    out_path = in_path.replace('train_data', 'nt_ads_train_data_cvr')
    print(in_path, out_path)
    filename = out_path
    options_zlib = tf.python_io.TFRecordOptions(
        tf.python_io.TFRecordCompressionType.GZIP)
    writer = tf.python_io.TFRecordWriter(filename, options=options_zlib)
    cnt = 0
    with open(in_path) as f:
        for line in f:
            sample = json.loads(line)
            if sample['label']['click'] == 0:
                continue
            cnt += 1
            s = fe.extract_tf_example(line)
            writer.write(s)
            if cnt % 1000 == 0:
示例#2
0
#!/usr/bin/env python
# coding=utf-8
from multiprocessing import Process
import sys
import datetime
import ujson as json
import time
import random
import libpyfeature_extract
import tensorflow as tf


# Module-level feature extractor, built once at import time with an empty
# string argument and used by process() to turn each input line into a record.
# NOTE(review): the meaning of the '' argument is not visible here -- confirm.
fe = libpyfeature_extract.PyFeatureExtract('')

def process(in_path):
    """Convert a line-oriented sample file into a GZIP-compressed TFRecord file.

    Every line of ``in_path`` is passed to ``fe.extract_tf_example`` and the
    returned value is written as one record. The output path is ``in_path``
    with each occurrence of ``'train_data'`` replaced by ``'ads_train_data'``.
    Progress is printed every 1000 lines.

    Args:
        in_path: Path of the input file; must contain ``'train_data'``.

    Raises:
        ValueError: If ``in_path`` does not contain ``'train_data'``. The
            derived output path would then equal the input path, and the
            output would be written over the file being read.
    """
    out_path = in_path.replace('train_data', 'ads_train_data')
    print(in_path, out_path)
    if out_path == in_path:
        # Without the marker the replace() is a no-op; refuse rather than
        # open a writer on the very file we are about to read.
        raise ValueError(
            "in_path %r does not contain 'train_data'; "
            "refusing to write the output over the input" % (in_path,))

    options_zlib = tf.python_io.TFRecordOptions(
        tf.python_io.TFRecordCompressionType.GZIP)
    writer = tf.python_io.TFRecordWriter(out_path, options=options_zlib)
    try:
        with open(in_path) as f:
            for cnt, line in enumerate(f, 1):
                writer.write(fe.extract_tf_example(line))
                if cnt % 1000 == 0:
                    print('process %s' % cnt)
    finally:
        # Always close the writer, even if reading or extraction fails.
        writer.close()

 def callPartitionSetup(self):
     """Make the relative 'lib' directory importable and build the extractor.

     Ensures ``'lib'`` is on ``sys.path``, imports ``libpyfeature_extract``
     and stores a ``PyFeatureExtract('')`` instance on ``self.fe_lib``.
     """
     # Add the entry only once so repeated setup calls in the same
     # interpreter do not keep growing sys.path with duplicates.
     if 'lib' not in sys.path:
         sys.path.append('lib')
     # Imported here (not at module top) because the module is only
     # resolvable after 'lib' has been added to sys.path above.
     import libpyfeature_extract
     self.fe_lib = libpyfeature_extract.PyFeatureExtract('')