def __init__(self, conf_file, start_mode):
    with open(conf_file) as f:
        self.conf = yaml.safe_load(f)
    self.start_mode = start_mode

    self.hdfs_client = hdfs.Config().get_client('dev')

    # Topo information
    # In production, it should be ready in any standby data-center for a quick handover
    self.pickle_dir = 'pickled_nodes'
    self.pickle_dir_local = os.path.join(CONSTANTS.ROOT_DIR, 'pickled_nodes')

    # used for recovery
    self.backup_dir = 'backup'
    # used for testing
    self.computing_state_dir = 'computing_state'

    if start_mode == 'new':
        # create/overwrite these directories
        for d in (self.pickle_dir, self.backup_dir, self.computing_state_dir):
            self.hdfs_client.delete(d, recursive=True)
            self.hdfs_client.makedirs(d)
        time.sleep(2)
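# Note (not part of the original code): hdfs.Config().get_client('dev') resolves
# the 'dev' alias from the HdfsCLI configuration file (~/.hdfscli.cfg by default,
# or the path set in the HDFSCLI_CONFIG environment variable). A minimal config
# for these examples might look like the sketch below; the host, port and user
# are assumptions.
#
#   [global]
#   default.alias = dev
#
#   [dev.alias]
#   url = http://namenode.example.com:50070
#   user = hdfs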
def __init__(self, node_id, type, computing_state=0):
    self.node_id = node_id
    self.type = type

    # update after handling (creating if Spout) each app tuple
    self.computing_state = computing_state
    # for measuring the delay before processing new tuples
    self.last_run_state = computing_state
    # update after handling each tuple
    self.tuple_handling_state = None
    # update after handling (creating if Spout) each BarrierTuple
    self.latest_checked_version = None

    self.hdfs_client = hdfs.Config().get_client('dev')

    # create backup directories
    # TODO: parent dir should be an argument
    self.backup_dir = os.path.join('backup', str(node_id))
    self.node_backup_dir = os.path.join(self.backup_dir, 'node')
    self.node_latest_version_path = os.path.join(self.node_backup_dir, 'latest_version')
    self.hdfs_client.write(self.node_latest_version_path, data=str(self.computing_state))

    self.computing_state_dir = 'computing_state'

    for d in (self.backup_dir, self.node_backup_dir):
        self.hdfs_client.makedirs(d)

    self.hdfs_client.write(
        os.path.join(self.computing_state_dir, '.'.join([str(self.node_id), str(0)])),
        data='')
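# Illustrative sketch only (not from the original class): after taking a new
# checkpoint, a node could advance the persisted version marker roughly like
# this; the method name and the `version` argument are assumptions.
def update_latest_version(self, version):
    # overwrite the marker file with the new version number
    self.hdfs_client.write(self.node_latest_version_path, data=str(version), overwrite=True)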
def __init__(self, node, restart=False, test_mode=True):
    self.node = node
    self.read_interval = (CONSTANTS.SPACE_AUDIT_INTERVAL_RESTART if restart
                          else CONSTANTS.SPACE_AUDIT_INTERVAL)
    self.test_mode = test_mode

    self.log_path = os.path.join(
        CONSTANTS.ROOT_DIR, 'results',
        '_'.join([str(self.node.node_id), 'space']))

    self.hdfs_client = hdfs.Config().get_client('dev')

    # only measure the backup part, which is the most important
    self.storage_max = 0
    self.storage_avg = None

    # in bytes; only the sender is audited
    self.network_normal = 0
    self.network_other = 0
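# Illustrative sketch only (not from the original class): one way such an
# auditor could sample how much space the backups occupy is the client's
# content-summary call; the helper name and default path are assumptions.
def _sample_backup_size(hdfs_client, backup_dir='backup'):
    # GETCONTENTSUMMARY: 'length' is the total size of the files under the directory
    summary = hdfs_client.content(backup_dir)
    return summary['length']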
from kafka import KafkaConsumer
import hdfs
import json

hdfs_client = hdfs.Config().get_client()
kafka_client = KafkaConsumer("test1", bootstrap_servers="sandbox.hortonworks.com:6667")

i = 0
file = 0
queue = []

for msg in kafka_client:
    i = i + 1
    # msg.value is bytes; decode before splitting into HL7 segments
    queue.append(msg.value.decode('utf-8'))
    if i % 1000 == 0:
        # flush the current batch of 1000 messages to a new HDFS file
        with hdfs_client.write("/tmp/test_" + str(file) + ".hl7", encoding='utf-8') as writer:
            hl7_list = []
            for cur in queue:
                hl7 = {}
                # keep only the MSH and OBX fields of interest
                for seg in cur.split('\n'):
                    fields = seg.split('|')
                    if fields[0] == 'MSH':
                        hl7['msh_ts'] = fields[6]
                    if fields[0] == 'OBX':
                        hl7['component'] = fields[3]
                        hl7['value'] = fields[5]
                        hl7['unit'] = fields[6]
                        hl7['result_ts'] = fields[14]
                hl7_list.append(hl7)
            # output format assumed here: one JSON record per line
            writer.write('\n'.join(json.dumps(rec) for rec in hl7_list))
        # start a new batch and a new output file
        queue = []
        file += 1
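# Illustrative only (would not be reached while the consumer loop above runs):
# a written batch can be read back with the same client, e.g.
#
#   with hdfs_client.read("/tmp/test_0.hl7", encoding='utf-8') as reader:
#       first_batch = reader.read()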
from datetime import datetime
import configparser
import logging

import hdfs
import pyrfc

# init
queue_params = {'I_SUBSCRIBER_TYPE': u'BOBJ_DS',
                'I_SUBSCRIBER_NAME': u'PYTHON',
                'I_SUBSCRIBER_PROCESS': u'PYTHON',
                'I_QUEUENAME': u'2LIS_02_ITM',
                'I_EXTRACTION_MODE': u'D'
                }

# Logging
logging.basicConfig(format=u'%(filename)s[LINE:%(lineno)d]# %(levelname)-8s [%(asctime)s] %(message)s',
                    filename=u'/home/local/X5/olga.guzeva/logs/' + queue_params['I_QUEUENAME'] + '_'
                             + datetime.now().strftime('%Y%m%d') + '_log.log',
                    level=logging.INFO)

# Connect to HDFS
client = hdfs.Config().get_client('dev')

logging.info('************Start program************')
print_params()

# Load the config with the connection parameters
config = configparser.ConfigParser()
config.read('/home/local/X5/olga.guzeva/cfg/sapnwrfc.cfg')
cfg_sap = config['SAP_ER1']

# Establish the connection and print the status
conn = pyrfc.Connection(**cfg_sap)
result = conn.call('STFC_CONNECTION', REQUTEXT='Hello SAP!')
logging.info(result)
logging.info('Connection alive = ' + str(conn.alive))
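# Illustrative sketch only (not in the original script): the HDFS client created
# above could be used to persist the RFC result; the target path and the JSON
# serialization are assumptions.
import json
client.write('/tmp/stfc_connection_result.json', data=json.dumps(result),
             encoding='utf-8', overwrite=True)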