def _load_to_hdfs(self, file):
    """Load a local flow file into a timestamped HDFS folder and notify RabbitMQ.

    :param file: local path of the flow file to ingest.
    """
    # Derive the HDFS layout pieces from the file name. Only the hour,
    # date path and bare file name are used here; the other unpacked
    # values (year/month/day) are discarded.
    _, _, _, binary_hour, binary_date_path, file_name = util.build_hdfs_path(file, 'flow')

    # Destination folder: <root>/<date_path>/<hour> — presumably a
    # date/hour partition layout; exact format comes from build_hdfs_path.
    hdfs_path = "{0}/{1}/{2}".format(self._hdfs_root_path, binary_date_path, binary_hour)
    util.creat_hdfs_folder(hdfs_path)

    # Copy the local file into HDFS.
    util.load_to_hdfs(file_name, file, hdfs_path)

    # Tell downstream consumers (via RabbitMQ) where the new file landed.
    hadoop_pcap_file = "{0}/{1}".format(hdfs_path, file_name)
    util.send_new_file_notification(hadoop_pcap_file, self._queue_name)

    # Parenthesized form prints identically under Python 2 and is valid Python 3.
    print("Done !!!!!")
def _process_pcap_file(self, file_name, file_local_path, hdfs_root_path):
    """Load a pcap file into HDFS, splitting oversized files first.

    Small files are loaded directly and a RabbitMQ notification is sent;
    files over the size threshold are handed to ``_split_pcap_file``
    (which presumably handles its own notifications — confirm).

    :param file_name: bare pcap file name; expected to embed a timestamp
        before the first '.' (date in chars [-12:-4], hour in [-4:-2] —
        TODO confirm the exact naming convention with the producer).
    :param file_local_path: local filesystem path of the pcap file.
    :param hdfs_root_path: root HDFS folder for pcap ingestion.
    """
    # Threshold above which a capture is split before loading. Value
    # carried over unchanged from the original code (~1.07 GiB).
    max_file_size_bytes = 1145498644

    # Extract the timestamp portion of the file name.
    file_date = file_name.split('.')[0]
    pcap_hour = file_date[-4:-2]
    pcap_date_path = file_date[-12:-4]

    # Destination folder: <root>/<date>/<hour>.
    hdfs_path = "{0}/{1}/{2}".format(hdfs_root_path, pcap_date_path, pcap_hour)
    util.creat_hdfs_folder(hdfs_path)

    # os.path.getsize is the direct idiom for "size in bytes".
    if os.path.getsize(file_local_path) > max_file_size_bytes:
        # Oversized capture: split into chunks before loading.
        self._split_pcap_file(file_name, file_local_path, hdfs_path)
    else:
        # Load the file to HDFS as-is.
        util.load_to_hdfs(file_name, file_local_path, hdfs_path)

        # Notify downstream consumers (RabbitMQ) of the new HDFS file.
        hadoop_pcap_file = "{0}/{1}".format(hdfs_path, file_name)
        util.send_new_file_notification(hadoop_pcap_file, self._queue_name)