示例#1
0
class DataSet:
    """Collection of log items discovered below a root directory.

    Every sub-directory of the root whose name matches ``NAME_PATTERN`` is
    walked recursively; each file found is wrapped by the factory that
    ``LOG_TYPE_TABLE`` holds for that file name and stored in ``item_set``.
    """

    # Raw string: the pattern text is unchanged, it just no longer depends on
    # the parser passing the invalid "\+" escape through untouched.
    NAME_PATTERN = re.compile(
        r'(.*)([0-9]{2})_([0-9]{2})_([0-9]{4})_([0-9]{2})_([0-9]{2})\+?([0-9]+)?.log_?$'
    )

    def __init__(self, root_path):
        """Scan ``root_path`` and fill ``item_set``.

        root_path: The root path for shell logs.

        NOTE: the working directory of the process is changed to
        ``root_path``; this side effect is kept from the original code.
        """
        self.item_set = SList()

        os.chdir(root_path)
        self.ROOT_PATH = os.path.abspath(os.getcwd())

        for dir_item in os.listdir(self.ROOT_PATH):
            if os.path.isdir(os.path.join(self.ROOT_PATH, dir_item)):
                self._process_single_item(dir_item)
            else:
                print("Not a directory: {}".format(dir_item))

    def _process_single_item(self, dir_name):
        """Process a single log directory located directly under the root.

        Directories whose name does not match ``NAME_PATTERN`` are reported
        and skipped (previously they raised ``AttributeError`` because the
        failed match was dereferenced).
        """
        match_group = self.NAME_PATTERN.match(dir_name)
        if match_group is None:
            print("Skip directory with unexpected name: {}".format(dir_name))
            return

        # lastindex is the highest group that took part in the match, so a
        # name without the optional numeric suffix yields six fields and a
        # name with it yields seven.
        info = [match_group.group(i + 1) for i in range(match_group.lastindex)]

        file_list = self._unwrap_directory(
            os.path.join(self.ROOT_PATH, dir_name))
        for item in file_list:
            # item is (full path, file name); the file name selects the class.
            # NOTE(review): a file name missing from LOG_TYPE_TABLE still
            # propagates the lookup error - confirm that is intended.
            self.item_set.append(LOG_TYPE_TABLE[item[1]](info, item))

    def _unwrap_directory(self, dir_path):
        """Return ``(full_path, file_name)`` for every file below ``dir_path``.

        Sub-directories are descended into recursively; entries that are
        neither a file nor a directory are reported and left out.
        """
        file_set = []
        for item in os.listdir(dir_path):
            tem_path = os.path.join(dir_path, item)
            if os.path.isdir(tem_path):
                file_set += self._unwrap_directory(tem_path)
            elif os.path.isfile(tem_path):
                file_set.append((tem_path, item))
            else:
                print('ERROR::Detect an item that is neither file nor directory.')
                print("\t {}".format(tem_path))
        return file_set
示例#2
0
class DataSet:
    """Set of log items collected from the sub-directories of a root path.

    A sub-directory is processed when its name matches ``NAME_PATTERN``; every
    file below it is turned into an item through ``LOG_TYPE_TABLE`` (keyed by
    file name) and appended to ``item_set``.
    """

    # Written as a raw string so the "\+" escape is explicit; the compiled
    # pattern is the same as before.
    NAME_PATTERN = re.compile(r'(.*)([0-9]{2})_([0-9]{2})_([0-9]{4})_([0-9]{2})_([0-9]{2})\+?([0-9]+)?.log_?$')

    def __init__(self, root_path):
        """Scan ``root_path`` (the root path for shell logs) and fill ``item_set``.

        NOTE: changes the process working directory to ``root_path``, exactly
        as the original implementation did.
        """
        self.item_set = SList()

        os.chdir(root_path)
        self.ROOT_PATH = os.path.abspath(os.getcwd())

        for dir_item in os.listdir(self.ROOT_PATH):
            if os.path.isdir(os.path.join(self.ROOT_PATH, dir_item)):
                self._process_single_item(dir_item)
            else:
                print("Not a directory: {}".format(dir_item))

    def _process_single_item(self, dir_name):
        """Process a single log directory found directly under the root.

        A directory name that does not match ``NAME_PATTERN`` is reported and
        skipped; it used to abort the scan with ``AttributeError`` because the
        ``None`` match result was used unchecked.
        """
        match_group = self.NAME_PATTERN.match(dir_name)
        if match_group is None:
            print("Skip directory with unexpected name: {}".format(dir_name))
            return

        # Only groups up to lastindex took part in the match: six fields
        # without the optional numeric suffix, seven with it.
        info = [match_group.group(i + 1) for i in range(match_group.lastindex)]

        for item in self._unwrap_directory(os.path.join(self.ROOT_PATH, dir_name)):
            # item is (full path, file name); the file name picks the class.
            # NOTE(review): an unregistered file name still propagates the
            # LOG_TYPE_TABLE lookup error - confirm that is intended.
            self.item_set.append(LOG_TYPE_TABLE[item[1]](info, item))

    def _unwrap_directory(self, dir_path):
        """Get all files contained in the directory, recursively.

        Returns a list of ``(full_path, file_name)`` tuples.  Entries that are
        neither file nor directory are reported and omitted.
        """
        file_set = []
        for item in os.listdir(dir_path):
            tem_path = os.path.join(dir_path, item)
            if os.path.isdir(tem_path):
                file_set += self._unwrap_directory(tem_path)
            elif os.path.isfile(tem_path):
                file_set.append((tem_path, item))
            else:
                print('ERROR::Detect an item that is neither file nor directory.')
                print("\t {}".format(tem_path))
        return file_set
示例#3
0
	def generate_feature(self, data_item):
		"""Build labelled window samples for one data item.

		Each sample is a tuple ``(window, label, point)`` where ``window`` is a
		numpy array of the ``image_data`` values around ``point``, clipped to
		the image bounds.  Every point in ``data_item.tag`` gives a sample
		labelled 1; in addition ``int(len(tag) * POS_NEG_RATIO)`` random points
		are drawn and labelled 1 when ``data_item.contain_tag`` is truthy for
		their surrounding ranges, otherwise 0.

		The ratio of label-1 samples to all samples is printed, and only the
		samples whose window length equals
		``(2*MAT_SIZE[0]+1) * (2*MAT_SIZE[1]+1)`` are returned.
		"""
		half_x = self.MAT_SIZE[0]
		half_y = self.MAT_SIZE[1]

		def span(coord, half, size):
			# Inclusive index range around coord, clipped to [0, size - 1].
			return range(max(0, int(coord - half)), min(size - 1, int(coord + half)) + 1)

		def window(center):
			# Values are collected row by row inside the clipped rectangle.
			return numpy.array([
				image_data[x][y]
				for x in span(center[0], half_x, data_item.img_dim[0])
				for y in span(center[1], half_y, data_item.img_dim[1])
			])

		samples = SList([])
		image_data = data_item.image_data

		# Tagged points.
		for pnt in data_item.tag:
			samples.append((window(pnt), 1, pnt))

		# Randomly drawn points; the label depends on whether a tag is nearby.
		random_count = int(len(data_item.tag) * self.POS_NEG_RATIO)
		for _ in range(random_count):
			pnt = (
				random.randint(half_x, data_item.img_dim[0] - half_x),
				random.randint(half_y, data_item.img_dim[1] - half_y),
			)
			near_tag = data_item.contain_tag(
				range(pnt[0] - half_x, pnt[0] + half_x),
				range(pnt[1] - half_y, pnt[1] + half_y),
			)
			label = 1 if near_tag else 0
			samples.append((window(pnt), label, pnt))

		positives = samples.filter_by(lambda sample: sample[1] == 1).count()
		print('{}/{}'.format(positives, samples.count()))

		full_length = (2 * half_x + 1) * (2 * half_y + 1)
		return samples.filter_by(lambda sample: len(sample[0]) == full_length)
示例#4
0
class LogDataItem(DataItem):
    """Shell-log data item (``LOG_TYPE_SHELL``).

    ``_parse_operation`` turns the log text into ``(timestamp, codes)`` pairs,
    where ``codes`` is a list of decimal key-code strings.  ``combine`` groups
    consecutive codes into command dictionaries stored in ``cmd_list``.
    """

    # "<10-digit timestamp> <code>": used only to detect timestamped logs.
    _TIMESTAMP_PATTERN = re.compile('[0-9]{10} [0-9]+')
    # Codes that extend the pending command instead of terminating it:
    # 8, 9 and 32..127.
    _TEXT_CODES = frozenset([8, 9] + list(range(32, 128)))
    # Symbolic names for some of the codes that are not plain text.
    _DELIMITER_NAMES = {13: '[RETURN]', 27: '[ESC]', 127: '[DELETE]'}

    def __init__(self, info_list, file_info):
        DataItem.__init__(self, info_list, file_info)
        self.log_type = LOG_TYPE_SHELL

    def _parse_operation(self, log_content):
        """Fill ``operation_list`` with ``(timestamp, codes)`` tuples.

        ``has_timestamp`` is set to whether the log contains at least one
        "<10 digits> <digits>" pair.  With timestamps, a line needs at least
        two tokens and its first token is the timestamp; without, every
        non-empty line is stored with the timestamp ``'0'``.
        """
        # The pattern match is only a yes/no probe.  The list used to be
        # seeded with the matched *strings*, which combine() then indexed
        # character by character and turned into bogus commands.
        self.has_timestamp = self._TIMESTAMP_PATTERN.search(log_content) is not None
        self.operation_list = []
        for line in log_content.split("\n"):
            tokens = [token for token in line.split(' ') if token]
            if self.has_timestamp:
                if len(tokens) > 1:
                    timestamp = tokens[0]
                    # Every token equal to the timestamp is dropped, not
                    # just the leading one (kept from the original).
                    codes = [token for token in tokens if token != timestamp]
                    self.operation_list.append((timestamp, codes))
            elif tokens:
                self.operation_list.append(('0', tokens))

    def combine(self):
        """Group the parsed key codes into commands and return ``self``.

        Codes are accumulated until one outside ``_TEXT_CODES`` appears; that
        code closes the command, which is appended to ``cmd_list`` with the
        timestamps of its first and last code.
        """
        self.cmd_list = SList([])
        pending_codes = []
        pending_stamps = []
        for item in self.operation_list:
            codes = item[1]
            # Multi-code entries containing 27 (rendered as [ESC] below) are
            # skipped as a whole.
            if len(codes) > 1 and '27' in codes:
                continue
            if any(int(code) < 0 for code in codes):
                continue
            # NOTE(review): entries containing code 4 are dropped; presumably
            # this is Ctrl-D / end-of-transmission - confirm.
            if '4' in codes:
                continue
            for op in codes:
                pending_stamps.append(item[0])
                pending_codes.append(op)
                if int(op) not in self._TEXT_CODES:
                    content, delimiter = self._convert_to_text(pending_codes)
                    self.cmd_list.append({
                        'action': 'shell',
                        'content': content,
                        'delimiter': delimiter,
                        'timestamp': int(pending_stamps[0]),
                        'timestamp_end': int(pending_stamps[-1]),
                    })
                    pending_codes = []
                    pending_stamps = []
        # NOTE(review): codes still pending here (no closing delimiter) are
        # discarded, as in the original - confirm that is intended.
        return self

    def _convert_to_text(self, cmd):
        """Render a list of key-code strings as ``(text, delimiter)``.

        Codes 32..126 become their character, 8 deletes the last character
        of the text, 9 becomes ``[TAB]``.  Any other code sets the delimiter:
        a symbolic name for 13/27/127, otherwise ``[<code>]``.  When several
        such codes occur, the last one wins.
        """
        text = ''
        rtn = ''
        for c in cmd:
            code = int(c)
            if 32 <= code < 127:
                text += chr(code)
            elif code == 8:
                text = text[:-1]
            elif code == 9:
                text += '[TAB]'
            else:
                rtn = self._DELIMITER_NAMES.get(code, "[{}]".format(c))
        return text, rtn
示例#5
0
class EditorDataItem(DataItem):
    """Editor-log data item (``LOG_TYPE_EDITOR``).

    The log holds one JSON object per line.  ``combine`` attaches a
    ``content`` string to each known operation and merges adjacent
    insert/remove operations into single commands stored in ``cmd_list``.
    """

    # (previous action, current action) pairs that are considered for merging.
    _MERGEABLE_PAIRS = (
        (u'insert', u'remove'),
        (u'insert', u'insert'),
        (u'remove', u'remove'),
    )

    def __init__(self, info_list, file_info):
        DataItem.__init__(self, info_list, file_info)
        self.log_type = LOG_TYPE_EDITOR

    def _parse_operation(self, log_content):
        """Append the JSON object of every non-empty line to ``operation_list``."""
        for line in log_content.split(u"\n"):
            if line:
                self.operation_list.append(json.loads(line))

    def combine(self):
        """Build ``cmd_list`` from ``operation_list`` and return ``self``.

        Operations are modified in place: each known action receives a
        ``content`` key, and merged operations extend the earlier command.
        Operations with any other action are ignored.
        """
        prev_command = None
        self.cmd_list = SList([])
        for item in self.operation_list:
            action = item['action']
            if action in ('insert', 'remove'):
                item['content'] = u"\n".join(item['lines'])
            elif action in ('copy', 'paste'):
                item['content'] = item['text']
            elif action in ('open', 'save'):
                item['content'] = ''
            else:
                continue

            if prev_command is None:
                prev_command = item
                continue

            if not self._merge(prev_command, item):
                # Not mergeable: the previous command is final.
                self.cmd_list.append(prev_command)
                prev_command = item
        if prev_command is not None:
            self.cmd_list.append(prev_command)
        return self

    def _merge(self, prev_command, item):
        """Fold ``item`` into ``prev_command`` in place; return True if merged."""
        prev_action = prev_command['action']
        action = item['action']
        if (prev_action, action) not in self._MERGEABLE_PAIRS:
            return False

        if (action == u'insert' and prev_action == u'insert'
                and str(prev_command['end']) == str(item['start'])):
            # Insert continuing exactly where the previous insert ended.
            prev_command['content'] += item['content']
            prev_command['end'] = item['end']
            return True

        if (action == u'remove'
                and str(prev_command['start']) == str(item['end'])):
            # Remove ending exactly where the previous command started.
            prev_command['content'] += item['content']
            prev_command['start'] = item['start']
            return True

        # NOTE(review): (remove, insert) is not in _MERGEABLE_PAIRS, so this
        # branch cannot currently run - confirm whether the pair is missing
        # from the list or the branch is obsolete.
        if (action == u'insert' and prev_action == u'remove'
                and str(prev_command['end']) == str(item['end'])):
            prev_length = len(prev_command['content'])
            tem_length = len(item['content'])
            if prev_length - tem_length < 0:
                prev_command['content'] = item['content'][:tem_length - prev_length]
                # NOTE(review): the action stays 'remove' although the insert
                # is the longer one - confirm whether 'insert' was intended.
                prev_command['action'] = u'remove'
            else:
                # Was ``prev_length['content']``: subscripting the int length
                # would raise TypeError; the command's content is meant.
                prev_command['content'] = prev_command['content'][:tem_length - prev_length]
            return True

        return False
示例#6
0
class TestSList(unittest.TestCase):
    """Unit tests for the ``SList`` container."""

    def setUp(self):
        # Every test starts from a fresh, empty list.
        self.list = SList()

    def _fill_mixed(self):
        """Populate the list through append/add/insert.

        The resulting order asserted by the tests is [6, 4, 3, 5, 1, 2, 7].
        """
        self.list.append('1')
        self.list.append('2')
        self.list.add('3')
        self.list.add('4')
        self.list.insert(2, '5')
        self.list.insert(0, '6')
        self.list.insert(7, '7')

    def _assert_contents(self, expected):
        """Assert that ``get(i)`` yields ``expected[i]`` for every position."""
        for position, value in enumerate(expected):
            self.assertEqual(value, self.list.get(position))

    def test_isEmpty(self):
        self.assertTrue(self.list.isEmpty())
        self.list.append('new')
        self.assertFalse(self.list.isEmpty())

    def test_size(self):
        self.assertEqual(0, self.list.size())
        self.list.append('new1')
        self.assertEqual(1, self.list.size())
        self.list.append('new2')
        self.list.append('new3')
        self.assertEqual(3, self.list.size())

    def test_index(self):
        self.assertIsNone(self.list.index('not exist'))
        for value in ('1', '2', '3'):
            self.list.append(value)
        self.assertEqual(0, self.list.index('1'))
        self.assertEqual(1, self.list.index('2'))
        self.assertEqual(2, self.list.index('3'))
        self.list.pop()
        self.assertIsNone(self.list.index('3'))

    def test_search(self):
        self.assertFalse(self.list.search('1'))
        self.list.append('1')
        self.assertTrue(self.list.search('1'))
        self.assertFalse(self.list.search('2'))

    def test_insertAppendAdd(self):
        self._fill_mixed()
        self._assert_contents(['6', '4', '3', '5', '1', '2', '7'])

    def test_popRemove(self):
        self._fill_mixed()
        # [6,4,3,5,1,2,7]
        self.assertEqual('7', self.list.pop())
        self.assertEqual('6', self.list.pop(0))
        self.assertEqual('5', self.list.remove('5'))
        # [4,3,1,2]
        self.assertEqual(4, self.list.size())
        self._assert_contents(['4', '3', '1', '2'])
        # The trailing append used to be left unchecked.
        self.list.append('8')
        self.assertEqual(5, self.list.size())
        self._assert_contents(['4', '3', '1', '2', '8'])
示例#7
0
class LogDataItem(DataItem):
    """Data item for shell logs (``LOG_TYPE_SHELL``).

    The log text is parsed into ``(timestamp, codes)`` pairs (``codes`` being
    decimal key-code strings) and then combined into command dictionaries
    kept in ``cmd_list``.
    """

    # Probe for "<10-digit timestamp> <code>"; tells whether lines are stamped.
    _TIMESTAMP_PATTERN = re.compile("[0-9]{10} [0-9]+")
    # Codes that keep a command open: 8, 9 and 32..127.
    _TEXT_CODES = frozenset([8, 9] + list(range(32, 128)))
    # Delimiter names for specific non-text codes.
    _DELIMITER_NAMES = {13: "[RETURN]", 27: "[ESC]", 127: "[DELETE]"}

    def __init__(self, info_list, file_info):
        DataItem.__init__(self, info_list, file_info)
        self.log_type = LOG_TYPE_SHELL

    def _parse_operation(self, log_content):
        """Parse ``log_content`` into ``operation_list``.

        Sets ``has_timestamp`` when the text contains a "<10 digits> <digits>"
        pair.  Stamped logs contribute one ``(first token, other tokens)``
        entry per line with at least two tokens; unstamped logs contribute
        ``("0", tokens)`` per non-empty line.
        """
        # Only the existence of a match matters.  Storing the findall()
        # strings in operation_list (as before) made combine() treat single
        # characters of those strings as timestamps and key codes.
        self.has_timestamp = self._TIMESTAMP_PATTERN.search(log_content) is not None
        self.operation_list = []
        for line in log_content.split("\n"):
            tokens = [token for token in line.split(" ") if token]
            if self.has_timestamp:
                if len(tokens) > 1:
                    timestamp = tokens[0]
                    # Tokens equal to the timestamp are removed wherever
                    # they occur (unchanged from the original filter).
                    codes = [token for token in tokens if token != timestamp]
                    self.operation_list.append((timestamp, codes))
            elif tokens:
                self.operation_list.append(("0", tokens))

    def combine(self):
        """Fold key codes into commands; stores ``cmd_list`` and returns ``self``.

        A command is closed by the first code outside ``_TEXT_CODES`` and
        records the timestamps of its first and last code.
        """
        self.cmd_list = SList([])
        pending_codes = []
        pending_stamps = []
        for item in self.operation_list:
            codes = item[1]
            # Entries with several codes that include 27 ([ESC] below) are
            # ignored entirely.
            if len(codes) > 1 and "27" in codes:
                continue
            if any(int(code) < 0 for code in codes):
                continue
            # NOTE(review): code 4 entries are skipped; presumably Ctrl-D
            # (end-of-transmission) - confirm.
            if "4" in codes:
                continue
            for op in codes:
                pending_stamps.append(item[0])
                pending_codes.append(op)
                if int(op) not in self._TEXT_CODES:
                    content, delimiter = self._convert_to_text(pending_codes)
                    self.cmd_list.append(
                        {
                            "action": "shell",
                            "content": content,
                            "delimiter": delimiter,
                            "timestamp": int(pending_stamps[0]),
                            "timestamp_end": int(pending_stamps[-1]),
                        }
                    )
                    pending_codes = []
                    pending_stamps = []
        # NOTE(review): a trailing run of codes without a closing delimiter is
        # dropped, as before - confirm that is intended.
        return self

    def _convert_to_text(self, cmd):
        """Translate key-code strings into ``(text, delimiter)``.

        32..126 append the character, 8 removes the last text character and
        9 appends ``[TAB]``.  Every other code overwrites the delimiter with
        its symbolic name (13/27/127) or ``[<code>]``.
        """
        text = ""
        rtn = ""
        for c in cmd:
            code = int(c)
            if 32 <= code < 127:
                text += chr(code)
            elif code == 8:
                text = text[:-1]
            elif code == 9:
                text += "[TAB]"
            else:
                rtn = self._DELIMITER_NAMES.get(code, "[{}]".format(c))
        return text, rtn
示例#8
0
class EditorDataItem(DataItem):
    """Data item for editor logs (``LOG_TYPE_EDITOR``).

    Each non-empty log line is a JSON object.  ``combine`` gives every known
    operation a ``content`` string and merges neighbouring insert/remove
    operations before storing the commands in ``cmd_list``.
    """

    # (previous action, current action) combinations eligible for merging.
    _MERGEABLE_PAIRS = (
        (u"insert", u"remove"),
        (u"insert", u"insert"),
        (u"remove", u"remove"),
    )

    def __init__(self, info_list, file_info):
        DataItem.__init__(self, info_list, file_info)
        self.log_type = LOG_TYPE_EDITOR

    def _parse_operation(self, log_content):
        """Decode each non-empty line as JSON and append it to ``operation_list``."""
        for line in log_content.split(u"\n"):
            if line:
                self.operation_list.append(json.loads(line))

    def combine(self):
        """Populate ``cmd_list`` from ``operation_list``; returns ``self``.

        The operation dictionaries are updated in place (a ``content`` key is
        added, merged operations grow the earlier command).  Operations with
        an unlisted action are skipped.
        """
        prev_command = None
        self.cmd_list = SList([])
        for item in self.operation_list:
            action = item["action"]
            if action in ("insert", "remove"):
                item["content"] = u"\n".join(item["lines"])
            elif action in ("copy", "paste"):
                item["content"] = item["text"]
            elif action in ("open", "save"):
                item["content"] = ""
            else:
                continue

            if prev_command is None:
                prev_command = item
                continue

            if not self._merge(prev_command, item):
                # Nothing to merge: emit the previous command and move on.
                self.cmd_list.append(prev_command)
                prev_command = item
        if prev_command is not None:
            self.cmd_list.append(prev_command)
        return self

    def _merge(self, prev_command, item):
        """Merge ``item`` into ``prev_command`` in place; True when merged."""
        prev_action = prev_command["action"]
        action = item["action"]
        if (prev_action, action) not in self._MERGEABLE_PAIRS:
            return False

        if (
            action == u"insert"
            and prev_action == u"insert"
            and str(prev_command["end"]) == str(item["start"])
        ):
            # The insert starts where the previous insert ended.
            prev_command["content"] += item["content"]
            prev_command["end"] = item["end"]
            return True

        if action == u"remove" and str(prev_command["start"]) == str(item["end"]):
            # The remove ends where the previous command started.
            prev_command["content"] += item["content"]
            prev_command["start"] = item["start"]
            return True

        # NOTE(review): this branch is unreachable today because
        # (remove, insert) is not listed in _MERGEABLE_PAIRS - confirm whether
        # the pair should be added or the branch removed.
        if (
            action == u"insert"
            and prev_action == u"remove"
            and str(prev_command["end"]) == str(item["end"])
        ):
            prev_length = len(prev_command["content"])
            tem_length = len(item["content"])
            if prev_length - tem_length < 0:
                prev_command["content"] = item["content"][: tem_length - prev_length]
                # NOTE(review): action is left as 'remove' even though the
                # insert is longer - confirm whether 'insert' was intended.
                prev_command["action"] = u"remove"
            else:
                # Fixed: the original subscripted the integer ``prev_length``
                # (TypeError) instead of the command's content.
                prev_command["content"] = prev_command["content"][: tem_length - prev_length]
            return True

        return False