Пример #1
0
def getLSM9DS0Reading():
    """Read the IMU once, then print, log and save the nine axis values.

    The gyro, magnetometer and accelerometer triples come from the
    module-level ``imu``. They are echoed to stdout, written as one
    human-readable line through ``logger`` and stored as a ``"; "``-separated
    record via ``FileUtil.saveToNewFile``. Any exception is logged and
    printed instead of being propagated.
    """
    try:
        # grab data from sensor
        gyro, mag, accel = imu.read()

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print([gyro, mag, accel])

        # Unpacking checks that each reading really has three components.
        gyro_x, gyro_y, gyro_z = gyro
        mag_x, mag_y, mag_z = mag
        accel_x, accel_y, accel_z = accel
        axes = (gyro_x, gyro_y, gyro_z, mag_x, mag_y, mag_z, accel_x, accel_y,
                accel_z)

        sensorValue = "%s; %s; %s; %s; %s; %s; %s; %s; %s" % axes
        logger.log("Gyro: %s %s %s, Mag: %s %s %s, Accel: %s %s %s\n" % axes)
        print(sensorValue)

        FileUtil.saveToNewFile(lsm9ds0_reportDir, lsm9ds0_name, sensorValue)
    except Exception as e:
        logger.log("ERROR")
        logger.log(e)
        print(e)
Пример #2
0
def saveResult(rfidData, dictValue):
    """Log an RFID read and save its mapped value unless it repeats the last one.

    :param rfidData: raw card data, used only in the log line
    :param dictValue: value the card maps to; compared against the
        module-level ``oldRfidCode`` to detect a repeated read
    """
    global oldRfidCode
    logger.log("Card: " + rfidData + " mapped to: " + dictValue)

    # Same value as the previous read: note it and leave the stored state alone.
    if oldRfidCode == dictValue:
        logger.log("Duplicate read")
        return

    FileUtil.saveToNewFile(reportDir, name, dictValue)
    oldRfidCode = dictValue
Пример #3
0
 def __init__(self):
     """Start with no file, worksheet or DataFrame and widen pandas' display limits."""
     self.filename = self.worksheetName = self._df = None
     self._fu = FileUtil()
     # make the df display look better: https://stackoverflow.com/questions/11707586/how-do-i-expand-the-output-display-to-see-more-columns-of-a-pandas-dataframe
     display_limits = (
         ('display.max_rows', 100),
         ('display.max_columns', 50),
         ('display.width', 800),
     )
     for option_name, limit in display_limits:
         pd.set_option(option_name, limit)
Пример #4
0
 def get_worksheets(self, excelFileName=None):
     """
     Return the worksheet names of an Excel workbook.

     :param excelFileName: optional path; when truthy it replaces self.filename
     :return: list of sheet names, or None (after logging an error) if the file is not found
     """
     if excelFileName:
         self.filename = excelFileName
     fu = FileUtil()
     if not fu.file_exists(self.filename):
         logger.error(f'Cannot find Excel file {self.filename}.')
         return None
     # ExcelFile keeps the workbook open; the context manager closes it
     # once the sheet names have been read.
     with pd.ExcelFile(self.filename) as xl:
         return xl.sheet_names
Пример #5
0
    def test_is_windows(self):
        """is_Windows must be False under a patched Linux platform and True under Windows."""
        # FileUtil created and queried while platform.system() reports Linux.
        with mock.patch('platform.system', return_value='Linux'):
            linux_fu = FileUtil()
            self.assertFalse(linux_fu.is_Windows)

        # Same check while platform.system() reports Windows.
        with mock.patch('platform.system', return_value='Windows'):
            windows_fu = FileUtil()
            self.assertTrue(windows_fu.is_Windows)
Пример #6
0
def measureTmeperature():
    """Read the temperature, format it to two decimals, then save and log it.

    The sensor read happens outside the ``try``, so a sensor failure
    propagates to the caller. Only failures while saving or logging are
    caught, logged and printed.
    """
    temperature = format(sensor.get_temperature(), '.2f')
    try:
        FileUtil.saveToNewFile(reportDir, name, temperature)
        logger.log(temperature)
    except Exception as e:
        logger.log("Error:")
        logger.log(e)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(e)
Пример #7
0
def measureLight():
    """Read the light level, save it as an integer and log the raw reading in lux.

    Any exception raised while reading, converting, saving or logging is
    printed and logged instead of being propagated.
    """
    try:
        lightLevel = readLight()
        FileUtil.saveToNewFile(reportDir, name, int(lightLevel))
        # One-element tuple so the reading is always formatted as a single value.
        logger.log(": %s lux" % (lightLevel,))
    except Exception as e:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(e)
        logger.log("Error:")
        logger.log(e)
Пример #8
0
 def __init__(self, *args, **kwargs):
     """Set up shared fixtures: a platform-specific temp dir, helper objects and a sample dict."""
     super(Test_FileUtil, self).__init__(*args, **kwargs)
     if platform.system() == 'Windows':
         self.path = r'c:\temp'
     else:
         self.path = r'/tmp'
     self._fu = FileUtil()
     self._du = DateUtil()
     # Nested sample data, built inside-out.
     characters = {'answer': 42, 'name': 'Dent. Arthur Dent.'}
     self.features_dict = {
         'book': "Hitchhiker's Guide",
         'characters': characters,
     }
Пример #9
0
    def executing_directory(self) -> str:
        """
        Return the directory part of the executing file's path.

        The value of executing_file() is split with FileUtil.split_qualified_path and the
        filename component is discarded. Both the full path and the directory are logged
        at debug level.

        :return: the executing file's path with the filename removed
        """
        splitter = FileUtil()
        directory, _filename = splitter.split_qualified_path(
            self.executing_file())
        logger.debug(f'executing file is {self.executing_file()}')
        logger.debug(f'path (minus filename) is {directory}')
        return directory
Пример #10
0
    def test_write_excel(self):
        """Write a DataFrame through MyApplication.write_excel and check it reads back unchanged."""
        pandas_util = PandasUtil()
        file_util = FileUtil()
        expected_df = self.my_test_df()
        workbook_path = self.fu.qualified_path(self.path,
                                               self.spreadsheet_name)

        # The application finds its output file through a YAML config entry.
        config = {
            'outputFile': workbook_path,
            'worksheet': self.worksheet_name,
        }
        file_util.dump_yaml(Test_ApplicationUtil.excel_qual_path, config)

        app = MyApplication(Test_ApplicationUtil.excel_qual_path)
        app.write_excel(df=expected_df,
                        output_file_yaml_entry='outputFile',
                        worksheet=self.worksheet_name)

        read_back = pandas_util.read_df_from_excel(
            excelFileName=workbook_path, excelWorksheet=self.worksheet_name)
        assert_frame_equal(expected_df, read_back)
Пример #11
0
    def fetch_report_single(self, any_pageID, any_guideline):
        """Collect report rows for the selected pages and guidelines into self.rep_data.

        :param any_pageID: "" for every listed page, a single PID, or a
            comma-separated list of PIDs
        :param any_guideline: "" to use the guidelines read from
            self.guideline_file_name, a single guideline, or a
            comma-separated list
        """
        self.wd.get(self.rep_index_url_base + self.projectID + "/")
        DateUtil.app_sleep(self.shortWait)

        # Select the pages (PIDs) to process.
        page_rows = self.get_page_list_data()
        if any_pageID == "":
            new_page_rows = page_rows
        else:
            if TextUtil.is_csv(any_pageID) is True:
                qy_page_rows = any_pageID.split(",")
            else:
                qy_page_rows = [any_pageID]
            # Direct key lookup instead of scanning every page for every
            # requested PID; insertion order follows the requested order.
            new_page_rows = {
                pid: page_rows[pid]
                for pid in qy_page_rows if pid in page_rows
            }
            if len(new_page_rows) < 1:
                # NOTE(review): the message announces a stop, but execution
                # continues (the header is still appended and the page loop
                # below has nothing to iterate). Confirm the intended behaviour.
                print("-p オプションで指定したPIDが存在しません。処理を停止します。")

        # Select the guidelines to process.
        if any_guideline == "":
            guideline_rows = FileUtil.open_text_data(self.guideline_file_name)
        elif TextUtil.is_csv(any_guideline) is True:
            guideline_rows = any_guideline.split(",")
        else:
            guideline_rows = [any_guideline]

        # header
        self.rep_data.extend(TextUtil.get_header())

        # Loop over guidelines
        for guideline in guideline_rows:
            # Keep the unprefixed name for the progress message.
            guideline_disp = guideline
            if TextUtil.is_jis2016_lower(guideline) is False:
                guideline = "7." + guideline
            # Loop over pages
            for pageID, pageURL in new_page_rows.items():
                print(pageID + ". " + guideline_disp + " を処理しています。(" +
                      DateUtil.get_logtime() + ")")
                path = self.fetch_report_detail_path(pageID, guideline)
                self.wd.get(path)
                DateUtil.app_sleep(self.shortWait)
                self.rep_data.extend(
                    self.get_detail_table_data(pageID, pageURL, guideline))
Пример #12
0
class InfoCollect():
    """Run whois / whatweb / dig against a site and save each tool's output to a file."""

    def __init__(self, filePath, siteName):
        """
        :param filePath: prefix the output file names are appended to
        :param siteName: target passed on the command line of each tool
        """
        self.filePath = filePath
        self.siteName = siteName
        self.fileUtil = FileUtil()

    def getInfo(self, strCommand):
        """Run one collection tool and write its status and output to a text file.

        The output goes to ``filePath + strCommand + '.txt'``.

        :param strCommand: one of 'whois', 'whatweb' or 'dig'
        :raises ValueError: for any other value (previously this surfaced as
            an UnboundLocalError on the unset ``command``)
        """
        # NOTE(review): siteName is interpolated into a shell command line;
        # confirm it never comes from untrusted input.
        if strCommand == 'whois':
            command = 'whois ' + self.siteName
        elif strCommand == 'whatweb':
            command = 'whatweb ' + self.siteName
        elif strCommand == 'dig':
            command = 'dig @114.114.114.114 ' + self.siteName + ' any'
        else:
            raise ValueError('unsupported command: %r' % (strCommand,))
        (status, results) = commands.getstatusoutput(command)
        self.fileUtil.writeFile("Status:" + str(status) + "\n\n" + results, self.filePath + strCommand + '.txt', 'w')
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(strCommand + self.siteName + '收集完毕!')
Пример #13
0
    def fetch_report_sequential(self):
        """Collect report rows for every guideline in the guideline file across every listed page.

        The header is appended to self.rep_data first, then one batch of
        detail rows per (guideline, page) pair.
        """
        # header
        self.rep_data.extend(TextUtil.get_header())

        self.wd.get(self.rep_index_url_base + self.projectID + "/")
        DateUtil.app_sleep(self.shortWait)

        guideline_rows = FileUtil.open_text_data(self.guideline_file_name)
        page_rows = self.get_page_list_data()

        # Loop over guidelines
        for guideline_disp in guideline_rows:
            # The unprefixed name is kept for the progress message; the
            # query name gets a "7." prefix when is_jis2016_lower is False.
            guideline = guideline_disp
            if TextUtil.is_jis2016_lower(guideline_disp) is False:
                guideline = "7." + guideline_disp
            # Loop over pages
            for pageID, pageURL in page_rows.items():
                print(pageID + ". " + guideline_disp + " を処理しています。(" +
                      DateUtil.get_logtime() + ")")
                detail_path = self.fetch_report_detail_path(pageID, guideline)
                self.wd.get(detail_path)
                DateUtil.app_sleep(self.shortWait)
                self.rep_data.extend(
                    self.get_detail_table_data(pageID, pageURL, guideline))
Пример #14
0
    def test_fully_qualified_path(self):
        """Check fully_qualified_path for a Windows path and for Linux paths with and without a leading separator."""
        # Test 1, Windows (should be unchanged)
        windows_dir = r'c:\temp\subdir\subsubdir'
        with mock.patch('platform.system', return_value='Windows'):
            win_fu = FileUtil()
            expected_win = windows_dir + win_fu.separator + self.fn
            actual_win = win_fu.fully_qualified_path(dirPath=windows_dir,
                                                     filename=self.fn)
            self.assertEqual(expected_win, actual_win, 'Test 1 fail')

        relative_dir = r'dir/to/path'
        with mock.patch('platform.system', return_value='Linux'):
            linux_fu = FileUtil()

            # Test 2, Linux without the leading / (a separator is expected in front)
            expected_rel = (linux_fu.separator + relative_dir +
                            linux_fu.separator + self.fn)
            actual_rel = linux_fu.fully_qualified_path(
                dirPath=relative_dir,
                filename=self.fn,
                dir_path_is_array=False)
            self.assertEqual(expected_rel, actual_rel, "Test 2 fail")

            # Test 3, Linux with the leading / (should be unchanged)
            absolute_dir = linux_fu.separator + relative_dir
            expected_abs = absolute_dir + linux_fu.separator + self.fn
            actual_abs = linux_fu.fully_qualified_path(
                dirPath=absolute_dir,
                filename=self.fn,
                dir_path_is_array=False)
            self.assertEqual(expected_abs, actual_abs, "Test 3 fail")
Пример #15
0
    def do_reset_guideline():
        """Rewrite guideline_datas.txt with the guideline numbers for the configured level.

        The level is element 7 of FileUtil.getUserProperties("user.yaml"):
        "A" writes the level-A list, "AA" writes A + AA, and any other value
        writes A + AA + AAA.
        """
        # Load the settings file
        gLevel = FileUtil.getUserProperties("user.yaml")[7]

        # The written content is controlled by these lists
        level_a = [
            "1.1.1", "1.2.1", "1.2.2", "1.2.3", "1.3.1", "1.3.2", "1.3.3",
            "1.4.1", "1.4.2", "2.1.1", "2.1.2", "2.2.1", "2.2.2", "2.3.1",
            "2.4.1", "2.4.2", "2.4.3", "2.4.4", "3.1.1", "3.2.1", "3.2.2",
            "3.3.1", "3.3.2", "4.1.1", "4.1.2",
        ]
        level_aa = [
            "1.2.4", "1.2.5", "1.4.3", "1.4.4", "1.4.5", "2.4.5", "2.4.6",
            "2.4.7", "3.1.2", "3.2.3", "3.2.4", "3.3.3", "3.3.4",
        ]
        level_aaa = [
            "1.2.6", "1.2.7", "1.2.8", "1.2.9", "1.4.6", "1.4.7", "1.4.8",
            "1.4.9", "2.1.3", "2.2.3", "2.2.4", "2.2.5", "2.3.2", "2.4.8",
            "2.4.9", "2.4.10", "3.1.3", "3.1.4", "3.1.5", "3.1.6", "3.2.5",
            "3.3.5", "3.3.6",
        ]

        # Each level includes everything from the levels below it.
        guideline_names = level_a
        if gLevel != "A":
            guideline_names = guideline_names + level_aa
            if gLevel != "AA":
                guideline_names = guideline_names + level_aaa

        # Write the text data
        FileUtil.write_text_data(guideline_names, "guideline_datas.txt")
Пример #16
0
def pkm2png(path):
    """Decode every PKM file returned for *path* into a PNG with the same base name.

    For each file whose name ends with ``suffix_pkm``, the command
    ``<exe> <pkm> --decode -o <png>`` is printed and run through
    ``os.system``.

    :param path: directory handed to FileUtil.getAlllFilesPathOfCurrentDirectory
    """
    for pkm in FileUtil.getAlllFilesPathOfCurrentDirectory(path):
        if not pkm.endswith(suffix_pkm):
            continue
        # Swap only the trailing suffix; str.replace would also rewrite any
        # earlier occurrence of the suffix text inside the path.
        png = pkm[:len(pkm) - len(suffix_pkm)] + suffix_png
        # etc1tool.exe %%x --decode -o %%x.png
        command = '%s %s --decode -o %s' % (exe, pkm, png)
        print(command)
        os.system(command)
Пример #17
0
    def do_report(projectID, any_pageID, any_guideline):
        """Log in, collect report data for *projectID* and write it to an Excel file.

        :param projectID: project identifier passed to LibraDriver
        :param any_pageID: "" or a page selection; forwarded to fetch_report_single
        :param any_guideline: "" or a guideline selection; forwarded to fetch_report_single

        When both selections are "" the sequential fetch is used instead.
        """
        # Load settings
        user_data = FileUtil.getUserProperties("user.yaml")
        uid = user_data[0]
        pswd = user_data[1]
        systemWait = user_data[2]
        longWait = user_data[3]
        midWait = user_data[4]
        shortWait = user_data[5]
        driver_type = user_data[6]
        appWait = [systemWait, longWait, midWait, shortWait]

        # Create the LibraDriver instance
        lrp = LibraDriver(uid, pswd, projectID, appWait, driver_type)

        try:
            # Log in
            lrp.login()
            DateUtil.app_sleep(shortWait)

            # Report index page
            lrp.browse_repo()
            DateUtil.app_sleep(shortWait)

            # Choose the fetch mode
            if any_pageID == "" and any_guideline == "":
                lrp.fetch_report_sequential()
            else:
                lrp.fetch_report_single(any_pageID, any_guideline)

            # Log out
            lrp.logout()
            DateUtil.app_sleep(shortWait)
        finally:
            # Shut the driver down even when a step above raised, so a failed
            # run does not leave it running. On success the order is unchanged.
            lrp.shutdown()

        rep_data = lrp.getRepData()

        print("Excel書き出し処理に移ります。(" + DateUtil.get_logtime() + ")")
        ExcelUtil.save_xlsx(rep_data)
        print("Excel書き出し処理が完了しました。(" + DateUtil.get_logtime() + ")")
Пример #18
0
    def test_qualified_path(self):
        """Check qualified_path / fully_qualified_path for a plain directory string and for directory arrays."""
        # Test 1. Normal case.
        plain_expected = self.path + sep + self.fn
        plain_actual = self._fu.qualified_path(self.path, self.fn)
        self.assertEqual(plain_actual, plain_expected, "Test 1 fail")

        # Test 2. Using an array under a Windows mock.
        with mock.patch('platform.system', return_value='Windows'):
            win_fu = FileUtil()
            # should be C:\dir\to\path for Windows
            joined_dir = win_fu.separator.join(['C:', 'dir', 'to', 'path'])
            dir_parts = joined_dir.split(win_fu.separator)
            array_expected = joined_dir + win_fu.separator + self.fn
            array_actual = win_fu.fully_qualified_path(dir_parts,
                                                       self.fn,
                                                       dir_path_is_array=True)
            self.assertEqual(array_expected, array_actual, "Test 2 fail")

        # Test 3, using a windows path with a drive
        drive_dir = r'c:\temp\subdir\subsubdir'
        drive_parts = drive_dir.split(_BACKSLASH)
        # Expected result: the directory parts plus the file name, backslash-joined.
        drive_expected = _BACKSLASH.join(drive_parts + [self.fn])

        with mock.patch('platform.system', return_value='Windows'):
            win_fu = FileUtil()
            drive_actual = win_fu.qualified_path(dirPath=drive_parts,
                                                 filename=self.fn,
                                                 dir_path_is_array=True)
            self.assertEqual(drive_expected, drive_actual, "Test 3 fail")
Пример #19
0
        userName=value;
    if op == "-l":
        savePath=value;
    if op == "-p":
        password=value;
    if op == "-c":
        courseUrl=value;

# Log in, list the submitted answers of the course and save each answer file
# under savePath, named after the answer with the file's own extension.
makeDirWhenNeeded(savePath)

doLogin(userName, password)
content = doPost(courseUrl)
answerPageUrls = getSubbmittedUrls(content)

fileUtil = FileUtil()
count = 0
for url, name in answerPageUrls:
    answerPage = doPost(urlRoot + url)
    fileUrl = getAnswerFileUrl(answerPage)
    # The extension is the text after the LAST dot, so a dot earlier in the
    # URL (directory or file name) no longer ends up in the saved name.
    saveName = name + "." + fileUrl.rsplit('.', 1)[1]
    count += 1
    saveName = unicode(saveName, "utf-8")
    fileUrl = urlHomeRoot + fileUrl
    fileContent = HtmlGraber().doGrab(fileUrl)
    # NOTE(review): no separator is inserted here; savePath presumably ends
    # with one - confirm against how -l is used.
    filePath = savePath + saveName
    fileUtil.binaryWrite(filePath, fileContent)
    # Single-argument print(...) behaves the same as the print statement.
    print(saveName + " saved\n")
Пример #20
0
# coding=utf-8
'''
Created on 2017年12月18日

@author: Administrator
'''
from FileUtil import FileUtil

if __name__ == '__main__':
    # Hand a UTF-8 decoded path to FileUtil.readFile and print every item
    # of what it returns, one per line.
    source_path = unicode("F:\部门对接\编辑所需文件20170930", "utf-8")
    reader = FileUtil()
    for entry in reader.readFile(source_path):
        print(entry)
Пример #21
0
 def tearDownClass(cls) -> None:
     """Delete the YAML, CSV and text fixture files from the platform-specific temp dir."""
     fu = FileUtil()
     if platform.system() == 'Windows':
         tmp_dir = r'c:\temp'
     else:
         tmp_dir = r'/tmp'
     for fixture_name in (cls.yaml, cls.fn, cls.text_fn):
         fu.delete_file(fu.qualified_path(tmp_dir, fixture_name))
Пример #22
0
    
    print "主域名:" + domainName
    siteName = domainName.split('.')[0]
    #获取当前时间
    currentTime = time.strftime('%Y%m%d_%H%M%S',time.localtime(int(time.time())))
    print "当前时间:" + currentTime
    #创建結果文件夾及文件
    dnsResultFilePath = resultFilePath + siteName + '/domain/'
    nmapResultFilePath = resultFilePath + siteName + '/nmap/'
    infoResultFilePath = resultFilePath + siteName + '/info/'
    nmapResultFile = nmapResultFilePath + currentTime + '.txt'
    #创建扫描结果存放路径
    pathUtil = PathUtil()
    pathUtil.createFilePath(dnsResultFilePath, nmapResultFilePath, infoResultFilePath)
    #收集信息
    infoCollect = InfoCollect(infoResultFilePath, domainName)
    infoCollect.getInfo('whois')
    infoCollect.getInfo('whatweb')
    infoCollect.getInfo('dig')

    #读取dns字典
    fileUtil = FileUtil()
    domainList = fileUtil.getdomainNameList(domainName, domainListDic)
    #获取dnf密码字典中的域名的IP地址并分类
    finalIPList = getDNSIP(domainList, dnsResultFilePath, currentTime)
    #获取端口列表
    portList = getPortList(portFilePath) 
    #创建扫描队列
    queueUtil = QueueUtil()
    queueUtil.createQueue(finalIPList, portList, nmapResultFile, queueNumber)
Пример #23
0
class Test_FileUtil(TestCase):
    path_no_drive = 'temp'
    fn = 'test.csv'
    yaml = 'example.yaml'
    text_fn = 'test.txt'

    def __init__(self, *args, **kwargs):
        """Set up shared fixtures: a platform-specific temp dir, helper objects and a sample dict."""
        super(Test_FileUtil, self).__init__(*args, **kwargs)
        if platform.system() == 'Windows':
            self.path = r'c:\temp'
        else:
            self.path = r'/tmp'
        self._fu = FileUtil()
        self._du = DateUtil()
        # Nested sample data, built inside-out.
        characters = {'answer': 42, 'name': 'Dent. Arthur Dent.'}
        self.features_dict = {
            'book': "Hitchhiker's Guide",
            'characters': characters,
        }

    @classmethod
    def tearDownClass(cls) -> None:
        """Delete the YAML, CSV and text fixture files from the platform-specific temp dir."""
        fu = FileUtil()
        if platform.system() == 'Windows':
            tmp_dir = r'c:\temp'
        else:
            tmp_dir = r'/tmp'
        for fixture_name in (cls.yaml, cls.fn, cls.text_fn):
            fu.delete_file(fu.qualified_path(tmp_dir, fixture_name))

    @property
    def path(self):
        """Return the directory path stored in self._path."""
        return self._path

    @path.setter
    def path(self, p):
        """Store *p* in self._path without any validation."""
        self._path = p

    def create_csv(self):
        """Write a three-line CSV fixture (header plus two rows) to self.fn inside self.path."""
        target = self._fu.qualified_path(self.path, self.fn)
        csv_rows = [',col1,col2', '0,1,3', '1,2,4']
        self._fu.write_text_file(target, csv_rows)
        logger.debug(f'create_csv to {self.path}{sep}{self.fn}.')

    def create_yaml(self, keys: list, vals: list):
        """Write one ``key: value`` line per entry of *keys* to the YAML fixture file.

        :param keys: names; one line is produced per key
        :param vals: values, read at the same index as each key
        """
        yaml_lines = [f'{key}: {vals[pos]}' for pos, key in enumerate(keys)]
        target = self._fu.qualified_path(self.path, self.yaml)
        self._fu.write_text_file(filename=target, lines=yaml_lines)

    def generate_text_lines(self,
                            how_many: int = 10,
                            width: int = None) -> List[str]:
        """Build *how_many* sample lines.

        :param how_many: number of lines to produce
        :param width: when truthy, each line is its index centred in a field
            of this width and padded with ``*``; otherwise lines read ``Line <index>``
        :return: the generated lines
        """
        if not width:
            return [f'Line {i}' for i in range(how_many)]
        # Same format spec as '{0:*^{width}}': fill '*', centred, given width.
        spec = f'*^{width}'
        return [format(i, spec) for i in range(how_many)]

    def create_text_file(self,
                         filename: str,
                         how_many: int = 10,
                         width: int = None):
        """Write the lines from generate_text_lines(how_many, width) to *filename*."""
        self._fu.write_text_file(filename,
                                 self.generate_text_lines(how_many, width))

    @logit()
    def test_is_windows(self):
        """is_Windows must be False under a patched Linux platform and True under Windows."""
        # FileUtil created and queried while platform.system() reports Linux.
        with mock.patch('platform.system', return_value='Linux'):
            linux_fu = FileUtil()
            self.assertFalse(linux_fu.is_Windows)

        # Same check while platform.system() reports Windows.
        with mock.patch('platform.system', return_value='Windows'):
            windows_fu = FileUtil()
            self.assertTrue(windows_fu.is_Windows)

    @logit()
    def test_dump_yaml(self):
        """dump_yaml must create the file, and read_yaml must return the dict that was dumped."""
        target = self._fu.qualified_path(self.path, self.yaml)
        self._fu.dump_yaml(target, self.features_dict)
        self.assertTrue(self._fu.file_exists(target))
        # Round trip: what comes back must equal what went in.
        round_tripped = self._fu.read_yaml(target)
        self.assertDictEqual(self.features_dict, round_tripped)

    @logit()
    def test_current_directory(self):
        """current_directory must return what the patched FileUtil.getcwd reports."""
        real_cwd = self._fu.current_directory()
        logger.debug(f'current working dir is really {real_cwd}')
        fake_cwd = r'\synthesys\testing'
        with mock.patch('FileUtil.getcwd', return_value=fake_cwd):
            reported = self._fu.current_directory()
            self.assertEqual(reported, fake_cwd)

    def test_read_text_file(self):
        """Lines written by create_text_file must come back from read_text_file, ignoring trailing whitespace."""
        target = self._fu.qualified_path(self.path, self.text_fn)
        line_count = randrange(10) + 2
        self.create_text_file(target, line_count)
        expected = self.generate_text_lines(line_count)
        # must remove newline chars
        stripped = []
        for raw_line in self._fu.read_text_file(target):
            stripped.append(raw_line.rstrip())
        self.assertListEqual(expected, stripped)

    @logit()
    def test_read_text_file_err(self):
        """Call read_text_file while FileUtil.open raises IOError.

        Nothing is asserted; the test passes only if no exception escapes
        read_text_file.
        """
        # test an IO error
        target = self._fu.qualified_path(self.path, self.text_fn)
        with mock.patch('FileUtil.open', create=True,
                        side_effect=IOError()):
            self._fu.read_text_file(target)

    @logit()
    def test_read_yaml(self):
        """read_yaml must return the keys in file order and the first value as written."""
        field_names = ['firstname', 'lastname', 'zip']
        field_values = ['Rajah', 'Chacko', 28269]
        self.create_yaml(field_names, field_values)

        yaml_path = self._fu.qualified_path(self.path, self.yaml)
        loaded = self._fu.read_yaml(yamlFile=yaml_path)
        logger.debug(f'Contents of yaml: {loaded}')
        self.assertEqual(list(loaded.keys()), field_names)
        self.assertEqual(field_values[0], loaded[field_names[0]])

    @logit()
    @mock.patch('FileUtil.safe_load')
    def test_read_yaml_err(self, mock_obj):
        """read_yaml must return None when the patched safe_load raises YAMLError."""
        target = self._fu.qualified_path(self.path, self.yaml)
        # Any existing file will do; the parser itself is forced to fail.
        self.create_text_file(target)
        mock_obj.side_effect = YAMLError('mock error')
        self.assertIsNone(self._fu.read_yaml(yamlFile=target))

    @logit()
    def test_qualified_path(self):
        """Check qualified_path / fully_qualified_path for a plain directory string and for directory arrays."""
        # Test 1. Normal case.
        plain_expected = self.path + sep + self.fn
        plain_actual = self._fu.qualified_path(self.path, self.fn)
        self.assertEqual(plain_actual, plain_expected, "Test 1 fail")

        # Test 2. Using an array under a Windows mock.
        with mock.patch('platform.system', return_value='Windows'):
            win_fu = FileUtil()
            # should be C:\dir\to\path for Windows
            joined_dir = win_fu.separator.join(['C:', 'dir', 'to', 'path'])
            dir_parts = joined_dir.split(win_fu.separator)
            array_expected = joined_dir + win_fu.separator + self.fn
            array_actual = win_fu.fully_qualified_path(dir_parts,
                                                       self.fn,
                                                       dir_path_is_array=True)
            self.assertEqual(array_expected, array_actual, "Test 2 fail")

        # Test 3, using a windows path with a drive
        drive_dir = r'c:\temp\subdir\subsubdir'
        drive_parts = drive_dir.split(_BACKSLASH)
        # Expected result: the directory parts plus the file name, backslash-joined.
        drive_expected = _BACKSLASH.join(drive_parts + [self.fn])

        with mock.patch('platform.system', return_value='Windows'):
            win_fu = FileUtil()
            drive_actual = win_fu.qualified_path(dirPath=drive_parts,
                                                 filename=self.fn,
                                                 dir_path_is_array=True)
            self.assertEqual(drive_expected, drive_actual, "Test 3 fail")

    @logit()
    def test_fully_qualified_path(self):
        """Check fully_qualified_path for a Windows path and for Linux paths with and without a leading separator."""
        # Test 1, Windows (should be unchanged)
        windows_dir = r'c:\temp\subdir\subsubdir'
        with mock.patch('platform.system', return_value='Windows'):
            win_fu = FileUtil()
            expected_win = windows_dir + win_fu.separator + self.fn
            actual_win = win_fu.fully_qualified_path(dirPath=windows_dir,
                                                     filename=self.fn)
            self.assertEqual(expected_win, actual_win, 'Test 1 fail')

        relative_dir = r'dir/to/path'
        with mock.patch('platform.system', return_value='Linux'):
            linux_fu = FileUtil()

            # Test 2, Linux without the leading / (a separator is expected in front)
            expected_rel = (linux_fu.separator + relative_dir +
                            linux_fu.separator + self.fn)
            actual_rel = linux_fu.fully_qualified_path(
                dirPath=relative_dir,
                filename=self.fn,
                dir_path_is_array=False)
            self.assertEqual(expected_rel, actual_rel, "Test 2 fail")

            # Test 3, Linux with the leading / (should be unchanged)
            absolute_dir = linux_fu.separator + relative_dir
            expected_abs = absolute_dir + linux_fu.separator + self.fn
            actual_abs = linux_fu.fully_qualified_path(
                dirPath=absolute_dir,
                filename=self.fn,
                dir_path_is_array=False)
            self.assertEqual(expected_abs, actual_abs, "Test 3 fail")

    @logit()
    def test_split_qualified_path(self):
        """split_qualified_path must return (directory, filename), with the directory as a string or an array."""
        fn = 'test.txt'
        qpath = self._fu.qualified_path(self.path, fn)

        # Test 1. The temp dir (self.path) returned as a plain string.
        which_test = 1
        dir_part, file_part = self._fu.split_qualified_path(qpath,
                                                            makeArray=False)
        self.assertEqual(dir_part, self.path,
                         f'Test {which_test}. Paths should be equal.')
        self.assertEqual(file_part, fn,
                         f'Test {which_test}. File names should be equal.')

        # Test 2. Split paths into arrays.
        which_test = 2
        dir_parts, file_part = self._fu.split_qualified_path(qpath,
                                                             makeArray=True)
        self.assertEqual(dir_parts, self.path.split(sep),
                         f'Test {which_test}. Paths should be equal.')
        self.assertEqual(file_part, fn,
                         f'Test {which_test}. File names should be equal.')

        # Test 3. Try a more complex path.
        which_test = 3
        if platform.system() == 'Windows':
            complex_path = r'C:\Users\Owners\Documents\Tickers.csv'
        else:
            complex_path = r'/tmp/parent/child/Tickers.csv'
        dir_parts, file_part = self._fu.split_qualified_path(complex_path,
                                                             makeArray=True)
        # Everything except the last element, which is the file name.
        expected_parts = complex_path.split(sep)[:-1]
        self.assertEqual(dir_parts, expected_parts,
                         f'Test {which_test}. Paths should be equal.')
        self.assertEqual(file_part, 'Tickers.csv',
                         f'Test {which_test}. File names should be equal.')

    @logit()
    def test_split_file_name(self):
        """split_file_name must return (stem, extension) for a bare name and for a qualified path."""
        stem = "file"
        extension = ".ext"
        bare_name = stem + extension
        qualified_name = self._fu.qualified_path(self.path, bare_name)

        # First with just file.ext, then with path/file.ext.
        for candidate in (bare_name, qualified_name):
            actual_stem, actual_extension = self._fu.split_file_name(candidate)
            self.assertEqual(actual_stem, stem)
            self.assertEqual(actual_extension, extension)

    @logit()
    def test_file_exists(self):
        """file_exists must be True for the freshly written CSV and False for a name never created."""
        self.create_csv()
        present = self._fu.qualified_path(self.path, self.fn)
        self.assertTrue(self._fu.file_exists(present))
        absent = self._fu.qualified_path(self.path, 'noSuchFile.xxd')
        self.assertFalse(self._fu.file_exists(absent))

    @logit()
    def test_ensure_dir(self):
        """After ensure_dir(self.path), dir_exists must report the directory as present."""
        target_dir = self.path
        self._fu.ensure_dir(target_dir)
        self.assertTrue(self._fu.dir_exists(target_dir))

    @logit()
    def test_delete_file(self):
        """delete_file must return True when it removes the CSV and False when called again."""
        self.create_csv()
        target = self._fu.qualified_path(self.path, self.fn)
        first_attempt = self._fu.delete_file(target)
        # delete_file should return True the first time
        self.assertTrue(first_attempt)
        second_attempt = self._fu.delete_file(target)
        # but return false the second time.
        self.assertFalse(second_attempt)

    @logit()
    @mock.patch('FileUtil.remove')
    def test_delete_file_err(self, mock_obj):
        """When the patched remove raises OSError, delete_file must return False and log the error text."""
        self.create_csv()
        expected_log_message = 'delete_file mocktest'
        mock_obj.side_effect = OSError(expected_log_message)
        target = self._fu.qualified_path(self.path, self.fn)
        with self.assertLogs(FileUtil.__name__, level='DEBUG') as captured:
            self.assertFalse(self._fu.delete_file(target))
            # At least one captured log line must carry the error text.
            self.assertTrue(
                any(expected_log_message in line
                    for line in captured.output))

    @logit()
    def test_copy_file(self):
        """copy_file must leave the source in place and create the copy; the copy is removed afterwards."""
        self.create_csv()
        source = self._fu.qualified_path(self.path, self.fn)
        duplicate = self._fu.qualified_path(self.path, self.fn + '.copy')
        self._fu.copy_file(source, duplicate)
        for must_exist in (source, duplicate):
            self.assertTrue(self._fu.file_exists(must_exist))
        # Clean up the extra file created by this test.
        self._fu.delete_file(duplicate)

    @logit()
    @mock.patch('FileUtil.copy2')
    def test_copy_file_err(self, mock_obj):
        """When the patched copy2 raises IOError, copy_file must log the error text."""
        destination = self._fu.qualified_path(self.path, 'tmp')
        source = self._fu.qualified_path(self.path, self.fn)
        expected_log_message = 'copy_file mocktest'
        mock_obj.side_effect = IOError(expected_log_message)
        with self.assertLogs(FileUtil.__name__, level='DEBUG') as captured:
            self._fu.copy_file(source, destination)
            # At least one captured log line must carry the error text.
            self.assertTrue(
                any(expected_log_message in line
                    for line in captured.output))

    @logit()
    def test_getList(self):
        """Smoke test: list the temp directory with getList and log the result (nothing is asserted)."""
        # Use the platform-aware temp dir like the other tests in this class;
        # the hard-coded c:\temp only made sense on Windows, where self.path
        # has that same value.
        dir_name = self.path
        flist = self._fu.getList(dir_name)
        logger.debug(f'All list is: {flist}')

    def isFile_side_effect(*args, **kwargs) -> bool:
        """
        Side effect for the patched isfile in test_get_files.

        Declared without an explicit self: when used as a bound method, args[0] is the
        test instance and args[1] is the path given to the patched function. The decision
        is delegated to mock_is_file (defined outside this class).
        NOTE(review): the earlier docstring said this returns True when the name contains
        '.txt'; confirm that against mock_is_file.

        :param args: (self, path, ...)
        :param kwargs: ignored
        :return: whatever mock_is_file returns for the path
        """
        return mock_is_file(args[1])

    def isDir_side_effect(*args) -> bool:
        """
        Side effect for the patched isdir in test_get_dirs.

        Declared without an explicit self: when used as a bound method, args[0] is the
        test instance and args[1] is the path given to the patched function.

        :param args: (self, path, ...)
        :return: whatever mock_is_dir (defined outside this class) returns for the path
        """
        return mock_is_dir(args[1])

    @logit()
    @mock.patch('FileUtil.isfile')
    @mock.patch('FileUtil.listdir')
    def test_get_files(self, mock_listdir, mock_isfile):
        """get_files must return exactly the listed names that the patched isfile accepts."""
        listing = ['filea.txt', 'fileb.txt', 'filec.txt', 'somedir']
        mock_listdir.return_value = listing
        mock_isfile.side_effect = self.isFile_side_effect
        actual = self._fu.get_files(r'\nosuchdir')
        # Condition must match isFile_side_effect
        expected = []
        for entry in listing:
            if mock_is_file(entry):
                expected.append(entry)
        self.assertListEqual(expected, actual)

    @logit()
    @mock.patch('FileUtil.isdir')
    @mock.patch('FileUtil.listdir')
    def test_get_dirs(self, mock_listdir, mock_isdir):
        """get_dirs must return exactly the listed names that the patched isdir accepts."""
        listing = ['filea.txt', 'fileb.txt', 'filec.txt', 'somedir']
        mock_listdir.return_value = listing
        mock_isdir.side_effect = self.isDir_side_effect
        actual = self._fu.get_dirs(r'\nosuchdir')
        # Condition must match isDir_side_effect
        expected = []
        for entry in listing:
            if mock_is_dir(entry):
                expected.append(entry)
        self.assertListEqual(expected, actual)

    @logit()
    def test_getRecursiveList(self):
        """
        getRecursiveList should give an empty list for a directory that does not exist, and
        the fully qualified form of every (mocked) listdir entry for one that does.
        """
        # No such directory, so the result should be an empty list.
        self.assertListEqual(self._fu.getRecursiveList(r'\nosuchdir'), [])

        # Switch to the directory that holds the executing file.
        exec_file = ExecUtil().exec_file_path()
        dir_name, _ = self._fu.split_qualified_path(exec_file)
        logger.debug(f'dir name is: {dir_name}')

        file_list = ['filea.txt', 'fileb.txt', 'filec.txt']
        with mock.patch('FileUtil.listdir', return_value=file_list):
            actual = self._fu.getRecursiveList(dir_name)
            expected = []
            for name in file_list:
                expected.append(
                    self._fu.fully_qualified_path(dirPath=dir_name,
                                                  filename=name))
            self.assertListEqual(expected, actual)

    @logit()
    def test_load_logs_and_subdir_names(self):
        """
        load_logs_and_subdir_names should return [] for a missing directory and, with
        listdir mocked, the fully qualified entries -- optionally narrowed by
        requiredSuffix or requiredPrefix.
        """
        file_list = ['filea.txt', 'fileb.csv', 'otherfile.txt']
        # No such directory, so the result should be an empty list.
        self.assertListEqual(
            self._fu.load_logs_and_subdir_names(r'\nosuchdir'), [])

        dir_name = ExecUtil().executing_directory()  # ensures that dir_name is real

        def qualified(names):
            # Expected output: every name joined onto dir_name by FileUtil itself.
            return [
                self._fu.fully_qualified_path(dirPath=dir_name, filename=n)
                for n in names
            ]

        with mock.patch('FileUtil.listdir', return_value=file_list):
            # Neither prefix nor suffix: everything comes back.
            actual = self._fu.load_logs_and_subdir_names(dir_name)
            self.assertListEqual(qualified(file_list), actual)

            # Only the names ending in .txt.
            suffix = '.txt'
            actual = self._fu.load_logs_and_subdir_names(dir_name,
                                                         requiredSuffix=suffix)
            self.assertListEqual(
                qualified(n for n in file_list if n.endswith(suffix)), actual)

            # Only the names starting with 'file'.
            prefix = 'file'
            actual = self._fu.load_logs_and_subdir_names(dir_name,
                                                         requiredPrefix=prefix)
            self.assertListEqual(
                qualified(n for n in file_list if n.startswith(prefix)),
                actual)

    @logit()
    @mock.patch('FileUtil.isfile')
    @mock.patch('FileUtil.listdir')
    def test_cull_existing_files(self, mock_listdir, mock_isfile):
        """
        cull_existing_files is given qualified paths and must return the ones that the
        patched isfile (via mock_is_file) accepts.
        :param mock_listdir: mock replacing FileUtil.listdir
        :param mock_isfile: mock replacing FileUtil.isfile
        :return:
        """
        dir_name = r'\nosuchdir'
        file_list = ['filea.txt', 'fileb.txt', 'filec.txt', 'somedir']
        mock_listdir.return_value = file_list
        mock_isfile.side_effect = self.isFile_side_effect
        qualified_file_list = []
        for name in file_list:
            qualified_file_list.append(
                self._fu.qualified_path(dirPath=dir_name, filename=name))
        actual = self._fu.cull_existing_files(qualified_file_list)
        # The filter must stay in step with isFile_side_effect.
        expected = list(filter(mock_is_file, qualified_file_list))
        self.assertListEqual(expected, actual)

    @logit()
    def test_read_generator(self):
        """
        Write a text file with a known number of lines and check that read_generator
        yields that many items.
        """
        filename = self._fu.qualified_path(self.path, self.text_fn)
        how_many_lines = 5
        self.create_text_file(filename, how_many_lines)
        seen_indexes = []
        for i, line in enumerate(self._fu.read_generator(filename)):
            logger.debug(f'Read in line {i}, which contains <{line}>.')
            seen_indexes.append(i)
        self.assertEqual(how_many_lines, len(seen_indexes))

    @logit()
    @mock.patch('FileUtil.open')
    def test_read_generator_err(self, mock_open):
        """
        With open patched to raise IOError, anything read_generator yields must be None and
        the error text must appear in the records captured by assertLogs.
        :param mock_open: mock replacing FileUtil.open
        :return:
        """
        expected_log_message = 'mocked error'
        mock_open.side_effect = IOError(expected_log_message)
        filename = self._fu.qualified_path(self.path, self.text_fn)
        with self.assertLogs(FileUtil.__name__, level='DEBUG') as cm:
            for i, line in enumerate(self._fu.read_generator(filename)):
                x = line
                logger.debug(f'Read in line {i}, which contains <{x}>.')
                self.assertIsNone(x)
            logger.debug(f'Caught exception message: {cm.output}')
            message_was_logged = any(expected_log_message in record
                                     for record in cm.output)
            self.assertTrue(message_was_logged)

    @logit()
    def test_file_modify_time(self):
        """
        file_modify_time should report a modification time within 0.1 seconds of the
        moment this test wrote the yaml file.
        """
        start_time = self._du.as_timestamp()
        keys = [
            'greeting',
            'farewell',
        ]
        vals = [
            'Hello',
            'Goodbye',
        ]
        self.create_yaml(keys, vals)
        qualifiedPath = self._fu.qualified_path(self.path, self.yaml)
        mod_time = self._fu.file_modify_time(qualifiedPath)
        mod_timestamp = self._du.as_timestamp(dt=mod_time)
        logger.debug(
            f'mod_time is {mod_timestamp}. start_time is {start_time}.')
        # The file is written after start_time is taken, so start_time - mod_timestamp is
        # normally negative and a bare "< .1" could never fail. Compare the absolute gap
        # so a modification time that is far off in either direction is caught.
        self.assertLess(abs(start_time - mod_timestamp), .1)

    @logit()
    def test_file_modify_time2(self):
        """
        file_modify_time2 should report a modification time within 0.1 seconds of the
        moment this test wrote the yaml file.
        """
        start_time = self._du.as_timestamp()
        keys = [
            'greeting',
            'farewell',
        ]
        vals = [
            'Hello',
            'Goodbye',
        ]
        self.create_yaml(keys, vals)
        qualifiedPath = self._fu.qualified_path(self.path, self.yaml)
        mod_time = self._fu.file_modify_time2(qualifiedPath)
        mod_timestamp = self._du.as_timestamp(dt=mod_time)
        # The file is written after start_time is taken, so start_time - mod_timestamp is
        # normally negative and a bare "< .1" could never fail. Compare the absolute gap
        # so a modification time that is far off in either direction is caught.
        self.assertLess(abs(start_time - mod_timestamp), .1)

    @logit()
    def test_file_size(self):
        """
        Create a text file with a random number of fixed-width lines and check that
        file_size equals (width + end-of-line length) * number of lines.
        """
        filename = self._fu.qualified_path(self.path, self.text_fn)
        line_width = 20
        line_count = randrange(10) + 2
        self.create_text_file(filename, line_count, line_width)
        # NOTE(review): a two-character line ending is assumed here; whether that holds
        # depends on how create_text_file writes and on the platform -- confirm.
        eol_len = 2
        expected_size = (line_width + eol_len) * line_count
        self.assertEqual(expected_size, self._fu.file_size(filename))

    @logit()
    def test_list_modules(self):
        """
        Collect what list_module_contents yields for 'itertools' and look for '__docs__'.
        NOTE(review): this class defines another test_list_modules further down, and in
        Python the later definition replaces this one, so this body is never run.
        NOTE(review): '__docs__' looks like a typo for '__doc__' -- confirm before reviving.
        """
        mods = list(self._fu.list_module_contents(module_name='itertools'))
        self.assertTrue('__docs__' in mods)

    @logit()
    def test_list_modules(self):
        """
        Log what list_module_attributes returns for 'itertools', then check that
        list_modules yields both '__doc__' and '__name__'.
        """
        doc = self._fu.list_module_attributes('itertools', True)
        logger.debug('{}'.format(doc))
        mods = list(self._fu.list_modules(module_name='itertools'))
        for required in ('__doc__', '__name__'):
            self.assertTrue(required in mods)
Пример #24
0
@author: sniperwang
'''
import re
from BeautifulSoup import BeautifulSoup
from HtmlGraber import HtmlGraber
from FileUtil import FileUtil;

if __name__ == '__main__':
    pass

#url="http://share.renren.com/share/249317678/14623723075?from=0101010302&ref=hotnewsfeed&sfet=104&fin=36&fid=20148636643&ff_id=249317678";
# Module-level settings and helper objects shared by the functions in this script.
PicUrlHead = "http://share.renren.com/share/249317678/14623723075/?photoId="
htmlGraber = HtmlGraber()
maxCount = 50
firstIndex = 249317678 - 50
fileUtil = FileUtil()
homeSavePath = "E:\\temp\\"

def grabImageUrl(picUrl):
    """
    Download the page at picUrl and return the src of the first <img id="photo"> tag.

    The page is fetched with the module-level htmlGraber and parsed with BeautifulSoup.

    :param picUrl: URL of the page to download
    :return: value of the first matching tag's src attribute, or "" when no tag matches
    """
    htmlContent = htmlGraber.doGrab(picUrl)
    soup = BeautifulSoup(htmlContent)
    imgurls = soup.findAll('img', id="photo")
    if not imgurls:
        return ""
    return imgurls[0]["src"]
Пример #25
0
from LibraDriver import LibraDriver
from FileUtil import FileUtil
from DateUtil import DateUtil
from TextUtil import TextUtil

# Load the configuration data.
# The seven settings below are read by position from what getUserProperties returns.
user_data = FileUtil.getUserProperties("user.yaml")
uid = user_data[0]
pswd = user_data[1]
systemWait = user_data[2]
longWait = user_data[3]
midWait = user_data[4]
shortWait = user_data[5]
driver_type = user_data[6]
# The four wait settings are handed to LibraDriver together, in this order.
appWait = [systemWait, longWait, midWait, shortWait]

# Create the LibraDriver instance.
# NOTE(review): the meaning of the literal "551" is not visible here -- confirm against LibraDriver.
lrp = LibraDriver(uid, pswd, "551", appWait, driver_type)

# Log in to Libra.
lrp.login()
DateUtil.app_sleep(shortWait)
print(DateUtil.get_logtime() + "login")

# Navigate to the report index page.
lrp.browse_repo()
DateUtil.app_sleep(shortWait)
print(DateUtil.get_logtime() + "report index")

# Fetch the PID + URL list data.
datas = lrp.get_page_list_data()
Пример #26
0
def main():
    """
    Exercise the FileUtil class by hand and print each result.

    Works on 'test_file.txt' and writes 'output_file.csv' and 'output_file.bak'.
    The steps depend on each other: the replace and append calls change the file that
    the later reads print. Uses Python 2 print statements.
    """
    # Raw contents of the input file.
    file1 = FileUtil('test_file.txt')
    print "\n\nself.readlines():"
    print file1.readlines()

    # Contents as returned by readlines_clean.
    clean_list = file1.readlines_clean()
    print "\n\nself.readlines_clean():"
    print clean_list

    # Replace "alon" with "ilan" in the file, then show the file again.
    file1.replace_string_in_file("alon", "ilan")
    print "\n\nself.replace_string_in_file('alon', 'ilan'):"
    print file1.readlines()

    # Look up the pattern "ilan\S*" in the file's lines.
    print "\n\nself.return_string_in_lines('ilan\\S*'):"
    print file1.return_string_in_lines("ilan\\S*")

    # Append one line, then show the file again.
    file1.append_file("THIS IS THE APPENDED LINE")
    print "\n\nself.append_file('THIS IS THE APPENDED LINE'):"
    print file1.readlines()

    # Build a csv file from the current lines of the input file.
    file2 = FileUtil('output_file.csv')
    input_list = file1.readlines()
    file2.create_csv_file_from_list(input_list)

    # Write the current lines of the input file to a .bak file.
    file3 = FileUtil('output_file.bak')
    file3.writelines(file1.readlines())
    print "\n\nSelf.writelenes('output_file.bak') \n\n"

    # read_column is called with index 1 and a single space as the delimiter.
    print "Colums 2, delimetar=' ':", file1.read_column(1, " "), "\n\n"
Пример #27
0

# Endless read loop: take one line from the serial port, map it to a dictionary value,
# and save the result; READ_FAILURE is saved when no mapping is found or an error occurs.
while True:
    serial.flushInput()
    rfid_data = serial.readline().strip()
    if len(rfid_data) > 0:
        logger.log("Rfid Data: %s" % (rfid_data))
        try:
            # Pre-check that the data was read correctly: first try characters 1..10.
            rfidData = rfid_data[1:11]

            dictValue = getDictValueIfKeyContainsString(cardNumDict, rfidData)
            if dictValue is not None:
                saveResult(rfidData, dictValue)
            else:
                # First lookup failed; retry with the shorter slice, characters 3..10.
                logger.log("Error read")
                rfidData = rfid_data[3:11]
                dictValue = getDictValueIfKeyContainsString(
                    cardNumDict, rfidData)
                if dictValue is not None:
                    saveResult(rfidData, dictValue)
                else:
                    # Neither slice matched a key: record the failure marker.
                    FileUtil.saveToNewFile(reportDir, name, READ_FAILURE)
                    logger.log("Read Failure")
                    logger.log(READ_FAILURE)
        except Exception as e:
            # Any error during lookup or saving is recorded as a failed read and logged;
            # the loop then carries on with the next line.
            FileUtil.saveToNewFile(reportDir, name, READ_FAILURE)
            logger.log("Error has occured:")
            logger.log("Sending error message:")
            logger.log(str(e))
Пример #28
0
# -*-coding:utf-8-*-

import os
import xlrd

from FileUtil import FileUtil
from StringParser import StringParser

# excel = "sample.xlsx"
# Workbook that is opened below.
excel = "translate.xls"
# Output directory name. expanduser only rewrites a leading "~", so this value is unchanged.
OUT_PROJECT = os.path.expanduser("AndroidLanguagePackage")

# Open the workbook and take its first sheet.
book = xlrd.open_workbook(excel)
sheet = book.sheet_by_index(0)

FileUtil.make_dir(OUT_PROJECT)
string_parser = StringParser()

# Walk the sheet column by column. Column 0 is skipped as a loop target and is instead
# read as string_keys on every pass.
for col in range(sheet.ncols):

    if col == 0:
        continue

    cells = sheet.col(col)
    string_keys = sheet.col(0)
    lang_list = list()

    # The first cell of the column is taken as the language value.
    lang = cells[0].value

    # `unicode` exists only on Python 2; there the value is encoded to UTF-8 bytes.
    if isinstance(lang, unicode):
        lang = lang.encode('utf-8')
Пример #29
0
 def log(self, msg):
     """
     Hand msg to FileUtil.writeToFile with "log.txt" as the target, using a fresh FileUtil.
     NOTE(review): the two trailing True arguments are passed by position; their meaning
     is not visible here -- confirm against FileUtil.writeToFile.
     :param msg: the message to write
     """
     FileUtil().writeToFile("log.txt", msg, True, True)
Пример #30
0
class ApplicationUtil:
    """
    Reads a yaml file at construction time and uses its entries to find the Excel files
    that are loaded into, or written from, dataframes.
    """
    # Class-level defaults; _d and _tuple are replaced per instance in __init__.
    df = None
    _d = {}
    _tuple = None
    # Helper objects shared by every instance of this class.
    pu = PandasUtil()
    fu = FileUtil()

    def __init__(self, yaml_file:str):
        """
        Set up logging and read the yaml file.
        :param yaml_file: name of the yaml file handed to YamlUtil
        """
        self.logger = self.init_logger()
        d = YamlUtil(yaml_file)
        self._tuple = d.asnamedtuple
        self._d = d
        self.logger.debug(f'Read in yaml file {yaml_file} with fields: {self._d.fields}')

    def init_logger(self):
        """
        Set the module logger to DEBUG and make sure it has a console handler.
        logging.getLogger(__name__) returns the same logger object on every call, so the
        handler is attached only when the logger has none yet; attaching one per
        ApplicationUtil instance would print every message once per instance.
        :return: the configured logger (also stored on self.logger)
        """
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.DEBUG)
        if not self.logger.handlers:
            # create console handler and set level to debug
            ch = logging.StreamHandler()
            ch.setLevel(logging.DEBUG)
            # create formatter and add it to the handler
            formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
            ch.setFormatter(formatter)
            # add the handler to the logger
            self.logger.addHandler(ch)
        return self.logger

    def yaml_entry(self, yaml_entry:str) -> str:
        """
        Read the dictionary and return the value of the given key. Give a warning if the yaml_entry is missing and return a blank.
        :param yaml_entry: key to look up
        :return: the value stored under the key, or '' when the key is missing
        """
        try:
            return self._d.asdict[yaml_entry]
        except KeyError:
            self.logger.warning(f'Unable to find yaml key: {yaml_entry}. Returning blank value.')
            return ''

    def load_df_from_excel(self, input_file_yaml_entry:str, worksheet:str='Sheet1'):
        """
        Read the worksheet of the Excel file named by the given yaml entry.
        :param input_file_yaml_entry: yaml key whose value is the Excel file name
        :param worksheet: worksheet to read
        :return: the dataframe that was read, or an empty dataframe when the file does not exist
        :raises KeyError: when the yaml key is missing (the lookup here is not guarded)
        """
        input_file = self._d.asdict[input_file_yaml_entry]
        self.logger.debug(f'Reading {worksheet} file: {input_file}')
        if self.fu.file_exists(input_file):
            df = self.pu.read_df_from_excel(excelFileName=input_file, excelWorksheet=worksheet, header=0)
            self.pu.get_rowCount_colCount(df)
            return df
        else:
            # Report the path that was looked for, not the yaml key it came from.
            self.logger.warning(f'Unable to find {worksheet} file: {input_file}. Returning empty dataframe.')
            return self.pu.empty_df()

    def write_excel(self, df:pd.DataFrame, output_file_yaml_entry:str, worksheet:str) -> None:
        """
        Write the given dataframe to the file indicated by the dictionary entry (that was read in using the yaml file).
        :param df: DataFrame to write
        :param output_file_yaml_entry: yaml key whose value is the output file name
        :param worksheet: worksheet to write
        :return:
        """
        output_file = self.yaml_entry(output_file_yaml_entry)
        self.logger.debug(f'Writing {worksheet} file: {output_file}')
        self.pu.write_df_to_excel(df=df, excelFileName=output_file, excelWorksheet=worksheet)
Пример #31
0
    def __init__(self, filePath, siteName):
        """
        Store the file path and site name and create the FileUtil helper.
        :param filePath: stored as self.filePath
        :param siteName: stored as self.siteName
        """
        # The body is indented with spaces only: tabs under a space-indented def are a
        # TabError on Python 3.
        self.filePath = filePath
        self.siteName = siteName
        self.fileUtil = FileUtil()
Пример #32
0
                dateValue = fs.read().strip()
                fs.close()
                dt = datetime.strptime(dateValue, '%Y-%m-%d %H:%M:%S')
                day = dt.day
                month = dt.month
                year = dt.year

                fulldate = '%s-%s-%s %s:%s:%s' % (year, month, day, hour,
                                                  minute, second)
                ts = mktime(strptime(fulldate, '%Y-%m-%d %H:%M:%S'))
                timestamp = int(ts)
                print timestamp

                timestamp = int(time.time())
                latitude = format(gpsData.latitude, '.6f')
                longitude = format(gpsData.longitude, '.6f')
                sensorValue = "%s;%s;%s" % (timestamp, latitude, longitude)
                sensorValueForLogger = "Sat_num: %s, gps_quality: %s,Lat: %s; Long: %s; Alt: %s; Speed %f; Timestamp %s" % (
                    gpsData.num_sats, gpsData.gps_qual, gpsData.latitude,
                    gpsData.longitude, gpsData.altitude, speed, timestamp)
                FileUtil.saveToNewFile(reportDir, name, sensorValue)
                logger.log(sensorValueForLogger)
            except Exception as e:
                print e
                logger.log(e)
    except Exception as e:
        print e
        logger.log(e)

#scheduler.add_job(getGPSCoordinates, 'interval', seconds=interval)
#scheduler.start()
Пример #33
0
class PandasUtil:
    _EMPTY_DF = pd.DataFrame()

    def __init__(self):
        """
        Start with no filename, worksheet name, or dataframe, create the FileUtil helper,
        and raise the pandas display limits.
        """
        self.filename = None
        self.worksheetName = None
        self._df = None
        self._fu = FileUtil()
        # make the df display look better: https://stackoverflow.com/questions/11707586/how-do-i-expand-the-output-display-to-see-more-columns-of-a-pandas-dataframe
        display_options = (
            ('display.max_rows', 100),
            ('display.max_columns', 50),
            ('display.width', 800),
        )
        for option_name, option_value in display_options:
            pd.set_option(option_name, option_value)

    # Getters and setters for filename, worksheetname, and df
    @property
    def filename(self):
        """Return the stored file name (self._filename)."""
        return self._filename

    # Setter for filename.
    @filename.setter
    def filename(self, fn: str):
        """Store fn as the file name (self._filename)."""
        self._filename = fn

    @property
    def worksheetName(self):
        """Return the stored worksheet name (self._worksheetName)."""
        return self._worksheetName

    @worksheetName.setter
    def worksheetName(self, wks: str):
        """Store wks as the worksheet name (self._worksheetName)."""
        self._worksheetName = wks

    @property
    def df(self):
        """Return the stored dataframe (self._df)."""
        return self._df

    @df.setter
    def df(self, myDf: pd.DataFrame):
        """Store myDf as the dataframe (self._df)."""
        self._df = myDf

    @classmethod
    def empty_df(cls) -> pd.DataFrame:
        """
        Return an empty DataFrame. A new object is created on every call.
        :return: empty DataFrame
        """
        return pd.DataFrame()

    def pandas_version(self):
        """
        Return the pandas version as three ints.
        Only the leading digits of each of the first three dotted components are used, so
        version strings such as '1.0.0rc1' do not raise ValueError. A component that is
        missing or does not start with a digit counts as 0.
        :return: maj, minor, sub
        """
        import re  # local import: the file's import block is not visible from this method

        numbers = []
        for piece in pd.__version__.split('.')[:3]:
            leading_digits = re.match(r'\d+', piece)
            numbers.append(int(leading_digits.group()) if leading_digits else 0)
        # Pad short versions such as '2.1' so three values are always returned.
        numbers += [0] * (3 - len(numbers))
        return numbers[0], numbers[1], numbers[2]

    def write_df_to_excel(self,
                          df: pd.DataFrame = None,
                          excelFileName: str = None,
                          excelWorksheet: str = None,
                          write_index=False) -> bool:
        """
        Write the given df to the excel file name and worksheet (unless
        they have already been provided and then are optional).
        When df is omitted, the dataframe already stored on this object is written.
        Caller is responsible to catch any I/O errors.
        :param df: dataframe to write; defaults to the stored dataframe
        :param excelFileName: file to write; defaults to self.filename
        :param excelWorksheet: worksheet to write; defaults to self.worksheetName
        :param write_index: passed to to_excel as index
        :return: True if Excel file written, False if there is no dataframe or it is empty.
        """
        if df is None:
            # The parameter defaults to None, so fall back to the stored dataframe
            # instead of failing on None.empty.
            df = self._df
        if df is None or df.empty:
            logger.warning('Empty dataframe will not be written.')
            return False
        self._df = df
        fn = excelFileName or self.filename
        wks = excelWorksheet or self.worksheetName
        # The context manager saves and closes the workbook; ExcelWriter.save() no longer
        # exists in pandas 2.x.
        with pd.ExcelWriter(fn) as writer:
            self._df.to_excel(writer, sheet_name=wks, index=write_index)
        logger.debug(f'Successfully wrote to {fn}.')
        return True

    def write_df_to_csv(self,
                        df: pd.DataFrame = None,
                        csv_file_name: str = None,
                        write_header: bool = True,
                        write_index: bool = False,
                        enc: str = 'utf-8') -> bool:
        """
        Write the given df to a CSV file.
        When df is omitted, the dataframe already stored on this object is written.
        Caller is responsible to catch any I/O errors.
        :param df: dataframe to write; defaults to the stored dataframe
        :param csv_file_name: passed to to_csv as the target
        :param write_header: passed to to_csv as header
        :param write_index: passed to to_csv as index
        :param enc: passed to to_csv as encoding
        :return: True if the CSV file was written, False if there is no dataframe or it is empty.
        """
        if df is None:
            # The parameter defaults to None, so fall back to the stored dataframe
            # instead of failing on None.empty.
            df = self._df
        if df is None or df.empty:
            logger.warning('Empty dataframe will not be written.')
            return False
        self._df = df
        df.to_csv(csv_file_name,
                  header=write_header,
                  index=write_index,
                  encoding=enc)
        logger.debug(f'Successfully wrote to {csv_file_name}.')
        return True

    def read_df_from_excel(self,
                           excelFileName: str = None,
                           excelWorksheet: str = 'Sheet1',
                           header: int = 0,
                           index_col: int = -1) -> pd.DataFrame:
        """
        Read one worksheet of an Excel file into a dataframe and remember it on this object.
        An empty dataframe is returned when the file or the worksheet cannot be found.
        :param excelFileName: file to read; when falsy, self.filename is used
        :param excelWorksheet: worksheet to read; when falsy, self.worksheetName is used
        :param header: 0-offset location of header (0=row 1 in Excel)
        :param index_col: passed to read_excel only when it is >= 0
        :return: dataframe result
        """
        param_dict = {'header': header}
        if excelFileName:
            self.filename = excelFileName
        logger.debug(f'Will read from the Excel file: {self.filename}.')
        param_dict['io'] = self.filename

        # Guard: nothing to read when the file is missing.
        if not self._fu.file_exists(self.filename):
            logger.error(
                f'Cannot find Excel file: {self.filename}. Returning empty df.'
            )
            return PandasUtil.empty_df()

        if excelWorksheet:
            self.worksheetName = excelWorksheet
        wks = self.worksheetName
        major, minor, _ = self.pandas_version()
        logger.debug(
            f'Will read from the worksheet: {wks}. Pandas minor version is {minor}.'
        )

        # Guard: the worksheet has to exist in the workbook.
        if wks not in self.get_worksheets(excelFileName):
            logger.warning(
                f'Cannot find Excel worksheet: {self.worksheetName}. Returning empty df.'
            )
            return PandasUtil.empty_df()

        # The keyword for the sheet is chosen by pandas version: sheet_name for 1.x and
        # later and for 0.x with minor > 21, sheetname otherwise.
        uses_sheet_name = major >= 1 or (major == 0 and minor > 21)
        sheet_keyword = 'sheet_name' if uses_sheet_name else 'sheetname'
        param_dict[sheet_keyword] = wks
        if index_col >= 0:
            param_dict['index_col'] = index_col
        self._df = pd.read_excel(**param_dict)
        logger.debug(f'Read in {len(self.df)} records.')
        return self._df

    def read_df_from_csv(self,
                         csv_file_name: str = None,
                         header: int = 0,
                         enc: str = 'utf-8',
                         index_col: int = None,
                         sep: str = None) -> pd.DataFrame:
        """
        Read a CSV file into a dataframe with pd.read_csv.
        :param csv_file_name: passed to read_csv as filepath_or_buffer
        :param header: Where the headers live (0 means first line of the file)
        :param enc: try 'latin-1' or 'ISO-8859-1' if you are getting encoding errors
        :param index_col: passed to read_csv only when it is not None
        :param sep: passed to read_csv only when it is truthy
        :return: the dataframe that was read
        """
        read_kwargs = dict(filepath_or_buffer=csv_file_name,
                           header=header,
                           encoding=enc)
        # Optional arguments are forwarded only when the caller supplied them.
        if sep:
            read_kwargs['sep'] = sep
        if index_col is not None:
            read_kwargs['index_col'] = index_col
        return pd.read_csv(**read_kwargs)

    def get_df_headers(self, df: pd.DataFrame = _EMPTY_DF) -> list:
        """
        Return the column NAMES of df as a list, and remember df on this object.
        :param df: dataframe to inspect
        :return: list of headers, or None (with a warning) when self.is_empty(df) is true
        """
        if self.is_empty(df):
            logger.warning('df is empty. Returning None for headers')
            return None
        self.df = df
        return list(self.df.columns)

    def set_df_headers(self, df: pd.DataFrame, new_headers: list):
        """
        Replace the column NAMES of df, in place.
        :param df: dataframe whose columns are renamed
        :param new_headers: list of new headers, assigned to df.columns
        :return: None (the caller's df is changed as a side effect)
        """
        df.columns = new_headers

    def get_rowCount_colCount(self, df: pd.DataFrame):
        """
        Log and return the number of rows and columns of df, taken from df.shape.
        :param df: dataframe to measure
        :return: row count, col count
        """
        shape = df.shape
        rows, cols = shape
        logger.debug(f'df has {rows} rows and {cols} columns.')
        return rows, cols

    def get_basic_data_analysis(self, df: pd.DataFrame) -> str:
        """
        Capture the text that df.info() writes, log it at INFO level, and return it.
        :param df: dataframe to describe
        :return: the captured df.info() text
        """
        with StringIO() as buffer:
            # df.info writes into the buffer instead of standard output.
            df.info(buf=buffer)
            ans = buffer.getvalue()
        logger.info(f'info:\n{ans}')
        return ans

    def get_quartiles(self,
                      df: pd.DataFrame,
                      percentiles: list = [.25, .50, .75]) -> pd.DataFrame:
        """
        Return basic statistics about the dataframe, as produced by df.describe.
        The first 10 rows of the result are also logged at INFO level.
        :param df: dataframe to describe
        :param percentiles: list of %-tiles as fractions between 0 and 1, e.g. [.2, .4, .6, .8] for quintiles
        :return: basic description df
        """
        # NOTE(review): the default is a list literal shared between calls; this method only
        # passes it on to describe and does not change it here.
        ans = df.describe(percentiles=percentiles)
        logger.info(f'info:\n{ans.head(10)}')
        return ans

    @logit(showRetVal=True)
    def get_worksheets(self, excelFileName=None):
        """
        Return the worksheet names of an Excel file.
        :param excelFileName: file to inspect; when falsy, self.filename is used
        :return: list of sheet names, or None (with an error logged) when the file is missing
        """
        if excelFileName:
            self.filename = excelFileName
        # Use the FileUtil created in __init__ rather than building a new one per call.
        if not self._fu.file_exists(self.filename):
            logger.error(f'Cannot find Excel file {self.filename}.')
            return None
        # The context manager closes the workbook once the names have been read;
        # otherwise the file handle stays open.
        with pd.ExcelFile(self.filename) as xl:
            return xl.sheet_names

    def duplicate_rows(self,
                       df: pd.DataFrame,
                       fieldList: list = None,
                       keep: str = 'first') -> pd.DataFrame:
        """
        Return the rows of df that df.duplicated flags as duplicates.
        When fieldList is missing, None, or empty, whole rows are compared.
        :param df: dataframe to scan for duplicates
        :param fieldList: fields in df to examine for duplicates.
        :param keep: passed to df.duplicated: 'first' leaves the first occurrence unflagged, 'last' the last.
        :return: df of the duplicates
        """
        # A falsy fieldList means "compare every column", i.e. no subset.
        subset = fieldList if fieldList else None
        is_duplicate = df.duplicated(subset=subset, keep=keep)
        return df[is_duplicate]

    def drop_duplicates(self,
                        df: pd.DataFrame,
                        fieldList: list = None,
                        keep: str = 'first') -> pd.DataFrame:
        """
        Return a copy of df without its duplicates (df itself is not changed).
        When fieldList is missing, None, or empty, no subset is passed on, so whole rows
        are compared.
        :param df: dataframe to scan for duplicates
        :param fieldList: fields in df to examine for duplicates.
        :param keep: 'first' or 'last' to keep the first dupe or the last.
        :return: df without the duplicates
        """
        if fieldList:
            return df.drop_duplicates(subset=fieldList, keep=keep, inplace=False)
        # subset is left out entirely in this branch rather than passed as None.
        return df.drop_duplicates(keep=keep, inplace=False)

    def convert_dict_to_dataframe(self, list_of_dicts: list) -> pd.DataFrame:
        """
        Build a dataframe from a list of dictionaries by passing it to pd.DataFrame.
        :param list_of_dicts: the dictionaries to convert
        :return: the new dataframe
        """
        frame = pd.DataFrame(list_of_dicts)
        return frame

    def convert_list_to_dataframe(self,
                                  lists: list,
                                  column_names: List = None) -> pd.DataFrame:
        """
        Convert a list of lists to a dataframe.
        When column_names is given it supplies the column names; otherwise the new
        dataframe is handed to self.replace_col_names_by_pattern to get default names
        (per the original note: col00, col01, ...).
        :param lists: a list of lists, like [[1,2,3], ['a', 'b', 'c']]
        :param column_names: Column names to use.
        :return: the new dataframe
        """
        if not column_names:
            # Use the default column names: col00, col01...
            ans = pd.DataFrame(data=lists)
            self.replace_col_names_by_pattern(ans)
            return ans
        return pd.DataFrame(data=lists, columns=column_names)

    def convert_matrix_to_dataframe(self, lists: list) -> pd.DataFrame:
        """
        Build a dataframe from a list of lists by passing it to pd.DataFrame as data.
        :param lists: the rows to convert
        :return: the new dataframe
        """
        frame = pd.DataFrame(data=lists)
        return frame

    def convert_dataframe_to_matrix(self, df: pd.DataFrame) -> np.ndarray:
        """
        Return the values of df as a numpy ndarray, using df.to_numpy().

        :param df: dataframe to convert
        :return: ndarray of the values
        """
        matrix = df.to_numpy()
        return matrix

    def convert_dataframe_to_vector(self, df: pd.DataFrame) -> np.ndarray:
        """
        Convert a one-column dataframe to a one-dimensional numpy vector.
        :param df: dataframe that should have exactly one column
        :return: the vector, or None (with a warning) when df does not have exactly one column
        """
        cols = self.get_df_headers(df)
        # get_df_headers returns None for a dataframe it considers empty; count the
        # columns directly in that case instead of failing on len(None).
        col_count = len(cols) if cols is not None else len(df.columns)
        if col_count == 1:
            return df.to_numpy().reshape(-1, )
        logger.warning(
            f'Dataframe should have exactly one column, but contains {col_count}. Returning None.'
        )
        return None

    def convert_dataframe_col_to_list(self, df: pd.DataFrame,
                                      column_name: str) -> list:
        """
        Return the values of one dataframe column as a plain list.
        :param df: dataframe to read from
        :param column_name: a column name, like 'age'
        :return: a list of that column
        """
        column = df[column_name]
        return column.values.tolist()

    def without_null_rows(self, df: pd.DataFrame,
                          column_name: str) -> pd.DataFrame:
        """
        Return the rows of df whose value in column_name is not null.
        :param df: source DataFrame
        :param column_name: name of the column to test for nulls
        :return: new DataFrame, or an empty one (with an error logged) when the column is not found
        """
        try:
            return df[df[column_name].notnull()]
        except KeyError:
            logger.error(
                f'Unable to find column_name name: {column_name}. Returning empty df.'
            )
            return PandasUtil.empty_df()

    def select(self, df: pd.DataFrame, column_name: str,
               match_me: Union[str, int]) -> pd.DataFrame:
        """
        Return the rows of df whose column_name equals match_me.
        Similar to a SELECT * WHERE clause in SQL.
        :param df: dataframe to filter
        :param column_name: column to compare
        :param match_me: value the column must equal
        :return: df with the column_name matching the selected clause (possibly empty)
        """
        matches = df[column_name] == match_me
        return df.loc[matches]

    def mask_blanks(self, df: pd.DataFrame, column_name: str) -> list:
        """
        Return a boolean mask that is True in the rows where column_name equals ''.
        The mask is the result of the == comparison on the column.
        :param df: dataframe to inspect
        :param column_name: column to compare with the empty string
        :return: the boolean mask
        """
        return df[column_name] == ''

    def select_blanks(self, df: pd.DataFrame, column_name: str) -> list:
        return df[self.mask_blanks(df, column_name)]

    def mask_non_blanks(self, df: pd.DataFrame, column_name: str) -> list:
        """
        Return a boolean list with a True in the rows that have a nonblank column_name.
        :param df:
        :param column_name:
        :return:
        """
        blanks = self.mask_blanks(df, column_name)
        non_blanks_mask = [not x for x in blanks]
        return non_blanks_mask

    def select_non_blanks(self, df: pd.DataFrame, column_name: str) -> list:
        return df[self.mask_non_blanks(df, column_name)]

    def unique_values(self, df: pd.DataFrame, column_name: str) -> list:
        """
        Return a list of the unique values in column_name.
        :param df:
        :param column_name:
        :return:
        """
        return self.drop_duplicates(df=df[column_name]).tolist()

    def count_by_column(self,
                        df: pd.DataFrame,
                        column_name: str = None) -> pd.DataFrame:
        """
        Return how often each value occurs in the given column, using value_counts().
        :param df: dataframe to read from
        :param column_name: column whose values are counted
        :return: the result of value_counts() on that column
        """
        column = df[column_name]
        return column.value_counts()

    def add_new_col_with_func(self, df: pd.DataFrame, column_name: str,
                              func: Callable[[], list]) -> pd.DataFrame:
        """
        Call the func with no args to assign a new column_name to the dataframe.
        func should return a list comprehension.
        Here's an example of what the function should do.
            def my_func(self) -> list:
                df = self.pu.df
                col_of_interest = df['number']
                return [self.my_f(x) for x in col_of_interest]

        It gets called with:
            df = self.pu.add_new_col_with_func(df, 'new_col_name', self.my_func)

        :param df:
        :param column_name:
        :param func: func (usually no args)
        :return:
        """
        self.df = df
        df[column_name] = func()
        return df

    def add_new_col_from_array(self, df: pd.DataFrame, column_name: str,
                               new_col: np.array) -> pd.DataFrame:
        """
        Assign new_col to df[column_name] and return df.
        Limitations: this is not as sophisticated as https://stackoverflow.com/questions/12555323/adding-new-column-to-existing-dataframe-in-python-pandas .
        The length of new_col must be the same as the length of df.
        :param df: dataframe that receives the new column (changed in place)
        :param column_name: name of the new column
        :param new_col: If this really is a Series, it will try to match indexes with the existing df (probably a good thing).
        :return: the same df, with the new column
        """
        target = df
        target[column_name] = new_col
        return target

    def mark_rows_by_func(self, df: pd.DataFrame, column_name: str,
                          func: Callable[[], list]) -> Bools:
        """
        Build a boolean mask by handing one whole column to func.
        Example func (it receives the column, not a single element):
            def is_adult(self, age:list):
                return age >= 21
        Example call:
            mark = self.pu.mark_rows_by_func(df, 'Age', self.is_adult)

        :param df: dataframe under scrutiny
        :param column_name: column passed to func
        :param func: called once with df[column_name]; returns the booleans
        :return: whatever func returned
        """
        return func(df[column_name])

    def mark_rows_by_criterion(self, df: pd.DataFrame, column_name: str,
                               criterion: Union[str, int, float]) -> Bools:
        """
        Flag the rows whose column_name value equals criterion.
        :param df: dataframe under scrutiny
        :param column_name: column to compare
        :param criterion: value compared with == against every element
        :return: boolean mask, one entry per row
        """
        return df[column_name] == criterion

    def mark_isnull(self, df: pd.DataFrame, column_name: str) -> Bools:
        """
        Flag the rows whose column_name value is missing.
        :param df: dataframe under scrutiny
        :param column_name: column to test
        :return: boolean mask, True where the value is null
        """
        return df[column_name].isna()

    def masked_df(self,
                  df: pd.DataFrame,
                  mask: Bools,
                  invert_mask: bool = False):
        """
        Select rows of df with a boolean mask, optionally inverted.
        :param df: dataframe under scrutiny
        :param mask: one boolean per row
        :param invert_mask: if True, keep the rows where mask is False instead
        :return: the selected rows
        """
        if invert_mask:
            flipped = [not flag for flag in mask]
            return df[flipped]
        return df[mask]

    def slice_df(self,
                 df: pd.DataFrame,
                 start_index: int = 0,
                 end_index: int = None,
                 step: int = 1):
        """
        Slice the rows of df by position, like df.iloc[start:end:step].
        NOTE: this does row slicing only.
        :param df: dataframe under scrutiny
        :param start_index: 0-based first position to use. Defaults to 0 (the first row).
        :param end_index: position to stop before. None (the default) means len(df);
            an explicit 0 is honoured and yields an empty slice.
        :param step: stride. 2 means every other row. Default of 1 means don't skip.
        :return: the sliced rows
        """
        # Test for None explicitly: `end_index or len(df)` would treat 0 as "no end".
        stop = len(df) if end_index is None else end_index
        return df.iloc[start_index:stop:step]

    def set_index(self,
                  df: pd.DataFrame,
                  columns: Union[Strings, str],
                  is_in_place: bool = True) -> pd.DataFrame:
        """
        Make the given column(s) the index of df.

        :param df: Dataframe under scrutiny.
        :param columns: a single column name, or a list of column names.
        :param is_in_place: True to change df itself / False to build a new df
        :return: the new df, or None when is_in_place is True
        """
        outcome = df.set_index(columns, inplace=is_in_place)
        return outcome

    def reset_index(self,
                    df: pd.DataFrame,
                    is_in_place: bool = True,
                    is_dropped: bool = False) -> pd.DataFrame:
        """
        Replace the index of df with the default 0..n-1 index.
        :param df: dataframe under scrutiny
        :param is_in_place: True to change df itself / False to build a new df
        :param is_dropped: True to discard the old index; False to keep it as a column
        :return: the new df, or None when is_in_place is True
        """
        outcome = df.reset_index(inplace=is_in_place, drop=is_dropped)
        return outcome

    def drop_index(self,
                   df: pd.DataFrame,
                   is_in_place: bool = True) -> pd.DataFrame:
        """
        Discard the index of df: reset_index with is_dropped fixed to True.
        :param df: dataframe under scrutiny
        :param is_in_place: True to change df itself / False to build a new df
        :return: whatever reset_index returns
        """
        options = {'is_in_place': is_in_place, 'is_dropped': True}
        return self.reset_index(df=df, **options)

    def drop_col(self,
                 df: pd.DataFrame,
                 columns: Union[Strings, str],
                 is_in_place: bool = True) -> pd.DataFrame:
        """
        Remove the given column(s) from df.
        :param df: dataframe under scrutiny
        :param columns: a single column name, or a list of column names.
        :param is_in_place: if true, the column(s) are removed from df itself. Otherwise, a new df is returned.
        :return: None if is_in_place is True, else the df without the column(s).
            On pandas 0.x older than 0.21 nothing is dropped and df is returned unchanged.
        """
        major, minor, _ = self.pandas_version()
        # Both comparisons are evaluated before being combined (no short-circuit).
        is_major_zero = major == 0
        is_minor_too_old = minor < 21
        if is_major_zero & is_minor_too_old:
            # presumably because drop(columns=...) is not available before 0.21 - verify
            logger.warning(
                f'Unable to drop column, as Pandas version is {minor}. Returning unchanged df.'
            )
            return df

        return df.drop(columns=columns, inplace=is_in_place)

    @logit()
    def drop_col_keeping(self,
                         df: pd.DataFrame,
                         cols_to_keep: Union[Strings, str],
                         is_in_place: bool = True) -> pd.DataFrame:
        """
        Drop every column except the ones listed in cols_to_keep.
        :param df: dataframe under scrutiny
        :param cols_to_keep: a single column name, or a list of column names, to retain
        :param is_in_place: passed through to drop_col
        :return: whatever drop_col returns
        """
        doomed = self.get_df_headers(df)
        logger.debug(
            f'I have these headers: {doomed}. But I will keep {cols_to_keep}'
        )
        keepers = [cols_to_keep] if isinstance(cols_to_keep, str) else cols_to_keep
        # list.remove raises ValueError if a keeper is not among the headers.
        for keeper in keepers:
            doomed.remove(keeper)
        return self.drop_col(df=df, columns=doomed, is_in_place=is_in_place)

    def drop_row_by_criterion(self,
                              df: pd.DataFrame,
                              column_name: str,
                              criterion: Union[int, str],
                              is_in_place: bool = True) -> pd.DataFrame:
        """
        Remove the rows whose column_name value equals criterion.
        :param df: dataframe under scrutiny
        :param column_name: column to compare
        :param criterion: rows holding this value are removed
        :param is_in_place: True to change df itself / False to build a new df
        :return: the new df, or None when is_in_place is True
        """
        is_match = df[column_name] == criterion
        doomed_labels = df[is_match].index
        return df.drop(doomed_labels, inplace=is_in_place)

    def drop_row_if_nan(self,
                        df: pd.DataFrame,
                        column_names: Strings = None,
                        is_in_place: bool = True) -> pd.DataFrame:
        """
        Remove rows that hold NaN.
        :param df: dataframe under scrutiny
        :param column_names: only these columns are inspected. If None (or empty),
            only rows whose every column is NaN are removed.
        :param is_in_place: True to change df itself / False to build a new df
        :return: the new df, or None when is_in_place is True
        """
        if not column_names:
            return df.dropna(axis='index', how='all', inplace=is_in_place)
        return df.dropna(axis='index',
                         subset=column_names,
                         inplace=is_in_place)

    def reorder_cols(self, df: pd.DataFrame, columns: Strings) -> pd.DataFrame:
        """
        Return df's columns selected in the order given.
        :param df: dataframe under scrutiny
        :param columns: column names in the wanted order, like ['colD', 'colA', 'colB', 'colC']
        :return: the result of indexing df with columns
        """
        wanted_order = columns
        return df[wanted_order]

    def replace_col(self, df: pd.DataFrame, column: str,
                    replace_dict: dict) -> pd.DataFrame:
        """
        Translate the VALUES of one column through replace_dict, in place.
        :param df: dataframe under scrutiny
        :param column: name of the column whose values are mapped
        :param replace_dict: {'origA':'replA', 'origB':'replB'}
        :return: df with the column mapped; if a KeyError is raised, a warning is
            logged and self.empty_df() is returned instead
        """
        try:
            current_values = df[column]
            df[column] = current_values.map(replace_dict)
        except KeyError:
            logger.warning(
                f'Value found outside of: {replace_dict.keys()} or column_name {column} not found. Returning empty df.'
            )
            return self.empty_df()
        return df

    def replace_col_using_func(self, df: pd.DataFrame, column_name: str,
                               func: Callable[[], list]) -> pd.DataFrame:
        """
        Overwrite a column's VALUES with func applied to each element.
        :param df: Dataframe under scrutiny; it is modified in place.
        :param column_name: name of the single column to rewrite
        :param func: handed to Series.apply; receives one element and returns its replacement
        :return: the same df
        """
        transformed = df[column_name].apply(func)
        df[column_name] = transformed
        return df

    def replace_col_using_mult_cols(self, df: pd.DataFrame,
                                    column_to_replace: str, cols: Strings,
                                    func: Callable[[], list]) -> pd.DataFrame:
        """
        Overwrite column_to_replace with values computed row by row from several columns.
        :param df: Dataframe under scrutiny; it is modified in place.
        :param column_to_replace: name of the single column to write
        :param cols: the columns handed to func
        :param func: applied with axis=1, i.e. called once per row of df[cols]
        :return: the same df
        """
        inputs = df[cols]
        df[column_to_replace] = inputs.apply(func, axis=1)
        return df

    def replace_col_with_scalar(self,
                                df: pd.DataFrame,
                                column_name: str,
                                replace_with: Union[str, int],
                                mask: Bools = None) -> pd.DataFrame:
        """
        Overwrite column_name with replace_with. If a mask of bools is given, only the
        rows where the mask is True are overwritten.
        Helpful reference at https://kanoki.org/2019/07/17/pandas-how-to-replace-values-based-on-conditions/
        :param df: dataframe under scrutiny; it is modified in place
        :param column_name: column to overwrite
        :param replace_with: the scalar written into the selected rows
        :param mask: None (all rows), a list of bools, or a Series of bools.
            A Series is applied by position (via tolist()), not aligned on its index.
        :return: the modified df; self.empty_df() if mask has an unsupported type
        """
        if mask is None:
            df[column_name] = replace_with
            return df
        if isinstance(mask, pd.Series):
            row_selector = mask.tolist()
        elif isinstance(mask, list):
            row_selector = mask
        else:
            logger.warning(
                f'mask must be None, a series, or a list, but it is: {type(mask)}'
            )
            return self.empty_df()
        # Write through df.loc: chained df[col].mask(..., inplace=True) works on a
        # temporary column object and does not update df under Copy-on-Write.
        df.loc[row_selector, column_name] = replace_with
        return df

    def join_two_dfs_on_index(self, df1: pd.DataFrame,
                              df2: pd.DataFrame) -> pd.DataFrame:
        """
        Place df2's columns beside df1's, matching rows on their index labels.
        :param df1: left dataframe
        :param df2: right dataframe
        :return: new dataframe holding the columns of both
        """
        pair = [df1, df2]
        return pd.concat(pair, axis=1, ignore_index=False)

    def join_dfs_by_column(self, dfs: Dataframes) -> pd.DataFrame:
        """
        Concatenate the dataframes side by side (column-wise).
        :param dfs: the dataframes to join, left to right
        :return: new dataframe holding the columns of all of them
        """
        joined = pd.concat(dfs, axis=1)
        return joined

    def join_dfs_by_row(self, dfs: Dataframes) -> pd.DataFrame:
        """
        Stack the dataframes on top of each other (row-wise) with a fresh 0..n-1 index.
        Note: all the dfs should share the same column names, so a typical call is:
          headers = pu.get_df_headers(big_df)
          pu.set_df_headers(new_df, headers)
          df2 = pu.join_dfs_by_row([new_df, big_df])
        :param dfs: the dataframes to stack, top to bottom
        :return: new dataframe holding the rows of all of them
        """
        stacked = pd.concat(dfs, axis=0, ignore_index=True)
        return stacked

    def dummy_var_df(self,
                     df: pd.DataFrame,
                     columns: Union[Strings, str],
                     drop_first: bool = True) -> pd.DataFrame:
        """
        One-hot encode the given column(s) into dummy variables.
        :param df: dataframe under scrutiny
        :param columns: a single column name or a list of column names.
        :param drop_first: passed to pd.get_dummies; True omits the first level of each column
        :return: new dataframe with the encoded columns
        """
        targets = [columns] if isinstance(columns, str) else columns
        return pd.get_dummies(data=df, columns=targets, drop_first=drop_first)

    def replace_col_names(self,
                          df: pd.DataFrame,
                          replace_dict: dict,
                          is_in_place: bool = True) -> pd.DataFrame:
        """
        Rename columns according to replace_dict.
        :param df: dataframe under scrutiny
        :param replace_dict: {'origColA':'replColA', 'origColB':'replColB'}
        :param is_in_place: True to rename on df itself / False to build a new df
        :return: the new df, or None when is_in_place is True
        """
        renamed = df.rename(columns=replace_dict, inplace=is_in_place)
        return renamed

    def replace_col_names_by_pattern(self,
                                     df: pd.DataFrame,
                                     prefix: str = "col",
                                     is_in_place: bool = True) -> pd.DataFrame:
        """
        Rename every column with successive names drawn from generate_col_names(prefix)
        (col1, col2... per the original description).
        :param df: dataframe under scrutiny
        :param prefix: string prefix, such as "col"
        :param is_in_place: passed through to replace_col_names
        :return: whatever replace_col_names returns
        """
        current_names = self.get_df_headers(df)
        name_source = generate_col_names(prefix)
        renames = {}
        # One generated name is consumed per existing header, in header order.
        for current in current_names:
            renames[current] = next(name_source)
        return self.replace_col_names(df, renames, is_in_place)

    def coerce_to_string(self, df: pd.DataFrame,
                         columns: Union[Strings, str]) -> pd.DataFrame:
        """
        Convert every element of the given column(s) with str().
        :param df: dataframe under scrutiny; it is modified in place
        :param columns: a column name (or list of names) to convert
        :return: the same df with those columns holding strings
        """
        # A lone name is wrapped so that one loop serves both cases.
        targets = [columns] if isinstance(columns, str) else columns
        for name in targets:
            df[name] = df[name].apply(str)
        return df

    def coerce_to_numeric(self, df: pd.DataFrame,
                          columns: Union[Strings, str]) -> pd.DataFrame:
        """
        Convert the given column(s) to ints or floats using pd.to_numeric.
        :param df: dataframe under scrutiny; it is modified in place
        :param columns: a column name (or list of names) to coerce
        :return: the same df with those columns made numeric
        """
        # A lone name is wrapped so that the selection below is always a sub-frame.
        targets = [columns] if isinstance(columns, str) else columns
        converted = df[targets].apply(pd.to_numeric)
        df[targets] = converted
        return df

    def coerece_to_int(self, df: pd.DataFrame,
                       columns: Union[Strings, str]) -> pd.DataFrame:
        """
        Cast the given column name(s) to int with astype.
        :param df: dataframe under scrutiny; it is modified in place
        :param columns: a column name (or list of names) to cast
        :return: the same df with those columns cast to int
        """
        as_ints = df[columns].astype(int)
        df[columns] = as_ints
        return df

    def round(self, df: pd.DataFrame, rounding_dict: dict) -> pd.DataFrame:
        """
        Round selected columns to a per-column number of decimal places.
        Note from the original author's testing: python's round(4.55, 2) gave 4.5
        while this function gave 4.6.
        :param df: dataframe under scrutiny
        :param rounding_dict: column name -> decimal places, e.g. {'A': 2, 'B':3}
        :return: the result of df.round(rounding_dict)
        """
        places_by_column = rounding_dict
        return df.round(places_by_column)

    def replace_vals(self,
                     df: pd.DataFrame,
                     replace_me: str,
                     new_val: str,
                     is_in_place: bool = True) -> pd.DataFrame:
        """
        Swap every occurrence of replace_me in df for new_val.

        :param df: Dataframe under scrutiny.
        :param replace_me: the value to look for
        :param new_val: the value written in its place
        :param is_in_place: True to replace values in place / False to create a new df
        :return: the new df, or None when is_in_place is True
        """
        outcome = df.replace(to_replace=replace_me,
                             value=new_val,
                             inplace=is_in_place)
        return outcome

    def replace_vals_by_mask(self, df: pd.DataFrame, mask: Bools,
                             col_to_change: str, new_val: Union[str, int,
                                                                float]):
        """
        Write new_val into col_to_change for the rows selected by mask.
        :param df: dataframe under scrutiny; it is modified in place
        :param mask: one boolean per row; True marks the rows to change
        :param col_to_change: Column Name whose rows you want to change
        :param new_val: the value written into the selected rows
        :return: the changed df (also changed in place)
        """
        df.loc[mask, col_to_change] = new_val
        # Return df itself: a chained `ans = df.loc[...] = new_val` would hand back
        # new_val instead of the dataframe.
        return df

    def is_empty(self, df: pd.DataFrame) -> bool:
        """
        Report whether df is empty.
        :param df: Dataframe to inspect
        :return: the value of df.empty
        """
        emptiness = df.empty
        return emptiness

    def aggregates(self, df: pd.DataFrame, group_by: Strings,
                   col: str) -> pd.DataFrame:
        """
        Compute the mean, min, max, and sum of col for each group_by group.
        Reference: https://jamesrledoux.com/code/group-by-aggregate-pandas .
        :param df: dataframe under scrutiny
        :param group_by: column name(s) to group on
        :param col: the column being aggregated
        :return: one row per group: the group_by column(s) followed by 'mean', 'min', 'max', 'sum'
        """
        stat_names = ['mean', 'min', 'max', 'sum']
        summary = df.groupby(group_by).agg({col: list(stat_names)})
        # Flatten the (col, stat) column labels down to the bare stat names.
        summary.columns = stat_names
        # Turn the group keys back into ordinary columns.
        self.reset_index(summary, is_in_place=True)
        return summary

    def stats(self, df: pd.DataFrame, xlabel_col_name: str,
              ylabel_col_name: str):
        """
        Run a linear regression of one column on another and log the key figures.
        :param df: dataframe under scrutiny
        :param xlabel_col_name: x column label
        :param ylabel_col_name: y column label
        :return: slope, intercept, and r (correlation)
        """
        xs = df[xlabel_col_name]
        ys = df[ylabel_col_name]
        fit = linregress(xs, ys)
        slope, intercept, r = fit.slope, fit.intercept, fit.rvalue
        logger.info('Main equation: y = %.3f x + %.3f' % (slope, intercept))
        logger.info('r^2 = %.4f' % (r * r))
        logger.info('p = %.4f' % (fit.pvalue))
        logger.info('std err: %.4f' % (fit.stderr))
        return slope, intercept, r

    def head(self, df: pd.DataFrame, how_many_rows: int = 10) -> pd.DataFrame:
        """
        Return the first how_many_rows rows. Handy as the last line of an immediate, as in:
          pu.head(df)
        Side effect: df is stored on self.df.
        :param df: dataframe under scrutiny
        :param how_many_rows: number of leading rows to return
        :return: the leading rows
        """
        self.df = df
        leading_rows = self.df.head(how_many_rows)
        return leading_rows

    def head_as_string(self, df: pd.DataFrame, how_many_rows: int = 10) -> str:
        """
        Render the first how_many_rows rows as a string, separated by \n, and log it at debug level.
        :param df: dataframe under scrutiny
        :param how_many_rows: number of leading rows to render
        :return: str() of the leading rows
        """
        leading_rows = self.head(df, how_many_rows)
        ans = str(leading_rows)
        logger.debug(f'First {how_many_rows} are:\n{ans}')
        return ans

    def tail_as_string(self, df: pd.DataFrame, how_many_rows: int = 10) -> str:
        """
        Render the last how_many_rows rows as a string, separated by \n, and log it at debug level.
        :param df: dataframe under scrutiny
        :param how_many_rows: number of trailing rows to render
        :return: str() of the trailing rows
        """
        trailing_rows = self.tail(df, how_many_rows)
        ans = str(trailing_rows)
        logger.debug(f'Last {how_many_rows} are:\n{ans}')
        return ans

    def tail(self, df: pd.DataFrame, how_many_rows: int = 10) -> pd.DataFrame:
        """
        Return the last how_many_rows rows. Handy as the last line of an immediate, as in:
          pu.tail(df)
        Side effect: df is stored on self.df.
        :param df: dataframe under scrutiny
        :param how_many_rows: number of trailing rows to return
        :return: the trailing rows
        """
        self.df = df
        trailing_rows = self.df.tail(how_many_rows)
        return trailing_rows

    def sort(self,
             df: pd.DataFrame,
             columns: Union[Strings, str],
             is_in_place: bool = True,
             is_asc: bool = True):
        """
        Sort df by the values of the given column(s); NaNs are placed last.
        :param df: dataframe under scrutiny
        :param columns: a column name (or list of names) to sort by
        :param is_in_place: True to sort df itself / False to build a new df
        :param is_asc: True for ascending order, False for descending
        :return: the sorted df, or None when is_in_place is True
        """
        options = {
            'ascending': is_asc,
            'inplace': is_in_place,
            'kind': 'quicksort',
            'na_position': 'last',
        }
        return df.sort_values(columns, **options)

    def largest_index(self, df: pd.DataFrame) -> Tuple[int, int]:
        """
        Locate the largest index label of df.
        :param df: dataframe under scrutiny
        :return: (position of the largest label, the largest label itself) - usually an int and an int
        """
        labels = df.index
        position = labels.argmax()
        return position, labels.max()

    def smallest_index(self, df: pd.DataFrame) -> Tuple[int, int]:
        """
        Locate the smallest index label of df.
        :param df: dataframe under scrutiny
        :return: (position of the smallest label, the smallest label itself) - usually an int and an int
        """
        labels = df.index
        position = labels.argmin()
        return position, labels.min()
Пример #34
0
# -*- coding:utf-8 -*-
'''
Created on Jan 29, 2019

@author: Jackie
'''
from FileUtil import FileUtil
from NlpTookit.NlpTookit import NlpTookit
import jieba
import jieba.posseg as pseg

# Walk every file under the corpus directory, split each non-empty line into
# sentences, and (in the part of the loop that follows this excerpt) process them.
dir ="data/sports"
fu = FileUtil()
nt = NlpTookit()
wordcounter={}# dictionary for counting word frequencies
poscounter={}# dictionary for counting parts of speech
lengthCounter ={}# dictionary for the distribution of word lengths and sentence lengths
# step one: get all files' absolute paths
filelist = fu.getFiles(dir)

# step two: read every file
for filepath in filelist:
    print(filepath)
    lines = fu.readlines(filepath, "UTF-8")
    for line in lines:
        # skip lines that are empty once surrounding whitespace is removed
        if(len(line.strip())==0):
            continue
        # step three: split the line into sentences
#         print(line.strip())
        sentences = nt.toSentenceList(line.strip())
        # NOTE(review): the excerpt ends at this loop header; its body is not shown here.
        for sentence in sentences:
Пример #35
0
def main():
    """
    Build the DSSM sample file for one day from that day's click-action files.

    The date comes from the single optional command-line argument and defaults to
    TimeUtil.get_yesterday_str(). Settings "recent-items", "cpu-threshold",
    "window", "J" and "sample-path" are read from DSSMConfigUtil.sample_conf, and
    "action-path" from DSSMConfigUtil.action_conf; an empty path setting falls
    back to a directory under the job root four levels above this file.

    The filtered actions are split into blocks and each block is handed to
    deal_block in a worker process; every worker is given the same output path,
    sample_<date>.data under the sample path.

    Exits with status 1 on bad arguments or when a click file is missing.
    A worker that raises is reported through logger.error; it does not change
    the exit status.
    """
    if len(sys.argv) == 1:
        dt = TimeUtil.get_yesterday_str()
    elif len(sys.argv) == 2:
        dt = sys.argv[1]
    else:
        logger.error("parameter error")
        sys.exit(1)

    recent_items = DSSMConfigUtil.sample_conf.get_int("recent-items")
    cpu_threshold = DSSMConfigUtil.sample_conf.get_float("cpu-threshold")
    window = DSSMConfigUtil.sample_conf.get_int("window")
    J = DSSMConfigUtil.sample_conf.get_int("J")

    action_path = DSSMConfigUtil.action_conf.get_string("action-path")
    if action_path == "":
        job_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../../"))
        action_path = os.path.join(job_path, "data/rec-recall/dssm/action")
    click_path = os.path.join(action_path, "click")

    click_files = click_action_files(click_path, dt, 1)
    logger.info("click_files:{}".format(click_files))

    sample_path = DSSMConfigUtil.sample_conf.get_string("sample-path")
    if sample_path == "":
        job_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../../"))
        sample_path = os.path.join(job_path, "data/rec-recall/dssm/sample")

    os.makedirs(sample_path, exist_ok=True)
    logger.info("sample_path:{}".format(sample_path))

    if not FileUtil.files_exists(click_files):
        logger.info("there are click files that do not exist")
        sys.exit(1)

    ma = merge_action(click_files)
    fa = filter_action(ma, window+1, recent_items)
    logger.info("len(ma):{}".format(len(ma)))
    logger.info("len(fa):{}".format(len(fa)))

    items = set_items(ma)
    logger.info("len(items):{}".format(len(items)))

    total_number = len(fa)
    block_number = math.ceil(CpuUtil.cpu_count() * cpu_threshold)
    block_size = FileUtil.block_size(total_number, block_number)

    logger.info("total_number:{}".format(total_number))
    logger.info("block_number:{}".format(block_number))
    logger.info("block_size:{}".format(block_size))

    blocks = split_merge_action(fa, block_size)

    logger.info("len(blocks):{}".format(len(blocks)))

    # Single definition of the output path; it is removed up front and then
    # passed to every worker.
    sample_file = os.path.join(sample_path, "sample_{}.data".format(dt))
    FileUtil.remove(sample_file)

    # Release the large intermediates before the worker processes are created.
    del ma, fa
    gc.collect()

    pool = Pool(block_number)
    async_results = [
        pool.apply_async(deal_block, args=(block, items, window, J, sample_file))
        for block in blocks
    ]
    pool.close()
    pool.join()

    # apply_async only re-raises a worker's exception when its result is fetched,
    # so fetch every result and report failures instead of losing them.
    for async_result in async_results:
        try:
            async_result.get()
        except Exception as e:
            logger.error("deal_block failed:{}".format(e))

    logger.info("sample_file:{}".format(sample_file))
Пример #36
0
class BotEngine:
    """
    Polls a set of signal channels through a browser driver and writes each new
    signal to file.

    recentSignals maps a channel name to the timestamp of the last message that
    was handled (0 = nothing handled yet). signalVendors maps a channel name to
    the provider object used to read and parse that channel; it is filled in by
    setListOfVendors.
    NOTE: recentSignals, signalVendors, utils and fileUtil are class attributes
    and are therefore shared by all instances.
    """

    recentSignals={'forexsignalzz':0,'amirFX_signal':0,'FOR3X_SIGNAL':0,'AmirFx VIP signal':0,'Eagl777':0 , 'WallstreetFXsignals':0, 'wolfofforexplus':0}
    signalVendors={'a':  1}
    utils = Utils()
    fileUtil = FileUtil()

    # How often each browser step in find_last_update_time is attempted.
    _MAX_ATTEMPTS = 5
    _SEARCH_FIELD_XPATH = "//input[contains(@class,'im_dialogs_search_field')]"
    _FIRST_DIALOG_XPATH = "//div[@class='im_dialogs_col']//li[contains(@class,'im_dialog_wrap')]/a"
    _MESSAGE_DATE_XPATH = "//div[contains(@class,'im_history_messages_peer')]//div[contains(@class,'im_history_message_wrap')]//span[@class='im_message_date_text nocopy']"

    def __init__(self,driver):
        """
        :param driver: browser driver used for all page lookups
        """
        self.driver = driver

    #{'Forex signals': 'GforexSignalsIr'}

    def setListOfVendors(self):
        """Register one provider object per channel name in signalVendors."""
        self.signalVendors.update({'amirFX_signal': GforexSignalsIr(self.driver)})
        self.signalVendors['FOR3X_SIGNAL'] = FOR3X_SIGNAL(self.driver)
        self.signalVendors['AmirFx VIP signal'] = GforexSignalsIr(self.driver)
        self.signalVendors['signalTest']= Eagl777(self.driver)
        self.signalVendors['Eagl777']= Eagl777(self.driver)
        self.signalVendors['WallstreetFXsignals']= WallstreetFXsignals(self.driver)
        self.signalVendors['wolfofforexplus']= wolfofforexplus(self.driver)
        self.signalVendors['forexsignalzz']=forexsignalzz(self.driver)

    def getNewMessage(self):
        """
        Poll every channel in recentSignals forever.

        A failure while handling one channel resets that channel's timestamp to 0
        and moves on to the next channel. Only Exception subclasses are caught, so
        KeyboardInterrupt / SystemExit still stop the loop.
        """
        while True:
            key = None
            try:
                sleep(1)

                for key in self.recentSignals:
                    try:
                        self._handleChannel(key)
                    except Exception: # INNER TRY
                        print('in INNER except signalFinder: ')
                        self.recentSignals[key]=0
                        print(sys.exc_info()[0])
                        continue

            except Exception: # outer try
                print('in OUTER except signalFinder: ')
                # key is still None if the failure happened before the first channel.
                if key is not None:
                    self.recentSignals[key]=0
                print(sys.exc_info()[0])
                continue

    def _handleChannel(self, key):
        """Check one channel for a new message and write any signals it yields."""
        newMessages=self.find_last_update_time(key) #return last two messages webElement-time

        print('before getting time')
        if newMessages is None or newMessages[0] == self.recentSignals[key]:
            print('repeated signal for '+key+' provider')
            return

        print('preparing new signal started in signalFinder!')
        provider=self.signalVendors[key]
        self.recentSignals[key] = newMessages[0]

        sleep(2)
        signalText=provider.get_message(key)
        if signalText is None:
            return

        signalObjs= provider.createSignalDto(signalText,key)
        if signalObjs[0].enterPrice != 0:
            for signal in signalObjs.values():
                if signal != 0:
                    signal.vol = 0.01
                    self.fileUtil.writeOnFile("s",signal)
                sleep(10)
        else:
            print('why here!!????')
            # Forget the timestamp so the same message is examined again next round.
            self.recentSignals[key]=0

    def _retry(self, action):
        """Call action() up to _MAX_ATTEMPTS times; return (succeeded, result)."""
        for _ in range(self._MAX_ATTEMPTS):
            try:
                return True, action()
            except Exception:
                sleep(2)
        return False, None

    def find_last_update_time(self, chName):
        """
        Open the channel chName and read the timestamps of its two newest messages.

        Each browser step (type the name into the search field, click the first
        search hit, read the newest message time) is attempted at most
        _MAX_ATTEMPTS times.

        :param chName: channel name typed into the dialog search field
        :return: [newest message time, second newest message time ("" if there is
            none)], or None when one of the steps kept failing
        """
        print('start finding last update time')

        def searchChannel():
            elem= self.driver.find_element_by_xpath(self._SEARCH_FIELD_XPATH)
            sleep(2)
            elem.clear()
            elem.send_keys(chName)
            sleep(1)

        def openFirstHit():
            self.driver.find_elements_by_xpath(self._FIRST_DIALOG_XPATH)[0].click()
            sleep(2)

        def readNewestTime():
            newest = self.driver.find_elements_by_xpath(self._MESSAGE_DATE_XPATH)[-1].get_attribute('data-content')
            sleep(2)
            return newest

        firstLastMessageTime = None
        # Steps run in order; stop at the first one that fails on every attempt.
        for step in (searchChannel, openFirstHit, readNewestTime):
            succeeded, firstLastMessageTime = self._retry(step)
            if not succeeded:
                print('giving up finding last update time')
                return None

        try:
            secondLastMessageTime = self.driver.find_elements_by_xpath(self._MESSAGE_DATE_XPATH)[-2].get_attribute('data-content')
        except Exception:
            secondLastMessageTime=""
            print('no second message')

        sleep(2)
        print('end of finding last update time')
        return [firstLastMessageTime ,secondLastMessageTime]





    
    
    
    
    
    
    
        
Пример #37
0

def getLocation(phoneNumber):
    """
    Look up phoneNumber at serviceUrl and return the second segment of the reply.

    The number is sent as the "m" query parameter. The reply is cut wherever
    "<br/><br/>" (or a literal "||") occurs, and the piece at index 1 is returned.
    Raises IndexError if the reply contains no such separator.
    """
    grabber = HtmlGraber()
    page = grabber.doGrab(serviceUrl + "?m=" + phoneNumber)
    segments = page.replace("<br/><br/>", "||").split("||")
    return segments[1]


if __name__ == "__main__":
    pass

fileUtil = FileUtil()
content = fileUtil.openFile(phoneSrc)
# print content;
content = content.replace("\r", " ").replace("\t", " ").replace("\n", " ")
# print content;
items = content.split(" ")

items = [item for item in items if item != ""]

output = ""
index = 1
for i in range(len(items)):
    if i % 2 == 1:
        print index
        index += 1
        location = getLocation(items[i])