def getLSM9DS0Reading():
    try:
        # grab data from sensor
        gyro, mag, accel = imu.read()
        sensorValues = [gyro, mag, accel]
        print sensorValues
        gyro_x, gyro_y, gyro_z = gyro
        mag_x, mag_y, mag_z = mag
        accel_x, accel_y, accel_z = accel
        sensorValue = "%s; %s; %s; %s; %s; %s; %s; %s; %s" % (
            gyro_x, gyro_y, gyro_z, mag_x, mag_y, mag_z, accel_x, accel_y, accel_z)
        logger.log("Gyro: %s %s %s, Mag: %s %s %s, Accel: %s %s %s\n" %
                   (gyro_x, gyro_y, gyro_z, mag_x, mag_y, mag_z, accel_x, accel_y, accel_z))
        print sensorValue
        FileUtil.saveToNewFile(lsm9ds0_reportDir, lsm9ds0_name, sensorValue)
    except Exception as e:
        logger.log("ERROR")
        logger.log(e)
        print e
def saveResult(rfidData, dictValue):
    global oldRfidCode
    logger.log("Card: " + rfidData + " mapped to: " + dictValue)
    if oldRfidCode != dictValue:
        FileUtil.saveToNewFile(reportDir, name, dictValue)
        oldRfidCode = dictValue
    else:
        logger.log("Duplicate read")
def measureTemperature():
    temperature = sensor.get_temperature()
    temperature = format(temperature, '.2f')
    #print("The temperature is %s celsius" % temperature)
    try:
        FileUtil.saveToNewFile(reportDir, name, temperature)
        logger.log(temperature)
    except Exception as e:
        logger.log("Error:")
        logger.log(e)
        print e
def measureLight():
    try:
        lightLevel = readLight()
        intLightLevel = int(lightLevel)
        FileUtil.saveToNewFile(reportDir, name, intLightLevel)
        sensorValueForLogger = ": %s lux" % (lightLevel)
        logger.log(sensorValueForLogger)
    except Exception as e:
        print e
        logger.log("Error:")
        logger.log(e)
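# The sensor scripts here are meant to run on a timer; the commented-out
# scheduler lines in the GPS script below show the intended pattern. A
# minimal sketch (assumes APScheduler is installed and `interval` holds the
# polling period in seconds):
from apscheduler.schedulers.blocking import BlockingScheduler

scheduler = BlockingScheduler()
scheduler.add_job(measureLight, 'interval', seconds=interval)
scheduler.start()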
def executing_directory(self) -> str:
    """
    Get the current executing directory by taking executing_file and stripping off the filename.
    Note differences between Windows and Linux.
    :return: path of the executing file, without the filename
    """
    fu = FileUtil()
    path, _ = fu.split_qualified_path(self.executing_file())
    logger.debug(f'executing file is {self.executing_file()}')
    logger.debug(f'path (minus filename) is {path}')
    return path
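# Usage sketch (ExecUtil is the class this method belongs to, per the tests
# elsewhere in this section that call eu.executing_directory()):
eu = ExecUtil()
print(eu.executing_file())       # e.g. /home/user/project/main.py
print(eu.executing_directory())  # e.g. /home/user/project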
def test_write_excel(self):
    pu = PandasUtil()
    fu = FileUtil()
    df = self.my_test_df()
    fn = self.fu.qualified_path(self.path, self.spreadsheet_name)
    yaml_dict = {'outputFile': fn, 'worksheet': self.worksheet_name}
    fu.dump_yaml(Test_ApplicationUtil.excel_qual_path, yaml_dict)
    app = MyApplication(Test_ApplicationUtil.excel_qual_path)
    app.write_excel(df=df, output_file_yaml_entry='outputFile', worksheet=self.worksheet_name)
    actual = pu.read_df_from_excel(excelFileName=fn, excelWorksheet=self.worksheet_name)
    assert_frame_equal(df, actual)
def fetch_report_single(self, any_pageID, any_guideline):
    self.wd.get(self.rep_index_url_base + self.projectID + "/")
    DateUtil.app_sleep(self.shortWait)
    # Process the target PID data
    qy_page_rows = []
    new_page_rows = {}
    page_rows = self.get_page_list_data()
    if any_pageID == "":
        new_page_rows = page_rows
    else:
        # Build the PID map used for the loop
        if TextUtil.is_csv(any_pageID) is True:
            tmp_arr = any_pageID.split(",")
            for r in tmp_arr:
                qy_page_rows.append(r)
        else:
            qy_page_rows.append(any_pageID)
        for tmp_pid in qy_page_rows:
            for key, value in page_rows.items():
                if tmp_pid == key:
                    new_page_rows[key] = value
        if len(new_page_rows) < 1:
            print("The PID given with the -p option does not exist. Stopping.")
    # Process the target guideline data
    guideline_rows = []
    if any_guideline == "":
        guideline_rows = FileUtil.open_text_data(self.guideline_file_name)
    else:
        if TextUtil.is_csv(any_guideline) is True:
            tmp_arr = any_guideline.split(",")
            for r in tmp_arr:
                guideline_rows.append(r)
        else:
            guideline_rows.append(any_guideline)
    # header
    self.rep_data.extend(TextUtil.get_header())
    # Loop over guidelines
    for guideline in guideline_rows:
        guideline_disp = guideline
        if TextUtil.is_jis2016_lower(guideline) is False:
            guideline = "7." + guideline
        # Loop over pages
        for key, value in new_page_rows.items():
            pageID = key
            pageURL = value
            print(pageID + ". " + guideline_disp + " is being processed. (" + DateUtil.get_logtime() + ")")
            path = self.fetch_report_detail_path(pageID, guideline)
            self.wd.get(path)
            DateUtil.app_sleep(self.shortWait)
            self.rep_data.extend(
                self.get_detail_table_data(pageID, pageURL, guideline))
class InfoCollect():
    def __init__(self, filePath, siteName):
        self.filePath = filePath
        self.siteName = siteName
        self.fileUtil = FileUtil()

    def getInfo(self, strCommand):
        if strCommand == 'whois':
            command = 'whois ' + self.siteName
        elif strCommand == 'whatweb':
            command = 'whatweb ' + self.siteName
        elif strCommand == 'dig':
            command = 'dig @114.114.114.114 ' + self.siteName + ' any'
        (status, results) = commands.getstatusoutput(command)
        self.fileUtil.writeFile("Status:" + str(status) + "\n\n" + results,
                                self.filePath + strCommand + '.txt', 'w')
        print strCommand + self.siteName + ' collection finished!'
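# Example usage (Python 2; assumes the whois/whatweb/dig binaries are on the
# PATH and that the result directory already exists):
collector = InfoCollect('/tmp/results/info/', 'example.com')
collector.getInfo('whois')
collector.getInfo('dig')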
def fetch_report_sequential(self):
    # header
    self.rep_data.extend(TextUtil.get_header())
    self.wd.get(self.rep_index_url_base + self.projectID + "/")
    DateUtil.app_sleep(self.shortWait)
    guideline_rows = FileUtil.open_text_data(self.guideline_file_name)
    page_rows = self.get_page_list_data()
    # Loop over guidelines
    for guideline in guideline_rows:
        guideline_disp = guideline
        if TextUtil.is_jis2016_lower(guideline) is False:
            guideline = "7." + guideline
        # Loop over pages
        for key, value in page_rows.items():
            pageID = key
            pageURL = value
            print(pageID + ". " + guideline_disp + " is being processed. (" + DateUtil.get_logtime() + ")")
            path = self.fetch_report_detail_path(pageID, guideline)
            self.wd.get(path)
            DateUtil.app_sleep(self.shortWait)
            self.rep_data.extend(
                self.get_detail_table_data(pageID, pageURL, guideline))
def do_reset_guideline():
    # Load the settings file
    user_data = FileUtil.getUserProperties("user.yaml")
    gLevel = user_data[7]
    # The arrays below control what gets written out
    gA = ["1.1.1", "1.2.1", "1.2.2", "1.2.3", "1.3.1", "1.3.2", "1.3.3", "1.4.1", "1.4.2",
          "2.1.1", "2.1.2", "2.2.1", "2.2.2", "2.3.1", "2.4.1", "2.4.2", "2.4.3", "2.4.4",
          "3.1.1", "3.2.1", "3.2.2", "3.3.1", "3.3.2", "4.1.1", "4.1.2"]
    gAA = ["1.2.4", "1.2.5", "1.4.3", "1.4.4", "1.4.5", "2.4.5", "2.4.6", "2.4.7",
           "3.1.2", "3.2.3", "3.2.4", "3.3.3", "3.3.4"]
    gAAA = ["1.2.6", "1.2.7", "1.2.8", "1.2.9", "1.4.6", "1.4.7", "1.4.8", "1.4.9",
            "2.1.3", "2.2.3", "2.2.4", "2.2.5", "2.3.2", "2.4.8", "2.4.9", "2.4.10",
            "3.1.3", "3.1.4", "3.1.5", "3.1.6", "3.2.5", "3.3.5", "3.3.6"]
    guideline_names = []
    if gLevel == "A":
        guideline_names = gA
    elif gLevel == "AA":
        guideline_names = gA + gAA
    else:
        guideline_names = gA + gAA + gAAA
    # Write out the text data
    FileUtil.write_text_data(guideline_names, "guideline_datas.txt")
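# A quick sanity check of the level logic above (assumes the gA/gAA/gAAA
# lists are in scope; the counts match the WCAG 2.0 success criteria):
# "A" -> 25 criteria, "AA" -> 25 + 13 = 38, anything else -> 25 + 13 + 23 = 61.
assert len(gA) == 25 and len(gAA) == 13 and len(gAAA) == 23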
def pkm2png(path):
    files = FileUtil.getAlllFilesPathOfCurrentDirectory(path)
    for f in files:
        if f.endswith(suffix_pkm):
            #print(f)
            pkm = f
            png = pkm.replace(suffix_pkm, suffix_png)
            #print(png)
            plist = pkm.replace(suffix_pkm, suffix_plist)
            #print(plist)
            # etc1tool.exe %%x --decode -o %%x.png
            command = '%s %s --decode -o %s' % (exe, pkm, png)
            print(command)
            os.system(command)
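# Example call (assumes the module-level exe points at etc1tool, and that
# suffix_pkm / suffix_png / suffix_plist are set, e.g. '.pkm', '.png', '.plist'):
pkm2png(r'C:\assets\textures')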
def do_report(projectID, any_pageID, any_guideline):
    # Load the configuration data
    user_data = FileUtil.getUserProperties("user.yaml")
    uid = user_data[0]
    pswd = user_data[1]
    systemWait = user_data[2]
    longWait = user_data[3]
    midWait = user_data[4]
    shortWait = user_data[5]
    driver_type = user_data[6]
    appWait = [systemWait, longWait, midWait, shortWait]
    # Create the LibraDriver instance
    lrp = LibraDriver(uid, pswd, projectID, appWait, driver_type)
    # Log in
    lrp.login()
    DateUtil.app_sleep(shortWait)
    # Report index page
    lrp.browse_repo()
    DateUtil.app_sleep(shortWait)
    # Branch on the arguments
    if any_pageID == "" and any_guideline == "":
        lrp.fetch_report_sequential()
    else:
        lrp.fetch_report_single(any_pageID, any_guideline)
    # Log out
    lrp.logout()
    DateUtil.app_sleep(shortWait)
    lrp.shutdown()
    rep_data = lrp.getRepData()
    print("Moving on to the Excel export. (" + DateUtil.get_logtime() + ")")
    ExcelUtil.save_xlsx(rep_data)
    print("The Excel export has finished. (" + DateUtil.get_logtime() + ")")
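# Example invocations (assumes user.yaml holds the uid/password/wait settings
# read by FileUtil.getUserProperties; "551" is the sample project ID used in
# the sequential script below, and the page IDs here are hypothetical):
do_report("551", "", "")                # full sequential report
do_report("551", "p001,p002", "1.1.1")  # selected pages, one guideline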
    userName = value
if op == "-l":
    savePath = value
if op == "-p":
    password = value
if op == "-c":
    courseUrl = value

makeDirWhenNeeded(savePath)
doLogin(userName, password)
content = doPost(courseUrl)
#print content
answerPageUrls = getSubbmittedUrls(content)
fileUtil = FileUtil()
count = 0
for url, name in answerPageUrls:
    answerPage = doPost(urlRoot + url)
    fileUrl = getAnswerFileUrl(answerPage)
    # print fileUrl
    saveName = name + "." + fileUrl.split('.')[1]
    # saveName = str(count) + "." + fileUrl.split('.')[1]
    count += 1
    saveName = unicode(saveName, "utf-8")
    fileUrl = urlHomeRoot + fileUrl
    # print fileUrl
    fileContent = HtmlGraber().doGrab(fileUrl)
    filePath = savePath + saveName
    fileUtil.binaryWrite(filePath, fileContent)
    print saveName + " saved\n"
# coding=utf-8
'''
Created on 2017-12-18

@author: Administrator
'''
from FileUtil import FileUtil

if __name__ == '__main__':
    path = unicode("F:\部门对接\编辑所需文件20170930", "utf-8")
    fileutils = FileUtil()
    dic = fileutils.readFile(path)
    for index in dic:
        print index
        #print dic
print "主域名:" + domainName siteName = domainName.split('.')[0] #获取当前时间 currentTime = time.strftime('%Y%m%d_%H%M%S',time.localtime(int(time.time()))) print "当前时间:" + currentTime #创建結果文件夾及文件 dnsResultFilePath = resultFilePath + siteName + '/domain/' nmapResultFilePath = resultFilePath + siteName + '/nmap/' infoResultFilePath = resultFilePath + siteName + '/info/' nmapResultFile = nmapResultFilePath + currentTime + '.txt' #创建扫描结果存放路径 pathUtil = PathUtil() pathUtil.createFilePath(dnsResultFilePath, nmapResultFilePath, infoResultFilePath) #收集信息 infoCollect = InfoCollect(infoResultFilePath, domainName) infoCollect.getInfo('whois') infoCollect.getInfo('whatweb') infoCollect.getInfo('dig') #读取dns字典 fileUtil = FileUtil() domainList = fileUtil.getdomainNameList(domainName, domainListDic) #获取dnf密码字典中的域名的IP地址并分类 finalIPList = getDNSIP(domainList, dnsResultFilePath, currentTime) #获取端口列表 portList = getPortList(portFilePath) #创建扫描队列 queueUtil = QueueUtil() queueUtil.createQueue(finalIPList, portList, nmapResultFile, queueNumber)
class Test_FileUtil(TestCase):
    path_no_drive = 'temp'
    fn = 'test.csv'
    yaml = 'example.yaml'
    text_fn = 'test.txt'

    def __init__(self, *args, **kwargs):
        super(Test_FileUtil, self).__init__(*args, **kwargs)
        self.path = r'c:\temp' if platform.system() == 'Windows' else r'/tmp'
        self._fu = FileUtil()
        self._du = DateUtil()
        self.features_dict = {
            'book': "Hitchhiker's Guide",
            'characters': {
                'answer': 42,
                'name': 'Dent. Arthur Dent.'
            }
        }

    @classmethod
    def tearDownClass(cls) -> None:
        fu = FileUtil()
        path = r'c:\temp' if platform.system() == 'Windows' else r'/tmp'
        fu.delete_file(fu.qualified_path(path, cls.yaml))
        fu.delete_file(fu.qualified_path(path, cls.fn))
        fu.delete_file(fu.qualified_path(path, cls.text_fn))

    @property
    def path(self):
        return self._path

    @path.setter
    def path(self, p):
        self._path = p

    def create_csv(self):
        lines = [
            ',col1,col2',
            '0,1,3',
            '1,2,4',
        ]
        filename = self._fu.qualified_path(self.path, self.fn)
        self._fu.write_text_file(filename, lines)
        logger.debug(f'create_csv to {self.path}{sep}{self.fn}.')

    def create_yaml(self, keys: list, vals: list):
        writeMe = []
        for i in range(len(keys)):
            writeMe.append(f'{keys[i]}: {vals[i]}')
        qualifiedPath = self._fu.qualified_path(self.path, self.yaml)
        self._fu.write_text_file(filename=qualifiedPath, lines=writeMe)

    def generate_text_lines(self, how_many: int = 10, width: int = None) -> List[str]:
        if width:
            ans = ['{0:*^{width}}'.format(i, width=width) for i in range(how_many)]
            return ans
        return [f'Line {i}' for i in range(how_many)]

    def create_text_file(self, filename: str, how_many: int = 10, width: int = None):
        lines = self.generate_text_lines(how_many, width)
        self._fu.write_text_file(filename, lines)

    @logit()
    def test_is_windows(self):
        with mock.patch('platform.system') as mocked_platform:
            mocked_platform.return_value = 'Linux'
            mocked_fu = FileUtil()
            test1 = mocked_fu.is_Windows
            self.assertFalse(test1)
        with mock.patch('platform.system') as mocked_platform:
            mocked_platform.return_value = 'Windows'
            mocked_fu = FileUtil()
            self.assertTrue(mocked_fu.is_Windows)

    @logit()
    def test_dump_yaml(self):
        yaml_fn = self._fu.qualified_path(self.path, self.yaml)
        self._fu.dump_yaml(yaml_fn, self.features_dict)
        self.assertTrue(self._fu.file_exists(yaml_fn))
        actual = self._fu.read_yaml(yaml_fn)
        self.assertDictEqual(self.features_dict, actual)

    @logit()
    def test_current_directory(self):
        logger.debug(f'current working dir is really {self._fu.current_directory()}')
        my_mock_dir = r'\synthesys\testing'
        with mock.patch('FileUtil.getcwd', return_value=my_mock_dir):
            actual = self._fu.current_directory()
            self.assertEqual(actual, my_mock_dir)

    def test_read_text_file(self):
        filename = self._fu.qualified_path(self.path, self.text_fn)
        how_many_lines = randrange(10) + 2
        self.create_text_file(filename, how_many_lines)
        expected = self.generate_text_lines(how_many_lines)
        actual = [x.rstrip() for x in self._fu.read_text_file(filename)]  # must remove newline chars
        self.assertListEqual(expected, actual)

    @logit()
    def test_read_text_file_err(self):
        # test an IO error
        filename = self._fu.qualified_path(self.path, self.text_fn)
        with mock.patch('FileUtil.open', create=True) as mocked_open:
            mocked_open.side_effect = IOError()
            self._fu.read_text_file(filename)

    @logit()
    def test_read_yaml(self):
        keys = ['firstname', 'lastname', 'zip']
        vals = ['Rajah', 'Chacko', 28269]
        self.create_yaml(keys, vals)
        qualifiedPath = self._fu.qualified_path(self.path, self.yaml)
        d = self._fu.read_yaml(yamlFile=qualifiedPath)
        logger.debug(f'Contents of yaml: {d}')
        self.assertEqual(list(d.keys()), keys)
        self.assertEqual(vals[0], d[keys[0]])

    @logit()
    @mock.patch('FileUtil.safe_load')
    def test_read_yaml_err(self, mock_obj):
        yaml_fn = self._fu.qualified_path(self.path, self.yaml)
        self.create_text_file(yaml_fn)
        mock_obj.side_effect = YAMLError('mock error')
        actual = self._fu.read_yaml(yamlFile=yaml_fn)
        self.assertIsNone(actual)

    @logit()
    def test_qualified_path(self):
        # Test 1. Normal case.
        expected = self.path + sep + self.fn
        actual = self._fu.qualified_path(self.path, self.fn)
        self.assertEqual(actual, expected, "Test 1 fail")
        # Test 2. Using an array and a Windows mock.
        with mock.patch('platform.system') as mocked_platform:
            mocked_platform.return_value = 'Windows'
            mocked_fu = FileUtil()
            dir_to_path = mocked_fu.separator.join(['C:', 'dir', 'to', 'path'])  # should be C:\dir\to\path for Windows
            pathArray = dir_to_path.split(mocked_fu.separator)
            expected = dir_to_path + mocked_fu.separator + self.fn
            self.assertEqual(expected,
                             mocked_fu.fully_qualified_path(pathArray, self.fn, dir_path_is_array=True),
                             "Test 2 fail")
        # Test 3, using a Windows path with a drive
        exp3 = r'c:\temp\subdir\subsubdir'
        exp3_array = exp3.split(_BACKSLASH)
        test3_with_fn = deepcopy(exp3_array)
        test3_with_fn.append(self.fn)
        test3 = _BACKSLASH.join(test3_with_fn)
        with mock.patch('platform.system') as mocked_platform:
            mocked_platform.return_value = 'Windows'
            mocked_fu = FileUtil()
            actual = mocked_fu.qualified_path(dirPath=exp3_array, filename=self.fn, dir_path_is_array=True)
            self.assertEqual(test3, actual, "Test 3 fail")

    @logit()
    def test_fully_qualified_path(self):
        # Test 1, Windows (should be unchanged)
        path1 = r'c:\temp\subdir\subsubdir'
        with mock.patch('platform.system') as mocked_platform:
            mocked_platform.return_value = 'Windows'
            mocked_fu = FileUtil()
            exp1 = path1 + mocked_fu.separator + self.fn
            self.assertEqual(exp1,
                             mocked_fu.fully_qualified_path(dirPath=path1, filename=self.fn),
                             'Test 1 fail')
        # Test 2, Linux without the leading /
        test2 = r'dir/to/path'
        # Test 3, Linux with the leading / (should be unchanged)
        with mock.patch('platform.system') as mocked_platform:
            mocked_platform.return_value = 'Linux'
            mocked_fu = FileUtil()
            exp2 = mocked_fu.separator + test2 + mocked_fu.separator + self.fn
            self.assertEqual(exp2,
                             mocked_fu.fully_qualified_path(dirPath=test2, filename=self.fn, dir_path_is_array=False),
                             "Test 2 fail")
            test3 = mocked_fu.separator + test2
            exp3 = test3 + mocked_fu.separator + self.fn
            self.assertEqual(exp3,
                             mocked_fu.fully_qualified_path(dirPath=test3, filename=self.fn, dir_path_is_array=False),
                             "Test 3 fail")

    @logit()
    def test_split_qualified_path(self):
        fn = 'test.txt'
        qpath = self._fu.qualified_path(self.path, fn)
        # Test 1. c:\temp for Windows or /tmp for Linux.
        which_test = 1
        splitpath, splitfn = self._fu.split_qualified_path(qpath, makeArray=False)
        self.assertEqual(splitpath, self.path, f'Test {which_test}. Paths should be equal.')
        self.assertEqual(splitfn, fn, f'Test {which_test}. File names should be equal.')
        # Test 2. Split paths into arrays.
        which_test = 2
        pathArray, splitfn = self._fu.split_qualified_path(qpath, makeArray=True)
        expected = self.path.split(sep)
        self.assertEqual(pathArray, expected, f'Test {which_test}. Paths should be equal.')
        self.assertEqual(splitfn, fn, f'Test {which_test}. File names should be equal.')
        # Test 3. Try a more complex path.
        which_test = 3
        complex_path = r'C:\Users\Owners\Documents\Tickers.csv' if platform.system() == 'Windows' else r'/tmp/parent/child/Tickers.csv'
        pathArray, splitfn = self._fu.split_qualified_path(complex_path, makeArray=True)
        expected = complex_path.split(sep)
        expected.pop()  # Pop off the last el, which is the file name.
        self.assertEqual(pathArray, expected, f'Test {which_test}. Paths should be equal.')
        self.assertEqual(splitfn, 'Tickers.csv', f'Test {which_test}. File names should be equal.')

    @logit()
    def test_split_file_name(self):
        expected_file = "file"
        expected_ext = ".ext"
        expected_fn = expected_file + expected_ext
        # First test with just file.ext
        actual_file, actual_ext = self._fu.split_file_name(expected_fn)
        self.assertEqual(actual_file, expected_file)
        self.assertEqual(actual_ext, expected_ext)
        # Another test with path/file.ext
        qpath = self._fu.qualified_path(self.path, expected_fn)
        actual_file, actual_ext = self._fu.split_file_name(qpath)
        self.assertEqual(actual_file, expected_file)
        self.assertEqual(actual_ext, expected_ext)

    @logit()
    def test_file_exists(self):
        self.create_csv()
        qualifiedPath = self._fu.qualified_path(self.path, self.fn)
        self.assertTrue(self._fu.file_exists(qualifiedPath))
        qualifiedPath = self._fu.qualified_path(self.path, 'noSuchFile.xxd')
        self.assertFalse(self._fu.file_exists(qualifiedPath))

    @logit()
    def test_ensure_dir(self):
        self._fu.ensure_dir(self.path)
        self.assertTrue(self._fu.dir_exists(self.path))

    @logit()
    def test_delete_file(self):
        self.create_csv()
        qualifiedPath = self._fu.qualified_path(self.path, self.fn)
        # delete_file should return True the first time
        self.assertTrue(self._fu.delete_file(qualifiedPath))
        # but return False the second time.
        self.assertFalse(self._fu.delete_file(qualifiedPath))

    @logit()
    @mock.patch('FileUtil.remove')
    def test_delete_file_err(self, mock_obj):
        self.create_csv()
        expected_log_message = 'delete_file mocktest'
        mock_obj.side_effect = OSError(expected_log_message)
        qualifiedPath = self._fu.qualified_path(self.path, self.fn)
        with self.assertLogs(FileUtil.__name__, level='DEBUG') as cm:
            ans = self._fu.delete_file(qualifiedPath)
        self.assertFalse(ans)
        self.assertTrue(next((True for line in cm.output if expected_log_message in line), False))

    @logit()
    def test_copy_file(self):
        self.create_csv()
        copy_fn = self.fn + '.copy'
        copied_file = self._fu.qualified_path(self.path, copy_fn)
        source_path = self._fu.qualified_path(self.path, self.fn)
        self._fu.copy_file(source_path, copied_file)
        self.assertTrue(self._fu.file_exists(source_path))
        self.assertTrue(self._fu.file_exists(copied_file))
        self._fu.delete_file(copied_file)

    @logit()
    @mock.patch('FileUtil.copy2')
    def test_copy_file_err(self, mock_obj):
        tmp_path = self._fu.qualified_path(self.path, 'tmp')
        qualifiedPath = self._fu.qualified_path(self.path, self.fn)
        expected_log_message = 'copy_file mocktest'
        mock_obj.side_effect = IOError(expected_log_message)
        with self.assertLogs(FileUtil.__name__, level='DEBUG') as cm:
            _ = self._fu.copy_file(qualifiedPath, tmp_path)
        self.assertTrue(next((True for line in cm.output if expected_log_message in line), False))

    @logit()
    def test_getList(self):
        dir_name = r'c:\temp'
        flist = self._fu.getList(dir_name)
        logger.debug(f'All list is: {flist}')

    def isFile_side_effect(*args, **kwargs) -> bool:
        """
        Side effect for mocking test_get_files.
        Returns True if there is a .txt in the filename. Not great, but ok for mocking.
        :param args:
        :param kwargs:
        :return:
        """
        return mock_is_file(args[1])

    def isDir_side_effect(*args) -> bool:
        return mock_is_dir(args[1])

    @logit()
    @mock.patch('FileUtil.isfile')
    @mock.patch('FileUtil.listdir')
    def test_get_files(self, mock_listdir, mock_isfile):
        dir_name = r'\nosuchdir'
        file_list = ['filea.txt', 'fileb.txt', 'filec.txt', 'somedir']
        mock_listdir.return_value = file_list
        mock_isfile.side_effect = self.isFile_side_effect
        actual = self._fu.get_files(dir_name)
        expected = [f for f in file_list if mock_is_file(f)]  # Condition must match isFile_side_effect
        self.assertListEqual(expected, actual)

    @logit()
    @mock.patch('FileUtil.isdir')
    @mock.patch('FileUtil.listdir')
    def test_get_dirs(self, mock_listdir, mock_isdir):
        dir_name = r'\nosuchdir'
        file_list = ['filea.txt', 'fileb.txt', 'filec.txt', 'somedir']
        mock_listdir.return_value = file_list
        mock_isdir.side_effect = self.isDir_side_effect
        actual = self._fu.get_dirs(dir_name)
        expected = [f for f in file_list if mock_is_dir(f)]  # Condition must match isDir_side_effect
        self.assertListEqual(expected, actual)

    @logit()
    def test_getRecursiveList(self):
        dir_name = r'\nosuchdir'
        file_list = ['filea.txt', 'fileb.txt', 'filec.txt']
        actual = self._fu.getRecursiveList(dir_name)
        self.assertListEqual(actual, [])  # Since no such dir, should be empty list
        eu = ExecUtil()
        exec_file = eu.exec_file_path()
        dir_name, _ = self._fu.split_qualified_path(exec_file)
        logger.debug(f'dir name is: {dir_name}')
        with mock.patch('FileUtil.listdir', return_value=file_list):
            actual = self._fu.getRecursiveList(dir_name)
            expected = [self._fu.fully_qualified_path(dirPath=dir_name, filename=f) for f in file_list]
            self.assertListEqual(expected, actual)

    @logit()
    def test_load_logs_and_subdir_names(self):
        no_such_dir_name = r'\nosuchdir'
        file_list = ['filea.txt', 'fileb.csv', 'otherfile.txt']
        actual = self._fu.load_logs_and_subdir_names(no_such_dir_name)
        self.assertListEqual(actual, [])  # Since no such dir, should be empty list
        eu = ExecUtil()
        dir_name = eu.executing_directory()  # ensures that dir_name is real
        with mock.patch('FileUtil.listdir', return_value=file_list):
            # Test with neither prefix nor suffix
            actual = self._fu.load_logs_and_subdir_names(dir_name)
            expected = [self._fu.fully_qualified_path(dirPath=dir_name, filename=f) for f in file_list]
            self.assertListEqual(expected, actual)
            # Test for suffixes ending in .txt
            suffix = '.txt'
            actual = self._fu.load_logs_and_subdir_names(dir_name, requiredSuffix=suffix)
            txt_only = [self._fu.fully_qualified_path(dirPath=dir_name, filename=f)
                        for f in file_list if f.endswith(suffix)]
            self.assertListEqual(txt_only, actual)
            # Test for prefixes starting with 'file'
            prefix = 'file'
            actual = self._fu.load_logs_and_subdir_names(dir_name, requiredPrefix=prefix)
            file_only = [self._fu.fully_qualified_path(dirPath=dir_name, filename=f)
                         for f in file_list if f.startswith(prefix)]
            self.assertListEqual(file_only, actual)

    @logit()
    @mock.patch('FileUtil.isfile')
    @mock.patch('FileUtil.listdir')
    def test_cull_existing_files(self, mock_listdir, mock_isfile):
        dir_name = r'\nosuchdir'
        file_list = ['filea.txt', 'fileb.txt', 'filec.txt', 'somedir']
        mock_listdir.return_value = file_list
        mock_isfile.side_effect = self.isFile_side_effect
        qualified_file_list = [self._fu.qualified_path(dirPath=dir_name, filename=f) for f in file_list]
        actual = self._fu.cull_existing_files(qualified_file_list)
        expected = [f for f in qualified_file_list if mock_is_file(f)]  # Condition must match isFile_side_effect
        self.assertListEqual(expected, actual)

    @logit()
    def test_read_generator(self):
        filename = self._fu.qualified_path(self.path, self.text_fn)
        how_many_lines = 5
        self.create_text_file(filename, how_many_lines)
        lines_read_in = 0
        for i, line in enumerate(self._fu.read_generator(filename)):
            logger.debug(f'Read in line {i}, which contains <{line}>.')
            lines_read_in += 1
        self.assertEqual(how_many_lines, lines_read_in)

    @logit()
    @mock.patch('FileUtil.open')
    def test_read_generator_err(self, mock_open):
        expected_log_message = 'mocked error'
        mock_open.side_effect = IOError(expected_log_message)
        filename = self._fu.qualified_path(self.path, self.text_fn)
        with self.assertLogs(FileUtil.__name__, level='DEBUG') as cm:
            for i, line in enumerate(self._fu.read_generator(filename)):
                x = line
                logger.debug(f'Read in line {i}, which contains <{x}>.')
                self.assertIsNone(x)
        logger.debug(f'Caught exception message: {cm.output}')
        self.assertTrue(next((True for line in cm.output if expected_log_message in line), False))

    @logit()
    def test_file_modify_time(self):
        start_time = self._du.as_timestamp()
        keys = ['greeting', 'farewell', ]
        vals = ['Hello', 'Goodbye', ]
        self.create_yaml(keys, vals)
        qualifiedPath = self._fu.qualified_path(self.path, self.yaml)
        mod_time = self._fu.file_modify_time(qualifiedPath)
        mod_timestamp = self._du.as_timestamp(dt=mod_time)
        logger.debug(f'mod_time is {mod_timestamp}. start_time is {start_time}.')
        self.assertTrue((start_time - mod_timestamp) < .1)  # asserting a difference of < 0.1 seconds.

    @logit()
    def test_file_modify_time2(self):
        start_time = self._du.as_timestamp()
        keys = ['greeting', 'farewell', ]
        vals = ['Hello', 'Goodbye', ]
        self.create_yaml(keys, vals)
        qualifiedPath = self._fu.qualified_path(self.path, self.yaml)
        mod_time = self._fu.file_modify_time2(qualifiedPath)
        mod_timestamp = self._du.as_timestamp(dt=mod_time)
        self.assertTrue((start_time - mod_timestamp) < .1)  # asserting a difference of < 0.1 seconds.

    @logit()
    def test_file_size(self):
        filename = self._fu.qualified_path(self.path, self.text_fn)
        width = 20
        how_many_lines = randrange(10) + 2
        self.create_text_file(filename, how_many_lines, width)
        eol_len = 2
        actual = self._fu.file_size(filename)
        self.assertEqual((width + eol_len) * how_many_lines, actual)

    @logit()
    def test_list_module_contents(self):
        mods = []
        for mod_name in self._fu.list_module_contents(module_name='itertools'):
            mods.append(mod_name)
        self.assertTrue('__docs__' in mods)

    @logit()
    def test_list_modules(self):
        doc = self._fu.list_module_attributes('itertools', True)
        logger.debug('{}'.format(doc))
        mods = []
        for mod_name in self._fu.list_modules(module_name='itertools'):
            mods.append(mod_name)
        self.assertTrue('__doc__' in mods)
        self.assertTrue('__name__' in mods)
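# A minimal, self-contained sketch of the side_effect mocking pattern used by
# test_get_files/test_get_dirs above, against the standard library instead of
# FileUtil (text_files_in is a hypothetical helper for illustration):
import os
from unittest import mock

def text_files_in(dir_name):
    """Return the .txt files that exist directly inside dir_name."""
    return [f for f in os.listdir(dir_name)
            if os.path.isfile(os.path.join(dir_name, f)) and f.endswith('.txt')]

with mock.patch('os.listdir', return_value=['a.txt', 'b.csv', 'somedir']), \
     mock.patch('os.path.isfile', side_effect=lambda p: not p.endswith('somedir')):
    # The fake isfile says everything except 'somedir' is a file.
    assert text_files_in(r'\nosuchdir') == ['a.txt']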
'''
@author: sniperwang
'''
import re

from BeautifulSoup import BeautifulSoup
from HtmlGraber import HtmlGraber
from FileUtil import FileUtil

if __name__ == '__main__':
    pass

#url = "http://share.renren.com/share/249317678/14623723075?from=0101010302&ref=hotnewsfeed&sfet=104&fin=36&fid=20148636643&ff_id=249317678"
PicUrlHead = "http://share.renren.com/share/249317678/14623723075/?photoId="
htmlGraber = HtmlGraber()
maxCount = 50
firstIndex = 249317678 - 50
fileUtil = FileUtil()
homeSavePath = "E:\\temp\\"

def grabImageUrl(picUrl):
    patt = re.compile(r'http://fmn.rrimg.com/.*')
    htmlContent = htmlGraber.doGrab(picUrl)
    # print htmlContent
    soup = BeautifulSoup(htmlContent)
    imgurls = soup.findAll('img', id="photo")  # re.compile(patt))
    # print str(imgurl[0].src)
    # print "\n".join([str(item) for item in imgurls])
    if len(imgurls) > 0:
        return imgurls[0]["src"]
    else:
        return ""
from LibraDriver import LibraDriver
from FileUtil import FileUtil
from DateUtil import DateUtil
from TextUtil import TextUtil

# Load the configuration data
user_data = FileUtil.getUserProperties("user.yaml")
uid = user_data[0]
pswd = user_data[1]
systemWait = user_data[2]
longWait = user_data[3]
midWait = user_data[4]
shortWait = user_data[5]
driver_type = user_data[6]
appWait = [systemWait, longWait, midWait, shortWait]

# Create the LibraDriver instance
lrp = LibraDriver(uid, pswd, "551", appWait, driver_type)

# Log in to Libra
lrp.login()
DateUtil.app_sleep(shortWait)
print(DateUtil.get_logtime() + "login")

# Navigate to the report index page
lrp.browse_repo()
DateUtil.app_sleep(shortWait)
print(DateUtil.get_logtime() + "report index")

# Fetch the PID + URL list data
datas = lrp.get_page_list_data()
def main():
    """ Test the FileUtil class. """
    file1 = FileUtil('test_file.txt')
    print "\n\nself.readlines():"
    print file1.readlines()
    clean_list = file1.readlines_clean()
    print "\n\nself.readlines_clean():"
    print clean_list
    file1.replace_string_in_file("alon", "ilan")
    print "\n\nself.replace_string_in_file('alon', 'ilan'):"
    print file1.readlines()
    print "\n\nself.return_string_in_lines('ilan\\S*'):"
    print file1.return_string_in_lines("ilan\\S*")
    file1.append_file("THIS IS THE APPENDED LINE")
    print "\n\nself.append_file('THIS IS THE APPENDED LINE'):"
    print file1.readlines()
    file2 = FileUtil('output_file.csv')
    input_list = file1.readlines()
    file2.create_csv_file_from_list(input_list)
    file3 = FileUtil('output_file.bak')
    file3.writelines(file1.readlines())
    print "\n\nself.writelines('output_file.bak') \n\n"
    print "Column 2, delimiter=' ':", file1.read_column(1, " "), "\n\n"
while True:
    serial.flushInput()
    rfid_data = serial.readline().strip()
    if len(rfid_data) > 0:
        logger.log("Rfid Data: %s" % (rfid_data))
        try:
            # a precheck that the data were correctly read
            rfidData = rfid_data[1:11]
            dictValue = getDictValueIfKeyContainsString(cardNumDict, rfidData)
            if dictValue is not None:
                saveResult(rfidData, dictValue)
            else:
                logger.log("Error read")
                rfidData = rfid_data[3:11]
                dictValue = getDictValueIfKeyContainsString(cardNumDict, rfidData)
                if dictValue is not None:
                    saveResult(rfidData, dictValue)
                else:
                    FileUtil.saveToNewFile(reportDir, name, READ_FAILURE)
                    logger.log("Read Failure")
                    logger.log(READ_FAILURE)
        except Exception as e:
            FileUtil.saveToNewFile(reportDir, name, READ_FAILURE)
            logger.log("An error has occurred:")
            logger.log("Sending error message:")
            logger.log(str(e))
# -*-coding:utf-8-*-
import os

import xlrd

from FileUtil import FileUtil
from StringParser import StringParser

# excel = "sample.xlsx"
excel = "translate.xls"
OUT_PROJECT = os.path.expanduser("AndroidLanguagePackage")

book = xlrd.open_workbook(excel)
sheet = book.sheet_by_index(0)
FileUtil.make_dir(OUT_PROJECT)
string_parser = StringParser()

for col in range(sheet.ncols):
    if col == 0:
        continue
    cells = sheet.col(col)
    string_keys = sheet.col(0)
    lang_list = list()
    lang = cells[0].value
    if isinstance(lang, unicode):
        lang = lang.encode('utf-8')
def log(self, msg):
    futil = FileUtil()
    futil.writeToFile("log.txt", msg, True, True)
class ApplicationUtil:
    df = None
    _d = {}
    _tuple = None
    pu = PandasUtil()
    fu = FileUtil()

    def __init__(self, yaml_file: str):
        self.logger = self.init_logger()
        d = YamlUtil(yaml_file)
        self._tuple = d.asnamedtuple
        self._d = d
        self.logger.debug(f'Read in yaml file {yaml_file} with fields: {self._d.fields}')

    def init_logger(self):
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.DEBUG)
        # create console handler and set level to debug
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        # create formatter
        formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        # add formatter to ch
        ch.setFormatter(formatter)
        # add ch to logger
        self.logger.addHandler(ch)
        return self.logger

    def yaml_entry(self, yaml_entry: str) -> str:
        """
        Read the dictionary and return the value of the given key.
        Give a warning if the yaml_entry is missing and return a blank.
        :param yaml_entry:
        :return:
        """
        try:
            return self._d.asdict[yaml_entry]
        except KeyError:
            self.logger.warning(f'Unable to find yaml key: {yaml_entry}. Returning blank value.')
            return ''

    def load_df_from_excel(self, input_file_yaml_entry: str, worksheet: str = 'Sheet1'):
        input_file = self._d.asdict[input_file_yaml_entry]
        self.logger.debug(f'Reading {worksheet} file: {input_file}')
        if self.fu.file_exists(input_file):
            df = self.pu.read_df_from_excel(excelFileName=input_file, excelWorksheet=worksheet, header=0)
            self.pu.get_rowCount_colCount(df)
            return df
        else:
            self.logger.warning(f'Unable to find {worksheet} file: {input_file_yaml_entry}. Returning empty dataframe.')
            return self.pu.empty_df()

    def write_excel(self, df: pd.DataFrame, output_file_yaml_entry: str, worksheet: str) -> None:
        """
        Write the given dataframe to the file indicated by the dictionary entry (that was read in from the yaml file).
        :param df: DataFrame to write
        :param output_file_yaml_entry:
        :param worksheet:
        :return:
        """
        output_file = self.yaml_entry(output_file_yaml_entry)
        self.logger.debug(f'Writing {worksheet} file: {output_file}')
        self.pu.write_df_to_excel(df=df, excelFileName=output_file, excelWorksheet=worksheet)
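# Hypothetical usage sketch for ApplicationUtil (assumes a config.yaml whose
# 'inputFile' and 'outputFile' entries point at real spreadsheets, and that
# YamlUtil, PandasUtil, and FileUtil come from this project):
app = ApplicationUtil('config.yaml')
df = app.load_df_from_excel('inputFile', worksheet='Sheet1')
if not app.pu.is_empty(df):
    app.write_excel(df, output_file_yaml_entry='outputFile', worksheet='Processed')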
        dateValue = fs.read().strip()
        fs.close()
        dt = datetime.strptime(dateValue, '%Y-%m-%d %H:%M:%S')
        day = dt.day
        month = dt.month
        year = dt.year
        fulldate = '%s-%s-%s %s:%s:%s' % (year, month, day, hour, minute, second)
        ts = mktime(strptime(fulldate, '%Y-%m-%d %H:%M:%S'))
        timestamp = int(ts)
        print timestamp
        timestamp = int(time.time())
        latitude = format(gpsData.latitude, '.6f')
        longitude = format(gpsData.longitude, '.6f')
        sensorValue = "%s;%s;%s" % (timestamp, latitude, longitude)
        sensorValueForLogger = "Sat_num: %s, gps_quality: %s, Lat: %s; Long: %s; Alt: %s; Speed %f; Timestamp %s" % (
            gpsData.num_sats, gpsData.gps_qual, gpsData.latitude, gpsData.longitude,
            gpsData.altitude, speed, timestamp)
        FileUtil.saveToNewFile(reportDir, name, sensorValue)
        logger.log(sensorValueForLogger)
    except Exception as e:
        print e
        logger.log(e)
except Exception as e:
    print e
    logger.log(e)

#scheduler.add_job(getGPSCoordinates, 'interval', seconds=interval)
#scheduler.start()
class PandasUtil: _EMPTY_DF = pd.DataFrame() def __init__(self): self.filename = None self.worksheetName = None self._df = None self._fu = FileUtil() # make the df display look better: https://stackoverflow.com/questions/11707586/how-do-i-expand-the-output-display-to-see-more-columns-of-a-pandas-dataframe pd.set_option('display.max_rows', 100) pd.set_option('display.max_columns', 50) pd.set_option('display.width', 800) # Getters and setters for filename, worksheetname, and df @property def filename(self): return self._filename # Setter for filename. @filename.setter def filename(self, fn: str): self._filename = fn @property def worksheetName(self): return self._worksheetName @worksheetName.setter def worksheetName(self, wks: str): self._worksheetName = wks @property def df(self): return self._df @df.setter def df(self, myDf: pd.DataFrame): self._df = myDf @classmethod def empty_df(cls) -> pd.DataFrame: return pd.DataFrame() def pandas_version(self): """ Return the panas version as three ints :return: maj, minor, sub """ v = pd.__version__ majMinSub = [int(x) for x in v.split('.')] return majMinSub[0], majMinSub[1], majMinSub[2] def write_df_to_excel(self, df: pd.DataFrame = None, excelFileName: str = None, excelWorksheet: str = None, write_index=False) -> bool: """ Write the given df to the excel file name and worksheet (unless they have already been provided and then are optional). Caller is responsible to catch any I/O errors. :param df: :param excelFileName: :param excelWorksheet: :return: True if Excel file written, False if df is empty. """ if not df.empty: self._df = df else: logger.warning('Empty dataframe will not be written.') return False fn = excelFileName or self.filename wks = excelWorksheet or self.worksheetname writer = pd.ExcelWriter(fn) self._df.to_excel(writer, wks, index=write_index) writer.save() logger.debug(f'Successfully wrote to {fn}.') return True def write_df_to_csv(self, df: pd.DataFrame = None, csv_file_name: str = None, write_header: bool = True, write_index: bool = False, enc: str = 'utf-8') -> bool: """ Write the given df to the file name and worksheet (unless they have already been provided and then are optional). Caller is responsible to catch any I/O errors. :param df: :param csv_file_name: :param write_header: :param write_index: :param enc: :return: True if Excel file written, False if df is empty. """ if not df.empty: self._df = df else: logger.warning('Empty dataframe will not be written.') return False df.to_csv(csv_file_name, header=write_header, index=write_index, encoding=enc) logger.debug(f'Successfully wrote to {csv_file_name}.') return True def read_df_from_excel(self, excelFileName: str = None, excelWorksheet: str = 'Sheet1', header: int = 0, index_col: int = -1) -> pd.DataFrame: """ Read an Excel file. :param excelFileName: :param excelWorksheet: :param header: 0-offset location of header (0=row 1 in Excel) :param index_col: :return: dataframe result """ param_dict = {'header': header} if excelFileName: self.filename = excelFileName logger.debug(f'Will read from the Excel file: {self.filename}.') param_dict['io'] = self.filename if self._fu.file_exists(self.filename): if excelWorksheet: self.worksheetName = excelWorksheet wks = self.worksheetName major, minor, _ = self.pandas_version() logger.debug( f'Will read from the worksheet: {wks}. Pandas minor version is {minor}.' ) if wks not in self.get_worksheets(excelFileName): logger.warning( f'Cannot find Excel worksheet: {self.worksheetName}. Returning empty df.' 
) return PandasUtil.empty_df() if ((major == 0) & (minor > 21)) | (major >= 1): param_dict['sheet_name'] = wks else: param_dict['sheetname'] = wks if index_col >= 0: param_dict['index_col'] = index_col self._df = pd.read_excel(**param_dict) logger.debug(f'Read in {len(self.df)} records.') return self._df else: logger.error( f'Cannot find Excel file: {self.filename}. Returning empty df.' ) return PandasUtil.empty_df() def read_df_from_csv(self, csv_file_name: str = None, header: int = 0, enc: str = 'utf-8', index_col: int = None, sep: str = None) -> pd.DataFrame: """ Write the given df to the file name and worksheet (unless they have already been provided and then are optional). :param df: :param csv_file_name: :param header: Where the headers live (0 means first line of the file) :param enc: try 'latin-1' or 'ISO-8859-1' if you are getting encoding errors :return: """ param_dict = { 'filepath_or_buffer': csv_file_name, 'header': header, 'encoding': enc, } if sep: param_dict['sep'] = sep if index_col is not None: param_dict['index_col'] = index_col ans = pd.read_csv(**param_dict) return ans def get_df_headers(self, df: pd.DataFrame = _EMPTY_DF) -> list: """ Get a list of the headers. This provides a list of the column NAMES. :param df: :param self: :return: list of headers """ if not self.is_empty(df): self.df = df return list(self.df.columns) else: logger.warning('df is empty. Returning None for headers') return None def set_df_headers(self, df: pd.DataFrame, new_headers: list): """ This sets the column NAMES. :param df: :param new_headers: list of new headers) :return: None (but side effect of changed df) """ df.columns = new_headers def get_rowCount_colCount(self, df: pd.DataFrame): """ Return the row and column_name count of the df. :param df: :return: row count, col count """ rows, cols = df.shape logger.debug(f'df has {rows} rows and {cols} columns.') return rows, cols def get_basic_data_analysis(self, df: pd.DataFrame) -> str: buffer = StringIO() df.info(buf=buffer) ans = buffer.getvalue() logger.info(f'info:\n{ans}') return ans def get_quartiles(self, df: pd.DataFrame, percentiles: list = [.25, .50, .75]) -> pd.DataFrame: """ Return basic statistics about the dataframe. :param df: :param percentiles: list of %-tiles as fractions between 0 and 1, e.g. [.2, .4, .6, .8] for quintiles :return: basic description df """ ans = df.describe(percentiles=percentiles) logger.info(f'info:\n{ans.head(10)}') return ans @logit(showRetVal=True) def get_worksheets(self, excelFileName=None): if excelFileName: self.filename = excelFileName fu = FileUtil() if fu.file_exists(self.filename): xl = pd.ExcelFile(self.filename) return xl.sheet_names else: logger.error(f'Cannot find Excel file {self.filename}.') return None def duplicate_rows(self, df: pd.DataFrame, fieldList: list = None, keep: str = 'first') -> pd.DataFrame: """ Return a dataframe with the duplicates as specified by the columns in fieldList. If fieldList is missing or None, then return the exactly duplicated rows. :param df: dataframe to scan for duplicates :param fieldList: fields in df to examine for duplicates. :param keep: 'first' or 'last' to keep the first dupe or the last. :return: df of the duplicates """ if fieldList: ans = df[df.duplicated(fieldList, keep=keep)] else: ans = df[df.duplicated(keep=keep)] return ans def drop_duplicates(self, df: pd.DataFrame, fieldList: list = None, keep: str = 'first') -> pd.DataFrame: """ Drop the duplicates as specified by the columns in fieldList. 
If fieldList is missing or None, then return the exactly duplicated rows. :param df: dataframe to scan for duplicates :param fieldList: fields in df to examine for duplicates. :param keep: 'first' or 'last' to keep the first dupe or the last. :return: df without the duplicates """ param_dict = {'keep': keep, 'inplace': False} if fieldList: param_dict['subset'] = fieldList return df.drop_duplicates(**param_dict) def convert_dict_to_dataframe(self, list_of_dicts: list) -> pd.DataFrame: """ Convert a list of dictionaries to a dataframe. :param list_of_dicts: :return: """ return pd.DataFrame(list_of_dicts) def convert_list_to_dataframe(self, lists: list, column_names: List = None) -> pd.DataFrame: """ Convert a list of lists to a dataframe. If provided, add the column names. If not, provide default col names. :param lists: a list of lists, like [[1,2,3], ['a', 'b', 'c']] :param column_names: Column names to use. Defaults to col00, col01, col22, .. col99 :return: """ if column_names: return pd.DataFrame(data=lists, columns=column_names) # Use the default column names: col00, col01... ans = pd.DataFrame(data=lists) self.replace_col_names_by_pattern(ans) return ans def convert_matrix_to_dataframe(self, lists: list) -> pd.DataFrame: """ convert a list of lists to a dataframe. :param lists: :return: """ return pd.DataFrame(data=lists) def convert_dataframe_to_matrix(self, df: pd.DataFrame) -> np.ndarray: """ Convert all of the values to a numpy ndarray. :param df: :return: """ return df.to_numpy() def convert_dataframe_to_vector(self, df: pd.DataFrame) -> np.ndarray: """ Convert the dataframe to a numpy vector. :param df: :return: """ cols = self.get_df_headers(df) if len(cols) == 1: return df.to_numpy().reshape(-1, ) logger.warning( f'Dataframe should have exactly one column, but contains {len(cols)}. Returning None.' ) return None def convert_dataframe_col_to_list(self, df: pd.DataFrame, column_name: str) -> list: """ Convert the given dataframe column to a list. :param df: :param column_name: a column name, like 'age' :return: a list of that column """ return df[column_name].values.tolist() def without_null_rows(self, df: pd.DataFrame, column_name: str) -> pd.DataFrame: """ Return a DataFrame without the rows that are null in the given column_name. :param df: source DataFrame :param column_name: Column name to remove. :return: new DataFrame """ try: mask = pd.notnull(df[column_name]) return df[mask] except KeyError: logger.error( f'Unable to find column_name name: {column_name}. Returning empty df.' ) return PandasUtil.empty_df() def select(self, df: pd.DataFrame, column_name: str, match_me: Union[str, int]) -> pd.DataFrame: """ Return a DataFrame that selects on the column_name that is equal to match_me. Similar to a SELECT * WHERE clause in SQL. :param df: :param column_name: :param match_me: :return: df with the column_name matching the selected clause (possibly empty) """ return df.loc[df[column_name] == match_me] def mask_blanks(self, df: pd.DataFrame, column_name: str) -> list: """ Return a boolean list with a True in the rows that have a blank column_name. :param df: :param column_name: :return: """ # ans = df.loc[df[column_name] == ''] ans = df[column_name] == '' return ans def select_blanks(self, df: pd.DataFrame, column_name: str) -> list: return df[self.mask_blanks(df, column_name)] def mask_non_blanks(self, df: pd.DataFrame, column_name: str) -> list: """ Return a boolean list with a True in the rows that have a nonblank column_name. 
:param df: :param column_name: :return: """ blanks = self.mask_blanks(df, column_name) non_blanks_mask = [not x for x in blanks] return non_blanks_mask def select_non_blanks(self, df: pd.DataFrame, column_name: str) -> list: return df[self.mask_non_blanks(df, column_name)] def unique_values(self, df: pd.DataFrame, column_name: str) -> list: """ Return a list of the unique values in column_name. :param df: :param column_name: :return: """ return self.drop_duplicates(df=df[column_name]).tolist() def count_by_column(self, df: pd.DataFrame, column_name: str = None) -> pd.DataFrame: """ Return a count by value of the given column. :param df: :param column_name: :return: """ return df[column_name].value_counts() def add_new_col_with_func(self, df: pd.DataFrame, column_name: str, func: Callable[[], list]) -> pd.DataFrame: """ Call the func with no args to assign a new column_name to the dataframe. func should return a list comprehension. Here's an example of what the function should do. def my_func(self) -> list: df = self.pu.df col_of_interest = df['number'] return [self.my_f(x) for x in col_of_interest] It gets called with: df = self.pu.add_new_col_with_func(df, 'new_col_name', self.my_func) :param df: :param column_name: :param func: func (usually no args) :return: """ self.df = df df[column_name] = func() return df def add_new_col_from_array(self, df: pd.DataFrame, column_name: str, new_col: np.array) -> pd.DataFrame: """ Use the values in new_col to create a new column. Limitations: this is not as sophisticated as https://stackoverflow.com/questions/12555323/adding-new-column-to-existing-dataframe-in-python-pandas . The length of new_col must be the same as the length of df. :param df: :param column_name: :param new_col: If this really is a Series, it will try to match indexes with the existing df (probably a good thing). :return: """ df[column_name] = new_col return df def mark_rows_by_func(self, df: pd.DataFrame, column_name: str, func: Callable[[], list]) -> Bools: """ Return a list of bools depending on the func. Here's a func (which takes a list as a parameter): def is_adult(self, age:list): return age >= 21 Here's how to invoke it: mark = self.pu.mark_rows_by_func(df, 'Age', self.is_adult) :param df: dataframe under scrutiny :param column_name: name of the column_name :param func: function that is to be invoked. Takes a list and returns a list of booleans. :return: """ mask = func(df[column_name]) return mask def mark_rows_by_criterion(self, df: pd.DataFrame, column_name: str, criterion: Union[str, int, float]) -> Bools: """ Return a list of bools when column_name meets the criterion. :param df: :param column_name: :param criterion: :return: """ mask = df[column_name] == criterion return mask def mark_isnull(self, df: pd.DataFrame, column_name: str) -> Bools: mask = df[column_name].isnull() return mask def masked_df(self, df: pd.DataFrame, mask: Bools, invert_mask: bool = False): if not invert_mask: return df[mask] else: my_mask = [not x for x in mask] return df[my_mask] def slice_df(self, df: pd.DataFrame, start_index: int = 0, end_index: int = None, step: int = 1): """ Slice the df by the given start, end, and step. NOTE: this does row slicing only. :param df: :param start_index: 0-based first index to use. Defaults to 0 (the first el) :param end_index: end of list index. Defaults to None (which means the end of the list). :param step: how many to skip. 2 means skip every other. Default of 1 means don't skip. 
:return: """ end_idx = end_index or len(df) ans = df.iloc[start_index:end_idx:step] return ans def set_index(self, df: pd.DataFrame, columns: Union[Strings, str], is_in_place: bool = True) -> pd.DataFrame: """ Set the index of df. :param df: Dataframe under scrutiny. :param columns: Can be a str (=single column_name) or a List of strings. :param is_in_place: True to add the index in place / False to create a new df :return: df or None (if is_in_place is true) """ return df.set_index(columns, inplace=is_in_place) def reset_index(self, df: pd.DataFrame, is_in_place: bool = True, is_dropped: bool = False) -> pd.DataFrame: """ Reset the index. :param df: :param is_in_place: :param is_dropped: :return: """ return df.reset_index(drop=is_dropped, inplace=is_in_place) def drop_index(self, df: pd.DataFrame, is_in_place: bool = True) -> pd.DataFrame: """ Drop the index :param df: :param is_in_place: :param is_dropped: :return: """ return self.reset_index(df=df, is_in_place=is_in_place, is_dropped=True) def drop_col(self, df: pd.DataFrame, columns: Union[Strings, str], is_in_place: bool = True) -> pd.DataFrame: """ Drop the given column_name. :param df: :param columns: Can be a str (=single column_name) or a List of strings. :param is_in_place: if true, column_name is dropped from df in place. Otherwise, a new df is returned. :return: None if is_in_place is True. Else df with the column_name dropped. """ major, minor, _ = self.pandas_version() if (major == 0) & (minor < 21): logger.warning( f'Unable to drop column, as Pandas version is {minor}. Returning unchanged df.' ) return df return df.drop(columns=columns, inplace=is_in_place) @logit() def drop_col_keeping(self, df: pd.DataFrame, cols_to_keep: Union[Strings, str], is_in_place: bool = True) -> pd.DataFrame: """ Keep the given columns and drop the rest. :param df: :param cols_to_keep: :param is_in_place: :return: """ headers_to_drop = self.get_df_headers(df) logger.debug( f'I have these headers: {headers_to_drop}. But I will keep {cols_to_keep}' ) exceptions = cols_to_keep if isinstance(cols_to_keep, str): exceptions = [cols_to_keep] for col in exceptions: headers_to_drop.remove(col) return self.drop_col(df=df, columns=headers_to_drop, is_in_place=is_in_place) def drop_row_by_criterion(self, df: pd.DataFrame, column_name: str, criterion: Union[int, str], is_in_place: bool = True) -> pd.DataFrame: """ Drop the rows that have criterion in the given column. :param df: :param column_name: :param criterion: :param is_in_place: :return: """ return df.drop(df[df[column_name] == criterion].index, inplace=is_in_place) def drop_row_if_nan(self, df: pd.DataFrame, column_names: Strings = None, is_in_place: bool = True) -> pd.DataFrame: """ Drop a row if the given column name is NaN. :param df: :param column_names: Drop the rows based in this array of column names. If None, drop every row with all NaNs. :param is_in_place: :return: """ if column_names: return df.dropna(axis='index', subset=column_names, inplace=is_in_place) return df.dropna(axis='index', inplace=is_in_place, how='all') def reorder_cols(self, df: pd.DataFrame, columns: Strings) -> pd.DataFrame: """ Using the columns, return a new df. :param df: :param columns: list of strings, like ['colD', 'colA', 'colB', 'colC'] :return: """ return df[columns] def replace_col(self, df: pd.DataFrame, column: str, replace_dict: dict) -> pd.DataFrame: """ Replace the values of column_name using replace_dict. This will will replace the column VALUES. 
:param df: :param column: :param replace_dict: {'origA':'replA', 'origB':'replB'} :return: df with column_name replaced """ try: df[column] = df[column].map(replace_dict) except KeyError: logger.warning( f'Value found outside of: {replace_dict.keys()} or column_name {column} not found. Returning empty df.' ) return self.empty_df() return df def replace_col_using_func(self, df: pd.DataFrame, column_name: str, func: Callable[[], list]) -> pd.DataFrame: """ Replace the column contents by each element's value, as determined by func. This will will replace the column VALUES. :param df: Dataframe under scrutiny. :param column_name: (single column_name) name :param func: Function operates on whatever element it is presented, and returns the changed element. :return: df """ df[column_name] = df[column_name].apply(func) return df def replace_col_using_mult_cols(self, df: pd.DataFrame, column_to_replace: str, cols: Strings, func: Callable[[], list]) -> pd.DataFrame: """ Replace column_to_replace, using the given func. This will will replace the column VALUES. :param df: Dataframe under scrutiny. :param column_to_replace: (single column_name) name :param cols: list of columns used for the following func :param func: Pointer to a local function. :return: df with replaced column """ df[column_to_replace] = df[cols].apply(func, axis=1) return df def replace_col_with_scalar(self, df: pd.DataFrame, column_name: str, replace_with: Union[str, int], mask: Bools = None) -> pd.DataFrame: """ Replace the all column_name with replace_with. If a mask of bools is used, only replace those elements with a True. Helpful reference at https://kanoki.org/2019/07/17/pandas-how-to-replace-values-based-on-conditions/ :param df: :param column_name: :param replace_with: :param mask: :return: """ if mask is None: df[column_name] = replace_with elif isinstance(mask, pd.Series): df[column_name].mask(mask.tolist(), replace_with, inplace=True) elif isinstance(mask, list): # df[column_name].mask(mask, replace_with, inplace=True) # Method 1 and works df.loc[mask, column_name] = replace_with # Method 2 at kanoki. else: logger.warning( f'mask must be None, a series, or a list, but it is: {type(mask)}' ) return self.empty_df() def join_two_dfs_on_index(self, df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame: """ return a column-wise join of these two dataframes on their mutual index. :param df1: :param df2: :return: """ return pd.concat([df1, df2], axis=1, ignore_index=False) def join_dfs_by_column(self, dfs: Dataframes) -> pd.DataFrame: """ Return a column-wise join of these dataframes. :param dfs: :return: """ return pd.concat(dfs, axis='columns') def join_dfs_by_row(self, dfs: Dataframes) -> pd.DataFrame: """ Return a row-wise join of these dataframes. Note: all the dfs should have the same column names, so you might call it in this way: headers = pu.get_df_headers(big_df) pu.set_df_headers(new_df, headers) df2 = pu.join_dfs_by_row([new_df, big_df]) :param dfs: :return: """ return pd.concat(dfs, axis='rows', ignore_index=True) def dummy_var_df(self, df: pd.DataFrame, columns: Union[Strings, str], drop_first: bool = True) -> pd.DataFrame: """ Do a one-hot encoding. Create a dummy variable based on the given column. :param df: :param columns: a single column name or a list of column names. 
:return: """ if isinstance(columns, str): my_columns = [columns] else: my_columns = columns df = pd.get_dummies(data=df, columns=my_columns, drop_first=drop_first) return df def replace_col_names(self, df: pd.DataFrame, replace_dict: dict, is_in_place: bool = True) -> pd.DataFrame: """ :param replace_dict: {'origColA':'replColA', 'origColB':'replColB'} """ return df.rename(columns=replace_dict, inplace=is_in_place) def replace_col_names_by_pattern(self, df: pd.DataFrame, prefix: str = "col", is_in_place: bool = True) -> pd.DataFrame: """ Replace the column names with col1, col2.... :param df: :param prefix: string prefix, such as "col" :param is_in_place: :return: """ cur_names = self.get_df_headers(df) gen = generate_col_names(prefix) replacement_dict = {k: next(gen) for k in cur_names} return self.replace_col_names(df, replacement_dict, is_in_place) def coerce_to_string(self, df: pd.DataFrame, columns: Union[Strings, str]) -> pd.DataFrame: """ Coerce the given column_name name to a string. :param df: :param column_name: :return: new df with column_name coerced to str. """ if isinstance(columns, str): # Make the single str columns into a list with just that one element. cols_as_list = [columns] else: cols_as_list = columns for col in cols_as_list: df[col] = df[col].apply(str) return df def coerce_to_numeric(self, df: pd.DataFrame, columns: Union[Strings, str]) -> pd.DataFrame: """ Coerce the given column_name name to ints or floats. :param df: :param columns: a column name (or list of names) to coerce :return: df with columns coerced to a numeric in place. """ if isinstance(columns, str): # Make the single str columns into a list with just that one element. cols_as_list = [columns] else: cols_as_list = columns df[cols_as_list] = df[cols_as_list].apply(pd.to_numeric) return df def coerece_to_int(self, df: pd.DataFrame, columns: Union[Strings, str]) -> pd.DataFrame: """ Coerce the given column name(s) to an int. :param df: :param columns: a column name (or list of names) to coerce :return: df with columns coerced to a numeric in place. """ df[columns] = df[columns].astype(int) return df def round(self, df: pd.DataFrame, rounding_dict: dict) -> pd.DataFrame: """ Round the columns given in rounding_dict to the given number of decimal places. Unexpected result found in testing: python function round(4.55, 2) yields 4.5 BUT this function returns 4.6 :param df: :param rounding_dict: {'A': 2, 'B':3} :return: df rounded to the specified number of places. """ return df.round(rounding_dict) def replace_vals(self, df: pd.DataFrame, replace_me: str, new_val: str, is_in_place: bool = True) -> pd.DataFrame: """ Replace the values of replace_me with the new_val. :param df: Dataframe under scrutiny. :param :param is_in_place: True to replace values in place / False to create a new df :return: df or None (if is_in_place is true) """ return df.replace(to_replace=replace_me, value=new_val, inplace=is_in_place) def replace_vals_by_mask(self, df: pd.DataFrame, mask: Bools, col_to_change: str, new_val: Union[str, int, float]): """ Replace the values in the col_to_change with the new_val :param df: :param mask: :param col_to_change: Column Name whose rows you want to change :param new_val: :return: the changed df (also changed in place) """ ans = df.loc[mask, col_to_change] = new_val return ans def is_empty(self, df: pd.DataFrame) -> bool: """ Return true if the df is empty. 
    def aggregates(self, df: pd.DataFrame, group_by: Strings, col: str) -> pd.DataFrame:
        """
        Return the mean, min, max, and sum of col when the dataframe is grouped by the given strings.
        Reference: https://jamesrledoux.com/code/group-by-aggregate-pandas
        :param df: Dataframe under scrutiny.
        :param group_by: list of column names to group by.
        :param col: column to aggregate.
        :return: df with columns mean, min, max, and sum.
        """
        grouped_multiple = df.groupby(group_by).agg({col: ['mean', 'min', 'max', 'sum']})
        grouped_multiple.columns = ['mean', 'min', 'max', 'sum']
        self.reset_index(grouped_multiple, is_in_place=True)
        return grouped_multiple

    def stats(self, df: pd.DataFrame, xlabel_col_name: str, ylabel_col_name: str):
        """
        Calculate the main linear-regression statistics.
        :param df: dataframe under scrutiny.
        :param xlabel_col_name: x column label.
        :param ylabel_col_name: y column label.
        :return: slope, intercept, and r (correlation).
        """
        # linregress is scipy.stats.linregress, imported at module top.
        slope, intercept, r, p, std_err = linregress(df[xlabel_col_name], df[ylabel_col_name])
        logger.info('Main equation: y = %.3f x + %.3f' % (slope, intercept))
        logger.info('r^2 = %.4f' % (r * r))
        logger.info('p = %.4f' % p)
        logger.info('std err: %.4f' % std_err)
        return slope, intercept, r

    def head(self, df: pd.DataFrame, how_many_rows: int = 10) -> pd.DataFrame:
        """
        Return the first how_many_rows.
        This works well if called as the last line of an immediate, as in: pu.head(df)
        :param df:
        :param how_many_rows:
        :return: the first rows as a df.
        """
        self.df = df
        return self.df.head(how_many_rows)

    def head_as_string(self, df: pd.DataFrame, how_many_rows: int = 10) -> str:
        """
        Return the first how_many_rows as a string, separated by \n.
        :param df:
        :param how_many_rows:
        :return: the first rows as a string.
        """
        ans = str(self.head(df, how_many_rows))
        logger.debug(f'First {how_many_rows} are:\n{ans}')
        return ans

    def tail_as_string(self, df: pd.DataFrame, how_many_rows: int = 10) -> str:
        """
        Return the last how_many_rows as a string, separated by \n.
        :param df:
        :param how_many_rows:
        :return: the last rows as a string.
        """
        ans = str(self.tail(df, how_many_rows))
        logger.debug(f'Last {how_many_rows} are:\n{ans}')
        return ans

    def tail(self, df: pd.DataFrame, how_many_rows: int = 10) -> pd.DataFrame:
        """
        Return the last how_many_rows.
        This works well if called as the last line of an immediate, as in: pu.tail(df)
        :param df:
        :param how_many_rows:
        :return: the last rows as a df.
        """
        self.df = df
        return self.df.tail(how_many_rows)

    def sort(self, df: pd.DataFrame, columns: Union[Strings, str], is_in_place: bool = True, is_asc: bool = True):
        """
        Sort the given dataframe by the given column(s).
        :param df: Dataframe under scrutiny.
        :param columns: a column name (or list of names) to sort by.
        :param is_in_place: True to sort in place / False to create a new df.
        :param is_asc: True for ascending / False for descending.
        :return: df, or None if is_in_place is True.
        """
        return df.sort_values(columns, ascending=is_asc, inplace=is_in_place, kind='quicksort', na_position='last')

    def largest_index(self, df: pd.DataFrame) -> Tuple[int, int]:
        """
        Return the position of the largest index and its value (usually an int and an int).
        :param df:
        :return: (position of largest index, value of largest index)
        """
        return df.index.argmax(), df.index.max()

    def smallest_index(self, df: pd.DataFrame) -> Tuple[int, int]:
        """
        Return the position of the smallest index and its value (usually an int and an int).
        :param df:
        :return: (position of smallest index, value of smallest index)
        """
        return df.index.argmin(), df.index.min()
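    # A minimal usage sketch for the grouping/statistics helpers above
    # (assumptions: an instance `pu`, and scipy available for stats()):
    #   df = pd.DataFrame({'team': ['a', 'a', 'b'], 'score': [1, 3, 5]})
    #   summary = pu.aggregates(df, group_by=['team'], col='score')
    #   # -> one row per team with the mean/min/max/sum of 'score'
    #   slope, intercept, r = pu.stats(df, 'score', 'score')  # trivially r == 1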
# -*- coding:utf-8 -*-
'''
Created on Jan 29, 2019

@author: Jackie
'''
from FileUtil import FileUtil
from NlpTookit.NlpTookit import NlpTookit
import jieba
import jieba.posseg as pseg

data_dir = "data/sports"
fu = FileUtil()
nt = NlpTookit()
wordcounter = {}    # dictionary of word-frequency counts
poscounter = {}     # dictionary of part-of-speech counts
lengthCounter = {}  # dictionary of word-length / sentence-length distributions

# step one: get all files' absolute paths
filelist = fu.getFiles(data_dir)

# step two: read every file
for filepath in filelist:
    print(filepath)
    lines = fu.readlines(filepath, "UTF-8")
    for line in lines:
        if len(line.strip()) == 0:
            continue
        # step three: split into sentences
        # print(line.strip())
        sentences = nt.toSentenceList(line.strip())
        for sentence in sentences:
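            # Hedged sketch: the loop body is truncated in the source. Given the
            # three counters declared above, it plausibly tallies sentence length,
            # word frequency, and POS tags (pair.word / pair.flag are the standard
            # jieba.posseg fields); treat this as an assumption, not the original code.
            lengthCounter[len(sentence)] = lengthCounter.get(len(sentence), 0) + 1
            for pair in pseg.cut(sentence):
                wordcounter[pair.word] = wordcounter.get(pair.word, 0) + 1
                poscounter[pair.flag] = poscounter.get(pair.flag, 0) + 1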
# Standard-library imports needed by main(); the project-local helpers
# (TimeUtil, DSSMConfigUtil, FileUtil, CpuUtil, logger, click_action_files,
# merge_action, filter_action, set_items, split_merge_action, deal_block)
# are assumed to be defined or imported elsewhere in this module.
import gc
import math
import os
import sys
from multiprocessing import Pool


def main():
    if len(sys.argv) == 1:
        dt = TimeUtil.get_yesterday_str()
    elif len(sys.argv) == 2:
        dt = sys.argv[1]
    else:
        logger.error("parameter error")
        sys.exit(1)

    recent_items = DSSMConfigUtil.sample_conf.get_int("recent-items")
    cpu_threshold = DSSMConfigUtil.sample_conf.get_float("cpu-threshold")
    window = DSSMConfigUtil.sample_conf.get_int("window")
    J = DSSMConfigUtil.sample_conf.get_int("J")

    action_path = DSSMConfigUtil.action_conf.get_string("action-path")
    if action_path == "":
        job_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../../"))
        action_path = os.path.join(job_path, "data/rec-recall/dssm/action")
    click_path = os.path.join(action_path, "click")
    click_files = click_action_files(click_path, dt, 1)
    logger.info("click_files:{}".format(click_files))

    sample_path = DSSMConfigUtil.sample_conf.get_string("sample-path")
    if sample_path == "":
        job_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../../"))
        sample_path = os.path.join(job_path, "data/rec-recall/dssm/sample")
    os.makedirs(sample_path, exist_ok=True)
    logger.info("sample_path:{}".format(sample_path))

    if not FileUtil.files_exists(click_files):
        logger.info("there are click files that do not exist")
        sys.exit(1)

    ma = merge_action(click_files)
    fa = filter_action(ma, window + 1, recent_items)
    logger.info("len(ma):{}".format(len(ma)))
    logger.info("len(fa):{}".format(len(fa)))
    items = set_items(ma)
    logger.info("len(items):{}".format(len(items)))

    total_number = len(fa)
    block_number = math.ceil(CpuUtil.cpu_count() * cpu_threshold)
    block_size = FileUtil.block_size(total_number, block_number)
    logger.info("total_number:{}".format(total_number))
    logger.info("block_number:{}".format(block_number))
    logger.info("block_size:{}".format(block_size))
    blocks = split_merge_action(fa, block_size)
    logger.info("len(blocks):{}".format(len(blocks)))

    sample_file = os.path.join(sample_path, "sample_{}.data".format(dt))
    FileUtil.remove(sample_file)
    del ma, fa
    gc.collect()

    # Fan the blocks out across a worker pool; each worker appends its samples.
    pool = Pool(block_number)
    for block in blocks:
        pool.apply_async(deal_block, args=(block, items, window, J, sample_file))
    pool.close()
    pool.join()
    logger.info("sample_file:{}".format(sample_file))
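# A minimal sketch of the splitting helpers used by main(). FileUtil.block_size
# and split_merge_action are not shown in this excerpt; these illustrative
# versions (hypothetical names, underscore-prefixed) match how main() uses them.
def _block_size(total_number, block_number):
    # Smallest per-block size such that block_number blocks cover all items.
    return math.ceil(total_number / block_number)

def _split_merge_action(actions, size):
    # Chunk the filtered actions into consecutive blocks of at most `size` items.
    return [actions[i:i + size] for i in range(0, len(actions), size)]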
class BotEngine:
    # {'Forex signals': 'GforexSignalsIr'}
    def __init__(self, driver):
        self.driver = driver
        self.recentSignals = {'forexsignalzz': 0, 'amirFX_signal': 0, 'FOR3X_SIGNAL': 0,
                              'AmirFx VIP signal': 0, 'Eagl777': 0,
                              'WallstreetFXsignals': 0, 'wolfofforexplus': 0}
        self.signalVendors = {'a': 1}
        self.utils = Utils()
        self.fileUtil = FileUtil()

    def setListOfVendors(self):
        self.signalVendors.update({'amirFX_signal': GforexSignalsIr(self.driver)})
        self.signalVendors['FOR3X_SIGNAL'] = FOR3X_SIGNAL(self.driver)
        self.signalVendors['AmirFx VIP signal'] = GforexSignalsIr(self.driver)
        self.signalVendors['signalTest'] = Eagl777(self.driver)
        self.signalVendors['Eagl777'] = Eagl777(self.driver)
        self.signalVendors['WallstreetFXsignals'] = WallstreetFXsignals(self.driver)
        self.signalVendors['wolfofforexplus'] = wolfofforexplus(self.driver)

    def getNewMessage(self):
        # Poll the vendor channels indefinitely, writing out each new signal.
        while True:
            try:
                sleep(1)
                for key in self.recentSignals:
                    try:
                        # Returns the timestamps of the last two messages in the channel.
                        newMessages = self.find_last_update_time(key)
                        print('before getting time')
                        if newMessages is None or newMessages[0] == self.recentSignals[key]:
                            print('repeated signal for ' + key + ' provider')
                            continue
                        print('preparing new signal started in signalFinder!')
                        provider = self.signalVendors[key]
                        self.recentSignals[key] = newMessages[0]
                        sleep(2)
                        signalText = provider.get_message(key)
                        if signalText is not None:
                            signalObjs = provider.createSignalDto(signalText, key)
                            if signalObjs[0].enterPrice != 0:
                                for signal in signalObjs.values():
                                    if signal != 0:
                                        signal.vol = 0.01
                                        self.fileUtil.writeOnFile("s", signal)
                                sleep(10)
                            else:
                                print('why here!!????')
                                self.recentSignals[key] = 0
                    except Exception:  # inner try
                        print('in INNER except signalFinder: ')
                        self.recentSignals[key] = 0
                        print(sys.exc_info()[0])
                        continue
            except Exception:  # outer try
                print('in OUTER except signalFinder: ')
                self.recentSignals[key] = 0
                print(sys.exc_info()[0])
                continue

    def find_last_update_time(self, chName):
        print('start finding last update time')
        # Retry each Selenium step up to 5 times, stopping as soon as it succeeds.
        c1 = 5
        while c1 > 0:
            try:
                elem = self.driver.find_element_by_xpath("//input[contains(@class,'im_dialogs_search_field')]")
                sleep(2)
                elem.clear()
                elem.send_keys(chName)
                sleep(1)
                break
            except Exception:
                sleep(2)
                c1 -= 1
        c2 = 5
        while c2 > 0:
            try:
                self.driver.find_elements_by_xpath("//div[@class='im_dialogs_col']//li[contains(@class,'im_dialog_wrap')]/a")[0].click()
                sleep(2)
                break
            except Exception:
                sleep(2)
                c2 -= 1
        firstLastMessageTime = None
        c3 = 5
        while c3 > 0:
            try:
                firstLastMessageTime = self.driver.find_elements_by_xpath("//div[contains(@class,'im_history_messages_peer')]//div[contains(@class,'im_history_message_wrap')]//span[@class='im_message_date_text nocopy']")[-1].get_attribute('data-content')
                sleep(2)
                break
            except Exception:
                sleep(2)
                c3 -= 1
        try:
            secondLastMessageTime = self.driver.find_elements_by_xpath("//div[contains(@class,'im_history_messages_peer')]//div[contains(@class,'im_history_message_wrap')]//span[@class='im_message_date_text nocopy']")[-2].get_attribute('data-content')
        except Exception:
            secondLastMessageTime = ""
            print('no second message')
        # providerCH = self.driver.find_elements_by_xpath("//span/ancestor::a[@class='im_dialog']")[0]
        # sleep(2)
        # last_time = providerCH.find_element_by_xpath("//div[@class='im_dialog_date']").text
        # self.driver.find_elements_by_xpath("//span/ancestor::a[@class='im_dialog']//div[@class='im_dialog_date']")[0].text
        sleep(2)
        print('end of finding last update time')
        return [firstLastMessageTime, secondLastMessageTime]
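# A minimal sketch, not part of the original class: the three near-identical
# retry loops in find_last_update_time could share one helper. The helper name
# and signature below are illustrative assumptions, not project API.
from time import sleep

def retry(action, attempts=5, delay=2):
    # Call action() up to `attempts` times, sleeping `delay` seconds between
    # failures; return its result, or None if every attempt fails.
    for _ in range(attempts):
        try:
            return action()
        except Exception:
            sleep(delay)
    return None

# Example (hypothetical): search for a channel by name.
# elem = retry(lambda: driver.find_element_by_xpath(
#     "//input[contains(@class,'im_dialogs_search_field')]"))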
def getLocation(phoneNumber):
    htmlGraber = HtmlGraber()
    url = serviceUrl + "?m=" + phoneNumber
    content = htmlGraber.doGrab(url)
    content = content.replace("<br/><br/>", "||")
    items = content.split("||")
    # print items[1]
    return items[1]


if __name__ == "__main__":
    fileUtil = FileUtil()
    content = fileUtil.openFile(phoneSrc)
    # print content
    content = content.replace("\r", " ").replace("\t", " ").replace("\n", " ")
    # print content
    items = content.split(" ")
    items = [item for item in items if item != ""]
    output = ""
    index = 1
    for i in range(len(items)):
        if i % 2 == 1:
            print index
            index += 1
            location = getLocation(items[i])
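# A minimal sketch, not from the original script: getLocation assumes the
# service reply always contains "<br/><br/>" and therefore splits into at
# least two parts. A defensive variant (hypothetical name) guards against
# short or malformed replies.
def getLocationSafe(phoneNumber):
    content = HtmlGraber().doGrab(serviceUrl + "?m=" + phoneNumber)
    items = content.replace("<br/><br/>", "||").split("||")
    # Fall back to an empty string when the reply has no second segment.
    return items[1] if len(items) > 1 else ""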