Example #1
 def savetexts(self, filepath, prepath):
     """
     保存预处理后的文本
     :param filepath: html文件路径
     :param prepath:  保存路径
     :return:
     """
     self.logger.info('init pretreatment directory:"{0}"'.format(prepath))
     FileUtil.init_path(prepath)
     try:
         file_lists = os.listdir(filepath)  # all file and directory names under the path, as strings
         for filename in file_lists:
             file = os.path.join(filepath, filename)
             if os.path.isfile(file):
                 # 1. Get the URL and text
                 url, text = FileUtil.get_url_text(file)
                 # 2. Extract keyword information
                 kws = PreDeal.seg(text)
                 self.logger.info(
                     "Store pretreatment texts content:{0}".format(
                         filename))
                 FileUtil.writefile(url + '\t'.join(kws),
                                    os.path.join(prepath, filename))
         self.logger.info('Text pretreatment End!')
     except Exception as e:
         print(e)
Example #2
def main():
    from_path = os.path.expanduser(sys.argv[1])

    filestat_file = os.path.join(os.path.dirname(__file__),
                                 '../data/filestat.lst')
    dirstat_file = os.path.join(os.path.dirname(__file__),
                                '../data/dirstat.lst')
    json_file = os.path.join(os.path.dirname(__file__), '../data/data.json')

    #scan_file_sizes(from_path, stat_file)
    #calc_dir_sizes(filestat_file, dirstat_file)

    lines = FileUtil.read_all(dirstat_file).splitlines()
    iter_dir_sizes = FileUtil.parse_path_sizes(lines)

    root = SizeNode.build_tree(iter_dir_sizes)

    node = root.find_node(os.path.join(from_path))

    #nodes = sorted([c for c in node.name2childs.values()], key=lambda n: n.size, reverse=True)
    nodes = node.name2childs.values()
    min_size = node.size * 0.001
    #print "min_size", min_size

    #nodes = SquareTreeMap.norm_node_sizes(nodes)
    rects = SquareTreeMap.squarify_size_nodes(node.name, nodes, 0., 0., 700.,
                                              400., min_size)

    # rects = square_node(node, 0., 0., 700., 400.)
    # print 'rects', rects
    write_d3_rect_json(json_file, rects)
Example #3
File: hide1.py  Project: loinly/TextStego
 def expriment(self, path='', pagenum=100):
     savename = os.path.join(config.hidepath, 'res.txt')
     for dirname in os.listdir(path):
         filepath = os.path.join(path, dirname)
         if os.path.isdir(filepath):
             for f in os.listdir(filepath):
                 fi = os.path.join(filepath, f)
                 res = []
                 keywords, num = self.info(fi=fi, pagenum=pagenum)
                 unmatch = 0
                 hidenum = 0
                 s = '\t||'
                 for i in num:
                     if i == 0:
                         unmatch += 1
                     else:
                         hidenum = hidenum + i
                     s = s + '\t' + str(i)
                 res.append(fi)
                 res.append(str(len(keywords)))
                 res.append(str(hidenum))
                 res.append(str(len(num)))
                 res.append(str(unmatch))
                 res_str = '\t'.join(res) + s + '\n'
                 FileUtil.write_apd_file(res_str, savename)
             FileUtil.write_apd_file(dirname + ' End !\n', savename)
         pass
     pass
Example #4
 def init_path(self):
     savepath = os.path.join(config.hidepath, '_'.join(self.keys))
     kwpath = os.path.join(config.hidekwpath, '_'.join(self.keys))
     if not os.path.exists(savepath):
         os.makedirs(savepath)
     else:
         FileUtil.init_path(savepath)
     return savepath, kwpath
Example #5
 def query(self, keywords, kwpath=''):
     path = []  # list of articles already found
     num = []  # number of keywords combined into each article
     unmatch = 0  # number of unmatched keywords
     maxh = 0  # number of keywords in the current combination
     q = ''  # combined keyword query
     flag = True  # mismatch flag
     hidekey = []
     while keywords:
         kw = keywords[0]
         paper = Index.search(self.pindexp, q + ' ' + kw, limit=None)
         if paper:
             keywords.pop(0)
             hidekey.append(kw)
             q = q + ' ' + kw
             maxh += 1
         else:  # when the combined search cannot continue, fall back to similar keywords
             simikeys = WV.similarwords(kw)
             t_paper = []
             if not simikeys:
                 print(
                     ".................Failed to find similar words................"
                 )
                 flag = False
             else:
                 for skw, similarity in simikeys:
                     sq = q + ' ' + skw
                     t_paper = Index.search(self.pindexp, sq, limit=None)
                     if t_paper:
                         hidekey.append(skw)
                         keywords.pop(0)
                         q = sq
                         maxh += 1
                         break
                 if not t_paper:  # similar keywords found but the combined search still fails
                     flag = False
             # mismatch handling
             if not flag:
                 doc = Index.search(self.pindexp, q, limit=None)
                 if not doc:
                     print("The keyword  '%s' is unMatch !" % kw)
                     unmatch += 1
                     hidekey.append('0')
                     keywords.pop(0)
                     path.append(None)
                     # flag = True
                 else:
                     path.append(doc)
                     num.append(maxh)
                     maxh = 0
                     q = ''
                 flag = True
         if not keywords:
             path.append(paper)
     hide_string = ' '.join(hidekey)
     FileUtil.writefile(hide_string, kwpath)
     return path
Example #6
File: res.py  Project: loinly/TextStego
 def all_CR(self, infopath, extpath):
     CR = []
     file_list = os.listdir(infopath)
     for name in file_list:
         origin_info = FileUtil.readfile(filename=os.path.join(infopath, name))
         ext_info = FileUtil.readfile(filename=os.path.join(extpath, name))
         cr = self.CR(origin_info, ext_info)
         CR.append(cr)
     return CR
Example #7
 def crawl(self):
     self.download.download()
     readpath = os.path.join(config.spiderhtml, self.filename)
     savepath = os.path.join(config.spidertext, self.filename)
     FileUtil.init_path(savepath)
     for filename in os.listdir(readpath):
         file = os.path.join(readpath, filename)
         url, content = self.parse.parse(file)
         filename = filename.rstrip('.html') + '.txt'
         self.logger.info("Save spider url and content:{0}".format(url))
         FileUtil.writefile(url + content, os.path.join(savepath, filename))
     print('crawl web contents end!')
Example #8
 def search_archive_file(self, sf):
     """Search an archive (compressed) file"""
     ext = FileUtil.get_extension(sf.filename)
     if not ext:
         return
     if self.settings.debug:
         common.log('Searching {0} file {1}'.format(ext, sf))
     if ext in ('zip', 'jar', 'war', 'ear'):
         # handle zip files
         try:
             self.search_zip_file(sf)
         except zipfile.BadZipfile as e:
             if not ext == 'ear':
                 common.log('BadZipfile: {0!s}: {1}'.format(e, sf))
     elif ext in ('bz2', 'tar', 'tgz', 'gz') and \
          tarfile.is_tarfile(sf.relativepath):
         # handle tar files
         try:
             self.search_tar_file(sf, ext)
         except Exception as e:
             msg = 'Exception while searching a tar file {0}: {1!s}'
             common.log(msg.format(sf, e))
     else:
         msg = 'Searching archive file type "{0}" is unsupported at this time'
         common.log(msg.format(ext))
Example #9
    def save_data(self):
        _resAll = FileUtil.readlines(self.resFileName)
        self._clear_data()
        _totalSize = len(_resAll)
        print('Res data storing...')
        _stepSize = 1000
        _sql = "insert into resitem(name) values"
        _sqlTemp = ''
        for _index, _value in enumerate(_resAll):
            try:
                _value = pymysql.escape_string(_value)
                if _index % _stepSize == 0 or _index == _totalSize - 1:
                    if _sqlTemp:
                        _conn = self.dbHelper.connectDatabase()
                        print('storing: %d / %d' % (_index + 1, _totalSize))
                        _cur = _conn.cursor()
                        _sqlTemp = _sqlTemp + ",('%s')" % _value if _index == _totalSize - 1 else _sqlTemp
                        _cur.execute(_sqlTemp)
                        _conn.commit()
                        _cur.close()
                        _conn.close()

                    _sqlTemp = _sql + "('%s')" % _value
                else:
                    _sqlTemp += ",('%s')" % _value
            except Exception as error:
                self.logger.log(logging.ERROR, error)
Example #10
 def kw_num(self):
     nums = []
     for file in os.listdir(self.filepath):
         filename = os.path.join(self.filepath, file)
         kws = FileUtil.readupkws(filename)
         nums.append(len(kws))
     return nums
Example #11
File: sync.py  Project: conanhjj/Cirrus
 def is_same(self, local_file_name):
     local_md5 = FileUtil.file_md5(local_file_name)
     remote_md5 = self.cloud.query_cloudfile_md5(local_file_name)
     #print "file name", local_file_name
     #print "local_md5", local_md5
     #print "remote_md5", remote_md5
     return local_md5 == remote_md5
Example #12
File: pipes.py  Project: oprema/OpenDoor
def main():
    # Create directory if it doesn't exist
    futil = FileUtil(".opendoord")

    # Get access to the database handler
    logger = Logger.get(verbose=True)
    db = Sqlite(futil.path + "/opendoor.db", logger)
    port = Port(logger)
    pipes = Pipes(logger, port, db)

    i = 0
    logger.debug("Send commands via pipe with 10 sec delay")
    while i < 100:
        i += 1
        pipes.send_to_app("OPEN DOOR\n", i)
        logger.debug("OPEN DOOR")
        time.sleep(10)
        i += 1
        pipes.send_to_app("DOORBELL PRESSED\n", i)
        logger.debug("DOORBELL PRESSED")
        time.sleep(10)
        i += 1
        pipes.send_to_app("DOW RING WITH AUTO OPEN\n", i)
        logger.debug("DOW RING WITH AUTO OPEN")
        time.sleep(10)
Example #13
 def __init__(self, lgConfFile, ptrnConfFile, outFldr, chnksz):
     self.lgConfFile = lgConfFile
     self.ptrnConfFile = ptrnConfFile
     self.outFldr = outFldr
     self.chnksz = chnksz
     self.rep = Report()
     self.fu = FileUtil()  
Example #14
def calc_dir_sizes(filestat_file, dirstat_file):
    lines = FileUtil.read_all(filestat_file).splitlines()

    dir2size = {}
    for filepath, size in FileUtil.parse_path_sizes(lines):
        dir2size[filepath] = size
        dirpath = os.path.dirname(filepath)
        while dirpath:
            try:
                dir2size[dirpath] += size
            except KeyError:
                dir2size[dirpath] = size
            dirpath = dirpath.rpartition("/")[0]

    iter_lines = FileUtil.combine_path_sizes(sorted(dir2size.items()))
    FileUtil.write_all(dirstat_file, iter_lines)
Example #15
File: hide1.py  Project: loinly/TextStego
 def info(self, fi='', pagenum=100):
     info = FileUtil.readfile(fi)
     keywords = PreDeal.seg(info)
     # 1. Keyword extraction
     keys = jieba.analyse.textrank(info,
                                   topK=10,
                                   withWeight=False,
                                   allowPOS=('ns', 'n', 'vn', 'v'))
     # 2. Use a search engine to crawl related pages
     # 2.1 Crawl the result links
     spider_link = SpiderLink(keys, self.root)
     spider_link.crawl(pagenum)
     # 2.2 Crawl the page content
     filename = '_'.join(keys) + '.html'
     spider_to = SpiderTo(filename)
     spider_to.crawl()
     # 3. Text preprocessing: deduplicate, remove stop words, segment, keep the URL and keyword set
     p = PreDeal()
     filepath = os.path.join(config.spidertext, '_'.join(keys))
     prepath = os.path.join(config.prepapath, '_'.join(keys))
     p.savetexts(filepath=filepath, prepath=prepath)
     # 4. Build the index and retrieve pages containing the keyword information
     # 4.1 Build the index
     indexpath = os.path.join(config.indexpath, '_'.join(keys))
     idx = Index()
     idx.build(datapath=prepath, indexpath=indexpath)
     search = Search1(filename=fi, pindexp=indexpath)
     # 4.2 Search and save the results
     info_k = keywords[:]
     num = search.retrieve(keywords=info_k)
     return keywords, num
Example #16
 def loadAlterSQL(self, dbConnection):
     fileList = os.listdir(ResourceLocation.AlterDatabaseSQLs.value)
     index = 0
     if len(fileList) > 0:
         print("Choose the file number:\n")
         foundSQLScript = False
         for fileName in fileList:
             index += 1
             if "sql" in fileName:
                 print((str)(index) + ".) " + fileName + "\n")
                 foundSQLScript = True
         if foundSQLScript == True:
             exceptionFlag = False
             choosenFileIndex = input()
             filePaths = self.getFilePaths(
                 fileList, "sql", ResourceLocation.AlterDatabaseSQLs.value)
             try:
                 filePath = filePaths[(int)(choosenFileIndex) - 1]
             except Exception as e:
                 er = Error("You have chosen wrong file as an Input.",
                            traceback.format_exc())
                 er.handleError()
                 exceptionFlag = True
             if exceptionFlag == False:
                 file = FileUtil(filePath, "r")
                 self.executeAndCommitToDatabase(dbConnection, file)
         else:
             print("Sorry, No SQL Files Exists in the Folder.")
     else:
         print("Sorry, No SQL Files Exists in the Folder.")
Example #17
def init(use_base_dir=False):
    args = parser.parse_args()
    setup = ExperimentSetups.parse(args.setup)
    dirname = fileutil.base_dir(args.dest_dir, setup.name, args.max_quantifier_length, args.model_size) if use_base_dir \
        else fileutil.run_dir(args.dest_dir, setup.name, args.max_quantifier_length, args.model_size, args.name)
    file_util = FileUtil(dirname)
    return args, setup, file_util
Example #18
 def _failed(self, filename):
     urls = FileUtil.readfilelist(filename)
     for i, failed_url in enumerate(urls):
         html = self.downloader.download(failed_url)
         datas = self.parser.parse(failed_url, html)
         self.logger.info("the spider system has fetch %s failed links" %
                          str(i + 1))
         self.output.add_data(datas)
Example #19
 def filter_file(self, sf):
     if FileUtil.is_hidden(sf.filename) and self.settings.excludehidden:
         return False
     if sf.filetype == FileType.Archive:
         return self.settings.searcharchives and self.is_archive_search_file(
             sf.filename)
     return not self.settings.archivesonly and self.is_search_file(
         sf.filename)
Example #20
 def filter_file(self, sf):
     if FileUtil.is_hidden(sf.filename) and self.settings.excludehidden:
         return False
     if sf.filetype == FileType.Archive:
         return self.settings.searcharchives and \
                self.is_archive_search_file(sf.filename)
     return not self.settings.archivesonly and \
            self.is_search_file(sf.filename)
Example #21
 def createSchema(self, dbConnection, schemaName):
     utility = Utility()
     self.createDataTablesSQLScript(schemaName)
     sqlRead = FileUtil(ResourceLocation.DatabaseScript.value, "r")
     utility.writeLogs(ResourceLocation.LogFileLocation.value, "",
                       LogMessage.DBDatabaseCreation.value, "a", False)
     self.executeAndCommitToDatabase(dbConnection, sqlRead)
     utility.writeLogs(ResourceLocation.LogFileLocation.value, "",
                       LogMessage.Completed.value, "a", True)
Example #22
 def get_url_titles(self):
     parse_list = []
     html_str = FileUtil.readfile(self.filename)
     linktr = etree.HTML(text=html_str).xpath('//tr')
     for item in linktr:
         url = item.xpath('string(./td[1])')
         title = item.xpath('string(./td[2])')
         parse_list.append(dict([('url', url), ('title', title)]))
     return parse_list
Example #23
 def writeLogs(self, fileLocation, message, content, access, doComplete):
     t = time.localtime()
     logCommands = FileUtil(fileLocation, access)
     logCommands.writeFileContent(
         time.strftime("%H:%M:%S", t) + " : " + message + "\n" + content +
         "\n")
     if doComplete == True:
         logCommands.writeFileContent("\n" + logmessage.Seperator.value +
                                      "\n")
 def readConfig(self):
     # Read the file and config the connection variables
     file = FileUtil(self.configFileName, "r")
     dbConfiguration = file.getFileContent()
     config = dbConfiguration[int(self.configLine)]
     configArray = config.split(self.configDelimiter)
     self.hostName = configArray[0]
     self.portValue = configArray[1]
     self.dbName = configArray[2]
     self.userName = configArray[3]
     self.password = configArray[4]
     self.schemaName = configArray[5]
Example #25
 def getTableHeader(self, fileList):
     utility = Utility()
     filePaths = self.getFilePaths(fileList, "csv",
                                   ResourceLocation.DatabaseLocation.value)
     utility.writeLogs(ResourceLocation.LogFileLocation.value,
                       ("\n").join(filePaths), LogMessage.Files.value, "a",
                       False)
     tableHeaders = []
     for filePath in filePaths:
         fileHeader = ((FileUtil(filePath, "r")).getFileContent())[0]
         tableHeaders.append(fileHeader)
     return tableHeaders
Example #26
def main():
  global log, gpio, lirc, ping, db, gammu

  # Parse arguments, use file docstring as a parameter definition
  args = docopt.docopt(__doc__, version='0.1a')
  #print args

  # Create directory if it doesn't exist
  futil = FileUtil("/home/pi/.resq-pi")
  gammu = None

  # Create a logger
  if args["--verbose"]:
    log = Logger.get(verbose = True)
  else:
    log = Logger.get(futil.path + "/resq-pi.log", False)
  log.info("*** Start ResQ-Pi ***")

  # Be sure we have root privileges
  if os.geteuid() != 0:
    exit("You need to have root privileges. Exiting.")
    
  # Ctrl-C and SIGTERM handler
  signal.signal(signal.SIGINT, signal_handler)
  signal.signal(signal.SIGTERM, signal_handler)

  # Get access to the resq-pi database
  db = ResqStore(futil.path + "/resq-pi.db")
  if not db.exist():
    log.info("No database found. Will create one.")
    db.create_tables() # if not already created
    db.reset_tables()  # and initialize

  # Initialize GPIO, Lirc, GooglePing ...
  gpio = ResqGpio()
  gpio.led(0, False) # all LEDs off
  lirc = ResqLirc(log, gpio)
  ping = GooglePing(log, gpio)

  test = False
  if args["--alarm"]:
    test = True

  if args["--resetdb"]:
    log.info("Reset database")
    db.reset_tables()
  elif args["--resetpass"]:
    log.info("Reset password")
    db.reset_password()
  elif args["--credits"]:
    get_sms_credits()
  else:
    resqpi_endless_loop(test)
Example #27
    def test_02_size_tree(self):
        from fileutil import FileUtil
        from sizenode import SizeNode

        lines = """\
node_modules/promise/domains: 15908
node_modules/promise/lib: 15912
node_modules/promise/node_modules: 34383
node_modules/promise/node_modules/asap: 34383
node_modules/promise/setimmediate: 15886
node_modules/promise/src: 16314
""".splitlines()

        path_sizes = FileUtil.parse_path_sizes(lines)
        self.assertEqual(len([ps for ps in path_sizes]), 6)

        path_sizes = FileUtil.parse_path_sizes(lines)
        tree = SizeNode.build_tree(path_sizes)
        self.assertTrue(tree)
        self.assertTrue(tree.name2childs)
        self.assertEqual(tree.find_node("node_modules/promise/node_modules/asap").size, 34383)
Example #28
 def retrieve(self, keywords):
     savepath, kwpath = self.savepath, self.kwpath
     path = self.query(keywords, kwpath)
     for i, doc in enumerate(path):
         if not doc:
             oldname = os.path.join(config.unMatch_path,
                                    config.unMatch_name)
             newname = os.path.join(savepath,
                                    str(i) + '+' + config.unMatch_name)
             FileUtil.copyfile(oldname, newname)
         elif len(doc) > 1:
             filepath = os.path.join(savepath, str(i))
             if not os.path.exists(filepath):
                 os.mkdir(filepath)
             for d in doc:
                 name = d.get('title')
                 oldname = d.get('path')
                 newname = os.path.join(filepath, str(i) + '+' + name)
                 FileUtil.copyfile(oldname, newname)
         else:
             name = doc[0].get('title')
             oldname = doc[0].get('path')
             newname = os.path.join(savepath, str(i) + '+' + name)
             FileUtil.copyfile(oldname, newname)
     return path
Example #29
 def is_search_dir(self, d):
     path_elems = [p for p in d.split(os.sep) if p not in FileUtil.DOT_DIRS]
     if self.settings.excludehidden:
         for p in path_elems:
             if FileUtil.is_hidden(p):
                 return False
     if self.settings.in_dirpatterns and \
         not any_matches_any_pattern(path_elems, self.settings.in_dirpatterns):
         return False
     if self.settings.out_dirpatterns and \
         any_matches_any_pattern(path_elems, self.settings.out_dirpatterns):
         return False
     return True
Example #30
    def test_01_file_util(self):
        from fileutil import FileUtil

        test_file = __file__ + ".test_01.data"
        path_sizes = [
            ("path1", 1),
            ("path2", 2),
            ("path3", 3),
        ]

        iter_lines = FileUtil.combine_path_sizes(path_sizes)
        FileUtil.write_all(test_file, iter_lines)

        data = FileUtil.read_all(test_file)
        self.assertEqual("""
path1: 1
path2: 2
path3: 3
        """.strip(), data.strip())

        path2size = dict([(path, size) for path, size in FileUtil.parse_path_sizes(data.splitlines())])
        self.assertEqual(path2size['path2'], 2)
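Note: Examples #27 and #30 pin down the line format these helpers exchange: one "path: size" entry per line, written by combine_path_sizes and read back by parse_path_sizes. The sketch below is an illustrative stand-in consistent with those tests, not the project's actual FileUtil code.

class FileUtilSketch(object):
    """Assumed size-list helpers reconstructed from the tests above."""

    @staticmethod
    def combine_path_sizes(path_sizes):
        # one "path: size" line per (path, size) pair
        return ('{0}: {1}\n'.format(path, size) for path, size in path_sizes)

    @staticmethod
    def parse_path_sizes(lines):
        # inverse of combine_path_sizes: yield (path, int(size)) for each non-empty line
        for line in lines:
            line = line.strip()
            if line:
                path, _, size = line.rpartition(': ')
                yield path, int(size)

Because parse_path_sizes is written as a generator here, it is exhausted after a single pass, which would explain why test_02_size_tree above calls it once per use.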
Example #32
 def scpToRemoteMachine(self,remoteFilePath,localFile='', localCatalogue = ''):
     if localFile and localCatalogue:
         print 'SCP parameters are wrong.'
     t = paramiko.Transport((self.scpIp,22))
     t.connect(username = self.scpUser, password = self.scpPassword)
     sftp = paramiko.SFTPClient.from_transport(t)
     if localFile:
         scpInfo="scp %s %s@%s:%s" % (localFile,self.scpUser,self.scpIp,remoteFilePath)
         print scpInfo
         basename=os.path.basename(localFile)  # file name without the path
         sftp.put(localFile,remoteFilePath+'/'+basename)
     elif localCatalogue:
         from fileutil import FileUtil
         fileutil = FileUtil()
         scpInfo="scp %s/* %s@%s:%s" % (localCatalogue,self.scpUser,self.scpIp,remoteFilePath)
         print scpInfo
         files=os.listdir(localCatalogue)
         for eachFile in files:
             if fileutil.isExistFile(eachFile):
                 sftp.put(os.path.join(localCatalogue,eachFile),os.path.join(remoteFilePath,eachFile))
             else:
                 print eachFile + ' is not a file.'
     t.close()
Example #33
 def build(self, datapath, indexpath):
     self.logger.info('Start building the full-text index')
     schema = Schema(title=TEXT(stored=True),
                     path=TEXT(stored=True),
                     content=TEXT(analyzer=SpaceSeparatedTokenizer()))
     if not os.path.exists(indexpath):  # index storage path
         os.makedirs(indexpath)
     ix = create_in(indexpath, schema)  # create the index
     writer = ix.writer()
     for filename in os.listdir(datapath):
         filepath = os.path.join(datapath, filename)
         content = FileUtil.readfile(filepath)
         writer.add_document(path=filepath, title=filename, content=content)
     writer.commit()
Example #34
 def is_archive_search_file(self, f):
     ext = FileUtil.get_extension(f)
     if self.settings.in_archiveextensions and \
         not ext in self.settings.in_archiveextensions:
         return False
     if self.settings.out_archiveextensions and \
         ext in self.settings.out_archiveextensions:
         return False
     if self.settings.in_archivefilepatterns and \
         not matches_any_pattern(f, self.settings.in_archivefilepatterns):
         return False
     if self.settings.out_archivefilepatterns and \
         matches_any_pattern(f, self.settings.out_archivefilepatterns):
         return False
     return True
Example #35
 def is_search_file(self, f):
     ext = FileUtil.get_extension(f)
     if self.settings.in_extensions and \
         not ext in self.settings.in_extensions:
         return False
     if self.settings.out_extensions and \
         ext in self.settings.out_extensions:
         return False
     if self.settings.in_filepatterns and \
         not matches_any_pattern(f, self.settings.in_filepatterns):
         return False
     if self.settings.out_filepatterns and \
         matches_any_pattern(f, self.settings.out_filepatterns):
         return False
     return True
Example #36
    def key2loc(self, keywords, filename='.'):
        col_bits = self.col_bits
        n = pow(2, col_bits)
        _url = FileUtil.readurl(filename)
        text_kws = FileUtil.readupkws(filename)
        row, col, p = self.location(keywords=keywords, text_list=text_kws)  # get the (row, col) coordinate of each keyword
        row_bits = len(self._dec2bin(len(text_kws) // n))  # bits needed to represent the total row count in binary
        s = ''
        print('location information: ')
        for r, c in zip(row, col):
            loc = self._dec2bin(num=r, bits=row_bits) + \
                self._dec2bin(num=c, bits=col_bits)
            print(loc)
            s = s + loc
        num_add_col_bits = self._dec2bin(col_bits, 5)  # col_bits acts as the key, padded to a 5-bit binary string
        _res = ''
        if len(s) % 8 == 0:
            _res = '000' + num_add_col_bits + s
        else:
            num = 8 - len(s) % 8     # number of padding zeros
            num_add = self._dec2bin(num, 3)
            _res = num_add + num_add_col_bits + s + '0' * num

        return _url, _res, p
Example #37
def main():
    global log, port, pipes, db, test_mode

    # Be sure we have root privileges
    if os.geteuid() != 0:
        exit("You need to have root privileges. Exiting.")

    # Ctrl-C and SIGTERM handler
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    # Parse arguments, use file docstring as a parameter definition
    args = docopt.docopt(__doc__, version='0.1a')

    # Create directory if it doesn't exist
    futil = FileUtil(".opendoord")

    print("Path: %s, args: %s" % (futil.path, args))
    # Create a logger
    if args["--console"]:
        log = Logger.get(verbose=True)
    else:
        log = Logger.get(futil.path + "/opendoor.log",
                         verbose=args["--verbose"])
    log.info("*** Start OpenDoor ***")

    # Get access to the database handler
    db = Sqlite(futil.path + "/opendoor.db", log)
    if not db.exist():
        log.info("No database found. Will create one.")
        db.create_tables()  # if not already created
        db.reset_tables()  # and initialize

    if args["--test"]:
        test_mode = True

    # Let's initialize the gpio's
    port = Port(log, test_mode)

    # Open the pipes
    pipes = Pipes(log, port, db)

    if args["--resetdb"]:
        db.reset_tables()
        log.info("Database has been reset.")
    else:
        log.info("Watch door events in an endless loop.")
        opendoor_endless_loop()
Example #38
 def is_search_file(self, sf):
     if self.settings.in_filetypes and \
         not sf.filetype in self.settings.in_filetypes:
         return False
     if self.settings.out_filetypes and \
        sf.filetype in self.settings.out_filetypes:
         return False
     ext = FileUtil.get_extension(sf.filename)
     if self.settings.in_extensions and \
         not ext in self.settings.in_extensions:
         return False
     if self.settings.out_extensions and \
         ext in self.settings.out_extensions:
         return False
     if self.settings.in_filepatterns and \
         not matches_any_pattern(sf.filename, self.settings.in_filepatterns):
         return False
     if self.settings.out_filepatterns and \
         matches_any_pattern(sf.filename, self.settings.out_filepatterns):
         return False
     return True
Example #39
 def is_xml_file(self, f):
     """Return true if file is of a (known) xml file type"""
     return FileUtil.get_extension(f) in self.filetypes['xml']
Example #40
 def is_text_file(self, f):
     """Return true if file is of a (known) text file type"""
     return FileUtil.get_extension(f) in self.filetypes['text']
Example #41
 def is_searchable_file(self, f):
     """Return true if file is of a (known) searchable type"""
     return FileUtil.get_extension(f) in self.filetypes['searchable']
Example #42
 def test_get_extension_hidden_file_no_extension(self):
     filename = '.hidden'
     self.assertEqual(FileUtil.get_extension(filename), '')
Example #43
 def test_is_hidden_double_dot(self):
     filename = '..'
     self.assertFalse(FileUtil.is_hidden(filename))
Example #44
 def test_is_hidden_not_hidden_file(self):
     filename = 'filename.txt'
     self.assertFalse(FileUtil.is_hidden(filename))
Example #45
#-*- coding: UTF-8 -*-

import lxml.etree
from fileutil import FileUtil

#http://lxml.de/lxmlhtml.html
#http://www.cnblogs.com/descusr/archive/2012/06/20/2557075.html
#http://docs.python-guide.org/en/latest/scenarios/scrape/

    
if __name__ == '__main__':
    fileutil = FileUtil()
    content = fileutil.readLocalFile('./example.html')
    page = lxml.etree.HTML(content.decode('UTF-8'), parser=None, base_url=None)

    '''
    for image in images:
        imageDict = image.attrib
        try:
            print imageDict['href']
Example #46
 def test_get_extension_has_txt_extension(self):
     filename = 'filename.txt'
     self.assertEqual(FileUtil.get_extension(filename), 'txt')
Example #47
 def test_get_extension_no_extension(self):
     filename = 'filename'
     self.assertEqual(FileUtil.get_extension(filename), '')
Example #48
 def test_get_extension_hidden_txt_file(self):
     filename = '.hidden.txt'
     self.assertEqual(FileUtil.get_extension(filename), 'txt')
Example #49
 def test_is_dot_dir_non_dot_dir(self):
     filename = '.git'
     self.assertFalse(FileUtil.is_dot_dir(filename))
Example #50
    # callback for the page content
    def contentWriteCallBack(self,buf):
        self.b.write(buf)
    
    # callback for the header information
    def headerWriteCallBack(self,buf):
        self.headerWrite.truncate()  # discard everything after the current position
        self.headerWrite.write(str(buf))
        
    # download/upload progress callback
    def progressWriteCallBack(self,downloadTotal,downloadNow,uploadTotal,uploadNow):
        if downloadTotal > 0:
            Progress = 'download progress:  ' + str(round((downloadNow/downloadTotal)*100,2)) + '%'
            print Progress+"                           \r"
        else:
            if uploadTotal > 0:
                Progress = 'upload progress:  ' + str(round((uploadNow/uploadTotal)*100,2)) + '%'
                print Progress+"                           \r"
####################### end of callbacks ##################################################
 
if __name__=='__main__':
    from fileutil import FileUtil
    fileutil = FileUtil()
    downloadutil = DownloadUtil()
    value = downloadutil.download('http://s.taobao.com/search?q=10%E5%85%83%E5%8C%85%E9%82%AE')
    if value:
        print 'Login success'
        fileutil.writeIntoFile('C:/Users/guo_f/Desktop/compare/search_pycurl.html',value)
    else:
        print 'error'
Example #51
 def test_is_hidden_hidden_file(self):
     filename = '.filename.txt'
     self.assertTrue(FileUtil.is_hidden(filename))
Example #52
 def is_archive_file(self, f):
     """Return true if file is of a (known) archive file type"""
     return FileUtil.get_extension(f) in self.filetypes['archive']
Example #53
class SimpleLogAnalyzer:
    def __init__(self, lgConfFile, ptrnConfFile, outFldr, chnksz):
        self.lgConfFile = lgConfFile
        self.ptrnConfFile = ptrnConfFile
        self.outFldr = outFldr
        self.chnksz = chnksz
        self.rep = Report()
        self.fu = FileUtil()  

    def getLogFiles(self):
        files =[]
        folders =  self.fu.readAllLines(self.lgConfFile);
        for fldrName in folders:
            lindx = getLastPathIndx( fldrName )
            fldr = fldrName[0:lindx]
            ptrn = fldrName[lindx+1:]
            files+=self.fu.listDir( fldr, ptrn ) 
        return files

    def copy(self,rb,chunks,found):
        chnk = chunk()
        chnk.addTag(found)
        chunks.append(chnk)
        for rbel in rb.get():
           if(rbel!=None):
               chnk.append(rbel) 

    def processBuffer(self,buff, ptrns, offset, chnksz, chnks,rb):
        cLastIndx = len(chnks) - 1
        for line in buff:
            if(offset>0):
                chnks[cLastIndx].append(line)
                found = match(line,ptrns)
                offset-=1 
                if(found != None):
                    chnks[cLastIndx].addTag(found)
            else:
                rb.append(line)
                found = match(line,ptrns)
                if(found != None):
                    self.copy(rb,chnks,found)
                    cLastIndx+=1
                    offset=chnksz
        return offset

    def processFile(self,inFileName, ptrns, chnksz):
        bsz = chnksz * 20
        offset=0
        chnks=[]
        rb = RingBuffer(chnksz) 
        buffitr = self.fu.readNextBuffer(inFileName, bsz)
        for buff in buffitr:
            offset = self.processBuffer(buff,ptrns,offset,chnksz,chnks,rb) 
        return chnks

    def dumpChnks(self, chnks, outFldr, lgFile):
        i=getLastPathIndx(lgFile)
        outFile = outFldr + lgFile[i:] + ".xml"
        for cn in chnks:
            self.fu.writeContents(outFile, cn.serialize())

    def process(self):
        lgFiles = self.getLogFiles()
        ptrns = self.fu.readAllLines(self.ptrnConfFile)
        if(len(lgFiles)>0 and len(ptrns)>0):
            for lgFile in lgFiles:
                chnks = self.processFile(lgFile, ptrns, self.chnksz)
                if(len(chnks)>0):
                    self.rep.addMatchedFile(lgFile,chnks)
                    self.dumpChnks(chnks, self.outFldr, lgFile)
                else:
                    self.rep.addUnMatchedFile(lgFile)  


    def generateReport(self):
        self.rep.printReport()
Example #54
 def is_code_file(self, f):
     """Return true if file is of a (known) code file type"""
     return FileUtil.get_extension(f) in self.filetypes['code']
Example #55
 def test_get_extension_missing_extension(self):
     filename = 'filename.'
     self.assertEqual(FileUtil.get_extension(filename), '')
Example #56
 def test_is_dot_dir_double_dot(self):
     filename = '..'
     self.assertTrue(FileUtil.is_dot_dir(filename))
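Note: the get_extension, is_hidden and is_dot_dir tests above (Examples #42-#49, #51, #55, #56) fully determine the expected behavior of those helpers. The sketch below satisfies every one of those assertions; it is an assumption for illustration, not the search project's real FileUtil.

class FileUtilSketch(object):
    """Assumed behavior reconstructed from the tests above."""

    DOT_DIRS = frozenset(['.', '..'])

    @classmethod
    def is_dot_dir(cls, filename):
        # only '.' and '..' count as dot dirs ('.git' does not)
        return filename in cls.DOT_DIRS

    @classmethod
    def is_hidden(cls, filename):
        # hidden: starts with '.' but is not the '.'/'..' directory entry
        return filename.startswith('.') and not cls.is_dot_dir(filename)

    @staticmethod
    def get_extension(filename):
        # 'filename.txt' -> 'txt', '.hidden.txt' -> 'txt'
        # 'filename', 'filename.', '.hidden' -> ''
        stem, dot, ext = filename.rpartition('.')
        return ext if (dot and stem and ext) else ''

Treating the leading dot as part of an empty stem is what makes '.hidden' report no extension while '.hidden.txt' still reports 'txt'.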