def __init__(self, args):
    """Parse command-line style arguments into option attributes.

    Recognised arguments:

    * ``--input=PATH``  -- use PATH as the only input file.
    * ``--inputs=PATH`` -- obtain the input files by calling
      ``getFile(PATH, callback)`` with a callback that strips each item.
    * ``--start=VALUE``, ``--stop=VALUE``, ``--finish=VALUE`` -- stored
      unchanged on ``self.start``, ``self.stop`` and ``self.finish``
      (each defaults to ``''``).
    * ``-c`` -- sets ``self.clear`` to ``True`` (default ``False``).

    Anything else is ignored. A valued option given without an
    ``=value`` part is treated as not supplied.

    Args:
        args: Sequence of argument strings.

    Raises:
        ValueError: If no input files were supplied.
    """
    self.start = ''
    self.stop = ''
    self.finish = ''
    self.clear = False

    for arg in args:
        # partition() splits on the first '=' only, so a value that itself
        # contains '=' (e.g. a file name) is kept whole.
        name, sep, value = arg.partition('=')
        if not sep:
            # No '=value' part, so there is nothing to store for this option.
            continue
        if name == '--input':
            # A single input file.
            self.files = [value]
        elif name == '--inputs':
            # Multiple inputs: let getFile() produce the list of files.
            self.files = getFile(value, lambda x: x.strip())
        elif name in ('--start', '--stop', '--finish'):
            # The attribute name is the option name without the leading '--'.
            setattr(self, name[2:], value)

    # Refuse to continue when no input files were supplied.
    if not hasattr(self, 'files') or not self.files:
        raise ValueError('You must supply a file with --input or a list of space-separated files with --inputs')

    if '-c' in args:
        self.clear = True
def getAndFilter(options):
    """
    Read every file named in options['files'], handing getFile() the
    filter method of a Filter built from the same options.

    Returns a single list holding the results for all files, in the
    order the files are listed.
    """
    word_filter = Filter(options)

    collected = []
    for path in options['files']:
        # The filter method is passed to getFile() as its callback.
        collected.extend(getFile(path, word_filter.filter))
    return collected
def test_getFile_moby(self):
    """Load 'moby_test.txt', expect more than 200000 items, and print the elapsed time."""
    started = time()
    item_count = len(getFile('moby_test.txt'))
    self.assertLess(200000, item_count)
    finished = time()
    print('Moby import time sec:', finished - started)
def test_getFile_basic(self):
    """getFile('small_test.txt') must equal the expected `words` list."""
    expected = words
    actual = getFile('small_test.txt')
    self.assertEqual(expected, actual)
def getAndFilter(options):
    """Return one flat list of getFile() results for every path in options['files'].

    A single Filter is built from `options`; its `filter` method is passed
    to getFile() as the callback for each file.
    """
    flt = Filter(options)
    return [
        item
        for path in options['files']
        for item in getFile(path, flt.filter)
    ]
#!/usr/bin/env python
import os
import sys
import re

from getFile import getFile
from getData import getData
from toXls import toXls
from getCalc import getCalc


def display():
    '''Display data in graph

    Not currently implemented: this is a placeholder that does nothing.
    '''


def main():
    '''Run the script's steps in order.

    Calls getFile() for a path, getData(path) for the data, getCalc(data)
    for the processed data, toXls(data, path) to export it, and finally
    display().
    '''
    path = getFile()
    data = getData(path)
    data = getCalc(data)
    toXls(data, path)
    display()


if __name__ == '__main__':
    main()
def test_getFile_(self):
    """getFile('small_test.txt', on_word) must produce the expected list below."""
    expected = [
        'Fishhi', 'hathi', 'foohi', 'cowhi', 'Cowhi',
        'siamese.hi', 'Wonderlandhi', 'foohi', 'toothpastehi', 'barhi',
    ]
    actual = getFile('small_test.txt', on_word)
    self.assertEqual(expected, actual)
import getFile
import re
import os
import random
from time import sleep

# Whatever getFile.getFile() returns; its shape is defined by that module.
book_list = getFile.getFile()
# Opened in append mode as UTF-8 text.
# NOTE(review): no close() is visible in this part of the file.
downloaded_book = open('DownloadedBook.txt', "a", encoding='utf8')


def get_title(self):
    """Return the cleaned title from the first line carrying the title markup.

    Despite its name, `self` is simply the argument: an iterable of text
    lines. The first line containing '<li class="title"><span><a>' has
    that opening markup and the closing '</a></span></li>' removed, and
    the remainder is passed through remove_characters().

    Returns:
        The cleaned title, or None (implicitly) when no line matches.
    """
    for line in self:
        if '<li class="title"><span><a>' in line:
            name = line.replace('<li class="title"><span><a>', '')
            name = name.replace('</a></span></li>', '')
            name = remove_characters(name)
            return name


def remove_characters(self):
    """Return `self` (a string) cleaned up by a fixed series of edits.

    Steps, in order: strip surrounding whitespace, replace spaces with
    underscores, then remove the characters / : * ? " < > |, single
    quotes and parentheses.
    """
    name = self
    name = name.strip()
    name = name.replace(' ', '_')
    # Raw string: the pattern is unchanged, but the '\/' no longer counts as
    # an invalid string escape (which newer Python versions warn about).
    # NOTE(review): inside the class '\/' matches only '/', so a literal
    # backslash is NOT removed -- confirm whether that was intended.
    name = re.sub(r'[\/:*?"<>|]', '', name)
    name = name.replace("'", "")
    name = re.sub('[()]', '', name)
    return name


def make_dir(self):
    """Print a notice when the path `self` is already a directory.

    NOTE(review): the visible code only reports an existing directory; it
    does not create anything.
    """
    if os.path.isdir(self):
        print(self, ' is already created.')
def getAndFilter(args):
    """Return the `words` of a WalkArray after its filter() has run.

    `args` is turned into options by dealArgs(); getFile() is called with
    options['file'], and its result plus the options are wrapped in a
    WalkArray.
    """
    opts = dealArgs(args)
    walker = WalkArray(getFile(opts['file']), opts)
    walker.filter()
    return walker.words
def test_getFile_basic(self):
    """With a callback that returns its argument unchanged, getFile must equal `words`."""
    expected = words
    actual = getFile('testfiles/small_test.txt', lambda a: a)
    self.assertEqual(expected, actual)
# NOTE(review): this is a fragment; `args`, `useProxy`, `proxyIP` and `anser`
# are defined outside the visible code.
proxyPort = args.proxyPort
# Proxy settings bundled as a tuple and handed to each stage below.
proxy = (useProxy, proxyIP, proxyPort)

# Stage 1: scan the page lists (the message reads "start scanning page list").
print('开始扫描页面列表')
twoListClass = get2list.getTwoList(args.url, config=args.config, proxy=proxy)
twoListClass.setDeep(args.deep)
video = twoListClass.getAllClassName()
for i in video:
    # One scan per name returned by getAllClassName().
    print('开始获取%s页面列表' % (i))
    twoListClass.setNowJod(i)
    twoListClass.scan()
twoListClass.close()

# Stage 2: fetch the video details ("start fetching video details").
print('开始获取视频详情')
getVideo = getvideoData.getvideoData(args.url, config=args.config, proxy=proxy)
getVideo.scan()

# Stage 3: download ("start downloading video torrents and images").
print('开始下载视频种子及图片')
baseDir = anser['baseDir']
# Ensure the base directory ends with a path separator before appending.
# NOTE(review): baseDir[-1] raises IndexError if baseDir is an empty string.
if baseDir[-1] != '/' and baseDir[-1] != '\\':
    baseDir = baseDir + '/'
baseDir = baseDir + 'videoData/'
down = getFile.getFile(args.url, baseWay=baseDir, config=args.config, proxy=proxy)
down.down()
# Final message: "run finished".
print('运行结束')
def getAndFilter(options):
    """Return getFile()'s result for options['file'].

    The `filter` method of a Filter built from `options` is passed to
    getFile() as its callback.
    """
    word_filter = Filter(options)
    return getFile(options['file'], word_filter.filter)