def testParseDataWithNullData(getFixtures):
    try:
        parser = DataParser()
        parsedData = parser.parseData(getFixtures.id, 1, None)
        fail("parser.parseData should have thrown an error on null data")
    except Exception as ex:
        assert type(ex) is ValueError
def fillTableWithDataFromFile(self, fileName):
    # Parse data into booking class
    dataParser = DataParser(fileName)
    bookings = dataParser.GetAllBookings()

    # Show data in UI table
    self.dataTable.setRowCount(0)
    for row, booking in enumerate(bookings):
        self.dataTable.insertRow(row)
        dateItem = QtWidgets.QTableWidgetItem(booking.date)
        bookingTypeItem = QtWidgets.QTableWidgetItem(booking.bookingType)
        nameItem = QtWidgets.QTableWidgetItem(booking.name)
        purposeItem = QtWidgets.QTableWidgetItem(booking.purpose)
        valueItem = QtWidgets.QTableWidgetItem(booking.value)
        valueItem.setTextAlignment(Qt.AlignRight)
        self.dataTable.setItem(row, 0, dateItem)
        self.dataTable.setItem(row, 1, bookingTypeItem)
        self.dataTable.setItem(row, 2, nameItem)
        self.dataTable.setItem(row, 3, purposeItem)
        self.dataTable.setItem(row, 4, valueItem)
    self.dataTable.resizeColumnsToContents()
def testParseDataWithMissingOffset(getFixtures):
    try:
        parser = DataParser()
        parsedData = parser.parseData(getFixtures.id, None, getFixtures.data)
        fail("parser.parseData should have thrown an error on null offset")
    except Exception as ex:
        assert type(ex) is ValueError
def testBasicFileParsing(self):
    dataParser = DataParser()
    dataParser.parseFile(
        "/Users/LilyWU/Documents/PAMAP/PAMAP2_Dataset/Protocol/subject101.dat")
    # for sess in dataParser.sessions:
    #     for sample in sess[1]:
    #         print(1, sample.samples.hand.accX)
    print(dataParser.sessions[1].samples.hand.accX)
def testParseDataWithNonJsonData(getFixtures):
    try:
        parser = DataParser()
        data = "this is not JSON"
        parsedData = parser.parseData(getFixtures.id, 1, data)
        fail("parser.parseData should have thrown an error on non JSON data")
    except Exception as ex:
        assert type(ex) is ValueError
def test_04_parser_parse_raw_data(self):
    raw = "empid=D011\ngender=M\nage=29"  # renamed from 'input' to avoid shadowing the builtin
    parser = DataParser()
    parser.parse_raw_data(raw)
    expected = [{'empid': 'D011', 'gender': 'M', 'age': '29'}]
    actual = parser.get_data()
    self.assertEqual(expected, actual)
def testOffsetTimeGenerationWithInvalidArgs(getFixtures):
    parser = DataParser()
    try:
        strTimeNow = "2016-10-09T15:48:54"  # invalid: missing the trailing 'Z'
        dtOffset = parser.getOffsetTime(strTimeNow, getFixtures.offset)
        fail("should have thrown exception b/c invalid timestamp")
    except Exception as ex:
        assert type(ex) is ValueError
def testParseDataWithInvalidJsonData(getFixtures):
    try:
        parser = DataParser()
        data = {'foobar': 'goo'}
        parsedData = parser.parseData(getFixtures.id, 1, data)
        fail("parser.parseData should have thrown an error on incorrect JSON data")
    except Exception as ex:
        assert type(ex) is KeyError
def setUp(self):
    self.parser = DataParser()
    self.cmd_view = CmdView()
    self.file_reader = FileReader()
    self.validator = Validator()
    self.db = Database("test.db")
    self.vis = Visualiser()
    self.val = Validator()
    self.serial = Serializer()
    self.controller = Controller(self.cmd_view, self.file_reader,
                                 self.parser, self.validator, self.db,
                                 self.vis, self.serial)
    self.init()
def parse(self):
    struct = self.dictFromListBox(self.fields_list)
    struct["separator"] = self.tokenSeparator.get()
    parser = DataParser(struct)
    with open(self.logFilename.get(), 'r') as logfile:
        testline = logfile.readline()
    print(testline, struct)
    data = parser.parse_line(testline)
    if data is None:
        print("Could not match line structure to the log's [first] line")
        return None
    all_data = parser.parse_file(self.logFilename.get())
    self.generateStatistics(all_data)
def _get_segments(self, data: InputData) -> iter:
    """Read the data stream in the standard data format and yield its data segments."""
    if data.stream is None or not data.stream.readable():
        self._logger.error(
            "Data stream is None when trying to convert to standard Task: %s"
            % data._source)
        return
    succ = True
    try:
        for seg in DataParser.parse_standard_data(data.stream):
            # got one data segment
            seg: DataSeg = seg
            self._add_required_fields(seg, data)
            # validate the fields
            if not self._validation_fields(seg, data):
                succ = False
                continue
            yield seg
    finally:
        if not succ:
            data.on_complete(False)
def _deal_data(self, data: InputData) -> bool:
    """Process one piece of dns input data.

    :param data: the InputData to process
    :return: True on success, False otherwise
    """
    try:
        if data.stream is None or not data.stream.readable():
            self._logger.error(
                f"Data stream is None when trying to convert to standard Task: {data._source}"
            )
            return False
        exten = data.extension
        for seg in DataParser.parse_standard_data(data.stream):
            # got one data segment
            data_fields = seg._fields
            if exten == self.o_suffix:
                self.dispatch_to_client(data_fields)
            elif exten == self.c_suffix:
                self.output_dns_data(data_fields, self.o_suffix,
                                     self._outputdir)
                self._logger.info('Output dns_req result')
        return True
    except Exception as error:
        self._logger.error(f"Deal with dns data error, err:{error}")
        return False
    finally:
        data.on_complete()
def generate_text(n: int, align: str = 'full') -> str:
    """Generate a fixed-width text of n lines.

    :param n: number of lines of the text
    :param align: alignment style, i.e. "full", "left" or "right"; default "full"
    :return: generated text
    """
    f = {
        'full': generate_line_full,
        'left': generate_line_left,
        'right': generate_line_right
    }[align]
    dp = DataParser.factory()
    text = ""
    if dp.include_header:
        for i in range(len(dp.column_names)):
            width = dp.offsets[i]
            name = dp.column_names[i]
            if align == "right":
                text += ' ' * (width - len(name)) + name
            else:
                text += name + ' ' * (width - len(name))
        text += '\n'
    for i in range(n - 1):
        text += f(dp.offsets) + '\n'
    text += f(dp.offsets)
    return text
def query_img_info_func(next_page_href):
    print('------------------------------------------------------------------%d' % 1)
    # thread = threading.Thread(target=thread_run, args=(resultJson, page))
    result = DataParser.parse_img_info(
        Downloader.fetch_next_page(next_page_href))
    # print(result)
    print('------------------------------------------------------------------%d' % 2)
    return result
def query_img_info_by_content(img_path):
    from functools import reduce  # needed on Python 3
    result_json_list = []
    pool = ThreadPool(70)
    try:
        html = ContentDownloader.fetch_first_page(img_path)
        # print(html)
        img_list = DataParser.parse_img_info(html)
        # print(img_list)
        next_page_href_list = DataParser.parse_next_page_href(html)
        # print(next_page_href_list)
        result_json_list = pool.map(query_img_info_func, next_page_href_list)
        pool.close()
        pool.join()
        result_json_list = reduce(image_info_list_reduce, result_json_list)
        result_json_list = list(filter(image_info_list_filter, result_json_list))
        print('Finish query.')
    except Exception as e:
        print(e)
def parseDataAndPersistIntoDb(self, db_filename):
    raw_data_filenames = [
        "PAMAP2_Dataset/Protocol/subject101.dat",
        "PAMAP2_Dataset/Protocol/subject102.dat",
        "PAMAP2_Dataset/Protocol/subject103.dat",
        "PAMAP2_Dataset/Protocol/subject104.dat",
        "PAMAP2_Dataset/Protocol/subject105.dat",
        "PAMAP2_Dataset/Protocol/subject106.dat",
        "PAMAP2_Dataset/Protocol/subject107.dat",
        "PAMAP2_Dataset/Protocol/subject108.dat",
        "PAMAP2_Dataset/Protocol/subject109.dat"
    ]
    for (index, dataFilename) in enumerate(raw_data_filenames):
        dataParser = DataParser()
        dataParser.parseFile(dataFilename)
        self.__persistDataParserIntoDb(db_filename, dataParser, index)
def testParsedDataWithValidArgs(getFixtures):
    try:
        parser = DataParser()
        parsedData = parser.parseData(getFixtures.id, getFixtures.offset,
                                      getFixtures.data)
        assert parsedData is not None
        # logging.debug(parsedData)
        assert len(parsedData) > 0
        for data in parsedData:
            assert data['id'] is not None
            assert data['time'] is not None
            assert data['heartRate'] is not None
            assert data['coordinates'] is not None
    except Exception as ex:
        logging.debug(str(ex))
        # cause a failure
        fail("should not have an exception when parsedData has valid input")
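# The parseData tests above (null data, null offset, non-JSON data, invalid
# JSON data, valid args) pin down a contract without showing the parser
# itself. A minimal sketch of a parseData that would satisfy them follows;
# the 'readings' payload key and the per-entry field names are assumptions,
# not the real implementation.
import json

def parseData(self, deviceId, offset, data):
    if deviceId is None or offset is None or data is None:
        # matches the null-data and null-offset tests
        raise ValueError("deviceId, offset and data are all required")
    if isinstance(data, str):
        # json.JSONDecodeError subclasses ValueError, matching the
        # non-JSON-data test
        data = json.loads(data)
    # indexing a dict without the expected key raises the KeyError the
    # invalid-JSON-data test asserts ('readings' is hypothetical)
    parsed = []
    for entry in data['readings']:
        parsed.append({
            'id': deviceId,
            'time': self.getOffsetTime(entry['time'], offset),
            'heartRate': entry['heartRate'],
            'coordinates': entry['coordinates'],
        })
    return parsed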
def setUp(self):
    self.parser = DataParser()
    self.cmd_view = CmdView()
    self.file_view = FileView()
    self.validator = Validator()
    self.db = DatabaseView("test.db")
    self.vis = Visualiser()
    # self.val = Validator()
    self.controller = Controller(self.cmd_view, self.file_view, self.parser,
                                 self.validator, self.db, self.vis)
def test_10_controller_validate_fail(self):
    self.controller = Controller(self.cmd_view, self.file_reader,
                                 DataParser(), self.validator, self.db,
                                 self.vis, self.serial)
    captured_output = io.StringIO()
    sys.stdout = captured_output
    self.controller.validate()
    expected = ("* No data has been read.\n"
                "-- Type 'help get' for more details.\n")
    actual = captured_output.getvalue()
    sys.stdout = sys.__stdout__
    self.assertEqual(expected, actual)
def testOffsetTimeGeneration(getFixtures):
    try:
        parser = DataParser()
        strTimeNow = "2016-10-09T15:48:54Z"
        dtOffset = parser.getOffsetTime(strTimeNow, getFixtures.offset)
        assert dtOffset.second == 59

        strTimeNow = "2016-10-09T15:48:55Z"
        dtOffset = parser.getOffsetTime(strTimeNow, getFixtures.offset)
        assert dtOffset.second == 0
        assert dtOffset.minute == 49

        strTimeNow = "2016-10-09T15:48:56Z"
        dtOffset = parser.getOffsetTime(strTimeNow, getFixtures.offset)
        assert dtOffset.second == 1
        assert dtOffset.minute == 49
    except Exception as ex:
        logging.debug(str(ex))
        # cause a failure
        fail("should not have an exception when parsing valid time and offset")
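# Read together with testOffsetTimeGenerationWithInvalidArgs above, these
# assertions imply the fixture offset is 5 seconds and that timestamps must
# carry the trailing 'Z'. A minimal getOffsetTime sketch that would pass both
# tests, assuming a fixed strptime format (the real implementation is not
# shown in this collection):
from datetime import datetime, timedelta

def getOffsetTime(self, strTime, offsetSeconds):
    # strptime raises the ValueError the invalid-args test expects when
    # the trailing 'Z' is missing
    dt = datetime.strptime(strTime, "%Y-%m-%dT%H:%M:%SZ")
    return dt + timedelta(seconds=offsetSeconds)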
def _convert(self, data: InputData) -> iter: """将中心下发的任务转换为自有的通用任务结构Task体枚举(一个文件可能有多个任务段)""" succ = True try: if data.stream is None or not data.stream.readable(): self._logger.error( "Data stream is None when trying to convert to standard Task: %s" % data._source) succ = False return for seg in DataParser.parse_standard_data(data.stream): if seg is None or len(seg._fields) < 1: continue try: # 必要字段 self._add_required_fields(seg, data) # 根据host拿apptype if not seg.contains_key("apptype"): apptype = self._get_apptype(seg._fields, data) if not apptype is None: seg.append_to_fields('apptype', apptype) # 验证字段有效性 if not self._validation_fields(seg, data): succ = False continue tsk: Task = Task(seg._fields) tsk.segindex = seg.segindex tsk.segline = seg.segline if tsk is None: continue yield tsk except Exception: succ = False self._logger.error( "Generate Task from dic fields error:\ndata:%s\nex:%s" % (data._source, traceback.format_exc())) except Exception: succ = False self._logger.error("Convert data to Task error:\ndata:%s\nex:%s" % (data._source, traceback.format_exc())) finally: if not succ and not data is None: data.on_complete(False)
def make_person_content(info, page=1, page_count=1):
    from template import TEngine
    start = int(PAGESIZE) * (int(page) - 1) + 1
    page_info = info[start:start + PAGESIZE]
    ret = ""
    for item in page_info:
        parser = DataParser(item['path'] + "/profile.txt")
        person_info = parser.parse()
        context = person_info
        context['username'] = person_info['username']
        context['server_addr'] = WEBDIR
        context['now_page'] = page
        context['up_page'] = [str(int(page) - 1)]
        context['down_page'] = [str(int(page) + 1)]
        context['pagecount'] = page_count
        context['photo'] = get_random_person_img(item, person_info)
        engine = TEngine("person_sub.html", context, False)
        engine.parse()
        ret += engine.content
    return ret
def _bcp_deal(self, bcpfi: str, data: InputData) -> iter:
    """Read the lines of a bcp file and build Task objects from them."""
    try:
        segindex = 0
        segline = 0
        succ = True
        with open(bcpfi, 'r', encoding=self._enc) as fs:
            for seg in DataParser.parse_bcp_data(fs):
                try:
                    seg: DataSeg = seg
                    # required fields
                    self._add_required_fields(seg, data)
                    # look up apptype by host if it is not present yet
                    if not seg.contains_key("apptype"):
                        apptype = self._get_apptype(seg._fields, data)
                        if apptype is not None:
                            seg.append_to_fields('apptype', apptype)
                    # validate the fields
                    if not self._validation_fields(seg, data):
                        succ = False
                        continue
                    task: Task = Task(seg._fields)
                    task.segindex = segindex
                    task.segline = segline
                    segline += 1
                    segindex += 1
                    yield task
                except Exception as ex:
                    succ = False
                    self._logger.error(
                        "Parse one line in bcp file error:\ndata:%s\nerror:%s"
                        % (bcpfi, ex))
    except Exception:
        succ = False
        self._logger.error("Deal bcp file error:\nfile:%s\nerror:%s"
                           % (bcpfi, traceback.format_exc()))
    finally:
        if not succ:
            data.on_complete(False)
        return
def _convert(self, data: InputData) -> iter: """读取数据,返回数据段的字典迭代器""" try: if data.stream is None or not data.stream.readable(): self._logger.error( "Data stream is None when trying to convert to standard Task: %s" % data._source) return for dicseg in DataParser.parse_standard_data(data.stream): if dicseg is None or len(dicseg._fields) < 1: continue yield dicseg except Exception: self._logger.error("Convert data to Task error:\ndata:%s\nex:%s" % (data._source, traceback.format_exc())) if not data is None: data.on_complete(False)
def _parse_data_back(self, data: InputData) -> iter:
    """Parse returned scan data into IscanTaskBack objects."""
    try:
        for seg in DataParser.parse_standard_data(data.stream):
            seg: DataSeg = seg
            try:
                tb: IscanTaskBack = IscanTaskBack.create_from_dataseg(
                    seg, data._platform)
                tb.inputdata = data
                yield tb
            except Exception:
                self._logger.error(
                    "Parse one data segment error:\ndata:{}\nsegindex:{}\nerror:{}"
                    .format(data._source, seg.segindex, traceback.format_exc()))
                # if any single segment fails to parse, the whole data
                # counts as failed
                data.on_complete(False)
    except Exception:
        self._logger.error(
            "Parse TaskBatchBack data error:\ndata:{}\nerror:{}".format(
                data._source, traceback.format_exc()))
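# The standard-data consumers above (_get_segments, _deal_data, the two
# _convert variants, _bcp_deal and _parse_data_back) all touch the same small
# surface of the segments that DataParser.parse_standard_data yields. A sketch
# of that shape, inferred only from those call sites; the real DataSeg class
# is not shown in this collection:
class DataSeg:
    def __init__(self, fields: dict, segindex: int = 0, segline: int = 0):
        self._fields = fields      # key/value pairs of one parsed segment
        self.segindex = segindex   # ordinal of the segment within the file
        self.segline = segline     # line where the segment starts

    def contains_key(self, key: str) -> bool:
        return key in self._fields

    def append_to_fields(self, key, value):
        self._fields[key] = value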
class LogisticRegression:
    def __init__(self, parameters, sc):
        parameters = json.loads(parameters)
        schema = parameters.get('schema', None)
        header = parameters.get('header', False)
        self._parser = DataParser(schema, header)
        self._sc = sc

    def predict(self, input_data):
        return self._model.predict(input_data)

    def train(self, input_data, parameters):
        iterations = parameters.get('iterations', None)
        weights = parameters.get('weights', None)
        intercept = parameters.get('intercept', None)
        numFeatures = parameters.get('numFeatures', None)
        numClasses = parameters.get('numClasses', None)
        data = self._sc.parallelize(self._parser.parse(input_data))
        self._model = LogisticRegressionWithLBFGS.train(
            data, iterations=iterations, numClasses=numClasses)
def test_01_parser_to_list(self):
    expected = ['empid=D011', 'gender=M', 'age=29']
    actual = DataParser()._to_list("empid=D011\ngender=M\nage=29")
    self.assertEqual(expected, actual)
__author__ = 'Radim Spigel'
__version__ = '1.0'

import sys
from dataparser import print_help, DataParser
from qtgui import qt_main

if __name__ == "__main__":
    if len(sys.argv) > 1:
        print(sys.argv)
        if '-h' in sys.argv:
            print_help()
            sys.exit()
        datagetter = DataParser(sys.argv[1])
        datagetter.from_command_line(sys.argv[1:])
    elif len(sys.argv) > 6:
        print_help()
    else:
        qt_main()
def analyze(self):
    if self.fname is None:
        print("File is not set.")
        return
    datagetter = DataParser(self.fname[0])
    # toPlainText() returns an empty string (never None) when the field
    # is blank, so test for emptiness rather than None
    if not self.regexTextField.toPlainText():
        print("Regexp is not set.")
        return
    datagetter.init_regex(self.regexTextField.toPlainText())
    datagetter.filled_data()
    if self.allowStatistics.isChecked():
        datagetter.print_statistics()
    if self.csvReport.isChecked():
        datagetter.save_to_csv()
    if self.allowGraphs.isChecked():
        datagetter.print_graphs(self.separateGraphs.isChecked())
'''Produce a list of co-authors of each author in the given input file.'''
from dataparser import DataParser


def get_co_authors(author):
    global researchpapers
    co_authors = set()
    for paper in researchpapers:
        if author in paper['author']:
            co_authors.update(paper['author'])
            co_authors.remove(author)
    return co_authors


researchpapers = DataParser.readandparsefile('citations.txt')
for paper in researchpapers:
    for author in paper['author']:
        co_authors = get_co_authors(author)
        if len(co_authors) > 0:
            print(author + ' -> ' + ', '.join(co_authors))
def test_02_parser_to_dict(self):
    expected = {'empid': 'D011', 'gender': 'M', 'age': '29'}
    actual = DataParser()._to_dict(['empid=D011', 'gender=M', 'age=29'])
    self.assertEqual(expected, actual)
""" Routes and views for the flask application. """ from datetime import datetime from flask import render_template, request from ProteinDB import app from dataparser import DataParser dataparser = DataParser() dataparser.read_from_csv() @app.route('/') @app.route('/home') def home(): """Renders the home page.""" return render_template( 'index.html', title='Home', year=datetime.now().year, unique_pathologies = dataparser.unique_pathologies, unique_biofluids = dataparser.unique_biofluids, ) @app.route("/searchprotein") def query_protein(): """Renders the results of the query""" table, link = dataparser.search_protein(request.args["proteinName"]) return render_template(
if __name__ == '__main__':
    parser = OptionParser()
    parser.add_option("-d", "--dataset", dest="db_type", default="berlin")
    parser.add_option("-p", "--dataset_path", dest="path", default="")
    (options, args) = parser.parse_args(sys.argv)
    db_type = options.db_type
    path = options.path

    print("Loading data from " + db_type + " dataset...")
    # note the trailing comma: ('berlin') is just a string, not a tuple
    if db_type not in ('berlin',):
        sys.exit("Dataset not registered. Please create a method to read it")

    db = DataParser(path, db_type)
    # k_folds = len(db.test_sets)
    # splits = zip(db.train_sets, db.test_sets)

    callback_list = [
        EarlyStopping(monitor='acc', patience=1, verbose=1),
        ModelCheckpoint(filepath='cnnlstm_model.h5', monitor='val_loss',
                        save_best_only=True)
    ]
# Instantiate models
model1 = BCNN()
model2 = TCNN()

# Load models from files
model1.load_state_dict(torch.load("./bcnn_model.pt"))
model2.load_state_dict(torch.load("./tcnn_model.pt"))
model1 = model1.cuda()
model2 = model2.cuda()

# Set to eval mode
model1.eval()
model2.eval()

# Load data from the testing set
testset = DataParser('04')
testloader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=True)

total = 0
err_x = 0
err_z = 0
err_t = 0

# Run for all testing data
for counter, d in enumerate(testloader, 0):
    dtype = torch.cuda.FloatTensor
    x1 = d["img_l1"].type(dtype)
    x2 = d["img_l2"].type(dtype)
    yx = d["dx"].type(dtype)
    yz = d["dz"].type(dtype)
    yt = d["dth"].type(dtype)
def get_logs():
    dp = DataParser(constants.log_file_path)
    mentions_by_ticker = dp.mentions_by_ticker()
def test_03_parser_scrub_db_list(self):
    expected = [14, 25]
    actual = DataParser().scrub_db_list([(14,), (25,)])
    self.assertEqual(expected, actual)
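# Tests 01-04 above fully determine a small DataParser surface. A minimal
# implementation sketch that satisfies all four (the method names come from
# the tests; the internals are assumptions, not the project's actual code):
class DataParser:
    def __init__(self):
        self._data = []

    def _to_list(self, raw):
        # "empid=D011\ngender=M\nage=29" -> ['empid=D011', 'gender=M', 'age=29']
        return raw.split('\n')

    def _to_dict(self, pairs):
        # ['empid=D011', ...] -> {'empid': 'D011', ...}
        return dict(pair.split('=', 1) for pair in pairs)

    def parse_raw_data(self, raw):
        self._data.append(self._to_dict(self._to_list(raw)))

    def get_data(self):
        return self._data

    def scrub_db_list(self, rows):
        # [(14,), (25,)] -> [14, 25]
        return [row[0] for row in rows]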
import numpy as np
from numpy import genfromtxt

# Helper module to read data from CSV
from dataparser import DataParser


def target_function(x):
    """The function that we want to approximate using a simple
    Multi-Layered Perceptron (MLP): y = 2 * x + 8
    """
    x_double = np.multiply(2, x)
    return np.add(x_double, 8)


# Read the training and test data from CSV files
csv_parser = DataParser()
x_train, y_train = csv_parser.parse("data/linear_training.csv", delimiter=",")
x_test, y_test = csv_parser.parse("data/linear_test.csv", delimiter=",")

# Network parameters
n_units = 10

# Training parameters
n_epochs = 70
batchsize = np.size(x_train)  # the size of the training data
datasize = np.size(x_train)

# Define the linear network model with 1 input unit and 1 output unit