Example #1
def testParseDataWithNullData(getFixtures):
    try:
        parser = DataParser()
        parsedData = parser.parseData(getFixtures.id, 1, None)
        fail("parser.parseData should have thrown an error on null data")
    except Exception as ex:
        assert (type(ex) is ValueError)
Example #2
    def __init__(self, parameters, sc):

        parameters = json.loads(parameters)
        schema = parameters.get('schema', None)
        header = parameters.get('header', False)
        self._parser = DataParser(schema, header)
        self._sc = sc
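The constructor above takes `parameters` as a JSON string and reads only two keys from it. A plausible payload, with made-up values, might be built like this:

import json

# Hypothetical values; __init__ only reads 'schema' and 'header'.
parameters = json.dumps({"schema": ["age", "salary"], "header": False})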
Example #3
    def fillTableWithDataFromFile(self, fileName):
        # Parse data into booking class
        dataParser = DataParser(fileName)
        bookings = dataParser.GetAllBookings()

        # Show data in UI table
        self.dataTable.setRowCount(0)
        for row, booking in enumerate(bookings):
            self.dataTable.insertRow(row)

            dateItem = QtWidgets.QTableWidgetItem(booking.date)
            bookingTypeItem = QtWidgets.QTableWidgetItem(booking.bookingType)
            nameItem = QtWidgets.QTableWidgetItem(booking.name)
            purposeItem = QtWidgets.QTableWidgetItem(booking.purpose)
            valueItem = QtWidgets.QTableWidgetItem(booking.value)

            valueItem.setTextAlignment(Qt.AlignRight)

            self.dataTable.setItem(row, 0, dateItem)
            self.dataTable.setItem(row, 1, bookingTypeItem)
            self.dataTable.setItem(row, 2, nameItem)
            self.dataTable.setItem(row, 3, purposeItem)
            self.dataTable.setItem(row, 4, valueItem)

        self.dataTable.resizeColumnsToContents()
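The table code assumes each booking exposes five string attributes. A minimal stand-in consistent with those reads (the class itself is an assumption; only the attribute names come from the code above):

from dataclasses import dataclass

@dataclass
class Booking:
    # Every value is handed to QTableWidgetItem, which expects a string.
    date: str
    bookingType: str
    name: str
    purpose: str
    value: str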
Example #4
def testParseDataWithMissingOffSet(getFixtures):
    try:
        parser = DataParser()
        parsedData = parser.parseData(getFixtures.id, None, getFixtures.data)
        fail("parser.parseData should have thrown an error on null offset")
    except Exception as ex:
        assert (type(ex) is ValueError)
Example #5
 def testBasicFileParsing(self):
     dataParser = DataParser()
     dataParser.parseFile("/Users/LilyWU/Documents/PAMAP/PAMAP2_Dataset/Protocol/subject101.dat")
     # for sess in dataParser.sessions:
     #   #Session()=session
     #   for sample in sess[1]:
     #      print(1,sample.samples.hand.accX)
     print(dataParser.sessions[1].samples.hand.accX)
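The attribute chain in the final line implies a nested record layout. A rough stand-in consistent with that access, with hypothetical values (only the field names come from the test):

from types import SimpleNamespace

# sessions -> samples -> per-sensor readings; values here are made up.
hand = SimpleNamespace(accX=[0.1, 0.2])
session = SimpleNamespace(samples=SimpleNamespace(hand=hand))
print(session.samples.hand.accX)  # mirrors the access made by the test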
Example #6
def testParseDataWithNonJsonData(getFixtures):
    try:
        parser = DataParser()
        data = "this is not JSON"
        parsedData = parser.parseData(getFixtures.id, 1, data)
        fail("parser.parseData should have thrown an error on non JSON data")
    except Exception as ex:
        assert (type(ex) is ValueError)
Example #7
    def test_04_parser_parse_raw_data(self):
        input = "empid=D011\ngender=M\nage=29"
        parser = DataParser()
        parser.parse_raw_data(input)

        expected = [{'empid': 'D011', 'gender': 'M', 'age': '29'}]
        actual = parser.get_data()
        self.assertEqual(expected, actual)
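Examples #28, #32, and #38 below exercise helpers of the same parser. A minimal sketch that would satisfy all four assertions; the method names come from the tests, while the implementation is an assumption:

class DataParser:
    def __init__(self):
        self._data = []

    def _to_list(self, raw):
        # "empid=D011\ngender=M\nage=29" -> ['empid=D011', 'gender=M', 'age=29']
        return raw.split("\n")

    def _to_dict(self, items):
        # ['empid=D011', ...] -> {'empid': 'D011', ...}
        return dict(item.split("=", 1) for item in items)

    def parse_raw_data(self, raw):
        self._data.append(self._to_dict(self._to_list(raw)))

    def get_data(self):
        return self._data

    def scrub_db_list(self, rows):
        # [(14,), (25,)] -> [14, 25], as asserted in example #38
        return [row[0] for row in rows]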
Example #8
def testOffsetTimeGenerationWithInvalidArgs(getFixtures):
    parser = DataParser()
    try:
        strTimeNow = "2016-10-09T15:48:54"
        dtOffset = parser.getOffsetTime(strTimeNow, getFixtures.offset)
        fail("should have thrown exception b/c invalid timestamp")
    except Exception as ex:
        assert (type(ex) is ValueError)
Example #9
def testParseDataWithInvalidJsonData(getFixtures):
    try:
        parser = DataParser()
        data = {'foobar': 'goo'}
        parsedData = parser.parseData(getFixtures.id, 1, data)
        fail(
            "parser.parseData should have thrown an error on incorrect JSON data"
        )
    except Exception as ex:
        assert (type(ex) is KeyError)
Example #10
 def setUp(self):
     self.parser = DataParser()
     self.cmd_view = CmdView()
     self.file_reader = FileReader()
     self.validator = Validator()
     self.db = Database("test.db")
     self.vis = Visualiser()
     self.val = Validator()
     self.serial = Serializer()
     self.controller = Controller(self.cmd_view, self.file_reader,
                                  self.parser, self.validator, self.db,
                                  self.vis, self.serial)
     self.init()
Example #11
 def parse(self):
     struct = self.dictFromListBox(self.fields_list)
     struct["separator"] = self.tokenSeparator.get()
     parser = DataParser(struct)
     with open(self.logFilename.get(), 'r') as logfile:
         testline = logfile.readline()
         print(testline, struct)
         data = parser.parse_line(testline)
         if data is None:
             print(
                 "Could not match line structure to the log's [first] line")
             return None
     all_data = parser.parse_file(self.logFilename.get())
     self.generateStatistics(all_data)
Example #12
    def _get_segments(self, data: InputData) -> iter:
        """以标准数据的格式,读取数据流,返回数据段枚举"""
        if data.stream is None or not data.stream.readable():
            self._logger.error(
                "Data stream is None when trying to convert to standard Task: %s"
                % data._source)
            return

        succ = True
        try:
            for seg in DataParser.parse_standard_data(data.stream):
                # Got one data segment
                seg: DataSeg = seg

                self._add_required_fields(seg, data)

                # Validate the fields
                if not self._validation_fields(seg, data):
                    succ = False
                    continue

                yield seg

        finally:
            if not succ:
                data.on_complete(False)
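Several examples below (#13, #22, #24, #25, #26) consume the same `DataParser.parse_standard_data` generator and a `DataSeg` type. A rough sketch of the segment interface, inferred purely from the call sites:

class DataSeg:
    """Sketch of the segment type implied by the call sites, not the real class."""

    def __init__(self, fields, segindex=0, segline=0):
        self._fields = dict(fields)  # key/value pairs of one data segment
        self.segindex = segindex     # index of the segment within the stream
        self.segline = segline       # line where the segment starts

    def contains_key(self, key):
        return key in self._fields

    def append_to_fields(self, key, value):
        self._fields[key] = value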
Example #13
    def _deal_data(self, data: InputData) -> bool:
        """
        处理数据的流程
        :param data:
        :return:
        """
        try:
            if data.stream is None or not data.stream.readable():
                self._logger.error(
                    f"Data stream is None when trying to convert to standard Task: {data._source}"
                )
                return False

            exten = data.extension
            for seg in DataParser.parse_standard_data(data.stream):
                # Got one data segment
                data_fields = seg._fields
                if exten == self.o_suffix:
                    self.dispatch_to_client(data_fields)
                elif exten == self.c_suffix:
                    self.output_dns_data(data_fields, self.o_suffix,
                                         self._outputdir)
                    self._logger.info('Output dns_req result')
        except Exception as error:
            self._logger.error(f"Deal with dns data error, err:{error}")
        finally:
            data.on_complete()
Example #14
def generate_text(n: int, align: str = 'full') -> str:
    """
    Generate a fixed width text of n lines.
    :param n: number of lines of the text
    :param align: alignment style ie. "full", "left" or "right", default full
    :return: generated text
    """
    f = {
        'full': generate_line_full,
        'left': generate_line_left,
        'right': generate_line_right
    }[align]
    dp = DataParser.factory()
    text = ""
    if dp.include_header is True:
        for i in range(len(dp.column_names)):
            width = dp.offsets[i]
            name = dp.column_names[i]
            if align == "right":
                text += ' ' * (width - len(name)) + name
            else:
                text += name + ' ' * (width - len(name))
        text += '\n'
    for i in range(n - 1):
        text += f(dp.offsets) + '\n'
    text += f(dp.offsets)
    return text
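`generate_text` dispatches to one of three line generators that are assumed to be in scope, each taking the column widths. A sketch of what the left-aligned one could look like; the filler content is entirely hypothetical:

import random
import string

def generate_line_left(offsets):
    # One fixed-width line: each field left-aligned, space-padded to its column width.
    parts = []
    for width in offsets:
        token = ''.join(random.choices(string.ascii_lowercase, k=random.randint(1, width)))
        parts.append(token + ' ' * (width - len(token)))
    return ''.join(parts)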
Example #15
def query_img_info_func(next_page_href):
    print '------------------------------------------------------------------%d' % 1
    #thread = threading.Thread(target=thread_run,args=(resultJson,page))

    result = DataParser.parse_img_info(
        Downloader.fetch_next_page(next_page_href))
    #print result
    print '------------------------------------------------------------------%d' % 2
    return result
Example #16
 def query_img_info_by_content(img_path):
     result_json_list = []
     pool = ThreadPool(70)
     try:
         html = ContentDownloader.fetch_first_page(img_path)
         #print html
         img_list = DataParser.parse_img_info(html)
         #print img_list
         next_page_href_list = DataParser.parse_next_page_href(html)
         #print next_page_href_list
         result_json_list = pool.map(
             query_img_info_func, next_page_href_list)
         pool.close()
         pool.join()
         result_json_list = reduce(image_info_list_reduce, result_json_list)
         result_json_list = filter(image_info_list_filter, result_json_list)
         print 'Finish query.'
     except Exception, e:
         print e
Example #17
    def parseDataAndPersistIntoDb(self, db_filename):

        raw_data_filenames = [
            "PAMAP2_Dataset/Protocol/subject101.dat",
            "PAMAP2_Dataset/Protocol/subject102.dat",
            "PAMAP2_Dataset/Protocol/subject103.dat",
            "PAMAP2_Dataset/Protocol/subject104.dat",
            "PAMAP2_Dataset/Protocol/subject105.dat",
            "PAMAP2_Dataset/Protocol/subject106.dat",
            "PAMAP2_Dataset/Protocol/subject107.dat",
            "PAMAP2_Dataset/Protocol/subject108.dat",
            "PAMAP2_Dataset/Protocol/subject109.dat"
        ]

        for (index, dataFilename) in enumerate(raw_data_filenames):
            dataParser = DataParser()
            dataParser.parseFile(dataFilename)
            self.__persistDataParserIntoDb(db_filename, dataParser, index)
Example #18
def testParsedDataWithValidArgs(getFixtures):
    try:
        parser = DataParser()
        parsedData = parser.parseData(getFixtures.id, getFixtures.offset,
                                      getFixtures.data)
        assert parsedData is not None
        #logging.debug(parsedData)

        assert (len(parsedData) > 0)

        for data in parsedData:
            assert data['id'] is not None
            assert data['time'] is not None
            assert data['heartRate'] is not None
            assert data['coordinates'] is not None

    except Exception as ex:
        logging.debug(str(ex))
        fail("should not have an exception when parsedData has valid input")  # cause a failure
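Read together, examples #1, #4, #6, #9, and this one pin down `parseData`'s contract: `ValueError` for a missing offset, missing data, or a raw string; `KeyError` for JSON lacking expected keys; otherwise a non-empty list of dicts with `id`, `time`, `heartRate`, and `coordinates`. A validation skeleton consistent with those tests; the `'samples'` key and the success path are assumptions, not the original code:

class DataParser:
    def parseData(self, id, offset, data):
        # Contract reconstructed from the tests, not the original implementation.
        if offset is None:
            raise ValueError("offset is required")
        if data is None:
            raise ValueError("data is required")
        if not isinstance(data, dict):
            raise ValueError("data must be parsed JSON (a dict)")
        samples = data['samples']  # hypothetical key; its absence raises KeyError
        return [{'id': id, 'time': s['time'], 'heartRate': s['heartRate'],
                 'coordinates': s['coordinates']} for s in samples]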
Example #19
 def setUp(self):
     self.parser = DataParser()
     self.cmd_view = CmdView()
     self.file_view = FileView()
     self.validator = Validator()
     self.db = DatabaseView("test.db")
     self.vis = Visualiser()
     # self.val = Validator()
     self.controller = Controller(self.cmd_view, self.file_view,
                                  self.parser, self.validator, self.db,
                                  self.vis)
Example #20
    def test_10_controller_validate_fail(self):
        self.controller = Controller(self.cmd_view, self.file_reader,
                                     DataParser(), self.validator, self.db,
                                     self.vis, self.serial)
        captured_output = io.StringIO()
        sys.stdout = captured_output
        self.controller.validate()

        expected = "* No data has been read.\n-- Type 'help get' for more details.\n"
        actual = captured_output.getvalue()

        sys.stdout = sys.__stdout__
        self.assertEqual(expected, actual)
Example #21
def testOffsetTimeGeneration(getFixtures):
    try:
        parser = DataParser()

        strTimeNow = "2016-10-09T15:48:54Z"
        dtOffset = parser.getOffsetTime(strTimeNow, getFixtures.offset)
        assert (dtOffset.second == 59)

        strTimeNow = "2016-10-09T15:48:55Z"
        dtOffset = parser.getOffsetTime(strTimeNow, getFixtures.offset)
        assert (dtOffset.second == 0)
        assert (dtOffset.minute == 49)

        strTimeNow = "2016-10-09T15:48:56Z"
        dtOffset = parser.getOffsetTime(strTimeNow, getFixtures.offset)
        assert (dtOffset.second == 1)
        assert (dtOffset.minute == 49)

    except Exception as ex:
        logging.debug(str(ex))
        fail("should not have an exception when parsing valid time and offset"
             )  # cause a failure
Example #22
    def _convert(self, data: InputData) -> iter:
        """将中心下发的任务转换为自有的通用任务结构Task体枚举(一个文件可能有多个任务段)"""
        succ = True
        try:
            if data.stream is None or not data.stream.readable():
                self._logger.error(
                    "Data stream is None when trying to convert to standard Task: %s"
                    % data._source)
                succ = False
                return

            for seg in DataParser.parse_standard_data(data.stream):
                if seg is None or len(seg._fields) < 1:
                    continue
                try:
                    # Required fields
                    self._add_required_fields(seg, data)

                    # Derive apptype from host
                    if not seg.contains_key("apptype"):
                        apptype = self._get_apptype(seg._fields, data)
                        if apptype is not None:
                            seg.append_to_fields('apptype', apptype)

                    # Validate the fields
                    if not self._validation_fields(seg, data):
                        succ = False
                        continue

                    tsk: Task = Task(seg._fields)
                    tsk.segindex = seg.segindex
                    tsk.segline = seg.segline

                    if tsk is None:
                        continue

                    yield tsk

                except Exception:
                    succ = False
                    self._logger.error(
                        "Generate Task from dic fields error:\ndata:%s\nex:%s"
                        % (data._source, traceback.format_exc()))

        except Exception:
            succ = False
            self._logger.error("Convert data to Task error:\ndata:%s\nex:%s" %
                               (data._source, traceback.format_exc()))
        finally:
            if not succ and data is not None:
                data.on_complete(False)
Example #23
def make_person_content(info, page=1, page_count=1):
    from template import TEngine
    start = int(PAGESIZE) * (int(page) - 1) + 1
    page_info = info[start:start+PAGESIZE]
    ret = ""
    for item in page_info:
        parser = DataParser(item['path'] + "/profile.txt")
        person_info = parser.parse()
        context = person_info
        context['username'] = person_info['username']
        context['server_addr'] = WEBDIR
        context['now_page']    = page
        context['up_page']     = [str(int(page) - 1)]
        context['down_page']   = [str(int(page) + 1)]
        context['pagecount']   = page_count
        context['photo'] = get_random_person_img(item, person_info)
        #context = dict(username=person_info['username'],server_addr=WEBDIR,name=person_info['name'],gender=person_info['gender'], photo=get_random_person_img(item, person_info))
        engine = TEngine("person_sub.html", context, False) 
        engine.parse()
        html = engine.content
        ret += html

    return ret
Example #24
    def _bcp_deal(self, bcpfi: str, data: InputData) -> iter:
        """读取bcp文件行,构建task任务"""
        try:
            segindex = 0
            segline = 0
            succ = True
            with open(bcpfi, 'r', encoding=self._enc) as fs:
                for seg in DataParser.parse_bcp_data(fs):
                    try:
                        seg: DataSeg = seg
                        # Required fields
                        self._add_required_fields(seg, data)

                        # Derive apptype from host
                        if not seg.contains_key("apptype"):
                            apptype = self._get_apptype(seg._fields, data)
                            if apptype is not None:
                                seg.append_to_fields('apptype', apptype)

                        # Validate the fields
                        if not self._validation_fields(seg, data):
                            succ = False
                            continue

                        task: Task = Task(seg._fields)
                        task.segindex = segindex
                        task.segline = segline
                        segline += 1
                        segindex += 1
                        yield task
                    except Exception as ex:
                        succ = False
                        self._logger.error(
                            "Parse one line in bcp file error:\ndata:%s\nerror:%s"
                            % (bcpfi, ex))

        except Exception:
            succ = False
            self._logger.error("Deal bcp file error:\nfile:%s\nerror:%s" %
                               (bcpfi, traceback.format_exc()))
        finally:
            if not succ:
                data.on_complete(False)

        return
Example #25
    def _convert(self, data: InputData) -> iter:
        """读取数据,返回数据段的字典迭代器"""
        try:
            if data.stream is None or not data.stream.readable():
                self._logger.error(
                    "Data stream is None when trying to convert to standard Task: %s"
                    % data._source)
                return

            for dicseg in DataParser.parse_standard_data(data.stream):
                if dicseg is None or len(dicseg._fields) < 1:
                    continue
                yield dicseg

        except Exception:
            self._logger.error("Convert data to Task error:\ndata:%s\nex:%s" %
                               (data._source, traceback.format_exc()))
            if data is not None:
                data.on_complete(False)
Example #26
 def _parse_data_back(self, data: InputData) -> iter:
     """"""
     try:
         for seg in DataParser.parse_standard_data(data.stream):
             seg: DataSeg = seg
             try:
                 tb: IscanTaskBack = IscanTaskBack.create_from_dataseg(
                     seg, data._platform)
                 tb.inputdata = data
                 yield tb
             except Exception:
                 self._logger.error(
                     "Parse one data segment error:\ndata:{}\nsegindex:{}\nerror:{}"
                     .format(data._source, seg.segindex,
                             traceback.format_exc()))
                 # While parsing, if any single segment fails, the whole input counts as bad data
                 data.on_complete(False)
     except Exception:
         self._logger.error(
             "Parse TaskBatchBack data error:\ndata:{}\nerror:{}".format(
                 data._source, traceback.format_exc()))
Example #27
class LogisticRegression:
    def __init__(self, parameters, sc):

        parameters = json.loads(parameters)
        schema = parameters.get('schema', None)
        header = parameters.get('header', False)
        self._parser = DataParser(schema, header)
        self._sc = sc

    def predict(self, input_data):
        return self._model.predict(input_data)

    def train(self, input_data, parameters):
        iterations = parameters.get('iterations', None)
        weights = parameters.get('weights', None)
        intercept = parameters.get('intercept', None)
        numFeatures = parameters.get('numFeatures', None)
        numClasses = parameters.get('numClasses', None)
        data = self._sc.parallelize(self._parser.parse(input_data))
        self._model = LogisticRegressionWithLBFGS.train(
            data, iterations=iterations, numClasses=numClasses)
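A usage sketch for the wrapper above, assuming an existing SparkContext `sc`, raw input rows `raw_rows`, and a feature vector `vec` (all placeholders):

import json

# Hypothetical parameters; __init__ only reads 'schema' and 'header'.
lr = LogisticRegression(json.dumps({"schema": None, "header": False}), sc)
lr.train(raw_rows, {"iterations": 100, "numClasses": 2})
print(lr.predict(vec))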
Example #28
 def test_01_parser_to_list(self):
     expected = ['empid=D011', 'gender=M', 'age=29']
     actual = DataParser()._to_list("empid=D011\ngender=M\nage=29")
     self.assertEqual(expected, actual)
Example #29
__author__ = 'Radim Spigel'
__version__ = '1.0'
import sys
from dataparser import print_help, DataParser
from qtgui import qt_main


if __name__ == "__main__":
    # Check for too many arguments before the general case, otherwise that branch can never run.
    if len(sys.argv) > 6:
        print_help()
    elif len(sys.argv) > 1:
        print sys.argv
        if '-h' in sys.argv:
            print_help()
            sys.exit()
        datagetter = DataParser(sys.argv[1])
        datagetter.from_command_line(sys.argv[1:])
    else:
        qt_main()
Example #30
 def analyze(self):
     if self.fname is None:
         print "File is not setted."
         return
     datagetter = DataParser(self.fname[0])
     if self.regexTextField.toPlainText() is None:
         print "Regexp is not setted."
         return
     datagetter.init_regex(self.regexTextField.toPlainText())
     datagetter.filled_data()
     if self.allowStatistics.isChecked():
         datagetter.print_statistics()
     if self.csvReport.isChecked():
         datagetter.save_to_csv()
     if self.allowGraphs.isChecked():
         datagetter.print_graphs(self.separateGraphs.isChecked())
Example #31
'''Produce a list of co-authors of each author in the given input file. '''

from dataparser import DataParser


def get_co_authors(author):
    global researchpapers

    co_authors = set()

    for paper in researchpapers:
        if author in paper['author']:
            co_authors.update(paper['author'])

    co_authors.remove(author)
    return co_authors


researchpapers = DataParser.readandparsefile('citations.txt')

for paper in researchpapers:
    for author in paper['author']:
        co_authors = get_co_authors(author)
        if len(co_authors) > 0:
            string = ', '.join(co_authors)
            print(author + ' -> ' + string)
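The script assumes `readandparsefile` returns records whose 'author' field is a collection of names; only that key is read here. A minimal record shape consistent with the loops above, with made-up papers:

researchpapers = [
    {'author': ['A. Turing', 'A. Church'], 'title': 'On computable numbers'},
    {'author': ['A. Turing'], 'title': 'Computing machinery and intelligence'},
]
# get_co_authors('A. Turing') would return {'A. Church'} for this data.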
Example #32
 def test_02_parser_to_dict(self):
     expected = {'empid': 'D011', 'gender': 'M', 'age': '29'}
     actual = DataParser()._to_dict(['empid=D011', 'gender=M', 'age=29'])
     self.assertEqual(expected, actual)
Example #34
"""
Routes and views for the flask application.
"""

from datetime import datetime
from flask import render_template, request
from ProteinDB import app
from dataparser import DataParser

dataparser = DataParser()
dataparser.read_from_csv()


@app.route('/')
@app.route('/home')
def home():
    """Renders the home page."""
    return render_template(
        'index.html',
        title='Home',
        year=datetime.now().year,
        unique_pathologies=dataparser.unique_pathologies,
        unique_biofluids=dataparser.unique_biofluids,
    )

@app.route("/searchprotein")
def query_protein():
    """Renders the results of the query"""

    table, link = dataparser.search_protein(request.args["proteinName"])
    return render_template(
Example #35
if __name__ == '__main__':
    parser = OptionParser()
    parser.add_option("-d", "--dataset", dest="db_type", default="berlin")
    parser.add_option("-p", "--dataset_path", dest="path", default="")

    (options, args) = parser.parse_args(sys.argv)

    db_type = options.db_type
    path = options.path

    print("Loading data from " + db_type + " dataset...")
    if db_type not in ('berlin',):
        sys.exit("Dataset not registered. Please create a method to read it")

    db = DataParser(path, db_type)

    # k_folds = len(db.test_sets)
    # splits = zip(db.train_sets, db.test_sets)

    callback_list = [
        EarlyStopping(
            monitor='acc',
            patience=1,
            verbose=1
        ),
        ModelCheckpoint(
            filepath='cnnlstm_model.h5',
            monitor='val_loss',
            save_best_only=True
        )
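The snippet cuts off before the callbacks are used; in Keras they are normally handed to `fit`, roughly as below (the model and training arrays are placeholders):

# Hypothetical continuation: pass the callbacks to Keras during training.
model.fit(x_train, y_train,
          validation_split=0.2,
          epochs=50,
          callbacks=callback_list)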
Example #36
# Instantiate models
model1 = BCNN()
model2 = TCNN()

# Load models from files
model1.load_state_dict(torch.load("./bcnn_model.pt"))
model2.load_state_dict(torch.load("./tcnn_model.pt"))
model1 = model1.cuda()
model2 = model2.cuda()

# Set to eval mode
model1.eval()
model2.eval()

# Load Data from testing set
testset = DataParser('04')
testloader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=True)

total = 0
err_x = 0
err_z = 0
err_t = 0

# Run for all testing data
for counter, d in enumerate(testloader, 0):
    dtype = torch.cuda.FloatTensor
    x1 = d["img_l1"].type(dtype)
    x2 = d["img_l2"].type(dtype)
    yx = d["dx"].type(dtype)
    yz = d["dz"].type(dtype)
    yt = d["dth"].type(dtype)
Example #37
def get_logs():
    dp = DataParser(constants.log_file_path)
    mentions_by_ticker = dp.mentions_by_ticker()
Example #38
 def test_03_parser_scrub_db_list(self):
     expected = [14, 25]
     actual = DataParser().scrub_db_list([(14, ), (25, )])
     self.assertEqual(expected, actual)
Example #39
File: mlp.py Project: hvy/mlp-demo
import numpy as np
from numpy import genfromtxt

# Helper module to read data from CSV
from dataparser import DataParser

def target_function(x):
    """
    The function that we want to approximate using a simple Multi-Layered Perceptron, MLP
    y = 2 * x + 8
    """
    x_double = np.multiply(2, x)
    return np.add(x_double, 8)

# Read the training and test data from CSV files
csv_parser = DataParser()
x_train, y_train = csv_parser.parse("data/linear_training.csv", delimiter=",")
x_test, y_test = csv_parser.parse("data/linear_test.csv", delimiter=",")

# Network parameters
n_units = 10

# Training parameters
n_epochs = 70
# batchsize = np.size(x_train)
batchsize = np.size(x_train)

# The size of the training data
datasize = np.size(x_train)

# Define the linear network model with 1 input unit and 1 output unit