Exemplo n.º 1
0
 def test_json2(self):
     """Smoke-test DocumentFeatures on every document in test/test.json.

     Runs the feature-extraction pipeline (without parsing) over each
     document in the fixture and checks one feature dict is produced per
     input document.
     """
     self.jsonpath = "test/test.json"
     # Context manager closes the file handle (the original leaked it).
     with open(self.jsonpath, "r") as fp:
         self.json = json.load(fp)
     fv = []
     for d in self.json:
         fe = DocumentFeatures(d, parse=False)
         fe.pipeline()
         logging.debug(pformat(fe.features))
         fv.append(fe.features)
     # Replaces the unconditional `raise Exception` (a debugging hack to
     # force the test runner to show the logged output) with a real check.
     assert len(fv) == len(self.json)
Exemplo n.º 2
0
 def test_json2(self):
     """Smoke-test DocumentFeatures over the full test/test.json fixture.

     Each document goes through the pipeline with parse=False; the
     resulting feature dicts are collected and counted.
     """
     self.jsonpath = "test/test.json"
     # `with` guarantees the fixture file is closed (original left the
     # handle open).
     with open(self.jsonpath, "r") as fp:
         self.json = json.load(fp)
     fv = []
     for d in self.json:
         fe = DocumentFeatures(d, parse=False)
         fe.pipeline()
         logging.debug(pformat(fe.features))
         fv.append(fe.features)
     # The original ended in a bare `raise Exception` — a debug hack that
     # made the test always fail; assert the meaningful invariant instead.
     assert len(fv) == len(self.json)
Exemplo n.º 3
0
 def test_json(self):
     """Smoke-test DocumentFeatures (with parsing) on the first 5 documents.

     Uses self.jsonpath set elsewhere on the test case (e.g. in setUp) —
     TODO confirm against the enclosing class.
     """
     # Context manager closes the file handle (the original leaked it).
     with open(self.jsonpath, "r") as fp:
         self.json = json.load(fp)
     fv = []
     # Only the first five documents: parsing (parse=True) is slow.
     for d in self.json[0:5]:
         fe = DocumentFeatures(d, parse=True)
         fe.pipeline()
         logging.debug(pformat(fe.features))
         fv.append(fe.features)
     # vec = DictVectorizer(sparse=True)
     # array_f = vec.fit_transform(fv).toarray()
     # logging.debug(pformat(array_f))
     # Replaces the unconditional `raise Exception` debug hack with a
     # real assertion: one feature dict per processed document.
     assert len(fv) == len(self.json[0:5])
Exemplo n.º 4
0
 def test_json(self):
     """Run the parsing pipeline on the first 5 fixture documents.

     Relies on self.jsonpath being set on the test case beforehand —
     TODO confirm where (likely setUp).
     """
     # `with` guarantees the file handle is released.
     with open(self.jsonpath, "r") as fp:
         self.json = json.load(fp)
     fv = []
     for d in self.json[0:5]:
         fe = DocumentFeatures(d, parse=True)
         fe.pipeline()
         logging.debug(pformat(fe.features))
         fv.append(fe.features)
     # vec = DictVectorizer(sparse=True)
     # array_f = vec.fit_transform(fv).toarray()
     # logging.debug(pformat(array_f))
     # The original's trailing `raise Exception` forced a failure just to
     # surface the debug log; assert the real invariant instead.
     assert len(fv) == len(self.json[0:5])
Exemplo n.º 5
0
def process_each_cat(cat=None, docs=None):
    """
    Extract feature vectors for one category and assign its fixed score.

    Parameters
    ----------
    cat: integer
        low=0, mid=1, high=2
    docs: list
        [[`doc1`],...]

    Returns
    -------
    _f: list of dictionaries
        feature vectors
    _s: list of floats
        scores (one fixed per-category value in [0, 1] per document)
    """
    # `docs=None` sentinel avoids the shared-mutable-default pitfall of
    # the original `docs=[]`.
    if docs is None:
        docs = []
    assignment = [20, 50, 90]  # base percentage per category (low/mid/high)
    _f = []
    _s = []
    _p = []  # parsed docs, kept for the commented-out JSON dump below
    widgets = [
        'Extracting features... for category {}: done for '.format(cat),
        progressbar.Counter(), ' doc(s), (',
        progressbar.Timer(), ')'
    ]
    pbar = progressbar.ProgressBar(widgets=widgets, maxval=len(docs)).start()
    for n, d in enumerate(docs):
        fe = DocumentFeatures(d, parse=False)
        fe.pipeline()
        _f.append(fe.features)
        _p.append(fe.doc)
        b = assignment[cat]
        # s = randrange(b-10, b+10)/100.0
        s = b / 100.0  # deterministic score; randomized variant kept above
        _s.append(s)
        pbar.update(n + 1)
    # Sanity check: one score per feature vector.
    assert len(_f) == len(_s)
    # import json
    # json.dump(_p, open("../parsed/{}.json".format(cat), "w"))
    pbar.finish()
    return _f, _s
Exemplo n.º 6
0
def get_score(doc=None, modelpath=None):
    '''
    Score a document with a pre-trained regression model.

    Parameters
    ----------
    doc: a list of lists
        document as a list
        (currently assuming this is not yet parsed)
        e.g.: [ ["The", "cat", "sat", "on", "the", "mat", "."],
                ["Colorless", "green", "ideas", "sleep", "furiously", "."] ]
    modelpath: str
        path the model and feature map are loaded from

    Returns
    -------
    score: int, ranges 0 to 100
        regression score scaled to a percentage
        (the original docstring claimed a 0-1 float, but the code
        returns int(score * 100))
    '''
    # `doc=None` sentinel avoids the shared-mutable-default pitfall of
    # the original `doc=[]`.
    if doc is None:
        doc = []
    fe = DocumentFeatures(doc)
    _f = fe.pipeline()
    model = SklearnClassifier()
    # Both the serialized model and its feature map live under modelpath.
    model.load_model(modelpath)
    model.load_fmap(modelpath)
    f = model.transform(_f)
    score = model.predict(f)
    return int(score * 100)
Exemplo n.º 7
0
def process_each_cat(cat=None, docs=None):
    """
    Extract feature vectors and a fixed score for every doc in a category.

    Parameters
    ----------
    cat: integer
        low=0, mid=1, high=2
    docs: list
        [[`doc1`],...]

    Returns
    -------
    _f: list of dictionaries
        feature vectors
    _s: list of floats
        scores (fixed per-category value in [0, 1]; the original
        docstring said "integers", which was wrong)
    """
    # None sentinel instead of the mutable default `docs=[]`, which is
    # shared across calls.
    if docs is None:
        docs = []
    assignment = [20, 50, 90]  # base percentage for low/mid/high
    _f = []
    _s = []
    _p = []  # parsed documents, only used by the commented-out dump below
    widgets = ['Extracting features... for category {}: done for '.format(cat), progressbar.Counter(), ' doc(s), (', progressbar.Timer(), ')']
    pbar = progressbar.ProgressBar(widgets=widgets, maxval=len(docs)).start()
    for n, d in enumerate(docs):
        fe = DocumentFeatures(d, parse=False)
        fe.pipeline()
        _f.append(fe.features)
        _p.append(fe.doc)
        b = assignment[cat]
        # s = randrange(b-10, b+10)/100.0
        s = b/100.0  # deterministic; the randomized variant is kept above
        _s.append(s)
        pbar.update(n+1)
    # One score per feature vector.
    assert len(_f)==len(_s)
    # import json
    # json.dump(_p, open("../parsed/{}.json".format(cat), "w"))
    pbar.finish()
    return _f, _s