Example #1
def testUriChain():
    from serializers import obj2json
    uri = "testObjects/67a01e17-9e8e-4d99-973b-170d74110a4b"
    ret = uriProcessingChain(uri, uriChain)
    ret = obj2json(ret)
    pp(ret)
    ret = type(ret)
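uriProcessingChain and uriChain are defined elsewhere in this project. A minimal sketch of how such a chain could be wired, assuming each entry is a (regex, handler) pair and the first match wins (hypothetical, not the project's actual code):

import re

def uriProcessingChain(uri, chain):
    # Walk the chain in order; the first pattern that matches the URI
    # decides which handler builds the object. Returns None on no match.
    for pattern, handler in chain:
        m = re.match(pattern, uri)
        if m:
            return handler(*m.groups())
    return None

uriChain = [
    # Hypothetical handler for the testObjects URI used above.
    (r'^testObjects/([0-9a-f-]+)$', lambda oid: {'id': oid}),
]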
Example #2
    def getPicID(self, profile_id):
        url = "https://m.facebook.com/app_scoped_user_id/" + profile_id
        self.browser.addheaders = [(
            'User-agent',
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36'
        )]
        html = self.browser.open(url).read()
        #pp(html)
        parser = etree.HTMLParser()
        tree = etree.parse(StringIO.StringIO(html), parser)
        ids = tree.xpath("//div[@class='bl bm']/a/@href")
        if (not ids):
            ids = tree.xpath("//div[@class='bk bl']/a/@href")
            if (not ids):
                ids = tree.xpath("//div[@class='de']/a/@href")
                if (not ids):
                    ids = tree.xpath("//div[@class='bi cv']/a/@href")

        isID = re.search(r'id=[0-9]*', ids[0], re.M | re.I)
        if (isID):
            pid = isID.group()
            pp(pid)
            return pid.split('=')[1]
        else:
            isID = re.search(r'profile_id=[0-9]*', ids[0], re.M | re.I)
            pid = isID.group()
            return pid.split('=')[1]
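The cascade of if (not ids) fallbacks tries a fixed list of class names in order; the same lookup can be written as a loop. A behavior-preserving sketch:

def firstMatchingHref(tree):
    # Return the hrefs under the first candidate div class that matches,
    # or None if none of the known class names are present.
    for cls in ('bl bm', 'bk bl', 'de', 'bi cv'):
        ids = tree.xpath("//div[@class='%s']/a/@href" % cls)
        if ids:
            return ids
    return None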
Example #3
def getFromJson(number):
    import json
    f = open(str(number) + "/flft.json").read()
    Flftfiles = json.loads(f)
    from collections import defaultdict
    all_flft = defaultdict(int)
    Flfts = []
    for count,flft_gram in enumerate(Flftfiles):
        flftGroup = flft_gram[0]
        file = flft_gram[1]
        flftGroup2 = []
        for flft in flftGroup:
            if remove(flft):
                continue
            flft = re.sub(u"第二百二五十三条", u"第二百五十三条", flft)
            flft = re.sub(u"第三十二条(五)", u"第三十二条", flft)
            flft = re.sub(u"第一审民商事案件标准的通知》第三条", u"第三条", flft)

            all_flft[re.sub(u"^.*?第", u"第", flft)] += 1
            flftGroup2.append(flft)
            # all_flft[re.sub(u"第.*?条",u"",flftGroup[idx])] += 1
            # all_flft[flftGroup[idx]] += 1
        Flfts.append([flftGroup2, file])
        if ((count + 1) % 500 == 0):
            print "进度:", (count * 1.0 + 1) / 11953
    pp(all_flft)
    f = open(str(number) + "/flftDC.json", "w")
    data = json.dumps(Flfts, ensure_ascii=False)
    f.write(data.encode('utf-8'))
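The file handles opened here are never closed explicitly. A sketch of the same write step with a context manager:

import json

def dumpJson(path, obj):
    # Serialize once, write the UTF-8 bytes, and let the with-block
    # close the file even if json.dumps raises.
    data = json.dumps(obj, ensure_ascii=False)
    with open(path, "w") as f:
        f.write(data.encode('utf-8'))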
Example #4
def getTestObjectByApi(log=False):
    url = 'http://192.168.99.100:8081/test-man/api/v0/testObjects?filter=abc'
    resp = requests.get(url)
    testObjects = json.loads(resp.text)
    testObjectUri = testObjects[0]['uri']
    resp = requests.get(testObjectUri)
    testObject = json.loads(resp.text)
    if log:
        pp(testObject)
    return testObject
Example #5
def make_tag():
    if confirm(yellow("Tag this release?"), default=True):
        print(green("The last 5 tags were: "))
        tags = local('git tag | tail -n 20', capture=True)
        pp(sorted(tags.split('\n'), compare_versions, reverse=True))
        prompt("New release tag in the format vX.Y[.Z]?", 'tag',
                validate=VERSION_PATTERN)
        local('git tag -as %(tag)s' % env)
        local('git push --tags origin', capture=True)
        local('git fetch --tags origin', capture=True)
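compare_versions is imported from elsewhere. A plausible cmp-style comparator for vX.Y[.Z] tags, matching how sorted() is called above (an assumption, not the project's code):

import re

def compare_versions(a, b):
    # Numeric, cmp-style comparison of tags like "v0.9" and "v0.10",
    # so that v0.10 sorts after v0.9.
    def parts(tag):
        return [int(x) for x in re.sub(r'^v', '', tag).split('.')]
    return cmp(parts(a), parts(b))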
Example #6
  def testSet(self):
    set1 = set(['John', 'Jane', 'Jack', 'Janice'])
    pp(set1)
    expected='''
[
    "Jane", 
    "Janice", 
    "John", 
    "Jack"
]
'''.strip()
    self.assertEqual(pp_str(set1), expected)
Example #7
def compare2Std():
    import json
    f2 = open("Flft_55000_2.json").read()
    Flftfiles2 = json.loads(f2)
    for count, flft_gram in enumerate(Flftfiles2[8000:8500]):
        flftGroup = flft_gram[0]
        for c, flft in enumerate(flftGroup):
            flft = re.sub(u"第.*?条$", u"", flft)
            stdFlft = getStdFlft(flft)
            stdFlft2 = getStdFlft2(flft)
            if stdFlft != stdFlft2:
                pp([flft, stdFlft, stdFlft2])
Example #8
  def testObject(self):
    class MyClass(object):
      def __str__(self):
          return "<MyClass>"
    ls = list([1, MyClass()])
    pp(ls)
    expected='''
[
    1, 
    "<MyClass>"
]
'''.strip()
    self.assertEqual(pp_str(ls), expected)
Example #9
def testClassifier(x_train, y_train, x_test, y_test, clf):
    """
    this method will first train the classifier on the training data
    and will then test the trained classifier on test data.
    Finally it will report some metrics on the classifier performance.
    
    Parameters
    ----------
    x_train: np.ndarray
             train data matrix
    y_train: list
             train data label
    x_test: np.ndarray
            test data matrix
    y_test: list
            test data label
    clf: sklearn classifier object implementing fit() and predict() methods
    
    Returns
    -------
    metrics: list
             [training time, testing time, recall and precision for every class, macro-averaged F1 score]
    """
    #metrics = []
    start = dt.now()
    clf.fit(x_train, y_train)
    end = dt.now()
    print 'training time: ', (end - start)

    # add training time to metrics
    #metrics.append(end-start)

    start = dt.now()
    yhat = clf.predict(x_test)
    end = dt.now()
    print 'testing time: ', (end - start)

    # add testing time to metrics
    #metrics.append(end-start)

    print 'classification report: '
    #print classification_report(y_test, yhat)
    pp(classification_report(y_test, yhat))

    print 'f1 score'
    print f1_score(y_test, yhat, average='weighted')

    print 'accuracy score'
Example #10
def get_np():
    connects = []
    for cols in make_list():
        connect = []
        for col in cols:
            if col["pos"] == "名詞":
                connect.append(col["surface"])
            else:
                if len(connect) > 1:
                    connects.append("".join(connect))
                connect = []
        if len(connect) > 1:
            connects.append("".join(connect))

    np = set(connects)

    pp(sorted(np, key=connects.index))
Example #11
def find_gunosy_accounts():
    api = tweepy.API()
    gunosy_accounts = set()
    for tweet in tweepy.Cursor(api.search,
                               q=search_query,
                               rpp=100,
                               result_type="recent",
                               include_entities=True,
                               lang="ja").items():
        for entity in tweet.entities['urls']:
            url = entity['expanded_url']
            result = url_analyzer.search(url)
            if result:
                gunosy_accounts.add(result.group(1))
    gunosy_accounts = list(gunosy_accounts)
    print "----use following accounts-----"
    pp(gunosy_accounts)
    return gunosy_accounts
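url_analyzer is a compiled regex defined outside this snippet. A plausible definition, assuming Gunosy profile URLs of the form http://gunosy.com/<account> (an assumption, not the original pattern):

import re

# group(1) captures the account name from an expanded profile URL
url_analyzer = re.compile(r'https?://gunosy\.com/([A-Za-z0-9_]+)')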
Example #12
def test():
    '''
    test verbena and rose, the modified apps
    '''
    apps = [
        'verbena',
        'rose',
    ]
    with settings(warn_only=True):
        with virtualenv():
            with cd("%s/arxer" % DIR):
                for app in apps:
                    result = run('./manage.py test %s' % app)
    if result.failed and not confirm("Tests failed. Continue anyway?"):
        abort("Aborting at request of user")
    else:
        from prettyprint import pp
        pp(result)
Example #13
def solve():
    import json
    f2 = open("Flft_55000_2.json").read()
    Flftfiles2 = json.loads(f2)
    f11 = open("Flft_55000_11.json").read()
    Flftfiles11 = json.loads(f11)
    for count, flft_gram in enumerate(Flftfiles2[8000:8500]):
        flftGroup = flft_gram[0]
        flftGroup11 = Flftfiles11[count][0]
        if len(flftGroup) != len(flftGroup11):
            continue
        for c, flft in enumerate(flftGroup):
            flft = re.sub(u"第.*?条$", u"", flft)
            flft11 = re.sub(u"第.*?条$", u"", flftGroup11[c])
            flft11 = re.sub(u"\(.*?\)$", u"", flft11)
            stdFlft = getStdFlft(flft)
            if flft11 != stdFlft:
                pp([flft, stdFlft, flft11])
Example #14
  def testNestedSet(self):
    set1 = list([6, set([2,1,3]), 5,[3,1,2], None])
    pp(set1)
    expected='''
[
    6, 
    [
        1, 
        2, 
        3
    ], 
    5, 
    [
        3, 
        1, 
        2
    ], 
    null
]
'''.strip()
    self.assertEqual(pp_str(set1), expected)
Example #15
def apiRoot(request, uri):
    obj = uriProcessingChain(uri, uriChain)
    print obj
    if (None == obj):
        response = HttpResponse()
        response.status_code = 404
        return response

    ret = obj2json(obj)
    objType = type(obj)
    print 'objType', objType
    pp(ret)
    injector = None
    if obj in uriInjectors:
        injector = uriInjectors[obj]
    elif objType in uriInjectors:
        injector = uriInjectors[objType]
    ret = injectUri(injector, ret, getUriPrefixByRequest(request))
    ret = json.dumps(ret, ensure_ascii=False)
    response = HttpResponse(ret)
    return response
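injectUri and uriInjectors are defined elsewhere. A minimal sketch of the dispatch contract the code above implies, where a registered injector rewrites the serialized dict against the request's URI prefix (hypothetical):

def injectUri(injector, ret, uriPrefix):
    # With no registered injector the serialized object passes through;
    # otherwise the injector may add absolute URIs under the prefix.
    if injector is None:
        return ret
    return injector(ret, uriPrefix)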
Example #16
def writeFlft(number):
    from collections import defaultdict
    all_flft = defaultdict(int)
    file_dir = u"E:/毕业设计/素材/9015/"
    f = open(str(number) + "/files.json").read()
    Files = json.loads(f)
    file_num = len(Files)
    Flfts = []
    for count, file in enumerate(Files):
        flftGroup = getFlft(file_dir + file)
        for idx,flft in enumerate(flftGroup):
            flftGroup[idx] = flft
            flftGroup[idx] = normalize_flft(flftGroup[idx])
            all_flft[re.sub(u"第.*?条",u"",flftGroup[idx])] += 1
            # all_flft[flftGroup[idx]] += 1
        Flfts.append([flftGroup,file])
        if ((count + 1) % 500 == 0):
            print "进度:", (count * 1.0 + 1) / file_num
    pp(all_flft)
    f = open(str(number) + "/flft.json", "w")
    data = json.dumps(Flfts, ensure_ascii=False)
    f.write(data.encode('utf-8'))
Example #17
    def messagegen(self):
        jsonStr = self.getChat(self.minTimestampMs)
        responseItems = []
        try:
            responseItems = json.loads(jsonStr)

        except ValueError:
            pass
        
        if 'result' not in responseItems:
            self.errorcount += 1
            self.adjust_sleep(0)
            if(self.errorcount > self.MAX_ERRORS):
                print("error counter exceeded, existing...")
                raise UnexpectedResultException(jsonStr)
                # sys.exit(1)

            if 'error' in responseItems:
                print(responseItems)
            else:
                self.errorcount += 1
        else:
            self.errorcount = 0
            responseItemsOrderedAsc = responseItems['result']
            responseItemsOrderedAsc.reverse()
            for message in responseItemsOrderedAsc:
                yield message
                self.minTimestampMs = message[1] + 1
            prettyprint.pp(responseItems)

            tm = time.localtime(self.minTimestampMs/1000.0)
            print("%04d/%02d/%02d %02d:%02d:%02d" % (tm.tm_year, tm.tm_mon, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec))

            counts = len(responseItemsOrderedAsc)
            self.adjust_sleep(counts)

            print("counts: %d" % len(responseItemsOrderedAsc))
            print("sleep_sec: %d" % self.sleep_sec)
Example #18
def main():
    """ Main Function loop """

    mydevice = Device()
    mydevice.connect(ASA_CREDENTIALS)

    myroutingtable = ASARoutingTable()
    myroutingtable.setroutingtable(mydevice)

    print "\nPrint Routing Table"
    myroutingtable.printtable()

    print "\nWhat interface does {0} exit".format(TRACEDATA['destip'])
    print myroutingtable.whatinterface(TRACEDATA['destip'])

    mypackettracer = PacketTracer()


    print "\nRunning Packet Trace"
    output = mypackettracer.runpackettracer(mydevice, TRACEDATA)
    _, results = mypackettracer.processtracer(output)

    pp(results)
Example #19
    def getPicID(self, profile_id):
        url = "https://m.facebook.com/app_scoped_user_id/"+profile_id
        self.browser.addheaders = [('User-agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36')]
        html = self.browser.open(url).read()
        #pp(html)
        parser = etree.HTMLParser()
        tree = etree.parse(StringIO.StringIO(html), parser)
        ids = tree.xpath("//div[@class='bl bm']/a/@href")
        if(not ids):
            ids = tree.xpath("//div[@class='bk bl']/a/@href")
            if(not ids):
                ids = tree.xpath("//div[@class='de']/a/@href")
                if(not ids):
                    ids = tree.xpath("//div[@class='bi cv']/a/@href")

        isID = re.search(r'id=[0-9]*', ids[0], re.M|re.I)
        if(isID):
            pid = isID.group()
            pp(pid)
            return pid.split('=')[1]
        else:
            isID = re.search(r'profile_id=[0-9]*', ids[0], re.M|re.I)
            pid = isID.group()
            return pid.split('=')[1]
Example #20
def getFromJson4(dir):
    import json
    f = open(str(dir) + "/flftDC.json").read()
    Flftfiles = json.loads(f)
    from collections import defaultdict
    all_flft = defaultdict(int)
    Flfts = []
    for count, flft_gram in enumerate(Flftfiles):
        flftGroup = flft_gram[0]
        file = flft_gram[1]
        flftGroup2 = []
        for flft in flftGroup:
            ft = re.sub(u"^.*?第", u"第", flft)
            flft = transfer.getStdFlft(flft)
            all_flft[flft + ft] += 1
            flftGroup2.append(flft + ft)
        Flfts.append([flftGroup2, file])

        if ((count + 1) % 500 == 0):
            print "进度:", (count * 1.0 + 1) / len(Flftfiles)
    pp(all_flft)
    f = open(str(dir) + "/flftStd.json", "w")
    data = json.dumps(Flfts, ensure_ascii=False)
    f.write(data.encode('utf-8'))
Example #21
def genLevel(ws, remains, callback):
    cellAndValues = map(lambda x: (x, getMergedCellPresent(ws, x).value),
                        remains)
    callback(cellAndValues)


def pushLevels(levelDict, cellAndValues):
    currentDict = levelDict
    for cell, value in cellAndValues:
        if value not in currentDict:
            currentDict[value] = {}
        currentDict = currentDict[value]


def traversalLevels(ws, tl, rb, onCell):
    levels = ws[tl:rb]
    for row in levels:
        genLevel(ws, row, onCell)


if __name__ == '__main__':
    print 'here'
    import sys
    wb = load_workbook(filename="../test.xlsx", read_only=False)
    ws = wb.worksheets[0]
    levelTypes = getLevelTypes(ws, 'B6', 'D7')
    levelDict = {}
    traversalLevels(ws, 'B8', 'D419', partial(pushLevels, levelDict))
    pp(levelDict)
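pushLevels only looks at the cell values, so its effect is easy to check on hand-made data:

levelDict = {}
pushLevels(levelDict, [('B8', u'A'), ('C8', u'a1'), ('D8', u'x')])
pushLevels(levelDict, [('B9', u'A'), ('C9', u'a2'), ('D9', u'y')])
pp(levelDict)  # {"A": {"a1": {"x": {}}, "a2": {"y": {}}}}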
Example #22
# -*- coding: utf-8 -*-
import os
import sys
import random
import MeCab
import wakati

from prettyprint import pp, pp_str

if __name__ == "__main__":
    src = open("test.txt", "r").read()
    wordlist = wakati.parse(src)
    pp(wordlist)
    #ret = wakati.parse("今日は、いい天気ですね。明日はどんな天気でしょうか?")
    #pp(ret)
Example #23
rules = [
    (shouldHavePriority, u"priority must be set"),
    (shouldHaveDeadline, u"high-priority tasks must have a deadline"),
    (partial(shouldHaveOwner, tf), u"tasks must have an owner"),
    (None, u"high-priority tasks must have estimated hours"),
    (None, u"tasks must belong to a project"),
    (None, u"if the parent task has a deadline, the task's deadline must not exceed the parent's"),
    (None, u"tasks should have a description"),
]

availableRules = filter(lambda x: x[0] != None, rules)

results = []
with CacheUsers(users) as cu:
    ui = UserInfo()
    #for task in filter(filters[0][0], tasks):
    for task in tasks:
        violations = []
        for rule in availableRules:
            if False == rule[0](task):
                violations.append(rule[1])
        if len(violations) > 0:
            tid = task['id']
            results.append({
                'id': tid,
                'title': tf.longTitle(tid),
                'violations': violations,
                'owner': tf.owner(tid),
                'author': tf.author(tid)})
    pp(results)
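Each rule is a (predicate, message) pair, and a task violates a rule when its predicate returns False. A sketch of what one predicate might look like (the real ones are defined elsewhere):

def shouldHavePriority(task):
    # A task passes when its priority field is present and set.
    return bool(task.get('priority'))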
Example #24
    def countDictWords(self):
        return len(self.vocab)

    def wakachi(self, text):
        w = self.mecab.parse(text)
        return w.split()


if __name__ == '__main__':
    ## source activate root
    ## pip install prettyprint
    from prettyprint import pp
    dictfile = 'dict.dat'
    dict = MyDict(dictfile)

    #dict.loadict()
    lids = dict.loadtext('jp.txt')
    dict.savedict()

    #print lids
    #print dict.lids2words(lids)
    print dict.countDictWords()
    i = 0
    for ids in lids:
        jwords = ','.join(dict.ids2words(ids))
        iwords = ','.join(map(str, ids))
        s = str(i) + '\t' + jwords + '\t' + iwords
        pp(s)
        i += 1
Example #25
def append_mecab():
    with open(filename) as data, open(output, "w") as out:
        mc = MeCab.Tagger()
        out.write(mc.parse(data.read()))


def make_list():
    with open(output) as f:
        sentences = []
        for line in f:
            splited = line.split("\t")
            if (len(splited) < 2):
                break
            factor = splited[1].split(",")
            sentence = {
                'surface': splited[0],
                'base': factor[6],
                'pos': factor[0],
                'pos1': factor[1]
            }
            sentences.append(sentence)

            if factor[1] == "句点":
                yield sentences
                sentences = []


if __name__ == "__main__":
    # append_mecab()
    pp(list(make_list()))  # make_list is a generator, so materialize it first
Example #26
                    srvc = host.get_service('tcp', port)
                    if srvc is None:
                        continue
                    record[host.ip][srvc.name] = int(port)
            self.shared_queue.put( { self.target: record } )
        except:
            # We don't know what we caught.
            raise
        finally:
            # Don't let a failure hang any waiting threads.
            self.alive = False


if __name__ == '__main__':
    import prettyprint
    import time
    import os, sys

    if not os.geteuid() == 0:
        sys.exit("\nOnly a root user can run this\n")

    sh_queue = Queue.Queue()
    psc = PortScanner('window-specialist.com', shared_queue=sh_queue)
    while psc.alive:
        time.sleep(0.1)
    prettyprint.pp(sh_queue.get(block=False))
Example #27
def make_release(release=None):
    """Based on the deployment type and any arguments from the command line,
    determine the proper identifier for the commit to deploy.

    If a tag is required (e.g. when in the production app environment), the
    deploy must be coming from the master branch, and cannot proceed without
    either creating a new tag or specifing and existing one.

    Requires the env keys:
        allow_no_tag - whether or not to require the release to be tagged in git
        default_revision - the commit ref for HEAD
    """
    require('allow_no_tag')
    require('default_revision')

    env.release = release
    env.tagged = False
    if not env.release or env.release == 'latest_tag':
        if not env.allow_no_tag:
            branch = utils.branch()
            if branch != "master":
                abort("Make sure to checkout the master branch and merge in the"
                        " development branch before deploying to production.")
            local('git checkout master', capture=True)
        description = local('git describe master',
                capture=True).rstrip('\n')
        if '-' in description:
            env.latest_tag = description[:description.find('-')]
        else:
            env.latest_tag = description
        if not re.match(env.version_pattern, env.latest_tag):
            env.latest_tag = None
        env.release = env.release or env.latest_tag
        env.commit = 'HEAD'
        if not env.allow_no_tag:
            if confirm(yellow("Tag this release?"), default=False):
                require('master_remote')
                from prettyprint import pp
                print(green("The last 5 tags were: "))
                tags = local('git tag | tail -n 20', capture=True)
                pp(sorted(tags.split('\n'), utils.compare_versions,
                        reverse=True))
                prompt("New release tag in the format vX.Y[.Z]?",
                        'tag',
                        validate=env.version_pattern)
                require('commit')
                local('git tag -s %(tag)s %(commit)s' % env)
                local('git push --tags %(master_remote)s' % env, capture=True)
                env.tagged = True
                env.release = env.tag
                local('git fetch --tags %(master_remote)s' % env, capture=True)
            else:
                print(green("Using latest tag %(latest_tag)s" % env))
                env.release = env.latest_tag
        else:
            make_head_commit()
            env.release = env.head_commit
            print(green("Using the HEAD commit %s" % env.head_commit))
    else:
        local('git checkout %s' % env.release, capture=True)
        env.tagged = re.match(env.version_pattern, env.release)
    make_pretty_release()
Example #28
}
u = User(**ukw)

print repr(u)
print str(u)


### Create a new Account object and add to User object

# One way to do it
aname = 'Budget'
a = Account(username=u.user, accountname=aname)
u.add_account(a) # Returns a


# Another way to do it
aname = 'Salary'
a = u.new_account(accountname=aname)

# These should be changed, so that Account objects returns something useful
print repr(a)
print str(a)


###  Invoke __get__() for property User.accounts
print u.accounts
# u.accounts.pop(0) # Direct access to the _accounts property. 
pp(u.accounts)


Example #29
    testObject = getTestObjectByApi()
    url = testObject['categories']
    resp = requests.get(url)
    return json.loads(resp.text)


def getOrphanTestCategories():
    url = 'http://192.168.99.100:8081/test-man/api/v0/testCategories/orphans'
    resp = requests.get(url)
    return (json.loads(resp.text))


def putTestObjectCategory():
    testObject = getTestObjectByApi()
    category = getOrphanTestCategories()[0]
    url = testObject['categories']
    resp = requests.put(url, json=[category['id']])
    return (json.loads(resp.text))


#pp(putTestObjectCategory())
#pp(getTestObjectCategories())
#pp(getOrphanTestCategories())
#pp(getTestObjectCategories())
#obj = getTestObjectsByApi()
#pp(obj)
#obj = getTestObjectByApi()
#pp(obj)
obj = getTestObjectCategories()
pp(obj)
Example #30

#TODO: move filename/path generation into its own structure, distinguishing host-based output (yml files) from list-based output (dhcpd.conf) etc.
#TODO: possibly pull hn out of writeFile and pass a different parameter depending on the tpl/tpl-type?!

def writeFile(hn, contents):
    if tpl == 'auto-install':
        out_filename['cloud-config'] = 'auto-install'+'.@@hn@@.yml'
    filename = out_filename[tpl_type].replace('@@hn@@', hn)
    out_file = os.path.join(deploydir,filename)

    print "create " + out_file
    with open(out_file,'w+') as f:
        f.write(contents)


# main

cfg = cfg_defaults.copy()

# for testing only one possibility: use given tpl and tpl_type
# TODO: handle values/options from cmdline args and create single host related outputs as well as hostlist based outputs
content = createObjectFromHostCfg(hn,tpl,tpl_type)


print "\nOUTPUT:\n"
pp(content)

writeFile(hn, content)

Example #31
# cut -f1 hightemp.txt | sort | uniq -c | sort -r

import sys
import csv
from prettyprint import pp

if __name__ == '__main__':
    argv = sys.argv
    if len(argv) != 2:
        # check that the expected argument was given
        # if not, print the usage message and exit
        print('Usage: python %s filename1 ...' % argv[0])
        quit()
    file_path = argv[1]

    word_set = set()
    with open(file_path) as f:
        reader = csv.reader(f, delimiter="\t")
        for row in reader:
            word_set.add(row[0].encode("utf-8"))

    pp("文字種類 : ")
    pp(word_set)
    pp("文字数:" + str(len(word_set)))
Example #32
                    cover=js['cover'],
                    description=js['description'].encode('utf-8'),
                    ratings=js['ratings'],
                    reviews=js['reviews'],
                    title=js['title'],
                    url=js['url'],
                    outlinks=js['outlinks'])
        book.add_authors(js['authors'])
        book.add_userreviews(js['userreviews'])
        # book.authors_url = authors_urls
        # for userrev in js['userreviews']:
        #     book.add_userreview(userrev['userName'], userrev['userReview'], userrev['userReviewDate'], userrev['userURL'])
        book.id = idx
        book.save()

    # s = Search(es).index('book-index').doc_type('book').query("match", description='prince')
    s = Search(es).index('book-index').doc_type('book')

    response = s.execute()
    print response.success(), response.hits.total
    for res in response:
        print res._meta.score
        print res.title
        print res.description.encode('utf-8')
        # pp(res.outlinks)
        pp(res.userreviews_userName)
        pp(res.userreviews_userReview)
        print ''

    # print response.to_dict()
Example #33
def testClassifier(x_train, y_train, x_test, y_test, clf):
    """
    this method will first train the classifier on the training data
    and will then test the trained classifier on test data.
    Finally it will report some metrics on the classifier performance.
    
    Parameters
    ----------
    x_train: np.ndarray
             train data matrix
    y_train: list
             train data label
    x_test: np.ndarray
            test data matrix
    y_test: list
            test data label
    clf: sklearn classifier object implementing fit() and predict() methods
    
    Returns
    -------
    metrics: list
             [training time, testing time, recall and precision for every class, macro-averaged F1 score]
    """
    #metrics = []
    start = dt.now()
    clf.fit(x_train, y_train)
    end = dt.now()
    print 'training time: ', (end - start)

    # add training time to metrics
    #metrics.append(end-start)

    start = dt.now()
    yhat = clf.predict(x_test)
    end = dt.now()
    print 'testing time: ', (end - start)

    # add testing time to metrics
    #metrics.append(end-start)

    print 'classification report: '
    #     print classification_report(y_test, yhat)
    pp(classification_report(y_test, yhat))

    print 'f1 score'
    print f1_score(y_test, yhat, average='macro')

    print 'accuracy score'
    accuracy = accuracy_score(y_test, yhat)
    print accuracy
    #metrics.append(accuracy)
    #precision = precision_score(y_test, yhat, average=None)
    #recall = recall_score(y_test, yhat, average=None)

    # add precision and recall values to metrics
    #for p, r in zip(precision, recall):
    #    metrics.append(p)
    #    metrics.append(r)

    #add macro-averaged F1 score to metrics
    #metrics.append(f1_score(y_test, yhat, average='macro'))

    print 'confusion matrix:'
    print confusion_matrix(y_test, yhat)

    # plot the confusion matrix
    plt.imshow(confusion_matrix(y_test, yhat), interpolation='nearest')
    plt.show()

    return accuracy
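A usage sketch for testClassifier on a toy dataset, assuming sklearn is installed and dt is datetime.datetime as the code above implies:

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

iris = load_iris()
x_train, x_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.3, random_state=0)
testClassifier(x_train, list(y_train), x_test, list(y_test),
               LogisticRegression())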
Example #34
                    raise Exception(msg.format(cnt, row, self.destination_ip))

            if len(hops) > 0:
                # This hop told us who it was.
                trace_globs[count+1] = tuple(hops)
            else:
                # This hop doesn't like traceroute.
                trace_globs[count+1] = None
        return trace_globs


class whois(object):
    def __init__(self, ip):
        pass


def test_trace_route(domain='www.google.com'):
    """Test the trace route object on domain.
    """
    import time
    queue = Queue.Queue()
    trc = TraceRoute(domain, queue)
    time.sleep(15)

    return queue.get(block=True, timeout=30)

if __name__ == '__main__':
    import prettyprint
    prettyprint.pp(test_trace_route())

Example #35
# coding: utf-8
import MeCab
from NLP_30 import make_list
from collections import Counter
from prettyprint import pp


def get_counter():
    counter = Counter()
    for cols in make_list():
        counter.update([col["surface"] for col in cols])
    word_list = counter.most_common()
    return word_list


if __name__ == "__main__":
    pp(get_counter())
Example #36
            return self.pyg.record_by_name(hostname)
        else:
            raise Exception('Undefined error')

@singleton
class GeoIp(object):
    def __init__(self):
        pass

    def distance(self, record_a, record_b):
        if not ('latitude' in record_a and 'longitude' in record_a):
            raise Exception('latitude/longitude not in {}'.format(record_a))
        if not ('latitude' in record_b and 'longitude' in record_b):
            raise Exception('latitude/longitude not in {}'.format(record_b))
        point_a = ( record_a['latitude'], record_a['longitude'] )
        point_b = ( record_b['latitude'], record_b['longitude'] )
        return geopy.distance.vincenty(point_a, point_b).miles


def test_ip_address():
    """A function to test the ip_address object.
    """
    ipa = IPAddress()
    record = ipa.approx_geograph(hostname='www.google.com')
    return record

if __name__ == '__main__':
    import prettyprint
    prettyprint.pp(test_ip_address())
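A usage sketch for GeoIp.distance with hand-made records (field layout assumed from the checks above):

geo = GeoIp()
sf = {'latitude': 37.77, 'longitude': -122.42}
nyc = {'latitude': 40.71, 'longitude': -74.01}
print geo.distance(sf, nyc)  # roughly 2,570 miles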

Example #37
     labellist.append(erritem)
     msgidlist = []
     for msgtxt in erritem['message']:
         ## make the text explicitly unicode so wakachi can handle it
         ## e.g. cmapeerdが停止したためプロセスを再起動します
         w = u'' + msgtxt
         words = dict.wakachi(w.encode('utf-8'))
         ids = dict.words2ids(words)
         ## ex) [113, 63, 11, 9, 13, 104, 6, 7, 66, 8, 9, 10]
         #y = train.predict(ids)
         y = train.predict(ids[::-1])
         wordid = y.data.argmax(1)[0]    ## ex) 84
         msgidlist.append(wordid)
         
     analizeRnnInputList.append(msgidlist)
     pp(erritem)
     pp(msgidlist)
 ## ex) [[65,66],[40,86,84]]
 pp(analizeRnnInputList)
 
 ## save the list of accepted message ids
 ## flatten [[83], [65, 66]...] into [83, 65, 66...] and make it unique
 fsave(acceptFile, list(set([flatten for inner in analizeRnnInputList for flatten in inner])))
 
 print "training...learning"
 trainData = analizeRnnInputList
 
 
 config = ConfigParser.SafeConfigParser()
 config.read(configFile)
 
Example #38
# -*- encoding: utf-8 -*-
import re
from prettyprint import pp
from knock20 import load_json_file, get_record

if __name__ == "__main__":
    data = load_json_file("jawiki-country.json")
    text = get_record(data, "イギリス")
    r = re.compile(u"\{\{基礎情報 国([\s\S]+)\}\}")
    pp(r.findall(text))
    field_dic = {}

    field_flag = False
    for row in text:
        if row.find("{{基礎情報") != -1:
            field_flag = True
            continue
        if field_flag:
            data = row.split(" = ")
            field_dic[data[0].replace("|", "")] = data[1]
        if row == "}}":
            field_flag = False  # assumed completion; the original snippet is truncated here


Example #39
    while counter < max_packets:
        # receive a packet
        pkt_obj = receive_raw_packet(sock)
        if pkt_obj and pkt_obj.protocol == UDP_PROTO:
            if pkt_obj.data:
                # Host name lookup
                # This is a sent packet.
                if pkt_obj.data[:2].__repr__() not in pkt_seen:
                    print 's', pkt_obj.data[2:].__repr__()
                    pkt_seen[pkt_obj.data[:2].__repr__()] = True
                    PENDING_DNS_REQUESTS[pkt_obj.src_port] = pkt_obj.data[:2].__repr__()
                if pkt_obj.dest_port in PENDING_DNS_REQUESTS:
                    print 'd', pkt_obj.data[10:]
                counter += 1

    pp(pkt_seen)
    pp(PENDING_DNS_REQUESTS)
    print 'TEST RECEIVE RAW COMPLETE\n\n'


if __name__ == '__main__':
    import time
    import os, sys
    if not os.geteuid() == 0:
        sys.exit("\nOnly a root user can run this\n")

    #dnsp = DNSParser()
    test_receive_raw_packet()
    #dnsp.alive.clear()
    # time.sleep(5)
    # test_receive_tcp_packet()
Example #40
    parser.add_argument('--sg', type=int, dest='sg', required=False, default=0)   # 0: cbow, 1: skipgram
    parser.add_argument('--cbow_type', type=int, dest='cbow_type', required=False, default=1)  # 0: average_concat + syn1_doc, 1: average, 2: concatenate + syn1_doc, 3: concatenate, 4: average_concat
    parser.add_argument('--skip_gram_type', type=int, dest='skip_gram_type', required=False, default=0)
    parser.add_argument('--alpha', type=float, dest='alpha', required=False, default=0.025)
    parser.add_argument('--alpha_doc', type=float, dest='alpha_doc', required=False, default=0.025)
    parser.add_argument('--alpha_rate', type=float, dest='alpha_rate', required=False, default=0.025)
    parser.add_argument('--alpha_flag', type=int, dest='alpha_flag', required=False, default=0)  # 0: decay the learning rate, 1: keep it constant
    parser.add_argument('--cbow_mean', type=int, dest='cbow_mean', required=False, default=1)  # 0: no, 1: average
    parser.add_argument('--iteration', type=int, dest='iteration', required=False, default=20)  # number of training passes
    parser.add_argument('--average_flag', type=int, dest='average_flag', required=False, default=1)  # 0: sum of word_vec only, 1: sum of doc_vec and word_vec
    parser.add_argument('--is_np_mean_syn1', type=int, dest='is_np_mean_syn1', required=False, default=0)  # 0: average the syn1 error, 1: sum it (for cbow_type=0,2)
    parser.add_argument('--is_using_word2vec', type=int, dest='is_using_word2vec', required=False, default=0)  # 1: use word2vec word vectors, 0: don't
    parser.add_argument('--is_using_wiki', type=int, dest='is_using_wiki', required=False, default=0)  # 0: none, 1: wiki
    parser.add_argument('--hs', type=int, dest='hs', required=False, default=0)  # 0: no hierarchical softmax, 1: use it
    parser.add_argument('--sample', type=float, dest='sample', required=False, default=1e-5)  # 1e-5 downsamples high-frequency words
    parser.add_argument('--freeze_learn', type=int, dest='freeze_learn', required=False, default=0)  # 0: update word vectors, 1: freeze them when word2vec vectors are used
    parser.add_argument('--random_learn_flag', type=int, dest='random_learn_flag', required=False, default=0)  # 1: train in random order, 0: in the given document order
    parser.add_argument('--n_gram_mode', type=int, dest='n_gram_mode', required=False, default=0)  # 0: no conversion, otherwise 1,2,3-gram
    parser.add_argument('--null_vec_type', type=int, dest='null_vec_type', required=False, default=2)  # 0: zeros, 1: ones, 2: random
    parser.add_argument('--skip_concat', type=int, dest='skip_concat', required=False, default=0)  # 0: don't skip, 1: skip nulls
    parser.add_argument('--input', type=str, dest='input_file', required=False, default="INPUT.txt")
    # name of a file with one document per line


    args = parser.parse_args()
    args_dict = vars(args)
    pp(args_dict)



    make_paragraph_vector(**args_dict)
Example #41
# -*- encoding: utf-8 -*-
import sys
import csv
from prettyprint import pp

if __name__ == '__main__':
    argv = sys.argv
    if len(argv) != 2:
        # check that the expected argument was given
        # if not, print the usage message and exit
        print('Usage: python %s filename1 ...' % argv[0])
        quit()
    file_path = argv[1]

    data_frame = []
    with open(file_path) as f:
        reader = csv.reader(f, delimiter="\t")
        for row in reader:
            data_frame.append(row)

    data_frame = sorted(data_frame, key=lambda data: data[2], reverse=True)
    pp(data_frame)
Example #42
    def predict(self, data):
        inmsg = data.split('\n')
        #inmsg = ['エージェントが可能な変更を行った後動作を開始しました']

        log.debug('predict: predict data=%s', inmsg)
        dictfile = self.model_path + '/dict.dat'
        configFile = self.model_path + '/analog.ini'
        dict = MyDict(dictfile)
        dict.loadict()
        print 'dict count: %d' % dict.countDictWords()

        ## accepted message IDs
        acceptid = self.fload(self.model_path + '/accept.dat')
        print '**accept id:'
        print acceptid

        ## load the error definitions
        with open(self.model_path + '/error.json', 'r') as f:
            errjson = json.load(f, "utf-8")

        ## map messages to RNN IDs
        vocab = 10000
        dim = 100
        y = 94
        train = Trainer(vocab, dim, y)
        train.load(self.model_path + '/train')

        labellist = []  ## list of labels (output)
        #msglist = []    ## list of messages (keeps used messages for building the ignore list)
        analizeRnnInputList = []
        analizeRnnLabelList = []
        msgidlist = []
        for msgtxt in inmsg:
            ## make the text explicitly unicode so wakachi can handle it
            ## e.g. cmapeerdが停止したためプロセスを再起動します
            #w = u'' + msgtxt
            #words = dict.wakachi(w.encode('utf-8'))
            words = dict.wakachi(msgtxt)
            ids = dict.words2ids(words)
            ## skip input that is only a newline
            if ids == []:
                continue
            ## ex) [113, 63, 11, 9, 13, 104, 6, 7, 66, 8, 9, 10]
            y = train.predict(ids[::-1])
            wordid = y.data.argmax(1)[0]  ## ex) 84

            ## ignore any message not in the accepted error list
            if wordid in acceptid:
                msgidlist.append(wordid)
            else:
                print 'not accept id:%d, msg:%s' % (wordid, msgtxt)
                continue
            print '** msg predict %d, %2f' % (wordid, y.data[0][wordid]
                                             )  ## show the probability of the selected id
        pp(inmsg)
        pp(msgidlist)

        ## no target messages
        if msgidlist == []:
            result = [{'score': 100, 'id': None, 'label': None}]
            return result

        ## predict the error case
        config = ConfigParser.SafeConfigParser()
        config.read(configFile)

        dim_in = config.getint('analize', 'dim_in')
        dim_mid = config.getint('analize', 'dim_mid')
        dim_out = config.getint('analize', 'dim_out')
        """
        train = AnazlizeTrainer(dim_in, dim_mid, dim_out)
        train.load(self.model_path + '/train_analize')
        y = train.predict(msgidlist[::-1])
        print y.data.argmax(1)[0]
        rank = y.data.argsort()[0]
        uprank = map(int, rank[::-1])
        print uprank
        #print y.data[0]
        """

        train = AnazlizeTrainer(dim_in, dim_mid, dim_out)
        train.load(self.model_path + '/train_analize')
        #y = train.predict(msgidlist[::-1])

        targetlist = msgidlist
        resultlist = np.zeros(dim_out)  ## holds the max probability seen for each class
        print resultlist
        for i in range(len(targetlist)):
            target = targetlist[i:]
            y = train.predict(target[::-1])
            print target
            print y.data[0]
            for i in range(len(y.data[0])):
                if y.data[0][i] > resultlist[i]:
                    resultlist[i] = y.data[0][i]

        print resultlist
        #print y.data.argmax(1)[0]
        #rank = y.data.argsort()[0]
        rank = resultlist.argsort()
        uprank = map(int, rank[::-1])
        print uprank
        #print y.data[0]

        result = []
        for i in uprank:
            print '%d, %2f' % (i, resultlist[i])
            item = {
                'score': round(float(resultlist[i]) * 100, 2),
                'id': i,
                'label': errjson[i]['label']
            }
            result.append(item)

        return result
Example #43
# -*- encoding: utf-8 -*-
import re
from prettyprint import pp
from knock20 import load_json_file, get_record


if __name__ == "__main__":
    data = load_json_file("jawiki-country.json")
    text = get_record(data, "イギリス")
    r = re.compile("\[\[Category:(.+?)\]")
    m = r.findall(text)
    pp(m)


Example #44
                          csrfToken, session)
        csrfToken = mock.getCsrfValue(resp.text)
        resp = mock.addTaskParent(theTask['phid'], parent, csrfToken, session)
    transactions = []
    try:
        if None != points and len(points.strip()) > 0:
            transaction = {'type': 'points', 'value': str(points)}
            transactions.append(transaction)
    except Exception, e:
        pass
    # if None != parent:
    # transaction = {'type': 'parent', 'value': parent}
    # transactions.append(transaction)
    if len(transactions) > 0:
        print 'transactions:'
        pp(transactions)
        fab.maniphest.edit(transactions=transactions,
                           objectIdentifier=theTask['objectName'])
    for subTask in subTasks:
        subTask['parent'] = 'T%s' % (taskId, )
        newTask(fab, **subTask)
    return theTask


def updateTask(fab, **args):
    # fetch the tid
    tid = args.pop('tid')
    if None == tid:
        return

    title = args.pop('task')
Example #45
            if (len(splited) < 2):
                break
            factor = splited[1].split(",")
            sentence = {
                'surface': splited[0],
                'base': factor[6],
                'pos': factor[0],
                'pos1': factor[1]
            }
            sentences.append(sentence)

            if factor[1] == "句点":
                yield sentences
                sentences = []


if __name__ == "__main__":
    np = set()
    np_test = []
    for cols in make_list():
        for i in range(1, len(cols) - 1):
            if cols[i]["surface"] == "の" \
                    and cols[i - 1]["pos"] == "名詞" \
                    and cols[i + 1]["pos"] == "名詞":
                np_test.append(cols[i - 1]["surface"] + cols[i]["surface"] +
                               cols[i + 1]["surface"])
                np.add(cols[i - 1]["surface"] + cols[i]["surface"] +
                       cols[i + 1]["surface"])

    pp(sorted(np, key=np_test.index))
Example #46
pydir =  os.path.dirname(os.path.abspath(__file__))
basedir = os.path.dirname(pydir)
confdir = os.path.join(basedir,"config")
tpldir = os.path.join(basedir,"tpl")


cfg_defaults = {}
hosts = {}

filename = "virt-install-cmd.xen.hjson"
in_file = os.path.join(tpldir,filename)

#print "read " + in_file

with open(in_file,'r') as f:
    dict = hjson.load(f)


for k in dict['cfg'].keys():
    #print "key=%s\n" % k
    pp(dict['cfg'][k])


    # cfg['initial-cluster-string'] = getCoreosInitialClusterString()
    # cfg['install-img-path'] = getInstallImgPath()
    # cfg['install-img-format'] = cfg['disks']['disk0']['img-format']
    # cfg['install-bridge'] = cfg['nics']['nic0']['bridge']
    # cfg['install-mac'] = cfg['nics']['nic0']['mac']

Example #47
        csrfToken, session)
    csrfToken = mock.getCsrfValue(resp.text)
    resp = mock.addTaskParent(theTask['phid'], parent, csrfToken, session)
  transactions = []
  try:
    if None != points and len(points.strip()) > 0:
      transaction = {'type':'points', 'value':str(points)}
      transactions.append(transaction)
  except Exception, e:
    pass
  # if None != parent:
    # transaction = {'type': 'parent', 'value': parent}
    # transactions.append(transaction)
  if len(transactions) > 0:
    print 'transactions:'
    pp(transactions)
    fab.maniphest.edit(
      transactions = transactions,
      objectIdentifier = theTask['objectName'])
  for subTask in subTasks:
    subTask['parent'] = 'T%s' % (taskId,)
    newTask(fab, **subTask)
  return theTask

def updateTask(fab, **args):
  # fetch the tid
  tid = args.pop('tid')
  if None == tid:
    return

  title        = args.pop('task')