示例#1
0
def project_part_2_prepare():
    """Print a sanity check of the emission parameters built from ``CN/train``.

    Reads the labelled sequences, estimates the emission parameters, and then
    prints the occurrence count and the ``O`` emission value for the word
    "高兴" and for "撒达到" (which the printed message describes as absent
    from the training set).
    """
    from collections import Counter

    labelled_data = read_file('CN/train', type="with_label")
    pr(labelled_data[:3])

    # Flatten once; every item is indexed as item[0] = word, item[1] = label.
    items = [it for sequence in labelled_data for it in sequence]
    word_set = {it[0] for it in items}
    label_set = {it[1] for it in items}

    emission_parameter, c1, c2 = emission_parameter_calcul_on_train_set(
        labelled_data, label_set, word_set)
    pr(emission_parameter["O"]["高兴"])

    word_count = Counter(it[0] for it in items)

    print("count of 高兴 is : " + str(word_count["高兴"]))
    print("e(O->高兴) is ")
    print(emission_parameter["O"]["高兴"])

    # Compute the emission value for the second word on demand and store it
    # alongside the values estimated above.
    emission_parameter["O"]["撒达到"] = emission_parameter_calcul(
        "O", "撒达到", count_set=(c1, c2, word_set), labelled_data=labelled_data)

    print(
        "撒达到 is an new word,which hasn't been in the train set,count of its occurance is :"
        + str(word_count["撒达到"]))
    print("e(O->撒达到) is : ")
    print(emission_parameter["O"]["撒达到"])

    print(
        "according to the special case handling they all appears to be 1 time, and the latter one is slightly smaller(which is also consistent with the algotithm)"
    )
示例#2
0
def process_example():
    """Run ``process`` on the records stored in ``./data/example.json``.

    The parsed records are wrapped in a request dictionary whose ``queryID``
    is fixed to 1. The request and the value returned by ``process`` are both
    printed through ``pr``. Returns ``None``.
    """
    # Context manager closes the handle even if JSON parsing fails; the
    # previous ``json.load(open(...))`` form never closed it explicitly.
    with open("./data/example.json") as example_file:
        records = json.load(example_file)
    d = {'queryID': 1, "data": records}
    pr(d)
    result = process(d)
    pr(result)
示例#3
0
def primeNear(max):
    """Return the largest value in ``2..max`` (inclusive) accepted by ``checkPrime``.

    Every candidate is tested in ascending order; 0 is returned when no
    candidate passes.
    """
    pr("cur: ")
    accepted = [cur for cur in range(2, max + 1) if checkPrime(cur)]
    pr("")
    return accepted[-1] if accepted else 0
示例#4
0
def train_w2v(sentences):
    """Train Word2Vec on index-encoded sentences and write the results to disk.

    Each distinct word is replaced by a string index before training.

    Args:
        sentences: collection of word sequences; it is iterated more than
            once, so it must be re-iterable.

    Files written:
        ./model/vectors_txt_format.txt: one ``index:v1 v2 ...`` line per index.
        model/word_map.dict: pickled ``word -> index string`` mapping.
        model/vectors.dict: pickled ``index string -> list of stringified
            vector components`` mapping.
        ./model/genism_model_vector.test.db: pickle of a second model trained
            with ``size=100, window=5, workers=4``.
    """
    word_set = {word for sentence in sentences for word in sentence}
    word_map = {word: str(c) for c, word in enumerate(word_set)}
    sentences = [[word_map[word] for word in sentence]
                 for sentence in sentences]

    pr(word_map)
    voca_size = len(word_map)
    pr(voca_size)
    model = gensim.models.Word2Vec(sentences,
                                   min_count=1,
                                   size=VECTOR_DIMENSION)

    index_to_vector = dict()
    # ``with`` closes the text file even if a lookup or write fails midway;
    # previously the handle leaked on any exception inside the loop.
    with open("./model/vectors_txt_format.txt", "w") as f_write:
        for i in range(voca_size):
            index = str(i)
            if index in model:
                # Stringify the vector once and reuse it for both outputs.
                components = [str(temp) for temp in model[index]]
                f_write.write(index + ":" + " ".join(components) + "\n")
                index_to_vector[index] = components
            else:
                print(index + " is not in vocabulary")
    print(len(index_to_vector))

    # Persist the word -> index mapping and the index -> vector mapping.
    with open("model/word_map.dict", "wb") as f:
        pickle.dump(word_map, f)
    with open("model/vectors.dict", "wb") as f:
        pickle.dump(index_to_vector, f)

    # Also keep a pickled model as a fallback. Note this is a freshly trained
    # model with different parameters, not the one used for the files above.
    with open("./model/genism_model_vector.test.db", "wb") as f:
        model = gensim.models.Word2Vec(sentences,
                                       size=100,
                                       window=5,
                                       workers=4)
        pickle.dump(model, f)
示例#5
0
def multarg(*args, **args2):
    """Print the positional arguments, then the keyword arguments sorted by name.

    Positional values are passed to ``pr`` one by one with ``end=', '``;
    keyword arguments follow between two ``---`` lines as ``name : value``.
    """
    for positional in args:
        pr(positional, end=', ')
    pr("\n---")
    for name in sorted(args2):
        pr("{} : {}".format(name, args2[name]))
    pr("---")
示例#6
0
def main():
    """Process the request's JSON body and forward the result to the status service.

    Reads ``request.json``, runs ``process`` on it, POSTs the result as JSON
    to the status endpoint, and returns the result serialized with
    ``json.dumps``.

    Raises:
        requests.exceptions.RequestException: propagated unchanged when the
            forwarding request fails or exceeds the timeout.
    """
    records = request.json
    print("got a sequence of data")
    pr(records)
    result = process(records)
    pr(result)
    # Bound the outbound call: without ``timeout`` requests waits
    # indefinitely, so an unresponsive status service would hang this
    # handler forever. The response itself is not used.
    requests.post("http://115.159.91.188:3000/statue",
                  json=result,
                  headers={"content-type": "application/json"},
                  timeout=10)

    return json.dumps(result)
示例#7
0
def fill_placeholders(intents, plugs):
    """Expand every intent once per plug, replacing each placeholder with the plug.

    A placeholder is any span matched by ``{(.+?):}``. Tags are computed by
    ``get_tags`` from the whitespace-split text *before* substitution.

    Args:
        intents: re-iterable collection of dicts with ``'text'`` and
            ``'intent'`` keys (iterated once per plug).
        plugs: iterable of replacement strings.

    Returns:
        A list of dicts with keys ``'id'`` (sequential, starting at 1),
        ``'text'``, ``'tags'`` and ``'intent'``, ordered plug-major.
    """
    placeholder = re.compile(r'{(.+?):}')  # compiled once, not per iteration
    new = []
    start_id = 1
    for plug in plugs:
        for intent in intents:
            text = intent['text']
            tags = get_tags(text.split())
            # A callable replacement inserts the plug literally. Passing the
            # plug as a replacement *template* made ``re`` interpret any
            # backslash or group reference in it (raising or corrupting text).
            text = placeholder.sub(lambda _match: plug, text)
            new.append({
                'id': start_id,
                'text': text,
                'tags': tags,
                'intent': intent['intent'],
            })
            start_id += 1
            # Progress indicator for large expansions.
            if start_id % 1000 == 0:
                pr(start_id)
    return new
示例#8
0
            'state_context_input': X_s,
            'observation_context_input': X_o,
            'current_input': X_current
        },
        batch_size=32,
        verbose=2)
    return predictions


if __name__ == "__main__":
    # Manual check: for the first ten labelled records, print the sentence,
    # its labelled keywords and the output of conditiaonal_probability.
    word_map, vectors = load_meta_model()
    index_to_word = {index: word for word, index in word_map.items()}

    records = load_labeled_data("./data/tmp.json")
    model = load_model("./model/dis_enc_model.model")

    for i in range(10):
        record = records[i]

        sentence = record["content"]
        print("raw sentence is ")
        print("".join(sentence))

        real_keys = record["key"]
        print("real keywords as")
        print(real_keys)

        # The sentence is passed as both the first and the second argument.
        words = sentence
        pro = conditiaonal_probability(words, sentence, [], model)
        pr(pro)

    quit()
print(joinList)

# %%
# Merging dictionaries with ** unpacking: for duplicate keys the value that
# appears last wins, while the key keeps its first position.
dictioOfPoint = {"c": 10, "f": 20}
dictioOfPoint2 = {"c": 2, "t": 50}
joinDictio = {
    **dictioOfPoint,
    **dictioOfPoint2,
    "zz": 120,
}
print(joinDictio)

# %%
# Task: find the character with the highest number of occurrences.
testString = "This is a common interview question"
listOfChar = list(testString)

listOfWords = testString.split(" ")
print(listOfWords)
# %%
# Tally every character (spaces included) in order of first appearance.
discWithNumberOfChar = {}
for char in listOfChar:
    discWithNumberOfChar[char] = discWithNumberOfChar.get(char, 0) + 1

# On ties ``max`` keeps the first key reaching the highest count.
maxInDisc = max(discWithNumberOfChar, key=discWithNumberOfChar.get)
pr(discWithNumberOfChar, width=1)
print("Max is: ", maxInDisc, "with number of ocurr: ",
      discWithNumberOfChar[maxInDisc])
示例#10
0
文件: ftpdown.py 项目: dog-2/ftpdown
        if verbose:
            print "Host %s directory %s download finished:" % (self.conn.host, rdir)
            print "%d directories, %d(%d failed) files, %d unknown type." % (numDir, numFile, numDownErr, numUnknown)
        return numDir, numFile, numUnknown, numDownErr


if __name__ == "__main__":
    import sys
    import traceback
    from pprint import pprint as pr

    def run(host):
        """Download the remote directory "." of ``host`` and log the outcome.

        On success a summary of the counts returned by ``downloadDir`` is
        written to ``flog``. On any exception the traceback is printed and
        written to ``flog`` instead. Returns ``None`` either way.
        """
        try:
            fd = FtpDownloader(host=host, user="******", passwd="test", port=21, timeout=10)
            numDir, numFile, numUnknown, numDownErr = fd.downloadDir(
                rdir=".", ldir="download", tree=None, errHandleFunc=None, verbose=True
            )
            flog.write(
                "%s\nok\n"
                "%d directories, %d(%d failed) files, %d unknown type\n\n\n"
                % (host, numDir, numFile, numDownErr, numUnknown)
            )
        except Exception:
            traceback.print_exc()
            flog.write("%s\nerror\n%s\n\n\n" % (host, traceback.format_exc()))

    # ``with`` closes the log even when something escapes ``run`` (a missing
    # command-line argument, KeyboardInterrupt); the explicit close() at the
    # end was skipped in those cases.
    # The binary mode matches the Python 2 print statements used in this file.
    with open("err.log", "wb") as flog:
        pr(run(sys.argv[1]))
示例#11
0
if __name__ == '__main__':
    import sys
    import traceback
    from pprint import pprint as pr

    def run(host):
        """Download the remote directory '.' of ``host`` and log the outcome.

        On success a summary of the counts returned by ``downloadDir`` is
        written to ``flog``. On any exception the traceback is printed and
        written to ``flog`` instead. Returns ``None`` either way.
        """
        try:
            fd = FtpDownloader(host=host,
                               user='******',
                               passwd='test',
                               port=21,
                               timeout=10)
            numDir, numFile, numUnknown, numDownErr = fd.downloadDir(
                rdir='.',
                ldir='download',
                tree=None,
                errHandleFunc=None,
                verbose=True)
            flog.write(
                '%s\nok\n'
                '%d directories, %d(%d failed) files, %d unknown type\n\n\n' %
                (host, numDir, numFile, numDownErr, numUnknown))
        except Exception:
            traceback.print_exc()
            flog.write('%s\nerror\n%s\n\n\n' % (host, traceback.format_exc()))

    # ``with`` closes the log even when something escapes ``run`` (a missing
    # command-line argument, KeyboardInterrupt); the explicit close() at the
    # end was skipped in those cases.
    # NOTE(review): str is written to a file opened in binary mode, which only
    # works on Python 2 -- confirm the target interpreter.
    with open('err.log', 'wb') as flog:
        pr(run(sys.argv[1]))
示例#12
0
    def getBatchable(self):
        """Return all rows of table ``poc`` whose ``batchable`` column equals 1."""
        cur = self.cursor
        cur.execute('SELECT * FROM poc WHERE batchable=1')
        return cur.fetchall()

    def countAll(self):
        """Return the first result row of ``SELECT count(*) FROM poc``."""
        cur = self.cursor
        cur.execute('SELECT count(*) FROM poc')
        return cur.fetchone()


if __name__ == '__main__':
    # Manual test harness. The parent directory must be on sys.path before
    # SETTINGS can be imported, and FRAMEWORK_DIR is added right after.
    sys.path.append('../')
    from SETTINGS import FRAMEWORK_DIR
    sys.path.append(FRAMEWORK_DIR)

    from pprint import pprint as pr

    db = Database(
        dbFilePath='../hive.db',
        pocDir='../pocs/',
    )

    # Other calls that have been exercised from here:
    #   db.updtDbFromBB2Db(bb2DbFile='../pocdb.json')
    #   db.updtDbFromPocs(pocDir='../pocs')
    #   db.searchStr(item='discuz')
    #   db.countAll()
    #   db.searchPoc(pocId='poc-2014-0019')
    #   db.getBatchable()
    update_result = db.updtDbFromJson(jsonFile='../pocdb.json')
    pr(update_result)
示例#13
0
              epochs=5,
              batch_size=32,
              verbose=2)

    model.save('model/lstm_enc_model.model', overwrite=True)
    print("Saved model to disk.")
    # with open("model/lstm_enc_model.model","wb") as f:
    #     pickle.dump(model,f,protocol=2)

    score = model.evaluate(
        {
            'state_context_input': X_s_test,
            'observation_context_input': X_o_test
        }, {'predictions': Y_test},
        batch_size=128)
    pr(score)
    print()
    quit()

    print(np.min(X_train), np.max(X_train))
    print('X_train shape:', X_train.shape)
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')

    shape = X_train.shape[1:]
    print(shape)
    dropout_rate = 0.25
    opt = Adam(lr=1e-4)
    dopt = Adam(lr=1e-3)

    ntrain = 10000
示例#14
0
from event import Event
from pprint import pprint as pr
from selenium import webdriver

# def sele_phantom():
#     url = "https://tanzu.vmware.com/content/webinars/jun-30-making-k8s-great-improving-the-kubernetes-developer-experience?utm_campaign=Global_BT_Q221_Improving-K8s-Developer-Experience&utm_source=twitter&utm_medium=social"
#     browser = webdriver.PhantomJS()
#     browser.get(url)
#
#     iframe = browser.find_element_by_tag_name("iframe")
#     print(iframe)
#     browser.switch_to.default_content()
#     browser.switch_to.frame(iframe)
#
#     iframe_source = browser.page_source
#
#     print(iframe_source)
#
#     print(browser.current_url)

if __name__ == "__main__":
    # Start the crawler for a single keyword and print what it returns.
    keywords = "kubernetes"
    data = Event(keywords).start()
    pr(data)
示例#15
0
    X_o = np.array([np.vstack(np.asarray(it)) for it in X[:, 1]])
    X_s = np.array([np.concatenate(np.array(it)) for it in X[:, 0]])
    return X_o, X_s


if __name__ == "__main__":
    # Script entry point: loads the metadata and the labelled records, builds
    # the training arrays, and prints the shapes of two arrays derived from
    # the first ten samples.

    word_map, vectors = load_meta_model()
    # Inverse of word_map (value -> key).
    index_to_word = {item: key for key, item in word_map.items()}

    # NOTE(review): this result is overwritten by the very next statement, so
    # the same file appears to be loaded twice -- confirm one call can go.
    records = load_labeled_data("./data/tmp.json")

    records = preprocessing.load_labeled_data("./data/tmp.json")
    # NOTE(review): the loop variable ``re`` shadows the name of the standard
    # ``re`` module inside this comprehension.
    temp = [re["key"] for re in records]

    pr(temp[:10])
    class_number = find_class_number()
    X, Y = construct_train_data(records)
    total_number = len(X)
    # One-hot encode the labels with class_number + 1 classes.
    Y = keras.utils.to_categorical(Y, num_classes=class_number + 1)

    # NOTE(review): the training slice is only the first 10 samples, while the
    # test slice is the last 10% -- looks like a debugging leftover; confirm.
    X_train = X[:10, :]
    Y_train = Y[:10]
    X_test = X[int(total_number * 0.9):, :]
    Y_test = Y[int(total_number * 0.9):]

    # NOTE(review): both arrays below are built from column 0 of X_train,
    # whereas the function just above this block builds X_o from column 1 and
    # X_s from column 0 -- verify which column X_o should use here.
    X_o = np.array([np.vstack(np.array(it)) for it in X_train[:, 0]])
    print(X_o.shape)
    X_s = np.array([np.concatenate(np.array(it)) for it in X_train[:, 0]])
    print(X_s.shape)
示例#16
0
# Read an age and divide by it. Every failure is reported with the same
# message, including the ZeroDivisionError raised for an input of 0.
try:
    inputValue = int(input("Age:"))
    xFactor = 10 / inputValue
except Exception as ex:
    print("Age have to be number", ex, type(ex), sep="\n")

# print("Age is", inputValue)

# %%
from pprint import pprint as pr

# try/except/finally demonstration: print the lines of App.py, then trigger a
# deliberate division by zero so the except and finally clauses both run.
# ``file`` is bound before the try so that ``finally`` can tell whether
# open() succeeded; previously a failed open() made ``file.close()`` raise
# NameError from inside ``finally``.
file = None
try:
    file = open("App.py")
    fileContent = file.readlines()
    pr(fileContent, width=80)
    ageVal = 0
    factor = 10 / ageVal
except Exception as ex:
    print(ex)
    print(type(ex))
finally:
    if file is not None:
        file.close()

# %%
# example of using block alternative in python: with clousule.
# Idisposible : object has two magic methods __enter__ and __exit__
try:
    with open("App.py") as file:
        for line in file.readlines():
            if line.strip():
示例#17
0
import math

from pprint import pr

# Output through ``pr`` is controlled by this flag.
pr.print_on = False

pr("hello!")
pr("циферки: {}, {}".format(11, 22))
s = 'hello 2 \n'
pr(s)

# Walk 10, 13, 16, 19 and label each value by parity.
for i in range(10, 20, 3):
    template = "even: {}" if i % 2 == 0 else "odd:  {}"
    pr(template.format(i))

words = ['aaa', 'bbb', 'ccc', 'ddd']
for w in words:
    pr(w)

# Same list again, this time with positions.
for i, word in enumerate(words):
    pr("{} : {}".format(i, word))

# Only the even numbers below 10 are printed.
for x in range(10):
    if x % 2:
        continue
    pr("- {}".format(x))
示例#18
0
def read_and_print(file="yamlread2.yaml") -> dict:
    """Load a YAML file, print the parsed content through ``pr`` and return it.

    Args:
        file: path of the YAML file to read.

    Returns:
        The object produced by ``yaml.load``. The annotation says ``dict``,
        which presumably matches the files this is used with; a YAML document
        whose top level is a list or scalar yields that type instead.
    """
    with open(file, 'r') as fp:
        # NOTE(review): FullLoader is not meant for untrusted input; switch to
        # yaml.safe_load if the file can come from outside the project.
        d = yaml.load(fp, Loader=yaml.FullLoader)
    # The file is fully parsed above, so it can be closed before printing.
    pr(d)
    print("\n\n\n")
    return d
示例#19
0
from pprint import pr

# Output through ``pr`` is controlled by this flag.
pr.print_on = True

# Three ways of building the printed text: positional %-formatting,
# %-formatting from a mapping, and an f-string.
positional_args = (111, 222, 'ccc')
pr('1: %d, 2: %d, 3: %s' % positional_args)

person = {'name': 'Vasya', 'age': 25}
pr('name: %(name)s, age: %(age)d' % person)

name = 'Osya'
pr(f'var name = {name}')
示例#20
0
    valid_record = detect_and_delete(content)

    com_keys = key_word_extract(".".join(
        ["".join(rec["content"]) for rec in valid_record]))
    keys = dict()
    for i in range(10):
        try:
            keys["KeyCom" + str(i + 1)] = com_keys[i]
        except:
            keys["KeyCom" + str(i + 1)] = ""
    return len(valid_record), keys, valid_record


if __name__ == "__main__":
    # Train word vectors on the "content" field of the labelled records,
    # after printing the first five records for a quick visual check.
    records = load_labeled_data("./data/tmp.json")
    pr(records[:5])
    sentences = []
    for record in records:
        sentences.append(record["content"])
    train_w2v(sentences)

    # label_data = load_labeled_data("./data/example.json")
示例#21
0
# Report on the truthiness of the response obtained earlier in the script.
print('Responce is correct' if resp else 'bad response')

print(resp.headers)  # all headers

print(resp.text)  # the whole page

# Keep a copy of the page on disk.
with open('resp.html', 'w') as file:
    file.write(resp.text)

# Fetch a picture and store the raw bytes.
image_url = 'https://i2.wp.com/itc.ua/wp-content/uploads/2018/09/3-1.jpg?fit=830%2C460&quality=100&strip=all&ssl=1'
r = req.get(image_url)
with open('logo.png', 'wb') as f:
    f.write(r.content)

# Form POST: print the "form" entry of the JSON reply.
payload = {'username': '******', 'password': '******'}
auth = req.post('http://httpbin.org/post', data=payload)
auth_dict = auth.json()
pr(auth_dict['form'])

# HTTP basic authentication.
credentials = ('test', 'pass')
basic_auth = req.get('http://httpbin.org/basic-auth/test/pass',
                     auth=credentials)
print(basic_auth.text)

# Request a URL with a 2-second client timeout.
timeout_req = req.get('https://httpbin.org/delay/2', timeout=2)
pr(timeout_req)
# python3 requests_demo.py > page.html
示例#22
0
        self.size = buckets

    def insert(self, node):
        """Append ``node.data`` to the bucket chosen by ``hash_fn``.

        The bucket list is created the first time its key is seen.
        """
        key = hash_fn(node.data, self.size)
        self.buckets.setdefault(key, []).append(node.data)

    def remove(self, node):
        """Remove one occurrence of ``node.data`` from its bucket.

        Raises:
            KeyError: the bucket key computed for the data does not exist.
            ValueError: the bucket exists but does not contain the data.
        """
        key = hash_fn(node.data, self.size)
        if key not in self.buckets:
            raise KeyError("No such key!")
        if node.data not in self.buckets[key]:
            raise ValueError("No such data!")
        self.buckets[key].remove(node.data)


if __name__ == '__main__':
    # Fill a table with 20 random 10-letter strings and show its buckets.
    ht1 = HashTable()
    alphabet = 'qazwsxcderfvbgtyhnmjuiklop'
    # All strings are generated before the first insert.
    datas = []
    for _ in range(20):
        letters = [choice(alphabet) for _ in range(10)]
        datas.append(''.join(letters))
    for data in datas:
        ht1.insert(Node(data))

    pr(ht1.buckets)
示例#23
0
文件: db.py 项目: 2625668714/Beehive
        sql = 'SELECT * FROM poc WHERE batchable=1'
        self.cursor.execute(sql)
        return self.cursor.fetchall()

    def countAll(self):
        """Return the first result row of ``SELECT count(*) FROM poc``."""
        cur = self.cursor
        cur.execute('SELECT count(*) FROM poc')
        return cur.fetchone()


if __name__ == '__main__':
    # Manual test harness. The parent directory must be on sys.path before
    # SETTINGS can be imported, and FRAMEWORK_DIR is added right after.
    sys.path.append('../')
    from SETTINGS import FRAMEWORK_DIR
    sys.path.append(FRAMEWORK_DIR)

    from pprint import pprint as pr

    db = Database(dbFilePath='../hive.db', pocDir='../pocs/')

    # Other calls that have been exercised from here:
    #   db.updtDbFromBB2Db(bb2DbFile='../pocdb.json')
    #   db.updtDbFromPocs(pocDir='../pocs')
    #   db.searchStr(item='discuz')
    #   db.countAll()
    #   db.searchPoc(pocId='poc-2014-0019')
    #   db.getBatchable()
    update_result = db.updtDbFromJson(jsonFile='../pocdb.json')
    pr(update_result)
					self.__result[target_name].append(data)
				except KeyError, e:
					self.__result[target_name] = []
					self.__result[target_name].append(data)
			else:
				self.__result[target_name] = data
			self.__getdata = False

	# Method used to get tag details information, in a contained way.
	def get_tag_details(self, tag):
		"""Return the parsing-pattern entry stored for ``tag``, or None if absent."""
		patterns = self.__parsing_pattern
		try:
			details = patterns[tag]
		except KeyError:
			details = None
		return details


# Example: parse a small document with a pattern declared for <p> tags whose
# attributes include class="test", then print what the parser returns.
parsing_pattern = {
    'p': {
        'target_name': 'test',
        'attributes': [('class', 'test')],
        'multiple_return_values': True,
        'type': 'data',
        'subtags': None,
    },
}

example_html = "<!DOCTYPE html><html><a hmm='kage'>hej</a><p class='test'>test</p><b>hmm</b><p class='test'>Test 200</p></html>"
test_parser = sax_style_HTML_Parser(parsing_pattern)
result = test_parser.parse(example_html)
pr(result)
示例#25
0
    import traceback
    from pprint import pprint as pr

    flog = open('err.log', 'wb')

    def run(host):
        """Download the benign_01 .. benign_14 case directories and log each outcome.

        The FTP server address is fixed in the body; ``host`` is only used as
        the label written to ``flog``. Any exception is printed and logged,
        after which the loop moves on to the next directory.
        """
        ok_template = ('%s\nok\n'
                       '%d directories, %d(%d failed) files, %d unknown type\n\n\n')
        for x in range(1, 15):
            try:
                fd = FtpDownloader(host='figment.csee.usf.edu',
                                   user='******',
                                   passwd='',
                                   port=21,
                                   timeout=10)
                counts = fd.downloadDir(
                    rdir='/pub/DDSM/cases/benigns/benign_%02d/' % (x),
                    ldir='./download/',
                    tree=None,
                    errHandleFunc=None,
                    verbose=True)
                numDir, numFile, numUnknown, numDownErr = counts
                flog.write(ok_template %
                           (host, numDir, numFile, numDownErr, numUnknown))
            except Exception:
                traceback.print_exc()
                flog.write('%s\nerror\n%s\n\n\n' %
                           (host, traceback.format_exc()))

    pr(run(sys.argv[0]))
    flog.close()
        [
            struct.pack(">I", 1),  # msgid
            "\x00" * 4,  # msgcall
            "\x00\x00\x00\x02",  # rpc version
            "\x55" * 4,  # wdb programe number
            "\x00\x00\x00\x01",  # programe version
            struct.pack(">I", 123),  # function number: WDB_TGT_INFO_GET = 123
            "\x00" * 16,
            "\x00" * 4,
            "\x00\x00\x00\x44",  # packet length
            struct.pack(">I", 1),  # msg seq
            "\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00",  # parameter
        ]
    )
    try:
        sock.sendto(infoReq, (host, port))
        resp2 = sock.recv(65536)
    except socket.timeout as err:
        resp2 = ""

    return "vxworks" in resp2.lower(), resp1, resp2


if __name__ == "__main__":
    # Command-line entry point: print the results of scanV1 and scanV2.
    import sys
    from pprint import pprint as pr

    # scanV1 receives the first command-line argument.
    pr(scanV1(sys.argv[1]))
    # Bare ``print`` emits an empty separator line under Python 2.
    # NOTE(review): under Python 3 this line is a no-op expression.
    print
    # NOTE(review): scanV2 receives the *second* argument, so two arguments
    # are required -- confirm this is intended rather than the same host.
    pr(scanV2(sys.argv[2]))