def main():
    # path = 'ExampleData/input_files/'
    # endPath = 'ExampleData/test/'
    path, endPath = getFilePath()
    reader_fgi = list(csv.reader(open(path + "fGI_stats.csv", "rt", encoding="ascii"), delimiter=","))
    reader_core = list(csv.reader(open(path + "Core_attfGI.csv", "rt", encoding="ascii"), delimiter=","))
    genomeListing = list(open(path + "db.txt", "r"))

    genomeClusterDict = pickle.load(open(path + "genomeCluster.dict", "rb"))
    genomeLocusDict = pickle.load(open(path + "genomeLocus.dict", "rb"))
    coreDict, fgiDict = createCoreClusterDict(reader_core)

    # genome = 'E00002'
    genomeIdDict = {}
    index = 3
    for genome2 in genomeListing:
        if "\n" in genome2:
            genome2 = genome2[0:-1]
        genomeIdDict[genome2] = index
        index += 1

    for genome in genomeIdDict:
        genomeDict = createfgiInsertDict(reader_fgi, genome)
        referenceList = createfGIFeatures(
            genomeDict, coreDict, fgiDict, genomeClusterDict, genomeLocusDict, genome, genomeIdDict[genome]
        )
        writeFile(endPath, genome, referenceList)

Example #2
def _remove_persistent(put_queue_name, get_queue_name, pickled, pos_name):
    try:
        try:
            with open(pos_name, 'r') as pn:
                get_pos = int(pn.read())
        except FileNotFoundError:
            get_pos = 0
        os.remove(pos_name)
        method = 'ab' if pickled else 'a'
        with open(put_queue_name, method) as put_queue, \
                open(get_queue_name, _file_method('r', pickled)) as f:
            if pickled:
                for _ in range(get_pos):
                    pickle.load(f)
                while True:
                    try:
                        pickle.dump(pickle.load(f), put_queue)
                    except EOFError:
                        break
            else:
                for _ in range(get_pos):
                    next(f)
                for line in f:
                    put_queue.write(line.strip('\n') + '\n')
        _touch(get_queue_name, pickled=pickled)
    except (Empty, FileNotFoundError):
        pass
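The snippet above relies on two helpers that are not shown. A minimal sketch of what they might look like, assuming `_file_method` only adds a binary flag for pickled queues and `_touch` recreates an empty queue file (both bodies are assumptions, not taken from the original source):

def _file_method(mode, pickled):
    # pickle needs binary file handles; plain-text queues use text mode
    return mode + 'b' if pickled else mode

def _touch(queue_name, pickled=False):
    # recreate the queue as an empty file in the matching mode
    with open(queue_name, _file_method('w', pickled)):
        pass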
Example #3
def _load_data(datadir, tetrodes):
    
    import os
    import re
    import pickle as pkl
    
    filelist = os.listdir(datadir)
    filelist.sort()
    
    reg = [ re.search(r'(\w+)_(\w+)\.(\w+)', filename) for filename in filelist ]
    reg = [ r for r in reg if r is not None ]
    
    ext = ['bhv', 'cls', 'syn', 'ons']
    data = dict.fromkeys(ext)
    data['cls'] = {}
    
    # Load the data files into data dictionary
    for r in reg:
        if r.group(3) in ['bhv', 'syn', 'ons']:
            file = os.path.join(datadir, r.group(0))
            with open(file,'r') as f:
                data[r.group(3)] = pkl.load(f)
        elif r.group(3) == 'cls':
            for tetrode in tetrodes:
                file = '%s/%s.%s' % (datadir, r.group(0), tetrode)
                with open(file,'r') as f:
                    data['cls'].update({tetrode:pkl.load(f)})
            
    # Checking to make sure the data files were loaded
    if None in data.viewvalues():
        for key, value in data.iteritems():
            if value == None:
                raise Exception, '%s file wasn\'t loaded properly' % key
    
    return data
Example #4
def test_pickle_model():
    tmpdir = tempfile.mkdtemp()
    pickle_file = os.path.join(tmpdir, 'stanmodel.pkl')
    model_code = 'parameters {real y;} model {y ~ normal(0,1);}'
    m = pystan.StanModel(model_code=model_code, model_name="normal1",
                         save_dso=False)
    module_name = m.module.__name__
    with open(pickle_file, 'wb') as f:
        pickle.dump(m, f)
    del m
    del sys.modules[module_name]

    with open(pickle_file, 'rb') as f:
        m = pickle.load(f)
    assert m.model_name == "normal1"

    m = pystan.StanModel(model_code=model_code, model_name="normal2")
    module_name = m.module.__name__
    module_filename = m.module.__file__
    with open(pickle_file, 'wb') as f:
        pickle.dump(m, f)
    del m
    del sys.modules[module_name]

    with open(pickle_file, 'rb') as f:
        m = pickle.load(f)
    assert m.model_name == "normal2"
    assert m.module is not None
    assert module_filename != m.module.__file__
    fit = m.sampling()
    y = fit.extract()['y']
    assert len(y) == 4000
Example #5
def main():
    pickle_folder = "../mir_1k/pickles"
    pickle_folders_to_load = [f for f in os.listdir(pickle_folder) if "__beat_spec.pick" in f]
    pickle_folders_to_load = sorted(pickle_folders_to_load)

    sdr_type = "background"

    fits = []
    sdrs = []
    for pick in pickle_folders_to_load:
        pick = pick.replace("__beat_spec.pick", "")
        beat_spec_path = join(pickle_folder, pick + "__beat_spec.pick")
        beat_spec = pickle.load(open(beat_spec_path, "rb"))

        entropy, log_mean = beat_spectrum_prediction_statistics(beat_spec)
        fit_X = [entropy, log_mean]
        fits.append(fit_X)

        sdrs_name = join(pickle_folder, pick + "__sdrs.pick")
        sdr_vals = pickle.load(open(sdrs_name, "rb"))
        cur_sdr = sdr_vals[sdr_type][0]
        sdrs.append(cur_sdr)

    fits = np.array(fits)
    sdrs = np.array(sdrs).reshape(-1, 1)
    knn = neighbors.KNeighborsRegressor(5, weights="distance")
    scores = cross_validation.cross_val_predict(knn, fits, sdrs, cv=10, verbose=1)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
Example #6
def main():
    t_c_matrix = open("t_c_matrix", "r")
    t_set      = open("t_set", "r")
    word_dict  = defaultdict(float)

    word_context_matrix = pickle.load(t_c_matrix)
    word_set = pickle.load(t_set)
    word_set = list(word_set)

#    print word_set.index("Spain")
#    print word_set.index("England")
#    print word_set.index("Athens")
    
    vec1 = word_context_matrix[word_set.index("Spain")]
    vec2 = word_context_matrix[word_set.index("England")]
    vec3 = word_context_matrix[word_set.index("Athens")]
    vec  = vec1 - vec2 + vec3

    for index, word in enumerate(word_set):
        sim = cos_sim(word_context_matrix[index], vec)
        word_dict[word] = sim

    count = 0
    for word, sim in sorted(word_dict.items(), key = lambda x:-x[1]):
        print word + "\t" + str(sim)
        count += 1
        if count == 9:
            break
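`cos_sim` is not defined in the snippet above; a minimal sketch of the cosine-similarity helper it presumably refers to, assuming the matrix rows are dense numpy vectors:

import numpy as np

def cos_sim(v1, v2):
    # cosine similarity between two dense vectors; 0.0 if either vector is all zeros
    denom = np.linalg.norm(v1) * np.linalg.norm(v2)
    return float(np.dot(v1, v2) / denom) if denom else 0.0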
Example #7
File: main.py Project: fei6409/IR
def TF_IDF():
    print('Doing TF_IDF', file=sys.stderr)
    global TFIDF, docWeight, index

    if os.path.isfile('TFIDF.dat') and os.path.isfile('docWeight.dat') and os.path.isfile('index.dat'): 
        f = open('TFIDF.dat', 'rb')
        TFIDF = pickle.load(f)
        f.close()
        f = open('docWeight.dat', 'rb')
        docWeight = pickle.load(f)
        f.close()
        f = open('index.dat', 'rb')
        index = pickle.load(f)
        f.close()

    else:
        print('.dat not exist, generating', file=sys.stderr)

        TFIDF = {}
        docCnt = len(docSize)
        avgSize = 0
        index = [[] for i in range(docCnt)]
        for i in range(docCnt):
            avgSize += docSize[i]
        avgSize /= docCnt
        
        docWeight = [0 for i in range(docCnt)]
        para_b = 0.7 # tuning
        d = [(1 - para_b + para_b*docSize[i]/avgSize) for i in range(docCnt)]


        for i in invIndexUnigram: # word id
            IDF = math.log( docCnt / len(invIndexUnigram[i]) )
            TFIDF[i] = {}
            for j in invIndexUnigram[i]: # doc id
                v =  (invIndexUnigram[i][j] / d[j]) * IDF
                TFIDF[i][j] = v
                docWeight[j] += v * v
                index[j].append(i)

        for i in invIndexBigram: # word id
            IDF = math.log( docCnt / len(invIndexBigram[i]) )
            TFIDF[i] = {}
            for j in invIndexBigram[i]: # doc id
                v =  (invIndexBigram[i][j] / d[j]) * IDF
                TFIDF[i][j] = v
                docWeight[j] += v * v
                index[j].append(i)

        f = open('TFIDF.dat', 'wb')
        pickle.dump(TFIDF, f)
        f.close()
        f = open('docWeight.dat', 'wb')
        pickle.dump(docWeight, f)
        f.close()
        f = open('index.dat', 'wb')
        pickle.dump(index, f)
        f.close()

    printTime()
Example #8
    def dump_file(self, content):
        counter = 0
        while True:
            while True:
                try:
                    fl = open(self.PATH+'.tmp', 'wb')
                    pickle.dump(content, fl)
                    fl.close()
                    fl = open(self.PATH+'.tmp','rb')
                    h2 = pickle.load(fl)
                    fl.close()
                    assert h2 == content
                    break
                except:
                    #print '\nThere was an error dumping the history!\n'\
                    #'This happened %d times so far, trying again...'%(counter)
                    counter += 1
            try:
                if os.path.exists(self.PATH):
                    os.remove(self.PATH)
                os.rename(self.PATH+'.tmp',self.PATH)
                fl = open(self.PATH,'rb')
                h2 = pickle.load(fl)
                fl.close()
                assert h2 == content
                break
            except:
                pass
Example #9
def sort_warheroes_list():
    data_list = pickle.load(open("war_base", "rb"))
    war_base_url = "http://www.warheroes.ru/hero/hero.asp?Hero_id="
    session = requests.session()
    for unit in data_list:
        num = unit[0]
        title = unit[1]
        print(unit)
        id = str(num)
        war_url = war_base_url + id
        is_get_res = False
        try_count = 0
        war_data = ""  # fallback in case every request attempt fails
        while not is_get_res and try_count < 10:
            try:
                res = session.get(war_url)
                war_data = res.text
                is_get_res = True
            except:
                try_count += 1
                time.sleep(1)
        if is_otrs_exists(war_data):
            try:
                print("  - otrs exists")
                good_list = pickle.load(open("free_war_base", "rb"))
                good_list.append(unit)
                pickle.dump(good_list, open("free_war_base", "wb"))
            except:
                tools.debug_print("Bad pickle save3. "+id)
        else:
            try:
                bad_list = pickle.load(open("nonfree_war_base", "rb"))
                bad_list.append(unit)
                pickle.dump(bad_list, open("nonfree_war_base", "wb"))
            except:
                tools.debug_print("Bad pickle save3. "+id)
Example #10
def main():
    """loads data, trains model, tests model

    Inputs:
        file: binary file containing sparse numpy array with text features
        file: binary file containing pandas dataframe with training labels

    Outs:
        print: classification report of classifier performance

    """

    # Load training labels and text features
    chdir("../pickles")
    with open("word_counts.pkl", "rb") as f:
        X = pickle.load(f)
    with open("training_labels.pkl", "rb") as f:
        y = pickle.load(f)
        y = np.ravel(y["sponsored"])

    # Create train and test splits
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

    # Create and train model
    clf = MultinomialNB()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print(classification_report(y_test, y_pred))
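Not part of the original snippet: a minimal sketch of how the fitted classifier could itself be cached with pickle for later reuse, mirroring the loading pattern above (the "nb_model.pkl" filename is hypothetical):

import pickle

with open("nb_model.pkl", "wb") as f:  # hypothetical cache file
    pickle.dump(clf, f, protocol=pickle.HIGHEST_PROTOCOL)

with open("nb_model.pkl", "rb") as f:
    clf_cached = pickle.load(f)
print(classification_report(y_test, clf_cached.predict(X_test)))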
Example #11
def test_pickleable():
    interval = pybedtools.create_interval_from_list(
        ['chr1', '1', '100', 'asdf'])
    fn = pybedtools.BedTool._tmp()
    import pickle
    out = open(fn, 'w')
    pickle.dump(interval, out)
    out.close()
    new_interval = pickle.load(open(fn))
    assert str(interval) == str(new_interval)

    interval = pybedtools.create_interval_from_list(
        ['chr1', '1', '100'])
    fn = pybedtools.BedTool._tmp()
    import pickle
    out = open(fn, 'w')
    pickle.dump(interval, out)
    out.close()
    new_interval = pickle.load(open(fn))
    assert str(interval) == str(new_interval)

    interval = pybedtools.create_interval_from_list(
        "chr2L	.	UTR	41	70	0	+	.	ID=mRNA:xs2:UTR:41-70;Parent=mRNA:xs2;".split('\t'))
    fn = pybedtools.BedTool._tmp()
    import pickle
    out = open(fn, 'w')
    pickle.dump(interval, out)
    out.close()
    new_interval = pickle.load(open(fn))
    assert str(interval) == str(new_interval)
Example #12
    def sortFilesByPlayback(self):
        originalFiles=[]
        historyMovFile = self.getHistoryMovFileName()
        if(os.path.exists(historyMovFile)):
            infile = open(historyMovFile,"rb")
            originalFiles = pickle.load(infile)
            infile.close()

        finalFiles = []
        favoriteFiles = []
        favoriteFileName = self.getFavoriteFileName()
        if(os.path.exists(favoriteFileName)):
            infile = open(favoriteFileName,"rb")
            favoriteFiles = pickle.load(infile)
            infile.close()       
        favoriteLen = len(favoriteFiles)
        originalLen = len(originalFiles)
        if favoriteLen == 0 or originalLen == 0:
            return
        else:
            for i in range(favoriteLen):
                try:
                    finalFiles.append(favoriteFiles[i][1])
                    originalFiles.remove(favoriteFiles[i][1])
                except:
                    pass
                    

        finalFiles = finalFiles + originalFiles
        ## print finalFiles
     
        outfile = open(historyMovFile,"wb")
        pickle.dump(finalFiles, outfile,2)
        outfile.close()
        self.showAllImage()
Example #13
def search_key(index, keyword):
    """ search museum dictionary """
    #load dictionary
    dict_file = open('data.pkl', 'rb')
    dictionar = pickle.load(dict_file)
    dict_file.close()
    #load headers
    head_file = open('headers.hd', 'rb')
    header = pickle.load(head_file)
    head_file.close()
    #find keyword
    muzee = []
    if keyword == "":
        for i in range(len(dictionar[header[0]])):
            muzee.append({'cod': dictionar[header[0]][i],
                         'judet': dictionar[header[2]][i].decode(encoding="UTF-8"),
                         'nume': dictionar[header[3]][i].decode(encoding="UTF-8"),
                         'lat': sub(',', '.', dictionar[header[35]][i]),
                         'lng': sub(',', '.', dictionar[header[36]][i])})
    else:
        for i in range(len(dictionar[header[index]])):
            new_word = dictionar[header[index]][i].decode(encoding='UTF-8').lower()
            keyword = keyword.lower()
            if keyword in new_word:
                muzee.append({'cod': dictionar[header[0]][i],
                              'judet': dictionar[header[2]][i].decode(encoding="UTF-8"),
                              'nume': dictionar[header[3]][i].decode(encoding="UTF-8"),
                              'lat': sub(',', '.', dictionar[header[35]][i]),
                              'lng': sub(',', '.', dictionar[header[36]][i])})
    return muzee
Example #14
def test_feature_extraction():
    np.random.seed(33)
    print "Testing implementation of feature extraction..."

    import featureLearner as fl
    k = 5
    learner = fl.FeatureLearner(k)
    learner.trained = True

    image = util.loadTrainImages()[33]

    # load test centroids and features
    testDat = open('data/kmeans_test.npy','r')
    centroids = pickle.load(testDat)
    testDat.close()
    testDat = open('data/features_test.npy','r')
    features = pickle.load(testDat)
    testDat.close()

    learner.centroids = centroids

    studentFeats = learner.extractFeatures(image)
    assert isinstance(studentFeats,np.ndarray),"Features should be in an numpy array"
    assert studentFeats.shape==features.shape,"Dimension mismatch"
    studentFeatsList = studentFeats.tolist()
    if np.abs(np.sum(features)-np.sum(studentFeats)) > 1e-3:
        print "Feature mismatch, test failed"
        return

    if np.abs(np.sqrt(np.sum(features**2)) - np.sqrt(np.sum(studentFeats**2))) > 1e-3:
        print "Feature mismatch, test failed"
        return
    
    print "Feature extraction test passed"
Example #15
    def save_settings(self, server, port, path, username, password):

        # load config file or create a new one
        if os.path.isfile(self._config_path):
            print "Found ftp config file - saving new settings!"
            with open(self._config_path) as f:
                l = pickle.load(f)

            l['server'] = str(server)
            l['port'] = str(port)
            l['path'] = str(path)
            l['username'] = str(username)
            l['pass'] = str(password)

            with open(self._config_path, 'w') as f:
                pickle.dump(l,f)
                print "Saved successful ftp-config!"

        else:
            print "Did not found ftp config file, creating new one!"
            with open(self._config_path, 'w') as f:
                pickle.dump(
                            {
                             'server': server,
                             'port': port,
                             'path': path,
                             'username': username,
                             'pass': password,
                             }, f)

            with open(self._config_path) as f:
                l = pickle.load(f)
                print "Saved successful ftp-config!"
Example #16
def adauga_muzeu(muzeu):
    """adauga intrare noua in dictionar"""
    #load dictionary
    dict_file = open('data.pkl', 'rb')
    dictionar = pickle.load(dict_file)
    dict_file.close()
    #load headers
    head_file = open('headers.hd', 'rb')
    header = pickle.load(head_file)
    head_file.close()
    request.args.get('nume')
    #read info from form
    if request.method == 'GET':
        target_fields = {3: 'nume', 2: 'judet', 17: 'descriere', 35: 'lat', 36: 'lng'}
        for i in range(len(header)):
            if i in target_fields.keys():
                dictionar[header[i]].append(request.args.get(target_fields[i]))
            elif i == 0:
                dictionar[header[i]].append(get_next_code(dictionar, header))
            else:
                dictionar[header[i]].append("")
    output = open('data.pkl', 'wb')
    pickle.dump(dictionar, output)
    output.close()
    return redirect("/")
Example #17
def read_encrypted_logs(username, filelogs):
    names = []
    my_key = RSA.importKey(open(username + '.pri').read())
    cipher = PKCS1_OAEP.new(my_key, SHA256.new())

    for log in filelogs:
        file_log_hash = SHA256.new()
        with open(log, 'rb') as input:
            dictlog = pickle.load(input)
            sig = pickle.load(input)
        owner = dictlog['owner']
        with open(owner + '.dsapub', 'rb') as input:
            owner_msk = pickle.load(input)
        length = len(dictlog)
        with open(log, 'rb') as outfile:
            picklelog = outfile.read(length)
        file_log_hash.update(picklelog)

        if not owner_msk.verify(file_log_hash.digest(), sig):
            print('invalid file')

        if username in dictlog:
            block = dictlog[username]
            block.decrypt_permission_block(cipher)
            name = decrypt_filename(block.get_file_encryption_key(), log[0:-5])
            names.append(name)
              
    return names
Example #18
def rename_directory(username, old_filename, new_filename):
    filelog = old_filename + '.flog'
    with open(filelog, 'rb') as input:
        log = pickle.load(input)
        sig = pickle.load(input)
        
    block = log[username]

    key = RSA.importKey(open(username + '.pri').read())
    cipher = PKCS1_OAEP.new(key, SHA256.new())
    block.decrypt_permission_block(cipher)
    encrypted_new_filename = encrypt_filename(block.get_file_encryption_key(), new_filename)

    log['encrypted_name'] = encrypted_new_filename

    new_filelog = encrypted_new_filename + '.dlog'
    with open(new_filelog, 'wb') as outfile:
        pickle.dump(log, outfile)
    length = len(log)
    with open(new_filelog, 'rb') as infile:
        picklelog = infile.read(length)
    file_log_hash = SHA256.new()
    file_log_hash.update(picklelog)
    with open(username + '.dsa', 'rb') as infile:
        owner_msk = pickle.load(infile)
    k = random.StrongRandom().randint(1,owner_msk.q-1)
    sig = owner_msk.sign(file_log_hash.digest(), k)
    with open(new_filelog, 'a+b') as outfile:
        pickle.dump(sig, outfile, -1)

    return encrypted_new_filename
Example #19
def main():
    parser = buildArgsParser()
    args = parser.parse_args()
    ocr = args.ocr
    validation_file = args.validation_file
    train_file = args.train_file
    test_file = args.test_file
    is_vizu = args.is_vizu

    if not os.path.isfile(ocr):
        parser.error("-ocr '{0}' doit être un fichier!".format(os.path.abspath(ocr)))

    if not os.path.isfile(train_file):
        parser.error("-train '{0}' doit être un fichier!".format(os.path.abspath(train_file)))

    if not os.path.isfile(test_file):
        parser.error("-test '{0}' doit être un fichier!".format(os.path.abspath(test_file)))

    if not os.path.isfile(validation_file):
        parser.error("-valider '{0}' doit être un fichier!".format(os.path.abspath(validation_file)))

    with open(train_file, 'rb') as f:
        train = pickle.load(f)

    with open(test_file, 'rb') as f:
        test = pickle.load(f)

    reseau_de_neurones = recognize_characters(ocr, train, test, validation_file)

    validate_ocr(reseau_de_neurones, validation_file)

    if is_vizu:
        # Visualization of the predictions on the test set
        show_recognized_characters(reseau_de_neurones, test)
Example #20
def share_directory(other_username, dlog):
    with open(dlog, 'rb') as input:
        log = pickle.load(input)

    userList = log['users']
    userList.append(other_username)
    owner = log['owner']
    owner_block = log[owner]
    key = RSA.importKey(open(owner + '.pri').read())
    cipher = PKCS1_OAEP.new(key, SHA256.new())
    owner_block.decrypt_permission_block(cipher)
    file_aes_key = owner_block.get_file_encryption_key()
    file_dsa_key = None
    user_block = AccessBlock(file_aes_key, file_dsa_key)
    other_key = RSA.importKey(open(other_username + '.pub').read())
    other_cipher = PKCS1_OAEP.new(other_key, SHA256.new())
    user_block.encrypt_permission_block(other_cipher)
    log[other_username] = user_block
    file_log_hash = SHA256.new()
    with open(dlog, 'wb') as infile:
        pickle.dump(log, infile, -1)
    length = len(log)
    with open(dlog, 'rb') as outfile:
        picklelog = outfile.read(length)
    file_log_hash.update(picklelog)
    with open(owner + '.dsa', 'rb') as infile:
        owner_msk = pickle.load(infile)
    k = random.StrongRandom().randint(1,owner_msk.q-1)
    sig = owner_msk.sign(file_log_hash.digest(), k)
    with open(dlog, 'a+b') as outfile:
        pickle.dump(sig, outfile, -1)    
Example #21
   def __init__(self, pos_doc_file = "positive.p",
                     neg_doc_file = "negative.p",
                     num_doc_pos_file = "num_doc_pos.txt",
                     num_doc_neg_file = "num_doc_neg.txt",
                     j = -1):
      """This method initializes and trains the Naive Bayes Sentiment Classifier.  If a 
      cache of a trained classifier has been stored, it loads this cache.  Otherwise, 
      the system will proceed through training.  After running this method, the classifier 
      is ready to classify input text."""

      self.positive_words = {}
      self.negative_words = {}

      #for presence
      self.num_doc_pos = 0
      self.num_doc_neg = 0

      #If the pickled files exist, then load the dictionaries into memory.
      if os.path.exists(pos_doc_file):
         self.positive_words = pickle.load(open(pos_doc_file, "rb"))
      if os.path.exists(num_doc_pos_file):
         self.num_doc_pos = int(pickle.load(open(num_doc_pos_file, "rb")))

      if os.path.exists(neg_doc_file):
         self.negative_words = pickle.load(open(neg_doc_file, "rb"))
      if os.path.exists(num_doc_neg_file):
         self.num_doc_neg = int(pickle.load(open(num_doc_neg_file, "rb")))

      #If the pickled files do not exist, then train the system.
      else:
         self.train(pos_doc_file, neg_doc_file, num_doc_pos_file, num_doc_neg_file, j)
Example #22
def preprocess(article_file, lable_file, k):

    features = pickle.load(open(article_file))
    features = np.array(features)

    # transform non-numerical labels (as long as they are hashable and comparable) to numerical labels
    lables = pickle.load(open(lable_file))
    le = preprocessing.LabelEncoder()
    le.fit(lables)
    lables = le.transform(lables)
    # print le.inverse_transform([0])

    ### text vectorization--go from strings to lists of numbers
    vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5, min_df=1,
                                 stop_words='english')
    features_train_transformed = vectorizer.fit_transform(features)

    # selector : SelectPercentile
    selector = SelectPercentile(f_classif, percentile=k)
    selector.fit(features_train_transformed, lables)

    # selector : chi2
    # selector = SelectPercentile(score_func=chi2)
    # selector.fit(features_train_transformed, lables)

    features_train_transformed = selector.transform(features_train_transformed).toarray()

    return features_train_transformed, lables, vectorizer, selector, le, features
Example #23
def build_assigned_from_existing(assigned_dir, clusters, regions, nrand):
    
    """
    
    Loads results produced from the above analysis from saved files - can either be switched to
    pickling or removed 
    
    assigned_dir - location of files
    clusters - name of experiment
    regions - list of genic regions 
    nrand - number of shuffled datasets to look for
    
    """
    
    CLUS_regions = {}
    for region in regions:
        CLUS_regions[region]={}
        for n in range(nrand):
            CLUS_regions[region]['rand']={}
    for region in regions:
        bedfile = os.path.join(assigned_dir, "%s.%s.real.BED" %(clusters, region))
        CLUS_regions[region]['real'] = pybedtools.BedTool(bedfile)
        for n in range(nrand):
            randbedfile = os.path.join(assigned_dir, "%s.%s.rand.%s.BED" %(clusters, region, str(n)))                
            CLUS_regions[region]['rand'][n] = pybedtools.BedTool(randbedfile)
    try:
        sizes = pickle.load(open(os.path.join(assigned_dir, "%s.sizes.pickle" %(clusters)), 'rb'))
    except:
        sizes=[1,1,1,1,1]
    try:
        Gsizes = pickle.load(open(os.path.join(assigned_dir, "Gsizes.pickle"), 'rb'))
    except:
        Gsizes=[1,1,1,1,1]

    return CLUS_regions, sizes, Gsizes
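For reference, a minimal sketch of how the "%s.sizes.pickle" file read above might be produced by the upstream analysis (the write_sizes name and default values are placeholders, not part of the original code):

import os
import pickle

def write_sizes(assigned_dir, clusters, sizes=(1, 1, 1, 1, 1)):
    # persist the per-region sizes that build_assigned_from_existing() tries to load
    with open(os.path.join(assigned_dir, "%s.sizes.pickle" % clusters), 'wb') as f:
        pickle.dump(list(sizes), f)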
Example #24
def make_dewpoint(directory):
    """Makes an hourly timeseries of dewpoint temperatures."""

    # open the 3-hr temperature and daily relative humidity files

    tempfile = '{}/average_temperature'.format(directory)
    hfile    = '{}/average_humidity'.format(directory)
    dewfile  = '{}/average_dewpoint'.format(directory)

    if not os.path.isfile(dewfile):

        with open(tempfile, 'rb') as f: ts, Ts  = zip(*pickle.load(f))
        with open(hfile, 'rb') as f:    ts, RHs = zip(*pickle.load(f))

        # aggregate 3-hr temperatures to daily and get the Tmin

        Tmins = [min(Ts[i:i+8]) for i in range(0, len(Ts), 8)]
        Ts    = [sum(Ts[i:i+8]) / 8 for i in range(0, len(Ts), 8)]

        # calculate the daily dewpoint

        dewpoints = calculate_dewpoint(numpy.array(Ts), numpy.array(RHs))
        dewpoints = [(t, min(Tm, T)) for t, T, Tm in zip(ts, dewpoints, Tmins)]

        with open(dewfile, 'wb') as f: pickle.dump(dewpoints, f)
Example #25
def load_data():
    """Loads movie_data, cust_data, and answers from pickles.

    Returns:
        The tuple (movie_data, cust_data, answers) with the objects loaded from
        their pickles.
    """
    # load movie data cache
    if isfile(CACHE_LOC + MOVIE_PICKLE):
        with open(CACHE_LOC + MOVIE_PICKLE, 'rb') as movie_file:
            movie_data = load(movie_file)
    else:
        movie_data = loads(urlopen(CACHE_URL + MOVIE_PICKLE).read())
    # load customer data cache
    if isfile(CACHE_LOC + CUSTOMER_PICKLE):
        with open(CACHE_LOC + CUSTOMER_PICKLE, 'rb') as cust_file:
            cust_data = load(cust_file)
    else:
        cust_data = loads(urlopen(CACHE_URL + CUSTOMER_PICKLE).read())
    # load answers
    if isfile(CACHE_LOC + ANSWER_PICKLE):
        with open(CACHE_LOC + ANSWER_PICKLE, 'rb') as answer_file:
            answers = load(answer_file)
    else:
        answers = loads(urlopen(CACHE_URL + ANSWER_PICKLE).read())
    return(movie_data, cust_data, answers)
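A possible extension, not in the original: writing a freshly downloaded object back to the local cache so later runs hit the isfile() branch. It assumes the same CACHE_LOC and *_PICKLE constants and that dump is imported from pickle alongside load/loads:

from pickle import dump

def cache_locally(obj, pickle_name):
    # persist a fetched object next to the other cached pickles
    with open(CACHE_LOC + pickle_name, 'wb') as f:
        dump(obj, f)

# e.g. cache_locally(movie_data, MOVIE_PICKLE) after the urlopen fallback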
Example #26
    def load(filename):
        """
        Restores a PPSD instance from a file.

        Automatically determines whether the file was saved with compression
        enabled or disabled.

        :type filename: str
        :param filename: Name of file containing the pickled PPSD object
        """
        # identify bzip2 compressed file using bzip2's magic number
        bz2_magic = b'\x42\x5a\x68'
        with open(filename, 'rb') as file_:
            file_start = file_.read(len(bz2_magic))

        if file_start == bz2_magic:
            # In theory a file containing random data could also start with the
            # bzip2 magic number. However, since save() (implicitly) uses
            # version "0" of the pickle protocol, the pickled data is
            # guaranteed to be ASCII encoded and hence cannot start with this
            # magic number.
            # cf. http://docs.python.org/2/library/pickle.html
            #
            # due to a bug in older python versions we can't use "with"
            # http://bugs.python.org/issue8601
            file_ = bz2.BZ2File(filename, 'rb')
            ppsd = pickle.load(file_)
            file_.close()
        else:
            with open(filename, 'rb') as file_:
                ppsd = pickle.load(file_)

        return ppsd
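For context, a minimal sketch of writing a bzip2-compressed pickle so that the file starts with the magic number checked above. This is only a generic illustration of the idea, not obspy's actual PPSD save() implementation:

import bz2
import pickle

def save_compressed(obj, filename):
    # the resulting file begins with b'\x42\x5a\x68', which load() detects
    with bz2.BZ2File(filename, 'wb') as f:
        pickle.dump(obj, f, protocol=0)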
Example #27
def line_position():
	global black_line, white_line, range_col
	# load default values from files
	try:
		with open(file_b, 'rb') as f:
			black_line = pickle.load(f)
	except:
		black_line = [0] * 5

	try:
		with open(file_w, 'rb') as f:
			white_line = pickle.load(f)
	except:
		white_line = [0] * 5

	try:
		with open(file_r, 'rb') as f:
			range_col = pickle.load(f)
	except:
		range_col = [0] * 5

	curr = get_sensorval()
	diff_val = list(map(operator.sub, curr, white_line))
	curr_pos = 0
	percent_black_line = [0] * 5
	for i in range(5):
		percent_black_line[i] = diff_val[i] * 100 / range_col[i]
		curr_pos += percent_black_line[i] * multp[i]
	return curr_pos
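The calibration pickles (file_b, file_w, file_r) read above have to be produced by an earlier calibration step; a minimal sketch of one such step, assuming the get_sensorval() helper and the file_b path from the snippet:

def calibrate_black():
    # sample the sensors over the black line and persist the readings
    black = get_sensorval()
    with open(file_b, 'wb') as f:
        pickle.dump(black, f)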
Example #28
def plot_TP():
    with open ('p_files/ROC_table_' + str(seg) + '_pc' + str(p) + '_k' + str(k) + '.p', 'rb') as f:
        ROC_table = pickle.load(f)
    with open ('p_files/species_stats.p', 'rb') as f:
        species_table = pickle.load(f)

    with open ('p_files/species.p', 'rb') as f:
        species_name = pickle.load(f)

    xes = []#[item['number'] for item in ROC_table.values()]
    yes = []#[item['fp'] for item in ROC_table.values()]
    label = []
    low_TP = []
    for specie in ROC_table:
        xes.append(species_table[specie])
        yes.append(ROC_table[specie]['tp_rate'])
        label.append(specie)
        if float(ROC_table[specie]['tp_rate']) < 0.3 and species_table[specie] > 100:
            #print(ROC_table[specie]['tp_rate'])
            low_TP.append((specie, ROC_table[specie]['tp']))

    fig, ax = plt.subplots()
    plt.subplots_adjust(bottom=0.1)
    ax.plot([0,max(xes)],[0.3,0.3], ls="--")
    ax.scatter(xes, yes, marker = '.')
    for i, txt in enumerate(label):
        if txt in interesting:
        #if float(yes[i]) < 0.3 and xes[i] > 100 :
            #print(txt)
            #ax.annotate(parser.get_specie_name('../data/train/', str(species_name[txt][0]) + '.xml'), (xes[i],yes[i]))
            ax.annotate(txt, (xes[i],yes[i]))
    plt.suptitle('False Positive', fontsize = 14)
#    ax.set_xlabel('Principal Components')
#    ax.set_ylabel('Percentage of Variance')
    plt.show()
Example #29
def averages_if_less(seg, p, k, n):
    with open ('p_files/ROC_table_' + str(seg) + '_pc' + str(p) + '_k' + str(k) + '.p', 'rb') as f:
        ROC_table = pickle.load(f)
    with open ('p_files/species_stats.p', 'rb') as f:
        species_table = pickle.load(f)

    ROC_table = dict((k, ROC_table[k]) for k in ROC_table.keys() if species_table[k] > n)
    #print(len(ROC_table))
    #sys.exit(2)

    recall_sum = sum(float(item['tp_rate']) for item in ROC_table.values())
    recall_sum_percentage = recall_sum * 100.0
    recall_average = recall_sum_percentage / len(ROC_table)

    #print('Average of recall for ', seg, 'with ', p, ' PCs and ', k, ' neighbours ', recall_average)

    precision_sum = sum(float(item['precision']) for item in ROC_table.values())
    precision_sum_percentage = precision_sum * 100.0
    precision_average = precision_sum_percentage / len(ROC_table)

    #print('Average of precision for ', seg, 'with ', p, ' PCs and ', k, ' neighbours ', precision_average)

    f_sum = sum(float(item['f_measure']) for item in ROC_table.values())
    f_sum_percentage = f_sum * 100.0
    f_average = f_sum_percentage / len(ROC_table)

    #print('Average of F-measure for ', seg, 'with ', p, ' PCs and ', k, ' neighbours ', f_average)

    tp_sum = sum(float(item['tp']) for item in ROC_table.values())
    all_instances = sum(float(item['number']) for item in ROC_table.values())
    average = tp_sum / all_instances
    return recall_average, precision_average, f_average, average
Example #30
def build_series(concepts, vector_dir, min_score=.5):
    concept_series = defaultdict(lambda: [])
    
    for concept in concepts:
        print '\t', concept['id']
        pos_concept = concept['id']
        neg_concept = 'random'
        
        pipeline = pickle.load(open(concept_pipeline(pos_concept, neg_concept), 'rb'))        
        classifier = pipeline.named_steps['classifier']
                
        concept_series[pos_concept].append( ['Week', 'Signal'] )
        
        vecfiles = os.listdir(vector_dir)
        vecfiles.sort()
            
        for vecfile in vecfiles:
            X_sel = pickle.load(open(vector_dir+'/'+vecfile, 'r'))
            tokens = vecfile.split('-')
            week_dir = '-'.join(tokens[:3])
                    
            probs = classifier.predict_proba(X_sel)                
            pos_indices = [ i for i in range(X_sel.shape[0]) if probs[i,1] > min_score]    
            range_pct = (len(pos_indices) / float(X_sel.shape[0])) * 100.0
            
            concept_series[pos_concept].append( [week_dir, range_pct] )

    return concept_series
Example #31
def prepareData(dataFile, nSample, doPlot=False):

    with open(dataFile, 'rb') as file:
        molDataGroupedChosen = pickle.load(file)

    #nSmilesCodes = 200000
    nSmilesMore = np.min([molDataGroupedChosen.shape[0], int(1.2 * nSample)])
    mask = random.randint(0, molDataGroupedChosen.shape[0], size=nSmilesMore)
    #mask = random.randint(0, molDataGroupedChosen.shape[0], size=nSmilesCodes)
    mask = molDataGroupedChosen.index
    staticFeatures = pd.DataFrame()
    toBeAveraged = [
        'standard_value', 'alogp', 'hba', 'hbd', 'psa', 'rtb', 'full_mwt',
        'qed_weighted'
    ]
    for quantity in toBeAveraged:
        staticFeatures.loc[:, quantity] = (
            molDataGroupedChosen.loc[mask, (quantity, 'min')] +
            molDataGroupedChosen.loc[mask, (quantity, 'max')]) / 2
        staticFeatures.loc[:, quantity].astype(float)
    toBeTaken = ['aromatic_rings', 'heavy_atoms']
    for quantity in toBeTaken:
        staticFeatures.loc[:, quantity] = molDataGroupedChosen.loc[mask,
                                                                   (quantity,
                                                                    'min')]
        staticFeatures.loc[:, quantity].astype(float)
    staticFeatures.loc[:, 'number_of_rings'] = molDataGroupedChosen.loc[
        mask, 'numberOfRings'].astype(float)

    staticFeatures['full_mwt'] = staticFeatures.full_mwt.astype(float)
    staticFeatures['qed_weighted'] = staticFeatures.qed_weighted.astype(float)
    staticFeatures['aromatic_rings'] = staticFeatures.aromatic_rings.astype(
        float)
    staticFeatures['smiles_length'] = molDataGroupedChosen.loc[
        staticFeatures.index, 'canonicalSmiles'].apply(lambda x: len(x))

    # Remove rows with nans
    staticFeatures = staticFeatures.dropna()

    # Filter the smiles from given length range
    staticFeatures = staticFeatures[(staticFeatures['smiles_length'] >= 40)
                                    & (staticFeatures['smiles_length'] <= 60)]

    thres = 100000
    print(staticFeatures[staticFeatures['standard_value'] < thres].shape[0] /
          staticFeatures['standard_value'].shape[0])

    staticFeatures = staticFeatures[staticFeatures['standard_value'] < thres]

    staticFeatures = staticFeatures.sample(nSample)

    allDescriptors = [
        'standard_value', 'alogp', 'hba', 'hbd', 'psa', 'rtb', 'full_mwt',
        'qed_weighted', 'aromatic_rings', 'heavy_atoms', 'number_of_rings',
        'smiles_length'
    ]

    if (doPlot):
        plotIdx = 1
        nRows = np.ceil(len(allDescriptors) / 2)
        fig = plt.figure(figsize=(16, 16))
        for quantity in allDescriptors:
            print(quantity)
            plt.subplot(nRows, 2, plotIdx)
            plt.hist(
                staticFeatures[~staticFeatures[quantity].isnull()][quantity],
                bins=10)
            plt.title(quantity)
            plotIdx += 1

    smilesCodes = molDataGroupedChosen.loc[staticFeatures.index,
                                           'encodedSmiles']

    maxlen = -1
    for code in smilesCodes:
        if len(code) > maxlen:
            maxlen = len(code)
    maxlen

    minlen = 1e6
    for code in smilesCodes:
        if len(code) < minlen:
            minlen = len(code)
    minlen

    # pad the codes to the longest code
    smilesCodes = smilesCodes.apply(
        lambda x: pad_smile(x, max_len=maxlen, padding='right'))

    chars = sorted(list(set(smilesCodes.str.cat(sep=''))))
    print('total chars:', len(chars))
    print(chars)
    char2indices = dict((c, i) for i, c in enumerate(chars))
    indices2char = dict((i, c) for i, c in enumerate(chars))

    dynamicFeatures = np.zeros((len(smilesCodes), maxlen, len(chars)),
                               dtype=np.float)
    print(dynamicFeatures.shape)

    for codeidx, code in enumerate(smilesCodes):
        for charidx, char in enumerate(code):
            dynamicFeatures[codeidx, charidx, char2indices[char]] = 1

    if (doPlot):
        sums = []
        for idx in range(dynamicFeatures.shape[0]):
            sums.append(np.sum(dynamicFeatures[idx, :, :]))
        plt.hist(sums)

    return staticFeatures, dynamicFeatures, char2indices, indices2char
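Not in the original: a small sketch of the inverse mapping, turning one row of dynamicFeatures back into its padded SMILES code via indices2char, which is handy for sanity-checking the one-hot encoding:

def decode_code(one_hot_row, indices2char):
    # one_hot_row has shape (maxlen, len(chars)); argmax recovers each character index
    return ''.join(indices2char[int(i)] for i in one_hot_row.argmax(axis=1))

# e.g. decode_code(dynamicFeatures[0], indices2char) should match the first padded code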
Example #32
# -*- coding: utf-8 -*-

import os
import pickle
import numpy as np
from keras.models import Sequential
import gensim
from keras.layers.recurrent import LSTM, SimpleRNN
from sklearn.model_selection import train_test_split
import theano
theano.config.optimizer = "None"

with open('conversation.pickle', 'rb') as f:
    vec_x, vec_y = pickle.load(f)

vec_x = np.array(vec_x, dtype=np.float64)
vec_y = np.array(vec_y, dtype=np.float64)

x_train, x_test, y_train, y_test = train_test_split(vec_x,
                                                    vec_y,
                                                    test_size=0.2,
                                                    random_state=1)

model = Sequential()
model.add(
    LSTM(output_dim=300,
         input_shape=x_train.shape[1:],
         return_sequences=True,
         init='glorot_normal',
         inner_init='glorot_normal',
         activation='sigmoid'))
Example #33
import tkinter as tk
from tkinter import ttk
import pickle
import bisect
from tkinter import messagebox as msg
from student import Student
from teacher import Teacher
from course import Course

FONT = ("Times New Roman", 16)
currentUser = None

# Opening students.dat to populate students[] -- if this throws an error for you, run initialize_data.py to fix it
with open("students.dat", "rb") as fp:
    students = pickle.load(fp)

# Opening teachers.dat to populate teachers[]
with open("teachers.dat", "rb") as fp:
    teachers = pickle.load(fp)

courses=[]

for student in students:
    for course in student.courses:
        if course.title not in courses:
            bisect.insort(courses, course.title)

print(courses)


class Bookworm(tk.Tk):
Example #34
            dicId[row[0]] = dic
dicIdTrain = {}
with open("/home/ramon/Escritorio/text/pan-ap17-bigdata/training/truth.txt",
          'rt') as csvfile:
    spamreader = csv.reader(csvfile, delimiter=':')
    for row in spamreader:
        dic = {}
        if len(row) > 1:
            dic["sexo"] = row[3]
            dic["nacionalidad"] = row[6]
            dicIdTrain[row[0]] = dic

vocabularioNacional = {}
vocabularioNacionalTest = {}
fileObject = open("tntTrained", 'rb')
b = pickle.load(fileObject)
with open("/home/ramon/Escritorio/text/frases_training.json") as json_data:
    data = json.load(json_data)
    lexicon = {}
    for id in data.keys():
        lexicon = generateBoW(data[id], lexicon, b)
    bolsaPalabras = reduceBow(lexicon, 500)

dicRepeticionesNumeroDePalabras = {}
for i in sorted(bolsaPalabras.keys()):
    dicRepeticionesNumeroDePalabras[math.log(i)] = math.log(
        len(bolsaPalabras[i]))
pdf = pd.DataFrame(list(dicRepeticionesNumeroDePalabras.items()),
                   columns=["repeticiones", "numeroP"])
print(dicRepeticionesNumeroDePalabras)
Example #35
	def __init__(
		self,
		N_d,
		N_s,
		K,
		f_s,
		inp_tgt_type,
		network_type,
		min_snr,
		max_snr,
		snr_inter,
		sample_dir=None,
		ver='VERSION_NAME',
		train_s_list=None,
		train_d_list=None,
		sample_size=None,
		**kwargs
		):
		"""
		Argument/s
			N_d - window duration (samples).
			N_s - window shift (samples).
			K - number of frequency bins.
			f_s - sampling frequency.
			inp_tgt_type - input and target type.
			network_type - network type.
			min_snr - minimum SNR level for training.
			max_snr - maximum SNR level for training.
			stats_dir - path to save sample statistics.
			ver - version name.
			train_s_list - clean-speech training list to compute statistics.
			train_d_list - noise training list to compute statistics.
			sample_size - number of samples to compute the statistics from.
			kwargs - keyword arguments.
		"""
		self.inp_tgt_type = inp_tgt_type
		self.network_type = network_type
		self.min_snr = min_snr
		self.max_snr = max_snr
		self.snr_levels = list(range(self.min_snr, self.max_snr + 1, snr_inter))
		self.ver = ver
		self.train_s_list=train_s_list
		self.train_d_list=train_d_list

		inp_tgt_obj_path = sample_dir + '/' + self.ver + '_inp_tgt.p'
		if os.path.exists(inp_tgt_obj_path):
			with open(inp_tgt_obj_path, 'rb') as f:
				self.inp_tgt = pickle.load(f)
		else:
			self.inp_tgt = inp_tgt_selector(self.inp_tgt_type, N_d, N_s, K, f_s, **kwargs)
			s_sample, d_sample, x_sample, wav_len = self.sample(sample_size, sample_dir)
			self.inp_tgt.stats(s_sample, d_sample, x_sample, wav_len)
			with open(inp_tgt_obj_path, 'wb') as f:
				pickle.dump(self.inp_tgt, f, pickle.HIGHEST_PROTOCOL)

		self.inp = Input(name='inp', shape=[None, self.inp_tgt.n_feat], dtype='float32')
		self.network = network_selector(self.network_type, self.inp,
			self.inp_tgt.n_outp, **kwargs)

		self.model = Model(inputs=self.inp, outputs=self.network.outp)
		self.model.summary()
		if not os.path.exists("log/summary"):
			os.makedirs("log/summary")
		with open("log/summary/" + self.ver + ".txt", "w") as f:
			self.model.summary(print_fn=lambda x: f.write(x + '\n'))
Example #36
    def loadModels(self):
        """
            load models and aux data
        """
        if self.bAttentionLayer:
            self.model = load_model(os.path.join(self.dirName,self.sModelName+'.hd5'),custom_objects={"AttentionDecoder": AttentionDecoder})
        else:
            self.model = load_model(os.path.join(self.dirName,self.sModelName+'.hd5'))

        print('model loaded: %s/%s.hd5' % (self.dirName,self.sModelName))  
        try:
            self.bMultiType,self.maxngram,self.max_features,self.max_sentence_len, self.lnbClasses,self.ltag_vector , self.node_transformer = pickle.load(gzip.open('%s/%s.%s'%(self.dirName,self.sModelName,self.sAux),'r'))
        except:
            self.maxngram,self.max_features,self.max_sentence_len, self.lnbClasses,self.ltag_vector , self.node_transformer = pickle.load(gzip.open('%s/%s.%s'%(self.dirName,self.sModelName,self.sAux),'r'))
            self.bMultiType = False
        print('aux data loaded: %s/%s.%s' % (self.dirName,self.sModelName,self.sAux))        
        print("ngram: %s\tmaxfea=%s\tpadding=%s\tnbclasses=%s" % (self.maxngram,self.max_features,self.max_sentence_len, self.lnbClasses))
        print("multitype model:%s"%(self.bMultiType))
Example #37
test_dataFrame = pd.read_csv('Output/finalTestingData.csv')

#Dropping values that weren't trained on
test_dataFrame = test_dataFrame.drop(['StartTime','SrcAddr','DstAddr'], axis=1)

#Splitting up the test dataframe into one with only features, other with classifications
test_dataFrame_Classification = test_dataFrame[['LabelDisc']].copy()
test_dataFrame = test_dataFrame.drop(['LabelDisc'], axis=1)

#Getting vals to use for testing
test_data = test_dataFrame.values
test_data_class = test_dataFrame_Classification.values

test_features = test_data[0::]
test_results = test_data_class[0::,0]

#Testing
modelsFolder = "Models"
modelFileNames = [modelsFolder + "/" + file for file in os.listdir(modelsFolder) if file.endswith(".pkl")]

model = None
for model_filename in modelFileNames:
    print("\n\nTesting using model "+model_filename+"...")
    with open(model_filename, 'rb') as file:  
        model = pickle.load(file)

    predictionClassification = model.predict(test_features)

    print("Scikit-learn classification report: ")
    print(classification_report(test_dataFrame_Classification.LabelDisc, predictionClassification))
    print ('Accuracy = ' + str(accuracy_score(test_results, predictionClassification)) )
Example #38
def main():
    plotonly = True
    global resolved
    resolved = False
    pvalues = None
    if plotonly:
        print("Loading parameters...")
        if resolved:
            picklepath = "fitvalues_resolved.pickle"
        else:
            picklepath = "fitvalues_merged.pickle"
        with open(picklepath, 'rb') as f:
            pvalues = pickle.load(f)

    massset = set()
    for each in os.listdir("ntuples"):
        if ".root" in each:
            massset.add(int(each.split("w")[0]))
    i = 0
    processes = []
    for each_mass in sorted(massset):
        maxv = int(each_mass * 2.5)
        if maxv > 3000:
            maxv = 3000
        i += 1
        # plot_mass_modified(each_mass, linspace(0, int(maxv), 25))
        if resolved:
            bins_resolved = [
                100, 180, 210.0, 230.0, 250.0, 270.0, 290.0, 310.0, 330.0,
                350.0, 370.0, 390.0, 410.0, 430.0, 450.0, 470.0, 490.0, 510.0,
                530.0, 550.0, 570.0, 590.0, 610.0, 640.0, 670.0, 700.0, 730.0,
                760.0, 790.0, 820.0, 870.0, 920.0, 970.0, 1020.0, 1070.0,
                1120.0, 1170.0, 1220.0, 1280.0, 1340.0, 1400.0, 1460.0, 1570.0,
                1640.0, 1720.0, 1810, 1910, 2000.0, 2100, 2200, 2300, 2400,
                2500, 2600, 2700
            ]
            bins_merged = [
                180, 260.0, 320.0, 390.0, 470.0, 550.0, 640.0, 740.0, 850.0,
                970.0, 1100.0, 1250.0, 1410.0, 1590.0, 1790.0, 2010.0, 2250.0,
                2510.0, 3000
            ]
        else:
            bins_resolved = linspace(0, int(maxv), 15)
            if maxv > 1000:
                bins_resolved = linspace(0, int(maxv), 30)
        # if int(maxv) < 1000:
        # bins_resolved = linspace(0, int(maxv), 25)
        # else:
        #     bins_resolved = linspace(0, int(maxv), 40)
        # t = multiprocessing.Process(target=plot_mass_modified, args=(each_mass, linspace(0, int(maxv), 25)))
        t = multiprocessing.Process(target=plot_mass_modified,
                                    args=(each_mass, np.array(bins_resolved),
                                          pvalues))
        processes.append(t)
        t.start()
        if (i + 1) % 10 == 0:
            for each in processes:
                each.join()
            processes = []
    for each in processes:
        each.join()

    if not plotonly:
        paras = {}
        allfiles = os.listdir("pickle")
        for each in allfiles:
            mass = int(each.replace("fitvalues", "").replace(".pickle", ""))
            with open("pickle/" + each, 'rb') as f:
                pvalues = pickle.load(f)
                paras[mass] = pvalues
        with open('fitvalues.pickle', 'wb') as f:
            pickle.dump(paras, f)
Example #39
		#get only the boundary vertices
		boundaryPoints, correspondenceMap = getContourVertices(mesh, coordinates, coordinateMap)

		#plot the points to verify
		plt.scatter(boundaryPoints[:,0], boundaryPoints[:,1])
		plt.show()

		return boundaryPoints, correspondenceMap
	else:
		print (args[0]+' plane not defined.')


#get data from Vertices.data file
dataPath = 'python_data/'
f = open(dataPath+'Vertices_new.data', 'rb')
vertices = pickle.load(f)
f.close()
print(len(vertices))
boundaryPoints, correspondenceMap = project (vertices, 'AP')

#save boundary poins and correspondence map in python data
f = open(dataPath+'AP_contourPoints.data','wb')
pickle.dump(boundaryPoints, f)
f.close()

f = open(dataPath+'AP_correspondenceMap.data','wb')
pickle.dump(correspondenceMap, f)
f.close()


boundaryPoints, correspondenceMap = project (vertices, 'ML')
Example #40
import pickle


# Create a test class
class Persona:
    def __init__(self, nombre, apellido):
        self.nombre = nombre
        self.apellido = apellido

    def __str__(self):
        return "%s, %s" % (self.apellido, self.nombre)


# Create the list of objects
nombres = ["Hector", "Mario", "Marta"]
apellidos = ["Fernandez", "Guzman", "Kicillof"]
personas = []

for i in range(len(nombres)):
    p = Persona(nombres[i], apellidos[i])
    personas.append(p)

# Write the list to a file with pickle
with open('personas.pck', 'wb') as f:
    pickle.dump(personas, f)

# Read the list back from the file with pickle
with open('personas.pck', 'rb') as f:
    personas = pickle.load(f)

for p in personas:
    print(p)
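A quick in-memory check, not in the original, that the objects survive a round trip through pickle.dumps/pickle.loads; note that the Persona class must be importable in whatever script later unpickles personas.pck:

blob = pickle.dumps(personas)
restored = pickle.loads(blob)
print(str(restored[0]) == str(personas[0]))  # True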
Example #41
# take raw data and pre-process it for the model.

from flask import Flask, request
import pandas as pd
import numpy as np
import json
import pickle
import os

app = Flask(__name__)
# Load Model and Scaler Files
model_path = os.path.join(os.path.pardir,os.path.pardir,'models')
model_filepath = os.path.join(model_path, 'lr_model.pkl')
scaler_filepath = os.path.join(model_path, 'lr_scaler.pkl')

model = pickle.load(open(model_filepath, 'rb'))
scaler = pickle.load(open(scaler_filepath, 'rb'))

# columns
columns = [ u'Age', u'Fare', u'FamilySize', \
       u'IsMother', u'IsMale', u'Deck_A', u'Deck_B', u'Deck_C', u'Deck_D', \
       u'Deck_E', u'Deck_F', u'Deck_G', u'Deck_Z', u'Pclass_1', u'Pclass_2', \
       u'Pclass_3', u'Title_Lady', u'Title_Master', u'Title_Miss', u'Title_Mr', \
       u'Title_Mrs', u'Title_Officer', u'Title_Sir', u'Fare_Bin_very_low', \
       u'Fare_Bin_low', u'Fare_Bin_high', u'Fare_Bin_very_high', u'Embarked_C', \
       u'Embarked_Q', u'Embarked_S', u'AgeState_Adult', u'AgeState_Child']


@app.route('/api', methods=['POST'])
def make_prediction():
    # read json object and convert to json string
Example #42
    write_sbml_model,
)

config = cobra.Configuration()
config.solver = "glpk"

if __name__ == "__main__":
    # ecoli
    ecoli_model = read_sbml_model("iJO1366.xml.gz")
    with open("iJO1366.pickle", "wb") as outfile:
        dump(ecoli_model, outfile, protocol=2)

    # salmonella
    salmonella = read_sbml_model("salmonella.xml")
    with open("salmonella.genes", "rb") as infile:
        gene_names = load(infile)
    for gene in salmonella.genes:
        gene.name = gene_names[gene.id]
    with open("salmonella.media", "rb") as infile:
        salmonella.media_compositions = load(infile)
    with open("salmonella.pickle", "wb") as outfile:
        dump(salmonella, outfile, protocol=2)

    # create mini model from textbook
    textbook = read_sbml_model("textbook.xml.gz")
    mini = cobra.Model("mini_textbook")
    mini.compartments = textbook.compartments

    for r in textbook.reactions:
        if r.id in (
                "GLCpts",
Example #43
    def learn(self,
              to_predict,
              from_data,
              test_from_data=None,
              group_by=None,
              window_size=None,
              order_by=[],
              sample_margin_of_error=CONFIG.DEFAULT_MARGIN_OF_ERROR,
              ignore_columns=[],
              stop_training_in_x_seconds=None,
              stop_training_in_accuracy=None,
              backend='lightwood',
              rebuild_model=True,
              use_gpu=False,
              disable_optional_analysis=False,
              equal_accuracy_for_all_output_categories=False,
              output_categories_importance_dictionary=None,
              unstable_parameters_dict={}):
        """
        Learn to predict a column or columns from the data in 'from_data'

        Mandatory arguments:
        :param to_predict: what column or columns you want to predict
        :param from_data: the data that you want to learn from; this can be a file, a pandas data frame, a URL, or a mindsdb data source

        Optional arguments:
        :param test_from_data: If you would like to test this learning from a different data set

        Optional Time series arguments:
        :param order_by: this order by defines the time series; it can be a list. By default each order-by column is sorted in ascending order; to change this, pass a tuple ('column_name', 'boolean_for_ascending <default=true>')
        :param group_by: This argument tells the time series that it should learn by grouping rows by a given id
        :param window_size: The number of samples to learn from in the time series

        Optional data transformation arguments:
        :param ignore_columns: it simply removes the columns from the data sources

        Optional sampling parameters:
        :param sample_margin_of_error (DEFAULT 0): Maximum expected difference between the true population parameter, such as the mean, and the sample estimate.

        Optional debug arguments:
        :param stop_training_in_x_seconds: (default None) if set, training will stop after the given number of seconds

        :return:
        """

        from_ds = getDS(from_data)
        test_from_ds = test_from_data if test_from_data is None else getDS(
            test_from_data)

        transaction_type = TRANSACTION_LEARN
        sample_confidence_level = 1 - sample_margin_of_error

        # lets turn into lists: predict, order_by and group by
        predict_columns = [to_predict
                           ] if type(to_predict) != type([]) else to_predict
        group_by = group_by if type(group_by) == type(
            []) else [group_by] if group_by else []
        order_by = order_by if type(order_by) == type(
            []) else [order_by] if order_by else []

        if len(predict_columns) == 0:
            error = 'You need to specify a column to predict'
            self.log.error(error)
            raise ValueError(error)

        # lets turn order by into tuples if not already
        # each element ('column_name', 'boolean_for_ascending <default=true>')
        order_by = [(col_name, True) if type(col_name) != type(
            ()) else col_name for col_name in order_by]

        is_time_series = True if len(order_by) > 0 else False

        heavy_transaction_metadata = {}
        heavy_transaction_metadata['name'] = self.name
        heavy_transaction_metadata['from_data'] = from_ds
        heavy_transaction_metadata['test_from_data'] = test_from_ds
        heavy_transaction_metadata['bucketing_algorithms'] = {}
        heavy_transaction_metadata['predictions'] = None
        heavy_transaction_metadata['model_backend'] = backend

        light_transaction_metadata = {}
        light_transaction_metadata['version'] = str(__version__)
        light_transaction_metadata['name'] = self.name
        light_transaction_metadata['data_preparation'] = {}
        light_transaction_metadata['predict_columns'] = predict_columns
        light_transaction_metadata['model_columns_map'] = from_ds._col_map
        light_transaction_metadata['model_group_by'] = group_by
        light_transaction_metadata['model_order_by'] = order_by
        light_transaction_metadata['model_is_time_series'] = is_time_series
        light_transaction_metadata['data_source'] = from_data
        light_transaction_metadata['type'] = transaction_type
        light_transaction_metadata['ignore_columns'] = ignore_columns
        light_transaction_metadata['window_size'] = window_size
        light_transaction_metadata[
            'sample_margin_of_error'] = sample_margin_of_error
        light_transaction_metadata[
            'sample_confidence_level'] = sample_confidence_level
        light_transaction_metadata[
            'stop_training_in_x_seconds'] = stop_training_in_x_seconds
        light_transaction_metadata[
            'stop_training_in_accuracy'] = stop_training_in_accuracy
        light_transaction_metadata['rebuild_model'] = rebuild_model
        light_transaction_metadata['model_accuracy'] = {
            'train': {},
            'test': {}
        }
        light_transaction_metadata['column_importances'] = None
        light_transaction_metadata['columns_buckets_importances'] = None
        light_transaction_metadata['columnless_prediction_distribution'] = None
        light_transaction_metadata[
            'all_columns_prediction_distribution'] = None
        light_transaction_metadata['use_gpu'] = use_gpu
        light_transaction_metadata['malformed_columns'] = {
            'names': [],
            'indices': []
        }
        light_transaction_metadata[
            'disable_optional_analysis'] = disable_optional_analysis
        light_transaction_metadata['validation_set_accuracy'] = None
        light_transaction_metadata['lightwood_data'] = {}
        light_transaction_metadata['ludwig_data'] = {}
        light_transaction_metadata['weight_map'] = {}
        light_transaction_metadata['confusion_matrices'] = {}
        light_transaction_metadata[
            'equal_accuracy_for_all_output_categories'] = equal_accuracy_for_all_output_categories
        light_transaction_metadata['output_categories_importance_dictionary'] = output_categories_importance_dictionary if output_categories_importance_dictionary is not None else {}

        # unstable/experimental flags default to False unless explicitly provided
        for flag in ['skip_model_training', 'skip_stats_generation',
                     'always_use_model_prediction', 'optimize_model']:
            light_transaction_metadata[flag] = unstable_parameters_dict.get(flag, False)

        if rebuild_model is False:
            # keep shallow copies so selected keys can be restored after loading
            old_lmd = dict(light_transaction_metadata)
            old_hmd = dict(heavy_transaction_metadata)

            with open(
                    os.path.join(
                        CONFIG.MINDSDB_STORAGE_PATH,
                        light_transaction_metadata['name'] +
                        '_light_model_metadata.pickle'), 'rb') as fp:
                light_transaction_metadata = pickle.load(fp)

            with open(
                    os.path.join(
                        CONFIG.MINDSDB_STORAGE_PATH,
                        heavy_transaction_metadata['name'] +
                        '_heavy_model_metadata.pickle'), 'rb') as fp:
                heavy_transaction_metadata = pickle.load(fp)

            for k in [
                    'data_preparation', 'rebuild_model', 'data_source', 'type',
                    'ignore_columns', 'sample_margin_of_error',
                    'sample_confidence_level', 'stop_training_in_x_seconds',
                    'stop_training_in_accuracy'
            ]:
                if old_lmd[k] is not None:
                    light_transaction_metadata[k] = old_lmd[k]

            for k in ['from_data', 'test_from_data']:
                if old_hmd[k] is not None:
                    heavy_transaction_metadata[k] = old_hmd[k]
        Transaction(session=self,
                    light_transaction_metadata=light_transaction_metadata,
                    heavy_transaction_metadata=heavy_transaction_metadata,
                    logger=self.log)
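A minimal usage sketch of the learn() call documented above. It is not part of the original example: the Predictor class name, the CSV file and the column names are hypothetical placeholders, assumed only to illustrate the arguments described in the docstring.

# Hedged usage sketch; 'Predictor', 'home_rentals.csv', 'rental_price',
# 'built_year' and 'neighborhood' are hypothetical placeholders.
mdb = Predictor(name='home_rentals_model')
mdb.learn(
    to_predict='rental_price',            # column to predict
    from_data='home_rentals.csv',         # file, DataFrame, URL or data source
    order_by=[('built_year', True)],      # optional time-series ordering
    group_by=['neighborhood'],            # optional grouping id
    window_size=10,                       # samples per time-series window
    stop_training_in_x_seconds=3600,      # optional training time budget
)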
Example #44
0
from flask import Flask, render_template, request, jsonify
import requests
import pickle
import numpy as np
import sklearn
from sklearn.preprocessing import StandardScaler

app = Flask(__name__)
model = pickle.load(open('US_prediction.pkl', 'rb'))


@app.route('/', methods=['GET'])
def Home():
    return render_template('index.html')


standard_to = StandardScaler()


@app.route("/predict", methods=['POST'])
def predict():
    try:
        if request.method == 'POST':
            GRE = float(request.form['GRE'])
            TOEFL = float(request.form['TOEFL'])
            UR = float(request.form['UR'])
            SOP = float(request.form['SOP'])
            LOR = float(request.form['LOR'])
            GPA = float(request.form['GPA'])
            RES = float(request.form['RES'])
Example #45
0
from keras.utils.np_utils import to_categorical
# from keras.utils import print_summary
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
# optimizers: SGD, RMSprop, Adagrad, Adadelta, Adam, Adamax, Nadam, TFOptimizer
from keras.optimizers import RMSprop, Adadelta
from keras.callbacks import ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator

import gzip  # needed for gzip.open below
import pickle as pk  # needed for pk.load below

import matplotlib.pyplot as plt
import seaborn as sns

# ---------------------------Data Preparation---------------------------------
# load the dataset
file = gzip.open('mnist.pkl.gz','rb')
train_set, valid_set, test_set = pk.load(file,encoding='latin1')
file.close()

x_train = train_set[0]
# print(x_train.shape) #(50000, 784)
y_train = train_set[1]
del train_set # free some space

x_test = test_set[0]
# print(x_test.shape) #(10000, 784)
y_test = test_set[1]
del test_set

x_val = valid_set[0]
y_val = valid_set[1]
del valid_set
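
A possible continuation of the data preparation above, not taken from the original example: it assumes the Conv2D/MaxPool2D layers imported at the top expect 28x28x1 inputs and one-hot labels, which follows from the (N, 784) shapes noted in the comments.

# Sketch only: reshape the flat 784-pixel vectors into 28x28x1 images for the
# convolutional layers and one-hot encode the 10 digit classes.
x_train = x_train.reshape(-1, 28, 28, 1)
x_val = x_val.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)

y_train = to_categorical(y_train, num_classes=10)
y_val = to_categorical(y_val, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)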
Example #46
0
    def get_model_data(self, model_name):
        with open(
                os.path.join(CONFIG.MINDSDB_STORAGE_PATH,
                             f'{model_name}_light_model_metadata.pickle'),
                'rb') as fp:
            lmd = pickle.load(fp)
        # ADAPTOR CODE
        amd = {}

        if lmd['current_phase'] == MODEL_STATUS_TRAINED:
            amd['status'] = 'complete'
        elif lmd['current_phase'] == MODEL_STATUS_ERROR:
            amd['status'] = 'error'
        else:
            amd['status'] = 'training'

        # Shared keys
        for k in [
                'name', 'version', 'is_active', 'data_source', 'predict',
                'current_phase', 'train_end_at', 'updated_at', 'created_at',
                'data_preparation', 'validation_set_accuracy'
        ]:
            if k == 'predict':
                amd[k] = lmd['predict_columns']
            elif k in lmd:
                amd[k] = lmd[k]
                if k == 'validation_set_accuracy':
                    if lmd['validation_set_accuracy'] is not None:
                        amd['accuracy'] = round(lmd['validation_set_accuracy'],
                                                3)
                    else:
                        amd['accuracy'] = None
            else:
                amd[k] = None

        amd['data_analysis'] = {
            'target_columns_metadata': [],
            'input_columns_metadata': []
        }

        amd['model_analysis'] = []

        for col in lmd['model_columns_map'].keys():
            if col in lmd['malformed_columns']['names']:
                continue

            try:
                icm = self._adapt_column(lmd['column_stats'][col], col)
            except Exception as e:
                icm = {'column_name': col}
                #continue

            amd['force_vectors'] = {}
            if col in lmd['predict_columns']:
                # Histograms for plotting the force vectors
                if 'all_columns_prediction_distribution' in lmd and lmd[
                        'all_columns_prediction_distribution'] is not None:
                    amd['force_vectors'][col] = {}
                    amd['force_vectors'][col][
                        'normal_data_distribution'] = lmd[
                            'all_columns_prediction_distribution'][col]
                    amd['force_vectors'][col]['normal_data_distribution'][
                        'type'] = 'categorical'

                    amd['force_vectors'][col]['missing_data_distribution'] = {}
                    for missing_column in lmd[
                            'columnless_prediction_distribution'][col]:
                        amd['force_vectors'][col]['missing_data_distribution'][
                            missing_column] = lmd[
                                'columnless_prediction_distribution'][col][
                                    missing_column]
                        amd['force_vectors'][col]['missing_data_distribution'][
                            missing_column]['type'] = 'categorical'

                    icm['importance_score'] = None
                amd['data_analysis']['target_columns_metadata'].append(icm)

                if 'confusion_matrices' in lmd and col in lmd[
                        'confusion_matrices']:
                    confusion_matrix = lmd['confusion_matrices'][col]
                else:
                    confusion_matrix = None
                # Model analysis building for each of the predict columns
                mao = {
                    'column_name': col,
                    'overall_input_importance': {
                        "type": "categorical",
                        "x": [],
                        "y": []
                    },
                    "train_accuracy_over_time": {
                        "type": "categorical",
                        "x": [],
                        "y": []
                    },
                    "test_accuracy_over_time": {
                        "type": "categorical",
                        "x": [],
                        "y": []
                    },
                    "accuracy_histogram": {
                        "x": [],
                        "y": [],
                        'x_explained': []
                    },
                    "confusion_matrix": confusion_matrix
                }

                # This is a check to see if model analysis has run on this data
                if 'model_accuracy' in lmd and lmd[
                        'model_accuracy'] is not None and 'train' in lmd[
                            'model_accuracy'] and 'combined' in lmd[
                                'model_accuracy']['train'] and lmd[
                                    'model_accuracy']['train'][
                                        'combined'] is not None:
                    train_acc = lmd['model_accuracy']['train']['combined']
                    test_acc = lmd['model_accuracy']['test']['combined']

                    for i in range(0, len(train_acc)):
                        mao['train_accuracy_over_time']['x'].append(i)
                        mao['train_accuracy_over_time']['y'].append(
                            train_acc[i])

                    for i in range(0, len(test_acc)):
                        mao['test_accuracy_over_time']['x'].append(i)
                        mao['test_accuracy_over_time']['y'].append(test_acc[i])

                if 'model_accuracy' in lmd and lmd[
                        'model_accuracy'] is not None and lmd[
                            'column_importances'] is not None:
                    mao['accuracy_histogram']['x'] = [
                        f'{x}'
                        for x in lmd['accuracy_histogram'][col]['buckets']
                    ]
                    mao['accuracy_histogram']['y'] = lmd['accuracy_histogram'][
                        col]['accuracies']

                    for output_col_bucket in lmd[
                            'columns_buckets_importances'][col]:
                        x_explained_member = []
                        for input_col in lmd['columns_buckets_importances'][
                                col][output_col_bucket]:
                            stats = lmd['columns_buckets_importances'][col][
                                output_col_bucket][input_col]
                            adapted_sub_incol = self._adapt_column(
                                stats, input_col)
                            x_explained_member.append(adapted_sub_incol)
                        mao['accuracy_histogram']['x_explained'].append(
                            x_explained_member)

                    for icol in lmd['model_columns_map'].keys():
                        if icol in lmd['malformed_columns']['names']:
                            continue
                        if icol not in lmd['predict_columns']:
                            try:
                                mao['overall_input_importance']['x'].append(
                                    icol)
                                mao['overall_input_importance']['y'].append(
                                    round(lmd['column_importances'][icol], 1))
                            except Exception:
                                print(
                                    f'No column importances found for {icol} !'
                                )

                amd['model_analysis'].append(mao)
            else:
                if 'column_importances' in lmd and lmd[
                        'column_importances'] is not None:
                    icm['importance_score'] = lmd['column_importances'][col]
                amd['data_analysis']['input_columns_metadata'].append(icm)

        return amd
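
A hedged illustration of how the adapted model dictionary returned above might be consumed; the session object and model name are hypothetical placeholders, not part of the original example.

# Sketch only; 'session' and 'home_rentals_model' are hypothetical placeholders.
amd = session.get_model_data('home_rentals_model')
print(amd['status'], amd.get('accuracy'))
for col_meta in amd['data_analysis']['input_columns_metadata']:
    print(col_meta.get('column_name'), col_meta.get('importance_score'))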
Example #47
0
    for i in new_x.split():
        for y in company_list:
            if similar(i, y) > threshold:
                first_list.append(y)
                print(y, i)
            else:
                first_list.append('')
    result_list.append(first_list)


with open('datafile_reuters.pkl', 'wb') as f:
    print('pickle dumping...')
    pickle.dump(result_list, f)

with open('datafile_reuters.pkl', 'rb') as f:
    newlist = pickle.load(f)

df_new = pd.DataFrame()
df_i = []
df_j = []
df_k = []

print('begin appending...')
for new_name, new_file, new_path in zip(newlist, all_files, all_paths):
    for i in set(new_name):
        df_i.append(i)  # i is '' for unmatched entries, so both branches collapse
        df_j.append(new_file)
        df_k.append(new_path)
Example #48
0
import pickle
# filename should be .sav format
filename = ''
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.most_similar(positive=[],negative=[], topn=5)
print(result)
Example #49
0
            if i < len(cap)-1:
                vhist[i, :] = np.logical_or(vhist[i, :], vhist[i-1, :])
                vhist[i, ii_1] = 1

            nxts.append(nxt)
            imgs.append(img)
            seqs.append(seq)

        vhists.extend(vhist)

    return imgs, curs, nxts, seqs, vhists

if __name__ == '__main__':


    dec_map = pickle.load(open("dec_map.pkl", 'rb'))
    enc_map = pickle.load(open("enc_map.pkl", 'rb'))
#"/general/home/manish.singhal/Image-Captioning-master/encoded_images_inceptionV3.p"
    img_train = pickle.load(open("../encoded_images_inceptionV3.p", 'rb'))
    
    img_test = pickle.load(open("../encoded_images_test_inceptionV3.p", 'rb'))
    print (len(img_train))
    df_cap = pd.read_csv("../processed_files/flickr8k_training_dataset.txt", delimiter = '\t')

    vocab_size = len(dec_map)
    print("vocab sixe is",vocab_size)
    embedding_matrix = generate_embedding_matrix('pre_trained/glove.6B.100d.txt', dec_map)
    model = image_caption_model(vocab_size=vocab_size, embedding_matrix=embedding_matrix)

    if len(sys.argv) >= 2:
        print('load weights from : {}'.format(sys.argv[1]))
Example #50
0
def extract_features(data_folder, filename, ini, end):
    df_data_x = pickle.load(open(data_folder + filename, "rb"))
    _, sig_sz = df_data_x.shape
    df_data_x = df_data_x[range(ini, min(end, sig_sz))]

    sc_mean = pd.DataFrame(sc_mean_(df_data_x), columns=['sc_mean'])
    sc_median = pd.DataFrame(sc_median_(df_data_x),columns=['sc_median'])
    sc_std = pd.DataFrame(sc_std_(df_data_x),columns=['sc_std'])
    sc_min = pd.DataFrame(sc_min_(df_data_x),columns=['sc_min'])
    sc_max = pd.DataFrame(sc_max_(df_data_x),columns=['sc_max'])
    sc_range = pd.DataFrame(sc_range_(sc_max,sc_min),columns=['sc_range'])
    sc_minRatio = pd.DataFrame(sc_minRatio_(df_data_x,sc_min),columns=['sc_minRatio'])
    sc_maxRatio = pd.DataFrame(sc_maxRatio_(df_data_x,sc_max),columns=['sc_maxRatio'])

    sc1Diff_mean = pd.DataFrame( sc1Diff_mean_(df_data_x),columns=['sc1Diff_mean'])
    sc1Diff_median = pd.DataFrame( sc1Diff_median_(df_data_x),columns=['sc1Diff_median'] )
    sc1Diff_std = pd.DataFrame( sc1Diff_std_(df_data_x),columns=['sc1Diff_std'])
    sc1Diff_min = pd.DataFrame( sc1Diff_min_(df_data_x),columns=['sc1Diff_min'])
    sc1Diff_max = pd.DataFrame( sc1Diff_max_(df_data_x),columns=['sc1Diff_max'])
    sc1Diff_range = pd.DataFrame( sc1Diff_range_(sc1Diff_max,sc1Diff_min),columns=['sc1Diff_range'])
    sc1Diff_minRatio = sc1Diff_minRatio_(df_data_x,sc1Diff_min)
    sc1Diff_minRatio.columns=['sc1Diff_minRatio']
    sc1Diff_maxRatio = sc1Diff_maxRatio_(df_data_x,sc1Diff_max)
    sc1Diff_maxRatio.columns=['sc1Diff_maxRatio']

    sc2Diff_std = pd.DataFrame( sc2Diff_std_(df_data_x),columns=['sc2Diff_std'] )
    sc2Diff_min = pd.DataFrame( sc2Diff_min_(df_data_x),columns=['sc2Diff_min'] )
    sc2Diff_max = pd.DataFrame( sc2Diff_max_(df_data_x),columns=['sc2Diff_max'] )
    sc2Diff_range = pd.DataFrame(sc2Diff_range_(sc2Diff_max,sc2Diff_min),columns=['sc2Diff_range'])
    sc2Diff_minRatio = sc2Diff_minRatio_(df_data_x,sc2Diff_min)
    sc2Diff_minRatio.columns = ['sc2Diff_minRatio']
    sc2Diff_maxRatio = sc2Diff_maxRatio_(df_data_x,sc2Diff_max)
    sc2Diff_maxRatio.columns = ['sc2Diff_maxRatio']

    scfft_df = scfft_(df_data_x)
    scfft_mean = pd.DataFrame( scfft_mean_(scfft_df),columns=['scfft_mean'])
    scfft_median = pd.DataFrame( scfft_median_(scfft_df),columns=['scfft_median'])
    scfft_std = pd.DataFrame( scfft_std_(scfft_df),columns=['scfft_std'])
    scfft_min = pd.DataFrame( scfft_min_(scfft_df),columns=['scfft_min'])
    scfft_max = pd.DataFrame( scfft_max_(scfft_df),columns=['scfft_max'])
    scfft_range = pd.DataFrame( scfft_range_(scfft_max,scfft_min),columns=['scfft_range'])

    feature_list = ['sc_mean','sc_median','sc_std','sc_min','sc_max','sc_range',
                    'sc_minRatio','sc_maxRatio','sc1Diff_mean','sc1Diff_median',
                    'sc1Diff_std','sc1Diff_min','sc1Diff_max','sc1Diff_range',
                    'sc1Diff_minRatio','sc1Diff_maxRatio','sc2Diff_std',
                    'sc2Diff_min','sc2Diff_max','sc2Diff_range','sc2Diff_minRatio',
                    'sc2Diff_maxRatio','scfft_mean','scfft_median','scfft_std',
                    'scfft_min','scfft_max','scfft_range']

    temp_feature_df = pd.DataFrame()
    for i in feature_list:
        temp_feature_df = pd.concat([locals()[i], temp_feature_df], axis=1)

    # rename columns with name of channel
    tmp = filename.split('_')
    ch_id = tmp[0] + "_" + tmp[1]
    tmp_cols = temp_feature_df.columns
    temp_feature_df.columns = [ch_id + "_" + name for name in tmp_cols]
    pickle.dump(temp_feature_df, open(data_folder + "feat_" + filename, "wb"))

    print('--- GSR features ---')
    print(temp_feature_df.shape)
    return temp_feature_df
Example #51
0
    params  = {
        'database': os.path.join(
            'tests',
            'data',
            'BSA.fasta'
        ),
    },
    force  = False
)

scan_rt_lookup = pickle.load(
    open(
        os.path.join(
            'tests',
            'data',
            '_test_ursgal_lookup.pkl'
        )
        ,
        'rb'
    )
)

unify_csv_main = R.unodes['unify_csv_1_0_0']['class'].import_engine_as_python_function()

input_csv = os.path.join(
    'tests',
    'data',
    'omssa_2_1_9',
    'test_BSA1_omssa_2_1_9.csv'
)
output_csv = os.path.join(
Example #52
0
def loadPickle(fileName):
    with open(fileName, mode="rb") as f:
        return pickle.load(f)
Example #53
0
def verify(source_uri, renditions, do_profiling, max_samples, model_dir, model_name):
    """
    Function that returns the predicted compliance of a list of renditions
    with respect to a given source file using a specified model.
    """

    total_start = time.clock()
    total_start_user = time.time()

    source_video, source_audio, video_available, audio_available = retrieve_video_file(source_uri)

    if video_available:
        # Prepare source and renditions for verification
        source = {'path': source_video,
                  'audio_path' : source_audio,
                  'video_available': video_available,
                  'audio_available': audio_available,
                  'uri': source_uri}

        # Create a list of preverified renditions
        pre_verified_renditions = []
        for rendition in renditions:
            pre_verification = pre_verify(source, rendition)
            if rendition['video_available']:
                pre_verified_renditions.append(pre_verification)

        # Cleanup the audio file generated to avoid cluttering
        if os.path.exists(source['audio_path']):
            os.remove(source['audio_path'])

        # Configure model for inference
        model_name = 'OCSVM'
        scaler_type = 'StandardScaler'
        learning_type = 'UL'
        loaded_model = pickle.load(open('{}/{}.pickle.dat'.format(model_dir,
                                                                  model_name), 'rb'))
        loaded_scaler = pickle.load(open('{}/{}_{}.pickle.dat'.format(model_dir,
                                                                      learning_type,
                                                                      scaler_type), 'rb'))

        # Open model configuration file
        with open('{}/param_{}.json'.format(model_dir, model_name)) as json_file:
            params = json.load(json_file)
            features = params['features']

        # Remove non numeric features from feature list
        non_temporal_features = ['attack_ID', 'title', 'attack', 'dimension', 'size', 'size_dimension_ratio']
        metrics_list = []
        for metric in features:
            if metric not in non_temporal_features:
                metrics_list.append(metric.split('-')[0])

        # Initialize times for assets processing profiling
        start = time.clock()
        start_user = time.time()

        # Instantiate VideoAssetProcessor class
        asset_processor = VideoAssetProcessor(source,
                                              pre_verified_renditions,
                                              metrics_list,
                                              do_profiling,
                                              max_samples,
                                              features)

        # Record time for class initialization
        initialize_time = time.clock() - start
        initialize_time_user = time.time() - start_user

        # Register times for asset processing
        start = time.clock()
        start_user = time.time()

        # Assemble output dataframe with processed metrics
        metrics_df, pixels_df, dimensions_df = asset_processor.process()

        # Record time for processing of assets metrics
        process_time = time.clock() - start
        process_time_user = time.time() - start_user

        # Normalize input data using the associated scaler
        x_renditions = np.asarray(metrics_df)
        x_renditions = loaded_scaler.transform(x_renditions)
       
        # Make predictions for given data
        start = time.clock()
        y_pred = loaded_model.decision_function(x_renditions)
        prediction_time = time.clock() - start

        # Add predictions to rendition dictionary
        i = 0
        for _, rendition in enumerate(renditions):
            if rendition['video_available']:
                rendition.pop('path', None)
                rendition['tamper'] = np.round(y_pred[i], 6)
                # Append the post-verification of resolution and pixel count
                if 'pixels' in rendition:
                    rendition['pixels_post_verification'] = float(rendition['pixels']) / pixels_df[i]
                if 'resolution' in rendition:
                    rendition['resolution']['height_post_verification'] = float(rendition['resolution']['height']) / int(dimensions_df[i].split(':')[0])
                    rendition['resolution']['width_post_verification'] = float(rendition['resolution']['width']) / int(dimensions_df[i].split(':')[1])
                i += 1

        if do_profiling:
            print('Features used:', features)
            print('Total CPU time:', time.clock() - total_start)
            print('Total user time:', time.time() - total_start_user)
            print('Initialization CPU time:', initialize_time)
            print('Initialization user time:', initialize_time_user)

            print('Process CPU time:', process_time)
            print('Process user time:', process_time_user)
            print('Prediction CPU time:', prediction_time)

    return renditions
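
A hedged example of how the verify() function above might be invoked; the URIs, rendition fields and model directory below are illustrative placeholders only, not values from the original example.

# Illustrative call only; all URIs, paths and field values are hypothetical.
renditions = [{
    'uri': 'https://example.com/rendition_720p.mp4',
    'resolution': {'height': 720, 'width': 1280},
    'pixels': 921600,
}]
results = verify(source_uri='https://example.com/source_1080p.mp4',
                 renditions=renditions,
                 do_profiling=True,
                 max_samples=10,
                 model_dir='model',
                 model_name='OCSVM')
print(results)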
Example #54
0
def load_obj(name):
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)
Example #55
0
def main(args):

    # Get data file
    dt = h5py.File(args.prior, 'r')

    # Extract data
    face_basis = get_face_basis(dt, args.size_id, args.size_exp)

    # SECTION 2
    print("\nSection 2...")

    # Sample alpha and delta
    print("\tSampling latent variables")

    if args.latent is None:
        alpha = np.random.uniform(-1, 1, args.size_id).astype(np.float32)
        delta = np.random.uniform(-1, 1, args.size_exp).astype(np.float32)
    else:
        with open(args.latent, "rb") as f:
            (alpha, delta), _ = pickle.load(f)

            alpha, delta = alpha.detach().numpy(), delta.detach().numpy()

    # Generate face from respective alpha and delta
    print("\tGenerating face 3D point-cloud")
    face_3D = face_basis(alpha, delta)

    # Save object for later visualization
    print("\tSaving face data")
    save_obj(
        args.face_3D_file,
        face_3D,
        face_basis.color,
        face_basis.mesh,
    )
    print("\tSaved to ", args.face_3D_file)

    if args.up_to is not None and args.up_to == "3D":
        return

    # SECTION 3
    print("\nSection 3...")
    print("Rotating face")

    # Transform face
    print("\tTransforming face with omega: ", args.omega, " and t: ", args.t)
    face_transform = FaceTransform()
    face_wt = face_transform(face_3D, args.omega, args.t)

    print("\tSaving rotated face data")
    save_obj(args.face_wt_file, face_wt, face_basis.color, face_basis.mesh)
    print("\tSaved to ", args.face_wt_file)

    if args.up_to is not None and args.up_to == "rotate":
        return

    print("Applying camera projection")

    # Init camera
    print("\tInitializing camera with FOV: ", args.fov, " aspect ratio: ",
          args.aratio, " near-far clips: ", args.near_far)
    camera = Camera(args.fov, args.aratio, args.near_far)

    # Generate image from face
    print("\tGenerating uv image")
    face_uv = camera(face_wt)

    print("\tNormalizing uv image (z coordinate)")
    uv_normalizer = UVNormalizer()
    face_uv_n = uv_normalizer(face_uv)

    # Extracting landmark points
    print("\tExtracting landmark pointsi from", args.landmarks)
    lmks = get_landmarks(args.landmarks)

    face_lmks = face_uv_n[lmks, :2]

    # Generate image
    plt.scatter(face_lmks[:, 0], face_lmks[:, 1])
    plt.axis('equal')
    plt.savefig(args.face_uv_file + ".png", dpi=900)
    print("\tSaved to ", args.face_uv_file + ".png")

    save_obj(args.face_uv_file + ".obj", face_uv_n, face_basis.color,
             face_basis.mesh)
    print("\tSaved to ", args.face_uv_file + ".obj")

    if args.up_to is not None and args.up_to == "project":
        return
# Create model
def multilayer_perceptron(_X, _weights, _biases):
    #Hidden layer with RELU activation
    layer_1 = tf.nn.relu(tf.add(tf.matmul(_X, _weights['h1']), _biases['b1']))
    #Hidden layer with RELU activation
    layer_2 = tf.nn.relu(
        tf.add(tf.matmul(layer_1, _weights['h2']), _biases['b2']))
    # layer_3 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(layer_2, _weights['h3']), _biases['b3'])), 0.5)
    return tf.matmul(layer_2, _weights['out']) + _biases['out']


print("Loading saved Weights for layer 1 ...")
file_ID = "./data/parameters_mfcc_1.pkl"
f = open(file_ID, "rb")
W = pickle.load(f)
b = pickle.load(f)
f.close()

weights = {
    'h1': tf.Variable(W['h1']),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    # 'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
}
biases = {
    'b1': tf.Variable(b['b1']),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    # 'b3': tf.Variable(tf.random_normal([n_hidden_3])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}
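
A hedged sketch of how the multilayer_perceptron defined above might be wired into a TF1 graph. It is not the original author's code: n_input, n_hidden_1, n_hidden_2 and n_classes are assumed to be defined earlier in the original script (they are not shown in this excerpt).

# Sketch only; relies on names defined outside this excerpt.
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])

pred = multilayer_perceptron(x, weights, biases)
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)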
Example #57
0
        print(self.played_memory)


if __name__ == "__main__":
    train_game = 20000
    lr = 0.75
    y = 0.90
    q1cumul_reward_list = []
    q1actions_list = []
    q1states_list = []
    q2cumul_reward_list = []
    q2actions_list = []
    q2states_list = []
    winner1 = []
    with open('player.pkl', 'rb') as model:
        q_player = pickle.load(model)
    q_player1 = q_player
    q_player2 = q_player(9)
    for i in range(train_game):
        game = Game(3, 3)
        grille = game.get_grille()
        q1actions = []
        q1states = [grille]
        q1cumul_reward = 0
        q2actions = []
        q2states = [grille]
        q2cumul_reward = 0
        while not game.end:
            grille = game.get_grille()
            choice_1 = q_player1.choose_move(i, grille)
            game.game_turn_x(choice_1)
def load_photo_features(filename, dataset):
    # load all features
    all_features = load(open(filename, 'rb'))
    # filter features
    features = {k: all_features[k] for k in dataset}
    return features
Example #59
0
    def initialize_cast(self):
        # This fixes the "No handlers could be found for logger
        # 'pychromecast.socket_client'" warning.
        # See commit 18005ebd4c96faccd69757bf3d126eb145687e0d.
        if chromecast:
            from pychromecast import socket_client
            self.cclist = self._get_chromecasts()
            self.cclist = [[i, _, 'Gcast'] for i, _ in enumerate(self.cclist)]
        else:
            self.cclist = []

        if sonos:
            try:
                self.sonos_list = list(soco.discover())
                for self.index, device in enumerate(self.sonos_list):
                    add_sonos = [self.index, device, 'Sonos']
                    self.cclist.append(add_sonos)
            except TypeError:
                pass

        if self.debug is True:
            print('self.cclist', self.cclist)

        if (len(self.cclist) != 0 and self.select_device is False and
                self.device_name is None):
            if self.debug is True:
                print('if len(self.cclist) != 0 and self.select_device == False:')
            print(' ')
            print_available_devices(self.available_devices())
            print(' ')
            if self.discover is False:
                print(colors.important('Casting to first device shown above!'))
                print(colors.important('Select devices by using the -s flag.'))
                print(' ')
                self.cast_to = self.cclist[0][1]
                if self.cclist[0][2] == 'Sonos':
                    print(colors.success(self.cast_to.player_name))
                else:
                    print(colors.success(self.cast_to))
                print(' ')

        elif (len(self.cclist) != 0 and self.select_device is True and
                self.tray is False and self.device_name is None):
            if self.debug is True:
                print('elif len(self.cclist) != 0 and self.select_device == True'
                      ' and self.tray == False:')
            if os.path.exists('/tmp/mkchromecast.tmp') is False:
                self.tf = open('/tmp/mkchromecast.tmp', 'wb')
                print(' ')
                print_available_devices(self.available_devices())
            else:
                if self.debug is True:
                    print('else:')
                self.tf = open('/tmp/mkchromecast.tmp', 'rb')
                self.index = pickle.load(self.tf)
                self.cast_to = self.cclist[int(self.index)]
                print(' ')
                print(colors.options('Casting to:') + ' ' +
                      colors.success(self.cast_to))
                print(' ')

        elif (len(self.cclist) != 0 and self.select_device is True and
                self.tray is True):
            if self.debug is True:
                print('elif len(self.cclist) != 0 and self.select_device == True'
                      '  and self.tray == True:')
            if os.path.exists('/tmp/mkchromecast.tmp') is False:
                self.tf = open('/tmp/mkchromecast.tmp', 'wb')
                print(' ')
                print_available_devices(self.available_devices())
            else:
                if self.debug is True:
                    print('else:')
                self.tf = open('/tmp/mkchromecast.tmp', 'rb')
                self.cast_to = pickle.load(self.tf)
                print_available_devices(self.available_devices())
                print(' ')
                print(colors.options('Casting to:') + ' ' +
                      colors.success(self.cast_to))
                print(' ')

        elif len(self.cclist) == 0 and self.tray is False:
            if self.debug is True:
                print('elif len(self.cclist) == 0 and self.tray == False:')
            print(colors.error('No devices found!'))
            if self.platform == 'Linux' and self.adevice is None:
                remove_sink()
            elif self.platform == 'Darwin':
                inputint()
                outputint()
            terminate()
            exit()

        elif len(self.cclist) == 0 and self.tray is True:
            print(colors.error(':::Tray::: No devices found!'))
            self.available_devices = []
Example #60
0
    staticFeatures, dynamicFeatures, char2indices, indices2char = prepareData(
        args.dataFile, args.nSample, doPlot=False)

    saveFeatures = True
    if (saveFeatures):
        with open('staticFeatures.pckl', 'wb') as f:
            pickle.dump(staticFeatures, f)

        with open('dynamicFeatures.pckl', 'wb') as f:
            pickle.dump(dynamicFeatures, f)

    loadFeatures = False
    if (loadFeatures):
        with open('staticFeatures.pckl', 'rb') as f:
            staticFeatures = pickle.load(f)

        with open('dynamicFeatures.pckl', 'rb') as f:
            dynamicFeatures = pickle.load(f)

    chosenFeatures = ['full_mwt', 'heavy_atoms', 'smiles_length']
    staticFeaturesSlice = staticFeatures[chosenFeatures]
    staticFeaturesSliceScaled, scaler = scaleFeatures(staticFeaturesSlice)
    aeDimensions = [64, 64, 32]
    #model, history = trainModel(dynamicFeatures, staticFeaturesSliceScaled, aeDimensions, args.modelWeightsFile, args.nEpoch, args.nBatch)
    model, history = trainModelDynamic(dynamicFeatures, aeDimensions,
                                       args.modelWeightsFile, args.nEpoch,
                                       args.nBatch)
    nCharInSmiles = dynamicFeatures.shape[1]
    nCharSet = dynamicFeatures.shape[2]
    nStatic = staticFeaturesSlice.shape[1]