Example #1
    def convert_msg_to_txt_file(self):
        pat=re.compile('t(\d{2})-(\d{2})\.dat')
        for _file_num in range(len(self.msg_t_file_list)):
            with open(self.msg_t_file_list[_file_num].get_path(), 'rb') as f:
                self.ttt.extend(msgpack.load(f))
            with open(self.msg_x_file_list[_file_num].get_path(), 'rb') as f:
                self.x.extend(msgpack.load(f))
            with open(self.msg_y_file_list[_file_num].get_path(), 'rb') as f:
                self.y.extend(msgpack.load(f))
            with open(self.msg_I_file_list[_file_num].get_path(), 'rb') as f:
                self.I.extend(msgpack.load(f))
            print(123,self.msg_t_file_list[_file_num].get_path())
            print(234,len(self.ttt),len(self.x),len(self.y),len(self.I))

            _cur_res = re.search(pat, self.msg_t_file_list[_file_num].name)
            beads_num=_cur_res.group(1)
            file_num=_cur_res.group(2)
            _cur_dir_path=self.msg_t_file_list[_file_num].parent.parent.get_path()+'/txt/'+self.msg_t_file_list[_file_num].parent.name
            if not os.path.exists(_cur_dir_path): os.makedirs(_cur_dir_path)
            _cur_file_path=_cur_dir_path+'/beads{}_{}.txt'.format(beads_num, file_num)
            print(555,len(self.ttt))
            with open(_cur_file_path, 'w+') as f:
                f.write('time(sec),x(nm),y(nm),total intensity(V)\r\n')
                for i in range(len(self.ttt)):
                    f.write('{},{},{},{}\r\n'.format(self.ttt[i], self.x[i], self.y[i], self.I[i]))
                    if i%10**6==0:print(i)

            self.ttt, self.x, self.y, self.I = [],[],[],[]
Example #2
 def read_msg_file_to_tx2(self, _file_num=None):
     if _file_num is None:
         print('please assign file_number')
         return
     with open(self.msg_t_file_list[_file_num].get_path(), 'rb') as f:
         self.ttt.extend(msgpack.load(f))
     with open(self.msg_x_file_list[_file_num].get_path(), 'rb') as f:
         self.x.extend(msgpack.load(f))
         print(222,self.msg_t_file_list[_file_num].get_path())
         print(len(self.ttt))
Example #3
def get_or_build(path, build_fn, *args, **kwargs):
    """
    Load from serialized form or build an object, saving the built
    object.

    Remaining arguments are provided to `build_fn`.
    """

    save = False
    obj = None

    if path is not None and os.path.isfile(path):
        with open(path, 'rb') as obj_f:
            obj = msgpack.load(obj_f, use_list=False, encoding='utf-8')
    else:
        save = True

    if obj is None:
        obj = build_fn(*args, **kwargs)

        if save and path is not None:
            with open(path, 'wb') as obj_f:
                msgpack.dump(obj, obj_f)

    return obj
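
A minimal usage sketch for the helper above; the file name and builder function are hypothetical, and note that msgpack 1.0 removed the `encoding` keyword used inside the helper (`raw=False` is the modern equivalent when unpacking).

import msgpack

def build_word_index(words):
    # hypothetical builder: any msgpack-serializable object works here
    return {w: i for i, w in enumerate(words)}

# first call builds and saves the index; later calls load the cached file
word2id = get_or_build('word_index.msgpack', build_word_index, ['the', 'a', 'of'])
print(word2id['the'])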
Example #4
def main():
    r = redis.StrictRedis('localhost', 6379)
    dat = msgpack.load(open(args.file_name))
    r.delete(args.set_name)
    for text, score in dat:
        enc = msgpack.dumps( [ text, score ] )
        r.zadd(args.set_name, score, enc)
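
The snippet above targets the redis-py 2.x `zadd(name, score, member)` signature and hands msgpack a text-mode file handle. A rough Python 3 / redis-py 3.x equivalent (file and key names are placeholders) might look like this:

import msgpack
import redis

r = redis.StrictRedis('localhost', 6379)
with open('scored_texts.msgpack', 'rb') as f:   # msgpack needs a binary stream
    dat = msgpack.load(f, raw=False)
r.delete('scored_texts')
for text, score in dat:
    enc = msgpack.dumps([text, score])
    r.zadd('scored_texts', {enc: score})        # redis-py 3.x takes a mapping of member -> score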
Example #5
    def load_file(self, fp):
        try:
            return msgpack.load(fp)
        except Exception as ex:
            log.warn('Unable to load object from file: %s', ex, exc_info=True)

        return None
Example #6
def get_msgpack_object_ref(path):
    """Get object-id ref for object in messagepack-encoded file.

    Args:
        (str) path: Full path to file.
    Returns:
        (str) reference, in form A/B/C e.g. '93/111124/2'
    Raises:
        IOError if the file cannot be opened.
        ValueError if the data in the file cannot be decoded.
        KeyError if the reference field is not found in the data.
    """
    import msgpack
    try:
        f = open(path)
    except IOError as err:
        raise IOError('Cannot open file for reading: {}'.format(path))
    try:
        t = msgpack.load(f)
    except Exception as err:
        raise ValueError('Cannot decode messagepack data in path "{}": {}'
                         ''.format(path, err))
    try:
        ref = t['ref']
    except KeyError:
        raise KeyError('Field "ref" not found in object at "{}"'.format(path))
    return ref
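
On Python 3 the file has to be opened in binary mode for msgpack to decode it; a compact sketch of the same lookup with the error handling trimmed (function name is hypothetical) could be:

import msgpack

def get_ref(path):
    with open(path, 'rb') as f:          # binary mode, handle closed automatically
        obj = msgpack.load(f, raw=False)
    return obj['ref']                    # e.g. '93/111124/2'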
Example #7
def split_all_url():
    '''
        Split the url list into multiple files, one per person, then crawl the data with hadoop
    '''
    result_folder = 'person_url_check'
    if not os.path.exists(result_folder):
        os.makedirs(result_folder)
    pic_face_index_dic = msgpack.load(open('pic_face_index_url_dic.p', 'rb'))
    person_count = 0
    url_count = 0
    person_index = 0
    for person in pic_face_index_dic:
        start = time()
        person_index += 1
        with open(os.path.join(result_folder, str(person_index)), 'w') as f_result:
            try:
                need_check_url_index_list = pic_face_index_dic.get(person)
                for index, pic_url in need_check_url_index_list:
                    write_content = [person, index, pic_url]
                    f_result.write('\t'.join(map(str, write_content))+'\n')
                    url_count += 1
            except:
                traceback.print_exc()
                continue
            person_count += 1
            print person, person_count, url_count, time()-start
Example #8
def move_pic():
    pic_folder = '/data/pictures_face/'
    right_pic_folder = '/data/pictures_face_baidu_filter/'
    need_annotate_folder = '/data/pictures_face_need_annotate/'
    person_result_dic = msgpack.load(open('person_result_dic.p', 'r'))
    person_list = os.listdir(pic_folder)
    for person in person_list:
        old_person_path = os.path.join(pic_folder, person)
        right_person_path = os.path.join(right_pic_folder, person)
        annotate_person_path = os.path.join(need_annotate_folder, person)
        right_index_list, wrong_index_list = person_result_dic.get(person.decode('gbk').encode('utf-8'), ([], []))
        right_index_list = set(right_index_list)
        wrong_index_list = set(wrong_index_list)
        old_pic_list = os.listdir(old_person_path)
        for pic in old_pic_list:
            pic_index = pic.replace('.png', '').replace('0.jpg', '').replace('_', '')
            if pic_index in right_index_list:
                if not os.path.exists(right_person_path):
                    os.makedirs(right_person_path)
                shutil.copyfile(os.path.join(old_person_path, pic),
                                os.path.join(right_person_path, pic))
            elif pic_index in wrong_index_list:
                continue
            else:
                if not os.path.exists(annotate_person_path):
                    os.makedirs(annotate_person_path)
                shutil.copyfile(os.path.join(old_person_path, pic),
                                os.path.join(annotate_person_path, pic))
Example #9
def load_check_result_url(dic_file, check_url_file):
    person_result_dic = {}   # {person: ([right_list], [wrong_list])} # pictures that are definitely right and definitely wrong
    right_url_count = wrong_url_count = error_format_count = no_baike_count = no_meaning_count = 0
    if os.path.exists(dic_file):
        person_result_dic = msgpack.load(open(dic_file, 'rb'))
    for line in open(check_url_file):
        tmp = line.rstrip().split('\t')
        # [person_name, pic_index, pic_url, baike_name, baike_sim, newbaike_sim, guess_info]
        person_name = tmp[0]
        right_list, wrong_list = person_result_dic.get(person_name, ([], []))
        if len(tmp) == 7:
            if tmp[3] not in no_meaning_list:
                if tmp[3] == no_find_baike:
                    no_baike_count += 1
                    continue
                else:
                    if get_newbaike_sim(tmp[4]) > sim_threshold:
                        if tmp[0] == tmp[3]:
                            right_list.append(tmp[1])
                            right_url_count += 1
                        else:
                            wrong_url_count += 1
                            wrong_list.append(tmp[1])
                    else:   # below this probability the result is unreliable and needs manual annotation
                        no_baike_count += 1
                        continue
            else:
                no_meaning_count += 1
                continue
        else:
            error_format_count += 1
            continue
        person_result_dic[person_name] = (right_list, wrong_list)
    print right_url_count, wrong_url_count, no_baike_count, no_meaning_count, error_format_count
    msgpack.dump(person_result_dic, open('person_result_dic.p', 'w'))
Example #10
 def parseFromFile(self, fname):
     """
     Overwritten to read Msgpack files.
     """
     import msgpack
     f = open(fname, "r")
     return msgpack.load(f)
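
The method opens the file in text mode, which breaks on Python 3 because msgpack expects a binary stream; a standalone sketch of the equivalent read (function name is hypothetical) would be:

import msgpack

def parse_msgpack_file(fname):
    with open(fname, 'rb') as f:   # binary mode; the handle is also closed properly
        return msgpack.load(f)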
Example #11
def targets(tgt, tgt_type='glob', **kwargs):  # pylint: disable=W0613
    '''
    Return the targets from the flat yaml file, checks opts for location but
    defaults to /etc/salt/roster
    '''
    cache = os.path.join(syspaths.CACHE_DIR, 'master', 'minions', tgt, 'data.p')

    if not os.path.exists(cache):
        return {}

    roster_order = __opts__.get('roster_order', (
        'public', 'private', 'local'
    ))

    with salt.utils.fopen(cache, 'r') as fh_:
        cache_data = msgpack.load(fh_)

    ipv4 = cache_data.get('grains', {}).get('ipv4', [])
    preferred_ip = extract_ipv4(roster_order, ipv4)
    if preferred_ip is None:
        return {}

    return {
        tgt: {
            'host': preferred_ip,
        }
    }
Example #12
 def __init__(self, ensemble_clf_model):
     with open(ensemble_clf_model, "rb") as file_handler_in:
         self.classifiers, self.manual_feature_list = msgpack.load(file_handler_in)
         # interp GBDT x 3: x, v, a | manual GBDT | Output: LR
         self.interp_gbdts = [CustomGradientBoostingClassifier(arg) for arg in self.classifiers[:3]]
         self.manual_gbdt = CustomGradientBoostingClassifier(self.classifiers[3])
         self.lr = CustomLogisticRegression(self.classifiers[4])
Example #13
 def parse(self, stream, media_type=None, parser_context=None):
     try:
         return msgpack.load(stream,
                             use_list=True,
                             object_hook=MessagePackDecoder().decode)
     except Exception, exc:
         raise ParseError('MessagePack parse error - %s' % unicode(exc))
Example #14
def load_msgpack(blob, **kwargs):
    """
    Load a dict packed with msgpack into kwargs for
    a Trimesh constructor

    Parameters
    ----------
    blob : bytes
      msgpack packed dict containing
      keys 'vertices' and 'faces'

    Returns
    ----------
    loaded : dict
     Keyword args for Trimesh constructor, aka
     mesh=trimesh.Trimesh(**loaded)
    """

    import msgpack
    if hasattr(blob, 'read'):
        data = msgpack.load(blob)
    else:
        data = msgpack.loads(blob)
    loaded = load_dict(data)
    return loaded
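
A hedged usage sketch for the loader above, assuming the `trimesh` package is installed and that 'mesh.msgpack' (a placeholder path) holds a dict with 'vertices' and 'faces':

import trimesh

with open('mesh.msgpack', 'rb') as f:
    kwargs = load_msgpack(f)          # file-like objects go through msgpack.load
mesh = trimesh.Trimesh(**kwargs)      # keyword args, as described in the docstring
print(mesh.vertices.shape, mesh.faces.shape)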
Example #15
def main(path):
    sensor_time_values = {"Pupil Eyetracker": {}, "LTR-506ALS Light sensor": {}}
    for fn in glob.glob(path + "/*.msgpack"):
        s = msgpack.load(open(fn))
        for k, vs in s[3].items():
            for v in vs:
                if k == "Pupil Eyetracker":
                    sensor_time_values[k][v[1]] = v[0][2]
                elif k == "LTR-506ALS Light sensor":
                    sensor_time_values[k][v[1]] = v[0][0]
    mp.ion()
    mp.show()
    for k, vs in sensor_time_values.items():
        if k == "Pupil Eyetracker":
            c = [0, 1, 0]
        elif k == "LTR-506ALS Light sensor":
            c = [1, 0, 0]
        va = np.array(vs.values())
        M = np.max(va)
        m = np.min(va)
        s = 1.0 / (M - m)
        prev_x = 0
        xs = []
        ys = []
        for x, y in sorted(vs.items()):
            if x - prev_x < 0.25:
                continue
            xs.append(x)
            ys.append(y)
            prev_x = x
        mp.plot(np.array(xs) - xs[0], (np.array(ys) - m) * s, c=c, label=k)
    mp.title("Pupil Radius and Ambient Light over Time")
    mp.legend()
    mp.draw()
    mp.savefig("pupil_light_plot.png")
Example #16
    def _load_from_file(self, id):
        fname = self._get_fname_for_id(id)
        #
        #    Due to some concurrency issues, we need to perform this check
        #    before we try to read the .trace file.
        #
        if not os.path.exists(fname):

            for _ in xrange(int(1 / 0.05)):
                time.sleep(0.05)
                if os.path.exists(fname):
                    break
            else:
                msg = 'Timeout expecting trace file to be written "%s"' % fname
                raise IOError(msg)

        #
        #    Ok... the file exists, but it might still be being written
        #
        req_res = open(fname, 'rb')
        request_dict, response_dict = msgpack.load(req_res)
        req_res.close()
        
        request = HTTPRequest.from_dict(request_dict)
        response = HTTPResponse.from_dict(response_dict)
        return (request, response)
Example #17
def redis_store(input_dir, name, server, port, **kw):
    import redis
    r = redis.StrictRedis(server, port)
    times = set()
    sensor_types = {}
    fn_to_time = lambda x: int(x.rsplit('/', 1)[-1].split('.', 1)[0])
    r.sadd('users', name)
    for fn in sorted(glob.glob(input_dir + '/*'), key=fn_to_time):
        fn_time = fn_to_time(fn) / 1000.
        if fn.endswith('.jpg'):
            times.add(fn_time)
            r.zadd(name + ':images', fn_time, os.path.basename(fn))
        else:
            try:
                data = msgpack.load(open(fn))
            except ValueError:
                print('Could not parse [%s]' % fn)
                continue
            print(data)
            for sensor_name, type_num in data[2].items():
                sensor_types[sensor_name] = msgpack.dumps(type_num)
            for sensor_name, samples in data[3].items():
                for sample in samples:
                    times.add(sample[1])
                    r.zadd(name + ':sensor:' + sensor_name, sample[1], msgpack.dumps(sample))
    r.hmset(name + ':sensors', sensor_types)
    r.zadd(name + ':times', **{msgpack.dumps(x): x for x in times})
Example #18
def LoadPermResults(path,name,method,idx):
    if method=='msgpack':
        with open(os.path.join(path,name+'.mpac')) as f:
            data = msgpack.load(f)[idx]
    else:
        with open(os.path.join(path,name+'.pickle')) as f:
            data = pickle.load(f)[idx]
    return data
Example #19
	def from_cache(cls, path):
		"""Initialize a UCD from a cache file generated by gen_cache(), faster and uses less memory."""
		u = cls(None)
		u.code_points = {}
		with open(path) as f:
			for id, attrs in msgpack.load(f):
				u.code_points[id] = CodePoint(u, id, attrs)
		return u
Example #20
 def __call__(self):
   if self.filename == None:
     self.filename = msgpackmemoized_basedir + '/' + self.f.__name__ + '.msgpack'
   if path.exists(self.filename):
     return msgpack.load(open(self.filename))
   result = self.f()
   msgpack.dump(result, open(self.filename, 'w'))
   return result
Example #21
def init():
	global topicid2word, username2userid, userid2username, userid2etc
	f = open(DATAS_DIRPATH+'data_msgpack')
	aaa = msgpack.load(f)
	f.close()
	topicid2word = aaa['topicid2word']
	username2userid = aaa['username2userid']
	userid2username = aaa['userid2username']
	userid2etc = aaa['userid2etc']
Example #22
def model_fromfile(path):
    if path.endswith('.js.gz'):
        return call_import(json.loads(zlib.decompress(open(path).read())))
    elif path.endswith('.msgpack.gz'):
        return call_import(msgpack.load(zlib.decompress(open(path).read())))
    elif path.endswith('.pkl.gz'):
        return pickle.loads(zlib.decompress(open(path).read()))
    else:
        raise ValueError('Unknown model type[%s]' % path)
Example #23
File: spm.py Project: DaveQB/salt
        def _read_metadata(repo, repo_info):
            cache_path = '{0}/{1}.p'.format(
                self.opts['spm_cache_dir'],
                repo
            )

            with salt.utils.fopen(cache_path, 'r') as cph:
                metadata[repo] = {
                    'info': repo_info,
                    'packages': msgpack.load(cph),
                }
Example #24
 def load(cls, f):
   # Assumes everything is encoded in UTF-8.
   # This means that if some records (e.g., config files, feature vector
   # keys) are not encoded in UTF-8, the model cannot be loaded.  However,
   # such models cannot be written out to text or JSON, so we don't really
   # care.  Callers are responsible for handling UnicodeDecodeError.
   values = msgpack.load(f, encoding='utf-8', unicode_errors='strict')
   field_names = map(lambda x: x[0], cls.fields())
   c = cls()
   c.set(dict(zip(field_names, values)))
   return c
Example #25
    def load(cls, filepath=None):
        """Load the contents of the given filepath.
        If None, assume '<current_snapshot>/repos.msgpack'"""

        if filepath is None:
            filepath = os.path.join(config['current_snapshot'], 'repos.msgpack')

        with open(filepath, 'rb') as f:
            records = msgpack.load(f, object_hook=cls._loader, use_list=False)

        return records
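
For reference, the `object_hook` callable used above is invoked once for every decoded map, and its return value replaces that map in the result; a standalone illustration with made-up names:

import msgpack

def as_point(mapping):
    # assumption for this sketch: points were serialized as {"x": ..., "y": ...}
    if "x" in mapping and "y" in mapping:
        return (mapping["x"], mapping["y"])
    return mapping

packed = msgpack.dumps({"x": 1, "y": 2})
print(msgpack.loads(packed, raw=False, object_hook=as_point))   # -> (1, 2)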
Example #26
def load(fp, resource=None, encoding='UTF8', full_clean=True):
    """
    Load from a MessagePack encoded file.

    See :py:meth:`loads` for more details of the loading operation.

    :param fp: a file pointer to read MessagePack data from.
    :param resource: A resource instance or a resource name to use as the base for creating a resource.
    :param full_clean: Do a full clean of the object as part of the loading process.
    :returns: A resource object or object graph of resources loaded from file.
    """
    return resources.build_object_graph(msgpack.load(fp, encoding=encoding), resource, full_clean)
Example #27
def load_reg():
    '''
    Load the register from msgpack files
    '''
    reg_dir = _reg_dir()
    regfile = os.path.join(reg_dir, 'register')
    try:
        with salt.utils.fopen(regfile, 'r') as fh_:
            return msgpack.load(fh_)
    except:
        log.error('Could not read msgpack file {0}'.format(regfile))
        raise
Example #28
def main():
    print 'find_use_word'
    min_threshold = 100
    max_threshold = 100000
    get_useful_word(min_threshold=min_threshold,max_threshold=max_threshold)
    print 'create_data'
    useful_word_count_dic = msgpack.load(open(data_folder+
                                        'useful_word_count_dic_chinese_filter_%d_%d'%(min_threshold,max_threshold),'rb'))
    raw_data_folder = '/data/liubo/message/message_content_False'
    content_sentence_file = data_folder + '/' + 'content_sentence_%d_%d'%(min_threshold,max_threshold)
    content_word_list_file = data_folder + '/' + 'content_word_list_%d_%d'%(min_threshold,max_threshold)
    create_data(raw_data_folder,useful_word_count_dic,content_sentence_file,content_word_list_file)
Example #29
    def test_index_directory(self):
        """
        test_index
        """
        self.do_wait_for_tasks = False
        os.system("rm -Rf testdata/test")
        self.unzip_testfiles_clean()
        self.cboptions.sync = False
        localindex_check = msgpack.load(open("testdata/localindex_test.pickle"))
        localindex = make_local_index(self.cboptions)

        #msgpack.dump(localindex, open("testdata/localindex_test.pickle", "w"))
        self.assertTrue(localindex_check == localindex)
Example #30
    def _load_from_file(self, id):
        fname = self._get_fname_for_id(id)
        WAIT_TIME = 0.05

        #
        #    Due to some concurrency issues, we need to perform these checks
        #
        for _ in xrange(int(1 / WAIT_TIME)):
            if not os.path.exists(fname):
                time.sleep(WAIT_TIME)
                continue

            # Ok... the file exists, but it might still be being written
            req_res = open(fname, 'rb')

            try:
                data = msgpack.load(req_res, use_list=True)
            except ValueError:
                # ValueError: Extra data. returned when msgpack finds invalid
                # data in the file
                req_res.close()
                time.sleep(WAIT_TIME)
                continue

            try:
                request_dict, response_dict, canary = data
            except TypeError:
                # https://github.com/andresriancho/w3af/issues/1101
                # 'NoneType' object is not iterable
                req_res.close()
                time.sleep(WAIT_TIME)
                continue

            if not canary == self._MSGPACK_CANARY:
                # read failed, most likely because the file write is not
                # complete but for some reason it was a valid msgpack file
                req_res.close()
                time.sleep(WAIT_TIME)
                continue

            # Success!
            req_res.close()

            request = HTTPRequest.from_dict(request_dict)
            response = HTTPResponse.from_dict(response_dict)
            return request, response

        else:
            msg = 'Timeout expecting trace file to be ready "%s"' % fname
            raise IOError(msg)
Example #31
def build_vocab(questions, contexts):
    '''
    Build vocabulary sorted by global word frequency, or consider frequencies in questions first,
    which is controlled by `args.sort_all`.
    '''
    if args.sort_all:
        counter = collections.Counter(w for doc in questions + contexts
                                      for w in doc)
        vocab = sorted([t for t in counter if t in wv_vocab],
                       key=counter.get,
                       reverse=True)
    else:
        counter_q = collections.Counter(w for doc in questions for w in doc)
        counter_c = collections.Counter(w for doc in contexts for w in doc)
        counter = counter_c + counter_q
        vocab = sorted([t for t in counter_q if t in wv_vocab],
                       key=counter_q.get,
                       reverse=True)
        vocab += sorted(
            [t for t in counter_c.keys() - counter_q.keys() if t in wv_vocab],
            key=counter.get,
            reverse=True)
    total = sum(counter.values())
    matched = sum(counter[t] for t in vocab)
    log.info(
        'vocab coverage {1}/{0} | OOV occurrence {2}/{3} ({4:.4f}%)'.format(
            len(counter), len(vocab), (total - matched), total,
            (total - matched) / total * 100))
    vocab.insert(0, "<PAD>")
    vocab.insert(1, "<UNK>")
    with open(join(squad_dir, 'vocab.msgpack'), 'rb') as f:
        voc1 = msgpack.load(f, encoding='utf8')
    if not set(vocab).issubset(set(voc1)):
        print(
            "The checkpoints cannot be used, since a vocabulary has different set of words"
        )
        print(
            "New dataset %d vs %d (diff in %d), adopting general vocabulary for checkpoints"
            %
            (len(vocab), len(voc1), len(vocab) - len(set(vocab) & set(voc1))))
        vocab = voc1
    else:
        # to preserve the order of indices due to sorted Counter
        vocab = voc1
    return vocab, voc1, counter
Example #32
    def check_minion_cache(self, preserve_minions=False):
        '''
        Check the minion cache to make sure that old minion data is cleared
        '''
        keys = self.list_keys()
        minions = []
        for key, val in six.iteritems(keys):
            minions.extend(val)

        m_cache = os.path.join(self.opts['cachedir'], 'minions')
        if os.path.isdir(m_cache):
            for minion in os.listdir(m_cache):
                if minion not in minions:
                    shutil.rmtree(os.path.join(m_cache, minion))

        kind = self.opts.get('__role', '')  # application kind
        if kind not in kinds.APPL_KINDS:
            emsg = ("Invalid application kind = '{0}'.".format(kind))
            log.error(emsg + '\n')
            raise ValueError(emsg)
        role = self.opts.get('id', '')
        if not role:
            emsg = ("Invalid id.")
            log.error(emsg + "\n")
            raise ValueError(emsg)

        name = "{0}_{1}".format(role, kind)
        road_cache = os.path.join(self.opts['cachedir'], 'raet', name,
                                  'remote')
        if os.path.isdir(road_cache):
            for road in os.listdir(road_cache):
                root, ext = os.path.splitext(road)
                if ext not in ['.json', '.msgpack']:
                    continue
                prefix, sep, name = root.partition('.')
                if not name or prefix != 'estate':
                    continue
                path = os.path.join(road_cache, road)
                with salt.utils.fopen(path, 'rb') as fp_:
                    if ext == '.json':
                        data = json.load(fp_)
                    elif ext == '.msgpack':
                        data = msgpack.load(fp_)
                    if data['role'] not in minions:
                        os.remove(path)
Example #33
def load_scoops_from_action_index_map(container_dir, header_only=True):
    '''
    Load a msgpack action index map file and return a list of scoop dictionaries
    '''
    indexed_scoop_list = []
    action_index_map_filepath = get_container_action_index_map_filepath(
        container_dir)
    print(action_index_map_filepath)
    with open(action_index_map_filepath, 'rb') as ff:
        imap = mp.load(ff)

    for packed_indexed_scoop in imap.values():
        unpacked_indexed_scoop = unpack_scoop_header(packed_indexed_scoop,
                                                     container_dir,
                                                     header_only=header_only)
        indexed_scoop_list.append(unpacked_indexed_scoop)

    return indexed_scoop_list
Example #34
 def _read(self):
     '''
     Read in from disk
     '''
     if not HAS_MSGPACK or not os.path.exists(self._path):
         return
     with salt.utils.files.fopen(self._path, 'rb') as fp_:
         cache = msgpack.load(fp_, encoding=__salt_system_encoding__)
     if "CacheDisk_cachetime" in cache:  # new format
         self._dict = cache["CacheDisk_data"]
         self._key_cache_time = cache["CacheDisk_cachetime"]
     else:  # old format
         self._dict = cache
         timestamp = os.path.getmtime(self._path)
         for key in self._dict:
             self._key_cache_time[key] = timestamp
     if log.isEnabledFor(logging.DEBUG):
         log.debug('Disk cache retrieved: {0}'.format(cache))
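
The branch on "CacheDisk_cachetime" implies that the newer on-disk layout is a single map holding both the cached data and the per-key store times. A sketch of what the matching write side would persist (field names taken from the snippet; the path and everything else are assumptions):

import time
import msgpack

cache = {
    "CacheDisk_data": {"some_key": "some_value"},           # the cached dict itself
    "CacheDisk_cachetime": {"some_key": int(time.time())},  # per-key store time
}
with open('/tmp/disk_cache.p', 'wb') as fp_:
    msgpack.dump(cache, fp_)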
Example #35
def load(fp,
         resource=None,
         encoding='UTF8',
         full_clean=True,
         default_to_not_supplied=False):
    """
    Load from a MessagePack encoded file.

    See :py:meth:`loads` for more details of the loading operation.

    :param fp: a file pointer to read MessagePack data from.
    :param resource: A resource instance or a resource name to use as the base for creating a resource.
    :param full_clean: Do a full clean of the object as part of the loading process.
    :returns: A resource object or object graph of resources loaded from file.
    """
    return resources.build_object_graph(msgpack.load(fp, encoding=encoding),
                                        resource, full_clean,
                                        default_to_not_supplied)
Example #36
    def load():
        Storage.DATABASE = QFontDatabase()

        if not Storage.USER_PREFERENCES_DIR.exists():
            Storage.USER_PREFERENCES_DIR.mkdir()

        Storage.load_page_data()

        Storage.SOUNDS = {
            "tap":
            QSound(
                Storage.resolve_audio(
                    "navigation_forward-selection-minimal.wav"))
        }

        if Storage.USER_DATA_FILE.exists():
            with open(Storage.USER_DATA_FILE, "r") as file:
                Storage.USER_DATA = msgpack.load(file)
Example #37
 def _load_master(self, cls: Type[ma.MasterAsset]) -> ma.MasterDict:
     name = cls.__name__
     # -1 for self, and -1 for the asset_manager argument.
     # What remains is the number of arguments to keep from the msgpack file itself.
     argument_count = len(inspect.signature(cls.__init__).parameters) - 2
     asset_path = self.path / f'Master/{name}.msgpack'
     with asset_path.open('rb') as f:
         data = msgpack.load(f, strict_map_key=False, use_list=False)
     if self.drop_extra_fields:
         if len(next(iter(data.values()))) != argument_count:
             self.logger.info(f'Dropping extra arguments from {name}.')
         master_dict = ma.MasterDict(
             {k: cls(self, *(v[:argument_count]))
              for k, v in data.items()}, name, asset_path)
     else:
         master_dict = ma.MasterDict(
             {k: cls(self, *v)
              for k, v in data.items()}, name, asset_path)
     self.masters[name] = master_dict
     if cls.db_fields:
         with self.db:
             cur = self.db.cursor()
             fields = [
                 cls.__dataclass_fields__[db_field]
                 for db_field in cls.db_fields
             ]
             type_mapping = {
                 bool: 'integer',
                 int: 'integer',
                 float: 'real',
                 str: 'text',
                 msgpack.Timestamp: 'datetime'
             }
             cur.execute(
                 f'CREATE TABLE {name}'
                 f'({", ".join(f"{field.name} {type_mapping[field.type]}" for field in fields)})'
             )
             insert_query = f'INSERT INTO {name} VALUES ({f", ".join(["?"] * len(cls.db_fields))})'
             for value in master_dict.values():
                 field_dict = dataclasses.asdict(value)
                 field_values = [field_dict[name] for name in cls.db_fields]
                 cur.execute(insert_query, field_values)
     return master_dict
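
The `strict_map_key=False` flag above is what permits integer map keys: since msgpack 1.0 the unpacker rejects non-str/bytes keys by default. A minimal standalone illustration:

import msgpack

packed = msgpack.dumps({1: "one", 2: "two"})
print(msgpack.loads(packed, strict_map_key=False))   # {1: 'one', 2: 'two'}
# without the flag, msgpack >= 1.0 raises an error for the integer keys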
Example #38
def cBpack(lang_code, src_path, swear_path, dst_path):
    """
    Works with a cBpack source and builds a combined list.
    Explanation of cBpack: https://github.com/LuminosoInsight/wordfreq/blob/7a742499a42a6539be772ab26b6460d7e160ae04/wordfreq/__init__.py#L37-L76
    Frequency calculation adjusted to work with 15..255 format
    """
    with gzip.open(src_path, "rb") as f_src:
        data_raw = msgpack.load(f_src, raw=False)
    header = data_raw[0]
    if (not isinstance(header, dict) or header.get("format") != "cB"
            or header.get("version") != 1):
        raise ValueError("Unexpected header: {}".format(header))
    data = data_raw[1:]
    data_sanitized = []
    for innerlist in data:
        sanitized_list = []
        for word in innerlist:
            word_sanitized = word.strip()
            if (__validate_str(word_sanitized)):
                sanitized_list.append(word_sanitized)
        if len(sanitized_list) > 0:
            data_sanitized.append(sanitized_list)
    swear_words = []
    with io.open(swear_path, encoding="utf-8") as f_swear:
        for swear_word in f_swear.readlines():
            swear_word = swear_word.strip()
            if len(swear_word) > 0:
                if swear_word[0] != "#":
                    swear_words.append(swear_word)
    index = 0
    with io.open(dst_path, "w", encoding="utf-8") as f_dst:
        # Write header of combined list first
        f_dst.write(__header(lang_code))
        len_list = len(data_sanitized)
        for innerlist in data_sanitized:
            freq = __freq_for_index(index, len_list)
            for word in innerlist:
                if word in swear_words:
                    adjusted_freq = 0
                else:
                    adjusted_freq = freq
                f_dst.write(" word={},f={}\n".format(word, adjusted_freq))
            index += 1
Example #39
    def _recver(self):
        """ 接收处理数据 """
        recv_func = self.sock.recv

        def _read(c):
            d = recv_func(c)
            if d:
                return d
            if self.stoped:
                raise GreenletExit
            self._recver_on_error = True
            self._on_socket_error(None)
            self._recver_on_error = False
            return None

        try:
            sio = BytesIO()
            while not self.stoped:
                d = _read(4)
                if d is None:
                    printf('_read(4) return None')
                    continue
                dlen = unpack('I', d)[0]
                #rs = []
                sio.seek(0)
                sio.truncate()
                while dlen > 0:
                    data = _read(dlen)
                    if data is None:
                        continue
                    #rs.append(data)
                    sio.write(data)
                    dlen -= len(data)
                #spawn(self._handle, loads(''.join(rs)))
                sio.seek(0)
                spawn(self._handle, load(sio))
                #self._pool.spawn(self._handle, loads(''.join(rs)))
        except GreenletExit:
            printf('[RpcService._recver GreenletExit]%s', self.sock)
        except Exception as err:
            printf('[RpcService._recver]%s', err)
        finally:
            self.stop()
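
The receive loop expects every message as a 4-byte unsigned-int length header followed by a msgpack body; the matching send side would be a thin wrapper like this sketch (the socket object is a placeholder):

from struct import pack
from msgpack import dumps

def send_msg(sock, obj):
    payload = dumps(obj)
    # native-endian 4-byte length header, matching unpack('I', ...) in the receiver
    sock.sendall(pack('I', len(payload)) + payload)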
Example #40
def load_eval_data(opt, eval_data): # can be extended to true test set
    with open(eval_data, 'rb') as f:
        data = msgpack.load(f, encoding='utf8')
    embedding = torch.Tensor(data['embedding'])

    assert opt['embedding_dim'] == embedding.size(1)
    assert opt['num_features'] == len(data['premise_features'][0][0])

    eval_set = list(zip(
        data['premise_ids'],
        data['premise_features'],
        data['premise_tags'],
        data['premise_ents'],
        data['hypothesis_ids'],
        data['hypothesis_features'],
        data['hypothesis_tags'],
        data['hypothesis_ents']
    ))
    return eval_set, embedding, [text2class(ans) for ans in data['answers']]
Example #41
def load_processed_data(f, threshold):
    """Takes a file path containing trajectory data and loads it, returning a data frame with additional metadata.

    Arguments:
        f: string
            Path to trajectory data archive file.
        threshold:
            Classifier threshold to filter the results before querying the metadata from the database.

    Returns:
        pandas.DataFrame
    """
    data = None
    try:
        with zipfile.ZipFile(f, "r", zipfile.ZIP_DEFLATED) as zf:
            with zf.open(f.split("/")[-1].replace("zip", "msgpack"),
                         "r") as file:
                data = msgpack.load(file)
    except Exception as e:
        print("Error loading {}".format(f))
        print(str(e))
        return None

    if len(data) == 0:
        return None
    from IPython.display import display
    frame_id, bee_id0, bee_id1, score = zip(*data)
    frame_id = pd.Series(frame_id, dtype=np.uint64)
    bee_id0 = pd.Series(bee_id0, dtype=np.uint16)
    bee_id1 = pd.Series(bee_id1, dtype=np.uint16)
    score = pd.Series(score, dtype=np.float32)
    data = [frame_id, bee_id0, bee_id1, score]
    data = pd.concat(data, axis=1)
    data.columns = ["frame_id", "bee_id0", "bee_id1", "score"]
    data = data[(~pd.isnull(data.score)) & (data.score >= threshold)]
    all_frame_ids = data.frame_id.unique()

    metadata = db.metadata.get_frame_metadata(all_frame_ids)
    metadata.frame_id = metadata.frame_id.astype(np.uint64)
    metadata["datetime"] = pd.to_datetime(metadata.timestamp, unit="s")

    data = data.merge(metadata, on="frame_id", how="inner")
    return data
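
A hedged usage sketch; the archive path and threshold are placeholders, and the function also relies on the project's `db.metadata` helper plus the usual pandas, numpy, zipfile and msgpack imports:

df = load_processed_data("/data/trajectories/2019-08-20.zip", threshold=0.8)
if df is not None:
    print(df[["frame_id", "bee_id0", "bee_id1", "score", "datetime"]].head())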
Example #42
def main():
    picttree.main(picttree.argument_parser().parse_args([
        "-m", "black-base", "-t", "128", "-s", "-T", "picture2d",
        "-O", "msgpack", "-o", "output.p2mt", "input.png"
    ]))
    with open("output.p2mt", "rb") as f:
        treeinfo = msgpack.load(f, encoding="UTF-8")

    show_0th_volumes(treeinfo)
    show_0th_volumes_4_5(treeinfo)
    show_1st_volume(treeinfo)
    show_1st_volume_6_7(treeinfo)
    # image = Image.open("input.png").convert("RGB")
    # print(image)

    picttree.main(picttree.argument_parser().parse_args([
        "-m", "black-base", "-t", "128", "-s", "-T", "picture2d",
        "-O", "json", "-o", "output-tree.json", "input.png"
    ]))
Example #43
def iter_agenda(publications=None, rin_list=None):
    agenda_dir = Path(data_dir) / 'agenda'
    if not publications:
        publications = os.listdir(agenda_dir)

    if rin_list:
        rin_list = set(rin_list)

    for publication in publications:

        for rin_file in os.listdir(agenda_dir / publication):

            if rin_list:
                rin = rin_file.split('.')[0]
                if rin not in rin_list:
                    continue

            with open(agenda_dir / publication / rin_file, 'rb') as f:
                yield load(f)
Example #44
def load_key(path_or_file, serial='json'):
    '''
    Read in a key from a file and return the applicable key object based on
    the contents of the file
    '''
    if hasattr(path_or_file, 'read'):
        stream = path_or_file
    else:
        if serial == 'json':
            stream = open(path_or_file, 'r')
        else:
            stream = open(path_or_file, 'rb')

    try:
        if serial == 'msgpack':
            import msgpack
            key_data = msgpack.load(stream)
        elif serial == 'json':
            import json
            key_data = json.loads(stream.read(), encoding='UTF-8')
    finally:
        if stream != path_or_file:
            stream.close()

    if 'priv' in key_data and 'sign' in key_data and 'pub' in key_data:
        return libnacl.dual.DualSecret(
            libnacl.encode.hex_decode(key_data['priv']),
            libnacl.encode.hex_decode(key_data['sign']))
    elif 'priv' in key_data and 'pub' in key_data:
        return libnacl.public.SecretKey(
            libnacl.encode.hex_decode(key_data['priv']))
    elif 'sign' in key_data:
        return libnacl.sign.Signer(libnacl.encode.hex_decode(key_data['sign']))
    elif 'pub' in key_data:
        return libnacl.public.PublicKey(
            libnacl.encode.hex_decode(key_data['pub']))
    elif 'verify' in key_data:
        return libnacl.sign.Verifier(key_data['verify'])
    elif 'priv' in key_data:
        return libnacl.secret.SecretBox(
            libnacl.encode.hex_decode(key_data['priv']))
    raise ValueError('Found no key data')
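
A hedged usage sketch; the paths are placeholders, and the concrete return type (Signer, SecretKey, DualSecret, ...) depends on which fields the key file contains, as dispatched above:

signer = load_key('/etc/keys/master.key', serial='msgpack')
pubkey = load_key('/etc/keys/peer.json', serial='json')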
Example #45
def unpack_scoop_header(packed_header, container_dir, header_only=True):
    unpacked_header = {
        'id': packed_header[0],
        'scoop': unpack_scoop(packed_header[1]),
        'measured_result_map': packed_header[2],
        'modeled_result_map': packed_header[3]
    }
    if not header_only:
        traj_dir = os.path.join(container_dir, 'get_stuff', 'traj')
        scoop_filepath = os.path.join(traj_dir,
                                      unpacked_header['id']) + '.mpac'

        try:
            with open(scoop_filepath, 'rb') as ff:
                scoop_mpac = mp.load(ff)

            unpacked_header['scoop'] = unpack_scoop(scoop_mpac)
        except IOError:
            print('Scoop file', scoop_filepath, 'not found!')
    return unpacked_header
Example #46
def load_http_response_from_temp_file(filename, remove=True):
    """
    :param filename: The filename that holds the HTTP response as msgpack
    :param remove: Remove the file after reading
    :return: An HTTP response instance
    """
    # Importing here to prevent import cycle
    from w3af.core.data.url.HTTPResponse import HTTPResponse

    try:
        data = msgpack.load(file(filename, 'rb'), raw=False)
        result = HTTPResponse.from_dict(data)
    except:
        if remove:
            remove_file_if_exists(filename)
        raise
    else:
        if remove:
            remove_file_if_exists(filename)
        return result
Example #47
def main(argv):
    '''
    Command line tool for MindMeld construction/manipulation.
    '''

    p = argparse.ArgumentParser(prog='melder')
    p.add_argument('meldfile',help='meld file path')

    p.add_argument('--add-pypath', dest='pypaths', default=[], action='append', help='add a python path to the meld')
    p.add_argument('--add-datfiles', dest='datfiles', action='store_true', help='when adding pypath, include datfiles')

    p.add_argument('--dump-info', dest='dumpinfo', action='store_true', help='dump the entire meld info dictionary to stdout')

    p.add_argument('--set-name', dest='name', default=None, help='set meld name (ie, "foolib")')
    p.add_argument('--set-version', dest='version', default=None, help='set meld version (ie, 8.2.30)')

    opts = p.parse_args(argv)

    meldinfo = {}
    if os.path.isfile(opts.meldfile):
        with open(opts.meldfile,'rb') as fd:
            meldinfo = msgpack.load(fd,encoding='utf8')

    meld = s_mindmeld.MindMeld(**meldinfo)

    if opts.version:
        meld.setVersion(vertup(opts.version))

    if opts.name:
        meld.setName(opts.name)

    for pypath in opts.pypaths:
        meld.addPyPath(pypath,datfiles=opts.datfiles)

    meldinfo = meld.getMeldDict()
    if opts.dumpinfo:
        print(repr(meldinfo))

    meldbyts = msgpack.dumps( meld.getMeldDict(), encoding='utf8', use_bin_type=True )
    with open(opts.meldfile,'wb') as fd:
        fd.write(meldbyts)
Example #48
def load_msgpack(blob, file_type=None):
    '''
    Load a dict packed with msgpack into kwargs for Trimesh constructor

    Parameters
    ----------
    blob: msgpack packed dict with keys for 'vertices' and 'faces'
    file_type: not used

    Returns
    ----------
    loaded: kwargs for Trimesh constructor (aka mesh=trimesh.Trimesh(**loaded))
    '''

    import msgpack
    if hasattr(blob, 'read'):
        data = msgpack.load(blob)
    else:
        data = msgpack.loads(blob)
    loaded = load_dict(data)
    return loaded
Example #49
    def __init__(self, root, task_root, debug, split="train", data_fraction=1.0):
        self.root = f"{root}/{task_root}"
        self.split = split
        self.name = "clevr"

        with open(f"{self.root}/{split}.msgpack", "rb") as fp:
            self.data = msgpack.load(fp, encoding="utf8")
        if split == "val":
            self.process_family()

        if split == "train":
            partial = int(len(self.data) * data_fraction)
            self.data = self.data[:partial]

        with open(f"{self.root}/dictionary.json", "r") as fp:
            dic = json.load(fp)
            self.qdic = dic["question"]
            self.adic = dic["answer"]

        if debug:
            self.data = self.data[:1000]
Example #50
 def load_data(self):
     print('Load train_meta.msgpack...')
     meta_file_name = os.path.join(self.spacyDir, 'train_meta.msgpack')
     with open(meta_file_name, 'rb') as f:
         meta = msgpack.load(f, encoding='utf8')
     self.opt['char_vocab_size'] = len(meta['char_vocab'])
     all_embedding = {}
     if 'GLOVE' in self.opt:
         glove_embedding = torch.Tensor(meta['glove_embedding'])
         all_embedding['glove_embedding'] = glove_embedding
         self.opt['vocab_size'] = glove_embedding.size(0)
         self.opt['vocab_dim'] = glove_embedding.size(1)
     if 'FastText' in self.opt:
         fast_embedding = torch.Tensor(meta['fast_embedding'])
         all_embedding['fast_embedding'] = fast_embedding
         self.opt['vocab_size'] = fast_embedding.size(0)
         self.opt['vocab_dim'] = fast_embedding.size(1)
     if 'PHOC' in self.opt:
         phoc_embedding = torch.Tensor(meta['phoc_embedding'])
         all_embedding['phoc_embedding'] = phoc_embedding
     return meta['vocab'], meta['char_vocab'], all_embedding
Example #51
 def load_from_file(self, filename):
     """Load coverage db from a save covdb file. Requires msgpack"""
     if file_backing_disabled:
         raise Exception(
             "[!] Can't save/load coverage db files without msgpack. Try `pip install msgpack`"
         )
     self.filename = filename
     with open(filename, "r") as f:
         object_dict = msgpack.load(f)
     self.module_name = object_dict["module_name"]
     self.module_base = object_dict["module_base"]
     self.module_blocks = object_dict["module_blocks"]
     self.trace_dict = {
         k: set(v)
         for k, v in object_dict["trace_dict"].items()
     }
     self.block_dict = object_dict["block_dict"]
     self.function_stats = object_dict["function_stats"]
     self.coverage_files = object_dict["coverage_files"]
     self.total_coverage = set(object_dict["total_coverage"])
     self.frontier = set(object_dict["frontier"])
Example #52
File: cli.py Project: mattf/nutai
def test_minhash(documents, stopwords, labeled):
    with open(stopwords, 'rb') as fp:
        stops = set(msgpack.load(fp))
    filter_processor = simple_preprocess_and_filter_stopwords(stops)
    docs = load_docs(documents,
                     process_text=lambda doc: filter_processor(
                         combine_issue_and_body_filter_labels(doc)))
    labels = load_testset(labeled, docs)
    model = nutai.minhash.Model()

    true = [label for _, _, label in labels]
    pred = [
        approx_jaccard_score(model.calculate_signature(docs[id0]['text']),
                             model.calculate_signature(docs[id1]['text']))
        for id0, id1, _ in labels
    ]
    best_thresh = calculate_best_threshold(pred, labels)

    print("best threshold:", best_thresh)
    print_confusion_matrix(
        confusion_matrix(true, [p > best_thresh for p in pred]))
Example #53
def main():
    print("Downloading data...")
    os.system("wget https://www.dropbox.com/s/r33ljlagys0wscb/data.msgpack?dl=1")
    os.system("wget https://www.dropbox.com/s/83txkgiqmdlv1m3/meta.msgpack?dl=1")
    os.system("mv 'meta.msgpack?dl=1' data/meta.msgpack")
    os.system("mv 'data.msgpack?dl=1' data/data.msgpack")
    os.system("python3 -m spacy download en_core_web_sm")
    print("Done.")
    print("Loading data...")
    with open(F_DATA_PATH, 'rb') as f:
        data = msgpack.load(f, encoding='utf8')
    print("Done.")
    print("Processing...")
    data = (
            make_comfort_data(data['train']),
            make_comfort_data(data['dev'], is_dev=True)
    )
    print("Done.")
    print("Saving...")
    save_comfort_data(data)
    print("Done.")
Example #54
def main():
    path = input('Enter file path of your model:')

    with open(RAW_META_PATH, 'rb') as f:
        meta = msgpack.load(f, encoding='utf8')

    embedding = load_pickle(EMB_PATH)

    model = models.AttentionModel(path)

    w2id = {w: i for i, w in enumerate(meta['vocab'])}
    tag2id = {w: i for i, w in enumerate(meta['vocab_tag'])}
    ent2id = {w: i for i, w in enumerate(meta['vocab_ent'])}

    while True:
        id_ = 0
        try:
            while True:
                context = input('Enter context: ')
                if context.strip():
                    break
            while True:
                question = input('Enter question: ')
                if question.strip():
                    break
        except EOFError:
            break
        id_ += 1
        annotated = annotate(('interact-{}'.format(id_), context, question),
                             meta['wv_cased'])
        model_in_raw = to_id(annotated, w2id, tag2id, ent2id)
        model_in = generate_batch(model_in_raw)
        start_probas, end_probas = model.model.predict(model_in)
        answ_pair = get_preds2(start_probas, end_probas, MAX_ANSW_LEN)
        print('Answer:', end=' ')
        for i in range(answ_pair[0][0], answ_pair[0][1] + 1):
            print(model_in_raw[6][model_in_raw[7][i][0]:model_in_raw[7][i][1] +
                                  1],
                  end='')
        print('\n\n\n-------------|||-------------\n\n\n')
Example #55
def cal_acc(dist_file):
    x = []
    y = []
    same_dist_list, no_same_dist_list = msgpack.load(open(dist_file, 'rb'))
    for dist in same_dist_list:
        x.append(dist)
        y.append(0)
    for dist in no_same_dist_list:
        x.append(dist)
        y.append(1)
    x = np.reshape(np.asarray(x), (len(x), 1))
    y = np.asarray(y)
    train_x, valid_x, train_y, valid_y = train_test_split(x, y, test_size=0.1)
    clf = LinearSVC()
    print len(x), len(y)
    clf.fit(train_x, train_y)
    acc = accuracy_score(valid_y, clf.predict(valid_x))
    print acc
    clf = DecisionTreeClassifier()
    clf.fit(train_x, train_y)
    acc = accuracy_score(valid_y, clf.predict(valid_x))
    print acc
Example #56
def load_object(path, build_fn, *args, **kwargs):
    """ load from serialized form or build an object, saving the built
        object; kwargs provided to `build_fn`.
    """

    save = False
    obj = None

    if path is not None and os.path.isfile(path):
        with open(path, 'rb') as obj_f:
            obj = msgpack.load(obj_f, use_list=False, encoding='utf-8')
    else:
        save = True

    if obj is None:
        obj = build_fn(*args, **kwargs)

        if save and path is not None:
            with open(path, 'wb') as obj_f:
                msgpack.dump(obj, obj_f)

    return obj
Example #57
def get_or_build(path, build_fn, *args, **kwargs):
    """
    Load an object from its serialized form or build it, saving the built object.
    Remaining arguments are provided to `build_fn`.
    """

    save = False
    obj = None

    if path is not None and os.path.isfile(path):
        with open(path, 'rb') as obj_f:
            obj = msgpack.load(obj_f, use_list=False, encoding='utf-8')
    else:
        save = True

    if obj is None:
        obj = build_fn(*args, **kwargs)
        if save and path is not None:
            with open(path, 'wb') as obj_f:
                msgpack.dump(obj, obj_f)

    return obj
Example #58
def load(path):
    """
    Return data read from file path as a dict.
    The file may be json, msgpack, or cbor, indicated by extension .json, .mgpk, or
    .cbor respectively.
    Otherwise raise IOError.
    """
    root, ext = os.path.splitext(path)
    if ext == '.json':
        with ocfn(path, "rb") as f:
            it = json.load(f)
    elif ext == '.mgpk':
        with ocfn(path, "rb") as f:
            it = msgpack.load(f)
    elif ext == '.cbor':
        with ocfn(path, "rb") as f:
            it = cbor.load(f)
    else:
        raise IOError(f"Invalid file path ext '{path}' "
                      f"not '.json', '.mgpk', or 'cbor'.")

    return it
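
A dump counterpart under the same extension convention is a straightforward mirror image; this sketch uses the standard `open()` instead of the project's `ocfn` helper and assumes the cbor module exposes `dump()` just as it exposes `load()` above:

import os
import json
import msgpack
import cbor

def dump(it, path):
    """Serialize `it` to path as json, msgpack, or cbor, chosen by extension."""
    root, ext = os.path.splitext(path)
    if ext == '.json':
        with open(path, 'w') as f:
            json.dump(it, f)
    elif ext == '.mgpk':
        with open(path, 'wb') as f:
            msgpack.dump(it, f)
    elif ext == '.cbor':
        with open(path, 'wb') as f:
            cbor.dump(it, f)
    else:
        raise IOError(f"Invalid file path ext '{path}' "
                      f"not '.json', '.mgpk', or '.cbor'.")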
Example #59
def consolidate(dump):
    pts = {}
    for x in glob.glob(dump + '/*.msgpack'):
        msg = msgpack.load(open(x))
        if msg[0] == 'sensors':
            print(msg)
            for x in msg[3].get('Pupil Point', []):
                if int(x[0][0]) == -1:
                    continue
                pts.setdefault(int(x[0][0]),
                               []).append([x[0][2], x[0][1], x[0][4], x[0][3]])
    params = []
    for k, vs in sorted(pts.items()):
        p = np.array(vs)[:, 2:]
        print(p)
        params.append(
            [np.mean(p, 0).tolist(),
             np.cov(p.T).tolist(),
             p.tolist()])
        print(params[-1][0])
        print(params[-1][1])
    open('calib.js', 'w').write(json.dumps(params))
Example #60
 def getGuides(self, gene_exon):
     try:
         filename = gene_exon + ".p"
         if self.scoring_alg == "Doench":
             path = os.path.join(
                 os.path.dirname(__file__),
                 'static/data/GRCh37_guides_msgpack_Doench/', filename)
         elif self.domains_enabled:
             path = os.path.join(
                 os.path.dirname(__file__),
                 'static/data/GRCh37_guides_msgpack_Azimuth_domains/',
                 filename)
         else:
             path = os.path.join(
                 os.path.dirname(__file__),
                 'static/data/GRCh37_guides_msgpack_Azimuth/', filename)
         with open(path) as datafile:
             gRNAs = msgpack.load(datafile)
             return gRNAs
     except IOError:
         gene, exon = gene_exon.split('_')
         raise ExonError(gene, exon)