Example #1
 def test_loadFileArgsError(self):
     try:
         ujson.load("[]")
     except TypeError:
         pass
     else:
         assert False, "expected TypeError"
Example #2
def load_cooc_dict():
    global cw_dict, c_dict
    liblogger.info("load cooc dict")
    pxy_cache_file = cooc_dict_file + ".pxy.cache"
    py_cache_file = cooc_dict_file + ".py.cache"
    if using_cache and os.path.exists(pxy_cache_file) and os.path.exists(py_cache_file):
        cw_dict = json.load(open(pxy_cache_file))
        c_dict = json.load(open(py_cache_file))
        return 
    cooc_dict = json.load(open(cooc_dict_file))
    cw_dict = defaultdict(int)
    c_dict = defaultdict(int)
    for w in cooc_dict:
        #ctxs = [eval(ctx) for ctx in cooc_dict[w].keys()]
        for ctx in cooc_dict[w]:
            count = cooc_dict[w][ctx]
            cw = (w, ctx)
            cw_dict[cw] += count
            c_dict[ctx] += count
    liblogger.info("norm cooc dict for P(x, y)")
    cw_sum = float(sum(cw_dict.values()))
    for cw in cw_dict:
        cw_dict[cw] = math.log(cw_dict[cw] / cw_sum)
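    # NOTE: cw_dict is keyed by (word, ctx) tuples, but the json module only
    # accepts string keys, so this cache dump will raise a TypeError unless
    # the keys are converted to strings first.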
    json.dump(cw_dict, open(pxy_cache_file, "w"))
    liblogger.info("ctx dict P(y)")
    c_sum = float(sum(c_dict.values()))
    for c in c_dict:
        c_dict[c] = math.log(c_dict[c] / c_sum)
    json.dump(c_dict, open(py_cache_file, "w"))
Example #3
def test_orderbook():
    variable_order_book = Book()
    control_order_book = Book()

    with open('testdata/messages.json') as messages_json_file:
        messages = json.load(messages_json_file)

    with open('testdata/beginning_level_3.json') as begin_json_file:
        beginning_level_3 = json.load(begin_json_file)

    with open('testdata/ending_level_3.json') as end_json_file:
        ending_level_3 = json.load(end_json_file)

    try:
        assert beginning_level_3['sequence'] + 1 == messages[0]['sequence']
        assert ending_level_3['sequence'] == messages[-1]['sequence']
    except AssertionError:
        print("Problem with sample data sequences")

    variable_order_book.get_level3(beginning_level_3)

    start = time.time()
    [variable_order_book.process_message(message) for message in messages]
    end = time.time()
    print('messages per sec: {0}'.format(int(len(messages)/(end-start))))

    control_order_book.get_level3(ending_level_3)

    dict_compare(variable_order_book.asks.price_map, control_order_book.asks.price_map, price_map=True)
    dict_compare(variable_order_book.asks.order_map, control_order_book.asks.order_map, order_map=True)
Example #4
    def extract_json_data(self, filename, option):
        '''
        Imports .json files from peeringdb and returns a list of dictionaries with all the retrieved IXP information.
        Input: 
            a) filename: A .json file name.
            b) mypath: The directory path of the database.
            c) option: Flag to download the file.
            d) config: Dictionary that contains the config file.
        Output:
            a) A list of dictionaries.
        '''

        try:
            with open(self.homepath + '/database' + filename) as data_file:
                obj = ujson.load(data_file)
        except:
            print(filename + ' was not found.')

            if not self.downloader.download_peering(option):
                print("Could not download " + filename +
                      ". Copying from the default database.")
                try:
                    copyfile(self.libpath + '/database/Default' + filename,
                             self.homepath + '/database' + filename)
                except:
                    print('Could not copy ' + filename +
                          ' from the default database.')

            try:
                with open(self.homepath + '/database' + filename) as data_file:
                    obj = ujson.load(data_file)
            except:
                print('Could not open ' + filename + '. Exiting.')
                exit(0)
        return (obj['data'])
Example #5
    def get_translation_percentage(self, locale_path: Text, locale: Text) -> int:

        # backend stats
        po = polib.pofile(self.get_po_filename(locale_path, locale))
        not_translated = len(po.untranslated_entries())
        total = len(po.translated_entries()) + not_translated

        # frontend stats
        with open(self.get_json_filename(locale_path, locale)) as reader:
            for key, value in ujson.load(reader).items():
                total += 1
                if value == '':
                    not_translated += 1

        # mobile stats
        with open(os.path.join(locale_path, 'mobile_info.json')) as mob:
            mobile_info = ujson.load(mob)
        try:
            info = mobile_info[locale]
        except KeyError:
            if self.strict:
                raise
            info = {'total': 0, 'not_translated': 0}

        total += info['total']
        not_translated += info['not_translated']

        return (total - not_translated) * 100 // total
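A quick worked illustration of the final integer-percentage line (the counts are made up):

total, not_translated = 200, 30                         # hypothetical counts
assert (total - not_translated) * 100 // total == 85    # 85% translated, rounded down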
Example #6
def _load(logger, tests_root, manifest, types=None, meta_filters=None, allow_cached=True):
    # "manifest" is a path or file-like object.
    manifest_path = (manifest if isinstance(manifest, string_types)
                     else manifest.name)
    if allow_cached and manifest_path in __load_cache:
        return __load_cache[manifest_path]

    if isinstance(manifest, string_types):
        if os.path.exists(manifest):
            logger.debug("Opening manifest at %s" % manifest)
        else:
            logger.debug("Creating new manifest at %s" % manifest)
        try:
            with open(manifest) as f:
                rv = Manifest.from_json(tests_root,
                                        fast_json.load(f),
                                        types=types,
                                        meta_filters=meta_filters)
        except IOError:
            return None
        except ValueError:
            logger.warning("%r may be corrupted", manifest)
            return None
    else:
        rv = Manifest.from_json(tests_root,
                                fast_json.load(manifest),
                                types=types,
                                meta_filters=meta_filters)

    if allow_cached:
        __load_cache[manifest_path] = rv
    return rv
Example #7
 def setUp(self):
     with open("tests/data/square.geojson") as f:
         self.square_geojson = json.load(f)
     with open("tests/data/square.topojson") as f:
         self.square_topojson = json.load(f)
     with open("tests/data/multipolygons_spherical.geojson") as f:
         self.ref = json.load(f)
Example #8
def reading_vqa_data(vqa_dir, section):
    ans = 'mscoco_%s2014_annotations.json' % section
    with (vqa_dir / ans).open() as file_:
        ans_data = json.load(file_)
    image_by_id = {}
    answers_by_id = {}
    for answer in ans_data['annotations']:
        image = str(answer['image_id'])
        mca = answer['multiple_choice_answer']
        img = '0'*(12 - len(image)) + image
        s = '/data/%s/images' % section
        s = s + '/COCO_%s2014_' % section + img + '.jpg'
        image_by_id[answer['question_id']] = s
        answers_by_id[answer['question_id']] = mca
    filename = ('MultipleChoice_mscoco_'
                '%s2014_questions.json' % section)
    with (vqa_dir / filename).open() as file_:
        ques_data = json.load(file_)
    for question in ques_data['questions']:
        text = question['question']
        ques_id = question['question_id']
        options = question['multiple_choices']
        image_path = image_by_id[ques_id]
        image = Image.open(image_path)
        if min(image.size) < IMAGE_SIZE:
            image_path = prev_image
            image_by_id[ques_id] = image_path
        else:
            if (answers_by_id[ques_id] == 'yes'):
                prev_image = image_path
        yield ques_id, image_by_id[ques_id], text, options, answers_by_id[ques_id]
Example #9
def load_place_savers(user_dir):
    """
    This function loads the following place saving parameters:
    1. cur_hop - Current hop of collection algorithm
    2. cur_user_list - List of users collected during current hop
    3. next_user_list - List of users to collect on next hop
    4. added_topics_for_cur_hop - Topics added from current hop (if relevant to sampling method)
    5. unavailable_accounts - List of unavailable accounts
    6. finished_users - Users that have already been collected

    :param user_dir: Directory where profile information is saved
    :return place_saver_obj: Python dictionary of the aforementioned fields
    """
    # Load object
    try:
        jfid = open(os.path.join(user_dir, "place_saver_v1.txt"))
        place_saver_obj = ujson.load(jfid)
        jfid.close()
    except ValueError:
        jfid = open(os.path.join(user_dir, "place_saver_v2.txt"))
        place_saver_obj = ujson.load(jfid)
        jfid.close()
    except IOError:
        print "The object 'place_saver' does not exist, creating it now"
        place_saver_obj = {}
    # Make all necessary fields in case they don't already exist
    if "cur_user_list" not in place_saver_obj.keys():
        place_saver_obj["cur_user_list"] = set([])
    if "next_user_list" not in place_saver_obj.keys():
        place_saver_obj["next_user_list"] = set([])
    if "cur_hop" not in place_saver_obj.keys():
        place_saver_obj["cur_hop"] = 0
    if "added_topics_for_cur_hop" not in place_saver_obj.keys():
        place_saver_obj["added_topics_for_cur_hop"] = set([])
    if "unavailable_accounts" not in place_saver_obj.keys():
        place_saver_obj["unavailable_accounts"] = set([])
    if "finished_users" not in place_saver_obj.keys():
        place_saver_obj["finished_users"] = {}
    jsons = filter(lambda k: re.match("userInfo_*", k), os.listdir(user_dir))
    for jj in range(len(jsons)):
        if jj % 200 == 0:
            print "Check profile JSON {} of {}".format(jj + 1, len(jsons))
        try:
            full_filename = os.path.join(user_dir, jsons[jj])
            if os.path.getsize(full_filename) == 0:
                continue
            jfid = open(full_filename)
            profile = ujson.load(jfid)
            jfid.close()
            if profile["id"] in place_saver_obj["finished_users"].keys():
                continue
            else:
                place_saver_obj["finished_users"][profile["id"]] = jsons[jj]
        except ValueError:
            continue
    # Ensure that all fields are set objects
    for kk in place_saver_obj.keys():
        if (kk != "finished_users") and (kk != "cur_hop"):
            place_saver_obj[kk] = set(place_saver_obj[kk])
    return place_saver_obj
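For reference, a freshly initialized place_saver_obj produced by the defaults above has roughly this shape (the finished_users entry is a made-up example):

place_saver_obj = {
    "cur_hop": 0,
    "cur_user_list": set(),
    "next_user_list": set(),
    "added_topics_for_cur_hop": set(),
    "unavailable_accounts": set(),
    "finished_users": {"12345": "userInfo_12345.json"},  # user id -> profile JSON filename (illustrative)
}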
Example #10
def combine_dicts():
    with open('title10to100000.json') as tag200, open('title100000plus.json') as tag1500:
        tag200dict = ujson.load(tag200)
        tag500dict = ujson.load(tag1500)
        newdict = dict(chain(tag200dict.items(), tag500dict.items()))
        with open('titletagwords.json', 'w') as write:
            ujson.dump(newdict, write)
Example #11
    def __init__(self, path, writer_queue=None):
        """Initialize using path to file and optional thread-safe queue.

        Queue is used for json serializable data to be written to file when
        self.write_queued() is called.

        If the file at 'path' doesn't exist it will be created.
        """

        self.path = os.path.realpath(os.path.expanduser(path))
        if not os.path.exists(self.path):
            print("Persistence file %s does not exist yet, creating it...")
            json.dump({}, open(self.path, 'w'))
        else:
            # check for json-ness
            try:
                json.load(open(self.path))
                LOG.debug("Loaded existing persistence file %s.",
                          os.path.relpath(self.path))
            except ValueError as err:
                raise ValueError("The persistence file -> %s is not "
                                 "a valid json file. | %s"
                                 % (os.path.relpath(self.path), err))
        if writer_queue and not isinstance(writer_queue, Queue.Queue):
            raise TypeError('writer_queue should be a Queue.Queue.')
        elif writer_queue:
            self.synq = writer_queue
            self.synq._persisted = set()
        else:
            self.synq = None
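A minimal usage sketch of this persistence helper, assuming the enclosing class is called Persister and exposes the write_queued() method mentioned in the docstring (the class name and the path below are illustrative):

import Queue

queue = Queue.Queue()
store = Persister('~/.myapp/state.json', writer_queue=queue)  # hypothetical class name and path
queue.put({'last_run': '2017-01-01'})  # any JSON-serializable item
store.write_queued()                   # drains the queue into the persistence file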
Example #12
def addin_dubbed_video_mappings(node_data, lang=en_lang_code):
    # Get the dubbed videos from the spreadsheet and substitute them
    # for the video, and topic attributes of the returned data struct.

    build_path = os.path.join(os.getcwd(), "build")

    # Create dubbed_video_mappings.json in the build folder.
    if os.path.exists(os.path.join(build_path, "dubbed_video_mappings.json")):
        logging.info("Dubbed videos json already exist at %s" % (DUBBED_VIDEOS_MAPPING_FILEPATH))
    else:
        main()

    # Get the list of video ids from dubbed video mappings
    lang_code = get_lang_name(lang).lower()
    dubbed_videos_path = os.path.join(build_path, "dubbed_video_mappings.json")
    with open(dubbed_videos_path, "r") as f:
        dubbed_videos_load = ujson.load(f)

    dubbed_videos_list = dubbed_videos_load.get(lang_code)
    # If dubbed_videos_list is None, it means the language code is not available in the dubbed video mappings.
    if not dubbed_videos_list:
        return node_data

    # Get the current youtube_ids, and topic_paths from the khan api node data.
    youtube_ids = []
    topic_paths = []
    for node in node_data:
        node_kind = node.get("kind")
        if node_kind == NodeType.video:
            youtube_ids.append(node.get("youtube_id"))
        if node_kind == NodeType.topic:
            topic_paths.append(node.get("path"))

    en_nodes_path = os.path.join(build_path, "en_nodes.json")
    with open(en_nodes_path, "r") as f:
        en_node_load = ujson.load(f)

    en_node_list = []
    # The en_nodes.json must have the same data structure as the node_data variable from the Khan API.
    for node in en_node_load:
        node_kind = node.get("kind")

        if node_kind == NodeType.video:
            youtube_id = node["youtube_id"]
            if not youtube_id in youtube_ids:
                if youtube_id in dubbed_videos_list:
                    node["youtube_id"] = dubbed_videos_list[youtube_id]
                    node["translated_youtube_lang"] = lang
                    en_node_list.append(node)
                    youtube_ids.append(youtube_id)

        # Append all topics that are not in the topic_paths list.
        if node_kind == NodeType.topic:
            if not node["path"] in topic_paths:
                en_node_list.append(node)
                topic_paths.append(node["path"])

    node_data += en_node_list
    return node_data
Example #13
def main():
    parser = argparse.ArgumentParser(description = "Analysis scripts for LexNorm in W-NUT 2015")
    parser.add_argument("--pred", required = True, help = "A JSON file: Your predictions over test data formatted in JSON as training data")
    parser.add_argument("--oracle", required = True, help = "A JSON file: The oracle annotations of test data formatted in JSON as training data")
    args = parser.parse_args()

    predicates = json.load(open(args.pred))
    training_list = json.load(open(args.oracle))
    oov_detection_performance(training_list,predicates)
Example #14
 def LoadData(self):
     fp=gzip.open('data/dictbase/word_pos.txt.gz')
     self.word_pos=json.load(fp)
     fp.close()
     fp=gzip.open('data/dictbase/word_pos_max.txt.gz')
     self.word_pos_max=json.load(fp)
     fp.close()
     fp=gzip.open('data/dictbase/word_trans.txt.gz')
     self.word_tran=json.load(fp)
     fp.close()
Example #15
File: main.py Project: txye/QANet
def demo(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    model = Model(config, None, word_mat, char_mat, trainable=False, demo = True)
    demo = Demo(model, config)
Example #16
def update_in_resources(alias, updates):
    if type(updates) is not dict: return("Updates need to be specified as key:value pairs in a dictionary. Process Aborted.")
    keys = updates.keys()
    values = updates.values()
    
    if not set(keys).issubset({'alias', 'tag', 'title'}):
        return '''The updates' dictionary do not have the right keys; they must all be in ['alias','tag','title'].
        Note: Do not include 'timestamp' when doing updates. Process Aborted'''
    
    if len(keys) != len(values):
        return("Number of Keys and Values do not match. Process Aborted.")
    
    def helper(movie, keys, values):
        for k in range(len(keys)):
            movie[keys[k]] = values[k]
        movie['timestamp'] = datetime.datetime.now()
        return movie
    
    if 'resources.json' not in os.listdir('.'):
        return " The file 'resources.json' is not in the current working directory. Process Aborted."

    with open('resources.json') as json_file:
        resource = ujson.load(json_file)
    
    if is_in_resources(resource, alias) == False:
        return "Movie with alias '%s' is not in the resource file. Movie must be added first." % alias
    else:
        movie = list(filter((lambda movie : movie['alias'] in alias), resource['movies']))
        if len(movie) != 1: return("That's weird...multiple matches for alias given. Process Aborted.")
        else: 
            updated = helper(movie[0], keys, values); del movie
            if 'alias' not in updated.keys(): return("Update has no 'alias' key. Process Aborted.")
            if 'tag' not in updated.keys(): return("Update has no 'tag' key. Process Aborted.")
            if 'title' not in updated.keys(): return("Update has no 'title' key. Process Aborted.")
            if 'timestamp' not in updated.keys(): return("Update has no 'timestamp' key. Process Aborted.")
            deleted = delete(alias)
            if deleted is not True : return deleted
            del deleted
            
            with open('resources.json') as json_file:
                resource = ujson.load(json_file)
            
            resource['movies'].append(updated)
            resource['logs'].append({
                'timestamp': datetime.datetime.now(),
                'type': 'post',
                'message': " '%s' with alias '%s' and tag '%s' was successfully added as an update." %(updated['title'], updated['alias'], updated['tag'])
            
            })
            
            with open('resources.json', 'w') as outfile:
                ujson.dump(resource, outfile)
            return " '%s' with alias '%s' and tag '%s' was successfully added as an update." %(updated['title'], updated['alias'], updated['tag'])
Example #17
def main(unused_argv):
  task = json.load(sys.stdin)
  json_path = os.path.join(
    os.path.dirname(__file__), '..', '..', 'solutions',
    'state-of-the-art.json')
  with open(json_path) as f:
    solutions = json.load(f)
  for solution in solutions:
    if (solution['problemId'] == task['id'] and
        solution['seed'] == task['sourceSeeds'][0]):
      json.dump([solution], sys.stdout)
      sys.stdout.write('\n')
Example #18
    def update_unesco_regions(self):
        """
        This code will create/update unesco regions and update the country -> region mapping
        """
        import os
        import ujson
        from geodata.models import Region
        from iati.models import RegionVocabulary

        base = os.path.dirname(os.path.abspath(__file__))

        location = base + '/data_backup/unesco_regions.json'
        json_data = open(location)
        unesco_regions = ujson.load(json_data)
        json_data.close()

        location_map = base + '/data_backup/unesco_country_region_mapping.json'
        json_data_map = open(location_map)
        unesco_mapping = ujson.load(json_data_map)
        json_data_map.close()

        #save regions and put in list
        regions = []
        region_vocabulary = RegionVocabulary.objects.get_or_create(
            code=999,
            name='UNESCO')[0]

        for region_id, info in unesco_regions.items():

            center_location_string = 'POINT(' + info['longitude'] + ' ' + info['latitude'] + ')'
            center_location = fromstr(
                center_location_string,
                srid=4326)
            region = Region.objects.get_or_create(
                code=region_id,
                defaults={
                    'name': info['name'],
                    'region_vocabulary': region_vocabulary,
                    'parental_region': None,
                    'center_longlat': center_location})[0]
            regions.append(region)

        # save country -> region mapping
        for line in unesco_mapping:

            region_id = line["UNESCO Region Code"]
            country_id = line["Country ID"]
            country = Country.objects.get(code=country_id)
            for region in regions:
                if region.code == region_id:
                    country.unesco_region = region
                    country.save()
Example #19
def load_tfidf(vocab_path, idf_weights_path):
    """Loads tfidf vectorizer from its components.
    :param str vocab_path: path to the vectorizer vocabulary JSON.
    :param str idf_weights_path: path to idf weights JSON.
    :rtype: sklearn.feature_extraction.text.TfidfVectorizer

    """
    tfidf = TfidfVectorizer(analyzer=lambda x: x,
                            vocabulary=json.load(open(vocab_path)))
    idf_vector = np.array(json.load(open(idf_weights_path)))
    tfidf._tfidf._idf_diag = scipy.sparse.diags([idf_vector], [0])
    tfidf.vocabulary_ = tfidf.vocabulary
    return tfidf
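A hedged sketch of how the two JSON inputs could be produced from a fitted vectorizer and then reloaded; the file names and toy corpus are made up, and the imports load_tfidf itself relies on (numpy, scipy, json, sklearn) are assumed to be present. Indices and weights are converted to plain Python types so they serialize:

import json
from sklearn.feature_extraction.text import TfidfVectorizer

docs = [["a", "b"], ["b", "c"]]                       # toy pre-tokenized corpus
fitted = TfidfVectorizer(analyzer=lambda x: x).fit(docs)

with open("vocab.json", "w") as f:
    json.dump({t: int(i) for t, i in fitted.vocabulary_.items()}, f)
with open("idf_weights.json", "w") as f:
    json.dump(fitted.idf_.tolist(), f)

tfidf = load_tfidf("vocab.json", "idf_weights.json")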
Example #20
def insert_classes(cursor):
    """
    Fetch and insert the classes from classes.json
    :param cursor:
    :return:
    """
    ranks = dict()
    with open(RANKS_PATH, encoding='UTF-8') as ranks_file:
        ranks_dict = ujson.load(ranks_file)
        for rank, ranked_archetypes in ranks_dict.items():
            try:
                rank = int(rank.strip("Rank"))
            except ValueError:
                rank = MAX_RANK
            for ranked_classes in ranked_archetypes.values():
                for ranked_class in ranked_classes:
                    ranks[ranked_class] = rank

    with open(CLASSES_PATH, encoding='UTF-8') as classes_file:
        classes_dict = ujson.load(classes_file)
        classes = list()
        # Get list of sorted classes
        sorted_classes_ids = list()
        for class_id in classes_dict.keys():
            if '_' in class_id:
                splited_class_id = class_id.split("_", 1)
                sorted_classes_ids.append((class_id, int(splited_class_id[0].strip("Char")), int(splited_class_id[-1])))
            else:
                sorted_classes_ids.append((class_id, 0, 0))
        sorted_classes_ids.sort(key=lambda tup: tup[2])
        sorted_classes_ids.sort(key=lambda tup: tup[1])
        # Start processing them
        for class_id, archetype, char_n in sorted_classes_ids:
            _class = classes_dict[class_id]
            class_info = list()
            # Get Class Name
            class_info.append(get_value(_class, "Class", "name", str))
            # Get Class Archetype
            class_info.append(get_archetype_id(get_value(_class, "Class", "base", str)))
            # Get Rank
            class_info.append(ranks.get(class_id, 0))
            # Get Icon
            class_info.append(format_icon(get_value(_class, "Class", "icon", str)))
            # Get Temp ID
            class_info.append(class_id)

            classes.append(tuple(class_info))

        classes = tuple(classes)

        cursor.executemany("INSERT INTO classes (name, archetype, rank, icon, temp_id) VALUES (?, ?, ?, ?, ?)", classes)
Example #21
def load_lex_counts():
    global w_dict
    liblogger.info("load word dict")
    cache_file = lex_count_file + ".cache"
    if using_cache and os.path.exists(cache_file):
        w_dict = json.load(open(cache_file))
        return 
    lex_counts = json.load(open(lex_count_file))
    w_sum = float(sum(lex_counts.values()))
    w_dict = dict()
    liblogger.info("norm word dict for P(x)")
    for w in lex_counts:
        w_dict[w] = math.log(lex_counts[w] / w_sum)
    json.dump(w_dict, open(cache_file, "w"))
Example #22
def get_database_connection():
    s3 = boto3.resource('s3')
    metasrcs = ujson.load(
        s3.Object('net-mozaws-prod-us-west-2-pipeline-metadata',
                  'sources.json').get()['Body'])
    creds = ujson.load(
        s3.Object('net-mozaws-prod-us-west-2-pipeline-metadata',
                  '%s/write/credentials.json' % (
                      metasrcs['distribution-viewer-db']['metadata_prefix'],
                  )).get()['Body'])
    conn = psycopg2.connect(host=creds['host'], port=creds['port'],
                            user=creds['username'], password=creds['password'],
                            dbname=creds['db_name'])
    return conn, conn.cursor()
Example #23
def scan_trace(dir, channels, sample_cb):
    manifest_fn = path.join(dir, 'manifest.json')
    with open(manifest_fn, 'rb') as f:
        manifest = ujson.load(f)
    if 0: print manifest

    begin_ts = manifest['beginTs']
    end_ts = manifest['endTs']
    print 'range', begin_ts, end_ts

    timeseq_infos = manifest['timeseqInfos']
    cur_samples = {}
    # There's some thought to saving memory here. Only load a chunk (of all timeseqs) at a time
    for chunk in get_chunks(begin_ts, end_ts):
        all_chunk_data = {}
        all_chunk_indexes = {}
        for tsi in timeseq_infos:
            ts_name = tsi['name'];
            if ('*' not in channels) and (ts_name not in channels): continue
            chunk_fn = path.join(dir, 'chunk_%s_%d.json.gz' % (ts_name, chunk))
            try:
                with gzip.open(chunk_fn, 'rb') as f:
                    print 'Reading', chunk_fn, '...'
                    all_chunk_data[ts_name] = ujson.load(f)
                    all_chunk_indexes[ts_name] = 0
            except:
                print chunk_fn, 'not found'
                if ts_name in all_chunk_data:
                    del all_chunk_data[ts_name]
                    del all_chunk_indexes[ts_name]
        while True:
            # Find the next item to change (smallest timestamp)
            cur_ts = end_ts + 1
            for ts_name in all_chunk_data:
                if all_chunk_indexes[ts_name] < len(all_chunk_data[ts_name]['times']):
                    ts1 = all_chunk_data[ts_name]['times'][all_chunk_indexes[ts_name]]
                    cur_ts = min(cur_ts, ts1)
            if cur_ts > end_ts: # Didn't find anything
                break
            # Now update cur_samples with all samples with matching timestamps
            for ts_name in all_chunk_data:
                if all_chunk_indexes[ts_name] < len(all_chunk_data[ts_name]['times']):
                    ts1 = all_chunk_data[ts_name]['times'][all_chunk_indexes[ts_name]]
                    if ts1 == cur_ts:
                        cur_samples[ts_name] = all_chunk_data[ts_name]['samples'][all_chunk_indexes[ts_name]]
                        all_chunk_indexes[ts_name] += 1
            # We copy so that customers can keep a copy that works after we mutate cur_samples again
            # scan_trace_deltat does this
            sample_cb(cur_ts, copy.copy(cur_samples))
Example #24
def initialize(port, suffix):
    global PORT, FILTER_INFER, FILTER_APPLY, REPORT_REACTIONS, REACTION_CATEGORIES, REPORT_REACTIONS_SET
    PORT = port
    FILTER_INFER = json.load(open("../data/filter_infer_%s.json" % suffix))
    FILTER_APPLY = json.load(open("../data/filter_apply_%s.json" % suffix))
    REPORT_REACTIONS = json.load(open("../data/filter_report_%s.json" % suffix))
    REPORT_REACTIONS = [(x, y) for x, y in REPORT_REACTIONS if isinstance(y, list)]
    REACTION_CATEGORIES = defaultdict(list)
    REPORT_REACTIONS_SET = {}
    all_rxn_ids = set.union(*[set(y) for x, y in REPORT_REACTIONS])
    for category, reactions in REPORT_REACTIONS:
        REPORT_REACTIONS_SET[category] = set(reactions)
        REPORT_REACTIONS_SET["^%s" % category] = all_rxn_ids.difference(REPORT_REACTIONS_SET[category])
        for reaction in reactions:
            REACTION_CATEGORIES[long(reaction)].append(category)
Example #25
 def loadVariables(self, infile, test):
     '''semi-stable variables which are not project specific'''
     self.logFilename = u'¤WLMStats.log'
     self.heritage_siteurl = 'https://tools.wmflabs.org/heritage/api'
     self.commons_siteurl = 'https://commons.wikimedia.org'
     self.gcmlimit = 250 #Images to process per API request in ImageInfo
     self.output = "output/"
     self.settings_file = infile
     self._test_gcmlimit = 5
     self._test_limit = 15
     
     # distinguish test data
     if test:
         self.output += u'test.'
         self.settings_file = u'indata/settings.test.json'
     
     #load settings file
     requiredKeys = ['types', 'cats', 'date', 'identifier'] #keys which are explicitly called later
     try:
         f = codecs.open(self.settings_file, 'r', 'utf-8')
         self.settings = ujson.load(f)
         f.close()
         if not set(requiredKeys).issubset(set(self.settings.keys())) :
             raise KeyError("missing one of the required keys!: %s" %', '.join(requiredKeys))
     except IOError, e:
         return u'Error opening settings file: %s' %e
         exit(1)
Example #26
def get_best(args):
    with open(os.path.join(args.path, 'config.json')) as f:
        save_every = json.load(f)['save_every']
    
    with open(os.path.join(args.path, 'process_0.log')) as f:
        lines = f.readlines()

    best_score = 0
    best_it = 0
    deca_scores = {}
    for l in lines:
        if 'val' in l:
            try:
                task = l.split('val_')[1].split(':')[0]
            except Exception as e:
                print(e)
                continue
            it = int(l.split('iteration_')[1].split(':')[0])
            metric = args.task_to_metric[task]
            score = float(l.split(metric+'_')[1].split(':')[0])
            if it in deca_scores:
                deca_scores[it]['deca'] += score
                deca_scores[it][metric] = score
            else:
                deca_scores[it] = {'deca': score, metric: score}
            if deca_scores[it]['deca'] > best_score:
                best_score = deca_scores[it]['deca']
                best_it = it
    print(best_it)
    print(best_score)
    return os.path.join(args.path, f'iteration_{int(best_it)}.pth')
Example #27
def build_most_improved():
    # We want to make most improved ready for that table in charts
    # and not have to run around to fetch it.
    global simple_analysis

    analysis_improved = simple_analysis["improved"] = []
    with open("jsondb/schools/districts.json") as f:
        districts = ujson.load(f)
    for id, distance in mostimproved:
        real_id = id.split("-")[0]
        school_meta = school_metas[real_id]
        school_grade = school_grades[id]
        analysis_improved.append(
            (
                id,
                school_meta["name"].title(),
                districts[school_meta["district"]] if school_meta["district"] else None,
                school_meta["city"].title(),
                school_meta.get("enrollment", {}).get("2010", {}).get("total"),
                school_meta.get("enrollment", {}).get("2012", {}).get("total"),
                school_grade.get("2010", {}).get("rank"),
                school_grade.get("2012", {}).get("rank"),
                distance,
            )
        )
Example #28
    def __init__(self, input='content', charset='utf-8',
                 charset_error='strict', strip_accents=None,
                 vocabulary=None,
                 normalize=True,
                 dtype=float):

        self.input = input
        self.charset = charset
        self.charset_error = charset_error
        self.strip_accents = strip_accents
        if vocabulary is not None:
            self.fixed_vocabulary = True
            if not isinstance(vocabulary, Mapping):
                vocabulary = dict((t, i) for i, t in enumerate(vocabulary))
            self.vocabulary_ = vocabulary
        else:
            self.fixed_vocabulary = False

        try:
            self.poscache = json.load(open(poscache_filename, "r"))
        except IOError:
            self.poscache = {}

        self.normalize = normalize
        self.dtype = dtype
Example #29
 def get_ap_file(self, path):
     """
     Get raw data file.
     """
     with open(path, 'r') as readfile:
         data = json.load(readfile)
         return data['trendtable']
Example #30
        def update_unesco_sectors(self):

            base = os.path.dirname(os.path.abspath(__file__))
            location = base + "/data_backup/unesco_sectors.json"

            json_data = open(location)
            unesco_sectors = ujson.load(json_data)

            for cr in unesco_sectors:

                try:
                    code = int(cr)
                    name = unesco_sectors[cr]['name']

                    if Sector.objects.filter(code=code).exists():
                        the_sector = Sector.objects.get(code=code)
                        the_sector.name = name
                    else:
                        the_sector = Sector(code=code, name=name)
                    the_sector.save()

                except Exception as e:
                    print "error in update_country_sectors" + str(type)
                    print e.args
                    return False
            json_data.close()
            return True
Example #31
def main(args):
    if args.debug:
        logging.basicConfig(level=logging.DEBUG)

    measurements = []
    for filename in glob.iglob(
            os.path.join(args.measurements_dir, args.domain, "*", "*")):
        with open(filename) as file:
            measurements.append(json.load(file))

    as_repo = sas.create_default_as_repo()

    classifier = DnsResolutionClassifier()
    control_resolutions = get_control_resolutions(measurements)
    for resolution in control_resolutions:
        classifier.add_good_resolution(resolution)

    print("\nCONTROL")
    for resolution, count in count_resolutions(
            control_resolutions).most_common():
        print("%s -> %s: %d" % (resolution[0], resolution[1], count))

    dns_resolutions = get_dns_results(as_repo, measurements)
    show_resolutions_graph(as_repo, args.domain, control_resolutions,
                           dns_resolutions)

    print("\nTESTS")
    classified_resolutions = zip(
        dns_resolutions, classifier.classify_resolutions(dns_resolutions))

    for country_code, country_classifications in group_by(
            classified_resolutions, lambda e: e[0].country).items():
        try:
            country_name = iso3166.countries.get(country_code).name
        except KeyError:
            country_name = "Unknown"
        print("\n=============\n= %s (%s)\n=============" %
              (country_name, country_code))
        country_count = len(country_classifications)
        grouped_country_classifications = group_by(country_classifications,
                                                   lambda e: e[1])
        for classification, entries in grouped_country_classifications.items():
            class_count = len(entries)
            prefix = "All " if class_count == country_count else ""
            print(" %s%s: %d/%d" % (prefix, classification.name.lower(),
                                    class_count, country_count))
        #if len(grouped_country_classifications[DnsResolutionClassification.FREE]) == country_count:
        #    continue

        print("\n By Resolver:")
        for resolver_key, resolver_classifications in group_by(
                country_classifications,
                lambda e: make_resolver_key(as_repo, e[0])).items():
            print("  - %s:" % resolver_key)
            resolver_count = len(resolver_classifications)
            for classification, entries in group_by(resolver_classifications,
                                                    lambda e: e[1]).items():
                class_count = len(entries)
                prefix = "All " if class_count == resolver_count else ""
                print("      %s%s: %d/%d" %
                      (prefix, classification.name.lower(), class_count,
                       resolver_count))

        for classification, entries in grouped_country_classifications.items():
            if classification == DnsResolutionClassification.EMPTY or not entries:
                continue
            print("\n %s resolutions:" % classification.name)
            displayed = set()
            for resolution, _ in entries:
                display_str = ",\n     ".join([
                    "%s (%s)" %
                    (resolve_ip(ip) or ip, as_str(as_repo.get_as_for_ip(ip)))
                    for ip in sorted(resolution.ips)
                ])
                if display_str in displayed:
                    continue
                print("  - [%s] %s\n     => %s" %
                      (display_str, resolution.url.geturl(),
                       path_get(resolution.measurement,
                                ["test_keys", "requests", "failure"])))
                displayed.add(display_str)
Example #32
# Touch calibration has never been done
if TOUCH_CALI_FILE not in uos.listdir():
    touch = xpt2046(
        cs=TOUCH_CS,
        transpose=TFT_IS_PORTRAIT,
    )

    from touch_cali import TouchCali

    touch_cali = TouchCali(touch, TOUCH_CALI_FILE)
    touch_cali.start()

# Touch calibration already done; load the touch parameter file directly
else:
    with open(TOUCH_CALI_FILE, 'r') as f:
        param = ujson.load(f)
        touch_x0 = param['cal_x0']
        touch_x1 = param['cal_x1']
        touch_y0 = param['cal_y0']
        touch_y1 = param['cal_y1']

    touch = xpt2046(
        cs=TOUCH_CS,
        transpose=TFT_IS_PORTRAIT,
        cal_x0=touch_x0,
        cal_x1=touch_x1,
        cal_y0=touch_y0,
        cal_y1=touch_y1,
    )

    TOUCH_READY = 1  # indicates the touch parameters are already configured
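For reference, the calibration file read above is just a small JSON object with the four cal_* keys used here; a hedged sketch of writing one (the numbers are made up and would normally come from the TouchCali routine):

with open(TOUCH_CALI_FILE, 'w') as f:
    ujson.dump({'cal_x0': 3700, 'cal_x1': 340,
                'cal_y0': 3800, 'cal_y1': 240}, f)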
Example #33
sv = Service('gacha')
jewel_limit = DailyNumberLimiter(6000)
tenjo_limit = DailyNumberLimiter(1)

GACHA_DISABLE_NOTICE = '本群转蛋功能已禁用\n如欲开启,请与维护组联系'
JEWEL_EXCEED_NOTICE = f'您今天已经抽过{jewel_limit.max}钻了,欢迎明早5点后再来!'
TENJO_EXCEED_NOTICE = f'您今天已经抽过{tenjo_limit.max}张天井券了,欢迎明早5点后再来!'
SWITCH_POOL_TIP = 'β>发送"选择卡池"可切换'
POOL = ('MIX', 'JP', 'TW', 'BL')
DEFAULT_POOL = POOL[0]

_pool_config_file = os.path.expanduser('~/.hoshino/group_pool_config.json')
_group_pool = {}
try:
    with open(_pool_config_file, encoding='utf8') as f:
        _group_pool = json.load(f)
except FileNotFoundError as e:
    sv.logger.warning(
        'group_pool_config.json not found, will create when needed.')
_group_pool = defaultdict(lambda: DEFAULT_POOL, _group_pool)


def dump_pool_config():
    with open(_pool_config_file, 'w', encoding='utf8') as f:
        json.dump(_group_pool, f, ensure_ascii=False)


gacha_10_aliases = ('抽十连', '十连', '十连!', '十连抽', '来个十连', '来发十连', '来次十连', '抽个十连',
                    '抽发十连', '抽次十连', '十连扭蛋', '扭蛋十连', '10连', '10连!', '10连抽',
                    '来个10连', '来发10连', '来次10连', '抽个10连', '抽发10连', '抽次10连',
                    '10连扭蛋', '扭蛋10连', '十連', '十連!', '十連抽', '來個十連', '來發十連',
Example #34
 def __init__(self):
     if not os.path.exists(PATH):
         with open(PATH, "w") as f_x:
             ujson.dump({}, f_x)
     with open(PATH) as yt_db:
         self.db = ujson.load(yt_db)
Example #35
    for recall_index, recall_item in enumerate(recall_list):
        data = [recall_item, 100 - recall_index]
        temp.append(data)
    temp_json = ujson.dumps(temp_dict)
    # break
    client.hset('GraphEm_{0}'.format(clk_cid), 'ge_main_cold', temp_json)
    client.expire('GraphEm_{0}'.format(clk_cid), 3600 * 48)


if __name__ == '__main__':
    # Load the user click-history dict
    load_start = time.time()
    with open('../data/get_user_click_history/click_his/click_his.json',
              'r') as f:
        user_clk_his = ujson.load(f)
    print '加载点击历史字典', time.time() - load_start

    # Load the similar-item dict
    load_start = time.time()
    with open('../data/get_similar_item/cold_sim/cold_sim_nid', 'r') as f:
        sim_item_ge = ujson.load(f)
    print '加载相似用户字典', time.time() - load_start

    print '所有用户个数', len(user_clk_his)

    t2 = time.time()
    pool = Pool(30)

    # Iterate over all users
    for cid_index, clk_cid in enumerate(user_clk_his):
Example #36
                                     int(color_rgb[0] * 255),
                                     int(color_rgb[2] * 255))
        else:
            leds[index * 7 + seg] = (0, 0, 0)


pin = Pin(5, Pin.OUT)
np = NeoPixel(pin, 28)

np.fill((0, 0, 0, 0))
np.write()

adc = ADC(0)

f = open('config.json')
config = ujson.load(f)
f.close()

if "blynk_server" in config and "blynk_port" in config:
    blynk = blynklib.Blynk(config["blynk_key"],
                           server=config["blynk_server"],
                           port=int(config["blynk_port"]),
                           log=print)
else:
    blynk = blynklib.Blynk(config["blynk_key"], log=print)


@blynk.handle_event("connect")
def connect_handler():
    blynk.internal("rtc", "sync")
    print("sent rtc sync request to blynk server")
Example #37
def experiment(model_name, df, indices, ratio, round, divide_fun, n_jobs,
               res_dir, compute_conf_score):
    '''
    Single round of the experiment for a determined ratio.
    The model is instantiated, trained, tested against a dataset
    and results are stored
    :param model_name: The name of the model
    :param df: pandas dataframe containing the feature vector for all the samples
    :param indices: store indices for four sets of packed_benign, unpacked_benign, packed_malicious, unpacked_malicious
    :param ratio: Tuples of ratio of benign / malicious packed
    :param round: counter of rounds
    :param divide_fun: function to divide the dataset, defined in config
    :rtype Dictionary
    '''
    id = '{}-{}-{}'.format(ratio[0], ratio[1], round)
    dprint('Entered experiment:', id)
    ratio_ben, ratio_mal = ratio

    # split between test, train
    training_packed_benign, testing_packed_benign, training_unpacked_benign, testing_unpacked_benign, training_packed_malicious, testing_packed_malicious, training_unpacked_malicious, testing_unpacked_malicious = divide_fun(
        indices, ratio_ben, ratio_mal, NUMPY_SEED + round)
    # training_packed_benign, testing_packed_benign, training_unpacked_benign, testing_unpacked_benign, training_packed_malicious, testing_packed_malicious, training_unpacked_malicious, testing_unpacked_malicious = divide_fun(df, ratio_ben, ratio_mal, NUMPY_SEED+round)

    # dprint('dividing dataset')
    train_indices = training_packed_malicious + training_packed_benign + training_unpacked_malicious + training_unpacked_benign
    test_indices = testing_packed_malicious + testing_packed_benign + testing_unpacked_malicious + testing_unpacked_benign

    verify_test_train_separated(train_indices, test_indices)

    # it means, to scale up, we need to work only on good features.
    # We get them from the RF classifier
    good_features = None
    if model_name == 'svc' or model_name == 'lsvm':
        with open(
                '{}/features-{}-{}.json'.format(
                    res_dir.replace(model_name, "rf"), ratio_ben, ratio_mal),
                'r') as f:
            rf_res = json.load(f)
            rf_features = rf_res['features']
            rf_weights = rf_res['weights']
            num = 10000
            good_features = [
                f for _, f in sorted(zip(rf_weights, rf_features),
                                     reverse=True)[:num]
            ]
            print("Only top {} features from RF considered for trainning SVM".
                  format(num))
            # rf_weights = [w for w, _ in sorted(zip(rf_weights, rf_features), reverse=True)[:num]]
        df = df[good_features + [c for c in drop_columns if c in df.columns]]
        # df = normalize(model_name, df)
    x_train = df[df.index.isin(train_indices)]
    dprint('done with dividing')
    # labels are being malicious or benign
    y_train = np.asarray(x_train['malicious'].values)
    # remove labels related to packing and type of binary
    x_train = x_train.drop(columns=drop_columns, axis=1, errors='ignore')

    # train model on training set
    model = get_model(model_name, n_jobs)
    dprint('Doing training', id)
    dprint("training size: {}".format(len(x_train)))
    model.fit(x_train, y_train)

    # importance_result = None
    if round == 0:
        weights = get_features_importances(model_name, model)
        if weights is not None:
            importances = (json.dumps(list(x_train.columns)),
                           json.dumps(weights))
            dprint('Got importances', id)

            with open(
                    '{}/features-{}-{}.json'.format(res_dir, ratio_ben,
                                                    ratio_mal), 'w') as f:
                json.dump(
                    {
                        "weights": weights,
                        "features": list(x_train.columns)
                    }, f)
        else:
            importances = (json.dumps([]), json.dumps([]))
        joblib.dump(
            model, '{}/model-{}-{}.joblib'.format(res_dir, ratio_ben,
                                                  ratio_mal))
    else:
        importances = None

    # temporarily store the size of the sets used
    stats = {
        'ratio_ben': ratio_ben * 100,
        'ratio_mal': ratio_mal * 100,
        'training_packed_malicious': len(training_packed_malicious),
        'training_unpacked_benign': len(training_unpacked_benign),
        'training_packed_benign': len(training_packed_benign),
        'training_unpacked_malicious': len(training_unpacked_malicious),
        'testing_unpacked_malicious': len(testing_unpacked_malicious),
        'testing_packed_benign': len(testing_packed_benign),
        'testing_unpacked_benign': len(testing_unpacked_benign),
        'testing_packed_malicious': len(testing_packed_malicious)
    }
    dprint(stats)

    # evaluating on a dataset with same ratio as training dataset
    # print("evaluating on the test dataset with the same ratio as the training dataset")
    packed_test = df[df.index.isin(testing_packed_benign +
                                   testing_packed_malicious)]
    unpacked_test = df[df.index.isin(testing_unpacked_benign +
                                     testing_unpacked_malicious)]
    test = (packed_test, unpacked_test)

    if round == 0 and compute_conf_score:
        results, conf = evaluate(model_name,
                                 model,
                                 test,
                                 stats,
                                 do_conf_score=True)
    else:
        results, conf = evaluate(model_name,
                                 model,
                                 test,
                                 stats,
                                 do_conf_score=False)

    dprint('Done evaluating, returning:', id)
    return {
        'results': results,
        'confidence': conf,
        'importances': importances,
        'model': model
    }
Example #38
def test_sber_onfly(config):
    print('Loading emb matrices')
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.bpe_emb_file, "r") as fh:
        bpe_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.pos_emb_file, "r") as fh:
        pos_mat = np.array(json.load(fh), dtype=np.float32)

    if config.use_bpe and config.use_bpe_pretrained_codes:
        bpe_model = BPE(open(config.bpe_pretrained_codes_file, 'r'))
    elif config.use_bpe and not config.use_bpe_pretrained_codes:
        bpe_model = BPE(open(config.bpe_codes_file, 'r'))
    else:
        bpe_model = None

    word2idx_dict = pickle.load(open(config.word2idx_dict_file, 'rb'))
    char2idx_dict = pickle.load(open(config.char2idx_dict_file, 'rb'))
    bpe2idx_dict = pickle.load(open(config.bpe2idx_dict_file, 'rb'))
    pos2idx_dict = pickle.load(open(config.pos2idx_dict_file, 'rb'))

    print("Loading model...")
    model = Model(config,
                  None,
                  word_mat,
                  char_mat,
                  bpe_mat,
                  pos_mat,
                  trainable=False,
                  use_tfdata=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
    sess = tf.Session(config=sess_config)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    if config.model_name == 'latest':
        checkpoint = tf.train.latest_checkpoint(config.save_dir)
    else:
        checkpoint = os.path.join(config.save_dir, config.model_name)
    print('Restoring from: {}'.format(checkpoint))
    saver.restore(sess, checkpoint)
    sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))

    for datafile, datatype in zip(
        [config.sber_public_file, config.sber_private_file],
        ['public', 'private']):

        datafile_squad = os.path.join(config.target_dir,
                                      "{}.json_squad".format(datatype))
        sber2squad(datafile, outfile=datafile_squad)
        data_examples, data_eval = process_file(
            config,
            datafile_squad,
            datatype,
            remove_unicode=config.remove_unicode,
            bpe_model=bpe_model,
            is_test=True)

        data_features, data_meta = build_features_notfdata(config,
                                                           data_examples,
                                                           datatype,
                                                           word2idx_dict,
                                                           char2idx_dict,
                                                           bpe2idx_dict,
                                                           pos2idx_dict,
                                                           is_test=True)

        total = data_meta["total"]

        answer_dict = {}
        remapped_dict = {}

        print(len(data_features))
        # hotfix: pad data_examples up to a multiple of config.batch_size
        while len(data_features) % config.batch_size != 0:
            data_features.append(data_features[-1])

        print(len(data_features))

        for step in tqdm(range(total // config.batch_size + 1)):

            def get_batch():
                batch_items = data_features[step *
                                            config.batch_size:(step + 1) *
                                            config.batch_size]
                batch = dict()
                for key in batch_items[0].keys():
                    batch[key] = np.stack([el[key] for el in batch_items])
                return batch

            batch = get_batch()

            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2],
                feed_dict={
                    model.c_ph: batch['context_idxs'],
                    model.q_ph: batch['ques_idxs'],
                    model.ch_ph: batch['context_char_idxs'],
                    model.qh_ph: batch['ques_char_idxs'],
                    model.cb_ph: batch['context_bpe_idxs'],
                    model.qb_ph: batch['ques_bpe_idxs'],
                    model.cp_ph: batch['context_pos_idxs'],
                    model.qp_ph: batch['ques_pos_idxs'],
                    model.y1_ph: batch['y1'],
                    model.y2_ph: batch['y2'],
                    model.qa_id: batch['id'],
                })

            answer_dict_, remapped_dict_ = convert_tokens(
                data_eval, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)

        path_to_save_answer = os.path.join(
            config.answer_dir, '{}.json_squad_ans'.format(datatype))
        with open(path_to_save_answer, "w") as fh:
            json.dump(remapped_dict, fh)

        sber_ans = '.'.join(path_to_save_answer.split('.')[0:-1]) + '.json_ans'
        squad_answer2sber(datafile, path_to_save_answer, outfile=sber_ans)

        print("Answer dumped: {}".format(path_to_save_answer))

    # evaluating
    # TODO: CHANGE TO ENG URL
    url = 'http://api.aibotbench.com/rusquad/qas'
    headers = {'Content-Type': 'application/json', 'Accept': 'text/plain'}
    metrics = dict()
    f1, em = 0.0, 0.0
    for datatype in ['public', 'private']:
        sber_ans = open(
            os.path.join(config.answer_dir, '{}.json_ans'.format(datatype)),
            'r').readline()
        res = requests.post(url, data=sber_ans, headers=headers)
        metrics[datatype] = eval(json.loads(res.text))
        f1 += metrics[datatype]['f1']
        em += metrics[datatype]['exact_match']
        print('{}: EM: {:.5f} F-1: {:.5f}'.format(
            datatype, metrics[datatype]['exact_match'],
            metrics[datatype]['f1']))
    print('EM avg: {:.5f} F-1 avg: {:.5f}'.format(em / 2, f1 / 2))
Example #39
 def run_lighthouse_test(self, task):
     """Run a lighthouse test against the current browser session"""
     task['lighthouse_log'] = ''
     if 'url' in self.job and self.job['url'] is not None:
         self.job['shaper'].configure(self.job)
         output_path = os.path.join(task['dir'], 'lighthouse.json')
         json_file = os.path.join(task['dir'], 'lighthouse.report.json')
         json_gzip = os.path.join(task['dir'], 'lighthouse.json.gz')
         html_file = os.path.join(task['dir'], 'lighthouse.report.html')
         html_gzip = os.path.join(task['dir'], 'lighthouse.html.gz')
         time_limit = min(int(task['time_limit']), 80)
         command = [
             'lighthouse', '--disable-network-throttling',
             '--disable-cpu-throttling', '--enable-error-reporting',
             '--max-wait-for-load',
             str(int(time_limit * 1000)), '--port',
             str(task['port']), '--output', 'html', '--output', 'json',
             '--output-path', '"{0}"'.format(output_path)
         ]
         if self.job['keep_lighthouse_trace']:
             command.append('--save-assets')
         if self.options.android or 'mobile' not in self.job or not self.job[
                 'mobile']:
             command.append('--disable-device-emulation')
         command.append('"{0}"'.format(self.job['url']))
         cmd = ' '.join(command)
         self.lighthouse_command = cmd
         # Give lighthouse up to 10 minutes to run all of the audits
         try:
             lh_thread = threading.Thread(target=self.lighthouse_thread)
             lh_thread.start()
             lh_thread.join(600)
         except Exception:
             pass
         from .os_util import kill_all
         kill_all('node', True)
         self.job['shaper'].reset()
         # Rename and compress the trace file, delete the other assets
         if self.job['keep_lighthouse_trace']:
             try:
                 lh_trace_src = os.path.join(task['dir'],
                                             'lighthouse-0.trace.json')
                 if os.path.isfile(lh_trace_src):
                     # read the JSON in and re-write it line by line to match the other traces
                     with open(lh_trace_src, 'rb') as f_in:
                         trace = json.load(f_in)
                         if trace is not None and 'traceEvents' in trace:
                             lighthouse_trace = os.path.join(
                                 task['dir'], 'lighthouse_trace.json.gz')
                             with gzip.open(lighthouse_trace, 'wb', 7) as f_out:
                                 f_out.write('{"traceEvents":[{}')
                                 for trace_event in trace['traceEvents']:
                                     f_out.write(",\n")
                                     f_out.write(json.dumps(trace_event))
                                 f_out.write("\n]}")
             except Exception:
                 pass
         # Delete all the left-over lighthouse assets
         files = glob.glob(os.path.join(task['dir'], 'lighthouse-*'))
         for file_path in files:
             try:
                 os.remove(file_path)
             except Exception:
                 pass
         if os.path.isfile(json_file):
             # Remove the raw screenshots if they were stored with the file
             lh_report = None
             with open(json_file, 'rb') as f_in:
                 lh_report = json.load(f_in)
             if lh_report is not None and 'audits' in lh_report and \
                     'screenshots' in lh_report['audits']:
                 del lh_report['audits']['screenshots']
                 with gzip.open(json_gzip, 'wb', 7) as f_out:
                     json.dump(lh_report, f_out)
             else:
                 with open(json_file, 'rb') as f_in:
                     with gzip.open(json_gzip, 'wb', 7) as f_out:
                         shutil.copyfileobj(f_in, f_out)
             try:
                 os.remove(json_file)
             except Exception:
                 pass
         if os.path.isfile(html_file):
             # Remove the raw screenshots if they were stored with the file
             with open(html_file, 'rb') as f_in:
                 lh_report = f_in.read()
                 start = lh_report.find('\n    &quot;screenshots')
                 if start >= 0:
                     end = lh_report.find('\n    },', start)
                     if end >= 0:
                         lh_report = lh_report[:start] + lh_report[end + 7:]
                 with gzip.open(html_gzip, 'wb', 7) as f_out:
                     f_out.write(lh_report)
             try:
                 os.remove(html_file)
             except Exception:
                 pass
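
The trace re-writing step above streams each event of lighthouse-0.trace.json onto its own line inside a gzip file so it matches the agent's other traces. A minimal standalone sketch of the same idea, with an in-memory trace standing in for the real file (the event contents and output name are assumptions):

import gzip
import json

# Hypothetical trace object in Chrome trace-event format.
trace = {"traceEvents": [{"ph": "X", "name": "task", "ts": 1},
                         {"ph": "X", "name": "paint", "ts": 2}]}

with gzip.open("lighthouse_trace.json.gz", "wt", compresslevel=7) as f_out:
    # Dummy first event so every real event can be prefixed with a comma.
    f_out.write('{"traceEvents":[{}')
    for trace_event in trace["traceEvents"]:
        f_out.write(",\n")
        f_out.write(json.dumps(trace_event))
    f_out.write("\n]}")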
Пример #40
0
def main():
    gl_start = time.time()
    multiprocessing.set_start_method("spawn")
    args = get_arg_parser().parse_args()
    print(ujson.dumps(vars(args), indent=4))
    random.seed(args.seed)

    args.out_data_dir = os.path.join(args.out_dir, args.subfolder)

    if os.path.exists(args.out_data_dir):
        print(f"Removing {args.out_data_dir}")
        shutil.rmtree(args.out_data_dir)
    os.makedirs(args.out_data_dir)

    # Final step is to format data for the views in _magpie
    #==============================================
    # DUMP RESULTS
    #==============================================

    mention_dump_dir = os.path.join(
        args.out_dir,
        f"_saved_mention_extractor_{os.path.splitext(os.path.basename(args.alias2cands))[0]}"
    )
    print(f"Loading qid2title from {args.qid2title}")
    with open(args.qid2title) as in_f:
        qid2title = ujson.load(in_f)
    if not os.path.exists(mention_dump_dir) or args.overwrite:
        os.makedirs(mention_dump_dir, exist_ok=True)
        print(f"Building mention extractor for {mention_dump_dir}")
        mention_extractor = MentionExtractor(max_alias_len=5,
                                             max_candidates=27,
                                             alias2qids=args.alias2cands,
                                             qid2title=qid2title)
        mention_extractor.dump(mention_dump_dir)
    mention_extractor = MentionExtractor.load(mention_dump_dir)

    print(f"Loading qid2desc from {args.qid2desc}")
    with open(args.qid2desc) as in_f:
        qid2desc = ujson.load(in_f)
    # Loading up sentences

    print(f"Loading data from {args.data_dir}...")
    files = glob.glob(f"{args.data_dir}/*.jsonl")
    if len(files) <= 0:
        print(f"Didn't find any files at {args.data_dir}")
        return

    print(f"Found {len(files)} files")
    all_sentences = []
    for f in files:
        with open(f) as in_f:
            for line in in_f:
                doc = ujson.loads(line)
                for sent in doc["sentences"]:
                    sent["doc_qid"] = doc["qid"]
                    sent["doc_title"] = doc["title"]
                    new_aliases, new_spans, new_qids = [], [], []
                    for i in range(len(sent["aliases"])):
                        if sent["label_type"][
                                i] != "Pronoun" and mention_extractor.does_alias_exist(
                                    sent["aliases"][i]):
                            new_aliases.append(sent["aliases"][i])
                            new_spans.append(sent["spans"][i])
                            new_qids.append(sent["qids"][i])
                    if len(new_aliases) > 0:
                        sent["aliases"] = new_aliases
                        sent["qids"] = new_qids
                        sent["spans"] = new_spans
                        all_sentences.append(sent)
    print(f"Extracted {len(all_sentences)} sentences")
    dump_data(args, mention_dump_dir, qid2title, qid2desc, all_sentences)

    print(
        f"Finished in {time.time()-gl_start}s. Data saved in {os.path.join(args.out_data_dir, '04_trials_gold.js')}"
    )
Пример #41
0
        #if (len(current_actor_role_synonyms) > 95):
        #    print(len(current_actor_role_synonyms))

        text_to_json = ujson.dumps({
            "value": current_actor_name,
            "synonyms": current_actor_role_synonyms
        })  # Changing the text into json

        actor_identities['items'].append(
            ujson.decode(text_to_json))  # Append the synonyms to the list

        pbar.update(progress_iterator + 1)  # Display incremented progress
        progress_iterator += 1  # Iterate the progress bar for next iteration

    pbar.finish()  # Once we've completed the scraping, end the progress bar.
    return actor_identities['items']


if __name__ == '__main__':
    with open('popular_people.json') as data_file:
        actor_json_data = ujson.load(data_file)  # Load actor data in

    formatted_json = format_json(
        actor_json_data)  # Where the majority of the magic happens
    wrapped_json = ujson.decode(
        "[{\"entries\":" + ujson.encode(formatted_json) +
        ", \"name\": \"actors\"}]"
    )  # Wrapping the JSON with dialogflow's preferred formatting
    write_json_to_disk(wrapped_json)
Пример #42
0
def read_room_data(data_dir: str) -> List[ZerverFieldsT]:
    fn = 'rooms.json'
    data_file = os.path.join(data_dir, fn)
    with open(data_file) as f:
        data = ujson.load(f)
    return data
Пример #43
0
def load_cand_map(entity_mapping_dir, alias_map_file):
    return ujson.load(open(os.path.join(entity_mapping_dir, alias_map_file)))
Пример #44
0
            headers = {"Authorization": self.config["dbl_token"]}
            url = "https://top.gg/api/bots/%d/stats" % self.user.id
            async with self.session.post(url, json=payload,
                                         headers=headers) as resp:  # nopep8
                try:
                    data = await resp.json()
                    log.info("Recieved %s %s %d %s", resp.method, resp._url,
                             resp.status, data)
                except (TypeError, ValueError):
                    log.info("Recieved %s %s %d", resp.method, resp._url,
                             resp.status)

    async def close(self):
        log.debug("close() got called, cleaning up tasks")
        try:
            await self.session.close()
        except (RuntimeError, AttributeError):
            pass

        await super().close()


if __name__ == "__main__":
    with open("config.json") as file:
        configuration = json.load(file)
    botsaber = botsaber(config=configuration)
    if configuration["debug_mode"] is True:
        botsaber.run(configuration["dev_token"])
    else:
        botsaber.run(configuration["bot_token"])
Пример #45
0
def load_title_map(entity_mapping_dir):
    return ujson.load(open(os.path.join(entity_mapping_dir, 'qid2title.json')))
Пример #46
0
    def load(self, filename=None):
        """Load file

        Parameters
        ----------
        filename : str, optional
            File path
            Default value filename given to class constructor

        Raises
        ------
        ImportError:
            Error if file format specific module cannot be imported
        IOError:
            File does not exist or has an unknown file format

        Returns
        -------
        self

        """

        if filename:
            self.filename = filename
            self.format = self.detect_file_format(self.filename)

        dict.clear(self)
        if self.exists():

            if self.format == 'yaml':
                try:
                    import yaml
                except ImportError:
                    message = '{name}: Unable to import YAML module.'.format(
                        name=self.__class__.__name__)
                    self.logger.exception(message)
                    raise ImportError(message)

                try:
                    with open(self.filename, 'r') as infile:
                        dict.update(self, yaml.load(infile))

                except yaml.YAMLError as exc:
                    self.logger.error("Error while parsing YAML file [%s]" %
                                      self.filename)
                    if hasattr(exc, 'problem_mark'):
                        if exc.context is not None:
                            self.logger.error(
                                str(exc.problem_mark) + '\n  ' +
                                str(exc.problem) + ' ' + str(exc.context))
                            self.logger.error(
                                '  Please correct data and retry.')
                        else:
                            self.logger.error(
                                str(exc.problem_mark) + '\n  ' +
                                str(exc.problem))
                            self.logger.error(
                                '  Please correct data and retry.')
                    else:
                        self.logger.error(
                            "Something went wrong while parsing yaml file [%s]"
                            % self.filename)
                    return

            elif self.format == 'cpickle':
                try:
                    import cPickle as pickle
                except ImportError:
                    try:
                        import pickle
                    except ImportError:
                        message = '{name}: Unable to import pickle module.'.format(
                            name=self.__class__.__name__)
                        self.logger.exception(message)
                        raise ImportError(message)

                dict.update(self, pickle.load(open(self.filename, "rb")))

            elif self.format == 'marshal':
                try:
                    import marshal
                except ImportError:
                    message = '{name}: Unable to import marshal module.'.format(
                        name=self.__class__.__name__)
                    self.logger.exception(message)
                    raise ImportError(message)

                dict.update(self, marshal.load(open(self.filename, "rb")))

            elif self.format == 'msgpack':
                try:
                    import msgpack
                except ImportError:
                    message = '{name}: Unable to import msgpack module.'.format(
                        name=self.__class__.__name__)
                    self.logger.exception(message)
                    raise ImportError(message)

                dict.update(self, msgpack.load(open(self.filename, "rb")))

            elif self.format == 'json':
                try:
                    import ujson as json
                except ImportError:
                    try:
                        import json
                    except ImportError:
                        message = '{name}: Unable to import json module.'.format(
                            name=self.__class__.__name__)
                        self.logger.exception(message)
                        raise ImportError(message)

                dict.update(self, json.load(open(self.filename, "r")))

            elif self.format == 'txt':
                with open(self.filename, 'r') as f:
                    lines = f.readlines()
                    dict.update(self, dict(zip(range(0, len(lines)), lines)))

            else:
                message = '{name}: Unknown format [{format}]'.format(
                    name=self.__class__.__name__, format=self.filename)
                self.logger.exception(message)
                raise IOError(message)
        else:
            message = '{name}: File does not exist [{file}]'.format(
                name=self.__class__.__name__, file=self.filename)
            self.logger.exception(message)
            raise IOError(message)

        return self
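
The json branch of load() above prefers ujson and quietly falls back to the standard library. A minimal sketch of that import-fallback pattern on its own, assuming a plain settings.json file instead of the class machinery above:

try:
    import ujson as json  # drop-in replacement, typically faster
except ImportError:
    import json

# Hypothetical file name; any JSON document works here.
with open("settings.json", "r") as infile:
    data = json.load(infile)
print(type(data))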
Пример #47
0
 async def ytdl_callback(c_q: CallbackQuery):
     chosen_btn = c_q.matches[0].group(1)
     data_key = c_q.matches[0].group(2)
     page = c_q.matches[0].group(3)
     if os.path.exists(PATH):
         with open(PATH) as f:
             view_data = ujson.load(f)
         search_data = view_data.get(data_key)
         total = len(search_data)
     else:
         return await c_q.answer(
             "Search data doesn't exists anymore, please perform search again ...",
             show_alert=True,
         )
     if chosen_btn == "back":
         index = int(page) - 1
         del_back = index == 1
         await c_q.answer()
         back_vid = search_data.get(str(index))
         await c_q.edit_message_media(
             media=(
                 InputMediaPhoto(
                     media=back_vid.get("thumb"),
                     caption=back_vid.get("message"),
                 )
             ),
             reply_markup=yt_search_btns(
                 del_back=del_back,
                 data_key=data_key,
                 page=index,
                 vid=back_vid.get("video_id"),
                 total=total,
             ),
         )
     elif choosen_btn == "next":
         index = int(page) + 1
         if index > total:
             return await c_q.answer("That's All Folks !", show_alert=True)
         await c_q.answer()
         front_vid = search_data.get(str(index))
         await c_q.edit_message_media(
             media=(
                 InputMediaPhoto(
                     media=front_vid.get("thumb"),
                     caption=front_vid.get("message"),
                 )
             ),
             reply_markup=yt_search_btns(
                 data_key=data_key,
                 page=index,
                 vid=front_vid.get("video_id"),
                 total=total,
             ),
         )
     elif choosen_btn == "listall":
         await c_q.answer("View Changed to:  📜  List", show_alert=False)
         list_res = ""
         for vid_s in search_data:
             list_res += search_data.get(vid_s).get("list_view")
         telegraph = post_to_telegraph(
             a_title=f"Showing {total} youtube video results for the given query ...",
             content=list_res,
         )
         await c_q.edit_message_media(
             media=(
                 InputMediaPhoto(
                     media=search_data.get("1").get("thumb"),
                 )
             ),
             reply_markup=InlineKeyboardMarkup(
                 [
                     [
                         InlineKeyboardButton(
                             "↗️  Click To Open",
                             url=telegraph,
                         )
                     ],
                     [
                         InlineKeyboardButton(
                             "📰  Detailed View",
                             callback_data=f"ytdl_detail_{data_key}_{page}",
                         )
                     ],
                 ]
             ),
         )
     else:  # Detailed
         index = 1
         await c_q.answer("View Changed to:  📰  Detailed", show_alert=False)
         first = search_data.get(str(index))
         await c_q.edit_message_media(
             media=(
                 InputMediaPhoto(
                     media=first.get("thumb"),
                     caption=first.get("message"),
                 )
             ),
             reply_markup=yt_search_btns(
                 del_back=True,
                 data_key=data_key,
                 page=index,
                 vid=first.get("video_id"),
                 total=total,
             ),
         )
Пример #48
0
 def test_loadFile(self):
     f = six.StringIO("[1,2,3,4]")
     self.assertEqual([1, 2, 3, 4], ujson.load(f))
Пример #49
0
import ujson as json
import sys

log_dir = sys.argv[1] if len(sys.argv) > 1 else None

if not log_dir:
	print('Please input log file')
	exit()

with open('spk_info.json') as f:
	spk = json.load(f)

with open(log_dir) as f:
	score = f.readlines()

g_dict = {'FF':[0, 0], 'FM':[0,0], 'MM':[0,0]} 

for i in range(3000):
	s1, s2 = score[2*i].split('_')[0:3:2]
	s1 = spk[s1[:3]]
	s2 = spk[s2[:3]]
	sdr = sum(map(float,score[2*i+1][1:-2].split()))/2

	if s1 == 'F' and s2 == 'F':
		g_dict['FF'][0] += sdr
		g_dict['FF'][1] += 1
	elif s1 == 'M' and s2 == 'M':
		g_dict['MM'][0] += sdr
		g_dict['MM'][1] += 1
	else:
		g_dict['FM'][0] += sdr
		g_dict['FM'][1] += 1
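
The snippet is cut off before it reports anything. Given the [running_sum, count] layout of g_dict above, a reporting step might look like the following sketch (the output format is an assumption):

for pair, (sdr_sum, count) in g_dict.items():
    if count:
        print('{}: avg SDR {:.3f} over {} mixtures'.format(pair, sdr_sum / count, count))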
Пример #50
0
def load(filename):
    print(f'Opening {filename}')
    with open(filename, "r") as fh:
        return json.load(fh)
Пример #51
0
import re
import lavalink
import config
import ujson
import logging

log = logging.getLogger()

time_rx = re.compile('[0-9]+')

# Languages
languages = ["english", "weeb", "tsundere"]
lang = {}

for l in languages:
    with open("lang/%s.json" % l) as f:
        lang[l] = ujson.load(f)


def getlang(la: str):
    return lang.get(la, None)


class Audio:
    def __init__(self, bot):
        self.bot = bot

        if not hasattr(bot, 'lavalink'):
            lavalink.Client(bot=bot,
                            host="0.0.0.0",
                            ws_port=3232,
                            password=config.lavalink['password'],
Пример #52
0
def process(json_file, output_dir, exclude_titles=None, include_titles=None):
    """
    :param json_file: original data in json format
    :param output_dir: the output directory of pre-processed data
    :param exclude_titles: article titles to exclude
    :param include_titles: article titles to include
    """
    para_file = "{}/paras".format(output_dir)
    question_file = "{}/questions".format(output_dir)
    sent_file = "{}/sents".format(output_dir)
    answer_file = "{}/answers".format(output_dir)
    print("Generating {} raw data...".format(json_file))
    max_sent, max_sent_len, max_que_len, max_ans_len = 0, 0, 0, 0
    with open(json_file, "r") as fh, corenlp.CoreNLPClient(
            annotators="tokenize ssplit pos ner".split(),
            endpoint="http://localhost:9099",
            timeout=50000) as client:
        source = json.load(fh)
        for article in tqdm(source["data"]):
            title = article["title"]
            if include_titles and title not in include_titles:
                continue
            if exclude_titles and title in exclude_titles:
                continue
            for para in article["paragraphs"]:
                paragraphs, questions, answers, sents, ids = [], [], [], [], []
                paragraphs_pos, questions_pos, answers_pos, sents_pos = [], [], [], []
                paragraphs_ner, questions_ner, answers_ner, sents_ner = [], [], [], []
                answers_index, sents_index = [], []
                # paragraph
                context = para["context"]
                if not context.strip():
                    continue
                ann_para = client.annotate(context)
                max_sent = max(max_sent, len(ann_para.sentence))
                max_sent_len = max(
                    max_sent_len,
                    max(map(lambda x: len(x.token), ann_para.sentence)))
                ann_para_tokens, paragraph_tokens, paragraph_pos, paragraph_ner = [], [], [], []
                for sent in ann_para.sentence:
                    for token in sent.token:
                        ann_para_tokens.append(token)
                        paragraph_tokens.append(token.word)
                        paragraph_pos.append(token.pos)
                        paragraph_ner.append(token.ner)

                # questions
                for qa in para["qas"]:
                    # question
                    ques = qa["question"]
                    id = qa["id"]
                    if not ques.strip():
                        continue
                    ann_que = client.annotate(ques)
                    max_que_len = max(max_que_len,
                                      len(ann_que.sentence[0].token))
                    question_tokens, question_pos, question_ner = [], [], []
                    for sent in ann_que.sentence:
                        for token in sent.token:
                            question_tokens.append(token.word)
                            question_pos.append(token.pos)
                            question_ner.append(token.ner)

                    # answer
                    all_answer_tokens, all_answer_pos, all_answer_ner, all_answer_index = [], [], [], []
                    all_sent_tokens, all_sent_pos, all_sent_ner, all_sent_index = [], [], [], []
                    for answer in qa["answers"]:
                        answer_text = answer["text"]
                        if not answer_text.strip():
                            continue
                        ann_ans = client.annotate(answer_text)
                        answer_tokens, answer_pos, answer_ner = [], [], []
                        for sent in ann_ans.sentence:
                            for token in sent.token:
                                answer_tokens.append(token.word)
                                answer_pos.append(token.pos)
                                answer_ner.append(token.ner)
                        all_answer_tokens.append(' '.join(answer_tokens))
                        all_answer_pos.append(' '.join(answer_pos))
                        all_answer_ner.append(' '.join(answer_ner))

                        answer_start = answer['answer_start']
                        answer_end = answer_start + len(answer_text)
                        # sentence
                        sentence = []
                        for sent in ann_para.sentence:
                            if sent.characterOffsetBegin <= answer_start <= sent.characterOffsetEnd or \
                                    sent.characterOffsetBegin <= answer_end <= sent.characterOffsetEnd:
                                sentence.append(sent)
                        sentence = [
                            token for sent in sentence for token in sent.token
                        ]
                        sentence_tokens = [token.word for token in sentence]
                        sentence_pos = [token.pos for token in sentence]
                        sentence_ner = [token.ner for token in sentence]
                        all_sent_tokens.append(' '.join(sentence_tokens))
                        all_sent_pos.append(' '.join(sentence_pos))
                        all_sent_ner.append(' '.join(sentence_ner))

                        # sentence index
                        y1_sent = sentence[0].tokenBeginIndex
                        y2_sent = sentence[-1].tokenBeginIndex
                        # answer index
                        y1_ans = None
                        for i, token in enumerate(sentence):
                            if token.beginChar - 1 <= answer_start <= token.endChar:
                                y1_ans = sentence[0].tokenBeginIndex + i
                        if y1_ans is None:
                            continue
                        y2_ans = y1_ans + len(answer_tokens) - 1
                        all_answer_index.append("{},{}".format(y1_ans, y2_ans))
                        all_sent_index.append("{},{}".format(y1_sent, y2_sent))

                    paragraphs.append(' '.join(paragraph_tokens))
                    paragraphs_pos.append(' '.join(paragraph_pos))
                    paragraphs_ner.append(' '.join(paragraph_ner))
                    questions.append(' '.join(question_tokens))
                    questions_pos.append(' '.join(question_pos))
                    questions_ner.append(' '.join(question_ner))
                    answers.append('\t'.join(all_answer_tokens))
                    answers_pos.append('\t'.join(all_answer_pos))
                    answers_ner.append('\t'.join(all_answer_ner))
                    answers_index.append('\t'.join(all_answer_index))
                    sents.append('\t'.join(all_sent_tokens))
                    sents_pos.append('\t'.join(all_sent_pos))
                    sents_ner.append('\t'.join(all_sent_ner))
                    sents_index.append('\t'.join(all_sent_index))
                    ids.append(id)

                # save para
                with open("{}.tok".format(para_file), 'a') as f:
                    f.write('\n'.join(paragraphs) + '\n')
                with open("{}.pos".format(para_file), 'a') as f:
                    f.write('\n'.join(paragraphs_pos) + '\n')
                with open("{}.ner".format(para_file), 'a') as f:
                    f.write('\n'.join(paragraphs_ner) + '\n')
                with open("{}.id".format(para_file), 'a') as f:
                    f.write('\n'.join(ids) + '\n')
                # save question
                with open("{}.tok".format(question_file), 'a') as f:
                    f.write('\n'.join(questions) + '\n')
                with open("{}.pos".format(question_file), 'a') as f:
                    f.write('\n'.join(questions_pos) + '\n')
                with open("{}.ner".format(question_file), 'a') as f:
                    f.write('\n'.join(questions_ner) + '\n')

                # save answer
                with open("{}.tok".format(answer_file), 'a') as f:
                    f.write('\n'.join(answers) + '\n')
                with open("{}.pos".format(answer_file), 'a') as f:
                    f.write('\n'.join(answers_pos) + '\n')
                with open("{}.ner".format(answer_file), 'a') as f:
                    f.write('\n'.join(answers_ner) + '\n')
                with open("{}.index".format(answer_file), 'a') as f:
                    f.write("\n".join(answers_index) + '\n')

                # save sent
                with open("{}.tok".format(sent_file), 'a') as f:
                    f.write('\n'.join(sents) + '\n')
                with open("{}.pos".format(sent_file), 'a') as f:
                    f.write('\n'.join(sents_pos) + '\n')
                with open("{}.ner".format(sent_file), 'a') as f:
                    f.write('\n'.join(sents_ner) + '\n')
                with open("{}.index".format(sent_file), 'a') as f:
                    f.write("\n".join(sents_index) + '\n')
    # get BIO labels
    label(para_file, answer_file)
Пример #53
0
def run():
    global _color_id

    PROTECTED_FILES = ["/main.py", "/boot.py", "/_boot.py"]

    class FileServerError(Exception):
        pass

    def setup_fallback_ap():
        unique_id = ubinascii.hexlify(machine.unique_id()).upper().decode()
        interfaces.ap.active(True)
        interfaces.ap.config(
            essid="Kyanit {}".format(unique_id),
            password="******",
            authmode=network.AUTH_WPA_WPA2_PSK,
        )

    async def leds_ap_mode(neop):
        # when fallback AP is active
        trigger = False
        while True:
            trigger = not trigger
            for idx in range(3):
                neop[idx] = (0, 0, 64) if idx == 0 and trigger else (0, 0, 0)
            neop.write()
            await runner.sleep_ms(250)

    async def check_wlan_connection():
        global _color_id

        while True:
            await runner.sleep(30)
            if not interfaces.wlan.isconnected():
                _color_id = "BBB"
            elif _color_id == "BBB":
                _color_id = colorid.from_number(
                    int(
                        ure.search("\d+$", interfaces.wlan.ifconfig()[0]).group(0)
                    )  # noqa
                )

    def action_file_list(*args):
        return httpsrv.response(
            200,
            ujson.dumps(
                [
                    path
                    for path in uos.listdir("/")
                    if "\x00" not in path  # ignore garbage files
                    and uos.stat(path)[0] == 32768  # noqa
                    and path not in PROTECTED_FILES  # noqa
                ]
            ),
            httpsrv.CT_JSON,
        )

    def action_files(method, loc, params, headers, conn, addr):
        if "/" in loc[7:]:  # only files in root dir are allowed
            raise FileServerError("not on root")

        file_name = loc[6:]

        if file_name in PROTECTED_FILES:
            raise FileServerError("restricted")

        try:
            stat = uos.stat(file_name)
        except OSError:
            if method == "GET" or method == "DELETE" or "rename" in params:
                return httpsrv.response(404, '"File Not Found"', httpsrv.CT_JSON)
        else:
            if stat[0] != 32768:
                raise FileServerError("restricted")

        if method == "DELETE":
            uos.remove(file_name)
            return httpsrv.response(200, '"OK"', httpsrv.CT_JSON)

        if method == "GET":
            with open(file_name, "rb") as file:
                # read from file, send to conn
                httpsrv.send_response(
                    conn, **(httpsrv.response(200, content_type=httpsrv.CT_PLAIN))
                )
                httpsrv.readall_from(file, into=conn)
            return None  # response already assembled above

        elif method == "PUT":
            if "rename" in params:
                uos.rename(file_name, params["rename"])
                return httpsrv.response(200, '"OK"', httpsrv.CT_JSON)

            with open(file_name, "wb") as file:
                # write to file, receive from conn
                httpsrv.readall_from(conn, into=file)
            return httpsrv.response(200, '"OK"', httpsrv.CT_JSON)

    async def reboot():
        await runner.sleep(.1)
        print("KYANIT Hard Reset!")
        machine.reset()

    def action_reboot(method, loc, params, headers, conn, addr):
        runner.stop(exc=RebootError)
        runner.get_event_loop().create_task(reboot())
        return httpsrv.response(200, '"OK"', httpsrv.CT_JSON)

    def action_state(method, loc, params, headers, conn, addr):
        return httpsrv.response(
            200,
            ujson.dumps(
                {
                    "unique_id": ubinascii.hexlify(
                        machine.unique_id()
                    ).decode().upper(),
                    "micropython_version": uos.uname().version[
                        1:uos.uname().version.index(" ")
                    ],
                    "firmware_version": __version__,
                    "color_id": _color_id,
                    "free_memory": gc.mem_free(),
                    "free_flash": uos.statvfs("/")[0] * uos.statvfs("/")[3],
                    "run_state": [
                        "ERROR {}".format(runner.get_error()[0])
                        if runner.get_error() is not None
                        else "",
                        "STOPPED",
                        "CODE.PY MISSING",
                        "CODE.PY IMPORTED",
                        "CODE.PY MAIN",
                    ][runner.get_state()],
                    "error_traceback": [
                        line.strip()
                        for line in runner.get_error()[1].split("\n")
                        if line and "Traceback" not in line
                    ]
                    if runner.get_error() is not None
                    else None,  # noqa
                }
            ),
            httpsrv.CT_JSON,
        )

    def action_runner_start(method, loc, params, headers, conn, addr):
        runner.start()
        return httpsrv.response(200, '"OK"', httpsrv.CT_JSON)

    def action_runner_stop(method, loc, params, headers, conn, addr):
        runner.stop(force=True if "force" in loc else False, exc=StoppedError)
        return httpsrv.response(200, '"OK"', httpsrv.CT_JSON)

    def action_netvar(method, loc, params, headers, conn, addr):
        if method == "POST":
            Netvar.inbound(ujson.loads(httpsrv.readall_from(conn).getvalue().decode()))
            return httpsrv.response(200, '"OK"', httpsrv.CT_JSON)
        if method == "GET":
            return httpsrv.response(
                200, ujson.dumps(Netvar.outbound()), httpsrv.CT_JSON
            )

    # Start in fallback AP mode if the button is pressed
    fallback_ap_mode = False
    button = machine.Signal(machine.Pin(BUTTON_PIN, machine.Pin.IN), invert=True)
    if button.value():
        fallback_ap_mode = True

    # Try connecting to WLAN if not in fallback AP, else activate AP
    if not fallback_ap_mode:
        try:
            wlan_info = ujson.load(open("/wlan.json"))
            ssid = wlan_info["ssid"]
            password = wlan_info["password"]
            ifconfig = wlan_info["ifconfig"] if "ifconfig" in wlan_info else "dhcp"
        except Exception:
            # fall back to AP, if can't get JSON, or malformed
            fallback_ap_mode = True
            setup_fallback_ap()
        else:
            if not interfaces.wlan_connect(
                ssid, password, ifconfig=ifconfig, timeout=20
            ):
                # fall back to AP, if can't connect
                interfaces.wlan.active(False)
                fallback_ap_mode = True
                setup_fallback_ap()
    else:
        fallback_ap_mode = True
        setup_fallback_ap()

    # Show fallback AP mode on LEDs
    if fallback_ap_mode:
        neop = neopixel.NeoPixel(machine.Pin(LEDS_PIN), 3)
        loop = runner.get_event_loop()
        loop.create_task(leds_ap_mode(neop))

    # Set Color ID
    _color_id = colorid.from_number(
        int(ure.search("\d+$", interfaces.wlan.ifconfig()[0]).group(0))  # noqa
    )

    # Set up HTTP server
    http_server = httpsrv.HTTPServer(port=3300)

    # File actions
    http_server.register("GET", "^/files$", action_file_list)
    http_server.register("GET", "^/files/$", action_file_list)
    http_server.register("GET", "^/files/.*", action_files)
    http_server.register("PUT", "^/files/.*", action_files)
    http_server.register("DELETE", "^/files/.*", action_files)

    # System actions
    http_server.register("GET", "^/sys/state$", action_state)
    http_server.register("POST", "^/sys/reboot$", action_reboot)
    http_server.register("POST", "^/sys/reboot/soft$", action_reboot)

    # Runner actions
    http_server.register("POST", "^/sys/start$", action_runner_start)
    http_server.register("POST", "^/sys/stop$", action_runner_stop)
    http_server.register("POST", "^/sys/stop/force$", action_runner_stop)

    # Netvar actions
    http_server.register("GET", "^/netvar$", action_netvar)
    http_server.register("POST", "^/netvar$", action_netvar)

    # RUN
    loop = runner.get_event_loop()
    loop.create_task(http_server.catch_requests())

    if not fallback_ap_mode:
        # start code.py if not in fallback AP mode
        loop.create_task(check_wlan_connection())
        loop.create_task(runner.starter_coro())

    try:
        loop.run_forever()
    except Exception:
        # close socket, so we can restart
        http_server.close()
        raise
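
The routes registered above form a small HTTP API on port 3300: /files for flat file management, /sys/state for device status, and /netvar for passing JSON blobs in and out. A hedged sketch of how a client on the same network might exercise it; the device address, file name, and netvar payload are assumptions, and requests runs on the client machine, not on the board:

import requests

BASE = "http://192.168.4.1:3300"  # assumed device address

print(requests.get(BASE + "/sys/state").json())               # device status
print(requests.get(BASE + "/files").json())                   # list files in the root dir
requests.put(BASE + "/files/code.py", data=b"print('hi')")    # upload a file
requests.post(BASE + "/netvar", json={"led": "on"})           # send an inbound netvar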
Пример #54
0
def write_emoticon_data(realm_id: int, data_dir: str,
                        output_dir: str) -> List[ZerverFieldsT]:
    '''
    This function does most of the work for processing emoticons, the bulk
    of which is copying files.  We also write a json file with metadata.
    Finally, we return a list of RealmEmoji dicts to our caller.

    In our data_dir we have a pretty simple setup:

        emoticons.json - has very simple metadata on emojis:

          {
            "Emoticon": {
              "id": 9875487,
              "path": "emoticons/yasss.jpg",
              "shortcut": "yasss"
            }
          },
          {
            "Emoticon": {
              "id": 718017,
              "path": "emoticons/yayyyyy.gif",
              "shortcut": "yayyyyy"
            }
          }

        emoticons/ - contains a bunch of image files:

            slytherinsnake.gif
            spanishinquisition.jpg
            sparkle.png
            spiderman.gif
            stableparrot.gif
            stalkerparrot.gif
            supergirl.png
            superman.png

    We move all the relevant files to Zulip's more nested
    directory structure.
    '''

    logging.info('Starting to process emoticons')

    fn = 'emoticons.json'
    data_file = os.path.join(data_dir, fn)
    if not os.path.exists(data_file):
        logging.warning("HipChat export does not contain emoticons.json.")
        logging.warning("As a result, custom emoji cannot be imported.")
        return []

    with open(data_file) as f:
        data = ujson.load(f)

    if isinstance(data, dict) and 'Emoticons' in data:
        # Handle the hc-migrate export format for emoticons.json.
        flat_data = [
            dict(
                path=d['path'],
                name=d['shortcut'],
            ) for d in data['Emoticons']
        ]
    else:
        flat_data = [
            dict(
                path=d['Emoticon']['path'],
                name=d['Emoticon']['shortcut'],
            ) for d in data
        ]

    emoji_folder = os.path.join(output_dir, 'emoji')
    os.makedirs(emoji_folder, exist_ok=True)

    def process(data: ZerverFieldsT) -> ZerverFieldsT:
        source_sub_path = data['path']
        source_fn = os.path.basename(source_sub_path)
        source_path = os.path.join(data_dir, source_sub_path)

        # Use our template from RealmEmoji
        # PATH_ID_TEMPLATE = "{realm_id}/emoji/images/{emoji_file_name}"
        target_fn = source_fn
        target_sub_path = RealmEmoji.PATH_ID_TEMPLATE.format(
            realm_id=realm_id,
            emoji_file_name=target_fn,
        )
        target_path = os.path.join(emoji_folder, target_sub_path)

        os.makedirs(os.path.dirname(target_path), exist_ok=True)

        source_path = os.path.abspath(source_path)
        target_path = os.path.abspath(target_path)

        shutil.copyfile(source_path, target_path)

        return dict(
            path=target_path,
            s3_path=target_path,
            file_name=target_fn,
            realm_id=realm_id,
            name=data['name'],
        )

    emoji_records = list(map(process, flat_data))
    create_converted_data_files(emoji_records, output_dir,
                                '/emoji/records.json')

    realmemoji = [
        build_realm_emoji(
            realm_id=realm_id,
            name=rec['name'],
            id=NEXT_ID('realmemoji'),
            file_name=rec['file_name'],
        ) for rec in emoji_records
    ]
    logging.info('Done processing emoticons')

    return realmemoji
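
The function above accepts two shapes of emoticons.json: the flat list of {"Emoticon": {...}} records shown in the docstring, and an hc-migrate export with a top-level "Emoticons" key. A small sketch of the normalization both branches converge on, using the records from the docstring:

legacy = [
    {"Emoticon": {"id": 9875487, "path": "emoticons/yasss.jpg", "shortcut": "yasss"}},
]
hc_migrate = {
    "Emoticons": [{"id": 718017, "path": "emoticons/yayyyyy.gif", "shortcut": "yayyyyy"}],
}

def flatten(data):
    if isinstance(data, dict) and 'Emoticons' in data:
        records = data['Emoticons']
    else:
        records = [d['Emoticon'] for d in data]
    return [dict(path=d['path'], name=d['shortcut']) for d in records]

print(flatten(legacy))      # [{'path': 'emoticons/yasss.jpg', 'name': 'yasss'}]
print(flatten(hc_migrate))  # [{'path': 'emoticons/yayyyyy.gif', 'name': 'yayyyyy'}]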
Пример #55
0
def run(options):
    # If this is just being used to download production data, do that.
    if options.get("just-download", False):
        download_s3()
        return

    # Definitive scan date for the run.
    today = datetime.datetime.strftime(datetime.datetime.now(), "%Y-%m-%d")

    # 1. Download scan data, do a new scan, or skip altogether.
    scan_mode = options.get("scan", "skip")

    # Whether to gather domains (defaults to doing so).
    gather_mode = options.get("gather", "here")

    if scan_mode == "here":
        # 1a. Gather .gov federal subdomains.
        if gather_mode == "here":
            LOGGER.info("Gathering subdomains.")
            gather_subdomains(options)
            LOGGER.info("Subdomain gathering complete.")
        elif gather_mode == "skip":
            LOGGER.info("Skipping subdomain gathering.")

        # 1b. Scan subdomains for some types of things.
        LOGGER.info("Scanning subdomains.")
        scan_subdomains(options)
        LOGGER.info("Subdomain scanning complete")

        # 1c. Scan parent domains for all types of things.
        LOGGER.info("Scanning parent domains.")
        scan_parents(options)
        LOGGER.info("Scan of parent domains complete.")
    elif scan_mode == "download":
        LOGGER.info("Downloading latest production scan data from S3.")
        download_s3()
        LOGGER.info("Download complete.")

    # Sanity check to make sure we have what we need.
    if not os.path.exists(os.path.join(PARENTS_RESULTS, "meta.json")):
        LOGGER.info("No scan metadata downloaded, aborting.")
        exit()

    # Date can be overridden if need be, but defaults to meta.json.
    if options.get("date", None) is not None:
        the_date = options.get("date")
    else:
        # depends on YYYY-MM-DD coming first in meta.json time format
        scan_meta = ujson.load(open("data/output/parents/results/meta.json"))
        the_date = scan_meta['start_time'][0:10]

    # 2. Process and load data into Pulse's database.
    LOGGER.info("[%s] Loading data into Pulse." % the_date)
    data.processing.run(the_date, options)
    LOGGER.info("[%s] Data now loaded into Pulse." % the_date)

    # 3. Upload data to S3 (if requested).
    if options.get("upload", False):
        LOGGER.info("[%s] Syncing scan data and database to S3." % the_date)
        upload_s3(the_date)
        LOGGER.info("[%s] Scan data and database now in S3." % the_date)

    LOGGER.info("[%s] All done." % the_date)
Пример #56
0
try:
    import ujson as json
except:
    import json

logger = sv.logger
'''
Database for arena likes & dislikes
DB is a dict like: { 'md5_id': {'like': set(qq), 'dislike': set(qq)} }
'''
DB_PATH = os.path.expanduser('~/.hoshino/arena_db.json')
DB = {}
try:
    with open(DB_PATH, encoding='utf8') as f:
        DB = json.load(f)
    for k in DB:
        DB[k] = {
            'like': set(DB[k].get('like', set())),
            'dislike': set(DB[k].get('dislike', set()))
        }
except FileNotFoundError:
    logger.warning(f'arena_db.json not found, will create when needed.')


def dump_db():
    '''
    Dump the arena database.
    json does not accept set objects; this function converts them before dumping.
    '''
    j = {}
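
The listing is cut off inside dump_db. Per its docstring, the missing part has to turn the like/dislike sets back into something json can serialize before writing; a minimal sketch of that conversion, assuming the DB and DB_PATH defined above (the real implementation may differ):

def dump_db():
    j = {}
    for k in DB:
        j[k] = {
            'like': list(DB[k]['like']),
            'dislike': list(DB[k]['dislike']),
        }
    with open(DB_PATH, 'w', encoding='utf8') as f:
        json.dump(j, f, ensure_ascii=False)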
Пример #57
0
def read_user_data(data_dir: str) -> List[ZerverFieldsT]:
    fn = 'users.json'
    data_file = os.path.join(data_dir, fn)
    with open(data_file) as fp:
        return ujson.load(fp)
Пример #58
0
def process_file(filename, data_type, word_counter, char_counter):
    print(f"Pre-processing {data_type} examples...")
    examples = []
    eval_examples = {}
    total = 0
    with open(filename, "r") as fh:
        source = json.load(fh)
        for article in tqdm(source["data"]):
            for para in article["paragraphs"]:
                context = para["context"].replace("''",
                                                  '" ').replace("``", '" ')
                context_tokens = word_tokenize(context)

                context_bert_tokens = word_tokenize_bert(context)

                context_chars = [list(token) for token in context_tokens]
                spans = convert_idx(context, context_tokens)
                for token in context_tokens:
                    word_counter[token] += len(para["qas"])
                    for char in token:
                        char_counter[char] += len(para["qas"])
                for qa in para["qas"]:
                    total += 1
                    ques = qa["question"].replace("''",
                                                  '" ').replace("``", '" ')
                    ques_tokens = word_tokenize(ques)
                    ques_bert_tokens = word_tokenize_bert(ques)
                    ques_chars = [list(token) for token in ques_tokens]
                    for token in ques_tokens:
                        word_counter[token] += 1
                        for char in token:
                            char_counter[char] += 1
                    y1s, y2s = [], []
                    answer_texts = []
                    for answer in qa["answers"]:
                        answer_text = answer["text"]
                        answer_start = answer['answer_start']
                        answer_end = answer_start + len(answer_text)
                        answer_texts.append(answer_text)
                        answer_span = []
                        for idx, span in enumerate(spans):
                            if not (answer_end <= span[0]
                                    or answer_start >= span[1]):
                                answer_span.append(idx)
                        y1, y2 = answer_span[0], answer_span[-1]
                        y1s.append(y1)
                        y2s.append(y2)
                    example = {
                        "context_tokens": context_tokens,
                        "context_bert_tokens": context_bert_tokens,
                        "context_chars": context_chars,
                        "ques_tokens": ques_tokens,
                        "ques_bert_tokens": ques_bert_tokens,
                        "ques_chars": ques_chars,
                        "y1s": y1s,
                        "y2s": y2s,
                        "id": total
                    }
                    examples.append(example)
                    eval_examples[str(total)] = {
                        "context": context,
                        "question": ques,
                        "spans": spans,
                        "answers": answer_texts,
                        "uuid": qa["id"]
                    }
        print(f"{len(examples)} questions in total")
    return examples, eval_examples
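
The alignment loop above relies on character spans for each token (built by convert_idx, which is not shown) and keeps every token whose span overlaps the answer's character range. A small self-contained sketch of that overlap test, with a hypothetical convert_idx:

def convert_idx(text, tokens):
    # hypothetical helper: (start, end) character offsets for each token
    spans, current = [], 0
    for token in tokens:
        current = text.find(token, current)
        spans.append((current, current + len(token)))
        current += len(token)
    return spans

context = "The quick brown fox jumps over the lazy dog"
tokens = context.split()
spans = convert_idx(context, tokens)

answer_text = "brown fox"
answer_start = context.index(answer_text)
answer_end = answer_start + len(answer_text)

answer_span = [idx for idx, span in enumerate(spans)
               if not (answer_end <= span[0] or answer_start >= span[1])]
y1, y2 = answer_span[0], answer_span[-1]
print(y1, y2, tokens[y1:y2 + 1])  # 2 3 ['brown', 'fox']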
Пример #59
0
def train(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.bpe_emb_file, "r") as fh:
        bpe_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.pos_emb_file, "r") as fh:
        pos_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.train_eval_file, "r") as fh:
        train_eval_file = json.load(fh)
    with open(config.dev_eval_file, "r") as fh:
        dev_eval_file = json.load(fh)
    with open(config.dev_meta, "r") as fh:
        meta = json.load(fh)

    dev_total = meta["total"]

    print("Building model...")
    parser = get_record_parser(config)
    train_dataset = get_batch_dataset(config.train_record_file, parser, config)
    dev_dataset = get_dataset(config.dev_record_file, parser, config)
    handle = tf.placeholder(tf.string, shape=[])
    iterator = tf.data.Iterator.from_string_handle(handle,
                                                   train_dataset.output_types,
                                                   train_dataset.output_shapes)
    train_iterator = train_dataset.make_one_shot_iterator()
    dev_iterator = dev_dataset.make_one_shot_iterator()

    model = Model(config, iterator, word_mat, char_mat, bpe_mat, pos_mat)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    loss_save = 100.0
    patience = 0
    lr = config.init_lr
    min_lr = config.min_lr

    with tf.Session(config=sess_config) as sess:
        writer = tf.summary.FileWriter(config.log_dir)
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=None)
        train_handle = sess.run(train_iterator.string_handle())
        dev_handle = sess.run(dev_iterator.string_handle())
        sess.run(tf.assign(model.is_train, tf.constant(True, dtype=tf.bool)))
        sess.run(tf.assign(model.lr, tf.constant(lr, dtype=tf.float32)))

        for _ in tqdm(range(1, config.num_steps + 1)):
            global_step = sess.run(model.global_step) + 1
            if global_step < config.freeze_steps:
                loss, train_op = sess.run([model.loss, model.train_op_f],
                                          feed_dict={handle: train_handle})
            else:
                if global_step == config.freeze_steps:
                    print('Unfreezing embedding matrices')
                loss, train_op = sess.run([model.loss, model.train_op],
                                          feed_dict={handle: train_handle})

            if global_step % config.period == 0:
                loss_sum = tf.Summary(value=[
                    tf.Summary.Value(tag="model/loss", simple_value=loss),
                ])
                lr_sum = tf.Summary(value=[
                    tf.Summary.Value(tag="model/lr", simple_value=lr),
                ])
                writer.add_summary(loss_sum, global_step)
                writer.add_summary(lr_sum, global_step)
            if global_step % config.checkpoint == 0:
                sess.run(
                    tf.assign(model.is_train, tf.constant(False,
                                                          dtype=tf.bool)))
                _, summ = evaluate_batch(model, config.val_num_batches,
                                         train_eval_file, sess, "train",
                                         handle, train_handle)
                for s in summ:
                    writer.add_summary(s, global_step)

                metrics, summ = evaluate_batch(
                    model, dev_total // config.batch_size + 1, dev_eval_file,
                    sess, "dev", handle, dev_handle)
                sess.run(
                    tf.assign(model.is_train, tf.constant(True,
                                                          dtype=tf.bool)))

                dev_loss = metrics["loss"]
                if dev_loss < loss_save:
                    loss_save = dev_loss
                    patience = 0
                else:
                    patience += 1
                if patience >= config.patience and lr > min_lr:
                    lr /= 2.0
                    loss_save = dev_loss
                    patience = 0
                sess.run(tf.assign(model.lr, tf.constant(lr,
                                                         dtype=tf.float32)))
                for s in summ:
                    writer.add_summary(s, global_step)
                writer.flush()
                filename = os.path.join(config.save_dir,
                                        "model_{}.ckpt".format(global_step))
                saver.save(sess, filename)
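
The checkpoint block above halves the learning rate once the dev loss has failed to improve for config.patience evaluations in a row, as long as the current rate is still above min_lr. The same schedule in isolation, as a plain-Python sketch with made-up losses:

lr, min_lr, patience_limit = 0.001, 0.0001, 2
loss_save, patience = 100.0, 0

for dev_loss in [3.2, 2.9, 3.0, 3.1, 3.05, 2.8]:  # hypothetical dev losses
    if dev_loss < loss_save:
        loss_save, patience = dev_loss, 0
    else:
        patience += 1
    if patience >= patience_limit and lr > min_lr:
        lr /= 2.0
        loss_save, patience = dev_loss, 0
    print('dev_loss={:.2f} lr={:.6f} patience={}'.format(dev_loss, lr, patience))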
Пример #60
0
def test_sber(config):

    prepro_test_sber(config)

    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.bpe_emb_file, "r") as fh:
        bpe_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.pos_emb_file, "r") as fh:
        pos_mat = np.array(json.load(fh), dtype=np.float32)

    for datafile, datatype in zip(
        [config.sber_public_file, config.sber_private_file],
        ['public', 'private']):

        with open(
                os.path.join(config.target_dir,
                             "{}_eval.json".format(datatype)), "r") as fh:
            data_eval_file = json.load(fh)
        with open(
                os.path.join(config.target_dir,
                             "{}_meta.json".format(datatype)), "r") as fh:
            meta = json.load(fh)

        total = meta["total"]

        print("Loading model...")
        test_batch = get_dataset(
            os.path.join(config.target_dir, "{}.tfrecords".format(datatype)),
            get_record_parser(config, is_test=True),
            config).make_one_shot_iterator()

        model = Model(config,
                      test_batch,
                      word_mat,
                      char_mat,
                      bpe_mat,
                      pos_mat,
                      trainable=False)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True

        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            if config.model_name == 'latest':
                checkpoint = tf.train.latest_checkpoint(config.save_dir)
            else:
                checkpoint = os.path.join(config.save_dir, config.model_name)
            print('Restoring from: {}'.format(checkpoint))
            saver.restore(sess, checkpoint)
            sess.run(
                tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
            answer_dict = {}
            remapped_dict = {}
            for step in tqdm(range(total // config.batch_size + 1)):
                qa_id, loss, yp1, yp2 = sess.run(
                    [model.qa_id, model.loss, model.yp1, model.yp2])
                answer_dict_, remapped_dict_ = convert_tokens(
                    data_eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
                answer_dict.update(answer_dict_)
                remapped_dict.update(remapped_dict_)

            path_to_save_answer = os.path.join(
                config.answer_dir, '{}.json_squad_ans'.format(datatype))
            with open(path_to_save_answer, "w") as fh:
                json.dump(remapped_dict, fh)

            sber_ans = '.'.join(
                path_to_save_answer.split('.')[0:-1]) + '.json_ans'
            squad_answer2sber(datafile, path_to_save_answer, outfile=sber_ans)

            print("Answer dumped: {}".format(path_to_save_answer))

        tf.reset_default_graph()

    # evaluating
    url = 'http://api.aibotbench.com/rusquad/qas'
    headers = {'Content-Type': 'application/json', 'Accept': 'text/plain'}
    metrics = dict()
    f1, em = 0.0, 0.0
    for datatype in ['public', 'private']:
        sber_ans = open(
            os.path.join(config.answer_dir, '{}.json_ans'.format(datatype)),
            'r').readline()
        res = requests.post(url, data=sber_ans, headers=headers)
        metrics[datatype] = eval(json.loads(res.text))
        f1 += metrics[datatype]['f1']
        em += metrics[datatype]['exact_match']
        print('{}: EM: {:.5f} F-1: {:.5f}'.format(
            datatype, metrics[datatype]['exact_match'],
            metrics[datatype]['f1']))
    print('EM avg: {:.5f} F-1 avg: {:.5f}'.format(em / 2, f1 / 2))