Example No. 1
def get_processed(a1=18, a2=24, p1=0, p2=8, l=10000, g='top-1,top-10%25,top-15%25,theory'):
  # processor is imported in functions to avoid deadlock when running
  # test_process in processor.py since that imports this module.
  import processor

  if not os.path.exists('cached_data'):
    os.makedirs('cached_data')

  processed = {}
  a1 = int(a1)
  a2 = int(a2)
  p1 = int(p1)
  p2 = int(p2)
  l = int(l)
  g = urllib.unquote(g).decode('utf8')
  goals = g.split(',')
  for goal in goals:
    filename = "cached_data/a1%ia2%ip1%ip2%il%i-%s.json" % (a1, a2, p1, p2, l, goal)
    processed_goal = []

    if os.path.isfile(filename):
      with open(filename) as fhandler:    
        processed_goal = ujson.load(fhandler)
    else:
      compatibilities = get_compatibilities(a1, a2, p1, p2, l)
      processed_goal = list(processor.process(compatibilities, lifetimes=l, goal=goal))
      with open(filename, 'w') as fhandler:
        ujson.dump(processed_goal, fhandler)
    processed[goal] = processed_goal
  return processed
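
A usage sketch for the caching scheme above, assuming the module-level imports (os, ujson, urllib) and the processor module are in place; the argument values are illustrative only:

# First call computes results and writes one per-goal file under cached_data/;
# an identical second call loads the cached file instead of recomputing.
results = get_processed(a1=18, a2=24, p1=0, p2=8, l=1000, g='top-1')
print(len(results['top-1']))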
Example No. 2
def main(args):
    """
    Main method
    Rolling like it's 2006
    """
    conn = boto.connect_s3(
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key)
    bucket = conn.get_bucket("tweettrack")
    if len(sys.argv) == 4:
        followertable = read_followertable(args[1], bucket)
        assert followertable is not None
        print "followertable is this long: %d, and we're saving it" % (len(followertable),)
        with open("followertable.json", "w") as followertable_file:
            ujson.dump(followertable, followertable_file)
    else:
        print "followerstable..."
        with open(sys.argv[4], "r") as followertable_file:
            followertable = ujson.load(followertable_file)
        print "followerstable done..."
        #print "gammas..."
        #with open(sys.argv[5], "r") as gamma_file:
        #    gammas = ujson.load(gamma_file)
        #    gc.collect()
        #print "gammas done..."
    gammas = get_gammas(args[2], bucket)
    #with open("gammas.json", "w") as gamma_file:
    #    ujson.dump(gammas, gamma_file)
    do_join(args[3], followertable, gammas, bucket)
    conn.close()
Example No. 3
def get_compatibilities(a1=18, a2=24, p1=0, p2=8, l=10000):
  compatibilities = []
  a1 = int(a1)
  a2 = int(a2)
  p1 = int(p1)
  p2 = int(p2)
  l = int(l)
  filename = "cached_data/a1%ia2%ip1%ip2%il%i.json" % (a1, a2, p1, p2, l)
  if not os.path.exists('cached_data'):
    os.makedirs('cached_data')
  if os.path.isfile(filename):
    with open(filename) as fhandler:    
      compatibilities = ujson.load(fhandler)
  else:
    for lt in range(1, l+1):
      # Number of candidates met per year should range between p1 and p2.
      yearly_num_candidates = []
      for a in range(0, (a2-a1)):
        yearly_num_candidates.append(random.choice(range(p1, p2)))
      for year, num_candidates in enumerate(yearly_num_candidates):
        # Compatibility scores of candidates should follow a normal distribution.
        scores = np.random.normal(size=num_candidates)
        for score in scores:
          compatibilities.append({
            'lifetime': lt,
            'candidate_score': round(score,3),
            'candidate_age_met': a1+year
          })
    with open(filename, 'w') as fhandler:
      ujson.dump(compatibilities, fhandler)
  return compatibilities
Example No. 4
    def process(self, id: int):
        """Increment offsets from a volume.
        """
        text = Text.query.get(id)

        tokens = text.tokens()

        # Assemble token list.

        rows = [
            dict(
                text_id=id,
                ratio=i/len(tokens),
                offset=i,
                **token._asdict()
            )
            for i, token in enumerate(tokens)
        ]

        # Flush to disk.

        path = os.path.join(self.result_dir, str(uuid.uuid4()))

        with open_makedirs(path, 'w') as fh:
            ujson.dump(rows, fh)
Example No. 5
def add_to_resources(movie):
    
    if type(movie) is not dict: return "Movie needs to be specified as key:value pairs in a dictionary. Process Aborted."
    
    if 'alias' not in movie.keys(): return "Update has no 'alias' key. Process Aborted."
    if 'tag' not in movie.keys(): return "Update has no 'tag' key. Process Aborted."
    if 'title' not in movie.keys(): return "Update has no 'title' key. Process Aborted."
    
    if 'resources.json' not in os.listdir('.'):
            return " The file 'resources.json' is not in the current working directory. Process Aborted."
    
    with open('resources.json') as json_file:  
        resource = ujson.load(json_file)
    
    if is_in_resources(resource, movie['alias']):
        return "%s with alias '%s' and tag '%s' is already added. To update it, use the update function." %(movie['title'], movie['alias'], movie['tag'])
    else:
        movie['timestamp'] = datetime.datetime.now()
        resource['movies'].append(movie)
        
        resource['logs'].append({
        'timestamp': datetime.datetime.now(),
        'type': 'post',
        'message': " '%s' with alias '%s' and tag '%s' was successfully added." %(movie['title'], movie['alias'], movie['tag'])
        })
        
        with open('resources.json', 'w') as outfile:  
            ujson.dump(resource, outfile)
        
        return "%s with alias '%s' and tag '%s' was successfully added." %(movie['title'], movie['alias'], movie['tag'])
Example No. 6
    def create(self, name=None, time=None, uid=None, container=None,
               **kwargs):
        """Create a sample locally

        Parameters
        ----------
        name: str
            Name of the sample
        time: float
            Timestamp generated by the client
        uid: str
            Unique identifier for this sample
        container: str, doct.Document
            The container/group sample is contained within

        Returns
        -------
        payload: dict
            Document dict that was inserted
        """
        # TODO: Allow container to be an object
        if container:
            container = doc_or_uid_to_uid(container)
        payload = dict(uid=uid if uid else str(uuid4()),
                       name=name, time=time if time else ttime.time(),
                       container=container if container else 'NULL',
                       **kwargs)
        self.sample_list.append(payload)
        with open(self._samp_fname, 'w+') as fp:
            ujson.dump(self.sample_list, fp)
        return payload
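
A hedged usage sketch for the create() method above; the instance name and extra keyword are hypothetical and assume sample_list and _samp_fname were set up in __init__:

# `samples` stands for an instance of the class that owns create().
doc = samples.create(name='kapton_film', composition='C22H10N2O5')
assert doc['container'] == 'NULL'      # no container was supplied
print(doc['uid'], doc['time'])         # auto-generated uid and timestamp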
Example No. 7
def semantic_labeling(train_dataset, test_dataset, train_dataset2=None, evaluate_train_set=False, reuse_rf_model=True):
    """Doing semantic labeling, train on train_dataset, and test on test_dataset.

    train_dataset2 is optionally provided in case train_dataset and test_dataset have no overlapping semantic types.
    For example, if train_dataset contains soccer domains and test_dataset contains weather domains, the system cannot
    recognize the semantic types of test_dataset because there is no overlap. In that case we need to provide a
    train_dataset2 that contains weather semantic types, so that the system is able to make predictions.

    train_dataset2 defaults to train_dataset (train_dataset is used to train the RandomForest model).

    :param train_dataset: str
    :param test_dataset: str
    :param train_dataset2: Optional[str]
    :param evaluate_train_set: bool
    :param reuse_rf_model: bool
    :return:
    """
    logger = get_logger("semantic-labeling-api", format_str='>>>>>> %(asctime)s - %(levelname)s:%(name)s:%(module)s:%(lineno)d:   %(message)s')

    if train_dataset2 is None:
        train_dataset2 = train_dataset
        datasets = [train_dataset, test_dataset]
    else:
        datasets = [train_dataset, test_dataset, train_dataset2]

    semantic_labeler = SemanticLabeler()
    # read data into memory
    logger.info("Read data into memory")
    semantic_labeler.read_data_sources(list(set(datasets)))
    # index datasets that haven't been indexed before

    not_indexed_datasets = list({dataset for dataset in datasets if not is_indexed(dataset)})
    if len(not_indexed_datasets) > 0:
        logger.info("Index not-indexed datasets: %s" % ",".join(not_indexed_datasets))
        semantic_labeler.train_semantic_types(not_indexed_datasets)

    # remove existing file if not reuse previous random forest model
    if not reuse_rf_model and os.path.exists("model/lr.pkl"):
        os.remove("model/lr.pkl")

    # train the model
    logger.info("Train randomforest... with args ([1], [%s]", train_dataset)
    semantic_labeler.train_random_forest([1], [train_dataset])

    # generate semantic typing
    logger.info("Generate semantic typing using: trainset: %s, for testset: %s", train_dataset, test_dataset)
    result = semantic_labeler.test_semantic_types_from_2_sets(train_dataset2, test_dataset)

    if not os.path.exists("output"):
        os.mkdir("output")
    with open("output/%s_result.json" % test_dataset, "w") as f:
        ujson.dump(result, f)

    if evaluate_train_set:
        logger.info("Generate semantic typing for trainset")
        result = semantic_labeler.test_semantic_types_from_2_sets(train_dataset2, train_dataset2)
        with open("output/%s_result.json" % train_dataset2, "w") as f:
            ujson.dump(result, f)

    return result
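
A hedged example of calling semantic_labeling() for the soccer/weather scenario described in the docstring; the dataset names are placeholders for whatever SemanticLabeler has been configured to read:

result = semantic_labeling(
    train_dataset="soccer",          # used to train the RandomForest
    test_dataset="weather",
    train_dataset2="weather_train",  # supplies weather semantic types
    evaluate_train_set=False,
    reuse_rf_model=True,
)
# Predictions are returned and also written to output/weather_result.json.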
Example No. 8
 def test_dumpFileArgsError(self):
     try:
         ujson.dump([], '')
     except TypeError:
         pass
     else:
         assert False, 'expected TypeError'
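
The same check written in pytest style, a common modern alternative to the try/except/else pattern above (assuming, as the original test asserts, that ujson.dump raises TypeError when its second argument is not a file-like object):

import pytest
import ujson

def test_dump_file_args_error():
    # The second argument must expose a .write() method; a plain string does not.
    with pytest.raises(TypeError):
        ujson.dump([], '')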
Example No. 9
    def create(self, uid=None, time=None, container=None, **kwargs):
        """ Create a container locally.

        Parameters
        ----------
        time: float
            Timestamp generated by the client
        uid: str
            Unique identifier for this sample
        container: str, doct.Document, optional
            Container this container is contained within

        Returns
        -------
        payload: dict
            Document dict that was inserted
        """
        if container:
            container = doc_or_uid_to_uid(container)
        payload = dict(uid=uid if uid else str(uuid4()),
                       container=container if container else 'NULL',
                       time=time if time else ttime.time(), **kwargs)
        self.container_list.append(payload)
        with open(self._cont_fname, 'w+') as fp:
            ujson.dump(self.container_list, fp)
        return payload
Example No. 10
 def saveTweets(self):
     meaningful =  self.jsonAccepted*self.cfg['KeepAccepted'] + self.jsonPartial*self.cfg['KeepPartial'] + self.jsonExcluded*self.cfg['KeepExcluded']
     if len(meaningful)>1:
         print "\nDumping tweets to file, contains %s tweets with %s accepted, %s rejected, %s partial matches, and %s irrelevant" % (len(meaningful),
                     self.acceptedCount,
                     self.excludedCount,
                     self.partialCount,
                     self.irrelevantCount)        
    
         if self.cfg['TweetData'] != 'all':
             meaningful = cleanJson(meaningful,self.cfg,self.tweetTypes)
             
         #timeStamp = datetime.date.today().strftime("%A")
         timeStamp = self.startTime
         self.lastWrite = self.startDay
         
         if self.cfg['KeepRaw']:
             with open(self.pathOut+'Raw_'+self.cfg['FileName']+'_'+timeStamp+'.json', 'w') as outFile:
                 json.dump(self.jsonRaw,outFile)
             outFile.close()
 
         with open(self.pathOut+'FilteredTweets_'+self.cfg['FileName']+'_'+timeStamp+'.json', 'w') as outFile:
             json.dump(meaningful,outFile)
         outFile.close()
         
         print 'Json text dump complete, buffering....'    
         time.sleep(1)
         
         
         giSeeker.flushTweets(self)
     else:
         print "No tweets found for date"
     print "Updating geoPickle"
     self.geoCache = updateGeoPickle(self.geoCache,getPickleName(self.cfg),self.cfg)
Example No. 11
def convert_to_json(lang_url, lang_code):
    """A handy json converter just pass the lang_code and the url of the json source."""
    data = requests.get(lang_url)
    node_data = ujson.loads(data.content)
    dump_json = os.path.join(BUILD_PATH, "%s_node_data.json" % lang_code)
    with open(dump_json, "w") as f:
        ujson.dump(node_data, f)
Example No. 12
    def saveTweets(self):
        print "\nDumping tweets to file, contains %s tweets with %s accepted, %s rejected, %s partial matches, and %s irrelevant" % (self.cfg['StopCount'],
                        self.acceptedCount,
                        self.excludedCount,
                        self.partialCount,
                        self.irrelevantCount)
        print '\tJson text dump complete....\n'
                
        meaningful =  self.jsonAccepted*self.cfg['KeepAccepted'] + self.jsonPartial*self.cfg['KeepPartial'] + self.jsonExcluded*self.cfg['KeepExcluded']
        
        if self.cfg['TweetData'] != 'all':
            meaningful = cleanJson(meaningful,self.cfg,self.tweetTypes)
            
        timeStamp = self.startTime
        
        if self.cfg['KeepRaw']:
            with open(self.pathOut+'Raw_'+self.cfg['FileName']+'_'+timeStamp+'.json', 'w') as outFile:
                json.dump(self.jsonRaw,outFile)
            outFile.close()

        with open(self.pathOut+'FilteredTweets_'+self.cfg['FileName']+'_'+timeStamp+'.json', 'w') as outFile:
            json.dump(meaningful,outFile)
        outFile.close()
        giListener.flushTweets(self) 
        print "Updating geoPickle"
        self.geoCache = updateGeoPickle(self.geoCache,self.cfg['Directory']+'caches/'+pickleName) 
Example No. 13
    def store_update(self, db_name, db_desc):
        """Updates the database store file db_name
        key, with db_desc value"""
        store_datas = self.extract_store_datas()

        store_datas.update({db_name: db_desc})
        json.dump(store_datas, open(self.store_file, 'w'))
Example No. 14
	def saveDatabase(self):
		self.proxySend("Creating dict from room objects.")
		db = {}
		for vnum, roomObj in iterItems(self.rooms):
			newRoom = {}
			newRoom["name"] = roomObj.name
			newRoom["desc"] = roomObj.desc
			newRoom["dynamicDesc"] = roomObj.dynamicDesc
			newRoom["note"] = roomObj.note
			newRoom["terrain"] = roomObj.terrain
			newRoom["light"] = roomObj.light
			newRoom["align"] = roomObj.align
			newRoom["portable"] = roomObj.portable
			newRoom["ridable"] = roomObj.ridable
			newRoom["mobFlags"] = list(roomObj.mobFlags)
			newRoom["loadFlags"] = list(roomObj.loadFlags)
			newRoom["x"] = roomObj.x
			newRoom["y"] = roomObj.y
			newRoom["z"] = roomObj.z
			newRoom["exits"] = {}
			for direction, exitObj in iterItems(roomObj.exits):
				newExit = {}
				newExit["exitFlags"] = list(exitObj.exitFlags)
				newExit["doorFlags"] = list(exitObj.doorFlags)
				newExit["door"] = exitObj.door
				newExit["to"] = exitObj.to
				newRoom["exits"][direction] = newExit
			db[vnum] = newRoom
		self.proxySend("Saving the database in JSon format.")
		with codecs.open(MAP_FILE, "wb", encoding="utf-8") as fileObj:
			json.dump(db, fileObj)
		self.proxySend("Map Database saved.")
Example No. 15
    def process(self, inputs):
        try:
            for x in inputs:
                #self.log(x)
                prov = inputs[x]

                if "_d4p" in prov:
                    prov = prov["_d4p"]
                elif "provenance" in prov:
                    prov = prov["provenance"]

                filep = open(
                    os.environ['PROV_PATH'] +
                    "/bulk_" +
                    getUniqueId(),
                    "w")
                ujson.dump(prov, filep)
                filep.close()

        except:
            self.log(traceback.format_exc())
Example No. 16
    def process(self, inputs):
        try:
            out = None
            for x in inputs:
                prov = inputs[x]

            if isinstance(prov, list) and "data" in prov[0]:
                prov = prov[0]["data"]
            elif "_d4p" in prov:
                prov = prov["_d4p"]
         

            self.bulk.append(prov)
            #self.log(os.environ['PBS_NODEFILE'])
            #self.log(socket.gethostname())
            if len(self.bulk) == 100:
                filep = open(
                    os.environ['PROV_PATH'] +
                    "/bulk_" +
                    getUniqueId(),
                    "w")
                ujson.dump(self.bulk, filep)
                filep.close()
                self.bulk[:] = []
#                for x in self.bulk:
#                    del x
        except:
            self.log(traceback.format_exc())
Example No. 17
def savemsgstore():
    try:
        f = open("generalmessage.json", "w")
        ujson.dump(generalmessagestore, f)
        f.close()
    except:
        pass
Example No. 18
def export_uploads_local_helper(realm, output_dir, local_dir):
    # type: (Realm, Path, Path) -> None
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    count = 0
    records = []
    for attachment in Attachment.objects.filter(realm_id=realm.id):
        local_path = os.path.join(local_dir, attachment.path_id)
        output_path = os.path.join(output_dir, attachment.path_id)
        mkdir_p(os.path.dirname(output_path))
        subprocess.check_call(["cp", "-a", local_path, output_path])
        stat = os.stat(local_path)
        record = dict(realm_id=attachment.realm.id,
                      user_profile_id=attachment.owner.id,
                      user_profile_email=attachment.owner.email,
                      s3_path=attachment.path_id,
                      path=attachment.path_id,
                      size=stat.st_size,
                      last_modified=stat.st_mtime,
                      content_type=None)
        records.append(record)

        count += 1

        if (count % 100 == 0):
            logging.info("Finished %s" % (count,))
    with open(os.path.join(output_dir, "records.json"), "w") as records_file:
        ujson.dump(records, records_file, indent=4)
Example No. 19
File: jstor.py Project: miku/siskin
    def run(self):
        names = collections.defaultdict(set)
        url = "http://www.jstor.org/kbart/collections/all-archive-titles"
        output = shellout("""curl -sL "{url}" > {output} """, url=url)

        with luigi.LocalTarget(output, format=TSV).open() as handle:
            for row in handle.iter_tsv():
                if len(row) < 27:
                    self.logger.warn("short KBART row, skipping: %s", row)
                    continue

                issns = row[1:3]
                parts = [p.strip() for p in row[26].split(";")]

                for issn in [v.strip() for v in issns]:
                    if not issn:
                        continue
                    for name in parts:
                        if not name:
                            continue
                        names[issn].add(name)

        with self.output().open('w') as output:
            import json  # ujson does not support cls keyword
            json.dump(names, output, cls=SetEncoder)
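
The cls=SetEncoder argument refers to an encoder defined elsewhere in the siskin project; a minimal sketch of what such an encoder could look like (an assumption, not the project's actual implementation):

import json

class SetEncoder(json.JSONEncoder):
    """Serialize sets as sorted lists so defaultdict(set) values become JSON arrays."""
    def default(self, obj):
        if isinstance(obj, set):
            return sorted(obj)
        return json.JSONEncoder.default(self, obj)

# json.dumps({"1234-5678": {"B", "A"}}, cls=SetEncoder)  ->  '{"1234-5678": ["A", "B"]}'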
Example No. 20
def run_experiment():
    http_client = AsyncHTTPClient()
    num_files = len(os.listdir("./urls"))
    for i, url_file in enumerate(os.listdir("./urls")):
        if not url_file.endswith(".json"):
            print "Skilling: ", url_file
            continue
        urls = json.load(open("./urls/" + url_file))
        filtered_urls = filter(data_not_exists, urls)
        random.shuffle(filtered_urls)
        p = PB.ProgressBar(maxval=len(filtered_urls)//10 + 1, widgets=("{} / {}".format(i, num_files), PB.Bar(), PB.ETA())).start()
        for urls_chunk in p(chunk_seq(filtered_urls, 10)):
            try:
                responses = yield [http_client.fetch(url['url']) for url in urls_chunk]
            except:
                print "Failed for some result in: ", urls_chunk
                continue
            for raw, response in izip(urls_chunk, responses):
                url = raw['url']
                data = {"url" : url, "body" : response.body, "desc" : raw['desc']}
                fname = url_to_filename(raw)
                try:
                    os.makedirs(os.path.dirname(fname))
                except OSError:
                    pass
                json.dump(data, open(fname, "w+"))
            time.sleep(.5)
Example No. 21
def write(manifest, manifest_path):
    dir_name = os.path.dirname(manifest_path)
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    with open(manifest_path, "wb") as f:
        json.dump(manifest.to_json(), f, sort_keys=True, indent=1)
        f.write("\n")
Example No. 22
def dump_event_queues():
    start = time.time()

    with open(settings.JSON_PERSISTENT_QUEUE_FILENAME, "w") as stored_queues:
        ujson.dump([(qid, client.to_dict()) for (qid, client) in six.iteritems(clients)], stored_queues)

    logging.info("Tornado dumped %d event queues in %.3fs" % (len(clients), time.time() - start))
Example No. 23
def setCookies():
    wte = raw_input("Please input your webTradeEligibility cookie: \n")
    sessionid = raw_input("Please input your sessionid cookie: \n")
    steamLogin = raw_input("Please input your steamLogin cookie: \n")
    steamLoginSecure = raw_input("Please input your steamLoginSecure cookie: \n")
    sma = raw_input("Please input your steamMachineAuth cookie (name+value together): \n")
    steamRememberLogin = raw_input("Please input your steamRememberLogin cookie: \n")

    cookies_json['webTradeEligibility'] = wte
    cookies_json['sessionid'] = sessionid
    cookies_json['steamLogin'] = steamLogin
    cookies_json['steamLoginSecure'] = steamLoginSecure
    cookies_json['steamMachineAuth'] = sma
    cookies_json['steamRememberLogin'] = steamRememberLogin

    try:
        cookies_json_file = open('util/cookies.json', 'w')
        ujson.dump(cookies_json, cookies_json_file)
        cookies_json_file.close()
    except IOError:
        print "Error opening cookie.json file"
        return False
    except ValueError:
        print "Error dumping data to cookie.json file"
        return False
Example No. 24
def dump_result(params, results, output_path):
    """Writes out a single result .json file in output_path.

    Parameters
    ----------
    params : dict
        Dictionary of parameter names and values
    results : dict
        Dictionary of an alignment result
    output_path : str
        Where to write out the json file
    """
    # Make a copy of params to avoid writing in-place below
    params = dict(params)
    # ujson can't handle infs, so we need to replace them manually:
    if params['norm'] == np.inf:
        params['norm'] = str(np.inf)
    # Convert params dict to a string of the form
    # param1_name_param1_value_param2_name_param2_value...
    param_string = "_".join(
        '{}_{}'.format(name, value) if type(value) != float else
        '{}_{:.3f}'.format(name, value) for name, value in params.items())
    # Construct a path where the .json results file will be written
    output_filename = os.path.join(output_path, "{}.json".format(param_string))
    # Store this result
    try:
        with open(output_filename, 'wb') as f:
            json.dump({'params': params, 'results': results}, f)
    # Ignore "OverflowError"s raised by ujson; they correspond to inf/NaN
    except OverflowError:
        pass
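
The inf workaround above can be checked in isolation; a small hedged illustration (the exact exception type varies across ujson versions, hence the broad catch):

import numpy as np
import ujson

params = {'norm': np.inf}
try:
    ujson.dumps(params)
except (OverflowError, ValueError):
    # ujson refuses to encode infinities, so fall back to the string "inf".
    params['norm'] = str(np.inf)
print(ujson.dumps(params))   # typically prints {"norm":"inf"}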
Example No. 25
def combine_dicts():
    with open('title10to100000.json') as tag200, open('title100000plus.json') as tag1500:
        tag200dict = ujson.load(tag200)
        tag500dict = ujson.load(tag1500)
        newdict = dict(chain(tag200dict.items(), tag500dict.items()))
        with open('titletagwords.json', 'w') as write:
            ujson.dump(newdict, write)
Example No. 26
 def run(self):
     while True:
         sleep(60)
         summary = self.statsCollector.getSummary()
         self.logger.info("Statistics update: {0}".format(summary))
         with open(self.fileName, 'w') as f:
             ujson.dump(summary, f)
Example No. 27
    def save(self, data):
        """Save data to file.

        Careful, this overwrites any existing data on file.
        Use self.update() to perform partial updates.
        """
        json.dump(data, open(self.path, 'w'))
Example No. 28
def _update_local(fname, qparams, replacement):
    """Update a document created using the local framework
    Parameters
    -----------
    fname: str
        Name of the local file the query should be run against
    qparams: dict
        Query parameters. Similar to online query methods
    replacement: dict
        Fields/value pair to be updated. Beware of disallowed fields
        such as time and uid
    """
    try:
        with open(fname, 'r') as fp:
            local_payload = ujson.load(fp)
        qobj = mongoquery.Query(qparams)
        for _sample in local_payload:
            try:
                if qobj.match(_sample):
                    for k, v in replacement.items():
                        _sample[k] = v
            except mongoquery.QueryError:
                pass
        with open(fname, 'w') as fp:
            ujson.dump(local_payload, fp)
    except FileNotFoundError:
        raise RuntimeWarning('Local file {} does not exist'.format(fname))
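
A hedged usage sketch for _update_local(); the file name, query, and replacement are illustrative and assume a local JSON store written by the create() methods shown earlier:

# Set 'container' to 'NULL' for every document whose name is 'blank'.
_update_local('samples.json',
              qparams={'name': 'blank'},
              replacement={'container': 'NULL'})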
Example No. 29
    def __init__(self, path, writer_queue=None):
        """Initialize using path to file and optional thread-safe queue.

        Queue is used for json serializable data to be written to file when
        self.write_queued() is called.

        If the file at 'path' doesn't exist it will be created.
        """

        self.path = os.path.realpath(os.path.expanduser(path))
        if not os.path.exists(self.path):
            print("Persistence file %s does not exist yet, creating it...")
            json.dump({}, open(self.path, 'w'))
        else:
            # check for json-ness
            try:
                json.load(open(self.path))
                LOG.debug("Loaded existing persistence file %s.",
                          os.path.relpath(self.path))
            except ValueError as err:
                raise ValueError("The persistence file -> %s is not "
                                 "a valid json file. | %s"
                                 % (os.path.relpath(self.path), err))
        if writer_queue and not isinstance(writer_queue, Queue.Queue):
            raise TypeError('writer_queue should be a Queue.Queue.')
        elif writer_queue:
            self.synq = writer_queue
            self.synq._persisted = set()
        else:
            self.synq = None
Example No. 30
def main(argv):
    args = docopt(__doc__, argv=argv)

    params = dict(p.split(':') for p in args['--parameters'])

    # format sort parameters.
    if args['--sort']:
        for i, field in enumerate(args['--sort']):
            key = 'sort[{0}]'.format(i)
            params[key] = field.strip().replace(':', ' ')

    query = ' '.join(args['<query>'])
    if args['--itemlist']:
        fields = ['identifier']
    else:
        fields = args['--field']
    search = search_items(query, fields=fields, params=params, v2=args['--v2'])
    if args['--number-found']:
        sys.stdout.write('{0}\n'.format(search.num_found))
        sys.exit(0)
    for result in search:
        try:
            if args['--itemlist']:
                sys.stdout.write(result.get('identifier', ''))
            else:
                json.dump(result, sys.stdout)
            sys.stdout.write('\n')
        except IOError:
            sys.exit(0)
Example No. 31
def write_json(filename, dataset):
    with codecs.open(filename, mode="w", encoding="utf-8") as f:
        ujson.dump(dataset, f)
Example No. 32
 def save(self):
     with open(PATH, "w") as outfile:
         ujson.dump(self.db, outfile, indent=4)
Example No. 33
 def __init__(self):
     if not os.path.exists(PATH):
         d = {}
         ujson.dump(d, open(PATH, "w"))
     self.db = ujson.load(open(PATH))
Example No. 34
bmp180.oversample_sett = 2
bmp180.baseline = 101325
alt_init = bmp180.altitude

# BNO055
bno055 = BNO055(I2C_bus)
pitch_init = bno055.readEuler().angle_x
pitch_init = pitch_init+180 if pitch_init <= 0 else pitch_init-180
roll_init = bno055.readEuler().angle_y
# Write to file
with open("/Web/www/autre.json", "r") as f:
    autre_file = ujson.load(f)
with open("/Web/www/autre.json", "w") as f:
    autre_file[2]["data"]["target"] = pitch_init
    autre_file[1]["data"]["target"] = roll_init
    ujson.dump(autre_file, f)

# Airspeed
speed = airspeed(bmp180)
speed_init = 10

# PID controllers
pitch_pid = pid(0, 0, 0)
roll_pid = pid(0, 0, 0)
speed_pid = pid(0, 0, 0)

# Read configuration
pid_counter = 0
#config.readConfig(pitch_pid, roll_pid, speed_pid)

# Setup mode object
Example No. 35
def save_cm(results, num_classes):
    labels = [i for i in range(num_classes)]
    cm = confusion_matrix(results["labels"],
                          results["predicts"],
                          labels=labels)

    data = []
    for target_index, target_row in enumerate(cm):
        for predicted_index, count in enumerate(target_row):
            data.append((labels[target_index], labels[predicted_index], count))

    df_cm = pd.DataFrame(data, columns=['target', 'predicted', 'count'])

    cm_file = '/confusion_matrix.csv'
    with open(cm_file, 'w') as f:
        df_cm.to_csv(f,
                     columns=['target', 'predicted', 'count'],
                     header=False,
                     index=False)

    lines = ''
    with open(cm_file, 'r') as f:
        lines = f.read()

    metadata = {
        'outputs': [{
            'type':
            'confusion_matrix',
            'format':
            'csv',
            'schema': [
                {
                    'name': 'target',
                    'type': 'CATEGORY'
                },
                {
                    'name': 'predicted',
                    'type': 'CATEGORY'
                },
                {
                    'name': 'count',
                    'type': 'NUMBER'
                },
            ],
            'source':
            lines,
            'storage':
            'inline',
            'labels':
            list(map(str, labels)),
        }]
    }

    with open("/mlpipeline-ui-metadata.json", 'w') as f:
        ujson.dump(metadata, f)

    accuracy = accuracy_score(results["labels"], results["predicts"])
    send_manage(accuracy)

    metrics = {
        'metrics': [{
            'name': 'accuracy-score',
            'numberValue': accuracy,
            'format': "PERCENTAGE",
        }]
    }

    with open('/accuracy.json', 'w') as f:
        ujson.dump(accuracy, f)
    with open('/mlpipeline-metrics.json', 'w') as f:
        ujson.dump(metrics, f)
Example No. 36
def dump_pool_config():
    with open(_pool_config_file, 'w', encoding='utf8') as f:
        json.dump(_group_pool, f, ensure_ascii=False)
Example No. 37
 def write_settings(self):
     ujson.dump(self.__dict__, open(self.settings_path, 'w+'))
Example No. 38
 def __init__(self):
     if not os.path.exists(PATH):
         with open(PATH, "w") as f_x:
             ujson.dump({}, f_x)
     with open(PATH) as yt_db:
         self.db = ujson.load(yt_db)
Example No. 39
def run_game(game, dockers, args, sock_file, scrimmage=False):
    '''
    This contains the logic that needs to be cleaned up at the end of a game
    If there is something that needs to be cleaned up add it in the try catch
    loop surrounding the catch loop surrounding the call of the function
    '''

    # Start the unix stream server
    main_server = server.start_server(sock_file, game, dockers)

    viewer_server = server.start_viewer_server(PORT, game)

    try:
        # Start the docker instances
        for player_key in dockers:
            docker_inst = dockers[player_key]
            docker_inst.start()
            for player_ in game.players:
                if player_['id'] == player_key:
                    player = player_['player']
                    break
            if player.planet == bc.Planet.Earth:
                planet = 'earth'
            else:
                planet = 'mars'
            if player.team == bc.Team.Blue:
                team = 'blue'
            else:
                team = 'red'

            name = '[{}:{}]'.format(planet, team)
            # 10 MB of logs in scrimmage, unlimited logging otherwise
            logger = Logger(name,
                            print=not args['terminal_viewer'],
                            limit=10**7 if scrimmage else 2**63)
            docker_inst.stream_logs(line_action=logger)
            player_['logger'] = logger

        # Wait until all the code is done then clean up
        while not game.game_over:
            time.sleep(0.1)

    finally:
        main_server.shutdown()
        try:
            main_server.server_close()
        except Exception as e:
            print(e)

        if viewer_server is not None:
            viewer_server.shutdown()

    match_file = {}
    match_file['message'] = game.viewer_messages
    if not game.disconnected:
        if bc.Team.Red == game.manager.winning_team():
            winner = 'player1'
        else:
            winner = 'player2'
    else:
        winner = game.winner

    match_file['metadata'] = {
        'player1': 'player1' if scrimmage else args['dir_p1'][8:],
        'player2': 'player2' if scrimmage else args['dir_p2'][8:],
        'winner': winner
    }

    if args['docker']:
        match_output = abspath(
            os.path.join('/player', str(args['replay_filename'])))
    else:
        match_output = args['replay_filename']
        if not os.path.isabs(match_output):
            match_output = abspath(os.path.join('..', str(match_output)))

    if not scrimmage:
        print("Saving replay to", match_output)

        match_ptr = open(match_output, 'w')
        json.dump(match_file, match_ptr)
        match_ptr.close()

        return winner
    else:
        return winner, match_file
Example No. 40
    def run_lighthouse_test(self, task):
        """Run a lighthouse test against the current browser session"""
        task['lighthouse_log'] = ''
        if 'url' in self.job and self.job['url'] is not None:
            self.job['shaper'].configure(self.job, task)
            output_path = os.path.join(task['dir'], 'lighthouse.json')
            json_file = os.path.join(task['dir'], 'lighthouse.report.json')
            json_gzip = os.path.join(task['dir'], 'lighthouse.json.gz')
            html_file = os.path.join(task['dir'], 'lighthouse.report.html')
            html_gzip = os.path.join(task['dir'], 'lighthouse.html.gz')
            time_limit = min(int(task['time_limit']), 80)
            command = [
                'lighthouse', '"{0}"'.format(self.job['url']),
                '--disable-network-throttling', '--disable-cpu-throttling',
                '--throttling-method', 'provided', '--enable-error-reporting',
                '--max-wait-for-load',
                str(int(time_limit * 1000)), '--port',
                str(task['port']), '--output', 'html', '--output', 'json',
                '--output-path', '"{0}"'.format(output_path)
            ]
            if self.job['keep_lighthouse_trace']:
                command.append('--save-assets')
            if self.options.android or 'mobile' not in self.job or not self.job[
                    'mobile']:
                command.append('--disable-device-emulation')
                if 'user_agent_string' in self.job:
                    sanitized_user_agent = re.sub(
                        r'[^a-zA-Z0-9_\-.;:/()\[\] ]+', '',
                        self.job['user_agent_string'])
                    command.append(
                        '--chrome-flags="--user-agent=\'{0}\'"'.format(
                            sanitized_user_agent))
            if len(task['block']):
                for pattern in task['block']:
                    pattern = "'" + pattern.replace("'", "'\\''") + "'"
                    command.extend(['--blocked-url-patterns', pattern])
            if 'headers' in task:
                headers_file = os.path.join(task['dir'],
                                            'lighthouse-headers.json')
                with open(headers_file, 'wb') as f_out:
                    json.dump(task['headers'], f_out)
                command.extend(
                    ['--extra-headers', '"{0}"'.format(headers_file)])
            cmd = ' '.join(command)
            self.lighthouse_command = cmd
            # Give lighthouse up to 10 minutes to run all of the audits
            try:
                lh_thread = threading.Thread(target=self.lighthouse_thread)
                lh_thread.start()
                lh_thread.join(600)
            except Exception:
                pass
            from .os_util import kill_all
            kill_all('node', True)
            self.job['shaper'].reset()
            # Rename and compress the trace file, delete the other assets
            if self.job['keep_lighthouse_trace']:
                try:
                    lh_trace_src = os.path.join(task['dir'],
                                                'lighthouse-0.trace.json')
                    if os.path.isfile(lh_trace_src):
                        # read the JSON in and re-write it line by line to match the other traces
                        with open(lh_trace_src, 'rb') as f_in:
                            trace = json.load(f_in)
                            if trace is not None and 'traceEvents' in trace:
                                lighthouse_trace = os.path.join(
                                    task['dir'], 'lighthouse_trace.json.gz')
                                with gzip.open(lighthouse_trace, 'wb', 7) as f_out:
                                    f_out.write('{"traceEvents":[{}')
                                    for trace_event in trace['traceEvents']:
                                        f_out.write(",\n")
                                        f_out.write(json.dumps(trace_event))
                                    f_out.write("\n]}")
                except Exception:
                    pass
            # Delete all the left-over lighthouse assets
            files = glob.glob(os.path.join(task['dir'], 'lighthouse-*'))
            for file_path in files:
                try:
                    os.remove(file_path)
                except Exception:
                    pass
            if os.path.isfile(json_file):
                # Remove the raw screenshots if they were stored with the file
                lh_report = None
                with open(json_file, 'rb') as f_in:
                    lh_report = json.load(f_in)
                modified = False
                if lh_report is not None and 'audits' in lh_report:
                    if 'screenshots' in lh_report['audits']:
                        del lh_report['audits']['screenshots']
                        modified = True
                    if 'screenshot-thumbnails' in lh_report['audits']:
                        del lh_report['audits']['screenshot-thumbnails']
                        modified = True
                if modified:
                    with gzip.open(json_gzip, 'wb', 7) as f_out:
                        json.dump(lh_report, f_out)
                else:
                    with open(json_file, 'rb') as f_in:
                        with gzip.open(json_gzip, 'wb', 7) as f_out:
                            shutil.copyfileobj(f_in, f_out)
                try:
                    os.remove(json_file)
                except Exception:
                    pass
                # Extract the audit scores
                if lh_report is not None:
                    audits = {}
                    # v1.x
                    if 'aggregations' in lh_report:
                        for entry in lh_report['aggregations']:
                            if 'name' in entry and 'total' in entry and \
                                    'scored' in entry and entry['scored']:
                                name = entry['name'].replace(' ', '')
                                audits[name] = entry['total']
                    # v2.x
                    elif 'reportCategories' in lh_report:
                        for category in lh_report['reportCategories']:
                            if 'name' in category and 'score' in category:
                                category_name = category['name'].replace(
                                    ' ', '')
                                score = float(category['score']) / 100.0
                                audits[category_name] = score
                                if category[
                                        'name'] == 'Performance' and 'audits' in category:
                                    for audit in category['audits']:
                                        if 'id' in audit and 'group' in audit and \
                                                audit['group'] == 'perf-metric' and \
                                                'result' in audit and \
                                                'rawValue' in audit['result']:
                                            name = category_name + '.' + \
                                                audit['id'].replace(' ', '')
                                            audits[name] = audit['result'][
                                                'rawValue']
                    # v3.x
                    elif 'categories' in lh_report:
                        for categoryId in lh_report['categories']:
                            category = lh_report['categories'][categoryId]
                            if 'title' not in category or 'score' not in category:
                                continue

                            category_title = category['title'].replace(' ', '')
                            audits[category_title] = category['score']

                            if categoryId != 'performance' or 'auditRefs' not in category:
                                continue

                            for auditRef in category['auditRefs']:
                                if auditRef['id'] not in lh_report['audits']:
                                    continue
                                if 'group' not in auditRef or auditRef[
                                        'group'] != 'metrics':
                                    continue
                                audit = lh_report['audits'][auditRef['id']]
                                name = category_title + '.' + audit['id']
                                audits[name] = audit['rawValue']
                    audits_gzip = os.path.join(task['dir'],
                                               'lighthouse_audits.json.gz')
                    with gzip.open(audits_gzip, 'wb', 7) as f_out:
                        json.dump(audits, f_out)
            if os.path.isfile(html_file):
                # Remove the raw screenshots if they were stored with the file
                with open(html_file, 'rb') as f_in:
                    lh_report = f_in.read()
                    start = lh_report.find('\n    &quot;screenshots')
                    if start >= 0:
                        end = lh_report.find('\n    },', start)
                        if end >= 0:
                            lh_report = lh_report[:start] + lh_report[end + 7:]
                    with gzip.open(html_gzip, 'wb', 7) as f_out:
                        f_out.write(lh_report)
                try:
                    os.remove(html_file)
                except Exception:
                    pass
Example No. 41
def crawl(url_list, procId):
    posts = []
    t = time.time()
    for i, url in enumerate(url_list):
        while True:
            res = requests.get(url)
            if res.status_code == 200:
                break
            else:
                time.sleep(0.5)

        soup = BeautifulSoup(res.text, 'lxml')
        
        articles = soup.find('div', {'id': 'bodyarea'}).findAll('td', {'class':  ['windowbg', 'windowbg2']})
        post_article = articles[0]
        comment_articles = articles[1:]
        
        post = {}
        try:
            post['post_user'] = post_article.find('td', 'poster_info').find('b').text
            post_article = post_article.find('td', 'td_headerandpost')
            post_article_meta = post_article.find('table').findAll('div')
            post['title'] = post_article_meta[0].text.strip()
            posted_time = post_article_meta[1].text
            if 'Today' in posted_time:
                today = datetime.today()
                post['posted_time'] = today.strftime("%B %d, %Y,") + posted_time.split("at")[1]
            else:
                post['posted_time'] = posted_time
            post['post_body'] = post_article.find('div', 'post').text
        except:
            # poll
            continue

        comment_list = []
        for comment_article in comment_articles:
            one_comment = {}
            try:
                one_comment['post_user'] = comment_article.find('td', 'poster_info').find('b').text
            except:
                print(url)
                print(comment_article)

            comment_article = comment_article.find('td', 'td_headerandpost')
            post_body = comment_article.find('div', 'post').text
            if post_body.isdigit():
                # Empty comment?
                continue
            
            one_comment['post_body'] = post_body
            comment_article_meta = comment_article.find('table').findAll('div')
            one_comment['title'] = comment_article_meta[0].text.strip()
            posted_time = comment_article_meta[1].text
            if 'Today' in posted_time:
                today = datetime.today()
                one_comment['posted_time'] = today.strftime("%B %d, %Y,") + posted_time.split("at")[1]
            else:
                one_comment['posted_time'] = posted_time
            
            comment_list.append(one_comment)

        page_base_url = url.rpartition(".")[0]
        current_comment_num = 20
        prev_comment_page = '1'
        while True:
            time.sleep(0.3)
            comment_page_url = "%s.%d" % (page_base_url, current_comment_num)
            while True:
                res_comment = requests.get(comment_page_url)
                if res_comment.status_code == 200:
                    break
                else:
                    time.sleep(0.5)
                    
            soup_comment = BeautifulSoup(res_comment.text, 'lxml')

            current_page = soup_comment.find('div', {'id': 'bodyarea'}).find('table').find('b').text
            if current_page == prev_comment_page:
                break
            else:
                prev_comment_page = current_page
                current_comment_num += 20

            for comment_article in soup_comment.findAll('article'):
                one_comment = {}
                one_comment['post_user'] = comment_article.find('td', 'poster_info').find('b').text
                comment_article = comment_article.find('td', 'td_headerandpost')
                post_body = comment_article.find('div', 'post').text
                if post_body.isdigit():
                    # Empty comment?
                    continue

                one_comment['post_body'] = post_body
                comment_article_meta = comment_article.find('table').findAll('div')
                one_comment['title'] = comment_article_meta[0].text.strip()
                posted_time = comment_article_meta[1].text
                if 'Today' in posted_time:
                    today = datetime.today()
                    one_comment['posted_time'] = today.strftime("%B %d, %Y,") + posted_time.split("at")[1]
                else:
                    one_comment['posted_time'] = posted_time
                
                comment_list.append(one_comment)

        post['comments'] = comment_list
        posts.append(post)

        if i % 50 == 0:
            t = time.time() - t
            print(f"{procId} - {i+1}/{len(url_list)}, {t:.2f} secondes")
            t = time.time()

            if i > 0 and i % 1000 == 0:    
                with open(f"bitcoin/bitcoin_forum_{procId}_{i//1000}.json", 'w') as f:
                    json.dump(posts, f)

                posts = []
                time.sleep(120)

        time.sleep(1)

    if len(posts) > 0:
        with open(f"bitcoin/bitcoin_forum_{procId}_last.json", 'w') as f:
            json.dump(posts, f)
Example No. 42
 def save(self, path):
     with path.open(mode='w') as f:
         json.dump(self.config, f)
Example No. 43
def save(filename, obj, message=None):
    if message is not None:
        print("Saving {}...".format(message))
    with open(filename, "w") as fh:
        json.dump(obj, fh)
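
A hedged usage example for save(); the filename and object are placeholders:

word2idx = {"the": 0, "a": 1}
save("word_dictionary.json", word2idx, message="word dictionary")
# Prints "Saving word dictionary..." and writes word_dictionary.json.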
Example No. 44
users = getUsersFromUserFriendsAsc(1)
res = []
usersCount = users.count()
for user in users:
	data = {}
	userId = user['user_id']
	data = getUserLocation(userId)
	data["user_id"] = userId
	data["friends"] = []
	i = 0
	for f in user["friends"]:
		if f != userId:
			friend = {}
			friend = getUserLocation(f)
			# friend["user_id"] = f
			# print friend
			data["friends"].append(friend)
	res.append(data)

# from pymongo import MongoClient
#
# client = MongoClient()
#
# db = client['yelp']
#
# db.user_test.insert_many(res)
#
with open('../../static/json/user_friends_location_data.json', 'w+') as outfile:
	ujson.dump(res, outfile)
Example No. 45
 def dump(self, filename):
     with open(filename, 'w', encoding='utf8') as f:
         json.dump(self._data, f, ensure_ascii=False)
Example No. 46
    def _write(
            datasetmeta,
            experiment_name,
            feature_names,
            label_names,
            nesting,
            numclasses,
            part_size,
            samplefetcher,
            tensorgetter,
            persisters={}):
        totalrows = 0
        eof = False
        while not eof:
            samples = []
            hashvariable = sha256()
            ids = []

            if callable(part_size):
                # Estimate sample size and calculate optimal part size
                try:
                    sample = samplefetcher()
                except StopIteration:
                    raise Exception('Trying to generate an empty dataset')
                sampleid = str(sample.entityid)
                ids.append(sampleid)
                hashvariable.update(sampleid)
                samples.append(sample)
                part_size = part_size(sample)

            for _ in xrange(0, part_size):
                try:
                    sample = samplefetcher()
                    sampleid = str(sample.entityid)
                    ids.append(sampleid)
                    hashvariable.update(sampleid)
                    samples.append(sample)
                except StopIteration:
                    eof = True
                    break
                except Exception as e:
                    logging.exception(e.message)

            if len(samples) == 0:
                break

            if Settings.EnsureUniqueEntitiesInDataset and len(ids) > len(set(ids)):
                raise Exception('String representations of sample ids are not unique')

            totalrows += len(samples)

            digest = hashvariable.hexdigest()
            partdir = 'input/%s' % experiment_name
            h_idx = 0
            for nest in nesting:
                partdir += '/' + digest[h_idx: h_idx + nest]
                h_idx += nest
            partdir += '/part_%s' % digest

            if os.path.isdir(partdir):
                with open(partdir + '/part.json', 'r') as f:
                    partmeta = json.load(f)
                partexists = True
            else:
                partexists = False
                partmeta = {
                    'bytesize': 0,
                    'numsamples': len(samples),
                    'unordered_features': []
                }
                os.makedirs(partdir)

            if partexists:
                # because conversion from sample to X takes time, we don't perform it, if there is already a cached
                # part on the disk. This is especially handy in the case when dataset processing had terminated due to
                # a bug in some feature, so you have to restart it.
                features_to_get = []
                for feature in feature_names:
                    featurefile = '%s/%s' % (partdir, DataSet.fname(feature))
                    if not os.path.isfile(featurefile):
                        features_to_get.append(feature)
            else:
                features_to_get = feature_names

            if len(features_to_get) > 0:
                for feature in features_to_get:
                    featurefile = '%s/%s' % (partdir, DataSet.fname(feature))

                    x = tensorgetter(samples, feature)
                    x[np.isnan(x)] = BaseFeature.MISSING_VALUE

                    try:
                        for ff in datasetmeta['features']:
                            if ff['name'] == feature:
                                if len(ff['output_shape']) == 0:
                                    cntr = ff['top10values'][0]
                                    cntr.update(x)
                                    if len(cntr) > 10:
                                        ff['top10values'][0] = Counter(dict(cntr.most_common(10)))
                                elif len(ff['output_shape']) == 1:
                                    for i in xrange(0, ff['output_shape'][0]):
                                        cntr = ff['top10values'][i]
                                        cntr.update(x[:, i])
                                        if len(cntr) > 10:
                                            ff['top10values'][i] = Counter(dict(cntr.most_common(10)))
                                else:
                                    cntr = ff['top10values'][0]
                                    cntr.update([np.mean(x)])
                                    if len(cntr) > 10:
                                        ff['top10values'][0] = Counter(dict(cntr.most_common(10)))

                                break
                    except:
                        logging.info('Cannot calculate top10values ' + traceback.format_exc())

                    if feature in persisters:
                        persisters[feature].save(featurefile, x)
                    else:
                        with open(featurefile, 'wb') as f:
                            np.save(f, x)

                    if feature not in partmeta['unordered_features']:
                        partmeta['unordered_features'].append(feature)

                    partmeta['bytesize'] += sys.getsizeof(x)

            for label in label_names:
                labelfile = '%s/Label-%s' % (partdir, DataSet.fname(label))

                x = tensorgetter(samples, label)
                x[np.isnan(x)] = BaseFeature.MISSING_VALUE

                if label in persisters:
                    persisters[label].save(labelfile, x)
                else:
                    with open(labelfile, 'wb') as f:
                        np.save(f, x)

                if numclasses > 0 and len(label_names) == 1 and 'class_counts' in datasetmeta:
                    if len(x.shape) == 1:
                        for cls in xrange(0, numclasses):
                            klass = 'Class_' + str(cls)
                            datasetmeta['class_counts'][klass] += sum(1 for y in x if y == cls)
                    elif len(x.shape) == 2 and x.shape[1] == 1:
                        for cls in xrange(0, numclasses):
                            klass = 'Class_' + str(cls)
                            datasetmeta['class_counts'][klass] += sum(1 for y in x if y[0] == cls)
                    else:
                        for cls in xrange(0, numclasses):
                            klass = 'Class_' + str(cls)
                            datasetmeta['class_counts'][klass] += sum(x[:, cls])

                partmeta['bytesize'] += sys.getsizeof(x)

            if not os.path.isfile(partdir + '/ids.txt'):
                with open(partdir + '/ids.txt', 'wb') as f:
                    f.writelines([x + "\n" for x in ids])

            with open(partdir + '/part.json', 'w') as f:
                json.dump(partmeta, f)
            logging.info('%s stored or updated. In total %d rows generated' % (partdir, totalrows))
            with open('input/%s/dataset_V5.json' % experiment_name, 'w') as f:
                json.dump(datasetmeta, f)
        sollfeatures = set([x['name'] for x in datasetmeta['features']])
        for entry in scandir('input/%s' % experiment_name):
            if entry.is_dir() and entry.name.startswith('part_'):
                metafile = 'input/%s/%s/part.json' % (experiment_name, entry.name)
                if os.path.isfile(metafile):
                    with open(metafile, 'r') as f:
                        meta = json.load(f)
                        ist = set(meta['unordered_features'])
                        missing = sollfeatures.difference(ist)
                        if len(missing) > 0:
                            logging.warning('%s does not contain the following features: %s' % (entry.name, str(missing)))
                            x = input('Press y to remove the part, any other key to leave it '
                                      '(in this case the missing features will always have missing values)')
                            if x == 'y':
                                shutil.rmtree('input/%s/%s' % (experiment_name, entry.name))
        with open('input/%s/dataset_V5.json' % experiment_name, 'w') as f:
            json.dump(datasetmeta, f)
        for ff in datasetmeta['features']:
            for v in ff['top10values']:
                if len(v) == 0:
                    logging.warning('Feature %s has no values' % ff['name'])
                elif len(v) == 1:
                    if v.most_common(1)[0][0] == BaseFeature.MISSING_VALUE:
                        logging.warning('Feature %s has only missing values' % ff['name'])
                    else:
                        logging.warning('Feature %s has only one value %s' % (ff['name'], v.most_common(1)[0][0]))
                elif v.most_common(1)[0][1] > 0.99 * totalrows:
                    logging.warning('Feature %s has the value %s in more than 99%% of samples'
                                    % (ff['name'], v.most_common(1)[0][0]))
        if 'class_counts' in datasetmeta:
            notpresent = []
            lessthancent = []
            for k, v in datasetmeta['class_counts'].items():
                if v == 0:
                    notpresent.append(str(k))
                if v < 0.01 * totalrows:
                    lessthancent.append(str(k))

            if len(notpresent) > 0 or len(lessthancent) > 0:
                raise Exception('There is a class distribution problem. Following classes '
                                'are not present in the dataset: %s. Following classes '
                                'contribute to less than 1%% of dataset: %s'
                                % (','.join(notpresent), ','.join(lessthancent)))
        return totalrows
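
The top10values bookkeeping above can be exercised on its own. The following minimal sketch (the value chunks are made up) shows the same pattern: a collections.Counter is updated with every value seen in a part and then trimmed back to its 10 most common entries.

from collections import Counter

# Hypothetical per-part value sequences standing in for the feature tensor x.
chunks = [[1.0, 1.0, 2.0], [1.0, 3.0, 3.0, 3.0]]

top10 = Counter()
for x in chunks:
    top10.update(x)                                   # count every value in this part
    if len(top10) > 10:
        top10 = Counter(dict(top10.most_common(10)))  # keep only the 10 most common

print(top10.most_common(3))  # [(1.0, 3), (3.0, 3), (2.0, 1)]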
Exemplo n.º 47
0
def mqtt():
    mainOctopus()
    print("Hello, this will help you initialize MQTT client")
    print("ver: " + ver + " (c)octopusLAB")
    print("id: " + esp_id)
    print("Press Ctrl+C to abort")

    # TODO improve this
    # prepare directory
    if 'config' not in uos.listdir():
        uos.makedirs('config')

    run = True
    while run:
        sele = setupMenu()

        if sele == "x":
            print("Setup - exit >")
            time.sleep_ms(2000)
            print("all OK, press CTRL+D to soft reboot")
            run = False

        if sele == "si":  #system_info()
            from util.sys_info import sys_info
            sys_info()

        if sele == "cv":
            print("------- Set 0/1/str for settings ------")
            wc = {}
            wc['name'] = input("device (host)name/describe: ")
            wc['time'] = int(input("get time from server? [1/0]: "))
            wc['mysql'] = int(input("send data to mysql db [1/0]: "))
            if wc['mysql']: wc['mysqlURL'] = input("mysql Write URL: ")
            wc['mqtt'] = int(input("mqtt client [1/0]: "))
            wc['influx'] = int(input("send data to influx db [1/0]: "))
            if wc['influx']: wc['influxWriteURL'] = input("influx Write URL: ")
            wc['timer'] = int(input("timer: "))

            print("Writing to file config/mqtt_io.json")
            with open('config/mqtt_io.json', 'w') as f:
                ujson.dump(wc, f)

        if sele == "ms":
            print("Set mqtt >")
            print()
            mq = {}
            mq['mqtt_broker_ip'] = input("BROKER IP: ")
            mq['mqtt_ssl'] = int(input("> SSL (0/1): "))
            mq['mqtt_port'] = int(input("> PORT (1883/8883/?): "))
            mq['mqtt_clientid_prefix'] = input("CLIENT PREFIX: ")
            mq_user = input("Username: ")
            mq['mqtt_user'] = "" if mq_user == "" else mq_user
            mq_pass = input("Password: ")
            mq['mqtt_pass'] = "" if mq_pass == "" else mq_pass
            mq['mqtt_root_topic'] = input("ROOT TOPIC: ")

            print("Writing to file config/mqtt.json")
            with open('config/mqtt.json', 'w') as f:
                ujson.dump(mq, f)

        def mqtt_sub(topic, msg):
            print("MQTT Topic {0}: {1}".format(topic, msg))

        if sele == "mt":
            print("mqtt simple test:")

            print("wifi_config >")
            wifi = WiFiConnect(250)
            wifi.events_add_connecting(connecting_callback)
            wifi.events_add_connected(connected_callback)
            print("wifi.connect")
            wifi_status = wifi.connect()

            # url config: TODO > extern.

            print("mqtt_config >")
            mqtt_config = read_mqtt_config()
            mqtt_clientid_prefix = mqtt_config["mqtt_clientid_prefix"]
            mqtt_host = mqtt_config["mqtt_broker_ip"]
            mqtt_root_topic = mqtt_config["mqtt_root_topic"]
            mqtt_ssl = mqtt_config["mqtt_ssl"]
            mqtt_user = mqtt_config["mqtt_user"]
            mqtt_pass = mqtt_config["mqtt_pass"]

            mqtt_clientid = mqtt_clientid_prefix + esp_id
            c = MQTTClient(mqtt_clientid,
                           mqtt_host,
                           ssl=mqtt_ssl,
                           user=mqtt_user,
                           password=mqtt_pass)
            c.set_callback(mqtt_sub)
            print("mqtt.connect to " + mqtt_host)
            c.connect()
            """
            # c.subscribe("/octopus/device/{0}/#".format(esp_id))
            subStr = mqtt_root_topic+"/"+esp_id+"/#"
            print("subscribe (root topic + esp id):" + subStr)
            c.subscribe(subStr)
            """

            mqtt_log_topic = mqtt_root_topic + "/log"
            print("mqtt log > " + mqtt_log_topic)

            print(mqtt_log_topic)
            # mqtt_root_topic_temp = "octopus/device"
            c.publish(mqtt_log_topic,
                      esp_id)  # topic, message (value) to publish
Exemplo n.º 48
0
    def save_time(self):
        with open(TIME_FILE, "w") as file:
            ujson.dump(utime.localtime(), file)
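
A matching reader is not shown in the source; a minimal sketch, assuming the same TIME_FILE constant and that callers want a time tuple back, could look like this:

    def load_time(self):
        # Hedged counterpart to save_time (not in the original snippet):
        # ujson.load returns a list, so convert it back to a tuple.
        # Returns None if the file is missing or holds invalid JSON.
        try:
            with open(TIME_FILE) as file:
                return tuple(ujson.load(file))
        except (OSError, ValueError):
            return None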
Exemplo n.º 49
0
                    element.xpath(
                        ".//div[@class='company-title']/a/text()")[0].strip(),
                    #'tel': element.xpath(".//div[@class='legal-person']/span[@class='margin-r-1x']/text()")[0].strip(),
                    'legal_owner':
                    element.xpath(
                        ".//div[@class='legal-person']/text()")[0].strip(),
                    #'address': element.xpath(".//div[@class='legal-person'][1]/span/text()")[0].strip(),
                    'status':
                    element.xpath(".//div[@class='company-tags']/span/text()")
                    [0].strip(),
                    'capital':
                    element.xpath(".//div[contains(@class, 'col-3-1')]/text()")
                    [0].strip(),
                    'date':
                    element.xpath(".//div[contains(@class, 'col-3-2')]/text()")
                    [0].strip()
                    #'url': element.xpath(".//div[@class='company-title']/a/@href")[0].strip()
                })
            time.sleep(10)
        return result


if __name__ == "__main__":
    fetcher = Fetcher()
    fetcher.login()
    time.sleep(5)
    html = fetcher.get_page()

    ujson.dump(html, open('result.json', 'w'))
    fetcher.close_driver()
Exemplo n.º 50
0
        auto.add_word(word, word)
    auto.make_automaton()

    for word in tqdm(words):
        for end_ind, found in auto.iter(word):
            if found in mapping:
                mapping[found].append(word)
            else:
                mapping[found] = [word]

    return mapping


cedict = pd.read_csv("./data/intermediate/cedict.txt", sep="\t", index_col=0)

simplified_words = list(cedict["simplified"])
traditional_words = list(cedict["traditional"])

simplified_char_to_word = characters_to_words(simplified_words)
traditional_char_to_word = characters_to_words(traditional_words)

with gzip.open("./data/intermediate/simplified_containing_words.json.zip",
               "wt",
               encoding="utf-8") as f:
    ujson.dump(simplified_char_to_word, f)

with gzip.open("./data/intermediate/traditional_containing_words.json.zip",
               "wt",
               encoding="utf-8") as f:
    ujson.dump(traditional_char_to_word, f)
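
For reference, the same gzip/ujson pairing reads the mapping back; this round-trip sketch reuses the file name written above and only assumes text-mode gzip access.

with gzip.open("./data/intermediate/simplified_containing_words.json.zip",
               "rt",
               encoding="utf-8") as f:
    simplified_char_to_word = ujson.load(f)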
Exemplo n.º 51
0
    else:
        for dataset in [train_data, test_data, eval_data]:
            dataset.map_items(tokenizer,
                              final_url_ids,
                              final_publication_ids,
                              filter=False)
    print("Items mapped")
    mapped_data_path = Path(args.data_dir) / "mapped-data"
    if not mapped_data_path.is_dir():
        mapped_data_path.mkdir()

    train_mapped_path = mapped_data_path / "train.json"
    test_mapped_path = mapped_data_path / "test.json"
    eval_mapped_path = mapped_data_path / "evaluation.json"
    with open(train_mapped_path, "w") as file:
        json.dump(train_data.examples, file)
    with open(test_mapped_path, "w") as file:
        json.dump(test_data.examples, file)
    with open(eval_mapped_path, "w") as file:
        json.dump(eval_data.examples, file)
    print(f"Mapped Data saved to {mapped_data_path} directory")

# create weights for dataset samples to ensure only positive and negative examples are chosen in respective samples
pos_sampler = train_data.create_positive_sampler(args.target_publication)
neg_sampler = train_data.create_negative_sampler(args.target_publication)

train_batch_sampler = sampler_util.BatchSamplerWithNegativeSamples(
    pos_sampler=pos_sampler,
    neg_sampler=neg_sampler,
    items=train_data.examples,
    batch_size=args.batch_size,
Exemplo n.º 52
0
def dump_user_collection(uid: str, ucollection):
    with open(os.path.join(_collection_path, f'{uid}.json'),
              'w',
              encoding='utf8') as f:
        json.dump(ucollection, f, ensure_ascii=False)
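
A read counterpart is not part of the snippet; assuming the same _collection_path convention and that json is the module used by dump_user_collection, a sketch might be:

def load_user_collection(uid: str):
    # Hypothetical counterpart: return an empty collection if nothing was dumped yet.
    path = os.path.join(_collection_path, f'{uid}.json')
    if not os.path.isfile(path):
        return {}
    with open(path, encoding='utf8') as f:
        return json.load(f)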
Exemplo n.º 53
0
    def writefile(self, data):
        with open(self._filename, 'w') as fh:
            ujson.dump(data, fh, indent=4)

        return True
Exemplo n.º 54
0
    async def get_wiki_article(self, wiki_title: str) -> WikiArticle:
        # Note client is responsible for rate limiting as needed
        if self.cache_dir is not None:
            tokenized_file = self._get_tokenized_filename(wiki_title)
            if exists(tokenized_file):
                log.info("Load wiki article for \"%s\" from cache", wiki_title)
                with open(tokenized_file, "r") as f:
                    data = ujson.load(f)
                    return WikiArticle(data["title"], data["url"], [WikiParagraph.from_json(x) for
                                                                    x in data["paragraphs"]])

        log.info("Load wiki article for \"%s\"", wiki_title)

        async with ClientSession() as sess:
            # Use int(self.follow_redirects) since this get method doesn't support
            # bool values for some reason
            async with sess.get(url=WIKI_API,
                                params=dict(action="parse", page=wiki_title,
                                            redirects=int(self.follow_redirects),
                                            format="json")) as resp:
                data = await resp.json()

        raw_data = data["parse"]

        # Extract paragraph based on HTML tags
        # Wiki html is pretty structured, so this seems to work reasonably well
        soup = BeautifulSoup(raw_data["text"]["*"], "lxml")
        paragraphs = []
        to_find = ["p", "h2", "h3", "h4", "h5", "h6"]
        if self.extract_lists:
            to_find += ["ul", "ol"]
        for element in soup.findAll(to_find):
            if element.name[0] == "h":
                if element.get_text() == "Contents":
                    continue
                sect_name = element.find(attrs={"class": "mw-headline"}).get_text()
                para = self._sent_to_paragraph(len(paragraphs), "section", [sect_name])
                if para.n_tokens > 0:
                    paragraphs.append(para)
            elif element.name == "ul" or element.name == "ol":
                if dict(element.parent.attrs).get("class") != ["mw-parser-output"]:
                    # only extract "body" lists
                    continue
                para = self._sent_to_paragraph(len(paragraphs),
                                               "list" if element.name == "ul" else "ordered_list",
                                               [x.get_text() for x in element.findAll("li")])
                if para.n_tokens > 0:
                    paragraphs.append(para)
            else:
                # remove citations
                for citation in element.findAll("sup", {"class": "reference"}):
                    citation.extract()

                # remove citation needed
                for sub in element.findAll("sup"):
                    citations = sub.findAll("a", href=True)
                    if len(citations) == 1:
                        citation = citations[0]
                        href = citation["href"]
                        if href.startswith("#cite") or href == "/wiki/Wikipedia:Citation_needed":
                            sub.extract()
                text = element.get_text()
                para = self._text_to_paragraph(len(paragraphs), "paragraph", text)
                if para.n_tokens > 0:
                    paragraphs.append(para)

        article = WikiArticle(wiki_title, raw_data["pageid"], paragraphs)

        if self.cache_dir is not None:
            # save to cache
            with open(tokenized_file, "w") as f:
                ujson.dump(dict(title=article.title, url=article.url,
                                paragraphs=[x.to_json() for x in article.paragraphs]), f)
        return article
Exemplo n.º 55
0
def store_params(learn_args):
    log(logger.info, 'Learning args: {}'.format(learn_args))
    with open('%s.json' % learn_args['model_prefix'], 'w') as fout:
        json.dump(learn_args, fout)
Exemplo n.º 56
0
def save(filename, obj, message=None):
    if message is not None:
        print(f"Saving {message}...")
    with open(filename, "w") as fh:
        json.dump(obj, fh)
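
A quick usage sketch (the file name and payload are made up) shows that message only controls the progress print, while the dump itself always runs:

word_counts = {"the": 4201, "of": 1983}        # hypothetical payload
save("word_counts.json", word_counts, message="word counts")
save("word_counts.json", word_counts)          # silent, but still written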
Exemplo n.º 57
0
import pickle
import ujson as json
import pandas as pd

with open("output/db.pickle", "rb") as f:
    db = pickle.load(f)

json_data = {
    kk: {str(k[0]): v.data for k, v in vv.items()}
    for kk, vv in db.data.items()
}

with open('output/data_full.json', 'w') as f:
    json.dump(json_data, f, indent=2, sort_keys=True)

for kk, vv in json_data.items():
    df = pd.DataFrame.from_dict(vv, orient='index')
    df.to_csv("output/{}.csv".format(kk), index=False, na_rep='NA')

Exemplo n.º 58
0
    async def choice_cb(_, c_q: CallbackQuery):
        if not os.path.exists(PATH):
            await c_q.answer("𝑶𝒑𝒊𝒏𝒊𝒐𝒏 𝒅𝒂𝒕𝒂 𝒅𝒐𝒏'𝒕 𝒆𝒙𝒊𝒔𝒕 𝒂𝒏𝒚𝒎𝒐𝒓𝒆.",
                             show_alert=True)
            return
        opinion_id = c_q.matches[0].group(2)
        ids = c_q.from_user.id
        counter = c_q.matches[0].group(1)
        with open(PATH) as f:
            data = ujson.load(f)
        view_data = data[str(opinion_id)]
        agree_data = "👍"
        disagree_data = "👎"

        if len(view_data) == 2:
            if str(ids) in view_data[0]:
                if view_data[0][str(ids)] == "y" and counter == "y":
                    await c_q.answer("Already Voted for 👍", show_alert=True)
                    return
                if view_data[0][str(ids)] == "n" and counter == "n":
                    await c_q.answer("Already Voted for 👎", show_alert=True)
                    return
                # Answering Query First then moving forward
                choice = _choice(counter)
                await c_q.answer(f"You Choose  {choice}", show_alert=False)
                #
                if view_data[0][str(ids)] == "y" and counter == "n":
                    agree = int(view_data[1]["agree"]) - 1
                    disagree = int(view_data[1]["disagree"]) + 1
                    view_data[1] = {"agree": agree, "disagree": disagree}
                    view_data[0][str(ids)] = "n"
                if view_data[0][str(ids)] == "n" and counter == "y":
                    agree = int(view_data[1]["agree"]) + 1
                    disagree = view_data[1]["disagree"] - 1
                    view_data[1] = {"agree": agree, "disagree": disagree}
                    view_data[0][str(ids)] = "y"
            else:
                # Answering Query First then moving forward
                choice = _choice(counter)
                await c_q.answer(f"You Choose {choice}", show_alert=False)
                #
                new_id = {ids: counter}
                view_data[0].update(new_id)
                if counter == "y":
                    agree = view_data[1]["agree"] + 1
                    disagree = view_data[1]["disagree"]
                if counter == "n":
                    agree = view_data[1]["agree"]
                    disagree = view_data[1]["disagree"] + 1
                view_data[1] = {"agree": agree, "disagree": disagree}
            data[str(opinion_id)] = view_data
            with open(PATH, "w") as outfile:
                ujson.dump(data, outfile)
        else:
            if len(view_data) == 1:
                # Answering Query First then moving forward
                choice = _choice(counter)
                await c_q.answer(f"You Choose  {choice}", show_alert=False)
                if counter == "y":
                    view_data = [{ids: "y"}, {"agree": 1, "disagree": 0}]
                if counter == "n":
                    view_data = [{ids: "n"}, {"agree": 0, "disagree": 1}]
                data[str(opinion_id)] = view_data
                with open(PATH, "w") as outfile:
                    ujson.dump(data, outfile)
        agree_data += f"  {view_data[1]['agree']}"
        disagree_data += f"  {view_data[1]['disagree']}"
        opinion_data = [
            [
                InlineKeyboardButton(agree_data,
                                     callback_data=f"op_y_{opinion_id}"),
                InlineKeyboardButton(disagree_data,
                                     callback_data=f"op_n_{opinion_id}"),
            ],
            [
                InlineKeyboardButton("📊 Stats",
                                     callback_data=f"opresult_{opinion_id}")
            ],
        ]
        try:
            await c_q.edit_message_reply_markup(
                reply_markup=InlineKeyboardMarkup(opinion_data))
        except FloodWait as e:
            await asyncio.sleep(e.x)
        except BadRequest:
            return
Exemplo n.º 59
0
def adapter(json_thing):
    response = Response(content_type="application/json")
    # We'll write to the response body "file" in hopes that it's faster than a
    # huge standard string.  Absolutely 100% untested and uninvestigated.
    ujson.dump(json_thing, response.body_file)
    return response
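
The same write-to-a-file-like-object idea can be tried outside the web framework. In this sketch io.StringIO stands in for response.body_file, since ujson.dump only needs an object with a write() method:

import io
import ujson

buf = io.StringIO()
ujson.dump({"status": "ok", "items": [1, 2, 3]}, buf)
print(buf.getvalue())  # {"status":"ok","items":[1,2,3]}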
Exemplo n.º 60
0
    def save_json(self, fname):
        with open(os.path.join(self.quantitative_dir, fname), 'w') as fp:
            ujson.dump(self.agent_data, fp, sort_keys=True, indent=4)
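
A matching loader is not shown; assuming the same quantitative_dir layout, it could be as small as:

    def load_json(self, fname):
        # Hypothetical counterpart to save_json above.
        with open(os.path.join(self.quantitative_dir, fname)) as fp:
            return ujson.load(fp)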