Пример #1
0
def json_to_object(entity_cls, json_str):
    """Deserialize *json_str* into a new instance of *entity_cls*.

    Only keys that already exist as attributes on a fresh instance are
    copied over, so unknown JSON keys are silently ignored.

    Returns the populated instance, or prints None and returns None
    when *entity_cls* is not a class.
    """
    # The original `isinstance(entity_cls, object)` was always true;
    # checking against `type` actually verifies we were handed a class.
    if isinstance(entity_cls, type):
        entity = entity_cls()
        json_dict = JSONDecoder().decode(json_str)
        for key, value in json_dict.items():
            if hasattr(entity, key):
                setattr(entity, key, value)
        return entity
    # Python-2 `print None` statement replaced with a py3-compatible call.
    print(None)
Пример #2
0
def load_states():
    """Return a dictionary from state names to lists of polygons.

    >>> len(load_states()['HI'])  # Hawaii has 5 islands
    5
    """
    with open(DATA_PATH + 'states.json', encoding='utf8') as json_data_file:
        raw = json_data_file.read()
    decoded = JSONDecoder().decode(raw)
    result = {}
    for name, shapes in decoded.items():
        result[name] = format_shapes(shapes)
    return result
Пример #3
0
def load_states():
    """Return a dictionary from state names to lists of polygons.

    >>> len(load_states()['HI'])  # Hawaii has 5 islands
    5
    """
    path = DATA_PATH + 'states.json'
    with open(path, encoding='utf8') as fh:
        parsed = JSONDecoder().decode(fh.read())
    return dict((name, format_shapes(outlines)) for name, outlines in parsed.items())
def _read_json(path):
    """Load and JSON-decode the contents of *path* (closing the file)."""
    with open(path, "r") as fh:
        return JSONDecoder().decode(fh.read())


def main(args):
    """Compute TF-IDF weights for every document term-frequency vector.

    Reads three JSON files from the working directory:
      - vector_space.txt : {doc_id: [term frequencies]}
      - doc_wt_sw.txt    : {doc_id: [words in the document]}
      - words.txt        : [vocabulary words], index-aligned with tf vectors
    Writes tfidf.txt ({doc_id: [tf-idf weights]}) and inv_frec_vector.txt
    (one IDF value per vocabulary word).  *args* is accepted for CLI
    compatibility but unused.
    """
    vector_space = _read_json("vector_space.txt")
    documents = _read_json("doc_wt_sw.txt")
    vocabulary = _read_json("words.txt")

    doc_count = len(vector_space)
    inv_frec_vector = []

    t0 = time()
    for word in vocabulary:
        # Document frequency: number of documents containing the word.
        # NOTE(review): a word absent from every document still raises
        # ZeroDivisionError, exactly as the original code did.
        containing = sum(1 for document in documents.values() if word in document)
        inv_frec_vector.append(log(doc_count / containing))
    print("done in %0.3fs." % (time() - t0))

    t0 = time()
    tfidf = {}
    for doc_id, frequencies in vector_space.items():
        tfidf[doc_id] = [
            ter_frec * inv_frec_vector[idx] if ter_frec > 0 else 0
            for idx, ter_frec in enumerate(frequencies)
        ]
    print("done in %0.3fs." % (time() - t0))

    # `with` guarantees the output handles are flushed and closed.
    with open("tfidf.txt", "w") as out:
        out.write(JSONEncoder().encode(tfidf))
    with open("inv_frec_vector.txt", "w") as out:
        out.write(JSONEncoder().encode(inv_frec_vector))
Пример #5
0
def load_states():
    """Load the coordinates of all the state outlines and return them
    in a dictionary, from names to shapes lists.
    >>> len(load_states()['HI'])  # Hawaii has 5 islands
    5
    """
    # `with` guarantees the file handle is closed (the original leaked it).
    with open("data/states.json", encoding='utf8') as json_data_file:
        states = JSONDecoder().decode(json_data_file.read())
    for state, shapes in states.items():
        for index, shape in enumerate(shapes):
            # [[pos, ...]] wraps a single polygon one level too deep; unwrap.
            if isinstance(shape[0][0], list):  # the shape is a single polygon
                assert len(shape) == 1, 'Multi-polygon shape'
                shape = shape[0]
            shapes[index] = [make_position(*reversed(pos)) for pos in shape]
    return states
Пример #6
0
def load_states():
    """Load the coordinates of all the state outlines and return them
    in a dictionary, from names to shapes lists.
    >>> len(load_states()['HI'])  # Hawaii has 5 islands
    5
    """
    # Context manager closes the handle; the original never closed it.
    with open("data/states.json", encoding='utf8') as json_data_file:
        states = JSONDecoder().decode(json_data_file.read())
    for state, shapes in states.items():
        for index, shape in enumerate(shapes):
            # A singly-wrapped polygon arrives as [[pos, ...]]; unwrap it.
            if isinstance(shape[0][0], list):  # the shape is a single polygon
                assert len(shape) == 1, 'Multi-polygon shape'
                shape = shape[0]
            shapes[index] = [make_position(*reversed(pos)) for pos in shape]
    return states
Пример #7
0
def load_states():
    """Load the coordinates of all the state outlines and return them
    in a database, from names to shapes lists.

    >>> len(get_value_from_key(load_states(), 'HI'))  # Hawaii has 5 islands
    5
    """
    # `with` guarantees the file handle is closed (the original leaked it).
    with open(DATA_PATH + 'states.json', encoding='utf8') as json_data_file:
        states = JSONDecoder().decode(json_data_file.read())
    states_database = make_database()
    for state, shapes in states.items():
        states_database = add_value(states_database, state, shapes)
    for state, shapes in get_items(states_database):
        for index, shape in enumerate(shapes):
            # [[pos, ...]] wraps a single polygon one level too deep; unwrap.
            if isinstance(shape[0][0], list):  # the shape is a single polygon
                assert len(shape) == 1, 'Multi-polygon shape'
                shape = shape[0]
            shapes[index] = [make_position(*reversed(pos)) for pos in shape]
    return states_database
Пример #8
0
def load_states():
    """Load the coordinates of all the state outlines and return them
    in a database, from names to shapes lists.

    >>> len(get_value_from_key(load_states(), 'HI'))  # Hawaii has 5 islands
    5
    """
    # Context manager closes the handle; the original never closed it.
    with open(DATA_PATH + 'states.json', encoding='utf8') as json_data_file:
        states = JSONDecoder().decode(json_data_file.read())
    states_database = make_database()
    for state, shapes in states.items():
        states_database = add_value(states_database, state, shapes)
    for state, shapes in get_items(states_database):
        for index, shape in enumerate(shapes):
            # A singly-wrapped polygon arrives as [[pos, ...]]; unwrap it.
            if isinstance(shape[0][0], list):  # the shape is a single polygon
                assert len(shape) == 1, 'Multi-polygon shape'
                shape = shape[0]
            shapes[index] = [make_position(*reversed(pos)) for pos in shape]
    return states_database
Пример #9
0
n_topics = 10
n_top_words = 50

t0 = time()
print("Loading dataset and extracting TF-IDF features...")

# `with` blocks guarantee the handles are closed; the originals relied on
# manual close() calls that are skipped if decoding raises.
with open("documents.txt", "r") as file_documents:
    documents = JSONDecoder().decode(file_documents.read())

# errors="replace" tolerates bytes invalid in the default text encoding.
with open("stopwords.txt", "r", errors="replace") as file_stopwords:
    stopwords = file_stopwords.read().split()

# Topic modelling only needs the document bodies, not their ids.
dataset = list(documents.values())

vectorizer = TfidfVectorizer(
    max_df=0.95, min_df=2, max_features=n_features, stop_words=stopwords)

tfidf = vectorizer.fit_transform(dataset[:n_samples])

print("done in %0.3fs." % (time() - t0))
print()

print("Fitting the NMF model with n_samples=%d and n_features=%d..." %
      (n_samples, n_features))

nmf = NMF(n_components=n_topics, random_state=1).fit(tfidf)
Пример #10
0
class EventData:
    """Persistent store of athlete activity data for one running event.

    The data lives in a JSON file mapping Strava athlete ids to their
    profile, OAuth tokens, per-day activities and per-week scores.
    """

    def __init__(self, file_name, start_date, end_date, event_type):
        self.__dataFile = file_name
        if os.path.isfile(file_name):
            # `with` guarantees the handle is closed even if decoding fails.
            with open(file_name) as fr:
                json_str = fr.read()
            if json_str is None or json_str == "":
                self.__data = {}
            else:
                self.__data = JSONDecoder().decode(json_str)
        else:
            self.__data = {}
        self.__startDate = start_date
        self.__endDate = end_date
        self.__type = TYPE_MILEAGE if not event_type else event_type
        self.numDays = 1 + (self.__endDate - self.__startDate).days
        # Floor division: the original `/ 7` produced a float under
        # Python 3, crashing every later `range(self.numWeeks)` call
        # and weekly-score list index.
        self.numWeeks = self.numDays // 7

    def register_athlete(self, auth_res):
        """Add an athlete from a Strava OAuth response.

        No-op when the athlete id is already registered.
        """
        athlete = auth_res['athlete']

        if str(athlete['id']) in self.__data:
            return
        entry = {
            'first_name': athlete['firstname'],
            'last_name': athlete['lastname'],
            'gender': athlete['sex'],
            'access_token': auth_res['access_token'],
            'refresh_token': auth_res['refresh_token'],
            'token_expires_at': auth_res['expires_at'],
            'activities': [None for i in range(self.numDays)],
            'weekly_scores': [0 for i in range(self.numWeeks)]
        }
        self.__data[str(athlete['id'])] = entry

    def get_current_week_idx(self, time_zone='UTC'):
        """Return today's week index, clamped to [0, numWeeks]."""
        today = datetime.now(timezone(time_zone)).date()
        # Floor division keeps the index an int under Python 3.
        ret = (today - self.__startDate).days // 7
        return min(max(ret, 0), self.numWeeks)

    def update_weekly_streak_scores(self, week_idx):
        """Calculate the weekly running-streak score for all athletes.

        Each active day scores 1; every gap of d > 0 rest days costs
        d - 1 penalty points.  The result is clamped to [0, 6].
        """
        if week_idx >= self.numWeeks:
            return
        for k, v in self.__data.items():
            activities = v['activities']
            base_score = 0
            penalty = 0
            drought = 0
            for i in range(7):
                if activities[week_idx * 7 + i] is None:
                    drought += 1
                else:
                    base_score += 1
                    if drought > 0:
                        penalty += (drought - 1)
                    drought = 0
            if drought > 0:
                # Trailing rest days at week end are penalized too.
                penalty += (drought - 1)
            score = min(max(base_score - penalty, 0), 6)
            v['weekly_scores'][week_idx] = score

    def update_weekly_mileages(self, week_idx):
        """Calculate the total weekly mileage for all athletes."""
        if week_idx >= self.numWeeks:
            return
        for k, v in self.__data.items():
            activities = v['activities']
            mileages = 0.0
            for i in range(7):
                day = activities[week_idx * 7 + i]
                if day is not None:
                    # Each day maps activity id -> distance.
                    for a_id, m in day.items():
                        mileages += float(m)
            v['weekly_scores'][week_idx] = round(mileages, 2)

    def update_weekly_scores(self, week_idx):
        """Dispatch to the scoring rule matching the event type."""
        if self.__type == TYPE_MILEAGE:
            self.update_weekly_mileages(week_idx)
        elif self.__type == TYPE_STREAK:
            self.update_weekly_streak_scores(week_idx)

    def get_weekly_data(self, week_idx):
        """Return [header string, per-athlete entries] for a week.

        Each entry holds the athlete's name, the distances on all 7 days,
        the weekly score, and the adjusted total score.
        """
        ret = []
        if week_idx >= self.numWeeks or week_idx < 0:
            return ["Week:", []]

        week_start = self.__startDate + timedelta(week_idx * 7)
        week_end = week_start + timedelta(6)
        week_str = 'Week: {0.month}/{0.day}/{0.year} - {1.month}/{1.day}/{1.year}'.format(
            week_start, week_end)
        for k, v in self.__data.items():
            workouts = v['activities'][7 * week_idx:7 * week_idx + 7]
            workouts_stats = []
            for x in workouts:
                if x:
                    distance = 0.0
                    for i, m in x.items():
                        distance += m
                    workouts_stats.append("{0:.1f}".format(distance))
                else:
                    workouts_stats.append('')
            # Decimal avoids float artifacts when summing weekly scores.
            total_score = sum(Decimal(i) for i in v['weekly_scores'])
            if self.__type == TYPE_MILEAGE:
                total_score = round(total_score, 2)
            entry = {
                'name': v['first_name'] + ' ' + v['last_name'],
                'workouts': workouts_stats,
                'score': v['weekly_scores'][week_idx],
                'total_score': total_score
            }
            ret.append(entry)
        return [week_str, ret]

    def update_activities(self, strava_obj, auth, time_zone):
        """Fetch recent Strava activities for every athlete and record them.

        Access tokens expiring within the next hour are refreshed first;
        if a refresh fails the whole update is aborted.
        """
        for athlete_id, athlete_stats in self.__data.items():
            current_time = int(time.time())
            expires_at = int(athlete_stats['token_expires_at'])
            if expires_at - current_time <= 3600:
                auth_res = auth.refresh_token(athlete_stats['refresh_token'])
                if 'access_token' in auth_res:
                    athlete_stats['access_token'] = auth_res['access_token']
                    athlete_stats['token_expires_at'] = auth_res['expires_at']
                else:
                    return
            # Look back 360000 s (100 h) for new activities.
            activities = strava_obj.listAthleteActivities(
                athlete_stats['access_token'], current_time,
                current_time - 360000, None, None)
            print(activities)
            for activity in activities:
                activity = process_activity(activity)
                self.add_activity(athlete_stats, activity, time_zone)

    def add_activity(self, athlete_stats, strava_activity, time_zone):
        """Record one Strava activity; return True if it was counted.

        An activity is rejected when it is manually entered, dated outside
        the event window, shorter than 3 miles, slower than 11 min/mile
        (male) / 12 min/mile (female), or already recorded for that day.
        """
        if strava_activity['manual']:
            return False

        activity_id = strava_activity['id']
        gender = athlete_stats['gender']
        distance = strava_activity['distance']
        avg_pace = strava_activity['avg_pace']
        activity_date = convert_datestr(strava_activity['start_date'],
                                        time_zone).date()
        if activity_date > self.__endDate or activity_date < self.__startDate:
            return False
        if (gender == "M" and avg_pace > 11.0) or \
          (gender == "F" and avg_pace > 12.0) or \
          distance < 3.0:
            return False

        activities = athlete_stats['activities']
        idx = (activity_date - self.__startDate).days
        if activities[idx] and str(activity_id) in activities[idx]:
            return False  # duplicate of an already-recorded activity
        if activities[idx] is None:
            activities[idx] = {str(activity_id): distance}
        else:
            activities[idx][str(activity_id)] = distance
        return True

    def save_data(self):
        """Write the event data back to its JSON file."""
        # `with` closes and flushes the handle (the original leaked it on error).
        with open(self.__dataFile, 'w') as fw:
            fw.write(JSONEncoder().encode(self.__data))
Пример #11
0
    def __getitem__(self, index):
        """Look up the key gesture for *index*, falling back to a spelled-out
        Unicode key sequence for single characters missing from the dict.

        Re-raises KeyError unchanged for multi-character keys, or (before
        Vista) for characters outside the Basic Multilingual Plane.
        """
        try:
            return self.__class__.__base__.__getitem__(self, index)
        except KeyError:
            if len(index) != 1:
                raise
            # "|".join over a single string interleaves "|" between its
            # characters, turning "`uXXXX" into one keystroke per character
            # ("|" is the gesture separator used elsewhere in this module).
            if winVersion.major >= 6: # Vista or later.
                # NOTE(review): the trailing space appears intended to commit
                # the code point on Vista+ IMEs — confirm against IME docs.
                return "|".join("`u%04x " % (ord(index),))
            elif ord(index) < 0x10000:
                return "|".join("`u%04x" % (ord(index),))
            raise # Bopomofo IME on WinXP does not support characters outside the BMP.

# Load the IME key-mapping table shipped next to this module; the GUID in the
# file name identifies the IME profile this table belongs to.
with codecs.open(os.path.join(os.path.dirname(__file__), str("{B2F9C502-1742-11D4-9790-0080C882687E}.json")), encoding="UTF-8") as json_file:
    IME_json = json_file.read()
    # object_pairs_hook=OrderedDict preserves the order defined in the file.
    IME_data_dict = JSONDecoder(object_pairs_hook=OrderedDict).decode(IME_json)
# Symbol -> key-gesture lookup with a Unicode-typing fallback for misses.
symb2gesture = _Symbol2KeyDict(IME_data_dict.items())

class Translator:
    layout_index = "ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄧㄨㄩㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦ ˊˇˋ˙"
    def __init__(self, default_mapping, special_rules=None, sepchar="|"):
        split = lambda s: list(s if sepchar == "" else s.split(sepchar))
        self.default_map = dict(zip(Translator.layout_index, (split(k) for k in default_mapping)))
        self.special_rules = []
        if special_rules is not None:
            for p in special_rules:
                self.special_rules.append((re.compile(p[0], re.U), split(p[1])))
    def convert(self, subject):
        try: # Single-character cases.
            return [symb2gesture[subject]]
        except:
            pass