def POST(self, userurlkey=None):
    """Register the URL sent in the request body, optionally under a caller-chosen key.

    :param userurlkey: optional key requested by the caller. When given, its
        length must lie between ``config.MIN_LEN_USERURLKEY`` and
        ``config.MAX_LEN_URLKEY``; it is encoded and lower-cased before
        being passed to ``model.url_new``.
    :returns: on success, JSON text holding ``is_created`` (True when
        exactly one row was affected) and ``key``; on invalid input, a short
        plain-text reason with the status set to 400; when the model
        reports a non-zero result, ``web.internalerror("db error")``.
    """
    def bad_request(reason):
        # Every validation failure answers the same way: 400 plus a reason.
        web.ctx.status = "400 Bad request"
        return reason

    url = web.data()
    if len(url) > config.MAX_LEN_URL:
        return bad_request("url too long")

    if userurlkey is not None:
        if len(userurlkey) < config.MIN_LEN_USERURLKEY:
            return bad_request("key too short")
        if len(userurlkey) > config.MAX_LEN_URLKEY:
            return bad_request("key too long")
        userurlkey = str.lower(utils.encode_string(userurlkey))

    url = utils.encode_string(url)
    # Kept as an equality test against False: a plain truthiness test would
    # also reject a None result, which this comparison lets through.
    if False == utils.check_url(url):
        return bad_request("bad url")

    ret, n_affected, urlkey = model.url_new(url, userurlkey)
    if ret != 0:
        return web.internalerror("db error")

    retval = {
        "is_created": n_affected == 1,
        "key": urlkey,
    }
    web.ctx.status = "200 OK"
    return json.dumps(retval)
def object_create(self, res_type, obj_id, obj_dict):
    """Write a new object row together with its parent link, properties and refs.

    Everything destined for the uuid column family is queued on a single
    batch and sent in one go; the fq-name table entry is inserted after
    that batch has been sent.

    :param res_type: resource type name; dashes are turned into underscores
    :param obj_id: row key under which the object is stored
    :param obj_dict: object contents. ``fq_name`` is required;
        ``parent_type``, the class's prop fields and ref fields are optional.
        An ``id_perms`` value is stamped in place with ``created`` and
        ``last_modified``.
    :returns: the tuple ``(True, '')``
    """
    obj_type = res_type.replace('-', '_')
    obj_class = self._get_resource_class(obj_type)

    # One batch collects the object's own columns and all backref updates.
    batch = self._obj_uuid_cf.batch()

    row = {
        'fq_name': json.dumps(obj_dict['fq_name']),
        'type': json.dumps(obj_type),
    }

    if 'parent_type' in obj_dict:
        # Only children of something other than config-root carry a parent.
        parent_type = obj_dict['parent_type']
        parent_method_type = parent_type.replace('-', '_')
        parent_fq_name = obj_dict['fq_name'][:-1]
        row['parent_type'] = json.dumps(parent_type)
        parent_uuid = self.fq_name_to_uuid(parent_method_type,
                                           parent_fq_name)
        self._create_child(batch, parent_method_type, parent_uuid,
                           obj_type, obj_id)

    # Properties: absent (None) values are not written at all.
    for prop_name in obj_class.prop_fields:
        prop_value = obj_dict.get(prop_name)
        if prop_value is None:
            continue
        if prop_name == 'id_perms':
            prop_value['created'] = datetime.datetime.utcnow().isoformat()
            prop_value['last_modified'] = prop_value['created']
        self._create_prop(batch, obj_id, prop_name, prop_value)

    # References, e.g.
    #   ref_field     = 'network_ipam_refs'
    #   ref_type      = 'network-ipam'
    #   ref_link_type = 'VnSubnetsType'
    #   is_weakref    = False
    for ref_field in obj_class.ref_fields:
        ref_type, _ref_link_type, _ = obj_class.ref_field_types[ref_field]
        for ref in obj_dict.get(ref_field, []):
            ref_uuid = self.fq_name_to_uuid(ref_type, ref['to'])
            ref_data = {'attr': ref.get('attr'), 'is_weakref': False}
            self._create_ref(batch, obj_type, obj_id,
                             ref_type.replace('-', '_'), ref_uuid, ref_data)

    batch.insert(obj_id, row)
    batch.send()

    # fq-name table: column "<encoded fq name>:<obj_id>" with a JSON null value.
    fq_name_col = utils.encode_string(':'.join(obj_dict['fq_name'])) + ':' + obj_id
    self._obj_fq_name_cf.insert(obj_type, {fq_name_col: json.dumps(None)})

    return (True, '')
def read_character_information(html):
    """Collect the "label: value" rows of the 'Character Information' section.

    :param html: parsed document offering ``find(text=...)``
        (presumably a BeautifulSoup object -- confirm against the caller).
    :returns: dict mapping ``camelize(label)`` to the text following the
        first colon of each row. Rows without a colon are skipped.
    """
    character_info = {}
    pattern = re.compile(r'Character Information')
    # The matching text node sits four levels below the element whose
    # children are iterated as rows.
    section = html.find(text=pattern).parent.parent.parent.parent
    for row in section:
        text = encode_string(row.get_text())
        label, separator, value = text.partition(':')
        if not separator:
            # No colon in this row. The previous find()-based slicing used
            # the -1 "not found" result as an index, which stored the whole
            # text under a key with its last character chopped off.
            continue
        character_info[camelize(label)] = value
    return character_info
def fq_name_to_uuid(self, obj_type, fq_name):
    """Resolve a fully-qualified name to the uuid recorded in the fq-name table.

    :param obj_type: object type; dashes are replaced by underscores to
        form the row key
    :param fq_name: sequence of name components, joined with ":"
    :returns: the part after the last ":" of the last matching column name
    :raises NoIdError: when no column matches the name
    """
    method_name = obj_type.replace("-", "_")
    encoded_fq_name = utils.encode_string(":".join(fq_name))
    # ";" is the character right after ":" in ASCII, so this slice is
    # presumably meant to bracket every column named "<fq_name>:<uuid>".
    col_start = "%s:" % (encoded_fq_name)
    col_fin = "%s;" % (encoded_fq_name)

    try:
        # The result of xget is consumed inside the try: it is evaluated
        # lazily, so a failure would surface while iterating rather than
        # at the call itself.
        col_infos = list(self._obj_fq_name_cf.xget(method_name,
                                                   column_start=col_start,
                                                   column_finish=col_fin))
    except pycassa.NotFoundException:
        raise NoIdError("%s %s" % (obj_type, fq_name))

    if not col_infos:
        raise NoIdError("%s %s" % (obj_type, fq_name))

    # If several columns match, the last one decides.
    last_col_name = col_infos[-1][0]
    return last_col_name.split(":")[-1]
def _object_create(self, res_type, obj_ids, obj_dict):
    """Write a new object row together with its parent link, properties and refs.

    Writes to the uuid column family are gathered on one batch and sent
    together; the fq-name table entry is inserted once that batch is out.

    :param res_type: resource type name; dashes are turned into underscores
    :param obj_ids: mapping whose ``'uuid'`` entry is the object's row key
    :param obj_dict: object contents. ``fq_name`` is required;
        ``parent_type``, the class's prop fields and ref fields are optional.
        An ``id_perms`` value is stamped in place with ``created`` and
        ``last_modified``.
    :returns: the tuple ``(True, '')``
    """
    obj_type = res_type.replace('-', '_')
    obj_class = self._get_resource_class(obj_type)

    # Object columns and backref updates share this batch until send().
    batch = self._obj_uuid_cf.batch()

    row = {}
    row['fq_name'] = json.dumps(obj_dict['fq_name'])
    row['type'] = json.dumps(obj_type)

    if 'parent_type' in obj_dict:
        # A parent is only named for children of something other than
        # config-root.
        parent_type = obj_dict['parent_type']
        parent_method_type = parent_type.replace('-', '_')
        parent_fq_name = obj_dict['fq_name'][:-1]
        row['parent_type'] = json.dumps(parent_type)
        parent_uuid = self.fq_name_to_uuid(parent_method_type,
                                           parent_fq_name)
        self._create_child(batch, parent_method_type, parent_uuid,
                           obj_type, obj_ids['uuid'])

    # Properties: a value of None means "not set" and is skipped.
    for prop_name in obj_class.prop_fields:
        prop_value = obj_dict.get(prop_name)
        if prop_value is None:
            continue
        if prop_name == 'id_perms':
            prop_value['created'] = datetime.datetime.utcnow().isoformat()
            prop_value['last_modified'] = prop_value['created']
        self._create_prop(batch, obj_ids['uuid'], prop_name, prop_value)

    # References, e.g.
    #   ref_field     = 'network_ipam_refs'
    #   ref_type      = 'network-ipam'
    #   ref_link_type = 'VnSubnetsType'
    #   is_weakref    = False
    for ref_field in obj_class.ref_fields:
        ref_type, _ref_link_type, _ = obj_class.ref_field_types[ref_field]
        for ref in obj_dict.get(ref_field, []):
            ref_uuid = self.fq_name_to_uuid(ref_type, ref['to'])
            ref_data = {'attr': ref.get('attr'), 'is_weakref': False}
            self._create_ref(batch, obj_type, obj_ids['uuid'],
                             ref_type.replace('-', '_'), ref_uuid, ref_data)

    batch.insert(obj_ids['uuid'], row)
    batch.send()

    # fq-name table: column "<encoded fq name>:<uuid>" with a JSON null value.
    fq_name_str = ':'.join(obj_dict['fq_name'])
    fq_name_col = utils.encode_string(fq_name_str) + ':' + obj_ids['uuid']
    self._obj_fq_name_cf.insert(obj_type, {fq_name_col: json.dumps(None)})

    return (True, '')
def fq_name_to_uuid(self, obj_type, fq_name):
    """Resolve a fully-qualified name to the uuid recorded in the fq-name table.

    :param obj_type: object type; dashes are replaced by underscores to
        form the row key
    :param fq_name: sequence of name components, joined with ':'
    :returns: the part after the last ':' of the last matching column name
    :raises NoIdError: when no column matches the name
    """
    method_name = obj_type.replace('-', '_')
    encoded_fq_name = utils.encode_string(':'.join(fq_name))
    # ';' is the character right after ':' in ASCII, so this slice is
    # presumably meant to bracket every column named '<fq_name>:<uuid>'.
    col_start = '%s:' % (encoded_fq_name)
    col_fin = '%s;' % (encoded_fq_name)

    try:
        # The result of xget is consumed inside the try: it is evaluated
        # lazily, so a failure would surface while iterating rather than
        # at the call itself.
        col_infos = list(self._obj_fq_name_cf.xget(
            method_name, column_start=col_start, column_finish=col_fin))
    except pycassa.NotFoundException:
        raise NoIdError('%s %s' % (obj_type, fq_name))

    if not col_infos:
        raise NoIdError('%s %s' % (obj_type, fq_name))

    # If several columns match, the last one decides.
    last_col_name = col_infos[-1][0]
    return last_col_name.split(':')[-1]
def POST(self, urlkey):
    """Pass the URL from the request body to ``model.url_modify`` for ``urlkey``.

    :param urlkey: key to modify. When not None it must not exceed
        ``config.MAX_LEN_URLKEY`` characters and is encoded and lower-cased
        before use.
    :returns: JSON text holding ``n_affected`` on success; a short
        plain-text reason with the status set to 400 on invalid input;
        ``web.internalerror("db error")`` when the model reports a
        non-zero result.
    """
    def bad_request(reason):
        # Shared shape of every validation failure.
        web.ctx.status = "400 Bad request"
        return reason

    url = web.data()
    if len(url) > config.MAX_LEN_URL:
        return bad_request("url too long")

    if urlkey is not None:
        if len(urlkey) > config.MAX_LEN_URLKEY:
            return bad_request("key too long")
        urlkey = str.lower(utils.encode_string(urlkey))

    url = utils.encode_string(url)
    # Equality against False, not a truthiness test: a None result from
    # check_url passes this comparison.
    if False == utils.check_url(url):
        return bad_request("bad url")

    ret, n_affected = model.url_modify(urlkey, url)
    if ret != 0:
        return web.internalerror("db error")

    return json.dumps({'n_affected': n_affected})
def read_character_death_information(html):
    """Extract date / message pairs from the 'Character Deaths' section.

    :param html: parsed document offering ``find(text=...)``
        (presumably a BeautifulSoup object -- confirm against the caller).
    :returns: list of dicts with ``date`` (text up to and including the
        first 'CET') and ``killed_by_message`` (everything after it). Rows
        where 'CET' is missing or starts before index 2 are left out.
    """
    deaths = []
    marker = html.find(text=re.compile(r'Character Deaths'))
    # Rows are the children of the element four levels above the marker.
    section = marker.parent.parent.parent.parent
    for row in section:
        text = encode_string(row.get_text())
        cet_index = text.find('CET')
        if cet_index <= 1:
            continue
        split_at = cet_index + 3  # just past the 'CET' marker
        deaths.append({
            'date': text[:split_at],
            'killed_by_message': text[split_at:]
        })
    return deaths
def object_delete(self, res_type, obj_uuid):
    """Remove an object row and the links other rows hold to it.

    Parent links, refs and relaxed back refs found on the object's row
    are undone through one batch, which also removes the row itself; the
    fq-name table entry is removed after the batch has been sent.

    :param res_type: resource type name; dashes are turned into underscores
    :param obj_uuid: row key of the object to delete
    :returns: the tuple ``(True, '')``
    :raises NoIdError: when the row's ``fq_name`` column cannot be read
    """
    obj_type = res_type.replace('-', '_')
    # The result is not used here; the lookup itself is kept.
    self._get_resource_class(obj_type)
    uuid_cf = self._obj_uuid_cf

    try:
        fq_name_json = uuid_cf.get(obj_uuid, columns=['fq_name'])['fq_name']
        fq_name = json.loads(fq_name_json)
    except pycassa.NotFoundException:
        raise NoIdError(obj_uuid)

    def scan(start, finish):
        # Columns of this object's row whose names fall in [start, finish].
        return uuid_cf.xget(obj_uuid, column_start=start,
                            column_finish=finish)

    batch = uuid_cf.batch()

    # Unlink from parent: columns look like "parent:<type>:<uuid>".
    for col_name, _ in scan('parent:', 'parent;'):
        _prefix, parent_type, parent_uuid = col_name.split(':')
        self._delete_child(batch, parent_type, parent_uuid,
                           obj_type, obj_uuid)

    # Remove refs: columns look like "ref:<type>:<uuid>".
    for col_name, _ in scan('ref:', 'ref;'):
        _prefix, ref_type, ref_uuid = col_name.split(':')
        self._delete_ref(batch, obj_type, obj_uuid, ref_type, ref_uuid)

    # Remove the link held by relaxed back refs: "relaxbackref:<uuid>".
    for col_name, _ in scan('relaxbackref:', 'relaxbackref;'):
        _prefix, backref_uuid = col_name.split(':')
        self._delete_ref(batch, None, backref_uuid, obj_type, obj_uuid)

    batch.remove(obj_uuid)
    batch.send()

    # Drop the "<encoded fq name>:<uuid>" column from the fq-name table.
    fq_name_col = utils.encode_string(':'.join(fq_name)) + ':' + obj_uuid
    self._obj_fq_name_cf.remove(obj_type, columns=[fq_name_col])

    return (True, '')
def read_online_players(html):
    """List name, level and vocation for each row of the online-players table.

    The table is taken to be the parent of the first ``tr`` element with
    class ``LabelH``. A row contributes an entry only once its third cell
    has been read; cells beyond the third are read but not stored.

    :param html: parsed document offering ``findAll`` / ``find_all``
        (presumably a BeautifulSoup object -- confirm against the caller).
    :returns: list of dicts with the keys ``name``, ``level``, ``vocation``
    """
    column_keys = {0: 'name', 1: 'level', 2: 'vocation'}
    players = []

    header_rows = html.findAll("tr", {"class": "LabelH"})
    table_rows = header_rows[0].parent.find_all('tr')

    for table_row in table_rows:
        player = {}
        for position, cell in enumerate(table_row.find_all('td')):
            text = encode_string(cell.get_text())
            key = column_keys.get(position)
            if key is None:
                continue
            player[key] = text
            if position == 2:
                # Third column seen: the row is complete.
                players.append(player)
    return players
def object_delete(self, res_type, obj_uuid):
    """Remove an object row and undo its parent link and refs.

    Parent links and refs found on the object's row are undone through
    one batch, which also removes the row itself; the fq-name table entry
    is removed after the batch has been sent.

    :param res_type: resource type name; dashes are turned into underscores
    :param obj_uuid: row key of the object to delete
    :returns: the tuple ``(True, '')``
    :raises NoIdError: when the row's ``fq_name`` column cannot be read
    """
    obj_type = res_type.replace('-', '_')
    # The result is not used here; the lookup itself is kept.
    self._get_resource_class(obj_type)
    uuid_cf = self._obj_uuid_cf

    try:
        stored = uuid_cf.get(obj_uuid, columns=['fq_name'])
        fq_name = json.loads(stored['fq_name'])
    except pycassa.NotFoundException:
        raise NoIdError(obj_uuid)

    batch = uuid_cf.batch()

    # Unlink from parent: columns look like "parent:<type>:<uuid>".
    parent_cols = uuid_cf.xget(obj_uuid,
                               column_start='parent:',
                               column_finish='parent;')
    for col_name, _col_val in parent_cols:
        _prefix, parent_type, parent_uuid = col_name.split(':')
        self._delete_child(batch, parent_type, parent_uuid,
                           obj_type, obj_uuid)

    # Remove refs: columns look like "ref:<type>:<uuid>".
    ref_cols = uuid_cf.xget(obj_uuid,
                            column_start='ref:',
                            column_finish='ref;')
    for col_name, _col_val in ref_cols:
        _prefix, ref_type, ref_uuid = col_name.split(':')
        self._delete_ref(batch, obj_type, obj_uuid, ref_type, ref_uuid)

    batch.remove(obj_uuid)
    batch.send()

    # Drop the "<encoded fq name>:<uuid>" column from the fq-name table.
    fq_name_str = ':'.join(fq_name)
    fq_name_col = utils.encode_string(fq_name_str) + ':' + obj_uuid
    self._obj_fq_name_cf.remove(obj_type, columns=[fq_name_col])

    return (True, '')
def object_create(self, res_type, obj_id, obj_dict):
    """Write a new object row together with its parent link, properties and refs.

    Writes to the uuid column family are gathered on one batch and sent
    together; the fq-name table entry is inserted once that batch is out.

    Properties are stored in one of three ways:

    * fields in ``prop_list_fields`` -- one ``_add_to_prop_list`` call per
      element, positioned by the element's index as a string;
    * fields in ``prop_map_fields`` -- one ``_set_in_prop_map`` call per
      element, keyed by the element's ``prop_map_field_key_names`` entry;
    * everything else -- a single ``_create_prop`` call.

    :param res_type: resource type name; dashes are turned into underscores
    :param obj_id: row key under which the object is stored
    :param obj_dict: object contents. ``fq_name`` is required;
        ``parent_type``, the class's prop fields and ref fields are optional.
        An ``id_perms`` value is stamped in place with ``created`` and
        ``last_modified``.
    :returns: the tuple ``(True, '')``
    """
    def unwrap(field, has_wrapper):
        # A wrapped collection is read from under the first key of the
        # field; an unwrapped one is the field itself. list() keeps the
        # indexing valid where keys() returns a view instead of a list.
        if has_wrapper:
            wrapper_field = list(field.keys())[0]
            return field[wrapper_field]
        return field

    obj_type = res_type.replace('-', '_')
    obj_class = self._get_resource_class(obj_type)

    # Gather column values for obj and updates to backrefs
    # in a batch and write it at the end
    bch = self._obj_uuid_cf.batch()

    obj_cols = {}
    obj_cols['fq_name'] = json.dumps(obj_dict['fq_name'])
    obj_cols['type'] = json.dumps(obj_type)

    if 'parent_type' in obj_dict:
        # non config-root child
        parent_type = obj_dict['parent_type']
        parent_method_type = parent_type.replace('-', '_')
        parent_fq_name = obj_dict['fq_name'][:-1]
        obj_cols['parent_type'] = json.dumps(parent_type)
        parent_uuid = self.fq_name_to_uuid(parent_method_type,
                                           parent_fq_name)
        self._create_child(bch, parent_method_type, parent_uuid,
                           obj_type, obj_id)

    # Properties
    for prop_field in obj_class.prop_fields:
        field = obj_dict.get(prop_field)
        # Specifically checking for None
        if field is None:
            continue
        if prop_field == 'id_perms':
            field['created'] = datetime.datetime.utcnow().isoformat()
            field['last_modified'] = field['created']

        if prop_field in obj_class.prop_list_fields:
            # store list elements in list order
            list_coll = unwrap(
                field, obj_class.prop_list_field_has_wrappers[prop_field])
            for position, list_elem in enumerate(list_coll):
                self._add_to_prop_list(
                    bch, obj_id, prop_field, list_elem, str(position))
        elif prop_field in obj_class.prop_map_fields:
            map_coll = unwrap(
                field, obj_class.prop_map_field_has_wrappers[prop_field])
            map_key_name = obj_class.prop_map_field_key_names[prop_field]
            for map_elem in map_coll:
                map_key = map_elem[map_key_name]
                self._set_in_prop_map(
                    bch, obj_id, prop_field, map_elem, map_key)
        else:
            self._create_prop(bch, obj_id, prop_field, field)

    # References
    # e.g. ref_field = 'network_ipam_refs'
    #      ref_type = 'network-ipam'
    #      ref_link_type = 'VnSubnetsType'
    #      is_weakref = False
    for ref_field in obj_class.ref_fields:
        ref_type, _ref_link_type, _ = obj_class.ref_field_types[ref_field]
        refs = obj_dict.get(ref_field, [])
        for ref in refs:
            ref_uuid = self.fq_name_to_uuid(ref_type, ref['to'])
            ref_attr = ref.get('attr')
            ref_data = {'attr': ref_attr, 'is_weakref': False}
            self._create_ref(bch, obj_type, obj_id,
                             ref_type.replace('-', '_'), ref_uuid, ref_data)

    bch.insert(obj_id, obj_cols)
    bch.send()

    # Update fqname table
    fq_name_str = ':'.join(obj_dict['fq_name'])
    fq_name_cols = {utils.encode_string(fq_name_str) + ':' + obj_id:
                    json.dumps(None)}
    self._obj_fq_name_cf.insert(obj_type, fq_name_cols)

    return (True, '')
# NOTE(review): this is a fragment -- x, t_rows, roots, t_columns, total and
# cnt are set up before the visible part. Top-level names are left as they
# were because code outside this view may still refer to them.

# Score every row of x using only the trees whose row set (t_rows[key])
# does not contain that row's index.
for i, row in enumerate(x):
    row = list(row)
    target = row.pop()  # the last column is the label
    d = {4: 0, 5: 0, 6: 0}
    for key, t_row in t_rows.items():
        if i not in t_row:
            d[predict(roots[key], row, t_columns[key])] += 1
    if max(d.values()) != 0:
        # At least one tree voted, so the row counts towards the total.
        total += 1
        if max(d, key=d.get) == target:
            cnt += 1

# Avoid a ZeroDivisionError when no row received a single vote.
print("accuracy ", cnt / total if total else float('nan'))

# Predictions are made on the testing set using all the trees.
# The with-block closes the file even if a prediction raises part-way.
with open('prediction.csv', 'w') as f:
    f.write("id,class\n")
    y = load_data('testing.csv')
    for row in y:
        idx = row[0]
        seq = encode_string(row[1])
        d = {4: 0, 5: 0, 6: 0}
        for i, root in enumerate(roots):
            d[predict(root, seq, t_columns[i])] += 1
        prediction = decode(max(d, key=d.get))
        f.write(str(idx) + "," + prediction + "\n")
def read_guild_information(html):
    """Return the text of the guild information container.

    :param html: parsed document offering ``find(name, attrs)``
        (presumably a BeautifulSoup object -- confirm against the caller).
    :returns: dict with the single key ``guild_information`` holding the
        encoded text of the ``div`` whose id is ``GuildInformationContainer``
    """
    container = html.find('div', {'id': 'GuildInformationContainer'})
    return {'guild_information': encode_string(container.get_text())}
# NOTE(review): the next two statements appear to be the tail of a plotting
# routine whose beginning lies outside this view; they are carried over
# unchanged and belong inside that routine's body.
ax2.set_xlabel("Word Offset")
plt.savefig('key_words_fig.pdf', format='pdf')


# Command-line driver: argv[1] is handed to utils.get_text, the optional
# argv[2] is passed to summarize() as an integer.
if __name__ == '__main__':
    if len(sys.argv) <= 1:
        print('There is no text to summarize')
        sys.exit(1)

    text = utils.get_text(sys.argv[1])
    if len(sys.argv) > 2:
        print(summarize(text, int(sys.argv[2])))
    else:
        print(summarize(text))

    # Keep asking until the answer is one of y/Y/n/N.
    while True:
        is_drawing = raw_input("Want to draw key words dispersion plot?[y/n]:")
        if is_drawing in ("y", "Y"):
            keywords_ranking = word_ranking(text).most_common(15)
            words, scores = list(zip(*keywords_ranking))
            x, y = dispersion(text, words)
            words = [utils.encode_string(w) for w in words]
            draw(words, scores, x, y)
            break
        if is_drawing in ("n", "N"):
            break
        print("Incorrect command.")

    sys.exit(0)