def __init__(self, leadin='', parts=None, answers=None, values=None, number='', packet=None, tournament=None):
    """Build a bonus question from raw HTML fragments.

    Args:
        leadin: HTML lead-in text; leading question-number / "TB." markers
            are stripped (also when nested inside a leading <strong> tag).
        parts: list of HTML part texts (defaults to a fresh empty list).
        answers: list of HTML answer texts (defaults to a fresh empty list).
        values: list of point values (defaults to a fresh empty list).
        number: question number string.
        packet: owning packet object, if any.
        tournament: owning tournament object, if any.
    """
    self.leadin = leadin
    # Truthiness (not `is None`) preserves the original behavior: an
    # explicitly passed empty list is also replaced with a fresh one.
    self.parts = parts if parts else []
    self.answers = answers if answers else []
    self.values = values if values else []
    self.number = number
    self.packet = packet
    self.tournament = tournament
    # Strip leading question-number / tiebreaker markers (module-level regexes).
    self.leadin = re.sub(num_regex, '', self.leadin)
    self.leadin = re.sub(tb_regex, '', self.leadin)
    if self.leadin.startswith('<strong>'):
        # The marker may sit just inside a leading <strong> tag; strip it
        # there too.  Raw strings fix the invalid escape sequences that the
        # plain literals ('^[\d]+...') produced on Python 3.6+.
        self.leadin = '<strong>' + re.sub(r'^[\d]+\.[\s]*', '', self.leadin[8:])
        self.leadin = '<strong>' + re.sub(r'^TB\.[\s]*', '', self.leadin[8:])

    def clean_answer(ans):
        # Normalize bold+italic markup (in either nesting order) into the
        # canonical <req> tag used for required answer parts.
        ans = ans.replace('<strong><em>', '<req>')
        ans = ans.replace('<em><strong>', '<req>')
        ans = ans.replace('</em></strong>', '</req>')
        ans = ans.replace('</strong></em>', '</req>')
        return ans

    self.answers = [clean_answer(answer) for answer in self.answers]
    self.answers_sanitized = [sanitize(answer) for answer in self.answers]
    self.leadin_sanitized = sanitize(self.leadin)
    self.parts_sanitized = [sanitize(part) for part in self.parts]
def prepare_html_file(self, html_file, skip_lines=0):
    """Read an HTML packet file and return its cleaned content lines.

    Args:
        html_file: path to the packet HTML file (read as UTF-8).
        skip_lines: number of leading content lines to drop from the result.

    Returns:
        List of sanitized, stripped lines, starting from the first line
        that begins with a question number, minus header/junk lines.
    """
    with codecs.open(html_file, 'r', encoding='utf-8') as f:
        packet_contents = f.read()
    packet_contents = re.sub('<br />', '\n', packet_contents)
    # List comprehension instead of map(): under Python 3 a map object is a
    # lazy iterator and cannot be sliced, which broke the slicing below.
    packet_contents = [sanitize(x, valid_tags=['em', 'strong'])
                       for x in packet_contents.split('\n')]
    # Skip all of the packet information at the beginning
    first_line_with_number_index = 0
    for line in packet_contents:
        if starts_with_number(line):
            break
        first_line_with_number_index += 1
    packet_contents = packet_contents[first_line_with_number_index:]
    # Drop blank/short lines and the "Tossups"/"Bonuses" section headers.
    packet_contents = [x.strip() for x in packet_contents
                       if sanitize(x).strip() != ''
                       and len(x) > 20
                       and (not re.search('Tossups', x, flags=re.I))
                       and (not re.search('Bonuses', x, flags=re.I))]
    return packet_contents[skip_lines:]
def __init__(self, polynomial, divisor, verbose=True):
    """Run polynomial long division of `polynomial` by `divisor`.

    Parameters:
        polynomial (str): polynomial expression in x, e.g. "x^2+3x+2".
        divisor (str): divisor expression in x.
        verbose (bool): presumably controls whether log_work() prints each
            step -- TODO confirm against log_work's implementation.

    The results are stored on self.quotient and self.remainder.
    """
    # sanitize the input - remove spaces
    self.polynomial = utils.sanitize(polynomial)
    self.divisor = utils.sanitize(divisor)
    self.verbose = verbose
    # Both inputs must mention the variable x; otherwise report the error.
    if "x" not in self.polynomial:
        self.throw_no_x("polynomial")
    elif "x" not in self.divisor:
        self.throw_no_x("divisor")
    self.log_work("\n-----")
    self.log_work("Input:")
    self.log_work("\tPolynomial:\t", self.polynomial)
    self.log_work("\tDivisor:\t", self.divisor)
    self.log_work("Processing:")
    # get the coefficients of the polynomial
    orderedCoefs = self.process_polynomial()
    # divide the polynomial through long division
    results_divide = self.process_divisor(orderedCoefs)
    self.quotient, self.remainder = self.combine_result(results_divide)
    self.log_work("Result:")
    self.log_work("\tQuotient:\t", self.quotient)
    self.log_work("\tRemainder:\t", self.remainder)
    self.log_work("-----")
def parse_households(self, tr_elem):
    """Normalize tr_elem['households'] to an int, in place.

    The scraped value arrives either as a raw string or as a list whose
    first element is the string; thousands separators are removed before
    conversion.  No-op when the key is absent.
    """
    if 'households' not in tr_elem:
        return
    # isinstance() is the idiomatic type check (was: type(x) == type([])).
    if isinstance(tr_elem['households'], list):
        tr_elem['households'] = sanitize(tr_elem['households'][0])
    else:
        tr_elem['households'] = sanitize(tr_elem['households'])
    tr_elem['households'] = tr_elem['households'].replace(',', '')
    tr_elem['households'] = int(tr_elem['households'])
def parse_electorate(self, tr_elem):
    """Normalize tr_elem['electorates'] to an int, in place.

    The scraped value arrives either as a raw string or as a list whose
    first element is the string; thousands separators are removed before
    conversion.  No-op when the key is absent.
    """
    if 'electorates' not in tr_elem:
        return
    # isinstance() is the idiomatic type check (was: type(x) == type([])).
    if isinstance(tr_elem['electorates'], list):
        tr_elem['electorates'] = sanitize(tr_elem['electorates'][0])
    else:
        tr_elem['electorates'] = sanitize(tr_elem['electorates'])
    tr_elem['electorates'] = tr_elem['electorates'].replace(',', '')
    tr_elem['electorates'] = int(tr_elem['electorates'])
def parse_population(self, tr_elem):
    """Normalize tr_elem['population'] to an int, in place.

    The scraped value arrives either as a raw string or as a list whose
    first element is the string; thousands separators are removed before
    conversion.  No-op when the key is absent.
    """
    if 'population' not in tr_elem:
        return
    # isinstance() is the idiomatic type check (was: type(x) == type([])).
    if isinstance(tr_elem['population'], list):  # nth != 20 (original note; meaning unclear -- TODO confirm)
        tr_elem['population'] = sanitize(tr_elem['population'][0])
    else:
        tr_elem['population'] = sanitize(tr_elem['population'])
    tr_elem['population'] = tr_elem['population'].replace(',', '')
    tr_elem['population'] = int(tr_elem['population'])
def parse_counted_votes(self, consti):
    """Normalize consti['counted_votes'] to an int, in place.

    The scraped value arrives either as a raw string or as a list whose
    first element is the string; thousands separators are removed before
    conversion.  No-op when the key is absent.
    """
    if 'counted_votes' not in consti:
        return
    # isinstance() is the idiomatic type check (was: type(x) == type([])).
    if isinstance(consti['counted_votes'], list):  # historical elections (list form)
        consti['counted_votes'] = sanitize(consti['counted_votes'][0])
    else:  # most recent election (scalar form)
        consti['counted_votes'] = sanitize(consti['counted_votes'])
    consti['counted_votes'] = consti['counted_votes'].replace(',', '')
    consti['counted_votes'] = int(consti['counted_votes'])
def parse_electorate(self, consti):
    """Normalize consti['electorates'] to an int, in place.

    The scraped value arrives either as a raw string or as a list whose
    first element is the string; thousands separators are removed before
    conversion.  No-op when the key is absent.
    """
    if 'electorates' not in consti:
        return
    # isinstance() is the idiomatic type check (was: type(x) == type([])).
    if isinstance(consti['electorates'], list):  # historical elections (list form)
        consti['electorates'] = sanitize(consti['electorates'][0])
    else:  # most recent election (scalar form)
        consti['electorates'] = sanitize(consti['electorates'])
    consti['electorates'] = consti['electorates'].replace(',', '')
    consti['electorates'] = int(consti['electorates'])
def to_dict(self): return { "number": self.number, "formatted_text": self.text, "formatted_answer": self.answer, "text": sanitize(self.text, valid_tags=[]), "answer": sanitize(self.answer, valid_tags=[]), "category": self.category, "subcategory": self.subcategory, "tournament": self.tournament, "round": self.round }
def to_dict(self): return { "number": self.number, "leadin": self.leadin, "formatted_texts": self.texts, "formatted_answers": self.answers, "texts": map(lambda x: sanitize(x, valid_tags=[]), self.texts), "answers": map(lambda x: sanitize(x, valid_tags=[]), self.answers), "category": self.category, "subcategory": self.subcategory, "tournament": self.tournament, "round": self.round }
def to_dict(self): return { "number": self.number, "leadin": self.leadin, "formatted_texts": self.texts, "formatted_answers": self.answers, "texts": [sanitize(t, valid_tags=[]) for t in self.texts], "answers": [sanitize(a, valid_tags=[]) for a in self.answers], "category": self.category, "subcategory": self.subcategory, "tournament": self.tournament, "round": self.round }
def plot_boundary(X, y, coefs, title='', save=False):
    """Scatter-plot a binary 2-D dataset with its linear decision boundary.

    coefs is a mapping with keys 'b', 'w1' and 'w2' (bias and weights);
    the boundary is the line where the model output equals logit(1/2).
    When `save` is True the figure is written to results/<sanitized title>.
    """
    x_lo, x_hi = np.min(X[:, 0]), np.max(X[:, 0])
    y_lo, y_hi = np.min(X[:, 1]), np.max(X[:, 1])
    pad = 1
    grid_x = np.linspace(x_lo - pad, x_hi + pad, 1000)
    # Solve w1*x + w2*y + b = logit(1/2) for y at every grid point.
    grid_y = [
        (utils.logit(1 / 2) - coefs['b'] - coefs['w1'] * gx) / coefs['w2']
        for gx in grid_x
    ]
    plt.figure(figsize=(9, 9))
    plt.plot(grid_x, grid_y, c='g', label='Decision boundary')
    plt.scatter(X[y == 0, 0], X[y == 0, 1], label='class 0')
    plt.scatter(X[y == 1, 0], X[y == 1, 1], label='class 1')
    plt.xlim(x_lo - pad, x_hi + pad)
    plt.ylim(y_lo - pad, y_hi + pad)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.title(title)
    plt.legend(loc="upper right", fontsize=16)
    if save:
        print("Save figure results folder.")
        plt.savefig('results/{}'.format(utils.sanitize(title)))
    plt.show()
def view_format_compare():
    """Flask view (Python 2): compare per-format averages for one class.

    GET renders the empty comparison form.  POST runs an aggregate query
    for the submitted class: medics get average heals/min and uber-to-drop
    ratio, every other class gets average KAD and damage/min.
    """
    result = {'errmsg':'', 'formatlist':[], 'class':""}
    if(request.method == "GET"):
        return render_template('format_compare.html', **result)
    else:
        cls = utils.sanitize(request.form['cls'])
        result['class'] = cls
        if(cls=='medic'):
            # Medic-specific stats; rows with zero drops are excluded to
            # avoid division by zero in the uber/drop ratio.
            qrystr = """SELECT PF.format, AVG(PF.healsPerMin) as avg_HPM, AVG(CAST(PF.ubers AS decimal)/CAST(PF.drops AS decimal)) as avg_UD_rate FROM PlaysFormat PF WHERE class=%s AND PF.drops <> 0 GROUP BY PF.format;"""
            # NOTE(review): (cls) is a bare string, not the one-tuple (cls,)
            # usually expected for positional SQL params -- confirm the
            # driver accepts a scalar here.
            formatlist_ptr = g.conn.execute(qrystr, (cls))
            for record in formatlist_ptr:
                print record
                result['formatlist'].append(dict(zip(['format', 'avg_hpm', 'avg_udrate'], [record[0], record[1], float(record[2])] )))
        else:
            # All other classes: kills+assists/deaths and damage per minute.
            qrystr = """SELECT PF.format, AVG(PF.kad) as avg_KAD, AVG(PF.damagepermin) as avg_DPM FROM PlaysFormat AS PF WHERE class=%s AND PF.deaths <> 0 GROUP BY PF.format;"""
            formatlist_ptr = g.conn.execute(qrystr, (cls))
            for record in formatlist_ptr:
                print record
                result['formatlist'].append(dict(zip(['format', 'avg_kad', 'avg_dpm'], record)))
        return render_template('format_compare.html', **result)
def onEvent(self, event):
    """
    Callback function handling button presses will write figure to disk.

    :param event: button code
    :return: nothing
    """
    if self.debug:
        print(("You pressed key {:s}".format(event.key)))
    filename = self.title + "_" + self.plotType + '.png'
    # Spaces in filenames are a nuisance.
    filename = sanitize(filename)
    # Make sure the configured output directory exists; any failure here
    # (permissions, bad path) aborts the whole program.
    try:
        if not os.path.exists(self.config.getOutputPath()):
            makedirs(self.config.getOutputPath())
    except Exception as e:
        print(e)
        sys.exit(1)
    # Note: l, k, g, s and f are predefined keys
    # With them you can:
    # k: toggle between lin horizontal scale and log horizontal scale
    # l: toggle between lin vertical scale and log vertical scale
    # s: open save menu
    # f: toggle between standard size and full screen
    # any other key will make that the file is saved in its current dimensions.
    # To get a nice plot it is wise to maximise and then press any key. Then close
    # the window.
    path = self.config.getOutputPath() + os.path.sep + filename
    self.fig.savefig(path, bbox_inches=0)
    print('Figure was saved to:', path)
def do_parse(ruthless):
    """Extract the main article markup from self.html (Python 2 code;
    closes over `self` from the enclosing scope).

    :param ruthless: when True, strip unlikely candidate nodes before
        scoring paragraphs.
    :return: sanitized article markup, or None when no candidate is found.
    :raises Unparseable: wrapping any StandardError, with the original
        traceback preserved (Python 2 three-argument raise).
    """
    try:
        # Work on a copy so repeated parses see the pristine document.
        html = deepcopy(self.html)
        # Drop scripts/styles outright; tag the body for downstream use.
        for i in utils.tags(html, 'script', 'style'):
            i.drop_tree()
        for i in utils.tags(html, 'body'):
            i.set('id', 'readabilityBody')
        if ruthless:
            html = utils.remove_unlikely_candidates(html)
        html = utils.transform_misused_divs_into_paragraphs(html)
        candidates = utils.score_paragraphs(html)
        # first try to get an article
        article_node = utils.get_article_element(html)
        if article_node:
            best_candidate = article_node
        else:
            best_candidate = select_best_candidate(candidates)
        if best_candidate:
            # TODO: there was some logic here about retrying if the article wasn't long enough
            return utils.sanitize(utils.get_article(candidates, best_candidate), candidates)
        else:
            return None
    except StandardError, e:
        log.exception('error getting summary: ')
        raise Unparseable(str(e)), None, sys.exc_info()[2]
def build_state_shapes(self, outfile, raw_geodir='raw_geoshapes', raw_geofile='raw_shapes_state.json'):
    """Convert raw state shapefiles to GeoJSON and append one JSON doc per
    state to `outfile` (Python 2 code).

    :param outfile: path the formatted documents are appended to.
    :param raw_geodir: directory for the intermediate raw GeoJSON file.
    :param raw_geofile: name of the intermediate raw GeoJSON file.
    """
    print "Building state shape files"
    shapefiles_dir = download_state_shapes()
    geofile_path = self.converter.to_geojson(raw_geofile=raw_geofile, raw_geodir=raw_geodir, shapefile_prefix='state', shapefiles_dir=shapefiles_dir)
    # Format results
    # Extracts postal code (STUSPS), state name (NAME) and the geometry
    # object from one raw GeoJSON feature line.
    raw_geo_re = re.compile(
        '^\{.*?: \{.*?"STUSPS": "([^"]+)", "NAME": "([^"]+)".*?\}.*"geometry":\s*(\{.*?\})\s*\},*$')
    doc_template = '{"id": "%(id)s", "state": "%(state)s", "postal": "%(postal)s", "geometry": %(coordinates)s}\n'
    with open(outfile, 'a') as out:
        print "Formatting %s into output file %s" % (geofile_path, outfile)
        for line in fileinput.input(geofile_path):
            # Only process lines that look like complete feature records.
            is_good_line = self.good_line_re.match(line)
            if is_good_line is None:
                continue
            m = raw_geo_re.match(line)
            # NOTE(review): `id` shadows the builtin and is the sanitized
            # state name (group 2), not the postal code -- looks intentional.
            (postal, state, id, coordinates) = (m.group(1), m.group(2), sanitize(m.group(2)), m.group(3))
            data = {
                'id': id,
                'state': state,
                'postal': postal,
                'coordinates': coordinates
            }
            out.write(doc_template % data)
async def define(self, message, args):
    """Attach a definition string to an existing mapping.

    Arguments:
        self {discordClient} -- Needed
        message {discordMessage} -- The actual message that invoked this command
        args {list[str]} -- Everything that is after the command
    """
    guild_id = message.guild.id
    # Guard clauses: need at least a mapping name and one definition word.
    if not args:
        return await disc.error_message(message,
                                        title="Error",
                                        desc="No arguments were found")
    if len(args) <= 1:
        return await disc.error_message(message,
                                        title="Error",
                                        desc="No definition was given")
    name = utils.sanitize(args[0])
    if not db.mappings_exists(guild_id, name):
        return await disc.send_message(message,
                                       title="Error",
                                       desc=f"{name} was not a valid mapping")
    definition = " ".join(args[1:])
    db.mappings_def(guild_id, name, definition)
    await disc.send_message(
        message,
        title="Definition set",
        desc=f"The mapping {name} has been defined to `{definition}`")
def __init__(self, leadin='', parts=None, answers=None, values=None, number='', packet=None, tournament=None):
    """Build a bonus question from raw HTML fragments.

    Fixes two defects in the previous version: mutable default arguments
    (the shared [] lists) and map() results stored as attributes, which on
    Python 3 are lazy single-pass iterators rather than lists.

    Args:
        leadin: HTML lead-in text; leading question-number / "TB." markers
            are stripped (also when nested inside a leading <strong> tag).
        parts: list of HTML part texts (defaults to a fresh empty list).
        answers: list of HTML answer texts (defaults to a fresh empty list).
        values: list of point values (defaults to a fresh empty list).
        number: question number string.
        packet: owning packet object, if any.
        tournament: owning tournament object, if any.
    """
    self.leadin = leadin
    self.parts = parts if parts is not None else []
    self.answers = answers if answers is not None else []
    self.number = number
    self.values = values if values is not None else []
    self.packet = packet
    self.tournament = tournament
    # Strip leading question-number / tiebreaker markers (module-level regexes).
    self.leadin = re.sub(num_regex, '', self.leadin)
    self.leadin = re.sub(tb_regex, '', self.leadin)
    if self.leadin.startswith('<strong>'):
        # The marker may sit just inside a leading <strong> tag.  Raw
        # strings avoid the invalid escape sequences of the plain literals.
        self.leadin = '<strong>' + re.sub(r'^[\d]+\.[\s]*', '', self.leadin[8:])
        self.leadin = '<strong>' + re.sub(r'^TB\.[\s]*', '', self.leadin[8:])

    def clean_answer(ans):
        # Normalize bold+italic markup (in either nesting order) into the
        # canonical <req> tag used for required answer parts.
        ans = ans.replace('<strong><em>', '<req>')
        ans = ans.replace('<em><strong>', '<req>')
        ans = ans.replace('</em></strong>', '</req>')
        ans = ans.replace('</strong></em>', '</req>')
        return ans

    self.answers = [clean_answer(a) for a in self.answers]
    self.answers_sanitized = [sanitize(a) for a in self.answers]
    self.leadin_sanitized = sanitize(self.leadin)
    self.parts_sanitized = [sanitize(p) for p in self.parts]
async def unmap(self, message, args):
    """Unmaps a command, it removes the file and the mapping in the database

    Arguments:
        self {discordClient} -- Needed
        message {discordMessage} -- The actual message that invoked this command
        args {list[str]} -- Everything that is after the command
    """
    guild_id = message.guild.id
    if not args:
        return await disc.error_message(message,
                                        title="Error",
                                        desc="No arguments were found")
    mapping_name = utils.sanitize(args[0])
    if not db.mappings_exists(guild_id, mapping_name):
        return await disc.send_message(
            message,
            title="Error",
            desc=f"{mapping_name} was not a valid mapping")
    db.mappings_rm(guild_id, mapping_name)
    await disc.send_message(
        message,
        title="Success !",
        desc=f"You have successfully unmapped {mapping_name}")
def handle(self, *args, **options):
    """Load words (one per line) from the --file option into the Word model.

    Honors --debug (verbose logging) and --limit (stop after N words).
    """
    if options['debug']:
        logging.basicConfig(level=logging.DEBUG)
    count = 0
    with open(options['file'], 'r') as source:
        for raw_word in source:
            Word(word=sanitize(raw_word)).save()
            count += 1
            if count == options['limit']:
                break
    self.stdout.write('Successfully loaded {} words'.format(count))
def __init__(self, dataset, name, file_id=None):
    """Wrap a datafile belonging to `dataset`, deriving its access URLs
    from the dataset's connection host and the file id."""
    self.dataset = dataset
    self.name = sanitize(name)
    self.id = file_id
    host = dataset.connection.host
    self.download_url = 'http://{0}/api/access/datafile/{1}'.format(host, self.id)
    self.edit_media_uri = 'https://{0}/dvn/api/data-deposit/v1.1/swordv2/edit-media/file/{1}'.format(host, self.id)
def get_results(query, sort_type='authority', beta=0.0):
    """Queries the database for Page objects matching a query, gets their
    HITS values, weights them by word frequency, and returns a list of
    sorted results.

    sort_type can be either authority or hubbiness.
    beta is the weight of the HITS results vs. the word frequency:
    when beta=0.0 HITS results are all that matters and word frequency is
    multiplied by zero; when beta=1.0 results are based solely on word
    frequency.
    """
    terms = query.split()
    terms = [utils.sanitize(term).lower() for term in terms]  # strips accents and punctuation
    pages = Page.objects.select_related('tag__word_count')
    # A page must be tagged with every term to match (AND semantics).
    for term in terms:
        pages = pages.filter(tag__tag=term)
    if pages:
        # links_query is a list of tuple links, where links_query[i] = (source_url, target_url)
        links_query = Link.objects.filter(source__in=pages).values_list('source__url', 'target__url')
        # now build the links dict to pass into HITS
        links = dict([(page.url, []) for page in pages])
        for link in links_query:
            links[link[0]].append(link[1])
        # run HITS
        (authority, hubbiness) = HITS.HITS(links)
        # give a boost based on word frequency
        tags_query = Tag.objects.filter(page__in=pages).values_list('page__url', 'word_count', 'tag')
        # creates a dictionary of {page.url: [count of all terms in page, count of all tags in page]}
        tags = dict([(page.url, [0, 0]) for page in pages])
        for tag in tags_query:
            if tag[2] in terms:  # term
                tags[tag[0]][0] += tag[1]  # add to count of search terms in page
            tags[tag[0]][1] += tag[1]  # add to count of all tags in page
        # assign the pages hubbiness and authority and weights with term frequency
        # (HITS score blended with the page's search-term frequency ratio)
        for page in pages:
            page.authority = (1 - beta) * authority[page.url] + beta * tags[page.url][0] / float(tags[page.url][1])
            page.hubbiness = (1 - beta) * hubbiness[page.url] + beta * tags[page.url][0] / float(tags[page.url][1])
        # sort the pages by the requested primary key, tie-breaking on the other
        if sort_type == 'hubbiness':
            sorter = lambda page: (page.hubbiness, page.authority)
        else:
            sorter = lambda page: (page.authority, page.hubbiness)
        # set the results
        results = sorted(pages, key=sorter, reverse=True)
    else:
        results = []
    return results
def load_html(self):
    """Read self.filename, split it on <br> tags, sanitize every fragment
    and return the fragments that pass is_valid_content().

    Returns:
        List of sanitized, stripped content lines.
    """
    with codecs.open(self.filename, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    prepared_lines = []
    for line in lines:
        # Raw string for the pattern: '\s' in a plain literal is an invalid
        # escape sequence (DeprecationWarning on Python 3.6+).
        for fragment in re.split(r'<br\s*/?>', line):
            sanitized_line = sanitize(fragment).strip()
            if is_valid_content(sanitized_line,
                                strippable_lines_res=self.strippable_lines_res):
                prepared_lines.append(sanitized_line)
    return prepared_lines
def add():
    """Record a dropped `thing` (taken from the URL query string).

    Returns a confirmation string on success, an error string otherwise.
    """
    try:
        thing = request.args.get("thing")
    except AttributeError:
        return "ERROR: That doesn't look right. URL param `thing` can't be empty."
    # `is not None` instead of `!= None`, and strip() rejects ANY
    # all-whitespace value (the old check only caught the single " ").
    if thing is not None and thing.strip() != "":
        database.add(utils.sanitize(thing))
        return "[✓] Thanks for reporting that Jeffrey dropped " + thing
    else:
        return "[⚠] ERROR: That doesn't look right. URL param `thing` can't be empty."
def __init__(self, dataset, name, file_id=None):
    """Wrap a datafile of `dataset`; URLs are built from the connection host."""
    self.dataset = dataset
    self.name = sanitize(name)
    self.id = file_id
    connection_host = dataset.connection.host
    # Public download endpoint and SWORD edit-media endpoint for this file.
    self.download_url = 'http://{0}/api/access/datafile/{1}'.format(
        connection_host, self.id
    )
    self.edit_media_uri = 'https://{0}/dvn/api/data-deposit/v1.1/swordv2/edit-media/file/{1}'.format(
        connection_host, self.id
    )
def parse_candi(self, candi):
    """Normalize a scraped candidate record in place, according to
    self.candidate_type.

    Splits the raw 'name' and 'vote' cells into typed fields
    (party_name_kr / name_kr, votenum as int, voterate as float) and
    deletes the raw keys afterwards.

    Raises:
        NotImplementedError: when self.candidate_type is not one of
            'party_candidate', 'party_list' or 'independent_candidate'.
    """
    if self.candidate_type == 'party_candidate':
        # candi['name'] == <th><strong>새정치민주연합<br>박원순</strong></th>
        # (party name and candidate name in one cell)
        [candi['party_name_kr'], candi['name_kr']] = list(map(sanitize, candi['name'][:2]))
        del candi['name']
    elif self.candidate_type == 'party_list':
        # candi['name'] == <th><strong>새정치민주연합</strong></th> (party name only)
        candi['party_name_kr'] = sanitize(candi['name'])
        del candi['name']
    elif self.candidate_type == 'independent_candidate':
        # candi['name'] == <th><strong>조희연</strong></th> (candidate name only)
        candi['name_kr'] = sanitize(candi['name'])
        del candi['name']
    else:
        # Message intentionally left as-is (runtime string); it means:
        # "an invalid candidate_type was given".
        raise NotImplementedError("잘못된 candidate_type이 들어옴: one of three, 'party_candidate', 'party_list', or 'independent_candidate'")
    # 'vote' holds (vote count, vote rate); normalize count to int
    # (dropping thousands separators) and rate to float.
    [candi['votenum'], candi['voterate']] = list(map(sanitize, candi['vote'][:2]))
    candi['votenum'] = candi['votenum'].replace(',', '')
    candi['votenum'] = int(candi['votenum'])
    candi['voterate'] = float(candi['voterate'])
    del candi['vote']
def format_neighborhood_shapes(self, outfile):
    """Convert raw neighborhood shapefiles into one JSON doc per line in
    `outfile`, deriving a center point from the first coordinate found
    (Python 2 code).  No-op when `outfile` already exists.
    """
    if isfile(outfile):
        print "%s already exists, skipping formatting geofile" % outfile
        return
    print "Building neighborhood shape files"
    shapefiles_dir = loader.download_neighborhood_shapes()
    raw_geofile = '%s/raw_shapes_neighborhood.json' % self.raw_geo_dir
    self.converter.to_geojson(outfile=raw_geofile, shapefile_prefix='neighborhood', shapefiles_dir=shapefiles_dir)
    # Format results
    # Extracts STATE, CITY, NAME (neighborhood) and the geometry object
    # from one raw GeoJSON feature line.
    raw_geo_re = re.compile(
        '^(\{.*?)"STATE":\s*"([^"]+)".*?"CITY":\s*"([^"]+)".\s*"NAME":\s*"([^"]+)".*?\}.*"geometry":\s*(\{.*?\})\s*\},*$')
    # NOTE(review): leading_zero_re appears unused in this method.
    leading_zero_re = re.compile('([+-])0')
    doc_template = '{"id": "%(id)s", "state": "%(state)s", "city": "%(city)s", "neighborhood": "%(neighborhood)s", "center_lat": %(center_lat)s, "center_lon": %(center_lon)s, "geometry": %(coordinates)s}\n'
    with open(outfile, 'a') as out:
        print "Formatting %s into output file %s" % (raw_geofile, outfile)
        for line in fileinput.input(raw_geofile):
            # Only process lines that look like complete feature records.
            is_good_line = self.good_line_re.match(line)
            if is_good_line is None:
                continue
            m = raw_geo_re.match(line)
            (neighborhood, city, state, coordinates) = (m.group(4), m.group(3), m.group(2), m.group(5))
            id = sanitize("%s_%s_%s" % (neighborhood, city, state))
            # hack because there's no center included in neighborhood shape file
            # we'll use the first coord we find as the center
            c = json.loads(coordinates)['coordinates']
            while type(c) is list and len(c) and type(c[0]) is list:
                c = c[0]
            center_lat = c[1]
            center_lon = c[0]
            data = dict(
                id=id,
                neighborhood=neighborhood,
                city=city,
                state=state,
                center_lat=center_lat,
                center_lon=center_lon,
                coordinates=coordinates
            )
            out.write(doc_template % data)
def add_action():
    """Add an action: create an RDF Action resource from the posted label
    and persist the store.  Returns the new resource's URI as JSON."""
    actions_store = STORE['actions']
    posted_label = request.json['label']
    action_uri = create_uri(BASE_URI_ACTION + sanitize(posted_label))
    load_rdf_file(actions_store)
    # Instantiating the resource registers it in the loaded RDF graph.
    Action(resUri=action_uri, label=posted_label)
    save_rdf_file(actions_store)
    # Returning the URI also yields an HTTP 200.
    return jsonify({'uri': action_uri})
def add_utensil():
    """Add a utensil: create an RDF Utensil resource from the posted label
    and action URIs, persist the store, and return the new URI as JSON."""
    utensils_store = STORE['utensils']
    posted_label = request.json['label']
    action_uris = [create_uri(raw_uri) for raw_uri in request.json['actions']]
    utensil_uri = create_uri(BASE_URI_UTENSIL + sanitize(posted_label))
    load_rdf_file(utensils_store)
    # Instantiating the resource registers it in the loaded RDF graph.
    Utensil(resUri=utensil_uri, label=posted_label, actions=action_uris)
    save_rdf_file(utensils_store)
    return jsonify({'uri': utensil_uri})
def prepare_html_file(self, html_file, skip_lines=0):
    """Clean an HTML packet file in place and return its content lines.

    Reads the file, strips <br /> tags, sanitizes every line down to
    em/strong markup, joins the Tossups and Bonuses sections into one
    contiguous list, filters junk lines, rewrites the file with the
    cleaned content, and returns the lines after `skip_lines`.
    """
    with codecs.open(html_file, 'r', encoding='utf-8') as f:
        packet_contents = f.read()
    packet_contents = re.sub('<br />', '', packet_contents)
    packet_contents = [sanitize(element, valid_tags=['em', 'strong'])
                       for element in packet_contents.split('\n')]
    # Locate the section headers; tossups_start points just past the
    # "Tossups" header, bonuses_start at the "Bonuses" header itself.
    tossups_start = None
    bonuses_start = None
    for i, item in enumerate(packet_contents):
        if re.search('Tossups', item, flags=re.I) and not tossups_start:
            tossups_start = i + 1
        elif re.search('Bonuses', item, flags=re.I) and not bonuses_start:
            bonuses_start = i
    if tossups_start is not None and bonuses_start is not None:
        tossups = packet_contents[tossups_start:bonuses_start]
        bonuses = packet_contents[bonuses_start + 1:]
        packet_contents = tossups + bonuses
    # Drop blank/short lines, "Extra(s)" markers, and lines that start with
    # pure markup.  The original pattern '(<.*>|<.*>)' repeated the same
    # alternative twice; a single '<.*>' is equivalent.
    packet_contents = [x.strip() for x in packet_contents
                       if sanitize(x).strip() != ''
                       and len(x) > 20
                       and x.strip() not in ['Extra', 'Extras']
                       and not re.search('^<.*>', x.strip())]
    # Persist the cleaned packet back over the source file.
    with open(html_file, 'w') as f:
        for item in packet_contents:
            f.write(item + '\n')
    return packet_contents[skip_lines:]
def __init__(self, question='', answer='', number='', packet=None, tournament=None):
    """Build a tossup question from raw HTML fragments.

    Args:
        question: HTML question text; leading question-number / "TB."
            markers are stripped (also inside a leading <strong> tag).
        answer: HTML answer text; bold+italic markup is normalized to <req>.
        number: question number string.
        packet: owning packet object, if any.
        tournament: owning tournament object, if any.
    """
    self.question = question
    self.answer = answer
    self.number = number
    # Strip leading question-number / tiebreaker markers (module-level regexes).
    self.question = re.sub(num_regex, '', self.question)
    self.question = re.sub(tb_regex, '', self.question)
    if self.question.startswith('<strong>'):
        # The marker may sit just inside a leading <strong> tag.  Raw
        # strings fix the invalid escape sequences of the plain literals.
        self.question = '<strong>' + re.sub(r'^[\d]+\.[\s]*', '', self.question[8:])
        self.question = '<strong>' + re.sub(r'^TB\.[\s]*', '', self.question[8:])
    # Normalize bold+italic markup (in either nesting order) into the
    # canonical <req> tag used for required answer parts.
    self.answer = self.answer.replace('<strong><em>', '<req>')
    self.answer = self.answer.replace('<em><strong>', '<req>')
    self.answer = self.answer.replace('</em></strong>', '</req>')
    self.answer = self.answer.replace('</strong></em>', '</req>')
    self.answer_sanitized = sanitize(self.answer, [])
    self.question_sanitized = sanitize(self.question, [])
    self.packet = packet
    self.tournament = tournament
def _renameNote(self, rec, first_line):
    """Derive a unique .txt filename from the note's first line; if the
    note already exists on disk under its old name, rename it.

    Returns the new filename (without directory).
    """
    stem = sanitize(first_line).lower()
    candidate = stem + '.txt'
    # Append -1, -2, ... until the name is free in self.path.
    suffix = 1
    while os.path.exists(os.path.join(self.path, candidate)):
        candidate = stem + '-%s' % suffix + '.txt'
        suffix += 1
    new_path = os.path.join(self.path, candidate)
    old_path = self._filepath(rec)
    if rec['filename'] and os.path.exists(old_path):
        log('RENAME %s TO %s' % (old_path, new_path))
        os.rename(old_path, new_path)
    log('NEW FILENAME: %s' % candidate)
    return candidate
def main():
    """Drive the DNA-matching pipeline end to end, using command-line argv
    and the module-level dbFile/tmp/dnaNameList/results containers."""
    # ensure proper input format from command line
    sanitize(argv)
    # initialize the dnaDB dict using the dictionary file identified in input
    dnaDB = initDict(argv, dbFile, tmp)
    # read the DNA sequence to test into memory (buffer)
    testFile = initFile(tmp, argv)
    # from the dnaDB, retrieve the names of all DNA strands we want to test the new sequence against
    getDnaNames(dnaNameList, dnaDB)
    # find the maximum number of sequences of the DNA strands that exist in the new sequence and add to a list
    getSequences(dnaNameList, results, testFile)
    # search the dnaDB for a match between the list of sequences and entries in the dict.
    # print the name of a match, if one exists
    findMatch(dnaDB, dnaNameList, results)
    # close the DictReader
    closeFile(dbFile)
def get_links(self, links):
    """Gets the links on the page and their link texts."""
    for anchor in links:
        href = anchor.get('href', '')
        if href == '':
            continue
        href = urlparse.urljoin(self.url, href)  # translate link to proper url
        href = self.standardize_url(href)  # standardize url
        if not self.is_valid_link(href):
            continue
        # get link text
        text_nodes = anchor.findAll(text=True)
        tokens = ' '.join(text_nodes).split()
        link_words = [utils.sanitize(token).lower()
                      for token in tokens if len(token) != 0]
        # links are now dictionaries of url: list_of_words
        self.links[href].extend(link_words)
def resume_pushing():
    """Retry every Task stuck in the 'pushing' state (Python 2 code):
    re-upload the item's file to Baidu PCS via bypy and update the task
    status; the local file is removed after a successful push."""
    pushing_tasks = Task.objects.filter(status='pushing')
    if pushing_tasks:
        pcs = ByPy()
        for pt in pushing_tasks:
            if pt.item.file_exist():
                push_res = pcs.upload(pt.item.get_item_path(), sanitize(pt.item.title) + '.mp4')
                print 'push_res:' + str(push_res)
                # NOTE(review): 0 and 60 are treated as success; presumably
                # 0 = uploaded and 60 = already exists remotely -- confirm
                # against the bypy return-code documentation.
                if push_res == 0 or push_res == 60:
                    pt.status = 'pushed'
                    os.remove(pt.item.get_item_path())
                else:
                    pt.status = 'pushing_failed'
                pt.save()
def format_city_shapes(self, outfile):
    """Convert raw city shapefiles into one JSON doc per line in `outfile`
    (Python 2 code).  No-op when `outfile` already exists."""
    if isfile(outfile):
        print "%s already exists, skipping formatting geofile" % outfile
        return
    print "Building city shape files"
    shapefiles_dir = loader.download_city_shapes()
    raw_geofile = '%s/raw_shapes_city.json' % self.raw_geo_dir
    self.converter.to_geojson(outfile=raw_geofile, shapefile_prefix='city', shapefiles_dir=shapefiles_dir)
    # Format results
    # Extracts state FIPS code, city name, interior-point lat/lon and the
    # geometry object from one raw GeoJSON feature line.
    raw_geo_re = re.compile(
        '^\{.*?\{.*?\s*"STATEFP":\s*"([^"]+)".*?NAME":\s*"([^"]+)".*?"INTPTLAT":\s*"([^"]+)".*?INTPTLON":\s*"([^"]+)".*?\}.*"geometry":\s*(\{.*?\})\s*\},*$')
    # Drops a zero that directly follows an explicit sign, e.g. "+069" -> "+69".
    leading_zero_re = re.compile('([+-])0')
    doc_template = '{"id": "%(id)s", "state": "%(state)s", "city": "%(city)s", "center_lat": %(center_lat)s, "center_lon": %(center_lon)s, "geometry": %(coordinates)s}\n'
    with open(outfile, 'a') as out:
        print "Formatting %s into output file %s" % (raw_geofile, outfile)
        for line in fileinput.input(raw_geofile):
            # Only process lines that look like complete feature records.
            is_good_line = self.good_line_re.match(line)
            if is_good_line is None:
                continue
            city_info = raw_geo_re.match(line)
            state_code = city_info.group(1)
            city = city_info.group(2)
            # Normalize the signed coordinates and drop the leading '+'.
            center_lat = leading_zero_re.sub(r'\1', city_info.group(3)).replace('+', '')
            center_lon = leading_zero_re.sub(r'\1', city_info.group(4)).replace('+', '')
            coordinates = city_info.group(5)
            # Map the FIPS code to a state name when known; fall back to the code.
            state = state_codes[state_code] if state_code in state_codes else state_code
            id = sanitize("%s_%s" % (city, state))
            data = dict(
                id=id,
                city=city,
                state=state,
                center_lat=center_lat,
                center_lon=center_lon,
                coordinates=coordinates
            )
            out.write(doc_template % data)
def main(): parse_arguments() # Gets the Rhythmbox player from DBus bus = dbus.SessionBus() obj = bus.get_object("org.gnome.Rhythmbox", "/org/gnome/Rhythmbox/Player") player = dbus.Interface(obj, "org.gnome.Rhythmbox.Player") filename = sanitize(player.getPlayingUri()) # Prompts user for confirmation on removing the file if confirm(filename, title="Delete this file?"): player.next() call(["rm", filename]) notify("Removed", "\"" + filename + "\"", icon="user-trash-full")
def load_html(self):
    """Read self.filename, split it on <br> tags, sanitize every fragment
    and return the fragments that pass is_valid_content().

    Returns:
        List of sanitized, stripped content lines.
    """
    with codecs.open(self.filename, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    prepared_lines = []
    for l in lines:
        # HACK to fix a common case of an answer/text not being on a new line
        # by using how we split up line breaks ourselves.
        # BUG FIX: re.sub's 4th positional argument is `count`, not `flags`;
        # passing re.I positionally capped substitutions at 2 per line and
        # did NOT make the match case-insensitive.  Pass flags by keyword.
        hacked_l = re.sub(r'(ANSWER|\[10\])', r'<br>\1', l, flags=re.I)
        split_lines = re.split(r'<br\s*/?>', hacked_l)
        for split_line in split_lines:
            sanitized_line = sanitize(split_line).strip()
            if is_valid_content(
                    sanitized_line,
                    strippable_lines_res=self.strippable_lines_res):
                prepared_lines.append(sanitized_line)
    return prepared_lines
def __init__(self, name, study, edit_media_uri=None, download_url=None):
    """Wrap a Dataverse file; exactly one of `edit_media_uri` (draft file)
    or `download_url` (released file) must be provided.

    Raises:
        utils.DataverseException: when neither URI is given.
    """
    self.name = utils.sanitize(name)
    self.study = study
    if edit_media_uri:
        # Draft (unreleased) file: derive the id and download URL from the
        # SWORD edit-media URI.
        self.is_released = False
        self.edit_media_uri = edit_media_uri
        self.id = edit_media_uri.split('/')[-2]
        host = urlparse.urlparse(edit_media_uri).netloc
        self.download_url = 'http://{0}/dvn/FileDownload/?fileId={1}'.format(host, self.id)
        return
    if download_url:
        # Released file: the id is the trailing query-string value.
        self.is_released = True
        self.download_url = download_url
        self.id = download_url.split('=')[-1]
        return
    raise utils.DataverseException(
        'Files must have an edit media uri or download url.'
    )
def get_content(self, soup): """Gets counts of all of the words on the page.""" # based on http://groups.google.com/group/beautifulsoup/browse_thread/thread/9f6278ee2a2e4564 # remove comments comments = soup(text=lambda text:isinstance(text, BeautifulSoup.Comment)) [comment.extract() for comment in comments] # remove javascript js = soup('script') [tag.extract() for tag in js] # count words! body = soup.body(text=True) title = soup.title(text=True) self.title = ' '.join(title).strip() if len(self.title) == 0: self.title = self.url words = ' '.join(body).split() + ' '.join(title).split() words = [utils.sanitize(word).lower() for word in words if len(word) != 0] self.word_counts += Counter(words)
def format_state_shapes(self, outfile):
    """Convert raw state shapefiles into one JSON doc per line in `outfile`
    (Python 2 code).  No-op when `outfile` already exists."""
    if isfile(outfile):
        print "%s already exists, skipping formatting geofile" % outfile
        return
    print "Building state shape files"
    shapefiles_dir = loader.download_state_shapes()
    raw_geofile = '%s/raw_shapes_state.json' % self.raw_geo_dir
    self.converter.to_geojson(outfile=raw_geofile, shapefile_prefix='state', shapefiles_dir=shapefiles_dir)
    # Format results
    # Extracts postal code (STUSPS), state name (NAME) and the geometry
    # object from one raw GeoJSON feature line.
    raw_geo_re = re.compile(
        '^\{.*?: \{.*?"STUSPS": "([^"]+)", "NAME": "([^"]+)".*?\}.*"geometry":\s*(\{.*?\})\s*\},*$')
    doc_template = '{"id": "%(id)s", "state": "%(state)s", "postal": "%(postal)s", "geometry": %(coordinates)s}\n'
    with open(outfile, 'a') as out:
        print "Formatting %s into output file %s" % (raw_geofile, outfile)
        for line in fileinput.input(raw_geofile):
            # Only process lines that look like complete feature records.
            is_good_line = self.good_line_re.match(line)
            if is_good_line is None:
                continue
            state_info = raw_geo_re.match(line)
            (postal, state, coordinates) = (state_info.group(1), state_info.group(2), state_info.group(3))
            # The document id is the sanitized state name.
            data = dict(
                id=sanitize(state),
                state=state,
                postal=postal,
                coordinates=coordinates
            )
            out.write(doc_template % data)
def build_neighborhood_shapes(self, outfile, raw_geodir='raw_geoshapes', raw_geofile='raw_shapes_neighborhood.json'):
    """Convert raw neighborhood shapefiles to GeoJSON and append one JSON
    doc per neighborhood to `outfile` (Python 2 code).

    :param outfile: path the formatted documents are appended to.
    :param raw_geodir: directory for the intermediate raw GeoJSON file.
    :param raw_geofile: name of the intermediate raw GeoJSON file.
    """
    print "Building neighborhood shape files"
    shapefiles_dir = download_neighborhood_shapes()
    geofile_path = self.converter.to_geojson(raw_geofile=raw_geofile, raw_geodir=raw_geodir, shapefile_prefix='neighborhood', shapefiles_dir=shapefiles_dir)
    # Format results
    # Extracts STATE, CITY, NAME (neighborhood) and the geometry object
    # from one raw GeoJSON feature line.
    raw_geo_re = re.compile(
        '^(\{.*?)"STATE":\s*"([^"]+)".*?"CITY":\s*"([^"]+)".\s*"NAME":\s*"([^"]+)".*?\}.*"geometry":\s*(\{.*?\})\s*\},*$')
    doc_template = '{"id": "%(id)s", "state": "%(state)s", "city": "%(city)s", "neighborhood": "%(neighborhood)s", "geometry": %(coordinates)s}\n'
    with open(outfile, 'a') as out:
        print "Formatting %s into output file %s" % (geofile_path, outfile)
        for line in fileinput.input(geofile_path):
            # Only process lines that look like complete feature records.
            is_good_line = self.good_line_re.match(line)
            if is_good_line is None:
                continue
            m = raw_geo_re.match(line)
            (neighborhood, city, state, coordinates) = (m.group(4), m.group(3), m.group(2), m.group(5))
            # Composite id: neighborhood_city_state, sanitized.
            id = sanitize("%s_%s_%s" % (neighborhood, city, state))
            data = {
                'id': id,
                'neighborhood': neighborhood,
                'city': city,
                'state': state,
                'coordinates': coordinates
            }
            out.write(doc_template % data)
def view_league_compare():
    """Flask view (Python 2): for the submitted class, compare the average
    damage-per-minute of top-division players across leagues."""
    #keys(request.args) = ['attr', 'attr_val', 'entity', 'results']
    context = dict([])
    context['data']= []
    if(request.method == "POST"):
        cls = utils.sanitize(request.form['cls'])
        print 'cls = ' + cls
        # Top teams = teams in each league's rank-1 division; top players =
        # members of those teams; average their DPM per league for `cls`.
        qrystr = """SELECT TopPlayers.league, AVG(PlaysFormat.damagepermin) FROM PlaysFormat, (SELECT PlaysOn.player AS player,TopTeams.league AS league FROM PlaysOn, ( SELECT TD.team AS team, TD.league AS league FROM TeamDivision as TD, LeagueDivision as LD WHERE LD.rank=1 AND TD.league=LD.league AND TD.division = LD.division) AS TopTeams WHERE PlaysOn.team=TopTeams.team) AS TopPlayers WHERE PlaysFormat.player=TopPlayers.player AND PlaysFormat.class = %s AND PlaysFormat.damagePerMin IS NOT NULL GROUP BY TopPlayers.league;"""
        print qrystr
        # NOTE(review): (cls) is a bare string, not the one-tuple (cls,)
        # usually expected for positional SQL params -- confirm the driver
        # accepts a scalar here.
        result = g.conn.execute(qrystr, (cls))
        for record in result:
            print record
            context['data'].append({'league':str(record[0]), 'val':str(record[1])})
    return render_template('league_compare.html', **context)
async def send(self, message, args):
    """Sends the image / text to discord

    Arguments:
        self {discordClient} -- Needed
        message {discordMessage} -- The actual message that invoked this command
        args {list[str]} -- Everything that is after the command
    """
    guild_id = message.guild.id
    if not args:
        return
    mapping_name = utils.sanitize(args[0])
    mapping = db.mappings_exists(guild_id, mapping_name)
    if not mapping:
        return
    path = mapping[0][1]
    # Text mappings are sent inline; anything else goes out as a file.
    if path.split(".")[-1] == "txt":
        await message.channel.send(utils.get_content(path))
    else:
        await disc.send_file(message, path)
def plot_boundary(X, y, coefs, title='', colormap=False, save=False):
    """Plot 2-D data points and the conic decision boundary given by `coefs`.

    Parameters:
        X -- array of shape (n, 2): the two feature columns are plotted on x/y.
        y -- binary labels (0/1) used to split the scatter into two classes.
        coefs -- conic coefficients passed through to `conics`.
        title -- figure title; also used (sanitized) as the save filename.
        colormap -- if True, draw `Z` as a background heatmap.
        save -- if True, write the figure to results/<sanitized title>.
    """
    x_min, x_max = np.min(X[:, 0]), np.max(X[:, 0])
    y_min, y_max = np.min(X[:, 1]), np.max(X[:, 1])
    offset = 1
    q = 500  # mesh resolution per axis
    tx = np.linspace(x_min - offset, x_max + offset, num=q)
    ty = np.linspace(y_min - offset, y_max + offset, num=q)
    X_mesh, Y_mesh = np.meshgrid(tx, ty)
    Z = conics(X_mesh, Y_mesh, coefs)
    plt.figure(figsize=(9, 9))
    if colormap:
        # BUG FIX: was `plt.clf` — a bare attribute access that never called
        # the function, so the figure was never actually cleared.
        plt.clf()
        plt.imshow(Z, origin="lower", extent=[
            x_min - offset, x_max + offset, y_min - offset, y_max + offset
        ], aspect=(x_max - x_min) / (y_max - y_min))
    # BUG FIX: `levels=0` (an int) means "auto-pick at most 1 level", not the
    # zero level set; the decision boundary is the contour where Z == 0.
    contours = plt.contour(X_mesh, Y_mesh, Z, levels=[0], colors='g')
    contours.collections[0].set_label('Decision boundary')
    plt.scatter(X[y == 0, 0], X[y == 0, 1], label='class 0')
    plt.scatter(X[y == 1, 0], X[y == 1, 1], label='class 1')
    plt.xlim(x_min - offset, x_max + offset)
    plt.ylim(y_min - offset, y_max + offset)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.title(title)
    plt.legend(loc="upper right", fontsize=16)
    if save:
        print("Save figure results folder.")
        plt.savefig('results/{}'.format(utils.sanitize(title)))
    plt.show()
# Parse item definitions out of Java class files and `eval` each matched code
# snippet into a Python object, dynamically creating GameItem subclasses for
# any class name the snippet references that is not yet defined.
# NOTE(review): this block appears truncated by whitespace mangling — it ends
# with a dangling `if conf.DEBUG:`; preserved verbatim rather than reformatted.
# NOTE(review): file handles opened here are never closed, and `eval` on file
# contents is unsafe if the class files are not fully trusted — verify.
for java_file in conf.ITEMS_FILES: file_handler = open('./classes/%s' % java_file) data = file_handler.read().split("\n") item_regex = re.compile(conf.ITEMS_PATTERN) class_error_regex = re.compile('name \'(?P<name>\w+)\' is not defined') for line in data: if '"' in line: t = item_regex.search(line) if t: item = t.groupdict() if conf.DEBUG: print("Line: " + item['code']) item['code'] = utils.sanitize(item['code']) if conf.DEBUG: print("Sanitize: " + item['code']) try: obj = eval(item['code']) except NameError as error: # Create class for the given classname class_name = class_error_regex.search(error.__str__()).group('name') if conf.DEBUG: print("Classname: %s" % class_name) setattr(sys.modules[__name__], class_name, type(class_name, (GameItem,), {})) obj = eval(item['code']) #if obj.name == 'appleGold': if conf.DEBUG:
def _handle_post_annotation_request(user, document, request):
    """Create an annotation on `document` from POST data and notify watchers.

    Builds and saves an Annotation from the request fields, then sends
    notifications to the document owner and to each collector (follower),
    skipping the annotator themself. Returns a JsonResponse with the rendered
    annotation HTML, or 403 if the user is not authenticated.
    """
    if not user.is_authenticated:
        return HttpResponseForbidden()
    annotation = Annotation()
    annotation.annotator = user
    annotation.content = sanitize(request.POST['annotation_content'])
    annotation.document_this_annotation_belongs = document
    # "page_id" is of the form <prefix>_<prefix>_<index>; take the index part.
    annotation.page_index = request.POST["page_id"].split("_")[2]
    annotation.height_percent = request.POST["height_percent"]
    annotation.width_percent = request.POST["width_percent"]
    annotation.top_percent = request.POST["top_percent"]
    annotation.left_percent = request.POST["left_percent"]
    annotation.frame_color = request.POST["frame_color"]
    # FIX (idiom): the comparison already yields the boolean.
    annotation.is_public = request.POST["is_public"] == 'true'
    annotation.save()
    # send notification to the document uploader
    if annotation.annotator.pk != document.owner.pk:
        notify.send(
            sender=annotation.annotator,
            recipient=document.owner,
            action_object=annotation,
            verb='post annotation',
            redirect_url=annotation.url,
            image_url=annotation.annotator.portrait_url,
            description=h.handle(annotation.content),
            is_public=annotation.is_public,
        )
    # send notification to the collectors, i.e., followers
    # BUG FIX: the loop variable was named `user`, shadowing (and clobbering)
    # the `user` parameter; renamed to `collector`.
    for collector in document.collectors.all():
        if annotation.annotator.pk != collector.pk and document.owner.pk != collector.pk:
            notify.send(
                sender=annotation.annotator,
                recipient=collector,
                action_object=annotation,
                verb='post annotation',
                redirect_url=annotation.url,
                image_url=annotation.annotator.portrait_url,
                description=h.handle(annotation.content),
                is_public=annotation.is_public,
            )
    context = {
        "document": document,
        'annotation': annotation,
        'ANONYMOUS_USER_PORTRAIT_URL': settings.ANONYMOUS_USER_PORTRAIT_URL,
        "new_annotation_id": annotation.id,
    }
    return JsonResponse(
        {
            'new_annotationdiv_html': render(request, "file_viewer/one_annotation_div.html", context).content,
            'new_annotation_id': annotation.id,
            'new_annotation_uuid': str(annotation.clean_uuid),
            'new_annotation_json': annotation,
        },
        encoder=AnnotationEncoder)
# Use default or user provided maximum sample numbers maxNrTargetSamplesPerLabel = int(args.restrictedNrSubjectSamples[0]) maxNrNonTargetSamplesPerLabel = int(args.restrictedNrSubjectSamples[1]) # Let's handle any request for the license first. # We stop the program after that. if args.showLicense: l = License('LICENSE.txt') l.showLicense() exit(0) # Name of the experiment, used as _title in plots. expName = args.expName # We do not like spaces! filenames = sanitize(args.filenames) # filename = args.filename dataType = args.dataType # Threshold used by biometric system to make a decision # only of interest if you want to plot the systems Accuracy. threshold = args.threshold config = Config(args.configFilename) debug = config.getDebug() compute_eer = False compute_cllr = False eerObject = None cllrObject = None
def get_query(self):
    """Memory-map the query vectors from `self.querydir` and return them sanitized."""
    mapped_vectors = bvecs_mmap(self.querydir)
    return sanitize(mapped_vectors)
def get_train(self):
    """Memory-map the training vectors from `self.traindir` and return them sanitized."""
    mapped_vectors = bvecs_mmap(self.traindir)
    return sanitize(mapped_vectors)
def get_base_iterator(self):
    """Yield sanitized, memory-mapped base-vector chunks, one per file index.

    `self.basedir` is a format template; indices 0..self.num-1 are substituted
    in to produce each chunk's filename.
    """
    for chunk_index in range(self.num):
        chunk_path = self.basedir.format(chunk_index)
        chunk = fvecs_mmap(chunk_path)
        yield sanitize(chunk)
def _handle_post_annotation_reply_request(user, document, request):
    # Create a reply to an annotation (optionally to another reply) from POST
    # data, notify the relevant users, and return the rendered reply as JSON.
    # Empty replies are silently accepted with a 200 (no object created).
    if request.POST["annotation_reply_content"] == "":
        return HttpResponse(status=200)
    if not user.is_authenticated:
        return HttpResponseForbidden()
    annotation_reply = AnnotationReply()
    annotation = Annotation.objects.get(
        id=int(request.POST["reply_to_annotation_id"]))
    annotation_reply.content = sanitize(
        request.POST["annotation_reply_content"])
    annotation_reply.replier = user
    annotation_reply.reply_to_annotation = annotation
    annotation_reply.is_public = True if request.POST[
        "is_public"] == 'true' else False
    annotation_poster = annotation_reply.reply_to_annotation.annotator
    # Optional nesting: this reply may target another reply rather than the
    # annotation itself.
    if "reply_to_annotation_reply_id" in request.POST:
        annotation_reply.reply_to_annotation_reply = AnnotationReply.objects.get(
            id=int(request.POST["reply_to_annotation_reply_id"]))
        # avoid 2 duplicate notifications in the case
        # where the annotation I reply to and the reply I reply to are both from the same guy
        replier_who_i_reply_to = annotation_reply.reply_to_annotation_reply.replier
        if replier_who_i_reply_to.pk != annotation_poster.pk and user.pk != replier_who_i_reply_to.pk:
            # NOTE(review): this notification is sent before save(); confirm
            # notify.send does not need a persisted action_object pk here.
            notify.send(
                sender=annotation_reply.replier,
                recipient=annotation_reply.reply_to_annotation_reply.replier,
                action_object=annotation_reply,
                verb='reply to annotation reply',
                redirect_url=annotation.url,
                image_url=annotation_reply.replier.portrait_url,
                description=h.handle(annotation_reply.content),
                is_public=annotation_reply.is_public,
            )
    annotation_reply.save()
    # Notify the original annotation's author, unless replying to oneself.
    if user.pk != annotation_poster.pk:
        notify.send(
            sender=annotation_reply.replier,
            recipient=annotation_reply.reply_to_annotation.annotator,
            action_object=annotation_reply,
            verb='reply to annotation',
            redirect_url=annotation.url,
            image_url=annotation_reply.replier.portrait_url,
            description=h.handle(annotation_reply.content),
            is_public=annotation_reply.is_public,
        )
    context = {
        "annotation_reply": annotation_reply,
        'ANONYMOUS_USER_PORTRAIT_URL': settings.ANONYMOUS_USER_PORTRAIT_URL,
    }
    return JsonResponse(
        {
            'new_annotationreply_html': render(request, "file_viewer/one_annotation_reply.html", context).content,
            'new_annotationreply_json': annotation_reply,
        },
        encoder=AnnotationReplyEncoder)
def parse_packet(self):
    """Parse the loaded HTML packet into tossups and bonuses.

    Walks the packet line by line, accumulating Tossup objects while in
    tossup mode and Bonus objects after the tossup/bonus switch. Stray lines
    are appended to the most recent text/answer/leadin field. Sets
    self.tossups / self.bonuses (each item stamped with tournament and round)
    and returns the pair (tossups, bonuses).
    """
    lines = self.load_html()
    tossups = []
    current_tossup = Tossup(1)
    # assume we start with tossups first, since that's what almost
    # every packet structure looks like
    parsing_tossups = True
    bonuses = []
    current_bonus = Bonus(1)
    for l in lines:
        sanitized_l = sanitize(reformat_line(l))
        # edge case for switching from tossups to bonuses
        # we use -1 as a short circuit to say "use the Bonuses marker instead"
        if (self.num_tossups != -1 and len(tossups) + 1 >= self.num_tossups
                and self.bonus_leadin_re.search(sanitized_l)
                and current_tossup.has_content()):
            parsing_tossups = False
        if parsing_tossups:
            if self.tossup_text_re.search(sanitized_l):
                # assume finding a new tossup means we're done with the old one
                if current_tossup.has_content():
                    tossups.append(current_tossup)
                    current_tossup = Tossup(len(tossups) + 1)
                current_tossup.text = self.tossup_text_re.sub("", l, count=1)
                current_tossup.answer = ""
            # TODO: handle case where the next answer line isn't actually on the next line *sigh*
            elif self.tossup_answer_re.search(sanitized_l):
                current_tossup.answer = self.tossup_answer_re.sub("", l, count=1)
            else:
                if re.search(r'^bonus(es)?$', sanitized_l, re.I):
                    parsing_tossups = False
                    # BUG FIX: was a bare `next`, which just evaluates the
                    # builtin and falls through, appending the "Bonuses"
                    # marker line to the current tossup. `continue` is meant.
                    continue
                # this assumes everything between the current answer and the beginning of the
                # next tossup is wortwhile answer text
                # this will catch some junk in the middle occasionally, but we should
                # handle that by stripping it in the preparation stuff, and this approach
                # allows us to handle bad packet loading that turns paragraphs into multilines
                if current_tossup.has_content():
                    if current_tossup.answer != "":
                        current_tossup.answer += (" " + l)
                    else:
                        current_tossup.text += (" " + l)
        else:
            if self.bonus_leadin_re.search(sanitized_l):
                # Flush the trailing tossup exactly once at the switch-over.
                if len(bonuses) == 0 and current_tossup.has_content():
                    tossups.append(current_tossup)
                    current_tossup = Tossup(len(tossups) + 1)
                if current_bonus.has_content():
                    bonuses.append(current_bonus)
                current_bonus = Bonus(len(bonuses) + 1,
                                      leadin=self.bonus_leadin_re.sub(
                                          "", l, count=1))
            elif self.bonuspart_text_re.search(sanitized_l):
                current_bonus.texts += [
                    self.bonuspart_text_re.sub("", l, count=1)
                ]
            elif self.bonuspart_answer_re.search(sanitized_l):
                current_bonus.answers += [
                    self.bonuspart_answer_re.sub("", l, count=1)
                ]
            else:
                # Continuation line: attach to leadin, last part text, or last
                # answer, depending on what was most recently started.
                if current_bonus.texts == [] and current_bonus.answers == []:
                    current_bonus.leadin += (" " + l)
                elif len(current_bonus.texts) > len(current_bonus.answers):
                    current_bonus.texts[-1] += (" " + l)
                else:
                    current_bonus.answers[-1] += (" " + l)
    # Flush whatever is still being accumulated at end of file.
    if current_tossup.has_content():
        tossups.append(current_tossup)
    if current_bonus.has_content():
        bonuses.append(current_bonus)
    # Stamp provenance on every parsed question.
    for tossup in tossups:
        tossup.tournament = self.tournament
        tossup.round = self.round
    for bonus in bonuses:
        bonus.tournament = self.tournament
        bonus.round = self.round
    self.tossups = tossups
    self.bonuses = bonuses
    return (tossups, bonuses)
def title(self):
    """Return the sanitized title of the wrapped collection."""
    raw_title = self.collection.title
    return utils.sanitize(raw_title)
def _format_audio_name(audio_info: Dict):
    """Build an '.mp3' filename from the track's artist and title.

    The combined "artist - title" string is sanitized (case and
    non-alphanumerics preserved) before the extension is appended.
    """
    stem = '{} - {}'.format(audio_info['artist'], audio_info['title'])
    safe_stem = sanitize(stem, to_lower=False, alpha_numeric_only=False)
    return '{}.mp3'.format(safe_stem)
# Top-level script section: load the perturbation dataset and labels, impute
# missing values, sanitize class names, and resolve the target class index.
# NOTE(review): `df_train` is loaded earlier in the file (outside this chunk).
df_perturb = utils.load_dataframe(args.perturb_data)
y_train, classes = utils.load_labels(args.train_labels)
y_perturb, _ = utils.load_labels(args.perturb_labels, classes)
print('loaded train data (%s genes, %s samples)' % (df_train.shape[1], df_train.shape[0]))
print('loaded perturb data (%s genes, %s samples)' % (df_perturb.shape[1], df_perturb.shape[0]))
# impute missing values
# Both frames are filled with the global minimum of the training data.
min_value = df_train.min().min()
df_train.fillna(value=min_value, inplace=True)
df_perturb.fillna(value=min_value, inplace=True)
# sanitize class names
classes = [utils.sanitize(c) for c in classes]
# determine target class
# No --target means "last class" (index -1); otherwise look it up by name,
# exiting with an error if the name is not present.
try:
    if args.target == None:
        args.target = -1
    else:
        args.target = classes.index(args.target)
    print('target class is: %s' % (classes[args.target]))
except ValueError:
    print('error: class %s not found in dataset' % (args.target))
    sys.exit(1)
# load gene sets file if it was provided
if args.gene_sets != None:
    print('loading gene sets...')
# NOTE(review): this fragment is truncated at its start — the leading `]`
# closes a list comprehension (over git remotes, apparently) that begins
# outside this chunk, and the `except CalledProcessError` belongs to a `try`
# opened earlier. Preserved verbatim; do not reformat without the full context.
# Purpose (from what is visible): derive the GitHub repo URL from git remotes,
# render the project README to HTML, assign save paths/URLs to entry pages,
# and build the breadcrumb / header tags for the generated project page.
if 'github' in l and 'bertalan' in l ] if len(remotes) > 0: repo = 'http://github.com/' + remotes[0].split()[1][15:][:-4] except CalledProcessError: pass # Generate HTML for the page contents. readmeHtml = markdown_to_html(description) # Assign urls to the entry pages. for k, entry in enumerate(entries): entry['url'] = ( # 'entry_%d-' % k # + utils.sanitize(entry['title'].replace(' ', '_'), extra='_') + '.html') entry['save_path'] = join(wwwDir, baseProjectName, entry['url']) # Generate and write the project page. breadcrumbs = [ utils.Tag('a', href='../index.html', tagText='Home'), utils.Tag('span', tagText=baseProjectName, parseTagText=False) ] home_title_link = utils.Tag('a', tagText='Tom Bertalan', href='../index.html', cls='mdl-typography--headline', style='text-decoration:none; color:#444;')
def parse_member_party(self, member):
    """Replace member['party'] (a one-element sequence) with its sanitized first item."""
    raw_party = member['party'][0]
    member['party'] = sanitize(raw_party)