Exemplo n.º 1
0
def clean_tokens(tokens):
    """Normalise a sequence of tokens and drop the unwanted ones.

    Every token is first swapped for its entry in ``contractions`` (when one
    exists) and then, if ``is_number`` accepts it, rewritten in words with
    ``num2words``.  A token is left out of the result when it contains any
    item of ``invalidtokens``, when it is a lone apostrophe, or when
    ``profanity.is_profanity`` flags it.

    Returns a new list; the order of the surviving tokens is preserved.
    """
    cleaned = []
    for token in tokens:
        # Contraction lookup falls back to the token itself.
        token = contractions.get(token, token)

        # Spell numeric tokens out as words.
        if is_number(token):
            token = num2words(int(token))

        # Reject tokens holding an invalid fragment, and bare apostrophes.
        if any(bad in token for bad in invalidtokens):
            continue
        if token == "'":
            continue

        # Reject profanity.
        if profanity.is_profanity(token):
            continue

        cleaned.append(token)

    return cleaned
Exemplo n.º 2
0
	def random_sample(self, pixels, n=1e5):
		""" Draw random points inside one or more healpixels.

		Every point is attached to one of the given pixels (chosen with
		np.random.choice when several are given) and offset uniformly, in
		the projected x, y plane, within a square of side pi / 2 / nside
		centred on that pixel.

		Parameters
		----------
		pixels : int, ndarray
			pixel number or list of pixel numbers
		n : int
			Total number of randoms to draw (cast with int())

		Returns
		-------
		lon, lat : positions of randoms (degr)
		"""
		# a lone pixel number is wrapped so it can be handled like a list
		if misc.is_number(pixels):
			pixels = [int(pixels)]

		n_draw = int(n)

		# decide which input pixel every random point belongs to
		if len(pixels) != 1:
			which = np.random.choice(len(pixels), n_draw)
		else:
			which = np.zeros(n_draw, dtype=int)

		# pixel centers, expressed in the healpix projection
		theta_c, phi_c = healpy.pix2ang(self.nside, pixels, nest=self.nest)
		center_x, center_y = self._phitheta2xy(phi_c, theta_c)

		# size of a healpix cell in the projection
		cell_size = np.pi / 2. / self.nside

		# uniform offsets in a square, shifted onto the chosen pixel centers
		offsets = np.random.uniform(-0.5, 0.5, (2, n_draw)) * cell_size
		x = offsets[0]
		y = offsets[1]
		x += center_x[which]
		y += center_y[which]

		# back to angles, then to degrees
		phi_out, theta_out = self._xy2phitheta(x, y)

		return self.rad2deg * phi_out, 90 - self.rad2deg * theta_out
Exemplo n.º 3
0
	def select_cells(self, coarse_cell, coarse_nside, coarse_order=None):
		""" Find the cells of this grid that fall inside cells of a coarser grid.

		Notes
		-----
		Requires coarse_nside < HealpixProjector.nside.  As a special case,
		coarse_nside == 0 selects every cell of this grid.

		Parameters
		----------
		coarse_cell : int or list
			cell number or list of cell numbers defining the patch of sky
		coarse_nside : int
			nside of the coarse pixelization
		coarse_order : str
			pixelization order of the coarse grid ('ring' or 'nest');
			self.order is used when omitted

		Returns
		-------
		ndarray : cell indices in pixel map

		Raises
		------
		ValueError : if coarse_nside is not lower than self.nside
		"""
		# nside of zero: no coarse grid, so every cell is selected
		if coarse_nside == 0:
			return np.arange(self.npix, dtype=int)

		order_in = self.order if coarse_order is None else coarse_order

		if coarse_nside >= self.nside:
			raise ValueError("coarse_nside (%s) must be lower than HealpixProjector.nside (%s)"%(coarse_nside, self.nside))

		# a lone cell number is wrapped so the marking loop can iterate
		if misc.is_number(coarse_cell):
			coarse_cell = [int(coarse_cell)]

		coarse_grid = HealpixProjector(nside=coarse_nside, order=order_in)

		# flag the requested cells on the coarse map
		flags = np.zeros(coarse_grid.npix, dtype='d')
		for index in coarse_cell:
			flags[index] = 1

		# upgrade the flag map to this grid's resolution and ordering
		fine_flags = healpy.ud_grade(flags, order_in=order_in, order_out=self.order, nside_out=self.nside)

		return np.where(fine_flags > 0)[0]
Exemplo n.º 4
0
# Load the comma-separated ids already recorded for general posts.
with open('srl_general.txt', 'r') as g:
    num_general = g.read().split(',')

# Fetch the page; the context manager closes the connection even when
# reading fails.
with urllib.request.urlopen(url) as response:
    data = response.read()
text = data.decode('utf-8')

count_new = 0
srl_arr_general = []

# Each piece after the first starts right behind a 'document_srl=' marker,
# i.e. with a candidate id; the first piece precedes any marker.
text_splitted = text.split('document_srl=')

# Append newly found ids; the file is closed even if the loop raises.
with open('srl_general.txt', 'a') as g:
    for chunk in text_splitted[1:]:
        srl = chunk.split('">')[0].split('#comment')[0]
        if not is_number(srl):
            continue
        # Skip ids listed in num_notices (defined elsewhere) and ids already
        # collected from this page, to prevent duplication.
        if srl in num_notices or srl in srl_arr_general:
            continue
        srl_arr_general.append(srl)
        if srl not in num_general:
            count_new += 1
            g.write(',' + srl)
            print('New post found : ' + srl)

if count_new != 0:
    print('Started generating feed...')
    # make FeedGenerator
    fg = FeedGenerator()
    fg.id('asdf')
Exemplo n.º 5
0
    def sample(self,
               density=None,
               n=None,
               cell=None,
               nside=None,
               order=None,
               min_sample=100,
               max_loops=10):
        """ Draw longitude and latitude pairs uniformly inside the mask.

        By default the points are drawn from the cells listed in
        self.params['survey_cells'].  If a healpix cell number (or list of
        numbers) is given then randoms will be drawn from within those cells
        only.  In this mode both the healpix nside parameter and ordering
        scheme should be given as arguments.

        After drawing randoms the ones that fall outside the polygon mask are
        discarded.

        Notes
        -----
        Either density or n must be given as argument.  If both are given,
        density will be used.

        When density is used a single batch is drawn and every point of it
        that falls inside the mask is returned.  When n is used, batches are
        drawn repeatedly until n points have been accepted and exactly n
        points are returned.

        Parameters
        ----------
        density : float
            number density of samples (number per square degree)
        n : int
            number of samples to draw (only used if density is not given)
        cell : int or list
            optional healpix cell number or list of cell numbers
        nside : int
            healpix nside parameter of `cell`
        order : str
            healpix ordering scheme of `cell`; passed with nside to
            self.grid.select_cells
        min_sample : int
            minimum number of points requested per draw
        max_loops : int
            limit on the number of draws when n is used; an Exception is
            raised once more than max_loops draws have been made without
            reaching n accepted points

        Returns
        -------
        lon, lat : random coordinates

        Raises
        ------
        ValueError : if neither density nor n is given, or if the one given
            is a non-numeric string, negative or not finite
        TypeError : if density or n has a type float() cannot convert
        Exception : if n points could not be accepted within max_loops draws

        """
        if density is None and n is None:
            raise ValueError(
                "sample has missing required argument.  Please pass density or n"
            )

        if density is not None:
            try:
                float(density)
            except ValueError:
                raise ValueError("Sample density must be a number, not '%s'" %
                                 str(density))
            if density < 0 or not np.isfinite(density):
                raise ValueError("Sample density must be positive, not '%s'" %
                                 str(density))

        if n is not None:
            try:
                float(n)
            except ValueError:
                raise ValueError("Sample n must be a number, not '%s'" %
                                 str(n))

            if n < 0 or not np.isfinite(n):
                raise ValueError("Sample n must be positive, not %s" % str(n))

        if self.params['pixel_mask'] is None:
            self._build_pixel_mask()

        if cell is None:
            cell = self.params['survey_cells']  # full sky
        else:
            # sample only selected patches defined by a healpix cell,
            # keeping the cells with a positive pixel_mask entry
            cell = self.grid.select_cells(cell, nside, order)
            sel = self.params['pixel_mask'][cell] > 0
            cell = cell[sel]

        if len(cell) == 0:
            # if there are no cells return empty arrays
            return np.array([]), np.array([])

        if misc.is_number(cell):
            n_cells = 1
            cell = int(cell)
        else:
            n_cells = len(cell)

        # density was validated above, so being given at all is sufficient
        density_mode = density is not None

        if density_mode:
            # expected count = area covered by the selected cells * density
            n = int(SPHERE_AREA * 1. / self.grid.npix * n_cells * density)

        try:
            n = int(n)
        except ValueError:
            raise ValueError("Sample count must be a number, not '%s'" %
                             type(n))

        if n < 0:
            raise ValueError("Sample count must be greater than 0, not %s" %
                             str(n))

        if n == 0:
            return np.array([]), np.array([])

        lon_out = []
        lat_out = []

        count = 0
        loop = 0
        while count < n:
            # NOTE(review): in density mode this still draws at least
            # min_sample points, so a small n can overshoot the requested
            # density -- confirm whether that is intended.
            remaining = max(min_sample, n - count)
            lon, lat = self.grid.random_sample(cell, remaining)

            # keep only the points inside the mask
            sel = self.contains(lon, lat)
            lon, lat = lon[sel], lat[sel]
            count += len(lon)

            lon_out.append(lon)
            lat_out.append(lat)

            if density_mode:
                break

            # Count the draw so that max_loops is actually enforced; without
            # this a mask that rejects every point would loop forever.
            loop += 1
            if count < n and loop > max_loops:
                raise Exception("sample hit max loops! %i" % max_loops)

        lon_out = np.concatenate(lon_out)
        lat_out = np.concatenate(lat_out)

        if not density_mode:
            # the last draw may have overshot; trim to exactly n points
            lon_out = lon_out[:n]
            lat_out = lat_out[:n]

        return lon_out, lat_out
Exemplo n.º 6
0
# Load the comma-separated ids already recorded for notices.
with open('srl_notices.txt', 'r') as f:
    num = f.read().split(',')

# Fetch the page; the context manager closes the connection even when
# reading fails.
with urllib.request.urlopen(url) as response:
    data = response.read()
text = data.decode('utf-8')

count_new = 0
srl_arr = []

# Each piece after the first starts right behind a 'document_srl=' marker,
# i.e. with a candidate id; the first piece precedes any marker.
text_splitted = text.split('document_srl=')

# Append newly found ids; the file is closed even if the loop raises.
with open('srl_notices.txt', 'a') as f:
    for chunk in text_splitted[1:]:
        srl = chunk.split('">')[0].split('#comment')[0]
        # Skip non-numeric candidates and ids already collected from this
        # page, to prevent duplication.
        if not is_number(srl) or srl in srl_arr:
            continue
        srl_arr.append(srl)
        if srl not in num:
            count_new += 1
            f.write(',' + srl)
            print('New post found : ' + srl)

if count_new != 0:
    print('Started generating feed...')
    # make FeedGenerator
    fg = FeedGenerator()
    fg.id('asdf')
    fg.title('SNU Physics Board RSS feed - notices')