Exemplo n.º 1
0
def get_movie_info(body):
    """Collect [name, grade, review count, quote] records from the page body.

    Walks the HTML line by line; a full record spans a title line, a
    rating/review pair, and a quote line. The running counter reaches a
    multiple of 5 exactly when all four fields have been seen.
    """
    lines = iter(body.split('\n'))
    results = []
    counter = 1
    name = grade = review_num = quote = ''
    while True:
        try:
            current = next(lines)
            if 'class="title"' in current:
                name = get_text(current)
                next(lines)  # skip the line that follows the title
                counter += 1
            elif 'class="rating_num"' in current:
                grade = get_text(current)
                next(lines)  # skip the separator line
                review_num = get_text(next(lines))
                counter += 2
            elif 'class="inq"' in current:
                quote = get_text(current)
                counter += 1
        except StopIteration:
            # Iterator exhausted: stop scanning.
            break
        if counter % 5 == 0:
            results.append([name, grade, review_num, quote])
            counter = 1
    return results
Exemplo n.º 2
0
def read_page(page):
    """Fetch *page* and return its title plus body text as one formatted string.

    The three sections are read from divs with ids 'maegaki', 'honbun' and
    'atogaki' (presumably foreword / main text / afterword -- TODO confirm),
    and joined with a horizontal-rule separator; empty sections are dropped.
    """
    html = read_html(page.url)
    soup = Soup(html)

    text_top = get_text(soup.find('div', id='maegaki'))
    # Debug output: line count of the top section.
    print(text_top.count('\n'))
    text_mid = get_text(soup.find('div', id='honbun'))
    text_bot = get_text(soup.find('div', id='atogaki'))

    # Keep only the sections that actually contain text.
    texts = [text for text in (text_top, text_mid, text_bot) if text]

    story = '''

────────────────────────────────

'''.join(texts)

    text = '''────────────────────────────────

  ◆  {}

────────────────────────────────


{}'''.format(page.title, story)

    return text
Exemplo n.º 3
0
 def play_audio(self, item, title):
     """Open the video at *item*, watch it to the end, and record it as seen.

     If this is the last required video and watch-time quota remains, the
     video is replayed until the quota is used up.

     :param item: xpath of the list entry to click
     :param title: video title, used for logging and the seen-list
     """
     print(f'点开了视频《{title}》')
     # The duration label lives at one of two alternative sub-paths.
     get_duration = lambda: utils.get_text(self.driver, item + '/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.TextView') or \
                            utils.get_text(self.driver, item + '/android.view.ViewGroup/android.widget.LinearLayout[2]/android.widget.TextView')
     duration_text = get_duration()
     if not duration_text:
         # Entry may be partly off-screen; scroll and retry once.
         utils.swipe_up(self.driver)
         duration_text = get_duration()
     # Parse "mm:ss" into seconds.
     duration_text = duration_text.split(':')
     duration = int(duration_text[0]) * 60 + int(duration_text[1])
     print(f'这个视频要看:{duration}秒')
     utils.click(self.driver, item)
     while True:
         start = time.time()
         time.sleep(1)
         self.wait(duration)
         dots = 1
         # The replay control only appears once playback has finished.
         replay = '/hierarchy/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.widget.FrameLayout/android.widget.FrameLayout[2]/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.LinearLayout[1]/android.widget.FrameLayout/android.widget.FrameLayout[2]/android.widget.FrameLayout/android.widget.FrameLayout[3]/android.widget.FrameLayout[2]/android.widget.FrameLayout/android.widget.LinearLayout'
         while not utils.find_element(self.driver, replay):
             # Animated "still waiting..." ticker (1 to 5 dots).
             print('再等会' + dots * '.', end='\r', flush=True)
             dots = max((dots + 1) % 6, 1)
             time.sleep(1)
         print('视频放完了')
         # NOTE(review): 'audiuo_time' is presumably a typo for 'audio_time',
         # but it is used consistently elsewhere -- renaming would break callers.
         self.audiuo_time -= time.time() - start
         if self.audio_counter == 1 and self.audiuo_time > 0:
             print(f'虽然视频看完了,但是还剩下{int(self.audiuo_time)}秒,所以重看一次')
             time.sleep(random.uniform(0.8, 1.8))
             if utils.click(self.driver, replay):
                 self.wait(duration)
         else:
             break
     self.audio_counter -= 1
     self.audio.append(title)
     print(f'看完惹,还有{self.audio_counter}个要看')
     self.driver.back()
Exemplo n.º 4
0
    def run_inference(self, image):
        """
        Performs the inferencing and OCR

        :param image: An image(numpy array) to perform inferencing on
        :return: List of text detected, returns empty list of no object is detected
        """
        input_shape = self.get_input_shape()
        # Scale pixels to [-1, 1] then quantize to the int8 range the model expects.
        preprocess_function = lambda img: quantize((img / 127.5) - 1, 128, 127)
        out_list = self.invoke(image, preprocess_function)

        # model outputs 3 tensors now, normalized to between 0 and 1
        score_map = dequantize(out_list[0], 128, 127)
        geo_loc_map = dequantize(out_list[1], 128, 127)
        geo_angle = dequantize(out_list[2], 128, 127)
        # Undo output normalization: scores to [0, 1], locations to pixels,
        # angles scaled by 0.7853981633974483 (= pi/4).
        score_map = (score_map + 1) * 0.5
        geo_loc_map = (geo_loc_map + 1) * 256
        geo_angle = 0.7853981633974483 * geo_angle
        geo_map = np.concatenate((geo_loc_map, geo_angle), axis=3)

        boxes = text_detection(score_map=score_map, geo_map=geo_map)

        if boxes is not None:
            # First 8 values per box are the 4 (x, y) corner points.
            boxes = boxes[:, :8].reshape((-1, 4, 2))
            # Rescale from model-input coordinates back to the original image.
            # NOTE(review): x is divided by input_shape[1]/image.shape[0] and y by
            # input_shape[2]/image.shape[1]; image.shape[0] is normally height,
            # so the axis pairing looks swapped -- confirm input_shape layout.
            boxes[:, :, 0] /= input_shape[1] / image.shape[0]
            boxes[:, :, 1] /= input_shape[2] / image.shape[1]

            output_text = []
            for box in boxes:
                box = sort_poly(box.astype(np.int32))
                # Skip degenerate boxes whose sides are shorter than 5 pixels.
                if np.linalg.norm(box[0] -
                                  box[1]) < 5 or np.linalg.norm(box[3] -
                                                                box[0]) < 5:
                    continue
                (x_max, x_min), (y_max,
                                 y_min) = xy_maxmin(box[:, 0], box[:, 1])

                # Clamp the crop rectangle to the image bounds.
                if x_max > image.shape[0]:
                    x_max = image.shape[0]
                if x_min < 0:
                    x_min = 0
                if y_max > image.shape[1]:
                    y_max = image.shape[1]
                if y_min < 0:
                    y_min = 0

                # Draw the detected polygon onto the image (in-place side effect).
                cv2.polylines(image, [box.astype(np.int32).reshape(-1, 1, 2)],
                              True,
                              color=(255, 255, 0),
                              thickness=2)

                sub_img = image[y_min:y_max, x_min:x_max]
                # OCR the crop; keep only non-empty results.
                txt = get_text(sub_img)
                if txt != '':
                    output_text.append(get_text(sub_img))
            return output_text
        return []
Exemplo n.º 5
0
    def run(self):
        """Main session loop: read the required number of articles, then watch
        the required number of videos, retrying the whole session on error."""
        while True:
            try:
                self.connect()
                time.sleep(6)  # Splash Screen

                utils.swipe_left(self.driver, y_ratio=3 / 4)
                print('开始读要闻')
                # NOTE(review): the inner while repeats the outer condition, so
                # the outer loop is effectively redundant.
                while self.artical_counter:
                    while self.artical_counter:
                        item = f'/hierarchy/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.widget.RelativeLayout/android.widget.LinearLayout/android.widget.FrameLayout[2]/android.support.v4.view.ViewPager/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.view.ViewGroup/android.widget.FrameLayout/android.widget.FrameLayout[2]/android.widget.LinearLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.widget.ListView/android.widget.FrameLayout[1]/android.widget.LinearLayout'
                        title = utils.get_text(
                            self.driver, item + '/android.widget.TextView')
                        # Read only unseen articles; otherwise scroll for more.
                        if title and title not in self.articles:
                            self.read_article(item, title)
                        else:
                            utils.swipe_up(self.driver, y_ratio=5 / 9)

                # Switch to the TV tab, retrying until the click lands.
                while not utils.click(
                        self.driver,
                        '//android.widget.FrameLayout[@content-desc="电视台"]/android.widget.RelativeLayout'
                ):
                    time.sleep(1)
                print('开始看视频')

                while self.audio_counter:
                    while self.audio_counter:
                        item = f'/hierarchy/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.widget.RelativeLayout/android.widget.LinearLayout/android.widget.FrameLayout[2]/android.support.v4.view.ViewPager/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.view.ViewGroup/android.widget.FrameLayout/android.widget.FrameLayout[2]/android.widget.LinearLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.widget.ListView/android.widget.FrameLayout[1]/android.widget.LinearLayout'
                        # The title lives at one of two alternative sub-paths.
                        title = utils.get_text(
                            self.driver, item +
                            '/android.widget.LinearLayout/android.widget.TextView'
                        ) or utils.get_text(
                            self.driver, item +
                            '/android.view.ViewGroup/android.widget.TextView')
                        if title and title not in self.audio:
                            self.play_audio(item, title)
                        else:
                            utils.swipe_up(self.driver, y_ratio=5 / 9)

                # self.subscribe()
                print('搞定啦~')
                break
            except KeyboardInterrupt:
                break
            except Exception as e:
                print('Exception:', e)
            finally:
                # NOTE(review): the driver is quit after *every* attempt, so a
                # retry relies on self.connect() creating a fresh session.
                self.driver.quit()
Exemplo n.º 6
0
 def diff(self, archivo):
     """Compare the tables of the current buffer against those in *archivo*.

     Prints every table and field that is present on one side but missing
     on the other. Output is purely informational (console prints).
     """
     lines = utils.get_text().splitlines()
     # Use a context manager so the file handle is always closed; the
     # original `open(archivo).readlines()` leaked the descriptor.
     with open(archivo) as f:
         lineas = f.readlines()
     tablas1 = self.obtenerTablas(lines)
     tablas2 = self.obtenerTablas(lineas)
     print("va a hacer el diff")
     print("Analizando la Base de datos actual")
     for key in tablas1.keys():
         print("\tAnalisis de Tabla "+key)
         if not tablas2.get(key):
             print("\t\tNo esta en la BD vieja")
         else:
             for campo in tablas1[key]:
                 if campo not in tablas2[key]:
                     print("\t\tFalta el campo {"+campo+"} en la BD Vieja")

     print("Analizando la Base de datos Vieja")
     for key in tablas2.keys():
         print("\tAnalisis de Tabla "+key)
         if not tablas1.get(key):
             print("\t\tNo esta en la BD Nueva")
         else:
             for campo in tablas2[key]:
                 if campo not in tablas1[key]:
                     print("\t\tFalta el campo {"+campo+"} en la BD Nueva")
Exemplo n.º 7
0
 def play_video(self, item):
     """Click the video at *item*, wait out its full duration, and deduct the
     watched seconds from the remaining watch-time quota."""
     title = utils.get_text(
         self.driver,
         '/hierarchy/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.widget.FrameLayout/android.widget.FrameLayout[2]/android.widget.FrameLayout/android.widget.FrameLayout[2]/android.view.ViewGroup/android.support.v7.widget.RecyclerView/android.widget.FrameLayout/android.widget.FrameLayout/android.widget.FrameLayout[2]/android.widget.FrameLayout/android.widget.FrameLayout/android.widget.FrameLayout/android.widget.TextView'
     )
     print(f'点开了视频《{title}》')
     duration = utils.get_text(
         self.driver,
         '/hierarchy/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.widget.FrameLayout/android.widget.FrameLayout[2]/android.widget.FrameLayout/android.widget.FrameLayout[2]/android.view.ViewGroup/android.support.v7.widget.RecyclerView/android.widget.FrameLayout/android.widget.FrameLayout/android.widget.FrameLayout[2]/android.widget.FrameLayout/android.widget.FrameLayout/android.widget.FrameLayout/android.widget.LinearLayout[2]/android.widget.TextView'
     )
     # The label reads "elapsed / mm:ss"; take the total and convert to seconds.
     duration_text = duration.split(' / ')[1].split(':')
     duration = int(duration_text[0]) * 60 + int(duration_text[1])
     print(f'这个视频要看:{duration}秒')
     utils.click(self.driver, item)
     self.wait(duration)
     # NOTE(review): 'audiuo_time' looks like a typo for 'audio_time', kept
     # because the attribute is used under this name elsewhere in the class.
     self.audiuo_time -= duration
Exemplo n.º 8
0
 def run(self, edit):
     """Strip lone curly braces from the buffer and write the result back."""
     print("entro")
     active_view = sublime.active_window().active_view()
     cleaned = quitCurlyAlones(utils.get_text())
     active_view.run_command("replace_all", {"text": cleaned})
Exemplo n.º 9
0
	def on_query_completions(self, view, prefix, locations):
		"""Offer Java member completions when the user types '.' after a word."""
		if utils.get_language() != "java":return
		ultimo=utils.get_last_character()
		if ultimo=="." and utils.get_language()=="java":
			window=sublime.active_window()
			view=window.active_view()
			word=utils.get_word(-1)
			variables=Java().get_variables()
			# Default: treat the word itself as a type name (static access).
			tipo=word
			static=True
			if variables.get(word):
				# The word is a known variable: complete on its declared type.
				tipo=variables[word]
				static=False

			# Find the import statement that brings the type into scope.
			package=re.findall("import ([\w.]+\.%s);"%tipo, utils.get_text())
			
			if not package:
				# java.lang types need no import; fall back to the bundled JSON.
				posibleRuta=os.path.join(PATH_JSON, "java", "lang", tipo+".json")
				if os.path.exists(posibleRuta):
					package=["java.lang."+tipo]

			if package:
				package=package[0]
				# Prefer a class defined in the current project, if available.
				clase=self.get_project_class(package)
				if clase:
					return utils.get_completion_list(clase["members"])
				ruta=package.replace(".", os.sep)+".json"
				ruta=os.path.join(PATH_JSON, ruta)
				print("ya se determino")
				objeto=utils.load_json(ruta)
				# Static access completes class members, instance access object members.
				miembros="clase" if static else "object"
				return utils.get_completion_list(objeto[miembros])
Exemplo n.º 10
0
    def _enumerate_workspaces(self):
        """Rebuild ``self.workspaces`` as (title, [SDCollection, ...]) tuples
        from the parsed SWORD2 service document."""
        if not self.valid:
            sd_l.error(
                "The service document didn't pass the SWORD2 validation steps ('MUST' statements in spec). The workspaces and collections will not be enumerated."
            )
            return

        if self.sd_uri:
            sd_l.info(
                "Enumerating workspaces and collections from the service document for %s"
                % self.sd_uri)

        # Throw away any previously cached enumeration before rebuilding.
        self.workspaces = []
        for ws_element in self.service_dom.findall(NS['app'] % "workspace"):
            title = get_text(ws_element, NS['atom'] % 'title')
            sd_l.debug("Found workspace '%s'" % title)
            # Each app:collection (plus SWORD extensions) becomes an SDCollection.
            parsed_collections = []
            for coll_dom in ws_element.findall(NS['app'] % 'collection'):
                coll = SDCollection()
                coll.load_from_etree(coll_dom)
                parsed_collections.append(coll)
            self.workspaces.append((title, parsed_collections))
Exemplo n.º 11
0
def parsing_item(tag):
    """Yield (category, time, image-text) triples extracted from *tag*.

    The last non-empty h4 child text wins as the time stamp, and the last
    non-empty <strong> text inside an h4 wins as the category; then every
    bare-string child of an <img> tag is yielded with those values.
    """
    category, the_time = '', ''
    for header in tag.findAll('h4'):
        for header_child in header.contents:
            the_time = get_text(header_child) or the_time

        for strong in header.findAll('strong'):
            for strong_child in strong.contents:
                category = get_text(strong_child) or category

    for image in tag.findAll('img'):
        for image_child in image.contents:
            # Only plain strings (no .name) with visible content qualify.
            if not image_child.name and image_child.strip():
                yield category, the_time, image_child.strip()
Exemplo n.º 12
0
    def test_epoch_end(self, outputs):
        """
        Called at the end of test to aggregate outputs, similar to `validation_epoch_end`.
        :param outputs: list of individual outputs of each test step
        """

        # Concatenate the per-step tensors along the batch dimension.
        images = []
        captions = []
        for output in outputs:
            captions.append(output["captions"])
            images.append(output["images"])
        captions = torch.cat(captions, dim=0)
        images = torch.cat(images, dim=0)

        # Decode token ids to strings and log the first (caption, image) pair.
        captions = get_text(captions.cpu().numpy(), self.tokenizer)
        self.logger.experiment.add_image(captions[0], images[0])

        # Write outputs to disk
        with open("outputs.txt", "w") as file:
            for caption in captions:
                file.write(caption)
                file.write("\n")

        # create output dict
        tqdm_dict = {}
        results = {}
        results["progress_bar"] = tqdm_dict
        results["log"] = tqdm_dict

        return results
Exemplo n.º 13
0
 def on_item(self, ch, method, header, body):
     """
     Fires when we receive a new item to decode.
     """

     # Lookup data in store, body should actually be an ObjectId
     item = self.userstream_store.find_one({"_id": ObjectId(body)})

     # Only direct messages and mentions are processed further.
     if utils.item_a_direct_message(item) or utils.item_a_mention(item):
         text, screen_name = (utils.get_text(item), utils.get_screen_name(item))
         print " [x] Received %r from %r" % (text, screen_name)

         # Any Spotify tracks?
         tracks = spotify.lookup_tracks(text)

         if len(tracks) > 0:
             # Save to playlist
             for track in tracks:
                 id = self.playlist_store.save({'track':track, 'status':'new', 'source':'twitter', 'from':utils.get_sender(item)})
                 # Send each track to the broadcaster's 'receive' queue, so it can be broadcast 
                 # to all connected clients
                 print " [x] Sending %r to broadcaster" % (track['track']['name'],)
                 self.amqp_primary_channel.basic_publish(exchange='',
                                                         routing_key=self.amqp_out_queue,
                                                         body=str(id),
                                                         properties=pika.BasicProperties(
                                                             delivery_mode=2, # make message persistent
                                                         ))

         # Confirm delivery
         ch.basic_ack(delivery_tag=method.delivery_tag)
Exemplo n.º 14
0
    def run(self, edit):
        """Generate two Java constructors for the class in the current view:
        one taking only the first private attribute and one taking all of them,
        both inserted at the start of the current line.

        Attributes are found with a regex over ``private <Type> <name>;`` and
        ``private <Type> <name> = ...`` declarations.
        """
        text=utils.get_text()
        # NOTE(review): when the '=' alternative matches, the captures land in
        # atributo[2]/[3] while the loop below reads atributo[0]/[1], which
        # would then be empty strings -- looks like a latent bug; confirm.
        atributos=re.findall("private ([\w]+) ([\w]+)\s*;|private ([\w]+) ([\w]+)\s*=", text, flags=re.IGNORECASE)
        nombreClase=re.findall("public class ([\w]+)", text, flags=re.IGNORECASE)
        # Nothing to do without a class name and at least one attribute.
        if not nombreClase or not atributos:return

        nombreClase=nombreClase[0]
        listAtributos=[]
        strAtributos=""
        strCabecera=""
        # %-template for the all-attributes constructor.
        strConstructor="""\tpublic %(nombreClase)s(%(cabeceraConstructor)s){
%(atributos)s
    }"""
        for atributo in atributos:
            # Build "Type name," parameter list and "this.name=name;" body lines.
            strCabecera+=atributo[0]+" "+atributo[1]+","
            strAtributos+="\t\tthis."+atributo[1]+"="+atributo[1]+";\n"
        # Trim the trailing comma / newline.
        if strCabecera:strCabecera=strCabecera[:-1]
        if strAtributos:strAtributos=strAtributos[:-1]

        dConstructor={"atributos":strAtributos, "nombreClase":nombreClase, "cabeceraConstructor":strCabecera}
        strConstructorMaxivo=strConstructor%dConstructor
        window=sublime.active_window()
        view=window.active_view()
        # Insert the single-attribute constructor first, then the full one.
        view.insert(edit, view.line(view.sel()[0]).a, """\tpublic %(nombreClase)s(%(cabeceraConstructor)s){
        this.%(atributo)s=%(atributo)s;
    }\n\n"""%{"nombreClase":nombreClase, "atributo":atributos[0][1], "cabeceraConstructor":atributos[0][0]+" "+atributos[0][1]})
        view.insert(edit, view.line(view.sel()[0]).a, strConstructorMaxivo)




                
 def on_data(self, data):
     """Handle a raw streaming payload: persist it and, for direct messages
     and mentions, publish the stored id to the AMQP queue. Returns True to
     keep the stream alive."""
     if not data.strip():
         return True

     print " [x] Got:", data

     # Decode JSON data
     item = json.loads(data)

     # Save data
     id = self.store.save(item)

     # Is this item a direct message?
     a_direct_message = utils.item_a_direct_message(item)

     # Is this item a mention?
     a_mention = utils.item_a_mention(item)

     # Continue processing further down the chain
     if a_direct_message or a_mention:
         print " [x] Received", utils.get_screen_name(item), ":", utils.get_text(item)
         self.channel.basic_publish(exchange='',
             routing_key=self.amqp_queue,
             body=str(id),
             properties=pika.BasicProperties(
                 delivery_mode=2, # make message persistent
         ))

     return True
Exemplo n.º 16
0
def text_from_audio():
    """Transcribe the stored audio file and render the transcript page.

    On POST the user-corrected transcript from the form replaces the
    machine transcription and is passed on to ``show_form``.
    """
    # Close the wave file deterministically; the original leaked the handle.
    fin = wave.open('audios/audio.wav')
    try:
        text = get_text(fin)
    finally:
        fin.close()
    if request.method == 'POST':
        text = request.form['transcript']
        return show_form(text)
    return render_template('transcript.html', message=text)
Exemplo n.º 17
0
    def validate(self):
        """Check the parsed service document against the SWORD2 'MUST' rules.

        :return: True when the document looks like a valid SWORD 2.0 service
            document, False otherwise. Problems are logged via ``sd_l``.
        """
        valid = True
        if not self.parsed:
            return False
        # The SWORD server MUST specify the sword:version element with a value of 2.0
        # -- MUST have sword:version element
        # -- MUST have value of '2.0'
        self.version = get_text(self.service_dom, NS['sword'] % "version")
        if self.version:
            if self.version != "2.0":
                # Not a SWORD2 server...
                # Fail here?
                sd_l.error(
                    "The service document states that the server's endpoint is not SWORD 2.0 - stated version:%s"
                    % self.version)
                valid = False
        else:
            sd_l.error("The service document did not have a sword:version")
            valid = False

        # The SWORD server MAY specify the sword:maxUploadSize (in kB) of content that can be uploaded in one request [SWORD003] as a child of the app:service element. If provided this MUST contain an integer.
        maxupload = get_text(self.service_dom, NS['sword'] % "maxUploadSize")
        if maxupload:
            try:
                self.maxUploadSize = int(maxupload)
            except ValueError:
                # Unparsable as an integer. Enough to fail a validation?
                # Strictly... yep
                sd_l.error(
                    "The service document did not have maximum upload size parseable as an integer."
                )
                valid = False

        # Check for the first workspace for a collection element, just to make sure there is something there.
        test_workspace = self.service_dom.find(NS['app'] % "workspace")
        # PEP 8: compare to None with `is not`; also avoids relying on Element
        # equality semantics (Elements are falsy when they have no children).
        if test_workspace is not None:
            sd_l.debug(
                "At least one app:workspace found, with at least one app:collection within it."
            )
        else:
            valid = False
            sd_l.error(
                "Could not find a app:workspace element in the service document."
            )

        return valid
Exemplo n.º 18
0
 def run(self):
     """Serve the current buffer's text to every client connecting on port 8888.

     Each accepted connection receives the text once and is then closed.
     """
     s=socket.socket(socket.AF_INET, socket.SOCK_STREAM)
     s.bind(("", 8888))
     s.listen(15)
     while True:
         con, addr=s.accept()
         try:
             con.sendall(bytes(utils.get_text(), 'UTF-8'))
         finally:
             # The original never closed client sockets, leaking one
             # descriptor per connection. (Its trailing s.close() after the
             # infinite loop was unreachable and has been removed.)
             con.close()
Exemplo n.º 19
0
 def guardar(self, nombre):
     """Save the current buffer as a template named *nombre*.

     Spaces in the name become underscores; the extension is taken from the
     file, falling back to the syntax default.
     """
     if not nombre:
         return
     extension = utils.get_fileext()
     if not extension:
         extension = utils.get_ext()
     safe_name = nombre.replace(" ", "_")
     # Escape dollar signs so they survive later template substitution.
     contents = utils.get_text().replace("$", "\$")
     utils.file_write(TEMPLATES_PATH + safe_name + "." + extension, contents)
Exemplo n.º 20
0
 def load_from_etree(self, collection):
     """
     Parse an `etree.SubElement` into attributes in this object.
     
     Also, caches the most recently used DOM object it is passed in
     `self.dom`
     """
     self._reset()
     self.dom = collection
     self.title = get_text(collection, NS['atom'] % 'title')
     # MUST have href attribute
     self.href = collection.attrib.get('href', None)
     # Accept and Accept multipart
     for accept in collection.findall(NS['app'] % 'accept'):
         if accept.attrib.get("alternate", None) == "multipart-related":
             self.accept_multipart.append(accept.text)
         else:
             self.accept.append(accept.text)
     # Categories
     for category_element in collection.findall(NS['atom'] % 'category'):
         self.categories.append(Category(dom=category_element))
     # SWORD extensions:
     self.collectionPolicy = get_text(collection, NS['sword'] % 'collectionPolicy')

     # Mediation: True/False
     # NOTE(review): presumably get_text returns '' for a missing element so
     # self.mediation defaults to False; a None return would raise here -- confirm.
     mediation = get_text(collection, NS['sword'] % 'mediation')
     self.mediation = mediation.lower() == "true"

     self.treatment = get_text(collection, NS['sword'] % 'treatment')
     self.description = get_text(collection, NS['dcterms'] % 'abstract')
     # These elements may repeat, hence plural=True (list results).
     self.service = get_text(collection, NS['sword'] % 'service', plural = True)
     self.acceptPackaging = get_text(collection, NS['sword'] % 'acceptPackaging', plural = True)

     # Log collection details:
     coll_l.debug(str(self))
    def run(self, edit):
        """Replace the buffer with its lines de-duplicated.

        Uses ``dict.fromkeys`` so the surviving lines keep their original
        (first-seen) order; the original ``set()`` removed duplicates but
        scrambled the line order arbitrarily.
        """
        lineas = dict.fromkeys(utils.get_text().splitlines())
        # Every kept line gets a trailing newline, as before.
        utils.set_text("".join(linea + "\n" for linea in lineas))

                
Exemplo n.º 22
0
def parsing_td_time(items):
    """Return the text of the last <strong> child found inside any
    'airSchedule-time' div across all cells in *items* ('' if none)."""
    result = ''
    for cell in items:
        for time_div in cell.find_all('div', {'class': 'airSchedule-time'}):
            strong = time_div.find('strong')
            if not strong:
                continue
            # Later matches overwrite earlier ones, so the last child wins.
            for node in strong.contents:
                result = get_text(node)
    return result
Exemplo n.º 23
0
    def process(self, msg):
        """Extract the message text, fetch a reply for this user, and send it."""
        payload = {
            'text': u.get_text(msg),
            'user_id': self.user_id,
        }
        self.send_reply(self.get_reply(payload))
Exemplo n.º 24
0
def crawl_script_of(title):
    """Download the IMSDb script for *title* and write character/line pairs
    to a CSV file.

    The <pre> contents alternate between speaker Tags and dialogue strings;
    a simple two-state machine ('line' -> 'char' -> 'line') pairs them up.
    """
    print("crawling:", title)
    url = "https://imsdb.com/scripts/" + title + ".html"
    soup = get_soup(url)
    crawled = soup.select("pre")[0]

    # A couple of scripts nest an extra <pre>.
    if title in ["Shawshank-Redemption,-The", "Toy-Story"]:
        crawled = crawled.select("pre")[0]

    former = "line"
    char_elem = None
    lines = []

    for c in crawled:
        # A Tag following a line (and not just newlines) names the speaker.
        if type(c) is Tag and former == "line" and re.match(
                "^\n{1,}$", get_text(c)) is None:
            char_elem = get_text(c)
            former = "char"
            print("char:", char_elem)
        elif type(c) is NavigableString and former == "char":
            line_elem = get_text_plain(c)
            former = "line"
            print("line:", line_elem)
            print("---")
            if len(char_elem) > 1 and len(line_elem) > 1:
                if line_elem[0] != '\n':
                    lines.append([char_elem, line_elem])
        else:
            print("initial:", former)
            print(get_text(c) if type(c) is Tag else get_text_plain(c))
            former = "line"

    # Context manager closes the CSV even on error; the original used a bare
    # open()/close() pair and leaked the handle on any exception in between.
    with open("/mnt/UniversalUse/data/scripts_for_test/d/" + title + ".csv",
              "wt") as f:
        for l in lines:
            l[0] = remove_brackets(l[0])
            l[1] = remove_brackets(l[1])
            if l[0] != '""' and l[1] != '""':
                print(l[0], "|||", l[1])
                f.write(l[0] + "," + l[1] + "\n")
Exemplo n.º 25
0
def behave(button, bot, update):
    """Dispatch a pressed button to its handler.

    Keys are checked in priority order: 'menu', 'action', 'text_file',
    'text'; only the first matching handler runs.
    """
    if 'menu' in button:
        generic_menu(button['menu'], update)
        return
    if 'action' in button:
        action_manager(bot, update, button['action'])
        return
    if 'text_file' in button:
        log_message(
            update.effective_message.reply_text(
                utils.get_text(button['text_file'])))
        return
    if 'text' in button:
        log_message(update.effective_message.reply_text(button['text']))
Exemplo n.º 26
0
def get_info(url, soup=None):
    """Return a dict with the artist, title and synopsis scraped from *url*.

    Pass *soup* to reuse an already-parsed page instead of downloading.
    """
    if soup is None:
        soup = Soup(read_html(url))

    author = soup.find('span', {'itemprop': 'author'}).text.strip()
    name = soup.find('span', {'itemprop': 'name'}).text.strip()
    sections = get_sss(soup)
    return {
        'artist': author,
        'title': name,
        # The synopsis lives in the second-to-last section; '' if absent.
        'novel_ex': get_text(sections[-2], ''),
    }
Exemplo n.º 27
0
def create_data(root):
    """Build a {key: (text, per-character class array)} dict from paired
    json/txt annotation files under *root*.

    For each annotated field value, its character span inside the raw text is
    labelled with the field's integer id from ``field_val``; 0 means no field.
    """
    json_files, txt_files = get_files(root)
    keys = [os.path.splitext(os.path.basename(f))[0] for f in json_files]

    data = {}
    for key, json_fn, txt_fn in zip(keys, json_files, txt_files):
        text = get_text(txt_fn)
        with open(json_fn, "r", encoding="utf-8") as fp:
            json_info = json.load(fp)

        # Normalized search copy: tabs/newlines become spaces, upper-cased.
        text_space = regex.sub(r"[\t\n]", " ", text).upper()
        text_class = np.zeros(len(text), dtype=int)

        for field in json_info.keys():
            i = field_val[field]
            v = json_info[field]

            if field == "total":
                # Totals are ambiguous (many numbers per document): prefer the
                # occurrence closest after a TOTAL/GROSS/AMOUNT keyword on the
                # same line (no newline between anchor and value).
                anchor = [i.start() for i in re.finditer('TOTAL', text_space)] + \
                         [i.start() for i in re.finditer('GROSS', text_space)] + \
                         [i.start() for i in re.finditer('AMOUNT', text_space)]

                points = [i.start() for i in re.finditer(v, text_space)]

                pos, dist = -1, 99999
                for p1 in points:
                    for p2 in anchor:
                        if p1 - p2 < 0:
                            continue
                        if '\n' in text[p2: p1]:
                            continue
                        if dist > p1 - p2:
                            dist = p1 - p2
                            pos = p1
                if pos == -1:
                    pos = text_space.find(v)
            else:
                pos = text_space.find(v)
                if pos == -1:
                    # Fuzzy fallback: allow up to 3 edit errors. The {e<=n}
                    # syntax needs the third-party `regex` module, not `re`.
                    s = None
                    e = 0
                    while s is None and e < 3:
                        e += 1
                        s = regex.search("(" + v + "){e<=" + str(e) + "}", text_space)
                    if s is not None:
                        v = s[0]
                        pos = text_space.find(v)

            # NOTE(review): if the value is still not found, pos stays -1 and
            # this labels a slice at the *end* of the array -- confirm intended.
            text_class[pos: pos + len(v)] = i

        data[key] = (text, text_class)
    return data
Exemplo n.º 28
0
 def validate(self):
     """Check the parsed service document against the SWORD2 'MUST' rules.

     :return: True when the document looks like a valid SWORD 2.0 service
         document, False otherwise. Problems are logged via ``sd_l``.
     """
     valid = True
     if not self.parsed:
         return False
     # The SWORD server MUST specify the sword:version element with a value of 2.0
     # -- MUST have sword:version element
     # -- MUST have value of '2.0'
     self.version = get_text(self.service_dom, NS['sword'] % "version")
     if self.version:
         if self.version != "2.0":
             # Not a SWORD2 server...
             # Fail here?
             sd_l.error("The service document states that the server's endpoint is not SWORD 2.0 - stated version:%s" % self.version)
             valid = False
     else:
         sd_l.error("The service document did not have a sword:version")
         valid = False

     # The SWORD server MAY specify the sword:maxUploadSize (in kB) of content that can be uploaded in one request [SWORD003] as a child of the app:service element. If provided this MUST contain an integer.
     maxupload = get_text(self.service_dom, NS['sword'] % "maxUploadSize")
     if maxupload:
         try:
             self.maxUploadSize = int(maxupload)
         except ValueError:
             # Unparsable as an integer. Enough to fail a validation?
             # Strictly... yep
             sd_l.error("The service document did not have maximum upload size parseable as an integer.")
             valid = False

     # Check for the first workspace for a collection element, just to make sure there is something there.
     test_workspace = self.service_dom.find(NS['app'] % "workspace")
     # PEP 8: compare to None with `is not`; also avoids relying on Element
     # equality semantics (Elements are falsy when they have no children).
     if test_workspace is not None:
         sd_l.debug("At least one app:workspace found, with at least one app:collection within it.")
     else:
         valid = False
         sd_l.error("Could not find a app:workspace element in the service document.")

     return valid
Exemplo n.º 29
0
def run_predict_txt(model):
    """Run prediction over every .txt file under test_data, one at a time.

    Failures are best-effort: the error and file name are printed and the
    loop moves on to the next file.
    """
    root = 'test_data'
    for name in sorted(os.listdir(root)):
        try:
            if not name.endswith('txt'):
                continue
            path = os.path.join(root, name)
            contents = get_text(path)

            predict(args.device, model, [path], [contents])

        except Exception as e:
            print(str(e))
            print(path)
Exemplo n.º 30
0
def get_bleu(model, test_iterator, TRG, transformer):
    """Decode the test set with ``model`` and print corpus BLEU (scaled x100).

    model: translation model (transformer or RNN seq2seq interface)
    test_iterator: iterator yielding batches with ``src``/``trg`` tensors
    TRG: target-language field whose ``vocab`` maps token ids back to words
    transformer: True when the model expects batch-first input and returns
        (output, attention); False for the seq2seq call signature
    """
    original_text = []
    generated_text = []
    with torch.no_grad():
        for i, batch in tqdm.tqdm(enumerate(test_iterator)):
            src = batch.src
            trg = batch.trg

            if transformer:
                # Iterator yields (seq_len, batch); transformer wants (batch, seq_len).
                src = src.permute(1, 0)
                trg = trg.permute(1, 0)
                try:
                    output, _ = model(src, trg)
                    output = output.permute(1, 0, 2)
                except IndexError as e:
                    # NOTE(review): brace-style message with `e=` kwarg works with
                    # loguru-style loggers; stdlib logging would reject it — confirm
                    # which logger this is.
                    logger.warning("get bleu index error {e}", e=e)
                    break
            else:
                output = model(src, trg, 0)  # turn off teacher forcing

            # trg = [trg sent len, batch size]
            # output = [trg sent len, batch size, output dim]
            if transformer:
                output = output.argmax(dim=-1).permute(1, 0)
            else:
                output = output.argmax(dim=-1)

            # Transposing recovers one sentence per batch element; output[1:]
            # drops the first decoder step (the SOS position).
            original_text.extend(
                [get_text(x, TRG.vocab) for x in trg.cpu().numpy().T])
            generated_text.extend([
                get_text(x, TRG.vocab)
                for x in output[1:].detach().cpu().numpy().T
            ])

    print(
        corpus_bleu([[text] for text in original_text], generated_text) * 100)
Exemplo n.º 31
0
    def run(self, edit, **args):
        """Insert the template whose name matches ``args['nombre']``.

        If the current view is empty the template text is inserted directly;
        otherwise the user is prompted for a file name to create.
        """
        nombre = args.get("nombre")
        if not nombre:
            return
        wanted = nombre.lower()
        for filename in os.listdir(TEMPLATES_PATH):
            # Compare against the template's basename (extension stripped).
            if wanted != filename.lower()[:filename.rfind(".")]:
                continue
            contenido = utils.file_read(TEMPLATES_PATH + "/" + filename)
            self.texto = contenido
            if not utils.get_text().strip():
                self.insertar()
            else:
                # View already has text: ask for a new file name instead.
                self.texto = contenido
                ventana = sublime.active_window()
                ventana.show_input_panel("", filename[filename.rfind("."):], self.crear_archivo, None, None)
Exemplo n.º 32
0
def test_get_text():
    """utils.get_text should return the (tag, message) pair typed by the user."""
    canned = [
        "tag",
        "message",
    ]

    def fake_input(prompt):
        # Ignore the prompt and serve the next canned answer.
        return canned.pop(0)

    utils.input = fake_input
    tag, message = utils.get_text()
    if tag != 'tag':
        raise AssertionError()
    if message != 'message':
        raise AssertionError()
def get_data(i, num=0):
    """
    Creates a list of (num) clean HTML news from page (i)  (Stock symbol, news text and data)

    :param i: page number
    :param num: number of wanted news from page (i)
    :return: list of (Stock symbol, news text and data)
    """
    containers = utils.get_news(i)
    news_list = list()
    # BUG FIX: the loop variable previously reused `i`, shadowing the page
    # number parameter. NOTE(review): range(num + 1) inspects num+1
    # containers (entries flagged "not a stock news" are skipped) — confirm
    # the intended count against callers.
    for idx in range(num + 1):
        s = utils.get_text(containers[idx])
        if s == "not a stock news":
            continue
        news_list.append(s)
    return news_list
Exemplo n.º 34
0
def haikus_for_document(filename):
    """
    Analyzes a document for haikus. Returns a list of tuples.
    """
    text = get_text(filename)
    haikus = []
    # SpaCy has a maximum text size of 1,000,000 characters.
    # Let's use one fewer to be on the safe side.
    for chunk in chunks(
            text,
            999_999):  # this underscore syntax was introduced in Python 3.6
        doc = nlp(chunk)
        for sent in doc.sents:
            haiku = check_haiku(sent)
            if haiku:
                haikus.append(haiku)
    # BUG FIX: the accumulated list was never returned — the function
    # implicitly returned None, contradicting its docstring.
    return haikus
Exemplo n.º 35
0
def text():
    """Extract text from an uploaded PDF (POST) or render the index page (GET)."""
    if request.method != 'POST':
        return render_template('index.html')

    # Persist the received PDF into the configured upload folder.
    uploaded = request.files['file']
    safe_name = secure_filename(uploaded.filename)
    uploaded.save(os.path.join(app.config['UPLOAD_FOLDER'], safe_name))

    # Build the pathlib.Path handed to the extraction helper.
    pdf_path = Path(os.getcwd() + "/app/api_uploaded_files/" + uploaded.filename)

    if not pdf_path.is_file() or pdf_path.suffix != ".pdf":
        return jsonify(error=400, text="Invalid Request"), 400

    return jsonify(pdf_name=safe_name, text=get_text(pdf_path))
Exemplo n.º 36
0
 def run(self):
     """Snapshot the current file into its version folder.

     A copy is written only when the file differs from the most recent
     snapshot; the snapshot name (a timestamp) is recorded in DIFF_JSON.
     """
     diff_index = utils.load_json(DIFF_JSON)
     view = sublime.active_window().active_view()
     utils.get_text()  # result was unused in the original flow; call kept for parity
     filename = view.file_name()
     carpeta = get_folder(filename)
     utils.create_folder_if_not_exist(carpeta)
     timestamp = time.strftime("%Y%m%d%H%M%S")
     previos = os.listdir(carpeta)
     # Identical to the newest snapshot? Then nothing to save.
     sin_cambios = bool(previos) and filecmp.cmp(carpeta + os.sep + max(previos), filename)
     if not sin_cambios:
         print("guardando version...")
         shutil.copyfile(filename, carpeta + os.sep + timestamp)
         diff_index[filename] = timestamp
         utils.save_json(DIFF_JSON, diff_index)
Exemplo n.º 37
0
 def _enumerate_workspaces(self):
     """Populate ``self.workspaces`` with (title, [SDCollection, ...]) tuples
     parsed from the cached service document DOM.

     Does nothing (beyond logging) when the document failed validation.
     """
     if not self.valid:
         sd_l.error("The service document didn't pass the SWORD2 validation steps ('MUST' statements in spec). The workspaces and collections will not be enumerated.")
         return

     if self.sd_uri:
         sd_l.info("Enumerating workspaces and collections from the service document for %s" % self.sd_uri)

     # Rebuild the cached list from scratch.
     self.workspaces = []
     for ws_element in self.service_dom.findall(NS['app'] % "workspace"):
         title = get_text(ws_element, NS['atom'] % 'title')
         sd_l.debug("Found workspace '%s'" % title)
         cols = []
         for col_element in ws_element.findall(NS['app'] % 'collection'):
             # Each app:collection (plus SWORD extensions) becomes an SDCollection.
             parsed = SDCollection()
             parsed.load_from_etree(col_element)
             cols.append(parsed)
         self.workspaces.append((title, cols))
Exemplo n.º 38
0
  def movie(self):
    """Build a Movie from the parsed page tree.

    Tries the primary XPath set first, then the alternate set; raises
    ScraperException when neither layout matches.
    """
    xpath_sets = (
        (s.XPATH_DIRECTOR, s.XPATH_GENRE, s.XPATH_LENGTH),
        (s.XPATH_DIRECTOR_2, s.XPATH_GENRE_2, s.XPATH_LENGTH_2),
    )
    for dir_xp, genre_xp, length_xp in xpath_sets:
      try:
        director = utils.get_text(self._tree, dir_xp)
        genre = utils.get_text(self._tree, genre_xp)
        length = utils.get_text(self._tree, length_xp)
        break
      except IndexError:
        continue
    else:
      # Both layouts failed to match.
      raise ScraperException()

    name_desc = utils.get_text(self._tree, s.XPATH_NAME_DESC).splitlines()
    cleaned = utils.clean_name_and_desc(name_desc)
    name = cleaned[0]
    desc = ''.join(cleaned[1:])

    return Movie(name=name, desc=desc, director=director, genre=genre, length=length)
Exemplo n.º 39
0
	def __init__(self):
		"""Scan the current view's Java source and collect referenced types
		and declared variables.

		Results: ``self.tipos`` — referenced type names not already imported;
		``self.variables`` — mapping of variable name -> declared type.
		"""
		text=utils.get_text()
		# Generic type parameters like <Foo> count as type references; strip
		# them afterwards so comparison operators are not mistaken for generics.
		declaraciones=re.findall("\<([A-Z][a-z]*)\>", text)
		text=re.sub("\<([A-Z][a-z]*)\>", "", text)
		text=text.replace("< ", "").replace(" >", "").replace("<=", "").replace(">=", "")
		#text=self.clean(text)
		# Drop Java modifiers so the declaration regexes below match uniformly.
		text=text.replace("abstract ", " ").replace("final ", " ").replace("public ", "").replace("private ", "").replace("protected ", "").replace("synchronized ", "").replace("volatile ", "").replace("class ", "public class").replace("static ", "").replace("< ", "").replace("> ", "").replace("<=", "").replace(">=", "")
		tipos=[]
		variables={}
		#print("jojo")
		#print("el texto es : "+text)
		#print(re.findall("\n[\t ]*([A-Z][\w]*[ ]+[\w_]+)", text))
		# Gather every syntactic position that can reference a type: local
		# declarations, static calls, parameters, annotations, `new`,
		# implements/extends/throws clauses and generics.
		declaraciones+=re.findall("\n[\t ]*([A-Z][\w]*[ ]+[\w_]+)", text) + re.findall("[^\w]([A-Z][\w]*)\.", text) + re.findall("\(\s*([A-Z][\w]*[ ]+[\w_]+)", text) + re.findall(",\s*([A-Z][\w]*[ ]+[\w_]+)", text) + re.findall("\n[ \t]*@([A-Z][\w]*)", text) + re.findall("new ([A-Z][\w]*)", text) + re.findall("implements ([A-Z][\w]*)", text) + re.findall("extends ([A-Z][\w]*)", text) + re.findall("throws ([A-Z][\w]*)", text) +re.findall("\<([A-Z][\w]*)\>", text)
		#print(declaraciones)
		#text=self.clean(text)

		# Treat the superclass as the type of the implicit `this` variable.
		this=re.findall("extends ([A-Z][\w]*)", text)
		if this:declaraciones+=[this[0]+" this"]
		#print(declaraciones)
		# "Tipo nombre" entries feed the variables map; bare names are types only.
		for declaracion in declaraciones:
			declaracion=declaracion.strip()
			pos=declaracion.find(" ")
			if pos!=-1:
				tipo=declaracion[:pos]
				variable=declaracion[pos+1:]
				variables[variable]=tipo
				tipos.append(tipo)
			else:tipos.append(declaracion)
		newtipos=[]
		tipos=list(set(tipos))
		# Keep only types that are not already covered by an import statement.
		for tipo in tipos:
			if re.findall("import [\w.]+\.%s;"%(tipo), text):
				#print("ya esta exportado******"+tipo)
				continue
			newtipos.append(tipo)
		tipos=newtipos
		self.tipos=tipos
		self.variables=variables
Exemplo n.º 40
0
    def load_from_etree(self, collection):
        """Populate this object from an app:collection etree element.

        The element itself is cached on ``self.dom`` for later reuse.
        """
        self._reset()
        self.dom = collection

        self.title = get_text(collection, NS['atom'] % 'title')
        # href is a required attribute per the spec; None when absent.
        self.href = collection.attrib.get('href', None)

        # app:accept entries; the multipart-related alternate is kept separately.
        for accept in collection.findall(NS['app'] % 'accept'):
            if accept.attrib.get("alternate", None) == "multipart-related":
                bucket = self.accept_multipart
            else:
                bucket = self.accept
            bucket.append(accept.text)

        # atom:category entries.
        for cat_element in collection.findall(NS['atom'] % 'category'):
            self.categories.append(Category(dom=cat_element))

        # SWORD extension elements.
        self.collectionPolicy = get_text(collection, NS['sword'] % 'collectionPolicy')
        self.mediation = get_text(collection, NS['sword'] % 'mediation').lower() == "true"
        self.treatment = get_text(collection, NS['sword'] % 'treatment')
        self.description = get_text(collection, NS['dcterms'] % 'abstract')
        self.service = get_text(collection, NS['sword'] % 'service', plural=True)
        self.acceptPackaging = get_text(collection, NS['sword'] % 'acceptPackaging', plural=True)

        # Log collection details (Python 2 codebase: `unicode` builtin).
        coll_l.debug(unicode(self))
Exemplo n.º 41
0
    def on_pre_save(self, view):
        """On save of a JS/Node file, harvest ``receiver.method(`` pairs into
        the language's completion dictionary (functions.json)."""
        lang = utils.get_language()
        if lang not in ("javascript", "nodejs"):
            return
        source = utils.get_text()

        # Collapse jQuery selector calls so their methods register under "jQuery".
        source = re.sub("\$\([\"'.\w#-]*\)", "jQuery", source)
        calls = re.findall("([$A-Za-z]+)\.([\w]+)\(", source)

        base = sublime.packages_path() + os.sep + "javascript" + os.sep
        if lang == "nodejs":
            jsonPath = base + "functions_node.json"
        else:
            jsonPath = base + "functions.json"

        completions = utils.load_json(jsonPath)
        for receiver, method in calls:
            if receiver == "$scope":
                continue
            entry = method + "()"
            if not completions.get(receiver):
                completions[receiver] = []
            if entry not in completions[receiver]:
                completions[receiver].append(entry)
        utils.save_json(jsonPath, completions)

                
Exemplo n.º 42
0
 def on_chat_message(self, msg):
     """Persist the incoming message, then forward it (with its extracted
     text) to the current stage's handler."""
     self.save_message(msg, skip_reply=True)
     handler = self.stage()
     handler.on_chat_message(msg, u.get_text(msg))
    def seleccionarVista(self, index):
        """Collect the getters/attributes and methods a selected JSF view
        expects from the managed bean in the current file.

        Scans the chosen view (``self.vistas[index]``) for EL bindings to the
        bean class, separates method bindings (listener/actionListener/action/
        completeMethod) from value attributes, and queues those missing from
        the class before asking the user / generating them.
        """
        if index==-1:return
        archivo=open(self.vistas[index])
        # NOTE(review): double assignment is redundant but harmless.
        texto=texto=archivo.read()
        self.text=utils.get_text()
        archivo.close()
        nombreClase=re.findall("public\s+class\s+([\w]+)", self.text, flags=re.IGNORECASE)
        claseHereda=re.findall("public\s+class\s+[\w]+\s+extends\s+([\w]+)", self.text, flags=re.IGNORECASE)
        
        nombreClase=nombreClase[0]
        # If the bean extends another class, append the parent's source so
        # inherited members count as already present.
        if claseHereda:
            claseHereda=claseHereda[0]
            archivos=utils.get_files({"match":claseHereda+".java", "ignores":["target", "build", ".svn", ".git", "bin"]})
            print("los archivos encontrados son : ")
            print(archivos)
            if archivos:self.text+=open(archivos[0]).read()
        print("el nombre de la clase es : "+nombreClase)
            #{"ext":"java", "ignores":["target", "build", ".svn", ".git", "bin"]}
        
        # EL method-binding patterns, e.g. listener="#{Bean.metodo}".
        reg_listener='listener=\s*"#\{%s\.([\w]+)\}"'%nombreClase
        reg_actionListener='actionListener=\s*"#\{%s\.([\w]+)\}"'%nombreClase
        reg_action='action=\s*"#\{%s\.([\w]+)\}"'%nombreClase
        reg_complete_method='completeMethod=\s*"#\{%s\.([\w]+)\}"'%nombreClase
        
        metodos=re.findall(reg_listener, texto, flags=re.IGNORECASE)
        metodos+=re.findall(reg_actionListener, texto, flags=re.IGNORECASE)
        metodos+=re.findall(reg_complete_method, texto, flags=re.IGNORECASE)
        metodos+=re.findall(reg_action, texto, flags=re.IGNORECASE)

        
        # Remove method bindings so the remaining EL expressions are attributes.
        texto=re.sub(reg_listener, "", texto,flags=re.IGNORECASE)
        texto=re.sub(reg_actionListener, "", texto,flags=re.IGNORECASE)
        texto=re.sub(reg_action, "", texto,flags=re.IGNORECASE)
        texto=re.sub(reg_complete_method, "", texto,flags=re.IGNORECASE)
        
        atributos=re.findall("#\{%s\.([\w]+)\}"%nombreClase, texto, flags=re.IGNORECASE)

        atributos=list(set(atributos))
        metodos=list(set(metodos))
        self.generado=""
        print(atributos)
        print(metodos)
        self.listAtributos=[]
        self.listMetodos=[]
        self.total=0
        self.i=0

        # Attributes are missing when no matching getter exists in the class.
        for atributo in atributos:
            if self.text.find("get"+atributo[0].upper()+atributo[1:]+"(")==-1:
                self.listAtributos.append([atributo])
                self.total+=1
        
        for metodo in metodos:
            if self.text.find(metodo)==-1:
                self.listMetodos.append(metodo)
        
        print(self.listAtributos)
        print(self.listMetodos)
        # Only methods missing -> generate directly; nothing missing -> done;
        # otherwise prompt for attribute types.
        if not self.listAtributos and self.listMetodos:
            self.llenar()
            return
        if not self.listAtributos and not self.listMetodos:return
        self.pedir()
Exemplo n.º 44
0
def get_all_page_data(url, is_community=False):
    """Scrape a Facebook page into ./data/<name>/, then recurse once for its
    /community page.

    Scrolls the page until the module-level ``cutoff_date``, dumps collected
    post links to post_links.json, then fetches each post individually and
    writes page_post_<i>.json. Relies on module-level ``args``,
    ``cutoff_date`` and the selenium helper functions.
    """

    # Page name = last non-empty URL path segment.
    name = url.split("/")[-1] if len(
        url.split("/")[-1]) > 0 else url.split("/")[-2]

    if is_community:
        name = os.path.join(name, "community")
        url = url + "/community"

    data_path = os.path.join(".", "data")
    if not os.path.exists(data_path):
        os.mkdir(data_path)

    page_data_path = os.path.join(data_path, name)
    if not os.path.exists(page_data_path):
        os.mkdir(page_data_path)

    # Community pages are scraped with a visible browser; main pages headless.
    should_scrape_headless = is_community == False
    driver = initialize_driver(args.chrome,
                               args.windows,
                               is_headless=should_scrape_headless)

    driver.get(url)

    page_name = get_text(driver, './/a[@class="_64-f"]')

    print(f"Scrolling {url} until {cutoff_date}")

    scroll(driver, pd.to_datetime(cutoff_date))

    posts = driver.find_elements_by_xpath(
        '//div[contains(@class, "userContentWrapper")]')

    post_links = [get_post_links(post) for post in tqdm(posts)]

    # Deduplicate; NOTE(review): set() loses the original ordering, so the
    # page_post_<i> numbering is not chronological.
    post_links = list(set(post_links))

    with open(os.path.join(page_data_path, 'post_links.json'), 'w') as f:
        json.dump(post_links, f)

    driver.quit()

    print(f"Now scraping {len(post_links)} posts from {name}")

    for i, post_link in enumerate(post_links):

        if not is_string_url(post_link):
            continue

        print(f"Scraping {post_link}")

        # A fresh driver per post keeps sessions isolated.
        driver = initialize_driver(args.chrome, args.windows)

        driver.get(post_link)

        # Classify the post by its URL path segment.
        if "/videos/" in post_link:
            post_type = "videos"
        elif "/photos/" in post_link:
            post_type = "photos"
        elif "/posts/" in post_link:
            post_type = "posts"
        elif "/notes/" in post_link:
            post_type = "notes"
        else:
            post_type = "other"

        # Notes use a different container element than regular posts.
        if post_type == "notes":
            post_element = driver.find_element_by_xpath(
                './/div[contains(@class, "fb_content")]')
        else:
            post_element = driver.find_element_by_xpath(
                './/div[contains(@class, "userContentWrapper")]')

        post_data = get_post_data(driver, post_element, post_type)

        post_data["page_name"] = page_name

        with open(os.path.join(page_data_path, f'page_post_{i}.json'),
                  'w') as f:
            json.dump(post_data, f)

        driver.quit()

    # One level of recursion: scrape the community page after the main page.
    if not is_community:
        get_all_page_data(url, is_community=True)
Exemplo n.º 45
0
 def run(self, edit):
     """Replace the view's content with its lines in reverse order.

     Each emitted line is newline-terminated, matching the original
     behaviour (including a trailing newline on the new last line).
     """
     lineas = utils.get_text().splitlines()
     # str.join is O(n); the previous `texto += linea` loop was quadratic.
     utils.set_text("".join(linea + "\n" for linea in reversed(lineas)))
Exemplo n.º 46
0
#!/usr/bin/env python3

# Advent of Code day 13, part 1: find the first bus departing after `time`
# and print its id multiplied by the wait.

from utils import get_text

input = get_text('13')

time = int(input[0])
original = input[1].split(',')
buses = [int(entry) for entry in original if entry != 'x']

# Next departure of each bus strictly after `time`.
arrivals = [{'id': bus, 'arrival': bus * (time // bus + 1)} for bus in buses]

# Every computed arrival is > time, so the earliest is simply the minimum.
earliest = min(arrivals, key=lambda a: a['arrival'])
print(earliest['id'] * (earliest['arrival'] - time))

def closure(idx, num, sign):
    """Return a predicate ``f(x)`` that is True when ``x + idx*sign`` is a
    multiple of ``num`` (the bus-offset congruence used in part 2)."""
    def f(x):
        # print(f"x = {x} idx = {idx * sign} num = {num}")
        return (x + (idx * sign)) % num == 0
    # BUG FIX: the inner predicate was built but never returned, so
    # closure(...) evaluated to None.
    return f
Exemplo n.º 47
0
    def run(self, edit):
        """Jump to the definition of the symbol under the cursor.

        Uses per-language regex tables (methods, variables, comments) to
        locate the declaration; ``self.``/``this.`` prefixes are stripped so
        members resolve inside the current class, and Java imports are
        followed to the defining class when the symbol looks like a type.
        """
        window=sublime.active_window()
        view=window.active_view()
        self.lang=utils.get_language()

        # Method-declaration regex per language; %(nombre)s is filled in later.
        self.regMetodos={
            "python":"def\\s+%(nombre)s\\(",
            "python3":"def\\s+%(nombre)s\\(",
            "ruby":"def\\s+%(nombre)s",
            "java":"[\w].+\s+%(nombre)s\(",
            "javascript":"function\\s*%(nombre)s\\(|%(nombre)s\\s*=\\s*function\\(",
            "nodejs":"function\\s*%(nombre)s\\(|%(nombre)s\\s*=\\s*function\\(",
            "c":"\\b%(nombre)s\\([^)]*\\)\\s*\\n?\\s*\\{",
            "c#":"\\b%(nombre)s\\([^)]*\\)\\s*\\n?\\s*\\{",
            "c++":"\\b%(nombre)s\\([^)]*\\)\\s*\\n?\\s*\\{"
        }

        # Variable-declaration/assignment regex per language.
        self.regVariables={
            "python":"\\b%(nombre)s\\s*=[^=]?|\\b%(nombre)s\\s+in\\s+|def [\\w_]+\\(.*\\b%(nombre)s",
            "python3":"\\b%(nombre)s\\s*=[^=]?|\\b%(nombre)s\\s+in\\s+|def [\\w_]+\\(.*\\b%(nombre)s",
            "ruby":"\\b%(nombre)s\\s*=[^=]?|\\b%(nombre)s\\s+in\\s+|def [\\w_]+\\(.*\\b%(nombre)s",
            "java":"\\b%(nombre)s\\s*=[^=]?|[\\w]+\\s+%(nombre)s;|[\\w]+\\s+%(nombre)s,",
            "javascript":"\\b%(nombre)s\\s*=[^=]?|var+\\s+%(nombre)s;|var+\\s+%(nombre)s,",
            "nodejs":"\\b%(nombre)s\\s*=[^=]?|var+\\s+%(nombre)s;|var+\\s+%(nombre)s,",
            "c":"\\b%(nombre)s\\s*=[^=]?|[\\w]+\\s+%(nombre)s;|[\\w]+\\s+%(nombre)s,",
            "c#":"\\b%(nombre)s\\s*=[^=]?|[\\w]+\\s+%(nombre)s;|[\\w]+\\s+%(nombre)s,",
            "c++":"\\b%(nombre)s\\s*=[^=]?|[\\w]+\\s+%(nombre)s;|[\\w]+\\s+%(nombre)s,",
            "jsf":'id\\s*=\\s*"%(nombre)s"'
        }

        # Comment syntax per language (matches are excluded from searches).
        self.comentarios={
            "python":'#[^\\n]\\n|"""[^"]"""',
            "python3":'#[^\\n]\\n|"""[^"]"""',
            "ruby":'#[^\\n]\\n|"""[^"]"""',
            "java":"//[^\\n]\\n|/[*][^/]*[*]/",
            "javascript":"//[^\\n]\\n|/[*][^/]*[*]/",
            "nodejs":"//[^\\n]\\n|/[*][^/]*[*]/",
            "c":"//[^\\n]\\n|/[*][^/]*[*]/",
            "c#":"//[^\\n]\\n|/[*][^/]*[*]/",
            "c++":"//[^\\n]\\n|/[*][^/]*[*]/",
            "jsf":"<!--[^-]->"
        }

        var=utils.get_word_signature()
        print(var)
        isMethod=utils.is_method()
        # A dotted signature means the symbol belongs to another object/class.
        isUnique=var.find(".")==-1
        if self.lang=="python" and var.startswith("self."):
            isUnique=True
            var=var[var.find(".")+1:]
        elif self.lang=="java" and var.startswith("this."):
            isUnique=True
            var=var[var.find(".")+1:]

        if isMethod:
            if isUnique:self.goto_method(var)
            else:self.goto_class_method(var[:var.find(".")], var[var.find(".")+1:])
        else:
            if isUnique:
                self.goto_definition(var)
                # Capitalized + imported -> treat it as a class and jump there.
                paquete=re.findall("import\s+([\w._]+\."+var+");", utils.get_text(), flags=re.IGNORECASE)
                if var[0].isupper() and paquete:
                    print("va hacia la clase")
                    self.goto_class(paquete[0])
            else:
                self.goto_class_definition(var[:var.find(".")], var[var.find(".")+1:])
                print("no unico")
def process_file(filename, output_path=None, lang='sk', verbose=True):
    xmldoc = ET.parse(filename)
    root = xmldoc.getroot()
    organizacnaJednotka = root.find('organizacnaJednotka').text
    ilisty = root.find('informacneListy')
    if verbose:
        print "  Nasiel som %d informacnych listov." % len(ilisty.findall('informacnyList'))

    # elementy, ktore sa budu parsovat z XML-ka
    # kluc => XPath (kluc sa pouziva neskor v template)
    elements = {'kod': 'kod', 'nazov': 'nazov', 'kredit': 'kredit',
                'sposobUkoncenia': 'sposobUkoncenia',
                'studijnyProgram': 'studijneProgramy/studijnyProgram/popis',
                'datumSchvalenia': 'datumSchvalenia', 'obsahovaNapln': '_ON_/texty',
                'vahaHodnotenia': '_VH_/texty', 'garanti': 'garanti/garant/plneMeno'}
    data = []

    # spracovanie informacnych listov jednotlivych predmetov
    for il in ilisty.findall('informacnyList'):
        # preskocime predmety, ktore nie su statne skusky
        if il.find('_ON_') is None:
            continue
        d = {'lang' : lang, 'organizacnaJednotka': organizacnaJednotka}
        for key, path in elements.iteritems():
            if il.find(path) is not None:
                if path.startswith('_'):
                    d[key] = utils.get_text(il.find(path))
                elif key == 'studijnyProgram':
                    d[key] = [el.text for el in il.findall(path)]
                else:
                    d[key] = il.find(path).text
            else:
                d[key] = ''

        # uprava kodov predmetov
        d['kod'] = utils.parse_code(d['kod'])

        data.append(d)

    # nacitanie HTML sablony
    script_abs_path = os.path.dirname(os.path.abspath(__file__))
    tpl_path = os.path.join(script_abs_path, 'templates')
    env = Environment(loader=FileSystemLoader(tpl_path))

    tpl_name = 'template_statne-skusky_table_%s.html' % lang
    html_tpl = env.get_template(tpl_name)

    # zapis do suborov
    for course in data:
        kod_predmetu = course['kod']

        html = html_tpl.render(course)

	filename = '%s.html' % kod_predmetu
        if output_path is not None:
	    path = os.path.join(output_path, filename)
            if not os.path.exists(output_path):
                os.mkdir(output_path)
        else:
	    path = filename
        with open(path, 'w') as f:
            f.write(html.encode('utf8'))
Exemplo n.º 49
0
    counts = {}
    
    for char in s:
        
        if char in counts:
            counts[char] += 1
        else:
            counts[char] = 1
            
    return counts

if __name__ == '__main__':
        
    # Look for least common characters.
    # NOTE(review): Python 2 script (print statements) — run under python2.

    counts = count_characters(utils.get_text(2))
    print counts
    counts_sorted = sorted(counts, key=counts.get)
    print counts_sorted
    # The eight rarest characters, joined together.
    print ''.join(counts_sorted[0:8])

    # Try to shift least common characters.

    import challenge_1

    # Caesar-shift the rare characters by 2 to reveal the hidden word.
    s = ''.join(counts_sorted[0:8])
    s = challenge_1.shift(s,2)
    print s.lower()
    
    # aeilquty -> equality
Exemplo n.º 50
0
#!/usr/bin/env python3
# Advent of Code day 11 (seat simulation) — driver script.

from utils import get_text
from pprint import pprint

# Puzzle input lines for day 11. NOTE(review): the name shadows the builtin
# `input`; harmless here because the builtin is not used afterwards.
input = get_text('11')

# input = [
#   "L.LL.LL.LL",
#   "LLLLLLL.LL",
#   "L.L.L..L..",
#   "LLLL.LL.LL",
#   "L.LL.LL.LL",
#   "L.LLLLL.LL",
#   "..L.L.....",
#   "LLLLLLLLLL",
#   "L.LLLLLL.L",
#   "L.LLLLL.LL",
# ]

def count_occurences(src, y, x, n):
    result = 0
    height = len(src)
    width = len(src[0])
    
    miny = y - 1 if y - 1 >= 0 else 0
    maxy = y + 1 if y + 1 < height else height - 1

    minx = x - 1 if x - 1 >= 0 else 0 
    maxx = x + 1 if x + 1 < width else width - 1
Exemplo n.º 51
0
    for command in commands:
        direction, distance = command.split(' ')
        if direction == 'forward':
            position[0] += int(distance)
        elif direction == 'up':
            position[1] -= int(distance)
        else:
            position[1] += int(distance)

    return position[0] * position[1]


def part_two(commands):
    """Drive the submarine with 'aim' semantics and return the product of the
    final horizontal position and depth (AoC 2021 day 2, part 2).

    commands: iterable of strings like "forward 5", "up 3", "down 8".
    """
    horizontal = 0
    vertical = 0
    aim = 0
    for command in commands:
        direction, units = command.split(' ')
        amount = int(units)
        if direction == 'forward':
            horizontal += amount
            vertical += aim * amount
        elif direction == 'up':
            aim -= amount
        else:
            aim += amount
    return horizontal * vertical


# Read the day-02 puzzle input and print the answers for both parts.
data = get_text('02')
print(part_one(data))
print(part_two(data))
Exemplo n.º 52
0
 def save(self, name):
     """Store the current view's text under ``name`` in the samples JSON file."""
     if name == None:
         return
     print("antes de : " + self.rutaSamples)
     registro = utils.load_json(self.rutaSamples)
     registro[name] = utils.get_text()
     utils.save_json(self.rutaSamples, registro)
import nltk
import pandas as pd
import string
from argparse import ArgumentParser
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from utils import open_csvs, read_csvs, get_text

parser = ArgumentParser()
parser.add_argument('-f', dest='data_path')
parser.add_argument('-o', dest='out_file')
args = parser.parse_args()

if __name__ == '__main__':
    print("Reading data")
    data = read_csvs(args.data_path)
    documents = get_text(data)

    sid = SentimentIntensityAnalyzer()

    # Score each document with VADER and emit "<text>,<compound>" lines.
    with open(args.out_file, 'w') as f:
        # BUG FIX: the loop iterated over an undefined name `messages`;
        # the extracted `documents` are what should be scored.
        for x in documents:
            ss = sid.polarity_scores(x)
            out_line = x + ',' + str(ss['compound']) + '\n'
            # BUG FIX: the computed line was never written to the file.
            f.write(out_line)
Exemplo n.º 54
0
#!/usr/bin/env python3

from utils import get_text

# AoC 2020 day 2: validate passwords against two policy interpretations.
input = get_text('02')
store = []
for line in input:
    rango, letra, clave = line.split(' ')
    lo, hi = (int(part) for part in rango.split('-'))
    store.append({'min': lo, 'max': hi, 'term': letra.rstrip(':'), 'password': clave})

# Part 1: the letter's occurrence count must fall within [min, max].
result = sum(
    1 for entry in store
    if entry['min'] <= entry['password'].count(entry['term']) <= entry['max'])
print(result)

# Part 2: the letter must appear at exactly one of the two 1-based positions.
result = sum(
    1 for entry in store
    if (entry['password'][entry['min'] - 1] == entry['term'])
    ^ (entry['password'][entry['max'] - 1] == entry['term']))
print(result)
Exemplo n.º 55
0
    def validate(self):
        """Validate the parsed service document against the SWORD 2.0 'MUST'
        statements.

        Returns True when every checked requirement holds, False otherwise;
        each individual failure is logged via ``sd_l``.
        """
        valid = True
        if not self.parsed:
            return False
        # The SWORD server MUST specify the sword:version element with a value of 2.0
        # -- MUST have sword:version element
        # -- MUST have value of '2.0'
        self.version = get_text(self.service_dom, NS["sword"] % "version")
        if self.version:
            if self.version != "2.0":
                # Not a SWORD2 server...
                # Fail here?
                sd_l.error(
                    "The service document states that the server's endpoint is not SWORD 2.0 - stated version:%s"
                    % self.version
                )
                valid = False
        else:
            sd_l.error("The service document did not have a sword:version")
            valid = False

        # The SWORD server MAY specify the sword:maxUploadSize (in kB) of content that can be uploaded in one request [SWORD003] as a child of the app:service element. If provided this MUST contain an integer.
        maxupload = get_text(self.service_dom, NS["sword"] % "maxUploadSize")
        if maxupload:
            try:
                self.maxUploadSize = int(maxupload)
            except ValueError:
                # Unparsable as an integer. Enough to fail a validation?
                # Strictly... yep
                sd_l.error("The service document did not have maximum upload size parseable as an integer.")
                valid = False

        # Check for the first workspace for a collection element, just to make sure there is something there.
        test_workspace = self.service_dom.find(NS["app"] % "workspace")
        # Idiom fix: identity comparison (was `!= None`); Element truthiness
        # is unreliable, so the explicit None check is required.
        if test_workspace is not None:
            sd_l.debug("At least one app:workspace found, with at least one app:collection within it.")
        else:
            valid = False
            sd_l.error("Could not find a app:workspace element in the service document.")

        # The SWORD server MUST specify the app:accept element for the app:collection element.
        # If the Collection can take any format content type, it should specify */* as its
        # value [AtomPub]. It MUST also specify an app:accept element with an alternate attribute
        # set to multipart-related as required by [AtomMultipart]. The formats specified by
        # app:accept and app:accept@alternate="multipart-related" are RECOMMENDED to be the same.
        workspaces = self.service_dom.findall(NS["app"] % "workspace")
        if workspaces is not None:
            for workspace in workspaces:
                cols = workspace.findall(NS["app"] % "collection")
                for col in cols:
                    # the collection may contain a sub-service document, which means it is not
                    # beholden to the rules above
                    service = col.find(NS["sword"] % "service")
                    if service is not None:
                        continue

                    # since we have no sub-service document, we must validate
                    # NOTE(review): accept_valid is never set to False below — only
                    # the multipart alternate check can currently fail; confirm
                    # whether a plain-accept check was intended.
                    accept_valid = True
                    multipart_accept_valid = True
                    accepts = col.findall(NS["app"] % "accept")
                    for accept in accepts:
                        multipart = accept.get("alternate")
                        if multipart is not None:
                            if multipart != "multipart-related":
                                multipart_accept_valid = False
                                sd_l.debug("Multipart accept alternate is incorrect: " + str(multipart))
                        else:
                            # FIXME: we could test to see if the content is viable, but probably that's pointless
                            pass

                    if not multipart_accept_valid or not accept_valid:
                        sd_l.debug("Either the multipart accept or the accept fields were invalid (see above debug)")
                        valid = False

        return valid
Exemplo n.º 56
0
    def get_abv(self):
        '''Attempt to find the percentage of alcohol by volume via a Bing search.

        Scrapes up to ``self.num_attempts`` unique-domain search results for an
        "ABV ... %" pattern. Returns a float ABV (capped by a sanity ceiling),
        or None when nothing plausible is found. Python 2 code (``xrange``).
        '''
        abv = ''
        found_abv = ''

        ''' A ceiling for ABV content for validation

            We can assume BevMo does not offer kegs with this high of an ABV
        '''
        max_abv = 20.0

        if not self.parsed:
            self.parse()

        # NOTE(review): the backslash continuation inside the literal bakes the
        # leading spaces of the next line into the URL — confirm intended.
        search_url = 'https://www.bing.com/search?q={0}+alcohol+content\
                     '.format('+'.join(self.name.split()))
        search_links = get_html(search_url).xpath('//a/@href')
        # Organic results appear after the 'javascript:' anchor.
        new_search_links = search_links[search_links.index('javascript:'):][1:]

        results = [x for x in new_search_links if x != '#' and 'site:' not in x]

        ''' Max number of links to search for alcohol by volume (ABV) '''
        num_attempts = self.num_attempts

        ''' Filter links with same domain to improve chances of matching '''
        searched_domains = set()

        ''' Add the top page results that are unique, r_it is an iterator '''
        top_results = []
        r_it = 0
        result_link = ''

        while len(top_results) < num_attempts and r_it < len(results):
            result_link = results[r_it]
            domain = '{url.netloc}'.format(url=urlparse(result_link))
            # Reduce e.g. "www.example.com" to its registrable label.
            if '.' in domain:
                if domain.count('.') > 1:
                    domain = domain.split('.')[1]
                else:
                    domain = domain.split('.')[0]

            ''' Avoid already searched domains '''
            if domain in searched_domains:
                r_it += 1
            else:
                top_results.append(result_link)
                r_it += 1
                searched_domains.add(domain)

        for i in xrange(min(num_attempts, len(top_results))):
            if self.verbose:
                print('Searching {}'.format(top_results[i]))

            # Best-effort fetch: skip links that fail to download/parse.
            try:
                search_text = ''.join(get_text(get_html(top_results[i])))
            except Exception:
                continue

            ''' Retrieves partial string containing the words ABV and a % '''
            abv = re.search('(?<=[Aa][Bb][Vv])[^\d]*(\d+[.]?\d*)(?=%)|(?<=%)\
                            [^\d]*(\d+[.]?\d*)[^\d]*\
                            (?=[Aa][Bb][Cc])', search_text)
            if abv:
                abv = abv.group()

                ''' Filters for a number with or without a decimal pt '''
                abv = float(re.search('(\d+[.]?\d*)', abv).group())

                ''' If new ABV is 0.0, return previously found ABV if any
                    otherwise, move onto the next link
                '''
                if abv == 0.0:
                    if found_abv:
                        if self.verbose:
                            print('ABV for {} is {}'.format(self.name, abv))
                    else:
                        continue

                if abv < max_abv:
                    # Below half the ceiling we trust the match immediately.
                    if abv < max_abv / 2:
                        if self.verbose:
                            print('ABV for {} is {}'.format(self.name, abv))

                        return abv

                    ''' Replace the new ABV only if the next is lower '''
                    if found_abv:
                        if abv < found_abv:
                            if self.verbose:
                                print('ABV for {} is {}'.format(self.name, abv))

                            return abv
                        else:
                            if self.verbose:
                                print('ABV for {} is {}\
                                      '.format(self.name, found_abv))

                            return found_abv

                    ''' Sets the new ABV to the found ABV '''
                    found_abv = abv
            else:
                if found_abv:
                    if self.verbose:
                        print('ABV for {} is {}'.format(self.name, found_abv))
                    return found_abv

        ''' No ABV was found by this point '''
        if self.verbose:
            print('ABV not found for {}'.format(self.name))

        return None
Exemplo n.º 57
0
     pos = doc.find('</p>', pos)
 
 elif tag == '>SEÇÃO':
     secao += 1
     subsecao = 0
     pos = doc.find('</p>', pos)
 
 elif tag == '>Subseção':
     subsecao += 1
     pos = doc.find('</p>', pos)
 
 elif tag[:3] == 'Art':
     artigo += 1
     idt += 1
     subartigo, inciso, paragrafo = 0, 0, 0
     t = get_text(doc, pos)
     g.writerow([idt, livro, titulo, capitulo, secao, subsecao, artigo,
                 subartigo, paragrafo, inciso, alinea, t])
     pos = doc.find('</p>', pos)
 
 elif tag == 'SubArtigo':
     i = pos
     tmp = ''
     while doc[i] != '-':
         if doc[i] in '0987654321':
             tmp += doc[i]
         i += 1
     if int(tmp) != artigo:
         artigo = int(tmp)
     
     subartigo += 1
Exemplo n.º 58
0
 def run(self, edit):
     """Pretty-print the view's content by round-tripping it through
     Sublime's value decoder/encoder."""
     raw = utils.get_text()
     parsed = sublime.decode_value(raw)
     utils.set_text(sublime.encode_value(parsed, True))
Exemplo n.º 59
0
def train(config, sample_validation_batches):
    """Train a seq2seq translation model according to *config*.

    Iterates over the training data for ``config['epochs']`` epochs,
    logging the per-batch training loss to TensorBoard.  Every
    ``eval_every`` steps it runs a full validation pass (average loss and
    corpus BLEU); every ``sample_every`` steps it translates one sampled
    validation batch and logs the text — plus an attention heat-map when
    attention is enabled.

    Args:
        config: dict-like configuration holding languages, data iterators,
            special tokens, writer paths and training hyper-parameters.
        sample_validation_batches: callable returning a validation batch of
            the requested size, used for qualitative samples.

    Side effects:
        Writes TensorBoard summaries; saves model weights once at the start
        of every epoch and once after training.  Returns None.
    """
    source_language = config.get('src_language')
    target_language = config.get('trg_language')
    EOS_token = config.get('EOS_token')
    PAD_token = config.get('PAD_token')
    SOS_token = config.get('SOS_token')
    train_iter = config.get('train_iter')
    val_iter = config.get('val_iter')
    writer_path = config.get('writer_path')
    # Separate writer directories so train/val curves appear as two runs.
    writer_train_path = get_or_create_dir(writer_path, 'train')
    writer_val_path = get_or_create_dir(writer_path, 'val')
    writer_train = SummaryWriter(log_dir=writer_train_path)
    writer_val = SummaryWriter(log_dir=writer_val_path)
    epochs = config.get('epochs')
    training = config.get('training')
    eval_every = training.get('eval_every')
    sample_every = training.get('sample_every')
    use_attention = config.get('use_attention')
    # Global step counter across all epochs (starts at 1, not 0).
    step = 1
    for epoch in range(epochs):
        print(f'Epoch: {epoch+1}/{epochs}')
        # Checkpoint at the start of each epoch (and again after the loop).
        save_weights(config)
        for i, training_batch in enumerate(train_iter):
            loss = train_batch(config, training_batch)
            writer_train.add_scalar('loss', loss, step)

            # Periodic full validation pass: average loss + corpus BLEU.
            if step == 1 or step % eval_every == 0:
                val_lengths = 0
                val_losses = 0
                reference_corpus = []
                translation_corpus = []
                for val_batch in val_iter:
                    val_loss, translations = evaluate_batch(config, val_batch)
                    val_lengths += 1
                    val_losses += val_loss
                    val_batch_trg, _ = val_batch.trg
                    _, batch_size = val_batch_trg.shape
                    # Convert each reference column of the target batch back
                    # to words, stripping SOS/EOS/PAD; BLEU expects each
                    # reference as a list of reference sentences.
                    references = map(
                        lambda i: torch2words(target_language,
                                              val_batch_trg[:, i]),
                        range(batch_size))
                    references = map(
                        lambda words: [
                            list(
                                filter_words(words, SOS_token, EOS_token,
                                             PAD_token))
                        ], references)
                    reference_corpus.extend(references)
                    # Same conversion for the model's translations.
                    translations = map(
                        lambda translation: list2words(
                            target_language, translation), translations)
                    translations = map(
                        lambda words: list(
                            filter_words(words, SOS_token, EOS_token, PAD_token
                                         )), translations)
                    translation_corpus.extend(translations)
                bleu = compute_bleu(reference_corpus, translation_corpus)
                val_loss = val_losses / val_lengths
                writer_val.add_scalar('bleu', bleu, step)
                writer_val.add_scalar('loss', val_loss, step)

            # Periodic qualitative sample: translate one validation batch
            # and log source/target/translation text.
            if step % sample_every == 0:
                val_batch = sample_validation_batches(1)
                val_batch_src, val_lengths_src = val_batch.src
                val_batch_trg, _ = val_batch.trg
                # Length of the first (only) source sentence in the batch.
                s0 = val_lengths_src[0].item()
                _, translations, attention_weights = evaluate_batch(
                    config, val_batch, True)
                source_words = torch2words(source_language, val_batch_src[:,
                                                                          0])
                target_words = torch2words(target_language, val_batch_trg[:,
                                                                          0])
                translation_words = list(
                    filter(lambda word: word != PAD_token,
                           list2words(target_language, translations[0])))
                # Only plot attention when weights were actually produced.
                if use_attention and sum(attention_weights.shape) != 0:
                    attention_figure = visualize_attention(
                        source_words[:s0], translation_words,
                        with_cpu(attention_weights))
                    writer_val.add_figure('attention', attention_figure, step)
                text = get_text(source_words, target_words, translation_words,
                                SOS_token, EOS_token, PAD_token)
                writer_val.add_text('translation', text, step)

            step += 1

    save_weights(config)
Exemplo n.º 60
0
def extract_infolists(filename, lang='sk', mode='regular', webpages=None, verbose=True):
    """Extract all infolists with all of their courses from a study program XML file.

    Params:
        filename: path to the XML file
        lang: language code stored in each record
        mode: 'regular' skips state-exam sheets (those containing _ON_);
              'statnice' keeps only the state-exam sheets
        webpages: optional mapping of course code -> homepage URL
        verbose: when True, print how many information sheets were found

    Returns:
        list of infolists with course dicts (one dict per sheet)
    """
    # Avoid the shared-mutable-default pitfall: build a fresh dict per call.
    if webpages is None:
        webpages = {}

    xmldoc = ET.parse(filename)
    root = xmldoc.getroot()
    organizacnaJednotka = root.find('organizacnaJednotka').text
    vysokaSkola = root.find('vysokaSkola').text
    fakulta = root.find('fakulta').text
    ilisty = root.find('informacneListy')
    if verbose:
        print("  Nasiel som %d informacnych listov." % len(ilisty.findall('informacnyList')))

    # Elements parsed from the XML: key => XPath
    # (the key is used later in the template).
    elements = {'kod': 'kod',
                'nazov': 'nazov',
                'kredit': 'kredit',
                'sposobVyucby': 'sposobVyucby',
                'rozsahTyzdenny': 'rozsahTyzdenny',
                'rozsahSemestranly': 'rozsahSemestranly',
                'rokRocnikStudPlan': 'rokRocnikStudPlan',
                'kodSemesterStudPlan': 'kodSemesterStudPlan',
                'sposobUkoncenia': 'sposobUkoncenia',
                'studijnyProgram': 'studijneProgramy/studijnyProgram/popis',
                'podmienujucePredmety': 'podmienujucePredmety',
                'vylucujucePredmety': 'vylucujucePredmety',
                'doplujuceUdaje': 'doplujuceUdaje',
                'zabezpecuju': 'zabezpecuju',
                'strucnaOsnova': '_SO_/texty',
                'ciel': '_C_/texty',
                'zaverecneHodnotenie': '_Z_/texty/p',
                'literatura': '_L_/texty',
                'priebezneHodnotenie': '_P_/texty/p',
                'obsahovaPrerekvizita': '_O_/texty',
                'sylabus': '_S_/texty',
                'datumSchvalenia': 'datumSchvalenia',
                'vahaHodnotenia': '_VH_/texty/p',
                'garanti': 'garanti/garant/plneMeno',
                'jazyk': '_PJ_/texty/p',
                'obsahovaNapln': '_ON_/texty',
                'podmienkyAbsolvovania': '_PA_/texty',
                'vysledkyVzdelavania': '_VV_/texty'}
    data = []

    # Process the information sheet of each course.
    for il in ilisty.findall('informacnyList'):
        # Skip state exams in 'regular' mode; they are handled by a
        # different script ('statnice' mode keeps only them).
        if mode == 'regular' and (il.find('_ON_') is not None):
            continue
        if mode == 'statnice' and (il.find('_ON_') is None):
            continue

        d = {'lang': lang,
             'organizacnaJednotka': organizacnaJednotka,
             'vysokaSkola': vysokaSkola,
             'fakulta': fakulta}
        for key, path in elements.items():
            if il.find(path) is not None:
                # '_X_' sections hold rich text; flatten them to a string.
                if key != 'vahaHodnotenia' and path.startswith('_'):
                    d[key] = utils.get_text(il.find(path))
                elif key in ['studijnyProgram', 'jazyk']:
                    # Multi-valued fields; deduplicate the language list.
                    d[key] = [el.text for el in il.findall(path)]
                    if key == 'jazyk':
                        d[key] = list(set(d[key]))
                else:
                    d[key] = il.find(path).text
            else:
                d[key] = ''

        # Normalize course codes.
        d['kod'] = utils.parse_code(d['kod'])

        # Course homepage, if one is known.
        if d['kod'] in webpages:
            d['webStranka'] = webpages[d['kod']]

        data.append(d)

    return data