예제 #1
0
  def save_to_csv(self, filename, data):
    """Build one CSV row from ``data`` and pass it to ``CsvWriter.write_to_csv``.

    The address and phone are converted with ``zenhan.z2h(..., zenhan.ALL)``;
    the postal code ("〒" followed by digits-digits) is removed from the
    address. ``routes`` and ``credit_cards`` are joined into single cells.
    """
    # Width-normalize first so the postal-code pattern sees ASCII digits.
    plain_address = zenhan.z2h(data['address'], zenhan.ALL)
    plain_address = re.sub(r'%s\d+-\d+' % u'〒', '', plain_address).strip()

    columns = [
      data['name'],
      data['name_kata'],
      plain_address,
      # A missing/empty list of routes becomes an empty cell.
      u'\n'.join(data['routes'] or u''),
      data['prefecture'],
      data['area'],
      zenhan.z2h(data['phone'], zenhan.ALL),
      data['working_hours'],
      data['holydays'],
      data['shop_url'],
      data['credit_cards_comment'],
      u'・'.join(data['credit_cards'] or u''),
      data['seats'],
      data['stylist'],
      data['parking'],
      unicode(data['cut_price']),
      data['page_url'],
    ]

    CsvWriter.write_to_csv(filename, columns, firs_row=self.first_row)
예제 #2
0
 def normalize(self,text):
     """Return ``text`` with character widths normalized.

     Applies, in order: alphabet full-width -> half-width (mode 1),
     digits full-width -> half-width (mode 2), katakana half-width ->
     full-width (mode 4).
     """
     steps = (
         (zenhan.z2h, 1),  # alphabet: full-width => half-width
         (zenhan.z2h, 2),  # digits: full-width => half-width
         (zenhan.h2z, 4),  # katakana: half-width => full-width
     )
     for convert, mode in steps:
         text = convert(text, mode=mode)
     return text
예제 #3
0
def delete_aft(line):
    """Normalize character widths in ``line`` and delete unwanted characters.

    After width conversion, every character inside the listed code-point
    ranges is removed. What survives is: ASCII digits and letters,
    U+3005-U+3006, U+3040-U+3098, U+30A1-U+30FA, U+30FC, U+4E00-U+9FFF and
    anything above U+E0FFF.

    Returns the cleaned string.
    """
    text = zenhan.z2h(line, mode=1)  # alphabet: full-width -> half-width
    text = zenhan.z2h(text, mode=2)  # digits: full-width -> half-width
    text = zenhan.h2z(text, mode=4)  # katakana: half-width -> full-width

    # The upper bound has to be spelled \U000E0FFF. The re module reads
    # \uXXXX with exactly four hex digits, so the former "\uE0FFF" meant
    # "...-\uE0FF" followed by a literal "F", which silently deleted every
    # capital F and left U+E100..U+E0FFF untouched.
    return re.sub(
        r'[\u0000-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u3004\u3007-\u303F'
        r'\u3099-\u30A0\u30FB\u30FD-\u4DFF\uA000-\U000E0FFF]',
        "", text)  # delete all other characters
예제 #4
0
def delete_symbol(line):
    """Normalize character widths in ``line`` and delete symbols.

    After width conversion, every character inside the listed code-point
    ranges is removed, and then every character above U+E0FFF is removed
    as well. What survives is: ASCII digits and letters, U+3005-U+3006,
    U+3040-U+3098, U+30A1-U+30FA, U+30FC and U+4E00-U+9FFF.

    Returns the cleaned string.
    """
    text = zenhan.z2h(line, mode=1)  # alphabet: full-width -> half-width
    text = zenhan.z2h(text, mode=2)  # digits: full-width -> half-width
    text = zenhan.h2z(text, mode=4)  # katakana: half-width -> full-width

    # The upper bound has to be spelled \U000E0FFF. The re module reads
    # \uXXXX with exactly four hex digits, so the former "\uE0FFF" meant
    # "...-\uE0FF" followed by a literal "F", which deleted every capital F.
    text = re.sub(
        r'[\u0000-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u3004\u3007-\u303F'
        r'\u3099-\u30A0\u30FB\u30FD-\u4DFF\uA000-\U000E0FFF]',
        "", text)  # delete all other characters

    # Drop whatever lies beyond the range above. A fixed negated class
    # replaces the earlier approach of collecting those characters and
    # pasting them unescaped into a dynamically built "[...]" pattern.
    return re.sub(r'[^\u0000-\U000E0FFF]', "", text)
예제 #5
0
def conversion_data_format(input_data_dict):
    """Convert one input record and return the converted values as a list.

    The returned list holds, in order: name, gender, birthday, email, tel,
    zip code, address, sample item and the reception timestamp formatted
    as ``%Y/%m/%d %H:%M:%S``.
    """
    record = input_data_dict
    return [
        record['name'].strip(),
        GENDER_MODIFIED_MAP[record['gender']],
        record['birthday'],
        record['email'],
        # Width-convert the phone number, then turn the long-vowel mark
        # into a plain hyphen.
        zenhan.z2h(record['tel']).replace('ー', '-'),
        record['post_code'].replace('ー', ''),
        record['address'],
        SAMPLE_PRODUCT_MAP[record['item_num']],
        parser.parse(record['reception_date']).strftime('%Y/%m/%d %H:%M:%S'),
    ]
예제 #6
0
def normalize(ingredient):
    """Yield the normalized ingredient name(s) contained in ``ingredient``.

    The text is stripped of the module-level SURROUNDS / OPTIONAL_START /
    SPECIAL_SYMBOLS patterns, width-normalized, converted to hiragana and
    finally split with SPLIT; empty pieces are not yielded.
    """
    cleaned = ingredient.strip()

    for surround_re in SURROUNDS:
        cleaned = surround_re.sub('', cleaned)
    cleaned = OPTIONAL_START.sub('', cleaned)

    unclosed = UNCLOSED_PAREN.match(cleaned)
    if unclosed:
        cleaned = unclosed.group(1)

    cleaned = zenhan.z2h(cleaned, mode=1)  # ascii
    cleaned = zenhan.h2z(cleaned, mode=4)  # kana

    # convert all katakana to hiragana
    cleaned = hiragana(cleaned)

    alpha_start = STARTS_WITH_ALPHA.match(cleaned)
    if alpha_start and not cleaned.startswith('S&B'):
        cleaned = alpha_start.group(1)

    for symbol_re in SPECIAL_SYMBOLS:
        cleaned = symbol_re.sub('', cleaned)

    for piece in SPLIT.split(cleaned):
        piece = ENDS_WITH.sub('', piece).strip()
        if piece:
            yield piece
예제 #7
0
 def load(self, f):
     """Yield word sequences from the parent loader with B/I/O tags decoded.

     For every word whose ``misc`` list is non-empty, the first item is
     stored as ``_wpadding`` and the second is read as the tag ("B" or
     "I"). "B" words get ``stype = WIKI_B`` and an ``entity`` built from
     the remaining ``misc`` items; "I" words get ``stype = WIKI_I``; all
     other words get ``stype = WIKI_O``. Each "B" word also receives a
     ``mention`` made of its surface plus the following "I" surfaces.

     Raises:
         FormattingException: if ``misc`` has fewer than two items, the tag
             is neither "B" nor "I", or an "I" does not follow a B/I word.
     """
     for wseq in super(WikiEdaTree, self).load(f):
         eposlist = []  # indices of the words that start an entity ("B")
         for i, word in enumerate(wseq.word_list):
             if "misc" in word and len(word["misc"]) > 0:
                 if len(word["misc"]) < 2: # space B/I
                     raise FormattingException("malformed annotation: %s" % "".join(word["misc"]))
                 word["_wpadding"] = word["misc"].pop(0)
                 stype = word["misc"].pop(0)
                 if stype not in ("B", "I"):
                     raise FormattingException("malformed annotation: %s" % "".join(word["misc"]))
                 if stype == "B":
                     word["stype"] = self.WIKI_B
                     # Discard one more item; the rest is the entity name.
                     word["misc"].pop(0)
                     word["entity"] = "".join(word["misc"])
                     del word["misc"]
                     eposlist.append(i)
                 else:
                     # An "I" is only valid right after a "B" or another "I".
                     if i <= 0 \
                        or "stype" not in wseq.word_list[i - 1] \
                        or wseq.word_list[i - 1]["stype"] not in (self.WIKI_B, self.WIKI_I):
                         raise FormattingException("malformed annotation: I-without-B: %s" % word["wid"])
                     word["stype"] = self.WIKI_I
             else:
                 word["stype"] = self.WIKI_O
         for epos in eposlist:
             # Concatenate the surfaces of the B word and its trailing I words.
             mention_orig = wseq.word_list[epos]["surface"]
             for i in xrange(epos + 1, len(wseq.word_list)):
                 if wseq.word_list[i]["stype"] == self.WIKI_I:
                     mention_orig += wseq.word_list[i]["surface"]
                 else:
                     break
             wseq.word_list[epos]["mention"] = z2h(mention_orig, mode=3)
         yield wseq
예제 #8
0
 def clean_text(text):
     """Lower-case ``text``, apply NFKC normalization, then convert
     full-width ASCII and digits to half-width."""
     lowered = text.lower()
     normalized = unicodedata.normalize('NFKC', lowered)
     return zenhan.z2h(normalized, zenhan.ASCII | zenhan.DIGIT)
예제 #9
0
 def zenNum2hanNum(strings):
     """
     Convert full-width digits to half-width digits.
     All other characters are left as they are.
     """
     as_unicode = MultiBytes.convert2unicode(strings)
     half_width_digits = zenhan.z2h(as_unicode, mode=2)
     return half_width_digits
예제 #10
0
def clean_text(text):
    """Return ``text`` lower-cased, NFKC-normalized and with full-width
    ASCII/digits converted to half-width."""
    return zenhan.z2h(
        unicodedata.normalize('NFKC', text.lower()),
        zenhan.ASCII|zenhan.DIGIT,
    )
예제 #11
0
def setEffect(string, material):
    """Return the effect text extracted from ``string``.

    Matches of the module-level pattern ``p`` are removed, the text is
    width-converted (mode 3), ``material`` is cut out, and a newline is
    inserted after every "。".
    """
    stripped = p.sub("", string).replace('-', '-')
    half_width = zenhan.z2h(stripped, 3)
    return half_width.replace(material, "").replace("。", "。\n")
예제 #12
0
def normalize(word):
    """Return ``word`` lower-cased, ASCII width-converted and stripped,
    with the first matching prefix from ``_trim_words`` removed.

    Only the first entry of ``_trim_words`` that ``word`` starts with is
    removed; if none matches, the normalized word is returned unchanged.
    """
    word = zenhan.z2h(word.lower(), zenhan.ASCII).strip()
    for t_word in _trim_words:
        if word.startswith(t_word):
            # Slice the prefix off. str.lstrip(t_word) treats its argument
            # as a *set of characters* and would also eat any following
            # characters that happen to occur in t_word.
            return word[len(t_word):]
    return word
예제 #13
0
def test_zenhan():
    """Log timing and results of zenhan conversions for every test case.

    The three hiragana/katakana conversions are only logged as
    "Not implemented"; h2z and z2h are timed through ``calc_time`` and
    their results logged at debug level.
    """
    for banner in (
            "=========================================",
            "=               zenhan                  =",
            "=========================================",
    ):
        logging.info(banner)

    unsupported = (
        "ひらがな(全角) to カタカナ(全角) for %s",
        "カタカナ(全角) to ひらがな(全角) for %s",
        "ひらがな(全角) to カタカナ(半角) for %s",
    )
    for tc in get_test_cases():
        title = tc['title']
        body = tc['body']

        for label in unsupported:
            logging.info(label % title)
            logging.info("Not implemented")

        all_modes = zenhan.ASCII|zenhan.KANA|zenhan.DIGIT
        for label, convert in (
                ("半角 to 全角 for %s", zenhan.h2z),
                ("全角 to 半角 for %s", zenhan.z2h),
        ):
            logging.info(label % title)
            calc_time(convert, body, all_modes)
            logging.debug("result: %s" % convert(body, all_modes))
예제 #14
0
def setMaterial(string):
    """Return the material text from ``string``.

    The text is width-converted (mode 3), cut at the first '<br' if one is
    present, and matches of the module-level pattern ``p`` are removed.
    """
    half_width = zenhan.z2h(string.replace('-', '-'), 3)
    cut_at = half_width.find('<br')
    if cut_at != -1:
        half_width = half_width[:cut_at]
    return p.sub("", half_width)
예제 #15
0
def delete_twitter(line):
    """Normalize character widths in ``line`` and strip tweet decoration.

    Removes URLs, @user names and #hashtags, then every character inside
    the listed code-point ranges, then every character above U+E0FFF.
    What survives is: ASCII digits and letters, U+3005-U+3006,
    U+3040-U+3098, U+30A1-U+30FA, U+30FC and U+4E00-U+9FFF.

    Returns the cleaned string.
    """
    text = zenhan.z2h(line, mode=1)  # alphabet: full-width -> half-width
    text = zenhan.z2h(text, mode=2)  # digits: full-width -> half-width
    text = zenhan.h2z(text, mode=4)  # katakana: half-width -> full-width

    text = re.sub(r'https?://[\w/:%#\$&\?\(\)~\.=\+\-…]+', "", text)  # URL
    text = re.sub(r'@[\w/:%#\$&\?\(\)~\.=\+\-…]+', "", text)  # user name
    text = re.sub(r'#[\w/:%#\$&\?\(\)~\.=\+\-…]+', "", text)  # hashtag

    # The upper bound has to be spelled \U000E0FFF. The re module reads
    # \uXXXX with exactly four hex digits, so the former "\uE0FFF" meant
    # "...-\uE0FF" followed by a literal "F", which deleted every capital F.
    text = re.sub(
        r'[\u0000-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u3004\u3007-\u303F'
        r'\u3099-\u30A0\u30FB\u30FD-\u4DFF\uA000-\U000E0FFF]',
        "", text)  # delete all other characters

    # Drop whatever lies beyond the range above. A fixed negated class
    # replaces the earlier approach of collecting those characters and
    # pasting them unescaped into a dynamically built "[...]" pattern.
    return re.sub(r'[^\u0000-\U000E0FFF]', "", text)
예제 #16
0
def get_tweet(auth, g):
    """Fetch new mentions and reply to each of them.

    Requests the mentions timeline since ``g.last_mention``, stores the id
    of the first returned tweet back into ``g.last_mention`` and, for every
    tweet, posts a reply through ``put_tweet``. If generating the reply
    fails, or the reply contains one of ``ng_words``, a fallback text is
    posted instead.
    """
    url = "https://api.twitter.com/1.1/statuses/mentions_timeline.json"

    # Compiled once instead of being rebuilt for every tweet.
    screen_name_re = re.compile(r"@[a-zA-Z0-9/:%#\$&\?\(\)~\.=\+\-_]+")
    url_re = re.compile(r"https?://[a-zA-Z0-9/:%#\$&\?\(\)~\.=\+\-_]+")
    # NOTE(review): inside a character class "|" is a literal, so this also
    # deletes pipe characters -- confirm whether r'[\r\t]' was intended.
    control_re = re.compile(r'[\r|\t]')

    tweets = requests.get(url,
                          auth=auth,
                          params={
                              "count": "200",
                              "since_id": g.last_mention
                          }).json()
    if not tweets:
        return

    g.last_mention = tweets[0]['id_str']
    for tweet in tweets:
        try:
            got_tweet = tweet['text']
            got_tweet = screen_name_re.sub("", got_tweet)
            got_tweet = url_re.sub("", got_tweet)
            got_tweet = control_re.sub('', got_tweet)
            got_tweet = got_tweet.replace('\n', '')
            print('kitayo:' + got_tweet)
            utterLine = conv.parser(zh.z2h(got_tweet).lower())
            # The conversation model is fed the reversed token sequence.
            utterLineR = utterLine[::-1]
            text = "@" + str(
                tweet['user']['screen_name']) + " " + conv.conversation(
                    utterLineR, conv.model, conv.dictionary, conv.id2wd)
            if any(ng in text for ng in ng_words):
                text = "@" + str(tweet['user']['screen_name']) + str(
                    get_ng_word())
            put_tweet(auth, text, tweet['id'], g)
        except Exception:
            # Best-effort fallback reply. "except Exception" (rather than a
            # bare "except:") lets KeyboardInterrupt/SystemExit propagate.
            print("wakarazu")
            text = "@" + str(tweet['user']['screen_name']) + str(
                get_unknown_word())
            put_tweet(auth, text, tweet['id'], g)
예제 #17
0
def copy_tmp_to_forpdf(filename, gakki):
    """Copy course numbers and titles from sheet 'temp' to sheet 'ForPDF'.

    Columns B through F are processed. Course numbers come from the even
    rows 2..16 and titles from the odd rows 3..17 of 'temp'; each value is
    width-converted and written to a computed row of 'ForPDF'. Today's
    date goes to F2 and, when ``gakki`` is not empty, "Q<gakki>" to D1.

    Returns True after the workbook was saved, False if a PermissionError
    occurred.
    """
    dt = datetime.today()
    try:
        wb = px.load_workbook(filename)
        ws1 = wb['ForPDF']
        ws2 = wb['temp']

        for col in map(chr, range(66, 71)):  # 'B' .. 'F'
            # course numbers
            for src_row in range(2, 17, 2):
                dst_row = round(3 * src_row / 2 + 2)
                number = ws2[cell(c=col, i=src_row)].value
                if isinstance(number, str):
                    # keep only the part before the first space
                    number_text = number.split(" ", 1)[0].strip()
                elif isinstance(number, int):
                    number_text = "{:06d}".format(number)
                else:
                    # None and any other type are left untouched
                    continue
                target = ws1[cell(c=col, i=dst_row)]
                target.value = ""
                target.value = zh.z2h(text=number_text, mode=7)

            # course titles
            for src_row in range(3, 18, 2):
                dst_row = round(((3 * src_row + 7) / 2) - 1)
                title = ws2[cell(c=col, i=src_row)].value
                if title is None:
                    continue
                title = title.replace("-", "-")
                title = title.replace("英語コミュニケーション", "EC")
                target = ws1[cell(c=col, i=dst_row)]
                target.value = ""
                target.value = zh.z2h(text=title, mode=3)

        ws1["F2"].value = dt.strftime("%Y/%m/%d")
        if gakki != "":
            ws1["D1"].value = "Q{}".format(gakki)
        wb.save(filename)
    except PermissionError:
        print("The file was not closed.")
        return False

    print("Successfully completed")
    return True
예제 #18
0
	def norm(s):
		"""Return the part of ``s`` before the first "※", width-normalized
		(z2h mode 7, then h2z mode 4) and stripped."""
		head = s.split("※",1)[0]
		head = head.replace(" ", " ").replace("-", "-")
		return zenhan.h2z(zenhan.z2h(head, mode=7), mode=4).strip()
예제 #19
0
  def import_file(cls, filename, kind):
    """Store data from csv files.

    filename: string
    kind: hotel/restaurant

    """
    import csv
    import progressbar
    import time
    from ghost_spider.elastic import LatteHotelEs, LatteRestaurantEs

    to_class = None
    if kind == 'hotel':
      to_class = LatteHotelEs
    elif kind == 'restaurant':
      to_class = LatteRestaurantEs
    else:
      raise NotImplementedError()

    csvfile = open(filename, 'rb')
    fieldnames = cls.fieldnames
    reader = csv.DictReader(csvfile, fieldnames)

    try:
      to_class.DEBUG = False
      next(reader)  # skip the title line
      rows = list(reader)
      total = len(rows)
      progress = progressbar.AnimatedProgressBar(end=total, width=100)
      bulk = ""
      count_lines = 0
      for line, row in enumerate(rows):
        progress += 1
        progress.show_progress()
        data = {}
        for k, v in row.iteritems():
          if v:
            if not isinstance(v, (list, tuple)):
              data.update({k: v.decode('utf-8')})
        data["name_low"] = data["name"].lower()
        data["name_cleaned"] = to_class.analyze(data["name"].lower(), 'baseform_analyzer')
        data["name_cleaned"] = zenhan.z2h(data["name_cleaned"], zenhan.ASCII)
        data["url"] = data["url"].lower()
        data["kind"] = data["kind"].split('|') if data.get('kind') else []
        bulk += to_class.bulk_data(data, action="create")
        count_lines += 1
        if (count_lines % 200) == 0:
          to_class.send(bulk)
          bulk = ""

      if bulk:
        to_class.send(bulk)
      progress.show_progress()
      print " "
    finally:
      if csvfile:
        csvfile.close()
def conv(txt, unic=False):
    """Return ``txt`` converted to half-width, lower-cased and converted
    back to full-width.

    With ``unic`` true the unicode object is returned; otherwise it is
    encoded as UTF-8.
    """
    widened = zenhan.h2z(zenhan.z2h(unicode(txt)).lower())
    return widened if unic else widened.encode('utf8')
예제 #21
0
def wakati(str):
    """Return the surface forms of the content words in ``str``.

    The text is width-converted (mode 3), lower-cased and parsed with
    ``mecab``. Tokens whose first feature is 助詞/助動詞/副詞/記号, or whose
    second feature is 非自立/代名詞, are skipped.
    """
    skipped_pos = ["助詞", "助動詞", "副詞", "記号"]
    skipped_sub = ["非自立", "代名詞"]

    parsed = mecab.parse(zenhan.z2h(str, mode=3).lower())
    words = []
    for line in parsed.split("\n"):
        cols = line.split("\t")
        if len(cols) < 2:
            # lines without a feature column carry no token
            continue
        feats = cols[1].split(",")
        if feats[0] in skipped_pos or feats[1] in skipped_sub:
            continue
        words.append(cols[0])
    return words
예제 #22
0
  def run(self, edit):
    """Toggle the kana width of every non-empty selection.

    If ``zenhan.h2z`` changes the selected text, that result replaces it;
    otherwise, if ``zenhan.z2h`` changes it, that result replaces it.
    """
    for region in self.view.sel():
        selected = self.view.substr(region)
        if selected == "":
            continue

        widened = zenhan.h2z(selected,zenhan.KANA)
        narrowed = zenhan.z2h(selected,zenhan.KANA)
        if selected != widened:
            self.view.replace(edit, region, widened)
        elif selected != narrowed:
            self.view.replace(edit, region, narrowed)
예제 #23
0
    def run(self, edit):
        """Toggle the kana width of every non-empty selection.

        The h2z result is preferred; the z2h result is used only when h2z
        leaves the text unchanged.
        """
        for region in self.view.sel():
            selected = self.view.substr(region)
            if selected == "":
                continue

            candidates = (
                zenhan.h2z(selected, zenhan.KANA),
                zenhan.z2h(selected, zenhan.KANA),
            )
            # Apply the first conversion that actually changes the text.
            for converted in candidates:
                if selected != converted:
                    self.view.replace(edit, region, converted)
                    break
예제 #24
0
  def zenhan_search(self, statement, numOfResult):
    """Search for ``statement`` in both its half-width and full-width form.

    When the two forms differ, the tokens of both are searched together;
    when they are equal, the tokens of the unmodified statement are used.
    """
    split = self.tokenizer.split_query

    han_statement = zenhan.z2h(statement)
    zen_statement = zenhan.h2z(statement)
    han_tokens = split(han_statement)
    zen_tokens = split(zen_statement)

    if han_statement == zen_statement:
      to_search = split(statement)
    else:
      to_search = han_tokens + zen_tokens

    return self._search(to_search, numOfResult)
예제 #25
0
    def zenhan_search(self, statement, numOfResult):
        """Run ``_search`` on the tokens of the half- and full-width forms
        of ``statement``; falls back to the tokens of ``statement`` itself
        when both forms are identical."""
        narrow = zenhan.z2h(statement)
        wide = zenhan.h2z(statement)

        narrow_tokens = self.tokenizer.split_query(narrow)
        wide_tokens = self.tokenizer.split_query(wide)

        to_search = (
            narrow_tokens + wide_tokens
            if narrow != wide
            else self.tokenizer.split_query(statement)
        )
        return self._search(to_search, numOfResult)
예제 #26
0
  def save_for_production(cls, filename, data):
    """Build the production CSV row from ``data`` and pass it to
    ``CsvWriter.write_to_csv``.

    Columns: name, name_kata, address (width-converted, postal code
    removed), parent_url_key, phone (width-converted) and kind (empty
    string when missing).
    """
    # Width-normalize first so the postal-code pattern sees ASCII digits.
    plain_address = zenhan.z2h(data['address'], zenhan.ALL)
    plain_address = re.sub(r'%s\d+-\d+' % u'〒', '', plain_address).strip()

    columns = [
      data['name'],
      data['name_kata'],
      plain_address,
      data['parent_url_key'],
      zenhan.z2h(data['phone'], zenhan.ALL),
      data.get('kind') or '',
    ]
    CsvWriter.write_to_csv(filename, columns, firs_row=cls.production_first_row)
예제 #27
0
파일: nobi_fetch.py 프로젝트: hkwi/our-data
def normalize(data):
	"""Return ``data`` without NBSP, width-converted and NFKC-normalized.

	If the result consists only of digits, "+", "-" and the dashes
	U+2010..U+2015, those dashes are replaced with "-".
	"""
	nbsp = b"\xC2\xA0".decode("UTF-8")
	converted = zenhan.z2h(zenhan.h2z(data.replace(nbsp, "")))
	data = unicodedata.normalize("NFKC", converted)

	# UTF-8 encodings of U+2010 .. U+2015
	dashes = (
		b"\xe2\x80\x90\xe2\x80\x91\xe2\x80\x92"
		b"\xe2\x80\x93\xe2\x80\x94\xe2\x80\x95"
	).decode("UTF-8")
	if re.match("^[0-9\\+\\-{0}]+$".format(dashes), data):
		for dash in dashes:
			data = data.replace(dash, "-")

	return data
예제 #28
0
def normalizeText(string):
    """Return ``string`` cleaned for further text processing.

    URLs, @screen names and #hashtags are removed, leading and trailing
    whitespace is trimmed, newlines are deleted, and the result is
    converted to half-width and lower-cased.
    """
    patternUrl = r"https?://[a-zA-Z0-9/:%#\$&\?\(\)~\.=\+\-_]+"
    patternScreenName = r"@[a-zA-Z0-9/:%#\$&\?\(\)~\.=\+\-_]+"
    patternHashtag = r"#[a-zA-Z0-9/:%#\$&\?\(\)~\.=\+\-_]+"
    # Raw string: "\s" in a plain literal is an invalid escape sequence
    # and triggers a warning on current Python versions. The pattern text
    # itself is unchanged.
    patternEdgeSpace = r"(^(\s)*)|((\s)*$)"

    cleaned = string
    for pattern in (patternUrl, patternScreenName, patternHashtag,
                    patternEdgeSpace):
        cleaned = re.sub(pattern, "", cleaned)
    cleaned = cleaned.replace("\n", "")

    return zh.z2h(cleaned).lower()
예제 #29
0
def extract_data_from_html(filename):
    """Parse the page for ``filename`` and return ``(updated_date, gpa_list)``.

    ``updated_date`` is parsed from the text of the second table;
    ``gpa_list`` holds one dict per row of the fifth table (the first row
    is dropped), skipping rows whose fourth cell is empty.
    """
    # html to table
    page = urlopen(return_urlfilename(filename))
    tables = BeautifulSoup(page, "html.parser").findAll("table")

    # table to update_date
    date_text = z2h(tables[1].get_text().strip(), 2)
    updated_date = strptime(date_text, "%Y年%m月%d日")

    # table to gpa_list
    columns = [
        "course_title", "lecturer", "year_completed", "grade_points", "grade",
        "credits", "gp"
    ]
    rows = tables[4].findAll("tr")
    rows.pop(0)  # drop the first row

    gpa_list = []
    for row in rows:
        record = dict.fromkeys(columns)
        keep = True
        for idx, td in enumerate(row.findAll(['td', 'th'])):
            value = z2h(td.get_text().strip().replace("\u3000", " "), 3)
            if idx == 3 and value == "":
                # an empty fourth cell disqualifies the row
                keep = False
            else:
                record[columns[idx]] = value

        if keep:
            record["grade_points"] = int(record["grade_points"])
            record["credits"] = float(record["credits"])
            record["gp"] = float(record["gp"])
            gpa_list.append(record)

    return updated_date, gpa_list
예제 #30
0
  def save_to_csv(cls, filename, data):
    """Build one CSV row from ``data`` and pass it to ``CsvWriter.write_to_csv``.

    The address is converted with ``zenhan.ASCII`` and has its postal code
    removed; the phone is converted with ``zenhan.ALL``. ``latte_url``
    falls back to an empty string.
    """
    plain_address = zenhan.z2h(data['address'], zenhan.ASCII)
    # remove the zip code
    plain_address = re.sub(r'%s\d+-\d+' % u'〒', '', plain_address).strip()

    columns = [
      data['name'],
      data['name_kata'],
      plain_address,
      data['prefecture'],
      data['area'],
      zenhan.z2h(data['phone'], zenhan.ALL),
      data['kind'],
      data.get('latte_url') or u'',
      data['page_url'],
      data['id'],
    ]
    CsvWriter.write_to_csv(filename, columns, firs_row=cls.first_row)
예제 #31
0
def tokenize(text):
    '''
    Yield the nouns of ``text`` found by morphological analysis.

    Each noun surface is lower-cased, stripped and converted to
    half-width. If that conversion fails, '0' is yielded in its place.
    '''
    node = mecab.parseToNode(text)
    while node:
        if node.feature.split(',')[0] == '名詞':
            # Keep the yield outside the try block. A bare "except:" around
            # a yield also catches GeneratorExit when the generator is
            # closed, and yielding again from there raises RuntimeError.
            try:
                token = zenhan.z2h(node.surface.lower().strip())
            except Exception:
                token = '0'
            yield token
        node = node.next
예제 #32
0
def search(browser):
    """Search Amazon interactively and open the chosen product page.

    Prompts for a product name, runs the search, prints the numbered
    result titles, prompts for a result number and navigates ``browser``
    to that result's detail page.

    Returns the title text of the selected result.
    """
    # read the search term
    query = input("商品名入力: ")

    # go to the Amazon home page
    browser.get('https://www.amazon.co.jp/')

    # type the search term
    search = browser.find_element_by_id('twotabsearchtextbox')
    search.send_keys(query)

    # press the search button
    search_btn = browser.find_element_by_class_name('nav-input')
    search_btn.click()

    time.sleep(2)

    # collect the result list
    items = browser.find_elements_by_class_name('s-result-item')

    time.sleep(2)

    for num, item in enumerate(items):
        try:
            item_name = item.find_element_by_tag_name('h2').text
        except Exception:
            # Results without a title element are skipped. "except
            # Exception" (rather than a bare "except:") still lets
            # Ctrl-C interrupt the listing.
            continue
        print(num, ':', item_name)
        print('-' * 20)

    # choose a product; input() already returns str
    select_num = input('商品番号入力: ')
    select_item_num = 'result_' + zenhan.z2h(select_num)

    time.sleep(1)

    select_item_html = browser.find_element_by_id(select_item_num)

    # URL of that product's detail page
    select_item = select_item_html.find_element_by_class_name('a-link-normal')
    select_item_url = select_item.get_attribute('href')
    # product name
    select_item_name = select_item_html.find_element_by_tag_name('h2').text

    browser.get(select_item_url)

    time.sleep(1)

    return select_item_name
예제 #33
0
def month_date_end_search(line):
    """Find the end date of an entry that includes a month; return
    ``(month, day)`` as two ints."""
    # Padding: the space and the tilde are replaced with a zero so that
    # single-digit values still form a two-character slice.
    padded = line.replace(' ', '0').replace('~', '0')

    month_pos = padded.find('月')
    # Handles a single-digit day: the first month marker becomes a zero.
    padded = padded.replace('月', '0', 1)

    # Position of the day marker after the month.
    day_pos = padded.find('日', month_pos + 1)
    # Correct mis-notations using the relative position of day and weekday:
    # the '$' is expected right after the day marker.
    dollar_pos = padded.find('$', month_pos + 1)
    if dollar_pos != day_pos + 1:
        day_pos = dollar_pos

    month = int(zenhan.z2h(padded[month_pos - 2:month_pos]))
    day = int(zenhan.z2h(padded[day_pos - 2:day_pos]))
    return month, day
예제 #34
0
def date_start_search(line):
    """Find the start date of an entry and return it as a str."""
    # Padding: spaces are replaced with a zero (e.g. " 1" -> "01").
    padded = line.replace(' ', '0')

    day_pos = padded.find('日')
    # Correct mis-notations using the relative position of day and weekday:
    # the '$' is expected right after the day marker.
    dollar_pos = padded.find('$', day_pos + 1)
    if dollar_pos != day_pos + 1:
        day_pos = dollar_pos

    return zenhan.z2h(padded[day_pos - 2:day_pos])
예제 #35
0
def getCardURL(name):
    """Look up ``name`` on ocg.xpg.jp and return the href of its card page.

    Every link whose href matches "/c/+" is compared, after tag stripping
    and width conversion, with ``name``; the href of the last matching
    link is returned.

    Returns False when no link matches or when an HTTPError is raised.
    """
    url = "https://ocg.xpg.jp/search/search.fcgi?Name=" + urllib.parse.quote(
        name.encode('Shift_JIS')) + "&Mode=0"
    try:
        fp = requests.get(url)
        soup = BeautifulSoup(fp.content, "html.parser")
        fp.close()  # was "fp.close" without parentheses, i.e. never called
        time.sleep(1)

        # Initialised so a search without a match returns False instead of
        # failing with UnboundLocalError.
        url_text = False
        for text in soup.find_all("a", href=re.compile("/c/+")):
            name_tmp = p.sub("", str(text))
            name_tmp = zenhan.z2h(name_tmp.replace('-', '-'), 3)
            if '【' in name_tmp:
                name_tmp = name_tmp[:name_tmp.index("【")]
            if name == name_tmp:
                url_text = text.get("href")
        return url_text
    except urllib.error.HTTPError:
        # NOTE(review): the request is made with requests, which raises its
        # own exception types -- confirm this handler can actually trigger.
        time.sleep(1)
        return False
예제 #36
0
def date_end_search(line):
    """Find the end date of an entry without a month and return it as a str."""
    # Padding: the space and the tilde are replaced with a zero.
    padded = line.replace(' ', '0').replace('~', '0')

    first_day_pos = padded.find('日')
    # Position of the second day marker.
    second_day_pos = padded.find('日', first_day_pos + 1)
    # Correct mis-notations using the relative position of day and weekday:
    # the '$' is expected right after the second day marker.
    dollar_pos = padded.find('$', first_day_pos + 2)
    if dollar_pos != second_day_pos + 1:
        second_day_pos = dollar_pos

    return zenhan.z2h(padded[second_day_pos - 2:second_day_pos])
예제 #37
0
    def changeClassroom(self, classroom):
        """Return an abbreviated, width-converted form of ``classroom``.

        Names containing a comma on their first line are kept as they are,
        one room has a fixed abbreviation, and names starting with a known
        building prefix get a sequence of replacements. Spaces are removed
        before the final width conversion (mode 3).
        """
        # (pattern, ordered replacements) -- the first matching rule wins.
        prefix_rules = (
            (r"一般教育棟.*", (("一般教育棟", ""), ("教室", ""))),
            (r"工学部.*", (("工学部", "工"), ("号館第", "-"), ("号館", "-"),
                        ("講義室", ""))),
            (r"情報実習室.*", (("情報実習室", "情"),)),
            (r"理学部.*", (("理学部", "理"), ("号館第", "-"), ("号館", "-"),
                        ("講義室", ""))),
        )

        cr = classroom
        if match(r".*,.*", classroom):
            pass  # comma-separated names are not abbreviated
        elif classroom == "工学部1号館情報実習室1(CAE室)":
            cr = "工1-CAE室"
        else:
            for pattern, swaps in prefix_rules:
                if match(pattern, classroom):
                    for old, new in swaps:
                        cr = cr.replace(old, new)
                    break

        return zh.z2h(text=cr.replace(" ", ""), mode=3)
예제 #38
0
def scp_number(msg):
    """Resolve an SCP reference in ``msg`` to a row of data/scps.csv.

    ``msg`` is case-folded and width-converted, and "-" and "scp" are
    removed; the digits form the number and the rest the branch code
    ("en" when empty).

    Returns:
        - the result of ``get_country_from_code`` if the branch is not in
          ``BRANCHS``;
        - the first matching CSV row as a list;
        - a "does not exist yet" message, or None when the number has more
          than four digits, if no row matches.

    Raises:
        FileNotFoundError: if data/scps.csv is missing.
    """
    msg = zenhan.z2h(msg.casefold()).replace("-", "").replace("scp", "")
    number = re.sub("\\D", "", msg)

    # NOTE(review): the original test was `number is (None and "")`, which
    # evaluates to `number is None` and can never be true for a re.sub
    # result. It is kept equivalent here because messages without digits
    # currently continue to the branch lookup below -- confirm whether an
    # early return for an empty number is wanted.
    if number is None:
        return None

    brt = msg.replace(number, "")

    if brt == "":
        brt = "en"

    if brt not in BRANCHS:  # needs improvement
        reply = get_country_from_code(brt)
        return reply

    try:
        dictionary = pd.read_csv(currentpath + "/data/scps.csv", index_col=0)
    except FileNotFoundError as e:
        print(e)
        # Re-raise: continuing would fail on the unbound `dictionary` with
        # a misleading UnboundLocalError.
        raise

    result = dictionary.query('branches in @brt')
    result = result.query('url.str.contains(@number)', engine='python')
    result = result[0:1].values.tolist()
    result = itertools.chain(*result)
    result = list(result)

    # NOTE(review): `is` compares identity, not equality, so the second
    # operand is almost never true for strings -- confirm the intended
    # comparison before changing it.
    if len(result) == 0 or number is re.sub("\\D", "", result[0]):
        if len(number) > 4:
            return None
        if "en" in brt:
            return("scp-" + str(number) + "はまだ存在しません")
        else:
            return("scp-" + str(number) + "-" + str(brt) + "はまだ存在しません")

    return(result)
예제 #39
0
def setName(string):
    """Return the card name from ``string``.

    Matches of the module-level pattern ``p`` are removed, the text is
    width-converted (mode 3) and everything from the first '【' onwards is
    cut off.
    """
    name = zenhan.z2h(p.sub("", string).replace('-', '-'), 3)
    # Test for the same bracket that is indexed. The previous check looked
    # for '】', which raised ValueError when only the closing bracket was
    # present and skipped truncation when only '【' was present.
    if '【' in name:
        name = name[:name.index('【')]
    return name
def med_facility(tdfk, facid):
    """Return 'M' followed by ``tdfk`` zero-filled to 2 characters and
    ``facid`` zero-filled to 7 characters, both converted to half-width."""
    padded = [
        zenhan.z2h(value, mode=7, ignore=()).zfill(width)
        for value, width in ((tdfk, 2), (facid, 7))
    ]
    return 'M' + padded[0] + padded[1]
예제 #41
0
파일: get_material.py 프로젝트: deplop/food
def get_recipe(url, dish):
    """Download a recipe page and return its per-person ingredient amounts.

    The last <p> element whose text contains both "人" and "材料" is taken
    as the recipe text. The line containing "材料" gives the number of
    people (first digit found); every other non-empty line is split into an
    ingredient name and an amount-with-unit part.

    Returns a dict mapping ingredient name to ``[amount_per_person, unit]``,
    where ``unit`` has been converted with ``zenhan.h2z(unit, 4)``.

    NOTE(review): ``dish`` is not used in this function.
    """
    
    request = urllib2.Request(url)
    response = urllib2.urlopen(request)
    response1=response.read()
    
    soup = BeautifulSoup(response1,"html.parser")
    recipe =""
    # Keep the last paragraph that mentions both people and ingredients.
    for p in soup.findAll('p',text=False):  
        if p.text.find("人")!=-1 and p.text.find("材料")!=-1:
            recipe = p.text
    
    temps=recipe.split("\n")        
    elements =dict()
    amount=0
    people=0
    for temp in temps:
        
        # Number of people: only the first single digit on the line is read.
        if temp.find("材料")!=-1:
            people=float(re.search("[0-9]",zenhan.z2h(temp,2)).group())
            
        # Ingredient line: compute the amount for one person.
        elif temp!="":
            
            # Remove bullet marks, then split into [name, amount+unit].
            element=temp.replace("●","").replace("○","").replace("〇","").replace("◎","").lstrip(" ")
            if temp.find("…")!=-1:
                element= element.split("…")
            else:
                element= element.split(None,1)
            if people!=0:
                # Convert the digits of the amount part to half-width.
                if len(element) >= 2: 
                    han_element = zenhan.z2h(element[1],2)
                else: 
                    # A line without an amount part ends the parsing.
                    break
                # Unit: the first run of characters that are not digits,
                # "/", "~" or a space.
                
                unit=re.search("[^0-9\/~ ]+",han_element).group(0)
                
                # Amount: digits, "/" and spaces left after removing the unit.
                string_amount=re.search("[0-9\/ ]+",han_element.replace(unit,""))  
                if string_amount!=None:
                    # Space-separated parts are summed as fractions, then
                    # divided by the number of people.
                    amount= float(sum(Fraction(s) for s in string_amount.group(0).split()))/people
                else:
                    amount =0    
            else:
                print "people=0"
            # NOTE(review): when people is 0 and no earlier line has set
            # `unit`, the next line looks like it raises NameError -- confirm.
            elements.update({element[0]:[amount,zenhan.h2z(unit,4)]})

    return elements
예제 #42
0
def normalize(text):
    """Return ``text`` with full-width digits and ASCII converted to
    half-width."""
    digit_and_ascii = zenhan.DIGIT | zenhan.ASCII
    return zenhan.z2h(text, mode=digit_and_ascii)
예제 #43
0
def normalize_text(text):
    """Return ``text`` stripped, converted to half-width (mode 7) and with
    every pair of ``normalize_replace_map`` applied in order."""
    result = zenhan.z2h(text.strip(), mode=7)
    for old, new in normalize_replace_map:
        result = result.replace(old, new)
    return result
예제 #44
0
파일: tests.py 프로젝트: tokibito/zenhan-py
 def test_z2h_digit_only(self):
     """z2h with the DIGIT flag yields the expected string."""
     expected = u("゚abcDE゙F123456アガサダナバビプペ゚")
     self.assertEqual(zenhan.z2h(self.original, zenhan.DIGIT), expected)
예제 #45
0
 def zenAlphaNum2hanAlphaNum(strings):
     """
     Convert full-width alphanumeric characters to half-width ones.
     """
     as_unicode = MultiBytes.convert2unicode(strings)
     half_width = zenhan.z2h(as_unicode, mode=3)
     return half_width
예제 #46
0
파일: tests.py 프로젝트: tokibito/zenhan-py
 def test_z2h_all(self):
     """z2h with ALL yields the expected string and equals the result of
     combining the ASCII, DIGIT and KANA flags."""
     result = zenhan.z2h(self.original, zenhan.ALL)
     self.assertEqual(result, u("゚abcDE゙F123456アガサダナバビプペ゚"))

     combined = zenhan.z2h(self.original,
                           zenhan.ASCII|zenhan.DIGIT|zenhan.KANA)
     self.assertEqual(result, combined)
예제 #47
0
파일: tests.py 프로젝트: tokibito/zenhan-py
 def test_z2h_ascii_and_digit(self):
     """z2h with ASCII|DIGIT yields the expected string."""
     expected = u("゚abcDE゙F123456アガサダナバビプペ゚")
     self.assertEqual(
         zenhan.z2h(self.original, zenhan.ASCII|zenhan.DIGIT), expected)
예제 #48
0
# Collect the issue codes: the id attribute of every <span> inside the
# centred table cells, stored in document order.
i = 0
for p1 in tmp1.find_all("td", {"class": "a-center tb-color001"}):
    for p2 in p1.find_all("span"):
        # issue code
        data['code'][i] = p2.attrs['id']
        i += 1

for i, p1 in enumerate(
        tmp1.find_all(attrs={
            "rowspan": "2",
            "class": "a-center tb-color001 w-space"
        })):
    # listing date, converted to half-width and stripped
    data['jojodate'][i] = zenhan.z2h(p1.contents[0].string).strip()

i = 0
for p1 in tmp1.find_all(attrs={"rowspan": "2", "class": "a-left tb-color001"}):

    # company name: text of the first link, without '*' and surrounding
    # whitespace
    data['name'][i] = p1.find("a").text.replace('*', '').strip()

    i += 1

i = 0
for p1 in tmp1.find_all("tr"):
    for p2 in p1.find_all("td", {"class": "a-center tb-color001"}):
        for p3 in p2:
            if ("第一部" in p3 or "第二部" in p3 or "マザーズ" in p3
                    or "JQスタンダード" in p3):
예제 #49
0
파일: tests.py 프로젝트: tokibito/zenhan-py
 def test_z2h_ascii_and_kana(self):
     """z2h with ASCII|KANA yields the expected string."""
     expected = u("゚abcDE゙F123456アガサダナバビプペ゚")
     self.assertEqual(
         zenhan.z2h(self.original, zenhan.ASCII|zenhan.KANA), expected)
예제 #50
0
def zenkaku_to_hankaku(text):
    """Convert full-width characters in ``text`` to half-width characters.

    Delegates to ``zenhan.z2h`` with ``mode=7`` and returns its result.
    """
    # NOTE(review): mode=7 is presumably every zenhan flag OR-ed together
    # (the same as zenhan.ALL) -- confirm before replacing the literal.
    half_width = zenhan.z2h(text, mode=7)
    return half_width
예제 #51
0
def setPendulumScale(scale):
    """Return the number that follows the first '赤' in ``scale`` as an int.

    Everything after the first '赤' is passed through ``zenhan.z2h`` and then
    parsed with ``int``.

    Raises:
        ValueError: if ``scale`` contains no '赤' (from ``index``), or if the
            converted remainder is not a valid integer literal (from ``int``).
    """
    start = scale.index('赤') + 1
    remainder = scale[start:]
    return int(zenhan.z2h(remainder))
예제 #52
0
파일: tests.py 프로젝트: tokibito/zenhan-py
 def test_z2h_kana_only(self):
     # Run z2h on the fixture text with only the KANA flag selected.
     actual = zenhan.z2h(self.original, zenhan.KANA)
     expected = u("゚abcDE゙F123456アガサダナバビプペ゚")
     # The converted text must equal the expected literal exactly.
     self.assertEqual(actual, expected)
예제 #53
0
파일: tests.py 프로젝트: tokibito/zenhan-py
 def test_z2h_digit_and_kana(self):
     # Run z2h on the fixture text with the DIGIT and KANA flags combined.
     flags = zenhan.DIGIT | zenhan.KANA
     actual = zenhan.z2h(self.original, flags)
     expected = u("゚abcDE゙F123456アガサダナバビプペ゚")
     # The converted text must equal the expected literal exactly.
     self.assertEqual(actual, expected)
예제 #54
0
def messaging_service():
    """Handle one webhook call for the quiz bot and send the matching reply.

    The JSON body of the current ``request`` is parsed and the fields of the
    LAST entry in ``events`` are kept (earlier entries are overwritten). Then:

    * a ``follow`` event, or the text "クイズ連携", gets a greeting plus a
      yes/no confirm template;
    * a ``postback`` event records the answer through ``insert`` and returns;
    * any other text is routed through greeting / help / the four-step quiz
      submission flow (intents ``post1`` .. ``post4``) / quiz lookups.

    Text-based dispatch uses ``strMessage``, which is an empty string for
    events that carry no text (follow events, non-text messages). Such
    events therefore fall through the dispatch chain without raising.

    Returns:
        True on every path that does not raise.
    """
    logger.info('messaging_service()')

    strreq = request.data
    try:
        strreq = strreq.decode()
    except AttributeError:
        # The body has no decode(); use it as it is.
        pass
    jsonreq = json.loads(strreq)
    if debuglog:
        logger.info(str(jsonreq))
    reply_token = ''
    message = ''
    strMessage = ''
    user_id = ''
    etype = ''
    postbackdata = ''
    # NOTE: timestamp is assigned below but is not read again in this function.
    timestamp = datetime.now()

    for e in jsonreq['events']:
        etype = e['type']
        # Not every event carries a reply token; default to an empty one
        # instead of raising KeyError.
        reply_token = e.get('replyToken', '')
        user_id = e['source']['userId']
        if etype == 'message':
            message = e['message']
            # Messages without a 'text' field yield an empty text rather
            # than a KeyError.
            strMessage = message.get('text', '')
            timestamp = int(e['timestamp'])
        elif etype == 'follow':
            timestamp = int(e['timestamp'])
        elif etype == 'postback':
            logger.info(str(e))
            postbackdata = e['postback']['data']
            timestamp = int(e['timestamp'])

    date = datetime.now()
    send_line_bot_log(user_id, f"{user_id},{strMessage}," + date.strftime('%Y/%m/%d %H:%M:%S'))
    strIntent = ""
    # Placeholders for induction phrase, quiz, answer and nickname.
    _i = " "
    _q = " "
    _a = " "
    _n = " "

    # Follow event (or explicit link request): greet and ask for consent.
    if etype == 'follow' or strMessage == u'クイズ連携':
        send_line_bot_log(user_id, f"{user_id},follow message," + date.strftime('%Y/%m/%d %H:%M:%S'))
        mess = []
        mess.append("言えまてんBotです!フォローありがとう!")
        mess.append({
            'type': 'template',
            'altText': '確認',
            'template': {
                'type': 'confirm',
                'text': 'Clovaはお持ちですか?言えまてんクイズからのメッセージをこちらに送信しても大丈夫ですか?',
                'actions': [
                    {
                        'type': 'postback',
                        'label': 'はい',
                        'data': 'res=yes'
                    },
                    {
                        'type': 'postback',
                        'label': 'いいえ',
                        'data': 'res=no'
                    }
                ]
            }
        })
        send_line_reply(reply_token, mess)

    # Answer to the confirm template above.
    if etype == 'postback':
        if postbackdata == 'res=yes':
            text = "ありがとう!言えまてんクイズの使い方を知りたい時は「使い方」と言ってみてね!"
            send_line_reply(reply_token, text)
            insert(user_id, "follow", "follow", "ok", _i, _q, _a, _n)
        else:
            text = "残念です!言えまてんクイズの使い方を知りたい時は「使い方」と言ってみてね!"
            send_line_reply(reply_token, text)
            insert(user_id, "follow", "follow", "ng", _i, _q, _a, _n)
        return True

    # Restore the stored "reply" state only if it is newer than 8 hours.
    intent = getIntent(user_id, "reply")
    if 'date' in intent:
        postDate = intent['date']
        if datetime.strptime(postDate, '%Y/%m/%d %H:%M:%S') > datetime.now() - timedelta(hours=8):
            strIntent = intent['intent']
            _i = intent['induction']
            _q = intent['quiz']
            _a = intent['answer']

    # Dispatch on the text. strMessage equals message['text'] whenever a text
    # message was received and is '' otherwise, so no branch matches for
    # text-less events.
    if re.search("こんにちは|Hello|こんばんは|おはよう", strMessage):
        text = "こんにちは!言えまてんボットです。よろしくね。「使い方」というと説明するよ!"
        send_line_reply(reply_token, text)
    elif re.search("^(使い方|つかいかた|Help|ヘルプ)", strMessage):
        mess = u'言えまてんクイズの使い方です。最初にひとつの言葉を10回繰り返して言ってもらいます。\n'
        mess += u'次に、その言葉に少し関係のある問題を出すので答えを考えてね。問題は全部で' + str(len(quiz)) + 'つありますよ。\n'
        mess += u'答えがあっていると正解!です。もう一度問題をやるか聞かれたら「はい」か「いいえ」と答えてね。\n'
        mess += u'「問題1」、「問題1の答え」のようにBotに言うと問題についてお答えします。問題を思いついた人は「応募」と言ってみてね。\n'
        send_line_reply(reply_token, mess)
    elif strIntent == "post1" and len(strMessage) > 1:
        # Submission step 1: the phrase to repeat.
        _i = strMessage
        insert(user_id, "reply", "post2", message, _i, _q, _a, _n)
        mess = []
        mess.append("「" + strMessage + "」ですね。わかりました。")
        mess.append("次に問題を教えてください。")
        send_line_reply(reply_token, mess)
    elif strIntent == "post2" and len(strMessage) > 5:
        # Submission step 2: the question.
        _q = strMessage
        insert(user_id, "reply", "post3", message, _i, _q, _a, _n)
        mess = []
        mess.append("問題は「" + strMessage + "」ですね。わかりました。")
        mess.append("次は答えを教えてください。")
        send_line_reply(reply_token, mess)
    elif strIntent == "post3" and len(strMessage) > 0:
        # Submission step 3: the answer.
        _a = strMessage
        insert(user_id, "reply", "post4", message, _i, _q, _a, _n)
        mess = []
        mess.append("答えは「" + strMessage + "」ですね。わかりました。")
        mess.append("最後にニックネームを教えて!もし採用されたら問題の解説の時に紹介するね。内緒にしたい時は「匿名」と答えてね。")
        send_line_reply(reply_token, mess)
    elif strIntent == "post4" and len(strMessage) > 0:
        # Submission step 4: the nickname; then forward the whole submission.
        _n = strMessage
        insert(user_id, "reply", "finish", message, _i, _q, _a, _n)
        mess = []
        mess.append("「" + strMessage + "」さん、応募ありがとう!")
        mess.append("参考にするね!")
        send_line_reply(reply_token, mess)
        send_sns(str(jsonreq), _i, _q, _a, _n)
    elif re.search("^(応募|おうぼ|投稿)", strMessage):
        # Start of the submission flow.
        insert(user_id, "reply", "post1", message, _i, _q, _a, _n)
        mess = []
        mess.append("言えまてんクイズです。新しい問題を応募してます。面白い問題を考えた人は、1.10回繰り返す言葉(キリンとか)、2.問題、3.答えの3つを教えてね。")
        mess.append("では、10回繰り返して言ってもらうフレーズを教えてください。")
        send_line_reply(reply_token, mess)
    elif re.search("^問題[0-9]{1,2}$", zenhan.z2h(strMessage)):
        # "問題N": show question N (digits are matched after zenhan.z2h).
        match = re.search("[0-9]{1,2}", zenhan.z2h(strMessage))
        num = int(match.group())
        if num > 0:
            # NOTE(review): quiz[num] with len(quiz) >= num assumes quiz is
            # keyed from 1; if quiz is a zero-based list this is off by one
            # -- confirm against the definition of quiz.
            if len(quiz) >= num:
                text = f"問題{num}: {quiz[num]['q']}"
                send_line_reply(reply_token, text)
            else:
                text = f"問題{num}がみつかりません。"
                send_line_reply(reply_token, text)
    elif re.search("^問題[0-9]{1,2}の答", zenhan.z2h(strMessage)):
        # "問題Nの答...": show the first answer and the explanation for N.
        match = re.search("[0-9]{1,2}", zenhan.z2h(strMessage))
        num = int(match.group())
        if num > 0:
            # NOTE(review): same indexing assumption as in the branch above.
            if len(quiz) >= num:
                mess = []
                mess.append(f"問題{num}の答え=> {quiz[num]['a'][0]}")
                mess.append(f"問題{num}の解説=> {quiz[num]['i']}")
                send_line_reply(reply_token, mess)
            else:
                text = f"問題{num}がみつかりません。"
                send_line_reply(reply_token, text)

    return True
예제 #55
0
def normalize(data):
	"""Return ``data`` with no-break spaces removed and widths normalised.

	The no-break space (UTF-8 bytes C2 A0) is stripped first; the text is
	then passed through ``zenhan.h2z``, back through ``zenhan.z2h``, and
	finally through Unicode NFKC normalisation.
	"""
	nbsp = b"\xC2\xA0".decode("UTF-8")
	without_nbsp = data.replace(nbsp, "")
	widened = zenhan.h2z(without_nbsp)
	narrowed = zenhan.z2h(widened)
	return unicodedata.normalize("NFKC", narrowed)
예제 #56
0
파일: tests.py 프로젝트: tokibito/zenhan-py
 def test_z2h_ascii_only(self):
     # Run z2h on the fixture text with only the ASCII flag selected.
     actual = zenhan.z2h(self.original, zenhan.ASCII)
     expected = u("゚abcDE゙F123456アガサダナバビプペ゚")
     # The converted text must equal the expected literal exactly.
     self.assertEqual(actual, expected)