# Imports inferred from the calls below; `tT` aliases translate.Translator.
import numpy as np
import goslate
from googletrans import Translator
from textblob import TextBlob
from translate import Translator as tT
from translation import iciba


def text_blob(doc):
    """
    TextBlob sentiment, averaged over sentences.
    :param doc: Chinese document text
    :return: mean sentiment score in (-1, 1)
    """
    # Translate the Chinese text to English, falling back through several
    # translation backends until one succeeds.
    try:
        translator_en = Translator()
        en_text = translator_en.translate(doc).text
        blob = TextBlob(en_text)
    except Exception:
        try:
            # Fallback 1: TextBlob's built-in translate()
            blob = TextBlob(doc)
            blob = blob.translate()
        except Exception:
            try:
                # Fallback 2: iciba
                en_text = iciba(doc, dst='en')
                blob = TextBlob(en_text)
            except Exception:
                try:
                    # Fallback 3: goslate
                    gs = goslate.Goslate()
                    en_text = gs.translate(doc, 'en')
                    blob = TextBlob(en_text)
                except Exception:
                    # Fallback 4: the translate package
                    translator = tT(to_lang="en")
                    en_text = translator.translate(doc)
                    blob = TextBlob(en_text)
    score = []
    for sentence in blob.sentences:
        # Weight polarity by subjectivity and skip near-neutral sentences.
        emotion = sentence.sentiment.polarity * sentence.sentiment.subjectivity
        if 0.00001 > emotion > -0.00001:
            continue
        score.append(emotion)
    # np.mean([]) would return nan, so guard against an empty score list.
    return np.mean(score) if score else 0.0
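
# Demo for text_blob(); a minimal sketch. The sample sentence is invented,
# and at least one of the translation backends must be reachable.
if __name__ == '__main__':
    print(text_blob('这部电影非常好看，强烈推荐！'))  # positive review -> score > 0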
import requests
from bs4 import BeautifulSoup
from langdetect import detect
from translation import iciba

# `headers` is referenced below but defined elsewhere in the original
# project; a minimal placeholder is assumed here.
headers = {'User-Agent': 'Mozilla/5.0'}


def get_html_text(url):
    '''
    Fetch the page at the given URL and extract its text content.
    :param url: target URL
    :return: the extracted text, or None on failure
    '''

    # Download the page content
    try:
        req = requests.get(url, headers=headers, timeout=3)
        req.encoding = req.apparent_encoding
        html = req.text

        soup = BeautifulSoup(html, 'lxml')

        # Remove <script> and <style> elements
        [s.extract() for s in soup(['script', 'style'])]

        # Extract the text and join it into a single string
        text = soup.get_text()
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        text = ' '.join(chunk for chunk in chunks if chunk)

        print(text)

        # If the page is not Chinese, translate it to Chinese via iciba.
        try:
            lang = detect(text)
            print(lang)
            if lang != 'zh-cn':
                text = iciba(text, dst='zh')
            print(text)
        except Exception:
            pass

        return text
    except Exception:
        return None
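
# Demo for get_html_text(); a minimal sketch that assumes network access.
# The URL is taken from the commented-out calls in the original __main__.
if __name__ == '__main__':
    print(get_html_text('https://en.wikipedia.org/wiki/How-to'))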
import sys

import numpy as np
import tensorflow as tf
from translation import iciba


def run_inference_on_image(image):
  """Runs inference on an image.

  Args:
    image: Image file name.

  Returns:
    Nothing
  """
  # create_graph(), NodeLookup, FLAGS, TranslateError and ConnectError are
  # defined elsewhere in the original script; the function follows the TF1
  # classify_image.py example, so the tf.gfile/tf.Session API is intentional.
  if not tf.gfile.Exists(image):
    tf.logging.fatal('File does not exist %s', image)
  image_data = tf.gfile.FastGFile(image, 'rb').read()

  # Creates graph from saved GraphDef.
  create_graph()

  with tf.Session() as sess:
    # Some useful tensors:
    # 'softmax:0': A tensor containing the normalized prediction across
    #   1000 labels.
    # 'pool_3:0': A tensor containing the next-to-last layer containing 2048
    #   float description of the image.
    # 'DecodeJpeg/contents:0': A tensor containing a string providing JPEG
    #   encoding of the image.
    # Runs the softmax tensor by feeding the image_data as input to the graph.
    softmax_tensor = sess.graph.get_tensor_by_name('softmax:0')
    predictions = sess.run(softmax_tensor,
                           {'DecodeJpeg/contents:0': image_data})
    predictions = np.squeeze(predictions)

    # Creates node ID --> English string lookup.
    node_lookup = NodeLookup()

    top_k = predictions.argsort()[-FLAGS.num_top_predictions:][::-1]
    for node_id in top_k:
      human_string = node_lookup.id_to_string(node_id)
      score = predictions[node_id]
      try:
        human_string_zh = iciba('Also known as ' + human_string, dst='zh')
        if sys.version_info.major <= 2:
          human_string_zh = human_string_zh.encode('utf-8')
        print('%s(%s)---(Matching = %.5f)' % (human_string, human_string_zh, score))
      except (TranslateError, ConnectError):
        print('%s---(Matching = %.5f)' % (human_string, score))
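
# Demo for run_inference_on_image(); a sketch only. It assumes the Inception
# GraphDef and label map have been set up as in classify_image.py, and the
# file name here is just an example.
if __name__ == '__main__':
  run_inference_on_image('cropped_panda.jpg')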
def get_html_text_by_file(fileName):
    '''
    Extract the text content from an already-downloaded HTML file.
    :param fileName: HTML file
    :return: the extracted text, or None on failure
    '''
    try:
        with open(fileName, 'r', encoding='utf-8', errors='ignore') as f:
            html = f.read()

        soup = BeautifulSoup(html, 'lxml')

        # Remove <script> and <style> elements
        [s.extract() for s in soup(['script', 'style'])]

        # Extract the text and join it into a single string
        text = soup.get_text()
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        text = ' '.join(chunk for chunk in chunks if chunk)

        print(text)

        # If the text is not Chinese, translate it to Chinese via iciba.
        try:
            lang = detect(text)
            print(lang)
            if lang != 'zh-cn':
                text = iciba(text, dst='zh')
            print(text)
        except Exception:
            pass

        return text

    except IOError:
        return None
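
# Demo for get_html_text_by_file(); "test.html" matches the sample file name
# used in the original __main__ block and is assumed to exist locally.
if __name__ == '__main__':
    print(get_html_text_by_file('test.html'))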
Example #5
# from googletrans import Translator
# translator = Translator()
# print(translator.translate('hola'))

# from translate import Translator
# translator= Translator(from_lang="tr",to_lang="en")
# print(translator.translate("Bu bir dolma kalemdir."))

from translation import baidu, google, youdao, iciba, bing

#print(google('hello world!', dst = 'zh-CN'))
#print(google('hello world!', dst = 'ru'))
print(baidu('hello world!', dst='zh'))
print(baidu('hello world!', dst='ru'))
print(youdao('hello world!', dst='zh-CN'))
print(iciba('hello world!', dst='zh'))
print(bing('hello world!', dst='zh-CHS'))
Example #6
from translation import Translation, baidu, google, youdao, iciba, bing

print(baidu('hello world!', dst = 'zh'))
print(youdao('hello world!', dst = 'zh-CN'))
print(iciba('hello world!', dst = 'zh'))
print(google('hello world!', dst = 'zh-CN'))
print(bing('hello world!', dst = 'zh-CHS'))

from bs4 import BeautifulSoup
from langdetect import detect
from translation import iciba


def count_page_input_by_file(fileName):
    '''
    Count the <input> elements in a downloaded HTML file. (The function
    header was lost in extraction; this signature is reconstructed from the
    call in the __main__ block below.)
    :param fileName: HTML file
    :return: (input count, password-input count), or (-1, -1) on failure
    '''

    try:
        with open(fileName, 'r', encoding='utf-8', errors='ignore') as f:
            html = f.read()

        soup = BeautifulSoup(html, 'lxml')

        input_cnt = len(soup.find_all("input"))
        pwd_input_cnt = len(soup.find_all("input", attrs={"type": "password"}))

        print((input_cnt, pwd_input_cnt))

        return (input_cnt, pwd_input_cnt)
    except Exception:
        return (-1, -1)

if __name__ == '__main__':
    #print(get_psychology_features("http://www.iep.utm.edu/aestheti/"))
    #print(get_psychology_features_by_url("https://en.wikipedia.org/wiki/How-to"))
    #count_page_input_by_url("https://shop.dma.org/")
    #count_page_input_by_file("test.html")
    #print(get_psychology_features_by_file("test.html"))

    text="English is good"

    print(detect("你好呀"))
    if detect(text) == 'en':
        text=iciba(text, dst = 'zh')
    print(text)