Python MySQLConnector.connect примеры использования

Язык программирования: Python

Пространство имен/Пакет: MySQLConnector

Класс/Тип: MySQLConnector

Метод/Функция: connect

Примеров на hotexamples.com: 7

Python MySQLConnector.connect - 7 примеров найдено. Это лучшие примеры Python кода для MySQLConnector.MySQLConnector.connect, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

MySQLConnector(13)

connect(7)

disconnect(4)

openConnector(2)

insert(1)

query_with_string(1)

query(1)

print_history(1)

open(1)

get_history_dict(1)

get_settings(1)

clear_history(1)

get_history(1)

export_pickle(1)

export_csv(1)

close(1)

remove_invalid(1)

Пример #1

Показать файл

 def configSQL(self):
     connector = MySQLConnector(LOGIN_INFO_PATH)
     self.sqlConn = connector.connect()
     # create database if not exists
     if not DATABASE_NAME in self.sqlConn.getDatabaseList():
         self.sqlConn.createDatabase(DATABASE_NAME)
     db = self.sqlConn.getDatabase(DATABASE_NAME)
     # create tables if not exists
     if not SYMBOL_DETAIL_TABLE_NAME in db.getTableList():
         db.createTable(SYMBOL_DETAIL_TABLE_NAME, SYMBOL_DETAIL_TABLE)
     db.commit()
     self.sqlConn.close()

Пример #2

Показать файл

def updateHistData(symbols: list):
    connector = MySQLConnector(LOGIN_INFO_PATH)
    sqlConn = connector.connect()
    saveCount = 0
    for i in range(len(symbols)):
        print("updating... {}/{}".format(i + 1, len(symbols)))
        symbol = symbols[i]
        yf = yahoofinance.YFData(symbol)
        updateHistPrice(sqlConn, yf, symbol)
        updateDividend(sqlConn, yf, symbol)
        updateStocksplit(sqlConn, yf, symbol)
        checkAdjclose(sqlConn, yf, symbol)
        saveCount += 1
        if saveCount == 100:
            sqlConn.commit()
            saveCount = 0
    sqlConn.commit()
    sqlConn.close()

Пример #3

Показать файл

def getSymbolList() -> list:
    connector = MySQLConnector(LOGIN_INFO_PATH)
    sqlConn = connector.connect()
    sql = """
    SELECT `symbols`.`symbols`.`symbol`, `symbols`.`symbols`.`enable`, `symbols`.`symbol_details`.`marketCap`
    FROM `symbols`.`symbols`
    INNER JOIN `symbols`.`symbol_details` ON `symbols`.`symbols`.`symbol`=`symbols`.`symbol_details`.`symbol`
    WHERE `symbols`.`symbols`.`enable` = 1 and `symbols`.`symbol_details`.`marketCap` > 0;
    """
    data = sqlConn.execute(sql)
    symbols = data["symbol"]
    sql = """
    SELECT `symbol` FROM `symbols`.`symbol_details`
    WHERE `quoteType` = "INDEX";
    """
    data = sqlConn.execute(sql)
    symbols.extend(data["symbol"])
    sqlConn.close()
    return symbols

Пример #4

Показать файл

Файл: Analyzer.py Проект: rubychen0611/Unbiased

class Analyzer:
    def __init__(self, date):
        self.date = date
        self.connector = MySQLConnector()

        self.logger = logging.getLogger('analyzer')
        self.logger.setLevel(level=logging.DEBUG)
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        ch.setFormatter(formatter)
        self.logger.addHandler(ch)
        self.logger.info("Finished initialization.")

    def get_sentiment_score(self, title, text):

        sent_analyzer = SentimentIntensityAnalyzer()
        title_score = sent_analyzer.polarity_scores(title)['compound']
        title_score = int(round((title_score + 1) * 10))
        sents = sent_tokenize(text)
        scores = []
        for sent in sents:
            if (len(word_tokenize(sent)) > 1):  # 句子中单词大于1个
                score = sent_analyzer.polarity_scores(sent)['compound']
                scores.append(score)
        text_score = np.mean(scores)
        text_score = int(round((text_score + 1) * 40))
        final_score = title_score + text_score
        return final_score

    def preprocess_text(self, text):
        text = text.replace("'", "''")
        return text

    def get_summary(self, title, text):
        article = Article(url='')
        article.title = title
        article.text = text
        article.download_state = ArticleDownloadState.SUCCESS
        article.is_parsed = True
        article.nlp()
        return self.preprocess_text(article.summary)

    def analyze(self):
        # load articles
        cursor = self.connector.connect()
        sql = "SELECT articleIndex, title, text FROM news.article WHERE downloadDate=%s and groupIndex is not null" % self.date
        # sql = "SELECT articleIndex, title, text FROM news.article WHERE groupIndex is not null"
        try:
            cursor.execute(sql)
            self.articles = cursor.fetchall()
            # print(self.articles)
        except:
            self.logger.info("Error: unable to fecth data")

        for article in self.articles:
            # generate summary
            summary = self.get_summary(article[1], article[2])
            # sentiment analysis
            sentiment_score = self.get_sentiment_score(article[1], article[2])
            # print(article[0] + '\t' + article[1] +'\t'+ str(sentiment_score))
            # upload to DB
            sql = "UPDATE news.article SET sentimentScore='%d',summary='%s' WHERE articleIndex = '%s'" % (
                sentiment_score, summary, article[0])
            #sql = "UPDATE news.article SET sentimentScore='%d' WHERE articleIndex = '%s'" % (sentiment_score, article[0])

            try:
                cursor.execute(sql)
                self.connector.db.commit()
                self.logger.info(
                    "Successfully upload article %s with score=%d", article[0],
                    sentiment_score)
            except:
                self.logger.info("Unable to update summary and score!")
                self.connector.db.rollback()
        self.connector.disconnect()


# analyzer = Analyzer(date='20200801')
# analyzer.analyze()

Пример #5

Показать файл

class Crawler:
    def __init__(self, last_date=None, this_date=None):
        self.connector = MySQLConnector()
        self.logger = logging.getLogger('crawler')
        self.logger.setLevel(level=logging.DEBUG)
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)  # 输出到console的log等级的开关
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        ch.setFormatter(formatter)
        self.logger.addHandler(ch)

        # load media list
        if last_date == None:
            last_date = self.get_last_date()
        self.last_date = datetime.datetime.strptime(last_date, '%Y%m%d').replace(tzinfo=pytz.utc)


        if this_date != None:
            self.today = this_date
        else:
            self.today = time.strftime("%Y%m%d", time.localtime())

        self.logger.info("Finished initialization.")



    def if_covid19_related(self, text):
        '''
        判断文章是否包含疫情关键词
        '''
        text_lower = text.lower()
        for kw in covid_19_keywords:
            if kw in text_lower:
                return True
        return False

    def if_url_satisfied(self, media, url):
        flag = False
        for must in media['Must']:
            if must in url:
                flag = True
        if not flag:
            return False

        if media['MustNot'] != None:
            for mustnot in media['MustNot']:
                if mustnot in url:
                    return False
        return True

    def preprocess_text(self, text):
        text = text.replace("'", "''")
        return text

    def crawl(self):
        '''
        爬新的文章（上次抓取时间之后发布的）
        '''
        self.cursor = self.connector.connect()
        cur_count = self.get_cur_count()
        for media in media_list:
            urls = media['URL']
            for idx, url in enumerate(urls):
                self.logger.info("Start crawl %s - %d  ..." % (media['Name'], idx))
                media_website = newspaper.build(url,language='en', memoize_articles=True)
                sum = len(media_website.articles)
                self.logger.info("Found %d articles" % sum )
                for i, article in enumerate(media_website.articles):
                    print(i, article.url)
                    if not self.if_url_satisfied(media, article.url):   # 网址前缀必须符合给定分类网址
                        continue
                    # print(article.url)
                    try:
                        article.download()
                        print("...download")
                        article.parse()
                        print("...parsed")
                    except:
                        continue

                    # 文章过老
                    if article.publish_date == None:
                        article.publish_date = datetime.datetime.now()

                    article.publish_date = article.publish_date.replace(tzinfo=pytz.utc)
                    if article.publish_date < self.last_date:
                        continue

                    # 文章与疫情无关
                    if not (self.if_covid19_related(article.title) or self.if_covid19_related(article.text)):
                        continue

                    # 上传到MySQL数据库
                    # article.nlp()

                    self.upload_article(cur_count, media['Index'], article)
                    self.logger.info("Successfuly download %d / %d th article %s \n %s" % (i, sum, article.url, article.title))
                    cur_count += 1

        self.connector.disconnect()

    def upload_article(self, count, media_index, article):
        #self.cursor = self.connector.connect()
        idArticle = '%s%05d' % (self.today, count)
        title = self.preprocess_text(article.title)
        url = self.preprocess_text(article.url)
        # keywords = str(article.keywords).replace("'", '"')
        # summary = self.preprocess_text(article.summary)
        text = self.preprocess_text(article.text)
        sql = """INSERT INTO `news`.`article` (`articleIndex`, `title`, `mediaIndex`, `publishDate`, `downloadDate`, `image`,`url`, `text`) 
                VALUES ('%s', '%s', '%d', '%s','%s','%s', '%s', '%s');
                """ % (
        idArticle, title, media_index, article.publish_date.strftime("%Y-%m-%d %H:%M:%S"), self.today, article.top_image,  url, text)
        try:
            self.cursor.execute(sql)
            self.connector.db.commit()

        except MySQLdb._exceptions.OperationalError:
            self.connector.db.rollback()
            self.logger.info("Unable to upload article to DB!")
        # self.connector.disconnect()


    def get_cur_count(self):
        # self.cursor = self.connector.connect()
        sql = '''select MAX(articleIndex) 
                from news.article 
              where articleIndex REGEXP '%s';''' % (self.today)
        try:
            self.cursor.execute(sql)
            max_idx = self.cursor.fetchall()
        except:
            self.logger.info("Unable to get current count!")
        #print(max_idx)
        #self.connector.disconnect()
        if max_idx[0][0] is None:
            count = 0
        else:
            count = int(max_idx[0][0][-5:]) + 1
        self.logger.info("Successfully get current count = %d ." % count)
        return count

    def get_last_date(self):
        self.cursor = self.connector.connect()
        sql = '''SELECT max(downloadDate) FROM news.article'''
        try:
            self.cursor.execute(sql)
            fetch_result = self.cursor.fetchall()
        except:
            self.logger.info("Unable to get last date!")
        self.connector.disconnect()
        if fetch_result[0][0] is None:
            last_date = "20200101"
        else:
            last_date = datetime.date.strftime(fetch_result[0][0],'%Y%m%d')
        print(last_date)
        self.logger.info("Successfully get lastdate '%s'." % last_date)
        return last_date

# crawler = Crawler(last_date = '20200708', this_date='20200709')
# crawler = Crawler()
#crawler.crawl()

Пример #6

Показать файл

Файл: Cluster.py Проект: rubychen0611/Unbiased

class Cluster():
    def __init__(self, date):
        self.date = date
        self.connector = MySQLConnector()

        self.logger = logging.getLogger('cluster')
        self.logger.setLevel(level=logging.DEBUG)
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)  # 输出到console的log等级的开关
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        ch.setFormatter(formatter)
        self.logger.addHandler(ch)
        self.logger.info("Finished initialization.")

    def remove_punctuation(self, text):
        text = re.sub(r'[{}]+'.format(punctuation), '', text)
        return text.strip().lower()

    def load_articles(self):
        '''从数据库中获取未分类的数据'''
        cursor = self.connector.connect()
        sql = "SELECT articleIndex, title, text, mediaIndex FROM news.article WHERE downloadDate='%s'" % (
            self.date)
        # sql = '''SELECT articleIndex, title, text, mediaIndex  FROM news.article where downloadDate in ('20200728', '20200729', '20200731', '20200801')'''
        try:
            cursor.execute(sql)
            self.articles = cursor.fetchall()
            self.logger.info("Successfully loaded %d articles." %
                             len(self.articles))
        except:
            self.logger.info("Error: unable to fecth data")
        self.connector.disconnect()

    def remove_useless_articles(self):
        self.load_articles()
        useless_articles = []
        media_articles = {}
        for article in self.articles:
            media = article[3]
            if media not in media_articles:
                media_articles[media] = set()
            title = article[1]
            if title in media_articles[media]:
                useless_articles.append(article[0])  #去重
                self.logger.info("Found repeated article %s" % (article[0]))
                continue
            else:
                media_articles[media].add(title)
            media_articles[article[3]].add(article[1])
            if detect(article[1]) != 'en':
                # print(article[1])
                useless_articles.append(article[0])
                self.logger.info("Found article %s not in English" %
                                 (article[0]))
            else:
                list_words = word_tokenize(self.remove_punctuation(article[2]))
                if len(list_words) < 30:
                    useless_articles.append(article[0])
                    self.logger.info("Found article %s too short" %
                                     (article[0]))

        # print(useless_articles)
        cursor = self.connector.connect()
        for articleID in useless_articles:
            sql = "DELETE FROM `news`.`article` WHERE(`articleIndex` = %s)" % (
                articleID)
            try:
                cursor.execute(sql)
                self.connector.db.commit()
            except:
                self.connector.db.rollback()
                self.logger.info("Unable to delete useless articles")
        self.connector.disconnect()
        self.logger.info("Successfully removed %d articles" %
                         len(useless_articles))

    def encode_text(self, encode_obj='title', vector='tf-idf'):
        '''
        多种方式编码文本
        '''
        if vector == 'count':
            vectorizer = CountVectorizer(stop_words='english',
                                         lowercase=True,
                                         binary=True,
                                         tokenizer=myTokenizer)
        elif vector == 'tf-idf':
            vectorizer = TfidfVectorizer(stop_words='english',
                                         lowercase=True,
                                         tokenizer=myTokenizer)
        corpus = []
        if encode_obj == 'title':  # 标题
            idx = 1
        # elif encode_obj == 'summary':   # 摘要
        #     idx = 4
        elif encode_obj == 'text':  # 全文
            idx = 2

        for article in self.articles:
            corpus.append(article[idx])

        X = vectorizer.fit_transform(corpus)
        #print(len(vectorizer.get_feature_names()))
        # print (vectorizer.get_feature_names())
        return X

    def cluster(self):
        X = self.encode_text(encode_obj='text', vector='tf-idf')
        db_model = DBSCAN(eps=0.4, min_samples=3, metric='cosine').fit(X)
        joblib.dump(db_model, './models/model_%s.pkl' % self.date)
        self.group_list = db_model.labels_

        # 打印分组结果
        self.logger.info("Cluster results: %d groups " %
                         np.max(self.group_list))
        # for i in range(len(self.group_list)):
        #     if self.group_list[i] != -1:
        #         print(str(self.group_list[i])+'\t'+str(self.articles[i][3])+'\t'+self.articles[i][1])
        group_result = {}
        for i, label in enumerate(self.group_list):
            if label not in group_result:
                group_result[label] = []
            group_result[label].append(self.articles[i][1])
        for i in range(0, np.max(self.group_list) + 1):
            print(i)
            print(group_result[i])

    def upload_groups_to_DB(self):
        cursor = self.connector.connect()
        for i, label in enumerate(self.group_list):
            if label == -1:
                continue
            sql = "UPDATE news.article SET groupIndex='%s%03d' WHERE articleIndex = '%s'" % (
                self.date, label, self.articles[i][0])
            #print(sql)
            try:
                cursor.execute(sql)
                self.connector.db.commit()
            except:
                # 发生错误时回滚
                self.logger.info("Unable to update group index!")
                self.connector.db.rollback()
        self.connector.disconnect()
        self.logger.info("Successfully update group indices.")

Пример #7

Показать файл

Файл: Uploader.py Проект: rubychen0611/Unbiased

class Uploader:
    def __init__(self, date):
        self.date = date
        self.articles = []
        self.connector = MySQLConnector()
        leancloud.init("U83hlMObhFRFRS4kX3lOxSlq-gzGzoHsz", "Jw2Y6KFFsjI5kEz1qYqQ62da")
        logging.basicConfig(level=logging.DEBUG)

    def upload_to_old_groups(self):
        '''如果新下载的新闻被分类到旧的新闻组，直接上传'''
        pass

    def upload_new_groups(self):
        '''上传新的新闻组'''
        cursor = self.connector.connect()
        sql = "SELECT articleIndex,groupIndex,title,publishDate,image,url,mediaObjId,sentimentScore,summary \
                FROM news.article INNER JOIN news.media ON article.mediaIndex = media.index     \
                where groupIndex is REGEXP '%s'"  % self.date
        try:
            cursor.execute(sql)
            articles = cursor.fetchall()
            # print(self.articles)
        except:
            print("Error: unable to fecth data")
        self.connector.disconnect()

        self.groups = {}
        for article in articles:
            groupIndex = article[1]
            if groupIndex not in self.groups:
                self.groups[groupIndex] = []
            self.groups[groupIndex].append(article)

        for groupIndex in self.groups:
            # upload articles
            article_obj_ids = []
            group_img = None
            for article in self.groups[groupIndex]:
                Article = leancloud.Object.extend('Article')
                Media = leancloud.Object.extend('Media')
                article_obj = Article()
                article_obj.set('ArticleIndex', article[0])
                article_obj.set('GroupIndex', article[1])
                article_obj.set('Title', article[2])
                article_obj.set('Date', article[3])
                if article[4] != '' :
                    article_obj.set('ImageURL', article[4])
                    group_img = article[4]
                article_obj.set('Link', article[5])
                article_obj.set('Media', Media.create_without_data(article[6])) # Pointer
                article_obj.set('SentimentScore', article[7])
                article_obj.set('Summary', article[8])
                article_obj.save()
                article_obj_ids.append(article_obj.id)
            # upload group info
            rank_score = self.cal_group_rank_score(self.groups[groupIndex])
            # print(groupIndex + '\t' + str(rank_score))
            # print(groupIndex, rank_score)
            NewsGroup = leancloud.Object.extend('NewsGroup')
            group_obj = NewsGroup()
            group_obj.set('Title', self.groups[groupIndex][0][2])       # 第一篇文章的title作为group title
            group_obj.set('GroupIndex', groupIndex)
            group_obj.set('RankScore', rank_score)
            if group_img is not None:
                group_obj.set('ImageURL', group_img)
            group_obj.set('Articles',(article_obj_ids))
            group_obj.save()

    def cal_group_rank_score(self, articles):
        article_num = len(articles)
        if article_num == 3:
            num_score = 2
        elif 4 <= article_num <= 8:
            num_score = 3
        else:
            num_score = 1

        senti_scores = []
        media_set = set()
        for article in articles:
            senti_scores.append(article[7])
            media_set.add(article[6])
        media_num = len(media_set)
        if media_num == 1:
            media_score = 0
        elif media_num == 2:
            media_score = 1
        elif media_num <= 4:
            media_score = 2
        else:
            media_score = 3

        senti_diff = int(np.max(senti_scores) - np.min(senti_scores))
        return num_score + media_score + senti_diff