Пример #1
0
def plot(sql, title, headers):
    """Run *sql*, render the result set as an HTML table, and save it."""
    connection.run_sql(sql)
    rows = connection.results()

    table_html = _build_table(rows, title, headers)
    _save_table(table_html, rows, title)
Пример #2
0
def plot_transpose(sql, title, headers):
    """Run *sql* and render the FIRST result row transposed as a table.

    Each (column name, stringified value) pair of the first row becomes
    one row of the output table, so a single wide row reads vertically.
    """
    connection.run_sql(sql)
    results = connection.results()
    columns = connection.columns()

    # Pair each column name with the stringified value from the first row.
    # (Cursor metadata and the row have matching lengths, so zip is exact.)
    transposed = [(column, str(value))
                  for column, value in zip(columns, results[0])]

    html = _build_table(transposed, title, headers)

    _save_table(html, transposed, title)
Пример #3
0
 def save(self):
     """Persist this post's attributes into the ``post`` table.

     Values are read from ``self.data`` (keys absent from the dict become
     NULL) and passed as bind parameters in the same order as the columns
     of the insert statement.
     """
     sql = """
         insert into post (
             id,
             title,
             body,
             tags,
             parent_id,
             answer_count,
             accepted_answer_id,
             creation_date,
             last_activity_date,
             score,
             last_edit_date,
             last_editor_user_id,
             last_editor_display_name,
             post_type_id,
             comment_count,
             view_count,
             favorite_count,
             owner_user_id,
             owner_display_name,
             closed_date
         ) values (
             %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
         );
     """
     # Keys listed in the exact order of the columns above; dict.get()
     # already defaults to None, so missing fields are stored as NULL.
     keys = (
         'Id', 'Title', 'Body', 'Tags', 'ParentId', 'AnswerCount',
         'AcceptedAnswerId', 'CreationDate', 'LastActivityDate', 'Score',
         'LastEditDate', 'LastEditorUserId', 'LastEditorDisplayName',
         'PostTypeId', 'CommentCount', 'ViewCount', 'FavoriteCount',
         'OwnerUserId', 'OwnerDisplayName', 'ClosedDate',
     )
     values = [self.data.get(key) for key in keys]
     connection.run_sql(sql, values)
Пример #4
0
    def insert_link(self, link):
        """Record *link* as cited by the current post, skipping duplicates."""
        # Has this exact (post, link) pair been stored already?
        connection.run_sql(
            'SELECT count(*) FROM cited_links WHERE post = %s AND link = %s',
            [self.post_id, link])
        already_cited = connection.results()[0][0] > 0
        if already_cited:
            return

        # Keep only the host part of the URL for the location column.
        location = urlparse(link).netloc
        print(location)

        connection.run_sql(
            'INSERT INTO cited_links (post, link, location) VALUES (%s, %s, %s)',
            [self.post_id, link, location])
Пример #5
0
def plot(sql, title, legend=False):
    """Run *sql* and save a stacked bar chart of the results as a PNG.

    Parameters:
        sql: query whose results _separate() splits into x labels,
            a list of y-series, and per-group totals.
        title: chart title, also used to derive the output filename.
        legend: a sequence of series labels to draw a legend,
            or False (the default) for no legend.
    """
    connection.run_sql(sql)
    results = connection.results()

    x, y, total = _separate(results)

    ind = range(len(x))  # the x locations for the groups
    width = 0.50         # the width of the bars

    fig, ax = plt.subplots()
    rects = []
    bottom = [0] * len(y[0])  # running stack height per group
    for i, data in enumerate(y):
        rects.append(ax.bar(ind, data, width, color=colors[i], bottom=bottom))
        bottom = [base + value for base, value in zip(bottom, data)]

    # Labels, title and axis ticks.
    ax.set_title(title)
    ax.set_xticks([i + width / 2 for i in ind])
    ax.set_xticklabels(list(x))

    if legend:
        plt.legend(rects, legend)

    # A single series gets its value printed on top of each bar;
    # stacked series get labels inside the segments.
    if len(rects) == 1:
        label_top(rects[0], ax)
    else:
        label_middle(rects, ax)

    # Leave golden-ratio headroom above the tallest stacked bar.
    plt.axis([0 - width, len(x), 0, float(max(total)) * 1.618])

    plt.savefig(utils.filename_from_title(title, 'png'))
    plt.clf()
Пример #6
0
import connection
from html.parser import HTMLParser
from urllib.parse import urlparse

# Fetch (body, id) for every post up front — presumably each body is fed
# to a LinkFinder instance further down; confirm against the caller.
connection.run_sql('select body, id from post')
posts = connection.results()


class LinkFinder(HTMLParser):
    """HTML parser that reports the href of every anchor tag it sees."""

    def set_post(self, post_id):
        # Remember which post the subsequently parsed HTML belongs to.
        self.post_id = post_id

    def handle_starttag(self, tag, attrs):
        # Only anchor tags can carry the links we care about.
        if tag != 'a':
            return
        for name, value in attrs:
            if name == 'href':
                self.insert_link(value)

    def insert_link(self, link):
        # NOTE(review): despite its name, this version only checks for an
        # existing cited_links row and prints the link's host — it never
        # performs an INSERT. Possibly a truncated copy; confirm intent.
        connection.run_sql(
            'SELECT count(*) FROM cited_links WHERE post = %s AND link = %s',
            [self.post_id, link])
        already_cited = connection.results()[0][0] > 0
        if already_cited:
            return

        location = urlparse(link).netloc
        print(location)
Пример #7
0
import connection
from post import Post
from tcc_themes import vocabulary
from tcc_themes import theme
from tcc_themes import stemmer

connection.run_sql(
    'select stemmed_body, id, title from post where question_type <> 4')
posts = connection.results()
documents = []

# Build one document per post; the stemmed title is appended twice to
# give it extra weight relative to the body.
for post in posts:
    text = post[0]
    stemmed_title = stemmer.process_text(post[2])
    text = text + ' ' + stemmed_title + ' ' + stemmed_title
    documents.append(text)

words = vocabulary.build(documents, N=2500)
documents_themes, topics = theme.assign(documents, words, topics=3)

# Replace the topic table with the freshly computed topics (1-based ids).
connection.run_sql('delete from topic')
for i, topic in enumerate(topics):
    sql = "INSERT INTO topic VALUES (%s, %s)"
    values = [i + 1, topic]
    connection.run_sql(sql, values)

# Loop variable renamed from `topics` to stop it shadowing the topic
# list built above.
for i, doc_topics in enumerate(documents_themes):
    str_topics = map(str, doc_topics)
    # NOTE(review): SQL built via %-formatting; values here are internal
    # (theme weights and a post id), but bind parameters would be safer.
    connection.run_sql("update post set topics = '{%s}' where id = %d" %
                       (', '.join(str_topics), posts[i][1]))
Пример #8
0
import connection
from tcc_themes import stemmer

# Stem the body of every post and store the result back on the row.
connection.run_sql('select body, id from post')
posts = connection.results()

for index, row in enumerate(posts):
    stemmed = stemmer.process_text(row[0])
    connection.run_sql('update post set stemmed_body = %s where id = %s',
                       (stemmed, row[1]))
    # Progress report every 100 posts (skipping the very first iteration).
    if index > 0 and index % 100 == 0:
        print("%d posts processados" % (index))

print("%d posts processados" % (len(posts)))
Пример #9
0
 def get_question_ids():
     """Return the ids of every question post (post_type_id = 1)."""
     query = """
         select id from post where post_type_id = 1;
     """
     connection.run_sql(query)
     return connection.results()
Пример #10
0
# BUG FIX: argparse's type=bool converts via bool(str), and any non-empty
# string is truthy — so `-a False` still yielded True and the flag could
# never be switched off once given. store_true makes -a a proper flag.
parser.add_argument('-a',
                    '--ascending',
                    dest='is_ascending',
                    action='store_true',
                    default=False,
                    help='inverts the order')

args = parser.parse_args()
direction = 'ASC' if args.is_ascending else 'DESC'

# topic and number_of_posts come from argparse — presumably declared with
# type=int elsewhere (the %d interpolation requires it); confirm upstream.
connection.run_sql("""
    SELECT id,
           stemmed_body,
           body,
           title,
           topics
    FROM post
    WHERE question_type <> 4
    AND topics[%d] > 0.5
    ORDER BY topics[%d] %s
    LIMIT %d
""" % (args.topic, args.topic, direction, args.number_of_posts))

posts = connection.results()

for post in posts:
    Post.print(post[2], post[3], post[0])
Пример #11
0
import connection
import html
import re
from random import shuffle

# ANSI escape codes for terminal highlighting.
OKGREEN = '\033[92m'
OKBLUE = '\033[94m'
BOLD = '\033[1m'
ENDC = '\033[0m'

# Strip HTML comments/tags; highlight any mention of "dbunit".
regex_tags = re.compile(r'(<!--.*?-->|<[^>]*>)')
regex_dbunit = re.compile(r'(dbunit)', re.IGNORECASE)

connection.run_sql('select body, id, title from post where question_type is null and post_type_id = 1')
posts = connection.results()
# The number of unclassified questions is exactly the size of the result
# set just fetched, so the original second count(*) query was redundant
# (and could even disagree if rows changed between the two queries).
conta = len(posts)

# Present posts in random order so classification isn't biased by id order.
shuffle(posts)

for post in posts:
    body = post[0]
    title = post[2]
    body = regex_tags.sub('', body)
    body = html.unescape(body)
    body = regex_dbunit.sub(OKBLUE + BOLD + r'\1' + ENDC, body)
    conta -= 1
    print(OKGREEN + BOLD + title + ENDC + "\n")
    print(body)
    print("\nFaltam classificar %s posts\n" % conta)