Exemplo n.º 1
0
def update_github_user_table():
    """Scan every cached sponsor-listing JSON file and set the
    has_sponsors_listing flag on the matching github_user row.

    A user counts as having a listing when ``sponsorsListing`` is
    non-null, or when the ``hasSponsorsListing`` field is present and
    true in the cached GraphQL response.
    """
    # get db connection (manual commit so each update is explicit)
    db = base.connectMysqlDB(config, autocommit=False)
    cur = db.cursor()

    filenames = base.read_all_filename_none_path(paths.github_all_user_sponsor_listing_info)
    for filename in filenames:
        # read data from file
        file_path = paths.github_all_user_sponsor_listing_info + "/" + filename
        text = base.get_info_from_file(file_path)
        if text is False:
            logging.fatal("file not existed: " + file_path)
            continue
        obj = json.loads(text)
        logging.info("read file: " + file_path)
        user = obj["data"]["user"]
        # BUG FIX: the original compared the *list literal*
        # ["hasSponsorsListing"] against True (always False), so the
        # second half of the condition never fired; test the field value.
        has_listing = (user["sponsorsListing"] is not None
                       or ("hasSponsorsListing" in user
                           and user["hasSponsorsListing"] is True))
        try:
            # single parameterized statement instead of two duplicated
            # True/FALSE branches
            cur.execute("update github_user "
                        "set has_sponsors_listing=%s "
                        "where login='******'", (has_listing,))
            db.commit()
        except Exception as e:
            logging.fatal(e)
    cur.close()
    db.close()
def get_all_user_earliest_maintainer_time(
        month, sql=sql.all_user_earliest_maintainer_time):
    """Fetch each user's earliest maintainer-sponsorship time and log a
    symmetric window of +/- ``month`` around it.

    ``month`` is an offset in the units returned by
    base.datetime_to_timestamp (presumably seconds — TODO confirm).
    """
    # create database connection
    db = base.connectMysqlDB(config)
    cur = db.cursor()

    # query returns (login, earliest_maintainer_time) rows
    cur.execute(sql)
    items = cur.fetchall()
    logging.info(str(items))

    # build [login, start, mid, end] intervals around each row's time
    users_time_interval = []
    for item in items:
        start_time = base.timestamp_to_time(
            base.datetime_to_timestamp(item[1]) - month)
        end_time = base.timestamp_to_time(
            base.datetime_to_timestamp(item[1]) + month)
        users_time_interval.append([item[0], start_time, item[1], end_time])

    # log each interval.  (The original also built an unused
    # users_acticity_change list, removed here.)
    # NOTE(review): item[1]/item[3] were already converted by
    # timestamp_to_time above, so the calls below look like a double
    # conversion — kept as-is pending confirmation of the helper's
    # contract.
    for item in users_time_interval:
        first_time = base.timestamp_to_time(item[1])
        mid_time = base.timestamp_to_time(item[2])
        last_time = base.timestamp_to_time(item[3])
        logging.info("first_time: " + first_time + ", mid_time: " + mid_time +
                     ", last_time: " + last_time)

    # close this database connection
    cur.close()
    db.close()
Exemplo n.º 3
0
def writeUserRepository(path, sql):
    """Collect every login returned by ``sql`` into a work queue and fan
    the tasks out to THREAD_NUM writeUserRepositoryThread workers.

    ``path`` is stored in the module-level ``base_path`` so the worker
    threads know where to write their output.
    """
    global base_path
    base_path = path
    task_queue = Queue.Queue()

    # open a connection just long enough to collect the logins
    conn = base.connectMysqlDB(config)
    cursor = conn.cursor()
    cursor.execute(sql)
    pending = [{"login": row[0]} for row in cursor.fetchall()]
    logging.info("finish reading database")
    logging.info("%d tasks left for handling" % (len(pending)))
    cursor.close()
    conn.close()

    # nothing to do — bail out early
    if len(pending) == 0:
        logging.warn("finish")
        return

    for task in pending:
        task_queue.put_nowait(task)

    # start the workers and wait until every task is marked done
    for _ in range(THREAD_NUM):
        writeUserRepositoryThread(task_queue).start()
    task_queue.join()

    logging.info("finish")
Exemplo n.º 4
0
    def run(self):
        """Worker loop: for each queued login, read every cached
        sponsorshipsAsMaintainer JSON file under base_path/<login> and
        insert one github_sponsorships_as_maintainer row per edge,
        committing after each row.
        """
        while not self.q.empty():
            work = self.q.get(timeout=0)
            logging.info("the number of work in queue: " + str(self.q.qsize()))

            login = work["login"]
            # get db connection (manual commit, one commit per row)
            db = base.connectMysqlDB(config, autocommit=False)
            cur = db.cursor()

            # read data from file
            try:
                directory = base_path + "/" + login
                files = os.listdir(directory)
                for file in files:
                    file_path = directory + "/" + file
                    text = base.get_info_from_file(file_path)
                    if text is False:
                        logging.warn("file not existed: " + file_path)
                        continue
                    obj = json.loads(text)
                    logging.info("read file: " + file_path)
                    count = 1
                    for edge in obj["data"]["user"][
                            "sponsorshipsAsMaintainer"]["edges"]:
                        if edge["node"]["privacyLevel"] == "PRIVATE":
                            # private sponsorship: sponsor identity unknown,
                            # so no sponsor_login column
                            cur.execute(
                                "insert into github_sponsorships_as_maintainer "
                                "(login, flag, created_at) "
                                "values (%s, %s, %s)",
                                (obj["data"]["user"]["login"], base.flag2,
                                 base.time_handler(
                                     edge["node"]["createdAt"])))
                        else:
                            # flag0 when the sponsor entity carries a
                            # "company" field, flag1 otherwise — presumed
                            # org-vs-user distinction, TODO confirm in base
                            if "company" in edge["node"]["sponsorEntity"]:
                                flag = base.flag0
                            else:
                                flag = base.flag1
                            cur.execute(
                                "insert into github_sponsorships_as_maintainer "
                                "(login, sponsor_login, flag, created_at) "
                                "values (%s, %s, %s, %s)",
                                (obj["data"]["user"]["login"],
                                 edge["node"]["sponsorEntity"]["login"],
                                 flag,
                                 base.time_handler(
                                     edge["node"]["createdAt"])))
                        db.commit()
                        logging.info("the " + str(count) +
                                     "th record in file: " + file_path)
                        count += 1
            except Exception as e:
                logging.fatal(e)
            finally:
                # BUG FIX: the original only reached task_done/close on
                # success, so an exception leaked the connection and left
                # q.join() waiting forever.
                self.q.task_done()
                cur.close()
                db.close()
Exemplo n.º 5
0
    def run(self):
        """Worker loop: for each queued login, read base_path/<login>.json
        and insert the user's profile row into github_user."""
        while not self.q.empty():
            work = self.q.get()
            logging.info("the number of work in queue: " + str(self.q.qsize()))

            login = work["login"]
            # get db connection (manual commit)
            db = base.connectMysqlDB(config, autocommit=False)
            cur = db.cursor()

            # read data from file
            file = base_path + "/" + login + ".json"
            text = base.get_info_from_file(file)
            if text is False:
                logging.warn("file not existed: " + file)
            else:
                obj = json.loads(text)
                logging.info("writing login data: " + login)
                user = obj["data"]["user"]
                # The two original branches were byte-identical except for
                # the trailing "1"/"0", so they are collapsed into a single
                # insert with a computed flag.
                listing_flag = "1" if user["hasSponsorsListing"] is True else "0"
                cur.execute(
                    "insert into github_user "
                    "(database_id, login, name, email,spon_maintainer_count,"
                    " spon_sponsor_count, created_at, updated_at, has_sponsors_listing) "
                    "values (%s, %s, %s, %s, %s, %s, %s, %s, %s)",
                    (user["databaseId"],
                     user["login"],
                     user["name"],
                     user["email"],
                     user["sponsorshipsAsMaintainer"]["totalCount"],
                     user["sponsorshipsAsSponsor"]["totalCount"],
                     base.time_handler(user["createdAt"]),
                     base.time_handler(user["updatedAt"]),
                     listing_flag))
                db.commit()
                logging.info(login +
                             " ~~~~~~~~~ data commit into dababase success!!")
            self.q.task_done()
            cur.close()
            db.close()
Exemplo n.º 6
0
    def run(self):
        """Worker loop: for each queued login, read base_path/<login>.json
        and, when the user has a sponsors listing not yet present in the
        github_sponsor_listing table, insert it.

        NOTE(review): the '"******"' fragments below are redaction
        artifacts from the code host and are not valid Python — the
        censored expressions were presumably ``+ login +`` concatenations.
        """
        while not self.q.empty():
            work = self.q.get(timeout=0)
            logging.info("the number of work in queue: " + str(self.q.qsize()))

            login = work["login"]
            # get db connection (manual commit)
            db = base.connectMysqlDB(config, autocommit=False)
            cur = db.cursor()

            # read data from file
            try:
                file = base_path + "/" + login + ".json"
                text = base.get_info_from_file(file)
                if text is False:
                    logging.warn("file not existed: " + file)
                else:
                    obj = json.loads(text)
                    if obj["data"]["user"]["sponsorsListing"] is None:
                        logging.info("user: "******" don't create sponsors")
                    else:
                        # skip logins already present in the table
                        cur.execute(
                            "SELECT * FROM github_sponsor_listing WHERE login='******'")
                        items = cur.fetchall()
                        if len(items) == 1:
                            logging.info("user: "******" had been inserted into database!")
                        else:
                            cur.execute(
                                "insert into github_sponsor_listing "
                                "(login, slug, name, tiers_total_count, created_at, short_description) "
                                "values (%s, %s, %s, %s, %s, %s)",
                                (obj["data"]["user"]["login"], obj["data"]
                                 ["user"]["sponsorsListing"]["slug"],
                                 obj["data"]["user"]["sponsorsListing"]
                                 ["name"], obj["data"]["user"]
                                 ["sponsorsListing"]["tiers"]["totalCount"],
                                 base.time_handler(
                                     obj["data"]["user"]["sponsorsListing"]
                                     ["createdAt"]), obj["data"]["user"]
                                 ["sponsorsListing"]["shortDescription"]))
                            db.commit()
                            logging.info(
                                login +
                                " ~~~~~~~~~ data commit into dababase success!!"
                            )
                # NOTE(review): task_done/close are skipped when an
                # exception is raised above, so q.join() can hang and the
                # connection leaks.
                self.q.task_done()
                cur.close()
                db.close()
            except Exception as e:
                logging.fatal(e)
                return
Exemplo n.º 7
0
    def run(self):
        """Worker loop: for each queued login, read base_path/<login>.json
        and insert one github_sponsor_listing_tiers row per tier edge of
        the user's sponsors listing, committing after each row.

        NOTE(review): the '"******"' fragment near the end is a redaction
        artifact from the code host and is not valid Python as rendered.
        """
        while not self.q.empty():
            work = self.q.get(timeout=0)
            logging.info("the number of work in queue: " + str(self.q.qsize()))

            login = work["login"]
            # get db connection (manual commit)
            db = base.connectMysqlDB(config, autocommit=False)
            cur = db.cursor()

            # read data from file
            try:
                file = base_path + "/" + login + ".json"
                text = base.get_info_from_file(file)
                if text is False:
                    logging.warn("file not existed: " + file)
                else:
                    obj = json.loads(text)
                    if obj["data"]["user"]["sponsorsListing"] is not None:
                        logging.info(login + " ~~~~~~~~~ has " +
                                     str(obj["data"]["user"]["sponsorsListing"]
                                         ["tiers"]["totalCount"]) + " tiers")
                        count = 1
                        # one insert + commit per tier edge
                        for edge in obj["data"]["user"]["sponsorsListing"][
                                "tiers"]["edges"]:
                            cur.execute(
                                "insert into github_sponsor_listing_tiers "
                                "(login, slug, monthly_price_in_cents, monthly_price_in_dollars, name, created_at, updated_at, description) "
                                "values (%s, %s, %s, %s, %s, %s, %s, %s)",
                                (obj["data"]["user"]["login"], obj["data"]
                                 ["user"]["sponsorsListing"]["slug"],
                                 edge["node"]["monthlyPriceInCents"],
                                 edge["node"]["monthlyPriceInDollars"],
                                 edge["node"]["name"],
                                 base.time_handler(edge["node"]["createdAt"]),
                                 base.time_handler(edge["node"]["updatedAt"]),
                                 edge["node"]["description"]))
                            db.commit()
                            # logging.info("the " + str(count) + "th tier data commit into dababase success!!")
                            count += 1
                    else:
                        logging.warn("login: "******" don't have sponsor_listing")
                        logging.warn("sponsor_listing: " + str(obj))
                self.q.task_done()
                cur.close()
                db.close()
            except Exception as e:
                # NOTE(review): the same exception is logged twice (fatal
                # then error); task_done/close are also skipped on failure,
                # so q.join() can hang and the connection leaks.
                logging.fatal(e)
                logging.error(e)
Exemplo n.º 8
0
    def run(self):
        """Worker loop: for each queued login, read every cached
        commitComments JSON file under base_path/<login> and insert one
        github_commit_comment row per comment edge, committing per row."""
        while not self.q.empty():
            work = self.q.get(timeout=0)
            logging.info("the number of work in queue: " + str(self.q.qsize()))

            login = work["login"]
            # get db connection (manual commit)
            db = base.connectMysqlDB(config, autocommit=False)
            cur = db.cursor()

            # read data from file
            try:
                directory = base_path + "/" + login
                files = base.read_all_filename_in_directory(directory)
                for file in files:
                    text = base.get_info_from_file(file)
                    if text is False:
                        logging.warn("file not existed: " + file)
                        continue
                    obj = json.loads(text)
                    logging.info("read file: " + file)
                    count = 1
                    # some cached responses have no "edges" key at all
                    if "edges" not in obj["data"]["user"]["commitComments"]:
                        continue
                    for node in obj["data"]["user"]["commitComments"][
                            "edges"]:
                        logging.info("the " + str(count) +
                                     "th record in file: " + file)
                        # a comment may reference a deleted commit
                        if node["node"]["commit"] is not None:
                            oid = node["node"]["commit"]["oid"]
                        else:
                            oid = ""
                        cur.execute(
                            "insert into github_commit_comment "
                            "(comm_database_id, login, created_at, updated_at, body, commit_oid) "
                            "values (%s, %s, %s, %s, %s, %s)",
                            (node["node"]["databaseId"],
                             obj["data"]["user"]["login"],
                             base.time_handler(node["node"]["createdAt"]),
                             base.time_handler(node["node"]["updatedAt"]),
                             node["node"]["body"], oid))
                        db.commit()
                        count += 1
            except Exception as e:
                logging.fatal(e)
                return
            finally:
                # BUG FIX: close the connection and mark the task done even
                # on failure; the original skipped both when an exception
                # escaped, leaking the connection and leaving q.join()
                # waiting forever.  (finally also runs before the return.)
                self.q.task_done()
                cur.close()
                db.close()
Exemplo n.º 9
0
def update_github_user_flag(login, flag):
    """Set github_user.flag for the (redacted) login.

    ``flag`` may be an int or a str; it is bound as a query parameter
    rather than interpolated into the SQL text.
    """
    # create database connection
    db = base.connectMysqlDB(config)
    cur = db.cursor()

    # parameterized update; the original concatenated str(flag) into the
    # statement and then called fetchall() (an UPDATE returns no rows)
    cur.execute("update github_user "
                "set flag=%s "
                "where login='******'", (flag,))
    # commit explicitly in case connectMysqlDB(config) does not
    # autocommit — TODO confirm the helper's default
    db.commit()
    logging.info("finish updating database")

    # close this database connection
    cur.close()
    db.close()
Exemplo n.º 10
0
def updateGithubSponsorshipsAsMaintainer(login, flag):
    """Set github_sponsorships_as_maintainer.flag for the (redacted)
    sponsor_login.

    ``flag`` may be an int or a str; it is bound as a query parameter.
    """
    # create database connection
    db = base.connectMysqlDB(config)
    cur = db.cursor()

    # BUG FIX: the original concatenated ``flag`` directly into the SQL
    # string (TypeError for non-str flags, injection-prone) and called
    # fetchall() on an UPDATE, which returns no result set
    cur.execute("update github_sponsorships_as_maintainer "
                "set flag=%s "
                "where sponsor_login='******'", (flag,))
    # commit explicitly in case connectMysqlDB(config) does not
    # autocommit — TODO confirm the helper's default
    db.commit()
    logging.info("finish updating database")

    # close this database connection
    cur.close()
    db.close()
Exemplo n.º 11
0
    def run(self):
        """Worker loop: for each queued login, read every cached
        pullRequestReviewContributions JSON file under base_path/<login>
        and insert one github_user_pr_review row per review edge."""
        while not self.q.empty():
            work = self.q.get(timeout=0)
            logging.info("the number of work in queue: " + str(self.q.qsize()))

            login = work["login"]
            # get db connection (manual commit, one commit per row)
            db = base.connectMysqlDB(config, autocommit=False)
            cur = db.cursor()

            # read data from file
            try:
                directory = base_path + "/" + login
                files = base.read_all_filename_in_directory(directory)
                for file in files:
                    text = base.get_info_from_file(file)
                    if text is False:
                        logging.warn("file not existed: " + file)
                        continue
                    obj = json.loads(text)
                    logging.info("read file: " + file)
                    count = 1
                    for node in obj["data"]["user"][
                            "contributionsCollection"][
                                "pullRequestReviewContributions"]["edges"]:
                        try:  # maybe happen duplicate key when insert data
                            cur.execute(
                                "insert ignore into github_user_pr_review "
                                "(pr_database_id, login, created_at, body) "
                                "values (%s, %s, %s, %s)",
                                (node["node"]["pullRequestReview"]
                                 ["databaseId"], node["node"]
                                 ["pullRequestReview"]["author"]["login"],
                                 base.time_handler(
                                     node["node"]["pullRequestReview"]
                                     ["createdAt"]), node["node"]
                                 ["pullRequestReview"]["body"]))
                            db.commit()
                            # logging.info("the " + str(count) + "th record in file: " + file)
                        except Exception as e:
                            logging.error(e)
                        count += 1
            except Exception as e:
                logging.fatal(e)
                return
            finally:
                # BUG FIX: close the connection and mark the task done even
                # on failure; the original skipped both when an exception
                # escaped, leaking the connection and leaving q.join()
                # waiting forever.  (finally also runs before the return.)
                self.q.task_done()
                cur.close()
                db.close()
Exemplo n.º 12
0
def updateGithubUserFlag(login, flag):
    """Set github_user.flag for the (redacted) login.

    NOTE(review): the '"******"' fragments are redaction artifacts from
    the code host; the log call below is not valid Python as rendered.
    Other issues to confirm: ``flag`` is concatenated into the SQL (must
    already be a str here), fetchall() after an UPDATE returns no rows,
    and no explicit commit is issued — verify that
    connectMysqlDB(config) autocommits.
    """
    # create database connection
    db = base.connectMysqlDB(config)
    cur = db.cursor()

    # update the flag for this login (see NOTE above)
    cur.execute("update github_user "
                "set flag= " + flag + " "
                "where login='******'")
    items = cur.fetchall()
    logging.info("update successfully! login: "******", flag: " +
                 str(flag))

    # close this database connection
    cur.close()
    db.close()
Exemplo n.º 13
0
    def run(self):
        """Worker loop: for each queued login, read every cached
        contributionCalendar JSON file under base_path/<login> and insert
        one github_user_commits_per_day row per calendar day.

        NOTE(review): the '"******"' fragments are redaction artifacts
        from the code host and are not valid Python as rendered.
        """
        while not self.q.empty():
            work = self.q.get(timeout=0)
            logging.info("the number of work in queue: " + str(self.q.qsize()))

            login = work["login"]
            # get db connection (manual commit, one commit per row)
            db = base.connectMysqlDB(config, autocommit=False)
            cur = db.cursor()

            # read data from file
            try:
                directory = base_path + "/" + login
                files = os.listdir(directory)
                for file in files:
                    file_path = directory + "/" + file
                    text = base.get_info_from_file(file_path)
                    logging.info("login: "******", being handle file: " +
                                 file_path)
                    if text is False:
                        logging.warn("file not existed: " + file_path)
                    else:
                        obj = json.loads(text)
                        # logging.info("read file: " + file_path)
                        count = 1
                        # weeks -> contributionDays yields one row per day;
                        # "insert ignore" drops rows that violate a unique
                        # key (schema not visible here)
                        for week in obj["data"]["user"][
                                "contributionsCollection"][
                                    "contributionCalendar"]["weeks"]:
                            for day in week["contributionDays"]:
                                cur.execute(
                                    "insert ignore into github_user_commits_per_day "
                                    "(login, date, weekday, contribution_count) "
                                    "values (%s, %s, %s, %s)",
                                    (obj["data"]["user"]["login"], day["date"],
                                     day["weekday"], day["contributionCount"]))
                                db.commit()
                                # logging.info("the " + str(count) + "th record in file: " + file_path)
                                count += 1
                self.q.task_done()
                cur.close()
                db.close()
            except Exception as e:
                # NOTE(review): beyond the redaction, concatenating the
                # Exception ``e`` to a str would raise TypeError; cleanup
                # and task_done are also skipped on failure.
                logging.fatal("login: "******", fatal info: " + e)
                return
Exemplo n.º 14
0
def insert_user_from_json_file():
    """Insert every login listed in sponsorsListing_notnull.json into the
    init_user table, committing each row individually."""
    # read all the users; "with" closes the file deterministically
    # (the original leaked the handle)
    with open('sponsorsListing_notnull.json', 'r') as load_f:
        load_list = json.load(load_f)
    # get db connection (manual commit)
    db = base.connectMysqlDB(config, autocommit=False)
    cur = db.cursor()

    # one insert + commit per entry ("entry" instead of shadowing the
    # builtin name "dict")
    for entry in load_list:
        logging.info(entry["login"])
        try:
            # parameterized insert instead of string concatenation
            cur.execute("insert into init_user "
                        "(login) "
                        "values (%s)", (entry["login"],))
            db.commit()
        except Exception as e:
            logging.fatal(e)
    cur.close()
    db.close()
Exemplo n.º 15
0
def insert_user_from_txt_file():
    """Insert every login listed in users_with_sponsorList.txt (one per
    line) into the init_user table, committing each row individually."""
    # read all the users; "with" closes the file deterministically
    # (the original leaked the handle)
    with open('users_with_sponsorList.txt', 'r') as f:
        logins = f.read().strip().split("\n")
    # get db connection (manual commit)
    db = base.connectMysqlDB(config, autocommit=False)
    cur = db.cursor()

    # one insert + commit per login
    for username in logins:
        logging.info(username)
        try:
            # parameterized insert instead of string concatenation
            cur.execute("insert into init_user "
                        "(login) "
                        "values (%s)", (username,))
            db.commit()
        except Exception as e:
            logging.fatal(e)
    cur.close()
    db.close()
Exemplo n.º 16
0
def analyze_nums_change(username, sql, compare_name):
    """Compare an aggregate (given by ``sql``) for ``username`` before and
    after their earliest sponsorship time, then plot the two sums.

    ``sql`` must contain three interpolation slots:
    (username, range_start, range_end).
    """
    times = get_time_range(username)
    first_time = base.timestamp_to_time(times[0])
    mid_time = base.timestamp_to_time(times[1])
    last_time = base.timestamp_to_time(times[2])
    logging.info("first_time: " + first_time + ", mid_time: " + mid_time +
                 ", last_time: " + last_time)

    # create database connection
    db = base.connectMysqlDB(config)
    cur = db.cursor()

    # get commit sum between first_time and mid_time
    sql1 = sql % (username, first_time, mid_time)
    cur.execute(sql1)
    items = cur.fetchall()
    sum1 = items[0][0]

    # get commit sum between mid_time and last_time
    # BUG FIX: the original interpolated the module-level ``end_time``
    # (an unformatted time string) instead of the ``last_time`` computed
    # above, inconsistent with the first query and this comment.
    sql2 = sql % (username, mid_time, last_time)
    cur.execute(sql2)
    items = cur.fetchall()
    sum2 = items[0][0]

    # close this database connection
    cur.close()
    db.close()

    # draw picture: one bar before sponsoring, one after
    x1 = ["before sponsoring"]
    y1 = [sum1]
    x2 = ["after sponsoring"]
    y2 = [sum2]
    plt.bar(x1, y1, color='g', align='center')
    plt.bar(x2, y2, color='b', align='center')
    plt.title(compare_name)
    plt.show()

    logging.info("sum1: " + str(sum1) + ", sum2: " + str(sum2))
Exemplo n.º 17
0
def get_time_range(username):
    """Return [first, mid, last] timestamps around the user's earliest
    sponsorship: mid is the earliest sponsorship time, last is derived
    from the module-level ``end_time``, and first mirrors last around mid.
    """
    try:
        # create database connection
        db = base.connectMysqlDB(config)
        cur = db.cursor()

        sql = "select min(created_at) \
                from github_sponsorships_as_maintainer \
                where login='******'"
        cur.execute(sql)
        times = cur.fetchall()
        earliest_sponsor_time = base.datetime_to_timestamp(times[0][0])

        # close this database connection
        cur.close()
        db.close()
    except Exception as e:
        logging.error("get_time_range failed")
        logging.error(e)
        # BUG FIX: the original fell through after logging and crashed
        # below with a NameError on earliest_sponsor_time; re-raise so the
        # caller sees the real failure instead.
        raise
    last_time = base.time_string_to_timestamp(end_time)
    first_time = 2 * earliest_sponsor_time - last_time
    mid_time = earliest_sponsor_time
    return [first_time, mid_time, last_time]
Exemplo n.º 18
0
    def run(self):
        """Worker loop: for each queued login, read every cached
        sponsorshipsAsSponsor JSON file under base_path/<login> and insert
        the user's outgoing sponsorships into
        github_sponsorships_as_sponsor.

        NOTE(review): uses the Python 2 ``print`` statement; this module
        is Python 2 code.
        """
        while not self.q.empty():
            work = self.q.get(timeout=0)
            logging.info("the number of work in queue: " + str(self.q.qsize()))

            login = work["login"]
            # get db connection (manual commit)
            db = base.connectMysqlDB(config, autocommit=False)
            cur = db.cursor()

            # read data from file
            try:
                directory = base_path + "/" + login
                files = os.listdir(directory)
                for file in files:
                    file_path = directory + "/" + file
                    text = base.get_info_from_file(file_path)
                    if text is False:
                        logging.warn("file not existed: " + file_path)
                        continue
                    obj = json.loads(text)
                    print "read file: " + file_path
                    count = 1
                    # The user received sponsorships but never sponsored
                    # anyone; such users are still written into
                    # github_sponsorships_as_sponsor (flag4, self-referencing
                    # row) so they can be filtered out later.
                    if len(obj["data"]["user"]["sponsorshipsAsSponsor"]
                           ["edges"]) == 0:
                        logging.warn("the user " + login +
                                     " doesn't sponsor others")
                        cur.execute(
                            "insert into github_sponsorships_as_sponsor "
                            "(login, sponsor_login, flag) "
                            "values (%s, %s, %s)",
                            (login, login, str(base.flag4)))
                        db.commit()
                        continue
                    for edge in obj["data"]["user"]["sponsorshipsAsSponsor"][
                            "edges"]:
                        if edge["node"]["privacyLevel"] == "PRIVATE":
                            # private sponsorships expose no sponsorable info
                            logging.info("the " + str(count) +
                                         "th record is private in file: " +
                                         file_path)
                            count += 1
                            continue
                        else:
                            # NOTE(review): split("-")[1] assumes the listing
                            # slug has the form "<prefix>-<login>"; the
                            # derived part goes into the login column while
                            # the full slug goes into slug — confirm against
                            # the table schema.
                            slug = edge["node"]["sponsorable"][
                                "sponsorsListing"]["slug"].split("-")[1]
                            cur.execute(
                                "insert into github_sponsorships_as_sponsor "
                                "(login, slug, sponsor_login, flag, created_at) "
                                "values (%s, %s, %s, %s, %s)",
                                (slug, edge["node"]["sponsorable"]
                                 ["sponsorsListing"]["slug"],
                                 obj["data"]["user"]["login"], str(3),
                                 base.time_handler(edge["node"]["createdAt"])))
                        db.commit()
                        logging.info("the " + str(count) +
                                     "th record in file: " + file_path)
                        count += 1
                # NOTE(review): task_done/close are skipped when an
                # exception is raised above, so q.join() can hang and the
                # connection leaks.
                self.q.task_done()
                cur.close()
                db.close()
            except Exception as e:
                logging.fatal(e)