def output_commit_comments(commit_comments, sha):
    """Append one CSV row per commit comment (for commit *sha*) to
    commit_comments.csv, using the module-wide writer convention."""
    with open('commit_comments.csv', 'ab') as output_csvfile:
        scream.log('commit_comments.csv opened for append..')
        if use_utf8:
            writer = UnicodeWriter(output_csvfile)
        else:
            writer = csv.writer(output_csvfile, dialect=MyDialect)

        def as_text(value):
            # Uniform "str() unless missing" rule used for most columns.
            return str(value) if value is not None else ''

        for comment in commit_comments:
            # These three fields are plain ints whenever present.
            assert (type(comment.id) == int or comment.id is None)
            assert (type(comment.position) == int or comment.position is None)
            assert (type(comment.line) == int or comment.line is None)
            scream.log(str(comment.commit_id))
            flat_body = (' '.join(comment.body.splitlines())
                         if comment.body is not None else '')
            login = comment.user.login if comment.user is not None else ''
            path = comment.path if comment.path is not None else ''
            writer.writerow((
                repo.getName(),
                repo.getOwner(),
                sha,
                flat_body,
                as_text(comment.commit_id),  # logged above
                as_text(comment.created_at),
                as_text(comment.html_url),
                as_text(comment.id),
                login,
                as_text(comment.line),
                path,
                as_text(comment.position),
                as_text(comment.updated_at)))
def report_quota_async(quota_is, quota_left):
    """E-mail a report of the current GitHub API quota.

    Reads SMTP credentials from mail_pass.txt (login on line 1, password on
    line 2) and sends a multipart plain-text + HTML message through
    mail.wikiteams.pl over STARTTLS.

    :param quota_is: granted API quota (substituted into the message body)
    :param quota_left: remaining API quota (substituted into the message body)
    """
    # str() guards against a TypeError when quota_left is not a string.
    scream.log('report_quota_async started, quota_left is: ' + str(quota_left))
    secrets = []
    with open('mail_pass.txt', 'r') as passfile:
        for line in passfile:
            secrets.append(line)
    login_string = str(secrets[0]).strip()
    pass_string = str(secrets[1]).strip()

    # me == my email address
    # you == recipient's email address
    me = "*****@*****.**"
    you = "*****@*****.**"

    # Create message container - the correct MIME type is multipart/alternative.
    msg = MIMEMultipart('alternative')
    msg['Subject'] = "WikiTeams.pl - GitHub repo getter reporting"
    msg['From'] = me
    msg['To'] = you

    # Create the body of the message (a plain-text and an HTML version).
    text = "GitHub repo getter reporting!!\nGitHub API quota stands as below:\nGranted: __QUOTA_GRANTED Quota left: __QUOTA__LEFT"
    html = """\
    <html>
      <head></head>
      <body>
        <p>GitHub repo getter reporting!<br>
           GitHub API quota stands as below:<br>
           Granted: __QUOTA_GRANTED Quota left: __QUOTA__LEFT.
        </p>
      </body>
    </html>
    """

    # BUG FIX: the original called str.strip(placeholder, value), which raises
    # TypeError (strip takes at most one argument) and would never substitute
    # the placeholders; str.replace() is what was intended.  It also assigned
    # the plain-text body to `html`, destroying the HTML alternative.
    text = text.replace('__QUOTA_GRANTED', str(quota_is))
    html = html.replace('__QUOTA_GRANTED', str(quota_is))
    text = text.replace('__QUOTA__LEFT', str(quota_left))
    html = html.replace('__QUOTA__LEFT', str(quota_left))

    # Record the MIME types of both parts - text/plain and text/html.
    part1 = MIMEText(text, 'plain')
    part2 = MIMEText(html, 'html')

    # Attach parts into message container.
    # According to RFC 2046, the last part of a multipart message, in this case
    # the HTML message, is best and preferred.
    msg.attach(part1)
    msg.attach(part2)

    # Send the message via local SMTP server; always close the connection.
    s = smtplib.SMTP('mail.wikiteams.pl', 587)
    try:
        s.set_debuglevel(1)
        s.ehlo()
        s.starttls()
        s.login(login_string, pass_string)
        # sendmail function takes 3 arguments: sender's address, recipient's
        # address and message to send - here it is sent as one string.
        s.sendmail(me, you, msg.as_string())
    finally:
        s.quit()
def report_quota_async(quota_is, quota_left):
    """E-mail a report of the current GitHub API quota.

    Reads SMTP credentials from mail_pass.txt (login on line 1, password on
    line 2) and sends a multipart plain-text + HTML message through
    mail.wikiteams.pl over STARTTLS.

    :param quota_is: granted API quota (substituted into the message body)
    :param quota_left: remaining API quota (substituted into the message body)
    """
    # str() guards against a TypeError when quota_left is not a string.
    scream.log('report_quota_async started, quota_left is: ' + str(quota_left))
    secrets = []
    with open('mail_pass.txt', 'r') as passfile:
        for line in passfile:
            secrets.append(line)
    login_string = str(secrets[0]).strip()
    pass_string = str(secrets[1]).strip()

    # me == my email address
    # you == recipient's email address
    me = "*****@*****.**"
    you = "*****@*****.**"

    # Create message container - the correct MIME type is multipart/alternative.
    msg = MIMEMultipart('alternative')
    msg['Subject'] = "WikiTeams.pl - GitHub repo getter reporting"
    msg['From'] = me
    msg['To'] = you

    # Create the body of the message (a plain-text and an HTML version).
    text = "GitHub repo getter reporting!!\nGitHub API quota stands as below:\nGranted: __QUOTA_GRANTED Quota left: __QUOTA__LEFT"
    html = """\
    <html>
      <head></head>
      <body>
        <p>GitHub repo getter reporting!<br>
           GitHub API quota stands as below:<br>
           Granted: __QUOTA_GRANTED Quota left: __QUOTA__LEFT.
        </p>
      </body>
    </html>
    """

    # BUG FIX: the original called str.strip(placeholder, value), which raises
    # TypeError (strip takes at most one argument) and would never substitute
    # the placeholders; str.replace() is what was intended.  It also assigned
    # the plain-text body to `html`, destroying the HTML alternative.
    text = text.replace('__QUOTA_GRANTED', str(quota_is))
    html = html.replace('__QUOTA_GRANTED', str(quota_is))
    text = text.replace('__QUOTA__LEFT', str(quota_left))
    html = html.replace('__QUOTA__LEFT', str(quota_left))

    # Record the MIME types of both parts - text/plain and text/html.
    part1 = MIMEText(text, 'plain')
    part2 = MIMEText(html, 'html')

    # Attach parts into message container.
    # According to RFC 2046, the last part of a multipart message, in this case
    # the HTML message, is best and preferred.
    msg.attach(part1)
    msg.attach(part2)

    # Send the message via local SMTP server; always close the connection.
    s = smtplib.SMTP('mail.wikiteams.pl', 587)
    try:
        s.set_debuglevel(1)
        s.ehlo()
        s.starttls()
        s.login(login_string, pass_string)
        # sendmail function takes 3 arguments: sender's address, recipient's
        # address and message to send - here it is sent as one string.
        s.sendmail(me, you, msg.as_string())
    finally:
        s.quit()
 def cleanup(self):
     """Mark this worker thread as finished and release its DB connection.

     Safe to call more than once: a connection that is already closed is
     logged and ignored.
     """
     scream.say('Marking thread on ' + str(self.threadId) + "/" + str(self.page) + ' as definitly finished..')
     self.finished = True
     try:
         self.conn.close()
     except Exception:
         # FIX: was a bare `except:`, which would also swallow
         # SystemExit/KeyboardInterrupt during teardown.
         scream.log('MySQL connection instance already closed. Ok.')
     scream.say('Terminating/join() thread on ' + str(self.threadId) + ' ...')
    def get_data(self):
        """Build the contributor list for this thread's repository.

        Every contributor returned by build_list_of_programmers() is
        "revealed" (details fetched) inside a retry loop, then the resulting
        contributor set and its size are stored on self.repo, and
        self.cleanup() is invoked.

        NOTE(review): error paths use the module-global `key`, which may not
        match self.repo.getKey() — confirm against the main loop.
        """
        global resume_stage

        scream.say('Preparing to build list of programmers: ' + str(self.threadId))
        # Only run when starting fresh or resuming at the 'contributors' stage.
        if resume_stage in [None, 'contributors']:
            #try:
            scream.ssay('Checking size of a ' + str(self.repo.getKey()) + ' team')
            '1. Team size of a repository'
            self.contributors = self.repository.get_contributors()
            assert self.contributors is not None

            self.repo_contributors = set()
            self.contributors_static = self.build_list_of_programmers(self.contributors, self.repo.getKey(), self.repository)
            for contributor in self.contributors_static.items():
                scream.log_debug('move with contributor to next from contributors_static.items()', True)
                # Retry loop: repeat the reveal for this contributor until it
                # succeeds (break) or force_raise escalates the exception.
                while True:
                    scream.say('Get details for a contributor..')
                    try:
                        self.contributor_login = contributor[0]
                        self.contributor_object = contributor[1]
                        scream.say(str(self.contributor_login))
                        self.repo_contributors.add(self.contributor_login)
                        scream.say(str(self.repo_contributors))
                        #developer_revealed(threading.current_thread(), self.repository, self.repo, self.contributor_object)
                        developer_revealed(self.threadId, self.repository, self.repo, self.contributor_object)
                        scream.say('Finished revealing developer')
                        break
                    except TypeError as e:
                        scream.log_error('Repo + Contributor TypeError, or paginated through' +
                                         ' contributors gave error. ' + key + ', error({0})'.
                                         format(str(e)), True)
                        repos_reported_execution_error.write(key + os.linesep)
                        # NOTE(review): without force_raise a persistent
                        # TypeError makes this loop retry forever.
                        if force_raise:
                            raise
                        #break
                    except socket.timeout as e:
                        scream.log_error('Timeout while revealing details.. ' +
                                         ', error({0})'.format(str(e)), True)
                        # freeze() presumably backs off before the retry —
                        # TODO confirm its semantics.
                        freeze('socket.timeout in paginate through x contributors')
                        if force_raise:
                            raise
                        #break
                    except Exception as e:
                        scream.log_error('Exception while revealing details.. ' +
                                         ', error({0})'.format(str(e)), True)
                        freeze(str(e) + ' in paginate through x contributors')
                        if force_raise:
                            raise
                        #break

            assert self.repo_contributors is not None
            self.repo.setContributors(self.repo_contributors)
            self.repo.setContributorsCount(len(self.repo_contributors))
            scream.log('Added contributors of count: ' + str(len(self.repo_contributors)) + ' to a repo ' + key)
        # Always release this thread's resources, even if the stage was skipped.
        self.cleanup()
    def get_data(self):
        """Collect contributors for this thread's repository and persist them.

        Mirrors the other get_data variant: reveal each contributor inside a
        retry loop, then store the contributor set and its size on self.repo
        and call self.cleanup().

        NOTE(review): error paths use the module-global `key`, which may not
        match self.repo.getKey() — confirm against the main loop.
        """
        global resume_stage

        scream.say('Executing inside-thread method get_data() for: ' + str(self.threadId))
        # Only run when starting fresh or resuming at the 'contributors' stage.
        if resume_stage in [None, 'contributors']:
            #try:
            scream.ssay('Checking size of a ' + str(self.repo.getKey()) + ' team')
            '1. Team size of a repository'
            self.contributors = self.repository.get_contributors()
            assert self.contributors is not None

            self.repo_contributors = set()
            self.contributors_static = self.build_list_of_programmers(self.contributors, self.repo.getKey(), self.repository)
            for contributor in self.contributors_static.items():
                scream.log_debug('move with contributor to next from contributors_static.items()', True)
                # Retry loop: repeat the reveal for this contributor until it
                # succeeds (break) or force_raise escalates the exception.
                while True:
                    scream.say('Inside while True: (line 674)')
                    try:
                        self.contributor_login = contributor[0]
                        self.contributor_object = contributor[1]
                        scream.say(str(self.contributor_login))
                        self.repo_contributors.add(self.contributor_login)
                        scream.say(str(self.repo_contributors))
                        #developer_revealed(threading.current_thread(), self.repository, self.repo, self.contributor_object)
                        developer_revealed(self.threadId, self.repository, self.repo, self.contributor_object)
                        scream.say('Finished revealing developer')
                        break
                    except TypeError as e:
                        scream.log_error('Repo + Contributor TypeError, or paginated through' +
                                         ' contributors gave error. ' + key + ', error({0})'.
                                         format(str(e)), True)
                        repos_reported_execution_error.write(key + os.linesep)
                        # NOTE(review): without force_raise a persistent
                        # TypeError makes this loop retry forever.
                        if force_raise:
                            raise
                        #break
                    except socket.timeout as e:
                        scream.log_error('Timeout while revealing details.. ' +
                                         ', error({0})'.format(str(e)), True)
                        freeze('socket.timeout in paginate through x contributors')
                        if force_raise:
                            raise
                        #break
                    except Exception as e:
                        scream.log_error('Exception while revealing details.. ' +
                                         ', error({0})'.format(str(e)), True)
                        freeze(str(e) + ' in paginate through x contributors')
                        if force_raise:
                            raise
                        #break

            assert self.repo_contributors is not None
            self.repo.setContributors(self.repo_contributors)
            self.repo.setContributorsCount(len(self.repo_contributors))
            scream.log('Added contributors of count: ' + str(len(self.repo_contributors)) + ' to a repo ' + key)
        # Always release this thread's resources, even if the stage was skipped.
        self.cleanup()
def output_commit_stats(commit_stats, sha):
    """Append a single additions/deletions/total row for commit *sha* to
    commit_stats.csv."""
    with open('commit_stats.csv', 'ab') as output_csvfile:
        scream.log('commit_stats.csv opened for append..')
        if use_utf8:
            writer = UnicodeWriter(output_csvfile)
        else:
            writer = csv.writer(output_csvfile, dialect=MyDialect)
        # All three counters are plain ints when present.
        assert (type(commit_stats.additions) == int or
                commit_stats.additions is None)
        assert (type(commit_stats.deletions) == int or
                commit_stats.deletions is None)
        assert (type(commit_stats.total) == int or commit_stats.total is None)
        additions = '' if commit_stats.additions is None else str(commit_stats.additions)
        deletions = '' if commit_stats.deletions is None else str(commit_stats.deletions)
        total = '' if commit_stats.total is None else str(commit_stats.total)
        writer.writerow((repo.getName(), repo.getOwner(), sha,
                         additions, deletions, total))
def output_commit_statuses(commit_statuses, sha):
    """Append one CSV row per commit status (for commit *sha*) to
    commit_statuses.csv."""
    with open('commit_statuses.csv', 'ab') as output_csvfile:
        scream.log('commit_statuses.csv opened for append..')
        if use_utf8:
            writer = UnicodeWriter(output_csvfile)
        else:
            writer = csv.writer(output_csvfile, dialect=MyDialect)
        for status in commit_statuses:
            created = str(status.created_at) if status.created_at is not None else ''
            creator = status.creator.login if status.creator is not None else ''
            description = status.description if status.description is not None else ''
            status_id = str(status.id) if status.id is not None else ''
            state = status.state if status.state is not None else ''
            updated = str(status.updated_at) if status.updated_at is not None else ''
            url = str(status.url) if status.url is not None else ''
            writer.writerow((repo.getName(), repo.getOwner(), sha,
                             created, creator, description, status_id,
                             state, updated, url))
def output_commit_statuses(commit_statuses, sha):
    """Write each commit status of commit *sha* as a row of
    commit_statuses.csv."""
    def text_or_empty(value):
        # Shared "stringify unless missing" rule for most columns.
        return str(value) if value is not None else ''

    with open('commit_statuses.csv', 'ab') as output_csvfile:
        scream.log('commit_statuses.csv opened for append..')
        writer = (UnicodeWriter(output_csvfile) if use_utf8
                  else csv.writer(output_csvfile, dialect=MyDialect))
        for status in commit_statuses:
            writer.writerow((
                repo.getName(),
                repo.getOwner(),
                sha,
                text_or_empty(status.created_at),
                status.creator.login if status.creator is not None else '',
                status.description if status.description is not None else '',
                text_or_empty(status.id),
                status.state if status.state is not None else '',
                text_or_empty(status.updated_at),
                text_or_empty(status.url)))
# ---- Example #10 (code-sharing-site separator; original vote count: 0) ----
def output_commit_stats(commit_stats, sha):
    """Append the additions/deletions/total counters of commit *sha* to
    commit_stats.csv as a single row."""
    with open('commit_stats.csv', 'ab') as output_csvfile:
        scream.log('commit_stats.csv opened for append..')
        writer = (UnicodeWriter(output_csvfile) if use_utf8
                  else csv.writer(output_csvfile, dialect=MyDialect))
        # Counters are ints when present; None means "not reported".
        assert (type(commit_stats.additions) == int
                or commit_stats.additions is None)
        assert (type(commit_stats.deletions) == int
                or commit_stats.deletions is None)
        assert (type(commit_stats.total) == int or commit_stats.total is None)
        row = [repo.getName(), repo.getOwner(), sha]
        for value in (commit_stats.additions,
                      commit_stats.deletions,
                      commit_stats.total):
            row.append(str(value) if value is not None else '')
        writer.writerow(tuple(row))
# ---- Example #11 (code-sharing-site separator; original vote count: 0) ----
def output_commit_comments(commit_comments, sha):
    """Append a row to commit_comments.csv for every comment on commit *sha*."""
    with open('commit_comments.csv', 'ab') as output_csvfile:
        scream.log('commit_comments.csv opened for append..')
        writer = (UnicodeWriter(output_csvfile) if use_utf8
                  else csv.writer(output_csvfile, dialect=MyDialect))
        for comment in commit_comments:
            # id / position / line are plain ints whenever present.
            assert (type(comment.id) == int or comment.id is None)
            assert (type(comment.position) == int or comment.position is None)
            assert (type(comment.line) == int or comment.line is None)
            scream.log(str(comment.commit_id))
            flat_body = ('' if comment.body is None
                         else ' '.join(comment.body.splitlines()))
            writer.writerow((
                repo.getName(),
                repo.getOwner(),
                sha,
                flat_body,
                '' if comment.commit_id is None else str(comment.commit_id),  # logged above
                '' if comment.created_at is None else str(comment.created_at),
                '' if comment.html_url is None else str(comment.html_url),
                '' if comment.id is None else str(comment.id),
                '' if comment.user is None else comment.user.login,
                '' if comment.line is None else str(comment.line),
                '' if comment.path is None else comment.path,
                '' if comment.position is None else str(comment.position),
                '' if comment.updated_at is None else str(comment.updated_at)))
    with open(repos_filename, 'rb') as source_csvfile:
        reposReader = csv.reader(source_csvfile, delimiter=',')
        for row in reposReader:
            key = str(row[0])
            print 'Adding ' + key + ' to list.'

            repo = MyRepository()
            repo.setKey(key)
            owner = key.split('/')[0]
            name = key.split('/')[1]
            repo.setInitials(name, owner)

            #check here if repo dont exist already in dictionary!
            if key in repos:
                scream.log('We already found rep ' + key +
                           ' in the dictionary..')
            else:
                repos[key] = repo


    iteration_step_count = 0

    with open(repos_csv_filename, 'ab') as csvfilerlw:
        rlw = csv.writer(csvfilerlw, delimiter=',',
                         quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for key in repos:

            if resume_on_repo is not None:
                resume_on_repo_name = resume_on_repo.split(',')[0]
                resume_on_repo_owner = resume_on_repo.split(',')[1]
# ---- Example #13 (code-sharing-site separator; original vote count: 0) ----
    is_gc_turned_on = 'turned on' if str(gc.isenabled()) else 'turned off'
    scream.ssay('Garbage collector is ' + is_gc_turned_on)

    #TO DO: do it as a last item, it is less important
    #make_headers()

    scream.say('WORKING WITH INPUT FILE : ' + input_filename)
    scream.say('This can take a while, max aprox. 2 minutes...')
    filename_ = 'data/' if sys.platform == 'linux2' else 'data\\'
    filename__ = filename_ + input_filename
    with open(filename__, 'rb') as source_csvfile:
        reposReader = UnicodeReader(f=source_csvfile, dialect=RepoReaderDialect)
        reposReader.next()
        previous = ''
        for row in reposReader:
            scream.log('Processing row: ' + str(row))
            url = row[1]
            owner = row[0]
            name = row[2]

            key = owner + '/' + name
            scream.log('Key built: ' + key)

            repo = MyRepository()
            repo.setKey(key)
            repo.setInitials(name, owner)
            repo.setUrl(url)

            #check here if repo dont exist already in dictionary!
            if key == previous:
                scream.log('We already found rep ' + key +
    is_gc_turned_on = 'turned on' if str(gc.isenabled()) else 'turned off'
    scream.ssay('Garbage collector is ' + is_gc_turned_on)

    scream.say(
        'WORKING WITH INPUT FILE : ' +
        input_filename)  # simply 'result_stargazers_2013_final_mature.csv'
    scream.say('This can take a while, max aprox. 2 minutes...')
    filename_ = 'data/' if sys.platform == 'linux2' else 'data\\'
    filename__ = filename_ + input_filename  # remember it is in a /data subdir
    with open(filename__, 'rb') as source_csvfile:
        reposReader = UnicodeReader(f=source_csvfile,
                                    dialect=RepoReaderDialect)
        reposReader.next()
        previous = ''
        for row in reposReader:
            scream.log('Processing row: ' + str(row))
            url = row[1]
            owner = row[0]
            name = row[2]

            key = owner + '/' + name
            scream.log('Key built: ' + key)

            repo = MyRepository()
            repo.setKey(key)
            repo.setInitials(name, owner)
            repo.setUrl(url)

            #check here if repo dont exist already in dictionary!
            if key == previous:
                scream.log('We already found rep ' + key +
        no_of_threads = len(github_clients)
        scream.say('No of threads is currently: ' + str(no_of_threads))

    is_gc_turned_on = 'turned on' if str(gc.isenabled()) else 'turned off'
    scream.ssay('Garbage collector is ' + is_gc_turned_on)

    scream.say('WORKING WITH INPUT FILE : ' + input_filename)  # simply 'result_stargazers_2013_final_mature.csv'
    scream.say('This can take a while, max aprox. 2 minutes...')
    filename_ = 'data/' if sys.platform == 'linux2' else 'data\\'
    filename__ = filename_ + input_filename  # remember it is in a /data subdir
    with open(filename__, 'rb') as source_csvfile:
        reposReader = UnicodeReader(f=source_csvfile, dialect=RepoReaderDialect)
        reposReader.next()
        previous = ''
        for row in reposReader:
            scream.log('Processing row: ' + str(row))
            url = row[1]
            owner = row[0]
            name = row[2]

            key = owner + '/' + name
            scream.log('Key built: ' + key)

            repo = MyRepository()
            repo.setKey(key)
            repo.setInitials(name, owner)
            repo.setUrl(url)

            #check here if repo dont exist already in dictionary!
            if key == previous:
                scream.log('We already found rep ' + key +
# ---- Example #16 (code-sharing-site separator; original vote count: 0) ----
def execute_check():
    """Interactive driver: verify the local GHTorrent MySQL copy, pull the
    distinct names lacking a gender from a user-chosen sample table, classify
    each first name via GeneralGetter worker threads, then write the
    classifications back to the database.

    Python 2 script code (print statements, raw_input).  Reads MySQL
    credentials from mysqlu.dat / mysqlp.dat in the working directory and
    fills the module-global `names` dict.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-v",
                        "--verbose",
                        help="verbose messaging ? [True/False]",
                        action="store_true")
    args = parser.parse_args()
    if args.verbose:
        scream.intelliTag_verbose = True
        scream.say("verbosity turned on")

    threads = []

    # init connection to database
    # NOTE(review): both credential files are opened without being closed.
    first_conn = MSQL.connect(host=IP_ADDRESS,
                              port=3306,
                              user=open('mysqlu.dat', 'r').read(),
                              passwd=open('mysqlp.dat', 'r').read(),
                              db="github",
                              connect_timeout=50000000,
                              charset='utf8',
                              init_command='SET NAMES UTF8',
                              use_unicode=True)
    print 'Testing MySql connection...'
    print 'Pinging database: ' + (str(first_conn.ping(True)) if
                                  first_conn.ping(True) is not None else 'NaN')
    cursor = first_conn.cursor()
    cursor.execute(
        r'SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = "%s"'
        % 'github')
    rows = cursor.fetchall()
    print 'There are: ' + str(
        rows[0][0]) + ' table objects in the local GHtorrent copy'
    cursor.execute(
        r'SELECT table_name FROM information_schema.tables WHERE table_schema = "%s"'
        % 'github')
    rows = cursor.fetchall()
    # NOTE(review): `(u'users', ) and (u'projects', ) in rows` only tests
    # (u'projects',) membership — the first tuple is truthy and discarded,
    # so a missing `users` table is not detected.
    if (u'users', ) and (u'projects', ) in rows:
        print 'All neccesary tables are there.'
    else:
        print 'Your database does not fit a typical description of a GitHub Torrent copy..'
        sys.exit(0)

    # NOTE(review): the table name is concatenated straight into SQL.  This
    # is tolerable for a trusted operator prompt, but it is SQL injection if
    # this input is ever automated.
    sample_tb_name = raw_input(
        "Please enter table/view name (of chosen data sample): ")
    cursor.execute(r'select count(distinct name) from ' + str(sample_tb_name) +
                   ' where ((name is not NULL) and (gender is NULL))')
    rows = cursor.fetchall()
    record_count = rows[0][0]
    cursor.close()

    scream.say(
        "Database seems to be working. Move on to getting list of users.")

    # populate list of users to memory
    cursor = first_conn.cursor()
    is_locked_tb = raw_input("Should I update [users] table instead of [" +
                             str(sample_tb_name) + "]? [y/n]: ")
    # Convert the y/n answer to a boolean reused in the UPDATE phase below.
    is_locked_tb = True if is_locked_tb in ['yes', 'y'] else False
    print 'Querying all names from the observations set.. This can take around 25-30 sec.'

    cursor.execute(r'select distinct name from ' + str(sample_tb_name) +
                   ' where ((name is not NULL) and (gender is NULL))')
    # if you are interested in how this table was created, you will probably need to read our paper and contact us as well
    # because we have some more tables with aggregated data compared to standard GitHub Torrent collection
    row = cursor.fetchone()
    iterator = 1.0

    # Names shorter than this are treated as noise and skipped.
    min_name_length = 2
    print 'We hypothetize that minimum name length are ' \
        + str(min_name_length) + ' characters, like Ho, Sy, Lu'
    # http://www.answers.com/Q/What_is_the_shortest_name_in_the_world

    while row is not None:
        fullname = unicode(row[0])
        scream.log("\tFullname is: " + str(fullname.encode('unicode_escape')))
        iterator += 1
        print "[Progress]: " + str(
            (iterator / record_count) *
            100) + "% ----------- "  # [names] size: " + str(len(names))
        if len(fullname) < min_name_length:
            scream.log_warning(
                "--Found too short name field (" +
                str(fullname.encode('utf-8')) + ") from DB. Skipping..", True)
            row = cursor.fetchone()
            continue
        # First whitespace-separated token is taken as the given name.
        name = fullname.split()[0]
        # I find it quite uncommon to seperate name from surname with something else than a space
        # it does occur, but it's not in my interest to detect such human-generated dirty data at the moment
        scream.log("\tName is: " + str(name.encode('unicode_escape')))
        if name in names:
            if fullname in names[name]['persons']:
                scream.say(
                    "\tSuch fullname already classified! Rare, but can happen. Move on."
                )
            else:
                scream.say(
                    "\tAdding fullname to already classified name. Move on")
                names[name]['persons'].append(fullname)
        else:
            scream.say("\tNew name. Lets start classification.")
            names[name] = {'persons': list(), 'classification': None}
            names[name]['persons'].append(fullname)
            scream.say("\tStart the worker on name: " +
                       str(name.encode('utf-8')) + " as deriven from: " +
                       str(fullname.encode('utf-8')))
            # start the worker
            gg = GeneralGetter(int(iterator), name)
            scream.say('Creating instance of GeneralGetter complete')
            scream.say('Appending thread to collection of threads')
            threads.append(gg)
            scream.say('Append complete, threads[] now have size: ' +
                       str(len(threads)))
            scream.log_debug(
                'Starting thread ' + str(int(iterator) - 1) + '....', True)
            gg.start()
            # Throttle: wait until fewer than 4 workers are still running.
            while (num_working(threads) > 3):
                time.sleep(
                    0.2
                )  # sleeping for 200 ms - there are already 3 active threads..
        row = cursor.fetchone()

    cursor.close()
    print "Finished getting gender data, moving to database update..."

    # Write classifications back: one UPDATE per (fullname, gender) pair.
    for key in names.keys():
        collection = names[key]
        gender = collection['classification']
        for fullname in names[key]['persons']:
            cursor = first_conn.cursor()
            # NOTE(review): {0} (gender) is interpolated unquoted into the
            # SQL — this only works for NULL/numeric classifications; confirm
            # the classification's type before trusting this statement.
            update_query = r'UPDATE {2} SET gender = {0} where name = "{1}"'.format(
                gender,
                fullname.encode('utf-8').replace('"', '\\"'),
                'users' if is_locked_tb else sample_tb_name)
            print update_query
            cursor.execute(update_query)
            cursor.close()

    first_conn.close()
# ---- Example #17 (code-sharing-site separator; original vote count: 0) ----
    scream.ssay('Garbage collector is ' + is_gc_turned_on)

    #TO DO: do it as a last item, it is less important
    #make_headers()

    scream.say('WORKING WITH INPUT FILE : ' + input_filename)
    scream.say('This can take a while, max aprox. 2 minutes...')
    filename_ = 'data/' if sys.platform == 'linux2' else 'data\\'
    filename__ = filename_ + input_filename
    with open(filename__, 'rb') as source_csvfile:
        reposReader = UnicodeReader(f=source_csvfile,
                                    dialect=RepoReaderDialect)
        reposReader.next()
        previous = ''
        for row in reposReader:
            scream.log('Processing row: ' + str(row))
            url = row[1]
            owner = row[0]
            name = row[2]

            key = owner + '/' + name
            scream.log('Key built: ' + key)

            repo = MyRepository()
            repo.setKey(key)
            repo.setInitials(name, owner)
            repo.setUrl(url)

            #check here if repo dont exist already in dictionary!
            if key == previous:
                scream.log('We already found rep ' + key +
# ---- Example #18 (code-sharing-site separator; original vote count: 0) ----
        gh = Github(credential_list[0]['login'], credential_list[0]['pass'])

    is_gc_turned_on = 'turned on' if str(gc.isenabled()) else 'turned off'
    scream.ssay('Garbage collector is ' + is_gc_turned_on)

    make_headers()

    for filename in file_names:
        scream.say('------ WORKING WITH FILE : ' + filename)
        filename_ = 'data/' if sys.platform == 'linux2' else 'data\\'
        filename__ = filename_ + filename + '.csv'
        with open(filename__, 'rb') as source_csvfile:
            reposReader = csv.reader(source_csvfile, delimiter=',')
            reposReader.next()
            for row in reposReader:
                scream.log('Processing row: ' + str(row))
                name = row[0]
                owner = row[1]

                #here eleminate repos without owner, rly
                if len(owner.strip()) < 1:
                    scream.log_warning('Skipping orphan repo: ' + name)
                    continue
                    #print 'length < 1'

                '12. Liczba Fork'
                forks = row[2]
                watchers = row[3]
                key = owner + '/' + name
                scream.log('Key built: ' + key)
# ---- Example #19 (code-sharing-site separator; original vote count: 0) ----
            usage()
            sys.exit()
        elif o in ("-d", "--delimiter"):
            add_delimiter_info = True
        elif o in ("-m", "--method"):
            method = a
        elif o in ("-s", "--sites"):
            sites = a
        elif o in ("-i", "--input"):
            geckoname = a

    makeHeaders()

    if 'goldpoll' in sites:
        if method == 'static':
            scream.log('Not supported yet! Use native or dont define @method at all')
        elif method == 'native':
            doc = html.parse(goldpoll_url)
            #print etree.tostring(doc)
            elements_c10 = doc.xpath('//table[@class="cl0"]')
            scream.ssay(len(elements_c10))

            for element in elements_c10:
                scream.ssay('')
                scream.ssay('Parsing HYIP..')
                hyip = Hyip()

                local_soup = BeautifulSoup(etree.tostring(element))
                hyip_name_tag = local_soup.find("a", {"class": "nhyip"})
                hyip_name = hyip_name_tag.string
                hyip_url = 'http://www.goldpoll.com' + hyip_name_tag['href']
# ---- Example #20 (code-sharing-site separator; original vote count: 0) ----
    is_gc_turned_on = 'turned on' if str(gc.isenabled()) else 'turned off'
    scream.ssay('Garbage collector is ' + is_gc_turned_on)

    make_headers()

    for filename in file_names:
        scream.say('------ WORKING WITH FILE : ' + filename)
        filename_ = 'data/' if sys.platform == 'linux2' else 'data\\'
        filename__ = filename_ + filename + '.csv'
        with open(filename__, 'rb') as source_csvfile:
            reposReader = csv.reader(source_csvfile,
                                     delimiter=',')
            reposReader.next()
            for row in reposReader:
                scream.log('Processing row: ' + str(row))
                name = row[0]
                owner = row[1]

                #here eleminate repos without owner, rly
                if len(owner.strip()) < 1:
                    scream.log_warning('Skipping orphan repo: ' + name)
                    continue
                    #print 'length < 1'

                '12. Liczba Fork'
                forks = row[2]
                watchers = row[3]
                key = owner + '/' + name
                scream.log('Key built: ' + key)