Example #1
def main():
    cli_args = Config.parse_arguments()

    # A config file must be provided, or else nothing will work.
    if not hasattr(cli_args, 'config_file') or not cli_args.config_file:
        Log.error("A config file must be specified!")
        return
    Config.parse_config(cli_args.config_file)

    Log.config()

    Log.info("Started. Creating database")
    DB.create_db()

    db_session = DB.create_session()

    miner = RepositoryMiner(
        Config.repository_path,
        db_session=db_session,
        branch=Config.repository_branch
    )
    repository = miner.repository_orm

    IssueScanner.assign_issue_tracking(
        repository,
        Config.issue_tracking_system,
        Config.issue_tracking_url,
        Config.issue_tracking_username,
        Config.issue_tracking_password, db_session=db_session)

    IssueScanner.scan_for_repository(repository)
    db_session.close()
Example #2
	def root():

		# if database_exists( engine.url) == False:
		# 	"""if specified db doesn't exist, create and run function to populate"""
		DB.drop_all()
		DB.create_all()
		fillSongDB()

		# < Alternative methods to get track id? > (depends on backend, who has been MIA since Tuesday..)
		# lines = request.values[ 'track_id']
		# lines = request.args.get( 'seed', 
		# 						default= '5xTtaWoae3wi06K5WfVUUH',	# Haters gonna hate, hate, hate, hate, hate
		# 						type= 'str')

		# """ get input from front-end/json and save it to User table in db """
		# lines = request.get_json( force= True)
		# for line in lines:
		# 	User.track_id = lines[ 'track_id']
		# 	assert isinstance( User.track_id, str)
		# 	DB.session.add( User.track_id)
		# DB.commit()


		export = suggestSong(parseInput())

		return APP.response_class(
			response=json.dumps(export),
			status=200,
			mimetype='application/json'
		)
Example #3
def refresh():
    """Pull fresh data from Open AQ and replace existing data."""
    DB.drop_all()
    DB.create_all()
    # TODO Get data from OpenAQ, make Record objects with it, and add to db
    aq_data.add_aq_to_db()
    DB.session.commit()
    return 'Data refreshed!'
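
This view carries no route decorator in the snippet, so it is presumably registered on the surrounding Flask app. A minimal sketch of that wiring, where the app object name and route path are assumptions:

# hypothetical registration; APP and the path are assumptions
APP.add_url_rule('/refresh', view_func=refresh)  # equivalent to decorating with @APP.route('/refresh')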
Example #4
 def setUp(self):
     DB.drop_all()
     DB.create_all()
     collect = Collection('1', 'National Museum of Art',
                          'http://www.nga.gov', 'North America',
                          'Institution')
     DB.session.add(collect)
     DB.session.commit()
Example #5
def makePickleSerialization():
    mydb = DB()
    listAuthors = mydb.get_authors()
    listBooks = mydb.get_books()

    listForSerialize = {'books': listBooks, 'authors': listAuthors}
    with open('files/data.pickle', 'wb') as f:
        pickle.dump(listForSerialize, f)
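
The matching deserializer is not shown here; the commented-out getPickleDeserialize call in the controller example later in this listing suggests it returns two lists. A minimal sketch under that assumption, reading the file written above:

def getPickleDeserialize():
    # hypothetical counterpart to makePickleSerialization
    with open('files/data.pickle', 'rb') as f:
        data = pickle.load(f)
    return data['authors'], data['books']  # assumed order: (authors, books)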
Example #6
 def setUp(self):
     DB.drop_all()
     DB.create_all()
     artist1 = Artist(
         '1', 'Andy Warhol', '1900', 'Male', 'American',
         'http://a5.files.biography.com/image/upload/c_fit,cs_srgb,dpr_1.0,h_1200,q_80,w_1200/MTE5NDg0MDU1MTYxNzY3NDM5.jpg'
     )
     DB.session.add(artist1)
     DB.session.commit()
Example #7
 def setUp(self):
     DB.drop_all()
     DB.create_all()
     style = Style(
         '1', 'Pop', 'cool',
         'http://a5.files.biography.com/image/upload/c_fit,cs_srgb,dpr_1.0,h_1200,q_80,w_1200/MTE5NDg0MDU1MTYxNzY3NDM5.jpg'
     )
     DB.session.add(style)
     DB.session.commit()
Example #8
 def setUp(self):
     DB.drop_all()
     DB.create_all()
     artwork = Artwork(
         '1', 'Statue of David', 'statue', 'Statue', '1000',
         'http://a5.files.biography.com/image/upload/c_fit,cs_srgb,dpr_1.0,h_1200,q_80,w_1200/MTE5NDg0MDU1MTYxNzY3NDM5.jpg'
     )
     DB.session.add(artwork)
     DB.session.commit()
Example #9
def makeJSONSerialize():
    mydb = DB()
    listAuthors = mydb.get_authors()
    listBooks = mydb.get_books()

    listForSerialize = {'books': listBooks, 'authors': listAuthors}

    with open('files/basic.json', mode='w', encoding='utf-8') as f:
        json.dump(listForSerialize, f)
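
Note that json.dump only accepts plain Python types. If get_books or get_authors return driver-specific row objects rather than dicts, a conversion step would be needed first; a sketch under that assumption:

# hypothetical: convert mapping-like cursor rows to plain dicts before dumping
listAuthors = [dict(row) for row in mydb.get_authors()]
listBooks = [dict(row) for row in mydb.get_books()]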
Example #10
def generate_spam_matrix(report_threshold):
    """
    Put all known spam vectors in a matrix.
    """
    db = DB.load()
    word_vectors = [(word, rm.vector)
                    for word, rm in db.reported_messages.items()
                    if rm.reports >= report_threshold]
    return generate_matrix(word_vectors)
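
generate_matrix is defined elsewhere in this project. One plausible minimal sketch, assuming each vector is a fixed-length numeric sequence, stacks the vectors into a matrix while keeping the word order:

import numpy as np

def generate_matrix(word_vectors):
    # hypothetical helper: one row per known spam vector
    words = [word for word, _ in word_vectors]
    matrix = np.array([vector for _, vector in word_vectors])
    return words, matrix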
Example #11
    def init_db_sessions(self, db_session=None):
        """ Init DB session. When threading is activated, it creates one db session per thread.

        Args:
            db_session: Optional. If not specified, a new one will be created.
        """
        if db_session is None:
            self.db_session = DB.create_session()
        else:
            self.db_session = db_session
Example #12
def check(user, passwd):
    try:
        i = imaplib.IMAP4_SSL("imap.gmail.com")
        i.login(user, passwd)
    except imaplib.IMAP4.error as ex:  # maybe catch this at an outer level
        log(str(ex))
        sys.exit(1)

    i.select()
    status, response = i.search(None, 'ALL')
    mailids = [int(a) for a in response[0].split()]
    my_mailid = DB.get().max_mailid()
    new_mailids = [a + 1 for a in range(my_mailid, max(mailids))]

    for mailid in new_mailids:
        DB.get().add_mailid(mailid)
        f = i.fetch(mailid, '(RFC822)')
        mail = f[1][0][1]
        info = f[1][0][0]
        try:
            b = Bankmail(mail)
            # .new() calls .save() :(
            Deposit.new(b.username, b.amount, "Wire", deposit=True, vsk=25.5)
            logger.email(
                "Username: {username}\nAmount: {amount}".format(**b.__dict__))
        except NotBankmail as notb:
            log("Skipping {0}: {1}".format(mailid, str(notb)))
        except ValueError as ve:
            logger.email("Skipping {0}: {1}".format(mailid, ve))
        except AttributeError:
            logger.email(
                "User {username} not found. Amount: {amount}. Parser: {bank}".
                format(**b.__dict__))
        except Exception as e:
            logger.email("Uncaught exception: " + str(e))
            sys.exit(1)

    i.close()
    i.logout()
Example #13
def scan_for_repository(repository):
    """ Scans the issue tracking of a repository in the DB and assigns issues to commits.

    Iterates through all recorded commits of this repository, checks their commit messages for issue references,
    tries to retrieve those issues from the associated issue tracking system, and saves them in the DB.

    Args:
        repository (Repository): The repository to scan.
    """
    assert isinstance(repository, Repository)

    reset_issue_cache()

    # get issue tracking object
    Log.info("Retrieving IssueTracking for Repository " + repository.name + " with id " + str(repository.id))
    db_session = DB.create_session()
    query = db_session.query(IssueTracking).filter(IssueTracking.repository == repository)
    try:
        issue_tracking = query.one()
    except NoResultFound:
        Log.error("No IssueTracking-Entry found for Repository " + repository.name + " with id " + str(repository.id))
        db_session.close()
        return
    Log.debug("IssueTracking found. Type: " + str(issue_tracking.type))

    if issue_tracking.type == TYPE_GITHUB:
        retrieve = GitHub.retrieve
        extract_pattern = '#[0-9]+'
        transform = lambda x: x[1:]
    elif issue_tracking.type == TYPE_JIRA:
        retrieve = Jira.retrieve
        extract_pattern = Config.issue_scanner_issue_id_regex
        if not extract_pattern:
            extract_pattern = '[A-Z][A-Z]+-[0-9]*'  # default extract pattern, not really good
        transform = None
    else:
        Log.error("No Implementation found for IssueTracking-Type '" + str(issue_tracking.type) + "'")
        db_session.close()
        return

    repository = issue_tracking.repository
    for commit in repository.commits:
        issue_ids = extract_issue_ids(commit.message, extract_pattern, transform=transform)
        for issue_id in issue_ids:
            process_issue(issue_tracking, commit, issue_id, retrieve, db_session)

    Log.info("Issue Analysis completed")
    db_session.close()
    reset_issue_cache()
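
The extract_issue_ids helper called above is not part of this snippet. A minimal sketch consistent with how it is used here (a regex pattern plus an optional transform, e.g. stripping the leading '#' from GitHub references):

import re

def extract_issue_ids(message, extract_pattern, transform=None):
    # hypothetical helper: find all issue references in a commit message
    issue_ids = re.findall(extract_pattern, message)
    if transform:
        issue_ids = [transform(issue_id) for issue_id in issue_ids]
    return issue_ids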
Example #14
    def __init__(self):
        """Initialization."""

        self.view = View()
        self.mydb = DB()
        self.choices = {"1": self.show_books,
                        "2": self.show_authors,
                        "3": self.add_book,
                        "4": self.add_author,
                        "5": self.add_genre,
                        "6": self.delete_book_by_name,
                        "7": self.delete_book_by_id,
                        "8": self.find_books,
                        "9": self.find_author,
                        "10": self.quit}
Example #15
 def __init__(self, handlers):
     settings = dict(
         template_path=ROOT_JOIN('templates'),
         static_path=ROOT_JOIN('static'),
         debug=True,
         cookie_secret=Env.COOKIE_SEC,
         admin_user=Env.ADMIN_USER,
         default_handler_class=PyHub404,
         default_avatar=Env.AVATAR,
     )
     settings.update({'X-Spider-Key': Env.POST_KEY})
     super(BaseApplication, self).__init__(handlers=handlers, **settings)
     self.db = DB(Env)
     self.github = GitHub(Env.GITHUB_ID, Env.GITHUB_SEC,
                          Env.GITHUB_REDIRECT)
Example #16
def report_spam():
    """If the reported message already exists or is close to a known message, increment its report count; otherwise add it as a new entry in the db."""
    data = request.get_json()
    reported_message = data['message']
    vector = message_to_vector(reported_message)

    similar_msg, dist = closest_spam(vector, 0)

    db = DB.load()
    if dist > EPSILON:
        db.reported_messages[similar_msg].reports += 1
    else:
        db.add_new_message(reported_message, normalize_vector(vector).tolist())

    db.save()
    return jsonify({})
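
A hedged way to exercise this view, assuming it is registered on a Flask app object named app under a route such as /report (neither is shown in the snippet):

# hypothetical: both the app object and the route path are assumptions
client = app.test_client()
resp = client.post('/report', json={'message': 'WIN A FREE PRIZE NOW'})
print(resp.get_json())  # {}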
Example #17
def assign_issue_tracking(repository, issue_tracking_type, url, username=None, password=None, db_session=None):
    """ Assigns an issue tracking system to a repository.

    Args:
        repository (Repository): The repository (ORM-Object) to assign the issue tracking to.
        issue_tracking_type (str): The issue tracking system type. Use one of the TYPE_X constants from IssueTracking.
        url (str): The url for the issue tracking API.
        username (str): Optional. The username for authentication.
        password (str): Optional. The password for authentication.
        db_session (Session): Optional. The db session to use. If not provided, a new one will be created.
    """
    assert isinstance(repository, Repository)

    close_db_session = False
    if not db_session:
        db_session = DB.create_session()
        close_db_session = True

    if repository.issueTracking is not None:
        Log.info("Repository " + repository.name + " with id " + str(
            repository.id) + " already has an issue tracker assigned")

        repository.issueTracking.type = issue_tracking_type
        repository.issueTracking.url = url
        repository.issueTracking.username = username
        repository.issueTracking.password = password
        db_session.commit()
    else:
        Log.info(
            "Creating new " + issue_tracking_type + " IssueTracking for Repository " + repository.name +
            " with id " + str(repository.id))
        issue_tracking = IssueTracking(
            repository=repository,
            type=issue_tracking_type,
            url=url,
            username=username,
            password=password
        )
        db_session.add(issue_tracking)

        repository.issueTracking = issue_tracking
        db_session.commit()

    if close_db_session:
        db_session.close()
Example #18
class Controller:
    """The controller."""

    def __init__(self):
        """Initialization."""

        self.view = View()
        self.mydb = DB()
        self.choices = {"1": self.show_books,
                        "2": self.show_authors,
                        "3": self.add_book,
                        "4": self.add_author,
                        "5": self.add_genre,
                        "6": self.delete_book_by_name,
                        "7": self.delete_book_by_id,
                        "8": self.find_books,
                        "9": self.find_author,
                        "10": self.quit}

    def run(self):
        """Show the menu in a loop: call display_menu from the View class in
        view.py, read the user's choice, and print an error if the entered
        number is not a valid option."""

        while True:
            self.view.display_menu()
            choice = input("Enter an option: ")
            action = self.choices.get(choice)
            if action:
                action()
            else:
                print("{0} is not a valid choice".format(choice))

    def show_books(self):
        """Show the list of books by calling get_books from the DB class in
        model.py; each i is a row of the books table."""

        for i in self.mydb.get_books():
            self.view.print_smth("Book name: %s" % i["name"])
            self.view.print_smth("Author: %s %s" % (i["fname"], i["lname"]))
            self.view.print_smth("Genre: %s" % i["g.name"])
            self.view.print_smth("--------------------------------")

    def show_authors(self):
        """Show the list of authors by calling get_authors from the DB class
        in model.py; each i is a row of the author table."""

        for i in self.mydb.get_authors():
            self.view.print_smth("Author: %s %s" % (i["FNAME"], i["LNAME"]))

    def add_book(self):
        """Add a book to the book table. books_name, authors_id and genre_id
        are entered by the user; calls add_book from the DB class in model.py."""

        books_name = input('Enter books name: ')
        authors_id = input('Authors id: ')
        genre_id = input('Genre id: ')
        self.mydb.add_book(books_name, int(authors_id), int(genre_id))

    def add_author(self):
        """Add an author to the author table. The author's first and last
        names are entered by the user; calls add_author from the DB class in
        model.py."""

        authors_name = input('Enter authors name: ')
        authors_lastname = input('Enter authors last name: ')
        # authors_age = input('Enter authors age: ')
        self.mydb.add_author(authors_name, authors_lastname)

    def add_genre(self):
        """Add a genre to the genre table; the genre name is entered by the
        user. Calls add_genre from the DB class in model.py."""

        genres_name = input('Enter a new genre: ')
        self.mydb.add_genre(genres_name)

    def delete_book_by_name(self):
        """Delete a book from the books table by name."""
        books_name = input('Enter books name: ')
        self.mydb.delete_book_by_name(books_name)

    def delete_book_by_id(self):
        """Delete a book from the books table by id."""
        books_id = input('Enter books ID: ')
        self.mydb.delete_book_by_id(int(books_id))

    def find_books(self):
        """Search for books by part of the name."""
        books_name = input('Search: ')
        self.view.print_smth(self.mydb.find_books(books_name))

    def find_author(self):
        """Search for an author by part of the name."""
        authors_name = input('Search: ')
        self.view.print_smth(self.mydb.find_author(authors_name))

    # fix the methods here

    # def serializePickle(self):
    #     pickles.makePickleSerialization()
    #
    # def deserializePickle(self):
    #     lista, listb = pickles.getPickleDeserialize()
    #     print(lista)
    #     print(listb)
    #
    # def serializeJSON(self):
    #     jsons.makeJSONSerialize()
    #
    # def deserializeJSON(self):
    #     lista, listb = jsons.getJSONDeserialize()
    #     print(lista)
    #     print(listb)

    @staticmethod
    def quit():
        """Exit the program."""

        print("Bye")
        quit()
Example #19
    def __process_commit(self, commit, previous_commit, project_size, project_file_count, db_session=None):
        """Process a single commit.

        Args:
            commit: The actual commit
            previous_commit: The previous commit, used to compute differences
            project_size: Current size of the project
            project_file_count: Current file count of the project
            db_session: Optional db session; a new one is created if not provided
        Returns: commit_orm object

        """
        db_session_local = False
        if not db_session:
            db_session_local = True
            db_session = DB.create_session()

        added_files_thread = None
        changed_files_thread = None
        deleted_files_thread = None

        manipulated_files = self.__get_changed_files(commit, previous_commit)

        added_files = manipulated_files['added_files']
        added_files_count = len(added_files)
        deleted_files = manipulated_files['deleted_files']
        deleted_files_count = len(deleted_files)
        changed_files = manipulated_files['changed_files']
        changed_files_count = len(changed_files)
        renamed_files = manipulated_files['renamed_files']
        renamed_files_count = len(renamed_files)
        files_diff = manipulated_files['files_diff']

        new_project_file_count = project_file_count + added_files_count - deleted_files_count

        commit_time = datetime.datetime.utcfromtimestamp(commit.committed_date)
        commit_id = str(commit)

        commit_orm = self.__create_new_commit(db_session, commit_id, self.repository_id, commit.message,
                                              commit.author.email,
                                              commit_time, 0, 0, 0, 0, project_size, new_project_file_count)

        # no files were changed at all / very unlikely
        if (not added_files) and (not deleted_files) and (not changed_files) and (
                not renamed_files):
            return commit_orm

        if added_files:
            for file in added_files:
                programming_language = self.__get_programming_langunage(file.path)

                file_orm = self.__create_new_file(db_session, self.repository_id,
                                                  programming_language)

                created_version = self.__create_new_version(db_session, file_orm.id, commit_id, 0, 0, 0, file.path)

                # skip this file because the language is not interesting for us
                if not programming_language:
                    added_files_count -= 1
                    continue

                self.__process_file_diff(db_session, commit_id, file, files_diff, created_version)

        if deleted_files:
            for file in deleted_files:
                programming_language = self.__get_programming_langunage(file.path)
                if not programming_language:
                    deleted_files_count -= 1

                try:
                    version_orm = self.__process_deleted_or_changed_file(db_session, commit_id, file,
                                                                         programming_language,
                                                                         files_diff)
                    version_orm.deleted = True
                    version_orm.file_size = 0
                except ValueError as e:
                    Log.warning("Warning processing commit: " + str(commit_id) + ". File affected: " + str(
                        file.path) + " Reason: " + str(e))

        if changed_files:
            for file in changed_files:
                programming_language = self.__get_programming_langunage(file.path)
                if not programming_language:
                    changed_files_count -= 1

                try:
                    self.__process_deleted_or_changed_file(db_session, commit_id, file, programming_language,
                                                           files_diff)
                except ValueError as e:
                    Log.warning("Warning processing commit: " + str(commit_id) + ". File affected: " + str(
                        file.path) + " Reason: " + str(e))

        # for renamed files just create a new one and link to the old one
        if renamed_files:
            for file in renamed_files:
                old_file = file['old_file']
                new_file = file['new_file']

                old_version_orm = (db_session.query(Commit, Version)
                                   .filter(Commit.id == Version.commit_id,
                                           Version.path == str(old_file.path),
                                           Commit.repository_id == str(self.repository_id))
                                   .order_by(desc(Commit.timestamp))
                                   .first())

                programming_language = self.__get_programming_langunage(new_file.path)

                if not old_version_orm:
                    Log.warning("Could not process commit " + str(
                        commit_id) + ". Could not process rename because old file was not found. Old file: " + str(
                        old_file.path) + " new file: " + str(new_file.path))
                    file_orm = self.__create_new_file(db_session, self.repository_id,
                                                      programming_language)
                    old_version_orm = self.__create_new_version(db_session, file_orm.id, commit_id, 0, 0, 0,
                                                                new_file.path)
                    version_orm = old_version_orm
                else:
                    old_version_orm = old_version_orm.Version
                    version_orm = self.__create_new_version(db_session, old_version_orm.file_id, commit_id, 0, 0, 0,
                                                            new_file.path)

                # skip this file because the language is not interesting for us
                if not programming_language:
                    renamed_files_count -= 1
                    continue

                version_orm.file_size = old_version_orm.file_size
                self.__process_file_diff(db_session, commit_id, new_file, files_diff, version_orm)

        commit_orm.added_files_count = added_files_count
        commit_orm.deleted_files_count = deleted_files_count
        commit_orm.changed_files_count = changed_files_count
        commit_orm.renamed_files_count = renamed_files_count

        if added_files_thread:
            added_files_thread.join()
        if changed_files_thread:
            changed_files_thread.join()
        if deleted_files_thread:
            deleted_files_thread.join()

        if db_session_local:
            db_session.close()

        return commit_orm
Example #20
def get_dataset_from_db(repository,
                        start,
                        end,
                        feature_list,
                        target_id,
                        ngram_sizes=None,
                        ngram_levels=None,
                        label="",
                        eager_load=False,
                        sparse=False):
    """ Reads a dataset from a repository in a specific time range

    Args:
        repository (Repository): The repository to query. Can also be its name as a string
        start (datetime): The start range
        end (datetime): The end range
        feature_list (list[str]): A list of the feature-IDs to be read into the dataset.
        target_id (str): The ID of the target. Use a TARGET_X constant from UpcomingBugsForVersion
        ngram_sizes (list[int]): Optional. The ngram-sizes to be loaded in the set (e.g. [1, 2] for 1-grams and 2-grams)
        ngram_levels (list[int]): Optional. The ngram-levels to be loaded in the dataset.
        label (str): The label to be assigned to the dataset.
        eager_load (bool): If true, all data will be loaded eagerly. This reduces database calls, but uses a lot of RAM.
        sparse (bool): If the data and target matrices should be sparse. Recommended in combination with ngrams.

    Returns:
        Dataset: The populated dataset.
    """
    if ngram_sizes and not isinstance(ngram_sizes, list):
        ngram_sizes = [ngram_sizes]
    if ngram_levels and not isinstance(ngram_levels, list):
        ngram_levels = [ngram_levels]
    use_ngrams = bool(ngram_sizes and ngram_levels)

    session = DB.create_session()

    if type(repository) is str:
        repository_name = repository
        repository = get_repository_by_name(session, repository_name)
        if repository is None:
            logging.error(
                "Repository with name %s not found! Returning no Dataset" %
                repository_name)
            return None

    commits = get_commits_in_range(session,
                                   repository,
                                   start,
                                   end,
                                   eager_load_ngrams=use_ngrams and eager_load,
                                   eager_load_features=eager_load)
    if commits is None:
        logging.error("Could not retrieve commits! Returning no Dataset")
        return None
    logging.debug("Commits received.")

    if len(commits) == 0:
        logging.error("No Commits found!")
        return None

    versions = []
    for commit in commits:
        versions += commit.versions
    logging.debug("%i commits with %i versions found." %
                  (len(commits), len(versions)))

    feature_count = len(feature_list)
    logging.debug("%i features found." % feature_count)

    ngram_count = 0
    if use_ngrams:
        ngrams = get_ngram_vector_list(versions[0], ngram_sizes, ngram_levels)
        ngram_count = sum([ngram.vector_size for ngram in ngrams])
        logging.debug(
            "Ngram sizes %s and levels %s amount to %i total ngrams." %
            (str(ngram_sizes), str(ngram_levels), ngram_count))

    dataset = Dataset(feature_count + ngram_count,
                      len(versions),
                      feature_list,
                      target_id,
                      start,
                      end,
                      ngram_sizes,
                      ngram_levels,
                      label,
                      sparse=sparse,
                      dok=True)
    i = 0
    for version in versions:
        if len(version.upcoming_bugs) == 0:
            raise Exception(
                "Version %s has no upcoming_bugs entry. Can't retrieve target!"
                % version.id)
        target = version.upcoming_bugs[0].get_target(target_id)
        if target is None:
            raise Exception(
                "Upcoming_bugs entry of Version %s has no target %s!" %
                (version.id, target))
        dataset.target[i] = target

        j = 0
        for feature_value in version.feature_values:
            if feature_value.feature_id in feature_list:
                if not sparse or feature_value.value != 0:
                    dataset.data[i, j] = feature_value.value
                j += 1
        if use_ngrams:
            for ngram_vector in get_ngram_vector_list(version, ngram_sizes,
                                                      ngram_levels):
                for ngram_value in ngram_vector.ngram_values.split(','):
                    ngram_value = int(ngram_value)
                    if not sparse or ngram_value != 0:
                        dataset.data[i, j] = ngram_value
                    j += 1

        if i % 100 == 0:
            logging.info("{0:.2f}% of versions processed.".format(
                i / len(versions) * 100))

        i += 1
    logging.info("All versions processed.")

    if sparse:
        dataset.to_csr()

    session.close()
    return dataset
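
A hypothetical invocation; the repository name, dates, feature IDs and the target constant below are placeholder assumptions (real values would come from the surrounding project's Config):

from datetime import datetime

dataset = get_dataset_from_db(
    "my-repository",                     # or a Repository ORM object
    datetime(2016, 1, 1),
    datetime(2017, 1, 1),
    ["lines_added", "lines_deleted"],    # hypothetical feature IDs
    UpcomingBugsForVersion.TARGET_BUGS,  # stand-in for a TARGET_X constant, per the docstring
    label="Training",
    sparse=True)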
Example #21
def reset():
    DB.drop_all()
    DB.create_all()
    return render_template('base.html', title='Reset Database')
Example #22
# set FLASK_APP=appPart2.py
# flask run

from flask import Flask, render_template, request
from model import DB, User
from twitter import insert_example_users
from os import getenv

app = Flask(__name__)

app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///db.sqlite3'  # Flask-SQLAlchemy reads this key, not DATABASE_URL
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False

DB.init_app(app)


@app.route('/')
def root():
    return render_template('base.html', title="Home", users=User.query.all())


@app.route('/compare')
def compare():
    user0, user1 = sorted([request.values['user0'], request.values['user1']])

    if user0 == user1:
        message = 'Cannot compare a user to themselves'
    else:
        prediction = predict_user(user0, user1, request.values['tweet_text'])
        message = "{} is more likely to be said by {} than {}".format(
            request.values['tweet_text'], user1 if prediction else user0,
            user0 if prediction else user1)
    # the source snippet is truncated here; a minimal assumed ending returns the message
    return message
Example #23
 def tearDown(self):
     """
     Tear down: remove the session and drop all tables
     """
     DB.session.remove()
     DB.drop_all()
Example #24
import glob

from model import DB
from license_plate_detector import detect_license_plate
import os

license_number = 1234
db = DB()

images = r'images'
cropped_imgs = r'cropped'
path_to_directory = r'C:\Users\USER\PycharmProjects\hackathon2_flask'
imgs_path = os.path.join(path_to_directory, images)
cropped_imgs_path = os.path.join(path_to_directory, cropped_imgs)


def main_function():
    global license_number
    imgs_names = []
    for crp in os.listdir(cropped_imgs_path):
        imgs_names.append(crp)
    for img_name in imgs_names:
        # license_number = find_license_plate_number(os.path.join(cropped_imgs_path, img_name))
        license_number += 200
        _, date, time, area, _ = img_name.split('_')
        date = date.replace('-', '/')
        time = time.replace('-', ':')

        db.insert_report(license_number, date, time,
                         os.path.join(imgs_path, img_name), area)
Example #25
#!/usr/bin/env python3
'''
Building the default database and populating it.
'''
import random
from model import DB, Donor, Donation

# pylint: disable = C0103

DB.connect()

# This line will allow you to "upgrade" an existing database by
# dropping all existing tables from it.
DB.drop_tables([Donor, Donation])

DB.create_tables([Donor, Donation])

alice = Donor(name="Alice")
alice.save()

bob = Donor(name="Bob")
bob.save()

charlie = Donor(name="Charlie")
charlie.save()

donors = [alice, bob, charlie]

for x in range(30):
    Donation(donor=random.choice(donors), value=random.randint(100,
                                                               10000)).save()
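
A quick read-back of the generated data, sketched with standard peewee aggregation over the models defined above:

from peewee import fn

# print each donor's total donated amount
for donor in donors:
    total = (Donation
             .select(fn.SUM(Donation.value))
             .where(Donation.donor == donor)
             .scalar())
    print(donor.name, total)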
Example #26
"""
	Flask app for receiving spotify track IDs and returning suggested songs
	based on acoustic similarities
"""

import json
import pickle
import pandas as pd  # needed by fillSongDB below
from flask import Flask, request, render_template
from model import DB, Song, User
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists

APP = Flask(__name__)
APP.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///spotify.db'
APP.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
DB.init_app(APP)

df_fileName = 'spotify2019.csv'
engine = create_engine('sqlite:///spotify.db')  # pass the actual URI, not the literal string 'SQLALCHEMY_DATABASE_URI'
curse = engine.connect()


def fillSongDB():
	"""
	Fill db's Song table with given CSV
		(Does not need to execute every time app is run?)
	"""
	df = pd.read_csv(df_fileName)
	df.to_sql(con=engine, index_label='id',
	          name=Song.__tablename__, if_exists='replace')
Example #27
 def __init__(self):
     self.db = DB()
     if not os.path.exists(db_file):
         os.mkdir(db_file)
Example #28
import loaders
import sys

from PyQt5.QtCore import *
from PyQt5.QtGui import *
from PyQt5.QtWidgets import *

from sqlalchemy.sql import or_

from model import Author, Book, DB
from dialogs import AuthDialog, AddAuthorDialog, AddBookDialog

from ui import Ui_ViewInfo

db = DB('library.db')


class MyForm(QMainWindow):

    db = db

    def __init__(self, parent=None):
        QWidget.__init__(self, parent)
        self.ui = Ui_ViewInfo()
        self.ui.setupUi(self)
        self.reload()
        self.nameFilters = {'JSON (*.json)': loaders.json_loader,
                            'XML (*.xml)': loaders.xml_loader}

    @staticmethod
    def _append_items(model, items):
Example #29
from model import DB
from datetime import datetime
import csv

mydb = DB()

arr = []

with open('customer.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0
    for row in csv_reader:
        if line_count == 0:
            print('header')
        else:
            print(row[-1])
            row[-1] = datetime.strptime(row[-1], '%d/%m/%Y %H:%M')
            arr.append(row)
        line_count += 1
        print(f'Processed {line_count} lines.')
mydb.insert_data(arr)  # the header row was already skipped above, so don't drop the first data row
Example #30
def main():
    cli_args = parse_arguments()
    try:
        Config.read_config(cli_args.config_file)
    except ConfigError:
        die("Config File %s could not be read correctly! " %
            cli_args.config_file)
    init_logging()
    logging.info("Starting ML Pipeline!")
    logging.info("Initializing Database")
    try:
        DB.init_db()
    except DBError:
        die("DB Model could not be created!")

    logging.info("Reading training dataset")
    train_dataset = Dataset.get_dataset(
        Config.repository_name,
        Config.dataset_train_start,
        Config.dataset_train_end,
        Config.dataset_features,
        Config.dataset_target,
        ngram_sizes=Config.dataset_ngram_sizes,
        ngram_levels=Config.dataset_ngram_levels,
        label="Training",
        cache=Config.dataset_cache,
        eager_load=Config.database_eager_load,
        sparse=Config.dataset_sparse)
    if train_dataset is None:
        die("Training Dataset could not be created!")
    if Config.ml_log_transform_target:
        train_dataset.target = LogTransform.log_transform(
            train_dataset.target, base=Config.ml_log_transform_base)

    logging.info("Reading test dataset")
    test_dataset = Dataset.get_dataset(
        Config.repository_name,
        Config.dataset_test_start,
        Config.dataset_test_end,
        Config.dataset_features,
        Config.dataset_target,
        ngram_sizes=Config.dataset_ngram_sizes,
        ngram_levels=Config.dataset_ngram_levels,
        label="Test",
        cache=Config.dataset_cache,
        eager_load=Config.database_eager_load,
        sparse=Config.dataset_sparse)
    if test_dataset is None:
        die("Test Dataset could not be created!")
    if Config.ml_log_transform_target:
        test_dataset.target = LogTransform.log_transform(
            test_dataset.target, base=Config.ml_log_transform_base)

    logging.info("Creating and training model with training dataset")
    model = Model.create_model(Config.ml_model,
                               feature_scaling=Config.ml_feature_scaling,
                               polynomial_degree=Config.ml_polynomial_degree,
                               cross_validation=Config.ml_cross_validation,
                               alpha=Config.ml_alpha,
                               C=Config.ml_C,
                               kernel=Config.ml_kernel,
                               svr_degree=Config.ml_svr_degree,
                               svr_epsilon=Config.ml_svr_epsilon,
                               svr_gamma=Config.ml_svr_gamma,
                               svr_coef0=Config.ml_svr_coef0,
                               sparse=Config.dataset_sparse)

    Model.train_model(model, train_dataset)

    logging.info("Model successfully trained.")

    logging.debug("Creating predictions...")
    baseline_mean_prediction = Predict.predict_mean(
        train_dataset, test_dataset.target.shape[0])
    baseline_med_prediction = Predict.predict_median(
        train_dataset, test_dataset.target.shape[0])
    baseline_wr_prediction = Predict.predict_weighted_random(
        train_dataset, test_dataset.target.shape[0])
    training_prediction = Predict.predict_with_model(train_dataset, model)
    test_prediction = Predict.predict_with_model(test_dataset, model)

    logging.debug("Creating reports from predictions")

    train_target = train_dataset.target
    test_target = test_dataset.target
    if Config.ml_log_transform_target:
        train_target = LogTransform.exp_transform(train_target,
                                                  Config.ml_log_transform_base)
        training_prediction = LogTransform.exp_transform(
            training_prediction, Config.ml_log_transform_base)
        test_target = LogTransform.exp_transform(test_target,
                                                 Config.ml_log_transform_base)
        test_prediction = LogTransform.exp_transform(
            test_prediction, Config.ml_log_transform_base)
        baseline_mean_prediction = LogTransform.exp_transform(
            baseline_mean_prediction, Config.ml_log_transform_base)
        baseline_med_prediction = LogTransform.exp_transform(
            baseline_med_prediction, Config.ml_log_transform_base)
        baseline_wr_prediction = LogTransform.exp_transform(
            baseline_wr_prediction, Config.ml_log_transform_base)

    baseline_mean_report = Reporting.Report(test_target,
                                            baseline_mean_prediction,
                                            "Mean Baseline")
    baseline_med_report = Reporting.Report(test_target,
                                           baseline_med_prediction,
                                           "Median Baseline")
    baseline_wr_report = Reporting.Report(test_target, baseline_wr_prediction,
                                          "Weighted Random Baseline")
    training_report = Reporting.Report(train_target, training_prediction,
                                       "Training")
    test_report = Reporting.Report(test_target, test_prediction, "Test")

    base_entry = Scoreboard.create_entry_from_config(baseline_wr_report)
    test_entry = Scoreboard.create_entry_from_config(test_report)
    Scoreboard.add_entry(base_entry)
    Scoreboard.add_entry(test_entry)
    Scoreboard.write_entries()
    base_ranking = Scoreboard.get_ranking(base_entry,
                                          Scoreboard.RATING_ATTRIBUTE_R2S)
    test_ranking = Scoreboard.get_ranking(test_entry,
                                          Scoreboard.RATING_ATTRIBUTE_R2S)

    if Config.reporting_display or Config.reporting_save:
        config_table = Reporting.get_config_table()
        add_to_report(config_table.table)

        add_to_report(baseline_mean_report)
        add_to_report(baseline_med_report)
        add_to_report(baseline_wr_report)
        add_to_report(training_report)
        add_to_report(test_report)

        comparisation_table = Reporting.get_report_comparisation_table(
            [baseline_wr_report, training_report, test_report],
            [Reporting.SCORE_R2S, Reporting.SCORE_MAE, Reporting.SCORE_MDE])
        add_to_report(comparisation_table.table)

        category_table = Reporting.get_category_table(
            train_target, training_prediction, label="Training prediction")
        add_to_report(category_table.table)

        category_table = Reporting.get_category_table(test_target,
                                                      test_prediction,
                                                      label="Test prediction")
        add_to_report(category_table.table)

        confusion_matrix_table, classification_report = Reporting.get_confusion_matrix(
            train_target, training_prediction, label="Training prediction")
        add_to_report(confusion_matrix_table.table)
        add_to_report(classification_report)
        confusion_matrix_table, classification_report = Reporting.get_confusion_matrix(
            test_target, test_prediction, label="Test prediction")
        add_to_report(confusion_matrix_table.table)
        add_to_report(classification_report)

        if Config.ml_polynomial_degree == 1:
            # Determining top features only makes sense without polynomial features.
            top_features_table = Reporting.get_top_features_table(
                model, train_dataset.feature_list, 10)
            if top_features_table is not None:
                add_to_report(top_features_table.table)

        add_to_report("Base ranking: %i" % base_ranking)
        add_to_report("Test ranking: %i" % test_ranking)
        if test_ranking == 0:
            add_to_report("Congratulations! Best one so far!")
        elif base_ranking > test_ranking:
            add_to_report("Hey, at least better than the baseline!")
        else:
            add_to_report("Do you even learn?")

        if Config.reporting_display:
            print(report_str)

        if Config.reporting_save:
            Reporting.save_report_file(report_str,
                                       filename=Config.reporting_file)

        if Config.reporting_target_histogram:
            Reporting.plot_target_histogram(
                train_dataset,
                display=Config.reporting_display_charts,
                save=Config.reporting_save_charts,
            )

        if Config.reporting_validation_curve and Config.ml_cross_validation:
            Reporting.plot_validation_curve(
                model_type=Config.ml_model,
                train_dataset=train_dataset,
                alpha=Config.ml_alpha,
                C=Config.ml_C,
                feature_scaling=Config.ml_feature_scaling,
                polynomial_degree=Config.ml_polynomial_degree,
                kernel=Config.ml_kernel,
                svr_degree=Config.ml_svr_degree,
                svr_epsilon=Config.ml_svr_epsilon,
                svr_gamma=Config.ml_svr_gamma,
                svr_coef0=Config.ml_svr_coef0,
                sparse=Config.dataset_sparse,
                display=Config.reporting_display_charts,
                save=Config.reporting_save_charts)

        if Config.reporting_learning_curve:
            Reporting.plot_learning_curve(
                train_dataset=train_dataset,
                estimator=model,
                display=Config.reporting_display_charts,
                save=Config.reporting_save_charts)

        if Config.reporting_confusion_matrix_chart:
            Reporting.plot_confusion_matrix(
                ground_truth=train_target,
                predicted=training_prediction,
                label="Training",
                display=Config.reporting_display_charts,
                save=Config.reporting_save_charts)
            Reporting.plot_confusion_matrix(
                ground_truth=test_target,
                predicted=test_prediction,
                label="Test",
                display=Config.reporting_display_charts,
                save=Config.reporting_save_charts)

    logging.info("All done. Exiting ML Pipeline")
Example #31
#!/usr/bin/env python3

# Russell Felts
# Flask To Do Activity 01

""" Scripts to run to set up our database """

from datetime import datetime

from passlib.hash import pbkdf2_sha256

from model import DB, User, Task

# Create the database tables for our model
DB.connect()
DB.drop_tables([User, Task])
DB.create_tables([User, Task])

Task(name="Do the laundry.").save()
Task(name="Do the dishes.", performed=datetime.now()).save()

User(name="admin", password=pbkdf2_sha256.hash("password")).save()
User(name="bob", password=pbkdf2_sha256.hash("bobbob")).save()
Example #32
from model import DB
from model import Tuser
dbconn = DB()
session = dbconn.dbconnect()
print(session)
new_user = Tuser(id='12', name='Bob')
# add the new user to the session:
session.add(new_user)
# commit, i.e. save to the database:
session.commit()
# close the session:
session.close()
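
A hedged read-back of the inserted row, assuming Tuser is a declarative model with the id and name columns used above:

# illustrative query; opens a fresh session via the same helper
session = dbconn.dbconnect()
user = session.query(Tuser).filter_by(id='12').first()
print(user.name)  # Bob
session.close()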
Example #33
class Controller:
    def __init__(self):
        self.db = DB()
        if not os.path.exists(db_file):
            os.mkdir(db_file)

    @run_async
    def user_exists(self, user_id, user_name):
        user = self.db.get_user(user_id)
        if not user:
            self.db.insert_user(user_id, user_name)
            return False
        return True

    @run_async
    def start(self, bot, update):
        user_id = update.message.chat_id
        user_name = update.message.from_user.first_name
        if self.user_exists(user_id, user_name):
            update.message.reply_text("Send me a valid Youtube URL: ")
        else:
            update.message.reply_text(
                "Welcome {}.\nSend me a valid Youtube URL: ".format(
                    user_name))

    @run_async
    def url_message(self, bot, update):
        chat_id = update.message.chat_id
        user_name = update.message.from_user.first_name
        dt = datetime.datetime.now().strftime("%s")
        out_file = db_file + str(chat_id) + dt
        info_file = out_file + '.info.json'
        # parse the message string to find youtube urls and extract the ID
        try:
            url_id = get_url_id(update.message.text)
        except Exception:
            bot.send_message(
                chat_id=chat_id,
                text="Ups!\nSeems something went wrong while downloading the song\n"
                     "Check you sent me a valid youtube link")
            return  # without a valid url_id there is nothing more to do

        # check if user exists, if not, register the user.
        self.user_exists(chat_id, user_name)

        # check if the audio is already on telegram servers by matching the youtube id to a file_id
        audio_file = self.db.get_file(url_id)
        if audio_file:
            file_record = audio_file
            self.db.update_record(url_id)
            t_audio = telegram.Audio(file_record['t_audio']['file_id'],
                                     file_record['t_audio']['duration'])
            filesize = ((int(float(file_record['filesize'])) / 1048576))
            bot.send_audio(
                chat_id=chat_id,
                audio=t_audio,
                caption="File size: {0:.2f} MB\nVia -> @Jutubot".format(
                    filesize),
                timeout=1000)
        else:
            message_info = bot.send_message(chat_id=chat_id,
                                            text='Downloading...',
                                            disable_notification='True')
            ydl_opts = {
                'outtmpl': out_file + '.%(ext)s',
                'writeinfojson': info_file,
                'format': 'bestaudio',
                'postprocessors': [{
                    'key': 'FFmpegExtractAudio',
                    'preferredcodec': 'mp3',
                    'preferredquality': '192',
                }],
                'logger': MyLogger(),
                'progress_hooks': [my_hook]
            }

            # Download song from url_id
            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                try:
                    ydl.download([url_id])
                except Exception as e:
                    print(str(e))
                    bot.editMessageText(
                        chat_id=chat_id,
                        message_id=message_info['message_id'],
                        text="Ups!\nSeems something went wrong while downloading the song\n"
                             "Check you sent me a valid youtube link")

            out_file += '.mp3'
            data = parseInfoFile(info_file)
            os.remove(info_file)

            file_record = {
                '_id': url_id,
                'last_download': dt,
                'download_count': 1,
                **data
            }

            bot.editMessageText(chat_id=chat_id,
                                message_id=message_info['message_id'],
                                text='Sending...',
                                disable_notification='True')

            bot.send_chat_action(chat_id=chat_id,
                                 action='record_audio',
                                 timeout=100)

            tmp_send_file = db_file + file_record['title'] + '.mp3'
            shutil.copyfile(out_file, tmp_send_file)
            tag_file(tmp_send_file, file_record)
            filesize = ((int(float(file_record['filesize'])) / 1048576))

            # Send audio and save Telegram.Audio on t_audio
            t_audio = bot.send_audio(
                chat_id=chat_id,
                audio=open(tmp_send_file, 'rb'),
                title=file_record['title'],
                performer=file_record['performer'],
                caption="File size: {0:.2f} MB\nVia -> @Jutubot".format(
                    filesize),
                timeout=1000)['audio']

            file_record['t_audio'] = t_audio.to_dict()

            os.remove(tmp_send_file)
            os.remove(out_file)
            bot.delete_message(chat_id=chat_id,
                               message_id=message_info['message_id'])

        self.db.add_to_history(chat_id, file_record)
        try:
            self.db.insert_file_record(file_record)
        except Exception as e:
            print(str(e))