def import_data(filename):
    """Import translation records from a tab-separated file.

    Every line is expected to hold exactly four tab-separated columns:
    source language, target language, a timestamp string and a JSON
    payload.  Lines with any other column count are echoed to stderr and
    skipped.  Each valid line becomes one ``Translation`` that is added
    and committed on its own; when the commit raises ``IntegrityError``
    the session is rolled back and the import moves on to the next line.

    :param filename: path of the file to read
    """
    # The context manager guarantees the file handle is closed, even when
    # parsing or the database raises part-way through the import.
    with open(filename) as infile:
        for index, line in enumerate(infile):
            cols = line.split('\t')
            if len(cols) != 4:
                sys.stderr.write('{}\n'.format(line))
                continue

            timestamp = unix_time(str2datetime(cols[2]))
            # NOTE(review): sha1() receives the text exactly as read from
            # the file; on Python 3 it would need bytes -- confirm the
            # interpreter version this script targets.
            translation = Translation(
                id=uuid64.issue(timestamp),
                source_lang=cols[0].strip(),
                target_lang=cols[1].strip(),
                hash=hashlib.sha1(cols[3].strip()).hexdigest(),
                data=json.loads(cols[3]),
            )

            print('{}: Processing data ({}, {}, {})'.format(
                index + 1, translation.source_lang,
                translation.target_lang, timestamp))

            try:
                db.session.add(translation)
                db.session.commit()
            except IntegrityError:
                # Discard the failed row so the session stays usable.
                db.session.rollback()
def import_data(filename):
    """Load tab-separated translation rows from ``filename`` into the DB.

    A usable row has four tab-separated fields, in order: source language,
    target language, timestamp string, JSON document.  Rows with a
    different field count are written to stderr and ignored.  Each usable
    row is committed individually, and a commit that raises
    ``IntegrityError`` is rolled back without stopping the import.

    :param filename: path of the input file
    """
    # Opened via ``with`` so the descriptor is released on every exit path
    # (the previous bare ``open`` left closing to the garbage collector).
    with open(filename) as source:
        for index, line in enumerate(source):
            cols = line.split('\t')
            if len(cols) != 4:
                sys.stderr.write('{}\n'.format(line))
                continue

            source_lang, target_lang, raw_time, payload = cols
            timestamp = unix_time(str2datetime(raw_time))
            translation = Translation(
                id=uuid64.issue(timestamp),
                source_lang=source_lang.strip(),
                target_lang=target_lang.strip(),
                # The hash is taken over the stripped payload text.
                hash=hashlib.sha1(payload.strip()).hexdigest(),
                data=json.loads(payload),
            )

            print('{}: Processing data ({}, {}, {})'.format(
                index + 1, translation.source_lang,
                translation.target_lang, timestamp))

            try:
                db.session.add(translation)
                db.session.commit()
            except IntegrityError:
                # Roll back so later rows can still be committed.
                db.session.rollback()
def store_phrases(source_lang, target_lang, observed_at, phrases):
    """Print the SQL that records every (source, target) phrase pair.

    For each pair an ``INSERT`` creating a ``phrase`` row with a count of
    ``'0'`` is printed, followed by an ``UPDATE`` that increments the count
    of the rows matching the language pair and both texts.  Nothing is
    executed here; the statements are only written to stdout.

    :type observed_at: datetime
    :param phrases: iterable of ``(source_text, target_texts)`` pairs,
        where ``target_texts`` is an iterable of target strings
    """
    insert_template = (
        "INSERT INTO phrase (id, observed_at, source_lang, "
        "target_lang, source_text, target_text, count) VALUES("
        "'{}', '{}', '{}', '{}', '{}', '{}', '0');"
    )
    update_template = (
        "UPDATE phrase SET count = count + 1 WHERE "
        "source_lang = '{}' AND "
        "target_lang = '{}' AND "
        "source_text = '{}' AND "
        "target_text = '{}';"
    )

    for source_text, target_texts in phrases:
        for target_text in target_texts:
            row_id = uuid64.issue()
            # Single quotes are doubled so the texts can sit inside
            # quoted SQL string literals.
            quoted_source = source_text.replace("'", "''")
            quoted_target = target_text.replace("'", "''")

            print(insert_template.format(
                row_id, observed_at, source_lang, target_lang,
                quoted_source, quoted_target))
            print(update_template.format(
                source_lang, target_lang, quoted_source, quoted_target))
def create(cls, commit=True, **kwargs):
    """Instantiate ``cls`` from ``kwargs`` and save the new instance.

    An ``id`` is generated with ``uuid64.issue()`` only when the caller
    did not supply one; an explicitly passed ``id`` is kept as given
    instead of being silently overwritten.  If the instance has a
    ``timestamp`` attribute that is still ``None`` it is set to the
    current UTC time before saving.

    :param commit: forwarded to ``instance.save``
    :param kwargs: constructor arguments for ``cls``
    :return: whatever ``instance.save(commit=commit)`` returns
    """
    if 'id' not in kwargs:
        kwargs['id'] = uuid64.issue()

    instance = cls(**kwargs)

    if hasattr(instance, 'timestamp') and instance.timestamp is None:
        instance.timestamp = datetime.utcnow()

    return instance.save(commit=commit)
class CRUDMixin(object):
    """Mixin adding create/read/update/delete helpers to a model class.

    Copied from https://realpython.com/blog/python/python-web-applications-with-flask-part-ii/
    """  # noqa

    __table_args__ = {'extend_existing': True}

    # ``default`` must be a callable: passing ``uuid64.issue()`` would
    # evaluate once at class-definition time and hand the same id to every
    # row that relies on the column default.
    id = db.Column(db.BigInteger, primary_key=True, autoincrement=False,
                   default=lambda: uuid64.issue())

    @classmethod
    def create(cls, commit=True, **kwargs):
        """Instantiate ``cls`` from ``kwargs`` and save it.

        An ``id`` is generated with ``uuid64.issue()`` unless the caller
        passed one.  A ``timestamp`` attribute that exists but is ``None``
        is set to the current UTC time before saving.

        :param commit: forwarded to :meth:`save`
        :return: the result of :meth:`save`
        """
        if 'id' not in kwargs:
            kwargs['id'] = uuid64.issue()
        instance = cls(**kwargs)
        if hasattr(instance, 'timestamp') and instance.timestamp is None:
            instance.timestamp = datetime.utcnow()
        return instance.save(commit=commit)

    @classmethod
    def get(cls, id):
        """Return ``cls.query.get(id)``."""
        return cls.query.get(id)

    # We will also proxy Flask-SqlAlchemy's get_or_404
    # for symmetry
    @classmethod
    def get_or_404(cls, id):
        """Return ``cls.query.get_or_404(id)``."""
        return cls.query.get_or_404(id)

    @classmethod
    def exists(cls, **kwargs):
        """Return True when at least one row matches ``kwargs``."""
        row = cls.query.filter_by(**kwargs).first()
        return row is not None

    def update(self, commit=True, **kwargs):
        """Set each keyword as an attribute on this instance.

        The instance is saved (and committed) only when ``commit`` is
        truthy; otherwise it is left untouched in the session.

        :return: this instance
        """
        # ``items()`` works on both Python 2 and 3, unlike ``iteritems()``.
        for attr, value in kwargs.items():
            setattr(self, attr, value)
        return self.save() if commit else self

    def save(self, commit=True):
        """Add this instance to the session, committing if requested.

        :return: this instance
        """
        db.session.add(self)
        if commit:
            db.session.commit()
        return self

    def delete(self, commit=True):
        """Mark this instance for deletion, committing if requested.

        :return: ``commit`` itself when it is falsy, otherwise the result
            of ``db.session.commit()``
        """
        db.session.delete(self)
        return commit and db.session.commit()
def create(cls, commit=True, ignore_if_exists=False, **kwargs):
    """Instantiate ``cls`` from ``kwargs`` and save it.

    An ``id`` is generated with ``uuid64.issue()`` unless the caller
    passed one, and a ``created_at`` attribute that exists but is ``None``
    is set to the current UTC time before saving.

    :param commit: forwarded to ``instance.save``
    :param ignore_if_exists: when true, an ``IntegrityError`` or
        ``InvalidRequestError`` raised while saving is swallowed: the
        session is rolled back and the result of ``cls.find`` for the
        caller's criteria is returned instead
    :param kwargs: constructor arguments for ``cls``
    :return: the result of ``instance.save``, or of ``cls.find`` after an
        ignored conflict
    :raises IntegrityError, InvalidRequestError: re-raised from saving
        when ``ignore_if_exists`` is false
    """
    # Remember what the caller actually asked for.  Looking the row up
    # with the freshly generated id included would filter on a value the
    # already-stored row cannot be expected to carry.
    criteria = dict(kwargs)

    if 'id' not in kwargs:
        kwargs['id'] = uuid64.issue()

    instance = cls(**kwargs)

    if hasattr(instance, 'created_at') and instance.created_at is None:
        instance.created_at = datetime.utcnow()

    try:
        return instance.save(commit=commit)
    except (IntegrityError, InvalidRequestError):
        if not ignore_if_exists:
            raise
        db.session.rollback()
        # With no caller criteria at all, fall back to the full kwargs so
        # the lookup is never an unfiltered "first row of the table".
        return cls.find(**(criteria or kwargs))
def store_sentences(source_lang, target_lang, observed_at, sentences):
    """Print one ``INSERT INTO sentence`` statement per sentence pair.

    Each statement carries a new id from ``uuid64.issue()``, the
    observation time, both language codes, the SHA-1 hex digest of the
    UTF-8 encoded source text, and both texts with single quotes doubled.
    Nothing is executed here; the statements are only written to stdout.

    :type observed_at: datetime
    :param sentences: list of (source, target) sentences
    """
    template = (
        "INSERT INTO sentence (id, observed_at, source_lang, "
        "target_lang, source_text_hash, source_text, target_text) "
        "VALUES('{}', '{}', '{}', '{}', '{}', '{}', '{}');"
    )

    for source_text, target_text in sentences:
        digest = hashlib.sha1(source_text.encode("utf-8")).hexdigest()
        values = [
            uuid64.issue(),
            observed_at,
            source_lang,
            target_lang,
            digest,
            # Single quotes are doubled so the texts can sit inside
            # quoted SQL string literals.
            source_text.replace("'", "''"),
            target_text.replace("'", "''"),
        ]
        print(template.format(*values))
class CRUDMixin(object):
    """Mixin adding create/read/update/delete helpers to a model class.

    Copied from https://realpython.com/blog/python/python-web-applications-with-flask-part-ii/
    """  # noqa

    __table_args__ = {'extend_existing': True}  # type: Any

    # ``default`` must be a callable: passing ``uuid64.issue()`` would
    # evaluate once at class-definition time and hand the same id to every
    # row that relies on the column default.
    id = db.Column(db.BigInteger, primary_key=True, autoincrement=False,
                   default=lambda: uuid64.issue())

    @classmethod
    def create(cls, commit=True, ignore_if_exists=False, **kwargs):
        """Instantiate ``cls`` from ``kwargs`` and save it.

        An ``id`` is generated with ``uuid64.issue()`` unless the caller
        passed one, and a ``created_at`` attribute that exists but is
        ``None`` is set to the current UTC time before saving.

        :param commit: forwarded to :meth:`save`
        :param ignore_if_exists: when true, an ``IntegrityError`` or
            ``InvalidRequestError`` raised while saving is swallowed: the
            session is rolled back and the result of :meth:`find` for the
            caller's criteria is returned instead
        :return: the result of :meth:`save`, or of :meth:`find` after an
            ignored conflict
        :raises IntegrityError, InvalidRequestError: re-raised from saving
            when ``ignore_if_exists`` is false
        """
        # Remember what the caller actually asked for.  Looking the row up
        # with the freshly generated id included would filter on a value
        # the already-stored row cannot be expected to carry.
        criteria = dict(kwargs)

        if 'id' not in kwargs:
            kwargs['id'] = uuid64.issue()
        instance = cls(**kwargs)
        if hasattr(instance, 'created_at') and instance.created_at is None:
            instance.created_at = datetime.utcnow()

        try:
            return instance.save(commit=commit)
        except (IntegrityError, InvalidRequestError):
            if not ignore_if_exists:
                raise
            db.session.rollback()
            # With no caller criteria at all, fall back to the full kwargs
            # so the lookup is never an unfiltered "first row" query.
            return cls.find(**(criteria or kwargs))

    @classmethod
    def get(cls, id):
        """Return ``cls.query.get(id)``."""
        return cls.query.get(id)

    # We will also proxy Flask-SqlAlchemy's get_or_404
    # for symmetry
    @classmethod
    def get_or_404(cls, id):
        """Return ``cls.query.get_or_404(id)``."""
        return cls.query.get_or_404(id)

    @classmethod
    def find(cls, **kwargs):
        """Return the first row matching ``kwargs`` via ``filter_by``."""
        return cls.query.filter_by(**kwargs).first()

    @classmethod
    def exists(cls, **kwargs):
        """Return True when :meth:`find` yields a row for ``kwargs``."""
        return cls.find(**kwargs) is not None

    def update(self, commit=True, **kwargs):
        """Set each keyword as an attribute on this instance.

        The instance is saved (and committed) only when ``commit`` is
        truthy; otherwise it is left untouched in the session.

        :return: this instance
        """
        # ``items()`` works on both Python 2 and 3, unlike ``iteritems()``.
        for attr, value in kwargs.items():
            setattr(self, attr, value)
        return self.save() if commit else self

    def save(self, commit=True):
        """Add this instance to the session, committing if requested.

        :return: this instance
        """
        db.session.add(self)
        if commit:
            db.session.commit()
        return self

    def delete(self, commit=True):
        """Mark this instance for deletion, committing if requested.

        :return: ``commit`` itself when it is falsy, otherwise the result
            of ``db.session.commit()``
        """
        db.session.delete(self)
        return commit and db.session.commit()

    def __iter__(self):
        """Yield ``(column name, str(value))`` for every table column.

        This lets ``dict(instance)`` build a plain dictionary of
        stringified column values.
        """
        for column in self.__table__.columns:
            yield column.name, str(getattr(self, column.name))