示例#1
0
    class Position(pw.Model):
        """Peewee model mapped to the ``position`` table.

        Every column except ``position_name`` is declared nullable.
        """

        position_name = pw_pext.CharField(null=False)
        city = pw_pext.CharField(null=True)
        # ``default=list`` is a callable, so each new instance gets its own
        # empty list; a literal ``[]`` would be a single object shared by
        # every instance of the model.
        business_zones = pw_pext.ArrayField(
            pw_pext.CharField,
            default=list,
            null=True)
        company_full_name = pw_pext.CharField(null=True)
        company_short_name = pw_pext.CharField(null=True)
        company_lable_list = pw_pext.ArrayField(
            pw_pext.CharField,
            default=list,
            null=True)
        company_size = pw_pext.CharField(null=True)
        education = pw_pext.CharField(null=True)
        finance_stage = pw_pext.CharField(null=True)
        first_type = pw_pext.CharField(null=True)
        industry_field = pw_pext.CharField(null=True)
        job_nature = pw_pext.CharField(null=True)
        position_lables = pw_pext.ArrayField(
            pw_pext.CharField,
            default=list,
            null=True)
        salary = pw_pext.CharField(null=True)
        salary_max = pw_pext.IntegerField(null=True)
        salary_min = pw_pext.IntegerField(null=True)
        salary_avg = pw_pext.IntegerField(null=True)
        second_type = pw_pext.CharField(null=True)
        work_year = pw_pext.CharField(null=True)

        class Meta:
            db_table = "position"
示例#2
0
文件: models.py 项目: yuuuuuy/lagou
class Position(BaseModel):
    """Peewee model whose columns carry Chinese ``verbose_name`` labels.

    Every column except ``position_name`` is declared nullable. The three
    list-valued columns (``business_zones``, ``company_lable_list`` and
    ``position_lables``) are Postgres arrays of ``CharField``.
    """

    position_name = postgres_ext.CharField(verbose_name='职位名称',
                                           max_length=255,
                                           null=False)
    city = postgres_ext.CharField(verbose_name='城市', max_length=255, null=True)
    # Work location(s).
    # ``default=list`` is a callable so every instance gets its own list
    # instead of sharing one mutable ``[]`` object.
    business_zones = postgres_ext.ArrayField(postgres_ext.CharField,
                                             default=list,
                                             null=True)
    company_full_name = postgres_ext.CharField(verbose_name='公司全称',
                                               max_length=255,
                                               null=True)
    company_short_name = postgres_ext.CharField(verbose_name='公司简称',
                                                max_length=255,
                                                null=True)
    company_lable_list = postgres_ext.ArrayField(postgres_ext.CharField,
                                                 default=list,
                                                 null=True)
    company_size = postgres_ext.CharField(verbose_name='公司规模(人)',
                                          max_length=255,
                                          null=True)
    education = postgres_ext.CharField(verbose_name='学历',
                                       max_length=255,
                                       null=True)
    finance_stage = postgres_ext.CharField(verbose_name='公司财务情况(A轮)',
                                           max_length=255,
                                           null=True)
    first_type = postgres_ext.CharField(verbose_name='一级分类(如后端、前端)',
                                        max_length=255,
                                        null=True)
    industry_field = postgres_ext.CharField(verbose_name='公司领域',
                                            max_length=255,
                                            null=True)
    job_nature = postgres_ext.CharField(verbose_name='工作性质(全职or实习)',
                                        max_length=255,
                                        null=True)
    # Must be an ArrayField: the first positional argument of ``CharField``
    # is ``max_length``, so passing a field class there does not declare an
    # array column and a list default makes no sense for a scalar string.
    position_lables = postgres_ext.ArrayField(postgres_ext.CharField,
                                              default=list,
                                              null=True)
    salary = postgres_ext.CharField(verbose_name='薪资范围',
                                    max_length=255,
                                    null=True)
    salary_max = postgres_ext.IntegerField(verbose_name='最高薪资', null=True)
    salary_min = postgres_ext.IntegerField(verbose_name='最低薪资', null=True)
    salary_avg = postgres_ext.IntegerField(verbose_name='平均薪资', null=True)
    second_type = postgres_ext.CharField(verbose_name='二级分类(如 java、python)',
                                         max_length=255,
                                         null=True)
    work_year = postgres_ext.CharField(verbose_name='工作年限',
                                       max_length=255,
                                       null=True)
示例#3
0
class PostgresMaster(PostgresModel):
    """Peewee model stored in the ``masters`` table."""

    ### PEEWEE FIELDS ###

    id = peewee.IntegerField(primary_key=True)
    artists = postgres_ext.BinaryJSONField(null=True)
    genres = postgres_ext.ArrayField(peewee.TextField, null=True)
    main_release_id = peewee.IntegerField(null=True)
    styles = postgres_ext.ArrayField(peewee.TextField, null=True)
    title = peewee.TextField()
    year = peewee.IntegerField(null=True)

    ### PEEWEE META ###

    class Meta:
        db_table = 'masters'

    ### PUBLIC METHODS ###

    @classmethod
    def bootstrap(cls):
        """Drop the table, recreate it, then run the first bootstrap pass."""
        cls.drop_table(True)
        cls.create_table()
        cls.bootstrap_pass_one()

    @classmethod
    def bootstrap_pass_one(cls):
        """Delegate to ``PostgresModel.bootstrap_pass_one`` for this model."""
        options = {
            'model_class': cls,
            'xml_tag': 'master',
            'name_attr': 'title',
            'skip_without': ['title'],
        }
        PostgresModel.bootstrap_pass_one(**options)

    @classmethod
    def from_element(cls, element):
        """Build an instance from the fields ``tags_to_fields`` extracts."""
        return cls(**cls.tags_to_fields(element))
示例#4
0
class Retailer(peewee.Model):
    """Peewee model for a retailer, stored in the ``store`` schema."""

    name = peewee.CharField()
    description = peewee.CharField(null=True)
    picture = peewee.CharField(null=True)
    active = peewee.BooleanField(default=True)
    ad_model = peewee.CharField(null=True,
                                verbose_name=u'c/a')  #TODO: to choices
    legal_info = peewee.TextField(null=True)
    countries = peewee.ManyToManyField(Country,
                                       backref='retailers',
                                       through_model=DefferedThroughRetailer)
    currency = peewee.ForeignKeyField(Currency)
    sm_accounts_white = postgres_ext.ArrayField(peewee.IntegerField, null=True)
    sm_accounts_black = postgres_ext.ArrayField(peewee.IntegerField, null=True)

    #NOTE: deprecated, will be removed
    datafeed_url = peewee.CharField(null=True)
    index_key = peewee.CharField(max_length=3, default='rus')
    url_key_mode = peewee.CharField(max_length=1, default='F')
    # ``default=dict`` is a callable, so each instance gets a fresh dict; a
    # literal ``{}`` would be one object shared (and mutated) by every
    # instance.
    datafeed_params = postgres_ext.BinaryJSONField(default=dict)
    international = peewee.BooleanField(default=False)

    class Meta:
        database = db
        schema = 'store'
        table_alias = 'r'

    @property
    def feed(self):
        """Return the first row of ``self.feed_settings``.

        NOTE(review): ``feed_settings`` is not defined in this class;
        presumably a backref declared on ``FeedSettings`` -- confirm.
        """
        return self.feed_settings.first()

    @property
    def feeds(self):
        """Return a list of this retailer's active feed settings."""
        # ``== True`` is intentional: peewee overloads ``==`` to build the
        # SQL condition, so it must not be rewritten as ``is True``.
        active_feeds = self.feed_settings.where(FeedSettings.active == True)  # noqa: E712
        return list(active_feeds)
示例#5
0
class PostgresRelease(PostgresModel):

    ### CLASS VARIABLES ###

    # Passed as ``mapping=`` to ``tags_to_fields`` by
    # ``element_to_artist_credits``. Empty here; presumably filled in
    # elsewhere -- TODO confirm.
    _artists_mapping = {}

    # Passed as ``mapping=`` to ``tags_to_fields`` by
    # ``element_to_company_credits``.
    _companies_mapping = {}

    # Passed as ``mapping=`` to ``tags_to_fields`` by ``element_to_tracks``.
    _tracks_mapping = {}

    class BootstrapPassTwoWorker(multiprocessing.Process):
        """Process that runs the second bootstrap pass over a batch of ids."""

        def __init__(self, indices):
            """Store ``indices``, the sized iterable of release ids to process."""
            multiprocessing.Process.__init__(self)
            self.indices = indices

        def run(self):
            """Call ``bootstrap_pass_two_single`` for every id in the batch.

            A failure on one release is printed with its traceback and the
            loop moves on to the next id.
            """
            proc_name = self.name
            # One corpus dict is shared by every release this worker handles.
            corpus = {}
            total = len(self.indices)
            for i, release_id in enumerate(self.indices):
                with PostgresRelease._meta.database.execution_context():
                    progress = float(i) / total
                    try:
                        PostgresRelease.bootstrap_pass_two_single(
                            release_id=release_id,
                            annotation=proc_name,
                            corpus=corpus,
                            progress=progress,
                            )
                    except Exception:
                        # ``Exception`` rather than a bare ``except`` so that
                        # KeyboardInterrupt / SystemExit still stop the
                        # worker instead of being logged and ignored.
                        print('ERROR:', release_id, proc_name)
                        traceback.print_exc()

    ### PEEWEE FIELDS ###

    # Explicit integer primary key; ``from_element`` fills it from the
    # element's ``id`` attribute.
    id = peewee.IntegerField(primary_key=True)
    # Every other column is declared with ``index=False``; all are nullable
    # except ``title``.
    artists = postgres_ext.BinaryJSONField(null=True, index=False)
    companies = postgres_ext.BinaryJSONField(null=True, index=False)
    country = peewee.TextField(null=True, index=False)
    extra_artists = postgres_ext.BinaryJSONField(null=True, index=False)
    formats = postgres_ext.BinaryJSONField(null=True, index=False)
    genres = postgres_ext.ArrayField(peewee.TextField, null=True, index=False)
    identifiers = postgres_ext.BinaryJSONField(null=True, index=False)
    # Iterated by ``resolve_references``, which reads each entry's 'name'
    # and writes its 'id'.
    labels = postgres_ext.BinaryJSONField(null=True, index=False)
    master_id = peewee.IntegerField(null=True, index=False)
    notes = peewee.TextField(null=True, index=False)
    release_date = peewee.DateTimeField(null=True, index=False)
    styles = postgres_ext.ArrayField(peewee.TextField, null=True, index=False)
    title = peewee.TextField(index=False)
    tracklist = postgres_ext.BinaryJSONField(null=True, index=False)

    ### PEEWEE META ###

    class Meta:
        # Rows are stored in the ``releases`` table.
        db_table = 'releases'

    ### PUBLIC METHODS ###

    @classmethod
    def bootstrap(cls):
        """Drop and recreate the table, then run both bootstrap passes."""
        cls.drop_table(True)
        cls.create_table()
        for run_pass in (cls.bootstrap_pass_one, cls.bootstrap_pass_two):
            run_pass()

    @classmethod
    def bootstrap_pass_one(cls):
        """Delegate to ``PostgresModel.bootstrap_pass_one`` for this model."""
        options = {
            'model_class': cls,
            'xml_tag': 'release',
            'name_attr': 'title',
            'skip_without': ['title'],
            }
        PostgresModel.bootstrap_pass_one(**options)

    @classmethod
    def get_indices(cls, pessimistic=False):
        """Split release ids into batches, one per pass-two worker.

        With ``pessimistic=False`` the id space ``0..max(id)`` is cut into
        contiguous ``range`` objects of ``max(id) // cpu_count`` ids each
        (at least one id per range). Ids inside a range need not exist in
        the table. With ``pessimistic=True`` the ids actually stored are
        fetched, ordered, and partitioned into ``2 * cpu_count`` chunks.

        Returns a list of id batches. In the non-pessimistic mode the list
        is empty when the table has no rows.
        """
        indices = []
        if not pessimistic:
            maximum_id = cls.select(
                peewee.fn.Max(cls.id)).scalar()
            if maximum_id is None:
                # MAX() over an empty table is NULL: nothing to batch.
                return indices
            # Never let the step reach zero (fewer ids than CPUs), which
            # ``range`` rejects with ValueError.
            step = max(1, maximum_id // multiprocessing.cpu_count())
            # Exclusive upper bound one past the highest id, so that id is
            # still covered when ``maximum_id`` is an exact multiple of
            # ``step``.
            upper = maximum_id + 1
            for start in range(0, upper, step):
                stop = min(start + step, upper)
                indices.append(range(start, stop))
        else:
            query = cls.select(cls.id)
            query = query.order_by(cls.id)
            query = query.tuples()
            all_ids = tuple(row[0] for row in query)
            ratio = [1] * (multiprocessing.cpu_count() * 2)
            indices.extend(
                sequencetools.partition_sequence_by_ratio_of_lengths(
                    all_ids, ratio))
        return indices

    @classmethod
    def get_release_iterator(cls, pessimistic=False):
        """Yield release rows one at a time.

        With ``pessimistic=False`` every id from 1 to ``max(id)`` is probed
        and ids with no row are skipped. With ``pessimistic=True`` the
        stored ids are selected first and each full row is then fetched by
        id. Yields nothing when the table is empty.
        """
        if not pessimistic:
            maximum_id = cls.select(peewee.fn.Max(cls.id)).scalar()
            if maximum_id is None:
                # MAX() over an empty table is NULL: nothing to iterate.
                return
            for i in range(1, maximum_id + 1):
                # One query per id: ``get()`` raises DoesNotExist for a gap
                # in the id sequence, so no separate ``count()`` is needed.
                try:
                    document = cls.select().where(cls.id == i).get()
                except cls.DoesNotExist:
                    continue
                yield document
        else:
            for release in cls.select(cls.id):
                # The outer query loads ids only; load the full row here.
                yield cls.select().where(cls.id == release.id).get()

    @classmethod
    def bootstrap_pass_two(cls, pessimistic=False):
        """Run the second pass in one worker process per id batch.

        All workers are started, then joined, then terminated.
        """
        workers = []
        for batch in cls.get_indices(pessimistic=pessimistic):
            workers.append(cls.BootstrapPassTwoWorker(batch))
        for phase in ('start', 'join', 'terminate'):
            for worker in workers:
                getattr(worker, phase)()

    @classmethod
    def bootstrap_pass_two_single(
        cls,
        release_id,
        annotation='',
        corpus=None,
        progress=None,
        ):
        """Resolve references for one release and print a status line.

        Does nothing when no row has ``release_id``. Otherwise calls
        ``resolve_references`` on the row, saves it only if that reports a
        change, and prints a "changed" or "[SKIPPED]" line.

        ``corpus`` is the lookup dict handed to ``resolve_references``; a
        fresh dict is used when it is None. ``progress`` is a fraction
        printed as a percentage; None is printed as 0.
        """
        skipped_template = u'{} (Pass 2) {:.3%} [{}]\t[SKIPPED] (id:{}) [{:.8f}]: {}'
        changed_template = u'{} (Pass 2) {:.3%} [{}]\t          (id:{}) [{:.8f}]: {}'
        # The declared defaults must be usable: ``resolve_references`` does
        # ``key in corpus`` and the templates format ``progress`` with
        # ``%``, both of which fail on None.
        if corpus is None:
            corpus = {}
        if progress is None:
            progress = 0.0
        query = cls.select().where(cls.id == release_id)
        if not query.count():
            return
        document = query.get()
        with systemtools.Timer(verbose=False) as timer:
            changed = document.resolve_references(corpus)
        if changed:
            # Persist before reporting, so a failed save is not announced
            # as a change.
            document.save()
            template = changed_template
        else:
            template = skipped_template
        message = template.format(
            cls.__name__.upper(),
            progress,
            annotation,
            document.id,
            timer.elapsed_time,
            document.title,
            )
        print(message)

    @classmethod
    def element_to_artist_credits(cls, element):
        """Return one ``tags_to_fields`` result per child of ``element``.

        Uses ``_artists_mapping``; returns ``[]`` when ``element`` is None
        or has no children.
        """
        if element is None or len(element) == 0:
            return []
        return [
            cls.tags_to_fields(
                child,
                ignore_none=True,
                mapping=cls._artists_mapping,
                )
            for child in element
            ]

    @classmethod
    def element_to_company_credits(cls, element):
        """Return one ``tags_to_fields`` result per child of ``element``.

        Uses ``_companies_mapping``; returns ``[]`` when ``element`` is
        None or has no children.
        """
        if element is None or len(element) == 0:
            return []
        return [
            cls.tags_to_fields(
                child,
                ignore_none=True,
                mapping=cls._companies_mapping,
                )
            for child in element
            ]

    @classmethod
    def element_to_formats(cls, element):
        """Convert each child of ``element`` into a format dict.

        Each dict has 'name' and 'quantity' (from the ``name`` and ``qty``
        attributes), plus 'text' when that attribute is non-empty and
        'descriptions' when the child itself has children. Returns ``[]``
        when ``element`` is None or has no children.
        """
        formats = []
        if element is None or len(element) == 0:
            return formats
        for format_element in element:
            entry = {
                'name': format_element.get('name'),
                'quantity': format_element.get('qty'),
                }
            text = format_element.get('text')
            if text:
                entry['text'] = text
            if len(format_element):
                # Only the first child is read for the descriptions.
                entry['descriptions'] = Bootstrapper.element_to_strings(
                    format_element[0])
            formats.append(entry)
        return formats

    @classmethod
    def element_to_identifiers(cls, element):
        """Convert each child of ``element`` into an identifier dict.

        Each dict holds the child's ``description``, ``type`` and ``value``
        attributes. Returns ``[]`` when ``element`` is None or has no
        children.
        """
        if element is None or len(element) == 0:
            return []
        return [
            {
                'description': child.get('description'),
                'type': child.get('type'),
                'value': child.get('value'),
                }
            for child in element
            ]

    @classmethod
    def element_to_label_credits(cls, element):
        """Convert each child of ``element`` into a label-credit dict.

        Each dict maps 'catalog_number' to the child's ``catno`` attribute
        and 'name' to its ``name`` attribute. Returns ``[]`` when
        ``element`` is None or has no children.
        """
        if element is None or len(element) == 0:
            return []
        return [
            {
                'catalog_number': child.get('catno'),
                'name': child.get('name'),
                }
            for child in element
            ]

    @classmethod
    def element_to_roles(cls, element):
        """Parse ``element.text`` into a list of role dicts.

        The text is split on commas that are not inside square brackets.
        Each non-empty piece becomes ``{'name': ...}``, with an added
        'detail' key when the piece contains bracketed text.

        Returns None when ``element`` is None, has no text, or yields no
        roles.
        """
        def from_text(text):
            # Split one role string into its name (the text before the first
            # top-level '[') and its detail (the contents of every top-level
            # bracket group, joined with ', ').
            name = ''
            current_buffer = ''
            details = []
            had_detail = False
            bracket_depth = 0
            for character in text:
                if character == '[':
                    bracket_depth += 1
                    if bracket_depth == 1 and not had_detail:
                        # First top-level bracket: what came before it is
                        # the role name.
                        name = current_buffer
                        current_buffer = ''
                        had_detail = True
                    elif 1 < bracket_depth:
                        # Nested brackets are kept verbatim in the detail.
                        current_buffer += character
                elif character == ']':
                    bracket_depth -= 1
                    if not bracket_depth:
                        # Closing a top-level group finishes one detail.
                        details.append(current_buffer)
                        current_buffer = ''
                    else:
                        current_buffer += character
                else:
                    current_buffer += character
            # No bracket was seen at all: the whole text is the name.
            if current_buffer and not had_detail:
                name = current_buffer
            name = name.strip()
            detail = ', '.join(_.strip() for _ in details)
            result = {'name': name}
            if detail:
                result['detail'] = detail
            return result
        credit_roles = []
        if element is None or not element.text:
            return credit_roles or None
        current_text = ''
        bracket_depth = 0
        for character in element.text:
            if character == '[':
                bracket_depth += 1
            elif character == ']':
                bracket_depth -= 1
            elif not bracket_depth and character == ',':
                # A comma at depth zero ends the current role; the comma
                # itself is dropped.
                current_text = current_text.strip()
                if current_text:
                    credit_roles.append(from_text(current_text))
                current_text = ''
                continue
            current_text += character
        # Flush whatever follows the last separating comma.
        current_text = current_text.strip()
        if current_text:
            credit_roles.append(from_text(current_text))
        return credit_roles or None

    @classmethod
    def element_to_tracks(cls, element):
        """Return one ``tags_to_fields`` result per child of ``element``.

        Uses ``_tracks_mapping``; returns ``[]`` when ``element`` is None
        or has no children.
        """
        if element is None or len(element) == 0:
            return []
        return [
            cls.tags_to_fields(
                child,
                ignore_none=True,
                mapping=cls._tracks_mapping,
                )
            for child in element
            ]

    @classmethod
    def from_element(cls, element):
        """Build an instance from ``element``.

        Field values come from ``tags_to_fields``; ``id`` is the element's
        ``id`` attribute converted to ``int``.
        """
        field_values = cls.tags_to_fields(element)
        field_values.update(id=int(element.get('id')))
        return cls(**field_values)

    def resolve_references(self, corpus, spuriously=False):
        """Fill in the 'id' of each entry in ``self.labels`` from ``corpus``.

        Entries are looked up under the key ``(2, name)``. When
        ``spuriously`` is false, ``PostgresEntity.update_corpus`` is called
        for the key before the lookup. When ``spuriously`` is true, a key
        missing from ``corpus`` is assigned the next negative id (-1, -2,
        ...), which is also stored in ``corpus``.

        Returns True if any entry's 'id' was set, else False. ``corpus`` is
        mutated in place.
        """
        # Imported here rather than at module level, presumably to avoid a
        # circular import -- TODO confirm.
        import discograph
        changed = False
        spurious_id = 0
        # ``labels`` is a nullable column; treat NULL as "no labels"
        # instead of failing on iteration over None.
        for entry in self.labels or ():
            name = entry['name']
            entity_key = (2, name)
            if not spuriously:
                discograph.PostgresEntity.update_corpus(corpus, entity_key)
            if entity_key in corpus:
                entry['id'] = corpus[entity_key]
                changed = True
            elif spuriously:
                spurious_id -= 1
                corpus[entity_key] = spurious_id
                entry['id'] = corpus[entity_key]
                changed = True
        return changed