地址 : Marshmallow详解

marshmallow是一个用来将复杂的orm对象与python原生数据类型之间相互转换的库,

简而言之,就是实现 object -> dict、objects -> list、string -> dict、string -> list 之间的转换。

入门使用

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import datetime
from marshmallow import Schema, fields, pprint

class User(object):
    """Plain user record used as the object side of the (de)serialization examples."""

    def __init__(self, name, email):
        self.name = name
        self.email = email
        # Stamped with the local time at construction.
        self.created_at = datetime.datetime.now()

    def __repr__(self):
        return '<User(name={self.name!r})>'.format(self=self)


class UserSchema(Schema):
    """Declares the User attributes that are (de)serialized and their field types."""

    name = fields.Str()
    email = fields.Email()
    created_at = fields.DateTime()


# Serialization
user = User(name="Monty", email="monty@python.org")
schema = UserSchema()
result = schema.dump(user)  # dump() converts obj -> dict
pprint(result)
# {'created_at': '2020-06-27T11:24:27.546501',
#  'email': 'monty@python.org',
#  'name': 'Monty'}

# Filtering the output
# ``only`` lists the fields to emit; ``exclude`` lists the fields to drop
# instead and achieves the same effect.
summary_schema = UserSchema(only=('name', 'email'))
pprint(summary_schema.dump(user))  # {'email': 'monty@python.org', 'name': 'Monty'}

# Deserialization
user_data = {
    'created_at': '2014-08-11T05:26:03.869245',
    'email': u'ken@yahoo.com',
    'name': u'Ken'
}
schema = UserSchema()
result = schema.load(user_data)
pprint(result)
# {'created_at': datetime.datetime(2014, 8, 11, 5, 26, 3, 869245),
#  'email': 'ken@yahoo.com',
#  'name': 'Ken'}

Schema

  • 对一个类或者一个json数据实现相互转换(即序列化和反序列化,序列化的意思是将数据转化为可存储或可传输的数据类型),需要一个中间载体,这个载体就是Schema。
  • 除了转换以外,Schema还可以用来做数据校验。每个需要转换的类,都需要一个对应的Schema:

Serializing(序列化)

序列化使用schema中的dump()或dumps()方法,其中,dump()方法实现obj -> dict,dumps()方法实现obj -> string。由于Flask能直接序列化dict(使用jsonify),而且你肯定还会对dict进一步处理,没必要现在转化成string,所以通常Flask与Marshmallow配合序列化时,用dump()方法即可:

1
2
3
4
5
6
7
8
9
from marshmallow import pprint

user = User(name="Monty", email="monty@python.org")
schema = UserSchema()
# In marshmallow 3, dump() returns the serialized dict itself; the
# marshmallow 2 ``result.data`` wrapper no longer exists.
result = schema.dump(user)
pprint(result)
# {"name": "Monty",
# "email": "monty@python.org",
# "created_at": "2014-08-17T14:54:16.049594"}

Deserializing(反序列化)

相对dump()的方法就是load()了,可以将字典等类型转换成应用层的数据结构,即orm对象:

1
2
3
4
5
6
7
8
9
10
11
12
# Deserialize into a dict
user_data = {
'created_at': '2014-08-11T05:26:03.869245',
'email': u'ken@yahoo.com',
'name': u'Ken'
}
schema = UserSchema()
# load() validates the input and converts each value to its Python type.
result = schema.load(user_data)
pprint(result)
# {'name': 'Ken',
# 'email': 'ken@yahoo.com',
# 'created_at': datetime.datetime(2014, 8, 11, 5, 26, 3, 869245)},
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# 反序列化为object
from marshmallow import Schema, fields, post_load

class User(object):
    """Target object type that the schema's post_load hook constructs."""

    def __init__(self, name, email):
        self.name = name
        self.email = email
        # Stamped with the local time at construction.
        self.created_at = datetime.datetime.now()

    def __repr__(self):
        # Needed so that printing the loaded object shows <User(name='Ken')>.
        return '<User(name={self.name!r})>'.format(self=self)

class UserSchema(Schema):
    """Schema whose load() returns a User instance instead of a dict."""

    name = fields.Str()
    email = fields.Email()
    created_at = fields.DateTime()

    @post_load
    def make_user(self, data, **kwargs):
        # ``data`` is the validated dict; its keys are passed to User() as
        # keyword arguments, so they must match User.__init__'s parameters.
        return User(**data)


user_data = {
    'email': u'ken@yahoo.com',
    'name': u'Ken'
}
schema = UserSchema()
result = schema.load(user_data)
pprint(result)  # <User(name='Ken')>

Objects <-> List(序列化嵌套对象)

如果要序列化的是由多个对象组成的可迭代集合,只需在schema中增加一个参数:many=True,即:

1
2
3
4
5
6
7
8
9
10
11
12
13
# Two objects collected into a list to demonstrate many=True.
user1 = User(name="Mick", email="mick@stones.com")
user2 = User(name="Keith", email="keith@stones.com")
users = [user1, user2]

# Option 1: set many=True on the schema instance
schema = UserSchema(many=True)
result = schema.dump(users)
print(result)

# Option 2: pass many=True to the individual dump() call
schema = UserSchema()
result = schema.dump(users, many=True)
print(result)

Validation验证

1
2
3
4
5
6
7
8
9
from marshmallow import ValidationError

try:
    result = UserSchema().load({"name": "John", "email": "foo"})
except ValidationError as err:
    # Error messages, keyed by field name
    pprint(err.messages)  # {'email': ['Not a valid email address.']}
    # Fields that passed validation
    pprint(err.valid_data)  # {"name": "John"}

你可以向内建的field中传入validate 参数来定制验证的逻辑,validate的值可以是函数,匿名函数lambda,或者是定义了__call__的对象:

简单来说,validate的值必须是可调用对象

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
def validate_func(age):
    """Reject ages below 18 by raising ValidationError; return True otherwise."""
    if age < 18:
        raise ValidationError('年龄必须大于等于18')  # custom error message
    return True


class UserSchema(Schema):
    name = fields.Str()
    email = fields.Email()
    age = fields.Number(validate=validate_func)  # age must be at least 18


try:
    in_data = {'name': 'Mick', 'email': 'mick@stones.com', 'age': 16}
    result = UserSchema().load(in_data)
except ValidationError as err:
    pprint(err.messages)  # {'age': ['年龄必须大于等于18']}
    pprint(err.valid_data)  # {'email': 'mick@stones.com', 'name': 'Mick'}

使用装饰器注册验证方法

也可以使用validates 装饰器注册验证方法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
from marshmallow import validates


class UserSchema(Schema):
    name = fields.Str()
    email = fields.Email()
    age = fields.Number()  # age must be at least 18

    # Registers the method below as the validator for the ``age`` field.
    @validates('age')
    def validate_func(self, age):
        if age < 18:
            raise ValidationError('年龄必须大于等于18')  # custom error message


# Equivalent to
def validate_func(age):
    if age < 18:
        raise ValidationError('年龄必须大于等于18')  # custom error message


class UserSchema(Schema):
    name = fields.Str()
    email = fields.Email()
    age = fields.Number(validate=validate_func)  # age must be at least 18

Fields的required参数,default参数和error_messages参数

这些参数的用法与webargs差不多

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
class UserSchema(Schema):
    name = fields.String(
        required=True
    )
    age = fields.Integer(
        required=True,
        # Replaces the stock "missing required field" message.
        error_messages={'required': 'Age is required.'}
    )
    city = fields.String(
        required=True,
        # The message does not have to be a string; a dict is reported as-is.
        error_messages={"required": {"message": "City required", "code": 400}},
    )
    email = fields.Email()
    birthdate = fields.DateTime(
        # Value used when the attribute is missing at dump time
        # (marshmallow >= 3.13 renames this argument to ``dump_default``).
        default=datetime.datetime(2020, 9, 9)
    )


try:
    result = UserSchema().load({"email": "foo@bar.com"})
except ValidationError as err:
    pprint(err.messages)
    # {'age': ['Age is required.'],
    #  'city': {'code': 400, 'message': 'City required'},
    #  'name': ['Missing data for required field.']}

忽略required=True的字段

使用partial参数

1
2
3
4
5
6
7
8
9
class UserSchema(Schema):
    name = fields.String(required=True)
    age = fields.Integer(required=True)


# Alternatively: result = UserSchema(partial=('name',)).load({'age': 42})
# ``name`` is declared required=True, yet loading succeeds because it is
# listed in ``partial``.
result = UserSchema().load({"age": 42}, partial=("name",))

pprint(result)  # {'age': 42}

处理未知字段

默认情况下,如果传入了未知的字段(Schema里没有的字段),执行load()方法会抛出一个 ValidationError 异常。这种行为可以通过更改 unknown 选项来修改。

unknown 有三个值:

  • EXCLUDE: 直接扔掉未知字段
  • INCLUDE: 接受未知字段
  • RAISE: 抛出异常 , 默认
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
from marshmallow import INCLUDE


class UserSchema(Schema):
    email = fields.Email()
    created_time = fields.DateTime()

    class Meta:
        # Accept keys the schema does not declare instead of raising.
        unknown = INCLUDE


user_data = {
    'email': u'ken@yahoo.com',
    "未知字段": "123"
}
schema = UserSchema()
result = schema.load(user_data)
pprint(result)  # {'email': 'ken@yahoo.com', '未知字段': '123'}
1
2
# Option 2: set ``unknown`` when instantiating the schema
from marshmallow import EXCLUDE

schema = UserSchema(unknown=EXCLUDE)

Schema.validate(单纯的校验数据)

如果只是想用Schema去验证数据, 而不进行反序列化生成对象, 可以使用Schema.validate()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
class UserSchema(Schema):
    name = fields.Str(required=True, error_messages={"required": "name字段必须填写"})
    email = fields.Email()
    created_time = fields.DateTime()


# validate() only reports errors (as a dict); it does not return loaded data.
user = {"name": "tty", "email": "tty@python"}
schema = UserSchema()
res = schema.validate(user)
print(res)  # {'email': ['Not a valid email address.']}

user1 = {"name": "tty", "email": "tty@python.org"}
schema = UserSchema()
res1 = schema.validate(user1)
print(res1)  # {}

指定序列化/反序列化键

fields的attribute指定序列化键

简单来说就是 : 明确定义fields将从什么属性名取值

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
class User:
    def __init__(self, name, email):
        self.name = name
        self.email = email
        self.created_time = datetime.datetime.now()


class UserSchema(Schema):
    # ``attribute`` names the object attribute each field reads its value from.
    full_name = fields.String(attribute="name")
    email_address = fields.Email(attribute="email")  # value is taken from the ``email`` attribute
    created_at = fields.DateTime(attribute="created_time")


user = User("ttty", email="ttty@python.org")
schema = UserSchema()
res = schema.dump(user)
print(res)  # {'full_name': 'ttty', 'created_at': '2020-06-27T12:56:16.822442', 'email_address': 'ttty@python.org'}

fields的load_from指定反序列化键(仅适用于marshmallow 2.x;在3.x中load_from已被data_key取代)

1
2
3
4
5
6
7
8
9
10
# NOTE: ``load_from`` is a marshmallow 2.x field argument; marshmallow 3
# replaced it (together with ``dump_to``) with ``data_key``.
class UserSchema(Schema):
    full_name = fields.String(load_from="name")
    email_address = fields.Email(load_from="email")
    created_at = fields.DateTime(load_from="created_time")


user = {"full_name": "ttty", "email_address": "ttty@python.org"}
schema = UserSchema()
res = schema.load(user)
print(res)
# {'email_address': 'ttty@python.org', 'full_name': 'ttty'}

fields的data_key同时满足序列化与反序列化的方法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
class UserSchema(Schema):
    # ``data_key`` is the key used in the external (serialized) data, for
    # both dump() output and load() input.
    full_name = fields.String(data_key="name")
    email_address = fields.Email(data_key="email")
    created_at = fields.DateTime(data_key="created_time")


# Serialization
user = {"full_name": "ttty", "email_address": "ttty@python.org"}
schema = UserSchema()
res = schema.dump(user)
print(res)  # {'name': 'ttty', 'email': 'ttty@python.org'}

# Deserialization
user1 = {"name": "ttty", "email": "ttty@python.org"}
schema = UserSchema()
res = schema.load(user1)
print(res)  # {'email_address': 'ttty@python.org', 'full_name': 'ttty'}

使用fields.Function创建新的字段

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
class User(object):
    def __init__(self, name, email):
        self.name = name
        self.email = email
        self.created_at = datetime.datetime.now()


class UserSchema(Schema):
    # Computed at dump time from the object being serialized.
    uppername = fields.Function(lambda obj: obj.name.upper())

    class Meta:
        fields = ("name", "email", "created_at", "uppername")


user = User(name="Monty", email="monty@python.org")
schema = UserSchema()
result = schema.dump(user)
pprint(result)
# {'created_at': '2020-06-27T17:30:08.231909',
#  'email': 'monty@python.org',
#  'name': 'Monty',
#  'uppername': 'MONTY'}

使用ordered选项进行排序

将Meta中的ordered选项设置为True,marshmallow就会把数据序列化为collections.OrderedDict

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from collections import OrderedDict


class User:
    def __init__(self, name, email):
        self.name = name
        self.email = email
        self.created_time = datetime.datetime.now()


class UserSchema(Schema):
    uppername = fields.Function(lambda obj: obj.name.upper())

    class Meta:
        fields = ("name", "email", "created_time", "uppername")
        ordered = True  # enable ordered output


u = User("Charlie", "charlie@stones.com")
schema = UserSchema()
res = schema.dump(u)
print(isinstance(res, OrderedDict))  # True
print(res)
# OrderedDict([
# ('name', 'Charlie'),
# ('email', 'charlie@stones.com'),
# ('created_time', '2019-08-05T20:22:05.788540+00:00'),
# ('uppername', 'CHARLIE')
# ])

自定义字段

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from marshmallow import Schema, fields

class String128(fields.String):
    """String field with custom type and minimum-length checks on load.

    Input that is not a ``str`` is rejected with the ``"type"`` message, and
    strings shorter than 6 characters with the ``"invalid"`` message.

    NOTE(review): despite the class name, no 128-character upper bound is
    enforced here -- confirm whether one was intended.
    """

    default_error_messages = {
        "type": "该字段只能是字符串类型",
        "invalid": "该字符串长度必须大于6",
    }

    def _deserialize(self, value, attr, data, **kwargs):
        """Return ``value`` unchanged once it passes both checks.

        Raises:
            ValidationError: built from ``default_error_messages`` when
                ``value`` is not a ``str`` or is shorter than 6 characters.
        """
        if not isinstance(value, str):
            raise self.make_error("type")
        if len(value) < 6:
            raise self.make_error("invalid")
        # Hand the checked value back; without this, load() would produce
        # None for every String128 field.
        return value

class AppSchema(Schema):
    name = String128(required=True)
    priority = fields.Integer()
    obj_type = String128()
    link = String128()
    deploy = fields.Dict()
    description = fields.String()
    projects = fields.List(cls_or_instance=fields.Dict)


app = {
    "name": "app11",
    "priority": 2,
    "obj_type": "web",
    "link": "123.123.00.2",
    "deploy": {"deploy1": "deploy1", "deploy2": "deploy2"},
    "description": "app111 test111",
    "projects": [{"id": 2}]
}

schema = AppSchema()
# "app11" and "web" are both shorter than 6 characters, so both fields fail.
res = schema.validate(app)
print(res)  # {'obj_type': ['该字符串长度必须大于6'], 'name': ['该字符串长度必须大于6']}

数据处理扩展

  • pre_load(self,value):反序列化(load)之前的操作
  • post_load(self,value):反序列化(load)之后的操作
  • pre_dump(self,value):序列化(dump)之前的操作
  • post_dump(self,value):序列化(dump)之后的操作
  • 在pre/post的处理中,可以触发ValidationError
  • 当有多个处理的时候应该让其他的处理在第一个处理中调用,而不是使用这些装饰器多次
  • 处理在dump/load中触发的ValidationError:定义handle_error(self, exc, data)
1
2
3
4
5
6
7
8
9
10
11
12
13
from marshmallow import Schema, fields, pre_load


class UserSchema(Schema):
    name = fields.Str()
    slug = fields.Str()

    @pre_load
    def slugify_name(self, in_data, **kwargs):
        # Runs on the raw input before field deserialization: lowercase the
        # slug, trim surrounding whitespace, and turn spaces into hyphens.
        # The input dict is modified in place and must contain 'slug'.
        in_data['slug'] = in_data['slug'].lower().strip().replace(' ', '-')
        return in_data


schema = UserSchema()
result = schema.load({'name': 'Steve', 'slug': 'Steve Loria '})
print(result)  # => {'name': 'Steve', 'slug': 'steve-loria'}