This document introduces valid_model, a Python library for declarative data modeling. It allows defining data models using descriptors to specify data types and validation rules. This provides strict typing while remaining unopinionated about persistence. Custom descriptors can extend the library's functionality. The library aims to enable use cases like database modeling, form validation, and API request/response objects.
2. Introducing valid_model
It includes:
- base class - Object
- basic descriptors - Integer, Float, DateTime, String, ...
- nesting descriptors - Dict, List, Set, EmbeddedObject
3. Most similar libraries are tightly integrated with a persistence layer:
SQLAlchemy, Django ORM, mongokit, etc.
Or are targeted at web forms:
Formencode, colander, deform
So the goal was to build a highly flexible, unopinionated data modeling
library.
4. Some Use Cases
Database data model
Form validation
Test fixtures
API request/response objects
Scrubbing and normalizing data
Data migration
5.
car = {
    'make': None,
    'model': None,
    'doors': None,
    'horsepower': None,
}
class Car(object):
    """Plain-Python car record with every field declared up front.

    All four attributes are created in ``__init__`` and default to ``None``.
    Nothing here constrains their types or values.
    """

    def __init__(self, make=None, model=None, doors=None,
                 horsepower=None):
        self.make = make
        self.model = model
        self.doors = doors
        self.horsepower = horsepower
It is valid Python to arbitrarily add new instance attributes in other methods, which can lead to
headaches (and pylint complaints).
6. At least I know the fields ahead of time, but what datatypes are these attributes?
def horse_check(value):
    """Validate a horsepower figure.

    Returns ``True`` unless the value is rejected. Raises ``ValidationError``
    when the value is exactly 1, or when it is zero or negative.
    """
    if value == 1:
        raise ValidationError('Is this powered by an actual horse?')
    elif value <= 0:
        raise ValidationError('Phantom horses?')
    return True
class Car(Object):
    """Car model whose fields are declared with valid_model descriptors."""

    # nullable=False: make may not be None
    make = String(nullable=False)
    model = String()
    # validator callable: true only for door counts of 5 or fewer
    doors = Integer(validator=lambda x: x <= 5)
    # horse_check raises ValidationError for 1 and for values <= 0
    horsepower = Integer(validator=horse_check)
7. Nested Schemas are Easy
class Person(Object):
    """A person with a required name and a homepage that may be left unset."""

    name = String(nullable=False)
    homepage = String()
class BlogPost(Object):
    """Blog post model with an embedded author, contributors and tags."""

    # mutator is applied to the assigned value; here it title-cases it
    title = String(nullable=False, mutator=lambda x: x.title())
    # the utcnow function itself is passed as the default, not a timestamp
    updated = DateTime(nullable=False, default=datetime.utcnow)
    published = DateTime()
    author = EmbeddedObject(Person)
    contributors = List(value=EmbeddedObject(Person), nullable=False)
    tags = List(value=String(nullable=False), nullable=False)

    def validate(self):
        """Run the base validation, then check the cross-field date rule.

        Raises ValidationError when ``published`` is set and is later than
        ``updated``.
        """
        super(BlogPost, self).validate()
        if self.published is not None and self.published > self.updated:
            # Adjacent literals keep the message on one logical line; the
            # original literal was split mid-string and did not parse.
            raise ValidationError(
                'a post cannot be published at a later date '
                'than it was updated')
post = BlogPost(title='example post', author={'name': 'Josh'}, tags=['tag1', 'tag2'])
>>> print post
{'updated': datetime.datetime(2014, 10, 7, 13, 43, 1, 960174),
'author': {'homepage': None, 'name': u'Josh'},
'contributors': [], 'title': u'Example Post', 'tags': [u'tag1', u'tag2'], 'published': None}
8. valid_model also provides something closer to strict typing
class Car(Object):
    """Car model used to demonstrate validation errors on assignment."""

    # nullable=False: make may not be None
    make = String(nullable=False)
    model = String()
    # validator callable: true only for door counts of 5 or fewer
    doors = Integer(validator=lambda x: x <= 5)
    # horse_check raises ValidationError for 1 and for values <= 0
    horsepower = Integer(validator=horse_check)
>>> Car(doors='five')
valid_model.exc.ValidationError: 'five' is not an int
>>> Car(doors=10)
valid_model.exc.ValidationError: doors
>>> Car(horsepower=1)
valid_model.exc.ValidationError: Is this powered by an actual horse?
>>> Car(make=None)
valid_model.exc.ValidationError: make is not nullable
9. Normalize your data when it gets set
class HTTPAccessLog(Object):
    """HTTP access-log entry whose status text is upper-cased by a mutator."""

    code = Integer(nullable=False)
    # mutator is applied to the assigned value; here it upper-cases it
    status = String(nullable=False, mutator=lambda x: x.upper())
    timestamp = DateTime(default=datetime.utcnow)

    def validate(self):
        """Run the base validation, then require status to begin with code.

        Raises ValidationError when the status text does not start with the
        textual form of ``code``.
        """
        super(HTTPAccessLog, self).validate()
        if not self.status.startswith(unicode(self.code)):
            raise ValidationError('code and status do not match')
>>> ping = HTTPAccessLog()
>>> ping.code = 404
>>> ping.status = '404 not found'
>>> print ping
{'status': u'404 NOT FOUND', 'timestamp': datetime.datetime(2014, 10, 7, 13, 36, 15, 217678),
'code': 404}
10. Descriptors Tangent
Python descriptors are fancy attributes.
class SomeDescriptor(object):
    """Skeleton showing the three methods of the descriptor protocol.

    The method bodies are intentionally left empty.
    """

    def __get__(self, instance, klass=None):
        # invoked on attribute read
        pass

    def __set__(self, instance, value):
        # invoked on attribute assignment
        pass

    def __delete__(self, instance):
        # invoked on `del obj.attr`; the protocol hook is __delete__,
        # whereas __del__ is the object finalizer and takes no instance
        pass
class Foo(object):
    # a descriptor is attached as a class attribute
    b = SomeDescriptor()
11. @property Descriptors
@property is the most common descriptor
class Foo(object):
    """Three uses of @property: accessor pair, read-only, cached value."""

    @property
    def a(self):
        """Return the value stored in ``_a``."""
        return self._a

    @a.setter
    def a(self, value):
        self._a = value

    # Make an attribute readonly by not defining the setter.
    @property
    def readonly(self):
        return self._private_var

    # Lazily initialize or cache expensive calculations
    @property
    def expensive_func(self):
        # _result is read directly, so it must already exist (as None)
        # before the first access
        if self._result is None:
            self._result = expensive_func()
        return self._result
12. Customizing Descriptors is Easy
Extending existing descriptors works like subclassing anything else in Python.
class SuperDateTime(DateTime):
    """DateTime descriptor that also accepts strings and numeric timestamps."""

    def __set__(self, instance, value):
        """Convert strings and numbers, then delegate to DateTime.__set__."""
        if isinstance(value, basestring):
            # NOTE(review): python-dateutil exposes its parser as
            # dateutil.parser.parse; confirm what `dateutils` refers to here
            value = dateutils.parse(value)
        elif isinstance(value, (int, float)):
            # numbers are passed to utcfromtimestamp
            value = datetime.utcfromtimestamp(value)
        super(SuperDateTime, self).__set__(instance, value)
class Decimal(Generic):
    """Descriptor that only accepts ``decimal.Decimal`` instances."""

    def __set__(self, instance, value):
        """Reject any value that is not a decimal.Decimal, else delegate.

        Raises ValidationError for every non-Decimal value.
        """
        if not isinstance(value, decimal.Decimal):
            # NOTE(review): the message formats self.name, not the rejected
            # value; presumably that is the field name; confirm
            raise ValidationError('{} is not a decimal'.format(self.name))
        super(Decimal, self).__set__(instance, value)
13. Simple wrappers for persistence
An example of using MongoDB with Redis as a cache
class PersistBlogPost(object):
    """Persist BlogPost objects in MongoDB, using Redis as a cache."""

    def __init__(self, mongo_collection, redis_conn):
        # insert() and find() read these two attributes
        self.mongo_collection = mongo_collection
        self.redis_conn = redis_conn

    def insert(self, post):
        """Insert the post's ``__json__()`` form into the Mongo collection."""
        self.mongo_collection.insert(post.__json__())

    def find(self, title):
        """Look a post up by title, trying the cache first.

        Returns the unpickled cached object on a cache hit. Otherwise
        queries Mongo; a found document is turned into a BlogPost, cached,
        and returned. When nothing matches, the falsy result of
        ``find_one`` is returned as-is.
        """
        cached = self.redis_conn.get(title)
        if cached:
            # NOTE: pickle.loads can execute arbitrary code; this assumes
            # only this class writes to the cache
            return pickle.loads(cached)
        post = self.mongo_collection.find_one({'title': title})
        if post:
            post = BlogPost(**post)
            self.redis_conn.set(title, pickle.dumps(post))
        return post