357 lines
13 KiB
Python
357 lines
13 KiB
Python
# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
|
|
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
|
|
# (c) 2005 Ian Bicking, Clark C. Evans and contributors
|
|
# This module is part of the Python Paste Project and is released under
|
|
# the MIT License: http://www.opensource.org/licenses/mit-license.php
|
|
"""
|
|
This module handles sending static content such as in-memory data or
|
|
files. At this time it has cache helpers and understands the
|
|
if-modified-since request header.
|
|
"""
|
|
|
|
import os, time, mimetypes, zipfile, tarfile
|
|
from paste.httpexceptions import *
|
|
from paste.httpheaders import *
|
|
|
|
# Size threshold in bytes: FileApp.update() reads files smaller than this
# fully into memory; larger files are left on disk and streamed per request.
CACHE_SIZE = 4096
|
|
# Default read size in bytes used by _FileIter, and the block size handed to
# ``wsgi.file_wrapper`` when FileApp streams a file from disk.
BLOCK_SIZE = 4096 * 16
|
|
|
|
# Names exported by ``from <this module> import *``.
__all__ = ['DataApp', 'FileApp', 'DirectoryApp', 'ArchiveStore']
|
|
|
|
class DataApp(object):
    """
    Returns an application that will send content in a single chunk,
    this application has support for setting cache-control and for
    responding to conditional (or HEAD) requests.

    Constructor Arguments:

        ``content``     the content being sent to the client

        ``headers``     the headers to send with the response
                        (``None`` or a list)

        ``allowed_methods``  optional override for the request methods
                        this application answers (default GET and HEAD)

        The remaining ``kwargs`` correspond to headers, where the
        underscore is replaced with a dash.  These values are only
        added to the headers if they are not already provided; thus,
        they can be used for default values.  Examples include, but
        are not limited to:

            ``content_type``
            ``content_encoding``
            ``content_location``

    ``cache_control()``

        This method provides validated construction of the ``Cache-Control``
        header as well as providing for automated filling out of the
        ``EXPIRES`` header for HTTP/1.0 clients.

    ``set_content()``

        This method provides a mechanism to set the content after the
        application has been constructed.  This method does things
        like changing ``Last-Modified`` and ``Content-Length`` headers.

    """

    allowed_methods = ('GET', 'HEAD')

    def __init__(self, content, headers=None, allowed_methods=None,
                 **kwargs):
        assert isinstance(headers, (type(None), list))
        self.expires = None
        self.content = None
        self.content_length = None
        self.last_modified = 0
        if allowed_methods is not None:
            self.allowed_methods = allowed_methods
        self.headers = headers or []
        # Every remaining keyword names a header; look it up and apply the
        # value to our header list.
        for (k, v) in kwargs.items():
            header = get_header(k)
            header.update(self.headers, v)
        ACCEPT_RANGES.update(self.headers, bytes=True)
        if not CONTENT_TYPE(self.headers):
            CONTENT_TYPE.update(self.headers)
        if content is not None:
            self.set_content(content)

    def cache_control(self, **kwargs):
        """Apply ``Cache-Control`` settings to the headers; returns ``self``.

        The value returned by ``CACHE_CONTROL.apply`` is remembered in
        ``self.expires`` (``None`` when falsy) and later used by ``get()``
        to emit an ``Expires`` header.
        """
        self.expires = CACHE_CONTROL.apply(self.headers, **kwargs) or None
        return self

    def set_content(self, content, last_modified=None):
        """Replace the served content; returns ``self``.

        ``last_modified`` defaults to the current time.  The content
        length and the ``Last-Modified`` header are refreshed to match.
        """
        assert content is not None
        if last_modified is None:
            self.last_modified = time.time()
        else:
            self.last_modified = last_modified
        self.content = content
        self.content_length = len(content)
        LAST_MODIFIED.update(self.headers, time=self.last_modified)
        return self

    def content_disposition(self, **kwargs):
        """Apply ``Content-Disposition`` settings; returns ``self``."""
        CONTENT_DISPOSITION.apply(self.headers, **kwargs)
        return self

    def __call__(self, environ, start_response):
        """WSGI entry point: reject disallowed methods, else delegate to get()."""
        method = environ['REQUEST_METHOD'].upper()
        if method not in self.allowed_methods:
            exc = HTTPMethodNotAllowed(
                'You cannot %s a file' % method,
                headers=[('Allow', ','.join(self.allowed_methods))])
            return exc(environ, start_response)
        return self.get(environ, start_response)

    def calculate_etag(self):
        """Return the quoted entity tag built from mtime and length."""
        return '"%s-%s"' % (self.last_modified, self.content_length)

    @staticmethod
    def _not_modified(headers, start_response):
        """Strip entity headers and start a body-less 304 response."""
        # horribly inefficient, n^2 performance, yuck!
        for head in list_headers(entity=True):
            head.delete(headers)
        start_response('304 Not Modified', headers)
        return [b'']  # empty body

    @staticmethod
    def _byte_range(range_spec, content_length):
        """Resolve a parsed ``Range`` header to inclusive byte offsets.

        ``range_spec`` is ``None`` or a ``(units, [(begin, end), ...])``
        tuple in which ``end`` is ``None`` when the client left it open.
        Only a single ``bytes`` range is honoured; anything else selects
        the whole content.

        Returns ``(lower, upper)``, or ``None`` when the requested range
        cannot be satisfied.
        """
        lower, upper = 0, content_length - 1
        if range_spec and 'bytes' == range_spec[0] and 1 == len(range_spec[1]):
            lower, upper = range_spec[1][0]
            # Only an *absent* end means "to the end of the content"; an
            # explicit 0 (e.g. ``bytes=0-0``) is a real last-byte position
            # and must not be replaced.
            if upper is None:
                upper = content_length - 1
            if upper >= content_length or lower > upper:
                return None
        return (lower, upper)

    def get(self, environ, start_response):
        """Answer a GET/HEAD request, honouring conditional and range headers.

        Returns a one-element list holding the body when the content is
        held in memory (or for 304/error responses).  When ``self.content``
        is ``None`` the response is started and ``(lower, content_length)``
        is returned instead, so that a subclass can stream the bytes itself.
        """
        headers = self.headers[:]
        current_etag = self.calculate_etag()
        ETAG.update(headers, current_etag)
        if self.expires is not None:
            EXPIRES.update(headers, delta=self.expires)

        try:
            client_etags = IF_NONE_MATCH.parse(environ)
            if client_etags:
                for etag in client_etags:
                    if etag == current_etag or etag == '*':
                        return self._not_modified(headers, start_response)
        except HTTPBadRequest as exce:
            return exce.wsgi_application(environ, start_response)

        # If we get If-None-Match and If-Modified-Since, and
        # If-None-Match doesn't match, then we should not try to
        # figure out If-Modified-Since (which has 1-second granularity
        # and just isn't as accurate)
        if not client_etags:
            try:
                client_clock = IF_MODIFIED_SINCE.parse(environ)
                if (client_clock is not None
                        and client_clock >= int(self.last_modified)):
                    return self._not_modified(headers, start_response)
            except HTTPBadRequest as exce:
                return exce.wsgi_application(environ, start_response)

        range_spec = RANGE.parse(environ)
        span = self._byte_range(range_spec, self.content_length)
        if span is None:
            return HTTPRequestRangeNotSatisfiable((
                "Range request was made beyond the end of the content,\r\n"
                "which is %s long.\r\n  Range: %s\r\n") % (
                    self.content_length, RANGE(environ))
            ).wsgi_application(environ, start_response)
        (lower, upper) = span

        content_length = upper - lower + 1
        CONTENT_RANGE.update(headers, first_byte=lower, last_byte=upper,
                             total_length=self.content_length)
        CONTENT_LENGTH.update(headers, content_length)
        if range_spec or content_length != self.content_length:
            start_response('206 Partial Content', headers)
        else:
            start_response('200 OK', headers)
        if self.content is not None:
            return [self.content[lower:upper+1]]
        return (lower, content_length)
|
|
|
|
class FileApp(DataApp):
    """
    Returns an application that will send the file at the given
    filename.  Adds a mime type based on ``mimetypes.guess_type()``.
    See DataApp for the arguments beyond ``filename``.
    """

    def __init__(self, filename, headers=None, **kwargs):
        self.filename = filename
        content_type, content_encoding = self.guess_type()
        # Guessed values are only defaults; explicit keywords win.
        if content_type and 'content_type' not in kwargs:
            kwargs['content_type'] = content_type
        if content_encoding and 'content_encoding' not in kwargs:
            kwargs['content_encoding'] = content_encoding
        DataApp.__init__(self, None, headers, **kwargs)

    def guess_type(self):
        """Return ``(content_type, content_encoding)`` guessed from the name."""
        return mimetypes.guess_type(self.filename)

    def update(self, force=False):
        """Refresh cached metadata (and small-file content) from disk.

        Does nothing when the file's mtime is unchanged, unless ``force``
        is true.  Files smaller than ``CACHE_SIZE`` are read into memory;
        for larger ones only the length and ``Last-Modified`` header are
        recorded.  Raises ``OSError``/``IOError`` if the file cannot be
        stat'ed or read.
        """
        stat = os.stat(self.filename)
        if not force and stat.st_mtime == self.last_modified:
            return
        if stat.st_size < CACHE_SIZE:
            # Read first and commit afterwards, so that a failed read does
            # not leave the new mtime recorded against stale content.
            with open(self.filename, "rb") as fh:
                data = fh.read()
            # set_content() also records the mtime and Last-Modified header.
            self.set_content(data, stat.st_mtime)
        else:
            self.last_modified = stat.st_mtime
            self.content = None
            self.content_length = stat.st_size
            LAST_MODIFIED.update(self.headers, time=self.last_modified)

    def get(self, environ, start_response):
        """Serve the file, from the in-memory copy or streamed from disk.

        Returns a WSGI iterable: a list for cached content, HEAD, 304 and
        error responses; otherwise ``wsgi.file_wrapper`` output or a
        ``_FileIter`` positioned at the requested range.
        """
        is_head = environ['REQUEST_METHOD'].upper() == 'HEAD'
        # RFC 2616 13.2.6: a max-age=0 request forces revalidation.
        force = 'max-age=0' in CACHE_CONTROL(environ).lower()
        try:
            self.update(force=force)
        except (IOError, OSError) as e:
            # update() stats (and possibly reads) the file; report a
            # missing or unreadable file as 404/403 rather than letting
            # the error escape.
            if not os.path.exists(self.filename):
                exc = HTTPNotFound(
                    'The resource does not exist',
                    comment="No file at %r" % self.filename)
                return exc(environ, start_response)
            exc = HTTPForbidden(
                'You are not permitted to view this file (%s)' % e)
            return exc.wsgi_application(environ, start_response)

        fileobj = None
        if not self.content:
            if not os.path.exists(self.filename):
                exc = HTTPNotFound(
                    'The resource does not exist',
                    comment="No file at %r" % self.filename)
                return exc(environ, start_response)
            try:
                fileobj = open(self.filename, 'rb')
            except (IOError, OSError) as e:
                exc = HTTPForbidden(
                    'You are not permitted to view this file (%s)' % e)
                return exc.wsgi_application(
                    environ, start_response)

        try:
            retval = DataApp.get(self, environ, start_response)
        except Exception:
            if fileobj is not None:
                fileobj.close()
            raise

        if is_head or isinstance(retval, list):
            # cached content, exception, not-modified or HEAD: nothing will
            # be streamed, so release the handle instead of leaking it.
            if fileobj is not None:
                fileobj.close()
            if is_head:
                return [b'']
            return retval

        (lower, content_length) = retval
        fileobj.seek(lower)
        file_wrapper = environ.get('wsgi.file_wrapper', None)
        # file_wrapper transmits until end-of-file, so it is only correct
        # when the requested span runs to the end of the file; bounded
        # ranges go through _FileIter, which stops after content_length.
        if file_wrapper and lower + content_length == self.content_length:
            return file_wrapper(fileobj, BLOCK_SIZE)
        return _FileIter(fileobj, size=content_length)
|
|
|
|
class _FileIter(object):
|
|
|
|
def __init__(self, file, block_size=None, size=None):
|
|
self.file = file
|
|
self.size = size
|
|
self.block_size = block_size or BLOCK_SIZE
|
|
|
|
def __iter__(self):
|
|
return self
|
|
|
|
def next(self):
|
|
chunk_size = self.block_size
|
|
if self.size is not None:
|
|
if chunk_size > self.size:
|
|
chunk_size = self.size
|
|
self.size -= chunk_size
|
|
data = self.file.read(chunk_size)
|
|
if not data:
|
|
raise StopIteration
|
|
return data
|
|
__next__ = next
|
|
|
|
def close(self):
|
|
self.file.close()
|
|
|
|
|
|
class DirectoryApp(object):
    """
    Returns an application that dispatches requests to corresponding FileApps based on PATH_INFO.
    FileApp instances are cached. This app makes sure not to serve any files that are not in a subdirectory.
    To customize FileApp creation override ``DirectoryApp.make_fileapp``
    """

    def __init__(self, path):
        # Store the root as an absolute path with a trailing separator, so
        # the prefix test in __call__ cannot match a sibling directory that
        # merely shares a name prefix.
        self.path = os.path.abspath(path)
        if not self.path.endswith(os.path.sep):
            self.path += os.path.sep
        assert os.path.isdir(self.path)
        # Maps PATH_INFO -> the application created for that file.
        self.cached_apps = {}

    # Factory called with the resolved file path to build the per-file app.
    make_fileapp = FileApp

    def __call__(self, environ, start_response):
        """WSGI entry point: serve the file named by PATH_INFO.

        Responds 403 when the normalized path falls outside the root
        directory and 404 when it is not an existing regular file.
        """
        # PEP 3333 allows a server to omit PATH_INFO when it is empty.
        path_info = environ.get('PATH_INFO', '')
        app = self.cached_apps.get(path_info)
        if app is None:
            path = os.path.join(self.path, path_info.lstrip('/'))
            if not os.path.normpath(path).startswith(self.path):
                app = HTTPForbidden()
            elif os.path.isfile(path):
                app = self.make_fileapp(path)
                # Only successful lookups are cached.
                self.cached_apps[path_info] = app
            else:
                app = HTTPNotFound(comment=path)
        return app(environ, start_response)
|
|
|
|
|
|
class ArchiveStore(object):
    """
    Returns an application that serves up a DataApp for items requested
    in a given zip or tar archive.

    Constructor Arguments:

        ``filepath``    the path to the archive being served

    ``cache_control()``

        This method provides validated construction of the ``Cache-Control``
        header as well as providing for automated filling out of the
        ``EXPIRES`` header for HTTP/1.0 clients.
    """

    def __init__(self, filepath):
        if zipfile.is_zipfile(filepath):
            self.archive = zipfile.ZipFile(filepath, "r")
        elif tarfile.is_tarfile(filepath):
            # TarFileCompat (a zipfile-like wrapper) only exists on
            # Python 2; fall back to a plain TarFile where it is missing.
            compat = getattr(tarfile, 'TarFileCompat', None)
            if compat is not None:
                self.archive = compat(filepath, "r")
            else:
                self.archive = tarfile.open(filepath, "r")
        else:
            raise AssertionError("filepath '%s' is not a zip or tar " % filepath)
        # Headers shared by every served member; cache_control() fills them.
        self.headers = []
        self.expires = None
        self.last_modified = time.time()
        # Maps member path -> DataApp already built for it.
        self.cache = {}

    def cache_control(self, **kwargs):
        """Apply ``Cache-Control`` settings for members served from now on.

        Returns ``self``.  Applications already held in ``self.cache``
        are not modified.
        """
        self.expires = CACHE_CONTROL.apply(self.headers, **kwargs) or None
        return self

    def _lookup(self, path):
        """Return ``(filename, is_file, mtime)`` for an archive member.

        Raises ``KeyError`` when the archive has no member named ``path``.
        """
        if isinstance(self.archive, tarfile.TarFile):
            member = self.archive.getmember(path)
            # Only regular files are served from a tar archive.
            return (member.name, member.isfile(), member.mtime)
        info = self.archive.getinfo(path)
        return (info.filename,
                not info.filename.endswith("/"),
                time.mktime(info.date_time + (0, 0, 0)))

    def _read(self, path):
        """Return the full content of the archive member ``path``."""
        if isinstance(self.archive, tarfile.TarFile):
            handle = self.archive.extractfile(self.archive.getmember(path))
            try:
                return handle.read()
            finally:
                handle.close()
        return self.archive.read(path)

    def __call__(self, environ, start_response):
        """WSGI entry point: serve the archive member named by PATH_INFO."""
        path = environ.get("PATH_INFO", "")
        if path.startswith("/"):
            path = path[1:]
        application = self.cache.get(path)
        if application:
            return application(environ, start_response)
        try:
            filename, is_file, mtime = self._lookup(path)
        except KeyError:
            exc = HTTPNotFound("The file requested, '%s', was not found." % path)
            return exc.wsgi_application(environ, start_response)
        if not is_file:
            exc = HTTPNotFound("Path requested, '%s', is not a file." % path)
            return exc.wsgi_application(environ, start_response)
        content_type, content_encoding = mimetypes.guess_type(filename)
        header_kwargs = {'content_type': content_type}
        # 'None' is not a valid content-encoding, so don't set the header if
        # mimetypes.guess_type returns None
        if content_encoding is not None:
            header_kwargs['content_encoding'] = content_encoding
        # Hand each DataApp its own copy of the shared headers, so the
        # Cache-Control set through cache_control() reaches the response
        # without the apps mutating one shared list.
        app = DataApp(None, headers=list(self.headers), **header_kwargs)
        app.set_content(self._read(path), mtime)
        self.cache[path] = app
        app.expires = self.expires
        return app(environ, start_response)
|
|
|