From 5ac3a8a1214dd7d801e8f2928d0ee45adbcd72a0 Mon Sep 17 00:00:00 2001
From: Ethan Dalool
Date: Sun, 22 Jul 2018 20:18:06 -0700
Subject: [PATCH] Add caching.cached_endpoint decorator for 304'ing any url.

---
Review notes (commentary only; `git am` ignores everything between the `---`
line and the first `diff --git`):

* Line breaks and indentation in this copy were restored by hand from a
  whitespace-collapsed version of the patch. The hunk line counts and the
  diffstat were used as a cross-check, but indentation of a few lines could
  not be confirmed that way -- verify against commit 5ac3a8a before applying.
* `import hashlib` is added, but nothing in these hunks references it.
* FileCacheManager.__init__ now takes (maxlen, max_age, max_filesize) instead
  of (maxlen, max_filesize, max_age); callers that pass these positionally
  need to be updated.
* cached_endpoint compares If-None-Match to the stored etag without a None
  check (FileCacheManager.matches has one). If the endpoint returns None the
  stored etag stays None, so a request without If-None-Match receives a 304.

 .../etiquette_flask/caching.py                | 141 +++++++++++++-----
 .../etiquette_flask/etiquette_flask/common.py |  11 +-
 2 files changed, 111 insertions(+), 41 deletions(-)

diff --git a/frontends/etiquette_flask/etiquette_flask/caching.py b/frontends/etiquette_flask/etiquette_flask/caching.py
index b1f685b..2696f89 100644
--- a/frontends/etiquette_flask/etiquette_flask/caching.py
+++ b/frontends/etiquette_flask/etiquette_flask/caching.py
@@ -1,23 +1,6 @@
-'''
-This file provides the FileCacheManager to serve ETag and Cache-Control headers
-for files on disk.
-
-We consider the following cases:
-
-Client does not have the file (or has disabled their cache, effectively same):
-    Server sends file, provides ETag, and tells client to save it for max-age.
-
-Client has the file, but it has been a long time, beyond the max-age:
-    Client provides the old ETag. If it's still valid, Server responds with
-    304 Not Modified and no data. Client keeps the file.
-
-Client has the file, and it is within the max-age:
-    Client does not make a request at all.
-
-This FileCacheManager uses the file's MD5 hash as the ETag, and will only
-recalculate it if the file's mtime has changed since the last request.
-'''
-
+import flask; from flask import request
+import functools
+import hashlib
 import time
 
 import etiquette
@@ -25,42 +8,130 @@ import etiquette
 from voussoirkit import cacheclass
 from voussoirkit import pathclass
 
+
+def cached_endpoint(max_age):
+    '''
+    The cached_endpoint decorator can be used on slow endpoints that don't need
+    to be constantly updated or endpoints that produce large, static responses.
+
+    WARNING: The return value of the endpoint is shared with all users.
+    You should never use this cache on an endpoint that provides private
+    or personalized data, and you should not try to pass other headers through
+    the response.
+
+    When the function is run, its return value is stored and a random etag is
+    generated so that subsequent runs can respond with 304. This way, large
+    response bodies do not need to be transmitted often.
+
+    Given a nonzero max_age, the endpoint will only be run once per max_age
+    seconds on a global basis (not per-user). This way, you can prevent a slow
+    function from being run very often. In-between requests will just receive
+    the previous return value (still using 200 or 304 as appropriate for the
+    client's provided etag).
+
+    An example use case would be large-sized data dumps that don't need to be
+    precisely up to date every time.
+    '''
+    state = {
+        'max_age': max_age,
+        'stored_value': None,
+        'stored_etag': None,
+        'headers': {'ETag': None, 'Cache-Control': f'max-age={max_age}'},
+        'last_run': 0,
+    }
+
+    def wrapper(function):
+        @functools.wraps(function)
+        def wrapped(*args, **kwargs):
+            if (not state['max_age']) or (time.time() - state['last_run'] > state['max_age']):
+                value = function(*args, **kwargs)
+                if isinstance(value, flask.Response):
+                    value = value.response
+                if value != state['stored_value']:
+                    state['stored_value'] = value
+                    state['stored_etag'] = etiquette.helpers.random_hex(20)
+                    state['headers']['ETag'] = state['stored_etag']
+                state['last_run'] = time.time()
+            else:
+                value = state['stored_value']
+
+            client_etag = request.headers.get('If-None-Match', None)
+            if client_etag == state['stored_etag']:
+                response = flask.Response(status=304, headers=state['headers'])
+            else:
+                response = flask.Response(value, status=200, headers=state['headers'])
+
+            return response
+        return wrapped
+    return wrapper
+
+
 class FileCacheManager:
-    def __init__(self, maxlen, max_filesize, max_age):
+    '''
+    The FileCacheManager serves ETag and Cache-Control headers for disk files.
+
+    We consider the following cases:
+
+    Client does not have the file (or has disabled their cache):
+        Server sends file, provides ETag, tells client to save it for max-age.
+
+    Client has the file, but it has been a long time, beyond the max-age:
+        Client provides the old ETag. If it's still valid, Server responds with
+        304 Not Modified and no data. Client keeps the old file.
+
+    Client has the file, and it is within the max-age:
+        Client does not make a request at all.
+
+    We use the file's MD5 hash as the ETag, and will only recalculate it if the
+    file's mtime has changed since the last request.
+    '''
+    def __init__(self, maxlen, max_age, max_filesize):
         self.cache = cacheclass.Cache(maxlen=maxlen)
-        self.max_filesize = int(max_filesize)
         self.max_age = int(max_age)
+        self.max_filesize = max(int(max_filesize), 0) or None
 
     def get(self, filepath):
-        if (self.max_filesize is not None) and (filepath.size > self.max_filesize):
-            #print('I\'m not going to cache that!')
-            return None
-
         try:
             return self.cache[filepath]
         except KeyError:
             pass
+
+        if (self.max_filesize is not None) and (filepath.size > self.max_filesize):
+            return None
+
         cache_file = CacheFile(filepath, max_age=self.max_age)
         self.cache[filepath] = cache_file
         return cache_file
 
+    def matches(self, request, filepath):
+        client_etag = request.headers.get('If-None-Match', None)
+        if client_etag is None:
+            return False
+
+        server_value = self.get(filepath)
+        if server_value is None:
+            return False
+
+        server_etag = server_value.get_etag()
+        if client_etag != server_etag:
+            return False
+
+        return server_value.get_headers()
+
+
 class CacheFile:
     def __init__(self, filepath, max_age):
         self.filepath = filepath
-        self.max_age = max_age
+        self.max_age = int(max_age)
         self._stored_hash_time = None
         self._stored_hash_value = None
 
     def get_etag(self):
-        if self._stored_hash_value is None:
-            refresh = True
-        elif self.filepath.stat.st_mtime > self._stored_hash_time:
-            refresh = True
-        else:
-            refresh = False
+        mtime = self.filepath.stat.st_mtime
+        do_refresh = (self._stored_hash_value is None) or (mtime > self._stored_hash_time)
 
-        if refresh:
-            self._stored_hash_time = self.filepath.stat.st_mtime
+        if do_refresh:
+            self._stored_hash_time = mtime
             self._stored_hash_value = etiquette.helpers.hash_file_md5(self.filepath)
         return self._stored_hash_value
 
diff --git a/frontends/etiquette_flask/etiquette_flask/common.py b/frontends/etiquette_flask/etiquette_flask/common.py
index 8d2123f..dcd4c7b 100644
--- a/frontends/etiquette_flask/etiquette_flask/common.py
+++ b/frontends/etiquette_flask/etiquette_flask/common.py
@@ -120,12 +120,10 @@ def send_file(filepath, override_mimetype=None):
     if not filepath.is_file:
         flask.abort(404)
 
-    cache_file = file_cache_manager.get(filepath)
-    if cache_file is not None:
-        client_etag = request.headers.get('If-None-Match', None)
-        if client_etag and client_etag == cache_file.get_etag():
-            response = flask.Response(status=304, headers=cache_file.get_headers())
-            return response
+    headers = file_cache_manager.matches(request=request, filepath=filepath)
+    if headers:
+        response = flask.Response(status=304, headers=headers)
+        return response
 
     outgoing_headers = {}
     if override_mimetype is not None:
@@ -174,6 +172,7 @@ def send_file(filepath, override_mimetype=None):
     outgoing_headers['Accept-Ranges'] = 'bytes'
     outgoing_headers['Content-Length'] = (range_max - range_min) + 1
 
+    cache_file = file_cache_manager.get(filepath)
     if cache_file is not None:
         outgoing_headers.update(cache_file.get_headers())
 