Move and rename LocalClientBucket to something more fitting.
This commit is contained in:
parent
0a5b4394ed
commit
7d783c6903
|
@ -3,6 +3,7 @@ source = relstorage
|
||||||
omit =
|
omit =
|
||||||
relstorage/tests/*
|
relstorage/tests/*
|
||||||
relstorage/adapters/tests/*
|
relstorage/adapters/tests/*
|
||||||
|
relstorage/cache/tests/*
|
||||||
# Omit the main oracle adapter file since we don't test it at all
|
# Omit the main oracle adapter file since we don't test it at all
|
||||||
relstorage/adapters/oracle.py
|
relstorage/adapters/oracle.py
|
||||||
|
|
||||||
|
|
|
@ -718,358 +718,7 @@ class StorageCache(object):
|
||||||
log.debug("Checkpoints already shifted to %s. "
|
log.debug("Checkpoints already shifted to %s. "
|
||||||
"len(delta_after0) == %d.", old_value, len(self.delta_after0))
|
"len(delta_after0) == %d.", old_value, len(self.delta_after0))
|
||||||
|
|
||||||
_OSA = object.__setattr__
|
from .mapping import SizedLRUMapping as LocalClientBucket
|
||||||
|
|
||||||
from .lru import SizedLRU
|
|
||||||
from .lru import ProtectedLRU
|
|
||||||
from .lru import ProbationLRU
|
|
||||||
from .lru import EdenLRU
|
|
||||||
|
|
||||||
class LocalClientBucket(object):
|
|
||||||
"""
|
|
||||||
A map that keeps a record of its approx. size.
|
|
||||||
|
|
||||||
keys must be `str`` and values must be byte strings.
|
|
||||||
|
|
||||||
This class is not threadsafe, accesses to __setitem__ and get_and_bubble_all
|
|
||||||
must be protected by a lock.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# What multiplier of the number of items in the cache do we apply
|
|
||||||
# to determine when to age the frequencies?
|
|
||||||
_age_factor = 10
|
|
||||||
|
|
||||||
# When did we last age?
|
|
||||||
_aged_at = 0
|
|
||||||
|
|
||||||
# Percentage of our byte limit that should be dedicated
|
|
||||||
# to the main "protected" generation
|
|
||||||
_gen_protected_pct = 0.8
|
|
||||||
# Percentage of our byte limit that should be dedicated
|
|
||||||
# to the initial "eden" generation
|
|
||||||
_gen_eden_pct = 0.1
|
|
||||||
# Percentage of our byte limit that should be dedicated
|
|
||||||
# to the "probationary"generation
|
|
||||||
_gen_probation_pct = 0.1
|
|
||||||
# By default these numbers add up to 1.0, but it would be possible to
|
|
||||||
# overcommit by making them sum to more than 1.0. (For very small
|
|
||||||
# limits, the rounding will also make them overcommit).
|
|
||||||
|
|
||||||
def __init__(self, limit):
|
|
||||||
# We experimented with using OOBTree and LOBTree
|
|
||||||
# for the type of self._dict. The OOBTree has a similar
|
|
||||||
# but slightly slower performance profile (as would be expected
|
|
||||||
# given the big-O complexity) as a dict, but very large ones can't
|
|
||||||
# be pickled in a single shot! The LOBTree works faster and uses less
|
|
||||||
# memory than the OOBTree or the dict *if* all the keys are integers;
|
|
||||||
# which they currently are not. Plus the LOBTrees are slower on PyPy than its
|
|
||||||
# own dict specializations. We were hoping to be able to write faster pickles with
|
|
||||||
# large BTrees, but since that's not the case, we abandoned the idea.
|
|
||||||
|
|
||||||
# This holds all the ring entries, no matter which ring they are in.
|
|
||||||
self._dict = {}
|
|
||||||
|
|
||||||
|
|
||||||
self._protected = ProtectedLRU(int(limit * self._gen_protected_pct))
|
|
||||||
self._probation = ProbationLRU(int(limit * self._gen_probation_pct),
|
|
||||||
self._protected,
|
|
||||||
self._dict)
|
|
||||||
self._eden = EdenLRU(int(limit * self._gen_eden_pct),
|
|
||||||
self._probation,
|
|
||||||
self._protected,
|
|
||||||
self._dict)
|
|
||||||
self._gens = [None, None, None, None] # 0 isn't used
|
|
||||||
for x in (self._protected, self._probation, self._eden):
|
|
||||||
self._gens[x.PARENT_CONST] = x
|
|
||||||
self._gens = tuple(self._gens)
|
|
||||||
self._hits = 0
|
|
||||||
self._misses = 0
|
|
||||||
self._sets = 0
|
|
||||||
self.limit = limit
|
|
||||||
self._next_age_at = 1000
|
|
||||||
|
|
||||||
@property
|
|
||||||
def size(self):
|
|
||||||
return self._eden.size + self._protected.size + self._probation.size
|
|
||||||
|
|
||||||
def reset_stats(self):
|
|
||||||
self._hits = 0
|
|
||||||
self._misses = 0
|
|
||||||
self._sets = 0
|
|
||||||
self._aged_at = 0
|
|
||||||
self._next_age_at = 0
|
|
||||||
|
|
||||||
def stats(self):
|
|
||||||
total = self._hits + self._misses
|
|
||||||
return {
|
|
||||||
'hits': self._hits,
|
|
||||||
'misses': self._misses,
|
|
||||||
'sets': self._sets,
|
|
||||||
'ratio': self._hits/total if total else 0,
|
|
||||||
'size': len(self._dict),
|
|
||||||
'bytes': self.size,
|
|
||||||
'eden_stats': self._eden.stats(),
|
|
||||||
'prot_stats': self._protected.stats(),
|
|
||||||
'prob_stats': self._probation.stats(),
|
|
||||||
}
|
|
||||||
|
|
||||||
def __len__(self):
|
|
||||||
return len(self._dict)
|
|
||||||
|
|
||||||
def _age(self):
|
|
||||||
# Age only when we're full and would thus need to evict; this
|
|
||||||
# makes initial population faster. It's cheaper to calculate this
|
|
||||||
# AFTER the operations, though, because we read it from C.
|
|
||||||
#if self.size < self.limit:
|
|
||||||
# return
|
|
||||||
|
|
||||||
# Age the whole thing periodically based on the number of
|
|
||||||
# operations we've done that would have altered popularity.
|
|
||||||
# Dynamically calculate how often we need to age. By default, this is
|
|
||||||
# based on what Caffeine's PerfectFrequency does: 10 * max
|
|
||||||
# cache entries
|
|
||||||
dct = self._dict
|
|
||||||
age_period = self._age_factor * len(dct)
|
|
||||||
operations = self._hits + self._sets
|
|
||||||
if operations - self._aged_at < age_period:
|
|
||||||
self._next_age_at = age_period
|
|
||||||
return
|
|
||||||
if self.size < self.limit:
|
|
||||||
return
|
|
||||||
|
|
||||||
self._aged_at = operations
|
|
||||||
now = time.time()
|
|
||||||
log.debug("Beginning frequency aging for %d cache entries",
|
|
||||||
len(dct))
|
|
||||||
SizedLRU.age_lists(self._eden, self._probation, self._protected)
|
|
||||||
done = time.time()
|
|
||||||
log.debug("Aged %d cache entries in %s", done - now)
|
|
||||||
|
|
||||||
self._next_age_at = int(self._aged_at * 1.5) # in case the dict shrinks
|
|
||||||
|
|
||||||
return self._aged_at
|
|
||||||
|
|
||||||
def __setitem__(self, key, value):
|
|
||||||
"""
|
|
||||||
Set an item.
|
|
||||||
|
|
||||||
If the memory limit would be exceeded, remove old items until
|
|
||||||
that is no longer the case.
|
|
||||||
|
|
||||||
If we need to age popularity counts, do so.
|
|
||||||
"""
|
|
||||||
# These types are gated by LocalClient, we don't need to double
|
|
||||||
# check.
|
|
||||||
#assert isinstance(key, str)
|
|
||||||
#assert isinstance(value, bytes)
|
|
||||||
|
|
||||||
dct = self._dict
|
|
||||||
|
|
||||||
if key in dct:
|
|
||||||
entry = dct[key]
|
|
||||||
self._gens[entry.cffi_ring_node.r_parent].update_MRU(entry, value)
|
|
||||||
else:
|
|
||||||
lru = self._eden
|
|
||||||
entry = lru.add_MRU(key, value)
|
|
||||||
dct[key] = entry
|
|
||||||
|
|
||||||
self._sets += 1
|
|
||||||
|
|
||||||
# Do we need to move this up above the eviction choices?
|
|
||||||
# Inline some of the logic about whether to age or not; avoiding the
|
|
||||||
# call helps speed
|
|
||||||
if self._hits + self._sets > self._next_age_at:
|
|
||||||
self._age()
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
||||||
def __contains__(self, key):
|
|
||||||
return key in self._dict
|
|
||||||
|
|
||||||
def __delitem__(self, key):
|
|
||||||
entry = self._dict[key]
|
|
||||||
del self._dict[key]
|
|
||||||
self._gens[entry.cffi_ring_node.r_parent].remove(entry)
|
|
||||||
|
|
||||||
def get_and_bubble_all(self, keys):
|
|
||||||
dct = self._dict
|
|
||||||
gens = self._gens
|
|
||||||
res = {}
|
|
||||||
for key in keys:
|
|
||||||
entry = dct.get(key)
|
|
||||||
if entry is not None:
|
|
||||||
self._hits += 1
|
|
||||||
gens[entry.cffi_ring_node.r_parent].on_hit(entry)
|
|
||||||
res[key] = entry.value
|
|
||||||
else:
|
|
||||||
self._misses += 1
|
|
||||||
return res
|
|
||||||
|
|
||||||
def get(self, key):
|
|
||||||
# Testing only. Does not bubble or increment.
|
|
||||||
entry = self._dict.get(key)
|
|
||||||
if entry is not None:
|
|
||||||
return entry.value
|
|
||||||
|
|
||||||
def __getitem__(self, key):
|
|
||||||
# Testing only. Doesn't bubble.
|
|
||||||
entry = self._dict[key]
|
|
||||||
entry.frequency += 1
|
|
||||||
return entry.value
|
|
||||||
|
|
||||||
# Benchmark for the general approach:
|
|
||||||
|
|
||||||
# Pickle is about 3x faster than marshal if we write single large
|
|
||||||
# objects, surprisingly. If we stick to writing smaller objects, the
|
|
||||||
# difference narrows to almost negligible.
|
|
||||||
|
|
||||||
# Writing 525MB of data, 655K keys (no compression):
|
|
||||||
# - code as-of commit e58126a (the previous major optimizations for version 1 format)
|
|
||||||
# version 1 format, solid dict under 3.4: write: 3.8s/read 7.09s
|
|
||||||
# 2.68s to update ring, 2.6s to read pickle
|
|
||||||
#
|
|
||||||
# -in a btree under 3.4: write: 4.8s/read 8.2s
|
|
||||||
# written as single list of the items
|
|
||||||
# 3.1s to load the pickle, 2.6s to update the ring
|
|
||||||
#
|
|
||||||
# -in a dict under 3.4: write: 3.7s/read 7.6s
|
|
||||||
# written as the dict and updated into the dict
|
|
||||||
# 2.7s loading the pickle, 2.9s to update the dict
|
|
||||||
# - in a dict under 3.4: write: 3.0s/read 12.8s
|
|
||||||
# written by iterating the ring and writing one key/value pair
|
|
||||||
# at a time, so this is the only solution that
|
|
||||||
# automatically preserves the LRU property (and would be amenable to
|
|
||||||
# capping read based on time, and written file size); this format also lets us avoid the
|
|
||||||
# full write buffer for HIGHEST_PROTOCOL < 4
|
|
||||||
# 2.5s spent in pickle.load, 8.9s spent in __setitem__,5.7s in ring.add
|
|
||||||
# - in a dict: write 3.2/read 9.1s
|
|
||||||
# same as above, but custom code to set the items
|
|
||||||
# 1.9s in pickle.load, 4.3s in ring.add
|
|
||||||
# - same as above, but in a btree: write 2.76s/read 10.6
|
|
||||||
# 1.8s in pickle.load, 3.8s in ring.add,
|
|
||||||
#
|
|
||||||
# For the final version with optimizations, the write time is 2.3s/read is 6.4s
|
|
||||||
|
|
||||||
_FILE_VERSION = 4
|
|
||||||
|
|
||||||
def load_from_file(self, cache_file):
|
|
||||||
now = time.time()
|
|
||||||
# Unlike write_to_file, using the raw stream
|
|
||||||
# is fine for both Py 2 and 3.
|
|
||||||
unpick = Unpickler(cache_file)
|
|
||||||
|
|
||||||
# Local optimizations
|
|
||||||
load = unpick.load
|
|
||||||
|
|
||||||
version = load()
|
|
||||||
if version != self._FILE_VERSION: # pragma: no cover
|
|
||||||
raise ValueError("Incorrect version of cache_file")
|
|
||||||
|
|
||||||
entries_oldest_first = list()
|
|
||||||
entries_oldest_first_append = entries_oldest_first.append
|
|
||||||
try:
|
|
||||||
while 1:
|
|
||||||
entries_oldest_first_append(load())
|
|
||||||
except EOFError:
|
|
||||||
pass
|
|
||||||
count = len(entries_oldest_first)
|
|
||||||
|
|
||||||
def _insert_entries(entries):
|
|
||||||
stored = 0
|
|
||||||
|
|
||||||
# local optimizations
|
|
||||||
data = self._dict
|
|
||||||
main = self._protected
|
|
||||||
ring_add = main.add_MRU
|
|
||||||
limit = main.limit
|
|
||||||
|
|
||||||
# Need to reoptimize this.
|
|
||||||
# size = self.size # update locally, copy back at end
|
|
||||||
|
|
||||||
for k, v in entries:
|
|
||||||
if k in data:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if main.size >= limit:
|
|
||||||
break
|
|
||||||
|
|
||||||
data[k] = ring_add(k, v)
|
|
||||||
|
|
||||||
stored += 1
|
|
||||||
|
|
||||||
return stored
|
|
||||||
|
|
||||||
stored = 0
|
|
||||||
if not self._dict:
|
|
||||||
# Empty, so quickly take everything they give us,
|
|
||||||
# oldest first so that the result is actually LRU
|
|
||||||
stored = _insert_entries(entries_oldest_first)
|
|
||||||
else:
|
|
||||||
# Loading more data into an existing bucket.
|
|
||||||
# Load only the *new* keys, trying to get the newest ones
|
|
||||||
# because LRU is going to get messed up anyway.
|
|
||||||
|
|
||||||
entries_newest_first = reversed(entries_oldest_first)
|
|
||||||
stored = _insert_entries(entries_newest_first)
|
|
||||||
|
|
||||||
then = time.time()
|
|
||||||
log.info("Examined %d and stored %d items from %s in %s",
|
|
||||||
count, stored, cache_file, then - now)
|
|
||||||
return count, stored
|
|
||||||
|
|
||||||
def write_to_file(self, cache_file):
|
|
||||||
now = time.time()
|
|
||||||
# pickling the items is about 3x faster than marshal
|
|
||||||
|
|
||||||
|
|
||||||
# Under Python 2, (or generally, under any pickle protocol
|
|
||||||
# less than 4, when framing was introduced) whether we are
|
|
||||||
# writing to an io.BufferedWriter, a <file> opened by name or
|
|
||||||
# fd, with default buffer or a large (16K) buffer, putting the
|
|
||||||
# Pickler directly on top of that stream is SLOW for large
|
|
||||||
# singe objects. Writing a 512MB dict takes ~40-50seconds. If
|
|
||||||
# instead we use a BytesIO to buffer in memory, that time goes
|
|
||||||
# down to about 7s. However, since we switched to writing many
|
|
||||||
# smaller objects, that need goes away.
|
|
||||||
|
|
||||||
pickler = Pickler(cache_file, -1) # Highest protocol
|
|
||||||
dump = pickler.dump
|
|
||||||
|
|
||||||
dump(self._FILE_VERSION) # Version marker
|
|
||||||
|
|
||||||
# Dump all the entries in increasing order of popularity (
|
|
||||||
# so that when we read them back in the least popular items end up LRU).
|
|
||||||
# Anything with a popularity of 0 probably hasn't been accessed in a long
|
|
||||||
# time, so don't dump it.
|
|
||||||
|
|
||||||
# Age them now, writing only the most popular. (But don't age in place just
|
|
||||||
# in case we're still being used.)
|
|
||||||
|
|
||||||
entries = list(sorted((e for e in itervalues(self._dict) if e.frequency // 2),
|
|
||||||
key=lambda e: e.frequency))
|
|
||||||
|
|
||||||
if len(entries) < len(self._dict):
|
|
||||||
log.info("Ignoring %d items for writing due to inactivity",
|
|
||||||
len(self._dict) - len(entries))
|
|
||||||
|
|
||||||
# Don't bother writing more than we'll be able to store.
|
|
||||||
count_written = 0
|
|
||||||
bytes_written = 0
|
|
||||||
byte_limit = self._protected.limit
|
|
||||||
for entry in entries:
|
|
||||||
bytes_written += entry.len
|
|
||||||
count_written += 1
|
|
||||||
if bytes_written > byte_limit:
|
|
||||||
break
|
|
||||||
|
|
||||||
dump((entry.key, entry.value))
|
|
||||||
|
|
||||||
then = time.time()
|
|
||||||
stats = self.stats()
|
|
||||||
log.info("Wrote %d items to %s in %s. Total hits %s; misses %s; ratio %s",
|
|
||||||
count_written, cache_file, then - now,
|
|
||||||
stats['hits'], stats['misses'], stats['ratio'])
|
|
||||||
|
|
||||||
|
|
||||||
class LocalClient(object):
|
class LocalClient(object):
|
||||||
"""A memcache-like object that stores in Python dictionaries."""
|
"""A memcache-like object that stores in Python dictionaries."""
|
||||||
|
|
|
@ -0,0 +1,394 @@
|
||||||
|
##############################################################################
|
||||||
|
#
|
||||||
|
# Copyright (c) 2009 Zope Foundation and Contributors.
|
||||||
|
# All Rights Reserved.
|
||||||
|
#
|
||||||
|
# This software is subject to the provisions of the Zope Public License,
|
||||||
|
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
|
||||||
|
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
|
||||||
|
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
|
||||||
|
# FOR A PARTICULAR PURPOSE.
|
||||||
|
#
|
||||||
|
##############################################################################
|
||||||
|
from __future__ import absolute_import, print_function, division
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import time
|
||||||
|
|
||||||
|
from relstorage._compat import itervalues
|
||||||
|
from relstorage._compat import PY3
|
||||||
|
if PY3:
|
||||||
|
# On Py3, use the built-in pickle, so that we can get
|
||||||
|
# protocol 4 when available. It is *much* faster at writing out
|
||||||
|
# individual large objects such as the cache dict (about 3-4x faster)
|
||||||
|
from pickle import Unpickler
|
||||||
|
from pickle import Pickler
|
||||||
|
else:
|
||||||
|
# On Py2, zodbpickle gives us protocol 3, but we don't
|
||||||
|
# use its special binary type
|
||||||
|
from relstorage._compat import Unpickler
|
||||||
|
from relstorage._compat import Pickler
|
||||||
|
|
||||||
|
|
||||||
|
from .lru import SizedLRU
|
||||||
|
from .lru import ProtectedLRU
|
||||||
|
from .lru import ProbationLRU
|
||||||
|
from .lru import EdenLRU
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class SizedLRUMapping(object):
    """
    A map that keeps a record of its approx. size.

    Keys must be ``str`` and values must be byte strings.

    Entries are kept in one of three generations (eden, probation and
    protected); ``self._dict`` indexes every entry no matter which
    generation currently holds it.

    This class is not threadsafe, accesses to __setitem__ and get_and_bubble_all
    must be protected by a lock.
    """

    # What multiplier of the number of items in the cache do we apply
    # to determine when to age the frequencies?
    _age_factor = 10

    # When did we last age?
    _aged_at = 0

    # Percentage of our byte limit that should be dedicated
    # to the main "protected" generation
    _gen_protected_pct = 0.8
    # Percentage of our byte limit that should be dedicated
    # to the initial "eden" generation
    _gen_eden_pct = 0.1
    # Percentage of our byte limit that should be dedicated
    # to the "probationary" generation
    _gen_probation_pct = 0.1
    # By default these numbers add up to 1.0, but it would be possible to
    # overcommit by making them sum to more than 1.0. (For very small
    # limits, the rounding will also make them overcommit).

    def __init__(self, limit):
        """
        Create an empty mapping.

        :param limit: The total byte budget. It is divided between the
            three generations according to the ``_gen_*_pct`` class
            attributes.
        """
        # We experimented with using OOBTree and LOBTree
        # for the type of self._dict. The OOBTree has a similar
        # but slightly slower performance profile (as would be expected
        # given the big-O complexity) as a dict, but very large ones can't
        # be pickled in a single shot! The LOBTree works faster and uses less
        # memory than the OOBTree or the dict *if* all the keys are integers;
        # which they currently are not. Plus the LOBTrees are slower on PyPy than its
        # own dict specializations. We were hoping to be able to write faster pickles with
        # large BTrees, but since that's not the case, we abandoned the idea.

        # This holds all the ring entries, no matter which ring they are in.
        self._dict = {}

        self._protected = ProtectedLRU(int(limit * self._gen_protected_pct))
        self._probation = ProbationLRU(int(limit * self._gen_probation_pct),
                                       self._protected,
                                       self._dict)
        self._eden = EdenLRU(int(limit * self._gen_eden_pct),
                             self._probation,
                             self._protected,
                             self._dict)

        # Lookup table from an entry's ``r_parent`` value to the
        # generation object that owns it.
        self._gens = [None, None, None, None] # 0 isn't used
        for x in (self._protected, self._probation, self._eden):
            self._gens[x.PARENT_CONST] = x
        self._gens = tuple(self._gens)

        self._hits = 0
        self._misses = 0
        self._sets = 0
        self.limit = limit
        self._next_age_at = 1000

    @property
    def size(self):
        """The combined ``size`` of the three generations."""
        return self._eden.size + self._protected.size + self._probation.size

    def reset_stats(self):
        """Zero the hit/miss/set counters and the aging bookkeeping."""
        self._hits = 0
        self._misses = 0
        self._sets = 0
        self._aged_at = 0
        self._next_age_at = 0

    def stats(self):
        """
        Return a dict of counters describing this mapping.

        ``ratio`` is hits divided by (hits + misses), or 0 when
        nothing has been looked up yet.
        """
        total = self._hits + self._misses
        return {
            'hits': self._hits,
            'misses': self._misses,
            'sets': self._sets,
            'ratio': self._hits/total if total else 0,
            'size': len(self._dict),
            'bytes': self.size,
            'eden_stats': self._eden.stats(),
            'prot_stats': self._protected.stats(),
            'prob_stats': self._probation.stats(),
        }

    def __len__(self):
        return len(self._dict)

    def _age(self):
        """
        Age the frequency counters of every entry, if it is time to.

        :return: The operation count at which aging was performed, or
            ``None`` if nothing was aged.
        """
        # Age only when we're full and would thus need to evict; this
        # makes initial population faster. It's cheaper to calculate this
        # AFTER the operations, though, because we read it from C.

        # Age the whole thing periodically based on the number of
        # operations we've done that would have altered popularity.
        # Dynamically calculate how often we need to age. By default, this is
        # based on what Caffeine's PerfectFrequency does: 10 * max
        # cache entries
        dct = self._dict
        age_period = self._age_factor * len(dct)
        operations = self._hits + self._sets
        if operations - self._aged_at < age_period:
            self._next_age_at = age_period
            return None
        if self.size < self.limit:
            return None

        self._aged_at = operations
        entry_count = len(dct)
        now = time.time()
        log.debug("Beginning frequency aging for %d cache entries",
                  entry_count)
        SizedLRU.age_lists(self._eden, self._probation, self._protected)
        done = time.time()
        # Both placeholders need an argument; with only the duration
        # supplied, logging reports a formatting error instead of the
        # message.
        log.debug("Aged %d cache entries in %s", entry_count, done - now)

        self._next_age_at = int(self._aged_at * 1.5) # in case the dict shrinks

        return self._aged_at

    def __setitem__(self, key, value):
        """
        Set an item.

        If the memory limit would be exceeded, remove old items until
        that is no longer the case.

        If we need to age popularity counts, do so.
        """
        # These types are gated by LocalClient, we don't need to double
        # check.
        #assert isinstance(key, str)
        #assert isinstance(value, bytes)

        dct = self._dict

        if key in dct:
            # Existing key: let the generation that owns the entry
            # replace its value.
            entry = dct[key]
            self._gens[entry.cffi_ring_node.r_parent].update_MRU(entry, value)
        else:
            # New keys always start out in eden.
            lru = self._eden
            entry = lru.add_MRU(key, value)
            dct[key] = entry

        self._sets += 1

        # Do we need to move this up above the eviction choices?
        # Inline some of the logic about whether to age or not; avoiding the
        # call helps speed
        if self._hits + self._sets > self._next_age_at:
            self._age()

        return True

    def __contains__(self, key):
        return key in self._dict

    def __delitem__(self, key):
        """Remove *key* from the index and from the generation holding it."""
        entry = self._dict[key]
        del self._dict[key]
        self._gens[entry.cffi_ring_node.r_parent].remove(entry)

    def get_and_bubble_all(self, keys):
        """
        Look up every key in *keys*, recording a hit or miss for each.

        Each entry found is reported to its generation via ``on_hit``.

        :return: A dict mapping the keys that were found to their values.
        """
        dct = self._dict
        gens = self._gens
        res = {}
        for key in keys:
            entry = dct.get(key)
            if entry is not None:
                self._hits += 1
                gens[entry.cffi_ring_node.r_parent].on_hit(entry)
                res[key] = entry.value
            else:
                self._misses += 1
        return res

    def get(self, key):
        # Testing only. Does not bubble or increment.
        entry = self._dict.get(key)
        if entry is not None:
            return entry.value
        return None

    def __getitem__(self, key):
        # Testing only. Doesn't bubble.
        entry = self._dict[key]
        entry.frequency += 1
        return entry.value

    # Benchmark for the general approach:

    # Pickle is about 3x faster than marshal if we write single large
    # objects, surprisingly. If we stick to writing smaller objects, the
    # difference narrows to almost negligible.

    # Writing 525MB of data, 655K keys (no compression):
    # - code as-of commit e58126a (the previous major optimizations for version 1 format)
    #    version 1 format, solid dict under 3.4: write: 3.8s/read 7.09s
    #    2.68s to update ring, 2.6s to read pickle
    #
    # -in a btree under 3.4: write: 4.8s/read 8.2s
    #    written as single list of the items
    #    3.1s to load the pickle, 2.6s to update the ring
    #
    # -in a dict under 3.4: write: 3.7s/read 7.6s
    #    written as the dict and updated into the dict
    #    2.7s loading the pickle, 2.9s to update the dict
    # - in a dict under 3.4: write: 3.0s/read 12.8s
    #    written by iterating the ring and writing one key/value pair
    #    at a time, so this is the only solution that
    #    automatically preserves the LRU property (and would be amenable to
    #    capping read based on time, and written file size); this format also lets us avoid the
    #    full write buffer for HIGHEST_PROTOCOL < 4
    #    2.5s spent in pickle.load, 8.9s spent in __setitem__,5.7s in ring.add
    # - in a dict: write 3.2/read 9.1s
    #    same as above, but custom code to set the items
    #    1.9s in pickle.load, 4.3s in ring.add
    # - same as above, but in a btree: write 2.76s/read 10.6
    #    1.8s in pickle.load, 3.8s in ring.add,
    #
    # For the final version with optimizations, the write time is 2.3s/read is 6.4s

    _FILE_VERSION = 4

    def load_from_file(self, cache_file):
        """
        Load entries previously saved by :meth:`write_to_file`.

        Keys that are already present are skipped, and loading stops
        once the protected generation has reached its limit.

        :param cache_file: A binary stream positioned at the start of
            the saved data.
        :return: A tuple ``(count, stored)``: how many entries were read
            from the file and how many of them were actually inserted.
        :raises ValueError: If the file's version marker does not match
            ``_FILE_VERSION``.
        """
        now = time.time()
        # NOTE: unpickling executes whatever the stream describes, so
        # cache_file must come from a trusted location.
        # Unlike write_to_file, using the raw stream
        # is fine for both Py 2 and 3.
        unpick = Unpickler(cache_file)

        # Local optimizations
        load = unpick.load

        version = load()
        if version != self._FILE_VERSION: # pragma: no cover
            raise ValueError("Incorrect version of cache_file")

        # The file is a sequence of individually pickled (key, value)
        # pairs; read until the stream is exhausted.
        entries_oldest_first = list()
        entries_oldest_first_append = entries_oldest_first.append
        try:
            while True:
                entries_oldest_first_append(load())
        except EOFError:
            pass
        count = len(entries_oldest_first)

        def _insert_entries(entries):
            stored = 0

            # local optimizations
            data = self._dict
            main = self._protected
            ring_add = main.add_MRU
            limit = main.limit

            # Need to reoptimize this.
            # size = self.size # update locally, copy back at end

            for k, v in entries:
                if k in data:
                    continue

                if main.size >= limit:
                    break

                data[k] = ring_add(k, v)

                stored += 1

            return stored

        stored = 0
        if not self._dict:
            # Empty, so quickly take everything they give us,
            # oldest first so that the result is actually LRU
            stored = _insert_entries(entries_oldest_first)
        else:
            # Loading more data into an existing bucket.
            # Load only the *new* keys, trying to get the newest ones
            # because LRU is going to get messed up anyway.

            entries_newest_first = reversed(entries_oldest_first)
            stored = _insert_entries(entries_newest_first)

        then = time.time()
        log.info("Examined %d and stored %d items from %s in %s",
                 count, stored, cache_file, then - now)
        return count, stored

    def write_to_file(self, cache_file):
        """
        Save the most popular entries to *cache_file*.

        A version marker is written first, followed by one pickled
        ``(key, value)`` pair per entry in increasing order of
        frequency. Entries whose halved frequency is zero are skipped,
        and writing stops once the accumulated entry lengths exceed the
        protected generation's limit.

        :param cache_file: A binary stream open for writing.
        """
        now = time.time()
        # pickling the items is about 3x faster than marshal

        # Under Python 2, (or generally, under any pickle protocol
        # less than 4, when framing was introduced) whether we are
        # writing to an io.BufferedWriter, a <file> opened by name or
        # fd, with default buffer or a large (16K) buffer, putting the
        # Pickler directly on top of that stream is SLOW for large
        # single objects. Writing a 512MB dict takes ~40-50seconds. If
        # instead we use a BytesIO to buffer in memory, that time goes
        # down to about 7s. However, since we switched to writing many
        # smaller objects, that need goes away.

        pickler = Pickler(cache_file, -1) # Highest protocol
        dump = pickler.dump

        dump(self._FILE_VERSION) # Version marker

        # Dump all the entries in increasing order of popularity (
        # so that when we read them back in the least popular items end up LRU).
        # Anything with a popularity of 0 probably hasn't been accessed in a long
        # time, so don't dump it.

        # Age them now, writing only the most popular. (But don't age in place just
        # in case we're still being used.)

        # XXX: Together with only writing what will fit in the protected space,
        # is this optimal? One of the goals is to speed up startup, which may access
        # objects that are never or rarely used again. They'll tend to wind up in
        # the probation space over time, or at least have a very low frequency.
        # Maybe we shouldn't prevent writing aged items, and maybe we should fill up
        # probation and eden too. We probably want to allow the user to specify
        # a size limit at this point.

        entries = sorted((e for e in itervalues(self._dict) if e.frequency // 2),
                         key=lambda e: e.frequency)

        if len(entries) < len(self._dict):
            log.info("Ignoring %d items for writing due to inactivity",
                     len(self._dict) - len(entries))

        # Don't bother writing more than we'll be able to store.
        count_written = 0
        bytes_written = 0
        byte_limit = self._protected.limit
        for entry in entries:
            bytes_written += entry.len
            if bytes_written > byte_limit:
                break

            dump((entry.key, entry.value))
            # Count only what was actually dumped so the summary below
            # does not include the entry that tripped the limit.
            count_written += 1

        then = time.time()
        stats = self.stats()
        log.info("Wrote %d items to %s in %s. Total hits %s; misses %s; ratio %s",
                 count_written, cache_file, then - now,
                 stats['hits'], stats['misses'], stats['ratio'])
|
|
@ -373,11 +373,11 @@ class StorageCacheTests(unittest.TestCase):
|
||||||
self.assertEqual(c.delta_after1, {})
|
self.assertEqual(c.delta_after1, {})
|
||||||
|
|
||||||
|
|
||||||
class LocalClientBucketTests(unittest.TestCase):
|
class SizedLRUMappingTests(unittest.TestCase):
|
||||||
|
|
||||||
def getClass(self):
|
def getClass(self):
|
||||||
from relstorage.cache import LocalClientBucket
|
from relstorage.cache.mapping import SizedLRUMapping
|
||||||
return LocalClientBucket
|
return SizedLRUMapping
|
||||||
|
|
||||||
def test_set_bytes_value(self):
|
def test_set_bytes_value(self):
|
||||||
b = self.getClass()(100)
|
b = self.getClass()(100)
|
||||||
|
@ -962,7 +962,8 @@ class MockPoller(object):
|
||||||
if tid > after_tid and tid <= last_tid)
|
if tid > after_tid and tid <= last_tid)
|
||||||
|
|
||||||
def local_benchmark():
|
def local_benchmark():
|
||||||
from relstorage.cache import LocalClient, LocalClientBucket
|
from relstorage.cache.mapping import SizedLRUMapping
|
||||||
|
from relstorage.cache import LocalClient
|
||||||
options = MockOptions()
|
options = MockOptions()
|
||||||
options.cache_local_mb = 100
|
options.cache_local_mb = 100
|
||||||
options.cache_local_compression = 'none'
|
options.cache_local_compression = 'none'
|
||||||
|
@ -1219,7 +1220,8 @@ def local_benchmark():
|
||||||
do_times()
|
do_times()
|
||||||
|
|
||||||
def save_load_benchmark():
|
def save_load_benchmark():
|
||||||
from relstorage.cache import LocalClientBucket, _Loader
|
from relstorage.cache.mapping import SizedLRUMapping as LocalClientBucket
|
||||||
|
from relstorage.cache import _Loader
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
import os
|
import os
|
||||||
import itertools
|
import itertools
|
||||||
|
@ -1297,7 +1299,7 @@ def save_load_benchmark():
|
||||||
def test_suite():
    """
    Build the suite of cache tests defined in this module.

    :return: A ``unittest.TestSuite`` containing the tests of
        ``StorageCacheTests``, ``SizedLRUMappingTests`` and
        ``LocalClientTests``, in that order.
    """
    # unittest.makeSuite is deprecated (removed in Python 3.13); the
    # default loader's loadTestsFromTestCase collects the same tests.
    load_case = unittest.defaultTestLoader.loadTestsFromTestCase
    suite = unittest.TestSuite()
    suite.addTest(load_case(StorageCacheTests))
    suite.addTest(load_case(SizedLRUMappingTests))
    suite.addTest(load_case(LocalClientTests))
    return suite
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue