Move and rename LocalClientBucket to something more fitting.

Jason Madden 2016-09-20 06:15:45 -05:00
parent 0a5b4394ed
commit 7d783c6903
4 changed files with 404 additions and 358 deletions


@@ -3,6 +3,7 @@ source = relstorage
omit =
relstorage/tests/*
relstorage/adapters/tests/*
relstorage/cache/tests/*
# Omit the main oracle adapter file since we don't test it at all
relstorage/adapters/oracle.py


@@ -718,358 +718,7 @@ class StorageCache(object):
log.debug("Checkpoints already shifted to %s. "
"len(delta_after0) == %d.", old_value, len(self.delta_after0))
_OSA = object.__setattr__
from .lru import SizedLRU
from .lru import ProtectedLRU
from .lru import ProbationLRU
from .lru import EdenLRU
class LocalClientBucket(object):
"""
A map that keeps a record of its approx. size.
Keys must be ``str`` and values must be byte strings.
This class is not threadsafe; access to ``__setitem__`` and
``get_and_bubble_all`` must be protected by a lock.
"""
# What multiplier of the number of items in the cache do we apply
# to determine when to age the frequencies?
_age_factor = 10
# When did we last age?
_aged_at = 0
# Percentage of our byte limit that should be dedicated
# to the main "protected" generation
_gen_protected_pct = 0.8
# Percentage of our byte limit that should be dedicated
# to the initial "eden" generation
_gen_eden_pct = 0.1
# Percentage of our byte limit that should be dedicated
# to the "probationary"generation
_gen_probation_pct = 0.1
# By default these numbers add up to 1.0, but it would be possible to
# overcommit by making them sum to more than 1.0. (For very small
# limits, the rounding will also make them overcommit).
def __init__(self, limit):
# We experimented with using OOBTree and LOBTree
# for the type of self._dict. The OOBTree has a similar
# but slightly slower performance profile (as would be expected
# given the big-O complexity) as a dict, but very large ones can't
# be pickled in a single shot! The LOBTree works faster and uses less
# memory than the OOBTree or the dict *if* all the keys are integers;
# which they currently are not. Plus the LOBTrees are slower on PyPy than its
# own dict specializations. We were hoping to be able to write faster pickles with
# large BTrees, but since that's not the case, we abandoned the idea.
# This holds all the ring entries, no matter which ring they are in.
self._dict = {}
self._protected = ProtectedLRU(int(limit * self._gen_protected_pct))
self._probation = ProbationLRU(int(limit * self._gen_probation_pct),
self._protected,
self._dict)
self._eden = EdenLRU(int(limit * self._gen_eden_pct),
self._probation,
self._protected,
self._dict)
self._gens = [None, None, None, None] # 0 isn't used
for x in (self._protected, self._probation, self._eden):
self._gens[x.PARENT_CONST] = x
self._gens = tuple(self._gens)
self._hits = 0
self._misses = 0
self._sets = 0
self.limit = limit
self._next_age_at = 1000
@property
def size(self):
return self._eden.size + self._protected.size + self._probation.size
def reset_stats(self):
self._hits = 0
self._misses = 0
self._sets = 0
self._aged_at = 0
self._next_age_at = 0
def stats(self):
total = self._hits + self._misses
return {
'hits': self._hits,
'misses': self._misses,
'sets': self._sets,
'ratio': self._hits/total if total else 0,
'size': len(self._dict),
'bytes': self.size,
'eden_stats': self._eden.stats(),
'prot_stats': self._protected.stats(),
'prob_stats': self._probation.stats(),
}
def __len__(self):
return len(self._dict)
def _age(self):
# Age only when we're full and would thus need to evict; this
# makes initial population faster. It's cheaper to calculate this
# AFTER the operations, though, because we read it from C.
#if self.size < self.limit:
# return
# Age the whole thing periodically based on the number of
# operations we've done that would have altered popularity.
# Dynamically calculate how often we need to age. By default, this is
# based on what Caffeine's PerfectFrequency does: 10 * max
# cache entries
dct = self._dict
age_period = self._age_factor * len(dct)
operations = self._hits + self._sets
if operations - self._aged_at < age_period:
self._next_age_at = age_period
return
if self.size < self.limit:
return
self._aged_at = operations
now = time.time()
log.debug("Beginning frequency aging for %d cache entries",
len(dct))
SizedLRU.age_lists(self._eden, self._probation, self._protected)
done = time.time()
log.debug("Aged %d cache entries in %s", done - now)
self._next_age_at = int(self._aged_at * 1.5) # in case the dict shrinks
return self._aged_at
def __setitem__(self, key, value):
"""
Set an item.
If the memory limit would be exceeded, remove old items until
that is no longer the case.
If we need to age popularity counts, do so.
"""
# These types are gated by LocalClient, we don't need to double
# check.
#assert isinstance(key, str)
#assert isinstance(value, bytes)
dct = self._dict
if key in dct:
entry = dct[key]
self._gens[entry.cffi_ring_node.r_parent].update_MRU(entry, value)
else:
lru = self._eden
entry = lru.add_MRU(key, value)
dct[key] = entry
self._sets += 1
# Do we need to move this up above the eviction choices?
# Inline some of the logic about whether to age or not; avoiding the
# call helps speed
if self._hits + self._sets > self._next_age_at:
self._age()
return True
def __contains__(self, key):
return key in self._dict
def __delitem__(self, key):
entry = self._dict[key]
del self._dict[key]
self._gens[entry.cffi_ring_node.r_parent].remove(entry)
def get_and_bubble_all(self, keys):
dct = self._dict
gens = self._gens
res = {}
for key in keys:
entry = dct.get(key)
if entry is not None:
self._hits += 1
gens[entry.cffi_ring_node.r_parent].on_hit(entry)
res[key] = entry.value
else:
self._misses += 1
return res
def get(self, key):
# Testing only. Does not bubble or increment.
entry = self._dict.get(key)
if entry is not None:
return entry.value
def __getitem__(self, key):
# Testing only. Doesn't bubble.
entry = self._dict[key]
entry.frequency += 1
return entry.value
# Benchmark for the general approach:
# Pickle is about 3x faster than marshal if we write single large
# objects, surprisingly. If we stick to writing smaller objects, the
difference becomes almost negligible.
# Writing 525MB of data, 655K keys (no compression):
# - code as-of commit e58126a (the previous major optimizations for version 1 format)
# version 1 format, solid dict under 3.4: write: 3.8s/read 7.09s
# 2.68s to update ring, 2.6s to read pickle
#
# - in a btree under 3.4: write: 4.8s/read 8.2s
# written as single list of the items
# 3.1s to load the pickle, 2.6s to update the ring
#
# - in a dict under 3.4: write: 3.7s/read 7.6s
# written as the dict and updated into the dict
# 2.7s loading the pickle, 2.9s to update the dict
# - in a dict under 3.4: write: 3.0s/read 12.8s
# written by iterating the ring and writing one key/value pair
# at a time, so this is the only solution that
# automatically preserves the LRU property (and would be amenable to
# capping read based on time, and written file size); this format also lets us avoid the
# full write buffer for HIGHEST_PROTOCOL < 4
# 2.5s spent in pickle.load, 8.9s spent in __setitem__, 5.7s in ring.add
# - in a dict: write 3.2s/read 9.1s
# same as above, but custom code to set the items
# 1.9s in pickle.load, 4.3s in ring.add
# - same as above, but in a btree: write 2.76s/read 10.6s
# 1.8s in pickle.load, 3.8s in ring.add,
#
# For the final version with optimizations, the write time is 2.3s and the read time is 6.4s
_FILE_VERSION = 4
def load_from_file(self, cache_file):
now = time.time()
# Unlike write_to_file, using the raw stream
# is fine for both Py 2 and 3.
unpick = Unpickler(cache_file)
# Local optimizations
load = unpick.load
version = load()
if version != self._FILE_VERSION: # pragma: no cover
raise ValueError("Incorrect version of cache_file")
entries_oldest_first = list()
entries_oldest_first_append = entries_oldest_first.append
try:
while 1:
entries_oldest_first_append(load())
except EOFError:
pass
count = len(entries_oldest_first)
def _insert_entries(entries):
stored = 0
# local optimizations
data = self._dict
main = self._protected
ring_add = main.add_MRU
limit = main.limit
# Need to reoptimize this.
# size = self.size # update locally, copy back at end
for k, v in entries:
if k in data:
continue
if main.size >= limit:
break
data[k] = ring_add(k, v)
stored += 1
return stored
stored = 0
if not self._dict:
# Empty, so quickly take everything they give us,
# oldest first so that the result is actually LRU
stored = _insert_entries(entries_oldest_first)
else:
# Loading more data into an existing bucket.
# Load only the *new* keys, trying to get the newest ones
# because LRU is going to get messed up anyway.
entries_newest_first = reversed(entries_oldest_first)
stored = _insert_entries(entries_newest_first)
then = time.time()
log.info("Examined %d and stored %d items from %s in %s",
count, stored, cache_file, then - now)
return count, stored
def write_to_file(self, cache_file):
now = time.time()
# pickling the items is about 3x faster than marshal
# Under Python 2, (or generally, under any pickle protocol
# less than 4, when framing was introduced) whether we are
# writing to an io.BufferedWriter, a <file> opened by name or
# fd, with default buffer or a large (16K) buffer, putting the
# Pickler directly on top of that stream is SLOW for large
# single objects. Writing a 512MB dict takes ~40-50 seconds. If
# instead we use a BytesIO to buffer in memory, that time goes
# down to about 7s. However, since we switched to writing many
# smaller objects, that need goes away.
pickler = Pickler(cache_file, -1) # Highest protocol
dump = pickler.dump
dump(self._FILE_VERSION) # Version marker
# Dump all the entries in increasing order of popularity (so that
# when we read them back in, the least popular items end up LRU).
# Anything with a popularity of 0 probably hasn't been accessed in a long
# time, so don't dump it.
# Age them now, writing only the most popular. (But don't age in place just
# in case we're still being used.)
entries = list(sorted((e for e in itervalues(self._dict) if e.frequency // 2),
key=lambda e: e.frequency))
if len(entries) < len(self._dict):
log.info("Ignoring %d items for writing due to inactivity",
len(self._dict) - len(entries))
# Don't bother writing more than we'll be able to store.
count_written = 0
bytes_written = 0
byte_limit = self._protected.limit
for entry in entries:
bytes_written += entry.len
count_written += 1
if bytes_written > byte_limit:
break
dump((entry.key, entry.value))
then = time.time()
stats = self.stats()
log.info("Wrote %d items to %s in %s. Total hits %s; misses %s; ratio %s",
count_written, cache_file, then - now,
stats['hits'], stats['misses'], stats['ratio'])
from .mapping import SizedLRUMapping as LocalClientBucket
class LocalClient(object):
"""A memcache-like object that stores in Python dictionaries."""

relstorage/cache/mapping.py (new file, 394 lines)

@@ -0,0 +1,394 @@
##############################################################################
#
# Copyright (c) 2009 Zope Foundation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
from __future__ import absolute_import, print_function, division
import logging
import time
from relstorage._compat import itervalues
from relstorage._compat import PY3
if PY3:
# On Py3, use the built-in pickle, so that we can get
# protocol 4 when available. It is *much* faster at writing out
# individual large objects such as the cache dict (about 3-4x faster)
from pickle import Unpickler
from pickle import Pickler
else:
# On Py2, zodbpickle gives us protocol 3, but we don't
# use its special binary type
from relstorage._compat import Unpickler
from relstorage._compat import Pickler
from .lru import SizedLRU
from .lru import ProtectedLRU
from .lru import ProbationLRU
from .lru import EdenLRU
log = logging.getLogger(__name__)
class SizedLRUMapping(object):
"""
A map that keeps a record of its approx. size.
Keys must be ``str`` and values must be byte strings.
This class is not threadsafe; access to ``__setitem__`` and
``get_and_bubble_all`` must be protected by a lock.
"""
# What multiplier of the number of items in the cache do we apply
# to determine when to age the frequencies?
_age_factor = 10
# When did we last age?
_aged_at = 0
# Percentage of our byte limit that should be dedicated
# to the main "protected" generation
_gen_protected_pct = 0.8
# Percentage of our byte limit that should be dedicated
# to the initial "eden" generation
_gen_eden_pct = 0.1
# Percentage of our byte limit that should be dedicated
# to the "probationary"generation
_gen_probation_pct = 0.1
# By default these numbers add up to 1.0, but it would be possible to
# overcommit by making them sum to more than 1.0. (For very small
# limits, the rounding will also make them overcommit).
def __init__(self, limit):
# We experimented with using OOBTree and LOBTree
# for the type of self._dict. The OOBTree has a similar
# but slightly slower performance profile (as would be expected
# given the big-O complexity) as a dict, but very large ones can't
# be pickled in a single shot! The LOBTree works faster and uses less
# memory than the OOBTree or the dict *if* all the keys are integers,
# which they currently are not. Plus, on PyPy, LOBTrees are slower than
# PyPy's own dict specializations. We were hoping to write faster pickles with
# large BTrees, but since that's not the case, we abandoned the idea.
# This holds all the ring entries, no matter which ring they are in.
self._dict = {}
self._protected = ProtectedLRU(int(limit * self._gen_protected_pct))
self._probation = ProbationLRU(int(limit * self._gen_probation_pct),
self._protected,
self._dict)
self._eden = EdenLRU(int(limit * self._gen_eden_pct),
self._probation,
self._protected,
self._dict)
self._gens = [None, None, None, None] # 0 isn't used
for x in (self._protected, self._probation, self._eden):
self._gens[x.PARENT_CONST] = x
self._gens = tuple(self._gens)
self._hits = 0
self._misses = 0
self._sets = 0
self.limit = limit
self._next_age_at = 1000
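# A minimal sketch of the sizing arithmetic above (the helper name
# `_example_split` is invented for illustration, not part of this class):
# with the default 0.8/0.1/0.1 split, a 10 MB budget yields byte limits
# of 8388608/1048576/1048576. int() truncates, which is the rounding
# effect noted above for very small limits.
def _example_split(limit=10 * 1024 * 1024):
    protected = int(limit * 0.8)   # main "protected" generation
    probation = int(limit * 0.1)   # "probationary" generation
    eden = int(limit * 0.1)        # initial "eden" generation
    return protected, probation, eden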
@property
def size(self):
return self._eden.size + self._protected.size + self._probation.size
def reset_stats(self):
self._hits = 0
self._misses = 0
self._sets = 0
self._aged_at = 0
self._next_age_at = 0
def stats(self):
total = self._hits + self._misses
return {
'hits': self._hits,
'misses': self._misses,
'sets': self._sets,
'ratio': self._hits/total if total else 0,
'size': len(self._dict),
'bytes': self.size,
'eden_stats': self._eden.stats(),
'prot_stats': self._protected.stats(),
'prob_stats': self._probation.stats(),
}
def __len__(self):
return len(self._dict)
def _age(self):
# Age only when we're full and would thus need to evict; this
# makes initial population faster. It's cheaper to calculate this
# AFTER the operations, though, because we read it from C.
#if self.size < self.limit:
# return
# Age the whole thing periodically based on the number of
# operations we've done that would have altered popularity.
# Dynamically calculate how often we need to age. By default, this is
# based on what Caffeine's PerfectFrequency does: 10 * max
# cache entries
dct = self._dict
age_period = self._age_factor * len(dct)
operations = self._hits + self._sets
if operations - self._aged_at < age_period:
self._next_age_at = age_period
return
if self.size < self.limit:
return
self._aged_at = operations
now = time.time()
log.debug("Beginning frequency aging for %d cache entries",
len(dct))
SizedLRU.age_lists(self._eden, self._probation, self._protected)
done = time.time()
log.debug("Aged %d cache entries in %s", done - now)
self._next_age_at = int(self._aged_at * 1.5) # in case the dict shrinks
return self._aged_at
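# A standalone sketch of the aging cadence (the helper name `_should_age`
# is invented for illustration): with the default _age_factor of 10,
# aging fires only after roughly 10 * len(cache) popularity-changing
# operations since the last aging, and only once the cache is full
# enough to face eviction.
def _should_age(operations, aged_at, num_entries, size, limit, age_factor=10):
    # `operations` is hits + sets, as computed in _age() above.
    return (operations - aged_at >= age_factor * num_entries
            and size >= limit)
# e.g. _should_age(10500, 0, 1000, size=100, limit=100) is True, but
# with size=99 it is False: the cache isn't full, so aging can wait.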
def __setitem__(self, key, value):
"""
Set an item.
If the memory limit would be exceeded, remove old items until
that is no longer the case.
If we need to age popularity counts, do so.
"""
# These types are gated by LocalClient, we don't need to double
# check.
#assert isinstance(key, str)
#assert isinstance(value, bytes)
dct = self._dict
if key in dct:
entry = dct[key]
self._gens[entry.cffi_ring_node.r_parent].update_MRU(entry, value)
else:
lru = self._eden
entry = lru.add_MRU(key, value)
dct[key] = entry
self._sets += 1
# Do we need to move this up above the eviction choices?
# Inline some of the logic about whether to age or not; avoiding the
# call helps speed
if self._hits + self._sets > self._next_age_at:
self._age()
return True
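# A toy, count-based model of the generational flow the rings implement
# (all names and caps here are invented for illustration; the real
# generations are byte-sized, CFFI-backed, and consult frequency when
# deciding promotions): new keys enter eden, eden overflow spills into
# probation, and a hit in probation promotes the entry to protected.
from collections import OrderedDict
def _toy_segmented_lru(accesses, eden_cap=2, probation_cap=2, protected_cap=4):
    eden, probation, protected = OrderedDict(), OrderedDict(), OrderedDict()
    for key in accesses:
        if key in protected:
            protected.move_to_end(key)          # hit: refresh MRU position
        elif key in probation:
            del probation[key]                  # hit: promote to protected
            protected[key] = True
            if len(protected) > protected_cap:
                protected.popitem(last=False)   # evict the protected LRU
        elif key in eden:
            eden.move_to_end(key)               # hit while still in eden
        else:
            eden[key] = True                    # new keys enter eden
            if len(eden) > eden_cap:
                demoted, _ = eden.popitem(last=False)
                probation[demoted] = True       # overflow spills to probation
                if len(probation) > probation_cap:
                    probation.popitem(last=False)
    return list(eden), list(probation), list(protected)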
def __contains__(self, key):
return key in self._dict
def __delitem__(self, key):
entry = self._dict[key]
del self._dict[key]
self._gens[entry.cffi_ring_node.r_parent].remove(entry)
def get_and_bubble_all(self, keys):
dct = self._dict
gens = self._gens
res = {}
for key in keys:
entry = dct.get(key)
if entry is not None:
self._hits += 1
gens[entry.cffi_ring_node.r_parent].on_hit(entry)
res[key] = entry.value
else:
self._misses += 1
return res
def get(self, key):
# Testing only. Does not bubble or increment.
entry = self._dict.get(key)
if entry is not None:
return entry.value
def __getitem__(self, key):
# Testing only. Doesn't bubble.
entry = self._dict[key]
entry.frequency += 1
return entry.value
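# A minimal usage sketch (the wrapper `_example_usage` is invented for
# illustration; it assumes the CFFI ring support in relstorage.cache.lru
# imports cleanly). Keys are native strings and values are byte strings,
# per the class docstring.
def _example_usage():
    cache = SizedLRUMapping(1 << 20)             # ~1 MB byte budget
    cache['oid:1'] = b'pickled object state'
    found = cache.get_and_bubble_all(('oid:1', 'oid:2'))
    assert found == {'oid:1': b'pickled object state'}
    return cache.stats()                         # hits == 1, misses == 1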
# Benchmark for the general approach:
# Pickle is about 3x faster than marshal if we write single large
# objects, surprisingly. If we stick to writing smaller objects, the
difference becomes almost negligible.
# Writing 525MB of data, 655K keys (no compression):
# - code as-of commit e58126a (the previous major optimizations for version 1 format)
# version 1 format, solid dict under 3.4: write: 3.8s/read 7.09s
# 2.68s to update ring, 2.6s to read pickle
#
# - in a btree under 3.4: write: 4.8s/read 8.2s
# written as single list of the items
# 3.1s to load the pickle, 2.6s to update the ring
#
# - in a dict under 3.4: write: 3.7s/read 7.6s
# written as the dict and updated into the dict
# 2.7s loading the pickle, 2.9s to update the dict
# - in a dict under 3.4: write: 3.0s/read 12.8s
# written by iterating the ring and writing one key/value pair
# at a time, so this is the only solution that
# automatically preserves the LRU property (and would be amenable to
# capping read based on time, and written file size); this format also lets us avoid the
# full write buffer for HIGHEST_PROTOCOL < 4
# 2.5s spent in pickle.load, 8.9s spent in __setitem__, 5.7s in ring.add
# - in a dict: write 3.2s/read 9.1s
# same as above, but custom code to set the items
# 1.9s in pickle.load, 4.3s in ring.add
# - same as above, but in a btree: write 2.76s/read 10.6s
# 1.8s in pickle.load, 3.8s in ring.add,
#
# For the final version with optimizations, the write time is 2.3s and the read time is 6.4s
_FILE_VERSION = 4
def load_from_file(self, cache_file):
now = time.time()
# Unlike write_to_file, using the raw stream
# is fine for both Py 2 and 3.
unpick = Unpickler(cache_file)
# Local optimizations
load = unpick.load
version = load()
if version != self._FILE_VERSION: # pragma: no cover
raise ValueError("Incorrect version of cache_file")
entries_oldest_first = list()
entries_oldest_first_append = entries_oldest_first.append
try:
while 1:
entries_oldest_first_append(load())
except EOFError:
pass
count = len(entries_oldest_first)
def _insert_entries(entries):
stored = 0
# local optimizations
data = self._dict
main = self._protected
ring_add = main.add_MRU
limit = main.limit
# Need to reoptimize this.
# size = self.size # update locally, copy back at end
for k, v in entries:
if k in data:
continue
if main.size >= limit:
break
data[k] = ring_add(k, v)
stored += 1
return stored
stored = 0
if not self._dict:
# Empty, so quickly take everything they give us,
# oldest first so that the result is actually LRU
stored = _insert_entries(entries_oldest_first)
else:
# Loading more data into an existing bucket.
# Load only the *new* keys, trying to get the newest ones
# because LRU is going to get messed up anyway.
entries_newest_first = reversed(entries_oldest_first)
stored = _insert_entries(entries_newest_first)
then = time.time()
log.info("Examined %d and stored %d items from %s in %s",
count, stored, cache_file, then - now)
return count, stored
def write_to_file(self, cache_file):
now = time.time()
# pickling the items is about 3x faster than marshal
# Under Python 2, (or generally, under any pickle protocol
# less than 4, when framing was introduced) whether we are
# writing to an io.BufferedWriter, a <file> opened by name or
# fd, with default buffer or a large (16K) buffer, putting the
# Pickler directly on top of that stream is SLOW for large
# single objects. Writing a 512MB dict takes ~40-50 seconds. If
# instead we use a BytesIO to buffer in memory, that time goes
# down to about 7s. However, since we switched to writing many
# smaller objects, that need goes away.
pickler = Pickler(cache_file, -1) # Highest protocol
dump = pickler.dump
dump(self._FILE_VERSION) # Version marker
# Dump all the entries in increasing order of popularity (so that
# when we read them back in, the least popular items end up LRU).
# Anything with a popularity of 0 probably hasn't been accessed in a long
# time, so don't dump it.
# Age them now, writing only the most popular. (But don't age in place just
# in case we're still being used.)
# XXX: Together with only writing what will fit in the protected space,
# is this optimal? One of the goals is to speed up startup, which may access
# objects that are never or rarely used again. They'll tend to wind up in
# the probation space over time, or at least have a very low frequency.
# Maybe we shouldn't prevent writing aged items, and maybe we should fill up
# probation and eden too. We probably want to allow the user to specify
# a size limit at this point.
entries = list(sorted((e for e in itervalues(self._dict) if e.frequency // 2),
key=lambda e: e.frequency))
if len(entries) < len(self._dict):
log.info("Ignoring %d items for writing due to inactivity",
len(self._dict) - len(entries))
# Don't bother writing more than we'll be able to store.
count_written = 0
bytes_written = 0
byte_limit = self._protected.limit
for entry in entries:
bytes_written += entry.len
count_written += 1
if bytes_written > byte_limit:
break
dump((entry.key, entry.value))
then = time.time()
stats = self.stats()
log.info("Wrote %d items to %s in %s. Total hits %s; misses %s; ratio %s",
count_written, cache_file, then - now,
stats['hits'], stats['misses'], stats['ratio'])
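# A minimal round-trip sketch of the persistence format above (the
# helper `_example_round_trip` is invented for illustration): a version
# marker followed by (key, value) pickles until EOF, with io.BytesIO
# standing in for the cache file. The popularity bumps assume hits
# increment an entry's frequency; write_to_file skips entries whose
# frequency is below 2.
def _example_round_trip():
    from io import BytesIO
    source = SizedLRUMapping(1 << 20)
    source['oid:1'] = b'state'
    source.get_and_bubble_all(('oid:1',))   # bump popularity so the
    source.get_and_bubble_all(('oid:1',))   # entry survives the filter
    buf = BytesIO()
    source.write_to_file(buf)
    buf.seek(0)
    fresh = SizedLRUMapping(1 << 20)
    examined, stored = fresh.load_from_file(buf)
    return examined, stored                 # expect (1, 1)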


@@ -373,11 +373,11 @@ class StorageCacheTests(unittest.TestCase):
self.assertEqual(c.delta_after1, {})
class LocalClientBucketTests(unittest.TestCase):
class SizedLRUMappingTests(unittest.TestCase):
def getClass(self):
from relstorage.cache import LocalClientBucket
return LocalClientBucket
from relstorage.cache.mapping import SizedLRUMapping
return SizedLRUMapping
def test_set_bytes_value(self):
b = self.getClass()(100)
@@ -962,7 +962,8 @@ class MockPoller(object):
if tid > after_tid and tid <= last_tid)
def local_benchmark():
from relstorage.cache import LocalClient, LocalClientBucket
from relstorage.cache.mapping import SizedLRUMapping
from relstorage.cache import LocalClient
options = MockOptions()
options.cache_local_mb = 100
options.cache_local_compression = 'none'
@@ -1219,7 +1220,8 @@ def local_benchmark():
do_times()
def save_load_benchmark():
from relstorage.cache import LocalClientBucket, _Loader
from relstorage.cache.mapping import SizedLRUMapping as LocalClientBucket
from relstorage.cache import _Loader
from io import BytesIO
import os
import itertools
@@ -1297,7 +1299,7 @@ def save_load_benchmark():
def test_suite():
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(StorageCacheTests))
suite.addTest(unittest.makeSuite(LocalClientBucketTests))
suite.addTest(unittest.makeSuite(SizedLRUMappingTests))
suite.addTest(unittest.makeSuite(LocalClientTests))
return suite