relstorage/relstorage/cache/mapping.py

##############################################################################
#
# Copyright (c) 2009 Zope Foundation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
from __future__ import absolute_import, print_function, division

import logging

import time

from relstorage._compat import itervalues
from relstorage._compat import PY3
if PY3:
    # On Py3, use the built-in pickle, so that we can get
    # protocol 4 when available. It is *much* faster at writing out
    # individual large objects such as the cache dict (about 3-4x faster)
    from pickle import Unpickler
    from pickle import Pickler
else:
    # On Py2, zodbpickle gives us protocol 3, but we don't
    # use its special binary type
    from relstorage._compat import Unpickler
    from relstorage._compat import Pickler


from .lru import SizedLRU
from .lru import ProtectedLRU
from .lru import ProbationLRU
from .lru import EdenLRU

log = logging.getLogger(__name__)


class SizedLRUMapping(object):
    """
    A map that keeps a record of its approx. size.

    Keys must be `str` and values must be byte strings.

    Every entry lives in exactly one of three generation rings (eden,
    probation, protected); ``self._dict`` indexes all of them by key.
    New keys are always added to the eden generation.

    This class is not threadsafe, accesses to __setitem__ and get_and_bubble_all
    must be protected by a lock.
    """

    # What multiplier of the number of items in the cache do we apply
    # to determine when to age the frequencies?
    _age_factor = 10

    # When did we last age? (Measured in operations: hits + sets.)
    _aged_at = 0

    # Percentage of our byte limit that should be dedicated
    # to the main "protected" generation
    _gen_protected_pct = 0.8
    # Percentage of our byte limit that should be dedicated
    # to the initial "eden" generation
    _gen_eden_pct = 0.1
    # Percentage of our byte limit that should be dedicated
    # to the "probationary" generation
    _gen_probation_pct = 0.1
    # By default these numbers add up to 1.0, but it would be possible to
    # overcommit by making them sum to more than 1.0. (For very small
    # limits, the rounding will also make them overcommit).

    def __init__(self, limit):
        """
        Create an empty mapping.

        :param limit: The overall size limit. Each generation gets
            ``int(limit * pct)`` of it, using the ``_gen_*_pct``
            class attributes.
        """
        # We experimented with using OOBTree and LOBTree
        # for the type of self._dict. The OOBTree has a similar
        # but slightly slower performance profile (as would be expected
        # given the big-O complexity) as a dict, but very large ones can't
        # be pickled in a single shot! The LOBTree works faster and uses less
        # memory than the OOBTree or the dict *if* all the keys are integers;
        # which they currently are not. Plus the LOBTrees are slower on PyPy than its
        # own dict specializations. We were hoping to be able to write faster pickles with
        # large BTrees, but since that's not the case, we abandoned the idea.

        # This holds all the ring entries, no matter which ring they are in.
        self._dict = {}

        self._protected = ProtectedLRU(int(limit * self._gen_protected_pct))
        self._probation = ProbationLRU(int(limit * self._gen_probation_pct),
                                       self._protected,
                                       self._dict)
        self._eden = EdenLRU(int(limit * self._gen_eden_pct),
                             self._probation,
                             self._protected,
                             self._dict)

        # Index the generations by their PARENT_CONST so that an entry's
        # ring node (``cffi_ring_node.r_parent``) can be mapped straight
        # back to the generation that owns it.
        gens = [None, None, None, None] # 0 isn't used
        for generation in (self._protected, self._probation, self._eden):
            gens[generation.PARENT_CONST] = generation
        self._gens = tuple(gens)

        self._hits = 0
        self._misses = 0
        self._sets = 0
        self.limit = limit
        # Operation count (hits + sets) above which __setitem__ calls _age().
        self._next_age_at = 1000

    @property
    def size(self):
        """The combined ``size`` of the three generations."""
        return self._eden.size + self._protected.size + self._probation.size

    def reset_stats(self):
        """Zero the hit/miss/set counters and the aging bookkeeping."""
        self._hits = 0
        self._misses = 0
        self._sets = 0
        self._aged_at = 0
        self._next_age_at = 0

    def stats(self):
        """
        Return a dict of counters describing this mapping.

        ``ratio`` is hits / (hits + misses), or 0 when there have been
        no lookups. ``size`` is the number of entries, while ``bytes``
        is the value of the `size` property.
        """
        total = self._hits + self._misses
        return {
            'hits': self._hits,
            'misses': self._misses,
            'sets': self._sets,
            'ratio': self._hits/total if total else 0,
            'size': len(self._dict),
            'bytes': self.size,
            'eden_stats': self._eden.stats(),
            'prot_stats': self._protected.stats(),
            'prob_stats': self._probation.stats(),
        }

    def __len__(self):
        return len(self._dict)

    def _age(self):
        """
        Age the frequencies of all entries, if enough operations have
        happened since the last aging and the cache is full.

        :return: The operation count at which aging was performed, or
            `None` if aging was skipped.
        """
        # Age only when we're full and would thus need to evict; this
        # makes initial population faster. It's cheaper to calculate this
        # AFTER the operations, though, because we read it from C.

        # Age the whole thing periodically based on the number of
        # operations we've done that would have altered popularity.
        # Dynamically calculate how often we need to age. By default, this is
        # based on what Caffeine's PerfectFrequency does: 10 * max
        # cache entries
        dct = self._dict
        age_period = self._age_factor * len(dct)
        operations = self._hits + self._sets
        if operations - self._aged_at < age_period:
            self._next_age_at = age_period
            return None
        if self.size < self.limit:
            return None

        self._aged_at = operations
        now = time.time()
        log.debug("Beginning frequency aging for %d cache entries",
                  len(dct))
        SizedLRU.age_lists(self._eden, self._probation, self._protected)
        done = time.time()
        # Both placeholders need an argument; passing only the elapsed
        # time made the logging module fail to format this record.
        log.debug("Aged %d cache entries in %s", len(dct), done - now)

        self._next_age_at = int(self._aged_at * 1.5) # in case the dict shrinks

        return self._aged_at

    def __setitem__(self, key, value):
        """
        Set an item.

        If the memory limit would be exceeded, remove old items until
        that is no longer the case.

        If we need to age popularity counts, do so.

        Existing keys are updated in whichever generation currently
        holds them; new keys are added to the eden generation.

        :return: Always `True`.
        """
        # These types are gated by LocalClient, we don't need to double
        # check.
        #assert isinstance(key, str)
        #assert isinstance(value, bytes)

        dct = self._dict

        # Stored entries are never None, so a single lookup is enough
        # to distinguish "present" from "absent".
        entry = dct.get(key)
        if entry is not None:
            self._gens[entry.cffi_ring_node.r_parent].update_MRU(entry, value)
        else:
            dct[key] = self._eden.add_MRU(key, value)

        self._sets += 1

        # Do we need to move this up above the eviction choices?
        # Inline some of the logic about whether to age or not; avoiding the
        # call helps speed
        if self._hits + self._sets > self._next_age_at:
            self._age()

        return True

    def __contains__(self, key):
        return key in self._dict

    def __delitem__(self, key):
        """Remove *key* from the index and from its generation.

        :raises KeyError: If *key* is not present.
        """
        entry = self._dict.pop(key)
        self._gens[entry.cffi_ring_node.r_parent].remove(entry)

    def get_and_bubble_all(self, keys):
        """
        Look up each of *keys*, recording a hit or a miss for each one.

        Every entry that is found is reported to its generation via
        ``on_hit``.

        :return: A dict mapping each found key to its value; missing
            keys are simply absent from the result.
        """
        dct = self._dict
        gens = self._gens
        res = {}
        for key in keys:
            entry = dct.get(key)
            if entry is not None:
                self._hits += 1
                gens[entry.cffi_ring_node.r_parent].on_hit(entry)
                res[key] = entry.value
            else:
                self._misses += 1
        return res

    def get(self, key):
        """Return the value for *key*, or `None` if it is absent."""
        # Testing only. Does not bubble or increment.
        entry = self._dict.get(key)
        if entry is not None:
            return entry.value
        return None

    def __getitem__(self, key):
        """Return the value for *key*.

        :raises KeyError: If *key* is not present.
        """
        # Testing only. Doesn't bubble (no ``on_hit`` and no hit counter),
        # but it does bump the entry's frequency.
        entry = self._dict[key]
        entry.frequency += 1
        return entry.value

    # Benchmark for the general approach:

    # Pickle is about 3x faster than marshal if we write single large
    # objects, surprisingly. If we stick to writing smaller objects, the
    # difference narrows to almost negligible.

    # Writing 525MB of data, 655K keys (no compression):
    # - code as-of commit e58126a (the previous major optimizations for version 1 format)
    #    version 1 format, solid dict under 3.4: write: 3.8s/read 7.09s
    #    2.68s to update ring, 2.6s to read pickle
    #
    # -in a btree under 3.4: write: 4.8s/read 8.2s
    #    written as single list of the items
    #    3.1s to load the pickle, 2.6s to update the ring
    #
    # -in a dict under 3.4: write: 3.7s/read 7.6s
    #    written as the dict and updated into the dict
    #    2.7s loading the pickle, 2.9s to update the dict
    # - in a dict under 3.4: write: 3.0s/read 12.8s
    #    written by iterating the ring and writing one key/value pair
    #     at a time, so this is the only solution that
    #     automatically preserves the LRU property (and would be amenable to
    #     capping read based on time, and written file size); this format also lets us avoid the
    #     full write buffer for HIGHEST_PROTOCOL < 4
    #    2.5s spent in pickle.load, 8.9s spent in __setitem__,5.7s in ring.add
    # - in a dict: write 3.2/read 9.1s
    #    same as above, but custom code to set the items
    #   1.9s in pickle.load, 4.3s in ring.add
    # - same as above, but in a btree: write 2.76s/read 10.6
    #    1.8s in pickle.load, 3.8s in ring.add,
    #
    # For the final version with optimizations, the write time is 2.3s/read is 6.4s

    _FILE_VERSION = 4

    def load_from_file(self, cache_file):
        """
        Read entries previously written by `write_to_file`.

        Only keys that are not already present are added, and they are
        added directly to the protected generation until it is full.

        :param cache_file: A readable binary stream.
        :return: A tuple ``(count, stored)``: the number of entries
            found in the file and the number actually added.
        :raises ValueError: If the file's version marker does not
            match ``_FILE_VERSION``.
        """
        now = time.time()
        # Unlike write_to_file, using the raw stream
        # is fine for both Py 2 and 3.
        # NOTE: This unpickles the stream, so *cache_file* must come
        # from a trusted location; unpickling can execute arbitrary code.
        unpick = Unpickler(cache_file)

        # Local optimizations
        load = unpick.load

        version = load()
        if version != self._FILE_VERSION: # pragma: no cover
            raise ValueError("Incorrect version of cache_file")

        # The file is a sequence of individually pickled (key, value)
        # pairs; read until the stream is exhausted.
        entries_oldest_first = []
        entries_oldest_first_append = entries_oldest_first.append
        try:
            while True:
                entries_oldest_first_append(load())
        except EOFError:
            pass
        count = len(entries_oldest_first)

        def _insert_entries(entries):
            # Add the new keys from *entries* to the protected
            # generation, stopping once it is full. Returns how many
            # were added.
            stored = 0

            # local optimizations
            data = self._dict
            main = self._protected
            ring_add = main.add_MRU
            limit = main.limit

            for k, v in entries:
                if k in data:
                    continue

                if main.size >= limit:
                    break

                data[k] = ring_add(k, v)

                stored += 1

            return stored

        if not self._dict:
            # Empty, so quickly take everything they give us,
            # oldest first so that the result is actually LRU
            stored = _insert_entries(entries_oldest_first)
        else:
            # Loading more data into an existing bucket.
            # Load only the *new* keys, trying to get the newest ones
            # because LRU is going to get messed up anyway.
            stored = _insert_entries(reversed(entries_oldest_first))

        then = time.time()
        log.info("Examined %d and stored %d items from %s in %s",
                 count, stored, cache_file, then - now)
        return count, stored

    def write_to_file(self, cache_file):
        """
        Write the active entries to *cache_file*.

        The stream receives the pickled ``_FILE_VERSION`` followed by
        one pickled ``(key, value)`` tuple per entry, in increasing
        order of frequency. Entries whose halved frequency is zero are
        skipped, and writing stops before the total of the entries'
        ``len`` would exceed the protected generation's limit.

        :param cache_file: A writable binary stream.
        """
        now = time.time()
        # pickling the items is about 3x faster than marshal

        # Under Python 2, (or generally, under any pickle protocol
        # less than 4, when framing was introduced) whether we are
        # writing to an io.BufferedWriter, a <file> opened by name or
        # fd, with default buffer or a large (16K) buffer, putting the
        # Pickler directly on top of that stream is SLOW for large
        # single objects. Writing a 512MB dict takes ~40-50seconds. If
        # instead we use a BytesIO to buffer in memory, that time goes
        # down to about 7s. However, since we switched to writing many
        # smaller objects, that need goes away.

        pickler = Pickler(cache_file, -1) # Highest protocol
        dump = pickler.dump

        dump(self._FILE_VERSION) # Version marker

        # Dump all the entries in increasing order of popularity (
        # so that when we read them back in the least popular items end up LRU).
        # Anything with a popularity of 0 probably hasn't been accessed in a long
        # time, so don't dump it.

        # Age them now, writing only the most popular. (But don't age in place just
        # in case we're still being used.)

        # XXX: Together with only writing what will fit in the protected space,
        # is this optimal? One of the goals is to speed up startup, which may access
        # objects that are never or rarely used again. They'll tend to wind up in
        # the probation space over time, or at least have a very low frequency.
        # Maybe we shouldn't prevent writing aged items, and maybe we should fill up
        # probation and eden too. We probably want to allow the user to specify
        # a size limit at this point.

        entries = sorted((e for e in itervalues(self._dict) if e.frequency // 2),
                         key=lambda e: e.frequency)

        if len(entries) < len(self._dict):
            log.info("Ignoring %d items for writing due to inactivity",
                     len(self._dict) - len(entries))

        # Don't bother writing more than we'll be able to store.
        # NOTE(review): because the entries are in increasing order of
        # frequency, stopping at the limit omits the *most* popular
        # entries. Confirm that this is the intended trade-off.
        count_written = 0
        bytes_written = 0
        byte_limit = self._protected.limit
        for entry in entries:
            # Check the limit before counting, so that the entry which
            # would overflow it is neither written nor reported as written.
            if bytes_written + entry.len > byte_limit:
                break
            bytes_written += entry.len
            count_written += 1

            dump((entry.key, entry.value))

        then = time.time()
        stats = self.stats()
        log.info("Wrote %d items to %s in %s. Total hits %s; misses %s; ratio %s",
                 count_written, cache_file, then - now,
                 stats['hits'], stats['misses'], stats['ratio'])