Module olivia.lib.transientsequence
TransientSequence class.
Expand source code
"""TransientSequence class."""
import pickle
import zlib
from collections.abc import Sequence
class TransientSequence(Sequence):
    """
    A fixed size Sequence type for managing large in-memory structures.

    A TransientSequence is a fixed size linear array of arbitrary objects that are
    compressed/decompressed on the fly and automatically deleted from memory after a
    certain number of accesses.

    Objects must implement __len__(). Only simple accessors are supported (no slicing).

    TransientSequence has been developed to efficiently manage large successor sets of
    graphs using bit vector representations. It should be used with care in a general
    setting as implicit compression on accessors could be dangerous.

    Parameters
    ----------
    size: int
        Amount of objects to be stored (fixed).
    class_type: class, optional
        Type of objects to be stored.
    compressor: function, optional
        Compressing function. Defaults to simple pickle/zlib compressor.
    decompressor: function, optional
        Decompressing function. Defaults to simple zlib/unpickle decompressor.
    compression_threshold: int, optional
        Upper size (len) limit to keep objects uncompressed.
    expiry_array: sequence or None, optional
        Number of accesses per object before deletion. Reassigning reverts the counter
        to its original value.

    Notes
    -----
    The mixin methods inherited from ``collections.abc.Sequence`` (iteration, ``in``,
    ``index``, ``count``, ``reversed``) are built on ``__getitem__``, so each element
    they visit counts as one access against its expiry counter.
    """

    @staticmethod
    def _DEFAULT_COMPRESSOR(value):
        """Serialize 'value' with pickle and compress the result with zlib."""
        return zlib.compress(pickle.dumps(value))

    @staticmethod
    def _DEFAULT_DECOMPRESSOR(value):
        """Inverse of the default compressor: zlib-decompress, then unpickle."""
        # NOTE: pickle.loads must never see untrusted bytes. Here the payload is only
        # ever what this object's compressor produced from values assigned to it.
        return pickle.loads(zlib.decompress(value))

    # Inside the class body the names above are still staticmethod objects, so the
    # underlying plain functions are taken via __func__ to serve as default arguments.
    def __init__(self, size, class_type=object, compressor=_DEFAULT_COMPRESSOR.__func__,
                 decompressor=_DEFAULT_DECOMPRESSOR.__func__,
                 compression_threshold=1000, expiry_array=None):
        """
        Create and initialize a TransientSequence.

        Parameters
        ----------
        size: int
            Amount of objects to be stored (fixed).
        class_type: class, optional
            Type of objects to be stored.
        compressor: function, optional
            Compressing function. Defaults to simple pickle/zlib compressor.
        decompressor: function, optional
            Decompressing function. Defaults to simple zlib/unpickle decompressor.
        compression_threshold: int, optional
            Upper size (len) limit to keep objects uncompressed.
        expiry_array: sequence or None, optional
            Number of accesses per object before deletion. Reassigning reverts the
            counter to its original value. The object passed in is used directly as
            the live counter array (it is decremented in place on every access) and
            must provide a ``copy()`` method.
        """
        self._data = [None] * size
        self._compressed = [False] * size
        self._ct = compression_threshold
        self._compressor = compressor
        self._decompressor = decompressor
        self._class_type = class_type
        # Live per-slot counters; this is the caller's object, not a copy.
        self._expiry = expiry_array
        # Pristine snapshot used to restore a slot's counter on reassignment.
        # Always defined so the attribute exists even without expiry tracking.
        self._expiry_original = None
        if expiry_array is not None:
            self._expiry_original = expiry_array.copy()

    def __setitem__(self, index, value):
        """
        Assign value to position 'index'.

        Parameters
        ----------
        index : int
            A position in the TransientSequence.
        value : object
            An object or object of class_type. None clears the position.

        Returns
        -------
        None

        Notes
        -----
        Transparently compresses the object if length > compression_threshold.
        Empty objects are stored as None, so reading them back yields a fresh
        class_type() instance. Assigning a non-None value (empty or not) restores
        the expiry counter of the position; assigning None does not.
        """
        if value is None:
            # Clearing a slot: the expiry counter is not reset on this path.
            self._data[index] = None
            self._compressed[index] = False
            return

        if len(value) == 0:
            # Empty payloads are represented by None instead of keeping the object.
            stored, is_compressed = None, False
        else:
            if not isinstance(value, self._class_type):
                value = self._class_type(value)
            if len(value) > self._ct:
                stored, is_compressed = self._compressor(value), True
            else:
                stored, is_compressed = value, False

        self._data[index] = stored
        self._compressed[index] = is_compressed
        if self._expiry is not None:
            self._expiry[index] = self._expiry_original[index]

    def __getitem__(self, index):
        """
        Return the object at position 'index'.

        Parameters
        ----------
        index : int
            A position in the TransientSequence.

        Returns
        -------
        object : object
            Object or object of class_type. An empty, cleared or expired position
            yields a new class_type() instance.

        Notes
        -----
        Transparently decompresses the object if needed. When expiry tracking is
        enabled, every call decrements the counter of the position and the stored
        reference is deleted once the counter reaches zero.
        """
        stored = self._data[index]
        if stored is None:
            out = self._class_type()
        elif self._compressed[index]:
            out = self._decompressor(stored)
        else:
            out = stored

        if self._expiry is not None:
            self._expiry[index] -= 1
            if self._expiry[index] == 0:
                # Last permitted access: drop the payload and its compression flag.
                self._data[index] = None
                self._compressed[index] = False
        return out

    def __len__(self):
        """
        Return the length of the TransientSequence.

        Returns
        -------
        length : int
            Number of objects in the TransientSequence (length)
        """
        return len(self._data)
Classes
class TransientSequence (size, class_type=builtins.object, compressor=<function TransientSequence._DEFAULT_COMPRESSOR>, decompressor=<function TransientSequence._DEFAULT_DECOMPRESSOR>, compression_threshold=1000, expiry_array=None)
-
A fixed size Sequence type for managing large in-memory structures.
A TransientSequence is a fixed size linear array of arbitrary objects that are compressed/decompressed on the fly and automatically deleted from memory after a certain number of accesses.
Objects must implement `__len__()`. Only simple accessors are supported (no slicing).
TransientSequence has been developed to efficiently manage large successor sets of graphs using bit vector representations. It should be used with care in a general setting as implicit compression on accessors could be dangerous.
Parameters
size : int
- Amount of objects to be stored (fixed).
class_type : class, optional
- Type of objects to be stored.
compressor : function, optional
- Compressing function. Defaults to simple pickle/zlib compressor.
decompressor : function, optional
- Decompressing function. Defaults to simple zlib/unpickle decompressor.
compression_threshold : int, optional
- Upper size (len) limit to keep objects uncompressed.
expiry_array : sequence or None, optional
- Number of accesses per object before deletion. Reassigning reverts the counter to its original value.
Create and initialize a TransientSequence.
Parameters
size : int
- Amount of objects to be stored (fixed).
class_type : class, optional
- Type of objects to be stored.
compressor : function, optional
- Compressing function. Defaults to simple pickle/zlib compressor.
decompressor : function, optional
- Decompressing function. Defaults to simple zlib/unpickle decompressor.
compression_threshold : int, optional
- Upper size (len) limit to keep objects uncompressed.
expiry_array : sequence or None, optional
- Number of accesses per object before deletion. Reassigning reverts the counter to its original value.
Expand source code
class TransientSequence(Sequence):
    """
    A fixed size Sequence type for managing large in-memory structures.

    A TransientSequence is a fixed size linear array of arbitrary objects that are
    compressed/decompressed on the fly and automatically deleted from memory after a
    certain number of accesses.

    Objects must implement __len__(). Only simple accessors are supported (no slicing).

    TransientSequence has been developed to efficiently manage large successor sets of
    graphs using bit vector representations. It should be used with care in a general
    setting as implicit compression on accessors could be dangerous.

    Parameters
    ----------
    size: int
        Amount of objects to be stored (fixed).
    class_type: class, optional
        Type of objects to be stored.
    compressor: function, optional
        Compressing function. Defaults to simple pickle/zlib compressor.
    decompressor: function, optional
        Decompressing function. Defaults to simple zlib/unpickle decompressor.
    compression_threshold: int, optional
        Upper size (len) limit to keep objects uncompressed.
    expiry_array: sequence or None, optional
        Number of accesses per object before deletion. Reassigning reverts the counter
        to its original value.
    """

    @staticmethod
    def _DEFAULT_COMPRESSOR(value):
        """Pickle 'value' and zlib-compress the resulting bytes."""
        return zlib.compress(pickle.dumps(value))

    @staticmethod
    def _DEFAULT_DECOMPRESSOR(value):
        """Zlib-decompress 'value' and unpickle the original object."""
        # NOTE: pickle.loads is only acceptable because the bytes come from this
        # object's own compressor; never feed it externally supplied data.
        return pickle.loads(zlib.decompress(value))

    # The defaults unwrap the staticmethod objects with __func__ because, at class
    # body evaluation time, the names are not yet bound as plain functions.
    def __init__(self, size, class_type=object, compressor=_DEFAULT_COMPRESSOR.__func__,
                 decompressor=_DEFAULT_DECOMPRESSOR.__func__,
                 compression_threshold=1000, expiry_array=None):
        """
        Create and initialize a TransientSequence.

        Parameters
        ----------
        size: int
            Amount of objects to be stored (fixed).
        class_type: class, optional
            Type of objects to be stored.
        compressor: function, optional
            Compressing function. Defaults to simple pickle/zlib compressor.
        decompressor: function, optional
            Decompressing function. Defaults to simple zlib/unpickle decompressor.
        compression_threshold: int, optional
            Upper size (len) limit to keep objects uncompressed.
        expiry_array: sequence or None, optional
            Number of accesses per object before deletion. Reassigning reverts the
            counter to its original value. The given object is decremented in place
            and must offer a ``copy()`` method.
        """
        self._data = [None] * size
        self._compressed = [False] * size
        self._ct = compression_threshold
        self._compressor = compressor
        self._decompressor = decompressor
        self._class_type = class_type
        # Working counters (the caller's own object) plus an untouched snapshot
        # from which a slot's counter is restored on reassignment.
        self._expiry = expiry_array
        self._expiry_original = None
        if expiry_array is not None:
            self._expiry_original = expiry_array.copy()

    def __setitem__(self, index, value):
        """
        Assign value to position 'index'.

        Parameters
        ----------
        index : int
            A position in the TransientSequence.
        value : object
            An object or object of class_type. None clears the position.

        Returns
        -------
        None

        Notes
        -----
        Transparently compresses the object if length > compression_threshold.
        Empty objects are stored as None. Any non-None assignment restores the
        expiry counter of the position; assigning None leaves it unchanged.
        """
        if value is None:
            # Plain clear; the expiry counter keeps its current value.
            self._data[index] = None
            self._compressed[index] = False
            return

        if len(value) == 0:
            # An empty payload is stored as None rather than as the object itself.
            payload, packed = None, False
        else:
            if not isinstance(value, self._class_type):
                value = self._class_type(value)
            if len(value) > self._ct:
                payload, packed = self._compressor(value), True
            else:
                payload, packed = value, False

        self._data[index] = payload
        self._compressed[index] = packed
        if self._expiry is not None:
            self._expiry[index] = self._expiry_original[index]

    def __getitem__(self, index):
        """
        Return the object at position 'index'.

        Parameters
        ----------
        index : int
            A position in the TransientSequence.

        Returns
        -------
        object : object
            Object or object of class_type. Positions holding nothing yield a new
            class_type() instance.

        Notes
        -----
        Transparently decompresses the object if needed. With expiry tracking on,
        each call decrements the position's counter and the stored reference is
        deleted when the counter reaches zero.
        """
        payload = self._data[index]
        if payload is None:
            out = self._class_type()
        elif self._compressed[index]:
            out = self._decompressor(payload)
        else:
            out = payload

        if self._expiry is not None:
            self._expiry[index] -= 1
            if self._expiry[index] == 0:
                # Counter exhausted: release the payload and reset its flag.
                self._data[index] = None
                self._compressed[index] = False
        return out

    def __len__(self):
        """
        Return the length of the TransientSequence.

        Returns
        -------
        length : int
            Number of objects in the TransientSequence (length)
        """
        return len(self._data)
Ancestors
- collections.abc.Sequence
- collections.abc.Reversible
- collections.abc.Collection
- collections.abc.Sized
- collections.abc.Iterable
- collections.abc.Container