While designing a new programming language, I wondered whether query results should be cached by default or not. Caching has advantages and disadvantages. I found a solution that offers the best of both worlds, and the solution is also possible in C#!
This is the problem that needs to be solved:
public class LazyListTest
{
    private int _count = 0;

    public void Test()
    {
        var numbers = Enumerable.Range(1, 40);
        var numbersQuery = numbers.Select(GetElement);
        var total = numbersQuery.Take(3)
            .Concat(numbersQuery.Take(10))
            .Concat(numbersQuery.Take(3))
            .Sum();
        Console.WriteLine(_count);
    }

    private int GetElement(int value)
    {
        _count++;
        // Some slow stuff here...
        return value * 100;
    }
}
When you run Test(), only the first 10 elements need to be used, but the _count field counts up to 16 (3 + 10 + 3 = 16 calls to GetElement). Due to lazy evaluation, the first 3 elements of numbersQuery are processed 3 times. If you want the first 3 elements to be processed only once, you could use ToList():
var numbersQuery = numbers.Select(GetElement).ToList();
Now all elements are iterated immediately, so elements 11-40 are processed even though they are never used. The solution: use a lazy list. Now the _count field will be 10. Exactly the caching you want.
var numbersQuery = numbers.Select(GetElement).ToLazyList();
No elements are iterated here yet. The solution LazyList provides is: cache while you iterate. This is the code of LazyList:
public class LazyList<T> : IEnumerable<T>
{
    private readonly ICollection<T> _cache;
    private readonly IEnumerator<T> _enumerator;

    public LazyList(IEnumerable<T> source)
    {
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }
        _cache = source as ICollection<T>;
        if (_cache == null) // Needs caching
        {
            _cache = new List<T>();
            _enumerator = source.GetEnumerator();
        }
        else // Needs no caching
        {
            _enumerator = Enumerable.Empty<T>().GetEnumerator();
        }
    }

    private IEnumerable<T> Items()
    {
        return _cache.Concat(ItemsOnce());
    }

    private IEnumerable<T> ItemsOnce()
    {
        while (_enumerator.MoveNext())
        {
            var value = _enumerator.Current;
            _cache.Add(value);
            yield return value;
        }
        _enumerator.Dispose();
    }

    public IEnumerator<T> GetEnumerator()
    {
        return Items().GetEnumerator();
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
}
For convenience, an extension method:
public static class LazyListExtensions
{
    public static LazyList<T> ToLazyList<T>(this IEnumerable<T> source)
    {
        return new LazyList<T>(source);
    }
}
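Plugging the extension method into the earlier test (a sketch of the same Test() method, with ToLazyList() added) confirms that _count ends up at 10:
public void Test()
{
    var numbers = Enumerable.Range(1, 40);
    // Same query as before, now cached while iterating.
    var numbersQuery = numbers.Select(GetElement).ToLazyList();
    var total = numbersQuery.Take(3)
        .Concat(numbersQuery.Take(10))
        .Concat(numbersQuery.Take(3))
        .Sum();
    Console.WriteLine(_count); // 10: elements 1-10 are computed once, 11-40 never
}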
This solution is quite simple, but it has a bug:
var range = Enumerable.Range(1, 5);
var lazy = new LazyList<int>(range);
foreach (var x in lazy)
{
    foreach (var y in lazy)
    {
        Console.WriteLine("{0} {1}", x, y);
    }
}
That writes out only 5 entries, while it should write out 25. Both foreach loops share the same source enumerator: the inner loop drains it completely during the first outer iteration, and the outer enumerator has already passed over the (then still empty) cache, so it stops after the first element.
This is my solution for this problem:
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;

public class LazyList<T> : IEnumerable<T>, IDisposable
{
    private readonly IList<T> _cache;
    private IEnumerator<T> _sourceEnumerator;

    public bool AllElementsAreCached { get; private set; }

    public LazyList(IEnumerable<T> source)
    {
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }
        _cache = source as IList<T>;
        if (_cache == null) // Needs caching
        {
            _cache = new List<T>();
            _sourceEnumerator = source.GetEnumerator();
        }
        else // Needs no caching
        {
            AllElementsAreCached = true;
            _sourceEnumerator = Enumerable.Empty<T>().GetEnumerator();
        }
    }

    public IEnumerator<T> GetEnumerator()
    {
        return AllElementsAreCached ? _cache.GetEnumerator() : new LazyListEnumerator(this);
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    ~LazyList()
    {
        Dispose(false);
    }

    protected virtual void Dispose(bool disposing)
    {
        if (disposing)
        {
            if (_sourceEnumerator != null)
            {
                _sourceEnumerator.Dispose();
                _sourceEnumerator = null;
            }
        }
        // No native resources to free.
    }

    private class LazyListEnumerator : IEnumerator<T>
    {
        private readonly LazyList<T> _lazyList;
        private readonly object _lock = new object();
        private const int StartIndex = -1;
        private int _index = StartIndex;

        public LazyListEnumerator(LazyList<T> lazyList)
        {
            _lazyList = lazyList;
        }

        public bool MoveNext()
        {
            var result = true;
            _index++;
            if (IndexItemIsInCache) // Double-checked locking pattern
            {
                SetCurrentToIndex();
            }
            else
            {
                lock (_lock)
                {
                    if (IndexItemIsInCache)
                    {
                        SetCurrentToIndex();
                    }
                    else
                    {
                        result = !_lazyList.AllElementsAreCached && _lazyList._sourceEnumerator.MoveNext();
                        if (result)
                        {
                            Current = _lazyList._sourceEnumerator.Current;
                            _lazyList._cache.Add(_lazyList._sourceEnumerator.Current);
                        }
                        else if (!_lazyList.AllElementsAreCached)
                        {
                            _lazyList.AllElementsAreCached = true;
                            _lazyList._sourceEnumerator.Dispose();
                        }
                    }
                }
            }
            return result;
        }

        private bool IndexItemIsInCache
        {
            get
            {
                return _index < _lazyList._cache.Count;
            }
        }

        private void SetCurrentToIndex()
        {
            Current = _lazyList._cache[_index];
        }

        public void Reset()
        {
            _index = StartIndex;
        }

        public T Current { get; private set; }

        object IEnumerator.Current
        {
            get { return Current; }
        }

        public void Dispose()
        {
            // The _lazyList._sourceEnumerator
            // is disposed in LazyList
        }
    }
}
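A quick sanity check (a sketch, using the same range as the buggy example above) shows that the nested iteration now behaves as expected:
var range = Enumerable.Range(1, 5);
using (var lazy = range.ToLazyList())
{
    var pairs = 0;
    foreach (var x in lazy)
    {
        foreach (var y in lazy)
        {
            pairs++; // every (x, y) combination is visited
        }
    }
    Console.WriteLine(pairs); // 25, and the source is enumerated only once
}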
Note that when all elements are already cached, GetEnumerator returns the enumerator of the underlying List<T> (or of the IList<T> that was passed in) directly, so no LazyListEnumerator and no locking are involved.
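For instance, handing it a list that is already materialized takes that shortcut immediately (a small sketch):
var alreadyMaterialized = new List<int> { 1, 2, 3 };
using (var lazy = alreadyMaterialized.ToLazyList())
{
    Console.WriteLine(lazy.AllElementsAreCached); // True: the list itself is the cache
    Console.WriteLine(lazy.Sum());                // 6, enumerated via the list's own enumerator
}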