BigHashsetSa.cs

Big Hash Set based on BigArray.cs

Uses a single array.

using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
/// <summary>
///     Hash set that stores its items in a single <c>BigArray</c> of buckets. An item is placed in
///     the bucket at <c>hash % bucketCount</c>, and every bucket keeps the items that collided there.
/// </summary>
/// <remarks>
///     Mutating and querying members run inside <c>Lock(this, ...)</c> inherited from
///     <c>MonitorActionFunc</c>. The diagnostic members (<see cref="GetElementCount"/> and friends)
///     scan the table without taking that lock.
/// </remarks>
/// <typeparam name="T">Element type; hashing and equality are delegated to <see cref="Comparer"/>.</typeparam>
[DebuggerDisplay("Count = {" + nameof(Count) + "}")]
[Serializable]
public class BigHashsetSa<T> : MonitorActionFunc, IEnumerable<T>
{
    /// <summary>Sizing policy applied by <see cref="Add"/>.</summary>
    public enum Method
    {
        /// <summary>Enlarge the bucket table once the item count reaches the number of buckets.</summary>
        Grow,

        /// <summary>Never enlarge the bucket table; additional items deepen the existing buckets.</summary>
        Compress
    }

    private volatile BigArray<Bucket>        _buckets;
    private          long                    _count;
    private readonly Method                  _method;
    internal         IBigEqualityComparer<T> Comparer;

    /// <summary>Creates a set with <paramref name="size"/> buckets and a default <c>BigComparer</c>.</summary>
    /// <param name="size">Initial number of buckets.</param>
    /// <param name="method">Sizing policy; defaults to <see cref="Method.Grow"/>.</param>
    public BigHashsetSa(long size, Method method = Method.Grow) : this(size, new BigComparer<T>(), method)
    {
    }

    /// <summary>Creates a set with <paramref name="size"/> buckets and the supplied comparer.</summary>
    /// <param name="size">Initial number of buckets.</param>
    /// <param name="comparer">Hash/equality provider; a new <c>BigComparer</c> is used when null.</param>
    /// <param name="method">Sizing policy; defaults to <see cref="Method.Grow"/>.</param>
    public BigHashsetSa(long size, IBigEqualityComparer<T> comparer, Method method = Method.Grow)
    {
        Comparer = comparer ?? new BigComparer<T>();
        _buckets = new BigArray<Bucket>(size);
        Count    = 0;
        _method  = method;
    }

    /// <summary>Number of items currently stored in the set.</summary>
    public long Count
    {
        get
        {
            return Lock(this, () =>
            {
                return _count;
            });
        }
        private set
        {
            Lock(this, () =>
            {
                _count = value;
            });
        }
    }

    /// <summary>Item total obtained by summing every bucket; see <see cref="GetElementCount"/>.</summary>
    public long                       ElementCount         => GetElementCount();

    /// <summary>Number of bucket positions that hold no bucket object; see <see cref="GetNumberOfEmptyBuckets"/>.</summary>
    public long                       NumberOfEmptyBuckets => GetNumberOfEmptyBuckets();

    /// <summary>Deepest bucket and its position; see <see cref="GetMaximumBucketDepth"/>.</summary>
    public (long mDepth, long index)  MaximumBucketDepth   => GetMaximumBucketDepth();

    /// <summary>Items per bucket position; see <see cref="GetLoadRatio"/>.</summary>
    public float                      LoadRatio            => GetLoadRatio();

    /// <summary>Histogram of bucket depths; see <see cref="GetBucketDepthList"/>.</summary>
    public KeyValuePair<long, long>[] BucketDepthList      => GetBucketDepthList();

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    /// <summary>Removes every item from the set.</summary>
    public void Clear()
    {
        Lock(this, () =>
        {
            // NOTE(review): assumes BigArray.Clear() resets every element to null - confirm.
            _buckets.Clear();
            // The item counter has to follow the table, otherwise Count keeps reporting removed items.
            _count = 0;
        });
    }

    /// <summary>Adds <paramref name="item"/> unless an equal item is already stored.</summary>
    /// <param name="item">Item to add.</param>
    /// <returns><c>true</c> when the item was added; <c>false</c> when an equal item was already present.</returns>
    public bool Add(T item)
    {
        return Lock(this, () =>
        {
            if (_method == Method.Grow)
                EnsureSize();
            var hashCode = Comparer.GetHashCode(item) & long.MaxValue;
            if (FindEntry(item, hashCode).APos != -1)
                return false;
            AppendToBucket(_buckets, item, hashCode);
            _count++;
            return true;
        });
    }

    /// <summary>Copies the stored items into a new array, in bucket order.</summary>
    /// <returns>An array of length <see cref="Count"/>.</returns>
    public T[] ToArray()
    {
        // Sizing the array and filling it happen under one lock so a concurrent Add cannot overflow it.
        return Lock(this, () =>
        {
            var result = new T[_count];
            var next   = 0L;
            using (var en = GetEnum())
            {
                while (en.MoveNext())
                    result[next++] = en.Current;
            }
            return result;
        });
    }

    /// <summary>Locates <paramref name="item"/> in the bucket selected by <paramref name="hashCode"/>.</summary>
    /// <param name="item">Item to look for.</param>
    /// <param name="hashCode">Non-negative hash of <paramref name="item"/>.</param>
    /// <returns>
    ///     The bucket position and the index inside that bucket, or <c>(-1, -1)</c> when the item is absent.
    /// </returns>
    private (long APos, long BPos) FindEntry(T item, long hashCode)
    {
        if (_count == 0)
            return (-1, -1);
        var aPos   = hashCode % _buckets.Length;
        var bucket = _buckets[aPos];
        if (bucket == null)
            return (-1, -1);
        // Only the first Count slots are occupied; the rest of Values still holds default(T) and
        // must not be compared, or a lookup for default(T) would match an item that was never added.
        for (var bPos = 0; bPos < bucket.Count; bPos++)
            if (Comparer.Equals(bucket.Values[bPos], item))
                return (aPos, bPos);
        return (-1, -1);
    }

    /// <summary>
    ///     Enlarges the bucket table by <c>BigArray&lt;T&gt;.Granularity</c> positions once the item count
    ///     has reached the number of buckets, re-distributing every stored item.
    /// </summary>
    private void EnsureSize()
    {
        var current = _buckets;
        if (_count < current.Length)
            return;
        var grown = new BigArray<Bucket>(current.Length + BigArray<T>.Granularity);
        // Rehash straight from the old buckets; no intermediate T[] copy of the whole set is needed.
        for (var i = 0L; i < current.Length; i++)
        {
            var bucket = current[i];
            if (bucket == null)
                continue;
            for (var j = 0; j < bucket.Count; j++)
            {
                var moved = bucket.Values[j];
                AppendToBucket(grown, moved, Comparer.GetHashCode(moved) & long.MaxValue);
            }
        }
        // Publish the new table only after it is completely populated.
        _buckets = grown;
    }

    /// <summary>Appends <paramref name="item"/> to the bucket its hash selects, creating the bucket on first use.</summary>
    private static void AppendToBucket(BigArray<Bucket> buckets, T item, long hashCode)
    {
        var pos    = hashCode % buckets.Length;
        var bucket = buckets[pos];
        if (bucket == null)
        {
            bucket       = new Bucket();
            buckets[pos] = bucket;
        }
        bucket.Add(item);
    }

    /// <summary>Determines whether an item equal to <paramref name="item"/> is stored.</summary>
    /// <param name="item">Item to look for.</param>
    /// <returns><c>true</c> when the item is present; otherwise <c>false</c>.</returns>
    public bool Contains(T item)
    {
        return Lock(this, () =>
        {
            var hashCode = Comparer.GetHashCode(item) & long.MaxValue;
            return FindEntry(item, hashCode).APos != -1;
        });
    }

    /// <summary>Returns an enumerator over the stored items.</summary>
    /// <remarks>
    ///     <see cref="GetEnum"/> is an iterator method, so its body runs lazily while the caller enumerates;
    ///     the lock taken here is held only while the enumerator object is created.
    /// </remarks>
    public IEnumerator<T> GetEnumerator()
    {
        return Lock(this, () =>
        {
            return GetEnum();
        });
    }

    /// <summary>Lazily yields every stored item, walking the buckets in position order.</summary>
    public IEnumerator<T> GetEnum()
    {
        // The index is a long: an int index would overflow on tables larger than int.MaxValue.
        for (var i = 0L; i < _buckets.Length; i++)
        {
            var bucket = _buckets[i];
            if (bucket == null)
                continue;
            for (var j = 0; j < bucket.Count; ++j)
                yield return bucket.Values[j];
        }
    }

    /// <summary>Sums the item counts of all buckets.</summary>
    /// <returns>Total number of items found in the table.</returns>
    public long GetElementCount()
    {
        var total = 0L;
        for (var i = 0L; i < _buckets.Length; i++)
        {
            var bucket = _buckets[i];
            if (bucket != null)
                total += bucket.Count;
        }
        return total;
    }

    /// <summary>Counts the bucket positions that hold no bucket object.</summary>
    public long GetNumberOfEmptyBuckets()
    {
        var empty = 0L;
        for (var i = 0L; i < _buckets.Length; i++)
            if (_buckets[i] == null)
                empty++;
        return empty;
    }

    /// <summary>Counts the bucket positions that hold a bucket object.</summary>
    public long GetNumberOfFilledBuckets()
    {
        var filled = 0L;
        for (var i = 0L; i < _buckets.Length; i++)
            if (_buckets[i] != null)
                filled++;
        return filled;
    }

    /// <summary>Finds the bucket holding the most items.</summary>
    /// <returns>The largest bucket depth and the position of the first bucket that reaches it.</returns>
    public (long mDepth, long index) GetMaximumBucketDepth()
    {
        var deepest  = 0;
        var position = 0L;
        for (var i = 0L; i < _buckets.Length; i++)
        {
            var bucket = _buckets[i];
            if (bucket == null || bucket.Count <= deepest)
                continue;
            deepest  = bucket.Count;
            position = i;
        }
        return (deepest, position);
    }

    /// <summary>Builds a histogram of bucket depths.</summary>
    /// <returns>
    ///     Pairs of (depth, number of buckets with that depth), ordered by descending bucket count.
    ///     Positions without a bucket object are not included.
    /// </returns>
    public KeyValuePair<long, long>[] GetBucketDepthList()
    {
        var histogram = new Dictionary<long, long>();
        for (var i = 0L; i < _buckets.Length; i++)
        {
            var bucket = _buckets[i];
            if (bucket == null)
                continue;
            long depth = bucket.Count;
            // TryGetValue leaves 'seen' at 0 for a depth encountered for the first time.
            histogram.TryGetValue(depth, out var seen);
            histogram[depth] = seen + 1;
        }
        return histogram.OrderByDescending(x => x.Value).ToArray();
    }

    /// <summary>Computes the ratio of stored items to bucket positions.</summary>
    public float GetLoadRatio()
    {
        var items   = Count;
        var buckets = _buckets.Length;
        return items / (float) buckets;
    }

    /// <summary>Growable list of the items that share one bucket position.</summary>
    internal class Bucket
    {
        /// <summary>Number of occupied slots at the front of <see cref="Values"/>.</summary>
        public int Count;

        /// <summary>Item storage; slots at index <see cref="Count"/> and beyond are unused.</summary>
        public T[] Values;

        /// <summary>Creates an empty bucket with room for two items.</summary>
        public Bucket()
        {
            Values = new T[2];
            Count  = 0;
        }

        /// <summary>Appends <paramref name="item"/>, enlarging the storage by one slot when it is full.</summary>
        public void Add(T item)
        {
            if (Count >= Values.Length)
            {
                var ta = new T[Values.Length + 1];
                Array.Copy(Values, 0, ta, 0, Count);
                Values = ta;
            }
            Values[Count++] = item;
        }
    }
}

BigHashSet.cs

Big Hash Set based on BigArray.cs

Uses two arrays; for a single-array implementation, use BigHashsetSa.cs.

using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.Serialization;
using System.Threading;
/// <summary>
///     Hash set built on two <c>BigArray</c> instances: <c>HashBuckets</c> stores, per hash position,
///     the index + 1 of the first slot in that position's chain (0 meaning "empty"), and <c>Slots</c>
///     stores the items together with their hash and the index of the next slot in the chain.
/// </summary>
/// <remarks>
///     Occupied slots are kept dense in <c>[0, Count)</c>. Members run inside <c>Lock(this, ...)</c>
///     inherited from <c>MonitorActionFunc</c>.
/// </remarks>
/// <typeparam name="T">Element type; hashing and equality are delegated to the table's comparer.</typeparam>
[Serializable]
[DebuggerDisplay("Count = {Count}")]
[Obsolete]
public class BigHashSet<T> : MonitorActionFunc, IEnumerable<T>, ISerializable, IDeserializationCallback
{
    // Captured by the serialization constructor and consumed by OnDeserialization.
    private readonly SerializationInfo sInfo;
    public volatile  Table             _table = new Table();

    /// <summary>Creates a set with <c>BigArray&lt;T&gt;.Granularity</c> capacity and a <c>BigComparer</c>.</summary>
    public BigHashSet() : this(BigArray<T>.Granularity, new BigComparer<T>())
    {
    }

    /// <summary>Creates a set with at least <paramref name="size"/> capacity and a <c>BigComparer</c>.</summary>
    public BigHashSet(long size) : this(size, new BigComparer<T>())
    {
    }

    /// <summary>Creates a set with <c>BigArray&lt;T&gt;.Granularity</c> capacity and the given comparer.</summary>
    public BigHashSet(IBigEqualityComparer<T> comparer) : this(BigArray<T>.Granularity, comparer)
    {
    }

    /// <summary>Creates a set with the given capacity and comparer.</summary>
    /// <param name="size">Requested capacity; raised to <c>BigArray&lt;T&gt;.Granularity</c> when smaller.</param>
    /// <param name="comparer">Hash/equality provider; a new <c>DynComparer64</c> is used when null.</param>
    public BigHashSet(long size, IBigEqualityComparer<T> comparer)
    {
        Lock(this, () =>
        {
            var capacity = size < BigArray<T>.Granularity ? BigArray<T>.Granularity : size;
            ResetTable(capacity, comparer);
        });
    }

    /// <summary>Creates a set with a <c>DynComparer64</c> and fills it from <paramref name="collection"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="collection"/> is null.</exception>
    public BigHashSet(IEnumerable<T> collection) : this(collection, null)
    {
    }

    /// <summary>Creates a set with the given comparer and fills it from <paramref name="collection"/>.</summary>
    /// <param name="collection">Items to insert; duplicates are stored once.</param>
    /// <param name="comparer">Hash/equality provider; a new <c>DynComparer64</c> is used when null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="collection"/> is null.</exception>
    public BigHashSet(IEnumerable<T> collection, IBigEqualityComparer<T> comparer)
    {
        if (collection == null)
            throw new ArgumentNullException(nameof(collection));
        Lock(this, () =>
        {
            // The supplied comparer is kept; it must not be replaced by a default instance.
            ResetTable(BigArray<T>.Granularity, comparer);
            foreach (var item in collection)
                Insert(item, true);
        });
    }

    /// <summary>Serialization constructor; the table is rebuilt later in <see cref="OnDeserialization"/>.</summary>
    protected BigHashSet(SerializationInfo info, StreamingContext context)
    {
        sInfo = info;
    }

    /// <summary>Number of items stored, which is also the number of occupied slots.</summary>
    public long Count
    {
        get
        {
            return Lock(this, () =>
            {
                return _table._count;
            });
        }
        private set
        {
            Lock(this, () =>
            {
                _table._count = value;
            });
        }
    }

    /// <summary>Gets the stored instance equal to <paramref name="item"/>, or inserts <paramref name="item"/>.</summary>
    /// <remarks>The setter ignores the assigned value and inserts <paramref name="item"/> itself.</remarks>
    /// <exception cref="Exception">Getter: no equal item is stored.</exception>
    public T this[T item]
    {
        get
        {
            return Lock(this, () =>
            {
                var pos = FindEntry(item);
                if (pos == -1)
                    throw new Exception($"Getter: Index out of bounds {pos} must be contained within set.");
                return _table.Slots[pos]._value;
            });
        }
        set => Insert(item, true);
    }

    /// <summary>Gets the item stored in slot <paramref name="index"/>.</summary>
    /// <remarks>
    ///     When the slot is not populated yet, the getter waits up to 100 ms for it to appear before
    ///     failing. Slot indices are not stable across <see cref="Remove"/>.
    /// </remarks>
    /// <exception cref="Exception"><paramref name="index"/> is negative or not below the item count.</exception>
    public T this[long index]
    {
        get
        {
            // The wait runs before the lock is taken. NOTE(review): presumably Lock(this, ...) is the
            // same mutual exclusion writers use, in which case spinning inside it could never observe
            // a newly added item - confirm against MonitorActionFunc.
            if (index >= Interlocked.Read(ref _table._count))
                SpinWait.SpinUntil(() => index < Interlocked.Read(ref _table._count), 100);
            return Lock(this, () =>
            {
                // Valid slots are [0, _count); index == _count is already out of range.
                if (index < 0 || index >= _table._count)
                    throw new Exception($"Getter: Index out of bounds {index} must be less than {_table._count}");
                return _table.Slots[index]._value;
            });
        }
    }

    /// <summary>Rebuilds the table from the values captured by the serialization constructor.</summary>
    /// <param name="sender">Not used.</param>
    /// <exception cref="SerializationException">The "Elements" or "Buckets" entry is missing.</exception>
    public void OnDeserialization(object sender)
    {
        Lock(this, () =>
        {
            if (sInfo == null)
                return;
            var size = sInfo.GetInt64("Capacity");
            if (size == 0)
                return;
            Clear();
            _table.HashBuckets = new BigArray<long>(size);
            _table.Slots       = new BigArray<Slot>(size);
            _table.Comparer    = (IBigEqualityComparer<T>) sInfo.GetValue("Comparer", typeof(IBigEqualityComparer<T>));
            _table._count      = sInfo.GetInt64("Count");
            _table.Position    = -1;
            var array = (Slot[][]) sInfo.GetValue("Elements", typeof(Slot[][]));
            if (array == null)
                throw new SerializationException("Missing Elements.");
            var buckets = (long[][]) sInfo.GetValue("Buckets", typeof(long[][]));
            if (buckets == null)
                throw new SerializationException("Missing Buckets.");
            _table.Slots.FromArray(array);
            _table.HashBuckets.FromArray(buckets);
        });
    }

    IEnumerator<T> IEnumerable<T>.GetEnumerator()
    {
        return GetEnumerator();
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    /// <summary>Writes the comparer, capacity, count, slots and hash buckets to <paramref name="info"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="info"/> is null.</exception>
    public void GetObjectData(SerializationInfo info, StreamingContext context)
    {
        if (info == null)
            throw new ArgumentNullException(nameof(info));
        Lock(this, () =>
        {
            info.AddValue("Comparer", _table.Comparer, typeof(IBigEqualityComparer<T>));
            info.AddValue("Capacity", _table.HashBuckets.Length);
            info.AddValue("Count",    _table._count);
            // The (string, object) overload records each value under its own runtime type. What is
            // stored is the ToArray() result, not a BigArray, so BigArray must not be declared here.
            object elements = _table.Slots.ToArray();
            info.AddValue("Elements", elements);
            object buckets = _table.HashBuckets.ToArray();
            info.AddValue("Buckets", buckets);
        });
    }

    /// <summary>Removes every item and shrinks the storage back to <c>BigArray&lt;T&gt;.Granularity</c>.</summary>
    public void Clear()
    {
        Lock(this, () =>
        {
            var size = BigArray<T>.Granularity;
            // NOTE(review): the constructors set OverrideAutoConcurrency on their arrays; the arrays
            // created here (and when growing or deserializing) do not - confirm whether that is intended.
            _table.HashBuckets = new BigArray<long>(size);
            _table.Slots       = new BigArray<Slot>(size);
            _table._count      = 0;
            _table.Position    = -1;
        });
    }

    /// <summary>Adds <paramref name="item"/> unless an equal item is already stored.</summary>
    /// <returns>
    ///     <c>true</c> when an equal item was ALREADY present (nothing added); <c>false</c> when the item
    ///     was newly added. Note that this is the inverse of <c>HashSet&lt;T&gt;.Add</c>.
    /// </returns>
    public bool Add(T item)
    {
        return Insert(item, true);
    }

    /// <summary>Adds every item of <paramref name="collection"/> that is not stored yet.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="collection"/> is null.</exception>
    public void AddRange(IEnumerable<T> collection)
    {
        if (collection == null)
            throw new ArgumentNullException(nameof(collection));
        Lock(this, () =>
        {
            foreach (var item in collection)
                Insert(item, true);
        });
    }

    /// <summary>Determines whether an item equal to <paramref name="item"/> is stored.</summary>
    public bool Contains(T item)
    {
        return Insert(item, false);
    }

    /// <summary>Returns the non-negative hash of <paramref name="item"/>; a null item hashes to 0.</summary>
    private long InternalGetHashCode(T item)
    {
        if (item == null)
            return 0;
        return _table.Comparer.GetHashCode(item) & long.MaxValue;
    }

    /// <summary>Compares two items with the table's comparer; null only equals null.</summary>
    private bool ItemsEqual(T left, T right)
    {
        if (left == null || right == null)
            return left == null && right == null;
        return _table.Comparer.Equals(left, right);
    }

    /// <summary>Installs the comparer and allocates empty bucket and slot storage of the given size.</summary>
    private void ResetTable(long size, IBigEqualityComparer<T> comparer)
    {
        _table.Comparer                            = comparer ?? new DynComparer64<T>();
        _table.HashBuckets                         = new BigArray<long>(size);
        _table.Slots                               = new BigArray<Slot>(size);
        _table._count                              = 0;
        _table.Position                            = -1;
        _table.HashBuckets.OverrideAutoConcurrency = true;
        _table.Slots.OverrideAutoConcurrency       = true;
    }

    /// <summary>Looks up <paramref name="item"/> and optionally stores it when it is absent.</summary>
    /// <param name="item">Item to look up or store.</param>
    /// <param name="add"><c>true</c> to store the item when it is absent; <c>false</c> to only look it up.</param>
    /// <returns><c>true</c> when an equal item was already present; otherwise <c>false</c>.</returns>
    /// <remarks>
    ///     <c>_table.Position</c> is left at the slot of the found or newly stored item, or -1 when the
    ///     item is absent and <paramref name="add"/> is <c>false</c>.
    /// </remarks>
    internal bool Insert(T item, bool add)
    {
        return Lock(this, () =>
        {
            var hashCode = InternalGetHashCode(item);
            if (FindEntry(item, hashCode) != -1)
                return true;
            _table.Position = -1;
            if (!add)
                return false;
            if (_table._count >= _table.Slots.Length)
                GrowTable();
            var hashPos = hashCode % _table.HashBuckets.Length;
            // The new slot becomes the head of its chain and points at the previous head (-1 if none).
            _table.Slots[_table._count] = new Slot(hashCode, _table.HashBuckets[hashPos] - 1, item);
            _table.HashBuckets[hashPos] = _table._count + 1;
            _table.Position             = _table._count;
            _table._count++;
            return false;
        });
    }

    /// <summary>Doubles the storage and re-links every occupied slot into the new hash buckets.</summary>
    private void GrowTable()
    {
        var newSize        = _table.HashBuckets.Length << 1;
        var newHashBuckets = new BigArray<long>(newSize);
        var newSlots       = new BigArray<Slot>(newSize);
        for (var i = 0L; i < _table._count; i++)
        {
            var slot = _table.Slots[i];
            var pos  = slot._hashCode % newSize;
            newSlots[i]         = new Slot(slot._hashCode, newHashBuckets[pos] - 1, slot._value);
            newHashBuckets[pos] = i + 1;
        }
        _table.HashBuckets = newHashBuckets;
        _table.Slots       = newSlots;
    }

    /// <summary>Walks the chain selected by <paramref name="hashCode"/> looking for <paramref name="item"/>.</summary>
    /// <returns>The slot index of the item (also stored in <c>_table.Position</c>), or -1 when absent.</returns>
    private long FindEntry(T item, long hashCode)
    {
        return Lock(this, () =>
        {
            var position = _table.HashBuckets[hashCode % _table.HashBuckets.Length] - 1;
            while (position >= 0)
            {
                var slot = _table.Slots[position];
                // Equality is decided by the configured comparer, matching how the hash was produced.
                if (slot._hashCode == hashCode && ItemsEqual(slot._value, item))
                {
                    _table.Position = position;
                    return position;
                }
                position = slot._next;
            }
            return -1L;
        });
    }

    /// <summary>Copies the stored items into a new array, in slot order.</summary>
    /// <returns>An array of length <see cref="Count"/>.</returns>
    public T[] ToArray()
    {
        return Lock(this, () =>
        {
            var array = new T[Count];
            for (var i = 0L; i < Count; i++)
                if (_table.Slots[i]._hashCode >= 0)
                    array[i] = _table.Slots[i]._value;
            return array;
        });
    }

    /// <summary>Finds the slot index of <paramref name="item"/>.</summary>
    /// <returns>The slot index, or -1 when no equal item is stored.</returns>
    public long FindEntry(T item)
    {
        return FindEntry(item, InternalGetHashCode(item));
    }

    /// <summary>Removes the item equal to <paramref name="item"/>, if one is stored.</summary>
    /// <returns><c>true</c> when an item was removed; otherwise <c>false</c>.</returns>
    /// <remarks>
    ///     The freed slot is refilled with the item from the last occupied slot so that slots stay
    ///     dense; the slot index of that one item therefore changes.
    /// </remarks>
    public bool Remove(T item)
    {
        return Lock(this, () =>
        {
            var hashCode = InternalGetHashCode(item);
            var bucket   = hashCode % _table.HashBuckets.Length;
            var previous = -1L;
            var position = _table.HashBuckets[bucket] - 1;
            while (position >= 0)
            {
                var candidate = _table.Slots[position];
                if (candidate._hashCode == hashCode && ItemsEqual(candidate._value, item))
                    break;
                previous = position;
                position = candidate._next;
            }
            if (position < 0)
                return false;

            // Take only this slot out of its chain; the other items in the bucket stay reachable.
            UnlinkSlot(bucket, previous, position);

            var last = _table._count - 1;
            if (position != last)
                RelocateSlot(last, position);

            // Drop the reference held by the vacated slot and shrink the occupied range.
            _table.Slots[last] = new Slot(-1, -1, default);
            _table._count      = last;
            _table.Position    = -1;
            return true;
        });
    }

    /// <summary>Detaches slot <paramref name="position"/> from its chain by re-pointing whatever referenced it.</summary>
    /// <param name="bucket">Hash-bucket index of the chain.</param>
    /// <param name="previous">Slot preceding <paramref name="position"/> in the chain, or -1 when it is the head.</param>
    /// <param name="position">Slot to detach.</param>
    private void UnlinkSlot(long bucket, long previous, long position)
    {
        var next = _table.Slots[position]._next;
        if (previous < 0)
        {
            // Bucket heads are stored as index + 1, with 0 meaning "empty chain".
            _table.HashBuckets[bucket] = next < 0 ? 0 : next + 1;
            return;
        }
        var before = _table.Slots[previous];
        _table.Slots[previous] = new Slot(before._hashCode, next, before._value);
    }

    /// <summary>Moves the slot at <paramref name="source"/> to <paramref name="target"/> and fixes the link that pointed at it.</summary>
    private void RelocateSlot(long source, long target)
    {
        var moving = _table.Slots[source];
        var bucket = moving._hashCode % _table.HashBuckets.Length;
        var cursor = _table.HashBuckets[bucket] - 1;
        if (cursor == source)
        {
            _table.HashBuckets[bucket] = target + 1;
        }
        else
        {
            // Find the chain member whose 'next' is the moving slot and redirect it.
            while (cursor >= 0)
            {
                var link = _table.Slots[cursor];
                if (link._next == source)
                {
                    _table.Slots[cursor] = new Slot(link._hashCode, target, link._value);
                    break;
                }
                cursor = link._next;
            }
        }
        _table.Slots[target] = moving;
    }

    /// <summary>Returns an enumerator over the stored items.</summary>
    /// <remarks>
    ///     <see cref="GetEnum"/> is an iterator method, so its body runs lazily while the caller enumerates;
    ///     the lock taken here is held only while the enumerator object is created.
    /// </remarks>
    public IEnumerator<T> GetEnumerator()
    {
        return Lock(this, () =>
        {
            return GetEnum();
        });
    }

    /// <summary>Lazily yields the stored items in slot order.</summary>
    public IEnumerator<T> GetEnum()
    {
        // The index is a long: an int index would overflow once Count exceeds int.MaxValue.
        for (var i = 0L; i < Count; i++)
            if (_table.Slots[i]._hashCode >= 0)
                yield return _table.Slots[i]._value;
    }

    /// <summary>Mutable state of the set: comparer, hash buckets, slots and bookkeeping values.</summary>
    public class Table
    {
        /// <summary>Number of occupied slots; also the index the next inserted item receives.</summary>
        public            long                    _count;

        /// <summary>Provider of hash codes and equality for the stored items.</summary>
        internal          IBigEqualityComparer<T> Comparer;

        /// <summary>Per hash position, the index + 1 of the first slot in the chain; 0 when empty.</summary>
        internal volatile BigArray<long>          HashBuckets;

        /// <summary>Slot index touched by the most recent lookup or insert, or -1 when none matched.</summary>
        public            long                    Position;

        /// <summary>Item storage; entries form singly linked chains through <c>_next</c>.</summary>
        internal volatile BigArray<Slot>          Slots;
    }

    /// <summary>Immutable entry of the slot array: an item, its hash, and the next slot in its chain.</summary>
    [Serializable]
    internal struct Slot
    {
        /// <summary>Non-negative hash of <see cref="_value"/>.</summary>
        public readonly long _hashCode;

        /// <summary>Index of the next slot in the same chain; negative at the end of the chain.</summary>
        public readonly long _next;

        /// <summary>The stored item.</summary>
        public readonly T    _value;

        /// <summary>Creates a slot from its three components.</summary>
        public Slot(long hashcode, long next, T value)
        {
            _hashCode = hashcode;
            _next     = next;
            _value    = value;
        }
    }
}