BucketHashSet3.cs

Single Array HashSet Class

Uses only a single array for tracking and storage.

Updated: July-11,2021

using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
[DebuggerDisplay("Count = {Count}")]
public class BucketHashSet3<T> : IEnumerable
{
    private  Bucket[]             _buckets;
    internal IEqualityComparer<T> Comparer;
    private  int                  HiBucketCount;
    public BucketHashSet3() : this(1_000_000, 10, null)
    {
    }
    public BucketHashSet3(int sizeHint, int sizeHintBucket) : this(sizeHint, sizeHintBucket, null)
    {
    }
    public BucketHashSet3(int sizeHint, int sizeHintBucket, IEqualityComparer<T> comparer)
    {
        if (comparer == null)
            comparer = EqualityComparer<T>.Default;
        Comparer      = comparer;
        _buckets      = new Bucket[sizeHint];
        Count         = 0;
        HiBucketCount = 0;
        for (var i = 0; i < _buckets.Length; ++i)
            _buckets[i].Allocate(sizeHintBucket);
    }
    public int Count
    {
        get;
        private set;
    }
    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
    public void Zero()
    {
        _buckets = null;
        Count    = 0;
    }
    public bool Add(T item)
    {
        var hashCode = Comparer.GetHashCode(item) & int.MaxValue;
        var apos     = FindEntry(item, hashCode);
        if (apos != -1)
            return false;
        var pos = hashCode % _buckets.Length;
        _buckets[pos].Add(item);
        Count++;
        return true;
    }
    public T[] ToArray()
    {
        var newArray = new T[Count];
        using (var en = GetEnumerator())
        {
            var ptr = 0;
            while (en.MoveNext())
            {
                var value = en.Current;
                if (value == null)
                    break;
                newArray[ptr++] = value;
            }
            return newArray;
        }
    }
    private int FindEntry(T item, int hashCode)
    {
        if (Count == 0)
            return -1;
        var Pos = hashCode % _buckets.Length;
        if (_buckets[Pos].Count == 0)
            return -1;
        var cnt = _buckets[Pos].Count;
        if (cnt > HiBucketCount)
            HiBucketCount = cnt;
        if (HiBucketCount >= 100)
        {
            Resize();
            HiBucketCount = 0;
            return FindEntry(item, hashCode);
        }
        for (var i = 0; i < cnt; ++i)
        {
            var v = _buckets[Pos].Values[i];
            if (Comparer.Equals(v, item))
                return Pos;
        }
        return -1;
    }
    private void Resize()
    {
        var cArray  = ToArray();
        var newSize = _buckets.Length << 1;
        _buckets = new Bucket[newSize];
        for (var i = 0; i < newSize; ++i)
            _buckets[i].Allocate();
        foreach (var i in cArray)
            _buckets[BucketPosition(i)].Add(i);
    }
    private int BucketPosition(T item)
    {
        var hashCode = Comparer.GetHashCode(item) & int.MaxValue;
        var pos      = hashCode % _buckets.Length;
        return pos;
    }
    public bool Contains(T item)
    {
        var hashCode = Comparer.GetHashCode(item) & int.MaxValue;
        return FindEntry(item, hashCode) != -1;
    }
    public IEnumerator<T> GetEnumerator()
    {
        return GetEnum();
    }
    public IEnumerator<T> GetEnum()
    {
        for (var i = 0; i < _buckets.Length; i++)
        for (var j = 0; j < _buckets[i].Count; ++j)
            yield return _buckets[i].Values[j];
    }
    internal struct Bucket
    {
        public int Count;
        public T[] Values;
        public void Add(T item)
        {
            if (Count >= Values.Length)
                Array.Resize(ref Values, Values.Length << 1);
            Values[Count++] = item;
        }
        public void Allocate(int size = 10)
        {
            Values = new T[size];
        }
    }
}