BoyerMoore.cs

Boyer Moore Search Algorithm

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
public class BoyerMoore
{
    private int[]  _jumpTable;
    private byte[] _pattern;
    private int    _patternLength;
    public BoyerMoore()
    {
    }
    public BoyerMoore(byte[] pattern)
    {
        SetPattern(pattern);
    }
    public void SetPattern(byte[] pattern)
    {
        _pattern       = pattern;
        _jumpTable     = new int[256];
        _patternLength = _pattern.Length;
        for (var index = 0; index < 256; index++)
            _jumpTable[index] = _patternLength;
        for (var index = 0; index < _patternLength - 1; index++)
            _jumpTable[_pattern[index]] = _patternLength - index - 1;
    }
    public unsafe int Search(byte[] searchArray, int startIndex = 0)
    {
        if (_pattern == null)
            throw new Exception("Pattern has not been set.");
        if (_patternLength > searchArray.Length)
            throw new Exception("Search Pattern length exceeds search array length.");
        var index                 = startIndex;
        var limit                 = searchArray.Length - _patternLength;
        var patternLengthMinusOne = _patternLength     - 1;
        var Moves                 = 0;
        fixed (byte* pointerToByteArray = searchArray)
        {
            var pointerToByteArrayStartingIndex = pointerToByteArray + startIndex;
            fixed (byte* pointerToPattern = _pattern)
            {
                while (index <= limit)
                {
                    var j = patternLengthMinusOne;
                    while (j >= 0 && pointerToPattern[j] == pointerToByteArrayStartingIndex[index + j])
                        j--;
                    if (j < 0)
                        return index;
                    index += Math.Max(_jumpTable[pointerToByteArrayStartingIndex[index + j]] - _patternLength + 1 + j,
                        1);
                    Moves++;
                }
            }
        }
        return -1;
    }
    public unsafe (List<int>, int) SearchAll(byte[] searchArray, int startIndex = 0)
    {
        if (_pattern == null)
            throw new Exception("Pattern has not been set.");
        if (_patternLength > searchArray.Length)
            throw new Exception("Search Pattern length exceeds search array length.");
        var index                 = startIndex;
        var limit                 = searchArray.Length - _patternLength;
        var patternLengthMinusOne = _patternLength     - 1;
        var list                  = new List<int>();
        var Moves                 = 0;
        fixed (byte* pointerToByteArray = searchArray)
        {
            var pointerToByteArrayStartingIndex = pointerToByteArray + startIndex;
            fixed (byte* pointerToPattern = _pattern)
            {
                while (index <= limit)
                {
                    var j = patternLengthMinusOne;
                    while (j >= 0 && pointerToPattern[j] == pointerToByteArrayStartingIndex[index + j])
                        j--;
                    if (j < 0)
                        list.Add(index);
                    index += Math.Max(_jumpTable[pointerToByteArrayStartingIndex[index + j]] - _patternLength + 1 + j,
                        1);
                    Moves++;
                }
            }
        }
        return (list, Moves);
    }
    public int SuperSearch(byte[] searchArray, int nth, int start = 0)
    {
        var e = start;
        var c = 0;
        do
        {
            e = Search(searchArray, e);
            if (e == -1)
                return -1;
            c++;
            e++;
        } while (c < nth);
        return e - 1;
    }
    public static bool ContainsAny(byte[] sPattern, params byte[][] list)
    {
        return list.Select(v => new BoyerMoore(v)).Any(bm => bm.Search(sPattern) != -1);
    }
    public static bool ContainsAny(string sPattern, params string[] list)
    {
        return list.Select(s => new BoyerMoore(s.ToLower().GetBytes(Encoding.ASCII)))
            .Any(bm => bm.Search(sPattern.ToLower().GetBytes(Encoding.ASCII)) != -1);
    }
    }

Leave a Reply

Your email address will not be published. Required fields are marked *