arithmetic encoding algorithm in csharp

Arithmetic encoding is a data compression technique that compresses input data into a smaller size by using probabilities of symbols rather than a fixed-length code that appears in classical entropy encoding algorithms. Here is a sample implementation of the Arithmetic Encoding Algorithm in C#:

main.cs
using System;
using System.Collections.Generic;

namespace ArithmeticCoding
{
    public class ArithmeticEncoder
    {
        private const int NumBits = 32;
        private uint low;
        private uint high;
        private uint pending_bits;
        private int num_pending_bits;
        private int[] cumulative_frequency;
        private uint total_symbols;

        public ArithmeticEncoder()
        {
            low = 0;
            high = uint.MaxValue;
            pending_bits = 0;
            num_pending_bits = 0;
            cumulative_frequency = new int[257];
            total_symbols = 0;
        }

        private void shift()
        {
            uint bit = 0;
            if ((low & 0x80000000) == (high & 0x80000000))
            {
                return;
            }
            if (low >= 0x80000000)
            {
                pending_bits |= 1u;
            }
            else if (high < 0x80000000)
            {
                bit = 1;
                pending_bits |= 1u;
            }
            else
            {
                num_pending_bits += 1;
                pending_bits <<= 1;
            }
            low <<= 1;
            high <<= 1;
            high |= bit;
        }

        private void output_bit(bool bit, List<bool> output)
        {
            pending_bits <<= 1;
            if (bit)
            {
                pending_bits |= 1u;
            }
            num_pending_bits += 1;
            if (num_pending_bits == NumBits)
            {
                output.Add((pending_bits >> 24 & 0xFF) != 0);
                output.Add((pending_bits >> 16 & 0xFF) != 0);
                output.Add((pending_bits >> 8 & 0xFF) != 0);
                output.Add((pending_bits & 0xFF) != 0);
                num_pending_bits = 0;
                pending_bits = 0;
            }
        }

        public List<bool> Encode(byte[] input)
        {
            List<bool> output = new List<bool>();
            var frequency = new int[257];
            foreach (var b in input)
            {
                frequency[b]++;
                total_symbols++;
            }
            cumulative_frequency[0] = 0;
            for (int i = 0; i < 256; i++)
            {
                cumulative_frequency[i + 1] = cumulative_frequency[i] + frequency[i];
            }
            for (int i = 0; i < input.Length; i++)
            {
                var symbol = input[i];
                var range = (ulong)(high - low) + 1;
                high = low + (uint)(range * cumulative_frequency[symbol + 1] / total_symbols - 1);
                low = low + (uint)(range * cumulative_frequency[symbol] / total_symbols);
                while (true)
                {
                    if ((low & 0x80000000) == (high & 0x80000000))
                    {
                        output_bit((high & 0x80000000) != 0, output);
                        while (num_pending_bits > 0)
                        {
                            output_bit((high & 0x80000000) == 0, output);
                            num_pending_bits--;
                        }
                    }
                    else if ((low & 0x40000000) != 0 && (high & 0x40000000) == 0)
                    {
                        num_pending_bits++;
                        low &= 0x3fffffff;
                        high |= 0x40000000;
                    }
                    else
                    {
                        break;
                    }
                    low <<= 1;
                    high <<= 1;
                    high |= 1;
                }
            }
            output_bit((low & (1u << 31)) != 0, output);
            while (num_pending_bits > 0)
            {
                output_bit((low & (1u << 31)) == 0, output);
                num_pending_bits--;
            }
            return output;
        }
    }
}
3876 chars
127 lines

This C# implementation of the Arithmetic Encoding Algorithm can be used to compress any byte data, but it also requires a decompressor that can faithfully reconstruct the original byte data from the compressed output.

gistlibby LogSnag