Skip to content

VarInt Integer Serialization Format

Variable-length integers use an MSB-first base-128 encoding of the number. The high bit of each byte signals whether another digit follows. To make the encoding one-to-one, one is subtracted from every digit except the last.

Thus, the byte sequence a[] of length len, in which every byte except the last has bit 0x80 (128) set, encodes the number:

(a[len-1] & 0x7F) + sum(i=1..len-1, 128^i*((a[len-i-1] & 0x7F)+1))

Properties:

  • Very small (0-127: 1 byte, 128-16511: 2 bytes, 16512-2113663: 3 bytes)
  • Every integer has exactly one encoding
  • Encoding does not depend on the size of the original integer type
  • No redundancy: every (infinite) byte sequence corresponds to a list of encoded integers

Test Vectors

  • 0: [0x00]
  • 1: [0x01]
  • 127: [0x7F]
  • 128: [0x80 0x00]
  • 255: [0x80 0x7F]
  • 256: [0x81 0x00]
  • 16383: [0xFE 0x7F]
  • 16384: [0xFF 0x00]
  • 16511: [0xFF 0x7F]
  • 65535: [0x82 0xFE 0x7F]
  • 2^32: [0x8E 0xFE 0xFE 0xFF 0x00]

C++ Code

/**
 * Write `n` to `os` as a variable-length integer.
 *
 * The value is split into base-128 digits, least-significant first, into a
 * scratch buffer; the digits are then emitted most-significant first. Every
 * emitted byte except the final one carries the 0x80 continuation bit, and one
 * is subtracted from the remaining value after each 7-bit shift so that each
 * number has exactly one encoding.
 *
 * @tparam Stream  Output stream type accepted by ser_writedata8().
 * @tparam Mode    VarInt mode, checked against I by CheckVarIntMode (defined elsewhere).
 * @tparam I       Integer type of the value being written.
 * @param os       Stream receiving the encoded bytes.
 * @param n        Value to encode.
 */
template <typename Stream, VarIntMode Mode, typename I>
void WriteVarInt(Stream &os, I n)
{
    CheckVarIntMode<Mode, I>();

    // Bit width of I divided by 7, rounded up: the most digits any value of I can need.
    unsigned char digits[(sizeof(n) * 8 + 6) / 7];

    int last = 0;
    for (;; ++last)
    {
        // digits[0] becomes the final byte on the wire, so it alone has no continuation bit.
        const unsigned char continuation = (last != 0) ? 0x80 : 0x00;
        digits[last] = (n & 0x7F) | continuation;
        if (n <= 0x7F)
            break;
        n = (n >> 7) - 1;
    }

    // Emit in reverse so the most-significant digit goes out first.
    for (int i = last; i >= 0; --i)
    {
        ser_writedata8(os, digits[i]);
    }
}

/**
 * Read a variable-length integer from `is`.
 *
 * Bytes are consumed one at a time. The low seven bits of each byte are
 * appended to the accumulated value; while the 0x80 continuation bit is set,
 * the value is incremented by one (undoing the subtraction applied by the
 * encoder) and another byte is read.
 *
 * @tparam Stream  Input stream type accepted by ser_readdata8().
 * @tparam Mode    VarInt mode, checked against I by CheckVarIntMode (defined elsewhere).
 * @tparam I       Integer type of the decoded value.
 * @param is       Stream to read from.
 * @return         The decoded value.
 * @throws std::ios_base::failure if the value would not fit in I.
 */
template <typename Stream, VarIntMode Mode, typename I>
I ReadVarInt(Stream &is)
{
    CheckVarIntMode<Mode, I>();

    const I limit = std::numeric_limits<I>::max();
    I value = 0;
    for (;;)
    {
        const unsigned char octet = ser_readdata8(is);

        // Shifting left by 7 would lose high bits once value exceeds limit >> 7.
        if (value > (limit >> 7))
        {
            throw std::ios_base::failure("ReadVarInt(): size too large");
        }
        value = (value << 7) | (octet & 0x7F);

        if ((octet & 0x80) == 0)
        {
            // No continuation bit: this was the final digit.
            return value;
        }

        // The increment below would wrap if value is already at the maximum.
        if (value == limit)
        {
            throw std::ios_base::failure("ReadVarInt(): size too large");
        }
        ++value;
    }
}

Kotlin Code

/**
 * Encode [num] as a variable-length integer and wrap the bytes in a [NEXAserialized].
 *
 * Digits are produced least-significant first into a scratch buffer and then
 * copied out in reverse, so the result is most-significant digit first. Every
 * byte except the final one has the 0x80 continuation bit set, and one is
 * subtracted from the remaining value after each 7-bit shift.
 *
 * @param num the value to encode
 * @param format serialization type passed through to the returned [NEXAserialized]
 * @return the encoded bytes tagged with [format]
 */
fun varint(num: Long, format: SerializationType = SerializationType.UNKNOWN): NEXAserialized
{
    // 64 bits divided by 7, rounded up: the most digits a Long can need.
    val digits = ByteArray((8*8 + 6)/7)
    var remaining = num
    var last = 0
    while (true)
    {
        // digits[0] ends up as the final output byte, so it alone has no continuation bit.
        val continuation = if (last == 0) 0L else 0x80L
        digits[last] = ((remaining and 0x7F) or continuation).toByte()
        if (remaining <= 0x7F) break
        remaining = (remaining shr 7) - 1
        last += 1
    }
    // Reverse the used portion of the buffer so the most-significant digit comes first.
    val count = last + 1
    val encoded = ByteArray(count) { i -> digits[last - i] }
    return NEXAserialized(encoded, format)
}

/**
 * Decode a variable-length integer by repeatedly reading bytes via deuint8().
 *
 * The low seven bits of each byte are appended to the accumulated value. While
 * the byte read is 0x80 or greater, the value is incremented by one and another
 * byte is read; the first byte below 0x80 ends the number.
 *
 * NOTE(review): unlike the C++ ReadVarInt, this performs no overflow checks, so
 * an over-long input wraps silently in the Long accumulator. Confirm whether
 * callers rely on that before adding a guard.
 *
 * @return the decoded value
 */
fun devarint(): Long
{
    var value = 0L
    while (true)
    {
        val octet: Long = deuint8().toLong()
        value = (value shl 7) or (octet and 0x7f)
        if (octet < 0x80)
        {
            // No continuation bit: this was the final digit.
            return value
        }
        value += 1
    }
}