VarInt Integer Serialization Format
Variable-length integers use an MSB-first base-128 encoding of the number: the most significant 7-bit digit is written first. The high bit in each byte signifies whether another digit follows. To make sure the encoding is one-to-one, one is subtracted from all but the last digit.
Thus, the byte sequence a[] with length len, where every byte except the last has the high bit (0x80) set, encodes the number:
(a[len-1] & 0x7F) + sum(i=1..len-1, 128^i*((a[len-i-1] & 0x7F)+1))
Properties:
* Very small (0-127: 1 byte, 128-16511: 2 bytes, 16512-2113663: 3 bytes)
* Every integer has exactly one encoding
* Encoding does not depend on size of original integer type
* No redundancy: every (infinite) byte sequence corresponds to a list of encoded integers.
Test Vectors
- 0: [0x00]
- 1: [0x01]
- 127: [0x7F]
- 128: [0x80 0x00]
- 255: [0x80 0x7F]
- 256: [0x81 0x00]
- 16383: [0xFE 0x7F]
- 16384: [0xFF 0x00]
- 16511: [0xFF 0x7F]
- 65535: [0x82 0xFE 0x7F]
- 2^32: [0x8E 0xFE 0xFE 0xFF 0x00]
C++ Code
/**
 * Write n to os as a VarInt: base-128 digits, most significant digit first,
 * with the high bit (0x80) set on every byte except the final one.
 *
 * @param os  Stream that receives each byte through ser_writedata8().
 * @param n   Value to encode.
 */
template <typename Stream, VarIntMode Mode, typename I>
void WriteVarInt(Stream &os, I n)
{
    // NOTE(review): presumably validates the Mode / I combination; defined elsewhere.
    CheckVarIntMode<Mode, I>();

    // One slot per 7 bits of I, rounded up.
    unsigned char digits[(sizeof(n) * 8 + 6) / 7];
    int last = 0;

    // Digits are produced least significant first. Slot 0 becomes the final
    // byte on the wire, so it is the only one without the continuation bit.
    for (;; ++last)
    {
        digits[last] = (n & 0x7F) | (last ? 0x80 : 0x00);
        if (n <= 0x7F)
            break;
        // Subtracting one from every higher digit keeps the encoding one-to-one.
        n = (n >> 7) - 1;
    }

    // Emit in reverse so the most significant digit goes out first.
    for (int i = last; i >= 0; --i)
    {
        ser_writedata8(os, digits[i]);
    }
}
/**
 * Read one VarInt from is and return it as an I.
 *
 * Bytes are consumed through ser_readdata8() until one arrives without the
 * continuation bit (0x80).
 *
 * @param is  Stream to read bytes from.
 * @return    The decoded value.
 * @throws std::ios_base::failure if the encoded value does not fit in I.
 */
template <typename Stream, VarIntMode Mode, typename I>
I ReadVarInt(Stream &is)
{
    // NOTE(review): presumably validates the Mode / I combination; defined elsewhere.
    CheckVarIntMode<Mode, I>();

    const I limit = std::numeric_limits<I>::max();
    I value = 0;
    for (;;)
    {
        const unsigned char byte = ser_readdata8(is);

        // Shifting in 7 more bits would overflow I.
        if (value > (limit >> 7))
        {
            throw std::ios_base::failure("ReadVarInt(): size too large");
        }
        value = (value << 7) | (byte & 0x7F);

        // No continuation bit: this was the final digit.
        if (!(byte & 0x80))
        {
            return value;
        }

        // Undo the encoder's "minus one" on non-final digits, guarding the increment.
        if (value == limit)
        {
            throw std::ios_base::failure("ReadVarInt(): size too large");
        }
        ++value;
    }
}
Kotlin Code
/**
 * Encode [num] in the VarInt format: base-128 digits, most significant digit
 * first, with the high bit (0x80) set on every byte except the final one.
 *
 * @param num     Value to encode; must be non-negative.
 * @param format  Serialization type passed through to the returned [NEXAserialized].
 * @return        The encoded bytes wrapped in a [NEXAserialized].
 * @throws IllegalArgumentException if [num] is negative. Without this check a
 *         negative value would be written as a single byte holding only its
 *         low 7 bits, which decodes to a different number.
 */
fun varint(num: Long, format: SerializationType = SerializationType.UNKNOWN): NEXAserialized
{
    require(num >= 0) { "varint(): cannot encode negative value $num" }

    // One slot per 7 bits of a 64-bit value, rounded up.
    val tmp = ByteArray((8*8 + 6)/7)
    var n = num
    var pos = 0
    while (true)
    {
        // Slot 0 becomes the final byte on the wire, so it alone lacks the continuation bit.
        tmp[pos] = ((n and 0x7F) or (if (pos != 0) 0x80L else 0L)).toByte()
        if (n <= 0x7F) break
        // Subtracting one from every higher digit keeps the encoding one-to-one.
        n = (n shr 7) - 1
        pos += 1
    }
    // Digits were produced least significant first; emit them in reverse.
    val encoded = ByteArray(pos + 1) { tmp[pos - it] }
    return NEXAserialized(encoded, format)
}
/**
 * Decode one VarInt, reading bytes through deuint8() until one arrives
 * without the continuation bit (0x80).
 *
 * @return The decoded value.
 * @throws ArithmeticException if the encoded value does not fit in a
 *         non-negative [Long]. Without these checks an over-long input would
 *         silently wrap around.
 */
fun devarint(): Long
{
    var ret = 0L
    while (true)
    {
        // NOTE(review): assumes deuint8() yields 0..255 -- confirm against its definition.
        val chData: Long = deuint8().toLong()

        // Shifting in 7 more bits would overflow Long.
        if (ret > (Long.MAX_VALUE shr 7))
        {
            throw ArithmeticException("devarint(): size too large")
        }
        ret = (ret shl 7) or (chData and 0x7f)
        if (chData >= 0x80)
        {
            // Undo the encoder's "minus one" on non-final digits, guarding the increment.
            if (ret == Long.MAX_VALUE)
            {
                throw ArithmeticException("devarint(): size too large")
            }
            ret++
        }
        else
        {
            return ret
        }
    }
}