302 lines
7.9 KiB
Nim
302 lines
7.9 KiB
Nim
|
import std/hashes, std/streams, std/strutils, std/bitops, std/unicode, std/endians
|
||
|
import base58/bitcoin, cbor, siphash
|
||
|
import ./blobsets/priv/hex
|
||
|
|
||
|
import nimcrypto, nimcrypto/blake2
|
||
|
|
||
|
const
  digestLen* = 32
    ## Length of a chunk digest.
  cidSize* = digestLen
    ## Size of CID object in memory
  blobLeafSize* = 1 shl 14
    ## Size of blob leaves (16 KiB).
  blobLeafSizeMask* = not(not(0) shl 14)
    ## Mask of the low bits that address an offset within a leaf.
  visualLen = 32 * 3
    # UTF-8 length of the visual digest form produced by `$`:
    # 32 digest bytes, one 3-byte rune each.

  maxChunkSize* {.deprecated} = blobLeafSize
    ## Deprecated alias of `blobLeafSize`.
|
||
|
|
||
|
type
  Blake2b256* = Blake2bContext[256]
    ## BLAKE2b with a 256-bit digest.

  BlobId* = MDigest[Blake2b256.bits]
    ## Blob Identifier
  SetId* = MDigest[Blake2b256.bits]
    ## Set Identifier

  Cid* {.deprecated} = BlobId
    ## Deprecated alias of `BlobId`.
|
||
|
|
||
|
func `$`*(bh: BlobId): string =
  ## Convert a blob hash to a visual representation.
  ## Every digest byte becomes one rune in the U+2800..U+28FF
  ## (Braille pattern) range, so the result is `visualLen` bytes of UTF-8.
  const baseRune = 0x2800
  result = newString(visualLen)
  var offset = 0
  for octet in bh.data.items:
    let rune = Rune(baseRune or octet.int)
    fastToUTF8Copy(rune, result, offset, true)
|
||
|
|
||
|
func toBlobId*(s: string): BlobId =
  ## Parse a visual blob hash to binary.
  ## Inverse of `$`: the low byte of each rune is one digest byte.
  ## NOTE(review): when `s.len != visualLen` the input is silently
  ## ignored and the all-zero digest is returned — callers must check.
  if s.len == visualLen:
    var
      pos: int
      r: Rune
    for b in result.data.mitems:
      fastRuneAt(s, pos, r, true)
      # Keep only the low 8 bits of the rune (drops the 0x2800 base).
      b = r.byte
|
||
|
|
||
|
proc `==`*(x, y: BlobId): bool = x.data == y.data
  ## Compare two BlobIds by their raw digest bytes.
|
||
|
|
||
|
proc `==`*(cbor: CborNode; cid: BlobId): bool =
  ## Compare a CBOR node with a BlobId.
  ## True only when the node is a byte string holding exactly the
  ## digest bytes.
  # Length guard: the original compared 32 bytes unconditionally, which
  # raised an index defect for shorter byte strings and reported a false
  # match for longer strings sharing a 32-byte prefix.
  if cbor.kind == cborBytes and cbor.bytes.len == digestLen:
    for i in 0..<digestLen:
      if cid.data[i] != cbor.bytes[i].uint8:
        return false
    result = true
|
||
|
|
||
|
proc hash*(cid: BlobId): Hash =
  ## Reduce a BlobId into an integer for use in tables.
  # `Key` here resolves to siphash's key type (the module-local
  # `Key = int64` is declared further down); an all-zero key makes
  # the hash deterministic across runs.
  var zeroKey: Key
  result = cast[Hash](sipHash(cid.data, zeroKey))
|
||
|
|
||
|
proc toCbor*(cid: BlobId): CborNode = newCborBytes cid.data
  ## Generate a CBOR representation of a BlobId
  ## (a byte string of the raw digest).
|
||
|
|
||
|
proc toBlobId*(cbor: CborNode): BlobId =
  ## Extract a BlobId from its CBOR representation
  ## (inverse of `toCbor`; expects a byte-string node of digest length).
  assert(cbor.bytes.len == digestLen)
  for i in 0..<digestLen:
    result.data[i] = cbor.bytes[i].uint8

{.deprecated: [newCborBytes: toCbor].}
|
||
|
|
||
|
proc toHex*(cid: BlobId): string = hex.encode(cid.data)
  ## Return BlobId encoded in hexadecimal.
|
||
|
|
||
|
proc writeUvarint*(s: Stream; n: SomeInteger) =
  ## Write an IPFS varint (unsigned LEB128): seven payload bits per
  ## byte, least-significant group first, continuation bit (0x80) set
  ## on every byte except the last.
  var n = n
  while true:
    # Mask into an unsigned byte; the original used `int8`, for which
    # setting the 0x80 continuation bit is out of range.
    let group = byte(n and 0x7f)
    n = n shr 7
    if n == 0:
      s.write(char(group))
      break
    else:
      s.write(char(group or 0x80'u8))
|
||
|
|
||
|
proc readUvarint*(s: Stream): BiggestInt =
  ## Read an IPFS varint (unsigned LEB128).
  ## Seven payload bits per byte, least-significant group first; a
  ## clear high bit marks the final byte.
  var shift = 0
  while shift < (9*8):
    let group = BiggestInt(s.readChar)
    result = result or ((group and 0x7f) shl shift)
    if (group and 0x80) == 0:
      break
    inc(shift, 7)
|
||
|
|
||
|
proc toIpfs*(cid: BlobId): string =
  ## Return BlobId encoded in IPFS multimulti.
  const
    multiRaw = 0x55            # multicodec code for raw binary
    multiBlake2b_256 = 0xb220  # multihash code for BLAKE2b-256
  let buf = newStringStream()
  buf.writeUvarint 1           # CID version
  buf.writeUvarint multiRaw
  buf.writeUvarint multiBlake2b_256
  buf.writeUvarint digestLen
  for octet in cid.data:
    buf.write octet
  buf.setPosition 0
  # 'z' prefix over the base58 (bitcoin alphabet) encoding.
  result = 'z' & bitcoin.encode(buf.readAll)
  close buf
|
||
|
|
||
|
const
  zeroChunk* = "8ddb61928ec76e4ee904cd79ed977ab6f5d9187f1102975060a6ba6ce10e5481".toDigest
    ## BlobId of zero chunk of maximum size.
|
||
|
|
||
|
proc take*(cid: var BlobId; buf: var string) =
  ## Take a raw digest from a string buffer.
  ## `buf` must hold exactly `digestLen` bytes.
  doAssert(buf.len == digestLen)
  copyMem(cid.data[0].addr, buf[0].addr, digestLen)
|
||
|
|
||
|
proc dagHash*(buf: pointer; len: Natural): BlobId =
  ## Generate a BlobId for a raw buffer using the BLAKE2b hash
  ## algorithm. `len` may not exceed the maximum chunk size.
  assert(len <= maxChunkSize)
  var ctx: Blake2b256
  ctx.init()
  ctx.update(buf, len)
  ctx.finish()
|
||
|
|
||
|
proc dagHash*(data: string): BlobId =
  ## Generate a BlobId for a string of data using the BLAKE2b hash
  ## algorithm. The string may not exceed the maximum chunk size.
  assert(data.len <= maxChunkSize)
  var ctx: Blake2b256
  ctx.init()
  ctx.update(data)
  ctx.finish()
|
||
|
|
||
|
proc verify*(cid: BlobId; data: string): bool =
  ## Verify that a string of data corresponds to a BlobId by
  ## recomputing its BLAKE2b digest.
  var ctx: Blake2b256
  ctx.init()
  ctx.update(data)
  ctx.finish() == cid
|
||
|
|
||
|
iterator simpleChunks*(s: Stream; size = maxChunkSize): string =
  ## Iterator that breaks a stream into simple chunks.
  ## Every chunk is `size` bytes except possibly the last, which is
  ## trimmed to however many bytes the stream still held.
  doAssert(size <= maxChunkSize)
  var buffer = newString(size)
  while not s.atEnd:
    buffer.setLen(size)
    let got = s.readData(buffer[0].addr, size)
    buffer.setLen(got)
    yield buffer
|
||
|
|
||
|
func isNonZero*(bh: BlobId): bool =
  ## Test if a blob hash is not zeroed.
  for b in bh.data.items:
    if b != 0:
      return true
  # `result` stays false when every byte is zero.

{.deprecated: [isValid: isNonZero].}
|
||
|
|
||
|
type
  Key = int64
    # Trie key: the 64-bit SipHash of an entry name (see `toKey`),
    # consumed `keyChunkBits` bits at a time, one chunk per trie level.

const
  keyBits = sizeof(Key) shl 3
    # Bits per key: 64.
  keyChunkBits = fastLog2 keyBits
    # Bits consumed per trie level: 6.
  keyChunkMask = not ((not 0.Key) shl (keyChunkBits))
    # Mask of the low `keyChunkBits` bits: 0x3f.
|
||
|
|
||
|
func toKey(s: string): Key =
  ## Hash a name into a 64-bit trie key with SipHash.
  # Zero-initialized SipHash key, so the mapping is deterministic.
  var key: siphash.Key
  let b = sipHash(toOpenArrayByte(s, s.low, s.high), key)
  cast[Key](b)
|
||
|
|
||
|
func toCbor(k: Key): CborNode =
  ## Keys are endian independent.
  ## NOTE(review): the cast below serializes in host byte order, so the
  ## emitted bytes differ between little- and big-endian hosts — confirm
  ## this is intended (cf. `toCbor(BlobSet)`, which uses `bigEndian64`
  ## for the bitmap).
  newCborBytes cast[array[sizeof(k), byte]](k)
|
||
|
|
||
|
type
  setKind* = enum hotNode, coldNode, leafNode
    ## Node kinds: in-memory (hot), referenced by id (cold), and leaf.
  BlobSet* = ref BlobSetObj
  BlobSetObj = object
    ## Bitmap-compressed trie node keyed by `toKey` hashes.
    case kind*: setKind
    of hotNode:
      bitmap: Key
        # One bit per possible child slot; the population count of the
        # lower bits gives the compact index into `table`
        # (see `compactIndex`/`masked`).
      table*: seq[BlobSet]
    of coldNode:
      setId*: SetId
    of leafNode:
      key: Key
      blob: BlobId
      size: BiggestInt
|
||
|
|
||
|
func newBlobSet*(): BlobSet =
  ## Create an empty hot (in-memory) trie node.
  result = BlobSet(kind: hotNode, table: newSeqOfCap[BlobSet](2))
|
||
|
|
||
|
func sparseIndex(x: Key): int = int(x and keyChunkMask)
  ## Child slot (0..63) selected by the low `keyChunkBits` bits of the key.
|
||
|
|
||
|
func compactIndex(t: BlobSet; x: Key): int =
  ## Index into the compressed `table`: the number of bitmap bits set
  ## below this key's sparse slot. Slot 0 always maps to index 0.
  if (x and keyChunkMask) != 0:
    # TODO: bug in shr and shl, cannot shift all bits out
    # Shift the bitmap so only bits below the slot remain, then popcount.
    result = (int)countSetBits(t.bitmap shl (keyBits - x.sparseIndex))
|
||
|
|
||
|
func masked(t: BlobSet; x: Key): bool =
  ## True when the child slot selected by this key is occupied
  ## in the node's bitmap.
  ((t.bitmap shr x.sparseIndex) and 1) != 0
|
||
|
|
||
|
func nodeCount*(bs: BlobSet): int =
  ## Count of internal nodes in set.
  ## Counts this node plus every hot descendant, recursively.
  result = 1
  for child in bs.table:
    assert(child.kind != coldNode, "cannot count cold nodes")
    if child.kind == hotNode:
      inc(result, child.nodeCount)
|
||
|
|
||
|
func leafCount*(bs: BlobSet): int =
  ## Count of leaves in set.
  ## Walks the trie, adding one per leaf and recursing into hot nodes.
  for child in bs.table:
    assert(child.kind != coldNode, "cannot count leaves of cold nodes")
    if child.kind != leafNode:
      inc(result, child.leafCount)
    else:
      inc result
|
||
|
|
||
|
func search*(t: BlobSet; name: string): BlobId =
  ## Find the BlobId stored under `name`, or raise `KeyError` when the
  ## set contains no entry for it.
  var
    t = t
    key = name.toKey
  while true:
    assert(key != 0, "keyspace exhausted during search")
    if t.masked(key):
      t = t.table[t.compactIndex(key)]
      if t.kind == leafNode:
        # NOTE(review): the leaf's full key is not re-compared here;
        # uniqueness is enforced at insert (key collision raises).
        result = t.blob
        break
      # Descend one level: drop the chunk of the key just consumed.
      key = key shr keyChunkBits
    else:
      raise newException(KeyError, "blob set does not contain key")
|
||
|
|
||
|
func insert(t, l: BlobSet; depth: int) =
  ## Insert leaf `l` into hot node `t` at the given trie depth.
  ## This procedure is recursive to a depth of keyBits/keyChunkBits.
  doAssert(depth < (keyBits div keyChunkBits), "key space exhausted during insert")
  # Remaining key bits at this level; the low chunk selects the slot.
  let key = l.key shr (depth * keyChunkBits)
  if t.masked(key):
    let
      depth = depth + 1
      i = t.compactIndex(key)
    case t.table[i].kind
    of hotNode:
      # Slot holds a subtrie: recurse into it.
      t.table[i].insert(l, depth)
    of coldNode:
      raiseAssert("cannot insert into cold node")
    of leafNode:
      if t.table[i].key == l.key:
        raise newException(KeyError, "key collision in blob set")
      # Slot holds another leaf: push both leaves down one level
      # into a fresh subtrie.
      let
        subtrei = newBlobSet()
      subtrei.insert(t.table[i], depth)
      subtrei.insert(l, depth)
      t.table[i] = subtrei
  else:
    # Free slot: mark the bitmap and splice the leaf in at its
    # compact position.
    t.bitmap = t.bitmap or (Key(1) shl key.sparseIndex)
    t.table.insert(l, t.compactIndex(key))
|
||
|
|
||
|
func insert*(t: BlobSet; name: string; blob: BlobId; size: BiggestInt) =
  ## Insert a blob hash into a trie, stored under the SipHash of `name`.
  let leaf = BlobSet(kind: leafNode, key: name.toKey, blob: blob, size: size)
  t.insert(leaf, 0)
|
||
|
|
||
|
func isEmpty*(s: BlobSet): bool = s.bitmap == Key(0)
  ## Test if a set is empty.
  ## NOTE(review): reads `bitmap`, so `s` must be a hot node — confirm callers.
|
||
|
|
||
|
func toCbor*(x: BlobSet): CborNode =
  ## Serialize a BlobSet node to CBOR.
  ## Hot and cold nodes are tagged `nodeTag`, leaves `leafTag`.
  const
    nodeTag = 0
    leafTag = 1
  let array = newCborArray()
  case x.kind
  of hotNode:
    var
      map = x.bitmap
      buf = newCborBytes(sizeof(Key))
    # The original guard read `when not sizeof(Key) == 8`, which parses
    # as `(not sizeof(Key)) == 8` and is never true, so it could never
    # fire. Compare directly instead.
    when sizeof(Key) != 8:
      {.error: "unknown key conversion".}
    # Serialize the bitmap big-endian for platform independence.
    bigEndian64(buf.bytes[0].addr, map.addr)
    array.add buf
    for y in x.table:
      array.add y.toCbor
    newCborTag(nodeTag, array)
  of coldNode:
    array.add x.setId.data
    newCborTag(nodeTag, array)
  of leafNode:
    array.add x.key.toCbor
    array.add x.blob.data
    array.add x.size
    newCborTag(leafTag, array)
|