717 lines
21 KiB
Nim
717 lines
21 KiB
Nim
import std/asyncdispatch, std/asyncstreams
|
|
import std/hashes, std/streams, std/strutils, std/bitops, std/unicode, std/endians
|
|
import std/streams, std/strutils, std/random
|
|
import cbor, siphash
|
|
|
|
import ./blobsets/priv/hex, ./blobsets/priv/nimcrypto, ./blobsets/priv/nimcrypto/blake2
|
|
|
|
const
  digestLen* = 32
    ## Length of a chunk digest in bytes.
  cidSize* = digestLen
    ## Size of a CID object in memory.
  blobLeafSize* = 1 shl 14
    ## Size of blob leaves in bytes.
  blobLeafSizeMask* = blobLeafSize - 1
    ## Mask selecting the offset of a position inside its leaf.
  blobHexLen* = digestLen * 2
    ## Length of an identifier written as hexadecimal text.
  blobVisualLen* = digestLen * 3
    ## Length in bytes of an identifier written in the visual form
    ## produced by `$` (one three-byte UTF-8 rune per digest byte).
|
|
|
|
type
  Blake2b256* = Blake2bContext[256]
    ## BLAKE2b context specialised for 256-bit digests.

  BlobId* = MDigest[Blake2b256.bits]
    ## Blob Identifier

  SetId* = MDigest[Blake2b256.bits]
    ## Set Identifier

  Cid* {.deprecated.} = BlobId
    ## Former name of `BlobId`.
|
|
|
|
func `$`*(bh: BlobId): string =
  ## Convert a blob hash to its visual representation: every digest
  ## byte is written as the rune `0x2800 or byte`, UTF-8 encoded.
  const baseRune = 0x2800
  result = newString(blobVisualLen)
  var offset = 0
  for octet in bh.data:
    # With the trailing `true`, fastToUTF8Copy advances `offset` itself.
    fastToUTF8Copy(Rune(baseRune or octet.int), result, offset, true)
|
|
|
|
func parseStringId[T](s: string): T =
  ## Parse the textual form of an identifier into its binary digest.
  ##
  ## Two encodings are accepted, selected by the length of `s`:
  ## * `blobHexLen` bytes: hexadecimal text;
  ## * `blobVisualLen` bytes: the visual form produced by `$`, where
  ##   each digest byte is the rune `0x2800 or byte`.
  ##
  ## Raises `ValueError` for any other length and for visual input
  ## containing a rune outside of `0x2800..0x28FF`.
  case s.len
  of blobHexLen:
    hex.decode s, result.data
  of blobVisualLen:
    const
      baseRune = 0x2800
      byteMask = 0xff
    var
      pos: int
      r: Rune
    for b in result.data.mitems:
      fastRuneAt(s, pos, r, true)
      let code = r.int
      # Only runes of the form `baseRune or byte` are valid; checking
      # this also guarantees that every rune consumed three bytes.
      if (code and not byteMask) != baseRune:
        raise newException(ValueError, "invalid blobset id encoding")
      # Strip the rune base: converting the whole rune to a byte would
      # be out of range for every valid input.
      b = byte(code and byteMask)
  else:
    raise newException(ValueError, "invalid blobset id encoding")
|
|
|
|
func parseCborId[T](c: CborNode): T =
  ## Copy the byte string of a CBOR node into an identifier.
  ## The result stays zeroed when the byte string has a different
  ## length than the digest.
  if c.bytes.len != result.data.len:
    return
  for i in 0..result.data.high:
    result.data[i] = c.bytes[i]
|
|
|
|
func toBlobId*(s: string): BlobId =
  ## Parse a textual (hexadecimal or visual) blob hash to binary.
  result = parseStringId[BlobId](s)
|
|
|
|
func toBlobId(c: CborNode): BlobId =
  ## Parse a CBOR byte string holding a blob hash to binary.
  result = parseCborId[BlobId](c)
|
|
|
|
func toSetId*(s: string): SetId =
  ## Parse a textual (hexadecimal or visual) set hash to binary.
  result = parseStringId[SetId](s)
|
|
|
|
func toSetId(c: CborNode): SetId =
  ## Parse a CBOR byte string holding a set hash to binary.
  result = parseCborId[SetId](c)
|
|
|
|
proc `==`*(x, y: BlobId): bool =
  ## Compare two BlobIds by their digest bytes.
  result = x.data == y.data
|
|
|
|
proc `==`*(cbor: CborNode; cid: BlobId): bool =
  ## Compare a CBOR node with a BlobId.
  ##
  ## Returns `true` only when `cbor` is a byte string of exactly
  ## `digestLen` bytes that are equal to the digest of `cid`.
  # Without the length check a short byte string would be indexed out
  # of bounds and a longer one would match on its prefix alone.
  if cbor.kind != cborBytes or cbor.bytes.len != digestLen:
    return false
  for i in 0..<digestLen:
    if cid.data[i] != cbor.bytes[i].uint8:
      return false
  result = true
|
|
|
|
proc hash*(cid: BlobId): Hash =
  ## Reduce a BlobId into an integer for use in tables.
  ## The digest is run through SipHash with an all-zero key.
  var nullKey: siphash.Key
  cast[Hash](sipHash(cid.data, nullKey))
|
|
|
|
proc toCbor*(cid: BlobId): CborNode =
  ## Generate a CBOR byte string representation of a BlobId.
  result = newCborBytes(cid.data)
|
|
|
|
proc toBlobId*(cbor: CborNode): BlobId =
  ## Build a BlobId from the byte string of a CBOR node.
  ## Asserts that the node holds exactly `digestLen` bytes.
  assert(cbor.bytes.len == digestLen)
  var i = 0
  while i < digestLen:
    result.data[i] = cbor.bytes[i].uint8
    inc i
|
|
|
|
proc toHex*(id: BlobId|SetId): string =
  ## Return the identifier encoded in hexadecimal.
  result = hex.encode(id.data)
|
|
|
|
proc verify*(cid: BlobId; data: string): bool =
  ## Verify that a string of data corresponds to a BlobId by hashing
  ## `data` with a default-initialised `Blake2b256` context and
  ## comparing the digest with `cid`.
  var ctx: Blake2b256
  ctx.init()
  ctx.update(data)
  result = ctx.finish() == cid
|
|
|
|
func isNonZero*(bh: BlobId): bool =
  ## Test if a blob hash is not zeroed.
  # Every byte is OR-ed into one accumulator; no early exit is taken.
  var acc: byte
  for octet in bh.data.items:
    {.unroll.}
    acc = acc or octet
  result = acc != 0
|
|
|
|
type
  BlobKind* = enum
    ## Category of a stored blob. `commit` stores set nodes as
    ## `metaBlob`; `dataBlob` is the default for the stream openers.
    dataBlob,
    metaBlob
|
|
|
|
proc `$`*(k: BlobKind): string =
  ## Short lowercase name of a blob kind.
  const names: array[BlobKind, string] = ["data", "meta"]
  result = names[k]
|
|
|
|
type
  BlobStream* = ref BlobStreamObj
  BlobStreamObj* = object of RootObj
    ## Reader over a single blob. Behaviour is supplied by the `*Impl`
    ## fields, which the `close`, `size`, `pos`, `pos=` and `read`
    ## procs dispatch to.
    closeImpl*: proc (s: BlobStream) {.nimcall, gcsafe.}
    sizeImpl*: proc (s: BlobStream): BiggestInt {.nimcall, gcsafe.}
    setPosImpl*: proc (s: BlobStream; pos: BiggestInt) {.nimcall, gcsafe.}
    getPosImpl*: proc (s: BlobStream): BiggestInt {.nimcall, gcsafe.}
    readImpl*: proc (s: BlobStream; buffer: pointer; bufLen: int): Future[int] {.nimcall, gcsafe.}

  IngestStream* = ref IngestStreamObj
  IngestStreamObj* = object of RootObj
    ## Writer that ingests a blob. Behaviour is supplied by the `*Impl`
    ## fields, which the `cancel`, `finish` and `ingest` procs
    ## dispatch to.
    cancelImpl*: proc (s: IngestStream) {.nimcall, gcsafe.}
    finishImpl*: proc (s: IngestStream): Future[tuple[id: BlobId, size: BiggestInt]] {.nimcall, gcsafe.}
    ingestImpl*: proc (s: IngestStream; buf: pointer; size: int): Future[void] {.nimcall, gcsafe.}
|
|
|
|
proc close*(s: BlobStream) =
  ## Close a blob stream through its `closeImpl`, which must be set.
  assert(not s.closeImpl.isNil)
  let impl = s.closeImpl
  impl(s)
|
|
|
|
proc size*(s: BlobStream): BiggestInt =
  ## Size reported by the stream's `sizeImpl`, which must be set.
  assert(not s.sizeImpl.isNil)
  result = s.sizeImpl(s)
|
|
|
|
proc `pos=`*(s: BlobStream; pos: BiggestInt) =
  ## Set the stream position through `setPosImpl`, which must be set.
  assert(not s.setPosImpl.isNil)
  let impl = s.setPosImpl
  impl(s, pos)
|
|
|
|
proc pos*(s: BlobStream): BiggestInt =
  ## Position reported by the stream's `getPosImpl`, which must be set.
  assert(not s.getPosImpl.isNil)
  result = s.getPosImpl(s)
|
|
|
|
proc read*(s: BlobStream; buf: pointer; len: Natural): Future[int] =
  ## Read up to `len` bytes into `buf` through `readImpl`, which must
  ## be set. The future carries the value reported by the
  ## implementation.
  assert(not s.readImpl.isNil)
  result = s.readImpl(s, buf, len)
|
|
|
|
proc cancel*(s: IngestStream): tuple[id: BlobId, size: BiggestInt] =
  ## Cancel and close ingest stream through `cancelImpl`, which must
  ## be set.
  # `cancelImpl` returns nothing and `result` is never assigned, so
  # the returned tuple always holds default (zeroed) values.
  assert(not s.cancelImpl.isNil)
  let impl = s.cancelImpl
  impl(s)
|
|
|
|
proc finish*(s: IngestStream): Future[tuple[id: BlobId, size: BiggestInt]] =
  ## Finish ingest stream through `finishImpl`, which must be set.
  ## The future yields the blob identifier and size reported by the
  ## implementation.
  assert(not s.finishImpl.isNil)
  result = s.finishImpl(s)
|
|
|
|
proc ingest*(s: IngestStream; buf: pointer; size: Natural): Future[void] =
  ## Ingest `size` bytes starting at `buf` through `ingestImpl`, which
  ## must be set.
  assert(not s.ingestImpl.isNil)
  result = s.ingestImpl(s, buf, size)
|
|
|
|
proc ingest*(s: IngestStream; buf: string): Future[void] =
  ## Ingest the contents of `buf` through `ingestImpl`, which must be
  ## set. An empty string is passed on as a nil pointer with a length
  ## of zero.
  assert(not s.ingestImpl.isNil)
  # `buf[0]` is out of bounds for an empty string, so only take the
  # address of the first character when there is one.
  var data: pointer = nil
  if buf.len > 0:
    data = buf[0].unsafeAddr
  s.ingestImpl(s, data, buf.len)
|
|
|
|
type
  BlobStore* = ref BlobStoreObj
  BlobStoreObj* = object of RootObj
    ## Storage backend. Behaviour is supplied by the `*Impl` fields,
    ## which the `close`, `contains`, `openBlobStream` and
    ## `openIngestStream` procs dispatch to.
    closeImpl*: proc (s: BlobStore) {.nimcall, gcsafe.}
    containsImpl*: proc (s: BlobStore; id: BlobId; kind: BlobKind): Future[bool] {.nimcall, gcsafe.}
    openBlobStreamImpl*: proc (s: BlobStore; id: BlobId; size: BiggestInt; kind: BlobKind): BlobStream {.nimcall, gcsafe.}
    openIngestStreamImpl*: proc (s: BlobStore; size: BiggestInt; kind: BlobKind): IngestStream {.nimcall, gcsafe.}
|
|
|
|
proc close*(s: BlobStore) =
  ## Close active store resources. Does nothing when the store has no
  ## `closeImpl`.
  if s.closeImpl.isNil:
    return
  s.closeImpl(s)
|
|
|
|
proc contains*(s: BlobStore; id: BlobId; kind: BlobKind): Future[bool] =
  ## Check if the store contains a blob of the given kind.
  ## Dispatches to `containsImpl`, which must be set.
  # Guard the procedure that is actually invoked below.
  assert(not s.containsImpl.isNil)
  s.containsImpl(s, id, kind)
|
|
|
|
proc openBlobStream*(s: BlobStore; id: BlobId; size = 0.BiggestInt; kind = dataBlob): BlobStream =
  ## Return a new `BlobStream` for reading a blob.
  ## `id` must not be all zeroes and the store must provide
  ## `openBlobStreamImpl`.
  assert(isNonZero id)
  assert(not s.openBlobStreamImpl.isNil)
  result = s.openBlobStreamImpl(s, id, size, kind)
|
|
|
|
proc openIngestStream*(s: BlobStore; size = 0.BiggestInt; kind = dataBlob): IngestStream =
  ## Return a new `IngestStream` for ingesting a blob.
  ## The store must provide `openIngestStreamImpl`.
  assert(not s.openIngestStreamImpl.isNil)
  result = s.openIngestStreamImpl(s, size, kind)
|
|
|
|
proc ingest*(store: BlobStore; buf: string): Future[BlobId] {.async.} =
  ## Ingest `buf` into `store` as a single data blob and return the
  ## identifier reported when the ingest stream is finished.
  let stream = store.openIngestStream(buf.len.BiggestInt, dataBlob)
  # `buf[0]` is out of bounds for an empty string; there is nothing to
  # feed to the stream in that case, so go straight to `finish`.
  if buf.len > 0:
    await stream.ingest(buf[0].unsafeAddr, buf.len)
  let (id, _) = await stream.finish()
  return id
|
|
|
|
type
  Key* = distinct uint64
    ## Key of a set member; `toKey` derives one from a name.

# Bit operations and equality borrowed from the underlying `uint64`.
proc `and`*(x, y: Key): Key {.borrow.}
proc `not`*(x: Key): Key {.borrow.}
proc `shl`*(x: Key; y: int): Key {.borrow.}
proc `shr`*(x: Key; y: int): Key {.borrow.}
proc `==`*(x, y: Key): bool {.borrow.}
|
|
|
|
const
  keyBits = sizeof(Key) * 8
    ## Number of bits in a `Key`.
  keyChunkBits = fastLog2 keyBits
    ## Number of key bits consumed at each level of the trie.
  keyChunkMask = Key((1 shl keyChunkBits)-1)
    ## Mask selecting the lowest chunk of a key.
  maxDepth = keyBits div keyChunkBits
    ## Number of trie levels. Integer division keeps this an integer
    ## constant (the float quotient was truncated to the same value
    ## where it was used) and matches the depth bound checked by
    ## `insert`.
|
|
|
|
func `$`*(k: Key): string =
  ## Hexadecimal rendering of a key, padded to `keyBits div 4` digits.
  # Reinterpret the bits instead of converting: a range-checked
  # conversion to the signed `BiggestInt` fails for every key that has
  # its highest bit set.
  cast[BiggestInt](k).toHex(keyBits div 4)
|
|
|
|
func toKey*(s: string): Key =
  ## Derive the key of a name by hashing its bytes with SipHash under
  ## an all-zero key.
  var nullKey: siphash.Key
  result = cast[Key](sipHash(toOpenArrayByte(s, s.low, s.high), nullKey))
|
|
|
|
const
  # CBOR tags
  nodeTag = 0
    ## Tag written by `toCbor` around hot and cold set nodes.
  leafTag = 1
    ## Tag written by `toCbor` around leaf entries.
|
|
|
|
type
  SetKind* = enum
    hotNode,  ## Node whose children are held in memory.
    coldNode, ## Node known only by the identifier it is stored under.
    leafNode  ## Set member.

  BlobSet* = ref BlobSetObj
  BlobSetObj = object
    case kind*: SetKind
    of hotNode:
      bitmap: uint64
        ## One bit per sparse slot; a set bit marks an occupied slot.
      table: seq[BlobSet]
        ## Children of the occupied slots, in compact order.
    of coldNode:
      setId*: SetId
        ## Identifier of the stored node.
    of leafNode:
      key: Key
        ## Key the member is filed under.
      blob*: BlobId
        ## Identifier of the member's blob.
      size: BiggestInt
        ## Size recorded for the member's blob.
|
|
|
|
func isCold*(bs: BlobSet): bool =
  ## Test if a node is a cold node.
  result = bs.kind == coldNode

func isHot*(bs: BlobSet): bool =
  ## Test if a node is a hot node.
  result = bs.kind == hotNode
|
|
|
|
func toCbor*(x: BlobSet): CborNode =
  ## Generate the CBOR representation of a set node.
  ##
  ## * hot node: `nodeTag` around an array holding the bitmap followed
  ##   by the CBOR form of every child;
  ## * cold node: `nodeTag` around the set identifier bytes;
  ## * leaf: `leafTag` around the array `[key, blob, size]`.
  case x.kind
  of hotNode:
    let bitmap = newCborInt(x.bitmap)
    assert(bitmap.getInt.uint64 == x.bitmap, $bitmap.getInt.uint64 & " != " & $x.bitmap)
    let entries = newCborArray()
    entries.add bitmap
    for y in x.table:
      assert(not y.isNil)
      entries.add y.toCbor
    result = newCborTag(nodeTag, entries)
  of coldNode:
    result = newCborTag(nodeTag, x.setId.data.newCborBytes)
  of leafNode:
    let fields = newCborArray()
    fields.add x.key.uint64
    fields.add x.blob.data
    fields.add x.size
    result = newCborTag(leafTag, fields)
|
|
|
|
func newBlobSet*(): BlobSet =
  ## Create a new, empty hot blob set.
  result = BlobSet(kind: hotNode, table: newSeqOfCap[BlobSet](2))
|
|
|
|
func newBlobSet*(id: SetId): BlobSet =
  ## Create a new cold blob set referring to the stored set `id`.
  result = BlobSet(kind: coldNode, setId: id)
|
|
|
|
template sparseIndex(x: Key): uint64 =
  ## Slot number selected by the lowest chunk of `x`.
  uint64(x and keyChunkMask)

template mask(x: Key): uint64 =
  ## Bitmap bit belonging to the slot selected by `x`.
  1'u64 shl int(x and keyChunkMask)
|
|
|
|
func compactIndex(t: BlobSet; x: Key): int =
  ## Position in `t.table` of the slot selected by `x`: the number of
  ## occupied slots below it.
  let slot = x.sparseIndex.int
  if slot == 0:
    # Nothing lies below slot zero; this also avoids shifting the
    # bitmap by its full width.
    # TODO: bug in shr and shl, cannot shift all bits out
    return 0
  result = int(countSetBits(t.bitmap shl (keyBits - slot)))
|
|
|
|
func masked(t: BlobSet; x: Key): bool =
  ## Test if the slot selected by `x` is occupied in `t`.
  result = (t.bitmap and x.mask) != 0'u64
|
|
|
|
func isEmpty*(s: BlobSet): bool =
  ## Test if a set is empty, i.e. its bitmap has no bit set.
  result = s.bitmap == 0'u64
|
|
|
|
iterator dumpBlob*(store: BlobStore; id: BlobId): string =
  ## Yield the content of a data blob in pieces of at most
  ## `blobLeafSize` bytes, until a read reports zero bytes.
  ## Reads are driven synchronously with `waitFor`.
  let stream = store.openBlobStream(id, kind=dataBlob)
  var chunk = newString(blobLeafSize)
  defer:
    close stream
  while true:
    # Restore the full capacity before each read, the previous round
    # may have shortened the buffer.
    chunk.setLen(blobLeafSize)
    let got = waitFor stream.read(chunk[0].addr, chunk.len)
    if got == 0:
      break
    chunk.setLen(got)
    yield chunk
|
|
|
|
proc loadSet(store: BlobStore; id: SetId; depth: int): Future[BlobSet] {.async.} =
  ## Load the set node stored under `id` as a hot node.
  ##
  ## The node is read with a single read of at most `blobLeafSize`
  ## bytes and parsed as CBOR. Children tagged `nodeTag` are loaded
  ## recursively; children tagged `leafTag` become leaves.
  ## Raises `ValueError` when the bitmap does not agree with the
  ## number of children or when a child carries an unknown tag.
  assert(isNonZero id)
  assert((not Key(0)) shr depth != Key(0), "loadSet trie is too deep")
  let stream = store.openBlobStream(id, kind=metaBlob)
  var buf = newString(blobLeafSize)
  defer:
    close stream
  let n = await stream.read(buf[0].addr, buf.len)
  assert(n != 0, "read zero of set " & $id)
  buf.setLen(n)
  let
    tagPair = parseCbor buf
    c = tagPair.val
    bitmap = c.seq[0].getInt.uint64
    bits = bitmap.countSetBits
  # The first array element is the bitmap, the rest are the children.
  if bits != c.seq.len-1:
    raise newException(ValueError, "invalid set CBOR, bitmap has " & $bits & " bits and sequence len is " & $c.seq.len)
  result = BlobSet(
    kind: hotNode,
    bitmap: bitmap,
    table: newSeqOfCap[BlobSet](c.seq.len-1))
  for i in 1..c.seq.high:
    let
      node = c[i].val
      tag = c[i].tag.int
    if tag == nodeTag:
      let child = await loadSet(store, node.toSetId, depth+1)
      result.table.add child
    elif tag == leafTag:
      let leaf = BlobSet(
        kind: leafNode,
        key: Key(getNum[uint64](node[0])),
        blob: parseCborId[BlobId](node[1]),
        size: getInt(node[2]))
      result.table.add leaf
    else:
      raise newException(ValueError, "invalid set CBOR")
|
|
|
|
proc load*(store: BlobStore; id: SetId): Future[BlobSet] =
  ## Load the set stored under `id`, starting at depth zero.
  result = loadSet(store, id, 0)
|
|
|
|
proc load*(store: BlobStore; node: BlobSet): Future[BlobSet] =
  ## Load the set that the cold node `node` refers to.
  result = load(store, node.setId)
|
|
|
|
proc randomApply*(store: BlobStore; trie: BlobSet; rng: var Rand;
    f: proc(id: BlobId; size: BiggestInt)) =
  ## Apply to random leaf if the set is not empty.
  ##
  ## Starting at `trie`, a random child is picked at every level until
  ## a leaf is reached, which is passed to `f`. Cold children are
  ## loaded synchronously and cached in the table of their parent.
  var
    trie = trie
    i = rng.rand(max(1, countSetBits(trie.bitmap))-1)
  while trie.bitmap != 0:
    let next = trie.table[i]
    case next.kind
    of leafNode:
      f(next.blob, next.size)
      break
    of coldNode:
      # Replace the cold node in place; the next round looks at the
      # same index again and finds the loaded node.
      trie.table[i] = waitFor store.load(next)
    of hotNode:
      trie = next
      # Same guard as for the root: an empty hot node must not ask the
      # generator for a negative upper bound. The loop condition ends
      # the walk in that case.
      i = rng.rand(max(1, countSetBits(trie.bitmap))-1)
|
|
|
|
type
  MemberStream* = FutureStream[tuple[key: Key; id: BlobId; size: BiggestInt]]
    ## Stream of set members as written by `streamMembers`.
|
|
|
|
proc newMemberStream*(): FutureStream[tuple[key: Key; id: BlobId; size: BiggestInt]] =
  ## Create a new stream for receiving set members.
  result = newFutureStream[tuple[key: Key; id: BlobId; size: BiggestInt]]()
|
|
|
|
func lowBitsMask(n: int): uint64 =
  ## Mask with the `n` lowest bits set, for any `n` in `0..64`.
  ## Both ends are handled explicitly because they cannot be produced
  ## by shifting a 64-bit value by 64 bits.
  if n <= 0: 0'u64
  elif n >= 64: not(0'u64)
  else: (1'u64 shl n) - 1'u64

proc streamMembers*(stream: FutureStream[tuple[key: Key; id: BlobId; size: BiggestInt]];
    store: BlobStore; trie: BlobSet) {.async.} =
  ## Pass each set member to the specified future stream in random order.
  ##
  ## The trie is walked depth first. Every level keeps a mask with one
  ## bit per table entry that still has to be visited; entries are
  ## drawn at random and skipped when already visited. Cold nodes are
  ## loaded from `store` as they are reached. The stream is completed
  ## when the walk is over or when the stream is found finished.
  var
    path: array[maxDepth.int, tuple[mask: uint64, trie: BlobSet]]
    level = 0
    rng = initRand(rand(high int))
  if trie.isCold:
    path[0].trie = await store.load(trie)
  else:
    path[0].trie = trie
  # set the bits of indexes to hit
  path[0].mask = lowBitsMask(path[0].trie.table.len)
  while (not stream.finished) and (0 < level or path[0].mask != 0'u64):
    if path[level].mask == 0'u64:
      # Every entry of this node was visited, return to the parent.
      dec level
      continue
    let
      i = rng.rand(path[level].trie.table.high)
      bi = 1'u64 shl i
    if (path[level].mask and bi) == 0'u64:
      # Already visited, draw again.
      continue
    path[level].mask = path[level].mask xor bi
    var node = path[level].trie.table[i]
    if node.kind == leafNode:
      let val: tuple[key: Key; id: BlobId; size: BiggestInt] =
        (node.key, node.blob, node.size)
      await stream.write(val)
    else:
      if node.isCold:
        node = await store.load(node)
      inc level
      path[level].mask = lowBitsMask(node.table.len)
      path[level].trie = node
  complete stream
|
|
|
|
func nodeCount(bs: BlobSet): int =
  ## Count of internal nodes in set, including `bs` itself.
  ## Cold nodes cannot be counted and trip an assertion.
  result = 1
  for n in bs.table:
    assert(n.kind != coldNode, "cannot count cold nodes")
    case n.kind
    of hotNode:
      result += nodeCount(n)
    of coldNode, leafNode:
      discard
|
|
|
|
func leafCount(bs: BlobSet): int =
  ## Count of leaves in set.
  ## Cold nodes cannot be counted and trip an assertion.
  for n in bs.table:
    assert(n.kind != coldNode, "cannot count leaves of cold nodes")
    result +=
      (if n.kind == leafNode: 1
       else: leafCount(n))
|
|
|
|
#[
|
|
proc search*(store: BlobStore; trie: BlobSet; name: string): Future[BlobId] {.async.} =
|
|
let key = name.toKey
|
|
var
|
|
n = trie
|
|
k = key
|
|
level = 0
|
|
while k != Key(0) and n.masked(k):
|
|
let i = n.compactIndex(k)
|
|
if n.table[i].isCold:
|
|
n.table[i] = await store.load(n.table[i])
|
|
n = n.table[i]
|
|
if n.kind == leafNode:
|
|
if n.key == key:
|
|
return n.blob
|
|
break
|
|
k = k shr keyChunkBits
|
|
inc level
|
|
raise newException(KeyError, "key not in blob set")
|
|
]#
|
|
|
|
func apply(bs: BlobSet; cb: proc (leaf: BlobSet)) =
  ## Apply a callback to each set element, descending through hot
  ## nodes. A nil entry or a cold node raises an assertion failure.
  for node in bs.table:
    if node.isNil:
      raiseAssert(bs.table.repr)
    if node.kind == hotNode:
      apply(node, cb)
    elif node.kind == leafNode:
      cb(node)
    else:
      raiseAssert("cannot apply to node type " & $node.kind)
|
|
|
|
proc apply*(store: BlobStore; trie: BlobSet; name: string; f: proc (id: BlobId; size: BiggestInt)) =
  ## Apply a procedure to a named blob, if it is present.
  ##
  ## The key derived from `name` is followed down the trie one chunk
  ## per level. Cold nodes on the way are loaded synchronously and
  ## cached in the table of their parent. `f` is called with the blob
  ## and size of the leaf when a leaf with the same key is found.
  let key = name.toKey
  var n = trie
  # Walk by depth, mirroring `insert`. Stopping as soon as the
  # remaining key bits are zero would miss entries filed in slot zero,
  # which `insert` does use for such keys.
  for depth in 0 ..< (keyBits div keyChunkBits):
    let k = key shr (depth * keyChunkBits)
    if not n.masked(k):
      break
    let i = n.compactIndex(k)
    if n.table[i].isCold:
      n.table[i] = waitFor store.load(n.table[i])
    n = n.table[i]
    if n.kind == leafNode:
      if n.key == key:
        f(n.blob, n.size)
      break
|
|
|
|
proc contains*(store: BlobStore; bs: BlobSet; name: string): bool =
  ## Test if the set `bs` has a member called `name`.
  var hit = false
  proc mark(id: BlobId; size: BiggestInt) =
    # Only invoked by `apply` when the member exists.
    hit = true
  apply(store, bs, name, mark)
  result = hit
|
|
|
|
proc insert(store: BlobStore; trie, l: BlobSet; depth: int): Future[BlobSet] {.async.} =
  ## Insert leaf `l` below `trie`, which sits at `depth`, and return
  ## the resulting node. The node is rebuilt from the bitmap and table
  ## of `trie`.
  ##
  ## This procedure is recursive to a depth of keyBits/keyChunkBits.
  doAssert(depth < (keyBits div keyChunkBits), "key space exhausted during insert")
  var bs = BlobSet(kind: hotNode, bitmap: trie.bitmap, table: trie.table)
  let key = l.key shr (depth * keyChunkBits)
  if not bs.masked(key):
    # Free slot: mark it and place the leaf at its compact position.
    bs.bitmap = bs.bitmap or key.mask
    bs.table.insert(l, bs.compactIndex(key))
    return bs
  let
    childDepth = depth + 1
    i = bs.compactIndex(key)
  if bs.table[i].isCold:
    bs.table[i] = await store.load(bs.table[i])
  case bs.table[i].kind
  of hotNode:
    bs.table[i] = await insert(store, bs.table[i], l, childDepth)
  of leafNode:
    if bs.table[i].key == l.key:
      # Same key: the new leaf replaces the old one.
      bs.table[i] = l
    else:
      # Two different keys share this slot: move both one level down.
      var subtrie = newBlobSet()
      subtrie = await insert(store, subtrie, bs.table[i], childDepth)
      subtrie = await insert(store, subtrie, l, childDepth)
      bs.table[i] = subtrie
  of coldNode:
    discard
  return bs
|
|
|
|
proc insert*(store: BlobStore; trie, node: BlobSet): Future[BlobSet] =
  ## Insert set node `node` into `trie`, starting at the root.
  result = insert(store, trie, node, 0)
|
|
|
|
proc insert*(store: BlobStore; t: BlobSet; key: Key; blob: BlobId; size: BiggestInt): Future[BlobSet] =
  ## Insert a blob hash into a trie under `key`.
  result = insert(store, t, BlobSet(kind: leafNode, key: key, blob: blob, size: size))
|
|
|
|
proc insert*(store: BlobStore; t: BlobSet; name: string; blob: BlobId; size: BiggestInt): Future[BlobSet] =
  ## Insert a blob hash into a trie under the key derived from `name`.
  result = insert(store, t, name.toKey, blob, size)
|
|
|
|
proc remove(store: BlobStore; trie: BlobSet; fullKey: Key; depth: int): Future[BlobSet] {.async.} =
  ## Remove the member filed under `fullKey` from the node `trie`,
  ## which sits at `depth`, and return the node that takes its place.
  ##
  ## `trie` is modified in place. Cold nodes on the way are loaded
  ## from `store` and cached in the table of their parent. Below the
  ## root, a node left with a single leaf is replaced by that leaf.
  var res = trie
  let key = fullKey shr (depth * keyChunkBits)
  if res.masked(key):
    let i = res.compactIndex(key)
    if res.table[i].isCold:
      res.table[i] = await store.load(res.table[i])
    var dropEntry = false
    case res.table[i].kind
    of hotNode:
      let child = await remove(store, res.table[i], fullKey, depth + 1)
      if child.kind == hotNode and child.bitmap == 0'u64:
        # The subtrie became empty, drop its slot altogether.
        dropEntry = true
      else:
        res.table[i] = child
    of leafNode:
      # Another key may share this slot; only remove an exact match.
      dropEntry = res.table[i].key == fullKey
    of coldNode:
      discard # previously handled
    if dropEntry:
      res.table.delete(i)
      # Keep the bitmap in step with the table.
      res.bitmap = res.bitmap and (not key.mask)
    # A lone leaf can stand in for its node. The root always stays a
    # hot node, and a hot child is never hoisted because its slots are
    # computed for its own depth.
    if depth > 0 and res.table.len == 1 and res.table[0].kind == leafNode:
      res = res.table[0]
  return res
|
|
|
|
proc remove*(store: BlobStore; trie: BlobSet; key: Key): Future[BlobSet] =
  ## Remove a blob from a trie. An empty trie is handed back as is in
  ## an already completed future.
  if not trie.isEmpty:
    return remove(store, trie, key, 0)
  result = newFuture[BlobSet]()
  result.complete trie
|
|
|
|
proc remove*(store: BlobStore; trie: BlobSet; name: string): Future[BlobSet] =
  ## Remove the blob filed under the key derived from `name`.
  result = remove(store, trie, name.toKey)
|
|
|
|
proc union*(store: BlobStore; sets: varargs[BlobSet]): BlobSet =
  ## Return the union of `sets`: every leaf of every given set is
  ## inserted into a fresh set, one synchronous insert at a time.
  # TODO: lazy-load set
  var merged = newBlobSet()
  proc addLeaf(leaf: BlobSet) =
    merged = waitFor insert(store, merged, leaf)
  for bs in sets:
    assert(not bs.isnil)
    bs.apply(addLeaf)
  merged
|
|
|
|
func leafCount*(size: Natural): int =
  ## Number of leaves of `blobLeafSize` bytes needed to hold `size`
  ## bytes, rounding up.
  result = (size+blobLeafSize-1) div blobLeafSize
|
|
|
|
func compressTree*(leaves: var openArray[BlobId]) =
  ## Reduce a list of leaf digests to a single digest, left in
  ## `leaves[0]`.
  ##
  ## The list is compressed level by level: neighbouring pairs are
  ## hashed together into one digest (a trailing unpaired digest is
  ## hashed alone) and the results are written back to the front of
  ## `leaves`, until one digest remains. A list of fewer than two
  ## digests is left untouched.
  var
    ctx: Blake2b256
    nodeOffset = 0
    nodeDepth = 0
    count = leaves.len
  while count > 1:
    # Each level numbers its nodes from zero and lies one deeper.
    nodeOffset = 0
    inc nodeDepth
    var src, dst: int
    while src < count:
      ctx.init do (params: var Blake2bParams):
        params.fanout = 2
        params.depth = 255
        params.leafLength = blobLeafSize
        params.nodeOffset = nodeOffset
        params.nodeDepth = nodeDepth
      inc nodeOffset
      ctx.update(leaves[src].data)
      inc src
      if src < count:
        ctx.update(leaves[src].data)
        inc src
      leaves[dst] = ctx.finish()
      inc dst
    count = dst
  # TODO: BLAKE2 tree finalization flags
|
|
|
|
proc blobHash*(s: string): BlobId =
  ## Compute the identifier of a blob that fits in a single leaf.
  ## `s` must not be longer than `blobLeafSize`.
  doAssert(s.len <= blobLeafSize)
  var
    ctx: Blake2b256
    leaves: array[1, BlobId]
  ctx.init do (params: var Blake2bParams):
    params.fanout = 2
    params.depth = 255
    params.leafLength = blobLeafSize
    params.nodeOffset = 0
  if s.len > 0:
    ctx.update(unsafeAddr s[0], s.len)
  leaves[0] = finish ctx
  compressTree(leaves)
  result = leaves[0]
|
|
|
|
proc commit*(store: BlobStore; bs: BlobSet): Future[BlobSet] {.async.} =
  ## Store a set and return the cold node that refers to it.
  ##
  ## Cold sets are returned unchanged. For a hot set every hot child
  ## is committed first, then the node is encoded as CBOR and, unless
  ## the store already contains it, ingested as a `metaBlob`.
  if bs.isCold: return bs
  let tmp = BlobSet(kind: hotNode, bitmap: bs.bitmap, table: bs.table)
  for i in 0 .. tmp.table.high:
    if tmp.table[i].isHot:
      let cold = await store.commit tmp.table[i]
      assert(not cold.isNil)
      tmp.table[i] = cold
  var buf = encode tmp.toCbor
  let
    localId = blobHash(buf)
    present = await store.contains(localId, metaBlob)
  if present:
    return BlobSet(kind: coldNode, setId: localId)
  let stream = store.openIngestStream(size=buf.len, kind=metaBlob)
  await stream.ingest(buf)
  let (storeId, _) = await finish(stream)
  # The store must arrive at the identifier computed locally.
  assert(localId == storeId)
  return BlobSet(kind: coldNode, setId: storeId)
|
|
|
|
#
|
|
# Null Store implementation
|
|
#
|
|
|
|
type
  NullIngestStream = ref NullIngestStreamObj
  NullIngestStreamObj = object of IngestStreamObj
    ## Ingest stream of the null store: hashes what it is given.
    ctx: Blake2b256
      ## Context of the leaf currently being hashed.
    leaves: seq[BlobId]
      ## Digests of the leaves finished so far.
    pos, nodeOffset: BiggestInt
      ## Bytes ingested so far, and the offset given to the next leaf.
|
|
|
|
proc nullBlobClose(s: BlobStream) =
  ## Closing a null blob stream does nothing.
  discard
|
|
|
|
proc setPosNull(s: BlobStream; pos: BiggestInt) =
  ## Positions are ignored by null blob streams.
  discard

proc getPosNull(s: BlobStream): BiggestInt =
  ## The position of a null blob stream is always zero.
  discard
|
|
|
|
proc nullBlobRead(s: BlobStream; buffer: pointer; len: Natural): Future[int] =
  ## Reads from a null blob stream complete at once with zero bytes.
  result = newFuture[int]()
  result.complete(0)
|
|
|
|
proc nullOpenBlobStream(s: BlobStore; id: BlobId; size: BiggestInt; kind: BlobKind): BlobStream =
  ## Open a blob stream of the null store, wired to the null
  ## close, position and read procedures. No `sizeImpl` is set.
  result = BlobStream(
    closeImpl: nullBlobClose,
    setPosImpl: setPosNull,
    getPosImpl: getPosNull,
    readImpl: nullBlobRead)
|
|
|
|
proc nullFinish(s: IngestStream): Future[tuple[id: BlobId, size: BiggestInt]] =
  ## Finish a null ingest stream: close the last leaf, compress the
  ## leaf digests to one identifier and complete at once with that
  ## identifier and the number of bytes ingested.
  var s = NullIngestStream(s)
  if s.pos == 0:
    # Nothing was ingested, so `nullIngest` never set up the hashing
    # context. Use the parameters of a first leaf so that an empty
    # blob gets the same identifier as `blobHash("")`.
    s.ctx.init do (params: var Blake2bParams):
      params.fanout = 2
      params.depth = 255
      params.leafLength = blobLeafSize
      params.nodeOffset = 0
  s.leaves.add finish(s.ctx)
  compressTree(s.leaves)
  var pair: tuple[id: BlobId, size: BiggestInt]
  pair.id = s.leaves[0]
  pair.size = s.pos
  result = newFuture[tuple[id: BlobId, size: BiggestInt]]()
  complete result, pair
|
|
|
|
proc nullIngest(s: IngestStream; buf: pointer; len: Natural): Future[void] =
  ## Hash `len` bytes starting at `buf` into a null ingest stream.
  ##
  ## The input is cut at leaf boundaries: whenever the stream position
  ## reaches the start of a leaf the previous leaf is finished and its
  ## digest recorded, and a new context is set up for the next leaf.
  ## The returned future is already completed.
  var
    s = NullIngestStream(s)
    off = 0
  # The buffer may be longer than one leaf, so it must not be viewed
  # as an array of `blobLeafSize` bytes: indexing past that length
  # would fail the bounds check.
  let bytes = cast[ptr UncheckedArray[byte]](buf)
  while off < len:
    var n = min(blobLeafSize, len-off)
    let leafOff = int(s.pos and blobLeafSizeMask)
    if leafOff == 0:
      if s.pos > 0:
        s.leaves.add finish(s.ctx)
      s.ctx.init do (params: var Blake2bParams):
        params.fanout = 2
        params.depth = 255
        params.leafLength = blobLeafSize
        params.nodeOffset = s.nodeOffset
      inc s.nodeOffset
    else:
      # Inside a leaf: take no more than what is left of it.
      n = min(n, blobLeafSize-leafOff)
    s.ctx.update(bytes[off].addr, n)
    off.inc n
    s.pos.inc n
  result = newFuture[void]()
  complete result
|
|
|
|
proc nullOpenIngestStream(s: BlobStore; size: BiggestInt; kind: BlobKind): IngestStream =
  ## Open an ingest stream of the null store with an empty list of
  ## leaves. No `cancelImpl` is set.
  result = NullIngestStream(
    finishImpl: nullFinish,
    ingestImpl: nullIngest,
    leaves: newSeq[BlobId]())
|
|
|
|
proc nullContains(s: BlobStore; id: BlobId; kind: BlobKind): Future[bool] =
  ## The null store keeps nothing, so no blob is ever reported present.
  result = newFuture[bool]()
  complete result, false

proc newNullStore*(): BlobStore =
  ## Create a store that keeps no data: ingest streams only compute
  ## identifiers and blob streams read nothing.
  # `containsImpl` is provided because `commit` asks the store whether
  # a set is already present before ingesting it.
  BlobStore(
    containsImpl: nullContains,
    openBlobStreamImpl: nullOpenBlobStream,
    openIngestStreamImpl: nullOpenIngestStream)
|