diff --git a/src/blobsets.nim b/src/blobsets.nim
index 9e1b0ee..f4bdce3 100644
--- a/src/blobsets.nim
+++ b/src/blobsets.nim
@@ -138,24 +138,27 @@ func isNonZero*(bh: BlobId): bool =
     r = r or b
   r != 0
 
-type
-  Key* = uint64
+type Key* = distinct uint64
+
+proc `and` * (x, y: Key): Key {.borrow.}
+proc `not` * (x: Key): Key {.borrow.}
+proc `shl` * (x: Key; y: int): Key {.borrow.}
+proc `shr` * (x: Key; y: int): Key {.borrow.}
+proc `==` * (x, y: Key): bool {.borrow.}
 
 const
   keyBits = sizeof(Key) * 8
   keyChunkBits = fastLog2 keyBits
-  keyChunkMask = not ((not 0.Key) shl (keyChunkBits))
+  keyChunkMask = Key((1 shl keyChunkBits)-1)
   maxDepth = keyBits/keyChunkBits
 
+func `$`*(k: Key): string = k.BiggestInt.toHex(keyBits div 4)
+
 func toKey*(s: string): Key =
   var key: siphash.Key
   let b = sipHash(toOpenArrayByte(s, s.low, s.high), key)
   cast[Key](b)
 
-func toCbor(k: Key): CborNode =
-  ## Keys are endian independent.
-  newCborBytes cast[array[sizeof(k), byte]](k)
-
 const
   # CBOR tags
   nodeTag = 0
@@ -190,7 +193,7 @@ func toCbor*(x: BlobSet): CborNode =
     newCborTag(nodeTag, x.setId.data.newCborBytes)
   of leafNode:
     let array = newCborArray()
-    array.add x.key
+    array.add x.key.uint64
     array.add x.blob.data
     array.add x.size
     newCborTag(leafTag, array)
@@ -208,7 +211,7 @@ func isHot*(bs: BlobSet): bool = bs.kind == hotNode
 
 func sparseIndex(x: Key): int = int(x and keyChunkMask)
 
 func compactIndex(t: BlobSet; x: Key): int =
-  if (x and keyChunkMask) != 0:
+  if (x and keyChunkMask) != Key(0):
     # TODO: bug in shr and shl, cannot shift all bits out
     result = (int)countSetBits(t.bitmap shl (keyBits - x.sparseIndex))
@@ -241,7 +244,7 @@ func search*(trie: BlobSet; name: string): BlobId =
     n = trie
     k = key
     level = 0
-  while k != 0 and n.masked(k):
+  while k != Key(0) and n.masked(k):
     n = n.table[n.compactIndex(k)]
     if n.kind == leafNode:
       if n.key == key:
@@ -270,7 +273,7 @@ func apply*(trie: BlobSet; name: string; f: proc (id: BlobId; size: BiggestInt))
   var
     n = trie
     k = key
-  while k != 0 and n.masked(k):
+  while k != Key(0) and n.masked(k):
     n = n.table[n.compactIndex(k)]
     if n.kind == leafNode:
       if n.key == key:
@@ -314,12 +317,14 @@ func insert(trie, l: BlobSet; depth: int): BlobSet =
 func insert*(trie, node: BlobSet): BlobSet = insert(trie, node, 0)
   ## Insert set node `node` into `trie`.
 
-func insert*(t: BlobSet; name: string; blob: BlobId; size: BiggestInt): BlobSet =
+func insert*(t: BlobSet; key: Key; blob: BlobId; size: BiggestInt): BlobSet =
   ## Insert a blob hash into a trie.
-  # TODO: this is not functional!
-  let leaf = BlobSet(kind: leafNode, key: name.toKey, blob: blob, size: size)
+  let leaf = BlobSet(kind: leafNode, key: key, blob: blob, size: size)
   insert(t, leaf)
 
+func insert*(t: BlobSet; name: string; blob: BlobId; size: BiggestInt): BlobSet =
+  insert(t, name.toKey, blob, size)
+
 func remove(trie: BlobSet; key: Key; depth: int): BlobSet =
   result = trie
   let key = key shr (depth * keyChunkBits)
@@ -560,8 +565,7 @@ iterator dumpBlob*(store: BlobStore; id: BlobId): string =
     yield buf
 
 proc loadSet(store: BlobStore; id: SetId; depth: int): Future[BlobSet] {.async.} =
-  if (not Key(0)) shr depth == 0:
-    raiseAssert("loadSet trie is too deep")
+  assert((not Key(0)) shr depth != Key(0), "loadSet trie is too deep")
   var
     stream = store.openBlobStream(id, kind=metaBlob)
     buf = newString(blobLeafSize)
@@ -591,7 +595,7 @@ proc loadSet(store: BlobStore; id: SetId; depth: int): Future[BlobSet] {.async.}
       let
        leaf = BlobSet(
          kind: leafNode,
-          key: getNum[Key] node[0],
+          key: (Key)getNum[uint64] node[0],
          blob: parseCborId[BlobId] node[1],
          size: getInt node[2])
       result.table.add leaf
@@ -617,9 +621,13 @@ proc apply*(store: BlobStore; bs: BlobSet; name: string; f: proc (id: BlobId; si
   doAssert(bs.kind == hotNode)
   apply(bs, name, f)
 
+proc insert*(store: BlobStore; bs: BlobSet; key: Key; blob: BlobId; size: BiggestInt): BlobSet =
+  # TODO: lazy-load set
+  insert(bs, key, blob, size)
+
 proc insert*(store: BlobStore; bs: BlobSet; name: string; blob: BlobId; size: BiggestInt): BlobSet =
   # TODO: lazy-load set
-  insert(bs, name, blob, size)
+  insert(store, bs, name.toKey, blob, size)
 
 proc remove*(store: BlobStore; bs: BlobSet; name: string): BlobSet =
   # TODO: lazy-load set
@@ -638,11 +646,10 @@ proc union*(store: BlobStore; sets: varargs[BlobSet]): BlobSet =
 
 import random
 
-proc randomApply*(store: BlobStore; trie: BlobSet; seed: int64;
+proc randomApply*(store: BlobStore; trie: BlobSet; rng: var Rand;
     f: proc(id: BlobId; size: BiggestInt)) =
   ## Apply to random leaf if the set is not empty.
   var
-    rng = initRand(seed)
     retry = 0
     trie = trie
     i = rng.rand(max(1, countSetBits(trie.bitmap))-1)
@@ -658,20 +665,19 @@ proc randomApply*(store: BlobStore; trie: BlobSet; seed: int64;
       trie = next
       i = rng.rand(countSetBits(trie.bitmap)-1)
 
-proc applyAll*(store: BlobStore; trie: BlobSet; seed: int64;
-    f: proc(id: BlobId; size: BiggestInt)) =
-  ## Apply to all members of the set in a pseuedo-random order
-  ## derived from `seed`.
-  if trie.table.len == 0: return
+iterator items*(store: BlobStore; trie: BlobSet; rng: var Rand):
+    tuple[key: Key; id: BlobId; size: BiggestInt] =
+  ## Yield all members of the set in a pseudo-random order
+  ## derived from `rng`.
+  # TODO: add a progress value using the set bits in the bitmasks of each level?
   var
-    rng = initRand(seed)
     path: array[maxDepth.int, tuple[mask: uint64, trie: BlobSet]]
     level = 0
   if trie.kind == coldNode:
     path[0].trie = store.load(trie.setId)
   else:
     path[0].trie = trie
-  path[0].mask = not (not(0'u64) shl path[0].trie.table.len)
+  path[0].mask = not(0'u64) shr (64 - path[0].trie.table.len)
   # set the bits of indexes to hit
   while 0 < level or path[0].mask != 0'u64:
     if path[level].mask == 0'u64:
@@ -685,7 +691,7 @@ proc applyAll*(store: BlobStore; trie: BlobSet; seed: int64;
       path[level].mask = path[level].mask xor bi
       var node = path[level].trie.table[i]
       if node.kind == leafNode:
-        f(node.blob, node.size)
+        yield (node.key, node.blob, node.size)
       else:
         if node.kind == coldNode:
           node = store.load(node.setId)
diff --git a/tests/test_http.nim b/tests/test_http.nim
index 198a4a8..2bcca99 100644
--- a/tests/test_http.nim
+++ b/tests/test_http.nim
@@ -33,6 +33,7 @@ suite "store":
   var
     setId: SetId
     bs: BlobSet
+    rng = initRand(rand(int.high))
 
   const
     count = 64
@@ -65,7 +66,7 @@ suite "store":
 
   test "random":
     for i in 1..count:
-      store.randomApply(bs, i) do (id: BlobId; size: BiggestInt):
+      store.randomApply(bs, rng) do (id: BlobId; size: BiggestInt):
         echo "randomApply: ", id, " ", size
         let stream = store.openBlobStream(id, size, dataBlob)
         close stream
@@ -74,7 +75,7 @@ suite "store":
     bs = load(client, setId)
     for i in 1..count:
       var found = false
-      store.applyAll(bs, i) do (id: BlobId; size: BiggestInt):
+      for key, id, size in items(store, bs, rng):
         if i == size:
           found = true
       if not found: echo i, " not found"
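Usage sketch (not part of the patch): a minimal illustration of how the distinct `Key` type, the new `insert` overloads, and the `items` iterator fit together after this change. `store`, `blobId`, `blobSize`, and `newBlobSet` are assumed to come from the surrounding codebase and are placeholders here, not names introduced by this diff.

import random, blobsets

var
  rng = initRand(0xfeed)   # callers now own the RNG state instead of passing a seed
  bs = newBlobSet()        # assumed constructor for an empty hot set

# Strings hash to keys via toKey; Key is now a distinct uint64, so only
# the borrowed operators (`and`, `not`, `shl`, `shr`, `==`) apply to it.
let nameKey = toKey("some/name")
bs = store.insert(bs, nameKey, blobId, blobSize)       # new Key overload
bs = store.insert(bs, "other/name", blobId, blobSize)  # string convenience wrapper

# Visit a single random member, then walk all members in rng-derived order.
store.randomApply(bs, rng) do (id: BlobId; size: BiggestInt):
  echo "picked ", id, " (", size, " bytes)"

for key, id, size in items(store, bs, rng):
  echo key, " -> ", id, " (", size, " bytes)"   # the new `$` renders Key as hex

One caveat worth flagging: with the `if trie.table.len == 0: return` guard removed from the old applyAll, an empty root table makes the new mask expression evaluate `not(0'u64) shr 64`, an out-of-range shift, so the sketch above assumes a non-empty set.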