fixup! applyAll (iterator)

This commit is contained in:
Ehmry - 2019-03-17 22:02:39 +01:00
parent 7b44dddf5b
commit d8119acc85
2 changed files with 35 additions and 28 deletions

View File

@ -138,24 +138,27 @@ func isNonZero*(bh: BlobId): bool =
r = r or b
r != 0
type
Key* = uint64
type Key* = distinct uint64
proc `and` * (x, y: Key): Key {.borrow.}
proc `not` * (x: Key): Key {.borrow.}
proc `shl` * (x: Key; y: int): Key {.borrow.}
proc `shr` * (x: Key; y: int): Key {.borrow.}
proc `==` * (x, y: Key): bool {.borrow.}
const
keyBits = sizeof(Key) * 8
keyChunkBits = fastLog2 keyBits
keyChunkMask = not ((not 0.Key) shl (keyChunkBits))
keyChunkMask = Key((1 shl keyChunkBits)-1)
maxDepth = keyBits/keyChunkBits
func `$`*(k: Key): string = k.BiggestInt.toHex(keyBits div 4)
func toKey*(s: string): Key =
var key: siphash.Key
let b = sipHash(toOpenArrayByte(s, s.low, s.high), key)
cast[Key](b)
func toCbor(k: Key): CborNode =
## Keys are endian independent.
newCborBytes cast[array[sizeof(k), byte]](k)
const
# CBOR tags
nodeTag = 0
@ -190,7 +193,7 @@ func toCbor*(x: BlobSet): CborNode =
newCborTag(nodeTag, x.setId.data.newCborBytes)
of leafNode:
let array = newCborArray()
array.add x.key
array.add x.key.uint64
array.add x.blob.data
array.add x.size
newCborTag(leafTag, array)
@ -208,7 +211,7 @@ func isHot*(bs: BlobSet): bool = bs.kind == hotNode
func sparseIndex(x: Key): int = int(x and keyChunkMask)
func compactIndex(t: BlobSet; x: Key): int =
if (x and keyChunkMask) != 0:
if (x and keyChunkMask) != Key(0):
# TODO: bug in shr and shl, cannot shift all bits out
result = (int)countSetBits(t.bitmap shl (keyBits - x.sparseIndex))
@ -241,7 +244,7 @@ func search*(trie: BlobSet; name: string): BlobId =
n = trie
k = key
level = 0
while k != 0 and n.masked(k):
while k != Key(0) and n.masked(k):
n = n.table[n.compactIndex(k)]
if n.kind == leafNode:
if n.key == key:
@ -270,7 +273,7 @@ func apply*(trie: BlobSet; name: string; f: proc (id: BlobId; size: BiggestInt))
var
n = trie
k = key
while k != 0 and n.masked(k):
while k != Key(0) and n.masked(k):
n = n.table[n.compactIndex(k)]
if n.kind == leafNode:
if n.key == key:
@ -314,12 +317,14 @@ func insert(trie, l: BlobSet; depth: int): BlobSet =
func insert*(trie, node: BlobSet): BlobSet = insert(trie, node, 0)
## Insert set node `node` into `trie`.
func insert*(t: BlobSet; name: string; blob: BlobId; size: BiggestInt): BlobSet =
func insert*(t: BlobSet; key: Key; blob: BlobId; size: BiggestInt): BlobSet =
## Insert a blob hash into a trie.
# TODO: this is not functional!
let leaf = BlobSet(kind: leafNode, key: name.toKey, blob: blob, size: size)
let leaf = BlobSet(kind: leafNode, key: key, blob: blob, size: size)
insert(t, leaf)
func insert*(t: BlobSet; name: string; blob: BlobId; size: BiggestInt): BlobSet =
insert(t, name.toKey, blob, size)
func remove(trie: BlobSet; key: Key; depth: int): BlobSet =
result = trie
let key = key shr (depth * keyChunkBits)
@ -560,8 +565,7 @@ iterator dumpBlob*(store: BlobStore; id: BlobId): string =
yield buf
proc loadSet(store: BlobStore; id: SetId; depth: int): Future[BlobSet] {.async.} =
if (not Key(0)) shr depth == 0:
raiseAssert("loadSet trie is too deep")
assert((not Key(0)) shr depth != Key(0), "loadSet trie is too deep")
var
stream = store.openBlobStream(id, kind=metaBlob)
buf = newString(blobLeafSize)
@ -591,7 +595,7 @@ proc loadSet(store: BlobStore; id: SetId; depth: int): Future[BlobSet] {.async.}
let
leaf = BlobSet(
kind: leafNode,
key: getNum[Key] node[0],
key: (Key)getNum[uint64] node[0],
blob: parseCborId[BlobId] node[1],
size: getInt node[2])
result.table.add leaf
@ -617,9 +621,13 @@ proc apply*(store: BlobStore; bs: BlobSet; name: string; f: proc (id: BlobId; si
doAssert(bs.kind == hotNode)
apply(bs, name, f)
proc insert*(store: BlobStore; bs: BlobSet; key: Key; blob: BlobId; size: BiggestInt): BlobSet =
# TODO: lazy-load set
insert(bs, key, blob, size)
proc insert*(store: BlobStore; bs: BlobSet; name: string; blob: BlobId; size: BiggestInt): BlobSet =
# TODO: lazy-load set
insert(bs, name, blob, size)
insert(store, bs, name.toKey, blob, size)
proc remove*(store: BlobStore; bs: BlobSet; name: string): BlobSet =
# TODO: lazy-load set
@ -638,11 +646,10 @@ proc union*(store: BlobStore; sets: varargs[BlobSet]): BlobSet =
import random
proc randomApply*(store: BlobStore; trie: BlobSet; seed: int64;
proc randomApply*(store: BlobStore; trie: BlobSet; rng: var Rand;
f: proc(id: BlobId; size: BiggestInt)) =
## Apply to random leaf if the set is not empty.
var
rng = initRand(seed)
retry = 0
trie = trie
i = rng.rand(max(1, countSetBits(trie.bitmap))-1)
@ -658,20 +665,19 @@ proc randomApply*(store: BlobStore; trie: BlobSet; seed: int64;
trie = next
i = rng.rand(countSetBits(trie.bitmap)-1)
proc applyAll*(store: BlobStore; trie: BlobSet; seed: int64;
f: proc(id: BlobId; size: BiggestInt)) =
iterator items*(store: BlobStore; trie: BlobSet; rng: var Rand):
tuple[key: Key; id: BlobId; size: BiggestInt] =
## Apply to all members of the set in a pseudo-random order
## derived from the state of `rng`.
if trie.table.len == 0: return
# TODO: add a progress value using the set bits in the bitmasks of each level?
var
rng = initRand(seed)
path: array[maxDepth.int, tuple[mask: uint64, trie: BlobSet]]
level = 0
if trie.kind == coldNode:
path[0].trie = store.load(trie.setId)
else:
path[0].trie = trie
path[0].mask = not (not(0'u64) shl path[0].trie.table.len)
path[0].mask = not(0'u64) shr (64 - path[0].trie.table.len)
# set the bits of indexes to hit
while 0 < level or path[0].mask != 0'u64:
if path[level].mask == 0'u64:
@ -685,7 +691,7 @@ proc applyAll*(store: BlobStore; trie: BlobSet; seed: int64;
path[level].mask = path[level].mask xor bi
var node = path[level].trie.table[i]
if node.kind == leafNode:
f(node.blob, node.size)
yield (node.key, node.blob, node.size)
else:
if node.kind == coldNode:
node = store.load(node.setId)

View File

@ -33,6 +33,7 @@ suite "store":
var
setId: SetId
bs: BlobSet
rng = initRand(rand(int.high))
const count = 64
@ -65,7 +66,7 @@ suite "store":
test "random":
for i in 1..count:
store.randomApply(bs, i) do (id: BlobId; size: BiggestInt):
store.randomApply(bs, rng) do (id: BlobId; size: BiggestInt):
echo "randomApply: ", id, " ", size
let stream = store.openBlobStream(id, size, dataBlob)
close stream
@ -74,7 +75,7 @@ suite "store":
bs = load(client, setId)
for i in 1..count:
var found = false
store.applyAll(bs, i) do (id: BlobId; size: BiggestInt):
for key, id, size in items(store, bs, rng):
if i == size: found = true
if not found:
echo i, " not found"