Fix CBOR key sign-bit error
This commit is contained in:
parent
3da2accfd5
commit
a0aeaaef09
|
@ -80,7 +80,7 @@ proc ingestMain() =
|
|||
let path = normalizedPath args[i]
|
||||
set = store.insertPath(set, path.getFileInfo.kind, path)
|
||||
let final = store.commit set
|
||||
writeLine(stdout, final.setId)
|
||||
writeLine(stdout, final.setId.toHex)
|
||||
|
||||
type
|
||||
EvalError = object of CatchableError
|
||||
|
@ -340,8 +340,8 @@ proc readAtom(r: Reader): Atom =
|
|||
elif token.contains DirSep:
|
||||
# TODO: memoize this, store a table of paths to atoms
|
||||
newAtomPath token
|
||||
elif token.len == blobVisualLen:
|
||||
Atom(kind: atomBlob, blob: token.toBlobId)
|
||||
elif token.len in { blobHexLen, blobVisualLen }:
|
||||
Atom(kind: atomSet, bs: newBlobSet(token.toSetId))
|
||||
else:
|
||||
Atom(kind: atomSymbol, sym: token.normalize)
|
||||
#except:
|
||||
|
@ -474,14 +474,21 @@ proc hexFunc(env: Env; args: NodeObj): NodeRef =
|
|||
of atomBlob:
|
||||
a.blob.toHex.newAtomString.newNode
|
||||
of atomSet:
|
||||
let cold = commit(env.store, a.bs)
|
||||
cold.setId.toHex.newAtomString.newNode
|
||||
case a.bs.kind
|
||||
of hotNode:
|
||||
let cold = commit(env.store, a.bs)
|
||||
cold.setId.toHex.newAtomString.newNode
|
||||
of coldNode:
|
||||
a.bs.setId.toHex.newAtomString.newNode
|
||||
of leafNode:
|
||||
a.bs.blob.toHex.newAtomString.newNode
|
||||
else:
|
||||
newNodeError("cannot convert to hex", args)
|
||||
|
||||
proc keyFunc(env: Env; args: NodeObj): NodeRef =
|
||||
assertArgCount(args, 1)
|
||||
args.atom.str.toKey.newAtom.newNode
|
||||
let k = args.atom.str.toKey
|
||||
k.newAtom.newNode
|
||||
|
||||
proc ingestFunc(env: Env; args: NodeObj): NodeRef =
|
||||
var bs: BlobSet
|
||||
|
@ -516,7 +523,7 @@ proc listFunc(env: Env; args: NodeObj): NodeRef =
|
|||
|
||||
proc loadFunc(env: Env; args: NodeObj): NodeRef =
|
||||
assertArgCount(args, 1)
|
||||
let bs = env.store.loadSet args.atom.bs.setId
|
||||
let bs = env.store.load(args.atom.bs.setId)
|
||||
bs.newAtom.newNode
|
||||
|
||||
proc mapFunc(env: Env; args: NodeObj): NodeRef =
|
||||
|
|
132
src/blobsets.nim
132
src/blobsets.nim
|
@ -113,7 +113,7 @@ proc take*(cid: var BlobId; buf: var string) =
|
|||
doAssert(buf.len == digestLen)
|
||||
copyMem(cid.data[0].addr, buf[0].addr, digestLen)
|
||||
|
||||
proc dagHash*(buf: pointer; len: Natural): BlobId =
|
||||
proc blobHash*(buf: pointer; len: Natural): BlobId =
|
||||
## Generate a BlobId for a string of data using the BLAKE2b hash algorithm.
|
||||
assert(len <= maxChunkSize)
|
||||
var b: Blake2b256
|
||||
|
@ -121,7 +121,7 @@ proc dagHash*(buf: pointer; len: Natural): BlobId =
|
|||
update(b, buf, len)
|
||||
b.finish()
|
||||
|
||||
proc dagHash*(data: string): BlobId =
|
||||
proc blobHash*(data: string): BlobId =
|
||||
## Generate a BlobId for a string of data using the BLAKE2b hash algorithm.
|
||||
assert(data.len <= maxChunkSize)
|
||||
var b: Blake2b256
|
||||
|
@ -156,7 +156,7 @@ func isNonZero*(bh: BlobId): bool =
|
|||
{.deprecated: [isValid: isNonZero].}
|
||||
|
||||
type
|
||||
Key* = int64
|
||||
Key* = uint64
|
||||
|
||||
const
|
||||
keyBits = sizeof(Key) shl 3
|
||||
|
@ -183,18 +183,42 @@ type
|
|||
BlobSetObj = object
|
||||
case kind*: SetKind
|
||||
of hotNode:
|
||||
bitmap: Key
|
||||
bitmap: uint64
|
||||
table: seq[BlobSet]
|
||||
of coldNode:
|
||||
setId*: SetId
|
||||
of leafNode:
|
||||
key: Key
|
||||
blob: BlobId
|
||||
blob*: BlobId
|
||||
size: BiggestInt
|
||||
|
||||
func toCbor*(x: BlobSet): CborNode =
|
||||
case x.kind
|
||||
of hotNode:
|
||||
let array = newCborArray()
|
||||
let bitmap = newCborInt(x.bitmap)
|
||||
assert(bitmap.getInt.uint64 == x.bitmap, $bitmap.getInt.uint64 & " != " & $x.bitmap)
|
||||
array.add bitmap
|
||||
for y in x.table:
|
||||
array.add y.toCbor
|
||||
newCborTag(nodeTag, array)
|
||||
of coldNode:
|
||||
newCborTag(nodeTag, x.setId.data.newCborBytes)
|
||||
of leafNode:
|
||||
let array = newCborArray()
|
||||
array.add x.key
|
||||
array.add x.blob.data
|
||||
array.add x.size
|
||||
newCborTag(leafTag, array)
|
||||
|
||||
func newBlobSet*(): BlobSet =
|
||||
## Create a new hot blob set.
|
||||
BlobSet(kind: hotNode, table: newSeqOfCap[BlobSet](2))
|
||||
|
||||
func newBlobSet*(id: SetId): BlobSet =
|
||||
## Create a new cold blob set.
|
||||
BlobSet(kind: coldNode, setId: id)
|
||||
|
||||
func sparseIndex(x: Key): int = int(x and keyChunkMask)
|
||||
|
||||
func compactIndex(t: BlobSet; x: Key): int =
|
||||
|
@ -205,7 +229,7 @@ func compactIndex(t: BlobSet; x: Key): int =
|
|||
func masked(t: BlobSet; x: Key): bool =
|
||||
((t.bitmap shr x.sparseIndex) and 1) != 0
|
||||
|
||||
func isEmpty*(s: BlobSet): bool = s.bitmap == Key(0)
|
||||
func isEmpty*(s: BlobSet): bool = s.bitmap == 0'u64
|
||||
## Test if a set is empty.
|
||||
|
||||
func nodeCount*(bs: BlobSet): int =
|
||||
|
@ -225,20 +249,21 @@ func leafCount*(bs: BlobSet): int =
|
|||
else:
|
||||
result.inc n.leafCount
|
||||
|
||||
func search*(t: BlobSet; name: string): BlobId =
|
||||
func search*(trie: BlobSet; name: string): BlobId =
|
||||
let key = name.toKey
|
||||
var
|
||||
t = t
|
||||
key = name.toKey
|
||||
while true:
|
||||
assert(key != 0, "keyspace exhausted during search")
|
||||
if t.masked(key):
|
||||
t = t.table[t.compactIndex(key)]
|
||||
if t.kind == leafNode:
|
||||
result = t.blob
|
||||
break
|
||||
key = key shr keyChunkBits
|
||||
else:
|
||||
raise newException(KeyError, "blob set does not contain key")
|
||||
n = trie
|
||||
k = key
|
||||
level = 0
|
||||
while k != 0 and n.masked(k):
|
||||
n = n.table[n.compactIndex(k)]
|
||||
if n.kind == leafNode:
|
||||
if n.key == key:
|
||||
return n.blob
|
||||
break
|
||||
k = k shr keyChunkBits
|
||||
inc level
|
||||
raise newException(KeyError, "key not in blob set")
|
||||
|
||||
func apply(bs: BlobSet; cb: proc (leaf: BlobSet)) =
|
||||
## Apply a callback to each set element.
|
||||
|
@ -253,21 +278,19 @@ func apply(bs: BlobSet; cb: proc (leaf: BlobSet)) =
|
|||
else:
|
||||
raiseAssert("cannot apply to node type " & $node.kind)
|
||||
|
||||
func apply*(t: BlobSet; name: string; f: proc (id: BlobId; size: BiggestInt)) =
|
||||
func apply*(trie: BlobSet; name: string; f: proc (id: BlobId; size: BiggestInt)) =
|
||||
## Apply a procedure to a named blob, if it is present
|
||||
let key = name.toKey
|
||||
var
|
||||
t = t
|
||||
key = name.toKey
|
||||
while true:
|
||||
assert(key != 0, "keyspace exhausted during search")
|
||||
if t.masked(key):
|
||||
t = t.table[t.compactIndex(key)]
|
||||
if t.kind == leafNode:
|
||||
f(t.blob, t.size)
|
||||
break
|
||||
key = key shr keyChunkBits
|
||||
else:
|
||||
n = trie
|
||||
k = key
|
||||
while k != 0 and n.masked(k):
|
||||
n = n.table[n.compactIndex(k)]
|
||||
if n.kind == leafNode:
|
||||
if n.key == key:
|
||||
f(n.blob, n.size)
|
||||
break
|
||||
k = k shr keyChunkBits
|
||||
|
||||
func contains*(bs: BlobSet; name: string): bool =
|
||||
var found = false
|
||||
|
@ -275,7 +298,7 @@ func contains*(bs: BlobSet; name: string): bool =
|
|||
found = true
|
||||
result = found
|
||||
|
||||
func insert(trie, l: BlobSet; depth: int): BlobSet =
|
||||
func insert(trie, l: BlobSet; depth: int; name = ""): BlobSet =
|
||||
## This procedure is recursive to a depth of keyBits/keyChunkBits.
|
||||
# TODO: not functional?
|
||||
doAssert(depth < (keyBits div keyChunkBits), "key space exhausted during insert")
|
||||
|
@ -294,21 +317,22 @@ func insert(trie, l: BlobSet; depth: int): BlobSet =
|
|||
if result.table[i].key == l.key:
|
||||
raise newException(KeyError, "key collision in blob set")
|
||||
var subtrie = newBlobSet()
|
||||
subtrie = subtrie.insert(result.table[i], depth)
|
||||
subtrie = subtrie.insert(l, depth)
|
||||
subtrie = subtrie.insert(result.table[i], depth, "<unknown>")
|
||||
subtrie = subtrie.insert(l, depth, name)
|
||||
result.table[i] = subtrie
|
||||
assert(result.table[i].kind == hotNode)
|
||||
else:
|
||||
result.bitmap = result.bitmap or (Key(1) shl key.sparseIndex)
|
||||
result.bitmap = result.bitmap or (1'u64 shl key.sparseIndex)
|
||||
result.table.insert(l, result.compactIndex(key))
|
||||
|
||||
func insert*(trie, node: BlobSet): BlobSet = insert(trie, node, 0)
|
||||
func insert*(trie, node: BlobSet; name = ""): BlobSet = insert(trie, node, 0, name)
|
||||
## Insert set node `node` into `trie`.
|
||||
|
||||
func insert*(t: BlobSet; name: string; blob: BlobId; size: BiggestInt): BlobSet =
|
||||
## Insert a blob hash into a trie.
|
||||
# TODO: this is not functional!
|
||||
let leaf = BlobSet(kind: leafNode, key: name.toKey, blob: blob, size: size)
|
||||
insert(t, leaf)
|
||||
insert(t, leaf, name)
|
||||
|
||||
func remove(trie: BlobSet; key: Key; depth: int): BlobSet =
|
||||
result = trie
|
||||
|
@ -345,23 +369,6 @@ func remove*(trie: BlobSet; name: string): BlobSet =
|
|||
if result.isNil:
|
||||
result = newBlobSet()
|
||||
|
||||
func toCbor*(x: BlobSet): CborNode =
|
||||
case x.kind
|
||||
of hotNode:
|
||||
let array = newCborArray()
|
||||
array.add x.bitmap
|
||||
for y in x.table:
|
||||
array.add y.toCbor
|
||||
newCborTag(nodeTag, array)
|
||||
of coldNode:
|
||||
newCborTag(nodeTag, x.setId.data.newCborBytes)
|
||||
of leafNode:
|
||||
let array = newCborArray()
|
||||
array.add x.key
|
||||
array.add x.blob.data
|
||||
array.add x.size
|
||||
newCborTag(leafTag, array)
|
||||
|
||||
func leafCount*(size: Natural): int = (size+blobLeafSize-1) div blobLeafSize
|
||||
|
||||
func compressTree*(leaves: var seq[BlobId]) =
|
||||
|
@ -561,7 +568,7 @@ iterator dumpBlob*(store: BlobStore; id: BlobId): string =
|
|||
yield buf
|
||||
|
||||
proc loadSet(store: BlobStore; id: SetId; depth: int): Future[BlobSet] {.async.} =
|
||||
if Key.high shr depth == 0:
|
||||
if (not Key(0)) shr depth == 0:
|
||||
raiseAssert("loadSet trie is too deep")
|
||||
var
|
||||
stream = store.openBlobStream(id, kind=metaBlob)
|
||||
|
@ -569,13 +576,15 @@ proc loadSet(store: BlobStore; id: SetId; depth: int): Future[BlobSet] {.async.}
|
|||
defer:
|
||||
close stream
|
||||
let n = await stream.read(buf[0].addr, buf.len)
|
||||
assert(n != 0, "read zero of set " & $id)
|
||||
buf.setLen(n)
|
||||
let
|
||||
c = buf.parseCbor.val
|
||||
bitmap = c.seq[0].getInt
|
||||
tagPair = parseCbor buf
|
||||
c = tagPair.val
|
||||
bitmap = c.seq[0].getInt.uint64
|
||||
if bitmap.countSetBits != c.seq.len-1:
|
||||
let bits = bitmap.countSetBits
|
||||
raise newException(ValueError, "invalid set CBOR, bitmap is " & $bits & " and sequence len is " & $c.seq.len)
|
||||
raise newException(ValueError, "invalid set CBOR, bitmap has " & $bits & " bits and sequence len is " & $c.seq.len)
|
||||
result = BlobSet(
|
||||
kind: hotNode,
|
||||
bitmap: bitmap,
|
||||
|
@ -590,14 +599,14 @@ proc loadSet(store: BlobStore; id: SetId; depth: int): Future[BlobSet] {.async.}
|
|||
let
|
||||
leaf = BlobSet(
|
||||
kind: leafNode,
|
||||
key: getInt node[0],
|
||||
key: getNum[Key] node[0],
|
||||
blob: parseCborId[BlobId] node[1],
|
||||
size: getInt node[2])
|
||||
result.table.add leaf
|
||||
else:
|
||||
raise newException(ValueError, "invalid set CBOR")
|
||||
|
||||
proc loadSet*(store: BlobStore; id: SetId): BlobSet =
|
||||
proc load*(store: BlobStore; id: SetId): BlobSet =
|
||||
waitFor loadSet(store, id, 0)
|
||||
|
||||
proc commit*(store: BlobStore; bs: BlobSet): BlobSet =
|
||||
|
@ -615,7 +624,8 @@ proc commit*(store: BlobStore; bs: BlobSet): BlobSet =
|
|||
|
||||
proc apply*(store: BlobStore; bs: BlobSet; name: string; f: proc (id: BlobId; size: BiggestInt)) =
|
||||
# TODO: lazy-load set
|
||||
bs.apply(name, f)
|
||||
doAssert(bs.kind == hotNode)
|
||||
apply(bs, name, f)
|
||||
|
||||
proc insert*(store: BlobStore; bs: BlobSet; name: string; blob: BlobId; size: BiggestInt): BlobSet =
|
||||
# TODO: lazy-load set
|
||||
|
|
|
@ -2,25 +2,61 @@ import std/asyncdispatch, std/net, std/random, std/strutils, std/unittest
|
|||
|
||||
import ../src/blobsets, ../src/blobsets/filestores, ../src/blobsets/httpstores, ../src/blobsets/httpservers
|
||||
|
||||
let
|
||||
port = (Port)rand(1 shl 15)
|
||||
store = newFileStore("/tmp/store")
|
||||
server = newHttpStoreServer(store)
|
||||
asyncCheck server.serve(port)
|
||||
|
||||
let
|
||||
url = "http://127.0.0.1:$1/" % $port
|
||||
client = newHttpStore url
|
||||
|
||||
suite "Http store":
|
||||
randomize()
|
||||
|
||||
let
|
||||
port = (Port)rand(1 shl 15)
|
||||
store = newNullStore()
|
||||
server = newHttpStoreServer(store)
|
||||
asyncCheck server.serve(port)
|
||||
|
||||
let
|
||||
url = "http://127.0.0.1:$1/" % $port
|
||||
client = newHttpStore url
|
||||
|
||||
var
|
||||
blob: BlobId
|
||||
size: BiggestInt
|
||||
|
||||
test "ingest":
|
||||
(blob, size) = client.ingestFile("tests/test_http.nim")
|
||||
(blob, size) = waitFor client.ingestFile("tests/test_http.nim")
|
||||
test "dump":
|
||||
for chunk in store.dumpBlob(blob):
|
||||
echo chunk
|
||||
discard chunk
|
||||
|
||||
test "ingest":
|
||||
(blob, size) = waitFor client.ingestFile("tests/test_http.nim")
|
||||
|
||||
|
||||
suite "store":
|
||||
var
|
||||
setId: SetId
|
||||
|
||||
const count = 256
|
||||
|
||||
test "commit":
|
||||
var bs = newBlobSet()
|
||||
for i in 1..count:
|
||||
let
|
||||
name = $i
|
||||
blob = blobHash name
|
||||
bs = insert(bs, name, blob, 0)
|
||||
setId = commit(client, bs).setId
|
||||
|
||||
test "load":
|
||||
var bs = load(client, setId)
|
||||
for i in 1..count:
|
||||
let
|
||||
name = $i
|
||||
blob = blobHash name
|
||||
other = bs.search(name)
|
||||
doAssert(other == blob)
|
||||
for i in 1..count:
|
||||
let
|
||||
i = i and 0x8000
|
||||
name = $i
|
||||
apply(client, bs, name) do (id: BlobId; size: BiggestInt):
|
||||
echo "inserted ", name, " - ", name.toKey
|
||||
echo "applied ", name, " - ", ($(i xor 0x8000)).toKey
|
||||
raiseAssert("apply succedded for a key not inserted")
|
||||
|
|
|
@ -5,13 +5,13 @@ import ../src/blobsets
|
|||
suite "Blob set tests":
|
||||
|
||||
var
|
||||
randomCid = dagHash(newString(maxChunkSize))
|
||||
randomCid = blobHash(newString(maxChunkSize))
|
||||
|
||||
# test "zero blob":
|
||||
# doAssert(randomCid == zeroChunk)
|
||||
|
||||
proc randomize() =
|
||||
randomCid = dagHash(randomCid.data.addr, randomCid.data.len)
|
||||
randomCid = blobHash(randomCid.data.addr, randomCid.data.len)
|
||||
|
||||
proc testPath(s: BlobSet; root: string): BlobSet =
|
||||
for path in walkDirRec(root):
|
||||
|
@ -36,6 +36,20 @@ suite "Blob set tests":
|
|||
doAssert(not contains(a, "bar"))
|
||||
doAssert(not contains(b, "bar"))
|
||||
|
||||
test "apply":
|
||||
var bs = newBlobSet()
|
||||
for i in 1..1024:
|
||||
let
|
||||
name = $i
|
||||
blob = blobHash name
|
||||
bs = insert(bs, name, blob, 0)
|
||||
for i in 1..1024:
|
||||
let
|
||||
name = $i
|
||||
blob = blobHash name
|
||||
other = bs.search(name)
|
||||
doAssert(other == blob)
|
||||
|
||||
test "sets":
|
||||
var s = newBlobSet()
|
||||
for kind, key, val in getopt():
|
||||
|
|
Loading…
Reference in New Issue