Fix CBOR key sign-bit error

Ehmry - 2019-02-15 21:56:21 +01:00
parent 3da2accfd5
commit a0aeaaef09
4 changed files with 149 additions and 82 deletions
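In short: `Key` (and the hot-node bitmap) was a signed `int64`, so any key or bitmap with the most significant bit set reads as negative, and CBOR encodes negative integers under a different major type (1) than unsigned ones (0). The diff below widens these fields to `uint64` so the full 64-bit pattern survives the encode/parse round trip. A minimal sketch of the hazard, with an illustrative bit pattern:

    let key = 0x8000_0000_0000_0001'u64  # top bit set
    doAssert cast[int64](key) < 0        # the signed view is negative, so a signed
    # CBOR encoding would use major type 1 and lose the unsigned reading of the bits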

View File

@@ -80,7 +80,7 @@ proc ingestMain() =
let path = normalizedPath args[i]
set = store.insertPath(set, path.getFileInfo.kind, path)
let final = store.commit set
writeLine(stdout, final.setId)
writeLine(stdout, final.setId.toHex)
type
EvalError = object of CatchableError
@@ -340,8 +340,8 @@ proc readAtom(r: Reader): Atom =
elif token.contains DirSep:
# TODO: memoize this, store a table of paths to atoms
newAtomPath token
elif token.len == blobVisualLen:
Atom(kind: atomBlob, blob: token.toBlobId)
elif token.len in { blobHexLen, blobVisualLen }:
Atom(kind: atomSet, bs: newBlobSet(token.toSetId))
else:
Atom(kind: atomSymbol, sym: token.normalize)
#except:
@@ -474,14 +474,21 @@ proc hexFunc(env: Env; args: NodeObj): NodeRef =
of atomBlob:
a.blob.toHex.newAtomString.newNode
of atomSet:
let cold = commit(env.store, a.bs)
cold.setId.toHex.newAtomString.newNode
case a.bs.kind
of hotNode:
let cold = commit(env.store, a.bs)
cold.setId.toHex.newAtomString.newNode
of coldNode:
a.bs.setId.toHex.newAtomString.newNode
of leafNode:
a.bs.blob.toHex.newAtomString.newNode
else:
newNodeError("cannot convert to hex", args)
proc keyFunc(env: Env; args: NodeObj): NodeRef =
assertArgCount(args, 1)
args.atom.str.toKey.newAtom.newNode
let k = args.atom.str.toKey
k.newAtom.newNode
proc ingestFunc(env: Env; args: NodeObj): NodeRef =
var bs: BlobSet
@@ -516,7 +523,7 @@ proc listFunc(env: Env; args: NodeObj): NodeRef =
proc loadFunc(env: Env; args: NodeObj): NodeRef =
assertArgCount(args, 1)
let bs = env.store.loadSet args.atom.bs.setId
let bs = env.store.load(args.atom.bs.setId)
bs.newAtom.newNode
proc mapFunc(env: Env; args: NodeObj): NodeRef =

View File

@@ -113,7 +113,7 @@ proc take*(cid: var BlobId; buf: var string) =
doAssert(buf.len == digestLen)
copyMem(cid.data[0].addr, buf[0].addr, digestLen)
proc dagHash*(buf: pointer; len: Natural): BlobId =
proc blobHash*(buf: pointer; len: Natural): BlobId =
## Generate a BlobId for a string of data using the BLAKE2b hash algorithm.
assert(len <= maxChunkSize)
var b: Blake2b256
@@ -121,7 +121,7 @@ proc dagHash*(buf: pointer; len: Natural): BlobId =
update(b, buf, len)
b.finish()
proc dagHash*(data: string): BlobId =
proc blobHash*(data: string): BlobId =
## Generate a BlobId for a string of data using the BLAKE2b hash algorithm.
assert(data.len <= maxChunkSize)
var b: Blake2b256
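A small usage sketch of the renamed hasher, using the string overload from the hunk above (the input string is illustrative):

    let id = blobHash "hello blob"  # one-shot BLAKE2b-256 of a short string
    echo id.toHex                   # hex form, as ingestMain now prints setIds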
@@ -156,7 +156,7 @@ func isNonZero*(bh: BlobId): bool =
{.deprecated: [isValid: isNonZero].}
type
Key* = int64
Key* = uint64
const
keyBits = sizeof(Key) shl 3
@@ -183,18 +183,42 @@ type
BlobSetObj = object
case kind*: SetKind
of hotNode:
bitmap: Key
bitmap: uint64
table: seq[BlobSet]
of coldNode:
setId*: SetId
of leafNode:
key: Key
blob: BlobId
blob*: BlobId
size: BiggestInt
func toCbor*(x: BlobSet): CborNode =
case x.kind
of hotNode:
let array = newCborArray()
let bitmap = newCborInt(x.bitmap)
assert(bitmap.getInt.uint64 == x.bitmap, $bitmap.getInt.uint64 & " != " & $x.bitmap)
array.add bitmap
for y in x.table:
array.add y.toCbor
newCborTag(nodeTag, array)
of coldNode:
newCborTag(nodeTag, x.setId.data.newCborBytes)
of leafNode:
let array = newCborArray()
array.add x.key
array.add x.blob.data
array.add x.size
newCborTag(leafTag, array)
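The assert in the hot-node branch guards exactly the failure named in the commit title; a hedged round-trip sketch with the same helpers, assuming the `newCborInt`/`getInt` overloads used above:

    let bitmap = 0x8000_0000_0000_0000'u64    # sign bit set
    let encoded = newCborInt(bitmap)
    doAssert encoded.getInt.uint64 == bitmap  # holds now that the field is unsigned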
func newBlobSet*(): BlobSet =
## Create a new hot blob set.
BlobSet(kind: hotNode, table: newSeqOfCap[BlobSet](2))
func newBlobSet*(id: SetId): BlobSet =
## Create a new cold blob set.
BlobSet(kind: coldNode, setId: id)
func sparseIndex(x: Key): int = int(x and keyChunkMask)
func compactIndex(t: BlobSet; x: Key): int =
@@ -205,7 +229,7 @@ func compactIndex(t: BlobSet; x: Key): int =
func masked(t: BlobSet; x: Key): bool =
((t.bitmap shr x.sparseIndex) and 1) != 0
func isEmpty*(s: BlobSet): bool = s.bitmap == Key(0)
func isEmpty*(s: BlobSet): bool = s.bitmap == 0'u64
## Test if a set is empty.
func nodeCount*(bs: BlobSet): int =
@@ -225,20 +249,21 @@ func leafCount*(bs: BlobSet): int =
else:
result.inc n.leafCount
func search*(t: BlobSet; name: string): BlobId =
func search*(trie: BlobSet; name: string): BlobId =
let key = name.toKey
var
t = t
key = name.toKey
while true:
assert(key != 0, "keyspace exhausted during search")
if t.masked(key):
t = t.table[t.compactIndex(key)]
if t.kind == leafNode:
result = t.blob
break
key = key shr keyChunkBits
else:
raise newException(KeyError, "blob set does not contain key")
n = trie
k = key
level = 0
while k != 0 and n.masked(k):
n = n.table[n.compactIndex(k)]
if n.kind == leafNode:
if n.key == key:
return n.blob
break
k = k shr keyChunkBits
inc level
raise newException(KeyError, "key not in blob set")
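A hedged usage sketch of the rewritten lookup (names and size are illustrative): the walk now compares the leaf's full key against the query, so a name whose key only shares a chunk prefix raises KeyError instead of returning the wrong blob.

    let blob = blobHash "README.md"
    var trie = newBlobSet()
    trie = trie.insert("README.md", blob, 1234)
    doAssert trie.search("README.md") == blob
    try:
      discard trie.search("LICENSE")  # never inserted
      raiseAssert "unreachable"
    except KeyError:
      discard  # expected: the full-key check at the leaf rejects near-misses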
func apply(bs: BlobSet; cb: proc (leaf: BlobSet)) =
## Apply a callback to each set element.
@@ -253,21 +278,19 @@ func apply(bs: BlobSet; cb: proc (leaf: BlobSet)) =
else:
raiseAssert("cannot apply to node type " & $node.kind)
func apply*(t: BlobSet; name: string; f: proc (id: BlobId; size: BiggestInt)) =
func apply*(trie: BlobSet; name: string; f: proc (id: BlobId; size: BiggestInt)) =
## Apply a procedure to a named blob, if it is present
let key = name.toKey
var
t = t
key = name.toKey
while true:
assert(key != 0, "keyspace exhausted during search")
if t.masked(key):
t = t.table[t.compactIndex(key)]
if t.kind == leafNode:
f(t.blob, t.size)
break
key = key shr keyChunkBits
else:
n = trie
k = key
while k != 0 and n.masked(k):
n = n.table[n.compactIndex(k)]
if n.kind == leafNode:
if n.key == key:
f(n.blob, n.size)
break
k = k shr keyChunkBits
func contains*(bs: BlobSet; name: string): bool =
var found = false
@@ -275,7 +298,7 @@ func contains*(bs: BlobSet; name: string): bool =
found = true
result = found
func insert(trie, l: BlobSet; depth: int): BlobSet =
func insert(trie, l: BlobSet; depth: int; name = ""): BlobSet =
## This procedure is recursive to a depth of keyBits/keyChunkBits.
# TODO: not functional?
doAssert(depth < (keyBits div keyChunkBits), "key space exhausted during insert")
@@ -294,21 +317,22 @@ func insert(trie, l: BlobSet; depth: int): BlobSet =
if result.table[i].key == l.key:
raise newException(KeyError, "key collision in blob set")
var subtrie = newBlobSet()
subtrie = subtrie.insert(result.table[i], depth)
subtrie = subtrie.insert(l, depth)
subtrie = subtrie.insert(result.table[i], depth, "<unknown>")
subtrie = subtrie.insert(l, depth, name)
result.table[i] = subtrie
assert(result.table[i].kind == hotNode)
else:
result.bitmap = result.bitmap or (Key(1) shl key.sparseIndex)
result.bitmap = result.bitmap or (1'u64 shl key.sparseIndex)
result.table.insert(l, result.compactIndex(key))
func insert*(trie, node: BlobSet): BlobSet = insert(trie, node, 0)
func insert*(trie, node: BlobSet; name = ""): BlobSet = insert(trie, node, 0, name)
## Insert set node `node` into `trie`.
func insert*(t: BlobSet; name: string; blob: BlobId; size: BiggestInt): BlobSet =
## Insert a blob hash into a trie.
# TODO: this is not functional!
let leaf = BlobSet(kind: leafNode, key: name.toKey, blob: blob, size: size)
insert(t, leaf)
insert(t, leaf, name)
func remove(trie: BlobSet; key: Key; depth: int): BlobSet =
result = trie
@@ -345,23 +369,6 @@ func remove*(trie: BlobSet; name: string): BlobSet =
if result.isNil:
result = newBlobSet()
func toCbor*(x: BlobSet): CborNode =
case x.kind
of hotNode:
let array = newCborArray()
array.add x.bitmap
for y in x.table:
array.add y.toCbor
newCborTag(nodeTag, array)
of coldNode:
newCborTag(nodeTag, x.setId.data.newCborBytes)
of leafNode:
let array = newCborArray()
array.add x.key
array.add x.blob.data
array.add x.size
newCborTag(leafTag, array)
func leafCount*(size: Natural): int = (size+blobLeafSize-1) div blobLeafSize
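`leafCount` is plain ceiling division over `blobLeafSize`; for instance:

    doAssert leafCount(0) == 0                 # empty blob needs no leaves
    doAssert leafCount(blobLeafSize) == 1      # exactly one full leaf
    doAssert leafCount(blobLeafSize + 1) == 2  # one byte over spills into a second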
func compressTree*(leaves: var seq[BlobId]) =
@@ -561,7 +568,7 @@ iterator dumpBlob*(store: BlobStore; id: BlobId): string =
yield buf
proc loadSet(store: BlobStore; id: SetId; depth: int): Future[BlobSet] {.async.} =
if Key.high shr depth == 0:
if (not Key(0)) shr depth == 0:
raiseAssert("loadSet trie is too deep")
var
stream = store.openBlobStream(id, kind=metaBlob)
@@ -569,13 +576,15 @@ proc loadSet(store: BlobStore; id: SetId; depth: int): Future[BlobSet] {.async.}
defer:
close stream
let n = await stream.read(buf[0].addr, buf.len)
assert(n != 0, "read zero of set " & $id)
buf.setLen(n)
let
c = buf.parseCbor.val
bitmap = c.seq[0].getInt
tagPair = parseCbor buf
c = tagPair.val
bitmap = c.seq[0].getInt.uint64
if bitmap.countSetBits != c.seq.len-1:
let bits = bitmap.countSetBits
raise newException(ValueError, "invalid set CBOR, bitmap is " & $bits & " and sequence len is " & $c.seq.len)
raise newException(ValueError, "invalid set CBOR, bitmap has " & $bits & " bits and sequence len is " & $c.seq.len)
result = BlobSet(
kind: hotNode,
bitmap: bitmap,
@@ -590,14 +599,14 @@ proc loadSet(store: BlobStore; id: SetId; depth: int): Future[BlobSet] {.async.}
let
leaf = BlobSet(
kind: leafNode,
key: getInt node[0],
key: getNum[Key] node[0],
blob: parseCborId[BlobId] node[1],
size: getInt node[2])
result.table.add leaf
else:
raise newException(ValueError, "invalid set CBOR")
proc loadSet*(store: BlobStore; id: SetId): BlobSet =
proc load*(store: BlobStore; id: SetId): BlobSet =
waitFor loadSet(store, id, 0)
proc commit*(store: BlobStore; bs: BlobSet): BlobSet =
@@ -615,7 +624,8 @@ proc commit*(store: BlobStore; bs: BlobSet): BlobSet =
proc apply*(store: BlobStore; bs: BlobSet; name: string; f: proc (id: BlobId; size: BiggestInt)) =
# TODO: lazy-load set
bs.apply(name, f)
doAssert(bs.kind == hotNode)
apply(bs, name, f)
proc insert*(store: BlobStore; bs: BlobSet; name: string; blob: BlobId; size: BiggestInt): BlobSet =
# TODO: lazy-load set

View File

@@ -2,25 +2,61 @@ import std/asyncdispatch, std/net, std/random, std/strutils, std/unittest
import ../src/blobsets, ../src/blobsets/filestores, ../src/blobsets/httpstores, ../src/blobsets/httpservers
let
port = (Port)rand(1 shl 15)
store = newFileStore("/tmp/store")
server = newHttpStoreServer(store)
asyncCheck server.serve(port)
let
url = "http://127.0.0.1:$1/" % $port
client = newHttpStore url
suite "Http store":
randomize()
let
port = (Port)rand(1 shl 15)
store = newNullStore()
server = newHttpStoreServer(store)
asyncCheck server.serve(port)
let
url = "http://127.0.0.1:$1/" % $port
client = newHttpStore url
var
blob: BlobId
size: BiggestInt
test "ingest":
(blob, size) = client.ingestFile("tests/test_http.nim")
(blob, size) = waitFor client.ingestFile("tests/test_http.nim")
test "dump":
for chunk in store.dumpBlob(blob):
echo chunk
discard chunk
test "ingest":
(blob, size) = waitFor client.ingestFile("tests/test_http.nim")
suite "store":
var
setId: SetId
const count = 256
test "commit":
var bs = newBlobSet()
for i in 1..count:
let
name = $i
blob = blobHash name
bs = insert(bs, name, blob, 0)
setId = commit(client, bs).setId
test "load":
var bs = load(client, setId)
for i in 1..count:
let
name = $i
blob = blobHash name
other = bs.search(name)
doAssert(other == blob)
for i in 1..count:
let
i = i and 0x8000
name = $i
apply(client, bs, name) do (id: BlobId; size: BiggestInt):
echo "inserted ", name, " - ", name.toKey
echo "applied ", name, " - ", ($(i xor 0x8000)).toKey
raiseAssert("apply succedded for a key not inserted")

View File

@@ -5,13 +5,13 @@ import ../src/blobsets
suite "Blob set tests":
var
randomCid = dagHash(newString(maxChunkSize))
randomCid = blobHash(newString(maxChunkSize))
# test "zero blob":
# doAssert(randomCid == zeroChunk)
proc randomize() =
randomCid = dagHash(randomCid.data.addr, randomCid.data.len)
randomCid = blobHash(randomCid.data.addr, randomCid.data.len)
proc testPath(s: BlobSet; root: string): BlobSet =
for path in walkDirRec(root):
@@ -36,6 +36,20 @@ suite "Blob set tests":
doAssert(not contains(a, "bar"))
doAssert(not contains(b, "bar"))
test "apply":
var bs = newBlobSet()
for i in 1..1024:
let
name = $i
blob = blobHash name
bs = insert(bs, name, blob, 0)
for i in 1..1024:
let
name = $i
blob = blobHash name
other = bs.search(name)
doAssert(other == blob)
test "sets":
var s = newBlobSet()
for kind, key, val in getopt():