Load and commit

Ehmry - 2018-12-27 01:32:59 +01:00
parent f01bf5083a
commit c5cc2f35a5
3 changed files with 191 additions and 120 deletions

View File

@@ -5,8 +5,8 @@ import std/nre, std/os, std/strutils, std/tables, std/parseopt, std/streams, std
 import cbor
 import ./blobsets, ./blobsets/filestores
 
-when defined(genode):
-  import dagfsclient
+#when defined(genode):
+#  import dagfsclient
 #else:
 #  import ./blobsets/tcp
@@ -43,7 +43,6 @@ proc insertPath(store: BlobStore; bs: BlobSet; kind: PathComponent; path: string
   except:
     let e = getCurrentException()
     writeLine(stderr, "failed to ingest '", path, "', ", e.msg)
-    # raise e
 
 proc ingestMain() =
   var args = newSeq[string]()
@@ -252,7 +251,11 @@ proc print(a: Atom; s: Stream) =
     s.write '|'
     s.write $a.size
   of atomSet:
-    s.write "«set»"
+    case a.bs.kind
+    of coldNode:
+      s.write $a.bs.setId
+    else:
+      s.write "«set»"
   of atomString:
     s.write '"'
     s.write a.str
@@ -390,7 +393,8 @@ proc cborFunc(env: Env; arg: NodeObj): NodeRef =
 
 proc commitFunc(env: Env; arg: NodeObj): NodeRef =
   assertArgCount(arg, 1)
-  raiseAssert("not implemented")
+  let cold = commit(env.store, arg.atom.bs)
+  cold.newAtom.newNode
 
 #[
 proc copyFunc(env: Env; args: NodeObj): NodeRef =
@@ -474,6 +478,11 @@ proc listFunc(env: Env; args: NodeObj): NodeRef =
     while not result.tailRef.nextRef.isNil:
       result.tailRef = result.tailRef.nextRef
 
+proc loadFunc(env: Env; args: NodeObj): NodeRef =
+  assertArgCount(args, 1)
+  let bs = env.store.loadSet args.atom.bs.setId
+  bs.newAtom.newNode
+
 proc mapFunc(env: Env; args: NodeObj): NodeRef =
   assertArgCount(args, 2)
   result = newNodeList()
@@ -551,6 +560,7 @@ proc newEnv(store: BlobStore): Env =
   result.bindEnv "ingest", ingestFunc
   result.bindEnv "insert", insertFunc
   result.bindEnv "key", keyFunc
+  result.bindEnv "load", loadFunc
   result.bindEnv "list", listFunc
   result.bindEnv "map", mapFunc
   #result.bindEnv "merge", mergeFunc
@@ -638,7 +648,7 @@ proc replMain() =
     scripted = true
   let
     #store = openStore()
-    store = newNullStore() # newFileStore("/tmp/blobs")
+    store = newFileStore("/tmp/blobs")
     env = newEnv(store)
     outStream = stdout.newFileStream
     readLine = if scripted: readLineSimple else: readLineFromStdin
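
Taken together, the REPL-side changes make set persistence round-trippable: commitFunc folds a hot set into the store and returns a cold atom, loadFunc rehydrates one by its setId, and print now shows a cold set's id instead of the opaque «set» marker. A minimal Nim sketch of the same round trip against the store API (a sketch only, assuming the blobsets package is importable and /tmp/blobs is writable; the inserted contents are elided):

  import ./blobsets, ./blobsets/filestores

  let store = newFileStore("/tmp/blobs")
  var hot = newBlobSet()                # hot, in-memory trie
  # ... populate with store.insert(hot, name, blobId, size) ...
  let cold = store.commit(hot)          # writes CBOR metaBlobs, returns a coldNode
  let back = store.loadSet(cold.setId)  # parses the nodes back into a hot trie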

View File

@@ -36,8 +36,7 @@ func `$`*(bh: BlobId): string =
     let r = (Rune)baseRune or b.int
     fastToUTF8Copy(r, result, pos, true)
 
-func toBlobId*(s: string): BlobId =
-  ## Parse a visual blob hash to binary.
+func parseStringId[T](s: string): T =
   if s.len == blobVisualLen:
     var
       pos: int
@@ -46,6 +45,28 @@ func toBlobId*(s: string): BlobId =
       fastRuneAt(s, pos, r, true)
       b = r.byte
 
+func parseCborId[T](c: CborNode): T =
+  ## Parse a CBOR node to binary.
+  if c.bytes.len == result.data.len:
+    for i in 0..result.data.high:
+      result.data[i] = c.bytes[i]
+
+func toBlobId*(s: string): BlobId =
+  ## Parse a visual blob hash to binary.
+  parseStringId[BlobId] s
+
+func toBlobId(c: CborNode): BlobId =
+  ## Parse a CBOR blob hash to binary.
+  parseCborId[BlobId] c
+
+func toSetId*(s: string): SetId =
+  ## Parse a visual set hash to binary.
+  parseStringId[SetId] s
+
+func toSetId(c: CborNode): SetId =
+  ## Parse a CBOR set hash to binary.
+  parseCborId[SetId] c
+
 proc `==`*(x, y: BlobId): bool = x.data == y.data
   ## Compare two BlobIds.
@@ -182,14 +203,19 @@ func toCbor(k: Key): CborNode =
   ## Keys are endian independent.
   newCborBytes cast[array[sizeof(k), byte]](k)
 
+const
+  # CBOR tags
+  nodeTag = 0
+  leafTag = 1
+
 type
-  setKind* = enum hotNode, coldNode, leafNode
+  SetKind* = enum hotNode, coldNode, leafNode
 
   BlobSet* = ref BlobSetObj
   BlobSetObj = object
-    case kind*: setKind
+    case kind*: SetKind
     of hotNode:
       bitmap: Key
-      table*: seq[BlobSet]
+      table: seq[BlobSet]
     of coldNode:
       setId*: SetId
     of leafNode:
@@ -351,33 +377,51 @@ func remove*(trie: BlobSet; name: string): BlobSet =
     result = newBlobSet()
 
 func toCbor*(x: BlobSet): CborNode =
-  const
-    nodeTag = 0
-    leafTag = 1
-  let array = newCborArray()
   case x.kind
   of hotNode:
-    var
-      map = x.bitmap
-      buf = newCborBytes(sizeof(Key))
-    when not sizeof(Key) == 8:
-      {.error: "unknown key conversion".}
-    bigEndian64(buf.bytes[0].addr, map.addr)
-    array.add buf
+    let array = newCborArray()
+    array.add x.bitmap
     for y in x.table:
       array.add y.toCbor
     newCborTag(nodeTag, array)
   of coldNode:
-    array.add x.setId.data
-    newCborTag(nodeTag, array)
+    newCborTag(nodeTag, x.setId.data.newCborBytes)
   of leafNode:
-    array.add x.key.toCbor
+    let array = newCborArray()
+    array.add x.key
     array.add x.blob.data
     array.add x.size
     newCborTag(leafTag, array)
 
 func leafCount*(size: Natural): int = (size+blobLeafSize-1) div blobLeafSize
 
+func compressTree*(leaves: var seq[BlobId]) =
+  var
+    ctx: Blake2b256
+    nodeOffset = 0
+    nodeDepth = 0
+  while leaves.len > 1:
+    nodeOffset = 0
+    inc nodeDepth
+    var pos, next: int
+    while pos < leaves.len:
+      ctx.init do (params: var Blake2bParams):
+        params.fanout = 2
+        params.depth = 255
+        params.leafLength = blobLeafSize
+        params.nodeOffset = nodeOffset
+        params.nodeDepth = nodeDepth
+      inc nodeOffset
+      ctx.update(leaves[pos].data)
+      inc pos
+      if pos < leaves.len:
+        ctx.update(leaves[pos].data)
+        inc pos
+      leaves[next] = ctx.finish()
+      inc next
+    leaves.setLen(next)
+    # TODO: BLAKE2 tree finalization flags
+
 type
   BlobKind* = enum
     dataBlob, metaBlob
@@ -421,98 +465,8 @@ type
     openBlobStreamImpl*: proc (s: BlobStore; id: BlobId; size: BiggestInt; kind: BlobKind): BlobStream {.nimcall, gcsafe.}
     openIngestStreamImpl*: proc (s: BlobStore; size: BiggestInt; kind: BlobKind): IngestStream {.nimcall, gcsafe.}
 
-proc close*(s: BlobStore) =
-  ## Close active store resources.
-  if not s.closeImpl.isNil: s.closeImpl(s)
-
-proc openBlobStream*(s: BlobStore; id: BlobId; size = 0.BiggestInt; kind = dataBlob): BlobStream =
-  ## Return a new `BlobStream` for reading a blob.
-  assert(not s.openBlobStreamImpl.isNil)
-  s.openBlobStreamImpl(s, id, size, kind)
-
-proc openIngestStream*(s: BlobStore; size = 0.BiggestInt; kind = dataBlob): IngestStream =
-  ## Return a new `IngestStream` for ingesting a blob.
-  assert(not s.openIngestStreamImpl.isNil)
-  s.openIngestStreamImpl(s, size, kind)
-
-func compressTree*(leaves: var seq[BlobId]) =
-  var
-    ctx: Blake2b256
-    nodeOffset = 0
-    nodeDepth = 0
-  while leaves.len > 1:
-    nodeOffset = 0
-    inc nodeDepth
-    var pos, next: int
-    while pos < leaves.len:
-      ctx.init do (params: var Blake2bParams):
-        params.fanout = 2
-        params.depth = 255
-        params.leafLength = blobLeafSize
-        params.nodeOffset = nodeOffset
-        params.nodeDepth = nodeDepth
-      inc nodeOffset
-      ctx.update(leaves[pos].data)
-      inc pos
-      if pos < leaves.len:
-        ctx.update(leaves[pos].data)
-        inc pos
-      leaves[next] = ctx.finish()
-      inc next
-    leaves.setLen(next)
-    # TODO: BLAKE2 tree finalization flags
-
-iterator dumpBlob*(store: BlobStore; id: BlobId): string =
-  var
-    stream = store.openBlobStream(id, kind=dataBlob)
-    buf = newString(blobLeafSize)
-  defer:
-    close stream
-  while true:
-    buf.setLen(blobLeafSize)
-    let n = stream.read(buf[0].addr, buf.len)
-    if n == 0:
-      break
-    buf.setLen(n)
-    yield buf
-
-proc commit*(store: BlobStore; bs: BlobSet): BlobSet =
-  assert(bs.kind == hotNode)
-  for e in bs.table.mitems:
-    case e.kind
-    of coldNode, leafNode: discard
-    of hotNode:
-      e = store.commit e
-  let stream = store.openIngestStream(kind=metaBlob)
-  var buf = encode bs.toCbor
-  stream.ingest(buf)
-  let (id, _) = finish stream
-  result = BlobSet(kind: coldNode, setId: id)
-
-proc apply*(store: BlobStore; bs: BlobSet; name: string; f: proc (id: BlobId; size: BiggestInt)) =
-  # TODO: lazy-load set
-  bs.apply(name, f)
-
-proc insert*(store: BlobStore; bs: BlobSet; name: string; blob: BlobId; size: BiggestInt): BlobSet =
-  # TODO: lazy-load set
-  insert(bs, name, blob, size)
-
-proc remove*(store: BlobStore; bs: BlobSet; name: string): BlobSet =
-  # TODO: lazy-load set
-  remove(bs, name)
-
-proc union*(store: BlobStore; sets: varargs[BlobSet]): BlobSet =
-  ## Return the union of `sets`.
-  # TODO: lazy-load set
-  var fresh = newBlobSet()
-  proc freshInsert(leaf: BlobSet) =
-    fresh = insert(fresh, leaf)
-  for bs in sets:
-    assert(not bs.isnil)
-    bs.apply(freshInsert)
-  result = fresh
-
-# Store implementations
+#
+# Null Store implementation
 #
 
 type
@@ -567,3 +521,106 @@ proc newNullStore*(): BlobStore =
   BlobStore(
     openBlobStreamImpl: nullOpenBlobStream,
     openIngestStreamImpl: nullOpenIngestStream)
+
+proc close*(s: BlobStore) =
+  ## Close active store resources.
+  if not s.closeImpl.isNil: s.closeImpl(s)
+
+proc openBlobStream*(s: BlobStore; id: BlobId; size = 0.BiggestInt; kind = dataBlob): BlobStream =
+  ## Return a new `BlobStream` for reading a blob.
+  assert(not s.openBlobStreamImpl.isNil)
+  s.openBlobStreamImpl(s, id, size, kind)
+
+proc openIngestStream*(s: BlobStore; size = 0.BiggestInt; kind = dataBlob): IngestStream =
+  ## Return a new `IngestStream` for ingesting a blob.
+  assert(not s.openIngestStreamImpl.isNil)
+  s.openIngestStreamImpl(s, size, kind)
+
+iterator dumpBlob*(store: BlobStore; id: BlobId): string =
+  var
+    stream = store.openBlobStream(id, kind=dataBlob)
+    buf = newString(blobLeafSize)
+  defer:
+    close stream
+  while true:
+    buf.setLen(blobLeafSize)
+    let n = stream.read(buf[0].addr, buf.len)
+    if n == 0:
+      break
+    buf.setLen(n)
+    yield buf
+
+proc loadSet(store: BlobStore; id: SetId; depth: int): BlobSet =
+  if Key.high shr depth == 0:
+    raiseAssert("loadSet trie is too deep")
+  var
+    stream = store.openBlobStream(id, kind=metaBlob)
+    buf = newString(blobLeafSize)
+  defer:
+    close stream
+  let n = stream.read(buf[0].addr, buf.len)
+  buf.setLen(n)
+  let
+    c = buf.parseCbor.val
+    bitmap = c.seq[0].getInt
+  if bitmap.countSetBits != c.seq.len-1:
+    let bits = bitmap.countSetBits
+    raise newException(ValueError, "invalid set CBOR, bitmap is " & $bits & " and sequence len is " & $c.seq.len)
+  result = BlobSet(
+    kind: hotNode,
+    bitmap: bitmap,
+    table: newSeqOfCap[BlobSet](c.seq.len-1))
+  for i in 1..c.seq.high:
+    let node = c[i].val
+    case c[i].tag.int
+    of nodeTag:
+      result.table.add loadSet(store, node.toSetId, depth+1)
+    of leafTag:
+      let
+        leaf = BlobSet(
+          kind: leafNode,
+          key: getInt node[0],
+          blob: parseCborId[BlobId] node[1],
+          size: getInt node[2])
+      result.table.add leaf
+    else:
+      raise newException(ValueError, "invalid set CBOR")
+
+proc loadSet*(store: BlobStore; id: SetId): BlobSet =
+  loadSet store, id, 0
+
+proc commit*(store: BlobStore; bs: BlobSet): BlobSet =
+  assert(bs.kind == hotNode)
+  for e in bs.table.mitems:
+    case e.kind
+    of coldNode, leafNode: discard
+    of hotNode:
+      e = store.commit e
+  let stream = store.openIngestStream(kind=metaBlob)
+  var buf = encode bs.toCbor
+  stream.ingest(buf)
+  let (id, _) = finish stream
+  result = BlobSet(kind: coldNode, setId: id)
+
+proc apply*(store: BlobStore; bs: BlobSet; name: string; f: proc (id: BlobId; size: BiggestInt)) =
+  # TODO: lazy-load set
+  bs.apply(name, f)
+
+proc insert*(store: BlobStore; bs: BlobSet; name: string; blob: BlobId; size: BiggestInt): BlobSet =
+  # TODO: lazy-load set
+  insert(bs, name, blob, size)
+
+proc remove*(store: BlobStore; bs: BlobSet; name: string): BlobSet =
+  # TODO: lazy-load set
+  remove(bs, name)
+
+proc union*(store: BlobStore; sets: varargs[BlobSet]): BlobSet =
+  ## Return the union of `sets`.
+  # TODO: lazy-load set
+  var fresh = newBlobSet()
+  proc freshInsert(leaf: BlobSet) =
+    fresh = insert(fresh, leaf)
+  for bs in sets:
+    assert(not bs.isnil)
+    bs.apply(freshInsert)
+  result = fresh
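
The toCbor and loadSet procs above fix the wire format for set nodes: a hot node becomes CBOR tag 0 over an array of the bitmap followed by one entry per set bit, a cold child becomes tag 0 over its raw setId bytes, and a leaf becomes tag 1 over [key, blob id, size]. In CBOR diagnostic notation, a small committed node could look like this (values illustrative only):

  0([                        # nodeTag 0: trie node
    5,                       # bitmap with two bits set, so two entries follow
    1([42, h'aa…', 1024]),   # leafTag 1: [key, 32-byte blob id, size]
    0(h'bb…')                # nodeTag 0 over bytes: cold child, loaded recursively
  ])

loadSet validates exactly this shape: countSetBits(bitmap) must equal the number of trailing entries, and the depth guard (Key.high shr depth == 0) stops recursion before it exceeds the key width, so a corrupt or malicious set cannot recurse unboundedly.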

View File

@@ -51,7 +51,11 @@ proc fsOpenBlobStream(s: BlobStore; id: BlobId; size: BiggestInt; kind: BlobKind
   var fs = FileStore(s)
   let stream = FsBlobStream()
   result = stream
-  stream.path = fs.root / $id
+  case kind
+  of dataBlob:
+    stream.path = fs.root / "data" / $id
+  of metaBlob:
+    stream.path = fs.root / "blob" / $id
   stream.file = openAsync(stream.path, fmRead)
   stream.closeImpl = fsBlobClose
   stream.readImpl = fsBlobRead
@@ -108,8 +112,8 @@ proc fsOpenIngestStream(s: BlobStore; size: BiggestInt; kind: BlobKind): IngestS
   stream.leaves = newSeq[BlobId]()
 
 proc newFileStore*(root = "/"): FileStore =
-  if not existsDir(root):
-    createDir root
+  createDir(root / "data")
+  createDir(root / "blob")
   new result
   result.openBlobStreamImpl = fsOpenBlobStream
   result.openIngestStreamImpl = fsOpenIngestStream
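
These two hunks partition the file store by blob kind: dataBlob content and metaBlob set nodes now resolve to separate subdirectories, and newFileStore creates both up front. The resulting layout, using the REPL's store root as an illustrative example:

  /tmp/blobs/data/<blob id>   # dataBlob: file content leaves
  /tmp/blobs/blob/<set id>    # metaBlob: CBOR set nodes

Dropping the existsDir guard is sound because Nim's createDir creates missing parent directories and does not fail when the target already exists, so newFileStore remains idempotent.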