# blobsets/src/blobsets.nim
import ./blobsets/priv/hex
import base32, cbor, siphash, tiger

import std/asyncdispatch, std/asyncstreams
import std/hashes, std/streams, std/strutils, std/bitops, std/unicode, std/endians, std/random

const
  digestSize* = 24
    ## Length of a chunk digest.
  blobLeafSize* = 1 shl 10
    ## Size of blob hash leaves (THEX/ADC).
  blobLeafSizeMask* = blobLeafSize - 1
    ## Mask for offsets within a leaf.
  blobHexLen* = digestSize * 2
    ## Length of a digest in hexadecimal encoding.
  blobBase32Len* = (digestSize * 5 div 3) - 1
    ## Length of a digest in unpadded base32 encoding.
  blobVisualLen* = digestSize * 3
    ## Length of a digest in the braille "visual" encoding (UTF-8 bytes).
# 2018-12-21 03:50:36 +01:00
type
  BlobId* = TigerDigest
    ## Blob Identifier
  SetId* = TigerDigest
    ## Set Identifier
# 2018-12-27 01:32:59 +01:00
func parseStringId[T](s: string): T =
  ## Decode a digest from a textual encoding, selected by length:
  ## hexadecimal, unpadded base32, or the braille "visual" form.
  case s.len
  of blobHexLen:
    hex.decode s, result.data
  of blobBase32Len:
    var decoded = base32.decode(s)
    copyMem(result.data[0].addr, decoded[0].addr, digestSize)
  of blobVisualLen:
    var
      off: int
      rune: Rune
    for b in result.data.mitems:
      fastRuneAt(s, off, rune, true)
      # low byte of the rune carries the digest byte (see `$`)
      b = byte(rune.int and 0xff)
  else:
    raise newException(ValueError, "invalid blobset id encoding of len " & $s.len)
# 2018-12-21 03:50:36 +01:00
# 2018-12-27 01:32:59 +01:00
func parseCborId[T](c: CborNode): T =
  ## Parse a CBOR byte string to binary.
  # NOTE(review): silently leaves the result zeroed on a length
  # mismatch rather than raising.
  if c.bytes.len == result.data.len:
    for i in 0..result.data.high:
      result.data[i] = c.bytes[i]

func toBlobId*(s: string): BlobId =
  ## Parse a textual blob hash to binary.
  parseStringId[BlobId] s

func toSetId*(s: string): SetId =
  ## Parse a textual set hash to binary.
  parseStringId[SetId] s

func toSetId(c: CborNode): SetId =
  ## Parse a CBOR set hash to binary.
  parseCborId[SetId] c
# 2018-12-21 03:50:36 +01:00
proc `==`*(x, y: BlobId): bool =
  ## Compare two BlobIds.
  x.data == y.data

proc `==`*(cbor: CborNode; cid: BlobId): bool =
  ## Compare a CBOR byte-string node with a BlobId.
  ## Returns false for any non-bytes node.
  # NOTE(review): assumes cbor.bytes has at least digestSize entries
  # when kind == cborBytes — TODO confirm against callers.
  if cbor.kind == cborBytes:
    for i in 0..<digestSize:
      if cid.data[i] != cbor.bytes[i].uint8:
        return false
    result = true

proc hash*(cid: BlobId): Hash =
  ## Reduce a BlobId into an integer for use in tables.
  # Qualified explicitly: this is the SipHash key type, not the
  # trie `Key` declared later in this module.
  var zeroKey: siphash.Key
  result = cast[Hash](sipHash(cid.data, zeroKey))

proc toCbor*(id: BlobId): CborNode =
  ## Generate a CBOR representation of a BlobId.
  newCborBytes id.data

proc toBlobId*(cbor: CborNode): BlobId =
  ## Parse a CBOR byte string into a BlobId.
  ## (Fixed doc: previous comment was copy-pasted from `toCbor`.)
  assert(cbor.bytes.len == digestSize)
  for i in 0..<digestSize:
    result.data[i] = cbor.bytes[i].uint8
func `$`*(bh: BlobId): string =
  ## Convert a blob hash to a visual representation:
  ## one braille rune (U+2800 block) per digest byte.
  const baseRune = 0x2800
  result = newString(blobVisualLen)
  var pos = 0
  for b in bh.data.items:
    let r = Rune(baseRune or b.int)
    fastToUTF8Copy(r, result, pos, true)

proc toHex*(id: BlobId|SetId): string =
  ## Return BlobId encoded in hexidecimal.
  hex.encode(id.data)

func toBase32*(bh: BlobId): string =
  ## Encode a blob hash into base32
  base32.encode(cast[array[digestSize, char]](bh.data), pad=false)
proc verify*(id: BlobId; data: string): bool =
  ## Verify that a string of data corresponds to a BlobId.
  id == tiger(data)

func isNonZero*(bh: BlobId): bool =
  ## Test if a blob hash is not zeroed.
  var acc: byte
  for b in bh.data.items:
    {.unroll.}
    acc = acc or b
  acc != 0
# 2019-03-17 22:53:50 +01:00
type
  BlobKind* = enum
    ## Discriminates content blobs from set-metadata blobs.
    dataBlob, metaBlob

proc `$`*(k: BlobKind): string =
  ## Short lowercase label for a blob kind.
  case k
  of dataBlob: "data"
  of metaBlob: "meta"
type
  BlobStream* = ref BlobStreamObj
  BlobStreamObj* = object of RootObj
    ## Abstract blob reader; store backends populate the impl slots.
    closeImpl*: proc (s: BlobStream) {.nimcall, gcsafe.}
    sizeImpl*: proc (s: BlobStream): BiggestInt {.nimcall, gcsafe.}
    setPosImpl*: proc (s: BlobStream; pos: BiggestInt) {.nimcall, gcsafe.}
    getPosImpl*: proc (s: BlobStream): BiggestInt {.nimcall, gcsafe.}
    readImpl*: proc (s: BlobStream; buffer: pointer; bufLen: int): Future[int] {.nimcall, gcsafe.}

  IngestStream* = ref IngestStreamObj
  IngestStreamObj* = object of RootObj
    ## Abstract blob writer; store backends populate the impl slots.
    cancelImpl*: proc (s: IngestStream) {.nimcall, gcsafe.}
    finishImpl*: proc (s: IngestStream): Future[tuple[id: BlobId, size: BiggestInt]] {.nimcall, gcsafe.}
    ingestImpl*: proc (s: IngestStream; buf: pointer; size: int): Future[void] {.nimcall, gcsafe.}
proc close*(s: BlobStream) =
  ## Release the stream's backing resources.
  assert(not s.closeImpl.isNil)
  s.closeImpl(s)

proc size*(s: BlobStream): BiggestInt =
  ## Size of the underlying blob in bytes.
  assert(not s.sizeImpl.isNil)
  s.sizeImpl(s)

proc `pos=`*(s: BlobStream; pos: BiggestInt) =
  ## Seek to an absolute position.
  assert(not s.setPosImpl.isNil)
  s.setPosImpl(s, pos)

proc pos*(s: BlobStream): BiggestInt =
  ## Current absolute position.
  assert(not s.getPosImpl.isNil)
  s.getPosImpl(s)

proc read*(s: BlobStream; buf: pointer; len: Natural): Future[int] =
  ## Read up to `len` bytes into `buf`; completes with the count read.
  assert(not s.readImpl.isNil)
  s.readImpl(s, buf, len)

proc cancel*(s: IngestStream) =
  ## Cancel and close ingest stream
  assert(not s.cancelImpl.isNil)
  s.cancelImpl(s)

proc finish*(s: IngestStream): Future[tuple[id: BlobId, size: BiggestInt]] =
  ## Finish ingest stream
  assert(not s.finishImpl.isNil)
  s.finishImpl(s)

proc ingest*(s: IngestStream; buf: pointer; size: Natural): Future[void] =
  ## Ingest stream
  assert(not s.ingestImpl.isNil)
  s.ingestImpl(s, buf, size)

proc ingest*(s: IngestStream; buf: string): Future[void] =
  ## Ingest stream
  assert(not s.ingestImpl.isNil)
  s.ingestImpl(s, buf[0].unsafeAddr, buf.len)
type
  BlobStore* = ref BlobStoreObj
  BlobStoreObj* = object of RootObj
    ## Abstract blob store; backends populate the impl slots.
    closeImpl*: proc (s: BlobStore) {.nimcall, gcsafe.}
    containsImpl*: proc (s: BlobStore; id: BlobId; kind: BlobKind): Future[bool] {.nimcall, gcsafe.}
    openBlobStreamImpl*: proc (s: BlobStore; id: BlobId; size: BiggestInt; kind: BlobKind): BlobStream {.nimcall, gcsafe.}
    openIngestStreamImpl*: proc (s: BlobStore; size: BiggestInt; kind: BlobKind): IngestStream {.nimcall, gcsafe.}
proc close*(s: BlobStore) =
  ## Close active store resources.
  if not s.closeImpl.isNil: s.closeImpl(s)

proc contains*(s: BlobStore; id: BlobId; kind: BlobKind): Future[bool] =
  ## Check if the store contains a blob.
  # Fix: assert the impl slot that is actually invoked here
  # (previously asserted openBlobStreamImpl by copy-paste).
  assert(not s.containsImpl.isNil)
  s.containsImpl(s, id, kind)

proc openBlobStream*(s: BlobStore; id: BlobId; size = 0.BiggestInt; kind = dataBlob): BlobStream =
  ## Return a new `BlobStream` for reading a blob.
  assert(isNonZero id)
  assert(not s.openBlobStreamImpl.isNil)
  s.openBlobStreamImpl(s, id, size, kind)

proc openIngestStream*(s: BlobStore; size = 0.BiggestInt; kind = dataBlob): IngestStream =
  ## Return a new `IngestStream` for ingesting a blob.
  assert(not s.openIngestStreamImpl.isNil)
  s.openIngestStreamImpl(s, size, kind)

proc ingest*(store: BlobStore; buf: string): Future[BlobId] {.async.} =
  ## Ingest `buf` as a data blob and return its id.
  let stream = store.openIngestStream(buf.len.BiggestInt, dataBlob)
  await stream.ingest(buf[0].unsafeAddr, buf.len)
  let (id, _) = await stream.finish()
  return id
# 2019-03-17 22:02:39 +01:00
type Key* = distinct uint64
  ## Trie key derived from a name via SipHash.

proc `and`*(x, y: Key): Key {.borrow.}
proc `not`*(x: Key): Key {.borrow.}
proc `shl`*(x: Key; y: int): Key {.borrow.}
proc `shr`*(x: Key; y: int): Key {.borrow.}
proc `==`*(x, y: Key): bool {.borrow.}
# 2018-12-21 03:50:36 +01:00
const
  keyBits = sizeof(Key) * 8
    ## Bits in a key (64).
  keyChunkBits = fastLog2 keyBits
    ## Key bits consumed per trie level (6).
  keyChunkMask = Key((1 shl keyChunkBits)-1)
    ## Mask selecting the low chunk of a key.
  maxDepth = keyBits div keyChunkBits
    ## Maximum trie depth. Fix: integer `div` instead of float `/`;
    ## the value was only ever used truncated via `.int`, so this is
    ## the same quantity with the proper type.

func `$`*(k: Key): string = k.BiggestInt.toHex(keyBits div 4)

func toKey*(s: string): Key =
  ## Hash a name into a trie key using a zeroed SipHash key.
  var key: siphash.Key
  let b = sipHash(toOpenArrayByte(s, s.low, s.high), key)
  cast[Key](b)
# 2018-12-27 01:32:59 +01:00
const
  # CBOR tags
  nodeTag = 0
  leafTag = 1

type
  SetKind* = enum hotNode, coldNode, leafNode
  BlobSet* = ref BlobSetObj
  BlobSetObj = object
    ## HAMT-style trie node: hot (in memory), cold (only a SetId),
    ## or a leaf holding a keyed blob reference.
    case kind*: SetKind
    of hotNode:
      bitmap: uint64
      table: seq[BlobSet]
    of coldNode:
      setId*: SetId
    of leafNode:
      key: Key
      blob*: BlobId
      size: BiggestInt

func isCold*(bs: BlobSet): bool = bs.kind == coldNode
func isHot*(bs: BlobSet): bool = bs.kind == hotNode
# 2019-02-15 21:56:21 +01:00
func toCbor*(x: BlobSet): CborNode =
  ## Serialize a set node into its tagged CBOR form.
  case x.kind
  of hotNode:
    let arr = newCborArray()
    let bitmap = newCborInt(x.bitmap)
    assert(bitmap.getInt.uint64 == x.bitmap, $bitmap.getInt.uint64 & " != " & $x.bitmap)
    arr.add bitmap
    for y in x.table:
      assert(not y.isNil)
      arr.add y.toCbor
    newCborTag(nodeTag, arr)
  of coldNode:
    newCborTag(nodeTag, x.setId.data.newCborBytes)
  of leafNode:
    let arr = newCborArray()
    arr.add x.key.uint64
    arr.add x.blob.data
    arr.add x.size
    newCborTag(leafTag, arr)
# 2018-12-21 03:50:36 +01:00
func newBlobSet*(): BlobSet =
  ## Create a new hot blob set.
  BlobSet(kind: hotNode, table: newSeqOfCap[BlobSet](2))

func newBlobSet*(id: SetId): BlobSet =
  ## Create a new cold blob set.
  BlobSet(kind: coldNode, setId: id)

template sparseIndex(x: Key): uint64 =
  ## Value of the key's low chunk.
  uint64(x and keyChunkMask)

template mask(x: Key): uint64 =
  ## Bitmap bit selected by the key's low chunk.
  1'u64 shl int(x and keyChunkMask)
# 2018-12-21 03:50:36 +01:00
func compactIndex(t: BlobSet; x: Key): int =
  ## Index into the compressed `table` for the chunk of `x`:
  ## popcount of the bitmap bits below the chunk's bit.
  if (x and keyChunkMask) != Key(0):
    # TODO: bug in shr and shl, cannot shift all bits out
    result = int(countSetBits(t.bitmap shl (keyBits - x.sparseIndex.int)))

func masked(t: BlobSet; x: Key): bool =
  ## Test whether the chunk of `x` is present in the bitmap.
  ((t.bitmap shr x.sparseIndex) and 1) != 0

func isEmpty*(s: BlobSet): bool =
  ## Test if a set is empty.
  s.bitmap == 0'u64
# 2019-03-17 22:53:50 +01:00
iterator dumpBlob*(store: BlobStore; id: BlobId): string =
  ## Yield the contents of blob `id`, one leaf-sized chunk at a time.
  var
    stream = store.openBlobStream(id, kind=dataBlob)
    chunk = newString(blobLeafSize)
  defer:
    close stream
  while true:
    chunk.setLen(blobLeafSize)
    let n = waitFor stream.read(chunk[0].addr, chunk.len)
    if n == 0:
      break
    chunk.setLen(n)
    yield chunk
proc loadSet(store: BlobStore; id: SetId; depth: int): Future[BlobSet] {.async.} =
  ## Fetch the metadata blob `id`, decode its CBOR node form, and
  ## recursively load child nodes into a hot trie node.
  assert(isNonZero id)
  assert((not Key(0)) shr depth != Key(0), "loadSet trie is too deep")
  let
    stream = store.openBlobStream(id, kind=metaBlob)
    streamSize = stream.size
  defer:
    close stream
  # fall back to a 4 KiB buffer when the backend reports no size
  var buf = if streamSize == 0:
      newString(4 shl 10)
    else:
      newString(stream.size)
  let n = await stream.read(buf[0].addr, buf.len)
  assert(n != 0, "read zero of set " & $id)
  buf.setLen(n)
  let
    tagPair = parseCbor buf
    c = tagPair.val
    bitmap = c.seq[0].getInt.uint64
  if bitmap.countSetBits != c.seq.len-1:
    let bits = bitmap.countSetBits
    raise newException(ValueError, "invalid set CBOR, bitmap has " & $bits & " bits and sequence len is " & $c.seq.len)
  result = BlobSet(
    kind: hotNode,
    bitmap: bitmap,
    table: newSeqOfCap[BlobSet](c.seq.len-1))
  for i in 1..c.seq.high:
    let node = c[i].val
    case c[i].tag.int
    of nodeTag:
      let child = await loadSet(store, node.toSetId, depth+1)
      result.table.add child
    of leafTag:
      let leaf = BlobSet(
        kind: leafNode,
        key: Key(getNum[uint64](node[0])),
        blob: parseCborId[BlobId](node[1]),
        size: getInt node[2])
      result.table.add leaf
    else:
      raise newException(ValueError, "invalid set CBOR")
# 2019-03-18 22:50:00 +01:00
proc load*(store: BlobStore; id: SetId): Future[BlobSet] =
  ## Load the set stored under `id` into a hot node.
  loadSet(store, id, 0)

proc load*(store: BlobStore; node: BlobSet): Future[BlobSet] =
  ## Load a cold node from the store.
  load(store, node.setId)
proc randomApply*(store: BlobStore; trie: BlobSet; rng: var Rand;
                  f: proc(id: BlobId; size: BiggestInt)) =
  ## Apply to random leaf if the set is not empty.
  var
    trie = trie
    i = rng.rand(max(1, countSetBits(trie.bitmap))-1)
  while trie.bitmap != 0:
    let next = trie.table[i]
    case next.kind
    of leafNode:
      f(next.blob, next.size)
      break
    of coldNode:
      # load in place and retry the same slot
      trie.table[i] = waitFor store.load(next)
    of hotNode:
      trie = next
      i = rng.rand(countSetBits(trie.bitmap)-1)
type MemberStream* = FutureStream[tuple[key: Key; id: BlobId; size: BiggestInt]]

proc newMemberStream*(): FutureStream[tuple[key: Key; id: BlobId; size: BiggestInt]] =
  ## Allocate a stream for receiving set members.
  newFutureStream[tuple[key: Key; id: BlobId; size: BiggestInt]]()

proc streamMembers*(stream: FutureStream[tuple[key: Key; id: BlobId; size: BiggestInt]];
                    store: BlobStore; trie: BlobSet) {.async.} =
  ## Pass each set member to the specified future stream in random order.
  # `path` records, per level, the node and a bitmask of table slots
  # not yet visited.
  var
    path: array[maxDepth.int, tuple[mask: uint64, trie: BlobSet]]
    level = 0
    rng = initRand(rand(high int))
  if trie.isCold:
    path[0].trie = await store.load(trie)
  else:
    path[0].trie = trie
  # set the bits of indexes to hit
  path[0].mask = not(0'u64) shr (64 - path[0].trie.table.len)
  while (not stream.finished) and (0 < level or path[0].mask != 0'u64):
    if path[level].mask == 0'u64:
      # this node is exhausted, pop back up
      dec level
      continue
    let
      i = rng.rand(path[level].trie.table.high)
      bi = 1'u64 shl i
    if (path[level].mask and bi) == 0'u64:
      continue
    path[level].mask = path[level].mask xor bi
    var node = path[level].trie.table[i]
    if node.kind == leafNode:
      let val: tuple[key: Key; id: BlobId; size: BiggestInt] =
        (node.key, node.blob, node.size)
      await stream.write(val)
    else:
      if node.isCold:
        node = await store.load(node)
      inc level
      path[level].mask = not (not(0'u64) shl node.table.len)
      path[level].trie = node
  complete stream
# 2019-03-17 22:53:50 +01:00
func nodeCount(bs: BlobSet): int =
  ## Count of internal nodes in set.
  result = 1
  for n in bs.table:
    assert(n.kind != coldNode, "cannot count cold nodes")
    if n.kind == hotNode:
      result.inc n.nodeCount

func leafCount(bs: BlobSet): int =
  ## Count of leaves in set.
  for n in bs.table:
    assert(n.kind != coldNode, "cannot count leaves of cold nodes")
    if n.kind == leafNode:
      result.inc 1
    else:
      result.inc n.leafCount
# 2018-12-24 21:19:03 +01:00
func apply(bs: BlobSet; cb: proc (leaf: BlobSet)) =
  ## Apply a callback to each set element.
  for node in bs.table:
    if node.isNil:
      raiseAssert(bs.table.repr)
    case node.kind
    of hotNode:
      apply(node, cb)
    of leafNode:
      cb(node)
    else:
      raiseAssert("cannot apply to node type " & $node.kind)
# 2019-03-17 22:53:50 +01:00
proc apply*(store: BlobStore; trie: BlobSet; name: string; f: proc (id: BlobId; size: BiggestInt)) =
  ## Apply a procedure to a named blob, if it is present
  let key = name.toKey
  var
    n = trie
    k = key
  # walk the trie one key chunk per level, loading cold nodes lazily
  while k != Key(0) and n.masked(k):
    let i = n.compactIndex(k)
    if n.table[i].isCold:
      n.table[i] = waitFor store.load(n.table[i])
    n = n.table[i]
    if n.kind == leafNode:
      if n.key == key:
        f(n.blob, n.size)
      break
    k = k shr keyChunkBits
# 2018-12-24 21:19:03 +01:00
# 2019-03-17 22:53:50 +01:00
proc contains*(store: BlobStore; bs: BlobSet; name: string): bool =
  ## Test whether `name` is a member of set `bs`.
  var found = false
  apply(store, bs, name) do (id: BlobId; size: BiggestInt):
    found = true
  found
# 2018-12-24 21:19:03 +01:00
# 2019-03-18 22:50:00 +01:00
proc insert(store: BlobStore; trie, l: BlobSet; depth: int): Future[BlobSet] {.async.} =
  ## This procedure is recursive to a depth of keyBits/keyChunkBits.
  doAssert(depth < (keyBits div keyChunkBits), "key space exhausted during insert")
  # shallow copy so the caller's node is not mutated structurally
  var bs = BlobSet(kind: hotNode, bitmap: trie.bitmap, table: trie.table)
  let key = l.key shr (depth * keyChunkBits)
  if bs.masked(key):
    let
      depth = depth + 1
      i = bs.compactIndex(key)
    if bs.table[i].isCold:
      bs.table[i] = await store.load(bs.table[i])
    case bs.table[i].kind
    of hotNode:
      bs.table[i] = await insert(store, bs.table[i], l, depth)
    of leafNode:
      if bs.table[i].key == l.key:
        # replace an existing leaf with the same key
        bs.table[i] = l
      else:
        # collision on this chunk: push both leaves one level down
        var subtrie = newBlobSet()
        subtrie = await insert(store, subtrie, bs.table[i], depth)
        subtrie = await insert(store, subtrie, l, depth)
        bs.table[i] = subtrie
    of coldNode:
      discard
  else:
    bs.bitmap = bs.bitmap or key.mask
    bs.table.insert(l, bs.compactIndex(key))
  return bs
# 2018-12-21 03:50:36 +01:00
# 2019-03-18 22:50:00 +01:00
proc insert*(store: BlobStore; trie, node: BlobSet): Future[BlobSet] =
  ## Insert set node `node` into `trie`.
  insert(store, trie, node, 0)

proc insert*(store: BlobStore; t: BlobSet; key: Key; blob: BlobId; size: BiggestInt): Future[BlobSet] =
  ## Insert a blob hash into a trie.
  let leaf = BlobSet(kind: leafNode, key: key, blob: blob, size: size)
  insert(store, t, leaf)

proc insert*(store: BlobStore; t: BlobSet; name: string; blob: BlobId; size: BiggestInt): Future[BlobSet] =
  ## Insert a named blob into a trie.
  insert(store, t, name.toKey, blob, size)
# 2019-03-17 22:02:39 +01:00
# 2019-03-18 22:50:00 +01:00
proc remove(store: BlobStore; trie: BlobSet; fullKey: Key; depth: int): Future[BlobSet] {.async.} =
  ## Recursive worker for `remove*`.
  # NOTE(review): the leaf branch never compares the leaf's key to
  # `fullKey` — a leaf sharing only this chunk prefix would also be
  # removed. TODO confirm intended.
  var res = trie
  let key = fullKey shr (depth * keyChunkBits)
  if res.masked(key):
    let
      depth = depth + 1
      i = res.compactIndex(key)
    if res.table[i].isCold:
      res.table[i] = await store.load(res.table[i])
      trie.table[i] = res.table[i]
    case res.table[i].kind
    of hotNode:
      res.table[i] = await remove(store, res.table[i], fullKey, depth)
    of leafNode:
      if res.table.len == 2:
        # collapse a two-entry node into its surviving child
        res.table.delete(i)
        res = res.table[0]
      else:
        res.table.delete(i)
        res.bitmap = res.bitmap xor key.mask
    of coldNode:
      discard # previously handled
  return res
# 2018-12-24 21:19:03 +01:00
# 2019-03-18 22:50:00 +01:00
proc remove*(store: BlobStore; trie: BlobSet; key: Key): Future[BlobSet] =
  ## Remove a blob from a trie.
  if trie.isEmpty:
    # nothing to do; complete immediately with the unchanged trie
    result = newFuture[BlobSet]()
    result.complete trie
  else:
    result = remove(store, trie, key, 0)

proc remove*(store: BlobStore; trie: BlobSet; name: string): Future[BlobSet] =
  ## Remove a named blob from a trie.
  remove(store, trie, name.toKey)
# 2019-03-17 22:53:50 +01:00
proc union*(store: BlobStore; sets: varargs[BlobSet]): BlobSet =
  ## Return the union of `sets`.
  # TODO: lazy-load set
  var fresh = newBlobSet()
  proc freshInsert(leaf: BlobSet) =
    fresh = waitFor insert(store, fresh, leaf)
  for bs in sets:
    assert(not bs.isNil)
    bs.apply(freshInsert)
  result = fresh
# 2018-12-24 21:19:03 +01:00
func leafCount*(size: Natural): int =
  ## Number of hash-tree leaves covering `size` bytes.
  (size+blobLeafSize-1) div blobLeafSize

func compressTree*(leaves: var openArray[BlobId]) =
  ## Reduce leaf digests pairwise, level by level (internal nodes
  ## are hashed with a 1 prefix byte), leaving the root in `leaves[0]`.
  var
    ctx: TigerState
    len = leaves.len
  while 1 < len:
    var pos, next: int
    while pos+1 < len:
      init ctx
      ctx.update [1'u8]
      ctx.update leaves[pos+0].data
      ctx.update leaves[pos+1].data
      pos.inc 2
      leaves[next] = ctx.finish()
      inc next
    if pos < len:
      # odd digest is promoted to the next level unchanged
      leaves[next] = leaves[pos]
      inc next
    len = next
# 2018-12-27 01:32:59 +01:00
func blobHash*(s: string): BlobId =
  ## Compute the tree hash of `s`: each leaf chunk is hashed with a
  ## 0 prefix byte, then the leaves are folded by `compressTree`.
  var
    ctx: TigerState
    leaves = newSeqOfCap[BlobId](leafCount s.len)
    off: int
  while true:
    init ctx
    ctx.update [0'u8]
    let n = min(blobLeafSize, s.len - off)
    if 0 < n:
      ctx.update(unsafeAddr s[off], n)
      off.inc n
    leaves.add(finish ctx)
    if off == s.len: break
  compressTree(leaves)
  leaves[0]
proc commit*(store: BlobStore; bs: BlobSet): Future[BlobSet] {.async.} =
  ## Recursively write hot nodes to the store as metadata blobs and
  ## return a cold node referencing the stored set.
  if bs.isCold: return bs
  let tmp = BlobSet(kind: hotNode, bitmap: bs.bitmap, table: bs.table)
  for e in tmp.table.mitems:
    if e.isHot:
      let cold = await store.commit e
      assert(not cold.isNil)
      e = cold
  var buf = encode tmp.toCbor
  let
    localId = blobHash(buf)
    present = await store.contains(localId, metaBlob)
  if present:
    # already stored; no need to ingest again
    return BlobSet(kind: coldNode, setId: localId)
  else:
    let stream = store.openIngestStream(size=buf.len, kind=metaBlob)
    await stream.ingest(buf)
    let (storeId, _) = await finish(stream)
    assert(localId == storeId)
    return BlobSet(kind: coldNode, setId: storeId)
# 2018-12-27 01:32:59 +01:00
#
# Null Store implementation
#
type
  NullIngestStream = ref NullIngestStreamObj
  NullIngestStreamObj = object of IngestStreamObj
    ## Ingest sink that computes the tree hash but stores nothing.
    ctx: TigerState
    leaves: seq[BlobId]
    pos: BiggestInt

proc nullBlobClose(s: BlobStream) = discard

proc nullBlobSize(s: BlobStream): BiggestInt =
  discard

proc setPosNull(s: BlobStream; pos: BiggestInt) = discard
proc getPosNull(s: BlobStream): BiggestInt = discard

proc nullBlobRead(s: BlobStream; buffer: pointer; len: Natural): Future[int] =
  # the null store holds nothing: always complete with zero bytes
  result = newFuture[int]()
  complete result, 0

proc nullOpenBlobStream(s: BlobStore; id: BlobId; size: BiggestInt; kind: BlobKind): BlobStream =
  BlobStream(
    closeImpl: nullBlobClose,
    sizeImpl: nullBlobSize,
    setPosImpl: setPosNull,
    getPosImpl: getPosNull,
    readImpl: nullBlobRead)
# 2018-12-27 01:32:59 +01:00
# 2019-02-10 13:47:40 +01:00
proc nullFinish(s: IngestStream): Future[tuple[id: BlobId, size: BiggestInt]] =
  ## Seal the trailing partial leaf and reduce to the root hash.
  var s = NullIngestStream(s)
  if s.pos == 0 or s.pos mod blobLeafSize != 0:
    s.leaves.add finish(s.ctx)
  compressTree(s.leaves)
  var pair: tuple[id: BlobId, size: BiggestInt]
  pair.id = s.leaves[0]
  pair.size = s.pos
  result = newFuture[tuple[id: BlobId, size: BiggestInt]]()
  complete result, pair

proc appendLeaf(s: NullIngestStream) =
  ## Finalize the current leaf digest and restart the leaf context.
  s.leaves.add(finish s.ctx)
  init s.ctx
  s.ctx.update [0'u8]
proc nullIngest(s: IngestStream; data: pointer; size: Natural): Future[void] =
  ## Fold `size` bytes at `data` into the running leaf digests.
  let
    s = NullIngestStream(s)
    buf = cast[ptr UncheckedArray[byte]](data)
  var dataOff: int
  let leafOff = s.pos.int mod blobLeafSize
  if leafOff != 0:
    # top up the partially-filled leaf first
    let leafFill = min(blobLeafSize - leafOff, size)
    s.ctx.update(buf[0].addr, leafFill)
    dataOff.inc leafFill
    if leafFill < size:
      appendLeaf s
  while dataOff+blobLeafSize <= size:
    s.ctx.update(buf[dataOff].addr, blobLeafSize)
    dataOff.inc blobLeafSize
    appendLeaf s
  if dataOff != size:
    # retain the trailing partial leaf in the running context
    s.ctx.update(buf[dataOff].addr, size - dataOff)
  s.pos.inc size
  result = newFuture[void]()
  complete result
# 2018-12-27 01:32:59 +01:00
proc nullOpenIngestStream(s: BlobStore; size: BiggestInt; kind: BlobKind): IngestStream =
  let stream = NullIngestStream(
    finishImpl: nullFinish, ingestImpl: nullIngest, leaves: newSeq[BlobId]())
  result = stream
  init stream.ctx
  stream.ctx.update [0'u8]

proc newNullStore*(): BlobStore =
  ## A store that discards writes and reads nothing; useful for
  ## computing blob hashes without persisting data.
  BlobStore(
    openBlobStreamImpl: nullOpenBlobStream,
    openIngestStreamImpl: nullOpenIngestStream)