# blobsets.nim — BLAKE2b-addressed blob identifiers and hashed-trie blob sets.
import std/hashes, std/streams, std/strutils, std/bitops, std/unicode, std/endians
import base58/bitcoin, cbor, siphash
import ./blobsets/priv/hex
import std/streams, std/strutils
import nimcrypto, nimcrypto/blake2
const
  digestLen* = 32
    ## Length of a chunk digest.
  cidSize* = digestLen
    ## Size of CID object in memory
  blobLeafSize* = 1 shl 14
    ## Size of blob leaves.
  blobLeafSizeMask* = not(not(0) shl 14)
    ## Mask of the in-leaf offset bits (blobLeafSize - 1).
  blobVisualLen* = 32 * 3
    ## Byte length of the visual digest form: 32 runes, 3 UTF-8 bytes each.
  maxChunkSize* {.deprecated} = blobLeafSize
type
  Blake2b256* = Blake2bContext[256]
    # 256-bit BLAKE2b context from nimcrypto.
  BlobId* = MDigest[Blake2b256.bits]
    ## Blob Identifier
  SetId* = MDigest[Blake2b256.bits]
    ## Set Identifier
  Cid* {.deprecated} = BlobId
func `$`*(bh: BlobId): string =
  ## Convert a blob hash to a visual representation:
  ## one Braille-block rune (U+2800 or'd with the byte value)
  ## per digest byte, `blobVisualLen` bytes of UTF-8 in total.
  const baseRune = 0x2800
  result = newString(blobVisualLen)
  var pos = 0
  for b in bh.data.items:
    # Explicit conversion; the original `(Rune)baseRune or b.int`
    # relied on the low precedence of the `(T)expr` conversion form.
    let r = Rune(baseRune or b.int)
    fastToUTF8Copy(r, result, pos, true)
func toBlobId*(s: string): BlobId =
  ## Parse a visual blob hash to binary.
  ## Returns an all-zero BlobId when `s` has the wrong length.
  if s.len == blobVisualLen:
    var
      pos: int
      r: Rune
    for b in result.data.mitems:
      fastRuneAt(s, pos, r, true)
      # The low byte of a Braille rune (U+28xx) is the original
      # digest byte, because 0x2800 has a zero low byte.
      b = r.byte
proc `==`*(x, y: BlobId): bool = x.data == y.data
  ## Compare two BlobIds.
proc `==`*(cbor: CborNode; cid: BlobId): bool =
  ## Compare a CBOR node with a BlobId.
  ## False unless the node is a byte string of exactly `digestLen`
  ## bytes matching the digest.
  if cbor.kind != cborBytes:
    return false
  # Guard the length before indexing to avoid an out-of-bounds
  # access on short byte strings.
  if cbor.bytes.len != digestLen:
    return false
  for i in 0..<digestLen:
    if cid.data[i] != cbor.bytes[i].uint8:
      return false
  true
proc hash*(cid: BlobId): Hash =
  ## Reduce a BlobId into an integer for use in tables.
  # NOTE(review): `Key` here resolves to siphash.Key (zero-initialized);
  # the int64 trie `Key` type is declared later in this file and is not
  # yet visible at this point.
  var zeroKey: Key
  result = cast[Hash](sipHash(cid.data, zeroKey))
proc toCbor*(cid: BlobId): CborNode = newCborBytes cid.data
  ## Generate a CBOR representation of a BlobId.
proc toBlobId*(cbor: CborNode): BlobId =
  ## Parse a BlobId from a CBOR byte string.
  ## (Original comment was a copy-paste of the serializer's.)
  assert(cbor.bytes.len == digestLen)
  for i in 0..<digestLen:
    result.data[i] = cbor.bytes[i].uint8
{.deprecated: [newCborBytes: toCbor].}
proc toHex*(cid: BlobId): string = hex.encode(cid.data)
  ## Return BlobId encoded in hexadecimal.
proc writeUvarint*(s: Stream; n: SomeInteger) =
  ## Write `n` to `s` as an IPFS-style (LEB128) varint:
  ## seven low bits per byte, least-significant group first,
  ## high bit set on every byte except the last.
  var remaining = n
  while true:
    let septet = int(remaining and 0x7f)
    remaining = remaining shr 7
    if remaining == 0:
      s.write(char(septet))
      break
    s.write(char(septet or 0x80))
proc readUvarint*(s: Stream): BiggestInt =
  ## Read an IPFS-style (LEB128) varint from `s`.
  ## Stops at the first byte with a clear high bit, or after
  ## enough bytes to fill a BiggestInt.
  var offset = 0
  while offset < 9 * 8:
    let b = BiggestInt(s.readChar)
    result = result or ((b and 0x7f) shl offset)
    if (b and 0x80) == 0:
      break
    inc(offset, 7)
proc toIpfs*(cid: BlobId): string =
  ## Return BlobId encoded as an IPFS CID string: a 'z' prefix
  ## followed by the base58btc encoding of
  ## (version 1, raw codec, blake2b-256 multihash, digest).
  const
    multiRaw = 0x55            # multicodec: raw binary
    multiBlake2b_256 = 0xb220  # multicodec: blake2b-256
  let buf = newStringStream()
  buf.writeUvarint 1
  buf.writeUvarint multiRaw
  buf.writeUvarint multiBlake2b_256
  buf.writeUvarint digestLen
  for b in cid.data.items:
    buf.write b
  buf.setPosition 0
  let binary = buf.readAll
  close buf
  result = 'z' & bitcoin.encode(binary)
const
  zeroChunk* = "8ddb61928ec76e4ee904cd79ed977ab6f5d9187f1102975060a6ba6ce10e5481".toDigest
    ## BlobId of zero chunk of maximum size.
proc take*(cid: var BlobId; buf: var string) =
  ## Take a raw digest from a string buffer.
  ## `buf` must hold exactly `digestLen` bytes.
  doAssert(buf.len == digestLen)
  for i in 0..<digestLen:
    cid.data[i] = byte(buf[i])
proc dagHash*(buf: pointer; len: Natural): BlobId =
  ## Generate a BlobId for a raw buffer using the BLAKE2b-256 hash.
  assert(len <= blobLeafSize)
  var ctx: Blake2b256
  ctx.init()
  ctx.update(buf, len)
  result = ctx.finish()
proc dagHash*(data: string): BlobId =
  ## Generate a BlobId for a string of data using the BLAKE2b-256 hash.
  assert(data.len <= blobLeafSize)
  var ctx: Blake2b256
  ctx.init()
  ctx.update(data)
  result = ctx.finish()
proc verify*(cid: BlobId; data: string): bool =
  ## Verify that a string of data corresponds to a BlobId.
  var ctx: Blake2b256
  ctx.init()
  ctx.update(data)
  result = ctx.finish() == cid
iterator simpleChunks*(s: Stream; size = maxChunkSize): string =
  ## Break a stream into chunks of at most `size` bytes;
  ## the final chunk may be shorter.
  doAssert(size <= maxChunkSize)
  var chunk = newString(size)
  while not s.atEnd:
    chunk.setLen(size)
    let got = s.readData(chunk[0].addr, size)
    chunk.setLen(got)
    yield chunk
func isNonZero*(bh: BlobId): bool =
  ## Test if a blob hash is not zeroed.
  for b in bh.data.items:
    if b != 0:
      return true
  false
{.deprecated: [isValid: isNonZero].}
type
  Key = int64
    # 64-bit trie key: the SipHash of a name, consumed
    # `keyChunkBits` bits per trie level.
const
  keyBits = sizeof(Key) shl 3
    # Bits in a Key (64).
  keyChunkBits = fastLog2 keyBits
    # Bits consumed per trie level (6).
  keyChunkMask = not ((not 0.Key) shl (keyChunkBits))
    # Mask selecting one level's index bits (0x3f).
func toKey*(s: string): Key =
  ## Hash a name into a 64-bit trie key using SipHash
  ## with an all-zero key.
  var zeroKey: siphash.Key
  let digest = sipHash(toOpenArrayByte(s, s.low, s.high), zeroKey)
  cast[Key](digest)
func toCbor(k: Key): CborNode =
  ## Keys are endian independent.
  # Serialized as the key's raw in-memory bytes.
  newCborBytes cast[array[sizeof(k), byte]](k)
type
  setKind* = enum hotNode, coldNode, leafNode
    # NOTE(review): NEP1 would name this `SetKind`, but renaming
    # would break the exported interface.
  BlobSet* = ref BlobSetObj
  BlobSetObj = object
    case kind*: setKind
    of hotNode:
      # In-memory trie node.
      bitmap: Key            # one bit per occupied slot at this level
      table*: seq[BlobSet]   # compact child array ordered by sparse index
    of coldNode:
      # Node committed to a store and evicted from memory.
      setId*: SetId
    of leafNode:
      key: Key               # full hashed name
      blob: BlobId           # blob identifier
      size: BiggestInt       # blob size in bytes
func newBlobSet*(): BlobSet =
  ## Create a fresh, empty hot set node.
  BlobSet(kind: hotNode, table: newSeqOfCap[BlobSet](2))
func sparseIndex(x: Key): int = int(x and keyChunkMask)
  # Bitmap bit index for the lowest chunk of key `x`.

func compactIndex(t: BlobSet; x: Key): int =
  # Position of key chunk `x` in `t.table`: the popcount of
  # bitmap bits below its sparse index.
  if (x and keyChunkMask) != 0:
    # TODO: bug in shr and shl, cannot shift all bits out
    result = (int)countSetBits(t.bitmap shl (keyBits - x.sparseIndex))

func masked(t: BlobSet; x: Key): bool =
  # Test whether the slot for key chunk `x` is occupied.
  ((t.bitmap shr x.sparseIndex) and 1) != 0
func isEmpty*(s: BlobSet): bool = s.bitmap == Key(0)
  ## Test if a set is empty.
  ## Only meaningful for hot nodes; accesses the `bitmap` branch field.
func nodeCount*(bs: BlobSet): int =
  ## Count of internal nodes in set.
  result = 1
  for child in bs.table:
    assert(child.kind != coldNode, "cannot count cold nodes")
    if child.kind == hotNode:
      result = result + child.nodeCount
func leafCount*(bs: BlobSet): int =
  ## Count of leaves in set.
  for child in bs.table:
    assert(child.kind != coldNode, "cannot count leaves of cold nodes")
    if child.kind == leafNode:
      inc result
    else:
      result = result + child.leafCount
func search*(t: BlobSet; name: string): BlobId =
  ## Look up the blob stored under `name`.
  ## Raises KeyError when the name is not present.
  var
    node = t
    key = name.toKey
  while true:
    assert(key != 0, "keyspace exhausted during search")
    if not node.masked(key):
      raise newException(KeyError, "blob set does not contain key")
    node = node.table[node.compactIndex(key)]
    if node.kind == leafNode:
      return node.blob
    # Descend one level: consume one chunk of the key.
    key = key shr keyChunkBits
func apply(bs: BlobSet; cb: proc (leaf: BlobSet)) =
  ## Apply a callback to each leaf element of a hot set,
  ## recursing through hot child nodes.
  for node in bs.table:
    if node.isNil:
      raiseAssert(bs.table.repr)
    case node.kind
    of hotNode:
      apply(node, cb)
    of leafNode:
      cb(node)
    else:
      # Cold nodes must be loaded before traversal.
      raiseAssert("cannot apply to node type " & $node.kind)
func apply*(t: BlobSet; name: string; f: proc (id: BlobId; size: BiggestInt)) =
  ## Apply a procedure to a named blob, if it is present.
  ## Silently does nothing when `name` is absent.
  var
    t = t
    key = name.toKey
  while true:
    assert(key != 0, "keyspace exhausted during search")
    if t.masked(key):
      t = t.table[t.compactIndex(key)]
      if t.kind == leafNode:
        f(t.blob, t.size)
        break
      # Descend one level: consume one chunk of the key.
      key = key shr keyChunkBits
    else:
      break
func contains*(bs: BlobSet; name: string): bool =
  ## Test whether `name` is present in the set.
  var hit = false
  apply(bs, name) do (id: BlobId; size: BiggestInt):
    hit = true
  hit
func insert(trie, l: BlobSet; depth: int): BlobSet =
  ## Insert leaf `l` into `trie` at the given recursion depth,
  ## returning a new node (the original's table seq is copied by
  ## Nim's value semantics, but child refs are shared).
  ## Recursive to a depth of keyBits/keyChunkBits.
  ## Raises KeyError on a full 64-bit key collision.
  # TODO: not functional?
  doAssert(depth < (keyBits div keyChunkBits), "key space exhausted during insert")
  result = BlobSet(kind: hotNode, bitmap: trie.bitmap, table: trie.table)
  # The chunk of the leaf's key addressed at this level.
  let key = l.key shr (depth * keyChunkBits)
  if result.masked(key):
    let
      depth = depth + 1
      i = result.compactIndex(key)
    case result.table[i].kind
    of hotNode:
      result.table[i] = insert(result.table[i], l, depth)
    of coldNode:
      raiseAssert("cannot insert into cold node")
    of leafNode:
      if result.table[i].key == l.key:
        raise newException(KeyError, "key collision in blob set")
      # Slot conflict: push both leaves one level down.
      var subtrie = newBlobSet()
      subtrie = subtrie.insert(result.table[i], depth)
      subtrie = subtrie.insert(l, depth)
      result.table[i] = subtrie
  else:
    # Free slot: mark the bitmap and insert in compact order.
    result.bitmap = result.bitmap or (Key(1) shl key.sparseIndex)
    result.table.insert(l, result.compactIndex(key))

func insert*(trie, node: BlobSet): BlobSet = insert(trie, node, 0)
  ## Insert set node `node` into `trie`.
func insert*(t: BlobSet; name: string; blob: BlobId; size: BiggestInt): BlobSet =
  ## Insert a blob hash into a trie under the hashed `name`.
  # TODO: this is not functional!
  let leaf = BlobSet(kind: leafNode, key: name.toKey, blob: blob, size: size)
  insert(t, leaf)
func remove(trie: BlobSet; key: Key; depth: int): BlobSet =
  ## Remove the entry for `key` from `trie`, recursing by key chunk.
  ## Returns `trie` unchanged when the key is absent, a rebuilt node
  ## when something was removed, or nil when this node becomes empty.
  # Fixes over the previous revision:
  # - the sibling-rebuild loop inserted the (nil) removed subtree
  #   instead of the surviving children;
  # - a non-nil updated subtree was never stored back into the node;
  # - a leaf with siblings was never actually removed.
  result = trie
  let key = key shr (depth * keyChunkBits)
  if trie.masked(key):
    let
      depth = depth + 1
      i = trie.compactIndex(key)
    case trie.table[i].kind
    of hotNode:
      let newSub = remove(trie.table[i], key, depth)
      if newSub != trie.table[i]:
        if newSub.isNil:
          if trie.table.len == 1:
            result = nil
          else:
            # Rebuild this node without the emptied subtree.
            result = BlobSet(kind: hotNode, bitmap: trie.bitmap, table: trie.table)
            result.table.delete(i)
            result.bitmap = result.bitmap and not (Key(1) shl key.sparseIndex)
        else:
          # Propagate the updated subtree into a fresh copy of this node.
          result = BlobSet(kind: hotNode, bitmap: trie.bitmap, table: trie.table)
          result.table[i] = newSub
    of coldNode:
      raiseAssert("cannot remove from cold node")
    of leafNode:
      if trie.table.len == 1:
        result = nil
      else:
        # Drop the leaf and clear its bitmap bit.
        result = BlobSet(kind: hotNode, bitmap: trie.bitmap, table: trie.table)
        result.table.delete(i)
        result.bitmap = result.bitmap and not (Key(1) shl key.sparseIndex)
func remove*(trie: BlobSet; name: string): BlobSet =
  ## Remove a blob from a trie.
  ## Returns the input trie when empty, and never returns nil.
  if trie.isEmpty:
    return trie
  let pruned = remove(trie, name.toKey, 0)
  if pruned.isNil:
    newBlobSet()
  else:
    pruned
func toCbor*(x: BlobSet): CborNode =
  ## Serialize a set node to CBOR: tag 0 for hot and cold nodes,
  ## tag 1 for leaves.
  const
    nodeTag = 0
    leafTag = 1
  let array = newCborArray()
  case x.kind
  of hotNode:
    var
      map = x.bitmap
      buf = newCborBytes(sizeof(Key))
    # Fixed guard: the previous `when not sizeof(Key) == 8` parsed as
    # `(not sizeof(Key)) == 8`, which is always false — a dead check.
    when sizeof(Key) != 8:
      {.error: "unknown key conversion".}
    bigEndian64(buf.bytes[0].addr, map.addr)
    array.add buf
    for y in x.table:
      array.add y.toCbor
    newCborTag(nodeTag, array)
  of coldNode:
    array.add x.setId.data
    newCborTag(nodeTag, array)
  of leafNode:
    array.add x.key.toCbor
    array.add x.blob.data
    array.add x.size
    newCborTag(leafTag, array)
func leafCount*(size: Natural): int = (size+blobLeafSize-1) div blobLeafSize
  ## Number of blob leaves needed to hold `size` bytes (ceiling division).
type
  BlobKind* = enum
    dataBlob, metaBlob
    # dataBlob: raw blob content; metaBlob: serialized set nodes.
  BlobStream* = ref BlobStreamObj
  BlobStreamObj* = object of RootObj
    # Virtual read stream; implementations fill in these hooks.
    closeImpl*: proc (s: BlobStream) {.nimcall, gcsafe.}
    readImpl*: proc (s: BlobStream; buffer: pointer; bufLen: int): int {.nimcall, gcsafe.}
  IngestStream* = ref IngestStreamObj
  IngestStreamObj* = object of RootObj
    # Virtual write stream; finish returns the blob id and total size.
    finishImpl*: proc (s: IngestStream): tuple[id: BlobId, size: BiggestInt] {.nimcall, gcsafe.}
    ingestImpl*: proc (s: IngestStream; buf: pointer; size: int) {.nimcall, gcsafe.}
proc close*(s: BlobStream) =
  ## Close a blob stream via its implementation hook.
  assert(not s.closeImpl.isNil)
  s.closeImpl(s)

proc read*(s: BlobStream; buf: pointer; len: Natural): int =
  ## Read up to `len` bytes into `buf`; returns the count read, 0 at end.
  assert(not s.readImpl.isNil)
  result = s.readImpl(s, buf, len)

proc finish*(s: IngestStream): tuple[id: BlobId, size: BiggestInt] =
  ## Finish ingest stream
  assert(not s.finishImpl.isNil)
  s.finishImpl(s)

proc ingest*(s: IngestStream; buf: pointer; size: Natural) =
  ## Ingest stream
  assert(not s.ingestImpl.isNil)
  s.ingestImpl(s, buf, size)
proc ingest*(s: IngestStream; buf: var string) =
  ## Ingest the contents of a string buffer.
  assert(not s.ingestImpl.isNil)
  # Guard the empty case: `buf[0].addr` on an empty string
  # raises an index defect.
  if buf.len > 0:
    s.ingestImpl(s, buf[0].addr, buf.len)
type
  BlobStore* = ref BlobStoreObj
  BlobStoreObj* = object of RootObj
    # Virtual blob store; implementations fill in these hooks.
    closeImpl*: proc (s: BlobStore) {.nimcall, gcsafe.}
    openBlobStreamImpl*: proc (s: BlobStore; id: BlobId; size: BiggestInt; kind: BlobKind): BlobStream {.nimcall, gcsafe.}
    openIngestStreamImpl*: proc (s: BlobStore; size: BiggestInt; kind: BlobKind): IngestStream {.nimcall, gcsafe.}
proc close*(s: BlobStore) =
  ## Close active store resources.
  # closeImpl is optional; a nil hook means nothing to release.
  if not s.closeImpl.isNil: s.closeImpl(s)

proc openBlobStream*(s: BlobStore; id: BlobId; size = 0.BiggestInt; kind = dataBlob): BlobStream =
  ## Return a new `BlobStream` for reading a blob.
  assert(not s.openBlobStreamImpl.isNil)
  s.openBlobStreamImpl(s, id, size, kind)

proc openIngestStream*(s: BlobStore; size = 0.BiggestInt; kind = dataBlob): IngestStream =
  ## Return a new `IngestStream` for ingesting a blob.
  assert(not s.openIngestStreamImpl.isNil)
  s.openIngestStreamImpl(s, size, kind)
func compressTree*(leaves: var seq[BlobId]) =
  ## Fold a list of leaf digests into a single root digest in place,
  ## combining adjacent pairs level by level (BLAKE2b tree mode,
  ## fanout 2); on return `leaves` holds only the root.
  var
    ctx: Blake2b256
    nodeOffset = 0
    nodeDepth = 0
  while leaves.len > 1:
    nodeOffset = 0
    inc nodeDepth
    var pos, next: int
    while pos < leaves.len:
      # Parameterize each node hash by its position in the tree.
      ctx.init do (params: var Blake2bParams):
        params.fanout = 2
        params.depth = 255
        params.leafLength = blobLeafSize
        params.nodeOffset = nodeOffset
        params.nodeDepth = nodeDepth
      inc nodeOffset
      ctx.update(leaves[pos].data)
      inc pos
      # An odd trailing leaf is hashed alone.
      if pos < leaves.len:
        ctx.update(leaves[pos].data)
        inc pos
      leaves[next] = ctx.finish()
      inc next
    leaves.setLen(next)
    # TODO: BLAKE2 tree finalization flags
iterator dumpBlob*(store: BlobStore; id: BlobId): string =
  ## Yield the contents of blob `id`, one leaf-sized chunk at a time.
  let stream = store.openBlobStream(id, kind=dataBlob)
  defer:
    close stream
  var chunk = newString(blobLeafSize)
  while true:
    chunk.setLen(blobLeafSize)
    let got = stream.read(chunk[0].addr, chunk.len)
    if got == 0:
      break
    chunk.setLen(got)
    yield chunk
proc commit*(store: BlobStore; bs: BlobSet): BlobSet =
  ## Persist a hot set to `store` and return the cold node that
  ## identifies it. Hot children are committed recursively (in place)
  ## before the node itself is CBOR-encoded and ingested.
  assert(bs.kind == hotNode)
  for e in bs.table.mitems:
    case e.kind
    of coldNode, leafNode: discard
    of hotNode:
      # Replace the hot child with its committed cold form.
      e = store.commit e
  let stream = store.openIngestStream(kind=metaBlob)
  var buf = encode bs.toCbor
  stream.ingest(buf)
  let (id, _) = finish stream
  result = BlobSet(kind: coldNode, setId: id)
proc apply*(store: BlobStore; bs: BlobSet; name: string; f: proc (id: BlobId; size: BiggestInt)) =
  ## Store-aware apply; currently delegates to the in-memory version.
  # TODO: lazy-load set
  bs.apply(name, f)

proc insert*(store: BlobStore; bs: BlobSet; name: string; blob: BlobId; size: BiggestInt): BlobSet =
  ## Store-aware insert; currently delegates to the in-memory version.
  # TODO: lazy-load set
  insert(bs, name, blob, size)

proc remove*(store: BlobStore; bs: BlobSet; name: string): BlobSet =
  ## Store-aware remove; currently delegates to the in-memory version.
  # TODO: lazy-load set
  remove(bs, name)
proc union*(store: BlobStore; sets: varargs[BlobSet]): BlobSet =
  ## Return the union of `sets`.
  ## Raises KeyError if two sets contain the same key.
  # TODO: lazy-load set
  var merged = newBlobSet()
  proc addLeaf(leaf: BlobSet) =
    merged = insert(merged, leaf)
  for s in sets:
    assert(not s.isNil)
    apply(s, addLeaf)
  merged
# Store implementations
#
type
  NullIngestStream = ref NullIngestStreamObj
  NullIngestStreamObj = object of IngestStreamObj
    # Hashes ingested data without storing it.
    ctx: Blake2b256          # hash of the current leaf
    leaves: seq[BlobId]      # completed leaf digests
    pos, nodeOffset: BiggestInt  # bytes ingested; index of current leaf
# Null store: no resources to release.
proc nullBlobClose(s: BlobStream) = discard

# Null store: always reports end-of-stream.
proc nullBlobRead(s: BlobStream; buffer: pointer; len: Natural): int = 0

proc nullOpenBlobStream(s: BlobStore; id: BlobId; size: BiggestInt; kind: BlobKind): BlobStream =
  # Blob stream that yields no data.
  BlobStream(closeImpl: nullBlobClose, readImpl: nullBlobRead)
proc nullFinish(s: IngestStream): tuple[id: BlobId, size: BiggestInt] =
  ## Finalize: hash the trailing leaf, then fold all leaf digests
  ## into the tree root.
  var stream = NullIngestStream(s)
  stream.leaves.add finish(stream.ctx)
  compressTree(stream.leaves)
  (id: stream.leaves[0], size: stream.pos)
proc nullIngest(s: IngestStream; buf: pointer; len: Natural) =
  ## Hash `len` bytes from `buf`, splitting the input on
  ## `blobLeafSize` boundaries; each completed leaf digest is
  ## appended to `leaves` and a fresh per-leaf context is started.
  var
    s = NullIngestStream(s)
    off = 0
    buf = cast[ptr array[blobLeafSize, byte]](buf)
  while off < len:
    var n = min(blobLeafSize, len-off)
    # Offset within the current leaf.
    let leafOff = int(s.pos and blobLeafSizeMask)
    if leafOff == 0:
      # At a leaf boundary: close the previous leaf (if any) and
      # start a new per-leaf hash context.
      if s.pos > 0:
        s.leaves.add finish(s.ctx)
      s.ctx.init do (params: var Blake2bParams):
        params.fanout = 2
        params.depth = 255
        params.leafLength = blobLeafSize
        params.nodeOffset = s.nodeOffset
      inc s.nodeOffset
    else:
      # Mid-leaf: only consume up to the end of this leaf.
      n = min(n, blobLeafSize-leafOff)
    s.ctx.update(buf[off].addr, n)
    off.inc n
    s.pos.inc n
proc nullOpenIngestStream(s: BlobStore; size: BiggestInt; kind: BlobKind): IngestStream =
  # Ingest stream that computes identities but discards the data.
  NullIngestStream(
    finishImpl: nullFinish, ingestImpl: nullIngest, leaves: newSeq[BlobId]())

proc newNullStore*(): BlobStore =
  ## A store that hashes blobs without persisting anything;
  ## reads always return empty streams.
  BlobStore(
    openBlobStreamImpl: nullOpenBlobStream,
    openIngestStreamImpl: nullOpenIngestStream)