Ingest and dump blobs

Ehmry - 2018-12-21 03:50:36 +01:00
parent c6fcfecd1d
commit 1aba9dcd42
14 changed files with 949 additions and 473 deletions

.gitignore vendored

@ -1,12 +1,2 @@
nimcache
ipldrepl
/dagfs_repl
/genode/dagfs_genode/dagfs_fs
/genode/dagfs_genode/dagfs_fs_store
/genode/dagfs_genode/dagfs_rom
/genode/dagfs_genode/dagfs_tcp_store
/genode/dagfs_genode/bin/dagfs_fs
/genode/dagfs_genode/bin/dagfs_fs_store
/genode/dagfs_genode/bin/dagfs_rom
/genode/dagfs_genode/bin/dagfs_server
/genode/dagfs_genode/bin/dagfs_tcp_store
blobset

blobsets.nimble Normal file

@ -0,0 +1,12 @@
# Package
version = "0.1.2"
author = "Emery Hemingway"
description = "Sets of named blobs"
license = "AGPLv3"
srcDir = "src"
requires "nim >= 0.18.0", "base58", "cbor >= 0.5.1", "siphash", "nimcrypto"
bin = @["blobset"]
skipFiles = @["blobset.nim"]


@ -1,12 +0,0 @@
# Package
version = "0.1.2"
author = "Emery Hemingway"
description = "A simple content addressed file-system"
license = "GPLv3"
srcDir = "src"
requires "nim >= 0.18.0", "base58", "cbor >= 0.5.1", "siphash"
bin = @["dagfs_repl"]
skipFiles = @["dagfs_repl.nim"]


@ -1,17 +1,25 @@
import nre, os, strutils, tables, parseopt, streams, cbor
when not isMainModule:
{.error: "this module is not a library, import blobsets instead".}
import ./dagfs, ./dagfs/stores, ./dagfs/fsnodes
import std/nre, std/os, std/strutils, std/tables, std/parseopt, std/streams, std/rdstdin
import cbor
import ./blobsets, ./blobsets/stores, ./blobsets/fsnodes
when defined(genode):
import dagfsclient
else:
import ./blobsets/tcp
type
EvalError = object of CatchableError
type
Env = ref EnvObj
AtomKind = enum
atomPath
atomCid
atomString
atomNum
atomSymbol
atomError
@ -24,6 +32,8 @@ type
cid: Cid
of atomString:
str: string
of atomNum:
num: BiggestInt
of atomSymbol:
sym: string
of atomError:
@ -55,8 +65,9 @@ type
nextRef: NodeRef
EnvObj = object
store: DagfsStore
store: BlobStore
bindings: Table[string, NodeObj]
blobs: Table[string, tuple[id: BlobId, size: BiggestInt]]
paths: Table[string, FsNode]
cids: Table[Cid, FsNode]
@ -79,6 +90,9 @@ proc newAtomPath(s: string): Atom =
proc newAtomString(s: string): Atom =
Atom(kind: atomString, str: s)
proc newAtom(i: Natural): Atom =
Atom(kind: atomNum, num: i)
proc newNodeError(msg: string; n: NodeObj): NodeRef =
var p = new NodeRef
p[] = n
@ -139,6 +153,13 @@ template returnError(n: NodeObj) =
if n.atom.kind == atomError:
return n.atom.newNode
proc getBlob(env: Env; path: string): tuple[id: BlobId, size: BiggestInt] =
result = env.blobs.getOrDefault(path)
if result.size == 0:
result = env.store.ingestFile(path)
if result.size != 0:
env.blobs[path] = result
proc getFile(env: Env; path: string): FsNode =
result = env.paths.getOrDefault path
if result.isNil:
@ -201,6 +222,8 @@ proc print(a: Atom; s: Stream) =
if not valid: break
f.write chunk
]#
of atomNum:
s.write $a.num
of atomSymbol:
s.write a.sym
of atomError:
@ -217,7 +240,7 @@ proc print(ast: NodeObj; s: Stream) =
for n in ast.list:
s.write " "
n.print(s)
s.write ")"
s.write " )"
of nodeFunc:
s.write "#<procedure "
s.write ast.name
@ -247,7 +270,7 @@ proc readAtom(r: Reader): Atom =
# TODO: memoize this, store a table of paths to atoms
newAtomPath token
elif token.len == 46 or token.len > 48:
Atom(kind: atomCid, cid: token.parseCid)
Atom(kind: atomCid, cid: token.toBlobId)
else:
Atom(kind: atomSymbol, sym: token.normalize)
#except:
@ -388,6 +411,14 @@ proc ingestFunc(env: Env; args: NodeObj): NodeRef =
cid = env.store.putDag(root.toCbor)
cid.newAtom.newNode
proc blobFunc(env: Env; args: NodeObj): NodeRef =
assertArgCount(args, 1)
let (blob, size) = env.getBlob args.atom.path
result = newNodeList()
result.append blob.newAtom.newNode
result.append size.newAtom.newNode
# TODO: natural number atom
proc listFunc(env: Env; args: NodeObj): NodeRef =
## Standard Lisp 'list' function.
result = newNodeList()
@ -461,10 +492,11 @@ proc bindEnv(env: Env; name: string; fun: Func) =
assert(not env.bindings.contains name)
env.bindings[name] = NodeObj(kind: nodeFunc, fun: fun, name: name)
proc newEnv(store: DagfsStore): Env =
proc newEnv(store: BlobStore): Env =
result = Env(
store: store,
bindings: initTable[string, NodeObj](),
blobs: initTable[string, tuple[id: BlobId, size: BiggestInt]](),
paths: initTable[string, FsNode](),
cids: initTable[Cid, FsNode]())
result.bindEnv "apply", applyFunc
@ -481,6 +513,7 @@ proc newEnv(store: DagfsStore): Env =
result.bindEnv "path", pathFunc
result.bindEnv "root", rootFunc
result.bindEnv "walk", walkFunc
result.bindEnv "blob", blobFunc
proc eval(ast: NodeRef; env: Env): NodeRef
@ -534,55 +567,37 @@ proc eval(ast: NodeRef; env: Env): NodeRef =
except OSError:
newNodeError(getCurrentExceptionMsg(), input)
var scripted = false
proc readLineSimple(prompt: string; line: var TaintedString): bool =
stdin.readLine(line)
when defined(genode):
import dagfsclient
proc openStore(): DagfsStore =
proc openStore(): BlobStore =
result = newDagfsClient("repl")
scripted = true # do not use linenoise for the moment
#[
for kind, key, value in getopt():
if kind == cmdShortOption and key == "s":
scripted = true
else:
quit "unhandled argument " & key
]#
else:
import ./dagfs/tcp
proc openStore(): DagfsStore =
proc openStore(): BlobStore =
var host = ""
for kind, key, value in getopt():
case kind
of cmdShortOption:
if key == "s":
scripted = true
else:
quit "unhandled argument " & key
of cmdArgument:
if host != "":
quit "only a single store path argument is accepted"
host = key
else:
quit "unhandled argument " & key
if kind == cmdShortOption:
if key == "h":
if host != "":
quit "only a single store path argument is accepted"
host = value
if host == "": host = "127.0.0.1"
try: result = newTcpClient(host)
except:
quit("failed to connect to store at $1 ($2)" % [host, getCurrentExceptionMsg()])
import rdstdin
proc readLineSimple(prompt: string; line: var TaintedString): bool =
stdin.readLine(line)
proc main() =
proc replMain() =
var scripted: bool
for kind, key, value in getopt():
if kind == cmdShortOption and key == "s":
scripted = true
let
store = openStore()
#store = openStore()
store = newFileStore("/tmp/blobs")
env = newEnv(store)
outStream = stdout.newFileStream
readLine = if scripted: readLineSimple else: readLineFromStdin
var
reader = newReader()
line = newStringOfCap 128
@ -594,5 +609,67 @@ proc main() =
outStream.write "\n"
flush outStream
proc dumpMain() =
var args = newSeq[string]()
for kind, key, val in getopt():
if kind == cmdArgument:
args.add key
if args.len > 1:
let store = newFileStore("/tmp/blobs")
for i in 1..args.high:
try:
for chunk in store.dumpBlob(args[i].toBlobId):
write(stdout, chunk)
except:
writeLine(stderr, "failed to dump '", args[i], "', ", getCurrentExceptionMsg())
quit(-1)
proc insertPath(set: BlobSet; store: BlobStore; kind: PathComponent; path: string) =
try:
case kind
of pcFile, pcLinkToFile:
let (id, size) = store.ingestFile(path)
set.insert(path, id, size)
writeLine(stdout, id, align($size, 11), " ", path)
of pcDir, pcLinkToDir:
for kind, subPath in path.walkDir:
set.insertPath(store, kind, normalizedPath subPath)
except:
let e = getCurrentException()
writeLine(stderr, "failed to ingest '", path, "', ", e.msg)
# raise e
proc ingestMain() =
var args = newSeq[string]()
for kind, key, val in getopt():
if kind == cmdArgument:
args.add key
if args.len > 1:
var set = newBlobSet()
#let store = newFileStore("/tmp/blobs")
let store = newNullStore()
for i in 1..args.high:
let path = normalizedPath args[i]
set.insertPath(store, path.getFileInfo.kind, path)
let final = store.commit set
writeLine(stdout, final.setId)
proc main() =
var cmd = ""
for kind, key, val in getopt():
if kind == cmdArgument:
cmd = key
break
case normalize(cmd)
of "":
quit("no subcommand specified")
#of "repl":
# replMain()
of "dump":
dumpMain()
of "ingest":
ingestMain()
else:
quit("no such subcommand ")
main()
quit 0 # Genode doesn't implicitly quit

src/blobsets.nim Normal file

@ -0,0 +1,301 @@
import std/hashes, std/streams, std/strutils, std/bitops, std/unicode, std/endians
import base58/bitcoin, cbor, siphash
import ./blobsets/priv/hex
import nimcrypto, nimcrypto/blake2
const
digestLen* = 32
## Length of a chunk digest.
cidSize* = digestLen
## Size of CID object in memory
blobLeafSize* = 1 shl 14
## Size of blob leaves.
blobLeafSizeMask* = not(not(0) shl 14)
visualLen = 32 * 3
maxChunkSize* {.deprecated} = blobLeafSize
type
Blake2b256* = Blake2bContext[256]
BlobId* = MDigest[Blake2b256.bits]
## Blob Identifier
SetId* = MDigest[Blake2b256.bits]
## Set Identifier
Cid* {.deprecated} = BlobId
func `$`*(bh: BlobId): string =
## Convert a blob hash to a visual representation.
const baseRune = 0x2800
result = newString(visualLen)
var pos = 0
for b in bh.data.items:
let r = (Rune)baseRune or b.int
fastToUTF8Copy(r, result, pos, true)
func toBlobId*(s: string): BlobId =
## Parse a visual blob hash to binary.
if s.len == visualLen:
var
pos: int
r: Rune
for b in result.data.mitems:
fastRuneAt(s, pos, r, true)
b = r.byte
proc `==`*(x, y: BlobId): bool = x.data == y.data
## Compare two BlobIds.
proc `==`*(cbor: CborNode; cid: BlobId): bool =
## Compare a CBOR node with a BlobId.
if cbor.kind == cborBytes:
for i in 0..<digestLen:
if cid.data[i] != cbor.bytes[i].uint8:
return false
result = true
proc hash*(cid: BlobId): Hash =
## Reduce a BlobId into an integer for use in tables.
var zeroKey: Key
result = cast[Hash](sipHash(cid.data, zeroKey))
proc toCbor*(cid: BlobId): CborNode = newCborBytes cid.data
## Generate a CBOR representation of a BlobId.
proc toBlobId*(cbor: CborNode): BlobId =
## Parse a BlobId from a CBOR node.
assert(cbor.bytes.len == digestLen)
for i in 0..<digestLen:
result.data[i] = cbor.bytes[i].uint8
{.deprecated: [newCborBytes: toCbor].}
proc toHex*(cid: BlobId): string = hex.encode(cid.data)
## Return BlobId encoded in hexadecimal.
proc writeUvarint*(s: Stream; n: SomeInteger) =
## Write an IPFS varint
var n = n
while true:
let c = int8(n and 0x7f)
n = n shr 7
if n == 0:
s.write((char)c.char)
break
else:
s.write((char)c or 0x80)
proc readUvarint*(s: Stream): BiggestInt =
## Read an IPFS varint
var shift: int
while shift < (9*8):
let c = (BiggestInt)s.readChar
result = result or ((c and 0x7f) shl shift)
if (c and 0x80) == 0:
break
shift.inc 7
proc toIpfs*(cid: BlobId): string =
## Return BlobId encoded as an IPFS CID (CIDv1, raw codec, base58btc multibase).
const
multiRaw = 0x55
multiBlake2b_256 = 0xb220
let s = newStringStream()
s.writeUvarint 1
s.writeUvarint multiRaw
s.writeUvarint multi_blake2b_256
s.writeUvarint digestLen
for e in cid.data:
s.write e
s.setPosition 0
result = 'z' & bitcoin.encode(s.readAll)
close s
const
zeroChunk* = "8ddb61928ec76e4ee904cd79ed977ab6f5d9187f1102975060a6ba6ce10e5481".toDigest
## BlobId of zero chunk of maximum size.
proc take*(cid: var BlobId; buf: var string) =
## Take a raw digest from a string buffer.
doAssert(buf.len == digestLen)
copyMem(cid.data[0].addr, buf[0].addr, digestLen)
proc dagHash*(buf: pointer; len: Natural): BlobId =
## Generate a BlobId for a string of data using the BLAKE2b hash algorithm.
assert(len <= maxChunkSize)
var b: Blake2b256
init(b)
update(b, buf, len)
b.finish()
proc dagHash*(data: string): BlobId =
## Generate a BlobId for a string of data using the BLAKE2b hash algorithm.
assert(data.len <= maxChunkSize)
var b: Blake2b256
init(b)
update(b, data)
b.finish()
proc verify*(cid: BlobId; data: string): bool =
## Verify that a string of data corresponds to a BlobId.
var b: Blake2b256
init(b)
update(b, data)
finish(b) == cid
iterator simpleChunks*(s: Stream; size = maxChunkSize): string =
## Iterator that breaks a stream into simple chunks.
doAssert(size <= maxChunkSize)
var tmp = newString(size)
while not s.atEnd:
tmp.setLen(size)
tmp.setLen(s.readData(tmp[0].addr, size))
yield tmp
func isNonZero*(bh: BlobId): bool =
## Test if a blob hash is not zeroed.
var r: byte
for b in bh.data.items:
{.unroll.}
r = r or b
r != 0
{.deprecated: [isValid: isNonZero].}
type
Key = int64
const
keyBits = sizeof(Key) shl 3
keyChunkBits = fastLog2 keyBits
keyChunkMask = not ((not 0.Key) shl (keyChunkBits))
func toKey(s: string): Key =
var key: siphash.Key
let b = sipHash(toOpenArrayByte(s, s.low, s.high), key)
cast[Key](b)
func toCbor(k: Key): CborNode =
## Keys are endian independent.
newCborBytes cast[array[sizeof(k), byte]](k)
type
setKind* = enum hotNode, coldNode, leafNode
BlobSet* = ref BlobSetObj
BlobSetObj = object
case kind*: setKind
of hotNode:
bitmap: Key
table*: seq[BlobSet]
of coldNode:
setId*: SetId
of leafNode:
key: Key
blob: BlobId
size: BiggestInt
func newBlobSet*(): BlobSet =
BlobSet(kind: hotNode, table: newSeqOfCap[BlobSet](2))
func sparseIndex(x: Key): int = int(x and keyChunkMask)
func compactIndex(t: BlobSet; x: Key): int =
if (x and keyChunkMask) != 0:
# TODO: bug in shr and shl, cannot shift all bits out
result = (int)countSetBits(t.bitmap shl (keyBits - x.sparseIndex))
func masked(t: BlobSet; x: Key): bool =
((t.bitmap shr x.sparseIndex) and 1) != 0
func nodeCount*(bs: BlobSet): int =
## Count of internal nodes in set.
result = 1
for n in bs.table:
assert(n.kind != coldNode, "cannot count cold nodes")
if n.kind == hotNode:
result.inc n.nodeCount
func leafCount*(bs: BlobSet): int =
## Count of leaves in set.
for n in bs.table:
assert(n.kind != coldNode, "cannot count leaves of cold nodes")
if n.kind == leafNode:
result.inc 1
else:
result.inc n.leafCount
func search*(t: BlobSet; name: string): BlobId =
var
t = t
key = name.toKey
while true:
assert(key != 0, "keyspace exhausted during search")
if t.masked(key):
t = t.table[t.compactIndex(key)]
if t.kind == leafNode:
result = t.blob
break
key = key shr keyChunkBits
else:
raise newException(KeyError, "blob set does not contain key")
func insert(t, l: BlobSet; depth: int) =
## This procedure is recursive to a depth of keyBits/keyChunkBits.
doAssert(depth < (keyBits div keyChunkBits), "key space exhausted during insert")
let key = l.key shr (depth * keyChunkBits)
if t.masked(key):
let
depth = depth + 1
i = t.compactIndex(key)
case t.table[i].kind
of hotNode:
t.table[i].insert(l, depth)
of coldNode:
raiseAssert("cannot insert into cold node")
of leafNode:
if t.table[i].key == l.key:
raise newException(KeyError, "key collision in blob set")
let
subtrei = newBlobSet()
subtrei.insert(t.table[i], depth)
subtrei.insert(l, depth)
t.table[i] = subtrei
else:
t.bitmap = t.bitmap or (Key(1) shl key.sparseIndex)
t.table.insert(l, t.compactIndex(key))
func insert*(t: BlobSet; name: string; blob: BlobId; size: BiggestInt) =
## Insert a blob hash into a trie.
let leaf = BlobSet(kind: leafNode, key: name.toKey, blob: blob, size: size)
insert(t, leaf, 0)
func isEmpty*(s: BlobSet): bool = s.bitmap == Key(0)
## Test if a set is empty.
func toCbor*(x: BlobSet): CborNode =
const
nodeTag = 0
leafTag = 1
let array = newCborArray()
case x.kind
of hotNode:
var
map = x.bitmap
buf = newCborBytes(sizeof(Key))
when sizeof(Key) != 8:
{.error: "unknown key conversion".}
bigEndian64(buf.bytes[0].addr, map.addr)
array.add buf
for y in x.table:
array.add y.toCbor
newCborTag(nodeTag, array)
of coldNode:
array.add x.setId.data
newCborTag(nodeTag, array)
of leafNode:
array.add x.key.toCbor
array.add x.blob.data
array.add x.size
newCborTag(leafTag, array)
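
The BlobSet introduced above is a bitmap-indexed hash trie keyed by the SipHash of a name. A minimal sketch of how the exported pieces fit together (not part of the commit; the import path assumes the installed package layout, while inside the repo it is written as ./blobsets):

import blobsets

var bs = newBlobSet()
let
  data = "hello blob"
  id = dagHash(data)                    # BLAKE2b-256 digest of the data
bs.insert("greeting", id, BiggestInt(data.len))
doAssert bs.search("greeting") == id    # search raises KeyError for unknown names
doAssert bs.leafCount == 1
echo id                                 # 32-rune braille rendering of the digest
discard bs.toCbor                       # CBOR form used when a set is committed to a store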


@ -1,6 +1,6 @@
import strutils, streams, tables, cbor, os, math, asyncfile, asyncdispatch
import ../dagfs, ./stores
import ../blobsets, ./stores
type EntryKey = enum
typeKey = 1,
@ -50,12 +50,11 @@ proc size*(u: FsNode): BiggestInt =
proc newFsRoot*(): FsNode =
FsNode(
cid: initCid(),
kind: dirNode,
entries: initOrderedTable[string, FsNode](8))
proc newUnixfsFile*(): FsNode =
FsNode(kind: fileNode, cid: initCid())
FsNode(kind: fileNode)
proc newUnixfsDir*(cid: Cid): FsNode =
FsNode(cid: cid, kind: dirNode)
@ -248,7 +247,7 @@ proc lookupFile*(dir: FsNode; name: string): tuple[cid: Cid, size: BiggestInt] =
result.cid = f.cid
result.size = f.size
proc addFile*(store: DagfsStore; path: string): FsNode =
proc addFile*(store: BlobStore; path: string): FsNode =
## Add a file to the store and a FsNode.
let
file = openAsync(path, fmRead)
@ -284,7 +283,7 @@ proc addFile*(store: DagfsStore; path: string): FsNode =
u.cid = store.putDag(u.toCbor)
result = u
proc addDir*(store: DagfsStore; dirPath: string): FsNode =
proc addDir*(store: BlobStore; dirPath: string): FsNode =
var dRoot = newFsRoot()
for kind, path in walkDir dirPath:
var child: FsNode
@ -300,12 +299,12 @@ proc addDir*(store: DagfsStore; dirPath: string): FsNode =
cid = store.putDag(dag)
result = newUnixfsDir(cid)
proc open*(store: DagfsStore; cid: Cid): FsNode =
proc open*(store: BlobStore; cid: Cid): FsNode =
assert cid.isValid
let raw = store.get(cid)
result = parseFs(raw, cid)
proc openDir*(store: DagfsStore; cid: Cid): FsNode =
proc openDir*(store: BlobStore; cid: Cid): FsNode =
assert cid.isValid
var raw = ""
try: store.get(cid, raw)
@ -314,7 +313,7 @@ proc openDir*(store: DagfsStore; cid: Cid): FsNode =
result = parseFs(raw, cid)
assert(result.kind == dirNode)
proc walk*(store: DagfsStore; dir: FsNode; path: string; cache = true): FsNode =
proc walk*(store: BlobStore; dir: FsNode; path: string; cache = true): FsNode =
## Walk a path down a root.
assert(dir.kind == dirNode)
result = dir
@ -336,7 +335,7 @@ proc walk*(store: DagfsStore; dir: FsNode; path: string; cache = true): FsNode =
result = next
#[
iterator fileChunks*(store: DagfsStore; file: FsNode): string =
iterator fileChunks*(store: BlobStore; file: FsNode): string =
## Iterate over the links in a file and return futures for link data.
if file.cid.isRaw:
yield store.get(file.cid)
@ -350,7 +349,7 @@ iterator fileChunks*(store: DagfsStore; file: FsNode): string =
inc i
]#
proc readBuffer*(store: DagfsStore; file: FsNode; pos: BiggestInt;
proc readBuffer*(store: BlobStore; file: FsNode; pos: BiggestInt;
buf: pointer; size: int): int =
## Read a UnixFS file into a buffer. May return zero for any failure.
assert(pos > -1)


@ -1,6 +1,6 @@
import httpclient, json, base58/bitcoin, streams, cbor, tables
import ../dagfs, ./stores, ./fsnodes
import ../blobsets, ./stores, ./fsnodes
type
IpfsStore* = ref IpfsStoreObj


@ -1,10 +1,73 @@
import std/bitops, std/endians
type
Blake2b* = object
hash: array[8, uint64]
offset: array[2, uint64]
buffer: array[128, uint8]
buffer_idx: uint8
hash_size: uint8
Blake2b* = object
hash: array[8, uint64]
offset: array[2, uint64]
buffer: array[128, uint8]
buffer_idx: uint8
hash_size: uint8
Blake2bParams* = object
b: array[64, byte]
Blake2sParams* = object
b: array[32, byte]
Blake2Params* = Blake2bParams | Blake2sParams
proc params(c: var Blake2b): ptr Blake2bParams =
cast[ptr Blake2bParams](c.hash.addr)
proc `digestLen=`*(p: ptr Blake2bParams; x: range[1..64]) =
p.b[0] = (uint8)x
proc `digestLen=`*(p: ptr Blake2sParams; x: range[1..32]) =
p.b[0] = (uint8)x
proc `keyLen=`*(p: ptr Blake2bParams; x: range[0..64]) =
p.b[1] = (uint8)x
proc `keyLen=`*(p: ptr Blake2sParams; x: range[0..32]) =
p.b[1] = (uint8)x
proc `fanout=`*(p: ptr Blake2Params; x: Natural) =
p.b[2] = (uint8)x
proc `depth=`*(p: ptr Blake2Params; x: Natural) =
p.b[3] = (uint8)x
proc `leafLength=`*(p: ptr Blake2Params; x: Natural) =
var x = x; littleEndian32(p.b[4].addr, x.addr)
proc `nodeOffset=`*(p: ptr Blake2bParams; x: Natural) =
var x = x; littleEndian64(p.b[8].addr, x.addr)
proc `nodeOffset=`*(p: ptr Blake2sParams; x: Natural) =
var tmp: int64
littleEndian64(tmp.addr, p.b[8].addr)
tmp = (tmp and 0xffffffff) or (x shl 32)
littleEndian64(p.b[8].addr, tmp.addr)
proc `nodeDepth=`*(p: ptr Blake2bParams; x: Natural) =
p.b[16] = (uint8)x
proc `nodeDepth=`*(p: ptr Blake2sParams; x: Natural) =
p.b[14] = (uint8)x
proc `innerLength=`*(p: ptr Blake2bParams; x: Natural) =
p.b[17] = (uint8)x
proc `innerLength=`*(p: ptr Blake2sParams; x: Natural) =
p.b[15] = (uint8)x
proc `salt=`*(p: ptr Blake2bParams; salt: pointer; len: Natural) =
copyMem(p.b[32].addr, salt, min(len, 16))
proc `salt=`*(p: ptr Blake2sParams; salt: pointer; len: Natural) =
copyMem(p.b[16].addr, salt, min(len, 8))
proc `personal=`*(p: ptr Blake2bParams; salt: pointer; len: Natural) =
copyMem(p.b[48].addr, salt, min(len, 16))
proc `personal=`*(p: ptr Blake2sParams; salt: pointer; len: Natural) =
copyMem(p.b[24].addr, salt, min(len, 8))
proc init(p: ptr Blake2Params) =
when p is Blake2bParams:
p.digestLen = 64
else:
p.digestLen = 32
p.fanout = 1
p.depth = 1
const Blake2bIV =
[ 0x6a09e667f3bcc908'u64, 0xbb67ae8584caa73b'u64,
@ -33,20 +96,17 @@ proc inc(a: var array[2, uint64], b: uint8) =
proc padding(a: var array[128, uint8], b: uint8) =
for i in b..127: a[i] = 0
proc ror64(x: uint64, n: int): uint64 {.inline.} =
result = (x shr n) or (x shl (64 - n))
proc G (v: var array[16, uint64],
a,b,c,d: int, x,y: uint64)
{.inline.} =
v[a] = v[a] + v[b] + x
v[d] = ror64(v[d] xor v[a], 32)
v[d] = rotateRightBits(v[d] xor v[a], 32)
v[c] = v[c] + v[d]
v[b] = ror64(v[b] xor v[c], 24)
v[b] = rotateRightBits(v[b] xor v[c], 24)
v[a] = v[a] + v[b] + y
v[d] = ror64(v[d] xor v[a], 16)
v[d] = rotateRightBits(v[d] xor v[a], 16)
v[c] = v[c] + v[d]
v[b] = ror64(v[b] xor v[c], 63)
v[b] = rotateRightBits(v[b] xor v[c], 63)
proc compress(c: var Blake2b, last: int = 0) =
var input, v: array[16, uint64]
@ -72,7 +132,7 @@ proc compress(c: var Blake2b, last: int = 0) =
c.buffer_idx = 0
{.push boundChecks: off.}
proc blake2b_update*(c: var Blake2b, buf: pointer, data_size: int) =
proc update*(c: var Blake2b, buf: pointer, data_size: int) =
var data = cast[ptr array[0, uint8]](buf)
for i in 0..<data_size:
if c.buffer_idx == 128:
@ -82,7 +142,7 @@ proc blake2b_update*(c: var Blake2b, buf: pointer, data_size: int) =
inc(c.buffer_idx)
{.pop.}
proc blake2b_update*(c: var Blake2b, data: cstring|string|seq|uint8, data_size: int) =
proc update*(c: var Blake2b, data: cstring|string|seq|uint8, data_size: int) =
for i in 0..<data_size:
if c.buffer_idx == 128:
inc(c.offset, c.buffer_idx)
@ -95,82 +155,36 @@ proc blake2b_update*(c: var Blake2b, data: cstring|string|seq|uint8, data_size:
c.buffer[c.buffer_idx] = data
inc(c.buffer_idx)
proc blake2b_init*(c: var Blake2b, hash_size: uint8,
key: cstring = nil, key_size: int = 0) =
assert(hash_size >= 1'u8 and hash_size <= 64'u8)
assert(key_size >= 0 and key_size <= 64)
c.hash = Blake2bIV
c.hash[0] = c.hash[0] xor 0x01010000 xor cast[uint64](key_size shl 8) xor hash_size
c.hash_size = hash_size
proc initBlake2b*(key: pointer = nil, key_size: range[0..64] = 0): Blake2b =
init(result.params)
result.hash_size = result.params.b[0]
result.params.keyLen = keySize
for i in 0..7:
result.hash[i] = Blake2bIV[i]
if key_size > 0:
blake2b_update(c, key, key_size)
padding(c.buffer, c.buffer_idx)
c.buffer_idx = 128
update(result, key, key_size)
padding(result.buffer, result.buffer_idx)
result.buffer_idx = 128
proc blake2b_final*(c: var Blake2b): seq[uint8] =
proc initBlake2b*(customize: proc(params: ptr Blake2bParams)): Blake2b =
let p = result.params
init(p)
customize(p)
result.hash_size = p.b[0]
for i in 0..7:
result.hash[i] = result.hash[i] xor Blake2bIV[i]
proc finish*(c: var Blake2b): seq[uint8] =
result = newSeq[uint8](c.hash_size)
inc(c.offset, c.buffer_idx)
padding(c.buffer, c.buffer_idx)
compress(c, 1)
for i in 0'u8..<c.hash_size:
result[i.int] = cast[uint8]((c.hash[i div 8] shr (8'u8 * (i and 7)) and 0xFF))
zeroMem(addr(c), sizeof(c))
proc `$`*(d: seq[uint8]): string =
proc `$`(d: seq[uint8]): string =
const digits = "0123456789abcdef"
result = ""
for i in 0..high(d):
add(result, digits[(d[i].int shr 4) and 0xF])
add(result, digits[d[i].int and 0xF])
proc getBlake2b*(s: string, hash_size: uint8, key: string = ""): string =
var b: Blake2b
blake2b_init(b, hash_size, cstring(key), len(key))
blake2b_update(b, s, len(s))
result = $blake2b_final(b)
when isMainModule:
import strutils, hex
proc hex2str(s: string): string =
hex.decode s
assert(getBlake2b("abc", 4, "abc") == "b8f97209")
assert(getBlake2b(nil, 4, "abc") == "8ef2d47e")
assert(getBlake2b("abc", 4) == "63906248")
assert(getBlake2b(nil, 4) == "1271cf25")
var b1, b2: Blake2b
blake2b_init(b1, 4)
blake2b_init(b2, 4)
blake2b_update(b1, 97'u8, 1)
blake2b_update(b1, 98'u8, 1)
blake2b_update(b1, 99'u8, 1)
blake2b_update(b2, @[97'u8, 98'u8, 99'u8], 3)
assert($blake2b_final(b1) == $blake2b_final(b2))
let f = open("blake2b-kat.txt", fmRead)
var
data, key, hash, r: string
b: Blake2b
while true:
try:
data = f.readLine()
data = hex2str(data[4.int..data.high])
key = f.readLine()
key = hex2str(key[5..key.high])
hash = f.readLine()
hash = hash[6..hash.high]
r = getBlake2b(data, 64, key)
assert(r == hash)
blake2b_init(b, 64, key, 64)
for i in 0..high(data):
blake2b_update(b, ($data[i]).cstring, 1)
assert($blake2b_final(b) == hash)
discard f.readLine()
except IOError: break
close(f)
echo "ok"


@ -1,5 +1,5 @@
import std/streams, std/strutils, std/os, cbor
import ../dagfs, ./stores
import ../blobsets, ./stores
type
DagfsReplicator* = ref DagfsReplicatorObj

src/blobsets/stores.nim Normal file

@ -0,0 +1,403 @@
import std/streams, std/strutils, std/os
import std/asyncfile, std/asyncdispatch
import cbor
import ../blobsets, ./priv/hex
import nimcrypto/blake2
type
MissingChunk* = ref object of CatchableError
cid*: Cid ## Missing chunk identifier
BufferTooSmall* = object of CatchableError
template raiseMissing*(cid: Cid) =
raise MissingChunk(msg: "chunk missing from store", cid: cid)
func leafCount(size: Natural): int = (size+blobLeafSize-1) div blobLeafSize
type
BlobStream* = ref BlobStreamObj
BlobStreamObj = object of RootObj
closeImpl*: proc (s: BlobStream) {.nimcall, gcsafe.}
readImpl*: proc (s: BlobStream; buffer: pointer; bufLen: int): int {.nimcall, gcsafe.}
IngestStream* = ref IngestStreamObj
IngestStreamObj = object of RootObj
finishImpl*: proc (s: IngestStream): tuple[id: BlobId, size: BiggestInt] {.nimcall, gcsafe.}
ingestImpl*: proc (s: IngestStream; buf: pointer; size: int) {.nimcall, gcsafe.}
proc close*(s: BlobStream) =
assert(not s.closeImpl.isNil)
s.closeImpl(s)
proc read*(s: BlobStream; buf: pointer; len: Natural): int =
assert(not s.readImpl.isNil)
result = s.readImpl(s, buf, len)
proc finish*(s: IngestStream): tuple[id: BlobId, size: BiggestInt] =
## Finish ingest stream
assert(not s.finishImpl.isNil)
s.finishImpl(s)
proc ingest*(s: IngestStream; buf: pointer; size: Natural) =
## Ingest stream
assert(not s.ingestImpl.isNil)
s.ingestImpl(s, buf, size)
proc ingest*(s: IngestStream; buf: var string) =
## Ingest stream
assert(not s.ingestImpl.isNil)
s.ingestImpl(s, buf[0].addr, buf.len)
type
BlobStore* = ref BlobStoreObj
BlobStoreObj* = object of RootObj
closeImpl*: proc (s: BlobStore) {.nimcall, gcsafe.}
putBufferImpl*: proc (s: BlobStore; buf: pointer; len: Natural): Cid {.nimcall, gcsafe.}
putImpl*: proc (s: BlobStore; chunk: string): Cid {.nimcall, gcsafe.}
getBufferImpl*: proc (s: BlobStore; cid: Cid; buf: pointer; len: Natural): int {.nimcall, gcsafe.}
getImpl*: proc (s: BlobStore; cid: Cid; result: var string) {.nimcall, gcsafe.}
openBlobStreamImpl*: proc (s: BlobStore; id: BlobId; size: BiggestInt): BlobStream {.nimcall, gcsafe.}
openIngestStreamImpl*: proc (s: BlobStore; size: BiggestInt): IngestStream {.nimcall, gcsafe.}
proc close*(s: BlobStore) =
## Close active store resources.
if not s.closeImpl.isNil: s.closeImpl(s)
proc putBuffer*(s: BlobStore; buf: pointer; len: Natural): Cid =
## Put a chunk into the store.
assert(0 < len and len <= maxChunkSize)
assert(not s.putBufferImpl.isNil)
s.putBufferImpl(s, buf, len)
proc put*(s: BlobStore; chunk: string): Cid =
## Put a raw chunk into the store. The hash algorithm is chosen by the
## store implementation.
assert(0 < chunk.len and chunk.len <= maxChunkSize)
assert(not s.putImpl.isNil)
s.putImpl(s, chunk)
proc getBuffer*(s: BlobStore; cid: Cid; buf: pointer; len: Natural): int =
## Copy a raw block from the store into a buffer pointer.
assert(0 < len)
assert(not s.getBufferImpl.isNil)
result = s.getBufferImpl(s, cid, buf, len)
assert(0 < result)
proc get*(s: BlobStore; cid: Cid; result: var string) =
## Retrieve a raw block from the store.
assert(not s.getImpl.isNil)
s.getImpl(s, cid, result)
assert(result.len > 0)
proc openBlobStream*(s: BlobStore; id: BlobId; size = 0.BiggestInt): BlobStream =
## Return a new `BlobStream` for reading a blob.
assert(not s.openBlobStreamImpl.isNil)
s.openBlobStreamImpl(s, id, size)
proc openIngestStream*(s: BlobStore; size = 0.BiggestInt): IngestStream =
## Return a new `IngestStream` for ingesting a blob.
assert(not s.openIngestStreamImpl.isNil)
s.openIngestStreamImpl(s, size)
proc get*(s: BlobStore; cid: Cid): string =
## Retrieve a raw block from the store.
result = ""
s.get(cid, result)
proc putDag*(s: BlobStore; dag: CborNode): Cid =
## Place a CBOR DAG node in the store.
var raw = encode dag
s.put raw
proc getDag*(s: BlobStore; cid: Cid): CborNode =
## Retrieve a CBOR DAG from the store.
let stream = newStringStream(s.get(cid))
result = parseCbor stream
close stream
type
FileStore* = ref FileStoreObj
## A store that writes nodes and leaves as files.
FileStoreObj = object of BlobStoreObj
root, buf: string
proc parentAndFile(fs: FileStore; cid: Cid): (string, string) =
## Generate the parent path and file path of CID within the store.
let digest = hex.encode(cid.data)
result[0] = fs.root / digest[0..1]
result[1] = result[0] / digest[2..digest.high]
proc fsPutBuffer(s: BlobStore; buf: pointer; len: Natural): Cid =
var fs = FileStore(s)
result = dagHash(buf, len)
if result != zeroChunk:
let (dir, path) = fs.parentAndFile(result)
if not existsDir dir:
createDir dir
if not existsFile path:
fs.buf.setLen(len)
copyMem(addr fs.buf[0], buf, fs.buf.len)
let
tmp = fs.root / "tmp"
writeFile(tmp, fs.buf)
moveFile(tmp, path)
proc fsPut(s: BlobStore; chunk: string): Cid =
var fs = FileStore(s)
result = dagHash chunk
if result != zeroChunk:
let (dir, path) = fs.parentAndFile(result)
if not existsDir dir:
createDir dir
if not existsFile path:
let
tmp = fs.root / "tmp"
writeFile(tmp, chunk)
moveFile(tmp, path)
proc fsGetBuffer(s: BlobStore; cid: Cid; buf: pointer; len: Natural): int =
var fs = FileStore(s)
let (_, path) = fs.parentAndFile cid
if existsFile path:
let fSize = path.getFileSize
if maxChunkSize < fSize:
discard tryRemoveFile path
raiseMissing cid
if len.int64 < fSize:
raise newException(BufferTooSmall, "file is $1 bytes, buffer is $2" % [$fSize, $len])
let file = open(path, fmRead)
result = file.readBuffer(buf, len)
close file
if result == 0:
raiseMissing cid
proc fsGet(s: BlobStore; cid: Cid; result: var string) =
var fs = FileStore(s)
let (_, path) = fs.parentAndFile cid
if existsFile path:
let fSize = path.getFileSize
if fSize > maxChunkSize:
discard tryRemoveFile path
raiseMissing cid
result.setLen fSize.int
let
file = open(path, fmRead)
n = file.readChars(result, 0, result.len)
close file
doAssert(n == result.len)
else:
raiseMissing cid
func compressTree(leaves: var seq[BlobId]) =
var
ctx: Blake2b256
nodeOffset = 0
nodeDepth = 0
while leaves.len > 1:
nodeOffset = 0
inc nodeDepth
var pos, next: int
while pos < leaves.len:
ctx.init do (params: var Blake2bParams):
params.fanout = 2
params.depth = 255
params.leafLength = blobLeafSize
params.nodeOffset = nodeOffset
params.nodeDepth = nodeDepth
inc nodeOffset
ctx.update(leaves[pos].data)
inc pos
if pos < leaves.len:
ctx.update(leaves[pos].data)
inc pos
leaves[next] = ctx.finish()
inc next
leaves.setLen(next)
# TODO: BLAKE2 tree finalization flags
iterator dumpBlob*(store: BlobStore; id: BlobId): string =
var
stream = store.openBlobStream(id)
buf = newString(blobLeafSize)
defer:
close stream
while true:
buf.setLen(blobLeafSize)
let n = stream.read(buf[0].addr, buf.len)
if n == 0:
break
buf.setLen(n)
yield buf
proc ingestFile*(store: BlobStore; path: string): tuple[id: BlobId, size: BiggestInt] =
## Ingest a file and return blob metadata.
let
file = openAsync(path, fmRead)
fileSize = file.getFileSize
defer:
close file
let stream = store.openIngestStream(fileSize)
if fileSize > 0:
var buf = newString(min(blobLeafSize, fileSize))
while true:
let n = waitFor file.readBuffer(buf[0].addr, buf.len)
if n == 0: break
stream.ingest(buf[0].addr, n)
result = finish stream
proc commit*(store: BlobStore; bs: BlobSet): BlobSet =
assert(bs.kind == hotNode)
for e in bs.table.mitems:
case e.kind
of coldNode, leafNode: discard
of hotNode:
e = store.commit e
let stream = store.openIngestStream()
var buf = encode bs.toCbor
stream.ingest(buf)
let (id, _) = finish stream
result = BlobSet(kind: coldNode, setId: id)
type
FsBlobStream = ref FsBlobStreamObj
FsBlobStreamObj = object of BlobStreamObj
path: string
file: AsyncFile
NullIngestStream = ref NullIngestStreamObj
NullIngestStreamObj = object of IngestStreamObj
ctx: Blake2b256
leaves: seq[BlobId]
pos, nodeOffset: BiggestInt
FsIngestStream = ref FsIngestStreamObj
FsIngestStreamObj = object of IngestStreamObj
ctx: Blake2b256
leaves: seq[BlobId]
path: string
file: AsyncFile
pos, nodeOffset: BiggestInt
proc nullBlobClose(s: BlobStream) = discard
proc nullBlobRead(s: BlobStream; buffer: pointer; len: Natural): int = 0
proc nullOpenBlobStream(s: BlobStore; id: BlobId; size: BiggestInt): BlobStream =
BlobStream(closeImpl: nullBlobClose, readImpl: nullBlobRead)
proc fsBlobClose(s: BlobStream) =
var s = FsBlobStream(s)
close s.file
proc fsBlobRead(s: BlobStream; buffer: pointer; len: Natural): int =
var s = FsBlobStream(s)
result = waitFor s.file.readBuffer(buffer, len)
proc fsOpenBlobStream(s: BlobStore; id: BlobId; size: BiggestInt): BlobStream =
var fs = FileStore(s)
let stream = FsBlobStream()
result = stream
stream.path = fs.root / $id
stream.file = openAsync(stream.path, fmRead)
stream.closeImpl = fsBlobClose
stream.readImpl = fsBlobRead
proc fsFinish(s: IngestStream): tuple[id: BlobId, size: BiggestInt] =
var s = FsIngestStream(s)
close s.file
s.leaves.add finish(s.ctx)
compressTree(s.leaves)
result.id = s.leaves[0]
result.size = s.pos
moveFile(s.path, s.path.parentDir / $(result.id))
proc nullFinish(s: IngestStream): tuple[id: BlobId, size: BiggestInt] =
var s = NullIngestStream(s)
s.leaves.add finish(s.ctx)
compressTree(s.leaves)
result.id = s.leaves[0]
result.size = s.pos
proc nullIngest(s: IngestStream; buf: pointer; len: Natural) =
var
s = NullIngestStream(s)
off = 0
buf = cast[ptr array[blobLeafSize, byte]](buf)
while off < len:
var n = min(blobLeafSize, len-off)
let leafOff = int(s.pos and blobLeafSizeMask)
if leafOff == 0:
if s.pos > 0:
s.leaves.add finish(s.ctx)
s.ctx.init do (params: var Blake2bParams):
params.fanout = 2
params.depth = 255
params.leafLength = blobLeafSize
params.nodeOffset = s.nodeOffset
inc s.nodeOffset
else:
n = min(n, blobLeafSize-leafOff)
s.ctx.update(buf[off].addr, n)
off.inc n
s.pos.inc n
proc fsIngest(s: IngestStream; buf: pointer; len: Natural) =
var
s = FsIngestStream(s)
off = 0
buf = cast[ptr array[blobLeafSize, byte]](buf)
while off < len:
var n = min(blobLeafSize, len-off)
let leafOff = int(s.pos and blobLeafSizeMask)
if leafOff == 0:
if s.pos > 0:
s.leaves.add finish(s.ctx)
s.ctx.init do (params: var Blake2bParams):
params.fanout = 2
params.depth = 255
params.leafLength = blobLeafSize
params.nodeOffset = s.nodeOffset
inc s.nodeOffset
else:
n = min(n, blobLeafSize-leafOff)
s.ctx.update(buf[off].addr, n)
waitFor s.file.writeBuffer(buf[off].addr, n)
off.inc n
s.pos.inc n
proc nullOpenIngestStream(s: BlobStore; size: BiggestInt): IngestStream =
NullIngestStream(
finishImpl: nullFinish, ingestImpl: nullIngest, leaves: newSeq[BlobId]())
proc fsOpenIngestStream(s: BlobStore; size: BiggestInt): IngestStream =
var fs = FileStore(s)
let stream = FsIngestStream()
result = stream
stream.finishImpl = fsFinish
stream.ingestImpl = fsIngest
stream.path = fs.root / "ingest"
stream.file = openAsync(stream.path, fmWrite)
if size > 0:
stream.file.setFileSize(size)
stream.leaves = newSeqOfCap[BlobId](leafCount size)
else:
stream.leaves = newSeq[BlobId]()
proc newNullStore*(): BlobStore =
BlobStore(
openBlobStreamImpl: nullOpenBlobStream,
openIngestStreamImpl: nullOpenIngestStream)
proc newFileStore*(root: string): FileStore =
if not existsDir(root):
createDir root
new result
result.putBufferImpl = fsPutBuffer
result.putImpl = fsPut
result.getBufferImpl = fsGetBuffer
result.getImpl = fsGet
result.openBlobStreamImpl = fsOpenBlobStream
result.openIngestStreamImpl = fsOpenIngestStream
result.root = root
result.buf = ""


@ -1,5 +1,5 @@
import std/asyncnet, std/asyncdispatch, std/streams
import ../dagfs, ./stores
import ../blobsets, ./stores
const
defaultPort = Port(1023)
@ -55,9 +55,9 @@ type
TcpServer* = ref TcpServerObj
TcpServerObj = object
sock: AsyncSocket
store: DagfsStore
store: BlobStore
proc newTcpServer*(store: DagfsStore; port = defaultPort): TcpServer =
proc newTcpServer*(store: BlobStore; port = defaultPort): TcpServer =
## Create a new TCP server that serves `store`.
result = TcpServer(sock: newAsyncSocket(buffered=true), store: store)
result.sock.bindAddr(port)
@ -153,11 +153,11 @@ proc close*(server: TcpServer) =
type
TcpClient* = ref TcpClientObj
TcpClientObj = object of DagfsStoreObj
TcpClientObj = object of BlobStoreObj
sock: AsyncSocket
buf: string
proc tcpClientPutBuffer(s: DagfsStore; buf: pointer; len: Natural): Cid =
proc tcpClientPutBuffer(s: BlobStore; buf: pointer; len: Natural): Cid =
var client = TcpClient(s)
result = dagHash(buf, len)
if result != zeroChunk:
@ -186,7 +186,7 @@ proc tcpClientPutBuffer(s: DagfsStore; buf: pointer; len: Natural): Cid =
else:
raiseAssert "invalid server message"
proc tcpClientPut(s: DagfsStore; chunk: string): Cid =
proc tcpClientPut(s: BlobStore; chunk: string): Cid =
var client = TcpClient(s)
result = dagHash chunk
if result != zeroChunk:
@ -215,7 +215,7 @@ proc tcpClientPut(s: DagfsStore; chunk: string): Cid =
else:
raiseAssert "invalid server message"
proc tcpClientGetBuffer(s: DagfsStore; cid: Cid; buf: pointer; len: Natural): int =
proc tcpClientGetBuffer(s: BlobStore; cid: Cid; buf: pointer; len: Natural): int =
var
client = TcpClient(s)
msg: Message
@ -242,7 +242,7 @@ proc tcpClientGetBuffer(s: DagfsStore; cid: Cid; buf: pointer; len: Natural): in
else:
raiseMissing cid
proc tcpClientGet(s: DagfsStore; cid: Cid; result: var string) =
proc tcpClientGet(s: BlobStore; cid: Cid; result: var string) =
result.setLen maxChunkSize
let n = s.getBuffer(cid, result[0].addr, result.len)
result.setLen n


@ -1,153 +0,0 @@
import std/hashes, std/streams, std/strutils
import base58/bitcoin, cbor, siphash
import ./dagfs/priv/hex, ./dagfs/priv/blake2
const
maxChunkSize* = 1 shl 18
## Maximum supported chunk size.
digestLen* = 32
## Length of a chunk digest.
cidSize* = digestLen
## Size of CID object in memory
type Cid* = object
## Chunk IDentifier
digest*: array[digestLen, uint8]
proc initCid*(): Cid = Cid()
## Initialize an invalid CID.
proc isValid*(x: Cid): bool =
## Check that a CID has been properly initialized.
for c in x.digest.items:
if c != 0: return true
proc `==`*(x, y: Cid): bool =
## Compare two CIDs.
for i in 0..<digestLen:
if x.digest[i] != y.digest[i]:
return false
true
proc `==`*(cbor: CborNode; cid: Cid): bool =
## Compare a CBOR node with a CID.
if cbor.kind == cborBytes:
for i in 0..<digestLen:
if cid.digest[i] != cbor.bytes[i].uint8:
return false
result = true
proc hash*(cid: Cid): Hash =
## Reduce a CID into an integer for use in tables.
var zeroKey: Key
result = cast[Hash](sipHash(cid.digest, zeroKey))
proc toCbor*(cid: Cid): CborNode = newCborBytes cid.digest
## Generate a CBOR representation of a CID.
proc toCid*(cbor: CborNode): Cid =
## Generate a CBOR representation of a CID.
assert(cbor.bytes.len == digestLen)
for i in 0..<digestLen:
result.digest[i] = cbor.bytes[i].uint8
{.deprecated: [newCborBytes: toCbor].}
proc toHex*(cid: Cid): string = hex.encode(cid.digest)
## Return CID encoded in hexidecimal.
proc writeUvarint*(s: Stream; n: SomeInteger) =
## Write an IPFS varint
var n = n
while true:
let c = int8(n and 0x7f)
n = n shr 7
if n == 0:
s.write((char)c.char)
break
else:
s.write((char)c or 0x80)
proc readUvarint*(s: Stream): BiggestInt =
## Read an IPFS varint
var shift: int
while shift < (9*8):
let c = (BiggestInt)s.readChar
result = result or ((c and 0x7f) shl shift)
if (c and 0x80) == 0:
break
shift.inc 7
proc toIpfs*(cid: Cid): string =
## Return CID encoded in IPFS multimulti.
const
multiRaw = 0x55
multiBlake2b_256 = 0xb220
let s = newStringStream()
s.writeUvarint 1
s.writeUvarint multiRaw
s.writeUvarint multi_blake2b_256
s.writeUvarint digestLen
for e in cid.digest:
s.write e
s.setPosition 0
result = 'z' & bitcoin.encode(s.readAll)
close s
proc `$`*(cid: Cid): string = toHex cid
## Return CID in base 58, the default textual encoding.
proc parseCid*(s: string): Cid =
## Detect CID encoding and parse from a string.
var raw = parseHexStr s
if raw.len != digestLen:
raise newException(ValueError, "invalid ID length")
for i in 0..<digestLen:
result.digest[i] = raw[i].byte
const
zeroChunk* = parseCid "8ddb61928ec76e4ee904cd79ed977ab6f5d9187f1102975060a6ba6ce10e5481"
## CID of zero chunk of maximum size.
proc take*(cid: var Cid; buf: var string) =
## Take a raw digest from a string buffer.
doAssert(buf.len == digestLen)
copyMem(cid.digest[0].addr, buf[0].addr, digestLen)
proc dagHash*(buf: pointer; len: Natural): Cid =
## Generate a CID for a string of data using the BLAKE2b hash algorithm.
assert(len <= maxChunkSize)
var b: Blake2b
blake2b_init(b, digestLen, nil, 0)
blake2b_update(b, buf, len)
var s = blake2b_final(b)
copyMem(result.digest[0].addr, s[0].addr, digestLen)
proc dagHash*(data: string): Cid =
## Generate a CID for a string of data using the BLAKE2b hash algorithm.
assert(data.len <= maxChunkSize)
var b: Blake2b
blake2b_init(b, digestLen, nil, 0)
blake2b_update(b, data, data.len)
var s = blake2b_final(b)
copyMem(result.digest[0].addr, s[0].addr, digestLen)
proc verify*(cid: Cid; data: string): bool =
## Verify that a string of data corresponds to a CID.
var b: Blake2b
blake2b_init(b, digestLen, nil, 0)
blake2b_update(b, data, data.len)
let digest = blake2b_final(b)
for i in 0..<digestLen:
if cid.digest[i] != digest[i]:
return false
true
iterator simpleChunks*(s: Stream; size = maxChunkSize): string =
## Iterator that breaks a stream into simple chunks.
doAssert(size <= maxChunkSize)
var tmp = newString(size)
while not s.atEnd:
tmp.setLen(size)
tmp.setLen(s.readData(tmp[0].addr, size))
yield tmp


@ -1,155 +0,0 @@
import std/streams, std/strutils, std/os
import std/asyncfile, std/asyncdispatch
import cbor
import ../dagfs, ./priv/hex
type
MissingChunk* = ref object of CatchableError
cid*: Cid ## Missing chunk identifier
BufferTooSmall* = object of CatchableError
template raiseMissing*(cid: Cid) =
raise MissingChunk(msg: "chunk missing from store", cid: cid)
type
DagfsStore* = ref DagfsStoreObj
DagfsStoreObj* = object of RootObj
closeImpl*: proc (s: DagfsStore) {.nimcall, gcsafe.}
putBufferImpl*: proc (s: DagfsStore; buf: pointer; len: Natural): Cid {.nimcall, gcsafe.}
putImpl*: proc (s: DagfsStore; chunk: string): Cid {.nimcall, gcsafe.}
getBufferImpl*: proc (s: DagfsStore; cid: Cid; buf: pointer; len: Natural): int {.nimcall, gcsafe.}
getImpl*: proc (s: DagfsStore; cid: Cid; result: var string) {.nimcall, gcsafe.}
proc close*(s: DagfsStore) =
## Close active store resources.
if not s.closeImpl.isNil: s.closeImpl(s)
proc putBuffer*(s: DagfsStore; buf: pointer; len: Natural): Cid =
## Put a chunk into the store.
assert(0 < len and len <= maxChunkSize)
assert(not s.putBufferImpl.isNil)
s.putBufferImpl(s, buf, len)
proc put*(s: DagfsStore; chunk: string): Cid =
## Place a raw block to the store. The hash argument specifies a required
## hash algorithm, or defaults to a algorithm choosen by the store
## implementation.
assert(0 < chunk.len and chunk.len <= maxChunkSize)
assert(not s.putImpl.isNil)
s.putImpl(s, chunk)
proc getBuffer*(s: DagfsStore; cid: Cid; buf: pointer; len: Natural): int =
## Copy a raw block from the store into a buffer pointer.
assert(cid.isValid)
assert(0 < len)
assert(not s.getBufferImpl.isNil)
result = s.getBufferImpl(s, cid, buf, len)
assert(0 < result)
proc get*(s: DagfsStore; cid: Cid; result: var string) =
## Retrieve a raw block from the store.
assert(not s.getImpl.isNil)
assert cid.isValid
s.getImpl(s, cid, result)
assert(result.len > 0)
proc get*(s: DagfsStore; cid: Cid): string =
## Retrieve a raw block from the store.
result = ""
s.get(cid, result)
proc putDag*(s: DagfsStore; dag: CborNode): Cid =
## Place an Dagfs node in the store.
var raw = encode dag
s.put raw
proc getDag*(s: DagfsStore; cid: Cid): CborNode =
## Retrieve an CBOR DAG from the store.
let stream = newStringStream(s.get(cid))
result = parseCbor stream
close stream
type
FileStore* = ref FileStoreObj
## A store that writes nodes and leafs as files.
FileStoreObj = object of DagfsStoreObj
root, buf: string
proc parentAndFile(fs: FileStore; cid: Cid): (string, string) =
## Generate the parent path and file path of CID within the store.
let digest = hex.encode(cid.digest)
result[0] = fs.root / digest[0..1]
result[1] = result[0] / digest[2..digest.high]
proc fsPutBuffer(s: DagfsStore; buf: pointer; len: Natural): Cid =
var fs = FileStore(s)
result = dagHash(buf, len)
if result != zeroChunk:
let (dir, path) = fs.parentAndFile(result)
if not existsDir dir:
createDir dir
if not existsFile path:
fs.buf.setLen(len)
copyMem(addr fs.buf[0], buf, fs.buf.len)
let
tmp = fs.root / "tmp"
writeFile(tmp, fs.buf)
moveFile(tmp, path)
proc fsPut(s: DagfsStore; chunk: string): Cid =
var fs = FileStore(s)
result = dagHash chunk
if result != zeroChunk:
let (dir, path) = fs.parentAndFile(result)
if not existsDir dir:
createDir dir
if not existsFile path:
let
tmp = fs.root / "tmp"
writeFile(tmp, chunk)
moveFile(tmp, path)
proc fsGetBuffer(s: DagfsStore; cid: Cid; buf: pointer; len: Natural): int =
var fs = FileStore(s)
let (_, path) = fs.parentAndFile cid
if existsFile path:
let fSize = path.getFileSize
if maxChunkSize < fSize:
discard tryRemoveFile path
raiseMissing cid
if len.int64 < fSize:
raise newException(BufferTooSmall, "file is $1 bytes, buffer is $2" % [$fSize, $len])
let file = open(path, fmRead)
result = file.readBuffer(buf, len)
close file
if result == 0:
raiseMissing cid
proc fsGet(s: DagfsStore; cid: Cid; result: var string) =
var fs = FileStore(s)
let (_, path) = fs.parentAndFile cid
if existsFile path:
let fSize = path.getFileSize
if fSize > maxChunkSize:
discard tryRemoveFile path
raiseMissing cid
result.setLen fSize.int
let
file = open(path, fmRead)
n = file.readChars(result, 0, result.len)
close file
doAssert(n == result.len)
else:
raiseMissing cid
proc newFileStore*(root: string): FileStore =
## Blocks retrieved by `get` are not hashed and verified.
if not existsDir(root):
createDir root
new result
result.putBufferImpl = fsPutBuffer
result.putImpl = fsPut
result.getBufferImpl = fsGetBuffer
result.getImpl = fsGet
result.root = root
result.buf = ""