Refactor UnixFS

UnixFS files now contain a seq of links. Walking will cache nodes
in intermediate directories.
Ehmry - 2017-12-14 23:52:57 -06:00
parent 6fd4756222
commit 3c83a65341
5 changed files with 281 additions and 169 deletions
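For orientation, a minimal usage sketch of the API as it reads after this commit (not part of the diff; the store path and `rootCid` are hypothetical):

import asyncdispatch
import ipld, ipldstore, unixfs

proc demo(rootCid: Cid) {.async.} =
  # open an on-disk block store and parse the root directory node
  let store = newFileStore("/tmp/ipld-demo")
  let root = await store.openDir(rootCid)
  # walk fetches shallow intermediate directories on demand; with the
  # default cache = true the parsed node replaces the shallow entry in
  # its parent, so repeated walks skip the store
  let file = await store.walk(root, "docs/readme.md")
  if not file.isNil and file.isFile:
    echo "size: ", file.size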

View File

@@ -12,6 +12,11 @@ A Lisp REPL utility for storing files and directories in IPLD.
 Standard Lisp `apply` function, apply a list as arguments to a function.
 
+#### `(cbor <cid>)`
+Return CBOR encoding of UnixFS node as a diagnostic string.
+Provided for illustrating canonicalized CBOR encoding.
+
 #### `(cons <head> <tail>)`
 Standard Lisp `cons` function, prepend to a list.
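A usage note for the new `(cbor <cid>)` form above: assuming `d` is bound to a directory CID (hypothetical binding), `(cbor d)` prints the node's canonicalized CBOR in diagnostic form.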

View File

@@ -5,7 +5,6 @@ type Cid* = object
     hash*: MulticodecTag
     codec*: MulticodecTag
     ver*: int
-    logicalLen*: int # not included in canonical representation
 
 proc initCid*(): Cid =
   ## Initialize an invalid CID.
@@ -46,7 +45,8 @@ proc toBin(cid: Cid): string =
 proc toRaw*(cid: Cid): string =
   MultibaseTag.Identity.char & cid.toBin
 
-proc toCbor*(cid: Cid): CborNode = newCborBytes cid.toRaw
+proc newCborBytes*(cid: Cid): CborNode = newCborBytes cid.toRaw
+proc toCbor*(cid: Cid): CborNode {.deprecated.} = cid.newCborBytes
 
 proc toHex*(cid: Cid): string =
   MultibaseTag.Base16.char & hex.encode(cid.toBin)
@@ -85,15 +85,13 @@ proc parseCid*(s: string): Cid =
   result.digest = raw[off..raw.high]
   result.hash = hash.MulticodecTag
   result.codec = codec.MulticodecTag
-  result.logicalLen = -1
 
 proc CidSha256*(data: string; codec = MulticodecTag.Raw): Cid =
   Cid(
     digest: $computeSHA256(data),
     hash: MulticodecTag.Sha2_256,
     codec: codec,
-    ver: 1,
-    logicalLen: data.len)
+    ver: 1)
 
 proc verify*(cid: Cid; data: string): bool =
   case cid.hash
@@ -139,13 +137,13 @@ proc merge*(dag, other: Dag) =
       result = newCborArray()
       dag["links"] = result
   if not otherLinks.isNil:
-    for link in otherLinks.list:
+    for link in otherLinks.seq:
       block insert:
         var i: int
-        while i < result.list.len:
-          let L = result.list[i]
+        while i < result.seq.len:
+          let L = result.seq[i]
           if L["name"].getString == link["name"].getString:
-            result.list[i] = link
+            result.seq[i] = link
             # replace
             break insert
          inc i
@@ -169,8 +167,8 @@ proc fileLen*(dag: Dag; name: string): int =
 ]#
 
 iterator simpleChunks*(s: Stream; size = 256 * 1024): (Cid, string) =
-  var result: (Cid, string)
   while not s.atEnd:
+    var result: (Cid, string)
     result[1] = s.readStr size
     result[0] = result[1].CidSHA256(MulticodecTag.Raw)
     yield result
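A hedged sketch of driving `simpleChunks` directly (not in the commit): it splits a file into 256 KiB raw blocks and stores each one, assuming the `put` proc introduced in the store diff below.

import streams, asyncdispatch, ipld, ipldstore

proc putChunks(store: IpldStore; path: string) =
  let fs = newFileStream(path, fmRead)
  for cid, chunk in fs.simpleChunks:
    # each chunk is content-addressed by its SHA-256 CID
    discard waitFor store.put(chunk)
    echo cid.toHex
  close fs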

View File

@@ -1,4 +1,4 @@
-import rdstdin, nre, os, strutils, tables, asyncdispatch, asyncstreams, parseopt, streams
+import rdstdin, nre, os, strutils, tables, asyncdispatch, asyncstreams, parseopt, streams, cbor
 import ipld, ipldstore, unixfs, multiformats
@@ -154,10 +154,6 @@ proc append(list: NodeRef; n: NodeObj) =
   p[] = n
   list.append p
 
-proc isAtom(n: Node): bool = n.kind == nodeAtom
-proc isFunc(n: Node): bool = n.kind == nodeFunc
-proc isList(n: Node): bool = n.kind == nodeList
-
 proc getFile(env: Env; path: string): UnixFsNode =
   result = env.paths.getOrDefault path
   if result.isNil:
@@ -188,9 +184,8 @@ proc getUnixfs(env: Env; cid: Cid): UnixFsNode =
   assert cid.isValid
   result = env.cids.getOrDefault cid
   if result.isNil:
-    let dag = waitFor env.store.getDag(cid)
-    assert(not dag.isNil)
-    result = parseUnixfs(dag, cid)
+    let raw = waitFor env.store.get(cid)
+    result = parseUnixfs(raw, cid)
     env.cids[cid] = result
     when not defined(release):
       inc env.cidCacheMiss
@@ -230,7 +225,7 @@ proc print(a: Atom; s: Stream) =
     s.write ':'
     s.write a.fName
     s.write ':'
-    s.write $a.file.fSize
+    s.write $a.file.size
   of atomDir:
     s.write "\n"
     s.write $a.dir.cid
@@ -380,6 +375,16 @@ proc catFunc(env: Env; arg: NodeObj): Node =
   ]#
   result = newNodeError("cat not implemented", arg)
 
+proc cborFunc(env: Env; arg: NodeObj): Node =
+  let a = arg.atom
+  if a.cid.isDagCbor:
+    let
+      ufsNode = env.getUnixfs a.cid
+      diag = $ufsNode.toCbor
+    diag.newAtomString.newNode
+  else:
+    "".newAtomString.newNode
+
 proc consFunc(env: Env; args: NodeObj): Node =
   result = newNodeList()
   let
@@ -449,14 +454,14 @@ proc lsFunc(env: Env; args: NodeObj): Node =
       let a = n.atom
       if a.cid.isDagCbor:
         let ufsNode = env.getUnixfs a.cid
-        if ufsNode.kind == rootNode:
+        if ufsNode.isDir:
           for name, u in ufsNode.items:
             assert(not name.isNil)
             assert(not u.isNil, name & " is nil")
             case u.kind:
-            of fileNode:
+            of fileNode, shallowFile:
               result.append Atom(kind: atomFile, fName: name, file: u).newNode
-            of dirNode, rootNode:
+            of dirNode, shallowDir:
               result.append Atom(kind: atomDir, dName: name, dir: u).newNode
       else:
         raiseAssert("ls over a raw IPLD block")
@@ -519,6 +524,7 @@ proc newEnv(storePath: string): Env =
     cids: initTable[Cid, UnixfsNode]())
   result.bindEnv "apply", applyFunc
   result.bindEnv "cat", catFunc
+  result.bindEnv "cbor", cborFunc
   result.bindEnv "cons", consFunc
   result.bindEnv "define", defineFunc
   result.bindEnv "dump", dumpFunc

View File

@@ -11,41 +11,45 @@ type
   IpldStore* = ref IpldStoreObj
   IpldStoreObj* = object of RootObj
     closeImpl*: proc (s: IpldStore) {.nimcall, gcsafe.}
-    putRawImpl*: proc (s: IpldStore; blk: string): Future[Cid] {.nimcall, gcsafe.}
-    getRawImpl*: proc (s: IpldStore; cid: Cid): Future[string] {.nimcall, gcsafe.}
-    putDagImpl*: proc (s: IpldStore; dag: Dag): Future[Cid] {.nimcall, gcsafe.}
+    putImpl*: proc (s: IpldStore; blk: string): Future[Cid] {.nimcall, gcsafe.}
+    getImpl*: proc (s: IpldStore; cid: Cid): Future[string] {.nimcall, gcsafe.}
     fileStreamImpl*: proc (s: IpldStore; cid: Cid; fut: FutureStream[string]): Future[void] {.nimcall, gcsafe.}
 
 proc close*(s: IpldStore) =
   ## Close active store resources.
   if not s.closeImpl.isNil: s.closeImpl(s)
 
-proc putRaw*(s: IpldStore; blk: string): Future[Cid] =
+proc put*(s: IpldStore; blk: string): Future[Cid] =
   ## Place a raw block to the store.
-  assert(not s.putRawImpl.isNil)
-  s.putRawImpl(s, blk)
+  assert(not s.putImpl.isNil)
+  s.putImpl(s, blk)
 
-proc getRaw*(s: IpldStore; cid: Cid): Future[string] =
+proc get*(s: IpldStore; cid: Cid): Future[string] =
   ## Retrieve a raw block from the store.
   assert cid.isValid
-  assert(not s.getRawImpl.isNil)
-  result = s.getRawImpl(s, cid)
-  echo "returning future for generic getRaw"
+  assert(not s.getImpl.isNil)
+  s.getImpl(s, cid)
+
+{.deprecated: [putRaw: put, getRaw: get].}
 
-proc putDag*(s: IpldStore; dag: Dag): Future[Cid] =
+proc putDag*(s: IpldStore; dag: Dag): Future[Cid] {.async.} =
   ## Place an IPLD node in the store.
-  assert(not s.putDagImpl.isNil)
-  s.putDagImpl(s, dag)
+  assert(not s.putImpl.isNil)
+  let
+    raw = dag.toBinary
+    cid = raw.CidSha256(MulticodecTag.DagCbor)
+  discard await s.putImpl(s, raw)
+  result = cid
 
 proc getDag*(s: IpldStore; cid: Cid): Future[Dag] {.async.} =
   ## Retrieve an IPLD node from the store.
   assert cid.isValid
-  assert(not s.getRawImpl.isNil)
-  let raw = await s.getRawImpl(s, cid)
+  assert(not s.getImpl.isNil)
+  let raw = await s.getImpl(s, cid)
   assert(not raw.isNil)
   result = parseDag raw
 
-proc fileStream*(s: IpldStore; cid: Cid; fut: FutureStream[string]): Future[void] {.async.} =
+proc fileStream*(s: IpldStore; cid: Cid; fut: FutureStream[string]): Future[void] {.async, deprecated.} =
   ## Asynchronously stream a file from a CID list.
   ## TODO: doesn't need to be a file, can be a raw CID or
   ## a DAG that is simply a list of other CIDs.
@@ -55,7 +59,7 @@ proc fileStream*(s: IpldStore; cid: Cid; fut: FutureStream[string]): Future[void
   else:
     # use the simple implementation
     if cid.isRaw:
-      let blk = await s.getRaw(cid)
+      let blk = await s.get(cid)
       await fut.write(blk)
     elif cid.isDagCbor:
       let dag = await s.getDag(cid)
@@ -99,13 +103,13 @@ proc putToFile(fs: FileStore; cid: Cid; blk: string) {.async.} =
   close file
   moveFile(tmp, path)
 
-proc fsPutRaw(s: IpldStore; blk: string): Future[Cid] {.async.} =
+proc fsPut(s: IpldStore; blk: string): Future[Cid] {.async.} =
   var fs = FileStore(s)
   let cid = blk.CidSha256
   await fs.putToFile(cid, blk)
 
-proc fsGetRaw(s: IpldStore; cid: Cid): Future[string] =
-  result = newFuture[string]("fsGetRaw")
+proc fsGet(s: IpldStore; cid: Cid): Future[string] =
+  result = newFuture[string]("fsGet")
   var fs = FileStore(s)
   let (_, path) = fs.parentAndFile cid
   if existsFile path:
@@ -120,14 +124,6 @@ proc fsGetRaw(s: IpldStore; cid: Cid): Future[string] =
   if not result.finished:
     result.fail cid.newMissingObject
 
-proc fsPutDag(s: IpldStore; dag: Dag): Future[Cid] {.async.} =
-  var fs = FileStore(s)
-  let
-    blk = dag.toBinary
-    cid = blk.CidSha256(MulticodecTag.DagCbor)
-  await fs.putToFile(cid, blk)
-  result = cid
-
 proc fsFileStreamRecurs(fs: FileStore; cid: Cid; fut: FutureStream[string]) {.async.} =
   if cid.isRaw:
     let (_, path) = fs.parentAndFile cid
@@ -160,8 +156,7 @@ proc newFileStore*(root: string): FileStore =
   if not existsDir(root):
     createDir root
   new result
-  result.putRawImpl = fsPutRaw
-  result.getRawImpl = fsGetRaw
-  result.putDagImpl = fsPutDag
+  result.putImpl = fsPut
+  result.getImpl = fsGet
   result.fileStreamImpl = fsFileStream
   result.root = root
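The collapsed putImpl/getImpl slots make alternative back ends easy to wire up. A hedged sketch of a toy in-memory store (not part of this commit), mirroring how FileStore registers its implementations above; it assumes `newMissingObject` is exported by ipldstore:

import asyncdispatch, tables, ipld, ipldstore

type
  MemStore = ref MemStoreObj
  MemStoreObj = object of IpldStoreObj
    blocks: Table[Cid, string]

proc memPut(s: IpldStore; blk: string): Future[Cid] {.async.} =
  # hash the block and keep it in memory
  let ms = MemStore(s)
  let cid = blk.CidSha256
  ms.blocks[cid] = blk
  result = cid

proc memGet(s: IpldStore; cid: Cid): Future[string] =
  result = newFuture[string]("memGet")
  let ms = MemStore(s)
  if ms.blocks.contains cid:
    result.complete ms.blocks[cid]
  else:
    result.fail cid.newMissingObject

proc newMemStore(): MemStore =
  new result
  result.blocks = initTable[Cid, string]()
  result.putImpl = memPut
  result.getImpl = memGet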

View File

@@ -1,10 +1,10 @@
-import asyncdispatch, strutils, multiformats, streams, tables, cbor, os, hex
+import asyncdispatch, strutils, multiformats, streams, tables, cbor, os, hex, math
 import ipld, ipldstore
 
 type EntryKey = enum
   typeKey = 1,
-  contentKey = 2,
+  dataKey = 2,
   sizeKey = 3
 
 type UnixFsType* = enum
@@ -12,37 +12,46 @@ type UnixFsType* = enum
   ufsDir = 1
 
 type UnixFsKind* = enum
-  rootNode,
+  fileNode,
   dirNode,
-  fileNode
+  shallowDir,
+  shallowFile
 
 type
+  FileLink* = object
+    cid*: Cid
+    size*: int
+
   UnixFsNode* = ref object
     cid: Cid
     case kind*: UnixFsKind
-    of rootNode:
-      entries: OrderedTable[string, UnixFsNode]
-    of dirNode:
-      discard
     of fileNode:
-      fSize*: BiggestInt
+      links*: seq[FileLink]
+    of dirNode:
+      entries: OrderedTable[string, UnixFsNode]
+    of shallowFile, shallowDir:
+      discard
+    size: BiggestInt
 
 proc cid*(u: UnixFsNode): Cid =
   assert u.cid.isValid
   u.cid
 
-proc isFile*(u: UnixfsNode): bool = u.kind == fileNode
-proc isDir*(u: UnixfsNode): bool = u.kind in {rootNode, dirNode}
+proc isFile*(u: UnixfsNode): bool = u.kind in { fileNode, shallowFile }
+proc isDir*(u: UnixfsNode): bool = u.kind in { dirNode, shallowDir }
+
+proc size*(u: UnixfsNode): BiggestInt =
+  if u.kind == dirNode: u.entries.len.BiggestInt
+  else: u.size
 
 proc newUnixFsRoot*(): UnixFsNode =
   UnixFsNode(
     cid: initCid(),
-    kind: rootNode,
+    kind: dirNode,
     entries: initOrderedTable[string, UnixFsNode](8))
 
-proc newUnixFsFile*(cid: Cid; size: int): UnixFsNode =
-  UnixFsNode(kind: fileNode, cid: cid, fSize: size)
+proc newUnixfsFile*(): UnixFsNode =
+  UnixFsNode(kind: fileNode, cid: initCid())
 
 proc newUnixfsDir*(cid: Cid): UnixFsNode =
   UnixFsNode(cid: cid, kind: dirNode)
@@ -56,69 +65,155 @@ proc addDir*(root: var UnixFsNode; name: string; cid: Cid) {.deprecated.} =
 proc addFile*(root: var UnixFsNode; name: string; cid: Cid; size: BiggestInt) {.deprecated.} =
   assert cid.isValid
-  root.add name, UnixFsNode(kind: fileNode, cid: cid, fSize: size)
+  root.add name, UnixFsNode(kind: fileNode, cid: cid, size: size)
 
 proc del*(dir: var UnixFsNode; name: string) =
   dir.entries.del name
 
-proc toCbor*(root: UnixFsNode): CborNode =
-  result = newCborMap()
-  for name, node in root.entries:
-    var entry = newCborMap()
-    case node.kind
-    of rootNode, dirNode:
-      entry[typeKey.int] = newCborInt ufsDir.int
-      entry[contentKey.int] = node.cid.toCbor
-    of fileNode:
-      entry[typeKey.int] = newCborInt ufsFile.int
-      entry[contentKey.int] = node.cid.toCbor
-      entry[sizeKey.int] = newCborInt node.fSize
-    result[name] = entry
-  # TODO: the CBOR maps must be sorted
+const
+  DirTag* = 0xda3c80 ## CBOR tag for UnixFS directories
+  FileTag* = 0xda3c81 ## CBOR tag for UnixFS files
+
+proc toCbor*(u: UnixFsNode): CborNode =
+  case u.kind
+  of fileNode:
+    if u.links.isNil:
+      raiseAssert "cannot encode single-chunk files"
+    let array = newCborArray()
+    array.seq.setLen u.links.len
+    for i in 0..u.links.high:
+      let L = newCborMap()
+      # typeEntry is reserved but not in use
+      L[dataKey.int] = u.links[i].cid.newCborBytes
+      L[sizeKey.int] = u.links[i].size.newCborInt
+      array.seq[i] = L
+    result = newCborTag(FileTag, array)
+  of dirNode:
+    let map = newCborMap()
+    for name, node in u.entries:
+      var entry = newCborMap()
+      case node.kind
+      of fileNode, shallowFile:
+        entry[typeKey.int] = ufsFile.int.newCborInt
+        entry[dataKey.int] = node.cid.newCborBytes
+        entry[sizeKey.int] = node.size.newCborInt
+      of dirNode:
+        entry[typeKey.int] = ufsDir.int.newCborInt
+        entry[dataKey.int] = node.cid.newCborBytes
+        entry[sizeKey.int] = node.entries.len.newCborInt
+      of shallowDir:
+        entry[typeKey.int] = ufsDir.int.newCborInt
+        entry[dataKey.int] = node.cid.newCborBytes
+        entry[sizeKey.int] = node.size.int.newCborInt
+      map[name] = entry
+    # TODO: the CBOR maps must be sorted
+    result = newCborTag(DirTag, map)
+  else:
+    raiseAssert "shallow UnixfsNodes can not be encoded"
+
+template parseAssert(cond: bool; msg = "") =
+  if not cond: raise newException(
+    ValueError,
+    if msg == "": "invalid UnixFS CBOR" else: "invalid UnixFS CBOR, " & msg)
 
-proc parseUnixfs*(c: CborNode; cid: Cid): UnixFsNode =
-  assert(not c.isNil)
-  result = newUnixFsRoot()
+proc parseUnixfs*(raw: string; cid: Cid): UnixFsNode =
+  ## Parse a string containing CBOR data into a UnixFsNode.
+  assert(not raw.isNil)
+  new result
   result.cid = cid
-  for k, v in c.map.pairs:
-    let
-      name = k.getString
-      t = v[typeKey.int].getInt.UnixFsType
-      subCid = v[contentKey.int].getBytes.parseCid
-    case t
-    of ufsDir:
-      result.addDir(name, subCid)
-    of ufsFile:
-      let size = v[sizeKey.int]
-      if not size.isNil:
-        result.addFile(name, subCid, size.getInt)
-      else:
-        result.addFile(name, subCid, 0)
-    else:
-      discard
+  var
+    c: CborParser
+    buf = ""
+  open(c, newStringStream(raw))
+  next c
+  parseAssert(c.kind == CborEventKind.cborTag, "data not tagged")
+  let tag = c.parseTag
+  if tag == FileTag:
+    result.kind = fileNode
+    next c
+    parseAssert(c.kind == CborEventKind.cborArray, "file data not an array")
+    let nLinks = c.arrayLen
+    result.links = newSeq[FileLink](nLinks)
+    for i in 0..<nLinks:
+      next c
+      parseAssert(c.kind == CborEventKind.cborMap, "file array does not contain maps")
+      let nAttrs = c.mapLen
+      for _ in 1..nAttrs:
+        next c
+        parseAssert(c.kind == CborEventKind.cborPositive, "link map key not an integer")
+        let key = c.parseInt.EntryKey
+        next c
+        case key
+        of typeKey:
+          parseAssert(false, "type file links are not supported")
+        of dataKey:
+          parseAssert(c.kind == CborEventKind.cborBytes, "CID not encoded as bytes")
+          c.readBytes buf
+          result.links[i].cid = buf.parseCid
+        of sizeKey:
+          parseAssert(c.kind == CborEventKind.cborPositive, "link size not encoded properly")
+          result.links[i].size = c.parseInt
+          result.size.inc result.links[i].size
+  elif tag == DirTag:
+    result.kind = dirNode
+    next c
+    parseAssert(c.kind == CborEventKind.cborMap)
+    let dirLen = c.mapLen
+    parseAssert(dirLen != -1, raw)
+    result.entries = initOrderedTable[string, UnixFsNode](dirLen.nextPowerOfTwo)
+    for i in 1 .. dirLen:
+      next c
+      parseAssert(c.kind == CborEventKind.cborText, raw)
+      c.readText buf
+      parseAssert(not buf.contains({ '/', '\0'}), raw)
+      next c
+      parseAssert(c.kind == CborEventKind.cborMap)
+      let nAttrs = c.mapLen
+      parseAssert(nAttrs > 1, raw)
+      let entry = new UnixFsNode
+      result.entries[buf] = entry
+      for i in 1 .. nAttrs:
+        next c
+        parseAssert(c.kind == CborEventKind.cborPositive)
+        case c.parseInt.EntryKey
+        of typeKey:
+          next c
+          case c.parseInt.UnixFsType
+          of ufsFile: entry.kind = shallowFile
+          of ufsDir: entry.kind = shallowDir
+        of dataKey:
+          next c
+          c.readBytes buf
+          entry.cid = buf.parseCid
+        of sizeKey:
+          next c
+          entry.size = c.parseInt
+  else:
+    parseAssert(false, raw)
+  next c
+  parseAssert(c.kind == cborEof, "trailing data")
 
-proc toStream*(dir: UnixFsNode; s: Stream) =
-  doAssert(dir.kind == rootNode)
-  let c = dir.toCbor()
+proc toStream*(node: UnixFsNode; s: Stream) =
+  let c = node.toCbor()
   c.toStream s
 
-iterator items*(root: UnixFsNode): (string, UnixFsNode) =
-  assert(not root.isNil)
-  assert(root.kind == rootNode)
-  for k, v in root.entries.pairs:
+iterator items*(dir: UnixFsNode): (string, UnixFsNode) =
+  assert(not dir.isNil)
+  assert(dir.kind == dirNode)
+  for k, v in dir.entries.pairs:
     yield (k, v)
 
 proc containsFile*(dir: UnixFsNode; name: string): bool =
-  doAssert(dir.kind == rootNode)
+  doAssert(dir.kind == dirNode)
   dir.entries.contains name
 
 proc `[]`*(dir: UnixFsNode; name: string): UnixFsNode =
-  if dir.kind == rootNode:
+  if dir.kind == dirNode:
     result = dir.entries.getOrDefault name
 
 proc `[]`*(dir: UnixFsNode; index: int): (string, UnixfsNode) =
   result[0] = ""
-  if dir.kind == rootNode:
+  if dir.kind == dirNode:
     var i = 0
     for name, node in dir.entries.pairs:
       if i == index:
@@ -127,36 +222,34 @@ proc `[]`*(dir: UnixFsNode; index: int): (string, UnixfsNode) =
       inc i
 
 proc lookupFile*(dir: UnixFsNode; name: string): tuple[cid: Cid, size: BiggestInt] =
-  doAssert(dir.kind == rootNode)
+  doAssert(dir.kind == dirNode)
   let f = dir.entries[name]
   if f.kind == fileNode:
     result.cid = f.cid
-    result.size = f.fSize
+    result.size = f.size
 
 proc addFile*(store: IpldStore; path: string): Future[UnixFsNode] {.async.} =
-  ## Add a file to the store and return the CID and file size.
-  var
-    fCid = initCid()
-    fSize = 0
+  ## Add a file to the store and return a UnixfsNode.
   let
     fStream = newFileStream(path, fmRead)
-    fRoot = newDag()
+    u = newUnixfsFile()
   for cid, chunk in fStream.simpleChunks:
-    discard await store.putRaw(chunk)
-    fRoot.add(cid, chunk.len)
-    fCid = cid
-    fSize.inc chunk.len
-  if fSize == 0:
+    discard await store.put(chunk)
+    if u.links.isNil:
+      u.links = newSeqOfCap[FileLink](1)
+    u.links.add FileLink(cid: cid, size: chunk.len)
+    u.size.inc chunk.len
+  if u.size == 0:
     # return the CID for a raw nothing
-    fCid = CidSha256("")
+    u.cid = CidSha256("")
   else:
-    if fRoot["links"].len == 1:
-      # take a shortcut and return the bare chunk CID
-      discard
+    if u.links.len == 1:
+      # take a shortcut and use the raw chunk CID
+      u.cid = u.links[0].cid
     else:
-      fCid = await store.putDag(fRoot)
-  close fStream
-  result = newUnixfsFile(fCid, fSize)
+      u.cid = await store.putDag(u.toCbor)
+  result = u
+  close fStream
 
 proc addDir*(store: IpldStore; dirPath: string): Future[UnixFsNode] {.async.} =
   var dRoot = newUnixFsRoot()
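To make the tagged encoding concrete, a sketch of the wire shapes produced by `toCbor` above in CBOR diagnostic notation (FileTag 0xda3c81 = 14302337, DirTag 0xda3c80 = 14302336; byte strings and sizes are illustrative placeholders; keys are typeKey = 1, dataKey = 2, sizeKey = 3, with 0/1 assumed to stand for ufsFile/ufsDir):

file: 14302337([{2: h'...', 3: 262144}, {2: h'...', 3: 70133}])
dir:  14302336({"readme.md": {1: 0, 2: h'...', 3: 332277}})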
@@ -175,28 +268,50 @@ proc addDir*(store: IpldStore; dirPath: string): Future[UnixFsNode] {.async.} =
     cid = await store.putDag(dag)
   result = newUnixfsDir(cid)
 
+proc open*(store: IpldStore; cid: Cid): Future[UnixfsNode] {.async.} =
+  assert cid.isValid
+  assert(not cid.isRaw)
+  let raw = await store.get(cid)
+  result = parseUnixfs(raw, cid)
+
 proc openDir*(store: IpldStore; cid: Cid): Future[UnixfsNode] {.async.} =
   assert cid.isValid
-  let dag = await store.getDag(cid)
-  assert(not dag.isNil)
-  result = parseUnixfs(dag, cid)
-  assert(result.kind == rootNode)
+  let raw = await store.get(cid)
+  assert(not raw.isNil)
+  result = parseUnixfs(raw, cid)
+  assert(result.kind == dirNode)
 
-proc walk*(store: IpldStore; root: UnixfsNode; path: string): Future[UnixfsNode] {.async.} =
+proc walk*(store: IpldStore; dir: UnixfsNode; path: string; cache = true): Future[UnixfsNode] {.async.} =
   ## Walk a path down a root.
-  assert root.cid.isValid
+  assert dir.cid.isValid
   assert(path != "")
-  result = root
+  assert(dir.kind == dirNode)
+  result = dir
   for name in split(path, DirSep):
     if name == "": continue
     if result.kind == fileNode:
       result = nil
       break
-    result = result[name]
-    if result.isNil: break
-    if result.kind == dirNode:
-      # fetch and parse the directory as a root
-      result = await store.openDir result.cid
+    var next = result[name]
+    if next.isNil:
+      result = nil
+      break
+    if (next.kind in {shallowFile, shallowDir}) and (not next.cid.isRaw):
+      let raw = await store.get(next.cid)
+      next = parseUnixfs(raw, next.cid)
+      if cache:
+        result.entries[name] = next
+    result = next
+
+iterator fileChunks*(store: IpldStore; file: UnixfsNode): Future[string] =
+  ## Iterate over the links in a file and return futures for link data.
+  if file.cid.isRaw:
+    yield store.get(file.cid)
+  else:
+    var i = 0
+    while i < file.links.len:
+      yield store.get(file.links[i].cid)
+      inc i
 
 proc readBuffer*(store: IpldStore; file: UnixfsNode; pos: BiggestInt;
                  buf: pointer; size: int): Future[int] {.async.} =
@@ -204,25 +319,24 @@ proc readBuffer*(store: IpldStore; file: UnixfsNode; pos: BiggestInt;
   assert(pos > -1)
   var
     filePos = 0
-    bufPos = 0
-  if pos < file.fSize:
+  if pos < file.size:
     if file.cid.isRaw:
       let pos = pos.int
-      var blk = await store.getRaw(file.cid)
+      var blk = await store.get(file.cid)
       if pos < blk.high:
         copyMem(buf, blk[pos].addr, min(blk.len - pos, size))
-    elif file.cid.isDagCbor:
-      let dag = await store.getDag(file.cid)
-      for link in dag["links"].items:
-        let linkSize = link["size"].getInt().int
+      result = size
+    else:
+      for i in 0..file.links.high:
+        let linkSize = file.links[i].size
         if filePos <= pos and pos < filePos+linkSize:
-          let linkCid = link["cid"].getBytes.parseCid
-          var chunk = await store.getRaw(linkCid)
+          var chunk = await store.get(file.links[i].cid)
           let
             chunkPos = int(pos - filePos)
             n = min(chunk.len-chunkPos, size)
           copyMem(buf, chunk[chunkPos].addr, n)
-          return n
+          result = n
+          break
         filePos.inc linkSize
 
 proc path(fs: FileStore; cid: Cid): string =
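A hedged sketch of calling `readBuffer` (not in the commit); `file` is a file UnixfsNode, e.g. the result of `walk` above:

import asyncdispatch, ipld, ipldstore, unixfs

proc readHead(store: IpldStore; file: UnixfsNode): string =
  # read up to 64 bytes from the start of the file
  result = newString(64)
  let n = waitFor store.readBuffer(file, 0, result[0].addr, result.len)
  result.setLen n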
@@ -246,23 +360,17 @@ proc dumpPaths*(paths: var seq[string]; store: FileStore; cid: Cid) =
   ## TODO: use CBOR tags rather than reconstitute UnixFS nodes.
   paths.add store.path(cid)
   if cid.isDagCbor:
-    let dag = waitFor store.getDag(cid)
-    if dag.kind == cborMap:
-      if dag.contains("links"):
-        for cbor in dag["links"].items:
-          if cbor.contains("cid"):
-            paths.add store.path(cbor["cid"].getString.parseCid)
-      else:
-        let ufsNode = parseUnixfs(dag, cid)
-        case ufsNode.kind
-        of fileNode:
-          for link in dag["links"].items:
-            paths.dumpPaths(store, link["cid"].getBytes.parseCid)
-        of rootNode:
-          for _, u in ufsNode.items:
-            paths.dumpPaths(store, u.cid)
-        of dirNode:
-          raiseAssert "cannot dump child dir"
+    let u = waitFor store.open(cid)
+    case u.kind:
+    of fileNode:
+      assert(not u.links.isNil)
+      for i in 0..u.links.high:
+        paths.add store.path(u.links[i].cid)
+    of dirNode:
+      for _, child in u.items:
+        paths.dumpPaths(store, child.cid)
+    else:
+      raiseAssert "cannot dump shallow nodes"
 
 iterator dumpPaths*(store: FileStore; cid: Cid): string =
   var collector = newSeq[string]()