Refactor UnixFS

UnixFS files now contain a seq of links. Walking will cache nodes
in intermediate directories.
Ehmry - 2017-12-14 23:52:57 -06:00
parent 6fd4756222
commit 3c83a65341
5 changed files with 281 additions and 169 deletions
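For orientation, a minimal usage sketch of the API as it reads after this commit (not part of the diff; the store path and `rootCid` are hypothetical):

import asyncdispatch
import ipld, ipldstore, unixfs

proc demo(rootCid: Cid) {.async.} =
  # open an on-disk block store and parse the root directory node
  let store = newFileStore("/tmp/ipld-demo")
  let root = await store.openDir(rootCid)
  # walk fetches shallow intermediate directories on demand; with the
  # default cache = true the parsed node replaces the shallow entry in
  # its parent, so repeated walks skip the store
  let file = await store.walk(root, "docs/readme.md")
  if not file.isNil and file.isFile:
    echo "size: ", file.size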

View File

@@ -12,6 +12,11 @@ A Lisp REPL utility for storing files and directories in IPLD.
 Standard Lisp `apply` function, apply a list as arguments to a function.
 
+#### `(cbor <cid>)`
+Return CBOR encoding of UnixFS node as a diagnostic string.
+Provided for illustrating canonicalized CBOR encoding.
+
 #### `(cons <head> <tail>)`
 Standard Lisp `cons` function, prepend to a list.
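A usage note for the new `(cbor <cid>)` form above: assuming `d` is bound to a directory CID (hypothetical binding), `(cbor d)` prints the node's canonicalized CBOR in diagnostic form.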

View File

@@ -5,7 +5,6 @@ type Cid* = object
     hash*: MulticodecTag
     codec*: MulticodecTag
     ver*: int
-    logicalLen*: int # not included in canonical representation
 
 proc initCid*(): Cid =
   ## Initialize an invalid CID.
@@ -46,7 +45,8 @@ proc toBin(cid: Cid): string =
 proc toRaw*(cid: Cid): string =
   MultibaseTag.Identity.char & cid.toBin
 
-proc toCbor*(cid: Cid): CborNode = newCborBytes cid.toRaw
+proc newCborBytes*(cid: Cid): CborNode = newCborBytes cid.toRaw
+proc toCbor*(cid: Cid): CborNode {.deprecated.} = cid.newCborBytes
 
 proc toHex*(cid: Cid): string =
   MultibaseTag.Base16.char & hex.encode(cid.toBin)
@@ -85,15 +85,13 @@ proc parseCid*(s: string): Cid =
   result.digest = raw[off..raw.high]
   result.hash = hash.MulticodecTag
   result.codec = codec.MulticodecTag
-  result.logicalLen = -1
 
 proc CidSha256*(data: string; codec = MulticodecTag.Raw): Cid =
   Cid(
     digest: $computeSHA256(data),
     hash: MulticodecTag.Sha2_256,
     codec: codec,
-    ver: 1,
-    logicalLen: data.len)
+    ver: 1)
 
 proc verify*(cid: Cid; data: string): bool =
   case cid.hash
@@ -139,13 +137,13 @@ proc merge*(dag, other: Dag) =
       result = newCborArray()
       dag["links"] = result
   if not otherLinks.isNil:
-    for link in otherLinks.list:
+    for link in otherLinks.seq:
       block insert:
         var i: int
-        while i < result.list.len:
-          let L = result.list[i]
+        while i < result.seq.len:
+          let L = result.seq[i]
           if L["name"].getString == link["name"].getString:
-            result.list[i] = link
+            result.seq[i] = link
             # replace
             break insert
          inc i
@@ -169,8 +167,8 @@ proc fileLen*(dag: Dag; name: string): int =
 ]#
 
 iterator simpleChunks*(s: Stream; size = 256 * 1024): (Cid, string) =
-  var result: (Cid, string)
   while not s.atEnd:
+    var result: (Cid, string)
     result[1] = s.readStr size
     result[0] = result[1].CidSHA256(MulticodecTag.Raw)
     yield result
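A hedged sketch of driving `simpleChunks` directly (not in the commit): it splits a file into 256 KiB raw blocks and stores each one, assuming the `put` proc introduced in the store diff below.

import streams, asyncdispatch, ipld, ipldstore

proc putChunks(store: IpldStore; path: string) =
  let fs = newFileStream(path, fmRead)
  for cid, chunk in fs.simpleChunks:
    # each chunk is content-addressed by its SHA-256 CID
    discard waitFor store.put(chunk)
    echo cid.toHex
  close fs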

View File

@@ -1,4 +1,4 @@
-import rdstdin, nre, os, strutils, tables, asyncdispatch, asyncstreams, parseopt, streams
+import rdstdin, nre, os, strutils, tables, asyncdispatch, asyncstreams, parseopt, streams, cbor
 import ipld, ipldstore, unixfs, multiformats
@@ -154,10 +154,6 @@ proc append(list: NodeRef; n: NodeObj) =
   p[] = n
   list.append p
 
-proc isAtom(n: Node): bool = n.kind == nodeAtom
-proc isFunc(n: Node): bool = n.kind == nodeFunc
-proc isList(n: Node): bool = n.kind == nodeList
-
 proc getFile(env: Env; path: string): UnixFsNode =
   result = env.paths.getOrDefault path
   if result.isNil:
@@ -188,9 +184,8 @@ proc getUnixfs(env: Env; cid: Cid): UnixFsNode =
   assert cid.isValid
   result = env.cids.getOrDefault cid
   if result.isNil:
-    let dag = waitFor env.store.getDag(cid)
-    assert(not dag.isNil)
-    result = parseUnixfs(dag, cid)
+    let raw = waitFor env.store.get(cid)
+    result = parseUnixfs(raw, cid)
     env.cids[cid] = result
     when not defined(release):
       inc env.cidCacheMiss
@@ -230,7 +225,7 @@ proc print(a: Atom; s: Stream) =
     s.write ':'
     s.write a.fName
     s.write ':'
-    s.write $a.file.fSize
+    s.write $a.file.size
   of atomDir:
     s.write "\n"
     s.write $a.dir.cid
@@ -380,6 +375,16 @@ proc catFunc(env: Env; arg: NodeObj): Node =
   ]#
   result = newNodeError("cat not implemented", arg)
 
+proc cborFunc(env: Env; arg: NodeObj): Node =
+  let a = arg.atom
+  if a.cid.isDagCbor:
+    let
+      ufsNode = env.getUnixfs a.cid
+      diag = $ufsNode.toCbor
+    diag.newAtomString.newNode
+  else:
+    "".newAtomString.newNode
+
 proc consFunc(env: Env; args: NodeObj): Node =
   result = newNodeList()
   let
@@ -449,14 +454,14 @@ proc lsFunc(env: Env; args: NodeObj): Node =
       let a = n.atom
       if a.cid.isDagCbor:
         let ufsNode = env.getUnixfs a.cid
-        if ufsNode.kind == rootNode:
+        if ufsNode.isDir:
           for name, u in ufsNode.items:
             assert(not name.isNil)
             assert(not u.isNil, name & " is nil")
             case u.kind:
-            of fileNode:
+            of fileNode, shallowFile:
               result.append Atom(kind: atomFile, fName: name, file: u).newNode
-            of dirNode, rootNode:
+            of dirNode, shallowDir:
               result.append Atom(kind: atomDir, dName: name, dir: u).newNode
       else:
         raiseAssert("ls over a raw IPLD block")
@@ -519,6 +524,7 @@ proc newEnv(storePath: string): Env =
     cids: initTable[Cid, UnixfsNode]())
   result.bindEnv "apply", applyFunc
   result.bindEnv "cat", catFunc
+  result.bindEnv "cbor", cborFunc
   result.bindEnv "cons", consFunc
   result.bindEnv "define", defineFunc
   result.bindEnv "dump", dumpFunc

View File

@@ -11,41 +11,45 @@ type
   IpldStore* = ref IpldStoreObj
   IpldStoreObj* = object of RootObj
     closeImpl*: proc (s: IpldStore) {.nimcall, gcsafe.}
-    putRawImpl*: proc (s: IpldStore; blk: string): Future[Cid] {.nimcall, gcsafe.}
-    getRawImpl*: proc (s: IpldStore; cid: Cid): Future[string] {.nimcall, gcsafe.}
-    putDagImpl*: proc (s: IpldStore; dag: Dag): Future[Cid] {.nimcall, gcsafe.}
+    putImpl*: proc (s: IpldStore; blk: string): Future[Cid] {.nimcall, gcsafe.}
+    getImpl*: proc (s: IpldStore; cid: Cid): Future[string] {.nimcall, gcsafe.}
     fileStreamImpl*: proc (s: IpldStore; cid: Cid; fut: FutureStream[string]): Future[void] {.nimcall, gcsafe.}
 
 proc close*(s: IpldStore) =
   ## Close active store resources.
   if not s.closeImpl.isNil: s.closeImpl(s)
 
-proc putRaw*(s: IpldStore; blk: string): Future[Cid] =
+proc put*(s: IpldStore; blk: string): Future[Cid] =
   ## Place a raw block to the store.
-  assert(not s.putRawImpl.isNil)
-  s.putRawImpl(s, blk)
+  assert(not s.putImpl.isNil)
+  s.putImpl(s, blk)
 
-proc getRaw*(s: IpldStore; cid: Cid): Future[string] =
+proc get*(s: IpldStore; cid: Cid): Future[string] =
   ## Retrieve a raw block from the store.
   assert cid.isValid
-  assert(not s.getRawImpl.isNil)
-  result = s.getRawImpl(s, cid)
-  echo "returning future for generic getRaw"
+  assert(not s.getImpl.isNil)
+  s.getImpl(s, cid)
+
+{.deprecated: [putRaw: put, getRaw: get].}
 
-proc putDag*(s: IpldStore; dag: Dag): Future[Cid] =
+proc putDag*(s: IpldStore; dag: Dag): Future[Cid] {.async.} =
   ## Place an IPLD node in the store.
-  assert(not s.putDagImpl.isNil)
-  s.putDagImpl(s, dag)
+  assert(not s.putImpl.isNil)
+  let
+    raw = dag.toBinary
+    cid = raw.CidSha256(MulticodecTag.DagCbor)
+  discard await s.putImpl(s, raw)
+  result = cid
 
 proc getDag*(s: IpldStore; cid: Cid): Future[Dag] {.async.} =
   ## Retrieve an IPLD node from the store.
   assert cid.isValid
-  assert(not s.getRawImpl.isNil)
-  let raw = await s.getRawImpl(s, cid)
+  assert(not s.getImpl.isNil)
+  let raw = await s.getImpl(s, cid)
   assert(not raw.isNil)
   result = parseDag raw
 
-proc fileStream*(s: IpldStore; cid: Cid; fut: FutureStream[string]): Future[void] {.async.} =
+proc fileStream*(s: IpldStore; cid: Cid; fut: FutureStream[string]): Future[void] {.async, deprecated.} =
   ## Asynchronously stream a file from a CID list.
   ## TODO: doesn't need to be a file, can be a raw CID or
   ## a DAG that is simply a list of other CIDs.
@@ -55,7 +59,7 @@ proc fileStream*(s: IpldStore; cid: Cid; fut: FutureStream[string]): Future[void
   else:
     # use the simple implementation
     if cid.isRaw:
-      let blk = await s.getRaw(cid)
+      let blk = await s.get(cid)
       await fut.write(blk)
     elif cid.isDagCbor:
       let dag = await s.getDag(cid)
@@ -99,13 +103,13 @@ proc putToFile(fs: FileStore; cid: Cid; blk: string) {.async.} =
   close file
   moveFile(tmp, path)
 
-proc fsPutRaw(s: IpldStore; blk: string): Future[Cid] {.async.} =
+proc fsPut(s: IpldStore; blk: string): Future[Cid] {.async.} =
   var fs = FileStore(s)
   let cid = blk.CidSha256
   await fs.putToFile(cid, blk)
 
-proc fsGetRaw(s: IpldStore; cid: Cid): Future[string] =
-  result = newFuture[string]("fsGetRaw")
+proc fsGet(s: IpldStore; cid: Cid): Future[string] =
+  result = newFuture[string]("fsGet")
   var fs = FileStore(s)
   let (_, path) = fs.parentAndFile cid
   if existsFile path:
@@ -120,14 +124,6 @@ proc fsGetRaw(s: IpldStore; cid: Cid): Future[string] =
   if not result.finished:
     result.fail cid.newMissingObject
 
-proc fsPutDag(s: IpldStore; dag: Dag): Future[Cid] {.async.} =
-  var fs = FileStore(s)
-  let
-    blk = dag.toBinary
-    cid = blk.CidSha256(MulticodecTag.DagCbor)
-  await fs.putToFile(cid, blk)
-  result = cid
-
 proc fsFileStreamRecurs(fs: FileStore; cid: Cid; fut: FutureStream[string]) {.async.} =
   if cid.isRaw:
     let (_, path) = fs.parentAndFile cid
@@ -160,8 +156,7 @@ proc newFileStore*(root: string): FileStore =
   if not existsDir(root):
     createDir root
   new result
-  result.putRawImpl = fsPutRaw
-  result.getRawImpl = fsGetRaw
-  result.putDagImpl = fsPutDag
+  result.putImpl = fsPut
+  result.getImpl = fsGet
   result.fileStreamImpl = fsFileStream
   result.root = root
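The collapsed putImpl/getImpl slots make alternative back ends easy to wire up. A hedged sketch of a toy in-memory store (not part of this commit), mirroring how FileStore registers its implementations above; it assumes `newMissingObject` is exported by ipldstore:

import asyncdispatch, tables, ipld, ipldstore

type
  MemStore = ref MemStoreObj
  MemStoreObj = object of IpldStoreObj
    blocks: Table[Cid, string]

proc memPut(s: IpldStore; blk: string): Future[Cid] {.async.} =
  # hash the block and keep it in memory
  let ms = MemStore(s)
  let cid = blk.CidSha256
  ms.blocks[cid] = blk
  result = cid

proc memGet(s: IpldStore; cid: Cid): Future[string] =
  result = newFuture[string]("memGet")
  let ms = MemStore(s)
  if ms.blocks.contains cid:
    result.complete ms.blocks[cid]
  else:
    result.fail cid.newMissingObject

proc newMemStore(): MemStore =
  new result
  result.blocks = initTable[Cid, string]()
  result.putImpl = memPut
  result.getImpl = memGet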

View File

@@ -1,10 +1,10 @@
-import asyncdispatch, strutils, multiformats, streams, tables, cbor, os, hex
+import asyncdispatch, strutils, multiformats, streams, tables, cbor, os, hex, math
 import ipld, ipldstore
 
 type EntryKey = enum
   typeKey = 1,
-  contentKey = 2,
+  dataKey = 2,
   sizeKey = 3
 
 type UnixFsType* = enum
@@ -12,37 +12,46 @@ type UnixFsType* = enum
   ufsDir = 1
 
 type UnixFsKind* = enum
-  rootNode,
+  fileNode,
   dirNode,
-  fileNode
+  shallowDir,
+  shallowFile
 
 type
+  FileLink* = object
+    cid*: Cid
+    size*: int
+
   UnixFsNode* = ref object
     cid: Cid
     case kind*: UnixFsKind
-    of rootNode:
-      entries: OrderedTable[string, UnixFsNode]
-    of dirNode:
-      discard
     of fileNode:
-      fSize*: BiggestInt
+      links*: seq[FileLink]
+    of dirNode:
+      entries: OrderedTable[string, UnixFsNode]
+    of shallowFile, shallowDir:
+      discard
+    size: BiggestInt
 
 proc cid*(u: UnixFsNode): Cid =
   assert u.cid.isValid
   u.cid
 
-proc isFile*(u: UnixfsNode): bool = u.kind == fileNode
-proc isDir*(u: UnixfsNode): bool = u.kind in {rootNode, dirNode}
+proc isFile*(u: UnixfsNode): bool = u.kind in { fileNode, shallowFile }
+proc isDir*(u: UnixfsNode): bool = u.kind in { dirNode, shallowDir }
+
+proc size*(u: UnixfsNode): BiggestInt =
+  if u.kind == dirNode: u.entries.len.BiggestInt
+  else: u.size
 
 proc newUnixFsRoot*(): UnixFsNode =
   UnixFsNode(
     cid: initCid(),
-    kind: rootNode,
+    kind: dirNode,
     entries: initOrderedTable[string, UnixFsNode](8))
 
-proc newUnixFsFile*(cid: Cid; size: int): UnixFsNode =
-  UnixFsNode(kind: fileNode, cid: cid, fSize: size)
+proc newUnixfsFile*(): UnixFsNode =
+  UnixFsNode(kind: fileNode, cid: initCid())
 
 proc newUnixfsDir*(cid: Cid): UnixFsNode =
   UnixFsNode(cid: cid, kind: dirNode)
@@ -56,69 +65,155 @@ proc addDir*(root: var UnixFsNode; name: string; cid: Cid) {.deprecated.} =
 proc addFile*(root: var UnixFsNode; name: string; cid: Cid; size: BiggestInt) {.deprecated.} =
   assert cid.isValid
-  root.add name, UnixFsNode(kind: fileNode, cid: cid, fSize: size)
+  root.add name, UnixFsNode(kind: fileNode, cid: cid, size: size)
 
 proc del*(dir: var UnixFsNode; name: string) =
   dir.entries.del name
 
-proc toCbor*(root: UnixFsNode): CborNode =
-  result = newCborMap()
-  for name, node in root.entries:
-    var entry = newCborMap()
-    case node.kind
-    of rootNode, dirNode:
-      entry[typeKey.int] = newCborInt ufsDir.int
-      entry[contentKey.int] = node.cid.toCbor
-    of fileNode:
-      entry[typeKey.int] = newCborInt ufsFile.int
-      entry[contentKey.int] = node.cid.toCbor
-      entry[sizeKey.int] = newCborInt node.fSize
-    result[name] = entry
-  # TODO: the CBOR maps must be sorted
+const
+  DirTag* = 0xda3c80 ## CBOR tag for UnixFS directories
+  FileTag* = 0xda3c81 ## CBOR tag for UnixFS files
+
+proc toCbor*(u: UnixFsNode): CborNode =
+  case u.kind
+  of fileNode:
+    if u.links.isNil:
+      raiseAssert "cannot encode single-chunk files"
+    let array = newCborArray()
+    array.seq.setLen u.links.len
+    for i in 0..u.links.high:
+      let L = newCborMap()
+      # typeEntry is reserved but not in use
+      L[dataKey.int] = u.links[i].cid.newCborBytes
+      L[sizeKey.int] = u.links[i].size.newCborInt
+      array.seq[i] = L
+    result = newCborTag(FileTag, array)
+  of dirNode:
+    let map = newCborMap()
+    for name, node in u.entries:
+      var entry = newCborMap()
+      case node.kind
+      of fileNode, shallowFile:
+        entry[typeKey.int] = ufsFile.int.newCborInt
+        entry[dataKey.int] = node.cid.newCborBytes
+        entry[sizeKey.int] = node.size.newCborInt
+      of dirNode:
+        entry[typeKey.int] = ufsDir.int.newCborInt
+        entry[dataKey.int] = node.cid.newCborBytes
+        entry[sizeKey.int] = node.entries.len.newCborInt
+      of shallowDir:
+        entry[typeKey.int] = ufsDir.int.newCborInt
+        entry[dataKey.int] = node.cid.newCborBytes
+        entry[sizeKey.int] = node.size.int.newCborInt
+      map[name] = entry
+    # TODO: the CBOR maps must be sorted
+    result = newCborTag(DirTag, map)
+  else:
+    raiseAssert "shallow UnixfsNodes can not be encoded"
+
+template parseAssert(cond: bool; msg = "") =
+  if not cond: raise newException(
+    ValueError,
+    if msg == "": "invalid UnixFS CBOR" else: "invalid UnixFS CBOR, " & msg)
 
-proc parseUnixfs*(c: CborNode; cid: Cid): UnixFsNode =
-  assert(not c.isNil)
-  result = newUnixFsRoot()
+proc parseUnixfs*(raw: string; cid: Cid): UnixFsNode =
+  ## Parse a string containing CBOR data into a UnixFsNode.
+  assert(not raw.isNil)
+  new result
   result.cid = cid
-  for k, v in c.map.pairs:
-    let
-      name = k.getString
-      t = v[typeKey.int].getInt.UnixFsType
-      subCid = v[contentKey.int].getBytes.parseCid
-    case t
-    of ufsDir:
-      result.addDir(name, subCid)
-    of ufsFile:
-      let size = v[sizeKey.int]
-      if not size.isNil:
-        result.addFile(name, subCid, size.getInt)
-      else:
-        result.addFile(name, subCid, 0)
-    else:
-      discard
+  var
+    c: CborParser
+    buf = ""
+  open(c, newStringStream(raw))
+  next c
+  parseAssert(c.kind == CborEventKind.cborTag, "data not tagged")
+  let tag = c.parseTag
+  if tag == FileTag:
+    result.kind = fileNode
+    next c
+    parseAssert(c.kind == CborEventKind.cborArray, "file data not an array")
+    let nLinks = c.arrayLen
+    result.links = newSeq[FileLink](nLinks)
+    for i in 0..<nLinks:
+      next c
+      parseAssert(c.kind == CborEventKind.cborMap, "file array does not contain maps")
+      let nAttrs = c.mapLen
+      for _ in 1..nAttrs:
+        next c
+        parseAssert(c.kind == CborEventKind.cborPositive, "link map key not an integer")
+        let key = c.parseInt.EntryKey
+        next c
+        case key
+        of typeKey:
+          parseAssert(false, "type file links are not supported")
+        of dataKey:
+          parseAssert(c.kind == CborEventKind.cborBytes, "CID not encoded as bytes")
+          c.readBytes buf
+          result.links[i].cid = buf.parseCid
+        of sizeKey:
+          parseAssert(c.kind == CborEventKind.cborPositive, "link size not encoded properly")
+          result.links[i].size = c.parseInt
+          result.size.inc result.links[i].size
+  elif tag == DirTag:
+    result.kind = dirNode
+    next c
+    parseAssert(c.kind == CborEventKind.cborMap)
+    let dirLen = c.mapLen
+    parseAssert(dirLen != -1, raw)
+    result.entries = initOrderedTable[string, UnixFsNode](dirLen.nextPowerOfTwo)
+    for i in 1 .. dirLen:
+      next c
+      parseAssert(c.kind == CborEventKind.cborText, raw)
+      c.readText buf
+      parseAssert(not buf.contains({ '/', '\0'}), raw)
+      next c
+      parseAssert(c.kind == CborEventKind.cborMap)
+      let nAttrs = c.mapLen
+      parseAssert(nAttrs > 1, raw)
+      let entry = new UnixFsNode
+      result.entries[buf] = entry
+      for i in 1 .. nAttrs:
+        next c
+        parseAssert(c.kind == CborEventKind.cborPositive)
+        case c.parseInt.EntryKey
+        of typeKey:
+          next c
+          case c.parseInt.UnixFsType
+          of ufsFile: entry.kind = shallowFile
+          of ufsDir: entry.kind = shallowDir
+        of dataKey:
+          next c
+          c.readBytes buf
+          entry.cid = buf.parseCid
+        of sizeKey:
+          next c
+          entry.size = c.parseInt
+  else:
+    parseAssert(false, raw)
+  next c
+  parseAssert(c.kind == cborEof, "trailing data")
 
-proc toStream*(dir: UnixFsNode; s: Stream) =
-  doAssert(dir.kind == rootNode)
-  let c = dir.toCbor()
+proc toStream*(node: UnixFsNode; s: Stream) =
+  let c = node.toCbor()
   c.toStream s
 
-iterator items*(root: UnixFsNode): (string, UnixFsNode) =
-  assert(not root.isNil)
-  assert(root.kind == rootNode)
-  for k, v in root.entries.pairs:
+iterator items*(dir: UnixFsNode): (string, UnixFsNode) =
+  assert(not dir.isNil)
+  assert(dir.kind == dirNode)
+  for k, v in dir.entries.pairs:
     yield (k, v)
 
 proc containsFile*(dir: UnixFsNode; name: string): bool =
-  doAssert(dir.kind == rootNode)
+  doAssert(dir.kind == dirNode)
   dir.entries.contains name
 
 proc `[]`*(dir: UnixFsNode; name: string): UnixFsNode =
-  if dir.kind == rootNode:
+  if dir.kind == dirNode:
     result = dir.entries.getOrDefault name
 
 proc `[]`*(dir: UnixFsNode; index: int): (string, UnixfsNode) =
   result[0] = ""
-  if dir.kind == rootNode:
+  if dir.kind == dirNode:
     var i = 0
     for name, node in dir.entries.pairs:
       if i == index:
@@ -127,36 +222,34 @@ proc `[]`*(dir: UnixFsNode; index: int): (string, UnixfsNode) =
       inc i
 
 proc lookupFile*(dir: UnixFsNode; name: string): tuple[cid: Cid, size: BiggestInt] =
-  doAssert(dir.kind == rootNode)
+  doAssert(dir.kind == dirNode)
   let f = dir.entries[name]
   if f.kind == fileNode:
     result.cid = f.cid
-    result.size = f.fSize
+    result.size = f.size
 
 proc addFile*(store: IpldStore; path: string): Future[UnixFsNode] {.async.} =
-  ## Add a file to the store and return the CID and file size.
-  var
-    fCid = initCid()
-    fSize = 0
+  ## Add a file to the store and return a UnixfsNode.
   let
     fStream = newFileStream(path, fmRead)
-    fRoot = newDag()
+    u = newUnixfsFile()
   for cid, chunk in fStream.simpleChunks:
-    discard await store.putRaw(chunk)
-    fRoot.add(cid, chunk.len)
-    fCid = cid
-    fSize.inc chunk.len
-  if fSize == 0:
+    discard await store.put(chunk)
+    if u.links.isNil:
+      u.links = newSeqOfCap[FileLink](1)
+    u.links.add FileLink(cid: cid, size: chunk.len)
+    u.size.inc chunk.len
+  if u.size == 0:
     # return the CID for a raw nothing
-    fCid = CidSha256("")
+    u.cid = CidSha256("")
   else:
-    if fRoot["links"].len == 1:
-      # take a shortcut and return the bare chunk CID
-      discard
+    if u.links.len == 1:
+      # take a shortcut and use the raw chunk CID
+      u.cid = u.links[0].cid
     else:
-      fCid = await store.putDag(fRoot)
-  close fStream
-  result = newUnixfsFile(fCid, fSize)
+      u.cid = await store.putDag(u.toCbor)
+  result = u
+  close fStream
 
 proc addDir*(store: IpldStore; dirPath: string): Future[UnixFsNode] {.async.} =
   var dRoot = newUnixFsRoot()
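To make the tagged encoding concrete, a sketch of the wire shapes produced by `toCbor` above in CBOR diagnostic notation (FileTag 0xda3c81 = 14302337, DirTag 0xda3c80 = 14302336; byte strings and sizes are illustrative placeholders; keys are typeKey = 1, dataKey = 2, sizeKey = 3, with 0/1 assumed to stand for ufsFile/ufsDir):

file: 14302337([{2: h'...', 3: 262144}, {2: h'...', 3: 70133}])
dir:  14302336({"readme.md": {1: 0, 2: h'...', 3: 332277}})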
@@ -175,28 +268,50 @@ proc addDir*(store: IpldStore; dirPath: string): Future[UnixFsNode] {.async.} =
     cid = await store.putDag(dag)
   result = newUnixfsDir(cid)
 
+proc open*(store: IpldStore; cid: Cid): Future[UnixfsNode] {.async.} =
+  assert cid.isValid
+  assert(not cid.isRaw)
+  let raw = await store.get(cid)
+  result = parseUnixfs(raw, cid)
+
 proc openDir*(store: IpldStore; cid: Cid): Future[UnixfsNode] {.async.} =
   assert cid.isValid
-  let dag = await store.getDag(cid)
-  assert(not dag.isNil)
-  result = parseUnixfs(dag, cid)
-  assert(result.kind == rootNode)
+  let raw = await store.get(cid)
+  assert(not raw.isNil)
+  result = parseUnixfs(raw, cid)
+  assert(result.kind == dirNode)
 
-proc walk*(store: IpldStore; root: UnixfsNode; path: string): Future[UnixfsNode] {.async.} =
+proc walk*(store: IpldStore; dir: UnixfsNode; path: string; cache = true): Future[UnixfsNode] {.async.} =
   ## Walk a path down a root.
-  assert root.cid.isValid
+  assert dir.cid.isValid
   assert(path != "")
-  result = root
+  assert(dir.kind == dirNode)
+  result = dir
   for name in split(path, DirSep):
     if name == "": continue
     if result.kind == fileNode:
       result = nil
       break
-    result = result[name]
-    if result.isNil: break
-    if result.kind == dirNode:
-      # fetch and parse the directory as a root
-      result = await store.openDir result.cid
+    var next = result[name]
+    if next.isNil:
+      result = nil
+      break
+    if (next.kind in {shallowFile, shallowDir}) and (not next.cid.isRaw):
+      let raw = await store.get(next.cid)
+      next = parseUnixfs(raw, next.cid)
+      if cache:
+        result.entries[name] = next
+    result = next
+
+iterator fileChunks*(store: IpldStore; file: UnixfsNode): Future[string] =
+  ## Iterate over the links in a file and return futures for link data.
+  if file.cid.isRaw:
+    yield store.get(file.cid)
+  else:
+    var i = 0
+    while i < file.links.len:
+      yield store.get(file.links[i].cid)
+      inc i
 
 proc readBuffer*(store: IpldStore; file: UnixfsNode; pos: BiggestInt;
                  buf: pointer; size: int): Future[int] {.async.} =
@@ -204,25 +319,24 @@ proc readBuffer*(store: IpldStore; file: UnixfsNode; pos: BiggestInt;
   assert(pos > -1)
   var
     filePos = 0
-    bufPos = 0
-  if pos < file.fSize:
+  if pos < file.size:
     if file.cid.isRaw:
       let pos = pos.int
-      var blk = await store.getRaw(file.cid)
+      var blk = await store.get(file.cid)
       if pos < blk.high:
         copyMem(buf, blk[pos].addr, min(blk.len - pos, size))
-    elif file.cid.isDagCbor:
-      let dag = await store.getDag(file.cid)
-      for link in dag["links"].items:
-        let linkSize = link["size"].getInt().int
+      result = size
+    else:
+      for i in 0..file.links.high:
+        let linkSize = file.links[i].size
         if filePos <= pos and pos < filePos+linkSize:
-          let linkCid = link["cid"].getBytes.parseCid
-          var chunk = await store.getRaw(linkCid)
+          var chunk = await store.get(file.links[i].cid)
           let
             chunkPos = int(pos - filePos)
             n = min(chunk.len-chunkPos, size)
           copyMem(buf, chunk[chunkPos].addr, n)
-          return n
+          result = n
+          break
         filePos.inc linkSize
 
 proc path(fs: FileStore; cid: Cid): string =
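A hedged sketch of calling `readBuffer` (not in the commit); `file` is a file UnixfsNode, e.g. the result of `walk` above:

import asyncdispatch, ipld, ipldstore, unixfs

proc readHead(store: IpldStore; file: UnixfsNode): string =
  # read up to 64 bytes from the start of the file
  result = newString(64)
  let n = waitFor store.readBuffer(file, 0, result[0].addr, result.len)
  result.setLen n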
@@ -246,23 +360,17 @@ proc dumpPaths*(paths: var seq[string]; store: FileStore; cid: Cid) =
   ## TODO: use CBOR tags rather than reconstitute UnixFS nodes.
   paths.add store.path(cid)
   if cid.isDagCbor:
-    let dag = waitFor store.getDag(cid)
-    if dag.kind == cborMap:
-      if dag.contains("links"):
-        for cbor in dag["links"].items:
-          if cbor.contains("cid"):
-            paths.add store.path(cbor["cid"].getString.parseCid)
-      else:
-        let ufsNode = parseUnixfs(dag, cid)
-        case ufsNode.kind
-        of fileNode:
-          for link in dag["links"].items:
-            paths.dumpPaths(store, link["cid"].getBytes.parseCid)
-        of rootNode:
-          for _, u in ufsNode.items:
-            paths.dumpPaths(store, u.cid)
-        of dirNode:
-          raiseAssert "cannot dump child dir"
+    let u = waitFor store.open(cid)
+    case u.kind:
+    of fileNode:
+      assert(not u.links.isNil)
+      for i in 0..u.links.high:
+        paths.add store.path(u.links[i].cid)
+    of dirNode:
+      for _, child in u.items:
+        paths.dumpPaths(store, child.cid)
+    else:
+      raiseAssert "cannot dump shallow nodes"
 
 iterator dumpPaths*(store: FileStore; cid: Cid): string =
   var collector = newSeq[string]()