# blobsets/src/blobset.nim
# (source listing: 668 lines, 16 KiB, Nim)

2018-12-21 03:50:36 +01:00
# Refuse to compile when this file is imported instead of being built as the
# main module; the error message points users at the `blobsets` module.
when not isMainModule:
  {.error: "this module is not a library, import blobsets instead".}
2018-09-07 15:34:25 +02:00
2018-12-21 03:50:36 +01:00
import std/nre, std/os, std/strutils, std/tables, std/parseopt, std/streams, std/rdstdin
import cbor
2018-12-23 03:23:10 +01:00
import ./blobsets, ./blobsets/stores
2018-12-21 03:50:36 +01:00
when defined(genode):
import dagfsclient
2018-12-23 03:23:10 +01:00
#else:
# import ./blobsets/tcp
proc dumpMain() =
  ## Entry point of the `dump` subcommand.
  ##
  ## Collects the positional command-line arguments, skips the first one (the
  ## subcommand name) and writes the chunks of every remaining blob id to
  ## standard output, reading from the file store opened at "/tmp/blobs".
  ## The first failure is reported on standard error and ends the process
  ## through `quit(-1)`.
  var positional: seq[string] = @[]
  for kind, key, val in getopt():
    if kind == cmdArgument:
      positional.add key
  if positional.len <= 1:
    return # only the subcommand name (or nothing) was given
  let store = newFileStore("/tmp/blobs")
  for idText in positional[1 .. positional.high]:
    try:
      for chunk in store.dumpBlob(idText.toBlobId):
        stdout.write chunk
    except:
      stderr.writeLine("failed to dump '", idText, "', ", getCurrentExceptionMsg())
      quit(-1)
proc insertPath(set: BlobSet; store: BlobStore; kind: PathComponent; path: string) =
  ## Ingest `path` into `store` and record the result in `set`.
  ##
  ## Files and links to files are ingested directly; the entry name is the
  ## normalized path with the current directory prefix and one leading "/"
  ## removed, and a line "<id><size right-aligned to 11 columns> <name>" is
  ## written to standard output. Directories and links to directories are
  ## walked and each entry is inserted recursively.
  ##
  ## Any exception is reported on standard error and then dropped, so a
  ## failing entry does not stop the entries that follow it.
  try:
    case kind
    of pcDir, pcLinkToDir:
      for entryKind, entryPath in walkDir(path):
        set.insertPath(store, entryKind, entryPath)
    of pcFile, pcLinkToFile:
      var name = normalizedPath(path)
      let (id, size) = store.ingestFile(name)
      name.removePrefix(getCurrentDir())
      name.removePrefix("/")
      set.insert(name, id, size)
      stdout.writeLine(id, align($size, 11), " ", name)
  except:
    let e = getCurrentException()
    stderr.writeLine("failed to ingest '", path, "', ", e.msg)
proc ingestMain() =
  ## Entry point of the `ingest` subcommand.
  ##
  ## Every positional argument after the subcommand name is normalized and
  ## inserted into a fresh blob set backed by the file store at "/tmp/blobs".
  ## The set is then committed and the resulting set id is written to
  ## standard output. Nothing happens when no path is given.
  var positional: seq[string] = @[]
  for kind, key, val in getopt():
    if kind == cmdArgument:
      positional.add key
  if positional.len <= 1:
    return
  var set = newBlobSet()
  let store = newFileStore("/tmp/blobs")
  for raw in positional[1 .. positional.high]:
    let path = normalizedPath(raw)
    set.insertPath(store, getFileInfo(path).kind, path)
  let final = store.commit(set)
  stdout.writeLine(final.setId)
2018-09-07 15:34:25 +02:00
type
  EvalError = object of CatchableError
    ## Error type that `eval` converts into an error node.

  Env = ref EnvObj

  AtomKind = enum
    ## Discriminator of `Atom`.
    atomBlob
    atomSet
    atomPath
    atomString
    atomNum
    atomSymbol
    atomError

  Atom = object
    ## A leaf value of the interpreter.
    case kind: AtomKind
    of atomBlob:
      blob: BlobId
      size: BiggestInt
    of atomSet:
      bs: BlobSet
    of atomPath:
      path: string
      name: string
    of atomString:
      str: string
    of atomNum:
      num: BiggestInt
    of atomSymbol:
      sym: string
    of atomError:
      err: string

  Func = proc(env: Env; arg: NodeObj): NodeRef
    ## Signature shared by all builtin functions.

  NodeKind = enum
    ## Discriminator of `NodeObj`.
    nodeError
    nodeList
    nodeAtom
    nodeFunc

  NodeRef = ref NodeObj
    ## NodeRef is used to chain nodes into lists.

  NodeObj = object
    ## NodeObj is used to mutate nodes without side-effects.
    case kind: NodeKind
    of nodeList:
      headRef, tailRef: NodeRef
    of nodeAtom:
      atom: Atom
    of nodeFunc:
      fun: Func
      name: string
    of nodeError:
      errMsg: string
      errNode: NodeRef
    nextRef: NodeRef ## Following node in the chain, nil at the end.

  EnvObj = object
    ## Interpreter state: the backing store, the symbol bindings and the
    ## per-path caches of ingested blobs and sets.
    store: BlobStore
    bindings: Table[string, NodeObj]
    blobs: Table[string, tuple[id: BlobId, size: BiggestInt]]
    sets: Table[string, BlobSet]
2018-09-07 15:34:25 +02:00
# Forward declarations of the printers. The NodeRef overload is called by the
# NodeObj printer, which is defined before it.
proc print(a: Atom; s: Stream)
proc print(ast: NodeRef; s: Stream)
2018-12-23 03:23:10 +01:00
proc newAtom(x: tuple[id: BlobId, size: BiggestInt]): Atom =
  ## Wrap a blob id and its size in a blob atom.
  result = Atom(kind: atomBlob, blob: x.id, size: x.size)
proc newAtom(bs: BlobSet): Atom =
  ## Wrap a blob set in a set atom.
  result = Atom(kind: atomSet, bs: bs)
2018-09-07 15:34:25 +02:00
proc newAtomError(msg: string): Atom =
  ## Build an error atom carrying `msg`.
  result = Atom(kind: atomError, err: msg)
proc newAtomPath(s: string): Atom =
  ## Build a path atom from `s`.
  ##
  ## `path` holds the result of `expandFilename(s)` and `name` the file name
  ## component of `s`. When expansion raises `OSError` an error atom
  ## describing the invalid path is returned instead.
  result =
    try:
      Atom(kind: atomPath, path: expandFilename(s), name: extractFilename(s))
    except OSError:
      newAtomError("invalid path '$1'" % s)
proc newAtomString(s: string): Atom =
  ## Wrap `s` in a string atom.
  result = Atom(kind: atomString, str: s)
2018-12-21 03:50:36 +01:00
proc newAtom(i: Natural): Atom =
  ## Wrap a non-negative integer in a number atom.
  result = Atom(kind: atomNum, num: i)
2018-09-07 15:34:25 +02:00
proc newNodeError(msg: string; n: NodeObj): NodeRef =
  ## Build an error node with message `msg` that refers to a heap copy of
  ## the offending node `n`.
  let offender = new NodeRef
  offender[] = n
  result = NodeRef(kind: nodeError, errMsg: msg, errNode: offender)
proc newNode(a: Atom): NodeRef =
  ## Allocate an atom node holding `a`.
  result = NodeRef(kind: nodeAtom, atom: a)
proc newNodeList(): NodeRef =
  ## Allocate an empty list node.
  result = NodeRef(kind: nodeList)
proc next(n: NodeObj | NodeRef): NodeObj =
  ## Copy of the node linked after `n`. Asserts that such a node exists.
  assert(not n.nextRef.isNil, "next element is nil")
  n.nextRef[]
proc head(list: NodeObj | NodeRef): NodeObj =
  ## Copy of the first element of the list node `list`.
  result = list.headRef[]
proc `next=`(n, p: NodeRef) =
  ## Link `p` as the node following `n`. Asserts that `n` has no successor
  ## yet, i.e. that it is the last node of its chain.
  assert(n.nextRef.isNil, "append to node that is not at the end of a list")
  n.nextRef = p
iterator list(n: NodeObj): NodeObj =
  ## Yield a copy of every element of the list node `n`, front to back.
  var cursor = n.headRef
  while cursor != nil:
    yield cursor[]
    cursor = cursor.nextRef
iterator walk(n: NodeObj): NodeObj =
  ## Yield `n` and then a copy of every node linked after it, following
  ## `nextRef` until the end of the chain.
  var current = n
  while true:
    yield current
    if current.nextRef.isNil:
      break
    current = current.nextRef[]
proc append(list, n: NodeRef) =
  ## Append node `n` to the end of list node `list`.
  ##
  ## When `n` already has successors the tail pointer is advanced past them,
  ## so the whole chain becomes part of the list.
  if not list.headRef.isNil:
    list.tailRef.next = n
  else:
    list.headRef = n
    list.tailRef = n
  # Move the tail to the last node of whatever chain is now attached.
  while not list.tailRef.nextRef.isNil:
    assert(list.tailRef != list.tailRef.nextRef)
    list.tailRef = list.tailRef.nextRef
proc append(list: NodeRef; n: NodeObj) =
  ## Append a heap copy of `n` to the list node `list`.
  var boxed: NodeRef
  new boxed
  boxed[] = n
  append(list, boxed)
2018-12-12 17:56:44 +01:00
template returnError(n: NodeObj) =
  ## Make the calling proc return `n`'s atom as a node when it is an error atom.
  if n.atom.kind == atomError:
    return newNode(n.atom)
2018-12-21 03:50:36 +01:00
proc getBlob(env: Env; path: string): tuple[id: BlobId, size: BiggestInt] =
  ## Blob id and size for the file at `path`.
  ##
  ## A cached entry with a non-zero size is returned as is; otherwise the
  ## file is ingested into the environment's store, and the result is cached
  ## when its size is non-zero.
  let cached = env.blobs.getOrDefault(path)
  if cached.size != 0:
    return cached
  result = env.store.ingestFile(path)
  if result.size != 0:
    env.blobs[path] = result
2018-12-23 03:23:10 +01:00
proc getSet(env: Env; path: string): BlobSet =
  ## Blob set for `path`.
  ##
  ## A cached set is returned as is; otherwise a new set is filled by
  ## inserting `path` through `insertPath`, and it is cached when it is not
  ## empty.
  let cached = env.sets.getOrDefault(path)
  if not cached.isNil:
    return cached
  result = newBlobSet()
  result.insertPath(env.store, getFileInfo(path).kind, path)
  if not result.isEmpty:
    env.sets[path] = result
2018-09-07 15:34:25 +02:00
type
  Tokens = seq[string]
    ## Token strings in source order, as produced by `tokenizer`.
  Reader = ref object
    ## State of the line-based reader.
    buffer: string ## Input text kept when a form could not be completed.
    tokens: Tokens ## Tokens of the text currently being parsed.
    pos: int ## Index into `tokens` of the next token to consume.
proc newReader(): Reader =
  ## Create a reader with an empty buffer and no tokens.
  new result
  result.buffer = ""
  result.tokens = newSeq[string]()
proc next(r: Reader): string =
  ## Return the current token and advance the reader by one.
  ## Asserts that a token is available.
  assert(r.pos < r.tokens.len, $r.tokens)
  result = r.tokens[r.pos]
  r.pos += 1
proc peek(r: Reader): string =
  ## Return the current token without consuming it.
  ## Asserts that a token is available.
  assert(r.pos < r.tokens.len, $r.tokens)
  result = r.tokens[r.pos]
proc print(a: Atom; s: Stream) =
  ## Write the textual form of atom `a` to stream `s`.
  case a.kind
  of atomBlob:
    # blob id, a '|' separator, then the size
    s.write($a.blob)
    s.write('|')
    s.write($a.size)
  of atomSet:
    s.write("«set»")
  of atomPath:
    s.write(a.path)
  of atomString:
    # strings are shown between double quotes
    s.write('"')
    s.write(a.str)
    s.write('"')
  #[
  of atomData:
    let fut = newFutureStream[string]()
    asyncCheck env.store.fileStream(a.fileCid, fut)
    while true:
      let (valid, chunk) = fut.read()
      if not valid: break
      f.write chunk
  ]#
  of atomNum:
    s.write($a.num)
  of atomSymbol:
    s.write(a.sym)
  of atomError:
    # errors are shown between guillemets
    s.write("«")
    s.write(a.err)
    s.write("»")
proc print(ast: NodeObj; s: Stream) =
  ## Write the textual form of node `ast` to stream `s`.
  ##
  ## Lists start on a new line and are written as "( e1 e2 ... )"; functions
  ## as "#<procedure NAME>"; errors as "«MESSAGE: NODE»".
  case ast.kind
  of nodeList:
    s.write("\n(")
    for element in ast.list:
      s.write(" ")
      print(element, s)
    s.write(" )")
  of nodeAtom:
    print(ast.atom, s)
  of nodeError:
    s.write("«")
    s.write(ast.errMsg)
    s.write(": ")
    print(ast.errNode, s)
    s.write("»")
  of nodeFunc:
    s.write("#<procedure ")
    s.write(ast.name)
    s.write(">")
proc print(ast: NodeRef; s: Stream) =
  ## Write the node referenced by `ast` to `s`, or "«nil»" for a nil reference.
  if not ast.isNil:
    print(ast[], s)
  else:
    s.write("«nil»")
proc readAtom(r: Reader): Atom =
  ## Consume one token and classify it.
  ##
  ## * starts with a double quote: a string atom, or an error atom when the
  ##   closing quote is missing;
  ## * contains the directory separator: a path atom;
  ## * has length `blobVisualLen`: a blob atom;
  ## * anything else: a symbol atom holding the normalized token.
  let token = r.next
  if token[0] == '"':
    if token[^1] == '"':
      result = newAtomString(token[1 .. ^2])
    else:
      result = newAtomError("invalid string '$1'" % token)
  elif DirSep in token:
    # TODO: memoize this, store a table of paths to atoms
    result = newAtomPath(token)
  elif token.len == blobVisualLen:
    result = Atom(kind: atomBlob, blob: token.toBlobId)
  else:
    result = Atom(kind: atomSymbol, sym: normalize(token))
# Forward declaration: readList calls readForm, which is defined after it.
proc readForm(r: Reader): NodeRef
proc readList(r: Reader): NodeRef =
  ## Read list elements up to the closing parenthesis.
  ##
  ## The opening parenthesis has already been consumed by the caller.
  ## Returns the list node, or nil when the tokens run out before the list
  ## is closed, which tells the caller that more input is needed.
  result = newNodeList()
  while true:
    if r.pos == r.tokens.len:
      return nil
    let p = r.peek
    case p[p.high]
    of ')':
      discard r.next
      break
    else:
      let form = r.readForm
      if form.isNil:
        # A nested list is still open. Propagate "incomplete" instead of
        # appending a nil node to the list.
        return nil
      result.append form
proc readForm(r: Reader): NodeRef =
  ## Read one form: a list when the current token starts with "(",
  ## otherwise a single atom wrapped in a node.
  if r.peek[0] == '(':
    discard r.next # consume the opening parenthesis
    result = readList(r)
  else:
    result = newNode(readAtom(r))
proc tokenizer(s: string): Tokens =
  ## Split `s` into tokens: parentheses and other special characters, double
  ## quoted strings, `;` comments and runs of ordinary characters.
  ##
  ## Whitespace and commas between tokens are separators and never become
  ## part of a token.
  # TODO: this sucks
  let matches = s.findAll(re"""[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"|;.*|[^\s\[\]{}('"`,;)]*)""")
  result = newSeqOfCap[string](matches.len)
  for m in matches:
    # `findAll` returns the whole match, including the `[\s,]*` separator
    # prefix. No token starts with whitespace or a comma, so stripping both
    # from the front removes exactly that prefix.
    let t = m.strip(trailing = false, chars = Whitespace + {','}).strip(leading = false)
    if t.len > 0:
      result.add t
proc read(r: Reader; line: string): NodeRef =
  ## Feed one line of input to the reader.
  ##
  ## Returns the parsed form, or nil when the input is still incomplete or
  ## holds no token at all. Text of an incomplete form stays in the reader's
  ## buffer, and later lines are appended to it separated by a space until
  ## the form can be read.
  r.pos = 0
  if r.buffer.len > 0:
    r.buffer.add " "
  r.buffer.add line
  r.tokens = r.buffer.tokenizer
  if r.tokens.len == 0:
    # Nothing to parse (e.g. a whitespace-only line); do not call readForm,
    # which requires at least one token.
    r.buffer.setLen 0
    return nil
  result = r.readForm
  if not result.isNil:
    r.buffer.setLen 0
  # On an incomplete form the buffer keeps everything read so far, not just
  # the latest line.
proc assertArgCount(args: NodeObj; len: int) =
  ## Check that the argument chain starting at `args` has exactly `len` nodes.
  ##
  ## Raises `EvalError` on a mismatch. `eval` turns that into an error node,
  ## so a wrong number of arguments is reported to the user instead of
  ## terminating the program with an assertion failure.
  var arg = args
  for _ in 2..len:
    if arg.nextRef.isNil:
      raise newException(EvalError, "too few arguments, expected $1" % [$len])
    arg = arg.next
  if not arg.nextRef.isNil:
    raise newException(EvalError, "too many arguments, expected $1" % [$len])
##
# Builtin functions
#
proc applyFunc(env: Env; args: NodeObj): NodeRef =
  ## Builtin `apply`: call the function given as first argument with the
  ## first element of the list given as second argument.
  assertArgCount(args, 2)
  let argList = args.next
  result = args.fun(env, argList.head)
proc cborFunc(env: Env; arg: NodeObj): NodeRef =
  ## Builtin `cbor`: return the string form of the CBOR encoding of the
  ## blob set held by `arg`.
  assertArgCount(arg, 1)
  let encoded = toCbor(arg.atom.bs)
  result = newNode(newAtomString($encoded))
proc commitFunc(env: Env; arg: NodeObj): NodeRef =
  ## Builtin `commit`: not implemented yet.
  ##
  ## Raises `EvalError`, which `eval` reports as an error node, rather than
  ## an assertion failure that would end the whole session.
  assertArgCount(arg, 1)
  raise newException(EvalError, "not implemented")
2018-09-07 15:34:25 +02:00
2018-12-23 03:23:10 +01:00
#[
2018-09-07 15:34:25 +02:00
proc copyFunc(env: Env; args: NodeObj): NodeRef =
assertArgCount(args, 3)
let
x = args
y = x.next
z = y.next
var root = newFsRoot()
let dir = env.getUnixfs x.atom.cid
for name, node in dir.items:
root.add(name, node)
root.add(z.atom.str, dir[y.atom.str])
let cid = env.store.putDag(root.toCbor)
cid.newAtom.newNode
2018-12-23 03:23:10 +01:00
]#
2018-09-07 15:34:25 +02:00
proc consFunc(env: Env; args: NodeObj): NodeRef =
  ## Builtin `cons`: return a list made of the first argument followed by
  ## the elements of the list given as second argument.
  assertArgCount(args, 2)
  result = newNodeList()
  var car = args
  let cdr = args.next
  # `args` is still linked to the second argument. Detach the copy so that
  # only the value itself is prepended, not the cdr list node as well.
  car.nextRef = nil
  result.append car
  # An empty cdr has no head element to append.
  if not cdr.headRef.isNil:
    result.append cdr.head
proc defineFunc(env: Env; args: NodeObj): NodeRef =
  ## Builtin `define`: bind the symbol given first to the value given second
  ## and return a new node holding that value.
  assertArgCount(args, 2)
  let value = args.next
  env.bindings[args.atom.sym] = value
  result = new NodeRef
  result[] = value
proc globFunc(env: Env; args: NodeObj): NodeRef =
  ## Builtin `glob`: return a list of path nodes.
  ##
  ## Path arguments are passed through unchanged; string arguments are
  ## expanded with `walkPattern`, one path node per match. Any other
  ## argument kind ends the call with an error node for that argument.
  result = newNodeList()
  for n in args.walk:
    let a = n.atom
    case a.kind
    of atomPath:
      # `walk` yields nodes that are still linked to the following
      # arguments; append a detached copy so each path appears once.
      var single = n
      single.nextRef = nil
      result.append single
    of atomString:
      for match in walkPattern a.str:
        result.append match.newAtomPath.newNode
    else:
      # Stop here: `result` must not be used as a list once it is an error.
      return newNodeError("invalid glob argument", n)
2018-12-23 03:23:10 +01:00
proc keyFunc(env: Env; args: NodeObj): NodeRef =
  ## Builtin `key`: apply `toKey` to the string argument and return the
  ## result as an atom node.
  assertArgCount(args, 1)
  result = newNode(newAtom(toKey(args.atom.str)))
2018-09-07 15:34:25 +02:00
proc ingestFunc(env: Env; args: NodeObj): NodeRef =
  ## Builtin `ingest`: return the union of the blob sets obtained through
  ## `getSet` for every path argument.
  var merged = newBlobSet()
  for node in walk(args):
    let part = env.getSet(node.atom.path)
    merged = env.store.union(merged, part)
  result = newNode(newAtom(merged))
2018-09-07 15:34:25 +02:00
2018-12-21 03:50:36 +01:00
proc blobFunc(env: Env; args: NodeObj): NodeRef =
  ## Builtin `blob`: return the blob atom (id and size) for the path argument.
  assertArgCount(args, 1)
  let info = env.getBlob(args.atom.path)
  result = newNode(newAtom(info))
2018-12-21 03:50:36 +01:00
2018-09-07 15:34:25 +02:00
proc listFunc(env: Env; args: NodeObj): NodeRef =
  ## Standard Lisp 'list' function: return a list whose head is a copy of
  ## the first argument and whose tail is the last node linked after it.
  let first = new NodeRef
  first[] = args
  # Follow the chain to find the final argument.
  var last = first
  while not last.nextRef.isNil:
    last = last.nextRef
  result = newNodeList()
  result.headRef = first
  result.tailRef = last
2018-12-23 03:23:10 +01:00
#[
2018-09-07 15:34:25 +02:00
proc lsFunc(env: Env; args: NodeObj): NodeRef =
result = newNodeList()
for n in args.walk:
let
a = n.atom
ufsNode = env.getUnixfs a.cid
if ufsNode.isDir:
for name, u in ufsNode.items:
let e = newNodeList()
e.append u.cid.newAtom.newNode
e.append name.newAtomString.newNode
result.append e
2018-12-23 03:23:10 +01:00
]#
2018-09-07 15:34:25 +02:00
proc mapFunc(env: Env; args: NodeObj): NodeRef =
  ## Builtin `map`: call the function given first once per element of the
  ## list given second and return the list of results.
  assertArgCount(args, 2)
  result = newNodeList()
  let f = args.fun
  for v in args.next.list:
    # `list` yields copies that are still linked to the next element.
    # Detach each one so `f` receives a single argument.
    var single = v
    single.nextRef = nil
    result.append f(env, single)
2018-12-23 03:23:10 +01:00
#[
2018-09-07 15:34:25 +02:00
proc mergeFunc(env: Env; args: NodeObj): NodeRef =
var root = newFsRoot()
for n in args.walk:
let
a = n.atom
dir = env.getUnixfs a.cid
for name, node in dir.items:
root.add(name, node)
let cid = env.store.putDag(root.toCbor)
cid.newAtom.newNode
2018-12-23 03:23:10 +01:00
]#
2018-09-07 15:34:25 +02:00
proc pathFunc(env: Env; arg: NodeObj): NodeRef =
  ## Builtin `path`: turn the string argument into a path node.
  newNode(newAtomPath(arg.atom.str))
2018-12-23 03:23:10 +01:00
#[
2018-09-07 15:34:25 +02:00
proc rootFunc(env: Env; args: NodeObj): NodeRef =
var root = newFsRoot()
let
name = args.atom.str
cid = args.next.atom.cid
ufs = env.getUnixfs cid
root.add(name, ufs)
let rootCid = env.store.putDag(root.toCbor)
rootCid.newAtom.newNode
2018-12-23 03:23:10 +01:00
]#
proc unionFunc(env: Env; args: NodeObj): NodeRef =
  ## Builtin `union`: return the union of the two blob sets given as arguments.
  assertArgCount(args, 2)
  let
    lhs = args.atom.bs
    rhs = args.next.atom.bs
  result = newNode(newAtom(env.store.union(lhs, rhs)))
2018-09-07 15:34:25 +02:00
##
# Environment
#
proc bindEnv(env: Env; name: string; fun: Func) =
  ## Register builtin `fun` under `name`. Asserts that the name is not bound yet.
  assert(not env.bindings.contains name)
  let node = NodeObj(kind: nodeFunc, fun: fun, name: name)
  env.bindings[name] = node
2018-12-21 03:50:36 +01:00
proc newEnv(store: BlobStore): Env =
  ## Create an environment backed by `store`, with empty caches and all
  ## builtin functions bound to their names.
  new result
  result.store = store
  result.bindings = initTable[string, NodeObj]()
  result.blobs = initTable[string, tuple[id: BlobId, size: BiggestInt]]()
  result.sets = initTable[string, BlobSet]()
  bindEnv(result, "apply", applyFunc)
  bindEnv(result, "blob", blobFunc)
  bindEnv(result, "cbor", cborFunc)
  bindEnv(result, "commit", commitFunc)
  bindEnv(result, "cons", consFunc)
  #bindEnv(result, "copy", copyFunc)
  bindEnv(result, "define", defineFunc)
  bindEnv(result, "glob", globFunc)
  bindEnv(result, "key", keyFunc)
  bindEnv(result, "ingest", ingestFunc)
  bindEnv(result, "list", listFunc)
  #bindEnv(result, "ls", lsFunc)
  bindEnv(result, "map", mapFunc)
  #bindEnv(result, "merge", mergeFunc)
  bindEnv(result, "path", pathFunc)
  #bindEnv(result, "root", rootFunc)
  #bindEnv(result, "walk", walkFunc)
  bindEnv(result, "union", unionFunc)
2018-09-07 15:34:25 +02:00
# Forward declaration: eval_ast calls eval, which is defined after it.
proc eval(ast: NodeRef; env: Env): NodeRef
proc eval_ast(ast: NodeRef; env: Env): NodeRef =
  ## Evaluate the parts of `ast` without applying anything.
  ##
  ## * list: every element is unlinked from `ast` (which is emptied in the
  ##   process), evaluated, and appended to a new list;
  ## * symbol atom with a binding: a new node holding the bound value;
  ## * anything else: `ast` itself.
  case ast.kind
  of nodeList:
    result = newNodeList()
    while ast.headRef != nil:
      # cut out the head of the list and evaluate
      let element = ast.headRef
      ast.headRef = element.nextRef
      element.nextRef = nil
      result.append eval(element, env)
  of nodeAtom:
    if ast.atom.kind == atomSymbol and env.bindings.contains(ast.atom.sym):
      result = new NodeRef
      result[] = env.bindings[ast.atom.sym]
    else:
      result = ast
  else:
    result = ast
proc eval(ast: NodeRef; env: Env): NodeRef =
  ## Evaluate `ast` in `env`.
  ##
  ## A non-empty list is evaluated element by element and its head is then
  ## called with the remaining elements as argument chain (or with an empty
  ## list node when there are none). A head that is not a function yields a
  ## "not a function" error node. An empty list evaluates to a new empty
  ## list; everything else goes through `eval_ast`.
  ##
  ## `EvalError`, `FieldError`, `MissingChunk` and `OSError` raised during
  ## evaluation are turned into error nodes that refer to the input being
  ## processed at that point.
  var input = ast[]
  try:
    if ast.kind != nodeList:
      result = eval_ast(ast, env)
    elif ast.headRef == nil:
      result = newNodeList()
    else:
      let
        evaluated = eval_ast(ast, env)
        head = evaluated.headRef
      if head.kind != nodeFunc:
        input = head[]
        result = newNodeError("not a function", input)
      else:
        if head.nextRef.isNil:
          input = NodeObj(kind: nodeList)
        else:
          input = head.next
        result = head.fun(env, input)
  except EvalError:
    result = newNodeError(getCurrentExceptionMsg(), input)
  except FieldError:
    result = newNodeError("invalid argument", input)
  except MissingChunk:
    result = newNodeError("chunk not in store", input)
  except OSError:
    result = newNodeError(getCurrentExceptionMsg(), input)
2018-12-21 03:50:36 +01:00
proc readLineSimple(prompt: string; line: var TaintedString): bool =
  ## Read one line from standard input into `line` without showing a prompt;
  ## `prompt` is accepted only to match the signature of the prompting reader.
  result = readLine(stdin, line)
2018-09-07 15:34:25 +02:00
2018-12-23 03:23:10 +01:00
#[
2018-09-07 15:34:25 +02:00
when defined(genode):
2018-12-21 03:50:36 +01:00
proc openStore(): BlobStore =
2018-09-07 15:34:25 +02:00
result = newDagfsClient("repl")
else:
2018-12-21 03:50:36 +01:00
proc openStore(): BlobStore =
2018-09-07 15:34:25 +02:00
var host = ""
for kind, key, value in getopt():
2018-12-21 03:50:36 +01:00
if kind == cmdShortOption:
if key == "h":
if host != "":
quit "only a single store path argument is accepted"
host = value
2018-09-07 15:34:25 +02:00
if host == "": host = "127.0.0.1"
try: result = newTcpClient(host)
except:
quit("failed to connect to store at $1 ($2)" % [host, getCurrentExceptionMsg()])
2018-12-23 03:23:10 +01:00
]#
2018-09-07 15:34:25 +02:00
2018-12-21 03:50:36 +01:00
proc replMain() =
  ## Entry point of the `repl` subcommand.
  ##
  ## Reads lines, evaluates each completed form in a fresh environment and
  ## prints the result followed by a newline to standard output. With the
  ## `-s` option lines are read with `readLineSimple` instead of
  ## `readLineFromStdin`.
  var scripted = false
  for kind, key, value in getopt():
    if kind == cmdShortOption and key == "s":
      scripted = true
  let
    #store = openStore()
    store = newNullStore() # newFileStore("/tmp/blobs")
    env = newEnv(store)
    outStream = newFileStream(stdout)
    readLine = if scripted: readLineSimple else: readLineFromStdin
    reader = newReader()
  var line = newStringOfCap(128)
  while readLine("> ", line):
    if line.len == 0:
      continue
    let ast = reader.read(line)
    if ast.isNil:
      continue # form not complete yet, wait for more input
    print(eval(ast, env), outStream)
    outStream.write("\n")
    outStream.flush()
2018-12-21 03:50:36 +01:00
proc main() =
  ## Program entry point: dispatch on the first positional command-line
  ## argument (`repl`, `dump` or `ingest`, compared after `normalize`).
  ## Quits with a message when no subcommand or an unknown one is given.
  var cmd = ""
  for kind, key, val in getopt():
    if kind == cmdArgument:
      cmd = key
      break
  case normalize(cmd)
  of "":
    quit("no subcommand specified")
  of "repl":
    replMain()
  of "dump":
    dumpMain()
  of "ingest":
    ingestMain()
  else:
    # Name the offending subcommand in the message.
    quit("no such subcommand " & cmd)

main()