Ingest and dump blobs

Ehmry - 2018-12-21 03:50:36 +01:00
parent c6fcfecd1d
commit 1aba9dcd42
14 changed files with 949 additions and 473 deletions

.gitignore vendored

@ -1,12 +1,2 @@
nimcache
ipldrepl
/dagfs_repl
/genode/dagfs_genode/dagfs_fs
/genode/dagfs_genode/dagfs_fs_store
/genode/dagfs_genode/dagfs_rom
/genode/dagfs_genode/dagfs_tcp_store
/genode/dagfs_genode/bin/dagfs_fs
/genode/dagfs_genode/bin/dagfs_fs_store
/genode/dagfs_genode/bin/dagfs_rom
/genode/dagfs_genode/bin/dagfs_server
/genode/dagfs_genode/bin/dagfs_tcp_store
blobset

blobsets.nimble Normal file

@ -0,0 +1,12 @@
# Package
version = "0.1.2"
author = "Emery Hemingway"
description = "Sets of named blobs"
license = "AGPLv3"
srcDir = "src"
requires "nim >= 0.18.0", "base58", "cbor >= 0.5.1", "siphash", "nimcrypto"
bin = @["blobset"]
skipFiles = @["blobset.nim"]


@ -1,12 +0,0 @@
# Package
version = "0.1.2"
author = "Emery Hemingway"
description = "A simple content addressed file-system"
license = "GPLv3"
srcDir = "src"
requires "nim >= 0.18.0", "base58", "cbor >= 0.5.1", "siphash"
bin = @["dagfs_repl"]
skipFiles = @["dagfs_repl.nim"]


@ -1,17 +1,25 @@
import nre, os, strutils, tables, parseopt, streams, cbor
when not isMainModule:
{.error: "this module is not a library, import blobsets instead".}
import ./dagfs, ./dagfs/stores, ./dagfs/fsnodes
import std/nre, std/os, std/strutils, std/tables, std/parseopt, std/streams, std/rdstdin
import cbor
import ./blobsets, ./blobsets/stores, ./blobsets/fsnodes
when defined(genode):
import dagfsclient
else:
import ./blobsets/tcp
type
EvalError = object of CatchableError
type
Env = ref EnvObj
AtomKind = enum
atomPath
atomCid
atomString
atomNum
atomSymbol
atomError
@ -24,6 +32,8 @@ type
cid: Cid
of atomString:
str: string
of atomNum:
num: BiggestInt
of atomSymbol:
sym: string
of atomError:
@ -55,8 +65,9 @@ type
nextRef: NodeRef
EnvObj = object
store: DagfsStore
store: BlobStore
bindings: Table[string, NodeObj]
blobs: Table[string, tuple[id: BlobId, size: BiggestInt]]
paths: Table[string, FsNode]
cids: Table[Cid, FsNode]
@ -79,6 +90,9 @@ proc newAtomPath(s: string): Atom =
proc newAtomString(s: string): Atom =
Atom(kind: atomString, str: s)
proc newAtom(i: Natural): Atom =
Atom(kind: atomNum, num: i)
proc newNodeError(msg: string; n: NodeObj): NodeRef =
var p = new NodeRef
p[] = n
@ -139,6 +153,13 @@ template returnError(n: NodeObj) =
if n.atom.kind == atomError:
return n.atom.newNode
proc getBlob(env: Env; path: string): tuple[id: BlobId, size: BiggestInt] =
result = env.blobs.getOrDefault(path)
if result.size == 0:
result = env.store.ingestFile(path)
if result.size != 0:
env.blobs[path] = result
proc getFile(env: Env; path: string): FsNode =
result = env.paths.getOrDefault path
if result.isNil:
@ -201,6 +222,8 @@ proc print(a: Atom; s: Stream) =
if not valid: break
f.write chunk
]#
of atomNum:
s.write $a.num
of atomSymbol:
s.write a.sym
of atomError:
@ -217,7 +240,7 @@ proc print(ast: NodeObj; s: Stream) =
for n in ast.list:
s.write " "
n.print(s)
s.write ")"
s.write " )"
of nodeFunc:
s.write "#<procedure "
s.write ast.name
@ -247,7 +270,7 @@ proc readAtom(r: Reader): Atom =
# TODO: memoize this, store a table of paths to atoms
newAtomPath token
elif token.len == 46 or token.len > 48:
Atom(kind: atomCid, cid: token.parseCid)
Atom(kind: atomCid, cid: token.toBlobId)
else:
Atom(kind: atomSymbol, sym: token.normalize)
#except:
@ -388,6 +411,14 @@ proc ingestFunc(env: Env; args: NodeObj): NodeRef =
cid = env.store.putDag(root.toCbor)
cid.newAtom.newNode
proc blobFunc(env: Env; args: NodeObj): NodeRef =
assertArgCount(args, 1)
let (blob, size) = env.getBlob args.atom.path
result = newNodeList()
result.append blob.newAtom.newNode
result.append size.newAtom.newNode
# TODO: natural number atom
proc listFunc(env: Env; args: NodeObj): NodeRef =
## Standard Lisp 'list' function.
result = newNodeList()
@ -461,10 +492,11 @@ proc bindEnv(env: Env; name: string; fun: Func) =
assert(not env.bindings.contains name)
env.bindings[name] = NodeObj(kind: nodeFunc, fun: fun, name: name)
proc newEnv(store: DagfsStore): Env =
proc newEnv(store: BlobStore): Env =
result = Env(
store: store,
bindings: initTable[string, NodeObj](),
blobs: initTable[string, tuple[id: BlobId, size: BiggestInt]](),
paths: initTable[string, FsNode](),
cids: initTable[Cid, FsNode]())
result.bindEnv "apply", applyFunc
@ -481,6 +513,7 @@ proc newEnv(store: DagfsStore): Env =
result.bindEnv "path", pathFunc
result.bindEnv "root", rootFunc
result.bindEnv "walk", walkFunc
result.bindEnv "blob", blobFunc
proc eval(ast: NodeRef; env: Env): NodeRef
@ -534,55 +567,37 @@ proc eval(ast: NodeRef; env: Env): NodeRef =
except OSError:
newNodeError(getCurrentExceptionMsg(), input)
var scripted = false
proc readLineSimple(prompt: string; line: var TaintedString): bool =
stdin.readLine(line)
when defined(genode):
import dagfsclient
proc openStore(): DagfsStore =
proc openStore(): BlobStore =
result = newDagfsClient("repl")
scripted = true # do not use linenoise for the moment
#[
for kind, key, value in getopt():
if kind == cmdShortOption and key == "s":
scripted = true
else:
quit "unhandled argument " & key
]#
else:
import ./dagfs/tcp
proc openStore(): DagfsStore =
proc openStore(): BlobStore =
var host = ""
for kind, key, value in getopt():
case kind
of cmdShortOption:
if key == "s":
scripted = true
else:
quit "unhandled argument " & key
of cmdArgument:
if host != "":
quit "only a single store path argument is accepted"
host = key
else:
quit "unhandled argument " & key
if kind == cmdShortOption:
if key == "h":
if host != "":
quit "only a single store path argument is accepted"
host = value
if host == "": host = "127.0.0.1"
try: result = newTcpClient(host)
except:
quit("failed to connect to store at $1 ($2)" % [host, getCurrentExceptionMsg()])
import rdstdin
proc readLineSimple(prompt: string; line: var TaintedString): bool =
stdin.readLine(line)
proc main() =
proc replMain() =
var scripted: bool
for kind, key, value in getopt():
if kind == cmdShortOption and key == "s":
scripted = true
let
store = openStore()
#store = openStore()
store = newFileStore("/tmp/blobs")
env = newEnv(store)
outStream = stdout.newFileStream
readLine = if scripted: readLineSimple else: readLineFromStdin
var
reader = newReader()
line = newStringOfCap 128
@ -594,5 +609,67 @@ proc main() =
outStream.write "\n"
flush outStream
proc dumpMain() =
var args = newSeq[string]()
for kind, key, val in getopt():
if kind == cmdArgument:
args.add key
if args.len > 1:
let store = newFileStore("/tmp/blobs")
for i in 1..args.high:
try:
for chunk in store.dumpBlob(args[i].toBlobId):
write(stdout, chunk)
except:
writeLine(stderr, "failed to dump '", args[i], "', ", getCurrentExceptionMsg())
quit(-1)
proc insertPath(set: BlobSet; store: BlobStore; kind: PathComponent; path: string) =
try:
case kind
of pcFile, pcLinkToFile:
let (id, size) = store.ingestFile(path)
set.insert(path, id, size)
writeLine(stdout, id, align($size, 11), " ", path)
of pcDir, pcLinkToDir:
for kind, subPath in path.walkDir:
set.insertPath(store, kind, normalizedPath subPath)
except:
let e = getCurrentException()
writeLine(stderr, "failed to ingest '", path, "', ", e.msg)
# raise e
proc ingestMain() =
var args = newSeq[string]()
for kind, key, val in getopt():
if kind == cmdArgument:
args.add key
if args.len > 1:
var set = newBlobSet()
#let store = newFileStore("/tmp/blobs")
let store = newNullStore()
for i in 1..args.high:
let path = normalizedPath args[i]
set.insertPath(store, path.getFileInfo.kind, path)
let final = store.commit set
writeLine(stdout, final.setId)
proc main() =
var cmd = ""
for kind, key, val in getopt():
if kind == cmdArgument:
cmd = key
break
case normalize(cmd)
of "":
quit("no subcommand specified")
#of "repl":
# replMain()
of "dump":
dumpMain()
of "ingest":
ingestMain()
else:
quit("no such subcommand ")
main()
quit 0 # Genode doesn't implicitly quit

src/blobsets.nim Normal file

@ -0,0 +1,301 @@
import std/hashes, std/streams, std/strutils, std/bitops, std/unicode, std/endians
import base58/bitcoin, cbor, siphash
import ./blobsets/priv/hex
import nimcrypto, nimcrypto/blake2
const
digestLen* = 32
## Length of a chunk digest.
cidSize* = digestLen
## Size of CID object in memory
blobLeafSize* = 1 shl 14
## Size of blob leaves.
blobLeafSizeMask* = not(not(0) shl 14)
visualLen = 32 * 3
maxChunkSize* {.deprecated} = blobLeafSize
type
Blake2b256* = Blake2bContext[256]
BlobId* = MDigest[Blake2b256.bits]
## Blob Identifier
SetId* = MDigest[Blake2b256.bits]
## Set Identifier
Cid* {.deprecated} = BlobId
func `$`*(bh: BlobId): string =
## Convert a blob hash to a visual representation.
const baseRune = 0x2800
result = newString(visualLen)
var pos = 0
for b in bh.data.items:
let r = (Rune)baseRune or b.int
fastToUTF8Copy(r, result, pos, true)
func toBlobId*(s: string): BlobId =
## Parse a visual blob hash to binary.
if s.len == visualLen:
var
pos: int
r: Rune
for b in result.data.mitems:
fastRuneAt(s, pos, r, true)
b = r.byte
proc `==`*(x, y: BlobId): bool = x.data == y.data
## Compare two BlobIds.
proc `==`*(cbor: CborNode; cid: BlobId): bool =
## Compare a CBOR node with a BlobId.
if cbor.kind == cborBytes:
for i in 0..<digestLen:
if cid.data[i] != cbor.bytes[i].uint8:
return false
result = true
proc hash*(cid: BlobId): Hash =
## Reduce a BlobId into an integer for use in tables.
var zeroKey: Key
result = cast[Hash](sipHash(cid.data, zeroKey))
proc toCbor*(cid: BlobId): CborNode = newCborBytes cid.data
## Generate a CBOR representation of a BlobId.
proc toBlobId*(cbor: CborNode): BlobId =
## Parse a BlobId from a CBOR node.
assert(cbor.bytes.len == digestLen)
for i in 0..<digestLen:
result.data[i] = cbor.bytes[i].uint8
{.deprecated: [newCborBytes: toCbor].}
proc toHex*(cid: BlobId): string = hex.encode(cid.data)
## Return BlobId encoded in hexadecimal.
proc writeUvarint*(s: Stream; n: SomeInteger) =
## Write an IPFS varint
var n = n
while true:
let c = int8(n and 0x7f)
n = n shr 7
if n == 0:
s.write((char)c.char)
break
else:
s.write((char)c or 0x80)
proc readUvarint*(s: Stream): BiggestInt =
## Read an IPFS varint
var shift: int
while shift < (9*8):
let c = (BiggestInt)s.readChar
result = result or ((c and 0x7f) shl shift)
if (c and 0x80) == 0:
break
shift.inc 7
proc toIpfs*(cid: BlobId): string =
## Return BlobId encoded as an IPFS CID (CIDv1, raw codec, base58btc multibase).
const
multiRaw = 0x55
multiBlake2b_256 = 0xb220
let s = newStringStream()
s.writeUvarint 1
s.writeUvarint multiRaw
s.writeUvarint multi_blake2b_256
s.writeUvarint digestLen
for e in cid.data:
s.write e
s.setPosition 0
result = 'z' & bitcoin.encode(s.readAll)
close s
const
zeroChunk* = "8ddb61928ec76e4ee904cd79ed977ab6f5d9187f1102975060a6ba6ce10e5481".toDigest
## BlobId of zero chunk of maximum size.
proc take*(cid: var BlobId; buf: var string) =
## Take a raw digest from a string buffer.
doAssert(buf.len == digestLen)
copyMem(cid.data[0].addr, buf[0].addr, digestLen)
proc dagHash*(buf: pointer; len: Natural): BlobId =
## Generate a BlobId for a string of data using the BLAKE2b hash algorithm.
assert(len <= maxChunkSize)
var b: Blake2b256
init(b)
update(b, buf, len)
b.finish()
proc dagHash*(data: string): BlobId =
## Generate a BlobId for a string of data using the BLAKE2b hash algorithm.
assert(data.len <= maxChunkSize)
var b: Blake2b256
init(b)
update(b, data)
b.finish()
proc verify*(cid: BlobId; data: string): bool =
## Verify that a string of data corresponds to a BlobId.
var b: Blake2b256
init(b)
update(b, data)
finish(b) == cid
iterator simpleChunks*(s: Stream; size = maxChunkSize): string =
## Iterator that breaks a stream into simple chunks.
doAssert(size <= maxChunkSize)
var tmp = newString(size)
while not s.atEnd:
tmp.setLen(size)
tmp.setLen(s.readData(tmp[0].addr, size))
yield tmp
func isNonZero*(bh: BlobId): bool =
## Test if a blob hash is not zeroed.
var r: byte
for b in bh.data.items:
{.unroll.}
r = r or b
r != 0
{.deprecated: [isValid: isNonZero].}
type
Key = int64
const
keyBits = sizeof(Key) shl 3
keyChunkBits = fastLog2 keyBits
keyChunkMask = not ((not 0.Key) shl (keyChunkBits))
func toKey(s: string): Key =
var key: siphash.Key
let b = sipHash(toOpenArrayByte(s, s.low, s.high), key)
cast[Key](b)
func toCbor(k: Key): CborNode =
## Keys are endian independent.
newCborBytes cast[array[sizeof(k), byte]](k)
type
setKind* = enum hotNode, coldNode, leafNode
BlobSet* = ref BlobSetObj
BlobSetObj = object
case kind*: setKind
of hotNode:
bitmap: Key
table*: seq[BlobSet]
of coldNode:
setId*: SetId
of leafNode:
key: Key
blob: BlobId
size: BiggestInt
func newBlobSet*(): BlobSet =
BlobSet(kind: hotNode, table: newSeqOfCap[BlobSet](2))
func sparseIndex(x: Key): int = int(x and keyChunkMask)
func compactIndex(t: BlobSet; x: Key): int =
if (x and keyChunkMask) != 0:
# TODO: bug in shr and shl, cannot shift all bits out
result = (int)countSetBits(t.bitmap shl (keyBits - x.sparseIndex))
func masked(t: BlobSet; x: Key): bool =
((t.bitmap shr x.sparseIndex) and 1) != 0
func nodeCount*(bs: BlobSet): int =
## Count of internal nodes in set.
result = 1
for n in bs.table:
assert(n.kind != coldNode, "cannot count cold nodes")
if n.kind == hotNode:
result.inc n.nodeCount
func leafCount*(bs: BlobSet): int =
## Count of leaves in set.
for n in bs.table:
assert(n.kind != coldNode, "cannot count leaves of cold nodes")
if n.kind == leafNode:
result.inc 1
else:
result.inc n.leafCount
func search*(t: BlobSet; name: string): BlobId =
var
t = t
key = name.toKey
while true:
assert(key != 0, "keyspace exhausted during search")
if t.masked(key):
t = t.table[t.compactIndex(key)]
if t.kind == leafNode:
result = t.blob
break
key = key shr keyChunkBits
else:
raise newException(KeyError, "blob set does not contain key")
func insert(t, l: BlobSet; depth: int) =
## This procedure is recursive to a depth of keyBits/keyChunkBits.
doAssert(depth < (keyBits div keyChunkBits), "key space exhausted during insert")
let key = l.key shr (depth * keyChunkBits)
if t.masked(key):
let
depth = depth + 1
i = t.compactIndex(key)
case t.table[i].kind
of hotNode:
t.table[i].insert(l, depth)
of coldNode:
raiseAssert("cannot insert into cold node")
of leafNode:
if t.table[i].key == l.key:
raise newException(KeyError, "key collision in blob set")
let
subtrei = newBlobSet()
subtrei.insert(t.table[i], depth)
subtrei.insert(l, depth)
t.table[i] = subtrei
else:
t.bitmap = t.bitmap or (Key(1) shl key.sparseIndex)
t.table.insert(l, t.compactIndex(key))
func insert*(t: BlobSet; name: string; blob: BlobId; size: BiggestInt) =
## Insert a blob hash into a trie.
let leaf = BlobSet(kind: leafNode, key: name.toKey, blob: blob, size: size)
insert(t, leaf, 0)
func isEmpty*(s: BlobSet): bool = s.bitmap == Key(0)
## Test if a set is empty.
func toCbor*(x: BlobSet): CborNode =
const
nodeTag = 0
leafTag = 1
let array = newCborArray()
case x.kind
of hotNode:
var
map = x.bitmap
buf = newCborBytes(sizeof(Key))
when sizeof(Key) != 8:
{.error: "unknown key conversion".}
bigEndian64(buf.bytes[0].addr, map.addr)
array.add buf
for y in x.table:
array.add y.toCbor
newCborTag(nodeTag, array)
of coldNode:
array.add x.setId.data
newCborTag(nodeTag, array)
of leafNode:
array.add x.key.toCbor
array.add x.blob.data
array.add x.size
newCborTag(leafTag, array)
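
The BlobSet introduced above is a bitmap-indexed hash trie keyed by the SipHash of a name. A minimal sketch of how the exported pieces fit together (not part of the commit; the import path assumes the installed package layout, while inside the repo it is written as ./blobsets):

import blobsets

var bs = newBlobSet()
let
  data = "hello blob"
  id = dagHash(data)                    # BLAKE2b-256 digest of the data
bs.insert("greeting", id, BiggestInt(data.len))
doAssert bs.search("greeting") == id    # search raises KeyError for unknown names
doAssert bs.leafCount == 1
echo id                                 # 32-rune braille rendering of the digest
discard bs.toCbor                       # CBOR form used when a set is committed to a store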


@ -1,6 +1,6 @@
import strutils, streams, tables, cbor, os, math, asyncfile, asyncdispatch
import ../dagfs, ./stores
import ../blobsets, ./stores
type EntryKey = enum
typeKey = 1,
@ -50,12 +50,11 @@ proc size*(u: FsNode): BiggestInt =
proc newFsRoot*(): FsNode =
FsNode(
cid: initCid(),
kind: dirNode,
entries: initOrderedTable[string, FsNode](8))
proc newUnixfsFile*(): FsNode =
FsNode(kind: fileNode, cid: initCid())
FsNode(kind: fileNode)
proc newUnixfsDir*(cid: Cid): FsNode =
FsNode(cid: cid, kind: dirNode)
@ -248,7 +247,7 @@ proc lookupFile*(dir: FsNode; name: string): tuple[cid: Cid, size: BiggestInt] =
result.cid = f.cid
result.size = f.size
proc addFile*(store: DagfsStore; path: string): FsNode =
proc addFile*(store: BlobStore; path: string): FsNode =
## Add a file to the store and a FsNode.
let
file = openAsync(path, fmRead)
@ -284,7 +283,7 @@ proc addFile*(store: DagfsStore; path: string): FsNode =
u.cid = store.putDag(u.toCbor)
result = u
proc addDir*(store: DagfsStore; dirPath: string): FsNode =
proc addDir*(store: BlobStore; dirPath: string): FsNode =
var dRoot = newFsRoot()
for kind, path in walkDir dirPath:
var child: FsNode
@ -300,12 +299,12 @@ proc addDir*(store: DagfsStore; dirPath: string): FsNode =
cid = store.putDag(dag)
result = newUnixfsDir(cid)
proc open*(store: DagfsStore; cid: Cid): FsNode =
proc open*(store: BlobStore; cid: Cid): FsNode =
assert cid.isValid
let raw = store.get(cid)
result = parseFs(raw, cid)
proc openDir*(store: DagfsStore; cid: Cid): FsNode =
proc openDir*(store: BlobStore; cid: Cid): FsNode =
assert cid.isValid
var raw = ""
try: store.get(cid, raw)
@ -314,7 +313,7 @@ proc openDir*(store: DagfsStore; cid: Cid): FsNode =
result = parseFs(raw, cid)
assert(result.kind == dirNode)
proc walk*(store: DagfsStore; dir: FsNode; path: string; cache = true): FsNode =
proc walk*(store: BlobStore; dir: FsNode; path: string; cache = true): FsNode =
## Walk a path down a root.
assert(dir.kind == dirNode)
result = dir
@ -336,7 +335,7 @@ proc walk*(store: DagfsStore; dir: FsNode; path: string; cache = true): FsNode =
result = next
#[
iterator fileChunks*(store: DagfsStore; file: FsNode): string =
iterator fileChunks*(store: BlobStore; file: FsNode): string =
## Iterate over the links in a file and return futures for link data.
if file.cid.isRaw:
yield store.get(file.cid)
@ -350,7 +349,7 @@ iterator fileChunks*(store: DagfsStore; file: FsNode): string =
inc i
]#
proc readBuffer*(store: DagfsStore; file: FsNode; pos: BiggestInt;
proc readBuffer*(store: BlobStore; file: FsNode; pos: BiggestInt;
buf: pointer; size: int): int =
## Read a UnixFS file into a buffer. May return zero for any failure.
assert(pos > -1)


@ -1,6 +1,6 @@
import httpclient, json, base58/bitcoin, streams, cbor, tables
import ../dagfs, ./stores, ./fsnodes
import ../blobsets, ./stores, ./fsnodes
type
IpfsStore* = ref IpfsStoreObj


@ -1,10 +1,73 @@
import std/bitops, std/endians
type
Blake2b* = object
hash: array[8, uint64]
offset: array[2, uint64]
buffer: array[128, uint8]
buffer_idx: uint8
hash_size: uint8
Blake2b* = object
hash: array[8, uint64]
offset: array[2, uint64]
buffer: array[128, uint8]
buffer_idx: uint8
hash_size: uint8
Blake2bParams* = object
b: array[64, byte]
Blake2sParams* = object
b: array[32, byte]
Blake2Params* = Blake2bParams | Blake2sParams
proc params(c: var Blake2b): ptr Blake2bParams =
cast[ptr Blake2bParams](c.hash.addr)
proc `digestLen=`*(p: ptr Blake2bParams; x: range[1..64]) =
p.b[0] = (uint8)x
proc `digestLen=`*(p: ptr Blake2sParams; x: range[1..32]) =
p.b[0] = (uint8)x
proc `keyLen=`*(p: ptr Blake2bParams; x: range[0..64]) =
p.b[1] = (uint8)x
proc `keyLen=`*(p: ptr Blake2sParams; x: range[0..32]) =
p.b[1] = (uint8)x
proc `fanout=`*(p: ptr Blake2Params; x: Natural) =
p.b[2] = (uint8)x
proc `depth=`*(p: ptr Blake2Params; x: Natural) =
p.b[3] = (uint8)x
proc `leafLength=`*(p: ptr Blake2Params; x: Natural) =
var x = x; littleEndian32(p.b[4].addr, x.addr)
proc `nodeOffset=`*(p: ptr Blake2bParams; x: Natural) =
var x = x; littleEndian64(p.b[8].addr, x.addr)
proc `nodeOffset=`*(p: ptr Blake2sParams; x: Natural) =
var tmp: int64
littleEndian64(tmp.addr, p.b[8].addr)
tmp = (tmp and 0xffffffff) or (x shl 32)
littleEndian64(p.b[8].addr, tmp.addr)
proc `nodeDepth=`*(p: ptr Blake2bParams; x: Natural) =
p.b[16] = (uint8)x
proc `nodeDepth=`*(p: ptr Blake2sParams; x: Natural) =
p.b[14] = (uint8)x
proc `innerLength=`*(p: ptr Blake2bParams; x: Natural) =
p.b[17] = (uint8)x
proc `innerLength=`*(p: ptr Blake2sParams; x: Natural) =
p.b[15] = (uint8)x
proc `salt=`*(p: ptr Blake2bParams; salt: pointer; len: Natural) =
copyMem(p.b[32].addr, salt, min(len, 16))
proc `salt=`*(p: ptr Blake2sParams; salt: pointer; len: Natural) =
copyMem(p.b[16].addr, salt, min(len, 8))
proc `personal=`*(p: ptr Blake2bParams; salt: pointer; len: Natural) =
copyMem(p.b[48].addr, salt, min(len, 16))
proc `personal=`*(p: ptr Blake2sParams; salt: pointer; len: Natural) =
copyMem(p.b[24].addr, salt, min(len, 8))
proc init(p: ptr Blake2Params) =
when p is Blake2bParams:
p.digestLen = 64
else:
p.digestLen = 32
p.fanout = 1
p.depth = 1
const Blake2bIV =
[ 0x6a09e667f3bcc908'u64, 0xbb67ae8584caa73b'u64,
@ -33,20 +96,17 @@ proc inc(a: var array[2, uint64], b: uint8) =
proc padding(a: var array[128, uint8], b: uint8) =
for i in b..127: a[i] = 0
proc ror64(x: uint64, n: int): uint64 {.inline.} =
result = (x shr n) or (x shl (64 - n))
proc G (v: var array[16, uint64],
a,b,c,d: int, x,y: uint64)
{.inline.} =
v[a] = v[a] + v[b] + x
v[d] = ror64(v[d] xor v[a], 32)
v[d] = rotateRightBits(v[d] xor v[a], 32)
v[c] = v[c] + v[d]
v[b] = ror64(v[b] xor v[c], 24)
v[b] = rotateRightBits(v[b] xor v[c], 24)
v[a] = v[a] + v[b] + y
v[d] = ror64(v[d] xor v[a], 16)
v[d] = rotateRightBits(v[d] xor v[a], 16)
v[c] = v[c] + v[d]
v[b] = ror64(v[b] xor v[c], 63)
v[b] = rotateRightBits(v[b] xor v[c], 63)
proc compress(c: var Blake2b, last: int = 0) =
var input, v: array[16, uint64]
@ -72,7 +132,7 @@ proc compress(c: var Blake2b, last: int = 0) =
c.buffer_idx = 0
{.push boundChecks: off.}
proc blake2b_update*(c: var Blake2b, buf: pointer, data_size: int) =
proc update*(c: var Blake2b, buf: pointer, data_size: int) =
var data = cast[ptr array[0, uint8]](buf)
for i in 0..<data_size:
if c.buffer_idx == 128:
@ -82,7 +142,7 @@ proc blake2b_update*(c: var Blake2b, buf: pointer, data_size: int) =
inc(c.buffer_idx)
{.pop.}
proc blake2b_update*(c: var Blake2b, data: cstring|string|seq|uint8, data_size: int) =
proc update*(c: var Blake2b, data: cstring|string|seq|uint8, data_size: int) =
for i in 0..<data_size:
if c.buffer_idx == 128:
inc(c.offset, c.buffer_idx)
@ -95,82 +155,36 @@ proc blake2b_update*(c: var Blake2b, data: cstring|string|seq|uint8, data_size:
c.buffer[c.buffer_idx] = data
inc(c.buffer_idx)
proc blake2b_init*(c: var Blake2b, hash_size: uint8,
key: cstring = nil, key_size: int = 0) =
assert(hash_size >= 1'u8 and hash_size <= 64'u8)
assert(key_size >= 0 and key_size <= 64)
c.hash = Blake2bIV
c.hash[0] = c.hash[0] xor 0x01010000 xor cast[uint64](key_size shl 8) xor hash_size
c.hash_size = hash_size
proc initBlake2b*(key: pointer = nil, key_size: range[0..64] = 0): Blake2b =
init(result.params)
result.hash_size = result.params.b[0]
result.params.keyLen = keySize
for i in 0..7:
result.hash[i] = Blake2bIV[i]
if key_size > 0:
blake2b_update(c, key, key_size)
padding(c.buffer, c.buffer_idx)
c.buffer_idx = 128
update(result, key, key_size)
padding(result.buffer, result.buffer_idx)
result.buffer_idx = 128
proc blake2b_final*(c: var Blake2b): seq[uint8] =
proc initBlake2b*(customize: proc(params: ptr Blake2bParams)): Blake2b =
let p = result.params
init(p)
customize(p)
result.hash_size = p.b[0]
for i in 0..7:
result.hash[i] = result.hash[i] xor Blake2bIV[i]
proc finish*(c: var Blake2b): seq[uint8] =
result = newSeq[uint8](c.hash_size)
inc(c.offset, c.buffer_idx)
padding(c.buffer, c.buffer_idx)
compress(c, 1)
for i in 0'u8..<c.hash_size:
result[i.int] = cast[uint8]((c.hash[i div 8] shr (8'u8 * (i and 7)) and 0xFF))
zeroMem(addr(c), sizeof(c))
proc `$`*(d: seq[uint8]): string =
proc `$`(d: seq[uint8]): string =
const digits = "0123456789abcdef"
result = ""
for i in 0..high(d):
add(result, digits[(d[i].int shr 4) and 0xF])
add(result, digits[d[i].int and 0xF])
proc getBlake2b*(s: string, hash_size: uint8, key: string = ""): string =
var b: Blake2b
blake2b_init(b, hash_size, cstring(key), len(key))
blake2b_update(b, s, len(s))
result = $blake2b_final(b)
when isMainModule:
import strutils, hex
proc hex2str(s: string): string =
hex.decode s
assert(getBlake2b("abc", 4, "abc") == "b8f97209")
assert(getBlake2b(nil, 4, "abc") == "8ef2d47e")
assert(getBlake2b("abc", 4) == "63906248")
assert(getBlake2b(nil, 4) == "1271cf25")
var b1, b2: Blake2b
blake2b_init(b1, 4)
blake2b_init(b2, 4)
blake2b_update(b1, 97'u8, 1)
blake2b_update(b1, 98'u8, 1)
blake2b_update(b1, 99'u8, 1)
blake2b_update(b2, @[97'u8, 98'u8, 99'u8], 3)
assert($blake2b_final(b1) == $blake2b_final(b2))
let f = open("blake2b-kat.txt", fmRead)
var
data, key, hash, r: string
b: Blake2b
while true:
try:
data = f.readLine()
data = hex2str(data[4.int..data.high])
key = f.readLine()
key = hex2str(key[5..key.high])
hash = f.readLine()
hash = hash[6..hash.high]
r = getBlake2b(data, 64, key)
assert(r == hash)
blake2b_init(b, 64, key, 64)
for i in 0..high(data):
blake2b_update(b, ($data[i]).cstring, 1)
assert($blake2b_final(b) == hash)
discard f.readLine()
except IOError: break
close(f)
echo "ok"


@ -1,5 +1,5 @@
import std/streams, std/strutils, std/os, cbor
import ../dagfs, ./stores
import ../blobsets, ./stores
type
DagfsReplicator* = ref DagfsReplicatorObj

src/blobsets/stores.nim Normal file

@ -0,0 +1,403 @@
import std/streams, std/strutils, std/os
import std/asyncfile, std/asyncdispatch
import cbor
import ../blobsets, ./priv/hex
import nimcrypto/blake2
type
MissingChunk* = ref object of CatchableError
cid*: Cid ## Missing chunk identifier
BufferTooSmall* = object of CatchableError
template raiseMissing*(cid: Cid) =
raise MissingChunk(msg: "chunk missing from store", cid: cid)
func leafCount(size: Natural): int = (size+blobLeafSize-1) div blobLeafSize
type
BlobStream* = ref BlobStreamObj
BlobStreamObj = object of RootObj
closeImpl*: proc (s: BlobStream) {.nimcall, gcsafe.}
readImpl*: proc (s: BlobStream; buffer: pointer; bufLen: int): int {.nimcall, gcsafe.}
IngestStream* = ref IngestStreamObj
IngestStreamObj = object of RootObj
finishImpl*: proc (s: IngestStream): tuple[id: BlobId, size: BiggestInt] {.nimcall, gcsafe.}
ingestImpl*: proc (s: IngestStream; buf: pointer; size: int) {.nimcall, gcsafe.}
proc close*(s: BlobStream) =
assert(not s.closeImpl.isNil)
s.closeImpl(s)
proc read*(s: BlobStream; buf: pointer; len: Natural): int =
assert(not s.readImpl.isNil)
result = s.readImpl(s, buf, len)
proc finish*(s: IngestStream): tuple[id: BlobId, size: BiggestInt] =
## Finish ingest stream
assert(not s.finishImpl.isNil)
s.finishImpl(s)
proc ingest*(s: IngestStream; buf: pointer; size: Natural) =
## Ingest stream
assert(not s.ingestImpl.isNil)
s.ingestImpl(s, buf, size)
proc ingest*(s: IngestStream; buf: var string) =
## Ingest stream
assert(not s.ingestImpl.isNil)
s.ingestImpl(s, buf[0].addr, buf.len)
type
BlobStore* = ref BlobStoreObj
BlobStoreObj* = object of RootObj
closeImpl*: proc (s: BlobStore) {.nimcall, gcsafe.}
putBufferImpl*: proc (s: BlobStore; buf: pointer; len: Natural): Cid {.nimcall, gcsafe.}
putImpl*: proc (s: BlobStore; chunk: string): Cid {.nimcall, gcsafe.}
getBufferImpl*: proc (s: BlobStore; cid: Cid; buf: pointer; len: Natural): int {.nimcall, gcsafe.}
getImpl*: proc (s: BlobStore; cid: Cid; result: var string) {.nimcall, gcsafe.}
openBlobStreamImpl*: proc (s: BlobStore; id: BlobId; size: BiggestInt): BlobStream {.nimcall, gcsafe.}
openIngestStreamImpl*: proc (s: BlobStore; size: BiggestInt): IngestStream {.nimcall, gcsafe.}
proc close*(s: BlobStore) =
## Close active store resources.
if not s.closeImpl.isNil: s.closeImpl(s)
proc putBuffer*(s: BlobStore; buf: pointer; len: Natural): Cid =
## Put a chunk into the store.
assert(0 < len and len <= maxChunkSize)
assert(not s.putBufferImpl.isNil)
s.putBufferImpl(s, buf, len)
proc put*(s: BlobStore; chunk: string): Cid =
## Put a raw chunk into the store. The hash algorithm is chosen by the
## store implementation.
assert(0 < chunk.len and chunk.len <= maxChunkSize)
assert(not s.putImpl.isNil)
s.putImpl(s, chunk)
proc getBuffer*(s: BlobStore; cid: Cid; buf: pointer; len: Natural): int =
## Copy a raw block from the store into a buffer pointer.
assert(0 < len)
assert(not s.getBufferImpl.isNil)
result = s.getBufferImpl(s, cid, buf, len)
assert(0 < result)
proc get*(s: BlobStore; cid: Cid; result: var string) =
## Retrieve a raw block from the store.
assert(not s.getImpl.isNil)
s.getImpl(s, cid, result)
assert(result.len > 0)
proc openBlobStream*(s: BlobStore; id: BlobId; size = 0.BiggestInt): BlobStream =
## Return a new `BlobStream` for reading a blob.
assert(not s.openBlobStreamImpl.isNil)
s.openBlobStreamImpl(s, id, size)
proc openIngestStream*(s: BlobStore; size = 0.BiggestInt): IngestStream =
## Return a new `IngestStream` for ingesting a blob.
assert(not s.openIngestStreamImpl.isNil)
s.openIngestStreamImpl(s, size)
proc get*(s: BlobStore; cid: Cid): string =
## Retrieve a raw block from the store.
result = ""
s.get(cid, result)
proc putDag*(s: BlobStore; dag: CborNode): Cid =
## Place a CBOR DAG node in the store.
var raw = encode dag
s.put raw
proc getDag*(s: BlobStore; cid: Cid): CborNode =
## Retrieve a CBOR DAG from the store.
let stream = newStringStream(s.get(cid))
result = parseCbor stream
close stream
type
FileStore* = ref FileStoreObj
## A store that writes nodes and leaves as files.
FileStoreObj = object of BlobStoreObj
root, buf: string
proc parentAndFile(fs: FileStore; cid: Cid): (string, string) =
## Generate the parent path and file path of CID within the store.
let digest = hex.encode(cid.data)
result[0] = fs.root / digest[0..1]
result[1] = result[0] / digest[2..digest.high]
proc fsPutBuffer(s: BlobStore; buf: pointer; len: Natural): Cid =
var fs = FileStore(s)
result = dagHash(buf, len)
if result != zeroChunk:
let (dir, path) = fs.parentAndFile(result)
if not existsDir dir:
createDir dir
if not existsFile path:
fs.buf.setLen(len)
copyMem(addr fs.buf[0], buf, fs.buf.len)
let
tmp = fs.root / "tmp"
writeFile(tmp, fs.buf)
moveFile(tmp, path)
proc fsPut(s: BlobStore; chunk: string): Cid =
var fs = FileStore(s)
result = dagHash chunk
if result != zeroChunk:
let (dir, path) = fs.parentAndFile(result)
if not existsDir dir:
createDir dir
if not existsFile path:
let
tmp = fs.root / "tmp"
writeFile(tmp, chunk)
moveFile(tmp, path)
proc fsGetBuffer(s: BlobStore; cid: Cid; buf: pointer; len: Natural): int =
var fs = FileStore(s)
let (_, path) = fs.parentAndFile cid
if existsFile path:
let fSize = path.getFileSize
if maxChunkSize < fSize:
discard tryRemoveFile path
raiseMissing cid
if len.int64 < fSize:
raise newException(BufferTooSmall, "file is $1 bytes, buffer is $2" % [$fSize, $len])
let file = open(path, fmRead)
result = file.readBuffer(buf, len)
close file
if result == 0:
raiseMissing cid
proc fsGet(s: BlobStore; cid: Cid; result: var string) =
var fs = FileStore(s)
let (_, path) = fs.parentAndFile cid
if existsFile path:
let fSize = path.getFileSize
if fSize > maxChunkSize:
discard tryRemoveFile path
raiseMissing cid
result.setLen fSize.int
let
file = open(path, fmRead)
n = file.readChars(result, 0, result.len)
close file
doAssert(n == result.len)
else:
raiseMissing cid
func compressTree(leaves: var seq[BlobId]) =
var
ctx: Blake2b256
nodeOffset = 0
nodeDepth = 0
while leaves.len > 1:
nodeOffset = 0
inc nodeDepth
var pos, next: int
while pos < leaves.len:
ctx.init do (params: var Blake2bParams):
params.fanout = 2
params.depth = 255
params.leafLength = blobLeafSize
params.nodeOffset = nodeOffset
params.nodeDepth = nodeDepth
inc nodeOffset
ctx.update(leaves[pos].data)
inc pos
if pos < leaves.len:
ctx.update(leaves[pos].data)
inc pos
leaves[next] = ctx.finish()
inc next
leaves.setLen(next)
# TODO: BLAKE2 tree finalization flags
iterator dumpBlob*(store: BlobStore; id: BlobId): string =
var
stream = store.openBlobStream(id)
buf = newString(blobLeafSize)
defer:
close stream
while true:
buf.setLen(blobLeafSize)
let n = stream.read(buf[0].addr, buf.len)
if n == 0:
break
buf.setLen(n)
yield buf
proc ingestFile*(store: BlobStore; path: string): tuple[id: BlobId, size: BiggestInt] =
## Ingest a file and return blob metadata.
let
file = openAsync(path, fmRead)
fileSize = file.getFileSize
defer:
close file
let stream = store.openIngestStream(fileSize)
if fileSize > 0:
var buf = newString(min(blobLeafSize, fileSize))
while true:
let n = waitFor file.readBuffer(buf[0].addr, buf.len)
if n == 0: break
stream.ingest(buf[0].addr, n)
result = finish stream
proc commit*(store: BlobStore; bs: BlobSet): BlobSet =
assert(bs.kind == hotNode)
for e in bs.table.mitems:
case e.kind
of coldNode, leafNode: discard
of hotNode:
e = store.commit e
let stream = store.openIngestStream()
var buf = encode bs.toCbor
stream.ingest(buf)
let (id, _) = finish stream
result = BlobSet(kind: coldNode, setId: id)
type
FsBlobStream = ref FsBlobStreamObj
FsBlobStreamObj = object of BlobStreamObj
path: string
file: AsyncFile
NullIngestStream = ref NullIngestStreamObj
NullIngestStreamObj = object of IngestStreamObj
ctx: Blake2b256
leaves: seq[BlobId]
pos, nodeOffset: BiggestInt
FsIngestStream = ref FsIngestStreamObj
FsIngestStreamObj = object of IngestStreamObj
ctx: Blake2b256
leaves: seq[BlobId]
path: string
file: AsyncFile
pos, nodeOffset: BiggestInt
proc nullBlobClose(s: BlobStream) = discard
proc nullBlobRead(s: BlobStream; buffer: pointer; len: Natural): int = 0
proc nullOpenBlobStream(s: BlobStore; id: BlobId; size: BiggestInt): BlobStream =
BlobStream(closeImpl: nullBlobClose, readImpl: nullBlobRead)
proc fsBlobClose(s: BlobStream) =
var s = FsBlobStream(s)
close s.file
proc fsBlobRead(s: BlobStream; buffer: pointer; len: Natural): int =
var s = FsBlobStream(s)
result = waitFor s.file.readBuffer(buffer, len)
proc fsOpenBlobStream(s: BlobStore; id: BlobId; size: BiggestInt): BlobStream =
var fs = FileStore(s)
let stream = FsBlobStream()
result = stream
stream.path = fs.root / $id
stream.file = openAsync(stream.path, fmRead)
stream.closeImpl = fsBlobClose
stream.readImpl = fsBlobRead
proc fsFinish(s: IngestStream): tuple[id: BlobId, size: BiggestInt] =
var s = FsIngestStream(s)
close s.file
s.leaves.add finish(s.ctx)
compressTree(s.leaves)
result.id = s.leaves[0]
result.size = s.pos
moveFile(s.path, s.path.parentDir / $(result.id))
proc nullFinish(s: IngestStream): tuple[id: BlobId, size: BiggestInt] =
var s = NullIngestStream(s)
s.leaves.add finish(s.ctx)
compressTree(s.leaves)
result.id = s.leaves[0]
result.size = s.pos
proc nullIngest(s: IngestStream; buf: pointer; len: Natural) =
var
s = NullIngestStream(s)
off = 0
buf = cast[ptr array[blobLeafSize, byte]](buf)
while off < len:
var n = min(blobLeafSize, len-off)
let leafOff = int(s.pos and blobLeafSizeMask)
if leafOff == 0:
if s.pos > 0:
s.leaves.add finish(s.ctx)
s.ctx.init do (params: var Blake2bParams):
params.fanout = 2
params.depth = 255
params.leafLength = blobLeafSize
params.nodeOffset = s.nodeOffset
inc s.nodeOffset
else:
n = min(n, blobLeafSize-leafOff)
s.ctx.update(buf[off].addr, n)
off.inc n
s.pos.inc n
proc fsIngest(s: IngestStream; buf: pointer; len: Natural) =
var
s = FsIngestStream(s)
off = 0
buf = cast[ptr array[blobLeafSize, byte]](buf)
while off < len:
var n = min(blobLeafSize, len-off)
let leafOff = int(s.pos and blobLeafSizeMask)
if leafOff == 0:
if s.pos > 0:
s.leaves.add finish(s.ctx)
s.ctx.init do (params: var Blake2bParams):
params.fanout = 2
params.depth = 255
params.leafLength = blobLeafSize
params.nodeOffset = s.nodeOffset
inc s.nodeOffset
else:
n = min(n, blobLeafSize-leafOff)
s.ctx.update(buf[off].addr, n)
waitFor s.file.writeBuffer(buf[off].addr, n)
off.inc n
s.pos.inc n
proc nullOpenIngestStream(s: BlobStore; size: BiggestInt): IngestStream =
NullIngestStream(
finishImpl: nullFinish, ingestImpl: nullIngest, leaves: newSeq[BlobId]())
proc fsOpenIngestStream(s: BlobStore; size: BiggestInt): IngestStream =
var fs = FileStore(s)
let stream = FsIngestStream()
result = stream
stream.finishImpl = fsFinish
stream.ingestImpl = fsIngest
stream.path = fs.root / "ingest"
stream.file = openAsync(stream.path, fmWrite)
if size > 0:
stream.file.setFileSize(size)
stream.leaves = newSeqOfCap[BlobId](leafCount size)
else:
stream.leaves = newSeq[BlobId]()
proc newNullStore*(): BlobStore =
BlobStore(
openBlobStreamImpl: nullOpenBlobStream,
openIngestStreamImpl: nullOpenIngestStream)
proc newFileStore*(root: string): FileStore =
if not existsDir(root):
createDir root
new result
result.putBufferImpl = fsPutBuffer
result.putImpl = fsPut
result.getBufferImpl = fsGetBuffer
result.getImpl = fsGet
result.openBlobStreamImpl = fsOpenBlobStream
result.openIngestStreamImpl = fsOpenIngestStream
result.root = root
result.buf = ""


@ -1,5 +1,5 @@
import std/asyncnet, std/asyncdispatch, std/streams
import ../dagfs, ./stores
import ../blobsets, ./stores
const
defaultPort = Port(1023)
@ -55,9 +55,9 @@ type
TcpServer* = ref TcpServerObj
TcpServerObj = object
sock: AsyncSocket
store: DagfsStore
store: BlobStore
proc newTcpServer*(store: DagfsStore; port = defaultPort): TcpServer =
proc newTcpServer*(store: BlobStore; port = defaultPort): TcpServer =
## Create a new TCP server that serves `store`.
result = TcpServer(sock: newAsyncSocket(buffered=true), store: store)
result.sock.bindAddr(port)
@ -153,11 +153,11 @@ proc close*(server: TcpServer) =
type
TcpClient* = ref TcpClientObj
TcpClientObj = object of DagfsStoreObj
TcpClientObj = object of BlobStoreObj
sock: AsyncSocket
buf: string
proc tcpClientPutBuffer(s: DagfsStore; buf: pointer; len: Natural): Cid =
proc tcpClientPutBuffer(s: BlobStore; buf: pointer; len: Natural): Cid =
var client = TcpClient(s)
result = dagHash(buf, len)
if result != zeroChunk:
@ -186,7 +186,7 @@ proc tcpClientPutBuffer(s: DagfsStore; buf: pointer; len: Natural): Cid =
else:
raiseAssert "invalid server message"
proc tcpClientPut(s: DagfsStore; chunk: string): Cid =
proc tcpClientPut(s: BlobStore; chunk: string): Cid =
var client = TcpClient(s)
result = dagHash chunk
if result != zeroChunk:
@ -215,7 +215,7 @@ proc tcpClientPut(s: DagfsStore; chunk: string): Cid =
else:
raiseAssert "invalid server message"
proc tcpClientGetBuffer(s: DagfsStore; cid: Cid; buf: pointer; len: Natural): int =
proc tcpClientGetBuffer(s: BlobStore; cid: Cid; buf: pointer; len: Natural): int =
var
client = TcpClient(s)
msg: Message
@ -242,7 +242,7 @@ proc tcpClientGetBuffer(s: DagfsStore; cid: Cid; buf: pointer; len: Natural): in
else:
raiseMissing cid
proc tcpClientGet(s: DagfsStore; cid: Cid; result: var string) =
proc tcpClientGet(s: BlobStore; cid: Cid; result: var string) =
result.setLen maxChunkSize
let n = s.getBuffer(cid, result[0].addr, result.len)
result.setLen n


@ -1,153 +0,0 @@
import std/hashes, std/streams, std/strutils
import base58/bitcoin, cbor, siphash
import ./dagfs/priv/hex, ./dagfs/priv/blake2
const
maxChunkSize* = 1 shl 18
## Maximum supported chunk size.
digestLen* = 32
## Length of a chunk digest.
cidSize* = digestLen
## Size of CID object in memory
type Cid* = object
## Chunk IDentifier
digest*: array[digestLen, uint8]
proc initCid*(): Cid = Cid()
## Initialize an invalid CID.
proc isValid*(x: Cid): bool =
## Check that a CID has been properly initialized.
for c in x.digest.items:
if c != 0: return true
proc `==`*(x, y: Cid): bool =
## Compare two CIDs.
for i in 0..<digestLen:
if x.digest[i] != y.digest[i]:
return false
true
proc `==`*(cbor: CborNode; cid: Cid): bool =
## Compare a CBOR node with a CID.
if cbor.kind == cborBytes:
for i in 0..<digestLen:
if cid.digest[i] != cbor.bytes[i].uint8:
return false
result = true
proc hash*(cid: Cid): Hash =
## Reduce a CID into an integer for use in tables.
var zeroKey: Key
result = cast[Hash](sipHash(cid.digest, zeroKey))
proc toCbor*(cid: Cid): CborNode = newCborBytes cid.digest
## Generate a CBOR representation of a CID.
proc toCid*(cbor: CborNode): Cid =
## Generate a CBOR representation of a CID.
assert(cbor.bytes.len == digestLen)
for i in 0..<digestLen:
result.digest[i] = cbor.bytes[i].uint8
{.deprecated: [newCborBytes: toCbor].}
proc toHex*(cid: Cid): string = hex.encode(cid.digest)
## Return CID encoded in hexidecimal.
proc writeUvarint*(s: Stream; n: SomeInteger) =
## Write an IPFS varint
var n = n
while true:
let c = int8(n and 0x7f)
n = n shr 7
if n == 0:
s.write((char)c.char)
break
else:
s.write((char)c or 0x80)
proc readUvarint*(s: Stream): BiggestInt =
## Read an IPFS varint
var shift: int
while shift < (9*8):
let c = (BiggestInt)s.readChar
result = result or ((c and 0x7f) shl shift)
if (c and 0x80) == 0:
break
shift.inc 7
proc toIpfs*(cid: Cid): string =
## Return CID encoded in IPFS multimulti.
const
multiRaw = 0x55
multiBlake2b_256 = 0xb220
let s = newStringStream()
s.writeUvarint 1
s.writeUvarint multiRaw
s.writeUvarint multi_blake2b_256
s.writeUvarint digestLen
for e in cid.digest:
s.write e
s.setPosition 0
result = 'z' & bitcoin.encode(s.readAll)
close s
proc `$`*(cid: Cid): string = toHex cid
## Return CID in base 58, the default textual encoding.
proc parseCid*(s: string): Cid =
## Detect CID encoding and parse from a string.
var raw = parseHexStr s
if raw.len != digestLen:
raise newException(ValueError, "invalid ID length")
for i in 0..<digestLen:
result.digest[i] = raw[i].byte
const
zeroChunk* = parseCid "8ddb61928ec76e4ee904cd79ed977ab6f5d9187f1102975060a6ba6ce10e5481"
## CID of zero chunk of maximum size.
proc take*(cid: var Cid; buf: var string) =
## Take a raw digest from a string buffer.
doAssert(buf.len == digestLen)
copyMem(cid.digest[0].addr, buf[0].addr, digestLen)
proc dagHash*(buf: pointer; len: Natural): Cid =
## Generate a CID for a string of data using the BLAKE2b hash algorithm.
assert(len <= maxChunkSize)
var b: Blake2b
blake2b_init(b, digestLen, nil, 0)
blake2b_update(b, buf, len)
var s = blake2b_final(b)
copyMem(result.digest[0].addr, s[0].addr, digestLen)
proc dagHash*(data: string): Cid =
## Generate a CID for a string of data using the BLAKE2b hash algorithm.
assert(data.len <= maxChunkSize)
var b: Blake2b
blake2b_init(b, digestLen, nil, 0)
blake2b_update(b, data, data.len)
var s = blake2b_final(b)
copyMem(result.digest[0].addr, s[0].addr, digestLen)
proc verify*(cid: Cid; data: string): bool =
## Verify that a string of data corresponds to a CID.
var b: Blake2b
blake2b_init(b, digestLen, nil, 0)
blake2b_update(b, data, data.len)
let digest = blake2b_final(b)
for i in 0..<digestLen:
if cid.digest[i] != digest[i]:
return false
true
iterator simpleChunks*(s: Stream; size = maxChunkSize): string =
## Iterator that breaks a stream into simple chunks.
doAssert(size <= maxChunkSize)
var tmp = newString(size)
while not s.atEnd:
tmp.setLen(size)
tmp.setLen(s.readData(tmp[0].addr, size))
yield tmp


@ -1,155 +0,0 @@
import std/streams, std/strutils, std/os
import std/asyncfile, std/asyncdispatch
import cbor
import ../dagfs, ./priv/hex
type
MissingChunk* = ref object of CatchableError
cid*: Cid ## Missing chunk identifier
BufferTooSmall* = object of CatchableError
template raiseMissing*(cid: Cid) =
raise MissingChunk(msg: "chunk missing from store", cid: cid)
type
DagfsStore* = ref DagfsStoreObj
DagfsStoreObj* = object of RootObj
closeImpl*: proc (s: DagfsStore) {.nimcall, gcsafe.}
putBufferImpl*: proc (s: DagfsStore; buf: pointer; len: Natural): Cid {.nimcall, gcsafe.}
putImpl*: proc (s: DagfsStore; chunk: string): Cid {.nimcall, gcsafe.}
getBufferImpl*: proc (s: DagfsStore; cid: Cid; buf: pointer; len: Natural): int {.nimcall, gcsafe.}
getImpl*: proc (s: DagfsStore; cid: Cid; result: var string) {.nimcall, gcsafe.}
proc close*(s: DagfsStore) =
## Close active store resources.
if not s.closeImpl.isNil: s.closeImpl(s)
proc putBuffer*(s: DagfsStore; buf: pointer; len: Natural): Cid =
## Put a chunk into the store.
assert(0 < len and len <= maxChunkSize)
assert(not s.putBufferImpl.isNil)
s.putBufferImpl(s, buf, len)
proc put*(s: DagfsStore; chunk: string): Cid =
## Place a raw block to the store. The hash argument specifies a required
## hash algorithm, or defaults to a algorithm choosen by the store
## implementation.
assert(0 < chunk.len and chunk.len <= maxChunkSize)
assert(not s.putImpl.isNil)
s.putImpl(s, chunk)
proc getBuffer*(s: DagfsStore; cid: Cid; buf: pointer; len: Natural): int =
## Copy a raw block from the store into a buffer pointer.
assert(cid.isValid)
assert(0 < len)
assert(not s.getBufferImpl.isNil)
result = s.getBufferImpl(s, cid, buf, len)
assert(0 < result)
proc get*(s: DagfsStore; cid: Cid; result: var string) =
## Retrieve a raw block from the store.
assert(not s.getImpl.isNil)
assert cid.isValid
s.getImpl(s, cid, result)
assert(result.len > 0)
proc get*(s: DagfsStore; cid: Cid): string =
## Retrieve a raw block from the store.
result = ""
s.get(cid, result)
proc putDag*(s: DagfsStore; dag: CborNode): Cid =
## Place an Dagfs node in the store.
var raw = encode dag
s.put raw
proc getDag*(s: DagfsStore; cid: Cid): CborNode =
## Retrieve an CBOR DAG from the store.
let stream = newStringStream(s.get(cid))
result = parseCbor stream
close stream
type
FileStore* = ref FileStoreObj
## A store that writes nodes and leafs as files.
FileStoreObj = object of DagfsStoreObj
root, buf: string
proc parentAndFile(fs: FileStore; cid: Cid): (string, string) =
## Generate the parent path and file path of CID within the store.
let digest = hex.encode(cid.digest)
result[0] = fs.root / digest[0..1]
result[1] = result[0] / digest[2..digest.high]
proc fsPutBuffer(s: DagfsStore; buf: pointer; len: Natural): Cid =
var fs = FileStore(s)
result = dagHash(buf, len)
if result != zeroChunk:
let (dir, path) = fs.parentAndFile(result)
if not existsDir dir:
createDir dir
if not existsFile path:
fs.buf.setLen(len)
copyMem(addr fs.buf[0], buf, fs.buf.len)
let
tmp = fs.root / "tmp"
writeFile(tmp, fs.buf)
moveFile(tmp, path)
proc fsPut(s: DagfsStore; chunk: string): Cid =
var fs = FileStore(s)
result = dagHash chunk
if result != zeroChunk:
let (dir, path) = fs.parentAndFile(result)
if not existsDir dir:
createDir dir
if not existsFile path:
let
tmp = fs.root / "tmp"
writeFile(tmp, chunk)
moveFile(tmp, path)
proc fsGetBuffer(s: DagfsStore; cid: Cid; buf: pointer; len: Natural): int =
var fs = FileStore(s)
let (_, path) = fs.parentAndFile cid
if existsFile path:
let fSize = path.getFileSize
if maxChunkSize < fSize:
discard tryRemoveFile path
raiseMissing cid
if len.int64 < fSize:
raise newException(BufferTooSmall, "file is $1 bytes, buffer is $2" % [$fSize, $len])
let file = open(path, fmRead)
result = file.readBuffer(buf, len)
close file
if result == 0:
raiseMissing cid
proc fsGet(s: DagfsStore; cid: Cid; result: var string) =
var fs = FileStore(s)
let (_, path) = fs.parentAndFile cid
if existsFile path:
let fSize = path.getFileSize
if fSize > maxChunkSize:
discard tryRemoveFile path
raiseMissing cid
result.setLen fSize.int
let
file = open(path, fmRead)
n = file.readChars(result, 0, result.len)
close file
doAssert(n == result.len)
else:
raiseMissing cid
proc newFileStore*(root: string): FileStore =
## Blocks retrieved by `get` are not hashed and verified.
if not existsDir(root):
createDir root
new result
result.putBufferImpl = fsPutBuffer
result.putImpl = fsPut
result.getBufferImpl = fsGetBuffer
result.getImpl = fsGet
result.root = root
result.buf = ""