Use Tiger tree hashes

Switch from BLAKE2b-256 tree hashing to the Tiger tree hash (THEX) scheme
used by the ADC protocol. Hashes are now 192-bit (96-bit collision
resistance) and tree leaf nodes are 1 KiB.
master
Emery Hemingway 2019-06-17 08:22:54 +02:00
parent 1071e63d1c
commit b905b45525
28 changed files with 219 additions and 7331 deletions
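The new identifiers are Merkle tree hashes in the THEX style used by the ADC protocol: every 1 KiB leaf is hashed with a 0x00 prefix byte, pairs of digests are folded with a 0x01 prefix, and an odd digest at the end of a row is promoted unchanged until a single 192-bit root remains. A minimal sketch of that construction, assuming only the TigerState/TigerDigest API (init, update, finish, .data) that this commit itself uses; treeHash and leafSize are illustrative names:

import tiger

const leafSize = 1 shl 10  # 1 KiB leaves, per THEX/ADC

proc treeHash(data: string): TigerDigest =
  # Hash every leaf with a 0x00 prefix byte.
  var
    leaves = newSeq[TigerDigest]()
    off = 0
  while true:
    var ctx: TigerState
    init ctx
    ctx.update [0'u8]
    let n = min(leafSize, data.len - off)
    if n > 0:
      ctx.update(unsafeAddr data[off], n)
      off.inc n
    leaves.add(finish ctx)
    if off >= data.len: break
  # Fold pairs of digests with a 0x01 prefix until one root remains.
  while leaves.len > 1:
    var
      next = newSeq[TigerDigest]()
      pos = 0
    while pos+1 < leaves.len:
      var ctx: TigerState
      init ctx
      ctx.update [1'u8]
      ctx.update leaves[pos+0].data
      ctx.update leaves[pos+1].data
      next.add(finish ctx)
      pos.inc 2
    if pos < leaves.len:
      next.add leaves[pos]  # odd digest is promoted as-is
    leaves = next
  leaves[0]

This is the same shape as the blobHash and compressTree changes in blobsets.nim below; the streaming ingest code differs only in feeding leaves incrementally instead of from a single string.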

View File

@ -1,12 +1,12 @@
# Package
version = "0.1.2"
version = "0.2.0"
author = "Emery Hemingway"
description = "Sets of named blobs"
license = "AGPLv3"
srcDir = "src"
requires "nim >= 0.18.0", "cbor >= 0.5.2", "siphash", "spryvm", "toxcore"
requires "nim >= 0.18.0", "base32", "cbor >= 0.5.2", "siphash", "tiger >= 0.2.1"
bin = @["blobset", "blobserver"]
bin = @["blobingest", "blobset", "blobserver"]
skipFiles = @["blobset.nim", "blobserver.nim"]

View File

@ -5,11 +5,18 @@ stdenv.mkDerivation {
src = ./.;
base32 = fetchFromGitHub {
owner = "ehmry";
repo = "base32.nim";
rev = "c557fd9e1d09d103f3781b6eba5fa22330579425";
sha256 = "081rlflad99f4krlsfgll6f5l09w3a2i4zgfwncxya5hn6jhfg32";
};
cbor = fetchFromGitHub {
owner = "ehmry";
repo = "nim-cbor";
rev = "v0.5.2";
sha256 = "0fh8vriad6nvjl3fxl2b62bk7y4z1hx90527a530nln5g1cj9z8f";
rev = "v0.6.0";
sha256 = "175h3vk0qccmzymlbqdwni645d7v5zz0fn6zdjsy1pd88kg5v66h";
};
siphash = fetchFromGitHub {
@ -19,13 +26,20 @@ stdenv.mkDerivation {
sha256 = "1z4xm3ckygw8pmn9b6axdk65ib1y5bgwa3v1dbxamvhjmyfr62w4";
};
tiger = fetchFromGitHub {
owner = "ehmry";
repo = "tiger";
rev = "v0.2";
sha256 = "0hn3ccmmfgkmiz5q2mvlyhgc22054i54m211ai2a9k9k37w7w2hz";
};
buildInputs = [ nim pcre ];
NIX_LDFLAGS = [ "-lpcre" ];
buildPhase = ''
HOME=$TMPDIR
nim c -p:$cbor/src -p:$siphash/src -d:release src/blobset.nim
nim c -p:$base32 -p:$cbor/src -p:$siphash/src -p:$tiger -d:release src/blobset.nim
'';
installPhase = "install -Dt $out/bin src/blobset";

View File

@ -6,19 +6,6 @@ import cbor
import ./blobsets, ./blobsets/filestores,
./blobsets/httpservers, ./blobsets/httpstores
when defined(tox):
import toxcore
# Basic Spry
when defined(spry):
import spryvm/spryvm
# Spry extra modules
import spryvm/sprycore, spryvm/spryextend, spryvm/sprymath, spryvm/spryos, spryvm/spryio,
spryvm/spryoo, spryvm/sprystring, spryvm/sprymodules, spryvm/spryreflect, spryvm/sprymemfile,
spryvm/spryblock,
./blobsets/spryblobs
import os, strutils
when defined(readLine):
import rdstdin, linenoise
@ -34,115 +21,6 @@ proc openStore(): BlobStore =
else:
newFileStore(url)
when defined(spry):
proc newSpry(store: BlobStore): Interpreter =
result = newInterpreter()
result.addCore()
result.addExtend()
result.addMath()
result.addOS()
result.addIO()
result.addOO()
result.addString()
result.addModules()
result.addReflect()
result.addMemfile()
result.addBlock()
result.addBlobSets(store)
discard result.evalRoot("""[loadFile: "blobs.sy"]""")
when defined(tox) and defined(spry):
type IngestTransfer = object
name: string
stream: IngestStream
size, pos: uint64
proc process(
t: var IngestTransfer; tox: Tox; friend: Friend; file: FileTransfer;
pos: uint64; data: pointer; size: int): bool =
## Process transfer data, return true if transfer is complete
if t.pos != pos:
cancel t.stream
tox.control(friend, file, TOX_FILE_CONTROL_CANCEL)
discard tox.send(friend, """stream is at position $# but you sent $#""" % [$t.pos, $pos])
result = true
else:
waitFor t.stream.ingest(data, size)
t.pos.inc size
if t.pos >= t.size:
let blobId, blobSize = waitFor t.stream.finish()
discard tox.send(friend, """$# $# $#""" % [t.name, $blobId, $blobSize])
result = true
type Transfer = uint64
proc `+`(friend: Friend; file: FileTransfer): Transfer =
(friend.Transfer shl 32) or file.Transfer
proc setupCallbacks(tox: Tox) =
tox.onSelfConnectionStatus do (status: TOX_CONNECTION):
echo "self status is ", status
tox.onFriendRequest do (key: PublicKey; msg: string):
echo "friend request from ", key, ", ", msg
discard tox.addFriendNoRequest(key)
block:
let
store = openStore()
spry = newSpry(store)
transfers = newTable[Transfer, IngestTransfer](16)
tox.onFriendMessage do (friend: Friend; msg: string; kind: TOX_MESSAGE_TYPE):
try:
if kind == TOX_MESSAGE_TYPE_NORMAL:
tox.typing(friend, true)
let res = spry.evalRoot("[" & msg & "]")
tox.typing(friend, false)
discard tox.send(friend, $res, TOX_MESSAGE_TYPE_ACTION)
except:
discard tox.send(friend, getCurrentExceptionMsg())
tox.onFileRecv do (friend: Friend; file: FileTransfer; kind: uint32; size: uint64; filename: string):
case kind
of (uint32)TOX_FILE_KIND_AVATAR:
tox.control(friend, file, TOX_FILE_CONTROL_CANCEL)
else:
if transfers.len > 16:
tox.control(friend, file, TOX_FILE_CONTROL_CANCEL)
discard tox.send(friend, "too many transfers are pending")
else:
let msg = """you are trying to send me "$#", type $#, size $#""" %
[filename, $kind, $size]
discard tox.send(friend, msg)
transfers[friend+file] = IngestTransfer(
name: filename,
stream: store.openIngestStream(size.BiggestInt),
size: size)
tox.control(friend, file, TOX_FILE_CONTROL_RESUME)
tox.onFileRecvChunk do (friend: Friend; file: FileTransfer; pos: uint64; data: pointer; size: int):
try:
if transfers[friend+file].process(tox, friend, file, pos, data, size):
transfers.del(friend+file)
except:
discard tox.send(friend, getCurrentExceptionMsg(), TOX_MESSAGE_TYPE_ACTION)
tox.name = "blobbot"
echo "/connect 127.0.0.1 ", tox.udpPort, " ", tox.dhtId
echo "/add ", tox.address
proc toxMain() {.async.} =
let tox = newTox do (o: Options):
o.ipv6Enabled = false
o.localDiscoveryEnabled = true
o.holePunchingEnabled = false
var saveData = ""
try: saveData = readFile "tox.save"
except: discard
if saveData != "":
o.saveData = saveData
o.saveDataType = TOX_SAVEDATA_TYPE_TOX_SAVE
writeFile "tox.save", tox.saveData
setupCallbacks(tox)
while not tox.isClosed:
iterate tox
await sleepAsync(tox.iterationInterval)
proc serverMain(): Future[void] =
let
store = newFileStore("/tmp/blobs")
@ -943,67 +821,6 @@ proc getLine(prompt: string): string =
stdout.write(prompt)
result = stdin.readline()
when defined(spry):
proc spryMain() =
let spry = newSpry(openStore())
var
lines, stashed, fileLines = newSeq[string]()
suspended: bool = true
echo "Welcome to interactive Spry!"
echo "An empty line will evaluate previous lines, so hit enter twice."
# We collect lines until an empty line is entered, easy way to enter
# multiline code.
while true:
var line: string
if suspended:
line = getLine(Prompt)
else:
if fileLines.len == 0:
quit 0
# Read a line, eh, would be nice with removeFirst or popFirst...
line = fileLines[0]
fileLines.delete(0)
# Logic for pausing
if line.strip() == "# pause":
var enter = getLine(" <enter = eval, s = suspend>")
if enter.strip() == "s":
stdout.write(" <suspended, c = continue>\n")
stashed = lines
lines = newSeq[string]()
suspended = true
continue
else:
stdout.write(line & "\n")
# Logic to start the script again
if suspended and line.strip() == "c":
lines = stashed
suspended = false
continue
# Finally time to eval
if line.strip().len() == 0:
let code = lines.join("\n")
lines = newSeq[string]()
try:
# Let the interpreter eval the code. We need to eval whatever we
# get (ispry acting as a func). The surrounding block is just because we only
# want to pass one Node.
var result = spry.evalRoot("[" & code & "]")
#discard spry.setBinding(newEvalWord("@"), result)
var output = $result
# Print any result
if output.isNil:
output = if suspended: "nil" else: ""
stdout.write(output & "\n")
except:
echo "Oops, sorry about that: " & getCurrentExceptionMsg() & "\n"
echo getStackTrace()
else:
lines.add(line)
when isMainModule:
var cmd = ""
for kind, key, val in getopt():
@ -1017,17 +834,6 @@ when isMainModule:
of "dump": dumpMain()
of "ingest": waitFor ingestMain()
of "server": waitFor serverMain()
of "spry":
when defined(spry):
spryMain()
else:
quit "not compiled with Spry interpreter"
of "check": waitFor checkMain()
of "replicate": waitFor replicateMain()
of "tox":
when defined(tox) and defined(spry):
#waitFor toxReplicateMain()
waitFor toxMain()
else:
quit "not compiled with Tox node"
else: quit("no such subcommand " & cmd)

View File

@ -1,5 +1,2 @@
# Disable this to use only primitive stdin
-d:readLine
--nilseqs:on
-d:spry
-d:tox

View File

@ -1,53 +1,41 @@
import std/asyncdispatch, std/asyncstreams
import std/hashes, std/streams, std/strutils, std/bitops, std/unicode, std/endians
import std/streams, std/strutils, std/random
import cbor, siphash
import ./blobsets/priv/hex
import base32, cbor, siphash, tiger
import ./blobsets/priv/hex, ./blobsets/priv/nimcrypto, ./blobsets/priv/nimcrypto/blake2
import std/asyncdispatch, std/asyncstreams
import std/hashes, std/streams, std/strutils, std/bitops, std/unicode, std/endians, std/random
const
digestLen* = 32
digestSize* = 24
## Length of a chunk digest.
cidSize* = digestLen
## Size of CID object in memory
blobLeafSize* = 1 shl 14
## Size of blob leaves.
blobLeafSizeMask* = not(not(0) shl 14)
blobHexLen* = 32 * 2
blobVisualLen* = 32 * 3
blobLeafSize* = 1 shl 10
## Size of blob hash leaves (THEX/ADC).
blobLeafSizeMask* = blobLeafSize - 1
blobHexLen* = digestSize * 2
blobBase32Len* = (digestSize * 5 div 3) - 1
blobVisualLen* = digestSize * 3
type
Blake2b256* = Blake2bContext[256]
BlobId* = MDigest[Blake2b256.bits]
BlobId* = TigerDigest
## Blob Identifier
SetId* = MDigest[Blake2b256.bits]
SetId* = TigerDigest
## Set Identifier
Cid* {.deprecated} = BlobId
func `$`*(bh: BlobId): string =
## Convert a blob hash to a visual representation.
const baseRune = 0x2800
result = newString(blobVisualLen)
var pos = 0
for b in bh.data.items:
let r = (Rune)baseRune or b.int
fastToUTF8Copy(r, result, pos, true)
func parseStringId[T](s: string): T =
case s.len
of blobHexLen:
hex.decode s, result.data
of blobBase32Len:
var tmp = base32.decode(s)
copyMem(result.data[0].addr, tmp[0].addr, digestSize)
of blobVisualLen:
var
pos: int
r: Rune
for b in result.data.mitems:
fastRuneAt(s, pos, r, true)
b = r.byte
b = byte(r.int and 0xff)
else:
raise newException(ValueError, "invalid blobset id encoding")
raise newException(ValueError, "invalid blobset id encoding of len " & $s.len)
func parseCborId[T](c: CborNode): T =
## Parse a CBOR node to binary.
@ -59,10 +47,6 @@ func toBlobId*(s: string): BlobId =
## Parse a visual blob hash to binary.
parseStringId[BlobId] s
func toBlobId(c: CborNode): BlobId =
## Parse a CBOR blob hash to binary.
parseCborId[BlobId] c
func toSetId*(s: string): SetId =
## Parse a visual set hash to binary.
parseStringId[SetId] s
@ -77,7 +61,7 @@ proc `==`*(x, y: BlobId): bool = x.data == y.data
proc `==`*(cbor: CborNode; cid: BlobId): bool =
## Compare a CBOR node with a BlobId.
if cbor.kind == cborBytes:
for i in 0..<digestLen:
for i in 0..<digestSize:
if cid.data[i] != cbor.bytes[i].uint8:
return false
result = true
@ -87,24 +71,34 @@ proc hash*(cid: BlobId): Hash =
var zeroKey: Key
result = cast[Hash](sipHash(cid.data, zeroKey))
proc toCbor*(cid: BlobId): CborNode = newCborBytes cid.data
proc toCbor*(id: BlobId): CborNode = newCborBytes id.data
## Generate a CBOR representation of a BlobId.
proc toBlobId*(cbor: CborNode): BlobId =
## Generate a CBOR representation of a BlobId.
assert(cbor.bytes.len == digestLen)
for i in 0..<digestLen:
assert(cbor.bytes.len == digestSize)
for i in 0..<digestSize:
result.data[i] = cbor.bytes[i].uint8
func `$`*(bh: BlobId): string =
## Convert a blob hash to a visual representation.
const baseRune = 0x2800
result = newString(blobVisualLen)
var pos = 0
for b in bh.data.items:
let r = (Rune)baseRune or b.int
fastToUTF8Copy(r, result, pos, true)
proc toHex*(id: BlobId|SetId): string = hex.encode(id.data)
## Return BlobId encoded in hexadecimal.
proc verify*(cid: BlobId; data: string): bool =
func toBase32*(bh: BlobId): string =
## Encode a blob hash into base32
base32.encode(cast[array[digestSize,char]](bh.data), pad=false)
proc verify*(id: BlobId; data: string): bool =
## Verify that a string of data corresponds to a BlobId.
var b: Blake2b256
init(b)
update(b, data)
finish(b) == cid
id == tiger(data)
func isNonZero*(bh: BlobId): bool =
## Test if a blob hash is not zeroed.
@ -208,7 +202,7 @@ proc openIngestStream*(s: BlobStore; size = 0.BiggestInt; kind = dataBlob): Inge
proc ingest*(store: BlobStore; buf: string): Future[BlobId] {.async.} =
let stream = store.openIngestStream(buf.len.BiggestInt, dataBlob)
await stream.ingest(buf[0].unsafeAddr, buf.len)
let (id, size) = await stream.finish()
let (id, _) = await stream.finish()
return id
type Key* = distinct uint64
@ -314,11 +308,15 @@ iterator dumpBlob*(store: BlobStore; id: BlobId): string =
proc loadSet(store: BlobStore; id: SetId; depth: int): Future[BlobSet] {.async.} =
assert(isNonZero id)
assert((not Key(0)) shr depth != Key(0), "loadSet trie is too deep")
var
let
stream = store.openBlobStream(id, kind=metaBlob)
buf = newString(blobLeafSize)
streamSize = stream.size
defer:
close stream
var buf = if streamSize == 0:
newString(4 shl 10)
else:
newString(stream.size)
let n = await stream.read(buf[0].addr, buf.len)
assert(n != 0, "read zero of set " & $id)
buf.setLen(n)
@ -360,7 +358,6 @@ proc randomApply*(store: BlobStore; trie: BlobSet; rng: var Rand;
f: proc(id: BlobId; size: BiggestInt)) =
## Apply to random leaf if the set is not empty.
var
retry = 0
trie = trie
i = rng.rand(max(1, countSetBits(trie.bitmap))-1)
while trie.bitmap != 0:
@ -433,27 +430,6 @@ func leafCount(bs: BlobSet): int =
else:
result.inc n.leafCount
#[
proc search*(store: BlobStore; trie: BlobSet; name: string): Future[BlobId] {.async.} =
let key = name.toKey
var
n = trie
k = key
level = 0
while k != Key(0) and n.masked(k):
let i = n.compactIndex(k)
if n.table[i].isCold:
n.table[i] = await store.load(n.table[i])
n = n.table[i]
if n.kind == leafNode:
if n.key == key:
return n.blob
break
k = k shr keyChunkBits
inc level
raise newException(KeyError, "key not in blob set")
]#
func apply(bs: BlobSet; cb: proc (leaf: BlobSet)) =
## Apply a callback to each set element.
for node in bs.table:
@ -581,45 +557,37 @@ func leafCount*(size: Natural): int = (size+blobLeafSize-1) div blobLeafSize
func compressTree*(leaves: var openArray[BlobId]) =
var
ctx: Blake2b256
nodeOffset = 0
nodeDepth = 0
ctx: TigerState
len = leaves.len
while len > 1:
nodeOffset = 0
inc nodeDepth
while 1 < len:
var pos, next: int
while pos < len:
ctx.init do (params: var Blake2bParams):
params.fanout = 2
params.depth = 255
params.leafLength = blobLeafSize
params.nodeOffset = nodeOffset
params.nodeDepth = nodeDepth
inc nodeOffset
ctx.update(leaves[pos].data)
inc pos
if pos < len:
ctx.update(leaves[pos].data)
inc pos
while pos+1 < len:
init ctx
ctx.update [1'u8]
ctx.update leaves[pos+0].data
ctx.update leaves[pos+1].data
pos.inc 2
leaves[next] = ctx.finish()
inc next
if pos < len:
leaves[next] = leaves[pos]
inc next
len = next
# TODO: BLAKE2 tree finalization flags
proc blobHash*(s: string): BlobId =
doAssert(s.len <= blobLeafSize)
func blobHash*(s: string): BlobId =
var
ctx: Blake2b256
leaves: array[1, BlobId]
ctx.init do (params: var Blake2bParams):
params.fanout = 2
params.depth = 255
params.leafLength = blobLeafSize
params.nodeOffset = 0
if s.len > 0:
ctx.update(unsafeAddr s[0], s.len)
leaves[0] = finish ctx
ctx: TigerState
leaves = newSeqOfCap[BlobId](leafCount s.len)
off: int
while true:
init ctx
ctx.update [0'u8]
let n = min(blobLeafSize, s.len - off)
if 0 < n:
ctx.update(unsafeAddr s[off], n)
off.inc n
leaves.add(finish ctx)
if off == s.len: break
compressTree(leaves)
leaves[0]
@ -651,12 +619,15 @@ proc commit*(store: BlobStore; bs: BlobSet): Future[BlobSet] {.async.} =
type
NullIngestStream = ref NullIngestStreamObj
NullIngestStreamObj = object of IngestStreamObj
ctx: Blake2b256
ctx: TigerState
leaves: seq[BlobId]
pos, nodeOffset: BiggestInt
pos: BiggestInt
proc nullBlobClose(s: BlobStream) = discard
proc nullBlobSize(s: BlobStream): BiggestInt =
discard
proc setPosNull(s: BlobStream; pos: BiggestInt) = discard
proc getPosNull(s: BlobStream): BiggestInt = discard
@ -667,13 +638,15 @@ proc nullBlobRead(s: BlobStream; buffer: pointer; len: Natural): Future[int] =
proc nullOpenBlobStream(s: BlobStore; id: BlobId; size: BiggestInt; kind: BlobKind): BlobStream =
BlobStream(
closeImpl: nullBlobClose,
sizeImpl: nullBlobSize,
setPosImpl: setPosNull,
getPosImpl: getPosNull,
readImpl: nullBlobRead)
proc nullFinish(s: IngestStream): Future[tuple[id: BlobId, size: BiggestInt]] =
var s = NullIngestStream(s)
s.leaves.add finish(s.ctx)
if s.pos == 0 or s.pos mod blobLeafSize != 0:
s.leaves.add finish(s.ctx)
compressTree(s.leaves)
var pair: tuple[id: BlobId, size: BiggestInt]
pair.id = s.leaves[0]
@ -681,34 +654,39 @@ proc nullFinish(s: IngestStream): Future[tuple[id: BlobId, size: BiggestInt]] =
result = newFuture[tuple[id: BlobId, size: BiggestInt]]()
complete result, pair
proc nullIngest(s: IngestStream; buf: pointer; len: Natural): Future[void] =
var
proc appendLeaf(s: NullIngestStream) =
s.leaves.add(finish s.ctx)
init s.ctx
s.ctx.update [0'u8]
proc nullIngest(s: IngestStream; data: pointer; size: Natural): Future[void] =
let
s = NullIngestStream(s)
off = 0
buf = cast[ptr array[blobLeafSize, byte]](buf)
while off < len:
var n = min(blobLeafSize, len-off)
let leafOff = int(s.pos and blobLeafSizeMask)
if leafOff == 0:
if s.pos > 0:
s.leaves.add finish(s.ctx)
s.ctx.init do (params: var Blake2bParams):
params.fanout = 2
params.depth = 255
params.leafLength = blobLeafSize
params.nodeOffset = s.nodeOffset
inc s.nodeOffset
else:
n = min(n, blobLeafSize-leafOff)
s.ctx.update(buf[off].addr, n)
off.inc n
s.pos.inc n
buf = cast[ptr UncheckedArray[byte]](data)
var dataOff: int
let leafOff = s.pos.int mod blobLeafSize
if leafOff != 0:
let leafFill = min(blobLeafSize - leafOff, size)
s.ctx.update(buf[0].addr, leafFill)
dataOff.inc leafFill
if leafFill < size:
appendLeaf s
while dataOff+blobLeafSize <= size:
s.ctx.update(buf[dataOff].addr, blobLeafSize)
dataOff.inc blobLeafSize
appendLeaf s
if dataOff != size:
s.ctx.update(buf[dataOff].addr, size - dataOff)
s.pos.inc size
result = newFuture[void]()
complete result
proc nullOpenIngestStream(s: BlobStore; size: BiggestInt; kind: BlobKind): IngestStream =
NullIngestStream(
let s = NullIngestStream(
finishImpl: nullFinish, ingestImpl: nullIngest, leaves: newSeq[BlobId]())
result = s
init s.ctx
s.ctx.update [0'u8]
proc newNullStore*(): BlobStore =
BlobStore(

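As a rough usage sketch of the identifier encodings added above (toHex, toBase32, and the braille `$` form); "hello world" and the import name are purely illustrative, and the round-trip assumes base32.decode accepts the unpadded form that toBase32 emits:

import blobsets

let id = blobHash("hello world")
echo id.toHex      # 48 hex characters for the 24-byte Tiger digest
echo id.toBase32   # 39 unpadded base32 characters (blobBase32Len)
echo $id           # 24 braille runes, the visual form
doAssert toBlobId(id.toBase32) == id  # parseStringId dispatches on string length

The base32 form is what the file store below now uses for on-disk blob names.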
View File

@ -1,9 +1,7 @@
import ../blobsets
import tiger
import std/asyncfile, std/asyncdispatch, std/os
import ./priv/nimcrypto/blake2
proc ingestFile*(store: BlobStore; path: string): Future[tuple[id: BlobId, size: BiggestInt]] {.async.} =
## Ingest a file and return blob metadata.
let
@ -13,7 +11,7 @@ proc ingestFile*(store: BlobStore; path: string): Future[tuple[id: BlobId, size:
close file
let stream = store.openIngestStream(fileSize, dataBlob)
if fileSize > 0:
var buf = newString(min(blobLeafSize, fileSize))
var buf = newString(min(8 shl 10, fileSize))
while true:
let n = await file.readBuffer(buf[0].addr, buf.len)
if n == 0: break
@ -28,7 +26,7 @@ type
FsIngestStream = ref FsIngestStreamObj
FsIngestStreamObj = object of IngestStreamObj
ctx: Blake2b256
ctx: TigerState
leaves: seq[BlobId]
path: string
file: AsyncFile
@ -43,6 +41,10 @@ proc fsBlobClose(s: BlobStream) =
var s = FsBlobStream(s)
close s.file
proc fsBlobSize(s: BlobStream): BiggestInt =
var s = FsBlobStream(s)
s.file.getFileSize.BiggestInt
proc setPosFs(s: BlobStream; pos: BiggestInt) =
var s = FsBlobStream(s)
s.file.setFilePos (int64)pos
@ -59,10 +61,11 @@ proc fsOpenBlobStream(s: BlobStore; id: BlobId; size: BiggestInt; kind: BlobKind
var fs = FileStore(s)
try:
let
path = fs.root / $kind / id.toHex
path = fs.root / $kind / id.toBase32
file = openAsync(path, fmRead)
result = FsBlobStream(
closeImpl: fsBlobClose,
sizeImpl: fsBlobSize,
setPosImpl: setPosFs,
getPosImpl: getPosFs,
readImpl: fsBlobRead,
@ -76,11 +79,12 @@ proc fsFinish(s: IngestStream): Future[tuple[id: BlobId, size: BiggestInt]] =
s = FsIngestStream(s)
pair: tuple[id: BlobId, size: BiggestInt]
close s.file
s.leaves.add finish(s.ctx)
if s.pos == 0 or s.pos mod blobLeafSize != 0:
s.leaves.add finish(s.ctx)
compressTree(s.leaves)
pair.id = s.leaves[0]
pair.size = s.pos
let finalPath = s.path.parentDir / pair.id.toHex
let finalPath = s.path.parentDir / pair.id.toBase32
if fileExists finalPath:
removeFile s.path
else:
@ -88,29 +92,31 @@ proc fsFinish(s: IngestStream): Future[tuple[id: BlobId, size: BiggestInt]] =
result = newFuture[tuple[id: BlobId, size: BiggestInt]]()
complete result, pair
proc fsIngest(s: IngestStream; buf: pointer; len: Natural) {.async.} =
var
proc appendLeaf(s: FsIngestStream) =
s.leaves.add(finish s.ctx)
init s.ctx
s.ctx.update [0'u8]
proc fsIngest(s: IngestStream; data: pointer; size: Natural): Future[void] =
let
s = FsIngestStream(s)
off = 0
buf = cast[ptr array[blobLeafSize, byte]](buf)
while off < len:
var n = min(blobLeafSize, len-off)
let leafOff = int(s.pos and blobLeafSizeMask)
if leafOff == 0:
if s.pos > 0:
s.leaves.add finish(s.ctx)
s.ctx.init do (params: var Blake2bParams):
params.fanout = 2
params.depth = 255
params.leafLength = blobLeafSize
params.nodeOffset = s.nodeOffset
inc s.nodeOffset
else:
n = min(n, blobLeafSize-leafOff)
s.ctx.update(buf[off].addr, n)
await s.file.writeBuffer(buf[off].addr, n)
off.inc n
s.pos.inc n
buf = cast[ptr UncheckedArray[byte]](data)
result = s.file.writeBuffer(data, size)
var dataOff: int
let leafOff = s.pos.int mod blobLeafSize
if leafOff != 0:
let leafFill = min(blobLeafSize - leafOff, size)
s.ctx.update(buf[0].addr, leafFill)
dataOff.inc leafFill
if leafFill < size:
appendLeaf s
while dataOff+blobLeafSize <= size:
s.ctx.update(buf[dataOff].addr, blobLeafSize)
dataOff.inc blobLeafSize
appendLeaf s
if dataOff != size:
s.ctx.update(buf[dataOff].addr, size - dataOff)
s.pos.inc size
proc fsOpenIngestStream(s: BlobStore; size: BiggestInt; kind: BlobKind): IngestStream =
var fs = FileStore(s)
@ -127,12 +133,14 @@ proc fsOpenIngestStream(s: BlobStore; size: BiggestInt; kind: BlobKind): IngestS
stream.leaves = newSeqOfCap[BlobId](leafCount size)
else:
stream.leaves = newSeq[BlobId]()
init stream.ctx
stream.ctx.update [0'u8]
stream
proc fsContains(s: BlobStore; id: BlobId; kind: BlobKind): Future[bool] =
var fs = FileStore(s)
result = newFuture[bool]("blobsets.filestores.fsContains")
let path = fs.root / $kind / id.toHex
let path = fs.root / $kind / id.toBase32
try:
close(openAsync(path, fmRead))
result.complete(true)

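To tie the file store changes together, ingesting a file and getting back its tree hash might look roughly like this; a hedged sketch, assuming the blobsets modules are on the compiler search path and that a file example.bin exists:

import std/asyncdispatch
import blobsets, blobsets/filestores

let store = newFileStore("/tmp/blobs")
# ingestFile streams the file in 8 KiB reads and finalizes the Tiger tree.
let (id, size) = waitFor store.ingestFile("example.bin")
echo "ingested ", size, " bytes as ", id.toBase32
# The finished blob is renamed to a path derived from id.toBase32,
# which is also the path fsOpenBlobStream and fsContains resolve.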
View File

@ -1,377 +0,0 @@
import strutils, streams, tables, cbor, os, math, asyncfile, asyncdispatch
import ../blobsets, ./stores
type EntryKey = enum
typeKey = 1,
dataKey = 2,
sizeKey = 3
type FsType* = enum
fsFileList = 0,
fsDirChunk = 1,
fsFileChunk = 2,
type FsKind* = enum
fileNode,
dirNode,
shallowDirChunk,
shallowFileList,
type
FileLink* = object
cid*: Cid
size*: int
FsNode* = ref object
cid: Cid
case kind*: FsKind
of fileNode:
links*: seq[FileLink]
of dirNode:
entries: OrderedTable[string, FsNode]
of shallowFileList, shallowDirChunk:
discard
size: BiggestInt
proc isRaw*(file: FsNode): bool =
(file.kind == fileNode) and (file.links.len == 0)
proc cid*(u: FsNode): Cid =
assert u.cid.isValid
u.cid
proc isFile*(u: FsNode): bool = u.kind in { fileNode, shallowFileList }
proc isDir*(u: FsNode): bool = u.kind in { dirNode, shallowDirChunk }
proc size*(u: FsNode): BiggestInt =
if u.kind == dirNode: u.entries.len.BiggestInt
else: u.size
proc newFsRoot*(): FsNode =
FsNode(
kind: dirNode,
entries: initOrderedTable[string, FsNode](8))
proc newUnixfsFile*(): FsNode =
FsNode(kind: fileNode)
proc newUnixfsDir*(cid: Cid): FsNode =
FsNode(cid: cid, kind: dirNode)
proc add*(root: var FsNode; name: string; node: FsNode) =
root.entries[name] = node
proc addDir*(root: var FsNode; name: string; cid: Cid) {.deprecated.} =
assert cid.isValid
root.add name, FsNode(kind: dirNode, cid: cid)
proc addFile*(root: var FsNode; name: string; cid: Cid; size: BiggestInt) {.deprecated.} =
assert cid.isValid
root.add name, FsNode(kind: fileNode, cid: cid, size: size)
proc del*(dir: var FsNode; name: string) =
dir.entries.del name
const
DirTag* = 0xda3c80 ## CBOR tag for UnixFS directories
FileTag* = 0xda3c81 ## CBOR tag for UnixFS files
proc isUnixfs*(bin: string): bool =
## Check if a string contains a UnixFS node
## in CBOR form.
var
s = newStringStream bin
c: CborParser
try:
c.open s
c.next
if c.kind == CborEventKind.cborTag:
result = c.tag == DirTag or c.tag == FileTag
except ValueError: discard
close s
proc toCbor*(u: FsNode): CborNode =
case u.kind
of fileNode:
let array = newCborArray()
array.seq.setLen u.links.len
for i in 0..u.links.high:
let L = newCborMap()
# typeEntry is reserved but not in use
L[dataKey.int] = u.links[i].cid.newCborBytes
L[sizeKey.int] = u.links[i].size.newCborInt
array.seq[i] = L
result = newCborTag(FileTag, array)
of dirNode:
let map = newCborMap()
for name, node in u.entries:
var entry = newCborMap()
case node.kind
of fileNode, shallowFileList:
if node.isRaw:
entry[typeKey.int] = fsFileChunk.int.newCborInt
else:
entry[typeKey.int] = fsFileList.int.newCborInt
entry[dataKey.int] = node.cid.newCborBytes
entry[sizeKey.int] = node.size.newCborInt
of dirNode:
entry[typeKey.int] = fsDirChunk.int.newCborInt
entry[dataKey.int] = node.cid.newCborBytes
entry[sizeKey.int] = node.entries.len.newCborInt
of shallowDirChunk:
entry[typeKey.int] = fsDirChunk.int.newCborInt
entry[dataKey.int] = node.cid.newCborBytes
entry[sizeKey.int] = node.size.int.newCborInt
map[name] = entry
# TODO: the CBOR maps must be sorted
result = newCborTag(DirTag, map)
else:
raiseAssert "shallow FsNodes can not be encoded"
template parseAssert(cond: bool; msg = "") =
if not cond: raise newException(
ValueError,
if msg == "": "invalid UnixFS CBOR" else: "invalid UnixFS CBOR, " & msg)
proc parseFs*(raw: string; cid: Cid): FsNode =
## Parse a string containing CBOR data into a FsNode.
new result
result.cid = cid
var
c: CborParser
buf = ""
open(c, newStringStream(raw))
next c
parseAssert(c.kind == CborEventKind.cborTag, "data not tagged")
let tag = c.tag
if tag == FileTag:
result.kind = fileNode
next c
parseAssert(c.kind == CborEventKind.cborArray, "file data not an array")
let nLinks = c.arrayLen
result.links = newSeq[FileLink](nLinks)
for i in 0..<nLinks:
next c
parseAssert(c.kind == CborEventKind.cborMap, "file array does not contain maps")
let nAttrs = c.mapLen
for _ in 1..nAttrs:
next c
parseAssert(c.kind == CborEventKind.cborPositive, "link map key not an integer")
let key = c.readInt.EntryKey
next c
case key
of typeKey:
parseAssert(false, "type file links are not supported")
of dataKey:
parseAssert(c.kind == CborEventKind.cborBytes, "CID not encoded as bytes")
c.readBytes buf
result.links[i].cid.take buf
of sizeKey:
parseAssert(c.kind == CborEventKind.cborPositive, "link size not encoded properly")
result.links[i].size = c.readInt
result.size.inc result.links[i].size
elif tag == DirTag:
result.kind = dirNode
next c
parseAssert(c.kind == CborEventKind.cborMap)
let dirLen = c.mapLen
parseAssert(dirLen != -1, raw)
result.entries = initOrderedTable[string, FsNode](dirLen.nextPowerOfTwo)
for i in 1 .. dirLen:
next c
parseAssert(c.kind == CborEventKind.cborText, raw)
c.readText buf
parseAssert(not buf.contains({ '/', '\0'}), raw)
next c
parseAssert(c.kind == CborEventKind.cborMap)
let nAttrs = c.mapLen
parseAssert(nAttrs > 1, raw)
let entry = new FsNode
result.entries[buf] = entry
for i in 1 .. nAttrs:
next c
parseAssert(c.kind == CborEventKind.cborPositive)
case c.readInt.EntryKey
of typeKey:
next c
case c.readInt.FsType
of fsFileList: entry.kind = shallowFileList
of fsDirChunk: entry.kind = shallowDirChunk
of fsFileChunk:
entry.kind = fileNode
entry.links = newSeq[FileLink](0)
of dataKey:
next c
c.readBytes buf
entry.cid.take buf
of sizeKey:
next c
entry.size = c.readInt
else:
parseAssert(false, raw)
next c
parseAssert(c.kind == cborEof, "trailing data")
proc toStream*(node: FsNode; s: Stream) =
let c = node.toCbor()
c.toStream s
iterator items*(dir: FsNode): (string, FsNode) =
assert(dir.kind == dirNode)
for k, v in dir.entries.pairs:
yield (k, v)
proc containsFile*(dir: FsNode; name: string): bool =
doAssert(dir.kind == dirNode)
dir.entries.contains name
proc `[]`*(dir: FsNode; name: string): FsNode =
if dir.kind == dirNode:
result = dir.entries.getOrDefault name
proc `[]`*(dir: FsNode; index: int): (string, FsNode) =
result[0] = ""
if dir.kind == dirNode:
var i = 0
for name, node in dir.entries.pairs:
if i == index:
result = (name, node)
break
inc i
proc lookupFile*(dir: FsNode; name: string): tuple[cid: Cid, size: BiggestInt] =
doAssert(dir.kind == dirNode)
let f = dir.entries[name]
if f.kind == fileNode:
result.cid = f.cid
result.size = f.size
proc addFile*(store: BlobStore; path: string): FsNode =
## Add a file to the store and a FsNode.
let
file = openAsync(path, fmRead)
fileSize = file.getFileSize
u = newUnixfsFile()
u.links = newSeqOfCap[FileLink](1)
var
buf = newString(min(maxChunKSize, fileSize))
pos = 0
let shortLen = fileSize mod maxChunKSize
if 0 < shortLen:
buf.setLen shortLen
# put the short chunck first
while true:
let n = waitFor file.readBuffer(buf[0].addr, buf.len)
buf.setLen n
let cid = store.put(buf)
u.links.add FileLink(cid: cid, size: buf.len)
u.size.inc buf.len
pos.inc n
if pos >= fileSize: break
buf.setLen maxChunkSize
close file
if u.size == 0:
# return the CID for a raw nothing
u.cid = dagHash("")
else:
if u.links.len == 1:
# take a shortcut use the raw chunk CID
u.cid = u.links[0].cid
u.links.setLen 0
else:
u.cid = store.putDag(u.toCbor)
result = u
proc addDir*(store: BlobStore; dirPath: string): FsNode =
var dRoot = newFsRoot()
for kind, path in walkDir dirPath:
var child: FsNode
case kind
of pcFile, pcLinkToFile:
child = store.addFile path
of pcDir, pcLinkToDir:
child = store.addDir(path)
else: continue
dRoot.add path.extractFilename, child
let
dag = dRoot.toCbor
cid = store.putDag(dag)
result = newUnixfsDir(cid)
proc open*(store: BlobStore; cid: Cid): FsNode =
assert cid.isValid
let raw = store.get(cid)
result = parseFs(raw, cid)
proc openDir*(store: BlobStore; cid: Cid): FsNode =
assert cid.isValid
var raw = ""
try: store.get(cid, raw)
except MissingChunk: raiseMissing cid
# this sucks
result = parseFs(raw, cid)
assert(result.kind == dirNode)
proc walk*(store: BlobStore; dir: FsNode; path: string; cache = true): FsNode =
## Walk a path down a root.
assert(dir.kind == dirNode)
result = dir
var raw = ""
for name in split(path, DirSep):
if name == "": continue
if result.kind == fileNode:
result = nil
break
var next = result[name]
if next.isNil:
result = nil
break
if (next.kind in {shallowFileList, shallowDirChunk}):
store.get(next.cid, raw)
next = parseFs(raw, next.cid)
if cache:
result.entries[name] = next
result = next
#[
iterator fileChunks*(store: BlobStore; file: FsNode): string =
## Iterate over the links in a file and return futures for link data.
if file.cid.isRaw:
yield store.get(file.cid)
else:
var
i = 0
chunk = ""
while i < file.links.len:
store.get(file.links[i].cid, chunk)
yield chunk
inc i
]#
proc readBuffer*(store: BlobStore; file: FsNode; pos: BiggestInt;
buf: pointer; size: int): int =
## Read a UnixFS file into a buffer. May return zero for any failure.
assert(pos > -1)
var
filePos = 0
chunk = ""
if pos < file.size:
if file.isRaw:
let pos = pos.int
store.get(file.cid, chunk)
if pos < chunk.high:
copyMem(buf, chunk[pos].addr, min(chunk.len - pos, size))
result = size
else:
for i in 0..file.links.high:
let linkSize = file.links[i].size
if filePos <= pos and pos < filePos+linkSize:
store.get(file.links[i].cid, chunk)
let
chunkPos = int(pos - filePos)
n = min(chunk.len-chunkPos, size)
copyMem(buf, chunk[chunkPos].addr, n)
result = n
break
filePos.inc linkSize

View File

@ -12,7 +12,7 @@ type
# TODO: tables must be purged periodically
rng: Rand
proc newHttpStoreServer*(backend: BlobStore; seed = 0'i64): HttpStoreServer =
proc newHttpStoreServer*(backend: BlobStore; seed = 1'i64): HttpStoreServer =
## Create a new HTTP server for a given store.
randomize()
HttpStoreServer(
@ -62,8 +62,8 @@ proc get(hss: HttpStoreServer; req: Request) {.async.} =
if range != "":
let
(startPos, endPos) = parseRange range
pos = startPos
len = endPos - startPos
pos = startPos
len = endPos - startPos
stream.pos = pos
var body = newString(len)
len = await stream.read(body[0].addr, len)

View File

@ -1,5 +1,6 @@
import std/asyncdispatch, std/httpclient, std/strutils, std/uri
import ../blobsets
import tiger
import std/asyncdispatch, std/httpclient, std/strutils, std/uri
type
HttpBlobStream = ref HttpBlobStreamObj
@ -12,7 +13,7 @@ type
HttpIngestStreamObj = object of IngestStreamObj
client: AsyncHttpClient
url: string
ctx: Blake2b256
ctx: TigerState
leaves: seq[BlobId]
leaf: string
buffOff: int
@ -27,6 +28,10 @@ type
proc httpBlobClose(s: BlobStream) = discard
proc httpBlobSize(s: BlobStream): BiggestInt =
var s = HttpBlobStream(s)
discard
proc setPosHttp(s: BlobStream; pos: BiggestInt) =
var s = (HttpBlobStream)s
s.rangePos = pos
@ -52,6 +57,7 @@ proc httpOpenBlobStream(store: BlobStore; id: BlobId; size: BiggestInt; kind: Bl
let stream = HttpBlobStream(
client: store.client,
closeImpl: httpBlobClose,
sizeImpl: httpBlobSize,
setPosImpl: setPosHttp,
getPosImpl: getPosHttp,
readImpl: httpBlobRead,

View File

@ -1,190 +0,0 @@
import std/bitops, std/endians
type
Blake2b* = object
hash: array[8, uint64]
offset: array[2, uint64]
buffer: array[128, uint8]
buffer_idx: uint8
hash_size: uint8
Blake2bParams* = object
b: array[64, byte]
Blake2sParams* = object
b: array[32, byte]
Blake2Params* = Blake2bParams | Blake2sParams
proc params(c: var Blake2b): ptr Blake2bParams =
cast[ptr Blake2bParams](c.hash.addr)
proc `digestLen=`*(p: ptr Blake2bParams; x: range[1..64]) =
p.b[0] = (uint8)x
proc `digestLen=`*(p: ptr Blake2sParams; x: range[1..32]) =
p.b[0] = (uint8)x
proc `keyLen=`*(p: ptr Blake2bParams; x: range[0..64]) =
p.b[1] = (uint8)x
proc `keyLen=`*(p: ptr Blake2sParams; x: range[0..32]) =
p.b[1] = (uint8)x
proc `fanout=`*(p: ptr Blake2Params; x: Natural) =
p.b[2] = (uint8)x
proc `depth=`*(p: ptr Blake2Params; x: Natural) =
p.b[3] = (uint8)x
proc `leafLength=`*(p: ptr Blake2Params; x: Natural) =
var x = x; littleEndian32(p.b[4].addr, x.addr)
proc `nodeOffset=`*(p: ptr Blake2bParams; x: Natural) =
var x = x; littleEndian64(p.b[8].addr, x.addr)
proc `nodeOffset=`*(p: ptr Blake2sParams; x: Natural) =
var tmp: int64
littleEndian64(tmp.addr, p.b[8].addr)
tmp = (tmp and 0xffffffff) or (x shl 32)
littleEndian64(p.b[8].addr, tmp.addr)
proc `nodeDepth=`*(p: ptr Blake2bParams; x: Natural) =
p.b[16] = (uint8)x
proc `nodeDepth=`*(p: ptr Blake2sParams; x: Natural) =
p.b[14] = (uint8)x
proc `innerLength=`*(p: ptr Blake2bParams; x: Natural) =
p.b[17] = (uint8)x
proc `innerLength=`*(p: ptr Blake2sParams; x: Natural) =
p.b[15] = (uint8)x
proc `salt=`*(p: ptr Blake2bParams; salt: pointer; len: Natural) =
copyMem(p.b[32].addr, salt, min(len, 16))
proc `salt=`*(p: ptr Blake2sParams; salt: pointer; len: Natural) =
copyMem(p.b[16].addr, salt, min(len, 8))
proc `personal=`*(p: ptr Blake2bParams; salt: pointer; len: Natural) =
copyMem(p.b[48].addr, salt, min(len, 16))
proc `personal=`*(p: ptr Blake2sParams; salt: pointer; len: Natural) =
copyMem(p.b[24].addr, salt, min(len, 8))
proc init(p: ptr Blake2Params) =
when p is Blake2bParams:
p.digestLen = 64
else:
p.digestLen = 32
p.fanout = 1
p.depth = 1
const Blake2bIV =
[ 0x6a09e667f3bcc908'u64, 0xbb67ae8584caa73b'u64,
0x3c6ef372fe94f82b'u64, 0xa54ff53a5f1d36f1'u64,
0x510e527fade682d1'u64, 0x9b05688c2b3e6c1f'u64,
0x1f83d9abfb41bd6b'u64, 0x5be0cd19137e2179'u64 ]
const Sigma = [
[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ],
[ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 ],
[ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 ],
[ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 ],
[ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 ],
[ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 ],
[ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 ],
[ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 ],
[ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 ],
[ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 ],
[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ],
[ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 ] ]
proc inc(a: var array[2, uint64], b: uint8) =
a[0] = a[0] + b
if (a[0] < b): inc(a[1])
proc padding(a: var array[128, uint8], b: uint8) =
for i in b..127: a[i] = 0
proc G (v: var array[16, uint64],
a,b,c,d: int, x,y: uint64)
{.inline.} =
v[a] = v[a] + v[b] + x
v[d] = rotateRightBits(v[d] xor v[a], 32)
v[c] = v[c] + v[d]
v[b] = rotateRightBits(v