generic base256 encoder

This commit is contained in:
Emery Hemingway 2014-10-26 04:10:58 -04:00
parent 1ceece9f5c
commit a10494da6e
7 changed files with 227 additions and 194 deletions

14
base256/README.md Normal file
View File

@ -0,0 +1,14 @@
Base256 encoding using UTF-8, configured by specifying the
starting rune for 0x00.
Only tested with U+0800-U+FFFF.
### sha256 digest in hex
`e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855`
### sha256 digest in braille (U+2800)
`⣣⢰⣄⡂⢘⣼⠜⠔⢚⣻⣴⣈⢙⡯⢹⠤⠧⢮⡁⣤⡤⢛⢓⡌⢤⢕⢙⠛⡸⡒⢸⡕`
### sha256 digest in "Supplemental Arrows-B" (U+2900)
`⧣⦰⧄⥂⦘⧼⤜⤔⦚⧻⧴⧈⦙⥯⦹⤤⤧⦮⥁⧤⥤⦛⦓⥌⦤⦕⦙⤛⥸⥒⦸⥕`

116
base256/base256.go Normal file
View File

@ -0,0 +1,116 @@
// Copyright 2014 Emery Hemingway. All rights reserved.
// Use of this source code is governed by the GPLv3+.
// Package base256 implements radix 256
// encoding and decoding using UTF-8.
package base256
import (
"errors"
"fmt"
"unicode/utf8"
)
// Encoding is the UTF-8 byte sequence of the rune that stands for the
// byte value 0x00. NewEncoding produces it, and the methods on Encoding
// use it as the template for every encoded byte.
type Encoding []byte
// InvalidEncoding is returned by NewEncoding when the rune it is given
// fails utf8.ValidRune.
var InvalidEncoding = errors.New("Invalid Encoding")
// InvalidZero is the error NewEncoding reports when a valid rune cannot
// be used as the representation of 0x00. Its value is the rejected rune.
type InvalidZero rune

// Error implements the error interface, quoting the rejected rune.
func (z InvalidZero) Error() string {
	const format = "base256: invalid zero rune %q"
	return fmt.Sprintf(format, rune(z))
}
// NewEncoding generates a new Encoding using r as 0x00.
//
// It returns InvalidEncoding when r is not a valid rune, and an
// InvalidZero error when r cannot hold a byte's worth of payload: the
// UTF-8 form of r must be at least two bytes long, its last byte must be
// 0x80 and the low two bits of its second-to-last byte must be clear.
// Together these mean r is a multiple of 256 and r+0xFF has the same
// encoded length, so 256 consecutive runes are available.
func NewEncoding(r rune) (Encoding, error) {
	if !utf8.ValidRune(r) {
		return nil, InvalidEncoding
	}
	b := make([]byte, utf8.UTFMax)
	n := utf8.EncodeRune(b, r)
	b = b[:n]

	// Encode ORs the top two bits of a source byte into the second-to-last
	// byte and the low six bits into the last byte, so both payload fields
	// must start out as zero. Checking relative to n (rather than a fixed
	// index) also keeps short encodings from indexing out of range.
	if n < 2 || b[n-1] != 0x80 || b[n-2]&0x03 != 0 {
		return nil, InvalidZero(r)
	}
	return b, nil
}
// EncodedLen reports how many bytes are needed to encode n source bytes:
// one copy of the zero rune's UTF-8 sequence per source byte.
func (e Encoding) EncodedLen(n int) int {
	perByte := len(e)
	return perByte * n
}
// Encode encodes src into EncodedLen(len(src)) bytes of dst and returns
// the number of bytes written, which is always that value. Bytes of dst
// beyond the encoded region are left untouched.
//
// Each source byte becomes one copy of the zero rune with the byte's top
// two bits ORed into the second-to-last UTF-8 byte and its low six bits
// ORed into the last one.
//
// Encode panics if dst is too short. An Encoding shorter than two bytes
// (for example a nil Encoding) has no room for the payload; in that case
// nothing is written and 0 is returned.
func (e Encoding) Encode(dst, src []byte) (n int) {
	width := len(e)
	if width < 2 || len(src) == 0 {
		return 0
	}
	n = len(src) * width
	// Fail before writing anything when dst cannot hold the result. An
	// index expression is used because slicing would silently extend into
	// spare capacity.
	_ = dst[n-1]

	for i, b := range src {
		out := dst[i*width : (i+1)*width]
		copy(out, e)
		out[width-2] |= b >> 6
		out[width-1] |= b & 0x3F
	}
	return n
}
// InvalidRuneError is the error Decode returns when it meets a rune that
// does not belong to the encoding. Its value is the offending rune.
type InvalidRuneError rune

// Error implements the error interface, printing the rune in U+XXXX form
// followed by the quoted character.
func (bad InvalidRuneError) Error() string {
	r := rune(bad)
	return fmt.Sprintf("base256: invalid rune: %#U", r)
}

// ErrLength is returned by Decode when the destination buffer is too
// small for the decoded data.
var ErrLength = errors.New("base256: bad length")
// DecodedLen reports how many bytes x bytes of encoded input decode to:
// x divided by the byte length of the zero rune, rounded down.
func (e Encoding) DecodedLen(x int) int {
	width := len(e)
	return x / width
}
// Decode decodes src into dst and returns the number of bytes written,
// which is len(src) divided by the byte length of the zero rune. Bytes
// of dst beyond that count are left untouched.
//
// It returns ErrLength when dst is too small, when src is not a whole
// number of encoded runes, or when the Encoding itself is shorter than
// two bytes. It returns an InvalidRuneError carrying the first rune that
// does not lie within the 256 runes starting at the zero rune. All of
// src is validated before anything is written to dst.
//
// Only the bits shared with the zero rune are checked; use utf8.Valid
// beforehand if general corruption is a concern.
func (e Encoding) Decode(dst, src []byte) (int, error) {
	width := len(e)
	if width < 2 || len(src)%width != 0 {
		return 0, ErrLength
	}
	n := len(src) / width
	if len(dst) < n {
		return 0, ErrLength
	}

	// Every encoded rune must equal the zero rune once the eight payload
	// bits (two in the second-to-last byte, six in the last) are masked.
	prefix := string(e[:width-2])
	for i := 0; i < len(src); i += width {
		in := src[i : i+width]
		if string(in[:width-2]) != prefix ||
			in[width-2]&^0x03 != e[width-2] ||
			in[width-1]&^0x3F != e[width-1] {
			r, _ := utf8.DecodeRune(in)
			return 0, InvalidRuneError(r)
		}
	}

	for i := 0; i < n; i++ {
		in := src[i*width : (i+1)*width]
		dst[i] = in[width-2]<<6 | in[width-1]&0x3F
	}
	return n, nil
}
// EncodeToString returns the base256 encoding of src as a string.
func (e Encoding) EncodeToString(src []byte) string {
	size := e.EncodedLen(len(src))
	buf := make([]byte, size)
	e.Encode(buf, src)
	return string(buf)
}
// DecodeString returns the bytes represented by the base256 string s.
// If Decode reports an error, that error is returned with a nil slice.
func (e Encoding) DecodeString(s string) ([]byte, error) {
	raw := []byte(s)
	out := make([]byte, e.DecodedLen(len(raw)))
	if _, err := e.Decode(out, raw); err != nil {
		return nil, err
	}
	return out, nil
}

71
base256/base256_test.go Normal file
View File

@ -0,0 +1,71 @@
package base256
import "testing"
// newEncT builds the Encoding whose zero rune is '⤀' for use in tests,
// failing the test immediately if NewEncoding rejects it.
func newEncT(t *testing.T) Encoding {
	enc, err := NewEncoding('⤀')
	if err == nil {
		return enc
	}
	t.Fatal(err)
	return nil // not reached: Fatal stops the calling goroutine
}

// newEncB is the benchmark counterpart of newEncT.
func newEncB(t *testing.B) Encoding {
	enc, err := NewEncoding('⤀')
	if err == nil {
		return enc
	}
	t.Fatal(err)
	return nil // not reached: Fatal stops the calling goroutine
}
// TestIterative round-trips every byte value, 0x00 through 0xFF
// inclusive, through Encode and Decode one byte at a time.
func TestIterative(t *testing.T) {
	e := newEncT(t)
	enc := make([]byte, e.EncodedLen(1))
	dec := make([]byte, 1)
	// The counter is an int: a byte counter cannot express "<= 0xFF"
	// without wrapping, which is why 0xFF used to be skipped.
	for v := 0; v <= 0xFF; v++ {
		in := byte(v)
		dec[0] = in
		e.Encode(enc, dec)

		// Overwrite the shared buffer so that a Decode which writes
		// nothing cannot pass by leaving the input in place.
		dec[0] = ^in
		if _, err := e.Decode(dec, enc); err != nil {
			t.Errorf("decoding %08b: %v", in, err)
			continue
		}
		if out := dec[0]; in != out {
			t.Errorf("Wanted %08b, got %08b", in, out)
		}
	}
}
// BenchmarkEncode measures Encode on a 255-byte input holding the values
// 0 through 254.
func BenchmarkEncode(b *testing.B) {
	e := newEncB(b)
	dec := make([]byte, 255)
	enc := make([]byte, e.EncodedLen(len(dec)))
	for i := range dec {
		dec[i] = byte(i)
	}
	// The per-operation byte count never changes, so set it once and keep
	// both it and the setup above out of the timed loop.
	b.SetBytes(int64(len(dec)))
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		e.Encode(enc, dec)
	}
}
// BenchmarkDecode measures Decode on the encoding of a 255-byte input
// holding the values 0 through 254.
func BenchmarkDecode(b *testing.B) {
	e := newEncB(b)
	dec := make([]byte, 255)
	enc := make([]byte, e.EncodedLen(len(dec)))
	for i := range dec {
		dec[i] = byte(i)
	}
	e.Encode(enc, dec)
	// The per-operation byte count never changes, so set it once and keep
	// both it and the setup above out of the timed loop.
	b.SetBytes(int64(len(dec)))
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		e.Decode(dec, enc)
	}
}

26
base256/example_test.go Normal file
View File

@ -0,0 +1,26 @@
package base256_test
import (
"crypto/sha256"
"fmt"
"github.com/ehmry/encoding/base256"
)
func ExampleNewEncoding() {
h := sha256.New()
digest := h.Sum(nil)
fmt.Printf("%x\n", digest)
e, _ := base256.NewEncoding('') // U+2800 BRAILLE PATTERN BLANK
fmt.Println(e.EncodeToString(digest))
e, _ = base256.NewEncoding('⤀')
fmt.Println(e.EncodeToString(digest))
// Output:
// e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
// ⣣⢰⣄⡂⢘⣼⠜⠔⢚⣻⣴⣈⢙⡯⢹⠤⠧⢮⡁⣤⡤⢛⢓⡌⢤⢕⢙⠛⡸⡒⢸⡕
// ⧣⦰⧄⥂⦘⧼⤜⤔⦚⧻⧴⧈⦙⥯⦹⤤⤧⦮⥁⧤⥤⦛⦓⥌⦤⦕⦙⤛⥸⥒⦸⥕
}

View File

@ -1,5 +0,0 @@
# sha256 digest in hex
`e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855`
# sha256 digest in braille
`⣣⢰⣄⡂⢘⣼⠜⠔⢚⣻⣴⣈⢙⡯⢹⠤⠧⢮⡁⣤⡤⢛⢓⡌⢤⢕⢙⠛⡸⡒⢸⡕`

View File

@ -1,89 +0,0 @@
// Copyright 2014 Emery Hemingway. All rights reserved.
// Use of this source code is governed by the GPLv3+.
// Package braille implements radix 256 encoding and decoding using
// the UTF-8 Braille encoding.
package braille
import (
"errors"
"fmt"
"unicode/utf8"
)
// EncodedLen reports how many bytes are needed to encode n source bytes;
// every source byte becomes one three-byte UTF-8 sequence.
func EncodedLen(n int) int {
	const bytesPerRune = 3
	return bytesPerRune * n
}
// Encode writes the Braille encoding of src into dst and returns
// EncodedLen(len(src)), the number of bytes the encoding occupies.
//
// The lead byte 0xE2 is stored at every third position across the whole
// of dst; each source byte then supplies the two bytes that follow its
// lead byte: its top two bits on 0xA0, its low six bits on 0x80.
func Encode(dst, src []byte) (n int) {
	for lead := 0; lead < len(dst); lead += 3 {
		dst[lead] = 0xE2
	}
	for idx, b := range src {
		base := idx * 3
		dst[base+1] = 0xA0 | (b >> 6)
		dst[base+2] = 0x80 | (b & 0x3F)
	}
	n = len(src) * 3
	return n
}
// InvalidRuneError is the error Decode returns when it meets a rune that
// is not part of a Braille string. Its value is the offending rune.
type InvalidRuneError rune

// Error implements the error interface, printing the rune in U+XXXX form
// followed by the quoted character.
func (bad InvalidRuneError) Error() string {
	r := rune(bad)
	return fmt.Sprintf("braille: invalid rune: %#U", r)
}

// ErrLength is returned by Decode when the destination buffer is too
// small for the decoded data.
var ErrLength = errors.New("braille: bad length")
// DecodedLen reports how many bytes x bytes of Braille input decode to:
// x divided by three, rounded down.
func DecodedLen(x int) int {
	const bytesPerRune = 3
	return x / bytesPerRune
}
// Decode decodes the Braille string in src into dst and returns the
// number of bytes written, len(src)/3. Bytes of dst beyond that count
// are left untouched.
//
// It returns ErrLength when dst is too small and an InvalidRuneError when
// any three-byte group in src does not start with the lead byte 0xE2.
// Only the lead byte is checked; all groups are checked before anything
// is written to dst.
func Decode(dst, src []byte) (int, error) {
	n := len(src) / 3
	if len(dst) < n {
		return 0, ErrLength
	}
	for i := 0; i < len(src); i += 3 {
		if src[i] != 0xE2 {
			r, _ := utf8.DecodeRune(src[i:])
			return 0, InvalidRuneError(r)
		}
	}
	// Bound the loop by the number of encoded runes rather than len(dst),
	// so a larger destination no longer reads past the end of src.
	for i := 0; i < n; i++ {
		dst[i] = (src[3*i+1] << 6) | (src[3*i+2] ^ 0x80)
	}
	return n, nil
}
// EncodeToString returns the Braille encoding of src as a string.
func EncodeToString(src []byte) string {
	size := EncodedLen(len(src))
	buf := make([]byte, size)
	Encode(buf, src)
	return string(buf)
}
// DecodeString returns the bytes represented by the braille string s.
// If Decode reports an error, that error is returned with a nil slice.
func DecodeString(s string) ([]byte, error) {
	raw := []byte(s)
	out := make([]byte, DecodedLen(len(raw)))
	if _, err := Decode(out, raw); err != nil {
		return nil, err
	}
	return out, nil
}

View File

@ -1,100 +0,0 @@
package braille
import (
"bytes"
"testing"
)
// encDecTest pairs a Braille string with the bytes it represents.
type encDecTest struct {
	enc string
	dec []byte
}

// encDecTests is the table shared by TestEncode and TestDecode.
//
// The byte 0x00 encodes to U+2800 BRAILLE PATTERN BLANK, which renders as
// empty space; it is written as an escape so the literal cannot be lost
// or mistaken for an empty string.
var encDecTests = []encDecTest{
	{"\u2800", []byte{0x00}},
	{"⠁", []byte{0x01}},
	{"⠂", []byte{0x02}},
	{"⠃", []byte{0x03}},
	{"⠄", []byte{0x04}},
	{"⣿", []byte{0xFF}},
	{"\u2800\u2800", []byte{0x00, 0x00}},
	{"⣿⣿", []byte{0xFF, 0xFF}},
	{"⣿⣿", []byte{0xFF, 0xFF}},
	{"\u2800\u2800\u2800", []byte{0x00, 0x00, 0x00}},
	{"⣿⣿⣿⣿⣿⣿", []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}},
}
// TestEncode runs Encode over every table entry and checks both the
// returned length and the bytes produced.
func TestEncode(t *testing.T) {
	for i, test := range encDecTests {
		size := EncodedLen(len(test.dec))
		dst := make([]byte, size)
		if n := Encode(dst, test.dec); n != len(dst) {
			t.Errorf("#%d: bad return value: got: %d want: %d", i, n, len(dst))
		}
		if got := string(dst); got != test.enc {
			t.Errorf("#%d: %X: got: %X want: %X - %q %q", i, test.dec, dst, test.enc, dst, test.enc)
		}
	}
}
// TestDecode runs Decode over every table entry and checks the error,
// the returned length and the bytes produced.
func TestDecode(t *testing.T) {
	for i, test := range encDecTests {
		dst := make([]byte, DecodedLen(len(test.enc)))
		n, err := Decode(dst, []byte(test.enc))
		if err != nil {
			// Name the failing case; the output is meaningless after an error.
			t.Errorf("#%d: %q: unexpected error: %v", i, test.enc, err)
			continue
		}
		if n != len(dst) {
			t.Errorf("#%d: bad return value: got: %d want: %d", i, n, len(dst))
		}
		if !bytes.Equal(dst, test.dec) {
			// When decoding, the input is test.enc and the wanted value is test.dec.
			t.Errorf("#%d: %q: got: %X want: %X", i, test.enc, dst, test.dec)
		}
	}
}
// TestIterative round-trips every byte value, 0x00 through 0xFF
// inclusive, through Encode and Decode one byte at a time.
func TestIterative(t *testing.T) {
	enc := make([]byte, 3)
	dec := make([]byte, 1)
	// The counter is an int: a byte counter cannot express "<= 0xFF"
	// without wrapping, which is why 0xFF used to be skipped.
	for v := 0; v <= 0xFF; v++ {
		in := byte(v)
		dec[0] = in
		Encode(enc, dec)

		// Overwrite the shared buffer so that a Decode which writes
		// nothing cannot pass by leaving the input in place.
		dec[0] = ^in
		if _, err := Decode(dec, enc); err != nil {
			t.Errorf("decoding %08b: %v", in, err)
			continue
		}
		if out := dec[0]; in != out {
			t.Errorf("Wanted %08b, got %08b", in, out)
		}
	}
}
// BenchmarkEncode measures Encode on a 255-byte input holding the values
// 0 through 254.
func BenchmarkEncode(b *testing.B) {
	dec := make([]byte, 255)
	enc := make([]byte, EncodedLen(len(dec)))
	for i := range dec {
		dec[i] = byte(i)
	}
	// The per-operation byte count never changes, so set it once and keep
	// both it and the setup above out of the timed loop.
	b.SetBytes(int64(len(dec)))
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		Encode(enc, dec)
	}
}
// BenchmarkDecode measures Decode on the encoding of a 255-byte input
// holding the values 0 through 254.
func BenchmarkDecode(b *testing.B) {
	dec := make([]byte, 255)
	enc := make([]byte, EncodedLen(len(dec)))
	for i := range dec {
		dec[i] = byte(i)
	}
	Encode(enc, dec)
	// The per-operation byte count never changes, so set it once and keep
	// both it and the setup above out of the timed loop.
	b.SetBytes(int64(len(dec)))
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		Decode(dec, enc)
	}
}