Element Ids may be expressed through a specific struct field

This commit is contained in:
Emery Hemingway 2013-09-13 23:25:42 -04:00
parent e74029f1c2
commit 6e42d10d27
7 changed files with 309 additions and 182 deletions

View File

@ -1,9 +1,39 @@
// Copyright © 2013 Emery Hemingway
// Released under the terms of the GNU Public License version 3
// Package ebml marshals and unmarshals Go structs to and from
// the Extensible Binary Meta Language (EBML).
package ebml
import "fmt"
import (
"fmt"
"strconv"
)
// Header is a struct for encoding and decoding EBML streams.
//
// If nesting in a struct, its field should be listed first
// and should have an ebml tag set to 1a45dfa3.
//
// The following could be a valid top-level struct for
// representing Matroska streams:
//	type Matroska struct {
//		ebml.Header `ebml:"1a45dfa3"`
//		Segment []Segment `ebml:"18538067"`
//	}
//
// You will however need to populate field values in Header
// to form a valid EBML document.
type Header struct {
// EbmlId carries the id of the header element itself and is not
// encoded as a child field. When it is left nil, Encode falls back
// to the id given in this field's ebml tag.
EbmlId Id `ebml:"1a45dfa3"`
EBMLVersion uint8 `ebml:"4286"`
EBMLReadVersion uint8 `ebml:"42f7"`
EBMLMaxIDLength uint8 `ebml:"42f2"`
EBMLMaxSizeLength uint8 `ebml:"42f3"`
DocType string `ebml:"4282"`
DocTypeVersion uint8 `ebml:"4287"`
DocTypeReadVersion uint8 `ebml:"4285"`
}
// Id is a type that identifies an ebml element.
type Id []byte
@ -35,6 +65,14 @@ func NewId(x uint32) Id {
return id
}
// NewIdFromString parses s as a hexadecimal number (for example
// "1a45dfa3", without a 0x prefix) and returns the Id built from it by
// NewId.
//
// An error is returned when s is not valid hexadecimal or when the value
// does not fit in 32 bits, the widest value NewId accepts. Parsing with a
// 32 bit limit matters: a wider parse followed by a uint32 conversion would
// silently drop the high bits and yield a different, wrong id.
func NewIdFromString(s string) (Id, error) {
	x, err := strconv.ParseUint(s, 16, 32)
	if err != nil {
		return nil, err
	}
	return NewId(uint32(x)), nil
}
// Format implements fmt.Formatter for Id.
func (id Id) Format(s fmt.State, c rune) {
if len(id) == 0 {

View File

@ -1,3 +1,6 @@
// Copyright © 2013 Emery Hemingway
// Released under the terms of the GNU Public License version 3
package ebml
import (
@ -6,7 +9,7 @@ import (
"testing"
)
func TestMatroskaEBMLHeader(t *testing.T) {
func TestMarshal(t *testing.T) {
var headerA Header
headerA.EBMLVersion = 1
headerA.EBMLReadVersion = 1
@ -16,15 +19,12 @@ func TestMatroskaEBMLHeader(t *testing.T) {
headerA.DocTypeVersion = 1
headerA.DocTypeReadVersion = 1
dst := new(bytes.Buffer)
enc := NewEncoder(dst)
err := enc.Encode(headerA)
b, err := Marshal(headerA)
if err != nil {
t.Fatal("Marshal:", err)
}
src := bytes.NewReader(dst.Bytes())
src := bytes.NewReader(b)
dec := NewDecoder(src)
var headerB Header
@ -34,6 +34,6 @@ func TestMatroskaEBMLHeader(t *testing.T) {
}
if !reflect.DeepEqual(headerA, headerB) {
t.Fatalf("Marshal -> Unmarshal: marshaled %v to %x, but unmarshaled %v", headerA, dst.Bytes(), headerB)
t.Fatalf("Marshal -> Unmarshal: marshaled %v to %x, but unmarshaled %v", headerA, b, headerB)
}
}

297
encode.go
View File

@ -1,6 +1,5 @@
// Copyright (c) 2013, Emery Hemingway. All rights reserved.
// Actully most of it comes from encoding/json, courtesy of
// The Go Authors
// Copyright © 2013 Emery Hemingway
// Released under the terms of the GNU Public License version 3
package ebml
@ -14,16 +13,78 @@ import (
"sync"
)
type encElement struct {
body []byte
elements []*encElement
reader io.Reader
// EncoderElement is an encoded EBML element that can be streamed out.
//
// Read yields the element's encoded bytes. Size reports how many encoded
// bytes have not been read yet, so before the first Read it is the total
// encoded length of the element.
type EncoderElement interface {
	io.Reader
	Size() int64
}

// encContainerElement is an element whose payload is a sequence of child
// elements. Its encoding is the id, the encoded payload size, and then
// every child in the order it was appended.
type encContainerElement struct {
	id       Id               // id bytes not yet emitted
	size     int64            // payload bytes not yet emitted (children only, no header)
	sizebuf  []byte           // encoded payload size not yet emitted; set on first Read
	elements []EncoderElement // children that have not reported io.EOF yet
	sealed   bool             // true once sizebuf has been computed
}

// Append adds e as the last child of E and grows the payload size by the
// full encoded size of e. It must not be called after the first Read,
// because the size header is fixed at that point.
func (E *encContainerElement) Append(e EncoderElement) {
	E.elements = append(E.elements, e)
	E.size += e.Size()
}

// Read copies the next encoded bytes of E into p, following the io.Reader
// contract: it may be called repeatedly with buffers of any length and
// returns io.EOF only once the id, the size and every child have been
// emitted completely. A non-EOF error from a child is returned as is.
func (E *encContainerElement) Read(p []byte) (n int, err error) {
	if !E.sealed {
		// Freeze the size header before size starts counting down.
		E.sizebuf = marshalSize(E.size)
		E.sealed = true
	}

	// Header first: id, then size. copy is a no-op when there is no room.
	c := copy(p, E.id)
	E.id = E.id[c:]
	n += c
	if len(E.id) == 0 {
		c = copy(p[n:], E.sizebuf)
		E.sizebuf = E.sizebuf[c:]
		n += c
	}

	// Room left in p implies the header is fully emitted, so children are
	// drained strictly in order; a child is dropped once it reports EOF.
	for n < len(p) && len(E.elements) > 0 {
		c, err = E.elements[0].Read(p[n:])
		n += c
		E.size -= int64(c)
		if err == io.EOF {
			E.elements[0] = nil // release the finished child
			E.elements = E.elements[1:]
			continue
		}
		if err != nil || c == 0 {
			// Real failure, or a child making no progress: hand control
			// back to the caller instead of spinning.
			return n, err
		}
	}

	if len(E.id)+len(E.sizebuf)+len(E.elements) == 0 {
		return n, io.EOF
	}
	return n, nil
}

// Size returns the number of encoded bytes of E that have not been read
// yet: the remaining header bytes plus the remaining payload. Including the
// header is what lets a parent container account for the complete encoding
// of a nested container in its own size field.
func (E *encContainerElement) Size() int64 {
	sizeLen := len(E.sizebuf)
	if !E.sealed {
		sizeLen = len(marshalSize(E.size))
	}
	return int64(len(E.id)+sizeLen) + E.size
}
// encSimpleElement is a leaf element whose complete encoding is already
// held in memory. b shrinks from the front as the element is read.
type encSimpleElement struct {
	b []byte
}

// Read moves as many pending bytes as fit into p. io.EOF is reported
// together with the final bytes, and on every call made after that.
func (e *encSimpleElement) Read(p []byte) (int, error) {
	copied := copy(p, e.b)
	e.b = e.b[copied:]
	if len(e.b) > 0 {
		return copied, nil
	}
	return copied, io.EOF
}

// Size returns the number of bytes that have not been read yet.
func (e *encSimpleElement) Size() int64 {
	pending := len(e.b)
	return int64(pending)
}
// An UnsupportedTypeError is returned by Marshal when attempting
// to encode an unsupported value type.
type UnsupportedTypeError struct {
Type reflect.Type
}
@ -41,12 +102,7 @@ func (e *MarshalerError) Error() string {
return "ebml: error marshaling type " + e.Type.String() + ": " + e.Err.Error()
}
type encodeState struct {
w io.Writer
elements []*encElement
}
func (es *encodeState) marshal(x interface{}) (err error) {
func marshal(id []byte, v reflect.Value) (E EncoderElement, err error) {
defer func() {
if r := recover(); r != nil {
if _, ok := r.(runtime.Error); ok {
@ -56,53 +112,10 @@ func (es *encodeState) marshal(x interface{}) (err error) {
}
}()
v := reflect.ValueOf(x)
if v.Kind() == reflect.Ptr {
v = v.Elem()
}
for _, f := range cachedTypeFields(v.Type()) {
fv := fieldByIndex(v, f.index)
if !fv.IsValid() || isEmptyValue(fv) {
continue
}
e, err := reflectValue(f.id, fv)
if err != nil {
return err
}
// TODO this append can go away and instead increase the
// es.elements capacity to the amount of cachedTypeFields,
// then have a moving index
es.elements = append(es.elements, e)
}
for _, e := range es.elements {
err = es.push(e)
if err != nil {
return
}
}
return nil
}
func (es *encodeState) push(e *encElement) (err error) {
_, err = es.w.Write(e.body)
if err != nil {
return
}
for _, se := range e.elements {
err = es.push(se)
if err != nil {
return
}
}
if e.reader != nil {
_, err = io.Copy(es.w, e.reader)
if err != nil {
return
}
}
return nil
return reflectValue(id, v)
}
func isEmptyValue(v reflect.Value) bool {
@ -115,72 +128,30 @@ func isEmptyValue(v reflect.Value) bool {
return false
}
func reflectValue(id []byte, v reflect.Value) (*encElement, error) {
func reflectValue(id []byte, v reflect.Value) (EncoderElement, error) {
if id == nil {
panic(fmt.Sprintf("nil id for value %v", v.Type()))
panic("nil id for value " + v.Type().Name())
}
/*m, ok := v.Interface().(Marshaler)
if !ok {
// v dosen't match the interface. Check against *v too.
if v.Kind() != reflect.Ptr && v.CanAddr() {
m, ok = v.Addr().Interface().(Marshaler)
if ok {
v = v.Addr()
}
}
}
if ok && (v.Kind() != reflect.Ptr || !v.IsNil()) {
r, size := m.MarshalEBML()
return &encElement{reader: r, size: size}, nil
}*/
switch v.Kind() {
case reflect.Struct:
var children []*encElement
var size int64
for _, f := range cachedTypeFields(v.Type()) {
fv := fieldByIndex(v, f.index)
if !fv.IsValid() || isEmptyValue(fv) {
continue
}
child, err := reflectValue(f.id, fv)
if err != nil {
return nil, &MarshalerError{v.Type(), err}
}
if child == nil {
continue
}
children = append(children, child)
size += child.size
}
sz := MarshalSize(size)
l := len(id) + len(sz)
b := make([]byte, l)
p := copy(b, id)
copy(b[p:], sz)
size += int64(l)
return &encElement{body: b, elements: children, size: size}, nil
return marshalStruct(id, v)
case reflect.Slice:
if v.IsNil() || v.Len() == 0 {
return nil, nil
}
var size int64
children := make([]*encElement, v.Len())
children := make([]EncoderElement, 0, v.Len())
for i := 0; i < v.Len(); i++ {
child, err := reflectValue(id, v.Index(i))
if err != nil {
return nil, &MarshalerError{v.Type(), err}
}
children[i] = child
size += child.size
}
// in the case of the slice, do not note the Id, nor marshal the size,
// slice don't represent containers, only structs do.
return &encElement{elements: children, size: size}, nil
//return &encContainerElement{id: id, elements: children}, nil
return nil, nil
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
x := v.Int()
@ -194,14 +165,92 @@ func reflectValue(id []byte, v reflect.Value) (*encElement, error) {
return marshalString(id, v.String()), nil
case reflect.Interface, reflect.Ptr:
if v.IsNil() {
return nil, nil
}
/*
m, ok := v.Interface().(Marshaler)
if !ok {
// v dosen't match the interface. Check against *v too.
if v.Kind() != reflect.Ptr && v.CanAddr() {
m, ok = v.Addr().Interface().(Marshaler)
if ok {
v = v.Addr()
}
}
}
if ok && (v.Kind() != reflect.Ptr || !v.IsNil()) {
// BAD, BAD
continue
fmt.Printf("got to this bullshit at id %x\n", id)
r, size := m.MarshalEBML()
sb := MarshalSize(size)
l := len(id) + len(sb)
header := make([]byte, l)
size += int64(l)
l = copy(header, id)
copy(header[l:], sb)
return &encElement{body: header, reader: r, size: size}, nil
}
*/
//if v.IsNil() {
// return nil, nil
//}
return reflectValue(id, v.Elem())
}
return nil, &UnsupportedTypeError{v.Type()}
}
// marshalStruct encodes the struct value v as a container element carrying
// id. Every field listed by cachedTypeFields that is valid and non-empty is
// encoded through reflectValue and appended as a child; fields that encode
// to a nil element are left out.
//
// It returns a nil element when no child was produced. A failure while
// encoding a field is wrapped in a MarshalerError holding v's type.
//
// Values implementing Marshaler get no special treatment here.
func marshalStruct(id []byte, v reflect.Value) (EncoderElement, error) {
	structType := v.Type()
	container := &encContainerElement{id: id}

	for _, f := range cachedTypeFields(structType) {
		fieldValue := fieldByIndex(v, f.index)
		if fieldValue.IsValid() && !isEmptyValue(fieldValue) {
			child, err := reflectValue(f.id, fieldValue)
			if err != nil {
				return nil, &MarshalerError{structType, err}
			}
			if child != nil {
				container.Append(child)
			}
		}
	}

	if len(container.elements) > 0 {
		return container, nil
	}
	return nil, nil
}
func fieldByIndex(v reflect.Value, index []int) reflect.Value {
for _, i := range index {
if v.Kind() == reflect.Ptr {
@ -260,7 +309,7 @@ const (
// MarshalSize returns the EBML variable width representation
// of an element's size
func MarshalSize(x int64) []byte {
func marshalSize(x int64) []byte {
var s int
var m byte
@ -305,7 +354,7 @@ func MarshalSize(x int64) []byte {
return b
}
func marshalInt(id []byte, x int64) *encElement {
func marshalInt(id Id, x int64) EncoderElement {
var xl int
switch {
case x < 0x8F, x > -0x8F:
@ -333,16 +382,17 @@ func marshalInt(id []byte, x int64) *encElement {
p++
i := l - 1
b[i] = byte(x)
for i > p {
x >>= 8
b[i] = byte(x)
i--
}
return &encElement{body: b, size: int64(len(b))}
return &encSimpleElement{b}
}
func marshalUint(id []byte, x uint64) *encElement {
func marshalUint(id []byte, x uint64) EncoderElement {
var xl int
switch {
case x < 0xFF:
@ -376,18 +426,18 @@ func marshalUint(id []byte, x uint64) *encElement {
b[i] = byte(x)
i--
}
return &encElement{body: b, size: int64(len(b))}
return &encSimpleElement{b}
}
func marshalString(id []byte, s string) *encElement {
func marshalString(id []byte, s string) EncoderElement {
sb := []byte(s)
l := len(sb)
sz := MarshalSize(int64(l))
sz := marshalSize(int64(l))
b := make([]byte, len(id)+len(sz)+l)
n := copy(b, id)
n += copy(b[n:], sz)
copy(b[n:], sb)
return &encElement{body: b, size: int64(len(b))}
return &encSimpleElement{b}
}
// A field represents a single field found in a struct.
@ -426,6 +476,9 @@ func typeFields(t reflect.Type) []field {
// Scan f.typ for fields to include.
for i := 0; i < f.typ.NumField(); i++ {
sf := f.typ.Field(i)
if sf.Name == "EbmlId" {
continue
}
tag := sf.Tag.Get("ebml")
if tag == "" {
continue
@ -439,10 +492,10 @@ func typeFields(t reflect.Type) []field {
index[len(f.index)] = i
ft := sf.Type
if ft.Kind() == reflect.Ptr {
// Follow pointer
ft = ft.Elem()
}
//if ft.Kind() == reflect.Ptr {
// // Follow pointer
// ft = ft.Elem()
//}
// Record found field and index sequence.
fields = append(fields, field{id, index, ft})
@ -488,5 +541,3 @@ func cachedTypeFields(t reflect.Type) []field {
fieldCache.Unlock()
return f
}
const singletonField = 0

View File

@ -1,10 +1,32 @@
package ebml
// Copyright © 2013 Emery Hemingway
// Released under the terms of the GNU Public License version 3
import "fmt"
package ebml_test
import (
"fmt"
"git.gitorious.org/go-ebml/ebml.git"
)
func ExampleNewId() {
id := NewId(0x1a45dfa3)
id := ebml.NewId(0x1a45dfa3)
fmt.Printf("%b, %d, %x, %v", id, id, id, id)
// Output:
// 00011010.01000101.11011111.10100011, 440786851, 1a45dfa3, 1a45dfa3
}
// ExampleMarshal encodes an anonymous struct. The element id is taken from
// the ebml tag on the EbmlId field because the field itself is left nil.
func ExampleMarshal() {
	doDad := struct {
		EbmlId      ebml.Id `ebml:"3f0000"`
		DoHickey    int     `ebml:"4242"`
		ThingaMabob string  `ebml:"4243"`
	}{
		DoHickey:    14,
		ThingaMabob: "huzah",
	}

	b, err := ebml.Marshal(doDad)
	if err != nil {
		// Surface the failure instead of printing a misleading empty result.
		fmt.Println("marshal:", err)
		return
	}
	fmt.Printf("%x", b)

	// Output:
	// 3f00008c4242810e42438568757a6168
}

View File

@ -1,29 +0,0 @@
// Copyright (c) 2013, Emery Hemingway. All rights reserved.
// Actully most of it comes from encoding/json, courtesy of
// The Go Authors
package ebml
// Header is a struct for encoding and decoding EBML streams.
//
// If nesting in a struct, it's field should be listed first
// and should should have an ebml tag set to 1a45dfa3.
//
// The following could be a valid top-level struct for
// representing Matroska streams:
// type Matroska struct {
// ebml.Header `ebml:"1a45dfa3"`
// Segment []Segment `ebml:"18538067"`
// }
//
// You will however need to populate field values in Header
// to form a valid EBML document.
type Header struct {
EBMLVersion uint8 `ebml:"4286"`
EBMLReadVersion uint8 `ebml:"42f7"`
EBMLMaxIDLength uint8 `ebml:"42f2"`
EBMLMaxSizeLength uint8 `ebml:"42f3"`
DocType string `ebml:"4282"`
DocTypeVersion uint8 `ebml:"4287"`
DocTypeReadVersion uint8 `ebml:"4285"`
}

View File

@ -9,15 +9,15 @@ import (
func main() {
for _, s := range os.Args[1:] {
b, err := ebml.ParseId(s)
id, err := ebml.NewIdFromString(s)
if err != nil {
fmt.Println(err)
continue
}
c := b[0]
c := id[0]
var good bool
switch l := len(b); l {
switch l := len(id); l {
case 1:
good = c <= 0xff && c > 0x80
case 2:
@ -33,8 +33,8 @@ func main() {
} else {
fmt.Print("Id is bad: ")
}
fmt.Printf("% 11x ", b)
for _, c := range b {
fmt.Printf("% 11x ", id)
for _, c := range id {
fmt.Printf(".%08b", c)
}
fmt.Println()

View File

@ -1,10 +1,13 @@
// Copyright (c) 2013, Emery Hemingway.
// See the LICENSE file for terms and conditions.
// Copyright © 2013 Emery Hemingway
// Released under the terms of the GNU Public License version 3
package ebml
import (
"bytes"
"errors"
"io"
"reflect"
)
// An Encoder writes EBML data to an output stream.
@ -18,20 +21,56 @@ func NewEncoder(w io.Writer) *Encoder {
return &Encoder{w: w}
}
// Encode writes the EBML binary encoding of v to the stream.
func (enc *Encoder) Encode(v interface{}) error {
// Encode writes the EBML binary encoding of v to an Encoder stream.
func (enc *Encoder) Encode(element interface{}) (err error) {
if enc.err != nil {
return enc.err
}
e := &encodeState{w: enc.w}
err := e.marshal(v)
v := reflect.ValueOf(element)
var id Id
t := reflect.TypeOf(element)
if f, ok := t.FieldByName("EbmlId"); ok {
id = v.FieldByIndex(f.Index).Interface().(Id)
if id == nil {
id, err = NewIdFromString(f.Tag.Get("ebml"))
if err != nil {
return
}
}
} else {
return errors.New("cannot resolve EBML Id for " + t.Name())
}
E, err := marshal(id, v)
if err != nil {
return err
}
_, err = io.Copy(enc.w, E)
if err != nil {
enc.err = err
return err
}
return nil
}
// Marshal returns an EBML representation of element.
//
// Marshal first determines the Id of element from the field named 'EbmlId',
// then recursively traverses element. Any exported struct field of element
// with an `ebml` tag will be included in marshalling, with the exception
// of fields tagged with `ebml:"-"`.
//
// The ebml tag should contain a valid EBML id, see the EBML documentation
// for what constitutes a valid id.
//
// When encoding fails the returned slice is nil, so a partially written
// document is never handed to the caller.
func Marshal(element interface{}) ([]byte, error) {
	buf := new(bytes.Buffer)
	if err := NewEncoder(buf).Encode(element); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}
// A decoder reads data from an EBML stream.
type Decoder struct {
r io.ReadSeeker
@ -60,11 +99,14 @@ func (dec *Decoder) Decode(v interface{}) error {
}
// Marshaler is the interface implemented by objects that
// can marshal themselves into an EBML stream. If the Marshaler
// is not a container it should not contain and id and size
// header. N will be the size used to compute the size of the
// element that will contain marshaler, and only n bytes will
// be read from r.
// can marshal themselves into an EBML stream. r should only
// contain element data, and not the id and size header of
// the Marshaler element. n is the length of the data in r
// and will be used to compute the size of the element above
// Marshaler. Only n bytes will be read from r.
//
// If a struct both implements Marshaler and contains ebml
// tagged fields, the fields will be ignored.
type Marshaler interface {
MarshalEBML() (r io.Reader, n int64)
}
@ -72,8 +114,11 @@ type Marshaler interface {
// Unmarshaler is the interface implemented by objects that
// can unmarshal themselves from an EBML stream. The data
// written to W will contain the data for the element being
// unmarshaled, and not an id or size header. n shall be
// the size of the data at w.
// unmarshaled, and not an id or size header. n shall be the
// size of the data at w.
//
// If a struct both implements Unmarshaler and contains ebml
// tagged fields, the fields will be ignored.
type Unmarshaler interface {
UnmarshalEBML(n int64) (w io.Writer)
}