From 6e42d10d270853eb0beb10686752643d19c5e98f Mon Sep 17 00:00:00 2001 From: Emery Hemingway Date: Fri, 13 Sep 2013 23:25:42 -0400 Subject: [PATCH] Element Ids may be expressed through a specific struct field --- id.go => ebml.go | 40 ++++++- ebml_test.go | 14 +-- encode.go | 297 +++++++++++++++++++++++++++-------------------- example_test.go | 28 ++++- header.go | 29 ----- idtest/idtest.go | 10 +- stream.go | 73 +++++++++--- 7 files changed, 309 insertions(+), 182 deletions(-) rename id.go => ebml.go (51%) delete mode 100644 header.go diff --git a/id.go b/ebml.go similarity index 51% rename from id.go rename to ebml.go index d6b10cc..fb6be6a 100644 --- a/id.go +++ b/ebml.go @@ -1,9 +1,39 @@ // Copyright © 2013 Emery Hemingway // Released under the terms of the GNU Public License version 3 +// Package ebml marshals and unmarshals Go structs to and from +// the Extensible Binary Meta Language. package ebml -import "fmt" +import ( + "fmt" + "strconv" +) + +// Header is a struct for encoding and decoding EBML streams. +// +// If nesting in a struct, its field should be listed first +// and should have an ebml tag set to 1a45dfa3. +// +// The following could be a valid top-level struct for +// representing Matroska streams: +// type Matroska struct { +// ebml.Header `ebml:"1a45dfa3"` +// Segment []Segment `ebml:"18538067"` +// } +// +// You will however need to populate field values in Header +// to form a valid EBML document. +type Header struct { + EbmlId Id `ebml:"1a45dfa3"` + EBMLVersion uint8 `ebml:"4286"` + EBMLReadVersion uint8 `ebml:"42f7"` + EBMLMaxIDLength uint8 `ebml:"42f2"` + EBMLMaxSizeLength uint8 `ebml:"42f3"` + DocType string `ebml:"4282"` + DocTypeVersion uint8 `ebml:"4287"` + DocTypeReadVersion uint8 `ebml:"4285"` +} // Id is a type that identifies an ebml element. 
type Id []byte @@ -35,6 +65,14 @@ func NewId(x uint32) Id { return id } +func NewIdFromString(s string) (Id, error) { + x, err := strconv.ParseUint(s, 16, 64) + if err != nil { + return nil, err + } + return NewId(uint32(x)), nil +} + // Format returns func (id Id) Format(s fmt.State, c rune) { if len(id) == 0 { diff --git a/ebml_test.go b/ebml_test.go index 0e33a48..2066b39 100644 --- a/ebml_test.go +++ b/ebml_test.go @@ -1,3 +1,6 @@ +// Copyright © 2013 Emery Hemingway +// Released under the terms of the GNU Public License version 3 + package ebml import ( @@ -6,7 +9,7 @@ import ( "testing" ) -func TestMatroskaEBMLHeader(t *testing.T) { +func TestMarshal(t *testing.T) { var headerA Header headerA.EBMLVersion = 1 headerA.EBMLReadVersion = 1 @@ -16,15 +19,12 @@ func TestMatroskaEBMLHeader(t *testing.T) { headerA.DocTypeVersion = 1 headerA.DocTypeReadVersion = 1 - dst := new(bytes.Buffer) - enc := NewEncoder(dst) - - err := enc.Encode(headerA) + b, err := Marshal(headerA) if err != nil { t.Fatal("Marshal:", err) } - src := bytes.NewReader(dst.Bytes()) + src := bytes.NewReader(b) dec := NewDecoder(src) var headerB Header @@ -34,6 +34,6 @@ func TestMatroskaEBMLHeader(t *testing.T) { } if !reflect.DeepEqual(headerA, headerB) { - t.Fatalf("Marshal -> Unmarshal: marshaled %v to %x, but unmarshaled %v", headerA, dst.Bytes(), headerB) + t.Fatalf("Marshal -> Unmarshal: marshaled %v to %x, but unmarshaled %v", headerA, b, headerB) } } diff --git a/encode.go b/encode.go index 31c75d7..c6a9060 100644 --- a/encode.go +++ b/encode.go @@ -1,6 +1,5 @@ -// Copyright (c) 2013, Emery Hemingway. All rights reserved. 
-// Actully most of it comes from encoding/json, courtesy of -// The Go Authors +// Copyright © 2013 Emery Hemingway +// Released under the terms of the GNU Public License version 3 package ebml @@ -14,16 +13,78 @@ import ( "sync" ) -type encElement struct { - body []byte - elements []*encElement - reader io.Reader +type EncoderElement interface { + io.Reader + Size() int64 +} + +type encContainerElement struct { + id Id size int64 + sizebuf []byte + elements []EncoderElement +} + +func (E *encContainerElement) Append(e EncoderElement) { + E.elements = append(E.elements, e) + E.size += e.Size() +} + +func (E *encContainerElement) Read(p []byte) (n int, err error) { + var l int + if len(E.id) > 0 { + n = copy(p, E.id) + E.id = E.id[n:] + } + + if E.size > 0 { + E.sizebuf = marshalSize(E.size) + E.size = 0 + } + if len(E.sizebuf) > 0 { + l = copy(p[n:], E.sizebuf) + n += l + E.sizebuf = E.sizebuf[l:] + } + + for i, e := range E.elements { + if e.Size() == 0 { + E.elements = E.elements[i:] + } + if len(p) == 0 { + return + } + + l, err = e.Read(p[n:]) + n += l + if err != nil && err != io.EOF { + return + } + } + return n, io.EOF +} + +func (E *encContainerElement) Size() (n int64) { return E.size } + +type encSimpleElement struct { + b []byte +} + +func (e *encSimpleElement) Read(p []byte) (n int, err error) { + n = copy(p, e.b) + e.b = e.b[n:] + if len(e.b) == 0 { + err = io.EOF + } + return +} + +func (e *encSimpleElement) Size() int64 { + return int64(len(e.b)) } // An UnsupportedTypeError is returned by Marshal when attempting // to encode an unsupported value type. 
- type UnsupportedTypeError struct { Type reflect.Type } @@ -41,12 +102,7 @@ func (e *MarshalerError) Error() string { return "ebml: error marshaling type " + e.Type.String() + ": " + e.Err.Error() } -type encodeState struct { - w io.Writer - elements []*encElement -} - -func (es *encodeState) marshal(x interface{}) (err error) { +func marshal(id []byte, v reflect.Value) (E EncoderElement, err error) { defer func() { if r := recover(); r != nil { if _, ok := r.(runtime.Error); ok { @@ -56,53 +112,10 @@ func (es *encodeState) marshal(x interface{}) (err error) { } }() - v := reflect.ValueOf(x) if v.Kind() == reflect.Ptr { v = v.Elem() } - for _, f := range cachedTypeFields(v.Type()) { - fv := fieldByIndex(v, f.index) - - if !fv.IsValid() || isEmptyValue(fv) { - continue - } - - e, err := reflectValue(f.id, fv) - if err != nil { - return err - } - // TODO this append can go away and instead increase the - // es.elements capacity to the amount of cachedTypeFields, - // then have a moving index - es.elements = append(es.elements, e) - } - for _, e := range es.elements { - err = es.push(e) - if err != nil { - return - } - } - return nil -} - -func (es *encodeState) push(e *encElement) (err error) { - _, err = es.w.Write(e.body) - if err != nil { - return - } - for _, se := range e.elements { - err = es.push(se) - if err != nil { - return - } - } - if e.reader != nil { - _, err = io.Copy(es.w, e.reader) - if err != nil { - return - } - } - return nil + return reflectValue(id, v) } func isEmptyValue(v reflect.Value) bool { @@ -115,72 +128,30 @@ func isEmptyValue(v reflect.Value) bool { return false } -func reflectValue(id []byte, v reflect.Value) (*encElement, error) { +func reflectValue(id []byte, v reflect.Value) (EncoderElement, error) { if id == nil { - panic(fmt.Sprintf("nil id for value %v", v.Type())) + panic("nil id for value " + v.Type().Name()) } - - /*m, ok := v.Interface().(Marshaler) - if !ok { - // v dosen't match the interface. Check against *v too. 
- if v.Kind() != reflect.Ptr && v.CanAddr() { - m, ok = v.Addr().Interface().(Marshaler) - if ok { - v = v.Addr() - } - } - } - if ok && (v.Kind() != reflect.Ptr || !v.IsNil()) { - r, size := m.MarshalEBML() - return &encElement{reader: r, size: size}, nil - }*/ - switch v.Kind() { case reflect.Struct: - var children []*encElement - var size int64 - for _, f := range cachedTypeFields(v.Type()) { - fv := fieldByIndex(v, f.index) - if !fv.IsValid() || isEmptyValue(fv) { - continue - } - - child, err := reflectValue(f.id, fv) - if err != nil { - return nil, &MarshalerError{v.Type(), err} - } - if child == nil { - continue - } - children = append(children, child) - size += child.size - } - - sz := MarshalSize(size) - l := len(id) + len(sz) - b := make([]byte, l) - p := copy(b, id) - copy(b[p:], sz) - size += int64(l) - return &encElement{body: b, elements: children, size: size}, nil + return marshalStruct(id, v) case reflect.Slice: if v.IsNil() || v.Len() == 0 { return nil, nil } - var size int64 - children := make([]*encElement, v.Len()) + children := make([]EncoderElement, 0, v.Len()) for i := 0; i < v.Len(); i++ { child, err := reflectValue(id, v.Index(i)) if err != nil { return nil, &MarshalerError{v.Type(), err} } children[i] = child - size += child.size } // in the case of the slice, do not note the Id, nor marshal the size, // slice don't represent containers, only structs do. - return &encElement{elements: children, size: size}, nil + //return &encContainerElement{id: id, elements: children}, nil + return nil, nil case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: x := v.Int() @@ -194,14 +165,92 @@ func reflectValue(id []byte, v reflect.Value) (*encElement, error) { return marshalString(id, v.String()), nil case reflect.Interface, reflect.Ptr: - if v.IsNil() { - return nil, nil - } + /* + m, ok := v.Interface().(Marshaler) + if !ok { + // v dosen't match the interface. Check against *v too. 
+ if v.Kind() != reflect.Ptr && v.CanAddr() { + m, ok = v.Addr().Interface().(Marshaler) + if ok { + v = v.Addr() + } + } + } + if ok && (v.Kind() != reflect.Ptr || !v.IsNil()) { + // BAD, BAD + continue + fmt.Printf("got to this bullshit at id %x\n", id) + r, size := m.MarshalEBML() + sb := MarshalSize(size) + l := len(id) + len(sb) + header := make([]byte, l) + size += int64(l) + + l = copy(header, id) + copy(header[l:], sb) + return &encElement{body: header, reader: r, size: size}, nil + } + */ + + //if v.IsNil() { + // return nil, nil + //} return reflectValue(id, v.Elem()) } return nil, &UnsupportedTypeError{v.Type()} } +func marshalStruct(id []byte, v reflect.Value) (EncoderElement, error) { + //fmt.Printf("at marshalStruct for id %x\n", id) + //defer fmt.Printf("exited from marshalStruct for id %x\n", id) + /* + m, ok := v.Interface().(Marshaler) + if !ok { + // v dosen't match the interface. Check against *v too. + if v.Kind() != reflect.Ptr && v.CanAddr() { + m, ok = v.Addr().Interface().(Marshaler) + if ok { + v = v.Addr() + } + } + } + if ok && (v.Kind() != reflect.Ptr || !v.IsNil()) { + // BROKEN + continue + + r, n := m.MarshalEBML() + sb := MarshalSize(n) + l := len(id) + len(sb) + header := make([]byte, l) + size := int64(l) + n + + l = copy(header, id) + copy(header[l:], sb) + //return &encElement{body: header, reader: &io.LimitedReader{r, n}, size: size}, nil + } + */ + + E := &encContainerElement{id: id} + for _, f := range cachedTypeFields(v.Type()) { + fv := fieldByIndex(v, f.index) + if !fv.IsValid() || isEmptyValue(fv) { + continue + } + e, err := reflectValue(f.id, fv) + if err != nil { + return nil, &MarshalerError{v.Type(), err} + } + if e == nil { + continue + } + E.Append(e) + } + if len(E.elements) == 0 { + return nil, nil + } + return E, nil +} + func fieldByIndex(v reflect.Value, index []int) reflect.Value { for _, i := range index { if v.Kind() == reflect.Ptr { @@ -260,7 +309,7 @@ const ( // MarshalSize returns the EBML variable width 
representation // of an element's size -func MarshalSize(x int64) []byte { +func marshalSize(x int64) []byte { var s int var m byte @@ -305,7 +354,7 @@ func MarshalSize(x int64) []byte { return b } -func marshalInt(id []byte, x int64) *encElement { +func marshalInt(id Id, x int64) EncoderElement { var xl int switch { case x < 0x8F, x > -0x8F: @@ -333,16 +382,17 @@ func marshalInt(id []byte, x int64) *encElement { p++ i := l - 1 + b[i] = byte(x) for i > p { x >>= 8 b[i] = byte(x) i-- } - return &encElement{body: b, size: int64(len(b))} + return &encSimpleElement{b} } -func marshalUint(id []byte, x uint64) *encElement { +func marshalUint(id []byte, x uint64) EncoderElement { var xl int switch { case x < 0xFF: @@ -376,18 +426,18 @@ func marshalUint(id []byte, x uint64) *encElement { b[i] = byte(x) i-- } - return &encElement{body: b, size: int64(len(b))} + return &encSimpleElement{b} } -func marshalString(id []byte, s string) *encElement { +func marshalString(id []byte, s string) EncoderElement { sb := []byte(s) l := len(sb) - sz := MarshalSize(int64(l)) + sz := marshalSize(int64(l)) b := make([]byte, len(id)+len(sz)+l) n := copy(b, id) n += copy(b[n:], sz) copy(b[n:], sb) - return &encElement{body: b, size: int64(len(b))} + return &encSimpleElement{b} } // A field represents a single field found in a struct. @@ -426,6 +476,9 @@ func typeFields(t reflect.Type) []field { // Scan f.typ for fields to include. for i := 0; i < f.typ.NumField(); i++ { sf := f.typ.Field(i) + if sf.Name == "EbmlId" { + continue + } tag := sf.Tag.Get("ebml") if tag == "" { continue @@ -439,10 +492,10 @@ func typeFields(t reflect.Type) []field { index[len(f.index)] = i ft := sf.Type - if ft.Kind() == reflect.Ptr { - // Follow pointer - ft = ft.Elem() - } + //if ft.Kind() == reflect.Ptr { + // // Follow pointer + // ft = ft.Elem() + //} // Record found field and index sequence. 
fields = append(fields, field{id, index, ft}) @@ -488,5 +541,3 @@ func cachedTypeFields(t reflect.Type) []field { fieldCache.Unlock() return f } - -const singletonField = 0 diff --git a/example_test.go b/example_test.go index e563a07..7995820 100644 --- a/example_test.go +++ b/example_test.go @@ -1,10 +1,32 @@ -package ebml +// Copyright © 2013 Emery Hemingway +// Released under the terms of the GNU Public License version 3 -import "fmt" +package ebml_test + +import ( + "fmt" + "git.gitorious.org/go-ebml/ebml.git" +) func ExampleNewId() { - id := NewId(0x1a45dfa3) + id := ebml.NewId(0x1a45dfa3) fmt.Printf("%b, %d, %x, %v", id, id, id, id) // Output: // 00011010.01000101.11011111.10100011, 440786851, 1a45dfa3, 1a45dfa3 } + +func ExampleMarshal() { + DoDad := new(struct { + EbmlId ebml.Id `ebml:"3f0000"` + DoHickey int `ebml:"4242"` + ThingaMabob string `ebml:"4243"` + }) + + DoDad.DoHickey = 14 + DoDad.ThingaMabob = "huzah" + + b, _ := ebml.Marshal(*DoDad) + fmt.Printf("%x", b) + // Output: + // 3f00008c4242810e42438568757a6168 +} diff --git a/header.go b/header.go deleted file mode 100644 index cf7b970..0000000 --- a/header.go +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (c) 2013, Emery Hemingway. All rights reserved. -// Actully most of it comes from encoding/json, courtesy of -// The Go Authors - -package ebml - -// Header is a struct for encoding and decoding EBML streams. -// -// If nesting in a struct, it's field should be listed first -// and should should have an ebml tag set to 1a45dfa3. -// -// The following could be a valid top-level struct for -// representing Matroska streams: -// type Matroska struct { -// ebml.Header `ebml:"1a45dfa3"` -// Segment []Segment `ebml:"18538067"` -// } -// -// You will however need to populate field values in Header -// to form a valid EBML document. 
-type Header struct { - EBMLVersion uint8 `ebml:"4286"` - EBMLReadVersion uint8 `ebml:"42f7"` - EBMLMaxIDLength uint8 `ebml:"42f2"` - EBMLMaxSizeLength uint8 `ebml:"42f3"` - DocType string `ebml:"4282"` - DocTypeVersion uint8 `ebml:"4287"` - DocTypeReadVersion uint8 `ebml:"4285"` -} diff --git a/idtest/idtest.go b/idtest/idtest.go index 03e076d..5f9f5d2 100644 --- a/idtest/idtest.go +++ b/idtest/idtest.go @@ -9,15 +9,15 @@ import ( func main() { for _, s := range os.Args[1:] { - b, err := ebml.ParseId(s) + id, err := ebml.NewIdFromString(s) if err != nil { fmt.Println(err) continue } - c := b[0] + c := id[0] var good bool - switch l := len(b); l { + switch l := len(id); l { case 1: good = c <= 0xff && c > 0x80 case 2: @@ -33,8 +33,8 @@ func main() { } else { fmt.Print("Id is bad: ") } - fmt.Printf("% 11x ", b) - for _, c := range b { + fmt.Printf("% 11x ", id) + for _, c := range id { fmt.Printf(".%08b", c) } fmt.Println() diff --git a/stream.go b/stream.go index dd873cf..ff367d0 100644 --- a/stream.go +++ b/stream.go @@ -1,10 +1,13 @@ -// Copyright (c) 2013, Emery Hemingway. -// See the LICENSE file for terms and conditions. +// Copyright © 2013 Emery Hemingway +// Released under the terms of the GNU Public License version 3 package ebml import ( + "bytes" + "errors" "io" + "reflect" ) // An Encoder writes EBML data to an output stream. @@ -18,20 +21,56 @@ func NewEncoder(w io.Writer) *Encoder { return &Encoder{w: w} } -// Encode writes the EBML binary encoding of v to the stream. -func (enc *Encoder) Encode(v interface{}) error { +// Encode writes the EBML binary encoding of v to an Encoder stream. 
+func (enc *Encoder) Encode(element interface{}) (err error) { if enc.err != nil { return enc.err } - e := &encodeState{w: enc.w} - err := e.marshal(v) + + v := reflect.ValueOf(element) + + var id Id + t := reflect.TypeOf(element) + if f, ok := t.FieldByName("EbmlId"); ok { + id = v.FieldByIndex(f.Index).Interface().(Id) + if id == nil { + id, err = NewIdFromString(f.Tag.Get("ebml")) + if err != nil { + return + } + } + } else { + return errors.New("cannot resolve EBML Id for " + t.Name()) + } + + E, err := marshal(id, v) + if err != nil { + return err + } + + _, err = io.Copy(enc.w, E) if err != nil { - enc.err = err return err } return nil } +// Marshal returns an EBML representation of element. +// +// Marshal first determines the Id of element from the field named 'EbmlId', +// then recursively traverses element. Any exported struct field of element +// with an `ebml` tag will be included in marshalling, with the exception +// of fields tagged with `ebml:"-"`. +// +// The ebml tag should contain a valid EBML id; see the EBML documentation for +// what constitutes a valid id. +func Marshal(element interface{}) ([]byte, error) { + buf := new(bytes.Buffer) + encoder := NewEncoder(buf) + err := encoder.Encode(element) + return buf.Bytes(), err +} + // A decoder reads data from an EBML stream. type Decoder struct { r io.ReadSeeker @@ -60,11 +99,14 @@ func (dec *Decoder) Decode(v interface{}) error { } // Marshaler is the interface implemented by objects that -// can marshal themselves into an EBML stream. If the Marshaler -// is not a container it should not contain and id and size -// header. N will be the size used to compute the size of the -// element that will contain marshaler, and only n bytes will -// be read from r. +// can marshal themselves into an EBML stream. r should only +// contain element data, and not the id and size header of +// the Marshaler element. 
n is the length of the data in r +// and will be used to compute the size of the element above +// Marshaler. Only n bytes will be read from r. +// +// If a struct both implements Marshaler and contains ebml +// tagged fields, the fields will be ignored. type Marshaler interface { MarshalEBML() (r io.Reader, n int64) } @@ -72,8 +114,11 @@ type Marshaler interface { // Unmarshaler is the interface implemented by objects that // can unmarshal themselves from an EBML stream. The data // written to W will contain the data for the element being -// unmarshaled, and not an id or size header. n shall be -// the size of the data at w. +// unmarshaled, and not an id or size header. n shall be the +// size of the data at w. +// +// If a struct both implements Unmarshaler and contains ebml +// tagged fields, the fields will be ignored. type Unmarshaler interface { UnmarshalEBML(n int64) (w io.Writer) }