diff --git a/decode.go b/decode.go new file mode 100644 index 0000000..8f0ad07 --- /dev/null +++ b/decode.go @@ -0,0 +1,401 @@ +package ebml + +import ( + "bytes" + "errors" + "io" + "reflect" + "runtime" + "strconv" + "sync" +) + +// Unmarshal parses the EBML-encoded data and stores the result +// in the value pointed to by v. +func Unmarshal(data []byte, v interface{}) error { + d := decodeState{r: bytes.NewReader(data)} + return d.unmarshal(v) +} + +// Unmarshalar is the interface implemented by objects that can +// unmarshal themselves from a an EBML stream fed into an io.Writer. +// The Unmarshaler is free to block on Write and Close to pause decoding. +type Unmarshaler interface { + UnmarshalEBML() io.WriteCloser +} + +// An InvalidUnmarshalError describes an invalid argument passed to Unmarshal. +// (The argument to Unmarshal must be a non-nil pointer.) +type InvalidUnmarshalError struct { + Type reflect.Type +} + +func (e *InvalidUnmarshalError) Error() string { + if e.Type == nil { + return "ebml: Unmarshal(nil)" + } + + if e.Type.Kind() != reflect.Ptr { + return "ebml: Unmarshal(non-pointer " + e.Type.String() + ")" + } + return "ebml: Unmarshal(nil " + e.Type.String() + ")" +} + +func (d *decodeState) unmarshal(v interface{}) (err error) { + defer func() { + if r := recover(); r != nil { + if _, ok := r.(runtime.Error); ok { + panic(r) + } + err = r.(error) + } + }() + + rv := reflect.ValueOf(v) + if rv.Kind() != reflect.Ptr || rv.IsNil() { + return &InvalidUnmarshalError{reflect.TypeOf(v)} + } + for { + id := d.readId() + if id == 0 { + break + } + size := d.readSize() + rv = reflect.Indirect(rv) + if rv.Type().NumMethod() > 0 { + if w, ok := rv.Interface().(io.Writer); ok { + _, err = io.CopyN(w, d.r, size) + if err != nil { + d.error(err) + } + } + } else { + d.readToStruct(id, size, rv) + } + } + return d.savedError +} + +type decodeState struct { + r io.ReadSeeker + savedError error +} + +// error aborts the decoding by panicking with err. 
+func (d *decodeState) error(err error) { + panic(err) +} + +// saveError saves the first err it is called with, +// for reporting at the end of the unmarshal. +func (d *decodeState) saveError(err error) { + if d.savedError == nil { + d.savedError = err + } +} + +func (d *decodeState) readId() uint32 { + b := make([]byte, 4) + _, err := d.r.Read(b[:1]) + if err != nil { + if err == io.EOF { + return 0 + } else { + d.error(err) + } + } + x := uint32(b[0]) + switch { + case x >= 0x80: + return x + case x >= 0x40: + b = b[1:2] + case x >= 0x20: + b = b[1:3] + case x >= 0x10: + b = b[1:4] + default: + d.error(errors.New("positioned at an invalid Id or EBMLMaxIDLength > 4")) + return 0 + } + _, err = d.r.Read(b) + if err != nil { + d.error(err) + } + for _, c := range b { + x <<= 8 + x += uint32(c) + } + return x +} + +func (d *decodeState) readSize() int64 { + b := make([]byte, 8) + _, err := d.r.Read(b[:1]) + if err != nil { + d.error(err) + } + x := int64(b[0]) + + switch { + case x >= 0x80: + x -= 0x80 + return x + case x >= 0x40: + x -= 0x40 + b = b[1:2] + case x >= 0x20: + x -= 0x20 + b = b[1:3] + case x >= 0x10: + x -= 0x10 + b = b[1:4] + case x >= 0x08: + x -= 0x08 + b = b[1:5] + case x >= 0x04: + x -= 0x04 + b = b[1:6] + case x >= 0x02: + x -= 0x02 + b = b[1:7] + case x >= 0x01: + x -= 0x01 + b = b[1:] + } + _, err = d.r.Read(b) + if err != nil { + d.error(err) + } + for _, c := range b { + x <<= 8 + x += int64(c) + } + return x +} + +var i int + +func (d *decodeState) readValue(size int64, v reflect.Value) { + i++ + if !v.IsValid() { + _, err := d.r.Seek(size, 1) + if err != nil { + d.error(err) + } + return + } + + switch v.Kind() { + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + b := make([]byte, size) + _, err := d.r.Read(b) + if err != nil { + d.error(err) + } + v.SetUint(unmarshalUint(b)) + + case reflect.Struct: + if size == 0 { + return + } + d.readStruct(size, v) + + case reflect.Interface: + if m, 
ok := v.Interface().(Unmarshaler); ok { + w := m.UnmarshalEBML() + _, err := io.CopyN(w, d.r, size) + if err != nil { + d.error(err) + } + } else { + d.error(&UnsupportedTypeError{v.Type()}) + } + + case reflect.Slice: + d.readToSlice(size, v) + + case reflect.String: + b := make([]byte, size) + d.r.Read(b) + v.SetString(string(b)) + + default: + d.error(&UnsupportedTypeError{v.Type()}) + } +} + +func (d *decodeState) readStruct(size int64, v reflect.Value) { + t := v.Type() + fieldMap := cachedIdFields(t) + pos, err := d.r.Seek(0, 1) + if err != nil { + d.error(err) + } + end := pos + size + for pos < end { + subId := d.readId() + if subId == 0 { + return + } + subSize := d.readSize() + if subSize > 0 { + fi, ok := fieldMap[subId] + if !ok { + pos, err = d.r.Seek(subSize, 1) + if err != nil { + d.error(err) + } + continue + } + subv := v + if subv.Kind() == reflect.Ptr { + if subv.IsNil() { + subv.Set(reflect.New(subv.Type().Elem())) + } + subv = subv.Elem() + } + subv = subv.Field(fi) + switch subv.Kind() { + case reflect.Struct: + d.readStruct(subSize, subv) + case reflect.Slice: + d.readToSlice(subSize, subv) + default: + d.readValue(subSize, subv) + } + } + pos, err = d.r.Seek(0, 1) + if err != nil { + d.error(err) + } + + } +} + +// readToSlice decodes an EBML element from d.r and appends it to the +// slice at v. +func (d *decodeState) readToSlice(size int64, v reflect.Value) { + //var subv reflect.Value + i := v.Len() + if i >= v.Cap() { + newcap := v.Cap() + v.Cap()/2 + if newcap < 4 { + newcap = 4 + } + newv := reflect.MakeSlice(v.Type(), v.Len(), newcap) + reflect.Copy(newv, v) + v.Set(newv) + } + v.SetLen(i + 1) + d.readValue(size, v.Index(i)) +} + +// GET RID OF THIS +// readToStruct decodes an EBML element from d.r into the struct at v, +// if that element is tagged to a field of v. 
+func (d *decodeState) readToStruct(id uint32, size int64, v reflect.Value) { + // Calling this each time may be inefficient + fieldMap := cachedIdFields(v.Type()) + fi, ok := fieldMap[id] + if !ok { + _, err := d.r.Seek(size, 1) + if err != nil { + d.error(err) + } + return + } + + subv := v + if subv.Kind() == reflect.Ptr { + if subv.IsNil() { + subv.Set(reflect.New(subv.Type().Elem())) + } + subv = subv.Elem() + } + subv = subv.Field(fi) + d.readValue(size, subv) +} + +// indirect walks down v allocating pointers as needed, +// until it gets to a non-pointer. +func indirect(v reflect.Value) reflect.Value { + // If v is a named type and is addressable, + // start with its addres, so that if the type has pointer methods, + // we find them. + if v.Kind() != reflect.Ptr && v.Type().Name() != "" && v.CanAddr() { + v = v.Addr() + } + for { + // Load value from interface, but only if the result will be + // usefully addressable + if v.Kind() == reflect.Interface && !v.IsNil() { + e := v.Elem() + if e.Kind() == reflect.Ptr && !e.IsNil() && e.Elem().Kind() == reflect.Ptr { + v = e + continue + } + } + + if v.Kind() != reflect.Ptr { + break + } + + if v.Elem().Kind() != reflect.Ptr && v.CanSet() { + break + } + if v.IsNil() { + v.Set(reflect.New(v.Type().Elem())) + } + v = v.Elem() + } + return v +} + +func unmarshalUint(b []byte) uint64 { + x := uint64(b[0]) + for _, c := range b[1:] { + x <<= 8 + x += uint64(c) + } + return x +} + +func parseFieldIds(t reflect.Type) (m map[uint32]int) { + m = make(map[uint32]int) + for i := 0; i < t.NumField(); i++ { + sf := t.Field(i) + tag := sf.Tag.Get("ebml") + if tag == "" { + continue + } + id, err := strconv.ParseUint(tag, 16, 32) + if err != nil { + panic(err.Error()) + } + m[uint32(id)] = i + } + return +} + +var fieldIdCache struct { + sync.RWMutex + m map[reflect.Type]map[uint32]int +} + +func cachedIdFields(t reflect.Type) map[uint32]int { + fieldIdCache.RLock() + m, ok := fieldIdCache.m[t] + fieldIdCache.RUnlock() + if !ok 
{ + if fieldIdCache.m == nil { + fieldIdCache.m = map[reflect.Type]map[uint32]int{} + } + + m = parseFieldIds(t) + fieldIdCache.Lock() + fieldIdCache.m[t] = m + fieldIdCache.Unlock() + } + return m +} diff --git a/decoder.go b/decoder.go deleted file mode 100644 index 7d455e5..0000000 --- a/decoder.go +++ /dev/null @@ -1,243 +0,0 @@ -package ebml - -import ( - "bufio" - "fmt" - "io" -) - -const ( - SupportedEBMLReadVersion = 1 -) - -type DecoderError string - -func (e DecoderError) Error() string { - return string(e) -} - -type DecoderConfig struct { - DocType string - DocTypeReadVersion uint -} - -// A Decoder decodes an ebml stream. -type Decoder struct { - *bufio.Reader -} - -// NewDecoder returns a new decoder that reads and decodes from r. -func NewDecoder(r io.Reader, c *DecoderConfig) (*Decoder, error) { - d := &Decoder{bufio.NewReader(r)} - - header, err := d.Next() - if err != nil { - return nil, err - } - - if header.ID != EBMLIDUint { - return nil, DecoderError(fmt.Sprintf("invalid EBML header ID %x", header.ID)) - } - - fields := make(map[uint32][]byte) - - var b []byte - for e, err := header.Next(); e != nil; e, err = header.Next() { - if err != nil { - return nil, err - } - - b, err = e.Bytes() - if err != nil { - return nil, err - } - fields[e.ID] = b - } - - b, ok := fields[EBMLReadVersionIDUint] - if ok { - rv := DecodeUint(b) - if rv > SupportedEBMLReadVersion { - return nil, DecoderError(fmt.Sprintf("stream requires an EBML version of %d or greater", rv)) - } - } - - b, ok = fields[DocTypeIDUint] - if !ok { - return nil, DecoderError("did not find mandatory DocType element") - } - dt := string(b) - if dt != c.DocType { - return nil, DecoderError(fmt.Sprintf("stream is of DocType %v, not %v", dt, c.DocType)) - } - - b, ok = fields[DocTypeReadVersionIDUint] - if !ok { - return nil, DecoderError("did not find mandatory DocTypeReadVersion element") - } - dtrv := DecodeUint(b) - if dtrv > c.DocTypeReadVersion { - return nil, 
DecoderError(fmt.Sprintf("stream requires a %v reader version of %d or greater", dt, dtrv)) - } - - return d, nil -} - -func (d *Decoder) Next() (*DecoderElement, error) { - id, err := d.decodeID() - if err != nil { - return nil, err - } - - size, err := d.decodeSize() - if err != nil { - return nil, err - } - - return &DecoderElement{d, int64(size), id, 0}, nil -} - -func (d *Decoder) decodeID() (id uint32, err error) { - var c byte - c, err = d.ReadByte() - if err != nil { - return - } - - id = uint32(c) - var s int - switch { - - case id >= 0x80: // 1xxx xxxx - return - - case id >= 0x40: // 01xx xxxx xxxx xxxx - s = 1 - - case id >= 0x20: // 001x xxxx xxxx xxxx xxxx xxxx - s = 2 - - case id >= 0x10: // 0001 xxxx xxxx xxxx xxxx xxxx xxxx xxxx - s = 3 - - default: - // TODO make an error - return 0, DecoderError("An EBML ID element not at current position, or EBMLMaxIDLength is unknown") - } - - for i := 0; i < s; i++ { - id <<= 8 - c, err = d.ReadByte() - if err != nil { - return - } - id += uint32(c) - } - return -} - -func (d *Decoder) decodeSize() (size uint64, err error) { - var c byte - c, err = d.ReadByte() - if err != nil { - return - } - size = uint64(c) - var s int - switch { - case size >= 0x80: // 1xxx xxxx - size -= 0x80 - return - - case size >= 0x40: // 01xx xxxx xxxx xxxx - size -= 0x40 - s = 1 - - case size >= 0x20: // 001x xxxx xxxx xxxx xxxx xxxx - size -= 0x20 - s = 2 - - case size >= 0x10: // 0001 xxxx xxxx xxxx xxxx xxxx xxxx xxxx - size -= 0x10 - s = 3 - - case size >= 0x08: // 0000 1xxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx - size -= 0x08 - s = 4 - - case size >= 0x04: // 0000 01xx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx - size -= 0x04 - s = 5 - - case size >= 0x02: // 0000 001x xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx - size -= 0x02 - s = 6 - - case size == 0x01: // 0000 0001 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx - size = 0 - s = 7 - - default: - // TODO make an error - return 
0, DecoderError("An EBML size element is not at the current position, or EBMLMaxSizeLength is unknown") - } - - for i := 0; i < s; i++ { - size <<= 8 - c, err = d.ReadByte() - if err != nil { - return - } - size += uint64(c) - } - return -} - -type DecoderElement struct { - R *Decoder - N int64 - ID uint32 - size uint64 -} - -func (e *DecoderElement) Read(p []byte) (n int, err error) { - if e.N <= 0 { - return 0, io.EOF - } - if int64(len(p)) > e.N { - p = p[0:e.N] - } - n, err = e.R.Read(p) - e.N -= int64(n) - return -} - -func (e *DecoderElement) Bytes() ([]byte, error) { - b := make([]byte, e.N) - n, err := e.R.Read(b) - e.N -= int64(n) - return b, err -} - -func (e *DecoderElement) Next() (nx *DecoderElement, err error) { - if e.N <= 0 { - return nil, nil - } - - nx, err = e.R.Next() - if nx != nil { - e.N -= nx.N - } - return -} - -func DecodeUint(b []byte) (v uint) { - l := len(b) - - v = uint(b[0]) - for i := 1; i < l; i++ { - v <<= 8 - v += uint(b[i]) - } - return -} diff --git a/ebml_test.go b/ebml_test.go new file mode 100644 index 0000000..bc76659 --- /dev/null +++ b/ebml_test.go @@ -0,0 +1,32 @@ +package ebml + +import ( + "reflect" + "testing" +) + +func TestMatroskaEBMLHeader(t *testing.T) { + var headerA Header + headerA.EBMLVersion = 1 + headerA.EBMLReadVersion = 1 + headerA.EBMLMaxIDLength = 1 + headerA.EBMLMaxSizeLength = 1 + headerA.DocType = "matroska" + headerA.DocTypeVersion = 1 + headerA.DocTypeReadVersion = 1 + + b, err := Marshal(headerA) + if err != nil { + t.Fatal("Marshal:", err) + } + + var headerB Header + err = Unmarshal(b, &headerB) + if err != nil { + t.Fatal("Unmarshal:", err) + } + + if !reflect.DeepEqual(headerA, headerB) { + t.Fatalf("Marshal -> Unmarshal: marshaled %v to %x, but unmarshaled %v", headerA, b, headerB) + } +} diff --git a/element.go b/element.go deleted file mode 100644 index 32165eb..0000000 --- a/element.go +++ /dev/null @@ -1,81 +0,0 @@ -package ebml - -import ( - "io" -) - -type Element interface { - ID() 
[]byte - Size() uint64 -} - -type Container interface { - Element - Next() Element -} - -type Value interface { - Element - io.Reader -} - -type header struct { - version *Uint - readVersion *Uint - maxIDLength *Uint - maxSizeLength *Uint - docType *String - docTypeVersion *Uint - docTypeReadVersion *Uint -} - -func newHeader(docType string, docTypeVersion, docTypeReadVersion interface{}) *header { - return &header{ - NewUint(EBMLVersionID, uint32(1)), - NewUint(EBMLReadVersionID, uint32(1)), - NewUint(EBMLMaxIDLengthID, uint32(4)), - NewUint(EBMLMaxSizeLengthID, uint32(8)), - NewString(DocTypeID, docType), - NewUint(DocTypeVersionID, docTypeVersion), - NewUint(DocTypeReadVersionID, docTypeReadVersion), - } -} - -func (h *header) ID() []byte { return EBMLID } - -func (h *header) Size() uint64 { - return h.docType.Size() + h.docTypeVersion.Size() + h.docTypeReadVersion.Size() -} - -func (h *header) Next() (e Element) { - switch { - case h.version != nil: - e = h.version - h.version = nil - - case h.readVersion != nil: - e = h.readVersion - h.readVersion = nil - - case h.maxIDLength != nil: - e = h.maxIDLength - h.maxIDLength = nil - - case h.maxSizeLength != nil: - e = h.maxSizeLength - h.maxSizeLength = nil - - case h.docType != nil: - e = h.docType - h.docType = nil - - case h.docTypeVersion != nil: - e = h.docTypeVersion - h.docTypeVersion = nil - - case h.docTypeReadVersion != nil: - e = h.docTypeReadVersion - h.docTypeReadVersion = nil - } - return -} diff --git a/encode.go b/encode.go new file mode 100644 index 0000000..e3090c6 --- /dev/null +++ b/encode.go @@ -0,0 +1,510 @@ +// Copyright (c) 2013, Emery Hemingway. All rights reserved. +// Actully most of it comes from encoding/json, courtesy of +// The Go Authors + +package ebml + +import ( + "bytes" + "errors" + "fmt" + "io" + "reflect" + "runtime" + "strconv" + "sync" +) + +// Marshal returns the EBML encoding of v. 
+// +// If a struct is encountered with a field tag in the form of +// `ebml:"FFFF"`, that field value will be encoded as an EBML +// element with the Id found in that tag. +func Marshal(v interface{}) ([]byte, error) { + var b bytes.Buffer + e := &encodeState{w: &b} + err := e.marshal(v) + if err != nil { + return nil, err + } + return b.Bytes(), nil +} + +// Marshaler is the inerface implemented by objects that can +// represent themselves as a io.Reader of a known length. +type Marshaler interface { + MarshalEBML() (io.Reader, int64) +} + +type encElement struct { + body []byte + elements []*encElement + reader io.Reader + size int64 +} + +// An UnsupportedTypeError is returned by Marshal when attempting +// to encode an unsupported value type. + +type UnsupportedTypeError struct { + Type reflect.Type +} + +func (e *UnsupportedTypeError) Error() string { + return "ebml: unsupported type: " + e.Type.String() +} + +type MarshalerError struct { + Type reflect.Type + Err error +} + +func (e *MarshalerError) Error() string { + return "ebml: error marshaling type " + e.Type.String() + ": " + e.Err.Error() +} + +type encodeState struct { + w io.Writer + elements []*encElement +} + +func (es *encodeState) marshal(x interface{}) (err error) { + defer func() { + if r := recover(); r != nil { + if _, ok := r.(runtime.Error); ok { + panic(r) + } + err = r.(error) + } + }() + + v := reflect.ValueOf(x) + if v.Kind() == reflect.Ptr { + v = v.Elem() + } + for _, f := range cachedTypeFields(v.Type()) { + fv := fieldByIndex(v, f.index) + + if !fv.IsValid() || isEmptyValue(fv) { + continue + } + + e, err := reflectValue(f.id, fv) + if err != nil { + return err + } + // TODO this append can go away and instead increase the + // es.elements capacity to the amount of cachedTypeFields, + // then have a moving index + es.elements = append(es.elements, e) + } + for _, e := range es.elements { + err = es.push(e) + if err != nil { + return + } + } + return nil +} + +func (es *encodeState) 
push(e *encElement) (err error) { + _, err = es.w.Write(e.body) + if err != nil { + return + } + for _, se := range e.elements { + err = es.push(se) + if err != nil { + return + } + } + if e.reader != nil { + _, err = io.Copy(es.w, e.reader) + if err != nil { + return + } + } + return nil +} + +func isEmptyValue(v reflect.Value) bool { + switch v.Kind() { + case reflect.Array, reflect.Slice, reflect.String: + return v.Len() == 0 + case reflect.Interface, reflect.Ptr: + return v.IsNil() + } + return false +} + +func reflectValue(id []byte, v reflect.Value) (*encElement, error) { + if id == nil { + panic(fmt.Sprintf("nil id for value %v", v.Type())) + } + + m, ok := v.Interface().(Marshaler) + if !ok { + // v dosen't match the interface. Check against *v too. + if v.Kind() != reflect.Ptr && v.CanAddr() { + m, ok = v.Addr().Interface().(Marshaler) + if ok { + v = v.Addr() + } + } + } + if ok && (v.Kind() != reflect.Ptr || !v.IsNil()) { + r, size := m.MarshalEBML() + return &encElement{reader: r, size: size}, nil + } + + switch v.Kind() { + case reflect.Struct: + var children []*encElement + var size int64 + for _, f := range cachedTypeFields(v.Type()) { + fv := fieldByIndex(v, f.index) + if !fv.IsValid() || isEmptyValue(fv) { + continue + } + + child, err := reflectValue(f.id, fv) + if err != nil { + return nil, &MarshalerError{v.Type(), err} + } + if child == nil { + continue + } + children = append(children, child) + size += child.size + } + + sz := MarshalSize(size) + l := len(id) + len(sz) + b := make([]byte, l) + p := copy(b, id) + copy(b[p:], sz) + size += int64(l) + return &encElement{body: b, elements: children, size: size}, nil + + case reflect.Slice: + if v.IsNil() || v.Len() == 0 { + return nil, nil + } + var size int64 + children := make([]*encElement, v.Len()) + for i := 0; i < v.Len(); i++ { + child, err := reflectValue(id, v.Index(i)) + if err != nil { + return nil, &MarshalerError{v.Type(), err} + } + children[i] = child + size += child.size + } + // 
in the case of the slice, do not note the Id, nor marshal the size, + // slice don't represent containers, only structs do. + return &encElement{elements: children, size: size}, nil + + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + x := v.Int() + return marshalInt(id, x), nil + + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + x := v.Uint() + return marshalUint(id, x), nil + + case reflect.String: + return marshalString(id, v.String()), nil + + case reflect.Interface, reflect.Ptr: + if v.IsNil() { + return nil, nil + } + return reflectValue(id, v.Elem()) + } + return nil, &UnsupportedTypeError{v.Type()} +} + +func fieldByIndex(v reflect.Value, index []int) reflect.Value { + for _, i := range index { + if v.Kind() == reflect.Ptr { + if v.IsNil() { + return reflect.Value{} + } + v = v.Elem() + } + v = v.Field(i) + } + return v +} + +func parseIdTag(s string) ([]byte, error) { + x, err := strconv.ParseUint(s, 16, 32) + if err != nil { + return nil, err + } + var xl int + switch { + case x < 0x10: + return nil, errors.New("invalid element ID " + s) + case x < 0x400: + xl = 1 + case x < 0x8000: + xl = 2 + case x < 0x400000: + xl = 3 + case x < 0x20000000: + xl = 4 + default: + return nil, errors.New(s + " overflows element ID") + } + buf := make([]byte, xl) + for xl > 1 { + xl-- + buf[xl] = byte(x) + x >>= 8 + } + buf[0] = byte(x) + return buf, nil +} + +const ( + o1 = 1<<7 - 2 + o2 = 1<<14 - 2 + o3 = 1<<21 - 2 + o4 = 1<<28 - 2 + o5 = 1<<35 - 2 + o6 = 1<<42 - 2 + o7 = 1<<49 - 2 + o8 = 1<<56 - 2 +) + +// MarshalSize returns the EBML variable width representation +// of an element's size +func MarshalSize(x int64) []byte { + var s int + var m byte + + switch { + case x == 0: + return []byte{byte(0x80)} + case x < o1: + return []byte{byte(0x80) | byte(x)} + case x < o2: + s = 2 + m = 0x40 + case x < o3: + s = 3 + m = 0x20 + case x < o4: + s = 4 + m = 0x10 + case x < o5: + s = 5 + m = 0x08 + case x < o6: + 
s = 6 + m = 0x04 + case x < o7: + s = 7 + m = 0x02 + case x < o8: + s = 8 + m = 0x01 + default: + panic(fmt.Sprintf("%x overflows element size", x)) + + } + b := make([]byte, s) + s-- + for s > 0 { + b[s] = byte(x) + x >>= 8 + s-- + } + b[0] = byte(x) | m + return b +} + +func marshalInt(id []byte, x int64) *encElement { + var xl int + switch { + case x < 0x8F, x > -0x8F: + xl = 1 + case x < 0x8FFF, x > -0x8FFF: + xl = 2 + case x < 0x8FFFFF, x > -0x8FFFFF: + xl = 3 + case x < 0x8FFFFFFF, x > -0x8FFFFFFF: + xl = 4 + case x < 0x8FFFFFFFFF, x > -0x8FFFFFFFFF: + xl = 5 + case x < 0x8FFFFFFFFFFF, x > -0x8FFFFFFFFFFF: + xl = 6 + case x < 0x8FFFFFFFFFFFFF, x > -0x8FFFFFFFFFFFFF: + xl = 7 + default: + xl = 8 + } + + l := len(id) + 1 + xl + b := make([]byte, l) + p := copy(b, id) + b[p] = 0x80 | byte(xl) + p++ + + i := l - 1 + b[i] = byte(x) + for i > p { + x >>= 8 + b[i] = byte(x) + i-- + } + return &encElement{body: b, size: int64(len(b))} +} + +func marshalUint(id []byte, x uint64) *encElement { + var xl int + switch { + case x < 0xFF: + xl = 1 + case x < 0xFFFF: + xl = 2 + case x < 0xFFFFFF: + xl = 3 + case x < 0xFFFFFFFF: + xl = 4 + case x < 0xFFFFFFFFFF: + xl = 5 + case x < 0xFFFFFFFFFFFF: + xl = 6 + case x < 0xFFFFFFFFFFFFFF: + xl = 7 + default: + xl = 8 + } + + l := len(id) + 1 + xl + b := make([]byte, l) + p := copy(b, id) + b[p] = 0x80 | byte(xl) + p++ + + i := l - 1 + b[i] = byte(x) + for i > p { + x >>= 8 + b[i] = byte(x) + i-- + } + return &encElement{body: b, size: int64(len(b))} +} + +func marshalString(id []byte, s string) *encElement { + sb := []byte(s) + l := len(sb) + sz := MarshalSize(int64(l)) + b := make([]byte, len(id)+len(sz)+l) + n := copy(b, id) + n += copy(b[n:], sz) + copy(b[n:], sb) + return &encElement{body: b, size: int64(len(b))} +} + +// A field represents a single field found in a struct. 
+type field struct { + id []byte + index []int + typ reflect.Type +} + +// typeFields returns a list of fields that EBML should recognize for the given type. +func typeFields(t reflect.Type) []field { + // Anonymous fields to explore at the current level and the next. + current := []field{} + next := []field{{typ: t}} + + // Count of queued names for current level and the next. + count := map[reflect.Type]int{} + nextCount := map[reflect.Type]int{} + + // Types already visited at an earlier level. + visited := map[reflect.Type]bool{} + + // Fields found. + var fields []field + + for len(next) > 0 { + current, next = next, current[:0] + count, nextCount = nextCount, map[reflect.Type]int{} + + for _, f := range current { + if visited[f.typ] { + continue + } + visited[f.typ] = true + + // Scan f.typ for fields to include. + for i := 0; i < f.typ.NumField(); i++ { + sf := f.typ.Field(i) + tag := sf.Tag.Get("ebml") + if tag == "" { + continue + } + id, err := parseIdTag(tag) + if err != nil { + panic(err.Error()) + } + index := make([]int, len(f.index)+1) + copy(index, f.index) + index[len(f.index)] = i + + ft := sf.Type + if ft.Kind() == reflect.Ptr { + // Follow pointer + ft = ft.Elem() + } + + // Record found field and index sequence. + fields = append(fields, field{id, index, ft}) + if count[f.typ] > 1 { + // If there were multipe instances, add a second, + // so that the annihilation code will see a dulicate. + // It only cares about the distinction between 1 or 2, + // so don't bother generating and more copies. + fields = append(fields, fields[len(fields)-1]) + } + } + } + } + return fields +} + +var fieldCache struct { + sync.RWMutex + m map[reflect.Type][]field +} + +// cachedTypeFields is like typeFields but uses a cache to avoid repeated work. +func cachedTypeFields(t reflect.Type) []field { + fieldCache.RLock() + f := fieldCache.m[t] + fieldCache.RUnlock() + if f != nil { + return f + } + + // Compute fields without lock. 
+ // Might dulpicate effort but won't hold other computations back. + f = typeFields(t) + if f == nil { + f = []field{} + } + + fieldCache.Lock() + if fieldCache.m == nil { + fieldCache.m = map[reflect.Type][]field{} + } + fieldCache.m[t] = f + fieldCache.Unlock() + return f +} diff --git a/encoder.go b/encoder.go deleted file mode 100644 index 0a8fd4a..0000000 --- a/encoder.go +++ /dev/null @@ -1,104 +0,0 @@ -package ebml - -import ( - "fmt" - "io" -) - -type EncoderError string - -func (e EncoderError) Error() string { - return string(e) -} - -type EncoderConfig struct { - DocType string - DocTypeVersion uint - DocTypeReadVersion uint -} - -// An Encoder writes EBML data to an output stream. -type Encoder struct { - io.Writer -} - -// NewEncoder returns a new encoder that writes to w. -// -// NewEncoder will write an EBML header to w with values from -// c before returning., -func NewEncoder(w io.Writer, c *EncoderConfig) (*Encoder, error) { - e := &Encoder{w} - return e, e.Encode(newHeader(c.DocType, c.DocTypeVersion, c.DocTypeReadVersion)) -} - -// Encode writes a value that conforms to the Container -// or Element interface. -func (e *Encoder) Encode(v Element) (err error) { - switch V := v.(type) { - case Container: - _, err = e.Write(V.ID()) - if err != nil { - return - } - _, err = e.Write(MarshallSize(V.Size())) - if err != nil { - return - } - for element := V.Next(); element != nil; element = V.Next() { - err = e.Encode(element) - if err != nil { - return - } - } - case Value: - _, err = io.Copy(e, V) - if err != nil { - return - } - - default: - panic(fmt.Sprintf("%T conforms to neither the ebml.Container nor the ebml.Value interface", v)) - } - return -} - -// PutUint writes an unsigned interger with ebml ID id to the encoder strem. -func (e *Encoder) PutUint(id []byte, v interface{}) error { - i := NewUint(id, v) - return e.Encode(i) -} - -// EncodeID writes an element ID to the encoder stream. -// -// See the Encode convenience function. 
-func (e *Encoder) EncodeID(x uint32) (err error) { - var s int - switch { - - case x < 0x10: - panic("invalid element ID") - case x < 0x400: - s = 1 - case x < 0x8000: - s = 2 - case x < 0x400000: - s = 3 - case x < 0x20000000: - s = 4 - default: - return EncoderError(fmt.Sprintf("%x overflows element ID", x)) - } - - buf := make([]byte, s) - s-- - for s > 0 { - buf[s] = byte(x) - x >>= 8 - s-- - } - buf[0] = byte(x) - - _, err = e.Write(buf) - return - -} diff --git a/header.go b/header.go new file mode 100644 index 0000000..8704d84 --- /dev/null +++ b/header.go @@ -0,0 +1,29 @@ +// Copyright (c) 2013, Emery Hemingway. All rights reserved. +// Actully most of it comes from encoding/json, courtesy of +// The Go Authors + +package ebml + +// Header is a struct for encoding and decoding EBML streams. +// +// If nesting in a struct, it's field should be listed first +// and should should have an ebml tag set to 1a45dfa3. +// +// The following could be a valid top-level struct for +// representing Matroska streams: +// type Matroska struct { +// ebml.Header `ebml:"1a45dfa3"` +// Segment []Segment `ebml:"18538067"` +// } +// +// You will however need to populate field values in Header +// to form a valid EBML document. 
+type Header struct { + EBMLVersion uint `ebml:"4286"` + EBMLReadVersion uint `ebml:"42f7"` + EBMLMaxIDLength uint `ebml:"42f2"` + EBMLMaxSizeLength uint `ebml:"42f3"` + DocType string `ebml:"4282"` + DocTypeVersion uint `ebml:"4287"` + DocTypeReadVersion uint `ebml:"4285"` +} diff --git a/ids.go b/ids.go deleted file mode 100644 index 9c8b202..0000000 --- a/ids.go +++ /dev/null @@ -1,45 +0,0 @@ -package ebml - -var ( - EBMLID = []byte{0x1a, 0x45, 0xdf, 0xa3} - EBMLVersionID = []byte{0x42, 0x86} - EBMLReadVersionID = []byte{0x42, 0xf7} - EBMLMaxIDLengthID = []byte{0x42, 0xf2} - EBMLMaxSizeLengthID = []byte{0x42, 0xf3} - DocTypeID = []byte{0x42, 0x82} - DocTypeVersionID = []byte{0x42, 0x87} - DocTypeReadVersionID = []byte{0x42, 0x85} - - CRC32ID = []byte{0xbf} - VoidID = []byte{0xec} - SignatureSlotID = []byte{0x1b, 0x53, 0x86, 0x67} - SignatureAlgoID = []byte{0x7e, 0x8a} - SignatureHashID = []byte{0x7e, 0x9a} - SignaturePublicKey = []byte{0x7e, 0xa5} - Signature = []byte{0x7e, 0xb5} - SignatureElements = []byte{0x7e, 0x5b} - SignatureElementList = []byte{0x7e, 0x7b} - SignedElemnt = []byte{0x65, 0x32} -) - -const ( - EBMLIDUint = 0x1a45dfa3 - EBMLVersionIDUint = 0x4286 - EBMLReadVersionIDUint = 0x42f7 - EBMLMaxIDUintLengthIDUint = 0x42f2 - EBMLMaxSizeLengthIDUint = 0x42f3 - DocTypeIDUint = 0x4282 - DocTypeVersionIDUint = 0x4287 - DocTypeReadVersionIDUint = 0x4285 - - CRC32IDUint = 0xbf - VoidIDUint = 0xec - SignatureSlotIDUint = 0x1b538667 - SignatureAlgoIDUint = 0x7e8a - SignatureHashIDUint = 0x7e9a - SignaturePublicKeyUint = 0x7ea5 - SignatureUint = 0x7eb5 - SignatureElementsUint = 0x7e5b - SignatureElementListUint = 0x7e7b - SignedElemntUint = 0x6532 -) diff --git a/marshall.go b/marshall.go deleted file mode 100644 index cba5c9d..0000000 --- a/marshall.go +++ /dev/null @@ -1,152 +0,0 @@ -package ebml - -import ( - "fmt" -) - -const ( - o1 = 1<<7 - 2 - o2 = 1<<14 - 2 - o3 = 1<<21 - 2 - o4 = 1<<28 - 2 - o5 = 1<<35 - 2 - o6 = 1<<42 - 2 - o7 = 1<<49 - 2 - o8 = 
1<<56 - 2 -) - -type MarshallError string - -func (e MarshallError) Error() string { - return string(e) -} - -// MarshallSize returns an EBML binary representation of a size -func MarshallSize(x uint64) []byte { - var s int - var m byte - switch { - case x == 0: - return []byte{byte(0)} - case x < o1: - s = 1 - m = 0x80 - case x < o2: - s = 2 - m = 0x40 - case x < o3: - s = 3 - m = 0x20 - case x < o4: - s = 4 - m = 0x10 - case x < o5: - s = 5 - m = 0x08 - case x < o6: - s = 6 - m = 0x04 - case x < o7: - s = 7 - m = 0x02 - case x < o8: - s = 8 - m = 0x01 - default: - panic(fmt.Sprintf("%x overflows element size", x)) - } - - b := make([]byte, s) - s-- - for s > 0 { - b[s] = byte(x) - x >>= 8 - s-- - } - b[0] = byte(x) | m - - return b -} - -type UnmarshallError string - -func (e UnmarshallError) Error() string { - return string(e) -} - -// UnmarshallID returns an EBML ID element at the start of buffer b -func UnmarshallID(b []byte) (uint32, error) { - x := uint32(b[0]) - var i int - switch { - case x > 0x80: // 1xxx xxxx - return x, nil - - case x > 0x40: // 01xx xxxx xxxx xxxx - i = 2 - - case x > 0x20: // 001x xxxx xxxx xxxx xxxx xxxx - i = 3 - - case x > 0x10: // 0001 xxxx xxxx xxxx xxxx xxxx xxxx xxxx - i = 4 - - default: - return 0, UnmarshallError("not positioned at an ID element or document MaxIDLength is not supported") - } - - for j := 1; j < i; j++ { - x <<= 8 - x += uint32(b[j]) - } - - return x, nil -} - -// UnmarshallSize returns an EBML size element at the start of buffer b -func UnmarshallSize(b []byte) (uint64, error) { - x := uint64(b[0]) - var i int - switch { - case x >= 0x80: // 1xxx xxxx - return x - 0x80, nil - - case x >= 0x40: // 01xx xxxx xxxx xxxx - i = 2 - x -= 0x40 - - case x >= 0x20: // 001x xxxx xxxx xxxx xxxx xxxx - i = 3 - x -= 0x20 - - case x >= 0x10: // 0001 xxxx xxxx xxxx xxxx xxxx xxxx xxxx - i = 4 - x -= 0x10 - - case x >= 0x08: - i = 5 - x -= 0x08 - - case x >= 0x04: - i = 6 - x -= 0x04 - - case x >= 0x02: - i = 7 - x -= 0x02 - 
- case x >= 0x01: - i = 8 - x -= 0x01 - - default: - return 0, UnmarshallError("not positioned at an size element or document MaxSizeLength is not supported") - } - - for j := 1; j < i; j++ { - x <<= 8 - x += uint64(b[j]) - } - - return x, nil -} diff --git a/marshall_test.go b/marshall_test.go deleted file mode 100644 index 3aa88f5..0000000 --- a/marshall_test.go +++ /dev/null @@ -1,75 +0,0 @@ -package ebml - -import ( - "bytes" - "testing" -) - -var goldenIDs = []struct { - num uint32 - bytes []byte -}{ - {0x1A45DFA3, []byte{0x1A, 0x45, 0xDF, 0xA3}}, // EBML - {0x4286, []byte{0x42, 0x86}}, // EBMLVersion - {0x42F7, []byte{0x42, 0xF7}}, // EBMLReadVersion - {0x42F2, []byte{0x42, 0xF2}}, // EBMLMaxIDLength - {0x42F3, []byte{0x42, 0xF3}}, // EBMLMaxSizeLength - {0x4282, []byte{0x42, 0x82}}, // DocType - {0x4287, []byte{0x42, 0x87}}, // DocTypeVersion - {0x4285, []byte{0x42, 0x85}}, // DocTypeReadVersion -} - -func TestGoldenIDs(t *testing.T) { - for _, g := range goldenIDs { - i, _ := UnmarshallID(g.bytes) - if i != g.num { - t.Errorf("failed to unmarshal ID %v, wanted %d, got %d", g.bytes, g.num, i) - } - } -} - -var goldenSizes = []struct { - num uint64 - bytes []byte -}{ - {0, []byte{0}}, - {1, []byte{129}}, - {2, []byte{130}}, - {127, []byte{64, 127}}, - {128, []byte{64, 128}}, - {256, []byte{65, 0}}, - {16382, []byte{32, 63, 254}}, - {16383, []byte{32, 63, 255}}, - {16384, []byte{32, 64, 0}}, - {16385, []byte{32, 64, 1}}, - {32768, []byte{32, 128, 0}}, - {2097150, []byte{16, 31, 255, 254}}, - {2097151, []byte{16, 31, 255, 255}}, - {2097152, []byte{16, 32, 0, 0}}, - {268435454, []byte{8, 15, 255, 255, 254}}, - {268435455, []byte{8, 15, 255, 255, 255}}, - // http://en.wikipedia.org/wiki/Big_Endian#Examples_of_storing_the_value_0A0B0C0Dh_in_memory - {0x0A0B0C0D, []byte{16 | 0x0A, 0x0B, 0x0C, 0x0D}}, - {1<<56 - 3, []byte{1, 255, 255, 255, 255, 255, 255, 253}}, -} - -func TestGoldenSizes(t *testing.T) { - for _, g := range goldenSizes { - b := MarshallSize(g.num) - 
if !bytes.Equal(b, g.bytes) { - t.Errorf("failed to marshal size %d, wanted %v, got %v", g.num, g.bytes, b) - } - - s, _ := UnmarshallSize(g.bytes) - if s != g.num { - t.Errorf("failed to unmarshal size %v, wanted %d, got %d", g.bytes, g.num, s) - } - } -} - -func BenchmarkSizeMarshalling(b *testing.B) { - for i := 0; i < b.N; i++ { - b := MarshallSize(uint64(i)) - UnmarshallSize(b) - } -} diff --git a/stream.go b/stream.go new file mode 100644 index 0000000..796e13c --- /dev/null +++ b/stream.go @@ -0,0 +1,157 @@ +// Copyright (c) 2013, Emery Hemingway. All rights reserved. +// Actually most of it comes from encoding/json, courtesy of +// The Go Authors + +package ebml + +import ( + "io" +) + +// An Encoder writes EBML data to an output stream. +type Encoder struct { + w io.Writer + err error +} + +// NewEncoder returns a new encoder that writes to w. +func NewEncoder(w io.Writer) *Encoder { + return &Encoder{w: w} +} + +// Encode writes the EBML binary encoding of v to the stream. +func (enc *Encoder) Encode(v interface{}) error { + if enc.err != nil { + return enc.err + } + e := &encodeState{w: enc.w} + err := e.marshal(v) + if err != nil { + enc.err = err + return err + } + return nil +} + +type Decoder struct { + r io.ReadSeeker + err error +} + +// NewDecoder returns a new decoder that reads from r. +func NewDecoder(r io.ReadSeeker) *Decoder { + return &Decoder{r: r} +} + +// Decode reads EBML elements from the input into v. 
+func (dec *Decoder) Decode(v interface{}) error { + if dec.err != nil { + return dec.err + } + d := decodeState{r: dec.r} + err := d.unmarshal(v) + if err != nil { + dec.err = err + return err + } + return nil +} + +/* +func (dec *Decoder) decodeStruct(v reflect.Value) error { + for dec.off < dec.end { + id, err := dec.readId() + if err != nil { + dec.err = err + return err + } + size, err := readSize(dec.r) + if err != nil { + dec.err = err + return err + } + fmt.Printf("got to %x, size %d\n", id, size) + + t := v.Elem().Type() + i := indexById(t, unMarshallId(id)) + fmt.Println("id was", i) + if i < 0 { + fmt.Println("i was < 0, continueing") + continue + } + fv := v.Field(i) + switch fv.Kind() { + case reflect.Struct: + fmt.Println("got to a struct within a struct") + + off, err := dec.r.Seek(0, 1) + if err != nil { + dec.err = err + return err + } + + if v.Type() == ioLimitedReaderType { + off, err := dec.r.Seek(0, 1) + if err != nil { + dec.err = err + return err + } + // this won't work because the mutex is lost or confused + v.Set(reflect.ValueOf(&io.LimitedReader{R: &seekedReader{dec, off}, N: size})) + _, err = dec.r.Seek(size, 1) + if err != nil { + dec.err = err + return err + } + continue + } + + subDec := &Decoder{r: dec.r, off: off, end: off + size} + err = subDec.decodeStruct(fv) + if err != nil { + dec.err = err + return err + } + + case reflect.Slice: + fmt.Println("went into reflect.Slice") + i := v.Len() + l := i + 1 + newv := reflect.MakeSlice(v.Type(), l, l) + err = dec.decode(newv.Index(i)) + if err != nil { + dec.err = err + return err + } + + reflect.Copy(newv, v) + v.Set(newv) + + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + fmt.Println("went into reflect.Uint") + b := make([]byte, size) + + _, err := dec.r.Read(b) + if err != nil { + dec.err = err + return err + } + x := unMarshallUint(b) + fv.SetUint(x) + + case reflect.String: + fmt.Println("went into reflect.String") + b := make([]byte, size) + _, 
err := dec.r.Read(b) + if err != nil { + dec.err = err + return err + } + fv.SetString(string(b)) + + } + return &UnsupportedTypeError{v.Type()} + } + return nil +} +*/ diff --git a/value.go b/value.go deleted file mode 100644 index 8ba20c0..0000000 --- a/value.go +++ /dev/null @@ -1,179 +0,0 @@ -package ebml - -import ( - "io" -) - -type Int struct { - id []byte - buf []byte - off int -} - -func (i *Int) ID() []byte { return i.id } - -func (i *Int) Size() uint64 { return uint64(len(i.buf)) } - -func (i *Int) Read(p []byte) (n int, err error) { - if i.off >= len(i.buf) { - if len(p) == 0 { - return - } - return 0, io.EOF - } - n = copy(p, i.buf[i.off:]) - i.off += n - return -} - -func NewInt(id []byte, v interface{}) *Int { - var x int64 - switch V := v.(type) { - case int: - x = int64(V) - case int32: - x = int64(V) - case int64: - x = int64(V) - default: - return nil - } - - var i int - switch { - case x < 0x8F, x > -0x8F: - i = 1 - case x < 0x8FFF, x > -0x8FFF: - i = 2 - case x < 0x8FFFFF, x > -0x8FFFFF: - i = 3 - case x < 0x8FFFFFFF, x > -0x8FFFFFFF: - i = 4 - case x < 0x8FFFFFFFFF, x > -0x8FFFFFFFFF: - i = 5 - case x < 0x8FFFFFFFFFFF, x > -0x8FFFFFFFFFFF: - i = 6 - case x < 0x8FFFFFFFFFFFFF, x > -0x8FFFFFFFFFFFFF: - i = 7 - default: - i = 8 - } - - j := len(id) + 1 + i - b := make([]byte, j) - k := copy(b, id) - // Too much type conversion - k = copy(b[k:], MarshallSize(uint64(i))) - - for j > k { - j-- - b[j] = byte(x) - x >>= 8 - } - b[j] = byte(x) - - return &Int{id, b, 0} -} - -type Uint struct { - id []byte - buf []byte - off int -} - -func (u *Uint) ID() []byte { return u.id } - -func (u *Uint) Size() uint64 { return uint64(len(u.buf)) } - -func (u *Uint) Read(p []byte) (n int, err error) { - if u.off >= len(u.buf) { - if len(p) == 0 { - return - } - return 0, io.EOF - } - n = copy(p, u.buf[u.off:]) - u.off += n - return -} - -func NewUint(id []byte, v interface{}) *Uint { - var x uint64 - switch V := v.(type) { - case uint: - x = uint64(V) - case 
uint32: - x = uint64(V) - case uint64: - x = uint64(V) - default: - return nil - } - - var i int - switch { - case x < 0xFF: - i = 1 - case x < 0xFFFF: - i = 2 - case x < 0xFFFFFF: - i = 3 - case x < 0xFFFFFFFF: - i = 4 - case x < 0xFFFFFFFFFF: - i = 5 - case x < 0xFFFFFFFFFFFF: - i = 6 - case x < 0xFFFFFFFFFFFFFF: - i = 7 - default: - i = 8 - } - - j := len(id) + 1 + i - b := make([]byte, j) - k := copy(b, id) - k += copy(b[k:], MarshallSize(uint64(i))) - - for j > k { - j-- - b[j] = byte(x) - x >>= 8 - } - b[j] = byte(i) - return &Uint{id, b, 0} -} - -type String struct { - id []byte - buf []byte - off int -} - -func (s *String) ID() []byte { return s.id } - -func (s *String) Size() uint64 { return uint64(len(s.buf)) } - -func (s *String) Read(p []byte) (n int, err error) { - if s.off >= len(s.buf) { - if len(p) == 0 { - return - } - return 0, io.EOF - } - n = copy(p, s.buf[s.off:]) - s.off += n - return -} - -func NewString(id []byte, s string) *String { - sb := []byte(s) - sz := MarshallSize(uint64(len(sb))) - buf := make([]byte, len(id)+len(sz)+len(sb)) - - n := copy(buf, id) - n += copy(buf[n:], sz) - copy(buf[n:], sb) - - return &String{id, buf, 0} -}