From 592ed236387d6f3082ac86f644c75325a7a4cc60 Mon Sep 17 00:00:00 2001 From: xdrm-brackets Date: Tue, 25 Sep 2018 21:22:25 +0200 Subject: [PATCH] update multipart parser + tests --- internal/apirequest/dataset.go | 12 +- internal/multipart/component.go | 125 +++++++++++++++++ internal/multipart/private.go | 84 ------------ internal/multipart/public.go | 67 ---------- internal/multipart/reader.go | 87 ++++++++++++ internal/multipart/reader_test.go | 215 ++++++++++++++++++++++++++++++ internal/multipart/types.go | 36 +++-- 7 files changed, 459 insertions(+), 167 deletions(-) create mode 100644 internal/multipart/component.go delete mode 100644 internal/multipart/private.go delete mode 100644 internal/multipart/public.go create mode 100644 internal/multipart/reader.go create mode 100644 internal/multipart/reader_test.go diff --git a/internal/apirequest/dataset.go b/internal/apirequest/dataset.go index 7429cf6..8c24f46 100644 --- a/internal/apirequest/dataset.go +++ b/internal/apirequest/dataset.go @@ -181,13 +181,17 @@ func (i *DataSet) parseUrlencoded(req *http.Request) { func (i *DataSet) parseMultipart(req *http.Request) { /* (1) Create reader */ - mpr := multipart.CreateReader(req) + boundary := req.Header.Get("Content-Type")[len("multipart/form-data; boundary="):] + mpr, err := multipart.NewReader(req.Body, boundary) + if err != nil { + return + } /* (2) Parse multipart */ mpr.Parse() /* (3) Store data into 'Form' and 'Set */ - for name, component := range mpr.Components { + for name, data := range mpr.Data { // prevent injections if nameInjection(name) { @@ -198,8 +202,8 @@ func (i *DataSet) parseMultipart(req *http.Request) { // store value in 'Set' i.Set[name] = &Parameter{ Parsed: false, - File: component.File, - Value: component.Data, + File: len(data.GetHeader("filename")) > 0, + Value: string(data.Data), } // create link in 'Form' diff --git a/internal/multipart/component.go b/internal/multipart/component.go new file mode 100644 index 0000000..c1f01a3 --- 
// Errors reported while reading a multipart body.
var (
	// ErrMissingDataName is set when a multipart variable/file has no name="...".
	ErrMissingDataName = errors.New("data has no name")

	// ErrDataNameConflict is set when a multipart variable/file name is already used.
	ErrDataNameConflict = errors.New("data name conflict")

	// ErrNoHeader is set when a multipart variable/file has no (valid) header.
	ErrNoHeader = errors.New("data has no header")

	// errNoPart is returned by Component.read when the input ended before any
	// byte of a new component was read. Parse uses it to stop without
	// reporting a nameless component.
	errNoPart = errors.New("no more data")
)

// Header prefixes recognised by parseHeaders.
const (
	dispositionPrefix = "Content-Disposition: form-data;"
	contentTypePrefix = "Content-Type: "
)

// Line kinds returned by classifyLine.
const (
	plainLine = iota
	partDelimiter
	closeDelimiter
)

// Component represents a multipart variable/file.
type Component struct {
	// ContentType is "raw" for variables, or the value of the part's
	// Content-Type header when one is present (files).
	ContentType string

	// Headers holds the key="value" parameters of the Content-Disposition line.
	Headers map[string]string

	// Data is the body of the part, without the line terminator that
	// precedes the next delimiter.
	Data []byte
}

// Reader represents a multipart reader.
type Reader struct {
	// reader used to read the source (e.g. http.Request.Body)
	reader *bufio.Reader

	// boundary used to separate multipart components, "--" prefix included
	boundary string

	// Data holds the parsed components indexed by name
	Data map[string]*Component
}

// classifyLine tells whether line is a part delimiter ("--boundary"), the
// closing delimiter ("--boundary--") or ordinary content. Trailing padding
// and the line terminator (LF or CRLF) are ignored; the rest must match
// exactly, so a content line that merely starts with the boundary is data.
func classifyLine(line []byte, boundary string) int {
	trimmed := strings.TrimRight(string(line), " \t\r\n")

	switch trimmed {
	case boundary:
		return partDelimiter
	case boundary + "--":
		return closeDelimiter
	}

	return plainLine
}

// isBlankLine reports whether line consists of a line terminator only.
func isBlankLine(line []byte) bool {
	s := string(line)
	return s == "\n" || s == "\r\n"
}

// trimEOL removes one trailing line terminator ("\n" or "\r\n") from b.
// It is safe to call on an empty slice.
func trimEOL(b []byte) []byte {
	if n := len(b); n > 0 && b[n-1] == '\n' {
		b = b[:n-1]
		if n := len(b); n > 0 && b[n-1] == '\r' {
			b = b[:n-1]
		}
	}
	return b
}

// parseHeaders fills comp.Headers and comp.ContentType from the raw header
// block of a part.
//
// The first line must be a 'Content-Disposition: form-data;' line followed by
// at least one parameter, otherwise ErrNoHeader is returned. Parameters that
// are not of the form key=value with a non-empty value are ignored, and the
// first occurrence of a key wins. Lines may end with LF or CRLF.
func (comp *Component) parseHeaders(_raw []byte) error {

	// 1. Extract lines
	lines := strings.Split(string(_raw), "\n")
	if len(lines) < 2 {
		return ErrNoHeader
	}

	// 2. Check and strip the 'Content-Disposition' prefix ('\r' is trimmed
	//    so that CRLF-terminated headers are accepted)
	first := strings.Trim(lines[0], " \t\r")
	if !strings.HasPrefix(first, dispositionPrefix) {
		return ErrNoHeader
	}

	params := strings.Trim(first[len(dispositionPrefix):], " \t")
	if len(params) < 1 {
		return ErrNoHeader
	}

	if comp.Headers == nil {
		comp.Headers = make(map[string]string)
	}

	// 3. Extract each key-value pair
	for _, p := range strings.Split(params, "; ") {
		// split on the first '=' only: values may themselves contain '='
		pair := strings.SplitN(p, "=", 2)

		// ignore invalid pairs
		if len(pair) != 2 || len(pair[1]) < 1 {
			continue
		}

		key := strings.Trim(pair[0], " \t\r\n")
		value := strings.Trim(strings.Trim(pair[1], " \t\r\n"), `"`)

		if _, keyExists := comp.Headers[key]; !keyExists {
			comp.Headers[key] = value
		}
	}

	// 4. Extract the content type if set on one of the following lines
	for _, l := range lines[1:] {
		l = strings.Trim(l, " \t\r")
		if strings.HasPrefix(l, contentTypePrefix) {
			comp.ContentType = strings.Trim(l[len(contentTypePrefix):], " \t")
			break
		}
	}

	return nil
}

// GetHeader returns the header value associated with a key, empty string if
// not found.
func (comp *Component) GetHeader(_key string) string {
	return comp.Headers[_key]
}

// read consumes the source up to the next delimiter and parses the component
// found on the way.
//
// It returns nil when a part delimiter was reached (more components may
// follow), io.EOF when the closing delimiter or the end of the input was
// reached (the component read so far is still valid), errNoPart when the
// input ended before anything was read, ErrNoHeader when the header block is
// invalid, and any other read error as is.
func (comp *Component) read(_reader *bufio.Reader, _boundary string) error {

	headerRead := false
	consumed := false
	rawHeader := make([]byte, 0)

	// finish closes the component: a header block that was never terminated
	// by a blank line is still parsed, and the line terminator preceding the
	// delimiter is removed from the data (it belongs to the delimiter).
	finish := func(result error) error {
		if !headerRead && len(rawHeader) > 0 {
			if err := comp.parseHeaders(rawHeader); err != nil {
				return err
			}
		}
		comp.Data = trimEOL(comp.Data)
		return result
	}

	for { // Read until delimiter, end of input or error

		line, err := _reader.ReadBytes('\n')

		// 1. Stop on real read errors; io.EOF is handled below because
		//    ReadBytes may return a last, unterminated line along with it
		if err != nil && err != io.EOF {
			return err
		}

		if len(line) > 0 {

			// 2. Stop at delimiters
			switch classifyLine(line, _boundary) {
			case closeDelimiter:
				return finish(io.EOF)
			case partDelimiter:
				return finish(nil)
			}

			consumed = true

			switch {
			// 3. Body: keep every line, blank ones included
			case headerRead:
				comp.Data = append(comp.Data, line...)

			// 4. First blank line: the header block is complete
			case isBlankLine(line):
				headerRead = true
				if perr := comp.parseHeaders(rawHeader); perr != nil {
					return perr
				}

			// 5. Header line
			default:
				rawHeader = append(rawHeader, line...)
			}
		}

		// 6. End of input
		if err == io.EOF {
			if !consumed {
				return errNoPart
			}
			return finish(io.EOF)
		}
	}
}

// NewReader creates a new reader over _src for the given boundary (without
// its "--" prefix) and places it right after the first delimiter.
//
// It returns the read error (io.EOF included) when no opening delimiter is
// found.
func NewReader(_src io.Reader, _boundary string) (*Reader, error) {

	reader := &Reader{
		boundary: "--" + _boundary,
		Data:     make(map[string]*Component),
	}

	// 1. Create reader (reuse the source when it is already buffered)
	dst, ok := _src.(*bufio.Reader)
	if !ok {
		dst = bufio.NewReader(_src)
	}
	reader.reader = dst

	// 2. Place reader after the first delimiter
	for {
		line, err := dst.ReadBytes('\n')

		// check the line first: the delimiter may be the last,
		// unterminated line of the input
		if len(line) > 0 && classifyLine(line, reader.boundary) == partDelimiter {
			return reader, nil
		}

		if err != nil {
			return nil, err
		}
	}
}

// Parse parses the multipart components from the source and stores them in
// reader.Data.
//
// It returns ErrNoHeader for a component without a valid header,
// ErrMissingDataName for a component without a name, ErrDataNameConflict when
// a name is used twice, or the underlying read error.
func (reader *Reader) Parse() error {

	for { // for each component

		mpd := &Component{
			ContentType: "raw",
			Data:        make([]byte, 0),
			Headers:     make(map[string]string),
		}

		// 1. Read and parse data
		err := mpd.read(reader.reader, reader.boundary)

		// 2. Nothing left after the last delimiter
		if err == errNoPart {
			return nil
		}

		// 3. Dispatch error
		if err != nil && err != io.EOF {
			return err
		}

		// 4. Store the component
		name := mpd.GetHeader("name")
		if len(name) < 1 {
			return ErrMissingDataName
		}

		if _, nameUsed := reader.Data[name]; nameUsed {
			return ErrDataNameConflict
		}

		reader.Data[name] = mpd

		// 5. Stop after the last component
		if err == io.EOF {
			return nil
		}
	}
}

// Get returns a multipart data by name, nil if not found.
func (reader *Reader) Get(_key string) *Component {
	return reader.Data[_key]
}