update multipart parser + tests

This commit is contained in:
Adrien Marquès 2018-09-25 21:22:25 +02:00
parent f76e3d35fa
commit 592ed23638
7 changed files with 459 additions and 167 deletions

View File

@ -181,13 +181,17 @@ func (i *DataSet) parseUrlencoded(req *http.Request) {
func (i *DataSet) parseMultipart(req *http.Request) { func (i *DataSet) parseMultipart(req *http.Request) {
/* (1) Create reader */ /* (1) Create reader */
mpr := multipart.CreateReader(req) boundary := req.Header.Get("Content-Type")[len("multipart/form-data; boundary="):]
mpr, err := multipart.NewReader(req.Body, boundary)
if err != nil {
return
}
/* (2) Parse multipart */ /* (2) Parse multipart */
mpr.Parse() mpr.Parse()
/* (3) Store data into 'Form' and 'Set */ /* (3) Store data into 'Form' and 'Set */
for name, component := range mpr.Components { for name, data := range mpr.Data {
// prevent injections // prevent injections
if nameInjection(name) { if nameInjection(name) {
@ -198,8 +202,8 @@ func (i *DataSet) parseMultipart(req *http.Request) {
// store value in 'Set' // store value in 'Set'
i.Set[name] = &Parameter{ i.Set[name] = &Parameter{
Parsed: false, Parsed: false,
File: component.File, File: len(data.GetHeader("filename")) > 0,
Value: component.Data, Value: string(data.Data),
} }
// create link in 'Form' // create link in 'Form'

View File

@ -0,0 +1,125 @@
package multipart
import (
"bufio"
"strings"
)
// parseHeaders parses the raw header section of a multipart component.
//
// The first line of _raw must be a `Content-Disposition: form-data; ...`
// header. Each `key=value` parameter that follows the prefix is stored in
// comp.Headers with surrounding double quotes removed; when a key appears
// more than once the first value wins. If one of the following lines starts
// with `Content-Type: `, its value is stored in comp.ContentType.
//
// Lines may end with "\n" or "\r\n".
//
// It returns ErrNoHeader when _raw holds fewer than two "\n"-separated
// chunks, when the first line is not a form-data Content-Disposition header,
// or when that header carries no parameter text.
func (comp *Component) parseHeaders(_raw []byte) error {
	const (
		dispositionPrefix = "Content-Disposition: form-data;"
		contentTypePrefix = "Content-Type: "
	)

	// 1. Extract lines
	lines := strings.Split(string(_raw), "\n")
	if len(lines) < 2 {
		return ErrNoHeader
	}

	// 2. Trim the first line (a trailing CR is left over when lines end with
	// CRLF) and remove the 'Content-Disposition' prefix
	first := strings.Trim(lines[0], " \t\r")
	if !strings.HasPrefix(first, dispositionPrefix) {
		return ErrNoHeader
	}
	params := strings.Trim(first[len(dispositionPrefix):], " \t")
	if len(params) < 1 {
		return ErrNoHeader
	}

	// writing to a nil map panics
	if comp.Headers == nil {
		comp.Headers = make(map[string]string)
	}

	// 3. Extract each key-value pair
	for _, p := range strings.Split(params, "; ") {
		// split on the first '=' only: a quoted value may itself contain '='
		pair := strings.SplitN(p, "=", 2)

		// ignore invalid pairs
		if len(pair) != 2 || len(pair[1]) < 1 {
			continue
		}

		key := strings.Trim(pair[0], " \t\r\n")
		value := strings.Trim(strings.Trim(pair[1], " \t\r\n"), `"`)

		if _, keyExists := comp.Headers[key]; !keyExists {
			comp.Headers[key] = value
		}
	}

	// 4. Extract content-type from the first following line that declares it
	for _, l := range lines[1:] {
		if strings.HasPrefix(l, contentTypePrefix) {
			comp.ContentType = strings.Trim(l[len(contentTypePrefix):], " \t\r")
			break
		}
	}

	return nil
}
// GetHeader looks up _key among the component's parsed headers and returns
// the stored value. A key that was never set yields the empty string.
func (comp *Component) GetHeader(_key string) string {
	if value, found := comp.Headers[_key]; found {
		return value
	}
	return ""
}
// read consumes lines from _reader until the next boundary line (or a read
// error) and fills comp with the part it finds.
//
// Lines before the first blank line form the header section and are handed
// to parseHeaders; every later non-blank line is appended to comp.Data. Both
// "\n" and "\r\n" are accepted as line endings. The line break that precedes
// the boundary is removed from comp.Data before returning.
//
// It returns nil when a line starting with _boundary is reached, the error
// from parseHeaders when the header section is invalid, or the error reported
// by _reader (end of input included). Bytes returned together with a read
// error are discarded.
func (comp *Component) read(_reader *bufio.Reader, _boundary string) error {
	headerRead := false
	rawHeader := make([]byte, 0)

	for { // Read until boundary or error
		line, err := _reader.ReadBytes('\n')

		// 1. Stop on error
		if err != nil {
			comp.Data = trimLineBreak(comp.Data)
			return err
		}

		// 2. Stop at boundary
		if strings.HasPrefix(string(line), _boundary) {
			comp.Data = trimLineBreak(comp.Data)
			return nil
		}

		// 3. Blank line: the first one ends the header section, later ones
		// are ignored.
		// NOTE(review): blank lines inside the body are dropped as well —
		// confirm this is intended for file uploads.
		if text := string(line); text == "\n" || text == "\r\n" {
			if !headerRead {
				headerRead = true
				if err := comp.parseHeaders(rawHeader); err != nil {
					return err
				}
			}
			continue
		}

		// 4. Non-blank line: header until the blank line was seen, body after
		if !headerRead {
			rawHeader = append(rawHeader, line...)
			continue
		}
		comp.Data = append(comp.Data, line...)
	}
}

// trimLineBreak returns data without its final line break ("\n" or "\r\n").
// Empty input, or input that does not end with "\n", is returned unchanged.
func trimLineBreak(data []byte) []byte {
	if n := len(data); n > 0 && data[n-1] == '\n' {
		data = data[:n-1]
		if m := len(data); m > 0 && data[m-1] == '\r' {
			data = data[:m-1]
		}
	}
	return data
}

View File

@ -1,84 +0,0 @@
package multipart
import (
"fmt"
"strings"
)
// readComponent gathers the non-empty lines found before the next boundary.
//
// It returns the collected lines with a nil error once a line starting with
// the reader's boundary is met, or the lines collected so far together with
// the error reported by the underlying reader.
func (i *Reader) readComponent() ([]string, error) {
	lines := []string{}

	for {
		raw, _, err := i.reader.ReadLine()
		if err != nil {
			// read failure (end of input included)
			return lines, err
		}

		text := string(raw)
		switch {
		case strings.HasPrefix(text, i.boundary):
			// boundary reached: the component is complete
			return lines, nil
		case len(text) > 0:
			// keep every non-empty line
			lines = append(lines, text)
		}
	}
}
// parseComponent parses a single component from its raw lines and stores the
// result in i.Components.
//
// line[0] is the component's meta data, split on "; "; its second field must
// have the form `name="..."`. When a third field starts with `filename="`,
// the component is a file and the line following the meta data (its
// Content-Type) is skipped. The remaining lines are joined with "\n" and
// appended to the Data of the component registered under that name, which is
// created on first use.
//
// An error is returned when fewer than two lines are given, when the meta
// data has fewer than two fields, or when the name field is not a complete
// `name="..."` pair.
func (i *Reader) parseComponent(line []string) error {
	const namePrefix = `name="`

	/* (1) Fail if invalid line count */
	if len(line) < 2 {
		return fmt.Errorf("Missing data to parse component")
	}

	/* (2) Split meta data */
	meta := strings.Split(line[0], "; ")
	if len(meta) < 2 {
		return fmt.Errorf("Missing component meta data")
	}

	/* (3) Extract name */
	// the field must hold the prefix AND a closing quote; slicing a shorter
	// field would panic, and an unterminated value would lose its last byte
	field := meta[1]
	if len(field) <= len(namePrefix) || !strings.HasPrefix(field, namePrefix) || !strings.HasSuffix(field, `"`) {
		return fmt.Errorf("Cannot extract component name")
	}
	name := field[len(namePrefix) : len(field)-1]

	/* (4) Check if it is a file */
	isFile := len(meta) > 2 && strings.HasPrefix(meta[2], `filename="`)

	// index of the first data line; skip the Content-Type line for files
	cursor := 1
	if isFile {
		cursor++
	}

	/* (5) Create index if name not already used */
	already, isset := i.Components[name]
	if !isset {
		already = &Component{
			File: isFile,
			Data: make([]string, 0),
		}
		i.Components[name] = already
	}

	/* (6) Store new value */
	already.Data = append(already.Data, strings.Join(line[cursor:], "\n"))

	return nil
}

View File

@ -1,67 +0,0 @@
package multipart
import (
"bufio"
"fmt"
"io"
"log"
"net/http"
)
// CreateReader creates a new multipart reader from an http.Request.
//
// The boundary is taken from the request's Content-Type header, which must
// start with "multipart/form-data; boundary=". The request body is then read
// up to and including the first boundary line, so that parsing starts at the
// first component.
//
// When the Content-Type header does not carry that prefix, no boundary can be
// extracted: the returned reader is bound to an empty body and the request
// body is left untouched.
func CreateReader(req *http.Request) *Reader {
	const prefix = "multipart/form-data; boundary="

	/* (1) extract boundary */
	contentType := req.Header.Get("Content-Type")
	if len(contentType) < len(prefix) || contentType[:len(prefix)] != prefix {
		// slicing a shorter header would panic, and a different header would
		// yield a meaningless boundary
		return &Reader{
			reader:     bufio.NewReader(http.NoBody),
			boundary:   "--",
			Components: make(map[string]*Component),
		}
	}
	boundary := fmt.Sprintf("--%s", contentType[len(prefix):])

	/* (2) init reader */
	i := &Reader{
		reader:     bufio.NewReader(req.Body),
		boundary:   boundary,
		Components: make(map[string]*Component),
	}

	/* (3) Place reader cursor after first boundary */
	var (
		err  error
		line []byte
	)
	for err == nil && string(line) != boundary {
		line, _, err = i.reader.ReadLine()
	}

	return i
}
// Parse reads every component from the request body and registers it.
//
// It returns nil once the end of the input is reached and any other read
// error as-is. A component that cannot be parsed is logged and skipped;
// parsing then continues with the next one.
func (i *Reader) Parse() error {
	for {
		// read the raw lines of the next component
		lines, err := i.readComponent()
		switch {
		case err == io.EOF:
			// end of input: done
			return nil
		case err != nil:
			return err
		}

		// parse it; a failure only affects this component
		if parseErr := i.parseComponent(lines); parseErr != nil {
			log.Printf("%s\n", parseErr)
		}
	}
}

View File

@ -0,0 +1,87 @@
package multipart
import (
"bufio"
"fmt"
"io"
)
// NewReader creates a new multipart reader over _src.
//
// _boundary is the boundary without its leading "--". The source is consumed
// up to and including the first line equal to "--" + _boundary, so the
// returned reader is positioned at the start of the first component. If the
// source fails (or ends) before such a line is found, the read error is
// returned and the reader is nil.
func NewReader(_src io.Reader, _boundary string) (*Reader, error) {
	// 1. Reuse the source when it is already buffered, wrap it otherwise
	buffered, isBuffered := _src.(*bufio.Reader)
	if !isBuffered {
		buffered = bufio.NewReader(_src)
	}

	mpr := &Reader{
		reader:   buffered,
		boundary: fmt.Sprintf("--%s", _boundary),
		Data:     make(map[string]*Component),
	}

	// 2. Skip everything up to (and including) the first boundary line
	for {
		line, _, err := buffered.ReadLine()
		if err != nil {
			return nil, err
		}
		if string(line) == mpr.boundary {
			// 3. Reader is in place
			return mpr, nil
		}
	}
}
// Parse reads every component from the source and stores it in reader.Data
// under its `name` header.
//
// It returns nil once the end of the input is reached, ErrMissingDataName
// when a component has no (or an empty) name, ErrDataNameConflict when a name
// is used twice, and any other error reported while reading a component.
func (reader *Reader) Parse() error {
	for {
		mpd := &Component{
			ContentType: "raw",
			Data:        make([]byte, 0),
			Headers:     make(map[string]string),
		}

		// 1. Read and parse data
		err := mpd.read(reader.reader, reader.boundary)

		// 2. Dispatch error (end of input is handled below)
		if err != nil && err != io.EOF {
			return err
		}

		// 3. Input that ends right after a boundary (e.g. the line break
		// following the closing boundary) yields a component with neither
		// header nor data: there is nothing to store.
		if err == io.EOF && len(mpd.Headers) == 0 && len(mpd.Data) == 0 {
			return nil
		}

		// 4. Store the component under its name
		name := mpd.GetHeader("name")
		if len(name) < 1 {
			return ErrMissingDataName
		}
		if _, nameUsed := reader.Data[name]; nameUsed {
			return ErrDataNameConflict
		}
		reader.Data[name] = mpd

		// 5. Stop at end of input
		if err == io.EOF {
			return nil
		}
	}
}
// Get looks up the component stored under _key and returns it, or nil when
// no component has that name.
func (reader *Reader) Get(_key string) *Component {
	if data, found := reader.Data[_key]; found {
		return data
	}
	return nil
}

View File

@ -0,0 +1,215 @@
package multipart
import (
"bytes"
"testing"
)
// TestSimple parses a body holding one plain variable and one file, then
// checks the content type, data and headers stored for each of them.
func TestSimple(t *testing.T) {
	const boundary = "BoUnDaRy"

	// A blank line must separate each part's headers from its body; line
	// breaks are written explicitly so the separator cannot get lost.
	input := []byte("--BoUnDaRy\n" +
		"Content-Disposition: form-data; name=\"somevar\"\n" +
		"\n" +
		"google.com\n" +
		"--BoUnDaRy\n" +
		"Content-Disposition: form-data; name=\"somefile\"; filename=\"somefilename.pdf\"\n" +
		"Content-Type: application/pdf\n" +
		"\n" +
		"facebook.com\n" +
		"--BoUnDaRy--")

	mpr, err := NewReader(bytes.NewReader(input), boundary)
	if err != nil {
		t.Fatalf("Unexpected error <%s>", err)
	}

	if err = mpr.Parse(); err != nil {
		t.Fatalf("Unexpected error <%s>", err)
	}

	// 1. Check var
	somevar := mpr.Get("somevar")
	if somevar == nil {
		t.Fatalf("Expected data {%s} to exist", "somevar")
	}
	if somevar.ContentType != "raw" {
		t.Fatalf("Expected ContentType to be {raw}, got {%s}", somevar.ContentType)
	}
	if string(somevar.Data) != "google.com" {
		t.Fatalf("Expected data to be {%s}, got {%s}", "google.com", somevar.Data)
	}

	// 2. Check file
	somefile := mpr.Get("somefile")
	if somefile == nil {
		t.Fatalf("Expected data {%s} to exist", "somefile")
	}
	if somefile.ContentType != "application/pdf" {
		// report the file's content type, not the variable's
		t.Fatalf("Expected ContentType to be {application/pdf}, got {%s}", somefile.ContentType)
	}
	if string(somefile.Data) != "facebook.com" {
		t.Fatalf("Expected data to be {%s}, got {%s}", "facebook.com", somefile.Data)
	}

	filename := somefile.GetHeader("filename")
	if len(filename) < 1 {
		t.Fatalf("Expected data to have header 'filename'")
	}
	if filename != "somefilename.pdf" {
		t.Fatalf("Expected filename to be {%s}, got {%s}", "somefilename.pdf", filename)
	}
}
// TestNoName checks that Parse reports ErrMissingDataName when a component
// has no usable name: wrong key, empty quoted value, empty value, or a key
// without value.
func TestNoName(t *testing.T) {
	const boundary = "BoUnDaRy"

	inputs := []string{
		"--BoUnDaRy\nContent-Disposition: form-data; xname=\"somevar\"\n\ngoogle.com\n--BoUnDaRy--",
		"--BoUnDaRy\nContent-Disposition: form-data; name=\"\"\n\ngoogle.com\n--BoUnDaRy--",
		"--BoUnDaRy\nContent-Disposition: form-data; name=\n\ngoogle.com\n--BoUnDaRy--",
		"--BoUnDaRy\nContent-Disposition: form-data; name\n\ngoogle.com\n--BoUnDaRy--",
	}

	for idx, raw := range inputs {
		mpr, err := NewReader(bytes.NewReader([]byte(raw)), boundary)
		if err != nil {
			t.Errorf("(%d) Unexpected error <%s>", idx, err)
			continue
		}

		if err = mpr.Parse(); err != ErrMissingDataName {
			t.Errorf("(%d) Expected the error <%s>, got <%s>", idx, ErrMissingDataName, err)
		}
	}
}
// TestNoHeader checks that Parse reports ErrNoHeader when a component has no
// header at all, a Content-Disposition that is not form-data, or a form-data
// Content-Disposition without any parameter.
func TestNoHeader(t *testing.T) {
	const boundary = "BoUnDaRy"

	inputs := []string{
		"--BoUnDaRy\n\ngoogle.com\n--BoUnDaRy--",
		"--BoUnDaRy\nContent-Disposition: false;\n\ngoogle.com\n--BoUnDaRy--",
		"--BoUnDaRy\nContent-Disposition: form-data;\n\ngoogle.com\n--BoUnDaRy--",
	}

	for idx, raw := range inputs {
		mpr, err := NewReader(bytes.NewReader([]byte(raw)), boundary)
		if err != nil {
			t.Errorf("(%d) Unexpected error <%s>", idx, err)
			continue
		}

		if err = mpr.Parse(); err != ErrNoHeader {
			t.Errorf("(%d) Expected the error <%s>, got <%s>", idx, ErrNoHeader, err)
		}
	}
}
// TestNameConflict checks that Parse reports ErrDataNameConflict when two
// components share the same name.
func TestNameConflict(t *testing.T) {
	const boundary = "BoUnDaRy"

	// A blank line must separate each part's headers from its body; line
	// breaks are written explicitly so the separator cannot get lost.
	input := []byte("--BoUnDaRy\n" +
		"Content-Disposition: form-data; name=\"var1\"\n" +
		"\n" +
		"google.com\n" +
		"--BoUnDaRy\n" +
		"Content-Disposition: form-data; name=\"var1\"\n" +
		"\n" +
		"facebook.com\n" +
		"--BoUnDaRy--")

	mpr, err := NewReader(bytes.NewReader(input), boundary)
	if err != nil {
		t.Fatalf("Unexpected error <%s>", err)
	}

	if err = mpr.Parse(); err != ErrDataNameConflict {
		t.Fatalf("Expected the error <%s>, got <%s>", ErrDataNameConflict, err)
	}
}
// TestGetterNil checks that Get returns nil for a name that is not part of
// the parsed body.
func TestGetterNil(t *testing.T) {
	const boundary = "BoUnDaRy"

	// A blank line must separate each part's headers from its body; line
	// breaks are written explicitly so the separator cannot get lost.
	input := []byte("--BoUnDaRy\n" +
		"Content-Disposition: form-data; name=\"var1\"\n" +
		"\n" +
		"google.com\n" +
		"--BoUnDaRy\n" +
		"Content-Disposition: form-data; name=\"var2\"\n" +
		"\n" +
		"facebook.com\n" +
		"--BoUnDaRy--")

	mpr, err := NewReader(bytes.NewReader(input), boundary)
	if err != nil {
		t.Fatalf("Unexpected error <%s>", err)
	}

	if err = mpr.Parse(); err != nil {
		t.Fatalf("Unexpected error <%s>", err)
	}

	if mpr.Get("unknown_key") != nil {
		t.Fatalf("Expected 'unknown_key' not to exist, got {%v}", mpr.Get("unknown_key"))
	}
}

View File

@ -2,26 +2,38 @@ package multipart
import ( import (
"bufio" "bufio"
"errors"
) )
// Errors reported while parsing multipart data.
var (
	// ErrMissingDataName is returned when a multipart variable/file carries
	// no usable name="..." parameter.
	ErrMissingDataName = errors.New("data has no name")

	// ErrDataNameConflict is returned when the name of a multipart
	// variable/file is already used by a previous one.
	ErrDataNameConflict = errors.New("data name conflict")

	// ErrNoHeader is returned when a multipart variable/file has no (valid)
	// header.
	ErrNoHeader = errors.New("data has no header")
)
// Component represents one part of a multipart body: either a plain variable
// or a file.
type Component struct {
// ContentType is "raw" for plain variables; for a part that declares a
// `Content-Type: ` header line, it is the value of that line.
ContentType string
// Headers holds the key/value parameters found in the part's
// Content-Disposition header (e.g. `name`, `filename`), without their
// surrounding double quotes.
Headers map[string]string
// Data holds the raw bytes of the part's body.
Data []byte
}
// Reader represents a multipart reader // Reader represents a multipart reader
type Reader struct { type Reader struct {
// reader used for http.Request.Body reading // reader used for http.Request.Body reading
reader *bufio.Reader reader *bufio.Reader
// boundary used to separate multipart components // boundary used to separate multipart MultipartDatas
boundary string boundary string
// result will be inside this field // result will be inside this field
Components map[string]*Component Data map[string]*Component
}
// Component represents a multipart component, identified by its name.
type Component struct {
// File tells whether this component is a file;
// if not, it is a simple variable.
File bool
// Data holds the component's values: one entry per occurrence of the
// component's name, each entry being the occurrence's lines joined with "\n".
Data []string
}