最近在做一些 RSS 解析相关的工作,这里记录一下如何解析非 UTF-8 编码的 XML 文档,直接上代码:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
| package rss_test
import ( "bytes" "encoding/xml" "fmt" "io" "testing"
"github.com/yujiahaol68/rossy/rss" "golang.org/x/net/html/charset" )
func Test_notUTF8(t *testing.T) { r := rss.New()
d := xml.NewDecoder(bytes.NewReader([]byte(notUTF8rss))) d.CharsetReader = func(s string, reader io.Reader) (io.Reader, error) { return charset.NewReader(reader, s) } err := d.Decode(r)
if err != nil { t.Fatal(err) }
for _, item := range r.ItemList { fmt.Printf("* %s\n%s\n", item.Title, item.Link) } }
|