关于 C# 网页编码判断
小弟最近刚开始接触c#网络编程
做到网页获取的时候，我先用 StreamReader 逐行读取，通过检测 "charset" 的方式获取网页编码；
然后在同一个流上重新初始化一个 StreamReader（指定检测到的编码）进行读取时，发现读取位置比 sr.ReadLine 读到的位置还要往下移动了几行。
C# 编码 String UTF-8 网页编码
// Reads the whole response body of `wr`, detects the page charset from the first
// line containing "charset=", and stores the correctly decoded text in `content`.
// NOTE(review): `wr`, `content` and `GetCharset` are declared outside this snippet;
// GetCharset is assumed to return the charset name found in the line - confirm.
string line;
using (Stream st = wr.GetResponseStream())
using (MemoryStream buffer = new MemoryStream())
{
    // A StreamReader reads ahead into its own internal buffer, so a second reader
    // created on the same network stream starts *after* the bytes the first one
    // already buffered (the "skipped lines" symptom). The response stream cannot be
    // rewound, so copy it into memory once and decode the same bytes twice.
    st.CopyTo(buffer);
    byte[] raw = buffer.ToArray();

    // Default used when no charset is declared or the declared name is unknown.
    Encoding pageEncoding = Encoding.UTF8;

    // Pass 1: probe for the charset declaration. The declaration itself is ASCII,
    // so decoding the probe as UTF-8 is enough to find it.
    using (StreamReader probe = new StreamReader(new MemoryStream(raw), Encoding.UTF8))
    {
        while ((line = probe.ReadLine()) != null)
        {
            if (line.Contains("charset="))
            {
                string nowEncoding = GetCharset(line);
                // Charset names are case-insensitive ("utf-8" == "UTF-8").
                if (!string.Equals(nowEncoding, "UTF-8", System.StringComparison.OrdinalIgnoreCase))
                {
                    try
                    {
                        pageEncoding = Encoding.GetEncoding(nowEncoding);
                    }
                    catch (System.ArgumentException)
                    {
                        // Null, empty or unsupported charset name: keep the UTF-8 default
                        // instead of failing the whole download.
                    }
                }
                // Only the first declaration counts; stop scanning.
                break;
            }
        }
    }

    // Pass 2: decode the complete body from the beginning with the detected encoding.
    using (StreamReader reader = new StreamReader(new MemoryStream(raw), pageEncoding))
    {
        content = reader.ReadToEnd();
    }
}
// Downloads a page and prints it, decoded with the charset announced in the
// HTTP Content-Type response header (falls back to UTF-8 when none is usable).
var request = WebRequest.Create("http://www.amazon.co.jp/");
using (var response = request.GetResponse())
using (var stream = response.GetResponseStream())
{
    // Charset names commonly contain hyphens ("UTF-8", "EUC-JP"), which a bare \w+
    // would truncate to "UTF"/"EUC". The value may also be quoted and the parameter
    // name may appear in any letter case.
    Regex regex = new Regex(@"charset\s*=\s*[""']?(?<name>[\w.:-]+)", RegexOptions.IgnoreCase);
    Match match = regex.Match(response.ContentType ?? string.Empty);

    Encoding encoding = Encoding.UTF8;
    if (match.Success)
    {
        try
        {
            encoding = Encoding.GetEncoding(match.Groups["name"].Value);
        }
        catch (ArgumentException)
        {
            // Unsupported charset name: keep the UTF-8 default rather than throwing.
        }
    }

    // Dispose the reader deterministically along with the response stream.
    using (var sr = new StreamReader(stream, encoding))
    {
        Console.WriteLine(sr.ReadToEnd());
    }
}