urlopen 抓取网页灵异事件
写了一个 urlopen 测试小脚本,测试结果如下:
www.baidu.com gbk 可以抓到
news.aweinan.com/lists/4 utf-8 可以抓到
news.aweinan.com 可以抓到,但是填充不进TextCtrl中
# ---------------------------------------------------------------------------
# Listing 1 -- first attempt (Python 2, urllib).
#
# Kept as the "before" version: it always tries to transcode the page from
# GBK to UTF-8.
# NOTE(review): when the GBK decode fails, the raw byte string is handed to
# the TextCtrl unchanged; this is presumably why some pages are fetched but
# never show up in the control -- confirm against the wx build in use.
# ---------------------------------------------------------------------------
#coding=utf-8
from urllib import urlopen

import wx


def collect(evnt):
    """Button handler: fetch the URL typed in ``url`` and display it.

    The response headers go to ``httpinfo`` and the body to ``contents``.

    :param evnt: the wx button event (unused).
    """
    value = url.GetValue()
    webpage = urlopen(value)
    info = str(webpage.info())
    try:
        value = webpage.read()
    except Exception:
        value = "抓去失败!"
    # Transcode: assume the page is GBK and convert it to UTF-8; if the bytes
    # are not valid GBK, keep them exactly as received.
    try:
        value = value.decode('gbk').encode('utf8')
    except UnicodeError:
        pass
    httpinfo.SetValue(info)
    contents.SetValue(value)


app = wx.App()
win = wx.Frame(None, title=u'抓去远程网址!', size=(810, 400))
url = wx.TextCtrl(win, pos=(10, 10), size=(400, 20),
                  value="http://www.aweinan.com")
open_button = wx.Button(win, pos=(420, 10), label=u'打开')
open_button.SetDefault()
open_button.Bind(wx.EVT_BUTTON, collect)
contents = wx.TextCtrl(win, size=(480, 330), pos=(10, 40),
                       style=wx.TE_MULTILINE)
httpinfo = wx.TextCtrl(win, size=(300, 330), pos=(500, 40),
                       style=wx.TE_MULTILINE)
win.Show()
app.MainLoop()

# ---------------------------------------------------------------------------
# Listing 2 -- revised version (Python 2, urllib2).
#
# Decodes the page with the charset declared in the Content-Type header
# instead of assuming GBK, and always passes unicode to the text controls.
# ---------------------------------------------------------------------------
#! /usr/bin/env python
#coding=utf-8
from urllib2 import urlopen

import wx

# Charset used when the server does not declare one (or declares an unknown
# one).  NOTE(review): UTF-8 is an assumption, not something the server said.
DEFAULT_CHARSET = 'utf-8'

# Shown in the body pane when the page cannot be fetched.  Kept as unicode so
# it is never run through the page's charset.
FETCH_FAILED = u"抓取失败!"


def _to_unicode(raw, charset):
    """Decode the byte string *raw* to unicode without raising.

    Undecodable bytes are replaced rather than aborting the whole page, and
    an unknown codec name falls back to ``DEFAULT_CHARSET``.
    """
    try:
        return raw.decode(charset, 'replace')
    except LookupError:
        return raw.decode(DEFAULT_CHARSET, 'replace')


def collect(evnt):
    """Button handler: fetch the URL typed in ``url`` and display it.

    The response headers go to ``httpinfo`` (followed by the charset that was
    used) and the decoded body goes to ``contents``.  If the request cannot
    be opened, the error is shown in ``httpinfo`` and the failure message in
    ``contents``; if only reading the body fails, the headers are still shown.

    :param evnt: the wx button event (unused).
    """
    address = url.GetValue()
    try:
        webpage = urlopen(address)
    except Exception as err:
        # Event-handler boundary: report the failure in the UI rather than
        # letting the exception escape into the wx main loop.
        httpinfo.SetValue(repr(err))
        contents.SetValue(FETCH_FAILED)
        return

    try:
        info = str(webpage.info())
        # getparam() returns None when Content-Type carries no charset.
        charset = webpage.headers.getparam('charset') or DEFAULT_CHARSET
        try:
            raw = webpage.read()
        except Exception:
            raw = None
    finally:
        webpage.close()

    httpinfo.SetValue(_to_unicode(info, charset))
    if raw is None:
        contents.SetValue(FETCH_FAILED)
    else:
        contents.SetValue(_to_unicode(raw, charset))
    httpinfo.AppendText('current charset:' + charset + '\n')


app = wx.App()
win = wx.Frame(None, title=u'抓取远程网址!', size=(810, 400))
url = wx.TextCtrl(win, pos=(10, 10), size=(400, 20),
                  value="http://www.aweinan.com")
open_button = wx.Button(win, pos=(420, 10), label=u'打开')
open_button.SetDefault()
open_button.Bind(wx.EVT_BUTTON, collect)
contents = wx.TextCtrl(win, size=(480, 330), pos=(10, 40),
                       style=wx.TE_MULTILINE)
httpinfo = wx.TextCtrl(win, size=(300, 330), pos=(500, 40),
                       style=wx.TE_MULTILINE)
win.Show()
app.MainLoop()