[python]简单获取网页文件指定内容方法
#!/usr/bin/python
# Fetch a meego-commits mailing-list archive page and print the lines of its
# "changes files:" section (from the keyword line up to the next blank line).
#
# Runs on Python 2 (which the script was written for) and on Python 3.

import contextlib
import sys
import urllib

try:  # Python 3 moved urlopen into urllib.request
    from urllib.request import urlopen
except ImportError:  # Python 2
    urlopen = urllib.urlopen

try:  # Python 2 module name, imported by the original script
    import HTMLParser  # noqa: F401 -- not used by the code below
except ImportError:  # renamed to html.parser in Python 3
    import html.parser as HTMLParser  # noqa: F401

page_url = 'http://lists.meego.com/pipermail/meego-commits/2011-June/027331.html'


def _as_text(raw_line):
    """Return raw_line as a native str.

    Python 2's urlopen already yields str lines, which are returned
    untouched.  Python 3 yields bytes, which are decoded here.
    """
    if isinstance(raw_line, str):
        return raw_line
    # NOTE(review): the page charset is assumed to be UTF-8 compatible;
    # undecodable bytes are replaced rather than aborting the script.
    return raw_line.decode('utf-8', 'replace')


def _fetch_lines(url):
    """Download url and return its content as a list of text lines."""
    # closing() guarantees the connection is released on both Python 2
    # (where the response object is not a context manager) and Python 3.
    with contextlib.closing(urlopen(url)) as response:
        return [_as_text(raw_line) for raw_line in response]


def page_find(LST_NAME, LST_KEYWD, STRT_ELEMT=0):
    """Return the index of the first item containing a keyword.

    Args:
        LST_NAME: sequence of strings to search.
        LST_KEYWD: substring to look for.
        STRT_ELEMT: index at which the search starts (default 0).

    Returns:
        The index of the first item at or after STRT_ELEMT that contains
        LST_KEYWD, or -1 when no item matches.
    """
    for pos in range(STRT_ELEMT, len(LST_NAME)):
        if LST_KEYWD in LST_NAME[pos]:
            return pos
    return -1


def page_section(lines, keyword):
    """Return the lines of the section introduced by keyword.

    The section starts at the first line containing keyword and runs
    towards the first line after it that is exactly a newline.

    Args:
        lines: list of text lines, each keeping its trailing newline.
        keyword: substring that marks the first line of the section.

    Returns:
        A list of lines.  When no blank line follows the keyword, every
        line from the keyword to the end of ``lines`` is returned.

    Raises:
        ValueError: if no line contains keyword.
    """
    start = page_find(lines, keyword)
    if start < 0:
        # Without this check the -1 would be used as a slice start and the
        # search would silently look at the last line only.
        raise ValueError('keyword not found: %r' % (keyword,))
    tail = lines[start:]
    try:
        blank_offset = tail.index('\n')
    except ValueError:
        return tail
    # NOTE(review): the line immediately before the blank line is left out,
    # exactly as the original loop bound (keywd_line + end_line - 1) did --
    # presumably to drop a trailing diff line; confirm this is intended.
    return tail[:max(blank_offset - 1, 0)]


def main():
    """Fetch page_url and print its section; return a process exit status."""
    page_list = _fetch_lines(page_url)
    try:
        section = page_section(page_list, 'changes files:\n')
    except ValueError as err:
        sys.stderr.write('%s\n' % err)
        return 1
    for line in section:
        # Each line keeps its own newline and print adds another, so the
        # output is double-spaced, the same as the original script's.
        print(line)
    return 0


if __name__ == '__main__':
    sys.exit(main())