首页 诗词 字典 板报 句子 名言 友答 励志 学校 网站地图
当前位置: 首页 > 教程频道 > 其他教程 > 开源软件 >

简单的网络邮箱抓取工具(附源码)

2012-06-30 
简单的网络邮箱抓取工具(附源码)网络爬虫,搜索引擎为了让自己的数据库足够的强大,没日没夜的在网络上寻找

简单的网络邮箱抓取工具(附源码)

网络爬虫:搜索引擎为了让自己的数据库足够强大,没日没夜地在网络上搜集信息,以使自己的信息更全面。大家都知道,互联网上的信息是无穷的,并且呈爆炸式增长,搜索引擎不可能靠人工去获取这些信息,于是人们编写小程序不停地在网络上自动抓取信息,网络爬虫便由此产生了。

下面我用 Java 实现了一个简单的、专门抓取邮箱的小工具,做得非常粗略,仅供大家参考。

这是效果图

?

简单的网络邮箱抓取工具(附源码)

啥也不说了,直接上代码吧。

?

import java.awt.AWTException;
import java.awt.BorderLayout;
import java.awt.Dimension;
import java.awt.Image;
import java.awt.MenuItem;
import java.awt.PopupMenu;
import java.awt.SystemTray;
import java.awt.Toolkit;
import java.awt.TrayIcon;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.awt.event.WindowAdapter;
import java.awt.event.WindowEvent;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.swing.ImageIcon;
import javax.swing.JButton;
import javax.swing.JComboBox;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTextArea;
import javax.swing.JTextField;
import javax.swing.SwingUtilities;
import javax.swing.UIManager;
import javax.swing.UnsupportedLookAndFeelException;

/**
 * Small Swing tool that searches a keyword on a chosen search engine, follows
 * the links found on the result page (crawl depth 2) and lists every e-mail
 * address it encounters in a text area.
 *
 * <p>Threading: all Swing components and the {@link #count} counter are only
 * touched on the event dispatch thread (EDT). Worker threads download pages and
 * hand discovered addresses to the EDT via {@link SwingUtilities#invokeLater}.
 *
 * @author http://javaflex.iteye.com/
 */
public class MainFrm extends JFrame implements ActionListener {
    private static final long serialVersionUID = 1L;

    // Action commands. Buttons and menu items are created from these same
    // constants so a label can never drift away from the command it triggers.
    private static final String CMD_EXTRACT = "提取邮箱";
    private static final String CMD_STOP = "停止抓取";
    private static final String CMD_OPEN = "1.打  开";
    private static final String CMD_EXIT = "2.退  出";
    private static final String CMD_ABOUT = "3.关  于";

    /** Search URL prefixes, indexed like the entries of the engine combo box. */
    private static final String[] SEARCH_PREFIXES = {
        "http://www.baidu.com/s?wd=",
        "http://www.google.com.hk/search?num=50&q=",
        "http://www.yahoo.cn/s?q=",
        "http://cn.bing.com/search?q=",
        "http://www.sogou.com/web?query="
    };

    private static final String USER_AGENT =
            "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)";

    /** Upper bound for connecting and for each blocking read, so workers cannot hang forever. */
    private static final int TIMEOUT_MILLIS = 10000;

    /**
     * Charset used to decode pages. Both patterns below only match ASCII, so any
     * ASCII-compatible charset yields the same results; UTF-8 keeps it deterministic.
     */
    private static final Charset PAGE_CHARSET = Charset.forName("UTF-8");

    private static final Pattern EMAIL_PATTERN =
            Pattern.compile("[a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+(\\.[a-zA-Z0-9_-]+)+");

    // Same character classes as the published pattern: its literal "&amp;" only
    // contributed '&' and ';' (a, m, p are already covered by \w).
    private static final Pattern LINK_PATTERN = Pattern.compile(
            "(http|ftp|https):\\/\\/[\\w\\-_]+(\\.[\\w\\-_]+)+"
            + "([\\w\\-\\.,@?^=%&;:/~\\+#]*[\\w\\-\\@?^=%&;/~\\+#])?");

    /** Running number of the next listed address; only accessed on the EDT. */
    static int count = 1;
    static int countUrl = 1;

    JFrame frame;
    JButton b1;
    JButton b2;
    JTextArea t1;
    JTextField tf;
    JPanel panel;
    JScrollPane jScrollPane1;
    JLabel label;
    JComboBox comb;
    PopupMenu pm;

    /** Worker threads of the current extraction; only accessed on the EDT. */
    List<Thread> t = new ArrayList<Thread>();

    /**
     * Legacy bookkeeping counter: set when an extraction starts and decremented
     * after every download. Nothing in this class reads it and its updates are
     * not synchronized; it is kept only because the field is package-visible.
     */
    static int m = 0;

    /** The installed tray icon, or {@code null} while none has been added. */
    private TrayIcon trayIcon;

    /**
     * Builds and shows the main window.
     *
     * @return this instance, to allow call chaining
     */
    MainFrm into() {
        pm = new PopupMenu();
        String[] menuCommands = { CMD_OPEN, CMD_EXIT, CMD_ABOUT };
        for (int i = 0; i < menuCommands.length; i++) {
            MenuItem item = new MenuItem(menuCommands[i]);
            item.addActionListener(this);
            pm.add(item);
        }

        String[] engines = { "Baidu", "Google", "Yahoo", "Bing", "Sogou" };
        comb = new JComboBox(engines);

        panel = new JPanel();
        tf = new JTextField(50);
        tf.setText("留下邮箱");
        label = new JLabel("关键字:");
        frame = new JFrame("邮箱抓取(注:抓取深度暂时默认为2)  QQ:三二八二四七六七六");

        // The icon is optional: a missing resource must not prevent start-up.
        URL iconUrl = MainFrm.class.getResource("mail.png");
        if (iconUrl != null) {
            frame.setIconImage(new ImageIcon(iconUrl).getImage());
        }

        b1 = new JButton(CMD_EXTRACT);
        b1.addActionListener(this);
        b2 = new JButton(CMD_STOP);
        b2.addActionListener(this);

        t1 = new JTextArea();
        t1.setLineWrap(true);
        jScrollPane1 = new JScrollPane(t1);
        jScrollPane1.setPreferredSize(new Dimension(200, 200));

        // Closing is handled by the listener below, which terminates the JVM.
        frame.setDefaultCloseOperation(JFrame.DO_NOTHING_ON_CLOSE);
        frame.addWindowListener(new WindowAdapter() {
            @Override
            public void windowClosing(WindowEvent e) {
                System.exit(0);
            }

            @Override
            public void windowIconified(WindowEvent e) {
                // Minimising hides the window; it stays reachable through the tray icon.
                frame.setVisible(false);
                systemTray();
            }
        });

        panel.add(label);
        panel.add(tf);
        panel.add(comb);
        panel.add(b1);
        panel.add(b2);
        frame.getContentPane().add(panel, BorderLayout.NORTH);
        frame.getContentPane().add(jScrollPane1, BorderLayout.CENTER);
        frame.pack();
        frame.setVisible(true);

        Dimension screen = Toolkit.getDefaultToolkit().getScreenSize();
        frame.setLocation((screen.width - frame.getWidth()) / 2,
                (screen.height - frame.getHeight()) / 2);
        frame.setAlwaysOnTop(true);
        return this;
    }

    /**
     * Program entry point: applies the system look and feel, then builds the UI
     * on the event dispatch thread.
     */
    public static void main(String[] args) throws ClassNotFoundException,
            InstantiationException, IllegalAccessException,
            UnsupportedLookAndFeelException {
        UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName());
        SwingUtilities.invokeLater(new Runnable() {
            public void run() {
                new MainFrm().into().systemTray();
            }
        });
    }

    /** Dispatches button and tray-menu commands. */
    @Override
    public void actionPerformed(ActionEvent e) {
        String command = e.getActionCommand();
        if (CMD_EXTRACT.equals(command)) {
            startExtraction();
        } else if (CMD_STOP.equals(command)) {
            stopWorkers();
        } else if (CMD_OPEN.equals(command)) {
            frame.setVisible(true);
            frame.setExtendedState(JFrame.NORMAL);
        } else if (CMD_EXIT.equals(command)) {
            System.exit(0);
        } else if (CMD_ABOUT.equals(command)) {
            JOptionPane.showMessageDialog(null, "本程序仅供初学参考 QQ:三二八二四七六七六");
        }
    }

    /**
     * Starts a new extraction: cancels a still-running one, downloads the search
     * result page and starts one worker per result line that contains links.
     */
    @SuppressWarnings({ "rawtypes" })
    private void startExtraction() {
        stopWorkers();
        count = 1;
        t1.setText("");

        // NOTE: the result page itself is still fetched on the calling (event
        // dispatch) thread, so the UI is unresponsive until it arrives or the
        // timeout expires.
        final List<Map> pages = get(buildSearchUrl(comb.getSelectedIndex(), tf.getText()));
        m = pages.size();

        for (int i = 0; i < pages.size(); i++) {
            final Map links = pages.get(i);
            if (links.isEmpty()) {
                continue; // nothing to crawl for this line
            }
            Thread worker = new Thread() {
                @Override
                public void run() {
                    for (Object link : links.values()) {
                        if (isInterrupted()) {
                            return;
                        }
                        get((String) link);
                    }
                }
            };
            t.add(worker);
            worker.start();
        }
    }

    /**
     * Asks all workers to stop. Interruption replaces {@code Thread.stop()},
     * which is deprecated and no longer functional on recent JDKs; a worker
     * notices the request before its next line or link.
     */
    private void stopWorkers() {
        for (int i = 0; i < t.size(); i++) {
            t.get(i).interrupt();
        }
        t.clear();
    }

    /**
     * Builds the search URL for the selected engine.
     *
     * @param engineIndex index into the engine list; any out-of-range value
     *                    (including -1 for "no selection") falls back to Baidu
     * @param keyword     search text, may be {@code null}; it is URL-encoded as UTF-8
     * @return the complete search URL
     */
    static String buildSearchUrl(int engineIndex, String keyword) {
        boolean known = engineIndex >= 0 && engineIndex < SEARCH_PREFIXES.length;
        String prefix = known ? SEARCH_PREFIXES[engineIndex] : SEARCH_PREFIXES[0];
        String text = keyword == null ? "" : keyword;
        try {
            return prefix + URLEncoder.encode(text, "UTF-8");
        } catch (UnsupportedEncodingException ex) {
            // Every Java platform is required to support UTF-8.
            throw new IllegalStateException("UTF-8 is not supported", ex);
        }
    }

    /**
     * Downloads a page line by line, lists the e-mail addresses found in it and
     * collects its links.
     *
     * @param urlStr address of the page to download
     * @return one link map (see {@link #pr(String)}) per line read; empty when
     *         the page could not be downloaded
     */
    @SuppressWarnings("rawtypes")
    public List<Map> get(String urlStr) {
        List<Map> list = new ArrayList<Map>();
        URLConnection connection = null;
        BufferedReader reader = null;
        try {
            // Not every harvested link is HTTP (the link pattern also accepts
            // ftp://), so the connection is used through its generic type.
            connection = new URL(urlStr).openConnection();
            connection.setRequestProperty("User-Agent", USER_AGENT);
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setReadTimeout(TIMEOUT_MILLIS);
            reader = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), PAGE_CHARSET));
            String line;
            while ((line = reader.readLine()) != null) {
                if (Thread.currentThread().isInterrupted()) {
                    break; // stop was requested
                }
                list.add(pr(line));
            }
        } catch (IOException e) {
            // Unreachable pages are expected while crawling: report and move on.
            System.err.println("Skipping " + urlStr + ": " + e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
            if (connection instanceof HttpURLConnection) {
                ((HttpURLConnection) connection).disconnect();
            }
            m--;
        }
        return list;
    }

    /**
     * Processes one line of page text: appends every distinct e-mail address in
     * it to the result area and returns the links it contains.
     *
     * @param aa the line to scan, may be {@code null}
     * @return map whose keys and values are both the links found in the line
     */
    @SuppressWarnings("rawtypes")
    public Map pr(String aa) {
        publishEmails(findEmails(aa));
        return findLinks(aa);
    }

    /**
     * Returns the distinct e-mail addresses in {@code text}, in order of first
     * appearance; empty for {@code null}.
     */
    static Set<String> findEmails(String text) {
        Set<String> emails = new LinkedHashSet<String>();
        if (text != null) {
            Matcher matcher = EMAIL_PATTERN.matcher(text);
            while (matcher.find()) {
                emails.add(matcher.group());
            }
        }
        return emails;
    }

    /**
     * Returns the distinct links in {@code text}, each mapped to itself; empty
     * for {@code null}. HTML-escaped ampersands are decoded, because raw page
     * source writes {@code &} inside URLs as {@code &amp;}.
     */
    static Map<String, String> findLinks(String text) {
        Map<String, String> links = new LinkedHashMap<String, String>();
        if (text != null) {
            Matcher matcher = LINK_PATTERN.matcher(text);
            while (matcher.find()) {
                String link = matcher.group().replace("&amp;", "&");
                links.put(link, link);
            }
        }
        return links;
    }

    /** Appends the addresses to the result area, always on the event dispatch thread. */
    private void publishEmails(final Collection<String> emails) {
        if (emails.isEmpty() || t1 == null) {
            return;
        }
        Runnable append = new Runnable() {
            public void run() {
                for (String email : emails) {
                    t1.append("第" + (count++) + "个:" + email + "\r\n");
                }
            }
        };
        if (SwingUtilities.isEventDispatchThread()) {
            append.run();
        } else {
            SwingUtilities.invokeLater(append);
        }
    }

    /**
     * Installs the tray icon with the popup menu. Does nothing when the icon is
     * already installed or the platform has no system tray; failures are
     * reported on stderr and otherwise ignored, since the tray is optional.
     */
    public void systemTray() {
        if (trayIcon != null || !SystemTray.isSupported()) {
            return;
        }
        URL iconUrl = getClass().getResource("email_go.png");
        if (iconUrl == null) {
            System.err.println("Tray icon resource email_go.png not found");
            return;
        }
        Image image = Toolkit.getDefaultToolkit().getImage(iconUrl);
        TrayIcon icon = new TrayIcon(image);
        icon.setToolTip("邮箱抓取");
        icon.setPopupMenu(pm);
        try {
            SystemTray.getSystemTray().add(icon);
            trayIcon = icon;
        } catch (AWTException e) {
            System.err.println("Could not add tray icon: " + e);
        } catch (SecurityException e) {
            System.err.println("Could not add tray icon: " + e);
        }
    }

    /**
     * Returns the standard component description. The published version opened
     * a second window and returned {@code null} here; a string conversion must
     * not have side effects.
     */
    @Override
    public String toString() {
        return super.toString();
    }
}

@author http://javaflex.iteye.com/

?

自动发送邮件的功能(待续)

?

热点排行