网页源代码抓取工具(用java实现的网络爬虫:抓取邮箱的小工具,你知道吗?)
优采云 发布时间: 2022-04-12 02:35
网页源代码抓取工具(用java实现的网络爬虫:抓取邮箱的小工具,你知道吗?)
简单的网页邮箱抓取工具(附源码)
为了让自己的数据库足够强大,搜索引擎借助网络爬虫不分昼夜地在互联网上搜集信息,使收录的内容更加全面。我们都知道,互联网上的信息是海量的,并且呈爆炸式增长,靠人工逐条获取是不可能的。于是人们编写小程序,自动、持续地抓取互联网上的信息,网络爬虫由此诞生。
下面我实现了一个简单的java抓取邮箱的小工具,很粗糙,仅供大家参考。
这是运行效果图
不说什么,直接上代码
<p>
import java.awt.BorderLayout;
import java.awt.Dimension;
import java.awt.Image;
import java.awt.MenuItem;
import java.awt.PopupMenu;
import java.awt.Toolkit;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.awt.event.WindowAdapter;
import java.awt.event.WindowEvent;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.swing.ImageIcon;
import javax.swing.JButton;
import javax.swing.JComboBox;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTextArea;
import javax.swing.JTextField;
import javax.swing.UIManager;
import javax.swing.UnsupportedLookAndFeelException;
public class MainFrm extends JFrame implements ActionListener {
// Serialization version id for this JFrame subclass.
private static final long serialVersionUID = 1L;
// Reset to 1 at the start of every extraction run (see actionPerformed);
// its other uses are outside the visible part of the file.
static int count=1;
// Initialised to 1; presumably a counter of visited URLs — TODO confirm against its uses.
static int countUrl=1;
// The window that is actually shown to the user; created and laid out in into().
JFrame frame;
// Button that starts an extraction run.
JButton b1;
// Button meant to stop the worker threads of the current run.
JButton b2;
// Output area; cleared when a new extraction run starts.
JTextArea t1;
// Input field holding the search keyword.
JTextField tf;
// Top row container for the label, keyword field, engine selector and buttons.
JPanel panel;
// Scroll wrapper around the output area t1.
JScrollPane jScrollPane1;
// Caption shown in front of the keyword field.
JLabel label;
// Search engine selector; its selected index picks the query URL prefix.
JComboBox comb;
// Popup menu with the open / exit / about items; built in into().
// NOTE(review): presumably attached to the tray icon by systemTray() — confirm.
PopupMenu pm;
// Worker threads started by actionPerformed, kept so they can be stopped later.
List t = new ArrayList();
// Set to the number of entries returned by the initial search fetch and
// decremented in the finally block of get().
static int m = 0;
/**
 * Builds the popup menu and the main window, wires up all listeners and
 * finally shows the window centred on the screen and always on top.
 *
 * <p>The window icon is optional: when {@code mail.png} cannot be found next
 * to this class the window is shown with the default icon instead of failing
 * with a {@link NullPointerException}.
 *
 * @return this instance, so that further calls can be chained
 */
MainFrm into() {
    // Popup menu entries; their action commands are dispatched in actionPerformed.
    pm = new PopupMenu();
    MenuItem openItem = new MenuItem("1.打 开");
    MenuItem closeItem = new MenuItem("2.退 出");
    MenuItem aboutItem = new MenuItem("3.关 于");
    openItem.addActionListener(this);
    closeItem.addActionListener(this);
    aboutItem.addActionListener(this);
    pm.add(openItem);
    pm.add(closeItem);
    pm.add(aboutItem);

    // The index of the selected entry is what actionPerformed switches on.
    String[] petStrings = { "Baidu", "Google", "Yahoo", "Bing", "Sogou" };
    comb = new JComboBox(petStrings);

    panel = new JPanel();
    tf = new JTextField(50);
    tf.setText("留下邮箱");
    label = new JLabel("关键字:");
    frame = new JFrame("邮箱抓取(注:抓取深度暂时默认为2) QQ:三二八二四七六七六");

    // getResource returns null when the image is not on the classpath;
    // only install the icon when it was actually found.
    java.net.URL imgURL = MainFrm.class.getResource("mail.png");
    if (imgURL != null) {
        frame.setIconImage(new ImageIcon(imgURL).getImage());
    }

    b1 = new JButton("提取邮箱");
    b1.addActionListener(this);
    b2 = new JButton("停止抓取");
    b2.addActionListener(this);

    t1 = new JTextArea();
    t1.setLineWrap(true);
    jScrollPane1 = new JScrollPane(t1);
    jScrollPane1.setPreferredSize(new Dimension(200, 200));

    // Note: this configures the MainFrm instance itself, not the visible "frame".
    this.setDefaultCloseOperation(DO_NOTHING_ON_CLOSE);
    frame.addWindowListener(new WindowAdapter() {
        // Closing the window terminates the application.
        @Override
        public void windowClosing(WindowEvent e) {
            System.exit(0);
        }

        // Minimising hides the window and hands over to the system tray.
        @Override
        public void windowIconified(WindowEvent e) {
            frame.setVisible(false);
            systemTray();
        }
    });

    panel.add(label);
    panel.add(tf);
    panel.add(comb);
    panel.add(b1);
    panel.add(b2);
    frame.getContentPane().add(panel, BorderLayout.NORTH);
    frame.getContentPane().add(jScrollPane1, BorderLayout.CENTER);

    // pack() determines the final size from the components' preferred sizes.
    frame.pack();
    Dimension winSize = Toolkit.getDefaultToolkit().getScreenSize();
    frame.setLocation((winSize.width - frame.getWidth()) / 2,
            (winSize.height - frame.getHeight()) / 2);
    frame.setAlwaysOnTop(true);
    // Show the window last so it appears already centred instead of jumping.
    frame.setVisible(true);
    return this;
}
/**
 * Application entry point: switches Swing to the platform look and feel,
 * builds the main window and then sets up the system tray.
 *
 * @param args command line arguments (unused)
 * @throws ClassNotFoundException          if the look and feel class cannot be found
 * @throws InstantiationException          if the look and feel cannot be instantiated
 * @throws IllegalAccessException          if the look and feel class is not accessible
 * @throws UnsupportedLookAndFeelException if the platform rejects the look and feel
 */
public static void main(String[] args) throws ClassNotFoundException,
        InstantiationException, IllegalAccessException,
        UnsupportedLookAndFeelException {
    final String nativeLookAndFeel = UIManager.getSystemLookAndFeelClassName();
    UIManager.setLookAndFeel(nativeLookAndFeel);

    final MainFrm app = new MainFrm();
    // into() returns the same instance, so the tray is set up on "app" itself.
    app.into();
    app.systemTray();
}
/**
 * Dispatches button and popup menu actions by their action command.
 *
 * <ul>
 * <li>Start button: clears the output, fetches the result page of the
 *     selected search engine for the entered keyword and starts one worker
 *     thread per returned entry; each worker fetches every URL of its entry.</li>
 * <li>Stop button: stops all worker threads started so far.</li>
 * <li>Popup menu: restore the window, exit the application, or show the
 *     about dialog.</li>
 * </ul>
 *
 * @param e the action event whose command selects the operation
 */
@SuppressWarnings({ "unchecked", "deprecation" })
@Override
public void actionPerformed(ActionEvent e) {
    final String command = e.getActionCommand();
    if ("提取邮箱".equals(command)) {
        count = 1;
        t1.setText("");

        // Query URL prefix of the selected search engine; Baidu is the fallback
        // (index 0 as well as any unexpected index such as -1).
        final String http;
        switch (comb.getSelectedIndex()) {
        case 1:
            http = "http://www.google.com.hk/search?num=50&q=";
            break;
        case 2:
            http = "http://www.yahoo.cn/s?q=";
            break;
        case 3:
            http = "http://cn.bing.com/search?q=";
            break;
        case 4:
            http = "http://www.sogou.com/web?query=";
            break;
        default:
            http = "http://www.baidu.com/s?wd=";
            break;
        }

        // The keyword is user input and must be encoded before it is put into the URL.
        final List list = get(http + encodeKeyword(tf.getText()));
        m = list.size();
        for (Object entry : list) {
            final Map map = (Map) entry;
            Thread tt = new Thread() {
                @Override
                public void run() {
                    // Every value of the entry is a URL to crawl.
                    for (Object link : map.values()) {
                        get((String) link);
                    }
                }
            };
            // Remember the worker so the stop button can reach it.
            t.add(tt);
            tt.start();
        }
    } else if ("停止抓取".equals(command) || "终止抓取".equals(command)) {
        // "停止抓取" is the label the stop button is created with; the second
        // spelling is kept for backward compatibility.
        for (Object worker : t) {
            Thread thread = (Thread) worker;
            try {
                // The workers have no cooperative cancellation, hence stop().
                thread.stop();
            } catch (UnsupportedOperationException unsupported) {
                // Newer JDKs no longer support Thread.stop(); request an interrupt instead.
                thread.interrupt();
            }
        }
    } else if ("1.打 开".equals(command)) {
        frame.setVisible(true);
        frame.setExtendedState(JFrame.NORMAL);
    } else if ("2.退 出".equals(command)) {
        System.exit(0);
    } else if ("3.关 于".equals(command)) {
        JOptionPane.showMessageDialog(null, "本程序仅供初学参考 QQ:三二八二四七六七六");
    }
}

/** URL-encodes the search keyword as UTF-8 so it can be appended to a query string. */
private static String encodeKeyword(String keyword) {
    try {
        return java.net.URLEncoder.encode(keyword, "UTF-8");
    } catch (java.io.UnsupportedEncodingException unreachable) {
        // UTF-8 is a charset every JVM must support; fall back to the raw keyword.
        return keyword;
    }
}
@SuppressWarnings("unchecked")
public List get(String urlStr) {
List list = new ArrayList();
try {
URL url = new URL(urlStr);
URLConnection rulConnection = url.openConnection();
HttpURLConnection httpUrlConnection = (HttpURLConnection) rulConnection;
httpUrlConnection.setRequestProperty("User-Agent",
"Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");
BufferedReader br = new BufferedReader(new InputStreamReader(
httpUrlConnection.getInputStream()));
String line = "";
while ((line = br.readLine()) != null) {
Map map = pr(line);
list.add(map);
}
} catch (FileNotFoundException e) {
//e.printStackTrace();
} catch (IOException e) {
//e.printStackTrace();
} finally {
m--;
if (m