网页源代码抓取工具(网络爬虫:用java实现抓取邮箱的小工具,你知道吗?)

优采云 发布时间: 2022-04-12 02:35

  网页源代码抓取工具(

网络爬虫:用java实现抓取邮箱的小工具,你知道吗?

)

  简单的网页邮箱抓取工具(附源码)

  为了让自己的数据库足够强大、信息更加全面,搜索引擎借助网络爬虫不分昼夜地在互联网上搜集信息。我们都知道,互联网上的信息是无限的,并且呈爆炸式增长,靠人工去获取这些信息是不可能的。于是人们编写小程序,不断地抓取互联网上的信息,网络爬虫由此诞生。

  下面我实现了一个简单的java抓取邮箱的小工具,很粗糙,仅供大家参考。

  这是效果图

  不说什么,直接上代码

<p>

import java.awt.BorderLayout;

import java.awt.Dimension;

import java.awt.Image;

import java.awt.MenuItem;

import java.awt.PopupMenu;

import java.awt.Toolkit;

import java.awt.event.ActionEvent;

import java.awt.event.ActionListener;

import java.awt.event.WindowAdapter;

import java.awt.event.WindowEvent;

import java.io.BufferedReader;

import java.io.FileNotFoundException;

import java.io.IOException;

import java.io.InputStreamReader;

import java.net.HttpURLConnection;

import java.net.URL;

import java.net.URLConnection;

import java.util.ArrayList;

import java.util.HashMap;

import java.util.Iterator;

import java.util.List;

import java.util.Map;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

import javax.swing.ImageIcon;

import javax.swing.JButton;

import javax.swing.JComboBox;

import javax.swing.JFrame;

import javax.swing.JLabel;

import javax.swing.JOptionPane;

import javax.swing.JPanel;

import javax.swing.JScrollPane;

import javax.swing.JTextArea;

import javax.swing.JTextField;

import javax.swing.UIManager;

import javax.swing.UnsupportedLookAndFeelException;

public class MainFrm extends JFrame implements ActionListener {

// Serialization version id (JFrame is Serializable).
private static final long serialVersionUID = 1L;

// Reset to 1 each time an extraction starts (see actionPerformed); other uses are outside this excerpt.
static int count=1;

// No use is visible in this excerpt. NOTE(review): presumably counts visited URLs - confirm.
static int countUrl=1;

// The window that is actually shown; created and populated in into().
JFrame frame;

// "提取邮箱" button: starts an extraction.
JButton b1;

// "停止抓取" button: meant to stop the running workers.
JButton b2;

// Line-wrapping text area shown inside jScrollPane1; cleared when an extraction starts.
JTextArea t1;

// Text field holding the search keyword.
JTextField tf;

// Top panel holding label, tf, comb, b1 and b2.
JPanel panel;

// Scroll pane wrapping t1, placed in the centre of the frame.
JScrollPane jScrollPane1;

// Caption label placed in front of the keyword field.
JLabel label;

// Search-engine selector; the selected index picks the search URL prefix in actionPerformed.
JComboBox comb;

// Popup menu with the open / exit / about items built in into().
// NOTE(review): presumably attached to the tray icon by systemTray(), which is not visible here.
PopupMenu pm;

// Worker threads started by actionPerformed, kept so the stop command can reach them.
List t = new ArrayList();

// Set to the size of the first fetch result in actionPerformed and decremented in get()'s finally block.
static int m = 0;

/**
 * Builds the popup menu and the main window, wires this object up as the
 * listener for every menu item and button, and shows the window centred on
 * the screen and always on top.
 *
 * <p>Closing the window exits the JVM; minimising it hides the window and
 * calls {@code systemTray()}.
 *
 * @return this instance, so that calls can be chained
 */
MainFrm into() {
    // Popup menu: each item is identified in actionPerformed by its label.
    pm = new PopupMenu();
    MenuItem openItem = new MenuItem("1.打 开");
    MenuItem closeItem = new MenuItem("2.退 出");
    MenuItem aboutItem = new MenuItem("3.关 于");
    openItem.addActionListener(this);
    closeItem.addActionListener(this);
    aboutItem.addActionListener(this);
    pm.add(openItem);
    pm.add(closeItem);
    pm.add(aboutItem);

    // The order of these names must match the index-to-URL mapping in actionPerformed.
    String[] petStrings = { "Baidu", "Google", "Yahoo", "Bing", "Sogou" };
    comb = new JComboBox(petStrings);

    panel = new JPanel();
    tf = new JTextField(50);
    tf.setText("留下邮箱");
    label = new JLabel("关键字:");
    frame = new JFrame("邮箱抓取(注:抓取深度暂时默认为2) QQ:三二八二四七六七六");

    // getResource returns null when mail.png is not next to the class file;
    // new ImageIcon((URL) null) would then throw a NullPointerException, so
    // the icon is simply skipped in that case.
    java.net.URL imgURL = MainFrm.class.getResource("mail.png");
    if (imgURL != null) {
        frame.setIconImage(new ImageIcon(imgURL).getImage());
    }

    b1 = new JButton("提取邮箱");
    b1.addActionListener(this);
    b2 = new JButton("停止抓取");
    b2.addActionListener(this);

    t1 = new JTextArea();
    t1.setLineWrap(true);
    jScrollPane1 = new JScrollPane(t1);
    jScrollPane1.setPreferredSize(new Dimension(200, 200));

    // Applies to this MainFrm instance, not to the visible 'frame';
    // closing 'frame' is handled by the window listener below.
    this.setDefaultCloseOperation(DO_NOTHING_ON_CLOSE);

    frame.addWindowListener(new WindowAdapter() {
        /** Closing the window terminates the application. */
        @Override
        public void windowClosing(WindowEvent e) {
            System.exit(0);
        }

        /** Minimising hides the window and hands over to systemTray(). */
        @Override
        public void windowIconified(WindowEvent e) {
            frame.setVisible(false);
            systemTray();
        }
    });

    panel.add(label);
    panel.add(tf);
    panel.add(comb);
    panel.add(b1);
    panel.add(b2);
    frame.getContentPane().add(panel, BorderLayout.NORTH);
    frame.getContentPane().add(jScrollPane1, BorderLayout.CENTER);

    // pack() sizes the window from the components' preferred sizes, so no
    // explicit setSize call is needed before it.
    frame.pack();
    frame.setVisible(true);

    // Centre the packed window on the screen.
    Dimension winSize = Toolkit.getDefaultToolkit().getScreenSize();
    frame.setLocation((winSize.width - frame.getWidth()) / 2,
            (winSize.height - frame.getHeight()) / 2);
    frame.setAlwaysOnTop(true);
    return this;
}

/**
 * Program entry point: installs the platform look and feel, then builds the
 * window and calls {@code systemTray()}.
 *
 * <p>The UI is constructed on the event dispatch thread, as Swing requires
 * for code that creates or touches components.
 *
 * @param args command-line arguments (unused)
 * @throws ClassNotFoundException          if the look-and-feel class cannot be found
 * @throws InstantiationException          if the look-and-feel class cannot be instantiated
 * @throws IllegalAccessException          if the look-and-feel class is not accessible
 * @throws UnsupportedLookAndFeelException if the platform look and feel is not supported
 */
public static void main(String[] args) throws ClassNotFoundException,
        InstantiationException, IllegalAccessException,
        UnsupportedLookAndFeelException {
    UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName());

    // Fully-qualified name so that no additional import is required.
    javax.swing.SwingUtilities.invokeLater(new Runnable() {
        @Override
        public void run() {
            new MainFrm().into().systemTray();
        }
    });
}

/**
 * Handles every button and popup-menu command; commands are told apart by
 * their label text.
 *
 * <ul>
 *   <li>"提取邮箱": clears the output, fetches the search-result page for the
 *       keyword on the selected engine, then starts one worker thread per
 *       entry of the returned list; each worker calls {@code get} on every
 *       value of that entry's map.</li>
 *   <li>"停止抓取" (the stop button's label; the older "终止抓取" is still
 *       accepted): interrupts all workers. A worker notices the interrupt
 *       before its next fetch, so a fetch already in progress completes.</li>
 *   <li>"1.打 开": shows the window again in its normal state.</li>
 *   <li>"2.退 出": exits the JVM.</li>
 *   <li>"3.关 于": shows the about dialog.</li>
 * </ul>
 *
 * @param e the event whose action command selects the branch
 */
@SuppressWarnings("unchecked")
@Override
public void actionPerformed(ActionEvent e) {
    final String command = e.getActionCommand();

    if ("提取邮箱".equals(command)) {
        count = 1;
        t1.setText("");

        // Search URL prefixes, in the same order as the combo-box entries.
        final String[] searchPrefixes = {
            "http://www.baidu.com/s?wd=",
            "http://www.google.com.hk/search?num=50&q=",
            "http://www.yahoo.cn/s?q=",
            "http://cn.bing.com/search?q=",
            "http://www.sogou.com/web?query="
        };
        final int combo = comb.getSelectedIndex();
        // Any index outside the table (e.g. -1 for "no selection") falls back to Baidu.
        final String http = (combo >= 0 && combo < searchPrefixes.length)
                ? searchPrefixes[combo]
                : searchPrefixes[0];

        // NOTE(review): this first fetch runs on the event thread and the
        // keyword is appended without URL encoding - both unchanged here.
        final List<?> list = get(http + tf.getText());
        m = list.size();

        for (Object entry : list) {
            // get() stores one Map per response line; its values are the URLs to visit next.
            final Map<?, ?> map = (Map<?, ?>) entry;
            Thread worker = new Thread() {
                @Override
                public void run() {
                    for (Object link : map.values()) {
                        // Cooperative cancellation: stop before starting another fetch.
                        if (isInterrupted()) {
                            return;
                        }
                        get((String) link);
                    }
                }
            };
            t.add(worker);
            worker.start();
        }
    } else if ("停止抓取".equals(command) || "终止抓取".equals(command)) {
        // Thread.stop() is deprecated and unsupported on recent JDKs; ask the
        // workers to finish instead.
        for (Object worker : t) {
            ((Thread) worker).interrupt();
        }
        // Interrupted workers are done with; drop them so the list does not grow forever.
        t.clear();
    } else if ("1.打 开".equals(command)) {
        frame.setVisible(true);
        frame.setExtendedState(JFrame.NORMAL);
    } else if ("2.退 出".equals(command)) {
        System.exit(0);
    } else if ("3.关 于".equals(command)) {
        JOptionPane.showMessageDialog(null, "本程序仅供初学参考 QQ:三二八二四七六七六");
    }
}

@SuppressWarnings("unchecked")

public List get(String urlStr) {

List list = new ArrayList();

try {

URL url = new URL(urlStr);

URLConnection rulConnection = url.openConnection();

HttpURLConnection httpUrlConnection = (HttpURLConnection) rulConnection;

httpUrlConnection.setRequestProperty("User-Agent",

"Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");

BufferedReader br = new BufferedReader(new InputStreamReader(

httpUrlConnection.getInputStream()));

String line = "";

while ((line = br.readLine()) != null) {

Map map = pr(line);

list.add(map);

}

} catch (FileNotFoundException e) {

//e.printStackTrace();

} catch (IOException e) {

//e.printStackTrace();

} finally {

m--;

if (m

0 个评论

要回复文章请先登录注册


官方客服QQ群

微信人工客服

QQ人工客服


线