.NET2.0抓取网页全部链接

添加人:admin四级(1705分)   添加时间:2007-08-11    阅读次数:1413  收藏此教程
效果图

后台代码:
 1using System;
 2using System.Data;
 3using System.Configuration;
 4using System.Web;
 5using System.Web.Security;
 6using System.Web.UI;
 7using System.Web.UI.WebControls;
 8using System.Web.UI.WebControls.WebParts;
 9using System.Web.UI.HtmlControls;
10using System.Text.RegularExpressions;
11using System.Net;
12using System.IO;
13using System.Collections;
/// <summary>
/// Code-behind for Default.aspx: downloads the page whose address is typed
/// into TextBox1 and lists every distinct absolute http/https link found in
/// its source inside TextBox2.
/// </summary>
public partial class _Default : System.Web.UI.Page
{
    /// <summary>
    /// Matches absolute http:// and https:// URLs: one or more dot-separated
    /// host labels followed by an optional path/query part. Built once and
    /// shared, because the pattern never changes between requests.
    /// </summary>
    private static readonly Regex LinkPattern = new Regex(
        @"https?://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>
    /// Page load handler. The page needs no initialisation, so this is
    /// intentionally empty; it is kept because the page uses
    /// AutoEventWireup and the method is part of the class's interface.
    /// </summary>
    protected void Page_Load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Click handler for Button1: validates the address in TextBox1,
    /// downloads that page and writes its distinct links to TextBox2,
    /// each followed by a single space.
    /// </summary>
    /// <param name="sender">The button that raised the event.</param>
    /// <param name="e">Event data (unused).</param>
    protected void Button1_Click(object sender, EventArgs e)
    {
        TextBox2.Text = "";

        // The address comes straight from the user: reject anything that is
        // not an absolute http/https URL instead of letting WebRequest.Create
        // throw, or the HttpWebRequest cast fail on file:// or ftp:// input.
        Uri target;
        bool isAbsolute = Uri.TryCreate(this.TextBox1.Text.Trim(), UriKind.Absolute, out target);
        if (!isAbsolute
            || (target.Scheme != Uri.UriSchemeHttp && target.Scheme != Uri.UriSchemeHttps))
        {
            TextBox2.Text = "请输入以 http:// 或 https:// 开头的有效网址。";
            return;
        }

        string pageSource;
        try
        {
            pageSource = DownloadPage(target);
        }
        catch (WebException ex)
        {
            // DNS failure, timeout, 4xx/5xx...: report it in the result box
            // rather than surfacing an unhandled-exception error page.
            TextBox2.Text = ex.Message;
            return;
        }

        TextBox2.Text = ExtractDistinctLinks(pageSource);
    }

    /// <summary>Downloads the resource at <paramref name="address"/> and returns its body as text.</summary>
    /// <param name="address">Absolute http/https address to fetch.</param>
    /// <returns>The response body, decoded with StreamReader's default encoding.</returns>
    /// <exception cref="WebException">The request failed.</exception>
    private static string DownloadPage(Uri address)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(address);

        // Dispose both the response and the reader even if reading throws,
        // so the underlying connection is always released.
        using (WebResponse response = request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream()))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Collects every distinct URL found in <paramref name="html"/>, in order
    /// of first appearance.
    /// </summary>
    /// <param name="html">Page source to scan.</param>
    /// <returns>The links, each followed by a single space; empty when none are found.</returns>
    private static string ExtractDistinctLinks(string html)
    {
        // Hashtable gives constant-time duplicate checks (keys compare
        // ordinally, like the original == test) and is available in .NET 2.0.
        Hashtable seen = new Hashtable();
        System.Text.StringBuilder output = new System.Text.StringBuilder();

        foreach (Match match in LinkPattern.Matches(html))
        {
            string link = match.Value;

            // Filter: skip links that were already emitted.
            if (seen.ContainsKey(link))
            {
                continue;
            }

            seen.Add(link, null);
            output.Append(link).Append(' ');
        }

        return output.ToString();
    }
}

58


前台代码:
 1<%@ Page Language="C#" AutoEventWireup="true"  CodeFile="Default.aspx.cs" Inherits="_Default" %>
<%-- Link-extraction page. The directive above binds this markup to the _Default class in Default.aspx.cs. --%>
 2<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 3<html xmlns="http://www.w3.org/1999/xhtml" >
 4<head runat="server">
 5    <title>抓取网页所有链接</title>
 6    
 7</head>
 8<body >
 9    <form id="form1" runat="server">
10    <div>
<%-- TextBox1: single-line input. Button1: posts back and runs the Button1_Click handler. --%>
11        <asp:TextBox ID="TextBox1" runat="server" Width="481px"></asp:TextBox>
12        <asp:Button ID="Button1" runat="server" OnClick="Button1_Click" Text="提取" />
13        <br />
<%-- TextBox2: multi-line text area below the input row. --%>
14        <asp:TextBox ID="TextBox2" runat="server" Height="304px" TextMode="MultiLine" Width="524px"></asp:TextBox></div>
15    </form>
16</body>
17</html>
18

共1页 第1页 上一页 1 下一页
相关的教程: 正则表达式 抓取页面 链接 Regex
收藏此教程

当前平均分: 0.0(0 次打分)

-5 -4 -3 -2 -1 0 1 2 3 4 5
评论主题
您的大名
您的评论
验证码 点击换一个验证码
知识库搜索: