以vb6为例,获取给定的网址的外链的链接地址和链接文本,这里过滤了网站本身的网址,还有一些客服代码
也可以以常见的搜索引擎蜘蛛模拟访问
VB6需要引用 :在“项目”菜单上单击“引用“Microsoft VBScript Regular Expressions 5.5
Public Function HtmlGetLinks(url, zhizu) ' 从网址内获取外链地址 参数:网址,蜘蛛类型
'On Error Resume Next
Dim str, reg, objMatches, key1
url2 = Replace(url, "http://", "")
url = "http://" & url2
str = GetUrlHtmlUTF(url, "auto", zhizu)
Set reg = New RegExp
reg.IgnoreCase = True
reg.Global = True
reg.Pattern = "href=[\""\'\s]?http://([^\s]+)(.*?)\b[\""\'\s]?(.*?)>(.*?)</a>"
Set objMatches = reg.Execute(str)
If objMatches.Count > 0 Then
For i = 0 To objMatches.Count - 1
url1 = objMatches(i).SubMatches(0)
url1 = Replace(url1, """", "")
url1 = Replace(url1, "'", "")
Keyword = objMatches(i).SubMatches(3)
Keyword = LCase(Keyword)
Keyword = Replace(Keyword, "'", """")
If InStr(Keyword, "src") > 0 Then
Keyword = FindStrMulti(Keyword, "src=""", """", "")
Keyword = "图片:" & Keyword
Else
Keyword = RemoveHTML(Keyword)
End If
If InStr(url1, url2) = 0 And InStr(Keyword, "wpa.qq.com") = 0 And InStr(url1, "51.la") = 0 And InStr(url1, "cnzz.com") = 0 And InStr(url1, "taobao.com") = 0 And InStr(url1, "beian.gov.cn") = 0 Then
key1 = key1 & (Keyword & "|" & url1) & vbCrLf
End If
DoEvents
Next
End If
HtmlGetLinks = key1
End Function
Public Function GetUrlHtmlUTF(url, CodeBase, zhizu)
Dim xmlHTTP1, GetCode, GetCode1
Set xmlHTTP1 = CreateObject("Microsoft.XMLHTTP")
xmlHTTP1.Open "get", url, True
If zhizu <> "" Then
Select Case zhizu
Case "baidu"
xmlHTTP1.setRequestHeader "UserAgent:", "Baiduspider+(+http://www.baidu.com/search/spider.htm)"
Case "google"
xmlHTTP1.setRequestHeader "UserAgent:", "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
Case "yahoo"
xmlHTTP1.setRequestHeader "UserAgent:", "Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)"
Case "yahoo"
xmlHTTP1.setRequestHeader "UserAgent:", "Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)"
Case "youdao"
xmlHTTP1.setRequestHeader "UserAgent:", "Mozilla/5.0 (compatible; YodaoBot/1.0; http://www.yodao.com/help/webmaster/spider/; )"
Case "soso"
xmlHTTP1.setRequestHeader "UserAgent:", "Sosospider+(+http://help.soso.com/webspider.htm)"
Case "sogou"
xmlHTTP1.setRequestHeader "UserAgent:", "Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)"
End Select
End If
xmlHTTP1.send
While xmlHTTP1.ReadyState <> 4
DoEvents
Wend
GetCode = xmlHTTP1.ResponseBody
If CodeBase = "auto" Then
GetCode1 = StrConv(Left(GetCode, 500), vbUnicode)
If InStr(GetCode1, "charset=gb2312") > 0 Then
GetCode = StrConv(GetCode, vbUnicode)
Else
If CStr(GetCode) <> "" Then GetCode = BytesToBstr(GetCode, "UTF-8")
End If
Else
If CStr(GetCode) <> "" Then GetCode = BytesToBstr(GetCode, CodeBase)
End If
GetUrlHtmlUTF = GetCode
Set ObjXML = Nothing
End Function
相关视频
相关阅读 “由于本机的限制,该操作已被取消”解决方法Word里的超链接不起作用解决办法超链接target=_blank的使用方法如何用Dreamweaver MX建设神奇网页图片超链接word删除超链接的方法wps超链接怎么做 wps怎么用超链接教你去掉EXCEL文档中的超链接如何修改Word 2007文档中超链接文字样式
热门文章 电视盒子怎么看百度云迅雷9怎么关闭右侧 迅迅雷99.99下载不完怎么网易云音乐怎么上传歌
最新文章
2022支付宝万能福扫福微信迎新春状态怎么设
微信小老虎头像怎么设置 微信ID后面设置小老抖音压岁钱卡怎么获得2022 抖音压岁钱获取方微博隐私保护怎么设置 微博隐私保护功能完善今日头条2022年集卡活动开启时间 今日头条2
人气排行 智学网怎么登录 智学网怎么查分数 智学网统教你怎么写电子邮箱格式?以163和QQ邮箱为例腾讯大王卡用了后悔怎么办 腾讯大王卡值得办缺少或丢失xinput1_3.dll解决方法,xinput1应用程序无法正常启动0xc000007b解决方法快播关闭怎么办?快播不能用了怎么看片将pdf文件转换为word文件的最简单方法如何破解QQ空间密码和权限
查看所有0条评论>>