VBA网抓教程
- 1、下载文档前请自行甄别文档内容的完整性,平台不提供额外的编辑、内容补充、找答案等附加服务。
- 2、"仅部分预览"的文档,不可在线预览部分如存在完整性等问题,可反馈申请退款(可完整预览的文档不适用该条件!)。
- 3、如文档侵犯您的权益,请联系客服反馈,我们会尽快为您处理(人工客服工作时间:9:00-18:30)。
vba网抓常用方法:
1、xmlhttp/winhttp法:
用xmlhttp/winhttp模拟向服务器发送请求,接收服务器返回的数据。
优点:效率高,基本无兼容性问题。
缺点:需要借助如fiddler的工具来模拟http请求。
2、IE/webbrowser法:
创建IE控件或webbrowser控件,结合htmlfile对象的方法和属性,模拟浏览器操作,获取浏览器页面的数据。
优点:这个方法可以模拟大部分的浏览器操作。
所见即所得,浏览器能看到的数据就能用代码获取。
缺点:各种弹窗相当烦人,兼容性也确实是个很伤脑筋的问题。
上传文件在IE里根本无法实现。
(有实现方法?请一定告诉我)
3、QueryTables法:
因为它是excel自带,所以勉强也算是一种方法。
其实此法和xmlhttp类似,也是GET或POST方式发送请求,然后得到服务器的response返回到单元格内。
优点:excel自带,可以通过录制宏得到代码,处理table很方便。
代码简短,适合快速获取一些存在于源代码的table里的数据。
缺点:无法模拟referer等发包头(如果你有在QT中模拟referer的方法,请一定告诉我)
Sub Main()
Dim strText As String
With CreateObject("MSXML2.XMLHTTP") 'CreateObject("WinHttp.WinHttpRequest.5.1")'
.Open "POST", "", False
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded" .setRequestHeader "Referer", ""
.Send
strText = .responsetext
Debug.Print strText
End With
End Sub
拷贝剪切板:
Sub CopyToClipbox(strText As String)
'文本拷贝到剪贴板
With CreateObject("new:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
.SetText strText
.PutInClipboard
End With
End Sub
DongYu作业1.rar
(18.29 KB, 下载次数: 88)
2014-10-21 17:05 上传
下载次数: 88
Sub HomerWork1_1()
'新手:DongYu
'作业:1、网站:/lccp/jrxp.aspx
' 操作:点击“今日在售产品”,获取今日在售产品第一页的数据。
Dim xml As New MSXML2.XMLHTTP, url As String, St As String
Dim arr, brr, ar, i, c
url = "/lccp/Jrxp.aspx?col=1&tag=desc&date=2014-10-21&page= 2"
With xml
.Open "GET", url, False
.send
St = .responseText
End With
St = Split(Split(St, "<div class=""mark"">")(1), "</div>")(0)
arr = Split(St, "<tr align='center'>")
ReDim brr(1 To UBound(arr), 1 To 9)
For i = 1 To UBound(arr)
ar = arr(i)
brr(i, 1) = Split(Split(ar, "value='")(1), "'")(0) + Split(Split(ar, "<font class='cred'>")(1), "</font>")(0)
brr(i, 2) = Split(Split(ar, "</font></td><td class='hl'>")(1), "</td>")(0)
brr(i, 3) = Split(Split(ar, "<td class='on'>")(1), "</td>")(0)
brr(i, 4) = Split(Split(ar, "<td class='hl'>")(1), "</td>")(0)
brr(i, 5) = Split(Split(ar, "<td class='hl'>")(2), "</td>")(0)
brr(i, 6) = Split(Split(ar, "<td class='hl'>")(3), "</td>")(0)
brr(i, 7) = Split(Split(ar, "<td class='hl'>")(4), "</td>")(0)
brr(i, 8) = Split(Split(ar, "<td class='hl'>")(5), "</td>")(0)
brr(i, 9) = Split(Split(Split(ar, "<td class='hl'>")(5), "</td>")(1), ">")(1)
Next i
With ActiveSheet
.Cells.Clear
.Columns("D:E").NumberFormatLocal = "yyyy-m-d"
.[a1].Resize(1, 10) = [{"对比","产品名称","银行","起售日","停售日","币种","管理期(月)","产品类型","预期收益(%)","收益"}]
.[b2].Resize(UBound(brr, 1), 9) = brr
End With
End Sub
Sub 按钮2_单击()
Dim url, html
url = "/WEB/Flight/FlightSearchResultDefault.aspx?JT=1" url = url & "&OC=PEK" '北京首都机场
url = url & "&DC=SHA" '上海虹口机场
url = url & "&dstDesp=GUANGZHOU%B9%E3%D6%DD"
url = url & "&dst2=CAN"
url = url & "&DD=2014-10-22" '查询日期
url = url & "&DT=7"
url = url & "&BD="
url = url & "&BT=7"
url = url & "&AL=ALL" '全部航空
url = url & "&DR=true"
url = url & "&image.x=33"
url = url & "&image.y=9"
url = url & "&Sn=87bf24142bc0c78727610871f373e0a7"
Set html = CreateObject("htmlfile")
With CreateObject("msxml2.xmlhttp")
.Open "get", url, False
.send
html.body.innerhtml = .responsetext
Set tb = html.all.tags("div")
For i = 0 To tb.Length - 1
If tb(i).classname = "menu_layout2" Or tb(i).classname = "listone_layout" Or tb(i).classname = "listtwo_layout" Or tb(i).classname = "menu_content_small2" Then
n = n + 1
For j = 0 To tb(i).childnodes.Length - 1
Cells(n, j + 1) = tb(i).childnodes(j).innertext
Next
End If
Next
End With
End Sub
Sub 作业1_2_获取航班信息数据()
'网站:/S1/GNCX/
'操作:点击“查询”,获取航班信息数据。
Dim St As String, Url$, arr, brr, Crr
Dim S1$, S2$, i%, j%, rng As Range
Url =
"/WEB/Flight/FlightSearchResultDefault.aspx?JT=1&O
C=PEK&DC=SHA&dstDesp=GUANGZHOU%B9%E3%D6%DD&dst2=CAN&DD=2014-10-22&DT=7&BD=&BT=7
&AL=ALL&DR=true&image.x=37&image.y=9&Sn=87bf24142bc0c78727610871f373e0a7"
With CreateObject("WinHttp.WinHttpRequest.5.1")
.Open "GET", Url, False
.Send
St = .responsetext
End With
'
If InStr(St, "<div id=""FlightListFlight0"">") < 1 Then
Cells(1, 1) = "抱歉!没有满足条件的航班,请重新输入查询条件! "
Else
St = Split(Split(St, "<div id=""FlightListFlight0"">")(1),
"</div><br>")(0)
With ActiveSheet
Cells(1, 1) = Split(Split(St, "<strong>")(1), "</strong>")(0)
arr = Split(St, "<div class=""menu_layout2"">") '航空公司分组
For i = 1 To UBound(arr)
S1 = arr(i)
Crr = Split(S1, "<div class=""listtwo_layout"">")
ReDim brr(1 To UBound(Crr) + 2, 1 To 5) '班次UBound(S1) + 1,航空公司及机行+1,航线+1
'航空公司
brr(1, 1) = Trim(Split(Split(S1, "<div class=""menu_top1"">")(1),
"</div>")(0)) '中国东方航空公司
brr(1, 2) = Trim(Split(Split(S1, "<div class=""menu_top2"">")(1),
"</div>")(0)) '航班
brr(1, 2) = Trim(Split(Split(brr(1, 2), "font"">")(1),
"</span>")(0))
brr(1, 3) = Trim(Split(Split(S1, "<div class=""menu_top2"">")(2), "</div>")(0)) ''机型:333
'飞行线路
brr(2, 1) = Trim(Split(Split(S1, "<div class=""menu1_layout"">")(1), "</div>")(0)) '北京首都机场
brr(2, 2) = Trim(Split(Split(S1, "<div class=""menu2_layout"">")(1), "</div>")(0)) '(22:00)
brr(2, 3) = Trim(Split(Split(S1, "<div class=""menu3_layout"">")(1), "</div>")(0)) '经停:0
brr(2, 4) = Trim(Split(Split(S1, "<div class=""menu1_layout"">")(2), "</div>")(0)) '上海虹桥机场
brr(2, 5) = Trim(Split(Split(S1, "<div class=""menu2_layout"">")(2), "</div>")(0)) '(23:55)
'飞行班次
For j = 1 To UBound(Crr)
S2 = Crr(j)
' Debug.Print S2
brr(2 + j, 1) = Trim(Split(Split(S2, "<div class=""menu4_layout"">")(1), "</div>")(0)) '票价
brr(2 + j, 2) = Trim(Split(Split(S2, "<div class=""menu5_layout"">")(1), "</div>")(0)) '舱位'
brr(2 + j, 3) = Trim(Split(Split(S2, "<div class=""menu6_layout"">")(1), "</div>")(0)) '票数'
'……
Next j
Set rng = ActiveSheet.Cells(Rows.Count, 1).End(xlUp).Offset(1, 0) rng.Resize(UBound(brr, 1), 5) = brr
Next i
End With
End If
End Sub
Sub 作业1_2_航空公司获取()
'网站:/S1/GNCX/
'操作:点击“查询”,获取航班信息数据。
Dim strText As String
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "/images/airlinecontrol.js", False .Send
strText = .responsetext
Debug.Print ByteToStr(.responseBody, "GB2312")
End With
End Sub
Function ByteToStr(arrByte, strCharset As String) As String
With CreateObject("Adodb.Stream")
.Type = 1 'adTypeBinary
.Open
.Write arrByte
.Position = 0
.Type = 2 'adTypeText
.Charset = strCharset
ByteToStr = .Readtext
.Close
End With
End Function
Sub Main()
Dim strText As String
Const saltkey As String = "oUuXXXX"'请复制你自己的Cookie粘贴到这里。
下同
Const sid As String = "tXXXX"
Const auth As String = "a30eEZTXXXXXXXXXXXXXXXXXXXX"
Const cookiereport As String = "f1fXXXXXXXXXXXXXXXXXXXXXXXX"
Const ulastactivity As String = "84cXXXXXXXXXXXXXXXXXXXX"
Const touclick As String = "70a9vPXXXXXXXXXXXXXXXXXXXX"
Const member_login_uid As String = "218917"
Const member_login_sid As String = "tXXXX"
With CreateObject("WinHttp.WinHttpRequest.5.1")
.Open "GET", "/home.php?mod=space&do=thread&view=me ", False
.setRequestHeader "Cookie", _
"5WOj_b676_saltkey=" & saltkey _
& ";5WOj_b676_sid=" & sid _
& ";5WOj_b676_auth=" & auth _
& ";5WOj_b676_cookiereport=" & cookiereport _
& ";5WOj_b676_ulastactivity=" & ulastactivity _
& ";5WOj_b676_touclick=" & touclick _
& ";5WOj_b676_member_login_uid=" & member_login_uid _
& ";5WOj_b676_member_login_sid=" & member_login_sid
.Send
strText = .responsetext
Debug.Print strText
End With
End Sub
Sub Main()
Dim strText As String
Const saltkey As String = "oUuXXXX"
Const auth As String = "a30eEZTXXXXXXXXXXXXXXXXXXXX"
With CreateObject("WinHttp.WinHttpRequest.5.1")
.Open "GET", "/home.php?mod=space&do=thread&view=me ", False
.setRequestHeader "Cookie", _
"5WOj_b676_saltkey=" & saltkey _
& ";5WOj_b676_auth=" & auth
.Send
strText = .responsetext
Debug.Print strText
End With
End Sub
Sub Main()
Dim strText As String
With CreateObject("MSXML2.XMLHTTP") 'CreateObject("WinHttp.WinHttpRequest.5.1")
.Open "POST", "/lz/etpsInfo.do?method=viewDetail", False
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
' .setRequestHeader "Referer", ""
.send "etpsId=150000012002040300047"
strText = .responseText
Debug.Print strText
End With
End Sub
Sub Main()
Dim strText As String
With CreateObject("WinHttp.WinHttpRequest.5.1") 'CreateObject("MSXML2.XMLHTTP") '
.Open "POST", "/lz/etpsInfo.do?method=viewDetail", False .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.setRequestHeader "Referer", "/lz/etpsInfo.do?method=doSearch"
.send "etpsId=150000012002040300047"
strText = .responseText
Debug.Print strText
End With
End Sub
Sub Main()
Dim strText As String
With CreateObject("WinHttp.WinHttpRequest.5.1")
.Open "GET", ":8080/costRegulatory/project.do?method=showProjectList&is Visitor=1&f_id=11011&t1413902083242", False
.setRequestHeader "Referer", ":8080/costRegulatory/user.do?method=changeIndex&fareaId=1 "
.setRequestHeader "Cookie", "E0685A9F6B708A1F1039BF2322B82A35"
.Send
strText = .responsetext
Debug.Print strText
End With
End Sub
Sub Main()
Dim strText As String
Dim strCookie As String
With CreateObject("WinHttp.WinHttpRequest.5.1")
.Option(6) = False ' 禁止重定向,以获取原网页信息
.Open "GET", ":8080/costRegulatory/user.do?method=changeIndex&fareaId=1 ", False
.Send
strText = .getAllResponseHeaders '获取所有的回应头信息
Debug.Print strText: Stop '在立即窗口里查看头信息
strCookie = Split(Split(strText, "Set-Cookie: ")(1), ";")(0) '取出Cookie 值
End With
'在同一个winhttp对象里能保留cookie,为了体现设置cookie的作用,启用一个新的winhttp对象
With CreateObject("WinHttp.WinHttpRequest.5.1")
.Open "GET", ":8080/costRegulatory/project.do?method=showProjectList&is Visitor=1&f_id=11011&t1413902083242", False
.setRequestHeader "Referer", ":8080/costRegulatory/user.do?method=changeIndex&fareaId=1 "
.setRequestHeader "Cookie", strCookie '模拟Cookie
.Send
strText = .responsetext
Debug.Print strText
End With
End Sub
Sub Main()
Dim strText As String
With CreateObject("WinHttp.WinHttpRequest.5.1")
.Open "GET", ":8080/costRegulatory/user.do?method=changeIndex&fareaId=1 ", False
.Send '此次send是为了获取cookie
.Open "GET", ":8080/costRegulatory/project.do?method=showProjectList&is Visitor=1&f_id=11011&t1413902083242", False
.setRequestHeader "Referer", ":8080/costRegulatory/user.do?method=changeIndex&fareaId=1 "
.Send
strText = .responsetext
Debug.Print strText
End With
End Sub
Sub Main()
Dim strText As String
Const saltkey As String = "oUuXXXX"'请复制你自己的Cookie粘贴到这里。
下同
Const sid As String = "tXXXX"
Const auth As String = "a30eEZTXXXXXXXXXXXXXXXXXXXX"
Const cookiereport As String = "f1fXXXXXXXXXXXXXXXXXXXXXXXX"
Const ulastactivity As String = "84cXXXXXXXXXXXXXXXXXXXX"
Const touclick As String = "70a9vPXXXXXXXXXXXXXXXXXXXX"
Const member_login_uid As String = "218917"
Const member_login_sid As String = "tXXXX"
With CreateObject("WinHttp.WinHttpRequest.5.1")
.Open "GET", "/home.php?mod=space&do=thread&view=me ", False
.setRequestHeader "Cookie", _
"5WOj_b676_saltkey=" & saltkey _
& ";5WOj_b676_sid=" & sid _
& ";5WOj_b676_auth=" & auth _
& ";5WOj_b676_cookiereport=" & cookiereport _
& ";5WOj_b676_ulastactivity=" & ulastactivity _
& ";5WOj_b676_touclick=" & touclick _
& ";5WOj_b676_member_login_uid=" & member_login_uid _
& ";5WOj_b676_member_login_sid=" & member_login_sid
.Send
strText = .responsetext
Debug.Print strText
End With
End Sub
Sub Main()
Dim strText As String
Const saltkey As String = "oUuXXXX"
Const auth As String = "a30eEZTXXXXXXXXXXXXXXXXXXXX"
With CreateObject("WinHttp.WinHttpRequest.5.1")
.Open "GET", "/home.php?mod=space&do=thread&view=me ", False
.setRequestHeader "Cookie", _
"5WOj_b676_saltkey=" & saltkey _
& ";5WOj_b676_auth=" & auth
.Send
strText = .responsetext
Debug.Print strText
End With
End Sub
Sub Main()
Dim strText As String
With CreateObject("MSXML2.XMLHTTP") 'CreateObject("WinHttp.WinHttpRequest.5.1")
.Open "GET", "/home.php?mod=space&uid=218917&do=thread&view=me&type =reply&from=space&mobile=yes", False
.setRequestHeader "User-Agent", "Mozilla/4.0 (compatible: MSIE 7.0; Windows Phone OS 7.0; Trident/3.1; IEMobile/7.0; SAMSUNG; SGH-i917)"
.Send
strText = .responsetext
Debug.Print strText
End With
End Sub
Sub Main()
Dim strText As String
Dim strHost As String
Dim strURL As String
strHost = ""
With CreateObject("WinHttp.WinHttpRequest.5.1")
.Open "GET", strHost & "/WEB/Flight/WaitingSearch.aspx?JT=1&OC=PEK&DC=SHA&dstDesp=GUANGZHOU%B9%E3%D6%D D&dst2=CAN&DD=2014-10-22&DT=7&BD=&BT=7&AL=ALL&DR=true&image.x=37&image.y=14", False
.setRequestHeader "Referer", "/S1/GNCX/"
.Send
strText = .responsetext
strURL = Split(Split(strText, "setTimeout(""window.location.replace('")(1), "'")(0)
.Open "GET", strHost & strURL, False
.Send
strText = .responsetext
Debug.Print strText
End With
End Sub
Sub Main()
Dim strText As String
Dim strHost As String
Dim strURL As String
strHost = ""
With CreateObject("WinHttp.WinHttpRequest.5.1")
.Open "GET", strHost & "/WEB/Flight/WaitingSearch.aspx?JT=1&OC=PEK&DC=SHA&dstDesp=GUANGZHOU%B9%E3%D6%D D&dst2=CAN&DD=2014-10-22&DT=7&BD=&BT=7&AL=ALL&DR=true&image.x=37&image.y=14", False
.setRequestHeader "Referer", "/S1/GNCX/"
.Send
strText = .responsetext
strURL = Split(Split(strText, "setTimeout(""window.location.replace('")(1), "'")(0)
.Open "GET", strHost & strURL, False
.Send
strText = .responsetext
Debug.Print strText
End With
End Sub
本帖最后由wcymiss 于2014-10-24 15:18 编辑
对获取数据作个小结:
1、清除缓存cookie历史记录后用fiddler抓包。
2、搜索所需数据,找到数据真实网页(别忘了对fiddler事先进行设置,否则有可能搜不到数据)
3、用代码模拟Request框的Raw按钮下的内容:
首先只写Open和Send,看是否有数据;(xmlhttp)(winhttp有时解析utf-8字符不成功,所以初始测试首选xmlhttp)
无数据的话,首选模拟Referer;(winhttp)
仍然不行的话,观察Cookie或是URL或SendData中有无动态参数。
有的话需要追根朔源。
(这步需要时间和耐心)
其他模拟一般都是小概率事件,如果遇到了我只能说你很不幸。
最后,祝你成功!
Sub Main()
Dim strText As String
With CreateObject("WinHttp.WinHttpRequest.5.1")
.SetProxy 2, "218.75.100.114:8080"
.Open "GET", "/ic.asp", False
.send
strText = ByteToStr(.Responsebody, "GB2312")'请自行拷贝之前的常用函数
Debug.Print strText
End With
End Sub
Sub Main()
Const strFileName As String = "C:\测试EH下载文件.rar"
With CreateObject("MSXML2.XMLHTTP") 'CreateObject("WinHttp.WinHttpRequest.5.1")
.Open "GET", "/forum.php?mod=attachment&aid=MTA2MjQ1MHw0MDQxMTAzOHw xNDE0MTIxNTg0fDIxODkxN3w4MDk5MjQ%3D", False
.Send
ByteToFile .responsebody, strFileName
End With
End Sub
Function unescape(strTobecoded As String) As String
With CreateObject("msscriptcontrol.scriptcontrol")
.Language = "JavaScript"
unescape = .Eval("unescape('" & strTobecoded & "');")
End With
End Function
Function JSEval(s As String) As String
With CreateObject("MSScriptControl.ScriptControl")
.Language = "javascript"
JSEval = .Eval(s)
End With
End Function
Function EnCodeByHTML(strText As String)
With CreateObject("htmlfile")
.write strText
EnCodeByHTML = .body.innertext
End With
End Function
有坛友问ResponseBody和ResponseText的区别,这里补充说下:
1、ResponseBody是二进制的数据,是服务器传来的没有经过任何加工的数据。
在网络中,文本一般都是以utf-8编码,所以xmlhttp/winhttp对象的ResponseText是按照utf-8编码把ResponseBody转换而成,也就是:Response Text=ByteToStr(Response Body,"UTF-8")
至于问“为什么ByteToStr(Response Text,"GB2312")没有结果”,原因是:一是参数类型不对,ByteToStr的第一参数是二进制数据的Byte数组类型,Response Text是文本类型,系统提示出错;二是,即使进行了将文本转成二进制数据的转换(如下面代码里的b7=s这样的转换),这种转换也是按照某种编码进行的,这样的二进制已经进行过一次编码加工了,你再用ByteToStr就得不到原来的字符了。
处理数据的通用方法:
1、数组法:
用split和数组,循环将所需数据取出。
优点:不需其他对象辅助,起点低,会数组即可。
缺点:需要分析数据结构,对于复杂结构的数据,需要多步才能完成。
Sub Main()
Dim strText As String
Dim arrRow, arrCell
Dim i As Long, j As Long, n As Long
Dim arrColumn
Dim arrData(1 To 1000, 1 To 10)
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "/lccp/jrxp.aspx", False
.Send
strText = .responsetext
End With
arrColumn = Array(, , 9, 12, 14, 16, 18, 20, 22, 24, 26)
arrRow = Split(strText, "name='proTest' ")
For i = 1 To UBound(arrRow)
arrCell = Split(arrRow(i), ">")
n = n + 1
arrData(n, 1) = Split(Split(arrCell(0), "value='")(1), "'")(0)
For j = 2 To 10
arrData(n, j) = Split(arrCell(arrColumn(j)), "<")(0)
Next
Next
Cells.Clear
Range("a1:j1").Value = Split("产品名称是否在售银行起售日停售日币种管理期(月) 产品类型预期收益(%) 收益类型", " ")
Range("a2").Resize(n, 10).Value = arrData
End Sub
2、正则法:
用正则拆解字符串,提取匹配数据,循环取出。
优点:即便复杂结构的数据,也有可能一步到位。
缺点:需要学习正则知识。
Sub Main()
Const gc As String = "" '群号
Const bkn As String = "" '从fiddler中获取
Const uin As String = "" 'QQ号
Const skey As String = "" '从fiddler中获取
Dim strText As String
Dim RegMatch As Object
Dim arrData(1 To 1000, 1 To 2)
Dim n As Long
With CreateObject("WinHttp.WinHttpRequest.5.1")
.Open "GET",
"/cgi-bin/qun_info/get_group_members_new?gc=" & gc & "&bkn=" & bkn, False
.setRequestHeader "Cookie", "uin=o" & uin & "; skey=" & skey
.Send
strText = .responsetext
Debug.Print strText
End With
With CreateObject("VBScript.Regexp")
.Global = True
.Pattern = "{""b"":\d+,""g"":\d+,""n"":""([^""]*)"",""u"":(\d+)}"
For Each RegMatch In .Execute(strText)
n = n + 1
arrData(n, 1) = RegMatch.submatches(0)
arrData(n, 2) = RegMatch.submatches(1)
Next
End With
Set RegMatch = Nothing
Cells.Clear
Range("a1:b1").Value = Array("昵称", "QQ号")
Range("a2").Resize(n, 2).Value = arrData
End Sub
处理table
table数据处理,除了之前的两种通用方法外,还有以下几种方法:
1、html法
将table数据写入htmldocument对象,然后循环取出表格的各个元素。
优点:可以利用htmldocument对象整理表格。
缺点:需要学习html相关知识。
以作业二为例:
Sub Main()
Dim strText As String
Dim arrData(1 To 1000, 1 To 3)
Dim i As Long, j As Long
Dim TR As Object, TD As Object
With CreateObject("MSXML2.XMLHTTP")
.Open "POST", "/Template/WebService1.asmx/Present3DList", False
.setRequestHeader "Content-Type", "application/json"
.Send "{pageindex:'1',lottory:'TC7XCData_jiangS',pl3:'',name:'江苏七星彩',isgp: '0'}"
strText = Split(JSEval(.responsetext), "<script")(0) '本例的script运行会提示错误,所以去除这部分script代码
End With
With CreateObject("htmlfile")
.write strText
i = 0
For Each TR In .all.tags("table")(2).Rows
i = i + 1
j = 0
For Each TD In TR.Cells
j = j + 1
arrData(i, j) = TD.innerText
Next
Next
End With
Set TR = Nothing
Set TD = Nothing
Cells.Clear
Range("C:C").NumberFormat = "@" '设置文本格式以显示数字前面的0
Range("a1").Resize(i, 3).Value = arrData
End Sub
Function JSEval(s As String) As String
With CreateObject("MSScriptControl.ScriptControl")
.Language = "javascript"
JSEval = .Eval(s)
End With
End Function
2、QueryTable法:
这个是excel自带的网抓利器。
个人觉得它最大的优势就是处理table很方便。
优点:处理table方便,代码简短。
缺点:会产生定义名称。
多页循环时每页都会产生行字段名称,需要后续处理删除。
Sub Main()
Cells.Delete
With
ActiveSheet.QueryTables.Add("url;/lccp/jrxp.aspx", Range("a1"))
.WebFormatting = xlWebFormattingNone '不包含格式
.WebSelectionType = xlSpecifiedTables '指定table模式
.WebTables = "2" '第2张table
.Refresh False
End With
End Sub
3、复制粘贴法:
table部分的文字可以直接复制到单元格内,且保留数据原格式。
优点:只需取出table部分,不需分析数据内部结构。
代码编写简便。
缺点:有时格式反而是累赘。
Sub Main()
Dim strText As String
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "/lccp/jrxp.aspx", False
.Send
strText = .responsetext
End With
strText = "<table" & Split(Split(strText, "<table")(2), "</table>")(0) & "</table>"
CopyToClipbox strText
Cells.Clear
Range("a1").Select
ActiveSheet.Paste
End Sub
Sub CopyToClipbox(strText As String)
'文本拷贝到剪贴板
With CreateObject("new:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
.SetText strText
.PutInClipboard
End With
End Sub
处理xml数据
Sub Main()
ThisWorkbook.XmlImport _
URL:="/fzjy/tjsj/pztj/pzrtj/2014/index.xml", _ ImportMap:=Nothing, _
Overwrite:=True, _
Destination:=ActiveSheet.Range("a1")
End Sub
Sub Main()
Dim arrEM(1 To 4), arrEMname
Dim arrData(1000, 1 To 4)
Dim i As Long, j As Long
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "/fzjy/tjsj/pztj/pzrtj/2014/index.xml", False
.send
arrEMname = Array(, "productid", "tradingday", "volume", "openinterest") With .responseXML
For i = 1 To 4
Set arrEM(i) = .getElementsByTagName(arrEMname(i))
Next
For i = 0 To arrEM(1).Length - 1
For j = 1 To 4
arrData(i, j) = arrEM(j)(i).Text
Next
Next
End With
End With
Cells.Clear
Range("a1:d1").Value = Array("品种", "日期", "总成交量", "总持仓量")
Range("a2").Resize(i, 4).Value = arrData
End Sub
初识JSON
JSON数据的特点:
1、用方括号扩住的是数组,数组内元素以逗号分隔。
如:["甲","乙","丙"]、[1,2,3]
2、用花括号扩住的是对象,对象内各属性以逗号分隔,属性名和属性值以冒号分隔。
同一对象里的属性名不会重复。
如对象{"name":"甲","age":36},含name、age两个属性,属性值分别为“甲”和36。
3、对象的属性值可以是数组。
数组的元素可以是对象。
JSON数据就是数组对象嵌套的大集合。
比如,下面的JSON数据记录了甲乙二人的基本信息:
JSON转换成vba对象
1、JSON数组在vba内需要用For Each来获取其元素:(For Each 后面的变量不能定义为Object类型)
1、JSON数组在vba内需要用For Each来获取其元素:(For Each 后面的变量不能定义为
Object类型)
Sub Test()
Const strJSON As String = "[""甲"",""乙"",""丙""]"
Dim objJSON As Object
Dim Cell '这里不能定义为object类型
With CreateObject("msscriptcontrol.scriptcontrol")
.Language = "JavaScript"
.AddCode "var mydata =" & strJSON
Set objJSON = .CodeObject
End With
Stop '查看vba本地窗口里objJSON对象以了解JSON数据在vba里的形态
For Each Cell In objJSON.mydata
Debug.Print Cell
Next
End Sub
2、JSON对象在vba内可直接用“对象.属性”的方法获取,但当名称不被vba允许时,用
CallByName函数获取:
Sub Test()
Const strJSON As String = "{""name"":""甲"",""age"":36}"
Dim objJSON As Object
With CreateObject("msscriptcontrol.scriptcontrol")
.Language = "JavaScript"
.AddCode "var mydata=" & strJSON
Set objJSON = .CodeObject
End With
Stop '查看本地窗口
Debug.Print objJSON.mydata.age
Debug.Print '此句出错
End Sub
登陆:
Sub Main()
Const username As String = "vbatest"
Const password As String = "12341234"
Dim strText As String
Dim uid As String
uid = username & "@"
With CreateObject("MSXML2.XMLHTTP")
.Open "POST", "https:///entry/cgi/ntesdoor?df=mail163_letter&funcid=loginone&ifra me=1&passtype=1&product=mail163&race=63_31_31_gz&uid=" & username & "@", False
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded" .Send
"savelogin=0&url2=http%3A%2F%%2Ferrorpage%2Ferror163.htm&username =" & username & "&password=" & password & "&password="
strText = .getallresponseheaders
Debug.Print strText
strText = .responsetext
Debug.Print strText
End With
End Sub
登录之后可以做什么----查询数据
登录并非是我们的最终目的。
最终目的是查询一些非登录不能查看的数据,或是发送数据。
如论坛登录后,可下载附件,可发帖;邮箱登录后,可收件发件。
前面讲过,xmlhttp和winhttp只要该对象不销毁,都可以保持cookie。
我们登录就是为了。