Delphi 通过MSHTML实现一个HTML解析类
- 1、下载文档前请自行甄别文档内容的完整性,平台不提供额外的编辑、内容补充、找答案等附加服务。
- 2、"仅部分预览"的文档,不可在线预览部分如存在完整性等问题,可反馈申请退款(可完整预览的文档不适用该条件!)。
- 3、如文档侵犯您的权益,请联系客服反馈,我们会尽快为您处理(人工客服工作时间:9:00-18:30)。
Delphi 通过MSHTML实现一个HTML解析类
Delphi 通过MSHTML实现一个HTML解析类
分类:Delphi编程
2010-01-05 09:14
400人阅读
评论(7)
收藏
举报
最近经常会模拟网页提交返回网页源码,然后获得网页中相应的元素,于是需要常常解析Html中相应的各种元素。网络是个好东西,搜索一番,就找到了好几个Delphi版本的HtmlParser的类库,试着使用了几个,发现解析起来都不完整,或多或少地会出现一些问题!于是想到了如果界面上有一个浏览器,我们可以通过WebBrowser的Document接口对网页元素进行操作,很是方便!但是模拟网页提交,界面上是不一定要出现WebBrowser的,肯定有办法不通过WebBrowser就直接解析HTML,那便是我不要WebBrowser这个外壳,只要它里面的Document文档接口对象,就能实现对Html的解析了。查找了一番MSDN,然后Google一下,果然可行,构建方法如下:
//创建IHTMLDocument2接口
CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, FHtmlDoc);
接口创建好了之后就能够对文档元素进行解析了,很是爽快!
结合了我自己的特有操作,我对Combobox,Table,Frame等一些网页元素做了相应的封装,实现了一个HTMLParser,大致代码如下:
这里只给出声明,代码请在最后下载
代码
(******************************************************)
(*                   DeXian Studio                    *)
(*       Web page element manipulation library        *)
(*                                                    *)
(*                DxHtmlElement Unit                  *)
(*        Copyright(c) 2008-2010 BuDeXian             *)
(*        email:appleak46@  QQ:75492895               *)
(*   (e-mail address is truncated in this listing)    *)
(******************************************************)
unit DxHtmlElement;

interface

uses
  Windows, SysUtils, Clipbrd, MSHTML, ActiveX, OleCtrls, Graphics, TypInfo;

{ Get Element Type }
{ One Boolean predicate per element kind; each takes the element to test. }
function IsSelectElement(eleElement: IHTMLElement): Boolean;
function IsPwdElement(eleElement: IHTMLElement): Boolean;
function IsTextElement(element: IHTMLElement): Boolean;
function IsTableElement(element: IHTMLElement): Boolean;
function IsElementCollection(element: IHTMLElement): Boolean;
function IsChkElement(element: IHTMLElement): Boolean;
function IsRadioBtnElement(element: IHTMLElement): Boolean;
function IsMemoElement(element: IHTMLElement): Boolean;
function IsFormElement(element: IHTMLElement): Boolean;
function IsIMGElement(element: IHTMLElement): Boolean;
function IsInIMGElement(element: IHTMLElement): Boolean;
function IsLabelElement(element: IHTMLElement): Boolean;
function IsLinkElement(element: IHTMLElement): Boolean;
function IsListElement(element: IHTMLElement): Boolean;
function IsControlElement(element: IHTMLElement): Boolean;
function IsObjectElement(element: IHTMLElement): Boolean;
function IsFrameElement(element: IHTMLElement): Boolean;
function IsInPutBtnElement(element: IHTMLElement): Boolean;
function IsInHiddenElement(element: IHTMLElement): Boolean;
function IsSubmitElement(element: IHTMLElement): Boolean;

{ Get ImgElement Data }
{ Image lookups within a document, keyed by name / src / alt or by index.
  NOTE(review): exact matching rules are defined in the implementation,
  which is not part of this listing. }
function GetPicIndex(doc: IHTMLDocument2; Src: string; Alt: string): Integer;
function GetPicElement(doc: IHTMLDocument2; imgName: string; src: string;
  Alt: string): IHTMLImgElement;
function GetRegCodePic(doc: IHTMLDocument2; ImgName: string; Src: string;
  Alt: string): TPicture; overload;
function GetRegCodePic(doc: IHTMLDocument2; Index: integer): TPicture; overload;
function GetRegCodePic(doc: IHTMLDocument2; element: IHTMLIMGElement): TPicture; overload;

type
  { Procedural type with the stdcall signature declared below; presumably
    bound at run time to the ObjectFromLresult API - TODO confirm. }
  TObjectFromLResult = function(LRESULT: lResult; const IID: TIID;
    WPARAM: wParam; out pObject): HRESULT; stdcall;
{ Element kinds returned by GetElementType. }
  TEleMentType = (ELE_UNKNOW, ELE_TEXT, ELE_PWD, ELE_SELECT, ELE_CHECKBOX,
    ELE_RADIOBTN, ELE_MEMO, ELE_FORM, ELE_IMAGE, ELE_LABEL, ELE_LINK,
    ELE_LIST, ELE_CONTROL, ELE_OBJECT, ELE_FRAME, ELE_INPUTBTN,
    ELE_INIMAGE, ELE_INHIDDEN);

{ Element classification: as an enum value and as a string. }
function GetElementType(element: IHTMLELEMENT): TEleMentType;
function GetElementTypeName(element: IHTMLELEMENT): string;

{ Table helpers. The GetWebBrowser* variants take table/row/column indexes,
  hand the result back through ResValue and return a Boolean
  (presumably success/failure - TODO confirm against the implementation). }
function GetHtmlTableCell(aTable: IHTMLTable; aRow, aCol: Integer): IHTMLElement;
function GetHtmlTable(aDoc: IHTMLDocument2; aIndex: Integer): IHTMLTable;
function GetWebBrowserHtmlTableCellText(Doc: IHTMLDocument2;
  const TableIndex, RowIndex, ColIndex: Integer; var ResValue: string): Boolean;
function GetHtmlTableRowHtml(aTable: IHTMLTable; aRow: Integer): IHTMLElement;
function GetWebBrowserHtmlTableCellHtml(Doc: IHTMLDocument2;
  const TableIndex, RowIndex, ColIndex: Integer; var ResValue: string): Boolean;
{ NOTE(review): "GeHtmlTableHtml" lacks the "t" of its Get* siblings; the name
  is kept unchanged because callers may reference it - confirm against the
  full source before renaming. }
function GeHtmlTableHtml(aTable: IHTMLTable; aRow: Integer): IHTMLElement;
function GetWebBrowserHtmlTableHtml(Doc: IHTMLDocument2;
  const TableIndex, RowIndex: Integer; var ResValue: string): Boolean;

type
  { Forward declarations: the frame and collection classes refer to each other. }
  TDxWebFrameCollection = class;
  TDxWebElementCollection = class;
{ Load-state values reported by TDxWebFrame.ReadyState. }
  TLoadState = (Doc_Loading, Doc_Completed, Doc_Invalidate);

  { Wraps one IHTMLWINDOW2 frame and exposes its document, its elements and
    its nested frames through read-only properties. }
  TDxWebFrame = class
  private
    FFrame: IHTMLWINDOW2;
    FElementCollections: TDxWebElementCollection;
    FWebFrameCollections: TDxWebFrameCollection;
    function GetSrc: string;
    function GetElementCount: integer;
    function GetWebFrameCollections: TDxWebFrameCollection;
    function GetElementCollections: TDxWebElementCollection;
    function GetDocument: IHTMLDOCUMENT2;
    function GetReadState: TLoadState;
    function GetIsLoaded: boolean;
    procedure SetFrame(const Value: IHTMLWINDOW2);
    function GetName: string;
  public
    constructor Create(IFrame: IHTMLWINDOW2);
    destructor Destroy; override;
    property Frame: IHTMLWINDOW2 read FFrame write SetFrame;
    property Src: string read GetSrc;
    property Document: IHTMLDOCUMENT2 read GetDocument;
    property Name: string read GetName;
    property Frames: TDxWebFrameCollection read GetWebFrameCollections;
    property ElementCount: integer read GetElementCount;
    property ElementCollections: TDxWebElementCollection read GetElementCollections;
    property ReadyState: TLoadState read GetReadState;
    property IsLoaded: boolean read GetIsLoaded;
  end;

  { Wraps an IHTMLFramesCollection2; frames can be fetched by index or by
    name, either as the raw IHTMLWINDOW2 interface or as a TDxWebFrame. }
  TDxWebFrameCollection = class
  private
    FFrameCollection: IHTMLFramesCollection2;
    Frame: TDxWebFrame;
    function GetCount: integer;
    function GetFrameInterfaceByIndex(index: integer): IHTMLWINDOW2;
    function GetFrameInterfaceByName(Name: string): IHTMLWINDOW2;
    function GetFrameByIndex(index: integer): TDxWebFrame;
    function GetFrameByName(Name: string): TDxWebFrame;
    procedure SetFrameCollection(const Value: IHTMLFramesCollection2);
  public
    constructor Create(ACollection: IHTMLFramesCollection2);
    destructor Destroy; override;
    property FrameCollection: IHTMLFramesCollection2 read FFrameCollection
      write SetFrameCollection;
    property Count: integer read GetCount;
    property FrameInterfaceByIndex[index: integer]: IHTMLWINDOW2
      read GetFrameInterfaceByIndex;
    property FrameInterfaceByName[Name: string]: IHTMLWINDOW2
      read GetFrameInterfaceByName;
    property FrameByIndex[index: integer]: TDxWebFrame read GetFrameByIndex;
    property FrameByName[Name: string]: TDxWebFrame read GetFrameByName;
  end;
{ Wraps an IHTMLElementCollection. Elements are reachable by name, by index,
  or by name plus index; a child collection is reachable by a string key. }
  TDxWebElementCollection = class
  private
    FCollection: IHTMLElementCollection;
    FChildCollection: TDxWebElementCollection;
    { property setter }
    procedure SetCollection(const Value: IHTMLElementCollection);
    { property getters }
    function GetCount: integer;
    function GetCollection(index: string): TDxWebElementCollection;
    function GetElement(itemName: string; index: integer): IHTMLElement;
    function GetElementByName(itemName: string): IHTMLELEMENT;
    function GetElementByIndex(index: integer): IHTMLELEMENT;
  public
    constructor Create(ACollection: IHTMLElementCollection);
    destructor Destroy; override;
    property Collection: IHTMLElementCollection
      read FCollection
      write SetCollection;
    property ChildElementCollection[index: string]: TDxWebElementCollection
      read GetCollection;
    property ElementCount: integer
      read GetCount;
    property Element[itemName: string; index: integer]: IHTMLElement
      read GetElement;
    property ElementByName[itemName: string]: IHTMLELEMENT
      read GetElementByName;
    property ElementByIndex[index: integer]: IHTMLELEMENT
      read GetElementByIndex;
  end;

  { Element collection subtype for links; adds no members of its own. }
  TLinkCollection = class(TDxWebElementCollection)
  end;
{ Forward declaration: TDxTableCollection hands out TDxWebTable instances. }
  TDxWebTable = class;

  { Tables of one IHTMLDOCUMENT2, addressable by name or index, either as the
    raw IHTMLTABLE interface or wrapped in a TDxWebTable. }
  TDxTableCollection = class
  private
    FTableCollection: IHTMLElementCollection;
    FDocument: IHTMLDOCUMENT2;
    FWebTable: TDxWebTable;
    function GetTableInterfaceByName(AName: string): IHTMLTABLE;
    procedure SetDocument(Value: IHTMLDOCUMENT2);
    function GetTableInterfaceByIndex(index: integer): IHTMLTABLE;
    function GetCount: integer;
    function GetTableByIndex(index: integer): TDxWebTable;
    function GetTableByName(AName: string): TDxWebTable;
  public
    constructor Create(Doc: IHTMLDOCUMENT2);
    destructor Destroy; override;
    property TableInterfaceByName[AName: string]: IHTMLTABLE
      read GetTableInterfaceByName;
    property TableInterfaceByIndex[index: integer]: IHTMLTABLE
      read GetTableInterfaceByIndex;
    property TableByName[AName: string]: TDxWebTable read GetTableByName;
    property TableByIndex[index: integer]: TDxWebTable read GetTableByIndex;
    property Document: IHTMLDOCUMENT2 read FDocument write SetDocument;
    property Count: integer read GetCount;
  end;

  { Wraps one IHTMLTABLE: row and per-row column counts, cell access as
    string or as IHTMLTableCell, plus InnerHtml / InnerText strings. }
  TDxWebTable = class
  private
    FTableInterface: IHTMLTABLE;
    function GetRowCount: integer;
    procedure SetTableInterface(const Value: IHTMLTABLE);
    function GetCell(ACol, ARow: integer): string;
    function GetRowColCount(RowIndex: integer): integer;
    function GetInnerHtml: string;
    function GetInnerText: string;
    function GetCellElement(ACol, ARow: Integer): IHTMLTableCell;
  public
    constructor Create(ATable: IHTMLTABLE);
    property TableInterface: IHTMLTABLE read FTableInterface write SetTableInterface;
    property RowCount: integer read GetRowCount;
    { Note the argument order: column first, then row. }
    property Cell[ACol: integer; ARow: integer]: string read GetCell;
    property CellElement[ACol: Integer; ARow: Integer]: IHTMLTableCell
      read GetCellElement;
    property RowColCount[RowIndex: integer]: integer read GetRowColCount;
    property InnerHtml: string read GetInnerHtml;
    property InnerText: string read GetInnerText;
  end;

  { Wraps an IHTMLSelectElement (an HTML <select>): item count, selected
    index, item lookup, item attributes, and the element's name and value. }
  TDxWebCombobox = class
  private
    FHtmlSelect: IHTMLSelectElement;
    function GetCount: Integer;
    procedure SetItemIndex(const Value: Integer);
    function GetItemIndex: Integer;
    function GetName: string;
    procedure SetName(const Value: string);
    function GetValue: string;
    procedure SetValue(const Value: string);
    procedure SetCombInterface(const Value: IHTMLSelectElement);
    function GetItemByName(EleName: string): string;
    function GetItemByIndex(index: integer): string;
    function GetItemAttribute(index: Integer; AttribName: string): OleVariant;
  public
    constructor Create(AWebCombo: IHTMLSelectElement);
    procedure Add(Ele: IHTMLElement);
    procedure Insert(Ele: IHTMLElement; Index: Integer);
    procedure Remove(index: Integer);
    property CombInterface: IHTMLSelectElement read FHtmlSelect write SetCombInterface;
    property Count: Integer read GetCount;
    property ItemIndex: Integer read GetItemIndex write SetItemIndex;
    property ItemByIndex[index: integer]: string read GetItemByIndex;
    property ItemByName[EleName: string]: string read GetItemByName;
    property ItemAttribute[index: Integer; AttribName: string]: OleVariant
      read GetItemAttribute;
    property Name: string read GetName write SetName;
    property value: string read GetValue write SetValue;
  end;

implementation

{ NOTE(review): this listing gives declarations only; the bodies of the
  routines and methods declared above must be supplied here before the unit
  will compile. }

end.
HTMLParser解析类的代码实现单元
代码
(******************************************************)
(*                   DeXian Studio                    *)
(*              HTML parsing unit library             *)
(*                DxHtmlParser Unit                   *)
(*        Copyright(c) 2008-2010 BuDeXian             *)
(*        email:appleak46@  QQ:75492895               *)
(*   (e-mail address is truncated in this listing)    *)
(******************************************************)
unit DxHtmlParser;

interface

uses
  Windows, MSHTML, ActiveX, DxHtmlElement, Forms;

type
  { HTML parser built on a stand-alone MSHTML document (no TWebBrowser).
    It owns one table collection, one element collection and one reusable
    combobox wrapper, all created in Create and freed in Destroy. }
  TDxHtmlParser = class
  private
    FHtmlDoc: IHTMLDocument2;
    FHTML: string;
    FWebTables: TDxTableCollection;
    FWebElements: TDxWebElementCollection;
    FWebComb: TDxWebCombobox;
    procedure SetHTML(const Value: string);
    function GetWebCombobox(AName: string): TDxWebCombobox;
  public
    constructor Create;
    destructor Destroy; override;
    property HTML: string read FHTML write SetHTML;
    property WebTables: TDxTableCollection read FWebTables;
    property WebElements: TDxWebElementCollection read FWebElements;
    property WebCombobox[Name: string]: TDxWebCombobox read GetWebCombobox;
  end;

implementation

{ TDxHtmlParser }

{ Initializes COM, creates the MSHTML document, switches it to design mode
  and pumps messages until it reports readyState 'complete', then creates
  the wrapper objects. }
constructor TDxHtmlParser.Create;
begin
  inherited Create;
  CoInitialize(nil);
  // Create the IHTMLDocument2 interface
  CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER,
    IID_IHTMLDocument2, FHtmlDoc);
  Assert(FHtmlDoc <> nil, '构建HTMLDocument接口失败');
  // Switch to design mode so that scripts are not executed
  FHtmlDoc.Set_designMode('On');
  // Wait for the document to become ready, keeping the message loop alive
  while not (FHtmlDoc.readyState = 'complete') do
  begin
    Sleep(1);
    Application.ProcessMessages;
  end;
  FWebTables := TDxTableCollection.Create(FHtmlDoc);
  FWebElements := TDxWebElementCollection.Create(nil);
  FWebComb := TDxWebCombobox.Create(nil);
end;

{ Frees the wrapper objects, releases the document and uninitializes COM. }
destructor TDxHtmlParser.Destroy;
begin
  FWebTables.Free;
  FWebElements.Free;
  FWebComb.Free;
  // Release the document explicitly: interface fields are otherwise cleared
  // only after this destructor returns, i.e. after CoUninitialize has
  // already run.
  FHtmlDoc := nil;
  CoUninitialize;
  inherited;
end;

{ Returns the shared combobox wrapper bound to the element named AName, or
  nil when the element collection has not been assigned yet. The same
  TDxWebCombobox instance is reused on every call, so a previously returned
  reference is re-bound by the next lookup. }
function TDxHtmlParser.GetWebCombobox(AName: string): TDxWebCombobox;
begin
  if FWebElements.Collection <> nil then
  begin
    FWebComb.CombInterface := FWebElements.ElementByName[AName] as IHTMLSelectElement;
    Result := FWebComb;
  end
  else
    Result := nil;
end;

{ NOTE(review): the body of TDxHtmlParser.SetHTML and the unit's closing
  "end." are not part of this listing; both are required for the unit to
  compile. }