用INDY元件,如何取得网页的所有链接? |
尚未結案
|
eoisoft
一般會員 發表:20 回覆:12 積分:6 註冊:2004-11-24 發送簡訊給我 |
|
eoisoft
一般會員 發表:20 回覆:12 積分:6 註冊:2004-11-24 發送簡訊給我 |
|
chris_shieh
高階會員 發表:46 回覆:308 積分:240 註冊:2004-04-26 發送簡訊給我 |
參考 http://delphi.about.com/od/adptips2004/a/bltip1204_3.htm
unit Unit1; interface uses Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms, Dialogs, StdCtrls; type TForm1 = class(TForm) btn_GetImageLink: TButton; Memo1: TMemo; btn_GetAllLinks: TButton; editURL: TEdit; procedure btn_GetImageLinkClick(Sender: TObject); procedure btn_GetAllLinksClick(Sender: TObject); private { Private declarations } public { Public declarations } end; var Form1: TForm1; implementation {$R *.dfm} uses mshtml, ActiveX, COMObj, IdHTTP, idURI; procedure GetImageLinks(AURL: String; AList: TStrings) ; var IDoc : IHTMLDocument2; strHTML : String; v : Variant; x : integer; ovLinks : OleVariant; DocURL : String; URI : TidURI; ImgURL : String; idHTTP : TidHTTP; begin AList.Clear; URI := TidURI.Create(AURL) ; try DocURL := 'http://' URI.Host; if URI.Path <> '/' then DocURL := DocURL URI.Path; finally URI.Free; end; Idoc:=CreateComObject(Class_HTMLDOcument) as IHTMLDocument2; try IDoc.designMode:='on'; while IDoc.readyState<>'complete' do Application.ProcessMessages; v:=VarArrayCreate([0,0],VarVariant) ; idHTTP := TidHTTP.Create(nil) ; try strHTML := idHTTP.Get(AURL) ; finally idHTTP.Free; end; v[0]:= strHTML; IDoc.write(PSafeArray(System.TVarData(v).VArray)) ; IDoc.designMode:='off'; while IDoc.readyState<>'complete' do Application.ProcessMessages; ovLinks := IDoc.all.tags('IMG') ; if ovLinks.Length > 0 then begin for x := 0 to ovLinks.Length-1 do begin ImgURL := ovLinks.Item(x).src; // The stuff below will probably need a little tweaking // Deteriming and turning realtive URLs into absolute URLs // is not that difficult but this is all I could come up with // in such a short notice. if (ImgURL[1] = '/') then begin // more than likely a relative URL so // append the DocURL ImgURL := DocURL ImgUrl; end else begin if (Copy(ImgURL, 1, 11) = 'about:blank') then begin ImgURL := DocURL Copy(ImgUrl, 12, Length(ImgURL)) ; end; end; AList.Add(ImgURL) ; Application.ProcessMessages; end; end; finally IDoc := nil; end; end; procedure GetURLLinks(AURL: String; AList: TStrings); var IDoc : IHTMLDocument2; strHTML : String; v : Variant; x : integer; ovLinks : OleVariant; DocURL : String; URI : TidURI; ImgURL : String; idHTTP : TidHTTP; begin AList.Clear; URI := TidURI.Create(AURL) ; try DocURL := 'http://' URI.Host; if URI.Path <> '/' then DocURL := DocURL URI.Path; finally URI.Free; end; Idoc:=CreateComObject(Class_HTMLDOcument) as IHTMLDocument2; try IDoc.designMode:='on'; while IDoc.readyState<>'complete' do Application.ProcessMessages; v:=VarArrayCreate([0,0],VarVariant) ; idHTTP := TidHTTP.Create(nil) ; try strHTML := idHTTP.Get(AURL) ; finally idHTTP.Free; end; v[0]:= strHTML; IDoc.write(PSafeArray(System.TVarData(v).VArray)) ; IDoc.designMode:='off'; while IDoc.readyState<>'complete' do Application.ProcessMessages; ovLinks := IDoc.all.tags('A') ; if ovLinks.Length > 0 then begin for x := 0 to ovLinks.Length-1 do begin ImgURL := ovLinks.Item(x).href; // The stuff below will probably need a little tweaking // Deteriming and turning realtive URLs into absolute URLs // is not that difficult but this is all I could come up with // in such a short notice. if (ImgURL[1] = '/') then begin // more than likely a relative URL so // append the DocURL ImgURL := DocURL ImgUrl; end else begin if (Copy(ImgURL, 1, 11) = 'about:blank') then begin ImgURL := DocURL Copy(ImgUrl, 12, Length(ImgURL)) ; end; end; AList.Add(ImgURL) ; Application.ProcessMessages; end; end; finally IDoc := nil; end; end; //Usage procedure TForm1.btn_GetImageLinkClick(Sender: TObject) ; begin memo1.Clear; GetImageLinks(editURL.Text, Memo1.Lines) ; end; procedure TForm1.btn_GetAllLinksClick(Sender: TObject); begin memo1.Clear; GetURLLinks(editURL.Text, Memo1.Lines) ; end; end. |
eoisoft
一般會員 發表:20 回覆:12 積分:6 註冊:2004-11-24 發送簡訊給我 |
|
Ktop_Robot
站務副站長 發表:0 回覆:3511 積分:0 註冊:2007-04-17 發送簡訊給我 |
本站聲明 |
1. 本論壇為無營利行為之開放平台,所有文章都是由網友自行張貼,如牽涉到法律糾紛一切與本站無關。 2. 假如網友發表之內容涉及侵權,而損及您的利益,請立即通知版主刪除。 3. 請勿批評中華民國元首及政府或批評各政黨,是藍是綠本站無權干涉,但這裡不是政治性論壇! |