Véase el módulo URI y la sección 3.12 de estos apuntes.
lhp@nereida:~/Lperl/src/perl_networking/ch5$ cat -n http_get.pl 1 #!/usr/bin/perl 2 use strict; 3 use IO::Socket qw(:DEFAULT :crlf); 4 use URI; 5 local $/ = CRLF . CRLF; 6 7 my $url = shift or die "Usage: web_fetch.pl <URL>\n"; 8 $url = URI->new($url)->canonical; 9 my ($host,$path, $port) = ($url->host(), $url->path(), $url->port()); 10 die "Bad URL " unless $host and $path; 11 12 my $socket = IO::Socket::INET->new(PeerAddr => $host, PeerPort => "http($port)") 13 or die "Can't connect: $!"; 14 15 print $socket "GET $path HTTP/1.0",CRLF,CRLF; 16 17 my $header = <$socket>; # read the header 18 $header =~ s/$CRLF/\n/g; # replace CRLF with logical newline 19 print $header; 20 21 my $data; 22 print $data while read($socket,$data,1024) > 0;
La siguiente ejecución con el depurador muestra el funcionamiento del programa:
lhp@nereida:~/Lperl/src/perl_networking/ch5$ perl -wd http_get.pl 'http://nereida.deioc.ull.es/~pp2/' main::(http_get.pl:5): local $/ = CRLF . CRLF; DB<1> n main::(http_get.pl:7): my $url = shift or die "Usage: web_fetch.pl <URL>\n"; DB<1> main::(http_get.pl:8): $url = URI->new($url)->canonical; DB<1> main::(http_get.pl:9): my ($host,$path, $port) = ($url->host(), $url->path(), $url->port()); DB<1> x $url 0 URI::http=SCALAR(0x84ae8b4) -> 'http://nereida.deioc.ull.es/~pp2/' DB<2> n main::(http_get.pl:10): die "Bad URL " unless $host and $path; DB<2> x ($host,$path, $port) 0 'nereida.deioc.ull.es' 1 '/~pp2/' 2 80 DB<3> n main::(http_get.pl:12): my $socket = IO::Socket::INET->new(PeerAddr => $host, PeerPort => "http($port)") main::(http_get.pl:13): or die "Can't connect: $!"; DB<3> n main::(http_get.pl:15): print $socket "GET $path HTTP/1.0",CRLF,CRLF; DB<3> x $socket 0 IO::Socket::INET=GLOB(0x8626c40) -> *Symbol::GEN0 FileHandle({*Symbol::GEN0}) => fileno(3) DB<4> c 17 main::(http_get.pl:17): my $header = <$socket>; # read the header DB<5> n main::(http_get.pl:18): $header =~ s/$CRLF/\n/g; # replace CRLF with logical newline DB<5> p $header HTTP/1.1 200 OK Date: Mon, 12 May 2008 12:33:09 GMT Server: Apache/1.3.34 (Debian) PHP/5.2.0-8+etch10 mod_ssl/2.8.25 OpenSSL/0.9.8c mod_perl/1.29 Last-Modified: Tue, 19 Feb 2008 11:17:24 GMT ETag: "16ec3a-d9-47babac4" Accept-Ranges: bytes Content-Length: 217 Connection: close Content-Type: text/html; charset=utf-8 DB<6> c 22 HTTP/1.1 200 OK Date: Mon, 12 May 2008 12:33:09 GMT Server: Apache/1.3.34 (Debian) PHP/5.2.0-8+etch10 mod_ssl/2.8.25 OpenSSL/0.9.8c mod_perl/1.29 Last-Modified: Tue, 19 Feb 2008 11:17:24 GMT ETag: "16ec3a-d9-47babac4" Accept-Ranges: bytes Content-Length: 217 Connection: close Content-Type: text/html; charset=utf-8 main::(http_get.pl:22): print $data while read($socket,$data,1024) > 0; DB<7> x read($socket,$data,1024) 0 217 DB<8> p $data <HTML> <HEAD> <TITLE>PP2</TITLE> <META HTTP-EQUIV="refresh" CONTENT="0;URL=pp20708/index.html"> </HEAD> <BODY> El enlace que busca se encuentra en: <a href="pp20708index.html"> pp20708/index.html"</a> </BODY> </HTML> DB<9> q lhp@nereida:~/Lperl/src/perl_networking/ch5$