Véase el módulo URI y la sección 3.12 de estos apuntes.
lhp@nereida:~/Lperl/src/perl_networking/ch5$ cat -n http_get.pl 1 #!/usr/bin/perl 2 use strict; 3 use IO::Socket qw(:DEFAULT :crlf); 4 use URI; 5 local $/ = CRLF . CRLF; 6 7 my $url = shift or die "Usage: web_fetch.pl <URL>\n"; 8 $url = URI->new($url)->canonical; 9 my ($host,$path, $port) = ($url->host(), $url->path(), $url->port()); 10 die "Bad URL " unless $host and $path; 11 12 my $socket = IO::Socket::INET->new(PeerAddr => $host, PeerPort => "http($port)") 13 or die "Can't connect: $!"; 14 15 print $socket "GET $path HTTP/1.0",CRLF,CRLF; 16 17 my $header = <$socket>; # read the header 18 $header =~ s/$CRLF/\n/g; # replace CRLF with logical newline 19 print $header; 20 21 my $data; 22 print $data while read($socket,$data,1024) > 0;
La siguiente ejecución con el depurador muestra el funcionamiento del programa:
lhp@nereida:~/Lperl/src/perl_networking/ch5$ perl -wd http_get.pl 'http://nereida.deioc.ull.es/~pp2/'
main::(http_get.pl:5): local $/ = CRLF . CRLF;
DB<1> n
main::(http_get.pl:7): my $url = shift or die "Usage: web_fetch.pl <URL>\n";
DB<1>
main::(http_get.pl:8): $url = URI->new($url)->canonical;
DB<1>
main::(http_get.pl:9): my ($host,$path, $port) = ($url->host(), $url->path(), $url->port());
DB<1> x $url
0 URI::http=SCALAR(0x84ae8b4)
-> 'http://nereida.deioc.ull.es/~pp2/'
DB<2> n
main::(http_get.pl:10): die "Bad URL " unless $host and $path;
DB<2> x ($host,$path, $port)
0 'nereida.deioc.ull.es'
1 '/~pp2/'
2 80
DB<3> n
main::(http_get.pl:12): my $socket = IO::Socket::INET->new(PeerAddr => $host, PeerPort => "http($port)")
main::(http_get.pl:13): or die "Can't connect: $!";
DB<3> n
main::(http_get.pl:15): print $socket "GET $path HTTP/1.0",CRLF,CRLF;
DB<3> x $socket
0 IO::Socket::INET=GLOB(0x8626c40)
-> *Symbol::GEN0
FileHandle({*Symbol::GEN0}) => fileno(3)
DB<4> c 17
main::(http_get.pl:17): my $header = <$socket>; # read the header
DB<5> n
main::(http_get.pl:18): $header =~ s/$CRLF/\n/g; # replace CRLF with logical newline
DB<5> p $header
HTTP/1.1 200 OK
Date: Mon, 12 May 2008 12:33:09 GMT
Server: Apache/1.3.34 (Debian) PHP/5.2.0-8+etch10 mod_ssl/2.8.25 OpenSSL/0.9.8c mod_perl/1.29
Last-Modified: Tue, 19 Feb 2008 11:17:24 GMT
ETag: "16ec3a-d9-47babac4"
Accept-Ranges: bytes
Content-Length: 217
Connection: close
Content-Type: text/html; charset=utf-8
DB<6> c 22
HTTP/1.1 200 OK
Date: Mon, 12 May 2008 12:33:09 GMT
Server: Apache/1.3.34 (Debian) PHP/5.2.0-8+etch10 mod_ssl/2.8.25 OpenSSL/0.9.8c mod_perl/1.29
Last-Modified: Tue, 19 Feb 2008 11:17:24 GMT
ETag: "16ec3a-d9-47babac4"
Accept-Ranges: bytes
Content-Length: 217
Connection: close
Content-Type: text/html; charset=utf-8
main::(http_get.pl:22): print $data while read($socket,$data,1024) > 0;
DB<7> x read($socket,$data,1024)
0 217
DB<8> p $data
<HTML>
<HEAD>
<TITLE>PP2</TITLE>
<META HTTP-EQUIV="refresh" CONTENT="0;URL=pp20708/index.html">
</HEAD>
<BODY>
El enlace que busca se encuentra en:
<a href="pp20708index.html">
pp20708/index.html"</a>
</BODY>
</HTML>
DB<9> q
lhp@nereida:~/Lperl/src/perl_networking/ch5$
