

1 #!/usr/local/bin/perl5.8.0 -w
2 #
3 # This program created 2004, Dan Sugalski. The code in this file is in
4 # the public domain--go for it, good luck, don't forget to write.
5 use strict;
6 use Parse::RecDescent;
7 use Data::Dumper;
8
9 # Take the source and destination files as parameters
10 my ($source, $destination) = @ARGV;
11
12 my %global_vars;
13 my $tempcount = 0;
14 my (%temps) = (P => 0,
15 I => 0,
16 N => 0,
17 S => 0
18 );
19
20 # AUTOACTION simplifies the creation of a parse tree by specifying an action
21 # for each production (ie action is { [@item] })
22 $::RD_AUTOACTION = q{ [@item] };
23
24 my $grammar = <<'EOG';
25 field: /\b\w+\b/
26
27 stringconstant: /'[^']*'/ |
28 /"[^"]*"/
29 #"
30 float: /[+-]?(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?/
31
32 constant: float | stringconstant
33
34 addop: '+' | '-'
35 mulop: '*' | '/'
36 modop: '%'
37 cmpop: '<>' | '>='| '<=' | '<' | '>' | '='
38 logop: 'and' | 'or'
39
40 parenexpr: '(' expr ')'
41
42 simplevalue: parenexpr | constant | field
43
44 modval: <leftop: simplevalue modop simplevalue>
45
46 mulval: <leftop: modval mulop modval>
47
48 addval: <leftop: mulval addop mulval>
49
50 cmpval: <leftop: addval cmpop addval>
51
52 logval: <leftop: cmpval logop cmpval>
53
54 expr: logval
55
56 declare: 'declare' field
57
58 assign: field '=' expr
59
60 print: 'print' expr
61
62 statement: assign | print | declare
63 EOG
64
65 # ?? Makes emacs cperl syntax highlighting mode happier
66 my $parser = Parse::RecDescent->new($grammar);
La gramatica categoriza las prioridades de cada una de las operaciones:
categorías próximas al símbolo de arranque tienen menos prioridad
que aquellas mas lejanas.
68 my @nodes;
69 open SOURCE, "<$source" or die "Can't open source program ($!)";
70
71 while (<SOURCE>) {
72 # Strip the trailing newline and leading spaces. If the line is
73 # blank, then skip it
74 chomp;
75 s/^\s+//;
76 next unless $_;
77
78 # Parse the statement and throw an error if something went wrong
79 my $node = $parser->statement($_);
80 die "Bad statement" if !defined $node;
81
82 # put the parsed statement onto our list of nodes for later treatment
83 push @nodes, $node;
84 }
85
86 print Dumper(\@nodes);
87 #exit;
88
89 # At this point we have parsed the program and have a tree of it
90 # ready to process. So lets do so. First we set up our node handlers.
91
El programa principal lee una línea del fuente y construye el
árbol (línea 79). Los árboles se van guardando en la lista
@nodes. El paso siguiente es la generación de código:
# At this point we have parsed the program and have a tree of it
# ready to process. So lets do so. First we set up our node handlers.
my (%handlers) = (addval => \&handle_generic_val,
assign => \&handle_assign,
cmpval => \&handle_generic_val,
constant => \&delegate,
declare => \&handle_declare,
expr => \&delegate,
field => \&handle_field,
float => \&handle_float,
logval => \&handle_generic_val,
modval => \&handle_generic_val,
mulval => \&handle_generic_val,
negfield => \&handle_negfield,
parenexpr => \&handle_paren_expr,
print => \&handle_print,
simplevalue => \&delegate,
statement => \&delegate,
stringconstant => \&handle_stringconstant,
);
El autor ha optado por escribir un manipulador para cada tipo
de nodo. Es algo similar a lo que hicimos usando métodos
y herencia para el compilador de Tutu. Algunos nodos simplemente
delegan y otros recurren a un manipulador genérico.
La fase de generación de código comienza por la escritura de un preámbulo y termina con la escritura de un pie requeridos por el intérprete. En medio se sitúa el código correspondiente a la traducción de los nodos provenientes de las diversas líneas del fuente:
# Open the output file and emit the preamble
open PIR, ">$destination" or die "Can't open destination ($!)";
print PIR <<HEADER;
.sub __MAIN prototyped
.param pmc argv
HEADER
foreach my $node (@nodes) {
my @lines = process_node(@$node);
print PIR join("", @lines);
}
print PIR <<FOOTER;
end
.end
FOOTER
La subrutina process_node hace un recorrido
de los árboles de análisis, llamando a los manipuladores de los
nodos que están siendo visitados.
El elemento 0 del array elems identifica
la clase de nodo. Así la llamada
$handlers{$elems[0]}->(@elems)
produce una llamada al manipulador correspondiente,
pasándole como argumento los hijos
del nodo.
# The value of the last expression evaluated
sub last_expr_val {
return $::last_expr;
}
# Setting the last expression evaluated's value
sub set_last_expr_val {
$::last_expr = $_[0];
}
sub process_node {
my (@elems) = @_;
return "\n" unless @elems;
return "\n" unless defined($elems[0]);
if (ref $elems[0]) {
return process_node(@{$elems[0]});
} elsif (exists($handlers{$elems[0]})) {
return $handlers{$elems[0]}->(@elems);
} else {
return "***", $elems[0], "***\n";
}
}
A continuación siguen los diversos manipuladores para los diferentes tipos de nodo:
sub handle_assign {
my ($nodetype, $destvar, undef, $expr) = @_;
my @nodes;
push @nodes, process_node(@$expr);
my $rhs = last_expr_val();
push @nodes, process_node(@$destvar);
my $lhs = last_expr_val();
push @nodes, " $lhs = $rhs\n";
return @nodes;
}
sub handle_declare {
my ($nodetype, undef, $var) = @_;
my @lines;
my $varname = $var->[1];
# Does it exist?
if (defined $global_vars{$varname}) {
die "Multiple declaration of $varname";
}
$global_vars{$varname}++;
push @lines, " .local pmc $varname\n";
push @lines, " new $varname, .PerlInt\n";
return @lines;
}
sub handle_field {
my ($nodetype, $fieldname) = @_;
if (!exists $global_vars{$fieldname}) {
die "undeclared field $fieldname used";
}
set_last_expr_val($fieldname);
return;
}
sub handle_float {
my ($nodetype, $floatval) = @_;
set_last_expr_val($floatval);
return;
}
sub handle_generic_val {
my (undef, $terms) = @_;
my (@terms) = @$terms;
# Process the LHS
my $lhs = shift @terms;
my @tokens;
push @tokens, process_node(@$lhs);
my ($op, $rhs);
# Now keep processing the RHS as long as we have it
while (@terms) {
$op = shift @terms;
$rhs = shift @terms;
my $val = last_expr_val();
my $oper = $op->[1];
push @tokens, process_node(@$rhs);
my $other_val = last_expr_val();
my $dest = $temps{P}++;
foreach ($oper) {
# Simple stuff -- addition, subtraction, multiplication,
# division, and modulus. Just a quick imcc transform
/(\+|\-|\*|\/|%)/ && do { push @tokens, "new \$P$dest, .PerlInt\n";
push @tokens, "\$P$dest = $val $oper $other_val\n";
set_last_expr_val("\$P$dest");
last;
};
/and/ && do { push @tokens, "new \$P$dest, .PerlInt\n";
push @tokens, "\$P$dest = $val && $other_val\n";
set_last_expr_val("\$P$dest");
last;
};
/or/ && do { push @tokens, "new \$P$dest, .PerlInt\n";
push @tokens, "\$P$dest = $val || $other_val\n";
set_last_expr_val("\$P$dest");
last;
};
/<>/ && do { my $label = "eqcheck$tempcount";
$tempcount++;
push @tokens, "new \$P$dest, .Integer\n";
push @tokens, "\$P$dest = 1\n";
push @tokens, "ne $val, $other_val, $label\n";
push @tokens, "\$P$dest = 0\n";
push @tokens, "$label:\n";
set_last_expr_val("\$P$dest");
last;
};
/=/ && do { my $label = "eqcheck$tempcount";
$tempcount++;
push @tokens, "new \$P$dest, .Integer\n";
push @tokens, "\$P$dest = 1\n";
push @tokens, "eq $val, $other_val, $label\n";
push @tokens, "\$P$dest = 0\n";
push @tokens, "$label:\n";
set_last_expr_val("\$P$dest");
last;
};
/</ && do { my $label = "eqcheck$tempcount";
$tempcount++;
push @tokens, "new \$P$dest, .Integer\n";
push @tokens, "\$P$dest = 1\n";
push @tokens, "lt $val, $other_val, $label\n";
push @tokens, "\$P$dest = 0\n";
push @tokens, "$label:\n";
set_last_expr_val("\$P$dest");
last;
};
/>/ && do { my $label = "eqcheck$tempcount";
$tempcount++;
push @tokens, "new \$P$dest, .Integer\n";
push @tokens, "\$P$dest = 1\n";
push @tokens, "gt $val, $other_val, $label\n";
push @tokens, "\$P$dest = 0\n";
push @tokens, "$label:\n";
set_last_expr_val("\$P$dest");
last;
};
die "Can't handle $oper";
}
}
return @tokens;
}
sub handle_paren_expr {
my ($nodetype, undef, $expr, undef) = @_;
return process_node(@$expr);
}
sub handle_stringconstant {
my ($nodetype, $stringval) = @_;
set_last_expr_val($stringval);
return;
}
sub handle_print {
my ($nodetype, undef, $expr) = @_;
my @nodes;
push @nodes, process_node(@$expr);
my $val = last_expr_val();
push @nodes, " print $val\n";
return @nodes;
}
sub delegate {
my ($nodetype, $nodeval) = @_;
return process_node(@$nodeval);
}
El fichero fuente foo.len:
declare foo declare bar foo = 15 bar = (foo+8)*32-7 print bar print "\n" print foo % 10 print "\n"
Compilamos:
$ ./compiler.pl foo.len foo.pirEsto produce por pantalla un volcado de los árboles de als diferentes sentencias. Asi para
declare foo:
$VAR1 = [ [ 'statement', [ 'declare', 'declare', [ 'field', 'foo' ] ] ],para la sentencia
foo = 15 el árbol es:
[ 'statement',
[ 'assign',
[
'field',
'foo'
],
'=',
[ 'expr',
[ 'logval',
[
[ 'cmpval',
[
[ 'addval',
[
[ 'mulval',
[
[ 'modval',
[
[ 'simplevalue',
[ 'constant',
[
'float',
'15'
]
] ] ] ] ] ] ] ] ] ] ] ] ] ] ],
Este es el árbol de la sentencia print bar:
[ 'statement',
[ 'print',
'print',
[ 'expr',
[ 'logval',
[
[ 'cmpval',
[
[ 'addval',
[
[ 'mulval',
[
[ 'modval',
[
[ 'simplevalue',
[
'field',
'bar'
] ] ] ] ] ] ] ] ] ] ] ] ] ] ],
Además de los árboles presentados en la
salida estándar, se produce como salida el fichero foo.pir
conteniendo el código parrot intermedio:
$ cat foo.pir .sub __MAIN prototyped .param pmc argv .local pmc foo new foo, .PerlInt .local pmc bar new bar, .PerlInt foo = 15 new $P0, .PerlInt $P0 = foo + 8 new $P1, .PerlInt $P1 = $P0 * 32 new $P2, .PerlInt $P2 = $P1 - 7 bar = $P2 print bar print "\n" new $P3, .PerlInt $P3 = foo % 10 print $P3 print "\n" end .endAntes de ejecutarlo veamos las opciones de
parrot:
$ parrot -h
parrot [Options] <file>
Options:
-h --help
-V --version
<Run core options>
-b --bounds-checks|--slow-core
-C --CGP-core
-f --fast-core
-g --computed-goto-core
-j --jit-core
-p --profile
-P --predereferenced-core
-S --switched-core
-t --trace
<VM options>
-d --debug[=HEXFLAGS]
--help-debug
-w --warnings
-G --no-gc
--gc-debug
--leak-test|--destroy-at-end
-. --wait Read a keystroke before starting
<Compiler options>
-v --verbose
-E --pre-process-only
-o --output=FILE
--output-pbc
-O --optimize[=LEVEL]
-a --pasm
-c --pbc
-r --run-pbc
-y --yydebug
<Language options>
--python
see docs/running.pod for more
Con la opción -o podemos producir un fichero en formato
pbc:
$ parrot -o foo.pbc foo.pirque podemos ejecutar con el depurador
pdb
(para construirlo en el momento de la instalación de Parrot
deberás hacer make pdb).
$ pdb foo.pbc
Parrot Debugger 0.0.2
(pdb) h
List of commands:
disassemble -- disassemble the bytecode
load -- load a source code file
list (l) -- list the source code file
run (r) -- run the program
break (b) -- add a breakpoint
watch (w) -- add a watchpoint
delete (d) -- delete a breakpoint
disable -- disable a breakpoint
enable -- reenable a disabled breakpoint
continue (c) -- continue the program execution
next (n) -- run the next instruction
eval (e) -- run an instruction
trace (t) -- trace the next instruction
print (p) -- print the interpreter registers
stack (s) -- examine the stack
info -- print interpreter information
quit (q) -- exit the debugger
help (h) -- print this help
Type "help" followed by a command name for full documentation.
Veamos el programa traducido:
(pdb) list 1 17 1 new_p_ic P16,32 2 new_p_ic P30,32 3 set_p_ic P16,15 4 new_p_ic P29,32 5 add_p_p_ic P29,P16,8 6 new_p_ic P28,32 7 mul_p_p_ic P28,P29,32 8 new_p_ic P29,32 9 sub_p_p_ic P29,P28,7 10 set_p_p P30,P29 11 print_p P30 12 print_sc "\n" 13 new_p_ic P30,32 14 mod_p_p_ic P30,P16,10 15 print_p P30 16 print_sc "\n" 17 endProcedemos a ejecutarlo:
(pdb) n 2 new_p_ic P30,32 (pdb) 3 set_p_ic P16,15 (pdb) 4 new_p_ic P29,32 (pdb) 5 add_p_p_ic P29,P16,8 (pdb) p P16 P16 = [PerlInt] Stringified: 15 5 add_p_p_ic P29,P16,8 (pdb) c 729 5 Program exited. (pdb) quit $

