1 #!/usr/local/bin/perl5.8.0 -w 2 # 3 # This program created 2004, Dan Sugalski. The code in this file is in 4 # the public domain--go for it, good luck, don't forget to write. 5 use strict; 6 use Parse::RecDescent; 7 use Data::Dumper; 8 9 # Take the source and destination files as parameters 10 my ($source, $destination) = @ARGV; 11 12 my %global_vars; 13 my $tempcount = 0; 14 my (%temps) = (P => 0, 15 I => 0, 16 N => 0, 17 S => 0 18 ); 19 20 # AUTOACTION simplifies the creation of a parse tree by specifying an action 21 # for each production (ie action is { [@item] }) 22 $::RD_AUTOACTION = q{ [@item] }; 23 24 my $grammar = <<'EOG'; 25 field: /\b\w+\b/ 26 27 stringconstant: /'[^']*'/ | 28 /"[^"]*"/ 29 #" 30 float: /[+-]?(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?/ 31 32 constant: float | stringconstant 33 34 addop: '+' | '-' 35 mulop: '*' | '/' 36 modop: '%' 37 cmpop: '<>' | '>='| '<=' | '<' | '>' | '=' 38 logop: 'and' | 'or' 39 40 parenexpr: '(' expr ')' 41 42 simplevalue: parenexpr | constant | field 43 44 modval: <leftop: simplevalue modop simplevalue> 45 46 mulval: <leftop: modval mulop modval> 47 48 addval: <leftop: mulval addop mulval> 49 50 cmpval: <leftop: addval cmpop addval> 51 52 logval: <leftop: cmpval logop cmpval> 53 54 expr: logval 55 56 declare: 'declare' field 57 58 assign: field '=' expr 59 60 print: 'print' expr 61 62 statement: assign | print | declare 63 EOG 64 65 # ?? Makes emacs cperl syntax highlighting mode happier 66 my $parser = Parse::RecDescent->new($grammar);La gramatica categoriza las prioridades de cada una de las operaciones: categorías próximas al símbolo de arranque tienen menos prioridad que aquellas mas lejanas.
68 my @nodes; 69 open SOURCE, "<$source" or die "Can't open source program ($!)"; 70 71 while (<SOURCE>) { 72 # Strip the trailing newline and leading spaces. If the line is 73 # blank, then skip it 74 chomp; 75 s/^\s+//; 76 next unless $_; 77 78 # Parse the statement and throw an error if something went wrong 79 my $node = $parser->statement($_); 80 die "Bad statement" if !defined $node; 81 82 # put the parsed statement onto our list of nodes for later treatment 83 push @nodes, $node; 84 } 85 86 print Dumper(\@nodes); 87 #exit; 88 89 # At this point we have parsed the program and have a tree of it 90 # ready to process. So lets do so. First we set up our node handlers. 91El programa principal lee una línea del fuente y construye el árbol (línea 79). Los árboles se van guardando en la lista
@nodes
. El paso siguiente es la generación de código:
# At this point we have parsed the program and have a tree of it # ready to process. So lets do so. First we set up our node handlers. my (%handlers) = (addval => \&handle_generic_val, assign => \&handle_assign, cmpval => \&handle_generic_val, constant => \&delegate, declare => \&handle_declare, expr => \&delegate, field => \&handle_field, float => \&handle_float, logval => \&handle_generic_val, modval => \&handle_generic_val, mulval => \&handle_generic_val, negfield => \&handle_negfield, parenexpr => \&handle_paren_expr, print => \&handle_print, simplevalue => \&delegate, statement => \&delegate, stringconstant => \&handle_stringconstant, );El autor ha optado por escribir un manipulador para cada tipo de nodo. Es algo similar a lo que hicimos usando métodos y herencia para el compilador de Tutu. Algunos nodos simplemente delegan y otros recurren a un manipulador genérico.
La fase de generación de código comienza por la escritura de un preámbulo y termina con la escritura de un pie requeridos por el intérprete. En medio se sitúa el código correspondiente a la traducción de los nodos provenientes de las diversas líneas del fuente:
# Open the output file and emit the preamble open PIR, ">$destination" or die "Can't open destination ($!)"; print PIR <<HEADER; .sub __MAIN prototyped .param pmc argv HEADER foreach my $node (@nodes) { my @lines = process_node(@$node); print PIR join("", @lines); } print PIR <<FOOTER; end .end FOOTER
La subrutina process_node
hace un recorrido
de los árboles de análisis, llamando a los manipuladores de los
nodos que están siendo visitados.
El elemento 0 del array elems
identifica
la clase de nodo. Así la llamada
$handlers{$elems[0]}->(@elems)
produce una llamada al manipulador correspondiente,
pasándole como argumento los hijos
del nodo.
# The value of the last expression evaluated sub last_expr_val { return $::last_expr; } # Setting the last expression evaluated's value sub set_last_expr_val { $::last_expr = $_[0]; } sub process_node { my (@elems) = @_; return "\n" unless @elems; return "\n" unless defined($elems[0]); if (ref $elems[0]) { return process_node(@{$elems[0]}); } elsif (exists($handlers{$elems[0]})) { return $handlers{$elems[0]}->(@elems); } else { return "***", $elems[0], "***\n"; } }
A continuación siguen los diversos manipuladores para los diferentes tipos de nodo:
sub handle_assign { my ($nodetype, $destvar, undef, $expr) = @_; my @nodes; push @nodes, process_node(@$expr); my $rhs = last_expr_val(); push @nodes, process_node(@$destvar); my $lhs = last_expr_val(); push @nodes, " $lhs = $rhs\n"; return @nodes; } sub handle_declare { my ($nodetype, undef, $var) = @_; my @lines; my $varname = $var->[1]; # Does it exist? if (defined $global_vars{$varname}) { die "Multiple declaration of $varname"; } $global_vars{$varname}++; push @lines, " .local pmc $varname\n"; push @lines, " new $varname, .PerlInt\n"; return @lines; } sub handle_field { my ($nodetype, $fieldname) = @_; if (!exists $global_vars{$fieldname}) { die "undeclared field $fieldname used"; } set_last_expr_val($fieldname); return; } sub handle_float { my ($nodetype, $floatval) = @_; set_last_expr_val($floatval); return; } sub handle_generic_val { my (undef, $terms) = @_; my (@terms) = @$terms; # Process the LHS my $lhs = shift @terms; my @tokens; push @tokens, process_node(@$lhs); my ($op, $rhs); # Now keep processing the RHS as long as we have it while (@terms) { $op = shift @terms; $rhs = shift @terms; my $val = last_expr_val(); my $oper = $op->[1]; push @tokens, process_node(@$rhs); my $other_val = last_expr_val(); my $dest = $temps{P}++; foreach ($oper) { # Simple stuff -- addition, subtraction, multiplication, # division, and modulus. Just a quick imcc transform /(\+|\-|\*|\/|%)/ && do { push @tokens, "new \$P$dest, .PerlInt\n"; push @tokens, "\$P$dest = $val $oper $other_val\n"; set_last_expr_val("\$P$dest"); last; }; /and/ && do { push @tokens, "new \$P$dest, .PerlInt\n"; push @tokens, "\$P$dest = $val && $other_val\n"; set_last_expr_val("\$P$dest"); last; }; /or/ && do { push @tokens, "new \$P$dest, .PerlInt\n"; push @tokens, "\$P$dest = $val || $other_val\n"; set_last_expr_val("\$P$dest"); last; }; /<>/ && do { my $label = "eqcheck$tempcount"; $tempcount++; push @tokens, "new \$P$dest, .Integer\n"; push @tokens, "\$P$dest = 1\n"; push @tokens, "ne $val, $other_val, $label\n"; push @tokens, "\$P$dest = 0\n"; push @tokens, "$label:\n"; set_last_expr_val("\$P$dest"); last; }; /=/ && do { my $label = "eqcheck$tempcount"; $tempcount++; push @tokens, "new \$P$dest, .Integer\n"; push @tokens, "\$P$dest = 1\n"; push @tokens, "eq $val, $other_val, $label\n"; push @tokens, "\$P$dest = 0\n"; push @tokens, "$label:\n"; set_last_expr_val("\$P$dest"); last; }; /</ && do { my $label = "eqcheck$tempcount"; $tempcount++; push @tokens, "new \$P$dest, .Integer\n"; push @tokens, "\$P$dest = 1\n"; push @tokens, "lt $val, $other_val, $label\n"; push @tokens, "\$P$dest = 0\n"; push @tokens, "$label:\n"; set_last_expr_val("\$P$dest"); last; }; />/ && do { my $label = "eqcheck$tempcount"; $tempcount++; push @tokens, "new \$P$dest, .Integer\n"; push @tokens, "\$P$dest = 1\n"; push @tokens, "gt $val, $other_val, $label\n"; push @tokens, "\$P$dest = 0\n"; push @tokens, "$label:\n"; set_last_expr_val("\$P$dest"); last; }; die "Can't handle $oper"; } } return @tokens; } sub handle_paren_expr { my ($nodetype, undef, $expr, undef) = @_; return process_node(@$expr); } sub handle_stringconstant { my ($nodetype, $stringval) = @_; set_last_expr_val($stringval); return; } sub handle_print { my ($nodetype, undef, $expr) = @_; my @nodes; push @nodes, process_node(@$expr); my $val = last_expr_val(); push @nodes, " print $val\n"; return @nodes; } sub delegate { my ($nodetype, $nodeval) = @_; return process_node(@$nodeval); }
El fichero fuente foo.len
:
declare foo declare bar foo = 15 bar = (foo+8)*32-7 print bar print "\n" print foo % 10 print "\n"
Compilamos:
$ ./compiler.pl foo.len foo.pirEsto produce por pantalla un volcado de los árboles de als diferentes sentencias. Asi para
declare foo
:
$VAR1 = [ [ 'statement', [ 'declare', 'declare', [ 'field', 'foo' ] ] ],para la sentencia
foo = 15
el árbol es:
[ 'statement', [ 'assign', [ 'field', 'foo' ], '=', [ 'expr', [ 'logval', [ [ 'cmpval', [ [ 'addval', [ [ 'mulval', [ [ 'modval', [ [ 'simplevalue', [ 'constant', [ 'float', '15' ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ],Este es el árbol de la sentencia
print bar
:
[ 'statement', [ 'print', 'print', [ 'expr', [ 'logval', [ [ 'cmpval', [ [ 'addval', [ [ 'mulval', [ [ 'modval', [ [ 'simplevalue', [ 'field', 'bar' ] ] ] ] ] ] ] ] ] ] ] ] ] ] ],Además de los árboles presentados en la salida estándar, se produce como salida el fichero
foo.pir
conteniendo el código parrot intermedio:
$ cat foo.pir .sub __MAIN prototyped .param pmc argv .local pmc foo new foo, .PerlInt .local pmc bar new bar, .PerlInt foo = 15 new $P0, .PerlInt $P0 = foo + 8 new $P1, .PerlInt $P1 = $P0 * 32 new $P2, .PerlInt $P2 = $P1 - 7 bar = $P2 print bar print "\n" new $P3, .PerlInt $P3 = foo % 10 print $P3 print "\n" end .endAntes de ejecutarlo veamos las opciones de
parrot
:
$ parrot -h parrot [Options] <file> Options: -h --help -V --version <Run core options> -b --bounds-checks|--slow-core -C --CGP-core -f --fast-core -g --computed-goto-core -j --jit-core -p --profile -P --predereferenced-core -S --switched-core -t --trace <VM options> -d --debug[=HEXFLAGS] --help-debug -w --warnings -G --no-gc --gc-debug --leak-test|--destroy-at-end -. --wait Read a keystroke before starting <Compiler options> -v --verbose -E --pre-process-only -o --output=FILE --output-pbc -O --optimize[=LEVEL] -a --pasm -c --pbc -r --run-pbc -y --yydebug <Language options> --python see docs/running.pod for moreCon la opción
-o
podemos producir un fichero en formato
pbc
:
$ parrot -o foo.pbc foo.pirque podemos ejecutar con el depurador
pdb
(para construirlo en el momento de la instalación de Parrot
deberás hacer make pdb
).
$ pdb foo.pbc Parrot Debugger 0.0.2 (pdb) h List of commands: disassemble -- disassemble the bytecode load -- load a source code file list (l) -- list the source code file run (r) -- run the program break (b) -- add a breakpoint watch (w) -- add a watchpoint delete (d) -- delete a breakpoint disable -- disable a breakpoint enable -- reenable a disabled breakpoint continue (c) -- continue the program execution next (n) -- run the next instruction eval (e) -- run an instruction trace (t) -- trace the next instruction print (p) -- print the interpreter registers stack (s) -- examine the stack info -- print interpreter information quit (q) -- exit the debugger help (h) -- print this help Type "help" followed by a command name for full documentation.Veamos el programa traducido:
(pdb) list 1 17 1 new_p_ic P16,32 2 new_p_ic P30,32 3 set_p_ic P16,15 4 new_p_ic P29,32 5 add_p_p_ic P29,P16,8 6 new_p_ic P28,32 7 mul_p_p_ic P28,P29,32 8 new_p_ic P29,32 9 sub_p_p_ic P29,P28,7 10 set_p_p P30,P29 11 print_p P30 12 print_sc "\n" 13 new_p_ic P30,32 14 mod_p_p_ic P30,P16,10 15 print_p P30 16 print_sc "\n" 17 endProcedemos a ejecutarlo:
(pdb) n 2 new_p_ic P30,32 (pdb) 3 set_p_ic P16,15 (pdb) 4 new_p_ic P29,32 (pdb) 5 add_p_p_ic P29,P16,8 (pdb) p P16 P16 = [PerlInt] Stringified: 15 5 add_p_p_ic P29,P16,8 (pdb) c 729 5 Program exited. (pdb) quit $