En este capítulo usaremos Parse::Eyapp para desarrollar las primeras fases - análisis léxico, análisis sintáctico y análisis de ámbito - de un compilador para un subconjunto de C al que denominaremos Simple C :
%% program: definition<%name PROGRAM +>.program { $program } ; definition: $funcDef { $funcDef } | %name FUNCTION $basictype $funcDef { $funcDef } | declaration {} ; basictype: %name INT INT | %name CHAR CHAR ; funcDef: $ID '(' $params ')' $block { flat_statements($block); $block->{parameters} = []; $block->{function_name} = $ID; $block->type("FUNCTION"); return $block; } ; params: ( basictype ID arraySpec)<%name PARAMS * ','> { $_[1] } ; block: '{'.bracket declaration<%name DECLARATIONS *>.decs statement<%name STATEMENTS *>.sts '}' { flat_statements($sts); $sts->type("BLOCK") if $decs->children; return $sts; } ; declaration: %name DECLARATION $basictype $declList ';' ; declList: (ID arraySpec) <%name VARLIST + ','> { $_[1] } ; arraySpec: ( '[' INUM ']')* { $_[1]->type("ARRAYSPEC"); $_[1] } ; statement: expression ';' { $_[1] } | ';' | %name BREAK $BREAK ';' | %name CONTINUE $CONTINUE ';' | %name EMPTYRETURN RETURN ';' | %name RETURN RETURN expression ';' | block { $_[1] } | %name IF ifPrefix statement %prec '+' | %name IFELSE ifPrefix statement 'ELSE' statement | %name WHILE $loopPrefix statement ; ifPrefix: IF '(' expression ')' { $_[3] } ; loopPrefix: $WHILE '(' expression ')' { $_[3] } ; expression: binary <+ ','> { return $_[1]->child(0) if ($_[1]->children() == 1); return $_[1]; } ; Variable: %name VAR ID | %name VARARRAY $ID ('[' binary ']') <%name INDEXSPEC +> ; Primary: %name INUM INUM | %name CHARCONSTANT CHARCONSTANT | $Variable { return $Variable } | '(' expression ')' { $_[2] } | $function_call { return $function_call # bypass } ; function_call: %name FUNCTIONCALL ID '(' binary <%name ARGLIST * ','> ')' ; Unary: '++' Variable | '--' Variable | Primary { $_[1] } ; binary: Unary { $_[1] } | %name PLUS binary '+' binary | %name MINUS binary '-' binary | %name TIMES binary '*' binary | %name DIV binary '/' binary | %name MOD binary '%' binary | %name LT binary '<' binary | %name GT binary '>' binary | %name GE binary '>=' binary | %name LE binary '<=' binary | %name EQ binary '==' binary | %name NE binary '!=' binary | %name AND binary '&' binary | %name EXP binary '**' binary | %name OR binary '|' binary | %name ASSIGN $Variable '=' binary | %name PLUSASSIGN $Variable '+=' binary | %name MINUSASSIGN $Variable '-=' binary | %name TIMESASSIGN $Variable '*=' binary | %name DIVASSIGN $Variable '/=' binary | %name MODASSIGN $Variable '%=' binary ;
/* File: Simple/Syntax.eyp Full Type checking To build it, Do make or: eyapp -m Simple::Syntax Syntax.eyp; */ %{ use strict; use Carp; use warnings; use Data::Dumper; use List::MoreUtils qw(firstval); our $VERSION = "0.4"; my $debug = 1; my %reserved = ( int => "INT", char => "CHAR", if => "IF", else => "ELSE", break => "BREAK", continue => "CONTINUE", return => "RETURN", while => "WHILE" ); my %lexeme = ( '=' => "ASSIGN", '+' => "PLUS", '-' => "MINUS", '*' => "TIMES", '/' => "DIV", '%' => "MOD", '|' => "OR", '&' => "AND", '{' => "LEFTKEY", '}' => "RIGHTKEY", ',' => "COMMA", ';' => "SEMICOLON", '(' => "LEFTPARENTHESIS", ')' => "RIGHTPARENTHESIS", '[' => "LEFTBRACKET", ']' => "RIGHTBRACKET", '==' => "EQ", '+=' => "PLUSASSIGN", '-=' => "MINUSASSIGN", '*=' => "TIMESASSIGN", '/=' => "DIVASSIGN", '%=' => "MODASSIGN", '!=' => "NE", '<' => "LT", '>' => "GT", '<=' => "LE", '>=' => "GE", '++' => "INC", '--' => "DEC", '**' => "EXP" ); my ($tokenbegin, $tokenend) = (1, 1); sub flat_statements { my $block = shift; my $i = 0; for ($block->children) { if ($_->type eq "STATEMENTS") { splice @{$block->{children}}, $i, 1, $_->children; } $i++; } } %} %syntactic token RETURN BREAK CONTINUE %right '=' '+=' '-=' '*=' '/=' '%=' %left '|' %left '&' %left '==' '!=' %left '<' '>' '>=' '<=' %left '+' '-' %left '*' '/' '%' %right '**' %right '++' '--' %right 'ELSE' %tree
%% sub _Error { my($token)=$_[0]->YYCurval; my($what)= $token ? "input: '$token->[0]' in line $token->[1]" : "end of input"; my @expected = $_[0]->YYExpect(); my $expected = @expected? "Expected one of these tokens: '@expected'":""; croak "Syntax error near $what. $expected\n"; } sub _Lexer { my($parser)=shift; my $token; for ($parser->YYData->{INPUT}) { return('',undef) if !defined($_) or $_ eq ''; #Skip blanks s{\A ((?: \s+ # any white space char | /\*.*?\*/ # C like comments )+ ) } {}xs and do { my($blanks)=$1; #Maybe At EOF return('', undef) if $_ eq ''; $tokenend += $blanks =~ tr/\n//; }; $tokenbegin = $tokenend; s/^('.')// and return('CHARCONSTANT', [$1, $tokenbegin]); s/^([0-9]+(?:\.[0-9]+)?)// and return('INUM',[$1, $tokenbegin]); s/^([A-Za-z][A-Za-z0-9_]*)// and do { my $word = $1; my $r; return ($r, [$r, $tokenbegin]) if defined($r = $reserved{$word}); return('ID',[$word, $tokenbegin]); }; m/^(\S\S)/ and defined($token = $1) and exists($lexeme{$token}) and do { s/..//; return ($token, [$token, $tokenbegin]); }; # do m/^(\S)/ and defined($token = $1) and exists($lexeme{$token}) and do { s/.//; return ($token, [$token, $tokenbegin]); }; # do die "Unexpected character at $tokenbegin\n"; } # for } sub compile { my($self)=shift; my ($t); $self->YYData->{INPUT} = $_[0]; $t = $self->YYParse( yylex => \&_Lexer, yyerror => \&_Error, #yydebug => 0x1F ); return $t; } sub TERMINAL::value { return $_[0]->{attr}[0]; } ########## line Support sub TERMINAL::line { return $_[0]->{attr}[1]; } sub VAR::line { my $self = shift; return $self->child(0)->{attr}[1]; } sub PLUS::line { $_[0]->{lines}[0] } { no warnings; *TIMES::line = *DIV::line = *MINUS::line = *ASSIGN::line =*GT::line =*IF::line =*RETURN::line = \&PLUS::line; *VARARRAY::line = *FUNCTIONCALL::line =\&VAR::line; } ############## Debugging and Display sub show_trees { my ($t) = shift; my $debug = shift; $Data::Dumper::Indent = 1; print Dumper $t if $debug > 3; local $Parse::Eyapp::Node::INDENT = $debug; print $t->str."\n"; } sub TERMINAL::info { my $a = join ':', @{$_[0]->{attr}}; return $a } sub TERMINAL::save_attributes { # $_[0] is a syntactic terminal # $_[1] is the father. push @{$_[1]->{lines}}, $_[0]->[1]; # save the line! } sub WHILE::line { return $_[0]->{line} }
pl@nereida:~/Lbook/code/Simple-Syntax/script$ usesyntax.pl bugmatch01.c 2 test (int n) { while (1) { if (1>0) { a = 2; break; } else if (2> 0){ b = 3; continue; } } } PROGRAM( FUNCTION( WHILE( INUM( TERMINAL[1:3] ), STATEMENTS( IFELSE( GT( INUM( TERMINAL[1:4] ), INUM( TERMINAL[0:4] ) ) # GT, STATEMENTS( ASSIGN( VAR( TERMINAL[a:5] ), INUM( TERMINAL[2:5] ) ) # ASSIGN, BREAK ) # STATEMENTS, IF( GT( INUM( TERMINAL[2:8] ), INUM( TERMINAL[0:8] ) ) # GT, STATEMENTS( ASSIGN( VAR( TERMINAL[b:9] ), INUM( TERMINAL[3:9] ) ) # ASSIGN, CONTINUE ) # STATEMENTS ) # IF ) # IFELSE ) # STATEMENTS ) # WHILE ) # FUNCTION ) # PROGRAM
pl@nereida:~/Lbook/code/Simple-Syntax/script$ usesyntax.pl prueba26.c 2 int a[20],b,e[10]; g() {} int f(char c) { char d; c = 'X'; e[d][b] = 'A'+c; { int d; d = c * 2; } { d = a + b; { c = a + 1; } } c = d * 2; return c; } PROGRAM( FUNCTION, FUNCTION( ASSIGN( VAR( TERMINAL[c:7] ), CHARCONSTANT( TERMINAL['X':7] ) ) # ASSIGN, ASSIGN( VARARRAY( TERMINAL[e:8], INDEXSPEC( VAR( TERMINAL[d:8] ), VAR( TERMINAL[b:8] ) ) # INDEXSPEC ) # VARARRAY, PLUS( CHARCONSTANT( TERMINAL['A':8] ), VAR( TERMINAL[c:8] ) ) # PLUS ) # ASSIGN, BLOCK( ASSIGN( VAR( TERMINAL[d:11] ), TIMES( VAR( TERMINAL[c:11] ), INUM( TERMINAL[2:11] ) ) # TIMES ) # ASSIGN ) # BLOCK, ASSIGN( VAR( TERMINAL[d:14] ), PLUS( VAR( TERMINAL[a:14] ), VAR( TERMINAL[b:14] ) ) # PLUS ) # ASSIGN, ASSIGN( VAR( TERMINAL[c:16] ), PLUS( VAR( TERMINAL[a:16] ), INUM( TERMINAL[1:16] ) ) # PLUS ) # ASSIGN, ASSIGN( VAR( TERMINAL[c:19] ), TIMES( VAR( TERMINAL[d:19] ), INUM( TERMINAL[2:19] ) ) # TIMES ) # ASSIGN, RETURN( VAR( TERMINAL[c:20] ) ) # RETURN ) # FUNCTION ) # PROGRAM