

En este capítulo usaremos Parse::Eyapp para desarrollar las primeras fases - análisis léxico, análisis sintáctico y análisis de ámbito - de un compilador para un subconjunto de C al que denominaremos Simple C :
%%
program:
definition<%name PROGRAM +>.program
{ $program }
;
definition:
$funcDef
{ $funcDef }
| %name FUNCTION
$basictype $funcDef
{ $funcDef }
| declaration
{}
;
basictype:
%name INT
INT
| %name CHAR
CHAR
;
funcDef:
$ID '(' $params ')' $block
{
flat_statements($block);
$block->{parameters} = [];
$block->{function_name} = $ID;
$block->type("FUNCTION");
return $block;
}
;
params:
( basictype ID arraySpec)<%name PARAMS * ','>
{ $_[1] }
;
block:
'{'.bracket
declaration<%name DECLARATIONS *>.decs statement<%name STATEMENTS *>.sts '}'
{
flat_statements($sts);
$sts->type("BLOCK") if $decs->children;
return $sts;
}
;
declaration:
%name DECLARATION
$basictype $declList ';'
;
declList:
(ID arraySpec) <%name VARLIST + ','> { $_[1] }
;
arraySpec:
( '[' INUM ']')* { $_[1]->type("ARRAYSPEC"); $_[1] }
;
statement:
expression ';' { $_[1] }
| ';'
| %name BREAK
$BREAK ';'
| %name CONTINUE
$CONTINUE ';'
| %name EMPTYRETURN
RETURN ';'
| %name RETURN
RETURN expression ';'
| block { $_[1] }
| %name IF
ifPrefix statement %prec '+'
| %name IFELSE
ifPrefix statement 'ELSE' statement
| %name WHILE
$loopPrefix statement
;
ifPrefix:
IF '(' expression ')' { $_[3] }
;
loopPrefix:
$WHILE '(' expression ')' { $_[3] }
;
expression:
binary <+ ','>
{
return $_[1]->child(0) if ($_[1]->children() == 1);
return $_[1];
}
;
Variable:
%name VAR
ID
| %name VARARRAY
$ID ('[' binary ']') <%name INDEXSPEC +>
;
Primary:
%name INUM
INUM
| %name CHARCONSTANT
CHARCONSTANT
| $Variable
{
return $Variable
}
| '(' expression ')' { $_[2] }
| $function_call
{
return $function_call # bypass
}
;
function_call:
%name
FUNCTIONCALL
ID '(' binary <%name ARGLIST * ','> ')'
;
Unary:
'++' Variable
| '--' Variable
| Primary { $_[1] }
;
binary:
Unary { $_[1] }
| %name PLUS
binary '+' binary
| %name MINUS
binary '-' binary
| %name TIMES
binary '*' binary
| %name DIV
binary '/' binary
| %name MOD
binary '%' binary
| %name LT
binary '<' binary
| %name GT
binary '>' binary
| %name GE
binary '>=' binary
| %name LE
binary '<=' binary
| %name EQ
binary '==' binary
| %name NE
binary '!=' binary
| %name AND
binary '&' binary
| %name EXP
binary '**' binary
| %name OR
binary '|' binary
| %name ASSIGN
$Variable '=' binary
| %name PLUSASSIGN
$Variable '+=' binary
| %name MINUSASSIGN
$Variable '-=' binary
| %name TIMESASSIGN
$Variable '*=' binary
| %name DIVASSIGN
$Variable '/=' binary
| %name MODASSIGN
$Variable '%=' binary
;
/*
File: Simple/Syntax.eyp
Full Type checking
To build it, Do make or:
eyapp -m Simple::Syntax Syntax.eyp;
*/
%{
use strict;
use Carp;
use warnings;
use Data::Dumper;
use List::MoreUtils qw(firstval);
our $VERSION = "0.4";
my $debug = 1;
my %reserved = (
int => "INT",
char => "CHAR",
if => "IF",
else => "ELSE",
break => "BREAK",
continue => "CONTINUE",
return => "RETURN",
while => "WHILE"
);
my %lexeme = (
'=' => "ASSIGN",
'+' => "PLUS",
'-' => "MINUS",
'*' => "TIMES",
'/' => "DIV",
'%' => "MOD",
'|' => "OR",
'&' => "AND",
'{' => "LEFTKEY",
'}' => "RIGHTKEY",
',' => "COMMA",
';' => "SEMICOLON",
'(' => "LEFTPARENTHESIS",
')' => "RIGHTPARENTHESIS",
'[' => "LEFTBRACKET",
']' => "RIGHTBRACKET",
'==' => "EQ",
'+=' => "PLUSASSIGN",
'-=' => "MINUSASSIGN",
'*=' => "TIMESASSIGN",
'/=' => "DIVASSIGN",
'%=' => "MODASSIGN",
'!=' => "NE",
'<' => "LT",
'>' => "GT",
'<=' => "LE",
'>=' => "GE",
'++' => "INC",
'--' => "DEC",
'**' => "EXP"
);
my ($tokenbegin, $tokenend) = (1, 1);
sub flat_statements {
my $block = shift;
my $i = 0;
for ($block->children) {
if ($_->type eq "STATEMENTS") {
splice @{$block->{children}}, $i, 1, $_->children;
}
$i++;
}
}
%}
%syntactic token RETURN BREAK CONTINUE
%right '=' '+=' '-=' '*=' '/=' '%='
%left '|'
%left '&'
%left '==' '!='
%left '<' '>' '>=' '<='
%left '+' '-'
%left '*' '/' '%'
%right '**'
%right '++' '--'
%right 'ELSE'
%tree
%%
sub _Error {
my($token)=$_[0]->YYCurval;
my($what)= $token ? "input: '$token->[0]' in line $token->[1]" : "end of input";
my @expected = $_[0]->YYExpect();
my $expected = @expected? "Expected one of these tokens: '@expected'":"";
croak "Syntax error near $what. $expected\n";
}
sub _Lexer {
my($parser)=shift;
my $token;
for ($parser->YYData->{INPUT}) {
return('',undef) if !defined($_) or $_ eq '';
#Skip blanks
s{\A
((?:
\s+ # any white space char
| /\*.*?\*/ # C like comments
)+
)
}
{}xs
and do {
my($blanks)=$1;
#Maybe At EOF
return('', undef) if $_ eq '';
$tokenend += $blanks =~ tr/\n//;
};
$tokenbegin = $tokenend;
s/^('.')//
and return('CHARCONSTANT', [$1, $tokenbegin]);
s/^([0-9]+(?:\.[0-9]+)?)//
and return('INUM',[$1, $tokenbegin]);
s/^([A-Za-z][A-Za-z0-9_]*)//
and do {
my $word = $1;
my $r;
return ($r, [$r, $tokenbegin]) if defined($r = $reserved{$word});
return('ID',[$word, $tokenbegin]);
};
m/^(\S\S)/ and defined($token = $1) and exists($lexeme{$token})
and do {
s/..//;
return ($token, [$token, $tokenbegin]);
}; # do
m/^(\S)/ and defined($token = $1) and exists($lexeme{$token})
and do {
s/.//;
return ($token, [$token, $tokenbegin]);
}; # do
die "Unexpected character at $tokenbegin\n";
} # for
}
sub compile {
my($self)=shift;
my ($t);
$self->YYData->{INPUT} = $_[0];
$t = $self->YYParse( yylex => \&_Lexer, yyerror => \&_Error,
#yydebug => 0x1F
);
return $t;
}
sub TERMINAL::value {
return $_[0]->{attr}[0];
}
########## line Support
sub TERMINAL::line {
return $_[0]->{attr}[1];
}
sub VAR::line {
my $self = shift;
return $self->child(0)->{attr}[1];
}
sub PLUS::line {
$_[0]->{lines}[0]
}
{
no warnings;
*TIMES::line = *DIV::line = *MINUS::line = *ASSIGN::line
=*GT::line
=*IF::line
=*RETURN::line
= \&PLUS::line;
*VARARRAY::line = *FUNCTIONCALL::line
=\&VAR::line;
}
############## Debugging and Display
sub show_trees {
my ($t) = shift;
my $debug = shift;
$Data::Dumper::Indent = 1;
print Dumper $t if $debug > 3;
local $Parse::Eyapp::Node::INDENT = $debug;
print $t->str."\n";
}
sub TERMINAL::info {
my $a = join ':', @{$_[0]->{attr}};
return $a
}
sub TERMINAL::save_attributes {
# $_[0] is a syntactic terminal
# $_[1] is the father.
push @{$_[1]->{lines}}, $_[0]->[1]; # save the line!
}
sub WHILE::line {
return $_[0]->{line}
}
pl@nereida:~/Lbook/code/Simple-Syntax/script$ usesyntax.pl bugmatch01.c 2
test (int n)
{
while (1) {
if (1>0) {
a = 2;
break;
}
else if (2> 0){
b = 3;
continue;
}
}
}
PROGRAM(
FUNCTION(
WHILE(
INUM(
TERMINAL[1:3]
),
STATEMENTS(
IFELSE(
GT(
INUM(
TERMINAL[1:4]
),
INUM(
TERMINAL[0:4]
)
) # GT,
STATEMENTS(
ASSIGN(
VAR(
TERMINAL[a:5]
),
INUM(
TERMINAL[2:5]
)
) # ASSIGN,
BREAK
) # STATEMENTS,
IF(
GT(
INUM(
TERMINAL[2:8]
),
INUM(
TERMINAL[0:8]
)
) # GT,
STATEMENTS(
ASSIGN(
VAR(
TERMINAL[b:9]
),
INUM(
TERMINAL[3:9]
)
) # ASSIGN,
CONTINUE
) # STATEMENTS
) # IF
) # IFELSE
) # STATEMENTS
) # WHILE
) # FUNCTION
) # PROGRAM
pl@nereida:~/Lbook/code/Simple-Syntax/script$ usesyntax.pl prueba26.c 2
int a[20],b,e[10];
g() {}
int f(char c) {
char d;
c = 'X';
e[d][b] = 'A'+c;
{
int d;
d = c * 2;
}
{
d = a + b;
{
c = a + 1;
}
}
c = d * 2;
return c;
}
PROGRAM(
FUNCTION,
FUNCTION(
ASSIGN(
VAR(
TERMINAL[c:7]
),
CHARCONSTANT(
TERMINAL['X':7]
)
) # ASSIGN,
ASSIGN(
VARARRAY(
TERMINAL[e:8],
INDEXSPEC(
VAR(
TERMINAL[d:8]
),
VAR(
TERMINAL[b:8]
)
) # INDEXSPEC
) # VARARRAY,
PLUS(
CHARCONSTANT(
TERMINAL['A':8]
),
VAR(
TERMINAL[c:8]
)
) # PLUS
) # ASSIGN,
BLOCK(
ASSIGN(
VAR(
TERMINAL[d:11]
),
TIMES(
VAR(
TERMINAL[c:11]
),
INUM(
TERMINAL[2:11]
)
) # TIMES
) # ASSIGN
) # BLOCK,
ASSIGN(
VAR(
TERMINAL[d:14]
),
PLUS(
VAR(
TERMINAL[a:14]
),
VAR(
TERMINAL[b:14]
)
) # PLUS
) # ASSIGN,
ASSIGN(
VAR(
TERMINAL[c:16]
),
PLUS(
VAR(
TERMINAL[a:16]
),
INUM(
TERMINAL[1:16]
)
) # PLUS
) # ASSIGN,
ASSIGN(
VAR(
TERMINAL[c:19]
),
TIMES(
VAR(
TERMINAL[d:19]
),
INUM(
TERMINAL[2:19]
)
) # TIMES
) # ASSIGN,
RETURN(
VAR(
TERMINAL[c:20]
)
) # RETURN
) # FUNCTION
) # PROGRAM

