next up previous contents index PLPL moodlepserratamodulosperlmonksperldocapuntes LHPgoogleetsiiullpcgull
Sig: Práctica: Construcción del AST Sup: Análisis de Ámbito Ant: Análisis de Ámbito: Conceptos Err: Si hallas una errata ...


Descripción Eyapp del Lenguaje SimpleC

El proceso de identificar los nombres conlleva establecer enlaces entre las ocurrencias y sus declaraciones o bien - en caso de error - determinar que dicho enlace no existe. El resultado de este proceso de identificación (o análisis de ámbito y visibilidad) será utilizado durante las fases posteriores.

En este capítulo usaremos Parse::Eyapp para desarrollar las primeras fases - análisis léxico, análisis sintáctico y análisis de ámbito - de un compilador para un subconjunto de C al que denominaremos Simple C :

El Cuerpo

%%
program:
    definition<%name PROGRAM +>.program
      { $program }
;

definition:
    $funcDef 
      { $funcDef }
  | %name FUNCTION
    $basictype $funcDef
      { $funcDef }
  | declaration 
      {}
;

basictype:
    %name INT 
    INT
  | %name CHAR 
    CHAR
;

funcDef:
    $ID '('  $params  ')' $block
      {
         flat_statements($block);
         $block->{parameters} = [];
         $block->{function_name} = $ID;
         $block->type("FUNCTION");
         return $block;
      }
;

params: 
    ( basictype ID arraySpec)<%name PARAMS * ','>
      { $_[1] }
;

block:
    '{'.bracket 
     declaration<%name DECLARATIONS *>.decs statement<%name STATEMENTS *>.sts '}'
       {
         flat_statements($sts);
         $sts->type("BLOCK") if $decs->children;
         return $sts; 
       }
;

declaration:
    %name DECLARATION
    $basictype $declList ';' 
;

declList:
    (ID arraySpec) <%name VARLIST + ','> { $_[1] } 
;

arraySpec:
    ( '[' INUM ']')* { $_[1]->type("ARRAYSPEC"); $_[1] }
;

statement:
    expression ';' { $_[1] }
  | ';'
  | %name BREAK
    $BREAK ';'
  | %name CONTINUE
     $CONTINUE ';'
  | %name EMPTYRETURN
    RETURN ';'
  | %name RETURN
     RETURN expression ';'
  | block { $_[1] }
  | %name IF
    ifPrefix statement %prec '+'
  | %name IFELSE
    ifPrefix statement 'ELSE' statement
  | %name WHILE
    $loopPrefix statement
;

ifPrefix:
    IF '(' expression ')' { $_[3] }
;

loopPrefix:
    $WHILE '(' expression ')' { $_[3] }
;

expression:
    binary <+ ','> 
      { 
        return $_[1]->child(0) if ($_[1]->children() == 1); 
        return $_[1];
      }
;

Variable:
    %name VAR
    ID 
  | %name  VARARRAY
    $ID ('[' binary ']') <%name INDEXSPEC +> 
;

Primary:
    %name INUM
    INUM 
  | %name CHARCONSTANT
    CHARCONSTANT
  | $Variable 
      { 
        return $Variable 
      }
  | '(' expression ')' { $_[2] }
  | $function_call 
      { 
        return $function_call  # bypass
      }
;
    
function_call:  
  %name 
    FUNCTIONCALL
    ID  '(' binary <%name ARGLIST * ','> ')'
;

Unary:
    '++' Variable
  | '--' Variable
  | Primary { $_[1] }
;

binary:
    Unary { $_[1] }
  | %name PLUS
    binary '+' binary
  | %name MINUS
    binary '-' binary
  | %name TIMES
    binary '*' binary
  | %name DIV
    binary '/' binary
  | %name MOD
    binary '%' binary
  | %name LT
    binary '<' binary
  | %name GT
    binary '>' binary
  | %name GE
    binary '>=' binary
  | %name LE
    binary '<=' binary
  | %name EQ
    binary '==' binary
  | %name NE
    binary '!=' binary
  | %name AND
    binary '&' binary
  | %name EXP
    binary '**' binary
  | %name OR
    binary '|' binary
  | %name ASSIGN
    $Variable '=' binary
  | %name PLUSASSIGN
    $Variable '+=' binary
  | %name MINUSASSIGN
    $Variable '-=' binary
  | %name TIMESASSIGN
    $Variable '*=' binary
  | %name DIVASSIGN
    $Variable '/=' binary
  | %name MODASSIGN
    $Variable '%=' binary
;

La Cabeza

/* 
File: Simple/Syntax.eyp
Full Type checking
To build it, Do make or:
  eyapp -m Simple::Syntax Syntax.eyp; 
*/
%{
use strict;
use Carp;
use warnings;
use Data::Dumper;
use List::MoreUtils qw(firstval);
our $VERSION = "0.4";

my $debug = 1;
my %reserved = (
  int => "INT",
  char => "CHAR",
  if => "IF",
  else => "ELSE",
  break => "BREAK",
  continue => "CONTINUE",
  return => "RETURN",
  while => "WHILE"
);

my %lexeme = (
  '='  => "ASSIGN",
  '+'  => "PLUS",
  '-'  => "MINUS",
  '*'  => "TIMES",
  '/'  => "DIV",
  '%'  => "MOD",
  '|'  => "OR",
  '&'  => "AND",
  '{'  => "LEFTKEY",
  '}'  => "RIGHTKEY",
  ','  => "COMMA",
  ';'  => "SEMICOLON",
  '('  => "LEFTPARENTHESIS",
  ')'  => "RIGHTPARENTHESIS",
  '['  => "LEFTBRACKET",
  ']'  => "RIGHTBRACKET",
  '==' => "EQ",
  '+=' => "PLUSASSIGN",
  '-=' => "MINUSASSIGN",
  '*=' => "TIMESASSIGN",
  '/=' => "DIVASSIGN",
  '%=' => "MODASSIGN",
  '!=' => "NE",
  '<'  => "LT",
  '>'  => "GT",
  '<=' => "LE",
  '>=' => "GE",
  '++' => "INC",
  '--' => "DEC",
  '**' => "EXP"
);

my ($tokenbegin, $tokenend) = (1, 1);

sub flat_statements {
  my $block = shift;

  my $i = 0;
  for ($block->children) {
    if ($_->type eq "STATEMENTS") {
      splice @{$block->{children}}, $i, 1, $_->children;
    }
    $i++;
  }
}
%}

%syntactic token RETURN BREAK CONTINUE

%right '=' '+=' '-=' '*=' '/=' '%='
%left '|'
%left '&'
%left '==' '!='
%left '<' '>' '>=' '<='
%left '+' '-'
%left '*' '/' '%'
%right '**'
%right '++' '--'
%right 'ELSE'

%tree

La Cola

%%

sub _Error {
  my($token)=$_[0]->YYCurval;
  my($what)= $token ? "input: '$token->[0]' in line $token->[1]" : "end of input";
  my @expected = $_[0]->YYExpect();
  my $expected = @expected? "Expected one of these tokens: '@expected'":"";

  croak "Syntax error near $what. $expected\n";
}

sub _Lexer {
  my($parser)=shift;

  my $token;
  for ($parser->YYData->{INPUT}) {
      return('',undef) if !defined($_) or $_ eq '';

      #Skip blanks
      s{\A
         ((?:
              \s+       # any white space char
          |   /\*.*?\*/ # C like comments
          )+
         )
       }
       {}xs
      and do {
            my($blanks)=$1;

            #Maybe At EOF
            return('', undef) if $_ eq '';
            $tokenend += $blanks =~ tr/\n//;
        };

     $tokenbegin = $tokenend;

      s/^('.')//
              and return('CHARCONSTANT', [$1, $tokenbegin]);

      s/^([0-9]+(?:\.[0-9]+)?)//
              and return('INUM',[$1, $tokenbegin]);

      s/^([A-Za-z][A-Za-z0-9_]*)//
        and do {
          my $word = $1;
          my $r;
          return ($r, [$r, $tokenbegin]) if defined($r = $reserved{$word});
          return('ID',[$word, $tokenbegin]);
      };

      m/^(\S\S)/ and  defined($token = $1) and exists($lexeme{$token})
        and do {
          s/..//;
          return ($token, [$token, $tokenbegin]);
        }; # do

      m/^(\S)/ and defined($token = $1) and  exists($lexeme{$token})
        and do {
          s/.//;
          return ($token, [$token, $tokenbegin]);
        }; # do
      
      die "Unexpected character at $tokenbegin\n";
  } # for
}

sub compile {
 my($self)=shift;

 my ($t);
   
 $self->YYData->{INPUT} = $_[0];

 $t = $self->YYParse( yylex => \&_Lexer, yyerror => \&_Error, 
                      #yydebug => 0x1F 
     );

 return $t;
}

sub TERMINAL::value {
  return $_[0]->{attr}[0];
}

########## line Support 

sub TERMINAL::line {
  return $_[0]->{attr}[1];
}

sub VAR::line {
  my $self = shift;

  return $self->child(0)->{attr}[1];
}

sub PLUS::line {
  $_[0]->{lines}[0]
}

{ 
no warnings;
*TIMES::line = *DIV::line = *MINUS::line = *ASSIGN::line 
=*GT::line
=*IF::line
=*RETURN::line
= \&PLUS::line;

*VARARRAY::line = *FUNCTIONCALL::line 
=\&VAR::line;
}

############## Debugging and Display
sub show_trees {
 my ($t) = shift;
 my $debug = shift;

 $Data::Dumper::Indent = 1;
 print Dumper $t if $debug > 3;
 local $Parse::Eyapp::Node::INDENT = $debug;
 print $t->str."\n";
}

sub TERMINAL::info { 
  my $a = join ':', @{$_[0]->{attr}}; 
  return $a
}

sub TERMINAL::save_attributes {
  # $_[0] is a syntactic terminal
  # $_[1] is the father.
  push @{$_[1]->{lines}}, $_[0]->[1]; # save the line!
}

sub WHILE::line {
  return $_[0]->{line}
}

Ejemplo de Árbol Construido

pl@nereida:~/Lbook/code/Simple-Syntax/script$ usesyntax.pl bugmatch01.c 2
test (int n)
{
  while (1) {
    if (1>0) {
      a = 2;
      break;
    }
    else if (2> 0){
      b = 3;
      continue;
    }
  }
}

PROGRAM(
  FUNCTION(
    WHILE(
      INUM(
        TERMINAL[1:3]
      ),
      STATEMENTS(
        IFELSE(
          GT(
            INUM(
              TERMINAL[1:4]
            ),
            INUM(
              TERMINAL[0:4]
            )
          ) # GT,
          STATEMENTS(
            ASSIGN(
              VAR(
                TERMINAL[a:5]
              ),
              INUM(
                TERMINAL[2:5]
              )
            ) # ASSIGN,
            BREAK
          ) # STATEMENTS,
          IF(
            GT(
              INUM(
                TERMINAL[2:8]
              ),
              INUM(
                TERMINAL[0:8]
              )
            ) # GT,
            STATEMENTS(
              ASSIGN(
                VAR(
                  TERMINAL[b:9]
                ),
                INUM(
                  TERMINAL[3:9]
                )
              ) # ASSIGN,
              CONTINUE
            ) # STATEMENTS
          ) # IF
        ) # IFELSE
      ) # STATEMENTS
    ) # WHILE
  ) # FUNCTION
) # PROGRAM

Ejemplo de Árbol con Aplanamiento de STATEMENTS

pl@nereida:~/Lbook/code/Simple-Syntax/script$ usesyntax.pl prueba26.c 2
int a[20],b,e[10];

g() {}

int f(char c) {
char d;
 c = 'X';
 e[d][b] = 'A'+c;
 {
   int d;
   d = c * 2;
 }
 {
   d = a + b;
   {
     c = a + 1;
   }
 }
 c = d * 2;
 return c;
}


PROGRAM(
  FUNCTION,
  FUNCTION(
    ASSIGN(
      VAR(
        TERMINAL[c:7]
      ),
      CHARCONSTANT(
        TERMINAL['X':7]
      )
    ) # ASSIGN,
    ASSIGN(
      VARARRAY(
        TERMINAL[e:8],
        INDEXSPEC(
          VAR(
            TERMINAL[d:8]
          ),
          VAR(
            TERMINAL[b:8]
          )
        ) # INDEXSPEC
      ) # VARARRAY,
      PLUS(
        CHARCONSTANT(
          TERMINAL['A':8]
        ),
        VAR(
          TERMINAL[c:8]
        )
      ) # PLUS
    ) # ASSIGN,
    BLOCK(
      ASSIGN(
        VAR(
          TERMINAL[d:11]
        ),
        TIMES(
          VAR(
            TERMINAL[c:11]
          ),
          INUM(
            TERMINAL[2:11]
          )
        ) # TIMES
      ) # ASSIGN
    ) # BLOCK,
    ASSIGN(
      VAR(
        TERMINAL[d:14]
      ),
      PLUS(
        VAR(
          TERMINAL[a:14]
        ),
        VAR(
          TERMINAL[b:14]
        )
      ) # PLUS
    ) # ASSIGN,
    ASSIGN(
      VAR(
        TERMINAL[c:16]
      ),
      PLUS(
        VAR(
          TERMINAL[a:16]
        ),
        INUM(
          TERMINAL[1:16]
        )
      ) # PLUS
    ) # ASSIGN,
    ASSIGN(
      VAR(
        TERMINAL[c:19]
      ),
      TIMES(
        VAR(
          TERMINAL[d:19]
        ),
        INUM(
          TERMINAL[2:19]
        )
      ) # TIMES
    ) # ASSIGN,
    RETURN(
      VAR(
        TERMINAL[c:20]
      )
    ) # RETURN
  ) # FUNCTION
) # PROGRAM



Subsecciones
next up previous contents index PLPL moodlepserratamodulosperlmonksperldocapuntes LHPgoogleetsiiullpcgull
Sig: Práctica: Construcción del AST Sup: Análisis de Ámbito Ant: Análisis de Ámbito: Conceptos Err: Si hallas una errata ...
Casiano Rodríguez León
2012-05-22