[fun] CFG grammar parser - Programming @ 4-ch Archives

[fun] CFG grammar parser (8)

1 Name: #!/usr/bin/anonymous : 2008-04-14 11:22 ID:frY7udtC [Del]

http://en.wikipedia.org/wiki/Context-free_grammar
Post parsers in your language!

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>

/* CFG grammar */

typedef struct {
    int c;
    const char *str;
} cfg_t;

int eval(const char *buf, const cfg_t *, char *res, size_t sz);

int main(void)
{

    char buf[BUFSIZ];
    char res[BUFSIZ];
    char *p, *s, *tmp;
    int ret;
    size_t n;
    const cfg_t cfg[26] = {
    {'A', "hello"},
    {'B', "world"},
    {'C', "A B"},
    {'D', "A D"}
    };

    while (fgets(buf, sizeof buf, stdin) != NULL) {

    if (*buf && buf[strlen(buf) - 1] == '\n')
        buf[strlen(buf) - 1] = 0;

    p = buf;
    s = res;
    n = 0;

    while ((ret = eval(p, cfg, s, BUFSIZ)) == 1) {
        printf("%lu ==> %s\n", (unsigned long) n, s);
        tmp = p;
        p = s;
        s = tmp;
        n++;
    }

    if (ret == -1)
        printf("'%s': expansion too big to fit in %d buf\n",
           p, BUFSIZ);
    else
        printf("result: '%s'\n", s);

    }

    if (ferror(stdin)) {
    perror("stdin");
    return EXIT_FAILURE;
    }

    return 0;
}

int eval(const char *buf, const cfg_t * cfg, char *res, size_t sz)
{

    size_t i;
    int done = 0;

    while (*buf != 0) {
    if (isupper((unsigned char) *buf)) {
        for (i = 0; i < 26; i++)
        if (*buf == cfg[i].c)
            break;
        if (i != 26) {
        done = 1;
        if (strlen(cfg[i].str) >= sz)
            return -1;
        strcpy(res, cfg[i].str);
        res += strlen(cfg[i].str);
        sz -= strlen(cfg[i].str);
        }
    } else {
        if (sz <= 1)
        return -1;
        *res = *buf;
        res++;
        sz--;
    }
    buf++;
    }
    *res = 0;

    return done;
}

example

$ ./cfg
A
0 ==> hello
result: 'hello'
A B
0 ==> hello world
result: 'hello world'
BB
0 ==> worldworld
result: 'worldworld'
C
0 ==> A B
1 ==> hello world
result: 'hello world'
^D
$

2 Name: #!/usr/bin/anonymous : 2008-04-14 21:36 ID:Heaven [Del]

http://dis.4chan.org/read/prog/1208126223/
Copy threads from 4chan!

3 Name: #!/usr/bin/anonymous : 2008-04-14 21:40 ID:joKM82JN [Del]

>>2
I'm the OP there too -_-

4 Name: dmpk2k!hinhT6kz2E : 2008-04-15 06:25 ID:Heaven [Del]

This was a quick hack to create an assembler for a toy bytecode interpreter, part of a demonstration I did. It started off as a recursive-descent parser, but ended up being complete overkill for something so simple.

#!/usr/bin/env python

import sys
import re

def wrong_number_of_operands( opcode_name ):
  exit( 'Wrong number of operands given for %s on line %d.' % ( opcode_name, line_num ) )

def bad_label( label ):
  exit( 'Attempt to call unknown label "%s" on line %d.' % ( label, line_num ) )

def label_too_high( label ):
  exit( 'Label "%s" on line %d has an address beyond 256 bytes.' % ( label, line_num ) )

def overflow( args ):
  for arg in args:
    if arg:
      opcode_name = sys._getframe(1).f_code.co_name
      wrong_number_of_operands( opcode_name )

def num( string ):
  try:
    return chr( int( string ) )
  except TypeError:
    opcode_name = sys._getframe(1).f_code.co_name
    wrong_number_of_operands( opcode_name )
  except ValueError:
    exit( string + ' is not valid operand.' )

def fixup_labels():
  for deferred_label in deferred_labels.keys():
    target_address = label( deferred_label, bad_label )

    for address in deferred_labels[ deferred_label ]:
      word = bytes[ address ]
      bytes[ address ] = word[ 0:2 ] + target_address + word[ 3 ]

def defer_label( label ):
  if label not in deferred_labels.keys():
    deferred_labels[ label ] = []

  current_word = len( bytes )
  deferred_labels[ label ].append( current_word )
  return chr( 255 )

def label( string, keyerror_handler ):
  try:
    return chr( labels[ string ] )
  except KeyError:
    return keyerror_handler( string )   
  except ValueError:
    label_too_high( string )

def je( operand1, operand2 ):
  return 'e' + num( operand1 ) + label( operand2, defer_label )

def jne( operand1, operand2 ):
  return 'n' + num( operand1 ) + label( operand2, defer_label )

def jg( operand1, operand2 ):
  return '>' + num( operand1 ) + label( operand2, defer_label )

def jl( operand1, operand2 ):
  return '<' + num( operand1 ) + label( operand2, defer_label )

def jmp( operand1, *args ):
  overflow( args );
  return 'j' + chr(0) + label( operand1, defer_label )

def call( operand1, *args ):
  overflow( args )
  return 'c' + chr(0) + label( operand1, defer_label )

def ret( *args ):
  overflow( args )
  return 'r'

def add( *args ):
  overflow( args )
  return '+'

def sub( *args ):
  overflow( args )
  return '-'

def mul( *args ):
  overflow( args )
  return '*'

def idiv( *args ):
  overflow( args )
  return '/' + num( operand1 )

5 Name: dmpk2k!hinhT6kz2E : 2008-04-15 06:27 ID:Heaven [Del]

def push( operand1, *args ):
  overflow( args )
  return 'u' + num( operand1 )

def pop( *args ):
  overflow( args )
  return 'o'

def dup( *args ):
  overflow( args )
  return 'd'

def swap( *args ):
  overflow( args )
  return 's'

def get( *args ):
  overflow( args )
  return 'g'

def put( *args ):
  overflow( args )
  return 'p'

def quit( *args ):
  overflow( args )
  return 'q'

def pad( bytes, size ):
  pad_size = size - len( bytes )
  return bytes + chr(0) * pad_size

def bytes_for( opcode, operand1, operand2 ):
  opcode_val = { 'je'   : je,
                 'jne'  : jne,
                 'jg'   : jg,
                 'jl'   : jl,
                 'jmp'  : jmp,
                 'call' : call,
                 'ret'  : ret,
                 'add'  : add,
                 'sub'  : sub,
                 'mul'  : mul,
                 'idiv' : idiv,
                 'push' : push,
                 'pop'  : pop,
                 'dup'  : dup,
                 'swap' : swap,
                 'put'  : put,
                 'get'  : get,
                 'quit' : quit }

  if opcode in opcode_val:
    bytes = opcode_val[ opcode ]( operand1, operand2 ) 
    return pad( bytes, WORD_SIZE )
  else:
    exit( '"%s" is not a valid opcode.' % opcode )

def compile( assembly ):
  global line_num, deferred_labels, labels, bytes
  line_num        = 0
  deferred_labels = {}
  labels          = {}
  bytes           = []

  regex = re.compile( r"""
                          (?:(\w+)\s*:)?      # label
                          \s*(\w+)            # opcode
                          (?:\s+(\w+))?       # operand1
                          (?:\s*,\s*(\w+))?   # operand2
                       """, re.VERBOSE )

  for line in assembly.split( "\n" ):
    instruction = regex.match( line )
    line_num += 1

    if instruction:
      label, opcode, operand1, operand2 = instruction.groups()
 
      if label:
        labels[ label ] = len( bytes )
      bytes.append( bytes_for( opcode, operand1, operand2 ) )

  fixup_labels()
  return ''.join( bytes )

def read_data_in( file_name ):
  file = open( file_name , "r" )
  data = file.read()
  file.close()
  return data

def write_out( bytecode, file_name ):
  file = open( file_name , "wb" )
  file.write( bytecode )
  file.close()

WORD_SIZE = 4
assembly = read_data_in( sys.argv[1] )
bytecode = compile( assembly )
write_out( bytecode, "a.bytecode" )

6 Name: #!/usr/bin/anonymous : 2008-04-15 06:40 ID:Heaven [Del]

I'm waiting for WAHa to post Wakaba's parser here...

7 Name: !WAHa.06x36 : 2008-04-15 14:47 ID:Heaven [Del]

>>6

It's not a parser, it's a generator that runs the CFG in reverse!

8 Name: #!/usr/bin/anonymous : 2008-04-23 10:41 ID:Heaven [Del]

Now we need a CFG that generates CFGs that generates ???.

	Return Entire thread Last 50 posts
	This thread has been closed. You cannot post in this thread any longer.