domingo, 9 de octubre de 2011

miniSL parte 16 - El código hasta ahora

Como prometí en la última entrada de esta serie, aquí está el código que tenemos desarrollado hasta ahora. Ya llevamos 425 líneas de código. Más o menos la mitad de toda la implementación. Faltan por desarrollar el reconocimiento sintáctico, las funciones nativas y el REPL. Empezaremos en la siguiente entrada con el reconocimiento sintáctico.

//Standard includes
#include <string>
#include <map>
#include <deque>
#include <fstream>
#include <sstream>
#include <iostream>
#include <vector>

//Forward declarations
class Script;
struct CELL;

//
// Type declarations
//

typedef std::wstring STRING;
typedef std::wistream ISTREAM;
typedef std::wostream OSTREAM;
typedef std::map<STRING, CELL*> STRING_MAP;
typedef std::map<CELL*, CELL*> ENVIR_TABLE;
typedef std::deque<CELL> CELL_STORAGE;
typedef CELL& (*NATIVE) (Script& script, CELL& args, CELL& envir);


//
// CELL STRUCTURE
//

enum CELL_TYPE
{
 UNUSED, 

 //Literals
 EMPTY_LIT, INT_LIT, STRING_LIT,

 //Value constructors
 LAMBDA_VAL, NATIVE_VAL, ENVIR_VAL, BOOL_VAL, 

 //Code constructors
 NAME_CODE, COMBINE_CODE,

 //Value and code constructors
 CONS_CTOR
};

struct CELL
{
 CELL_TYPE type;
 bool mark;      //Garbage collection mark
 union
 {
  CELL* next_unused;   //UNUSED
  int int_val;    //INT_LIT
  bool bool_val;    //BOOL_VAL
  STRING const* string_val; //STRING_LIT
  CELL* head;     //CONS.
  CELL* code;     //LAMBDA_VAL. The cell must be a CONS_CTOR with parameters and body
  NATIVE native;    //NATIVE_VAL
  ENVIR_TABLE* envir_table; //ENVIR_VAL
  CELL* op;     //COMBINE_CODE. The cell must be code (Any *_CODE type or CONS or EMPTY)
 };

 union
 {
  CELL* tail;   //CONS. Must be CONS or EMPTY.
  CELL* closure;  //LAMBDA_VAL. The cell must be an environment (ENVIR_VAL)
  CELL* parent_envir; //ENVIR_VAL. The pointer may be NULL or an environment cell (ENVIR_VAL)
  CELL* operands;  //COMBINE_CODE. The cell must be a list of code.
 };

 void Print(OSTREAM& o)const;
 void PrintList(OSTREAM& o, STRING const& separator)const;
 static void PrintEscapedString(OSTREAM& o, STRING const& s);

 int GetInteger()const   { if(type==INT_LIT) return int_val; else throw L"Integer expected"; }
 bool GetBoolean()const   { if(type==BOOL_VAL) return bool_val; else throw L"Boolean expected"; }
 STRING const& GetString()const { if(type==STRING_LIT) return *string_val; else throw L"String expected"; }
};

//
// SCRIPT CLASS
//

class Script
{
public:
 Script() : m_FirstUnused(NULL) { m_GlobalEnvir=&CreateEnvir(NULL); }
 ~Script() { GCSweep(); GCSweep(); } //Sweep twice to delete everything

 CELL& CreateCell(CELL_TYPE ct, CELL* first=NULL, CELL* second=NULL);
 CELL& CreateInteger(int val){ CELL& c=CreateCell(INT_LIT); c.int_val=val; return c; }
 CELL& CreateBool(bool val) { CELL& c=CreateCell(BOOL_VAL); c.bool_val=val; return c; }
 CELL& CreateString(STRING const& val);
 CELL& CreateName(STRING const& val);
 CELL& CreateEnvir(CELL* parent);
 CELL& CreateNative(NATIVE native);

 ENVIR_TABLE& GlobalEnvirTable()const { return *m_GlobalEnvir->envir_table; }
 void DefineUsualSymbols();

 void DefineGlobalNative(STRING const& name, NATIVE nat)
 {
  (*m_GlobalEnvir->envir_table)[&CreateName(name)]=&CreateNative(nat);
 }

 CELL& Evaluate(CELL& c, CELL& envir);
 CELL& EvaluateInSequence(CELL& listc, CELL& envir);
 CELL& Evaluate(CELL& c)     { return Evaluate(c, *m_GlobalEnvir); }

 int GarbageCollect();
 static void GCMark(CELL* c);

 static CELL* FindName(CELL& name, CELL& envir);
 static void ChangeName(CELL& name, CELL& envir, CELL& value);

 void* m_User;
private:
 enum TOKEN_TYPE { T_NONE, T_RAW, T_LITERAL, T_SYMBOL, T_NAME };

 struct TOKEN
 {
  TOKEN() : type(T_NONE) {}
  TOKEN(TOKEN_TYPE t, CELL* d) : type(t), data(d) {}
  TOKEN(ISTREAM::int_type r) : type(T_RAW), raw(r) {}

  TOKEN_TYPE type;
  union
  {
   CELL* data;
   ISTREAM::int_type raw;
  };
 };

private:
 int GCSweep();

 CELL& ApplyLambda(CELL& code, CELL& args, CELL& closure, CELL& envir);
private:
 CELL* m_FirstUnused;
 CELL* m_GlobalEnvir;
 CELL_STORAGE m_Cells;
 STRING_MAP m_InternedStrings;
 TOKEN m_Ahead;
};

//
// (CELL) PRINTING
//

void CELL::Print(OSTREAM& o)const
{
 switch(type)
 {
 default:  o << L"{{**UNKNOWN**}}"; break;
 case UNUSED: o << L"{{**UNUSED**}}"; break;
 case INT_LIT: o << std::dec << int_val; break;
 case BOOL_VAL: o << (bool_val ? L"true" : L"false"); break;
 case LAMBDA_VAL:o << L"{{ "; code->Print(o); o << L" }}"; break;
 case NATIVE_VAL:o << L"{{NATIVE}}"; break;
 case STRING_LIT:PrintEscapedString(o, *string_val); break;
 case NAME_CODE: o << L"@"; PrintEscapedString(o, *string_val); break;

 case CONS_CTOR: case EMPTY_LIT:
  o << L"[";
  PrintList(o, L", ");
  o << L"]";
  break;

 case ENVIR_VAL: o << L"{{ENVIR ";
  for(ENVIR_TABLE::const_iterator i=envir_table->begin(); i!=envir_table->end(); ++i)
  {
   o << *i->first->string_val << L"= ";
   i->second->Print(o);
  }
  o << L"}}";
  break;

 case COMBINE_CODE:
  op->Print(o);
  o << L"(";
  operands->PrintList(o, L", ");
  o << L")";
  break;
 }
}

void CELL::PrintList(OSTREAM& o, STRING const& separator)const
{
 for(CELL const* c=this; c->type==CONS_CTOR; c=c->tail)
 {
  if(c!=this)
   o << separator;
  c->head->Print(o);
 }
}

void CELL::PrintEscapedString(OSTREAM& o, STRING const& s)
{
 o << L"\"";
 for(STRING::const_iterator i=s.begin(); i!=s.end(); ++i)
 {
  if(*i<32) o << L"\\x" << std::hex << (unsigned int)(unsigned)*i << L";";
  else  o << *i;
 }
 o << L"\"";
}


//
// ALLOCATION AND GARBAGE COLLECTION
//

CELL& Script::CreateCell(CELL_TYPE ct, CELL* first, CELL* second)
{
 CELL* p;
 if(m_FirstUnused==NULL)
 {
  CELL c;
  m_Cells.push_back(c);
  p=&m_Cells.back();
 }
 else
 {
  p=m_FirstUnused;
  m_FirstUnused=p->next_unused;
 }
 p->mark=false;
 p->type=ct;
 p->head=first;
 p->tail=second;
 return *p;
}

CELL& Script::CreateString(STRING const& val)
{
 CELL& c=CreateCell(STRING_LIT);
 c.string_val=new STRING(val);
 return c;
}

CELL& Script::CreateName(STRING const& val)
{
 STRING_MAP::const_iterator i=m_InternedStrings.find(val);
 if(i==m_InternedStrings.end())
 {
  CELL& c=CreateCell(NAME_CODE);
  i=m_InternedStrings.insert(std::make_pair(val, &c)).first;
  c.string_val=&i->first;
 }
 return *i->second;
}

CELL& Script::CreateEnvir(CELL* parent)
{
 CELL& c=CreateCell(ENVIR_VAL);
 c.envir_table=new ENVIR_TABLE;
 c.parent_envir=parent;
 return c;
}

CELL& Script::CreateNative(NATIVE native)
{
 CELL& c=CreateCell(NATIVE_VAL);
 c.native=native;
 return c;
}

int Script::GarbageCollect()
{
 GCMark(m_GlobalEnvir);
 return GCSweep();
}

void Script::GCMark(CELL* c)
{
 if(c==NULL || c->mark)
  return;

 c->mark=true;

 switch(c->type)
 {
 case UNUSED:  throw L"Marking unused cell";
 case CONS_CTOR:  GCMark(c->head); GCMark(c->tail);  break;
 case LAMBDA_VAL: GCMark(c->code); GCMark(c->closure);  break;
 case COMBINE_CODE: GCMark(c->op);  GCMark(c->operands); break;

 case ENVIR_VAL:
  GCMark(c->parent_envir);
  for(ENVIR_TABLE::const_iterator i=c->envir_table->begin(); i!=c->envir_table->end(); ++i)
  {
   GCMark(i->first);
   GCMark(i->second);
  }
  break;
 }
}

int Script::GCSweep()
{
 int count=0;
 for(CELL_STORAGE::iterator i=m_Cells.begin(); i!=m_Cells.end(); ++i)
 {
  if(i->mark)
  {
   i->mark=false;
   continue;
  }

  if(i->type==UNUSED)
   continue;

  ++count;

  switch(i->type)
  {
  case STRING_LIT: delete i->string_val;      break;
  case ENVIR_VAL:  delete i->envir_table;      break;
  case NAME_CODE:  m_InternedStrings.erase(*i->string_val); break;
  }

  i->type=UNUSED;
  i->next_unused=m_FirstUnused;
  m_FirstUnused=&*i;
 }
 return count;
}

//
// EVALUATION
//

CELL& Script::Evaluate(CELL& c, CELL& envir)
{
 CELL* aux;
 switch(c.type)
 {
  //Non evaluable
 case UNUSED: throw L"Evaluating an unused cell";
 default:  throw L"Evaluating an unknown cell";
 case LAMBDA_VAL:throw L"Evaluating a lambda value";
 case NATIVE_VAL:throw L"Evaluating a native";
 case ENVIR_VAL: throw L"Evaluating an environment";

  //Literals
 case INT_LIT: case BOOL_VAL: case STRING_LIT: case EMPTY_LIT:
  return c;

  //Code constructors
 case CONS_CTOR:  return CreateCell(CONS_CTOR, &Evaluate(*c.head, envir), &Evaluate(*c.tail, envir));

 case NAME_CODE: 
  if((aux=FindName(c, envir))==NULL)
   throw L"Unknown name";
  return *aux;

 case COMBINE_CODE:
  switch((aux=&Evaluate(*c.op, envir))->type)
  {
  case LAMBDA_VAL: return ApplyLambda(*aux->code, *c.operands, *aux->closure, envir);
  case NATIVE_VAL: return aux->native(*this, *c.operands, envir);
  default:   throw L"Non-combinable value";
  }
 }
}

CELL& Script::EvaluateInSequence(CELL& c, CELL& envir)
{
 CELL* aux=&CreateCell(EMPTY_LIT);
 for(CELL* p=&c; p->type==CONS_CTOR; p=p->tail)
  aux=&Evaluate(*p->head, envir);
 return *aux;
}

CELL* Script::FindName(CELL& name, CELL& envir)
{
 //Try to find the name in current environment
 ENVIR_TABLE::const_iterator i=envir.envir_table->find(&name);
 if(i!=envir.envir_table->end())
  return i->second;

 //Failed, try parent
 if(envir.parent_envir!=NULL)
  return FindName(name, *envir.parent_envir);

 //Failed also, not found
 return NULL;
}

void Script::ChangeName(CELL& name, CELL& envir, CELL& value)
{
 //Try to find the name in current environment
 ENVIR_TABLE::iterator i=envir.envir_table->find(&name);
 if(i!=envir.envir_table->end())
 {
  (*envir.envir_table)[&name]=&value;
  return;
 }

 //Failed, try parent
 if(envir.parent_envir!=NULL)
  return ChangeName(name, *envir.parent_envir, value);

 //Failed also, not found
 throw L"Name to be changed is undefined";
}

CELL& Script::ApplyLambda(CELL& code, CELL& args, CELL& closure, CELL& envir)
{
 CELL& new_envir=CreateEnvir(&closure);
 CELL* params=code.head;
 CELL* arguments=&args;
 while(params->type==CONS_CTOR && arguments->type==CONS_CTOR)
 {
  (*new_envir.envir_table)[params->head]=&Evaluate(*arguments->head, envir);
  params=params->tail, arguments=arguments->tail;
 }

 if((params->type!=CONS_CTOR) != (arguments->type!=CONS_CTOR))
  throw L"Invalid arity";

 return EvaluateInSequence(*code.tail->head, new_envir);
}

0 comentarios:

Publicar un comentario en la entrada