//Standard includes
#include <string>
#include <map>
#include <deque>
#include <fstream>
#include <sstream>
#include <iostream>
#include <vector>
//Forward declarations
class Script;
struct CELL;
//
// Type declarations
//
typedef std::wstring STRING;
typedef std::wistream ISTREAM;
typedef std::wostream OSTREAM;
typedef std::map<STRING, CELL*> STRING_MAP;
typedef std::map<CELL*, CELL*> ENVIR_TABLE;
typedef std::deque<CELL> CELL_STORAGE;
typedef CELL& (*NATIVE) (Script& script, CELL& args, CELL& envir);
//
// CELL STRUCTURE
//
enum CELL_TYPE
{
UNUSED,
//Literals
EMPTY_LIT, INT_LIT, STRING_LIT,
//Value constructors
LAMBDA_VAL, NATIVE_VAL, ENVIR_VAL, BOOL_VAL,
//Code constructors
NAME_CODE, COMBINE_CODE,
//Value and code constructors
CONS_CTOR
};
struct CELL
{
CELL_TYPE type;
bool mark; //Garbage collection mark
union
{
CELL* next_unused; //UNUSED
int int_val; //INT_LIT
bool bool_val; //BOOL_VAL
STRING const* string_val; //STRING_LIT
CELL* head; //CONS.
CELL* code; //LAMBDA_VAL. The cell must be a CONS_CTOR with parameters and body
NATIVE native; //NATIVE_VAL
ENVIR_TABLE* envir_table; //ENVIR_VAL
CELL* op; //COMBINE_CODE. The cell must be code (Any *_CODE type or CONS or EMPTY)
};
union
{
CELL* tail; //CONS. Must be CONS or EMPTY.
CELL* closure; //LAMBDA_VAL. The cell must be an environment (ENVIR_VAL)
CELL* parent_envir; //ENVIR_VAL. The pointer may be NULL or an environment cell (ENVIR_VAL)
CELL* operands; //COMBINE_CODE. The cell must be a list of code.
};
void Print(OSTREAM& o)const;
void PrintList(OSTREAM& o, STRING const& separator)const;
static void PrintEscapedString(OSTREAM& o, STRING const& s);
int GetInteger()const { if(type==INT_LIT) return int_val; else throw L"Integer expected"; }
bool GetBoolean()const { if(type==BOOL_VAL) return bool_val; else throw L"Boolean expected"; }
STRING const& GetString()const { if(type==STRING_LIT) return *string_val; else throw L"String expected"; }
};
//
// SCRIPT CLASS
//
class Script
{
public:
Script() : m_FirstUnused(NULL) { m_GlobalEnvir=&CreateEnvir(NULL); }
~Script() { GCSweep(); GCSweep(); } //Sweep twice to delete everything
CELL& CreateCell(CELL_TYPE ct, CELL* first=NULL, CELL* second=NULL);
CELL& CreateInteger(int val){ CELL& c=CreateCell(INT_LIT); c.int_val=val; return c; }
CELL& CreateBool(bool val) { CELL& c=CreateCell(BOOL_VAL); c.bool_val=val; return c; }
CELL& CreateString(STRING const& val);
CELL& CreateName(STRING const& val);
CELL& CreateEnvir(CELL* parent);
CELL& CreateNative(NATIVE native);
ENVIR_TABLE& GlobalEnvirTable()const { return *m_GlobalEnvir->envir_table; }
void DefineUsualSymbols();
void DefineGlobalNative(STRING const& name, NATIVE nat)
{
(*m_GlobalEnvir->envir_table)[&CreateName(name)]=&CreateNative(nat);
}
CELL& Evaluate(CELL& c, CELL& envir);
CELL& EvaluateInSequence(CELL& listc, CELL& envir);
CELL& Evaluate(CELL& c) { return Evaluate(c, *m_GlobalEnvir); }
int GarbageCollect();
static void GCMark(CELL* c);
static CELL* FindName(CELL& name, CELL& envir);
static void ChangeName(CELL& name, CELL& envir, CELL& value);
void* m_User;
private:
enum TOKEN_TYPE { T_NONE, T_RAW, T_LITERAL, T_SYMBOL, T_NAME };
struct TOKEN
{
TOKEN() : type(T_NONE) {}
TOKEN(TOKEN_TYPE t, CELL* d) : type(t), data(d) {}
TOKEN(ISTREAM::int_type r) : type(T_RAW), raw(r) {}
TOKEN_TYPE type;
union
{
CELL* data;
ISTREAM::int_type raw;
};
};
private:
int GCSweep();
CELL& ApplyLambda(CELL& code, CELL& args, CELL& closure, CELL& envir);
private:
CELL* m_FirstUnused;
CELL* m_GlobalEnvir;
CELL_STORAGE m_Cells;
STRING_MAP m_InternedStrings;
TOKEN m_Ahead;
};
//
// (CELL) PRINTING
//
void CELL::Print(OSTREAM& o)const
{
switch(type)
{
default: o << L"{{**UNKNOWN**}}"; break;
case UNUSED: o << L"{{**UNUSED**}}"; break;
case INT_LIT: o << std::dec << int_val; break;
case BOOL_VAL: o << (bool_val ? L"true" : L"false"); break;
case LAMBDA_VAL:o << L"{{ "; code->Print(o); o << L" }}"; break;
case NATIVE_VAL:o << L"{{NATIVE}}"; break;
case STRING_LIT:PrintEscapedString(o, *string_val); break;
case NAME_CODE: o << L"@"; PrintEscapedString(o, *string_val); break;
case CONS_CTOR: case EMPTY_LIT:
o << L"[";
PrintList(o, L", ");
o << L"]";
break;
case ENVIR_VAL: o << L"{{ENVIR ";
for(ENVIR_TABLE::const_iterator i=envir_table->begin(); i!=envir_table->end(); ++i)
{
o << *i->first->string_val << L"= ";
i->second->Print(o);
}
o << L"}}";
break;
case COMBINE_CODE:
op->Print(o);
o << L"(";
operands->PrintList(o, L", ");
o << L")";
break;
}
}
void CELL::PrintList(OSTREAM& o, STRING const& separator)const
{
for(CELL const* c=this; c->type==CONS_CTOR; c=c->tail)
{
if(c!=this)
o << separator;
c->head->Print(o);
}
}
void CELL::PrintEscapedString(OSTREAM& o, STRING const& s)
{
o << L"\"";
for(STRING::const_iterator i=s.begin(); i!=s.end(); ++i)
{
if(*i<32) o << L"\\x" << std::hex << (unsigned int)(unsigned)*i << L";";
else o << *i;
}
o << L"\"";
}
//
// ALLOCATION AND GARBAGE COLLECTION
//
CELL& Script::CreateCell(CELL_TYPE ct, CELL* first, CELL* second)
{
CELL* p;
if(m_FirstUnused==NULL)
{
CELL c;
m_Cells.push_back(c);
p=&m_Cells.back();
}
else
{
p=m_FirstUnused;
m_FirstUnused=p->next_unused;
}
p->mark=false;
p->type=ct;
p->head=first;
p->tail=second;
return *p;
}
CELL& Script::CreateString(STRING const& val)
{
CELL& c=CreateCell(STRING_LIT);
c.string_val=new STRING(val);
return c;
}
CELL& Script::CreateName(STRING const& val)
{
STRING_MAP::const_iterator i=m_InternedStrings.find(val);
if(i==m_InternedStrings.end())
{
CELL& c=CreateCell(NAME_CODE);
i=m_InternedStrings.insert(std::make_pair(val, &c)).first;
c.string_val=&i->first;
}
return *i->second;
}
CELL& Script::CreateEnvir(CELL* parent)
{
CELL& c=CreateCell(ENVIR_VAL);
c.envir_table=new ENVIR_TABLE;
c.parent_envir=parent;
return c;
}
CELL& Script::CreateNative(NATIVE native)
{
CELL& c=CreateCell(NATIVE_VAL);
c.native=native;
return c;
}
int Script::GarbageCollect()
{
GCMark(m_GlobalEnvir);
return GCSweep();
}
void Script::GCMark(CELL* c)
{
if(c==NULL || c->mark)
return;
c->mark=true;
switch(c->type)
{
case UNUSED: throw L"Marking unused cell";
case CONS_CTOR: GCMark(c->head); GCMark(c->tail); break;
case LAMBDA_VAL: GCMark(c->code); GCMark(c->closure); break;
case COMBINE_CODE: GCMark(c->op); GCMark(c->operands); break;
case ENVIR_VAL:
GCMark(c->parent_envir);
for(ENVIR_TABLE::const_iterator i=c->envir_table->begin(); i!=c->envir_table->end(); ++i)
{
GCMark(i->first);
GCMark(i->second);
}
break;
}
}
int Script::GCSweep()
{
int count=0;
for(CELL_STORAGE::iterator i=m_Cells.begin(); i!=m_Cells.end(); ++i)
{
if(i->mark)
{
i->mark=false;
continue;
}
if(i->type==UNUSED)
continue;
++count;
switch(i->type)
{
case STRING_LIT: delete i->string_val; break;
case ENVIR_VAL: delete i->envir_table; break;
case NAME_CODE: m_InternedStrings.erase(*i->string_val); break;
}
i->type=UNUSED;
i->next_unused=m_FirstUnused;
m_FirstUnused=&*i;
}
return count;
}
//
// EVALUATION
//
CELL& Script::Evaluate(CELL& c, CELL& envir)
{
CELL* aux;
switch(c.type)
{
//Non evaluable
case UNUSED: throw L"Evaluating an unused cell";
default: throw L"Evaluating an unknown cell";
case LAMBDA_VAL:throw L"Evaluating a lambda value";
case NATIVE_VAL:throw L"Evaluating a native";
case ENVIR_VAL: throw L"Evaluating an environment";
//Literals
case INT_LIT: case BOOL_VAL: case STRING_LIT: case EMPTY_LIT:
return c;
//Code constructors
case CONS_CTOR: return CreateCell(CONS_CTOR, &Evaluate(*c.head, envir), &Evaluate(*c.tail, envir));
case NAME_CODE:
if((aux=FindName(c, envir))==NULL)
throw L"Unknown name";
return *aux;
case COMBINE_CODE:
switch((aux=&Evaluate(*c.op, envir))->type)
{
case LAMBDA_VAL: return ApplyLambda(*aux->code, *c.operands, *aux->closure, envir);
case NATIVE_VAL: return aux->native(*this, *c.operands, envir);
default: throw L"Non-combinable value";
}
}
}
CELL& Script::EvaluateInSequence(CELL& c, CELL& envir)
{
CELL* aux=&CreateCell(EMPTY_LIT);
for(CELL* p=&c; p->type==CONS_CTOR; p=p->tail)
aux=&Evaluate(*p->head, envir);
return *aux;
}
CELL* Script::FindName(CELL& name, CELL& envir)
{
//Try to find the name in current environment
ENVIR_TABLE::const_iterator i=envir.envir_table->find(&name);
if(i!=envir.envir_table->end())
return i->second;
//Failed, try parent
if(envir.parent_envir!=NULL)
return FindName(name, *envir.parent_envir);
//Failed also, not found
return NULL;
}
void Script::ChangeName(CELL& name, CELL& envir, CELL& value)
{
//Try to find the name in current environment
ENVIR_TABLE::iterator i=envir.envir_table->find(&name);
if(i!=envir.envir_table->end())
{
(*envir.envir_table)[&name]=&value;
return;
}
//Failed, try parent
if(envir.parent_envir!=NULL)
return ChangeName(name, *envir.parent_envir, value);
//Failed also, not found
throw L"Name to be changed is undefined";
}
CELL& Script::ApplyLambda(CELL& code, CELL& args, CELL& closure, CELL& envir)
{
CELL& new_envir=CreateEnvir(&closure);
CELL* params=code.head;
CELL* arguments=&args;
while(params->type==CONS_CTOR && arguments->type==CONS_CTOR)
{
(*new_envir.envir_table)[params->head]=&Evaluate(*arguments->head, envir);
params=params->tail, arguments=arguments->tail;
}
if((params->type!=CONS_CTOR) != (arguments->type!=CONS_CTOR))
throw L"Invalid arity";
return EvaluateInSequence(*code.tail->head, new_envir);
}
domingo, 9 de octubre de 2011
miniSL parte 16 - El código hasta ahora
Como prometí en la última entrada de esta serie, aquí está el código que tenemos desarrollado hasta ahora. Ya llevamos 425 líneas de código. Más o menos la mitad de toda la implementación. Faltan por desarrollar el reconocimiento sintáctico, las funciones nativas y el REPL. Empezaremos en la siguiente entrada con el reconocimiento sintáctico.
Etiquetas:
miniSL
Suscribirse a:
Enviar comentarios (Atom)

0 comentarios:
Publicar un comentario