//Standard includes #include <string> #include <map> #include <deque> #include <fstream> #include <sstream> #include <iostream> #include <vector> //Forward declarations class Script; struct CELL; // // Type declarations // typedef std::wstring STRING; typedef std::wistream ISTREAM; typedef std::wostream OSTREAM; typedef std::map<STRING, CELL*> STRING_MAP; typedef std::map<CELL*, CELL*> ENVIR_TABLE; typedef std::deque<CELL> CELL_STORAGE; typedef CELL& (*NATIVE) (Script& script, CELL& args, CELL& envir); // // CELL STRUCTURE // enum CELL_TYPE { UNUSED, //Literals EMPTY_LIT, INT_LIT, STRING_LIT, //Value constructors LAMBDA_VAL, NATIVE_VAL, ENVIR_VAL, BOOL_VAL, //Code constructors NAME_CODE, COMBINE_CODE, //Value and code constructors CONS_CTOR }; struct CELL { CELL_TYPE type; bool mark; //Garbage collection mark union { CELL* next_unused; //UNUSED int int_val; //INT_LIT bool bool_val; //BOOL_VAL STRING const* string_val; //STRING_LIT CELL* head; //CONS. CELL* code; //LAMBDA_VAL. The cell must be a CONS_CTOR with parameters and body NATIVE native; //NATIVE_VAL ENVIR_TABLE* envir_table; //ENVIR_VAL CELL* op; //COMBINE_CODE. The cell must be code (Any *_CODE type or CONS or EMPTY) }; union { CELL* tail; //CONS. Must be CONS or EMPTY. CELL* closure; //LAMBDA_VAL. The cell must be an environment (ENVIR_VAL) CELL* parent_envir; //ENVIR_VAL. The pointer may be NULL or an environment cell (ENVIR_VAL) CELL* operands; //COMBINE_CODE. The cell must be a list of code. }; void Print(OSTREAM& o)const; void PrintList(OSTREAM& o, STRING const& separator)const; static void PrintEscapedString(OSTREAM& o, STRING const& s); int GetInteger()const { if(type==INT_LIT) return int_val; else throw L"Integer expected"; } bool GetBoolean()const { if(type==BOOL_VAL) return bool_val; else throw L"Boolean expected"; } STRING const& GetString()const { if(type==STRING_LIT) return *string_val; else throw L"String expected"; } }; // // SCRIPT CLASS // class Script { public: Script() : m_FirstUnused(NULL) { m_GlobalEnvir=&CreateEnvir(NULL); } ~Script() { GCSweep(); GCSweep(); } //Sweep twice to delete everything CELL& CreateCell(CELL_TYPE ct, CELL* first=NULL, CELL* second=NULL); CELL& CreateInteger(int val){ CELL& c=CreateCell(INT_LIT); c.int_val=val; return c; } CELL& CreateBool(bool val) { CELL& c=CreateCell(BOOL_VAL); c.bool_val=val; return c; } CELL& CreateString(STRING const& val); CELL& CreateName(STRING const& val); CELL& CreateEnvir(CELL* parent); CELL& CreateNative(NATIVE native); ENVIR_TABLE& GlobalEnvirTable()const { return *m_GlobalEnvir->envir_table; } void DefineUsualSymbols(); void DefineGlobalNative(STRING const& name, NATIVE nat) { (*m_GlobalEnvir->envir_table)[&CreateName(name)]=&CreateNative(nat); } CELL& Evaluate(CELL& c, CELL& envir); CELL& EvaluateInSequence(CELL& listc, CELL& envir); CELL& Evaluate(CELL& c) { return Evaluate(c, *m_GlobalEnvir); } int GarbageCollect(); static void GCMark(CELL* c); static CELL* FindName(CELL& name, CELL& envir); static void ChangeName(CELL& name, CELL& envir, CELL& value); void* m_User; private: enum TOKEN_TYPE { T_NONE, T_RAW, T_LITERAL, T_SYMBOL, T_NAME }; struct TOKEN { TOKEN() : type(T_NONE) {} TOKEN(TOKEN_TYPE t, CELL* d) : type(t), data(d) {} TOKEN(ISTREAM::int_type r) : type(T_RAW), raw(r) {} TOKEN_TYPE type; union { CELL* data; ISTREAM::int_type raw; }; }; private: int GCSweep(); CELL& ApplyLambda(CELL& code, CELL& args, CELL& closure, CELL& envir); private: CELL* m_FirstUnused; CELL* m_GlobalEnvir; CELL_STORAGE m_Cells; STRING_MAP m_InternedStrings; TOKEN m_Ahead; }; // // (CELL) PRINTING // void CELL::Print(OSTREAM& o)const { switch(type) { default: o << L"{{**UNKNOWN**}}"; break; case UNUSED: o << L"{{**UNUSED**}}"; break; case INT_LIT: o << std::dec << int_val; break; case BOOL_VAL: o << (bool_val ? L"true" : L"false"); break; case LAMBDA_VAL:o << L"{{ "; code->Print(o); o << L" }}"; break; case NATIVE_VAL:o << L"{{NATIVE}}"; break; case STRING_LIT:PrintEscapedString(o, *string_val); break; case NAME_CODE: o << L"@"; PrintEscapedString(o, *string_val); break; case CONS_CTOR: case EMPTY_LIT: o << L"["; PrintList(o, L", "); o << L"]"; break; case ENVIR_VAL: o << L"{{ENVIR "; for(ENVIR_TABLE::const_iterator i=envir_table->begin(); i!=envir_table->end(); ++i) { o << *i->first->string_val << L"= "; i->second->Print(o); } o << L"}}"; break; case COMBINE_CODE: op->Print(o); o << L"("; operands->PrintList(o, L", "); o << L")"; break; } } void CELL::PrintList(OSTREAM& o, STRING const& separator)const { for(CELL const* c=this; c->type==CONS_CTOR; c=c->tail) { if(c!=this) o << separator; c->head->Print(o); } } void CELL::PrintEscapedString(OSTREAM& o, STRING const& s) { o << L"\""; for(STRING::const_iterator i=s.begin(); i!=s.end(); ++i) { if(*i<32) o << L"\\x" << std::hex << (unsigned int)(unsigned)*i << L";"; else o << *i; } o << L"\""; } // // ALLOCATION AND GARBAGE COLLECTION // CELL& Script::CreateCell(CELL_TYPE ct, CELL* first, CELL* second) { CELL* p; if(m_FirstUnused==NULL) { CELL c; m_Cells.push_back(c); p=&m_Cells.back(); } else { p=m_FirstUnused; m_FirstUnused=p->next_unused; } p->mark=false; p->type=ct; p->head=first; p->tail=second; return *p; } CELL& Script::CreateString(STRING const& val) { CELL& c=CreateCell(STRING_LIT); c.string_val=new STRING(val); return c; } CELL& Script::CreateName(STRING const& val) { STRING_MAP::const_iterator i=m_InternedStrings.find(val); if(i==m_InternedStrings.end()) { CELL& c=CreateCell(NAME_CODE); i=m_InternedStrings.insert(std::make_pair(val, &c)).first; c.string_val=&i->first; } return *i->second; } CELL& Script::CreateEnvir(CELL* parent) { CELL& c=CreateCell(ENVIR_VAL); c.envir_table=new ENVIR_TABLE; c.parent_envir=parent; return c; } CELL& Script::CreateNative(NATIVE native) { CELL& c=CreateCell(NATIVE_VAL); c.native=native; return c; } int Script::GarbageCollect() { GCMark(m_GlobalEnvir); return GCSweep(); } void Script::GCMark(CELL* c) { if(c==NULL || c->mark) return; c->mark=true; switch(c->type) { case UNUSED: throw L"Marking unused cell"; case CONS_CTOR: GCMark(c->head); GCMark(c->tail); break; case LAMBDA_VAL: GCMark(c->code); GCMark(c->closure); break; case COMBINE_CODE: GCMark(c->op); GCMark(c->operands); break; case ENVIR_VAL: GCMark(c->parent_envir); for(ENVIR_TABLE::const_iterator i=c->envir_table->begin(); i!=c->envir_table->end(); ++i) { GCMark(i->first); GCMark(i->second); } break; } } int Script::GCSweep() { int count=0; for(CELL_STORAGE::iterator i=m_Cells.begin(); i!=m_Cells.end(); ++i) { if(i->mark) { i->mark=false; continue; } if(i->type==UNUSED) continue; ++count; switch(i->type) { case STRING_LIT: delete i->string_val; break; case ENVIR_VAL: delete i->envir_table; break; case NAME_CODE: m_InternedStrings.erase(*i->string_val); break; } i->type=UNUSED; i->next_unused=m_FirstUnused; m_FirstUnused=&*i; } return count; } // // EVALUATION // CELL& Script::Evaluate(CELL& c, CELL& envir) { CELL* aux; switch(c.type) { //Non evaluable case UNUSED: throw L"Evaluating an unused cell"; default: throw L"Evaluating an unknown cell"; case LAMBDA_VAL:throw L"Evaluating a lambda value"; case NATIVE_VAL:throw L"Evaluating a native"; case ENVIR_VAL: throw L"Evaluating an environment"; //Literals case INT_LIT: case BOOL_VAL: case STRING_LIT: case EMPTY_LIT: return c; //Code constructors case CONS_CTOR: return CreateCell(CONS_CTOR, &Evaluate(*c.head, envir), &Evaluate(*c.tail, envir)); case NAME_CODE: if((aux=FindName(c, envir))==NULL) throw L"Unknown name"; return *aux; case COMBINE_CODE: switch((aux=&Evaluate(*c.op, envir))->type) { case LAMBDA_VAL: return ApplyLambda(*aux->code, *c.operands, *aux->closure, envir); case NATIVE_VAL: return aux->native(*this, *c.operands, envir); default: throw L"Non-combinable value"; } } } CELL& Script::EvaluateInSequence(CELL& c, CELL& envir) { CELL* aux=&CreateCell(EMPTY_LIT); for(CELL* p=&c; p->type==CONS_CTOR; p=p->tail) aux=&Evaluate(*p->head, envir); return *aux; } CELL* Script::FindName(CELL& name, CELL& envir) { //Try to find the name in current environment ENVIR_TABLE::const_iterator i=envir.envir_table->find(&name); if(i!=envir.envir_table->end()) return i->second; //Failed, try parent if(envir.parent_envir!=NULL) return FindName(name, *envir.parent_envir); //Failed also, not found return NULL; } void Script::ChangeName(CELL& name, CELL& envir, CELL& value) { //Try to find the name in current environment ENVIR_TABLE::iterator i=envir.envir_table->find(&name); if(i!=envir.envir_table->end()) { (*envir.envir_table)[&name]=&value; return; } //Failed, try parent if(envir.parent_envir!=NULL) return ChangeName(name, *envir.parent_envir, value); //Failed also, not found throw L"Name to be changed is undefined"; } CELL& Script::ApplyLambda(CELL& code, CELL& args, CELL& closure, CELL& envir) { CELL& new_envir=CreateEnvir(&closure); CELL* params=code.head; CELL* arguments=&args; while(params->type==CONS_CTOR && arguments->type==CONS_CTOR) { (*new_envir.envir_table)[params->head]=&Evaluate(*arguments->head, envir); params=params->tail, arguments=arguments->tail; } if((params->type!=CONS_CTOR) != (arguments->type!=CONS_CTOR)) throw L"Invalid arity"; return EvaluateInSequence(*code.tail->head, new_envir); }
domingo, 9 de octubre de 2011
miniSL parte 16 - El código hasta ahora
Como prometí en la última entrada de esta serie, aquí está el código que tenemos desarrollado hasta ahora. Ya llevamos 425 líneas de código. Más o menos la mitad de toda la implementación. Faltan por desarrollar el reconocimiento sintáctico, las funciones nativas y el REPL. Empezaremos en la siguiente entrada con el reconocimiento sintáctico.
Etiquetas:
miniSL
Suscribirse a:
Enviar comentarios (Atom)
0 comentarios:
Publicar un comentario