baf_read.hh

00001 // This file is a part of Aurelia.
00002 // Copyright (C) 2010  Valentin David
00003 // Copyright (C) 2010  University of Bergen
00004 //
00005 // This program is free software: you can redistribute it and/or modify
00006 // it under the terms of the GNU General Public License as published by
00007 // the Free Software Foundation, either version 3 of the License, or
00008 // (at your option) any later version.
00009 //
00010 // This program is distributed in the hope that it will be useful,
00011 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013 // GNU General Public License for more details.
00014 //
00015 // You should have received a copy of the GNU General Public License
00016 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017 #ifndef __BAF_READ_HH
00018 # define __BAF_READ_HH
00019 
00020 # include "../fast/invert_bits.hh"
00021 # include "term.hh"
00022 # include "constructor.hh"
00023 # include <fstream>
00024 # include <sstream>
00025 # include <cstdlib>
00026 
00027 # include <aurelia-config.hh>
00028 
00029 # define BAF_MAGIC       0xbaf
00030 # define BAF_VERSION     0x0300
00031 # define INT_SIZE_IN_BAF 32
00032 
00033 namespace aurelia {
00034 
00035 struct baf_istream {
00036   std::istream& stream;
00037   baf_istream(std::istream& in): stream(in), bits_in_buffer(0) {
00038     in.exceptions(std::istream::eofbit
00039                   | std::ifstream::failbit
00040                   | std::ifstream::badbit);
00041   }
00042 
00043 private:
00044   int bit_buffer;
00045   int bits_in_buffer;
00046 
00047 public:
00048   void flush_bits() {
00049     bits_in_buffer = 0;
00050   }
00051 
00052   void read_bits(int nr_bits, unsigned &val) {
00053     val = 0;
00054     if (nr_bits <= 2) {
00055       int mask = 1;
00056       for (int cur_bit = 0; cur_bit < nr_bits; ++cur_bit) {
00057         if (bits_in_buffer == 0) {
00058           stream.read((char*)&bit_buffer, 1);
00059           bits_in_buffer = 8;
00060         }
00061         val |= ((bit_buffer << (8-bits_in_buffer)) & 0x80 ? mask : 0);
00062         mask <<= 1;
00063         bits_in_buffer--;
00064       }
00065       return ;
00066     }
00067 
00068     int shift = INT_SIZE_IN_BAF;
00069     while (nr_bits > 0) {
00070       if (bits_in_buffer == 0) {
00071         bits_in_buffer = (nr_bits+7)/8;
00072         bit_buffer = 0;
00073 #ifdef WORDS_BIGENDIAN
00074         stream.read((char*)&bit_buffer, bits_in_buffer);
00075 #else
00076         stream.read(((char*)&bit_buffer)+4-bits_in_buffer, bits_in_buffer);
00077         char a = ((char*)&bit_buffer)[0];
00078         ((char*)&bit_buffer)[0] = ((char*)&bit_buffer)[3];
00079         ((char*)&bit_buffer)[3] = a;
00080         a = ((char*)&bit_buffer)[1];
00081         ((char*)&bit_buffer)[1] = ((char*)&bit_buffer)[2];
00082         ((char*)&bit_buffer)[2] = a;
00083 #endif
00084         bits_in_buffer *= 8;
00085       }
00086       if (nr_bits >= bits_in_buffer) {
00087         shift -= bits_in_buffer;
00088         if (bits_in_buffer == 32)
00089           val |= bit_buffer << shift;
00090         else
00091           val |= ((~(-1<<bits_in_buffer)) & bit_buffer) << shift;
00092         nr_bits -= bits_in_buffer;
00093         bits_in_buffer = 0;
00094       } else {
00095         shift -= nr_bits;
00096         if (bits_in_buffer == 32)
00097           val |= (bit_buffer >> (bits_in_buffer - nr_bits)) << shift;
00098         else
00099           val |= ((~(-1<<bits_in_buffer)) & (bit_buffer >> (bits_in_buffer - nr_bits))) << shift;
00100         bits_in_buffer -= nr_bits;
00101         nr_bits = 0;
00102       }
00103     }
00104     fast::invert_bits(val);
00105   }
00106 };
00107 
00108 baf_istream& operator>>(baf_istream& s, unsigned int& val) {
00109   unsigned char c0;
00110   s.stream.read((char*)&c0, 1);
00111   if ((c0 & 0x80) == 0) {
00112     val = c0;
00113     return s;
00114   }
00115 
00116   unsigned char c1;
00117   s.stream.read((char*)&c1,1);
00118 
00119   if ((c0 & 0x40) == 0) {
00120     val = c1 + ((c0 & ~0xc0) << 8);
00121     return s;
00122   }
00123 
00124   unsigned char c2;
00125   s.stream.read((char*)&c2,1);
00126 
00127   if ((c0 & 0x20) == 0) {
00128     val = c2 + (c1 << 8) + ((c0 & ~0xe0) << 16);
00129     return s;
00130   }
00131 
00132   unsigned char c3;
00133   s.stream.read((char*)&c3,1);
00134 
00135   if ((c0 & 0x10) == 0) {
00136     val = c3 + (c2 << 8) + (c1 << 16) +
00137       ((c0 & ~0xf0) << 24);
00138     return s;
00139   }
00140 
00141   unsigned char c4;
00142   s.stream.read((char*)&c4,1);
00143   val = c4 + (c3 << 8) + (c2 << 16) + (c1 << 24);
00144   return s;
00145 }
00146 
00147 baf_istream& operator>>(baf_istream& s, int& val) {
00148   s >> (unsigned&)val;
00149   return s;
00150 }
00151 
00152 baf_istream& operator>>(baf_istream& s, std::string& val) {
00153   unsigned len;
00154   s >> len;
00155 
00156   char c;
00157   for (unsigned i = 0; i < len; ++i) {
00158     s.stream.read(&c,1);
00159     val += c;
00160   }
00161   return s;
00162 }
00163 
00164 template <typename pool>
00165 struct sym_read_entry
00166 {
00167   untyped_constructor<pool>*   sym;
00168   unsigned    arity;
00169   unsigned   nr_terms;
00170   int    term_width;
00171   untyped_term<pool> **terms;
00172   unsigned *nr_topsyms;
00173   int   *sym_width;
00174   int  **topsyms;
00175 
00176   sym_read_entry() {
00177     sym = NULL;
00178     terms = NULL;
00179     nr_topsyms = NULL;
00180     sym_width = NULL;
00181     topsyms = NULL;
00182   }
00183 
00184   ~sym_read_entry() {
00185     delete sym;
00186     if (terms != NULL) {
00187       for (unsigned i = 0; i < nr_terms; ++i) {
00188         delete terms[i];
00189       }
00190       delete[] terms;
00191     }
00192     if (sym_width != NULL)
00193       delete[] sym_width;
00194     if (topsyms != NULL) {
00195       for (unsigned i = 0; i < arity; ++i) {
00196         if (topsyms[i] != NULL) {
00197           delete[] topsyms[i];
00198         }
00199       }
00200       delete[] topsyms;
00201     }
00202     if (nr_topsyms != NULL)
00203       delete[] nr_topsyms;
00204   }
00205 };
00206 
00207 unsigned bit_width(unsigned val)
00208 {
00209   unsigned nr_bits = 0;
00210 
00211   if (val <= 1)
00212     return 0;
00213 
00214   while (val) {
00215     val>>=1;
00216     nr_bits++;
00217   }
00218 
00219   return nr_bits;
00220 }
00221 
00222 struct bad_format {};
00223 
00224 template<typename pool>
00225 untyped_term<pool>* read_term(sym_read_entry<pool>* read_symbols, int top,
00226                      baf_istream& st) {
00227   unsigned val;
00228   sym_read_entry<pool>* t = read_symbols + top;
00229   untyped_term<pool>* args = NULL;
00230   if ((t->arity))
00231     args = (untyped_term<pool>*)malloc((t->arity)*sizeof(untyped_term<pool>));
00232 
00233   unsigned i=0;
00234   try {
00235     for (; i <  t->arity; ++i) {
00236       st.read_bits(t->sym_width[i], val);
00237       if (val >= t->nr_topsyms[i]) {
00238         throw bad_format();
00239       }
00240       unsigned argn = t->topsyms[i][val];
00241       sym_read_entry<pool>* arg = &read_symbols[argn];
00242       st.read_bits(arg->term_width, val);
00243       if (val >= arg->nr_terms) {
00244         throw bad_format();
00245       }
00246       if (!arg->terms[val]) {
00247         arg->terms[val] = read_term(read_symbols, argn, st);
00248       }
00249       new (args+i) untyped_term<pool>(*arg->terms[val]);
00250     }
00251 
00252     if (untyped_constructor<pool>::AS_INT == *(t->sym)) {
00253       st.read_bits(INT_SIZE_IN_BAF, val);
00254       std::stringstream os;
00255       os << val;
00256       if (args != NULL)
00257         throw bad_format();
00258       return new untyped_term<pool>(untyped_constructor<pool>(os.str(), 0), NULL);
00259     } else if (untyped_constructor<pool>::AS_REAL == *(t->sym)) {
00260       st.flush_bits();
00261       std::string s;
00262       st >> s;
00263       if (args != NULL)
00264         throw bad_format();
00265       return new untyped_term<pool>(untyped_constructor<pool>(s, 0), NULL);
00266     } else if (untyped_constructor<pool>::AS_BLOB == *(t->sym)) {
00267       st.flush_bits();
00268       std::string s;
00269       st >> s;
00270       if (args != NULL)
00271         throw bad_format();
00272       return new untyped_term<pool>(untyped_constructor<pool>(s, 0), NULL);
00273     }
00274   }
00275   catch (...) {
00276     for (unsigned j = 0; j < i; ++j) {
00277       (args+j)->~untyped_term<pool>();
00278     }
00279     free(args);
00280     throw ;
00281   }
00282 
00283   return new untyped_term<pool>(*(t->sym), args);
00284 }
00285 
00286 template <typename pool>
00287 untyped_term<pool> read_baf(const pool&, std::istream& in)
00288 {
00289   unsigned val;
00290   baf_istream st(in);
00291 
00292   st >> val;
00293   if (val == 0)
00294     st >> val;
00295   if (val != BAF_MAGIC) {
00296     throw bad_format();
00297   }
00298   st >> val;
00299   if (val != BAF_VERSION) {
00300     throw bad_format();
00301   }
00302   unsigned nr_unique_symbols;
00303   st >> nr_unique_symbols;
00304   unsigned nr_unique_terms;
00305   st >> nr_unique_terms;
00306   sym_read_entry<pool>* read_symbols =
00307     new sym_read_entry<pool>[nr_unique_symbols];
00308   untyped_term<pool>* pret;
00309   try {
00310     for (unsigned i = 0; i < nr_unique_symbols; ++i) {
00311       std::string s;
00312       st >> s;
00313       st >> read_symbols[i].arity;
00314       st >> val;
00315       if (val) {
00316         s = "\"" + s + "\"";
00317       }
00318       read_symbols[i].sym = new untyped_constructor<pool>(s, read_symbols[i].arity);
00319       st >> read_symbols[i].nr_terms;
00320       read_symbols[i].term_width = bit_width(read_symbols[i].nr_terms);
00321       read_symbols[i].terms = new untyped_term<pool>*[read_symbols[i].nr_terms];
00322       for (unsigned j = 0; j < read_symbols[i].nr_terms; ++j)
00323         read_symbols[i].terms[j] = NULL;
00324       if (read_symbols[i].arity == 0) {
00325       } else {
00326         read_symbols[i].nr_topsyms = new unsigned[read_symbols[i].arity];
00327         read_symbols[i].sym_width = new int[read_symbols[i].arity];
00328         read_symbols[i].topsyms = new int*[read_symbols[i].arity];
00329         for (unsigned j = 0; j < read_symbols[i].arity; ++j) {
00330           read_symbols[i].topsyms[j] = NULL;
00331         }
00332         for (unsigned j = 0; j < read_symbols[i].arity; ++j) {
00333           st >> val;
00334           read_symbols[i].nr_topsyms[j] = val;
00335           read_symbols[i].sym_width[j] = bit_width(val);
00336           read_symbols[i].topsyms[j] = new int[val];
00337           for (unsigned k=0; k < read_symbols[i].nr_topsyms[j]; ++k) {
00338             st >> read_symbols[i].topsyms[j][k];
00339           }
00340         }
00341       }
00342     }
00343 
00344     st >> val;
00345     pret = read_term<pool>(read_symbols, val, st);
00346   }
00347   catch (...) {
00348     delete[] read_symbols;
00349     throw ;
00350   }
00351 
00352   untyped_term<pool> ret(*pret);
00353   delete pret;
00354   delete[] read_symbols;
00355   return ret;
00356 }
00357 
00358 }
00359 
00360 #endif