nis-util  1.0.D108
lib/rcstring.cc
Go to the documentation of this file.
00001 //
00002 // nis-util - NIS Administration Utilities
00003 // Copyright (C) 2001-2003, 2008, 2009, 2011, 2012 Peter Miller
00004 //
00005 // This program is free software; you can redistribute it and/or modify
00006 // it under the terms of the GNU General Public License as published by
00007 // the Free Software Foundation; either version 2 of the License, or (at
00008 // your option) any later version.
00009 //
00010 // This program is distributed in the hope that it will be useful,
00011 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013 // General Public License for more details.
00014 //
00015 // You should have received a copy of the GNU General Public License
00016 // along with this program. If not, see <http://www.gnu.org/licenses/>.
00017 //
00018 
00019 #include <lib/ac/assert.h>
00020 #include <lib/ac/ctype.h>
00021 #include <lib/ac/stdlib.h>
00022 #include <lib/ac/string.h>
00023 #include <libexplain/malloc.h>
00024 #include <libexplain/output.h>
00025 #include <libexplain/realloc.h>
00026 
00027 #include <lib/rcstring.h>
00028 #include <lib/mprintf.h>
00029 
00030 rcstring::string_ty **rcstring::string_ty::hash_table;
00031 rcstring::hash_ty rcstring::string_ty::hash_modulus;
00032 rcstring::hash_ty rcstring::string_ty::hash_mask;
00033 rcstring::hash_ty rcstring::string_ty::hash_load;
00034 
00035 //
00036 // maximum conversion width for numbers
00037 //
00038 #define MAX_WIDTH 509
00039 
00040 #define MAX_HASH_LEN 20
00041 
00042 
00051 static rcstring::hash_ty
00052 hash_generate(const char *s, size_t n)
00053 {
00054     if (n > MAX_HASH_LEN)
00055     {
00056         s += n - MAX_HASH_LEN;
00057         n = MAX_HASH_LEN;
00058     }
00059 
00060     rcstring::hash_ty retval = 0;
00061     while (n > 0)
00062     {
00063         retval = (retval + (retval << 1)) ^ (unsigned char)*s++;
00064         --n;
00065     }
00066     return retval;
00067 }
00068 
00069 
00070 void
00071 rcstring::string_ty::initialize(void)
00072 {
00073     if (hash_table)
00074         return;
00075     hash_modulus = 1 << 8; // MUST be a power of 2
00076     hash_mask = hash_modulus - 1;
00077     hash_load = 0;
00078     hash_table = new string_ty * [hash_modulus];
00079     for (rcstring::hash_ty j = 0; j < hash_modulus; ++j)
00080         hash_table[j] = 0;
00081 }
00082 
00083 
00084 void
00085 rcstring::string_ty::split(void)
00086 {
00087     //
00088     // double the modulus
00089     //
00090     // This is subtle.  If we only increase the modulus by one, the
00091     // load always hovers around 80%, so we have to do a split for
00092     // every insert.  I.e. the malloc burden os O(n) for the lifetime of
00093     // the program.  BUT if we double the modulus, the length of time
00094     // until the next split also doubles, making the probablity of a
00095     // split halve, and sigma(2**-n)=1, so the malloc burden becomes O(1)
00096     // for the lifetime of the program.
00097     //
00098     size_t new_hash_modulus = hash_modulus * 2;
00099     string_ty **new_hash_table = new string_ty * [new_hash_modulus];
00100     size_t new_hash_mask = new_hash_modulus - 1;
00101 
00102     //
00103     // now redistribute the list elements
00104     //
00105     for (size_t idx = 0; idx < hash_modulus; ++idx)
00106     {
00107         new_hash_table[idx] = 0;
00108         new_hash_table[idx + hash_modulus] = 0;
00109         string_ty *p = hash_table[idx];
00110         while (p)
00111         {
00112             string_ty *p2 = p;
00113             p = p->next;
00114 
00115             assert((p2->str_hash & hash_mask) == idx);
00116             rcstring::hash_ty new_idx = p2->str_hash & new_hash_mask;
00117             p2->next = new_hash_table[new_idx];
00118             new_hash_table[new_idx] = p2;
00119         }
00120     }
00121 
00122     //
00123     // replace old hash table with new one
00124     //
00125     delete [] hash_table;
00126     hash_table = new_hash_table;
00127     hash_modulus = new_hash_modulus;
00128     hash_mask = new_hash_mask;
00129 }
00130 
00131 
00132 rcstring::string_ty *
00133 rcstring::string_ty::from_c(const char *s)
00134 {
00135     return n_from_c(s, strlen(s));
00136 }
00137 
00138 
00139 rcstring::string_ty::string_ty()
00140 {
00141     construct("", 0);
00142 }
00143 
00144 
00145 rcstring::string_ty::string_ty(const char *s)
00146 {
00147     construct(s, strlen(s));
00148 }
00149 
00150 
00151 rcstring::string_ty::string_ty(const char *s, size_t len)
00152 {
00153     construct(s, len);
00154 }
00155 
00156 
00157 void
00158 rcstring::string_ty::construct(const char *s, size_t length)
00159 {
00160     text = (char *)explain_malloc_or_die(length + 1);
00161     memcpy(text, s, length);
00162     text[length] = 0;
00163 
00164     str_length = length;
00165     references = 1;
00166 
00167     str_hash = 0; // caller will tickle
00168     next = 0; // caller will tickle
00169 }
00170 
00171 
00172 rcstring::string_ty *
00173 rcstring::string_ty::n_from_c(const char *s, size_t length)
00174 {
00175     rcstring::hash_ty hash = hash_generate(s, length);
00176     if (!hash_table)
00177         initialize();
00178     rcstring::hash_ty index = hash & hash_mask;
00179     assert(index < hash_modulus);
00180 
00181     for (string_ty *p = hash_table[index]; p; p = p->next)
00182     {
00183         if
00184         (
00185             p->str_hash == hash
00186         &&
00187             p->str_length == length
00188         &&
00189             !memcmp(p->text, s, length)
00190         )
00191         {
00192             p->one_more();
00193             return p;
00194         }
00195     }
00196 
00197     string_ty *np = new string_ty(s, length);
00198     np->str_hash = hash;
00199     np->next = hash_table[index];
00200     hash_table[index] = np;
00201 
00202     hash_load++;
00203     if (hash_load * 10 > hash_modulus * 8)
00204         split();
00205     return np;
00206 }
00207 
00208 
00209 rcstring::string_ty::~string_ty()
00210 {
00211     //
00212     //  find the hash bucket it was in,
00213     //  and remove it
00214     //
00215     rcstring::hash_ty index = str_hash & hash_mask;
00216     assert(index < hash_modulus);
00217     for
00218     (
00219         string_ty **spp = &hash_table[index];
00220         *spp;
00221         spp = &(*spp)->next
00222     )
00223     {
00224         if (*spp == this)
00225         {
00226             *spp = next;
00227             --hash_load;
00228 
00229             //
00230             // Now nuke the instance variables
00231             //
00232             delete text;
00233             str_hash = 0;
00234             next = 0;
00235             references = 0;
00236             str_length = 0;
00237             text = 0;
00238             return;
00239         }
00240     }
00241     // should never reach here!
00242     explain_output_error_and_die("attempted to free non-existent string (bug)");
00243 }
00244 
00245 
00246 rcstring
00247 rcstring::catenate(const rcstring &lhs, const rcstring &rhs)
00248 {
00249     static char *tmp;
00250     static size_t tmplen;
00251     size_t length = lhs.size() + rhs.size();
00252     if (tmplen < length)
00253     {
00254         tmplen = length;
00255         tmp = (char *)explain_realloc_or_die(tmp, tmplen);
00256     }
00257     memcpy(tmp, lhs.c_str(), lhs.size());
00258     memcpy(tmp + lhs.size(), rhs.c_str(), rhs.size());
00259     return rcstring(tmp, length);
00260 }
00261 
00262 
00263 bool
00264 operator<(const rcstring &lhs, const rcstring &rhs)
00265 {
00266     return (strcmp(lhs.c_str(), rhs.c_str()) < 0);
00267 }
00268 
00269 bool
00270 operator<=(const rcstring &lhs, const rcstring &rhs)
00271 {
00272     return (strcmp(lhs.c_str(), rhs.c_str()) <= 0);
00273 }
00274 
00275 bool
00276 operator>(const rcstring &lhs, const rcstring &rhs)
00277 {
00278     return (strcmp(lhs.c_str(), rhs.c_str()) > 0);
00279 }
00280 
00281 bool
00282 operator>=(const rcstring &lhs, const rcstring &rhs)
00283 {
00284     return (strcmp(lhs.c_str(), rhs.c_str()) >= 0);
00285 }
00286 
00287 
00288 rcstring
00289 rcstring::upcase(void)
00290     const
00291 {
00292     static char *tmp;
00293     static size_t tmplen;
00294     if (tmplen < size())
00295     {
00296         tmplen = size();
00297         tmp = (char *)explain_realloc_or_die(tmp, tmplen);
00298     }
00299 
00300     const char *cp1;
00301     char *cp2;
00302     for (cp1 = c_str(), cp2 = tmp; *cp1; ++cp1, ++cp2)
00303     {
00304         int c = (unsigned char)*cp1;
00305         if (islower(c))
00306             c = toupper(c);
00307         *cp2 = c;
00308     }
00309     return rcstring(tmp, size());
00310 }
00311 
00312 
00313 rcstring
00314 rcstring::downcase(void)
00315     const
00316 {
00317     static char *tmp;
00318     static size_t tmplen;
00319     if (tmplen < size())
00320     {
00321         tmplen = size();
00322         tmp = (char *)explain_realloc_or_die(tmp, tmplen);
00323     }
00324 
00325     const char *cp1;
00326     char *cp2;
00327     for (cp1 = c_str(), cp2 = tmp; *cp1; ++cp1, ++cp2)
00328     {
00329         int c = (unsigned char)*cp1;
00330         if (isupper(c))
00331             c = tolower(c);
00332         *cp2 = c;
00333     }
00334     return rcstring(tmp, size());
00335 }
00336 
00337 
00338 rcstring
00339 rcstring::format(const char *fmt, ...)
00340 {
00341     va_list ap;
00342     va_start(ap, fmt);
00343     rcstring result(vmprintfe(fmt, ap));
00344     va_end(ap);
00345     return result;
00346 }
00347 
00348 
00349 rcstring
00350 rcstring::vformat(const char *fmt, va_list ap)
00351 {
00352     return rcstring(vmprintfe(fmt, ap));
00353 }
00354 
00355 
00356 void
00357 rcstring::string_ty::one_more(void)
00358 {
00359     // assert(references > 0);
00360     references++;
00361 }
00362 
00363 
00364 void
00365 rcstring::string_ty::one_less(void)
00366 {
00367     // assert(references > 0);
00368     references--;
00369     if (references <= 0)
00370         delete this;
00371 }
00372 
00373 
00374 std::vector<rcstring>
00375 rcstring::break_up(const char *sep, bool ewhite)
00376     const
00377 {
00378     if (!sep)
00379     {
00380         sep = " \t\n\f\r";
00381         ewhite = 1;
00382     }
00383     std::vector<rcstring> slp;
00384     const char *cp = c_str();
00385     bool more = false;
00386     while (*cp || more)
00387     {
00388         if (ewhite)
00389             while (isspace((unsigned char)*cp))
00390                 cp++;
00391         if (!*cp && !more)
00392             break;
00393         more = 0;
00394         const char *cp1 = cp;
00395         while (*cp && !strchr(sep, *cp))
00396             cp++;
00397         const char *cp2 = cp;
00398         if (*cp)
00399         {
00400             cp2 = cp + 1;
00401             more = 1;
00402         }
00403         if (ewhite)
00404         {
00405             while (cp > cp1 && isspace((unsigned char)cp[-1]))
00406                 cp--;
00407         }
00408         slp.push_back(rcstring(cp1, cp - cp1));
00409         cp = cp2;
00410     }
00411     return slp;
00412 }
00413 
00414 
00415 rcstring
00416 rcstring::substr(size_t start, size_t len)
00417     const
00418 {
00419     if (start >= size())
00420         return rcstring("");
00421     if (start + len > size())
00422         len = size() - start;
00423     return rcstring(c_str() + start, len);
00424 }
00425 
00426 
00427 bool
00428 rcstring::to_long(long &n, int base)
00429     const
00430 {
00431     char *end = 0;
00432     n = strtol(c_str(), &end, base);
00433     return (end != c_str() && *end == 0);
00434 }
00435 
00436 
00437 // vim: set ts=8 sw=4 et :