/* ** string.c - String class ** ** See Copyright Notice in mruby.h */ #ifdef _MSC_VER # define _CRT_NONSTDC_NO_DEPRECATE #endif #ifndef MRB_WITHOUT_FLOAT #include <float.h> #endif #include <limits.h> #include <stddef.h> #include <stdlib.h> #include <string.h> #include <mruby.h> #include <mruby/array.h> #include <mruby/class.h> #include <mruby/range.h> #include <mruby/string.h> #include <mruby/re.h> typedef struct mrb_shared_string { mrb_bool nofree : 1; int refcnt; char *ptr; mrb_int len; } mrb_shared_string; const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz"; #define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class)) static struct RString* str_new_static(mrb_state *mrb, const char *p, size_t len) { struct RString *s; if (len >= MRB_INT_MAX) { mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big"); } s = mrb_obj_alloc_string(mrb); s->as.heap.len = (mrb_int)len; s->as.heap.aux.capa = 0; /* nofree */ s->as.heap.ptr = (char *)p; s->flags = MRB_STR_NOFREE; return s; } static struct RString* str_new(mrb_state *mrb, const char *p, size_t len) { struct RString *s; if (p && mrb_ro_data_p(p)) { return str_new_static(mrb, p, len); } s = mrb_obj_alloc_string(mrb); if (len <= RSTRING_EMBED_LEN_MAX) { RSTR_SET_EMBED_FLAG(s); RSTR_SET_EMBED_LEN(s, len); if (p) { memcpy(s->as.ary, p, len); } } else { if (len >= MRB_INT_MAX) { mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big"); } s->as.heap.ptr = (char *)mrb_malloc(mrb, len+1); s->as.heap.len = (mrb_int)len; s->as.heap.aux.capa = (mrb_int)len; if (p) { memcpy(s->as.heap.ptr, p, len); } } RSTR_PTR(s)[len] = '\0'; return s; } static inline void str_with_class(mrb_state *mrb, struct RString *s, mrb_value obj) { s->c = mrb_str_ptr(obj)->c; } static mrb_value mrb_str_new_empty(mrb_state *mrb, mrb_value str) { struct RString *s = str_new(mrb, 0, 0); str_with_class(mrb, s, str); return mrb_obj_value(s); } MRB_API mrb_value mrb_str_new_capa(mrb_state *mrb, size_t 
capa) { struct RString *s; s = mrb_obj_alloc_string(mrb); if (capa >= MRB_INT_MAX) { mrb_raise(mrb, E_ARGUMENT_ERROR, "string capacity size too big"); } s->as.heap.len = 0; s->as.heap.aux.capa = (mrb_int)capa; s->as.heap.ptr = (char *)mrb_malloc(mrb, capa+1); RSTR_PTR(s)[0] = '\0'; return mrb_obj_value(s); } #ifndef MRB_STR_BUF_MIN_SIZE # define MRB_STR_BUF_MIN_SIZE 128 #endif MRB_API mrb_value mrb_str_buf_new(mrb_state *mrb, size_t capa) { if (capa < MRB_STR_BUF_MIN_SIZE) { capa = MRB_STR_BUF_MIN_SIZE; } return mrb_str_new_capa(mrb, capa); } static void resize_capa(mrb_state *mrb, struct RString *s, size_t capacity) { #if SIZE_MAX > MRB_INT_MAX mrb_assert(capacity < MRB_INT_MAX); #endif if (RSTR_EMBED_P(s)) { if (RSTRING_EMBED_LEN_MAX < capacity) { char *const tmp = (char *)mrb_malloc(mrb, capacity+1); const mrb_int len = RSTR_EMBED_LEN(s); memcpy(tmp, s->as.ary, len); RSTR_UNSET_EMBED_FLAG(s); s->as.heap.ptr = tmp; s->as.heap.len = len; s->as.heap.aux.capa = (mrb_int)capacity; } } else { s->as.heap.ptr = (char*)mrb_realloc(mrb, RSTR_PTR(s), capacity+1); s->as.heap.aux.capa = (mrb_int)capacity; } } MRB_API mrb_value mrb_str_new(mrb_state *mrb, const char *p, size_t len) { return mrb_obj_value(str_new(mrb, p, len)); } /* * call-seq: (Caution! NULL string) * String.new(str="") => new_str * * Returns a new string object containing a copy of <i>str</i>. 
*/ MRB_API mrb_value mrb_str_new_cstr(mrb_state *mrb, const char *p) { struct RString *s; size_t len; if (p) { len = strlen(p); } else { len = 0; } s = str_new(mrb, p, len); return mrb_obj_value(s); } MRB_API mrb_value mrb_str_new_static(mrb_state *mrb, const char *p, size_t len) { struct RString *s = str_new_static(mrb, p, len); return mrb_obj_value(s); } static void str_decref(mrb_state *mrb, mrb_shared_string *shared) { shared->refcnt--; if (shared->refcnt == 0) { if (!shared->nofree) { mrb_free(mrb, shared->ptr); } mrb_free(mrb, shared); } } void mrb_gc_free_str(mrb_state *mrb, struct RString *str) { if (RSTR_EMBED_P(str)) /* no code */; else if (RSTR_SHARED_P(str)) str_decref(mrb, str->as.heap.aux.shared); else if (!RSTR_NOFREE_P(str) && !RSTR_FSHARED_P(str)) mrb_free(mrb, str->as.heap.ptr); } #ifdef MRB_UTF8_STRING static const char utf8len_codepage[256] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1, }; static mrb_int utf8len(const char* p, const char* e) { mrb_int len; mrb_int i; len = utf8len_codepage[(unsigned char)*p]; if (p + len > e) return 1; for (i = 1; i < len; ++i) if ((p[i] & 0xc0) != 0x80) return 1; return len; } static mrb_int utf8_strlen(mrb_value str, mrb_int len) { mrb_int total = 0; char* p = RSTRING_PTR(str); char* e = p; if (RSTRING(str)->flags & MRB_STR_NO_UTF) { return RSTRING_LEN(str); } e += len < 0 ? 
RSTRING_LEN(str) : len; while (p<e) { p += utf8len(p, e); total++; } if (RSTRING_LEN(str) == total) { RSTRING(str)->flags |= MRB_STR_NO_UTF; } return total; } #define RSTRING_CHAR_LEN(s) utf8_strlen(s, -1) /* map character index to byte offset index */ static mrb_int chars2bytes(mrb_value s, mrb_int off, mrb_int idx) { mrb_int i, b, n; const char *p = RSTRING_PTR(s) + off; const char *e = RSTRING_END(s); for (b=i=0; p<e && i<idx; i++) { n = utf8len(p, e); b += n; p += n; } return b; } /* map byte offset to character index */ static mrb_int bytes2chars(char *p, mrb_int bi) { mrb_int i, b, n; for (b=i=0; b<bi; i++) { n = utf8len_codepage[(unsigned char)*p]; b += n; p += n; } if (b != bi) return -1; return i; } #define BYTES_ALIGN_CHECK(pos) if (pos < 0) return mrb_nil_value(); #else #define RSTRING_CHAR_LEN(s) RSTRING_LEN(s) #define chars2bytes(p, off, ci) (ci) #define bytes2chars(p, bi) (bi) #define BYTES_ALIGN_CHECK(pos) #endif static inline mrb_int mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n) { const unsigned char *x = xs, *xe = xs + m; const unsigned char *y = ys; int i; ptrdiff_t qstable[256]; /* Preprocessing */ for (i = 0; i < 256; ++i) qstable[i] = m + 1; for (; x < xe; ++x) qstable[*x] = xe - x; /* Searching */ for (; y + m <= ys + n; y += *(qstable + y[m])) { if (*xs == *y && memcmp(xs, y, m) == 0) return (mrb_int)(y - ys); } return -1; } static mrb_int mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n) { const unsigned char *x = (const unsigned char *)x0, *y = (const unsigned char *)y0; if (m > n) return -1; else if (m == n) { return memcmp(x0, y0, m) == 0 ? 
0 : -1; } else if (m < 1) { return 0; } else if (m == 1) { const unsigned char *ys = (const unsigned char *)memchr(y, *x, n); if (ys) return (mrb_int)(ys - y); else return -1; } return mrb_memsearch_qs((const unsigned char *)x0, m, (const unsigned char *)y0, n); } static void str_make_shared(mrb_state *mrb, struct RString *orig, struct RString *s) { mrb_shared_string *shared; mrb_int len = RSTR_LEN(orig); mrb_assert(!RSTR_EMBED_P(orig)); if (RSTR_SHARED_P(orig)) { shared = orig->as.heap.aux.shared; shared->refcnt++; s->as.heap.ptr = orig->as.heap.ptr; s->as.heap.len = len; s->as.heap.aux.shared = shared; RSTR_SET_SHARED_FLAG(s); RSTR_UNSET_EMBED_FLAG(s); } else if (RSTR_FSHARED_P(orig)) { struct RString *fs; fs = orig->as.heap.aux.fshared; s->as.heap.ptr = orig->as.heap.ptr; s->as.heap.len = len; s->as.heap.aux.fshared = fs; RSTR_SET_FSHARED_FLAG(s); RSTR_UNSET_EMBED_FLAG(s); } else if (MRB_FROZEN_P(orig) && !RSTR_POOL_P(orig)) { s->as.heap.ptr = orig->as.heap.ptr; s->as.heap.len = len; s->as.heap.aux.fshared = orig; RSTR_SET_FSHARED_FLAG(s); RSTR_UNSET_EMBED_FLAG(s); } else { shared = (mrb_shared_string *)mrb_malloc(mrb, sizeof(mrb_shared_string)); shared->refcnt = 2; shared->nofree = !!RSTR_NOFREE_P(orig); if (!shared->nofree && orig->as.heap.aux.capa > orig->as.heap.len) { shared->ptr = (char *)mrb_realloc(mrb, orig->as.heap.ptr, len+1); orig->as.heap.ptr = shared->ptr; } else { shared->ptr = orig->as.heap.ptr; } orig->as.heap.aux.shared = shared; RSTR_SET_SHARED_FLAG(orig); shared->len = len; s->as.heap.aux.shared = shared; s->as.heap.ptr = shared->ptr; s->as.heap.len = len; RSTR_SET_SHARED_FLAG(s); RSTR_UNSET_EMBED_FLAG(s); } } static mrb_value byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) { struct RString *orig, *s; orig = mrb_str_ptr(str); if (RSTR_EMBED_P(orig) || RSTR_LEN(orig) == 0 || len <= RSTRING_EMBED_LEN_MAX) { s = str_new(mrb, RSTR_PTR(orig)+beg, len); } else { s = mrb_obj_alloc_string(mrb); str_make_shared(mrb, orig, s); 
s->as.heap.ptr += beg; s->as.heap.len = len; } return mrb_obj_value(s); } #ifdef MRB_UTF8_STRING static inline mrb_value str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) { beg = chars2bytes(str, 0, beg); len = chars2bytes(str, beg, len); return byte_subseq(mrb, str, beg, len); } #else #define str_subseq(mrb, str, beg, len) byte_subseq(mrb, str, beg, len) #endif static mrb_value str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) { mrb_int clen = RSTRING_CHAR_LEN(str); if (len < 0) return mrb_nil_value(); if (clen == 0) { len = 0; } else if (beg < 0) { beg = clen + beg; } if (beg > clen) return mrb_nil_value(); if (beg < 0) { beg += clen; if (beg < 0) return mrb_nil_value(); } if (len > clen - beg) len = clen - beg; if (len <= 0) { len = 0; } return str_subseq(mrb, str, beg, len); } MRB_API mrb_int mrb_str_index(mrb_state *mrb, mrb_value str, const char *sptr, mrb_int slen, mrb_int offset) { mrb_int pos; char *s; mrb_int len; len = RSTRING_LEN(str); if (offset < 0) { offset += len; if (offset < 0) return -1; } if (len - offset < slen) return -1; s = RSTRING_PTR(str); if (offset) { s += offset; } if (slen == 0) return offset; /* need proceed one character at a time */ len = RSTRING_LEN(str) - offset; pos = mrb_memsearch(sptr, slen, s, len); if (pos < 0) return pos; return pos + offset; } static mrb_int str_index_str(mrb_state *mrb, mrb_value str, mrb_value str2, mrb_int offset) { const char *ptr; mrb_int len; ptr = RSTRING_PTR(str2); len = RSTRING_LEN(str2); return mrb_str_index(mrb, str, ptr, len, offset); } static void check_frozen(mrb_state *mrb, struct RString *s) { if (MRB_FROZEN_P(s)) { mrb_raise(mrb, E_FROZEN_ERROR, "can't modify frozen string"); } } static mrb_value str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2) { mrb_int len; check_frozen(mrb, s1); if (s1 == s2) return mrb_obj_value(s1); s1->flags &= ~MRB_STR_NO_UTF; s1->flags |= s2->flags&MRB_STR_NO_UTF; len = RSTR_LEN(s2); if (RSTR_SHARED_P(s1)) { 
str_decref(mrb, s1->as.heap.aux.shared); RSTR_UNSET_SHARED_FLAG(s1); } else if (!RSTR_EMBED_P(s1) && !RSTR_NOFREE_P(s1) && !RSTR_FSHARED_P(s1) && s1->as.heap.ptr) { mrb_free(mrb, s1->as.heap.ptr); } RSTR_UNSET_FSHARED_FLAG(s1); RSTR_UNSET_NOFREE_FLAG(s1); if (len <= RSTRING_EMBED_LEN_MAX) { RSTR_UNSET_SHARED_FLAG(s1); RSTR_UNSET_FSHARED_FLAG(s1); RSTR_SET_EMBED_FLAG(s1); memcpy(s1->as.ary, RSTR_PTR(s2), len); RSTR_SET_EMBED_LEN(s1, len); } else { str_make_shared(mrb, s2, s1); } return mrb_obj_value(s1); } static mrb_int str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos) { char *s, *sbeg, *t; struct RString *ps = mrb_str_ptr(str); mrb_int len = RSTRING_LEN(sub); /* substring longer than string */ if (RSTR_LEN(ps) < len) return -1; if (RSTR_LEN(ps) - pos < len) { pos = RSTR_LEN(ps) - len; } sbeg = RSTR_PTR(ps); s = RSTR_PTR(ps) + pos; t = RSTRING_PTR(sub); if (len) { while (sbeg <= s) { if (memcmp(s, t, len) == 0) { return (mrb_int)(s - RSTR_PTR(ps)); } s--; } return -1; } else { return pos; } } MRB_API mrb_int mrb_str_strlen(mrb_state *mrb, struct RString *s) { mrb_int i, max = RSTR_LEN(s); char *p = RSTR_PTR(s); if (!p) return 0; for (i=0; i<max; i++) { if (p[i] == '\0') { mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte"); } } return max; } #ifdef _WIN32 #include <windows.h> char* mrb_utf8_from_locale(const char *str, int len) { wchar_t* wcsp; char* mbsp; int mbssize, wcssize; if (len == 0) return strdup(""); if (len == -1) len = (int)strlen(str); wcssize = MultiByteToWideChar(GetACP(), 0, str, len, NULL, 0); wcsp = (wchar_t*) malloc((wcssize + 1) * sizeof(wchar_t)); if (!wcsp) return NULL; wcssize = MultiByteToWideChar(GetACP(), 0, str, len, wcsp, wcssize + 1); wcsp[wcssize] = 0; mbssize = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR) wcsp, -1, NULL, 0, NULL, NULL); mbsp = (char*) malloc((mbssize + 1)); if (!mbsp) { free(wcsp); return NULL; } mbssize = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR) wcsp, -1, mbsp, mbssize, NULL, NULL); 
mbsp[mbssize] = 0; free(wcsp); return mbsp; } char* mrb_locale_from_utf8(const char *utf8, int len) { wchar_t* wcsp; char* mbsp; int mbssize, wcssize; if (len == 0) return strdup(""); if (len == -1) len = (int)strlen(utf8); wcssize = MultiByteToWideChar(CP_UTF8, 0, utf8, len, NULL, 0); wcsp = (wchar_t*) malloc((wcssize + 1) * sizeof(wchar_t)); if (!wcsp) return NULL; wcssize = MultiByteToWideChar(CP_UTF8, 0, utf8, len, wcsp, wcssize + 1); wcsp[wcssize] = 0; mbssize = WideCharToMultiByte(GetACP(), 0, (LPCWSTR) wcsp, -1, NULL, 0, NULL, NULL); mbsp = (char*) malloc((mbssize + 1)); if (!mbsp) { free(wcsp); return NULL; } mbssize = WideCharToMultiByte(GetACP(), 0, (LPCWSTR) wcsp, -1, mbsp, mbssize, NULL, NULL); mbsp[mbssize] = 0; free(wcsp); return mbsp; } #endif MRB_API void mrb_str_modify(mrb_state *mrb, struct RString *s) { check_frozen(mrb, s); s->flags &= ~MRB_STR_NO_UTF; if (RSTR_SHARED_P(s)) { mrb_shared_string *shared = s->as.heap.aux.shared; if (shared->nofree == 0 && shared->refcnt == 1 && s->as.heap.ptr == shared->ptr) { s->as.heap.ptr = shared->ptr; s->as.heap.aux.capa = shared->len; RSTR_PTR(s)[s->as.heap.len] = '\0'; mrb_free(mrb, shared); } else { char *ptr, *p; mrb_int len; p = RSTR_PTR(s); len = s->as.heap.len; if (len < RSTRING_EMBED_LEN_MAX) { RSTR_SET_EMBED_FLAG(s); RSTR_SET_EMBED_LEN(s, len); ptr = RSTR_PTR(s); } else { ptr = (char *)mrb_malloc(mrb, (size_t)len + 1); s->as.heap.ptr = ptr; s->as.heap.aux.capa = len; } if (p) { memcpy(ptr, p, len); } ptr[len] = '\0'; str_decref(mrb, shared); } RSTR_UNSET_SHARED_FLAG(s); return; } if (RSTR_NOFREE_P(s) || RSTR_FSHARED_P(s)) { char *p = s->as.heap.ptr; mrb_int len = s->as.heap.len; RSTR_UNSET_FSHARED_FLAG(s); RSTR_UNSET_NOFREE_FLAG(s); RSTR_UNSET_FSHARED_FLAG(s); if (len < RSTRING_EMBED_LEN_MAX) { RSTR_SET_EMBED_FLAG(s); RSTR_SET_EMBED_LEN(s, len); } else { s->as.heap.ptr = (char *)mrb_malloc(mrb, (size_t)len+1); s->as.heap.aux.capa = len; } if (p) { memcpy(RSTR_PTR(s), p, len); } RSTR_PTR(s)[len] = 
'\0'; return; } } MRB_API mrb_value mrb_str_resize(mrb_state *mrb, mrb_value str, mrb_int len) { mrb_int slen; struct RString *s = mrb_str_ptr(str); mrb_str_modify(mrb, s); slen = RSTR_LEN(s); if (len != slen) { if (slen < len || slen - len > 256) { resize_capa(mrb, s, len); } RSTR_SET_LEN(s, len); RSTR_PTR(s)[len] = '\0'; /* sentinel */ } return str; } MRB_API char* mrb_str_to_cstr(mrb_state *mrb, mrb_value str0) { struct RString *s; if (!mrb_string_p(str0)) { mrb_raise(mrb, E_TYPE_ERROR, "expected String"); } s = str_new(mrb, RSTRING_PTR(str0), RSTRING_LEN(str0)); if ((strlen(RSTR_PTR(s)) ^ RSTR_LEN(s)) != 0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte"); } return RSTR_PTR(s); } /* * call-seq: (Caution! String("abcd") change) * String("abcdefg") = String("abcd") + String("efg") * * Returns a new string object containing a copy of <i>str</i>. */ MRB_API void mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other) { if (!mrb_string_p(other)) { other = mrb_str_to_str(mrb, other); } mrb_str_cat_str(mrb, self, other); } /* * call-seq: (Caution! String("abcd") remain) * String("abcdefg") = String("abcd") + String("efg") * * Returns a new string object containing a copy of <i>str</i>. */ MRB_API mrb_value mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b) { struct RString *s = mrb_str_ptr(a); struct RString *s2 = mrb_str_ptr(b); struct RString *t; t = str_new(mrb, 0, RSTR_LEN(s) + RSTR_LEN(s2)); memcpy(RSTR_PTR(t), RSTR_PTR(s), RSTR_LEN(s)); memcpy(RSTR_PTR(t) + RSTR_LEN(s), RSTR_PTR(s2), RSTR_LEN(s2)); return mrb_obj_value(t); } /* 15.2.10.5.2 */ /* * call-seq: (Caution! String("abcd") remain) for stack_argument * String("abcdefg") = String("abcd") + String("efg") * * Returns a new string object containing a copy of <i>str</i>. 
*/ static mrb_value mrb_str_plus_m(mrb_state *mrb, mrb_value self) { mrb_value str; mrb_get_args(mrb, "S", &str); return mrb_str_plus(mrb, self, str); } /* 15.2.10.5.26 */ /* 15.2.10.5.33 */ /* * call-seq: * "abcd".size => int * * Returns the length of string. */ static mrb_value mrb_str_size(mrb_state *mrb, mrb_value self) { mrb_int len = RSTRING_CHAR_LEN(self); return mrb_fixnum_value(len); } static mrb_value mrb_str_bytesize(mrb_state *mrb, mrb_value self) { mrb_int len = RSTRING_LEN(self); return mrb_fixnum_value(len); } /* 15.2.10.5.1 */ /* * call-seq: * str * integer => new_str * * Copy---Returns a new <code>String</code> containing <i>integer</i> copies of * the receiver. * * "Ho! " * 3 #=> "Ho! Ho! Ho! " */ static mrb_value mrb_str_times(mrb_state *mrb, mrb_value self) { mrb_int n,len,times; struct RString *str2; char *p; mrb_get_args(mrb, "i", ×); if (times < 0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument"); } if (times && MRB_INT_MAX / times < RSTRING_LEN(self)) { mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big"); } len = RSTRING_LEN(self)*times; str2 = str_new(mrb, 0, len); str_with_class(mrb, str2, self); p = RSTR_PTR(str2); if (len > 0) { n = RSTRING_LEN(self); memcpy(p, RSTRING_PTR(self), n); while (n <= len/2) { memcpy(p + n, p, n); n *= 2; } memcpy(p + n, p, len-n); } p[RSTR_LEN(str2)] = '\0'; return mrb_obj_value(str2); } /* -------------------------------------------------------------- */ #define lesser(a,b) (((a)>(b))?(b):(a)) /* ---------------------------*/ /* * call-seq: * mrb_value str1 <=> mrb_value str2 => int * > 1 * = 0 * < -1 */ MRB_API int mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2) { mrb_int len; mrb_int retval; struct RString *s1 = mrb_str_ptr(str1); struct RString *s2 = mrb_str_ptr(str2); len = lesser(RSTR_LEN(s1), RSTR_LEN(s2)); retval = memcmp(RSTR_PTR(s1), RSTR_PTR(s2), len); if (retval == 0) { if (RSTR_LEN(s1) == RSTR_LEN(s2)) return 0; if (RSTR_LEN(s1) > RSTR_LEN(s2)) return 1; return -1; } if 
(retval > 0) return 1; return -1; } /* 15.2.10.5.3 */ /* * call-seq: * str <=> other_str => -1, 0, +1 * * Comparison---Returns -1 if <i>other_str</i> is less than, 0 if * <i>other_str</i> is equal to, and +1 if <i>other_str</i> is greater than * <i>str</i>. If the strings are of different lengths, and the strings are * equal when compared up to the shortest length, then the longer string is * considered greater than the shorter one. If the variable <code>$=</code> is * <code>false</code>, the comparison is based on comparing the binary values * of each character in the string. In older versions of Ruby, setting * <code>$=</code> allowed case-insensitive comparisons; this is now deprecated * in favor of using <code>String#casecmp</code>. * * <code><=></code> is the basis for the methods <code><</code>, * <code><=</code>, <code>></code>, <code>>=</code>, and <code>between?</code>, * included from module <code>Comparable</code>. The method * <code>String#==</code> does not use <code>Comparable#==</code>. 
* * "abcdef" <=> "abcde" #=> 1 * "abcdef" <=> "abcdef" #=> 0 * "abcdef" <=> "abcdefg" #=> -1 * "abcdef" <=> "ABCDEF" #=> 1 */ static mrb_value mrb_str_cmp_m(mrb_state *mrb, mrb_value str1) { mrb_value str2; mrb_int result; mrb_get_args(mrb, "o", &str2); if (!mrb_string_p(str2)) { if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_s"))) { return mrb_nil_value(); } else if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "<=>"))) { return mrb_nil_value(); } else { mrb_value tmp = mrb_funcall(mrb, str2, "<=>", 1, str1); if (mrb_nil_p(tmp)) return mrb_nil_value(); if (!mrb_fixnum_p(tmp)) { return mrb_funcall(mrb, mrb_fixnum_value(0), "-", 1, tmp); } result = -mrb_fixnum(tmp); } } else { result = mrb_str_cmp(mrb, str1, str2); } return mrb_fixnum_value(result); } static mrb_bool str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2) { const mrb_int len = RSTRING_LEN(str1); if (len != RSTRING_LEN(str2)) return FALSE; if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), (size_t)len) == 0) return TRUE; return FALSE; } MRB_API mrb_bool mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2) { if (mrb_immediate_p(str2)) return FALSE; if (!mrb_string_p(str2)) { if (mrb_nil_p(str2)) return FALSE; if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_str"))) { return FALSE; } str2 = mrb_funcall(mrb, str2, "to_str", 0); return mrb_equal(mrb, str2, str1); } return str_eql(mrb, str1, str2); } /* 15.2.10.5.4 */ /* * call-seq: * str == obj => true or false * * Equality--- * If <i>obj</i> is not a <code>String</code>, returns <code>false</code>. * Otherwise, returns <code>false</code> or <code>true</code> * * caution:if <i>str</i> <code><=></code> <i>obj</i> returns zero. 
*/
static mrb_value
mrb_str_equal_m(mrb_state *mrb, mrb_value str1)
{
  mrb_value str2;

  mrb_get_args(mrb, "o", &str2);

  return mrb_bool_value(mrb_str_equal(mrb, str1, str2));
}
/* ---------------------------------- */
/*
 * Returns `str` itself when it is a String.  Otherwise converts it with
 * to_str and, when that conversion yields nil, with to_s.
 */
MRB_API mrb_value
mrb_str_to_str(mrb_state *mrb, mrb_value str)
{
  mrb_value s;

  if (!mrb_string_p(str)) {
    s = mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
    if (mrb_nil_p(s)) {
      s = mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s");
    }
    return s;
  }
  return str;
}

/* Returns the byte pointer of `ptr` after conversion with mrb_str_to_str(). */
MRB_API const char*
mrb_string_value_ptr(mrb_state *mrb, mrb_value ptr)
{
  mrb_value str = mrb_str_to_str(mrb, ptr);
  return RSTRING_PTR(str);
}

/* Returns the byte length of `ptr` after conversion with mrb_str_to_str(). */
MRB_API mrb_int
mrb_string_value_len(mrb_state *mrb, mrb_value ptr)
{
  mrb_value str = mrb_str_to_str(mrb, ptr);
  return RSTRING_LEN(str);
}

/* Raises NotImplementedError; `self` is not used. */
void
mrb_noregexp(mrb_state *mrb, mrb_value self)
{
  mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp class not implemented");
}

/* Raises (through mrb_noregexp) when `obj` satisfies mrb_regexp_p(). */
void
mrb_regexp_check(mrb_state *mrb, mrb_value obj)
{
  if (mrb_regexp_p(mrb, obj)) {
    mrb_noregexp(mrb, obj);
  }
}

/*
 * Returns a new string of the same class as `str` that takes over its
 * contents through str_replace().
 */
MRB_API mrb_value
mrb_str_dup(mrb_state *mrb, mrb_value str)
{
  struct RString *s = mrb_str_ptr(str);
  struct RString *dup = str_new(mrb, 0, 0);

  str_with_class(mrb, dup, str);
  return str_replace(mrb, dup, s);
}

/*
 * Single-argument element reference, dispatching on the type of `indx`:
 *   Fixnum - the one-character substring at that index, or nil when the
 *            index is out of range or the substring is empty
 *   String - a copy of `indx` when it occurs in `str`, else nil
 *   Range  - the substring selected by the range
 *   other  - converted with mrb_Integer() and then handled as a Fixnum
 * A Regexp argument raises via mrb_regexp_check().
 */
static mrb_value
mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx)
{
  mrb_int idx;

  mrb_regexp_check(mrb, indx);
  switch (mrb_type(indx)) {
    case MRB_TT_FIXNUM:
      idx = mrb_fixnum(indx);

num_index:
      /* also entered from the default case after integer conversion */
      str = str_substr(mrb, str, idx, 1);
      if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value();
      return str;

    case MRB_TT_STRING:
      if (str_index_str(mrb, str, indx, 0) != -1)
        return mrb_str_dup(mrb, indx);
      return mrb_nil_value();

    case MRB_TT_RANGE:
      goto range_arg;

    default:
      indx = mrb_Integer(mrb, indx);
      if (mrb_nil_p(indx)) {
range_arg:
        /* jumped to directly for Range arguments */
        {
          mrb_int beg, len;

          len = RSTRING_CHAR_LEN(str);
          /* NOTE(review): 1 appears to mean a usable range and 2 an
             out-of-range one - confirm against mrb_range_beg_len() */
          switch (mrb_range_beg_len(mrb, indx, &beg, &len, len, TRUE)) {
          case 1:
            return str_subseq(mrb, str, beg, len);
          case 2:
            return mrb_nil_value();
          default:
            break;
          }
        }
        mrb_raise(mrb, E_TYPE_ERROR, "can't convert to Fixnum");
      }
      idx = mrb_fixnum(indx);
      goto num_index;
  }
  return mrb_nil_value();    /* not reached */
}

/* 15.2.10.5.6  */
/* 15.2.10.5.34 */
/*
 *  call-seq:
 *     str[fixnum]                 => new_str or nil
 *     str[fixnum, fixnum]         => new_str or nil
 *     str[range]                  => new_str or nil
 *     str[regexp]                 => new_str or nil
 *     str[regexp, fixnum]         => new_str or nil
 *     str[other_str]              => new_str or nil
 *     str.slice(fixnum)           => new_str or nil
 *     str.slice(fixnum, fixnum)   => new_str or nil
 *     str.slice(range)            => new_str or nil
 *     str.slice(other_str)        => new_str or nil
 *
 *  Element Reference---If passed a single <code>Fixnum</code>, returns the
 *  one-character string at that position. If passed two <code>Fixnum</code>
 *  objects, returns a substring starting at the offset given by the first, and
 *  a length given by the second. If given a range, a substring containing
 *  characters at offsets given by the range is returned. In all three cases, if
 *  an offset is negative, it is counted from the end of <i>str</i>. Returns
 *  <code>nil</code> if the initial offset falls outside the string, the length
 *  is negative, or the beginning of the range is greater than the end.
 *
 *  If a <code>String</code> is given, that string is returned if it occurs in
 *  <i>str</i>. In both cases, <code>nil</code> is returned if there is no
 *  match.
* * a = "hello there" * a[1] #=> 101(1.8.7) "e"(1.9.2) * a[1.1] #=> "e"(1.9.2) * a[1,3] #=> "ell" * a[1..3] #=> "ell" * a[-3,2] #=> "er" * a[-4..-2] #=> "her" * a[12..-1] #=> nil * a[-2..-4] #=> "" * a["lo"] #=> "lo" * a["bye"] #=> nil */ static mrb_value mrb_str_aref_m(mrb_state *mrb, mrb_value str) { mrb_value a1, a2; mrb_int argc; argc = mrb_get_args(mrb, "o|o", &a1, &a2); if (argc == 2) { mrb_int n1, n2; mrb_regexp_check(mrb, a1); mrb_get_args(mrb, "ii", &n1, &n2); return str_substr(mrb, str, n1, n2); } if (argc != 1) { mrb_raisef(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%S for 1)", mrb_fixnum_value(argc)); } return mrb_str_aref(mrb, str, a1); } /* 15.2.10.5.8 */ /* * call-seq: * str.capitalize! => str or nil * * Modifies <i>str</i> by converting the first character to uppercase and the * remainder to lowercase. Returns <code>nil</code> if no changes are made. * * a = "hello" * a.capitalize! #=> "Hello" * a #=> "Hello" * a.capitalize! #=> nil */ static mrb_value mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str) { char *p, *pend; mrb_bool modify = FALSE; struct RString *s = mrb_str_ptr(str); mrb_str_modify(mrb, s); if (RSTR_LEN(s) == 0 || !RSTR_PTR(s)) return mrb_nil_value(); p = RSTR_PTR(s); pend = RSTR_PTR(s) + RSTR_LEN(s); if (ISLOWER(*p)) { *p = TOUPPER(*p); modify = TRUE; } while (++p < pend) { if (ISUPPER(*p)) { *p = TOLOWER(*p); modify = TRUE; } } if (modify) return str; return mrb_nil_value(); } /* 15.2.10.5.7 */ /* * call-seq: * str.capitalize => new_str * * Returns a copy of <i>str</i> with the first character converted to uppercase * and the remainder to lowercase. 
* * "hello".capitalize #=> "Hello" * "HELLO".capitalize #=> "Hello" * "123ABC".capitalize #=> "123abc" */ static mrb_value mrb_str_capitalize(mrb_state *mrb, mrb_value self) { mrb_value str; str = mrb_str_dup(mrb, self); mrb_str_capitalize_bang(mrb, str); return str; } /* 15.2.10.5.10 */ /* * call-seq: * str.chomp!(separator="\n") => str or nil * * Modifies <i>str</i> in place as described for <code>String#chomp</code>, * returning <i>str</i>, or <code>nil</code> if no modifications were made. */ static mrb_value mrb_str_chomp_bang(mrb_state *mrb, mrb_value str) { mrb_value rs; mrb_int newline; char *p, *pp; mrb_int rslen; mrb_int len; mrb_int argc; struct RString *s = mrb_str_ptr(str); argc = mrb_get_args(mrb, "|S", &rs); mrb_str_modify(mrb, s); len = RSTR_LEN(s); if (argc == 0) { if (len == 0) return mrb_nil_value(); smart_chomp: if (RSTR_PTR(s)[len-1] == '\n') { RSTR_SET_LEN(s, RSTR_LEN(s) - 1); if (RSTR_LEN(s) > 0 && RSTR_PTR(s)[RSTR_LEN(s)-1] == '\r') { RSTR_SET_LEN(s, RSTR_LEN(s) - 1); } } else if (RSTR_PTR(s)[len-1] == '\r') { RSTR_SET_LEN(s, RSTR_LEN(s) - 1); } else { return mrb_nil_value(); } RSTR_PTR(s)[RSTR_LEN(s)] = '\0'; return str; } if (len == 0 || mrb_nil_p(rs)) return mrb_nil_value(); p = RSTR_PTR(s); rslen = RSTRING_LEN(rs); if (rslen == 0) { while (len>0 && p[len-1] == '\n') { len--; if (len>0 && p[len-1] == '\r') len--; } if (len < RSTR_LEN(s)) { RSTR_SET_LEN(s, len); p[len] = '\0'; return str; } return mrb_nil_value(); } if (rslen > len) return mrb_nil_value(); newline = RSTRING_PTR(rs)[rslen-1]; if (rslen == 1 && newline == '\n') newline = RSTRING_PTR(rs)[rslen-1]; if (rslen == 1 && newline == '\n') goto smart_chomp; pp = p + len - rslen; if (p[len-1] == newline && (rslen <= 1 || memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) { RSTR_SET_LEN(s, len - rslen); p[RSTR_LEN(s)] = '\0'; return str; } return mrb_nil_value(); } /* 15.2.10.5.9 */ /* * call-seq: * str.chomp(separator="\n") => new_str * * Returns a new <code>String</code> with the given record 
separator removed * from the end of <i>str</i> (if present). If <code>$/</code> has not been * changed from the default Ruby record separator, then <code>chomp</code> also * removes carriage return characters (that is it will remove <code>\n</code>, * <code>\r</code>, and <code>\r\n</code>). * * "hello".chomp #=> "hello" * "hello\n".chomp #=> "hello" * "hello\r\n".chomp #=> "hello" * "hello\n\r".chomp #=> "hello\n" * "hello\r".chomp #=> "hello" * "hello \n there".chomp #=> "hello \n there" * "hello".chomp("llo") #=> "he" */ static mrb_value mrb_str_chomp(mrb_state *mrb, mrb_value self) { mrb_value str; str = mrb_str_dup(mrb, self); mrb_str_chomp_bang(mrb, str); return str; } /* 15.2.10.5.12 */ /* * call-seq: * str.chop! => str or nil * * Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>, * or <code>nil</code> if <i>str</i> is the empty string. See also * <code>String#chomp!</code>. */ static mrb_value mrb_str_chop_bang(mrb_state *mrb, mrb_value str) { struct RString *s = mrb_str_ptr(str); mrb_str_modify(mrb, s); if (RSTR_LEN(s) > 0) { mrb_int len; #ifdef MRB_UTF8_STRING const char* t = RSTR_PTR(s), *p = t; const char* e = p + RSTR_LEN(s); while (p<e) { mrb_int clen = utf8len(p, e); if (p + clen>=e) break; p += clen; } len = p - t; #else len = RSTR_LEN(s) - 1; #endif if (RSTR_PTR(s)[len] == '\n') { if (len > 0 && RSTR_PTR(s)[len-1] == '\r') { len--; } } RSTR_SET_LEN(s, len); RSTR_PTR(s)[len] = '\0'; return str; } return mrb_nil_value(); } /* 15.2.10.5.11 */ /* * call-seq: * str.chop => new_str * * Returns a new <code>String</code> with the last character removed. If the * string ends with <code>\r\n</code>, both characters are removed. Applying * <code>chop</code> to an empty string returns an empty * string. <code>String#chomp</code> is often a safer alternative, as it leaves * the string unchanged if it doesn't end in a record separator. 
* * "string\r\n".chop #=> "string" * "string\n\r".chop #=> "string\n" * "string\n".chop #=> "string" * "string".chop #=> "strin" * "x".chop #=> "" */ static mrb_value mrb_str_chop(mrb_state *mrb, mrb_value self) { mrb_value str; str = mrb_str_dup(mrb, self); mrb_str_chop_bang(mrb, str); return str; } /* 15.2.10.5.14 */ /* * call-seq: * str.downcase! => str or nil * * Downcases the contents of <i>str</i>, returning <code>nil</code> if no * changes were made. */ static mrb_value mrb_str_downcase_bang(mrb_state *mrb, mrb_value str) { char *p, *pend; mrb_bool modify = FALSE; struct RString *s = mrb_str_ptr(str); mrb_str_modify(mrb, s); p = RSTR_PTR(s); pend = RSTR_PTR(s) + RSTR_LEN(s); while (p < pend) { if (ISUPPER(*p)) { *p = TOLOWER(*p); modify = TRUE; } p++; } if (modify) return str; return mrb_nil_value(); } /* 15.2.10.5.13 */ /* * call-seq: * str.downcase => new_str * * Returns a copy of <i>str</i> with all uppercase letters replaced with their * lowercase counterparts. The operation is locale insensitive---only * characters 'A' to 'Z' are affected. * * "hEllO".downcase #=> "hello" */ static mrb_value mrb_str_downcase(mrb_state *mrb, mrb_value self) { mrb_value str; str = mrb_str_dup(mrb, self); mrb_str_downcase_bang(mrb, str); return str; } /* 15.2.10.5.16 */ /* * call-seq: * str.empty? => true or false * * Returns <code>true</code> if <i>str</i> has a length of zero. * * "hello".empty? #=> false * "".empty? #=> true */ static mrb_value mrb_str_empty_p(mrb_state *mrb, mrb_value self) { struct RString *s = mrb_str_ptr(self); return mrb_bool_value(RSTR_LEN(s) == 0); } /* 15.2.10.5.17 */ /* * call-seq: * str.eql?(other) => true or false * * Two strings are equal if the have the same length and content. 
*/ static mrb_value mrb_str_eql(mrb_state *mrb, mrb_value self) { mrb_value str2; mrb_bool eql_p; mrb_get_args(mrb, "o", &str2); eql_p = (mrb_type(str2) == MRB_TT_STRING) && str_eql(mrb, self, str2); return mrb_bool_value(eql_p); } MRB_API mrb_value mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) { return str_substr(mrb, str, beg, len); } uint32_t mrb_str_hash(mrb_state *mrb, mrb_value str) { /* 1-8-7 */ struct RString *s = mrb_str_ptr(str); mrb_int len = RSTR_LEN(s); char *p = RSTR_PTR(s); uint64_t key = 0; while (len--) { key = key*65599 + *p; p++; } return (uint32_t)(key + (key>>5)); } /* 15.2.10.5.20 */ /* * call-seq: * str.hash => fixnum * * Return a hash based on the string's length and content. */ static mrb_value mrb_str_hash_m(mrb_state *mrb, mrb_value self) { mrb_int key = mrb_str_hash(mrb, self); return mrb_fixnum_value(key); } /* 15.2.10.5.21 */ /* * call-seq: * str.include? other_str => true or false * str.include? fixnum => true or false * * Returns <code>true</code> if <i>str</i> contains the given string or * character. * * "hello".include? "lo" #=> true * "hello".include? "ol" #=> false * "hello".include? ?h #=> true */ static mrb_value mrb_str_include(mrb_state *mrb, mrb_value self) { mrb_value str2; mrb_get_args(mrb, "S", &str2); if (str_index_str(mrb, self, str2, 0) < 0) return mrb_bool_value(FALSE); return mrb_bool_value(TRUE); } /* 15.2.10.5.22 */ /* * call-seq: * str.index(substring [, offset]) => fixnum or nil * str.index(fixnum [, offset]) => fixnum or nil * str.index(regexp [, offset]) => fixnum or nil * * Returns the index of the first occurrence of the given * <i>substring</i>, * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. * Returns * <code>nil</code> if not found. * If the second parameter is present, it * specifies the position in the string to begin the search. 
* * "hello".index('e') #=> 1 * "hello".index('lo') #=> 3 * "hello".index('a') #=> nil * "hello".index(101) #=> 1(101=0x65='e') * "hello".index(/[aeiou]/, -3) #=> 4 */ static mrb_value mrb_str_index_m(mrb_state *mrb, mrb_value str) { mrb_value *argv; mrb_int argc; mrb_value sub; mrb_int pos, clen; mrb_get_args(mrb, "*!", &argv, &argc); if (argc == 2) { mrb_get_args(mrb, "oi", &sub, &pos); } else { pos = 0; if (argc > 0) sub = argv[0]; else sub = mrb_nil_value(); } mrb_regexp_check(mrb, sub); clen = RSTRING_CHAR_LEN(str); if (pos < 0) { pos += clen; if (pos < 0) { return mrb_nil_value(); } } if (pos > clen) return mrb_nil_value(); pos = chars2bytes(str, 0, pos); switch (mrb_type(sub)) { default: { mrb_value tmp; tmp = mrb_check_string_type(mrb, sub); if (mrb_nil_p(tmp)) { mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub); } sub = tmp; } /* fall through */ case MRB_TT_STRING: pos = str_index_str(mrb, str, sub, pos); break; } if (pos == -1) return mrb_nil_value(); pos = bytes2chars(RSTRING_PTR(str), pos); BYTES_ALIGN_CHECK(pos); return mrb_fixnum_value(pos); } #define STR_REPLACE_SHARED_MIN 10 /* 15.2.10.5.24 */ /* 15.2.10.5.28 */ /* * call-seq: * str.replace(other_str) => str * * s = "hello" #=> "hello" * s.replace "world" #=> "world" */ static mrb_value mrb_str_replace(mrb_state *mrb, mrb_value str) { mrb_value str2; mrb_get_args(mrb, "S", &str2); return str_replace(mrb, mrb_str_ptr(str), mrb_str_ptr(str2)); } /* 15.2.10.5.23 */ /* * call-seq: * String.new(str="") => new_str * * Returns a new string object containing a copy of <i>str</i>. 
*/ static mrb_value mrb_str_init(mrb_state *mrb, mrb_value self) { mrb_value str2; if (mrb_get_args(mrb, "|S", &str2) == 0) { struct RString *s = str_new(mrb, 0, 0); str2 = mrb_obj_value(s); } str_replace(mrb, mrb_str_ptr(self), mrb_str_ptr(str2)); return self; } /* 15.2.10.5.25 */ /* 15.2.10.5.41 */ /* * call-seq: * str.intern => symbol * str.to_sym => symbol * * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the * symbol if it did not previously exist. See <code>Symbol#id2name</code>. * * "Koala".intern #=> :Koala * s = 'cat'.to_sym #=> :cat * s == :cat #=> true * s = '@cat'.to_sym #=> :@cat * s == :@cat #=> true * * This can also be used to create symbols that cannot be represented using the * <code>:xxx</code> notation. * * 'cat and dog'.to_sym #=> :"cat and dog" */ MRB_API mrb_value mrb_str_intern(mrb_state *mrb, mrb_value self) { return mrb_symbol_value(mrb_intern_str(mrb, self)); } /* ---------------------------------- */ MRB_API mrb_value mrb_obj_as_string(mrb_state *mrb, mrb_value obj) { mrb_value str; if (mrb_string_p(obj)) { return obj; } str = mrb_funcall(mrb, obj, "to_s", 0); if (!mrb_string_p(str)) return mrb_any_to_s(mrb, obj); return str; } MRB_API mrb_value mrb_ptr_to_str(mrb_state *mrb, void *p) { struct RString *p_str; char *p1; char *p2; uintptr_t n = (uintptr_t)p; p_str = str_new(mrb, NULL, 2 + sizeof(uintptr_t) * CHAR_BIT / 4); p1 = RSTR_PTR(p_str); *p1++ = '0'; *p1++ = 'x'; p2 = p1; do { *p2++ = mrb_digitmap[n % 16]; n /= 16; } while (n > 0); *p2 = '\0'; RSTR_SET_LEN(p_str, (mrb_int)(p2 - RSTR_PTR(p_str))); while (p1 < p2) { const char c = *p1; *p1++ = *--p2; *p2 = c; } return mrb_obj_value(p_str); } MRB_API mrb_value mrb_string_type(mrb_state *mrb, mrb_value str) { return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); } MRB_API mrb_value mrb_check_string_type(mrb_state *mrb, mrb_value str) { return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); } /* 15.2.10.5.30 */ /* * call-seq: * 
str.reverse! => str * * Reverses <i>str</i> in place. */ static mrb_value mrb_str_reverse_bang(mrb_state *mrb, mrb_value str) { #ifdef MRB_UTF8_STRING mrb_int utf8_len = RSTRING_CHAR_LEN(str); mrb_int len = RSTRING_LEN(str); if (utf8_len == len) goto bytes; if (utf8_len > 1) { char *buf; char *p, *e, *r; mrb_str_modify(mrb, mrb_str_ptr(str)); len = RSTRING_LEN(str); buf = (char*)mrb_malloc(mrb, (size_t)len); p = buf; e = buf + len; memcpy(buf, RSTRING_PTR(str), len); r = RSTRING_PTR(str) + len; while (p<e) { mrb_int clen = utf8len(p, e); r -= clen; memcpy(r, p, clen); p += clen; } mrb_free(mrb, buf); } return str; bytes: #endif { struct RString *s = mrb_str_ptr(str); char *p, *e; char c; mrb_str_modify(mrb, s); if (RSTR_LEN(s) > 1) { p = RSTR_PTR(s); e = p + RSTR_LEN(s) - 1; while (p < e) { c = *p; *p++ = *e; *e-- = c; } } return str; } } /* ---------------------------------- */ /* 15.2.10.5.29 */ /* * call-seq: * str.reverse => new_str * * Returns a new string with the characters from <i>str</i> in reverse order. * * "stressed".reverse #=> "desserts" */ static mrb_value mrb_str_reverse(mrb_state *mrb, mrb_value str) { mrb_value str2 = mrb_str_dup(mrb, str); mrb_str_reverse_bang(mrb, str2); return str2; } /* 15.2.10.5.31 */ /* * call-seq: * str.rindex(substring [, fixnum]) => fixnum or nil * str.rindex(fixnum [, fixnum]) => fixnum or nil * str.rindex(regexp [, fixnum]) => fixnum or nil * * Returns the index of the last occurrence of the given <i>substring</i>, * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns * <code>nil</code> if not found. If the second parameter is present, it * specifies the position in the string to end the search---characters beyond * this point will not be considered. 
* * "hello".rindex('e') #=> 1 * "hello".rindex('l') #=> 3 * "hello".rindex('a') #=> nil * "hello".rindex(101) #=> 1 * "hello".rindex(/[aeiou]/, -2) #=> 1 */ static mrb_value mrb_str_rindex(mrb_state *mrb, mrb_value str) { mrb_value *argv; mrb_int argc; mrb_value sub; mrb_int pos, len = RSTRING_CHAR_LEN(str); mrb_get_args(mrb, "*!", &argv, &argc); if (argc == 2) { mrb_get_args(mrb, "oi", &sub, &pos); if (pos < 0) { pos += len; if (pos < 0) { mrb_regexp_check(mrb, sub); return mrb_nil_value(); } } if (pos > len) pos = len; } else { pos = len; if (argc > 0) sub = argv[0]; else sub = mrb_nil_value(); } pos = chars2bytes(str, 0, pos); mrb_regexp_check(mrb, sub); switch (mrb_type(sub)) { default: { mrb_value tmp; tmp = mrb_check_string_type(mrb, sub); if (mrb_nil_p(tmp)) { mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub); } sub = tmp; } /* fall through */ case MRB_TT_STRING: pos = str_rindex(mrb, str, sub, pos); if (pos >= 0) { pos = bytes2chars(RSTRING_PTR(str), pos); BYTES_ALIGN_CHECK(pos); return mrb_fixnum_value(pos); } break; } /* end of switch (TYPE(sub)) */ return mrb_nil_value(); } /* 15.2.10.5.35 */ /* * call-seq: * str.split(pattern="\n", [limit]) => anArray * * Divides <i>str</i> into substrings based on a delimiter, returning an array * of these substrings. * * If <i>pattern</i> is a <code>String</code>, then its contents are used as * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single * space, <i>str</i> is split on whitespace, with leading whitespace and runs * of contiguous whitespace characters ignored. * * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the * pattern matches. Whenever the pattern matches a zero-length string, * <i>str</i> is split into individual characters. * * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is * split on whitespace as if ' ' were specified. 
* * If the <i>limit</i> parameter is omitted, trailing null fields are * suppressed. If <i>limit</i> is a positive number, at most that number of * fields will be returned (if <i>limit</i> is <code>1</code>, the entire * string is returned as the only entry in an array). If negative, there is no * limit to the number of fields returned, and trailing null fields are not * suppressed. * * " now's the time".split #=> ["now's", "the", "time"] * " now's the time".split(' ') #=> ["now's", "the", "time"] * " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"] * "hello".split(//) #=> ["h", "e", "l", "l", "o"] * "hello".split(//, 3) #=> ["h", "e", "llo"] * * "mellow yellow".split("ello") #=> ["m", "w y", "w"] * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"] * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"] * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""] */ static mrb_value mrb_str_split_m(mrb_state *mrb, mrb_value str) { mrb_int argc; mrb_value spat = mrb_nil_value(); enum {awk, string, regexp} split_type = string; mrb_int i = 0; mrb_int beg; mrb_int end; mrb_int lim = 0; mrb_bool lim_p; mrb_value result, tmp; argc = mrb_get_args(mrb, "|oi", &spat, &lim); lim_p = (lim > 0 && argc == 2); if (argc == 2) { if (lim == 1) { if (RSTRING_LEN(str) == 0) return mrb_ary_new_capa(mrb, 0); return mrb_ary_new_from_values(mrb, 1, &str); } i = 1; } if (argc == 0 || mrb_nil_p(spat)) { split_type = awk; } else { if (mrb_string_p(spat)) { split_type = string; if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' ') { split_type = awk; } } else { mrb_noregexp(mrb, str); } } result = mrb_ary_new(mrb); beg = 0; if (split_type == awk) { mrb_bool skip = TRUE; mrb_int idx = 0; mrb_int str_len = RSTRING_LEN(str); unsigned int c; int ai = mrb_gc_arena_save(mrb); idx = end = beg; while (idx < str_len) { c = (unsigned char)RSTRING_PTR(str)[idx++]; if (skip) { if (ISSPACE(c)) { beg = idx; } else { end = idx; skip = FALSE; if (lim_p && lim <= i) break; } } 
else if (ISSPACE(c)) { mrb_ary_push(mrb, result, byte_subseq(mrb, str, beg, end-beg)); mrb_gc_arena_restore(mrb, ai); skip = TRUE; beg = idx; if (lim_p) ++i; } else { end = idx; } } } else if (split_type == string) { mrb_int str_len = RSTRING_LEN(str); mrb_int pat_len = RSTRING_LEN(spat); mrb_int idx = 0; int ai = mrb_gc_arena_save(mrb); while (idx < str_len) { if (pat_len > 0) { end = mrb_memsearch(RSTRING_PTR(spat), pat_len, RSTRING_PTR(str)+idx, str_len - idx); if (end < 0) break; } else { end = chars2bytes(str, idx, 1); } mrb_ary_push(mrb, result, byte_subseq(mrb, str, idx, end)); mrb_gc_arena_restore(mrb, ai); idx += end + pat_len; if (lim_p && lim <= ++i) break; } beg = idx; } else { mrb_noregexp(mrb, str); } if (RSTRING_LEN(str) > 0 && (lim_p || RSTRING_LEN(str) > beg || lim < 0)) { if (RSTRING_LEN(str) == beg) { tmp = mrb_str_new_empty(mrb, str); } else { tmp = byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg); } mrb_ary_push(mrb, result, tmp); } if (!lim_p && lim == 0) { mrb_int len; while ((len = RARRAY_LEN(result)) > 0 && (tmp = RARRAY_PTR(result)[len-1], RSTRING_LEN(tmp) == 0)) mrb_ary_pop(mrb, result); } return result; } MRB_API mrb_value mrb_str_len_to_inum(mrb_state *mrb, const char *str, mrb_int len, mrb_int base, int badcheck) { const char *p = str; const char *pend = str + len; char sign = 1; int c; uint64_t n = 0; mrb_int val; #define conv_digit(c) \ (ISDIGIT(c) ? ((c) - '0') : \ ISLOWER(c) ? ((c) - 'a' + 10) : \ ISUPPER(c) ? 
((c) - 'A' + 10) : \ -1) if (!p) { if (badcheck) goto bad; return mrb_fixnum_value(0); } while (p<pend && ISSPACE(*p)) p++; if (p[0] == '+') { p++; } else if (p[0] == '-') { p++; sign = 0; } if (base <= 0) { if (p[0] == '0') { switch (p[1]) { case 'x': case 'X': base = 16; break; case 'b': case 'B': base = 2; break; case 'o': case 'O': base = 8; break; case 'd': case 'D': base = 10; break; default: base = 8; break; } } else if (base < -1) { base = -base; } else { base = 10; } } switch (base) { case 2: if (p[0] == '0' && (p[1] == 'b'||p[1] == 'B')) { p += 2; } break; case 3: break; case 8: if (p[0] == '0' && (p[1] == 'o'||p[1] == 'O')) { p += 2; } case 4: case 5: case 6: case 7: break; case 10: if (p[0] == '0' && (p[1] == 'd'||p[1] == 'D')) { p += 2; } case 9: case 11: case 12: case 13: case 14: case 15: break; case 16: if (p[0] == '0' && (p[1] == 'x'||p[1] == 'X')) { p += 2; } break; default: if (base < 2 || 36 < base) { mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base)); } break; } /* end of switch (base) { */ if (p>=pend) { if (badcheck) goto bad; return mrb_fixnum_value(0); } if (*p == '0') { /* squeeze preceding 0s */ p++; while (p<pend) { c = *p++; if (c == '_') { if (p<pend && *p == '_') { if (badcheck) goto bad; break; } continue; } if (c != '0') { p--; break; } } if (*(p - 1) == '0') p--; } if (p == pend) { if (badcheck) goto bad; return mrb_fixnum_value(0); } for ( ;p<pend;p++) { if (*p == '_') { p++; if (p==pend) { if (badcheck) goto bad; continue; } if (*p == '_') { if (badcheck) goto bad; break; } } if (badcheck && *p == '\0') { goto nullbyte; } c = conv_digit(*p); if (c < 0 || c >= base) { break; } n *= base; n += c; if (n > (uint64_t)MRB_INT_MAX + (sign ? 
0 : 1)) { if (base == 10) { return mrb_float_value(mrb, mrb_str_to_dbl(mrb, mrb_str_new(mrb, str, len), badcheck)); } else { mrb_raisef(mrb, E_ARGUMENT_ERROR, "string (%S) too big for integer", mrb_str_new(mrb, str, pend-str)); } } } val = (mrb_int)n; if (badcheck) { if (p == str) goto bad; /* no number */ while (p<pend && ISSPACE(*p)) p++; if (p<pend) goto bad; /* trailing garbage */ } return mrb_fixnum_value(sign ? val : -val); nullbyte: mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte"); /* not reached */ bad: mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for number(%S)", mrb_inspect(mrb, mrb_str_new(mrb, str, pend-str))); /* not reached */ return mrb_fixnum_value(0); } MRB_API mrb_value mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck) { return mrb_str_len_to_inum(mrb, str, strlen(str), base, badcheck); } MRB_API const char* mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr) { mrb_value str = mrb_str_to_str(mrb, *ptr); struct RString *ps = mrb_str_ptr(str); mrb_int len = mrb_str_strlen(mrb, ps); char *p = RSTR_PTR(ps); if (!p || p[len] != '\0') { if (MRB_FROZEN_P(ps)) { *ptr = str = mrb_str_dup(mrb, str); ps = mrb_str_ptr(str); } mrb_str_modify(mrb, ps); return RSTR_PTR(ps); } return p; } MRB_API mrb_value mrb_str_to_inum(mrb_state *mrb, mrb_value str, mrb_int base, mrb_bool badcheck) { const char *s; mrb_int len; s = mrb_string_value_ptr(mrb, str); len = RSTRING_LEN(str); return mrb_str_len_to_inum(mrb, s, len, base, badcheck); } /* 15.2.10.5.38 */ /* * call-seq: * str.to_i(base=10) => integer * * Returns the result of interpreting leading characters in <i>str</i> as an * integer base <i>base</i> (between 2 and 36). Extraneous characters past the * end of a valid number are ignored. If there is not a valid number at the * start of <i>str</i>, <code>0</code> is returned. This method never raises an * exception. 
* * "12345".to_i #=> 12345 * "99 red balloons".to_i #=> 99 * "0a".to_i #=> 0 * "0a".to_i(16) #=> 10 * "hello".to_i #=> 0 * "1100101".to_i(2) #=> 101 * "1100101".to_i(8) #=> 294977 * "1100101".to_i(10) #=> 1100101 * "1100101".to_i(16) #=> 17826049 */ static mrb_value mrb_str_to_i(mrb_state *mrb, mrb_value self) { mrb_int base = 10; mrb_get_args(mrb, "|i", &base); if (base < 0) { mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base)); } return mrb_str_to_inum(mrb, self, base, FALSE); } #ifndef MRB_WITHOUT_FLOAT MRB_API double mrb_cstr_to_dbl(mrb_state *mrb, const char * p, mrb_bool badcheck) { char *end; char buf[DBL_DIG * 4 + 10]; double d; enum {max_width = 20}; if (!p) return 0.0; while (ISSPACE(*p)) p++; if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { return 0.0; } d = mrb_float_read(p, &end); if (p == end) { if (badcheck) { bad: mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for float(%S)", mrb_str_new_cstr(mrb, p)); /* not reached */ } return d; } if (*end) { char *n = buf; char *e = buf + sizeof(buf) - 1; char prev = 0; while (p < end && n < e) prev = *n++ = *p++; while (*p) { if (*p == '_') { /* remove underscores between digits */ if (badcheck) { if (n == buf || !ISDIGIT(prev)) goto bad; ++p; if (!ISDIGIT(*p)) goto bad; } else { while (*++p == '_'); continue; } } prev = *p++; if (n < e) *n++ = prev; } *n = '\0'; p = buf; if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { return 0.0; } d = mrb_float_read(p, &end); if (badcheck) { if (!end || p == end) goto bad; while (*end && ISSPACE(*end)) end++; if (*end) goto bad; } } return d; } MRB_API double mrb_str_to_dbl(mrb_state *mrb, mrb_value str, mrb_bool badcheck) { char *s; mrb_int len; str = mrb_str_to_str(mrb, str); s = RSTRING_PTR(str); len = RSTRING_LEN(str); if (s) { if (badcheck && memchr(s, '\0', len)) { mrb_raise(mrb, E_ARGUMENT_ERROR, "string for Float contains null byte"); } if (s[len]) { /* no sentinel somehow */ struct RString *temp_str = 
str_new(mrb, s, len); s = RSTR_PTR(temp_str); } } return mrb_cstr_to_dbl(mrb, s, badcheck); } /* 15.2.10.5.39 */ /* * call-seq: * str.to_f => float * * Returns the result of interpreting leading characters in <i>str</i> as a * floating point number. Extraneous characters past the end of a valid number * are ignored. If there is not a valid number at the start of <i>str</i>, * <code>0.0</code> is returned. This method never raises an exception. * * "123.45e1".to_f #=> 1234.5 * "45.67 degrees".to_f #=> 45.67 * "thx1138".to_f #=> 0.0 */ static mrb_value mrb_str_to_f(mrb_state *mrb, mrb_value self) { return mrb_float_value(mrb, mrb_str_to_dbl(mrb, self, FALSE)); } #endif /* 15.2.10.5.40 */ /* * call-seq: * str.to_s => str * str.to_str => str * * Returns the receiver. */ static mrb_value mrb_str_to_s(mrb_state *mrb, mrb_value self) { if (mrb_obj_class(mrb, self) != mrb->string_class) { return mrb_str_dup(mrb, self); } return self; } /* 15.2.10.5.43 */ /* * call-seq: * str.upcase! => str or nil * * Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes * were made. */ static mrb_value mrb_str_upcase_bang(mrb_state *mrb, mrb_value str) { struct RString *s = mrb_str_ptr(str); char *p, *pend; mrb_bool modify = FALSE; mrb_str_modify(mrb, s); p = RSTRING_PTR(str); pend = RSTRING_END(str); while (p < pend) { if (ISLOWER(*p)) { *p = TOUPPER(*p); modify = TRUE; } p++; } if (modify) return str; return mrb_nil_value(); } /* 15.2.10.5.42 */ /* * call-seq: * str.upcase => new_str * * Returns a copy of <i>str</i> with all lowercase letters replaced with their * uppercase counterparts. The operation is locale insensitive---only * characters 'a' to 'z' are affected. 
* * "hEllO".upcase #=> "HELLO" */ static mrb_value mrb_str_upcase(mrb_state *mrb, mrb_value self) { mrb_value str; str = mrb_str_dup(mrb, self); mrb_str_upcase_bang(mrb, str); return str; } #define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{')) /* * call-seq: * str.dump -> new_str * * Produces a version of <i>str</i> with all nonprinting characters replaced by * <code>\nnn</code> notation and all special characters escaped. */ mrb_value mrb_str_dump(mrb_state *mrb, mrb_value str) { mrb_int len; const char *p, *pend; char *q; struct RString *result; len = 2; /* "" */ p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str); while (p < pend) { unsigned char c = *p++; switch (c) { case '"': case '\\': case '\n': case '\r': case '\t': case '\f': case '\013': case '\010': case '\007': case '\033': len += 2; break; case '#': len += IS_EVSTR(p, pend) ? 2 : 1; break; default: if (ISPRINT(c)) { len++; } else { len += 4; /* \NNN */ } break; } } result = str_new(mrb, 0, len); str_with_class(mrb, result, str); p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str); q = RSTR_PTR(result); *q++ = '"'; while (p < pend) { unsigned char c = *p++; switch (c) { case '"': case '\\': *q++ = '\\'; *q++ = c; break; case '\n': *q++ = '\\'; *q++ = 'n'; break; case '\r': *q++ = '\\'; *q++ = 'r'; break; case '\t': *q++ = '\\'; *q++ = 't'; break; case '\f': *q++ = '\\'; *q++ = 'f'; break; case '\013': *q++ = '\\'; *q++ = 'v'; break; case '\010': *q++ = '\\'; *q++ = 'b'; break; case '\007': *q++ = '\\'; *q++ = 'a'; break; case '\033': *q++ = '\\'; *q++ = 'e'; break; case '#': if (IS_EVSTR(p, pend)) *q++ = '\\'; *q++ = '#'; break; default: if (ISPRINT(c)) { *q++ = c; } else { *q++ = '\\'; *q++ = 'x'; q[1] = mrb_digitmap[c % 16]; c /= 16; q[0] = mrb_digitmap[c % 16]; q += 2; } } } *q = '"'; return mrb_obj_value(result); } MRB_API mrb_value mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len) { struct RString *s = mrb_str_ptr(str); size_t capa; size_t total; 
ptrdiff_t off = -1; if (len == 0) return str; mrb_str_modify(mrb, s); if (ptr >= RSTR_PTR(s) && ptr <= RSTR_PTR(s) + (size_t)RSTR_LEN(s)) { off = ptr - RSTR_PTR(s); } capa = RSTR_CAPA(s); total = RSTR_LEN(s)+len; if (total >= MRB_INT_MAX) { size_error: mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big"); } if (capa <= total) { if (capa == 0) capa = 1; while (capa <= total) { if (capa <= MRB_INT_MAX / 2) { capa *= 2; } else { capa = total+1; } } if (capa <= total || capa > MRB_INT_MAX) { goto size_error; } resize_capa(mrb, s, capa); } if (off != -1) { ptr = RSTR_PTR(s) + off; } memcpy(RSTR_PTR(s) + RSTR_LEN(s), ptr, len); mrb_assert_int_fit(size_t, total, mrb_int, MRB_INT_MAX); RSTR_SET_LEN(s, total); RSTR_PTR(s)[total] = '\0'; /* sentinel */ return str; } MRB_API mrb_value mrb_str_cat_cstr(mrb_state *mrb, mrb_value str, const char *ptr) { return mrb_str_cat(mrb, str, ptr, strlen(ptr)); } MRB_API mrb_value mrb_str_cat_str(mrb_state *mrb, mrb_value str, mrb_value str2) { return mrb_str_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2)); } MRB_API mrb_value mrb_str_append(mrb_state *mrb, mrb_value str1, mrb_value str2) { str2 = mrb_str_to_str(mrb, str2); return mrb_str_cat_str(mrb, str1, str2); } #define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */ /* * call-seq: * str.inspect -> string * * Returns a printable version of _str_, surrounded by quote marks, * with special characters escaped. 
* * str = "hello" * str[3] = "\b" * str.inspect #=> "\"hel\\bo\"" */ mrb_value mrb_str_inspect(mrb_state *mrb, mrb_value str) { const char *p, *pend; char buf[CHAR_ESC_LEN + 1]; mrb_value result = mrb_str_new_lit(mrb, "\""); p = RSTRING_PTR(str); pend = RSTRING_END(str); for (;p < pend; p++) { unsigned char c, cc; #ifdef MRB_UTF8_STRING mrb_int clen; clen = utf8len(p, pend); if (clen > 1) { mrb_int i; for (i=0; i<clen; i++) { buf[i] = p[i]; } mrb_str_cat(mrb, result, buf, clen); p += clen-1; continue; } #endif c = *p; if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p+1, pend))) { buf[0] = '\\'; buf[1] = c; mrb_str_cat(mrb, result, buf, 2); continue; } if (ISPRINT(c)) { buf[0] = c; mrb_str_cat(mrb, result, buf, 1); continue; } switch (c) { case '\n': cc = 'n'; break; case '\r': cc = 'r'; break; case '\t': cc = 't'; break; case '\f': cc = 'f'; break; case '\013': cc = 'v'; break; case '\010': cc = 'b'; break; case '\007': cc = 'a'; break; case 033: cc = 'e'; break; default: cc = 0; break; } if (cc) { buf[0] = '\\'; buf[1] = (char)cc; mrb_str_cat(mrb, result, buf, 2); continue; } else { buf[0] = '\\'; buf[1] = 'x'; buf[3] = mrb_digitmap[c % 16]; c /= 16; buf[2] = mrb_digitmap[c % 16]; mrb_str_cat(mrb, result, buf, 4); continue; } } mrb_str_cat_lit(mrb, result, "\""); return result; } /* * call-seq: * str.bytes -> array of fixnums * * Returns an array of bytes in _str_. 
* * str = "hello" * str.bytes #=> [104, 101, 108, 108, 111] */ static mrb_value mrb_str_bytes(mrb_state *mrb, mrb_value str) { struct RString *s = mrb_str_ptr(str); mrb_value a = mrb_ary_new_capa(mrb, RSTR_LEN(s)); unsigned char *p = (unsigned char *)(RSTR_PTR(s)), *pend = p + RSTR_LEN(s); while (p < pend) { mrb_ary_push(mrb, a, mrb_fixnum_value(p[0])); p++; } return a; } /* ---------------------------*/ void mrb_init_string(mrb_state *mrb) { struct RClass *s; mrb_static_assert(RSTRING_EMBED_LEN_MAX < (1 << 5), "pointer size too big for embedded string"); mrb->string_class = s = mrb_define_class(mrb, "String", mrb->object_class); /* 15.2.10 */ MRB_SET_INSTANCE_TT(s, MRB_TT_STRING); mrb_define_method(mrb, s, "bytesize", mrb_str_bytesize, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "<=>", mrb_str_cmp_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.1 */ mrb_define_method(mrb, s, "==", mrb_str_equal_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.2 */ mrb_define_method(mrb, s, "+", mrb_str_plus_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.4 */ mrb_define_method(mrb, s, "*", mrb_str_times, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */ mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.6 */ mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, MRB_ARGS_NONE()); /* 15.2.10.5.7 */ mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_NONE()); /* 15.2.10.5.8 */ mrb_define_method(mrb, s, "chomp", mrb_str_chomp, MRB_ARGS_ANY()); /* 15.2.10.5.9 */ mrb_define_method(mrb, s, "chomp!", mrb_str_chomp_bang, MRB_ARGS_ANY()); /* 15.2.10.5.10 */ mrb_define_method(mrb, s, "chop", mrb_str_chop, MRB_ARGS_NONE()); /* 15.2.10.5.11 */ mrb_define_method(mrb, s, "chop!", mrb_str_chop_bang, MRB_ARGS_NONE()); /* 15.2.10.5.12 */ mrb_define_method(mrb, s, "downcase", mrb_str_downcase, MRB_ARGS_NONE()); /* 15.2.10.5.13 */ mrb_define_method(mrb, s, "downcase!", mrb_str_downcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.14 */ mrb_define_method(mrb, s, "empty?", mrb_str_empty_p, MRB_ARGS_NONE()); /* 
15.2.10.5.16 */ mrb_define_method(mrb, s, "eql?", mrb_str_eql, MRB_ARGS_REQ(1)); /* 15.2.10.5.17 */ mrb_define_method(mrb, s, "hash", mrb_str_hash_m, MRB_ARGS_NONE()); /* 15.2.10.5.20 */ mrb_define_method(mrb, s, "include?", mrb_str_include, MRB_ARGS_REQ(1)); /* 15.2.10.5.21 */ mrb_define_method(mrb, s, "index", mrb_str_index_m, MRB_ARGS_ANY()); /* 15.2.10.5.22 */ mrb_define_method(mrb, s, "initialize", mrb_str_init, MRB_ARGS_REQ(1)); /* 15.2.10.5.23 */ mrb_define_method(mrb, s, "initialize_copy", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.24 */ mrb_define_method(mrb, s, "intern", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.25 */ mrb_define_method(mrb, s, "length", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.26 */ mrb_define_method(mrb, s, "replace", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.28 */ mrb_define_method(mrb, s, "reverse", mrb_str_reverse, MRB_ARGS_NONE()); /* 15.2.10.5.29 */ mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, MRB_ARGS_NONE()); /* 15.2.10.5.30 */ mrb_define_method(mrb, s, "rindex", mrb_str_rindex, MRB_ARGS_ANY()); /* 15.2.10.5.31 */ mrb_define_method(mrb, s, "size", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.33 */ mrb_define_method(mrb, s, "slice", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.34 */ mrb_define_method(mrb, s, "split", mrb_str_split_m, MRB_ARGS_ANY()); /* 15.2.10.5.35 */ #ifndef MRB_WITHOUT_FLOAT mrb_define_method(mrb, s, "to_f", mrb_str_to_f, MRB_ARGS_NONE()); /* 15.2.10.5.38 */ #endif mrb_define_method(mrb, s, "to_i", mrb_str_to_i, MRB_ARGS_ANY()); /* 15.2.10.5.39 */ mrb_define_method(mrb, s, "to_s", mrb_str_to_s, MRB_ARGS_NONE()); /* 15.2.10.5.40 */ mrb_define_method(mrb, s, "to_str", mrb_str_to_s, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "to_sym", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.41 */ mrb_define_method(mrb, s, "upcase", mrb_str_upcase, MRB_ARGS_NONE()); /* 15.2.10.5.42 */ mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.43 */ 
mrb_define_method(mrb, s, "inspect", mrb_str_inspect, MRB_ARGS_NONE()); /* 15.2.10.5.46(x) */ mrb_define_method(mrb, s, "bytes", mrb_str_bytes, MRB_ARGS_NONE()); } #ifndef MRB_WITHOUT_FLOAT /* * Source code for the "strtod" library procedure. * * Copyright (c) 1988-1993 The Regents of the University of California. * Copyright (c) 1994 Sun Microsystems, Inc. * * Permission to use, copy, modify, and distribute this * software and its documentation for any purpose and without * fee is hereby granted, provided that the above copyright * notice appear in all copies. The University of California * makes no representations about the suitability of this * software for any purpose. It is provided "as is" without * express or implied warranty. * * RCS: @(#) $Id: strtod.c 11708 2007-02-12 23:01:19Z shyouhei $ */ #include <ctype.h> #include <errno.h> static const int maxExponent = 511; /* Largest possible base 10 exponent. Any * exponent larger than this will already * produce underflow or overflow, so there's * no need to worry about additional digits. */ static const double powersOf10[] = {/* Table giving binary powers of 10. Entry */ 10., /* is 10^2^i. Used to convert decimal */ 100., /* exponents into floating-point numbers. */ 1.0e4, 1.0e8, 1.0e16, 1.0e32, 1.0e64, 1.0e128, 1.0e256 }; MRB_API double mrb_float_read(const char *string, char **endPtr) /* const char *string; A decimal ASCII floating-point number, * optionally preceded by white space. * Must have form "-I.FE-X", where I is the * integer part of the mantissa, F is the * fractional part of the mantissa, and X * is the exponent. Either of the signs * may be "+", "-", or omitted. Either I * or F may be omitted, or both. The decimal * point isn't necessary unless F is present. * The "E" may actually be an "e". E and X * may both be omitted (but not just one). */ /* char **endPtr; If non-NULL, store terminating character's * address here. 
*/ { int sign, expSign = FALSE; double fraction, dblExp; const double *d; const char *p; int c; int exp = 0; /* Exponent read from "EX" field. */ int fracExp = 0; /* Exponent that derives from the fractional * part. Under normal circumstatnces, it is * the negative of the number of digits in F. * However, if I is very long, the last digits * of I get dropped (otherwise a long I with a * large negative exponent could cause an * unnecessary overflow on I alone). In this * case, fracExp is incremented one for each * dropped digit. */ int mantSize; /* Number of digits in mantissa. */ int decPt; /* Number of mantissa digits BEFORE decimal * point. */ const char *pExp; /* Temporarily holds location of exponent * in string. */ /* * Strip off leading blanks and check for a sign. */ p = string; while (isspace(*p)) { p += 1; } if (*p == '-') { sign = TRUE; p += 1; } else { if (*p == '+') { p += 1; } sign = FALSE; } /* * Count the number of digits in the mantissa (including the decimal * point), and also locate the decimal point. */ decPt = -1; for (mantSize = 0; ; mantSize += 1) { c = *p; if (!isdigit(c)) { if ((c != '.') || (decPt >= 0)) { break; } decPt = mantSize; } p += 1; } /* * Now suck up the digits in the mantissa. Use two integers to * collect 9 digits each (this is faster than using floating-point). * If the mantissa has more than 18 digits, ignore the extras, since * they can't affect the value anyway. */ pExp = p; p -= mantSize; if (decPt < 0) { decPt = mantSize; } else { mantSize -= 1; /* One of the digits was the point. 
*/ } if (mantSize > 18) { if (decPt - 18 > 29999) { fracExp = 29999; } else { fracExp = decPt - 18; } mantSize = 18; } else { fracExp = decPt - mantSize; } if (mantSize == 0) { fraction = 0.0; p = string; goto done; } else { int frac1, frac2; frac1 = 0; for ( ; mantSize > 9; mantSize -= 1) { c = *p; p += 1; if (c == '.') { c = *p; p += 1; } frac1 = 10*frac1 + (c - '0'); } frac2 = 0; for (; mantSize > 0; mantSize -= 1) { c = *p; p += 1; if (c == '.') { c = *p; p += 1; } frac2 = 10*frac2 + (c - '0'); } fraction = (1.0e9 * frac1) + frac2; } /* * Skim off the exponent. */ p = pExp; if ((*p == 'E') || (*p == 'e')) { p += 1; if (*p == '-') { expSign = TRUE; p += 1; } else { if (*p == '+') { p += 1; } expSign = FALSE; } while (isdigit(*p)) { exp = exp * 10 + (*p - '0'); if (exp > 19999) { exp = 19999; } p += 1; } } if (expSign) { exp = fracExp - exp; } else { exp = fracExp + exp; } /* * Generate a floating-point number that represents the exponent. * Do this by processing the exponent one bit at a time to combine * many powers of 2 of 10. Then combine the exponent with the * fraction. */ if (exp < 0) { expSign = TRUE; exp = -exp; } else { expSign = FALSE; } if (exp > maxExponent) { exp = maxExponent; errno = ERANGE; } dblExp = 1.0; for (d = powersOf10; exp != 0; exp >>= 1, d += 1) { if (exp & 01) { dblExp *= *d; } } if (expSign) { fraction /= dblExp; } else { fraction *= dblExp; } done: if (endPtr != NULL) { *endPtr = (char *) p; } if (sign) { return -fraction; } return fraction; } #endif