Ruby 2.1.10p492 (2016-04-01 revision 54464)
string.c
Go to the documentation of this file.
1 /**********************************************************************
2 
3  string.c -
4 
5  $Author: usa $
6  created at: Mon Aug 9 17:12:58 JST 1993
7 
8  Copyright (C) 1993-2007 Yukihiro Matsumoto
9  Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
10  Copyright (C) 2000 Information-technology Promotion Agency, Japan
11 
12 **********************************************************************/
13 
14 #include "ruby/ruby.h"
15 #include "ruby/re.h"
16 #include "ruby/encoding.h"
17 #include "vm_core.h"
18 #include "internal.h"
19 #include "probes.h"
20 #include <assert.h>
21 
22 #define BEG(no) (regs->beg[(no)])
23 #define END(no) (regs->end[(no)])
24 
25 #include <math.h>
26 #include <ctype.h>
27 
28 #ifdef HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 
32 #define STRING_ENUMERATORS_WANTARRAY 0 /* next major */
33 
34 #undef rb_str_new_cstr
35 #undef rb_tainted_str_new_cstr
36 #undef rb_usascii_str_new_cstr
37 #undef rb_enc_str_new_cstr
38 #undef rb_external_str_new_cstr
39 #undef rb_locale_str_new_cstr
40 #undef rb_str_dup_frozen
41 #undef rb_str_buf_new_cstr
42 #undef rb_str_buf_cat2
43 #undef rb_str_cat2
44 
45 static VALUE rb_str_clear(VALUE str);
46 
49 
/* Not referenced in the visible part of this file; presumably an upper bound
 * on the byte length of one character -- TODO confirm against its users. */
50 #define RUBY_MAX_CHAR_LEN 16
/* Flag bit; code later in this file raises RuntimeError
 * ("can't modify string; temporarily locked") when it is set on a string. */
51 #define STR_TMPLOCK FL_USER7
/* For a string flagged STR_NOEMBED, clear both ELTS_SHARED and STR_ASSOC.
 * Strings without STR_NOEMBED are left untouched. */
52 #define STR_UNSET_NOCAPA(s) do {\
53  if (FL_TEST((s),STR_NOEMBED)) FL_UNSET((s),(ELTS_SHARED|STR_ASSOC));\
54 } while (0)
55 
/* Set STR_NOEMBED on str and zero the embedded-length bits in its flags. */
56 #define STR_SET_NOEMBED(str) do {\
57  FL_SET((str), STR_NOEMBED);\
58  STR_SET_EMBED_LEN((str), 0);\
59 } while (0)
/* Clear STR_NOEMBED on str.  Only the flag changes; no data is moved. */
60 #define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED)
/* Store n in the embedded-length bit field of str's flags word.
 * n is evaluated once (copied into tmp_n); str is evaluated twice. */
61 #define STR_SET_EMBED_LEN(str, n) do { \
62  long tmp_n = (n);\
63  RBASIC(str)->flags &= ~RSTRING_EMBED_LEN_MASK;\
64  RBASIC(str)->flags |= (tmp_n) << RSTRING_EMBED_LEN_SHIFT;\
65 } while (0)
66 
/* Set the byte length of str: in the flags word when STR_EMBED_P(str) holds,
 * otherwise in as.heap.len.  str is evaluated more than once. */
67 #define STR_SET_LEN(str, n) do { \
68  if (STR_EMBED_P(str)) {\
69  STR_SET_EMBED_LEN((str), (n));\
70  }\
71  else {\
72  RSTRING(str)->as.heap.len = (n);\
73  }\
74 } while (0)
75 
/* Decrease the byte length of str by one, in whichever place the length is
 * stored (flags word or as.heap.len).  No check for a zero length is made. */
76 #define STR_DEC_LEN(str) do {\
77  if (STR_EMBED_P(str)) {\
78  long n = RSTRING_LEN(str);\
79  n--;\
80  STR_SET_EMBED_LEN((str), n);\
81  }\
82  else {\
83  RSTRING(str)->as.heap.len--;\
84  }\
85 } while (0)
86 
/* Terminator width for str: rb_enc_mbminlen() of the encoding reported by
 * rb_enc_get(str). */
87 #define TERM_LEN(str) rb_enc_mbminlen(rb_enc_get(str))
/* Zero the terminator at ptr: always one byte, and termlen bytes via memset
 * when termlen is greater than one.  Both arguments are evaluated once. */
88 #define TERM_FILL(ptr, termlen) do {\
89  char *const term_fill_ptr = (ptr);\
90  const int term_fill_len = (termlen);\
91  *term_fill_ptr = '\0';\
92  if (UNLIKELY(term_fill_len > 1))\
93  memset(term_fill_ptr, 0, term_fill_len);\
94 } while (0)
95 
/* Change the capacity of str to `capacity` bytes, plus room for a terminator.
 *  - embedded string: nothing happens unless capacity exceeds
 *    RSTRING_EMBED_LEN_MAX; then a fresh buffer is allocated, the current
 *    bytes are copied into it and the string is switched to STR_NOEMBED.
 *    The length is read into tlen before any heap field is written
 *    (presumably because embedded storage overlaps those fields -- confirm).
 *  - otherwise: the heap buffer is reallocated, and aux.capa is updated only
 *    when STR_NOCAPA_P(str) is false.
 * The terminator itself is not written here.  `capacity` is evaluated more
 * than once. */
96 #define RESIZE_CAPA(str,capacity) do {\
97  const int termlen = TERM_LEN(str);\
98  if (STR_EMBED_P(str)) {\
99  if ((capacity) > RSTRING_EMBED_LEN_MAX) {\
100  char *const tmp = ALLOC_N(char, (capacity)+termlen);\
101  const long tlen = RSTRING_LEN(str);\
102  memcpy(tmp, RSTRING_PTR(str), tlen);\
103  RSTRING(str)->as.heap.ptr = tmp;\
104  RSTRING(str)->as.heap.len = tlen;\
105  STR_SET_NOEMBED(str);\
106  RSTRING(str)->as.heap.aux.capa = (capacity);\
107  }\
108  }\
109  else {\
110  REALLOC_N(RSTRING(str)->as.heap.ptr, char, (capacity)+termlen);\
111  if (!STR_NOCAPA_P(str))\
112  RSTRING(str)->as.heap.aux.capa = (capacity);\
113  }\
114 } while (0)
115 
/* Record shared_str in str's as.heap.aux.shared slot (stored through
 * RB_OBJ_WRITE) and set ELTS_SHARED on str. */
116 #define STR_SET_SHARED(str, shared_str) do { \
117  RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
118  FL_SET((str), ELTS_SHARED); \
119 } while (0)
120 
/* Heap buffer pointer of str, and its size in bytes computed as
 * aux.capa plus the terminator width. */
121 #define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
122 #define STR_HEAP_SIZE(str) (RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
123 
/* Encoding lookup used throughout this file; forwards to get_encoding(). */
124 #define STR_ENC_GET(str) get_encoding(str)
125 
127 
128 static rb_encoding *
129 get_actual_encoding(const int encidx, VALUE str)
130 {
131  const unsigned char *q;
132 
133  switch (encidx) {
134  case ENCINDEX_UTF_16:
135  if (RSTRING_LEN(str) < 2) break;
136  q = (const unsigned char *)RSTRING_PTR(str);
137  if (q[0] == 0xFE && q[1] == 0xFF) {
138  return rb_enc_get_from_index(ENCINDEX_UTF_16BE);
139  }
140  if (q[0] == 0xFF && q[1] == 0xFE) {
141  return rb_enc_get_from_index(ENCINDEX_UTF_16LE);
142  }
143  return rb_ascii8bit_encoding();
144  case ENCINDEX_UTF_32:
145  if (RSTRING_LEN(str) < 4) break;
146  q = (const unsigned char *)RSTRING_PTR(str);
147  if (q[0] == 0 && q[1] == 0 && q[2] == 0xFE && q[3] == 0xFF) {
148  return rb_enc_get_from_index(ENCINDEX_UTF_32BE);
149  }
150  if (q[3] == 0 && q[2] == 0 && q[1] == 0xFE && q[0] == 0xFF) {
151  return rb_enc_get_from_index(ENCINDEX_UTF_32LE);
152  }
153  return rb_ascii8bit_encoding();
154  }
155  return rb_enc_from_index(encidx);
156 }
157 
158 static rb_encoding *
160 {
161  return get_actual_encoding(ENCODING_GET(str), str);
162 }
163 
164 static int fstring_cmp(VALUE a, VALUE b);
165 
167 
/*
 * st hash-type descriptor built from fstring_cmp and rb_str_hash.
 * NOTE(review): the initializer is positional; presumably the first slot is
 * the key-comparison callback and the second the hash callback -- confirm
 * against the struct st_hash_type declaration.
 */
168 static const struct st_hash_type fstring_hash_type = {
169  fstring_cmp,
170  rb_str_hash,
171 };
172 
173 static int
175 {
176  VALUE *fstr = (VALUE *)arg;
177  VALUE str = (VALUE)*key;
178 
179  if (existing) {
180  /* because of lazy sweep, str may be unmarked already and swept
181  * at next time */
182  rb_gc_resurrect(*fstr = *key);
183  return ST_STOP;
184  }
185 
186  if (STR_SHARED_P(str)) {
187  /* str should not be shared */
188  str = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), STR_ENC_GET(str));
189  OBJ_FREEZE(str);
190  }
191  else {
192  str = rb_str_new_frozen(str);
193  }
194  RBASIC(str)->flags |= RSTRING_FSTR;
195 
196  *key = *value = *fstr = str;
197  return ST_CONTINUE;
198 }
199 
200 VALUE
202 {
203  VALUE fstr = Qnil;
204  Check_Type(str, T_STRING);
205 
206  if (!frozen_strings)
208 
209  if (FL_TEST(str, RSTRING_FSTR))
210  return str;
211 
213  return fstr;
214 }
215 
216 static int
218 {
220  return ST_CONTINUE;
221 }
222 
223 static int
225 {
226  int cmp = rb_str_hash_cmp(a, b);
227  if (cmp != 0) {
228  return cmp;
229  }
230  return ENCODING_GET(b) - ENCODING_GET(a);
231 }
232 
233 static inline int
235 {
236  rb_encoding *enc;
237 
238  /* Conservative. It may be ENC_CODERANGE_UNKNOWN. */
239  if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
240  return 1;
241 
242  enc = STR_ENC_GET(str);
243  if (rb_enc_mbmaxlen(enc) == 1)
244  return 1;
245 
246  /* Conservative. Possibly single byte.
247  * "\xa1" in Shift_JIS for example. */
248  return 0;
249 }
250 
252 
/*
 * Find the first byte in [p, e) for which ISASCII() is false.
 *
 * Returns a pointer to that byte, or NULL when every byte in the range is
 * ASCII.  When VALUE is 4 or 8 bytes wide and the range is longer than two
 * words, the aligned middle part is tested one word at a time against
 * NONASCII_MASK; the unaligned head and the tail (including any word that
 * tripped the mask) are still examined byte by byte.
 *
 * NONASCII_MASK is defined here and deliberately left defined: later code
 * in this file is conditional on it.
 */
static inline const char *
search_nonascii(const char *p, const char *e)
{
#if SIZEOF_VALUE == 8
# define NONASCII_MASK 0x8080808080808080ULL
#elif SIZEOF_VALUE == 4
# define NONASCII_MASK 0x80808080UL
#endif
#ifdef NONASCII_MASK
    if ((int)sizeof(VALUE) * 2 < e - p) {
        const VALUE align_mask = sizeof(VALUE) - 1;
        /* first word boundary at or after p, last word boundary at or before e */
        const VALUE *word = (const VALUE*)(~align_mask & ((VALUE)p + align_mask));
        const VALUE *const word_end = (const VALUE*)(~align_mask & (VALUE)e);

        /* unaligned head, byte by byte */
        for (; p < (const char *)word; p++) {
            if (!ISASCII(*p))
                return p;
        }
        /* aligned body: stop at the first word with any high bit set */
        while (word < word_end && !(*word & NONASCII_MASK)) {
            word++;
        }
        /* resume the byte scan at the suspicious word (or at the tail) */
        p = (const char *)word;
    }
#endif
    for (; p < e; p++) {
        if (!ISASCII(*p))
            return p;
    }
    return NULL;
}
289 
290 static int
291 coderange_scan(const char *p, long len, rb_encoding *enc)
292 {
293  const char *e = p + len;
294 
295  if (rb_enc_to_index(enc) == 0) {
296  /* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */
297  p = search_nonascii(p, e);
299  }
300 
301  if (rb_enc_asciicompat(enc)) {
302  p = search_nonascii(p, e);
303  if (!p) {
304  return ENC_CODERANGE_7BIT;
305  }
306  while (p < e) {
307  int ret = rb_enc_precise_mbclen(p, e, enc);
308  if (!MBCLEN_CHARFOUND_P(ret)) {
309  return ENC_CODERANGE_BROKEN;
310  }
311  p += MBCLEN_CHARFOUND_LEN(ret);
312  if (p < e) {
313  p = search_nonascii(p, e);
314  if (!p) {
315  return ENC_CODERANGE_VALID;
316  }
317  }
318  }
319  if (e < p) {
320  return ENC_CODERANGE_BROKEN;
321  }
322  return ENC_CODERANGE_VALID;
323  }
324 
325  while (p < e) {
326  int ret = rb_enc_precise_mbclen(p, e, enc);
327 
328  if (!MBCLEN_CHARFOUND_P(ret)) {
329  return ENC_CODERANGE_BROKEN;
330  }
331  p += MBCLEN_CHARFOUND_LEN(ret);
332  }
333  if (e < p) {
334  return ENC_CODERANGE_BROKEN;
335  }
336  return ENC_CODERANGE_VALID;
337 }
338 
339 long
340 rb_str_coderange_scan_restartable(const char *s, const char *e, rb_encoding *enc, int *cr)
341 {
342  const char *p = s;
343 
344  if (*cr == ENC_CODERANGE_BROKEN)
345  return e - s;
346 
347  if (rb_enc_to_index(enc) == 0) {
348  /* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */
349  p = search_nonascii(p, e);
351  return e - s;
352  }
353  else if (rb_enc_asciicompat(enc)) {
354  p = search_nonascii(p, e);
355  if (!p) {
356  if (*cr != ENC_CODERANGE_VALID) *cr = ENC_CODERANGE_7BIT;
357  return e - s;
358  }
359  while (p < e) {
360  int ret = rb_enc_precise_mbclen(p, e, enc);
361  if (!MBCLEN_CHARFOUND_P(ret)) {
363  return p - s;
364  }
365  p += MBCLEN_CHARFOUND_LEN(ret);
366  if (p < e) {
367  p = search_nonascii(p, e);
368  if (!p) {
369  *cr = ENC_CODERANGE_VALID;
370  return e - s;
371  }
372  }
373  }
375  return p - s;
376  }
377  else {
378  while (p < e) {
379  int ret = rb_enc_precise_mbclen(p, e, enc);
380  if (!MBCLEN_CHARFOUND_P(ret)) {
382  return p - s;
383  }
384  p += MBCLEN_CHARFOUND_LEN(ret);
385  }
387  return p - s;
388  }
389 }
390 
391 static inline void
393 {
394  rb_enc_set_index(str1, ENCODING_GET(str2));
395 }
396 
397 static void
399 {
400  /* this function is designed for copying encoding and coderange
401  * from src to new string "dest" which is made from the part of src.
402  */
403  str_enc_copy(dest, src);
404  if (RSTRING_LEN(dest) == 0) {
405  if (!rb_enc_asciicompat(STR_ENC_GET(src)))
407  else
409  return;
410  }
411  switch (ENC_CODERANGE(src)) {
412  case ENC_CODERANGE_7BIT:
414  break;
415  case ENC_CODERANGE_VALID:
416  if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
419  else
421  break;
422  default:
423  break;
424  }
425 }
426 
427 static void
429 {
430  str_enc_copy(dest, src);
431  ENC_CODERANGE_SET(dest, ENC_CODERANGE(src));
432 }
433 
434 int
436 {
437  int cr = ENC_CODERANGE(str);
438 
439  if (cr == ENC_CODERANGE_UNKNOWN) {
440  rb_encoding *enc = STR_ENC_GET(str);
441  cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
442  ENC_CODERANGE_SET(str, cr);
443  }
444  return cr;
445 }
446 
447 int
449 {
450  rb_encoding *enc = STR_ENC_GET(str);
451 
452  if (!rb_enc_asciicompat(enc))
453  return FALSE;
454  else if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)
455  return TRUE;
456  return FALSE;
457 }
458 
459 static inline void
460 str_mod_check(VALUE s, const char *p, long len)
461 {
462  if (RSTRING_PTR(s) != p || RSTRING_LEN(s) != len){
463  rb_raise(rb_eRuntimeError, "string modified");
464  }
465 }
466 
467 size_t
469 {
470  if (STR_EMBED_P(str)) {
471  return RSTRING_EMBED_LEN_MAX;
472  }
473  else if (STR_NOCAPA_P(str)) {
474  return RSTRING(str)->as.heap.len;
475  }
476  else {
477  return RSTRING(str)->as.heap.aux.capa;
478  }
479 }
480 
481 static inline VALUE
483 {
485  return (VALUE)str;
486 }
487 
488 static inline VALUE
490 {
493  }
494  return str_alloc(klass);
495 }
496 
497 static VALUE
498 str_new0(VALUE klass, const char *ptr, long len, int termlen)
499 {
500  VALUE str;
501 
502  if (len < 0) {
503  rb_raise(rb_eArgError, "negative string size (or size too big)");
504  }
505 
508  }
509 
510  str = str_alloc(klass);
511  if (len > RSTRING_EMBED_LEN_MAX) {
512  RSTRING(str)->as.heap.aux.capa = len;
513  RSTRING(str)->as.heap.ptr = ALLOC_N(char, len + termlen);
514  STR_SET_NOEMBED(str);
515  }
516  else if (len == 0) {
518  }
519  if (ptr) {
520  memcpy(RSTRING_PTR(str), ptr, len);
521  }
522  STR_SET_LEN(str, len);
523  TERM_FILL(RSTRING_PTR(str) + len, termlen);
524  return str;
525 }
526 
527 static VALUE
528 str_new(VALUE klass, const char *ptr, long len)
529 {
530  return str_new0(klass, ptr, len, 1);
531 }
532 
533 VALUE
534 rb_str_new(const char *ptr, long len)
535 {
536  return str_new(rb_cString, ptr, len);
537 }
538 
539 VALUE
540 rb_usascii_str_new(const char *ptr, long len)
541 {
542  VALUE str = rb_str_new(ptr, len);
544  return str;
545 }
546 
547 VALUE
548 rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
549 {
550  VALUE str;
551 
552  if (!enc) return rb_str_new(ptr, len);
553 
554  str = str_new0(rb_cString, ptr, len, rb_enc_mbminlen(enc));
555  rb_enc_associate(str, enc);
556  return str;
557 }
558 
559 VALUE
560 rb_str_new_cstr(const char *ptr)
561 {
562  if (!ptr) {
563  rb_raise(rb_eArgError, "NULL pointer given");
564  }
565  return rb_str_new(ptr, strlen(ptr));
566 }
567 
568 VALUE
569 rb_usascii_str_new_cstr(const char *ptr)
570 {
571  VALUE str = rb_str_new2(ptr);
573  return str;
574 }
575 
576 VALUE
577 rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
578 {
579  if (!ptr) {
580  rb_raise(rb_eArgError, "NULL pointer given");
581  }
582  if (rb_enc_mbminlen(enc) != 1) {
583  rb_raise(rb_eArgError, "wchar encoding given");
584  }
585  return rb_enc_str_new(ptr, strlen(ptr), enc);
586 }
587 
588 VALUE
589 rb_tainted_str_new(const char *ptr, long len)
590 {
591  VALUE str = rb_str_new(ptr, len);
592 
593  OBJ_TAINT(str);
594  return str;
595 }
596 
597 VALUE
598 rb_tainted_str_new_cstr(const char *ptr)
599 {
600  VALUE str = rb_str_new2(ptr);
601 
602  OBJ_TAINT(str);
603  return str;
604 }
605 
606 VALUE
607 rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
608 {
610  rb_econv_t *ec;
611  rb_econv_result_t ret;
612  long len, olen;
613  VALUE econv_wrapper;
614  VALUE newstr;
615  const unsigned char *start, *sp;
616  unsigned char *dest, *dp;
617  size_t converted_output = 0;
618 
619  if (!to) return str;
620  if (!from) from = rb_enc_get(str);
621  if (from == to) return str;
622  if ((rb_enc_asciicompat(to) && is_ascii_string(str)) ||
623  to == rb_ascii8bit_encoding()) {
624  if (STR_ENC_GET(str) != to) {
625  str = rb_str_dup(str);
626  rb_enc_associate(str, to);
627  }
628  return str;
629  }
630 
631  len = RSTRING_LEN(str);
632  newstr = rb_str_new(0, len);
633  OBJ_INFECT(newstr, str);
634  olen = len;
635 
636  econv_wrapper = rb_obj_alloc(rb_cEncodingConverter);
637  RBASIC_CLEAR_CLASS(econv_wrapper);
638  ec = rb_econv_open_opts(from->name, to->name, ecflags, ecopts);
639  if (!ec) return str;
640  DATA_PTR(econv_wrapper) = ec;
641 
642  sp = (unsigned char*)RSTRING_PTR(str);
643  start = sp;
644  while ((dest = (unsigned char*)RSTRING_PTR(newstr)),
645  (dp = dest + converted_output),
646  (ret = rb_econv_convert(ec, &sp, start + len, &dp, dest + olen, 0)),
648  /* destination buffer short */
649  size_t converted_input = sp - start;
650  size_t rest = len - converted_input;
651  converted_output = dp - dest;
652  rb_str_set_len(newstr, converted_output);
653  if (converted_input && converted_output &&
654  rest < (LONG_MAX / converted_output)) {
655  rest = (rest * converted_output) / converted_input;
656  }
657  else {
658  rest = olen;
659  }
660  olen += rest < 2 ? 2 : rest;
661  rb_str_resize(newstr, olen);
662  }
663  DATA_PTR(econv_wrapper) = 0;
664  rb_econv_close(ec);
665  rb_gc_force_recycle(econv_wrapper);
666  switch (ret) {
667  case econv_finished:
668  len = dp - (unsigned char*)RSTRING_PTR(newstr);
669  rb_str_set_len(newstr, len);
670  rb_enc_associate(newstr, to);
671  return newstr;
672 
673  default:
674  /* some error, return original */
675  return str;
676  }
677 }
678 
679 VALUE
681 {
682  return rb_str_conv_enc_opts(str, from, to, 0, Qnil);
683 }
684 
685 VALUE
686 rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc)
687 {
688  VALUE str;
689 
690  str = rb_tainted_str_new(ptr, len);
691  return rb_external_str_with_enc(str, eenc);
692 }
693 
694 VALUE
696 {
697  if (eenc == rb_usascii_encoding() &&
700  return str;
701  }
702  rb_enc_associate(str, eenc);
703  return rb_str_conv_enc(str, eenc, rb_default_internal_encoding());
704 }
705 
706 VALUE
707 rb_external_str_new(const char *ptr, long len)
708 {
710 }
711 
712 VALUE
713 rb_external_str_new_cstr(const char *ptr)
714 {
716 }
717 
718 VALUE
719 rb_locale_str_new(const char *ptr, long len)
720 {
722 }
723 
724 VALUE
725 rb_locale_str_new_cstr(const char *ptr)
726 {
728 }
729 
730 VALUE
731 rb_filesystem_str_new(const char *ptr, long len)
732 {
734 }
735 
736 VALUE
738 {
740 }
741 
742 VALUE
744 {
746 }
747 
748 VALUE
750 {
751  return rb_str_conv_enc(str, STR_ENC_GET(str), rb_locale_encoding());
752 }
753 
754 VALUE
756 {
757  return rb_str_conv_enc(str, STR_ENC_GET(str), enc);
758 }
759 
760 static VALUE
762 {
763  if (RSTRING_LEN(str) <= RSTRING_EMBED_LEN_MAX) {
764  STR_SET_EMBED(str2);
765  memcpy(RSTRING_PTR(str2), RSTRING_PTR(str), RSTRING_LEN(str)+1);
766  STR_SET_EMBED_LEN(str2, RSTRING_LEN(str));
767  }
768  else {
769  str = rb_str_new_frozen(str);
770  FL_SET(str2, STR_NOEMBED);
771  RSTRING(str2)->as.heap.len = RSTRING_LEN(str);
772  RSTRING(str2)->as.heap.ptr = RSTRING_PTR(str);
773  STR_SET_SHARED(str2, str);
774  }
775  return str2;
776 }
777 
778 static VALUE
780 {
782  rb_enc_cr_str_exact_copy(str2, str);
783  return str2;
784 }
785 
786 static VALUE
788 {
789  return str_replace_shared(str_alloc(klass), str);
790 }
791 
792 static VALUE
793 str_new3(VALUE klass, VALUE str)
794 {
795  return str_new_shared(klass, str);
796 }
797 
798 VALUE
800 {
801  VALUE str2 = str_new3(rb_obj_class(str), str);
802 
803  OBJ_INFECT(str2, str);
804  return str2;
805 }
806 
807 static VALUE
808 str_new4(VALUE klass, VALUE str)
809 {
810  VALUE str2;
811 
812  str2 = str_alloc(klass);
813  STR_SET_NOEMBED(str2);
814  RSTRING(str2)->as.heap.len = RSTRING_LEN(str);
815  RSTRING(str2)->as.heap.ptr = RSTRING_PTR(str);
816  if (STR_SHARED_P(str)) {
817  VALUE shared = RSTRING(str)->as.heap.aux.shared;
818  assert(OBJ_FROZEN(shared));
819  STR_SET_SHARED(str2, shared); /* TODO: WB is not needed because str2 is *new* object */
820  }
821  else {
822  if (!STR_ASSOC_P(str)) {
823  RSTRING(str2)->as.heap.aux.capa = RSTRING(str)->as.heap.aux.capa;
824  }
825  STR_SET_SHARED(str, str2);
826  }
827  rb_enc_cr_str_exact_copy(str2, str);
828  OBJ_INFECT(str2, str);
829  return str2;
830 }
831 
832 VALUE
834 {
835  VALUE klass, str;
836 
837  if (OBJ_FROZEN(orig)) return orig;
838  klass = rb_obj_class(orig);
839  if (STR_SHARED_P(orig) && (str = RSTRING(orig)->as.heap.aux.shared)) {
840  long ofs;
841  assert(OBJ_FROZEN(str));
842  ofs = RSTRING_LEN(str) - RSTRING_LEN(orig);
843  if ((ofs > 0) || (klass != RBASIC(str)->klass) ||
844  ((RBASIC(str)->flags ^ RBASIC(orig)->flags) & FL_TAINT) ||
845  ENCODING_GET(str) != ENCODING_GET(orig)) {
846  str = str_new3(klass, str);
847  RSTRING(str)->as.heap.ptr += ofs;
848  RSTRING(str)->as.heap.len -= ofs;
849  rb_enc_cr_str_exact_copy(str, orig);
850  OBJ_INFECT(str, orig);
851  }
852  }
853  else if (STR_EMBED_P(orig)) {
854  str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig));
855  rb_enc_cr_str_exact_copy(str, orig);
856  OBJ_INFECT(str, orig);
857  }
858  else if (STR_ASSOC_P(orig)) {
859  VALUE assoc = RSTRING(orig)->as.heap.aux.shared;
860  FL_UNSET(orig, STR_ASSOC);
861  str = str_new4(klass, orig);
862  FL_SET(str, STR_ASSOC);
863  RB_OBJ_WRITE(str, &RSTRING(str)->as.heap.aux.shared, assoc);
864  /* TODO: WB is not needed because str is new object */
865  }
866  else {
867  str = str_new4(klass, orig);
868  }
869  OBJ_FREEZE(str);
870  return str;
871 }
872 
873 VALUE
874 rb_str_new_with_class(VALUE obj, const char *ptr, long len)
875 {
876  return str_new(rb_obj_class(obj), ptr, len);
877 }
878 
879 static VALUE
881 {
882  VALUE v = rb_str_new5(str, 0, 0);
883  rb_enc_copy(v, str);
884  OBJ_INFECT(v, str);
885  return v;
886 }
887 
888 #define STR_BUF_MIN_SIZE 128
889 
890 VALUE
891 rb_str_buf_new(long capa)
892 {
893  VALUE str = str_alloc(rb_cString);
894 
895  if (capa < STR_BUF_MIN_SIZE) {
896  capa = STR_BUF_MIN_SIZE;
897  }
898  FL_SET(str, STR_NOEMBED);
899  RSTRING(str)->as.heap.aux.capa = capa;
900  RSTRING(str)->as.heap.ptr = ALLOC_N(char, capa+1);
901  RSTRING(str)->as.heap.ptr[0] = '\0';
902 
903  return str;
904 }
905 
906 VALUE
907 rb_str_buf_new_cstr(const char *ptr)
908 {
909  VALUE str;
910  long len = strlen(ptr);
911 
912  str = rb_str_buf_new(len);
913  rb_str_buf_cat(str, ptr, len);
914 
915  return str;
916 }
917 
918 VALUE
919 rb_str_tmp_new(long len)
920 {
921  return str_new(0, 0, len);
922 }
923 
924 void *
925 rb_alloc_tmp_buffer(volatile VALUE *store, long len)
926 {
927  VALUE s = rb_str_tmp_new(len);
928  *store = s;
929  return RSTRING_PTR(s);
930 }
931 
932 void
933 rb_free_tmp_buffer(volatile VALUE *store)
934 {
935  VALUE s = *store;
936  *store = 0;
937  if (s) rb_str_clear(s);
938 }
939 
940 void
942 {
943  if (FL_TEST(str, RSTRING_FSTR)) {
944  st_data_t fstr = (st_data_t)str;
945  st_delete(frozen_strings, &fstr, NULL);
946  }
947  if (!STR_EMBED_P(str) && !STR_SHARED_P(str)) {
949  }
950 }
951 
952 RUBY_FUNC_EXPORTED size_t
954 {
956  return STR_HEAP_SIZE(str);
957  }
958  else {
959  return 0;
960  }
961 }
962 
963 VALUE
965 {
966  return rb_convert_type(str, T_STRING, "String", "to_str");
967 }
968 
969 static inline void str_discard(VALUE str);
970 
971 void
973 {
974  rb_encoding *enc;
975  int cr;
976  if (str == str2) return;
977  enc = STR_ENC_GET(str2);
978  cr = ENC_CODERANGE(str2);
979  str_discard(str);
980  OBJ_INFECT(str, str2);
981  if (RSTRING_LEN(str2) <= RSTRING_EMBED_LEN_MAX) {
982  STR_SET_EMBED(str);
983  memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), RSTRING_LEN(str2)+1);
984  STR_SET_EMBED_LEN(str, RSTRING_LEN(str2));
985  rb_enc_associate(str, enc);
986  ENC_CODERANGE_SET(str, cr);
987  return;
988  }
989  STR_SET_NOEMBED(str);
990  STR_UNSET_NOCAPA(str);
991  RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
992  RSTRING(str)->as.heap.len = RSTRING_LEN(str2);
993  if (STR_NOCAPA_P(str2)) {
994  VALUE shared = RSTRING(str2)->as.heap.aux.shared;
995  FL_SET(str, RBASIC(str2)->flags & STR_NOCAPA);
996  RB_OBJ_WRITE(str, &RSTRING(str)->as.heap.aux.shared, shared);
997  }
998  else {
999  RSTRING(str)->as.heap.aux.capa = RSTRING(str2)->as.heap.aux.capa;
1000  }
1001  STR_SET_EMBED(str2); /* abandon str2 */
1002  RSTRING_PTR(str2)[0] = 0;
1003  STR_SET_EMBED_LEN(str2, 0);
1004  rb_enc_associate(str, enc);
1005  ENC_CODERANGE_SET(str, cr);
1006 }
1007 
1008 static ID id_to_s;
1009 
1010 VALUE
1012 {
1013  VALUE str;
1014 
1015  if (RB_TYPE_P(obj, T_STRING)) {
1016  return obj;
1017  }
1018  str = rb_funcall(obj, id_to_s, 0);
1019  if (!RB_TYPE_P(str, T_STRING))
1020  return rb_any_to_s(obj);
1021  OBJ_INFECT(str, obj);
1022  return str;
1023 }
1024 
1025 static VALUE
1027 {
1028  long len;
1029 
1030  len = RSTRING_LEN(str2);
1031  if (STR_ASSOC_P(str2)) {
1032  str2 = rb_str_new4(str2);
1033  }
1034  if (STR_SHARED_P(str2)) {
1035  VALUE shared = RSTRING(str2)->as.heap.aux.shared;
1036  assert(OBJ_FROZEN(shared));
1037  STR_SET_NOEMBED(str);
1038  RSTRING(str)->as.heap.len = len;
1039  RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1040  FL_SET(str, ELTS_SHARED);
1041  FL_UNSET(str, STR_ASSOC);
1042  STR_SET_SHARED(str, shared);
1043  }
1044  else {
1045  str_replace_shared(str, str2);
1046  }
1047 
1048  OBJ_INFECT(str, str2);
1049  rb_enc_cr_str_exact_copy(str, str2);
1050  return str;
1051 }
1052 
1053 static VALUE
1055 {
1056  VALUE dup = str_alloc(klass);
1057  str_replace(dup, str);
1058  return dup;
1059 }
1060 
1061 VALUE
1063 {
1064  return str_duplicate(rb_obj_class(str), str);
1065 }
1066 
1067 VALUE
1069 {
1073  }
1074  return str_duplicate(rb_cString, str);
1075 }
1076 
1077 /*
1078  * call-seq:
1079  * String.new(str="") -> new_str
1080  *
1081  * Returns a new string object containing a copy of <i>str</i>.
1082  */
1083 
1084 static VALUE
1086 {
1087  VALUE orig;
1088 
1089  if (argc > 0 && rb_scan_args(argc, argv, "01", &orig) == 1)
1090  rb_str_replace(str, orig);
1091  return str;
1092 }
1093 
1094 static inline long
1095 enc_strlen(const char *p, const char *e, rb_encoding *enc, int cr)
1096 {
1097  long c;
1098  const char *q;
1099 
1100  if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
1101  return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc);
1102  }
1103  else if (rb_enc_asciicompat(enc)) {
1104  c = 0;
1105  if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID) {
1106  while (p < e) {
1107  if (ISASCII(*p)) {
1108  q = search_nonascii(p, e);
1109  if (!q)
1110  return c + (e - p);
1111  c += q - p;
1112  p = q;
1113  }
1114  p += rb_enc_fast_mbclen(p, e, enc);
1115  c++;
1116  }
1117  }
1118  else {
1119  while (p < e) {
1120  if (ISASCII(*p)) {
1121  q = search_nonascii(p, e);
1122  if (!q)
1123  return c + (e - p);
1124  c += q - p;
1125  p = q;
1126  }
1127  p += rb_enc_mbclen(p, e, enc);
1128  c++;
1129  }
1130  }
1131  return c;
1132  }
1133 
1134  for (c=0; p<e; c++) {
1135  p += rb_enc_mbclen(p, e, enc);
1136  }
1137  return c;
1138 }
1139 
1140 long
1141 rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
1142 {
1143  return enc_strlen(p, e, enc, ENC_CODERANGE_UNKNOWN);
1144 }
1145 
1146 long
1147 rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
1148 {
1149  long c;
1150  const char *q;
1151  int ret;
1152 
1153  *cr = 0;
1154  if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
1155  return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc);
1156  }
1157  else if (rb_enc_asciicompat(enc)) {
1158  c = 0;
1159  while (p < e) {
1160  if (ISASCII(*p)) {
1161  q = search_nonascii(p, e);
1162  if (!q) {
1163  if (!*cr) *cr = ENC_CODERANGE_7BIT;
1164  return c + (e - p);
1165  }
1166  c += q - p;
1167  p = q;
1168  }
1169  ret = rb_enc_precise_mbclen(p, e, enc);
1170  if (MBCLEN_CHARFOUND_P(ret)) {
1171  *cr |= ENC_CODERANGE_VALID;
1172  p += MBCLEN_CHARFOUND_LEN(ret);
1173  }
1174  else {
1175  *cr = ENC_CODERANGE_BROKEN;
1176  p++;
1177  }
1178  c++;
1179  }
1180  if (!*cr) *cr = ENC_CODERANGE_7BIT;
1181  return c;
1182  }
1183 
1184  for (c=0; p<e; c++) {
1185  ret = rb_enc_precise_mbclen(p, e, enc);
1186  if (MBCLEN_CHARFOUND_P(ret)) {
1187  *cr |= ENC_CODERANGE_VALID;
1188  p += MBCLEN_CHARFOUND_LEN(ret);
1189  }
1190  else {
1191  *cr = ENC_CODERANGE_BROKEN;
1192  if (p + rb_enc_mbminlen(enc) <= e)
1193  p += rb_enc_mbminlen(enc);
1194  else
1195  p = e;
1196  }
1197  }
1198  if (!*cr) *cr = ENC_CODERANGE_7BIT;
1199  return c;
1200 }
1201 
#ifdef NONASCII_MASK
/* True unless c has the 10xxxxxx bit pattern. */
#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)

/*
 * Count the UTF-8 lead bytes in the machine word at +s+.
 *
 * A UTF-8 lead byte looks like 0xxxxxxx or 11xxxxxx
 * (see http://en.wikipedia.org/wiki/UTF-8), so for a single byte the test
 * can be written as:
 *
 *   if (!(byte & 0x80))
 *       byte |= 0x40;           // force bit6 on for ASCII bytes
 *   return ((byte>>6) & 1);     // bit6 now says "lead byte or not"
 *
 * This function applies that logic to every byte of the word at once and
 * then sums the per-byte results.
 */
static inline VALUE
count_utf8_lead_bytes_with_word(const VALUE *s)
{
    VALUE w = *s;

    /* Leave, in bit0 of every byte, whether that byte is a lead byte. */
    w |= ~(w>>1);
    w >>= 6;
    w &= NONASCII_MASK >> 7;

    /* Fold the per-byte flags together into the lowest byte. */
    w += (w>>8);
    w += (w>>16);
#if SIZEOF_VALUE == 8
    w += (w>>32);
#endif
    return (w&0xF);
}
#endif
1236 
1237 static long
1239 {
1240  const char *p, *e;
1241  long n;
1242  int cr;
1243 
1244  if (single_byte_optimizable(str)) return RSTRING_LEN(str);
1245  if (!enc) enc = STR_ENC_GET(str);
1246  p = RSTRING_PTR(str);
1247  e = RSTRING_END(str);
1248  cr = ENC_CODERANGE(str);
1249 #ifdef NONASCII_MASK
1250  if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
1251  enc == rb_utf8_encoding()) {
1252 
1253  VALUE len = 0;
1254  if ((int)sizeof(VALUE) * 2 < e - p) {
1255  const VALUE *s, *t;
1256  const VALUE lowbits = sizeof(VALUE) - 1;
1257  s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
1258  t = (const VALUE*)(~lowbits & (VALUE)e);
1259  while (p < (const char *)s) {
1260  if (is_utf8_lead_byte(*p)) len++;
1261  p++;
1262  }
1263  while (s < t) {
1264  len += count_utf8_lead_bytes_with_word(s);
1265  s++;
1266  }
1267  p = (const char *)s;
1268  }
1269  while (p < e) {
1270  if (is_utf8_lead_byte(*p)) len++;
1271  p++;
1272  }
1273  return (long)len;
1274  }
1275 #endif
1276  n = rb_enc_strlen_cr(p, e, enc, &cr);
1277  if (cr) {
1278  ENC_CODERANGE_SET(str, cr);
1279  }
1280  return n;
1281 }
1282 
1283 long
1285 {
1286  return str_strlen(str, STR_ENC_GET(str));
1287 }
1288 
1289 /*
1290  * call-seq:
1291  * str.length -> integer
1292  * str.size -> integer
1293  *
1294  * Returns the character length of <i>str</i>.
1295  */
1296 
1297 VALUE
1299 {
1300  long len;
1301 
1302  len = str_strlen(str, STR_ENC_GET(str));
1303  return LONG2NUM(len);
1304 }
1305 
1306 /*
1307  * call-seq:
1308  * str.bytesize -> integer
1309  *
1310  * Returns the length of +str+ in bytes.
1311  *
1312  * "\x80\u3042".bytesize #=> 4
1313  * "hello".bytesize #=> 5
1314  */
1315 
1316 static VALUE
1318 {
1319  return LONG2NUM(RSTRING_LEN(str));
1320 }
1321 
1322 /*
1323  * call-seq:
1324  * str.empty? -> true or false
1325  *
1326  * Returns <code>true</code> if <i>str</i> has a length of zero.
1327  *
1328  * "hello".empty? #=> false
1329  * " ".empty? #=> false
1330  * "".empty? #=> true
1331  */
1332 
1333 static VALUE
1335 {
1336  if (RSTRING_LEN(str) == 0)
1337  return Qtrue;
1338  return Qfalse;
1339 }
1340 
1341 /*
1342  * call-seq:
1343  * str + other_str -> new_str
1344  *
1345  * Concatenation---Returns a new <code>String</code> containing
1346  * <i>other_str</i> concatenated to <i>str</i>.
1347  *
1348  * "Hello from " + self.to_s #=> "Hello from main"
1349  */
1350 
1351 VALUE
1353 {
1354  VALUE str3;
1355  rb_encoding *enc;
1356 
1357  StringValue(str2);
1358  enc = rb_enc_check(str1, str2);
1359  str3 = rb_str_new(0, RSTRING_LEN(str1)+RSTRING_LEN(str2));
1360  memcpy(RSTRING_PTR(str3), RSTRING_PTR(str1), RSTRING_LEN(str1));
1361  memcpy(RSTRING_PTR(str3) + RSTRING_LEN(str1),
1362  RSTRING_PTR(str2), RSTRING_LEN(str2));
1363  RSTRING_PTR(str3)[RSTRING_LEN(str3)] = '\0';
1364 
1365  FL_SET_RAW(str3, OBJ_TAINTED_RAW(str1) | OBJ_TAINTED_RAW(str2));
1368  return str3;
1369 }
1370 
1371 /*
1372  * call-seq:
1373  * str * integer -> new_str
1374  *
1375  * Copy --- Returns a new String containing +integer+ copies of the receiver.
1376  * +integer+ must be greater than or equal to 0.
1377  *
1378  * "Ho! " * 3 #=> "Ho! Ho! Ho! "
1379  * "Ho! " * 0 #=> ""
1380  */
1381 
1382 VALUE
1384 {
1385  VALUE str2;
1386  long n, len;
1387  char *ptr2;
1388 
1389  len = NUM2LONG(times);
1390  if (len < 0) {
1391  rb_raise(rb_eArgError, "negative argument");
1392  }
1393  if (len && LONG_MAX/len < RSTRING_LEN(str)) {
1394  rb_raise(rb_eArgError, "argument too big");
1395  }
1396 
1397  str2 = rb_str_new5(str, 0, len *= RSTRING_LEN(str));
1398  ptr2 = RSTRING_PTR(str2);
1399  if (len) {
1400  n = RSTRING_LEN(str);
1401  memcpy(ptr2, RSTRING_PTR(str), n);
1402  while (n <= len/2) {
1403  memcpy(ptr2 + n, ptr2, n);
1404  n *= 2;
1405  }
1406  memcpy(ptr2 + n, ptr2, len-n);
1407  }
1408  ptr2[RSTRING_LEN(str2)] = '\0';
1409  OBJ_INFECT(str2, str);
1410  rb_enc_cr_str_copy_for_substr(str2, str);
1411 
1412  return str2;
1413 }
1414 
1415 /*
1416  * call-seq:
1417  * str % arg -> new_str
1418  *
1419  * Format---Uses <i>str</i> as a format specification, and returns the result
1420  * of applying it to <i>arg</i>. If the format specification contains more than
1421  * one substitution, then <i>arg</i> must be an <code>Array</code> or <code>Hash</code>
1422  * containing the values to be substituted. See <code>Kernel::sprintf</code> for
1423  * details of the format string.
1424  *
1425  * "%05d" % 123 #=> "00123"
1426  * "%-5s: %08x" % [ "ID", self.object_id ] #=> "ID : 200e14d6"
1427  * "foo = %{foo}" % { :foo => 'bar' } #=> "foo = bar"
1428  */
1429 
1430 static VALUE
1432 {
    /* NOTE(review): listing line 1431 (the declarator; the body uses the
     * parameters `str` and `arg`) is absent from this copy. */
1433  volatile VALUE tmp = rb_check_array_type(arg);
1434 
    /* When rb_check_array_type() yields a non-nil array, its elements are
     * passed as the whole argument vector for the format. */
1435  if (!NIL_P(tmp)) {
1436  return rb_str_format(RARRAY_LENINT(tmp), RARRAY_CONST_PTR(tmp), str);
1437  }
    /* Otherwise `arg` itself is the single format argument. */
1438  return rb_str_format(1, &arg, str);
1439 }
1440 
1441 static inline void
1443 {
    /* NOTE(review): listing line 1442 (the declarator; the body uses a
     * `str` parameter) is absent from this copy.
     * Raises RuntimeError when STR_TMPLOCK is set on the string, then
     * applies the ordinary frozen check. */
1444  if (FL_TEST(str, STR_TMPLOCK)) {
1445  rb_raise(rb_eRuntimeError, "can't modify string; temporarily locked");
1446  }
1447  rb_check_frozen(str);
1448 }
1449 
1450 static inline int
1452 {
    /* NOTE(review): listing line 1451 (the declarator; the body uses a
     * `str` parameter) is absent from this copy.
     * Runs the modifiability checks first, then returns 1 when the string
     * is not shared or is embedded, and 0 otherwise. */
1453  str_modifiable(str);
1454  if (!STR_SHARED_P(str)) return 1;
1455  if (STR_EMBED_P(str)) return 1;
1456  return 0;
1457 }
1458 
1459 static void
1461 {
    /* NOTE(review): listing line 1460 (the declarator; the body uses the
     * parameters `str` and `expand`) is absent from this copy.
     * Gives the string a freshly allocated heap buffer of
     * len + expand bytes plus room for the terminator, and copies the
     * current contents into it. */
1462  char *ptr;
1463  long len = RSTRING_LEN(str);
1464  const int termlen = TERM_LEN(str);
1465  long capa = len + expand;
1466 
    /* A negative `expand` shrinks the string: keep only `capa` bytes. */
1467  if (len > capa) len = capa;
1468  ptr = ALLOC_N(char, capa + termlen);
1469  if (RSTRING_PTR(str)) {
1470  memcpy(ptr, RSTRING_PTR(str), len);
1471  }
    /* Switch to the non-embedded representation and clear the
     * ELTS_SHARED/STR_ASSOC flags before installing the new buffer. */
1472  STR_SET_NOEMBED(str);
1473  STR_UNSET_NOCAPA(str);
1474  TERM_FILL(ptr + len, termlen);
1475  RSTRING(str)->as.heap.ptr = ptr;
1476  RSTRING(str)->as.heap.len = len;
1477  RSTRING(str)->as.heap.aux.capa = capa;
1478 }
1479 
/* Convenience form of str_make_independent_expand() that reserves no extra room (expand = 0). */
1480 #define str_make_independent(str) str_make_independent_expand((str), 0L)
1481 
1482 void
1484 {
    /* NOTE(review): listing line 1483 (the declarator; the body uses a
     * `str` parameter) is absent from this copy.
     * Ensures the string owns its buffer, then clears the cached
     * coderange. */
1485  if (!str_independent(str))
1486  str_make_independent(str);
1487  ENC_CODERANGE_CLEAR(str);
1488 }
1489 
1490 void
1491 rb_str_modify_expand(VALUE str, long expand)
1492 {
1493  if (expand < 0) {
1494  rb_raise(rb_eArgError, "negative expanding string size");
1495  }
1496  if (!str_independent(str)) {
1497  str_make_independent_expand(str, expand);
1498  }
1499  else if (expand > 0) {
1500  long len = RSTRING_LEN(str);
1501  long capa = len + expand;
1502  int termlen = TERM_LEN(str);
1503  if (!STR_EMBED_P(str)) {
1504  REALLOC_N(RSTRING(str)->as.heap.ptr, char, capa + termlen);
1505  STR_UNSET_NOCAPA(str);
1506  RSTRING(str)->as.heap.aux.capa = capa;
1507  }
1508  else if (capa + termlen > RSTRING_EMBED_LEN_MAX + 1) {
1509  str_make_independent_expand(str, expand);
1510  }
1511  }
1512  ENC_CODERANGE_CLEAR(str);
1513 }
1514 
1515 /* As rb_str_modify(), but don't clear coderange */
1516 static void
1518 {
    /* NOTE(review): listing line 1517 (the declarator; the body uses a
     * `str` parameter) is absent from this copy.
     * The coderange is kept unless it is BROKEN, in which case it is
     * cleared so it gets recomputed. */
1519  if (!str_independent(str))
1520  str_make_independent(str);
1521  if (ENC_CODERANGE(str) == ENC_CODERANGE_BROKEN)
1522  /* Force re-scan later */
1523  ENC_CODERANGE_CLEAR(str);
1524 }
1525 
1526 static inline void
1528 {
    /* NOTE(review): listing lines 1527 (the declarator) and 1531 (a
     * statement between the `if` and the two assignments below) are absent
     * from this copy; the visible code only resets the heap pointer and
     * length of an unshared, non-embedded string. */
1529  str_modifiable(str);
1530  if (!STR_SHARED_P(str) && !STR_EMBED_P(str)) {
1532  RSTRING(str)->as.heap.ptr = 0;
1533  RSTRING(str)->as.heap.len = 0;
1534  }
1535 }
1536 
1537 void
1539 {
    /* NOTE(review): listing lines 1538 (the declarator; the body uses the
     * parameters `str` and `add`) and 1563 (a statement between FL_SET and
     * RB_OBJ_WRITE) are absent from this copy. */
1540  /* sanity check */
1541  rb_check_frozen(str);
1542  if (STR_ASSOC_P(str)) {
1543  /* already associated */
1544  rb_ary_concat(RSTRING(str)->as.heap.aux.shared, add);
1545  }
1546  else {
1547  if (STR_SHARED_P(str)) {
    /* Remember the shared root before detaching; if that root already
     * carries an association, append to it and reuse it. */
1548  VALUE assoc = RSTRING(str)->as.heap.aux.shared;
1549  str_make_independent(str);
1550  if (STR_ASSOC_P(assoc)) {
1551  assoc = RSTRING(assoc)->as.heap.aux.shared;
1552  rb_ary_concat(assoc, add);
1553  add = assoc;
1554  }
1555  }
1556  else if (STR_EMBED_P(str)) {
1557  str_make_independent(str);
1558  }
    /* The aux slot is about to hold `add` instead of the capacity, so the
     * buffer is first resized to exactly the current length. */
1559  else if (RSTRING(str)->as.heap.aux.capa != RSTRING_LEN(str)) {
1560  RESIZE_CAPA(str, RSTRING_LEN(str));
1561  }
1562  FL_SET(str, STR_ASSOC);
1564  RB_OBJ_WRITE(str, &RSTRING(str)->as.heap.aux.shared, add);
1565  }
1566 }
1567 
1568 VALUE
1570 {
    /* NOTE(review): listing line 1569 (the declarator; the body uses a
     * `str` parameter) is absent from this copy.
     * Follows a shared string to its root, then returns the object stored
     * in the aux slot when STR_ASSOC is set, or Qfalse otherwise. */
1571  if (STR_SHARED_P(str)) str = RSTRING(str)->as.heap.aux.shared;
1572  if (STR_ASSOC_P(str)) {
1573  return RSTRING(str)->as.heap.aux.shared;
1574  }
1575  return Qfalse;
1576 }
1577 
1578 void
1580 {
    /* NOTE(review): listing line 1579 (the declarator; the body uses a
     * `str` parameter) is absent from this copy.
     * Raises Encoding::CompatibilityError unless the string's encoding is
     * ASCII compatible. */
1581  rb_encoding *enc = rb_enc_get(str);
1582  if (!rb_enc_asciicompat(enc)) {
1583  rb_raise(rb_eEncCompatError, "ASCII incompatible encoding: %s", rb_enc_name(enc));
1584  }
1585 }
1586 
1587 VALUE
1588 rb_string_value(volatile VALUE *ptr)
1589 {
1590  VALUE s = *ptr;
1591  if (!RB_TYPE_P(s, T_STRING)) {
1592  s = rb_str_to_str(s);
1593  *ptr = s;
1594  }
1595  return s;
1596 }
1597 
1598 char *
1600 {
    /* NOTE(review): listing line 1599 (the declarator; the body uses a
     * `ptr` parameter) is absent from this copy.
     * Converts *ptr via rb_string_value() and returns the string's byte
     * pointer. */
1601  VALUE str = rb_string_value(ptr);
1602  return RSTRING_PTR(str);
1603 }
1604 
/*
 * Returns 1 when the first +n+ bytes at +s+ are all zero (trivially true
 * for n <= 0), and 0 as soon as a non-zero byte is seen.
 */
static int
zero_filled(const char *s, int n)
{
    int i;

    for (i = 0; i < n; i++) {
        if (s[i] != '\0') {
            return 0;
        }
    }
    return 1;
}
1613 
1614 static const char *
1615 str_null_char(const char *s, long len, const int minlen, rb_encoding *enc)
1616 {
1617  const char *e = s + len;
1618 
1619  for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
1620  if (zero_filled(s, minlen)) return s;
1621  }
1622  return 0;
1623 }
1624 
1625 static char *
1626 str_fill_term(VALUE str, char *s, long len, int oldtermlen, int termlen)
1627 {
1628  long capa = rb_str_capacity(str) + 1;
1629 
1630  if (capa < len + termlen) {
1631  rb_str_modify_expand(str, termlen);
1632  }
1633  else if (!str_independent(str)) {
1634  if (zero_filled(s + len, termlen)) return s;
1635  str_make_independent(str);
1636  }
1637  s = RSTRING_PTR(str);
1638  TERM_FILL(s + len, termlen);
1639  return s;
1640 }
1641 
1642 char *
1644 {
    /* NOTE(review): listing line 1643 (the declarator; the body uses a
     * `ptr` parameter) is absent from this copy.
     * Converts *ptr to a String and returns its bytes as a terminated C
     * string, raising ArgumentError if the contents include a null
     * character. */
1645  VALUE str = rb_string_value(ptr);
1646  char *s = RSTRING_PTR(str);
1647  long len = RSTRING_LEN(str);
1648  rb_encoding *enc = rb_enc_get(str);
1649  const int minlen = rb_enc_mbminlen(enc);
1650 
    /* Encodings whose minimum character width exceeds one byte need a
     * character-wise null check and a wide terminator. */
1651  if (minlen > 1) {
1652  if (str_null_char(s, len, minlen, enc)) {
1653  rb_raise(rb_eArgError, "string contains null char");
1654  }
1655  return str_fill_term(str, s, len, minlen, minlen);
1656  }
1657  if (!s || memchr(s, 0, len)) {
1658  rb_raise(rb_eArgError, "string contains null byte");
1659  }
    /* Only touch the string when the byte after the contents is not
     * already zero. */
1660  if (s[len]) {
1661  rb_str_modify(str);
1662  s = RSTRING_PTR(str);
1663  s[RSTRING_LEN(str)] = 0;
1664  }
1665  return s;
1666 }
1667 
1668 void
1669 rb_str_fill_terminator(VALUE str, const int newminlen)
1670 {
1671  char *s = RSTRING_PTR(str);
1672  long len = RSTRING_LEN(str);
1673  rb_encoding *enc = rb_enc_get(str);
1674  str_fill_term(str, s, len, rb_enc_mbminlen(enc), newminlen);
1675 }
1676 
1677 VALUE
1679 {
    /* NOTE(review): listing line 1678 (the declarator; the body uses a
     * `str` parameter) is absent from this copy.
     * Returns the result of rb_check_convert_type() for T_STRING using
     * the to_str method. */
1680  str = rb_check_convert_type(str, T_STRING, "String", "to_str");
1681  return str;
1682 }
1683 
1684 /*
1685  * call-seq:
1686  * String.try_convert(obj) -> string or nil
1687  *
1688  * Try to convert <i>obj</i> into a String, using to_str method.
1689  * Returns converted string or nil if <i>obj</i> cannot be converted
1690  * for any reason.
1691  *
1692  * String.try_convert("str") #=> "str"
1693  * String.try_convert(/re/) #=> nil
1694  */
1695 static VALUE
1697 {
    /* NOTE(review): listing line 1696 (the declarator; the body uses a
     * `str` parameter) is absent from this copy. */
1698  return rb_check_string_type(str);
1699 }
1700 
/*
 * Advances from +p+ by *nthp characters of encoding +enc+ without passing
 * +e+, and returns the resulting position.  The number of characters that
 * could not be consumed is stored back into *nthp.
 */
1701 static char*
1702 str_nth_len(const char *p, const char *e, long *nthp, rb_encoding *enc)
1703 {
1704  long nth = *nthp;
    /* Single-byte encoding: a character index is a byte index. */
1705  if (rb_enc_mbmaxlen(enc) == 1) {
1706  p += nth;
1707  }
    /* Fixed-width encoding: scale the index by the character width. */
1708  else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
1709  p += nth * rb_enc_mbmaxlen(enc);
1710  }
1711  else if (rb_enc_asciicompat(enc)) {
1712  const char *p2, *e2;
1713  int n;
1714 
1715  while (p < e && 0 < nth) {
    /* At least nth bytes are needed for nth characters; if fewer remain,
     * the target lies beyond the end. */
1716  e2 = p + nth;
1717  if (e < e2) {
1718  *nthp = nth;
1719  return (char *)e;
1720  }
1721  if (ISASCII(*p)) {
    /* Skip a run of ASCII bytes in one step.  Presumably
     * search_nonascii() returns the first non-ASCII byte in [p, e2) or
     * NULL -- confirm against its definition. */
1722  p2 = search_nonascii(p, e2);
1723  if (!p2) {
1724  nth -= e2 - p;
1725  *nthp = nth;
1726  return (char *)e2;
1727  }
1728  nth -= p2 - p;
1729  p = p2;
1730  }
1731  n = rb_enc_mbclen(p, e, enc);
1732  p += n;
1733  nth--;
1734  }
1735  *nthp = nth;
1736  if (nth != 0) {
1737  return (char *)e;
1738  }
1739  return (char *)p;
1740  }
1741  else {
    /* General variable-width encoding: step one character at a time. */
1742  while (p < e && nth--) {
1743  p += rb_enc_mbclen(p, e, enc);
1744  }
1745  }
    /* The arithmetic branches above may overshoot; clamp to the end. */
1746  if (p > e) p = e;
1747  *nthp = nth;
1748  return (char*)p;
1749 }
1750 
1751 char*
1752 rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc)
1753 {
1754  return str_nth_len(p, e, &nth, enc);
1755 }
1756 
1757 static char*
1758 str_nth(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
1759 {
1760  if (singlebyte)
1761  p += nth;
1762  else {
1763  p = str_nth_len(p, e, &nth, enc);
1764  }
1765  if (!p) return 0;
1766  if (p > e) p = e;
1767  return (char *)p;
1768 }
1769 
1770 /* char offset to byte offset */
1771 static long
1772 str_offset(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
1773 {
1774  const char *pp = str_nth(p, e, nth, enc, singlebyte);
1775  if (!pp) return e - p;
1776  return pp - p;
1777 }
1778 
1779 long
1780 rb_str_offset(VALUE str, long pos)
1781 {
    /* NOTE(review): listing line 1783, which should carry the remaining
     * arguments and close the str_offset() call below, is absent from
     * this copy; the statement is incomplete as shown. */
1782  return str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
1784 }
1785 
1786 #ifdef NONASCII_MASK
/*
 * Advances from +p+ towards +e+ by *nthp characters, counting characters
 * by their lead bytes (is_utf8_lead_byte), and returns the position
 * reached.  The count still outstanding is stored back into *nthp.
 */
1787 static char *
1788 str_utf8_nth(const char *p, const char *e, long *nthp)
1789 {
1790  long nth = *nthp;
    /* Only take the word-at-a-time path when both the span and the count
     * exceed two VALUE-sized words. */
1791  if ((int)SIZEOF_VALUE * 2 < e - p && (int)SIZEOF_VALUE * 2 < nth) {
1792  const VALUE *s, *t;
1793  const VALUE lowbits = sizeof(VALUE) - 1;
    /* s: p rounded up to a VALUE boundary; t: e rounded down to one. */
1794  s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
1795  t = (const VALUE*)(~lowbits & (VALUE)e);
    /* Count byte-wise until the pointer is aligned. */
1796  while (p < (const char *)s) {
1797  if (is_utf8_lead_byte(*p)) nth--;
1798  p++;
1799  }
    /* Count whole words while a full word's worth of characters could
     * still be pending.  Presumably count_utf8_lead_bytes_with_word()
     * returns the number of lead bytes in *s -- confirm. */
1800  do {
1801  nth -= count_utf8_lead_bytes_with_word(s);
1802  s++;
1803  } while (s < t && (int)sizeof(VALUE) <= nth);
1804  p = (char *)s;
1805  }
    /* Finish byte-wise; stop on the lead byte of the target character. */
1806  while (p < e) {
1807  if (is_utf8_lead_byte(*p)) {
1808  if (nth == 0) break;
1809  nth--;
1810  }
1811  p++;
1812  }
1813  *nthp = nth;
1814  return (char *)p;
1815 }
1816 
/*
 * Returns the byte distance from +p+ to the position str_utf8_nth()
 * reaches for +nth+ characters.
 */
static long
str_utf8_offset(const char *p, const char *e, long nth)
{
    long remaining = nth;
    const char *reached = str_utf8_nth(p, e, &remaining);

    return reached - p;
}
1823 #endif
1824 
1825 /* byte offset to char offset */
1826 long
1827 rb_str_sublen(VALUE str, long pos)
1828 {
1829  if (single_byte_optimizable(str) || pos < 0)
1830  return pos;
1831  else {
1832  char *p = RSTRING_PTR(str);
1833  return enc_strlen(p, p + pos, STR_ENC_GET(str), ENC_CODERANGE(str));
1834  }
1835 }
1836 
1837 VALUE
1838 rb_str_subseq(VALUE str, long beg, long len)
1839 {
1840  VALUE str2;
1841 
1842  if (RSTRING_LEN(str) == beg + len &&
1843  RSTRING_EMBED_LEN_MAX < len) {
1844  str2 = rb_str_new_shared(rb_str_new_frozen(str));
1845  rb_str_drop_bytes(str2, beg);
1846  }
1847  else {
1848  str2 = rb_str_new5(str, RSTRING_PTR(str)+beg, len);
1849  RB_GC_GUARD(str);
1850  }
1851 
1852  rb_enc_cr_str_copy_for_substr(str2, str);
1853  OBJ_INFECT(str2, str);
1854 
1855  return str2;
1856 }
1857 
/*
 * Resolves the character range (+beg+, *lenp) of +str+ to bytes.  Returns
 * a pointer to the first byte of the range and stores the byte length in
 * *lenp, or returns 0 when the range is invalid.  A negative +beg+ counts
 * from the end of the string.
 */
1858 char *
1859 rb_str_subpos(VALUE str, long beg, long *lenp)
1860 {
1861  long len = *lenp;
1862  long slen = -1L;
1863  long blen = RSTRING_LEN(str);
1864  rb_encoding *enc = STR_ENC_GET(str);
1865  char *p, *s = RSTRING_PTR(str), *e = s + blen;
1866 
1867  if (len < 0) return 0;
1868  if (!blen) {
1869  len = 0;
1870  }
    /* Single-byte case: characters are bytes, so plain arithmetic works. */
1871  if (single_byte_optimizable(str)) {
1872  if (beg > blen) return 0;
1873  if (beg < 0) {
1874  beg += blen;
1875  if (beg < 0) return 0;
1876  }
1877  if (beg + len > blen)
1878  len = blen - beg;
1879  if (len < 0) return 0;
1880  p = s + beg;
1881  goto end;
1882  }
1883  if (beg < 0) {
    /* The range cannot extend past the end of the string. */
1884  if (len > -beg) len = -beg;
    /* When the start is close to the end (relative to the byte length),
     * walk backwards from the end instead of counting every character. */
1885  if (-beg * rb_enc_mbmaxlen(enc) < RSTRING_LEN(str) / 8) {
1886  beg = -beg;
    /* First move `e` back to the end of the range, then move `p` back a
     * further `len` characters to its start. */
1887  while (beg-- > len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
1888  p = e;
1889  if (!p) return 0;
1890  while (len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
1891  if (!p) return 0;
1892  len = e - p;
1893  goto end;
1894  }
1895  else {
    /* Otherwise turn the index into a non-negative one via the character
     * count. */
1896  slen = str_strlen(str, enc);
1897  beg += slen;
1898  if (beg < 0) return 0;
1899  p = s + beg;
1900  if (len == 0) goto end;
1901  }
1902  }
    /* A start index larger than the byte length is certainly past the end. */
1903  else if (beg > 0 && beg > RSTRING_LEN(str)) {
1904  return 0;
1905  }
1906  if (len == 0) {
1907  if (beg > str_strlen(str, enc)) return 0;
1908  p = s + beg;
1909  }
1910 #ifdef NONASCII_MASK
    /* Valid UTF-8: use the lead-byte counting helpers. */
1911  else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
1912  enc == rb_utf8_encoding()) {
1913  p = str_utf8_nth(s, e, &beg);
1914  if (beg > 0) return 0;
1915  len = str_utf8_offset(p, e, len);
1916  }
1917 #endif
    /* Fixed-width encoding: scale indices by the character size and clamp
     * the length to the bytes available. */
1918  else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
1919  int char_sz = rb_enc_mbmaxlen(enc);
1920 
1921  p = s + beg * char_sz;
1922  if (p > e) {
1923  return 0;
1924  }
1925  else if (len * char_sz > e - p)
1926  len = e - p;
1927  else
1928  len *= char_sz;
1929  }
    /* General case: reaching the end with characters still outstanding
     * means the start is out of range. */
1930  else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
1931  if (beg > 0) return 0;
1932  len = 0;
1933  }
1934  else {
1935  len = str_offset(p, e, len, enc, 0);
1936  }
1937  end:
1938  *lenp = len;
1939  RB_GC_GUARD(str);
1940  return p;
1941 }
1942 
1943 VALUE
1944 rb_str_substr(VALUE str, long beg, long len)
1945 {
1946  VALUE str2;
1947  char *p = rb_str_subpos(str, beg, &len);
1948 
1949  if (!p) return Qnil;
1950  if (len > RSTRING_EMBED_LEN_MAX && p + len == RSTRING_END(str)) {
1951  str2 = rb_str_new4(str);
1952  str2 = str_new3(rb_obj_class(str2), str2);
1953  RSTRING(str2)->as.heap.ptr += RSTRING(str2)->as.heap.len - len;
1954  RSTRING(str2)->as.heap.len = len;
1955  }
1956  else {
1957  str2 = rb_str_new5(str, p, len);
1958  OBJ_INFECT(str2, str);
1959  RB_GC_GUARD(str);
1960  }
1961  rb_enc_cr_str_copy_for_substr(str2, str);
1962 
1963  return str2;
1964 }
1965 
1966 VALUE
1968 {
    /* NOTE(review): listing line 1967 (the declarator; the body uses a
     * `str` parameter) is absent from this copy.
     * An associated object stored in the aux slot is frozen together with
     * the string. */
1969  if (STR_ASSOC_P(str)) {
1970  VALUE ary = RSTRING(str)->as.heap.aux.shared;
1971  OBJ_FREEZE(ary);
1972  }
1973  return rb_obj_freeze(str);
1974 }
1975 
/* From here on rb_str_dup_frozen expands to rb_str_new_frozen.
 * NOTE(review): listing line 1976 is absent from this copy. */
1977 #define rb_str_dup_frozen rb_str_new_frozen
1978 
1979 VALUE
1980 rb_str_locktmp(VALUE str)
1981 {
1982  if (FL_TEST(str, STR_TMPLOCK)) {
1983  rb_raise(rb_eRuntimeError, "temporal locking already locked string");
1984  }
1985  FL_SET(str, STR_TMPLOCK);
1986  return str;
1987 }
1988 
1989 VALUE
1991 {
    /* NOTE(review): listing line 1990 (the declarator; the body uses a
     * `str` parameter) is absent from this copy.
     * Clears STR_TMPLOCK and returns the string; raises RuntimeError when
     * the flag was not set. */
1992  if (!FL_TEST(str, STR_TMPLOCK)) {
1993  rb_raise(rb_eRuntimeError, "temporal unlocking already unlocked string");
1994  }
1995  FL_UNSET(str, STR_TMPLOCK);
1996  return str;
1997 }
1998 
1999 VALUE
2001 {
    /* NOTE(review): listing line 2000 (the declarator; the body uses the
     * parameters `str`, `func` and `arg`) is absent from this copy.
     * Locks the string, then runs func(arg) under rb_ensure() with
     * rb_str_unlocktmp(str) registered as the ensure handler. */
2002  rb_str_locktmp(str);
2003  return rb_ensure(func, arg, rb_str_unlocktmp, str);
2004 }
2005 
2006 void
2007 rb_str_set_len(VALUE str, long len)
2008 {
2009  long capa;
2010  const int termlen = TERM_LEN(str);
2011 
2012  str_modifiable(str);
2013  if (STR_SHARED_P(str)) {
2014  rb_raise(rb_eRuntimeError, "can't set length of shared string");
2015  }
2016  if (len + termlen - 1 > (capa = (long)rb_str_capacity(str))) {
2017  rb_bug("probable buffer overflow: %ld for %ld", len, capa);
2018  }
2019  STR_SET_LEN(str, len);
2020  TERM_FILL(&RSTRING_PTR(str)[len], termlen);
2021 }
2022 
/*
 * Changes the byte length of +str+ to +len+, switching between the
 * embedded and heap representations as needed, and returns +str+.  The
 * cached coderange is cleared.  Raises ArgumentError for a negative +len+.
 */
2023 VALUE
2024 rb_str_resize(VALUE str, long len)
2025 {
2026  long slen;
2027  int independent;
2028 
2029  if (len < 0) {
2030  rb_raise(rb_eArgError, "negative string size (or size too big)");
2031  }
2032 
2033  independent = str_independent(str);
2034  ENC_CODERANGE_CLEAR(str);
2035  slen = RSTRING_LEN(str);
2036  {
2037  long capa;
2038  const int termlen = TERM_LEN(str);
2039  if (STR_EMBED_P(str)) {
2040  if (len == slen) return str;
    /* Embedded and the new size still fits: just adjust the length. */
2041  if (len + termlen <= RSTRING_EMBED_LEN_MAX + 1) {
2042  STR_SET_EMBED_LEN(str, len);
2043  TERM_FILL(RSTRING(str)->as.ary + len, termlen);
2044  return str;
2045  }
    /* Embedded but too small: move the contents to a heap buffer. */
2046  str_make_independent_expand(str, len - slen);
2047  STR_SET_NOEMBED(str);
2048  }
    /* Heap string whose new size fits the embedded array: copy back. */
2049  else if (len + termlen <= RSTRING_EMBED_LEN_MAX + 1) {
2050  char *ptr = STR_HEAP_PTR(str);
2051  STR_SET_EMBED(str);
2052  if (slen > len) slen = len;
2053  if (slen > 0) MEMCPY(RSTRING(str)->as.ary, ptr, char, slen);
2054  TERM_FILL(RSTRING(str)->as.ary + len, termlen);
2055  STR_SET_EMBED_LEN(str, len);
    /* The old buffer is released only when this string owned it. */
2056  if (independent) ruby_xfree(ptr);
2057  return str;
2058  }
2059  else if (!independent) {
2060  if (len == slen) return str;
2061  str_make_independent_expand(str, len - slen);
2062  }
    /* Owned heap buffer: reallocate when it is too small, or when the
     * unused capacity exceeds min(len, 1024). */
2063  else if ((capa = RSTRING(str)->as.heap.aux.capa) < len ||
2064  (capa - len) > (len < 1024 ? len : 1024)) {
2065  REALLOC_N(RSTRING(str)->as.heap.ptr, char, len + termlen);
2066  RSTRING(str)->as.heap.aux.capa = len;
2067  }
2068  else if (len == slen) return str;
2069  RSTRING(str)->as.heap.len = len;
2070  TERM_FILL(RSTRING(str)->as.heap.ptr + len, termlen); /* sentinel */
2071  }
2072  return str;
2073 }
2074 
2075 static VALUE
2076 str_buf_cat(VALUE str, const char *ptr, long len)
2077 {
2078  long capa, total, off = -1;
2079  const int termlen = TERM_LEN(str);
2080 
2081  if (ptr >= RSTRING_PTR(str) && ptr <= RSTRING_END(str)) {
2082  off = ptr - RSTRING_PTR(str);
2083  }
2084  rb_str_modify(str);
2085  if (len == 0) return 0;
2086  if (STR_ASSOC_P(str)) {
2087  FL_UNSET(str, STR_ASSOC);
2088  capa = RSTRING(str)->as.heap.aux.capa = RSTRING_LEN(str);
2089  }
2090  else if (STR_EMBED_P(str)) {
2091  capa = RSTRING_EMBED_LEN_MAX;
2092  }
2093  else {
2094  capa = RSTRING(str)->as.heap.aux.capa;
2095  }
2096  if (RSTRING_LEN(str) >= LONG_MAX - len) {
2097  rb_raise(rb_eArgError, "string sizes too big");
2098  }
2099  total = RSTRING_LEN(str)+len;
2100  if (capa <= total) {
2101  while (total > capa) {
2102  if (capa + termlen >= LONG_MAX / 2) {
2103  capa = (total + 4095) / 4096 * 4096;
2104  break;
2105  }
2106  capa = (capa + termlen) * 2;
2107  }
2108  RESIZE_CAPA(str, capa);
2109  }
2110  if (off != -1) {
2111  ptr = RSTRING_PTR(str) + off;
2112  }
2113  memcpy(RSTRING_PTR(str) + RSTRING_LEN(str), ptr, len);
2114  STR_SET_LEN(str, total);
2115  RSTRING_PTR(str)[total] = '\0'; /* sentinel */
2116 
2117  return str;
2118 }
2119 
/* Appends the NUL-terminated C string `ptr` via str_buf_cat(), using strlen(ptr) as the length. */
2120 #define str_buf_cat2(str, ptr) str_buf_cat((str), (ptr), strlen(ptr))
2121 
2122 VALUE
2123 rb_str_buf_cat(VALUE str, const char *ptr, long len)
2124 {
2125  if (len == 0) return str;
2126  if (len < 0) {
2127  rb_raise(rb_eArgError, "negative string size (or size too big)");
2128  }
2129  return str_buf_cat(str, ptr, len);
2130 }
2131 
2132 VALUE
2133 rb_str_buf_cat2(VALUE str, const char *ptr)
2134 {
2135  return rb_str_buf_cat(str, ptr, strlen(ptr));
2136 }
2137 
2138 VALUE
2139 rb_str_cat(VALUE str, const char *ptr, long len)
2140 {
2141  if (len < 0) {
2142  rb_raise(rb_eArgError, "negative string size (or size too big)");
2143  }
2144  if (STR_ASSOC_P(str)) {
2145  char *p;
2146  rb_str_modify_expand(str, len);
2147  p = RSTRING(str)->as.heap.ptr;
2148  memcpy(p + RSTRING(str)->as.heap.len, ptr, len);
2149  len = RSTRING(str)->as.heap.len += len;
2150  TERM_FILL(p, TERM_LEN(str)); /* sentinel */
2151  return str;
2152  }
2153 
2154  return rb_str_buf_cat(str, ptr, len);
2155 }
2156 
2157 VALUE
2158 rb_str_cat2(VALUE str, const char *ptr)
2159 {
2160  return rb_str_cat(str, ptr, strlen(ptr));
2161 }
2162 
/*
 * Appends the +len+ bytes at +ptr+ (encoding index +ptr_encindex+,
 * coderange +ptr_cr+) to +str+, then sets the resulting encoding index and
 * coderange on +str+.  When +ptr_cr_ret+ is non-null it receives the
 * coderange determined for the appended bytes.  Raises
 * Encoding::CompatibilityError for incompatible encodings and
 * ArgumentError for a negative +len+.
 */
2163 static VALUE
2164 rb_enc_cr_str_buf_cat(VALUE str, const char *ptr, long len,
2165  int ptr_encindex, int ptr_cr, int *ptr_cr_ret)
2166 {
2167  int str_encindex = ENCODING_GET(str);
2168  int res_encindex;
2169  int str_cr, res_cr;
2170 
    /* An empty receiver is treated as 7-bit whatever its cached state. */
2171  str_cr = RSTRING_LEN(str) ? ENC_CODERANGE(str) : ENC_CODERANGE_7BIT;
2172 
2173  if (str_encindex == ptr_encindex) {
    /* Same encoding: the appended bytes are scanned only when the
     * receiver's coderange is known. */
2174  if (str_cr == ENC_CODERANGE_UNKNOWN)
2175  ptr_cr = ENC_CODERANGE_UNKNOWN;
2176  else if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
2177  ptr_cr = coderange_scan(ptr, len, rb_enc_from_index(ptr_encindex));
2178  }
2179  }
2180  else {
2181  rb_encoding *str_enc = rb_enc_from_index(str_encindex);
2182  rb_encoding *ptr_enc = rb_enc_from_index(ptr_encindex);
    /* If either side is not ASCII compatible, the only permitted cases
     * are appending nothing or appending to an empty receiver. */
2183  if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
2184  if (len == 0)
2185  return str;
2186  if (RSTRING_LEN(str) == 0) {
2187  rb_str_buf_cat(str, ptr, len);
2188  ENCODING_CODERANGE_SET(str, ptr_encindex, ptr_cr);
2189  return str;
2190  }
2191  goto incompatible;
2192  }
2193  if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
2194  ptr_cr = coderange_scan(ptr, len, ptr_enc);
2195  }
2196  if (str_cr == ENC_CODERANGE_UNKNOWN) {
2197  if (ENCODING_IS_ASCII8BIT(str) || ptr_cr != ENC_CODERANGE_7BIT) {
2198  str_cr = rb_enc_str_coderange(str);
2199  }
2200  }
2201  }
2202  if (ptr_cr_ret)
2203  *ptr_cr_ret = ptr_cr;
2204 
    /* Different encodings are compatible only if at least one side is
     * pure 7-bit. */
2205  if (str_encindex != ptr_encindex &&
2206  str_cr != ENC_CODERANGE_7BIT &&
2207  ptr_cr != ENC_CODERANGE_7BIT) {
2208  incompatible:
2209  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
2210  rb_enc_name(rb_enc_from_index(str_encindex)),
2211  rb_enc_name(rb_enc_from_index(ptr_encindex)));
2212  }
2213 
    /* Choose the encoding index and coderange of the result. */
2214  if (str_cr == ENC_CODERANGE_UNKNOWN) {
2215  res_encindex = str_encindex;
2216  res_cr = ENC_CODERANGE_UNKNOWN;
2217  }
2218  else if (str_cr == ENC_CODERANGE_7BIT) {
2219  if (ptr_cr == ENC_CODERANGE_7BIT) {
2220  res_encindex = str_encindex;
2221  res_cr = ENC_CODERANGE_7BIT;
2222  }
2223  else {
    /* A 7-bit receiver takes over the encoding of the appended bytes. */
2224  res_encindex = ptr_encindex;
2225  res_cr = ptr_cr;
2226  }
2227  }
2228  else if (str_cr == ENC_CODERANGE_VALID) {
2229  res_encindex = str_encindex;
2230  if (ptr_cr == ENC_CODERANGE_7BIT || ptr_cr == ENC_CODERANGE_VALID)
2231  res_cr = str_cr;
2232  else
2233  res_cr = ptr_cr;
2234  }
2235  else { /* str_cr == ENC_CODERANGE_BROKEN */
2236  res_encindex = str_encindex;
2237  res_cr = str_cr;
    /* Appending to a broken string may repair it, so its coderange must
     * be recomputed. */
2238  if (0 < len) res_cr = ENC_CODERANGE_UNKNOWN;
2239  }
2240 
2241  if (len < 0) {
2242  rb_raise(rb_eArgError, "negative string size (or size too big)");
2243  }
2244  str_buf_cat(str, ptr, len);
2245  ENCODING_CODERANGE_SET(str, res_encindex, res_cr);
2246  return str;
2247 }
2248 
2249 VALUE
2250 rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *ptr_enc)
2251 {
    /* NOTE(review): listing line 2253, which should carry the remaining
     * arguments and close the call below, is absent from this copy; the
     * statement is incomplete as shown. */
2252  return rb_enc_cr_str_buf_cat(str, ptr, len,
2254 }
2255 
2256 VALUE
2257 rb_str_buf_cat_ascii(VALUE str, const char *ptr)
2258 {
2259  /* ptr must reference NUL terminated ASCII string. */
2260  int encindex = ENCODING_GET(str);
2261  rb_encoding *enc = rb_enc_from_index(encindex);
2262  if (rb_enc_asciicompat(enc)) {
2263  return rb_enc_cr_str_buf_cat(str, ptr, strlen(ptr),
2264  encindex, ENC_CODERANGE_7BIT, 0);
2265  }
2266  else {
2267  char *buf = ALLOCA_N(char, rb_enc_mbmaxlen(enc));
2268  while (*ptr) {
2269  unsigned int c = (unsigned char)*ptr;
2270  int len = rb_enc_codelen(c, enc);
2271  rb_enc_mbcput(c, buf, enc);
2272  rb_enc_cr_str_buf_cat(str, buf, len,
2273  encindex, ENC_CODERANGE_VALID, 0);
2274  ptr++;
2275  }
2276  return str;
2277  }
2278 }
2279 
2280 VALUE
2282 {
    /* NOTE(review): listing line 2281 (the declarator; the body uses the
     * parameters `str` and `str2`) is absent from this copy.
     * Appends the bytes of str2 to str with encoding/coderange handling,
     * propagates taint, and returns str. */
2283  int str2_cr;
2284 
2285  str2_cr = ENC_CODERANGE(str2);
2286 
2287  rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
2288  ENCODING_GET(str2), str2_cr, &str2_cr);
2289 
2290  OBJ_INFECT(str, str2);
    /* Store the coderange reported back for str2 on str2 itself, so a
     * scan done during the append is not repeated. */
2291  ENC_CODERANGE_SET(str2, str2_cr);
2292 
2293  return str;
2294 }
2295 
2296 VALUE
2298 {
    /* NOTE(review): listing line 2297 (the declarator; the body uses the
     * parameters `str` and `str2`) is absent from this copy. */
2299  rb_encoding *enc;
2300  int cr, cr2;
2301  long len2;
2302 
2303  StringValue(str2);
    /* Strings carrying an association are extended directly here; all
     * others go through rb_str_buf_append(). */
2304  if ((len2 = RSTRING_LEN(str2)) > 0 && STR_ASSOC_P(str)) {
2305  long len1 = RSTRING(str)->as.heap.len, len = len1 + len2;
2306  enc = rb_enc_check(str, str2);
2307  cr = ENC_CODERANGE(str);
    /* Take str2's coderange when it compares greater than str's, or
     * when str is empty. */
2308  if ((cr2 = ENC_CODERANGE(str2)) > cr || RSTRING_LEN(str) == 0)
2309  cr = cr2;
2310  rb_str_modify_expand(str, len2);
2311  memcpy(RSTRING(str)->as.heap.ptr + len1, RSTRING_PTR(str2), len2);
2312  TERM_FILL(RSTRING(str)->as.heap.ptr + len, rb_enc_mbminlen(enc));
2313  RSTRING(str)->as.heap.len = len;
2314  rb_enc_associate(str, enc);
2315  ENC_CODERANGE_SET(str, cr);
2316  OBJ_INFECT(str, str2);
2317  return str;
2318  }
2319  return rb_str_buf_append(str, str2);
2320 }
2321 
2322 /*
2323  * call-seq:
2324  * str << integer -> str
2325  * str.concat(integer) -> str
2326  * str << obj -> str
2327  * str.concat(obj) -> str
2328  *
2329  * Append---Concatenates the given object to <i>str</i>. If the object is a
2330  * <code>Integer</code>, it is considered as a codepoint, and is converted
2331  * to a character before concatenation.
2332  *
2333  * a = "hello "
2334  * a << "world" #=> "hello world"
2335  * a.concat(33) #=> "hello world!"
2336  */
2337 
2338 VALUE
2340 {
    /* NOTE(review): several listing lines are absent from this copy:
     * 2339 (the declarator; the body uses `str1` and `str2`), 2367-2368
     * (the body of the `code > 127` branch), and 2378 and 2381 (which
     * should precede the first rb_raise in the switch and `case 0:`
     * respectively).  Confirm against the upstream file. */
2341  unsigned int code;
2342  rb_encoding *enc = STR_ENC_GET(str1);
2343 
    /* Integer arguments are treated as a codepoint; anything else is
     * appended as a string. */
2344  if (FIXNUM_P(str2) || RB_TYPE_P(str2, T_BIGNUM)) {
2345  if (rb_num_to_uint(str2, &code) == 0) {
2346  }
2347  else if (FIXNUM_P(str2)) {
2348  rb_raise(rb_eRangeError, "%ld out of char range", FIX2LONG(str2));
2349  }
2350  else {
2351  rb_raise(rb_eRangeError, "bignum out of char range");
2352  }
2353  }
2354  else {
2355  return rb_str_append(str1, str2);
2356  }
2357 
2358  if (enc == rb_usascii_encoding()) {
2359  /* US-ASCII automatically extended to ASCII-8BIT */
2360  char buf[1];
2361  buf[0] = (char)code;
2362  if (code > 0xFF) {
2363  rb_raise(rb_eRangeError, "%u out of char range", code);
2364  }
2365  rb_str_cat(str1, buf, 1);
2366  if (code > 127) {
2369  }
2370  }
2371  else {
2372  long pos = RSTRING_LEN(str1);
2373  int cr = ENC_CODERANGE(str1);
2374  int len;
2375  char *buf;
2376 
2377  switch (len = rb_enc_codelen(code, enc)) {
2379  rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
2380  break;
2382  case 0:
2383  rb_raise(rb_eRangeError, "%u out of char range", code);
2384  break;
2385  }
    /* Encode into a scratch buffer and verify that the result is exactly
     * one character of the expected length before appending it. */
2386  buf = ALLOCA_N(char, len + 1);
2387  rb_enc_mbcput(code, buf, enc);
2388  if (rb_enc_precise_mbclen(buf, buf + len + 1, enc) != len) {
2389  rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
2390  }
2391  rb_str_resize(str1, pos+len);
2392  memcpy(RSTRING_PTR(str1) + pos, buf, len);
    /* rb_str_resize() cleared the coderange; restore the saved one,
     * upgraded from 7BIT to VALID when the codepoint is above 127. */
2393  if (cr == ENC_CODERANGE_7BIT && code > 127)
2394  cr = ENC_CODERANGE_VALID;
2395  ENC_CODERANGE_SET(str1, cr);
2396  }
2397  return str1;
2398 }
2399 
2400 /*
2401  * call-seq:
2402  * str.prepend(other_str) -> str
2403  *
2404  * Prepend---Prepend the given string to <i>str</i>.
2405  *
2406  * a = "world"
2407  * a.prepend("hello ") #=> "hello world"
2408  * a #=> "hello world"
2409  */
2410 
2411 static VALUE
2413 {
    /* NOTE(review): listing line 2412 (the declarator; the body uses the
     * parameters `str` and `str2`) is absent from this copy.
     * Implemented as a replacement of the zero-length range at offset 0
     * of str with str2. */
2414  StringValue(str2);
2415  StringValue(str);
2416  rb_str_update(str, 0L, 0L, str2);
2417  return str;
2418 }
2419 
2420 st_index_t
2422 {
    /* NOTE(review): listing line 2421 (the declarator; the body uses a
     * `str` parameter) is absent from this copy.
     * Hashes the bytes and XORs in the encoding index; the index is
     * ignored (treated as 0) when the contents are 7-bit. */
2423  int e = ENCODING_GET(str);
2424  if (e && rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) {
2425  e = 0;
2426  }
2427  return rb_memhash((const void *)RSTRING_PTR(str), RSTRING_LEN(str)) ^ e;
2428 }
2429 
2430 int
2432 {
    /* NOTE(review): listing line 2431 (the declarator; the body uses the
     * parameters `str1` and `str2`) is absent from this copy.
     * Returns 0 when the two strings are comparable and have identical
     * length and bytes, and 1 otherwise. */
2433  long len;
2434 
2435  if (!rb_str_comparable(str1, str2)) return 1;
2436  if (RSTRING_LEN(str1) == (len = RSTRING_LEN(str2)) &&
2437  memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len) == 0) {
2438  return 0;
2439  }
2440  return 1;
2441 }
2442 
2443 /*
2444  * call-seq:
2445  * str.hash -> fixnum
2446  *
2447  * Return a hash based on the string's length and content.
2448  */
2449 
2450 static VALUE
2452 {
    /* NOTE(review): listing line 2451 (the declarator; the body uses a
     * `str` parameter) is absent from this copy.
     * Wraps the value from rb_str_hash() as a Fixnum. */
2453  st_index_t hval = rb_str_hash(str);
2454  return INT2FIX(hval);
2455 }
2456 
/* Yields the smaller of the two arguments; each argument may be evaluated twice. */
#define lesser(a,b) (((b)<(a))?(b):(a))
2458 
2459 int
2461 {
    /* NOTE(review): listing lines 2460 (the declarator; the body uses
     * `str1` and `str2`), 2474 and 2478 are absent from this copy.  The
     * two bare `return TRUE;` statements below most likely belonged to
     * conditions on those missing lines -- confirm against the upstream
     * file before relying on the visible control flow. */
2462  int idx1, idx2;
2463  int rc1, rc2;
2464 
    /* Empty strings and strings with the same encoding index are always
     * comparable. */
2465  if (RSTRING_LEN(str1) == 0) return TRUE;
2466  if (RSTRING_LEN(str2) == 0) return TRUE;
2467  idx1 = ENCODING_GET(str1);
2468  idx2 = ENCODING_GET(str2);
2469  if (idx1 == idx2) return TRUE;
2470  rc1 = rb_enc_str_coderange(str1);
2471  rc2 = rb_enc_str_coderange(str2);
2472  if (rc1 == ENC_CODERANGE_7BIT) {
2473  if (rc2 == ENC_CODERANGE_7BIT) return TRUE;
2475  return TRUE;
2476  }
2477  if (rc2 == ENC_CODERANGE_7BIT) {
2479  return TRUE;
2480  }
2481  return FALSE;
2482 }
2483 
2484 int
2486 {
    /* NOTE(review): listing line 2485 (the declarator; the body uses the
     * parameters `str1` and `str2`) is absent from this copy.
     * Returns -1, 0 or 1 from a byte-wise comparison; ties are broken
     * first by length, then (for incomparable strings) by encoding
     * index. */
2487  long len1, len2;
2488  const char *ptr1, *ptr2;
2489  int retval;
2490 
2491  if (str1 == str2) return 0;
2492  RSTRING_GETMEM(str1, ptr1, len1);
2493  RSTRING_GETMEM(str2, ptr2, len2);
    /* Identical pointers skip the memcmp (retval stays unset, but it is
     * not read on that path). */
2494  if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
2495  if (len1 == len2) {
2496  if (!rb_str_comparable(str1, str2)) {
2497  if (ENCODING_GET(str1) > ENCODING_GET(str2))
2498  return 1;
2499  return -1;
2500  }
2501  return 0;
2502  }
2503  if (len1 > len2) return 1;
2504  return -1;
2505  }
2506  if (retval > 0) return 1;
2507  return -1;
2508 }
2509 
2510 /* expect tail call optimization */
2511 static VALUE
2512 str_eql(const VALUE str1, const VALUE str2)
2513 {
2514  const long len = RSTRING_LEN(str1);
2515  const char *ptr1, *ptr2;
2516 
2517  if (len != RSTRING_LEN(str2)) return Qfalse;
2518  if (!rb_str_comparable(str1, str2)) return Qfalse;
2519  if ((ptr1 = RSTRING_PTR(str1)) == (ptr2 = RSTRING_PTR(str2)))
2520  return Qtrue;
2521  if (memcmp(ptr1, ptr2, len) == 0)
2522  return Qtrue;
2523  return Qfalse;
2524 }
2525 
2526 /*
2527  * call-seq:
2528  * str == obj -> true or false
2529  * str === obj -> true or false
2530  *
2531  * === Equality
2532  *
2533  * Returns whether +str+ == +obj+, similar to Object#==.
2534  *
2535  * If +obj+ is not an instance of String but responds to +to_str+, then the
2536  * two strings are compared using case equality Object#===.
2537  *
2538  * Otherwise, returns similarly to String#eql?, comparing length and content.
2539  */
2540 
2541 VALUE
2543 {
    /* NOTE(review): listing line 2542 (the declarator; the body uses the
     * parameters `str1` and `str2`) is absent from this copy. */
2544  if (str1 == str2) return Qtrue;
2545  if (!RB_TYPE_P(str2, T_STRING)) {
    /* A non-String that responds to to_str decides equality itself,
     * with the operands swapped. */
2546  if (!rb_respond_to(str2, rb_intern("to_str"))) {
2547  return Qfalse;
2548  }
2549  return rb_equal(str2, str1);
2550  }
2551  return str_eql(str1, str2);
2552 }
2553 
2554 /*
2555  * call-seq:
2556  * str.eql?(other) -> true or false
2557  *
2558  * Two strings are equal if they have the same length and content.
2559  */
2560 
2561 static VALUE
2563 {
    /* NOTE(review): listing line 2562 (the declarator; the body uses the
     * parameters `str1` and `str2`) is absent from this copy.
     * Unlike the == implementation, no to_str conversion is attempted:
     * a non-String argument is simply unequal. */
2564  if (str1 == str2) return Qtrue;
2565  if (!RB_TYPE_P(str2, T_STRING)) return Qfalse;
2566  return str_eql(str1, str2);
2567 }
2568 
2569 /*
2570  * call-seq:
2571  * string <=> other_string -> -1, 0, +1 or nil
2572  *
2573  *
2574  * Comparison---Returns -1, 0, +1 or nil depending on whether +string+ is less
2575  * than, equal to, or greater than +other_string+.
2576  *
2577  * +nil+ is returned if the two values are incomparable.
2578  *
2579  * If the strings are of different lengths, and the strings are equal when
2580  * compared up to the shortest length, then the longer string is considered
2581  * greater than the shorter one.
2582  *
2583  * <code><=></code> is the basis for the methods <code><</code>,
2584  * <code><=</code>, <code>></code>, <code>>=</code>, and
2585  * <code>between?</code>, included from module Comparable. The method
2586  * String#== does not use Comparable#==.
2587  *
2588  * "abcdef" <=> "abcde" #=> 1
2589  * "abcdef" <=> "abcdef" #=> 0
2590  * "abcdef" <=> "abcdefg" #=> -1
2591  * "abcdef" <=> "ABCDEF" #=> 1
2592  */
2593 
2594 static VALUE
2596 {
    /* NOTE(review): listing line 2595 (the declarator; the body uses the
     * parameters `str1` and `str2`) is absent from this copy. */
2597  int result;
2598 
2599  if (!RB_TYPE_P(str2, T_STRING)) {
    /* Try to_str first; if that does not produce a String, fall back to
     * rb_invcmp(). */
2600  VALUE tmp = rb_check_funcall(str2, rb_intern("to_str"), 0, 0);
2601  if (RB_TYPE_P(tmp, T_STRING)) {
2602  result = rb_str_cmp(str1, tmp);
2603  }
2604  else {
2605  return rb_invcmp(str1, str2);
2606  }
2607  }
2608  else {
2609  result = rb_str_cmp(str1, str2);
2610  }
2611  return INT2FIX(result);
2612 }
2613 
2614 /*
2615  * call-seq:
2616  * str.casecmp(other_str) -> -1, 0, +1 or nil
2617  *
2618  * Case-insensitive version of <code>String#<=></code>.
2619  *
2620  * "abcdef".casecmp("abcde") #=> 1
2621  * "aBcDeF".casecmp("abcdef") #=> 0
2622  * "abcdef".casecmp("abcdefg") #=> -1
2623  * "abcdef".casecmp("ABCDEF") #=> 0
2624  */
2625 
2626 static VALUE
2628 {
    /* NOTE(review): listing line 2627 (the declarator; the body uses the
     * parameters `str1` and `str2`) is absent from this copy. */
2629  long len;
2630  rb_encoding *enc;
2631  char *p1, *p1end, *p2, *p2end;
2632 
2633  StringValue(str2);
    /* Strings without a compatible encoding are incomparable: nil. */
2634  enc = rb_enc_compatible(str1, str2);
2635  if (!enc) {
2636  return Qnil;
2637  }
2638 
2639  p1 = RSTRING_PTR(str1); p1end = RSTRING_END(str1);
2640  p2 = RSTRING_PTR(str2); p2end = RSTRING_END(str2);
    /* Fast path: compare byte by byte, upper-casing only on mismatch. */
2641  if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
2642  while (p1 < p1end && p2 < p2end) {
2643  if (*p1 != *p2) {
2644  unsigned int c1 = TOUPPER(*p1 & 0xff);
2645  unsigned int c2 = TOUPPER(*p2 & 0xff);
2646  if (c1 != c2)
2647  return INT2FIX(c1 < c2 ? -1 : 1);
2648  }
2649  p1++;
2650  p2++;
2651  }
2652  }
2653  else {
2654  while (p1 < p1end && p2 < p2end) {
2655  int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
2656  int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
2657 
    /* Both characters were read as ASCII (non-negative): fold case and
     * compare. */
2658  if (0 <= c1 && 0 <= c2) {
2659  c1 = TOUPPER(c1);
2660  c2 = TOUPPER(c2);
2661  if (c1 != c2)
2662  return INT2FIX(c1 < c2 ? -1 : 1);
2663  }
2664  else {
    /* Otherwise compare the raw bytes of the two characters, then
     * their lengths. */
2665  int r;
2666  l1 = rb_enc_mbclen(p1, p1end, enc);
2667  l2 = rb_enc_mbclen(p2, p2end, enc);
2668  len = l1 < l2 ? l1 : l2;
2669  r = memcmp(p1, p2, len);
2670  if (r != 0)
2671  return INT2FIX(r < 0 ? -1 : 1);
2672  if (l1 != l2)
2673  return INT2FIX(l1 < l2 ? -1 : 1);
2674  }
2675  p1 += l1;
2676  p2 += l2;
2677  }
2678  }
    /* One string is a prefix of the other (or they match): decide by
     * byte length. */
2679  if (RSTRING_LEN(str1) == RSTRING_LEN(str2)) return INT2FIX(0);
2680  if (RSTRING_LEN(str1) > RSTRING_LEN(str2)) return INT2FIX(1);
2681  return INT2FIX(-1);
2682 }
2683 
2684 static long
2685 rb_str_index(VALUE str, VALUE sub, long offset)
2686 {
2687  char *s, *sptr, *e;
2688  long pos, len, slen;
2689  int single_byte = single_byte_optimizable(str);
2690  rb_encoding *enc;
2691 
2692  enc = rb_enc_check(str, sub);
2693  if (is_broken_string(sub)) return -1;
2694 
2695  len = single_byte ? RSTRING_LEN(str) : str_strlen(str, enc);
2696  slen = str_strlen(sub, enc);
2697  if (offset < 0) {
2698  offset += len;
2699  if (offset < 0) return -1;
2700  }
2701  if (len - offset < slen) return -1;
2702 
2703  s = RSTRING_PTR(str);
2704  e = RSTRING_END(str);
2705  if (offset) {
2706  offset = str_offset(s, e, offset, enc, single_byte);
2707  s += offset;
2708  }
2709  if (slen == 0) return offset;
2710  /* need proceed one character at a time */
2711  sptr = RSTRING_PTR(sub);
2712  slen = RSTRING_LEN(sub);
2713  len = RSTRING_LEN(str) - offset;
2714  for (;;) {
2715  char *t;
2716  pos = rb_memsearch(sptr, slen, s, len, enc);
2717  if (pos < 0) return pos;
2718  t = rb_enc_right_char_head(s, s+pos, e, enc);
2719  if (t == s + pos) break;
2720  len -= t - s;
2721  if (len <= 0) return -1;
2722  offset += t - s;
2723  s = t;
2724  }
2725  return pos + offset;
2726 }
2727 
2728 
2729 /*
2730  * call-seq:
2731  * str.index(substring [, offset]) -> fixnum or nil
2732  * str.index(regexp [, offset]) -> fixnum or nil
2733  *
2734  * Returns the index of the first occurrence of the given <i>substring</i> or
2735  * pattern (<i>regexp</i>) in <i>str</i>. Returns <code>nil</code> if not
2736  * found. If the second parameter is present, it specifies the position in the
2737  * string to begin the search.
2738  *
2739  * "hello".index('e') #=> 1
2740  * "hello".index('lo') #=> 3
2741  * "hello".index('a') #=> nil
2742  * "hello".index(?e) #=> 1
2743  * "hello".index(/[aeiou]/, -3) #=> 4
2744  */
2745 
/*
 * Method body that searches str for sub (a Regexp, a String, or an object
 * convertible with rb_check_string_type) starting at an optional position.
 *
 * Returns Qnil when nothing is found or the start position is out of
 * range, otherwise LONG2NUM of the resulting position.  Raises TypeError
 * when sub is neither a Regexp nor convertible to a String.
 *
 * NOTE(review): several lines are absent from this listing -- the
 * declarator (embedded number 2747) and lines 2763, 2775 and 2788.  The
 * statements that span those gaps are therefore incomplete as shown; the
 * body uses argc, argv and str.  Presumably this implements String#index
 * -- confirm against the real source.
 */
2746 static VALUE
2748 {
2749  VALUE sub;
2750  VALUE initpos;
2751  long pos;
2752 
     /* "11": one required argument (sub) and one optional (initpos). */
2753  if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
2754  pos = NUM2LONG(initpos);
2755  }
2756  else {
2757  pos = 0;
2758  }
     /* A negative start is shifted by the string length; if it is still
      * negative the search fails with nil. */
2759  if (pos < 0) {
2760  pos += str_strlen(str, STR_ENC_GET(str));
2761  if (pos < 0) {
2762  if (RB_TYPE_P(sub, T_REGEXP)) {
2764  }
2765  return Qnil;
2766  }
2767  }
2768 
     /* Special constants cannot be inspected with BUILTIN_TYPE, so they go
      * straight to the generic conversion path. */
2769  if (SPECIAL_CONST_P(sub)) goto generic;
2770  switch (BUILTIN_TYPE(sub)) {
2771  case T_REGEXP:
2772  if (pos > str_strlen(str, STR_ENC_GET(str)))
2773  return Qnil;
2774  pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
2776 
2777  pos = rb_reg_search(sub, str, pos, 0);
2778  pos = rb_str_sublen(str, pos);
2779  break;
2780 
2781  generic:
2782  default: {
2783  VALUE tmp;
2784 
2785  tmp = rb_check_string_type(sub);
2786  if (NIL_P(tmp)) {
2787  rb_raise(rb_eTypeError, "type mismatch: %s given",
2789  }
2790  sub = tmp;
2791  }
2792  /* fall through */
2793  case T_STRING:
2794  pos = rb_str_index(str, sub, pos);
2795  pos = rb_str_sublen(str, pos);
2796  break;
2797  }
2798 
2799  if (pos == -1) return Qnil;
2800  return LONG2NUM(pos);
2801 }
2802 
2803 #ifdef HAVE_MEMRCHR
2804 static long
2805 str_rindex(VALUE str, VALUE sub, const char *s, long pos, rb_encoding *enc)
2806 {
2807  char *hit, *adjusted;
2808  int c;
2809  long slen, searchlen;
2810  char *sbeg, *e, *t;
2811 
2812  slen = RSTRING_LEN(sub);
2813  if (slen == 0) return pos;
2814  sbeg = RSTRING_PTR(str);
2815  e = RSTRING_END(str);
2816  t = RSTRING_PTR(sub);
2817  c = *t & 0xff;
2818  searchlen = s - sbeg + 1;
2819 
2820  do {
2821  hit = memrchr(sbeg, c, searchlen);
2822  if (!hit) break;
2823  adjusted = rb_enc_left_char_head(sbeg, hit, e, enc);
2824  if (hit != adjusted) {
2825  searchlen = adjusted - sbeg;
2826  continue;
2827  }
2828  if (memcmp(hit, t, slen) == 0)
2829  return rb_str_sublen(str, hit - sbeg);
2830  searchlen = adjusted - sbeg;
2831  } while (searchlen > 0);
2832 
2833  return -1;
2834 }
2835 #else
2836 static long
2837 str_rindex(VALUE str, VALUE sub, const char *s, long pos, rb_encoding *enc)
2838 {
2839  long slen;
2840  char *sbeg, *e, *t;
2841 
2842  sbeg = RSTRING_PTR(str);
2843  e = RSTRING_END(str);
2844  t = RSTRING_PTR(sub);
2845  slen = RSTRING_LEN(sub);
2846 
2847  while (s) {
2848  if (memcmp(s, t, slen) == 0) {
2849  return pos;
2850  }
2851  if (pos == 0) break;
2852  pos--;
2853  s = rb_enc_prev_char(sbeg, s, e, enc);
2854  }
2855 
2856  return -1;
2857 }
2858 #endif
2859 
2860 static long
2861 rb_str_rindex(VALUE str, VALUE sub, long pos)
2862 {
2863  long len, slen;
2864  char *sbeg, *s;
2865  rb_encoding *enc;
2866  int singlebyte;
2867 
2868  enc = rb_enc_check(str, sub);
2869  if (is_broken_string(sub)) return -1;
2870  singlebyte = single_byte_optimizable(str);
2871  len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
2872  slen = str_strlen(sub, enc);
2873 
2874  /* substring longer than string */
2875  if (len < slen) return -1;
2876  if (len - pos < slen) pos = len - slen;
2877  if (len == 0) return pos;
2878 
2879  sbeg = RSTRING_PTR(str);
2880 
2881  if (pos == 0) {
2882  if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
2883  return 0;
2884  else
2885  return -1;
2886  }
2887 
2888  s = str_nth(sbeg, RSTRING_END(str), pos, enc, singlebyte);
2889  return str_rindex(str, sub, s, pos, enc);
2890 }
2891 
2892 
2893 /*
2894  * call-seq:
2895  * str.rindex(substring [, fixnum]) -> fixnum or nil
2896  * str.rindex(regexp [, fixnum]) -> fixnum or nil
2897  *
2898  * Returns the index of the last occurrence of the given <i>substring</i> or
2899  * pattern (<i>regexp</i>) in <i>str</i>. Returns <code>nil</code> if not
2900  * found. If the second parameter is present, it specifies the position in the
2901  * string to end the search---characters beyond this point will not be
2902  * considered.
2903  *
2904  * "hello".rindex('e') #=> 1
2905  * "hello".rindex('l') #=> 3
2906  * "hello".rindex('a') #=> nil
2907  * "hello".rindex(?e) #=> 1
2908  * "hello".rindex(/[aeiou]/, -2) #=> 1
2909  */
2910 
/*
 * Method body that searches str backwards for sub (a Regexp, a String, or
 * an object convertible with rb_check_string_type), optionally limited to
 * matches starting at or before a given position.
 *
 * Returns LONG2NUM of the position when it is >= 0, otherwise Qnil.
 * Raises TypeError when sub is neither a Regexp nor convertible to a
 * String.
 *
 * NOTE(review): several lines are absent from this listing -- the
 * declarator (embedded number 2912) and lines 2925, 2941 and 2957.  The
 * statements that span those gaps are incomplete as shown; the body uses
 * argc, argv and str.  Presumably this implements String#rindex --
 * confirm against the real source.
 */
2911 static VALUE
2913 {
2914  VALUE sub;
2915  VALUE vpos;
2916  rb_encoding *enc = STR_ENC_GET(str);
2917  long pos, len = str_strlen(str, enc);
2918 
     /* "11": one required argument (sub) and one optional (vpos). */
2919  if (rb_scan_args(argc, argv, "11", &sub, &vpos) == 2) {
2920  pos = NUM2LONG(vpos);
     /* A negative position is shifted by the string length; if it is still
      * negative the search fails with nil. */
2921  if (pos < 0) {
2922  pos += len;
2923  if (pos < 0) {
2924  if (RB_TYPE_P(sub, T_REGEXP)) {
2926  }
2927  return Qnil;
2928  }
2929  }
     /* Positions past the end are clamped to the string length. */
2930  if (pos > len) pos = len;
2931  }
2932  else {
     /* No position given: start from the end of the string. */
2933  pos = len;
2934  }
2935 
     /* Special constants cannot be inspected with BUILTIN_TYPE, so they go
      * straight to the generic conversion path. */
2936  if (SPECIAL_CONST_P(sub)) goto generic;
2937  switch (BUILTIN_TYPE(sub)) {
2938  case T_REGEXP:
2939  /* enc = rb_get_check(str, sub); */
2940  pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
2942 
     /* The regexp search is skipped only when the regexp has a compiled
      * pattern (ptr) and RREGEXP_SRC_LEN is zero; pos is then used as is. */
2943  if (!RREGEXP(sub)->ptr || RREGEXP_SRC_LEN(sub)) {
2944  pos = rb_reg_search(sub, str, pos, 1);
2945  pos = rb_str_sublen(str, pos);
2946  }
2947  if (pos >= 0) return LONG2NUM(pos);
2948  break;
2949 
2950  generic:
2951  default: {
2952  VALUE tmp;
2953 
2954  tmp = rb_check_string_type(sub);
2955  if (NIL_P(tmp)) {
2956  rb_raise(rb_eTypeError, "type mismatch: %s given",
2958  }
2959  sub = tmp;
2960  }
2961  /* fall through */
2962  case T_STRING:
2963  pos = rb_str_rindex(str, sub, pos);
2964  if (pos >= 0) return LONG2NUM(pos);
2965  break;
2966  }
2967  return Qnil;
2968 }
2969 
2970 /*
2971  * call-seq:
2972  * str =~ obj -> fixnum or nil
2973  *
2974  * Match---If <i>obj</i> is a <code>Regexp</code>, use it as a pattern to match
2975  * against <i>str</i>, and returns the position the match starts, or
2976  * <code>nil</code> if there is no match. Otherwise, invokes
2977  * <i>obj.=~</i>, passing <i>str</i> as an argument. The default
2978  * <code>=~</code> in <code>Object</code> returns <code>nil</code>.
2979  *
2980  * Note: <code>str =~ regexp</code> is not the same as
2981  * <code>regexp =~ str</code>. Strings captured from named capture groups
2982  * are assigned to local variables only in the second case.
2983  *
2984  * "cat o' 9 tails" =~ /\d/ #=> 7
2985  * "cat o' 9 tails" =~ 9 #=> nil
2986  */
2987 
/*
 * Dispatches a match of x against y according to the type of y:
 *   - String: raises TypeError ("type mismatch: String given");
 *   - Regexp: returns rb_reg_match(y, x);
 *   - anything else (including special constants): calls y.=~(x) and
 *     returns its result.
 *
 * NOTE(review): the declarator line (embedded listing number 2989) is
 * absent from this listing, so the function name and parameter list are
 * not visible; the body uses x and y.  Presumably this implements
 * String#=~ -- confirm against the real source.
 */
2988 static VALUE
2990 {
     /* Special constants cannot be inspected with BUILTIN_TYPE. */
2991  if (SPECIAL_CONST_P(y)) goto generic;
2992  switch (BUILTIN_TYPE(y)) {
2993  case T_STRING:
     /* No break follows: rb_raise presumably does not return. */
2994  rb_raise(rb_eTypeError, "type mismatch: String given");
2995 
2996  case T_REGEXP:
2997  return rb_reg_match(y, x);
2998 
2999  generic:
3000  default:
3001  return rb_funcall(y, rb_intern("=~"), 1, x);
3002  }
3003 }
3004 
3005 
3006 static VALUE get_pat(VALUE, int);
3007 
3008 
3009 /*
3010  * call-seq:
3011  * str.match(pattern) -> matchdata or nil
3012  * str.match(pattern, pos) -> matchdata or nil
3013  *
3014  * Converts <i>pattern</i> to a <code>Regexp</code> (if it isn't already one),
3015  * then invokes its <code>match</code> method on <i>str</i>. If the second
3016  * parameter is present, it specifies the position in the string to begin the
3017  * search.
3018  *
3019  * 'hello'.match('(.)\1') #=> #<MatchData "ll" 1:"l">
3020  * 'hello'.match('(.)\1')[0] #=> "ll"
3021  * 'hello'.match(/(.)\1/)[0] #=> "ll"
3022  * 'hello'.match('xx') #=> nil
3023  *
3024  * If a block is given, invokes the block with the MatchData if the match
3025  * succeeds, so that you can write
3026  *
3027  * str.match(pat) {|m| ...}
3028  *
3029  * instead of
3030  *
3031  * if m = str.match(pat)
3032  * ...
3033  * end
3034  *
3035  * The return value is a value from block execution in this case.
3036  */
3037 
/*
 * Converts the first argument to a pattern with get_pat(re, 0) and calls
 * that pattern's "match" method with str substituted for the first
 * argument; any remaining arguments are forwarded unchanged.
 *
 * When the result is not nil and a block is given, the result is yielded
 * and the block's value is returned; otherwise the result itself is
 * returned.
 *
 * NOTE(review): the declarator line (embedded listing number 3039) is
 * absent from this listing, so the function name and parameter list are
 * not visible; the body uses argc, argv and str.  Presumably this
 * implements String#match -- confirm against the real source.
 */
3038 static VALUE
3040 {
3041  VALUE re, result;
     /* Only the too-few-arguments case is checked here; argc is otherwise
      * passed on as is to the "match" call below. */
3042  if (argc < 1)
3043  rb_check_arity(argc, 1, 2);
3044  re = argv[0];
     /* argv[0] is overwritten in place so that argv can be reused as the
      * argument vector for the "match" call. */
3045  argv[0] = str;
3046  result = rb_funcall2(get_pat(re, 0), rb_intern("match"), argc, argv);
3047  if (!NIL_P(result) && rb_block_given_p()) {
3048  return rb_yield(result);
3049  }
3050  return result;
3051 }
3052 
3057 };
3058 
3059 static enum neighbor_char
3060 enc_succ_char(char *p, long len, rb_encoding *enc)
3061 {
3062  long i;
3063  int l;
3064 
3065  if (rb_enc_mbminlen(enc) > 1) {
3066  /* wchar, trivial case */
3067  int r = rb_enc_precise_mbclen(p, p + len, enc), c;
3068  if (!MBCLEN_CHARFOUND_P(r)) {
3069  return NEIGHBOR_NOT_CHAR;
3070  }
3071  c = rb_enc_mbc_to_codepoint(p, p + len, enc) + 1;
3072  l = rb_enc_code_to_mbclen(c, enc);
3073  if (!l) return NEIGHBOR_NOT_CHAR;
3074  if (l != len) return NEIGHBOR_WRAPPED;
3075  rb_enc_mbcput(c, p, enc);
3076  r = rb_enc_precise_mbclen(p, p + len, enc);
3077  if (!MBCLEN_CHARFOUND_P(r)) {
3078  return NEIGHBOR_NOT_CHAR;
3079  }
3080  return NEIGHBOR_FOUND;
3081  }
3082  while (1) {
3083  for (i = len-1; 0 <= i && (unsigned char)p[i] == 0xff; i--)
3084  p[i] = '\0';
3085  if (i < 0)
3086  return NEIGHBOR_WRAPPED;
3087  ++((unsigned char*)p)[i];
3088  l = rb_enc_precise_mbclen(p, p+len, enc);
3089  if (MBCLEN_CHARFOUND_P(l)) {
3090  l = MBCLEN_CHARFOUND_LEN(l);
3091  if (l == len) {
3092  return NEIGHBOR_FOUND;
3093  }
3094  else {
3095  memset(p+l, 0xff, len-l);
3096  }
3097  }
3098  if (MBCLEN_INVALID_P(l) && i < len-1) {
3099  long len2;
3100  int l2;
3101  for (len2 = len-1; 0 < len2; len2--) {
3102  l2 = rb_enc_precise_mbclen(p, p+len2, enc);
3103  if (!MBCLEN_INVALID_P(l2))
3104  break;
3105  }
3106  memset(p+len2+1, 0xff, len-(len2+1));
3107  }
3108  }
3109 }
3110 
3111 static enum neighbor_char
3112 enc_pred_char(char *p, long len, rb_encoding *enc)
3113 {
3114  long i;
3115  int l;
3116  if (rb_enc_mbminlen(enc) > 1) {
3117  /* wchar, trivial case */
3118  int r = rb_enc_precise_mbclen(p, p + len, enc), c;
3119  if (!MBCLEN_CHARFOUND_P(r)) {
3120  return NEIGHBOR_NOT_CHAR;
3121  }
3122  c = rb_enc_mbc_to_codepoint(p, p + len, enc);
3123  if (!c) return NEIGHBOR_NOT_CHAR;
3124  --c;
3125  l = rb_enc_code_to_mbclen(c, enc);
3126  if (!l) return NEIGHBOR_NOT_CHAR;
3127  if (l != len) return NEIGHBOR_WRAPPED;
3128  rb_enc_mbcput(c, p, enc);
3129  r = rb_enc_precise_mbclen(p, p + len, enc);
3130  if (!MBCLEN_CHARFOUND_P(r)) {
3131  return NEIGHBOR_NOT_CHAR;
3132  }
3133  return NEIGHBOR_FOUND;
3134  }
3135  while (1) {
3136  for (i = len-1; 0 <= i && (unsigned char)p[i] == 0; i--)
3137  p[i] = '\xff';
3138  if (i < 0)
3139  return NEIGHBOR_WRAPPED;
3140  --((unsigned char*)p)[i];
3141  l = rb_enc_precise_mbclen(p, p+len, enc);
3142  if (MBCLEN_CHARFOUND_P(l)) {
3143  l = MBCLEN_CHARFOUND_LEN(l);
3144  if (l == len) {
3145  return NEIGHBOR_FOUND;
3146  }
3147  else {
3148  memset(p+l, 0, len-l);
3149  }
3150  }
3151  if (MBCLEN_INVALID_P(l) && i < len-1) {
3152  long len2;
3153  int l2;
3154  for (len2 = len-1; 0 < len2; len2--) {
3155  l2 = rb_enc_precise_mbclen(p, p+len2, enc);
3156  if (!MBCLEN_INVALID_P(l2))
3157  break;
3158  }
3159  memset(p+len2+1, 0, len-(len2+1));
3160  }
3161  }
3162 }
3163 
3164 /*
3165  overwrite +p+ by succeeding letter in +enc+ and returns
3166  NEIGHBOR_FOUND or NEIGHBOR_WRAPPED.
3167  When NEIGHBOR_WRAPPED, carried-out letter is stored into carry.
3168  assuming each ranges are successive, and mbclen
3169  never change in each ranges.
3170  NEIGHBOR_NOT_CHAR is returned if invalid character or the range has only one
3171  character.
3172  */
3173 static enum neighbor_char
3174 enc_succ_alnum_char(char *p, long len, rb_encoding *enc, char *carry)
3175 {
3176  enum neighbor_char ret;
3177  unsigned int c;
3178  int ctype;
3179  int range;
3180  char save[ONIGENC_CODE_TO_MBC_MAXLEN];
3181 
3182  /* skip 03A2, invalid char between GREEK CAPITAL LETTERS */
3183  int try;
3184  const int max_gaps = 1;
3185 
3186  c = rb_enc_mbc_to_codepoint(p, p+len, enc);
3187  if (rb_enc_isctype(c, ONIGENC_CTYPE_DIGIT, enc))
3188  ctype = ONIGENC_CTYPE_DIGIT;
3189  else if (rb_enc_isctype(c, ONIGENC_CTYPE_ALPHA, enc))
3190  ctype = ONIGENC_CTYPE_ALPHA;
3191  else
3192  return NEIGHBOR_NOT_CHAR;
3193 
3194  MEMCPY(save, p, char, len);
3195  for (try = 0; try <= max_gaps; ++try) {
3196  ret = enc_succ_char(p, len, enc);
3197  if (ret == NEIGHBOR_FOUND) {
3198  c = rb_enc_mbc_to_codepoint(p, p+len, enc);
3199  if (rb_enc_isctype(c, ctype, enc))
3200  return NEIGHBOR_FOUND;
3201  }
3202  }
3203  MEMCPY(p, save, char, len);
3204  range = 1;
3205  while (1) {
3206  MEMCPY(save, p, char, len);
3207  ret = enc_pred_char(p, len, enc);
3208  if (ret == NEIGHBOR_FOUND) {
3209  c = rb_enc_mbc_to_codepoint(p, p+len, enc);
3210  if (!rb_enc_isctype(c, ctype, enc)) {
3211  MEMCPY(p, save, char, len);
3212  break;
3213  }
3214  }
3215  else {
3216  MEMCPY(p, save, char, len);
3217  break;
3218  }
3219  range++;
3220  }
3221  if (range == 1) {
3222  return NEIGHBOR_NOT_CHAR;
3223  }
3224 
3225  if (ctype != ONIGENC_CTYPE_DIGIT) {
3226  MEMCPY(carry, p, char, len);
3227  return NEIGHBOR_WRAPPED;
3228  }
3229 
3230  MEMCPY(carry, p, char, len);
3231  enc_succ_char(carry, len, enc);
3232  return NEIGHBOR_WRAPPED;
3233 }
3234 
3235 
3236 /*
3237  * call-seq:
3238  * str.succ -> new_str
3239  * str.next -> new_str
3240  *
3241  * Returns the successor to <i>str</i>. The successor is calculated by
3242  * incrementing characters starting from the rightmost alphanumeric (or
3243  * the rightmost character if there are no alphanumerics) in the
3244  * string. Incrementing a digit always results in another digit, and
3245  * incrementing a letter results in another letter of the same case.
3246  * Incrementing nonalphanumerics uses the underlying character set's
3247  * collating sequence.
3248  *
3249  * If the increment generates a ``carry,'' the character to the left of
3250  * it is incremented. This process repeats until there is no carry,
3251  * adding an additional character if necessary.
3252  *
3253  * "abcd".succ #=> "abce"
3254  * "THX1138".succ #=> "THX1139"
3255  * "<<koala>>".succ #=> "<<koalb>>"
3256  * "1999zzz".succ #=> "2000aaa"
3257  * "ZZZ9999".succ #=> "AAAA0000"
3258  * "***".succ #=> "**+"
3259  */
3260 
/*
 * Builds and returns a new string holding the successor of orig; orig
 * itself is not modified here (all writes go to the copy str).
 *
 * Outline, as far as the visible lines show:
 *   1. Copy orig; an empty string is returned as is.
 *   2. Walk the characters from the end toward the start, trying
 *      enc_succ_alnum_char() on each.  NEIGHBOR_FOUND finishes the job;
 *      NEIGHBOR_WRAPPED records a carry and continues to the left;
 *      NEIGHBOR_NOT_CHAR skips the character.
 *   3. If nothing wrapped (c is still -1), walk again from the end and
 *      increment with enc_succ_char() instead.
 *   4. If the walk ran off the start of the string, insert the carry
 *      bytes at carry_pos, growing the string by carry_len.
 *
 * NOTE(review): the declarator line (embedded listing number 3262) and
 * lines 3292 and 3314 are absent from this listing.  The body uses orig;
 * presumably this is rb_str_succ(VALUE orig) and the two elided lines
 * adjust l after the CHARFOUND check -- confirm against the real source.
 */
3261 VALUE
3263 {
3264  rb_encoding *enc;
3265  VALUE str;
3266  char *sbeg, *s, *e, *last_alnum = 0;
3267  int c = -1;
3268  long l;
     /* Default carry is the single byte 0x01, used when no wrapped
      * character supplied one. */
3269  char carry[ONIGENC_CODE_TO_MBC_MAXLEN] = "\1";
3270  long carry_pos = 0, carry_len = 1;
3271  enum neighbor_char neighbor = NEIGHBOR_FOUND;
3272 
3273  str = rb_str_new5(orig, RSTRING_PTR(orig), RSTRING_LEN(orig));
3274  rb_enc_cr_str_copy_for_substr(str, orig);
3275  OBJ_INFECT(str, orig);
3276  if (RSTRING_LEN(str) == 0) return str;
3277 
3278  enc = STR_ENC_GET(orig);
3279  sbeg = RSTRING_PTR(str);
3280  s = e = sbeg + RSTRING_LEN(str);
3281 
3282  while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
     /* The previous character was skipped as non-alnum and an alnum has
      * already wrapped: if the current character is of the other class
      * (digit vs. letter) than that wrapped one, stop and place the carry
      * at the wrapped character instead of continuing leftwards. */
3283  if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
3284  if (ISALPHA(*last_alnum) ? ISDIGIT(*s) :
3285  ISDIGIT(*last_alnum) ? ISALPHA(*s) : 0) {
3286  s = last_alnum;
3287  break;
3288  }
3289  }
3290  l = rb_enc_precise_mbclen(s, e, enc);
3291  if (!ONIGENC_MBCLEN_CHARFOUND_P(l)) continue;
3293  neighbor = enc_succ_alnum_char(s, l, enc, carry);
3294  switch (neighbor) {
3295  case NEIGHBOR_NOT_CHAR:
3296  continue;
3297  case NEIGHBOR_FOUND:
3298  return str;
3299  case NEIGHBOR_WRAPPED:
3300  last_alnum = s;
3301  break;
3302  }
     /* Reached only after NEIGHBOR_WRAPPED: remember where a carry
      * character would have to be inserted. */
3303  c = 1;
3304  carry_pos = s - sbeg;
3305  carry_len = l;
3306  }
3307  if (c == -1) { /* str contains no alnum */
3308  s = e;
3309  while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
3310  enum neighbor_char neighbor;
3311  char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
3312  l = rb_enc_precise_mbclen(s, e, enc);
3313  if (!ONIGENC_MBCLEN_CHARFOUND_P(l)) continue;
     /* Increment a scratch copy first so that s is only overwritten for
      * the FOUND and WRAPPED outcomes. */
3315  MEMCPY(tmp, s, char, l);
3316  neighbor = enc_succ_char(tmp, l, enc);
3317  switch (neighbor) {
3318  case NEIGHBOR_FOUND:
3319  MEMCPY(s, tmp, char, l);
3320  return str;
3321  break;
3322  case NEIGHBOR_WRAPPED:
3323  MEMCPY(s, tmp, char, l);
3324  break;
3325  case NEIGHBOR_NOT_CHAR:
3326  break;
3327  }
3328  if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
3329  /* wrapped to \0...\0. search next valid char. */
3330  enc_succ_char(s, l, enc);
3331  }
     /* For non-ASCII-compatible encodings the default one-byte carry is
      * replaced by a copy of the current character. */
3332  if (!rb_enc_asciicompat(enc)) {
3333  MEMCPY(carry, s, char, l);
3334  carry_len = l;
3335  }
3336  carry_pos = s - sbeg;
3337  }
3338  }
     /* Open a gap of carry_len bytes at carry_pos, copy the carry into it,
      * then fix up the length and the terminating NUL. */
3339  RESIZE_CAPA(str, RSTRING_LEN(str) + carry_len);
3340  s = RSTRING_PTR(str) + carry_pos;
3341  memmove(s + carry_len, s, RSTRING_LEN(str) - carry_pos);
3342  memmove(s, carry, carry_len);
3343  STR_SET_LEN(str, RSTRING_LEN(str) + carry_len);
3344  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
3345  rb_enc_str_coderange(str);
3346  return str;
3347 }
3348 
3349 
3350 /*
3351  * call-seq:
3352  * str.succ! -> str
3353  * str.next! -> str
3354  *
3355  * Equivalent to <code>String#succ</code>, but modifies the receiver in
3356  * place.
3357  */
3358 
/*
 * In-place variant of the successor operation; returns str.
 *
 * NOTE(review): the declarator line (embedded listing number 3360) and the
 * only statement before the return (line 3362) are absent from this
 * listing, so the actual mutation of str is not visible here.  Presumably
 * this implements String#succ! -- confirm against the real source.
 */
3359 static VALUE
3361 {
3363 
3364  return str;
3365 }
3366 
3367 
3368 /*
3369  * call-seq:
3370  * str.upto(other_str, exclusive=false) {|s| block } -> str
3371  * str.upto(other_str, exclusive=false) -> an_enumerator
3372  *
3373  * Iterates through successive values, starting at <i>str</i> and
3374  * ending at <i>other_str</i> inclusive, passing each value in turn to
3375  * the block. The <code>String#succ</code> method is used to generate
3376  * each value. If optional second argument exclusive is omitted or is false,
3377  * the last value will be included; otherwise it will be excluded.
3378  *
3379  * If no block is given, an enumerator is returned instead.
3380  *
3381  * "a8".upto("b6") {|s| print s, ' ' }
3382  * for s in "a8".."b6"
3383  * print s, ' '
3384  * end
3385  *
3386  * <em>produces:</em>
3387  *
3388  * a8 a9 b0 b1 b2 b3 b4 b5 b6
3389  * a8 a9 b0 b1 b2 b3 b4 b5 b6
3390  *
3391  * If <i>str</i> and <i>other_str</i> contain only ASCII numeric characters,
3392  * both are recognized as decimal numbers. In addition, the width of
3393  * string (e.g. leading zeros) is handled appropriately.
3394  *
3395  * "9".upto("11").to_a #=> ["9", "10", "11"]
3396  * "25".upto("5").to_a #=> []
3397  * "07".upto("11").to_a #=> ["07", "08", "09", "10", "11"]
3398  */
3399 
/*
 * Yields successive strings from beg up to end (the first argument),
 * excluding end itself when the optional second argument is truthy.
 * Always returns beg.
 *
 * Three strategies are used, checked in this order:
 *   1. both strings are single ASCII characters: iterate over byte values;
 *   2. both strings consist only of ASCII digits: iterate numerically and
 *      format each value zero-padded to the width of beg;
 *   3. otherwise: call "succ" repeatedly on a copy of beg.
 *
 * NOTE(review): the declarator line (embedded listing number 3401) is
 * absent from this listing, so the function name and parameter list are
 * not visible; the body uses argc, argv and beg.  Presumably this
 * implements String#upto -- confirm against the real source.
 */
3400 static VALUE
3402 {
3403  VALUE end, exclusive;
3404  VALUE current, after_end;
3405  ID succ;
3406  int n, excl, ascii;
3407  rb_encoding *enc;
3408 
     /* "11": one required argument (end) and one optional (exclusive). */
3409  rb_scan_args(argc, argv, "11", &end, &exclusive);
3410  RETURN_ENUMERATOR(beg, argc, argv);
3411  excl = RTEST(exclusive);
3412  CONST_ID(succ, "succ");
3413  StringValue(end);
3414  enc = rb_enc_check(beg, end);
3415  ascii = (is_ascii_string(beg) && is_ascii_string(end));
3416  /* single character */
3417  if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
3418  char c = RSTRING_PTR(beg)[0];
3419  char e = RSTRING_PTR(end)[0];
3420 
3421  if (c > e || (excl && c == e)) return beg;
3422  for (;;) {
3423  rb_yield(rb_enc_str_new(&c, 1, enc));
     /* Inclusive mode stops after yielding e; exclusive mode stops as
      * soon as the incremented value reaches e. */
3424  if (!excl && c == e) break;
3425  c++;
3426  if (excl && c == e) break;
3427  }
3428  return beg;
3429  }
3430  /* both edges are all digits */
3431  if (ascii && ISDIGIT(RSTRING_PTR(beg)[0]) && ISDIGIT(RSTRING_PTR(end)[0])) {
3432  char *s, *send;
3433  VALUE b, e;
3434  int width;
3435 
     /* width is the byte length of beg; it is used as the precision of
      * the formatted output below. */
3436  s = RSTRING_PTR(beg); send = RSTRING_END(beg);
3437  width = rb_long2int(send - s);
     /* Any non-digit in either string abandons this strategy. */
3438  while (s < send) {
3439  if (!ISDIGIT(*s)) goto no_digits;
3440  s++;
3441  }
3442  s = RSTRING_PTR(end); send = RSTRING_END(end);
3443  while (s < send) {
3444  if (!ISDIGIT(*s)) goto no_digits;
3445  s++;
3446  }
3447  b = rb_str_to_inum(beg, 10, FALSE);
3448  e = rb_str_to_inum(end, 10, FALSE);
     /* Both bounds are Fixnums: iterate with plain C longs. */
3449  if (FIXNUM_P(b) && FIXNUM_P(e)) {
3450  long bi = FIX2LONG(b);
3451  long ei = FIX2LONG(e);
3452  rb_encoding *usascii = rb_usascii_encoding();
3453 
3454  while (bi <= ei) {
3455  if (excl && bi == ei) break;
3456  rb_yield(rb_enc_sprintf(usascii, "%.*ld", width, bi));
3457  bi++;
3458  }
3459  }
3460  else {
     /* At least one bound is not a Fixnum: compare and increment through
      * method calls.  The VALUE returned by the comparison is used directly
      * as the loop condition. */
3461  ID op = excl ? '<' : rb_intern("<=");
3462  VALUE args[2], fmt = rb_obj_freeze(rb_usascii_str_new_cstr("%.*d"));
3463 
3464  args[0] = INT2FIX(width);
3465  while (rb_funcall(b, op, 1, e)) {
3466  args[1] = b;
3467  rb_yield(rb_str_format(numberof(args), args, fmt));
3468  b = rb_funcall(b, succ, 0, 0);
3469  }
3470  }
3471  return beg;
3472  }
3473  /* normal case */
3474  no_digits:
3475  n = rb_str_cmp(beg, end);
3476  if (n > 0 || (excl && n == 0)) return beg;
3477 
3478  after_end = rb_funcall(end, succ, 0, 0);
3479  current = rb_str_dup(beg);
3480  while (!rb_str_equal(current, after_end)) {
3481  VALUE next = Qnil;
     /* The successor is computed before yielding, and is left nil when
      * current is the final inclusive value so the loop ends after it. */
3482  if (excl || !rb_str_equal(current, end))
3483  next = rb_funcall(current, succ, 0, 0);
3484  rb_yield(current);
3485  if (NIL_P(next)) break;
3486  current = next;
3487  StringValue(current);
3488  if (excl && rb_str_equal(current, end)) break;
     /* Stop once the value has grown longer than end or become empty. */
3489  if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
3490  break;
3491  }
3492 
3493  return beg;
3494 }
3495 
/*
 * Searches str with the regexp re from position 0 and, on success,
 * returns the capture selected by backref (resolved to a group number
 * with rb_reg_backref_number).  Returns Qnil when the search fails.
 *
 * NOTE(review): line 3500 is absent from this listing; it presumably
 * declares and initialises `match`, which the visible lines use without
 * a visible declaration -- confirm against the real source.
 */
3496 static VALUE
3497 rb_str_subpat(VALUE str, VALUE re, VALUE backref)
3498 {
3499  if (rb_reg_search(re, str, 0, 0) >= 0) {
3501  int nth = rb_reg_backref_number(match, backref);
3502  return rb_reg_nth_match(nth, match);
3503  }
3504  return Qnil;
3505 }
3506 
/*
 * Single-argument element reference: returns the part of str selected by
 * indx, or Qnil.
 *
 *   - Fixnum (or anything NUM2LONG accepts that is not a Range): the
 *     one-character substring at that index; an empty result becomes nil.
 *   - Regexp: the whole match (capture 0) via rb_str_subpat.
 *   - String: a duplicate of indx when it occurs in str, else nil.
 *   - Range: the substring described by rb_range_beg_len.
 *
 * NOTE(review): the declarator line (embedded listing number 3508) is
 * absent from this listing, so the function name and parameter list are
 * not visible; the body uses str and indx.  A later call in this file
 * reads rb_str_aref(str, argv[0]), which presumably refers to this
 * function -- confirm against the real source.
 */
3507 static VALUE
3509 {
3510  long idx;
3511 
3512  if (FIXNUM_P(indx)) {
3513  idx = FIX2LONG(indx);
3514 
     /* Also entered from the generic path below once indx has been
      * converted with NUM2LONG. */
3515  num_index:
3516  str = rb_str_substr(str, idx, 1);
3517  if (!NIL_P(str) && RSTRING_LEN(str) == 0) return Qnil;
3518  return str;
3519  }
3520 
     /* Special constants cannot be inspected with BUILTIN_TYPE. */
3521  if (SPECIAL_CONST_P(indx)) goto generic;
3522  switch (BUILTIN_TYPE(indx)) {
3523  case T_REGEXP:
3524  return rb_str_subpat(str, indx, INT2FIX(0));
3525 
3526  case T_STRING:
3527  if (rb_str_index(str, indx, 0) != -1)
3528  return rb_str_dup(indx);
3529  return Qnil;
3530 
3531  generic:
3532  default:
3533  /* check if indx is Range */
3534  {
3535  long beg, len;
3536  VALUE tmp;
3537 
3538  len = str_strlen(str, STR_ENC_GET(str));
     /* Qfalse falls through to the numeric conversion below; Qnil yields
      * nil; any other result means beg/len were filled in. */
3539  switch (rb_range_beg_len(indx, &beg, &len, len, 0)) {
3540  case Qfalse:
3541  break;
3542  case Qnil:
3543  return Qnil;
3544  default:
3545  tmp = rb_str_substr(str, beg, len);
3546  return tmp;
3547  }
3548  }
3549  idx = NUM2LONG(indx);
3550  goto num_index;
3551  }
3552 
     /* Every path above returns or jumps to num_index. */
3553  UNREACHABLE;
3554 }
3555 
3556 
3557 /*
3558  * call-seq:
3559  * str[index] -> new_str or nil
3560  * str[start, length] -> new_str or nil
3561  * str[range] -> new_str or nil
3562  * str[regexp] -> new_str or nil
3563  * str[regexp, capture] -> new_str or nil
3564  * str[match_str] -> new_str or nil
3565  * str.slice(index) -> new_str or nil
3566  * str.slice(start, length) -> new_str or nil
3567  * str.slice(range) -> new_str or nil
3568  * str.slice(regexp) -> new_str or nil
3569  * str.slice(regexp, capture) -> new_str or nil
3570  * str.slice(match_str) -> new_str or nil
3571  *
3572  * Element Reference --- If passed a single +index+, returns a substring of
3573  * one character at that index. If passed a +start+ index and a +length+,
3574  * returns a substring containing +length+ characters starting at the
3575  * +index+. If passed a +range+, its beginning and end are interpreted as
3576  * offsets delimiting the substring to be returned.
3577  *
3578  * In these three cases, if an index is negative, it is counted from the end
3579  * of the string. For the +start+ and +range+ cases the starting index may
3580  * be just before a character or an index matching the string's size.
3581  * Additionally, an empty string is returned when the starting index for a
3582  * character range is at the end of the string.
3583  *
3584  * Returns +nil+ if the initial index falls outside the string or the length
3585  * is negative.
3586  *
3587  * If a +Regexp+ is supplied, the matching portion of the string is
3588  * returned. If a +capture+, which may be a capture group index or name,
3589  * follows the regular expression, that component of the MatchData is
3590  * returned instead.
3591  *
3592  * If a +match_str+ is given, that string is returned if it occurs in
3593  * the string.
3594  *
3595  * Returns +nil+ if the regular expression does not match or the match string
3596  * cannot be found.
3597  *
3598  * a = "hello there"
3599  *
3600  * a[1] #=> "e"
3601  * a[2, 3] #=> "llo"
3602  * a[2..3] #=> "ll"
3603  *
3604  * a[-3, 2] #=> "er"
3605  * a[7..-2] #=> "her"
3606  * a[-4..-2] #=> "her"
3607  * a[-2..-4] #=> ""
3608  *
3609  * a[11, 0] #=> ""
3610  * a[11] #=> nil
3611  * a[12, 0] #=> nil
3612  * a[12..-1] #=> nil
3613  *
3614  * a[/[aeiou](.)\1/] #=> "ell"
3615  * a[/[aeiou](.)\1/, 0] #=> "ell"
3616  * a[/[aeiou](.)\1/, 1] #=> "l"
3617  * a[/[aeiou](.)\1/, 2] #=> nil
3618  *
3619  * a[/(?<vowel>[aeiou])(?<non_vowel>[^aeiou])/, "non_vowel"] #=> "l"
3620  * a[/(?<vowel>[aeiou])(?<non_vowel>[^aeiou])/, "vowel"] #=> "e"
3621  *
3622  * a["lo"] #=> "lo"
3623  * a["bye"] #=> nil
3624  */
3625 
/*
 * Element reference taking one or two arguments.
 *
 * With two arguments: a Regexp first argument selects a capture via
 * rb_str_subpat; otherwise both arguments are converted with NUM2LONG
 * and passed to rb_str_substr as start and length.  With one argument the
 * work is delegated to rb_str_aref.  Any other argument count is rejected
 * by rb_check_arity.
 *
 * NOTE(review): the declarator line (embedded listing number 3627) is
 * absent from this listing, so the function name and parameter list are
 * not visible; the body uses argc, argv and str.  Presumably this
 * implements String#[] / String#slice -- confirm against the real source.
 */
3626 static VALUE
3628 {
3629  if (argc == 2) {
3630  if (RB_TYPE_P(argv[0], T_REGEXP)) {
3631  return rb_str_subpat(str, argv[0], argv[1]);
3632  }
3633  return rb_str_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]));
3634  }
     /* argc == 2 was handled above, so this effectively enforces
      * argc == 1 before argv[0] is read. */
3635  rb_check_arity(argc, 1, 2);
3636  return rb_str_aref(str, argv[0]);
3637 }
3638 
3639 VALUE
3640 rb_str_drop_bytes(VALUE str, long len)
3641 {
3642  char *ptr = RSTRING_PTR(str);
3643  long olen = RSTRING_LEN(str), nlen;
3644 
3645  str_modifiable(str);
3646  if (len > olen) len = olen;
3647  nlen = olen - len;
3648  if (nlen <= RSTRING_EMBED_LEN_MAX) {
3649  char *oldptr = ptr;
3650  int fl = (int)(RBASIC(str)->flags & (STR_NOEMBED|ELTS_SHARED));
3651  STR_SET_EMBED(str);
3652  STR_SET_EMBED_LEN(str, nlen);
3653  ptr = RSTRING(str)->as.ary;
3654  memmove(ptr, oldptr + len, nlen);
3655  if (fl == STR_NOEMBED) xfree(oldptr);
3656  }
3657  else {
3658  if (!STR_SHARED_P(str)) rb_str_new4(str);
3659  ptr = RSTRING(str)->as.heap.ptr += len;
3660  RSTRING(str)->as.heap.len = nlen;
3661  }
3662  ptr[nlen] = 0;
3663  ENC_CODERANGE_CLEAR(str);
3664  return str;
3665 }
3666 
/*
 * Replaces the len bytes of str starting at byte offset beg with the
 * contents of val, growing or shrinking str as needed, and propagates
 * taint from val with OBJ_INFECT.  beg and len are used directly in
 * pointer arithmetic on RSTRING_PTR(str).
 *
 * NOTE(review): line 3691 is absent from this listing, leaving the body of
 * the `RSTRING_LEN(val) > 0` branch empty as shown; presumably that line
 * copies val's bytes into the gap -- confirm against the real source.
 */
3667 static void
3668 rb_str_splice_0(VALUE str, long beg, long len, VALUE val)
3669 {
     /* Shortcut: deleting a prefix needs no data movement beyond what
      * rb_str_drop_bytes does. */
3670  if (beg == 0 && RSTRING_LEN(val) == 0) {
3671  rb_str_drop_bytes(str, len);
3672  OBJ_INFECT(str, val);
3673  return;
3674  }
3675 
3676  rb_str_modify(str);
3677  if (len < RSTRING_LEN(val)) {
3678  /* expand string */
3679  RESIZE_CAPA(str, RSTRING_LEN(str) + RSTRING_LEN(val) - len + TERM_LEN(str));
3680  }
3681 
     /* Move the tail that follows the replaced region so that it starts
      * right after where val will end. */
3682  if (RSTRING_LEN(val) != len) {
3683  memmove(RSTRING_PTR(str) + beg + RSTRING_LEN(val),
3684  RSTRING_PTR(str) + beg + len,
3685  RSTRING_LEN(str) - (beg + len));
3686  }
     /* Only reachable with a negative len. */
3687  if (RSTRING_LEN(val) < beg && len < 0) {
3688  MEMZERO(RSTRING_PTR(str) + RSTRING_LEN(str), char, -len);
3689  }
3690  if (RSTRING_LEN(val) > 0) {
3692  }
3693  STR_SET_LEN(str, RSTRING_LEN(str) + RSTRING_LEN(val) - len);
3694  if (RSTRING_PTR(str)) {
3695  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
3696  }
3697  OBJ_INFECT(str, val);
3698 }
3699 
/*
 * Replaces len characters of str starting at character index beg with
 * val (coerced with StringValue).
 *
 * Raises IndexError for a negative len or when beg lies outside the
 * string (beg > length, or a negative beg whose magnitude exceeds the
 * length).  A negative beg counts from the end.  len is clipped so that
 * the region does not extend past the end of str.  The character-based
 * beg/len are translated to byte offsets with str_nth before calling
 * rb_str_splice_0.
 *
 * NOTE(review): line 3738 is absent from this listing; `cr` is read on the
 * last two lines but never assigned in the visible code, so the
 * assignment is presumably on the elided line -- confirm against the real
 * source.
 */
3700 static void
3701 rb_str_splice(VALUE str, long beg, long len, VALUE val)
3702 {
3703  long slen;
3704  char *p, *e;
3705  rb_encoding *enc;
3706  int singlebyte = single_byte_optimizable(str);
3707  int cr;
3708 
3709  if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
3710 
3711  StringValue(val);
3712  enc = rb_enc_check(str, val);
3713  slen = str_strlen(str, enc);
3714 
3715  if (slen < beg) {
     /* Shared error exit; also reached by goto from the negative-index
      * check below. */
3716  out_of_range:
3717  rb_raise(rb_eIndexError, "index %ld out of string", beg);
3718  }
3719  if (beg < 0) {
3720  if (-beg > slen) {
3721  goto out_of_range;
3722  }
3723  beg += slen;
3724  }
     /* Clip the region to the end of the string. */
3725  if (slen < len || slen < beg + len) {
3726  len = slen - beg;
3727  }
3728  str_modify_keep_cr(str);
     /* A NULL result from str_nth is treated as "end of string". */
3729  p = str_nth(RSTRING_PTR(str), RSTRING_END(str), beg, enc, singlebyte);
3730  if (!p) p = RSTRING_END(str);
3731  e = str_nth(p, RSTRING_END(str), len, enc, singlebyte);
3732  if (!e) e = RSTRING_END(str);
3733  /* error check */
3734  beg = p - RSTRING_PTR(str); /* physical position */
3735  len = e - p; /* physical length */
3736  rb_str_splice_0(str, beg, len, val);
3737  rb_enc_associate(str, enc);
3739  if (cr != ENC_CODERANGE_BROKEN)
3740  ENC_CODERANGE_SET(str, cr);
3741 }
3742 
3743 void
3744 rb_str_update(VALUE str, long beg, long len, VALUE val)
3745 {
3746  rb_str_splice(str, beg, len, val);
3747 }
3748 
/*
 * Replaces the part of str matched by a capture group of re with val.
 *
 * Searches str with re from position 0 and raises IndexError when the
 * search fails, when the group number resolved from backref is out of
 * range, or when that group did not take part in the match.  Negative
 * group numbers count back from the number of registers.  The
 * replacement is done with rb_str_splice_0 using the group's BEG/END
 * register values.
 *
 * NOTE(review): the declarator line (embedded listing number 3750) is
 * absent from this listing, so the function name and parameter list are
 * not visible; the body uses str, re, backref and val.  Later calls in
 * this file read rb_str_subpat_set(str, ..., ..., val), which presumably
 * refer to this function -- confirm against the real source.
 */
3749 static void
3751 {
3752  int nth;
3753  VALUE match;
3754  long start, end, len;
3755  rb_encoding *enc;
3756  struct re_registers *regs;
3757 
3758  if (rb_reg_search(re, str, 0, 0) < 0) {
3759  rb_raise(rb_eIndexError, "regexp not matched");
3760  }
3761  match = rb_backref_get();
3762  nth = rb_reg_backref_number(match, backref);
3763  regs = RMATCH_REGS(match);
3764  if (nth >= regs->num_regs) {
     /* Shared error exit; also reached by goto from the negative-index
      * check below. */
3765  out_of_range:
3766  rb_raise(rb_eIndexError, "index %d out of regexp", nth);
3767  }
3768  if (nth < 0) {
3769  if (-nth >= regs->num_regs) {
3770  goto out_of_range;
3771  }
3772  nth += regs->num_regs;
3773  }
3774 
     /* BEG/END expand to regs->beg[nth] / regs->end[nth]; a start of -1
      * means the group did not participate in the match. */
3775  start = BEG(nth);
3776  if (start == -1) {
3777  rb_raise(rb_eIndexError, "regexp group %d not matched", nth);
3778  }
3779  end = END(nth);
3780  len = end - start;
3781  StringValue(val);
3782  enc = rb_enc_check(str, val);
3783  rb_str_splice_0(str, start, len, val);
3784  rb_enc_associate(str, enc);
3785 }
3786 
/*
 * Single-index element assignment: replaces the part of str selected by
 * indx with val and returns val.
 *
 *   - Fixnum (or anything NUM2LONG accepts that is not a Range): replaces
 *     the one character at that index.
 *   - Regexp: replaces the whole match (capture 0).
 *   - String: replaces the first occurrence of indx; raises IndexError
 *     ("string not matched") when it does not occur.
 *   - Range: replaces the range described by rb_range_beg_len.
 *
 * NOTE(review): the declarator line (embedded listing number 3788) is
 * absent from this listing, so the function name and parameter list are
 * not visible; the body uses str, indx and val.  A later call in this
 * file reads rb_str_aset(str, argv[0], argv[1]), which presumably refers
 * to this function -- confirm against the real source.
 */
3787 static VALUE
3789 {
3790  long idx, beg;
3791 
3792  if (FIXNUM_P(indx)) {
3793  idx = FIX2LONG(indx);
     /* Also entered from the generic path below once indx has been
      * converted with NUM2LONG. */
3794  num_index:
3795  rb_str_splice(str, idx, 1, val);
3796  return val;
3797  }
3798 
3799  if (SPECIAL_CONST_P(indx)) goto generic;
3800  switch (TYPE(indx)) {
3801  case T_REGEXP:
3802  rb_str_subpat_set(str, indx, INT2FIX(0), val);
3803  return val;
3804 
3805  case T_STRING:
3806  beg = rb_str_index(str, indx, 0);
3807  if (beg < 0) {
3808  rb_raise(rb_eIndexError, "string not matched");
3809  }
3810  beg = rb_str_sublen(str, beg);
3811  rb_str_splice(str, beg, str_strlen(indx, 0), val);
3812  return val;
3813 
3814  generic:
3815  default:
3816  /* check if indx is Range */
3817  {
     /* This `beg` shadows the function-level `beg` declared at the top. */
3818  long beg, len;
3819  if (rb_range_beg_len(indx, &beg, &len, str_strlen(str, 0), 2)) {
3820  rb_str_splice(str, beg, len, val);
3821  return val;
3822  }
3823  }
3824  idx = NUM2LONG(indx);
3825  goto num_index;
3826  }
3827 }
3828 
3829 /*
3830  * call-seq:
3831  * str[fixnum] = new_str
3832  * str[fixnum, fixnum] = new_str
3833  * str[range] = aString
3834  * str[regexp] = new_str
3835  * str[regexp, fixnum] = new_str
3836  * str[regexp, name] = new_str
3837  * str[other_str] = new_str
3838  *
3839  * Element Assignment---Replaces some or all of the content of <i>str</i>. The
3840  * portion of the string affected is determined using the same criteria as
3841  * <code>String#[]</code>. If the replacement string is not the same length as
3842  * the text it is replacing, the string will be adjusted accordingly. If the
3843  * regular expression or string used as the index doesn't match a position
3844  * in the string, <code>IndexError</code> is raised. If the regular expression
3845  * form is used, the optional second <code>Fixnum</code> allows you to specify
3846  * which portion of the match to replace (effectively using the
3847  * <code>MatchData</code> indexing rules). The forms that take a
3848  * <code>Fixnum</code> will raise an <code>IndexError</code> if the value is
3849  * out of range; the <code>Range</code> form will raise a
3850  * <code>RangeError</code>, and the <code>Regexp</code> and <code>String</code>
3851  * will raise an <code>IndexError</code> on negative match.
3852  */
3853 
3854 static VALUE
3856 {
3857  if (argc == 3) {
3858  if (RB_TYPE_P(argv[0], T_REGEXP)) {
3859  rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
3860  }
3861  else {
3862  rb_str_splice(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]), argv[2]);
3863  }
3864  return argv[2];
3865  }
3866  rb_check_arity(argc, 2, 3);
3867  return rb_str_aset(str, argv[0], argv[1]);
3868 }
3869 
3870 /*
3871  * call-seq:
3872  * str.insert(index, other_str) -> str
3873  *
3874  * Inserts <i>other_str</i> before the character at the given
3875  * <i>index</i>, modifying <i>str</i>. Negative indices count from the
3876  * end of the string, and insert <em>after</em> the given character.
3877  * The intent is to insert <i>other_str</i> so that it starts at the given
3878  * <i>index</i>.
3879  *
3880  * "abcd".insert(0, 'X') #=> "Xabcd"
3881  * "abcd".insert(3, 'X') #=> "abcXd"
3882  * "abcd".insert(4, 'X') #=> "abcdX"
3883  * "abcd".insert(-3, 'X') #=> "abXcd"
3884  * "abcd".insert(-1, 'X') #=> "abcdX"
3885  */
3886 
3887 static VALUE
3889 {
3890  long pos = NUM2LONG(idx);
3891 
3892  if (pos == -1) {
3893  return rb_str_append(str, str2);
3894  }
3895  else if (pos < 0) {
3896  pos++;
3897  }
3898  rb_str_splice(str, pos, 0, str2);
3899  return str;
3900 }
3901 
3902 
3903 /*
3904  * call-seq:
3905  * str.slice!(fixnum) -> new_str or nil
3906  * str.slice!(fixnum, fixnum) -> new_str or nil
3907  * str.slice!(range) -> new_str or nil
3908  * str.slice!(regexp) -> new_str or nil
3909  * str.slice!(other_str) -> new_str or nil
3910  *
3911  * Deletes the specified portion from <i>str</i>, and returns the portion
3912  * deleted.
3913  *
3914  * string = "this is a string"
3915  * string.slice!(2) #=> "i"
3916  * string.slice!(3..6) #=> " is "
3917  * string.slice!(/s.*t/) #=> "sa st"
3918  * string.slice!("r") #=> "r"
3919  * string #=> "thing"
3920  */
3921 
3922 static VALUE
3924 {
3925  VALUE result;
3926  VALUE buf[3];
3927  int i;
3928 
3929  rb_check_arity(argc, 1, 2);
3930  for (i=0; i<argc; i++) {
3931  buf[i] = argv[i];
3932  }
3933  str_modify_keep_cr(str);
3934  result = rb_str_aref_m(argc, buf, str);
3935  if (!NIL_P(result)) {
3936  buf[i] = rb_str_new(0,0);
3937  rb_str_aset_m(argc+1, buf, str);
3938  }
3939  return result;
3940 }
3941 
3942 static VALUE
3943 get_pat(VALUE pat, int quote)
3944 {
3945  VALUE val;
3946 
3947  switch (TYPE(pat)) {
3948  case T_REGEXP:
3949  return pat;
3950 
3951  case T_STRING:
3952  break;
3953 
3954  default:
3955  val = rb_check_string_type(pat);
3956  if (NIL_P(val)) {
3957  Check_Type(pat, T_REGEXP);
3958  }
3959  pat = val;
3960  }
3961 
3962  if (quote) {
3963  pat = rb_reg_quote(pat);
3964  }
3965 
3966  return rb_reg_regcomp(pat);
3967 }
3968 
3969 
3970 /*
3971  * call-seq:
3972  * str.sub!(pattern, replacement) -> str or nil
3973  * str.sub!(pattern) {|match| block } -> str or nil
3974  *
3975  * Performs the same substitution as String#sub in-place.
3976  *
3977  * Returns +str+ if a substitution was performed or +nil+ if no substitution
3978  * was performed.
3979  */
3980 
3981 static VALUE
3983 {
3984  VALUE pat, repl, hash = Qnil;
3985  int iter = 0;
3986  int tainted = 0;
3987  long plen;
3988  int min_arity = rb_block_given_p() ? 1 : 2;
3989 
3990  rb_check_arity(argc, min_arity, 2);
3991  if (argc == 1) {
3992  iter = 1;
3993  }
3994  else {
3995  repl = argv[1];
3997  if (NIL_P(hash)) {
3998  StringValue(repl);
3999  }
4000  tainted = OBJ_TAINTED_RAW(repl);
4001  }
4002 
4003  pat = get_pat(argv[0], 1);
4004  str_modifiable(str);
4005  if (rb_reg_search(pat, str, 0, 0) >= 0) {
4006  rb_encoding *enc;
4007  int cr = ENC_CODERANGE(str);
4009  struct re_registers *regs = RMATCH_REGS(match);
4010  long beg0 = BEG(0);
4011  long end0 = END(0);
4012  char *p, *rp;
4013  long len, rlen;
4014 
4015  if (iter || !NIL_P(hash)) {
4016  p = RSTRING_PTR(str); len = RSTRING_LEN(str);
4017 
4018  if (iter) {
4020  }
4021  else {
4022  repl = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0));
4023  repl = rb_obj_as_string(repl);
4024  }
4025  str_mod_check(str, p, len);
4026  rb_check_frozen(str);
4027  }
4028  else {
4029  repl = rb_reg_regsub(repl, str, regs, pat);
4030  }
4031  enc = rb_enc_compatible(str, repl);
4032  if (!enc) {
4033  rb_encoding *str_enc = STR_ENC_GET(str);
4034  p = RSTRING_PTR(str); len = RSTRING_LEN(str);
4035  if (coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
4036  coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
4037  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
4038  rb_enc_name(str_enc),
4039  rb_enc_name(STR_ENC_GET(repl)));
4040  }
4041  enc = STR_ENC_GET(repl);
4042  }
4043  rb_str_modify(str);
4044  rb_enc_associate(str, enc);
4045  tainted |= OBJ_TAINTED_RAW(repl);
4046  if (ENC_CODERANGE_UNKNOWN < cr && cr < ENC_CODERANGE_BROKEN) {
4047  int cr2 = ENC_CODERANGE(repl);
4048  if (cr2 == ENC_CODERANGE_BROKEN ||
4049  (cr == ENC_CODERANGE_VALID && cr2 == ENC_CODERANGE_7BIT))
4050  cr = ENC_CODERANGE_UNKNOWN;
4051  else
4052  cr = cr2;
4053  }
4054  plen = end0 - beg0;
4055  rp = RSTRING_PTR(repl); rlen = RSTRING_LEN(repl);
4056  len = RSTRING_LEN(str);
4057  if (rlen > plen) {
4058  RESIZE_CAPA(str, len + rlen - plen);
4059  }
4060  p = RSTRING_PTR(str);
4061  if (rlen != plen) {
4062  memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen);
4063  }
4064  memcpy(p + beg0, rp, rlen);
4065  len += rlen - plen;
4066  STR_SET_LEN(str, len);
4067  RSTRING_PTR(str)[len] = '\0';
4068  ENC_CODERANGE_SET(str, cr);
4069  FL_SET_RAW(str, tainted);
4070 
4071  return str;
4072  }
4073  return Qnil;
4074 }
4075 
4076 
4077 /*
4078  * call-seq:
4079  * str.sub(pattern, replacement) -> new_str
4080  * str.sub(pattern, hash) -> new_str
4081  * str.sub(pattern) {|match| block } -> new_str
4082  *
4083  * Returns a copy of +str+ with the _first_ occurrence of +pattern+
4084  * replaced by the second argument. The +pattern+ is typically a Regexp; if
4085  * given as a String, any regular expression metacharacters it contains will
4086  * be interpreted literally, e.g. <code>'\\\d'</code> will match a backslash
4087  * followed by 'd', instead of a digit.
4088  *
4089  * If +replacement+ is a String it will be substituted for the matched text.
4090  * It may contain back-references to the pattern's capture groups of the form
4091  * <code>"\\d"</code>, where <i>d</i> is a group number, or
4092  * <code>"\\k<n>"</code>, where <i>n</i> is a group name. If it is a
4093  * double-quoted string, both back-references must be preceded by an
4094  * additional backslash. However, within +replacement+ the special match
4095  * variables, such as <code>$&</code>, will not refer to the current match.
4096  * If +replacement+ is a String that looks like a pattern's capture group but
4097  * is actually not a pattern capture group e.g. <code>"\\'"</code>, then it
4098  * will have to be preceded by two backslashes like so <code>"\\\\'"</code>.
4099  *
4100  * If the second argument is a Hash, and the matched text is one of its keys,
4101  * the corresponding value is the replacement string.
4102  *
4103  * In the block form, the current match string is passed in as a parameter,
4104  * and variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>,
4105  * <code>$&</code>, and <code>$'</code> will be set appropriately. The value
4106  * returned by the block will be substituted for the match on each call.
4107  *
4108  * The result inherits any tainting in the original string or any supplied
4109  * replacement string.
4110  *
4111  * "hello".sub(/[aeiou]/, '*') #=> "h*llo"
4112  * "hello".sub(/([aeiou])/, '<\1>') #=> "h<e>llo"
4113  * "hello".sub(/./) {|s| s.ord.to_s + ' ' } #=> "104 ello"
4114  * "hello".sub(/(?<foo>[aeiou])/, '*\k<foo>*') #=> "h*e*llo"
4115  * 'Is SHELL your preferred shell?'.sub(/[[:upper:]]{2,}/, ENV)
4116  * #=> "Is /bin/bash your preferred shell?"
4117  */
4118 
4119 static VALUE
4121 {
4122  str = rb_str_dup(str);
4123  rb_str_sub_bang(argc, argv, str);
4124  return str;
4125 }
4126 
/*
 * Shared body of the global-substitution entry points; the callers visible
 * in this file pass bang = 1 (in-place) and bang = 0 (copying).
 *
 * argc/argv - the pattern, optionally followed by a replacement.
 * str       - the string being searched.
 * bang      - non-zero: replace the contents of str and return it, or return
 *             Qnil when the pattern never matches.  Zero: return a new string
 *             (a dup of str when nothing matches).
 *
 * NOTE(review): this listing has gaps (source lines 4147, 4173, 4183 and 4187
 * do not appear).  With only the lines shown, `hash` is never assigned after
 * its Qnil initialiser and `val` is never assigned in the block branch, so
 * those paths cannot be traced from this view.
 */
4127 static VALUE
4128 str_gsub(int argc, VALUE *argv, VALUE str, int bang)
4129 {
4130  VALUE pat, val, repl, match, dest, hash = Qnil;
4131  struct re_registers *regs;
4132  long beg, n;
4133  long beg0, end0;
4134  long offset, blen, slen, len, last;
4135  int iter = 0;
4136  char *sp, *cp;
4137  int tainted = 0;
4138  rb_encoding *str_enc;
4139 
 /* one argument selects the block form, two arguments supply a replacement */
4140  switch (argc) {
4141  case 1:
4142  RETURN_ENUMERATOR(str, argc, argv);
4143  iter = 1;
4144  break;
4145  case 2:
4146  repl = argv[1];
4148  if (NIL_P(hash)) {
4149  StringValue(repl);
4150  }
4151  tainted = OBJ_TAINTED_RAW(repl);
4152  break;
4153  default:
4154  rb_check_arity(argc, 1, 2);
4155  }
4156 
4157  pat = get_pat(argv[0], 1);
4158  beg = rb_reg_search(pat, str, 0, 0);
4159  if (beg < 0) {
4160  if (bang) return Qnil; /* no match, no substitution */
4161  return rb_str_dup(str);
4162  }
4163 
 /* the result is assembled in a separate buffer, dest, in str's encoding */
4164  offset = 0;
4165  n = 0;
4166  blen = RSTRING_LEN(str) + 30; /* len + margin */
4167  dest = rb_str_buf_new(blen);
 /* sp/slen record str's pointer and length for str_mod_check() below */
4168  sp = RSTRING_PTR(str);
4169  slen = RSTRING_LEN(str);
4170  cp = sp;
4171  str_enc = STR_ENC_GET(str);
4172  rb_enc_associate(dest, str_enc);
4174 
 /* one iteration per match; offset is the byte position the next search starts from */
4175  do {
4176  n++;
4177  match = rb_backref_get();
4178  regs = RMATCH_REGS(match);
4179  beg0 = BEG(0);
4180  end0 = END(0);
4181  if (iter || !NIL_P(hash)) {
4182  if (iter) {
4184  }
4185  else {
4186  val = rb_hash_aref(hash, rb_str_subseq(str, BEG(0), END(0) - BEG(0)));
4188  }
4189  str_mod_check(str, sp, slen);
4190  if (val == dest) { /* paranoid check [ruby-dev:24827] */
4191  rb_raise(rb_eRuntimeError, "block should not cheat");
4192  }
4193  }
4194  else {
4195  val = rb_reg_regsub(repl, str, regs, pat);
4196  }
4197 
4198  tainted |= OBJ_TAINTED_RAW(val);
4199 
4200  len = beg0 - offset; /* copy pre-match substr */
4201  if (len) {
4202  rb_enc_str_buf_cat(dest, cp, len, str_enc);
4203  }
4204 
4205  rb_str_buf_append(dest, val);
4206 
 /* remember where this search started, then continue after the match */
4207  last = offset;
4208  offset = end0;
4209  if (beg0 == end0) {
4210  /*
4211  * Always consume at least one character of the input string
4212  * in order to prevent infinite loops.
4213  */
4214  if (RSTRING_LEN(str) <= end0) break;
4215  len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0, RSTRING_END(str), str_enc);
4216  rb_enc_str_buf_cat(dest, RSTRING_PTR(str)+end0, len, str_enc);
4217  offset = end0 + len;
4218  }
4219  cp = RSTRING_PTR(str) + offset;
4220  if (offset > RSTRING_LEN(str)) break;
4221  beg = rb_reg_search(pat, str, offset, 0);
4222  } while (beg >= 0);
 /* append whatever follows the final match */
4223  if (RSTRING_LEN(str) > offset) {
4224  rb_enc_str_buf_cat(dest, cp, RSTRING_LEN(str) - offset, str_enc);
4225  }
 /*
  * Search again from the start offset of the last successful search.
  * NOTE(review): presumably this restores the match data to the final
  * match after the failed search that ended the loop -- confirm.
  */
4226  rb_reg_search(pat, str, last, 0);
4227  if (bang) {
4228  rb_str_shared_replace(str, dest);
4229  }
4230  else {
 /* the copy takes str's class and inherits its taint */
4231  RBASIC_SET_CLASS(dest, rb_obj_class(str));
4232  tainted |= OBJ_TAINTED_RAW(str);
4233  str = dest;
4234  }
4235 
4236  FL_SET_RAW(str, tainted);
4237  return str;
4238 }
4239 
4240 
4241 /*
4242  * call-seq:
4243  * str.gsub!(pattern, replacement) -> str or nil
4244  * str.gsub!(pattern) {|match| block } -> str or nil
4245  * str.gsub!(pattern) -> an_enumerator
4246  *
4247  * Performs the substitutions of <code>String#gsub</code> in place, returning
4248  * <i>str</i>, or <code>nil</code> if no substitutions were performed.
4249  * If no block and no <i>replacement</i> is given, an enumerator is returned instead.
4250  */
4251 
4252 static VALUE
4254 {
4255  str_modify_keep_cr(str);
4256  return str_gsub(argc, argv, str, 1);
4257 }
4258 
4259 
4260 /*
4261  * call-seq:
4262  * str.gsub(pattern, replacement) -> new_str
4263  * str.gsub(pattern, hash) -> new_str
4264  * str.gsub(pattern) {|match| block } -> new_str
4265  * str.gsub(pattern) -> enumerator
4266  *
4267  * Returns a copy of <i>str</i> with <em>all</em> occurrences of
4268  * <i>pattern</i> replaced by the second argument. The <i>pattern</i> is
4269  * typically a <code>Regexp</code>; if given as a <code>String</code>, any
4270  * regular expression metacharacters it contains will be interpreted
4271  * literally, e.g. <code>'\\\d'</code> will match a backslash followed by 'd',
4272  * instead of a digit.
4273  *
4274  * If <i>replacement</i> is a <code>String</code> it will be substituted for
4275  * the matched text. It may contain back-references to the pattern's capture
4276  * groups of the form <code>\\\d</code>, where <i>d</i> is a group number, or
4277  * <code>\\\k<n></code>, where <i>n</i> is a group name. If it is a
4278  * double-quoted string, both back-references must be preceded by an
4279  * additional backslash. However, within <i>replacement</i> the special match
4280  * variables, such as <code>$&</code>, will not refer to the current match.
4281  *
4282  * If the second argument is a <code>Hash</code>, and the matched text is one
4283  * of its keys, the corresponding value is the replacement string.
4284  *
4285  * In the block form, the current match string is passed in as a parameter,
4286  * and variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>,
4287  * <code>$&</code>, and <code>$'</code> will be set appropriately. The value
4288  * returned by the block will be substituted for the match on each call.
4289  *
4290  * The result inherits any tainting in the original string or any supplied
4291  * replacement string.
4292  *
4293  * When neither a block nor a second argument is supplied, an
4294  * <code>Enumerator</code> is returned.
4295  *
4296  * "hello".gsub(/[aeiou]/, '*') #=> "h*ll*"
4297  * "hello".gsub(/([aeiou])/, '<\1>') #=> "h<e>ll<o>"
4298  * "hello".gsub(/./) {|s| s.ord.to_s + ' '} #=> "104 101 108 108 111 "
4299  * "hello".gsub(/(?<foo>[aeiou])/, '{\k<foo>}') #=> "h{e}ll{o}"
4300  * 'hello'.gsub(/[eo]/, 'e' => 3, 'o' => '*') #=> "h3ll*"
4301  */
4302 
4303 static VALUE
4305 {
4306  return str_gsub(argc, argv, str, 0);
4307 }
4308 
4309 
4310 /*
4311  * call-seq:
4312  * str.replace(other_str) -> str
4313  *
4314  * Replaces the contents and taintedness of <i>str</i> with the corresponding
4315  * values in <i>other_str</i>.
4316  *
4317  * s = "hello" #=> "hello"
4318  * s.replace "world" #=> "world"
4319  */
4320 
4321 VALUE
4323 {
4324  str_modifiable(str);
4325  if (str == str2) return str;
4326 
4327  StringValue(str2);
4328  str_discard(str);
4329  return str_replace(str, str2);
4330 }
4331 
4332 /*
4333  * call-seq:
4334  * string.clear -> string
4335  *
4336  * Makes string empty.
4337  *
4338  * a = "abcde"
4339  * a.clear #=> ""
4340  */
4341 
4342 static VALUE
4344 {
4345  str_discard(str);
4346  STR_SET_EMBED(str);
4347  STR_SET_EMBED_LEN(str, 0);
4348  RSTRING_PTR(str)[0] = 0;
4349  if (rb_enc_asciicompat(STR_ENC_GET(str)))
4351  else
4353  return str;
4354 }
4355 
4356 /*
4357  * call-seq:
4358  * string.chr -> string
4359  *
4360  * Returns a one-character string at the beginning of the string.
4361  *
4362  * a = "abcde"
4363  * a.chr #=> "a"
4364  */
4365 
4366 static VALUE
4368 {
4369  return rb_str_substr(str, 0, 1);
4370 }
4371 
4372 /*
4373  * call-seq:
4374  * str.getbyte(index) -> 0 .. 255
4375  *
4376  * returns the <i>index</i>th byte as an integer.
4377  */
4378 static VALUE
4380 {
4381  long pos = NUM2LONG(index);
4382 
4383  if (pos < 0)
4384  pos += RSTRING_LEN(str);
4385  if (pos < 0 || RSTRING_LEN(str) <= pos)
4386  return Qnil;
4387 
4388  return INT2FIX((unsigned char)RSTRING_PTR(str)[pos]);
4389 }
4390 
4391 /*
4392  * call-seq:
4393  * str.setbyte(index, integer) -> integer
4394  *
4395  * sets the <i>index</i>th byte to <i>integer</i>.
4396  */
4397 static VALUE
4398 rb_str_setbyte(VALUE str, VALUE index, VALUE value)
4399 {
4400  long pos = NUM2LONG(index);
4401  int byte = NUM2INT(value);
4402 
4403  rb_str_modify(str);
4404 
4405  if (pos < -RSTRING_LEN(str) || RSTRING_LEN(str) <= pos)
4406  rb_raise(rb_eIndexError, "index %ld out of string", pos);
4407  if (pos < 0)
4408  pos += RSTRING_LEN(str);
4409 
4410  RSTRING_PTR(str)[pos] = byte;
4411 
4412  return value;
4413 }
4414 
/*
 * Builds the substring of str that starts at byte offset beg and covers at
 * most len bytes.  Returns Qnil when beg lies past the end of the string,
 * when len is negative, or when a negative beg still points before the start
 * after the string length has been added to it.
 */
4415 static VALUE
4416 str_byte_substr(VALUE str, long beg, long len)
4417 {
4418  char *p, *s = RSTRING_PTR(str);
4419  long n = RSTRING_LEN(str);
4420  VALUE str2;
4421 
4422  if (beg > n || len < 0) return Qnil;
 /* a negative offset counts back from the end of the string */
4423  if (beg < 0) {
4424  beg += n;
4425  if (beg < 0) return Qnil;
4426  }
 /*
  * Clamp the slice to the end of the string.
  * NOTE(review): beg + len is evaluated in signed long and nothing above
  * bounds len from above, so a very large len could overflow here;
  * `len > n - beg` looks like the overflow-free form -- confirm.
  */
4427  if (beg + len > n)
4428  len = n - beg;
 /* empty result: no source pointer is used */
4429  if (len <= 0) {
4430  len = 0;
4431  p = 0;
4432  }
4433  else
4434  p = s + beg;
4435 
 /*
  * The slice is longer than RSTRING_EMBED_LEN_MAX and runs to the end of
  * str: derive str2 from str through rb_str_new4/str_new3 and advance its
  * heap pointer to the tail (presumably so the bytes are shared rather
  * than copied -- confirm against those helpers).
  */
4436  if (len > RSTRING_EMBED_LEN_MAX && beg + len == n) {
4437  str2 = rb_str_new4(str);
4438  str2 = str_new3(rb_obj_class(str2), str2);
4439  RSTRING(str2)->as.heap.ptr += RSTRING(str2)->as.heap.len - len;
4440  RSTRING(str2)->as.heap.len = len;
4441  }
4442  else {
 /* otherwise create str2 from the len bytes at p */
4443  str2 = rb_str_new5(str, p, len);
4444  }
4445 
4446  str_enc_copy(str2, str);
4447 
 /*
  * NOTE(review): the statements governed by the if/else and the case
  * labels below are absent from this listing (source lines 4450, 4452,
  * 4457 and 4460 do not appear), so the action taken in each branch
  * cannot be read from here.
  */
4448  if (RSTRING_LEN(str2) == 0) {
4449  if (!rb_enc_asciicompat(STR_ENC_GET(str)))
4451  else
4453  }
4454  else {
4455  switch (ENC_CODERANGE(str)) {
4456  case ENC_CODERANGE_7BIT:
4458  break;
4459  default:
4461  break;
4462  }
4463  }
4464 
4465  OBJ_INFECT_RAW(str2, str);
4466 
4467  return str2;
4468 }
4469 
4470 static VALUE
4472 {
4473  long idx;
4474  switch (TYPE(indx)) {
4475  case T_FIXNUM:
4476  idx = FIX2LONG(indx);
4477 
4478  num_index:
4479  str = str_byte_substr(str, idx, 1);
4480  if (NIL_P(str) || RSTRING_LEN(str) == 0) return Qnil;
4481  return str;
4482 
4483  default:
4484  /* check if indx is Range */
4485  {
4486  long beg, len = RSTRING_LEN(str);
4487 
4488  switch (rb_range_beg_len(indx, &beg, &len, len, 0)) {
4489  case Qfalse:
4490  break;
4491  case Qnil:
4492  return Qnil;
4493  default:
4494  return str_byte_substr(str, beg, len);
4495  }
4496  }
4497  idx = NUM2LONG(indx);
4498  goto num_index;
4499  }
4500 
4501  UNREACHABLE;
4502 }
4503 
4504 /*
4505  * call-seq:
4506  * str.byteslice(fixnum) -> new_str or nil
4507  * str.byteslice(fixnum, fixnum) -> new_str or nil
4508  * str.byteslice(range) -> new_str or nil
4509  *
4510  * Byte Reference---If passed a single <code>Fixnum</code>, returns a
4511  * substring of one byte at that position. If passed two <code>Fixnum</code>
4512  * objects, returns a substring starting at the offset given by the first, and
4513  * a length given by the second. If given a <code>Range</code>, a substring containing
4514  * bytes at offsets given by the range is returned. In all three cases, if
4515  * an offset is negative, it is counted from the end of <i>str</i>. Returns
4516  * <code>nil</code> if the initial offset falls outside the string, the length
4517  * is negative, or the beginning of the range is greater than the end.
4518  * The resulting string keeps the encoding of the original string.
4519  *
4520  * "hello".byteslice(1) #=> "e"
4521  * "hello".byteslice(-1) #=> "o"
4522  * "hello".byteslice(1, 2) #=> "el"
4523  * "\x80\u3042".byteslice(1, 3) #=> "\u3042"
4524  * "\x03\u3042\xff".byteslice(1..3) #=> "\u3042"
4525  */
4526 
4527 static VALUE
4529 {
4530  if (argc == 2) {
4531  return str_byte_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]));
4532  }
4533  rb_check_arity(argc, 1, 2);
4534  return str_byte_aref(str, argv[0]);
4535 }
4536 
4537 /*
4538  * call-seq:
4539  * str.reverse -> new_str
4540  *
4541  * Returns a new string with the characters from <i>str</i> in reverse order.
4542  *
4543  * "stressed".reverse #=> "desserts"
4544  */
4545 
4546 static VALUE
4548 {
4549  rb_encoding *enc;
4550  VALUE rev;
4551  char *s, *e, *p;
4552  int cr;
4553 
4554  if (RSTRING_LEN(str) <= 1) return rb_str_dup(str);
4555  enc = STR_ENC_GET(str);
4556  rev = rb_str_new5(str, 0, RSTRING_LEN(str));
4557  s = RSTRING_PTR(str); e = RSTRING_END(str);
4558  p = RSTRING_END(rev);
4559  cr = ENC_CODERANGE(str);
4560 
4561  if (RSTRING_LEN(str) > 1) {
4562  if (single_byte_optimizable(str)) {
4563  while (s < e) {
4564  *--p = *s++;
4565  }
4566  }
4567  else if (cr == ENC_CODERANGE_VALID) {
4568  while (s < e) {
4569  int clen = rb_enc_fast_mbclen(s, e, enc);
4570 
4571  p -= clen;
4572  memcpy(p, s, clen);
4573  s += clen;
4574  }
4575  }
4576  else {
4577  cr = rb_enc_asciicompat(enc) ?
4579  while (s < e) {
4580  int clen = rb_enc_mbclen(s, e, enc);
4581 
4582  if (clen > 1 || (*s & 0x80)) cr = ENC_CODERANGE_UNKNOWN;
4583  p -= clen;
4584  memcpy(p, s, clen);
4585  s += clen;
4586  }
4587  }
4588  }
4589  STR_SET_LEN(rev, RSTRING_LEN(str));
4590  OBJ_INFECT_RAW(rev, str);
4591  str_enc_copy(rev, str);
4592  ENC_CODERANGE_SET(rev, cr);
4593 
4594  return rev;
4595 }
4596 
4597 
4598 /*
4599  * call-seq:
4600  * str.reverse! -> str
4601  *
4602  * Reverses <i>str</i> in place.
4603  */
4604 
4605 static VALUE
4607 {
4608  if (RSTRING_LEN(str) > 1) {
4609  if (single_byte_optimizable(str)) {
4610  char *s, *e, c;
4611 
4612  str_modify_keep_cr(str);
4613  s = RSTRING_PTR(str);
4614  e = RSTRING_END(str) - 1;
4615  while (s < e) {
4616  c = *s;
4617  *s++ = *e;
4618  *e-- = c;
4619  }
4620  }
4621  else {
4623  }
4624  }
4625  else {
4626  str_modify_keep_cr(str);
4627  }
4628  return str;
4629 }
4630 
4631 
4632 /*
4633  * call-seq:
4634  * str.include? other_str -> true or false
4635  *
4636  * Returns <code>true</code> if <i>str</i> contains the given string or
4637  * character.
4638  *
4639  * "hello".include? "lo" #=> true
4640  * "hello".include? "ol" #=> false
4641  * "hello".include? ?h #=> true
4642  */
4643 
4644 static VALUE
4646 {
4647  long i;
4648 
4649  StringValue(arg);
4650  i = rb_str_index(str, arg, 0);
4651 
4652  if (i == -1) return Qfalse;
4653  return Qtrue;
4654 }
4655 
4656 
4657 /*
4658  * call-seq:
4659  * str.to_i(base=10) -> integer
4660  *
4661  * Returns the result of interpreting leading characters in <i>str</i> as an
4662  * integer base <i>base</i> (between 2 and 36). Extraneous characters past the
4663  * end of a valid number are ignored. If there is not a valid number at the
4664  * start of <i>str</i>, <code>0</code> is returned. This method never raises an
4665  * exception when <i>base</i> is valid.
4666  *
4667  * "12345".to_i #=> 12345
4668  * "99 red balloons".to_i #=> 99
4669  * "0a".to_i #=> 0
4670  * "0a".to_i(16) #=> 10
4671  * "hello".to_i #=> 0
4672  * "1100101".to_i(2) #=> 101
4673  * "1100101".to_i(8) #=> 294977
4674  * "1100101".to_i(10) #=> 1100101
4675  * "1100101".to_i(16) #=> 17826049
4676  */
4677 
4678 static VALUE
4680 {
4681  int base;
4682 
4683  if (argc == 0) base = 10;
4684  else {
4685  VALUE b;
4686 
4687  rb_scan_args(argc, argv, "01", &b);
4688  base = NUM2INT(b);
4689  }
4690  if (base < 0) {
4691  rb_raise(rb_eArgError, "invalid radix %d", base);
4692  }
4693  return rb_str_to_inum(str, base, FALSE);
4694 }
4695 
4696 
4697 /*
4698  * call-seq:
4699  * str.to_f -> float
4700  *
4701  * Returns the result of interpreting leading characters in <i>str</i> as a
4702  * floating point number. Extraneous characters past the end of a valid number
4703  * are ignored. If there is not a valid number at the start of <i>str</i>,
4704  * <code>0.0</code> is returned. This method never raises an exception.
4705  *
4706  * "123.45e1".to_f #=> 1234.5
4707  * "45.67 degrees".to_f #=> 45.67
4708  * "thx1138".to_f #=> 0.0
4709  */
4710 
4711 static VALUE
4713 {
4714  return DBL2NUM(rb_str_to_dbl(str, FALSE));
4715 }
4716 
4717 
4718 /*
4719  * call-seq:
4720  * str.to_s -> str
4721  * str.to_str -> str
4722  *
4723  * Returns the receiver.
4724  */
4725 
4726 static VALUE
4728 {
4729  if (rb_obj_class(str) != rb_cString) {
4730  return str_duplicate(rb_cString, str);
4731  }
4732  return str;
4733 }
4734 
4735 #if 0
4736 static void
4737 str_cat_char(VALUE str, unsigned int c, rb_encoding *enc)
4738 {
4739  char s[RUBY_MAX_CHAR_LEN];
4740  int n = rb_enc_codelen(c, enc);
4741 
4742  rb_enc_mbcput(c, s, enc);
4743  rb_enc_str_buf_cat(str, s, n, enc);
4744 }
4745 #endif
4746 
4747 #define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
4748 
4749 int
4750 rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p)
4751 {
4752  char buf[CHAR_ESC_LEN + 1];
4753  int l;
4754 
4755 #if SIZEOF_INT > 4
4756  c &= 0xffffffff;
4757 #endif
4758  if (unicode_p) {
4759  if (c < 0x7F && ISPRINT(c)) {
4760  snprintf(buf, CHAR_ESC_LEN, "%c", c);
4761  }
4762  else if (c < 0x10000) {
4763  snprintf(buf, CHAR_ESC_LEN, "\\u%04X", c);
4764  }
4765  else {
4766  snprintf(buf, CHAR_ESC_LEN, "\\u{%X}", c);
4767  }
4768  }
4769  else {
4770  if (c < 0x100) {
4771  snprintf(buf, CHAR_ESC_LEN, "\\x%02X", c);
4772  }
4773  else {
4774  snprintf(buf, CHAR_ESC_LEN, "\\x{%X}", c);
4775  }
4776  }
4777  l = (int)strlen(buf); /* CHAR_ESC_LEN cannot exceed INT_MAX */
4778  rb_str_buf_cat(result, buf, l);
4779  return l;
4780 }
4781 
4782 /*
4783  * call-seq:
4784  * str.inspect -> string
4785  *
4786  * Returns a printable version of _str_, surrounded by quote marks,
4787  * with special characters escaped.
4788  *
4789  * str = "hello"
4790  * str[3] = "\b"
4791  * str.inspect #=> "\"hel\\bo\""
4792  */
4793 
4794 VALUE
4796 {
4797  int encidx = ENCODING_GET(str);
4798  rb_encoding *enc = rb_enc_from_index(encidx), *actenc;
4799  const char *p, *pend, *prev;
4800  char buf[CHAR_ESC_LEN + 1];
4803  int unicode_p = rb_enc_unicode_p(enc);
4804  int asciicompat = rb_enc_asciicompat(enc);
4805 
4806  if (resenc == NULL) resenc = rb_default_external_encoding();
4807  if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
4808  rb_enc_associate(result, resenc);
4809  str_buf_cat2(result, "\"");
4810 
4811  p = RSTRING_PTR(str); pend = RSTRING_END(str);
4812  prev = p;
4813  actenc = get_actual_encoding(encidx, str);
4814  if (actenc != enc) {
4815  enc = actenc;
4816  if (unicode_p) unicode_p = rb_enc_unicode_p(enc);
4817  }
4818  while (p < pend) {
4819  unsigned int c, cc;
4820  int n;
4821 
4822  n = rb_enc_precise_mbclen(p, pend, enc);
4823  if (!MBCLEN_CHARFOUND_P(n)) {
4824  if (p > prev) str_buf_cat(result, prev, p - prev);
4825  n = rb_enc_mbminlen(enc);
4826  if (pend < p + n)
4827  n = (int)(pend - p);
4828  while (n--) {
4829  snprintf(buf, CHAR_ESC_LEN, "\\x%02X", *p & 0377);
4831  prev = ++p;
4832  }
4833  continue;
4834  }
4835  n = MBCLEN_CHARFOUND_LEN(n);
4836  c = rb_enc_mbc_to_codepoint(p, pend, enc);
4837  p += n;
4838  if ((asciicompat || unicode_p) &&
4839  (c == '"'|| c == '\\' ||
4840  (c == '#' &&
4841  p < pend &&
4843  (cc = rb_enc_codepoint(p,pend,enc),
4844  (cc == '$' || cc == '@' || cc == '{'))))) {
4845  if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
4846  str_buf_cat2(result, "\\");
4847  if (asciicompat || enc == resenc) {
4848  prev = p - n;
4849  continue;
4850  }
4851  }
4852  switch (c) {
4853  case '\n': cc = 'n'; break;
4854  case '\r': cc = 'r'; break;
4855  case '\t': cc = 't'; break;
4856  case '\f': cc = 'f'; break;
4857  case '\013': cc = 'v'; break;
4858  case '\010': cc = 'b'; break;
4859  case '\007': cc = 'a'; break;
4860  case 033: cc = 'e'; break;
4861  default: cc = 0; break;
4862  }
4863  if (cc) {
4864  if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
4865  buf[0] = '\\';
4866  buf[1] = (char)cc;
4867  str_buf_cat(result, buf, 2);
4868  prev = p;
4869  continue;
4870  }
4871  if ((enc == resenc && rb_enc_isprint(c, enc)) ||
4872  (asciicompat && rb_enc_isascii(c, enc) && ISPRINT(c))) {
4873  continue;
4874  }
4875  else {
4876  if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
4877  rb_str_buf_cat_escaped_char(result, c, unicode_p);
4878  prev = p;
4879  continue;
4880  }
4881  }
4882  if (p > prev) str_buf_cat(result, prev, p - prev);
4883  str_buf_cat2(result, "\"");
4884 
4885  OBJ_INFECT_RAW(result, str);
4886  return result;
4887 }
4888 
4889 #define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
4890 
4891 /*
4892  * call-seq:
4893  * str.dump -> new_str
4894  *
4895  * Produces a version of +str+ with all non-printing characters replaced by
4896  * <code>\nnn</code> notation and all special characters escaped.
4897  *
4898  * "hello \n ''".dump #=> "\"hello \\n ''\""
4899  */
4900 
4901 VALUE
4903 {
4904  rb_encoding *enc = rb_enc_get(str);
4905  long len;
4906  const char *p, *pend;
4907  char *q, *qend;
4908  VALUE result;
4909  int u8 = (enc == rb_utf8_encoding());
4910 
4911  len = 2; /* "" */
4912  p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
4913  while (p < pend) {
4914  unsigned char c = *p++;
4915  switch (c) {
4916  case '"': case '\\':
4917  case '\n': case '\r':
4918  case '\t': case '\f':
4919  case '\013': case '\010': case '\007': case '\033':
4920  len += 2;
4921  break;
4922 
4923  case '#':
4924  len += IS_EVSTR(p, pend) ? 2 : 1;
4925  break;
4926 
4927  default:
4928  if (ISPRINT(c)) {
4929  len++;
4930  }
4931  else {
4932  if (u8 && c > 0x7F) { /* \u{NN} */
4933  int n = rb_enc_precise_mbclen(p-1, pend, enc);
4934  if (MBCLEN_CHARFOUND_P(n)) {
4935  unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
4936  while (cc >>= 4) len++;
4937  len += 5;
4938  p += MBCLEN_CHARFOUND_LEN(n)-1;
4939  break;
4940  }
4941  }
4942  len += 4; /* \xNN */
4943  }
4944  break;
4945  }
4946  }
4947  if (!rb_enc_asciicompat(enc)) {
4948  len += 19; /* ".force_encoding('')" */
4949  len += strlen(enc->name);
4950  }
4951 
4952  result = rb_str_new5(str, 0, len);
4953  p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
4954  q = RSTRING_PTR(result); qend = q + len + 1;
4955 
4956  *q++ = '"';
4957  while (p < pend) {
4958  unsigned char c = *p++;
4959 
4960  if (c == '"' || c == '\\') {
4961  *q++ = '\\';
4962  *q++ = c;
4963  }
4964  else if (c == '#') {
4965  if (IS_EVSTR(p, pend)) *q++ = '\\';
4966  *q++ = '#';
4967  }
4968  else if (c == '\n') {
4969  *q++ = '\\';
4970  *q++ = 'n';
4971  }
4972  else if (c == '\r') {
4973  *q++ = '\\';
4974  *q++ = 'r';
4975  }
4976  else if (c == '\t') {
4977  *q++ = '\\';
4978  *q++ = 't';
4979  }
4980  else if (c == '\f') {
4981  *q++ = '\\';
4982  *q++ = 'f';
4983  }
4984  else if (c == '\013') {
4985  *q++ = '\\';
4986  *q++ = 'v';
4987  }
4988  else if (c == '\010') {
4989  *q++ = '\\';
4990  *q++ = 'b';
4991  }
4992  else if (c == '\007') {
4993  *q++ = '\\';
4994  *q++ = 'a';
4995  }
4996  else if (c == '\033') {
4997  *q++ = '\\';
4998  *q++ = 'e';
4999  }
5000  else if (ISPRINT(c)) {
5001  *q++ = c;
5002  }
5003  else {
5004  *q++ = '\\';
5005  if (u8) {
5006  int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
5007  if (MBCLEN_CHARFOUND_P(n)) {
5008  int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
5009  p += n;
5010  snprintf(q, qend-q, "u{%x}", cc);
5011  q += strlen(q);
5012  continue;
5013  }
5014  }
5015  snprintf(q, qend-q, "x%02X", c);
5016  q += 3;
5017  }
5018  }
5019  *q++ = '"';
5020  *q = '\0';
5021  if (!rb_enc_asciicompat(enc)) {
5022  snprintf(q, qend-q, ".force_encoding(\"%s\")", enc->name);
5023  enc = rb_ascii8bit_encoding();
5024  }
5025  OBJ_INFECT_RAW(result, str);
5026  /* result from dump is ASCII */
5027  rb_enc_associate(result, enc);
5029  return result;
5030 }
5031 
5032 
5033 static void
5035 {
5036  if (rb_enc_dummy_p(enc)) {
5037  rb_raise(rb_eEncCompatError, "incompatible encoding with this operation: %s",
5038  rb_enc_name(enc));
5039  }
5040 }
5041 
5042 /*
5043  * call-seq:
5044  * str.upcase! -> str or nil
5045  *
5046  * Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes
5047  * were made.
5048  * Note: case replacement is effective only in ASCII region.
5049  */
5050 
5051 static VALUE
5053 {
5054  rb_encoding *enc;
5055  char *s, *send;
5056  int modify = 0;
5057  int n;
5058 
5059  str_modify_keep_cr(str);
5060  enc = STR_ENC_GET(str);
5062  s = RSTRING_PTR(str); send = RSTRING_END(str);
5063  if (single_byte_optimizable(str)) {
5064  while (s < send) {
5065  unsigned int c = *(unsigned char*)s;
5066 
5067  if (rb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') {
5068  *s = 'A' + (c - 'a');
5069  modify = 1;
5070  }
5071  s++;
5072  }
5073  }
5074  else {
5075  int ascompat = rb_enc_asciicompat(enc);
5076 
5077  while (s < send) {
5078  unsigned int c;
5079 
5080  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
5081  if (rb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') {
5082  *s = 'A' + (c - 'a');
5083  modify = 1;
5084  }
5085  s++;
5086  }
5087  else {
5088  c = rb_enc_codepoint_len(s, send, &n, enc);
5089  if (rb_enc_islower(c, enc)) {
5090  /* assuming toupper returns codepoint with same size */
5091  rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
5092  modify = 1;
5093  }
5094  s += n;
5095  }
5096  }
5097  }
5098 
5099  if (modify) return str;
5100  return Qnil;
5101 }
5102 
5103 
5104 /*
5105  * call-seq:
5106  * str.upcase -> new_str
5107  *
5108  * Returns a copy of <i>str</i> with all lowercase letters replaced with their
5109  * uppercase counterparts. The operation is locale insensitive---only
5110  * characters ``a'' to ``z'' are affected.
5111  * Note: case replacement is effective only in ASCII region.
5112  *
5113  * "hEllO".upcase #=> "HELLO"
5114  */
5115 
5116 static VALUE
5118 {
5119  str = rb_str_dup(str);
5120  rb_str_upcase_bang(str);
5121  return str;
5122 }
5123 
5124 
5125 /*
5126  * call-seq:
5127  * str.downcase! -> str or nil
5128  *
5129  * Downcases the contents of <i>str</i>, returning <code>nil</code> if no
5130  * changes were made.
5131  * Note: case replacement is effective only in ASCII region.
5132  */
5133 
5134 static VALUE
5136 {
5137  rb_encoding *enc;
5138  char *s, *send;
5139  int modify = 0;
5140 
5141  str_modify_keep_cr(str);
5142  enc = STR_ENC_GET(str);
5144  s = RSTRING_PTR(str); send = RSTRING_END(str);
5145  if (single_byte_optimizable(str)) {
5146  while (s < send) {
5147  unsigned int c = *(unsigned char*)s;
5148 
5149  if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
5150  *s = 'a' + (c - 'A');
5151  modify = 1;
5152  }
5153  s++;
5154  }
5155  }
5156  else {
5157  int ascompat = rb_enc_asciicompat(enc);
5158 
5159  while (s < send) {
5160  unsigned int c;
5161  int n;
5162 
5163  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
5164  if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
5165  *s = 'a' + (c - 'A');
5166  modify = 1;
5167  }
5168  s++;
5169  }
5170  else {
5171  c = rb_enc_codepoint_len(s, send, &n, enc);
5172  if (rb_enc_isupper(c, enc)) {
5173  /* assuming toupper returns codepoint with same size */
5174  rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
5175  modify = 1;
5176  }
5177  s += n;
5178  }
5179  }
5180  }
5181 
5182  if (modify) return str;
5183  return Qnil;
5184 }
5185 
5186 
5187 /*
5188  * call-seq:
5189  * str.downcase -> new_str
5190  *
5191  * Returns a copy of <i>str</i> with all uppercase letters replaced with their
5192  * lowercase counterparts. The operation is locale insensitive---only
5193  * characters ``A'' to ``Z'' are affected.
5194  * Note: case replacement is effective only in ASCII region.
5195  *
5196  * "hEllO".downcase #=> "hello"
5197  */
5198 
5199 static VALUE
5201 {
5202  str = rb_str_dup(str);
5203  rb_str_downcase_bang(str);
5204  return str;
5205 }
5206 
5207 
5208 /*
5209  * call-seq:
5210  * str.capitalize! -> str or nil
5211  *
5212  * Modifies <i>str</i> by converting the first character to uppercase and the
5213  * remainder to lowercase. Returns <code>nil</code> if no changes are made.
5214  * Note: case conversion is effective only in ASCII region.
5215  *
5216  * a = "hello"
5217  * a.capitalize! #=> "Hello"
5218  * a #=> "Hello"
5219  * a.capitalize! #=> nil
5220  */
5221 
5222 static VALUE
5224 {
5225  rb_encoding *enc;
5226  char *s, *send;
5227  int modify = 0;
5228  unsigned int c;
5229  int n;
5230 
5231  str_modify_keep_cr(str);
5232  enc = STR_ENC_GET(str);
5234  if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
5235  s = RSTRING_PTR(str); send = RSTRING_END(str);
5236 
5237  c = rb_enc_codepoint_len(s, send, &n, enc);
5238  if (rb_enc_islower(c, enc)) {
5239  rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
5240  modify = 1;
5241  }
5242  s += n;
5243  while (s < send) {
5244  c = rb_enc_codepoint_len(s, send, &n, enc);
5245  if (rb_enc_isupper(c, enc)) {
5246  rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
5247  modify = 1;
5248  }
5249  s += n;
5250  }
5251 
5252  if (modify) return str;
5253  return Qnil;
5254 }
5255 
5256 
5257 /*
5258  * call-seq:
5259  * str.capitalize -> new_str
5260  *
5261  * Returns a copy of <i>str</i> with the first character converted to uppercase
5262  * and the remainder to lowercase.
5263  * Note: case conversion is effective only in ASCII region.
5264  *
5265  * "hello".capitalize #=> "Hello"
5266  * "HELLO".capitalize #=> "Hello"
5267  * "123ABC".capitalize #=> "123abc"
5268  */
5269 
5270 static VALUE
5272 {
5273  str = rb_str_dup(str);
5275  return str;
5276 }
5277 
5278 
5279 /*
5280  * call-seq:
5281  * str.swapcase! -> str or nil
5282  *
5283  * Equivalent to <code>String#swapcase</code>, but modifies the receiver in
5284  * place, returning <i>str</i>, or <code>nil</code> if no changes were made.
5285  * Note: case conversion is effective only in ASCII region.
5286  */
5287 
5288 static VALUE
5290 {
5291  rb_encoding *enc;
5292  char *s, *send;
5293  int modify = 0;
5294  int n;
5295 
5296  str_modify_keep_cr(str);
5297  enc = STR_ENC_GET(str);
5299  s = RSTRING_PTR(str); send = RSTRING_END(str);
5300  while (s < send) {
5301  unsigned int c = rb_enc_codepoint_len(s, send, &n, enc);
5302 
5303  if (rb_enc_isupper(c, enc)) {
5304  /* assuming toupper returns codepoint with same size */
5305  rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
5306  modify = 1;
5307  }
5308  else if (rb_enc_islower(c, enc)) {
5309  /* assuming tolower returns codepoint with same size */
5310  rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
5311  modify = 1;
5312  }
5313  s += n;
5314  }
5315 
5316  if (modify) return str;
5317  return Qnil;
5318 }
5319 
5320 
5321 /*
5322  * call-seq:
5323  * str.swapcase -> new_str
5324  *
5325  * Returns a copy of <i>str</i> with uppercase alphabetic characters converted
5326  * to lowercase and lowercase characters converted to uppercase.
5327  * Note: case conversion is effective only in ASCII region.
5328  *
5329  * "Hello".swapcase #=> "hELLO"
5330  * "cYbEr_PuNk11".swapcase #=> "CyBeR_pUnK11"
5331  */
5332 
5333 static VALUE
5335 {
5336  str = rb_str_dup(str);
5337  rb_str_swapcase_bang(str);
5338  return str;
5339 }
5340 
typedef unsigned char *USTR;

/* Cursor over a tr-style character specification; advanced by trnext(). */
struct tr {
    int gen;            /* non-zero while the code points of a range are being produced */
    unsigned int now;   /* code point produced most recently */
    unsigned int max;   /* last code point of the range being produced */
    char *p;            /* next unread byte of the specification */
    char *pend;         /* one past the last byte of the specification */
};
5348 
5349 static unsigned int
5350 trnext(struct tr *t, rb_encoding *enc)
5351 {
5352  int n;
5353 
5354  for (;;) {
5355  if (!t->gen) {
5356 nextpart:
5357  if (t->p == t->pend) return -1;
5358  if (rb_enc_ascget(t->p, t->pend, &n, enc) == '\\' && t->p + n < t->pend) {
5359  t->p += n;
5360  }
5361  t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
5362  t->p += n;
5363  if (rb_enc_ascget(t->p, t->pend, &n, enc) == '-' && t->p + n < t->pend) {
5364  t->p += n;
5365  if (t->p < t->pend) {
5366  unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
5367  t->p += n;
5368  if (t->now > c) {
5369  if (t->now < 0x80 && c < 0x80) {
5371  "invalid range \"%c-%c\" in string transliteration",
5372  t->now, c);
5373  }
5374  else {
5375  rb_raise(rb_eArgError, "invalid range in string transliteration");
5376  }
5377  continue; /* not reached */
5378  }
5379  t->gen = 1;
5380  t->max = c;
5381  }
5382  }
5383  return t->now;
5384  }
5385  else {
5386  while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
5387  if (t->now == t->max) {
5388  t->gen = 0;
5389  goto nextpart;
5390  }
5391  }
5392  if (t->now < t->max) {
5393  return t->now;
5394  }
5395  else {
5396  t->gen = 0;
5397  return t->max;
5398  }
5399  }
5400  }
5401 }
5402 
5403 static VALUE rb_str_delete_bang(int,VALUE*,VALUE);
5404 
5405 static VALUE
5406 tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
5407 {
5408  const unsigned int errc = -1;
5409  unsigned int trans[256];
5410  rb_encoding *enc, *e1, *e2;
5411  struct tr trsrc, trrepl;
5412  int cflag = 0;
5413  unsigned int c, c0, last = 0;
5414  int modify = 0, i, l;
5415  char *s, *send;
5416  VALUE hash = 0;
5417  int singlebyte = single_byte_optimizable(str);
5418  int cr;
5419 
5420 #define CHECK_IF_ASCII(c) \
5421  (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
5422  (cr = ENC_CODERANGE_VALID) : 0)
5423 
5424  StringValue(src);
5425  StringValue(repl);
5426  if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
5427  if (RSTRING_LEN(repl) == 0) {
5428  return rb_str_delete_bang(1, &src, str);
5429  }
5430 
5431  cr = ENC_CODERANGE(str);
5432  e1 = rb_enc_check(str, src);
5433  e2 = rb_enc_check(str, repl);
5434  if (e1 == e2) {
5435  enc = e1;
5436  }
5437  else {
5438  enc = rb_enc_check(src, repl);
5439  }
5440  trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
5441  if (RSTRING_LEN(src) > 1 &&
5442  rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) == '^' &&
5443  trsrc.p + l < trsrc.pend) {
5444  cflag = 1;
5445  trsrc.p += l;
5446  }
5447  trrepl.p = RSTRING_PTR(repl);
5448  trrepl.pend = trrepl.p + RSTRING_LEN(repl);
5449  trsrc.gen = trrepl.gen = 0;
5450  trsrc.now = trrepl.now = 0;
5451  trsrc.max = trrepl.max = 0;
5452 
5453  if (cflag) {
5454  for (i=0; i<256; i++) {
5455  trans[i] = 1;
5456  }
5457  while ((c = trnext(&trsrc, enc)) != errc) {
5458  if (c < 256) {
5459  trans[c] = errc;
5460  }
5461  else {
5462  if (!hash) hash = rb_hash_new();
5464  }
5465  }
5466  while ((c = trnext(&trrepl, enc)) != errc)
5467  /* retrieve last replacer */;
5468  last = trrepl.now;
5469  for (i=0; i<256; i++) {
5470  if (trans[i] != errc) {
5471  trans[i] = last;
5472  }
5473  }
5474  }
5475  else {
5476  unsigned int r;
5477 
5478  for (i=0; i<256; i++) {
5479  trans[i] = errc;
5480  }
5481  while ((c = trnext(&trsrc, enc)) != errc) {
5482  r = trnext(&trrepl, enc);
5483  if (r == errc) r = trrepl.now;
5484  if (c < 256) {
5485  trans[c] = r;
5486  if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
5487  }
5488  else {
5489  if (!hash) hash = rb_hash_new();
5491  }
5492  }
5493  }
5494 
5495  if (cr == ENC_CODERANGE_VALID)
5496  cr = ENC_CODERANGE_7BIT;
5497  str_modify_keep_cr(str);
5498  s = RSTRING_PTR(str); send = RSTRING_END(str);
5499  if (sflag) {
5500  int clen, tlen;
5501  long offset, max = RSTRING_LEN(str);
5502  unsigned int save = -1;
5503  char *buf = ALLOC_N(char, max), *t = buf;
5504 
5505  while (s < send) {
5506  int may_modify = 0;
5507 
5508  c0 = c = rb_enc_codepoint_len(s, send, &clen, e1);
5509  tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
5510 
5511  s += clen;
5512  if (c < 256) {
5513  c = trans[c];
5514  }
5515  else if (hash) {
5516  VALUE tmp = rb_hash_lookup(hash, UINT2NUM(c));
5517  if (NIL_P(tmp)) {
5518  if (cflag) c = last;
5519  else c = errc;
5520  }
5521  else if (cflag) c = errc;
5522  else c = NUM2INT(tmp);
5523  }
5524  else {
5525  c = errc;
5526  }
5527  if (c != (unsigned int)-1) {
5528  if (save == c) {
5529  CHECK_IF_ASCII(c);
5530  continue;
5531  }
5532  save = c;
5533  tlen = rb_enc_codelen(c, enc);
5534  modify = 1;
5535  }
5536  else {
5537  save = -1;
5538  c = c0;
5539  if (enc != e1) may_modify = 1;
5540  }
5541  while (t - buf + tlen >= max) {
5542  offset = t - buf;
5543  max *= 2;
5544  REALLOC_N(buf, char, max);
5545  t = buf + offset;
5546  }
5547  rb_enc_mbcput(c, t, enc);
5548  if (may_modify && memcmp(s, t, tlen) != 0) {
5549  modify = 1;
5550  }
5551  CHECK_IF_ASCII(c);
5552  t += tlen;
5553  }
5554  if (!STR_EMBED_P(str)) {
5556  }
5557  *t = '\0';
5558  RSTRING(str)->as.heap.ptr = buf;
5559  RSTRING(str)->as.heap.len = t - buf;
5560  STR_SET_NOEMBED(str);
5561  RSTRING(str)->as.heap.aux.capa = max;
5562  }
5563  else if (rb_enc_mbmaxlen(enc) == 1 || (singlebyte && !hash)) {
5564  while (s < send) {
5565  c = (unsigned char)*s;
5566  if (trans[c] != errc) {
5567  if (!cflag) {
5568  c = trans[c];
5569  *s = c;
5570  modify = 1;
5571  }
5572  else {
5573  *s = last;
5574  modify = 1;
5575  }
5576  }
5577  CHECK_IF_ASCII(c);
5578  s++;
5579  }
5580  }
5581  else {
5582  int clen, tlen, max = (int)(RSTRING_LEN(str) * 1.2);
5583  long offset;
5584  char *buf = ALLOC_N(char, max), *t = buf;
5585 
5586  while (s < send) {
5587  int may_modify = 0;
5588  c0 = c = rb_enc_codepoint_len(s, send, &clen, e1);
5589  tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
5590 
5591  if (c < 256) {
5592  c = trans[c];
5593  }
5594  else if (hash) {
5595  VALUE tmp = rb_hash_lookup(hash, UINT2NUM(c));
5596  if (NIL_P(tmp)) {
5597  if (cflag) c = last;
5598  else c = errc;
5599  }
5600  else if (cflag) c = errc;
5601  else c = NUM2INT(tmp);
5602  }
5603  else {
5604  c = cflag ? last : errc;
5605  }
5606  if (c != errc) {
5607  tlen = rb_enc_codelen(c, enc);
5608  modify = 1;
5609  }
5610  else {
5611  c = c0;
5612  if (enc != e1) may_modify = 1;
5613  }
5614  while (t - buf + tlen >= max) {
5615  offset = t - buf;
5616  max *= 2;
5617  REALLOC_N(buf, char, max);
5618  t = buf + offset;
5619  }
5620  if (s != t) {
5621  rb_enc_mbcput(c, t, enc);
5622  if (may_modify && memcmp(s, t, tlen) != 0) {
5623  modify = 1;
5624  }
5625  }
5626  CHECK_IF_ASCII(c);
5627  s += clen;
5628  t += tlen;
5629  }
5630  if (!STR_EMBED_P(str)) {
5632  }
5633  *t = '\0';
5634  RSTRING(str)->as.heap.ptr = buf;
5635  RSTRING(str)->as.heap.len = t - buf;
5636  STR_SET_NOEMBED(str);
5637  RSTRING(str)->as.heap.aux.capa = max;
5638  }
5639 
5640  if (modify) {
5641  if (cr != ENC_CODERANGE_BROKEN)
5642  ENC_CODERANGE_SET(str, cr);
5643  rb_enc_associate(str, enc);
5644  return str;
5645  }
5646  return Qnil;
5647 }
5648 
5649 
5650 /*
5651  * call-seq:
5652  * str.tr!(from_str, to_str) -> str or nil
5653  *
5654  * Translates <i>str</i> in place, using the same rules as
5655  * <code>String#tr</code>. Returns <i>str</i>, or <code>nil</code> if no
5656  * changes were made.
5657  */
5658 
5659 static VALUE
5661 {
5662  return tr_trans(str, src, repl, 0);
5663 }
5664 
5665 
5666 /*
5667  * call-seq:
5668  * str.tr(from_str, to_str) => new_str
5669  *
5670  * Returns a copy of +str+ with the characters in +from_str+ replaced by the
5671  * corresponding characters in +to_str+. If +to_str+ is shorter than
5672  * +from_str+, it is padded with its last character in order to maintain the
5673  * correspondence.
5674  *
5675  * "hello".tr('el', 'ip') #=> "hippo"
5676  * "hello".tr('aeiou', '*') #=> "h*ll*"
5677  * "hello".tr('aeiou', 'AA*') #=> "hAll*"
5678  *
5679  * Both strings may use the <code>c1-c2</code> notation to denote ranges of
5680  * characters, and +from_str+ may start with a <code>^</code>, which denotes
5681  * all characters except those listed.
5682  *
5683  * "hello".tr('a-y', 'b-z') #=> "ifmmp"
5684  * "hello".tr('^aeiou', '*') #=> "*e**o"
5685  *
5686  * The backslash character <code></code> can be used to escape
5687  * <code>^</code> or <code>-</code> and is otherwise ignored unless it
5688  * appears at the end of a range or the end of the +from_str+ or +to_str+:
5689  *
5690  * "hello^world".tr("\\^aeiou", "*") #=> "h*ll**w*rld"
5691  * "hello-world".tr("a\\-eo", "*") #=> "h*ll**w*rld"
5692  *
5693  * "hello\r\nworld".tr("\r", "") #=> "hello\nworld"
5694  * "hello\r\nworld".tr("\\r", "") #=> "hello\r\nwold"
5695  * "hello\r\nworld".tr("\\\r", "") #=> "hello\nworld"
5696  *
5697  * "X['\\b']".tr("X\\", "") #=> "['b']"
5698  * "X['\\b']".tr("X-\\]", "") #=> "'b'"
5699  */
5700 
5701 static VALUE
5702 rb_str_tr(VALUE str, VALUE src, VALUE repl)
5703 {
5704  str = rb_str_dup(str);
5705  tr_trans(str, src, repl, 0);
5706  return str;
5707 }
5708 
5709 #define TR_TABLE_SIZE 257
5710 static void
5711 tr_setup_table(VALUE str, char stable[TR_TABLE_SIZE], int first,
5712  VALUE *tablep, VALUE *ctablep, rb_encoding *enc)
5713 {
5714  const unsigned int errc = -1;
5715  char buf[256];
5716  struct tr tr;
5717  unsigned int c;
5718  VALUE table = 0, ptable = 0;
5719  int i, l, cflag = 0;
5720 
5721  tr.p = RSTRING_PTR(str); tr.pend = tr.p + RSTRING_LEN(str);
5722  tr.gen = tr.now = tr.max = 0;
5723 
5724  if (RSTRING_LEN(str) > 1 && rb_enc_ascget(tr.p, tr.pend, &l, enc) == '^') {
5725  cflag = 1;
5726  tr.p += l;
5727  }
5728  if (first) {
5729  for (i=0; i<256; i++) {
5730  stable[i] = 1;
5731  }
5732  stable[256] = cflag;
5733  }
5734  else if (stable[256] && !cflag) {
5735  stable[256] = 0;
5736  }
5737  for (i=0; i<256; i++) {
5738  buf[i] = cflag;
5739  }
5740 
5741  while ((c = trnext(&tr, enc)) != errc) {
5742  if (c < 256) {
5743  buf[c & 0xff] = !cflag;
5744  }
5745  else {
5746  VALUE key = UINT2NUM(c);
5747 
5748  if (!table && (first || *tablep || stable[256])) {
5749  if (cflag) {
5750  ptable = *ctablep;
5751  table = ptable ? ptable : rb_hash_new();
5752  *ctablep = table;
5753  }
5754  else {
5755  table = rb_hash_new();
5756  ptable = *tablep;
5757  *tablep = table;
5758  }
5759  }
5760  if (table && (!ptable || (cflag ^ !NIL_P(rb_hash_aref(ptable, key))))) {
5761  rb_hash_aset(table, key, Qtrue);
5762  }
5763  }
5764  }
5765  for (i=0; i<256; i++) {
5766  stable[i] = stable[i] && buf[i];
5767  }
5768  if (!table && !cflag) {
5769  *tablep = 0;
5770  }
5771 }
5772 
5773 
5774 static int
5775 tr_find(unsigned int c, const char table[TR_TABLE_SIZE], VALUE del, VALUE nodel)
5776 {
5777  if (c < 256) {
5778  return table[c] != 0;
5779  }
5780  else {
5781  VALUE v = UINT2NUM(c);
5782 
5783  if (del) {
5784  if (!NIL_P(rb_hash_lookup(del, v)) &&
5785  (!nodel || NIL_P(rb_hash_lookup(nodel, v)))) {
5786  return TRUE;
5787  }
5788  }
5789  else if (nodel && !NIL_P(rb_hash_lookup(nodel, v))) {
5790  return FALSE;
5791  }
5792  return table[256] ? TRUE : FALSE;
5793  }
5794 }
5795 
5796 /*
5797  * call-seq:
5798  * str.delete!([other_str]+) -> str or nil
5799  *
5800  * Performs a <code>delete</code> operation in place, returning <i>str</i>, or
5801  * <code>nil</code> if <i>str</i> was not modified.
5802  */
5803 
5804 static VALUE
5806 {
5807  char squeez[TR_TABLE_SIZE];
5808  rb_encoding *enc = 0;
5809  char *s, *send, *t;
5810  VALUE del = 0, nodel = 0;
5811  int modify = 0;
5812  int i, ascompat, cr;
5813 
5814  if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
5816  for (i=0; i<argc; i++) {
5817  VALUE s = argv[i];
5818 
5819  StringValue(s);
5820  enc = rb_enc_check(str, s);
5821  tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
5822  }
5823 
5824  str_modify_keep_cr(str);
5825  ascompat = rb_enc_asciicompat(enc);
5826  s = t = RSTRING_PTR(str);
5827  send = RSTRING_END(str);
5828  cr = ascompat ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
5829  while (s < send) {
5830  unsigned int c;
5831  int clen;
5832 
5833  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
5834  if (squeez[c]) {
5835  modify = 1;
5836  }
5837  else {
5838  if (t != s) *t = c;
5839  t++;
5840  }
5841  s++;
5842  }
5843  else {
5844  c = rb_enc_codepoint_len(s, send, &clen, enc);
5845 
5846  if (tr_find(c, squeez, del, nodel)) {
5847  modify = 1;
5848  }
5849  else {
5850  if (t != s) rb_enc_mbcput(c, t, enc);
5851  t += clen;
5852  if (cr == ENC_CODERANGE_7BIT) cr = ENC_CODERANGE_VALID;
5853  }
5854  s += clen;
5855  }
5856  }
5857  *t = '\0';
5858  STR_SET_LEN(str, t - RSTRING_PTR(str));
5859  ENC_CODERANGE_SET(str, cr);
5860 
5861  if (modify) return str;
5862  return Qnil;
5863 }
5864 
5865 
5866 /*
5867  * call-seq:
5868  * str.delete([other_str]+) -> new_str
5869  *
5870  * Returns a copy of <i>str</i> with all characters in the intersection of its
5871  * arguments deleted. Uses the same rules for building the set of characters as
5872  * <code>String#count</code>.
5873  *
5874  * "hello".delete "l","lo" #=> "heo"
5875  * "hello".delete "lo" #=> "he"
5876  * "hello".delete "aeiou", "^e" #=> "hell"
5877  * "hello".delete "ej-m" #=> "ho"
5878  */
5879 
5880 static VALUE
5882 {
5883  str = rb_str_dup(str);
5884  rb_str_delete_bang(argc, argv, str);
5885  return str;
5886 }
5887 
5888 
5889 /*
5890  * call-seq:
5891  * str.squeeze!([other_str]*) -> str or nil
5892  *
5893  * Squeezes <i>str</i> in place, returning either <i>str</i>, or
5894  * <code>nil</code> if no changes were made.
5895  */
5896 
5897 static VALUE
5899 {
5900  char squeez[TR_TABLE_SIZE];
5901  rb_encoding *enc = 0;
5902  VALUE del = 0, nodel = 0;
5903  char *s, *send, *t;
5904  int i, modify = 0;
5905  int ascompat, singlebyte = single_byte_optimizable(str);
5906  unsigned int save;
5907 
5908  if (argc == 0) {
5909  enc = STR_ENC_GET(str);
5910  }
5911  else {
5912  for (i=0; i<argc; i++) {
5913  VALUE s = argv[i];
5914 
5915  StringValue(s);
5916  enc = rb_enc_check(str, s);
5917  if (singlebyte && !single_byte_optimizable(s))
5918  singlebyte = 0;
5919  tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
5920  }
5921  }
5922 
5923  str_modify_keep_cr(str);
5924  s = t = RSTRING_PTR(str);
5925  if (!s || RSTRING_LEN(str) == 0) return Qnil;
5926  send = RSTRING_END(str);
5927  save = -1;
5928  ascompat = rb_enc_asciicompat(enc);
5929 
5930  if (singlebyte) {
5931  while (s < send) {
5932  unsigned int c = *(unsigned char*)s++;
5933  if (c != save || (argc > 0 && !squeez[c])) {
5934  *t++ = save = c;
5935  }
5936  }
5937  } else {
5938  while (s < send) {
5939  unsigned int c;
5940  int clen;
5941 
5942  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
5943  if (c != save || (argc > 0 && !squeez[c])) {
5944  *t++ = save = c;
5945  }
5946  s++;
5947  }
5948  else {
5949  c = rb_enc_codepoint_len(s, send, &clen, enc);
5950 
5951  if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
5952  if (t != s) rb_enc_mbcput(c, t, enc);
5953  save = c;
5954  t += clen;
5955  }
5956  s += clen;
5957  }
5958  }
5959  }
5960 
5961  *t = '\0';
5962  if (t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
5963  STR_SET_LEN(str, t - RSTRING_PTR(str));
5964  modify = 1;
5965  }
5966 
5967  if (modify) return str;
5968  return Qnil;
5969 }
5970 
5971 
5972 /*
5973  * call-seq:
5974  * str.squeeze([other_str]*) -> new_str
5975  *
5976  * Builds a set of characters from the <i>other_str</i> parameter(s) using the
5977  * procedure described for <code>String#count</code>. Returns a new string
5978  * where runs of the same character that occur in this set are replaced by a
5979  * single character. If no arguments are given, all runs of identical
5980  * characters are replaced by a single character.
5981  *
5982  * "yellow moon".squeeze #=> "yelow mon"
5983  * " now is the".squeeze(" ") #=> " now is the"
5984  * "putters shoot balls".squeeze("m-z") #=> "puters shot balls"
5985  */
5986 
5987 static VALUE
5989 {
5990  str = rb_str_dup(str);
5991  rb_str_squeeze_bang(argc, argv, str);
5992  return str;
5993 }
5994 
5995 
5996 /*
5997  * call-seq:
5998  * str.tr_s!(from_str, to_str) -> str or nil
5999  *
6000  * Performs <code>String#tr_s</code> processing on <i>str</i> in place,
6001  * returning <i>str</i>, or <code>nil</code> if no changes were made.
6002  */
6003 
6004 static VALUE
6006 {
6007  return tr_trans(str, src, repl, 1);
6008 }
6009 
6010 
6011 /*
6012  * call-seq:
6013  * str.tr_s(from_str, to_str) -> new_str
6014  *
6015  * Processes a copy of <i>str</i> as described under <code>String#tr</code>,
6016  * then removes duplicate characters in regions that were affected by the
6017  * translation.
6018  *
6019  * "hello".tr_s('l', 'r') #=> "hero"
6020  * "hello".tr_s('el', '*') #=> "h*o"
6021  * "hello".tr_s('el', 'hx') #=> "hhxo"
6022  */
6023 
6024 static VALUE
6025 rb_str_tr_s(VALUE str, VALUE src, VALUE repl)
6026 {
6027  str = rb_str_dup(str);
6028  tr_trans(str, src, repl, 1);
6029  return str;
6030 }
6031 
6032 
6033 /*
6034  * call-seq:
6035  * str.count([other_str]+) -> fixnum
6036  *
6037  * Each +other_str+ parameter defines a set of characters to count. The
6038  * intersection of these sets defines the characters to count in +str+. Any
6039  * +other_str+ that starts with a caret <code>^</code> is negated. The
6040  * sequence <code>c1-c2</code> means all characters between c1 and c2. The
6041  * backslash character <code></code> can be used to escape <code>^</code> or
6042  * <code>-</code> and is otherwise ignored unless it appears at the end of a
6043  * sequence or the end of a +other_str+.
6044  *
6045  * a = "hello world"
6046  * a.count "lo" #=> 5
6047  * a.count "lo", "o" #=> 2
6048  * a.count "hello", "^l" #=> 4
6049  * a.count "ej-m" #=> 4
6050  *
6051  * "hello^world".count "\\^aeiou" #=> 4
6052  * "hello-world".count "a\\-eo" #=> 4
6053  *
6054  * c = "hello world\\r\\n"
6055  * c.count "\\" #=> 2
6056  * c.count "\\A" #=> 0
6057  * c.count "X-\\w" #=> 3
6058  */
6059 
6060 static VALUE
6062 {
6063  char table[TR_TABLE_SIZE];
6064  rb_encoding *enc = 0;
6065  VALUE del = 0, nodel = 0, tstr;
6066  char *s, *send;
6067  int i;
6068  int ascompat;
6069 
6071 
6072  tstr = argv[0];
6073  StringValue(tstr);
6074  enc = rb_enc_check(str, tstr);
6075  if (argc == 1) {
6076  const char *ptstr;
6077  if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
6078  (ptstr = RSTRING_PTR(tstr),
6079  ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (const unsigned char *)ptstr, (const unsigned char *)ptstr+1)) &&
6080  !is_broken_string(str)) {
6081  int n = 0;
6082  int clen;
6083  unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
6084 
6085  s = RSTRING_PTR(str);
6086  if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
6087  send = RSTRING_END(str);
6088  while (s < send) {
6089  if (*(unsigned char*)s++ == c) n++;
6090  }
6091  return INT2NUM(n);
6092  }
6093  }
6094 
6095  tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
6096  for (i=1; i<argc; i++) {
6097  tstr = argv[i];
6098  StringValue(tstr);
6099  enc = rb_enc_check(str, tstr);
6100  tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
6101  }
6102 
6103  s = RSTRING_PTR(str);
6104  if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
6105  send = RSTRING_END(str);
6106  ascompat = rb_enc_asciicompat(enc);
6107  i = 0;
6108  while (s < send) {
6109  unsigned int c;
6110 
6111  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
6112  if (table[c]) {
6113  i++;
6114  }
6115  s++;
6116  }
6117  else {
6118  int clen;
6119  c = rb_enc_codepoint_len(s, send, &clen, enc);
6120  if (tr_find(c, table, del, nodel)) {
6121  i++;
6122  }
6123  s += clen;
6124  }
6125  }
6126 
6127  return INT2NUM(i);
6128 }
6129 
/*
 * Byte-indexed whitespace table: 1 for 0x09-0x0D (\t \n \v \f \r) and
 * 0x20 (space), 0 for every other byte value.
 */
static const char isspacetable[256] = {
    /* 0x00 */ 0,0,0,0,0,0,0,0, 0,1,1,1,1,1,0,0,
    /* 0x10 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    /* 0x20 */ 1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    /* 0x30 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    /* 0x40 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    /* 0x50 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    /* 0x60 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    /* 0x70 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    /* 0x80 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    /* 0x90 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    /* 0xA0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    /* 0xB0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    /* 0xC0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    /* 0xD0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    /* 0xE0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    /* 0xF0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0
};

/* The cast keeps a negative char value from indexing before the table. */
#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
6150 
6151 /*
6152  * call-seq:
6153  * str.split(pattern=$;, [limit]) -> anArray
6154  *
6155  * Divides <i>str</i> into substrings based on a delimiter, returning an array
6156  * of these substrings.
6157  *
6158  * If <i>pattern</i> is a <code>String</code>, then its contents are used as
6159  * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
6160  * space, <i>str</i> is split on whitespace, with leading whitespace and runs
6161  * of contiguous whitespace characters ignored.
6162  *
6163  * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
6164  * pattern matches. Whenever the pattern matches a zero-length string,
6165  * <i>str</i> is split into individual characters. If <i>pattern</i> contains
6166  * groups, the respective matches will be returned in the array as well.
6167  *
6168  * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If
6169  * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
6170  * split on whitespace as if ` ' were specified.
6171  *
6172  * If the <i>limit</i> parameter is omitted, trailing null fields are
6173  * suppressed. If <i>limit</i> is a positive number, at most that number of
6174  * fields will be returned (if <i>limit</i> is <code>1</code>, the entire
6175  * string is returned as the only entry in an array). If negative, there is no
6176  * limit to the number of fields returned, and trailing null fields are not
6177  * suppressed.
6178  *
6179  * When the input +str+ is empty an empty Array is returned as the string is
6180  * considered to have no fields to split.
6181  *
6182  * " now's the time".split #=> ["now's", "the", "time"]
6183  * " now's the time".split(' ') #=> ["now's", "the", "time"]
6184  * " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"]
6185  * "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
6186  * "hello".split(//) #=> ["h", "e", "l", "l", "o"]
6187  * "hello".split(//, 3) #=> ["h", "e", "llo"]
6188  * "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"]
6189  *
6190  * "mellow yellow".split("ello") #=> ["m", "w y", "w"]
6191  * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
6192  * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"]
6193  * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""]
6194  *
6195  * "".split(',', -1) #=> []
6196  */
6197 
6198 static VALUE
6200 {
6201  rb_encoding *enc;
6202  VALUE spat;
6203  VALUE limit;
6204  enum {awk, string, regexp} split_type;
6205  long beg, end, i = 0;
6206  int lim = 0;
6207  VALUE result, tmp;
6208 
6209  if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
6210  lim = NUM2INT(limit);
6211  if (lim <= 0) limit = Qnil;
6212  else if (lim == 1) {
6213  if (RSTRING_LEN(str) == 0)
6214  return rb_ary_new2(0);
6215  return rb_ary_new3(1, str);
6216  }
6217  i = 1;
6218  }
6219 
6220  enc = STR_ENC_GET(str);
6221  if (NIL_P(spat) && NIL_P(spat = rb_fs)) {
6222  split_type = awk;
6223  }
6224  else {
6225  if (RB_TYPE_P(spat, T_STRING)) {
6226  rb_encoding *enc2 = STR_ENC_GET(spat);
6227 
6228  split_type = string;
6229  if (RSTRING_LEN(spat) == 0) {
6230  /* Special case - split into chars */
6231  spat = rb_reg_regcomp(spat);
6232  split_type = regexp;
6233  }
6234  else if (rb_enc_asciicompat(enc2) == 1) {
6235  if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){
6236  split_type = awk;
6237  }
6238  }
6239  else {
6240  int l;
6241  if (rb_enc_ascget(RSTRING_PTR(spat), RSTRING_END(spat), &l, enc2) == ' ' &&
6242  RSTRING_LEN(spat) == l) {
6243  split_type = awk;
6244  }
6245  }
6246  }
6247  else {
6248  spat = get_pat(spat, 1);
6249  split_type = regexp;
6250  }
6251  }
6252 
6253  result = rb_ary_new();
6254  beg = 0;
6255  if (split_type == awk) {
6256  char *ptr = RSTRING_PTR(str);
6257  char *eptr = RSTRING_END(str);
6258  char *bptr = ptr;
6259  int skip = 1;
6260  unsigned int c;
6261 
6262  end = beg;
6263  if (is_ascii_string(str)) {
6264  while (ptr < eptr) {
6265  c = (unsigned char)*ptr++;
6266  if (skip) {
6267  if (ascii_isspace(c)) {
6268  beg = ptr - bptr;
6269  }
6270  else {
6271  end = ptr - bptr;
6272  skip = 0;
6273  if (!NIL_P(limit) && lim <= i) break;
6274  }
6275  }
6276  else if (ascii_isspace(c)) {
6277  rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
6278  skip = 1;
6279  beg = ptr - bptr;
6280  if (!NIL_P(limit)) ++i;
6281  }
6282  else {
6283  end = ptr - bptr;
6284  }
6285  }
6286  }
6287  else {
6288  while (ptr < eptr) {
6289  int n;
6290 
6291  c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
6292  ptr += n;
6293  if (skip) {
6294  if (rb_isspace(c)) {
6295  beg = ptr - bptr;
6296  }
6297  else {
6298  end = ptr - bptr;
6299  skip = 0;
6300  if (!NIL_P(limit) && lim <= i) break;
6301  }
6302  }
6303  else if (rb_isspace(c)) {
6304  rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
6305  skip = 1;
6306  beg = ptr - bptr;
6307  if (!NIL_P(limit)) ++i;
6308  }
6309  else {
6310  end = ptr - bptr;
6311  }
6312  }
6313  }
6314  }
6315  else if (split_type == string) {
6316  char *ptr = RSTRING_PTR(str);
6317  char *temp = ptr;
6318  char *eptr = RSTRING_END(str);
6319  char *sptr = RSTRING_PTR(spat);
6320  long slen = RSTRING_LEN(spat);
6321 
6322  if (is_broken_string(str)) {
6323  rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
6324  }
6325  if (is_broken_string(spat)) {
6326  rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(spat)));
6327  }
6328  enc = rb_enc_check(str, spat);
6329  while (ptr < eptr &&
6330  (end = rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
6331  /* Check we are at the start of a char */
6332  char *t = rb_enc_right_char_head(ptr, ptr + end, eptr, enc);
6333  if (t != ptr + end) {
6334  ptr = t;
6335  continue;
6336  }
6337  rb_ary_push(result, rb_str_subseq(str, ptr - temp, end));
6338  ptr += end + slen;
6339  if (!NIL_P(limit) && lim <= ++i) break;
6340  }
6341  beg = ptr - temp;
6342  }
6343  else {
6344  char *ptr = RSTRING_PTR(str);
6345  long len = RSTRING_LEN(str);
6346  long start = beg;
6347  long idx;
6348  int last_null = 0;
6349  struct re_registers *regs;
6350 
6351  while ((end = rb_reg_search(spat, str, start, 0)) >= 0) {
6352  regs = RMATCH_REGS(rb_backref_get());
6353  if (start == end && BEG(0) == END(0)) {
6354  if (!ptr) {
6356  break;
6357  }
6358  else if (last_null == 1) {
6359  rb_ary_push(result, rb_str_subseq(str, beg,
6360  rb_enc_fast_mbclen(ptr+beg,
6361  ptr+len,
6362  enc)));
6363  beg = start;
6364  }
6365  else {
6366  if (ptr+start == ptr+len)
6367  start++;
6368  else
6369  start += rb_enc_fast_mbclen(ptr+start,ptr+len,enc);
6370  last_null = 1;
6371  continue;
6372  }
6373  }
6374  else {
6375  rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
6376  beg = start = END(0);
6377  }
6378  last_null = 0;
6379 
6380  for (idx=1; idx < regs->num_regs; idx++) {
6381  if (BEG(idx) == -1) continue;
6382  if (BEG(idx) == END(idx))
6383  tmp = str_new_empty(str);
6384  else
6385  tmp = rb_str_subseq(str, BEG(idx), END(idx)-BEG(idx));
6386  rb_ary_push(result, tmp);
6387  }
6388  if (!NIL_P(limit) && lim <= ++i) break;
6389  }
6390  }
6391  if (RSTRING_LEN(str) > 0 && (!NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
6392  if (RSTRING_LEN(str) == beg)
6393  tmp = str_new_empty(str);
6394  else
6395  tmp = rb_str_subseq(str, beg, RSTRING_LEN(str)-beg);
6396  rb_ary_push(result, tmp);
6397  }
6398  if (NIL_P(limit) && lim == 0) {
6399  long len;
6400  while ((len = RARRAY_LEN(result)) > 0 &&
6401  (tmp = RARRAY_AREF(result, len-1), RSTRING_LEN(tmp) == 0))
6402  rb_ary_pop(result);
6403  }
6404 
6405  return result;
6406 }
6407 
6408 VALUE
6409 rb_str_split(VALUE str, const char *sep0)
6410 {
6411  VALUE sep;
6412 
6413  StringValue(str);
6414  sep = rb_str_new2(sep0);
6415  return rb_str_split_m(1, &sep, str);
6416 }
6417 
6418 
6419 static VALUE
6420 rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, int wantarray)
6421 {
6422  rb_encoding *enc;
6423  VALUE line, rs, orig = str;
6424  const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
6425  long pos, len, rslen;
6426  int paragraph_mode = 0;
6427 
6428  VALUE UNINITIALIZED_VAR(ary);
6429 
6430  if (argc == 0)
6431  rs = rb_rs;
6432  else
6433  rb_scan_args(argc, argv, "01", &rs);
6434 
6435  if (rb_block_given_p()) {
6436  if (wantarray) {
6437 #if STRING_ENUMERATORS_WANTARRAY
6438  rb_warn("given block not used");
6439  ary = rb_ary_new();
6440 #else
6441  rb_warning("passing a block to String#lines is deprecated");
6442  wantarray = 0;
6443 #endif
6444  }
6445  }
6446  else {
6447  if (wantarray)
6448  ary = rb_ary_new();
6449  else
6450  RETURN_ENUMERATOR(str, argc, argv);
6451  }
6452 
6453  if (NIL_P(rs)) {
6454  if (wantarray) {
6455  rb_ary_push(ary, str);
6456  return ary;
6457  }
6458  else {
6459  rb_yield(str);
6460  return orig;
6461  }
6462  }
6463 
6464  str = rb_str_new4(str);
6465  ptr = subptr = RSTRING_PTR(str);
6466  pend = RSTRING_END(str);
6467  len = RSTRING_LEN(str);
6468  StringValue(rs);
6469  rslen = RSTRING_LEN(rs);
6470 
6471  if (rs == rb_default_rs)
6472  enc = rb_enc_get(str);
6473  else
6474  enc = rb_enc_check(str, rs);
6475 
6476  if (rslen == 0) {
6477  rsptr = "\n\n";
6478  rslen = 2;
6479  paragraph_mode = 1;
6480  }
6481  else {
6482  rsptr = RSTRING_PTR(rs);
6483  }
6484 
6485  if ((rs == rb_default_rs || paragraph_mode) && !rb_enc_asciicompat(enc)) {
6486  rs = rb_str_new(rsptr, rslen);
6487  rs = rb_str_encode(rs, rb_enc_from_encoding(enc), 0, Qnil);
6488  rsptr = RSTRING_PTR(rs);
6489  rslen = RSTRING_LEN(rs);
6490  }
6491 
6492  while (subptr < pend) {
6493  pos = rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
6494  if (pos < 0) break;
6495  hit = subptr + pos;
6496  adjusted = rb_enc_right_char_head(subptr, hit, pend, enc);
6497  if (hit != adjusted) {
6498  subptr = adjusted;
6499  continue;
6500  }
6501  subend = hit + rslen;
6502  if (paragraph_mode) {
6503  while (subend < pend && rb_enc_is_newline(subend, pend, enc)) {
6504  subend += rb_enc_mbclen(subend, pend, enc);
6505  }
6506  }
6507  line = rb_str_subseq(str, subptr - ptr, subend - subptr);
6508  if (wantarray) {
6509  rb_ary_push(ary, line);
6510  }
6511  else {
6512  rb_yield(line);
6513  str_mod_check(str, ptr, len);
6514  }
6515  subptr = subend;
6516  }
6517 
6518  if (subptr != pend) {
6519  line = rb_str_subseq(str, subptr - ptr, pend - subptr);
6520  if (wantarray)
6521  rb_ary_push(ary, line);
6522  else
6523  rb_yield(line);
6524  RB_GC_GUARD(str);
6525  }
6526 
6527  if (wantarray)
6528  return ary;
6529  else
6530  return orig;
6531 }
6532 
6533 /*
6534  * call-seq:
6535  * str.each_line(separator=$/) {|substr| block } -> str
6536  * str.each_line(separator=$/) -> an_enumerator
6537  *
6538  * Splits <i>str</i> using the supplied parameter as the record
6539  * separator (<code>$/</code> by default), passing each substring in
6540  * turn to the supplied block. If a zero-length record separator is
6541  * supplied, the string is split into paragraphs delimited by
6542  * multiple successive newlines.
6543  *
6544  * If no block is given, an enumerator is returned instead.
6545  *
6546  * print "Example one\n"
6547  * "hello\nworld".each_line {|s| p s}
6548  * print "Example two\n"
6549  * "hello\nworld".each_line('l') {|s| p s}
6550  * print "Example three\n"
6551  * "hello\n\n\nworld".each_line('') {|s| p s}
6552  *
6553  * <em>produces:</em>
6554  *
6555  * Example one
6556  * "hello\n"
6557  * "world"
6558  * Example two
6559  * "hel"
6560  * "l"
6561  * "o\nworl"
6562  * "d"
6563  * Example three
6564  * "hello\n\n\n"
6565  * "world"
6566  */
6567 
6568 static VALUE
6570 {
6571  return rb_str_enumerate_lines(argc, argv, str, 0);
6572 }
6573 
6574 /*
6575  * call-seq:
6576  * str.lines(separator=$/) -> an_array
6577  *
6578  * Returns an array of lines in <i>str</i> split using the supplied
6579  * record separator (<code>$/</code> by default). This is a
6580  * shorthand for <code>str.each_line(separator).to_a</code>.
6581  *
6582  * If a block is given, which is a deprecated form, works the same as
6583  * <code>each_line</code>.
6584  */
6585 
6586 static VALUE
6588 {
6589  return rb_str_enumerate_lines(argc, argv, str, 1);
6590 }
6591 
6592 static VALUE
6594 {
6595  return LONG2FIX(RSTRING_LEN(str));
6596 }
6597 
6598 static VALUE
6599 rb_str_enumerate_bytes(VALUE str, int wantarray)
6600 {
6601  long i;
6602  VALUE UNINITIALIZED_VAR(ary);
6603 
6604  if (rb_block_given_p()) {
6605  if (wantarray) {
6606 #if STRING_ENUMERATORS_WANTARRAY
6607  rb_warn("given block not used");
6608  ary = rb_ary_new();
6609 #else
6610  rb_warning("passing a block to String#bytes is deprecated");
6611  wantarray = 0;
6612 #endif
6613  }
6614  }
6615  else {
6616  if (wantarray)
6617  ary = rb_ary_new2(RSTRING_LEN(str));
6618  else
6620  }
6621 
6622  for (i=0; i<RSTRING_LEN(str); i++) {
6623  if (wantarray)
6624  rb_ary_push(ary, INT2FIX(RSTRING_PTR(str)[i] & 0xff));
6625  else
6626  rb_yield(INT2FIX(RSTRING_PTR(str)[i] & 0xff));
6627  }
6628  if (wantarray)
6629  return ary;
6630  else
6631  return str;
6632 }
6633 
6634 /*
6635  * call-seq:
6636  * str.each_byte {|fixnum| block } -> str
6637  * str.each_byte -> an_enumerator
6638  *
6639  * Passes each byte in <i>str</i> to the given block, or returns an
6640  * enumerator if no block is given.
6641  *
6642  * "hello".each_byte {|c| print c, ' ' }
6643  *
6644  * <em>produces:</em>
6645  *
6646  * 104 101 108 108 111
6647  */
6648 
6649 static VALUE
6651 {
6652  return rb_str_enumerate_bytes(str, 0);
6653 }
6654 
6655 /*
6656  * call-seq:
6657  * str.bytes -> an_array
6658  *
6659  * Returns an array of bytes in <i>str</i>. This is a shorthand for
6660  * <code>str.each_byte.to_a</code>.
6661  *
6662  * If a block is given, which is a deprecated form, works the same as
6663  * <code>each_byte</code>.
6664  */
6665 
6666 static VALUE
6668 {
6669  return rb_str_enumerate_bytes(str, 1);
6670 }
6671 
6672 static VALUE
6674 {
6675  return rb_str_length(str);
6676 }
6677 
6678 static VALUE
6679 rb_str_enumerate_chars(VALUE str, int wantarray)
6680 {
6681  VALUE orig = str;
6682  VALUE substr;
6683  long i, len, n;
6684  const char *ptr;
6685  rb_encoding *enc;
6686  VALUE UNINITIALIZED_VAR(ary);
6687 
6688  str = rb_str_new4(str);
6689  ptr = RSTRING_PTR(str);
6690  len = RSTRING_LEN(str);
6691  enc = rb_enc_get(str);
6692 
6693  if (rb_block_given_p()) {
6694  if (wantarray) {
6695 #if STRING_ENUMERATORS_WANTARRAY
6696  rb_warn("given block not used");
6697  ary = rb_ary_new_capa(str_strlen(str, enc));
6698 #else
6699  rb_warning("passing a block to String#chars is deprecated");
6700  wantarray = 0;
6701 #endif
6702  }
6703  }
6704  else {
6705  if (wantarray)
6706  ary = rb_ary_new_capa(str_strlen(str, enc));
6707  else
6709  }
6710 
6711  switch (ENC_CODERANGE(str)) {
6712  case ENC_CODERANGE_VALID:
6713  case ENC_CODERANGE_7BIT:
6714  for (i = 0; i < len; i += n) {
6715  n = rb_enc_fast_mbclen(ptr + i, ptr + len, enc);
6716  substr = rb_str_subseq(str, i, n);
6717  if (wantarray)
6718  rb_ary_push(ary, substr);
6719  else
6720  rb_yield(substr);
6721  }
6722  break;
6723  default:
6724  for (i = 0; i < len; i += n) {
6725  n = rb_enc_mbclen(ptr + i, ptr + len, enc);
6726  substr = rb_str_subseq(str, i, n);
6727  if (wantarray)
6728  rb_ary_push(ary, substr);
6729  else
6730  rb_yield(substr);
6731  }
6732  }
6733  RB_GC_GUARD(str);
6734  if (wantarray)
6735  return ary;
6736  else
6737  return orig;
6738 }
6739 
6740 /*
6741  * call-seq:
6742  * str.each_char {|cstr| block } -> str
6743  * str.each_char -> an_enumerator
6744  *
6745  * Passes each character in <i>str</i> to the given block, or returns
6746  * an enumerator if no block is given.
6747  *
6748  * "hello".each_char {|c| print c, ' ' }
6749  *
6750  * <em>produces:</em>
6751  *
6752  * h e l l o
6753  */
6754 
6755 static VALUE
6757 {
6758  return rb_str_enumerate_chars(str, 0);
6759 }
6760 
6761 /*
6762  * call-seq:
6763  * str.chars -> an_array
6764  *
6765  * Returns an array of characters in <i>str</i>. This is a shorthand
6766  * for <code>str.each_char.to_a</code>.
6767  *
6768  * If a block is given, which is a deprecated form, works the same as
6769  * <code>each_char</code>.
6770  */
6771 
6772 static VALUE
6774 {
6775  return rb_str_enumerate_chars(str, 1);
6776 }
6777 
6778 
6779 static VALUE
6781 {
6782  VALUE orig = str;
6783  int n;
6784  unsigned int c;
6785  const char *ptr, *end;
6786  rb_encoding *enc;
6787  VALUE UNINITIALIZED_VAR(ary);
6788 
6789  if (single_byte_optimizable(str))
6790  return rb_str_enumerate_bytes(str, wantarray);
6791 
6792  str = rb_str_new4(str);
6793  ptr = RSTRING_PTR(str);
6794  end = RSTRING_END(str);
6795  enc = STR_ENC_GET(str);
6796 
6797  if (rb_block_given_p()) {
6798  if (wantarray) {
6799 #if STRING_ENUMERATORS_WANTARRAY
6800  rb_warn("given block not used");
6801  ary = rb_ary_new_capa(str_strlen(str, enc));
6802 #else
6803  rb_warning("passing a block to String#codepoints is deprecated");
6804  wantarray = 0;
6805 #endif
6806  }
6807  }
6808  else {
6809  if (wantarray)
6810  ary = rb_ary_new_capa(str_strlen(str, enc));
6811  else
6813  }
6814 
6815  while (ptr < end) {
6816  c = rb_enc_codepoint_len(ptr, end, &n, enc);
6817  if (wantarray)
6818  rb_ary_push(ary, UINT2NUM(c));
6819  else
6820  rb_yield(UINT2NUM(c));
6821  ptr += n;
6822  }
6823  RB_GC_GUARD(str);
6824  if (wantarray)
6825  return ary;
6826  else
6827  return orig;
6828 }
6829 
6830 /*
6831  * call-seq:
6832  * str.each_codepoint {|integer| block } -> str
6833  * str.each_codepoint -> an_enumerator
6834  *
6835  * Passes the <code>Integer</code> ordinal of each character in <i>str</i>,
6836  * also known as a <i>codepoint</i> when applied to Unicode strings to the
6837  * given block.
6838  *
6839  * If no block is given, an enumerator is returned instead.
6840  *
6841  * "hello\u0639".each_codepoint {|c| print c, ' ' }
6842  *
6843  * <em>produces:</em>
6844  *
6845  * 104 101 108 108 111 1593
6846  */
6847 
6848 static VALUE
6850 {
6851  return rb_str_enumerate_codepoints(str, 0);
6852 }
6853 
6854 /*
6855  * call-seq:
6856  * str.codepoints -> an_array
6857  *
6858  * Returns an array of the <code>Integer</code> ordinals of the
6859  * characters in <i>str</i>. This is a shorthand for
6860  * <code>str.each_codepoint.to_a</code>.
6861  *
6862  * If a block is given, which is a deprecated form, works the same as
6863  * <code>each_codepoint</code>.
6864  */
6865 
6866 static VALUE
6868 {
6869  return rb_str_enumerate_codepoints(str, 1);
6870 }
6871 
6872 
6873 static long
6875 {
6876  rb_encoding *enc = STR_ENC_GET(str);
6877  const char *p, *p2, *beg, *end;
6878 
6879  beg = RSTRING_PTR(str);
6880  end = beg + RSTRING_LEN(str);
6881  if (beg > end) return 0;
6882  p = rb_enc_prev_char(beg, end, end, enc);
6883  if (!p) return 0;
6884  if (p > beg && rb_enc_ascget(p, end, 0, enc) == '\n') {
6885  p2 = rb_enc_prev_char(beg, p, end, enc);
6886  if (p2 && rb_enc_ascget(p2, end, 0, enc) == '\r') p = p2;
6887  }
6888  return p - beg;
6889 }
6890 
6891 /*
6892  * call-seq:
6893  * str.chop! -> str or nil
6894  *
6895  * Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
6896  * or <code>nil</code> if <i>str</i> is the empty string. See also
6897  * <code>String#chomp!</code>.
6898  */
6899 
6900 static VALUE
6902 {
6903  str_modify_keep_cr(str);
6904  if (RSTRING_LEN(str) > 0) {
6905  long len;
6906  len = chopped_length(str);
6907  STR_SET_LEN(str, len);
6908  RSTRING_PTR(str)[len] = '\0';
6909  if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
6910  ENC_CODERANGE_CLEAR(str);
6911  }
6912  return str;
6913  }
6914  return Qnil;
6915 }
6916 
6917 
6918 /*
6919  * call-seq:
6920  * str.chop -> new_str
6921  *
6922  * Returns a new <code>String</code> with the last character removed. If the
6923  * string ends with <code>\r\n</code>, both characters are removed. Applying
6924  * <code>chop</code> to an empty string returns an empty
6925  * string. <code>String#chomp</code> is often a safer alternative, as it leaves
6926  * the string unchanged if it doesn't end in a record separator.
6927  *
6928  * "string\r\n".chop #=> "string"
6929  * "string\n\r".chop #=> "string\n"
6930  * "string\n".chop #=> "string"
6931  * "string".chop #=> "strin"
6932  * "x".chop.chop #=> ""
6933  */
6934 
6935 static VALUE
6937 {
6938  return rb_str_subseq(str, 0, chopped_length(str));
6939 }
6940 
6941 
6942 /*
6943  * call-seq:
6944  * str.chomp!(separator=$/) -> str or nil
6945  *
6946  * Modifies <i>str</i> in place as described for <code>String#chomp</code>,
6947  * returning <i>str</i>, or <code>nil</code> if no modifications were made.
6948  */
6949 
6950 static VALUE
6952 {
6953  rb_encoding *enc;
6954  VALUE rs;
6955  int newline;
6956  char *p, *pp, *e;
6957  long len, rslen;
6958 
6959  str_modify_keep_cr(str);
6960  len = RSTRING_LEN(str);
6961  if (len == 0) return Qnil;
6962  p = RSTRING_PTR(str);
6963  e = p + len;
6964  if (argc == 0) {
6965  rs = rb_rs;
6966  if (rs == rb_default_rs) {
6967  smart_chomp:
6968  enc = rb_enc_get(str);
6969  if (rb_enc_mbminlen(enc) > 1) {
6970  pp = rb_enc_left_char_head(p, e-rb_enc_mbminlen(enc), e, enc);
6971  if (rb_enc_is_newline(pp, e, enc)) {
6972  e = pp;
6973  }
6974  pp = e - rb_enc_mbminlen(enc);
6975  if (pp >= p) {
6976  pp = rb_enc_left_char_head(p, pp, e, enc);
6977  if (rb_enc_ascget(pp, e, 0, enc) == '\r') {
6978  e = pp;
6979  }
6980  }
6981  if (e == RSTRING_END(str)) {
6982  return Qnil;
6983  }
6984  len = e - RSTRING_PTR(str);
6985  STR_SET_LEN(str, len);
6986  }
6987  else {
6988  if (RSTRING_PTR(str)[len-1] == '\n') {
6989  STR_DEC_LEN(str);
6990  if (RSTRING_LEN(str) > 0 &&
6991  RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') {
6992  STR_DEC_LEN(str);
6993  }
6994  }
6995  else if (RSTRING_PTR(str)[len-1] == '\r') {
6996  STR_DEC_LEN(str);
6997  }
6998  else {
6999  return Qnil;
7000  }
7001  }
7002  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
7003  return str;
7004  }
7005  }
7006  else {
7007  rb_scan_args(argc, argv, "01", &rs);
7008  }
7009  if (NIL_P(rs)) return Qnil;
7010  StringValue(rs);
7011  rslen = RSTRING_LEN(rs);
7012  if (rslen == 0) {
7013  while (len>0 && p[len-1] == '\n') {
7014  len--;
7015  if (len>0 && p[len-1] == '\r')
7016  len--;
7017  }
7018  if (len < RSTRING_LEN(str)) {
7019  STR_SET_LEN(str, len);
7020  RSTRING_PTR(str)[len] = '\0';
7021  return str;
7022  }
7023  return Qnil;
7024  }
7025  if (rslen > len) return Qnil;
7026  newline = RSTRING_PTR(rs)[rslen-1];
7027  if (rslen == 1 && newline == '\n')
7028  goto smart_chomp;
7029 
7030  enc = rb_enc_check(str, rs);
7031  if (is_broken_string(rs)) {
7032  return Qnil;
7033  }
7034  pp = e - rslen;
7035  if (p[len-1] == newline &&
7036  (rslen <= 1 ||
7037  memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) {
7038  if (rb_enc_left_char_head(p, pp, e, enc) != pp)
7039  return Qnil;
7040  if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
7041  ENC_CODERANGE_CLEAR(str);
7042  }
7043  STR_SET_LEN(str, RSTRING_LEN(str) - rslen);
7044  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
7045  return str;
7046  }
7047  return Qnil;
7048 }
7049 
7050 
7051 /*
7052  * call-seq:
7053  * str.chomp(separator=$/) -> new_str
7054  *
7055  * Returns a new <code>String</code> with the given record separator removed
7056  * from the end of <i>str</i> (if present). If <code>$/</code> has not been
7057  * changed from the default Ruby record separator, then <code>chomp</code> also
7058  * removes carriage return characters (that is it will remove <code>\n</code>,
7059  * <code>\r</code>, and <code>\r\n</code>). If <code>$/</code> is an empty string,
7060  * it will remove all trailing newlines from the string.
7061  *
7062  * "hello".chomp #=> "hello"
7063  * "hello\n".chomp #=> "hello"
7064  * "hello\r\n".chomp #=> "hello"
7065  * "hello\n\r".chomp #=> "hello\n"
7066  * "hello\r".chomp #=> "hello"
7067  * "hello \n there".chomp #=> "hello \n there"
7068  * "hello".chomp("llo") #=> "he"
7069  * "hello\r\n\r\n".chomp('') #=> "hello"
7070  * "hello\r\n\r\r\n".chomp('') #=> "hello\r\n\r"
7071  */
7072 
7073 static VALUE
7075 {
7076  str = rb_str_dup(str);
7077  rb_str_chomp_bang(argc, argv, str);
7078  return str;
7079 }
7080 
7081 /*
7082  * call-seq:
7083  * str.lstrip! -> self or nil
7084  *
7085  * Removes leading whitespace from <i>str</i>, returning <code>nil</code> if no
7086  * change was made. See also <code>String#rstrip!</code> and
7087  * <code>String#strip!</code>.
7088  *
7089  * " hello ".lstrip #=> "hello "
7090  * "hello".lstrip! #=> nil
7091  */
7092 
7093 static VALUE
7095 {
7096  rb_encoding *enc;
7097  char *s, *t, *e;
7098 
7099  str_modify_keep_cr(str);
7100  enc = STR_ENC_GET(str);
7101  s = RSTRING_PTR(str);
7102  if (!s || RSTRING_LEN(str) == 0) return Qnil;
7103  e = t = RSTRING_END(str);
7104  /* remove spaces at head */
7105  while (s < e) {
7106  int n;
7107  unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
7108 
7109  if (!rb_isspace(cc)) break;
7110  s += n;
7111  }
7112 
7113  if (s > RSTRING_PTR(str)) {
7114  STR_SET_LEN(str, t-s);
7115  memmove(RSTRING_PTR(str), s, RSTRING_LEN(str));
7116  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
7117  return str;
7118  }
7119  return Qnil;
7120 }
7121 
7122 
7123 /*
7124  * call-seq:
7125  * str.lstrip -> new_str
7126  *
7127  * Returns a copy of <i>str</i> with leading whitespace removed. See also
7128  * <code>String#rstrip</code> and <code>String#strip</code>.
7129  *
7130  * " hello ".lstrip #=> "hello "
7131  * "hello".lstrip #=> "hello"
7132  */
7133 
7134 static VALUE
7136 {
7137  str = rb_str_dup(str);
7138  rb_str_lstrip_bang(str);
7139  return str;
7140 }
7141 
7142 
7143 /*
7144  * call-seq:
7145  * str.rstrip! -> self or nil
7146  *
7147  * Removes trailing whitespace from <i>str</i>, returning <code>nil</code> if
7148  * no change was made. See also <code>String#lstrip!</code> and
7149  * <code>String#strip!</code>.
7150  *
7151  * " hello ".rstrip #=> " hello"
7152  * "hello".rstrip! #=> nil
7153  */
7154 
7155 static VALUE
7157 {
7158  rb_encoding *enc;
7159  char *s, *t, *e;
7160 
7161  str_modify_keep_cr(str);
7162  enc = STR_ENC_GET(str);
7164  s = RSTRING_PTR(str);
7165  if (!s || RSTRING_LEN(str) == 0) return Qnil;
7166  t = e = RSTRING_END(str);
7167 
7168  /* remove trailing spaces or '\0's */
7169  if (single_byte_optimizable(str)) {
7170  unsigned char c;
7171  while (s < t && ((c = *(t-1)) == '\0' || ascii_isspace(c))) t--;
7172  }
7173  else {
7174  char *tp;
7175 
7176  while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
7177  unsigned int c = rb_enc_codepoint(tp, e, enc);
7178  if (c && !rb_isspace(c)) break;
7179  t = tp;
7180  }
7181  }
7182  if (t < e) {
7183  long len = t-RSTRING_PTR(str);
7184 
7185  STR_SET_LEN(str, len);
7186  RSTRING_PTR(str)[len] = '\0';
7187  return str;
7188  }
7189  return Qnil;
7190 }
7191 
7192 
7193 /*
7194  * call-seq:
7195  * str.rstrip -> new_str
7196  *
7197  * Returns a copy of <i>str</i> with trailing whitespace removed. See also
7198  * <code>String#lstrip</code> and <code>String#strip</code>.
7199  *
7200  * " hello ".rstrip #=> " hello"
7201  * "hello".rstrip #=> "hello"
7202  */
7203 
7204 static VALUE
7206 {
7207  str = rb_str_dup(str);
7208  rb_str_rstrip_bang(str);
7209  return str;
7210 }
7211 
7212 
7213 /*
7214  * call-seq:
7215  * str.strip! -> str or nil
7216  *
7217  * Removes leading and trailing whitespace from <i>str</i>. Returns
7218  * <code>nil</code> if <i>str</i> was not altered.
7219  */
7220 
7221 static VALUE
7223 {
7224  VALUE l = rb_str_lstrip_bang(str);
7225  VALUE r = rb_str_rstrip_bang(str);
7226 
7227  if (NIL_P(l) && NIL_P(r)) return Qnil;
7228  return str;
7229 }
7230 
7231 
7232 /*
7233  * call-seq:
7234  * str.strip -> new_str
7235  *
7236  * Returns a copy of <i>str</i> with leading and trailing whitespace removed.
7237  *
7238  * " hello ".strip #=> "hello"
7239  * "\tgoodbye\r\n".strip #=> "goodbye"
7240  */
7241 
7242 static VALUE
7244 {
7245  str = rb_str_dup(str);
7246  rb_str_strip_bang(str);
7247  return str;
7248 }
7249 
7250 static VALUE
7251 scan_once(VALUE str, VALUE pat, long *start)
7252 {
7253  VALUE result, match;
7254  struct re_registers *regs;
7255  int i;
7256 
7257  if (rb_reg_search(pat, str, *start, 0) >= 0) {
7258  match = rb_backref_get();
7259  regs = RMATCH_REGS(match);
7260  if (BEG(0) == END(0)) {
7261  rb_encoding *enc = STR_ENC_GET(str);
7262  /*
7263  * Always consume at least one character of the input string
7264  */
7265  if (RSTRING_LEN(str) > END(0))
7266  *start = END(0)+rb_enc_fast_mbclen(RSTRING_PTR(str)+END(0),
7267  RSTRING_END(str), enc);
7268  else
7269  *start = END(0)+1;
7270  }
7271  else {
7272  *start = END(0);
7273  }
7274  if (regs->num_regs == 1) {
7275  return rb_reg_nth_match(0, match);
7276  }
7277  result = rb_ary_new2(regs->num_regs);
7278  for (i=1; i < regs->num_regs; i++) {
7280  }
7281 
7282  return result;
7283  }
7284  return Qnil;
7285 }
7286 
7287 
7288 /*
7289  * call-seq:
7290  * str.scan(pattern) -> array
7291  * str.scan(pattern) {|match, ...| block } -> str
7292  *
7293  * Both forms iterate through <i>str</i>, matching the pattern (which may be a
7294  * <code>Regexp</code> or a <code>String</code>). For each match, a result is
7295  * generated and either added to the result array or passed to the block. If
7296  * the pattern contains no groups, each individual result consists of the
7297  * matched string, <code>$&</code>. If the pattern contains groups, each
7298  * individual result is itself an array containing one entry per group.
7299  *
7300  * a = "cruel world"
7301  * a.scan(/\w+/) #=> ["cruel", "world"]
7302  * a.scan(/.../) #=> ["cru", "el ", "wor"]
7303  * a.scan(/(...)/) #=> [["cru"], ["el "], ["wor"]]
7304  * a.scan(/(..)(..)/) #=> [["cr", "ue"], ["l ", "wo"]]
7305  *
7306  * And the block form:
7307  *
7308  * a.scan(/\w+/) {|w| print "<<#{w}>> " }
7309  * print "\n"
7310  * a.scan(/(.)(.)/) {|x,y| print y, x }
7311  * print "\n"
7312  *
7313  * <em>produces:</em>
7314  *
7315  * <<cruel>> <<world>>
7316  * rceu lowlr
7317  */
7318 
7319 static VALUE
7321 {
7322  VALUE result;
7323  long start = 0;
7324  long last = -1, prev = 0;
7325  char *p = RSTRING_PTR(str); long len = RSTRING_LEN(str);
7326 
7327  pat = get_pat(pat, 1);
7328  if (!rb_block_given_p()) {
7329  VALUE ary = rb_ary_new();
7330 
7331  while (!NIL_P(result = scan_once(str, pat, &start))) {
7332  last = prev;
7333  prev = start;
7334  rb_ary_push(ary, result);
7335  }
7336  if (last >= 0) rb_reg_search(pat, str, last, 0);
7337  return ary;
7338  }
7339 
7340  while (!NIL_P(result = scan_once(str, pat, &start))) {
7341  last = prev;
7342  prev = start;
7343  rb_yield(result);
7344  str_mod_check(str, p, len);
7345  }
7346  if (last >= 0) rb_reg_search(pat, str, last, 0);
7347  return str;
7348 }
7349 
7350 
7351 /*
7352  * call-seq:
7353  * str.hex -> integer
7354  *
7355  * Treats leading characters from <i>str</i> as a string of hexadecimal digits
7356  * (with an optional sign and an optional <code>0x</code>) and returns the
7357  * corresponding number. Zero is returned on error.
7358  *
7359  * "0x0a".hex #=> 10
7360  * "-1234".hex #=> -4660
7361  * "0".hex #=> 0
7362  * "wombat".hex #=> 0
7363  */
7364 
7365 static VALUE
7367 {
7368  return rb_str_to_inum(str, 16, FALSE);
7369 }
7370 
7371 
7372 /*
7373  * call-seq:
7374  * str.oct -> integer
7375  *
7376  * Treats leading characters of <i>str</i> as a string of octal digits (with an
7377  * optional sign) and returns the corresponding number. Returns 0 if the
7378  * conversion fails.
7379  *
7380  * "123".oct #=> 83
7381  * "-377".oct #=> -255
7382  * "bad".oct #=> 0
7383  * "0377bad".oct #=> 255
7384  */
7385 
7386 static VALUE
7388 {
7389  return rb_str_to_inum(str, -8, FALSE);
7390 }
7391 
7392 
7393 /*
7394  * call-seq:
7395  * str.crypt(salt_str) -> new_str
7396  *
7397  * Applies a one-way cryptographic hash to <i>str</i> by invoking the
7398  * standard library function <code>crypt(3)</code> with the given
7399  * salt string. While the format and the result are system and
7400  * implementation dependent, using a salt matching the regular
7401  * expression <code>\A[a-zA-Z0-9./]{2}</code> should be valid and
7402  * safe on any platform, in which only the first two characters are
7403  * significant.
7404  *
7405  * This method is for use in system specific scripts, so if you want
7406  * a cross-platform hash function consider using Digest or OpenSSL
7407  * instead.
7408  */
7409 
7410 static VALUE
7412 {
7413  extern char *crypt(const char *, const char *);
7414  VALUE result;
7415  const char *s, *saltp;
7416  char *res;
7417 #ifdef BROKEN_CRYPT
7418  char salt_8bit_clean[3];
7419 #endif
7420 
7421  StringValue(salt);
7422  if (RSTRING_LEN(salt) < 2)
7423  rb_raise(rb_eArgError, "salt too short (need >=2 bytes)");
7424 
7425  s = RSTRING_PTR(str);
7426  if (!s) s = "";
7427  saltp = RSTRING_PTR(salt);
7428 #ifdef BROKEN_CRYPT
7429  if (!ISASCII((unsigned char)saltp[0]) || !ISASCII((unsigned char)saltp[1])) {
7430  salt_8bit_clean[0] = saltp[0] & 0x7f;
7431  salt_8bit_clean[1] = saltp[1] & 0x7f;
7432  salt_8bit_clean[2] = '\0';
7433  saltp = salt_8bit_clean;
7434  }
7435 #endif
7436  res = crypt(s, saltp);
7437  if (!res) {
7438  rb_sys_fail("crypt");
7439  }
7440  result = rb_str_new2(res);
7442  return result;
7443 }
7444 
7445 
7446 /*
7447  * call-seq:
7448  * str.intern -> symbol
7449  * str.to_sym -> symbol
7450  *
7451  * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
7452  * symbol if it did not previously exist. See <code>Symbol#id2name</code>.
7453  *
7454  * "Koala".intern #=> :Koala
7455  * s = 'cat'.to_sym #=> :cat
7456  * s == :cat #=> true
7457  * s = '@cat'.to_sym #=> :@cat
7458  * s == :@cat #=> true
7459  *
7460  * This can also be used to create symbols that cannot be represented using the
7461  * <code>:xxx</code> notation.
7462  *
7463  * 'cat and dog'.to_sym #=> :"cat and dog"
7464  */
7465 
7466 VALUE
7468 {
7469  VALUE str = RB_GC_GUARD(s);
7470  ID id;
7471 
7472  id = rb_intern_str(str);
7473  return ID2SYM(id);
7474 }
7475 
7476 
7477 /*
7478  * call-seq:
7479  * str.ord -> integer
7480  *
7481  * Return the <code>Integer</code> ordinal of a one-character string.
7482  *
7483  * "a".ord #=> 97
7484  */
7485 
7486 VALUE
7488 {
7489  unsigned int c;
7490 
7492  return UINT2NUM(c);
7493 }
7494 /*
7495  * call-seq:
7496  * str.sum(n=16) -> integer
7497  *
7498  * Returns a basic <em>n</em>-bit checksum of the characters in <i>str</i>,
7499  * where <em>n</em> is the optional <code>Fixnum</code> parameter, defaulting
7500  * to 16. The result is simply the sum of the binary value of each character in
7501  * <i>str</i> modulo <code>2**n - 1</code>. This is not a particularly good
7502  * checksum.
7503  */
7504 
7505 static VALUE
7506 rb_str_sum(int argc, VALUE *argv, VALUE str)
7507 {
      /*
       * Adds up the byte values of str and keeps the low `bits` bits of the
       * total. argc/argv carry the optional bit width (16 when omitted).
       * A width of 0 returns the unreduced sum; a negative width is treated
       * as 0. The result is returned as an Integer VALUE.
       */
7508  VALUE vbits;
7509  int bits;
7510  char *ptr, *p, *pend;
7511  long len;
      /* sum collects overflow through Integer#+; sum0 is the native accumulator */
7512  VALUE sum = INT2FIX(0);
7513  unsigned long sum0 = 0;
7514 
7515  if (argc == 0) {
7516  bits = 16;
7517  }
7518  else {
7519  rb_scan_args(argc, argv, "01", &vbits);
7520  bits = NUM2INT(vbits);
      /* A negative width would reach the `1UL << bits` mask below, which is
       * undefined behavior in C; clamp it so the unreduced sum is returned. */
      if (bits < 0) bits = 0;
7521  }
7522  ptr = p = RSTRING_PTR(str);
7523  len = RSTRING_LEN(str);
7524  pend = p + len;
7525 
7526  while (p < pend) {
      /* sum0 is about to exceed what LONG2FIX can carry: flush it into sum */
7527  if (FIXNUM_MAX - UCHAR_MAX < sum0) {
7528  sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
      /* the '+' call runs Ruby code; presumably this guards against str
       * having been modified meanwhile -- confirm in str_mod_check */
7529  str_mod_check(str, ptr, len);
7530  sum0 = 0;
7531  }
7532  sum0 += (unsigned char)*p;
7533  p++;
7534  }
7535 
7536  if (bits == 0) {
      /* no reduction requested: just fold the remainder into sum */
7537  if (sum0) {
7538  sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
7539  }
7540  }
7541  else {
7542  if (sum == INT2FIX(0)) {
      /* nothing was flushed, so the whole total is in sum0; mask it natively
       * unless the width covers every bit of an unsigned long anyway */
7543  if (bits < (int)sizeof(long)*CHAR_BIT) {
7544  sum0 &= (((unsigned long)1)<<bits)-1;
7545  }
7546  sum = LONG2FIX(sum0);
7547  }
7548  else {
7549  VALUE mod;
7550 
7551  if (sum0) {
7552  sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
7553  }
7554 
      /* sum & ((1 << bits) - 1), computed with Integer methods */
7555  mod = rb_funcall(INT2FIX(1), rb_intern("<<"), 1, INT2FIX(bits));
7556  mod = rb_funcall(mod, '-', 1, INT2FIX(1));
7557  sum = rb_funcall(sum, '&', 1, mod);
7558  }
7559  }
7560  return sum;
7561 }
7562 
/*
 * Shared implementation behind the three wrappers that call it with
 * jflag 'l', 'r' and 'c'.
 *
 * argc/argv - required width, then an optional pad string (a single space
 *             when omitted).
 * str       - the string to pad.
 * jflag     - 'l' puts all padding on the right, 'r' puts it all on the
 *             left, any other value splits it (left side gets n/2).
 *
 * Returns a duplicate of str when width is negative or str already has at
 * least width characters; otherwise a new padded string.
 * Raises ArgumentError for an empty pad string or when the resulting byte
 * length would overflow a long.
 */
7563 static VALUE
7564 rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
7565 {
7566  rb_encoding *enc;
7567  VALUE w;
      /* flen: pad length in bytes; fclen: pad length in characters */
7568  long width, len, flen = 1, fclen = 1;
7569  VALUE res;
7570  char *p;
7571  const char *f = " ";
      /* llen/rlen: pad characters needed on each side;
       * llen2/rlen2: byte length of the trailing partial copy of the pad */
7572  long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
7573  volatile VALUE pad;
7574  int singlebyte = 1, cr;
7575
7576  rb_scan_args(argc, argv, "11", &w, &pad);
7577  enc = STR_ENC_GET(str);
7578  width = NUM2LONG(w);
7579  if (argc == 2) {
7580  StringValue(pad);
7581  enc = rb_enc_check(str, pad);
7582  f = RSTRING_PTR(pad);
7583  flen = RSTRING_LEN(pad);
7584  fclen = str_strlen(pad, enc);
7585  singlebyte = single_byte_optimizable(pad);
7586  if (flen == 0 || fclen == 0) {
7587  rb_raise(rb_eArgError, "zero width padding");
7588  }
7589  }
7590  len = str_strlen(str, enc);
7591  if (width < 0 || len >= width) return rb_str_dup(str);
7592  n = width - len;
7593  llen = (jflag == 'l') ? 0 : ((jflag == 'r') ? n : n/2);
7594  rlen = n - llen;
7595  cr = ENC_CODERANGE(str);
7596  if (flen > 1) {
      /* presumably str_offset yields the byte offset of the given character
       * index inside the pad -- confirm against its definition */
7597  llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
7598  rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
7599  }
7600  size = RSTRING_LEN(str);
      /* compute the result's byte length step by step, checking each step
       * against LONG_MAX before it is taken */
7601  if ((len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
7602  (len *= flen) >= LONG_MAX - llen2 - rlen2 ||
7603  (len += llen2 + rlen2) >= LONG_MAX - size) {
7604  rb_raise(rb_eArgError, "argument too big");
7605  }
7606  len += size;
7607  res = rb_str_new5(str, 0, len);
7608  p = RSTRING_PTR(res);
      /* left padding: memset for a one-byte pad, otherwise whole copies of
       * the pad followed by the partial copy */
7609  if (flen <= 1) {
7610  memset(p, *f, llen);
7611  p += llen;
7612  }
7613  else {
7614  while (llen >= fclen) {
7615  memcpy(p,f,flen);
7616  p += flen;
7617  llen -= fclen;
7618  }
7619  if (llen > 0) {
7620  memcpy(p, f, llen2);
7621  p += llen2;
7622  }
7623  }
      /* the original bytes */
7624  memcpy(p, RSTRING_PTR(str), size);
7625  p += size;
      /* right padding, built the same way as the left */
7626  if (flen <= 1) {
7627  memset(p, *f, rlen);
7628  p += rlen;
7629  }
7630  else {
7631  while (rlen >= fclen) {
7632  memcpy(p,f,flen);
7633  p += flen;
7634  rlen -= fclen;
7635  }
7636  if (rlen > 0) {
7637  memcpy(p, f, rlen2);
7638  p += rlen2;
7639  }
7640  }
7641  *p = '\0';
7642  STR_SET_LEN(res, p-RSTRING_PTR(res));
      /* the result is infected from both inputs */
7643  OBJ_INFECT_RAW(res, str);
7644  if (!NIL_P(pad)) OBJ_INFECT_RAW(res, pad);
7645  rb_enc_associate(res, enc);
      /* combine the cached code ranges; a broken result is left unset */
7646  if (argc == 2)
7647  cr = ENC_CODERANGE_AND(cr, ENC_CODERANGE(pad));
7648  if (cr != ENC_CODERANGE_BROKEN)
7649  ENC_CODERANGE_SET(res, cr);
7650  return res;
7651 }
7652 
7653 
7654 /*
7655  * call-seq:
7656  * str.ljust(integer, padstr=' ') -> new_str
7657  *
7658  * If <i>integer</i> is greater than the length of <i>str</i>, returns a new
7659  * <code>String</code> of length <i>integer</i> with <i>str</i> left justified
7660  * and padded with <i>padstr</i>; otherwise, returns <i>str</i>.
7661  *
7662  * "hello".ljust(4) #=> "hello"
7663  * "hello".ljust(20) #=> "hello               "
7664  * "hello".ljust(20, '1234') #=> "hello123412341234123"
7665  */
7666 
7667 static VALUE
7669 {
7670  return rb_str_justify(argc, argv, str, 'l');
7671 }
7672 
7673 
7674 /*
7675  * call-seq:
7676  * str.rjust(integer, padstr=' ') -> new_str
7677  *
7678  * If <i>integer</i> is greater than the length of <i>str</i>, returns a new
7679  * <code>String</code> of length <i>integer</i> with <i>str</i> right justified
7680  * and padded with <i>padstr</i>; otherwise, returns <i>str</i>.
7681  *
7682  * "hello".rjust(4) #=> "hello"
7683  * "hello".rjust(20) #=> "               hello"
7684  * "hello".rjust(20, '1234') #=> "123412341234123hello"
7685  */
7686 
7687 static VALUE
7689 {
7690  return rb_str_justify(argc, argv, str, 'r');
7691 }
7692 
7693 
7694 /*
7695  * call-seq:
7696  * str.center(width, padstr=' ') -> new_str
7697  *
7698  * Centers +str+ in +width+. If +width+ is greater than the length of +str+,
7699  * returns a new String of length +width+ with +str+ centered and padded with
7700  * +padstr+; otherwise, returns +str+.
7701  *
7702  * "hello".center(4) #=> "hello"
7703  * "hello".center(20) #=> "       hello        "
7704  * "hello".center(20, '123') #=> "1231231hello12312312"
7705  */
7706 
7707 static VALUE
7709 {
7710  return rb_str_justify(argc, argv, str, 'c');
7711 }
7712 
7713 /*
7714  * call-seq:
7715  * str.partition(sep) -> [head, sep, tail]
7716  * str.partition(regexp) -> [head, match, tail]
7717  *
7718  * Searches <i>sep</i> or pattern (<i>regexp</i>) in the string
7719  * and returns the part before it, the match, and the part
7720  * after it.
7721  * If it is not found, returns <i>str</i> and two empty strings.
7722  *
7723  * "hello".partition("l") #=> ["he", "l", "lo"]
7724  * "hello".partition("x") #=> ["hello", "", ""]
7725  * "hello".partition(/.l/) #=> ["h", "el", "lo"]
7726  */
7727 
7728 static VALUE
7730 {
7731  long pos;
7732  int regex = FALSE;
7733 
7734  if (RB_TYPE_P(sep, T_REGEXP)) {
7735  pos = rb_reg_search(sep, str, 0, 0);
7736  regex = TRUE;
7737  }
7738  else {
7739  VALUE tmp;
7740 
7741  tmp = rb_check_string_type(sep);
7742  if (NIL_P(tmp)) {
7743  rb_raise(rb_eTypeError, "type mismatch: %s given",
7744  rb_obj_classname(sep));
7745  }
7746  sep = tmp;
7747  pos = rb_str_index(str, sep, 0);
7748  }
7749  if (pos < 0) {
7750  failed:
7751  return rb_ary_new3(3, str, str_new_empty(str), str_new_empty(str));
7752  }
7753  if (regex) {
7754  sep = rb_str_subpat(str, sep, INT2FIX(0));
7755  if (pos == 0 && RSTRING_LEN(sep) == 0) goto failed;
7756  }
7757  return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
7758  sep,
7759  rb_str_subseq(str, pos+RSTRING_LEN(sep),
7760  RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
7761 }
7762 
7763 /*
7764  * call-seq:
7765  * str.rpartition(sep) -> [head, sep, tail]
7766  * str.rpartition(regexp) -> [head, match, tail]
7767  *
7768  * Searches <i>sep</i> or pattern (<i>regexp</i>) in the string from the end
7769  * of the string, and returns the part before it, the match, and the part
7770  * after it.
7771  * If it is not found, returns two empty strings and <i>str</i>.
7772  *
7773  * "hello".rpartition("l") #=> ["hel", "l", "o"]
7774  * "hello".rpartition("x") #=> ["", "", "hello"]
7775  * "hello".rpartition(/.l/) #=> ["he", "ll", "o"]
7776  */
7777 
7778 static VALUE
7780 {
7781  long pos = RSTRING_LEN(str);
7782  int regex = FALSE;
7783 
7784  if (RB_TYPE_P(sep, T_REGEXP)) {
7785  pos = rb_reg_search(sep, str, pos, 1);
7786  regex = TRUE;
7787  }
7788  else {
7789  VALUE tmp;
7790 
7791  tmp = rb_check_string_type(sep);
7792  if (NIL_P(tmp)) {
7793  rb_raise(rb_eTypeError, "type mismatch: %s given",
7794  rb_obj_classname(sep));
7795  }
7796  sep = tmp;
7797  pos = rb_str_sublen(str, pos);
7798  pos = rb_str_rindex(str, sep, pos);
7799  }
7800  if (pos < 0) {
7801  return rb_ary_new3(3, str_new_empty(str), str_new_empty(str), str);
7802  }
7803  if (regex) {
7804  sep = rb_reg_nth_match(0, rb_backref_get());
7805  }
7806  else {
7807  pos = rb_str_offset(str, pos);
7808  }
7809  return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
7810  sep,
7811  rb_str_subseq(str, pos+RSTRING_LEN(sep),
7812  RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
7813 }
7814 
7815 /*
7816  * call-seq:
7817  * str.start_with?([prefixes]+) -> true or false
7818  *
7819  * Returns true if +str+ starts with one of the +prefixes+ given.
7820  *
7821  * "hello".start_with?("hell") #=> true
7822  *
7823  * # returns true if one of the prefixes matches.
7824  * "hello".start_with?("heaven", "hell") #=> true
7825  * "hello".start_with?("heaven", "paradise") #=> false
7826  */
7827 
7828 static VALUE
7830 {
7831  int i;
7832 
7833  for (i=0; i<argc; i++) {
7834  VALUE tmp = argv[i];
7835  StringValue(tmp);
7836  rb_enc_check(str, tmp);
7837  if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
7838  if (memcmp(RSTRING_PTR(str), RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
7839  return Qtrue;
7840  }
7841  return Qfalse;
7842 }
7843 
7844 /*
7845  * call-seq:
7846  * str.end_with?([suffixes]+) -> true or false
7847  *
7848  * Returns true if +str+ ends with one of the +suffixes+ given.
7849  */
7850 
7851 static VALUE
7853 {
7854  int i;
7855  char *p, *s, *e;
7856  rb_encoding *enc;
7857 
7858  for (i=0; i<argc; i++) {
7859  VALUE tmp = argv[i];
7860  StringValue(tmp);
7861  enc = rb_enc_check(str, tmp);
7862  if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
7863  p = RSTRING_PTR(str);
7864  e = p + RSTRING_LEN(str);
7865  s = e - RSTRING_LEN(tmp);
7866  if (rb_enc_left_char_head(p, s, e, enc) != s)
7867  continue;
7868  if (memcmp(s, RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
7869  return Qtrue;
7870  }
7871  return Qfalse;
7872 }
7873 
7874 void
7876 {
7877  if (!NIL_P(val) && !RB_TYPE_P(val, T_STRING)) {
7878  rb_raise(rb_eTypeError, "value of %s must be String", rb_id2name(id));
7879  }
7880  *var = val;
7881 }
7882 
7883 
7884 /*
7885  * call-seq:
7886  * str.force_encoding(encoding) -> str
7887  *
7888  * Changes the encoding to +encoding+ and returns self.
7889  */
7890 
7891 static VALUE
7893 {
7894  str_modifiable(str);
7895  rb_enc_associate(str, rb_to_encoding(enc));
7896  ENC_CODERANGE_CLEAR(str);
7897  return str;
7898 }
7899 
7900 /*
7901  * call-seq:
7902  * str.b -> str
7903  *
7904  * Returns a copied string whose encoding is ASCII-8BIT.
7905  */
7906 
7907 static VALUE
7909 {
7910  VALUE str2 = str_alloc(rb_cString);
7911  str_replace_shared_without_enc(str2, str);
7912  OBJ_INFECT_RAW(str2, str);
7913  ENC_CODERANGE_CLEAR(str2);
7914  return str2;
7915 }
7916 
7917 /*
7918  * call-seq:
7919  * str.valid_encoding? -> true or false
7920  *
7921  * Returns true for a string which is encoded correctly.
7922  *
7923  * "\xc2\xa1".force_encoding("UTF-8").valid_encoding? #=> true
7924  * "\xc2".force_encoding("UTF-8").valid_encoding? #=> false
7925  * "\x80".force_encoding("UTF-8").valid_encoding? #=> false
7926  */
7927 
7928 static VALUE
7930 {
7931  int cr = rb_enc_str_coderange(str);
7932 
7933  return cr == ENC_CODERANGE_BROKEN ? Qfalse : Qtrue;
7934 }
7935 
7936 /*
7937  * call-seq:
7938  * str.ascii_only? -> true or false
7939  *
7940  * Returns true for a string which has only ASCII characters.
7941  *
7942  * "abc".force_encoding("UTF-8").ascii_only? #=> true
7943  * "abc\u{6666}".force_encoding("UTF-8").ascii_only? #=> false
7944  */
7945 
7946 static VALUE
7948 {
7949  int cr = rb_enc_str_coderange(str);
7950 
7951  return cr == ENC_CODERANGE_7BIT ? Qtrue : Qfalse;
7952 }
7953 
/*
 * Shortens str to fit in len characters, marking the cut with "...".
 *
 * Returns str itself when it already fits. When len is not larger than the
 * ellipsis (3), or there is no room to step back for it, the result is only
 * the leading part of "..."; otherwise it is a leading part of str followed
 * by "...". For encodings that are not ASCII-compatible the ellipsis is
 * converted with rb_str_encode() before use.
 *
 * Raises IndexError when len is negative.
 */
7968 VALUE
7969 rb_str_ellipsize(VALUE str, long len)
7970 {
7971  static const char ellipsis[] = "...";
7972  const long ellipsislen = sizeof(ellipsis) - 1;
7973  rb_encoding *const enc = rb_enc_get(str);
7974  const long blen = RSTRING_LEN(str);
7975  const char *const p = RSTRING_PTR(str), *e = p + blen;
7976  VALUE estr, ret = 0;
7977
7978  if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
      /* cheap byte-count test first; otherwise e is moved to the end of the
       * first len characters (presumably what rb_enc_nth computes -- confirm) */
7979  if (len * rb_enc_mbminlen(enc) >= blen ||
7980  (e = rb_enc_nth(p, e, len, enc)) - p == blen) {
7981  ret = str;
7982  }
      /* note: the failing step-back also resets len to ellipsislen, so the
       * branch below then emits the full "..." */
7983  else if (len <= ellipsislen ||
7984  !(e = rb_enc_step_back(p, e, e, len = ellipsislen, enc))) {
7985  if (rb_enc_asciicompat(enc)) {
7986  ret = rb_str_new_with_class(str, ellipsis, len);
7987  rb_enc_associate(ret, enc);
7988  }
7989  else {
7990  estr = rb_usascii_str_new(ellipsis, len);
7991  ret = rb_str_encode(estr, rb_enc_from_encoding(enc), 0, Qnil);
7992  }
7993  }
      /* comma operator: ret is set to the kept prefix before the test runs */
7994  else if (ret = rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
7995  rb_str_cat(ret, ellipsis, ellipsislen);
7996  }
7997  else {
7998  estr = rb_str_encode(rb_usascii_str_new(ellipsis, ellipsislen),
7999  rb_enc_from_encoding(enc), 0, Qnil);
8000  rb_str_append(ret, estr);
8001  }
8002  return ret;
8003 }
8004 
8005 static VALUE
8007 {
8008  int cr;
8009  str = StringValue(str);
8010  cr = rb_enc_str_coderange(str);
8011  if (cr == ENC_CODERANGE_BROKEN) {
8012  rb_raise(rb_eArgError, "replacement must be valid byte sequence '%+"PRIsVALUE"'", str);
8013  }
8014  else if (cr == ENC_CODERANGE_7BIT) {
8015  rb_encoding *e = STR_ENC_GET(str);
8016  if (!rb_enc_asciicompat(enc)) {
8017  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
8018  rb_enc_name(enc), rb_enc_name(e));
8019  }
8020  }
8021  else { /* ENC_CODERANGE_VALID */
8022  rb_encoding *e = STR_ENC_GET(str);
8023  if (enc != e) {
8024  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
8025  rb_enc_name(enc), rb_enc_name(e));
8026  }
8027  }
8028  return str;
8029 }
8030 
/*
 * Builds a copy of str in which every invalid byte sequence is replaced.
 *
 * str  - the string to examine.
 * repl - replacement string, or nil. When non-nil it is checked with
 *        str_compat_and_valid() against the encoding of str.
 *
 * The replacement for each invalid run is, in order of precedence: the value
 * returned by the block (when one is given), repl (when non-nil), or a
 * default: U+FFFD for UTF-8/16/32 and "?" for any other encoding.
 *
 * Returns nil when there is nothing to replace -- the cached code range is
 * already 7bit/valid, the encoding is a dummy encoding, or the scan finds no
 * invalid bytes -- otherwise the newly built string.
 */
8036 VALUE
8037 rb_str_scrub(VALUE str, VALUE repl)
8038 {
8039  int cr = ENC_CODERANGE(str);
8040  rb_encoding *enc;
8041  int encidx;
8042  VALUE buf = Qnil;
8043  const char *rep;
8044  long replen;
8045  int tainted = 0;
8046
8047  if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID)
8048  return Qnil;
8049
8050  enc = STR_ENC_GET(str);
8051  if (!NIL_P(repl)) {
8052  repl = str_compat_and_valid(repl, enc);
8053  tainted = OBJ_TAINTED_RAW(repl);
8054  }
8055
8056  if (rb_enc_dummy_p(enc)) {
8057  return Qnil;
8058  }
8059  encidx = rb_enc_to_index(enc);
8060
      /* points rep/replen at a static copy of the literal, without its NUL */
8061 #define DEFAULT_REPLACE_CHAR(str) do { \
8062  static const char replace[sizeof(str)-1] = str; \
8063  rep = replace; replen = (int)sizeof(replace); \
8064  } while (0)
8065
8066  if (rb_enc_asciicompat(enc)) {
8067  const char *p = RSTRING_PTR(str);
8068  const char *e = RSTRING_END(str);
8069  const char *p1 = p;
8070  int rep7bit_p;
      /* rep == NULL means "ask the block" further down */
8071  if (rb_block_given_p()) {
8072  rep = NULL;
8073  replen = 0;
8074  rep7bit_p = FALSE;
8075  }
8076  else if (!NIL_P(repl)) {
8077  rep = RSTRING_PTR(repl);
8078  replen = RSTRING_LEN(repl);
8079  rep7bit_p = (ENC_CODERANGE(repl) == ENC_CODERANGE_7BIT);
8080  }
8081  else if (encidx == rb_utf8_encindex()) {
8082  DEFAULT_REPLACE_CHAR("\xEF\xBF\xBD");
8083  rep7bit_p = FALSE;
8084  }
8085  else {
8086  DEFAULT_REPLACE_CHAR("?");
8087  rep7bit_p = TRUE;
8088  }
8089  cr = ENC_CODERANGE_7BIT;
8090
8091  p = search_nonascii(p, e);
8092  if (!p) {
8093  p = e;
8094  }
8095  while (p < e) {
8096  int ret = rb_enc_precise_mbclen(p, e, enc);
8097  if (MBCLEN_NEEDMORE_P(ret)) {
8098  break;
8099  }
8100  else if (MBCLEN_CHARFOUND_P(ret)) {
8101  cr = ENC_CODERANGE_VALID;
8102  p += MBCLEN_CHARFOUND_LEN(ret);
8103  }
8104  else if (MBCLEN_INVALID_P(ret)) {
8105  /*
8106  * p1~p: valid ascii/multibyte chars
8107  * p ~e: invalid bytes + unknown bytes
8108  */
8109  long clen = rb_enc_mbmaxlen(enc);
8110  if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
8111  if (p > p1) {
8112  rb_str_buf_cat(buf, p1, p - p1);
8113  }
8114
      /* work out how many bytes (clen) this invalid run covers */
8115  if (e - p < clen) clen = e - p;
8116  if (clen <= 2) {
8117  clen = 1;
8118  }
8119  else {
8120  const char *q = p;
8121  clen--;
8122  for (; clen > 1; clen--) {
8123  ret = rb_enc_precise_mbclen(q, q + clen, enc);
8124  if (MBCLEN_NEEDMORE_P(ret)) break;
8125  if (MBCLEN_INVALID_P(ret)) continue;
8126  UNREACHABLE;
8127  }
8128  }
8129  if (rep) {
8130  rb_str_buf_cat(buf, rep, replen);
8131  if (!rep7bit_p) cr = ENC_CODERANGE_VALID;
8132  }
8133  else {
8134  repl = rb_yield(rb_enc_str_new(p, clen, enc));
8135  repl = str_compat_and_valid(repl, enc);
8136  tainted |= OBJ_TAINTED_RAW(repl);
8137  rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
8138  if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID)
8139  cr = ENC_CODERANGE_VALID;
8140  }
8141  p += clen;
8142  p1 = p;
8143  p = search_nonascii(p, e);
8144  if (!p) {
8145  p = e;
8146  break;
8147  }
8148  }
8149  else {
8150  UNREACHABLE;
8151  }
8152  }
8153  if (NIL_P(buf)) {
8154  if (p == e) {
      /* nothing invalid was found: cache the computed code range on str */
8155  ENC_CODERANGE_SET(str, cr);
8156  return Qnil;
8157  }
8158  buf = rb_str_buf_new(RSTRING_LEN(str));
8159  }
8160  if (p1 < p) {
8161  rb_str_buf_cat(buf, p1, p - p1);
8162  }
      /* the loop stopped on an incomplete trailing character: replace the
       * whole remainder at once */
8163  if (p < e) {
8164  if (rep) {
8165  rb_str_buf_cat(buf, rep, replen);
8166  if (!rep7bit_p) cr = ENC_CODERANGE_VALID;
8167  }
8168  else {
8169  repl = rb_yield(rb_enc_str_new(p, e-p, enc));
8170  repl = str_compat_and_valid(repl, enc);
8171  tainted |= OBJ_TAINTED_RAW(repl);
8172  rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
8173  if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID)
8174  cr = ENC_CODERANGE_VALID;
8175  }
8176  }
8177  }
8178  else {
8179  /* ASCII incompatible */
8180  const char *p = RSTRING_PTR(str);
8181  const char *e = RSTRING_END(str);
8182  const char *p1 = p;
8183  long mbminlen = rb_enc_mbminlen(enc);
      /* Same precedence as the ASCII-compatible branch: a block wins over
       * repl and the defaults. Without this, rep is never NULL here and the
       * rb_yield paths below can never run. */
      if (rb_block_given_p()) {
      rep = NULL;
      replen = 0;
      }
8184  else if (!NIL_P(repl)) {
8185  rep = RSTRING_PTR(repl);
8186  replen = RSTRING_LEN(repl);
8187  }
8188  else if (encidx == ENCINDEX_UTF_16BE) {
8189  DEFAULT_REPLACE_CHAR("\xFF\xFD");
8190  }
8191  else if (encidx == ENCINDEX_UTF_16LE) {
8192  DEFAULT_REPLACE_CHAR("\xFD\xFF");
8193  }
8194  else if (encidx == ENCINDEX_UTF_32BE) {
8195  DEFAULT_REPLACE_CHAR("\x00\x00\xFF\xFD");
8196  }
8197  else if (encidx == ENCINDEX_UTF_32LE) {
8198  DEFAULT_REPLACE_CHAR("\xFD\xFF\x00\x00");
8199  }
8200  else {
8201  DEFAULT_REPLACE_CHAR("?");
8202  }
8203
8204  while (p < e) {
8205  int ret = rb_enc_precise_mbclen(p, e, enc);
8206  if (MBCLEN_NEEDMORE_P(ret)) {
8207  break;
8208  }
8209  else if (MBCLEN_CHARFOUND_P(ret)) {
8210  p += MBCLEN_CHARFOUND_LEN(ret);
8211  }
8212  else if (MBCLEN_INVALID_P(ret)) {
8213  const char *q = p;
8214  long clen = rb_enc_mbmaxlen(enc);
8215  if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
8216  if (p > p1) rb_str_buf_cat(buf, p1, p - p1);
8217
      /* as above, but the run is measured in units of mbminlen bytes */
8218  if (e - p < clen) clen = e - p;
8219  if (clen <= mbminlen * 2) {
8220  clen = mbminlen;
8221  }
8222  else {
8223  clen -= mbminlen;
8224  for (; clen > mbminlen; clen-=mbminlen) {
8225  ret = rb_enc_precise_mbclen(q, q + clen, enc);
8226  if (MBCLEN_NEEDMORE_P(ret)) break;
8227  if (MBCLEN_INVALID_P(ret)) continue;
8228  UNREACHABLE;
8229  }
8230  }
8231  if (rep) {
8232  rb_str_buf_cat(buf, rep, replen);
8233  }
8234  else {
      /* hand the block only the clen invalid bytes being replaced (p is
       * advanced by clen below), not everything up to the end of str */
8235  repl = rb_yield(rb_enc_str_new(p, clen, enc));
8236  repl = str_compat_and_valid(repl, enc);
8237  tainted |= OBJ_TAINTED_RAW(repl);
8238  rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
8239  }
8240  p += clen;
8241  p1 = p;
8242  }
8243  else {
8244  UNREACHABLE;
8245  }
8246  }
8247  if (NIL_P(buf)) {
8248  if (p == e) {
8250  return Qnil;
8251  }
8252  buf = rb_str_buf_new(RSTRING_LEN(str));
8253  }
8254  if (p1 < p) {
8255  rb_str_buf_cat(buf, p1, p - p1);
8256  }
8257  if (p < e) {
8258  if (rep) {
8259  rb_str_buf_cat(buf, rep, replen);
8260  }
8261  else {
8262  repl = rb_yield(rb_enc_str_new(p, e-p, enc));
8263  repl = str_compat_and_valid(repl, enc);
8264  tainted |= OBJ_TAINTED_RAW(repl);
8265  rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
8266  }
8267  }
8268  cr = ENC_CODERANGE_VALID;
8269  }
      /* the result is tainted if str or any replacement used was tainted */
8270  FL_SET_RAW(buf, tainted|OBJ_TAINTED_RAW(str));
8272  return buf;
8273 }
8274 
8275 /*
8276  * call-seq:
8277  * str.scrub -> new_str
8278  * str.scrub(repl) -> new_str
8279  * str.scrub{|bytes|} -> new_str
8280  *
8281  * If the string contains invalid byte sequences, returns a new string with the invalid
8282  * bytes replaced by the given replacement string; otherwise returns a copy of the string.
8283  * If a block is given, invalid bytes are replaced with the value returned by the block.
8284  *
8285  * "abc\u3042\x81".scrub #=> "abc\u3042\uFFFD"
8286  * "abc\u3042\x81".scrub("*") #=> "abc\u3042*"
8287  * "abc\u3042\xE3\x80".scrub{|bytes| '<'+bytes.unpack('H*')[0]+'>' } #=> "abc\u3042<e380>"
8288  */
8289 static VALUE
8291 {
8292  VALUE repl = argc ? (rb_check_arity(argc, 0, 1), argv[0]) : Qnil;
8293  VALUE new = rb_str_scrub(str, repl);
8294  return NIL_P(new) ? rb_str_dup(str): new;
8295 }
8296 
8297 /*
8298  * call-seq:
8299  * str.scrub! -> str
8300  * str.scrub!(repl) -> str
8301  * str.scrub!{|bytes|} -> str
8302  *
8303  * If the string contains invalid byte sequences, replaces the invalid bytes in place with
8304  * the given replacement string. Returns self in either case.
8305  * If a block is given, invalid bytes are replaced with the value returned by the block.
8306  *
8307  * "abc\u3042\x81".scrub! #=> "abc\u3042\uFFFD"
8308  * "abc\u3042\x81".scrub!("*") #=> "abc\u3042*"
8309  * "abc\u3042\xE3\x80".scrub!{|bytes| '<'+bytes.unpack('H*')[0]+'>' } #=> "abc\u3042<e380>"
8310  */
8311 static VALUE
8313 {
8314  VALUE repl = argc ? (rb_check_arity(argc, 0, 1), argv[0]) : Qnil;
8315  VALUE new = rb_str_scrub(str, repl);
8316  if (!NIL_P(new)) rb_str_replace(str, new);
8317  return str;
8318 }
8319 
8320 /**********************************************************************
8321  * Document-class: Symbol
8322  *
8323  * <code>Symbol</code> objects represent names and some strings
8324  * inside the Ruby
8325  * interpreter. They are generated using the <code>:name</code> and
8326  * <code>:"string"</code> literals
8327  * syntax, and by the various <code>to_sym</code> methods. The same
8328  * <code>Symbol</code> object will be created for a given name or string
8329  * for the duration of a program's execution, regardless of the context
8330  * or meaning of that name. Thus if <code>Fred</code> is a constant in
8331  * one context, a method in another, and a class in a third, the
8332  * <code>Symbol</code> <code>:Fred</code> will be the same object in
8333  * all three contexts.
8334  *
8335  * module One
8336  * class Fred
8337  * end
8338  * $f1 = :Fred
8339  * end
8340  * module Two
8341  * Fred = 1
8342  * $f2 = :Fred
8343  * end
8344  * def Fred()
8345  * end
8346  * $f3 = :Fred
8347  * $f1.object_id #=> 2514190
8348  * $f2.object_id #=> 2514190
8349  * $f3.object_id #=> 2514190
8350  *
8351  */
8352 
8353 
8354 /*
8355  * call-seq:
8356  * sym == obj -> true or false
8357  *
8358  * Equality---If <i>sym</i> and <i>obj</i> are exactly the same
8359  * symbol, returns <code>true</code>.
8360  */
8361 
8362 static VALUE
8363 sym_equal(VALUE sym1, VALUE sym2)
8364 {
8365  if (sym1 == sym2) return Qtrue;
8366  return Qfalse;
8367 }
8368 
8369 
8370 static int
8371 sym_printable(const char *s, const char *send, rb_encoding *enc)
8372 {
8373  while (s < send) {
8374  int n;
8375  int c = rb_enc_codepoint_len(s, send, &n, enc);
8376 
8377  if (!rb_enc_isprint(c, enc)) return FALSE;
8378  s += n;
8379  }
8380  return TRUE;
8381 }
8382 
8383 int
8385 {
8386  rb_encoding *enc;
8387  const char *ptr;
8388  long len;
8390 
8391  if (resenc == NULL) resenc = rb_default_external_encoding();
8392  enc = STR_ENC_GET(sym);
8393  ptr = RSTRING_PTR(sym);
8394  len = RSTRING_LEN(sym);
8395  if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) || len != (long)strlen(ptr) ||
8396  !rb_enc_symname_p(ptr, enc) || !sym_printable(ptr, ptr + len, enc)) {
8397  return FALSE;
8398  }
8399  return TRUE;
8400 }
8401 
8402 VALUE
8404 {
8405  rb_encoding *enc;
8406  const char *ptr;
8407  long len;
8408  rb_encoding *resenc;
8409 
8410  Check_Type(str, T_STRING);
8411  resenc = rb_default_internal_encoding();
8412  if (resenc == NULL) resenc = rb_default_external_encoding();
8413  enc = STR_ENC_GET(str);
8414  ptr = RSTRING_PTR(str);
8415  len = RSTRING_LEN(str);
8416  if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
8417  !sym_printable(ptr, ptr + len, enc)) {
8418  return rb_str_inspect(str);
8419  }
8420  return str;
8421 }
8422 
8423 VALUE
8425 {
8426  return rb_str_quote_unprintable(rb_id2str(id));
8427 }
8428 
8429 /*
8430  * call-seq:
8431  * sym.inspect -> string
8432  *
8433  * Returns the representation of <i>sym</i> as a symbol literal.
8434  *
8435  * :fred.inspect #=> ":fred"
8436  */
8437 
8438 static VALUE
8440 {
8441  VALUE str;
8442  const char *ptr;
8443  long len;
8444  ID id = SYM2ID(sym);
8445  char *dest;
8446 
8447  sym = rb_id2str(id);
8448  if (!rb_str_symname_p(sym)) {
8449  str = rb_str_inspect(sym);
8450  len = RSTRING_LEN(str);
8451  rb_str_resize(str, len + 1);
8452  dest = RSTRING_PTR(str);
8453  memmove(dest + 1, dest, len);
8454  dest[0] = ':';
8455  }
8456  else {
8457  rb_encoding *enc = STR_ENC_GET(sym);
8458  ptr = RSTRING_PTR(sym);
8459  len = RSTRING_LEN(sym);
8460  str = rb_enc_str_new(0, len + 1, enc);
8461  dest = RSTRING_PTR(str);
8462  dest[0] = ':';
8463  memcpy(dest + 1, ptr, len);
8464  }
8465  return str;
8466 }
8467 
8468 
8469 /*
8470  * call-seq:
8471  * sym.id2name -> string
8472  * sym.to_s -> string
8473  *
8474  * Returns the name or string corresponding to <i>sym</i>.
8475  *
8476  * :fred.id2name #=> "fred"
8477  */
8478 
8479 
8480 VALUE
8482 {
8483  ID id = SYM2ID(sym);
8484 
8485  return str_new3(rb_cString, rb_id2str(id));
8486 }
8487 
8488 
8489 /*
8490  * call-seq:
8491  * sym.to_sym -> sym
8492  * sym.intern -> sym
8493  *
8494  * In general, <code>to_sym</code> returns the <code>Symbol</code> corresponding
8495  * to an object. As <i>sym</i> is already a symbol, <code>self</code> is returned
8496  * in this case.
8497  */
8498 
8499 static VALUE
8501 {
8502  return sym;
8503 }
8504 
8505 static VALUE
8506 sym_call(VALUE args, VALUE sym, int argc, VALUE *argv, VALUE passed_proc)
8507 {
8508  VALUE obj;
8509 
8510  if (argc < 1) {
8511  rb_raise(rb_eArgError, "no receiver given");
8512  }
8513  obj = argv[0];
8514  return rb_funcall_with_block(obj, (ID)sym, argc - 1, argv + 1, passed_proc);
8515 }
8516 
8517 /*
8518  * call-seq:
8519  * sym.to_proc
8520  *
8521  * Returns a _Proc_ object which responds to the given method by _sym_.
8522  *
8523  * (1..3).collect(&:to_s) #=> ["1", "2", "3"]
8524  */
8525 
8526 static VALUE
8528 {
8529  static VALUE sym_proc_cache = Qfalse;
8530  enum {SYM_PROC_CACHE_SIZE = 67};
8531  VALUE proc;
8532  long id, index;
8533  VALUE *aryp;
8534 
8535  if (!sym_proc_cache) {
8536  sym_proc_cache = rb_ary_tmp_new(SYM_PROC_CACHE_SIZE * 2);
8537  rb_gc_register_mark_object(sym_proc_cache);
8538  rb_ary_store(sym_proc_cache, SYM_PROC_CACHE_SIZE*2 - 1, Qnil);
8539  }
8540 
8541  id = SYM2ID(sym);
8542  index = (id % SYM_PROC_CACHE_SIZE) << 1;
8543 
8544  aryp = RARRAY_PTR(sym_proc_cache);
8545  if (aryp[index] == sym) {
8546  return aryp[index + 1];
8547  }
8548  else {
8549  proc = rb_proc_new(sym_call, (VALUE)id);
8551  aryp[index] = sym;
8552  aryp[index + 1] = proc;
8553  return proc;
8554  }
8555 }
8556 
8557 /*
8558  * call-seq:
8559  *
8560  * sym.succ
8561  *
8562  * Same as <code>sym.to_s.succ.intern</code>.
8563  */
8564 
8565 static VALUE
8567 {
8569 }
8570 
8571 /*
8572  * call-seq:
8573  *
8574  * symbol <=> other_symbol -> -1, 0, +1 or nil
8575  *
8576  * Compares +symbol+ with +other_symbol+ after calling #to_s on each of the
8577  * symbols. Returns -1, 0, +1 or nil depending on whether +symbol+ is less
8578  * than, equal to, or greater than +other_symbol+.
8579  *
8580  * +nil+ is returned if the two values are incomparable.
8581  *
8582  * See String#<=> for more information.
8583  */
8584 
8585 static VALUE
8587 {
8588  if (!SYMBOL_P(other)) {
8589  return Qnil;
8590  }
8591  return rb_str_cmp_m(rb_sym_to_s(sym), rb_sym_to_s(other));
8592 }
8593 
8594 /*
8595  * call-seq:
8596  *
8597  * sym.casecmp(other) -> -1, 0, +1 or nil
8598  *
8599  * Case-insensitive version of <code>Symbol#<=></code>.
8600  */
8601 
8602 static VALUE
8604 {
8605  if (!SYMBOL_P(other)) {
8606  return Qnil;
8607  }
8608  return rb_str_casecmp(rb_sym_to_s(sym), rb_sym_to_s(other));
8609 }
8610 
8611 /*
8612  * call-seq:
8613  * sym =~ obj -> fixnum or nil
8614  * sym.match(obj) -> fixnum or nil
8615  *
8616  * Returns <code>sym.to_s =~ obj</code>.
8617  */
8618 
8619 static VALUE
8621 {
8622  return rb_str_match(rb_sym_to_s(sym), other);
8623 }
8624 
8625 /*
8626  * call-seq:
8627  * sym[idx] -> char
8628  * sym[b, n] -> string
8629  * sym.slice(idx) -> char
8630  * sym.slice(b, n) -> string
8631  *
8632  * Returns <code>sym.to_s[]</code>.
8633  */
8634 
8635 static VALUE
8637 {
8638  return rb_str_aref_m(argc, argv, rb_sym_to_s(sym));
8639 }
8640 
8641 /*
8642  * call-seq:
8643  * sym.length -> integer
8644  * sym.size -> integer
8645  *
8646  * Same as <code>sym.to_s.length</code>.
8647  */
8648 
8649 static VALUE
8651 {
8652  return rb_str_length(rb_id2str(SYM2ID(sym)));
8653 }
8654 
8655 /*
8656  * call-seq:
8657  * sym.empty? -> true or false
8658  *
8659  * Returns whether _sym_ is :"" or not.
8660  */
8661 
8662 static VALUE
8664 {
8665  return rb_str_empty(rb_id2str(SYM2ID(sym)));
8666 }
8667 
8668 /*
8669  * call-seq:
8670  * sym.upcase -> symbol
8671  *
8672  * Same as <code>sym.to_s.upcase.intern</code>.
8673  */
8674 
8675 static VALUE
8677 {
8679 }
8680 
8681 /*
8682  * call-seq:
8683  * sym.downcase -> symbol
8684  *
8685  * Same as <code>sym.to_s.downcase.intern</code>.
8686  */
8687 
8688 static VALUE
8690 {
8692 }
8693 
8694 /*
8695  * call-seq:
8696  * sym.capitalize -> symbol
8697  *
8698  * Same as <code>sym.to_s.capitalize.intern</code>.
8699  */
8700 
8701 static VALUE
8703 {
8705 }
8706 
8707 /*
8708  * call-seq:
8709  * sym.swapcase -> symbol
8710  *
8711  * Same as <code>sym.to_s.swapcase.intern</code>.
8712  */
8713 
8714 static VALUE
8716 {
8718 }
8719 
8720 /*
8721  * call-seq:
8722  * sym.encoding -> encoding
8723  *
8724  * Returns the Encoding object that represents the encoding of _sym_.
8725  */
8726 
8727 static VALUE
8729 {
8730  return rb_obj_encoding(rb_id2str(SYM2ID(sym)));
8731 }
8732 
8733 ID
8735 {
8736  VALUE tmp;
8737 
8738  if (SYMBOL_P(name)) {
8739  return SYM2ID(name);
8740  }
8741  if (!RB_TYPE_P(name, T_STRING)) {
8742  tmp = rb_check_string_type(name);
8743  if (NIL_P(tmp)) {
8744  rb_raise(rb_eTypeError, "%+"PRIsVALUE" is not a symbol",
8745  name);
8746  }
8747  name = tmp;
8748  }
8749  return rb_intern_str(name);
8750 }
8751 
8752 /*
8753  * A <code>String</code> object holds and manipulates an arbitrary sequence of
8754  * bytes, typically representing characters. String objects may be created
8755  * using <code>String::new</code> or as literals.
8756  *
8757  * Because of aliasing issues, users of strings should be aware of the methods
8758  * that modify the contents of a <code>String</code> object. Typically,
8759  * methods with names ending in ``!'' modify their receiver, while those
8760  * without a ``!'' return a new <code>String</code>. However, there are
8761  * exceptions, such as <code>String#[]=</code>.
8762  *
8763  */
8764 
8765 void
8767 {
8768 #undef rb_intern
8769 #define rb_intern(str) rb_intern_const(str)
8770 
8771  rb_cString = rb_define_class("String", rb_cObject);
8775  rb_define_method(rb_cString, "initialize", rb_str_init, -1);
8776  rb_define_method(rb_cString, "initialize_copy", rb_str_replace, 1);
8780  rb_define_method(rb_cString, "eql?", rb_str_eql, 1);
8782  rb_define_method(rb_cString, "casecmp", rb_str_casecmp, 1);
8788  rb_define_method(rb_cString, "insert", rb_str_insert, 2);
8789  rb_define_method(rb_cString, "length", rb_str_length, 0);
8791  rb_define_method(rb_cString, "bytesize", rb_str_bytesize, 0);
8792  rb_define_method(rb_cString, "empty?", rb_str_empty, 0);
8799  rb_define_method(rb_cString, "upto", rb_str_upto, -1);
8802  rb_define_method(rb_cString, "replace", rb_str_replace, 1);
8805  rb_define_method(rb_cString, "getbyte", rb_str_getbyte, 1);
8806  rb_define_method(rb_cString, "setbyte", rb_str_setbyte, 2);
8807  rb_define_method(rb_cString, "byteslice", rb_str_byteslice, -1);
8808  rb_define_method(rb_cString, "scrub", str_scrub, -1);
8809  rb_define_method(rb_cString, "scrub!", str_scrub_bang, -1);
8810  rb_define_method(rb_cString, "freeze", rb_obj_freeze, 0);
8811 
8812  rb_define_method(rb_cString, "to_i", rb_str_to_i, -1);
8815  rb_define_method(rb_cString, "to_str", rb_str_to_s, 0);
8816  rb_define_method(rb_cString, "inspect", rb_str_inspect, 0);
8818 
8819  rb_define_method(rb_cString, "upcase", rb_str_upcase, 0);
8820  rb_define_method(rb_cString, "downcase", rb_str_downcase, 0);
8821  rb_define_method(rb_cString, "capitalize", rb_str_capitalize, 0);
8822  rb_define_method(rb_cString, "swapcase", rb_str_swapcase, 0);
8823 
8828 
8832  rb_define_method(rb_cString, "lines", rb_str_lines, -1);
8835  rb_define_method(rb_cString, "codepoints", rb_str_codepoints, 0);
8836  rb_define_method(rb_cString, "reverse", rb_str_reverse, 0);
8838  rb_define_method(rb_cString, "concat", rb_str_concat, 1);
8840  rb_define_method(rb_cString, "prepend", rb_str_prepend, 1);
8842  rb_define_method(rb_cString, "intern", rb_str_intern, 0);
8843  rb_define_method(rb_cString, "to_sym", rb_str_intern, 0);
8845 
8846  rb_define_method(rb_cString, "include?", rb_str_include, 1);
8847  rb_define_method(rb_cString, "start_with?", rb_str_start_with, -1);
8848  rb_define_method(rb_cString, "end_with?", rb_str_end_with, -1);
8849 
8851 
8852  rb_define_method(rb_cString, "ljust", rb_str_ljust, -1);
8853  rb_define_method(rb_cString, "rjust", rb_str_rjust, -1);
8854  rb_define_method(rb_cString, "center", rb_str_center, -1);
8855 
8856  rb_define_method(rb_cString, "sub", rb_str_sub, -1);
8857  rb_define_method(rb_cString, "gsub", rb_str_gsub, -1);
8859  rb_define_method(rb_cString, "chomp", rb_str_chomp, -1);
8861  rb_define_method(rb_cString, "lstrip", rb_str_lstrip, 0);
8862  rb_define_method(rb_cString, "rstrip", rb_str_rstrip, 0);
8863 
8871 
8874  rb_define_method(rb_cString, "delete", rb_str_delete, -1);
8875  rb_define_method(rb_cString, "squeeze", rb_str_squeeze, -1);
8876  rb_define_method(rb_cString, "count", rb_str_count, -1);
8877 
8882 
8883  rb_define_method(rb_cString, "each_line", rb_str_each_line, -1);
8884  rb_define_method(rb_cString, "each_byte", rb_str_each_byte, 0);
8885  rb_define_method(rb_cString, "each_char", rb_str_each_char, 0);
8886  rb_define_method(rb_cString, "each_codepoint", rb_str_each_codepoint, 0);
8887 
8888  rb_define_method(rb_cString, "sum", rb_str_sum, -1);
8889 
8890  rb_define_method(rb_cString, "slice", rb_str_aref_m, -1);
8892 
8893  rb_define_method(rb_cString, "partition", rb_str_partition, 1);
8894  rb_define_method(rb_cString, "rpartition", rb_str_rpartition, 1);
8895 
8896  rb_define_method(rb_cString, "encoding", rb_obj_encoding, 0); /* in encoding.c */
8897  rb_define_method(rb_cString, "force_encoding", rb_str_force_encoding, 1);
8899  rb_define_method(rb_cString, "valid_encoding?", rb_str_valid_encoding_p, 0);
8901 
8902  id_to_s = rb_intern("to_s");
8903 
8904  rb_fs = Qnil;
8905  rb_define_variable("$;", &rb_fs);
8906  rb_define_variable("$-F", &rb_fs);
8907 
8908  rb_cSymbol = rb_define_class("Symbol", rb_cObject);
8912  rb_define_singleton_method(rb_cSymbol, "all_symbols", rb_sym_all_symbols, 0); /* in parse.y */
8913 
8916  rb_define_method(rb_cSymbol, "inspect", sym_inspect, 0);
8918  rb_define_method(rb_cSymbol, "id2name", rb_sym_to_s, 0);
8919  rb_define_method(rb_cSymbol, "intern", sym_to_sym, 0);
8920  rb_define_method(rb_cSymbol, "to_sym", sym_to_sym, 0);
8921  rb_define_method(rb_cSymbol, "to_proc", sym_to_proc, 0);
8922  rb_define_method(rb_cSymbol, "succ", sym_succ, 0);
8923  rb_define_method(rb_cSymbol, "next", sym_succ, 0);
8924 
8925  rb_define_method(rb_cSymbol, "<=>", sym_cmp, 1);
8926  rb_define_method(rb_cSymbol, "casecmp", sym_casecmp, 1);
8928 
8929  rb_define_method(rb_cSymbol, "[]", sym_aref, -1);
8930  rb_define_method(rb_cSymbol, "slice", sym_aref, -1);
8931  rb_define_method(rb_cSymbol, "length", sym_length, 0);
8932  rb_define_method(rb_cSymbol, "size", sym_length, 0);
8933  rb_define_method(rb_cSymbol, "empty?", sym_empty, 0);
8934  rb_define_method(rb_cSymbol, "match", sym_match, 1);
8935 
8936  rb_define_method(rb_cSymbol, "upcase", sym_upcase, 0);
8937  rb_define_method(rb_cSymbol, "downcase", sym_downcase, 0);
8938  rb_define_method(rb_cSymbol, "capitalize", sym_capitalize, 0);
8939  rb_define_method(rb_cSymbol, "swapcase", sym_swapcase, 0);
8940 
8941  rb_define_method(rb_cSymbol, "encoding", sym_encoding, 0);
8942 
8943  if (frozen_strings)
8945 }
static int str_independent(VALUE str)
Definition: string.c:1451
#define ELTS_SHARED
Definition: ruby.h:817
#define RBASIC_CLEAR_CLASS(obj)
Definition: internal.h:609
int rb_enc_str_asciionly_p(VALUE str)
Definition: string.c:448
static VALUE sym_upcase(VALUE sym)
Definition: string.c:8676
#define ONIGENC_MBCLEN_CHARFOUND_P(r)
Definition: oniguruma.h:246
static VALUE str_new4(VALUE klass, VALUE str)
Definition: string.c:808
static long chopped_length(VALUE str)
Definition: string.c:6874
VALUE rb_str_resize(VALUE str, long len)
Definition: string.c:2024
#define ISDIGIT(c)
Definition: ruby.h:1783
VALUE rb_external_str_with_enc(VALUE str, rb_encoding *eenc)
Definition: string.c:695
static VALUE str_replace_shared_without_enc(VALUE str2, VALUE str)
Definition: string.c:761
Definition: string.c:5343
int rb_enc_codelen(int c, rb_encoding *enc)
Definition: encoding.c:1014
static VALUE rb_str_bytesize(VALUE str)
Definition: string.c:1317
int rb_reg_backref_number(VALUE match, VALUE backref)
Definition: re.c:1132
static VALUE str_buf_cat(VALUE str, const char *ptr, long len)
Definition: string.c:2076
static const struct st_hash_type fstring_hash_type
Definition: string.c:168
#define is_broken_string(str)
Definition: internal.h:733
#define MBCLEN_CHARFOUND_P(ret)
Definition: encoding.h:139
static long rb_str_rindex(VALUE str, VALUE sub, long pos)
Definition: string.c:2861
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Definition: encoding.c:838
VALUE rb_ary_pop(VALUE ary)
Definition: array.c:944
#define rb_str_new4
Definition: intern.h:842
rb_econv_result_t
Definition: encoding.h:252
void rb_str_fill_terminator(VALUE str, const int newminlen)
Definition: string.c:1669
#define MBCLEN_CHARFOUND_LEN(ret)
Definition: encoding.h:140
#define RESIZE_CAPA(str, capacity)
Definition: string.c:96
#define RARRAY_LEN(a)
Definition: ruby.h:878
void rb_bug(const char *fmt,...)
Definition: error.c:327
#define rb_enc_mbc_to_codepoint(p, e, enc)
Definition: encoding.h:156
VALUE rb_ary_new_capa(long capa)
Definition: array.c:493
void rb_enc_copy(VALUE obj1, VALUE obj2)
Definition: encoding.c:916
#define FALSE
Definition: nkf.h:174
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE
Definition: oniguruma.h:589
#define rb_hash_lookup
Definition: tcltklib.c:269
#define RSTRING(obj)
Definition: ruby.h:1121
#define rb_intern(str)
#define RSTRING_FSTR
Definition: ruby.h:835
size_t strlen(const char *)
#define INT2NUM(x)
Definition: ruby.h:1296
#define CHECK_IF_ASCII(c)
void rb_backref_set(VALUE)
Definition: vm.c:953
#define T_FIXNUM
Definition: ruby.h:489
Definition: st.h:69
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Definition: encoding.h:64
VALUE rb_id2str(ID id)
Definition: ripper.c:17201
Definition: st.h:100
static int sym_printable(const char *s, const char *send, rb_encoding *enc)
Definition: string.c:8371
VALUE rb_str_equal(VALUE str1, VALUE str2)
Definition: string.c:2542
#define NUM2INT(x)
Definition: ruby.h:630
static rb_encoding * get_actual_encoding(const int encidx, VALUE str)
Definition: string.c:129
static int max(int a, int b)
Definition: strftime.c:141
VALUE rb_locale_str_new_cstr(const char *ptr)
Definition: string.c:725
VALUE rb_sym_to_s(VALUE sym)
Definition: string.c:8481
#define ascii_isspace(c)
Definition: string.c:6149
static int coderange_scan(const char *p, long len, rb_encoding *enc)
Definition: string.c:291
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc)
Definition: string.c:686
void rb_undef_alloc_func(VALUE)
Definition: vm_method.c:519
void rb_define_singleton_method(VALUE obj, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a singleton method for obj.
Definition: class.c:1646
static VALUE rb_str_to_f(VALUE str)
Definition: string.c:4712
VALUE rb_str_new_frozen(VALUE orig)
Definition: string.c:833
static VALUE rb_str_oct(VALUE str)
Definition: string.c:7387
static VALUE str_compat_and_valid(VALUE str, rb_encoding *enc)
Definition: string.c:8006
st_index_t rb_str_hash(VALUE str)
Definition: string.c:2421
#define FL_TAINT
Definition: ruby.h:1137
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *ptr_enc)
Definition: string.c:2250
#define CLASS_OF(v)
Definition: ruby.h:440
static VALUE rb_str_scan(VALUE str, VALUE pat)
Definition: string.c:7320
VALUE rb_locale_str_new(const char *ptr, long len)
Definition: string.c:719
static VALUE rb_str_gsub(int argc, VALUE *argv, VALUE str)
Definition: string.c:4304
static VALUE rb_str_match(VALUE x, VALUE y)
Definition: string.c:2989
#define FIXNUM_MAX
Definition: ruby.h:228
#define Qtrue
Definition: ruby.h:426
void rb_str_set_len(VALUE str, long len)
Definition: string.c:2007
static void rb_enc_cr_str_copy_for_substr(VALUE dest, VALUE src)
Definition: string.c:398
#define is_ascii_string(str)
Definition: internal.h:732
unsigned char * USTR
Definition: string.c:5341
static unsigned int trnext(struct tr *t, rb_encoding *enc)
Definition: string.c:5350
#define ENC_CODERANGE_SET(obj, cr)
Definition: encoding.h:54
static VALUE sym_swapcase(VALUE sym)
Definition: string.c:8715
static VALUE rb_str_b(VALUE str)
Definition: string.c:7908
char * pend
Definition: string.c:5346
const int id
Definition: nkf.c:209
void Init_String(void)
Definition: string.c:8766
static VALUE rb_str_clear(VALUE str)
Definition: string.c:4343
rb_encoding * rb_to_encoding(VALUE enc)
Definition: encoding.c:219
#define ENC_CODERANGE_CLEAR(obj)
Definition: encoding.h:56
void rb_econv_close(rb_econv_t *ec)
Definition: transcode.c:1700
#define STR_UNSET_NOCAPA(s)
Definition: string.c:52
VALUE rb_enc_from_encoding(rb_encoding *encoding)
Definition: encoding.c:102
int rb_enc_tolower(int c, rb_encoding *enc)
Definition: encoding.c:1037
VALUE rb_eTypeError
Definition: error.c:548
#define rb_check_arity
Definition: intern.h:296
static VALUE str_gsub(int argc, VALUE *argv, VALUE str, int bang)
Definition: string.c:4128
#define UNREACHABLE
Definition: ruby.h:42
VALUE rb_reg_match(VALUE, VALUE)
Definition: re.c:2797
long rb_memsearch(const void *, long, const void *, long, rb_encoding *)
Definition: re.c:253
static VALUE rb_str_succ_bang(VALUE str)
Definition: string.c:3360
static VALUE rb_str_enumerate_bytes(VALUE str, int wantarray)
Definition: string.c:6599
static VALUE rb_str_each_line(int argc, VALUE *argv, VALUE str)
Definition: string.c:6569
rb_encoding * rb_default_internal_encoding(void)
Definition: encoding.c:1451
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:900
#define rb_long2int(n)
Definition: ruby.h:317
static VALUE str_new3(VALUE klass, VALUE str)
Definition: string.c:793
SSL_METHOD *(* func)(void)
Definition: ossl_ssl.c:113
VALUE rb_reg_regsub(VALUE, VALUE, struct re_registers *, VALUE)
Definition: re.c:3337
#define ONIGENC_MBCLEN_CHARFOUND_LEN(r)
Definition: oniguruma.h:247
#define SYM2ID(x)
Definition: ruby.h:356
RUBY_EXTERN char * crypt(const char *, const char *)
Definition: crypt.c:500
st_index_t rb_memhash(const void *ptr, long len)
Definition: random.c:1302
VALUE rb_str_split(VALUE str, const char *sep0)
Definition: string.c:6409
static VALUE rb_str_each_byte_size(VALUE str, VALUE args, VALUE eobj)
Definition: string.c:6593
static VALUE rb_str_prepend(VALUE str, VALUE str2)
Definition: string.c:2412
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
Definition: encoding.c:849
static VALUE rb_str_gsub_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:4253
VALUE rb_ary_tmp_new(long capa)
Definition: array.c:538
VALUE rb_str_export_to_enc(VALUE str, rb_encoding *enc)
Definition: string.c:755
static int fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existing)
Definition: string.c:174
void ruby_sized_xfree(void *x, size_t size)
Definition: gc.c:6237
VALUE rb_funcall(VALUE, ID, int,...)
Calls a method.
Definition: vm_eval.c:781
static VALUE rb_str_codepoints(VALUE str)
Definition: string.c:6867
#define str_buf_cat2(str, ptr)
Definition: string.c:2120
static VALUE rb_str_swapcase_bang(VALUE str)
Definition: string.c:5289
static VALUE rb_str_rstrip(VALUE str)
Definition: string.c:7205
VALUE rb_filesystem_str_new(const char *ptr, long len)
Definition: string.c:731
VALUE rb_str_export(VALUE str)
Definition: string.c:743
static VALUE rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
Definition: string.c:7564
#define RGENGC_WB_PROTECTED_STRING
Definition: ruby.h:720
static VALUE rb_str_include(VALUE str, VALUE arg)
Definition: string.c:4645
static void rb_str_check_dummy_enc(rb_encoding *enc)
Definition: string.c:5034
#define RBASIC_SET_CLASS(obj, cls)
Definition: internal.h:611
VALUE rb_backref_get(void)
Definition: vm.c:947
#define str_make_independent(str)
Definition: string.c:1480
VALUE rb_str_freeze(VALUE str)
Definition: string.c:1967
long rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
Definition: string.c:1141
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
Definition: encoding.c:993
static VALUE str_new0(VALUE klass, const char *ptr, long len, int termlen)
Definition: string.c:498
#define Check_Type(v, t)
Definition: ruby.h:532
long rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
Definition: string.c:1147
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:1857
char * p
Definition: string.c:5346
static VALUE sym_downcase(VALUE sym)
Definition: string.c:8689
static VALUE str_replace(VALUE str, VALUE str2)
Definition: string.c:1026
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Definition: ruby.h:854
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Definition: encoding.c:826
VALUE rb_funcall_with_block(VALUE, ID, int, const VALUE *, VALUE)
Definition: vm_eval.c:841
static VALUE rb_str_to_i(int argc, VALUE *argv, VALUE str)
Definition: string.c:4679
#define rb_utf8_encindex()
Definition: internal.h:403
char * rb_string_value_ptr(volatile VALUE *ptr)
Definition: string.c:1599
VALUE rb_convert_type(VALUE, int, const char *, const char *)
Definition: object.c:2637
static VALUE rb_str_slice_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:3923
VALUE rb_str_intern(VALUE s)
Definition: string.c:7467
#define RB_GC_GUARD(v)
Definition: ruby.h:523
void rb_define_alloc_func(VALUE, rb_alloc_func_t)
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:946
static rb_encoding * get_encoding(VALUE str)
Definition: string.c:159
static VALUE rb_str_empty(VALUE str)
Definition: string.c:1334
static VALUE rb_str_chars(VALUE str)
Definition: string.c:6773
static VALUE rb_str_reverse_bang(VALUE str)
Definition: string.c:4606
#define DATA_PTR(dta)
Definition: ruby.h:992
void rb_include_module(VALUE klass, VALUE module)
Definition: class.c:808
#define ONIGENC_CODE_TO_MBC_MAXLEN
Definition: oniguruma.h:189
static VALUE rb_str_center(int argc, VALUE *argv, VALUE str)
Definition: string.c:7708
st_data_t st_index_t
Definition: st.h:48
VALUE rb_range_beg_len(VALUE, long *, long *, long, int)
Definition: range.c:1020
#define DEFAULT_REPLACE_CHAR(str)
double rb_str_to_dbl(VALUE, int)
Definition: object.c:2890
#define rb_enc_islower(c, enc)
Definition: encoding.h:180
RUBY_FUNC_EXPORTED size_t rb_str_memsize(VALUE str)
Definition: string.c:953
int st_update(st_table *table, st_data_t key, st_update_callback_func *func, st_data_t arg)
Definition: st.c:867
static VALUE rb_str_subpat(VALUE str, VALUE re, VALUE backref)
Definition: string.c:3497
VALUE rb_str_new(const char *ptr, long len)
Definition: string.c:534
static VALUE rb_str_aset_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:3855
#define rb_enc_mbmaxlen(enc)
Definition: encoding.h:129
static const char * str_null_char(const char *s, long len, const int minlen, rb_encoding *enc)
Definition: string.c:1615
static char * str_fill_term(VALUE str, char *s, long len, int oldtermlen, int termlen)
Definition: string.c:1626
static VALUE rb_str_upcase_bang(VALUE str)
Definition: string.c:5052
unsigned int last
Definition: nkf.c:4310
static VALUE rb_str_format_m(VALUE str, VALUE arg)
Definition: string.c:1431
#define STR_SET_NOEMBED(str)
Definition: string.c:56
#define STR_DEC_LEN(str)
Definition: string.c:76
static long str_strlen(VALUE str, rb_encoding *enc)
Definition: string.c:1238
#define FIXNUM_P(f)
Definition: ruby.h:347
static VALUE rb_str_chomp(int argc, VALUE *argv, VALUE str)
Definition: string.c:7074
rb_encoding * rb_utf8_encoding(void)
Definition: encoding.c:1257
VALUE rb_str_export_locale(VALUE str)
Definition: string.c:749
#define BEG(no)
Definition: string.c:22
static VALUE sym_length(VALUE sym)
Definition: string.c:8650
VALUE rb_str_new_shared(VALUE str)
Definition: string.c:799
void rb_undef_method(VALUE klass, const char *name)
Definition: class.c:1497
#define CHAR_ESC_LEN
Definition: string.c:4747
VALUE rb_sym_all_symbols(void)
Definition: ripper.c:17303
static VALUE empty_str_alloc(VALUE klass)
Definition: string.c:489
static VALUE rb_str_upcase(VALUE str)
Definition: string.c:5117
#define ONIGENC_CTYPE_ALPHA
Definition: oniguruma.h:195
static VALUE rb_str_hash_m(VALUE str)
Definition: string.c:2451
static int fstring_cmp(VALUE a, VALUE b)
Definition: string.c:224
VALUE rb_cString
Definition: string.c:47
static VALUE rb_str_aset(VALUE str, VALUE indx, VALUE val)
Definition: string.c:3788
#define ENC_CODERANGE_7BIT
Definition: encoding.h:49
VALUE rb_eRangeError
Definition: error.c:552
const char * rb_obj_classname(VALUE)
Definition: variable.c:406
VALUE rb_enc_sprintf(rb_encoding *enc, const char *format,...)
Definition: sprintf.c:1231
void rb_gc_force_recycle(VALUE p)
Definition: gc.c:4900
#define rb_ary_new2
Definition: intern.h:90
RUBY_EXTERN void * memmove(void *, const void *, size_t)
Definition: memmove.c:7
int rb_enc_toupper(int c, rb_encoding *enc)
Definition: encoding.c:1031
#define sym(x)
Definition: date_core.c:3695
static VALUE rb_str_insert(VALUE str, VALUE idx, VALUE str2)
Definition: string.c:3888
VALUE rb_str_append(VALUE str, VALUE str2)
Definition: string.c:2297
RUBY_SYMBOL_EXPORT_BEGIN typedef unsigned long st_data_t
Definition: st.h:20
#define NEWOBJ_OF(obj, type, klass, flags)
Definition: ruby.h:694
#define ISALPHA(c)
Definition: ruby.h:1782
static VALUE sym_equal(VALUE sym1, VALUE sym2)
Definition: string.c:8363
static VALUE sym_inspect(VALUE sym)
Definition: string.c:8439
#define OBJ_INFECT_RAW(x, s)
Definition: ruby.h:1187
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Definition: string.c:2431
static VALUE rb_str_partition(VALUE str, VALUE sep)
Definition: string.c:7729
static long str_offset(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
Definition: string.c:1772
#define rb_enc_isctype(c, t, enc)
Definition: encoding.h:177
static VALUE rb_str_ljust(int argc, VALUE *argv, VALUE str)
Definition: string.c:7668
#define RB_TYPE_P(obj, type)
Definition: ruby.h:1672
int rb_enc_str_coderange(VALUE str)
Definition: string.c:435
static int fstring_set_class_i(st_data_t key, st_data_t val, st_data_t arg)
Definition: string.c:217
#define MEMZERO(p, type, n)
Definition: ruby.h:1359
Definition: ruby.h:820
VALUE rb_str_plus(VALUE str1, VALUE str2)
Definition: string.c:1352
static VALUE rb_str_setbyte(VALUE str, VALUE index, VALUE value)
Definition: string.c:4398
rb_encoding * rb_default_external_encoding(void)
Definition: encoding.c:1366
#define FL_TEST(x, f)
Definition: ruby.h:1169
#define ONIGENC_CTYPE_DIGIT
Definition: oniguruma.h:198
static st_table * frozen_strings
Definition: string.c:166
VALUE rb_mComparable
Definition: compar.c:14
int t(void)
Definition: conftest.c:13
neighbor_char
Definition: string.c:3053
static VALUE rb_str_capitalize_bang(VALUE str)
Definition: string.c:5223
static VALUE rb_str_strip(VALUE str)
Definition: string.c:7243
#define rb_intern_str(string)
Definition: generator.h:17
unsigned int now
Definition: string.c:5345
#define ALLOC_N(type, n)
Definition: ruby.h:1341
int rb_block_given_p(void)
Definition: eval.c:712
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val)
Definition: hash.c:1402
static VALUE rb_str_split_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:6199
#define val
static int single_byte_optimizable(VALUE str)
Definition: string.c:234
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:940
static void rb_str_splice_0(VALUE str, long beg, long len, VALUE val)
Definition: string.c:3668
RUBY_EXTERN VALUE rb_cObject
Definition: ruby.h:1561
#define TERM_LEN(str)
Definition: string.c:87
VALUE rb_eRuntimeError
Definition: error.c:547
static VALUE sym_to_sym(VALUE sym)
Definition: string.c:8500
#define rb_enc_isascii(c, enc)
Definition: encoding.h:178
void * rb_alloc_tmp_buffer(volatile VALUE *store, long len)
Definition: string.c:925
VALUE rb_str_to_inum(VALUE str, int base, int badcheck)
Definition: bignum.c:4129
static VALUE str_new_shared(VALUE klass, VALUE str)
Definition: string.c:787
#define MBCLEN_NEEDMORE_P(ret)
Definition: encoding.h:142
VALUE rb_str_length(VALUE str)
Definition: string.c:1298
#define RSTRING_END(str)
Definition: ruby.h:849
static VALUE rb_str_rpartition(VALUE str, VALUE sep)
Definition: string.c:7779
int rb_isspace(int c)
Definition: encoding.c:1947
static VALUE rb_str_crypt(VALUE str, VALUE salt)
Definition: string.c:7411
static VALUE rb_str_cmp_m(VALUE str1, VALUE str2)
Definition: string.c:2595
int rb_str_symname_p(VALUE sym)
Definition: string.c:8384
VALUE rb_ary_new(void)
Definition: array.c:499
VALUE rb_str_new_cstr(const char *ptr)
Definition: string.c:560
static void str_modify_keep_cr(VALUE str)
Definition: string.c:1517
#define dp(v)
Definition: vm_debug.h:21
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Definition: transcode.c:2577
#define UINT2NUM(x)
Definition: ruby.h:1306
#define STR_BUF_MIN_SIZE
Definition: string.c:888
#define STR_SET_EMBED(str)
Definition: string.c:60
static VALUE rb_str_tr_s(VALUE str, VALUE src, VALUE repl)
Definition: string.c:6025
#define snprintf
Definition: subst.h:6
#define NIL_P(v)
Definition: ruby.h:438
#define ISASCII(c)
Definition: ruby.h:1774
#define add(x, y)
Definition: date_strftime.c:23
static VALUE rb_str_delete(int argc, VALUE *argv, VALUE str)
Definition: string.c:5881
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
Definition: class.c:611
#define rb_enc_step_back(s, p, e, n, enc)
Definition: encoding.h:172
void rb_enc_set_index(VALUE obj, int idx)
Definition: encoding.c:790
int st_delete(st_table *, st_data_t *, st_data_t *)
static VALUE rb_str_enumerate_chars(VALUE str, int wantarray)
Definition: string.c:6679
static VALUE rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, int wantarray)
Definition: string.c:6420
void rb_ary_store(VALUE ary, long idx, VALUE val)
Definition: array.c:794
static VALUE rb_str_tr_s_bang(VALUE str, VALUE src, VALUE repl)
Definition: string.c:6005
#define RUBY_DTRACE_STRING_CREATE_ENABLED()
Definition: probes.h:48
VALUE rb_str_concat(VALUE str1, VALUE str2)
Definition: string.c:2339
#define TOUPPER(c)
Definition: ruby.h:1786
#define END(no)
Definition: string.c:23
#define OBJ_FROZEN(x)
Definition: ruby.h:1193
RUBY_ALIAS_FUNCTION(rb_str_dup_frozen(VALUE str), rb_str_new_frozen,(str))
Definition: string.c:1976
#define STR_ENC_GET(str)
Definition: string.c:124
static VALUE rb_str_strip_bang(VALUE str)
Definition: string.c:7222
#define TYPE(x)
Definition: ruby.h:505
int argc
Definition: ruby.c:131
VALUE rb_str_scrub(VALUE str, VALUE repl)
Definition: string.c:8037
#define Qfalse
Definition: ruby.h:425
VALUE rb_cEncodingConverter
Definition: transcode.c:25
long rb_str_offset(VALUE str, long pos)
Definition: string.c:1780
#define rb_sourcefile()
Definition: tcltklib.c:98
#define STR_SET_EMBED_LEN(str, n)
Definition: string.c:61
#define STR_ASSOC_P(s)
Definition: internal.h:728
#define ALLOCA_N(type, n)
Definition: ruby.h:1345
#define T_BIGNUM
Definition: ruby.h:487
#define range(low, item, hi)
Definition: date_strftime.c:21
#define ENC_CODERANGE_UNKNOWN
Definition: encoding.h:48
#define LONG_MAX
Definition: ruby.h:191
void rb_gc_register_mark_object(VALUE obj)
Definition: gc.c:4923
static VALUE rb_str_tr_bang(VALUE str, VALUE src, VALUE repl)
Definition: string.c:5660
#define rb_enc_isprint(c, enc)
Definition: encoding.h:184
#define RUBY_FUNC_EXPORTED
Definition: defines.h:246
#define MEMCPY(p1, p2, type, n)
Definition: ruby.h:1360
#define ENC_CODERANGE_BROKEN
Definition: encoding.h:51
#define rb_enc_isupper(c, enc)
Definition: encoding.h:181
VALUE rb_eEncCompatError
Definition: error.c:555
#define rb_str_new2
Definition: intern.h:840
VALUE rb_obj_alloc(VALUE)
Definition: object.c:1804
#define rb_enc_codepoint(p, e, enc)
Definition: encoding.h:155
#define OBJ_FREEZE(x)
Definition: ruby.h:1194
void rb_str_update(VALUE str, long beg, long len, VALUE val)
Definition: string.c:3744
#define rb_enc_mbminlen(enc)
Definition: encoding.h:128
unsigned int max
Definition: string.c:5345
#define STR_SHARED_P(s)
Definition: internal.h:727
static VALUE rb_str_each_char_size(VALUE str, VALUE args, VALUE eobj)
Definition: string.c:6673
static VALUE sym_call(VALUE args, VALUE sym, int argc, VALUE *argv, VALUE passed_proc)
Definition: string.c:8506
VALUE rb_eIndexError
Definition: error.c:550
static VALUE rb_str_rjust(int argc, VALUE *argv, VALUE str)
Definition: string.c:7688
#define ENC_CODERANGE_VALID
Definition: encoding.h:50
#define numberof(array)
Definition: etc.c:602
long rb_str_sublen(VALUE str, long pos)
Definition: string.c:1827
static VALUE sym_capitalize(VALUE sym)
Definition: string.c:8702
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
Definition: oniguruma.h:267
VALUE rb_str_times(VALUE str, VALUE times)
Definition: string.c:1383
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Definition: encoding.c:970
static VALUE sym_cmp(VALUE sym, VALUE other)
Definition: string.c:8586
void rb_str_modify_expand(VALUE str, long expand)
Definition: string.c:1491
#define sub(x, y)
Definition: date_strftime.c:24
static void rb_str_splice(VALUE str, long beg, long len, VALUE val)
Definition: string.c:3701
static VALUE str_eql(const VALUE str1, const VALUE str2)
Definition: string.c:2512
#define RSTRING_LEN(str)
Definition: ruby.h:841
static VALUE sym_encoding(VALUE sym)
Definition: string.c:8728
VALUE rb_yield(VALUE)
Definition: vm_eval.c:948
static VALUE rb_str_swapcase(VALUE str)
Definition: string.c:5334
VALUE rb_obj_as_string(VALUE obj)
Definition: string.c:1011
#define RARRAY_CONST_PTR(a)
Definition: ruby.h:886
VALUE rb_str_subseq(VALUE str, long beg, long len)
Definition: string.c:1838
#define REALLOC_N(var, type, n)
Definition: ruby.h:1343
char * rb_string_value_cstr(volatile VALUE *ptr)
Definition: string.c:1643
#define RUBY_MAX_CHAR_LEN
Definition: string.c:50
#define TRUE
Definition: nkf.h:175
static VALUE rb_str_byteslice(int argc, VALUE *argv, VALUE str)
Definition: string.c:4528
static long str_rindex(VALUE str, VALUE sub, const char *s, long pos, rb_encoding *enc)
Definition: string.c:2837
VALUE rb_str_format(int, const VALUE *, VALUE)
Definition: sprintf.c:421
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:958
int rb_enc_unicode_p(rb_encoding *enc)
Definition: encoding.c:496
#define STR_TMPLOCK
Definition: string.c:51
#define rb_enc_name(enc)
Definition: encoding.h:125
#define RSTRING_EMBED_LEN_MAX
Definition: ruby.h:819
int rb_enc_symname_p(const char *name, rb_encoding *enc)
Definition: ripper.c:16874
static VALUE rb_str_tr(VALUE str, VALUE src, VALUE repl)
Definition: string.c:5702
static VALUE rb_str_chop_bang(VALUE str)
Definition: string.c:6901
static VALUE str_new_empty(VALUE str)
Definition: string.c:880
VALUE rb_hash_new(void)
Definition: hash.c:307
void ruby_xfree(void *x)
Definition: gc.c:6245
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Definition: class.c:1719
static VALUE rb_str_enumerate_codepoints(VALUE str, int wantarray)
Definition: string.c:6780
static VALUE rb_str_squeeze(int argc, VALUE *argv, VALUE str)
Definition: string.c:5988
long rb_reg_search(VALUE, VALUE, long, int)
Definition: re.c:1410
static VALUE str_duplicate(VALUE klass, VALUE str)
Definition: string.c:1054
VALUE rb_check_hash_type(VALUE hash)
Definition: hash.c:597
int rb_str_cmp(VALUE str1, VALUE str2)
Definition: string.c:2485
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4308
#define PRIsVALUE
Definition: ruby.h:137
#define no_digits()
unsigned long ID
Definition: ruby.h:89
VALUE rb_str_buf_new_cstr(const char *ptr)
Definition: string.c:907
rb_encoding * rb_usascii_encoding(void)
Definition: encoding.c:1272
static VALUE rb_str_aref_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:3627
static VALUE str_scrub_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:8312
static VALUE sym_to_proc(VALUE sym)
Definition: string.c:8527
#define Qnil
Definition: ruby.h:427
static VALUE rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:5898
static VALUE get_pat(VALUE, int)
Definition: string.c:3943
const char * name
Definition: oniguruma.h:160
VALUE rb_str_buf_cat(VALUE str, const char *ptr, long len)
Definition: string.c:2123
#define BUILTIN_TYPE(x)
Definition: ruby.h:502
#define OBJ_TAINT(x)
Definition: ruby.h:1184
unsigned long VALUE
Definition: ruby.h:88
static enum neighbor_char enc_pred_char(char *p, long len, rb_encoding *enc)
Definition: string.c:3112
VALUE rb_cSymbol
Definition: string.c:48
static int tr_find(unsigned int c, const char table[TR_TABLE_SIZE], VALUE del, VALUE nodel)
Definition: string.c:5775
rb_encoding * rb_locale_encoding(void)
Definition: encoding.c:1309
#define rb_funcall2
Definition: ruby.h:1464
static VALUE result
Definition: nkf.c:40
VALUE rb_str_replace(VALUE str, VALUE str2)
Definition: string.c:4322
static VALUE rb_str_lstrip_bang(VALUE str)
Definition: string.c:7094
#define rb_enc_is_newline(p, end, enc)
Definition: encoding.h:175
static VALUE str_new(VALUE klass, const char *ptr, long len)
Definition: string.c:528
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
Definition: intern.h:237
static VALUE str_alloc(VALUE klass)
Definition: string.c:482
#define UNINITIALIZED_VAR(x)
Definition: vm_core.h:121
#define RBASIC(obj)
Definition: ruby.h:1116
static VALUE rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:3982
VALUE rb_str_buf_cat2(VALUE str, const char *ptr)
Definition: string.c:2133
#define ENC_CODERANGE_AND(a, b)
Definition: encoding.h:59
static VALUE rb_str_is_ascii_only_p(VALUE str)
Definition: string.c:7947
void rb_str_shared_replace(VALUE str, VALUE str2)
Definition: string.c:972
#define rb_usascii_encindex()
Definition: internal.h:404
VALUE rb_obj_encoding(VALUE obj)
Definition: encoding.c:930
#define rb_ary_new3
Definition: intern.h:91
static int rb_enc_dummy_p(rb_encoding *enc)
Definition: encoding.h:245
VALUE rb_check_funcall(VALUE, ID, int, const VALUE *)
Definition: vm_eval.c:410
#define TERM_FILL(ptr, termlen)
Definition: string.c:88
#define RUBY_DTRACE_STRING_CREATE(arg0, arg1, arg2)
Definition: probes.h:49
#define rb_enc_asciicompat(enc)
Definition: encoding.h:188
static VALUE rb_str_chomp_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:6951
VALUE rb_ensure(VALUE(*b_proc)(ANYARGS), VALUE data1, VALUE(*e_proc)(ANYARGS), VALUE data2)
Definition: eval.c:839
VALUE rb_str_buf_cat_ascii(VALUE str, const char *ptr)
Definition: string.c:2257
int memcmp(const void *s1, const void *s2, size_t len)
Definition: memcmp.c:7
VALUE rb_str_quote_unprintable(VALUE str)
Definition: string.c:8403
static VALUE sym_casecmp(VALUE sym, VALUE other)
Definition: string.c:8603
long rb_str_coderange_scan_restartable(const char *s, const char *e, rb_encoding *enc, int *cr)
Definition: string.c:340
static int zero_filled(const char *s, int n)
Definition: string.c:1606
static char * str_nth_len(const char *p, const char *e, long *nthp, rb_encoding *enc)
Definition: string.c:1702
#define RARRAY_LENINT(ary)
Definition: ruby.h:884
RUBY_EXTERN VALUE rb_rs
Definition: intern.h:518
static VALUE rb_str_getbyte(VALUE str, VALUE index)
Definition: string.c:4379
static void rb_enc_cr_str_exact_copy(VALUE dest, VALUE src)
Definition: string.c:428
VALUE rb_block_clear_env_self(VALUE proc)
Definition: proc.c:640
void rb_sys_fail(const char *mesg)
Definition: error.c:1976
static VALUE rb_str_chr(VALUE str)
Definition: string.c:4367
#define ENCODING_IS_ASCII8BIT(obj)
Definition: encoding.h:43
static const char * search_nonascii(const char *p, const char *e)
Definition: string.c:254
static VALUE str_scrub(int argc, VALUE *argv, VALUE str)
Definition: string.c:8290
static void str_modifiable(VALUE str)
Definition: string.c:1442
static VALUE rb_str_bytes(VALUE str)
Definition: string.c:6667
static VALUE rb_str_index_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:2747
VALUE rb_fstring(VALUE str)
Definition: string.c:201
#define CHAR_BIT
Definition: ruby.h:198
VALUE rb_str_to_str(VALUE str)
Definition: string.c:964
static VALUE rb_str_match_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:3039
#define FL_UNSET(x, f)
Definition: ruby.h:1177
static void str_mod_check(VALUE s, const char *p, long len)
Definition: string.c:460
VALUE rb_string_value(volatile VALUE *ptr)
Definition: string.c:1588
static VALUE rb_str_lines(int argc, VALUE *argv, VALUE str)
Definition: string.c:6587
VALUE rb_tainted_str_new_cstr(const char *ptr)
Definition: string.c:598
#define LONG2NUM(x)
Definition: ruby.h:1317
static const char isspacetable[256]
Definition: string.c:6130
int rb_respond_to(VALUE, ID)
Definition: vm_method.c:1651
static VALUE scan_once(VALUE str, VALUE pat, long *start)
Definition: string.c:7251
static VALUE rb_str_sub(int argc, VALUE *argv, VALUE str)
Definition: string.c:4120
VALUE rb_usascii_str_new(const char *ptr, long len)
Definition: string.c:540
VALUE rb_str_buf_append(VALUE str, VALUE str2)
Definition: string.c:2281
static VALUE rb_str_s_try_convert(VALUE dummy, VALUE str)
Definition: string.c:1696
#define RMATCH_REGS(obj)
Definition: re.h:52
RUBY_EXTERN VALUE rb_default_rs
Definition: intern.h:519
static VALUE sym_succ(VALUE sym)
Definition: string.c:8566
void rb_str_free(VALUE str)
Definition: string.c:941
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Definition: string.c:737
static VALUE rb_str_end_with(int argc, VALUE *argv, VALUE str)
Definition: string.c:7852
#define RSTRING_PTR(str)
Definition: ruby.h:845
#define rb_enc_right_char_head(s, p, e, enc)
Definition: encoding.h:171
static void str_enc_copy(VALUE str1, VALUE str2)
Definition: string.c:392
rb_encoding * rb_enc_get_from_index(int index)
Definition: encoding.c:602
#define ENCODING_GET(obj)
Definition: encoding.h:38
VALUE rb_equal(VALUE, VALUE)
Definition: object.c:89
static ID id_to_s
Definition: string.c:1008
rb_encoding * rb_enc_get(VALUE obj)
Definition: encoding.c:832
#define STR_SET_SHARED(str, shared_str)
Definition: string.c:116
#define STR_ASSOC
#define STR_HEAP_PTR(str)
Definition: string.c:121
int size
Definition: encoding.c:49
static VALUE rb_str_hex(VALUE str)
Definition: string.c:7366
char * rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc)
Definition: string.c:1752
static char * str_nth(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
Definition: string.c:1758
#define f
static VALUE rb_str_reverse(VALUE str)
Definition: string.c:4547
#define INT2FIX(i)
Definition: ruby.h:231
static VALUE rb_str_downcase(VALUE str)
Definition: string.c:5200
#define UNLIMITED_ARGUMENTS
Definition: intern.h:44
char * rb_str_subpos(VALUE str, long beg, long *lenp)
Definition: string.c:1859
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, s, end)
Definition: oniguruma.h:234
int rb_sourceline(void)
Definition: vm.c:1001
VALUE rb_str_unlocktmp(VALUE str)
Definition: string.c:1990
VALUE rb_tainted_str_new(const char *ptr, long len)
Definition: string.c:589
#define MBCLEN_INVALID_P(ret)
Definition: encoding.h:141
static VALUE rb_str_valid_encoding_p(VALUE str)
Definition: string.c:7929
#define RARRAY_AREF(a, i)
Definition: ruby.h:901
static VALUE rb_str_each_byte(VALUE str)
Definition: string.c:6650
static VALUE rb_str_chop(VALUE str)
Definition: string.c:6936
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Definition: transcode.c:1444
VALUE rb_check_convert_type(VALUE, int, const char *, const char *)
Definition: object.c:2652
static VALUE rb_str_count(int argc, VALUE *argv, VALUE str)
Definition: string.c:6061
#define STR_SET_LEN(str, n)
Definition: string.c:67
static VALUE rb_str_eql(VALUE str1, VALUE str2)
Definition: string.c:2562
static void rb_str_subpat_set(VALUE str, VALUE re, VALUE backref, VALUE val)
Definition: string.c:3750
static long enc_strlen(const char *p, const char *e, rb_encoding *enc, int cr)
Definition: string.c:1095
static VALUE rb_str_lstrip(VALUE str)
Definition: string.c:7135
#define lesser(a, b)
Definition: string.c:2457
VALUE rb_check_array_type(VALUE ary)
Definition: array.c:632
#define FL_SET_RAW(x, f)
Definition: ruby.h:1174
VALUE rb_hash_aref(VALUE hash, VALUE key)
Definition: hash.c:706
static enum neighbor_char enc_succ_alnum_char(char *p, long len, rb_encoding *enc, char *carry)
Definition: string.c:3174
#define RARRAY_PTR(a)
Definition: ruby.h:907
static VALUE sym_match(VALUE sym, VALUE other)
Definition: string.c:8620
VALUE rb_reg_quote(VALUE)
Definition: re.c:3013
static long rb_str_index(VALUE str, VALUE sub, long offset)
Definition: string.c:2685
#define FL_WB_PROTECTED
Definition: ruby.h:1134
#define ENC_CODERANGE(obj)
Definition: encoding.h:52
static VALUE rb_str_upto(int argc, VALUE *argv, VALUE beg)
Definition: string.c:3401
static VALUE str_byte_substr(VALUE str, long beg, long len)
Definition: string.c:4416
VALUE rb_str_cat(VALUE str, const char *ptr, long len)
Definition: string.c:2139
uint8_t key[16]
Definition: random.c:1250
VALUE rb_any_to_s(VALUE)
Definition: object.c:452
long rb_str_strlen(VALUE str)
Definition: string.c:1284
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Definition: string.c:607
#define LONG2FIX(i)
Definition: ruby.h:232
#define SIZEOF_VALUE
Definition: ruby.h:91
static VALUE tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
Definition: string.c:5406
#define RTEST(v)
Definition: ruby.h:437
int rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p)
Definition: string.c:4750
VALUE rb_proc_new(VALUE(*)(ANYARGS), VALUE)
Definition: proc.c:2312
#define T_STRING
Definition: ruby.h:482
VALUE rb_str_locktmp(VALUE)
void rb_gc_resurrect(VALUE obj)
Definition: gc.c:3615
#define OBJ_INFECT(x, s)
Definition: ruby.h:1188
#define RREGEXP(obj)
Definition: ruby.h:1122
static VALUE rb_str_capitalize(VALUE str)
Definition: string.c:5271
VALUE rb_str_drop_bytes(VALUE str, long len)
Definition: string.c:3640
size_t rb_str_capacity(VALUE str)
Definition: string.c:468
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Definition: string.c:577
rb_encoding * rb_filesystem_encoding(void)
Definition: encoding.c:1324
static VALUE rb_str_init(int argc, VALUE *argv, VALUE str)
Definition: string.c:1085
void rb_define_variable(const char *, VALUE *)
Definition: variable.c:604
void rb_str_setter(VALUE val, ID id, VALUE *var)
Definition: string.c:7875
static VALUE rb_str_rstrip_bang(VALUE str)
Definition: string.c:7156
VALUE rb_str_tmp_new(long len)
Definition: string.c:919
static VALUE rb_str_each_char(VALUE str)
Definition: string.c:6756
VALUE rb_fs
Definition: string.c:251
#define ISPRINT(c)
Definition: ruby.h:1776
#define rb_enc_left_char_head(s, p, e, enc)
Definition: encoding.h:170
static VALUE str_replace_shared(VALUE str2, VALUE str)
Definition: string.c:779
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Definition: string.c:680
static void str_make_independent_expand(VALUE str, long expand)
Definition: string.c:1460
VALUE rb_ary_concat(VALUE x, VALUE y)
Definition: array.c:3553
static unsigned int hash(const char *str, unsigned int len)
Definition: lex.c:56
#define RETURN_ENUMERATOR(obj, argc, argv)
Definition: intern.h:242
static VALUE rb_str_start_with(int argc, VALUE *argv, VALUE str)
Definition: string.c:7829
VALUE rb_str_substr(VALUE str, long beg, long len)
Definition: string.c:1944
static void str_discard(VALUE str)
Definition: string.c:1527
#define RREGEXP_SRC_LEN(r)
Definition: ruby.h:917
void rb_must_asciicompat(VALUE str)
Definition: string.c:1579
#define assert(condition)
Definition: ossl.h:45
const char * name
Definition: nkf.c:208
#define FL_SET(x, f)
Definition: ruby.h:1175
VALUE rb_str_associated(VALUE str)
Definition: string.c:1569
#define ID2SYM(x)
Definition: ruby.h:355
const char * rb_id2name(ID id)
Definition: ripper.c:17271
int gen
Definition: string.c:5344
#define STR_NOCAPA_P(s)
Definition: internal.h:730
static VALUE sym_empty(VALUE sym)
Definition: string.c:8663
static VALUE rb_str_to_s(VALUE str)
Definition: string.c:4727
static VALUE str_byte_aref(VALUE str, VALUE indx)
Definition: string.c:4471
VALUE rb_external_str_new(const char *ptr, long len)
Definition: string.c:707
void rb_str_associate(VALUE str, VALUE add)
Definition: string.c:1538
#define rb_enc_to_index(enc)
Definition: encoding.h:77
VALUE rb_str_succ(VALUE orig)
Definition: string.c:3262
rb_encoding * rb_ascii8bit_encoding(void)
Definition: encoding.c:1242
static VALUE rb_str_downcase_bang(VALUE str)
Definition: string.c:5135
static VALUE rb_enc_cr_str_buf_cat(VALUE str, const char *ptr, long len, int ptr_encindex, int ptr_cr, int *ptr_cr_ret)
Definition: string.c:2164
void rb_warning(const char *fmt,...)
Definition: error.c:236
#define rb_check_frozen(obj)
Definition: intern.h:277
#define CONST_ID(var, str)
Definition: ruby.h:1436
st_table * st_init_table(const struct st_hash_type *)
Definition: st.c:266
static VALUE rb_str_sum(int argc, VALUE *argv, VALUE str)
Definition: string.c:7506
VALUE rb_str_inspect(VALUE str)
Definition: string.c:4795
void rb_free_tmp_buffer(volatile VALUE *store)
Definition: string.c:933
static void tr_setup_table(VALUE str, char stable[TR_TABLE_SIZE], int first, VALUE *tablep, VALUE *ctablep, rb_encoding *enc)
Definition: string.c:5711
VALUE rb_obj_freeze(VALUE)
Definition: object.c:1070
#define SPECIAL_CONST_P(x)
Definition: ruby.h:1165
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Definition: transcode.c:2884
void xfree(void *)
VALUE rb_str_buf_new(long capa)
Definition: string.c:891
int rb_num_to_uint(VALUE val, unsigned int *ret)
Definition: numeric.c:132
static VALUE rb_str_casecmp(VALUE str1, VALUE str2)
Definition: string.c:2627
#define ONIGERR_INVALID_CODE_POINT_VALUE
Definition: oniguruma.h:587
int rb_str_comparable(VALUE str1, VALUE str2)
Definition: string.c:2460
#define rb_enc_mbcput(c, buf, enc)
Definition: encoding.h:165
VALUE rb_str_cat2(VALUE str, const char *ptr)
Definition: string.c:2158
#define SYMBOL_P(x)
Definition: ruby.h:354
#define mod(x, y)
Definition: date_strftime.c:28
VALUE rb_str_ord(VALUE s)
Definition: string.c:7487
VALUE rb_str_locktmp_ensure(VALUE str, VALUE(*func)(VALUE), VALUE arg)
Definition: string.c:2000
#define rb_str_dup_frozen
static VALUE sym_aref(int argc, VALUE *argv, VALUE sym)
Definition: string.c:8636
#define NULL
Definition: _sdbm.c:102
#define OBJ_TAINTED_RAW(x)
Definition: ruby.h:1181
#define FIX2LONG(x)
Definition: ruby.h:345
VALUE rb_invcmp(VALUE x, VALUE y)
Definition: compar.c:42
VALUE rb_str_resurrect(VALUE str)
Definition: string.c:1068
static VALUE rb_str_aref(VALUE str, VALUE indx)
Definition: string.c:3508
VALUE rb_check_string_type(VALUE str)
Definition: string.c:1678
VALUE rb_usascii_str_new_cstr(const char *ptr)
Definition: string.c:569
VALUE rb_id_quote_unprintable(ID id)
Definition: string.c:8424
VALUE rb_reg_regcomp(VALUE)
Definition: re.c:2597
static int match(VALUE str, VALUE pat, VALUE hash, int(*cb)(VALUE, VALUE))
Definition: date_parse.c:273
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Definition: class.c:1479
int st_foreach(st_table *, int(*)(ANYARGS), st_data_t)
Definition: st.c:1034
static VALUE rb_str_delete_bang(int, VALUE *, VALUE)
Definition: string.c:5805
#define STR_NOCAPA
Definition: internal.h:729
void rb_warn(const char *fmt,...)
Definition: error.c:223
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
Definition: string.c:7969
VALUE rb_eArgError
Definition: error.c:549
static ID cmp
Definition: compar.c:16
#define rb_enc_prev_char(s, p, e, enc)
Definition: encoding.h:168
static VALUE rb_str_force_encoding(VALUE str, VALUE enc)
Definition: string.c:7892
#define T_REGEXP
Definition: ruby.h:483
#define STR_HEAP_SIZE(str)
Definition: string.c:122
#define IS_EVSTR(p, e)
Definition: string.c:4889
VALUE rb_str_dump(VALUE str)
Definition: string.c:4902
#define NUM2LONG(x)
Definition: ruby.h:600
#define STR_NOEMBED
Definition: internal.h:724
static VALUE rb_str_rindex_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:2912
#define TR_TABLE_SIZE
Definition: string.c:5709
#define RB_OBJ_WRITE(a, slot, b)
Definition: ruby.h:1221
VALUE rb_reg_nth_match(int, VALUE)
Definition: re.c:1515
#define rb_enc_code_to_mbclen(c, enc)
Definition: encoding.h:162
static VALUE rb_str_each_codepoint(VALUE str)
Definition: string.c:6849
void rb_str_modify(VALUE str)
Definition: string.c:1483
#define STR_EMBED_P(str)
Definition: internal.h:731
char ** argv
Definition: ruby.c:132
ID rb_to_id(VALUE name)
Definition: string.c:8734
#define DBL2NUM(dbl)
Definition: ruby.h:815
#define StringValue(v)
Definition: ruby.h:539
VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
Definition: string.c:548
static enum neighbor_char enc_succ_char(char *p, long len, rb_encoding *enc)
Definition: string.c:3060
VALUE rb_external_str_new_cstr(const char *ptr)
Definition: string.c:713
rb_encoding * rb_enc_from_index(int index)
Definition: encoding.c:590
#define rb_str_new5
Definition: intern.h:843
VALUE rb_obj_class(VALUE)
Definition: object.c:226
VALUE rb_str_dup(VALUE str)
Definition: string.c:1062
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Definition: string.c:874