static int quoted_term; #define quoted_term_char ((unsigned char)quoted_term) #define WHEN_QUOTED_TERM(x) ((quoted_term >= 0) && (x)) #define QUOTED_TERM_P(c) WHEN_QUOTED_TERM((c) == quoted_term_char) #define STR_FUNC_ESCAPE 0x01 #define STR_FUNC_EXPAND 0x02 #define STR_FUNC_REGEXP 0x04 #define STR_FUNC_QWORDS 0x08 #define STR_FUNC_INDENT 0x20 enum string_type { str_squote = (0), str_dquote = (STR_FUNC_EXPAND), str_xquote = (STR_FUNC_ESCAPE|STR_FUNC_EXPAND), str_regexp = (STR_FUNC_REGEXP|STR_FUNC_ESCAPE|STR_FUNC_EXPAND), str_sword = (STR_FUNC_QWORDS), str_dword = (STR_FUNC_QWORDS|STR_FUNC_EXPAND), }; #define NEW_STRTERM(func, term, paren) \ rb_node_newnode(NODE_STRTERM, (func), (term), (paren)) static int parse_string(quote) NODE *quote; { int func = quote->nd_func; int term = quote->nd_term; int paren = quote->nd_paren; int c, space = 0; if (func == -1) return tSTRING_END; c = nextc(); if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) { do {c = nextc();} while (ISSPACE(c)); space = 1; } if (c == term) { if (!lex_strnest) { eos: if (func & STR_FUNC_QWORDS) { quote->nd_func = -1; return ' '; } if (!(func & STR_FUNC_REGEXP)) return tSTRING_END; yylval.num = regx_options(); return tREGEXP_END; } } if (c == '\\' && WHEN_QUOTED_TERM(peek(quoted_term_char))) { if ((c = nextc()) == term) goto eos; } if (space) { pushback(c); return ' '; } newtok(); if ((func & STR_FUNC_EXPAND) && c == '#') { switch (c = nextc()) { case '$': case '@': pushback(c); return tSTRING_DVAR; case '{': return tSTRING_DBEG; } tokadd('#'); } pushback(c); if (tokadd_string(func, term, paren) == -1) { ruby_sourceline = nd_line(quote); rb_compile_error("unterminated string meets end of file"); return tSTRING_END; } tokfix(); yylval.node = NEW_STR(rb_str_new(tok(), toklen())); return tSTRING_CONTENT; } static int read_escape() { int c; switch (c = nextc()) { case '\\': /* Backslash */ return c; case 'n': /* newline */ return '\n'; case 't': /* horizontal tab */ return '\t'; case 'r': /* carriage-return */ return '\r'; case 'f': /* form-feed */ return '\f'; case 'v': /* vertical tab */ return '\13'; case 'a': /* alarm(bell) */ return '\007'; case 'e': /* escape */ return 033; case '0': case '1': case '2': case '3': /* octal constant */ case '4': case '5': case '6': case '7': { int numlen; pushback(c); c = scan_oct(lex_p, 3, &numlen); lex_p += numlen; } return c; case 'x': /* hex constant */ { int numlen; c = scan_hex(lex_p, 2, &numlen); if (numlen == 0) { yyerror("Invalid escape character syntax"); return 0; } lex_p += numlen; } return c; case 'b': /* backspace */ return '\010'; case 's': /* space */ return ' '; case 'M': if ((c = nextc()) != '-') { yyerror("Invalid escape character syntax"); pushback(c); return '\0'; } if ((c = nextc()) == '\\') { return read_escape() | 0x80; } else if (c == -1) goto eof; else { return ((c & 0xff) | 0x80); } case 'C': if ((c = nextc()) != '-') { yyerror("Invalid escape character syntax"); pushback(c); return '\0'; } case 'c': if ((c = nextc())== '\\') { c = read_escape(); } else if (c == '?') return 0177; else if (c == -1) goto eof; return c & 0x9f; eof: case -1: yyerror("Invalid escape character syntax"); return '\0'; default: return c; } } static int tokadd_escape(term) int term; { int c; switch (c = nextc()) { case '\n': return 0; /* just ignore */ case '0': case '1': case '2': case '3': /* octal constant */ case '4': case '5': case '6': case '7': { int i; tokadd('\\'); tokadd(c); for (i=0; i<2; i++) { c = nextc(); if (c == -1) goto eof; if (c < '0' || '7' < c) { pushback(c); break; } tokadd(c); } } return 0; case 'x': /* hex constant */ { int numlen; tokadd('\\'); tokadd(c); scan_hex(lex_p, 2, &numlen); if (numlen == 0) { yyerror("Invalid escape character syntax"); return -1; } while (numlen--) tokadd(nextc()); } return 0; case 'M': if ((c = nextc()) != '-') { yyerror("Invalid escape character syntax"); pushback(c); return 0; } tokadd('\\'); tokadd('M'); tokadd('-'); goto escaped; case 'C': if ((c = nextc()) != '-') { yyerror("Invalid escape character syntax"); pushback(c); return 0; } tokadd('\\'); tokadd('C'); tokadd('-'); goto escaped; case 'c': tokadd('\\'); tokadd('c'); escaped: if ((c = nextc()) == '\\') { return tokadd_escape(term); } else if (c == -1) goto eof; tokadd(c); return 0; eof: case -1: yyerror("Invalid escape character syntax"); return -1; default: if (c != '\\' || c != term) tokadd('\\'); tokadd(c); } return 0; } static int regx_options() { char kcode = 0; int options = 0; int c; newtok(); while (c = nextc(), ISALPHA(c)) { switch (c) { case 'i': options |= RE_OPTION_IGNORECASE; break; case 'x': options |= RE_OPTION_EXTENDED; break; case 'm': options |= RE_OPTION_MULTILINE; break; case 'o': options |= RE_OPTION_ONCE; break; case 'n': kcode = 16; break; case 'e': kcode = 32; break; case 's': kcode = 48; break; case 'u': kcode = 64; break; default: tokadd(c); break; } } pushback(c); if (toklen()) { tokfix(); rb_compile_error("unknown regexp option%s - %s", toklen() > 1 ? "s" : "", tok()); } return options | kcode; } static int tokadd_string(func, term, paren) int func, term, paren; { int c; while ((c = nextc()) != -1) { if (paren && c == paren) { lex_strnest++; } else if (c == term) { if (!lex_strnest) { pushback(c); break; } --lex_strnest; } else if ((func & STR_FUNC_EXPAND) && c == '#' && lex_p < lex_pend) { int c2 = *lex_p; if (c2 == '$' || c2 == '@' || c2 == '{') { pushback(c); break; } } else if (c == '\\') { c = nextc(); if (QUOTED_TERM_P(c)) { pushback(c); return c; } switch (c) { case '\n': continue; case '\\': if (func & STR_FUNC_ESCAPE) tokadd(c); break; default: if (func & STR_FUNC_REGEXP) { pushback(c); if (tokadd_escape(term) < 0) return -1; continue; } else if (func & STR_FUNC_EXPAND) { pushback(c); if (func & STR_FUNC_ESCAPE) tokadd('\\'); c = read_escape(); } else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) { /* ignore backslashed spaces in %w */ } else if (c != term && !(paren && c == paren)) { tokadd('\\'); } } } else if (ismbchar(c)) { int i, len = mbclen(c)-1; for (i = 0; i < len; i++) { tokadd(c); c = nextc(); } } else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) { pushback(c); break; } tokadd(c); } return c; }