マーク付けに使う文字の変更 #2
構文規則を元にyacc/bison用ソースを書いてみた。
parser.y
%{
/*
 * parser.y -- reads text from standard input and reports plain strings and
 * "marked" strings (optionally carrying an annotation), one per output line.
 *
 * With the default meta characters the input
 *     a (dog:bowwow)
 * is reported as a plain string "a " followed by the marked string "dog"
 * with the annotation "bowwow".  The three meta characters can be replaced
 * with the -s, -e and -a command-line options.
 *
 * Every semantic value is a heap-allocated string (or NULL for tokens that
 * carry no text); the action that consumes a value is responsible for
 * freeing it.
 */
#define YYSTYPE char*
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>

int yylex(void);
void yyerror(char const *);
static char *estrdupcat(const char *s1, const char *s2);
static char *estrdup(const char *s);
static int option_char(const char *arg, const char *what);
%}

%token STRING_EXCEPT_SPECIAL START_MARK END_MARK START_ANNOTATION

%%

/* The whole input: an alternation of plain strings and marked strings. */
text
    : string_headed
    | marked_string_headed
    ;

/* A plain string, optionally followed by more input starting with a mark. */
string_headed
    : string_except_parenthesis
        { printf("string [%s]\n", $1); free($1); }
    | string_except_parenthesis
        { printf("string [%s]\n", $1); free($1); }
      marked_string_headed
    ;

/* A marked string, optionally followed by more input. */
marked_string_headed
    : marked_string
    | marked_string marked_string_headed
    | marked_string string_headed
    ;

marked_string
    : START_MARK marked_string_body_with_annotation END_MARK
    ;

/* Body of a marked string: the text, optionally followed by an annotation. */
marked_string_body_with_annotation
    : STRING_EXCEPT_SPECIAL
        { printf("marked-string [%s]\n", $1); free($1); }
    | STRING_EXCEPT_SPECIAL START_ANNOTATION string_except_parenthesis
        {
            printf("marked-string [%s]: annotation[%s]\n", $1, $3);
            free($1);
            free($2);   /* START_ANNOTATION carries its own heap string */
            free($3);
        }
    ;

/*
 * A run of ordinary text.  Outside the first position of a marked string the
 * annotation character has no special meaning, so it is folded back into the
 * surrounding text.
 */
string_except_parenthesis
    : STRING_EXCEPT_SPECIAL
        { $$ = estrdupcat(NULL, $1); free($1); }
    | START_ANNOTATION
        { $$ = estrdupcat(NULL, $1); free($1); }
    | string_except_parenthesis STRING_EXCEPT_SPECIAL
        { $$ = estrdupcat($1, $2); free($1); free($2); }
    | string_except_parenthesis START_ANNOTATION
        { $$ = estrdupcat($1, $2); free($1); free($2); }
    ;

%%

/*
 * Meta characters, held as unsigned char values converted to int so that
 * they compare correctly against the result of getchar().
 */
static int start_mark_char = '(';
static int end_mark_char = ')';
static int start_annotation_char = ':';

/*
 * Convert an option argument into a meta character.
 *
 * arg  - the option argument as given on the command line
 * what - human-readable name of the meta character, used in the warning
 *
 * Prints a warning when arg is not exactly one character long.  Returns the
 * first byte of arg as an unsigned char value (0 when arg is empty).  Going
 * through unsigned char matters: on platforms where char is signed, a byte
 * >= 0x80 would otherwise become negative and could never equal a value
 * returned by getchar() (and 0xFF would collide with EOF).
 */
static int option_char(const char *arg, const char *what)
{
    if (strlen(arg) != 1) {
        fprintf(stderr, "warning: %s requires one character: %s\n", what, arg);
    }
    return (unsigned char)arg[0];
}

/*
 * Parse the command-line options, then parse standard input.
 *
 *   -s c   character that starts a marked string   (default '(')
 *   -e c   character that ends a marked string     (default ')')
 *   -a c   character that starts an annotation     (default ':')
 *
 * Returns the result of yyparse(); exits with status 1 on an unknown option.
 */
int main(int argc, char *argv[])
{
    int opt;

    while ((opt = getopt(argc, argv, "s:e:a:")) != -1) {
        switch (opt) {
        case 's':
            start_mark_char = option_char(optarg, "start mark");
            break;
        case 'e':
            end_mark_char = option_char(optarg, "end mark");
            break;
        case 'a':
            start_annotation_char = option_char(optarg, "start annotation");
            break;
        default:
            fprintf(stderr,
                    "usage: %s [-s start_mark] [-e end_mark] [-a start_annotation]\n",
                    argv[0]);
            exit(1);
        }
    }
    return yyparse();
}

/* Error callback used by the generated parser: print the message to stderr. */
void yyerror(char const *s)
{
    fprintf(stderr, "%s\n", s);
}

/*
 * Return a newly allocated string holding s1 followed by s2.
 * s1 may be NULL, in which case the result is a copy of s2.
 * The caller owns the result.  Exits with status 1 if allocation fails.
 */
static char *estrdupcat(const char *s1, const char *s2)
{
    size_t len1 = (s1 == NULL) ? 0 : strlen(s1);
    size_t len2 = strlen(s2);
    char *p = malloc(len1 + len2 + 1);

    if (p == NULL) {
        fputs("error: estrdupcat cannot allocate any memories.\n", stderr);
        exit(1);
    }
    if (len1 > 0) {
        memcpy(p, s1, len1);
    }
    memcpy(p + len1, s2, len2 + 1);     /* copies the terminating NUL too */
    return p;
}

/*
 * Lexer: read one token from standard input.
 *
 * Returns START_MARK or END_MARK (yylval set to NULL), START_ANNOTATION or
 * STRING_EXCEPT_SPECIAL (yylval set to a heap-allocated copy of the token
 * text, owned by the parser action that consumes it), or 0 at end of input.
 *
 * The scratch buffer is allocated on first use, kept across calls and
 * doubled whenever a token fills it.
 */
int yylex(void)
{
    enum { YYTEXT_INIT_SIZE = 8192 };
    static size_t yytext_size = YYTEXT_INIT_SIZE;
    static char *yytext = NULL;
    char *p;
    int c;

    if (yytext == NULL) {
        yytext = malloc(YYTEXT_INIT_SIZE);
        if (yytext == NULL) {
            fputs("error: yylex cannot allocate any memories\n", stderr);
            exit(1);
        }
    }

    c = getchar();
    if (c == EOF) {
        /* yacc's end-of-input token is 0; only bison also accepts
           negative values such as EOF. */
        return 0;
    }
    if (c == start_mark_char) {
        yylval = NULL;          /* do not leave a stale, already freed pointer */
        return START_MARK;
    }
    if (c == end_mark_char) {
        yylval = NULL;
        return END_MARK;
    }
    if (c == start_annotation_char) {
        yytext[0] = (char)c;
        yytext[1] = '\0';
        yylval = estrdup(yytext);
        return START_ANNOTATION;
    }

    /* Ordinary text: accumulate characters up to the next meta character
       or end of input. */
    p = yytext;
    for (;;) {
        *p++ = (char)c;
        if (p == yytext + yytext_size) {
            /* Buffer full: double it.  A failed realloc leaves the old
               buffer untouched, so yytext is only replaced on success. */
            char *q = NULL;

            if (yytext_size <= (size_t)-1 / 2) {
                q = realloc(yytext, yytext_size * 2);
            }
            if (q == NULL) {
                fputs("error: yylex cannot allocate any memories, because token is too long.\n", stderr);
                exit(1);
            }
            p = q + yytext_size;
            yytext = q;
            yytext_size *= 2;
        }
        c = getchar();
        if (c == EOF || c == start_mark_char || c == end_mark_char
            || c == start_annotation_char) {
            /* Push the delimiter back for the next call; ungetc(EOF) is a
               no-op and the next getchar() reports EOF again. */
            ungetc(c, stdin);
            *p = '\0';
            yylval = estrdup(yytext);
            return STRING_EXCEPT_SPECIAL;
        }
    }
}

/*
 * Return a newly allocated copy of s, owned by the caller.
 * Exits with status 1 if allocation fails.
 */
static char *estrdup(const char *s)
{
    size_t size = strlen(s) + 1;
    char *p = malloc(size);

    if (p == NULL) {
        fputs("error: estrdup cannot allocate any memories.\n", stderr);
        exit(1);
    }
    return memcpy(p, s, size);
}
このyylex関数は、メモリの許す限り任意の長さの字句を扱える。
コマンドラインオプションの解析にはgetopt関数を使っている。
オプションでメタ文字を指定しない場合は、今までのメタ文字がデフォルトで設定される。
$ echo -n "a (dog:bowwow) and a (cat:mewmew)" | ./parser
string [a ]
marked-string [dog]: annotation[bowwow]
string [ and a ]
marked-string [cat]: annotation[mewmew]
$ echo -n "a {dog:bowwow} and a {cat:mewmew}" | ./parser
string [a {dog:bowwow} and a {cat:mewmew}]
$ echo -n "a {dog:bowwow} and a {cat:mewmew}" | ./parser -s"{" -e"}"
string [a ]
marked-string [dog]: annotation[bowwow]
string [ and a ]
marked-string [cat]: annotation[mewmew]
$ echo -n "a {dog=bowwow} and a {cat=mewmew}" | ./parser -s"{" -e"}" -a"="
string [a ]
marked-string [dog]: annotation[bowwow]
string [ and a ]
marked-string [cat]: annotation[mewmew]
変な文字指定をすれば、
$ echo -n "a (dog:bowwow) and a (cat:mewmew)" | ./parser -e":" -aa
string [a ]
marked-string [dog]
string [bowwow) and a ]
marked-string [c]: annotation[t]
string [mewmew)]
のような変だが正しい結果も出てくる。