From c6605630b5b93e543f0974dcae850d6823dfb336 Mon Sep 17 00:00:00 2001
From: Nathan Fisher
Date: Mon, 9 Oct 2023 22:48:31 -0400
Subject: [PATCH] Ready for testing

---
Review notes (placed below the "---" marker, so they stay out of the
commit message):

  Line breaks and indentation were rebuilt from a whitespace-collapsed
  copy of this patch.  Hunk line counts and the diffstat were re-checked
  against the rebuilt hunks; leading whitespace on context lines is a best
  guess and may need adjusting before the patch applies.  The four
  #include header names were restored from their trailing comments.  The
  index hashes below still name the original blobs.

  Changes made to added lines (each keeps the hunk line counts unchanged):

  * gemtextParserSendPreformatted(): release `line` (and `node`) when a
    later allocation fails instead of leaking them.
  * parseLink(): a URL-only line no longer strndup()s the buffer into an
    unused local; gemtextParserSendLink() already copies it.
  * parsePreformatted(): the alt text is copied with strndup() rather than
    strncpy() into a NULL destination; lineStart now starts fence
    detection on a backtick instead of on '\n'; a '\n' after a partial
    fence returns to lineStart; the result of
    gemtextParserSendPreformatted() is propagated.
  * parseGeneric(): normalState sends the caller's line type `lt` instead
    of always sending h1Line.

  Left as-is: switchMode() can leave the parser in trimStart, which only
  parseQuote() handles, and the "=>" path passes '>' to switchMode(), which
  buffers it as the first character of the link.

 gemtext-parser.c         | 391 ++++++++++++++++++++++++++-------------
 include/gemtext-parser.h |  21 ++-
 2 files changed, 279 insertions(+), 133 deletions(-)

diff --git a/gemtext-parser.c b/gemtext-parser.c
index 28dc1ad..266103a 100644
--- a/gemtext-parser.c
+++ b/gemtext-parser.c
@@ -1,3 +1,4 @@
+#include <assert.h> // assert
 #include <errno.h> // errno
 #include <stddef.h> // NULL, size_t
 #include <stdio.h> // fclose
@@ -134,11 +135,79 @@ int lineBufferAppendString(lineBuffer *lb, char *c, size_t len) {
     return ret;
 }
 
+void lineBufferRewind(lineBuffer *lb) {
+    lb->len--;
+    lb->cursor--;
+}
+
 void lineBufferReset(lineBuffer *lb) {
     lb->len = 0;
     lb->cursor = lb->buf;
 }
 
+int gemtextParserSendPreformatted(gemtextParser *parser, gemtextLineQueue *lq) {
+    preformattedNode *node;
+    gemtextLine *line;
+    char *buf;
+
+    line = calloc(1, sizeof(gemtextLine));
+    if (line == NULL) return errno;
+    line->lineType = preformattedLine;
+    node = calloc(1, sizeof(preformattedNode));
+    if (node == NULL) { free(line); return errno; }
+    // back up our cursor four spaces and insert a lf char
+    parser->buffer.cursor -= 4;
+    parser->buffer.len -= 4;
+    lineBufferAppendCharUnchecked(&parser->buffer, '\n');
+    buf = strndup(parser->buffer.buf, parser->buffer.len);
+    if (buf == NULL) { free(node); free(line); return errno; }
+    node->altText = parser->altText;
+    parser->altText = NULL;
+    node->body = buf;
+    line->node = node;
+    gemtextLineQueuePush(lq, line);
+    lineBufferReset(&parser->buffer);
+    parser->state = lineStart;
+    parser->mode = normalMode;
+    return 0;
+}
+
+int gemtextParserSendLink(gemtextParser *parser, gemtextLineQueue *lq) {
+    gemtextLink *link;
+    gemtextLine *line;
+    char *url = NULL, *display = NULL;
+
+    link = calloc(1, sizeof(gemtextLink));
+    if (link == NULL) return errno;
+    link->display = link->url = NULL;
+    line = calloc(1, sizeof(gemtextLine));
+    if (line == NULL) {
+        free(link);
+        return errno;
+    }
+    if (parser->linkUrl == NULL) {
+        url = strndup(parser->buffer.buf, parser->buffer.len - 1);
+    } else {
+        url = parser->linkUrl;
+        display = strndup(parser->buffer.buf, parser->buffer.len);
+        if (display == NULL) {
+            free(link);
+            free(line);
+            return errno;
+        }
+    }
+    link->url = url;
+    link->display = display;
+    line->lineType = linkLine;
+    line->link = link;
+    gemtextLineQueuePush(lq, line);
+    lineBufferReset(&parser->buffer);
+    parser->state = lineStart;
+    parser->mode = normalMode;
+    parser->linkUrl = NULL;
+    return 0;
+}
+
 int gemtextParserSend(gemtextParser *parser, gemtextLineType lt, gemtextLineQueue *lq) {
     gemtextLine *line;
     char *buf;
@@ -160,136 +229,199 @@ void logParseError(int err) {
     //todo
 }
 
-gemtextLink* readLink(FILE *stream, lineBuffer *lb) {
-    char c;
-    char *buf;
-    int ret = 0;
-    gemtextLink *link = calloc(1, sizeof(gemtextLink));
+void switchMode(gemtextParser *parser, gemtextParserMode mode, char c) {
+    lineBufferReset(&parser->buffer);
+    switch (c) {
+    case ' ':
+    case '\t':
+        parser->state = trimStart;
+        break;
+    default:
+        lineBufferReset(&parser->buffer);
+        lineBufferAppendCharUnchecked(&parser->buffer, c);
+        parser->state = normalState;
+    }
+    parser->mode = mode;
+}
 
-    if (link == NULL) return NULL;
-    while (1) {
-        fread(&c, 1, 1, stream);
-        switch (c) {
-        case ' ':
-        case '\t':
-            if (lb->len == 0)
-                continue;
-            buf = strndup(lb->buf, lb->len);
-            link->url = buf;
-            break;
-        case '\n':
-            if (lb->len == 0) {
-                free(link);
-                return NULL;
-            }
-            buf = strndup(lb->buf, lb->len);
-            link->url = buf;
-            return link;
-        default:
-            ret = lineBufferAppendChar(lb, c);
-            if (ret != 0) {
-                free(link);
-                return NULL;
+void enterPreformattedMode(gemtextParser *parser) {
+    parser->mode = preformattedMode;
+    parser->state = preformattedAlt;
+    lineBufferReset(&parser->buffer);
+}
+
+int parseLink(gemtextParser *parser, gemtextLineQueue *lq, char c) {
+    int ret = 0;
+    char *buf = NULL;
+
+    assert(parser->mode == linkMode);
+    switch (parser->state) {
+    case lineStart:
+        if (c == ' ' || c == '\t') {
+            lineBufferRewind(&parser->buffer);
+        } else if (c == '\n') {
+            ret = gemtextParserSend(parser, normalLine, lq);
+        } else {
+            parser->state = normalState;
+        }
+        break;
+    case normalState:
+        if (c == ' ' || c == '\t') {
+            buf = strndup(parser->buffer.buf, parser->buffer.len - 1);
+            if (buf == NULL) return errno;
+            parser->linkUrl = buf;
+            parser->state = linkDisplayStart;
+            lineBufferReset(&parser->buffer);
+        } else if (c == '\n') {
+            // URL-only link: gemtextParserSendLink copies the URL out of
+            // the line buffer itself when parser->linkUrl is NULL.
+            ret = gemtextParserSendLink(parser, lq);
+        }
+        break;
+    case linkDisplayStart:
+        if (c == ' ' || c == '\t') {
+            lineBufferRewind(&parser->buffer);
+        } else if (c == '\n') {
+            ret = gemtextParserSendLink(parser, lq);
+        } else {
+            parser->state = linkDisplay;
+        }
+        break;
+    case linkDisplay:
+        if (c == '\n') {
+            ret = gemtextParserSendLink(parser, lq);
+        }
+        break;
+    default:
+        ret = 1;
+        break;
+    }
+    return ret;
+}
+
+int parsePreformatted(gemtextParser *parser, gemtextLineQueue *lq, char c) {
+    char *buf = NULL;
+
+    assert(parser->mode == preformattedMode);
+    switch (parser->state) {
+    case preformattedAlt:
+        if (c == '\n') {
+            parser->state = normalState;
+            if (parser->buffer.len > 0) {
+                buf = strndup(parser->buffer.buf, parser->buffer.len);
+                if (buf == NULL) return errno;
+                parser->altText = buf;
             }
         }
-    }
-    lineBufferReset(lb);
-    while (1) {
-        fread(&c, 1, 1, stream);
-        switch (c) {
-        case '\n':
-            link->display = strndup(lb->buf, lb->len);
-            break;
-        default:
-            ret = lineBufferAppendChar(lb, c);
-            if (ret != 0) {
-                free(link->url);
-                free(link);
-                return NULL;
-            }
+        break;
+    case normalState:
+        if (c == '\n') {
+            parser->state = lineStart;
+        }
+        break;
+    case lineStart:
+        if (c != '\n') {
+            parser->state = (c == '`') ? firstBacktickChar : normalState;
+        }
+        break;
+    case firstBacktickChar:
+        if (c == '`') {
+            parser->state = secondBacktickChar;
+        } else {
+            parser->state = (c == '\n') ? lineStart : normalState;
+        }
+        break;
+    case secondBacktickChar:
+        if (c == '`') {
+            parser->state = thirdBacktickChar;
+        } else {
+            parser->state = (c == '\n') ? lineStart : normalState;
+        }
+        break;
+    case thirdBacktickChar:
+        if (c == '\n') {
+            return gemtextParserSendPreformatted(parser, lq);
+        } else {
+            // We discard anything past the third backtick
+            parser->buffer.cursor--;
+            parser->buffer.len--;
         }
-    }
-    return link;
-}
-
-enterH1Mode(gemtextParser *parser, char c) {
-    lineBufferReset(&parser->buffer);
-    switch (c) {
-    case ' ':
-    case '\t':
         break;
     default:
-        lineBufferReset(&parser->buffer);
-        lineBufferAppendCharUnchecked(&parser->buffer, c);
+        return 1;
     }
-    parser->mode = h1Mode;
-    parser->state = normalState;
+    return 0;
 }
 
-enterH2Mode(gemtextParser *parser, char c) {
-    lineBufferReset(&parser->buffer);
-    switch (c) {
-    case ' ':
-    case '\t':
+int parseQuote(gemtextParser *parser, gemtextLineQueue *lq, char c) {
+    int ret = 0;
+
+    switch (parser->state) {
+    case lineStart:
+        if (c == '>') {
+            parser->state = trimStart;
+        } else {
+            parser->buffer.len--;
+            parser->buffer.cursor--;
+            ret = gemtextParserSend(parser, quoteLine, lq);
+            if (ret) return ret;
+            lineBufferAppendCharUnchecked(&parser->buffer, c);
+            parser->state = normalState;
+        }
+        break;
+    case normalState:
+        if (c == '\n') {
+            parser->state = lineStart;
+        }
+        break;
+    case trimStart:
+        if (c == ' ' || c == '\t') {
+            // rewind and trim the whitespace
+            parser->buffer.len--;
+            parser->buffer.cursor--;
+        } else if (c == '\n') {
+            ret = gemtextParserSend(parser, normalLine, lq);
+        } else {
+            parser->state = normalState;
+        }
         break;
     default:
-        lineBufferReset(&parser->buffer);
-        lineBufferAppendCharUnchecked(&parser->buffer, c);
+        // Should be unreachable
+        ret = 1;
+        break;
     }
-    parser->mode = h2Mode;
-    parser->state = normalState;
+    return ret;
 }
 
-enterH3Mode(gemtextParser *parser, char c) {
-    lineBufferReset(&parser->buffer);
-    switch (c) {
-    case ' ':
-    case '\t':
+int parseGeneric(gemtextParser *parser, gemtextLineQueue *lq, gemtextLineType lt, char c) {
+    int ret = 0;
+
+    switch (parser->state) {
+    case lineStart:
+        if (c == ' ' || c == '\t') {
+            // rewind the cursor to trim the line start
+            parser->buffer.len--;
+            parser->buffer.cursor--;
+        } else if (c == '\n') {
+            ret = gemtextParserSend(parser, lt, lq);
+        } else {
+            parser->state = normalState;
+        }
+        break;
+    case normalState:
+        if (c == '\n') {
+            ret = gemtextParserSend(parser, lt, lq);
+        }
         break;
     default:
-        lineBufferReset(&parser->buffer);
-        lineBufferAppendCharUnchecked(&parser->buffer, c);
+        // Should be unreachable
+        ret = 1;
+        break;
     }
-    parser->mode = h3Mode;
-    parser->state = normalState;
+    return ret;
 }
 
-int parseLink(gemtextParser *parser, char c, gemtextLineQueue *lq) {
-    // todo
-    return 0;
-}
-
-int parsePreformatted(gemtextParser *parser, char c, gemtextLineQueue *lq) {
-    // todo
-    return 0;
-}
-
-int parseQuote(gemtextParser *parser, char c, gemtextLineQueue *lq) {
-    // todo
-    return 0;
-}
-
-int parseH1(gemtextParser *parser, char c, gemtextLineQueue *lq) {
-    // todo
-    return 0;
-}
-
-int parseH2(gemtextParser *parser, char c, gemtextLineQueue *lq) {
-    // todo
-    return 0;
-}
-
-int parseH3(gemtextParser *parser, char c, gemtextLineQueue *lq) {
-    // todo
-    return 0;
-}
-
-int parseList(gemtextParser *parser, char c, gemtextLineQueue *lq) {
-    // todo
-    return 0;
-}
-
-int parseNormal(gemtextParser *parser, char c, gemtextLineQueue *lq) {
+int parseNormal(gemtextParser *parser, gemtextLineQueue *lq, char c) {
     int ret;
 
     switch (parser->state) {
@@ -321,8 +453,7 @@ int parseNormal(gemtextParser *parser, char c, gemtextLineQueue *lq) {
         break;
     case firstLinkChar:
         if (c == '>') {
-            parser->state = lineStart;
-            parser->mode = linkMode;
+            switchMode(parser, linkMode, c);
         } else if (c == '\n') {
             ret = gemtextParserSend(parser, normalLine, lq);
             if (ret) return ret;
@@ -337,7 +468,7 @@ int parseNormal(gemtextParser *parser, char c, gemtextLineQueue *lq) {
             ret = gemtextParserSend(parser, normalLine, lq);
             if (ret) return ret;
         } else {
-            enterH1Mode(parser, c);
+            switchMode(parser, h1Mode, c);
         }
         break;
     case secondHashChar:
@@ -347,7 +478,7 @@ int parseNormal(gemtextParser *parser, char c, gemtextLineQueue *lq) {
             ret = gemtextParserSend(parser, normalLine, lq);
             if (ret) return ret;
         } else {
-            enterH2Mode(parser, c);
+            switchMode(parser, h2Mode, c);
         }
         break;
     case thirdHashChar:
@@ -355,7 +486,7 @@ int parseNormal(gemtextParser *parser, char c, gemtextLineQueue *lq) {
             ret = gemtextParserSend(parser, normalLine, lq);
             if (ret) return ret;
         } else {
-            enterH3Mode(parser, c);
+            switchMode(parser, h3Mode, c);
         }
         break;
     case firstBacktickChar:
@@ -370,14 +501,16 @@ int parseNormal(gemtextParser *parser, char c, gemtextLineQueue *lq) {
         }
     case secondBacktickChar:
         if (c == '`') {
-            parser->state = thirdBacktickChar;
+            enterPreformattedMode(parser);
         } else if (c == '\n') {
             ret = gemtextParserSend(parser, normalLine, lq);
             if (ret) return ret;
+        } else {
+            parser->state = normalState;
+            parser->mode = normalMode;
         }
-    case thirdBacktickChar:
-    case normalState:
-        break;
+    default:
+        break;
     }
     return 0;
 }
@@ -406,28 +539,28 @@ int parseGemtext(gemtextParser *parser, gemtextLineQueue *lq) {
         }
         switch (parser->mode) {
         case normalMode:
-            ret = parseNormal(parser, c, lq);
+            ret = parseNormal(parser, lq, c);
             break;
         case preformattedMode:
-            ret = parsePreformatted(parser, c, lq);
+            ret = parsePreformatted(parser, lq, c);
             break;
         case quoteMode:
-            ret = parseQuote(parser, c, lq);
+            ret = parseQuote(parser, lq, c);
             break;
         case linkMode:
-            ret = parseLink(parser, c, lq);
+            ret = parseLink(parser, lq, c);
             break;
         case h1Mode:
-            ret = parseH1(parser, c, lq);
+            ret = parseGeneric(parser, lq, h1Line, c);
             break;
         case h2Mode:
-            ret = parseH2(parser, c, lq);
+            ret = parseGeneric(parser, lq, h2Line, c);
             break;
         case h3Mode:
-            ret = parseH3(parser, c, lq);
+            ret = parseGeneric(parser, lq, h3Line, c);
             break;
         case listMode:
-            ret = parseList(parser, c, lq);
+            ret = parseGeneric(parser, lq, listLine, c);
             break;
         }
         if (ret) {
@@ -436,4 +569,4 @@ int parseGemtext(gemtextParser *parser, gemtextLineQueue *lq) {
         }
     }
     return 0;
-}
\ No newline at end of file
+}
diff --git a/include/gemtext-parser.h b/include/gemtext-parser.h
index c9d87f9..ab5a4ad 100644
--- a/include/gemtext-parser.h
+++ b/include/gemtext-parser.h
@@ -22,12 +22,16 @@ typedef enum {
     lineStart,
     lineEnd,
     firstLinkChar,
+    linkDisplayStart,
+    linkDisplay,
     firstHashChar,
     secondHashChar,
     thirdHashChar,
     firstBacktickChar,
     secondBacktickChar,
     thirdBacktickChar,
+    preformattedAlt,
+    trimStart,
     normalState,
 } gemtextParserState;
 
@@ -55,12 +59,20 @@ typedef struct {
     char *display;
 } gemtextLink;
 
+typedef struct {
+    char *altText;
+    char *body;
+} preformattedNode;
+
 typedef struct {
     FILE *stream;
     gemtextParserMode mode;
     gemtextParserState state;
     lineBuffer buffer;
-    char *linkUrl;
+    union {
+        char *linkUrl;
+        char *altText;
+    };
 } gemtextParser;
 
 struct _gemtextLine {
@@ -68,8 +80,9 @@ struct _gemtextLine {
     struct _gemtextLine *prev;
     gemtextLineType lineType;
     union {
-        char *str;
-        gemtextLink *link;
+        char             *str;
+        gemtextLink      *link;
+        preformattedNode *node;
     };
 };
 
@@ -83,4 +96,4 @@ typedef struct {
     gemtextLine *tail;
 } gemtextLineQueue;
 
-#endif
\ No newline at end of file
+#endif