#include // assert #include // errno #include // NULL, size_t #include // fclose #include // calloc, free #include // memcpy #include "gemtext-parser.h" int lineBufferInit(lineBuffer *lb) { char *buf = calloc(1, LBUF_SIZE); if (buf == NULL) return 2; lb->len = 0; lb->capacity = LBUF_SIZE; lb->buf = buf; lb->cursor = buf; return 0; } int gemtextParserInit(gemtextParser *parser, FILE *stream) { int ret = 0; parser->stream = stream; parser->nodeType = unset; parser->state = lineStart; parser->linkUrl = NULL; ret = lineBufferInit(&parser->buffer); return ret; } gemtextParser* gemtextParserNew(FILE *stream) { gemtextParser *parser = calloc(1, sizeof(gemtextParser)); if (parser == NULL) return NULL; if (gemtextParserInit(parser, stream) != 0) { free(parser); return NULL; } return parser; } void gemtextParserDeinit(gemtextParser *parser) { fclose(parser->stream); free(parser->buffer.buf); if (parser->nodeType == linkNode && parser->linkUrl != NULL) { free(parser->linkUrl); } else if (parser->nodeType == preformattedNode && parser->altText != NULL) { free(parser->altText); } } void gemtextParserDestroy(gemtextParser *parser) { gemtextParserDeinit(parser); free(parser); } int gemtextNodeQueueInit(gemtextNodeQueue *nq) { int ret; nq->head = NULL; nq->tail = NULL; ret = pthread_mutex_init(&nq->mutex, NULL); if (ret != 0) return ret; return pthread_cond_init(&nq->cond, NULL); } void gemtextNodeQueuePush(gemtextNodeQueue *nq, gemtextNode *node) { pthread_mutex_lock(&nq->mutex); if (nq->tail == NULL) { nq->tail = nq->head = node; } else { node->next = nq->tail; nq->tail->prev = node; nq->tail = node; } nq->count++; pthread_mutex_unlock(&nq->mutex); } gemtextNode* gemtextNodeQueuePop(gemtextNodeQueue *nq) { gemtextNode *node; while (nq->count == 0) pthread_cond_wait(&nq->cond, &nq->mutex); pthread_mutex_lock(&nq->mutex); nq->count++; node = nq->head; if (node->nodeType == endOfStream) return node; if (nq->tail == nq->head) { nq->tail = nq->head = NULL; } else { nq->head = nq->head->prev; } pthread_mutex_unlock(&nq->mutex); node->prev = node->next = NULL; return node; } gemtextNode* gemtextNodeQueueTryPop(gemtextNodeQueue *nq) { gemtextNode *node; if (nq->count == 0) return NULL; pthread_mutex_lock(&nq->mutex); nq->count++; node = nq->head; if (node->nodeType == endOfStream) return node; if (nq->tail == nq->head) { nq->tail = nq->head = NULL; } else { nq->head = nq->head->prev; } pthread_mutex_unlock(&nq->mutex); node->prev = node->next = NULL; return node; } void gemtextNodeDeinit(gemtextNode *node) { switch (node->nodeType) { case linkNode: if (node->link->display != NULL) { free(node->link->display); } free(node->link->url); free(node->link); break; case preformattedNode: if (node->block->altText != NULL) { free(node->block->altText); } free(node->block->body); free(node->block); break; case endOfStream: break; default: free(node->str); break; } free(node); } int lineBufferExtend(lineBuffer *lb, size_t len) { char *buf = calloc(1, lb->capacity + len); if (buf == NULL) return 2; memcpy(buf, lb->buf, lb->len); free(lb->buf); lb->buf = buf; lb->cursor = buf + lb->len; return 0; } int lineBufferAppendChar(lineBuffer *lb, char c) { int ret = 0; if (lb->len >= lb->capacity - 1) { ret = lineBufferExtend(lb, LBUF_SIZE); if (ret != 0) return ret; } *lb->cursor = c; lb->cursor++; lb->len++; return ret; } void lineBufferAppendCharUnchecked(lineBuffer *lb, char c) { *lb->cursor = c; lb->cursor++; lb->len++; } int lineBufferAppendString(lineBuffer *lb, char *c, size_t len) { int ret = 0, i = 0; size_t rem = 0; // Find the remaining length rem = lb->capacity - lb->len; // if the length won't fit our string, extend the buffer. // We do len - rem + LBUF_SIZE for a safety margin if (rem < len) { ret = lineBufferExtend(lb, len - rem + LBUF_SIZE); if (ret != 0) return ret; } for (i = 0; i < len; i++) { // We use 'unchecked' because we did our checks above lineBufferAppendCharUnchecked(lb, *c); c++; } return ret; } void lineBufferRewind(lineBuffer *lb) { lb->len--; lb->cursor--; } void lineBufferReset(lineBuffer *lb) { lb->len = 0; lb->cursor = lb->buf; } int gemtextParserSendPreformatted(gemtextParser *parser, gemtextNodeQueue *nq) { preformattedBlock *block; gemtextNode *node; char *buf; node = calloc(1, sizeof(gemtextNode)); if (node == NULL) return errno; node->nodeType = preformattedNode; block = calloc(1, sizeof(preformattedBlock)); if (block == NULL) return errno; // back up our cursor four spaces and insert a lf char parser->buffer.cursor -= 4; parser->buffer.len -= 4; lineBufferAppendCharUnchecked(&parser->buffer, '\n'); buf = strndup(parser->buffer.buf, parser->buffer.len); if (buf == NULL) return errno; block->altText = parser->altText; parser->altText = NULL; block->body = buf; node->block = block; gemtextNodeQueuePush(nq, node); lineBufferReset(&parser->buffer); parser->state = lineStart; parser->nodeType = unset; return 0; } int gemtextParserSendLink(gemtextParser *parser, gemtextNodeQueue *nq) { gemtextLink *link; gemtextNode *node; char *url = NULL, *display = NULL; link = calloc(1, sizeof(gemtextLink)); if (link == NULL) return errno; link->display = link->url = NULL; node = calloc(1, sizeof(gemtextNode)); if (node == NULL) { free(link); return errno; } if (parser->linkUrl == NULL) { url = strndup(parser->buffer.buf, parser->buffer.len - 1); } else { url = parser->linkUrl; display = strndup(parser->buffer.buf, parser->buffer.len); if (display == NULL) { free(link); free(node); return errno; } } link->url = url; link->display = display; node->nodeType = linkNode; node->link = link; gemtextNodeQueuePush(nq, node); lineBufferReset(&parser->buffer); parser->state = lineStart; parser->nodeType = unset; parser->linkUrl = NULL; return 0; } int gemtextParserSend(gemtextParser *parser, gemtextNodeQueue *nq) { gemtextNode *node; char *buf; node = calloc(1, sizeof(gemtextNode)); if (node == NULL) return errno; node->nodeType = parser->nodeType; buf = strndup(parser->buffer.buf, parser->buffer.len); if (buf == NULL) return errno; node->str = buf; gemtextNodeQueuePush(nq, node); lineBufferReset(&parser->buffer); parser->state = lineStart; parser->nodeType = unset; return 0; } void logParseError(int err) { //todo } void switchMode(gemtextParser *parser, gemtextNodeType node_type, char c) { lineBufferReset(&parser->buffer); switch (c) { case ' ': case '\t': parser->state = trimStart; break; default: lineBufferReset(&parser->buffer); lineBufferAppendCharUnchecked(&parser->buffer, c); parser->state = normalState; } parser->nodeType = node_type; } void enterPreformattedMode(gemtextParser *parser) { parser->nodeType = preformattedNode; parser->state = trimStart; lineBufferReset(&parser->buffer); } int parseLink(gemtextParser *parser, gemtextNodeQueue *lq, char c) { int ret = 0; char *buf = NULL; assert(parser->nodeType == linkNode); switch (parser->state) { case lineStart: if (c != ' ' && c != '\t') { lineBufferReset(&parser->buffer); lineBufferAppendCharUnchecked(&parser->buffer, c); parser->state = normalState; } else if (c == '\n') { ret = gemtextParserSend(parser, lq); } break; case normalState: if (c == ' ' || c == '\t') { buf = strndup(parser->buffer.buf, parser->buffer.len - 1); if (buf == NULL) return errno; parser->linkUrl = buf; parser->state = linkDisplayStart; lineBufferReset(&parser->buffer); } else if (c == '\n') { buf = strndup(parser->buffer.buf, parser->buffer.len); if (buf == NULL) return errno; ret = gemtextParserSendLink(parser, lq); } break; case linkDisplayStart: if (c == ' ' || c == '\t') { lineBufferRewind(&parser->buffer); } else if (c == '\n') { ret = gemtextParserSendLink(parser, lq); } else { parser->state = linkDisplay; } break; case linkDisplay: if (c == '\n') { ret = gemtextParserSendLink(parser, lq); } break; default: ret = 1; break; } return ret; } int parsePreformatted(gemtextParser *parser, gemtextNodeQueue *lq, char c) { char *buf = NULL; assert(parser->nodeType == preformattedNode); switch (parser->state) { case trimStart: if (c == '\n') { lineBufferReset(&parser->buffer); parser->state = lineStart; parser->altText = NULL; } else if (c == ' ' || c == '\t') { lineBufferRewind(&parser->buffer); } else { parser->state = preformattedAlt; } break; case preformattedAlt: if (c == '\n') { parser->state = lineStart; if (parser->buffer.len > 0) { buf = strndup(parser->buffer.buf, parser->buffer.len - 1); if (buf == NULL) return errno; parser->altText = buf; lineBufferReset(&parser->buffer); } } break; case normalState: if (c == '\n') { parser->state = lineStart; } break; case lineStart: if (c == '\n') { parser->state = lineStart; } else if (c == '`') { parser->state = firstBacktickChar; } else { parser->state = normalState; } break; case firstBacktickChar: if (c == '`') { parser->state = secondBacktickChar; } else { parser->state = normalState; } break; case secondBacktickChar: if (c == '`') { parser->state = thirdBacktickChar; } else { parser->state = normalState; } break; case thirdBacktickChar: if (c == '\n') { gemtextParserSendPreformatted(parser, lq); } else { // We discard anything past the third backtick parser->buffer.cursor--; parser->buffer.len--; } break; default: return 1; } return 0; } int parseQuote(gemtextParser *parser, gemtextNodeQueue *lq, char c) { int ret = 0; switch (parser->state) { case lineStart: if (c == '>') { parser->state = trimStart; lineBufferRewind(&parser->buffer); } else { lineBufferRewind(&parser->buffer); ret = gemtextParserSend(parser, lq); if (ret) return ret; ret = fseek(parser->stream, -1, SEEK_CUR); if (ret) return ret; parser->state = lineStart; parser->nodeType = normalNode; } break; case normalState: if (c == '\n') { parser->state = lineStart; } break; case trimStart: if (c == ' ' || c == '\t') { // rewind and trim the whitespace parser->buffer.len--; parser->buffer.cursor--; } else if (c == '\n') { if (parser->buffer.len == 1) { parser->nodeType = normalNode; } ret = gemtextParserSend(parser, lq); } else { parser->state = normalState; } break; default: // Should be unreachable ret = 1; break; } return ret; } int parseGeneric(gemtextParser *parser, gemtextNodeQueue *nq, char c) { int ret = 0; switch (parser->state) { case lineStart: case trimStart: if (c == ' ' || c == '\t') { // rewind the cursor to trim the line start parser->buffer.len--; parser->buffer.cursor--; } else if (c == '\n') { ret = gemtextParserSend(parser, nq); } else { parser->state = normalState; } break; case normalState: if (c == '\n') { ret = gemtextParserSend(parser, nq); } break; default: // Should be unreachable ret = 1; break; } return ret; } int parseNormal(gemtextParser *parser, gemtextNodeQueue *nq, char c) { int ret; switch (parser->state) { case lineStart: switch (c) { case '=': parser->state = firstLinkChar; break; case '>': parser->nodeType = quoteNode; parser->state = trimStart; lineBufferRewind(&parser->buffer); break; case '*': parser->nodeType = listNode; parser->state = trimStart; lineBufferRewind(&parser->buffer); break; case '#': parser->state = firstHashChar; break; case '`': parser->state = firstBacktickChar; break; case '\n': parser->nodeType = normalNode; ret = gemtextParserSend(parser, nq); if (ret) return ret; break; default: break; } break; case firstLinkChar: if (c == '>') { parser->nodeType = linkNode; parser->state = lineStart; } else if (c == '\n') { parser->nodeType = normalNode; ret = gemtextParserSend(parser, nq); if (ret) return ret; } else { parser->state = normalState; } break; case firstHashChar: if (c == '#') { parser->state = secondHashChar; } else if (c == '\n') { parser->nodeType = normalNode; ret = gemtextParserSend(parser, nq); if (ret) return ret; } else { switchMode(parser, h1Node, c); } break; case secondHashChar: if (c == '#') { parser->nodeType = h3Node; parser->state = trimStart; lineBufferReset(&parser->buffer); } else if (c == '\n') { parser->nodeType = normalNode; ret = gemtextParserSend(parser, nq); if (ret) return ret; } else { switchMode(parser, h2Node, c); } break; case thirdHashChar: if (c == '\n') { parser->nodeType = normalNode; ret = gemtextParserSend(parser, nq); if (ret) return ret; } else { switchMode(parser, h3Node, c); } break; case firstBacktickChar: if (c == '\n') { parser->nodeType = normalNode; ret = gemtextParserSend(parser, nq); if (ret) return ret; } else if (c == '`') { parser->state = secondBacktickChar; } else { parser->state = normalState; parser->nodeType = normalNode; } break; case secondBacktickChar: if (c == '`') { enterPreformattedMode(parser); } else if (c == '\n') { parser->nodeType = normalNode; ret = gemtextParserSend(parser, nq); if (ret) return ret; } else { parser->state = normalState; parser->nodeType = normalNode; } break; default: break; } return 0; } int parseGemtext(gemtextParser *parser, gemtextNodeQueue *nq) { char c; int ret; gemtextNode *node; for (;;) { ret = fread(&c, 1, 1, parser->stream); if (ret == 1) { ret = lineBufferAppendChar(&parser->buffer, c); if (ret) { logParseError(ret); return ret; } } else { // If we were unable to read a char, assume we're at the end of the // stream and send the node to the queue if (parser->state != lineStart && parser->state != trimStart) { switch (parser->nodeType) { case preformattedNode: ret = gemtextParserSendPreformatted(parser, nq); break; case linkNode: ret = gemtextParserSendLink(parser, nq); break; default: ret = gemtextParserSend(parser, nq); break; } if (ret) return ret; } // Send an `endOfStream` node since we know we're done node = calloc(1, sizeof(gemtextNode)); if (node == NULL) return errno; node->nodeType = endOfStream; node->prev = node->next = NULL; node->str = NULL; gemtextNodeQueuePush(nq, node); break; } switch (parser->nodeType) { case unset: case normalNode: ret = parseNormal(parser, nq, c); break; case preformattedNode: ret = parsePreformatted(parser, nq, c); break; case quoteNode: ret = parseQuote(parser, nq, c); break; case linkNode: ret = parseLink(parser, nq, c); break; default: ret = parseGeneric(parser, nq, c); break; } if (ret) { logParseError(ret); return ret; } } return 0; }