gemtext-parser/gemtext-parser.c
2023-11-04 00:32:53 -04:00

676 lines
18 KiB
C

#include <assert.h> // assert
#include <errno.h> // errno
#include <stddef.h> // NULL, size_t
#include <stdio.h> // fclose
#include <stdlib.h> // calloc, free
#include <string.h> // memcpy
#include "gemtext-parser.h"
// Prepare an empty line buffer backed by LBUF_SIZE zeroed bytes.
//
// Returns 0 on success, or 2 when the backing storage cannot be
// allocated (in which case `lb` is left untouched).
int lineBufferInit(lineBuffer *lb) {
    char *storage = calloc(1, LBUF_SIZE);

    if (!storage) {
        return 2;
    }
    lb->buf = storage;
    lb->cursor = storage;   // next write position starts at the beginning
    lb->capacity = LBUF_SIZE;
    lb->len = 0;
    return 0;
}
// Set up a caller-provided parser to read from `stream`.
//
// The parser starts at the beginning of a line with no node type chosen
// and no link URL pending. Returns whatever lineBufferInit() returns for
// the parser's line buffer (0 on success).
int gemtextParserInit(gemtextParser *parser, FILE *stream) {
    parser->stream = stream;
    parser->state = lineStart;
    parser->nodeType = unset;
    parser->linkUrl = NULL;
    return lineBufferInit(&parser->buffer);
}
// Allocate a zeroed parser on the heap and initialise it for `stream`.
//
// Returns the new parser, or NULL if either the allocation or
// gemtextParserInit() fails (nothing is leaked in that case).
gemtextParser* gemtextParserNew(FILE *stream) {
    gemtextParser *created = calloc(1, sizeof(gemtextParser));

    if (created != NULL && gemtextParserInit(created, stream) != 0) {
        free(created);
        created = NULL;
    }
    return created;
}
// Release everything the parser owns without freeing the parser itself.
//
// Closes the input stream, frees the line buffer, and frees the string
// that belongs to the node currently being parsed: the link URL while a
// link is in progress, or the alt text while a preformatted block is.
void gemtextParserDeinit(gemtextParser *parser) {
    fclose(parser->stream);
    free(parser->buffer.buf);
    switch (parser->nodeType) {
    case linkNode:
        free(parser->linkUrl);   // free(NULL) is a no-op
        break;
    case preformattedNode:
        free(parser->altText);   // free(NULL) is a no-op
        break;
    default:
        break;
    }
}
// Tear down and free a parser created with gemtextParserNew().
//
// Passing NULL is allowed and does nothing (mirroring free()), so the
// result of a failed gemtextParserNew() can be handed straight to this
// function.
void gemtextParserDestroy(gemtextParser *parser) {
    if (parser == NULL) {
        return;
    }
    gemtextParserDeinit(parser);
    free(parser);
}
// Initialise an empty node queue together with its mutex and condition
// variable.
//
// Returns 0 on success, otherwise the error number reported by
// pthread_mutex_init() or pthread_cond_init().
int gemtextNodeQueueInit(gemtextNodeQueue *nq) {
    int ret;

    nq->head = NULL;
    nq->tail = NULL;
    // The pop functions test `count`, so it must start from a known value
    // even when the queue was not zero-allocated by the caller.
    nq->count = 0;
    ret = pthread_mutex_init(&nq->mutex, NULL);
    if (ret != 0) {
        return ret;
    }
    ret = pthread_cond_init(&nq->cond, NULL);
    if (ret != 0) {
        // Do not leave a half-initialised queue holding a live mutex.
        pthread_mutex_destroy(&nq->mutex);
    }
    return ret;
}
// Append `node` at the tail of the queue and wake any waiting consumer.
//
// Links run from head to tail through `prev` (the head's `prev` is the
// next node to be popped) and from tail to head through `next`.
void gemtextNodeQueuePush(gemtextNodeQueue *nq, gemtextNode *node) {
    pthread_mutex_lock(&nq->mutex);
    node->prev = NULL;          // the new tail has nothing behind it
    if (nq->tail == NULL) {
        node->next = NULL;
        nq->tail = nq->head = node;
    } else {
        node->next = nq->tail;
        nq->tail->prev = node;
        nq->tail = node;
    }
    nq->count++;
    // gemtextNodeQueuePop() sleeps on `cond` while the queue is empty;
    // without a wake-up here it would never return. Broadcast is used so
    // that every waiter re-checks the queue.
    pthread_cond_broadcast(&nq->cond);
    pthread_mutex_unlock(&nq->mutex);
}
// Remove and return the node at the head of the queue, blocking until
// one is available.
//
// An endOfStream node is returned but NOT unlinked: it stays at the head
// and `count` is left unchanged, so every later pop returns it again.
// Any other node is detached (its `prev`/`next` are cleared) before it is
// returned.
gemtextNode* gemtextNodeQueuePop(gemtextNodeQueue *nq) {
    gemtextNode *node;

    // pthread_cond_wait() must be called with the mutex held; it releases
    // the mutex while sleeping and re-acquires it before returning.
    pthread_mutex_lock(&nq->mutex);
    while (nq->count == 0) {
        pthread_cond_wait(&nq->cond, &nq->mutex);
    }
    node = nq->head;
    if (node->nodeType == endOfStream) {
        pthread_mutex_unlock(&nq->mutex);
        return node;
    }
    nq->count--;
    if (nq->tail == nq->head) {
        nq->tail = nq->head = NULL;
    } else {
        nq->head = nq->head->prev;
        nq->head->next = NULL;  // drop the link to the node being removed
    }
    pthread_mutex_unlock(&nq->mutex);
    node->prev = node->next = NULL;
    return node;
}
// Non-blocking variant of gemtextNodeQueuePop().
//
// Returns NULL immediately when the queue is empty. An endOfStream node
// is returned without being unlinked (it stays at the head and `count`
// is unchanged); any other node is detached before it is returned.
gemtextNode* gemtextNodeQueueTryPop(gemtextNodeQueue *nq) {
    gemtextNode *node;

    // `count` is shared with the producer, so it is only read under the
    // mutex.
    pthread_mutex_lock(&nq->mutex);
    if (nq->count == 0) {
        pthread_mutex_unlock(&nq->mutex);
        return NULL;
    }
    node = nq->head;
    if (node->nodeType == endOfStream) {
        pthread_mutex_unlock(&nq->mutex);
        return node;
    }
    nq->count--;
    if (nq->tail == nq->head) {
        nq->tail = nq->head = NULL;
    } else {
        nq->head = nq->head->prev;
        nq->head->next = NULL;  // drop the link to the node being removed
    }
    pthread_mutex_unlock(&nq->mutex);
    node->prev = node->next = NULL;
    return node;
}
// Free a node together with the payload that matches its type.
//
//  - linkNode:         the link's display text, URL and the link struct
//  - preformattedNode: the block's alt text, body and the block struct
//  - endOfStream:      no payload
//  - anything else:    the node's string
void gemtextNodeDeinit(gemtextNode *node) {
    if (node->nodeType == linkNode) {
        free(node->link->display);      // may be NULL; free() accepts that
        free(node->link->url);
        free(node->link);
    } else if (node->nodeType == preformattedNode) {
        free(node->block->altText);     // may be NULL; free() accepts that
        free(node->block->body);
        free(node->block);
    } else if (node->nodeType != endOfStream) {
        free(node->str);
    }
    free(node);
}
// Grow the line buffer by `len` bytes, preserving its current contents.
//
// A new zeroed allocation of capacity + len bytes replaces the old one;
// `cursor` is repositioned at the same offset in the new storage.
// Returns 0 on success, or 2 if the new size overflows or the allocation
// fails (the buffer is unchanged in that case).
int lineBufferExtend(lineBuffer *lb, size_t len) {
    size_t grown = lb->capacity + len;
    char *buf;

    if (grown < lb->capacity) {
        return 2;   // size computation wrapped around
    }
    buf = calloc(1, grown);
    if (buf == NULL) {
        return 2;
    }
    memcpy(buf, lb->buf, lb->len);
    free(lb->buf);
    lb->buf = buf;
    lb->cursor = buf + lb->len;
    // Record the new size. Without this every later extension would be
    // computed from the stale capacity and allocate the same amount again,
    // so the memcpy above would overrun once `len` outgrew it.
    lb->capacity = grown;
    return 0;
}
// Append one character, growing the buffer by LBUF_SIZE first when fewer
// than two free bytes remain.
//
// Returns 0 on success, or the error from lineBufferExtend() (in which
// case nothing is appended).
int lineBufferAppendChar(lineBuffer *lb, char c) {
    if (lb->len >= lb->capacity - 1) {
        int err = lineBufferExtend(lb, LBUF_SIZE);
        if (err != 0) {
            return err;
        }
    }
    *lb->cursor++ = c;
    lb->len++;
    return 0;
}
// Append one character WITHOUT checking for free space. The caller must
// already know that the buffer has room for it.
void lineBufferAppendCharUnchecked(lineBuffer *lb, char c) {
    lb->cursor[0] = c;
    lb->cursor += 1;
    lb->len += 1;
}
// Append `len` bytes starting at `c` to the line buffer.
//
// If the bytes do not fit in the remaining space the buffer is grown by
// the shortfall plus LBUF_SIZE as a safety margin. Returns 0 on success,
// or the error from lineBufferExtend() (nothing is appended then).
int lineBufferAppendString(lineBuffer *lb, char *c, size_t len) {
    size_t rem = lb->capacity - lb->len;    // free bytes before growing
    size_t i;                               // same type as `len`: no
                                            // signed/unsigned comparison
    if (rem < len) {
        int ret = lineBufferExtend(lb, len - rem + LBUF_SIZE);
        if (ret != 0) {
            return ret;
        }
    }
    for (i = 0; i < len; i++) {
        // Space was ensured above, so the unchecked variant is safe here.
        lineBufferAppendCharUnchecked(lb, c[i]);
    }
    return 0;
}
// Drop the most recently appended character.
//
// Does nothing on an empty buffer, so `len` cannot wrap around and
// `cursor` cannot move in front of the start of the storage.
void lineBufferRewind(lineBuffer *lb) {
    if (lb->len == 0) {
        return;
    }
    lb->len--;
    lb->cursor--;
}
// Discard the buffered text: the write position returns to the start of
// the storage and the length becomes zero. The storage itself is kept
// (and not cleared) for reuse.
void lineBufferReset(lineBuffer *lb) {
    lb->cursor = lb->buf;
    lb->len = 0;
}
// Emit the buffered text as a preformatted node and reset the parser for
// the next line.
//
// If the buffer ends with the closing fence ("```" with or without its
// newline) the fence is removed; otherwise nothing is removed, which is
// the case when the input ends before the block was closed. A single
// '\n' is then appended and the result becomes the block body. The alt
// text held by the parser is handed over to the block.
//
// Returns 0 on success, or a non-zero error (errno from the failing
// allocation, or the lineBufferAppendChar() error). On failure nothing is
// pushed and no memory is leaked.
int gemtextParserSendPreformatted(gemtextParser *parser, gemtextNodeQueue *nq) {
    static const char fence[] = "```\n";
    preformattedBlock *block = NULL;
    gemtextNode *node = NULL;
    char *buf = NULL;
    size_t len = parser->buffer.len;
    size_t trim = 0;
    int err = 0;

    // Only strip what is really there: blindly backing up four bytes would
    // underflow on a short buffer and eat body text on an unclosed block.
    if (len >= 4 && memcmp(parser->buffer.buf + len - 4, fence, 4) == 0) {
        trim = 4;
    } else if (len >= 3 && memcmp(parser->buffer.buf + len - 3, fence, 3) == 0) {
        trim = 3;
    }

    node = calloc(1, sizeof(gemtextNode));
    if (node == NULL) {
        return errno;
    }
    block = calloc(1, sizeof(preformattedBlock));
    if (block == NULL) {
        err = errno;
        goto fail;
    }

    parser->buffer.cursor -= trim;
    parser->buffer.len -= trim;
    // Checked append: when nothing was trimmed there is no guarantee that
    // a spare byte is available.
    err = lineBufferAppendChar(&parser->buffer, '\n');
    if (err != 0) {
        goto fail;
    }
    buf = strndup(parser->buffer.buf, parser->buffer.len);
    if (buf == NULL) {
        err = errno;
        goto fail;
    }

    block->altText = parser->altText;   // ownership moves to the block
    parser->altText = NULL;
    block->body = buf;
    node->nodeType = preformattedNode;
    node->block = block;
    gemtextNodeQueuePush(nq, node);

    lineBufferReset(&parser->buffer);
    parser->state = lineStart;
    parser->nodeType = unset;
    return 0;

fail:
    free(block);
    free(node);
    return err;
}
// Emit a link node and reset the parser for the next line.
//
// If the parser has already captured a URL (parser->linkUrl), that string
// is handed over to the node and the buffered text is copied as the
// display text. Otherwise the buffered text is the URL itself; a trailing
// '\n', if present, is not part of it.
//
// Returns 0 on success or the errno of the failing allocation. On failure
// nothing is pushed, nothing is leaked, and parser->linkUrl is untouched.
int gemtextParserSendLink(gemtextParser *parser, gemtextNodeQueue *nq) {
    gemtextLink *link;
    gemtextNode *node;
    char *url = NULL, *display = NULL;
    size_t urlLen;
    int err;

    link = calloc(1, sizeof(gemtextLink));
    if (link == NULL) {
        return errno;
    }
    node = calloc(1, sizeof(gemtextNode));
    if (node == NULL) {
        err = errno;        // read errno before free() can disturb it
        free(link);
        return err;
    }

    if (parser->linkUrl == NULL) {
        // Strip the line terminator only when it is actually there: at end
        // of input the buffer holds the bare URL, and an empty buffer must
        // not wrap around.
        urlLen = parser->buffer.len;
        if (urlLen > 0 && parser->buffer.buf[urlLen - 1] == '\n') {
            urlLen--;
        }
        url = strndup(parser->buffer.buf, urlLen);
        if (url == NULL) {
            err = errno;
            free(link);
            free(node);
            return err;
        }
    } else {
        display = strndup(parser->buffer.buf, parser->buffer.len);
        if (display == NULL) {
            err = errno;
            free(link);
            free(node);
            return err;
        }
        url = parser->linkUrl;  // ownership moves to the link
    }

    link->url = url;
    link->display = display;
    node->nodeType = linkNode;
    node->link = link;
    gemtextNodeQueuePush(nq, node);

    lineBufferReset(&parser->buffer);
    parser->state = lineStart;
    parser->nodeType = unset;
    parser->linkUrl = NULL;
    return 0;
}
// Emit the buffered text as a node of the parser's current type and
// reset the parser for the next line.
//
// The node's string is a copy of the whole buffer. Returns 0 on success
// or the errno of the failing allocation; on failure nothing is pushed
// and nothing is leaked.
int gemtextParserSend(gemtextParser *parser, gemtextNodeQueue *nq) {
    gemtextNode *node;
    char *buf;
    int err;

    node = calloc(1, sizeof(gemtextNode));
    if (node == NULL) {
        return errno;
    }
    buf = strndup(parser->buffer.buf, parser->buffer.len);
    if (buf == NULL) {
        err = errno;    // read errno before free() can disturb it
        free(node);     // the node was never queued, so release it here
        return err;
    }
    node->nodeType = parser->nodeType;
    node->str = buf;
    gemtextNodeQueuePush(nq, node);

    lineBufferReset(&parser->buffer);
    parser->state = lineStart;
    parser->nodeType = unset;
    return 0;
}
// Placeholder for reporting a parse error code; currently does nothing.
void logParseError(int err) {
    (void)err;  // not used yet
    //todo
}
// Begin a node of type `node_type` after its line marker has been read,
// where `c` is the first character following the marker.
//
// The buffer is emptied. A space or tab puts the parser in trimStart;
// any other character is stored as the first character of the text and
// the parser moves to normalState.
void switchMode(gemtextParser *parser, gemtextNodeType node_type, char c) {
    lineBufferReset(&parser->buffer);
    if (c == ' ' || c == '\t') {
        parser->state = trimStart;
    } else {
        // The buffer was just emptied, so one character always fits.
        lineBufferAppendCharUnchecked(&parser->buffer, c);
        parser->state = normalState;
    }
    parser->nodeType = node_type;
}
// Switch the parser into a preformatted block: the buffer is emptied and
// the parser waits in trimStart for whatever follows the opening fence.
void enterPreformattedMode(gemtextParser *parser) {
    lineBufferReset(&parser->buffer);
    parser->state = trimStart;
    parser->nodeType = preformattedNode;
}
// Feed one character (already appended to the line buffer) to the link
// state machine.
//
//  lineStart         just after "=>": skip blanks until the URL starts
//  normalState       inside the URL
//  linkDisplayStart  blanks between the URL and the display text
//  linkDisplay       inside the display text
//
// Returns 0 on success, 1 for an unexpected state, or the error from the
// send function / failing allocation.
int parseLink(gemtextParser *parser, gemtextNodeQueue *lq, char c) {
    int ret = 0;
    char *buf = NULL;

    assert(parser->nodeType == linkNode);
    switch (parser->state) {
    case lineStart:
        // The newline test has to come first: '\n' is also "not a blank",
        // so testing for blanks first would make this branch unreachable.
        if (c == '\n') {
            // "=>" with no URL. gemtextParserSend() only fills in the
            // node's string, while link nodes are expected to carry a link
            // struct (see gemtextNodeDeinit), so emit it as plain text.
            parser->nodeType = normalNode;
            ret = gemtextParserSend(parser, lq);
        } else if (c != ' ' && c != '\t') {
            // First URL character: restart the buffer with just it.
            lineBufferReset(&parser->buffer);
            lineBufferAppendCharUnchecked(&parser->buffer, c);
            parser->state = normalState;
        }
        break;
    case normalState:
        if (c == ' ' || c == '\t') {
            // URL finished; keep it (minus the blank) until the display
            // text has been read.
            buf = strndup(parser->buffer.buf, parser->buffer.len - 1);
            if (buf == NULL) return errno;
            parser->linkUrl = buf;
            parser->state = linkDisplayStart;
            lineBufferReset(&parser->buffer);
        } else if (c == '\n') {
            // gemtextParserSendLink() copies the URL out of the buffer
            // itself; no copy is needed here.
            ret = gemtextParserSendLink(parser, lq);
        }
        break;
    case linkDisplayStart:
        if (c == ' ' || c == '\t') {
            lineBufferRewind(&parser->buffer);
        } else if (c == '\n') {
            ret = gemtextParserSendLink(parser, lq);
        } else {
            parser->state = linkDisplay;
        }
        break;
    case linkDisplay:
        if (c == '\n') {
            ret = gemtextParserSendLink(parser, lq);
        }
        break;
    default:
        ret = 1;
        break;
    }
    return ret;
}
// Feed one character (already appended to the line buffer) to the
// preformatted-block state machine.
//
//  trimStart          blanks right after the opening fence
//  preformattedAlt    alt text on the opening fence line
//  lineStart          start of a body line; a '`' may begin the closing fence
//  normalState        inside a body line
//  first/second/thirdBacktickChar  counting the backticks of a closing fence
//
// Returns 0 on success, 1 for an unexpected state, or the error from a
// failing allocation / gemtextParserSendPreformatted().
int parsePreformatted(gemtextParser *parser, gemtextNodeQueue *lq, char c) {
    int ret = 0;
    char *buf = NULL;

    assert(parser->nodeType == preformattedNode);
    switch (parser->state) {
    case trimStart:
        if (c == '\n') {
            // Opening fence without alt text.
            lineBufferReset(&parser->buffer);
            parser->state = lineStart;
            parser->altText = NULL;
        } else if (c == ' ' || c == '\t') {
            lineBufferRewind(&parser->buffer);
        } else {
            parser->state = preformattedAlt;
        }
        break;
    case preformattedAlt:
        if (c == '\n') {
            parser->state = lineStart;
            if (parser->buffer.len > 0) {
                // Everything but the newline is the alt text.
                buf = strndup(parser->buffer.buf, parser->buffer.len - 1);
                if (buf == NULL) return errno;
                parser->altText = buf;
                lineBufferReset(&parser->buffer);
            }
        }
        break;
    case normalState:
        if (c == '\n') {
            parser->state = lineStart;
        }
        break;
    case lineStart:
        if (c == '\n') {
            parser->state = lineStart;
        } else if (c == '`') {
            parser->state = firstBacktickChar;
        } else {
            parser->state = normalState;
        }
        break;
    case firstBacktickChar:
        if (c == '`') {
            parser->state = secondBacktickChar;
        } else {
            parser->state = normalState;
        }
        break;
    case secondBacktickChar:
        if (c == '`') {
            parser->state = thirdBacktickChar;
        } else {
            parser->state = normalState;
        }
        break;
    case thirdBacktickChar:
        if (c == '\n') {
            // Report a failed send instead of silently losing the block.
            ret = gemtextParserSendPreformatted(parser, lq);
        } else {
            // We discard anything past the third backtick
            parser->buffer.cursor--;
            parser->buffer.len--;
        }
        break;
    default:
        ret = 1;
        break;
    }
    return ret;
}
// Feed one character (already appended to the line buffer) to the quote
// state machine. Consecutive '>' lines are accumulated into one node.
//
//  trimStart    blanks right after a '>' marker
//  normalState  inside the quoted text of a line
//  lineStart    start of the line after a quoted one: another '>'
//               continues the quote, anything else ends it
//
// Returns 0 on success, 1 for an unexpected state, -1 if the character
// that ended the quote could not be pushed back, or the error from
// gemtextParserSend().
int parseQuote(gemtextParser *parser, gemtextNodeQueue *lq, char c) {
    int ret = 0;

    switch (parser->state) {
    case lineStart:
        if (c == '>') {
            parser->state = trimStart;
            lineBufferRewind(&parser->buffer);
        } else {
            // `c` belongs to the next line: remove it from the quote, emit
            // the quote, and hand the character back to the stream so the
            // next read sees it again.
            lineBufferRewind(&parser->buffer);
            ret = gemtextParserSend(parser, lq);
            if (ret) return ret;
            // ungetc() rather than fseek(-1): one character of pushback
            // also works on streams that cannot seek (pipes, sockets).
            if (ungetc((unsigned char)c, parser->stream) == EOF) return -1;
            parser->state = lineStart;
            parser->nodeType = normalNode;
        }
        break;
    case normalState:
        if (c == '\n') {
            parser->state = lineStart;
        }
        break;
    case trimStart:
        if (c == ' ' || c == '\t') {
            // rewind and trim the whitespace
            parser->buffer.len--;
            parser->buffer.cursor--;
        } else if (c == '\n') {
            // Only the newline is buffered: send it as a normal node.
            if (parser->buffer.len == 1) {
                parser->nodeType = normalNode;
            }
            ret = gemtextParserSend(parser, lq);
        } else {
            parser->state = normalState;
        }
        break;
    default:
        // Should be unreachable
        ret = 1;
        break;
    }
    return ret;
}
// Feed one character (already appended to the line buffer) to the
// single-line state machine used for every node type that has no parser
// of its own.
//
// Leading blanks are trimmed while in lineStart/trimStart, the first
// other character moves to normalState, and a newline in any of these
// states emits the node. Returns 0 on success, 1 for an unexpected state,
// or the error from gemtextParserSend().
int parseGeneric(gemtextParser *parser, gemtextNodeQueue *nq, char c) {
    const int leading = (parser->state == lineStart ||
                         parser->state == trimStart);

    if (!leading && parser->state != normalState) {
        // Should be unreachable
        return 1;
    }
    if (c == '\n') {
        return gemtextParserSend(parser, nq);
    }
    if (!leading) {
        return 0;   // ordinary character inside the line: keep it
    }
    if (c == ' ' || c == '\t') {
        // rewind the cursor to trim the line start
        parser->buffer.len--;
        parser->buffer.cursor--;
        return 0;
    }
    parser->state = normalState;
    return 0;
}
// Feed one character (already appended to the line buffer) to the state
// machine that classifies a new line and collects plain text lines.
//
// The line type is decided by the leading characters only:
//   "=>"  link        ">"  quote       "*"  list item
//   "#", "##", "###"  headings         "```" preformatted block
// Once a line has been classified, this function changes parser->nodeType
// so that later characters are routed to the matching parser. Anything
// else is a plain text line, emitted as a normalNode when its newline
// arrives.
//
// Returns 0 on success or the error from gemtextParserSend().
int parseNormal(gemtextParser *parser, gemtextNodeQueue *nq, char c) {
    int ret = 0;

    switch (parser->state) {
    case lineStart:
        switch (c) {
        case '=':
            parser->state = firstLinkChar;
            break;
        case '>':
            parser->nodeType = quoteNode;
            parser->state = trimStart;
            lineBufferRewind(&parser->buffer);
            break;
        case '*':
            parser->nodeType = listNode;
            parser->state = trimStart;
            lineBufferRewind(&parser->buffer);
            break;
        case '#':
            parser->state = firstHashChar;
            break;
        case '`':
            parser->state = firstBacktickChar;
            break;
        case '\n':
            parser->nodeType = normalNode;
            ret = gemtextParserSend(parser, nq);
            break;
        default:
            // Any other first character makes this a plain text line.
            // Leaving lineStart matters: otherwise a marker character
            // appearing later in the line would be taken for a line type.
            parser->nodeType = normalNode;
            parser->state = normalState;
            break;
        }
        break;
    case normalState:
        // Inside a plain text line: collect until the newline. Several
        // branches below move here, so the line must be emitted from here.
        if (c == '\n') {
            parser->nodeType = normalNode;
            ret = gemtextParserSend(parser, nq);
        }
        break;
    case firstLinkChar:
        if (c == '>') {
            parser->nodeType = linkNode;
            parser->state = lineStart;
        } else if (c == '\n') {
            parser->nodeType = normalNode;
            ret = gemtextParserSend(parser, nq);
        } else {
            // '=' not followed by '>': plain text after all.
            parser->state = normalState;
            parser->nodeType = normalNode;
        }
        break;
    case firstHashChar:
        if (c == '#') {
            parser->state = secondHashChar;
        } else if (c == '\n') {
            parser->nodeType = normalNode;
            ret = gemtextParserSend(parser, nq);
        } else {
            switchMode(parser, h1Node, c);
        }
        break;
    case secondHashChar:
        if (c == '#') {
            parser->nodeType = h3Node;
            parser->state = trimStart;
            lineBufferReset(&parser->buffer);
        } else if (c == '\n') {
            parser->nodeType = normalNode;
            ret = gemtextParserSend(parser, nq);
        } else {
            switchMode(parser, h2Node, c);
        }
        break;
    case thirdHashChar:
        // Nothing in this function moves to thirdHashChar (the third '#'
        // is handled under secondHashChar); kept for callers that may set
        // the state themselves.
        if (c == '\n') {
            parser->nodeType = normalNode;
            ret = gemtextParserSend(parser, nq);
        } else {
            switchMode(parser, h3Node, c);
        }
        break;
    case firstBacktickChar:
        if (c == '\n') {
            parser->nodeType = normalNode;
            ret = gemtextParserSend(parser, nq);
        } else if (c == '`') {
            parser->state = secondBacktickChar;
        } else {
            parser->state = normalState;
            parser->nodeType = normalNode;
        }
        break;
    case secondBacktickChar:
        if (c == '`') {
            enterPreformattedMode(parser);
        } else if (c == '\n') {
            parser->nodeType = normalNode;
            ret = gemtextParserSend(parser, nq);
        } else {
            parser->state = normalState;
            parser->nodeType = normalNode;
        }
        break;
    default:
        break;
    }
    return ret;
}
// Read the parser's stream one character at a time, push a node onto
// `nq` for every parsed element, and finish with an endOfStream node.
//
// Each character is appended to the line buffer and then dispatched to
// the parser that matches the current node type. When no further
// character can be read the input is assumed to be finished: whatever is
// still pending is emitted first, then the endOfStream node.
//
// Returns 0 on success, or the first non-zero error from a buffer
// append, a sub-parser, a send function, or the final allocation.
int parseGemtext(gemtextParser *parser, gemtextNodeQueue *nq) {
    char c;
    int ret;
    int pending;
    gemtextNode *node;

    while (fread(&c, 1, 1, parser->stream) == 1) {
        ret = lineBufferAppendChar(&parser->buffer, c);
        if (ret == 0) {
            switch (parser->nodeType) {
            case unset:
            case normalNode:
                ret = parseNormal(parser, nq, c);
                break;
            case preformattedNode:
                ret = parsePreformatted(parser, nq, c);
                break;
            case quoteNode:
                ret = parseQuote(parser, nq, c);
                break;
            case linkNode:
                ret = parseLink(parser, nq, c);
                break;
            default:
                ret = parseGeneric(parser, nq, c);
                break;
            }
        }
        if (ret) {
            logParseError(ret);
            return ret;
        }
    }

    // If we were unable to read a char, assume we're at the end of the
    // stream and send whatever is still pending to the queue.
    pending = (parser->state != lineStart && parser->state != trimStart);
    if (!pending && parser->buffer.len > 0) {
        // The state alone misses two cases that still hold text:
        //  - a quote block waiting to see whether the next line continues
        //    it (it sits in lineStart/trimStart between lines);
        //  - a final plain text line with no trailing newline.
        switch (parser->nodeType) {
        case quoteNode:
        case unset:
        case normalNode:
            pending = 1;
            break;
        default:
            break;
        }
    }
    if (pending) {
        switch (parser->nodeType) {
        case preformattedNode:
            ret = gemtextParserSendPreformatted(parser, nq);
            break;
        case linkNode:
            ret = gemtextParserSendLink(parser, nq);
            break;
        default:
            // A line that was never classified is plain text, the same
            // type parseNormal() assigns before it sends such a line.
            if (parser->nodeType == unset) {
                parser->nodeType = normalNode;
            }
            ret = gemtextParserSend(parser, nq);
            break;
        }
        if (ret) return ret;
    }

    // Send an `endOfStream` node since we know we're done
    node = calloc(1, sizeof(gemtextNode));
    if (node == NULL) return errno;
    node->nodeType = endOfStream;
    node->prev = node->next = NULL;
    node->str = NULL;
    gemtextNodeQueuePush(nq, node);
    return 0;
}