686 lines
19 KiB
C
686 lines
19 KiB
C
#include <assert.h> // assert
|
|
#include <errno.h> // errno
|
|
#include <stddef.h> // NULL, size_t
|
|
#include <stdio.h> // fclose
|
|
#include <stdlib.h> // calloc, free
|
|
#include <string.h> // memcpy
|
|
|
|
#include "gemtext-parser.h"
|
|
|
|
// Give `lb` a fresh, zero-filled backing store of LBUF_SIZE bytes and
// point its cursor at the start of it.
//
// Returns 0 on success, or 2 if the allocation fails (in which case
// `lb` is not modified).
int lineBufferInit(lineBuffer *lb) {
    char *storage = calloc(1, LBUF_SIZE);

    if (storage != NULL) {
        lb->buf = storage;
        lb->cursor = storage;
        lb->capacity = LBUF_SIZE;
        lb->len = 0;
        return 0;
    }
    return 2;
}
|
|
|
|
// Initialise an already-allocated parser so that it reads from `stream`.
//
// The parser starts in normalMode at lineStart with no pending link URL
// or alt text. Both pointers are cleared explicitly so that a parser
// living in non-zeroed storage (e.g. on the stack) never exposes an
// indeterminate pointer to gemtextParserDeinit or the send functions.
//
// Returns 0 on success, or the non-zero value reported by lineBufferInit
// when the line buffer cannot be allocated.
int gemtextParserInit(gemtextParser *parser, FILE *stream) {
    parser->stream = stream;
    parser->mode = normalMode;
    parser->state = lineStart;
    parser->linkUrl = NULL;
    parser->altText = NULL;
    return lineBufferInit(&parser->buffer);
}
|
|
|
|
gemtextParser* gemtextParserNew(FILE *stream) {
|
|
gemtextParser *parser = calloc(1, sizeof(gemtextParser));
|
|
if (parser == NULL)
|
|
return NULL;
|
|
if (gemtextParserInit(parser, stream) != 0) {
|
|
free(parser);
|
|
return NULL;
|
|
}
|
|
return parser;
|
|
}
|
|
|
|
// Release everything the parser owns without freeing the parser itself:
// the input stream is closed, the line buffer storage is freed, and any
// string still pending for the current mode (link URL in linkMode, alt
// text in preformattedMode) is freed.
void gemtextParserDeinit(gemtextParser *parser) {
    fclose(parser->stream);
    free(parser->buffer.buf);

    // Only the pointer that belongs to the active mode is released;
    // free() ignores NULL, so no separate NULL test is needed.
    switch (parser->mode) {
        case linkMode:
            free(parser->linkUrl);
            break;
        case preformattedMode:
            free(parser->altText);
            break;
        default:
            break;
    }
}
|
|
|
|
// Tear down and free a parser obtained from gemtextParserNew.
//
// Passing NULL is a no-op, mirroring free(), so callers can destroy
// unconditionally on their cleanup paths.
void gemtextParserDestroy(gemtextParser *parser) {
    if (parser == NULL) {
        return;
    }
    gemtextParserDeinit(parser);
    free(parser);
}
|
|
|
|
// Initialise an empty node queue: no head, no tail, a count of zero and
// freshly initialised mutex and condition variable.
//
// Returns 0 on success, or the error number reported by
// pthread_mutex_init / pthread_cond_init. If the condition variable
// cannot be created the mutex is destroyed again so nothing is leaked.
int gemtextNodeQueueInit(gemtextNodeQueue *queue) {
    int ret;

    queue->head = NULL;
    queue->tail = NULL;
    // The pop functions test count before touching head, so it must
    // start from a known value.
    queue->count = 0;

    ret = pthread_mutex_init(&queue->mutex, NULL);
    if (ret != 0) {
        return ret;
    }
    ret = pthread_cond_init(&queue->cond, NULL);
    if (ret != 0) {
        pthread_mutex_destroy(&queue->mutex);
    }
    return ret;
}
|
|
|
|
// Append `node` at the tail of the queue and wake any consumer blocked
// in gemtextNodeQueuePop.
//
// Link direction: each node's `next` points toward the head (the older
// neighbour) and `prev` toward the tail (the newer neighbour), which is
// what the pop functions rely on when they advance head via `prev`.
void gemtextNodeQueuePush(gemtextNodeQueue *queue, gemtextNode *node) {
    pthread_mutex_lock(&queue->mutex);

    node->prev = NULL;
    node->next = queue->tail;
    if (queue->tail == NULL) {
        queue->head = node;
    } else {
        queue->tail->prev = node;
    }
    queue->tail = node;
    queue->count++;

    // Without a wake-up here a consumer waiting on the condition
    // variable would sleep forever. The end-of-stream marker is never
    // unlinked by the pop functions, so every waiter must see it.
    if (node->nodeType == endOfStream) {
        pthread_cond_broadcast(&queue->cond);
    } else {
        pthread_cond_signal(&queue->cond);
    }

    pthread_mutex_unlock(&queue->mutex);
}
|
|
|
|
// Remove and return the oldest node, blocking while the queue is empty.
//
// An endOfStream node is returned but deliberately left at the head of
// the queue (and not counted as consumed), so every subsequent pop sees
// it again. Any other node is unlinked and returned with its prev/next
// pointers cleared.
gemtextNode* gemtextNodeQueuePop(gemtextNodeQueue *lq) {
    gemtextNode *node;

    // pthread_cond_wait requires the mutex to be held by the caller, and
    // count must only be read under the lock.
    pthread_mutex_lock(&lq->mutex);
    while (lq->count == 0) {
        pthread_cond_wait(&lq->cond, &lq->mutex);
    }

    node = lq->head;
    if (node->nodeType == endOfStream) {
        pthread_mutex_unlock(&lq->mutex);
        return node;
    }

    lq->count--;
    if (lq->tail == lq->head) {
        lq->tail = lq->head = NULL;
    } else {
        lq->head = node->prev;
        // The new head must not keep pointing at the node handed out.
        lq->head->next = NULL;
    }
    pthread_mutex_unlock(&lq->mutex);

    node->prev = node->next = NULL;
    return node;
}
|
|
|
|
// Non-blocking variant of gemtextNodeQueuePop.
//
// Returns NULL immediately when the queue is empty. Otherwise behaves
// like the blocking pop: an endOfStream node is returned but left at the
// head of the queue, and any other node is unlinked and returned with
// its prev/next pointers cleared.
gemtextNode* gemtextNodeQueueTryPop(gemtextNodeQueue *lq) {
    gemtextNode *node;

    // count is shared with the producer, so it is only inspected while
    // holding the mutex.
    pthread_mutex_lock(&lq->mutex);
    if (lq->count == 0) {
        pthread_mutex_unlock(&lq->mutex);
        return NULL;
    }

    node = lq->head;
    if (node->nodeType == endOfStream) {
        pthread_mutex_unlock(&lq->mutex);
        return node;
    }

    lq->count--;
    if (lq->tail == lq->head) {
        lq->tail = lq->head = NULL;
    } else {
        lq->head = node->prev;
        // The new head must not keep pointing at the node handed out.
        lq->head->next = NULL;
    }
    pthread_mutex_unlock(&lq->mutex);

    node->prev = node->next = NULL;
    return node;
}
|
|
|
|
// Free a node together with whatever payload its type carries:
//   - linkNode:         display text, url and the link record
//   - preformattedNode: alt text, body and the block record
//   - endOfStream:      no payload
//   - anything else:    the plain string in node->str
// The node itself is always freed last.
void gemtextNodeDeinit(gemtextNode *node) {
    if (node->nodeType == linkNode) {
        // free() accepts NULL, so an absent display string needs no test
        free(node->link->display);
        free(node->link->url);
        free(node->link);
    } else if (node->nodeType == preformattedNode) {
        free(node->block->altText);
        free(node->block->body);
        free(node->block);
    } else if (node->nodeType != endOfStream) {
        free(node->str);
    }
    free(node);
}
|
|
|
|
// Grow the buffer's backing store by `len` bytes, preserving the bytes
// written so far. The new storage is zero-filled beyond the copied data.
//
// Returns 0 on success, or 2 if the new size would overflow size_t or
// the allocation fails; on failure `lb` is left unchanged.
int lineBufferExtend(lineBuffer *lb, size_t len) {
    size_t grown = lb->capacity + len;
    char *buf;

    // Unsigned addition wraps on overflow; a wrapped size is smaller
    // than the starting capacity.
    if (grown < lb->capacity) return 2;

    buf = calloc(1, grown);
    if (buf == NULL) return 2;

    memcpy(buf, lb->buf, lb->len);
    free(lb->buf);
    lb->buf = buf;
    lb->cursor = buf + lb->len;
    // Record the new size: otherwise every later extension would
    // allocate the same fixed amount and the copy above would eventually
    // write past the end of it.
    lb->capacity = grown;
    return 0;
}
|
|
|
|
// Append one byte to the buffer, first growing it by LBUF_SIZE when
// fewer than two free bytes remain.
//
// Returns 0 on success or the error from lineBufferExtend, in which
// case nothing is appended.
int lineBufferAppendChar(lineBuffer *lb, char c) {
    if (lb->len >= lb->capacity - 1) {
        int err = lineBufferExtend(lb, LBUF_SIZE);
        if (err != 0) return err;
    }
    *(lb->cursor++) = c;
    lb->len += 1;
    return 0;
}
|
|
|
|
// Append one byte at the cursor without any capacity check. The caller
// is responsible for making sure there is room.
void lineBufferAppendCharUnchecked(lineBuffer *lb, char c) {
    *(lb->cursor++) = c;
    lb->len += 1;
}
|
|
|
|
// Append `len` bytes starting at `c` to the buffer, growing it first if
// they do not fit.
//
// Returns 0 on success or the error from lineBufferExtend, in which
// case nothing is appended.
int lineBufferAppendString(lineBuffer *lb, char *c, size_t len) {
    size_t rem;

    if (len == 0) return 0;

    // If the remaining space won't fit our string, extend the buffer.
    // We grow by len - rem + LBUF_SIZE for a safety margin.
    rem = lb->capacity - lb->len;
    if (rem < len) {
        int ret = lineBufferExtend(lb, len - rem + LBUF_SIZE);
        if (ret != 0) return ret;
    }

    // One bulk copy; a size_t length also avoids the signed/unsigned
    // comparison an int loop index would need.
    memcpy(lb->cursor, c, len);
    lb->cursor += len;
    lb->len += len;
    return 0;
}
|
|
|
|
// Drop the most recently appended byte by stepping the cursor and the
// length back by one. Rewinding an empty buffer is a no-op, so the
// length cannot wrap around and the cursor cannot move in front of the
// start of the storage.
void lineBufferRewind(lineBuffer *lb) {
    if (lb->len == 0) {
        return;
    }
    lb->len--;
    lb->cursor--;
}
|
|
|
|
// Discard the buffered bytes: the cursor returns to the start of the
// storage and the length becomes zero. The storage itself is kept and
// its contents are not cleared.
void lineBufferReset(lineBuffer *lb) {
    lb->cursor = lb->buf;
    lb->len = 0;
}
|
|
|
|
// Package the buffered preformatted text (plus any pending alt text)
// into a preformattedNode, push it onto `lq`, and return the parser to
// normalMode / lineStart with an empty buffer.
//
// Ownership of parser->altText moves to the node. Returns 0 on success
// or an errno-style value if an allocation fails, in which case nothing
// is pushed and nothing is leaked.
int gemtextParserSendPreformatted(gemtextParser *parser, gemtextNodeQueue *lq) {
    preformattedBlock *block = NULL;
    gemtextNode *node = NULL;
    char *body = NULL;
    size_t len = parser->buffer.len;
    size_t fence = 0;
    int err;

    // Work out how much of the tail is the closing fence. When reached
    // from the parser's closing-fence state the buffer ends in "```\n";
    // at end of input it may end in a bare "```" or in no fence at all,
    // and then chopping a fixed four bytes would eat body text or move
    // the cursor in front of the buffer.
    if (len >= 4 && memcmp(parser->buffer.buf + len - 4, "```\n", 4) == 0) {
        fence = 4;
    } else if (len >= 3 && memcmp(parser->buffer.buf + len - 3, "```", 3) == 0) {
        fence = 3;
    }

    node = calloc(1, sizeof(gemtextNode));
    if (node == NULL) goto fail;
    node->nodeType = preformattedNode;

    block = calloc(1, sizeof(preformattedBlock));
    if (block == NULL) goto fail;

    if (fence != 0) {
        // back up our cursor over the fence and insert a lf char
        parser->buffer.cursor -= fence;
        parser->buffer.len -= fence;
        lineBufferAppendCharUnchecked(&parser->buffer, '\n');
    }
    body = strndup(parser->buffer.buf, parser->buffer.len);
    if (body == NULL) goto fail;

    block->altText = parser->altText;
    parser->altText = NULL;
    block->body = body;
    node->block = block;
    gemtextNodeQueuePush(lq, node);

    lineBufferReset(&parser->buffer);
    parser->state = lineStart;
    parser->mode = normalMode;
    return 0;

fail:
    // Capture errno before free() gets a chance to disturb it, and never
    // report success from a failure path.
    err = errno;
    if (err == 0) err = ENOMEM;
    free(block);
    free(node);
    return err;
}
|
|
|
|
// Build a linkNode from the parser state, push it onto `lq`, and return
// the parser to normalMode / lineStart with an empty buffer.
//
// If no URL has been captured yet (parser->linkUrl is NULL) the buffer
// holds the URL and the link has no display text. Otherwise the captured
// URL is handed over to the node and the buffer becomes the display
// text.
//
// Returns 0 on success or an errno-style value if an allocation fails,
// in which case nothing is pushed, nothing is leaked, and
// parser->linkUrl is still owned by the parser.
int gemtextParserSendLink(gemtextParser *parser, gemtextNodeQueue *lq) {
    gemtextLink *link = NULL;
    gemtextNode *node = NULL;
    char *url = NULL, *display = NULL;
    int err;

    link = calloc(1, sizeof(gemtextLink));
    if (link == NULL) goto fail;

    node = calloc(1, sizeof(gemtextNode));
    if (node == NULL) goto fail;

    if (parser->linkUrl == NULL) {
        size_t urlLen = parser->buffer.len;

        // Drop the line terminator, but only when it is really there: at
        // end of input the buffer may be empty or end with the last
        // character of the URL.
        if (urlLen > 0 && parser->buffer.buf[urlLen - 1] == '\n') {
            urlLen--;
        }
        url = strndup(parser->buffer.buf, urlLen);
        if (url == NULL) goto fail;
    } else {
        display = strndup(parser->buffer.buf, parser->buffer.len);
        if (display == NULL) goto fail;
        url = parser->linkUrl;
    }

    link->url = url;
    link->display = display;
    node->nodeType = linkNode;
    node->link = link;
    gemtextNodeQueuePush(lq, node);

    lineBufferReset(&parser->buffer);
    parser->state = lineStart;
    parser->mode = normalMode;
    parser->linkUrl = NULL;
    return 0;

fail:
    // Capture errno before free() gets a chance to disturb it, and never
    // report success from a failure path.
    err = errno;
    if (err == 0) err = ENOMEM;
    free(node);
    free(link);
    return err;
}
|
|
|
|
// Copy the buffered text into a new node of type `lt`, push it onto
// `lq`, and return the parser to normalMode / lineStart with an empty
// buffer.
//
// Returns 0 on success or an errno-style value if an allocation fails,
// in which case nothing is pushed and nothing is leaked.
int gemtextParserSend(gemtextParser *parser, gemtextNodeType lt, gemtextNodeQueue *lq) {
    gemtextNode *node;
    char *text;
    int err;

    node = calloc(1, sizeof(gemtextNode));
    if (node == NULL) {
        err = errno;
        return err != 0 ? err : ENOMEM;
    }
    node->nodeType = lt;

    text = strndup(parser->buffer.buf, parser->buffer.len);
    if (text == NULL) {
        // Save errno before free(), then release the half-built node.
        err = errno;
        free(node);
        return err != 0 ? err : ENOMEM;
    }
    node->str = text;
    gemtextNodeQueuePush(lq, node);

    lineBufferReset(&parser->buffer);
    parser->state = lineStart;
    parser->mode = normalMode;
    return 0;
}
|
|
|
|
// Report a parse error. Currently a stub: the error code is accepted
// and ignored.
void logParseError(int err) {
    // todo
    (void)err; // keep unused-parameter warnings quiet until implemented
}
|
|
|
|
// Switch the parser into `mode`, discarding whatever marker characters
// were buffered so far. `c` is the character that triggered the switch:
// a space or tab means the content has not started yet (trimStart);
// anything else is already the first content character, so it is kept
// in the buffer and the parser goes straight to normalState.
void switchMode(gemtextParser *parser, gemtextParserMode mode, char c) {
    lineBufferReset(&parser->buffer);

    if (c == ' ' || c == '\t') {
        parser->state = trimStart;
    } else {
        lineBufferAppendCharUnchecked(&parser->buffer, c);
        parser->state = normalState;
    }
    parser->mode = mode;
}
|
|
|
|
// Begin a preformatted block: the buffered fence characters are dropped
// and the parser waits in trimStart for the rest of the fence line.
void enterPreformattedMode(gemtextParser *parser) {
    lineBufferReset(&parser->buffer);
    parser->state = trimStart;
    parser->mode = preformattedMode;
}
|
|
|
|
// Advance the link-line state machine by one character. `c` has already
// been appended to the parser's buffer by the caller.
//
//   lineStart        - skipping whitespace after the "=>" marker
//   normalState      - collecting the URL
//   linkDisplayStart - skipping whitespace between URL and display text
//   linkDisplay      - collecting the display text
//
// Returns 0 on success, 1 if the parser is in a state that is not valid
// for link mode, or the error from an allocation / send call.
int parseLink(gemtextParser *parser, gemtextNodeQueue *lq, char c) {
    int ret = 0;
    char *buf = NULL;

    assert(parser->mode == linkMode);
    switch (parser->state) {
        case lineStart:
            // The newline test has to come first: '\n' is also "not a
            // space or tab", so testing that first would swallow it as
            // the first URL character.
            if (c == '\n') {
                ret = gemtextParserSend(parser, normalNode, lq);
            } else if (c != ' ' && c != '\t') {
                lineBufferReset(&parser->buffer);
                lineBufferAppendCharUnchecked(&parser->buffer, c);
                parser->state = normalState;
            }
            break;
        case normalState:
            if (c == ' ' || c == '\t') {
                // The URL is everything before the whitespace just read.
                buf = strndup(parser->buffer.buf, parser->buffer.len - 1);
                if (buf == NULL) return errno;
                parser->linkUrl = buf;
                parser->state = linkDisplayStart;
                lineBufferReset(&parser->buffer);
            } else if (c == '\n') {
                // gemtextParserSendLink copies the URL out of the buffer
                // itself; a copy made here would only be leaked.
                ret = gemtextParserSendLink(parser, lq);
            }
            break;
        case linkDisplayStart:
            if (c == ' ' || c == '\t') {
                lineBufferRewind(&parser->buffer);
            } else if (c == '\n') {
                ret = gemtextParserSendLink(parser, lq);
            } else {
                parser->state = linkDisplay;
            }
            break;
        case linkDisplay:
            if (c == '\n') {
                ret = gemtextParserSendLink(parser, lq);
            }
            break;
        default:
            ret = 1;
            break;
    }
    return ret;
}
|
|
|
|
// Advance the preformatted-block state machine by one character. `c`
// has already been appended to the parser's buffer by the caller.
//
//   trimStart / preformattedAlt - reading the rest of the opening fence
//                                 line (optional alt text)
//   lineStart / normalState     - collecting body lines
//   first/second/thirdBacktickChar - recognising a closing fence at the
//                                 start of a body line
//
// Returns 0 on success, 1 if the parser is in a state that is not valid
// for preformatted mode, or the error from an allocation / send call.
int parsePreformatted(gemtextParser *parser, gemtextNodeQueue *lq, char c) {
    char *buf = NULL;

    assert(parser->mode == preformattedMode);
    switch (parser->state) {
        case trimStart:
            if (c == '\n') {
                // Fence line without alt text.
                lineBufferReset(&parser->buffer);
                parser->state = lineStart;
                parser->altText = NULL;
            } else if (c == ' ' || c == '\t') {
                lineBufferRewind(&parser->buffer);
            } else {
                parser->state = preformattedAlt;
            }
            break;
        case preformattedAlt:
            if (c == '\n') {
                parser->state = lineStart;
                if (parser->buffer.len > 0) {
                    // Alt text is the fence line minus its newline.
                    buf = strndup(parser->buffer.buf, parser->buffer.len - 1);
                    if (buf == NULL) return errno;
                    parser->altText = buf;
                    lineBufferReset(&parser->buffer);
                }
            }
            break;
        case normalState:
            if (c == '\n') {
                parser->state = lineStart;
            }
            break;
        case lineStart:
            if (c == '\n') {
                parser->state = lineStart;
            } else if (c == '`') {
                parser->state = firstBacktickChar;
            } else {
                parser->state = normalState;
            }
            break;
        case firstBacktickChar:
            if (c == '`') {
                parser->state = secondBacktickChar;
            } else {
                parser->state = normalState;
            }
            break;
        case secondBacktickChar:
            if (c == '`') {
                parser->state = thirdBacktickChar;
            } else {
                parser->state = normalState;
            }
            break;
        case thirdBacktickChar:
            if (c == '\n') {
                // Propagate allocation failures instead of dropping them.
                return gemtextParserSendPreformatted(parser, lq);
            } else {
                // We discard anything past the third backtick
                parser->buffer.cursor--;
                parser->buffer.len--;
            }
            break;
        default:
            return 1;
    }
    return 0;
}
|
|
|
|
// Advance the quote state machine by one character. `c` has already
// been appended to the parser's buffer by the caller.
//
// Consecutive '>' lines accumulate in the buffer and are sent as one
// quoteNode once a line that does not start with '>' is seen; that
// line's first character is handed back to the stream so the next read
// re-parses it in normal mode.
//
// Returns 0 on success, 1 for a state that is not valid in quote mode,
// EIO if the character cannot be pushed back, or the error from the
// send call.
int parseQuote(gemtextParser *parser, gemtextNodeQueue *lq, char c) {
    int ret = 0;

    switch (parser->state) {
        case lineStart:
            if (c == '>') {
                parser->state = trimStart;
                lineBufferRewind(&parser->buffer);
            } else {
                lineBufferRewind(&parser->buffer);
                ret = gemtextParserSend(parser, quoteNode, lq);
                if (ret) return ret;
                // ungetc works on pipes and sockets as well as regular
                // files, unlike seeking back by one byte.
                if (ungetc((unsigned char)c, parser->stream) == EOF) {
                    return EIO;
                }
                parser->state = lineStart;
                parser->mode = normalMode;
            }
            break;
        case normalState:
            if (c == '\n') {
                parser->state = lineStart;
            }
            break;
        case trimStart:
            if (c == ' ' || c == '\t') {
                // rewind and trim the whitespace
                parser->buffer.len--;
                parser->buffer.cursor--;
            } else if (c == '\n') {
                // A bare '>' line ends the quote. The buffer may already
                // hold earlier quote lines, so it is sent as a quote.
                ret = gemtextParserSend(parser, quoteNode, lq);
            } else {
                parser->state = normalState;
            }
            break;
        default:
            // Should be unreachable
            ret = 1;
            break;
    }
    return ret;
}
|
|
|
|
// Shared single-line state machine for the modes that only need leading
// whitespace trimmed (the callers in this file use it for headings and
// list items). `c` has already been appended to the parser's buffer;
// the line is sent as a node of type `lt` when the newline arrives.
//
// Returns 0 on success, 1 for a state that is not valid here, or the
// error from gemtextParserSend.
int parseGeneric(gemtextParser *parser, gemtextNodeQueue *lq, gemtextNodeType lt, char c) {
    const int atLineEnd = (c == '\n');

    if (parser->state == normalState) {
        return atLineEnd ? gemtextParserSend(parser, lt, lq) : 0;
    }

    if (parser->state != lineStart && parser->state != trimStart) {
        // Should be unreachable
        return 1;
    }

    // Still in front of the content: lineStart and trimStart behave alike.
    if (atLineEnd) {
        return gemtextParserSend(parser, lt, lq);
    }
    if (c == ' ' || c == '\t') {
        // take the whitespace back out so the line start stays trimmed
        parser->buffer.cursor--;
        parser->buffer.len--;
    } else {
        parser->state = normalState;
    }
    return 0;
}
|
|
|
|
// Advance the normal-mode state machine by one character. `c` has
// already been appended to the parser's buffer by the caller.
//
// The first characters of a line decide its type: "=>" switches to link
// mode, '>' to quote mode, '*' to list mode, one to three '#' to a
// heading mode and "```" to preformatted mode. Anything else makes the
// line plain text, which is sent as a normalNode at the newline.
//
// Returns 0 on success or the error from gemtextParserSend.
int parseNormal(gemtextParser *parser, gemtextNodeQueue *lq, char c) {
    int ret = 0;

    switch (parser->state) {
        case lineStart:
            switch (c) {
                case '=':
                    parser->state = firstLinkChar;
                    break;
                case '>':
                    parser->mode = quoteMode;
                    parser->state = trimStart;
                    lineBufferRewind(&parser->buffer);
                    break;
                case '*':
                    parser->mode = listMode;
                    parser->state = trimStart;
                    lineBufferRewind(&parser->buffer);
                    break;
                case '#':
                    parser->state = firstHashChar;
                    break;
                case '`':
                    parser->state = firstBacktickChar;
                    break;
                case '\n':
                    ret = gemtextParserSend(parser, normalNode, lq);
                    break;
                default:
                    // An ordinary first character commits the line to
                    // plain text. Staying in lineStart would let a marker
                    // in the middle of the line change its type, and the
                    // end-of-input flush skips buffers that are still at
                    // lineStart, so an unterminated last line was lost.
                    parser->state = normalState;
                    break;
            }
            break;
        case normalState:
            // Plain text ends at the newline. Several branches below
            // fall back to this state, so it must send the line.
            if (c == '\n') {
                ret = gemtextParserSend(parser, normalNode, lq);
            }
            break;
        case firstLinkChar:
            if (c == '>') {
                parser->mode = linkMode;
                parser->state = lineStart;
            } else if (c == '\n') {
                ret = gemtextParserSend(parser, normalNode, lq);
            } else {
                parser->state = normalState;
            }
            break;
        case firstHashChar:
            if (c == '#') {
                parser->state = secondHashChar;
            } else if (c == '\n') {
                ret = gemtextParserSend(parser, normalNode, lq);
            } else {
                switchMode(parser, h1Mode, c);
            }
            break;
        case secondHashChar:
            if (c == '#') {
                parser->mode = h3Mode;
                parser->state = trimStart;
                lineBufferReset(&parser->buffer);
            } else if (c == '\n') {
                ret = gemtextParserSend(parser, normalNode, lq);
            } else {
                switchMode(parser, h2Mode, c);
            }
            break;
        case thirdHashChar:
            // No transition in this function enters this state (a third
            // '#' switches straight to h3Mode); kept for completeness.
            if (c == '\n') {
                ret = gemtextParserSend(parser, normalNode, lq);
            } else {
                switchMode(parser, h3Mode, c);
            }
            break;
        case firstBacktickChar:
            if (c == '\n') {
                ret = gemtextParserSend(parser, normalNode, lq);
            } else if (c == '`') {
                parser->state = secondBacktickChar;
            } else {
                parser->state = normalState;
                parser->mode = normalMode;
            }
            break;
        case secondBacktickChar:
            if (c == '`') {
                enterPreformattedMode(parser);
            } else if (c == '\n') {
                ret = gemtextParserSend(parser, normalNode, lq);
            } else {
                parser->state = normalState;
                parser->mode = normalMode;
            }
            break;
        default:
            break;
    }
    return ret;
}
|
|
|
|
// Read the parser's stream one byte at a time, feed each byte to the
// state machine for the current mode, and push the resulting nodes onto
// `lq`.
//
// When the stream ends, any partially read line is flushed as a node of
// the current mode's type and an endOfStream node is pushed so consumers
// know to stop.
//
// Returns 0 on a clean end of input, otherwise the first non-zero error
// produced by the buffer, a mode handler, an allocation, or a read
// error on the stream. A read error is reported only after the partial
// line and the endOfStream node have been queued, so a consumer blocked
// on the queue is still released.
int parseGemtext(gemtextParser *parser, gemtextNodeQueue *lq) {
    char c;
    int ret;
    gemtextNode *node;

    for (;;) {
        if (fread(&c, 1, 1, parser->stream) == 1) {
            ret = lineBufferAppendChar(&parser->buffer, c);
            if (ret) {
                logParseError(ret);
                return ret;
            }
        } else {
            // fread returns a short count for both end-of-file and
            // failure; ferror tells them apart. errno is captured right
            // away, before later calls can overwrite it.
            int readErr = 0;
            if (ferror(parser->stream)) {
                readErr = errno != 0 ? errno : EIO;
            }

            ret = 0;
            if (parser->state != lineStart && parser->state != trimStart) {
                switch (parser->mode) {
                    case normalMode:
                        ret = gemtextParserSend(parser, normalNode, lq);
                        break;
                    case preformattedMode:
                        ret = gemtextParserSendPreformatted(parser, lq);
                        break;
                    case quoteMode:
                        ret = gemtextParserSend(parser, quoteNode, lq);
                        break;
                    case linkMode:
                        ret = gemtextParserSendLink(parser, lq);
                        break;
                    case h1Mode:
                        ret = gemtextParserSend(parser, h1Node, lq);
                        break;
                    case h2Mode:
                        ret = gemtextParserSend(parser, h2Node, lq);
                        break;
                    case h3Mode:
                        ret = gemtextParserSend(parser, h3Node, lq);
                        break;
                    case listMode:
                        ret = gemtextParserSend(parser, listNode, lq);
                        break;
                    default:
                        break;
                }
                if (ret) return ret;
            }

            node = calloc(1, sizeof(gemtextNode));
            if (node == NULL) return errno;
            node->nodeType = endOfStream;
            node->prev = node->next = NULL;
            node->str = NULL;
            gemtextNodeQueuePush(lq, node);

            if (readErr) {
                logParseError(readErr);
                return readErr;
            }
            break;
        }

        switch (parser->mode) {
            case normalMode:
                ret = parseNormal(parser, lq, c);
                break;
            case preformattedMode:
                ret = parsePreformatted(parser, lq, c);
                break;
            case quoteMode:
                ret = parseQuote(parser, lq, c);
                break;
            case linkMode:
                ret = parseLink(parser, lq, c);
                break;
            case h1Mode:
                ret = parseGeneric(parser, lq, h1Node, c);
                break;
            case h2Mode:
                ret = parseGeneric(parser, lq, h2Node, c);
                break;
            case h3Mode:
                ret = parseGeneric(parser, lq, h3Node, c);
                break;
            case listMode:
                ret = parseGeneric(parser, lq, listNode, c);
                break;
            default:
                // Unknown mode: report it the same way the mode handlers
                // report an unexpected state.
                ret = 1;
                break;
        }
        if (ret) {
            logParseError(ret);
            return ret;
        }
    }
    return 0;
}
|