Compare commits

...

10 commits

Author SHA1 Message Date
Nathan Fisher
6e89d6e263 Better docs and code commenting 2023-11-04 00:32:53 -04:00
Nathan Fisher
ae253cec83 Refactor, getting rid of parsing modes (just use node types instead) 2023-11-04 00:19:11 -04:00
Nathan Fisher
e69a1ceaa1 Rename several data structures 2023-11-03 18:37:35 -04:00
Nathan Fisher
bb0b5a1bcc Test for endOfStream line type in tests 2023-10-30 01:20:43 -04:00
Nathan Fisher
dc6a0150d9 Add second, more complicated gemtext test 2023-10-29 19:04:02 -04:00
Nathan Fisher
b4c0b24a4a Make test pass with preformatted block 2023-10-25 23:05:10 -04:00
Nathan Fisher
c20957b1be Commit test .gmi file 2023-10-22 18:51:23 -04:00
Nathan Fisher
56e4b87fcb First successful parsing tests 2023-10-14 01:35:11 -04:00
Nathan Fisher
0a1a4e8803 Added documentation via doxygen 2023-10-11 00:28:53 -04:00
Nathan Fisher
c6605630b5 Ready for testing 2023-10-09 22:48:31 -04:00
11 changed files with 3828 additions and 267 deletions

2
.gitignore vendored
View file

@ -2,7 +2,9 @@ test/output/
test/* test/*
!test/*.c !test/*.c
!test/Makefile !test/Makefile
!test/*.gmi
*.o *.o
*.a *.a
*.so *.so
*.core *.core
doc

2822
Doxyfile Normal file

File diff suppressed because it is too large Load diff

View file

@ -43,7 +43,7 @@ srcs += gemtext-parser.c
objs = $(srcs:.c=.o) objs = $(srcs:.c=.o)
libname = libgemtext-parser libname = libgemtext
staticlib = $(libname).a staticlib = $(libname).a
all: static all: static
@ -53,7 +53,17 @@ static: $(staticlib)
$(staticlib): $(objs) $(staticlib): $(objs)
$(AR) rcs $@ $? $(AR) rcs $@ $?
clean: docs: Doxyfile $(hdrs)
rm -rf $(objs) $(staticlib) doxygen
.PHONY: all clean test: static
$(MAKE) -C test
testclean:
$(MAKE) -C test clean
clean:
rm -rf $(objs) $(staticlib) doc
$(MAKE) -C test clean
.PHONY: all docs clean static test testclean

7
config.mk Normal file
View file

@ -0,0 +1,7 @@
PREFIX ?= /usr/local
bindir = $(DESTDIR)$(PREFIX)/bin
includedir = $(DESTDIR)$(PREFIX)/include
libdir = $(DESTDIR)$(PREFIX)/lib
sharedir = $(DESTDIR)$(PREFIX)/share
mandir = $(sharedir)/man
docdir = $(sharedir)/doc/gemini

View file

@ -1,3 +1,4 @@
#include <assert.h> // assert
#include <errno.h> // errno #include <errno.h> // errno
#include <stddef.h> // NULL, size_t #include <stddef.h> // NULL, size_t
#include <stdio.h> // fclose #include <stdio.h> // fclose
@ -16,72 +17,130 @@ int lineBufferInit(lineBuffer *lb) {
return 0; return 0;
} }
gemtextParser* gemtextParserInit(FILE *stream) { int gemtextParserInit(gemtextParser *parser, FILE *stream) {
int ret = 0;
parser->stream = stream;
parser->nodeType = unset;
parser->state = lineStart;
parser->linkUrl = NULL;
ret = lineBufferInit(&parser->buffer);
return ret;
}
gemtextParser* gemtextParserNew(FILE *stream) {
gemtextParser *parser = calloc(1, sizeof(gemtextParser)); gemtextParser *parser = calloc(1, sizeof(gemtextParser));
if (parser == NULL) if (parser == NULL)
return NULL; return NULL;
parser->stream = stream; if (gemtextParserInit(parser, stream) != 0) {
parser->mode = normalMode;
parser->state = lineStart;
if (lineBufferInit(&parser->buffer) != 0) {
free(parser); free(parser);
return NULL; return NULL;
} }
parser->linkUrl = NULL;
return parser; return parser;
} }
void gemtextParserDeinit(gemtextParser *parser) { void gemtextParserDeinit(gemtextParser *parser) {
fclose(parser->stream); fclose(parser->stream);
free(parser->buffer.buf); free(parser->buffer.buf);
if (parser->linkUrl != NULL) { if (parser->nodeType == linkNode && parser->linkUrl != NULL) {
free(parser->linkUrl); free(parser->linkUrl);
} else if (parser->nodeType == preformattedNode && parser->altText != NULL) {
free(parser->altText);
} }
}
void gemtextParserDestroy(gemtextParser *parser) {
gemtextParserDeinit(parser);
free(parser); free(parser);
} }
int gemtextLineQueueInit(gemtextLineQueue *queue) { int gemtextNodeQueueInit(gemtextNodeQueue *nq) {
int ret; int ret;
queue->head = NULL; nq->head = NULL;
queue->tail = NULL; nq->tail = NULL;
ret = pthread_mutex_init(&queue->mutex, NULL); ret = pthread_mutex_init(&nq->mutex, NULL);
if (ret != 0) if (ret != 0)
return ret; return ret;
return pthread_cond_init(&queue->cond, NULL); return pthread_cond_init(&nq->cond, NULL);
} }
void gemtextLineQueuePush(gemtextLineQueue *queue, gemtextLine *line) { void gemtextNodeQueuePush(gemtextNodeQueue *nq, gemtextNode *node) {
pthread_mutex_lock(&queue->mutex); pthread_mutex_lock(&nq->mutex);
if (queue->tail == NULL) { if (nq->tail == NULL) {
queue->tail = queue->head = line; nq->tail = nq->head = node;
} else { } else {
line->next = queue->tail; node->next = nq->tail;
queue->tail->prev = line; nq->tail->prev = node;
queue->tail = line; nq->tail = node;
} }
queue->count++; nq->count++;
pthread_mutex_unlock(&queue->mutex); pthread_mutex_unlock(&nq->mutex);
} }
gemtextLine* gemtextLineQueuePop(gemtextLineQueue *lq) { gemtextNode* gemtextNodeQueuePop(gemtextNodeQueue *nq) {
gemtextLine *line; gemtextNode *node;
while (lq->count == 0) while (nq->count == 0)
pthread_cond_wait(&lq->cond, &lq->mutex); pthread_cond_wait(&nq->cond, &nq->mutex);
pthread_mutex_lock(&lq->mutex); pthread_mutex_lock(&nq->mutex);
lq->count++; nq->count++;
line = lq->head; node = nq->head;
if (line->lineType == endOfStream) if (node->nodeType == endOfStream)
return line; return node;
if (lq->tail == lq->head) { if (nq->tail == nq->head) {
lq->tail = lq->head = NULL; nq->tail = nq->head = NULL;
} else { } else {
lq->head = lq->head->prev; nq->head = nq->head->prev;
} }
pthread_mutex_unlock(&lq->mutex); pthread_mutex_unlock(&nq->mutex);
line->prev = line->next = NULL; node->prev = node->next = NULL;
return line; return node;
}
/**
 * Non-blocking counterpart of gemtextNodeQueuePop: removes and returns the
 * oldest node in the queue, or returns NULL when the queue is empty.
 *
 * The emptiness check is performed while holding the queue mutex so that it
 * cannot race with a concurrent push. The call may still block briefly while
 * another thread holds the mutex.
 *
 * An `endOfStream` node is returned but deliberately left at the head of the
 * queue (it is not unlinked and the count is not changed), so every later
 * call keeps reporting the end of the stream.
 * NOTE(review): because the sentinel stays queued, callers presumably must
 * not free it while the queue is still in use - confirm against callers.
 *
 * \param nq The queue from which we are attempting to pop a node
 * \return The oldest node, or NULL if the queue holds no nodes
 */
gemtextNode* gemtextNodeQueueTryPop(gemtextNodeQueue *nq) {
    gemtextNode *node;

    pthread_mutex_lock(&nq->mutex);
    node = nq->head;
    if (nq->count == 0 || node == NULL) {
        pthread_mutex_unlock(&nq->mutex);
        return NULL;
    }
    if (node->nodeType == endOfStream) {
        // Release the lock before returning; the sentinel stays queued.
        pthread_mutex_unlock(&nq->mutex);
        return node;
    }
    // A node is leaving the queue, so the count goes down, not up.
    nq->count--;
    if (nq->tail == nq->head) {
        nq->tail = nq->head = NULL;
    } else {
        // `prev` points towards newer nodes (see gemtextNodeQueuePush).
        nq->head = nq->head->prev;
        // Drop the new head's link to the node we are handing out.
        nq->head->next = NULL;
    }
    pthread_mutex_unlock(&nq->mutex);
    node->prev = node->next = NULL;
    return node;
}
void gemtextNodeDeinit(gemtextNode *node) {
switch (node->nodeType) {
case linkNode:
if (node->link->display != NULL) {
free(node->link->display);
}
free(node->link->url);
free(node->link);
break;
case preformattedNode:
if (node->block->altText != NULL) {
free(node->block->altText);
}
free(node->block->body);
free(node->block);
break;
case endOfStream:
break;
default:
free(node->str);
break;
}
free(node);
} }
int lineBufferExtend(lineBuffer *lb, size_t len) { int lineBufferExtend(lineBuffer *lb, size_t len) {
@ -134,25 +193,93 @@ int lineBufferAppendString(lineBuffer *lb, char *c, size_t len) {
return ret; return ret;
} }
/**
 * Moves the write cursor of lineBuffer lb back by one byte and decrements the
 * byte count, discarding the most recently appended character.
 *
 * Rewinding an empty buffer is a no-op: `len` is a size_t, so decrementing it
 * from 0 would wrap around, and the cursor would end up before the start of
 * the internal buffer.
 *
 * \param lb The buffer to rewind
 */
void lineBufferRewind(lineBuffer *lb) {
    if (lb->len == 0)
        return;
    lb->len--;
    lb->cursor--;
}
void lineBufferReset(lineBuffer *lb) { void lineBufferReset(lineBuffer *lb) {
lb->len = 0; lb->len = 0;
lb->cursor = lb->buf; lb->cursor = lb->buf;
} }
int gemtextParserSend(gemtextParser *parser, gemtextLineType lt, gemtextLineQueue *lq) { int gemtextParserSendPreformatted(gemtextParser *parser, gemtextNodeQueue *nq) {
gemtextLine *line; preformattedBlock *block;
char *buf; gemtextNode *node;
char *buf;
line = calloc(1, sizeof(gemtextLine)); node = calloc(1, sizeof(gemtextNode));
if (line == NULL) return errno; if (node == NULL) return errno;
line->lineType = lt; node->nodeType = preformattedNode;
block = calloc(1, sizeof(preformattedBlock));
if (block == NULL) return errno;
// back up our cursor four spaces and insert a lf char
parser->buffer.cursor -= 4;
parser->buffer.len -= 4;
lineBufferAppendCharUnchecked(&parser->buffer, '\n');
buf = strndup(parser->buffer.buf, parser->buffer.len); buf = strndup(parser->buffer.buf, parser->buffer.len);
if (buf == NULL) return errno; if (buf == NULL) return errno;
line->str = buf; block->altText = parser->altText;
gemtextLineQueuePush(lq, line); parser->altText = NULL;
block->body = buf;
node->block = block;
gemtextNodeQueuePush(nq, node);
lineBufferReset(&parser->buffer); lineBufferReset(&parser->buffer);
parser->state = lineStart; parser->state = lineStart;
parser->mode = normalMode; parser->nodeType = unset;
return 0;
}
/**
 * Builds a link node from the parser's current state and pushes it onto the
 * node queue, then resets the parser for the next line.
 *
 * If the parser has not stored a url yet (`linkUrl` is NULL) the line buffer
 * holds the url itself; its final byte is dropped when copying.
 * NOTE(review): the dropped byte is presumably the trailing newline - confirm
 * for the end-of-stream path, where no newline may have been read.
 * Otherwise the stored `linkUrl` is handed over to the node and the line
 * buffer is copied as the link's display text.
 *
 * On any allocation failure nothing is queued, everything allocated here is
 * released, and the parser state (including `linkUrl`) is left untouched.
 *
 * \param parser The parser holding the buffered link text
 * \param nq The queue that receives the new link node
 * \return 0 on success, otherwise the errno value of the failed allocation
 */
int gemtextParserSendLink(gemtextParser *parser, gemtextNodeQueue *nq) {
    gemtextLink *link;
    gemtextNode *node;
    char *url = NULL, *display = NULL;
    size_t urlLen;
    int err;

    link = calloc(1, sizeof(*link));
    if (link == NULL) return errno;
    node = calloc(1, sizeof(*node));
    if (node == NULL) {
        // Save errno first: free() is allowed to modify it.
        err = errno;
        free(link);
        return err;
    }
    if (parser->linkUrl == NULL) {
        // Guard the subtraction so an empty buffer cannot wrap to SIZE_MAX.
        urlLen = parser->buffer.len > 0 ? parser->buffer.len - 1 : 0;
        url = strndup(parser->buffer.buf, urlLen);
        if (url == NULL) {
            err = errno;
            free(link);
            free(node);
            return err;
        }
    } else {
        display = strndup(parser->buffer.buf, parser->buffer.len);
        if (display == NULL) {
            err = errno;
            free(link);
            free(node);
            return err;
        }
        // Ownership of the stored url moves from the parser to the node.
        url = parser->linkUrl;
    }
    link->url = url;
    link->display = display;
    node->nodeType = linkNode;
    node->link = link;
    gemtextNodeQueuePush(nq, node);
    lineBufferReset(&parser->buffer);
    parser->state = lineStart;
    parser->nodeType = unset;
    parser->linkUrl = NULL;
    return 0;
}
int gemtextParserSend(gemtextParser *parser, gemtextNodeQueue *nq) {
gemtextNode *node;
char *buf;
node = calloc(1, sizeof(gemtextNode));
if (node == NULL) return errno;
node->nodeType = parser->nodeType;
buf = strndup(parser->buffer.buf, parser->buffer.len);
if (buf == NULL) return errno;
node->str = buf;
gemtextNodeQueuePush(nq, node);
lineBufferReset(&parser->buffer);
parser->state = lineStart;
parser->nodeType = unset;
return 0; return 0;
} }
@ -160,136 +287,221 @@ void logParseError(int err) {
//todo //todo
} }
gemtextLink* readLink(FILE *stream, lineBuffer *lb) { void switchMode(gemtextParser *parser, gemtextNodeType node_type, char c) {
char c; lineBufferReset(&parser->buffer);
char *buf; switch (c) {
int ret = 0; case ' ':
gemtextLink *link = calloc(1, sizeof(gemtextLink)); case '\t':
parser->state = trimStart;
break;
default:
lineBufferReset(&parser->buffer);
lineBufferAppendCharUnchecked(&parser->buffer, c);
parser->state = normalState;
}
parser->nodeType = node_type;
}
if (link == NULL) return NULL; void enterPreformattedMode(gemtextParser *parser) {
while (1) { parser->nodeType = preformattedNode;
fread(&c, 1, 1, stream); parser->state = trimStart;
switch (c) { lineBufferReset(&parser->buffer);
case ' ': }
case '\t':
if (lb->len == 0) int parseLink(gemtextParser *parser, gemtextNodeQueue *lq, char c) {
continue; int ret = 0;
buf = strndup(lb->buf, lb->len); char *buf = NULL;
link->url = buf;
break; assert(parser->nodeType == linkNode);
case '\n': switch (parser->state) {
if (lb->len == 0) { case lineStart:
free(link); if (c != ' ' && c != '\t') {
return NULL; lineBufferReset(&parser->buffer);
} lineBufferAppendCharUnchecked(&parser->buffer, c);
buf = strndup(lb->buf, lb->len); parser->state = normalState;
link->url = buf; } else if (c == '\n') {
return link; ret = gemtextParserSend(parser, lq);
default: }
ret = lineBufferAppendChar(lb, c); break;
if (ret != 0) { case normalState:
free(link); if (c == ' ' || c == '\t') {
return NULL; buf = strndup(parser->buffer.buf, parser->buffer.len - 1);
if (buf == NULL) return errno;
parser->linkUrl = buf;
parser->state = linkDisplayStart;
lineBufferReset(&parser->buffer);
} else if (c == '\n') {
buf = strndup(parser->buffer.buf, parser->buffer.len);
if (buf == NULL) return errno;
ret = gemtextParserSendLink(parser, lq);
}
break;
case linkDisplayStart:
if (c == ' ' || c == '\t') {
lineBufferRewind(&parser->buffer);
} else if (c == '\n') {
ret = gemtextParserSendLink(parser, lq);
} else {
parser->state = linkDisplay;
}
break;
case linkDisplay:
if (c == '\n') {
ret = gemtextParserSendLink(parser, lq);
}
break;
default:
ret = 1;
break;
}
return ret;
}
int parsePreformatted(gemtextParser *parser, gemtextNodeQueue *lq, char c) {
char *buf = NULL;
assert(parser->nodeType == preformattedNode);
switch (parser->state) {
case trimStart:
if (c == '\n') {
lineBufferReset(&parser->buffer);
parser->state = lineStart;
parser->altText = NULL;
} else if (c == ' ' || c == '\t') {
lineBufferRewind(&parser->buffer);
} else {
parser->state = preformattedAlt;
}
break;
case preformattedAlt:
if (c == '\n') {
parser->state = lineStart;
if (parser->buffer.len > 0) {
buf = strndup(parser->buffer.buf, parser->buffer.len - 1);
if (buf == NULL) return errno;
parser->altText = buf;
lineBufferReset(&parser->buffer);
} }
} }
} break;
lineBufferReset(lb); case normalState:
while (1) { if (c == '\n') {
fread(&c, 1, 1, stream); parser->state = lineStart;
switch (c) { }
case '\n': break;
link->display = strndup(lb->buf, lb->len); case lineStart:
break; if (c == '\n') {
default: parser->state = lineStart;
ret = lineBufferAppendChar(lb, c); } else if (c == '`') {
if (ret != 0) { parser->state = firstBacktickChar;
free(link->url); } else {
free(link); parser->state = normalState;
return NULL; }
} break;
case firstBacktickChar:
if (c == '`') {
parser->state = secondBacktickChar;
} else {
parser->state = normalState;
}
break;
case secondBacktickChar:
if (c == '`') {
parser->state = thirdBacktickChar;
} else {
parser->state = normalState;
}
break;
case thirdBacktickChar:
if (c == '\n') {
gemtextParserSendPreformatted(parser, lq);
} else {
// We discard anything past the third backtick
parser->buffer.cursor--;
parser->buffer.len--;
} }
}
return link;
}
enterH1Mode(gemtextParser *parser, char c) {
lineBufferReset(&parser->buffer);
switch (c) {
case ' ':
case '\t':
break; break;
default: default:
lineBufferReset(&parser->buffer); return 1;
lineBufferAppendCharUnchecked(&parser->buffer, c);
} }
parser->mode = h1Mode; return 0;
parser->state = normalState;
} }
enterH2Mode(gemtextParser *parser, char c) { int parseQuote(gemtextParser *parser, gemtextNodeQueue *lq, char c) {
lineBufferReset(&parser->buffer); int ret = 0;
switch (c) {
case ' ': switch (parser->state) {
case '\t': case lineStart:
if (c == '>') {
parser->state = trimStart;
lineBufferRewind(&parser->buffer);
} else {
lineBufferRewind(&parser->buffer);
ret = gemtextParserSend(parser, lq);
if (ret) return ret;
ret = fseek(parser->stream, -1, SEEK_CUR);
if (ret) return ret;
parser->state = lineStart;
parser->nodeType = normalNode;
}
break;
case normalState:
if (c == '\n') {
parser->state = lineStart;
}
break;
case trimStart:
if (c == ' ' || c == '\t') {
// rewind and trim the whitespace
parser->buffer.len--;
parser->buffer.cursor--;
} else if (c == '\n') {
if (parser->buffer.len == 1) {
parser->nodeType = normalNode;
}
ret = gemtextParserSend(parser, lq);
} else {
parser->state = normalState;
}
break; break;
default: default:
lineBufferReset(&parser->buffer); // Should be unreachable
lineBufferAppendCharUnchecked(&parser->buffer, c); ret = 1;
break;
} }
parser->mode = h2Mode; return ret;
parser->state = normalState;
} }
enterH3Mode(gemtextParser *parser, char c) { int parseGeneric(gemtextParser *parser, gemtextNodeQueue *nq, char c) {
lineBufferReset(&parser->buffer); int ret = 0;
switch (c) {
case ' ': switch (parser->state) {
case '\t': case lineStart:
case trimStart:
if (c == ' ' || c == '\t') {
// rewind the cursor to trim the line start
parser->buffer.len--;
parser->buffer.cursor--;
} else if (c == '\n') {
ret = gemtextParserSend(parser, nq);
} else {
parser->state = normalState;
}
break;
case normalState:
if (c == '\n') {
ret = gemtextParserSend(parser, nq);
}
break; break;
default: default:
lineBufferReset(&parser->buffer); // Should be unreachable
lineBufferAppendCharUnchecked(&parser->buffer, c); ret = 1;
break;
} }
parser->mode = h3Mode; return ret;
parser->state = normalState;
} }
int parseLink(gemtextParser *parser, char c, gemtextLineQueue *lq) { int parseNormal(gemtextParser *parser, gemtextNodeQueue *nq, char c) {
// todo
return 0;
}
int parsePreformatted(gemtextParser *parser, char c, gemtextLineQueue *lq) {
// todo
return 0;
}
int parseQuote(gemtextParser *parser, char c, gemtextLineQueue *lq) {
// todo
return 0;
}
int parseH1(gemtextParser *parser, char c, gemtextLineQueue *lq) {
// todo
return 0;
}
int parseH2(gemtextParser *parser, char c, gemtextLineQueue *lq) {
// todo
return 0;
}
int parseH3(gemtextParser *parser, char c, gemtextLineQueue *lq) {
// todo
return 0;
}
int parseList(gemtextParser *parser, char c, gemtextLineQueue *lq) {
// todo
return 0;
}
int parseNormal(gemtextParser *parser, char c, gemtextLineQueue *lq) {
int ret; int ret;
switch (parser->state) { switch (parser->state) {
@ -299,12 +511,15 @@ int parseNormal(gemtextParser *parser, char c, gemtextLineQueue *lq) {
parser->state = firstLinkChar; parser->state = firstLinkChar;
break; break;
case '>': case '>':
parser->mode = quoteMode; parser->nodeType = quoteNode;
parser->state = lineStart; parser->state = trimStart;
lineBufferRewind(&parser->buffer);
break; break;
case '*': case '*':
parser->mode = listMode; parser->nodeType = listNode;
parser->state = normalState; parser->state = trimStart;
lineBufferRewind(&parser->buffer);
break;
case '#': case '#':
parser->state = firstHashChar; parser->state = firstHashChar;
break; break;
@ -312,7 +527,8 @@ int parseNormal(gemtextParser *parser, char c, gemtextLineQueue *lq) {
parser->state = firstBacktickChar; parser->state = firstBacktickChar;
break; break;
case '\n': case '\n':
ret = gemtextParserSend(parser, normalLine, lq); parser->nodeType = normalNode;
ret = gemtextParserSend(parser, nq);
if (ret) return ret; if (ret) return ret;
break; break;
default: default:
@ -321,10 +537,11 @@ int parseNormal(gemtextParser *parser, char c, gemtextLineQueue *lq) {
break; break;
case firstLinkChar: case firstLinkChar:
if (c == '>') { if (c == '>') {
parser->nodeType = linkNode;
parser->state = lineStart; parser->state = lineStart;
parser->mode = linkMode;
} else if (c == '\n') { } else if (c == '\n') {
ret = gemtextParserSend(parser, normalLine, lq); parser->nodeType = normalNode;
ret = gemtextParserSend(parser, nq);
if (ret) return ret; if (ret) return ret;
} else { } else {
parser->state = normalState; parser->state = normalState;
@ -334,58 +551,69 @@ int parseNormal(gemtextParser *parser, char c, gemtextLineQueue *lq) {
if (c == '#') { if (c == '#') {
parser->state = secondHashChar; parser->state = secondHashChar;
} else if (c == '\n') { } else if (c == '\n') {
ret = gemtextParserSend(parser, normalLine, lq); parser->nodeType = normalNode;
ret = gemtextParserSend(parser, nq);
if (ret) return ret; if (ret) return ret;
} else { } else {
enterH1Mode(parser, c); switchMode(parser, h1Node, c);
} }
break; break;
case secondHashChar: case secondHashChar:
if (c == '#') { if (c == '#') {
parser->state = thirdHashChar; parser->nodeType = h3Node;
parser->state = trimStart;
lineBufferReset(&parser->buffer);
} else if (c == '\n') { } else if (c == '\n') {
ret = gemtextParserSend(parser, normalLine, lq); parser->nodeType = normalNode;
ret = gemtextParserSend(parser, nq);
if (ret) return ret; if (ret) return ret;
} else { } else {
enterH2Mode(parser, c); switchMode(parser, h2Node, c);
} }
break; break;
case thirdHashChar: case thirdHashChar:
if (c == '\n') { if (c == '\n') {
ret = gemtextParserSend(parser, normalLine, lq); parser->nodeType = normalNode;
ret = gemtextParserSend(parser, nq);
if (ret) return ret; if (ret) return ret;
} else { } else {
enterH3Mode(parser, c); switchMode(parser, h3Node, c);
} }
break; break;
case firstBacktickChar: case firstBacktickChar:
if (c == '\n') { if (c == '\n') {
ret = gemtextParserSend(parser, normalLine, lq); parser->nodeType = normalNode;
ret = gemtextParserSend(parser, nq);
if (ret) return ret; if (ret) return ret;
} else if (c == '`') { } else if (c == '`') {
parser->state = secondBacktickChar; parser->state = secondBacktickChar;
} else { } else {
parser->state = normalState; parser->state = normalState;
parser->mode = normalMode; parser->nodeType = normalNode;
} }
break;
case secondBacktickChar: case secondBacktickChar:
if (c == '`') { if (c == '`') {
parser->state = thirdBacktickChar; enterPreformattedMode(parser);
} else if (c == '\n') { } else if (c == '\n') {
ret = gemtextParserSend(parser, normalLine, lq); parser->nodeType = normalNode;
ret = gemtextParserSend(parser, nq);
if (ret) return ret; if (ret) return ret;
} else {
parser->state = normalState;
parser->nodeType = normalNode;
} }
case thirdBacktickChar: break;
case normalState: default:
break; break;
} }
return 0; return 0;
} }
int parseGemtext(gemtextParser *parser, gemtextLineQueue *lq) { int parseGemtext(gemtextParser *parser, gemtextNodeQueue *nq) {
char c; char c;
int ret; int ret;
gemtextLine *line; gemtextNode *node;
for (;;) { for (;;) {
ret = fread(&c, 1, 1, parser->stream); ret = fread(&c, 1, 1, parser->stream);
@ -396,38 +624,47 @@ int parseGemtext(gemtextParser *parser, gemtextLineQueue *lq) {
return ret; return ret;
} }
} else { } else {
line = calloc(1, sizeof(gemtextLine)); // If we were unable to read a char, assume we're at the end of the
if (line == NULL) return errno; // stream and send the node to the queue
line->lineType = endOfStream; if (parser->state != lineStart && parser->state != trimStart) {
line->prev = line->next = NULL; switch (parser->nodeType) {
line->str = NULL; case preformattedNode:
gemtextLineQueuePush(lq, line); ret = gemtextParserSendPreformatted(parser, nq);
break;
case linkNode:
ret = gemtextParserSendLink(parser, nq);
break;
default:
ret = gemtextParserSend(parser, nq);
break;
}
if (ret) return ret;
}
// Send an `endOfStream` node since we know we're done
node = calloc(1, sizeof(gemtextNode));
if (node == NULL) return errno;
node->nodeType = endOfStream;
node->prev = node->next = NULL;
node->str = NULL;
gemtextNodeQueuePush(nq, node);
break; break;
} }
switch (parser->mode) { switch (parser->nodeType) {
case normalMode: case unset:
ret = parseNormal(parser, c, lq); case normalNode:
ret = parseNormal(parser, nq, c);
break; break;
case preformattedMode: case preformattedNode:
ret = parsePreformatted(parser, c, lq); ret = parsePreformatted(parser, nq, c);
break; break;
case quoteMode: case quoteNode:
ret = parseQuote(parser, c, lq); ret = parseQuote(parser, nq, c);
break; break;
case linkMode: case linkNode:
ret = parseLink(parser, c, lq); ret = parseLink(parser, nq, c);
break; break;
case h1Mode: default:
ret = parseH1(parser, c, lq); ret = parseGeneric(parser, nq, c);
break;
case h2Mode:
ret = parseH2(parser, c, lq);
break;
case h3Mode:
ret = parseH3(parser, c, lq);
break;
case listMode:
ret = parseList(parser, c, lq);
break; break;
} }
if (ret) { if (ret) {

View file

@ -1,3 +1,6 @@
/** \file gemtext-parser.h
* \brief A fast Gemtext markup parser
*/
#ifndef GEMTEXT_PARSER_H #ifndef GEMTEXT_PARSER_H
#define GEMTEXT_PARSER_H 1 #define GEMTEXT_PARSER_H 1
@ -5,82 +8,247 @@
#include <stddef.h> // size_t #include <stddef.h> // size_t
#include <stdio.h> // FILE #include <stdio.h> // FILE
#define LBUF_SIZE 512 #define LBUF_SIZE 512 ///< The default size of a lineBuffer
/** An enumeration representing the state of the parsing action. These values
* are to be taken in context with the current gemtextNodeType */
typedef enum { typedef enum {
normalMode, lineStart = 0, ///< The cursor is at the start of a new line
preformattedMode, lineEnd = 1, ///< The cursor is at the end of a line
quoteMode, firstLinkChar = 2, ///< The first link character was the previous character
linkMode, linkDisplayStart = 3, /**< The url of a link has been parsed and the cursor is at the
h1Mode, beginning of the display element */
h2Mode, linkDisplay = 4, ///< The link's display element is being parsed
h3Mode, firstHashChar = 5, ///< A Single '#' character has been encountered
listMode, secondHashChar = 6, ///< Two '#' characters have been encountered sequentially
} gemtextParserMode; thirdHashChar = 7, ///< Three '#' characters have been encountered sequentially
firstBacktickChar = 8, ///< A single '`' character has been encountered
typedef enum { secondBacktickChar = 9, ///< Two '`' characters have been encountered sequentially
lineStart, thirdBacktickChar = 10, ///< Three '`' characters have been encountered sequentially
lineEnd, preformattedAlt = 11, ///< A Preformatted block's alt text is being parsed
firstLinkChar, trimStart = 12, ///< The *mode* is known and leading whitespace is being trimmed
firstHashChar, normalState = 13, ///< The *mode* is known and normal parsing is occurring
secondHashChar,
thirdHashChar,
firstBacktickChar,
secondBacktickChar,
thirdBacktickChar,
normalState,
} gemtextParserState; } gemtextParserState;
/**
* An enum type representing the various line types in gemtext markup
*/
typedef enum { typedef enum {
normalLine, unset = 0, ///< The node type has not yet been set
linkLine, normalNode = 1, ///< A normal text line
listLine, linkNode = 2, ///< A link line
h1Line, listNode = 3, ///< A list member
h2Line, h1Node = 4, ///< An H1 heading
h3Line, h2Node = 5, ///< An H2 heading
preformattedLine, h3Node = 6, ///< An H3 heading
quoteLine, preformattedNode = 7, ///< A preformatted text block
endOfStream, quoteNode = 8, ///< A Quote block
} gemtextLineType; endOfStream = 9, /**< Notifies the receiver that the stream is over and no
more lines are to be expected */
} gemtextNodeType;
/**
* A growable byte array
*/
typedef struct { typedef struct {
size_t capacity; size_t capacity; ///< The current capacity of the internal buffer
size_t len; size_t len; ///< The actual number of bytes currently in use
char *cursor; char *cursor; ///< A pointer to the next byte to be used in the internal buffer
char *buf; char *buf; ///< A Pointer to the beginning of the internal buffer
} lineBuffer; } lineBuffer;
/**
* A Gemtext link element
*/
typedef struct { typedef struct {
char *url; char *url; ///< The url of the gemtext link
char *display; char *display; ///< Optional text to be displayed in lieu of the url
} gemtextLink; } gemtextLink;
/**
* A block of preformatted text
*/
typedef struct { typedef struct {
FILE *stream; char *altText; /**< Some descriptive text to be read by screen readers if
gemtextParserMode mode; this is ascii art */
gemtextParserState state; char *body; ///< The body of the preformatted block
lineBuffer buffer; } preformattedBlock;
char *linkUrl;
/**
* The main Gemtext parser
*/
typedef struct {
FILE *stream; /**< A stream of bytes to read gemtext from */
gemtextNodeType nodeType; /**< The current parsing mode */
gemtextParserState state; /**< The state of the parser within each mode */
lineBuffer buffer; /**< The internal buffer used to store bytes until
a gemtextLine is ready to be sent */
union {
char *linkUrl; /**< The url portion of a linkLine */
char *altText; /**< The alt text associated with a preformatted block */
};
} gemtextParser; } gemtextParser;
struct _gemtextLine { /** A Gemtext node */
struct _gemtextLine *next; struct _gemtextNode {
struct _gemtextLine *prev; struct _gemtextNode *next; ///< The next line in the queue
gemtextLineType lineType; struct _gemtextNode *prev; ///< The previous line in the queue
gemtextNodeType nodeType; ///< Identifies the type of line
union { union {
char *str; char *str; ///< The text body of most line types
gemtextLink *link; gemtextLink *link; ///< The body of a link line
preformattedBlock *block; ///< The body and alt text of a preformatted block
}; };
}; };
typedef struct _gemtextLine gemtextLine; /** A Gemtext node */
typedef struct _gemtextNode gemtextNode;
/**
* A fifo queue used to pass gemtextNode elements from the worker thread to the
* rendering thread.
*/
typedef struct { typedef struct {
pthread_cond_t cond; pthread_cond_t cond; ///< Signals the rendering thread to wait for an incoming line
size_t count; size_t count; ///< The number of elements currently in the queue
pthread_mutex_t mutex; pthread_mutex_t mutex; ///< The lock ensuring exclusive access
gemtextLine *head; gemtextNode *head; ///< The oldest line in the queue
gemtextLine *tail; gemtextNode *tail; ///< The newest line in the queue
} gemtextLineQueue; } gemtextNodeQueue;
/**
* Initialize a lineBuffer struct to its default values.
* ### Return values
* Returns 0 for success, 2 if memory allocation fails.
* \param lb A pointer to an already allocated lineBuffer
*/
int lineBufferInit(lineBuffer *lb);
/**
* Initialize a gemtextParser to its default values.
* ### Return values
* Returns 0 upon success, 2 if memory allocation for the internal
* buffer fails.
* \param parser A pointer to an already allocated gemtextParser
* \param stream A FILE whose bytes will be read and parsed as gemtext lines
*/
int gemtextParserInit(gemtextParser *parser, FILE *stream);
/**
* Creates a new gemtextParser and initializes it to default values.
* If memory allocation fails a NULL pointer will be returned.
* \param stream The FILE stream which we will read and parse as gemtext lines
*/
gemtextParser* gemtextParserNew(FILE *stream);
/**
* Frees all memory associated with pointer members of this parser and closes
* the internal FILE stream.
* \param parser The gemtextParser to be finalized
*/
void gemtextParserDeinit(gemtextParser *parser);
/**
* Frees all memory associated with this gemtextParser.
* \param parser The gemtextParser to be freed
*/
void gemtextParserDestroy(gemtextParser *parser);
/**
* Initializes a gemtextNodeQueue with default values.
* ### Return values
* Returns 0 on success. If there is a failure initializing the internal
* mutex or condition variable, an error code is returned instead.
* \param nq The already allocated gemtextNodeQueue
*/
int gemtextNodeQueueInit(gemtextNodeQueue *nq);
/**
* Pushes a gemtextNode into the queue. This function will not fail, but
* can block if another thread holds the gemtextNodeQueue's internal mutex.
* \param nq The queue which will receive the gemtext node
* \param node The gemtextNode to be queued
*/
void gemtextNodeQueuePush(gemtextNodeQueue *nq, gemtextNode *node);
/**
* Gets the oldest node inserted in the queue. This function will either
* return a valid gemtextNode or block until one becomes available.
* \param nq The queue from which we are attempting to pop a node
*/
gemtextNode* gemtextNodeQueuePop(gemtextNodeQueue *nq);
/**
* Attempts to get the oldest node inserted in the queue. If there are no nodes
* left in the queue, returns NULL.
* \param nq The queue from which we are attempting to pop a node
*/
gemtextNode* gemtextNodeQueueTryPop(gemtextNodeQueue *nq);
/**
* Frees all memory associated with a gemtextNode structure
* \param node The gemtextNode to be de-allocated
*/
void gemtextNodeDeinit(gemtextNode *node);
/**
* Extends the LineBuffer lb by len bytes.
* ### Return values
* Returns 0 upon success, or 2 if memory allocation fails.
* \param lb The buffer to be extended
* \param len The number of bytes to extend the buffer by
*/
int lineBufferExtend(lineBuffer *lb, size_t len);
/**
* Appends a character c to the lineBuffer lb. If there is no space left in the
* internal buffer, it will be re-allocated first.
* ### Return values
* Returns 0 for success, or 2 if memory allocation fails.
* \param lb The buffer we are appending to
* \param c The character to be appended to this buffer
*/
int lineBufferAppendChar(lineBuffer *lb, char c);
/**
* Appends a character c to the lineBuffer lb without checking if there is space
* available first.
* > **Warning!** Due to the fact that this function is unchecked, it should
* > only be called if you are absolutely certain that there is space remaining
* > in the internal buffer, such as after calling lineBufferRewind to move the
* > cursor back by one character. Failure to follow this warning may result in
* > *buffer overflow* memory access violation.
*/
void lineBufferAppendCharUnchecked(lineBuffer *lb, char c);
/**
* Appends a string beginning at the pointer *c of len bytes to lineBuffer lb.
* ### Return values
* Returns 0 on success, or 2 for memory allocation errors.
* \param lb The buffer we are appending to
* \param c A pointer to an array of chars
* \param len The number of bytes to append from c
*/
int lineBufferAppendString(lineBuffer *lb, char *c, size_t len);
/**
* Rewinds the internal cursor pointer and count for lineBuffer lb by 1.
*/
void lineBufferRewind(lineBuffer *lb);
/**
* Resets the internal count of lineBuffer lb to 0 and moves its cursor back
* to the start of the internal buffer.
*/
void lineBufferReset(lineBuffer *lb);
/**
* Parses gemtext into a series of nodes to be placed in the gemtextNodeQueue nq.
* ### Return values
* Returns 0 on success, any other number is an error code
* \param parser A gemtextParser struct used to maintain state while parsing
* \param nq A gemtextNodeQueue which will receive gemtextNode elements as they are parsed
*/
int parseGemtext(gemtextParser *parser, gemtextNodeQueue *nq);
#endif #endif

View file

@ -0,0 +1,71 @@
# _,.---._ .-._ .--.-. ,--.--------.
# _,..---._ ,-.' , - `. /==/ \ .-._/==/ //==/, - , -\
# /==/, - \ /==/_, , - \|==|, \/ /, |==\ -\\==\.-. - ,-./
# |==| _ _\==| .=. |==|- \| | \==\- \`--`\==\- \
# |==| .=. |==|_ : ;=: - |==| , | -| `--`-' \==\_ \
# |==|,| | -|==| , '=' |==| - _ | |==|- |
# |==| '=' /\==\ - ,_ /|==| /\ , | |==|, |
# |==|-, _`/ '.='. - .' /==/, | |- | /==/ -/
# `-.`.____.' `--`--'' `--`./ `--` `--`--`
# _ __ ,---. .-._ .=-.-. _,.----.
# .-`.' ,`..--.' \ /==/ \ .-._ /==/_ /.' .' - \
# /==/, - \==\-/\ \ |==|, \/ /, /==|, |/==/ , ,-'
# |==| _ .=. /==/-|_\ | |==|- \| ||==| ||==|- | .
# |==| , '=',\==\, - \ |==| , | -||==|- ||==|_ `-' \
# |==|- '..'/==/ - ,| |==| - _ ||==| ,||==| _ , |
# |==|, | /==/- /\ - \|==| /\ , ||==|- |\==\. /
# /==/ - | \==\ _.\=\.-'/==/, | |- |/==/. / `-.`.___.-'
# `--`---' `--` `--`./ `--``--`-`
#
# @(#)Copyright (c) 2023, Nathan D. Fisher.
#
# This is free software. It comes with NO WARRANTY.
# Permission to use, modify and distribute this source code
# is granted subject to the following conditions.
# 1/ that the above copyright notice and this notice
# are preserved in all copies and that due credit be given
# to the author.
# 2/ that any changes to this code are clearly commented
# as such so that the author does not get blamed for bugs
# other than his own.
#
# Pull in the shared installation path variables (PREFIX, bindir, ...)
include ../config.mk
# Add the project's include directory to the header search path
CFLAGS += -I../include
# Link each test against the static library in the parent directory
LDLIBS += ../libgemtext.a
LDLIBS += $(LIBS)
# Names of the test programs that the `test` target builds and runs
tests += parse-gemtext0
tests += parse-gemtext1
# Number of entries in $(tests); `!=` assigns the output of a shell command
total != echo $(tests) | wc -w | awk '{ print $$1 }'
.PHONY: test
# Build and run every program listed in $(tests), printing one verdict per
# test followed by a summary line.
#
# A test program reports success by exiting 0, "skipped" by exiting 255 and
# failure with any other status. The target exits non-zero when at least one
# test failed so that callers (the parent Makefile, CI) can detect it.
#
# printf and `-eq` are used rather than `echo -e` and `==`, neither of which
# is portable across /bin/sh implementations. `./test || retval=$?` keeps a
# failing test from aborting the loop when make runs the shell with -e.
test: $(tests) output
	@printf '\n\t=== \033[0;33mRunning %s tests\033[0m ===\n\n' "$(total)"
	@idx=1 ; success=0 ; fail=0 ; skip=0 ; \
	for t in $(tests) ; do \
		printf '[%02d/%s] %-25s' "$${idx}" "$(total)" "$${t}" ; \
		idx=$$((idx + 1)) ; \
		retval=0 ; \
		./$${t} || retval=$$? ; \
		if [ "$${retval}" -eq 0 ] ; then \
			printf '\033[0;32mSuccess\033[0m\n' ; \
			success=$$((success + 1)) ; \
		elif [ "$${retval}" -eq 255 ] ; then \
			printf 'Skipped\n' ; \
			skip=$$((skip + 1)) ; \
		else \
			printf '\033[0;31mFailure\033[0m\n' ; \
			fail=$$((fail + 1)) ; \
		fi ; \
	done ; \
	if [ "$${fail}" -eq 0 ] ; then \
		printf '\nResults: \033[0;32mOk\033[0m. %s succeeded; %s failed; %s skipped\n' \
			"$${success}" "$${fail}" "$${skip}" ; \
	else \
		printf '\nResults: \033[0;31mFAILED\033[0m. %s succeeded; %s failed; %s skipped\n' \
			"$${success}" "$${fail}" "$${skip}" ; \
		exit 1 ; \
	fi
# Create the output directory unless it already exists.
# Note the space after `[`: without it the shell looks for a command named
# `[-d` and the existence test is never actually performed.
output:
	@[ -d $@ ] || install -d $@
.PHONY: clean
# Remove the built test programs and everything inside the output directory
clean:
rm -rf $(tests) output/*

102
test/parse-gemtext0.c Normal file
View file

@ -0,0 +1,102 @@
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gemtext-parser.h"
/* Queue that receives the nodes produced by parseGemtext() */
gemtextNodeQueue lq;
/* Parser state, initialised in main() with the test input stream */
gemtextParser parser;
/*
 * Pops the next node from the global queue and fails the test right away if
 * no node came back, so callers never dereference a NULL pointer.
 */
static gemtextNode *popNodeOrFail(void) {
    gemtextNode *node = gemtextNodeQueueTryPop(&lq);
    assert(node != NULL);
    return node;
}

/*
 * Parses test0.gmi and checks that the parser emits the expected sequence of
 * nodes (type and leading text of each one), ending with endOfStream.
 * Returns 0 on success; any failed expectation aborts through assert().
 */
int main(void) {
    int ret = 0;
    FILE *stream = NULL;
    gemtextNode *node = NULL;

    stream = fopen("test0.gmi", "r");
    assert(stream != NULL);
    ret = gemtextNodeQueueInit(&lq);
    assert(ret == 0);
    ret = gemtextParserInit(&parser, stream);
    assert(ret == 0);
    ret = parseGemtext(&parser, &lq);
    /* Do not inspect the queue if the parser itself reported an error */
    assert(ret == 0);

    node = popNodeOrFail();
    assert(node->nodeType == h1Node);
    assert(memcmp(node->str, "A Test Gemtext file", 19) == 0);
    gemtextNodeDeinit(node);

    node = popNodeOrFail();
    assert(node->nodeType == h2Node);
    assert(memcmp(node->str, "Used for testing the parser in normal operation", 47) == 0);
    gemtextNodeDeinit(node);

    node = popNodeOrFail();
    assert(node->nodeType == normalNode);
    assert(*node->str == '\n');
    gemtextNodeDeinit(node);

    node = popNodeOrFail();
    assert(node->nodeType == normalNode);
    assert(memcmp(node->str, "This is", 7) == 0);
    gemtextNodeDeinit(node);

    node = popNodeOrFail();
    assert(node->nodeType == normalNode);
    assert(*node->str == '\n');
    gemtextNodeDeinit(node);

    /* Consecutive quote lines are expected to arrive as a single node */
    node = popNodeOrFail();
    assert(node->nodeType == quoteNode);
    assert(memcmp(node->str, "Walk before you run.\n- Anonymous", 32) == 0);
    gemtextNodeDeinit(node);

    node = popNodeOrFail();
    assert(node->nodeType == normalNode);
    assert(*node->str == '\n');
    gemtextNodeDeinit(node);

    node = popNodeOrFail();
    assert(node->nodeType == h3Node);
    assert(memcmp(node->str, "Let's check a list", 18) == 0);
    gemtextNodeDeinit(node);

    node = popNodeOrFail();
    assert(node->nodeType == listNode);
    /* Compare all 10 characters of the expected text */
    assert(memcmp(node->str, "First item", 10) == 0);
    gemtextNodeDeinit(node);

    node = popNodeOrFail();
    assert(node->nodeType == listNode);
    assert(memcmp(node->str, "second item", 11) == 0);
    gemtextNodeDeinit(node);

    node = popNodeOrFail();
    assert(node->nodeType == normalNode);
    assert(*node->str == '\n');
    gemtextNodeDeinit(node);

    node = popNodeOrFail();
    assert(node->nodeType == linkNode);
    assert(memcmp(node->link->url, "gemini://example.org/test.gmi", 29) == 0);
    assert(memcmp(node->link->display, "This is a link", 14) == 0);
    gemtextNodeDeinit(node);

    node = popNodeOrFail();
    assert(node->nodeType == normalNode);
    assert(*node->str == '\n');
    gemtextNodeDeinit(node);

    node = popNodeOrFail();
    assert(node->nodeType == preformattedNode);
    assert(memcmp(node->block->altText, "Test preformatted block", 23) == 0);
    assert(memcmp(node->block->body, "This is a preformatted block", 28) == 0);
    gemtextNodeDeinit(node);

    node = popNodeOrFail();
    assert(node->nodeType == endOfStream);
    gemtextNodeDeinit(node);

    gemtextParserDeinit(&parser);
    return ret;
}

106
test/parse-gemtext1.c Normal file
View file

@ -0,0 +1,106 @@
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gemtext-parser.h"
/* Queue that receives the nodes produced by parseGemtext() */
gemtextNodeQueue lq;
/* Parser state, initialised in main() with the test input stream */
gemtextParser parser;
/* Expected body of the preformatted block in test1.gmi */
char * preBlk =
"Just a regular preformatted block.\n"
"Nothing special";
int main() {
int ret = 0;
FILE *stream = NULL;
gemtextNode *node = NULL;
stream = fopen("test1.gmi", "r");
assert(stream != NULL);
ret = gemtextNodeQueueInit(&lq);
assert(ret == 0);
ret = gemtextParserInit(&parser, stream);
assert(ret == 0);
ret = parseGemtext(&parser, &lq);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType = h2Node);
assert(memcmp(node->str, "A more complicated example", 26) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == listNode);
assert(memcmp(node->str, "list item with no leading space", 30) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == listNode);
assert(memcmp(node->str, "list item with several leading spaces", 37) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == h2Node);
assert(memcmp(node->str, "After this H2, an empty quote", 29) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == normalNode);
assert(memcmp(node->str, "\n", 1) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == h3Node);
assert(memcmp(node->str, "Now we'll", 9) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == normalNode);
assert(memcmp(node->str, "``", 2) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == normalNode);
assert(memcmp(node->str, "=", 1) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == normalNode);
assert(memcmp(node->str, "And maybe", 9) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == linkNode);
assert(memcmp(node->link->url, "spartan://example.org", 21) == 0);
assert(node->link->display == NULL);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == normalNode);
assert(memcmp(node->str, "Let's enter", 11) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == preformattedNode);
assert(node->block->altText == NULL);
assert(memcmp(node->block->body, preBlk, 50) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == normalNode);
assert(memcmp(node->str, "And we'll finish", 16) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == linkNode);
assert(node->link->display == NULL);
assert(memcmp(node->link->url, "finger://example.org/joe", 24) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node != NULL);
assert(node->nodeType == endOfStream);
gemtextNodeDeinit(node);
return ret;
}

19
test/test0.gmi Normal file
View file

@ -0,0 +1,19 @@
# A Test Gemtext file
## Used for testing the parser in normal operation
This is a simple gemtext file used for testing the gemtext parser. There's nothing particularly special about it and we're not using it to test any edge cases. For now, we are only concerned with how the parser handles well-formatted gemtext such as this.
> Walk before you run.
> - Anonymous
### Let's check a list
* First item
* second item
=> gemini://example.org/test.gmi This is a link
``` Test preformatted block
This is a preformatted block.
Everything in this block should appear exactly as entered in a Monospace font,
with no styling applied.
```

17
test/test1.gmi Normal file
View file

@ -0,0 +1,17 @@
# A more complicated example
*list item with no leading space
* list item with several leading spaces
##After this H2, an empty quote
>
### Now we'll mess with incomplete linetype signifiers
``
=
And maybe a link with no display element..
=> spartan://example.org/
Let's enter preformatted for reals this time.
```
Just a regular preformatted block.
Nothing special
```
And we'll finish with a link.
=> finger://example.org/joe