Compare commits

..

10 commits

Author SHA1 Message Date
Nathan Fisher
6e89d6e263 Better docs and code commenting 2023-11-04 00:32:53 -04:00
Nathan Fisher
ae253cec83 Refactor, getting rid of parsing modes (just use node types instead) 2023-11-04 00:19:11 -04:00
Nathan Fisher
e69a1ceaa1 Rename several data structures 2023-11-03 18:37:35 -04:00
Nathan Fisher
bb0b5a1bcc Test for endOfStream line type in tests 2023-10-30 01:20:43 -04:00
Nathan Fisher
dc6a0150d9 Add second, more complicated gemtext test 2023-10-29 19:04:02 -04:00
Nathan Fisher
b4c0b24a4a Make test pass with preformatted block 2023-10-25 23:05:10 -04:00
Nathan Fisher
c20957b1be Commit test .gmi file 2023-10-22 18:51:23 -04:00
Nathan Fisher
56e4b87fcb First successful parsing tests 2023-10-14 01:35:11 -04:00
Nathan Fisher
0a1a4e8803 Added documentation via doxygen 2023-10-11 00:28:53 -04:00
Nathan Fisher
c6605630b5 Ready for testing 2023-10-09 22:48:31 -04:00
11 changed files with 3828 additions and 267 deletions

2
.gitignore vendored
View file

@ -2,7 +2,9 @@ test/output/
test/*
!test/*.c
!test/Makefile
!test/*.gmi
*.o
*.a
*.so
*.core
doc

2822
Doxyfile Normal file

File diff suppressed because it is too large Load diff

View file

@ -43,7 +43,7 @@ srcs += gemtext-parser.c
objs = $(srcs:.c=.o)
libname = libgemtext-parser
libname = libgemtext
staticlib = $(libname).a
all: static
@ -53,7 +53,17 @@ static: $(staticlib)
$(staticlib): $(objs)
$(AR) rcs $@ $?
clean:
rm -rf $(objs) $(staticlib)
docs: Doxyfile $(hdrs)
doxygen
.PHONY: all clean
test: static
$(MAKE) -C test
testclean:
$(MAKE) -C test clean
clean:
rm -rf $(objs) $(staticlib) doc
$(MAKE) -C test clean
.PHONY: all docs clean static test testclean

7
config.mk Normal file
View file

@ -0,0 +1,7 @@
PREFIX ?= /usr/local
bindir = $(DESTDIR)$(PREFIX)/bin
includedir = $(DESTDIR)$(PREFIX)/include
libdir = $(DESTDIR)$(PREFIX)/lib
sharedir = $(DESTDIR)$(PREFIX)/share
mandir = $(sharedir)/man
docdir = $(sharedir)/doc/gemini

View file

@ -1,3 +1,4 @@
#include <assert.h> // assert
#include <errno.h> // errno
#include <stddef.h> // NULL, size_t
#include <stdio.h> // fclose
@ -16,72 +17,130 @@ int lineBufferInit(lineBuffer *lb) {
return 0;
}
gemtextParser* gemtextParserInit(FILE *stream) {
/*
 * Put an already-allocated parser into its starting configuration: it reads
 * from `stream`, has no node type selected yet, sits at the start of a line
 * and holds no pending link url.
 *
 * Returns whatever lineBufferInit() returns for the parser's internal
 * buffer (0 on success).
 */
int gemtextParserInit(gemtextParser *parser, FILE *stream) {
    parser->linkUrl = NULL;
    parser->state = lineStart;
    parser->nodeType = unset;
    parser->stream = stream;

    // The buffer is the only member whose setup can fail, so its result
    // is the result of the whole initialisation.
    return lineBufferInit(&parser->buffer);
}
gemtextParser* gemtextParserNew(FILE *stream) {
gemtextParser *parser = calloc(1, sizeof(gemtextParser));
if (parser == NULL)
return NULL;
parser->stream = stream;
parser->mode = normalMode;
parser->state = lineStart;
if (lineBufferInit(&parser->buffer) != 0) {
if (gemtextParserInit(parser, stream) != 0) {
free(parser);
return NULL;
}
parser->linkUrl = NULL;
return parser;
}
void gemtextParserDeinit(gemtextParser *parser) {
fclose(parser->stream);
free(parser->buffer.buf);
if (parser->linkUrl != NULL) {
if (parser->nodeType == linkNode && parser->linkUrl != NULL) {
free(parser->linkUrl);
} else if (parser->nodeType == preformattedNode && parser->altText != NULL) {
free(parser->altText);
}
}
/*
 * Tear down a parser completely: first release everything the parser owns
 * via gemtextParserDeinit(), then free the gemtextParser struct itself.
 *
 * Because the struct is passed to free(), this is only valid for a parser
 * that lives on the heap (gemtextParserNew() allocates it with calloc()).
 */
void gemtextParserDestroy(gemtextParser *parser) {
gemtextParserDeinit(parser);
free(parser);
}
int gemtextLineQueueInit(gemtextLineQueue *queue) {
int gemtextNodeQueueInit(gemtextNodeQueue *nq) {
int ret;
queue->head = NULL;
queue->tail = NULL;
ret = pthread_mutex_init(&queue->mutex, NULL);
nq->head = NULL;
nq->tail = NULL;
ret = pthread_mutex_init(&nq->mutex, NULL);
if (ret != 0)
return ret;
return pthread_cond_init(&queue->cond, NULL);
return pthread_cond_init(&nq->cond, NULL);
}
void gemtextLineQueuePush(gemtextLineQueue *queue, gemtextLine *line) {
pthread_mutex_lock(&queue->mutex);
if (queue->tail == NULL) {
queue->tail = queue->head = line;
void gemtextNodeQueuePush(gemtextNodeQueue *nq, gemtextNode *node) {
pthread_mutex_lock(&nq->mutex);
if (nq->tail == NULL) {
nq->tail = nq->head = node;
} else {
line->next = queue->tail;
queue->tail->prev = line;
queue->tail = line;
node->next = nq->tail;
nq->tail->prev = node;
nq->tail = node;
}
queue->count++;
pthread_mutex_unlock(&queue->mutex);
nq->count++;
pthread_mutex_unlock(&nq->mutex);
}
gemtextLine* gemtextLineQueuePop(gemtextLineQueue *lq) {
gemtextLine *line;
gemtextNode* gemtextNodeQueuePop(gemtextNodeQueue *nq) {
gemtextNode *node;
while (lq->count == 0)
pthread_cond_wait(&lq->cond, &lq->mutex);
pthread_mutex_lock(&lq->mutex);
lq->count++;
line = lq->head;
if (line->lineType == endOfStream)
return line;
if (lq->tail == lq->head) {
lq->tail = lq->head = NULL;
while (nq->count == 0)
pthread_cond_wait(&nq->cond, &nq->mutex);
pthread_mutex_lock(&nq->mutex);
nq->count++;
node = nq->head;
if (node->nodeType == endOfStream)
return node;
if (nq->tail == nq->head) {
nq->tail = nq->head = NULL;
} else {
lq->head = lq->head->prev;
nq->head = nq->head->prev;
}
pthread_mutex_unlock(&lq->mutex);
line->prev = line->next = NULL;
return line;
pthread_mutex_unlock(&nq->mutex);
node->prev = node->next = NULL;
return node;
}
/*
 * Non-blocking pop of the oldest node in the queue.
 *
 * Returns NULL when the queue is empty. When the oldest node is the
 * `endOfStream` marker it is returned but deliberately left at the head of
 * the queue (as before), so it is still reachable from `nq` afterwards.
 * Any other node is unlinked, has its `prev`/`next` cleared, and is
 * returned to the caller.
 *
 * All queue state is inspected and modified with the queue mutex held, and
 * the mutex is released on every return path.
 */
gemtextNode* gemtextNodeQueueTryPop(gemtextNodeQueue *nq) {
    gemtextNode *node = NULL;

    pthread_mutex_lock(&nq->mutex);

    node = nq->head;
    if (node == NULL) {
        // Nothing queued
        pthread_mutex_unlock(&nq->mutex);
        return NULL;
    }
    if (node->nodeType == endOfStream) {
        // The marker stays queued; just hand out a pointer to it
        pthread_mutex_unlock(&nq->mutex);
        return node;
    }

    if (nq->tail == nq->head) {
        nq->tail = nq->head = NULL;
    } else {
        // `prev` points at the next-newer node (see gemtextNodeQueuePush)
        nq->head = nq->head->prev;
        // Don't leave the new head pointing at the node we are handing out
        nq->head->next = NULL;
    }
    // One node left the queue, so the count goes down (never below zero)
    if (nq->count > 0) {
        nq->count--;
    }

    pthread_mutex_unlock(&nq->mutex);

    node->prev = node->next = NULL;
    return node;
}
/*
 * Release a node together with the payload it carries.
 *
 * Which union member is live depends on node->nodeType:
 *  - linkNode:         frees the display text, the url and the link struct
 *  - preformattedNode: frees the alt text, the body and the block struct
 *  - endOfStream:      carries no payload
 *  - anything else:    frees the plain string
 * The node itself is freed last in every case.
 */
void gemtextNodeDeinit(gemtextNode *node) {
    const gemtextNodeType kind = node->nodeType;

    if (kind == linkNode) {
        gemtextLink *lnk = node->link;
        // free(NULL) is a no-op, so an absent display text needs no guard
        free(lnk->display);
        free(lnk->url);
        free(lnk);
    } else if (kind == preformattedNode) {
        preformattedBlock *blk = node->block;
        free(blk->altText);
        free(blk->body);
        free(blk);
    } else if (kind != endOfStream) {
        free(node->str);
    }

    free(node);
}
int lineBufferExtend(lineBuffer *lb, size_t len) {
@ -134,25 +193,93 @@ int lineBufferAppendString(lineBuffer *lb, char *c, size_t len) {
return ret;
}
/*
 * Step the buffer back by one byte: the byte count and the write cursor are
 * both decremented, so the next append overwrites the last byte.
 *
 * Rewinding an empty buffer is a no-op. Without this check `len` (a size_t)
 * would wrap around and `cursor` would end up pointing before `buf`.
 */
void lineBufferRewind(lineBuffer *lb) {
    if (lb->len == 0) {
        return;
    }
    lb->len--;
    lb->cursor--;
}
/*
 * Empty the buffer without releasing its storage: the write cursor goes
 * back to the first byte and the byte count drops to zero.
 */
void lineBufferReset(lineBuffer *lb) {
    lb->cursor = lb->buf;
    lb->len = 0;
}
int gemtextParserSend(gemtextParser *parser, gemtextLineType lt, gemtextLineQueue *lq) {
gemtextLine *line;
char *buf;
int gemtextParserSendPreformatted(gemtextParser *parser, gemtextNodeQueue *nq) {
preformattedBlock *block;
gemtextNode *node;
char *buf;
line = calloc(1, sizeof(gemtextLine));
if (line == NULL) return errno;
line->lineType = lt;
node = calloc(1, sizeof(gemtextNode));
if (node == NULL) return errno;
node->nodeType = preformattedNode;
block = calloc(1, sizeof(preformattedBlock));
if (block == NULL) return errno;
// back up our cursor four spaces and insert a lf char
parser->buffer.cursor -= 4;
parser->buffer.len -= 4;
lineBufferAppendCharUnchecked(&parser->buffer, '\n');
buf = strndup(parser->buffer.buf, parser->buffer.len);
if (buf == NULL) return errno;
line->str = buf;
gemtextLineQueuePush(lq, line);
block->altText = parser->altText;
parser->altText = NULL;
block->body = buf;
node->block = block;
gemtextNodeQueuePush(nq, node);
lineBufferReset(&parser->buffer);
parser->state = lineStart;
parser->mode = normalMode;
parser->nodeType = unset;
return 0;
}
/*
 * Build a link node from the parser's current state and push it onto `nq`.
 *
 * Two situations are handled:
 *  - parser->linkUrl is NULL: the line buffer holds only the url. It is
 *    copied without its trailing newline, if it has one, and the node gets
 *    no display text.
 *  - parser->linkUrl is set: ownership of that string moves into the node
 *    as the url, and the line buffer contents become the display text.
 *
 * On success the line buffer is reset and the parser returns to
 * lineStart / unset with no pending link url.
 *
 * Returns 0 on success. On allocation failure nothing is queued, every
 * partial allocation made here is released, parser->linkUrl is left
 * untouched, and the errno value of the failed call is returned (ENOMEM if
 * errno was not set).
 */
int gemtextParserSendLink(gemtextParser *parser, gemtextNodeQueue *nq) {
    gemtextLink *link = NULL;
    gemtextNode *node = NULL;
    char *url = NULL, *display = NULL;
    int err = 0;

    link = calloc(1, sizeof(*link));
    if (link == NULL) return errno ? errno : ENOMEM;

    node = calloc(1, sizeof(*node));
    if (node == NULL) {
        // Capture errno first; free() is allowed to change it
        err = errno ? errno : ENOMEM;
        free(link);
        return err;
    }

    if (parser->linkUrl == NULL) {
        size_t n = parser->buffer.len;

        // Drop the newline that ended the line. At end of stream there may
        // be none, and an empty buffer must not underflow the length.
        if (n > 0 && parser->buffer.buf[n - 1] == '\n') {
            n--;
        }
        url = strndup(parser->buffer.buf, n);
        if (url == NULL) {
            err = errno ? errno : ENOMEM;
            free(link);
            free(node);
            return err;
        }
    } else {
        display = strndup(parser->buffer.buf, parser->buffer.len);
        if (display == NULL) {
            err = errno ? errno : ENOMEM;
            free(link);
            free(node);
            return err;
        }
        url = parser->linkUrl;
    }

    link->url = url;
    link->display = display;
    node->nodeType = linkNode;
    node->link = link;
    gemtextNodeQueuePush(nq, node);

    lineBufferReset(&parser->buffer);
    parser->state = lineStart;
    parser->nodeType = unset;
    // The url now belongs to the queued node
    parser->linkUrl = NULL;
    return 0;
}
/*
 * Build a plain-text node from the parser's current state and push it onto
 * `nq`. The node takes parser->nodeType as its type and a copy of the
 * first buffer.len bytes of the line buffer as its string.
 *
 * On success the line buffer is reset and the parser returns to
 * lineStart / unset.
 *
 * Returns 0 on success. On allocation failure nothing is queued, the
 * partially built node is released, and the errno value of the failed call
 * is returned (ENOMEM if errno was not set).
 */
int gemtextParserSend(gemtextParser *parser, gemtextNodeQueue *nq) {
    gemtextNode *node = NULL;
    char *buf = NULL;
    int err = 0;

    node = calloc(1, sizeof(*node));
    if (node == NULL) return errno ? errno : ENOMEM;
    node->nodeType = parser->nodeType;

    buf = strndup(parser->buffer.buf, parser->buffer.len);
    if (buf == NULL) {
        // Capture errno before free() can change it, then drop the node
        // so a failed copy does not leak it
        err = errno ? errno : ENOMEM;
        free(node);
        return err;
    }
    node->str = buf;
    gemtextNodeQueuePush(nq, node);

    lineBufferReset(&parser->buffer);
    parser->state = lineStart;
    parser->nodeType = unset;
    return 0;
}
@ -160,136 +287,221 @@ void logParseError(int err) {
//todo
}
gemtextLink* readLink(FILE *stream, lineBuffer *lb) {
char c;
char *buf;
int ret = 0;
gemtextLink *link = calloc(1, sizeof(gemtextLink));
/*
 * Commit the parser to `node_type` once the line's leading marker has been
 * recognised. The marker bytes collected so far are discarded.
 *
 * `c` is the first character after the marker:
 *  - a space or tab means there is leading whitespace still to be skipped,
 *    so the parser moves to trimStart with an empty buffer;
 *  - any other character is the first byte of the content, so it is stored
 *    in the (just emptied) buffer and the parser moves to normalState.
 */
void switchMode(gemtextParser *parser, gemtextNodeType node_type, char c) {
    const int is_blank = (c == ' ' || c == '\t');

    lineBufferReset(&parser->buffer);
    if (is_blank) {
        parser->state = trimStart;
    } else {
        // The buffer was emptied above, so there is room for one byte
        lineBufferAppendCharUnchecked(&parser->buffer, c);
        parser->state = normalState;
    }
    parser->nodeType = node_type;
}
if (link == NULL) return NULL;
while (1) {
fread(&c, 1, 1, stream);
switch (c) {
case ' ':
case '\t':
if (lb->len == 0)
continue;
buf = strndup(lb->buf, lb->len);
link->url = buf;
break;
case '\n':
if (lb->len == 0) {
free(link);
return NULL;
}
buf = strndup(lb->buf, lb->len);
link->url = buf;
return link;
default:
ret = lineBufferAppendChar(lb, c);
if (ret != 0) {
free(link);
return NULL;
/*
 * Switch the parser into preformatted-block parsing: the bytes collected so
 * far are discarded, the node type becomes preformattedNode and the state
 * becomes trimStart.
 */
void enterPreformattedMode(gemtextParser *parser) {
    lineBufferReset(&parser->buffer);
    parser->state = trimStart;
    parser->nodeType = preformattedNode;
}
/*
 * Advance the link-line state machine by one character.
 *
 * NOTE(review): the `len - 1` arithmetic below implies the caller has
 * already appended `c` to parser->buffer before calling this function;
 * confirm against parseGemtext.
 *
 * States handled:
 *  - lineStart:        skipping whitespace between the link marker and the
 *                      url. A newline here means the line has no url at
 *                      all; it is sent as a normalNode, because a node
 *                      built by gemtextParserSend() carries a plain string
 *                      and must not be tagged linkNode.
 *  - normalState:      reading the url. Whitespace ends the url and stores
 *                      it in parser->linkUrl; a newline sends a link with
 *                      no display text.
 *  - linkDisplayStart: skipping whitespace between url and display text.
 *  - linkDisplay:      reading display text until the newline.
 *
 * Returns 0 on success, an errno value if copying the url fails, the result
 * of the send function when a node is emitted, or 1 for an unexpected state.
 */
int parseLink(gemtextParser *parser, gemtextNodeQueue *lq, char c) {
    int ret = 0;
    char *buf = NULL;

    assert(parser->nodeType == linkNode);
    switch (parser->state) {
    case lineStart:
        // The newline test must come first: '\n' is neither a space nor a
        // tab, so it would otherwise be taken as the first url byte
        if (c == '\n') {
            parser->nodeType = normalNode;
            ret = gemtextParserSend(parser, lq);
        } else if (c != ' ' && c != '\t') {
            lineBufferReset(&parser->buffer);
            lineBufferAppendCharUnchecked(&parser->buffer, c);
            parser->state = normalState;
        }
        break;
    case normalState:
        if (c == ' ' || c == '\t') {
            // Copy the url without the whitespace byte that ended it
            size_t n = parser->buffer.len > 0 ? parser->buffer.len - 1 : 0;

            buf = strndup(parser->buffer.buf, n);
            if (buf == NULL) return errno;
            parser->linkUrl = buf;
            parser->state = linkDisplayStart;
            lineBufferReset(&parser->buffer);
        } else if (c == '\n') {
            // gemtextParserSendLink() copies the url out of the buffer
            // itself, so no copy is made here
            ret = gemtextParserSendLink(parser, lq);
        }
        break;
    case linkDisplayStart:
        if (c == ' ' || c == '\t') {
            lineBufferRewind(&parser->buffer);
        } else if (c == '\n') {
            ret = gemtextParserSendLink(parser, lq);
        } else {
            parser->state = linkDisplay;
        }
        break;
    case linkDisplay:
        if (c == '\n') {
            ret = gemtextParserSendLink(parser, lq);
        }
        break;
    default:
        ret = 1;
        break;
    }
    return ret;
}
int parsePreformatted(gemtextParser *parser, gemtextNodeQueue *lq, char c) {
char *buf = NULL;
assert(parser->nodeType == preformattedNode);
switch (parser->state) {
case trimStart:
if (c == '\n') {
lineBufferReset(&parser->buffer);
parser->state = lineStart;
parser->altText = NULL;
} else if (c == ' ' || c == '\t') {
lineBufferRewind(&parser->buffer);
} else {
parser->state = preformattedAlt;
}
break;
case preformattedAlt:
if (c == '\n') {
parser->state = lineStart;
if (parser->buffer.len > 0) {
buf = strndup(parser->buffer.buf, parser->buffer.len - 1);
if (buf == NULL) return errno;
parser->altText = buf;
lineBufferReset(&parser->buffer);
}
}
}
lineBufferReset(lb);
while (1) {
fread(&c, 1, 1, stream);
switch (c) {
case '\n':
link->display = strndup(lb->buf, lb->len);
break;
default:
ret = lineBufferAppendChar(lb, c);
if (ret != 0) {
free(link->url);
free(link);
return NULL;
}
break;
case normalState:
if (c == '\n') {
parser->state = lineStart;
}
break;
case lineStart:
if (c == '\n') {
parser->state = lineStart;
} else if (c == '`') {
parser->state = firstBacktickChar;
} else {
parser->state = normalState;
}
break;
case firstBacktickChar:
if (c == '`') {
parser->state = secondBacktickChar;
} else {
parser->state = normalState;
}
break;
case secondBacktickChar:
if (c == '`') {
parser->state = thirdBacktickChar;
} else {
parser->state = normalState;
}
break;
case thirdBacktickChar:
if (c == '\n') {
gemtextParserSendPreformatted(parser, lq);
} else {
// We discard anything past the third backtick
parser->buffer.cursor--;
parser->buffer.len--;
}
}
return link;
}
enterH1Mode(gemtextParser *parser, char c) {
lineBufferReset(&parser->buffer);
switch (c) {
case ' ':
case '\t':
break;
default:
lineBufferReset(&parser->buffer);
lineBufferAppendCharUnchecked(&parser->buffer, c);
return 1;
}
parser->mode = h1Mode;
parser->state = normalState;
return 0;
}
enterH2Mode(gemtextParser *parser, char c) {
lineBufferReset(&parser->buffer);
switch (c) {
case ' ':
case '\t':
int parseQuote(gemtextParser *parser, gemtextNodeQueue *lq, char c) {
int ret = 0;
switch (parser->state) {
case lineStart:
if (c == '>') {
parser->state = trimStart;
lineBufferRewind(&parser->buffer);
} else {
lineBufferRewind(&parser->buffer);
ret = gemtextParserSend(parser, lq);
if (ret) return ret;
ret = fseek(parser->stream, -1, SEEK_CUR);
if (ret) return ret;
parser->state = lineStart;
parser->nodeType = normalNode;
}
break;
case normalState:
if (c == '\n') {
parser->state = lineStart;
}
break;
case trimStart:
if (c == ' ' || c == '\t') {
// rewind and trim the whitespace
parser->buffer.len--;
parser->buffer.cursor--;
} else if (c == '\n') {
if (parser->buffer.len == 1) {
parser->nodeType = normalNode;
}
ret = gemtextParserSend(parser, lq);
} else {
parser->state = normalState;
}
break;
default:
lineBufferReset(&parser->buffer);
lineBufferAppendCharUnchecked(&parser->buffer, c);
// Should be unreachable
ret = 1;
break;
}
parser->mode = h2Mode;
parser->state = normalState;
return ret;
}
enterH3Mode(gemtextParser *parser, char c) {
lineBufferReset(&parser->buffer);
switch (c) {
case ' ':
case '\t':
int parseGeneric(gemtextParser *parser, gemtextNodeQueue *nq, char c) {
int ret = 0;
switch (parser->state) {
case lineStart:
case trimStart:
if (c == ' ' || c == '\t') {
// rewind the cursor to trim the line start
parser->buffer.len--;
parser->buffer.cursor--;
} else if (c == '\n') {
ret = gemtextParserSend(parser, nq);
} else {
parser->state = normalState;
}
break;
case normalState:
if (c == '\n') {
ret = gemtextParserSend(parser, nq);
}
break;
default:
lineBufferReset(&parser->buffer);
lineBufferAppendCharUnchecked(&parser->buffer, c);
// Should be unreachable
ret = 1;
break;
}
parser->mode = h3Mode;
parser->state = normalState;
return ret;
}
int parseLink(gemtextParser *parser, char c, gemtextLineQueue *lq) {
// todo
return 0;
}
int parsePreformatted(gemtextParser *parser, char c, gemtextLineQueue *lq) {
// todo
return 0;
}
int parseQuote(gemtextParser *parser, char c, gemtextLineQueue *lq) {
// todo
return 0;
}
int parseH1(gemtextParser *parser, char c, gemtextLineQueue *lq) {
// todo
return 0;
}
int parseH2(gemtextParser *parser, char c, gemtextLineQueue *lq) {
// todo
return 0;
}
int parseH3(gemtextParser *parser, char c, gemtextLineQueue *lq) {
// todo
return 0;
}
int parseList(gemtextParser *parser, char c, gemtextLineQueue *lq) {
// todo
return 0;
}
int parseNormal(gemtextParser *parser, char c, gemtextLineQueue *lq) {
int parseNormal(gemtextParser *parser, gemtextNodeQueue *nq, char c) {
int ret;
switch (parser->state) {
@ -299,12 +511,15 @@ int parseNormal(gemtextParser *parser, char c, gemtextLineQueue *lq) {
parser->state = firstLinkChar;
break;
case '>':
parser->mode = quoteMode;
parser->state = lineStart;
parser->nodeType = quoteNode;
parser->state = trimStart;
lineBufferRewind(&parser->buffer);
break;
case '*':
parser->mode = listMode;
parser->state = normalState;
parser->nodeType = listNode;
parser->state = trimStart;
lineBufferRewind(&parser->buffer);
break;
case '#':
parser->state = firstHashChar;
break;
@ -312,7 +527,8 @@ int parseNormal(gemtextParser *parser, char c, gemtextLineQueue *lq) {
parser->state = firstBacktickChar;
break;
case '\n':
ret = gemtextParserSend(parser, normalLine, lq);
parser->nodeType = normalNode;
ret = gemtextParserSend(parser, nq);
if (ret) return ret;
break;
default:
@ -321,10 +537,11 @@ int parseNormal(gemtextParser *parser, char c, gemtextLineQueue *lq) {
break;
case firstLinkChar:
if (c == '>') {
parser->nodeType = linkNode;
parser->state = lineStart;
parser->mode = linkMode;
} else if (c == '\n') {
ret = gemtextParserSend(parser, normalLine, lq);
parser->nodeType = normalNode;
ret = gemtextParserSend(parser, nq);
if (ret) return ret;
} else {
parser->state = normalState;
@ -334,58 +551,69 @@ int parseNormal(gemtextParser *parser, char c, gemtextLineQueue *lq) {
if (c == '#') {
parser->state = secondHashChar;
} else if (c == '\n') {
ret = gemtextParserSend(parser, normalLine, lq);
parser->nodeType = normalNode;
ret = gemtextParserSend(parser, nq);
if (ret) return ret;
} else {
enterH1Mode(parser, c);
switchMode(parser, h1Node, c);
}
break;
case secondHashChar:
if (c == '#') {
parser->state = thirdHashChar;
parser->nodeType = h3Node;
parser->state = trimStart;
lineBufferReset(&parser->buffer);
} else if (c == '\n') {
ret = gemtextParserSend(parser, normalLine, lq);
parser->nodeType = normalNode;
ret = gemtextParserSend(parser, nq);
if (ret) return ret;
} else {
enterH2Mode(parser, c);
switchMode(parser, h2Node, c);
}
break;
case thirdHashChar:
if (c == '\n') {
ret = gemtextParserSend(parser, normalLine, lq);
parser->nodeType = normalNode;
ret = gemtextParserSend(parser, nq);
if (ret) return ret;
} else {
enterH3Mode(parser, c);
switchMode(parser, h3Node, c);
}
break;
case firstBacktickChar:
if (c == '\n') {
ret = gemtextParserSend(parser, normalLine, lq);
parser->nodeType = normalNode;
ret = gemtextParserSend(parser, nq);
if (ret) return ret;
} else if (c == '`') {
parser->state = secondBacktickChar;
} else {
parser->state = normalState;
parser->mode = normalMode;
parser->nodeType = normalNode;
}
break;
case secondBacktickChar:
if (c == '`') {
parser->state = thirdBacktickChar;
enterPreformattedMode(parser);
} else if (c == '\n') {
ret = gemtextParserSend(parser, normalLine, lq);
parser->nodeType = normalNode;
ret = gemtextParserSend(parser, nq);
if (ret) return ret;
} else {
parser->state = normalState;
parser->nodeType = normalNode;
}
case thirdBacktickChar:
case normalState:
break;
break;
default:
break;
}
return 0;
}
int parseGemtext(gemtextParser *parser, gemtextLineQueue *lq) {
int parseGemtext(gemtextParser *parser, gemtextNodeQueue *nq) {
char c;
int ret;
gemtextLine *line;
gemtextNode *node;
for (;;) {
ret = fread(&c, 1, 1, parser->stream);
@ -396,38 +624,47 @@ int parseGemtext(gemtextParser *parser, gemtextLineQueue *lq) {
return ret;
}
} else {
line = calloc(1, sizeof(gemtextLine));
if (line == NULL) return errno;
line->lineType = endOfStream;
line->prev = line->next = NULL;
line->str = NULL;
gemtextLineQueuePush(lq, line);
// If we were unable to read a char, assume we're at the end of the
// stream and send the node to the queue
if (parser->state != lineStart && parser->state != trimStart) {
switch (parser->nodeType) {
case preformattedNode:
ret = gemtextParserSendPreformatted(parser, nq);
break;
case linkNode:
ret = gemtextParserSendLink(parser, nq);
break;
default:
ret = gemtextParserSend(parser, nq);
break;
}
if (ret) return ret;
}
// Send an `endOfStream` node since we know we're done
node = calloc(1, sizeof(gemtextNode));
if (node == NULL) return errno;
node->nodeType = endOfStream;
node->prev = node->next = NULL;
node->str = NULL;
gemtextNodeQueuePush(nq, node);
break;
}
switch (parser->mode) {
case normalMode:
ret = parseNormal(parser, c, lq);
switch (parser->nodeType) {
case unset:
case normalNode:
ret = parseNormal(parser, nq, c);
break;
case preformattedMode:
ret = parsePreformatted(parser, c, lq);
case preformattedNode:
ret = parsePreformatted(parser, nq, c);
break;
case quoteMode:
ret = parseQuote(parser, c, lq);
case quoteNode:
ret = parseQuote(parser, nq, c);
break;
case linkMode:
ret = parseLink(parser, c, lq);
case linkNode:
ret = parseLink(parser, nq, c);
break;
case h1Mode:
ret = parseH1(parser, c, lq);
break;
case h2Mode:
ret = parseH2(parser, c, lq);
break;
case h3Mode:
ret = parseH3(parser, c, lq);
break;
case listMode:
ret = parseList(parser, c, lq);
default:
ret = parseGeneric(parser, nq, c);
break;
}
if (ret) {
@ -436,4 +673,4 @@ int parseGemtext(gemtextParser *parser, gemtextLineQueue *lq) {
}
}
return 0;
}
}

View file

@ -1,3 +1,6 @@
/** \file gemtext-parser.h
* \brief A fast Gemtext markup parser
*/
#ifndef GEMTEXT_PARSER_H
#define GEMTEXT_PARSER_H 1
@ -5,82 +8,247 @@
#include <stddef.h> // size_t
#include <stdio.h> // FILE
#define LBUF_SIZE 512
#define LBUF_SIZE 512 ///< The default size of a lineBuffer
/** An enumeration representing the state of the parsing action. These values
* are to be taken in context with the parser's current gemtextNodeType */
typedef enum {
normalMode,
preformattedMode,
quoteMode,
linkMode,
h1Mode,
h2Mode,
h3Mode,
listMode,
} gemtextParserMode;
typedef enum {
lineStart,
lineEnd,
firstLinkChar,
firstHashChar,
secondHashChar,
thirdHashChar,
firstBacktickChar,
secondBacktickChar,
thirdBacktickChar,
normalState,
lineStart = 0, ///< The cursor is at the start of a new line
lineEnd = 1, ///< The cursor is at the end of a line
firstLinkChar = 2, ///< The first link character was the previous character
linkDisplayStart = 3, /**< The url of a link has been parsed and the cursor is at the
beginning of the display element */
linkDisplay = 4, ///< The link's display element is being parsed
firstHashChar = 5, ///< A Single '#' character has been encountered
secondHashChar = 6, ///< Two '#' characters have been encountered sequentially
thirdHashChar = 7, ///< Three '#' characters have been encountered sequentially
firstBacktickChar = 8, ///< A single '`' character has been encountered
secondBacktickChar = 9, ///< Two '`' characters have been encountered sequentially
thirdBacktickChar = 10, ///< Three '`' characters have been encountered sequentially
preformattedAlt = 11, ///< A Preformatted block's alt text is being parsed
trimStart = 12, ///< The *mode* is known and leading whitespace is being trimmed
normalState = 13, ///< The *mode* is known and normal parsing is occurring
} gemtextParserState;
/**
* An enum type representing the various line types in gemtext markup
*/
typedef enum {
normalLine,
linkLine,
listLine,
h1Line,
h2Line,
h3Line,
preformattedLine,
quoteLine,
endOfStream,
} gemtextLineType;
unset = 0, ///< The node type has not yet been set
normalNode = 1, ///< A normal text line
linkNode = 2, ///< A link line
listNode = 3, ///< A list member
h1Node = 4, ///< An H1 heading
h2Node = 5, ///< An H2 heading
h3Node = 6, ///< An H3 heading
preformattedNode = 7, ///< A preformatted text block
quoteNode = 8, ///< A Quote block
endOfStream = 9, /**< Notifies the receiver that the stream is over and no
more lines are to be expected */
} gemtextNodeType;
/**
* A growable byte array
*/
typedef struct {
size_t capacity;
size_t len;
char *cursor;
char *buf;
size_t capacity; ///< The current capacity of the internal buffer
size_t len; ///< The actual number of bytes currently in use
char *cursor; ///< A pointer to the next byte to be used in the internal buffer
char *buf; ///< A Pointer to the beginning of the internal buffer
} lineBuffer;
/**
* A Gemtext link element
*/
typedef struct {
char *url;
char *display;
char *url; ///< The url of the gemtext link
char *display; ///< Optional text to be displayed in lieu of the url
} gemtextLink;
/**
* A block of preformatted text
*/
typedef struct {
FILE *stream;
gemtextParserMode mode;
gemtextParserState state;
lineBuffer buffer;
char *linkUrl;
char *altText; /**< Some descriptive text to be read by screen readers if
this is ascii art */
char *body; ///< The body of the preformatted block
} preformattedBlock;
/**
* The main Gemtext parser
*/
typedef struct {
FILE *stream; /**< A stream of bytes to read gemtext from */
gemtextNodeType nodeType; /**< The current parsing mode */
gemtextParserState state; /**< The state of the parser within each mode */
lineBuffer buffer; /**< The internal buffer used to store bytes until
a gemtextLine is ready to be sent */
union {
char *linkUrl; /**< The url portion of a linkLine */
char *altText; /**< The alt text associated with a preformatted block */
};
} gemtextParser;
struct _gemtextLine {
struct _gemtextLine *next;
struct _gemtextLine *prev;
gemtextLineType lineType;
/** A Gemtext node */
struct _gemtextNode {
struct _gemtextNode *next; ///< The next line in the queue
struct _gemtextNode *prev; ///< The previous line in the queue
gemtextNodeType nodeType; ///< Identifies the type of line
union {
char *str;
gemtextLink *link;
char *str; ///< The text body of most line types
gemtextLink *link; ///< The body of a link line
preformattedBlock *block; ///< The body and alt text of a preformatted block
};
};
typedef struct _gemtextLine gemtextLine;
/** A Gemtext node */
typedef struct _gemtextNode gemtextNode;
/**
* A fifo queue used to pass gemtextNode elements from the worker thread to the
* rendering thread.
*/
typedef struct {
pthread_cond_t cond;
size_t count;
pthread_mutex_t mutex;
gemtextLine *head;
gemtextLine *tail;
} gemtextLineQueue;
pthread_cond_t cond; ///< Signals the rendering thread to wait for an incoming line
size_t count; ///< The number of elements currently in the queue
pthread_mutex_t mutex; ///< The lock ensuring exclusive access
gemtextNode *head; ///< The oldest line in the queue
gemtextNode *tail; ///< The newest line in the queue
} gemtextNodeQueue;
#endif
/**
* Initialize a lineBuffer struct to its default values.
* ### Return values
* Returns 0 for success, 2 if memory allocation fails.
* \param lb A pointer to an already allocated lineBuffer
*/
int lineBufferInit(lineBuffer *lb);
/**
* Initialize a gemtextParser to its default values.
* ### Return values
* Returns 0 upon success, 2 if memory allocation for the internal
* buffer fails.
* \param parser A pointer to an already allocated gemtextParser
* \param stream A FILE whose bytes will be read and parsed as gemtext lines
*/
int gemtextParserInit(gemtextParser *parser, FILE *stream);
/**
* Creates a new gemtextParser and initializes it to default values.
* If memory allocation fails a NULL pointer will be returned.
* \param stream The FILE stream which we will read and parse as gemtext lines
*/
gemtextParser* gemtextParserNew(FILE *stream);
/**
* Frees all memory associated with pointer members of this parser and closes
* the internal FILE stream.
* \param parser The gemtextParser to be finalized
*/
void gemtextParserDeinit(gemtextParser *parser);
/**
* Frees all memory associated with this gemtextParser.
* \param parser The gemtextParser to be freed
*/
void gemtextParserDestroy(gemtextParser *parser);
/**
* Initializes a gemtextNodeQueue with default values.
* ### Return values
* Returns 0 on success. If there is a failure initializing the internal
* mutex or condition variable, an error code is returned instead.
* \param nq The already allocated gemtextNodeQueue
*/
int gemtextNodeQueueInit(gemtextNodeQueue *nq);
/**
* Pushes a gemtextNode into the queue. This function will not fail, but
* can block if another thread holds the gemtextNodeQueue's internal mutex.
* \param nq The queue which will receive the gemtext node
* \param node The gemtextNode to be queued
*/
void gemtextNodeQueuePush(gemtextNodeQueue *nq, gemtextNode *node);
/**
* Gets the oldest node inserted in the queue. This function will either
* return a valid gemtextNode or block until one becomes available.
* \param nq The queue from which we are attempting to pop a node
*/
gemtextNode* gemtextNodeQueuePop(gemtextNodeQueue *nq);
/**
* Attempts to get the oldest node inserted in the queue. If there are no nodes
* left in the queue, returns NULL.
* \param nq The queue from which we are attempting to pop a node
*/
gemtextNode* gemtextNodeQueueTryPop(gemtextNodeQueue *nq);
/**
* Frees all memory associated with a gemtextNode structure
* \param node The gemtextNode to be de-allocated
*/
void gemtextNodeDeinit(gemtextNode *node);
/**
* Extends the LineBuffer lb by len bytes.
* ### Return values
* Returns 0 upon success, or 2 if memory allocation fails.
* \param lb The buffer to be extended
* \param len The number of bytes to extend the buffer by
*/
int lineBufferExtend(lineBuffer *lb, size_t len);
/**
* Appends a character c to the lineBuffer lb. If there is no space left in the
* internal buffer, it will be re-allocated first.
* ### Return values
* Returns 0 for success, or 2 if memory allocation fails.
* \param lb The buffer we are appending to
* \param c The character to be appended to this buffer
*/
int lineBufferAppendChar(lineBuffer *lb, char c);
/**
* Appends a character c to the lineBuffer lb without checking if there is space
* available first.
* > **Warning!** Due to the fact that this function is unchecked, it should
* > only be called if you are absolutely certain that there is space remaining
* > in the internal buffer, such as after calling lineBufferRewind to move the
* > cursor back by one character. Failure to follow this warning may result in
* > *buffer overflow* memory access violation.
*/
void lineBufferAppendCharUnchecked(lineBuffer *lb, char c);
/**
* Appends a string beginning at the pointer *c of len bytes to lineBuffer lb.
* ### Return values
* Returns 0 on success, or 2 for memory allocation errors.
* \param lb The buffer we are appending to
* \param c A pointer to an array of chars
* \param len The number of bytes to append from c
*/
int lineBufferAppendString(lineBuffer *lb, char *c, size_t len);
/**
* Rewinds the internal cursor pointer and count for lineBuffer lb by 1.
*/
void lineBufferRewind(lineBuffer *lb);
/**
* Resets the internal count of lineBuffer lb to 0 and moves its cursor back
* to the start of the internal buffer.
*/
void lineBufferReset(lineBuffer *lb);
/**
* Parses gemtext into a series of nodes to be placed in the gemtextNodeQueue nq.
* ### Return values
* Returns 0 on success, any other number is an error code
* \param parser A gemtextParser struct used to maintain state while parsing
* \param nq A gemtextNodeQueue which will receive gemtextNode elements as they are parsed
*/
int parseGemtext(gemtextParser *parser, gemtextNodeQueue *nq);
#endif

View file

@ -0,0 +1,71 @@
# _,.---._ .-._ .--.-. ,--.--------.
# _,..---._ ,-.' , - `. /==/ \ .-._/==/ //==/, - , -\
# /==/, - \ /==/_, , - \|==|, \/ /, |==\ -\\==\.-. - ,-./
# |==| _ _\==| .=. |==|- \| | \==\- \`--`\==\- \
# |==| .=. |==|_ : ;=: - |==| , | -| `--`-' \==\_ \
# |==|,| | -|==| , '=' |==| - _ | |==|- |
# |==| '=' /\==\ - ,_ /|==| /\ , | |==|, |
# |==|-, _`/ '.='. - .' /==/, | |- | /==/ -/
# `-.`.____.' `--`--'' `--`./ `--` `--`--`
# _ __ ,---. .-._ .=-.-. _,.----.
# .-`.' ,`..--.' \ /==/ \ .-._ /==/_ /.' .' - \
# /==/, - \==\-/\ \ |==|, \/ /, /==|, |/==/ , ,-'
# |==| _ .=. /==/-|_\ | |==|- \| ||==| ||==|- | .
# |==| , '=',\==\, - \ |==| , | -||==|- ||==|_ `-' \
# |==|- '..'/==/ - ,| |==| - _ ||==| ,||==| _ , |
# |==|, | /==/- /\ - \|==| /\ , ||==|- |\==\. /
# /==/ - | \==\ _.\=\.-'/==/, | |- |/==/. / `-.`.___.-'
# `--`---' `--` `--`./ `--``--`-`
#
# @(#)Copyright (c) 2023, Nathan D. Fisher.
#
# This is free software. It comes with NO WARRANTY.
# Permission to use, modify and distribute this source code
# is granted subject to the following conditions.
# 1/ that the above copyright notice and this notice
# are preserved in all copies and that due credit be given
# to the author.
# 2/ that any changes to this code are clearly commented
# as such so that the author does not get blamed for bugs
# other than his own.
#
include ../config.mk
CFLAGS += -I../include
LDLIBS += ../libgemtext.a
LDLIBS += $(LIBS)
tests += parse-gemtext0
tests += parse-gemtext1
total != echo $(tests) | wc -w | awk '{ print $$1 }'
# Build every test program, run each one in turn and print a status line per
# test followed by a summary. A test exiting with status 255 is reported as
# "Skipped"; any other non-zero status is counted as a failure.
# NOTE: the recipe ends with an echo, so a failing test does not make
# `make test` itself exit non-zero.
.PHONY: test
test: $(tests) output
	@echo -e "\n\t=== \e[0;33mRunning $(total) tests\e[0m ===\n"
	@idx=1 ; success=0 ; fail=0; skip=0; for t in $(tests) ; \
	do printf "[%02i/$(total)] %-25s" $${idx} $${t} ; \
	idx=$$(expr $${idx} + 1) ; \
	./$${t} ; \
	retval=$$? ; \
	if [ $${retval} -eq 0 ] ; \
	then echo -e '\e[0;32mSuccess\e[0m' ; \
	success=$$(expr $${success} + 1) ; \
	elif [ $${retval} -eq 255 ] ; \
	then echo Skipped ; \
	skip=$$(expr $${skip} + 1) ; \
	else echo -e '\e[0;31mFailure\e[0m' ; \
	fail=$$(expr $${fail} + 1) ; \
	fi ; done || true ; \
	if [ $${fail} -eq 0 ] ; \
	then echo -e '\nResults: \e[0;32mOk\e[0m.' "$${success} succeeded; $${fail} failed; $${skip} skipped" ; \
	else echo -e '\nResults: \e[0;31mFAILED\e[0m.' "$${success} succeeded; $${fail} failed; $${skip} skipped" ; \
	fi
# Create the scratch directory for test output unless it already exists.
output:
	@[ -d $@ ] 2>/dev/null || install -d $@
# Remove the built test programs and everything inside the output directory
# (the directory itself is kept).
.PHONY: clean
clean:
rm -rf $(tests) output/*

102
test/parse-gemtext0.c Normal file
View file

@ -0,0 +1,102 @@
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gemtext-parser.h"
/* Queue handed to parseGemtext() and then drained node by node in main(). */
gemtextNodeQueue lq;
/* Parser state; initialised in main() with the test input stream. */
gemtextParser parser;
/**
 * Pops the next node from the global queue and fails the test if there is
 * none. Asserting here turns a missing node into a clean assertion failure
 * instead of a NULL dereference in the caller.
 */
static gemtextNode *nextNode(void) {
	gemtextNode *node = gemtextNodeQueueTryPop(&lq);
	assert(node != NULL);
	return node;
}

/**
 * Parses test0.gmi (well-formed gemtext) and checks that the queue holds the
 * expected nodes, in order, terminated by an endOfStream node.
 *
 * \return 0 on success; a failed check aborts through assert().
 */
int main(void) {
	int ret = 0;
	FILE *stream = NULL;
	gemtextNode *node = NULL;

	stream = fopen("test0.gmi", "r");
	assert(stream != NULL);
	ret = gemtextNodeQueueInit(&lq);
	assert(ret == 0);
	ret = gemtextParserInit(&parser, stream);
	assert(ret == 0);

	/* parseGemtext() is documented to return 0 on success. */
	ret = parseGemtext(&parser, &lq);
	assert(ret == 0);

	node = nextNode();
	assert(node->nodeType == h1Node);
	assert(memcmp(node->str, "A Test Gemtext file", 19) == 0);
	gemtextNodeDeinit(node);

	node = nextNode();
	assert(node->nodeType == h2Node);
	assert(memcmp(node->str, "Used for testing the parser in normal operation", 47) == 0);
	gemtextNodeDeinit(node);

	node = nextNode();
	assert(node->nodeType == normalNode);
	assert(*node->str == '\n');
	gemtextNodeDeinit(node);

	node = nextNode();
	assert(node->nodeType == normalNode);
	assert(memcmp(node->str, "This is", 7) == 0);
	gemtextNodeDeinit(node);

	node = nextNode();
	assert(node->nodeType == normalNode);
	assert(*node->str == '\n');
	gemtextNodeDeinit(node);

	/* The two consecutive quote lines are expected as a single node. */
	node = nextNode();
	assert(node->nodeType == quoteNode);
	assert(memcmp(node->str, "Walk before you run.\n- Anonymous", 32) == 0);
	gemtextNodeDeinit(node);

	node = nextNode();
	assert(node->nodeType == normalNode);
	assert(*node->str == '\n');
	gemtextNodeDeinit(node);

	node = nextNode();
	assert(node->nodeType == h3Node);
	assert(memcmp(node->str, "Let's check a list", 18) == 0);
	gemtextNodeDeinit(node);

	/* Compare all 10 bytes of "First item" (the original checked only 9). */
	node = nextNode();
	assert(node->nodeType == listNode);
	assert(memcmp(node->str, "First item", 10) == 0);
	gemtextNodeDeinit(node);

	node = nextNode();
	assert(node->nodeType == listNode);
	assert(memcmp(node->str, "second item", 11) == 0);
	gemtextNodeDeinit(node);

	node = nextNode();
	assert(node->nodeType == normalNode);
	assert(*node->str == '\n');
	gemtextNodeDeinit(node);

	node = nextNode();
	assert(node->nodeType == linkNode);
	assert(memcmp(node->link->url, "gemini://example.org/test.gmi", 29) == 0);
	assert(memcmp(node->link->display, "This is a link", 14) == 0);
	gemtextNodeDeinit(node);

	node = nextNode();
	assert(node->nodeType == normalNode);
	assert(*node->str == '\n');
	gemtextNodeDeinit(node);

	node = nextNode();
	assert(node->nodeType == preformattedNode);
	assert(memcmp(node->block->altText, "Test preformatted block", 23) == 0);
	assert(memcmp(node->block->body, "This is a preformatted block", 28) == 0);
	gemtextNodeDeinit(node);

	node = nextNode();
	assert(node->nodeType == endOfStream);
	gemtextNodeDeinit(node);

	gemtextParserDeinit(&parser);
	return ret;
}

106
test/parse-gemtext1.c Normal file
View file

@ -0,0 +1,106 @@
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gemtext-parser.h"
/* Queue handed to parseGemtext() and then drained node by node in main(). */
gemtextNodeQueue lq;
/* Parser state; initialised in main() with the test input stream. */
gemtextParser parser;
/*
 * Expected body of the preformatted block in test1.gmi: 50 bytes, with no
 * newline after the last line. Declared const because it points at a string
 * literal, which must never be written through.
 */
const char *preBlk =
	"Just a regular preformatted block.\n"
	"Nothing special";
int main() {
int ret = 0;
FILE *stream = NULL;
gemtextNode *node = NULL;
stream = fopen("test1.gmi", "r");
assert(stream != NULL);
ret = gemtextNodeQueueInit(&lq);
assert(ret == 0);
ret = gemtextParserInit(&parser, stream);
assert(ret == 0);
ret = parseGemtext(&parser, &lq);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType = h2Node);
assert(memcmp(node->str, "A more complicated example", 26) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == listNode);
assert(memcmp(node->str, "list item with no leading space", 30) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == listNode);
assert(memcmp(node->str, "list item with several leading spaces", 37) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == h2Node);
assert(memcmp(node->str, "After this H2, an empty quote", 29) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == normalNode);
assert(memcmp(node->str, "\n", 1) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == h3Node);
assert(memcmp(node->str, "Now we'll", 9) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == normalNode);
assert(memcmp(node->str, "``", 2) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == normalNode);
assert(memcmp(node->str, "=", 1) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == normalNode);
assert(memcmp(node->str, "And maybe", 9) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == linkNode);
assert(memcmp(node->link->url, "spartan://example.org", 21) == 0);
assert(node->link->display == NULL);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == normalNode);
assert(memcmp(node->str, "Let's enter", 11) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == preformattedNode);
assert(node->block->altText == NULL);
assert(memcmp(node->block->body, preBlk, 50) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == normalNode);
assert(memcmp(node->str, "And we'll finish", 16) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node->nodeType == linkNode);
assert(node->link->display == NULL);
assert(memcmp(node->link->url, "finger://example.org/joe", 24) == 0);
gemtextNodeDeinit(node);
node = gemtextNodeQueueTryPop(&lq);
assert(node != NULL);
assert(node->nodeType == endOfStream);
gemtextNodeDeinit(node);
return ret;
}

19
test/test0.gmi Normal file
View file

@ -0,0 +1,19 @@
# A Test Gemtext file
## Used for testing the parser in normal operation
This is a simple gemtext file used for testing the gemtext parser. There's nothing particularly special about it and we're not using it to test any edge cases. For now, we are only concerned with how the parser handles well-formatted gemtext such as this.
> Walk before you run.
> - Anonymous
### Let's check a list
* First item
* second item
=> gemini://example.org/test.gmi This is a link
``` Test preformatted block
This is a preformatted block.
Everything in this block should appear exactly as entered in a Monospace font,
with no styling applied.
```

17
test/test1.gmi Normal file
View file

@ -0,0 +1,17 @@
# A more complicated example
*list item with no leading space
* list item with several leading spaces
##After this H2, an empty quote
>
### Now we'll mess with incomplete linetype signifiers
``
=
And maybe a link with no display element..
=> spartan://example.org/
Let's enter preformatted for reals this time.
```
Just a regular preformatted block.
Nothing special
```
And we'll finish with a link.
=> finger://example.org/joe