commit e46d8a4d383015f61d2b763986e24063b55d243d
parent e49a67ae9a3d0c4166ad8475a9b30d2328c3944c
Author: Nathaniel Chappelle <nathaniel@chappelle.dev>
Date: Thu, 29 Jan 2026 00:23:49 -0800
I was onto something good but broke the json parser
Diffstat:
| M | Makefile | | | 10 | ++++++++-- |
| A | headers.c | | | 127 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | headers.h | | | 7 | +++++++ |
| M | json.c | | | 41 | ++++++++++++++++++++++++++++++++++++++++- |
| M | json.h | | | 3 | +++ |
| M | stamail.c | | | 47 | +++++++++++++++++++++++++++++++++++++++++++++++ |
| A | thread.c | | | 215 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | thread.h | | | 7 | +++++++ |
8 files changed, 454 insertions(+), 3 deletions(-)
diff --git a/Makefile b/Makefile
@@ -1,6 +1,6 @@
CC = cc
CFLAGS = -std=c99 -Wall -Wextra -O2
-OBJ = stamail.o json.o
+OBJ = stamail.o json.o thread.o headers.o
BIN = stamail
all: $(BIN)
@@ -14,11 +14,17 @@ stamail.o: stamail.c json.h
json.o: json.c json.h jsmn.h
$(CC) $(CFLAGS) -c json.c
+# thread.c and headers.c both #include "stamail.h", so it must be a
+# prerequisite or the objects go stale when that header changes.
+thread.o: thread.c thread.h stamail.h
+	$(CC) $(CFLAGS) -c thread.c
+
+headers.o: headers.c headers.h stamail.h
+	$(CC) $(CFLAGS) -c headers.c
+
clean:
rm -rf $(OBJ) $(BIN) ./output/
messages:
- notmuch show --format=json '*' | ./stamail
+ cat ./test/test.json | ./stamail
.PHONY: all clean
diff --git a/headers.c b/headers.c
@@ -0,0 +1,127 @@
+/* headers.c - Read email headers from maildir files */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <ctype.h>
+#include "stamail.h"
+
+/*
+ * read_header - fetch one header field from the header section of a message.
+ *
+ * Rewinds fp, then scans line by line until the first blank line (end of the
+ * header section). The first line that starts with header_name followed by
+ * ':' (case-insensitive) is taken; folded continuation lines (starting with
+ * a space or tab) are joined onto it with a single space. Leading and
+ * trailing whitespace is removed from the result.
+ *
+ * fp:          open stream positioned anywhere; it is rewound first.
+ * header_name: field name without the trailing colon, e.g. "References".
+ * len:         receives the length of the returned string, or 0 when NULL
+ *              is returned.
+ *
+ * Returns a malloc'd NUL-terminated copy of the value (caller frees), or
+ * NULL if the header is absent, empty, or allocation fails. Values longer
+ * than the internal 4095-byte buffer are truncated.
+ */
+static char *read_header(FILE *fp, const char *header_name, int *len) {
+    static char line[4096];
+    static char value[4096];
+    const int value_cap = (int)sizeof(value) - 1;
+    size_t header_len = strlen(header_name);
+    int value_len = 0;
+    int found = 0;
+
+    *len = 0;
+    rewind(fp);
+
+    while (fgets(line, sizeof(line), fp)) {
+        const char *p;
+
+        /* End of headers (blank line) */
+        if (line[0] == '\n' || line[0] == '\r')
+            break;
+
+        if (!found) {
+            /* Still looking for "<header_name>:" at the start of a line */
+            if (strncasecmp(line, header_name, header_len) != 0 ||
+                line[header_len] != ':')
+                continue;
+            found = 1;
+            p = line + header_len + 1;
+        } else if (line[0] == ' ' || line[0] == '\t') {
+            /* Folded continuation of the header we are collecting */
+            p = line;
+            if (value_len > 0 && value_len < value_cap)
+                value[value_len++] = ' ';
+        } else {
+            /* Next header field begins: our value is complete */
+            break;
+        }
+
+        /* <ctype.h> functions require an unsigned char value; plain char
+         * may be negative for non-ASCII bytes. */
+        while (*p && isspace((unsigned char)*p))
+            p++;
+
+        /* Copy up to the line terminator, never overrunning value[] */
+        while (*p && *p != '\n' && *p != '\r' && value_len < value_cap)
+            value[value_len++] = *p++;
+    }
+
+    /* Drop trailing blanks so callers can test the last character */
+    while (value_len > 0 && isspace((unsigned char)value[value_len - 1]))
+        value_len--;
+
+    if (!found || value_len == 0)
+        return NULL;
+
+    value[value_len] = '\0';
+
+    /* Hand back an independent copy; value[] is reused by the next call */
+    char *result = malloc((size_t)value_len + 1);
+    if (!result)
+        return NULL;
+
+    memcpy(result, value, (size_t)value_len + 1);
+    *len = value_len;
+    return result;
+}
+
+/*
+ * extract_threading_headers - read In-Reply-To and References from the
+ * message's maildir file and store them on the node.
+ *
+ * node->m.filename / filename_len name the file; the name is a
+ * length-delimited slice and is copied into a NUL-terminated buffer here.
+ * NOTE(review): the slice appears to point at raw JSON text, so a path
+ * containing JSON escape sequences would not open as-is - confirm.
+ *
+ * On success node->m.in_reply_to holds the first message-id found in the
+ * In-Reply-To header with its angle brackets removed (or the whole value if
+ * no "<...>" is present), and node->m.references holds the unfolded
+ * References value. Both are malloc'd strings now owned by the node; a
+ * field is left untouched when its header is missing.
+ *
+ * Returns 0 if the file could be opened, -1 if there is no usable filename,
+ * the name is too long, or fopen fails (reported via perror).
+ */
+int extract_threading_headers(struct message_node *node) {
+    if (!node->m.filename || node->m.filename_len <= 0)
+        return -1;
+
+    /* Build NUL-terminated filename */
+    char path[2048];
+    if (node->m.filename_len >= (int)sizeof(path))
+        return -1;
+
+    memcpy(path, node->m.filename, (size_t)node->m.filename_len);
+    path[node->m.filename_len] = '\0';
+
+    FILE *fp = fopen(path, "r");
+    if (!fp) {
+        perror(path);
+        return -1;
+    }
+
+    /* In-Reply-To: keep only the first <message-id>. The field may carry
+     * several ids or free-form text after the id. */
+    int len = 0;
+    char *in_reply_to = read_header(fp, "In-Reply-To", &len);
+    if (in_reply_to) {
+        char *id = in_reply_to;
+        int id_len = len;
+
+        char *open = memchr(in_reply_to, '<', (size_t)len);
+        if (open) {
+            size_t rest = (size_t)(in_reply_to + len - (open + 1));
+            char *close = memchr(open + 1, '>', rest);
+            if (close && close > open + 1) {
+                id = open + 1;
+                id_len = (int)(close - id);
+            }
+        }
+
+        /* Shift the id to the front and reuse read_header's allocation */
+        memmove(in_reply_to, id, (size_t)id_len);
+        in_reply_to[id_len] = '\0';
+        node->m.in_reply_to = in_reply_to;
+        node->m.in_reply_to_len = id_len;
+    }
+
+    /* References: stored verbatim (unfolded); the node keeps the allocation */
+    char *references = read_header(fp, "References", &len);
+    if (references) {
+        node->m.references = references;
+        node->m.references_len = len;
+    }
+
+    fclose(fp);
+    return 0;
+}
diff --git a/headers.h b/headers.h
@@ -0,0 +1,7 @@
+#ifndef HEADERS_H
+#define HEADERS_H
+
+/* Forward declaration so this header does not depend on include order. */
+struct message_node;
+
+/*
+ * Read In-Reply-To and References from the message's maildir file into
+ * node->m. Returns 0 on success, -1 if the file cannot be opened.
+ * (read_header is a file-local helper of headers.c and is intentionally
+ * not declared here.)
+ */
+int extract_threading_headers(struct message_node *node);
+
+#endif
diff --git a/json.c b/json.c
@@ -18,7 +18,11 @@ static void scan(const char *js, jsmntok_t *t, int *i, int ntok,
(*i)++;
const char *id = NULL, *from = NULL, *subject = NULL, *date = NULL, *body = NULL;
+ const char *in_reply_to = NULL, *references = NULL;
+ const char *filename = NULL;
int id_len=0, from_len=0, subject_len=0, date_len=0, body_len=0;
+ int in_reply_to_len=0, references_len=0;
+ int filename_len=0;
for (int k = 0; k < n; k++) {
jsmntok_t *key = &t[*i]; (*i)++;
@@ -30,6 +34,24 @@ static void scan(const char *js, jsmntok_t *t, int *i, int ntok,
id_len = val->end - val->start;
(*i)++;
}
+        /* filename: an array of paths; the first one is used later to read
+         * the In-Reply-To / References headers from the maildir file */
+        else if (tok_streq(js, key, "filename") && val->type == JSMN_ARRAY) {
+            (*i)++; /* step past the array token onto its first element */
+            if (val->size > 0 && t[*i].type == JSMN_STRING) {
+                filename = js + t[*i].start;
+                filename_len = t[*i].end - t[*i].start;
+            }
+            /* Advance past ALL elements so *i lands on the next key.
+             * Stopping on the last element makes the following key/value
+             * pair get misread. Assumes each element is a single token
+             * (a string), not a nested array or object. */
+            *i += val->size;
+        }
/* headers */
else if (tok_streq(js, key, "headers") && val->type == JSMN_OBJECT) {
int hn = val->size;
@@ -48,6 +70,20 @@ static void scan(const char *js, jsmntok_t *t, int *i, int ntok,
date = js + hval->start;
date_len = hval->end - hval->start;
}
+ else if (tok_streq(js, hkey, "In-reply-to")) {
+ in_reply_to = js + hval->start;
+ in_reply_to_len = hval->end - hval->start;
+ /* Strip < and > brackets if present */
+ if (in_reply_to_len > 2 &&
+ in_reply_to[0] == '<' &&
+ in_reply_to[in_reply_to_len - 1] == '>') {
+ in_reply_to++;
+ in_reply_to_len -= 2;
+ }
+ } else if (tok_streq(js, hkey, "References")) {
+ references = js + hval->start;
+ references_len = hval->end - hval->start;
+ }
(*i)++;
}
}
@@ -101,7 +137,10 @@ static void scan(const char *js, jsmntok_t *t, int *i, int ntok,
.from = from, .from_len = from_len,
.subject = subject, .subject_len = subject_len,
.date = date, .date_len = date_len,
- .body = body, .body_len = body_len
+ .body = body, .body_len = body_len,
+ .filename = filename, .filename_len = filename_len,
+ .in_reply_to = NULL, .in_reply_to_len = 0,
+ .references = NULL, .references_len = 0
};
cb(&m, ud);
// printf("Message-ID: %.*s\n", id_len, id);
diff --git a/json.h b/json.h
@@ -16,6 +16,9 @@ struct message {
const char *references;
int references_len;
+ const char *filename;
+ int filename_len;
+
time_t timestamp;
};
diff --git a/stamail.c b/stamail.c
@@ -5,8 +5,11 @@
#include <sys/stat.h>
#include <sys/types.h>
#include <errno.h>
+
#include "json.h"
#include "stamail.h"
+#include "thread.h"
+#include "headers.h"
/* Callback handler */
/*
@@ -28,6 +31,7 @@ static struct message_node *head = NULL;
static struct message_node *tail = NULL;
static size_t message_count = 0;
+
/* Current callback handler */
static void collect_message(struct message *m, void *ud) {
(void)ud;
@@ -313,6 +317,19 @@ int main(int argc, char *argv[]) {
parse_mail_json(buf, len, collect_message, NULL);
+ /* Debug: print what we got from JSON */
+ for (struct message_node *n = head; n; n = n->next) {
+ struct message *m = &n->m;
+ printf("MSG: %.*s\n", m->subject_len, m->subject);
+ printf(" ID: %.*s\n", m->id_len, m->id);
+ if (m->in_reply_to_len > 0) {
+ printf(" IRT: %.*s\n", m->in_reply_to_len, m->in_reply_to);
+ } else {
+ printf(" IRT: (none)\n");
+ }
+ printf("\n");
+ }
+
for (struct message_node *n = head; n; n = n->next) {
struct message *m = &n->m;
@@ -331,6 +348,25 @@ int main(int argc, char *argv[]) {
free(buf);
return 1;
}
+
+ /* Extract threading headers from maildir files */
+ fprintf(stderr, "Reading threading headers from %zu messages...\n", message_count);
+ for (struct message_node *n = head; n; n = n->next) {
+ extract_threading_headers(n);
+ }
+
+ /* Debug: print what we got */
+ for (struct message_node *n = head; n; n = n->next) {
+ struct message *m = &n->m;
+ printf("MSG: %.*s\n", m->subject_len, m->subject);
+ printf(" ID: %.*s\n", m->id_len, m->id);
+ if (m->in_reply_to_len > 0) {
+ printf(" IRT: %.*s\n", m->in_reply_to_len, m->in_reply_to);
+ } else {
+ printf(" IRT: (none)\n");
+ }
+ printf("\n");
+ }
/* Create output directory */
if (ensure_dir(output_dir) != 0) {
@@ -363,6 +399,17 @@ int main(int argc, char *argv[]) {
fprintf(stderr, "Done! Output written to %s/\n", output_dir);
+ /* Build thread tree */
+ int num_threads = 0;
+ struct thread_node **threads = build_threads(head, &num_threads);
+
+ if (threads) {
+ printf("Debug print of threading");
+ print_threads(threads, num_threads);
+ }
+
+
+
free(buf);
struct message_node *n = head;
while (n) {
diff --git a/thread.c b/thread.c
@@ -0,0 +1,215 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <ctype.h>
+
+#include "stamail.h"
+
+/* Number of buckets in the Message-ID hash table. */
+#define HASH_SIZE 1024
+
+/* One chained hash-table entry mapping a Message-ID to its thread node. */
+struct msg_entry {
+ /* Message-ID bytes; the pointer is stored as given, not copied, and is
+  * compared by length with memcmp. */
+ const char *msgid;
+ int msgid_len;
+ /* Thread node registered under this Message-ID. */
+ struct thread_node *node;
+ /* Next entry in the same bucket. */
+ struct msg_entry *next;
+};
+
+/* Bucket heads, indexed by hash_msgid(); emptied again by hash_free(). */
+static struct msg_entry *msg_table[HASH_SIZE];
+
+/*
+ * Map a length-delimited Message-ID to a bucket index in [0, HASH_SIZE).
+ * Multiply-by-33 string hash seeded with 5381; a non-positive len hashes
+ * as the empty string.
+ */
+static unsigned int hash_msgid(const char *msgid, int len) {
+    const unsigned char *byte = (const unsigned char *)msgid;
+    unsigned int acc = 5381;
+    int remaining = len;
+
+    while (remaining > 0) {
+        acc = acc * 33u + *byte;
+        byte++;
+        remaining--;
+    }
+    return acc % HASH_SIZE;
+}
+
+/*
+ * Register node under msgid by pushing a new entry at the head of its
+ * bucket. The msgid pointer is stored, not copied. If the entry cannot be
+ * allocated the call does nothing.
+ */
+static void hash_add(const char *msgid, int msgid_len, struct thread_node *node) {
+    struct msg_entry *entry = malloc(sizeof(*entry));
+    if (entry == NULL)
+        return;
+
+    unsigned int bucket = hash_msgid(msgid, msgid_len);
+
+    *entry = (struct msg_entry){
+        .msgid = msgid,
+        .msgid_len = msgid_len,
+        .node = node,
+        .next = msg_table[bucket],
+    };
+    msg_table[bucket] = entry;
+}
+
+/*
+ * Return the thread node registered under the given Message-ID, or NULL if
+ * there is none. Entries match when both length and bytes are equal; the
+ * most recently added match in the bucket wins.
+ */
+static struct thread_node * hash_find(const char *msgid, int msgid_len) {
+    struct msg_entry *cursor = msg_table[hash_msgid(msgid, msgid_len)];
+
+    while (cursor != NULL) {
+        int same_len = (cursor->msgid_len == msgid_len);
+        if (same_len && memcmp(cursor->msgid, msgid, msgid_len) == 0)
+            return cursor->node;
+        cursor = cursor->next;
+    }
+    return NULL;
+}
+
+/*
+ * Release every entry in the hash table and leave all buckets empty.
+ * Only the entries are freed; the thread nodes they point to are not.
+ */
+static void hash_free(void) {
+    for (int bucket = 0; bucket < HASH_SIZE; bucket++) {
+        struct msg_entry *head;
+
+        /* Pop entries off the front of the chain until it is empty */
+        while ((head = msg_table[bucket]) != NULL) {
+            msg_table[bucket] = head->next;
+            free(head);
+        }
+    }
+}
+
+/*
+ * Allocate a zero-initialised thread node that refers to msg.
+ * Returns NULL when allocation fails.
+ */
+static struct thread_node *thread_node_new(struct message *msg) {
+    struct thread_node *fresh = calloc(1, sizeof(*fresh));
+
+    if (fresh != NULL)
+        fresh->msg = msg;
+    return fresh;
+}
+
+/* Set node's depth and recompute the depth of everything beneath it. */
+static void set_subtree_depth(struct thread_node *node, int depth) {
+    node->depth = depth;
+    for (struct thread_node *c = node->child; c; c = c->sibling)
+        set_subtree_depth(c, depth + 1);
+}
+
+/*
+ * Attach child as the first child of parent.
+ *
+ * The link is refused (nothing changes) when either pointer is NULL, when
+ * child already has a parent, or when child is parent itself or one of its
+ * ancestors; otherwise the tree would gain a cycle and traversal would
+ * never terminate.
+ *
+ * Depth is recomputed for the whole subtree under child, because child may
+ * already have had its own replies attached before its parent was known.
+ */
+static void add_child(struct thread_node *parent, struct thread_node *child) {
+    if (!parent || !child) return;
+    if (child->parent) return;
+
+    for (struct thread_node *a = parent; a; a = a->parent) {
+        if (a == child) return;
+    }
+
+    child->parent = parent;
+    child->sibling = parent->child;
+    parent->child = child;
+    parent->num_children++;
+
+    set_subtree_depth(child, parent->depth + 1);
+}
+
+#if 0
+/*
+ * normalize_subject - strip reply/forward/list decorations from a subject.
+ *
+ * Repeatedly removes, from the front of subject[0..len): whitespace,
+ * "Re:" and "Fwd:" (case-insensitive), and bracketed "[list-name]" tags.
+ * An opening '[' with no closing ']' is kept as ordinary text instead of
+ * swallowing the rest of the subject.
+ *
+ * Returns a pointer to a static NUL-terminated buffer (at most 511 bytes,
+ * longer subjects are truncated); it is overwritten by the next call.
+ */
+static char * normalize_subject(const char *subject, int len) {
+    static char buf[512];
+    const char *s = subject;
+    const char *end = subject + len;
+
+    while (s < end) {
+        /* <ctype.h> needs an unsigned char value: subject bytes may be
+         * non-ASCII and thus negative as plain char. */
+        while (s < end && isspace((unsigned char)*s)) s++;
+
+        if (end - s >= 3 && strncasecmp(s, "re:", 3) == 0) {
+            s += 3;
+            continue;
+        }
+
+        if (end - s >= 4 && strncasecmp(s, "fwd:", 4) == 0) {
+            s += 4;
+            continue;
+        }
+
+        /* Look for [list-name]; only strip it when the tag is closed */
+        if (s < end && *s == '[') {
+            const char *close = memchr(s, ']', (size_t)(end - s));
+            if (!close)
+                break;
+            s = close + 1;
+            continue;
+        }
+
+        break;
+    }
+
+    while (s < end && isspace((unsigned char)*s)) s++;
+
+    int out = 0;
+    while (s < end && out < (int)sizeof(buf) - 1) {
+        buf[out++] = *s++;
+    }
+    buf[out] = '\0';
+
+    return buf;
+}
+#endif
+
+/* Return non-zero if node is start itself or one of start's ancestors. */
+static int in_parent_chain(const struct thread_node *start,
+                           const struct thread_node *node) {
+    for (; start; start = start->parent) {
+        if (start == node) return 1;
+    }
+    return 0;
+}
+
+/*
+ * build_threads - arrange a message list into reply trees.
+ *
+ * One thread node is created per distinct Message-ID (when two messages
+ * share an id, the first one is used and later ones are ignored). Each
+ * node is then linked under the node whose id equals its In-Reply-To
+ * value; nodes with no known parent, and nodes whose link would make a
+ * message its own ancestor, become thread roots.
+ *
+ * messages:    list of parsed messages; the nodes keep pointers into it,
+ *              so it must outlive the returned trees.
+ * num_threads: receives the number of roots (0 on failure).
+ *
+ * Returns a malloc'd array of *num_threads root pointers, or NULL if the
+ * array cannot be allocated. The caller owns the array and the nodes.
+ */
+struct thread_node **build_threads(struct message_node *messages, int *num_threads) {
+    int root_count = 0;
+    int root_capacity = 64;
+    struct thread_node **roots;
+
+    *num_threads = 0;
+
+    roots = malloc((size_t)root_capacity * sizeof(*roots));
+    if (!roots) return NULL;
+
+    /* Pass 1: create and index a thread node per distinct Message-ID */
+    for (struct message_node *mn = messages; mn; mn = mn->next) {
+        if (hash_find(mn->m.id, mn->m.id_len))
+            continue; /* duplicate Message-ID */
+
+        struct thread_node *node = thread_node_new(&mn->m);
+        if (!node) continue;
+
+        hash_add(mn->m.id, mn->m.id_len, node);
+
+        /* hash_add reports nothing on allocation failure; if the node
+         * did not make it into the table it is unreachable, so free it */
+        if (hash_find(mn->m.id, mn->m.id_len) != node)
+            free(node);
+    }
+
+    /* Pass 2: build parent-child relationships and collect the roots */
+    for (struct message_node *mn = messages; mn; mn = mn->next) {
+        struct thread_node *node = hash_find(mn->m.id, mn->m.id_len);
+
+        /* Skip messages whose id is owned by another message's node, so
+         * no node is linked or listed twice */
+        if (!node || node->msg != &mn->m) continue;
+
+        /* Look for parent via In-Reply-To */
+        if (mn->m.in_reply_to && mn->m.in_reply_to_len > 0) {
+            struct thread_node *parent = hash_find(mn->m.in_reply_to,
+                                                   mn->m.in_reply_to_len);
+            /* Refuse self-replies and reply cycles: such a node stays
+             * parentless and is reported as a root below */
+            if (parent && !in_parent_chain(parent, node))
+                add_child(parent, node);
+        }
+
+        /* TODO: Parse References header for more complex threading */
+
+        if (node->parent) continue;
+
+        /* No parent found - this is a root */
+        if (root_count == root_capacity) {
+            int grown_capacity = root_capacity * 2;
+            /* Keep the old pointer until realloc is known to succeed */
+            struct thread_node **grown =
+                realloc(roots, (size_t)grown_capacity * sizeof(*roots));
+            if (!grown) {
+                fprintf(stderr, "build_threads: out of memory, "
+                                "remaining threads dropped\n");
+                break;
+            }
+            roots = grown;
+            root_capacity = grown_capacity;
+        }
+        roots[root_count++] = node;
+    }
+
+    *num_threads = root_count;
+    hash_free();
+    return roots;
+}
+
+/*
+ * Debug helper (TODO Remove when tested): print one node and, recursively,
+ * its children as an indented tree. prefix is the indentation accumulated
+ * from the ancestors; is_last selects the connector drawn for this node and
+ * the padding passed down to its children.
+ */
+static void print_thread_node(struct thread_node *node, const char *prefix, int is_last) {
+    if (node == NULL)
+        return;
+
+    const char *connector = is_last ? "└─ " : "├─ ";
+    printf("%s%s", prefix, connector);
+
+    if (!node->msg) {
+        printf("(placeholder)\n");
+    } else {
+        printf("%.*s (depth=%d)\n",
+               node->msg->subject_len, node->msg->subject,
+               node->depth);
+    }
+
+    /* Indentation handed to the children: parent's prefix plus one column */
+    char child_prefix[1024];
+    const char *padding = is_last ? " " : "│ ";
+    snprintf(child_prefix, sizeof(child_prefix), "%s%s", prefix, padding);
+
+    for (struct thread_node *kid = node->child; kid != NULL; kid = kid->sibling) {
+        print_thread_node(kid, child_prefix, kid->sibling == NULL);
+    }
+}
+
+/*
+ * Debug helper: print a banner, then every thread in roots[0..num_threads)
+ * as a numbered tree followed by a blank line.
+ */
+void print_threads(struct thread_node **roots, int num_threads) {
+    int idx = 0;
+
+    printf("\n=== Thread Structure ===\n\n");
+
+    while (idx < num_threads) {
+        struct thread_node *root = roots[idx];
+
+        idx++; /* threads are numbered from 1 */
+        printf("Thread %d:\n", idx);
+        print_thread_node(root, "", 1);
+        printf("\n");
+    }
+}
diff --git a/thread.h b/thread.h
@@ -0,0 +1,7 @@
+#ifndef THREAD_H
+#define THREAD_H
+
+struct thread_node **build_threads(struct message_node *messages, int *num_threads);
+void print_threads(struct thread_node **roots, int num_threads);
+
+#endif