stamail

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit bb57ecf91f513883ed9b761bc1e4853ba1b3565b
parent 1794f2f4ab1d9691963832b9fee1ea959f3ad632
Author: Nathaniel Chappelle <nathaniel@chappelle.dev>
Date:   Thu, 29 Jan 2026 15:40:55 -0800

Threading works! ghost messages gone too

Diffstat:
A mb2md.pl | 1856 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M stamail.c | 26 ++++++++++++++++++++++++--
M stamail.h | 2 +-
3 files changed, 1881 insertions(+), 3 deletions(-)

diff --git a/mb2md.pl b/mb2md.pl @@ -0,0 +1,1856 @@ +#!/usr/bin/perl -w +# +# $Id: mb2md.pl,v 1.26 2004/03/28 00:09:46 juri Exp $ +# +# mb2md-3.20.pl Converts Mbox mailboxes to Maildir format. +# + +# !! This is a version modified for Dovecot. Use Dovecot mailing list +# !! <dovecot@dovecot.org> for questions, patches, etc. You don't have to be +# !! subscribed to send mail there. Do not send mail directly to people +# !! listed below. + +# Public domain. +# +# currently maintained by: +# Juri Haberland <juri@koschikode.com> +# initially written by: +# Robin Whittle +# +# This script's web abode is http://batleth.sapienti-sat.org/projects/mb2md/ . +# For a changelog see http://batleth.sapienti-sat.org/projects/mb2md/changelog.txt +# +# The Mbox -> Maildir inner loop is based on qmail's script mbox2maildir, which +# was kludged by Ivan Kohler in 1997 from convertandcreate (public domain) +# by Russel Nelson. Both these convert a single mailspool file. +# +# The qmail distribution has a maildir2mbox.c program. +# +# What it does: +# ============= +# +# Reads a directory full of Mbox format mailboxes and creates a set of +# Maildir format mailboxes. Some details of this are to suit Courier +# IMAP's naming conventions for Maildir mailboxes. +# +# http://www.inter7.com/courierimap/ +# +# This is intended to automate the conversion of the old +# /var/spool/mail/blah file - with one call of this script - and to +# convert one or more mailboxes in a specified directory with separate +# calls with other command line arguments. +# +# Run this as the user - in these examples "blah". + +# This version supports conversion of: +# +# Date The date-time in the "From " line of the message in the +# Mbox format is the date when the message was *received*. +# This is transformed into the date-time of the file which +# contains the message in the Maildir mailbox. +# +# This relies on the Date::Parse perl module and the utime +# perl function.
+# +# The script tries to cope with errant forms of the +# Mbox "From " line which it may encounter, but if +# there is something really screwy in a From line, +# then perhaps the script will fail when "touch" +# is given an invalid date. Please report the +# exact nature of any such "From " line! +# +# +# Flagged +# Replied +# Read = Seen +# Tagged for Deletion +# +# In the Mbox message, flags for these are found in the +# "Status: N" or "X-Status: N" headers, where "N" is 0 +# or more of the following characters in the left column. +# +# They are converted to characters in the right column, +# which become the last characters of the file name, +# following the ":2," which indicates IMAP message status. +# +# +# F -> F Flagged +# A -> R Replied +# R -> S Read = Seen +# D -> T Tagged for Deletion (Trash) +# +# This is based on the work of Philip Mak who wrote a +# completely separate Mbox -> Maildir converter called +# perfect_maildir and posted it to the Mutt-users mailing +# list on 25 December 2001: +# +# http://www.mail-archive.com/mutt-users@mutt.org/msg21872.html +# +# Michael Best originally integrated those changes into mb2md. +# +# UIDs (Dovecot and Courier) +# Using the -U or -u options will cause this program to maintain +# UIDVALIDITY and UIDLAST for folders and UIDs for individual +# messages. The X-IMAP:, X-IMAPbase:, and X-UID: headers are +# examined and appropriate files generated for Dovecot or Courier +# in the destination Maildir to ensure these values are all kept. +# +# UID support added by Julian Fitzell <jfitzell@gmail.com> June, 2008 +# +# Message Keywords (Dovecot only) +# Using the -K option will cause this program to maintain message +# keywords (also known by other names such as tags). This is +# currently only supported for Dovecot and involves looking at +# the X-IMAP:, X-IMAPbase:, and X-Keywords: headers. The keywords +# are written to a file in the Maildir which maps them to flags. 
+# The flags are then appended the message filenames. +# +# Keyword support added by Julian Fitzell <jfitzell@gmail.com> June, 2008 +# +# In addition, the names of the message files in the Maildir are of a +# regular length and are of the form: +# +# 7654321.000123.mbox:2,xxx +# +# Where "7654321" is the Unix time in seconds when the script was +# run and "000123" is the six zeroes padded message number as +# messages are converted from the Mbox file. "xxx" represents zero or +# more of the above flags F, R, S or T. +# +# Message Size Tags +# +# Additionally, there is optional support for including ,S= and ,W= tags +# before the colon. These message names are still valid Maildir filenames +# and the tags are used by mail programs to speed up calculation of quotas +# and the return of message sizes to IMAP clients. ,S= is part of the +# Maildir++ standard. +# (See: http://www.inter7.com/courierimap/README.maildirquota.html ) +# As far as I can tell, ,W= is probably only used by Dovecot. +# (See: http://wiki.dovecot.org/MailboxFormat/Maildir ) +# +# Size Tags added by Julian Fitzell <jfitzell@gmail.com> June, 2008 +# +# --------------------------------------------------------------------- +# +# +# USAGE +# ===== +# +# Run this as the user of the mailboxes, not as root. +# +# +# mb2md -h +# mb2md [-c] [-K] [-U|-u] [-S] [-W] -m [-d destdir] +# mb2md [-c] [-K] [-U|-u] [-S] [-W] -s sourcefile [-d destdir] +# mb2md [-c] [-K] [-U|-u] [-S] [-W] -s sourcedir [-l wu-mailboxlist] [-R|-f somefolder] [-d destdir] [-r strip_extension] +# +# -c use the Content-Length: headers (if present) to find the +# beginning of the next message +# Use with caution! Results may be unreliable. I recommend to do +# a run without "-c" first and only use it if you are certain, +# that the mbox in question really needs the "-c" option +# +# -K Preserve message keywords in a Dovecot-compatible way. 
This +# looks for X-Keywords: tags and X-IMAP: and X-IMAPbase: tags +# to determine keywords for messages and creates a Dovecot- +# compatible "dovecot-keywords" file in "destdir" +# NOTE: NO LOCKING IS DONE AND THE FILE MUST NOT ALREADY EXIST. +# IF YOU USE THIS OPTION ON A MAILDIR THAT MAY BE ACCESSED BY +# ANOTHER PROGRAM AT THE SAME TIME, STRANGE THINGS MAY HAPPEN. +# +# -U Preserve message UIDs in a Dovecot-compatible way +# Looks for X-UID:, X-IMAP:, and X-IMAPbase: headers and +# creates a Dovecot-compatible dovecot-uidlist file in +# "destdir" +# NOTE: NO LOCKING IS DONE AND THE FILE MUST NOT ALREADY EXIST. +# IF YOU USE THIS OPTION ON A MAILDIR THAT MAY BE ACCESSED BY +# ANOTHER PROGRAM AT THE SAME TIME, STRANGE THINGS MAY HAPPEN. +# +# -u Same as -U above, except creates a Courier IMAP-compatible +# courierimapuiddb file instead. The only difference according +# to http://wiki.dovecot.org/MailboxFormat/Maildir is that +# Courier IMAP only stores the maildir file's basename +# (everything before the colon) +# NOTE: NO LOCKING IS DONE AND THE FILE MUST NOT ALREADY EXIST. +# IF YOU USE THIS OPTION ON A MAILDIR THAT MAY BE ACCESSED BY +# ANOTHER PROGRAM AT THE SAME TIME, STRANGE THINGS MAY HAPPEN. +# +# -S Add Maildir++ standard ,S= tag to the message filenames +# indicating the size of the message on disk. This can be used +# by Courier and Dovecot in calculating quotas. +# I think Dovecot always uses this but not sure about Courier. +# For Exim, see the quota_size_regex and maildir_tag config +# statements. +# +# -W Add ,W= tag to the message filename indicating the RFC822.SIZE +# of the message. This is the size of the message when actually +# sent to an IMAP client with LF characters converted to CRLF +# pairs as per the spec. Dovecot uses this to speed up returning +# these sizes. Not sure if any other applications use it. 
+# +# -m If this is used then the source will +# be the single mailbox at /var/spool/mail/blah for +# user blah and the destination mailbox will be the +# "destdir" mailbox itself. +# +# +# -s source Directory or file relative to the user's home directory, +# which is where the "somefolders" directories are located. +# Or if starting with a "/" it is taken as an +# absolute path, e.g. /mnt/oldmail/user +# +# or +# +# A single mbox file which will be converted to +# the destdir. +# +# -R If defined, do not skip directories found in a mailbox +# directory, but runs recursively into each of them, +# creating all wanted folders in Maildir. +# Incompatible with '-f' +# +# -f somefolder Directories, relative to "sourcedir" where the Mbox files +# are. All mailboxes in the "sourcedir" +# directory will be converted and placed in the +# "destdir" directory. (Typically the Inbox directory +# which in this instance is also functioning as a +# folder for other mailboxes.) +# +# The "somefolder" directory +# name will be encoded into the new mailboxes' names. +# See the examples below. +# +# This does not save an UW IMAP dummy message file +# at the start of the Mbox file. Small changes +# in the code could adapt it for looking for +# other distinctive patterns of dummy messages too. +# +# Don't let the source directory you give as "somefolders" +# contain any "."s in its name, unless you want to +# create subfolders from the IMAP user's point of +# view. See the example below. +# +# Incompatible with '-R' +# +# +# -d destdir Directory where the Maildir format directories will be created. +# If not given, then the destination will be ~/Maildir . +# Typically, this is what the IMAP server sees as the +# Inbox and the folder for all user mailboxes. +# If this begins with a '/' the path is considered to be +# absolute, otherwise it is relative to the user's +# home directory.
+# +# -r strip_ext If defined this extension will be stripped from +# the original mailbox file name before creating +# the corresponding maildir. The extension must be +# given without the leading dot ("."). See the example below. +# +# -l WU-file File containing the list of subscribed folders. If +# migrating from WU-IMAP the list of subscribed folders will +# be found in the file called .mailboxlist in the user's +# home directory. This will convert all subscribed folders +# for a single user: +# /bin/mb2md -s mail -l .mailboxlist -R -d Maildir +# and for all users in a directory as root you can do the +# following: +# for i in *; do echo $i;su - $i -c "/bin/mb2md -s mail -l .mailboxlist -R -d Maildir";done +# +# +# Example +# ======= +# +# We have a bunch of directories of Mbox mailboxes located at +# /home/blah/oldmail/ +# +# /home/blah/oldmail/fffff +# /home/blah/oldmail/ggggg +# /home/blah/oldmail/xxx/aaaa +# /home/blah/oldmail/xxx/bbbb +# /home/blah/oldmail/xxx/cccc +# /home/blah/oldmail/xxx/dddd +# /home/blah/oldmail/yyyy/huey +# /home/blah/oldmail/yyyy/duey +# /home/blah/oldmail/yyyy/louie +# +# With the UW IMAP server, fffff and ggggg would have appeared in the root +# of this mail server, along with the Inbox. aaaa, bbbb etc, would have +# appeared in a folder called xxx from that root, and xxx was just a folder +# not a mailbox for storing messages. +# +# We also have the mailspool Inbox at: +# +# /var/spool/mail/blah +# +# +# To convert these, as user blah, we give the first command: +# +# mb2md -m +# +# The main Maildir directory will be created if it does not exist. +# (This is true of any argument options, not just "-m".) +# +# /home/blah/Maildir/ +# +# It has the following subdirectories: +# +# /home/blah/Maildir/tmp/ +# /home/blah/Maildir/new/ +# /home/blah/Maildir/cur/ +# +# Then /var/spool/mail/blah file is read, split into individual files and +# written into /home/blah/Maildir/cur/ .
+# +# Now we give the second command: +# +# mb2md -s oldmail -R +# +# This reads recursively all Mbox mailboxes and creates: +# +# /home/blah/Maildir/.fffff/ +# /home/blah/Maildir/.ggggg/ +# /home/blah/Maildir/.xxx/ +# /home/blah/Maildir/.xxx.aaaa/ +# /home/blah/Maildir/.xxx.bbbb/ +# /home/blah/Maildir/.xxx.cccc/ +# /home/blah/Maildir/.xxx.dddd/ +# /home/blah/Maildir/.yyyy/ +# /home/blah/Maildir/.yyyy.huey/ +# /home/blah/Maildir/.yyyy.duey/ +# /home/blah/Maildir/.yyyy.louie/ +# +# The result, from the IMAP client's point of view is: +# +# Inbox ----------------- +# | +# | fffff ----------- +# | ggggg ----------- +# | +# - xxx ------------- +# | | aaaa -------- +# | | bbbb -------- +# | | cccc -------- +# | | dddd -------- +# | +# - yyyy ------------ +# | huey ------- +# | duey ------- +# | louie ------ +# +# Note that although ~/Maildir/.xxx/ and ~/Maildir/.yyyy may appear +# as folders to the IMAP client the above commands do not generate +# any Maildir folders of these names. These are simply elements +# of the names of other Maildir directories. (if you used '-R', they +# will be able to act as normal folders, containing messages AND folders) +# +# With a separate run of this script, using just the "-s" option +# without "-f" nor "-R", it would be possible to create mailboxes which +# appear at the same location as far as the IMAP client is +# concerned. By having Mbox mailboxes in some directory: +# ~/oldmail/nn/ of the form: +# +# /home/blah/oldmail/nn/xxxx +# /home/blah/oldmail/nn/yyyyy +# +# then the command: +# +# mb2md -s oldmail/nn +# +# will create two new Maildirs: +# +# /home/blah/Maildir/.xxx/ +# /home/blah/Maildir/.yyyy/ +# +# Then what used to be the xxx and yyyy folders now function as +# mailboxes too.
Netscape 4.77 needed to be put to sleep and given ECT +# to recognise this - deleting the contents of (Win2k example): +# +# C:\Program Files\Netscape\Users\uu\ImapMail\aaa.bbb.ccc\ +# +# where "uu" is the user and "aaa.bbb.ccc" is the IMAP server +# +# I often find that deleting all this directory's contents, except +# "rules.dat", forces Netscape back to reality after its IMAP innards +# have become twisted. Then maybe use File > Subscribe - but this +# seems incapable of subscribing to folders. +# +# For Outlook Express, select the mail server, then click the +# "IMAP Folders" button and use "Reset list". In the "All" +# window, select the mailboxes you want to see in normal +# usage. +# +# +# This script did not recurse subdirectories or delete old mailboxes, before addition of the '-R' parameter :) +# +# Be sure not to be accessing the Mbox mailboxes while running this +# script. It does not attempt to lock them. Likewise, don't run two +# copies of this script either. +# +# +# Trickier usage . . . +# ==================== +# +# If you have a bunch of mailboxes in a directory ~/oldmail/doors/ +# and you want them to appear in folders such as: +# +# ~/Maildir/.music.bands.doors.Jim +# ~/Maildir/.music.bands.doors.John +# +# etc. so they appear in an IMAP folder: +# +# Inbox ----------------- +# | music +# | bands +# | doors +# | Jim +# | John +# | Robbie +# | Ray +# +# Then you could rename the source directory to: +# +# ~/oldmail/music.bands.doors/ +# +# then use: +# +# mb2md -s oldmail -f music.bands.doors +# +# +# Or simply use '-R' switch with: +# mb2md -s oldmail -R +# +# +# Stripping mailbox extensions: +# ============================= +# +# If you want to convert mailboxes that came for example from +# a Windows box than you might want to strip the extension of +# the mailbox name so that it won't create a subfolder in your +# mail clients view. 
+# +# Example: +# You have several mailboxes named Trash.mbx, Sent.mbx, Drafts.mbx +# If you don't strip the extension "mbx" you will get the following +# hierarchy: +# +# Inbox +# | +# - Trash +# | | mbx +# | +# - Sent +# | | mbx +# | +# - Drafts +# | mbx +# +# This is more than ugly! +# Just use: +# mb2md -s oldmail -r mbx +# +# Note: don't specify the dot! It will be stripped off +# automagically ;) +# +#------------------------------------------------------------------------------ + + +use strict; +use Getopt::Std; +use Date::Parse; +use IO::Handle; +use Fcntl; + + # print the usage message +sub usage() { + print "Usage:\n"; + print " mb2md -h\n"; + print " mb2md [-c] [-K] [-U|-u] [-S] [-W] -m [-d destdir]\n"; + print " mb2md [-c] [-K] [-U|-u] [-S] [-W] -s sourcefile [-d destdir]\n"; + die " mb2md [-c] [-K] [-U|-u] [-S] [-W] -s sourcedir [-l wu-mailboxlist] [-R|-f somefolder] [-d destdir] [-r strip_extension]\n"; +} + # get options +my %opts; +getopts('d:f:chms:r:l:RUuKSW', \%opts) || usage(); +usage() if ( defined($opts{h}) + || (!defined($opts{m}) && !defined($opts{s})) ); + +# Get uid, username and home dir +my ($name, $passwd, $uid, $gid, $quota, $comment, $gcos, $homedir, $shell) = getpwuid($<); + +# Get arguments and determine source +# and target directories. 
+my $mbroot = undef; # this is the base directory for the mboxes +my $mbdir = undef; # this is an mbox dir relative to the $mbroot +my $mbfile = undef; # this is an mbox file +my $dest = undef; +my $strip_ext = undef; +my $use_cl = undef; # defines whether we use the Content-Length: header if present +my $create_dovecot_keywords = 0; # defines whether we generate a Dovecot-compatible keywords file +my $create_dovecot_uidlist = 0; # defines whether we generate a Dovecot-compatible uidlist UID file +my $create_courier_uidlist = 0; # defines whether we generate a Courier IMAP-compatible courierimapuiddb UID file +my $note_message_size = 0; # Whether we should add the ,S= message size tag +my $note_rfc822_size = 0; # Whether we should add the ,W= RFC822.SIZE tag + +# if option "-c" is given, we use the Content-Length: header if present +# dangerous! may be unreliable, as the whole CL stuff is a bad idea +if (defined($opts{c})) +{ + $use_cl = 1; +} else { + $use_cl = 0; +} + +# The -U and -u options cannot be specified together +if (defined($opts{U}) && defined($opts{u})) +{ + die("Options -U and -u cannot be specified together"); +} + +# if option "-K" is given, we will generate a Dovecot-compatible +# dovecot-keywords file in each Maildir +if (defined($opts{K})) +{ + $create_dovecot_keywords = 1; +} + +# if option "-U" is given, we will generate a Dovecot-compatible +# dovecot-uidlist file in each Maildir +if (defined($opts{U})) +{ + $create_dovecot_uidlist = 1; +} + +# if option "-u" is given, we will generate a Courier IMAP-compatible +# courierimapuiddb file in each Maildir +if (defined($opts{u})) +{ + $create_courier_uidlist = 1; +} + +if (defined($opts{S})) +{ + $note_message_size = 1; +} + +if (defined($opts{W})) +{ + $note_rfc822_size = 1; +} + +# first, if the user has gone the -m option +# we simply convert their mailfile +if (defined($opts{m})) +{ + if (defined($ENV{'MAIL'})) { + $mbfile = $ENV{'MAIL'}; + } elsif ( -f "/var/spool/mail/$name" ) { + $mbfile = 
"/var/spool/mail/$name" + } elsif ( -f "/var/mail/$name" ) { + $mbfile = "/var/mail/$name" + } else { + die("I searched \$MAIL, /var/spool/mail/$name and /var/mail/$name, ". + "but I couldn't find your mail spool file - "); + } +} +# see if the user has specified a source directory +elsif (defined($opts{s})) +{ + # if opts{s} doesn't start with a "/" then + # it is a subdir of the users $home + # if it does start with a "/" then + # let's take $mbroot as a absolut path + $opts{s} = "$homedir/$opts{s}" if ($opts{s} !~ /^\//); + + # check if the given source is a mbox file + if (-f $opts{s}) + { + $mbfile = $opts{s}; + } + + # otherwise check if it is a directory + elsif (-d $opts{s}) + { + $mbroot = $opts{s}; + # get rid of trailing /'s + $mbroot =~ s/\/$//; + + # check if we have a specified sub directory, + # otherwise the sub directory is '.' + if (defined($opts{f})) + { + $mbdir = $opts{f}; + # get rid of trailing /'s + $mbdir =~ s/\/$//; + } + } + + # otherwise we have an error + else + { + die("Fatal: Source is not an mbox file or a directory!\n"); + } +} + + +# get the dest +defined($opts{d}) && ($dest = $opts{d}) || ($dest = "Maildir"); +# see if we have anything to strip +defined($opts{r}) && ($strip_ext = $opts{r}); +# No '-f' with '-R' +if((defined($opts{R}))&&(defined($opts{f}))) { die "No recursion with \"-f\"";} +# Get list of folders +my @flist; +if(defined($opts{l})) +{ + open (LIST,$opts{l}) or die "Could not open mailbox list $opts{l}: $!"; + @flist=<LIST>; + close LIST; +} + +# if the destination is relative to the home dir, +# check that the home dir exists +die("Fatal: home dir $homedir doesn't exist.\n") if ($dest !~ /^\// && ! -e $homedir); + +# +# form the destination value +# slap the home dir on the front of the dest if the dest does not begin +# with a '/' +$dest = "$homedir/$dest" if ($dest !~ /^\//); +# get rid of trailing /'s +$dest =~ s/\/$//; + + +# Count the number of mailboxes, or +# at least files, we found. 
+my $mailboxcount = 0; + +# Since we'll be making sub directories of the main +# Maildir, we need to make sure that the main maildir +# exists +&maildirmake($dest); + +# Now we do different things depending on whether we convert one mbox +# file or a directory of mbox files +if (defined($mbfile)) +{ + if (!isamailboxfile($mbfile)) + { + print "Skipping $mbfile: not a mbox file\n"; + } + else + { + print "Converting $mbfile to maildir: $dest\n"; + # this is easy, we just run the convert function + &convert($mbfile, $dest); + } +} +# if '-f' was used ... +elsif (defined($mbdir)) +{ + print "Converting mboxdir/mbdir: $mbroot/$mbdir to maildir: $dest/\n"; + + # Now set our source directory + my $sourcedir = "$mbroot/$mbdir"; + + # check that the directory we are supposed to be finding mbox + # files in, exists and is a directory + -e $sourcedir or die("Fatal: MBDIR directory $sourcedir/ does not exist.\n"); + -d $sourcedir or die("Fatal: MBDIR $sourcedir is not a directory.\n"); + + + &convertit($mbdir,""); +} +# Else, let's work in $mbroot +else +{ + opendir(SDIR, $mbroot) + or die("Fatal: Cannot open source directory $mbroot/ \n"); + + + while (my $sourcefile = readdir(SDIR)) + { + if (-d "$mbroot/$sourcefile") { + # Recurse only if requested (to be changed ?) + if (defined($opts{R})) { + print "convertit($sourcefile,\"\")\n"; + &convertit($sourcefile,""); + } else { + print("$sourcefile is a directory, but '-R' was not used... skipping\n"); + } + } + elsif (!-f "$mbroot/$sourcefile") + { + print "Skipping $mbroot/$sourcefile : not a file nor a dir\n"; + next; + } + elsif (!isamailboxfile("$mbroot/$sourcefile")) + { + print "Skipping $mbroot/$sourcefile : not a mbox file\n"; + next; + } + else + { + &convertit($sourcefile,""); + } + } # end of "while ($sfile = readdir(SDIR))" loop. + closedir(SDIR); + printf("$mailboxcount files processed.\n"); +} +# + +exit 0; + +# My debbugging placeholder I can put somewhere to show how far the script ran. 
+# die("So far so good.\n\n"); + +# The isamailboxfile function +# ---------------------- +# +# Here we check if the file is a mailbox file, not an address-book or +# something else. +# If file is empty, we say it is a mbox, to create it empty. +# +# Returns 1 if file is said mbox, 0 else. +sub isamailboxfile { + my ($mbxfile) = @_; + return 1 if(-z $mbxfile); + sysopen(MBXFILE, "$mbxfile", O_RDONLY) or die "Could not open $mbxfile ! \n"; + while(<MBXFILE>) { + if (/^From/) { + close(MBXFILE); + return 1; + } + else { + close(MBXFILE); + return 0; + } + } +} + +# The convertit function +# ----------------------- +# +# This function creates all subdirs in maildir, and calls convert() +# for each mbox file. +# Yes, it becomes the 'main loop' :) +sub convertit +{ + # Get subdir as argument + my ($dir,$oldpath) = @_; + + $oldpath =~ s/\/\///; + + # Skip files beginning with '.' since they are + # not normally mbox files nor dirs (includes '.' and '..') + if ($dir =~ /^\./) + { + print "Skipping $dir : name begins with a '.'\n"; + return; + } + my $destinationdir = $dir; + my $temppath = $oldpath; + + # We don't want to have .'s in the $targetfile file + # name because they will become directories in the + # Maildir. Therefore we convert them to _'s + $temppath =~ s/\./\_/g; + $destinationdir =~ s/\./\_/g; + + # Appending $oldpath => path is only missing $dest + $destinationdir = "$temppath.$destinationdir"; + + # Converting '/' to '.' 
in $destinationdir + $destinationdir =~s/\/+/\./g; + + # source dir + my $srcdir="$mbroot/$oldpath/$dir"; + + print("convertit(): Converting $dir in $mbroot/$oldpath to $dest/$destinationdir\n"); + &maildirmake("$dest/$destinationdir"); + + # Subfolders are Maildir++ folders and should be marked by the + # presence of an empty "maildirfolder" file + sysopen(F, "$dest/$destinationdir/maildirfolder", O_CREAT|O_WRONLY, 0660) && close F; + + print("destination = $destinationdir\n"); + if (-d $srcdir) { + opendir(SUBDIR, "$srcdir") or die "can't open $srcdir !\n"; + my @subdirlist=readdir(SUBDIR); + closedir(SUBDIR); + foreach (@subdirlist) { + next if (/^\.+$/); + print("Sub: $_\n"); + print("convertit($_,\"$oldpath/$dir\")\n"); + &convertit($_,"$oldpath/$dir"); + } + } else { + # Source file verifs .... + # + return if(defined($opts{l}) && !inlist("$oldpath/$dir",@flist)); + + if (!isamailboxfile("$mbroot/$oldpath/$dir")) + { + print "Skipping $dir (is not mbox)\n"; + return; + } + + # target file verifs... 
+ # + # if $strip_extension is defined, + # strip it off the $targetfile + defined($strip_ext) && ($destinationdir =~ s/\.$strip_ext$//); + &convert("$mbroot/$oldpath/$dir","$dest/$destinationdir"); + $mailboxcount++; + } +} +# The maildirmake function +# ------------------------ +# +# It does the same thing that the maildirmake binary that +# comes with courier-imap distribution +# +sub maildirmake +{ + foreach(@_) { + -d $_ or mkdir $_,0770 or die("Fatal: Directory $_ doesn't exist and can't be created.\n"); + + -d "$_/tmp" or mkdir("$_/tmp",0770) or die("Fatal: Unable to make $_/tmp/ subdirectory.\n"); + -d "$_/new" or mkdir("$_/new",0770) or die("Fatal: Unable to make $_/new/ subdirectory.\n"); + -d "$_/cur" or mkdir("$_/cur",0770) or die("Fatal: Unable to make $_/cur/ subdirectory.\n"); + } +} + +# The inlist function +# ------------------------ +# +# It checks that the folder to be converted is in the list of subscribed +# folders in WU-IMAP +# +sub inlist +{ + my ($file,@flist) = @_; + my $valid = 0; + # Get rid of the first / if any + $file =~ s/^\///; + foreach my $folder (@flist) { + chomp $folder; + if ($file eq $folder) { + $valid = 1; + last; + } + } + if (!$valid) { + print "$file is not in list\n"; + } + else { + print "$file is in list\n"; + } + + return $valid; +} + +# + +# The convert function +# --------------------- +# +# This function does the down and dirty work of +# actually converting the mbox to a maildir +# +sub convert +{ + # get the source and destination as arguments + my ($mbox, $maildir) = @_; + + print("Source Mbox is $mbox\n"); + print("Target Maildir is $maildir \n") ; + + # create the directories for the new maildir + # + # if it is the root maildir (ie. converting the inbox) + # these already exist but thats not a big issue + + &maildirmake($maildir); + + # Change to the target mailbox directory. + + chdir "$maildir" ; + + # Converts a Mbox to multiple files + # in a Maildir. + # This is adapted from mbox2maildir. 
+ # + # Open the Mbox mailbox file. + + + if (sysopen(MBOX, "$mbox", O_RDONLY)) + { + #printf("Converting Mbox $mbox . . . \n"); + } + else + { + die("Fatal: unable to open input mailbox file: $mbox ! \n"); + } + + # This loop scans the input mailbox for + # a line starting with "From ". The + # "^" before it is pattern-matching + # lingo for it being at the start of a + # line. + # + # Each email in Mbox mailbox starts + # with such a line, which is why any + # such line in the body of the email + # has to have a ">" put in front of it. + # + # This is not required in a Maildir + # mailbox, and some majik below + # finds any such quoted "> From"s and + # gets rid of the "> " quote. + # + # Each email is put in a file + # in the cur/ subdirectory with a + # name of the form: + # + # nnnnnnnnn.cccc.mbox:2,XXXX + # + # where: + # "nnnnnnnnn" is the Unix time since + # 1970 when this script started + # running, incremented by 1 for + # every email. This is to ensure + # unique names for each message + # file. + # + # ".cccc" is the message count of + # messages from this mbox. + # + # ".mbox" is just to indicate that + # this message was converted from + # an Mbox mailbox. + # + # ":2," is the start of potentially + # multiple IMAP flag characters + # "XXXX", but may be followed by + # nothing. + # + # This is sort-of compliant with + # the Maildir naming conventions + # specified at: + # + # http://www.qmail.org/man/man5/maildir.html + # + # This approach does not involve the + # process ID or the hostname, but it is + # probably good enough. + # + # When the IMAP server looks at this + # mailbox, it will move the files to + # the cur/ directory and change their + # names as it pleases. In the case + # of Courier IMAP, the names will + # become like: + # + # 995096541.25351.mbox:2,S + # + # with 25351 being Courier IMAP's + # process ID. The :2, is the start + # of the flags, and the "S" means + # that this one has been seen by + # the user. 
(But is this the same + # meaning as the user actually + # having opened the message to see + # its contents, rather than just the + # IMAP server having been asked to + # list the message's Subject etc. + # so the client could list it in the + # visible Inbox?) + # + # This contrasts with a message + # created by Courier IMAP, say with + # a message copy, which is like: + # + # 995096541.25351.zair,S=14285:2,S + # + # where ",S=14285" is the size of the + # message in bytes. + # + # Courier Maildrop's names are similar + # but lack the ":2,XXXX" flags . . . + # except for my modified Maildrop + # which can deliver them with a + # ":2,T" - flagged for deletion. + # + # I have extended the logic of the + # per-message inner loop to stop + # saving a file for a message with: + # + # Subject: DON'T DELETE THIS MESSAGE -- FOLDER INTERNAL DATA + # + # This is the dummy message, always + # at the start of an Mbox format + # mailbox file - and is put there + # by UW IMAPD. Since quite a few + # people will use this for + # converting from a UW system, + # I figure it is worth it. + # + # I will not save any such message + # file for the dummy message. + # + # Plan + # ---- + # + # We want to read the entire Mbox file, whilst + # going through a loop for each message we find. + # + # We want to read all the headers of the message, + # starting with the "From " line. For that "From " + # line we want to get a date. + # + # For all other header lines, we want to store them + # in $headers whilst parsing them to find: + # + # 1 - Any flags in the "Status: " or "X-Status: " or + # "X-Mozilla-Status: " lines. + # + # 2 - A subject line indicating this is the dummy message + # at the start (typically, but not necessarily) of + # the Mbox. + # + # Once we reach the end of the headers, we will crunch any + # flags we found to create a file name. Then, unless this is + # the dummy message we create that file and write all the + # headers to it. 
+ # + # Then we continue reading the Mbox, converting ">From " to + # "From " and writing it to the file, until we reach one of: + # + # 1 - Another "From " line (indicating the start of another + # message). + # + # or + # + # 2 - The end of the Mbox. + # + # In the former case, which we detect at the start of the loop + # we need to close the file and touch it to alter its date-time. + # + # In the later case, we also need to close the file and touch + # it to alter its date-time - but this is beyond the end of the + # loop. + + + # Variables + # --------- + + my $messagecount = 0; + + # For generating unique filenames for + # each message. Initialise it here with + # numeric time in seconds since 1970. + my $unique = time; + + # Name of message file to delete if we found that + # it was created by reading the Mbox dummy message. + + my $deletedummy = ''; + + # To store the complete "From (address) (date-time) + # which delineates the start of each message + # in the Mbox + my $fromline = ''; + + + # Set to 1 when we are reading the header lines, + # including the "From " line. + # + # 0 means we are reading the message body and looking + # for another "From " line. + + my $inheaders = 0; + + # Variable to hold all headers (apart from + # the first line "From ...." which is not + # part of the message itself. + my $headers = ''; + + # Variable to hold the accumulated characters + # we find in header lines of the type: + # + # Status: + # X-Status: + # X-Mozilla-Status: + # X-Evolution: + my $flags = ''; + + # To build the file name for the message in. + my $messagefn = ''; + + + # The date string from the "From " line of each + # message will be written here - and used by + # touch to alter the date-time of each message + # file. Put non-date text here to make it + # spit the dummy if my code fails to find a + # date to write into this. 
+ + my $receivedate = 'Bogus'; + + # The subject of the message + my $subject = ''; + + my $previous_line_was_empty = 1; + + # We record the message start line here, for error + # reporting. + my $startline; + + # If defined, we use this as the number of bytes in the + # message body rather than looking for a /^From / line. + my $contentlength; + + # A From line can occur either as the first + # line of a file, or after an empty line. + # Most mail systems will quote all From lines + # appearing in the message, but some will only + # do it when necessary. + # Since we initialise the variable to true, + # we don't need to check for beginning of file. + + # The path to the UID list file + my $uidlistfile; + if ($create_dovecot_uidlist) + { + $uidlistfile = "${maildir}/dovecot-uidlist"; + } else { + $uidlistfile = "${maildir}/courierimapuiddb"; + } + # Store the UIDVALIDITY and UIDLAST from the X-IMAP: + # header + my $uidvalidity; + my $uidlast = 0; + + # Store the UID for the current message + my $uidcurr = 0; + + # Array to hold all the UIDs and filenames for outputting + # into a uidlist file + my @uidlist; + my $douidlist = $create_dovecot_uidlist || $create_courier_uidlist; + if ($douidlist && scalar(stat($uidlistfile))) + { + $douidlist = 0; + printf("WARNING: Skipping UIDs for this folder. %s already exists.\n", $uidlistfile); + } + + # The path to the Dovecot keywords list + my $keywordsfile = "$maildir/dovecot-keywords"; + # Hash to hold a list of all valid keywords for the folder. + # We use a hash to make looking up keywords in there fast. + my %validkeywords; + # A list of already encountered keys. The index of each key + # is used when generating message filenames and they get + # written to the dovecot-keywords file. We also have a + # hash that maps from the keyword to the array index to + # facilitate checking if we already have an index for the + # keyword + my @keywords; + my %keywordshash; + + # List of keyword flags used by Dovecot.
The dovecot-keyword + # file maintains a 0-based index of keywords in use in the + # folder. The message filenames use the flags a-z to mark + # messages as having keywords (a=0, b=1, etc). Note that + # this means Dovecot only supports 26 different keywords + # per mail folder. This array maps the numeric indexes to + # the letter flags (in case Dovecot begins to use other + # flags in the future). + my @keywordflags = ('a'..'z'); + + # Store the keyword header found for the current message + my $messagekeywords; + + # If there already exists a dovecot-keywords file then + # we can't deal with keywords even if the user wants us to. + # It's not technically impossible, just more than this code + # can be bothered to deal with. + my $dokeywords = $create_dovecot_keywords; + if ($dokeywords && scalar(stat($keywordsfile))) + { + $dokeywords = 0; + printf("WARNING: Skipping keywords for this folder. %s already exists.\n", $keywordsfile); + } + + my $postclose = sub + { + if ($messagefn ne '' && $messagefn ne $deletedummy) + { + if ($note_message_size || $note_rfc822_size) + { + my $params = ""; + my $realsize = -s $messagefn; + + if ($note_message_size) + { + $params .= ",S=$realsize"; + } + + if ($note_rfc822_size && open(MSG, "<$messagefn")) + { + my $lfs = 0; + my $line; + while ($line = <MSG>) + { + $lfs += ($line =~ m/(?<!\r)\n/gs); + } + close(MSG); + $params .= ",W=" . 
($realsize + $lfs); + } + my $oldfn = $messagefn; + $messagefn =~ s/:/$params:/; + rename($oldfn, $messagefn); + $uidlist[-1] =~ s/:/$params:/; + } + + my $t = str2time($receivedate); + if (defined($t)) + { + utime $t, $t, $messagefn; + } else { + printf("WARNING: Unable to parse date for msg %d of %s\n", $messagecount, $mbox); + printf("%s\n", $receivedate); + } + } + }; + + while(<MBOX>) + { + # exchange possible Windows EOL (CRLF) with Unix EOL (LF) + $_ =~ s/\r\n$/\n/; + + if ( /^From (MAILER.DAEMON|.*@).*[0-9]{2}\:[0-9]{2}\:[0-9]{2} [0-9]{4}\n$/ + && $previous_line_was_empty + && (!defined $contentlength) + ) + { + # We are reading the "From " line which has an + # email address followed by a receive date. + # Turn on the $inheaders flag until we reach + # the end of the headers. + + $inheaders = 1; + + # In case we don't find an X-UID: header, set + # the UID for the current message to 1 higher + # than the previous message + $uidcurr += 1; + + # This is a new message so we need to undefine + # the message keyword header before looking at + # the new message (which may not have one) + undef($messagekeywords); + + # record the message start line + + $startline = $.; + + # If this is not the first run through the loop + # then this means we have already been working + # on a message. + + if ($messagecount > 0) + { + # If so, then close that message file and then + # use utime to change its date-time. + # + # Note this code should be duplicated to do + # the same thing at the end of the while loop + # since we must close and touch the final message + # file we were writing when we hit the end of the + # Mbox file. + + close (OUT); + &$postclose(); + } + + # Because we opened the Mbox file without any + # variable, I think this means that we have its + # current line in Perl's default variable "$_". + # So all sorts of pattern matching magic works + # directly on it. 
+ + # We are currently reading the first line starting with + # "From " which contains the date we want. + # + # This will be of the form: + # + # From dduck@test.org Wed Nov 24 11:05:35 1999 + # + # at least with UW-IMAP. + # + # However, I did find a nasty exception to this in my + # tests, of the form: + # + # "bounce-MusicNewsletter 5-rw=test.org"@announce2.mp3.com + # + # This makes it trickier to get rid of the email address, + # but I did find a way. I can't rule out that there would + # be some address like this with an "@" in the quoted + # portion too. + # + # Unfortunately, testing with an old Inbox Mbox file, + # I also found an instance where the email address + # had no @ sign at all. It was just an email + # account name, with no host. + # + # I could search for the day of the week. If I skipped + # at least one word of non-whitespace (1 or more contiguous + # non-whitespace characters) then searched for a day of + # the week, then I should be able to avoid almost + # every instance of a day of the week appearing in + # the email address. + # + # Do I need a failsafe arrangement to provide some + # other date to touch if I don't get what seems like + # a date in my resulting string? For now, no. + # + # I will take one approach if there is an @ in the + # "From " line and another (just skip the first word + # after "From ") if there is no @ in the line. + # + # If I knew more about Perl I would probably do it in + # a more elegant way. + + # Copy the current line into $fromline. + + $fromline = $_; + + # Now get rid of the "From ". " =~ s" means substitute. + # Find the word "From " at the start of the line and + # replace it with nothing. The nothing is what is + # between the second and third slash. + + $fromline =~ s/^From // ; + + + # Likewise get rid of the email address. + # This first section is if we determine there is one + # (or more . . . ) "@" characters in the line, which + # would normally be the case. 
+ + if ($fromline =~ m/@/) + { + # The line has at least one "@" in it, so we assume + # this is in the middle of an email address. + # + # If the email address had no spaces, then we could + # get rid of the whole thing by searching for any number + # of non-whitespace characters (\S) contiguously, and + # then I think a space. Subsitute nothing for this. + # + # $fromline =~ s/(\S)+ // ; + # + # But we need something to match any number of non-@ + # characters, then the "@" and then all the non-whitespace + # characters from there (which takes us to the end of + # "test.org") and then the space following that. + # + # A tutorial on regular expressions is: + # + # http://www.perldoc.com/perl5.6.1/pod/perlretut.html + # + # Get rid of all non-@ characters up to the first "@": + + $fromline =~ s/[^@]+//; + + + # Get rid of the "@". + + $fromline =~ s/@//; + } + # If there was an "@" in the line, then we have now + # removed the first one (lets hope there aren't more!) + # and everything which preceded it. + # + # we now remove either something like + # '(foo bar)'. eg. '(no mail address)', + # or everything after the '@' up to the trailing + # timezone + # + # FIXME: all those regexp should be combined to just one single one + + # If the first character is a quote, remove everything up to + # the next quote. + if ($fromline =~ m/^\s*"/) + { + $fromline =~ s/"[^"]*"//; + } else { + $fromline =~ s/(\((\S*| )+\)|\S+) *//; + } + + chomp $fromline; + + # Stash the date-time for later use. We will use it + # to touch the file after we have closed it. + + $receivedate = $fromline; + + # Debugging lines: + # + # print "$receivedate is the receivedate of message $messagecount.\n"; + # $receivedate = "Wed Nov 24 11:05:35 1999"; + # + # To look at the exact date-time of files: + # + # ls -lFa --full-time + # + # End of handling the "From " line. + } + + + # Now process header lines which are not the "From " line. + + if ( ($inheaders eq 1) + && (! 
/^From /) + ) + { + # Now we are reading the header lines after the "From " line. + # Keep looking for the blank line which indicates the end of the + # headers. + + + # ".=" means append the current line to the $headers + # variable. + # + # For some reason, I was getting two blank lines + # at the end of the headers, rather than one, + # so I decided not to read in the blank line + # which terminates the headers. + # + # Delete the "unless ($_ eq "\n")" to get rid + # of this kludge. + # + # Don't copy status headers, etc. if we've used + # the info in them already for something. + + $headers .= $_ unless ( ($_ eq "\n") || + (/^Status: /) || + (/^X-Status: /) || + (/^X-Mozilla-Status: /i) || + (/^X\-Evolution:\s+/oi) || + (/^X-IMAP(?:base)?: /) || + (/^X-UID: /) || + (/^X-Keywords:\s+/)); + + if (/^X-IMAP(?:base)?: (\d+)\s+(\d+)\s*([^\s].*)?\s*$/) + { + if (defined($uidvalidity)) + { + printf("WARNING: Second X-IMAP: header found. Ignoring it (line %d, msg %d).\n", $., $messagecount); + } else { + $uidvalidity = $1; + $uidlast = $2; + } + + # Valid keywords for the mailbox are stored + # in the X-IMAP: or X-IMAPbase: header. Any + # keywords in messages that are not in this + # list should be ignored + if (defined($3)) + { + foreach my $keyword (split(/\s+/, $3)) + { + $validkeywords{$keyword} = 1; + } + } + } + + if (/^X-UID: (\d+)/) + { + # UIDs must increase; we must have a UID at least 1 + # greater than the previous message + if ($1 < $uidcurr) + { + printf("WARNING: UID from X-UID: header too low. Ignoring it (line %d, msg %d).\n", $., $messagecount); + } else { + $uidcurr = $1; + } + } + + if (/^X-Keywords:\s+(.*)\s*$/) + { + # Grab the keywords for use when we generate the + # message filename below + $messagekeywords = $1; + } + + # Now scan the line for various status flags + # and to fine the Subject line. 
+ + $flags .= $1 if /^Status: ([A-Z]+)/; + $flags .= $1 if /^X-Status: ([A-Z]+)/; + if (/^X-Mozilla-Status: ([0-9a-f]{4})/i) + { + $flags .= 'R' if (hex($1) & 0x0001); + $flags .= 'A' if (hex($1) & 0x0002); + $flags .= 'D' if (hex($1) & 0x0008); + } + if(/^X\-Evolution:\s+\w{8}\-(\w{4})/oi) + { + $b = pack("H4", $1); #pack it as 4 digit hex (0x0000) + $b = unpack("B32", $b); #unpack into bit string + + # "usually" only the right most six bits are used + # however, I have come across a seventh bit in + # about 15 (out of 10,000) messages with this bit + # activated. + # I have not found any documentation in the source. + # If you find out what it does, please let me know. + + # Notes: + # Evolution 1.4 does mark forwarded messages. + # The sixth bit is to denote an attachment + + $flags .= 'A' if($b =~ /[01]{15}1/); #replied + $flags .= 'D' if($b =~ /[01]{14}1[01]{1}/); #deleted + $flags .= 'T' if($b =~ /[01]{13}1[01]{2}/); #draft + $flags .= 'F' if($b =~ /[01]{12}1[01]{3}/); #flagged + $flags .= 'R' if($b =~ /[01]{11}1[01]{4}/); #seen/read + } + $subject = $1 if /^Subject: (.*)$/; + if ($use_cl eq 1) + { + $contentlength = $1 if /^Content-Length: (\d+)$/; + } + + # Now look out for the end of the headers - a blank + # line. When we find it, create the file name and + # analyse the Subject line. + + if ($_ eq "\n") + { + # We are at the end of the headers. Set the + # $inheaders flag back to 0. + + $inheaders = 0; + + # Include the current newline in the content length + + ++$contentlength if defined $contentlength; + + # Create the file name for the current message. + # + # A simple version of this would be: + # + # $messagefn = "cur/$unique.$messagecount.mbox:2,"; + # + # This would create names with $messagecount values of + # 1, 2, etc. But for neatness when looking at a + # directory of such messages, sorted by filename, + # I want to have leading zeroes on message count, so + # that they would be 000001 etc. 
This makes them + # appear in message order rather than 1 being after + # 19 etc. So this is good for up to 999,999 messages + # in a mailbox. It is a cosmetic matter for a person + # looking into the Maildir directory manually. + # To do this, use sprintf instead with "%06d" for + # 6 characters of zero-padding: + + $messagefn = sprintf ("cur/%d.%06d.mbox:2,", $unique, $messagecount) ; + + # If the message has not been flagged as Opened + # then it should be put in the new/ folder. This + # Works with Exim/UW-IMAP folders but is otherwise + # untested. + $messagefn =~ s/^cur/new/ unless $flags =~ /O/; + + # Append flag characters to the end of the + # filename, according to flag characters + # collected from the message headers + + $messagefn .= 'F' if $flags =~ /F/; # Flagged. + $messagefn .= 'R' if $flags =~ /A/; # Replied to. + $messagefn .= 'S' if $flags =~ /R/; # Seen or Read. + $messagefn .= 'T' if $flags =~ /D/; # Tagged for deletion. + + # If the user has asked us to generate Dovecot- + # compatible keyword listings, let's give it a go + if ($dokeywords && + defined($messagekeywords) && + scalar(keys(%validkeywords))) + { + foreach my $keyword (split(/\s+/, $messagekeywords)) + { + # Only keywords listed in the X-IMAP(base): header + # are valid for this folder + next unless $validkeywords{$keyword}; + + # Check if we've already used this keyword and + # assigned it an index. Try to assign one if not + unless (defined($keywordshash{$keyword})) + { + unless (scalar(@keywords) < scalar(@keywordflags)) + { + printf("WARNING: Too many keywords (%d max). Ignoring keyword '%s' for message %d\n", scalar(@keywordflags), $keyword, $messagecount); + next; + } + + # Add the keyword to the array + push(@keywords, $keyword); + # Update the keyword to index hash + $keywordshash{$keyword} = scalar(@keywords)-1; + } + + $messagefn .= $keywordflags[$keywordshash{$keyword}]; + } + } + + + # Opens filename $messagefn for output (>) with filehandle OUT. 
+ + open(OUT, ">$messagefn") or die("Fatal: unable to create new message $messagefn"); + + # Count the messages. + + $messagecount++; + + # If the current UID is higher than UIDLAST, we + # need to update UIDLAST + $uidlast = $uidcurr if ($uidcurr > $uidlast); + + # Only for the first message, + # check to see if it is a dummy. + # Delete the message file we + # just created if it was for the + # dummy message at the start + # of the Mbox. + # + # Add search terms as required. + # The last 2 lines are for rent. + # + # "m" means match the regular expression, + # but we can do without it. + # + # Do I need to escape the ' in "DON'T"? + # I didn't in the original version. + + if ( (($messagecount == 1) && defined($subject)) + && ($subject =~ m/^DON'T DELETE THIS MESSAGE -- FOLDER INTERNAL DATA/) + ) + { + # Stash the file name of the dummy message so we + # can delete it later. + + $deletedummy = "$messagefn"; + + # If there was a dummy message, we still want + # the next message to be able to use UID 1 + $uidcurr = $uidlast = 0; + } else { + # If this is not a dummy message then store + # the UID and message filename for outputing + # into the uidlist file at the end (dropping + # "cur/" from the beginning) + push(@uidlist, "$uidcurr ". substr($messagefn, 4)); + } + + # Print the collected headers to the message file. + + print OUT "$headers"; + + + # Clear $headers and $flags ready for the next message. + + $headers = ''; + $flags = ''; + + # End of processing the headers once we found the + # blank line which terminated them + } + + # End of dealing with the headers. + } + + + if ( $inheaders eq 0) + { + + # We are now processing the message body. + # + # Now we have passed the headers to the + # output file, we scan until the while + # loop finds another "From " line. 
+ + # Decrement our content length if we're + # using it to find the end of the message + # body + + if (defined $contentlength) { + + # Decrement our $contentlength variable + + $contentlength -= length($_); + + # The proper end for a message with Content-Length + # specified is the $contentlength variable should + # be exactly -1 and we should be on a bare + # newline. Note that the bare newline is not + # printed to the end of the current message as + # it's actually a message separator in the mbox + # format rather than part of the message. The + # next line _should_ be a From_ line, but just in + # case the Content-Length header is incorrect + # (e.g. a corrupt mailbox), we just continue + # putting lines into the current message until we + # see the next From_ line. + + if ($contentlength < 0) { + if ($contentlength == -1 && $_ eq "\n") { + $contentlength = undef; + next; + } + $contentlength = undef; + } + } + + # + # We want to copy every part of the message + # body to the output file, except for the + # quoted ">From " lines, which was the + # way the IMAP server encoded body lines + # starting with "From ". + # + # Pattern matching Perl majik to + # get rid of an Mbox quoted From. + # + # This works on the default variable "$_" which + # contains the text from the Mbox mailbox - I + # guess this is the case because of our + # (open(MBOX ....) line above, which did not + # assign this to anything else, so it would go + # to the default variable. This enables + # inscrutably terse Perlisms to follow. + # + # "s" means "Subsitute" and it looks for any + # occurrence of ">From" starting at the start + # of the line. When it finds this, it replaces + # it with "From". + # + # So this finds all instances in the Mbox message + # where the original line started with the word + # "From" but was converted to ">From" in order to + # not be mistaken for the "From ..." line which + # is used to demark each message in the Mbox. 
+ # This was a destructive conversion because + # any message which originally had ">From" at the + # start of the line, before being put into the + # Mbox, will now have that line without the ">". + + s/^>From /From /; + + # Glorious terseness here. Thanks Simon for + # explaining this. + # + # "print OUT" means print the default variable to + # the file of file handle OUT. This is where + # the bulk of the message text is written to + # the output file. + + print OUT or die("Fatal: unable to write to new message to $messagefn"); + + + # End of the if statement dealing with message body. + } + + $previous_line_was_empty = ( /^ *\n$/ ); + + # End of while (MBOX) loop. + } + # Close the input file. + + close(MBOX); + + # Close the output file, and duplicate the code + # from the start of the while loop which touches + # the date-time of the most recent message file. + + close(OUT); + &$postclose(); + + # After all the messages have been + # converted, check to see if the + # first one was a dummy. + # if so, delete it and make + # the message count one less. + + if ($deletedummy ne "") + { + printf("Dummy mail system first message detected and not saved.\n"); + unlink $deletedummy; + + $messagecount--; + + } + + # If the user asked for a Dovecot keywords file and + # we found any keywords in this folder then write + # the file out. + if ($dokeywords && scalar(@keywords)) + { + + # $dokeywords should be false if the file already exists + # but we open it in O_EXCL mode to be sure. + # NOTE: NO LOCKING IS PERFORMED so beware running this + # on an active Maildir folder + if (sysopen(KEYWORDS, $keywordsfile, O_WRONLY|O_CREAT|O_EXCL, 0660)) + { + for (my $i = 0;$i < scalar(@keywords);$i++) + { + printf(KEYWORDS "%d %s\n", $i, $keywords[$i]); + } + close(KEYWORDS); + printf("Created keywords list: %s\n", $keywordsfile); + } + } + + # If the user asked for a uidlist file + # and we found an X-IMAP: or X-IMAPbase: header, then + # let's generate the file.
+ if ($douidlist && defined($uidvalidity)) + { + if ($create_courier_uidlist) + { + # Courier IMAP only wants the basename of the + # maildir file (up to the colon) so let's strip + # the endings off. + grep(s/:.*$//,@uidlist); + } + + # If there's already a uid list file, we don't + # know how to deal with the old UIDVALIDITY or + # whether the UIDs from the incoming messages + # are valid or unique. So we use O_EXCL and just + # bail out if the file exists and let the mail + # system update the index with new UIDs for + # these messages + # NOTE: NO LOCKING IS DONE SO DON'T RUN THIS ON + # AN ACTIVE MAILDIR + if (sysopen(UIDLIST, $uidlistfile, O_WRONLY|O_CREAT|O_EXCL, 0660)) + { + # The first 1 is the file format version number + # The second number is the UIDVALIDITY value + # The last number is the next number to be given + # to a new message (one higher than UIDLAST) + printf(UIDLIST "1 %d %d\n", $uidvalidity, $uidlast+1); + print(UIDLIST join("\n", @uidlist)); + print(UIDLIST "\n") if (scalar(@uidlist) > 0); + close(UIDLIST); + printf("Created UID list: %s\n", $uidlistfile); + } else { + printf("WARNING: Unable to create %s. 
Does it already exist?\n", $uidlistfile); + } + } + + printf("$messagecount messages.\n\n"); +} diff --git a/stamail.c b/stamail.c @@ -127,9 +127,16 @@ void parse_mail_sexp(const char *sexp, size_t len, n->child->type == SEXP_LIST) { struct sexp *plist = n->child->child; - if (plist_get(plist, ":id")) { + struct sexp *id = plist_get(plist, ":id"); + struct sexp *headers = plist_get(plist, ":headers"); + + if (id && headers && headers->type == SEXP_LIST) { parse_message_plist(plist, &m); - cb(&m, ud); + + /* sanity: must have a non-empty message-id */ + if (m.id && m.id_len > 1) { + cb(&m, ud); + } } } @@ -149,12 +156,27 @@ static void fprinthtml(FILE *fp, const char *s, int len) { return; for (int i = 0; i < len; i++) { + /* Handle escaped newlines: \n */ + if (s[i] == '\\' && i + 1 < len && s[i + 1] == 'n') { + fputs("<br/>\n", fp); + i++; /* skip 'n' */ + continue; + } + + /* Handle escaped tabs */ + if (s[i] == '\\' && i + 1 < len && s[i + 1] == 't') { + fputs("&nbsp;&nbsp;&nbsp;&nbsp;", fp); + i++; + continue; + } switch (s[i]) { case '<': fputs("&lt;", fp); break; case '>': fputs("&gt;", fp); break; case '&': fputs("&amp;", fp); break; case '"': fputs("&quot;", fp); break; case '\'': fputs("&#39;", fp); break; + case '\n': fputs("<br/>\n", fp); break; + case '\r': /* ignore */ break; default: fputc(s[i], fp); break; } } diff --git a/stamail.h b/stamail.h @@ -51,6 +51,6 @@ struct thread_node { /* Called for every message found */ typedef void (*message_cb)(struct message *m, void *userdata); - +void walk(struct sexp *n); #endif