use emailjs MailParser instead of Imap.parseHeader, simplify DB structure
parent
8e7aa3d83d
commit
3d218e0aa5
91
Syncer.js
91
Syncer.js
|
@ -2,6 +2,7 @@ const gen = require('gen-thread');
|
||||||
const Imap = require('imap');
|
const Imap = require('imap');
|
||||||
const ImapManager = require('./ImapManager.js');
|
const ImapManager = require('./ImapManager.js');
|
||||||
const EventEmitter = require('events').EventEmitter;
|
const EventEmitter = require('events').EventEmitter;
|
||||||
|
const MailParser = require('mailparser').MailParser;
|
||||||
|
|
||||||
module.exports = Syncer;
|
module.exports = Syncer;
|
||||||
|
|
||||||
|
@ -196,8 +197,6 @@ Syncer.prototype.syncBox = function*(srv, accountId, boxName, boxKind, doFull)
|
||||||
account_id: accountId,
|
account_id: accountId,
|
||||||
highestmodseq: 0,
|
highestmodseq: 0,
|
||||||
kind: boxKind||''
|
kind: boxKind||''
|
||||||
//unread_count: boxStatus.messages.new,
|
|
||||||
//total_count: boxStatus.messages.total,
|
|
||||||
}).returning('id').row(gen.ef());
|
}).returning('id').row(gen.ef());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -205,7 +204,7 @@ Syncer.prototype.syncBox = function*(srv, accountId, boxName, boxKind, doFull)
|
||||||
var missing = [];
|
var missing = [];
|
||||||
var [ maxUid ] = yield this.pg.select('MAX(uid)').from('messages')
|
var [ maxUid ] = yield this.pg.select('MAX(uid)').from('messages')
|
||||||
.where({ folder_id: boxRow.id }).val(gen.ef());
|
.where({ folder_id: boxRow.id }).val(gen.ef());
|
||||||
if (boxStatus.highestmodseq)
|
if (boxRow.highestmodseq)
|
||||||
{
|
{
|
||||||
this.events.emit('sync', { state: 'start', quick: true, email: this.accounts[accountId].email, folder: boxRow.name });
|
this.events.emit('sync', { state: 'start', quick: true, email: this.accounts[accountId].email, folder: boxRow.name });
|
||||||
process.stderr.write(this.accounts[accountId].email+'/'+boxRow.name+': quick resync\n');
|
process.stderr.write(this.accounts[accountId].email+'/'+boxRow.name+': quick resync\n');
|
||||||
|
@ -227,8 +226,8 @@ Syncer.prototype.syncBox = function*(srv, accountId, boxName, boxKind, doFull)
|
||||||
}, (messages, state) => this.saveMessages(messages, boxRow.id, state));
|
}, (messages, state) => this.saveMessages(messages, boxRow.id, state));
|
||||||
|
|
||||||
yield this.pg.update('folders', {
|
yield this.pg.update('folders', {
|
||||||
uidvalidity: boxRow.uidvalidity,
|
uidvalidity: boxStatus.uidvalidity,
|
||||||
highestmodseq: boxRow.highestmodseq||0
|
highestmodseq: boxStatus.highestmodseq||0
|
||||||
}).where({ id: boxRow.id }).run(gen.ef());
|
}).where({ id: boxRow.id }).run(gen.ef());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -375,6 +374,15 @@ Syncer.prototype.saveMessages = function*(messages, boxId)
|
||||||
yield* this.addMessage(boxId, messages[i][0], messages[i][1]);
|
yield* this.addMessage(boxId, messages[i][0], messages[i][1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Run a raw message (addMessage passes only the header text) through
 * MailParser and return the object delivered with the parser's 'end' event.
 *
 * Generator meant to be driven via `yield*` from another gen-thread generator.
 *
 * @param {string|Buffer} msg  Raw message source handed to parser.write().
 * @returns {Object} First value of the array the generator is resumed with
 *   (presumably MailParser's parsed-mail object - confirm against gen-thread).
 */
Syncer.prototype.parseMsg = function*(msg)
{
    const mailParser = new MailParser({ streamAttachments: false, defaultCharset: 'utf-8' });
    // The resume callback has to be registered before any data is fed in,
    // so the 'end' event cannot fire without a listener attached.
    mailParser.once('end', gen.cb());
    mailParser.write(msg);
    // NOTE(review): no 'error' listener is attached here; a parser failure
    // would never resume this generator - confirm whether that can happen.
    const [ parsedMail ] = yield mailParser.end();
    return parsedMail;
};
|
||||||
|
|
||||||
Syncer.prototype.addMessage = function*(boxId, msgrow, attrs)
|
Syncer.prototype.addMessage = function*(boxId, msgrow, attrs)
|
||||||
{
|
{
|
||||||
var self = this;
|
var self = this;
|
||||||
|
@ -383,49 +391,42 @@ Syncer.prototype.addMessage = function*(boxId, msgrow, attrs)
|
||||||
{
|
{
|
||||||
[ pgtx, end_transaction ] = yield this.pg.transaction(gen.cb(), function(e) { if (e) throw e; });
|
[ pgtx, end_transaction ] = yield this.pg.transaction(gen.cb(), function(e) { if (e) throw e; });
|
||||||
|
|
||||||
var header = Imap.parseHeader(msgrow.headers);
|
let header = yield* this.parseMsg(msgrow.headers);
|
||||||
for (var i in header)
|
header.references = header.references || [];
|
||||||
for (var k = 0; k < header[i].length; k++)
|
|
||||||
header[i][k] = header[i][k].replace(/\x00/g, '');
|
|
||||||
header.from = header.from && splitEmails(header.from[0])[0];
|
|
||||||
header.replyto = header['reply-to'] && splitEmails(header['reply-to'][0])[0];
|
|
||||||
var re = /(<[^>]*>)/;
|
|
||||||
header.references = (header.references && header.references[0] || '').split(re).filter(a => a.match(re));
|
|
||||||
if (header.references.length)
|
if (header.references.length)
|
||||||
{
|
{
|
||||||
if (header.references.length > 10)
|
if (!header.inReplyTo || !header.inReplyTo[0])
|
||||||
header.references = [ header.references[0] ].concat(header.references.slice(header.references.length-9));
|
header.inReplyTo = [ header.references[header.references.length-1] ];
|
||||||
if (!header['in-reply-to'] || !header['in-reply-to'][0])
|
else if (header.references[header.references.length-1] != header.inReplyTo[0])
|
||||||
header['in-reply-to'] = [ header.references[header.references.length-1] ];
|
header.references.push(header.inReplyTo[0]);
|
||||||
else if (header.references[header.references.length-1] != header['in-reply-to'][0])
|
|
||||||
header.references.push(header['in-reply-to'][0]);
|
|
||||||
}
|
|
||||||
if (header.date)
|
|
||||||
{
|
|
||||||
var t = Date.parse(header.date[0]);
|
|
||||||
if (!isNaN(t))
|
|
||||||
header.date = new Date(t);
|
|
||||||
else
|
|
||||||
header.date = null;
|
|
||||||
}
|
}
|
||||||
if (!header.date)
|
if (!header.date)
|
||||||
header.date = new Date(attrs.date);
|
header.date = new Date(attrs.date);
|
||||||
|
|
||||||
|
if (!header.from)
|
||||||
|
{
|
||||||
|
console.log(msgrow.headers);
|
||||||
|
console.log(header);
|
||||||
|
}
|
||||||
|
delete msgrow.headers;
|
||||||
msgrow.folder_id = boxId;
|
msgrow.folder_id = boxId;
|
||||||
msgrow.from_email = header.from && header.from.email || '';
|
msgrow.subject = header.subject || '';
|
||||||
msgrow.from_name = header.from && header.from.name || '';
|
msgrow.props = JSON.stringify({
|
||||||
msgrow.replyto_email = header.replyto && header.replyto.email || '';
|
from: ((header.from||[]).map((a) => [ a.name, a.address ]))[0],
|
||||||
msgrow.replyto_name = header.replyto && header.replyto.name || '';
|
to: (header.to||[]).map((a) => [ a.name, a.address ]),
|
||||||
msgrow.to_list = header.to && header.to[0] || '';
|
cc: (header.cc||[]).map((a) => [ a.name, a.address ]),
|
||||||
msgrow.cc_list = header.cc && header.cc[0] || '';
|
bcc: (header.bcc||[]).map((a) => [ a.name, a.address ]),
|
||||||
msgrow.bcc_list = header.bcc && header.bcc[0] || '';
|
replyto: (header.replyTo||[]).map((a) => [ a.name, a.address ]),
|
||||||
msgrow.subject = header.subject && header.subject[0] || '';
|
});
|
||||||
msgrow.messageid = header['message-id'] && header['message-id'][0] || '';
|
msgrow.messageid = header.messageId || '';
|
||||||
msgrow.inreplyto = header['in-reply-to'] && header['in-reply-to'][0] || '';
|
msgrow.inreplyto = header.inReplyTo && header.inReplyTo[0] || '';
|
||||||
msgrow.inreplyto = msgrow.inreplyto.replace(/^[\s\S]*(<[^>]*>)[\s\S]*$/, '$1');
|
|
||||||
msgrow.time = header.date;
|
msgrow.time = header.date;
|
||||||
|
msgrow.size = attrs.size;
|
||||||
msgrow.flags = toPgArray(msgrow.flags);
|
msgrow.flags = toPgArray(msgrow.flags);
|
||||||
msgrow.refs = toPgArray(header.references);
|
msgrow.refs = toPgArray(header.references);
|
||||||
|
for (let i in msgrow)
|
||||||
|
if (typeof msgrow[i] == 'string')
|
||||||
|
msgrow[i] = msgrow[i].replace(/\x00/g, '');
|
||||||
|
|
||||||
var thisIsFirst = false;
|
var thisIsFirst = false;
|
||||||
if (header.references.length)
|
if (header.references.length)
|
||||||
|
@ -441,7 +442,7 @@ Syncer.prototype.addMessage = function*(boxId, msgrow, attrs)
|
||||||
}
|
}
|
||||||
msgrow.thread_id = threadId;
|
msgrow.thread_id = threadId;
|
||||||
}
|
}
|
||||||
console.log(msgrow.time+' '+msgrow.from_email+' '+msgrow.subject);
|
console.log(msgrow.time+' '+(header.from && header.from[0] && header.from[0].address || '?')+' '+msgrow.subject);
|
||||||
[ msgrow.id ] = yield pgtx.insert('messages', msgrow).returning('id').val(gen.ef());
|
[ msgrow.id ] = yield pgtx.insert('messages', msgrow).returning('id').val(gen.ef());
|
||||||
if (!msgrow.thread_id)
|
if (!msgrow.thread_id)
|
||||||
{
|
{
|
||||||
|
@ -469,18 +470,6 @@ Syncer.prototype.addMessage = function*(boxId, msgrow, attrs)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Split an address-list header value (From / To / Reply-To ...) into
 * individual entries.
 *
 * Recognised forms, tried in this order at the start of the remaining text:
 *   1. "Quoted Name" <addr>   (single or double quotes)
 *   2. Unquoted Name <addr>
 *   3. addr   or   <addr>
 *
 * Entries are separated by commas and/or whitespace. An unquoted name may
 * contain a comma ("Doe, John <j@x>") only when it contains no "@"; otherwise
 * the text before the comma is treated as a separate bare address, so that
 * "a@b.com, Bob <bob@x.com>" yields two entries instead of one.
 *
 * @param {?string} s  Header value; null/undefined is treated as empty.
 * @returns {Array<{name: string, email: string}>} Parsed entries in input
 *   order; `name` is '' for bare addresses. Parsing stops at the first
 *   position that matches none of the forms above.
 */
function splitEmails(s)
{
    // m[1] quoted name, m[2] unquoted name, m[3] address in <>, m[4] bare address.
    // Commas are excluded from the bare form so "a@b.com, c@d.com" is two entries.
    var re = /^[\s,]*(?:(?:["'](.*?)["']|([^<,]+|[^<@]+))\s*<([^>]+)>|<?([^<>,]+)>?)/;
    var m, r = [];
    s = s == null ? '' : String(s);
    while ((m = re.exec(s)))
    {
        // Every match consumes at least one character, so this always terminates.
        s = s.slice(m[0].length);
        var name = (m[1]||m[2]||'').trim();
        var email = (m[3]||m[4]||'').trim();
        // A trailing separator followed by whitespace matches the bare form
        // with nothing but blanks; do not report that as an address.
        if (name || email)
            r.push({ name: name, email: email });
    }
    return r;
}
|
|
||||||
|
|
||||||
function toPgArray(a)
|
function toPgArray(a)
|
||||||
{
|
{
|
||||||
a = JSON.stringify(a);
|
a = JSON.stringify(a);
|
||||||
|
|
46
SyncerWeb.js
46
SyncerWeb.js
|
@ -175,6 +175,31 @@ function rewriteCss(ast)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Clean up the HTML part of a message before it is stored.
 *
 * Steps, in order:
 *   1. cut every <style> block out of the markup and collect the CSS text;
 *   2. reduce the document to the contents of <body> (and of <html>, if that
 *      wrapper is still present);
 *   3. put the collected CSS back as a single leading <style> block;
 *   4. run the result through htmlawed.sanitize();
 *   5. parse each remaining <style> block with css.parse(), pass the AST to
 *      rewriteCss() and write the stringified result back.
 *
 * @param {?string} html  Raw HTML; null/undefined/'' is treated as empty.
 * @returns {string} The sanitized HTML fragment.
 */
function sanitizeHtml(html)
{
    let styles = '';
    html = (html||'').replace(/<style[^<>]*>([\s\S]*?)<\/style\s*>/ig, function(m, m1)
    {
        styles += m1+'\n';
        return '';
    });
    html = html.replace(/^[\s\S]*?<body[^<>]*>([\s\S]*)<\/body>[\s\S]*$/i, '$1');
    html = html.replace(/^[\s\S]*?<html[^<>]*>([\s\S]*)<\/html>[\s\S]*$/i, '$1');
    if (styles)
    {
        html = '<style>\n'+styles+'</style>\n'+html;
    }
    html = htmlawed.sanitize(html, { safe: 1, elements: '* +style', keep_bad: 0, comment: 1 });
    // The match must be non-greedy: step 1 does not catch every <style> tag
    // (e.g. one with "<" inside an attribute), so more than one block can be
    // present here. A greedy match would span from the first <style> to the
    // last </style> and hand the markup in between to the CSS parser.
    html = html.replace(/<style[^>]*>([\s\S]*?)<\/style\s*>/ig, function(m, m1)
    {
        var ast = css.parse(m1, { silent: true });
        rewriteCss(ast);
        return '<style>'+css.stringify(ast)+'</style>';
    });
    return html;
}
|
||||||
|
|
||||||
function* getBody(pg, messages, boxId)
|
function* getBody(pg, messages, boxId)
|
||||||
{
|
{
|
||||||
var p = new MailParser({ streamAttachments: false, defaultCharset: 'windows-1251' });
|
var p = new MailParser({ streamAttachments: false, defaultCharset: 'windows-1251' });
|
||||||
|
@ -184,26 +209,7 @@ function* getBody(pg, messages, boxId)
|
||||||
p.on('end', gen.cb());
|
p.on('end', gen.cb());
|
||||||
p.write(msg[0].headers);
|
p.write(msg[0].headers);
|
||||||
let [ obj ] = yield p.end();
|
let [ obj ] = yield p.end();
|
||||||
let styles = '';
|
obj.html = sanitizeHtml(obj.html);
|
||||||
obj.html = (obj.html||'').replace(/<style[^<>]*>([\s\S]*?)<\/style\s*>/ig, function(m, m1)
|
|
||||||
{
|
|
||||||
styles += m1+'\n';
|
|
||||||
return '';
|
|
||||||
});
|
|
||||||
obj.html = obj.html.replace(/^[\s\S]*?<body[^<>]*>([\s\S]*)<\/body>[\s\S]*$/i, '$1');
|
|
||||||
obj.html = obj.html.replace(/^[\s\S]*?<html[^<>]*>([\s\S]*)<\/html>[\s\S]*$/i, '$1');
|
|
||||||
if (styles)
|
|
||||||
{
|
|
||||||
obj.html = '<style>\n'+styles+'</style>\n'+obj.html;
|
|
||||||
styles = '';
|
|
||||||
}
|
|
||||||
obj.html = htmlawed.sanitize(obj.html||'', { safe: 1, elements: '* +style', keep_bad: 0, comment: 1 });
|
|
||||||
obj.html = obj.html.replace(/<style[^>]*>([\s\S]*)<\/style\s*>/ig, function(m, m1)
|
|
||||||
{
|
|
||||||
var ast = css.parse(m1, { silent: true });
|
|
||||||
rewriteCss(ast);
|
|
||||||
return '<style>'+css.stringify(ast)+'</style>';
|
|
||||||
});
|
|
||||||
let upd = { body_text: obj.text||'', body_html: obj.html };
|
let upd = { body_text: obj.text||'', body_html: obj.html };
|
||||||
upd.body_html_text = obj.html.replace(/<style[^>]*>.*<\/style\s*>|<\/?[^>]*>/g, '');
|
upd.body_html_text = obj.html.replace(/<style[^>]*>.*<\/style\s*>|<\/?[^>]*>/g, '');
|
||||||
yield pg.update('messages m', upd).where({ folder_id: boxId, uid: msg[0].uid }).run(gen.ef());
|
yield pg.update('messages m', upd).where({ folder_id: boxId, uid: msg[0].uid }).run(gen.ef());
|
||||||
|
|
60
db.sql
60
db.sql
|
@ -18,7 +18,6 @@ create table folders (
|
||||||
uidvalidity int not null,
|
uidvalidity int not null,
|
||||||
account_id int not null,
|
account_id int not null,
|
||||||
name varchar(255) not null,
|
name varchar(255) not null,
|
||||||
unread_count int not null,
|
|
||||||
highestmodseq int not null default 0,
|
highestmodseq int not null default 0,
|
||||||
kind varchar(255) not null,
|
kind varchar(255) not null,
|
||||||
foreign key (account_id) references accounts (id) on delete cascade on update cascade
|
foreign key (account_id) references accounts (id) on delete cascade on update cascade
|
||||||
|
@ -34,20 +33,12 @@ create table messages (
|
||||||
inreplyto varchar(1000) not null,
|
inreplyto varchar(1000) not null,
|
||||||
refs varchar(1000)[] not null,
|
refs varchar(1000)[] not null,
|
||||||
subject text not null,
|
subject text not null,
|
||||||
from_email varchar(255) not null,
|
props jsonb not null,
|
||||||
from_name varchar(255) not null,
|
body_html text not null default '',
|
||||||
replyto_email varchar(255) not null,
|
body_text text not null default '',
|
||||||
replyto_name varchar(255) not null,
|
body_html_text text not null default '',
|
||||||
to_list text not null,
|
|
||||||
cc_list text not null,
|
|
||||||
bcc_list text not null,
|
|
||||||
headers text not null,
|
|
||||||
body_html text not null,
|
|
||||||
body_text text not null,
|
|
||||||
body_html_text text not null,
|
|
||||||
text_index tsvector not null,
|
|
||||||
time timestamptz not null,
|
time timestamptz not null,
|
||||||
size unsigned not null,
|
size int not null,
|
||||||
flags varchar(255)[] not null,
|
flags varchar(255)[] not null,
|
||||||
foreign key (folder_id) references folders (id) on delete cascade on update cascade
|
foreign key (folder_id) references folders (id) on delete cascade on update cascade
|
||||||
);
|
);
|
||||||
|
@ -56,29 +47,23 @@ create index messages_flags on messages using gin (folder_id, flags);
|
||||||
create index messages_messageid on messages (messageid);
|
create index messages_messageid on messages (messageid);
|
||||||
create index messages_refs on messages using gin (refs);
|
create index messages_refs on messages using gin (refs);
|
||||||
create index messages_time on messages (folder_id, time);
|
create index messages_time on messages (folder_id, time);
|
||||||
create index messages_text on messages using gin (text_index);
|
create or replace function messages_fulltext(msg messages) returns tsvector
|
||||||
create or replace function fn_messages_text_index() returns trigger
|
language plpgsql immutable as $$
|
||||||
security definer language plpgsql as $$
|
|
||||||
begin
|
begin
|
||||||
NEW.text_index = (
|
return setweight(to_tsvector('russian', regexp_replace(
|
||||||
setweight(to_tsvector('russian', regexp_replace(NEW.from_name || ' ' || NEW.from_email || ' ' ||
|
coalesce(msg.props->>'from', '') || ' ' ||
|
||||||
NEW.replyto_name || ' ' || NEW.replyto_email || ' ' ||
|
coalesce(msg.props->>'replyto', '') || ' ' ||
|
||||||
NEW.to_list || ' ' || NEW.cc_list || ' ' || NEW.bcc_list || ' ' || NEW.subject, '\W+', ' ', 'g')), 'A') ||
|
coalesce(msg.props->>'to', '') || ' ' ||
|
||||||
setweight(to_tsvector('russian', NEW.body_html_text || ' ' || NEW.body_text), 'B')
|
coalesce(msg.props->>'cc', '') || ' ' ||
|
||||||
);
|
coalesce(msg.props->>'bcc', '') || ' ' ||
|
||||||
return NEW;
|
coalesce(msg.props->>'attachments', '') || ' ' ||
|
||||||
|
msg.subject,
|
||||||
|
'\W+', ' ', 'g'
|
||||||
|
)), 'A')
|
||||||
|
|| setweight(to_tsvector('russian', msg.body_html_text || ' ' || msg.body_text), 'B');
|
||||||
end
|
end
|
||||||
$$;
|
$$;
|
||||||
create trigger messages_text_index before insert or update on messages
|
create index messages_text on messages using gin (messages_fulltext(messages));
|
||||||
for each row execute procedure fn_messages_text_index();
|
|
||||||
|
|
||||||
create table attachments (
|
|
||||||
id serial not null primary key,
|
|
||||||
msg_id int not null,
|
|
||||||
ctype varchar(255) not null,
|
|
||||||
size unsigned not null,
|
|
||||||
foreign key (msg_id) references messages (id) on delete cascade on update cascade
|
|
||||||
);
|
|
||||||
|
|
||||||
create table threads (
|
create table threads (
|
||||||
id serial not null primary key,
|
id serial not null primary key,
|
||||||
|
@ -90,6 +75,7 @@ create index threads_first_msg on threads (first_msg);
|
||||||
|
|
||||||
alter table messages add foreign key (thread_id) references threads (id) on delete restrict on update cascade;
|
alter table messages add foreign key (thread_id) references threads (id) on delete restrict on update cascade;
|
||||||
|
|
||||||
--create table tt as with recursive t (id, messageid, upperid, uppermsg) as (select (array_agg(m1.id))[0], m1.messageid, (array_agg(m1.id))[1], m1.messageid from messages m1 left join messages m2 on m1.messageid!='' and m1.inreplyto!='' and m2.messageid=m1.inreplyto where m2.id is null group by m1.messageid union select m1.id, m1.messageid, t.upperid, t.uppermsg from messages m1 inner join t on m1.inreplyto!='' and m1.inreplyto=t.messageid where m1.messageid!='') select * from t;
|
alter table accounts owner to operetta;
|
||||||
|
alter table folders owner to operetta;
|
||||||
--alter table messages alter flags type varchar(255)[] using (case when flags&1=1 then array['recent'] else array[]::varchar(255)[] end) || (case when flags&2=2 then array['flagged'] else array[]::varchar(255)[] end) || (case when flags&4=4 then array['answered'] else array[]::varchar(255)[] end) || (case when flags&8=8 then array['unread'] else array[]::varchar(255)[] end);
|
alter table messages owner to operetta;
|
||||||
|
alter table threads owner to operetta;
|
||||||
|
|
|
@ -42,7 +42,7 @@ var syncerweb = new SyncerWeb(syncer, pg, cfg);
|
||||||
gen.run(function*()
|
gen.run(function*()
|
||||||
{
|
{
|
||||||
yield* syncer.init(cfg);
|
yield* syncer.init(cfg);
|
||||||
//yield* syncer.syncAll();
|
yield* syncer.syncAll();
|
||||||
});
|
});
|
||||||
|
|
||||||
syncerweb.listen(8057);
|
syncerweb.listen(8057);
|
||||||
|
|
Loading…
Reference in New Issue