use emailjs MailParser instead of Imap.parseHeader, simplify DB structure
parent
8e7aa3d83d
commit
3d218e0aa5
91
Syncer.js
91
Syncer.js
|
@ -2,6 +2,7 @@ const gen = require('gen-thread');
|
|||
const Imap = require('imap');
|
||||
const ImapManager = require('./ImapManager.js');
|
||||
const EventEmitter = require('events').EventEmitter;
|
||||
const MailParser = require('mailparser').MailParser;
|
||||
|
||||
module.exports = Syncer;
|
||||
|
||||
|
@ -196,8 +197,6 @@ Syncer.prototype.syncBox = function*(srv, accountId, boxName, boxKind, doFull)
|
|||
account_id: accountId,
|
||||
highestmodseq: 0,
|
||||
kind: boxKind||''
|
||||
//unread_count: boxStatus.messages.new,
|
||||
//total_count: boxStatus.messages.total,
|
||||
}).returning('id').row(gen.ef());
|
||||
}
|
||||
|
||||
|
@ -205,7 +204,7 @@ Syncer.prototype.syncBox = function*(srv, accountId, boxName, boxKind, doFull)
|
|||
var missing = [];
|
||||
var [ maxUid ] = yield this.pg.select('MAX(uid)').from('messages')
|
||||
.where({ folder_id: boxRow.id }).val(gen.ef());
|
||||
if (boxStatus.highestmodseq)
|
||||
if (boxRow.highestmodseq)
|
||||
{
|
||||
this.events.emit('sync', { state: 'start', quick: true, email: this.accounts[accountId].email, folder: boxRow.name });
|
||||
process.stderr.write(this.accounts[accountId].email+'/'+boxRow.name+': quick resync\n');
|
||||
|
@ -227,8 +226,8 @@ Syncer.prototype.syncBox = function*(srv, accountId, boxName, boxKind, doFull)
|
|||
}, (messages, state) => this.saveMessages(messages, boxRow.id, state));
|
||||
|
||||
yield this.pg.update('folders', {
|
||||
uidvalidity: boxRow.uidvalidity,
|
||||
highestmodseq: boxRow.highestmodseq||0
|
||||
uidvalidity: boxStatus.uidvalidity,
|
||||
highestmodseq: boxStatus.highestmodseq||0
|
||||
}).where({ id: boxRow.id }).run(gen.ef());
|
||||
}
|
||||
|
||||
|
@ -375,6 +374,15 @@ Syncer.prototype.saveMessages = function*(messages, boxId)
|
|||
yield* this.addMessage(boxId, messages[i][0], messages[i][1]);
|
||||
}
|
||||
|
||||
Syncer.prototype.parseMsg = function*(msg)
|
||||
{
|
||||
var parser = new MailParser({ streamAttachments: false, defaultCharset: 'utf-8' });
|
||||
parser.once('end', gen.cb());
|
||||
parser.write(msg);
|
||||
var [ obj ] = yield parser.end();
|
||||
return obj;
|
||||
}
|
||||
|
||||
Syncer.prototype.addMessage = function*(boxId, msgrow, attrs)
|
||||
{
|
||||
var self = this;
|
||||
|
@ -383,49 +391,42 @@ Syncer.prototype.addMessage = function*(boxId, msgrow, attrs)
|
|||
{
|
||||
[ pgtx, end_transaction ] = yield this.pg.transaction(gen.cb(), function(e) { if (e) throw e; });
|
||||
|
||||
var header = Imap.parseHeader(msgrow.headers);
|
||||
for (var i in header)
|
||||
for (var k = 0; k < header[i].length; k++)
|
||||
header[i][k] = header[i][k].replace(/\x00/g, '');
|
||||
header.from = header.from && splitEmails(header.from[0])[0];
|
||||
header.replyto = header['reply-to'] && splitEmails(header['reply-to'][0])[0];
|
||||
var re = /(<[^>]*>)/;
|
||||
header.references = (header.references && header.references[0] || '').split(re).filter(a => a.match(re));
|
||||
let header = yield* this.parseMsg(msgrow.headers);
|
||||
header.references = header.references || [];
|
||||
if (header.references.length)
|
||||
{
|
||||
if (header.references.length > 10)
|
||||
header.references = [ header.references[0] ].concat(header.references.slice(header.references.length-9));
|
||||
if (!header['in-reply-to'] || !header['in-reply-to'][0])
|
||||
header['in-reply-to'] = [ header.references[header.references.length-1] ];
|
||||
else if (header.references[header.references.length-1] != header['in-reply-to'][0])
|
||||
header.references.push(header['in-reply-to'][0]);
|
||||
}
|
||||
if (header.date)
|
||||
{
|
||||
var t = Date.parse(header.date[0]);
|
||||
if (!isNaN(t))
|
||||
header.date = new Date(t);
|
||||
else
|
||||
header.date = null;
|
||||
if (!header.inReplyTo || !header.inReplyTo[0])
|
||||
header.inReplyTo = [ header.references[header.references.length-1] ];
|
||||
else if (header.references[header.references.length-1] != header.inReplyTo[0])
|
||||
header.references.push(header.inReplyTo[0]);
|
||||
}
|
||||
if (!header.date)
|
||||
header.date = new Date(attrs.date);
|
||||
|
||||
if (!header.from)
|
||||
{
|
||||
console.log(msgrow.headers);
|
||||
console.log(header);
|
||||
}
|
||||
delete msgrow.headers;
|
||||
msgrow.folder_id = boxId;
|
||||
msgrow.from_email = header.from && header.from.email || '';
|
||||
msgrow.from_name = header.from && header.from.name || '';
|
||||
msgrow.replyto_email = header.replyto && header.replyto.email || '';
|
||||
msgrow.replyto_name = header.replyto && header.replyto.name || '';
|
||||
msgrow.to_list = header.to && header.to[0] || '';
|
||||
msgrow.cc_list = header.cc && header.cc[0] || '';
|
||||
msgrow.bcc_list = header.bcc && header.bcc[0] || '';
|
||||
msgrow.subject = header.subject && header.subject[0] || '';
|
||||
msgrow.messageid = header['message-id'] && header['message-id'][0] || '';
|
||||
msgrow.inreplyto = header['in-reply-to'] && header['in-reply-to'][0] || '';
|
||||
msgrow.inreplyto = msgrow.inreplyto.replace(/^[\s\S]*(<[^>]*>)[\s\S]*$/, '$1');
|
||||
msgrow.subject = header.subject || '';
|
||||
msgrow.props = JSON.stringify({
|
||||
from: ((header.from||[]).map((a) => [ a.name, a.address ]))[0],
|
||||
to: (header.to||[]).map((a) => [ a.name, a.address ]),
|
||||
cc: (header.cc||[]).map((a) => [ a.name, a.address ]),
|
||||
bcc: (header.bcc||[]).map((a) => [ a.name, a.address ]),
|
||||
replyto: (header.replyTo||[]).map((a) => [ a.name, a.address ]),
|
||||
});
|
||||
msgrow.messageid = header.messageId || '';
|
||||
msgrow.inreplyto = header.inReplyTo && header.inReplyTo[0] || '';
|
||||
msgrow.time = header.date;
|
||||
msgrow.size = attrs.size;
|
||||
msgrow.flags = toPgArray(msgrow.flags);
|
||||
msgrow.refs = toPgArray(header.references);
|
||||
for (let i in msgrow)
|
||||
if (typeof msgrow[i] == 'string')
|
||||
msgrow[i] = msgrow[i].replace(/\x00/g, '');
|
||||
|
||||
var thisIsFirst = false;
|
||||
if (header.references.length)
|
||||
|
@ -441,7 +442,7 @@ Syncer.prototype.addMessage = function*(boxId, msgrow, attrs)
|
|||
}
|
||||
msgrow.thread_id = threadId;
|
||||
}
|
||||
console.log(msgrow.time+' '+msgrow.from_email+' '+msgrow.subject);
|
||||
console.log(msgrow.time+' '+(header.from && header.from[0] && header.from[0].address || '?')+' '+msgrow.subject);
|
||||
[ msgrow.id ] = yield pgtx.insert('messages', msgrow).returning('id').val(gen.ef());
|
||||
if (!msgrow.thread_id)
|
||||
{
|
||||
|
@ -469,18 +470,6 @@ Syncer.prototype.addMessage = function*(boxId, msgrow, attrs)
|
|||
}
|
||||
}
|
||||
|
||||
function splitEmails(s)
|
||||
{
|
||||
var re = /^[\s,]*(?:(?:["'](.*?)["']|([^<]+))\s*<([^>]+)>|<?([^<>]+)>?)/; // '
|
||||
var m, r = [];
|
||||
while (m = re.exec(s))
|
||||
{
|
||||
s = s.substr(m[0].length);
|
||||
r.push({ name: (m[1]||m[2]||'').trim(), email: (m[3]||m[4]||'').trim() });
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
function toPgArray(a)
|
||||
{
|
||||
a = JSON.stringify(a);
|
||||
|
|
46
SyncerWeb.js
46
SyncerWeb.js
|
@ -175,6 +175,31 @@ function rewriteCss(ast)
|
|||
}
|
||||
}
|
||||
|
||||
function sanitizeHtml(html)
|
||||
{
|
||||
let styles = '';
|
||||
html = (html||'').replace(/<style[^<>]*>([\s\S]*?)<\/style\s*>/ig, function(m, m1)
|
||||
{
|
||||
styles += m1+'\n';
|
||||
return '';
|
||||
});
|
||||
html = html.replace(/^[\s\S]*?<body[^<>]*>([\s\S]*)<\/body>[\s\S]*$/i, '$1');
|
||||
html = html.replace(/^[\s\S]*?<html[^<>]*>([\s\S]*)<\/html>[\s\S]*$/i, '$1');
|
||||
if (styles)
|
||||
{
|
||||
html = '<style>\n'+styles+'</style>\n'+html;
|
||||
styles = '';
|
||||
}
|
||||
html = htmlawed.sanitize(html||'', { safe: 1, elements: '* +style', keep_bad: 0, comment: 1 });
|
||||
html = html.replace(/<style[^>]*>([\s\S]*)<\/style\s*>/ig, function(m, m1)
|
||||
{
|
||||
var ast = css.parse(m1, { silent: true });
|
||||
rewriteCss(ast);
|
||||
return '<style>'+css.stringify(ast)+'</style>';
|
||||
});
|
||||
return html;
|
||||
}
|
||||
|
||||
function* getBody(pg, messages, boxId)
|
||||
{
|
||||
var p = new MailParser({ streamAttachments: false, defaultCharset: 'windows-1251' });
|
||||
|
@ -184,26 +209,7 @@ function* getBody(pg, messages, boxId)
|
|||
p.on('end', gen.cb());
|
||||
p.write(msg[0].headers);
|
||||
let [ obj ] = yield p.end();
|
||||
let styles = '';
|
||||
obj.html = (obj.html||'').replace(/<style[^<>]*>([\s\S]*?)<\/style\s*>/ig, function(m, m1)
|
||||
{
|
||||
styles += m1+'\n';
|
||||
return '';
|
||||
});
|
||||
obj.html = obj.html.replace(/^[\s\S]*?<body[^<>]*>([\s\S]*)<\/body>[\s\S]*$/i, '$1');
|
||||
obj.html = obj.html.replace(/^[\s\S]*?<html[^<>]*>([\s\S]*)<\/html>[\s\S]*$/i, '$1');
|
||||
if (styles)
|
||||
{
|
||||
obj.html = '<style>\n'+styles+'</style>\n'+obj.html;
|
||||
styles = '';
|
||||
}
|
||||
obj.html = htmlawed.sanitize(obj.html||'', { safe: 1, elements: '* +style', keep_bad: 0, comment: 1 });
|
||||
obj.html = obj.html.replace(/<style[^>]*>([\s\S]*)<\/style\s*>/ig, function(m, m1)
|
||||
{
|
||||
var ast = css.parse(m1, { silent: true });
|
||||
rewriteCss(ast);
|
||||
return '<style>'+css.stringify(ast)+'</style>';
|
||||
});
|
||||
obj.html = sanitizeHtml(obj.html);
|
||||
let upd = { body_text: obj.text||'', body_html: obj.html };
|
||||
upd.body_html_text = obj.html.replace(/<style[^>]*>.*<\/style\s*>|<\/?[^>]*>/g, '');
|
||||
yield pg.update('messages m', upd).where({ folder_id: boxId, uid: msg[0].uid }).run(gen.ef());
|
||||
|
|
60
db.sql
60
db.sql
|
@ -18,7 +18,6 @@ create table folders (
|
|||
uidvalidity int not null,
|
||||
account_id int not null,
|
||||
name varchar(255) not null,
|
||||
unread_count int not null,
|
||||
highestmodseq int not null default 0,
|
||||
kind varchar(255) not null,
|
||||
foreign key (account_id) references accounts (id) on delete cascade on update cascade
|
||||
|
@ -34,20 +33,12 @@ create table messages (
|
|||
inreplyto varchar(1000) not null,
|
||||
refs varchar(1000)[] not null,
|
||||
subject text not null,
|
||||
from_email varchar(255) not null,
|
||||
from_name varchar(255) not null,
|
||||
replyto_email varchar(255) not null,
|
||||
replyto_name varchar(255) not null,
|
||||
to_list text not null,
|
||||
cc_list text not null,
|
||||
bcc_list text not null,
|
||||
headers text not null,
|
||||
body_html text not null,
|
||||
body_text text not null,
|
||||
body_html_text text not null,
|
||||
text_index tsvector not null,
|
||||
props jsonb not null,
|
||||
body_html text not null default '',
|
||||
body_text text not null default '',
|
||||
body_html_text text not null default '',
|
||||
time timestamptz not null,
|
||||
size unsigned not null,
|
||||
size int not null,
|
||||
flags varchar(255)[] not null,
|
||||
foreign key (folder_id) references folders (id) on delete cascade on update cascade
|
||||
);
|
||||
|
@ -56,29 +47,23 @@ create index messages_flags on messages using gin (folder_id, flags);
|
|||
create index messages_messageid on messages (messageid);
|
||||
create index messages_refs on messages using gin (refs);
|
||||
create index messages_time on messages (folder_id, time);
|
||||
create index messages_text on messages using gin (text_index);
|
||||
create or replace function fn_messages_text_index() returns trigger
|
||||
security definer language plpgsql as $$
|
||||
create or replace function messages_fulltext(msg messages) returns tsvector
|
||||
language plpgsql immutable as $$
|
||||
begin
|
||||
NEW.text_index = (
|
||||
setweight(to_tsvector('russian', regexp_replace(NEW.from_name || ' ' || NEW.from_email || ' ' ||
|
||||
NEW.replyto_name || ' ' || NEW.replyto_email || ' ' ||
|
||||
NEW.to_list || ' ' || NEW.cc_list || ' ' || NEW.bcc_list || ' ' || NEW.subject, '\W+', ' ', 'g')), 'A') ||
|
||||
setweight(to_tsvector('russian', NEW.body_html_text || ' ' || NEW.body_text), 'B')
|
||||
);
|
||||
return NEW;
|
||||
return setweight(to_tsvector('russian', regexp_replace(
|
||||
coalesce(msg.props->>'from', '') || ' ' ||
|
||||
coalesce(msg.props->>'replyto', '') || ' ' ||
|
||||
coalesce(msg.props->>'to', '') || ' ' ||
|
||||
coalesce(msg.props->>'cc', '') || ' ' ||
|
||||
coalesce(msg.props->>'bcc', '') || ' ' ||
|
||||
coalesce(msg.props->>'attachments', '') || ' ' ||
|
||||
msg.subject,
|
||||
'\W+', ' ', 'g'
|
||||
)), 'A')
|
||||
|| setweight(to_tsvector('russian', msg.body_html_text || ' ' || msg.body_text), 'B');
|
||||
end
|
||||
$$;
|
||||
create trigger messages_text_index before insert or update on messages
|
||||
for each row execute procedure fn_messages_text_index();
|
||||
|
||||
create table attachments (
|
||||
id serial not null primary key,
|
||||
msg_id int not null,
|
||||
ctype varchar(255) not null,
|
||||
size unsigned not null,
|
||||
foreign key (msg_id) references messages (id) on delete cascade on update cascade
|
||||
);
|
||||
create index messages_text on messages using gin (messages_fulltext(messages));
|
||||
|
||||
create table threads (
|
||||
id serial not null primary key,
|
||||
|
@ -90,6 +75,7 @@ create index threads_first_msg on threads (first_msg);
|
|||
|
||||
alter table messages add foreign key (thread_id) references threads (id) on delete restrict on update cascade;
|
||||
|
||||
--create table tt as with recursive t (id, messageid, upperid, uppermsg) as (select (array_agg(m1.id))[0], m1.messageid, (array_agg(m1.id))[1], m1.messageid from messages m1 left join messages m2 on m1.messageid!='' and m1.inreplyto!='' and m2.messageid=m1.inreplyto where m2.id is null group by m1.messageid union select m1.id, m1.messageid, t.upperid, t.uppermsg from messages m1 inner join t on m1.inreplyto!='' and m1.inreplyto=t.messageid where m1.messageid!='') select * from t;
|
||||
|
||||
--alter table messages alter flags type varchar(255)[] using (case when flags&1=1 then array['recent'] else array[]::varchar(255)[] end) || (case when flags&2=2 then array['flagged'] else array[]::varchar(255)[] end) || (case when flags&4=4 then array['answered'] else array[]::varchar(255)[] end) || (case when flags&8=8 then array['unread'] else array[]::varchar(255)[] end);
|
||||
alter table accounts owner to operetta;
|
||||
alter table folders owner to operetta;
|
||||
alter table messages owner to operetta;
|
||||
alter table threads owner to operetta;
|
||||
|
|
|
@ -42,7 +42,7 @@ var syncerweb = new SyncerWeb(syncer, pg, cfg);
|
|||
gen.run(function*()
|
||||
{
|
||||
yield* syncer.init(cfg);
|
||||
//yield* syncer.syncAll();
|
||||
yield* syncer.syncAll();
|
||||
});
|
||||
|
||||
syncerweb.listen(8057);
|
||||
|
|
Loading…
Reference in New Issue