bugzilla-4intranet/Bugzilla/Migrate/Gnats.pm

710 lines
23 KiB
Perl

# -*- Mode: perl; indent-tabs-mode: nil -*-
#
# The contents of this file are subject to the Mozilla Public
# License Version 1.1 (the "License"); you may not use this file
# except in compliance with the License. You may obtain a copy of
# the License at http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS
# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
# implied. See the License for the specific language governing
# rights and limitations under the License.
#
# The Original Code is The Bugzilla Migration Tool.
#
# The Initial Developer of the Original Code is Lambda Research
# Corporation. Portions created by the Initial Developer are Copyright
# (C) 2009 the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Max Kanat-Alexander <mkanat@bugzilla.org>
package Bugzilla::Migrate::Gnats;
use strict;
use base qw(Bugzilla::Migrate);
use Bugzilla::Constants;
use Bugzilla::Install::Util qw(indicate_progress);
use Bugzilla::Util qw(format_time trim generate_random_password lsearch);
use Email::Address;
use Email::MIME;
use File::Basename;
use IO::File;
use List::Util qw(first);
# Modules this migrator declares as requirements. Each entry carries the
# distribution name ("package"), the module name, and the version wanted.
use constant REQUIRED_MODULES => [
    {
        package => 'Email-Simple-FromHandle',
        module  => 'Email::Simple::FromHandle',
        # This version added seekable handles.
        version => 0.050,
    },
];
# Default mapping from GNATS field names (keys) to Bugzilla field names
# (values). CONFIG_VARS installs it as the default of 'translate_fields'.
# 'Classification' is mapped to the empty string.
use constant FIELD_MAP => {
    'Number'         => 'bug_id',
    'Category'       => 'product',
    'Synopsis'       => 'short_desc',
    'Responsible'    => 'assigned_to',
    'State'          => 'bug_status',
    'Class'          => 'cf_type',
    'Classification' => '',
    'Originator'     => 'reporter',
    'Arrival-Date'   => 'creation_ts',
    'Last-Modified'  => 'delta_ts',
    'Release'        => 'version',
    'Severity'       => 'bug_severity',
    'Description'    => 'comment',
};
# Default value translations, grouped by field name. Within each group the
# key is the GNATS value and the value is its replacement. CONFIG_VARS
# installs this as the default of 'translate_values'.
use constant VALUE_MAP => {
    bug_severity => {
        'serious'      => 'major',
        'cosmetic'     => 'trivial',
        'new-feature'  => 'enhancement',
        'non-critical' => 'normal',
    },
    bug_status => {
        'open'      => 'NEW',
        'analyzed'  => 'ASSIGNED',
        'suspended' => 'RESOLVED',
        'feedback'  => 'RESOLVED',
        'released'  => 'VERIFIED',
    },
    bug_status_resolution => {
        'feedback'  => 'FIXED',
        'released'  => 'FIXED',
        'closed'    => 'FIXED',
        'suspended' => 'LATER',
    },
    priority => {
        'medium' => 'Normal',
    },
};
# Configuration variables specific to the GNATS migrator. Each entry has a
# name, a default value, and a description made of "#"-prefixed lines.
# All descriptions are literal text, so every heredoc is non-interpolating.
use constant GNATS_CONFIG_VARS => (
    {
        name    => 'gnats_path',
        default => '/var/lib/gnats',
        desc    => <<'END',
# The path to the directory that contains the GNATS database.
END
    },
    {
        name    => 'default_email_domain',
        default => 'example.com',
        desc    => <<'END',
# Some GNATS users do not have full email addresses, but Bugzilla requires
# every user to have an email address. What domain should be appended to
# usernames that don't have emails, to make them into email addresses?
# (For example, if you leave this at the default, "unknown" would become
# "unknown@example.com".)
END
    },
    {
        name    => 'component_name',
        default => 'General',
        desc    => <<'END',
# GNATS has only "Category" to classify bugs. However, Bugzilla has a
# multi-level system of Products that contain Components. When importing
# GNATS categories, they become a Product with one Component. What should
# the name of that Component be?
END
    },
    {
        name    => 'version_regex',
        default => '',
        desc    => <<'END',
# In GNATS, the "version" field can contain almost anything. However, in
# Bugzilla, it's a drop-down, so you don't want too many choices in there.
# If you specify a regular expression here, versions will be tested against
# this regular expression, and if they match, the first match (the first set
# of parentheses in the regular expression, also called "$1") will be used
# as the version value for the bug instead of the full version value specified
# in GNATS.
END
    },
    {
        name    => 'default_originator',
        default => 'gnats-admin',
        desc    => <<'END',
# Sometimes, a PR has no valid Originator, so we fall back to the From
# header of the email. If the From header also isn't a valid username
# (is just a name with spaces in it--we can't convert that to an email
# address) then this username (which can either be a GNATS username or an
# email address) will be considered to be the Originator of the PR.
END
    },
);
# Returns the GNATS-specific configuration variables followed by the ones
# from the parent class. The entries named 'translate_fields' and
# 'translate_values' get FIELD_MAP and VALUE_MAP as their defaults.
sub CONFIG_VARS {
    my $self = shift;
    my @config_vars = (GNATS_CONFIG_VARS, $self->SUPER::CONFIG_VARS);
    # Processed in this order: field map first, then value map.
    my @overrides = (
        [ 'translate_fields', FIELD_MAP ],
        [ 'translate_values', VALUE_MAP ],
    );
    foreach my $override (@overrides) {
        my ($var_name, $default) = @$override;
        my $var = first { $_->{name} eq $var_name } @config_vars;
        $var->{default} = $default;
    }
    return @config_vars;
}
# Directories that aren't projects, or that we shouldn't be parsing
use constant SKIP_DIRECTORIES => ('gnats-adm', 'gnats-queue', 'pending');
# A list of field names. It is not referenced by the code visible in this
# file; judging by the name these are fields that are not meant to become
# comment text -- TODO confirm against the callers.
use constant NON_COMMENT_FIELDS => (
    'Audit-Trail',
    'Closed-Date',
    'Confidential',
    'Unformatted',
    'attachments',
);
# Certain fields can contain things that look like fields in them,
# because they might contain quoted emails. To avoid mis-parsing,
# we list out here the exact order of fields at the end of a PR
# and wait for the next field to consider that we actually have
# a field to parse.
use constant END_FIELD_ORDER => [
    'Description',
    'How-To-Repeat',
    'Fix',
    'Release-Note',
    'Audit-Trail',
    'Unformatted',
];
# Definition of the custom field that FIELD_MAP maps the GNATS "Class"
# field onto: a single-select field described as "Type".
use constant CUSTOM_FIELDS => {
    cf_type => {
        type        => FIELD_TYPE_SINGLE_SELECT,
        description => 'Type',
    },
};
use constant {
    # Matches a line that starts a GNATS field (">Name: rest of line") and
    # captures the field name and the remainder of the line.
    FIELD_REGEX => qr/^>(\S+):\s*(.*)$/,
    # Used for bugs that have no Synopsis.
    NO_SUBJECT => "(no subject)",
    # This is the divider that GNATS uses between attachments in its database
    # files. It's missing two hyphens at the beginning because MIME Emails use
    # -- to start boundaries.
    GNATS_BOUNDARY => '----gnatsweb-attachment----',
    # Version values longer than this many characters get shortened by
    # translate_value.
    LONG_VERSION_LENGTH => 32,
};
#########
# Hooks #
#########
# Hook that tidies the collected data before insertion: strips the
# GNATS-only key from every user and gives every product a "version".
sub before_insert {
    my $self = shift;
    # gnats_id isn't a valid User::create field, and we don't need it
    # anymore now.
    for my $user (@{ $self->users }) {
        delete $user->{gnats_id};
    }
    # Grab a version out of a bug for each product, so that there is a
    # valid "version" argument for Bugzilla::Product->create.
    for my $product (@{ $self->products }) {
        my $versioned_bug = first {
            $_->{product} eq $product->{name} and $_->{version}
        } @{ $self->bugs };
        $product->{version} = defined $versioned_bug
            ? $versioned_bug->{version}
            : 'unspecified';
    }
}
#########
# Users #
#########
# Reads the GNATS users from "<gnats_path>/gnats-adm/responsible".
#
# Every line that is neither blank nor a "#" comment is split on ":" into
# at most three parts: id, real name and email. A missing email becomes
# "<id>@<default_email_domain>".
#
# Returns an array ref of hashes with realname, login_name and gnats_id.
# Dies with "<file>: <error>" if the file cannot be opened.
sub _read_users {
    my $self = shift;
    my $path = $self->config('gnats_path');
    my $file = "$path/gnats-adm/responsible";
    $self->debug("Reading users from $file");
    my $default_domain = $self->config('default_email_domain');
    open(my $users_fh, '<', $file) or die "$file: $!";
    my @users;
    # Read line by line rather than slurping the whole file into a list.
    while (my $line = <$users_fh>) {
        $line = trim($line);
        # A blank line would otherwise become a user with an empty id and
        # the login name "@<default_email_domain>".
        next if $line eq '' or $line =~ /^#/;
        my ($id, $name, $email) = split(':', $line, 3);
        $email ||= "$id\@$default_domain";
        # We can't call our own translate_value, because that depends on
        # the existence of user_map, which doesn't exist until after
        # this method. However, we still want to translate any users found.
        $email = $self->SUPER::translate_value('user', $email);
        push(@users, { realname => $name, login_name => $email,
                       gnats_id => $id });
    }
    close($users_fh);
    return \@users;
}
# Returns a hash ref mapping each user's gnats_id to their login_name.
# The map is built from $self->users on first use and stored on the object.
sub user_map {
    my $self = shift;
    if (!$self->{user_map}) {
        my %login_for;
        foreach my $user (@{ $self->users }) {
            $login_for{ $user->{gnats_id} } = $user->{login_name};
        }
        $self->{user_map} = \%login_for;
    }
    return $self->{user_map};
}
# Registers a user seen during parsing. Does nothing when the id already
# has a defined entry in user_map; otherwise records the id => email pair
# in the map and appends a new user hash to $self->users.
sub add_user {
    my ($self, $id, $email) = @_;
    my $known_users = $self->user_map;
    return if defined $known_users->{$id};
    $known_users->{$id} = $email;
    push(@{ $self->users }, { login_name => $email, gnats_id => $id });
}
# Turns a GNATS user value into an email address. A value with a defined
# entry in user_map is replaced by that entry; any other value without an
# "@" gets "@<default_email_domain>" appended; everything else is returned
# unchanged.
sub user_to_email {
    my ($self, $value) = @_;
    my $mapped = $self->user_map->{$value};
    return $mapped if defined $mapped;
    return $value if $value =~ /@/;
    my $domain = $self->config('default_email_domain');
    return "$value\@$domain";
}
############
# Products #
############
# Reads the GNATS categories from "<gnats_path>/gnats-adm/categories" and
# turns each into a product with a single component.
#
# Every line that is neither blank nor a "#" comment is split on ":" into
# at most four parts: name, description, assignee and a comma-separated CC
# list. The component takes its name from the 'component_name' setting.
#
# Returns an array ref of product hashes (name, description, components).
# Dies with "<file>: <error>" if the file cannot be opened.
sub _read_products {
    my $self = shift;
    my $path = $self->config('gnats_path');
    my $file = "$path/gnats-adm/categories";
    $self->debug("Reading categories from $file");
    open(my $categories_fh, '<', $file) or die "$file: $!";
    my @products;
    # Read line by line rather than slurping the whole file into a list.
    while (my $line = <$categories_fh>) {
        $line = trim($line);
        # A blank line would otherwise become a product with an empty name.
        next if $line eq '' or $line =~ /^#/;
        my ($name, $description, $assigned_to, $cc) = split(':', $line, 4);
        my %product = ( name => $name, description => $description );
        # The CC part is absent when the line has fewer than four fields.
        my @initial_cc = defined $cc ? split(',', $cc) : ();
        @initial_cc = @{ $self->translate_value('user', \@initial_cc) };
        $assigned_to = $self->translate_value('user', $assigned_to);
        my %component = ( name         => $self->config('component_name'),
                          description  => $description,
                          initialowner => $assigned_to,
                          initial_cc   => \@initial_cc );
        $product{components} = [\%component];
        push(@products, \%product);
    }
    close($categories_fh);
    return \@products;
}
################
# Reading Bugs #
################
# Collects the bugs of every project directory under gnats_path, skipping
# non-directories and the names in SKIP_DIRECTORIES. Returns an array ref
# sorted numerically by the "Number" field.
sub _read_bugs {
    my $self = shift;
    my $path = $self->config('gnats_path');
    my %is_skipped = map { $_ => 1 } SKIP_DIRECTORIES;
    my @bugs;
    foreach my $directory (glob("$path/*")) {
        next unless -d $directory;
        next if $is_skipped{ basename($directory) };
        push(@bugs, @{ $self->_parse_project($directory) });
    }
    return [ sort { $a->{Number} <=> $b->{Number} } @bugs ];
}
# Parses every PR file in one project directory and returns an array ref of
# the parsed field hashes. Prints the directory name, and shows progress
# unless running verbosely.
sub _parse_project {
    my ($self, $directory) = @_;
    # Sometimes other files get into gnats directories.
    my @pr_files = grep { basename($_) =~ /^\d+$/ } glob("$directory/*");
    $self->debug("Reading Project: $directory");
    my $total = scalar @pr_files;
    my $current = 1;
    print basename($directory) . ":\n";
    my @bugs;
    foreach my $file (@pr_files) {
        push(@bugs, $self->_parse_bug_file($file));
        next if $self->verbose;
        indicate_progress({ current => $current++, every => 5,
                            total => $total });
    }
    return \@bugs;
}
# Parses one GNATS PR file and returns a hash ref of its fields, with the
# parsed attachments stored under "attachments".
# Dies with "<file>: <error>" if the file cannot be opened.
sub _parse_bug_file {
    my ($self, $file) = @_;
    $self->debug("Reading $file");
    open(my $pr_fh, '<', $file) or die "$file: $!";
    my $message = Email::Simple::FromHandle->new($pr_fh);
    my $fields = $self->_get_gnats_field_data($message);
    # We parse attachments here instead of during translate_bug,
    # because otherwise we'd be taking up huge amounts of memory storing
    # all the raw attachment data in memory.
    my $attachments = $self->_parse_attachments($fields);
    $fields->{attachments} = $attachments;
    close($pr_fh);
    return $fields;
}
# Extracts the GNATS fields from a PR message.
#
# $email must provide reset_handle, handle and header (in this file it is
# an Email::Simple::FromHandle object). Lines read from the handle that
# match FIELD_REGEX start a new field; all other lines are added to the
# value of the current field.
#
# Returns a hash ref of field name => value, plus "cc" (array ref) when the
# message has a Cc header. An Originator that does not look like a valid
# email is replaced by the From address, or by the 'default_originator'
# setting when From is not valid either.
sub _get_gnats_field_data {
    my ($self, $email) = @_;
    my ($current_field, @value_lines, %fields);
    $email->reset_handle();
    my $handle = $email->handle;
    # Read line by line rather than slurping the whole file into a list.
    while (my $line = <$handle>) {
        # If this line starts a field name
        if ($line =~ FIELD_REGEX) {
            my ($new_field, $rest_of_line) = ($1, $2);
            # If this is one of the last few PR fields, then make sure
            # that we're getting our fields in the right order.
            my $new_field_valid = 1;
            my $current_field_pos =
                lsearch(END_FIELD_ORDER, $current_field || '');
            if ($current_field_pos > -1) {
                my $new_field_pos = lsearch(END_FIELD_ORDER, $new_field);
                # We accept any field, as long as it's later than this one.
                $new_field_valid = $new_field_pos > $current_field_pos ? 1 : 0;
            }
            if ($new_field_valid) {
                if ($current_field) {
                    $fields{$current_field} = _handle_lines(\@value_lines);
                }
                # Always start the new field with an empty buffer. Without
                # this, any text that precedes the very first field would be
                # glued onto the front of that field's value.
                @value_lines = ();
                $current_field = $new_field;
                $line = $rest_of_line;
            }
        }
        push(@value_lines, $line) if defined $line;
    }
    # A file without any field must not create an entry under an
    # undefined key.
    if ($current_field) {
        $fields{$current_field} = _handle_lines(\@value_lines);
    }
    $fields{cc} = [$email->header('Cc')] if $email->header('Cc');
    # If the Originator is invalid and we don't have a translation for it,
    # use the From header instead.
    my $originator = $self->translate_value('reporter', $fields{Originator},
                                            { check_only => 1 });
    if ($originator !~ Bugzilla->params->{emailregexp}) {
        # We use the raw header sometimes, because it looks like "From: user"
        # which Email::Address won't parse but we can still use.
        my $address = $email->header('From');
        my ($parsed) = Email::Address->parse($address);
        if ($parsed) {
            $address = $parsed->address;
        }
        if ($address) {
            $self->debug(
                "PR $fields{Number} had an Originator that was not a valid"
                . " user ($fields{Originator}). Using From ($address)"
                . " instead.\n");
            my $address_email = $self->translate_value('reporter', $address,
                                                       { check_only => 1 });
            if ($address_email !~ Bugzilla->params->{emailregexp}) {
                $self->debug(" From was also invalid, using default_originator.\n");
                $address = $self->config('default_originator');
            }
            $fields{Originator} = $address;
        }
    }
    $self->debug(\%fields, 3);
    return \%fields;
}
# Joins the collected lines of one field into a single string and strips
# all trailing whitespace. Takes an array ref, returns the string.
sub _handle_lines {
    my ($lines) = @_;
    (my $text = join('', @$lines)) =~ s/\s+$//;
    return $text;
}
####################
# Translating Bugs #
####################
# Converts the parsed GNATS fields of one PR into a Bugzilla bug hash.
# On top of the parent class translation it attaches the attachments,
# appends any "_add_to_comment" text to the description, turns the audit
# trail into history entries and comments, sets the component, and falls
# back to NO_SUBJECT when there is no summary. Returns the bug hash ref.
sub translate_bug {
    my ($self, $fields) = @_;
    my ($bug, $other_fields) = $self->SUPER::translate_bug($fields);
    $bug->{attachments} = delete $other_fields->{attachments};
    if (defined $other_fields->{_add_to_comment}) {
        $bug->{comment} .= delete $other_fields->{_add_to_comment};
    }

    my ($changes, $extra_comment) =
        $self->_parse_audit_trail($bug, $other_fields->{'Audit-Trail'});
    # Every change that carries a "Why" text yields a comment; the text is
    # taken out of the history entry itself.
    my @comments;
    foreach my $change (@$changes) {
        next unless exists $change->{comment};
        push(@comments, {
            thetext  => delete $change->{comment},
            who      => $change->{who},
            bug_when => $change->{bug_when},
        });
    }
    $bug->{history} = $changes;
    # Audit trail text that isn't part of any change is attributed to the
    # reporter.
    if (trim($extra_comment)) {
        my $comment_time = $bug->{delta_ts} || $bug->{creation_ts};
        push(@comments, { thetext  => $extra_comment,
                          who      => $bug->{reporter},
                          bug_when => $comment_time });
    }
    $bug->{comments} = \@comments;

    $bug->{component} = $self->config('component_name');
    $bug->{short_desc} ||= NO_SUBJECT;

    # Attachments take their submitter and timestamp from the bug.
    foreach my $attachment (@{ $bug->{attachments} || [] }) {
        $attachment->{submitter} = $bug->{reporter};
        $attachment->{creation_ts} = $bug->{creation_ts};
    }
    $self->debug($bug, 3);
    return $bug;
}
# Parses the GNATS "Audit-Trail" field of one bug.
#
# Returns (\@changes, $extra_comment) in list context:
#   * \@changes       - one hash per recorded change, as stored by
#                       _store_audit_change, with whichever of "added",
#                       "removed", "who", "bug_when" and "comment" were seen.
#   * $extra_comment  - all lines that belong to no change.
# An empty audit trail returns just an empty array ref. An audit trail with
# no "X-Changed-Y:" line at all is returned whole as $extra_comment.
#
# A change record is closed when a different field shows up, and also when
# a new "From-To" line arrives for the same field, so that two consecutive
# changes of one field are both kept instead of the second overwriting
# the first.
sub _parse_audit_trail {
    my ($self, $bug, $audit_trail) = @_;
    return [] if !trim($audit_trail);
    $self->debug(" Parsing audit trail...", 2);
    if ($audit_trail !~ /^\S+-Changed-\S+:/ms) {
        # This is just a comment from the bug's creator.
        $self->debug(" Audit trail is just a comment.", 2);
        return ([], $audit_trail);
    }
    my (@changes, %current_data, $on_why);
    my $extra_comment = '';
    my $current_field;
    my @all_lines = split("\n", $audit_trail);
    foreach my $line (@all_lines) {
        # GNATS history looks like:
        # Status-Changed-From-To: open->closed
        # Status-Changed-By: jack
        # Status-Changed-When: Mon May 12 14:46:59 2003
        # Status-Changed-Why:
        # This is some comment here about the change.
        if ($line =~ /^(\S+)-Changed-(\S+):(.*)/) {
            my ($field, $column, $value) = ($1, $2, $3);
            my $bz_field = $self->translate_field($field);
            # If it's not a field we're importing, we don't care about
            # its history.
            next if !$bz_field;
            # GNATS doesn't track values for description changes,
            # unfortunately, and that's the only information we'd be able to
            # use in Bugzilla for the audit trail on that field.
            next if $bz_field eq 'comment';
            # Close the record being collected when the field changes, or
            # when the same field starts another change with a fresh
            # "From-To" line.
            my $starts_new_change =
                defined $current_field
                && ($bz_field ne $current_field
                    || ($column eq 'From-To' && %current_data));
            if ($starts_new_change) {
                $self->_store_audit_change(
                    \@changes, $current_field, \%current_data);
                %current_data = ();
            }
            $current_field = $bz_field;
            $value = trim($value);
            $self->debug(" $bz_field $column: $value", 3);
            if ($column eq 'From-To') {
                my ($from, $to) = split('->', $value, 2);
                # Sometimes there's just a - instead of a -> between the values.
                if (!defined($to)) {
                    ($from, $to) = split('-', $value, 2);
                }
                $current_data{added} = $to;
                $current_data{removed} = $from;
            }
            elsif ($column eq 'By') {
                my $email = $self->translate_value('user', $value);
                # Sometimes we hit users in the audit trail that we haven't
                # seen anywhere else.
                $current_data{who} = $email;
            }
            elsif ($column eq 'When') {
                $current_data{bug_when} = $self->parse_date($value);
            }
            if ($column eq 'Why') {
                $value = '' if !defined $value;
                $current_data{comment} = $value;
                $on_why = 1;
            }
            else {
                $on_why = 0;
            }
        }
        elsif ($on_why) {
            # "Why" lines are indented four characters.
            $line =~ s/^\s{4}//;
            $current_data{comment} .= "$line\n";
        }
        else {
            $self->debug(
                "Extra Audit-Trail line on $bug->{product} $bug->{bug_id}:"
                . " $line\n", 2);
            $extra_comment .= "$line\n";
        }
    }
    # Nothing is pending when every change line belonged to a field that
    # is not imported; storing then would add a change with no field.
    if (defined $current_field) {
        $self->_store_audit_change(\@changes, $current_field, \%current_data);
    }
    return (\@changes, $extra_comment);
}
# Finishes one audit-trail change: records the field name in the data hash,
# translates its "removed" and "added" values for that field, and appends a
# shallow copy of the hash to the list of changes.
sub _store_audit_change {
    my ($self, $changes, $old_field, $current_data) = @_;
    $current_data->{field} = $old_field;
    foreach my $side (qw(removed added)) {
        $current_data->{$side} =
            $self->translate_value($old_field, $current_data->{$side});
    }
    push(@$changes, { %$current_data });
}
# Extracts the attachments that GNATS keeps in the "Unformatted" field.
#
# The field is removed from $fields, its GNATS dividers are replaced by a
# random MIME boundary, and the result is wrapped in a synthetic
# multipart/mixed message so that Email::MIME can split it into parts.
#
# Returns an array ref of attachment hashes (filename, description,
# mimetype, and data as a temporary file handle holding the body). Text in
# front of the first attachment is appended to $fields->{_add_to_comment},
# the key that translate_bug adds to the bug's description.
sub _parse_attachments {
    my ($self, $fields) = @_;
    my $unformatted = delete $fields->{'Unformatted'};
    # A PR without an Unformatted field has no attachments.
    return [] if !defined $unformatted;
    my $gnats_boundary = GNATS_BOUNDARY;
    # A sanity checker to make sure that we're parsing attachments right.
    my $num_attachments = 0;
    $num_attachments++ while ($unformatted =~ /\Q$gnats_boundary\E/g);
    # Sometimes there's a GNATS_BOUNDARY that is on the same line as other data.
    $unformatted =~ s/(\S\s*)\Q$gnats_boundary\E$/$1\n$gnats_boundary/mg;
    # Often the "Unformatted" section starts with stuff before
    # ----gnatsweb-attachment---- that isn't necessary.
    $unformatted =~ s/^\s*From:.+?Reply-to:[^\n]+//s;
    $unformatted = trim($unformatted);
    return [] if !$unformatted;
    $self->debug('Reading attachments...', 2);
    my $boundary = generate_random_password(48);
    $unformatted =~ s/\Q$gnats_boundary\E/--$boundary/g;
    # Sometimes the whole Unformatted section is indented by exactly
    # one space, and needs to be fixed.
    if ($unformatted =~ /--\Q$boundary\E\n /) {
        $unformatted =~ s/^ //mg;
    }
    # The blank lines below are required: a blank line is what separates
    # headers from body, both for the message and for the first part.
    $unformatted = <<END;
From: nobody
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="$boundary"

This is a multi-part message in MIME format.
--$boundary
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 7bit

$unformatted
--$boundary--
END
    my $email = Email::MIME->new(\$unformatted);
    my @parts = $email->parts;
    # Remove the fake body.
    my $part1 = shift @parts;
    if ($part1->body) {
        $self->debug(" Additional Unformatted data found on "
                     . $fields->{Category} . " bug " . $fields->{Number});
        $self->debug($part1->body, 3);
        # This key must match the one that translate_bug looks for.
        $fields->{_add_to_comment} .= "\n\nUnformatted:\n" . $part1->body;
    }
    my @attachments;
    foreach my $part (@parts) {
        $self->debug(' Parsing attachment: ' . $part->filename);
        my $temp_fh = IO::File->new_tmpfile or die ("Can't create tempfile: $!");
        $temp_fh->binmode;
        print $temp_fh $part->body;
        my $content_type = $part->content_type;
        # Keep only the MIME type, not the "name" parameter after it.
        $content_type =~ s/; name=.+$//;
        my $attachment = { filename    => $part->filename,
                           description => $part->filename,
                           mimetype    => $content_type,
                           data        => $temp_fh };
        $self->debug($attachment, 3);
        push(@attachments, $attachment);
    }
    # Both sides are counts, so compare them as numbers.
    if (scalar(@attachments) != $num_attachments) {
        warn "WARNING: Expected $num_attachments attachments but got "
             . scalar(@attachments) . "\n" ;
        $self->debug($unformatted, 3);
    }
    return \@attachments;
}
# GNATS-specific value translation wrapped around the parent class one.
#
# Before calling the parent: plain (non-reference) values of user fields
# are reduced to the email address or bare user name they contain, and
# non-empty "version" values are shortened, either with the 'version_regex'
# setting or, when longer than LONG_VERSION_LENGTH, by keeping the first
# thing that looks like a version number.
#
# After calling the parent: reference results are returned as they are.
# Results for user fields are turned into an email address, translated once
# more, and registered through add_user unless the "check_only" option is
# set.
sub translate_value {
    my $self = shift;
    my ($field, $value, $options) = @_;
    $options ||= {};
    my $is_user_field = grep($_ eq $field, $self->USER_FIELDS) ? 1 : 0;

    if ($is_user_field and !ref($value)) {
        if ($value =~ /(\S+\@\S+)/) {
            $value = $1;
            $value =~ s/^<//;
            $value =~ s/>$//;
        }
        else {
            # Sometimes names have extra stuff on the end like "(Somebody's Name)"
            $value =~ s/\s+\(.+\)$//;
            # Sometimes user fields look like "(user)" instead of just "user".
            $value =~ s/^\((.+)\)$/$1/;
            $value = trim($value);
        }
    }

    if ($field eq 'version' and $value ne '') {
        my $version_re = $self->config('version_regex');
        if ($version_re and $value =~ $version_re) {
            $value = $1;
        }
        # In the GNATS that I tested this with, there were many extremely long
        # values for "version" that caused some import problems (they were
        # longer than the max allowed version value). So if the version value
        # is longer than 32 characters, pull out the first thing that looks
        # like a version number.
        elsif (length($value) > LONG_VERSION_LENGTH) {
            $value =~ s/^.+?\b(\d[\w\.]+)\b.+$/$1/;
        }
    }

    # Hand the parent the caller's arguments, with the cleaned-up value.
    my @super_args = @_;
    $super_args[1] = $value;
    $value = $self->SUPER::translate_value(@super_args);
    return $value if ref $value;
    return $value unless $is_user_field;

    my $gnats_user = $value;
    $value = $self->user_to_email($value);
    $super_args[1] = $value;
    # If we got something new from user_to_email, do any necessary
    # translation of it.
    $value = $self->SUPER::translate_value(@super_args);
    $self->add_user($gnats_user, $value) unless $options->{check_only};
    return $value;
}
1;