1#!/usr/bin/perl -w
2# (c) 2007, Joe Perches <joe@perches.com>
3#           created from checkpatch.pl
4#
5# Print selected MAINTAINERS information for
6# the files modified in a patch or for a file
7#
8# usage: perl scripts/get_maintainer.pl [OPTIONS] <patch>
9#        perl scripts/get_maintainer.pl [OPTIONS] -f <file>
10#
11# Licensed under the terms of the GNU GPL License version 2
12
13use strict;
14
15my $P = $0;
16my $V = '0.26-xen';
17
18use Getopt::Long qw(:config no_auto_abbrev);
19
20my $xen_path = "./";
21my $email = 1;
22my $email_usename = 1;
23my $email_maintainer = 1;
24my $email_reviewer = 1;
25my $email_list = 1;
26my $email_subscriber_list = 0;
27my $email_git_penguin_chiefs = 0;
28my $email_git = 0;
29my $email_git_all_signature_types = 0;
30my $email_git_blame = 0;
31my $email_git_blame_signatures = 1;
32my $email_git_fallback = 0;
33my $email_git_min_signatures = 1;
34my $email_git_max_maintainers = 5;
35my $email_git_min_percent = 5;
36my $email_git_since = "1-year-ago";
37my $email_hg_since = "-365";
38my $interactive = 0;
39my $email_remove_duplicates = 0;
40my $email_use_mailmap = 1;
41my $email_drop_the_rest_supporter_if_supporter_found = 1;
42my $output_multiline = 1;
43my $output_separator = ", ";
44my $output_roles = 0;
45my $output_rolestats = 0;
46my $scm = 0;
47my $web = 0;
48my $subsystem = 0;
49my $status = 0;
50my $keywords = 1;
51my $sections = 0;
52my $file_emails = 0;
53my $from_filename = 0;
54my $pattern_depth = 0;
55my $version = 0;
56my $help = 0;
57
58my $vcs_used = 0;
59
60my $exit = 0;
61
62my %commit_author_hash;
63my %commit_signer_hash;
64
65my @penguin_chief = ();
66push(@penguin_chief, "Linus Torvalds:torvalds\@linux-foundation.org");
67#Andrew wants in on most everything - 2009/01/14
68#push(@penguin_chief, "Andrew Morton:akpm\@linux-foundation.org");
69
70my @penguin_chief_names = ();
71foreach my $chief (@penguin_chief) {
72    if ($chief =~ m/^(.*):(.*)/) {
73	my $chief_name = $1;
74	my $chief_addr = $2;
75	push(@penguin_chief_names, $chief_name);
76    }
77}
78my $penguin_chiefs = "\(" . join("|", @penguin_chief_names) . "\)";
79
80# Signature types of people who are either
81# 	a) responsible for the code in question, or
82# 	b) familiar enough with it to give relevant feedback
83my @signature_tags = ();
84push(@signature_tags, "Signed-off-by:");
85push(@signature_tags, "Reviewed-by:");
86push(@signature_tags, "Acked-by:");
87
88my $signature_pattern = "\(" . join("|", @signature_tags) . "\)";
89
90# rfc822 email address - preloaded methods go here.
91my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])";
92my $rfc822_char = '[\\000-\\377]';
93
94# VCS command support: class-like functions and strings
95
96my %VCS_cmds;
97
98my %VCS_cmds_git = (
99    "execute_cmd" => \&git_execute_cmd,
100    "available" => '(which("git") ne "") && (-d ".git")',
101    "find_signers_cmd" =>
102	"git log --no-color --follow --since=\$email_git_since " .
103	    '--format="GitCommit: %H%n' .
104		      'GitAuthor: %an <%ae>%n' .
105		      'GitDate: %aD%n' .
106		      'GitSubject: %s%n' .
107		      '%b%n"' .
108	    " -- \$file",
109    "find_commit_signers_cmd" =>
110	"git log --no-color " .
111	    '--format="GitCommit: %H%n' .
112		      'GitAuthor: %an <%ae>%n' .
113		      'GitDate: %aD%n' .
114		      'GitSubject: %s%n' .
115		      '%b%n"' .
116	    " -1 \$commit",
117    "find_commit_author_cmd" =>
118	"git log --no-color " .
119	    '--format="GitCommit: %H%n' .
120		      'GitAuthor: %an <%ae>%n' .
121		      'GitDate: %aD%n' .
122		      'GitSubject: %s%n"' .
123	    " -1 \$commit",
124    "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
125    "blame_file_cmd" => "git blame -l \$file",
126    "commit_pattern" => "^GitCommit: ([0-9a-f]{40,40})",
127    "blame_commit_pattern" => "^([0-9a-f]+) ",
128    "author_pattern" => "^GitAuthor: (.*)",
129    "subject_pattern" => "^GitSubject: (.*)",
130);
131
132my %VCS_cmds_hg = (
133    "execute_cmd" => \&hg_execute_cmd,
134    "available" => '(which("hg") ne "") && (-d ".hg")',
135    "find_signers_cmd" =>
136	"hg log --date=\$email_hg_since " .
137	    "--template='HgCommit: {node}\\n" .
138	                "HgAuthor: {author}\\n" .
139			"HgSubject: {desc}\\n'" .
140	    " -- \$file",
141    "find_commit_signers_cmd" =>
142	"hg log " .
143	    "--template='HgSubject: {desc}\\n'" .
144	    " -r \$commit",
145    "find_commit_author_cmd" =>
146	"hg log " .
147	    "--template='HgCommit: {node}\\n" .
148		        "HgAuthor: {author}\\n" .
149			"HgSubject: {desc|firstline}\\n'" .
150	    " -r \$commit",
151    "blame_range_cmd" => "",		# not supported
152    "blame_file_cmd" => "hg blame -n \$file",
153    "commit_pattern" => "^HgCommit: ([0-9a-f]{40,40})",
154    "blame_commit_pattern" => "^([ 0-9a-f]+):",
155    "author_pattern" => "^HgAuthor: (.*)",
156    "subject_pattern" => "^HgSubject: (.*)",
157);
158
159my $conf = which_conf(".get_maintainer.conf");
160if (-f $conf) {
161    my @conf_args;
162    open(my $conffile, '<', "$conf")
163	or warn "$P: Can't find a readable .get_maintainer.conf file $!\n";
164
165    while (<$conffile>) {
166	my $line = $_;
167
168	$line =~ s/\s*\n?$//g;
169	$line =~ s/^\s*//g;
170	$line =~ s/\s+/ /g;
171
172	next if ($line =~ m/^\s*#/);
173	next if ($line =~ m/^\s*$/);
174
175	my @words = split(" ", $line);
176	foreach my $word (@words) {
177	    last if ($word =~ m/^#/);
178	    push (@conf_args, $word);
179	}
180    }
181    close($conffile);
182    unshift(@ARGV, @conf_args) if @conf_args;
183}
184
185if (!GetOptions(
186		'email!' => \$email,
187		'git!' => \$email_git,
188		'git-all-signature-types!' => \$email_git_all_signature_types,
189		'git-blame!' => \$email_git_blame,
190		'git-blame-signatures!' => \$email_git_blame_signatures,
191		'git-fallback!' => \$email_git_fallback,
192		'git-chief-penguins!' => \$email_git_penguin_chiefs,
193		'git-min-signatures=i' => \$email_git_min_signatures,
194		'git-max-maintainers=i' => \$email_git_max_maintainers,
195		'git-min-percent=i' => \$email_git_min_percent,
196		'git-since=s' => \$email_git_since,
197		'hg-since=s' => \$email_hg_since,
198		'i|interactive!' => \$interactive,
199		'remove-duplicates!' => \$email_remove_duplicates,
200		'mailmap!' => \$email_use_mailmap,
201		'drop_the_rest_supporter!' => \$email_drop_the_rest_supporter_if_supporter_found,
202		'm!' => \$email_maintainer,
203		'r!' => \$email_reviewer,
204		'n!' => \$email_usename,
205		'l!' => \$email_list,
206		's!' => \$email_subscriber_list,
207		'multiline!' => \$output_multiline,
208		'roles!' => \$output_roles,
209		'rolestats!' => \$output_rolestats,
210		'separator=s' => \$output_separator,
211		'subsystem!' => \$subsystem,
212		'status!' => \$status,
213		'scm!' => \$scm,
214		'web!' => \$web,
215		'pattern-depth=i' => \$pattern_depth,
216		'k|keywords!' => \$keywords,
217		'sections!' => \$sections,
218		'fe|file-emails!' => \$file_emails,
219		'f|file' => \$from_filename,
220		'v|version' => \$version,
221		'h|help|usage' => \$help,
222		)) {
223    die "$P: invalid argument - use --help if necessary\n";
224}
225
226if ($help != 0) {
227    usage();
228    exit 0;
229}
230
231if ($version != 0) {
232    print("${P} ${V}\n");
233    exit 0;
234}
235
236if (-t STDIN && !@ARGV) {
237    # We're talking to a terminal, but have no command line arguments.
238    die "$P: missing patchfile or -f file - use --help if necessary\n";
239}
240
241$output_multiline = 0 if ($output_separator ne ", ");
242$output_rolestats = 1 if ($interactive);
243$output_roles = 1 if ($output_rolestats);
244
245if ($sections) {
246    $email = 0;
247    $email_list = 0;
248    $scm = 0;
249    $status = 0;
250    $subsystem = 0;
251    $web = 0;
252    $keywords = 0;
253    $interactive = 0;
254} else {
255    my $selections = $email + $scm + $status + $subsystem + $web;
256    if ($selections == 0) {
257	die "$P:  Missing required option: email, scm, status, subsystem or web\n";
258    }
259}
260
261if ($email &&
262    ($email_maintainer + $email_reviewer +
263     $email_list + $email_subscriber_list +
264     $email_git + $email_git_penguin_chiefs + $email_git_blame) == 0) {
265    die "$P: Please select at least 1 email option\n";
266}
267
268## Read MAINTAINERS for type/value pairs
269
270my @typevalue = ();
271my %keyword_hash;
272
273open (my $maint, '<', "${xen_path}MAINTAINERS")
274    or die "$P: Can't open MAINTAINERS: $!\n";
275while (<$maint>) {
276    my $line = $_;
277
278    if ($line =~ m/^([A-Z]):\s*(.*)/) {
279	my $type = $1;
280	my $value = $2;
281
282	##Filename pattern matching
283	if ($type eq "F" || $type eq "X") {
284	    # Bash brace expansion, not nested
285	    # match {,*,*} and transform ',' to '|' one by one.
286	    # While there is more than one ',', convert one to '|'.
287	    while ($value =~ s/([^\\])\{(|[^},]*[^,\\]),((|[^},]*[^,\\]),(|[^}]*[^\\]))\}/$1\{$2|$3\}/g) {
288	    }
289	    $value =~ s/([^\\])\{(|[^},]*[^,\\]),(|[^}]*[^\\])\}/$1($2|$3)/g;
290
291	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
292	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
293	    $value =~ s/\?/\./g;         ##Convert ? to .
294	    ##if pattern is a directory and it lacks a trailing slash, add one
295	    if ((-d $value)) {
296		$value =~ s@([^/])$@$1/@;
297	    }
298	} elsif ($type eq "K") {
299	    $keyword_hash{@typevalue} = $value;
300	}
301	push(@typevalue, "$type:$value");
302    } elsif (!/^(\s)*$/) {
303	$line =~ s/\n$//g;
304	push(@typevalue, $line);
305    }
306}
307close($maint);
308
309# Check whether we have a V entry under the REST
310# and use it to get the file's version number
311my $maintainers_file_version = get_xen_maintainers_file_version();
312if (!$maintainers_file_version) {
313    die "$P: the MAINTAINERS file ".
314         "in the current directory does not appear to be from ".
315         "the xen.git source tree or a sister tree.\n\n".
316         "A 'V: xen-maintainers-<version>' entry under THE REST ".
317         "is needed to identify a Xen MAINTAINERS file.\n\n";
318}
319
320#
321# Read mail address map
322#
323
324my $mailmap;
325
326read_mailmap();
327
328sub read_mailmap {
329    $mailmap = {
330	names => {},
331	addresses => {}
332    };
333
334    return if (!$email_use_mailmap || !(-f "${xen_path}.mailmap"));
335
336    open(my $mailmap_file, '<', "${xen_path}.mailmap")
337	or warn "$P: Can't open .mailmap: $!\n";
338
339    while (<$mailmap_file>) {
340	s/#.*$//; #strip comments
341	s/^\s+|\s+$//g; #trim
342
343	next if (/^\s*$/); #skip empty lines
344	#entries have one of the following formats:
345	# name1 <mail1>
346	# <mail1> <mail2>
347	# name1 <mail1> <mail2>
348	# name1 <mail1> name2 <mail2>
349	# (see man git-shortlog)
350
351	if (/^([^<]+)<([^>]+)>$/) {
352	    my $real_name = $1;
353	    my $address = $2;
354
355	    $real_name =~ s/\s+$//;
356	    ($real_name, $address) = parse_email("$real_name <$address>");
357	    $mailmap->{names}->{$address} = $real_name;
358
359	} elsif (/^<([^>]+)>\s*<([^>]+)>$/) {
360	    my $real_address = $1;
361	    my $wrong_address = $2;
362
363	    $mailmap->{addresses}->{$wrong_address} = $real_address;
364
365	} elsif (/^(.+)<([^>]+)>\s*<([^>]+)>$/) {
366	    my $real_name = $1;
367	    my $real_address = $2;
368	    my $wrong_address = $3;
369
370	    $real_name =~ s/\s+$//;
371	    ($real_name, $real_address) =
372		parse_email("$real_name <$real_address>");
373	    $mailmap->{names}->{$wrong_address} = $real_name;
374	    $mailmap->{addresses}->{$wrong_address} = $real_address;
375
376	} elsif (/^(.+)<([^>]+)>\s*(.+)\s*<([^>]+)>$/) {
377	    my $real_name = $1;
378	    my $real_address = $2;
379	    my $wrong_name = $3;
380	    my $wrong_address = $4;
381
382	    $real_name =~ s/\s+$//;
383	    ($real_name, $real_address) =
384		parse_email("$real_name <$real_address>");
385
386	    $wrong_name =~ s/\s+$//;
387	    ($wrong_name, $wrong_address) =
388		parse_email("$wrong_name <$wrong_address>");
389
390	    my $wrong_email = format_email($wrong_name, $wrong_address, 1);
391	    $mailmap->{names}->{$wrong_email} = $real_name;
392	    $mailmap->{addresses}->{$wrong_email} = $real_address;
393	}
394    }
395    close($mailmap_file);
396}
397
398## use the filenames on the command line or find the filenames in the patchfiles
399
400my @files = ();
401my @range = ();
402my @keyword_tvi = ();
403my @file_emails = ();
404
405if (!@ARGV) {
406    push(@ARGV, "&STDIN");
407}
408
409foreach my $file (@ARGV) {
410    if ($file ne "&STDIN") {
411	##if $file is a directory and it lacks a trailing slash, add one
412	if ((-d $file)) {
413	    $file =~ s@([^/])$@$1/@;
414	} elsif (!(-f $file)) {
415	    die "$P: file '${file}' not found\n";
416	}
417    }
418    if ($from_filename) {
419	push(@files, $file);
420	if ($file ne "MAINTAINERS" && -f $file && ($keywords || $file_emails)) {
421	    open(my $f, '<', $file)
422		or die "$P: Can't open $file: $!\n";
423	    my $text = do { local($/) ; <$f> };
424	    close($f);
425	    if ($keywords) {
426		foreach my $line (keys %keyword_hash) {
427		    if ($text =~ m/$keyword_hash{$line}/x) {
428			push(@keyword_tvi, $line);
429		    }
430		}
431	    }
432	    if ($file_emails) {
433		my @poss_addr = $text =~ m$[A-Za-zÀ-ÿ\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g;
434		push(@file_emails, clean_file_emails(@poss_addr));
435	    }
436	}
437    } else {
438	my $file_cnt = @files;
439	my $lastfile;
440
441	open(my $patch, "< $file")
442	    or die "$P: Can't open $file: $!\n";
443
444	# We can check arbitrary information before the patch
445	# like the commit message, mail headers, etc...
446	# This allows us to match arbitrary keywords against any part
447	# of a git format-patch generated file (subject tags, etc...)
448
449	my $patch_prefix = "";			#Parsing the intro
450
451	while (<$patch>) {
452	    my $patch_line = $_;
453	    if (m/^ mode change [0-7]+ => [0-7]+ (\S+)\s*$/) {
454		my $filename = $1;
455		push(@files, $filename);
456	    } elsif (m/^rename (?:from|to) (\S+)\s*$/) {
457		my $filename = $1;
458		push(@files, $filename);
459	    } elsif (m/^diff --git a\/(\S+) b\/(\S+)\s*$/) {
460		my $filename1 = $1;
461		my $filename2 = $2;
462		push(@files, $filename1);
463		push(@files, $filename2);
464	    } elsif (m/^\+\+\+\s+(\S+)/ or m/^---\s+(\S+)/) {
465		my $filename = $1;
466		if ($1 ne "/dev/null") { #Ignore the no-file placeholder
467		    $filename =~ s@^[^/]*/@@;
468		    $filename =~ s@\n@@;
469		    $lastfile = $filename;
470		    push(@files, $filename);
471		}
472		$patch_prefix = "^[+-].*";	#Now parsing the actual patch
473	    } elsif (m/^\@\@ -(\d+),(\d+)/) {
474		if ($email_git_blame) {
475		    push(@range, "$lastfile:$1:$2");
476		}
477	    } elsif ($keywords) {
478		foreach my $line (keys %keyword_hash) {
479		    if ($patch_line =~ m/${patch_prefix}$keyword_hash{$line}/x) {
480			push(@keyword_tvi, $line);
481		    }
482		}
483	    }
484	}
485	close($patch);
486
487	if ($file_cnt == @files) {
488	    warn "$P: file '${file}' doesn't appear to be a patch.  "
489		. "Add -f to options?\n";
490	}
491	@files = sort_and_uniq(@files);
492    }
493}
494
495@file_emails = uniq(@file_emails);
496
497my %email_hash_name;
498my %email_hash_address;
499my @email_to = ();
500my %hash_list_to;
501my @list_to = ();
502my @scm = ();
503my @web = ();
504my @subsystem = ();
505my @status = ();
506my %deduplicate_name_hash = ();
507my %deduplicate_address_hash = ();
508
509my @maintainers = get_maintainers();
510
511if (@maintainers) {
512    @maintainers = merge_email(@maintainers);
513    output(@maintainers);
514}
515
516if ($scm) {
517    @scm = uniq(@scm);
518    output(@scm);
519}
520
521if ($status) {
522    @status = uniq(@status);
523    output(@status);
524}
525
526if ($subsystem) {
527    @subsystem = uniq(@subsystem);
528    output(@subsystem);
529}
530
531if ($web) {
532    @web = uniq(@web);
533    output(@web);
534}
535
536exit($exit);
537
538sub range_is_maintained {
539    my ($start, $end) = @_;
540
541    for (my $i = $start; $i < $end; $i++) {
542	my $line = $typevalue[$i];
543	if ($line =~ m/^([A-Z]):\s*(.*)/) {
544	    my $type = $1;
545	    my $value = $2;
546	    if ($type eq 'S') {
547		if ($value =~ /(maintain|support)/i) {
548		    return 1;
549		}
550	    }
551	}
552    }
553    return 0;
554}
555
556sub range_has_maintainer {
557    my ($start, $end) = @_;
558
559    for (my $i = $start; $i < $end; $i++) {
560	my $line = $typevalue[$i];
561	if ($line =~ m/^([A-Z]):\s*(.*)/) {
562	    my $type = $1;
563	    my $value = $2;
564	    if ($type eq 'M') {
565		return 1;
566	    }
567	}
568    }
569    return 0;
570}
571
572sub get_xen_maintainers_file_version {
573    my $tvi = find_first_section();
574
575    while ($tvi < @typevalue) {
576        my $start = find_starting_index($tvi);
577        my $end = find_ending_index($tvi);
578        my $i;
579
580        for ($i = $start; $i < $end; $i++) {
581            my $line = $typevalue[$i];
582            if ($line =~ m/^V:\s*(.*)/) {
583                # Note that get_maintainer_role() requires processing
584                # of more of the file. So do it directly
585                if ($typevalue[$start] eq "THE REST") {
586                    if ($line =~ m/xen-maintainers-(.*)/) {
587                        return $1;
588                    }
589                }
590	    }
591        }
592        $tvi = $end + 1;
593    }
594    return 0;
595}
596
597sub get_maintainers {
598    %email_hash_name = ();
599    %email_hash_address = ();
600    %commit_author_hash = ();
601    %commit_signer_hash = ();
602    @email_to = ();
603    %hash_list_to = ();
604    @list_to = ();
605    @scm = ();
606    @web = ();
607    @subsystem = ();
608    @status = ();
609    %deduplicate_name_hash = ();
610    %deduplicate_address_hash = ();
611    if ($email_git_all_signature_types) {
612	$signature_pattern = "(.+?)[Bb][Yy]:";
613    } else {
614	$signature_pattern = "\(" . join("|", @signature_tags) . "\)";
615    }
616
617    # Find responsible parties
618
619    my %exact_pattern_match_hash = ();
620    # By default "THE REST" will be suppressed.
621    my $suppress_the_rest = 1;
622
623    foreach my $file (@files) {
624
625	my %hash;
626	my $tvi = find_first_section();
627	# Unless stated otherwise, a file is maintained by "THE REST"
628	my $file_maintained_by_the_rest = 1;
629	while ($tvi < @typevalue) {
630	    my $start = find_starting_index($tvi);
631	    my $end = find_ending_index($tvi);
632	    my $exclude = 0;
633	    my $i;
634
635	    #Do not match excluded file patterns
636
637	    for ($i = $start; $i < $end; $i++) {
638		my $line = $typevalue[$i];
639		if ($line =~ m/^([A-Z]):\s*(.*)/) {
640		    my $type = $1;
641		    my $value = $2;
642		    if ($type eq 'X') {
643			if (file_match_pattern($file, $value)) {
644			    $exclude = 1;
645			    last;
646			}
647		    }
648		}
649	    }
650
651	    if (!$exclude) {
652		for ($i = $start; $i < $end; $i++) {
653		    my $line = $typevalue[$i];
654		    if ($line =~ m/^([A-Z]):\s*(.*)/) {
655			my $type = $1;
656			my $value = $2;
657			if ($type eq 'F') {
658			    if (file_match_pattern($file, $value)) {
659				my $value_pd = ($value =~ tr@/@@);
660				my $file_pd = ($file  =~ tr@/@@);
661				$value_pd++ if (substr($value,-1,1) ne "/");
662				$value_pd = -1 if ($value =~ /^\.\*/);
663				if ($value_pd >= $file_pd &&
664				    range_is_maintained($start, $end) &&
665				    range_has_maintainer($start, $end)) {
666				    $exact_pattern_match_hash{$file} = 1;
667				}
668				if ($pattern_depth == 0 ||
669				    (($file_pd - $value_pd) < $pattern_depth)) {
670				    $hash{$tvi} = $value_pd;
671				}
672			    }
673			} elsif ($type eq 'N') {
674			    if ($file =~ m/$value/x) {
675				$hash{$tvi} = 0;
676			    }
677			}
678		    }
679		}
680	    }
681	    $tvi = $end + 1;
682	}
683
684	foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
685	    add_categories($line);
686	    my $role = get_maintainer_role($line);
687
688	    # Check the role, if it is not "THE REST" then the file is not
689	    # only maintained by "THE REST".
690	    if ( get_maintainer_role($line) ne "supporter:THE REST" ) {
691		    $file_maintained_by_the_rest = 0;
692	    }
693
694	    if ($sections) {
695		my $i;
696		my $start = find_starting_index($line);
697		my $end = find_ending_index($line);
698		for ($i = $start; $i < $end; $i++) {
699		    my $line = $typevalue[$i];
700		    if ($line =~ /^[FX]:/) {		##Restore file patterns
701			$line =~ s/([^\\])\.([^\*])/$1\?$2/g;
702			$line =~ s/([^\\])\.$/$1\?/g;	##Convert . back to ?
703			$line =~ s/\\\./\./g;       	##Convert \. to .
704			$line =~ s/\.\*/\*/g;       	##Convert .* to *
705			## Convert (|) back to {,}
706			# match (|*|*) and transform '|' to ',' one by one
707			# While there is more than one '|', convert one to ','.
708			while ($line =~ s/([^\\])\((|[^)|]*[^|\\])\|((|[^)|]*[^|\\])\|(|[^)]*[^\\]))\)/$1($2,$3)/g) {
709			}
710			$line =~ s/([^\\])\((|[^)|]*[^|\\])\|(|[^)]*[^\\])\)/$1\{$2,$3\}/g;
711		    }
712		    $line =~ s/^([A-Z]):/$1:\t/g;
713		    print("$line\n");
714		}
715		print("\n");
716	    }
717	}
718	# If the file is only maintained by "THE REST", then CC all of them on
719	# the patch.
720	$suppress_the_rest = 0 if $file_maintained_by_the_rest;
721    }
722
723    if ($keywords) {
724	@keyword_tvi = sort_and_uniq(@keyword_tvi);
725	foreach my $line (@keyword_tvi) {
726	    add_categories($line);
727	}
728    }
729
730    if ($email_drop_the_rest_supporter_if_supporter_found &&
731	$suppress_the_rest && $#email_to > 0) {
732        my @email_new;
733        my $do_replace = 0;
734        foreach my $email (@email_to) {
735            if ($email->[1] ne 'supporter:THE REST') {
736                $do_replace = 1;
737                push @email_new, $email;
738            }
739        }
740        @email_to = @email_new
741            if $do_replace;
742    }
743
744    foreach my $email (@email_to, @list_to) {
745	$email->[0] = deduplicate_email($email->[0]);
746    }
747
748    foreach my $file (@files) {
749	if ($email &&
750	    ($email_git || ($email_git_fallback &&
751			    !$exact_pattern_match_hash{$file}))) {
752	    vcs_file_signoffs($file);
753	}
754	if ($email && $email_git_blame) {
755	    vcs_file_blame($file);
756	}
757    }
758
759    if ($email) {
760	foreach my $chief (@penguin_chief) {
761	    if ($chief =~ m/^(.*):(.*)/) {
762		my $email_address;
763
764		$email_address = format_email($1, $2, $email_usename);
765		if ($email_git_penguin_chiefs) {
766		    push(@email_to, [$email_address, 'chief penguin']);
767		} else {
768		    @email_to = grep($_->[0] !~ /${email_address}/, @email_to);
769		}
770	    }
771	}
772
773	foreach my $email (@file_emails) {
774	    my ($name, $address) = parse_email($email);
775
776	    my $tmp_email = format_email($name, $address, $email_usename);
777	    push_email_address($tmp_email, '');
778	    add_role($tmp_email, 'in file');
779	}
780    }
781
782    my @to = ();
783    if ($email || $email_list) {
784	if ($email) {
785	    @to = (@to, @email_to);
786	}
787	if ($email_list) {
788	    @to = (@to, @list_to);
789	}
790    }
791
792    if ($interactive) {
793	@to = interactive_get_maintainers(\@to);
794    }
795
796    return @to;
797}
798
799sub file_match_pattern {
800    my ($file, $pattern) = @_;
801    if (substr($pattern, -1) eq "/") {
802	if ($file =~ m@^$pattern@) {
803	    return 1;
804	}
805    } else {
806	if ($file =~ m@^$pattern@) {
807	    my $s1 = ($file =~ tr@/@@);
808	    my $s2 = ($pattern =~ tr@/@@);
809	    if ($s1 == $s2) {
810		return 1;
811	    }
812	}
813    }
814    return 0;
815}
816
817sub usage {
818    print <<EOT;
819usage: $P [options] patchfile
820       $P [options] -f file|directory
821version: $V
822
823MAINTAINER field selection options:
824  --email => print email address(es) if any
825    --git => include recent git \*-by: signers
826    --git-all-signature-types => include signers regardless of signature type
827        or use only ${signature_pattern} signers (default: $email_git_all_signature_types)
828    --git-fallback => use git when no exact MAINTAINERS pattern (default: $email_git_fallback)
829    --git-chief-penguins => include ${penguin_chiefs}
830    --git-min-signatures => number of signatures required (default: $email_git_min_signatures)
831    --git-max-maintainers => maximum maintainers to add (default: $email_git_max_maintainers)
832    --git-min-percent => minimum percentage of commits required (default: $email_git_min_percent)
833    --git-blame => use git blame to find modified commits for patch or file
834    --git-since => git history to use (default: $email_git_since)
835    --hg-since => hg history to use (default: $email_hg_since)
836    --interactive => display a menu (mostly useful if used with the --git option)
837    --m => include maintainer(s) if any
838    --r => include reviewer(s) if any
839    --n => include name 'Full Name <addr\@domain.tld>'
840    --l => include list(s) if any
841    --s => include subscriber only list(s) if any
842    --remove-duplicates => minimize duplicate email names/addresses
843    --roles => show roles (status:subsystem, git-signer, list, etc...)
844    --rolestats => show roles and statistics (commits/total_commits, %)
845    --file-emails => add email addresses found in -f file (default: 0 (off))
846  --scm => print SCM tree(s) if any
847  --status => print status if any
848  --subsystem => print subsystem name if any
849  --web => print website(s) if any
850
851Output type options:
852  --separator [, ] => separator for multiple entries on 1 line
853    using --separator also sets --nomultiline if --separator is not [, ]
854  --multiline => print 1 entry per line
855
856Other options:
857  --pattern-depth => Number of pattern directory traversals (default: 0 (all))
858  --keywords => scan patch for keywords (default: $keywords)
859  --sections => print all of the subsystem sections with pattern matches
860  --mailmap => use .mailmap file (default: $email_use_mailmap)
861  --version => show version
862  --help => show this help information
863
864Default options:
865  [--email --nogit --git-fallback --m --r --n --l --multiline -pattern-depth=0
866   --remove-duplicates --rolestats]
867
868Notes:
869  Using "-f directory" may give unexpected results:
870      Used with "--git", git signators for _all_ files in and below
871          directory are examined as git recurses directories.
872          Any specified X: (exclude) pattern matches are _not_ ignored.
873      Used with "--nogit", directory is used as a pattern match,
874          no individual file within the directory or subdirectory
875          is matched.
876      Used with "--git-blame", does not iterate all files in directory
877  Using "--git-blame" is slow and may add old committers and authors
878      that are no longer active maintainers to the output.
879  Using "--roles" or "--rolestats" with git send-email --cc-cmd or any
880      other automated tools that expect only ["name"] <email address>
881      may not work because of additional output after <email address>.
882  Using "--rolestats" and "--git-blame" shows the #/total=% commits,
883      not the percentage of the entire file authored.  # of commits is
884      not a good measure of amount of code authored.  1 major commit may
885      contain a thousand lines, 5 trivial commits may modify a single line.
886  If git is not installed, but mercurial (hg) is installed and an .hg
887      repository exists, the following options apply to mercurial:
888          --git,
889          --git-min-signatures, --git-max-maintainers, --git-min-percent, and
890          --git-blame
891      Use --hg-since not --git-since to control date selection
892  File ".get_maintainer.conf", if it exists in the Xen source root
893      directory, can change whatever get_maintainer defaults are desired.
894      Entries in this file can be any command line argument.
895      This file is prepended to any additional command line arguments.
896      Multiple lines and # comments are allowed.
897EOT
898}
899
900sub parse_email {
901    my ($formatted_email) = @_;
902
903    my $name = "";
904    my $address = "";
905
906    if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
907	$name = $1;
908	$address = $2;
909    } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
910	$address = $1;
911    } elsif ($formatted_email =~ /^(.+\@\S*).*$/) {
912	$address = $1;
913    }
914
915    $name =~ s/^\s+|\s+$//g;
916    $name =~ s/^\"|\"$//g;
917    $address =~ s/^\s+|\s+$//g;
918
919    if ($name =~ /[^\w \-]/i) {  	 ##has "must quote" chars
920	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
921	$name = "\"$name\"";
922    }
923
924    return ($name, $address);
925}
926
927sub format_email {
928    my ($name, $address, $usename) = @_;
929
930    my $formatted_email;
931
932    $name =~ s/^\s+|\s+$//g;
933    $name =~ s/^\"|\"$//g;
934    $address =~ s/^\s+|\s+$//g;
935
936    if ($name =~ /[^\w \-]/i) {          ##has "must quote" chars
937	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
938	$name = "\"$name\"";
939    }
940
941    if ($usename) {
942	if ("$name" eq "") {
943	    $formatted_email = "$address";
944	} else {
945	    $formatted_email = "$name <$address>";
946	}
947    } else {
948	$formatted_email = $address;
949    }
950
951    return $formatted_email;
952}
953
954sub find_first_section {
955    my $index = 0;
956
957    while ($index < @typevalue) {
958	my $tv = $typevalue[$index];
959	if (($tv =~ m/^([A-Z]):\s*(.*)/)) {
960	    last;
961	}
962	$index++;
963    }
964
965    return $index;
966}
967
968sub find_starting_index {
969    my ($index) = @_;
970
971    while ($index > 0) {
972	my $tv = $typevalue[$index];
973	if (!($tv =~ m/^([A-Z]):\s*(.*)/)) {
974	    last;
975	}
976	$index--;
977    }
978
979    return $index;
980}
981
982sub find_ending_index {
983    my ($index) = @_;
984
985    while ($index < @typevalue) {
986	my $tv = $typevalue[$index];
987	if (!($tv =~ m/^([A-Z]):\s*(.*)/)) {
988	    last;
989	}
990	$index++;
991    }
992
993    return $index;
994}
995
996sub get_maintainer_role {
997    my ($index) = @_;
998
999    my $i;
1000    my $start = find_starting_index($index);
1001    my $end = find_ending_index($index);
1002
1003    my $role = "unknown";
1004    my $subsystem = $typevalue[$start];
1005    if (length($subsystem) > 20) {
1006	$subsystem = substr($subsystem, 0, 17);
1007	$subsystem =~ s/\s*$//;
1008	$subsystem = $subsystem . "...";
1009    }
1010
1011    for ($i = $start + 1; $i < $end; $i++) {
1012	my $tv = $typevalue[$i];
1013	if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1014	    my $ptype = $1;
1015	    my $pvalue = $2;
1016	    if ($ptype eq "S") {
1017		$role = $pvalue;
1018	    }
1019	}
1020    }
1021
1022    $role = lc($role);
1023    if      ($role eq "supported") {
1024	$role = "supporter";
1025    } elsif ($role eq "maintained") {
1026	$role = "maintainer";
1027    } elsif ($role eq "odd fixes") {
1028	$role = "odd fixer";
1029    } elsif ($role eq "orphan") {
1030	$role = "orphan minder";
1031    } elsif ($role eq "obsolete") {
1032	$role = "obsolete minder";
1033    } elsif ($role eq "buried alive in reporters") {
1034	$role = "chief penguin";
1035    }
1036
1037    return $role . ":" . $subsystem;
1038}
1039
1040sub get_list_role {
1041    my ($index) = @_;
1042
1043    my $i;
1044    my $start = find_starting_index($index);
1045    my $end = find_ending_index($index);
1046
1047    my $subsystem = $typevalue[$start];
1048    if (length($subsystem) > 20) {
1049	$subsystem = substr($subsystem, 0, 17);
1050	$subsystem =~ s/\s*$//;
1051	$subsystem = $subsystem . "...";
1052    }
1053
1054    if ($subsystem eq "THE REST") {
1055	$subsystem = "";
1056    }
1057
1058    return $subsystem;
1059}
1060
1061sub add_categories {
1062    my ($index) = @_;
1063
1064    my $i;
1065    my $start = find_starting_index($index);
1066    my $end = find_ending_index($index);
1067
1068    push(@subsystem, $typevalue[$start]);
1069
1070    for ($i = $start + 1; $i < $end; $i++) {
1071	my $tv = $typevalue[$i];
1072	if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1073	    my $ptype = $1;
1074	    my $pvalue = $2;
1075	    if ($ptype eq "L") {
1076		my ($list_name, $list_address) = parse_email($pvalue);
1077		my $list_additional = "";
1078		my $list_role = get_list_role($i);
1079
1080		if ($list_role ne "") {
1081		    $list_role = ":" . $list_role;
1082		}
1083		if ($list_address =~ m/([^\s]+)\s+(.*)$/) {
1084		    $list_address = $1;
1085		    $list_additional = $2;
1086		}
1087		if ($list_additional =~ m/subscribers-only/) {
1088		    if ($email_subscriber_list) {
1089			if (!$hash_list_to{lc($list_address)}) {
1090			    $hash_list_to{lc($list_address)} = 1;
1091			    push(@list_to, [$list_address,
1092					    "subscriber list${list_role}"]);
1093			}
1094		    }
1095		} else {
1096		    if ($email_list) {
1097			if (!$hash_list_to{lc($list_address)}) {
1098			    $hash_list_to{lc($list_address)} = 1;
1099			    if ($list_additional =~ m/moderated/) {
1100				push(@list_to, [$list_address,
1101						"moderated list${list_role}"]);
1102			    } else {
1103				push(@list_to, [$list_address,
1104						"open list${list_role}"]);
1105			    }
1106			}
1107		    }
1108		}
1109	    } elsif ($ptype eq "M") {
1110		my ($name, $address) = parse_email($pvalue);
1111		if ($email_maintainer) {
1112		    my $role = get_maintainer_role($i);
1113		    push_email_addresses($pvalue, $role);
1114		}
1115	    } elsif ($ptype eq "R") {
1116		my ($name, $address) = parse_email($pvalue);
1117		if ($email_reviewer) {
1118		    push_email_addresses($pvalue, 'reviewer');
1119		}
1120	    } elsif ($ptype eq "T") {
1121		push(@scm, $pvalue);
1122	    } elsif ($ptype eq "W") {
1123		push(@web, $pvalue);
1124	    } elsif ($ptype eq "S") {
1125		push(@status, $pvalue);
1126	    }
1127	}
1128    }
1129}
1130
1131sub email_inuse {
1132    my ($name, $address) = @_;
1133
1134    return 1 if (($name eq "") && ($address eq ""));
1135    return 1 if (($name ne "") && exists($email_hash_name{lc($name)}));
1136    return 1 if (($address ne "") && exists($email_hash_address{lc($address)}));
1137
1138    return 0;
1139}
1140
1141sub push_email_address {
1142    my ($line, $role) = @_;
1143
1144    my ($name, $address) = parse_email($line);
1145
1146    if ($address eq "") {
1147	return 0;
1148    }
1149
1150    if (!$email_remove_duplicates) {
1151	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1152    } elsif (!email_inuse($name, $address)) {
1153	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1154	$email_hash_name{lc($name)}++ if ($name ne "");
1155	$email_hash_address{lc($address)}++;
1156    }
1157
1158    return 1;
1159}
1160
1161sub push_email_addresses {
1162    my ($address, $role) = @_;
1163
1164    my @address_list = ();
1165
1166    if (rfc822_valid($address)) {
1167	push_email_address($address, $role);
1168    } elsif (@address_list = rfc822_validlist($address)) {
1169	my $array_count = shift(@address_list);
1170	while (my $entry = shift(@address_list)) {
1171	    push_email_address($entry, $role);
1172	}
1173    } else {
1174	if (!push_email_address($address, $role)) {
1175	    warn("Invalid MAINTAINERS address: '" . $address . "'\n");
1176	}
1177    }
1178}
1179
1180sub add_role {
1181    my ($line, $role) = @_;
1182
1183    my ($name, $address) = parse_email($line);
1184    my $email = format_email($name, $address, $email_usename);
1185
1186    foreach my $entry (@email_to) {
1187	if ($email_remove_duplicates) {
1188	    my ($entry_name, $entry_address) = parse_email($entry->[0]);
1189	    if (($name eq $entry_name || $address eq $entry_address)
1190		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1191	    ) {
1192		if ($entry->[1] eq "") {
1193		    $entry->[1] = "$role";
1194		} else {
1195		    $entry->[1] = "$entry->[1],$role";
1196		}
1197	    }
1198	} else {
1199	    if ($email eq $entry->[0]
1200		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1201	    ) {
1202		if ($entry->[1] eq "") {
1203		    $entry->[1] = "$role";
1204		} else {
1205		    $entry->[1] = "$entry->[1],$role";
1206		}
1207	    }
1208	}
1209    }
1210}
1211
1212sub which {
1213    my ($bin) = @_;
1214
1215    foreach my $path (split(/:/, $ENV{PATH})) {
1216	if (-e "$path/$bin") {
1217	    return "$path/$bin";
1218	}
1219    }
1220
1221    return "";
1222}
1223
1224sub which_conf {
1225    my ($conf) = @_;
1226
1227    foreach my $path (split(/:/, ".:$ENV{HOME}:.scripts")) {
1228	if (-e "$path/$conf") {
1229	    return "$path/$conf";
1230	}
1231    }
1232
1233    return "";
1234}
1235
1236sub mailmap_email {
1237    my ($line) = @_;
1238
1239    my ($name, $address) = parse_email($line);
1240    my $email = format_email($name, $address, 1);
1241    my $real_name = $name;
1242    my $real_address = $address;
1243
1244    if (exists $mailmap->{names}->{$email} ||
1245	exists $mailmap->{addresses}->{$email}) {
1246	if (exists $mailmap->{names}->{$email}) {
1247	    $real_name = $mailmap->{names}->{$email};
1248	}
1249	if (exists $mailmap->{addresses}->{$email}) {
1250	    $real_address = $mailmap->{addresses}->{$email};
1251	}
1252    } else {
1253	if (exists $mailmap->{names}->{$address}) {
1254	    $real_name = $mailmap->{names}->{$address};
1255	}
1256	if (exists $mailmap->{addresses}->{$address}) {
1257	    $real_address = $mailmap->{addresses}->{$address};
1258	}
1259    }
1260    return format_email($real_name, $real_address, 1);
1261}
1262
1263sub mailmap {
1264    my (@addresses) = @_;
1265
1266    my @mapped_emails = ();
1267    foreach my $line (@addresses) {
1268	push(@mapped_emails, mailmap_email($line));
1269    }
1270    merge_by_realname(@mapped_emails) if ($email_use_mailmap);
1271    return @mapped_emails;
1272}
1273
1274sub merge_by_realname {
1275    my %address_map;
1276    my (@emails) = @_;
1277
1278    foreach my $email (@emails) {
1279	my ($name, $address) = parse_email($email);
1280	if (exists $address_map{$name}) {
1281	    $address = $address_map{$name};
1282	    $email = format_email($name, $address, 1);
1283	} else {
1284	    $address_map{$name} = $address;
1285	}
1286    }
1287}
1288
1289sub git_execute_cmd {
1290    my ($cmd) = @_;
1291    my @lines = ();
1292
1293    my $output = `$cmd`;
1294    $output =~ s/^\s*//gm;
1295    @lines = split("\n", $output);
1296
1297    return @lines;
1298}
1299
1300sub hg_execute_cmd {
1301    my ($cmd) = @_;
1302    my @lines = ();
1303
1304    my $output = `$cmd`;
1305    @lines = split("\n", $output);
1306
1307    return @lines;
1308}
1309
1310sub extract_formatted_signatures {
1311    my (@signature_lines) = @_;
1312
1313    my @type = @signature_lines;
1314
1315    s/\s*(.*):.*/$1/ for (@type);
1316
1317    # cut -f2- -d":"
1318    s/\s*.*:\s*(.+)\s*/$1/ for (@signature_lines);
1319
1320## Reformat email addresses (with names) to avoid badly written signatures
1321
1322    foreach my $signer (@signature_lines) {
1323	$signer = deduplicate_email($signer);
1324    }
1325
1326    return (\@type, \@signature_lines);
1327}
1328
1329sub vcs_find_signers {
1330    my ($cmd) = @_;
1331    my $commits;
1332    my @lines = ();
1333    my @signatures = ();
1334
1335    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1336
1337    my $pattern = $VCS_cmds{"commit_pattern"};
1338
1339    $commits = grep(/$pattern/, @lines);	# of commits
1340
1341    @signatures = grep(/^[ \t]*${signature_pattern}.*\@.*$/, @lines);
1342
1343    return (0, @signatures) if !@signatures;
1344
1345    save_commits_by_author(@lines) if ($interactive);
1346    save_commits_by_signer(@lines) if ($interactive);
1347
1348    if (!$email_git_penguin_chiefs) {
1349	@signatures = grep(!/${penguin_chiefs}/i, @signatures);
1350    }
1351
1352    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
1353
1354    return ($commits, @$signers_ref);
1355}
1356
1357sub vcs_find_author {
1358    my ($cmd) = @_;
1359    my @lines = ();
1360
1361    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1362
1363    if (!$email_git_penguin_chiefs) {
1364	@lines = grep(!/${penguin_chiefs}/i, @lines);
1365    }
1366
1367    return @lines if !@lines;
1368
1369    my @authors = ();
1370    foreach my $line (@lines) {
1371	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
1372	    my $author = $1;
1373	    my ($name, $address) = parse_email($author);
1374	    $author = format_email($name, $address, 1);
1375	    push(@authors, $author);
1376	}
1377    }
1378
1379    save_commits_by_author(@lines) if ($interactive);
1380    save_commits_by_signer(@lines) if ($interactive);
1381
1382    return @authors;
1383}
1384
1385sub vcs_save_commits {
1386    my ($cmd) = @_;
1387    my @lines = ();
1388    my @commits = ();
1389
1390    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1391
1392    foreach my $line (@lines) {
1393	if ($line =~ m/$VCS_cmds{"blame_commit_pattern"}/) {
1394	    push(@commits, $1);
1395	}
1396    }
1397
1398    return @commits;
1399}
1400
1401sub vcs_blame {
1402    my ($file) = @_;
1403    my $cmd;
1404    my @commits = ();
1405
1406    return @commits if (!(-f $file));
1407
1408    if (@range && $VCS_cmds{"blame_range_cmd"} eq "") {
1409	my @all_commits = ();
1410
1411	$cmd = $VCS_cmds{"blame_file_cmd"};
1412	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1413	@all_commits = vcs_save_commits($cmd);
1414
1415	foreach my $file_range_diff (@range) {
1416	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1417	    my $diff_file = $1;
1418	    my $diff_start = $2;
1419	    my $diff_length = $3;
1420	    next if ("$file" ne "$diff_file");
1421	    for (my $i = $diff_start; $i < $diff_start + $diff_length; $i++) {
1422		push(@commits, $all_commits[$i]);
1423	    }
1424	}
1425    } elsif (@range) {
1426	foreach my $file_range_diff (@range) {
1427	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1428	    my $diff_file = $1;
1429	    my $diff_start = $2;
1430	    my $diff_length = $3;
1431	    next if ("$file" ne "$diff_file");
1432	    $cmd = $VCS_cmds{"blame_range_cmd"};
1433	    $cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1434	    push(@commits, vcs_save_commits($cmd));
1435	}
1436    } else {
1437	$cmd = $VCS_cmds{"blame_file_cmd"};
1438	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1439	@commits = vcs_save_commits($cmd);
1440    }
1441
1442    foreach my $commit (@commits) {
1443	$commit =~ s/^\^//g;
1444    }
1445
1446    return @commits;
1447}
1448
1449my $printed_novcs = 0;
1450sub vcs_exists {
1451    %VCS_cmds = %VCS_cmds_git;
1452    return 1 if eval $VCS_cmds{"available"};
1453    %VCS_cmds = %VCS_cmds_hg;
1454    return 2 if eval $VCS_cmds{"available"};
1455    %VCS_cmds = ();
1456    if (!$printed_novcs) {
1457	warn("$P: No supported VCS found.  Add --nogit to options?\n");
1458	warn("Using a git repository produces better results.\n");
1459	warn("Try latest git repository using:\n");
1460	warn("git clone git://xenbits.xen.org/xen.git\n");
1461	$printed_novcs = 1;
1462    }
1463    return 0;
1464}
1465
1466sub vcs_is_git {
1467    vcs_exists();
1468    return $vcs_used == 1;
1469}
1470
1471sub vcs_is_hg {
1472    return $vcs_used == 2;
1473}
1474
1475sub interactive_get_maintainers {
1476    my ($list_ref) = @_;
1477    my @list = @$list_ref;
1478
1479    vcs_exists();
1480
1481    my %selected;
1482    my %authored;
1483    my %signed;
1484    my $count = 0;
1485    my $maintained = 0;
1486    foreach my $entry (@list) {
1487	$maintained = 1 if ($entry->[1] =~ /^(maintainer|supporter)/i);
1488	$selected{$count} = 1;
1489	$authored{$count} = 0;
1490	$signed{$count} = 0;
1491	$count++;
1492    }
1493
1494    #menu loop
1495    my $done = 0;
1496    my $print_options = 0;
1497    my $redraw = 1;
1498    while (!$done) {
1499	$count = 0;
1500	if ($redraw) {
1501	    printf STDERR "\n%1s %2s %-65s",
1502			  "*", "#", "email/list and role:stats";
1503	    if ($email_git ||
1504		($email_git_fallback && !$maintained) ||
1505		$email_git_blame) {
1506		print STDERR "auth sign";
1507	    }
1508	    print STDERR "\n";
1509	    foreach my $entry (@list) {
1510		my $email = $entry->[0];
1511		my $role = $entry->[1];
1512		my $sel = "";
1513		$sel = "*" if ($selected{$count});
1514		my $commit_author = $commit_author_hash{$email};
1515		my $commit_signer = $commit_signer_hash{$email};
1516		my $authored = 0;
1517		my $signed = 0;
1518		$authored++ for (@{$commit_author});
1519		$signed++ for (@{$commit_signer});
1520		printf STDERR "%1s %2d %-65s", $sel, $count + 1, $email;
1521		printf STDERR "%4d %4d", $authored, $signed
1522		    if ($authored > 0 || $signed > 0);
1523		printf STDERR "\n     %s\n", $role;
1524		if ($authored{$count}) {
1525		    my $commit_author = $commit_author_hash{$email};
1526		    foreach my $ref (@{$commit_author}) {
1527			print STDERR "     Author: @{$ref}[1]\n";
1528		    }
1529		}
1530		if ($signed{$count}) {
1531		    my $commit_signer = $commit_signer_hash{$email};
1532		    foreach my $ref (@{$commit_signer}) {
1533			print STDERR "     @{$ref}[2]: @{$ref}[1]\n";
1534		    }
1535		}
1536
1537		$count++;
1538	    }
1539	}
1540	my $date_ref = \$email_git_since;
1541	$date_ref = \$email_hg_since if (vcs_is_hg());
1542	if ($print_options) {
1543	    $print_options = 0;
1544	    if (vcs_exists()) {
1545		print STDERR <<EOT
1546
1547Version Control options:
1548g  use git history      [$email_git]
1549gf use git-fallback     [$email_git_fallback]
1550b  use git blame        [$email_git_blame]
1551bs use blame signatures [$email_git_blame_signatures]
1552c# minimum commits      [$email_git_min_signatures]
1553%# min percent          [$email_git_min_percent]
1554d# history to use       [$$date_ref]
1555x# max maintainers      [$email_git_max_maintainers]
1556t  all signature types  [$email_git_all_signature_types]
1557m  use .mailmap         [$email_use_mailmap]
1558EOT
1559	    }
1560	    print STDERR <<EOT
1561
1562Additional options:
15630  toggle all
1564tm toggle maintainers
1565tg toggle git entries
1566tl toggle open list entries
1567ts toggle subscriber list entries
1568f  emails in file       [$file_emails]
1569k  keywords in file     [$keywords]
1570r  remove duplicates    [$email_remove_duplicates]
1571p# pattern match depth  [$pattern_depth]
1572EOT
1573	}
1574	print STDERR
1575"\n#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): ";
1576
1577	my $input = <STDIN>;
1578	chomp($input);
1579
1580	$redraw = 1;
1581	my $rerun = 0;
1582	my @wish = split(/[, ]+/, $input);
1583	foreach my $nr (@wish) {
1584	    $nr = lc($nr);
1585	    my $sel = substr($nr, 0, 1);
1586	    my $str = substr($nr, 1);
1587	    my $val = 0;
1588	    $val = $1 if $str =~ /^(\d+)$/;
1589
1590	    if ($sel eq "y") {
1591		$interactive = 0;
1592		$done = 1;
1593		$output_rolestats = 0;
1594		$output_roles = 0;
1595		last;
1596	    } elsif ($nr =~ /^\d+$/ && $nr > 0 && $nr <= $count) {
1597		$selected{$nr - 1} = !$selected{$nr - 1};
1598	    } elsif ($sel eq "*" || $sel eq '^') {
1599		my $toggle = 0;
1600		$toggle = 1 if ($sel eq '*');
1601		for (my $i = 0; $i < $count; $i++) {
1602		    $selected{$i} = $toggle;
1603		}
1604	    } elsif ($sel eq "0") {
1605		for (my $i = 0; $i < $count; $i++) {
1606		    $selected{$i} = !$selected{$i};
1607		}
1608	    } elsif ($sel eq "t") {
1609		if (lc($str) eq "m") {
1610		    for (my $i = 0; $i < $count; $i++) {
1611			$selected{$i} = !$selected{$i}
1612			    if ($list[$i]->[1] =~ /^(maintainer|supporter)/i);
1613		    }
1614		} elsif (lc($str) eq "g") {
1615		    for (my $i = 0; $i < $count; $i++) {
1616			$selected{$i} = !$selected{$i}
1617			    if ($list[$i]->[1] =~ /^(author|commit|signer)/i);
1618		    }
1619		} elsif (lc($str) eq "l") {
1620		    for (my $i = 0; $i < $count; $i++) {
1621			$selected{$i} = !$selected{$i}
1622			    if ($list[$i]->[1] =~ /^(open list)/i);
1623		    }
1624		} elsif (lc($str) eq "s") {
1625		    for (my $i = 0; $i < $count; $i++) {
1626			$selected{$i} = !$selected{$i}
1627			    if ($list[$i]->[1] =~ /^(subscriber list)/i);
1628		    }
1629		}
1630	    } elsif ($sel eq "a") {
1631		if ($val > 0 && $val <= $count) {
1632		    $authored{$val - 1} = !$authored{$val - 1};
1633		} elsif ($str eq '*' || $str eq '^') {
1634		    my $toggle = 0;
1635		    $toggle = 1 if ($str eq '*');
1636		    for (my $i = 0; $i < $count; $i++) {
1637			$authored{$i} = $toggle;
1638		    }
1639		}
1640	    } elsif ($sel eq "s") {
1641		if ($val > 0 && $val <= $count) {
1642		    $signed{$val - 1} = !$signed{$val - 1};
1643		} elsif ($str eq '*' || $str eq '^') {
1644		    my $toggle = 0;
1645		    $toggle = 1 if ($str eq '*');
1646		    for (my $i = 0; $i < $count; $i++) {
1647			$signed{$i} = $toggle;
1648		    }
1649		}
1650	    } elsif ($sel eq "o") {
1651		$print_options = 1;
1652		$redraw = 1;
1653	    } elsif ($sel eq "g") {
1654		if ($str eq "f") {
1655		    bool_invert(\$email_git_fallback);
1656		} else {
1657		    bool_invert(\$email_git);
1658		}
1659		$rerun = 1;
1660	    } elsif ($sel eq "b") {
1661		if ($str eq "s") {
1662		    bool_invert(\$email_git_blame_signatures);
1663		} else {
1664		    bool_invert(\$email_git_blame);
1665		}
1666		$rerun = 1;
1667	    } elsif ($sel eq "c") {
1668		if ($val > 0) {
1669		    $email_git_min_signatures = $val;
1670		    $rerun = 1;
1671		}
1672	    } elsif ($sel eq "x") {
1673		if ($val > 0) {
1674		    $email_git_max_maintainers = $val;
1675		    $rerun = 1;
1676		}
1677	    } elsif ($sel eq "%") {
1678		if ($str ne "" && $val >= 0) {
1679		    $email_git_min_percent = $val;
1680		    $rerun = 1;
1681		}
1682	    } elsif ($sel eq "d") {
1683		if (vcs_is_git()) {
1684		    $email_git_since = $str;
1685		} elsif (vcs_is_hg()) {
1686		    $email_hg_since = $str;
1687		}
1688		$rerun = 1;
1689	    } elsif ($sel eq "t") {
1690		bool_invert(\$email_git_all_signature_types);
1691		$rerun = 1;
1692	    } elsif ($sel eq "f") {
1693		bool_invert(\$file_emails);
1694		$rerun = 1;
1695	    } elsif ($sel eq "r") {
1696		bool_invert(\$email_remove_duplicates);
1697		$rerun = 1;
1698	    } elsif ($sel eq "m") {
1699		bool_invert(\$email_use_mailmap);
1700		read_mailmap();
1701		$rerun = 1;
1702	    } elsif ($sel eq "k") {
1703		bool_invert(\$keywords);
1704		$rerun = 1;
1705	    } elsif ($sel eq "p") {
1706		if ($str ne "" && $val >= 0) {
1707		    $pattern_depth = $val;
1708		    $rerun = 1;
1709		}
1710	    } elsif ($sel eq "h" || $sel eq "?") {
1711		print STDERR <<EOT
1712
1713Interactive mode allows you to select the various maintainers, submitters,
1714commit signers and mailing lists that could be CC'd on a patch.
1715
1716Any *'d entry is selected.
1717
1718If you have git or hg installed, you can choose to summarize the commit
1719history of files in the patch.  Also, each line of the current file can
1720be matched to its commit author and that commits signers with blame.
1721
1722Various knobs exist to control the length of time for active commit
1723tracking, the maximum number of commit authors and signers to add,
1724and such.
1725
1726Enter selections at the prompt until you are satisfied that the selected
1727maintainers are appropriate.  You may enter multiple selections separated
1728by either commas or spaces.
1729
1730EOT
1731	    } else {
1732		print STDERR "invalid option: '$nr'\n";
1733		$redraw = 0;
1734	    }
1735	}
1736	if ($rerun) {
1737	    print STDERR "git-blame can be very slow, please have patience..."
1738		if ($email_git_blame);
1739	    goto &get_maintainers;
1740	}
1741    }
1742
1743    #drop not selected entries
1744    $count = 0;
1745    my @new_emailto = ();
1746    foreach my $entry (@list) {
1747	if ($selected{$count}) {
1748	    push(@new_emailto, $list[$count]);
1749	}
1750	$count++;
1751    }
1752    return @new_emailto;
1753}
1754
1755sub bool_invert {
1756    my ($bool_ref) = @_;
1757
1758    if ($$bool_ref) {
1759	$$bool_ref = 0;
1760    } else {
1761	$$bool_ref = 1;
1762    }
1763}
1764
1765sub deduplicate_email {
1766    my ($email) = @_;
1767
1768    my $matched = 0;
1769    my ($name, $address) = parse_email($email);
1770    $email = format_email($name, $address, 1);
1771    $email = mailmap_email($email);
1772
1773    return $email if (!$email_remove_duplicates);
1774
1775    ($name, $address) = parse_email($email);
1776
1777    if ($name ne "" && $deduplicate_name_hash{lc($name)}) {
1778	$name = $deduplicate_name_hash{lc($name)}->[0];
1779	$address = $deduplicate_name_hash{lc($name)}->[1];
1780	$matched = 1;
1781    } elsif ($deduplicate_address_hash{lc($address)}) {
1782	$name = $deduplicate_address_hash{lc($address)}->[0];
1783	$address = $deduplicate_address_hash{lc($address)}->[1];
1784	$matched = 1;
1785    }
1786    if (!$matched) {
1787	$deduplicate_name_hash{lc($name)} = [ $name, $address ];
1788	$deduplicate_address_hash{lc($address)} = [ $name, $address ];
1789    }
1790    $email = format_email($name, $address, 1);
1791    $email = mailmap_email($email);
1792    return $email;
1793}
1794
1795sub save_commits_by_author {
1796    my (@lines) = @_;
1797
1798    my @authors = ();
1799    my @commits = ();
1800    my @subjects = ();
1801
1802    foreach my $line (@lines) {
1803	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
1804	    my $author = $1;
1805	    $author = deduplicate_email($author);
1806	    push(@authors, $author);
1807	}
1808	push(@commits, $1) if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
1809	push(@subjects, $1) if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
1810    }
1811
1812    for (my $i = 0; $i < @authors; $i++) {
1813	my $exists = 0;
1814	foreach my $ref(@{$commit_author_hash{$authors[$i]}}) {
1815	    if (@{$ref}[0] eq $commits[$i] &&
1816		@{$ref}[1] eq $subjects[$i]) {
1817		$exists = 1;
1818		last;
1819	    }
1820	}
1821	if (!$exists) {
1822	    push(@{$commit_author_hash{$authors[$i]}},
1823		 [ ($commits[$i], $subjects[$i]) ]);
1824	}
1825    }
1826}
1827
1828sub save_commits_by_signer {
1829    my (@lines) = @_;
1830
1831    my $commit = "";
1832    my $subject = "";
1833
1834    foreach my $line (@lines) {
1835	$commit = $1 if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
1836	$subject = $1 if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
1837	if ($line =~ /^[ \t]*${signature_pattern}.*\@.*$/) {
1838	    my @signatures = ($line);
1839	    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
1840	    my @types = @$types_ref;
1841	    my @signers = @$signers_ref;
1842
1843	    my $type = $types[0];
1844	    my $signer = $signers[0];
1845
1846	    $signer = deduplicate_email($signer);
1847
1848	    my $exists = 0;
1849	    foreach my $ref(@{$commit_signer_hash{$signer}}) {
1850		if (@{$ref}[0] eq $commit &&
1851		    @{$ref}[1] eq $subject &&
1852		    @{$ref}[2] eq $type) {
1853		    $exists = 1;
1854		    last;
1855		}
1856	    }
1857	    if (!$exists) {
1858		push(@{$commit_signer_hash{$signer}},
1859		     [ ($commit, $subject, $type) ]);
1860	    }
1861	}
1862    }
1863}
1864
1865sub vcs_assign {
1866    my ($role, $divisor, @lines) = @_;
1867
1868    my %hash;
1869    my $count = 0;
1870
1871    return if (@lines <= 0);
1872
1873    if ($divisor <= 0) {
1874	warn("Bad divisor in " . (caller(0))[3] . ": $divisor\n");
1875	$divisor = 1;
1876    }
1877
1878    @lines = mailmap(@lines);
1879
1880    return if (@lines <= 0);
1881
1882    @lines = sort(@lines);
1883
1884    # uniq -c
1885    $hash{$_}++ for @lines;
1886
1887    # sort -rn
1888    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
1889	my $sign_offs = $hash{$line};
1890	my $percent = $sign_offs * 100 / $divisor;
1891
1892	$percent = 100 if ($percent > 100);
1893	$count++;
1894	last if ($sign_offs < $email_git_min_signatures ||
1895		 $count > $email_git_max_maintainers ||
1896		 $percent < $email_git_min_percent);
1897	push_email_address($line, '');
1898	if ($output_rolestats) {
1899	    my $fmt_percent = sprintf("%.0f", $percent);
1900	    add_role($line, "$role:$sign_offs/$divisor=$fmt_percent%");
1901	} else {
1902	    add_role($line, $role);
1903	}
1904    }
1905}
1906
1907sub vcs_file_signoffs {
1908    my ($file) = @_;
1909
1910    my @signers = ();
1911    my $commits;
1912
1913    $vcs_used = vcs_exists();
1914    return if (!$vcs_used);
1915
1916    my $cmd = $VCS_cmds{"find_signers_cmd"};
1917    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
1918
1919    ($commits, @signers) = vcs_find_signers($cmd);
1920
1921    foreach my $signer (@signers) {
1922	$signer = deduplicate_email($signer);
1923    }
1924
1925    vcs_assign("commit_signer", $commits, @signers);
1926}
1927
1928sub vcs_file_blame {
1929    my ($file) = @_;
1930
1931    my @signers = ();
1932    my @all_commits = ();
1933    my @commits = ();
1934    my $total_commits;
1935    my $total_lines;
1936
1937    $vcs_used = vcs_exists();
1938    return if (!$vcs_used);
1939
1940    @all_commits = vcs_blame($file);
1941    @commits = uniq(@all_commits);
1942    $total_commits = @commits;
1943    $total_lines = @all_commits;
1944
1945    if ($email_git_blame_signatures) {
1946	if (vcs_is_hg()) {
1947	    my $commit_count;
1948	    my @commit_signers = ();
1949	    my $commit = join(" -r ", @commits);
1950	    my $cmd;
1951
1952	    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
1953	    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
1954
1955	    ($commit_count, @commit_signers) = vcs_find_signers($cmd);
1956
1957	    push(@signers, @commit_signers);
1958	} else {
1959	    foreach my $commit (@commits) {
1960		my $commit_count;
1961		my @commit_signers = ();
1962		my $cmd;
1963
1964		$cmd = $VCS_cmds{"find_commit_signers_cmd"};
1965		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
1966
1967		($commit_count, @commit_signers) = vcs_find_signers($cmd);
1968
1969		push(@signers, @commit_signers);
1970	    }
1971	}
1972    }
1973
1974    if ($from_filename) {
1975	if ($output_rolestats) {
1976	    my @blame_signers;
1977	    if (vcs_is_hg()) {{		# Double brace for last exit
1978		my $commit_count;
1979		my @commit_signers = ();
1980		@commits = uniq(@commits);
1981		@commits = sort(@commits);
1982		my $commit = join(" -r ", @commits);
1983		my $cmd;
1984
1985		$cmd = $VCS_cmds{"find_commit_author_cmd"};
1986		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
1987
1988		my @lines = ();
1989
1990		@lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1991
1992		if (!$email_git_penguin_chiefs) {
1993		    @lines = grep(!/${penguin_chiefs}/i, @lines);
1994		}
1995
1996		last if !@lines;
1997
1998		my @authors = ();
1999		foreach my $line (@lines) {
2000		    if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
2001			my $author = $1;
2002			$author = deduplicate_email($author);
2003			push(@authors, $author);
2004		    }
2005		}
2006
2007		save_commits_by_author(@lines) if ($interactive);
2008		save_commits_by_signer(@lines) if ($interactive);
2009
2010		push(@signers, @authors);
2011	    }}
2012	    else {
2013		foreach my $commit (@commits) {
2014		    my $i;
2015		    my $cmd = $VCS_cmds{"find_commit_author_cmd"};
2016		    $cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
2017		    my @author = vcs_find_author($cmd);
2018		    next if !@author;
2019
2020		    my $formatted_author = deduplicate_email($author[0]);
2021
2022		    my $count = grep(/$commit/, @all_commits);
2023		    for ($i = 0; $i < $count ; $i++) {
2024			push(@blame_signers, $formatted_author);
2025		    }
2026		}
2027	    }
2028	    if (@blame_signers) {
2029		vcs_assign("authored lines", $total_lines, @blame_signers);
2030	    }
2031	}
2032	foreach my $signer (@signers) {
2033	    $signer = deduplicate_email($signer);
2034	}
2035	vcs_assign("commits", $total_commits, @signers);
2036    } else {
2037	foreach my $signer (@signers) {
2038	    $signer = deduplicate_email($signer);
2039	}
2040	vcs_assign("modified commits", $total_commits, @signers);
2041    }
2042}
2043
2044sub uniq {
2045    my (@parms) = @_;
2046
2047    my %saw;
2048    @parms = grep(!$saw{$_}++, @parms);
2049    return @parms;
2050}
2051
2052sub sort_and_uniq {
2053    my (@parms) = @_;
2054
2055    my %saw;
2056    @parms = sort @parms;
2057    @parms = grep(!$saw{$_}++, @parms);
2058    return @parms;
2059}
2060
2061sub clean_file_emails {
2062    my (@file_emails) = @_;
2063    my @fmt_emails = ();
2064
2065    foreach my $email (@file_emails) {
2066	$email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g;
2067	my ($name, $address) = parse_email($email);
2068	if ($name eq '"[,\.]"') {
2069	    $name = "";
2070	}
2071
2072	my @nw = split(/[^A-Za-zÀ-ÿ\'\,\.\+-]/, $name);
2073	if (@nw > 2) {
2074	    my $first = $nw[@nw - 3];
2075	    my $middle = $nw[@nw - 2];
2076	    my $last = $nw[@nw - 1];
2077
2078	    if (((length($first) == 1 && $first =~ m/[A-Za-z]/) ||
2079		 (length($first) == 2 && substr($first, -1) eq ".")) ||
2080		(length($middle) == 1 ||
2081		 (length($middle) == 2 && substr($middle, -1) eq "."))) {
2082		$name = "$first $middle $last";
2083	    } else {
2084		$name = "$middle $last";
2085	    }
2086	}
2087
2088	if (substr($name, -1) =~ /[,\.]/) {
2089	    $name = substr($name, 0, length($name) - 1);
2090	} elsif (substr($name, -2) =~ /[,\.]"/) {
2091	    $name = substr($name, 0, length($name) - 2) . '"';
2092	}
2093
2094	if (substr($name, 0, 1) =~ /[,\.]/) {
2095	    $name = substr($name, 1, length($name) - 1);
2096	} elsif (substr($name, 0, 2) =~ /"[,\.]/) {
2097	    $name = '"' . substr($name, 2, length($name) - 2);
2098	}
2099
2100	my $fmt_email = format_email($name, $address, $email_usename);
2101	push(@fmt_emails, $fmt_email);
2102    }
2103    return @fmt_emails;
2104}
2105
2106sub merge_email {
2107    my @lines;
2108    my %saw;
2109
2110    for (@_) {
2111	my ($address, $role) = @$_;
2112	if (!$saw{$address}) {
2113	    if ($output_roles) {
2114		push(@lines, "$address ($role)");
2115	    } else {
2116		push(@lines, $address);
2117	    }
2118	    $saw{$address} = 1;
2119	}
2120    }
2121
2122    return @lines;
2123}
2124
2125sub output {
2126    my (@parms) = @_;
2127
2128    if ($output_multiline) {
2129	foreach my $line (@parms) {
2130	    print("${line}\n");
2131	}
2132    } else {
2133	print(join($output_separator, @parms));
2134	print("\n");
2135    }
2136}
2137
2138my $rfc822re;
2139
2140sub make_rfc822re {
2141#   Basic lexical tokens are specials, domain_literal, quoted_string, atom, and
2142#   comment.  We must allow for rfc822_lwsp (or comments) after each of these.
2143#   This regexp will only work on addresses which have had comments stripped
2144#   and replaced with rfc822_lwsp.
2145
2146    my $specials = '()<>@,;:\\\\".\\[\\]';
2147    my $controls = '\\000-\\037\\177';
2148
2149    my $dtext = "[^\\[\\]\\r\\\\]";
2150    my $domain_literal = "\\[(?:$dtext|\\\\.)*\\]$rfc822_lwsp*";
2151
2152    my $quoted_string = "\"(?:[^\\\"\\r\\\\]|\\\\.|$rfc822_lwsp)*\"$rfc822_lwsp*";
2153
2154#   Use zero-width assertion to spot the limit of an atom.  A simple
2155#   $rfc822_lwsp* causes the regexp engine to hang occasionally.
2156    my $atom = "[^$specials $controls]+(?:$rfc822_lwsp+|\\Z|(?=[\\[\"$specials]))";
2157    my $word = "(?:$atom|$quoted_string)";
2158    my $localpart = "$word(?:\\.$rfc822_lwsp*$word)*";
2159
2160    my $sub_domain = "(?:$atom|$domain_literal)";
2161    my $domain = "$sub_domain(?:\\.$rfc822_lwsp*$sub_domain)*";
2162
2163    my $addr_spec = "$localpart\@$rfc822_lwsp*$domain";
2164
2165    my $phrase = "$word*";
2166    my $route = "(?:\@$domain(?:,\@$rfc822_lwsp*$domain)*:$rfc822_lwsp*)";
2167    my $route_addr = "\\<$rfc822_lwsp*$route?$addr_spec\\>$rfc822_lwsp*";
2168    my $mailbox = "(?:$addr_spec|$phrase$route_addr)";
2169
2170    my $group = "$phrase:$rfc822_lwsp*(?:$mailbox(?:,\\s*$mailbox)*)?;\\s*";
2171    my $address = "(?:$mailbox|$group)";
2172
2173    return "$rfc822_lwsp*$address";
2174}
2175
2176sub rfc822_strip_comments {
2177    my $s = shift;
2178#   Recursively remove comments, and replace with a single space.  The simpler
2179#   regexps in the Email Addressing FAQ are imperfect - they will miss escaped
2180#   chars in atoms, for example.
2181
2182    while ($s =~ s/^((?:[^"\\]|\\.)*
2183                    (?:"(?:[^"\\]|\\.)*"(?:[^"\\]|\\.)*)*)
2184                    \((?:[^()\\]|\\.)*\)/$1 /osx) {}
2185    return $s;
2186}
2187
2188#   valid: returns true if the parameter is an RFC822 valid address
2189#
2190sub rfc822_valid {
2191    my $s = rfc822_strip_comments(shift);
2192
2193    if (!$rfc822re) {
2194        $rfc822re = make_rfc822re();
2195    }
2196
2197    return $s =~ m/^$rfc822re$/so && $s =~ m/^$rfc822_char*$/;
2198}
2199
2200#   validlist: In scalar context, returns true if the parameter is an RFC822
2201#              valid list of addresses.
2202#
2203#              In list context, returns an empty list on failure (an invalid
2204#              address was found); otherwise a list whose first element is the
2205#              number of addresses found and whose remaining elements are the
2206#              addresses.  This is needed to disambiguate failure (invalid)
2207#              from success with no addresses found, because an empty string is
2208#              a valid list.
2209
2210sub rfc822_validlist {
2211    my $s = rfc822_strip_comments(shift);
2212
2213    if (!$rfc822re) {
2214        $rfc822re = make_rfc822re();
2215    }
2216    # * null list items are valid according to the RFC
2217    # * the '1' business is to aid in distinguishing failure from no results
2218
2219    my @r;
2220    if ($s =~ m/^(?:$rfc822re)?(?:,(?:$rfc822re)?)*$/so &&
2221	$s =~ m/^$rfc822_char*$/) {
2222        while ($s =~ m/(?:^|,$rfc822_lwsp*)($rfc822re)/gos) {
2223            push(@r, $1);
2224        }
2225        return wantarray ? (scalar(@r), @r) : 1;
2226    }
2227    return wantarray ? () : 0;
2228}
2229