postgresql/src/tools/git_changelog
Tom Lane bf429cebd0 Improve git_changelog as per discussion with Robert Haas.
1. Resurrect the behavior where old commits on master will have Branch:
labels for branches sprouted after the commit was made.  I'm still
dubious about this mode, but if you want it, say --post-date or -p.

2. Annotate the Branch: labels with the release or branch in which the
commit was publicly released.  For example, on a release branch you could
see
Branch: REL8_3_STABLE Release: REL8_3_2 [92c3a8004] 2008-03-29 00:15:37 +0000
showing that the fix was released in 8.3.2.  Commits on master will
usually instead have notes like
Branch: master Release: REL8_4_BR [6fc9d4272] 2008-03-29 00:15:28 +0000
showing that this commit is ancestral to release branches 8.4 and later.
If no Release: marker appears, the commit hasn't yet made it into any
release.

3. Add support for release branches older than 7.4.

4. The implementation is improved by running git log on each branch only
back to where the branch sprouts from master.  This saves a good deal
of time (about 50% of the runtime when generating the complete history).
We generate the post-date-mode tags via a direct understanding that
they should be applied to master commits made before the branch sprouted,
rather than backing into them via matching (which isn't any too
reliable when people used identical log messages for successive commits).
2010-09-26 20:22:17 -04:00

284 lines
8.6 KiB
Perl
Executable file

#!/usr/bin/perl
#
# src/tools/git_changelog
#
# Display all commits on active branches, merging together commits from
# different branches that occur close together in time and with identical
# log messages. Commits are annotated with branch and release info thus:
# Branch: REL8_3_STABLE Release: REL8_3_2 [92c3a8004] 2008-03-29 00:15:37 +0000
# This shows that the commit on REL8_3_STABLE was released in 8.3.2.
# Commits on master will usually instead have notes like
# Branch: master Release: REL8_4_BR [6fc9d4272] 2008-03-29 00:15:28 +0000
# showing that this commit is ancestral to release branches 8.4 and later.
# If no Release: marker appears, the commit hasn't yet made it into any
# release.
#
# Most of the time, matchable commits occur in the same order on all branches,
# and we print them out in that order. However, if commit A occurs before
# commit B on branch X and commit B occurs before commit A on branch Y, then
# there's no ordering which is consistent with both branches.
#
# When we encounter a situation where there's no single "best" commit to
# print next, we print the one that involves the least distortion of the
# commit order, summed across all branches. In the event of a tie on the
# distortion measure (which is actually the common case: normally, the
# distortion is zero), we choose the commit with latest timestamp. If
# that's a tie too, the commit from the newer branch prints first.
#
use strict;
use warnings;
require Time::Local;
require Getopt::Long;
require IPC::Open2;
# Branches to report on.  Adjust this list when the set of interesting
# branches changes.  (We could get this from "git branch", but it's not
# worth the trouble.)
# NB: master must be first!
my @BRANCHES = qw(
    master
    REL9_0_STABLE REL8_4_STABLE REL8_3_STABLE REL8_2_STABLE
    REL8_1_STABLE REL8_0_STABLE REL7_4_STABLE REL7_3_STABLE
    REL7_2_STABLE REL7_1_STABLE REL7_0_PATCHES REL6_5_PATCHES
    REL6_4
);

# Commits on different branches are merged into one entry only when their
# timestamps differ by less than this many seconds.
# Might want to make this parameter user-settable.
my $timestamp_slop = 600;

# Command-line options: --post-date is a flag, --since takes a string.
my $post_date = 0;
my $since;

Getopt::Long::GetOptions(
    'post-date' => \$post_date,
    'since=s'   => \$since,
) or usage();

# No non-option arguments are accepted.
usage() if @ARGV;

# Base "git log" command line shared by every branch; --since is passed
# straight through to git when given.
my @git = ('git', 'log', '--date=iso');
if (defined $since) {
    push @git, '--since=' . $since;
}
# Collect the release tag data.  %rel_tags maps the object name of each
# tagged commit to its release tag, for tags named like RELx_y or RELx_y_z.
my %rel_tags;

{
    my $cmd = "git for-each-ref refs/tags";

    # open2 raises an exception by itself when the command cannot be
    # started, so its return value needs no "|| die" test.
    my $pid = IPC::Open2::open2(my $git_out, my $git_in, $cmd);

    # Nothing is ever written to git; close its stdin right away.
    close($git_in);

    while (my $line = <$git_out>) {
        # Only refs whose object type is "commit" are considered.
        if ($line =~ m|^([a-f0-9]+)\s+commit\s+refs/tags/(\S+)|) {
            my $commit = $1;
            my $tag    = $2;
            if (   $tag =~ /^REL\d+_\d+$/
                || $tag =~ /^REL\d+_\d+_\d+$/)
            {
                $rel_tags{$commit} = $tag;
            }
        }
    }
    waitpid($pid, 0);

    # Test all of $?, not just the exit code in its high byte: a git that
    # was killed by a signal has exit code 0 there but must still count as
    # a failure.
    die "$cmd failed" if $? != 0;
}
# Collect the commit data.
# %all_commits maps hash_commit() keys to lists of merged commit entries;
# %all_commits_by_branch maps each branch name to its entries in the order
# git log reported them.  Both are filled in by push_commit().
my %all_commits;
my %all_commits_by_branch;

# This remembers where each branch sprouted from master. Note the values
# will be wrong if --since terminates the log listing before the branch
# sprouts; but in that case it doesn't matter since we also won't reach
# the part of master where it would matter.
my %sprout_tags;

for my $branch (@BRANCHES) {
    my @cmd = @git;
    if ($branch eq "master") {
        push @cmd, "origin/$branch";
    } else {
        # For a release branch, list only the commits that are not on
        # master, and ask for parent hashes on each "commit" line so the
        # sprout point can be identified.
        push @cmd, "--parents";
        push @cmd, "master..origin/$branch";
    }

    # open2 raises an exception by itself when the command cannot be
    # started, so its return value needs no "|| die" test.
    my $pid = IPC::Open2::open2(my $git_out, my $git_in, @cmd);

    # Nothing is ever written to git; close its stdin right away.
    close($git_in);

    my $last_tag = undef;    # most recent release tag seen in this listing
    my $last_parent;         # first parent of the commit read most recently
    my %commit;              # fields of the commit currently being read

    while (my $line = <$git_out>) {
        if ($line =~ /^commit\s+(\S+)/) {
            # Keep the hash in a lexical rather than relying on $1
            # surviving until every use below.
            my $sha = $1;

            # A new commit header ends the previous commit, if any.
            push_commit(\%commit) if %commit;
            $last_tag = $rel_tags{$sha} if defined $rel_tags{$sha};
            %commit = (
                'branch'   => $branch,
                'commit'   => $sha,
                'last_tag' => $last_tag,
                'message'  => '',
            );

            # A second word on the commit line (present with --parents)
            # is taken as this commit's parent.
            if ($line =~ /^commit\s+\S+\s+(\S+)/) {
                $last_parent = $1;
            } else {
                $last_parent = undef;
            }
        }
        elsif ($line =~ /^Author:\s+(.*)/) {
            $commit{'author'} = $1;
        }
        elsif ($line =~ /^Date:\s+(.*)/) {
            $commit{'date'} = $1;
        }
        elsif ($line =~ /^\s\s/) {
            # Lines starting with two whitespace characters are message
            # text.
            $commit{'message'} .= $line;
        }
    }
    push_commit(\%commit) if %commit;

    # The parent of the last commit listed for the branch is recorded as
    # the point where the branch sprouted.
    $sprout_tags{$last_parent} = $branch if defined $last_parent;

    waitpid($pid, 0);

    # Test all of $?, not just the exit code in its high byte: a git that
    # was killed by a signal has exit code 0 there but must still count as
    # a failure.
    die "@cmd failed" if $? != 0;
}
# Walk the master branch and fill in its Release tags.  The other branches
# were tagged while they were being read, but master can only be done once
# the sprout_tags data is complete.  In post-date mode this pass also adds
# phony entries for every branch that sprouted after a given master commit
# was made.
{
    my $current_tag;         # tag that applies to the commit being visited
    my %already_sprouted;    # branches whose sprout point has been passed

    for my $group (@{$all_commits_by_branch{'master'}}) {
        my $sha          = $group->{'commit'};
        my $master_entry = $group->{'commits'}->[0];
        my $sprout       = $sprout_tags{$sha};

        if (defined $rel_tags{$sha}) {
            $current_tag = $rel_tags{$sha};
        }
        if (defined $sprout) {
            # A sprout point overrides a release tag; the branch name is
            # normalized to make the sprout tag.
            $current_tag = $sprout;
            $current_tag =~ s/^(REL\d+_\d+).*/$1_BR/;
        }
        $master_entry->{'last_tag'} = $current_tag;

        next unless $post_date;

        $already_sprouted{$sprout} = 1 if defined $sprout;

        # The phony per-branch copies go right after the leading (master)
        # entry and ahead of any other commits already in the group.
        my ($first, @others) = @{$group->{'commits'}};
        my @phony =
            map { +{ %{$master_entry}, 'branch' => $_ } }
            reverse sort keys %already_sprouted;
        $group->{'commits'} = [ $first, @phony, @others ];
    }
}
# Index of the next not-yet-printed entry in each branch's commit list.
my %position = map { $_ => 0 } @BRANCHES;

# Repeatedly choose one branch's leading entry, print it, and step every
# branch past entries that have already been printed, until all the lists
# are exhausted.
while (1) {
    my ($best_branch, $best_inversions, $best_timestamp);

    for my $branch (@BRANCHES) {
        my $head = $all_commits_by_branch{$branch}->[$position{$branch}];
        next unless defined $head;

        # Distortion measure: summed over the branches this entry appears
        # on, how far it sits beyond that branch's current position.
        my $inversions = 0;
        for my $other (@BRANCHES) {
            my $where = $head->{'branch_position'}{$other};
            next unless defined $where;
            $inversions += $where - $position{$other};
        }

        # Fewest inversions wins; on a tie the later timestamp wins; on a
        # further tie the branch examined first keeps the lead.
        my $takes_lead =
              !defined $best_inversions       ? 1
            : $inversions != $best_inversions ? $inversions < $best_inversions
            :     $head->{'timestamp'} > $best_timestamp;
        next unless $takes_lead;

        $best_branch     = $branch;
        $best_inversions = $inversions;
        $best_timestamp  = $head->{'timestamp'};
    }

    # Nothing left on any branch.
    last unless defined $best_branch;

    my $winner =
        $all_commits_by_branch{$best_branch}->[$position{$best_branch}];

    printf "Author: %s\n", $winner->{'author'};
    for my $entry (@{$winner->{'commits'}}) {
        printf "Branch: %s", $entry->{'branch'};
        printf " Release: %s", $entry->{'last_tag'}
            if defined $entry->{'last_tag'};
        printf " [%s] %s\n", substr($entry->{'commit'}, 0, 9),
            $entry->{'date'};
    }
    if ($best_inversions != 0) {
        print "Commit-Order-Inversions: $best_inversions\n";
    }
    print "\n", $winner->{'message'}, "\n";

    $winner->{'done'} = 1;

    # Step each branch past every leading entry that has been printed.
    for my $branch (@BRANCHES) {
        while (1) {
            my $head = $all_commits_by_branch{$branch}->[$position{$branch}];
            last unless defined $head && $head->{'done'};
            ++$position{$branch};
        }
    }
}
# push_commit(\%commit)
#
# Record one commit read from git log.  The commit is attached to an
# existing entry that has the same hash_commit() key, a timestamp within
# $timestamp_slop seconds, and no commit from this branch yet; otherwise a
# new entry is created.  The entry is then appended to the branch's list in
# %all_commits_by_branch and its position there is remembered.
sub push_commit {
    my ($c) = @_;
    my $branch = $c->{'branch'};
    my $key    = hash_commit($c);
    my $when   = parse_datetime($c->{'date'});

    # Two commits on the same branch are never merged, even if they have
    # the same key (author/message) and nearby timestamps.  A commit from
    # another branch can therefore have several possible matches; take the
    # one closest in time, and the earliest-listed one on a tie.
    my ($match, $match_gap);
    for my $candidate (@{$all_commits{$key}}) {
        my $gap = abs($when - $candidate->{'timestamp'});
        next if $gap >= $timestamp_slop;
        next if exists $candidate->{'branch_position'}{$branch};
        if (!defined $match || $gap < $match_gap) {
            $match     = $candidate;
            $match_gap = $gap;
        }
    }

    # No suitable entry: start a new one for this commit.
    if (!defined $match) {
        $match = {
            'author'    => $c->{'author'},
            'message'   => $c->{'message'},
            'commit'    => $c->{'commit'},
            'commits'   => [],
            'timestamp' => $when
        };
        push @{$all_commits{$key}}, $match;
    }

    # Stash only the per-branch fields that are needed later.
    push @{$match->{'commits'}}, {
        'branch'   => $branch,
        'commit'   => $c->{'commit'},
        'date'     => $c->{'date'},
        'last_tag' => $c->{'last_tag'}
    };

    my $branch_list = ($all_commits_by_branch{$branch} ||= []);
    push @{$branch_list}, $match;

    # Index of the entry just appended to this branch's list.
    $match->{'branch_position'}{$branch} = $#{$branch_list};
}
# hash_commit(\%commit)
#
# Return the key under which a commit is filed for matching: its author and
# its message, separated by a NUL character.
sub hash_commit {
    my ($c) = @_;
    return join("\0", $c->{'author'}, $c->{'message'});
}
# parse_datetime($dt)
#
# Convert a date string of the form "YYYY-MM-DD HH:MM:SS +ZZZZ" (or -ZZZZ)
# into seconds since the epoch, taking the time zone offset into account.
#
# Dies if $dt is undefined or does not have that form.  The captures are
# taken from the match's own return list: after a failed match $1..$9 still
# hold the values of some earlier successful match, so reading them
# unchecked could silently return the timestamp of a different commit.
sub parse_datetime {
    my ($dt) = @_;
    die "git_changelog: commit has no date" unless defined $dt;

    my ($year, $mon, $mday, $hour, $min, $sec, $sign, $tz_hours, $tz_mins) =
        $dt =~ /^(\d\d\d\d)-(\d\d)-(\d\d)\s+(\d\d):(\d\d):(\d\d)\s+([-+])(\d\d)(\d\d)$/
        or die "git_changelog: cannot parse date \"$dt\"";

    # Read the fields as if they were UTC, then remove the zone offset.
    my $gm = Time::Local::timegm($sec, $min, $hour, $mday, $mon - 1, $year);
    my $tzoffset = ($tz_hours * 60 + $tz_mins) * 60;
    $tzoffset = -$tzoffset if $sign eq '-';
    return $gm - $tzoffset;
}
# usage()
#
# Print the command-line summary on standard error and exit with status 1.
sub usage {
    my $help = <<'EOM';
Usage: git_changelog [--post-date/-p] [--since=SINCE]
--post-date Show branches made after a commit occurred
--since Print only commits dated since SINCE
EOM
    print STDERR $help;
    exit 1;
}