#! /usr/bin/perl # # Copyright 2004 Double Precision, Inc. # # See COPYING for distribution information. # # Split the shared index into multiple files. The entire shared index is # piped on stdin. There are two modes of operation: # - if nletters is specified and is greater than 0, then # split based on the first n characters of the username # - if nletters is omitted or zero, then split based on the 'sharedgroup' # account option. This requires the options to be given as column 6 of # the input. # # Assume that account names use only the Latin alphabet. use IO::File; my $hasEncode=0; eval 'use Encode; $hasEncode=1;'; if ($hasEncode) { $hasEncode=0; grep {$hasEncode=1 if $_ eq "UTF-32BE"; } Encode->encodings(":all"); } my $mult=1; $mult=4 if $hasEncode; my $outputdir=shift @ARGV; my $nletters=shift @ARGV; die "Usage: $0 outputdir [ letters ]\n" unless -d $outputdir; print "*** WARNING - Encode not found, you should upgrade to Perl 5.8.0\n" unless $hasEncode; $nletters=0 unless defined($nletters); my %FILES; # All opened files my @MRU; # Recycle using most-recently-used mechanism. sub indexfile { my $filename=shift @_; Encode::from_to($filename, "UTF-32BE", "UTF-8") if $hasEncode; return "$filename"; } while (defined($_=)) { chomp; s/\#.*//; my @fields=split /\t/; next unless $#fields>2; # Comments, etc... my $key; if ($nletters > 0) { $key=$fields[0]; Encode::from_to($key, "UTF-8", "UTF-32BE") if $hasEncode; $key=substr($key, 0, $nletters * $mult); } elsif ($fields[5] =~ /(^|,)sharedgroup=([^,]+)/) { $key = $2; Encode::from_to($key, "UTF-8", "UTF-32BE") if $hasEncode; } else { $key = ""; } while (length($key) < $nletters * $mult) { my $u="_"; $u=Encode::encode("UTF-32BE", $u) if $hasEncode; $key .= $u; } if (defined $FILES{$key}) { @MRU=grep {$_ ne $key} @MRU; push @MRU, $key; } else { unless ($#MRU < 3) { my $oldest=shift @MRU; close($FILES{$oldest}); $FILES{$oldest}=undef; } push @MRU, $key; open( ($FILES{$key}=new IO::File), ">>$outputdir/index" . indexfile($key) . "\n") || die "$outputdir/index" . indexfile($key) . ": $!\n"; } my $fh=$FILES{$key}; splice(@fields,5,1); # hide options (print $fh (join("\t", @fields) . "\n")) || exit 1; } grep { ( close($FILES{$_}) || exit(1)) if defined $FILES{$_}} keys %FILES; while ($nletters > 0) { my %NEWKEYS; my %NEWFILES; --$nletters; foreach (keys %FILES) { $NEWKEYS{substr($_, 0, $nletters * $mult)}=1; push @{$NEWFILES{substr($_, 0, $nletters * $mult)}}, $_; } foreach (keys %NEWFILES) { my $fn=indexfile($_); open(FH, ">$outputdir/index$fn") || die "$outputdir/index$fn: $!\n"; grep { my $x=indexfile($_);print FH "$x\t*\tindex$x\n" || exit 1; } @{$NEWFILES{$_}}; close(FH) || exit 1; } %FILES=%NEWKEYS; }