-
Notifications
You must be signed in to change notification settings - Fork 0
/
html-update-link-dates
executable file
·174 lines (153 loc) · 5.77 KB
/
html-update-link-dates
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#!/usr/bin/perl -wpi.bak
# html-update-link-dates
# Michael Ernst <mernst@alum.mit.edu>
# Time-stamp: <2016-10-04 10:35:20 mernst>
# usage:
# html-update-link-dates file [...]
# For any text of the form
# <a href="filename">text</a> (14 Mar 1967),
# <a href="filename">text</a> (14 Mar 1967, 19 Mbytes)
# <a href="filename">text</a> (PDF, 14 Mar 1967),
# <a href="filename">text</a> (PDF, 14 Mar 1967, 19 Mbytes)
# update the date and file size text, but don't update the HTML file's
# modification date.
# For examples, see http://homes.cs.washington.edu/~mernst/software/ and
# http://pag.csail.mit.edu/daikon/download/.
# Warning: if the filename is the HTML file being edited, the date/size may
# be incorrect.
# To do:
# Need to operate by paragraphs, not by lines.
use FindBin ();
use lib "$FindBin::Bin";
use checkargs;
require POSIX;
require Time::Local;
# Forward declaration for Perl 5.6.
sub file_date_replacement ( $$$$$ );
BEGIN
{ for my $file (@ARGV)
{ $file =~ s/\/$/index.html/;
if (! (-f $file && -r $file))
{ die "$file isn't a readable normal file"; }
my ($atime,$mtime,$gid);
(undef,undef,undef,undef,undef,$gid,undef,undef,
$atime,$mtime,undef,undef,undef) = stat($file);
$file_gid{$file} = $gid;
$file_atime{$file} = $atime;
$file_mtime{$file} = $mtime; }
# What should I do about May??
$longmonthre = '(?:January|February|March|April|May|June|July|August|September|October|November|December)';
$datere = '(?:[0123]?[0-9] (?:[A-Z][a-z][a-z]|' . $longmonthre . ') [0-9]{4})';
$sizere = '(?:[0-9.]+ [kMG]?bytes)';
$datesizere = '(' . $datere . '(?:, ' . $sizere . ')?)';
}
s/(<a href\s*=\s*\")([^:\"> ]+)(\">.*?<\/a>(?:,? (?:by|from) [A-Za-z]+(?: [A-Za-z]+)?)?(?: \((?:PDF, )?|, ))$datesizere([\)]?)/file_date_replacement($1,$2,$3,$4,$5)/eg;
END
{ for my $file (keys %file_atime)
{ utime($file_atime{$file}, $file_mtime{$file}, $file);
# $< is the real userid for whoever is running this program.
chown($<, $file_gid{$file}, $file); }
# Ordinary, non-erroneous termination
exit(0);
}
###########################################################################
sub file_date_replacement ( $$$$$ )
{ my ($pre, $file, $mid, $old_date, $post) = check_args(5, @_);
# print "\#$pre\#$file\#$mid\#$old_date\#$post\#\n";
my $dir = $ARGV;
$dir =~ s/(^|\/)[^\/]*$/$1/;
my $old_size;
if ($old_date =~ /^($datere), ($sizere)/)
{ $old_date = $1;
$old_size = $2; }
my $longmonth = ($old_date =~ m/$longmonthre/);
my $full_file = $file;
if ($full_file !~ s:^/:/www/:)
{ $full_file = $dir . $file; }
$full_file = simplify_path_name($full_file);
$full_file =~ s/\/$/\/index.html/;
if (! -f $full_file)
{ my $date = "00 Mth 0000";
if (defined($old_size))
{ $date .= ", 00 Mbytes"; }
# Don't print warning if commented out.
if (($pre =~ /<!--/) && ($pre !~ /-->/)) {
print STDERR "html-update-link-dates: Didn't find $full_file\n";
}
return "$pre$file${mid}$date$post"; }
my $date = file_date($full_file, $longmonth);
my $size = file_size($full_file);
# If this script changed the HTML file, then don't reset its write date.
# This means that browsers which check the modification time before
# downloading a new version will get the lastest version. And indeed the
# text has changed, though not substantially...
if (($date ne $old_date) || (defined($old_size) && ($size ne $old_size)))
{ delete $file_atime{$ARGV};
delete $file_mtime{$ARGV};
delete $file_gid{$ARGV}; }
if (defined($old_size))
{ $date .= ", $size"; }
return "$pre$file$mid$date$post";
}
# Return human-readable file date in form DD MMM YYYY
# If second argument is provided and true, use full month name, not abbrev.
sub file_date ( $;$ )
{ my ($file, @rest) = check_args_range(1, 2, @_);
my $longmonth = (scalar(@rest) == 1) && $rest[0];
my $mtime;
(undef,undef,undef,undef,undef,undef,undef,undef,
undef,$mtime,undef,undef,undef) = stat($file);
my ($mday,$mon,$year);
(undef,undef,undef,$mday,$mon,$year,undef,undef,undef) = localtime($mtime);
my $dateformat = ($longmonth ? "%d %B %Y" : "%d %b %Y");
my $date = POSIX::strftime($dateformat, 0, 0, 0, $mday, $mon, $year);
$date =~ s/^0//;
# print STDERR "file_date($file) => $date\n";
return $date;
}
# Return human-readable file size in form NN [kM]bytes
sub file_size ( $ )
{ my ($file) = check_args(1, @_);
my $size;
(undef,undef,undef,undef,undef,undef,undef,$size,
undef,undef,undef,undef,undef) = stat($file);
if ($size < 1000)
{ return "$size bytes"; }
my ($divisor, $metric_letter);
if ($size < 950000)
{ $divisor = 1000;
$metric_letter = "k"; }
elsif ($size < 950000000)
{ $divisor = 1000000;
$metric_letter = "M"; }
else
{ $divisor = 1000000000;
$metric_letter = "G"; }
# This truncates, but it really ought to round instead. Do so by adding
# 5 to the third most significant digit (since we truncate to two
# significant digits).
my $to_add = "5" . ("0" x (length($size)-3));
$trunc_size = sprintf("%f", ($size+$to_add)/$divisor);
if ($trunc_size =~ m/^([0-9][0-9])([0-9]*)(\.[0-9]*)?$/)
{ $trunc_size = $1 . "0" x length($2); }
elsif ($trunc_size !~ s/^([0-9]\.?[0-9])([0-9]*)$/$1/)
{ die "What trunc_size? " . $trunc_size; }
return $trunc_size . " " . $metric_letter . "bytes";
}
## Lifted from em_util.pm, 9/8/97
# Simplify a directory path by canonicalizing to not contain
# any ".." or "." components
# e.g. simplify_path_name("./bink/baz/foo/../foo2/../../bar") is
# "./bink/bar"
sub simplify_path_name ( $ )
{
my ($path) = check_args(1, @_);
my $result = $path;
while (($result =~ s%/[^/\n]*/\.\./%/%) && $result ne $path) {
$path = $result;
}
while (($result =~ s%//%/%g) && $result ne $path) {
$path = $result;
}
return $result;
}