6 use Fcntl qw(:flock SEEK_END);
9 use CGI::Carp qw(fatalsToBrowser);
# Directory holding this script's persistent state; the lock file and the
# database path below are both built from it.
11 my $dbdir = "/home/steve/rss";
# Path of the lock file that is opened in append mode further down.
12 my $lock = "$dbdir/.update.lock";
# Path passed to the DB_File tie in duplicate_entry().
13 my $dbfile = "$dbdir/db";
# Feed location taken from the CGI 'feed' parameter; later it is either
# parsed as a local file or wrapped in a URI object for XML::Feed.
21 $in = $cgi->param('feed');
# NOTE(review): the condition guarding this die is not visible in this
# excerpt - presumably it only fires when no feed was supplied; confirm.
# The trailing "\n" suppresses Perl's "at FILE line N" suffix.
23 die "No feed specified!\n";
# Open the lock file for appending (">>" creates it if it does not exist);
# dies with the OS error text on failure.
27 open(my $lockfile, ">>", "$lock") or die "Can't open lockfile: $!";
# NOTE(review): the calls below operate on $fh, while the handle opened above
# is $lockfile, and $fh is not declared in the visible lines. Presumably they
# sit inside helper subs that receive the handle as an argument - confirm.
# Request an exclusive lock on the handle; dies if flock() fails.
31 flock($fh, LOCK_EX) or die "Cannot lock lockfile - $!\n";
33 # and, in case someone appended while we were waiting...
# Reposition to end-of-file after the lock has been obtained.
34 seek($fh, 0, SEEK_END) or die "Cannot seek - $!\n";
# Release the lock; dies if the unlock itself fails.
39 flock($fh, LOCK_UN) or die "Cannot unlock lockfile - $!\n";
42 # Check to see if we don't want this feed item
#
# Called from the main loop as filtered_out($feedin, $entry); the caller only
# keeps the entry when this returns false. The ($$) prototype imposes scalar
# context on the two arguments at compile time; it does not validate them.
# NOTE(review): argument unpacking, the body of each branch and the return
# statements are not visible in this excerpt - presumably each matching rule
# returns true; confirm.
43 sub filtered_out($$) {
# Diagnostic output describing the entry under test.
# NOTE(review): these go to STDOUT; any guard around them is not visible here.
48 print "should I filter this?\n";
49 print " id: " . $entry->id() . "\n";
50 print " title: " . $entry->title() . "\n";
53 # First of all, check for entries we just don't care about
# Rule: AV Club entries whose title is leading whitespace followed by "TV:".
# "^\s+" requires at least one whitespace character before "TV:".
# NOTE(review): the dots in the URL patterns below are unescaped, so they
# match any character, not only a literal ".".
54 if (($entry->id() =~ m,http://www.avclub.com/,) &&
55 ($entry->title() =~ m/^\s+TV:/)) {
# Rule: AV Club entries titled "Features: Contest:" (again after leading
# whitespace).
61 if (($entry->id() =~ m,http://www.avclub.com/,) &&
62 ($entry->title() =~ m/^\s+Features:\s+Contest:/)) {
# Rule: BBC entries whose id contains "uk-wales" somewhere after the host part.
68 if ($entry->id() =~ m,bbc.co.uk.*uk-wales,) {
# Rule: BBC entries whose id contains "uk-northern-ireland".
74 if ($entry->id() =~ m,bbc.co.uk.*uk-northern-ireland,) {
# Rule: BBC entries whose title contains "VIDEO:" or "AUDIO:" anywhere
# (the pattern is not anchored).
80 if (($entry->id() =~ m,bbc.co.uk,) &&
81 ($entry->title() =~ m,(VIDEO|AUDIO):,)) {
94 # Check to see if this feed item is a duplicate
#
# Called from the main loop as duplicate_entry($feedin, $entry); the caller
# only keeps the entry when this returns false. Entries are tracked in a
# DB_File-backed hash keyed by entry id, with an ISO 8601 timestamp as value.
# NOTE(review): argument unpacking and the return statements are not visible
# in this excerpt.
95 sub duplicate_entry($$) {
# The entry id is the database key.
99 my $key = $entry->id();
# Diagnostic output.
# NOTE(review): written to STDOUT; any guard around it is not visible here.
102 print "Is this a duplicate?\n";
103 print " id: " . $entry->id() . "\n";
104 print " title: " . $entry->title() . "\n";
# Bind %feeds_db to the on-disk database file.
# NOTE(review): the result of tie is not checked in the visible code, so a
# failed tie would go unnoticed here; no matching untie is visible either.
110 tie %feeds_db, 'DB_File', "$dbfile";
# Key already present: the entry has been recorded before.
111 if (exists $feeds_db{$key}) {
113 print " yes, last seen " . $feeds_db{"$key"} . "\n";
# Choose the timestamp to record: the entry's modified date if defined,
# otherwise its issued date.
123 if (defined($entry->modified())) {
124 $dt = $entry->modified()->iso8601();
125 } elsif (defined($entry->issued())) {
126 $dt = $entry->issued()->iso8601();
# Fallback to the current time - presumably the else branch of the chain
# above (the "else" line itself is not visible); confirm.
128 $dt = DateTime->now()->iso8601();
# Record the entry id with the chosen timestamp.
130 $feeds_db{$key} = $dt;
135 # Copy an entry, but clean up the content (i.e. remove ads)
#
# Called from the main loop as modify_entry($feedin, $entry); the result is
# passed to $feedout->add_entry(). Builds a fresh RSS entry, copies the
# metadata fields across and stores a cleaned copy of the content body.
# NOTE(review): the return statement is not visible in this excerpt.
136 sub modify_entry ($$) {
# NOTE(review): the caller passes the feed first, so an earlier shift
# (not visible here) presumably consumes it before this line - confirm.
138 my $oldentry = shift;
139 my $entry = XML::Feed::Entry->new('RSS');
# Working copy of the original content body, edited in place below.
140 my $text = $oldentry->content->body();
142 # First of all, clone the existing fields apart from the content
# title, base and link are copied unconditionally; the remaining fields are
# copied only when the source entry defines them.
143 $entry->title($oldentry->title());
144 $entry->base($oldentry->base());
145 $entry->link($oldentry->link());
146 if (defined($oldentry->summary())) {
147 $entry->summary($oldentry->summary());
149 if (defined($oldentry->category())) {
150 $entry->category($oldentry->category());
152 if (defined($oldentry->author())) {
153 $entry->author($oldentry->author());
155 if (defined($oldentry->id())) {
156 $entry->id($oldentry->id());
158 if (defined($oldentry->issued())) {
159 $entry->issued($oldentry->issued());
161 if (defined($oldentry->modified())) {
162 $entry->modified($oldentry->modified());
165 # Now deal with the content text
# Diagnostic dump of the unmodified content.
# NOTE(review): written to STDOUT; any guard around it is not visible here.
167 print "entry content: $text\n\n";
# Strip advertising and social-media links from the body.
# NOTE(review): ".*" is greedy, so each substitution runs from the matched
# href to the LAST closing tag on the same line and can remove unrelated
# markup in between; ".*?" would stop at the first. The dots in the host
# names are unescaped and match any character.
171 $text =~ s,<a href="\S+doubleclick.net.*</a>,,g;
172 $text =~ s,<a href="\S+twitter.com.*</a></p>,</p>,g;
173 $text =~ s,<a href="\S+facebook.com.*facebook_icon_large.png"></a>,,g;
175 # Make Gregarius work with xkcd - copy the alt text into the body
# NOTE(review): $feedin is not declared in the visible lines of this sub;
# presumably it is the first argument or a file-level variable - confirm.
177 if ($feedin->title() =~ m/xkcd.com/) {
# Start from the whole body and reduce it to the alt="..." value.
# NOTE(review): if no alt attribute matches, the substitution does nothing and
# $alt_text is still the entire body. Both ".*" are greedy and "." does not
# match newlines without /s, so only the line containing alt= is rewritten.
178 my $alt_text = $text;
179 $alt_text =~ s,.*alt=\"(.*)\".*,$1,;
181 print "xkcd link, alt text is \"$alt_text\"\n";
# Append the extracted alt text to the body as its own paragraph.
183 $text .= "<p>XKCD alt text: \"$alt_text\"</p>\n";
187 print "entry content filtered: $text\n";
# Store the cleaned body on the new entry.
189 $entry->content($text);
# Main flow: parse the input feed, copy its metadata into a new RSS 2.00 feed,
# add every entry that is neither filtered out nor a duplicate, then emit the
# result with an RSS MIME header.
#
# In "command" mode, when $in names an existing file, parse it as a local file.
194 if ($mode eq "command" && -e "$in") {
195 $feedin = XML::Feed->parse("$in");
# Otherwise treat $in as a URI - presumably the else branch of the test above
# (the "else" line itself is not visible); confirm.
# NOTE(review): no check of the parse result is visible in this excerpt.
197 $feedin = XML::Feed->parse(URI->new("$in"));
# Diagnostic summary of the input feed.
# NOTE(review): these prints, and the similar ones below, go to STDOUT ahead
# of the CGI header printed at the end; their guard (if any) is not visible.
205 print "Looking at $in:\n";
206 print " format: " . $feedin->format() . "\n";
# Output feed is always RSS 2.00, whatever format the input used.
209 $feedout = XML::Feed->new('RSS', version => '2.00');
# Copy title, link and tagline across when the input feed defines them.
211 if (defined($feedin->title())) {
213 print " title: " . $feedin->title() . "\n";
215 $feedout->title($feedin->title);
217 if (defined($feedin->link())) {
219 print " link: " . $feedin->link() . "\n";
221 $feedout->link($feedin->link);
223 if (defined($feedin->tagline())) {
225 print " tagline: " . $feedin->tagline() . "\n";
227 $feedout->tagline($feedin->tagline);
# scalar() calls entries in scalar context - presumably yielding the entry
# count; confirm against XML::Feed.
231 print " entries in: " . scalar($feedin->entries) . "\n";
# Keep only entries that pass the filter and have not been recorded before.
# filtered_out() is tested first, so duplicate_entry() is not called for
# entries that are filtered out.
233 foreach my $entry ($feedin->entries) {
234 if (!filtered_out($feedin, $entry) && !duplicate_entry($feedin, $entry)) {
235 my $new_entry = modify_entry($feedin, $entry);
236 $feedout->add_entry($new_entry);
# NOTE(review): $num_out is not assigned in the visible lines; presumably it
# is counted inside the loop above - confirm.
241 print " entries out: $num_out\n";
# Emit the CGI header with the RSS MIME type, followed by the serialized feed.
245 my $mime = "application/rss+xml";
246 print $cgi->header($mime);
247 print $feedout->as_xml;