本を読む

読書やコンピュータなどに関するメモ

Plagger::Plugin::CustomFeed::cbook24

勝手RSSサービス「コンピュータ系新刊情報」をやっている。ここでは、cbook24の情報をiCalendar化するのに以前からPlaggerを使っていた。ただし、いちどRSS化をしてからHTTP経由で取得しなおすのは無駄だ。そこで、RSS化の部分もPlaggerの野良プラグインにして、一度にRSSとiCalendarの両方を生成するようにした。

2006-12-15追記:このプラグインを更新した

●Plagger::Plugin::CustomFeed::cbook24
package Plagger::Plugin::CustomFeed::cbook24;
use strict;
use base qw( Plagger::Plugin );

use Encode;
use Plagger::UserAgent;
use Plagger::Util qw( decode_content );

# Hook this plugin into the context: handle() is registered as the
# callback for the 'customfeed.handle' hook.
sub register {
    my ($plugin, $ctx) = @_;

    my %hooks = ('customfeed.handle' => \&handle);

    return $ctx->register_hook($plugin, %hooks);
}

# 'customfeed.handle' callback. Claims the feed (runs aggregate() and
# returns 1) when its URL is a cbook24 bm_browsing.asp listing; otherwise
# returns nothing.
sub handle {
    my ($plugin, $ctx, $args) = @_;

    my $feed_url = $args->{feed}->url;
    return
        unless $feed_url =~ m|^http://www\.cbook24\.com/bm_browsing\.asp|;

    $plugin->aggregate($ctx, $args);
    return 1;
}

# Scrape the cbook24 listing starting at the feed URL, following the
# "next page" link from page to page, and hand the collected entries to the
# context as a single Plagger::Feed.
#
# Arguments:
#   $self    - this plugin (also passed to the user agent's fetch()).
#   $context - object providing log() and update().
#   $args    - hash ref; $args->{feed}->url is the first page to fetch.
#
# If any page fetch reports an error, the error is logged and the sub
# returns immediately without adding a feed. Otherwise the last statement
# passes the feed to $context->update->add.
sub aggregate {
    my($self, $context, $args) = @_;

    my $url = $args->{feed}->url;

    my $feed = Plagger::Feed->new;
    $feed->type('cbook24');
    $feed->title('cbook24');
    $feed->link($url);

    my $agent = Plagger::UserAgent->new;

    # The pattern sources are decoded so they match the decoded page text.
    # They are compiled once with qr// rather than relying on the /o modifier.
    my $next_src = decode('utf-8','\<a\s+href="([^"]*)">\s*次  へ&nbsp;\s*\</a>');
    my $date_src = decode('utf-8','発売日:.*?(\d{4}/\d+/\d+)');
    my $re_next  = qr/$next_src/;
    my $re_date  = qr/$date_src/;

    # Pages already fetched; stops a "next" link that points back to an
    # earlier page from looping forever.
    my %seen;

    PAGE: while (defined $url && !$seen{"$url"}++) {
        $context->log(info => "GET $url");
        my $res = $agent->fetch($url, $self);
        if ($res->is_error) {
            $context->log(error => "GET $url failed: " . $res->status_code);
            return;
        }
        my $content = decode_content($res);

        # Flatten to one line so '.' in the patterns below can span what
        # were line breaks in the HTML.
        $content =~ s/[\r\n]/ /g;

        # Find the next-page link before the page is trimmed down.
        my $nexturl;
        if ($content =~ $re_next) {
            $nexturl = URI->new_abs($1, $url);
        }

        # Keep only the part of the page between these two markers.
        $content =~ s/.*?\<td vAlign="bottom">//;
        $content =~ s/\<td valign="top" align="right" width="150">.*$//;

        my @items = split(/\<td align="center" valign="top" width="50%">/, $content);
        ITEM: foreach my $item (@items) {
            # Chunks without a release date are not book entries.
            next ITEM unless $item =~ $re_date;

            # Use the captured date only: $& would also include the label
            # text in front of it, and merely mentioning $& slows every
            # regex match in the program on older perls.
            my $dcdate = Plagger::Date->strptime('%Y/%m/%d', $1);

            # The first link in the chunk is taken as the book link; it is
            # removed so it does not reappear in the description.
            $item =~ s|\<a href="(.*?)">(.*?)\</a>||
                or next ITEM;
            # Copy the captures before calling any other code.
            my ($href, $itemtitle) = ($1, $2);
            my $itemurl = URI->new_abs($href, $url);

            # Description: the rest of the first table, with whitespace and
            # &nbsp; removed, one line per table row, tags stripped.
            $item =~ s|\</table>.*||s;
            my $desc = $item;
            $desc =~ s/\s+//g;
            $desc =~ s/\&nbsp;//g;
            $desc =~ s|\</tr>|\n|g;
            $desc =~ s/\<.*?>//g;

            my $entry = Plagger::Entry->new;
            $entry->title($itemtitle);
            $entry->link($itemurl);
            $entry->date($dcdate);
            $entry->body($desc);

            $feed->add_entry($entry);
        }

        # undef when the page has no next link, which ends the loop.
        $url = $nexturl;
    }

    $context->update->add($feed);
}

1;
__END__

=head1 NAME

Plagger::Plugin::CustomFeed::cbook24 - Custom feed for cbook24 booklist

=head1 SYNOPSIS

  - module: Subscription::Config
    config:
      feed:
        - url: http://www.cbook24.com/bm_browsing.asp?page=0&browsing%5Ftype=comingsoon
  - module: CustomFeed::cbook24

=head1 DESCRIPTION

This plugin creates a custom feed from cbook24 book list.

=head1 AUTHOR

Masakazu Takahashi

=head1 SEE ALSO

L<Plagger>, L<http://www.cbook24.com/>

=cut


●cbook24.yaml
# Plagger configuration: fetch the cbook24 "coming soon" listing once and
# publish it both as an RSS feed and as an iCalendar file.
global:
  log:
    level: debug
  # Directory searched for additional plugins such as CustomFeed::cbook24.
  plugin_path:
    - ~/lib/plagger/Plugin

plugins:
  # Supplies the listing URL that CustomFeed::cbook24 recognizes.
  - module: Subscription::Config
    config:
      feed:
        - http://www.cbook24.com/bm_browsing.asp?page=0&browsing%5Ftype=comingsoon

  - module: CustomFeed::cbook24

  # RSS output; same directory as the iCalendar output below.
  - module: Publish::Feed
    config:
      dir: /tmp/newbook
      format: RSS
      filename: cbook.xml

  # iCalendar output.
  - module: Publish::iCal
    config:
      dir: /tmp/newbook
      filename: cbook.ics

トラックバック

http://emasaka.blog65.fc2.com/tb.php/147-0097b3d3

 | HOME | 

Categories

Recent Entries

Recent Comments

Recent Trackbacks

Appendix

emasaka

emasaka

フリーター。
連絡先はこのへん

Monthly


FC2Ad