本を読む

読書やコンピュータなどに関するメモ

P::P::CustomFeed::cbook24を更新

 Plaggerを使ってcbook24の近刊情報をRSSやiCalにするためのプラグイン P::P::CustomFeed::cbook24を更新したので晒しておく。

 変更点はこんな(↓)ところ。

  • HTMLの形式の変更に対応した
  • PlaggerにHTMLでデータを渡すようにした
  • iconもつけてみた。icon対応のPublishプラグインなら使える模様
  • その他、普通に書き方をいろいろ変更
  • package Plagger::Plugin::CustomFeed::cbook24;
    use strict;
    use base qw( Plagger::Plugin );
    
    use Encode;
    use Plagger::UserAgent;
    use Plagger::Util qw( decode_content );
    
    our $VERSION = '0.02';
    
    # Register this plugin's handler for the 'customfeed.handle' hook.
    #
    # Arguments: the plugin object and the Plagger context.
    # The hook is given a code reference to handle(); a bare "handle" would
    # not be a code reference and is rejected under "use strict".
    sub register {
        my ($self, $context) = @_;

        $context->register_hook(
            $self,
            'customfeed.handle' => \&handle,
        );
    }
    
    # 'customfeed.handle' hook: claim the feed when its URL is a cbook24
    # browsing page, otherwise decline.
    #
    # Arguments: the plugin object, the Plagger context, and the hook
    # arguments (only $args->{feed}->url is read).
    # Returns the result of aggregate() for a cbook24 URL, and an empty
    # return for any other URL.
    sub handle {
        my ($self, $context, $args) = @_;

        # Not a cbook24 listing: decline.
        return
            unless $args->{feed}->url =~ m|^http://www\.cbook24\.com/bm_browsing\.asp|;

        return $self->aggregate($context, $args);
    }
    
    # Fetch every page of the cbook24 listing, starting from the subscribed
    # URL, and publish the books found there as a single Plagger feed.
    #
    # Arguments: the plugin object, the Plagger context, and the hook
    # arguments (only $args->{feed}->url is read).
    # Returns whatever $context->update->add returns for the finished feed.
    sub aggregate {
        my ($self, $context, $args) = @_;

        my $url = URI->new($args->{feed}->url);

        my $feed = Plagger::Feed->new;
        $feed->title('cbook24');
        $feed->link($url);

        # The pattern literal is passed through decode('utf-8', ...) before it
        # is compiled, as the original code did.
        # NOTE(review): the exact spacing of the "next page" link text could
        # not be recovered, so any mix of whitespace and &nbsp; entities is
        # accepted around the two characters -- confirm against the live page.
        my $next_pattern = decode('utf-8',
            '<a\s+href="([^"]*)">\s*次(?:\s|&nbsp;)*へ(?:\s|&nbsp;)*</a>');
        my $re_next = qr/$next_pattern/;

        # Follow "next page" links until a page has none.  %seen stops the
        # walk if a page links back to one that was already fetched.
        my %seen;
        while ($url and not $seen{$url}++) {
            my $content = Plagger::Util::load_uri($url);
            $content =~ s/[\r\n]/ /g;

            # Look for the next-page link before the page is trimmed below.
            my $next_url;
            if ($content =~ $re_next) {
                $next_url = URI->new_abs($1, $url);
            }

            # Keep only the part of the page that holds the book cells.
            $content =~ s/.*?<td vAlign="bottom">//;
            $content =~ s/<td valign="top" align="right" width="150">.*$//;

            for my $chunk (split /<td align="center" valign="top" width="50%">/, $content) {
                # Chunks that are not a book cell (e.g. the text before the
                # first cell) yield no entry and are skipped.
                my $entry = parse_item($chunk, $url) or next;
                $feed->add_entry($entry);
            }

            $url = $next_url;
        }

        return $context->update->add($feed);
    }

    # Build a Plagger::Entry from the HTML of one book cell.
    #
    # Arguments: the cell's HTML, and the URL of the page it came from (used
    # to resolve relative links).
    # Returns the entry, or an empty return when the cell has no release date
    # or no titled link, so that the caller can skip it.
    sub parse_item {
        my ($item, $url) = @_;

        # Decoded for the same reason as the next-page pattern in aggregate().
        my $date_pattern = decode('utf-8', '発売日:.*?(\d{4}/\d+/\d+)');
        $item =~ qr/$date_pattern/ or return;
        my $date = Plagger::Date->strptime('%Y/%m/%d', $1);

        $item =~ m|<a\s+href="(.*?)"\s+title="(.*?)">| or return;
        my $item_url   = URI->new_abs($1, $url);
        my $item_title = $2;

        # The cover image is optional.
        my $icon;
        if ($item =~ m|<img[^>]*src="(/assets/product_images/[\dX-]+\.jpg)">|) {
            $icon = { url => URI->new_abs($1, $url) };
        }

        # Reduce the cell to text: drop everything from the first </table>,
        # turn each table row end into a line break, and strip the remaining
        # tags together with spaces and tabs.
        $item =~ s|</table>.*||s;
        $item =~ s|</tr>|\n|g;
        $item =~ s/[ \t]+|<.*?>//g;
        $item =~ s|\n|<br />|g;

        my $entry = Plagger::Entry->new;
        $entry->title($item_title);
        $entry->link($item_url);
        $entry->date($date);
        $entry->body('<p>' . $item . '</p>');
        $entry->icon($icon) if $icon;

        return $entry;
    }
    
    1;
    __END__
    
    =head1 NAME
    
    Plagger::Plugin::CustomFeed::cbook24 - Custom feed for cbook24 booklist
    
    =head1 SYNOPSIS
    
      - module: Subscription::Config
        config:
          feed:
            - url: http://www.cbook24.com/bm_browsing.asp?page=0&browsing%5Ftype=comingsoon
      - module: CustomFeed::cbook24
    
    =head1 DESCRIPTION
    
    This plugin creates a custom feed from cbook24 book list.
    
    =head1 AUTHOR
    
    Masakazu Takahashi
    
    =head1 SEE ALSO
    
    L<Plagger>, L<http://www.cbook24.com/>
    
    =cut
    

    トラックバック

    http://emasaka.blog65.fc2.com/tb.php/180-7236f794

     | HOME | 

    Categories

    Recent Entries

    Recent Comments

    Recent Trackbacks

    Appendix

    emasaka

    emasaka

    フリーター。
    連絡先はこのへん

    Monthly


    FC2Ad