|perl|
#!/usr/bin/perl
# file: android-app-comment-jp.pl
# use strict;
# use warnings;
use URI;
use YAML;
use WWW::Mechanize;
use Web::Scraper;
use utf8;
use Data::Dumper;
use Digest::MD5 qw(md5 md5_hex md5_base64);
# Fetch the androlib.com (jp) comment-list page that the scraper parses.
my $uri = URI->new('http://jp.androlib.com/appcommlist.aspx');

# Direct method-call syntax replaces the ambiguous indirect-object form
# ("new WWW::Mechanize").  autocheck is requested explicitly so that a failed
# request dies here instead of handing an error page to the scraper,
# regardless of which default the installed WWW::Mechanize version uses.
my $mech = WWW::Mechanize->new( autocheck => 1 );
$mech->get($uri);
# Scraper for the comment list.  Every 'div.assetcomment' directly under
# 'div.assetcomments' becomes one hash in 'entries'; the scraper's result is
# that list of entry hashes.
#
# Keys of each entry:
#   title    - text of the 'b' element
#   body     - text of the 'i' element
#   pub_date - the first A/B/C number triple in the 'span.stars' text,
#              rewritten as C-A-B (presumably M/D/Y -> Y-M-D; confirm against
#              the live page)
#   stars    - number of "mstar.png" occurrences in the 'span.stars' HTML
#              (presumably one image per filled star; confirm)
#   url      - href of the 'a' element directly under the comment div
my $s = scraper {
    process 'div.assetcomments > div.assetcomment', 'entries[]' => scraper {
        process 'b', 'title' => 'TEXT';
        process 'i', 'body'  => 'TEXT';
        process 'span.stars', 'pub_date' => [
            'TEXT',
            sub {
                # The filter edits $_ in place; Web::Scraper picks up the
                # modified $_ when the sub returns the s/// result.
                # The prefix is non-greedy so the first number keeps all of
                # its digits (a greedy ".+" left only the last digit, turning
                # month 12 into 2), and the suffix may be empty so a date at
                # the very end of the text keeps its full year.  /s lets the
                # pattern span newlines in the extracted text.
                s{\A .*? (\d+) / (\d+) / (\d+) .* \z}{$3-$1-$2}xs;
            },
        ];
        process 'span.stars', 'stars' => [
            'HTML',
            sub {
                # Count the star images directly instead of rewriting the
                # markup through a global variable; this also works when the
                # HTML spans several lines.
                my $count = () = /mstar\.png/g;

                # Both set $_ and return the count so the filter yields the
                # number whichever of the two Web::Scraper decides to use.
                $_ = $count;
                return $count;
            },
        ];
        process 'div.assetcomment > a', 'url' => '@href';
    };
    result 'entries';
};
# Run the scraper over the fetched page; the page URI is passed along as the
# second argument to scrape().
my $page_html = $mech->content;
my $page_uri  = $mech->uri;
my $src       = $s->scrape( $page_html, $page_uri );

# Feed-level metadata; the entries are appended to this hash later on.
my $feed = {};
$feed->{title} = 'Android Apps Comment jp';
$feed->{link}  = $uri->as_string;
$feed->{type}  = 'mixinews';
# Turn every scraped comment into a feed entry.
#
# Each entry gets:
#   title   - the scraped title
#   link    - the scraped URL plus a short "#xxxxx" hash fragment
#   summary - the rating drawn as filled/empty stars
#   body    - "<title>(<stars>), <comment>, <url>", with the part before the
#             URL cut to $max_mes_len characters
#   date    - the scraped publication date
#
# All working variables are lexical (they used to be undeclared package
# globals), and the unused counter $i has been dropped.
my $max_stars   = 5;     # ratings are drawn on a five-star scale
my $max_mes_len = 95;    # longest message kept before "(略)" is appended

for my $entry ( @{ $src || [] } ) {

    # Only a plain non-negative integer is usable as a repeat count; anything
    # else (missing value, leftover markup) is treated as zero stars, and the
    # value is capped so the rating never exceeds the scale.
    my $rating = $entry->{stars};
    $rating = 0 unless defined $rating && $rating =~ /\A\d+\z/;
    $rating = $max_stars if $rating > $max_stars;

    my $stars = ( "★" x $rating ) . ( "☆" x ( $max_stars - $rating ) );

    my $mes = sprintf( '%s(%s), %s', $entry->{title}, $stars, $entry->{body} );
    if ( length($mes) > $max_mes_len ) {
        $mes = substr( $mes, 0, $max_mes_len ) . "(略)";
    }

    my $text = sprintf( '%s, %s', $mes, $entry->{url} );

    # The app URLs are identical, so append a hash fragment to each URL to
    # keep the entries from being dropped as duplicates.  md5_hex() needs
    # bytes, hence the UTF-8 encoding of a copy of the text.
    my $octets = $text;
    utf8::encode($octets);
    my $url_hash = '#' . substr( md5_hex($octets), 0, 5 );

    push @{ $feed->{entries} }, {
        title   => $entry->{title},
        link    => $entry->{url} . $url_hash,
        summary => $stars,
        body    => $text,
        date    => $entry->{pub_date},
    };
}
# Write the finished feed to STDOUT as YAML; the :utf8 layer lets the
# non-ASCII characters in the feed be printed.
binmode( STDOUT, ":utf8" );
print STDOUT Dump($feed);