#!/usr/bin/env ruby # This code comes from http://ruby-it.org/pages/Codice+LunaRss+1_0 # Check the page for copyright notice and explanations class RssStore def initialize page @page = page end def load_feed_uris uris = Array.new uris << @page uris << @page uris << @page return uris end end def test_load_uris rss_store = RssStore.new(TestPages) uris=rss_store.load_feed_uris assert_equal ATOM_GURU, uris[0].to_s assert_equal RSS_ANTONIO, uris[1].to_s assert_equal RSS_COMMENTI_ANTONIO, uris[2].to_s end class RssStore def initialize page @page = page end def load_feed_uris uris = Array.new @page.each do |page| URI.extract(page.body,'http').each do |url| uris << url end end return uris end end require 'net/http' require 'uri' class Feed def initialize(url, opts = { }) @uri = URI.parse(url) end def read @uri.read end end FeedData = Struct.new( :channel, :titles, :descriptions, :links ) class RssParser CHANNEL_TITLE=%r{.*?(?:(?:<!\[CDATA\[)*(.*?)(?:\]\]>)*.*?)?}m ITEM_TITLE=%r{.*?(?:(?:<!\[CDATA\[)*(.*?)(?:\]\]>)*.*?)?}m ITEM_LINKS=%r{.*?(?:(?:)*.*?)?}m ITEM_DESCRIPTION=%r{.*?(?:(?:)*.*?)?}m def initialize(feed) @feed = feed end def parse FeedData.new( extract( CHANNEL_TITLE ), extract( ITEM_TITLE ), extract( ITEM_DESCRIPTION ), extract( ITEM_LINKS ) ) end private def rss_data @data ||= @feed.read end def extract pattern rss_data.scan pattern end end class RssMaker def align text text.gsub(/^\s*\./m, '') end private :align def header align <<-HTML .Content-type: text/html\r\n\r\n . . HTML end def footer align <<-HTML . . HTML end def channel name align <<-HTML .

#{name}

HTML end def entry titolo, descrizione, link align <<-HTML . . #{titolo} .
. . #{descrizione} .

HTML end end ############################## # Configurations feeds=[ 'http://www.repubblica.it/rss/scienza_e_tecnologia/rss2.0.xml', 'http://programmazione.it/rss.xml', 'http://www.hwupgrade.it/rss_news.xml', 'http://www.hwupgrade.it/rss_articoli.xml', 'http://www.beppegrillo.it/index.xml' ] NEED_PROXY = false #switch this if you have a proxy proxy_url = "http://10.0.0.2:8080" #put here your proxy, if you have one ############################## class LunaRss def get_feeds rss_make = RssMaker.new puts rss_make.header feeds.each do |feed| parser = RssParser.new feed data = parser.parse puts rss_make.channel( data.channel ) 0.upto data.titles.length do|i| puts rss_make.entry( data.titles[i], data.descriptions[i], data.links[i] ) end end puts rss_make.footer end end class RssStore def initialize page @page = page end def load_feed_uris uris = Array.new @page.each do |page| URI.extract(page.body,'http').each do |url| uris << url end end return uris end end require 'test/unit' require 'stringio' class TestLunaRss < Test::Unit::TestCase TEST_DATA_RSS_20 = <<-HTML Ruby Italia http://ruby-it.org/ Tracking all versions for Ruby Italia it-it 40 Chiaroscuro description too long Mon, 08 May 2006 13:33:54 +0200 http://ruby-it.org/pages/Chiaroscuro#936 http://ruby-it.org/pages/Chiaroscuro#936 Chiaroscuro HTML TEST_DATA_RSS_092 = <<-HTML PDI^2 http://riffraff.blogsome.com Propulsione d'Improbabilità Infinita Wed, 24 May 2006 08:02:34 +0000 http://backend.userland.com/rss092 en Summer of Code another description too long http://riffraff.blogsome.com/2006/05/24/summer-of-code/ HTML TEST_DATA_ATOM = <<-HTML mozillaZine.org 2006-05-24T23:26:54-08:00 Your Source for Daily Mozilla News and Advocacy tag:mozillazine.org,2004:1 Copyright (c) 2004, The Mozillazine Organization Opera Watch Soliciting Questions for Blake Ross Interview 2006-05-24T16:17:17-08:00 2006-05-24T16:17:17-08:00 2006-05-24T16:17:17-08:00 tag:mozillazine.org,2004:article8583 mozillaZine.org man! it's too long TOO LONG!!! HTML def test_parser_rss_20 parser = RssParser.new(StringIO.new(TEST_DATA_RSS_20)) data = parser.parse assert_equal "Ruby Italia".to_s, data.channel[0].to_s assert_equal "Chiaroscuro".to_s, data.titles[0].to_s assert_equal "description too long".to_s, data.descriptions[0].to_s assert_equal "http://ruby-it.org/pages/Chiaroscuro#936".to_s, data.links[0].to_s end def test_parser_rss_092 parser = RssParser.new(StringIO.new(TEST_DATA_RSS_092)) data = parser.parse assert_equal "PDI^2".to_s, data.channel[0].to_s assert_equal "Summer of Code".to_s, data.titles[0].to_s assert_equal "another description too long".to_s, data.descriptions[0].to_s assert_equal "http://riffraff.blogsome.com/2006/05/24/summer-of-code/".to_s, data.links[0].to_s end def test_parser_atom parser = RssParser.new(StringIO.new(TEST_DATA_ATOM)) data = parser.parse assert_equal "mozillaZine.org".to_s, data.channel[0].to_s assert_equal "Opera Watch Soliciting Questions for Blake Ross Interview".to_s, data.titles[0].to_s assert_equal "man! it's too long TOO LONG!!!".to_s, data.descriptions[0].to_s assert_equal "http://www.mozillazine.org/talkback.html?article=8583".to_s, data.links[0].to_s end end class TestLunaRssStore < Test::Unit::TestCase Page=Struct.new :title, :body ATOM_GURU = 'http://guragedev.blogspot.com/atom.xml' RSS_ANTONIO = 'http://www.antoniocangiano.com/xml/rss20/feed.xml' RSS_COMMENTI_ANTONIO = 'http://www.antoniocangiano.com/xml/rss20/comments/feed.xml' TestPages=[Page.new("Andrea",<