Codice LunaRss 0_4

Vedi tutte le pagine e le modifiche recenti o scarica i sorgenti nella pagina


LunaRss v0.4 – modificata da AntonioCarpentieri

Codice LunaRss 0_5 – refactored by Chiaroscuro

Commenti

— Chiaroscuro: propongo data come attributo @data


#!/usr/bin/env ruby

require 'net/http'

# Downloads an RSS feed over HTTP(S) and pulls the channel title and the
# title/link/description of every item out of the raw XML using regular
# expressions (no XML parser is involved).
class Rss_parser

  # url   - String address of the feed. A missing scheme is treated as http.
  # proxy - String URL of an HTTP proxy (e.g. "http://10.0.0.2:8080"), or
  #         nil to let Net::HTTP use its defaults.
  #
  # Raises URI::InvalidURIError when url cannot be parsed and ArgumentError
  # when it is not an http(s) address.
  def initialize(url, proxy)
    @url_completo = url

    # Let URI do the splitting: the previous gsub-based approach produced a
    # bogus path for URLs without one and kept ":port" inside the host name.
    text = url.to_s
    text = "http://#{text}" unless text =~ %r{\A[a-z][a-z0-9+.\-]*://}i
    @uri = URI.parse(text)
    raise ArgumentError, "unsupported feed URL: #{url}" unless @uri.is_a?(URI::HTTP)

    @http_proxy = proxy
  end

  # Fetches the feed and extracts its fields.
  #
  # Returns a Hash with the keys 'titoli', 'links', 'descrizioni' and
  # 'canali'. Every value is the result of String#scan with a single capture
  # group, i.e. an Array of one-element Arrays; the element is nil when the
  # enclosing <item>/<channel> has no such child element.
  def esegui_parsing
    data = get_raw_data

    {
      'titoli'      => extract_data(data, item_title_pattern),
      'links'       => extract_data(data, item_links_pattern),
      'descrizioni' => extract_data(data, item_description_pattern),
      'canali'      => extract_data(data, channel_title_pattern)
    }
  end

  private

  # Performs the GET request and returns the response body as a String with
  # every NUL byte removed (an empty String when the response has no body).
  def get_raw_data
    http = http_class.new(@uri.host, @uri.port)
    http.use_ssl = true if @uri.is_a?(URI::HTTPS)

    # Net::HTTP#get returns only the response object on current Rubies, so
    # the body has to be read from it ("resp, data = get(...)" leaves data nil).
    body = http.get(@uri.request_uri).body
    body.to_s.delete("\0")
  end

  # Returns the class used to open the connection: Net::HTTP itself, or a
  # proxy-bound subclass when a proxy URL was supplied.
  def http_class
    return Net::HTTP unless @http_proxy

    proxy_uri = URI.parse(@http_proxy)
    Net::HTTP::Proxy(proxy_uri.host, proxy_uri.port, proxy_uri.user, proxy_uri.password)
  end

  # Returns every match of pattern in raw_data, in document order.
  def extract_data(raw_data, pattern)
    raw_data.scan(pattern)
  end

  # Builds the regexp that captures the text of the first <field> found
  # inside each <container> element, stripping an optional CDATA wrapper.
  # The (?!s) guard keeps e.g. <items> from being taken for <item>.
  def field_pattern(container, field)
    %r{<#{container}(?!s).*?>.*?(?:<#{field}>(?:<!\[CDATA\[)*(.*?)(?:\]\]>)*</#{field}>.*?)?</#{container}>}m
  end

  def channel_title_pattern
    field_pattern('channel', 'title')
  end

  def item_title_pattern
    field_pattern('item', 'title')
  end

  def item_links_pattern
    field_pattern('item', 'link')
  end

  def item_description_pattern
    field_pattern('item', 'description')
  end

end

##############################

# Produces the HTML fragments of the generated page. Every method returns a
# newline-terminated String; nothing is printed here and the values passed
# in are inserted as they are, without any escaping.
class RssMaker
  # CGI content-type header followed by the opening tags of the document.
  def header
    "Content-type: text/html\r\n\r\n\n" \
      "<html>\n" \
      "<body>\n"
  end

  # Closing tags matching the ones opened by #header.
  def footer
    "</body>\n" \
      "</html>\n"
  end

  # Heading that introduces the entries of one feed.
  #
  # name - the channel title to display.
  def channel(name)
    "<h3>#{name}</h3>\n"
  end

  # One feed entry: a bold link to the article followed by its summary in a
  # smaller font.
  #
  # titolo      - text of the link.
  # descrizione - summary shown under the link.
  # link        - target of the link.
  def entry(titolo, descrizione, link)
    anchor  = %(  <a href="#{link}"> #{titolo} </a>)
    summary = %(<font size="-1"> #{descrizione} </font>)

    # The empty strings yield the blank separator line and the final newline.
    ['<strong>', anchor, '</strong><br/>', '', summary, '<p/>', ''].join("\n")
  end

end

##############################

# Feeds shown on the page, in display order.
elenco_rss = [
  'http://www.repubblica.it/rss/scienza_e_tecnologia/rss2.0.xml',
  'http://programmazione.it/rss.xml',
  'http://www.hwupgrade.it/rss_news.xml',
  'http://www.hwupgrade.it/rss_articoli.xml',
  'http://www.beppegrillo.it/index.xml'
]

# Proxy used for every download. Defaults to the address that used to be
# hard-coded here; set LUNARSS_PROXY to override it (empty value = no proxy).
proxy = ENV.fetch('LUNARSS_PROXY', 'http://10.0.0.2:8080')
proxy = nil if proxy.empty?

# The parser hands back String#scan results (one-element capture arrays,
# possibly holding nil). Joining them gives the plain text to display instead
# of the Array#inspect form that string interpolation would produce.
testo = lambda { |campo| Array(campo).flatten.join }

rss_make = RssMaker.new

puts rss_make.header

elenco_rss.each do |rss_file|
  begin
    dati = Rss_parser.new(rss_file, proxy).esegui_parsing
  rescue StandardError => e
    # One unreachable or malformed feed must not take the whole page down.
    warn "LunaRss: #{rss_file}: #{e.class}: #{e.message}"
    next
  end

  puts rss_make.channel(testo.call(dati['canali']))

  titoli, descrizioni, links = dati.values_at('titoli', 'descrizioni', 'links')

  # each_index stops at the last item; "0.upto length" ran one step too far
  # and printed an empty entry after every feed.
  titoli.each_index do |i|
    puts rss_make.entry(testo.call(titoli[i]),
                        testo.call(descrizioni[i]),
                        testo.call(links[i]))
  end
end

puts rss_make.footer
Updated on May 04, 2006 00:55 by Ruby Fan (81.208.83.225)