#!/usr/bin/env ruby
# This code comes from http://ruby-it.org/pages/Codice+LunaRss+0_5
# Check the page for copyright notice and explanations
# Returns the cached raw feed data, fetching it via get_raw_data the first
# time it is needed.
# NOTE(review): this top-level definition looks like a duplicate of the
# method of the same name inside Rss_parser -- confirm whether it is needed.
def rss_data
  @data || (@data = get_raw_data)
end
require 'net/http'
# Value object produced by Rss_parser#esegui_parsing; each field holds the
# matches collected for the corresponding pattern.
FeedData = Struct.new(:channel, :titles, :descriptions, :links)
class Rss_parser
# Splits the feed URL into the pieces needed to issue the HTTP request.
#
# url   - absolute feed URL using the http:// scheme
# proxy - optional proxy URL string, stored untouched (nil means no proxy)
#
# Sets @url_completo (the URL as given), @url_no_http (URL without the
# leading scheme), @url_http (the part before the first "/") and
# @url_pagina (the rest, always starting with "/").
def initialize(url, proxy=nil)
  @url_completo = url
  # Remove only the leading scheme: a global substitution would also eat any
  # "http://" that appears later in the URL, e.g. inside a query string.
  @url_no_http = @url_completo.sub(%r{\Ahttp://}, '')
  # Everything before the first "/" is the host part, the remainder is the
  # page. A URL without any path therefore requests "/".
  host, _separator, page = @url_no_http.partition('/')
  @url_http = host
  @url_pagina = "/" + page
  @http_proxy = proxy
end
# Runs every pattern against the feed and bundles the results.
#
# Returns a FeedData whose fields are, in order, the matches for the
# channel title, item title, item description and item link patterns.
def esegui_parsing
  channel = parse(channel_title_pattern)
  titles = parse(item_title_pattern)
  descriptions = parse(item_description_pattern)
  links = parse(item_links_pattern)
  FeedData.new(channel, titles, descriptions, links)
end
private
# Downloads the feed and returns its body as a String with NUL bytes removed.
#
# Connects to @url_http -- through the proxy named by @http_proxy when one
# was given -- and issues a GET for @url_pagina. The response status is not
# inspected, so the body of an error response is returned like any other.
def get_raw_data()
  h = if @http_proxy.nil?
    Net::HTTP.new(@url_http)
  else
    proxy_uri = URI.parse(@http_proxy)
    Net::HTTP::Proxy(proxy_uri.host, proxy_uri.port).new(@url_http)
  end
  # Net::HTTP#get returns only the response object (not a [response, body]
  # pair), so the body has to be read from it explicitly.
  resp = h.get(@url_pagina)
  # to_s covers responses that carry no body; gsub (non-bang) leaves the
  # response's own string untouched.
  resp.body.to_s.gsub(/\0/, '')
end
# Memoized accessor for the raw feed text: get_raw_data is invoked only while
# no truthy value has been cached in @data yet.
def rss_data
  return @data if @data

  @data = get_raw_data
end
# Collects every match of +pattern+ in the feed text.
#
# Returns an Array with one entry per match: the matched String when the
# pattern has no capture groups, otherwise an Array of the captured groups.
def parse(pattern)
  rss_data.scan(pattern)
end
def channel_title_pattern
%r{