You need to join this project to post a message or question. See Help for details.
woim.net.rb
| 1 | #!/usr/bin/ruby
|
|---|---|
| 2 | |
| 3 | # $Id: woim.net.rb 14 2010-03-20 14:35:36Z pi $
|
| 4 | # author : kyanh <@viettug.org>
|
| 5 | # purpose : fetch list of mp3 files from http://woim.net/
|
| 6 | # license : GPL version 2
|
| 7 | # home page: http://viettug.org/projects/fs/wiki/woim
|
| 8 | # doc/usage: (described in home page)
|
| 9 | # policy : http://www.woim.net/forums/viewtopic.php?t=102
|
| 10 | |
| 11 | require 'rubygems' # for the others |
| 12 | require 'curb' # for fetching data |
| 13 | |
# Console notice. Constructing an instance writes the given text to standard
# output, prefixed with ":: "; the instance itself keeps no state.
class Message
  # msg - the object to show; it is interpolated into the printed line.
  def initialize(msg)
    $stdout.puts(":: #{msg}")
  end
end
| 19 | |
# Tiny file-based cache. Every entry is a single file under ./cache
# (relative to the current working directory), named after its cache id.
#
# The helpers are module functions, so Cache.read / Cache.write work on the
# module itself instead of relying on the module being mixed into Object.
module Cache
  module_function

  # Returns the path of the file that backs +cache_id+.
  def filename(cache_id)
    "./cache/#{cache_id}"
  end

  # Stores +contents+ under +cache_id+ and prints a "cache updated" notice.
  # The cache directory is created first when it does not exist yet.
  def write(cache_id, contents)
    path = filename(cache_id)
    dir = File.dirname(path)
    Dir.mkdir(dir) unless File.directory?(dir)
    # Block form: the file is closed even if the write raises.
    File.open(path, "w") { |f| f.write(contents) }
    Message.new "cache updated: #{cache_id}"
  end

  # Returns the text stored under +cache_id+, or nil when there is no such
  # entry or the file cannot be read. The "cache loaded" notice is printed
  # only after the read has succeeded.
  def read(cache_id)
    return nil unless cached?(cache_id)
    contents = File.read(filename(cache_id))
    Message.new "cache loaded: #{cache_id}"
    contents
  rescue SystemCallError, IOError
    nil
  end

  # True when a file exists for +cache_id+.
  def cached?(cache_id)
    File.exist?(filename(cache_id))
  end
end

# Kept for backward compatibility: the helpers stay callable without a
# receiver from top-level code (they are private when mixed in).
include Cache
| 51 | |
# Retrieves the text behind a URL, consulting the cache first when a cache
# id was supplied. Downloads go through curb (Curl::Easy).
#
# User agent, verbosity and proxy are class-wide settings shared by all
# instances; change them through Fetch.agent=, Fetch.debug= and Fetch.proxy=.
class Fetch
  attr_reader :url, :cache, :cached

  @@agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; Nautilus/1.0Final) Gecko/20020408"
  @@debug = false
  # Initialised here so #body works even if Fetch.proxy= was never called
  # (reading an unset class variable raises NameError).
  @@proxy = nil

  # Sets the proxy. A Hash (read with :host and :port keys) enables it and
  # prints a notice; any other value disables it.
  def self.proxy=(a)
    if a.is_a?(Hash)
      @@proxy = a
      Message.new "data fetched via proxy #{@@proxy[:host]}:#{@@proxy[:port]}"
    else
      @@proxy = nil
    end
  end

  # Sets the User-Agent header; anything but a non-empty String is ignored.
  def self.agent=(a)
    @@agent = a if a.is_a?(String) && !a.empty?
  end

  # Value handed to curl's verbose switch on every download.
  def self.debug=(value)
    @@debug = value
  end

  # url   - address to download.
  # cache - cache id to look up before downloading, or nil to skip the cache.
  def initialize(url, cache = nil)
    @url = url
    @cache = cache
    @cached = false
  end

  # Returns the cached text when a cache id was given and Cache.read finds
  # it (#cached is then true); otherwise downloads the URL. A failed
  # download yields "" after printing the reason.
  def body
    @cached = false
    if @cache
      hit = Cache.read(@cache)
      if hit
        @cached = true
        return hit
      end
    end
    download
  end

  private

  # Performs the HTTP request and returns the response body, or "" on error.
  def download
    Message.new "fetching #{@url}"
    response = Curl::Easy.perform(@url) do |curl|
      curl.headers["User-Agent"] = @@agent
      curl.verbose = @@debug
      if @@proxy
        curl.proxy_url = @@proxy[:host]
        port = @@proxy[:port]
        # The port may arrive as a String (e.g. parsed from the command
        # line); curb is expected to want a number here.
        curl.proxy_port = port.to_i if port
      end
    end
    response.body_str
  rescue StandardError => e
    # Still best-effort, but no longer silent about what went wrong.
    Message.new "failed to fetch #{@url}: #{e.message}"
    ""
  end
end
| 106 | |
# One song on woim.net, identified by its numeric id.
class Song
  attr_reader :w_url, :w_id

  # song_id - the song's id; stored as a String.
  def initialize(song_id)
    @w_id = song_id.to_s
    @w_url = "http://www.woim.net/song/#{@w_id}/index.html"
  end

  # Returns the URL of the song's mp3 file, or "" when it cannot be found.
  #
  # The song page is searched for a player parameter that points to a
  # second ("meta") document; that document holds the mp3 link. Two player
  # variants are recognised. A freshly resolved link is written to the cache
  # under "song_<id>" in the first variant's format, so a cached entry is
  # parsed by the same patterns without any download.
  def mp3
    cache_id = "song_#{@w_id}"
    fetch = Fetch.new(@w_url, cache_id)
    body = fetch.body
    # Escaped so that an id containing regex metacharacters matches literally.
    id = Regexp.escape(@w_id)

    # [pattern locating the meta URL in the page, pattern locating the mp3
    # link in the meta document]; tried in order, first page match wins.
    variants = [
      [%r|<param name="flashvars".*?code=(http://www\.woim\.net/.*?/#{id}/.*?)">|i,
       %r|location="(.*?)">|i],
      [%r|<param name="FileName" value="(http://www\.woim\.net/.*?/#{id}/.*?)">|i,
       %r|<ref href="(.*?)" />|i]
    ]

    meta_url = nil
    link_to_mp3 = ""
    variants.each do |meta_pattern, link_pattern|
      found = body.match(meta_pattern)
      next unless found
      meta_url = found[1]
      # A cached body already contains the link line; otherwise fetch the meta document.
      text = fetch.cached ? body : Fetch.new(meta_url).body
      link = text.match(link_pattern)
      link_to_mp3 = link[1] if link
      break
    end

    if !link_to_mp3.empty? && !fetch.cached
      ct = []
      ct << "<param name=\"flashvars\" code=#{meta_url}\">"
      ct << "location=\"#{link_to_mp3}\">"
      Cache.write(cache_id, ct.join("\n"))
    end
    link_to_mp3
  end

  # Prints the mp3 URL (an empty line when none was found).
  def print_mp3
    puts mp3
  end
end
| 143 | |
# Extends String with a helper used to build file names.
class String
  # Returns a lower-cased copy in which every character other than
  # 0-9, a-z, "_" and "-" has been turned into "_".
  def sanitized
    spaced = downcase.gsub(/[^0-9a-z_-]/, ' ')
    spaced.tr(' ', '_')
  end
end
| 149 | |
# One album on woim.net. Constructing an instance fetches the album page
# (or its cached summary), extracts title, artist and song list, and
# resolves the mp3 link of every song.
#
# Readers: w_id (album id as String), w_text (page or cache text),
# w_title / w_artist (nil when not found), w_list (Array of Hashes with
# :id, :title and :mp3).
class Album
  attr_reader :w_id, :w_text, :w_title, :w_artist, :w_list

  # id - the album's id.
  def initialize(id)
    @w_id = id.to_s

    fetch = Fetch.new("http://www.woim.net/album/#{@w_id}/index.html", "album_#{@w_id}")
    @w_text = fetch.body

    @w_title = nil
    @w_artist = nil
    @w_list = []

    get_info
    get_list

    # A page that yielded nothing (e.g. a failed download returning "") must
    # not be cached, otherwise later runs would keep loading the empty entry.
    nothing_parsed = @w_title.nil? && @w_list.empty?
    write_cache if !fetch.cached && !nothing_parsed
  end

  # Prints title and artist and, when songs were found, the song titles
  # followed by one wget command line per song.
  def print
    Message.new "-" * 46
    puts "Album: #{@w_title}"
    puts "Artist: #{@w_artist}"
    return if @w_list.empty?

    print_titles
    Message.new "wget script to download mp3 file(s)"
    Message.new "-" * 46
    # to_s: the title is nil when the album info could not be parsed.
    album_part = @w_title.to_s.sanitized
    @w_list.each do |s|
      puts "wget -O \"#{album_part}_#{s[:title].sanitized}.mp3\" \"#{s[:mp3]}\""
    end
  end

  # Prints the song titles followed by the plain list of mp3 URLs.
  # Prints nothing when no songs were found.
  def print_m3u
    return if @w_list.empty?

    print_titles
    Message.new "list of mp3 files"
    Message.new "-" * 46
    @w_list.each { |s| puts s[:mp3] }
  end

  private

  # Prints the "* title" lines framed by separator notices.
  def print_titles
    Message.new "-" * 46
    @w_list.each { |s| puts "* #{s[:title]}" }
    Message.new "-" * 46
  end

  # Writes a compact summary to the cache under "album_<id>". The lines are
  # shaped so that the patterns in get_info and get_list match them again.
  def write_cache
    st = []
    st << 'class="album_info">'
    st << "Album: <h1>#{@w_title}</h1>"
    st << "<tr></tr>"
    st << "<tr>Artist: href=>#{@w_artist}</a></tr>"
    @w_list.each do |song|
      st << "<td>0. href=\"http://www.woim.net/song/#{song[:id]}/\">#{song[:title]}</a>"
    end
    Cache.write("album_#{@w_id}", st.join("\n"))
    self
  end

  # Extracts title and artist from the page text; both stay nil when the
  # pattern does not match.
  def get_info
    found = @w_text.match(%r{
      class="album_info">.*?
      Album: .*? <h1>(.*?)</h1>.*?
      <tr>.*?</tr>.*?
      <tr>.*? href=.*?>(.*?)</a>.*?</tr>
    }mx)
    if found
      @w_title = found[1]
      @w_artist = found[2]
      Message.new "album found #{@w_title} (performed by #{@w_artist})"
    end
    self
  end

  # Collects every song row of the page, then resolves each song's mp3 link
  # and appends the completed entry to w_list.
  def get_list
    rows = @w_text.scan(%r{
      <td>[0-9]+.*?
      href="http://www\.woim\.net/song/([0-9]+)/.*?>(.*?)</a>
    }mx)
    Message.new "#{rows.size} song(s) found"
    rows.each do |id, title|
      @w_list << {:id => id, :title => title, :mp3 => Song.new(id).mp3}
    end
    self
  end
end
| 244 | |
# Command-line entry point. Every argument is one of:
#   * an album reference: text containing "album/<digits>", or a bare number
#   * a song reference:   text containing "song/<digits>"
#   * a proxy setting:    "proxy=<host>:<port>"
# Anything else is reported and skipped. The proxy is applied as soon as its
# argument is read; downloads start only after all arguments are parsed.
Fetch.debug = false
Fetch.proxy = nil # {:host => "localhost",:port => 3128}

albums = []
songs = []

ARGV.each do |arg|
  # \A and \z: a bare number must be the whole argument.
  if (gs = arg.match(%r|album/([0-9]+)|) || arg.match(%r|\A([0-9]+)\z|))
    albums << gs[1]
  elsif (gs = arg.match(%r|song/([0-9]+)|))
    songs << gs[1]
  elsif (gs = arg.match(%r|proxy=(.*?):([0-9]+)|))
    # The captured port is text; pass it on as a number.
    Fetch.proxy = {:host => gs[1], :port => gs[2].to_i}
  else
    Message.new "failed to parse: #{arg}"
  end
end

albums.each { |a| Album.new(a).print_m3u }
songs.each { |s| Song.new(s).print_mp3 }