You need to join this project to post a message or question. See Help for details.

woim.net.rb

phiên bản công cộng đầu tiên - , 20-03-2010 09:36

Download (6 kB)

 
1
#!/usr/bin/ruby
2
3
# $Id: woim.net.rb 14 2010-03-20 14:35:36Z pi $ 
4
# author   : kyanh <@viettug.org>
5
# purpose  : fetch list of mp3 files from http://woim.net/
6
# license  : GPL version 2
7
# home page: http://viettug.org/projects/fs/wiki/woim
8
# doc/usage: (described in home page)
9
# policy   : http://www.woim.net/forums/viewtopic.php?t=102
10
11
require 'rubygems'    # for the others
12
require 'curb'        # for fetching data
13
14
# Progress/diagnostic printer: constructing an instance writes the message
# to standard output, prefixed with ":: ".
class Message
  # msg - the text to show after the ":: " prefix (interpolated via to_s).
  def initialize(msg)
    line = ":: #{msg}"
    $stdout.puts(line)
  end
end
19
20
# Minimal file-based cache. Every entry is a file under ./cache/ (relative to
# the current working directory) named after its cache id.
#
# The methods stay ordinary instance methods, so `include Cache` keeps
# working; `extend self` additionally makes them callable directly on the
# module (Cache.read, Cache.write, ...) without mixing it into Object first.
module Cache
  extend self

  # Returns the path of the file that backs +cache_id+.
  def filename(cache_id)
    "./cache/#{cache_id}"
  end

  # Stores +contents+ under +cache_id+ and reports the update.
  # The cache directory is created on first use; the block form of File.open
  # guarantees the handle is closed even when the write fails.
  def write(cache_id, contents)
    path = filename(cache_id)
    dir = File.dirname(path)
    Dir.mkdir(dir) unless File.directory?(dir)
    File.open(path, "w") { |f| f.write(contents) }
    Message.new "cache updated: #{cache_id}"
  end

  # Returns the cached contents for +cache_id+ as a String, or nil when there
  # is no entry or the file cannot be read. The "cache loaded" message is only
  # printed once the read has actually succeeded.
  def read(cache_id)
    return nil unless cached?(cache_id)

    begin
      contents = File.read(filename(cache_id))
    rescue SystemCallError, IOError
      # Best effort: an unreadable entry is treated like a cache miss.
      return nil
    end
    Message.new "cache loaded: #{cache_id}"
    contents
  end

  # True when a cache file exists for +cache_id+.
  def cached?(cache_id)
    File.exist?(filename(cache_id))
  end
end
49
50
# Mix the Cache methods into the top-level object (and thereby into Object);
# the classes below invoke them as Cache::read / Cache::write.
include Cache
51
52
# Fetches the body of a URL through curb (Curl::Easy), optionally answering
# from the file cache instead of the network.
#
# Class-wide settings (shared by all instances):
#   Fetch.agent = "..."                          User-Agent header
#   Fetch.debug = true/false                     curl verbose output
#   Fetch.proxy = {:host => "...", :port => n}   HTTP proxy; anything that is
#                                                not a Hash disables the proxy
class Fetch
  attr_reader :url, :cache, :cached

  @@agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; Nautilus/1.0Final) Gecko/20020408"
  @@debug = false
  # Initialised here so that #body does not hit an uninitialised class
  # variable (NameError) when Fetch.proxy= was never called.
  @@proxy = nil

  # Sets the proxy from a Hash with :host and :port; any other value clears it.
  def self.proxy=(a)
    if a.is_a?(Hash)
      @@proxy = a
      Message.new "data fetched via proxy #{@@proxy[:host]}:#{@@proxy[:port]}"
    else
      @@proxy = nil
    end
  end

  # Sets the User-Agent; ignored unless +a+ is a non-empty String.
  def self.agent=(a)
    @@agent = a if a.is_a?(String) && !a.empty?
  end

  # Turns curl's verbose output on or off.
  def self.debug=(value)
    @@debug = value
  end

  # url   - address to fetch
  # cache - cache id to consult before going to the network (nil: no cache)
  def initialize(url, cache = nil)
    @url = url
    @cache = cache
    @cached = false
  end

  # Returns the document body as a String.
  #
  # When a cache id was given and Cache.read returns an entry, that entry is
  # returned and #cached becomes true. Otherwise the URL is fetched; on any
  # fetch error the failure is reported and "" is returned, so callers can
  # always treat the result as a String.
  def body
    @cached = false
    if @cache
      contents = Cache.read(@cache)
      if contents
        @cached = true
        return contents
      end
    end

    begin
      Message.new "fetching #{@url}"
      response = Curl::Easy.perform(@url) do |curl|
        curl.headers["User-Agent"] = @@agent
        curl.verbose = @@debug
        if @@proxy
          curl.proxy_url = @@proxy[:host]
          # The port may arrive as a String (e.g. parsed from the command
          # line); curb expects an Integer here.
          port = @@proxy[:port]
          curl.proxy_port = port.to_i unless port.nil?
        end
      end
      response.body_str
    rescue StandardError => e
      # Still best effort, but no longer silent.
      Message.new "failed to fetch #{@url}: #{e.message}"
      ""
    end
  end
end
106
107
# One song page on woim.net and the mp3 link hidden behind it.
#
# The song page embeds a player whose parameter points at a metadata URL;
# that metadata document in turn contains the mp3 location. Two player
# variants are recognised ("flashvars" and "FileName").
class Song
  attr_reader :w_url, :w_id

  # song_id - the id used in /song/<id>/ URLs (converted with to_s).
  def initialize(song_id)
    @w_id = song_id.to_s
    @w_url = "http://www.woim.net/song/#{@w_id}/index.html"
  end

  # Returns the mp3 URL of the song, or "" when it cannot be determined.
  #
  # A successful lookup that came from the network is stored in the cache in
  # the "flashvars" layout, so that a later run can resolve the link from the
  # cache entry alone (the cached text is then used as both page and
  # metadata document).
  def mp3
    link_to_mp3 = ""
    meta_url = nil
    fetch = Fetch.new(@w_url, cache_id)
    body = fetch.body

    link_patterns.each do |page_pattern, link_pattern|
      found = body.match(page_pattern)
      next unless found

      meta_url = found[1]
      text = fetch.cached ? body : Fetch.new(meta_url).body
      hit = text.match(link_pattern)
      link_to_mp3 = hit[1] if hit
      # Only the first player variant present on the page is considered.
      break
    end

    if !link_to_mp3.empty? && !fetch.cached
      entry = [
        "<param name=\"flashvars\" code=#{meta_url}\">",
        "location=\"#{link_to_mp3}\">"
      ]
      Cache::write(cache_id, entry.join("\n"))
    end
    link_to_mp3
  end

  # Prints the mp3 URL (an empty line when none was found).
  def print_mp3
    puts mp3
  end

  private

  # Cache id under which this song's lookup result is stored.
  def cache_id
    "song_#{@w_id}"
  end

  # Pairs of [pattern capturing the metadata URL on the song page,
  # pattern capturing the mp3 link in the metadata document], in the order
  # in which they are tried. The id is escaped so that it is always matched
  # literally.
  def link_patterns
    id = Regexp.escape(@w_id)
    [
      [%r|<param name="flashvars".*?code=(http://www\.woim\.net/.*?/#{id}/.*?)">|i,
       %r|location="(.*?)">|i],
      [%r|<param name="FileName" value="(http://www\.woim\.net/.*?/#{id}/.*?)">|i,
       %r|<ref href="(.*?)" />|i]
    ]
  end
end
143
144
class String
  # Returns a lower-cased copy in which every character other than
  # 0-9, a-z, "_" and "-" (spaces included) is replaced by "_".
  # Used to build file names from album and song titles.
  def sanitized
    downcase.gsub(/[^0-9a-z_-]/, '_')
  end
end
149
150
# An album page on woim.net: title, artist and the list of songs with their
# mp3 links.
#
# Constructing an Album does all the work: the page is obtained through
# Fetch (cache id "album_<id>"), parsed, every song is resolved through
# Song#mp3, and a compact cache entry is written when the page came from the
# network and something could be parsed from it.
class Album
  attr_reader :w_id, :w_text, :w_title, :w_artist, :w_list

  # Captures the album title (1) and the artist (2). Extended mode: the
  # whitespace inside the pattern is insignificant.
  INFO_PATTERN = %r#
      class="album_info">.*?
        Album:  .*? <h1>(.*?)</h1>.*?
        <tr>.*?</tr>.*?
        <tr>.*? href=.*?>(.*?)</a>.*?</tr>
    #mx

  # Captures the song id (1) and the song title (2) of each track row.
  SONG_PATTERN = %r|
      <td>[0-9]+.*?
        href="http://www\.woim\.net/song/([0-9]+)/.*?>(.*?)</a>
    |mx

  # id - the id used in /album/<id>/ URLs (converted with to_s).
  def initialize(id)
    @w_id = id.to_s

    fetch = Fetch.new("http://www.woim.net/album/#{@w_id}/index.html", "album_#{@w_id}")
    @w_text = fetch.body

    @w_title = nil
    @w_artist = nil
    @w_list = []

    get_info
    get_list
    # Do not cache a page from which nothing could be parsed (e.g. a failed
    # fetch returning ""): such an entry would be served on every later run
    # and the album would never be fetched again.
    write_cache if !fetch.cached && parsed_anything?
  end

  # Prints album title and artist and, when songs were found, the track list
  # followed by one wget command per song.
  #
  # NOTE(review): the mp3 URL comes from the remote page and is only wrapped
  # in double quotes; inspect the generated commands before running them.
  def print
    Message.new "-" * 46
    puts "Album:  #{@w_title}"
    puts "Artist: #{@w_artist}"
    return if @w_list.empty?

    print_titles
    print_banner "wget script to download mp3 file(s)"
    # to_s: the title is nil when the album info could not be parsed.
    prefix = @w_title.to_s.sanitized
    @w_list.each do |s|
      puts "wget -O \"#{prefix}_#{s[:title].sanitized}.mp3\" \"#{s[:mp3]}\""
    end
  end

  # Prints the track list followed by the bare mp3 URLs, one per line.
  # Prints nothing when no songs were found.
  def print_m3u
    return if @w_list.empty?

    print_titles
    print_banner "list of mp3 files"
    @w_list.each { |s| puts s[:mp3] }
  end

private

  # True when the page yielded a title or at least one song.
  def parsed_anything?
    !@w_title.nil? || !@w_list.empty?
  end

  # Prints a separator followed by one "* title" line per song.
  def print_titles
    Message.new "-" * 46
    @w_list.each { |s| puts "* #{s[:title]}" }
  end

  # Prints +text+ framed by two separator lines.
  def print_banner(text)
    Message.new "-" * 46
    Message.new text
    Message.new "-" * 46
  end

  # Writes a minimal stand-in for the album page to the cache. The layout is
  # chosen so that INFO_PATTERN and SONG_PATTERN match it again on a later
  # run.
  def write_cache
    st = []
    st << 'class="album_info">'
    st << "Album: <h1>#{@w_title}</h1>"
    st << "<tr></tr>"
    st << "<tr>Artist: href=>#{@w_artist}</a></tr>"
    @w_list.each do |song|
      st << "<td>0. href=\"http://www.woim.net/song/#{song[:id]}/\">#{song[:title]}</a>"
    end
    Cache.write("album_#{@w_id}", st.join("\n"))
    self
  end

  # Extracts title and artist from the page text; both stay nil when the
  # page does not match.
  def get_info
    if (gs = @w_text.match(INFO_PATTERN))
      @w_title, @w_artist = gs[1, 2]
      Message.new "album found #{@w_title} (performed by #{@w_artist})"
    end
    self
  end

  # Collects the songs listed on the page and resolves the mp3 link of each
  # one through Song#mp3. Entries are Hashes with :id, :title and :mp3.
  def get_list
    found = @w_text.scan(SONG_PATTERN)
    Message.new "#{found.size} song(s) found"
    found.each do |id, title|
      @w_list << {:id => id, :title => title, :mp3 => Song.new(id).mp3}
    end
    self
  end
end
244
245
Fetch.debug = false
Fetch.proxy = nil # {:host => "localhost",:port => 3128}

albums = []
songs  = []

# Classify every command-line argument:
#   .../album/<id>... or a bare number  -> album id
#   .../song/<id>...                    -> song id
#   proxy=<host>:<port>                 -> proxy used for all fetches
# Anything else is reported and skipped.
ARGV.each do |arg|
  if (gs = arg.match(%r|album/([0-9]+)|) || arg.match(%r|\A([0-9]+)\z|))
    albums << gs[1]
  elsif (gs = arg.match(%r|song/([0-9]+)|))
    songs << gs[1]
  elsif (gs = arg.match(%r|proxy=(.*?):([0-9]+)|))
    # The port is captured as text; pass it on as an Integer.
    Fetch.proxy = {:host => gs[1], :port => gs[2].to_i}
  else
    # Report the offending argument itself.
    Message.new "failed to parse: #{arg}"
  end
end

# Fetching starts only after all arguments were read, so a proxy given
# anywhere on the command line applies to every album and song.
albums.each { |a| Album.new(a).print_m3u }
songs.each  { |s| Song.new(s).print_mp3 }