Donovan Daniels
b1c702e3cd
Poorly tested, but it worked well enough — I'm sure I'll be patching bugs over the next few weeks. Also removed Turbo because it sucks. Also changed the way we handle hosts in dev.
105 lines
2.8 KiB
Ruby
105 lines
2.8 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# Downloads a remote file over HTTP(S) into a Tempfile, with URL
# validation, size limits, retry on transient network errors, and an
# SSRF guard (see ValidatingConnectionAdapter).
class FileDownload
  include ActiveModel::Validations

  class Error < StandardError; end

  # Transient network failures worth retrying in download!.
  RETRIABLE_ERRORS = [Errno::ECONNRESET, Errno::ETIMEDOUT, Errno::EIO, Errno::EHOSTUNREACH, Errno::ECONNREFUSED, Timeout::Error, IOError].freeze

  # Default hard cap on the number of bytes a single download may produce.
  MAX_SIZE = 50.megabytes

  attr_reader :url

  validate :validate_url

  # Normalizes +url+ (unencode then re-escape, so already-encoded and raw
  # URLs both end up consistently escaped), parses it, and validates.
  #
  # @param url [String] the URL to download from
  # @raise [ActiveModel::ValidationError] if the URL is blank, unparseable,
  #   has no host, or has a non-http(s) scheme
  def initialize(url)
    begin
      unencoded = Addressable::URI.unencode(url)
      escaped = Addressable::URI.escape(unencoded)
      @url = Addressable::URI.parse(escaped)
    rescue Addressable::URI::InvalidURIError
      # Leave @url nil; validate_url reports it as blank/invalid.
      @url = nil
    end
    validate!
  end

  # Returns the remote file's size in bytes via a HEAD request.
  #
  # @raise [HTTParty::ResponseError] if the HEAD request is not successful
  def size
    res = HTTParty.head(uncached_url, **httparty_options, timeout: 3)

    if res.success?
      res.content_length
    else
      raise(HTTParty::ResponseError, res)
    end
  end

  # Downloads the file, retrying transient network errors.
  #
  # @param tries [Integer] maximum attempts before giving up
  # @return [Tempfile] the downloaded file, rewound and ready to read
  def download!(tries: 3, **)
    Retriable.retriable(on: RETRIABLE_ERRORS, tries: tries, base_interval: 0) do
      http_get_streaming(uncached_url, **)
    end
  end

  # Populates validation errors for the parsed URL.
  def validate_url
    # Guard clause: when parsing failed, @url is nil and the checks below
    # would raise NoMethodError — report the error and stop here.
    if url.blank?
      errors.add(:base, "URL must not be blank")
      return
    end

    errors.add(:base, "'#{url}' is not a valid url") if url.host.blank?
    errors.add(:base, "'#{url}' is not a valid url. Did you mean 'https://#{url}'?") unless url.scheme.in?(%w[http https])
  end

  # Streams an HTTP GET into +file+, enforcing +max_size+.
  #
  # @param url [Addressable::URI, String] the URL to fetch
  # @param file [IO, nil] destination; a fresh binary Tempfile is created
  #   when nil (the default), and is cleaned up if the download fails
  # @param max_size [Integer] byte limit; 0 or negative disables the check
  # @return [IO] the file, rewound
  # @raise [Error] when the response body exceeds +max_size+
  # @raise [RuntimeError] on a non-success HTTP status
  def http_get_streaming(url, file: nil, max_size: MAX_SIZE)
    # Only delete the file on failure if we created it ourselves.
    owns_file = file.nil?
    file ||= Tempfile.new(binmode: true)
    size = 0

    res = HTTParty.get(url, httparty_options) do |chunk|
      # Redirect responses (301/302/303/307/308) may carry bodies of their
      # own; skip them so only the final response is written to the file.
      next if (300..399).cover?(chunk.code)

      size += chunk.size
      raise(Error, "File is too large (max size: #{max_size})") if size > max_size && max_size > 0

      file.write(chunk)
    end

    raise("HTTP error code: #{res.code} #{res.message}") unless res.success?

    file.rewind
    file
  rescue StandardError
    # Don't leak the tempfile we created when the download fails.
    file.close! if owns_file && file.respond_to?(:close!)
    raise
  end

  # Prevent Cloudflare from potentially mangling the image by appending a
  # cache-busting query parameter when the host resolves to Cloudflare.
  def uncached_url
    return file_url unless is_cloudflare?(file_url)

    url = file_url.dup
    url.query_values = url.query_values.to_h.merge(nc: Time.now)
    url
  end

  alias file_url url

  # Base HTTParty options; site-wide overrides win via deep_merge.
  def httparty_options
    {
      timeout: 10,
      stream_body: true,
      connection_adapter: ValidatingConnectionAdapter,
    }.deep_merge(Websites.config.httparty_options)
  end

  # True when the URL's host resolves to an address inside one of
  # Cloudflare's published IP ranges.
  def is_cloudflare?(url)
    ip_addr = IPAddr.new(Resolv.getaddress(url.hostname))
    CloudflareService.ips.any? { |subnet| subnet.include?(ip_addr) }
  end
end
|
|
|
|
# Hook into HTTParty to validate the IP before following redirects.
|
|
# https://www.rubydoc.info/github/jnunemaker/httparty/HTTParty/ConnectionAdapter
|
|
class ValidatingConnectionAdapter < HTTParty::ConnectionAdapter
  class << self
    # Resolve the target host and refuse to connect to internal addresses
    # (SSRF guard); otherwise defer to the stock adapter.
    def call(uri, options)
      ip_addr = IPAddr.new(Resolv.getaddress(uri.hostname))
      raise(FileDownload::Error, "Downloads from #{ip_addr} are not allowed") if ip_blocked?(ip_addr)

      super
    end

    # Addresses that must never be fetched: private ranges, loopback,
    # and link-local.
    def ip_blocked?(ip_addr)
      ip_addr.private? || ip_addr.loopback? || ip_addr.link_local?
    end
  end
end
|