# frozen_string_literal: true

# Downloads a remote file over HTTP(S) into a local file object.
#
# The URL is normalized and validated on construction; the download itself is
# streamed in chunks so that the size limit can be enforced while the body is
# being received. Connections are opened through ValidatingConnectionAdapter,
# which rejects private, loopback and link-local destinations.
class FileDownload
  include ActiveModel::Validations

  # Raised by this class when a download is refused (file too large) and by
  # ValidatingConnectionAdapter when the destination IP is blocked.
  class Error < StandardError; end

  # Errors on which #download! retries the request.
  RETRIABLE_ERRORS = [
    Errno::ECONNRESET,
    Errno::ETIMEDOUT,
    Errno::EIO,
    Errno::EHOSTUNREACH,
    Errno::ECONNREFUSED,
    Timeout::Error,
    IOError,
  ].freeze

  # Default upper bound on the number of bytes #http_get_streaming will accept.
  MAX_SIZE = 100.megabytes

  # The parsed Addressable::URI, or nil when the input could not be parsed.
  attr_reader :url

  validate :validate_url

  # Normalizes +url+ (unencode, then re-escape, then parse) and validates it.
  #
  # @param url [String] the address to download from
  # @raise [ActiveModel::ValidationError] via validate! when the URL is blank,
  #   unparseable, has no host, or is not http/https
  def initialize(url)
    begin
      unencoded = Addressable::URI.unencode(url)
      escaped = Addressable::URI.escape(unencoded)
      @url = Addressable::URI.parse(escaped)
    rescue Addressable::URI::InvalidURIError
      # Leave the URL unset; validate_url reports it as blank below.
      @url = nil
    end

    validate!
  end

  # Issues a HEAD request (3 second timeout) and returns the response's
  # content_length.
  #
  # @raise [HTTParty::ResponseError] when the response is not successful
  def size
    res = HTTParty.head(uncached_url, **httparty_options, timeout: 3)

    if res.success?
      res.content_length
    else
      raise(HTTParty::ResponseError, res)
    end
  end

  # Downloads the file, retrying on RETRIABLE_ERRORS without any delay between
  # attempts.
  #
  # @param tries [Integer] maximum number of attempts
  # Remaining keyword arguments are forwarded to #http_get_streaming
  # (+file:+, +max_size:+).
  # @return the rewound file object returned by #http_get_streaming
  def download!(tries: 3, **)
    Retriable.retriable(on: RETRIABLE_ERRORS, tries: tries, base_interval: 0) do
      http_get_streaming(uncached_url, **)
    end
  end

  # Validation callback: records errors on :base for a blank URL, a missing
  # host, or a scheme other than http/https.
  def validate_url
    if url.blank?
      errors.add(:base, "URL must not be blank")
      # url may be nil here (unparseable or nil input); the checks below call
      # methods on it and would raise NoMethodError instead of producing a
      # validation error, so stop at the first failure.
      return
    end

    errors.add(:base, "'#{url}' is not a valid url") if url.host.blank?
    errors.add(:base, "'#{url}' is not a valid url. Did you mean 'https://#{url}'?") unless url.scheme.in?(%w[http https])
  end

  # Streams the body of a GET request for +url+ into +file+.
  #
  # @param url the address to fetch
  # @param file the IO to write chunks into; defaults to a new binary Tempfile
  # @param max_size [Integer] byte limit; a value of 0 or less disables the check
  # @return +file+, rewound to the start, when the response is successful
  # @raise [Error] when more than +max_size+ bytes are received
  # @raise [RuntimeError] when the final response is not successful
  def http_get_streaming(url, file: Tempfile.new(binmode: true), max_size: MAX_SIZE)
    size = 0

    res = HTTParty.get(url, httparty_options) do |chunk|
      # Skip the bodies of 301/302 redirect responses; only the final
      # response's body belongs in the file.
      next if [301, 302].include?(chunk.code)

      size += chunk.size
      raise(Error, "File is too large (max size: #{max_size})") if size > max_size && max_size > 0

      file.write(chunk)
    end

    if res.success?
      file.rewind
      file
    else
      raise("HTTP error code: #{res.code} #{res.message}")
    end
  end

  # Prevent Cloudflare from potentially mangling the image
  #
  # Returns file_url unchanged unless its host resolves to a Cloudflare IP; in
  # that case returns a copy with an +nc+ query parameter set to the current
  # Unix timestamp.
  def uncached_url
    return file_url unless is_cloudflare?(file_url)

    url = file_url.dup
    url.query_values = url.query_values.to_h.merge(nc: Time.now.to_i)
    url
  end

  alias file_url url

  # Options passed to every HTTParty request: a 10 second timeout, streamed
  # bodies and the IP-validating connection adapter, deep-merged with
  # Websites.config.httparty_options (the latter wins on conflicts).
  def httparty_options
    {
      timeout: 10,
      stream_body: true,
      connection_adapter: ValidatingConnectionAdapter,
    }.deep_merge(Websites.config.httparty_options)
  end

  # True when the resolved address of +url+'s hostname falls inside any of the
  # subnets listed by CloudflareService.ips.
  def is_cloudflare?(url)
    ip_addr = IPAddr.new(Resolv.getaddress(url.hostname))
    CloudflareService.ips.any? { |subnet| subnet.include?(ip_addr) }
  end
end

# Hook into HTTParty to validate the IP before following redirects.
# https://www.rubydoc.info/github/jnunemaker/httparty/HTTParty/ConnectionAdapter
class ValidatingConnectionAdapter < HTTParty::ConnectionAdapter
  # Resolves the target hostname and refuses the connection when the address
  # is blocked; otherwise defers to HTTParty's default adapter.
  #
  # @raise [FileDownload::Error] when the resolved IP is blocked
  def self.call(uri, options)
    ip_addr = IPAddr.new(Resolv.getaddress(uri.hostname))
    raise(FileDownload::Error, "Downloads from #{ip_addr} are not allowed") if ip_blocked?(ip_addr)

    super
  end

  # True for private, loopback and link-local addresses.
  def self.ip_blocked?(ip_addr)
    ip_addr.private? || ip_addr.loopback? || ip_addr.link_local?
  end
end