From 6d763831d00027600e4da9807e6be3afb47abd4b Mon Sep 17 00:00:00 2001
From: James Lopez <james@jameslopez.es>
Date: Mon, 20 Jun 2016 17:20:53 +0200
Subject: [PATCH] fixed a few MySQL issues and added changelog

---
 CHANGELOG                                     |  1 +
 app/validators/addressable_url_validator.rb   |  6 +--
 ...620110927_fix_no_validatable_import_url.rb | 53 +++++++++++++++----
 3 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 44e6a194745..5b84d136d2d 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,7 @@
 Please view this file on the master branch, on stable branches it's out of date.
 
 v 8.9.0 (unreleased)
+  - Set import_url validation to be more strict
   - Fix error when CI job variables key specified but not defined
   - Fix pipeline status when there are no builds in pipeline
   - Fix Error 500 when using closes_issues API with an external issue tracker
diff --git a/app/validators/addressable_url_validator.rb b/app/validators/addressable_url_validator.rb
index 585dc182e27..ee6a5a11850 100644
--- a/app/validators/addressable_url_validator.rb
+++ b/app/validators/addressable_url_validator.rb
@@ -22,6 +22,8 @@ class AddressableUrlValidator < ActiveModel::EachValidator
     end
   end
 
+  private
+
   def valid_url?(value)
     return false unless value
 
@@ -32,8 +34,6 @@ class AddressableUrlValidator < ActiveModel::EachValidator
     false
   end
 
-  private
-
   def default_options
     @default_options ||= { protocols: %w(http https ssh git) }
   end
@@ -44,6 +44,6 @@ class AddressableUrlValidator < ActiveModel::EachValidator
 
   def valid_protocol?(value)
     options = default_options.merge(self.options)
-    value =~ /\A#{URI.regexp(options[:protocols])}\z/
+    !!(value =~ /\A#{URI.regexp(options[:protocols])}\z/)
   end
 end
diff --git a/db/migrate/20160620110927_fix_no_validatable_import_url.rb b/db/migrate/20160620110927_fix_no_validatable_import_url.rb
index e56a8a0c853..9cb84faaec1 100644
--- a/db/migrate/20160620110927_fix_no_validatable_import_url.rb
+++ b/db/migrate/20160620110927_fix_no_validatable_import_url.rb
@@ -1,5 +1,9 @@
-# See http://doc.gitlab.com/ce/development/migration_style_guide.html
-# for more information on how to write migrations for GitLab.
+# Updates project records containing invalid URLs using the AddressableUrlValidator.
+# This is optimized assuming the number of invalid records is low, but
+# we still need to loop through all the projects with an +import_url+
+# so we use batching for the latter.
+#
+# This migration is non-reversible as we would have to keep the old data.
 
 class FixNoValidatableImportUrl < ActiveRecord::Migration
   include Gitlab::Database::MigrationHelpers
@@ -14,8 +18,8 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration
       @results = []
     end
 
-    def next
-      @results = ActiveRecord::Base.connection.execute(batched_sql)
+    def next?
+      @results = ActiveRecord::Base.connection.exec_query(batched_sql)
       @offset += @batch_size
       @results.any?
     end
@@ -23,11 +27,36 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration
     private
 
     def batched_sql
-      "#{@query} OFFSET #{@offset} LIMIT #{@batch_size}"
+      "#{@query} LIMIT #{@batch_size} OFFSET #{@offset}"
+    end
+  end
+
+  # AddressableValidator - Snapshot of AddressableUrlValidator
+  module AddressableUrlValidatorSnap
+    extend self
+
+    def valid_url?(value)
+      return false unless value
+
+      value.strip!
+
+      valid_uri?(value) && valid_protocol?(value)
+    rescue Addressable::URI::InvalidURIError
+      false
+    end
+
+    def valid_uri?(value)
+      Addressable::URI.parse(value).is_a?(Addressable::URI)
+    end
+
+    def valid_protocol?(value)
+      !!(value =~ /\A#{URI.regexp(%w(http https ssh git))}\z/)
     end
   end
 
   def up
+    say('Cleaning up invalid import URLs... This may take a few minutes if we have a large number of imported projects.')
+
     invalid_import_url_project_ids.each { |project_id| cleanup_import_url(project_id) }
   end
 
@@ -35,18 +64,20 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration
     ids = []
     batches = SqlBatches.new(query: "SELECT id, import_url FROM projects WHERE import_url IS NOT NULL")
 
-    while batches.nexts
-      ids += batches.results.map { |result| invalid_url?(result[:import_url]) ? result[:id] : nil }
+    while batches.next?
+      batches.results.each do |result|
+        ids << result['id'] unless valid_url?(result['import_url'])
+      end
     end
 
-    ids.compact
+    ids
   end
 
-  def invalid_url?(url)
-    AddressableUrlValidator.new({ attributes: 1 }).valid_url?(url)
+  def valid_url?(url)
+    AddressableUrlValidatorSnap.valid_url?(url)
   end
 
   def cleanup_import_url(project_id)
-    execute("UPDATE projects SET mirror = false, import_url = NULL WHERE id = #{project_id}")
+    execute("UPDATE projects SET import_url = NULL WHERE id = #{project_id}")
   end
 end
-- 
GitLab