From b30c16aa3298221b1957fef61e69c47be74bb25e Mon Sep 17 00:00:00 2001
From: David Turner <novalis@novalis.org>
Date: Mon, 17 Apr 2017 20:13:08 -0400
Subject: [PATCH] repository: index submodules by path

Submodules have a name in the configuration, but this name is simply
the path at which the submodule was initially checked in (by default
-- the name is totally arbitrary).  If a submodule is moved, it
retains its original name, but its path changes.  Since we discover
submodules inside trees, we have their path but not necessarily their
name.

Make the submodules() function return the submodule hash indexed by
path rather than name, so that renamed submodules can be looked up.

Signed-off-by: David Turner <novalis@novalis.org>
---
 changelogs/unreleased/moved-submodules.yml    |  4 +
 lib/gitlab/git/gitmodules_parser.rb           | 77 +++++++++++++++++++
 lib/gitlab/git/repository.rb                  | 52 ++++---------
 spec/lib/gitlab/git/gitmodules_parser_spec.rb | 28 +++++++
 spec/lib/gitlab/git/repository_spec.rb        |  8 +-
 5 files changed, 128 insertions(+), 41 deletions(-)
 create mode 100644 changelogs/unreleased/moved-submodules.yml
 create mode 100644 lib/gitlab/git/gitmodules_parser.rb
 create mode 100644 spec/lib/gitlab/git/gitmodules_parser_spec.rb

diff --git a/changelogs/unreleased/moved-submodules.yml b/changelogs/unreleased/moved-submodules.yml
new file mode 100644
index 00000000000..eee858717ed
--- /dev/null
+++ b/changelogs/unreleased/moved-submodules.yml
@@ -0,0 +1,4 @@
+---
+title: 'Handle renamed submodules in repository browser'
+merge_request: 10798
+author: David Turner
diff --git a/lib/gitlab/git/gitmodules_parser.rb b/lib/gitlab/git/gitmodules_parser.rb
new file mode 100644
index 00000000000..f4e3b5e5129
--- /dev/null
+++ b/lib/gitlab/git/gitmodules_parser.rb
@@ -0,0 +1,77 @@
+module Gitlab
+  module Git
+    class GitmodulesParser
+      def initialize(content)
+        @content = content
+      end
+
+      # Parses the contents of a .gitmodules file and returns a hash of
+      # submodule information, indexed by path.
+      def parse
+        reindex_by_path(get_submodules_by_name)
+      end
+
+      private
+
+      class State
+        def initialize
+          @result = {}
+          @current_submodule = nil
+        end
+
+        def start_section(section)
+          # In some .gitmodules files (e.g. nodegit's), a header
+          # with the same name appears multiple times; we want to
+          # accumulate the configs across these
+          @current_submodule = @result[section] || { 'name' => section }
+          @result[section] = @current_submodule
+        end
+
+        def set_attribute(attr, value)
+          @current_submodule[attr] = value
+        end
+
+        def section_started?
+          !@current_submodule.nil?
+        end
+
+        def submodules_by_name
+          @result
+        end
+      end
+
+      def get_submodules_by_name
+        iterator = State.new
+
+        @content.split("\n").each_with_object(iterator) do |text, iterator|
+          next if text =~ /^\s*#/
+
+          if text =~ /\A\[submodule "(?<name>[^"]+)"\]\z/
+            iterator.start_section($~[:name])
+          else
+            next unless iterator.section_started?
+
+            next unless text =~ /\A\s*(?<key>\w+)\s*=\s*(?<value>.*)\z/
+
+            value = $~[:value].chomp
+            iterator.set_attribute($~[:key], value)
+          end
+        end
+
+        iterator.submodules_by_name
+      end
+
+      def reindex_by_path(submodules_by_name)
+        # Convert from an indexed by name to an array indexed by path
+        # If a submodule doesn't have a path, it is considered bogus
+        # and is ignored
+        submodules_by_name.each_with_object({}) do |(name, data), results|
+          path = data.delete 'path'
+          next unless path
+
+          results[path] = data
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb
index 85695d0a4df..c1f942f931a 100644
--- a/lib/gitlab/git/repository.rb
+++ b/lib/gitlab/git/repository.rb
@@ -617,9 +617,9 @@ module Gitlab
       #
       # Ex.
       #   {
-      #     "rack"  => {
+      #     "current_path/rack"  => {
+      #       "name" => "original_path/rack",
       #       "id" => "c67be4624545b4263184c4a0e8f887efd0a66320",
-      #       "path" => "rack",
       #       "url" => "git://github.com/chneukirchen/rack.git"
       #     },
       #     "encoding" => {
@@ -637,7 +637,8 @@ module Gitlab
           return {}
         end
 
-        parse_gitmodules(commit, content)
+        parser = GitmodulesParser.new(content)
+        fill_submodule_ids(commit, parser.parse)
       end
 
       # Return total commits count accessible from passed ref
@@ -998,42 +999,19 @@ module Gitlab
         end
       end
 
-      # Parses the contents of a .gitmodules file and returns a hash of
-      # submodule information.
-      def parse_gitmodules(commit, content)
-        modules = {}
-
-        name = nil
-        content.each_line do |line|
-          case line.strip
-          when /\A\[submodule "(?<name>[^"]+)"\]\z/ # Submodule header
-            name = $~[:name]
-            modules[name] = {}
-          when /\A(?<key>\w+)\s*=\s*(?<value>.*)\z/ # Key/value pair
-            key = $~[:key]
-            value = $~[:value].chomp
-
-            next unless name && modules[name]
-
-            modules[name][key] = value
-
-            if key == 'path'
-              begin
-                modules[name]['id'] = blob_content(commit, value)
-              rescue InvalidBlobName
-                # The current entry is invalid
-                modules.delete(name)
-                name = nil
-              end
-            end
-          when /\A#/ # Comment
-            next
-          else # Invalid line
-            name = nil
+      # Fill in the 'id' field of a submodule hash from its values
+      # as-of +commit+. Return a Hash consisting only of entries
+      # from the submodule hash for which the 'id' field is filled.
+      def fill_submodule_ids(commit, submodule_data)
+        submodule_data.each do |path, data|
+          id = begin
+            blob_content(commit, path)
+          rescue InvalidBlobName
+            nil
           end
+          data['id'] = id
         end
-
-        modules
+        submodule_data.select { |path, data| data['id'] }
       end
 
       # Returns true if +commit+ introduced changes to +path+, using commit
diff --git a/spec/lib/gitlab/git/gitmodules_parser_spec.rb b/spec/lib/gitlab/git/gitmodules_parser_spec.rb
new file mode 100644
index 00000000000..143aa2218c9
--- /dev/null
+++ b/spec/lib/gitlab/git/gitmodules_parser_spec.rb
@@ -0,0 +1,28 @@
+require 'spec_helper'
+
+describe Gitlab::Git::GitmodulesParser do
+  it 'should parse a .gitmodules file correctly' do
+    parser = described_class.new(<<-'GITMODULES'.strip_heredoc)
+      [submodule "vendor/libgit2"]
+         path = vendor/libgit2
+      [submodule "vendor/libgit2"]
+         url = https://github.com/nodegit/libgit2.git
+
+      # a comment
+      [submodule "moved"]
+          path = new/path
+          url = https://example.com/some/project
+      [submodule "bogus"]
+          url = https://example.com/another/project
+    GITMODULES
+
+    modules = parser.parse
+
+    expect(modules).to eq({
+                            'vendor/libgit2' => { 'name' => 'vendor/libgit2',
+                                                  'url' => 'https://github.com/nodegit/libgit2.git' },
+                            'new/path' => { 'name' => 'moved',
+                                            'url' => 'https://example.com/some/project' }
+                          })
+  end
+end
diff --git a/spec/lib/gitlab/git/repository_spec.rb b/spec/lib/gitlab/git/repository_spec.rb
index e1e4aa9fde9..20ed84ee1e6 100644
--- a/spec/lib/gitlab/git/repository_spec.rb
+++ b/spec/lib/gitlab/git/repository_spec.rb
@@ -341,7 +341,7 @@ describe Gitlab::Git::Repository, seed_helper: true do
         expect(submodule).to eq([
           "six", {
             "id" => "409f37c4f05865e4fb208c771485f211a22c4c2d",
-            "path" => "six",
+            "name" => "six",
             "url" => "git://github.com/randx/six.git"
           }
         ])
@@ -349,14 +349,14 @@ describe Gitlab::Git::Repository, seed_helper: true do
 
       it 'should handle nested submodules correctly' do
         nested = submodules['nested/six']
-        expect(nested['path']).to eq('nested/six')
+        expect(nested['name']).to eq('nested/six')
         expect(nested['url']).to eq('git://github.com/randx/six.git')
         expect(nested['id']).to eq('24fb71c79fcabc63dfd8832b12ee3bf2bf06b196')
       end
 
       it 'should handle deeply nested submodules correctly' do
         nested = submodules['deeper/nested/six']
-        expect(nested['path']).to eq('deeper/nested/six')
+        expect(nested['name']).to eq('deeper/nested/six')
         expect(nested['url']).to eq('git://github.com/randx/six.git')
         expect(nested['id']).to eq('24fb71c79fcabc63dfd8832b12ee3bf2bf06b196')
       end
@@ -376,7 +376,7 @@ describe Gitlab::Git::Repository, seed_helper: true do
         expect(submodules.first).to eq([
           "six", {
             "id" => "409f37c4f05865e4fb208c771485f211a22c4c2d",
-            "path" => "six",
+            "name" => "six",
             "url" => "git://github.com/randx/six.git"
           }
         ])
-- 
GitLab