// Listing header recovered from the source page: 399 lines, 13 KiB, C#.
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using JetBrains.Annotations;
using UnityEngine;

namespace UnityEditor.Searcher
{
    /// <summary>
    /// Searcher database that scores items against a text query using a term index
    /// keyed by <see cref="SearcherItem.Path"/>. The index is built by <see cref="BuildIndex"/>.
    /// </summary>
    [PublicAPI]
    public class SearcherDatabase : SearcherDatabaseBase
    {
        /// <summary>A scored item; also used as a mutable "best so far" accumulator.</summary>
        class Result
        {
            public SearcherItem item;
            public float maxScore;
        }

        const bool k_IsParallel = true;

        // Item lists larger than this are processed on multiple thread-pool tasks.
        const int k_ParallelThreshold = 100;

        // Results whose score, relative to the best score, is not above this ratio are dropped.
        const float k_ScoreCutOff = 0.33f;

        // Weights given to the different kinds of index terms.
        const float k_TokenWeight = 0.8f;
        const float k_TokenSuiteWeight = 1f;
        const float k_InitialsWeight = 0.5f;

        // Initial multiplier applied to multi-token (suite) query matches.
        const float k_QuerySuiteFactor = 1.25f;

        // Splits on every non-word character.
        static readonly Regex s_NonWordSplitter = new Regex(@"\W");

        // Splits camel-case words; the capture group keeps each "Upper+lower*" run in the output.
        static readonly Regex s_CamelCaseSplitter = new Regex(@"(\p{Lu}+\P{Lu}*)");

        // Splits on runs of non-uppercase characters, leaving only the uppercase runs.
        static readonly Regex s_NonUppercaseSplitter = new Regex(@"\P{Lu}+");

        // Item path -> list of (lower-cased term, weight).
        readonly Dictionary<string, IReadOnlyList<ValueTuple<string, float>>> m_Index = new Dictionary<string, IReadOnlyList<ValueTuple<string, float>>>();

        /// <summary>
        /// Optional predicate receiving the trimmed query and a candidate item.
        /// Items for which it returns false are excluded from the results.
        /// </summary>
        public Func<string, SearcherItem, bool> MatchFilter { get; set; }

        /// <summary>
        /// Creates a database from <paramref name="items"/>, optionally serializes it, and builds its index.
        /// </summary>
        /// <param name="items">Items to add to the database.</param>
        /// <param name="databaseDirectory">Directory passed to the base class; created if it is missing and serialization is requested.</param>
        /// <param name="serializeToFile">When true, <c>SerializeToFile</c> is called before the index is built.</param>
        /// <returns>The new, indexed database.</returns>
        public static SearcherDatabase Create(
            List<SearcherItem> items,
            string databaseDirectory,
            bool serializeToFile = true
        )
        {
            if (serializeToFile && databaseDirectory != null && !Directory.Exists(databaseDirectory))
                Directory.CreateDirectory(databaseDirectory);

            var database = new SearcherDatabase(databaseDirectory, items);

            if (serializeToFile)
                database.SerializeToFile();

            database.BuildIndex();
            return database;
        }

        /// <summary>
        /// Loads a database from <paramref name="databaseDirectory"/> and builds its index.
        /// </summary>
        /// <param name="databaseDirectory">Directory to load from.</param>
        /// <returns>The loaded, indexed database.</returns>
        /// <exception cref="InvalidOperationException">The directory does not exist.</exception>
        public static SearcherDatabase Load(string databaseDirectory)
        {
            if (!Directory.Exists(databaseDirectory))
                throw new InvalidOperationException("databaseDirectory not found.");

            var database = new SearcherDatabase(databaseDirectory, null);
            database.LoadFromFile();
            database.BuildIndex();

            return database;
        }

        /// <summary>
        /// Creates a database holding <paramref name="db"/> with an empty database directory.
        /// This constructor does not call <see cref="BuildIndex"/>.
        /// </summary>
        /// <param name="db">Items to add; may be null.</param>
        public SearcherDatabase(IReadOnlyCollection<SearcherItem> db)
            : this("", db)
        {
        }

        SearcherDatabase(string databaseDirectory, IReadOnlyCollection<SearcherItem> db)
            : base(databaseDirectory)
        {
            m_ItemList = new List<SearcherItem>();
            var nextId = 0;

            if (db == null)
                return;

            foreach (var item in db)
                AddItemToIndex(item, ref nextId, null);
        }

        /// <summary>
        /// Returns the items matching <paramref name="query"/>, best-scoring item first.
        /// </summary>
        /// <param name="query">Text to search for. Null is treated as an empty query.</param>
        /// <param name="localMaxScore">Score of the best match, or 0 when the query is empty or nothing matched.</param>
        /// <returns>
        /// For an empty query: the database's own item list instance (not a copy) when no
        /// <see cref="MatchFilter"/> is set, otherwise a new list of the items accepted by the filter.
        /// For a non-empty query: a new list with the best match first, followed by the other
        /// matches above the score cut-off in item-list order.
        /// </returns>
        public override List<SearcherItem> Search(string query, out float localMaxScore)
        {
            // Match assumes the query is trimmed
            query = (query ?? string.Empty).Trim(' ', '\t');
            localMaxScore = 0;

            // ReSharper disable once RedundantLogicalConditionalExpressionOperand
            var useThreads = k_IsParallel && m_ItemList.Count > k_ParallelThreshold;

            if (string.IsNullOrWhiteSpace(query))
            {
                if (MatchFilter == null)
                    return m_ItemList;

                return useThreads ? FilterMultiThreaded(query) : FilterSingleThreaded(query);
            }

            // Index terms are lower-cased with the invariant culture, so the query must be as well.
            var tokenizedQuery = new List<string>();
            foreach (var token in Tokenize(query))
                tokenizedQuery.Add(token.Trim().ToLowerInvariant());

            // Slot 0 is reserved for the best match, which is only known once every item has been scored.
            var finalResults = new List<SearcherItem> { null };
            var max = new Result();

            if (useThreads)
                SearchMultithreaded(query, tokenizedQuery, max, finalResults);
            else
                SearchSingleThreaded(query, tokenizedQuery, max, finalResults);

            localMaxScore = max.maxScore;
            if (max.item != null)
                finalResults[0] = max.item;
            else
                finalResults.RemoveAt(0);

            return finalResults;
        }

        /// <summary>
        /// Scores <paramref name="item"/> against the query and applies <see cref="MatchFilter"/>.
        /// May be called concurrently from several thread-pool tasks during a search.
        /// </summary>
        /// <param name="query">The trimmed query text, forwarded to the filter.</param>
        /// <param name="tokenizedQuery">Lower-cased query tokens.</param>
        /// <param name="item">Candidate item.</param>
        /// <param name="score">Text-match score of the item; 0 when the text does not match.</param>
        /// <returns>True when the text matches and the filter (if any) accepts the item.</returns>
        protected virtual bool Match(string query, IReadOnlyList<string> tokenizedQuery, SearcherItem item, out float score)
        {
            // Score first: the filter is only consulted for items whose text actually matched.
            if (!Match(tokenizedQuery, item.Path, out score))
                return false;

            var filter = MatchFilter;
            return filter == null || filter(query, item);
        }

        // Returns the items accepted by MatchFilter, in item-list order.
        List<SearcherItem> FilterSingleThreaded(string query)
        {
            var filter = MatchFilter;
            var accepted = new List<SearcherItem>();

            foreach (var candidate in m_ItemList)
            {
                if (filter.Invoke(query, candidate))
                    accepted.Add(candidate);
            }

            return accepted;
        }

        // Same as FilterSingleThreaded, with the item list split across thread-pool tasks.
        List<SearcherItem> FilterMultiThreaded(string query)
        {
            var filter = MatchFilter;
            var partitionCount = Environment.ProcessorCount;
            var buckets = new List<SearcherItem>[partitionCount];

            RunPartitioned(m_ItemList.Count, partitionCount, (partition, start, end) =>
            {
                var bucket = new List<SearcherItem>();
                for (var index = start; index < end; index++)
                {
                    var candidate = m_ItemList[index];
                    if (filter.Invoke(query, candidate))
                        bucket.Add(candidate);
                }

                buckets[partition] = bucket;
            });

            // Concatenating in partition order keeps the item-list order.
            var accepted = new List<SearcherItem>();
            foreach (var bucket in buckets)
                accepted.AddRange(bucket);

            return accepted;
        }

        // Scores every item on the calling thread, then appends the retained matches to finalResults.
        void SearchSingleThreaded(string query, IReadOnlyList<string> tokenizedQuery, Result max, ICollection<SearcherItem> finalResults)
        {
            var matches = new List<Result>();
            ScanRange(0, m_ItemList.Count, query, tokenizedQuery, max, matches);
            PostprocessResults(matches, finalResults, max);
        }

        // Scores the items on several thread-pool tasks, then appends the retained matches to finalResults.
        void SearchMultithreaded(string query, IReadOnlyList<string> tokenizedQuery, Result max, List<SearcherItem> finalResults)
        {
            var partitionCount = Environment.ProcessorCount;
            var partitionBest = new Result[partitionCount];
            var partitionMatches = new List<Result>[partitionCount];

            for (var i = 0; i < partitionCount; i++)
            {
                partitionBest[i] = new Result();
                partitionMatches[i] = new List<Result>();
            }

            // Each task writes only to its own slots, so no synchronization is needed and the
            // merged output does not depend on how the tasks were scheduled.
            RunPartitioned(m_ItemList.Count, partitionCount, (partition, start, end) =>
                ScanRange(start, end, query, tokenizedQuery, partitionBest[partition], partitionMatches[partition]));

            foreach (var candidate in partitionBest)
            {
                if (candidate.maxScore > max.maxScore)
                {
                    max.maxScore = candidate.maxScore;
                    max.item = candidate.item;
                }
            }

            // The cut-off is relative to the global best score, so this must run after the merge above.
            foreach (var matches in partitionMatches)
                PostprocessResults(matches, finalResults, max);
        }

        // Scores items [start, end): records every match in 'matches' and tracks the best one in 'best'.
        void ScanRange(int start, int end, string query, IReadOnlyList<string> tokenizedQuery, Result best, List<Result> matches)
        {
            for (var index = start; index < end; index++)
            {
                var item = m_ItemList[index];
                float score;
                if (!Match(query, tokenizedQuery, item, out score))
                    continue;

                if (score > best.maxScore)
                {
                    best.item = item;
                    best.maxScore = score;
                }

                matches.Add(new Result { item = item, maxScore = score });
            }
        }

        // Splits [0, itemCount) into partitionCount contiguous ranges, runs work(partition, start, end)
        // for each range on the thread pool, and blocks until all of them have completed.
        static void RunPartitioned(int itemCount, int partitionCount, Action<int, int, int> work)
        {
            var partitionSize = (int)Math.Ceiling(itemCount / (float)partitionCount);
            var tasks = new Task[partitionCount];

            for (var i = 0; i < partitionCount; i++)
            {
                // Copy the loop variable so each closure sees its own value.
                var partition = i;
                var start = Math.Min(partition * partitionSize, itemCount);
                var end = Math.Min(start + partitionSize, itemCount);
                tasks[i] = Task.Run(() => work(partition, start, end));
            }

            Task.WaitAll(tasks);
        }

        // Appends to 'items' every result other than the best one whose score, relative to the
        // best score, is above k_ScoreCutOff.
        static void PostprocessResults(IEnumerable<Result> results, ICollection<SearcherItem> items, Result max)
        {
            // Without a positive best score the ratio below is NaN or infinite and nothing qualifies.
            if (max.maxScore <= 0)
                return;

            foreach (var result in results)
            {
                if (result.item == null || result.item == max.item)
                    continue;

                var normalizedScore = result.maxScore / max.maxScore;
                if (normalizedScore > k_ScoreCutOff)
                    items.Add(result.item);
            }
        }

        /// <summary>
        /// Rebuilds the term index from the current item list. When several items share a path,
        /// only the first one contributes terms.
        /// </summary>
        public override void BuildIndex()
        {
            m_Index.Clear();

            foreach (var item in m_ItemList)
            {
                if (m_Index.ContainsKey(item.Path))
                    continue;

                m_Index.Add(item.Path, BuildTerms(item));
            }
        }

        // Builds the weighted, lower-cased search terms for one item from its name and synonyms.
        static List<ValueTuple<string, float>> BuildTerms(SearcherItem item)
        {
            var terms = new List<ValueTuple<string, float>>();

            // If the item uses synonyms to return results for similar words/phrases, add them to the search terms
            var searchableText = item.Synonyms == null
                ? item.Name
                : item.Name + " " + string.Join(" ", item.Synonyms);
            var tokens = Tokenize(searchableText);

            // Fixes bug: https://fogbugz.unity3d.com/f/cases/1359158/
            // Without this, node names with spaces or those with Pascal casing were not added to index
            tokens.Add(item.Name.ToLowerInvariant().Replace(" ", string.Empty));

            // tokenSuite accumulates the tokens seen so far, space separated, so that multi-word
            // prefixes ("sample", "sample texture", ...) are indexed as terms of their own.
            var tokenSuite = "";
            foreach (var token in tokens)
            {
                var term = token.ToLowerInvariant();
                if (term.Length > 1)
                    terms.Add(new ValueTuple<string, float>(term, k_TokenWeight));

                if (tokenSuite.Length > 0)
                {
                    tokenSuite += " " + term;
                    terms.Add(new ValueTuple<string, float>(tokenSuite, k_TokenSuiteWeight));
                }
                else
                {
                    tokenSuite = term;
                }
            }

            // Add a term containing all the uppercase letters (CamelCase World BBox => CCWBB)
            var initials = string.Concat(s_NonUppercaseSplitter.Split(item.Name)).Trim();
            if (!string.IsNullOrEmpty(initials))
                terms.Add(new ValueTuple<string, float>(initials.ToLowerInvariant(), k_InitialsWeight));

            return terms;
        }

        // Splits s on non-word characters and then on camel-case boundaries, returning the
        // distinct non-empty pieces in order of first appearance (original casing preserved).
        static IList<string> Tokenize(string s)
        {
            var seen = new HashSet<string>();
            var tokens = new List<string>();

            foreach (var word in s_NonWordSplitter.Split(s))
            {
                foreach (var piece in s_CamelCaseSplitter.Split(word))
                {
                    var trimmed = piece.Trim();

                    // HashSet.Add returns false for a piece that was already emitted.
                    if (trimmed.Length > 0 && seen.Add(trimmed))
                        tokens.Add(trimmed);
                }
            }

            return tokens;
        }

        // Scores the index terms stored for itemPath against the query tokens.
        // The score is the best per-term score; the method returns true when it is positive.
        // A blank path matches (with score 1) only when there are no query tokens.
        bool Match(IReadOnlyList<string> tokenizedQuery, string itemPath, out float score)
        {
            score = 0;

            if (itemPath.Trim().Length == 0)
            {
                if (tokenizedQuery.Count != 0)
                    return false;

                score = 1;
                return true;
            }

            // BuildIndex keys the index by the untrimmed path, so the lookup must use it too.
            IReadOnlyList<ValueTuple<string, float>> indexTerms;
            if (!m_Index.TryGetValue(itemPath, out indexTerms))
                return false;

            var maxScore = 0.0f;
            foreach (var indexTerm in indexTerms)
            {
                var term = indexTerm.Item1;
                var weight = indexTerm.Item2;

                var scoreForTerm = 0f;
                var querySuite = "";
                var querySuiteFactor = k_QuerySuiteFactor;
                foreach (var q in tokenizedQuery)
                {
                    // A single token scores in proportion to how much of the term it covers.
                    if (term.StartsWith(q, StringComparison.Ordinal))
                        scoreForTerm += weight * q.Length / term.Length;

                    // querySuite is the space-joined run of tokens so far; from the second token
                    // onwards a prefix match of the whole run earns an additional, boosted score.
                    if (querySuite.Length > 0)
                    {
                        querySuite += " " + q;
                        if (term.StartsWith(querySuite, StringComparison.Ordinal))
                            scoreForTerm += weight * querySuiteFactor * querySuite.Length / term.Length;
                    }
                    else
                    {
                        querySuite = q;
                    }

                    // The boost is squared after every token (1.25, 1.5625, 2.44, ...).
                    querySuiteFactor *= querySuiteFactor;
                }

                maxScore = Mathf.Max(maxScore, scoreForTerm);
            }

            score = maxScore;
            return score > 0;
        }
    }
}