mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2025-12-18 22:36:08 -05:00
feat: add support to opt-in for fuzzy search (#10378)
Fuzzy search is kept behind a flag because it is computationally intensive (see #5261). Admins may opt in by setting the `[indexer].REPO_INDEXER_FUZZY_ENABLED` flag to true. Closes #10331 Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/10378 Reviewed-by: Gusted <gusted@noreply.codeberg.org> Co-authored-by: Shiny Nematoda <snematoda.751k2@aleeas.com> Co-committed-by: Shiny Nematoda <snematoda.751k2@aleeas.com>
This commit is contained in:
parent
32429c0b13
commit
cdc27b0d62
13 changed files with 175 additions and 52 deletions
|
|
@ -39,6 +39,10 @@ const (
|
|||
// llu:TrKeysSuffix search.
|
||||
var GrepSearchOptions = [3]string{"exact", "union", "regexp"}
|
||||
|
||||
func (mode GrepMode) String() string {
|
||||
return GrepSearchOptions[mode]
|
||||
}
|
||||
|
||||
type GrepOptions struct {
|
||||
RefName string
|
||||
MaxResultLimit int
|
||||
|
|
|
|||
|
|
@ -259,12 +259,16 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
|
|||
|
||||
if opts.Mode == internal.CodeSearchModeUnion {
|
||||
query := bleve.NewDisjunctionQuery()
|
||||
for _, field := range strings.Fields(opts.Keyword) {
|
||||
for field := range strings.FieldsSeq(opts.Keyword) {
|
||||
query.AddQuery(inner_bleve.MatchPhraseQuery(field, "Content", repoIndexerAnalyzer, false, 1.0))
|
||||
}
|
||||
keywordQuery = query
|
||||
} else {
|
||||
keywordQuery = inner_bleve.MatchPhraseQuery(opts.Keyword, "Content", repoIndexerAnalyzer, false, 1.0)
|
||||
keywordQuery = inner_bleve.MatchPhraseQuery(opts.Keyword,
|
||||
"Content",
|
||||
repoIndexerAnalyzer,
|
||||
opts.Mode == internal.CodeSearchModeFuzzy,
|
||||
1.0)
|
||||
}
|
||||
|
||||
if len(opts.RepoIDs) > 0 {
|
||||
|
|
|
|||
|
|
@ -335,11 +335,14 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan
|
|||
// Search searches for codes and language stats by given conditions.
|
||||
func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
|
||||
searchType := esMultiMatchTypePhrase
|
||||
if opts.Mode == internal.CodeSearchModeUnion {
|
||||
if opts.Mode == internal.CodeSearchModeUnion || opts.Mode == internal.CodeSearchModeFuzzy {
|
||||
searchType = esMultiMatchTypeBestFields
|
||||
}
|
||||
|
||||
kwQuery := elastic.NewMultiMatchQuery(opts.Keyword, "content").Type(searchType)
|
||||
if opts.Mode == internal.CodeSearchModeFuzzy {
|
||||
kwQuery = kwQuery.Fuzziness("AUTO")
|
||||
}
|
||||
query := elastic.NewBoolQuery()
|
||||
query = query.Must(kwQuery)
|
||||
if len(opts.RepoIDs) > 0 {
|
||||
|
|
|
|||
|
|
@ -91,12 +91,23 @@ func index(ctx context.Context, indexer internal.Indexer, repoID int64) error {
|
|||
return repo_model.UpdateIndexerStatus(ctx, repo, repo_model.RepoIndexerTypeCode, sha)
|
||||
}
|
||||
|
||||
func setSearchOption(set bool, val string) {
|
||||
if set {
|
||||
if !slices.Contains(CodeSearchOptions, val) {
|
||||
CodeSearchOptions = append(CodeSearchOptions, val)
|
||||
}
|
||||
} else if i := slices.Index(CodeSearchOptions, val); i >= 0 {
|
||||
CodeSearchOptions = append(CodeSearchOptions[:i], CodeSearchOptions[i+1:]...)
|
||||
}
|
||||
}
|
||||
|
||||
// Init initialize the repo indexer
|
||||
func Init() {
|
||||
if !setting.Indexer.RepoIndexerEnabled {
|
||||
(*globalIndexer.Load()).Close()
|
||||
return
|
||||
}
|
||||
setSearchOption(setting.Indexer.RepoIndexerEnableFuzzy, "fuzzy")
|
||||
|
||||
ctx, cancel, finished := process.GetManager().AddTypedContext(context.Background(), "Service: CodeIndexer", process.SystemProcessType, false)
|
||||
|
||||
|
|
|
|||
|
|
@ -116,6 +116,57 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
|
|||
})
|
||||
}
|
||||
|
||||
t.Run("Fuzzy", func(t *testing.T) {
|
||||
for _, kw := range []struct {
|
||||
keyword string
|
||||
ids []int64
|
||||
}{
|
||||
{
|
||||
keyword: "reppo1", // should match repo1
|
||||
ids: []int64{repoID},
|
||||
},
|
||||
{
|
||||
keyword: "1", // must not be fuzzy match only repo1
|
||||
ids: []int64{repoID},
|
||||
},
|
||||
{
|
||||
keyword: "Description!", // should match "Description"
|
||||
ids: []int64{repoID},
|
||||
},
|
||||
{
|
||||
keyword: "escription", // should match "Description"
|
||||
ids: []int64{repoID},
|
||||
},
|
||||
{
|
||||
keyword: "form", // should match "for"
|
||||
ids: []int64{repoID},
|
||||
},
|
||||
{
|
||||
keyword: "invalid", // should not match anything
|
||||
ids: []int64{},
|
||||
},
|
||||
} {
|
||||
t.Run(kw.keyword, func(t *testing.T) {
|
||||
_, res, _, err := indexer.Search(t.Context(), &internal.SearchOptions{
|
||||
Keyword: kw.keyword,
|
||||
Paginator: &db.ListOptions{
|
||||
Page: 1,
|
||||
PageSize: 10,
|
||||
},
|
||||
Mode: SearchModeFuzzy,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
ids := make([]int64, 0, len(res))
|
||||
for _, hit := range res {
|
||||
ids = append(ids, hit.RepoID)
|
||||
}
|
||||
|
||||
assert.Equal(t, kw.ids, ids)
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
require.NoError(t, indexer.Delete(t.Context(), repoID))
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,13 +25,18 @@ type CodeSearchMode int
|
|||
// Code search modes, numbered from zero via iota.
const (
|
||||
// CodeSearchModeExact is the zero value of CodeSearchMode.
CodeSearchModeExact CodeSearchMode = iota
|
||||
// CodeSearchModeUnion is the mode reported as "union".
CodeSearchModeUnion
|
||||
// CodeSearchModeFuzzy is the mode reported as "fuzzy".
CodeSearchModeFuzzy
|
||||
)
|
||||
|
||||
func (mode CodeSearchMode) String() string {
|
||||
if mode == CodeSearchModeUnion {
|
||||
switch mode {
|
||||
case CodeSearchModeFuzzy:
|
||||
return "fuzzy"
|
||||
case CodeSearchModeUnion:
|
||||
return "union"
|
||||
default:
|
||||
return "exact"
|
||||
}
|
||||
return "exact"
|
||||
}
|
||||
|
||||
type SearchOptions struct {
|
||||
|
|
|
|||
|
|
@ -36,13 +36,14 @@ type SearchResultLanguages = internal.SearchResultLanguages
|
|||
type SearchOptions = internal.SearchOptions
|
||||
|
||||
// llu:TrKeysSuffix search.
|
||||
var CodeSearchOptions = [2]string{"exact", "union"}
|
||||
var CodeSearchOptions = []string{"exact", "union", "fuzzy"}
|
||||
|
||||
type SearchMode = internal.CodeSearchMode
|
||||
|
||||
// Aliases of the internal package's code search mode constants.
const (
|
||||
SearchModeExact = internal.CodeSearchModeExact
|
||||
SearchModeUnion = internal.CodeSearchModeUnion
|
||||
SearchModeFuzzy = internal.CodeSearchModeFuzzy
|
||||
)
|
||||
|
||||
func indices(content string, selectionStartIndex, selectionEndIndex int) (int, int) {
|
||||
|
|
|
|||
|
|
@ -23,16 +23,17 @@ var Indexer = struct {
|
|||
IssueIndexerName string
|
||||
StartupTimeout time.Duration
|
||||
|
||||
RepoIndexerEnabled bool
|
||||
RepoIndexerRepoTypes []string
|
||||
RepoType string
|
||||
RepoPath string
|
||||
RepoConnStr string
|
||||
RepoIndexerName string
|
||||
MaxIndexerFileSize int64
|
||||
IncludePatterns []Glob
|
||||
ExcludePatterns []Glob
|
||||
ExcludeVendored bool
|
||||
RepoIndexerEnabled bool
|
||||
RepoIndexerRepoTypes []string
|
||||
RepoIndexerEnableFuzzy bool
|
||||
RepoType string
|
||||
RepoPath string
|
||||
RepoConnStr string
|
||||
RepoIndexerName string
|
||||
MaxIndexerFileSize int64
|
||||
IncludePatterns []Glob
|
||||
ExcludePatterns []Glob
|
||||
ExcludeVendored bool
|
||||
}{
|
||||
IssueType: "bleve",
|
||||
IssuePath: "indexers/issues.bleve",
|
||||
|
|
@ -40,14 +41,15 @@ var Indexer = struct {
|
|||
IssueConnAuth: "",
|
||||
IssueIndexerName: "gitea_issues",
|
||||
|
||||
RepoIndexerEnabled: false,
|
||||
RepoIndexerRepoTypes: []string{"sources", "forks", "mirrors", "templates"},
|
||||
RepoType: "bleve",
|
||||
RepoPath: "indexers/repos.bleve",
|
||||
RepoConnStr: "",
|
||||
RepoIndexerName: "gitea_codes",
|
||||
MaxIndexerFileSize: 1024 * 1024,
|
||||
ExcludeVendored: true,
|
||||
RepoIndexerEnabled: false,
|
||||
RepoIndexerRepoTypes: []string{"sources", "forks", "mirrors", "templates"},
|
||||
RepoIndexerEnableFuzzy: false,
|
||||
RepoType: "bleve",
|
||||
RepoPath: "indexers/repos.bleve",
|
||||
RepoConnStr: "",
|
||||
RepoIndexerName: "gitea_codes",
|
||||
MaxIndexerFileSize: 1024 * 1024,
|
||||
ExcludeVendored: true,
|
||||
}
|
||||
|
||||
type Glob struct {
|
||||
|
|
@ -87,6 +89,7 @@ func loadIndexerFrom(rootCfg ConfigProvider) {
|
|||
|
||||
Indexer.RepoIndexerEnabled = sec.Key("REPO_INDEXER_ENABLED").MustBool(false)
|
||||
Indexer.RepoIndexerRepoTypes = strings.Split(sec.Key("REPO_INDEXER_REPO_TYPES").MustString("sources,forks,mirrors,templates"), ",")
|
||||
Indexer.RepoIndexerEnableFuzzy = sec.Key("REPO_INDEXER_FUZZY_ENABLED").MustBool(false)
|
||||
Indexer.RepoType = sec.Key("REPO_INDEXER_TYPE").MustString("bleve")
|
||||
Indexer.RepoPath = filepath.ToSlash(sec.Key("REPO_INDEXER_PATH").MustString(filepath.ToSlash(filepath.Join(AppDataPath, "indexers/repos.bleve"))))
|
||||
if !filepath.IsAbs(Indexer.RepoPath) {
|
||||
|
|
|
|||
|
|
@ -100,6 +100,8 @@
|
|||
"repo.issue_indexer.title": "Issue Indexer",
|
||||
"search.milestone_kind": "Search milestones…",
|
||||
"search.syntax": "Search syntax",
|
||||
"search.fuzzy": "Fuzzy",
|
||||
"search.fuzzy_tooltip": "Include results that are an approximate match to the search term",
|
||||
"repo.settings.push_mirror.branch_filter.label": "Branch filter (optional)",
|
||||
"repo.settings.push_mirror.branch_filter.description": "Branches to be mirrored. Leave blank to mirror all branches. See <a href=\"%[1]s\">%[2]s documentation</a> for syntax. Examples: <code>main, release/*</code>",
|
||||
"incorrect_root_url": "This Forgejo instance is configured to be served on \"%s\". You are currently viewing Forgejo through a different URL, which may cause parts of the application to break. The canonical URL is controlled by Forgejo admins via the ROOT_URL setting in the app.ini.",
|
||||
|
|
|
|||
|
|
@ -38,10 +38,14 @@ func Code(ctx *context.Context) {
|
|||
path := ctx.FormTrim("path")
|
||||
|
||||
mode := code_indexer.SearchModeExact
|
||||
if m := ctx.FormTrim("mode"); m == "union" ||
|
||||
m == "fuzzy" ||
|
||||
ctx.FormBool("fuzzy") {
|
||||
if m := ctx.FormTrim("mode"); m == "union" {
|
||||
mode = code_indexer.SearchModeUnion
|
||||
} else if m == "fuzzy" || ctx.FormBool("fuzzy") {
|
||||
if setting.Indexer.RepoIndexerEnableFuzzy {
|
||||
mode = code_indexer.SearchModeFuzzy
|
||||
} else {
|
||||
mode = code_indexer.SearchModeUnion
|
||||
}
|
||||
}
|
||||
|
||||
ctx.Data["Keyword"] = keyword
|
||||
|
|
|
|||
|
|
@ -22,13 +22,16 @@ type searchMode int
|
|||
// Values of searchMode, numbered from zero via iota.
const (
|
||||
// ExactSearchMode is the zero value; ToIndexer maps it to the indexer's exact mode.
ExactSearchMode searchMode = iota
|
||||
// UnionSearchMode is what searchModeFromString returns for "union".
UnionSearchMode
|
||||
// FuzzySearchMode is what searchModeFromString returns for "fuzzy"; ToIndexer
// only honours it when setting.Indexer.RepoIndexerEnableFuzzy is true.
FuzzySearchMode
|
||||
// RegExpSearchMode is what searchModeFromString returns for "regexp".
RegExpSearchMode
|
||||
)
|
||||
|
||||
func searchModeFromString(s string) searchMode {
|
||||
switch s {
|
||||
case "fuzzy", "union":
|
||||
case "union":
|
||||
return UnionSearchMode
|
||||
case "fuzzy":
|
||||
return FuzzySearchMode
|
||||
case "regexp":
|
||||
return RegExpSearchMode
|
||||
default:
|
||||
|
|
@ -36,23 +39,13 @@ func searchModeFromString(s string) searchMode {
|
|||
}
|
||||
}
|
||||
|
||||
func (m searchMode) String() string {
|
||||
switch m {
|
||||
case ExactSearchMode:
|
||||
return "exact"
|
||||
case UnionSearchMode:
|
||||
return "union"
|
||||
case RegExpSearchMode:
|
||||
return "regexp"
|
||||
default:
|
||||
panic("cannot happen")
|
||||
}
|
||||
}
|
||||
|
||||
func (m searchMode) ToIndexer() code_indexer.SearchMode {
|
||||
if m == ExactSearchMode {
|
||||
return code_indexer.SearchModeExact
|
||||
}
|
||||
if setting.Indexer.RepoIndexerEnableFuzzy && m == FuzzySearchMode {
|
||||
return code_indexer.SearchModeFuzzy
|
||||
}
|
||||
return code_indexer.SearchModeUnion
|
||||
}
|
||||
|
||||
|
|
@ -83,7 +76,6 @@ func Search(ctx *context.Context) {
|
|||
ctx.Data["Keyword"] = keyword
|
||||
ctx.Data["Language"] = language
|
||||
ctx.Data["CodeSearchPath"] = path
|
||||
ctx.Data["CodeSearchMode"] = mode.String()
|
||||
ctx.Data["PageIsViewCode"] = true
|
||||
ctx.Data["CodeIndexerDisabled"] = !setting.Indexer.RepoIndexerEnabled
|
||||
if setting.Indexer.RepoIndexerEnabled {
|
||||
|
|
@ -106,11 +98,14 @@ func Search(ctx *context.Context) {
|
|||
var searchResults []*code_indexer.Result
|
||||
var searchResultLanguages []*code_indexer.SearchResultLanguages
|
||||
if setting.Indexer.RepoIndexerEnabled {
|
||||
m := mode.ToIndexer()
|
||||
ctx.Data["CodeSearchMode"] = m.String()
|
||||
|
||||
var err error
|
||||
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
|
||||
RepoIDs: []int64{ctx.Repo.Repository.ID},
|
||||
Keyword: keyword,
|
||||
Mode: mode.ToIndexer(),
|
||||
Mode: m,
|
||||
Language: language,
|
||||
Filename: path,
|
||||
Paginator: &db.ListOptions{
|
||||
|
|
@ -128,11 +123,14 @@ func Search(ctx *context.Context) {
|
|||
ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx)
|
||||
}
|
||||
} else {
|
||||
m := mode.ToGitGrep()
|
||||
ctx.Data["CodeSearchMode"] = m.String()
|
||||
|
||||
res, err := git.GrepSearch(ctx, ctx.Repo.GitRepo, keyword, git.GrepOptions{
|
||||
ContextLineNumber: 1,
|
||||
RefName: ctx.Repo.RefName,
|
||||
Filename: path,
|
||||
Mode: mode.ToGitGrep(),
|
||||
Mode: m,
|
||||
})
|
||||
if err != nil {
|
||||
ctx.ServerError("GrepSearch", err)
|
||||
|
|
|
|||
|
|
@ -42,10 +42,14 @@ func CodeSearch(ctx *context.Context) {
|
|||
path := ctx.FormTrim("path")
|
||||
|
||||
mode := code_indexer.SearchModeExact
|
||||
if m := ctx.FormTrim("mode"); m == "union" ||
|
||||
m == "fuzzy" ||
|
||||
ctx.FormBool("fuzzy") {
|
||||
if m := ctx.FormTrim("mode"); m == "union" {
|
||||
mode = code_indexer.SearchModeUnion
|
||||
} else if m == "fuzzy" || ctx.FormBool("fuzzy") {
|
||||
if setting.Indexer.RepoIndexerEnableFuzzy {
|
||||
mode = code_indexer.SearchModeFuzzy
|
||||
} else {
|
||||
mode = code_indexer.SearchModeUnion
|
||||
}
|
||||
}
|
||||
|
||||
ctx.Data["Keyword"] = keyword
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import (
|
|||
"net/http"
|
||||
"testing"
|
||||
|
||||
code_indexer "forgejo.org/modules/indexer/code"
|
||||
"forgejo.org/modules/setting"
|
||||
"forgejo.org/modules/test"
|
||||
"forgejo.org/tests"
|
||||
|
|
@ -16,11 +17,43 @@ func TestExploreCodeSearchIndexer(t *testing.T) {
|
|||
defer tests.PrepareTestEnv(t)()
|
||||
defer test.MockVariableValue(&setting.Indexer.RepoIndexerEnabled, true)()
|
||||
|
||||
req := NewRequest(t, "GET", "/explore/code?q=file&fuzzy=true")
|
||||
resp := MakeRequest(t, req, http.StatusOK)
|
||||
doc := NewHTMLParser(t, resp.Body).Find(".explore")
|
||||
t.Run("Exact", func(t *testing.T) {
|
||||
req := NewRequest(t, "GET", "/explore/code?q=file&mode=exact")
|
||||
resp := MakeRequest(t, req, http.StatusOK)
|
||||
doc := NewHTMLParser(t, resp.Body).Find(".explore")
|
||||
|
||||
doc.Find(".file-body").Each(func(i int, sel *goquery.Selection) {
|
||||
assert.Positive(t, sel.Find(".code-inner").Find(".search-highlight").Length(), 0)
|
||||
active, ok := doc.Find("[data-test-tag=fuzzy-dropdown] .active input").Attr("value")
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "exact", active)
|
||||
|
||||
doc.Find(".file-body").Each(func(i int, sel *goquery.Selection) {
|
||||
assert.Positive(t, sel.Find(".code-inner").Find(".search-highlight").Length())
|
||||
})
|
||||
})
|
||||
|
||||
t.Run("Fuzzy", func(t *testing.T) {
|
||||
defer test.MockVariableValue(&setting.Indexer.RepoIndexerEnableFuzzy, true)()
|
||||
code_indexer.CodeSearchOptions = []string{"exact", "union", "fuzzy"} // usually set by Init
|
||||
|
||||
req := NewRequest(t, "GET", "/explore/code?q=file&mode=fuzzy")
|
||||
resp := MakeRequest(t, req, http.StatusOK)
|
||||
doc := NewHTMLParser(t, resp.Body).Find(".explore")
|
||||
|
||||
active, ok := doc.Find("[data-test-tag=fuzzy-dropdown] .active input").Attr("value")
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "fuzzy", active)
|
||||
})
|
||||
|
||||
t.Run("No Fuzzy", func(t *testing.T) {
|
||||
defer test.MockVariableValue(&setting.Indexer.RepoIndexerEnableFuzzy, false)()
|
||||
code_indexer.CodeSearchOptions = []string{"exact", "union"} // usually set by Init
|
||||
|
||||
req := NewRequest(t, "GET", "/explore/code?q=file&mode=fuzzy")
|
||||
resp := MakeRequest(t, req, http.StatusOK)
|
||||
doc := NewHTMLParser(t, resp.Body).Find(".explore")
|
||||
|
||||
active, ok := doc.Find("[data-test-tag=fuzzy-dropdown] .active input").Attr("value")
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "union", active)
|
||||
})
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue