Skip to content

Commit b3a34de

Browse files
committed
[WIP] implement common file path filter LRU cache
Building on the work in PR git-lfs#6047, we implement a common LRU cache for the Filter structure in our "filepathfilter" package, which is used by all commands that accept command-line or configuration options to filter their actions by file paths. Specifically, any of our commands which accept --include or --exclude options (or their -I and -X short forms), or read the "lfs.fetchInclude" and "lfs.fetchExclude" configuration options, should benefit from the use of a cache to return previously-determined results when matching file paths against the file path filter patterns. We set the default cache size at 10,000 entries, and provide a new "lfs.pathFilterCacheSize" configuration option which may be used to resize the cache as desired. The special values "none" and "0" disable the cache, and the value "unlimited" allows the cache to grow without bound. Note that to pass the cache size setting to the "filepathfilter" package we rename its "option" type to "Option" so it will be exported and available from our "commands" package. We use the Least-Recently Used (LRU) cache implementation from the "golang/groupcache" Go module, which uses a map plus a doubly-linked list to track the most- and least-recently used entries. We add a mutex around our accesses to the cache in case any of our commands use the Allows() method of the Filter structure from concurrent goroutines.
1 parent 3f00f87 commit b3a34de

File tree

8 files changed

+80
-31
lines changed

8 files changed

+80
-31
lines changed

commands/command_migrate.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,6 @@ var (
6363
// migrateFixup is the flag indicating whether or not to infer the
6464
// included and excluded filepath patterns.
6565
migrateFixup bool
66-
67-
// migrateCacheFilePathFilterResult is the flag indicating that a file path
68-
// filter result may be cached
69-
migrateCacheFilePathFilterResults bool
7066
)
7167

7268
// migrate takes the given command and arguments, *gitobj.ObjectDatabase, as well
@@ -405,7 +401,6 @@ func init() {
405401
importCmd.Flags().BoolVar(&migrateNoRewrite, "no-rewrite", false, "Add new history without rewriting previous")
406402
importCmd.Flags().StringVarP(&migrateCommitMessage, "message", "m", "", "With --no-rewrite, an optional commit message")
407403
importCmd.Flags().BoolVar(&migrateFixup, "fixup", false, "Infer filepaths based on .gitattributes")
408-
importCmd.Flags().BoolVar(&migrateCacheFilePathFilterResults, "cache-file-path-filter-results", false, "Cache file path filter results")
409404

410405
exportCmd := NewCommand("export", migrateExportCommand)
411406
exportCmd.Flags().BoolVar(&migrateVerbose, "verbose", false, "Verbose logging")

commands/commands.go

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"net"
88
"os"
99
"path/filepath"
10+
"strconv"
1011
"strings"
1112
"sync"
1213
"time"
@@ -27,6 +28,10 @@ import (
2728
// Populate man pages
2829
//go:generate go run ../docs/man/mangen.go
2930

31+
const (
32+
defaultFilepathFilterCacheSize = 10000
33+
)
34+
3035
var (
3136
ErrorBuffer = &bytes.Buffer{}
3237
ErrorWriter = newMultiWriter(os.Stderr, ErrorBuffer)
@@ -134,7 +139,7 @@ func buildFilepathFilter(config *config.Configuration, includeArg, excludeArg *s
134139

135140
func buildFilepathFilterWithPatternType(config *config.Configuration, includeArg, excludeArg *string, useFetchOptions bool, patternType filepathfilter.PatternType) *filepathfilter.Filter {
136141
inc, exc := determineIncludeExcludePaths(config, includeArg, excludeArg, useFetchOptions)
137-
return filepathfilter.New(inc, exc, patternType, filepathfilter.UseCache(migrateCacheFilePathFilterResults))
142+
return filepathfilter.New(inc, exc, patternType, determineFilepathFilterCache(config))
138143
}
139144

140145
func downloadTransfer(p *lfs.WrappedPointer) (name, path, oid string, size int64, missing bool, err error) {
@@ -523,6 +528,29 @@ func determineIncludeExcludePaths(config *config.Configuration, includeArg, excl
523528
return
524529
}
525530

531+
func determineFilepathFilterCache(config *config.Configuration) filepathfilter.Option {
532+
cacheSize := defaultFilepathFilterCacheSize
533+
534+
if configSize, ok := config.Git.Get("lfs.pathfiltercachesize"); ok {
535+
switch configSize {
536+
case "none":
537+
return filepathfilter.DisableCache()
538+
case "unlimited":
539+
cacheSize = 0
540+
default:
541+
if s, err := strconv.Atoi(configSize); err == nil {
542+
if s == 0 {
543+
return filepathfilter.DisableCache()
544+
} else if s > 0 {
545+
cacheSize = s
546+
}
547+
}
548+
}
549+
}
550+
551+
return filepathfilter.EnableCache(cacheSize)
552+
}
553+
526554
func buildProgressMeter(dryRun bool, d tq.Direction) *tq.Meter {
527555
m := tq.NewMeter(cfg)
528556
m.Logger = m.LoggerFromEnv(cfg.Os)

docs/man/git-lfs-config.adoc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,14 @@ interpreted as the password.
106106
+
107107
Enables in-memory SSH and Git Credential caching for a single 'git lfs'
108108
command. Default: enabled.
109+
* `lfs.pathFilterCacheSize`
110+
+
111+
Sets the size of the in-memory cache of results from matching file paths
112+
against the filter defined by the `lfs.fetchInclude` and `lfs.fetchExclude`
113+
options, or for commands which accept them, by the `--include` and `--exclude`
114+
command-line options or their `-I` and `-X` equivalents.
115+
Set to `0` or `none` to disable the cache, or to `unlimited` to allow
116+
the cache to grow without bound. Default: 10,000 unique file paths.
109117
* `lfs.storage`
110118
+
111119
Allows overriding the LFS storage directory. Non-absolute path is relativized

docs/man/git-lfs-migrate.adoc

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -149,10 +149,6 @@ export::
149149
`--include-ref` or `--exclude-ref`.
150150
`--everything`::
151151
See <<_include_and_exclude_references>>.
152-
`--cache-file-path-filter-results`::
153-
Makes Git LFS cache the result of include or exclude file path pattern matching
154-
while importing Git objects to LFS. This may speed up migration of a large
155-
repository.
156152
+
157153
Note: Git refs are "case-sensitive" on all platforms in "packed form"
158154
(see `git-pack-refs(1)`). On "case-insensitive" file systems, e.g. NTFS

filepathfilter/filepathfilter.go

Lines changed: 39 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@ package filepathfilter
22

33
import (
44
"strings"
5+
"sync"
56

67
"github.com/git-lfs/git-lfs/v3/tr"
78
"github.com/git-lfs/wildmatch/v2"
9+
"github.com/golang/groupcache/lru"
810
"github.com/rubyist/tracerx"
911
)
1012

@@ -19,7 +21,8 @@ type Filter struct {
1921
include []Pattern
2022
exclude []Pattern
2123
defaultValue bool
22-
cache map[string]bool
24+
cache *lru.Cache
25+
cacheLock sync.Mutex
2326
}
2427

2528
type PatternType bool
@@ -39,38 +42,48 @@ func (p PatternType) String() string {
3942
type options struct {
4043
defaultValue bool
4144
useCache bool
45+
cacheSize int
4246
}
4347

44-
type option func(*options)
48+
type Option func(*options)
4549

4650
// DefaultValue is an option representing the default value of a filepathfilter
4751
// if no patterns match. If this option is not provided, the default is true.
48-
func DefaultValue(val bool) option {
52+
func DefaultValue(val bool) Option {
4953
return func(args *options) {
5054
args.defaultValue = val
5155
}
5256
}
5357

54-
func UseCache(val bool) option {
58+
func EnableCache(size int) Option {
5559
return func(args *options) {
56-
args.useCache = val
60+
args.useCache = true
61+
args.cacheSize = size
5762
}
5863
}
5964

60-
func NewFromPatterns(include, exclude []Pattern, setters ...option) *Filter {
65+
func DisableCache() Option {
66+
return func(args *options) {
67+
args.useCache = false
68+
}
69+
}
70+
71+
func NewFromPatterns(include, exclude []Pattern, setters ...Option) *Filter {
6172
args := &options{defaultValue: true, useCache: false}
6273
for _, setter := range setters {
6374
setter(args)
6475
}
65-
var cache map[string]bool
66-
cache = nil
67-
if args.useCache {
68-
cache = make(map[string]bool)
76+
77+
f := &Filter{include: include, exclude: exclude, defaultValue: args.defaultValue}
78+
79+
if args.useCache && args.cacheSize >= 0 {
80+
f.cache = lru.New(args.cacheSize)
6981
}
70-
return &Filter{include: include, exclude: exclude, defaultValue: args.defaultValue, cache: cache}
82+
83+
return f
7184
}
7285

73-
func New(include, exclude []string, ptype PatternType, setters ...option) *Filter {
86+
func New(include, exclude []string, ptype PatternType, setters ...Option) *Filter {
7487
return NewFromPatterns(
7588
convertToWildmatch(include, ptype),
7689
convertToWildmatch(exclude, ptype), setters...)
@@ -95,7 +108,7 @@ func wildmatchToString(ps ...Pattern) []string {
95108
return s
96109
}
97110

98-
func (f *Filter) allowsUncached(filename string) bool {
111+
func (f *Filter) allows(filename string) bool {
99112
var included bool
100113
for _, inc := range f.include {
101114
if included = inc.Match(filename); included {
@@ -135,17 +148,23 @@ func (f *Filter) Allows(filename string) bool {
135148
return true
136149
}
137150

138-
if f.cache == nil {
139-
return f.allowsUncached(filename)
151+
if f.cache != nil {
152+
f.cacheLock.Lock()
153+
res, ok := f.cache.Get(filename)
154+
f.cacheLock.Unlock()
155+
if ok {
156+
return res.(bool)
157+
}
140158
}
141159

142-
cachedResult, cacheHit := f.cache[filename]
143-
if cacheHit {
144-
return cachedResult
160+
res := f.allows(filename)
161+
162+
if f.cache != nil {
163+
f.cacheLock.Lock()
164+
f.cache.Add(filename, res)
165+
f.cacheLock.Unlock()
145166
}
146167

147-
res := f.allowsUncached(filename)
148-
f.cache[filename] = res
149168
return res
150169
}
151170

git/githistory/rewriter_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ func TestRewriterIgnoresPathsThatDontMatchFilterWithResultCaching(t *testing.T)
228228
include := []string{"*.txt"}
229229
exclude := []string{"subdir/*.txt"}
230230

231-
filter := filepathfilter.New(include, exclude, filepathfilter.GitIgnore, filepathfilter.UseCache(true))
231+
filter := filepathfilter.New(include, exclude, filepathfilter.GitIgnore, filepathfilter.EnableCache(10))
232232

233233
db := DatabaseFromFixture(t, "non-repeated-subtrees.git")
234234
r := NewRewriter(db, WithFilter(filter))

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ require (
99
github.com/git-lfs/go-netrc v0.0.0-20250218165306-ba0029b43d11
1010
github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825
1111
github.com/git-lfs/wildmatch/v2 v2.0.1
12+
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8
1213
github.com/jmhodges/clock v1.2.0
1314
github.com/leonelquinteros/gotext v1.5.0
1415
github.com/mattn/go-isatty v0.0.4

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825 h1:riQhgheTL7tMF4d
1616
github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825/go.mod h1:fenKRzpXDjNpsIBhuhUzvjCKlDjKam0boRAenTE0Q6A=
1717
github.com/git-lfs/wildmatch/v2 v2.0.1 h1:Ds+aobrV5bK0wStILUOn9irllPyf9qrFETbKzwzoER8=
1818
github.com/git-lfs/wildmatch/v2 v2.0.1/go.mod h1:EVqonpk9mXbREP3N8UkwoWdrF249uHpCUo5CPXY81gw=
19+
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 h1:f+oWsMOmNPc8JmEHVZIycC7hBoQxHH9pNKQORJNozsQ=
20+
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8/go.mod h1:wcDNUvekVysuuOpQKo3191zZyTpiI6se1N1ULghS0sw=
1921
github.com/gorilla/securecookie v1.1.1 h1:miw7JPhV+b/lAHSXz4qd/nN9jRiAFV5FwjeKyCS8BvQ=
2022
github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4=
2123
github.com/gorilla/sessions v1.2.1 h1:DHd3rPN5lE3Ts3D8rKkQ8x/0kqfeNmBAaiSi+o7FsgI=

0 commit comments

Comments
 (0)