1
1
/*
2
- Copyright 2016 GitHub Inc.
2
+ Copyright 2016 GitHub Inc.
3
+ See https://github.com/github/gh-osc/blob/master/LICENSE
3
4
*/
4
5
5
6
package binlog
23
24
startEntryUnknownTableRegexp = regexp .MustCompile ("^### Row event for unknown table .*? at ([0-9]+)$" )
24
25
endLogPosRegexp = regexp .MustCompile ("^#[0-9]{6} .*? end_log_pos ([0-9]+)" )
25
26
statementRegxp = regexp .MustCompile ("### (INSERT INTO|UPDATE|DELETE FROM) `(.*?)`[.]`(.*?)`" )
27
+ tokenRegxp = regexp .MustCompile ("### (WHERE|SET)$" )
28
+ positionalColumnRegexp = regexp .MustCompile ("### @([0-9]+)=(.+)$" )
29
+ )
30
+
31
// BinlogEntryState is a state in the binlog parser automaton / state machine.
type BinlogEntryState string

// States of the state machine.
//
// Every constant names its type explicitly. In a const block the type is only
// carried over implicitly when the initialiser expression is omitted as well;
// a constant written as `Name = "literal"` is an untyped string constant, not
// a BinlogEntryState.
const (
	// InvalidState is returned by automaton steps together with an error.
	InvalidState BinlogEntryState = "InvalidState"
	// SearchForStartPosOrStatementState is the initial state of the parser.
	SearchForStartPosOrStatementState BinlogEntryState = "SearchForStartPosOrStatementState"
	// ExpectEndLogPosState is entered right after the start of an entry was read.
	ExpectEndLogPosState BinlogEntryState = "ExpectEndLogPosState"
	// ExpectTokenState is entered right after a statement line was read.
	ExpectTokenState BinlogEntryState = "ExpectTokenState"
	// PositionalColumnAssignmentState is declared for the automaton.
	PositionalColumnAssignmentState BinlogEntryState = "PositionalColumnAssignmentState"
)
27
42
28
43
// MySQLBinlogReader reads binary log entries by executing the `mysqlbinlog`
@@ -33,6 +48,7 @@ type MySQLBinlogReader struct {
33
48
MySQLBinlogBinary string
34
49
}
35
50
51
+ // NewMySQLBinlogReader creates a new reader that directly parses binlog files from the filesystem
36
52
func NewMySQLBinlogReader (basedir string , datadir string ) (mySQLBinlogReader * MySQLBinlogReader ) {
37
53
mySQLBinlogReader = & MySQLBinlogReader {
38
54
Basedir : basedir ,
@@ -61,7 +77,8 @@ func (this *MySQLBinlogReader) ReadEntries(logFile string, startPos uint64, stop
61
77
if err != nil {
62
78
return entries , log .Errore (err )
63
79
}
64
- chunkEntries , err := parseEntries (entriesBytes )
80
+
81
+ chunkEntries , err := parseEntries (bufio .NewScanner (bytes .NewReader (entriesBytes )))
65
82
if err != nil {
66
83
return entries , log .Errore (err )
67
84
}
@@ -77,64 +94,136 @@ func (this *MySQLBinlogReader) ReadEntries(logFile string, startPos uint64, stop
77
94
return entries , err
78
95
}
79
96
80
- func parseEntries (entriesBytes []byte ) (entries [](* BinlogEntry ), err error ) {
81
- scanner := bufio .NewScanner (bytes .NewReader (entriesBytes ))
82
- expectEndLogPos := false
83
- var startLogPos uint64
84
- var endLogPos uint64
97
+ // automaton step: accept wither beginning of new entry, or beginning of new statement
98
+ func searchForStartPosOrStatement (scanner * bufio.Scanner , binlogEntry * BinlogEntry , previousEndLogPos uint64 ) (nextState BinlogEntryState , nextBinlogEntry * BinlogEntry , err error ) {
99
+ onStartEntry := func (submatch []string ) (BinlogEntryState , * BinlogEntry , error ) {
100
+ startLogPos , _ := strconv .ParseUint (submatch [1 ], 10 , 64 )
85
101
86
- binlogEntry := & BinlogEntry {}
102
+ if previousEndLogPos != 0 && startLogPos != previousEndLogPos {
103
+ return InvalidState , binlogEntry , fmt .Errorf ("Expected startLogPos %+v to equal previous endLogPos %+v" , startLogPos , previousEndLogPos )
104
+ }
105
+ nextBinlogEntry = binlogEntry
106
+ if binlogEntry .LogPos != 0 && binlogEntry .StatementType != "" {
107
+ // Current entry is already a true entry, with startpos and with statement
108
+ nextBinlogEntry = NewBinlogEntry ()
109
+ }
87
110
88
- for scanner .Scan () {
89
- line := scanner .Text ()
111
+ nextBinlogEntry .LogPos = startLogPos
112
+ return ExpectEndLogPosState , nextBinlogEntry , nil
113
+ }
90
114
91
- onStartEntry := func (submatch []string ) error {
92
- startLogPos , _ = strconv .ParseUint (submatch [1 ], 10 , 64 )
115
+ onStatementEntry := func (submatch []string ) (BinlogEntryState , * BinlogEntry , error ) {
116
+ nextBinlogEntry = binlogEntry
117
+ if binlogEntry .LogPos != 0 && binlogEntry .StatementType != "" {
118
+ // Current entry is already a true entry, with startpos and with statement
119
+ nextBinlogEntry = binlogEntry .Duplicate ()
120
+ }
93
121
94
- if endLogPos != 0 && startLogPos != endLogPos {
95
- return fmt .Errorf ("Expected startLogPos %+v to equal previous endLogPos %+v" , startLogPos , endLogPos )
96
- }
97
- // We are entering a new entry, let's push the previous one
98
- if binlogEntry .LogPos != 0 && binlogEntry .StatementType != "" {
99
- entries = append (entries , binlogEntry )
100
- log .Debugf ("entry: %+v" , * binlogEntry )
101
- binlogEntry = & BinlogEntry {}
102
- }
122
+ nextBinlogEntry .StatementType = strings .Split (submatch [1 ], " " )[0 ]
123
+ nextBinlogEntry .DatabaseName = submatch [2 ]
124
+ nextBinlogEntry .TableName = submatch [3 ]
103
125
104
- //log.Debugf(line)
105
- binlogEntry .LogPos = startLogPos
106
- // Next iteration we will read the end_log_pos
107
- expectEndLogPos = true
126
+ return ExpectTokenState , nextBinlogEntry , nil
127
+ }
108
128
109
- return nil
129
+ onPositionalColumn := func (submatch []string ) (BinlogEntryState , * BinlogEntry , error ) {
130
+ columnIndex , _ := strconv .ParseUint (submatch [1 ], 10 , 64 )
131
+ if _ , found := binlogEntry .PositionalColumns [columnIndex ]; found {
132
+ return InvalidState , binlogEntry , fmt .Errorf ("Positional column %+v found more than once in %+v, statement=%+v" , columnIndex , binlogEntry .LogPos , binlogEntry .StatementType )
110
133
}
111
- if expectEndLogPos {
112
- submatch := endLogPosRegexp .FindStringSubmatch (line )
113
- if len (submatch ) <= 1 {
114
- return entries , log .Errorf ("Expected to find end_log_pos following pos %+v" , startLogPos )
115
- }
116
- endLogPos , _ = strconv .ParseUint (submatch [1 ], 10 , 64 )
134
+ columnValue := submatch [2 ]
135
+ columnValue = strings .TrimPrefix (columnValue , "'" )
136
+ columnValue = strings .TrimSuffix (columnValue , "'" )
137
+ binlogEntry .PositionalColumns [columnIndex ] = columnValue
117
138
118
- binlogEntry .EndLogPos = endLogPos
119
- expectEndLogPos = false
120
- } else if submatch := startEntryRegexp .FindStringSubmatch (line ); len (submatch ) > 1 {
121
- if err := onStartEntry (submatch ); err != nil {
122
- return entries , log .Errore (err )
123
- }
124
- } else if submatch := startEntryUnknownTableRegexp .FindStringSubmatch (line ); len (submatch ) > 1 {
125
- if err := onStartEntry (submatch ); err != nil {
126
- return entries , log .Errore (err )
127
- }
128
- } else if submatch := statementRegxp .FindStringSubmatch (line ); len (submatch ) > 1 {
129
- binlogEntry .StatementType = strings .Split (submatch [1 ], " " )[0 ]
130
- binlogEntry .DatabaseName = submatch [2 ]
131
- binlogEntry .TableName = submatch [3 ]
132
- }
139
+ return SearchForStartPosOrStatementState , binlogEntry , nil
140
+ }
141
+
142
+ line := scanner .Text ()
143
+ if submatch := startEntryRegexp .FindStringSubmatch (line ); len (submatch ) > 1 {
144
+ return onStartEntry (submatch )
145
+ }
146
+ if submatch := startEntryUnknownTableRegexp .FindStringSubmatch (line ); len (submatch ) > 1 {
147
+ return onStartEntry (submatch )
148
+ }
149
+ if submatch := statementRegxp .FindStringSubmatch (line ); len (submatch ) > 1 {
150
+ return onStatementEntry (submatch )
151
+ }
152
+ if submatch := positionalColumnRegexp .FindStringSubmatch (line ); len (submatch ) > 1 {
153
+ return onPositionalColumn (submatch )
154
+ }
155
+ // Haven't found a match
156
+ return SearchForStartPosOrStatementState , binlogEntry , nil
157
+ }
158
+
159
+ // automaton step: expect an end_log_pos line`
160
+ func expectEndLogPos (scanner * bufio.Scanner , binlogEntry * BinlogEntry ) (nextState BinlogEntryState , err error ) {
161
+ line := scanner .Text ()
162
+
163
+ submatch := endLogPosRegexp .FindStringSubmatch (line )
164
+ if len (submatch ) > 1 {
165
+ binlogEntry .EndLogPos , _ = strconv .ParseUint (submatch [1 ], 10 , 64 )
166
+ return SearchForStartPosOrStatementState , nil
167
+ }
168
+ return InvalidState , fmt .Errorf ("Expected to find end_log_pos following pos %+v" , binlogEntry .LogPos )
169
+ }
133
170
171
+ // automaton step: a not-strictly-required but good-to-have-around validation that
172
+ // we see an expected token following a statement
173
+ func expectToken (scanner * bufio.Scanner , binlogEntry * BinlogEntry ) (nextState BinlogEntryState , err error ) {
174
+ line := scanner .Text ()
175
+ if submatch := tokenRegxp .FindStringSubmatch (line ); len (submatch ) > 1 {
176
+ return SearchForStartPosOrStatementState , nil
134
177
}
135
- if binlogEntry .LogPos != 0 {
178
+ return InvalidState , fmt .Errorf ("Expected to find token following pos %+v" , binlogEntry .LogPos )
179
+ }
180
+
181
+ // parseEntries will parse output of `mysqlbinlog --verbose --base64-output=DECODE-ROWS`
182
+ // It issues an automaton / state machine to do its thang.
183
+ func parseEntries (scanner * bufio.Scanner ) (entries [](* BinlogEntry ), err error ) {
184
+ binlogEntry := NewBinlogEntry ()
185
+ var state BinlogEntryState = SearchForStartPosOrStatementState
186
+ var endLogPos uint64
187
+
188
+ appendBinlogEntry := func () {
189
+ if binlogEntry .LogPos == 0 {
190
+ return
191
+ }
192
+ if binlogEntry .StatementType == "" {
193
+ return
194
+ }
136
195
entries = append (entries , binlogEntry )
137
196
log .Debugf ("entry: %+v" , * binlogEntry )
197
+ fmt .Println (fmt .Sprintf ("%s `%s`.`%s`" , binlogEntry .StatementType , binlogEntry .DatabaseName , binlogEntry .TableName ))
198
+ }
199
+ for scanner .Scan () {
200
+ switch state {
201
+ case SearchForStartPosOrStatementState :
202
+ {
203
+ var nextBinlogEntry * BinlogEntry
204
+ state , nextBinlogEntry , err = searchForStartPosOrStatement (scanner , binlogEntry , endLogPos )
205
+ if nextBinlogEntry != binlogEntry {
206
+ appendBinlogEntry ()
207
+ binlogEntry = nextBinlogEntry
208
+ }
209
+ }
210
+ case ExpectEndLogPosState :
211
+ {
212
+ state , err = expectEndLogPos (scanner , binlogEntry )
213
+ }
214
+ case ExpectTokenState :
215
+ {
216
+ state , err = expectToken (scanner , binlogEntry )
217
+ }
218
+ default :
219
+ {
220
+ err = fmt .Errorf ("Unexpected state %+v" , state )
221
+ }
222
+ }
223
+ if err != nil {
224
+ return entries , log .Errore (err )
225
+ }
138
226
}
227
+ appendBinlogEntry ()
139
228
return entries , err
140
229
}
0 commit comments