在Excel中使用正则expression式的通用UDF
我需要每周parsing和总结几千条文本。 Excel通配符不够灵活,我想删除粘贴到Notepad ++进行处理或馈送到脚本的额外步骤。
这里是我提出的工具。 他们仍然有点慢 – 公司的笔记本电脑每秒可能会有3000行 – 但是它们很方便。
RXMatch – 返回第一个匹配项,返回子组的选项。
=RXMatch("Apple","A(..)",1) -> "pp"
RXCount – 计数的比赛数量
=RXCount("Apple","p") -> 2
RXPrint – 将第一个匹配和/或子组embedded到模板string中
=RXPrint("Apple","(\S)\S+","\1 is for \0") -> "A is for Apple"
RXPrintAll – 将每个匹配embedded到模板string中,并join结果
=RXPrintAll("Apple Banana","(\S)\S+","\1 is for \0") -> "A is for Apple, B is for Banana"
RXMatches – 返回一个垂直的匹配数组,选项返回一个子组
=RXMatches("Apple Banana","\S+") -> {"Apple";"Banana"}
RXMatch
Public Function RXMatch(Text As String, Pattern As String, Optional Group As Integer = 0, Optional IgnoreCase As Boolean = True) As String Dim retval As String ' Takes a string and returns the matching text ' Text is the string to be searched ' Pattern is the regex pattern ' Group (optional) selects a parenthesized group (count the number of left parentheses preceding it to get the group number) ' IgnoreCase (optional) set to False for a case-sensitive search Dim RE As Object Dim Matches As Object Set RE = CreateObject("vbscript.regexp") RE.IgnoreCase = IgnoreCase RE.Pattern = Pattern Set Matches = RE.Execute(Text) If (Matches.Count > 0) Then If (Group > 0) Then retval = Matches(0).submatches(Group - 1) Else retval = Matches(0) End If Else retval = "" End If RXMatch = retval End Function
RXCount
Public Function RXCount(Text As String, Pattern As String, Optional IgnoreCase As Boolean = True) As Integer Dim retval As Integer ' Counts the number of matches ' Text is the string to be searched ' Pattern is the regex pattern ' IgnoreCase (optional) set to False for a case-sensitive search Dim RE As Object Dim Matches As Object Set RE = CreateObject("vbscript.regexp") RE.IgnoreCase = IgnoreCase RE.Global = True RE.Pattern = Pattern Set Matches = RE.Execute(Text) retval = Matches.Count RXCount = retval End Function
RXPrint
Public Function RXPrint(Text As String, Pattern As String, Optional Template As String = "\0", Optional IgnoreCase As Boolean = True) As String Dim retval As String ' Takes a string and returns a new string formatted according to the given template, using the first match found ' Text is the string to be searched ' Pattern is the regex pattern ' Template (optional) is a string which should contain group identifiers (\0 - \9) to be substituted with groups in the match ' IgnoreCase (optional) set to False for a case-sensitive search Dim REText, RETemplate As Object Dim MatchesText, MatchesTemplate As Object Set REText = CreateObject("vbscript.regexp") REText.IgnoreCase = IgnoreCase REText.Pattern = Pattern Set MatchesText = REText.Execute(Text) Set RETemplate = CreateObject("vbscript.regexp") RETemplate.Global = True RETemplate.Pattern = "(?:\\(.))|([^\\]+)" Set MatchesTemplate = RETemplate.Execute(Template) If (MatchesText.Count > 0) Then ReDim retArray(0 To MatchesTemplate.Count - 1) As String Dim escaped As String Dim plaintext As String For i = 0 To MatchesTemplate.Count - 1 escaped = MatchesTemplate(i).submatches(0) plaintext = MatchesTemplate(i).submatches(1) If (Len(escaped) > 0) Then If (IsNumeric(escaped)) Then Dim groupnum As Integer groupnum = CInt(escaped) If groupnum = 0 Then retArray(i) = MatchesText(0) ElseIf (groupnum > MatchesText(0).submatches.Count) Then retArray(i) = "?" Else retArray(i) = MatchesText(0).submatches(groupnum - 1) End If Else retArray(i) = escaped End If Else retArray(i) = plaintext End If Next i retval = Join(retArray, "") Else retval = "" End If RXPrint = retval End Function
RXPrintAll
Public Function RXPrintAll(Text As String, Pattern As String, Optional Template As String = "\0", Optional Delimiter As String = ", ", Optional IgnoreCase As Boolean = True) As String Dim retval As String ' Takes a string and returns a new string formatted according to the given template, repeated for each match ' Text is the string to be searched ' Pattern is the regex pattern ' Template (optional) is a string which should contain group identifiers (\0 - \9) to be substituted with groups in the match ' Delimiter (optional) specified how the results will be joined ' IgnoreCase (optional) set to False for a case-sensitive search Dim REText, RETemplate As Object Dim MatchesText, MatchesTemplate As Object Set REText = CreateObject("vbscript.regexp") REText.IgnoreCase = IgnoreCase REText.Global = True REText.Pattern = Pattern Set MatchesText = REText.Execute(Text) Set RETemplate = CreateObject("vbscript.regexp") RETemplate.Global = True RETemplate.Pattern = "(?:\\(.))|([^\\]+)" Set MatchesTemplate = RETemplate.Execute(Template) If (MatchesText.Count > 0) Then ReDim retArrays(0 To MatchesText.Count - 1) For j = 0 To MatchesText.Count - 1 ReDim retArray(0 To MatchesTemplate.Count - 1) As String Dim escaped As String Dim plaintext As String For i = 0 To MatchesTemplate.Count - 1 escaped = MatchesTemplate(i).submatches(0) plaintext = MatchesTemplate(i).submatches(1) If (Len(escaped) > 0) Then If (IsNumeric(escaped)) Then Dim groupnum As Integer groupnum = CInt(escaped) If groupnum = 0 Then retArray(i) = MatchesText(j) ElseIf (groupnum > MatchesText(j).submatches.Count) Then retArray(i) = "?" Else retArray(i) = MatchesText(j).submatches(groupnum - 1) End If Else retArray(i) = escaped End If Else retArray(i) = plaintext End If Next i retArrays(j) = Join(retArray, "") Next j retval = Join(retArrays, Delimiter) Else retval = "" End If RXPrintAll = retval End Function
RXMatches
Public Function RXMatches(Text As String, Pattern As String, Optional Group As Integer = 0, Optional IgnoreCase As Boolean = True) As Variant Dim retval() As String ' Takes a string and returns all matches in a vertical array ' Text is the string to be searched ' Pattern is the regex pattern ' Group (optional) selects a parenthesized group (count the number of left parentheses preceding it to get the group number) ' IgnoreCase (optional) set to False for a case-sensitive search Dim RE As Object Dim Matches As Object Set RE = CreateObject("vbscript.regexp") RE.IgnoreCase = IgnoreCase RE.Global = True RE.Pattern = Pattern Set Matches = RE.Execute(Text) If (Matches.Count > 0) Then ReDim retval(0 To Matches.Count - 1) For i = 0 To Matches.Count - 1 If (Group > 0) Then retval(i) = Matches(i).submatches(Group - 1) Else retval(i) = Matches(i) End If Next i Else ReDim retval(1) retval(0) = "" End If RXMatches = Application.Transpose(retval) End Function