beta 2012.11.23 17:35

author: Hans Hagen <pragma@wxs.nl> 2012-11-23 17:35:00 +0100
committer: Hans Hagen <pragma@wxs.nl> 2012-11-23 17:35:00 +0100
commit: a57ab2d223a7bcc8e9ae57e148c30ac6e91fafdb (patch)
tree: 535c20bedddf44347bf2c451ba6e8ba00d47d9ea /scripts
parent: 0a53837307e4b27c2b6543e3cab740c661d24481 (diff)
download: context-a57ab2d223a7bcc8e9ae57e148c30ac6e91fafdb.tar.gz
3 files changed, 153 insertions, 9 deletions
diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua
index 5e924b07d..403f2ba63 100644
--- a/scripts/context/lua/mtxrun.lua
+++ b/scripts/context/lua/mtxrun.lua
@@ -5711,7 +5711,7 @@ if not modules then modules = { } end modules ['util-prs'] = {
 
 local lpeg, table, string = lpeg, table, string
 
-local P, R, V, S, C, Ct, Cs, Carg, Cc, Cg, Cf = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc, lpeg.Cg, lpeg.Cf
+local P, R, V, S, C, Ct, Cs, Carg, Cc, Cg, Cf, Cp = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc, lpeg.Cg, lpeg.Cf, lpeg.Cp
 local lpegmatch, patterns = lpeg.match, lpeg.patterns
 local concat, format, gmatch, find = table.concat, string.format, string.gmatch, string.find
 local tostring, type, next, rawset = tostring, type, next, rawset
@@ -6008,10 +6008,13 @@ end
 
 -- inspect(lpeg.match(pattern,[[key="value"]]))
 
-local newline = S('\r\n')
+local defaultspecification = { separator = ",", quote = '"' }
+
+-- this version accepts multiple separators and quotes as used in the
+-- database module
 
 function parsers.csvsplitter(specification)
-    specification   = specification or { }
+    specification     = specification and table.setmetatableindex(specification,defaultspecification) or defaultspecification
     local separator = specification.separator
     local quotechar = specification.quote
     local separator = S(separator ~= "" and separator or ",")
@@ -6035,6 +6038,47 @@ function parsers.csvsplitter(specification)
     end
 end
 
+-- and this is a slightly patched version of a version posted by Philipp Gesang
+
+-- local mycsvsplitter = utilities.parsers.rfc4180splitter()
+--
+-- local crap = [[
+-- first,second,third,fourth
+-- "1","2","3","4"
+-- "a","b","c","d"
+-- "foo","bar""baz","boogie","xyzzy"
+-- ]]
+--
+-- local list, names = mycsvsplitter(crap,true)   inspect(list) inspect(names)
+-- local list, names = mycsvsplitter(crap)        inspect(list) inspect(names)
+
+function parsers.rfc4180splitter(specification)
+    specification     = specification and table.setmetatableindex(specification,defaultspecification) or defaultspecification
+    local separator   = specification.separator --> rfc: COMMA
+    local quotechar   = P(specification.quote)  -->      DQUOTE
+    local dquotechar  = quotechar * quotechar   -->      2DQUOTE
+                      / specification.quote
+    local separator   = S(separator ~= "" and separator or ",")
+    local escaped     = quotechar
+                      * Cs((dquotechar + (1 - quotechar))^0)
+                      * quotechar
+    local non_escaped = C((1 - quotechar - newline - separator)^1)
+    local field       = escaped + non_escaped
+    local record      = Ct((field * separator^-1)^1)
+    local headerline  = record * Cp()
+    local wholeblob   = Ct((newline^-1 * record)^0)
+    return function(data,getheader)
+        if getheader then
+            local header, position = lpegmatch(headerline,data)
+            local data = lpegmatch(wholeblob,data,position)
+            return data, header
+        else
+            return lpegmatch(wholeblob,data)
+        end
+    end
+end
+
+
 
 end -- of closure
 
@@ -16371,6 +16415,10 @@ function package.extraclibpath(...)
     end
 end
 
+if not package.loaders then
+    package.loaders = package.searchers -- 5.2
+end
+
 if not package.loaders[-2] then
     -- use package-path and package-cpath
     package.loaders[-2] = package.loaders[2]
diff --git a/scripts/context/stubs/mswin/mtxrun.lua b/scripts/context/stubs/mswin/mtxrun.lua
index 5e924b07d..403f2ba63 100644
--- a/scripts/context/stubs/mswin/mtxrun.lua
+++ b/scripts/context/stubs/mswin/mtxrun.lua
@@ -5711,7 +5711,7 @@ if not modules then modules = { } end modules ['util-prs'] = {
 
 local lpeg, table, string = lpeg, table, string
 
-local P, R, V, S, C, Ct, Cs, Carg, Cc, Cg, Cf = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc, lpeg.Cg, lpeg.Cf
+local P, R, V, S, C, Ct, Cs, Carg, Cc, Cg, Cf, Cp = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc, lpeg.Cg, lpeg.Cf, lpeg.Cp
 local lpegmatch, patterns = lpeg.match, lpeg.patterns
 local concat, format, gmatch, find = table.concat, string.format, string.gmatch, string.find
 local tostring, type, next, rawset = tostring, type, next, rawset
@@ -6008,10 +6008,13 @@ end
 
 -- inspect(lpeg.match(pattern,[[key="value"]]))
 
-local newline = S('\r\n')
+local defaultspecification = { separator = ",", quote = '"' }
+
+-- this version accepts multiple separators and quotes as used in the
+-- database module
 
 function parsers.csvsplitter(specification)
-    specification   = specification or { }
+    specification     = specification and table.setmetatableindex(specification,defaultspecification) or defaultspecification
     local separator = specification.separator
     local quotechar = specification.quote
     local separator = S(separator ~= "" and separator or ",")
@@ -6035,6 +6038,47 @@ function parsers.csvsplitter(specification)
     end
 end
 
+-- and this is a slightly patched version of a version posted by Philipp Gesang
+
+-- local mycsvsplitter = utilities.parsers.rfc4180splitter()
+--
+-- local crap = [[
+-- first,second,third,fourth
+-- "1","2","3","4"
+-- "a","b","c","d"
+-- "foo","bar""baz","boogie","xyzzy"
+-- ]]
+--
+-- local list, names = mycsvsplitter(crap,true)   inspect(list) inspect(names)
+-- local list, names = mycsvsplitter(crap)        inspect(list) inspect(names)
+
+function parsers.rfc4180splitter(specification)
+    specification     = specification and table.setmetatableindex(specification,defaultspecification) or defaultspecification
+    local separator   = specification.separator --> rfc: COMMA
+    local quotechar   = P(specification.quote)  -->      DQUOTE
+    local dquotechar  = quotechar * quotechar   -->      2DQUOTE
+                      / specification.quote
+    local separator   = S(separator ~= "" and separator or ",")
+    local escaped     = quotechar
+                      * Cs((dquotechar + (1 - quotechar))^0)
+                      * quotechar
+    local non_escaped = C((1 - quotechar - newline - separator)^1)
+    local field       = escaped + non_escaped
+    local record      = Ct((field * separator^-1)^1)
+    local headerline  = record * Cp()
+    local wholeblob   = Ct((newline^-1 * record)^0)
+    return function(data,getheader)
+        if getheader then
+            local header, position = lpegmatch(headerline,data)
+            local data = lpegmatch(wholeblob,data,position)
+            return data, header
+        else
+            return lpegmatch(wholeblob,data)
+        end
+    end
+end
+
+
 
 end -- of closure
 
@@ -16371,6 +16415,10 @@ function package.extraclibpath(...)
     end
 end
 
+if not package.loaders then
+    package.loaders = package.searchers -- 5.2
+end
+
 if not package.loaders[-2] then
     -- use package-path and package-cpath
     package.loaders[-2] = package.loaders[2]
diff --git a/scripts/context/stubs/unix/mtxrun b/scripts/context/stubs/unix/mtxrun
index 5e924b07d..403f2ba63 100755
--- a/scripts/context/stubs/unix/mtxrun
+++ b/scripts/context/stubs/unix/mtxrun
@@ -5711,7 +5711,7 @@ if not modules then modules = { } end modules ['util-prs'] = {
 
 local lpeg, table, string = lpeg, table, string
 
-local P, R, V, S, C, Ct, Cs, Carg, Cc, Cg, Cf = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc, lpeg.Cg, lpeg.Cf
+local P, R, V, S, C, Ct, Cs, Carg, Cc, Cg, Cf, Cp = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc, lpeg.Cg, lpeg.Cf, lpeg.Cp
 local lpegmatch, patterns = lpeg.match, lpeg.patterns
 local concat, format, gmatch, find = table.concat, string.format, string.gmatch, string.find
 local tostring, type, next, rawset = tostring, type, next, rawset
@@ -6008,10 +6008,13 @@ end
 
 -- inspect(lpeg.match(pattern,[[key="value"]]))
 
-local newline = S('\r\n')
+local defaultspecification = { separator = ",", quote = '"' }
+
+-- this version accepts multiple separators and quotes as used in the
+-- database module
 
 function parsers.csvsplitter(specification)
-    specification   = specification or { }
+    specification     = specification and table.setmetatableindex(specification,defaultspecification) or defaultspecification
     local separator = specification.separator
     local quotechar = specification.quote
     local separator = S(separator ~= "" and separator or ",")
@@ -6035,6 +6038,47 @@ function parsers.csvsplitter(specification)
     end
 end
 
+-- and this is a slightly patched version of a version posted by Philipp Gesang
+
+-- local mycsvsplitter = utilities.parsers.rfc4180splitter()
+--
+-- local crap = [[
+-- first,second,third,fourth
+-- "1","2","3","4"
+-- "a","b","c","d"
+-- "foo","bar""baz","boogie","xyzzy"
+-- ]]
+--
+-- local list, names = mycsvsplitter(crap,true)   inspect(list) inspect(names)
+-- local list, names = mycsvsplitter(crap)        inspect(list) inspect(names)
+
+function parsers.rfc4180splitter(specification)
+    specification     = specification and table.setmetatableindex(specification,defaultspecification) or defaultspecification
+    local separator   = specification.separator --> rfc: COMMA
+    local quotechar   = P(specification.quote)  -->      DQUOTE
+    local dquotechar  = quotechar * quotechar   -->      2DQUOTE
+                      / specification.quote
+    local separator   = S(separator ~= "" and separator or ",")
+    local escaped     = quotechar
+                      * Cs((dquotechar + (1 - quotechar))^0)
+                      * quotechar
+    local non_escaped = C((1 - quotechar - newline - separator)^1)
+    local field       = escaped + non_escaped
+    local record      = Ct((field * separator^-1)^1)
+    local headerline  = record * Cp()
+    local wholeblob   = Ct((newline^-1 * record)^0)
+    return function(data,getheader)
+        if getheader then
+            local header, position = lpegmatch(headerline,data)
+            local data = lpegmatch(wholeblob,data,position)
+            return data, header
+        else
+            return lpegmatch(wholeblob,data)
+        end
+    end
+end
+
+
 
 end -- of closure
 
@@ -16371,6 +16415,10 @@ function package.extraclibpath(...)
     end
 end
 
+if not package.loaders then
+    package.loaders = package.searchers -- 5.2
+end
+
 if not package.loaders[-2] then
     -- use package-path and package-cpath
     package.loaders[-2] = package.loaders[2]
author	Hans Hagen <pragma@wxs.nl>	2012-11-23 17:35:00 +0100
committer	Hans Hagen <pragma@wxs.nl>	2012-11-23 17:35:00 +0100
commit	a57ab2d223a7bcc8e9ae57e148c30ac6e91fafdb (patch)
tree	535c20bedddf44347bf2c451ba6e8ba00d47d9ea /scripts
parent	0a53837307e4b27c2b6543e3cab740c661d24481 (diff)
download	context-a57ab2d223a7bcc8e9ae57e148c30ac6e91fafdb.tar.gz