... | ... |
@@ -11,11 +11,11 @@ license below), you may download the binary. |
11 | 11 |
|
12 | 12 |
Here are the SHA-256 checksums for the binaries: |
13 | 13 |
|
14 |
- da5dd7d276af73553cb97677f5cf5798237de0968f3a150199a16a14340c87ff csv2xlsx_386.exe |
|
15 |
- b9a96331ee89a685c665921200b4e51cff0dca98b6bb200d9e6a4bcb73a6ed04 csv2xlsx_amd64.exe |
|
16 |
- 8667562a9b81ec0c4e978ff928594e25b4917240f833157c929a5a26e50de0b0 csv2xlsx_linux_386 |
|
17 |
- 669fcaff7ea1824fd86a89c740c8743c5d4730505a2aba65cdfccdbe25eb3152 csv2xlsx_linux_amd64 |
|
18 |
- e84d29d4f8142a95ed80275fb94ee1356f5c3e02f0b5619b189273196adeece8 csv2xlsx_osx |
|
14 |
+ 3bc2df1a0be3bb8cee327ebab81029d4e0cad61b51b0996f36a6bd5002d8771c csv2xlsx_386.exe |
|
15 |
+ f6003e08e612a996d0d5099ccbb31cd7ca3a60155fca607bf913fe98a485d0ef csv2xlsx_amd64.exe |
|
16 |
+ c6c2fd10025234e3400e84be5dceb45dc370729b14b3084c384f4c82bd229950 csv2xlsx_linux_386 |
|
17 |
+ 9f177c313787c554fa9fabdb0852f76aaf7432ba0c136afb943faf757b827cff csv2xlsx_linux_amd64 |
|
18 |
+ d171af9b071429a2ce76caa8063dcdedd81ab0983be01b04ab2ece1195197194 csv2xlsx_osx |
|
19 | 19 |
|
20 | 20 |
|
21 | 21 |
### Usage |
... | ... |
@@ -38,6 +38,8 @@ Please see below for a list of command line options. |
38 | 38 |
column range to use (see below) |
39 | 39 |
-dateformat string |
40 | 40 |
format for CSV date cells (default YYYY-MM-DD) (default "2006-01-02") |
41 |
+ -encoding string |
|
42 |
+ encoding string to use for the CSV file, case-insensitive (defaults to "utf-8") |
|
41 | 43 |
-exceldateformat string |
42 | 44 |
Excel format for date cells (default as in Excel) |
43 | 45 |
-h display usage information |
... | ... |
@@ -62,6 +64,55 @@ Please see below for a list of command line options. |
62 | 64 |
separated from numbers with a colon (e.g. 0:text,3-16:number,17:date) |
63 | 65 |
``` |
64 | 66 |
|
67 |
+### Supported encodings |
|
68 |
+ |
|
69 |
+ * Codepage037 |
|
70 |
+ * Codepage437 |
|
71 |
+ * Codepage850 |
|
72 |
+ * Codepage852 |
|
73 |
+ * Codepage855 |
|
74 |
+ * Codepage858 |
|
75 |
+ * Codepage860 |
|
76 |
+ * Codepage862 |
|
77 |
+ * Codepage863 |
|
78 |
+ * Codepage865 |
|
79 |
+ * Codepage866 |
|
80 |
+ * Codepage1047 |
|
81 |
+ * Codepage1140 |
|
82 |
+ * ISO8859_1 |
|
83 |
+ * ISO8859_2 |
|
84 |
+ * ISO8859_3 |
|
85 |
+ * ISO8859_4 |
|
86 |
+ * ISO8859_5 |
|
87 |
+ * ISO8859_6 |
|
88 |
+ * ISO8859_6E |
|
89 |
+ * ISO8859_6I |
|
90 |
+ * ISO8859_7 |
|
91 |
+ * ISO8859_8 |
|
92 |
+ * ISO8859_8E |
|
93 |
+ * ISO8859_8I |
|
94 |
+ * ISO8859_9 |
|
95 |
+ * ISO8859_10 |
|
96 |
+ * ISO8859_13 |
|
97 |
+ * ISO8859_14 |
|
98 |
+ * ISO8859_15 |
|
99 |
+ * ISO8859_16 |
|
100 |
+ * Koi8r |
|
101 |
+ * Koi8u |
|
102 |
+ * Macintosh |
|
103 |
+ * MacintoshCyrillic |
|
104 |
+ * Windows874 |
|
105 |
+ * Windows1250 |
|
106 |
+ * Windows1251 |
|
107 |
+ * Windows1252 |
|
108 |
+ * Windows1253 |
|
109 |
+ * Windows1254 |
|
110 |
+ * Windows1255 |
|
111 |
+ * Windows1256 |
|
112 |
+ * Windows1257 |
|
113 |
+ * Windows1258 |
|
114 |
+ |
|
115 |
+ |
|
65 | 116 |
### Source |
66 | 117 |
|
67 | 118 |
This tool fulfills a special requirement and I will extend its functionality if the need arises. As I found out there are lots |
... | ... |
@@ -90,6 +141,10 @@ I am still amazed what you can accomplish within less than 200 lines of code in |
90 | 141 |
- option --colsep now handles \t for tab correctly |
91 | 142 |
- lots of bug fixes |
92 | 143 |
|
144 |
+ 2017-12-21 0.2 |
|
145 |
+ Added option --encoding |
|
146 |
+ |
|
147 |
+ |
|
93 | 148 |
### License |
94 | 149 |
|
95 | 150 |
This code is licensed under the 2-Clause BSD License: |
... | ... |
@@ -3,16 +3,18 @@ package main |
3 | 3 |
import ( |
4 | 4 |
"bufio" |
5 | 5 |
"encoding/csv" |
6 |
- "errors" |
|
7 | 6 |
"flag" |
8 | 7 |
"fmt" |
9 |
- "github.com/tealeg/xlsx" |
|
8 |
+ "io" |
|
10 | 9 |
"os" |
11 | 10 |
"path/filepath" |
12 | 11 |
"strconv" |
13 | 12 |
"strings" |
14 | 13 |
"time" |
15 | 14 |
"unicode/utf8" |
15 |
+ |
|
16 |
+ "github.com/tealeg/xlsx" |
|
17 |
+ "golang.org/x/text/encoding/charmap" |
|
16 | 18 |
) |
17 | 19 |
|
18 | 20 |
var ( |
... | ... |
@@ -21,6 +23,7 @@ var ( |
21 | 23 |
parmSheet string |
22 | 24 |
parmInFile string |
23 | 25 |
parmOutFile string |
26 |
+ parmEncoding string |
|
24 | 27 |
parmColSep rune |
25 | 28 |
parmDateFormat string |
26 | 29 |
parmExcelDateFormat string |
... | ... |
@@ -51,7 +54,7 @@ func parseCommaGroup(grpstr string) (map[int]string, error) { |
51 | 54 |
// we need exactly one number or an a-b interval (2 number parts) |
52 | 55 |
parts := strings.Split(grpstr, "-") |
53 | 56 |
if len(parts) < 1 || len(parts) > 2 { |
54 |
- return nil, errors.New(fmt.Sprintf("Invalid range group '%s' found.", grpstr)) |
|
57 |
+ return nil, fmt.Errorf("Invalid range group '%s' found.", grpstr) |
|
55 | 58 |
} |
56 | 59 |
// check for type (currently needed only for columns, will be ignored for lines) |
57 | 60 |
datatype := "standard" |
... | ... |
@@ -106,6 +109,7 @@ func parseCommandLine() { |
106 | 109 |
flag.StringVar(&parmRows, "rows", "", "list of line numbers to use (1,2,8 or 1,3-14,28)") |
107 | 110 |
flag.StringVar(&parmSheet, "sheet", "fromCSV", "tab name of the Excel sheet") |
108 | 111 |
flag.StringVar(&tmpStr, "colsep", "|", "column separator (default '|') ") |
112 |
+ flag.StringVar(&parmEncoding, "encoding", "utf-8", "character encoding") |
|
109 | 113 |
// not settable with csv reader |
110 | 114 |
//flag.StringVar(&parmRowSep, "rowsep", "\n", "row separator (default LF) ") |
111 | 115 |
flag.BoolVar(&parmNoHeader, "noheader", false, "no headers in first line, only data lines (default false)") |
... | ... |
@@ -151,14 +155,116 @@ func parseCommandLine() { |
151 | 155 |
// currently there is not need for gigabyte files, but maybe this should be done streaming. |
152 | 156 |
// in addition, we need row and column counts first to set the default ranges later on in the program flow. |
153 | 157 |
func loadInputFile(filename string) (rows [][]string) { |
158 |
+ var rdr io.Reader |
|
154 | 159 |
f, err := os.Open(filename) |
155 | 160 |
defer f.Close() |
156 | 161 |
if err != nil { |
157 | 162 |
fmt.Println(err) |
158 | 163 |
os.Exit(1) |
159 | 164 |
} |
160 |
- // use csv reader to read entire file |
|
161 |
- r := csv.NewReader(bufio.NewReader(f)) |
|
165 |
+ enc := strings.ToLower(parmEncoding) |
|
166 |
+ |
|
167 |
+ if enc == "utf8" || enc == "utf-8" { |
|
168 |
+ rdr = bufio.NewReader(f) |
|
169 |
+ } else { |
|
170 |
+ switch strings.ToUpper(parmEncoding) { |
|
171 |
+ case "CODEPAGE037": |
|
172 |
+ rdr = charmap.CodePage037.NewDecoder().Reader(f) |
|
173 |
+ case "CODEPAGE437": |
|
174 |
+ rdr = charmap.CodePage437.NewDecoder().Reader(f) |
|
175 |
+ case "CODEPAGE850": |
|
176 |
+ rdr = charmap.CodePage850.NewDecoder().Reader(f) |
|
177 |
+ case "CODEPAGE852": |
|
178 |
+ rdr = charmap.CodePage852.NewDecoder().Reader(f) |
|
179 |
+ case "CODEPAGE855": |
|
180 |
+ rdr = charmap.CodePage855.NewDecoder().Reader(f) |
|
181 |
+ case "CODEPAGE858": |
|
182 |
+ rdr = charmap.CodePage858.NewDecoder().Reader(f) |
|
183 |
+ case "CODEPAGE860": |
|
184 |
+ rdr = charmap.CodePage860.NewDecoder().Reader(f) |
|
185 |
+ case "CODEPAGE862": |
|
186 |
+ rdr = charmap.CodePage862.NewDecoder().Reader(f) |
|
187 |
+ case "CODEPAGE863": |
|
188 |
+ rdr = charmap.CodePage863.NewDecoder().Reader(f) |
|
189 |
+ case "CODEPAGE865": |
|
190 |
+ rdr = charmap.CodePage865.NewDecoder().Reader(f) |
|
191 |
+ case "CODEPAGE866": |
|
192 |
+ rdr = charmap.CodePage866.NewDecoder().Reader(f) |
|
193 |
+ case "CODEPAGE1047": |
|
194 |
+ rdr = charmap.CodePage1047.NewDecoder().Reader(f) |
|
195 |
+ case "CODEPAGE1140": |
|
196 |
+ rdr = charmap.CodePage1140.NewDecoder().Reader(f) |
|
197 |
+ case "ISO8859_1": |
|
198 |
+ rdr = charmap.ISO8859_1.NewDecoder().Reader(f) |
|
199 |
+ case "ISO8859_2": |
|
200 |
+ rdr = charmap.ISO8859_2.NewDecoder().Reader(f) |
|
201 |
+ case "ISO8859_3": |
|
202 |
+ rdr = charmap.ISO8859_3.NewDecoder().Reader(f) |
|
203 |
+ case "ISO8859_4": |
|
204 |
+ rdr = charmap.ISO8859_4.NewDecoder().Reader(f) |
|
205 |
+ case "ISO8859_5": |
|
206 |
+ rdr = charmap.ISO8859_5.NewDecoder().Reader(f) |
|
207 |
+ case "ISO8859_6": |
|
208 |
+ rdr = charmap.ISO8859_6.NewDecoder().Reader(f) |
|
209 |
+ case "ISO8859_6E": |
|
210 |
+ rdr = charmap.ISO8859_6E.NewDecoder().Reader(f) |
|
211 |
+ case "ISO8859_6I": |
|
212 |
+ rdr = charmap.ISO8859_6I.NewDecoder().Reader(f) |
|
213 |
+ case "ISO8859_7": |
|
214 |
+ rdr = charmap.ISO8859_7.NewDecoder().Reader(f) |
|
215 |
+ case "ISO8859_8": |
|
216 |
+ rdr = charmap.ISO8859_8.NewDecoder().Reader(f) |
|
217 |
+ case "ISO8859_8E": |
|
218 |
+ rdr = charmap.ISO8859_8E.NewDecoder().Reader(f) |
|
219 |
+ case "ISO8859_8I": |
|
220 |
+ rdr = charmap.ISO8859_8I.NewDecoder().Reader(f) |
|
221 |
+ case "ISO8859_9": |
|
222 |
+ rdr = charmap.ISO8859_9.NewDecoder().Reader(f) |
|
223 |
+ case "ISO8859_10": |
|
224 |
+ rdr = charmap.ISO8859_10.NewDecoder().Reader(f) |
|
225 |
+ case "ISO8859_13": |
|
226 |
+ rdr = charmap.ISO8859_13.NewDecoder().Reader(f) |
|
227 |
+ case "ISO8859_14": |
|
228 |
+ rdr = charmap.ISO8859_14.NewDecoder().Reader(f) |
|
229 |
+ case "ISO8859_15": |
|
230 |
+ rdr = charmap.ISO8859_15.NewDecoder().Reader(f) |
|
231 |
+ case "ISO8859_16": |
|
232 |
+ rdr = charmap.ISO8859_16.NewDecoder().Reader(f) |
|
233 |
+ case "KOI8R": |
|
234 |
+ rdr = charmap.KOI8R.NewDecoder().Reader(f) |
|
235 |
+ case "KOI8U": |
|
236 |
+ rdr = charmap.KOI8U.NewDecoder().Reader(f) |
|
237 |
+ case "MACINTOSH": |
|
238 |
+ rdr = charmap.Macintosh.NewDecoder().Reader(f) |
|
239 |
+ case "MACINTOSHCYRILLIC": |
|
240 |
+ rdr = charmap.MacintoshCyrillic.NewDecoder().Reader(f) |
|
241 |
+ case "WINDOWS874": |
|
242 |
+ rdr = charmap.Windows874.NewDecoder().Reader(f) |
|
243 |
+ case "WINDOWS1250": |
|
244 |
+ rdr = charmap.Windows1250.NewDecoder().Reader(f) |
|
245 |
+ case "WINDOWS1251": |
|
246 |
+ rdr = charmap.Windows1251.NewDecoder().Reader(f) |
|
247 |
+ case "WINDOWS1252": |
|
248 |
+ rdr = charmap.Windows1252.NewDecoder().Reader(f) |
|
249 |
+ case "WINDOWS1253": |
|
250 |
+ rdr = charmap.Windows1253.NewDecoder().Reader(f) |
|
251 |
+ case "WINDOWS1254": |
|
252 |
+ rdr = charmap.Windows1254.NewDecoder().Reader(f) |
|
253 |
+ case "WINDOWS1255": |
|
254 |
+ rdr = charmap.Windows1255.NewDecoder().Reader(f) |
|
255 |
+ case "WINDOWS1256": |
|
256 |
+ rdr = charmap.Windows1256.NewDecoder().Reader(f) |
|
257 |
+ case "WINDOWS1257": |
|
258 |
+ rdr = charmap.Windows1257.NewDecoder().Reader(f) |
|
259 |
+ case "WINDOWS1258": |
|
260 |
+ rdr = charmap.Windows1258.NewDecoder().Reader(f) |
|
261 |
+ default: |
|
262 |
+ fmt.Println("Invalid encoding specified, defaulting to UTF-8") |
|
263 |
+ rdr = bufio.NewReader(f) |
|
264 |
+ } |
|
265 |
+ } |
|
266 |
+ |
|
267 |
+ r := csv.NewReader(rdr) |
|
162 | 268 |
r.Comma = parmColSep |
163 | 269 |
r.FieldsPerRecord = -1 |
164 | 270 |
r.LazyQuotes = true |
... | ... |
@@ -200,7 +306,7 @@ func writeCellContents(cell *xlsx.Cell, colString, colType string, rownum, colnu |
200 | 306 |
switch colType { |
201 | 307 |
case "text": |
202 | 308 |
cell.SetString(colString) |
203 |
- case "number","currency": |
|
309 |
+ case "number", "currency": |
|
204 | 310 |
floatVal, err := strconv.ParseFloat(colString, 64) |
205 | 311 |
if err != nil { |
206 | 312 |
fmt.Println(fmt.Sprintf("Cell (%d,%d) is not a valid number, value: %s", rownum, colnum, colString)) |