Showing 7 changed files with 172 additions and 11 deletions
+60 -5
README.md
... ...
@@ -11,11 +11,11 @@ license below), you may download the binary.
11 11
 
12 12
 Here are the SHA-256 checksums for the binaries:
13 13
 
14
-	da5dd7d276af73553cb97677f5cf5798237de0968f3a150199a16a14340c87ff  csv2xlsx_386.exe
15
-	b9a96331ee89a685c665921200b4e51cff0dca98b6bb200d9e6a4bcb73a6ed04  csv2xlsx_amd64.exe
16
-	8667562a9b81ec0c4e978ff928594e25b4917240f833157c929a5a26e50de0b0  csv2xlsx_linux_386
17
-	669fcaff7ea1824fd86a89c740c8743c5d4730505a2aba65cdfccdbe25eb3152  csv2xlsx_linux_amd64
18
-	e84d29d4f8142a95ed80275fb94ee1356f5c3e02f0b5619b189273196adeece8  csv2xlsx_osx
14
+    3bc2df1a0be3bb8cee327ebab81029d4e0cad61b51b0996f36a6bd5002d8771c  csv2xlsx_386.exe
15
+    f6003e08e612a996d0d5099ccbb31cd7ca3a60155fca607bf913fe98a485d0ef  csv2xlsx_amd64.exe
16
+    c6c2fd10025234e3400e84be5dceb45dc370729b14b3084c384f4c82bd229950  csv2xlsx_linux_386
17
+    9f177c313787c554fa9fabdb0852f76aaf7432ba0c136afb943faf757b827cff  csv2xlsx_linux_amd64
18
+    d171af9b071429a2ce76caa8063dcdedd81ab0983be01b04ab2ece1195197194  csv2xlsx_osx
19 19
 
20 20
 
21 21
 ### Usage
... ...
@@ -38,6 +38,8 @@ Please see below for a list of command line options.
38 38
     	column range to use (see below)
39 39
   -dateformat string
40 40
     	format for CSV date cells (default YYYY-MM-DD) (default "2006-01-02")
41
+  -encoding string
42
+      encoding string to use for the CSV file, case-insensitive (defaults to "utf-8")
41 43
   -exceldateformat string
42 44
     	Excel format for date cells (default as in Excel)
43 45
   -h	display usage information
... ...
@@ -62,6 +64,55 @@ Please see below for a list of command line options.
62 64
         separated from numbers with a colon (e.g. 0:text,3-16:number,17:date)
63 65
 ```
64 66
 
67
+### Supported encodings
68
+
69
+ * Codepage037
70
+ * Codepage437
71
+ * Codepage850
72
+ * Codepage852
73
+ * Codepage855
74
+ * Codepage858
75
+ * Codepage860
76
+ * Codepage862
77
+ * Codepage863
78
+ * Codepage865
79
+ * Codepage866
80
+ * Codepage1047
81
+ * Codepage1140
82
+ * ISO8859_1
83
+ * ISO8859_2
84
+ * ISO8859_3
85
+ * ISO8859_4
86
+ * ISO8859_5
87
+ * ISO8859_6
88
+ * ISO8859_6E
89
+ * ISO8859_6I
90
+ * ISO8859_7
91
+ * ISO8859_8
92
+ * ISO8859_8E
93
+ * ISO8859_8I
94
+ * ISO8859_9
95
+ * ISO8859_10
96
+ * ISO8859_13
97
+ * ISO8859_14
98
+ * ISO8859_15
99
+ * ISO8859_16
100
+ * Koi8r
101
+ * Koi8u
102
+ * Macintosh
103
+ * MacintoshCyrillic
104
+ * Windows874
105
+ * Windows1250
106
+ * Windows1251
107
+ * Windows1252
108
+ * Windows1253
109
+ * Windows1254
110
+ * Windows1255
111
+ * Windows1256
112
+ * Windows1257
113
+ * Windows1258
114
+
115
+
65 116
 ### Source
66 117
 
67 118
 This tool fulfills a special requirement and I will extend its functionality if the need arises. As I found out there are lots 
... ...
@@ -90,6 +141,10 @@ I am still amazed what you can accomplish within less than 200 lines of code in
90 141
                 - option --colsep now handles \t for tab correctly
91 142
                 - lots of bug fixes
92 143
 
144
+    2017-12-21 0.2
145
+                - added option --encoding
146
+
147
+
93 148
 ### License
94 149
 
95 150
 This code is licensed under the 2-Clause BSD License:
+112 -6
csv2xlsx.go 1000755 → 1000644
... ...
@@ -3,16 +3,18 @@ package main
3 3
 import (
4 4
 	"bufio"
5 5
 	"encoding/csv"
6
-	"errors"
7 6
 	"flag"
8 7
 	"fmt"
9
-	"github.com/tealeg/xlsx"
8
+	"io"
10 9
 	"os"
11 10
 	"path/filepath"
12 11
 	"strconv"
13 12
 	"strings"
14 13
 	"time"
15 14
 	"unicode/utf8"
15
+
16
+	"github.com/tealeg/xlsx"
17
+	"golang.org/x/text/encoding/charmap"
16 18
 )
17 19
 
18 20
 var (
... ...
@@ -21,6 +23,7 @@ var (
21 23
 	parmSheet           string
22 24
 	parmInFile          string
23 25
 	parmOutFile         string
26
+	parmEncoding        string
24 27
 	parmColSep          rune
25 28
 	parmDateFormat      string
26 29
 	parmExcelDateFormat string
... ...
@@ -51,7 +54,7 @@ func parseCommaGroup(grpstr string) (map[int]string, error) {
51 54
 	// we need exactly one number or an a-b interval (2 number parts)
52 55
 	parts := strings.Split(grpstr, "-")
53 56
 	if len(parts) < 1 || len(parts) > 2 {
54
-		return nil, errors.New(fmt.Sprintf("Invalid range group '%s' found.", grpstr))
57
+		return nil, fmt.Errorf("Invalid range group '%s' found.", grpstr)
55 58
 	}
56 59
 	// check for type (currently needed only for columns, will be ignored for lines)
57 60
 	datatype := "standard"
... ...
@@ -106,6 +109,7 @@ func parseCommandLine() {
106 109
 	flag.StringVar(&parmRows, "rows", "", "list of line numbers to use (1,2,8 or 1,3-14,28)")
107 110
 	flag.StringVar(&parmSheet, "sheet", "fromCSV", "tab name of the Excel sheet")
108 111
 	flag.StringVar(&tmpStr, "colsep", "|", "column separator (default '|') ")
112
+	flag.StringVar(&parmEncoding, "encoding", "utf-8", "character encoding")
109 113
 	// not settable with csv reader
110 114
 	//flag.StringVar(&parmRowSep, "rowsep", "\n", "row separator (default LF) ")
111 115
 	flag.BoolVar(&parmNoHeader, "noheader", false, "no headers in first line, only data lines (default false)")
... ...
@@ -151,14 +155,116 @@ func parseCommandLine() {
151 155
 // currently there is no need for gigabyte files, but maybe this should be done streaming.
152 156
 // in addition, we need row and column counts first to set the default ranges later on in the program flow.
153 157
 func loadInputFile(filename string) (rows [][]string) {
158
+	var rdr io.Reader
154 159
 	f, err := os.Open(filename)
155 160
 	defer f.Close()
156 161
 	if err != nil {
157 162
 		fmt.Println(err)
158 163
 		os.Exit(1)
159 164
 	}
160
-	// use csv reader to read entire file
161
-	r := csv.NewReader(bufio.NewReader(f))
165
+	enc := strings.ToLower(parmEncoding)
166
+
167
+	if enc == "utf8" || enc == "utf-8" {
168
+		rdr = bufio.NewReader(f)
169
+	} else {
170
+		switch strings.ToUpper(parmEncoding) {
171
+		case "CODEPAGE037":
172
+			rdr = charmap.CodePage037.NewDecoder().Reader(f)
173
+		case "CODEPAGE437":
174
+			rdr = charmap.CodePage437.NewDecoder().Reader(f)
175
+		case "CODEPAGE850":
176
+			rdr = charmap.CodePage850.NewDecoder().Reader(f)
177
+		case "CODEPAGE852":
178
+			rdr = charmap.CodePage852.NewDecoder().Reader(f)
179
+		case "CODEPAGE855":
180
+			rdr = charmap.CodePage855.NewDecoder().Reader(f)
181
+		case "CODEPAGE858":
182
+			rdr = charmap.CodePage858.NewDecoder().Reader(f)
183
+		case "CODEPAGE860":
184
+			rdr = charmap.CodePage860.NewDecoder().Reader(f)
185
+		case "CODEPAGE862":
186
+			rdr = charmap.CodePage862.NewDecoder().Reader(f)
187
+		case "CODEPAGE863":
188
+			rdr = charmap.CodePage863.NewDecoder().Reader(f)
189
+		case "CODEPAGE865":
190
+			rdr = charmap.CodePage865.NewDecoder().Reader(f)
191
+		case "CODEPAGE866":
192
+			rdr = charmap.CodePage866.NewDecoder().Reader(f)
193
+		case "CODEPAGE1047":
194
+			rdr = charmap.CodePage1047.NewDecoder().Reader(f)
195
+		case "CODEPAGE1140":
196
+			rdr = charmap.CodePage1140.NewDecoder().Reader(f)
197
+		case "ISO8859_1":
198
+			rdr = charmap.ISO8859_1.NewDecoder().Reader(f)
199
+		case "ISO8859_2":
200
+			rdr = charmap.ISO8859_2.NewDecoder().Reader(f)
201
+		case "ISO8859_3":
202
+			rdr = charmap.ISO8859_3.NewDecoder().Reader(f)
203
+		case "ISO8859_4":
204
+			rdr = charmap.ISO8859_4.NewDecoder().Reader(f)
205
+		case "ISO8859_5":
206
+			rdr = charmap.ISO8859_5.NewDecoder().Reader(f)
207
+		case "ISO8859_6":
208
+			rdr = charmap.ISO8859_6.NewDecoder().Reader(f)
209
+		case "ISO8859_6E":
210
+			rdr = charmap.ISO8859_6E.NewDecoder().Reader(f)
211
+		case "ISO8859_6I":
212
+			rdr = charmap.ISO8859_6I.NewDecoder().Reader(f)
213
+		case "ISO8859_7":
214
+			rdr = charmap.ISO8859_7.NewDecoder().Reader(f)
215
+		case "ISO8859_8":
216
+			rdr = charmap.ISO8859_8.NewDecoder().Reader(f)
217
+		case "ISO8859_8E":
218
+			rdr = charmap.ISO8859_8E.NewDecoder().Reader(f)
219
+		case "ISO8859_8I":
220
+			rdr = charmap.ISO8859_8I.NewDecoder().Reader(f)
221
+		case "ISO8859_9":
222
+			rdr = charmap.ISO8859_9.NewDecoder().Reader(f)
223
+		case "ISO8859_10":
224
+			rdr = charmap.ISO8859_10.NewDecoder().Reader(f)
225
+		case "ISO8859_13":
226
+			rdr = charmap.ISO8859_13.NewDecoder().Reader(f)
227
+		case "ISO8859_14":
228
+			rdr = charmap.ISO8859_14.NewDecoder().Reader(f)
229
+		case "ISO8859_15":
230
+			rdr = charmap.ISO8859_15.NewDecoder().Reader(f)
231
+		case "ISO8859_16":
232
+			rdr = charmap.ISO8859_16.NewDecoder().Reader(f)
233
+		case "KOI8R":
234
+			rdr = charmap.KOI8R.NewDecoder().Reader(f)
235
+		case "KOI8U":
236
+			rdr = charmap.KOI8U.NewDecoder().Reader(f)
237
+		case "MACINTOSH":
238
+			rdr = charmap.Macintosh.NewDecoder().Reader(f)
239
+		case "MACINTOSHCYRILLIC":
240
+			rdr = charmap.MacintoshCyrillic.NewDecoder().Reader(f)
241
+		case "WINDOWS874":
242
+			rdr = charmap.Windows874.NewDecoder().Reader(f)
243
+		case "WINDOWS1250":
244
+			rdr = charmap.Windows1250.NewDecoder().Reader(f)
245
+		case "WINDOWS1251":
246
+			rdr = charmap.Windows1251.NewDecoder().Reader(f)
247
+		case "WINDOWS1252":
248
+			rdr = charmap.Windows1252.NewDecoder().Reader(f)
249
+		case "WINDOWS1253":
250
+			rdr = charmap.Windows1253.NewDecoder().Reader(f)
251
+		case "WINDOWS1254":
252
+			rdr = charmap.Windows1254.NewDecoder().Reader(f)
253
+		case "WINDOWS1255":
254
+			rdr = charmap.Windows1255.NewDecoder().Reader(f)
255
+		case "WINDOWS1256":
256
+			rdr = charmap.Windows1256.NewDecoder().Reader(f)
257
+		case "WINDOWS1257":
258
+			rdr = charmap.Windows1257.NewDecoder().Reader(f)
259
+		case "WINDOWS1258":
260
+			rdr = charmap.Windows1258.NewDecoder().Reader(f)
261
+		default:
262
+			fmt.Println("Invalid encoding specified, defaulting to UTF-8")
263
+			rdr = bufio.NewReader(f)
264
+		}
265
+	}
266
+
267
+	r := csv.NewReader(rdr)
162 268
 	r.Comma = parmColSep
163 269
 	r.FieldsPerRecord = -1
164 270
 	r.LazyQuotes = true
... ...
@@ -200,7 +306,7 @@ func writeCellContents(cell *xlsx.Cell, colString, colType string, rownum, colnu
200 306
 	switch colType {
201 307
 	case "text":
202 308
 		cell.SetString(colString)
203
-	case "number","currency":
309
+	case "number", "currency":
204 310
 		floatVal, err := strconv.ParseFloat(colString, 64)
205 311
 		if err != nil {
206 312
 			fmt.Println(fmt.Sprintf("Cell (%d,%d) is not a valid number, value: %s", rownum, colnum, colString))
BIN
csv2xlsx_386.exe
Binary file not shown.
BIN
csv2xlsx_amd64.exe
Binary file not shown.
BIN
csv2xlsx_linux_386
Binary file not shown.
BIN
csv2xlsx_linux_amd64
Binary file not shown.
BIN
csv2xlsx_osx
Binary file not shown.