| ... | ... |
@@ -11,11 +11,11 @@ license below), you may download the binary. |
| 11 | 11 |
|
| 12 | 12 |
Here are the SHA-256 checksums for the binaries: |
| 13 | 13 |
|
| 14 |
- da5dd7d276af73553cb97677f5cf5798237de0968f3a150199a16a14340c87ff csv2xlsx_386.exe |
|
| 15 |
- b9a96331ee89a685c665921200b4e51cff0dca98b6bb200d9e6a4bcb73a6ed04 csv2xlsx_amd64.exe |
|
| 16 |
- 8667562a9b81ec0c4e978ff928594e25b4917240f833157c929a5a26e50de0b0 csv2xlsx_linux_386 |
|
| 17 |
- 669fcaff7ea1824fd86a89c740c8743c5d4730505a2aba65cdfccdbe25eb3152 csv2xlsx_linux_amd64 |
|
| 18 |
- e84d29d4f8142a95ed80275fb94ee1356f5c3e02f0b5619b189273196adeece8 csv2xlsx_osx |
|
| 14 |
+ 3bc2df1a0be3bb8cee327ebab81029d4e0cad61b51b0996f36a6bd5002d8771c csv2xlsx_386.exe |
|
| 15 |
+ f6003e08e612a996d0d5099ccbb31cd7ca3a60155fca607bf913fe98a485d0ef csv2xlsx_amd64.exe |
|
| 16 |
+ c6c2fd10025234e3400e84be5dceb45dc370729b14b3084c384f4c82bd229950 csv2xlsx_linux_386 |
|
| 17 |
+ 9f177c313787c554fa9fabdb0852f76aaf7432ba0c136afb943faf757b827cff csv2xlsx_linux_amd64 |
|
| 18 |
+ d171af9b071429a2ce76caa8063dcdedd81ab0983be01b04ab2ece1195197194 csv2xlsx_osx |
|
| 19 | 19 |
|
| 20 | 20 |
|
| 21 | 21 |
### Usage |
| ... | ... |
@@ -38,6 +38,8 @@ Please see below for a list of command line options. |
| 38 | 38 |
column range to use (see below) |
| 39 | 39 |
-dateformat string |
| 40 | 40 |
format for CSV date cells (default YYYY-MM-DD) (default "2006-01-02") |
| 41 |
+ -encoding string |
|
| 42 |
+ character encoding of the CSV file, case-insensitive (default "utf-8"); see "Supported encodings" below |
|
| 41 | 43 |
-exceldateformat string |
| 42 | 44 |
Excel format for date cells (default as in Excel) |
| 43 | 45 |
-h display usage information |
| ... | ... |
@@ -62,6 +64,55 @@ Please see below for a list of command line options. |
| 62 | 64 |
separated from numbers with a colon (e.g. 0:text,3-16:number,17:date) |
| 63 | 65 |
``` |
| 64 | 66 |
|
| 67 |
+### Supported encodings |
|
| 68 |
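+ Besides the default UTF-8 (`utf-8` or `utf8`), the following encoding names are accepted; they are matched case-insensitively: |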
|
| 69 |
+ * Codepage037 |
|
| 70 |
+ * Codepage437 |
|
| 71 |
+ * Codepage850 |
|
| 72 |
+ * Codepage852 |
|
| 73 |
+ * Codepage855 |
|
| 74 |
+ * Codepage858 |
|
| 75 |
+ * Codepage860 |
|
| 76 |
+ * Codepage862 |
|
| 77 |
+ * Codepage863 |
|
| 78 |
+ * Codepage865 |
|
| 79 |
+ * Codepage866 |
|
| 80 |
+ * Codepage1047 |
|
| 81 |
+ * Codepage1140 |
|
| 82 |
+ * ISO8859_1 |
|
| 83 |
+ * ISO8859_2 |
|
| 84 |
+ * ISO8859_3 |
|
| 85 |
+ * ISO8859_4 |
|
| 86 |
+ * ISO8859_5 |
|
| 87 |
+ * ISO8859_6 |
|
| 88 |
+ * ISO8859_6E |
|
| 89 |
+ * ISO8859_6I |
|
| 90 |
+ * ISO8859_7 |
|
| 91 |
+ * ISO8859_8 |
|
| 92 |
+ * ISO8859_8E |
|
| 93 |
+ * ISO8859_8I |
|
| 94 |
+ * ISO8859_9 |
|
| 95 |
+ * ISO8859_10 |
|
| 96 |
+ * ISO8859_13 |
|
| 97 |
+ * ISO8859_14 |
|
| 98 |
+ * ISO8859_15 |
|
| 99 |
+ * ISO8859_16 |
|
| 100 |
+ * Koi8r |
|
| 101 |
+ * Koi8u |
|
| 102 |
+ * Macintosh |
|
| 103 |
+ * MacintoshCyrillic |
|
| 104 |
+ * Windows874 |
|
| 105 |
+ * Windows1250 |
|
| 106 |
+ * Windows1251 |
|
| 107 |
+ * Windows1252 |
|
| 108 |
+ * Windows1253 |
|
| 109 |
+ * Windows1254 |
|
| 110 |
+ * Windows1255 |
|
| 111 |
+ * Windows1256 |
|
| 112 |
+ * Windows1257 |
|
| 113 |
+ * Windows1258 |
|
| 114 |
+ |
|
| 115 |
+ |
|
| 65 | 116 |
### Source |
| 66 | 117 |
|
| 67 | 118 |
This tool fulfills a special requirement, and I will extend its functionality if the need arises. As I found out there are lots |
| ... | ... |
@@ -90,6 +141,10 @@ I am still amazed what you can accomplish within less than 200 lines of code in |
| 90 | 141 |
- option --colsep now handles \t for tab correctly |
| 91 | 142 |
- lots of bug fixes |
| 92 | 143 |
|
| 144 |
+ 2017-12-21 0.2 |
|
| 145 |
+ - added option --encoding |
|
| 146 |
+ |
|
| 147 |
+ |
|
| 93 | 148 |
### License |
| 94 | 149 |
|
| 95 | 150 |
This code is licensed under the 2-Clause BSD License: |
| ... | ... |
@@ -3,16 +3,18 @@ package main |
| 3 | 3 |
import ( |
| 4 | 4 |
"bufio" |
| 5 | 5 |
"encoding/csv" |
| 6 |
- "errors" |
|
| 7 | 6 |
"flag" |
| 8 | 7 |
"fmt" |
| 9 |
- "github.com/tealeg/xlsx" |
|
| 8 |
+ "io" |
|
| 10 | 9 |
"os" |
| 11 | 10 |
"path/filepath" |
| 12 | 11 |
"strconv" |
| 13 | 12 |
"strings" |
| 14 | 13 |
"time" |
| 15 | 14 |
"unicode/utf8" |
| 15 |
+ |
|
| 16 |
+ "github.com/tealeg/xlsx" |
|
| 17 |
+ "golang.org/x/text/encoding/charmap" |
|
| 16 | 18 |
) |
| 17 | 19 |
|
| 18 | 20 |
var ( |
| ... | ... |
@@ -21,6 +23,7 @@ var ( |
| 21 | 23 |
parmSheet string |
| 22 | 24 |
parmInFile string |
| 23 | 25 |
parmOutFile string |
| 26 |
+ parmEncoding string |
|
| 24 | 27 |
parmColSep rune |
| 25 | 28 |
parmDateFormat string |
| 26 | 29 |
parmExcelDateFormat string |
| ... | ... |
@@ -51,7 +54,7 @@ func parseCommaGroup(grpstr string) (map[int]string, error) {
|
| 51 | 54 |
// we need exactly one number or an a-b interval (2 number parts) |
| 52 | 55 |
parts := strings.Split(grpstr, "-") |
| 53 | 56 |
if len(parts) < 1 || len(parts) > 2 {
|
| 54 |
- return nil, errors.New(fmt.Sprintf("Invalid range group '%s' found.", grpstr))
|
|
| 57 |
+ return nil, fmt.Errorf("Invalid range group '%s' found.", grpstr)
|
|
| 55 | 58 |
} |
| 56 | 59 |
// check for type (currently needed only for columns, will be ignored for lines) |
| 57 | 60 |
datatype := "standard" |
| ... | ... |
@@ -106,6 +109,7 @@ func parseCommandLine() {
|
| 106 | 109 |
flag.StringVar(&parmRows, "rows", "", "list of line numbers to use (1,2,8 or 1,3-14,28)") |
| 107 | 110 |
flag.StringVar(&parmSheet, "sheet", "fromCSV", "tab name of the Excel sheet") |
| 108 | 111 |
flag.StringVar(&tmpStr, "colsep", "|", "column separator (default '|') ") |
| 112 |
+ flag.StringVar(&parmEncoding, "encoding", "utf-8", "character encoding") |
|
| 109 | 113 |
// not settable with csv reader |
| 110 | 114 |
//flag.StringVar(&parmRowSep, "rowsep", "\n", "row separator (default LF) ") |
| 111 | 115 |
flag.BoolVar(&parmNoHeader, "noheader", false, "no headers in first line, only data lines (default false)") |
| ... | ... |
@@ -151,14 +155,116 @@ func parseCommandLine() {
|
| 151 | 155 |
// currently there is no need for gigabyte files, but maybe this should be done streaming. |
| 152 | 156 |
// in addition, we need row and column counts first to set the default ranges later on in the program flow. |
| 153 | 157 |
func loadInputFile(filename string) (rows [][]string) {
|
| 158 |
+ var rdr io.Reader |
|
| 154 | 159 |
f, err := os.Open(filename) |
| 155 | 160 |
defer f.Close() |
| 156 | 161 |
if err != nil {
|
| 157 | 162 |
fmt.Println(err) |
| 158 | 163 |
os.Exit(1) |
| 159 | 164 |
} |
| 160 |
- // use csv reader to read entire file |
|
| 161 |
- r := csv.NewReader(bufio.NewReader(f)) |
|
| 165 |
+ enc := strings.ToLower(parmEncoding) |
|
| 166 |
+ |
|
| 167 |
+ if enc == "utf8" || enc == "utf-8" {
|
|
| 168 |
+ rdr = bufio.NewReader(f) |
|
| 169 |
+ } else {
|
|
| 170 |
+ switch strings.ToUpper(parmEncoding) {
|
|
| 171 |
+ case "CODEPAGE037": |
|
| 172 |
+ rdr = charmap.CodePage037.NewDecoder().Reader(f) |
|
| 173 |
+ case "CODEPAGE437": |
|
| 174 |
+ rdr = charmap.CodePage437.NewDecoder().Reader(f) |
|
| 175 |
+ case "CODEPAGE850": |
|
| 176 |
+ rdr = charmap.CodePage850.NewDecoder().Reader(f) |
|
| 177 |
+ case "CODEPAGE852": |
|
| 178 |
+ rdr = charmap.CodePage852.NewDecoder().Reader(f) |
|
| 179 |
+ case "CODEPAGE855": |
|
| 180 |
+ rdr = charmap.CodePage855.NewDecoder().Reader(f) |
|
| 181 |
+ case "CODEPAGE858": |
|
| 182 |
+ rdr = charmap.CodePage858.NewDecoder().Reader(f) |
|
| 183 |
+ case "CODEPAGE860": |
|
| 184 |
+ rdr = charmap.CodePage860.NewDecoder().Reader(f) |
|
| 185 |
+ case "CODEPAGE862": |
|
| 186 |
+ rdr = charmap.CodePage862.NewDecoder().Reader(f) |
|
| 187 |
+ case "CODEPAGE863": |
|
| 188 |
+ rdr = charmap.CodePage863.NewDecoder().Reader(f) |
|
| 189 |
+ case "CODEPAGE865": |
|
| 190 |
+ rdr = charmap.CodePage865.NewDecoder().Reader(f) |
|
| 191 |
+ case "CODEPAGE866": |
|
| 192 |
+ rdr = charmap.CodePage866.NewDecoder().Reader(f) |
|
| 193 |
+ case "CODEPAGE1047": |
|
| 194 |
+ rdr = charmap.CodePage1047.NewDecoder().Reader(f) |
|
| 195 |
+ case "CODEPAGE1140": |
|
| 196 |
+ rdr = charmap.CodePage1140.NewDecoder().Reader(f) |
|
| 197 |
+ case "ISO8859_1": |
|
| 198 |
+ rdr = charmap.ISO8859_1.NewDecoder().Reader(f) |
|
| 199 |
+ case "ISO8859_2": |
|
| 200 |
+ rdr = charmap.ISO8859_2.NewDecoder().Reader(f) |
|
| 201 |
+ case "ISO8859_3": |
|
| 202 |
+ rdr = charmap.ISO8859_3.NewDecoder().Reader(f) |
|
| 203 |
+ case "ISO8859_4": |
|
| 204 |
+ rdr = charmap.ISO8859_4.NewDecoder().Reader(f) |
|
| 205 |
+ case "ISO8859_5": |
|
| 206 |
+ rdr = charmap.ISO8859_5.NewDecoder().Reader(f) |
|
| 207 |
+ case "ISO8859_6": |
|
| 208 |
+ rdr = charmap.ISO8859_6.NewDecoder().Reader(f) |
|
| 209 |
+ case "ISO8859_6E": |
|
| 210 |
+ rdr = charmap.ISO8859_6E.NewDecoder().Reader(f) |
|
| 211 |
+ case "ISO8859_6I": |
|
| 212 |
+ rdr = charmap.ISO8859_6I.NewDecoder().Reader(f) |
|
| 213 |
+ case "ISO8859_7": |
|
| 214 |
+ rdr = charmap.ISO8859_7.NewDecoder().Reader(f) |
|
| 215 |
+ case "ISO8859_8": |
|
| 216 |
+ rdr = charmap.ISO8859_8.NewDecoder().Reader(f) |
|
| 217 |
+ case "ISO8859_8E": |
|
| 218 |
+ rdr = charmap.ISO8859_8E.NewDecoder().Reader(f) |
|
| 219 |
+ case "ISO8859_8I": |
|
| 220 |
+ rdr = charmap.ISO8859_8I.NewDecoder().Reader(f) |
|
| 221 |
+ case "ISO8859_9": |
|
| 222 |
+ rdr = charmap.ISO8859_9.NewDecoder().Reader(f) |
|
| 223 |
+ case "ISO8859_10": |
|
| 224 |
+ rdr = charmap.ISO8859_10.NewDecoder().Reader(f) |
|
| 225 |
+ case "ISO8859_13": |
|
| 226 |
+ rdr = charmap.ISO8859_13.NewDecoder().Reader(f) |
|
| 227 |
+ case "ISO8859_14": |
|
| 228 |
+ rdr = charmap.ISO8859_14.NewDecoder().Reader(f) |
|
| 229 |
+ case "ISO8859_15": |
|
| 230 |
+ rdr = charmap.ISO8859_15.NewDecoder().Reader(f) |
|
| 231 |
+ case "ISO8859_16": |
|
| 232 |
+ rdr = charmap.ISO8859_16.NewDecoder().Reader(f) |
|
| 233 |
+ case "KOI8R": |
|
| 234 |
+ rdr = charmap.KOI8R.NewDecoder().Reader(f) |
|
| 235 |
+ case "KOI8U": |
|
| 236 |
+ rdr = charmap.KOI8U.NewDecoder().Reader(f) |
|
| 237 |
+ case "MACINTOSH": |
|
| 238 |
+ rdr = charmap.Macintosh.NewDecoder().Reader(f) |
|
| 239 |
+ case "MACINTOSHCYRILLIC": |
|
| 240 |
+ rdr = charmap.MacintoshCyrillic.NewDecoder().Reader(f) |
|
| 241 |
+ case "WINDOWS874": |
|
| 242 |
+ rdr = charmap.Windows874.NewDecoder().Reader(f) |
|
| 243 |
+ case "WINDOWS1250": |
|
| 244 |
+ rdr = charmap.Windows1250.NewDecoder().Reader(f) |
|
| 245 |
+ case "WINDOWS1251": |
|
| 246 |
+ rdr = charmap.Windows1251.NewDecoder().Reader(f) |
|
| 247 |
+ case "WINDOWS1252": |
|
| 248 |
+ rdr = charmap.Windows1252.NewDecoder().Reader(f) |
|
| 249 |
+ case "WINDOWS1253": |
|
| 250 |
+ rdr = charmap.Windows1253.NewDecoder().Reader(f) |
|
| 251 |
+ case "WINDOWS1254": |
|
| 252 |
+ rdr = charmap.Windows1254.NewDecoder().Reader(f) |
|
| 253 |
+ case "WINDOWS1255": |
|
| 254 |
+ rdr = charmap.Windows1255.NewDecoder().Reader(f) |
|
| 255 |
+ case "WINDOWS1256": |
|
| 256 |
+ rdr = charmap.Windows1256.NewDecoder().Reader(f) |
|
| 257 |
+ case "WINDOWS1257": |
|
| 258 |
+ rdr = charmap.Windows1257.NewDecoder().Reader(f) |
|
| 259 |
+ case "WINDOWS1258": |
|
| 260 |
+ rdr = charmap.Windows1258.NewDecoder().Reader(f) |
|
| 261 |
+ default: |
|
| 262 |
+ fmt.Println("Invalid encoding specified, defaulting to UTF-8")
|
|
| 263 |
+ rdr = bufio.NewReader(f) |
|
| 264 |
+ } |
|
| 265 |
+ } |
|
| 266 |
+ |
|
| 267 |
+ r := csv.NewReader(rdr) |
|
| 162 | 268 |
r.Comma = parmColSep |
| 163 | 269 |
r.FieldsPerRecord = -1 |
| 164 | 270 |
r.LazyQuotes = true |
| ... | ... |
@@ -200,7 +306,7 @@ func writeCellContents(cell *xlsx.Cell, colString, colType string, rownum, colnu |
| 200 | 306 |
switch colType {
|
| 201 | 307 |
case "text": |
| 202 | 308 |
cell.SetString(colString) |
| 203 |
- case "number","currency": |
|
| 309 |
+ case "number", "currency": |
|
| 204 | 310 |
floatVal, err := strconv.ParseFloat(colString, 64) |
| 205 | 311 |
if err != nil {
|
| 206 | 312 |
fmt.Println(fmt.Sprintf("Cell (%d,%d) is not a valid number, value: %s", rownum, colnum, colString))
|