diff --git a/README.md b/README.md index 099dbbb..fc3d229 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,9 @@ # xls -[![GoDoc](https://godoc.org/github.com/extrame/xls?status.svg)](https://godoc.org/github.com/extrame/xls) +[![GoDoc](https://godoc.org/github.com/csg800/xls?status.svg)](https://godoc.org/github.com/csg800/xls) Pure Golang xls library writen by [Rongshu Tech(chinese)](http://www.rongshu.tech). +Add Formula & Format support by [chen.s.g] (http://www.imohe.com) Thanks for contributions from Tamás Gulácsi, sergeilem. diff --git a/cell_range.go b/cell_range.go index 2dde04e..ec30526 100644 --- a/cell_range.go +++ b/cell_range.go @@ -46,6 +46,10 @@ type HyperLink struct { IsUrl bool } +func (h *HyperLink) Debug(wb *WorkBook) { + fmt.Printf("hyper link col dump:%#+v\n", h) +} + //get the hyperlink string, use the public variable Url to get the original Url func (h *HyperLink) String(wb *WorkBook) []string { res := make([]string, h.LastColB-h.FristColB+1) diff --git a/col.go b/col.go index 371af91..20dc111 100644 --- a/col.go +++ b/col.go @@ -1,17 +1,31 @@ package xls import ( + "errors" "fmt" "math" "strconv" - "time" - - "github.com/extrame/goyymmdd" ) +var ErrIsInt = errors.New("is int") + +/* Data types */ +const TYPE_STRING2 = 1 +const TYPE_STRING = 2 +const TYPE_FORMULA = 3 +const TYPE_NUMERIC = 4 +const TYPE_BOOL = 5 +const TYPE_NULL = 6 +const TYPE_INLINE = 7 +const TYPE_ERROR = 8 +const TYPE_DATETIME = 9 +const TYPE_PERCENTAGE = 10 +const TYPE_CURRENCY = 11 + //content type type contentHandler interface { + Debug(wb *WorkBook) String(*WorkBook) []string FirstCol() uint16 LastCol() uint16 @@ -26,6 +40,10 @@ type Coler interface { Row() uint16 } +func (c *Col) Debug(wb *WorkBook) { + fmt.Printf("col dump:%#+v\n", c) +} + func (c *Col) Row() uint16 { return c.RowB } @@ -42,39 +60,12 @@ func (c *Col) String(wb *WorkBook) []string { return []string{"default"} } -type XfRk struct { - Index uint16 - Rk RK -} +type RK uint32 -func (xf *XfRk) String(wb *WorkBook) string { - idx := int(xf.Index) - if len(wb.Xfs) > idx { - fNo := wb.Xfs[idx].formatNo() - if fNo >= 164 { // user defined format - if formatter := wb.Formats[fNo]; formatter != nil { - i, f, isFloat := xf.Rk.number() - if !isFloat { - f = float64(i) - } - t := timeFromExcelTime(f, wb.dateMode == 1) - return yymmdd.Format(t, formatter.str) - } - // see http://www.openoffice.org/sc/excelfileformat.pdf Page #174 - } else if 14 <= fNo && fNo <= 17 || fNo == 22 || 27 <= fNo && fNo <= 36 || 50 <= fNo && fNo <= 58 { // jp. date format - i, f, isFloat := xf.Rk.number() - if !isFloat { - f = float64(i) - } - t := timeFromExcelTime(f, wb.dateMode == 1) - return t.Format(time.RFC3339) //TODO it should be international - } - } - return xf.Rk.String() +func (rk RK) Debug(wb *WorkBook) { + fmt.Printf("rk dump:%#+v\n", rk) } -type RK uint32 - func (rk RK) number() (intNum int64, floatNum float64, isFloat bool) { multiplied := rk & 1 isInt := rk & 2 @@ -97,22 +88,40 @@ func (rk RK) number() (intNum int64, floatNum float64, isFloat bool) { return int64(val), 0, false } -func (rk RK) String() string { +func (rk RK) float() float64 { + var i, f, isFloat = rk.number() + if !isFloat { + f = float64(i) + } + + return f +} + +func (rk RK) String(wb *WorkBook) string { i, f, isFloat := rk.number() if isFloat { - return strconv.FormatFloat(f, 'f', -1, 64) + return strconv.FormatFloat(f, 'f', wb.defaultFloatBit, 64) } + return strconv.FormatInt(i, 10) } -var ErrIsInt = fmt.Errorf("is int") +type XfRk struct { + Index uint16 + Rk RK +} + +func (xf *XfRk) Debug(wb *WorkBook) { + fmt.Printf("xfrk dump:%#+v\n", wb.Xfs[xf.Index]) + xf.Rk.Debug(wb) +} -func (rk RK) Float() (float64, error) { - _, f, isFloat := rk.number() - if !isFloat { - return 0, ErrIsInt +func (xf *XfRk) String(wb *WorkBook) string { + if val, ok := wb.Format(xf.Index, xf.Rk.float()); ok { + return val } - return f, nil + + return xf.Rk.String(wb) } type MulrkCol struct { @@ -121,16 +130,24 @@ type MulrkCol struct { LastColB uint16 } +func (c *MulrkCol) Debug(wb *WorkBook) { + fmt.Printf("mulrk dump:%#+v\n", c) + + for _, v := range c.Xfrks { + v.Debug(wb) + } +} + func (c *MulrkCol) LastCol() uint16 { return c.LastColB } func (c *MulrkCol) String(wb *WorkBook) []string { var res = make([]string, len(c.Xfrks)) - for i := 0; i < len(c.Xfrks); i++ { - xfrk := c.Xfrks[i] - res[i] = xfrk.String(wb) + for i, v := range c.Xfrks { + res[i] = v.String(wb) } + return res } @@ -140,6 +157,10 @@ type MulBlankCol struct { LastColB uint16 } +func (c *MulBlankCol) Debug(wb *WorkBook) { + fmt.Printf("mul blank dump:%#+v\n", c) +} + func (c *MulBlankCol) LastCol() uint16 { return c.LastColB } @@ -154,23 +175,217 @@ type NumberCol struct { Float float64 } +func (c *NumberCol) Debug(wb *WorkBook) { + fmt.Printf("number col dump:%#+v\n", c) +} + func (c *NumberCol) String(wb *WorkBook) []string { - return []string{strconv.FormatFloat(c.Float, 'f', -1, 64)} + if wb.Debug { + fmt.Printf("number col dump:%#+v\n", c) + } + + if v, ok := wb.Format(c.Index, c.Float); ok { + return []string{v} + } + + return []string{strconv.FormatFloat(c.Float, 'f', wb.defaultFloatBit, 64)} } -type FormulaCol struct { - Header struct { - Col - IndexXf uint16 - Result [8]byte - Flags uint16 - _ uint32 +type FormulaColHeader struct { + Col + IndexXf uint16 + Result [8]byte + Flags uint16 + _ uint32 +} + +// Value formula header value +func (f *FormulaColHeader) Value() float64 { + var rknumhigh = ByteToUint32(f.Result[4:8]) + var rknumlow = ByteToUint32(f.Result[0:4]) + var sign = (rknumhigh & 0x80000000) >> 31 + var exp = float64(((int32(rknumhigh) & 0x7ff00000) >> 20) - 1023) + var mantissa = (0x100000 | (rknumhigh & 0x000fffff)) + var mantissalow1 = (rknumlow & 0x80000000) >> 31 + var mantissalow2 = (rknumlow & 0x7fffffff) + var value = float64(mantissa) / math.Pow(2, 20-exp) + + if mantissalow1 != 0 { + value += 1 / math.Pow(2, 21-exp) + } + + value += float64(mantissalow2) / math.Pow(2, 52-exp) + if 0 != sign { + value *= -1 } - Bts []byte + + return value +} + +// IsPart part of shared formula check +// WARNING: +// We can apparently not rely on $isPartOfSharedFormula. Even when $isPartOfSharedFormula = true +// the formula data may be ordinary formula data, therefore we need to check +// explicitly for the tExp token (0x01) +func (f *FormulaColHeader) IsPart() bool { + return 0 != (0x0008 & ByteToUint16(f.Result[6:8])) +} + +type FormulaCol struct { + parsed bool + Code uint16 + Btl uint16 + Btc uint16 + Bts []byte + Header *FormulaColHeader + ws int + vType int + value string +} + +func (c *FormulaCol) Debug(wb *WorkBook) { + fmt.Printf("formula col dump:%#+v\n", c) +} + +func (c *FormulaCol) Row() uint16 { + return c.Header.Col.RowB +} + +func (c *FormulaCol) FirstCol() uint16 { + return c.Header.Col.FirstColB +} + +func (c *FormulaCol) LastCol() uint16 { + return c.Header.Col.FirstColB } func (c *FormulaCol) String(wb *WorkBook) []string { - return []string{"FormulaCol"} + if !c.parsed { + c.parse(wb, true) + } + + if wb.Debug { + fmt.Printf("formula col dump:%#+v\n", c) + } + + return []string{c.value} +} + +func (c *FormulaCol) parse(wb *WorkBook, ref bool) { + c.parsed = true + + if 0 == c.Header.Result[0] && 255 == c.Header.Result[6] && 255 == c.Header.Result[7] { + // String formula. Result follows in appended STRING record + c.vType = TYPE_STRING + } else if 1 == c.Header.Result[0] && 255 == c.Header.Result[6] && 255 == c.Header.Result[7] { + // Boolean formula. Result is in +2; 0=false, 1=true + c.vType = TYPE_BOOL + if 0 == c.Header.Result[3] { + c.value = "false" + } else { + c.value = "true" + } + } else if 2 == c.Header.Result[0] && 255 == c.Header.Result[6] && 255 == c.Header.Result[7] { + // Error formula. Error code is in +2 + c.vType = TYPE_ERROR + switch c.Header.Result[3] { + case 0x00: + c.value = "#NULL!" + case 0x07: + c.value = "#DIV/0" + case 0x0F: + c.value = "#VALUE!" + case 0x17: + c.value = "#REF!" + case 0x1D: + c.value = "#NAME?" + case 0x24: + c.value = "#NUM!" + case 0x2A: + c.value = "#N/A" + } + } else if 3 == c.Header.Result[0] && 255 == c.Header.Result[6] && 255 == c.Header.Result[7] { + // Formula result is a null string + c.vType = TYPE_NULL + c.value = "" + } else { + // formula result is a number, first 14 bytes like _NUMBER record + c.vType = TYPE_NUMERIC + + var flag bool + if c.isGetCurTime() { + // if date time format is not support, use time.RFC3339 + if c.value, flag = wb.Format(c.Header.IndexXf, 0); !flag { + c.value = parseTime(0, time.RFC3339) + } + } else if c.isRef() { + if ref { + var ws = -1 + var find bool + var rIdx uint16 + var cIdx uint16 + + if 0x07 == c.Bts[0] { + var exi = ByteToUint16(c.Bts[3:5]) + rIdx = ByteToUint16(c.Bts[5:7]) + cIdx = 0x00FF & ByteToUint16(c.Bts[7:9]) + if exi <= wb.ref.Num { + ws = int(wb.ref.Info[int(exi)].FirstSheetIndex) + } + } else { + ws = c.ws + rIdx = ByteToUint16(c.Bts[3:5]) + cIdx = 0x00FF & ByteToUint16(c.Bts[5:7]) + } + + if ws < len(wb.sheets) { + if row := wb.GetSheet(ws).Row(int(rIdx)); nil != row { + find = true + c.value = row.Col(int(cIdx)) + } + } + if !find { + c.value = "#REF!" + } + } else { + c.parsed = false + } + } else { + c.value, flag = wb.Format(c.Header.IndexXf, c.Header.Value()) + if !flag { + c.value = strconv.FormatFloat(c.Header.Value(), 'f', wb.defaultFloatBit, 64) + } + } + } +} + +// isRef return cell is reference to other cell +func (c *FormulaCol) isRef() bool { + if 0x05 == c.Bts[0] && (0x24 == c.Bts[2] || 0x44 == c.Bts[2] || 0x64 == c.Bts[2]) { + return true + } else if 0x07 == c.Bts[0] && (0x3A == c.Bts[2] || 0x5A == c.Bts[2] || 0x7A == c.Bts[2]) { + return true + } + + return false +} + +// isGetCurTime return cell value is get current date or datetime flag +func (c *FormulaCol) isGetCurTime() bool { + var ret bool + var next byte + + if 0x19 == c.Bts[2] && (0x21 == c.Bts[6] || 0x41 == c.Bts[6] || 0x61 == c.Bts[6]) { + next = c.Bts[7] + } else if 0x21 == c.Bts[2] || 0x41 == c.Bts[2] || 0x61 == c.Bts[2] { + next = c.Bts[3] + } + + if 0x4A == next || 0xDD == next { + ret = true + } + + return ret } type RkCol struct { @@ -178,6 +393,10 @@ type RkCol struct { Xfrk XfRk } +func (c *RkCol) Debug(wb *WorkBook) { + fmt.Printf("rk col dump:%#+v\n", c) +} + func (c *RkCol) String(wb *WorkBook) []string { return []string{c.Xfrk.String(wb)} } @@ -188,7 +407,15 @@ type LabelsstCol struct { Sst uint32 } +func (c *LabelsstCol) Debug(wb *WorkBook) { + fmt.Printf("label sst col dump:%#+v\n", c) +} + func (c *LabelsstCol) String(wb *WorkBook) []string { + if wb.Debug { + fmt.Println("metlabel sst col dump:", c.Sst, wb.sst[int(c.Sst)]) + } + return []string{wb.sst[int(c.Sst)]} } @@ -197,6 +424,10 @@ type labelCol struct { Str string } +func (c *labelCol) Debug(wb *WorkBook) { + fmt.Printf("label col dump:%#+v\n", c) +} + func (c *labelCol) String(wb *WorkBook) []string { return []string{c.Str} } @@ -206,6 +437,10 @@ type BlankCol struct { Xf uint16 } +func (c *BlankCol) Debug(wb *WorkBook) { + fmt.Printf("blank col dump:%#+v\n", c) +} + func (c *BlankCol) String(wb *WorkBook) []string { return []string{""} } diff --git a/date.go b/date.go index b7d2d04..b6e6455 100644 --- a/date.go +++ b/date.go @@ -5,37 +5,57 @@ import ( "time" ) -const MJD_0 float64 = 2400000.5 -const MJD_JD2000 float64 = 51544.5 +// timeLocationUTC defined the UTC time location. +var timeLocationUTC, _ = time.LoadLocation("UTC") +// timeToUTCTime provides function to convert time to UTC time. +func timeToUTCTime(t time.Time) time.Time { + return time.Date(t.Year(), t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second(), t.Nanosecond(), timeLocationUTC) +} + +// timeToExcelTime provides function to convert time to Excel time. +func timeToExcelTime(t time.Time) float64 { + return float64(t.UnixNano())/8.64e13 + 25569.0 +} + +// shiftJulianToNoon provides function to process julian date to noon. func shiftJulianToNoon(julianDays, julianFraction float64) (float64, float64) { switch { case -0.5 < julianFraction && julianFraction < 0.5: julianFraction += 0.5 case julianFraction >= 0.5: - julianDays += 1 + julianDays++ julianFraction -= 0.5 case julianFraction <= -0.5: - julianDays -= 1 + julianDays-- julianFraction += 1.5 } return julianDays, julianFraction } -// Return the integer values for hour, minutes, seconds and -// nanoseconds that comprised a given fraction of a day. +// fractionOfADay provides function to return the integer values for hour, +// minutes, seconds and nanoseconds that comprised a given fraction of a day. +// values would round to 1 us. func fractionOfADay(fraction float64) (hours, minutes, seconds, nanoseconds int) { - f := 5184000000000000 * fraction - nanoseconds = int(math.Mod(f, 1000000000)) - f = f / 1000000000 - seconds = int(math.Mod(f, 60)) - f = f / 3600 - minutes = int(math.Mod(f, 60)) - f = f / 60 - hours = int(f) - return hours, minutes, seconds, nanoseconds + + const ( + c1us = 1e3 + c1s = 1e9 + c1day = 24 * 60 * 60 * c1s + ) + + frac := int64(c1day*fraction + c1us/2) + nanoseconds = int((frac%c1s)/c1us) * c1us + frac /= c1s + seconds = int(frac % 60) + frac /= 60 + minutes = int(frac % 60) + hours = int(frac / 60) + return } +// julianDateToGregorianTime provides function to convert julian date to +// gregorian time. func julianDateToGregorianTime(part1, part2 float64) time.Time { part1I, part1F := math.Modf(part1) part2I, part2F := math.Modf(part2) @@ -47,13 +67,12 @@ func julianDateToGregorianTime(part1, part2 float64) time.Time { return time.Date(year, time.Month(month), day, hours, minutes, seconds, nanoseconds, time.UTC) } -// By this point generations of programmers have repeated the -// algorithm sent to the editor of "Communications of the ACM" in 1968 -// (published in CACM, volume 11, number 10, October 1968, p.657). -// None of those programmers seems to have found it necessary to -// explain the constants or variable names set out by Henry F. Fliegel -// and Thomas C. Van Flandern. Maybe one day I'll buy that jounal and -// expand an explanation here - that day is not today. +// By this point generations of programmers have repeated the algorithm sent to +// the editor of "Communications of the ACM" in 1968 (published in CACM, volume +// 11, number 10, October 1968, p.657). None of those programmers seems to have +// found it necessary to explain the constants or variable names set out by +// Henry F. Fliegel and Thomas C. Van Flandern. Maybe one day I'll buy that +// jounal and expand an explanation here - that day is not today. func doTheFliegelAndVanFlandernAlgorithm(jd int) (day, month, year int) { l := jd + 68569 n := (4 * l) / 146097 @@ -68,24 +87,26 @@ func doTheFliegelAndVanFlandernAlgorithm(jd int) (day, month, year int) { return d, m, y } -// Convert an excelTime representation (stored as a floating point number) to a time.Time. +// timeFromExcelTime provides function to convert an excelTime representation +// (stored as a floating point number) to a time.Time. func timeFromExcelTime(excelTime float64, date1904 bool) time.Time { var date time.Time - var intPart int64 = int64(excelTime) - // Excel uses Julian dates prior to March 1st 1900, and - // Gregorian thereafter. + var intPart = int64(excelTime) + // Excel uses Julian dates prior to March 1st 1900, and Gregorian + // thereafter. if intPart <= 61 { const OFFSET1900 = 15018.0 const OFFSET1904 = 16480.0 + const MJD0 float64 = 2400000.5 var date time.Time if date1904 { - date = julianDateToGregorianTime(MJD_0+OFFSET1904, excelTime) + date = julianDateToGregorianTime(MJD0, excelTime+OFFSET1904) } else { - date = julianDateToGregorianTime(MJD_0+OFFSET1900, excelTime) + date = julianDateToGregorianTime(MJD0, excelTime+OFFSET1900) } return date } - var floatPart float64 = excelTime - float64(intPart) + var floatPart = excelTime - float64(intPart) var dayNanoSeconds float64 = 24 * 60 * 60 * 1000 * 1000 * 1000 if date1904 { date = time.Date(1904, 1, 1, 0, 0, 0, 0, time.UTC) diff --git a/example_test.go b/example_test.go index e62fc5b..20ca64c 100644 --- a/example_test.go +++ b/example_test.go @@ -2,6 +2,7 @@ package xls import ( "fmt" + "testing" ) func ExampleOpen() { @@ -35,3 +36,23 @@ func ExampleWorkBook_GetSheet() { } } } + +func BenchmarkGetSheet(b *testing.B) { + for i := 0; i < b.N; i++ { + if xlFile, err := Open("Table.xls", "utf-8"); err == nil { + for i := 0; i < xlFile.NumSheets(); i++ { + xlFile.GetSheet(i) + } + } + } +} + +func BenchmarkGetSheetWithBuffer(b *testing.B) { + for i := 0; i < b.N; i++ { + if xlFile, err := OpenWithBuffer("Table.xls", "utf-8"); err == nil { + for i := 0; i < xlFile.NumSheets(); i++ { + xlFile.GetSheet(i) + } + } + } +} diff --git a/flag.go b/flag.go new file mode 100644 index 0000000..d1ba757 --- /dev/null +++ b/flag.go @@ -0,0 +1,122 @@ +package xls + +// Original file header of ParseXL (used as the base for this class): +// -------------------------------------------------------------------------------- +// Adapted from Excel_Spreadsheet_Reader developed by users bizon153, +// trex005, and mmp11 (SourceForge.net) +// https://sourceforge.net/projects/phpexcelreader/ +// Primary changes made by canyoncasa (dvc) for ParseXL 1.00 ... +// Modelled moreso after Perl Excel Parse/Write modules +// Added Parse_Excel_Spreadsheet object +// Reads a whole worksheet or tab as row,column array or as +// associated hash of indexed rows and named column fields +// Added variables for worksheet (tab) indexes and names +// Added an object call for loading individual woorksheets +// Changed default indexing defaults to 0 based arrays +// Fixed date/time and percent formats +// Includes patches found at SourceForge... +// unicode patch by nobody +// unpack("d") machine depedency patch by matchy +// boundsheet utf16 patch by bjaenichen +// Renamed functions for shorter names +// General code cleanup and rigor, including <80 column width +// Included a testcase Excel file and PHP example calls +// Code works for PHP 5.x + +// Primary changes made by canyoncasa (dvc) for ParseXL 1.10 ... +// http://sourceforge.net/tracker/index.php?func=detail&aid=1466964&group_id=99160&atid=623334 +// Decoding of formula conditions, results, and tokens. +// Support for user-defined named cells added as an array "namedcells" +// Patch code for user-defined named cells supports single cells only. +// NOTE: this patch only works for BIFF8 as BIFF5-7 use a different +// external sheet reference structure + +// ParseXL definitions +const XLS_BIFF8 = 0x0600 +const XLS_BIFF7 = 0x0500 +const XLS_WorkbookGlobals = 0x0005 +const XLS_Worksheet = 0x0010 + +// record identifiers +const XLS_Type_FORMULA = 0x0006 +const XLS_Type_EOF = 0x000a +const XLS_Type_PROTECT = 0x0012 +const XLS_Type_OBJECTPROTECT = 0x0063 +const XLS_Type_SCENPROTECT = 0x00dd +const XLS_Type_PASSWORD = 0x0013 +const XLS_Type_HEADER = 0x0014 +const XLS_Type_FOOTER = 0x0015 +const XLS_Type_EXTERNSHEET = 0x0017 +const XLS_Type_DEFINEDNAME = 0x0018 +const XLS_Type_VERTICALPAGEBREAKS = 0x001a +const XLS_Type_HORIZONTALPAGEBREAKS = 0x001b +const XLS_Type_NOTE = 0x001c +const XLS_Type_SELECTION = 0x001d +const XLS_Type_DATEMODE = 0x0022 +const XLS_Type_EXTERNNAME = 0x0023 +const XLS_Type_LEFTMARGIN = 0x0026 +const XLS_Type_RIGHTMARGIN = 0x0027 +const XLS_Type_TOPMARGIN = 0x0028 +const XLS_Type_BOTTOMMARGIN = 0x0029 +const XLS_Type_PRINTGRIDLINES = 0x002b +const XLS_Type_FILEPASS = 0x002f +const XLS_Type_FONT = 0x0031 +const XLS_Type_CONTINUE = 0x003c +const XLS_Type_PANE = 0x0041 +const XLS_Type_CODEPAGE = 0x0042 +const XLS_Type_DEFCOLWIDTH = 0x0055 +const XLS_Type_OBJ = 0x005d +const XLS_Type_COLINFO = 0x007d +const XLS_Type_IMDATA = 0x007f +const XLS_Type_SHEETPR = 0x0081 +const XLS_Type_HCENTER = 0x0083 +const XLS_Type_VCENTER = 0x0084 +const XLS_Type_SHEET = 0x0085 +const XLS_Type_PALETTE = 0x0092 +const XLS_Type_SCL = 0x00a0 +const XLS_Type_PAGESETUP = 0x00a1 +const XLS_Type_MULRK = 0x00bd +const XLS_Type_MULBLANK = 0x00be +const XLS_Type_DBCELL = 0x00d7 +const XLS_Type_XF = 0x00e0 +const XLS_Type_MERGEDCELLS = 0x00e5 +const XLS_Type_MSODRAWINGGROUP = 0x00eb +const XLS_Type_MSODRAWING = 0x00ec +const XLS_Type_SST = 0x00fc +const XLS_Type_LABELSST = 0x00fd +const XLS_Type_EXTSST = 0x00ff +const XLS_Type_EXTERNALBOOK = 0x01ae +const XLS_Type_DATAVALIDATIONS = 0x01b2 +const XLS_Type_TXO = 0x01b6 +const XLS_Type_HYPERLINK = 0x01b8 +const XLS_Type_DATAVALIDATION = 0x01be +const XLS_Type_DIMENSION = 0x0200 +const XLS_Type_BLANK = 0x0201 +const XLS_Type_NUMBER = 0x0203 +const XLS_Type_LABEL = 0x0204 +const XLS_Type_BOOLERR = 0x0205 +const XLS_Type_STRING = 0x0207 +const XLS_Type_ROW = 0x0208 +const XLS_Type_INDEX = 0x020b +const XLS_Type_ARRAY = 0x0221 +const XLS_Type_DEFAULTROWHEIGHT = 0x0225 +const XLS_Type_WINDOW2 = 0x023e +const XLS_Type_RK = 0x027e +const XLS_Type_STYLE = 0x0293 +const XLS_Type_FORMAT = 0x041e +const XLS_Type_SHAREDFMLA = 0x04bc +const XLS_Type_BOF = 0x0809 +const XLS_Type_SHEETPROTECTION = 0x0867 +const XLS_Type_RANGEPROTECTION = 0x0868 +const XLS_Type_SHEETLAYOUT = 0x0862 +const XLS_Type_XFEXT = 0x087d +const XLS_Type_PAGELAYOUTVIEW = 0x088b +const XLS_Type_UNKNOWN = 0xffff + +// Encryption type +const MS_BIFF_CRYPTO_NONE = 0 +const MS_BIFF_CRYPTO_XOR = 1 +const MS_BIFF_CRYPTO_RC4 = 2 + +// Size of stream blocks when using RC4 encryption +const REKEY_BLOCK = 0x400 diff --git a/format.go b/format.go index 35b576c..b98ba3a 100644 --- a/format.go +++ b/format.go @@ -1,9 +1,233 @@ package xls +import ( + "regexp" + "strconv" + "strings" + "time" +) + +// Excel styles can reference number formats that are built-in, all of which +// have an id less than 164. This is a possibly incomplete list comprised of as +// many of them as I could find. +var builtInNumFmt = map[uint16]string{ + 0: "general", + 1: "0", + 2: "0.00", + 3: "#,##0", + 4: "#,##0.00", + 9: "0%", + 10: "0.00%", + 11: "0.00e+00", + 12: "# ?/?", + 13: "# ??/??", + 14: "mm-dd-yyyy", + 15: "d-mmm-yyyy", + 16: "d-mmm", + 17: "mmm-yyyy", + 18: "h:mm am/pm", + 19: "h:mm:ss am/pm", + 20: "h:mm", + 21: "h:mm:ss", + 22: "m/d/yyyy h:mm", + 37: "#,##0 ;(#,##0)", + 38: "#,##0 ;[red](#,##0)", + 39: "#,##0.00;(#,##0.00)", + 40: "#,##0.00;[red](#,##0.00)", + 41: `_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)`, + 42: `_("$"* #,##0_);_("$* \(#,##0\);_("$"* "-"_);_(@_)`, + 43: `_(* #,##0.00_);_(* \(#,##0.00\);_(* "-"??_);_(@_)`, + 44: `_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)`, + 45: "mm:ss", + 46: "[h]:mm:ss", + 47: "mmss.0", + 48: "##0.0e+0", + 49: "@", + 58: time.RFC3339, +} + +// Excel date time mapper to go system +var dateTimeMapper = []struct{ xls, golang string }{ + {"yyyy", "2006"}, + {"yy", "06"}, + {"mmmm", "%%%%"}, + {"dddd", "&&&&"}, + {"dd", "02"}, + {"d", "2"}, + {"mmm", "Jan"}, + {"mmss", "0405"}, + {"ss", "05"}, + {"mm:", "04:"}, + {":mm", ":04"}, + {"mm", "01"}, + {"am/pm", "pm"}, + {"m/", "1/"}, + {"%%%%", "January"}, + {"&&&&", "Monday"}, +} + +// Format value interface type Format struct { Head struct { Index uint16 Size uint16 } - str string + Raw []string + bts int + vType int +} + +// Prepare format meta data +func (f *Format) Prepare(wb *WorkBook) { + var regexColor = regexp.MustCompile("^\\[[a-zA-Z]+\\]") + var regexSharp = regexp.MustCompile("^\\d+\\.?\\d?#+") + var regexFraction = regexp.MustCompile("#\\,?#*") + + for k, v := range f.Raw { + // In Excel formats, "_" is used to add spacing, which we can't do in HTML + v = strings.Replace(v, "_", "", -1) + + // Some non-number characters are escaped with \, which we don't need + v = strings.Replace(v, "\\", "", -1) + + // Some non-number strings are quoted, so we'll get rid of the quotes, likewise any positional * symbols + v = strings.Replace(v, "*", "", -1) + v = strings.Replace(v, "\"", "", -1) + + // strip () + v = strings.Replace(v, "(", "", -1) + v = strings.Replace(v, ")", "", -1) + + // strip color information + v = regexColor.ReplaceAllString(v, "") + + // replace 0.0#### as 0.00000 + if regexSharp.MatchString(v) { + v = strings.Replace(v, "#", "0", -1) + } + + // Strip # + v = regexFraction.ReplaceAllString(v, "") + + if 0 == f.vType { + if regexp.MustCompile("^(\\[\\$[A-Z]*-[0-9A-F]*\\])*[hmsdy]").MatchString(v) { + f.vType = TYPE_DATETIME + } else if strings.HasSuffix(v, "%") { + f.vType = TYPE_PERCENTAGE + } else if strings.HasPrefix(v, "$") || strings.HasPrefix(v, "¥") { + f.vType = TYPE_CURRENCY + } + } + + f.Raw[k] = strings.Trim(v, "\r\n\t ") + } + + if 0 == f.vType { + f.vType = TYPE_NUMERIC + } + + if TYPE_NUMERIC == f.vType || TYPE_CURRENCY == f.vType || TYPE_PERCENTAGE == f.vType { + if t := strings.SplitN(f.Raw[0], ".", 2); 2 == len(t) { + f.bts = strings.Count(t[1], "") + + if f.bts > 0 { + f.bts = f.bts - 1 + } + } else if -1 != strings.Index(f.Raw[0], "General") || -1 != strings.Index(f.Raw[0], "general") { + f.bts = -1 + } else if t := strings.Index(f.Raw[0], "@"); -1 != t { + f.bts = -1 + } + + if -1 == f.bts { + f.bts = wb.defaultFloatBit + } + } +} + +// String format content to spec string +// see http://www.openoffice.org/sc/excelfileformat.pdf Page #174 +func (f *Format) String(v float64) string { + var ret string + + switch f.vType { + case TYPE_NUMERIC: + ret = strconv.FormatFloat(v, 'f', f.bts, 64) + case TYPE_CURRENCY: + ret = strconv.FormatFloat(v, 'f', f.bts, 64) + case TYPE_PERCENTAGE: + if 0 == f.bts { + ret = strconv.FormatInt(int64(v)*100, 10) + "%" + } else { + ret = strconv.FormatFloat(v*100, 'f', f.bts, 64) + "%" + } + case TYPE_DATETIME: + ret = parseTime(v, f.Raw[0]) + default: + ret = strconv.FormatFloat(v, 'f', -1, 64) + } + + return ret +} + +// ByteToUint32 Read 32-bit unsigned integer +func ByteToUint32(b []byte) uint32 { + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +// ByteToUint16 Read 16-bit unsigned integer +func ByteToUint16(b []byte) uint16 { + return (uint16(b[0]) | (uint16(b[1]) << 8)) +} + +// parseTime provides function to returns a string parsed using time.Time. +// Replace Excel placeholders with Go time placeholders. For example, replace +// yyyy with 2006. These are in a specific order, due to the fact that m is used +// in month, minute, and am/pm. It would be easier to fix that with regular +// expressions, but if it's possible to keep this simple it would be easier to +// maintain. Full-length month and days (e.g. March, Tuesday) have letters in +// them that would be replaced by other characters below (such as the 'h' in +// March, or the 'd' in Tuesday) below. First we convert them to arbitrary +// characters unused in Excel Date formats, and then at the end, turn them to +// what they should actually be. +// Based off: http://www.ozgrid.com/Excel/CustomFormats.htm +func parseTime(v float64, f string) string { + var val time.Time + if 0 == v { + val = time.Now() + } else { + val = timeFromExcelTime(v, false) + } + + // It is the presence of the "am/pm" indicator that determines if this is + // a 12 hour or 24 hours time format, not the number of 'h' characters. + if is12HourTime(f) { + f = strings.Replace(f, "hh", "03", 1) + f = strings.Replace(f, "h", "3", 1) + } else { + f = strings.Replace(f, "hh", "15", 1) + f = strings.Replace(f, "h", "15", 1) + } + for _, repl := range dateTimeMapper { + f = strings.Replace(f, repl.xls, repl.golang, 1) + } + + // If the hour is optional, strip it out, along with the possible dangling + // colon that would remain. + if val.Hour() < 1 { + f = strings.Replace(f, "]:", "]", 1) + f = strings.Replace(f, "[03]", "", 1) + f = strings.Replace(f, "[3]", "", 1) + f = strings.Replace(f, "[15]", "", 1) + } else { + f = strings.Replace(f, "[3]", "3", 1) + f = strings.Replace(f, "[15]", "15", 1) + } + + return val.Format(f) +} + +// is12HourTime checks whether an Excel time format string is a 12 hours form. +func is12HourTime(format string) bool { + return strings.Contains(format, "am/pm") || strings.Contains(format, "AM/PM") || strings.Contains(format, "a/p") || strings.Contains(format, "A/P") } diff --git a/row.go b/row.go index 3100394..f5cea22 100644 --- a/row.go +++ b/row.go @@ -2,8 +2,8 @@ package xls type rowInfo struct { Index uint16 - Fcell uint16 - Lcell uint16 + First uint16 + Last uint16 Height uint16 Notused uint16 Notused2 uint16 @@ -20,27 +20,30 @@ type Row struct { //Col Get the Nth Col from the Row, if has not, return nil. //Suggest use Has function to test it. func (r *Row) Col(i int) string { - serial := uint16(i) + var val string + var serial = uint16(i) + if ch, ok := r.cols[serial]; ok { - strs := ch.String(r.wb) - return strs[0] + val = ch.String(r.wb)[0] } else { for _, v := range r.cols { if v.FirstCol() <= serial && v.LastCol() >= serial { - strs := v.String(r.wb) - return strs[serial-v.FirstCol()] + val = v.String(r.wb)[serial-v.FirstCol()] + + break } } } - return "" -} -//LastCol Get the number of Last Col of the Row. -func (r *Row) LastCol() int { - return int(r.info.Lcell) + return val } //FirstCol Get the number of First Col of the Row. func (r *Row) FirstCol() int { - return int(r.info.Fcell) + return int(r.info.First) +} + +//LastCol Get the number of Last Col of the Row. +func (r *Row) LastCol() int { + return int(r.info.Last) } diff --git a/workbook.go b/workbook.go index b0aa076..cb31c8c 100644 --- a/workbook.go +++ b/workbook.go @@ -4,100 +4,101 @@ import ( "bytes" "encoding/binary" "io" - "os" + "strings" "unicode/utf16" ) -//xls workbook type +//WorkBook excel work book type WorkBook struct { - Is5ver bool - Type uint16 - Codepage uint16 - Xfs []st_xf_data - Fonts []Font - Formats map[uint16]*Format - //All the sheets from the workbook - sheets []*WorkSheet - Author string - rs io.ReadSeeker - sst []string - continue_utf16 uint16 - continue_rich uint16 - continue_apsb uint32 - dateMode uint16 + Debug bool + Is5ver bool + Type uint16 + Codepage uint16 + Xfs []XF + Fonts []Font + Formats map[uint16]*Format + sheets []*WorkSheet + Author string + rs io.ReadSeeker + sst []string + ref *extSheetRef + continue_utf16 uint16 + continue_rich uint16 + continue_apsb uint32 + dateMode uint16 + defaultFloatBit int } -//read workbook from ole2 file +//newWorkBookFromOle2 read workbook from ole2 file func newWorkBookFromOle2(rs io.ReadSeeker) *WorkBook { - wb := new(WorkBook) - wb.Formats = make(map[uint16]*Format) - // wb.bts = bts - wb.rs = rs - wb.sheets = make([]*WorkSheet, 0) - wb.Parse(rs) + var wb = &WorkBook{ + rs: rs, + defaultFloatBit: -1, + ref: new(extSheetRef), + sheets: make([]*WorkSheet, 0), + Formats: make(map[uint16]*Format), + } + + wb.parse(rs) + wb.prepare() + return wb } -func (w *WorkBook) Parse(buf io.ReadSeeker) { +// SetDebug set debug flag +func (w *WorkBook) SetDebug(debug bool) { + w.Debug = debug +} + +// SetFloatBit 设置小数默认保留位数,默认值 -1 不限制 +func (w *WorkBook) SetFloatBit(n int) { + w.defaultFloatBit = n +} + +func (w *WorkBook) parse(buf io.ReadSeeker) { b := new(bof) - bof_pre := new(bof) - // buf := bytes.NewReader(bts) + bp := new(bof) offset := 0 + for { if err := binary.Read(buf, binary.LittleEndian, b); err == nil { - bof_pre, b, offset = w.parseBof(buf, b, bof_pre, offset) + bp, b, offset = w.parseBof(buf, b, bp, offset) } else { break } } } -func (w *WorkBook) addXf(xf st_xf_data) { - w.Xfs = append(w.Xfs, xf) -} - -func (w *WorkBook) addFont(font *FontInfo, buf io.ReadSeeker) { - name, _ := w.get_string(buf, uint16(font.NameB)) - w.Fonts = append(w.Fonts, Font{Info: font, Name: name}) -} - -func (w *WorkBook) addFormat(format *Format) { - if w.Formats == nil { - os.Exit(1) - } - w.Formats[format.Head.Index] = format -} - func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int) (after *bof, after_using *bof, offset int) { after = b after_using = pre var bts = make([]byte, b.Size) binary.Read(buf, binary.LittleEndian, bts) - buf_item := bytes.NewReader(bts) + item := bytes.NewReader(bts) switch b.Id { - case 0x809: + case XLS_Type_BOF: bif := new(biffHeader) - binary.Read(buf_item, binary.LittleEndian, bif) + binary.Read(item, binary.LittleEndian, bif) if bif.Ver != 0x600 { wb.Is5ver = true } wb.Type = bif.Type - case 0x042: // CODEPAGE - binary.Read(buf_item, binary.LittleEndian, &wb.Codepage) - case 0x3c: // CONTINUE - if pre.Id == 0xfc { - var size uint16 + case XLS_Type_CODEPAGE: + binary.Read(item, binary.LittleEndian, &wb.Codepage) + case XLS_Type_CONTINUE: + if pre.Id == XLS_Type_SST { var err error + var str string + var size uint16 if wb.continue_utf16 >= 1 { size = wb.continue_utf16 wb.continue_utf16 = 0 } else { - err = binary.Read(buf_item, binary.LittleEndian, &size) + err = binary.Read(item, binary.LittleEndian, &size) } for err == nil && offset_pre < len(wb.sst) { - var str string if size > 0 { - str, err = wb.get_string(buf_item, size) + str, err = wb.parseString(item, size, "sst continue") wb.sst[offset_pre] = wb.sst[offset_pre] + str } @@ -106,23 +107,23 @@ func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int } offset_pre++ - err = binary.Read(buf_item, binary.LittleEndian, &size) + err = binary.Read(item, binary.LittleEndian, &size) } } offset = offset_pre after = pre after_using = b - case 0xfc: // SST + case XLS_Type_SST: info := new(SstInfo) - binary.Read(buf_item, binary.LittleEndian, info) + binary.Read(item, binary.LittleEndian, info) wb.sst = make([]string, info.Count) + var err error + var str string var size uint16 var i = 0 for ; i < int(info.Count); i++ { - var err error - if err = binary.Read(buf_item, binary.LittleEndian, &size); err == nil { - var str string - str, err = wb.get_string(buf_item, size) + if err = binary.Read(item, binary.LittleEndian, &size); err == nil { + str, err = wb.parseString(item, size, "sst") wb.sst[i] = wb.sst[i] + str } @@ -131,37 +132,86 @@ func (wb *WorkBook) parseBof(buf io.ReadSeeker, b *bof, pre *bof, offset_pre int } } offset = i - case 0x85: // bOUNDSHEET + case XLS_Type_SHEET: var bs = new(boundsheet) - binary.Read(buf_item, binary.LittleEndian, bs) + binary.Read(item, binary.LittleEndian, bs) // different for BIFF5 and BIFF8 - wb.addSheet(bs, buf_item) - case 0x0e0: // XF + wb.addSheet(bs, item) + case XLS_Type_EXTERNSHEET: + if !wb.Is5ver { + binary.Read(item, binary.LittleEndian, &wb.ref.Num) + wb.ref.Info = make([]ExtSheetInfo, wb.ref.Num) + binary.Read(item, binary.LittleEndian, &wb.ref.Info) + } + case XLS_Type_XF: if wb.Is5ver { xf := new(Xf5) - binary.Read(buf_item, binary.LittleEndian, xf) + binary.Read(item, binary.LittleEndian, xf) wb.addXf(xf) } else { xf := new(Xf8) - binary.Read(buf_item, binary.LittleEndian, xf) + binary.Read(item, binary.LittleEndian, xf) wb.addXf(xf) } - case 0x031: // FONT + case XLS_Type_FONT: f := new(FontInfo) - binary.Read(buf_item, binary.LittleEndian, f) - wb.addFont(f, buf_item) - case 0x41E: //FORMAT - font := new(Format) - binary.Read(buf_item, binary.LittleEndian, &font.Head) - font.str, _ = wb.get_string(buf_item, font.Head.Size) - wb.addFormat(font) - case 0x22: //DATEMODE - binary.Read(buf_item, binary.LittleEndian, &wb.dateMode) + binary.Read(item, binary.LittleEndian, f) + wb.addFont(f, item) + case XLS_Type_FORMAT: + format := new(Format) + binary.Read(item, binary.LittleEndian, &format.Head) + if raw, err := wb.parseString(item, format.Head.Size, "format"); nil == err && "" != raw { + format.Raw = strings.Split(raw, ";") + } else { + format.Raw = []string{} + } + + wb.addFormat(format) + case XLS_Type_DATEMODE: + binary.Read(item, binary.LittleEndian, &wb.dateMode) } return } -func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err error) { +func (w *WorkBook) addXf(xf XF) { + w.Xfs = append(w.Xfs, xf) +} + +func (w *WorkBook) addFont(font *FontInfo, buf io.ReadSeeker) { + name, _ := w.parseString(buf, uint16(font.NameB), "font") + w.Fonts = append(w.Fonts, Font{Info: font, Name: name}) +} + +func (w *WorkBook) addFormat(format *Format) { + w.Formats[format.Head.Index] = format +} + +func (w *WorkBook) addSheet(sheet *boundsheet, buf io.ReadSeeker) { + name, _ := w.parseString(buf, uint16(sheet.Name), "sheet") + w.sheets = append(w.sheets, &WorkSheet{id: len(w.sheets), bs: sheet, Name: name, wb: w}) +} + +// prepare process workbook struct +func (w *WorkBook) prepare() { + for k, v := range builtInNumFmt { + if _, ok := w.Formats[k]; !ok { + w.Formats[k] = &Format{ + Raw: strings.Split(v, ";"), + } + } + } + for _, v := range w.Formats { + v.Prepare(w) + } +} + +//reading a sheet from the compress file to memory, you should call this before you try to get anything from sheet +func (w *WorkBook) prepareSheet(sheet *WorkSheet) { + w.rs.Seek(int64(sheet.bs.Filepos), 0) + sheet.parse(w.rs) +} + +func (w *WorkBook) parseString(buf io.ReadSeeker, size uint16, from string) (res string, err error) { if w.Is5ver { var bts = make([]byte, size) _, err = buf.Read(bts) @@ -171,60 +221,76 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err e var phonetic_size = uint32(0) var flag byte err = binary.Read(buf, binary.LittleEndian, &flag) + + // Rich-Text settings (richtext), 0 = Does not contain Rich-Text settings, 1 = Contains Rich-Text settings if flag&0x8 != 0 { err = binary.Read(buf, binary.LittleEndian, &richtext_num) } else if w.continue_rich > 0 { richtext_num = w.continue_rich w.continue_rich = 0 } + + // Asian phonetic settings, 0 = Does not contain Asian phonetic settings, 1 = Contains Asian phonetic settings if flag&0x4 != 0 { err = binary.Read(buf, binary.LittleEndian, &phonetic_size) } else if w.continue_apsb > 0 { phonetic_size = w.continue_apsb w.continue_apsb = 0 } + + // Character compression, 0 = Compressed (8-bit characters), 1 = Uncompressed (16-bit characters) if flag&0x1 != 0 { var bts = make([]uint16, size) var i = uint16(0) for ; i < size && err == nil; i++ { err = binary.Read(buf, binary.LittleEndian, &bts[i]) } - runes := utf16.Decode(bts[:i]) - res = string(runes) + if i < size { w.continue_utf16 = size - i + 1 + } else if i == size && err == io.EOF { + w.continue_utf16 = 1 } + + if i > 1 && 0 == bts[i-1] { + i-- + } + + res = string(utf16.Decode(bts[:i])) } else { - var bts = make([]byte, size) var n int + var bts = make([]byte, size) n, err = buf.Read(bts) if uint16(n) < size { w.continue_utf16 = size - uint16(n) err = io.EOF } + if n > 1 && 0 == bts[n-1] { + n-- + } + var bts1 = make([]uint16, n) for k, v := range bts[:n] { bts1[k] = uint16(v) } - runes := utf16.Decode(bts1) - res = string(runes) + + res = string(utf16.Decode(bts1)) } + if richtext_num > 0 { var bts []byte - var seek_size int64 + var ss int64 if w.Is5ver { - seek_size = int64(2 * richtext_num) + ss = int64(2 * richtext_num) } else { - seek_size = int64(4 * richtext_num) + ss = int64(4 * richtext_num) } - bts = make([]byte, seek_size) + bts = make([]byte, ss) err = binary.Read(buf, binary.LittleEndian, bts) if err == io.EOF { w.continue_rich = richtext_num } - - // err = binary.Read(buf, binary.LittleEndian, bts) } if phonetic_size > 0 { var bts []byte @@ -238,18 +304,20 @@ func (w *WorkBook) get_string(buf io.ReadSeeker, size uint16) (res string, err e return } -func (w *WorkBook) addSheet(sheet *boundsheet, buf io.ReadSeeker) { - name, _ := w.get_string(buf, uint16(sheet.Name)) - w.sheets = append(w.sheets, &WorkSheet{bs: sheet, Name: name, wb: w}) -} +// Format format value to string +func (w *WorkBook) Format(xf uint16, v float64) (string, bool) { + var val string + var idx = int(xf) + if len(w.Xfs) > idx { + if formatter := w.Formats[w.Xfs[idx].FormatNo()]; nil != formatter { + return formatter.String(v), true + } + } -//reading a sheet from the compress file to memory, you should call this before you try to get anything from sheet -func (w *WorkBook) prepareSheet(sheet *WorkSheet) { - w.rs.Seek(int64(sheet.bs.Filepos), 0) - sheet.parse(w.rs) + return val, false } -//Get one sheet by its number +//GetSheet get one sheet by its number func (w *WorkBook) GetSheet(num int) *WorkSheet { if num < len(w.sheets) { s := w.sheets[num] @@ -257,19 +325,18 @@ func (w *WorkBook) GetSheet(num int) *WorkSheet { w.prepareSheet(s) } return s - } else { - return nil } + return nil } -//Get the number of all sheets, look into example +//NumSheets Get the number of all sheets, look into example func (w *WorkBook) NumSheets() int { return len(w.sheets) } -//helper function to read all cells from file +//ReadAllCells helper function to read all cells from file //Notice: the max value is the limit of the max capacity of lines. -//Warning: the helper function will need big memeory if file is large. +//Warning: the helper function will need big memory if file is large. func (w *WorkBook) ReadAllCells(max int) (res [][]string) { res = make([][]string, 0) for _, sheet := range w.sheets { @@ -277,11 +344,11 @@ func (w *WorkBook) ReadAllCells(max int) (res [][]string) { max = max - len(res) w.prepareSheet(sheet) if sheet.MaxRow != 0 { - leng := int(sheet.MaxRow) + 1 - if max < leng { - leng = max + length := int(sheet.MaxRow) + 1 + if max < length { + length = max } - temp := make([][]string, leng) + temp := make([][]string, length) for k, row := range sheet.rows { data := make([]string, 0) if len(row.cols) > 0 { @@ -295,7 +362,7 @@ func (w *WorkBook) ReadAllCells(max int) (res [][]string) { data[col.FirstCol()+i] = str[i] } } - if leng > int(k) { + if length > int(k) { temp[k] = data } } diff --git a/worksheet.go b/worksheet.go index 9bf065c..cf4a88e 100644 --- a/worksheet.go +++ b/worksheet.go @@ -2,29 +2,43 @@ package xls import ( "encoding/binary" - "fmt" "io" "unicode/utf16" ) type boundsheet struct { Filepos uint32 - Type byte Visible byte + Type byte Name byte } +type extSheetRef struct { + Num uint16 + Info []ExtSheetInfo +} + +// ExtSheetInfo external sheet references provided for named cells +type ExtSheetInfo struct { + ExternalBookIndex uint16 + FirstSheetIndex uint16 + LastSheetIndex uint16 +} + //WorkSheet in one WorkBook type WorkSheet struct { - bs *boundsheet - wb *WorkBook - Name string - rows map[uint16]*Row - //NOTICE: this is the max row number of the sheet, so it should be count -1 - MaxRow uint16 - parsed bool + bs *boundsheet + wb *WorkBook + Name string + Selected bool + rows map[uint16]*Row + MaxRow uint16 + id int + parsed bool + rightToLeft bool } +// Row return row data by number func (w *WorkSheet) Row(i int) *Row { row := w.rows[uint16(i)] if row != nil { @@ -33,18 +47,26 @@ func (w *WorkSheet) Row(i int) *Row { return row } +// GetSheetVisible provides a function to get worksheet visible +func (w *WorkSheet) GetSheetVisible() bool { + if 0 == w.bs.Visible { + return true + } + + return false +} + func (w *WorkSheet) parse(buf io.ReadSeeker) { w.rows = make(map[uint16]*Row) b := new(bof) - var bof_pre *bof + var bp *bof for { if err := binary.Read(buf, binary.LittleEndian, b); err == nil { - bof_pre = w.parseBof(buf, b, bof_pre) + bp = w.parseBof(buf, b, bp) if b.Id == 0xa { break } } else { - fmt.Println(err) break } } @@ -56,6 +78,14 @@ func (w *WorkSheet) parseBof(buf io.ReadSeeker, b *bof, pre *bof) *bof { switch b.Id { // case 0x0E5: //MERGEDCELLS // ws.mergedCells(buf) + case XLS_Type_WINDOW2: // WINDOW2 + var sheetOptions, firstVisibleRow, firstVisibleColumn uint16 + binary.Read(buf, binary.LittleEndian, &sheetOptions) + binary.Read(buf, binary.LittleEndian, &firstVisibleRow) // not valuable + binary.Read(buf, binary.LittleEndian, &firstVisibleColumn) // not valuable + //buf.Seek(int64(b.Size)-2*3, 1) + w.rightToLeft = (sheetOptions & 0x40) != 0 + w.Selected = (sheetOptions & 0x400) != 0 case 0x208: //ROW r := new(rowInfo) binary.Read(buf, binary.LittleEndian, r) @@ -63,21 +93,17 @@ func (w *WorkSheet) parseBof(buf io.ReadSeeker, b *bof, pre *bof) *bof { case 0x0BD: //MULRK mc := new(MulrkCol) size := (b.Size - 6) / 6 - binary.Read(buf, binary.LittleEndian, &mc.Col) mc.Xfrks = make([]XfRk, size) - for i := uint16(0); i < size; i++ { - binary.Read(buf, binary.LittleEndian, &mc.Xfrks[i]) - } + binary.Read(buf, binary.LittleEndian, &mc.Col) + binary.Read(buf, binary.LittleEndian, &mc.Xfrks) binary.Read(buf, binary.LittleEndian, &mc.LastColB) col = mc case 0x0BE: //MULBLANK mc := new(MulBlankCol) size := (b.Size - 6) / 2 - binary.Read(buf, binary.LittleEndian, &mc.Col) mc.Xfs = make([]uint16, size) - for i := uint16(0); i < size; i++ { - binary.Read(buf, binary.LittleEndian, &mc.Xfs[i]) - } + binary.Read(buf, binary.LittleEndian, &mc.Col) + binary.Read(buf, binary.LittleEndian, &mc.Xfs) binary.Read(buf, binary.LittleEndian, &mc.LastColB) col = mc case 0x203: //NUMBER @@ -85,33 +111,49 @@ func (w *WorkSheet) parseBof(buf io.ReadSeeker, b *bof, pre *bof) *bof { binary.Read(buf, binary.LittleEndian, col) case 0x06: //FORMULA c := new(FormulaCol) - binary.Read(buf, binary.LittleEndian, &c.Header) + c.ws = w.id + c.Header = new(FormulaColHeader) c.Bts = make([]byte, b.Size-20) + binary.Read(buf, binary.LittleEndian, c.Header) binary.Read(buf, binary.LittleEndian, &c.Bts) col = c + c.parse(w.wb, false) + + if TYPE_STRING == c.vType { + binary.Read(buf, binary.LittleEndian, &c.Code) + binary.Read(buf, binary.LittleEndian, &c.Btl) + binary.Read(buf, binary.LittleEndian, &c.Btc) + + var fms, fme = w.wb.parseString(buf, c.Btc, "formula") + if nil == fme { + c.value = fms + } + + buf.Seek(-int64(c.Btl+4), 1) + } case 0x27e: //RK col = new(RkCol) binary.Read(buf, binary.LittleEndian, col) case 0xFD: //LABELSST col = new(LabelsstCol) binary.Read(buf, binary.LittleEndian, col) - case 0x204: + case 0x204: // LABEL c := new(labelCol) binary.Read(buf, binary.LittleEndian, &c.BlankCol) var count uint16 binary.Read(buf, binary.LittleEndian, &count) - c.Str, _ = w.wb.get_string(buf, count) + c.Str, _ = w.wb.parseString(buf, count, "label") col = c case 0x201: //BLANK col = new(BlankCol) binary.Read(buf, binary.LittleEndian, col) case 0x1b8: //HYPERLINK + var flag uint32 + var count uint32 var hy HyperLink binary.Read(buf, binary.LittleEndian, &hy.CellRange) buf.Seek(20, 1) - var flag uint32 binary.Read(buf, binary.LittleEndian, &flag) - var count uint32 if flag&0x14 != 0 { binary.Read(buf, binary.LittleEndian, &count) @@ -172,7 +214,6 @@ func (w *WorkSheet) add(content interface{}) { w.addCell(col, ch) } } - } func (w *WorkSheet) addCell(col Coler, ch contentHandler) { @@ -180,33 +221,36 @@ func (w *WorkSheet) addCell(col Coler, ch contentHandler) { } func (w *WorkSheet) addRange(rang Ranger, ch contentHandler) { - for i := rang.FirstRow(); i <= rang.LastRow(); i++ { w.addContent(i, ch) } } -func (w *WorkSheet) addContent(row_num uint16, ch contentHandler) { +func (w *WorkSheet) addContent(num uint16, ch contentHandler) { var row *Row var ok bool - if row, ok = w.rows[row_num]; !ok { + if row, ok = w.rows[num]; !ok { info := new(rowInfo) - info.Index = row_num + info.Index = num row = w.addRow(info) } row.cols[ch.FirstCol()] = ch } -func (w *WorkSheet) addRow(info *rowInfo) (row *Row) { +func (w *WorkSheet) addRow(info *rowInfo) *Row { + var ok bool + var row *Row + if info.Index > w.MaxRow { w.MaxRow = info.Index } - var ok bool + if row, ok = w.rows[info.Index]; ok { row.info = info } else { - row = &Row{info: info, cols: make(map[uint16]contentHandler)} + row = &Row{info: info, cols: make(map[uint16]contentHandler, int(info.Last-info.First))} w.rows[info.Index] = row } - return + + return row } diff --git a/xf.go b/xf.go index 8f4dd1e..240f964 100644 --- a/xf.go +++ b/xf.go @@ -1,5 +1,9 @@ package xls +type XF interface { + FormatNo() uint16 +} + type Xf5 struct { Font uint16 Format uint16 @@ -11,7 +15,7 @@ type Xf5 struct { Linestyle uint16 } -func (x *Xf5) formatNo() uint16 { +func (x *Xf5) FormatNo() uint16 { return x.Format } @@ -28,10 +32,6 @@ type Xf8 struct { Groundcolor uint16 } -func (x *Xf8) formatNo() uint16 { +func (x *Xf8) FormatNo() uint16 { return x.Format } - -type st_xf_data interface { - formatNo() uint16 -} diff --git a/xls.go b/xls.go index 7979cb9..1c60126 100644 --- a/xls.go +++ b/xls.go @@ -1,8 +1,11 @@ package xls import ( + "bytes" "io" + "io/ioutil" "os" + "strings" "github.com/extrame/ole2" ) @@ -16,7 +19,16 @@ func Open(file string, charset string) (*WorkBook, error) { } } -//Open one xls file and return the closer +//OpenWithBuffer open one xls file with memory buffer +func OpenWithBuffer(file string, charset string) (*WorkBook, error) { + if fi, err := ioutil.ReadFile(file); err == nil { + return OpenReader(bytes.NewReader(fi), charset) + } else { + return nil, err + } +} + +//OpenWithCloser open one xls file and return the closer func OpenWithCloser(file string, charset string) (*WorkBook, io.Closer, error) { if fi, err := os.Open(file); err == nil { wb, err := OpenReader(fi, charset) @@ -26,7 +38,7 @@ func OpenWithCloser(file string, charset string) (*WorkBook, io.Closer, error) { } } -//Open xls file from reader +//OpenReader open xls file from reader func OpenReader(reader io.ReadSeeker, charset string) (wb *WorkBook, err error) { var ole *ole2.Ole if ole, err = ole2.Open(reader, charset); err == nil { @@ -35,16 +47,16 @@ func OpenReader(reader io.ReadSeeker, charset string) (wb *WorkBook, err error) var book *ole2.File var root *ole2.File for _, file := range dir { - name := file.Name() - if name == "Workbook" { + name := strings.ToLower(file.Name()) + if name == "workbook" { book = file // break } - if name == "Book" { + if name == "book" { book = file // break } - if name == "Root Entry" { + if name == "root entry" { root = file } }