使用Perl或Ruby提取Excel单元格注释的最佳方法是什么?

我一直在使用Spreadhsheet :: ParseExcel成功parsingPerl文档(正如在Perl中parsingExcel文件的最佳方法是什么? ),但是我不知道如何提取单元格注释。

有任何想法吗? 在Perl或Ruby中的解决scheme将是理想的。

一个select是使用Ruby的win32ole库。

以下(稍微详细)示例连接到打开的Excel工作表,并从单元格B2获取注释文本。

require 'win32ole' xl = WIN32OLE.connect('Excel.Application') ws = xl.ActiveSheet cell = ws.Range('B2') comment = cell.Comment text = comment.Text 

更多信息和使用Ruby的win32ole库来自动化Excel的例子可以在这里find:

http://rubyonwindows.blogspot.com/search/label/excel

Python xlrd库将parsing单元格注释(如果打开xlrd.sheet.OBJ_MSO_DEBUG ,您将看到它们),但是它不会从API公开它们。 你可以parsing转储或者对它进行一些破解,这样你就可以通过编程的方式获得它们。 这是一个开始(testing极其微小):

 diff --git a/xlrd/sheet.py b/xlrd/sheet.py --- a/xlrd/sheet.py +++ b/xlrd/sheet.py @@ -206,6 +206,7 @@ self._dimncols = 0 self._cell_values = [] self._cell_types = [] + self._cell_notes = [] self._cell_xf_indexes = [] self._need_fix_ragged_rows = 0 self.defcolwidth = None @@ -252,6 +253,7 @@ return Cell( self._cell_types[rowx][colx], self._cell_values[rowx][colx], + self._cell_notes[rowx][colx], xfx, ) @@ -422,12 +424,14 @@ if self.formatting_info: self._cell_xf_indexes[nrx].extend(aa('h', [-1]) * nextra) self._cell_values[nrx].extend([''] * nextra) + self._cell_notes[nrx].extend([None] * nextra) if nc > self.ncols: self.ncols = nc self._need_fix_ragged_rows = 1 if nr > self.nrows: scta = self._cell_types.append scva = self._cell_values.append + scna = self._cell_notes.append scxa = self._cell_xf_indexes.append fmt_info = self.formatting_info xce = XL_CELL_EMPTY @@ -436,6 +440,7 @@ for _unused in xrange(self.nrows, nr): scta([xce] * nc) scva([''] * nc) + scna([None] * nc) if fmt_info: scxa([-1] * nc) else: @@ -443,6 +448,7 @@ for _unused in xrange(self.nrows, nr): scta(aa('B', [xce]) * nc) scva([''] * nc) + scna([None] * nc) if fmt_info: scxa(aa('h', [-1]) * nc) self.nrows = nr @@ -454,6 +460,7 @@ aa = array_array s_cell_types = self._cell_types s_cell_values = self._cell_values + s_cell_notes = self._cell_notes s_cell_xf_indexes = self._cell_xf_indexes s_dont_use_array = self.dont_use_array s_fmt_info = self.formatting_info @@ -465,6 +472,7 @@ nextra = ncols - rlen if nextra > 0: s_cell_values[rowx][rlen:] = [''] * nextra + s_cell_notes[rowx][rlen:] = [None] * nextra if s_dont_use_array: trow[rlen:] = [xce] * nextra if s_fmt_info: @@ -600,6 +608,7 @@ bk_get_record_parts = bk.get_record_parts bv = self.biff_version fmt_info = self.formatting_info + txos = {} eof_found = 0 while 1: # if DEBUG: print "SHEET.READ: about to read from position %d" % bk._position @@ -877,13 +886,23 @@ break elif rc == XL_OBJ: # handle SHEET-level objects; note there's a separate Book.handle_obj - self.handle_obj(data) + obj = self.handle_obj(data) + if obj: + obj_id = obj.id + else: + obj_id = None elif rc == XL_MSO_DRAWING: self.handle_msodrawingetc(rc, data_len, data) elif rc == XL_TXO: - self.handle_txo(data) + txo = self.handle_txo(data) + if txo and obj_id: + txos[obj_id] = txo + obj_id = None elif rc == XL_NOTE: - self.handle_note(data) + note = self.handle_note(data) + txo = txos.get(note.object_id) + if txo: + self._cell_notes[note.rowx][note.colx] = txo.text elif rc == XL_FEAT11: self.handle_feat11(data) elif rc in bofcodes: ##### EMBEDDED BOF ##### @@ -1387,19 +1406,16 @@ def handle_obj(self, data): - if not OBJ_MSO_DEBUG: - return - DEBUG = 1 if self.biff_version < 80: return o = MSObj() data_len = len(data) pos = 0 - if DEBUG: + if OBJ_MSO_DEBUG: fprintf(self.logfile, "... OBJ record ...\n") while pos < data_len: ft, cb = unpack('<HH', data[pos:pos+4]) - if DEBUG: + if OBJ_MSO_DEBUG: hex_char_dump(data, pos, cb, base=0, fout=self.logfile) if ft == 0x15: # ftCmo ... s/b first assert pos == 0 @@ -1430,16 +1446,14 @@ else: # didn't break out of while loop assert pos == data_len - if DEBUG: + if OBJ_MSO_DEBUG: o.dump(self.logfile, header="=== MSOBj ===", footer= " ") + return o def handle_note(self, data): - if not OBJ_MSO_DEBUG: - return - DEBUG = 1 if self.biff_version < 80: return - if DEBUG: + if OBJ_MSO_DEBUG: fprintf(self.logfile, '... NOTE record ...\n') hex_char_dump(data, 0, len(data), base=0, fout=self.logfile) o = MSNote() @@ -1453,13 +1467,11 @@ o.original_author, endpos = unpack_unicode_update_pos(data, 8, lenlen=2) assert endpos == data_len - 1 o.last_byte = data[-1] - if DEBUG: + if OBJ_MSO_DEBUG: o.dump(self.logfile, header="=== MSNote ===", footer= " ") + return o def handle_txo(self, data): - if not OBJ_MSO_DEBUG: - return - DEBUG = 1 if self.biff_version < 80: return o = MSTxo() @@ -1477,8 +1489,9 @@ rc3, data3_len, data3 = self.book.get_record_parts() assert rc3 == XL_CONTINUE # ignore the formatting runs for the moment - if DEBUG: + if OBJ_MSO_DEBUG: o.dump(self.logfile, header="=== MSTxo ===", footer= " ") + return o def handle_feat11(self, data): if not OBJ_MSO_DEBUG: @@ -1638,11 +1651,12 @@ class Cell(BaseObject): - __slots__ = ['ctype', 'value', 'xf_index'] + __slots__ = ['ctype', 'value', 'note', 'xf_index'] - def __init__(self, ctype, value, xf_index=None): + def __init__(self, ctype, value, note=None, xf_index=None): self.ctype = ctype self.value = value + self.note = note self.xf_index = xf_index def __repr__(self): 

那么你可以写一些像:

 import xlrd xlrd.sheet.OBJ_MSO_DEBUG = True xls = xlrd.open_workbook('foo.xls') for sheet in xls.sheets(): print 'sheet %s (%dx %d)' % (sheet.name, sheet.nrows, sheet.ncols) for rownum in xrange(sheet.nrows): for cell in sheet.row(rownum): print cell, cell.note