改进Python代码更新谷歌电子表格

首先,我是Python新手(实际上我只在Sololearn上学过,而且只学完了一半的课程),所以请尽量给出详细一点的回答。 我的任务有以下几个步骤:

  1. 删除旧的.xlsx文件(如果有的话)

  2. 使用win32将两个.xls文件转换成.xlsx文件,删除第一行,然后删除.xls文件[这些.xls文件已经下载到源目录中;xlrd和pyexcel在打开这些.xls文件时会报错(不受支持的格式或文件已损坏),在线文件分析工具判断它们实际上是html / htm格式]

  3. 从xlsx文件获取数据

  4. 首先,删除Google电子表格上的旧工作表以清除旧数据。 然后创建一个具有相同名称的新工作表。 最后将数据插入到Google电子表格的新工作表中。

  5. 打开第二个电子表格(它从第一个电子表格导入数据),并更新其中名为“Dummy”的工作表里的一个单元格,以确保Google电子表格在后台同步。

现在,我通过拼凑许多代码片段并大量使用谷歌搜索,写出了下面的代码。 代码可以正常工作,但完成整个过程平均需要约65秒。 我的问题有3个部分:

  1. 有没有办法直接从.xls文件中访问数据?
  2. 有什么办法可以改善这段代码的性能?
  3. 任何其他更有效的方法来完成上述任务?

我的代码: –

# Refresh Google Sheets worksheets from Excel exports found in source_dir.
#
# For every expected export (file1.xls, file2.xls) the script:
#   1. removes any stale converted .xlsx from output_dir,
#   2. lets Excel open the .xls, drop its first row and save it as .xlsx,
#   3. removes the source .xls,
#   4. reads every cell of the .xlsx with openpyxl,
#   5. recreates the target worksheet in "First Sheet" and uploads the cells.
# Finally it writes one cell in the "Dummy" worksheet of "second sheet" and
# prints the elapsed time in seconds.
import os
import time

import gspread
import openpyxl
import win32com.client as win32
from gspread.utils import a1_to_rowcol
from oauth2client.service_account import ServiceAccountCredentials
from openpyxl.utils import get_column_letter

start = time.time()

# Input/output file locations.
source_dir = "C:\\Users\\XYZ\\Downloads"
output_dir = "C:\\Users\\XYZ\\Excels"

# Value passed to Workbook.SaveAs for the .xlsx format (56 would be .xls).
XLSX_FILE_FORMAT = 51

# Smallest grid given to a recreated worksheet; it grows when the data is
# larger so the uploaded range always fits.
MIN_ROWS = 500
MIN_COLS = 30

# Use the service-account credentials to create a client.
# The spreadsheets must be shared with the e-mail address contained in the
# JSON key file; the script cannot work without that file.
scope = ['https://spreadsheets.google.com/feeds']
creds = ServiceAccountCredentials.from_json_keyfile_name(
    "C:\\Users\\XYZ\\your.json", scope
)
gc = gspread.authorize(creds)

# Spreadsheet that receives the uploaded data; opened once and reused.
sh = gc.open("First Sheet")


def save_as_xlsx(input_file, output_dir, output_file_name):
    """Open *input_file* in Excel, delete its first row and save it as .xlsx.

    The result is written to ``output_dir/output_file_name``.

    Returns True when the workbook was saved. Any error raised by Excel
    propagates to the caller, but the workbook is still closed and Excel is
    still asked to quit, so a failed conversion does not leave an Excel
    process running.
    """
    excel = win32.gencache.EnsureDispatch('Excel.Application')
    try:
        wb = excel.Workbooks.Open(input_file)
        try:
            # Work on the workbook that was just opened rather than on
            # whichever workbook happens to be active.
            wb.Sheets(1).Rows(1).Delete()
            wb.SaveAs(
                os.path.join(output_dir, output_file_name),
                FileFormat=XLSX_FILE_FORMAT,
            )
        finally:
            # After a successful SaveAs nothing is pending; after a failure
            # the edit is discarded instead of triggering a save prompt.
            wb.Close(SaveChanges=False)
    finally:
        excel.Application.Quit()
    return True


def get_the_data_from_xlsx(output_dir, output_file_name):
    """Read every cell of the active sheet of the given .xlsx file.

    Returns a ``(data, last_cell)`` tuple: *data* is a flat list of cell
    values collected row by row, and *last_cell* is the address of the
    bottom-right cell (for example ``"C10"``) built from the sheet's
    ``max_row`` and ``max_column``.
    """
    wb = openpyxl.load_workbook(os.path.join(output_dir, output_file_name))
    sheet = wb.active
    last_cell = get_column_letter(sheet.max_column) + str(sheet.max_row)
    data = [cell.value for row in sheet['A1':last_cell] for cell in row]
    return (data, last_cell)


def insert_data_into_spreadsheet(name_of_worksheet, data, last_cell):
    """Replace worksheet *name_of_worksheet* in ``sh`` with *data*.

    The worksheet is deleted and recreated under the same name so that no
    old data survives while imports that refer to it by name keep working.
    *data* is the flat list and *last_cell* the bottom-right address
    produced by ``get_the_data_from_xlsx``.
    """
    sh.del_worksheet(sh.worksheet(name_of_worksheet))

    # Make the new grid at least as large as the data being uploaded.
    needed_rows, needed_cols = a1_to_rowcol(last_cell)
    worksheet = sh.add_worksheet(
        title=name_of_worksheet,
        rows=max(MIN_ROWS, needed_rows),
        cols=max(MIN_COLS, needed_cols),
    )

    # Relies on range() yielding cells in the same row-by-row order in which
    # data was collected.
    cell_list = worksheet.range('A1' + ':' + str(last_cell))
    for cell, value in zip(cell_list, data):
        cell.value = value

    # Send all cells in a single request.
    worksheet.update_cells(cell_list)


def delete_file(directory, file_initials):
    """Delete every entry in *directory* whose name starts with *file_initials*."""
    for filename in os.listdir(directory):
        if filename.startswith(file_initials):
            os.unlink(os.path.join(directory, filename))


def _process_export(prefix, worksheet_name):
    """Convert ``<prefix>.xls`` from source_dir and upload it, if it exists."""
    input_file = os.path.join(source_dir, prefix + ".xls")
    if not os.path.isfile(input_file):
        return

    output_file = "output_" + prefix + ".xlsx"
    # Remove any old converted file before Excel writes the new one.
    delete_file(output_dir, "output_" + prefix)
    if save_as_xlsx(input_file, output_dir, output_file):
        # The source file is no longer needed once it has been converted.
        delete_file(source_dir, prefix)
        data_to_spreadsheet, last_cell = get_the_data_from_xlsx(
            output_dir, output_file
        )
        insert_data_into_spreadsheet(worksheet_name, data_to_spreadsheet, last_cell)


# Handle each export at most once per run, and only when the exact file that
# will be opened is really there.
for prefix, worksheet_name in (("file1", "file1_data"), ("file2", "file2_data")):
    _process_export(prefix, worksheet_name)

# Update one cell in the Dummy worksheet of the second spreadsheet so that it
# syncs with the freshly uploaded data.
sync_sh = gc.open("second sheet")
sync_sh.worksheet("Dummy").update_acell('B1', '=Today()')

end = time.time()
print(end - start)