
我试图编写处理HTML网页的代码,并创build一个Excel表格。 有抛出的错误。 请让我知道是否有人可以帮忙。 我跑了很多代码被评论。 我也想包括这一点。

book = xlwt.Workbook(encoding="utf-8") sheet1 = book.add_sheet("Product List") i=0 for row in soup.findAll('li', { "class" : "product-link" }): link = row.a['href'] #print(link) #print(link[23:]) conn = http.client.HTTPSConnection("") conn.request("GET", link[23:]) req = conn.getresponse() #print(req.status, req.reason) data2 = soup2=BeautifulSoup(data2) Name=soup2.find('title') Name_text = Name.text words = Name_text.split('|') Name = words[0] print(Name) if len(soup2.find_all("div", {"id":"productDescription"})) > 0: Desc = soup2.find('div', {"id":"productDescription"}) ProdDesc = Desc.p #print(ProdDesc) if ProdDesc != None: Desc == ProdDesc.text if len(soup2.find_all("div", {"id":"productBenefits"})) > 0: Feat = soup2.find('div',{"id":"productBenefits"}) ProdFeat = Feat.ul #print(ProdFeat) if ProdFeat != None: Feat == ProdFeat.text if len(soup2.find_all("table", {"class":"mceItemTable"})) > 0: Spec = soup2.find('table',{"class":"mceItemTable"}) #print(Spec) if Spec != None: specrow = '' for row in Spec.findAll('tr'): specrow = specrow + ',' + row.text if specrow != None: Spec = specrow[1:] words = Spec.partition(",")[2] record = (Name,Desc.text,Feat.text[20:],words) print(record) for col_index, item in enumerate(record): sheet1.write(i, col_index, item) i += 1"Sanjamar1.xls") ''' if len(soup2.find_all("table", {"class":"variations"})) > 0: options = soup2.find('table',{"class":"variations"}) Prodoptions = print('options') print(Prodoptions) if len(soup2.find_all("div", {"id":"availableColorsWrapper"})) > 0: options = soup2.find('div',{"id":"availableColorsWrapper"}) ProdColors = options.ul print('Colors') print(ProdColors) if len(soup2.find_all("a", {"class":"fancybox-media"})) > 0: options = soup2.find('a',{"class":"fancybox-media"}) ProdVideos = options['href'] print('Videos') print(ProdVideos) ''' ''' j = 0 if len(soup2.find_all("a", {"class":"lit-link"})) > 0: for row1 in soup2.findAll('a', {"class":"lit-link"}): file_download1 = row1['href'] words = file_download1.split('/') print (words[-1]) if words[-1][-3:]=="pdf": print (file_download1) if file_download1 != None: if file_download1 != '': try: resource = urllib.request.urlopen(file_download1) file_name1 = words[-1] output = open(file_name1,"wb") output.write( output.close() except urllib.request.HTTPError: print('Nthn') j += 1 ''' #i += 1 


 TypeError Traceback (most recent call last) <ipython-input-17-468fc8825863> in <module>() 63 i += 1 64 ---> 65'Sanjamar.xls') 66 67 ''' C:\Users\Santosh\Anaconda3\lib\site-packages\xlwt\ in save(self, filename_or_stream) 708 709 doc = CompoundDoc.XlsDoc() --> 710, self.get_biff_data()) 711 712 C:\Users\Santosh\Anaconda3\lib\site-packages\xlwt\ in get_biff_data(self) 672 all_links = self.__all_links_rec() 673 --> 674 shared_str_table = self.__sst_rec() 675 after = country + all_links + shared_str_table 676 C:\Users\Santosh\Anaconda3\lib\site-packages\xlwt\ in _ _sst_rec(self) 634 635 def __sst_rec(self): --> 636 return self.__sst.get_biff_record() 637 638 def __ext_sst_rec(self, abs_stream_pos): C:\Users\Santosh\Anaconda3\lib\site-packages\xlwt\ in get_biff_record(self) 77 self._add_to_sst(s) 78 else: ---> 79 self._add_rt_to_sst(s) 80 del data 81 self._new_piece() C:\Users\Santosh\Anaconda3\lib\site-packages\xlwt\ in _add_rt_to_sst(self, rt) 106 107 def _add_rt_to_sst(self, rt): --> 108 rt_str, rt_fr = upack2rt(rt, self.encoding) 109 is_unicode_str = rt_str[2] == b'\x09'[0] 110 if is_unicode_str: C:\Users\Santosh\Anaconda3\lib\site-packages\xlwt\ in upack2rt(rt, encoding) 84 # code in ensures that 85 # fontx can be None only for the first piece ---> 86 fr += pack('<HH', offset, fontx) 87 # offset is the number of MS C wchar characters. 88 # That is 1 if c <= u'\uFFFF' else 2 TypeError: must be str, not bytes 

Interesting Posts