我无法弄清楚为什么我得到一个空白的输出文件

import csv import requests import re from bs4 import BeautifulSoup import sys reload(sys) sys.setdefaultencoding('utf8') #CREATE CSV FILE outfile = open("./output.csv", "wb") writer = csv.writer(outfile) #IMPORT MATCHES import csv with open('matches.csv', 'rb') as f: reader = csv.reader(f) matches = list(reader) for id in matches: id = str(id) id = re.sub("[^0-9]","",id) url = 'http://www.virtualpronetwork.com/apps/fvpaa/matches/match_report/' + id print (url) response = requests.get(url) html = response.content soup = BeautifulSoup(html) #GET TEAMS AND SCORES score = soup.findAll("div",{"class":"col-md-5 center"}) team_home = score[0] team_home = str(team_home) team_home = re.search('title="(.*)" />',team_home) team_home = team_home.group(1) team_away = score[1] team_away = str(team_away) team_away = re.search('title="(.*)" />',team_away) team_away = team_away.group(1) goals_home = score[2] goals_home = str(goals_home) goals_home = re.sub('</h2></div>','',goals_home) goals_home = re.sub('<div class="col-md-5 center"><h2>','',goals_home) goals_away = score[3] goals_away = str(goals_away) goals_away = re.sub('</h2></div>','',goals_away) goals_away = re.sub('<div class="col-md-5 center"><h2>','',goals_away) #GET HOME STATS tables = soup.findChildren('table') stats_home = tables[0] list_of_rows_home = [] for row in stats_home.findChildren('tr')[1:]: list_of_cells = [] for cell in row.findChildren('td')[0]: text = cell.text list_of_cells.append(text) for cell in row.findChildren('td')[1]: text = cell.text list_of_cells.append(text) for cell in row.findChildren('td')[2:]: list_of_cells.append(cell) list_of_rows_home.append(list_of_cells) for i in range(len(list_of_rows_home)): row = list_of_rows_home[i] cell = list_of_rows_home[i][2] cell = str(cell) goal = re.findall('goal',cell) goal = goal.count('goal') goal = goal / 2 assist = re.findall('assist',cell) assist = assist.count('assist') assist = assist / 2 motm = re.findall('motm',cell) motm = motm.count('motm') row.append(goal) row.append(assist) row.append(motm) for row in list_of_rows_home: del row[2] for i in range(len(list_of_rows_home)): row = list_of_rows_home[i] row.append(team_home) row.append(goals_home) row.append(team_away) row.append(goals_away) #GET AWAY STATS stats_away = tables[1] list_of_rows_away = [] for row in stats_away.findChildren('tr')[1:]: list_of_cells = [] for cell in row.findChildren('td')[0]: text = cell.text list_of_cells.append(text) for cell in row.findChildren('td')[1]: text = cell.text list_of_cells.append(text) for cell in row.findChildren('td')[2:]: list_of_cells.append(cell) list_of_rows_away.append(list_of_cells) for i in range(len(list_of_rows_away)): row = list_of_rows_away[i] cell = list_of_rows_away[i][2] cell = str(cell) goal = re.findall('goal',cell) goal = goal.count('goal') goal = goal / 2 assist = re.findall('assist',cell) assist = assist.count('assist') assist = assist / 2 motm = re.findall('motm',cell) motm = motm.count('motm') row.append(goal) row.append(assist) row.append(motm) for row in list_of_rows_away: del row[2] for i in range(len(list_of_rows_away)): row = list_of_rows_away[i] row.append(team_away) row.append(goals_away) row.append(team_home) row.append(goals_home) #COMPILE INTO ONE TABLE list_of_rows = list_of_rows_home + list_of_rows_away #WRITE TO CSV writer.writerows(list_of_rows) 

我的input文件是一个基本的excel文件,其中的匹配id全部排在excel文件的第一列。 当它创build输出文件时,它是空白的。 我也没有收到任何错误消息。

这个问题是在你的regexsearch,所以也许改为:

 team_home = re.search('title="(.*)"',team_home) team_home = team_home.group(1) 

select

 team_home = re.search('title="(.*)"/>',team_home) team_home = team_home.group(1) 

/>是不需要的,这实际上使得title =“”与组(1)不匹配,而组(1)又创build了一个属性错误,脚本停止。 如果你想包括/>然后删除正则expression式模式的空间,因为这是最终杀死它。