我试图从web上读取一些json文件,并用这些数据创建一个sql数据库。我使用ijson读取数据流。但当代码失败时,我需要重新开始检索数据。有没有办法继续从程序失败的地方读取json文件?
我可以用json.loads读取整个文档,但我假设数据太大,一次无法读取。
你可以在下面看到我的代码。
import sqlite3
import ssl
import urllib.request
import json
import ijson
import time
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
conn = sqlite3.connect('rawdata.sqlite')
cur = conn.cursor()
cur.execute('''DROP TABLE IF EXISTS DailyData ''')
cur.execute('''DROP TABLE IF EXISTS Countries ''')
cur.execute('''DROP TABLE IF EXISTS Continents ''')
cur.execute('''CREATE TABLE IF NOT EXISTS DailyData
(id INTEGER, Day TEXT, Month TEXT, Year TEXT, country_id INTEGER, continent_id INTEGER, Cases TEXT, Deaths TEXT)''')
cur.execute('''CREATE TABLE IF NOT EXISTS Countries
(id INTEGER, CountryCode TEXT UNIQUE, Country TEXT UNIQUE, Population TEXT, continent_id INTEGER)''')
cur.execute('''CREATE TABLE IF NOT EXISTS Continents
(id INTEGER, Continent TEXT UNIQUE)''')
url = "https://opendata.ecdc.europa.eu/covid19/casedistribution/json/"
f = urllib.request.urlopen(url, context=ctx)
reports = ijson.items(f, 'records.item')
sum = 0
count = 0
# error = 0
for item in reports :
iDataRep = item.get('dateRep')
iCases = item.get('cases')
iDeaths = item.get('deaths')
iCountryCode = item.get('countryterritoryCode')
iCountry = item.get('countriesAndTerritories')
iPopulation = item.get('popData2018')
iContinent = item.get('continentExp')
if len(iDataRep) < 0: iDataRep = 0
if len(iCases) < 0: iCases = 0
if len(iDeaths) < 0: iDeaths = 0
if len(iCountryCode) < 0: iCountryCode = 0
if len(iCountry) < 0: iCountry = 0
if len(iPopulation) < 0: iPopulation = 0
if len(iContinent) < 0: iContinent = 0
Spl = iDataRep.split('/')
iDay = Spl[0]
iMonth = Spl[1]
iYear = Spl[2]
id = count + 1
cur.execute('''INSERT OR IGNORE INTO Continents (id, Continent)
VALUES ( ?, ? )''', (id, iContinent))
cur.execute('''SELECT id FROM Continents WHERE Continent = ? ''', (iContinent, ))
continent_id = cur.fetchone()[0]
cur.execute('''INSERT OR IGNORE INTO Countries (id, CountryCode, Country, Population, continent_id)
VALUES ( ?, ?, ?, ?, ? )''', (id, iCountryCode, iCountry, iPopulation, continent_id) )
cur.execute('''SELECT id FROM Countries WHERE Country = ? ''', (iCountry, ))
country_id = cur.fetchone()[0]
cur.execute('''INSERT OR IGNORE INTO DailyData (id, Day, Month, Year, country_id, continent_id, Cases, Deaths)
VALUES ( ?, ?, ?, ?, ?, ?, ? ,?)''', (id, iDay, iMonth, iYear, country_id, continent_id, iCases, iDeaths) )
conn.commit()
# except:
# error = error + 1
# print(error)
# continue
count = count + 1
print(count, 'data retrieved...')
if count % 95 == 0:
time.sleep(1)
print('Program slept a second.')
numCountry = cur.execute('SELECT max(id) FROM Countries' )
numContinent = cur.execute('SELECT max(id) FROM Continents' )
print('From', numCountry, 'different countries and', numContinent, 'continents', count, 'data retrieved.')
cur.close()
暂无答案!
目前还没有任何答案,快来回答吧!