diff --git a/db_con.py b/db_con.py index dfaa9e2..221a26f 100644 --- a/db_con.py +++ b/db_con.py @@ -5,7 +5,7 @@ import logging logger = logging.getLogger(__name__) logging.FileHandler('logfile.log') -logging.basicConfig(filename='logfile.log', filemode='a', level=logging.INFO, +logging.basicConfig(filename='logfile.log', filemode='w', level=logging.INFO, format='%(asctime)s :: %(levelname)s :: %(name)s :: %(message)s') diff --git a/main.py b/main.py index 8b76355..1d4597c 100644 --- a/main.py +++ b/main.py @@ -17,39 +17,36 @@ def removeaccents(input_text): logger = logging.getLogger(__name__) logging.FileHandler('logfile.log') -logging.basicConfig(filename='logfile.log', filemode='a', level=logging.INFO, +logging.basicConfig(filename='logfile.log', filemode='w', level=logging.INFO, format='%(asctime)s :: %(levelname)s :: %(name)s :: %(message)s') con = DbConnect("olx_data.db") con.get_connection() -cities = con.get_data("""select c.city_id, c.name - from cities c left join - (select city_id from olx_data where date = date('now')) o - on o.city_id=c.city_id;""") +cities = con.get_data("""select city_id, name, is_dubble + from custom where city_id not in + (select city_id from olx_data + where date = date('now'));""") url_base = 'https://www.olx.pl/d/nieruchomosci/mieszkania/' -cities_list = [(url_base + removeaccents(i[1]).lower().replace(" ", "-") + '/', i[0]) for i in cities] +cities_list = [(url_base + removeaccents(i[1]).lower().replace(" ", "-") + i[2] + '/', i[0]) for i in cities] -with open('urls.csv', 'w', newline='') as f: - writer = csv.writer(f, lineterminator='\n') - header = ['rent', 'sale', 'exchange', 'city_id'] - writer.writerow(header) - for i in cities_list: - start = time.time() - curTime = date.today() - row = SelRequest(i[0]).get_olx_stats() - row.append(i[1]) - row.append(curTime) - record = """ - INSERT INTO - olx_data(adv_rent_count, adv_sale_count, adv_exchange_count, city_id, date) - VALUES - (?, ?, ?, ?, ?); """ - con.run_query(record, row) - end = time.time() - logger.info(f'executed in {end-start}') + +for i in cities_list: + start = time.time() + curTime = date.today() + row = SelRequest(i[0]).get_olx_stats() + row.append(i[1]) + row.append(curTime) + record = """ + INSERT INTO + olx_data(adv_rent_count, adv_sale_count, adv_exchange_count, city_id, date) + VALUES + (?, ?, ?, ?, ?); """ + con.run_query(record, row) + end = time.time() + logger.info(f'executed in {end-start}') # with open('first.csv', 'rb') as inp, open('first_edit.csv', 'wb') as out: @@ -82,3 +79,8 @@ with open('urls.csv', 'w', newline='') as f: # (select city_id from olx_data where date=date('now')); # select name from olx_data join cities on olx_data.city_id = cities.city_id where date = date('now'); + +# select c.city_id, c.name, c.is_dubble +# from custom c left join +# (select city_id from olx_data where date = date('now')) o +# on o.city_id=c.city_id; diff --git a/olx_data.db b/olx_data.db index 37aadbd..4f85fc7 100644 Binary files a/olx_data.db and b/olx_data.db differ diff --git a/sel_source.py b/sel_source.py index ed9129d..cd3af0c 100644 --- a/sel_source.py +++ b/sel_source.py @@ -6,7 +6,7 @@ import time logger = logging.getLogger(__name__) logging.FileHandler('logfile.log') -logging.basicConfig(filename='logfile.log', filemode='a', level=logging.INFO, +logging.basicConfig(filename='logfile.log', filemode='w', level=logging.INFO, format='%(asctime)s :: %(levelname)s :: %(name)s :: %(message)s') diff --git a/urls.csv b/urls.csv deleted file mode 100644 index 94bdf34..0000000 --- a/urls.csv +++ /dev/null @@ -1 +0,0 @@ -rent,sale,exchange,city_id