diff --git a/db_con.py b/db_con.py
index e239e06..0dd9c81 100644
--- a/db_con.py
+++ b/db_con.py
@@ -1,7 +1,7 @@
import sqlite3
from sqlite3 import Error
import logging
-
+import time
logger = logging.getLogger(__name__)
logging.FileHandler('logfile.log')
@@ -28,11 +28,13 @@ class DbConnect:
def insert_data(self, sql_query, values): # insert
cursor = self.connection.cursor()
try:
+ start = time.time()
cursor.execute(sql_query, values)
self.connection.commit()
+ end = time.time()
logger.debug(sql_query)
logger.debug(values)
- logger.info("SQL query run successfully in ... s") #dodać czas
+        logger.info(f"SQL query run successfully in {end - start:.3f} s")
except Error as e:
print(f" Query Failed……{e}")
diff --git a/main.py b/main.py
index 1aa8ae9..54d911e 100644
--- a/main.py
+++ b/main.py
@@ -12,41 +12,44 @@ logging.FileHandler('logfile.log')
logging.basicConfig(filename='logfile.log', filemode='w', level=logging.INFO,
format='%(asctime)s :: %(levelname)s :: %(name)s :: %(message)s')
-
con = DbConnect("sync/olx_data.db")
con.get_connection()
-
-cities = con.get_data("""select c.city_id, c.name, c.is_duplicate
- from cities_custom c left join
- (select city_id from olx_data where date = date('now')) o
- on o.city_id=c.city_id where o.city_id is null;""")
-
-# double, cities_custom
-
-url_base = 'https://www.olx.pl/d/nieruchomosci/mieszkania/'
-cities_list = [(url_base + con.remove_accents(i[1]).lower().replace(" ", "-") + i[2] + '/', i[0]) for i in cities]
-
drive = SelRequest()
-for i in cities_list:
- start = time.time()
- curTime = date.today()
- print(i[0])
- source = drive.get_source(i[0])
- values = drive.get_olx_stats(source)
- values.append(i[1])
- values.append(curTime)
- end = time.time()
- if end-start < 2:
- print("Too quick, something is wrong with: ", i[0], '-- id:', i[1])
- pass
- query = """
- INSERT INTO
- olx_data(adv_rent_count, adv_sale_count, adv_exchange_count, city_id, date)
- VALUES
- (?, ?, ?, ?, ?); """
- con.insert_data(query, values)
- logger.info(f'loop executed in {end-start}')
+cities_not_done = """select c.city_id, c.name, c.is_duplicate
+ from cities_custom c left join
+ (select city_id from olx_data where date = date('now')) o
+ on o.city_id=c.city_id where o.city_id is null;"""
+
+cities = con.get_data(cities_not_done)
+url_base = 'https://www.olx.pl/d/nieruchomosci/mieszkania/'
+cities_list = [(url_base + con.remove_accents(i[1]).lower().replace(" ", "-") + i[2] + '/', i[0]) for i in cities]
+not_empty = len(cities_list)
+
+while not_empty > 0:  # NOTE(review): retries failed cities forever — consider a max-attempt cap or backoff
+ for i in cities_list:
+ start = time.time()
+ curTime = date.today()
+ # print(i[0])
+ try:
+ source = drive.get_source(i[0])
+ values = drive.get_olx_stats(source)
+ values.append(i[1])
+ values.append(curTime)
+ query = """
+ INSERT INTO
+ olx_data(adv_rent_count, adv_sale_count, adv_exchange_count, city_id, date)
+ VALUES
+ (?, ?, ?, ?, ?); """
+ con.insert_data(query, values)
+ except Exception as e:
+            logger.exception(f'an exception has occurred: {e}')
+            # failed city stays in cities_not_done and is retried on the next while-pass
+ end = time.time()
+ logger.info(f'loop executed in {end-start}')
+ cities = con.get_data(cities_not_done)
+ cities_list = [(url_base + con.remove_accents(i[1]).lower().replace(" ", "-") + i[2] + '/', i[0]) for i in cities]
+ not_empty = len(cities_list)
# with open('first.csv', 'rb') as inp, open('first_edit.csv', 'wb') as out:
diff --git a/sel_source.py b/sel_source.py
index 422d591..30497f7 100644
--- a/sel_source.py
+++ b/sel_source.py
@@ -43,8 +43,8 @@ class SelRequest:
def get_olx_stats(self, source):
soup = BeautifulSoup(source, "html.parser")
span = soup.find_all('a', {'class': 'css-pyvavn'})
- for i in span:
- print(i.contents)
+ # for i in span:
+ # print(i.contents)
# this is how span looks like:
# [Wynajem15,
# Sprzedaż35,
@@ -72,7 +72,7 @@ class SelRequest:
except ValueError:
stat.append(int(value.replace(u'\xa0', u'')))
logger.info(f'found data: {stat}')
- print(stat)
+ # print(stat)
return stat