diff --git a/db_con.py b/db_con.py
index e239e06..0dd9c81 100644
--- a/db_con.py
+++ b/db_con.py
@@ -1,7 +1,7 @@
 import sqlite3
 from sqlite3 import Error
 import logging
-
+import time
 logger = logging.getLogger(__name__)
 
 logging.FileHandler('logfile.log')
@@ -28,11 +28,13 @@ class DbConnect:
     def insert_data(self, sql_query, values): # insert
         cursor = self.connection.cursor()
         try:
+            start = time.time()
             cursor.execute(sql_query, values)
             self.connection.commit()
+            end = time.time()
             logger.debug(sql_query)
             logger.debug(values)
-            logger.info("SQL query run successfully in ... s") #dodać czas
+            logger.info(f"SQL query run successfully in {end-start} s")
         except Error as e:
             print(f" Query Failed……{e}")
 
diff --git a/main.py b/main.py
index 1aa8ae9..54d911e 100644
--- a/main.py
+++ b/main.py
@@ -12,41 +12,44 @@
 logging.FileHandler('logfile.log')
 logging.basicConfig(filename='logfile.log', filemode='w', level=logging.INFO,
                     format='%(asctime)s :: %(levelname)s :: %(name)s :: %(message)s')
-
 con = DbConnect("sync/olx_data.db")
 con.get_connection()
-
-cities = con.get_data("""select c.city_id, c.name, c.is_duplicate
-    from cities_custom c left join
-    (select city_id from olx_data where date = date('now')) o
-    on o.city_id=c.city_id where o.city_id is null;""")
-
-# double, cities_custom
-
-url_base = 'https://www.olx.pl/d/nieruchomosci/mieszkania/'
-cities_list = [(url_base + con.remove_accents(i[1]).lower().replace(" ", "-") + i[2] + '/', i[0]) for i in cities]
-
 drive = SelRequest()
-for i in cities_list:
-    start = time.time()
-    curTime = date.today()
-    print(i[0])
-    source = drive.get_source(i[0])
-    values = drive.get_olx_stats(source)
-    values.append(i[1])
-    values.append(curTime)
-    end = time.time()
-    if end-start < 2:
-        print("Too quick, something is wrong with: ", i[0], '-- id:', i[1])
-        pass
-    query = """
-    INSERT INTO
-    olx_data(adv_rent_count, adv_sale_count, adv_exchange_count, city_id, date)
-    VALUES
-    (?, ?, ?, ?, ?); """
-    con.insert_data(query, values)
-    logger.info(f'loop executed in {end-start}')
+cities_not_done = """select c.city_id, c.name, c.is_duplicate
+    from cities_custom c left join
+    (select city_id from olx_data where date = date('now')) o
+    on o.city_id=c.city_id where o.city_id is null;"""
+
+cities = con.get_data(cities_not_done)
+url_base = 'https://www.olx.pl/d/nieruchomosci/mieszkania/'
+cities_list = [(url_base + con.remove_accents(i[1]).lower().replace(" ", "-") + i[2] + '/', i[0]) for i in cities]
+not_empty = len(cities_list)
+
+while not_empty > 0:
+    for i in cities_list:
+        start = time.time()
+        curTime = date.today()
+        # print(i[0])
+        try:
+            source = drive.get_source(i[0])
+            values = drive.get_olx_stats(source)
+            values.append(i[1])
+            values.append(curTime)
+            query = """
+            INSERT INTO
+            olx_data(adv_rent_count, adv_sale_count, adv_exchange_count, city_id, date)
+            VALUES
+            (?, ?, ?, ?, ?); """
+            con.insert_data(query, values)
+        except Exception as e:
+            logger.info(f'an exception has occurred:{e}')
+            pass
+        end = time.time()
+        logger.info(f'loop executed in {end-start}')
+    cities = con.get_data(cities_not_done)
+    cities_list = [(url_base + con.remove_accents(i[1]).lower().replace(" ", "-") + i[2] + '/', i[0]) for i in cities]
+    not_empty = len(cities_list)
 
 
 
 # with open('first.csv', 'rb') as inp, open('first_edit.csv', 'wb') as out:
diff --git a/sel_source.py b/sel_source.py
index 422d591..30497f7 100644
--- a/sel_source.py
+++ b/sel_source.py
@@ -43,8 +43,8 @@ class SelRequest:
     def get_olx_stats(self, source):
         soup = BeautifulSoup(source, "html.parser")
         span = soup.find_all('a', {'class': 'css-pyvavn'})
-        for i in span:
-            print(i.contents)
+        # for i in span:
+        #     print(i.contents)
         # this is how span looks like:
         # [Wynajem15,
         #  Sprzedaż35,
@@ -72,7 +72,7 @@
             except ValueError:
                 stat.append(int(value.replace(u'\xa0', u'')))
         logger.info(f'found data: {stat}')
-        print(stat)
+        # print(stat)
         return stat
 
 