Now it runs in a loop until all cities are done.
This commit is contained in:
parent
bdd8ebe258
commit
9db83ae3b8
@ -1,7 +1,7 @@
|
|||||||
import sqlite3
|
import sqlite3
|
||||||
from sqlite3 import Error
|
from sqlite3 import Error
|
||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
logging.FileHandler('logfile.log')
|
logging.FileHandler('logfile.log')
|
||||||
@ -28,11 +28,13 @@ class DbConnect:
|
|||||||
def insert_data(self, sql_query, values): # insert
|
def insert_data(self, sql_query, values): # insert
|
||||||
cursor = self.connection.cursor()
|
cursor = self.connection.cursor()
|
||||||
try:
|
try:
|
||||||
|
start = time.time()
|
||||||
cursor.execute(sql_query, values)
|
cursor.execute(sql_query, values)
|
||||||
self.connection.commit()
|
self.connection.commit()
|
||||||
|
end = time.time()
|
||||||
logger.debug(sql_query)
|
logger.debug(sql_query)
|
||||||
logger.debug(values)
|
logger.debug(values)
|
||||||
logger.info("SQL query run successfully in ... s") #dodać czas
|
logger.info(f"SQL query run successfully in {end-start} s")
|
||||||
except Error as e:
|
except Error as e:
|
||||||
print(f" Query Failed……{e}")
|
print(f" Query Failed……{e}")
|
||||||
|
|
||||||
|
65
main.py
65
main.py
@ -12,41 +12,44 @@ logging.FileHandler('logfile.log')
|
|||||||
logging.basicConfig(filename='logfile.log', filemode='w', level=logging.INFO,
|
logging.basicConfig(filename='logfile.log', filemode='w', level=logging.INFO,
|
||||||
format='%(asctime)s :: %(levelname)s :: %(name)s :: %(message)s')
|
format='%(asctime)s :: %(levelname)s :: %(name)s :: %(message)s')
|
||||||
|
|
||||||
|
|
||||||
con = DbConnect("sync/olx_data.db")
|
con = DbConnect("sync/olx_data.db")
|
||||||
con.get_connection()
|
con.get_connection()
|
||||||
|
|
||||||
cities = con.get_data("""select c.city_id, c.name, c.is_duplicate
|
|
||||||
from cities_custom c left join
|
|
||||||
(select city_id from olx_data where date = date('now')) o
|
|
||||||
on o.city_id=c.city_id where o.city_id is null;""")
|
|
||||||
|
|
||||||
# double, cities_custom
|
|
||||||
|
|
||||||
url_base = 'https://www.olx.pl/d/nieruchomosci/mieszkania/'
|
|
||||||
cities_list = [(url_base + con.remove_accents(i[1]).lower().replace(" ", "-") + i[2] + '/', i[0]) for i in cities]
|
|
||||||
|
|
||||||
drive = SelRequest()
|
drive = SelRequest()
|
||||||
|
|
||||||
for i in cities_list:
|
cities_not_done = """select c.city_id, c.name, c.is_duplicate
|
||||||
start = time.time()
|
from cities_custom c left join
|
||||||
curTime = date.today()
|
(select city_id from olx_data where date = date('now')) o
|
||||||
print(i[0])
|
on o.city_id=c.city_id where o.city_id is null;"""
|
||||||
source = drive.get_source(i[0])
|
|
||||||
values = drive.get_olx_stats(source)
|
cities = con.get_data(cities_not_done)
|
||||||
values.append(i[1])
|
url_base = 'https://www.olx.pl/d/nieruchomosci/mieszkania/'
|
||||||
values.append(curTime)
|
cities_list = [(url_base + con.remove_accents(i[1]).lower().replace(" ", "-") + i[2] + '/', i[0]) for i in cities]
|
||||||
end = time.time()
|
not_empty = len(cities_list)
|
||||||
if end-start < 2:
|
|
||||||
print("Too quick, something is wrong with: ", i[0], '-- id:', i[1])
|
while not_empty > 0:
|
||||||
pass
|
for i in cities_list:
|
||||||
query = """
|
start = time.time()
|
||||||
INSERT INTO
|
curTime = date.today()
|
||||||
olx_data(adv_rent_count, adv_sale_count, adv_exchange_count, city_id, date)
|
# print(i[0])
|
||||||
VALUES
|
try:
|
||||||
(?, ?, ?, ?, ?); """
|
source = drive.get_source(i[0])
|
||||||
con.insert_data(query, values)
|
values = drive.get_olx_stats(source)
|
||||||
logger.info(f'loop executed in {end-start}')
|
values.append(i[1])
|
||||||
|
values.append(curTime)
|
||||||
|
query = """
|
||||||
|
INSERT INTO
|
||||||
|
olx_data(adv_rent_count, adv_sale_count, adv_exchange_count, city_id, date)
|
||||||
|
VALUES
|
||||||
|
(?, ?, ?, ?, ?); """
|
||||||
|
con.insert_data(query, values)
|
||||||
|
except Exception as e:
|
||||||
|
logger.info(f'an exception has occurred:{e}')
|
||||||
|
pass
|
||||||
|
end = time.time()
|
||||||
|
logger.info(f'loop executed in {end-start}')
|
||||||
|
cities = con.get_data(cities_not_done)
|
||||||
|
cities_list = [(url_base + con.remove_accents(i[1]).lower().replace(" ", "-") + i[2] + '/', i[0]) for i in cities]
|
||||||
|
not_empty = len(cities_list)
|
||||||
|
|
||||||
|
|
||||||
# with open('first.csv', 'rb') as inp, open('first_edit.csv', 'wb') as out:
|
# with open('first.csv', 'rb') as inp, open('first_edit.csv', 'wb') as out:
|
||||||
|
@ -43,8 +43,8 @@ class SelRequest:
|
|||||||
def get_olx_stats(self, source):
|
def get_olx_stats(self, source):
|
||||||
soup = BeautifulSoup(source, "html.parser")
|
soup = BeautifulSoup(source, "html.parser")
|
||||||
span = soup.find_all('a', {'class': 'css-pyvavn'})
|
span = soup.find_all('a', {'class': 'css-pyvavn'})
|
||||||
for i in span:
|
# for i in span:
|
||||||
print(i.contents)
|
# print(i.contents)
|
||||||
# this is how span looks like:
|
# this is how span looks like:
|
||||||
# [<a class="css-pyvavn" href="/d/nieruchomosci/mieszkania/wynajem/boleslawiec">Wynajem<span class="css-wz88">15</span></a>,
|
# [<a class="css-pyvavn" href="/d/nieruchomosci/mieszkania/wynajem/boleslawiec">Wynajem<span class="css-wz88">15</span></a>,
|
||||||
# <a class="css-pyvavn" href="/d/nieruchomosci/mieszkania/sprzedaz/boleslawiec">Sprzedaż<span class="css - wz88">35</span></a>,
|
# <a class="css-pyvavn" href="/d/nieruchomosci/mieszkania/sprzedaz/boleslawiec">Sprzedaż<span class="css - wz88">35</span></a>,
|
||||||
@ -72,7 +72,7 @@ class SelRequest:
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
stat.append(int(value.replace(u'\xa0', u'')))
|
stat.append(int(value.replace(u'\xa0', u'')))
|
||||||
logger.info(f'found data: {stat}')
|
logger.info(f'found data: {stat}')
|
||||||
print(stat)
|
# print(stat)
|
||||||
return stat
|
return stat
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user