olx-visualiser/main.py

90 lines
3.2 KiB
Python
Raw Normal View History

2022-07-11 16:36:52 +00:00
#!/home/krzychu/PycharmProjects/parser_OLX/venv/bin/python
from db_con import DbConnect
from sel_source import SelRequest
import logging
import time
from datetime import date
# Module-level logger; records propagate to the root logger configured below.
logger = logging.getLogger(__name__)

# Configure the root logger once. filemode='w' truncates logfile.log on every
# run, so the file only ever holds the most recent run.
# A separate logging.FileHandler('logfile.log') used to be created here but
# was never attached to any logger; it only kept an extra file handle open,
# so it has been dropped.
logging.basicConfig(
    filename='logfile.log',
    filemode='w',
    level=logging.INFO,
    format='%(asctime)s :: %(levelname)s :: %(name)s :: %(message)s',
)
2022-07-19 08:16:05 +00:00
# Database holding the city list (cities_custom) and scraped stats (olx_data).
con = DbConnect("sync/olx_data.db")
con.get_connection()
# Page fetcher / parser used for every city URL.
drive = SelRequest()

# Cities that do not yet have an olx_data row dated today.
# The 'localtime' modifier keeps the database's notion of "today" aligned
# with date.today(), which stamps the inserted rows below. A bare date('now')
# is evaluated in UTC, so whenever the local and UTC dates differ the rows
# just inserted would never match and every city would be re-scraped forever.
# NOTE(review): assumes DbConnect wraps SQLite (date('now') syntax) - confirm.
cities_not_done = """select c.city_id, c.name, c.is_duplicate
from cities_custom c left join
(select city_id from olx_data where date = date('now', 'localtime')) o
on o.city_id=c.city_id where o.city_id is null;"""

# Parameterised insert for one city's statistics; built once, reused per city.
insert_stats = """
INSERT INTO
olx_data(adv_rent_count, adv_sale_count, adv_exchange_count, city_id, date)
VALUES
(?, ?, ?, ?, ?); """

url_base = 'https://www.olx.pl/d/nieruchomosci/mieszkania/'


def _pending_cities():
    """Return (listing_url, city_id) pairs for cities still missing today's row.

    The URL slug is the accent-stripped, lower-cased city name with spaces
    replaced by dashes, followed by the is_duplicate column value.
    """
    # NOTE(review): is_duplicate is concatenated as text - presumably a slug
    # suffix that disambiguates same-named cities; verify against the schema.
    return [
        (url_base + con.remove_accents(name).lower().replace(" ", "-") + suffix + '/', city_id)
        for city_id, name, suffix in con.get_data(cities_not_done)
    ]


cities_list = _pending_cities()

# Keep making passes until no city is pending. A city whose scrape or insert
# failed is picked up again by the re-query at the end of each pass.
# NOTE(review): a city that fails permanently keeps this loop running with no
# delay between passes - consider a pass limit or back-off.
while cities_list:
    for url, city_id in cities_list:
        start = time.time()
        try:
            source = drive.get_source(url)
            values = drive.get_olx_stats(source)
            # Column order must match insert_stats: three counts, city, date.
            con.insert_data(insert_stats, [*values, city_id, date.today()])
        except Exception as e:
            # Best effort per city: record the failure with its traceback and
            # carry on with the next one.
            logger.exception('an exception has occurred:%s', e)
        end = time.time()
        logger.info('loop executed in %s', end - start)
    cities_list = _pending_cities()
2022-07-11 16:36:52 +00:00
# with open('first.csv', 'rb') as inp, open('first_edit.csv', 'wb') as out:
# writer = csv.writer(out)
# for row in csv.reader(inp):
# if row[2] != "0":
# writer.writerow(row)
# add_record = """
# INSERT INTO
# olx_data(city_id, adv_sale_count, adv_rent_count, adv_exchange_count)
# VALUES
# (4548, 629, 837, 7);
# """
# cities_div_urls = {}
# url_base = 'https://www.olx.pl/d/nieruchomosci/mieszkania/'
#
# for el in regions:
# cities_list = [(url_base+removeaccents(i[1]).lower().replace(" ", "-")+'/', i[0]) for i in cities if i[2] == el[1]]
# cities_div_urls[el[0]] = cities_list
#
# reg_urls = [(url_base+removeaccents(el[0]).lower()+'/', el[1]) for el in regions]
# select c.city_id, o.city_id from cities c left join
# (select city_id from olx_data where date = date('now')) o on o.city_id=c.city_id;
# select name, city_id from cities where city_id not in
# (select city_id from olx_data where date=date('now'));
# select name from olx_data join cities on olx_data.city_id = cities.city_id where date = date('now');
# select c.city_id, c.name, c.is_dubble
# from custom c left join
# (select city_id from olx_data where date = date('now')) o
2022-07-19 08:16:05 +00:00
# on o.city_id=c.city_id where o.city_id is null;