PLS/POT scraping
The snippet can be accessed without any authentication.
Authored by
Mattia Monga
Edited
plspot.py 1.32 KiB
from selenium import webdriver
from selenium.webdriver.common.by import By
# Site credentials and URL identifiers used by the script below.
# Every value is a placeholder/example and must be edited before running.
LOGIN="qui serve la login d'accesso" # CHANGE ME: placeholder, put the access login here
PASS="qui serve la password d'accesso" # CHANGE ME: placeholder, put the access password here
ATENEO='15' # CHANGE ME: this value is unimi
PROPOSTA='561' # CHANGE ME: this is the id of the Informatica (Computer Science) project proposal
# Log into plspot.cineca.it, collect the partner ids linked from the proposal's
# partner summary page, then print one comma-separated line per partner built
# from that partner's indicator page.
driver = webdriver.Chrome()
try:
    # Fill in and submit the login form on the landing page.
    driver.get("https://plspot.cineca.it")
    username = driver.find_element(By.NAME, value='username')
    username.send_keys(LOGIN)
    passwd = driver.find_element(By.NAME, value='password')
    passwd.send_keys(PASS)
    submit_button = driver.find_element(by=By.NAME, value="login")
    submit_button.click()

    # Common prefix of every proposal page visited below.
    base_url = f"https://plspot.cineca.it/ateneo/{ATENEO}/domanda/{PROPOSTA}"

    # The summary page links to one 'indicatori-partner/<id>' page per partner;
    # the partner id is the last path segment of each such link.
    driver.get(f"{base_url}/domande-riepilogo-partner")
    PARTNERS = []
    for a in driver.find_elements(by=By.CSS_SELECTOR, value="a"):
        href = a.get_attribute('href')
        # get_attribute returns None for anchors that have no href; skip those
        # instead of failing with TypeError on the substring test.
        if href and 'indicatori-partner' in href:
            PARTNERS.append(href.split('/')[-1])

    for p in PARTNERS:
        driver.get(f"{base_url}/indicatori-partner/{p}")
        hh = driver.find_elements(by=By.CSS_SELECTOR, value="h4")
        # Second <h4>: keep the text after the first '-' and drop its first
        # 7 characters (presumably a fixed label preceding the partner name;
        # verify against the live page).
        ateneo = hh[1].text.split('-')[1][7:]
        # For each table body, the second text line must hold exactly two
        # space-separated values, which are appended to the output line.
        for t in driver.find_elements(by=By.CSS_SELECTOR, value="tbody"):
            n, d = t.text.split('\n')[1].split(' ')
            ateneo += f", {n}, {d}"
        print(ateneo)
finally:
    # Always shut the browser down, even if login or scraping raised,
    # so no Chrome/chromedriver process is left behind.
    driver.quit()
Please register or sign in to comment