Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Download da GiuntiTVP #30

Open
stepersy opened this issue Feb 28, 2023 · 1 comment
Open

Download da GiuntiTVP #30

stepersy opened this issue Feb 28, 2023 · 1 comment

Comments

@stepersy
Copy link

stepersy commented Feb 28, 2023

Ciao! Innanzitutto grazie mille per lo stupendo lavoro che stai facendo!
Ho visto che è stata aggiunta la possibilità di scaricare da GiuntiTVP mediante One Shot Link. Come si usa?
Ho provato a incollare il link di un mio libro (https://mydbook.giuntitvp.it/app/books/GIAC67_G6760798D/pdfParts?prependCollection=BL35LIV07_MYDBOOK2019) ma continua a rispondere "Unable to view this publicly. Aborting...".
Grazieee :)

@ckhmer1
Copy link

ckhmer1 commented Dec 9, 2023

Ciao,
io sono riuscito a farlo usando il seguente script. Il login, non so perché, mi restituisce 401, quindi ho usato i cookie prelevati dal browser.

import os
import re

import fitz
import requests

# Cookies sent with every page request. The values are placeholders and must
# be replaced with the ones copied from a logged-in browser session.
COOKIES = {
	"connect.sid": "XXXXXXXXXXX",
	"elmo_vc": "YYYYYYYYY",
	"PHPSESSID": "ZZZZZ",
	"shbookInitialized": "true",
}

# Identifier of the book to download; it is embedded in every page URL.
BOOK_ID = "GT2023_G3452521A"

# Pages are requested from 1 up to and including this number.
LAST_PAGE_NUMBER = 384

# Per-page URL template: the remaining %s placeholder receives the page number.
BOOK_URL = "https://mydbook.giuntitvp.it/books/{}/pdf/pages/%s?type=".format(BOOK_ID)

# Credentials for login(); only needed if the login() call is enabled.
USERNAME = '[email protected]'
PASSWORD = 'password'

# Downloaded page images are stored in a directory named after the book.
TEMP_DIR = BOOK_ID

# Matches one HTML attribute: its name followed by a double-quoted,
# single-quoted or unquoted value (captured in groups 2, 3 and 4 respectively).
_ATTR_RE = re.compile(
	r"""([^\s=<>/"']+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))"""
)


def getnamevalue(input):
	"""Extract the ``name`` and ``value`` attributes from an ``<input>`` tag.

	The tag text is scanned attribute by attribute, so values that contain
	spaces or ``=`` characters, and values written with single quotes or no
	quotes at all, are returned intact.

	Args:
		input: Text of a single tag, e.g. ``input type="hidden" name="lt" value="x"``.

	Returns:
		A ``(name, value)`` tuple, with ``value`` defaulting to ``""`` when the
		tag has no ``value`` attribute, or ``None`` when the tag has no
		non-empty ``name`` attribute.
	"""
	attrs = {}
	for match in _ATTR_RE.finditer(input):
		key = match.group(1).lower()
		if key in ("name", "value"):
			# Exactly one of the three value groups takes part in a match.
			attrs[key] = next(g for g in match.groups()[1:] if g is not None)
	name = attrs.get("name", "")
	if not name:
		return None
	return name, attrs.get("value", "")


def getlogindata(username, password, timeout=30):
	"""Submit the CAS login form and return the decoded JSON response.

	The login page is fetched first so that the name/value pairs of its
	``<input>`` fields (as extracted by ``getnamevalue``) can be sent back
	together with the credentials. Both requests go through one
	``requests.Session`` so cookies set by the login page are sent along with
	the form submission.

	Args:
		username: Account user name.
		password: Account password.
		timeout: Seconds to wait for each HTTP request before giving up.

	Returns:
		The parsed JSON body when the POST answers with status 200,
		otherwise ``None``.
	"""
	login_url = "https://centralauthentication.giunti.it/cas/login?service=https://mydbook.giuntitvp.it/authentication/cas?iframe=false&return=https://mydbook.giuntitvp.it/app/home"
	service_url = 'https://mydbook.giuntitvp.it/authentication/cas?iframe=false'
	return_url = 'https://mydbook.giuntitvp.it/app/home'
	data = {
		"username": username,
		"password": password,
		"submit": "Invia",
		"service": [service_url, service_url],
		"return": [return_url, return_url],
	}
	with requests.Session() as session:
		form_page = session.get(login_url, timeout=timeout)
		for cookie in form_page.cookies:
			print(cookie.name, cookie.value)
		# Isolate every "<input ...>" fragment found on lines that contain one.
		tags = [
			fragment
			for line in form_page.text.split("\n") if '<input' in line
			for fragment in line.split("<") if fragment.startswith('input')
		]
		for tag in tags:
			namevalue = getnamevalue(tag)
			if namevalue is None:
				continue
			n, v = namevalue
			if n in ("username", "password"):
				# Keep the caller's credentials; the form's own (empty) default
				# must not be submitted alongside them.
				continue
			if n in data:
				# Repeated field names are submitted as a list of values.
				current = data[n]
				if isinstance(current, str):
					data[n] = [current, v]
				else:
					current.append(v)
			else:
				data[n] = v
		# Debug output of the form payload, with the password masked.
		print({k: ("***" if k == "password" else v) for k, v in data.items()})
		response = session.post(login_url, data=data, timeout=timeout)
		print(response.status_code)
		print(response.encoding)
		print(response.text)
		if response.status_code != 200:
			return None
		# utf-8-sig strips a leading byte-order mark before the JSON is parsed.
		response.encoding = "utf-8-sig"
		return response.json()


def login(username, password):
	"""Log in and build the ``accessToken/userId`` string.

	Args:
		username: Account user name.
		password: Account password.

	Returns:
		``"<accessToken>/<userId>"`` on success. ``None`` when the login
		request failed, the response carries an ``error`` field, or the
		returned user id is ``0``; a short message is printed in each case.
	"""
	logindata = getlogindata(username, password)
	print(logindata)
	# getlogindata() returns None on a non-200 answer, so emptiness has to be
	# checked before any membership test or .get() call.
	if not logindata:
		print("Login failed!")
		return None
	if "error" in logindata:
		if logindata.get("error") == "1":
			print("Incorrect credentials!")
		else:
			print("Login failed!")
		return None
	userid = str(logindata["userId"])
	if userid == "0":
		print("Unauthorized!")
		return None
	return logindata["accessToken"] + "/" + userid

def downloadfile(url, timeout=30):
	"""Download ``url`` with the configured cookies and return its body.

	Args:
		url: Address of the resource to fetch.
		timeout: Seconds to wait for the server before giving up.

	Returns:
		The response body as ``bytes`` when the server answers with status
		200, otherwise ``None``.
	"""
	# The context manager releases the streamed connection on every path,
	# including the early return for a non-200 status.
	with requests.get(
		url,
		stream=True,
		headers={"Referer": "https://mydbook.giuntitvp.it"},
		cookies=COOKIES,
		timeout=timeout,
	) as response:
		if response.status_code != 200:
			return None
		# Join once instead of growing a bytes object chunk by chunk.
		return b"".join(response.iter_content(chunk_size=102400))

def get_page(p):
	"""Download page ``p`` of the book and store it as ``page_<p>.jpg`` in TEMP_DIR.

	Nothing is written when the download fails or returns an empty body; a
	message is printed instead so the missing page does not go unnoticed.

	Args:
		p: Page number (integer) substituted into BOOK_URL.
	"""
	url = BOOK_URL % str(p)
	dta = downloadfile(url)
	if not dta:
		print("Page %d could not be downloaded" % p)
		return
	# "with" closes the file even if the write raises.
	with open(os.path.join(TEMP_DIR, "page_%d.jpg" % p), "wb") as f:
		f.write(dta)


def download_book():
	"""Fetch pages 1 through LAST_PAGE_NUMBER, printing each page number first."""
	for offset in range(LAST_PAGE_NUMBER):
		page_number = offset + 1
		print(page_number)
		get_page(page_number)

def create_pdf():
	"""Assemble the downloaded page images into ``<BOOK_ID>.pdf``.

	Each ``page_<n>.jpg`` in TEMP_DIR becomes one PDF page with the same
	dimensions as the image. Pages whose image file is missing are skipped
	with a message. Every document opened along the way is closed again.
	"""
	doc = fitz.open()  # output PDF that collects the pictures
	try:
		for p in range(1, LAST_PAGE_NUMBER + 1):
			f = os.path.join(TEMP_DIR, "page_%d.jpg" % p)
			if not os.path.isfile(f):
				# get_page() writes no file when a download fails.
				print("Skipping page %d: %s not found" % (p, f))
				continue
			img = fitz.open(f)  # open the picture as a document
			try:
				rect = img[0].rect  # picture dimensions
				pdfbytes = img.convert_to_pdf()  # picture as a PDF stream
			finally:
				img.close()
			imgPDF = fitz.open("pdf", pdfbytes)  # open the stream as a PDF
			try:
				# New page with the picture's dimensions, filled by the image.
				page = doc.new_page(width=rect.width, height=rect.height)
				page.show_pdf_page(rect, imgPDF, 0)
			finally:
				imgPDF.close()
			print(p)

		doc.save("%s.pdf" % BOOK_ID)
	finally:
		doc.close()


if __name__ == "__main__":
	# Make sure the directory for the page images exists before downloading.
	os.makedirs(TEMP_DIR, exist_ok=True)
	# The login step is disabled; page requests rely on COOKIES instead.
	#login(USERNAME, PASSWORD)
	download_book()
	create_pdf()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants