# Print the text so it can be copied from the console.
# Nothing is interpolated, so a plain string literal is used rather than an
# f-string without placeholders.
print("A \n B \n C")
# Output (the spaces around each "\n" in the literal are preserved, so the
# first line ends with a space and the next two lines start with one):
# A
#  B
#  C
from tika import parser

filename = 'myfile.pdf'

# Parse the PDF. The result is indexed by key below; the extracted text is
# read from its "content" entry.
parsedPDF = parser.from_file(filename)

# Extract the text content from the parsed PDF. Tika can report "content" as
# None when it finds no extractable text (for example an image-only PDF), so
# fall back to an empty string instead of failing on None.replace().
pdf = parsedPDF["content"] or ''

# Collapse each pair of consecutive newlines into a single newline. This is a
# single non-overlapping pass: a run of four newlines becomes two, not one.
pdf = pdf.replace('\n\n', '\n')

#####################################
# Do something with the PDF
#####################################
print(pdf)
18
1>>> import re
2>>> re.sub('\r?\n', ' $ ', 'a\r\nb\r\nc')
3'a $ b $ c'
4>>> re.sub('\r?\n', ' $ ', 'a\nb\nc')
5'a $ b $ c'
6