First, uncompress the PDF with pdftk (if it is compressed):
# Input: myfile.pdf. Output: unc.pdf, the uncompressed copy that the awk filter below reads.
pdftk myfile.pdf output unc.pdf uncompress
Then remove every object that contains any of the keywords "PDF-XChange", "pdfxviewer.com", "PDFXCViewer20" or "Click to buy NOW":
LC_ALL=C awk '
# Filter an uncompressed PDF line by line.
#
# Every indirect object (from a line starting with "<num> <gen> obj" up to
# and including the line containing "endobj") is buffered. When the object
# is complete it is printed unchanged, unless one of its lines matched a
# watermark keyword, in which case the whole object is dropped.
# Lines outside of any object are passed through untouched.
#
# LC_ALL=C on the command line makes awk treat the input as plain bytes,
# so binary stream data is not interpreted as multibyte characters.

# emit_object: print the buffered object unless it was flagged, then reset
# all per-object state. "i" is a local variable (awk idiom: extra parameter).
function emit_object(    i) {
    if (!found) {
        for (i = 0; i < lineCount; i++) {
            print objectLines[i]
        }
    }
    # Clear element by element and rely on the counter instead of
    # length(array) / whole-array delete, which not every awk supports.
    for (i = 0; i < lineCount; i++) {
        delete objectLines[i]
    }
    lineCount = 0
    inObject = 0
    found = 0
}

# Start of an indirect object.
/^[0-9 ]+obj/ {
    # The previous object never saw its "endobj": finish it first so its
    # lines are neither lost nor mixed into the new object.
    if (inObject) {
        emit_object()
    }
    inObject = 1
}

# Inside an object (including its header line): buffer and inspect the line.
inObject {
    objectLines[lineCount++] = $0
    # The dot is escaped so that only a literal "pdfxviewer.com" matches.
    if ($0 ~ /PDF-XChange|pdfxviewer\.com|PDFXCViewer20|Click to buy NOW/) {
        found = 1
    }
    # Also true for the header line, so one-line objects are closed properly.
    if ($0 ~ /endobj/) {
        emit_object()
    }
    next
}

# Anything outside of an object (header, xref table, trailer, ...).
{
    print
}

# Input ended in the middle of an object: do not silently discard it.
END {
    if (inObject) {
        emit_object()
    }
}
' unc.pdf > test.pdf
Recompress and repair the PDF with pdftk:
# Input: test.pdf (the awk filter's output). Output: comp.pdf, written by pdftk with the compress option.
pdftk test.pdf output comp.pdf compress
Unfortunately, this also removed the OCR layer. I couldn’t find out which object is responsible for the OCR.