Sebenarnya saya menemukan sebuah masalah kecil, yakni bagaimana me-rename file-file jurnal berbentuk PDF yang baru saja saya unduh dari situs web Elsevier. Kebetulan file-filenya hanya dinamai sesuai bagian jurnalnya, sementara yang saya inginkan adalah agar setiap file diberi nama sesuai dengan judul karya tulis yang diberikan oleh penyusunnya. Me-rename-nya dengan membuka file satu per satu untuk melihat judulnya tentu sangat membosankan. Agar lebih jelas, perhatikan gambar berikut:
Setelah bergulat dengan waktu, akhirnya saya bisa membuat program sederhana untuk maksud tersebut, sehingga file-filenya bisa di-rename sesuai dengan judulnya. Kendati demikian, masih ada sebagian yang belum berhasil di-rename, dan sampai saat ini saya masih mencari tahu di mana masalahnya. Adapun hasil rename-nya adalah sebagai berikut:
Source code-nya:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package pdf; | |
import java.io.File; | |
import com.itextpdf.text.pdf.PdfReader; | |
import com.itextpdf.text.pdf.parser.PdfReaderContentParser; | |
public class TestPdf1 { | |
public static void main(String[] args) | |
{ | |
PdfReader pdfreader; | |
MyTextExtraction extract; | |
PdfReaderContentParser parser; | |
String folder = "E:/ELSEVIER"; | |
File f = new File(folder); | |
File ff[] = f.listFiles(); | |
String res = ""; | |
String tempe = ""; | |
int gagal = 0, berhasil= 0; | |
for (int i = 0; i < ff.length; i++) | |
{ | |
try { | |
extract = new MyTextExtraction(); | |
pdfreader = new PdfReader(ff[i].getAbsolutePath()); | |
parser = new PdfReaderContentParser(pdfreader); | |
parser.processContent(1, extract); | |
res = extract.getResult(); | |
res = res.replaceAll("()@^[.\\'`/:*?\"<>|]?[\\/:*?\"<>|]-*", " "); | |
// if(ff[i].getName().contains("_2")){ | |
// continue; | |
// } | |
// System.out.println("Hasil: "+res); | |
pdfreader.close(); | |
String b = ff[i].getAbsolutePath(); | |
int c = b.lastIndexOf("\\"); | |
tempe = b.substring(0, c); | |
// if (ff[i].getName().contains("_1")) { | |
// b.replace("_1", ""); | |
// } | |
tempe = tempe.replace("\\", "/").concat("/").concat(res) | |
.concat(".pdf"); | |
boolean sukses = ff[i].renameTo(new File(tempe)); | |
if (sukses) { | |
// System.out.print("berhasil======================================================= " + ff[i].getAbsolutePath() | |
// + " | " + res+"\n"); | |
berhasil++; | |
}else{ | |
System.out.println("gagal: "+res +" ============= "+ ff[i].getAbsolutePath()); | |
gagal++; | |
} | |
} catch (Exception e) { | |
e.printStackTrace(); | |
} | |
f = null; | |
} | |
System.out.println("FINISH..., berhasil: "+berhasil+" gagal: "+gagal); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package pdf; | |
import com.itextpdf.text.pdf.parser.ImageRenderInfo; | |
import com.itextpdf.text.pdf.parser.TextExtractionStrategy; | |
import com.itextpdf.text.pdf.parser.TextRenderInfo; | |
import com.itextpdf.text.pdf.parser.Vector; | |
import com.itextpdf.text.Rectangle; | |
public class MyTextExtraction implements TextExtractionStrategy { | |
int i = 0; | |
StringBuilder builder = new StringBuilder(); | |
boolean state = true; | |
float beforeFontSize = 0; | |
@Override | |
public void beginTextBlock() { | |
// TODO Auto-generated method stub | |
} | |
@Override | |
public void renderText(TextRenderInfo renderInfo) { | |
// TODO Auto-generated method stub | |
// int font = renderInfo.getFont().getFontType(); | |
String st = renderInfo.getText(); | |
Vector curBaseline = renderInfo.getBaseline().getStartPoint(); | |
Vector topRight = renderInfo.getAscentLine().getEndPoint(); | |
Rectangle rect = new Rectangle(curBaseline.get(0), curBaseline.get(1), | |
topRight.get(0), topRight.get(1)); | |
float curFontSize = rect.getHeight(); | |
// ket: jika getFont.getFontType() dipanggil maka (++i) akan bertambah, tetapi jika | |
// tidak maka ia tetap 0 :D | |
// System.out.println("test: " + curFontSize + " " + (++i) + " " + st); | |
if (beforeFontSize <= curFontSize && state) { | |
beforeFontSize = curFontSize; | |
builder.append(st).append(" "); | |
// System.out | |
// .println("================================================before font: " | |
// + beforeFontSize); | |
} | |
if (curFontSize < beforeFontSize) { | |
state = false; | |
} | |
} | |
public String getResult() { | |
return builder.toString(); | |
} | |
@Override | |
public void endTextBlock() { | |
// TODO Auto-generated method stub | |
} | |
@Override | |
public void renderImage(ImageRenderInfo renderInfo) { | |
// TODO Auto-generated method stub | |
} | |
@Override | |
public String getResultantText() { | |
// TODO Auto-generated method stub | |
return null; | |
} | |
} |