b63bd7a13150ebe41e4d1524109c97dc9c7bfe71
make-money/treasure-hunting/\345\257\273\345\256\235\346\211\213\345\206\214.pdf
... | ... | Binary files /dev/null and "b/make-money/treasure-hunting/\345\257\273\345\256\235\346\211\213\345\206\214.pdf" differ |
media/documents/documents.md
... | ... | @@ -0,0 +1,45 @@ |
1 | + |
|
2 | +# documents |
|
3 | + |
|
4 | + |
|
5 | +## MarkItDown |
|
6 | + |
|
7 | +https://github.com/microsoft/markitdown |
|
8 | + |
|
9 | + |
|
10 | +MarkItDown is a utility for converting various files to Markdown (e.g., for indexing or text analysis). It supports: |
|
11 | + |
|
12 | +- PDF |
|
13 | +- PowerPoint |
|
14 | +- Word |
|
15 | +- Excel |
|
16 | +- Images (EXIF metadata and OCR) |
|
17 | +- Audio (EXIF metadata and speech transcription) |
|
18 | +- HTML |
|
19 | +- Text-based formats (CSV, JSON, XML) |
|
20 | +- ZIP files (iterates over contents) |
|
21 | + |
|
22 | +To install MarkItDown, use pip: `pip install markitdown`. Alternatively, you can install it from source: `pip install -e .` |
|
23 | + |
|
24 | + |
|
25 | +markitdown path-to-file.pdf > document.md |
|
26 | + |
|
27 | +markitdown 273424552.pdf > 273424552.md |
|
28 | + |
|
29 | +/d/HE2/Downloads |
|
30 | + |
|
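31 | +Error on Windows (stdout uses the GBK codec, which cannot encode the non-breaking space `\xa0`; setting `PYTHONIOENCODING=utf-8` before running is a likely workaround): |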
|
32 | + |
|
33 | + Traceback (most recent call last): |
|
34 | + File "<frozen runpy>", line 198, in _run_module_as_main |
|
35 | + File "<frozen runpy>", line 88, in _run_code |
|
36 | + File "C:\Users\Administrator\AppData\Local\Programs\Python\Python313\Scripts\m |
|
37 | + arkitdown.exe\__main__.py", line 7, in <module> |
|
38 | + sys.exit(main()) |
|
39 | + ~~~~^^ |
|
40 | + File "C:\Users\Administrator\AppData\Local\Programs\Python\Python313\Lib\site- |
|
41 | + packages\markitdown\__main__.py", line 43, in main |
|
42 | + print(result.text_content) |
|
43 | + ~~~~~^^^^^^^^^^^^^^^^^^^^^ |
|
44 | + UnicodeEncodeError: 'gbk' codec can't encode character '\xa0' in position 7: ill |
|
45 | + egal multibyte sequence |