b63bd7a13150ebe41e4d1524109c97dc9c7bfe71
make-money/treasure-hunting/\345\257\273\345\256\235\346\211\213\345\206\214.pdf
| ... | ... | Binary files /dev/null and "b/make-money/treasure-hunting/\345\257\273\345\256\235\346\211\213\345\206\214.pdf" differ |
media/documents/documents.md
| ... | ... | @@ -0,0 +1,45 @@ |
| 1 | + |
|
| 2 | +# documents |
|
| 3 | + |
|
| 4 | + |
|
| 5 | +## MarkItDown |
|
| 6 | + |
|
| 7 | +https://github.com/microsoft/markitdown |
|
| 8 | + |
|
| 9 | + |
|
| 10 | +MarkItDown is a utility for converting various files to Markdown (e.g., for indexing or text analysis). It supports: |
|
| 11 | + |
|
| 12 | +- PDF |
|
| 13 | +- PowerPoint |
|
| 14 | +- Word |
|
| 15 | +- Excel |
|
| 16 | +- Images (EXIF metadata and OCR) |
|
| 17 | +- Audio (EXIF metadata and speech transcription) |
|
| 18 | +- HTML |
|
| 19 | +- Text-based formats (CSV, JSON, XML) |
|
| 20 | +- ZIP files (iterates over contents) |
|
| 21 | + |
|
| 22 | +To install MarkItDown, use pip: `pip install markitdown`. Alternatively, you can install it from source by running `pip install -e .` in the repository root. |
|
| 23 | + |
|
| 24 | + |
|
| 25 | +markitdown path-to-file.pdf > document.md |
|
| 26 | + |
|
| 27 | +markitdown 273424552.pdf > 273424552.md |
|
| 28 | + |
|
| 29 | +/d/HE2/Downloads |
|
| 30 | + |
|
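| 31 | +Error on Windows when redirecting the output (stdout uses the 'gbk' codec, which cannot encode the non-breaking space '\xa0'): |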
|
| 32 | + |
|
| 33 | + Traceback (most recent call last): |
|
| 34 | + File "<frozen runpy>", line 198, in _run_module_as_main |
|
| 35 | + File "<frozen runpy>", line 88, in _run_code |
|
| 36 | + File "C:\Users\Administrator\AppData\Local\Programs\Python\Python313\Scripts\m |
|
| 37 | + arkitdown.exe\__main__.py", line 7, in <module> |
|
| 38 | + sys.exit(main()) |
|
| 39 | + ~~~~^^ |
|
| 40 | + File "C:\Users\Administrator\AppData\Local\Programs\Python\Python313\Lib\site- |
|
| 41 | + packages\markitdown\__main__.py", line 43, in main |
|
| 42 | + print(result.text_content) |
|
| 43 | + ~~~~~^^^^^^^^^^^^^^^^^^^^^ |
|
| 44 | + UnicodeEncodeError: 'gbk' codec can't encode character '\xa0' in position 7: ill |
|
| 45 | + egal multibyte sequence |