You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

125 lines
4.7 KiB

8 months ago
8 months ago
8 months ago
8 months ago
8 months ago
8 months ago
8 months ago
8 months ago
8 months ago
8 months ago
  1. from flask import Flask, request, send_from_directory, jsonify, abort
  2. import os
  3. import logging
  4. import requests
  5. from pdf_to_word import pdf_to_word_txt, pdf_to_word_docx
  6. from datetime import datetime
  7. from threading import Thread
  # Make sure the log directory exists before configuring logging:
  # basicConfig(filename=...) opens the log file immediately and raises
  # FileNotFoundError when the parent directory is missing.
  os.makedirs('/app/logs', exist_ok=True)

  # Set up logging. filemode='w' truncates the log file on every start.
  logging.basicConfig(level=logging.INFO, filename='/app/logs/app.log', filemode='w',
                      format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

  # Configure specific logger for potentially noisy libraries or modules
  watchdog_logger = logging.getLogger('watchdog')
  watchdog_logger.setLevel(logging.ERROR)  # Show only errors from watchdog

  app = Flask(__name__)

  # Working directories: incoming PDFs, converted documents, and files
  # received on the /callback endpoint.
  UPLOAD_FOLDER = '/app/uploads'
  OUTPUT_FOLDER = '/app/outputs'
  CALLBACK_FOLDER = '/app/callbacks'

  # exist_ok=True replaces the check-then-create pattern, which can fail with
  # FileExistsError when another process creates the directory in between.
  os.makedirs(UPLOAD_FOLDER, exist_ok=True)
  os.makedirs(OUTPUT_FOLDER, exist_ok=True)
  os.makedirs(CALLBACK_FOLDER, exist_ok=True)
  def process_pdf_in_background(pdf_path, output_folder, callback_url=None, callback_timeout=30):
      """Convert a PDF and optionally POST the result to a callback URL.

      Intended to run off the request thread. Any exception is logged and
      swallowed here, because nothing is waiting on the result.

      Args:
          pdf_path: Path of the PDF to convert.
          output_folder: Directory handed to ``pdf_to_word_docx`` for its output.
          callback_url: If given, the converted file is uploaded there as a
              multipart form field named ``file``.
          callback_timeout: Seconds to wait on the callback request before
              giving up. Without a timeout, a stalled endpoint would block
              this thread indefinitely.
      """
      try:
          # NOTE(review): pdf_to_word_docx is assumed to return the output file
          # path, or a falsy value on failure - confirm against pdf_to_word.
          output_path = pdf_to_word_docx(pdf_path, output_folder)
          if not output_path:
              logging.error(f'PDF conversion produced no output for {pdf_path}')
              return
          if not callback_url:
              return
          with open(output_path, 'rb') as f:
              files = {'file': (os.path.basename(output_path), f)}
              response = requests.post(callback_url, files=files, timeout=callback_timeout)
          # Treat 4xx/5xx responses as failures so they end up in the log.
          response.raise_for_status()
      except Exception as e:
          # Top-level boundary of the worker: record the traceback, do not re-raise.
          logging.exception(f'Background processing error: {e}')
  def start_background_task(target, *args):
      """Run ``target(*args)`` on a newly started thread.

      The thread is started immediately and is not joined; nothing is returned.
      """
      Thread(target=target, args=args).start()
  @app.route('/upload-pdf', methods=['POST'])
  def upload_pdf():
      """Accept a PDF upload and start its conversion in the background.

      Expects a multipart form with a ``file`` field and an optional
      ``callback_url`` field. The PDF is stored in UPLOAD_FOLDER under a
      timestamped name, conversion is handed to a background thread, and the
      request is answered immediately.

      Returns:
          A JSON message with HTTP 202 when the upload is accepted.

      Aborts with HTTP 400 when no file is supplied or its name does not end
      in ``.pdf``.
      """
      file = request.files.get('file')
      # NOTE(review): callback_url is client-supplied and the background task
      # POSTs to it from this server; consider restricting it to an allow-list.
      callback_url = request.form.get('callback_url')

      # The filename is client-controlled. Keep only its last path component so
      # names such as '../../x.pdf' cannot escape UPLOAD_FOLDER. Backslashes are
      # normalised first so Windows-style paths are stripped as well.
      raw_name = (file.filename or '') if file else ''
      safe_name = os.path.basename(raw_name.replace('\\', '/'))

      if not safe_name.lower().endswith('.pdf'):
          logging.warning('Invalid file upload attempt')
          abort(400, 'Invalid file type or no file uploaded')

      # Timestamp suffix keeps repeated uploads of the same name apart
      # (one-second resolution).
      stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
      pdf_filename = os.path.splitext(safe_name)[0] + '_' + stamp + '.pdf'
      pdf_path = os.path.join(UPLOAD_FOLDER, pdf_filename)
      file.save(pdf_path)
      logging.info(f'PDF uploaded and saved to {pdf_path}')

      # Start the PDF processing in a background thread
      start_background_task(process_pdf_in_background, pdf_path, OUTPUT_FOLDER, callback_url)

      # Respond immediately
      return jsonify({'message': 'File upload accepted, processing started'}), 202
  54. # @app.route('/upload-pdf', methods=['POST'])
  55. # def upload_pdf():
  56. # file = request.files['file']
  57. # callback_url = request.form.get('callback_url')
  58. # if file and file.filename.endswith('.pdf'):
  59. # pdf_filename = os.path.splitext(file.filename)[0] + '_' + datetime.now().strftime("%Y%m%d_%H%M%S") + '.pdf'
  60. # pdf_path = os.path.join(UPLOAD_FOLDER, pdf_filename)
  61. # file.save(pdf_path)
  62. # logging.info(f'PDF uploaded and saved to {pdf_path}')
  63. # # Convert PDF to text
  64. # #output_path = pdf_to_word_txt(pdf_path, OUTPUT_FOLDER)
  65. # output_path = pdf_to_word_docx(pdf_path, OUTPUT_FOLDER)
  66. # if output_path:
  67. # logging.info(f'Text file created at {output_path}')
  68. # if callback_url:
  69. # try:
  70. # with open(output_path, 'rb') as f:
  71. # files = {'file': (os.path.basename(output_path), f)}
  72. # response = requests.post(callback_url, files=files)
  73. # response.raise_for_status() # Will raise an HTTPError for bad requests
  74. # return jsonify({'message': 'File processed and sent successfully'}), 200
  75. # except requests.exceptions.RequestException as e:
  76. # logging.error(f'Failed to send file to callback URL: {e}')
  77. # abort(500, 'Failed to send file to callback URL')
  78. # else:
  79. # return send_from_directory(OUTPUT_FOLDER, os.path.basename(output_path), as_attachment=True)
  80. # else:
  81. # logging.error('Failed to convert PDF')
  82. # abort(500, 'Conversion failed')
  83. # else:
  84. # logging.warning('Invalid file upload attempt')
  85. # abort(400, 'Invalid file type or no file uploaded')
  @app.route('/callback', methods=['POST'])
  def callback():
      """Receive an uploaded file and store it in CALLBACK_FOLDER.

      Expects a multipart form with a ``file`` field.

      Returns:
          A JSON message with HTTP 200 once the file is saved.

      Aborts with HTTP 400 when no file, or a file without a usable name, is
      supplied.
      """
      file = request.files.get('file')

      # The filename is client-controlled. Keep only its last path component so
      # the file cannot be written outside CALLBACK_FOLDER.
      raw_name = (file.filename or '') if file else ''
      safe_name = os.path.basename(raw_name.replace('\\', '/'))

      if not safe_name:
          logging.error('No file received at callback')
          abort(400, 'No file received')

      filepath = os.path.join(CALLBACK_FOLDER, safe_name)
      file.save(filepath)
      logging.info(f'File received and saved at {filepath}')
      return jsonify({'message': 'File received and saved successfully'}), 200
  if __name__ == '__main__':
      # Debug mode enables the Werkzeug interactive debugger, which allows
      # arbitrary code execution. Since the server binds to all interfaces
      # (0.0.0.0), debug is off by default and is enabled only when
      # FLASK_DEBUG is set to a truthy value.
      debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
      app.run(debug=debug_enabled, host='0.0.0.0', port=5000)