You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

125 lines
4.7 KiB

from flask import Flask, request, send_from_directory, jsonify, abort
import os
import logging
import requests
from pdf_to_word import pdf_to_word_txt, pdf_to_word_docx
from datetime import datetime
from threading import Thread
# Set up logging.
# logging.basicConfig(filename=...) opens the log file immediately, so the
# parent directory must exist first or the import fails with FileNotFoundError.
# NOTE: filemode='w' truncates the log file every time logging is configured.
os.makedirs('/app/logs', exist_ok=True)
logging.basicConfig(
    level=logging.INFO,
    filename='/app/logs/app.log',
    filemode='w',
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)

# Configure specific logger for potentially noisy libraries or modules
watchdog_logger = logging.getLogger('watchdog')
watchdog_logger.setLevel(logging.ERROR)  # Show only errors from watchdog

app = Flask(__name__)

# Working directories: raw uploads, converted documents, and files received
# on the /callback endpoint.
UPLOAD_FOLDER = '/app/uploads'
OUTPUT_FOLDER = '/app/outputs'
CALLBACK_FOLDER = '/app/callbacks'

# exist_ok=True makes creation idempotent and removes the window between an
# os.path.exists() check and the makedirs() call in which another process
# could create the directory and make makedirs() raise FileExistsError.
for _folder in (UPLOAD_FOLDER, OUTPUT_FOLDER, CALLBACK_FOLDER):
    os.makedirs(_folder, exist_ok=True)
def process_pdf_in_background(pdf_path, output_folder, callback_url=None, callback_timeout=30):
    """Convert a PDF to DOCX and optionally POST the result to a callback URL.

    Meant to be run in a worker thread: any exception is caught and logged
    rather than propagated, since no caller is waiting to handle it.

    Args:
        pdf_path: Path of the PDF file to convert.
        output_folder: Directory passed to ``pdf_to_word_docx`` for its output.
        callback_url: If given, the converted file is uploaded to this URL as
            the multipart field ``file``.
        callback_timeout: Timeout in seconds handed to ``requests.post`` so a
            stalled callback server cannot block the worker thread forever.

    Returns:
        None. Failures are reported through the log only.
    """
    try:
        # Convert the PDF to a Word document.
        output_path = pdf_to_word_docx(pdf_path, output_folder)
        if not output_path:
            # NOTE(review): presumably a falsy return means the conversion
            # failed -- confirm against pdf_to_word_docx.
            logging.error(f'PDF conversion produced no output for {pdf_path}')
            return
        if not callback_url:
            return

        # NOTE(security): callback_url may originate from a remote client
        # (see upload_pdf), so this request can be aimed at arbitrary hosts.
        # Consider validating it against an allow-list.
        with open(output_path, 'rb') as f:
            files = {'file': (os.path.basename(output_path), f)}
            response = requests.post(callback_url, files=files, timeout=callback_timeout)
            response.raise_for_status()
    except Exception as e:
        # Thread boundary: log with traceback instead of letting the thread die silently.
        logging.error(f'Background processing error: {e}', exc_info=True)
def start_background_task(target, *args):
    """Run ``target(*args)`` in a new thread.

    Args:
        target: Callable to execute in the background.
        *args: Positional arguments forwarded to ``target``.

    Returns:
        The started ``Thread``, so callers that care can ``join()`` it or
        check ``is_alive()``. Callers may ignore the return value.
    """
    thread = Thread(target=target, args=args)
    thread.start()
    return thread
@app.route('/upload-pdf', methods=['POST'])
def upload_pdf():
    """Accept a PDF upload and start its conversion in a background thread.

    Form fields:
        file: The PDF to convert (multipart upload).
        callback_url: Optional URL that receives the converted document.

    Returns:
        A JSON message with HTTP 202 once the file is saved and processing
        has been started.

    Aborts with HTTP 400 when no file is supplied or its name does not end
    in ``.pdf`` (case-insensitive).
    """
    # .get() instead of [...] so a missing field reaches the logged 400 below.
    file = request.files.get('file')
    callback_url = request.form.get('callback_url')

    # The filename is client-controlled: keep only its last path component
    # (normalising Windows separators) so names like '../../x.pdf' cannot
    # escape UPLOAD_FOLDER.
    if file and file.filename:
        filename = os.path.basename(file.filename.replace('\\', '/'))
    else:
        filename = ''

    if not filename.lower().endswith('.pdf'):
        logging.warning('Invalid file upload attempt')
        abort(400, 'Invalid file type or no file uploaded')

    # NOTE: the timestamp has one-second resolution, so two uploads with the
    # same name within the same second map to the same path.
    stem = os.path.splitext(filename)[0]
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    pdf_filename = stem + '_' + timestamp + '.pdf'
    pdf_path = os.path.join(UPLOAD_FOLDER, pdf_filename)
    file.save(pdf_path)
    logging.info(f'PDF uploaded and saved to {pdf_path}')

    # Start the PDF processing in a background thread
    start_background_task(process_pdf_in_background, pdf_path, OUTPUT_FOLDER, callback_url)

    # Respond immediately
    return jsonify({'message': 'File upload accepted, processing started'}), 202
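# NOTE: legacy synchronous version of the /upload-pdf route (converts and replies within the request); superseded by the background-thread version above and kept for reference only.
# @app.route('/upload-pdf', methods=['POST'])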
# def upload_pdf():
# file = request.files['file']
# callback_url = request.form.get('callback_url')
# if file and file.filename.endswith('.pdf'):
# pdf_filename = os.path.splitext(file.filename)[0] + '_' + datetime.now().strftime("%Y%m%d_%H%M%S") + '.pdf'
# pdf_path = os.path.join(UPLOAD_FOLDER, pdf_filename)
# file.save(pdf_path)
# logging.info(f'PDF uploaded and saved to {pdf_path}')
# # Convert PDF to text
# #output_path = pdf_to_word_txt(pdf_path, OUTPUT_FOLDER)
# output_path = pdf_to_word_docx(pdf_path, OUTPUT_FOLDER)
# if output_path:
# logging.info(f'Text file created at {output_path}')
# if callback_url:
# try:
# with open(output_path, 'rb') as f:
# files = {'file': (os.path.basename(output_path), f)}
# response = requests.post(callback_url, files=files)
# response.raise_for_status() # Will raise an HTTPError for bad requests
# return jsonify({'message': 'File processed and sent successfully'}), 200
# except requests.exceptions.RequestException as e:
# logging.error(f'Failed to send file to callback URL: {e}')
# abort(500, 'Failed to send file to callback URL')
# else:
# return send_from_directory(OUTPUT_FOLDER, os.path.basename(output_path), as_attachment=True)
# else:
# logging.error('Failed to convert PDF')
# abort(500, 'Conversion failed')
# else:
# logging.warning('Invalid file upload attempt')
# abort(400, 'Invalid file type or no file uploaded')
@app.route('/callback', methods=['POST'])
def callback():
    """Receive a file posted to this endpoint and store it in CALLBACK_FOLDER.

    Form fields:
        file: The uploaded file (multipart upload).

    Returns:
        A JSON confirmation with HTTP 200 once the file has been saved.

    Aborts with HTTP 400 when no file, or no usable filename, is supplied.
    """
    # .get() instead of [...] so a missing field reaches the logged 400 below.
    file = request.files.get('file')

    # The filename is client-controlled: keep only its last path component
    # (normalising Windows separators) so it cannot point outside
    # CALLBACK_FOLDER.
    if file and file.filename:
        filename = os.path.basename(file.filename.replace('\\', '/'))
    else:
        filename = ''

    # '.' and '..' survive basename() but name directories, not files.
    if not filename or filename in ('.', '..'):
        logging.error('No file received at callback')
        abort(400, 'No file received')

    filepath = os.path.join(CALLBACK_FOLDER, filename)
    file.save(filepath)
    logging.info(f'File received and saved at {filepath}')
    return jsonify({'message': 'File received and saved successfully'}), 200
if __name__ == '__main__':
    # The server listens on all interfaces, and the Werkzeug interactive
    # debugger lets anyone who can reach it execute code. Debug mode is
    # therefore opt-in via the FLASK_DEBUG environment variable instead of
    # being always on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled, host='0.0.0.0', port=5000)