Git Product home page Git Product logo

epftools's Introduction

EPF Tools

version number Actions Status License

This is an alpha-stage repository for the EPF analysis tool.

install

pip install -e git+https://github.com/gauravmeena0708/epftools#egg=epftools

Latest example use (2024): generating the final summary PDF

#!pip install -e git+https://github.com/gauravmeena0708/epf_tools2#egg=epftools2
import pandas as pd
from epftools import  ClaimProcessor, PDFGenerator, PDFGenerator2, DataFrameStyler

df = pd.read_csv(<claim csv>)
COLUMNS  = ['CLAIM ID', 'TASK ID', 'PENDING DAYS', 'STATUS', 'CLAIM TYPE']
RENAME_COLS  ={
    'CLAIM_ID':'CLAIM ID',
    'ACC_TASK_ID':'TASK ID',
    'STATUS':'STATUS',
    'PEN_DAYS':'PENDING DAYS',
    'FORM_TYPE_TEXT':'CLAIM TYPE'
}
df = df.rename(columns=RENAME_COLS)
processor = ClaimProcessor(10, 20)
df = processor.add_bins_and_categories(df)


elements = processor.get_elements_daily_summary(df, DataFrameStyler)
# Example usage:
html_template_path = 'data/template.html'
output_pdf_path = 'data/report_06_02_24.pdf'
wkhtmltopdf_path = r'C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe'
pdf_generator = PDFGenerator2(html_template_path, output_pdf_path, wkhtmltopdf_path)
pdf_generator.generate_pdf(elements,html=False)

Example Use: Generating bins and categories and flat pivot

import pandas as pd
from epftools import  ClaimProcessor, PDFGenerator, PDFGenerator2

# Load the claims export used throughout this example
df = pd.read_csv('data/claims.csv')
# NOTE(review): 15 and 20 look like pending-day bin edges (a later snippet
# refers to columns '0-15', '16-20' and '>20') — confirm against ClaimProcessor
processor = ClaimProcessor(15, 20)
df  = processor.add_bins_and_categories(df)
print(df.head())
# One flat pivot per pair of column names; the "GROUP"/"TASK" argument is
# combined with a second field each time.
# NOTE(review): which argument becomes rows vs. columns is not visible here — confirm
df1 = processor.get_flat_pivot(df,"GROUP","days_Group")
df2 = processor.get_flat_pivot(df,"GROUP","STATUS")
df3 = processor.get_flat_pivot(df,"GROUP","CATEGORY")
df4 = processor.get_flat_pivot(df,"GROUP","CLAIM TYPE")
df5 = processor.get_flat_pivot(df,"TASK","CATEGORY")
df6 = processor.get_flat_pivot(df,"TASK","STATUS")
# Collected in order so they can be handed to a PDF generator together
dataframes =[df1,df2,df3,df4,df5,df6]

PDFGenerator

pdf_generator = PDFGenerator(pdf_file="data/report.pdf")
pdf_generator.create_pdf(dataframes)

PDFGenerator2: if wkhtmltopdf is installed

# Example usage:
html_template_path = 'data/template.html'
output_pdf_path = 'data/out.pdf'
wkhtmltopdf_path = r'C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe'


pdf_generator = PDFGenerator2(html_template_path, output_pdf_path, wkhtmltopdf_path)
pdf_generator.generate_pdf(dataframes)

Table Style Chart Style

When you need to add a series of images to a PDF

from PIL import Image
import os, re, glob, base64
from epftools import  ClaimProcessor, PDFGenerator, PDFGenerator2

def get_image_file_as_base64_data(file):
    """Return the contents of *file* as a base64-encoded text string.

    The file is opened in binary mode, read in full, base64-encoded and the
    resulting bytes are decoded to ``str`` so the value can be embedded
    directly in a ``data:`` URI.
    """
    with open(file, 'rb') as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode()

# Folder that holds the exported figures
directory_path = "./"
# The dot is escaped so it only matches a literal "." (the unescaped form
# also accepted names such as "figure_1xpng").
pattern = re.compile(r"figure_\d+\.png")
all_files = os.listdir(directory_path)

# fullmatch rejects trailing junk such as "figure_1.png.bak". os.listdir
# returns names in arbitrary order, so sort by the figure number to get a
# stable page order in which figure_2 comes before figure_10.
matching_files = sorted(
    (filename for filename in all_files if pattern.fullmatch(filename)),
    key=lambda name: int(re.search(r"\d+", name).group()),
)
# One inline <img> tag per figure; the path is joined with directory_path so
# the snippet keeps working when the folder is not the current directory.
dataframes = [
    f'<img src="data:;base64,{get_image_file_as_base64_data(os.path.join(directory_path, file))}">'
    for file in matching_files
]

# Example usage:
html_template_path = 'data/template.html'
output_pdf_path = 'data/out.pdf'
wkhtmltopdf_path = r'C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe'


pdf_generator = PDFGenerator2(html_template_path, output_pdf_path, wkhtmltopdf_path)
pdf_generator.generate_pdf(dataframes, False)

Making stylized dataframes

import pandas as pd
from epftools import  ClaimProcessor, PDFGenerator, PDFGenerator2

df = pd.read_csv('data/claims.csv')
processor = ClaimProcessor(15, 20)
df = processor.add_bins_and_categories(df)
df1 = processor.get_flat_pivot(df, "days_Group", "GROUP")
df2 = processor.get_flat_pivot(df, "STATUS", "GROUP")

# Define style function for cell formatting
def highlight_min(s, color='green'):
    """Build a CSS list that shades every entry of *s* equal to its minimum.

    Returns one string per element: ``'background-color: <color>'`` where the
    element equals ``s.min()``, and an empty string everywhere else.
    """
    shade = f'background-color: {color}'
    at_minimum = s == s.min()
    return [shade if hit else '' for hit in at_minimum]

def highlight_max(s, color='yellow'):
    """Build a CSS list that shades every entry of *s* equal to its maximum.

    Returns one string per element: ``'background-color: <color>'`` where the
    element equals ``s.max()``, and an empty string everywhere else.
    """
    shade = f'background-color: {color}'
    at_maximum = s == s.max()
    return [shade if hit else '' for hit in at_maximum]

def highlight_top3(s, color='darkorange'):
    """Build a CSS list that emphasises the three largest entries of *s*.

    The index labels of ``s.nlargest(3)`` are looked up in ``s.index``; each
    matching position gets ``'color: <color>;font-weight: bold;'`` and every
    other position gets an empty string.
    """
    emphasis = f'color: {color};font-weight: bold;'
    leading_labels = s.nlargest(3).index
    # Membership is tested on index labels, not on the values themselves
    return [emphasis if hit else '' for hit in s.index.isin(leading_labels)]

def conditional_color(val, cutoff=100, color='red'):
    """Return a CSS text-colour rule for a single cell value.

    Values strictly greater than *cutoff* get ``'color: <color>'``; anything
    else (including a value equal to the cutoff) gets ``'color: black'``.
    """
    if val > cutoff:
        return f"color: {color}"
    return "color: black"

def color_quantile(s, color='red'):
    """Build a CSS list that shades the entries of *s* in its top quartile.

    An element is shaded with ``'background-color: <color>'`` when it is
    greater than or equal to ``s.quantile(0.75)``; other elements get an
    empty string.
    """
    shade = f'background-color: {color}'
    in_top_quartile = s >= s.quantile(0.75)
    return [shade if hit else '' for hit in in_top_quartile]
    

"""\
    .map(conditional_color,cutoff=2000, color='red',subset = pd.IndexSlice[u[:-1], ['0-15']])\
    .map(conditional_color,cutoff=400, color='red',subset = pd.IndexSlice[u[:-1], ["16-20"]]) \
    .map(conditional_color,cutoff=10, color='red',subset = pd.IndexSlice[u[:-1], [">20"]])
"""

def get_styled(df):
    """Return a Styler for *df* with top-3 and top-quartile highlighting.

    Both highlighters are applied row by row (``axis=1``) to the same
    sub-region: every first-level index label except the last, and every
    column except the last.
    """
    outer_labels = df.index.get_level_values(0)
    column_labels = df.columns
    # NOTE(review): the skipped last row/column are presumably totals — confirm
    target = pd.IndexSlice[outer_labels[:-1], column_labels[:-1]]
    styler = df.style.apply(highlight_top3, color='orangered', subset=target, axis=1)
    styler = styler.apply(color_quantile, color='khaki', subset=target, axis=1)
    return styler

df1_styled = get_styled(df1)
df2 = get_styled(df2)
# Display the styled DataFrame
display(df1_styled)

Converting stylized dataframes and images to PDF

import os, re, base64
from epftools import  ClaimProcessor, PDFGenerator, PDFGenerator2

def get_image_file_as_base64_data(file):
    """Read *file* in binary mode and return its bytes as base64 text."""
    with open(file, 'rb') as source:
        payload = base64.b64encode(source.read())
    # b64encode yields bytes; decode so the result can be placed in an f-string
    return payload.decode()

# Folder that holds the exported figures
directory_path = "./"
# The dot is escaped so it only matches a literal "." (the unescaped form
# also accepted names such as "figure_1xpng").
pattern = re.compile(r"figure_\d+\.png")
all_files = os.listdir(directory_path)
# fullmatch rejects trailing junk such as "figure_1.png.bak". os.listdir
# returns names in arbitrary order, so sort by the figure number to get a
# stable page order in which figure_2 comes before figure_10.
matching_files = sorted(
    (filename for filename in all_files if pattern.fullmatch(filename)),
    key=lambda name: int(re.search(r"\d+", name).group()),
)

# Styled tables first, then one inline <img> tag per figure
elements = [df1_styled.to_html(), df2.to_html()]
# Join with directory_path so the snippet keeps working when the folder is
# not the current directory.
elements.extend(
    f'<img src="data:;base64,{get_image_file_as_base64_data(os.path.join(directory_path, file))}">'
    for file in matching_files
)

# Example usage:
html_template_path = 'data/template.html'
output_pdf_path = 'data/out.pdf'
wkhtmltopdf_path = r'C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe'
pdf_generator = PDFGenerator2(html_template_path, output_pdf_path, wkhtmltopdf_path)
pdf_generator.generate_pdf(elements,html=False)

Using DataFrameStyler and PDFGenerator2 (wkhtmltopdf on Windows)

#!pip install -e git+https://github.com/gauravmeena0708/epf_tools2#egg=epftools2
import pandas as pd
from epftools import  ClaimProcessor, PDFGenerator, PDFGenerator2, DataFrameStyler

df = pd.read_csv('data/claims_26_12_23.csv')
processor = ClaimProcessor(10, 20)
df = processor.add_bins_and_categories(df)

def info(df):
    """Show the first rows of *df* and print the distinct values of key columns.

    The head of the frame is rendered with ``display`` and the unique values
    of 'CATEGORY', 'CLAIM TYPE', 'INT_CATEGORY' and 'STATUS2' are printed in
    that order.
    """
    display(df.head())
    for column in ('CATEGORY', 'CLAIM TYPE', 'INT_CATEGORY', 'STATUS2'):
        print(df[column].unique())
    
info(df)


elements = processor.get_elements_daily_summary(df, DataFrameStyler)
# Example usage:
html_template_path = 'data/template.html'
output_pdf_path = 'data/out.pdf'
wkhtmltopdf_path = r'C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe'
pdf_generator = PDFGenerator2(html_template_path, output_pdf_path, wkhtmltopdf_path)
pdf_generator.generate_pdf(elements,html=False)

Generate a PDF from a folder of images

import os
import re
import base64
from epftools import ClaimProcessor, PDFGenerator, PDFGenerator2
from glob import glob

matching_files = glob(os.path.join("./data/", "figure_*.png"))
print(matching_files)
def get_image_file_as_base64_data(file_path):
    """Return the bytes stored at *file_path* as a base64-encoded string."""
    with open(file_path, 'rb') as stream:
        content = stream.read()
    return base64.b64encode(content).decode()


def generate_html_elements(matching_files):
    """Return a list with one inline ``<img>`` tag per path in *matching_files*.

    Each image is embedded as a base64 ``data:`` URI, in the order the paths
    are given.
    """
    return [
        f'<img src="data:;base64,{get_image_file_as_base64_data(image_path)}">'
        for image_path in matching_files
    ]




html_template_path = 'data/template.html'
output_pdf_path = 'data/out.pdf'
wkhtmltopdf_path = r'C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe'
    
elements = generate_html_elements(matching_files)
    
pdf_generator = PDFGenerator2(html_template_path, output_pdf_path, wkhtmltopdf_path)
pdf_generator.generate_pdf(elements, html=False)

PDF Split

from epftools import  PDFTools as pt
# Source PDF to split
input_path = "in.pdf"
# Output name pattern with two positional placeholders.
# NOTE(review): presumably filled with each range's first and last page —
# confirm against PDFTools.split_pdf
output_path_template = "output_{0}_{1}.pdf"
# One (start, end) tuple per output file.
# NOTE(review): looks like 1-based, inclusive page numbers — confirm
page_ranges = [(1, 1), (2, 2), (3, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9), (10, 11), (12, 12), (13, 13), (14, 14), (15, 15), (16, 16), (17, 17), (18, 18)]
pt.split_pdf(input_path, output_path_template, page_ranges)

Excel Merge

from epftools import  ExcelMerger
folder_path = './/due2//'
merger = ExcelMerger(folder_path,ext=".xls")
merger.merge_and_save()

Periodicity analysis

!pip install -e git+https://github.com/gauravmeena0708/epftools#egg=epftools
!pip install epftools
!pip install reportlab pdfkit PyPDF2 pytesseract pdf2image

from epftools import PeriodicityProcessor
import pandas as pd

# Placeholder: replace with the real input location before running
path24 = '<path>'
# NOTE(review): '2023-12' is presumably the year-month being analysed — confirm
processor = PeriodicityProcessor(path24, '2023-12')
dall = processor.df
dall.head()
# Scratch frame for the split rejection text; 'text_column' and
# 'reason_category' are declared here but never filled in this snippet.
dall2 = pd.DataFrame(columns=['text_column', 'blank', 'reason1', 'reason2', 'reason_category'])
# Split REJECT_REASON on "<digit>)" at most twice; the pieces are stored as
# 'blank', 'reason1' and 'reason2'.
# NOTE(review): presumably the text looks like "1) ... 2) ..." so the first
# piece is empty — confirm against real data
dall2[['blank', 'reason1', 'reason2']] = dall['REJECT_REASON'].str.split(r'\d\)', n=2, expand=True)

# Copy the whitespace-trimmed reasons back onto the main frame
dall['reason1'] = dall2['reason1'].str.strip()
dall['reason2'] = dall2['reason2'].str.strip()
# Inspect only the rows whose form name is "Death-10D"
death10d = dall[dall['FORM_NAME']=="Death-10D"]
display(len(death10d))
display(death10d.head())
df2 = processor.col_grouped_rejection(dall,"GROUP_ID")

For Dashboard 3 data

#!pip install -e git+https://github.com/gauravmeena0708/epf_tools2#egg=epftools2
import pandas as pd
from epftools import  ClaimProcessor, PDFGenerator, PDFGenerator2, DataFrameStyler

df = pd.read_csv('data/claims_06_02_24.csv')
COLUMNS  = ['CLAIM ID', 'TASK ID', 'PENDING DAYS', 'STATUS', 'CLAIM TYPE']
RENAME_COLS  ={
    'CLAIM_ID':'CLAIM ID',
    'ACC_TASK_ID':'TASK ID',
    'STATUS':'STATUS',
    'PEN_DAYS':'PENDING DAYS',
    'FORM_TYPE_TEXT':'CLAIM TYPE'
}
df = df.rename(columns=RENAME_COLS)
processor = ClaimProcessor(10, 20)
df = processor.add_bins_and_categories(df)


elements = processor.get_elements_daily_summary(df, DataFrameStyler)
# Example usage:
html_template_path = 'data/template.html'
output_pdf_path = 'data/report_06_02_24.pdf'
wkhtmltopdf_path = r'C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe'
pdf_generator = PDFGenerator2(html_template_path, output_pdf_path, wkhtmltopdf_path)
pdf_generator.generate_pdf(elements,html=False)

epftools's People

Contributors

gauravmeena0708 avatar

Watchers

 avatar

epftools's Issues

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Something interesting about the web. A new door to the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Something interesting about games, to make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.