tkExcel - D2MatE

tkExcelクラス

機能： Excelファイル (.xlsx) の取り扱い

Excelファイルの読み込み (パスワードあり対応)
xls = tkExcelDB()
xls = xls.open()
Excelファイルへ保存
xls.save()
Excelファイルの構造抽出
xls.get_labels()
xls.get_icolumn_from_label()
簡単な検索式でデータを抽出し、リスト変数、辞書変数などで受け取る
xls.select()
pandas.DataFrameに変換
xls.use_dataframe()

継承: tkExcel(tkDataFile):

モジュール: openpyxl, pandas

attributes:
self.fp             # 現在開いているファイルポインタ
self.path         # 現在開いているファイルパス
self.mode         # 現在開いているモード
self.password     # 暗号化されたファイルのパスワード。Noneの場合はパスワード無し
self.tmp_file       # 暗号化されたファイルを開く際の一時ファイルパス
self.wb             # workbook
self.ws             # current worksheet
self.table_name # DBとして扱う際のテーブル名。current worksheet名に一致
self.isheet         # current worksheetの番号
self.sheetname   # current worksheet名
self.dataframe   # use_dataframe()を使った場合、pandas.DataFrame変数の参照が入る

コンストラクタ: xls = tkExcelDB(path = None, mode = 'r', table_name = None, password = None, tmp_file = None,
                                OpenFile = True, CloseFile = False, data_only = True, description = '', **args)
    OpenFile is Trueの場合: self.open()を呼び出し、ファイル内容を取得する
ファイルオープン: xls.open(path = None, mode = None, data_only = True, encoding = None, IsPrint = True)
暗号化ファイルの読み込み: xls.open_encrypted(self, path = None, mode = None, password = None, tmp_file = 'tmp.xlsx',
                    data_only = True, encoding = None, IsPrint = True):
ファイルクローズ: xls.close()

インスタンス情報表示: xls.print_inf()

print()と同じように、現在位置から改行しながらcellに出力する: xls.print(*vals, end = '\n')
current worksheetのcellにvalを設定する: xls.write(irow, icol, val)　　　set()と同じ
cellにvalを設定する: xls.set(irow, icol, val, ws = None)
cellから値を取得する: val = xls.get(irow, icol, def_val = None, ws = None)
cellの書式と値をコピーする: xls.copy_cell_format( ws_source = None, irow_source = None, icol_source = None,
ws_target = None, irow_target = None, icol_target = None, format = 'value|fill|font|border'):

同じworkbookのシートをcurrent worksheetにコピーする: xls.copy_worksheet( ws_source, title = None):
異なるworkbookのシートをcurrent worksheetにコピーする: xls.copy_worksheet2( ws_source, title = None):

indexesとrowsのリスト変数で与えた列、行に対応するラベルとデータをリスト変数で取得:
labels, data_list = xls.get_specified_data(indexes = None, rows = None, convert_label = True)

labelで与えた正規表現に一致する列番号を返す: icol = xls.get_icolumn_from_label_regex(label, column_org = 1, row_org = 1, sheet = None)
labelで与えた文字列に一致する列番号を返す: icol = xls.get_icolumn_from_label(label, column_org = 1, row_org = 1, sheet = None)
valで与えた値に一致する行番号を返す: irow = xls.find_val_in_row(label, column_org = 1, row_org = 1, sheet = None)
valで与えた値に一致する列番号を返す: icol = xls.find_val_in_column(val, icolumn, row_org = 1, sheet = None, auto_add = False)

conditionの検索条件に合う行番号のリストを取得: irows_list = xls.select_irows(condition, key_column_org = 1, key_row_org = 1, target_row_org = 2, is_print = False, first_hit_only = False)
conditionの検索条件に合うデータから、target_labelsの列のデータの2次元リスト変数、辞書変数、行番号のリスト変数を取得:
    ret = xls.select(condition, target_labels = None, key_column_org = 1, target_row_org = 2, key_row_org = 1, ret_type = 'list', is_print = False):
    ret: ret_type == 'all': ラベルのリスト (target_labelsに対応するラベル名)、リスト変数、辞書変数、行番号のリスト変数を含む辞書変数
          ret_type == 'list': labels, data_list を返す
          ret_type == 'dict': labels, data_dict を返す
          ret_type == 'irows': labels, irows_list を返す

2次元リスト変数をpandas.DataFrameに変換: df = xls.use_dataframe(labels, data_list):

worksheetを挿入: create_sheet(self, index = 0, title = None):
worksheetを削除: remove_sheet(self, title):
worksheet名を設定: set_sheet_name(self, title, sheet = None):
index番目のworksheetを取得: get_sheet(self, index):
シート名からworksheetを取得: get_sheet_by_name(self, title):
シート名あるいはその正規表現からworksheet番号を取得: find_isheet(self, name, reg_exp = False):

行を挿入: insert_rows(self, irow):
フォントや書式を指定して列を挿入: insert_cols(self, icol, sheet = None, label = None, check_exist = False, column_org = 1, row_org = 1,
font = None, alignment = None, fill = None, border = None,
width = None, restore_width = True, restore_hidden = True):

固定行を設定: freeze_panes(self, cell = 'A1', sheet = None):

最大行数を取得: max_row(self):
最大列数を取得: max_column(self):

ラベル行からラベルのリストを取得: get_labels(self, ws = None, irow_origin = 1, icol_origin = 1)

行削除: delete_rows(self, irow):
列削除: delete_cols(self, icol):
行データのリストを取得: get_row(self, irow, ws = None):

プログラム例:

# ファイル読み込み
xls_in1 = tkExcelDB(infile1, table_name = None, password = password, tmp_file = tmp_file,
                OpenFile = True, CloseFile = True, description = '入力ファイル1', IsPrint = True)
if xls_in1.ws is None:
    print("")
    print(f"Error: Cannot read [{infile1}]")

# ファイル作成: outfileが存在したら、まずmode = 'r'で読み込み、処理をした後 save()で保存する
print(f"出力ファイル {outfile}を読み込みます")
xls_out = tkExcelDB(outfile, table_name = None, password = password, tmp_file = tmp_file,
        mode = 'r', OpenFile = True, CloseFile = True, data_only = True, description = '出力ファイル', IsPrint = True)
print(f"出力ファイル {outfile} に保存します")
xls_out.save(path = None, workbook = None)

#入力ファイルからラベルがkeyである行を抽出
key_column_org1 = xls_in1.get_icolumn_from_label(key, column_org = 1, row_org = 1, sheet = None)
if key_column_org1 is None:
    print(f"Error: [{key}]列が入力ファイル1 [{infile1}] {icol_origin1}列以降の{irow_label1}行目に見つかりません")

#ファイル情報を表示
xls_in1.print_inf()

#入力ファイルからラベルがkeyの値が空白でないデータを抽出。ラベル行は１行目、データは2行目から始まる
# ret_type='all'にすると、リスト変数、辞書変数と、抽出した行番号のリストを含む辞書変数を返す
# 検索演算子としては、=, !=, <, <=, >, >= (数値型)、eq, neq (文字列型)、正規表現として like, not like が使える
# 左辺はラベル名でなければいけない (ラベル名は正規表現で指定する。"番号" に正確にヒットさせる場合、"^番号$" とする)
# 例: '番号' > 100 and '番号' < 1000 and '得点' > 50
# 例: '番号' > 100 and '番号' < 1000 and '得点' > 50
condition = f"'{key}' neq ''"
inf = xls_in1.select(condition, target_labels = None, key_column_org = 1, key_row_org = 1, target_row_org = 2, ret_type = 'all', is_print = False)
target_labels = inf["labels"]
data_list = inf["list"]
data_dict = inf["dict"]
irow_list = inf["irows"]

#cellの値を取得
val = xls_in2.get(irow = irow_source, icol = icol_source)

#cell書式を含めてコピー
xls_out.copy_cell_format(ws_source = xls_in1.ws, irow_source = irow_source, icol_source = icol_source,
ws_target = None, irow_target = irow_target, icol_target = icol_target,
format = 'value|fill|font|border')

# pandas.DataFrameに変換
df_labels = ['id', 'score'] # DataFrameで検索する場合、ラベル (columns) はpythonの変数名として使える文字列でなければいけない
df = db_exam.use_dataframe(labels = df_labels, data_list = data_list)

# DataFrameでの検索の仕方
df_labels = ['id', 'cancel', 'result', 'focus', 'prof', 'prof1']
dq = df.query("100 < id < 1000") # idが100より大きく1000より小さいデータを抽出
dq = df.query("id.str.contains('M')") # idに文字列 M が含まれるデータを抽出