Coverage for peakipy/utils.py: 99%
123 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-09-14 14:49 -0400
1import sys
2import json
3from datetime import datetime
4from pathlib import Path
5from typing import List
7from rich import print
8from rich.table import Table
# for printing dataframes
peaklist_columns_for_printing = ["INDEX", "ASS", "X_PPM", "Y_PPM", "CLUSTID", "MEMCNT"]

# column names selected for "bad" fits
# NOTE(review): presumably used when displaying fits flagged as problematic —
# confirm against callers (usage is not visible in this file)
bad_column_selection = [
    "clustid",
    "amp",
    "center_x_ppm",
    "center_y_ppm",
    "fwhm_x_hz",
    "fwhm_y_hz",
    "lineshape",
]
# styles paired positionally with bad_column_selection (both length 7)
# NOTE(review): "yellow" and "red" each appear twice — presumably intentional
# so the two lists stay the same length; confirm
bad_color_selection = [
    "green",
    "blue",
    "yellow",
    "red",
    "yellow",
    "red",
    "magenta",
]
def mkdir_tmp_dir(base_path: Path = Path("./")):
    """Ensure a ``tmp`` directory exists under *base_path* and return its path."""
    tmp = base_path / "tmp"
    # exist_ok: calling this repeatedly (e.g. on every run) is fine
    tmp.mkdir(exist_ok=True)
    return tmp
def create_log_path(base_path: Path = Path("./")):
    """Return the path of the run-log file located inside *base_path*."""
    log_file_name = "run_log.txt"
    return base_path / log_file_name
def run_log(log_name="run_log.txt"):
    """Append a time-stamped record of the current command line to a log file.

    Parameters
    ----------
    log_name : str or Path
        File the record is appended to (created if missing).
    """
    # copy sys.argv — the original code aliased it, so rewriting element 0
    # mutated the interpreter-wide argument list as a side effect
    sys_argv = list(sys.argv)
    if sys_argv:
        # log just the script's basename, not its full path
        sys_argv[0] = Path(sys_argv[0]).name
    run_args = " ".join(sys_argv)
    time_stamp = datetime.now().strftime("%A %d %B %Y at %H:%M")
    with open(log_name, "a") as log:
        log.write(f"# Script run on {time_stamp}:\n{run_args}\n")
def df_to_rich_table(df, title: str, columns: List[str], styles: List[str]):
    """Build a rich ``Table`` from selected columns of a dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
    title : str
        title of table
    columns : List[str]
        list of column names (must be in df)
    styles : List[str]
        list of styles in same order as columns

    Returns
    -------
    rich.table.Table
    """
    table = Table(title=title)
    for col, style in zip(columns, styles):
        table.add_column(col, style=style)
    for _, row in df.iterrows():
        # keep only the requested columns, in the requested order
        row = row[columns].values
        str_row = []
        for i in row:
            match i:
                case str():
                    str_row.append(f"{i}")
                case float() if i > 1e5:
                    # very large floats rendered in scientific notation
                    str_row.append(f"{i:.1e}")
                case float():
                    str_row.append(f"{i:.3f}")
                case bool():
                    # bool checked before int: bool is an int subclass
                    str_row.append(f"{i}")
                case int():
                    str_row.append(f"{i}")
                # NOTE(review): values matching none of the cases above are
                # silently omitted from the row
        table.add_row(*str_row)
    return table
def load_config(config_path):
    """Read a JSON config file; return an empty dict when the file is absent."""
    if not config_path.exists():
        return {}
    with open(config_path) as opened_config:
        return json.load(opened_config)
def write_config(config_path, config_dic):
    """Serialise *config_dic* as sorted, indented JSON to *config_path*."""
    serialised = json.dumps(config_dic, sort_keys=True, indent=4)
    with open(config_path, "w") as config:
        config.write(serialised)
def update_config_file(config_path, config_kvs):
    """Merge *config_kvs* into the config at *config_path*, write it back,
    and return the merged dict."""
    merged = load_config(config_path)
    merged.update(config_kvs)
    write_config(config_path, merged)
    return merged
def update_args_with_values_from_config_file(args, config_path="peakipy.config"):
    """read a peakipy config file, extract params and update args dict

    :param args: dict containing params extracted from docopt command line
    :type args: dict
    :param config_path: path to peakipy config file [default: peakipy.config]
    :type config_path: str

    :returns args: updated args dict
    :rtype args: dict
    :returns config: dict that resulted from reading config file
    :rtype config: dict

    """
    # update args with values from peakipy.config file
    config_path = Path(config_path)
    if config_path.exists():
        try:
            # JSONDecodeError can only come from this call (json.load inside
            # load_config); everything after it uses plain dict access
            config = load_config(config_path)
            print(
                f"[green]Using config file with dims [yellow]{config.get('dims')}[/yellow][/green]"
            )
            args["dims"] = config.get("dims", (0, 1, 2))
            # noise stays as whatever falsy value config.get returned (usually
            # None) when the key is absent or empty; args["noise"] then carries
            # that falsy value rather than False
            noise = config.get("noise")
            if noise:
                noise = float(noise)

            colors = config.get("colors", ["#5e3c99", "#e66101"])
        except json.decoder.JSONDecodeError:
            print(
                "[red]Your peakipy.config file is corrupted - maybe your JSON is not correct...[/red]"
            )
            print("[red]Not using[/red]")
            # corrupt config: fall back to caller-supplied colors / no noise
            noise = False
            colors = args.get("colors", ("#5e3c99", "#e66101"))
            config = {}
    else:
        print(
            "[red]No peakipy.config found - maybe you need to generate one with peakipy read or see docs[/red]"
        )
        noise = False
        colors = args.get("colors", ("#5e3c99", "#e66101"))
        config = {}

    args["noise"] = noise
    args["colors"] = colors

    return args, config
def update_linewidths_from_hz_to_points(peakipy_data):
    """Recompute the XW/YW linewidth columns (points) from XW_HZ/YW_HZ (Hz).

    Needed in case linewidths were adjusted when running edit.py.
    """
    frame = peakipy_data.df
    frame["XW"] = frame["XW_HZ"] * peakipy_data.pt_per_hz_f2
    frame["YW"] = frame["YW_HZ"] * peakipy_data.pt_per_hz_f1
    return peakipy_data
def update_peak_positions_from_ppm_to_points(peakipy_data):
    """Refresh the point-coordinate columns from the PPM columns.

    Peak positions may have been adjusted (in ppm) when running edit.py, so
    X_AXIS/Y_AXIS and the fractional X_AXISf/Y_AXISf columns are recomputed
    from X_PPM/Y_PPM via the unit-conversion objects uc_f2/uc_f1.
    """
    frame = peakipy_data.df
    uc_f1 = peakipy_data.uc_f1
    uc_f2 = peakipy_data.uc_f2
    frame["X_AXIS"] = frame.X_PPM.apply(lambda ppm: uc_f2(ppm, "PPM"))
    frame["Y_AXIS"] = frame.Y_PPM.apply(lambda ppm: uc_f1(ppm, "PPM"))
    frame["X_AXISf"] = frame.X_PPM.apply(lambda ppm: uc_f2.f(ppm, "PPM"))
    frame["Y_AXISf"] = frame.Y_PPM.apply(lambda ppm: uc_f1.f(ppm, "PPM"))
    return peakipy_data
def save_data(df, output_name):
    """Write *df* to *output_name*, choosing the format from the file suffix.

    ``.csv`` -> comma separated, ``.tab`` -> tab separated (both with floats
    formatted to 4 decimal places, no index); any other suffix -> pickle.
    """
    writers = {
        ".csv": lambda: df.to_csv(output_name, float_format="%.4f", index=False),
        ".tab": lambda: df.to_csv(
            output_name, sep="\t", float_format="%.4f", index=False
        ),
    }
    write = writers.get(output_name.suffix, lambda: df.to_pickle(output_name))
    write()
def check_data_shape_is_consistent_with_dims(peakipy_data):
    """Exit the program if the number of dims doesn't match the data rank.

    Parameters
    ----------
    peakipy_data
        Object exposing ``dims`` (sequence) and ``data`` (array with ``shape``).

    Raises
    ------
    SystemExit
        When ``len(peakipy_data.dims)`` differs from the data's rank.
    """
    if len(peakipy_data.dims) != len(peakipy_data.data.shape):
        print(
            f"Dims are {peakipy_data.dims} while data shape is {peakipy_data.data.shape}?"
        )
        # sys.exit instead of the site-injected exit() builtin, which is
        # intended for interactive sessions and absent under `python -S`
        sys.exit()
def check_for_include_column_and_add_if_missing(peakipy_data):
    """Ensure the peak dataframe has an ``include`` column.

    Older peak lists lack the column; every peak is defaulted to "yes" so
    downstream filtering on ``include`` keeps them all.

    Returns the (possibly modified) *peakipy_data* object.
    """
    if "include" not in peakipy_data.df.columns:
        # for compatibility with old peak lists; vectorised scalar assignment
        # replaces the original row-wise apply (same result, O(1) Python calls)
        peakipy_data.df["include"] = "yes"
    return peakipy_data
def remove_excluded_peaks(peakipy_data):
    """Drop peaks whose ``include`` column is not "yes".

    When any peaks are excluded, a summary table of them is printed before
    the dataframe is filtered. Returns the (possibly modified) object.
    """
    frame = peakipy_data.df
    excluded_mask = frame.include != "yes"
    if excluded_mask.sum() > 0:
        excluded = frame[excluded_mask][peaklist_columns_for_printing]
        summary = df_to_rich_table(
            excluded,
            title="[yellow] Excluded peaks [/yellow]",
            columns=excluded.columns,
            styles=["yellow"] * len(excluded.columns),
        )
        print(summary)
        peakipy_data.df = frame[frame.include == "yes"]
    return peakipy_data
def warn_if_trying_to_fit_large_clusters(max_cluster_size, peakipy_data):
    """Return the effective maximum cluster size.

    When *max_cluster_size* is None it defaults to the largest cluster in the
    peak list, and a warning is printed if any cluster contains more than 10
    peaks; otherwise the caller-supplied value is returned unchanged.
    """
    if max_cluster_size is not None:
        return max_cluster_size
    largest = peakipy_data.df.MEMCNT.max()
    if peakipy_data.df.MEMCNT.max() > 10:
        print(
            f"""[red]
            ##################################################################
            You have some clusters of as many as {largest} peaks.
            You may want to consider reducing the size of your clusters as the
            fits will struggle.

            Otherwise you can use the --max-cluster-size flag to exclude large
            clusters
            ##################################################################
            [/red]"""
        )
    return largest
249 return max_cluster_size