python 创建差异百分比<=30%的组

nuypyhwy  于 2023-03-11  发布在  Python
关注(0)|答案(5)|浏览(138)

我正在计算所有值两两之间的百分比差异,然后据此创建分组。目前的输出是由2个值组成的组合,但我希望把彼此差异不超过30%的所有值合并到同一个组中。
工作代码如下所示

from itertools import combinations

def pctDiff(A,B):
    """Return the symmetric percentage difference between ``A`` and ``B``.

    The difference is taken relative to the mean of the two values, i.e.
    ``abs(A - B) / ((A + B) / 2) * 100``, so swapping the arguments gives
    the same result.

    Equal inputs always yield ``0.0``; this includes two zeros, for which
    the plain formula would divide by zero.

    Raises:
        ZeroDivisionError: if ``A + B == 0`` while ``A != B``.
    """
    if A == B:
        return 0.0
    return abs(A - B) * 200 / (A + B)

def main():
    """Print the vendor revenues and every vendor pair within 30% of each other.

    The revenues of the vendors listed in ``vendors_revenue_list`` are taken
    from the account record (vendors missing from the record are skipped),
    printed, and then every unordered pair whose ``pctDiff`` is at most 30
    is collected and printed as a list of 2-tuples.
    """
    record = {'acct_number':10202,'acct_name':'abc','v1_rev':3000,'v2_rev':4444,'v4_rev':234534,'v5_rev':5665,'v6_rev':66,'v7_rev':66,'v3_rev':66}
    vendors_revenue_list = ['v1_rev','v2_rev','v3_rev','v4_rev','v5_rev','v6_rev','v7_rev','v8_rev']

    # Keep only the vendors that are present in the record, in vendor-list order.
    dict2 = {k: record[k] for k in vendors_revenue_list if k in record}

    print(dict2)

    # Build the pair list exactly once.  It does not depend on any loop
    # variable, and building it unconditionally keeps ``groups`` defined
    # (as an empty list) even when fewer than two vendors are available.
    groups = [
        (a, b)
        for a, b in combinations(dict2, 2)
        if pctDiff(dict2[a], dict2[b]) <= 30
    ]

    print(groups)

输出

[('v2_rev', 'v5_rev'), ('v3_rev', 'v6_rev'), ('v3_rev', 'v7_rev'), ('v6_rev', 'v7_rev')]

所需输出应为

[('v2_rev', 'v5_rev'), ('v3_rev', 'v6_rev','v7_rev')]

9jyewag0

9jyewag01#

您可以对排序后的值使用二分查找(bisect)函数:依次把每个值作为参考点,找出与该参考值相差30%以内的所有值,从而得到这些值所对应的键范围,每个键范围即为一个组:

from bisect import bisect_right, bisect_left

D = {"A":100, "B":110, "C":120, "D":150, "E":160, "F":250}

# Keys ordered by ascending value, and the values in that same order so
# that they can be binary-searched.
keys = sorted(D, key=D.get)
values = [D[name] for name in keys]

maxPct = 30
# For the symmetric difference |a-b|*200/(a+b), two values are within
# maxPct percent exactly when larger/smaller <= (200+maxPct)/(200-maxPct).
ratio = (200 + maxPct) / (200 - maxPct)

# A set, so that identical key ranges found from different reference
# values are stored only once.
groups = set()
for pivot in values:
    lo = bisect_left(values, pivot / ratio)    # first value not below the lower bound
    hi = bisect_right(values, pivot * ratio)   # one past the last value not above the upper bound
    if hi - lo >= 2:                           # a group needs at least two members
        groups.add(tuple(keys[lo:hi]))

输出:

print(groups)
{('A', 'B', 'C'), ('C', 'D', 'E'), ('A', 'B', 'C', 'D', 'E')}
whhtz7ly

whhtz7ly2#

我想不出用combinations来实现这一点的方法,所以我选择只嵌套一个循环,并将满足以下条件的值附加到元组:

#provides all possible combination
    # Greedy grouping: each vendor that is not yet in a group becomes the
    # anchor of a new one and collects every later, still ungrouped vendor
    # whose revenue is within 30% of the anchor's revenue.
    items = list(dict2.items())
    added = [False] * len(vendors_revenue_list)   # True once a vendor belongs to a group
    groups = []
    for i, (anchor_name, anchor_rev) in enumerate(items):
        if added[i]:
            continue
        members = [anchor_name]
        for j, (other_name, other_rev) in enumerate(items[i + 1:], start=i + 1):
            if added[j]:
                continue
            if pctDiff(anchor_rev, other_rev) <= 30:
                members.append(other_name)
                added[i] = added[j] = True
        if len(members) > 1:                      # an anchor with no match is not a group
            groups.append(tuple(members))

    print(groups)

输出:

[('v2_rev', 'v5_rev'), ('v3_rev', 'v6_rev', 'v7_rev')]
ubof19bj

ubof19bj3#

我提出了这个解决方案,没有itertools包。

def get_keys_by_value(d, v):
    """Return a list of the keys of ``d`` whose value equals ``v``.

    Keys appear in the iteration order of ``d``; the list is empty when no
    value matches.
    """
    return [key for key in d if d[key] == v]

# NOTE: ``dict`` below is the account record from the question (it shadows
# the builtin there), and ``vendors_revenue_list`` is its list of vendor keys.
#
# Work only with the vendor revenue fields, so that an unrelated field whose
# value happens to equal a revenue (e.g. the account number) can never end up
# in a group.
revenues = {k: dict[k] for k in vendors_revenue_list if k in dict}
revenue_list = sorted(revenues.values())

results = []
# Compare each revenue with its successor in sorted order.  Every pair that is
# within 30% (inclusive, as in the question) forms a group made of all vendors
# sharing either of the two values.  Chains such as 100/110/120 are reported
# as overlapping groups; they are not merged into one.
for lower, upper in zip(revenue_list, revenue_list[1:]):
    if pctDiff(upper, lower) > 30:
        continue

    current_group = set(get_keys_by_value(revenues, lower))
    current_group.update(get_keys_by_value(revenues, upper))

    # Runs of equal revenues produce the same group repeatedly; keep it once.
    if current_group not in results:
        results.append(current_group)
print(results)
[{'v6_rev', 'v7_rev', 'v3_rev'}, {'v2_rev', 'v5_rev'}]
ecr0jaav

ecr0jaav4#

我认为您可能希望使用滑动窗口算法(例如,请参见 "Rolling or sliding window iterator?")。首先,获取一个按收入排序的列表,并保留各收入与企业的对应关系;然后,对于滑动窗口产生的每个窗口,计算该窗口中最高项与最低项之间的百分比差异,如果小于30%,则返回该窗口。
下面是一些示例代码:

## includes code adapted from https://stackoverflow.com/a/6822773/131187

def percent_diff(a,b):
    """Return True when ``b`` differs from ``a`` by less than 30% of ``a``.

    The difference is measured relative to ``a`` only, so the result is not
    symmetric in its arguments.

    When ``a`` is zero the relative difference is undefined; in that case
    only ``b == 0`` counts as being within the limit.
    """
    if a == 0:
        return b == 0
    # The left-hand side is already expressed in percent, so compare it with
    # 30 (percent), not with the fraction 0.3.
    return abs(100 * (a - b) / a) < 30

## source record
d = {'acct_number':10202,'acct_name':'abc','v1_rev':3000,'v2_rev':4444,'v4_rev':234534,'v5_rev':5665,'v6_rev':66,'v7_rev':66,'v3_rev':66}

## pick out the revenue fields as (revenue, name) pairs; the revenue comes
## first so that a plain sort orders the pairs by revenue
required_items = [name for name in d if '_rev' in name]
rev_items = sorted((d[name], name) for name in required_items)
print(rev_items)

n = len(rev_items)
print(n, ' revenue items')

## only windows of three consecutive items are examined <<----
window_size = 3

## slide the window over the sorted items and compare its two end values
print('Taking ', window_size, ' at a time')
for low_index in range(n - window_size + 1):
    high_index = low_index + window_size - 1
    print('indices:', low_index, high_index, end='')
    low = rev_items[low_index][0]
    high = rev_items[high_index][0]
    print(' values:', low, high, end='')
    print(' within' if percent_diff(low, high) else ' outside')
cnwbcb6i

cnwbcb6i5#

解决方案,用于处理我的所有场景

from itertools import groupby,accumulate

def pctDiff(A,B):
    """Return the symmetric percentage difference between ``A`` and ``B``.

    Computed relative to the mean of the two values:
    ``abs(A - B) / ((A + B) / 2) * 100``.

    Equal inputs always yield ``0.0``, which also covers two zeros (the
    plain formula would divide by zero there).

    Raises:
        ZeroDivisionError: if ``A + B == 0`` while ``A != B``.
    """
    if A == B:
        return 0.0
    return abs(A - B) * 200 / (A + B)

def main():
    """Collect the vendor revenues of one account and pass them on.

    Only vendors from ``vendors_revenue_list`` that are present in the
    record with a value other than ``None`` are kept; the resulting mapping
    is printed and handed to ``find_winner_percentage_based``.
    """
    record = {'acct_number':10202,'acct_name':'abc','v1_rev':100,'v2_rev':110,'v4_rev':2,'v5_rev':200,'v6_rev':210,'v7_rev':60000000,'v3_rev':2000}
    # Alternative records kept for trying out other scenarios:
    # record = {'acct_number':10202,'acct_name':'abc','v1_rev':200,'v2_rev':210,'v4_rev':2,'v5_rev':200,'v6_rev':210,'v7_rev':60000000,'v3_rev':200}
    # record = {'acct_number': 10202, 'acct_name': 'abc', 'v1_rev': 100, 'v2_rev': None, 'v4_rev': None, 'v5_rev': None,'v6_rev': None, 'v7_rev': None, 'v3_rev': None}
    # record = {'acct_number': 10202, 'acct_name': 'abc', 'v1_rev': 100,'v2_rev':110,'v3_rev':300}
    vendors_revenue_list = ['v1_rev','v2_rev','v4_rev','v5_rev','v6_rev','v8_rev' ,'v3_rev', 'v7_rev']

    # Vendor revenues in vendor-list order, skipping absent and None entries.
    D = {
        vendor: record[vendor]
        for vendor in vendors_revenue_list
        if record.get(vendor) is not None
    }
    print(f'D {D}')
    find_winner_percentage_based(D)

def pctDiff(A, B):
    """Return the symmetric percentage difference between ``A`` and ``B``.

    The result is ``abs(A - B)`` expressed as a percentage of the mean of
    the two values, so the argument order does not matter.

    Equal inputs always yield ``0.0``; without this shortcut two zeros
    would trigger a division by zero.

    Raises:
        ZeroDivisionError: if ``A + B == 0`` while ``A != B``.
    """
    if A == B:
        return 0.0
    return abs(A - B) * 200 / (A + B)

def find_winner_percentage_based(D):
    """Print the highest revenue of the top group of similar vendors.

    Vendors are ordered by ascending revenue and split into runs: a new run
    starts whenever a revenue differs from the previous (next lower) one by
    30% or more according to ``pctDiff``.  Runs with at least two vendors
    are the candidate groups; when exactly one vendor supplied data, that
    vendor forms the only group by itself.  The last group, i.e. the one
    with the highest revenues, is the winner, and its largest revenue is
    the final value printed.

    If there is no group at all (empty input, or no two neighbouring
    revenues within 30%), the function returns after printing the empty
    group list.

    Args:
        D: mapping of vendor key to revenue.

    Returns:
        None.  Results and intermediate traces are written to stdout.
    """
    keys = sorted(D, key=D.get)  # vendor keys in ascending revenue order
    print(f'keys {keys}')
    values = [D[k] for k in keys]  # revenues, aligned with ``keys``
    print(f'values {values}')
    if len(keys) == 1:
        # Only one vendor has provided data: it wins by default.  Stored as
        # a one-element tuple so it has the same shape as any other group.
        groups = [tuple(keys)]
    else:
        # Trace of the neighbouring value pairs (the first value is paired
        # with itself so that there is one line per vendor).
        for z in zip(values[:1] + values, values):
            print(z)
        print('1********')

        # Split the ordered vendors into runs of neighbours closer than 30%.
        runs = []
        previous = None
        for key, value in zip(keys, values):
            if not runs or pctDiff(previous, value) >= 30:
                runs.append([key])
            else:
                runs[-1].append(key)
            previous = value
        groups = [tuple(run) for run in runs if len(run) > 1]

        print('3********')

    print(groups)
    if not groups:
        # Nothing qualifies as a group, so there is no winner to report.
        return

    # Groups are in ascending revenue order; the last one is the winner.
    result = groups.pop()

    print(f'result {result}')
    print('4********')
    print(list(result))
    values = [D[k] for k in result]  # revenues of the winning group, ascending
    print('6********')
    print(values.pop())












# Call main() only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
   main()

我唯一想理解的是

groups = [tuple(g) for _, (*g,) in groupby(keys, lambda _: next(G)) if len(g) > 1]

        print('3********')

相关问题