我有一个函数,它可以从轮廓向量创建节点和边缘向量。它按预期工作,但这个函数是到目前为止我的完整脚本中最慢的部分。运行大约需要150毫秒。
机器:Linux PC:
intel i7-12700k, 128 GB DDR4 3200 MHz RAM, Ubuntu LTS 20.04
Windows(WSL 2)笔记本电脑:
Ryzen 9 6900HX, 16 DB DDR5 RAM, Ubuntu 22.04 LTS (8 GB DDR5 RAM)
都使用gcc编译器和CMake和std=C++17
我的源文件是我整个程序的基础:
#ifndef DEFINITIONS_H
#define DEFINITIONS_H
#include <string>
#include <vector>
#include <string_view>
#include <string>
#include <iostream>
#include <unordered_map>
#include <algorithm>
#include <utility>
#include <immintrin.h>
#include <chrono>
typedef std::string s;
typedef std::string_view stv;
struct Experience
{
s from_date;
s to_date;
s position_title;
float duration;
s location;
s institution_name;
float salary;
Experience(s from_date, s to_date, s position_title, float duration, s location, s institution_name, float salary)
{
this->from_date = from_date;
this->to_date = to_date;
this->position_title = position_title;
this->duration = duration;
this->location = location;
this->institution_name = institution_name;
this->salary = salary;
}
Experience()
{
this->from_date = "";
this->to_date = "";
this->position_title = "";
this->duration = 0;
this->location = "";
this->institution_name = "";
this->salary = 0;
}
friend std::ostream& operator<<(std::ostream& os, const Experience& e)
{
os << "from_date: " << e.from_date << std::endl;
os << "to_date: " << e.to_date << std::endl;
os << "position_title: " << e.position_title << std::endl;
os << "duration: " << e.duration << std::endl;
os << "location: " << e.location << std::endl;
os << "institution_name: " << e.institution_name << std::endl;
os << "salary: " << e.salary << std::endl;
return os;
}
};
typedef std::vector<Experience> Experiences;
struct Profile
{
s linkedin_url;
s name;
Experiences experiences;
std::vector<s> skills;
Profile(s linkedin_url, s name, std::vector<s> skills, Experiences experiences)
{
this->linkedin_url = linkedin_url;
this->name = name;
this->skills = skills;
this->experiences = experiences;
}
Profile()
{
this->linkedin_url = "";
this->name = "";
this->skills = {};
this->experiences = {};
}
friend std::ostream& operator<<(std::ostream& os, const Profile& p)
{
os << "linkedin_url: " << p.linkedin_url << std::endl;
os << "name: " << p.name << std::endl;
os << "experiences: " << std::endl;
for (auto e : p.experiences)
{
os << '\t' << e << std::endl;
}
return os;
}
};
typedef std::vector<Profile> Profiles;
struct Node
{
s name;
s position_title;
s institution_name;
s location;
s industry;
s linkedin_url;
float duration;
int company_size;
float median_tenure;
float salary;
float headcount_growth;
float current_experience_duration;
Node(s t_name,s t_position_title, s t_institution_name, s t_location, s t_industry,s linkedin_url, float t_duration, float t_current_experience_duration, int t_company_size, float t_median_tenure, float t_salary, float t_headcount_growth )
{
this->name = t_name;
this->position_title = t_position_title;
this->institution_name = t_institution_name;
this->location = t_location;
this->industry = t_industry;
this->linkedin_url = linkedin_url;
this->duration = t_duration;
this->current_experience_duration = t_current_experience_duration;
this->company_size = t_company_size;
this->median_tenure = t_median_tenure;
this->salary = t_salary;
this->headcount_growth = t_headcount_growth;
}
Node()
{
this->name = "";
this->position_title = "";
this->institution_name = "";
this->location = "";
this->industry = "";
this->linkedin_url = "";
this->duration = 0;
this->current_experience_duration = 0;
this->company_size = 0;
this->median_tenure = 0;
this->salary = 0;
this->headcount_growth = 0;
}
bool operator==(const Node& other) const
{
if (this->name == other.name && this->position_title == other.position_title && this->institution_name == other.institution_name && this->location == other.location)
return true;
else
return false;
}
friend std::ostream& operator<<(std::ostream& os, const Node& node)
{
os << "Name: " << node.name << std::endl;
os << "Position Title: " << node.position_title << std::endl;
os << "Institution Name: " << node.institution_name << std::endl;
os << "Location: " << node.location << std::endl;
os << "Industry: " << node.industry << std::endl;
os << "Linkedin URL: " << node.linkedin_url << std::endl;
os << "Duration: " << node.duration << std::endl;
os << "Current Experience Duration "<< node.current_experience_duration << std::endl;
os << "Company Size: " << node.company_size << std::endl;
os << "Median Tenure: " << node.median_tenure << std::endl;
os << "Salary: " << node.salary << std::endl;
os << "Headcount Growth: " << node.headcount_growth << std::endl;
return os;
}
};
typedef std::vector<Node> Nodes;
struct Edge
{
Node source;
Node target;
s linkedin_url;
float duration;
int company_size;
float median_tenure;
float salary;
float headcount_growth;
Edge(Node t_source, Node t_target)
{
this->source = t_source;
this->target = t_target;
this->linkedin_url = this->target.linkedin_url;
this->duration = this->target.duration;
this->company_size = this->target.company_size;
this->median_tenure = this->target.median_tenure;
this->salary = this->target.salary;
this->headcount_growth = this->target.headcount_growth;
}
Edge()
{
this->source = Node();
this->target = Node();
this->linkedin_url = "";
this->duration = 0;
this->company_size = 0;
this->median_tenure = 0;
this->salary = 0;
this->headcount_growth = 0;
}
bool operator==(const Edge& other) const
{
if (this->source == other.source && this->target == other.target)
return true;
else
return false;
}
friend std::ostream& operator<<(std::ostream& os, const Edge& edge)
{
os << "Source: " << edge.source << std::endl;
os << "Target: " << edge.target << std::endl;
os << "Linkedin URL: " << edge.linkedin_url << std::endl;
os << "Duration: " << edge.duration << std::endl;
os << "Company Size: " << edge.company_size << std::endl;
os << "Median Tenure: " << edge.median_tenure << std::endl;
os << "Salary: " << edge.salary << std::endl;
os << "Headcount Growth: " << edge.headcount_growth << std::endl;
return os;
}
};
typedef std::vector<Edge> Edges;
struct Company
{
s name;
s industry;
float headcount_growth;
float median_tenure;
int company_size;
Company(s name, s industry, float headcount_growth, float median_tenure, int company_size)
{
this->name = name;
this->industry = industry;
this->headcount_growth = headcount_growth;
this->median_tenure = median_tenure;
this->company_size = company_size;
}
Company()
{
this->name = "";
this->industry = "";
this->headcount_growth = 0;
this->median_tenure = 0;
this->company_size = 0;
}
bool operator==(const Company& other) const
{
if (this->name == other.name)
{
return true;
}
else
{
return false;
}
}
friend std::ostream& operator<<(std::ostream& os, const Company& company)
{
os << "Name: " << company.name << std::endl;
os << "Industry: " << company.industry << std::endl;
os << "Headcount Growth: " << company.headcount_growth << std::endl;
os << "Median Tenure: " << company.median_tenure << std::endl;
os << "Company Size: " << company.company_size << std::endl;
return os;
}
};
typedef std::vector<Company> Companies;
#endif // DEFINITIONS_H
profiles.h源文件(未包含无关代码):
#ifndef PROFILES_H
#define PROFILES_H
#include "definitions.h"
std::pair<Nodes,Edges> create_edges_and_nodes_from_profiles(Profiles & profiles, Companies & companies );
#endif // PROFILES_H
profiles.cpp文件:
std::pair<Nodes,Edges> create_edges_and_nodes_from_profiles(Profiles & profiles, Companies & companies)
{
using namespace std::literals;
Nodes nodes;
Edges edges;
nodes.reserve(40'000);
edges.reserve(40'000);
bool use_profile;
std::unordered_map<s,float> company_name_median_tenure_map;
std::unordered_map<s,float> company_name_headcount_growth_map;
std::unordered_map<s,int> company_name_company_size_map;
std::unordered_map<s,s> company_name_industry_map;
float current_duration;
float t_company_size, t_median_tenure, t_headcount_growth, t_duration, t_salary;
s t_name, t_position_title, t_location, t_institution_name, t_industry;
#ifdef MONITOR
auto before_map_createion = std::chrono::high_resolution_clock::now();
#endif
for (Company & company : companies)
{
company_name_median_tenure_map[company.name] = company.median_tenure;
company_name_headcount_growth_map[company.name] = company.headcount_growth;
company_name_company_size_map[company.name] = company.company_size;
company_name_industry_map[company.name] = company.industry;
}
#ifdef MONITOR
auto after_map_creation = std::chrono::high_resolution_clock::now();
auto temp = std::chrono::duration_cast<std::chrono::milliseconds>(after_map_creation-before_map_createion).count();
std::cout<<temp<<std::endl;
#endif
for (Profile & profile : profiles)
{
Nodes nodes_temp;
nodes_temp.reserve(profile.experiences.size());
current_duration = 0.0;
use_profile = false;
for (Experience & experience : profile.experiences)
{
stv location = stv(experience.location);
if (location.compare(""sv)!=0)
{
//if (location.find("United Kingdom"sv)!=stv::npos)
t_name = profile.name;
t_position_title = experience.position_title;
t_location = experience.location;
t_duration = experience.duration;
t_institution_name = experience.institution_name;
t_company_size = company_name_company_size_map[t_institution_name];
t_median_tenure = company_name_median_tenure_map[t_institution_name];
t_headcount_growth = company_name_headcount_growth_map[t_institution_name];
t_salary = experience.salary;
t_industry = company_name_industry_map[t_institution_name];
Node node_obj = Node(std::move(t_name),
std::move(t_position_title),
std::move(t_institution_name),
std::move(t_location),
std::move(t_industry),
profile.linkedin_url,
t_duration,
current_duration,
t_company_size,
t_median_tenure,
t_salary,
t_headcount_growth);
nodes_temp.push_back(std::move(node_obj));
current_duration += t_duration;
}
}
for (Node & node : nodes_temp)
{
if (node.location.compare(""sv)!=0)
{
use_profile = true;
break;
}
}
if (!use_profile)
continue;
current_duration = 0.0;
for (Nodes::reverse_iterator rit=nodes_temp.rbegin();rit!=nodes_temp.rend();++rit)
{
rit->current_experience_duration = current_duration;
current_duration += rit->duration;
}
if (nodes_temp.size()>1)
{
for (int i=0;i<nodes_temp.size()-1;i++)
{
//Edge edge = Edge(&nodes_temp[i+1],&nodes_temp[i]);
edges.emplace_back(std::move(nodes_temp[i+1]),std::move(nodes_temp[i]));
}
}
for (auto & node : nodes_temp)
{
nodes.push_back(std::move(node));
}
}
return std::make_pair(std::move(nodes),std::move(edges));
}
在我的主驱动程序中:
#include "definitions.h"
#include "profiles.h"
int main()
{
// not shown (but assume this works and created the profile objects as shown in definitions.h)
Profiles profiles = get_profiles(coll_profiles);
// Make the same assumption as before
Companies companies = get_companies(coll_companies);
#ifdef MONITOR
auto before_creating_edges_and_nodes = std::chrono::high_resolution_clock::now();
#endif
// Create nodes and edges from profiles and companies
auto[nodes, edges] = create_edges_and_nodes_from_profiles(profiles,companies);
#ifdef MONITOR
auto after_creating_edges_and_nodes = std::chrono::high_resolution_clock::now();
auto time_creating_edges_and_nodes = std::chrono::duration_cast<std::chrono::milliseconds>(after_creating_edges_and_nodes - before_creating_edges_and_nodes).count();
#endif
// lots of functions using Nodes and Edges
//...
//...
return 0;
}
注意,我不是C++的Maven。上面显示的代码可以工作,但需要花费大量的时间(~150毫秒)。脚本的其余部分(大约15个函数)总共只需要不到100毫秒。
我的问题基本上是,我如何重构数据和/或对象的初始化,以便它尽可能快。
我问这个问题的原因是,我发现在谷歌上找不到如何做到这一点。可能是因为我不知道正确的术语。
1条答案
按热度按时间xxhby3vn1#
更换
与